ANOVA Statistical Programming with PHP

Code in this package first appeared on the O'Reilly Network on July 23, 2004.

by Paul Meagher <paul@datavore.com>

Last modified March 31 2006 @ 1:23 pm UTC


Demos:

Source code listing:



<?php

/**
* @package SFA
*
* @author Paul Meagher, Datavore Productions
* @license PHP v3.0
* @version 0.7
*
* Class implementing Single Factor ANOVA steps.
*/

require_once PHPMATH ."/PDL/FDistribution.php";

class 
SingleFactorANOVA {

  
/*
  * Database table holding experiment data.
  */
  
var $table null;

  
/**
  * Database column storing treatment levels.
  */
  
var $treatment null;

  
/**
  * Indexed array of treatment levels.
  */
  
var $levels = array();

  
/**
  * Stores the number of treatment levels.
  */
  
var $num_levels null;

  
/**
  * Database column storing the response measurement.
  */
  
var $response null;

  
/**
  * Associative array of treatment sums and grand sum.
  */
  
var $sums = array();

  
/**
  * Associative array of treatment sample sizes and grand n.
  */    
  
var $n = array();

  
/**
  * Associative array of treatment sample means and grand mean.
  */    
  
var $means = array();

  
/**
  * Associative array of treatment sample variances.
  */    
  
var $variance = array();

  
/**
  * Associative array of treatment effects.
  */    
  
var $effects = array();  

  
/**
  * Associative array of treatment, between, and within sums of squares.
  */
  
var $ss = array();

  
/**
  * Associative array of between and within df values.
  */
  
var $df = array();

  
/**
  * Associative array of between and within mean square estimates.
  */
  
var $ms = array();

  
/**
  * Stores the observed F score.
  */
  
var $f null;

  
/**
  * Stores the probability of the F score.
  */
  
var $p null;

  
/**
  * Alpha setting with traditional .05 default.
  */
  
var $alpha .05;

  
/**
  * Stores the critical F score.
  */
  
var $crit null;

  
/**
  * Indexed array of means sorted lowest to highest for comparison.
  */
  
var $diff_means null;

  
/**
  * Indexed array of treatment labels (shared same index as diff_means).
  */
  
var $diff_labels null;

  
/**
  * Matrix of mean differences.
  */
  
var $diffs null;


  
/**
  * Constructor that allows you to optionally set the data table, the 
  * treatment colum and the response column via the constructor.
  */
  
function SingleFactorANOVA($table=false$treatment=false$response=false$alpha=.05) {
    if ( (
$table != false) AND ($table != false) AND ($table != false) ) {
      
$this->table     $table;
      
$this->treatment $treatment;
      
$this->response  $response
      
$this->alpha     $alpha;       
    }
  }

  
/**  
  * Set database table to use.
  */
  
function setTable($table) {    
    
$this->table $table;
  }

  
/**  
  * Set treatment column.
  */
  
function setTreatment($column) {
    
$this->treatment $column;
  }

  
/**  
  * Set response column.
  */  
  
function setResponse($column) {
    
$this->response $column;
  }

  
/**  
  * Set alpha level.
  */  
  
function setAlpha($alpha) {
    
$this->alpha $alpha;
  }

  
/**  
  * Convenience function for inserting simulated values into data table.  
  * Prior to using this function, you must first have created the data table 
  * then instantiated $this->table, $this->treatment and $this->response to 
  * the relevant values.  See simulation.php example for sample usage.
  */    
  
function insert($level$responses$type=float) {
    global 
$db;
    foreach(
$responses AS $response) {
      if (
$type == "float") {
        
$y = (float) $response;
      } 
      if (
$type == "int") {
        
$y = (int) $response;
      }
      
$sql  " INSERT INTO $this->table ($this->treatment, $this->response) ";
      
$sql .= " VALUES ('$level', $y ) ";
      
$db->query($sql);
    }
  }    

  
/**
  * Purge the data table of existing values - useful for
  * simulation work.
  */
  
function emptyTable() {
    global 
$db;
    
$sql  " DELETE FROM $this->table ";
    
$result $db->query($sql);    
    if (
DB::isError($result)) {
      die(
$result->getMessage());
    } else {
      return 
true;
    }
  }
  
  
/**
  * Compute single factor ANOVA statistics. 
  */
  
function analyze() {

    global 
$db;

    
$sql  " SELECT $this->treatment, sum($this->response), ";
    
$sql .= " sum($this->response * $this->response), count($this->response) ";
    
$sql .= " FROM $this->table ";
    
$sql .= " GROUP BY $this->treatment ";

    
$result $db->query($sql);
    
    if (
DB::isError($result)) {
    
      die(
$result->getMessage());
    
    } else {

      while (
$row $result->fetchRow()) { 
        
$level  $row[0];
        
$this->levels[]      = $row[0];
        
$this->sums[$level]  = $row[1];
        
$this->n[$level]     = $row[3];        
        
$this->means[$level] = $this->sums[$level] / $this->n[$level];
        
$this->ss[$level]    = $row[2] - $this->n[$level] * pow($this->means[$level], 2);                         
        
$this->variance[$level] = $this->ss[$level] / ($this->n[$level] - 1);         
      }    

      
$this->sums["total"]  = array_sum($this->sums);
      
$this->n["total"]     = array_sum($this->n);
      
$this->means["grand"] = $this->sums["total"] / $this->n["total"];
      
$this->ss["within"]   = array_sum($this->ss);   

      foreach(
$this->levels as $level) {
        
$this->effects[$level] = $this->means[$level] - $this->means["grand"];
        
$this->ss["between"]  += $this->n[$level] * pow($this->effects[$level], 2); 
      }

      
$this->num_levels count($this->levels);

      
$this->df["between"] = $this->num_levels 1;
      
$this->df["within"]  = $this->n["total"] - $this->num_levels;  

      
$this->ms["between"] = $this->ss["between"] / $this->df["between"];    
      
$this->ms["within"]  = $this->ss["within"] / $this->df["within"];          

      
$this->$this->ms["between"] / $this->ms["within"];

      
$F          = new FDistribution($this->df["between"], $this->df["within"]);      
      
$this->p    $F->CDF($this->f);
      
$this->crit $F->inverseCDF($this->alpha);      

    }            
    
  }

  
/**
  * Creates an associate array containing the five number summary for each 
  * treatment level: minimum value, first quartile value, median value, 
  * third quartile value, and max value.  These numbers are used to specify 
  * box plot components for each treatment level.
  */
  
function getFiveNumberSummary() {
    global 
$db;
    
$sql  " SELECT $this->treatment, $this->response ";
    
$sql .= " FROM $this->table ";
    
$sql .= " ORDER BY $this->treatment, $this->response ASC ";
    
$result $db->query($sql);
    if (
DB::isError($result)) {
      die(
$result->getMessage());
    } else {
      while (
$row $result->fetchRow()) { 
        
$level     $row[0];
        
$response  $row[1];
        
$columns[$level][] = $response
      }
      foreach(
$this->levels AS $level) {                      
        
$summary[$level]["min"] = $columns[$level][0];
        
$n $this->n[$level];
        if (
$n 2) {
          
$half $n/2;           

          
$q1_lo floor($half 2) - 1;
          
$q1_hi floor($half 2); 
          
$summary[$level]["q1"] = ( $columns[$level][$q1_lo] + $columns[$level][$q2_hi] ) / 2;

          
$q2 ceil($half);
          
$summary[$level]["median"] = $columns[$level][$q2];

          
$q3_lo $half $q1 1;
          
$q3_hi $half $q1
          
$summary[$level]["q3"] = ( $columns[$level][$q3_lo] + $columns[$level][$q3_hi] ) / 2;
        } else {         
          
$half $n/2

          
$q1    floor(($half) / 2);
          
$summary[$level]["q1"] = $columns[$level][$q1];
                    
          
$q2_lo $half 1;
          
$q2_hi $half
          
$summary[$level]["median"] = ( $columns[$level][$q2_lo] + $columns[$level][$q2_hi] ) / 2;          
          
$q3    $half $q1;            
          
$summary[$level]["q3"] = $columns[$level][$q3];
        }        
        
$summary[$level]["max"] = $columns[$level][$n-1];        
      }      
      return 
$summary;
    }
  }
      
  
/*
  * Compute differences between treatment means.
  *
  * @see http://www.uwsp.edu/psych/cw/statmanual/posthocs.html
  */    
  
function getMeanDifferences() {
    
$means $this->means;
    unset(
$means["grand"]);   // remove grand mean
    
asort($means);            // sort lowest to highest
    
$this->diff_means  array_values($means);
    
$this->diff_labels array_keys($means);    
    for (
$i $this->num_levels 1$i >= 0$i--) {
      for (
$j=0$j $this->num_levels$j++) {      
        if (
$i $j) {
          
$this->diffs[$j][$i] = abs($this->diff_means[$j] - $this->diff_means[$i]);        
        }
      }
    }        
  }

  
/**
  * Show mean differences in an extensible tabular format. 
  */       
  
function showMeanDifferences($format="%.2f") {
    
$this->getMeanDifferences();
    
?> 
    <table>
      <tr>
        <th>Mean Differences</th>
      </tr>
      <tr>
        <td>     
          <table border='1' cellpadding='3' cellspacing='0'>      
            <tr bgcolor='#ffffcc'>
              <td>&nbsp;</td>
              <?php
              
for ($i=0$i $this->num_levels$i++) {    
                
$mean sprintf($format$this->diff_means[$i]);
                print 
"<td><b>".ucfirst($this->diff_labels[$i])." [$mean]</b></td>";
              }
              
?>
            </tr>
            <?php
            
for ($i=0$i $this->num_levels$i++) {
              print 
"<tr><td align='right' bgcolor='#ffffcc'><b>".ucfirst($this->diff_labels[$i])."</b></td>";
              for (
$j=0$j $this->num_levels$j++) {
                print 
"<td align='right'>";
                if(isset(
$this->diffs[$j][$i])) {
                  print 
$this->diffs[$j][$i];
                } else {
                  print 
"&nbsp";
                }            
                echo 
"</td>";
              }
              echo 
"</tr>";
            }
            
?>
          </table>
        </td>
      </tr>
    </table>
    <?php                  
  
}    
  
  
/*
  * Output ANOVA source table.
  */  
  
function showSourceTable($format="%.2f") {
    
?>
    <table>
      <tr>
        <th>ANOVA Source Table</th>
      </tr>
      <tr>
        <td align='center'>
          <table border='1' cellpadding='3' cellspacing='0'>
            <tr bgcolor='ffffcc'>
              <td><b>Source</b></td>
              <td align='center'><b>SS</b></td>
              <td align='center'><b>df</b></td>
              <td align='center'><b>MS</b></td>
              <td align='center'><b>F</b></td>
              <td align='center'><b>p</b></td>
            </tr>
            <tr>
              <td>Between</td>
              <td align='right'><?php printf($format$this->ss["between"]); ?></td>
              <td align='center'><?php print $this->df["between"]; ?></td>
              <td align='right'><?php printf($format$this->ms["between"]); ?></td>        
              <td><?php printf($format$this->f); ?></td>                
              <td><?php printf($format$this->p); ?></td>                        
            </tr>
            <tr>
              <td>Within</td>
              <td align='right'><?php printf($format$this->ss["within"]); ?></td>
              <td align='center'><?php print $this->df["within"]; ?></td>
              <td align='right'><?php printf($format$this->ms["within"]); ?></td>        
              <td>&nbsp;</td>                
              <td>&nbsp;</td>                        
            </tr>
            <tr bgcolor='ffffcc'>
              <td colspan='6'>          
                <small>
                <?php
                $crit 
sprintf($format$this->crit);
                print 
"Critical F($this->alpha, "$this->df["between"] .", "$this->df["within"] .") is $crit. ";
                
?>
                </small>
              </td>
            </tr>
          </table>            
        </td>
      </tr>
    </table>
    <?php
  
}
 
  
/*
  * Output basic descriptive statistics (means, stdev, n).
  */  
  
function showDescriptiveStatistics($format="%.2f") {
    
$treatment ucfirst($this->treatment);
    
?>
    <table>
      <tr>
        <th><?php echo "$treatment Levels"?></th>
      </tr>
      <tr>
        <td>
          <table border='1' cellspacing='0' cellpadding='3'>
            <tr bgcolor='ffffcc'>
              <th>&nbsp;</th>
              <?php
              
foreach ($this->levels as $level) {
                print 
"<th>$level</th>";        
              }
              
?>   
            </tr>
            <tr>
              <th>mean</th>
              <?php        
              
foreach ($this->levels as $level) {
                
$mean sprintf($format$this->means[$level]);
                print 
"<td align='right'>$mean</td>";        
              }
              
?>              
            </tr>
            <tr>
              <th>stdev</th>
              <?php
              
foreach ($this->levels as $level) {
                
$stdev sprintf($formatsqrt($this->ss[$level] / $this->n[$level]));
                print 
"<td align='right'>$stdev</td>";        
              }
              
?>              
            </tr>
            <tr>
              <th>n</th>
              <?php
              
foreach ($this->levels as $level) {
                print 
"<td align='right'>".$this->n[$level]."</td>";        
              }
              
?>              
            </tr>
          </table>
        </td>
      </tr>
    </table>    
    <?php    
  
}
  
  
/*
  * Output contents of database table.
  */  
  
function showRawData() {
    global 
$db;
    
$data $db->tableInfo($this->tableDB_TABLEINFO_ORDER);
    
$columns array_keys($data["order"]);
    
$num_columns count($columns);
    
?>
    <table cellspacing='0' cellpadding='0'>
      <tr>
        <td>
          <table border='1' cellspacing='0' cellpadding='3'>
            <?php
            
print "<tr bgcolor='ffffcc'>";
            for (
$i=0$i $num_columns$i++) {
              print 
"<td align='center'><b>".$columns[$i]."</b></td>";
            }
            print 
"</tr>";
            
$fields implode(","$columns);
            
$sql " SELECT $fields FROM $this->table ";
            
$result $db->query($sql);
            if (
DB::isError($result)) {
              die(
$result->getMessage());
            } else {
              while (
$row $result->fetchRow()) {
                print 
"<tr>";
                foreach(
$row as $key=>$value) {
                  print 
"<td>$value</td>";
                }
                print 
"</tr>";
              }
            }
            
?>
          </table>
        </td>
      </tr>
    </table>
    <?php    
  
}  

  
/**
  * The showBoxPlot method is a wrapper for JPGRAPH methods.  
  * The JPGRAPH constant defining to the root of the JPGraph 
  * library is specified in the config.php file.
  *
  * I only do very basic $params array handling in this method.  There 
  * is a lot of room for improvement in making parameter handling more
  * extensive (i.e., so you have more control over aspects of your plot) 
  * and more intelligent. 
  */
  
function showBoxPlot($params=array()) { 
         
    include_once 
JPGRAPH "/src/jpgraph.php";
    include_once 
JPGRAPH "/src/jpgraph_stock.php";

    
$summary $this->getFiveNumberSummary();
    
    
$yData = array();
    
    foreach(
$this->levels AS $level) {
      
// Data must be in the format : q1, q3, min, max, median.
      
$plot_parts = array("q1","q3","min","max","median");
      foreach(
$plot_parts AS $part) {
        
$yData[] = $summary[$level][$part];
      }
    }

    
$figureTitle $params["figureTitle"];
    
$xTitle $params["xTitle"];
    
$yTitle $params["yTitle"];

    if(!isset(
$figureTitle)) 
      
$figureTitle "Figure 1";
    
    if(!isset(
$xTitle)) 
      
$xTitle "x-level";    
    
    if(!isset(
$yTitle)) 
      
$yTitle "y-level";        
    
    
$plotWidth  400;
    
$plotHeight 300;
    
    
$yMin    $params["yMin"];
    
$yMax    $params["yMax"];
    
$yTicks  $params["yTicks"];
    
$yMargin 35;
    
    
$xMargin 15;
    
$xLabels $this->levels;

    
$graph = new Graph($plotWidth$plotHeight);

    
$graph->SetFrame(false); 
    
$graph->SetMarginColor('white');
    
$graph->title->Set($figureTitle);
    
    
$graph->SetScale("textlin"$yMin$yMax); 
    
$graph->yscale->ticks->Set($yTicks);
    
    
$graph->xaxis->SetTickLabels($xLabels); 
    
    
$graph->xaxis->SetTitle($xTitle,"center");
    
$graph->xaxis->SetTitleMargin($xMargin);
    
    
$graph->yaxis->SetTitle($yTitle"center");
    
$graph->yaxis->SetTitleMargin($yMargin);
    
    
// Create a new box plot.
    
$bp = new BoxPlot($yData);
    
    
// Indent bars so they dont start and end at the edge of the plot area. 
    
$bp->SetCenter(); 
    
    
// Width of the bars in pixels.
    
$bp->SetWidth(25);
    
    
// Set bar colors
    
$bp->SetColor('black','white','black','white'); 
    
    
// Set median colors
    
$bp->SetMedianColor('black','black'); 
    
    
$graph->Add($bp);
    
    if (isset(
$params["outputFile"])) {
      
$outputFile $params["outputFile"];
      
$graph_name "temp/$outputfile";      
    } else {  
      
$graph_name "temp/boxplot.png";
    }
    
    
$graph->Stroke($graph_name);
    
    echo 
"<img src='$graph_name' vspace='15' alt='$figureTitle'>";      
    
  }
     
}

?>



Back to source code listing