| // +----------------------------------------------------------------------+ // // $Id: Stats.php,v 1.15 2003/06/01 11:40:30 jmcastagnetto Exp $ // include_once 'PEAR.php'; /** * @package Math_Stats */ // Constants for defining the statistics to calculate /*{{{*/ /** * STATS_BASIC to generate the basic descriptive statistics */ define('STATS_BASIC', 1); /** * STATS_FULL to generate also higher moments, mode, median, etc. */ define('STATS_FULL', 2); /*}}}*/ // Constants describing the data set format /*{{{*/ /** * STATS_DATA_SIMPLE for an array of numeric values. This is the default. * e.g. $data = array(2,3,4,5,1,1,6); */ define('STATS_DATA_SIMPLE', 0); /** * STATS_DATA_CUMMULATIVE for an associative array of frequency values, * where in each array entry, the index is the data point and the * value the count (frequency): * e.g. $data = array(3=>4, 2.3=>5, 1.25=>6, 0.5=>3) */ define('STATS_DATA_CUMMULATIVE', 1); /*}}}*/ // Constants defining how to handle nulls /*{{{*/ /** * STATS_REJECT_NULL, reject data sets with null values. This is the default. * Any non-numeric value is considered a null in this context. */ define('STATS_REJECT_NULL', -1); /** * STATS_IGNORE_NULL, ignore null values and prune them from the data. * Any non-numeric value is considered a null in this context. */ define('STATS_IGNORE_NULL', -2); /** * STATS_USE_NULL_AS_ZERO, assign the value of 0 (zero) to null values. * Any non-numeric value is considered a null in this context. */ define('STATS_USE_NULL_AS_ZERO', -3); /*}}}*/ /** * A class to calculate descriptive statistics from a data set. * Data sets can be simple arrays of data, or a cummulative hash. * The second form is useful when passing large data set, * for example the data set: * *
* $data1 = array (1,2,1,1,1,1,3,3,4.1,3,2,2,4.1,1,1,2,3,3,2,2,1,1,2,2);
* 
* * can be epxressed more compactly as: * *
* $data2 = array('1'=>9, '2'=>8, '3'=>5, '4.1'=>2);
* 
* * Example of use: * *
* include_once 'Math/Stats.php';
* $s = new Math_Stats();
* $s->setData($data1);
* // or
* // $s->setData($data2, STATS_DATA_CUMMULATIVE);
* $stats = $s->calcBasic();
* echo 'Mean: '.$stats['mean'].' StDev: '.$stats['stdev'].' 
\n'; * * // using data with nulls * // first ignoring them: * $data3 = array(1.2, 'foo', 2.4, 3.1, 4.2, 3.2, null, 5.1, 6.2); * $s->setNullOption(STATS_IGNORE_NULL); * $s->setData($data3); * $stats3 = $s->calcFull(); * * // and then assuming nulls == 0 * $s->setNullOption(STATS_USE_NULL_AS_ZERO); * $s->setData($data3); * $stats3 = $s->calcFull(); *
* * Originally this class was part of NumPHP (Numeric PHP package) * * @author Jesus M. Castagnetto * @version 0.8 * @access public * @package Math_Stats */ class Base {/*{{{*/ // properties /*{{{*/ /** * The simple or cummulative data set. * Null by default. * * @access private * @var array */ public $_data = null; /** * Expanded data set. Only set when cummulative data * is being used. Null by default. * * @access private * @var array */ public $_dataExpanded = null; /** * Flag for data type, one of STATS_DATA_SIMPLE or * STATS_DATA_CUMMULATIVE. Null by default. * * @access private * @var int */ public $_dataOption = null; /** * Flag for null handling options. One of STATS_REJECT_NULL, * STATS_IGNORE_NULL or STATS_USE_NULL_AS_ZERO * * @access private * @var int */ public $_nullOption; /** * Array for caching result values, should be reset * when using setData() * * @access private * @var array */ public $_calculatedValues = array(); /*}}}*/ /** * Constructor for the class * * @access public * @param optional int $nullOption how to handle null values * @return object Math_Stats */ function Math_Stats($nullOption=STATS_REJECT_NULL) {/*{{{*/ $this->_nullOption = $nullOption; }/*}}}*/ /** * Sets and verifies the data, checking for nulls and using * the current null handling option * * @access public * @param array $arr the data set * @param optional int $opt data format: STATS_DATA_CUMMULATIVE or STATS_DATA_SIMPLE (default) * @return mixed true on success, a PEAR_Error object otherwise */ function setData($arr, $opt=STATS_DATA_SIMPLE) {/*{{{*/ if (!is_array($arr)) { return PEAR::raiseError('invalid data, an array of numeric data was expected'); } $this->_data = null; $this->_dataExpanded = null; $this->_dataOption = null; $this->_calculatedValues = array(); if ($opt == STATS_DATA_SIMPLE) { $this->_dataOption = $opt; $this->_data = array_values($arr); } else if ($opt == STATS_DATA_CUMMULATIVE) { $this->_dataOption = $opt; $this->_data = $arr; $this->_dataExpanded = array(); } return $this->_validate(); }/*}}}*/ /** * Returns the data which might have been modified * according to the current null handling options. * * @access public * @param boolean $expanded whether to return a expanded list, default is false * @return mixed array of data on success, a PEAR_Error object otherwise * @see _validate() */ function getData($expanded=false) {/*{{{*/ if ($this->_data == null) { return PEAR::raiseError('data has not been set'); } if ($this->_dataOption == STATS_DATA_CUMMULATIVE && $expanded) { return $this->_dataExpanded; } else { return $this->_data; } }/*}}}*/ /** * Sets the null handling option. * Must be called before assigning a new data set containing null values * * @access public * @return mixed true on success, a PEAR_Error object otherwise * @see _validate() */ function setNullOption($nullOption) {/*{{{*/ if ($nullOption == STATS_REJECT_NULL || $nullOption == STATS_IGNORE_NULL || $nullOption == STATS_USE_NULL_AS_ZERO) { $this->_nullOption = $nullOption; return true; } else { return PEAR::raiseError('invalid null handling option expecting: '. 'STATS_REJECT_NULL, STATS_IGNORE_NULL or STATS_USE_NULL_AS_ZERO'); } }/*}}}*/ /** * Transforms the data by substracting each entry from the mean and * dividing by its standard deviation. This will reset all pre-calculated * values to their original (unset) defaults. * * @access public * @return mixed true on success, a PEAR_Error object otherwise * @see mean() * @see stDev() * @see setData() */ function studentize() {/*{{{*/ $mean = $this->mean(); if (PEAR::isError($mean)) { return $mean; } $std = $this->stDev(); if (PEAR::isError($std)) { return $std; } if ($std == 0) { return PEAR::raiseError('cannot studentize data, standard deviation is zero.'); } $arr = array(); if ($this->_dataOption == STATS_DATA_CUMMULATIVE) { foreach ($this->_data as $val=>$freq) { $newval = ($val - $mean) / $std; $arr["$newval"] = $freq; } } else { foreach ($this->_data as $val) { $newval = ($val - $mean) / $std; $arr[] = $newval; } } return $this->setData($arr, $this->_dataOption); }/*}}}*/ /** * Transforms the data by substracting each entry from the mean. * This will reset all pre-calculated values to their original (unset) defaults. * * @access public * @return mixed true on success, a PEAR_Error object otherwise * @see mean() * @see setData() */ function center() {/*{{{*/ $mean = $this->mean(); if (PEAR::isError($mean)) { return $mean; } $arr = array(); if ($this->_dataOption == STATS_DATA_CUMMULATIVE) { foreach ($this->_data as $val=>$freq) { $newval = $val - $mean; $arr["$newval"] = $freq; } } else { foreach ($this->_data as $val) { $newval = $val - $mean; $arr[] = $newval; } } return $this->setData($arr, $this->_dataOption); }/*}}}*/ /** * Calculates the basic or full statistics for the data set * * @access public * @param int $mode one of STATS_BASIC or STATS_FULL * @param boolean $returnErrorObject whether the raw PEAR_Error (when true, default), * or only the error message will be returned (when false), if an error happens. * @return mixed an associative array of statistics on success, a PEAR_Error object otherwise * @see calcBasic() * @see calcFull() */ function calc($mode, $returnErrorObject=true) {/*{{{*/ if ($this->_data == null) { return PEAR::raiseError('data has not been set'); } if ($mode == STATS_BASIC) { return $this->calcBasic($returnErrorObject); } elseif ($mode == STATS_FULL) { return $this->calcFull($returnErrorObject); } else { return PEAR::raiseError('incorrect mode, expected STATS_BASIC or STATS_FULL'); } }/*}}}*/ /** * Calculates a basic set of statistics * * @access public * @param boolean $returnErrorObject whether the raw PEAR_Error (when true, default), * or only the error message will be returned (when false), if an error happens. * @return mixed an associative array of statistics on success, a PEAR_Error object otherwise * @see calc() * @see calcFull() */ function calcBasic($returnErrorObject=true) {/*{{{*/ return array ( 'min' => $this->__format($this->min(), $returnErrorObject), 'max' => $this->__format($this->max(), $returnErrorObject), 'sum' => $this->__format($this->sum(), $returnErrorObject), 'sum2' => $this->__format($this->sum2(), $returnErrorObject), 'count' => $this->__format($this->count(), $returnErrorObject), 'mean' => $this->__format($this->mean(), $returnErrorObject), 'stdev' => $this->__format($this->stDev(), $returnErrorObject), 'variance' => $this->__format($this->variance(), $returnErrorObject), 'range' => $this->__format($this->range(), $returnErrorObject) ); }/*}}}*/ /** * Calculates a full set of statistics * * @access public * @param boolean $returnErrorObject whether the raw PEAR_Error (when true, default), * or only the error message will be returned (when false), if an error happens. * @return mixed an associative array of statistics on success, a PEAR_Error object otherwise * @see calc() * @see calcBasic() */ function calcFull($returnErrorObject=true) {/*{{{*/ return array ( 'min' => $this->__format($this->min(), $returnErrorObject), 'max' => $this->__format($this->max(), $returnErrorObject), 'sum' => $this->__format($this->sum(), $returnErrorObject), 'sum2' => $this->__format($this->sum2(), $returnErrorObject), 'count' => $this->__format($this->count(), $returnErrorObject), 'mean' => $this->__format($this->mean(), $returnErrorObject), 'median' => $this->__format($this->median(), $returnErrorObject), 'mode' => $this->__format($this->mode(), $returnErrorObject), 'midrange' => $this->__format($this->midrange(), $returnErrorObject), 'geometric_mean' => $this->__format($this->geometricMean(), $returnErrorObject), 'harmonic_mean' => $this->__format($this->harmonicMean(), $returnErrorObject), 'stdev' => $this->__format($this->stDev(), $returnErrorObject), 'absdev' => $this->__format($this->absDev(), $returnErrorObject), 'variance' => $this->__format($this->variance(), $returnErrorObject), 'range' => $this->__format($this->range(), $returnErrorObject), 'std_error_of_mean' => $this->__format($this->stdErrorOfMean(), $returnErrorObject), 'skewness' => $this->__format($this->skewness(), $returnErrorObject), 'kurtosis' => $this->__format($this->kurtosis(), $returnErrorObject), 'coeff_of_variation' => $this->__format($this->coeffOfVariation(), $returnErrorObject), 'sample_central_moments' => array ( 1 => $this->__format($this->sampleCentralMoment(1), $returnErrorObject), 2 => $this->__format($this->sampleCentralMoment(2), $returnErrorObject), 3 => $this->__format($this->sampleCentralMoment(3), $returnErrorObject), 4 => $this->__format($this->sampleCentralMoment(4), $returnErrorObject), 5 => $this->__format($this->sampleCentralMoment(5), $returnErrorObject) ), 'sample_raw_moments' => array ( 1 => $this->__format($this->sampleRawMoment(1), $returnErrorObject), 2 => $this->__format($this->sampleRawMoment(2), $returnErrorObject), 3 => $this->__format($this->sampleRawMoment(3), $returnErrorObject), 4 => $this->__format($this->sampleRawMoment(4), $returnErrorObject), 5 => $this->__format($this->sampleRawMoment(5), $returnErrorObject) ), 'frequency' => $this->__format($this->frequency(), $returnErrorObject), 'quartiles' => $this->__format($this->quartiles(), $returnErrorObject), 'interquartile_range' => $this->__format($this->interquartileRange(), $returnErrorObject), 'interquartile_mean' => $this->__format($this->interquartileMean(), $returnErrorObject), 'quartile_deviation' => $this->__format($this->quartileDeviation(), $returnErrorObject), 'quartile_variation_coefficient' => $this->__format($this->quartileVariationCoefficient(), $returnErrorObject), 'quartile_skewness_coefficient' => $this->__format($this->quartileSkewnessCoefficient(), $returnErrorObject) ); }/*}}}*/ /** * Calculates the minimum of a data set. * Handles cummulative data sets correctly * * @access public * @return mixed the minimum value on success, a PEAR_Error object otherwise * @see calc() * @see max() */ function min() {/*{{{*/ if ($this->_data == null) { return PEAR::raiseError('data has not been set'); } if (!array_key_exists('min', $this->_calculatedValues)) { if ($this->_dataOption == STATS_DATA_CUMMULATIVE) { $min = min(array_keys($this->_data)); } else { $min = min($this->_data); } $this->_calculatedValues['min'] = $min; } return $this->_calculatedValues['min']; }/*}}}*/ /** * Calculates the maximum of a data set. * Handles cummulative data sets correctly * * @access public * @return mixed the maximum value on success, a PEAR_Error object otherwise * @see calc() * @see min() */ function max() {/*{{{*/ if ($this->_data == null) { return PEAR::raiseError('data has not been set'); } if (!array_key_exists('max', $this->_calculatedValues)) { if ($this->_dataOption == STATS_DATA_CUMMULATIVE) { $max = max(array_keys($this->_data)); } else { $max = max($this->_data); } $this->_calculatedValues['max'] = $max; } return $this->_calculatedValues['max']; }/*}}}*/ /** * Calculates SUM { xi } * Handles cummulative data sets correctly * * @access public * @return mixed the sum on success, a PEAR_Error object otherwise * @see calc() * @see sum2() * @see sumN() */ function sum() {/*{{{*/ if (!array_key_exists('sum', $this->_calculatedValues)) { $sum = $this->sumN(1); if (PEAR::isError($sum)) { return $sum; } else { $this->_calculatedValues['sum'] = $sum; } } return $this->_calculatedValues['sum']; }/*}}}*/ /** * Calculates SUM { (xi)^2 } * Handles cummulative data sets correctly * * @access public * @return mixed the sum on success, a PEAR_Error object otherwise * @see calc() * @see sum() * @see sumN() */ function sum2() {/*{{{*/ if (!array_key_exists('sum2', $this->_calculatedValues)) { $sum2 = $this->sumN(2); if (PEAR::isError($sum2)) { return $sum2; } else { $this->_calculatedValues['sum2'] = $sum2; } } return $this->_calculatedValues['sum2']; }/*}}}*/ /** * Calculates SUM { (xi)^n } * Handles cummulative data sets correctly * * @access public * @param numeric $n the exponent * @return mixed the sum on success, a PEAR_Error object otherwise * @see calc() * @see sum() * @see sum2() */ function sumN($n) {/*{{{*/ if ($this->_data == null) { return PEAR::raiseError('data has not been set'); } $sumN = 0; if ($this->_dataOption == STATS_DATA_CUMMULATIVE) { foreach($this->_data as $val=>$freq) { $sumN += $freq * pow((double)$val, (double)$n); } } else { foreach($this->_data as $val) { $sumN += pow((double)$val, (double)$n); } } return $sumN; }/*}}}*/ /** * Calculates PROD { (xi) }, (the product of all observations) * Handles cummulative data sets correctly * * @access public * @return mixed the product on success, a PEAR_Error object otherwise * @see productN() */ function product() {/*{{{*/ if (!array_key_exists('product', $this->_calculatedValues)) { $product = $this->productN(1); if (PEAR::isError($product)) { return $product; } else { $this->_calculatedValues['product'] = $product; } } return $this->_calculatedValues['product']; }/*}}}*/ /** * Calculates PROD { (xi)^n }, which is the product of all observations * Handles cummulative data sets correctly * * @access public * @param numeric $n the exponent * @return mixed the product on success, a PEAR_Error object otherwise * @see product() */ function productN($n) {/*{{{*/ if ($this->_data == null) { return PEAR::raiseError('data has not been set'); } $prodN = 1.0; if ($this->_dataOption == STATS_DATA_CUMMULATIVE) { foreach($this->_data as $val=>$freq) { if ($val == 0) { return 0.0; } $prodN *= $freq * pow((double)$val, (double)$n); } } else { foreach($this->_data as $val) { if ($val == 0) { return 0.0; } $prodN *= pow((double)$val, (double)$n); } } return $prodN; }/*}}}*/ /** * Calculates the number of data points in the set * Handles cummulative data sets correctly * * @access public * @return mixed the count on success, a PEAR_Error object otherwise * @see calc() */ function count() {/*{{{*/ if ($this->_data == null) { return PEAR::raiseError('data has not been set'); } if (!array_key_exists('count', $this->_calculatedValues)) { if ($this->_dataOption == STATS_DATA_CUMMULATIVE) { $count = count($this->_dataExpanded); } else { $count = count($this->_data); } $this->_calculatedValues['count'] = $count; } return $this->_calculatedValues['count']; }/*}}}*/ /** * Calculates the mean (average) of the data points in the set * Handles cummulative data sets correctly * * @access public * @return mixed the mean value on success, a PEAR_Error object otherwise * @see calc() * @see sum() * @see count() */ function mean() {/*{{{*/ if (!array_key_exists('mean', $this->_calculatedValues)) { $sum = $this->sum(); if (PEAR::isError($sum)) { return $sum; } $count = $this->count(); if (PEAR::isError($count)) { return $count; } $this->_calculatedValues['mean'] = $sum / $count; } return $this->_calculatedValues['mean']; }/*}}}*/ /** * Calculates the range of the data set = max - min * * @access public * @return mixed the value of the range on success, a PEAR_Error object otherwise. */ function range() {/*{{{*/ if (!array_key_exists('range', $this->_calculatedValues)) { $min = $this->min(); if (PEAR::isError($min)) { return $min; } $max = $this->max(); if (PEAR::isError($max)) { return $max; } $this->_calculatedValues['range'] = $max - $min; } return $this->_calculatedValues['range']; }/*}}}*/ /** * Calculates the variance (unbiased) of the data points in the set * Handles cummulative data sets correctly * * @access public * @return mixed the variance value on success, a PEAR_Error object otherwise * @see calc() * @see __sumdiff() * @see count() */ function variance() {/*{{{*/ if (!array_key_exists('variance', $this->_calculatedValues)) { $variance = $this->__calcVariance(); if (PEAR::isError($variance)) { return $variance; } $this->_calculatedValues['variance'] = $variance; } return $this->_calculatedValues['variance']; }/*}}}*/ /** * Calculates the standard deviation (unbiased) of the data points in the set * Handles cummulative data sets correctly * * @access public * @return mixed the standard deviation on success, a PEAR_Error object otherwise * @see calc() * @see variance() */ function stDev() {/*{{{*/ if (!array_key_exists('stDev', $this->_calculatedValues)) { $variance = $this->variance(); if (PEAR::isError($variance)) { return $variance; } $this->_calculatedValues['stDev'] = sqrt($variance); } return $this->_calculatedValues['stDev']; }/*}}}*/ /** * Calculates the variance (unbiased) of the data points in the set * given a fixed mean (average) value. Not used in calcBasic(), calcFull() * or calc(). * Handles cummulative data sets correctly * * @access public * @param numeric $mean the fixed mean value * @return mixed the variance on success, a PEAR_Error object otherwise * @see __sumdiff() * @see count() * @see variance() */ function varianceWithMean($mean) {/*{{{*/ return $this->__calcVariance($mean); }/*}}}*/ /** * Calculates the standard deviation (unbiased) of the data points in the set * given a fixed mean (average) value. Not used in calcBasic(), calcFull() * or calc(). * Handles cummulative data sets correctly * * @access public * @param numeric $mean the fixed mean value * @return mixed the standard deviation on success, a PEAR_Error object otherwise * @see varianceWithMean() * @see stDev() */ function stDevWithMean($mean) {/*{{{*/ $varianceWM = $this->varianceWithMean($mean); if (PEAR::isError($varianceWM)) { return $varianceWM; } return sqrt($varianceWM); }/*}}}*/ /** * Calculates the absolute deviation of the data points in the set * Handles cummulative data sets correctly * * @access public * @return mixed the absolute deviation on success, a PEAR_Error object otherwise * @see calc() * @see __sumabsdev() * @see count() * @see absDevWithMean() */ function absDev() {/*{{{*/ if (!array_key_exists('absDev', $this->_calculatedValues)) { $absDev = $this->__calcAbsoluteDeviation(); if (PEAR::isError($absdev)) { return $absdev; } $this->_calculatedValues['absDev'] = $absDev; } return $this->_calculatedValues['absDev']; }/*}}}*/ /** * Calculates the absolute deviation of the data points in the set * given a fixed mean (average) value. Not used in calcBasic(), calcFull() * or calc(). * Handles cummulative data sets correctly * * @access public * @param numeric $mean the fixed mean value * @return mixed the absolute deviation on success, a PEAR_Error object otherwise * @see __sumabsdev() * @see absDev() */ function absDevWithMean($mean) {/*{{{*/ return $this->__calcAbsoluteDeviation($mean); }/*}}}*/ /** * Calculates the skewness of the data distribution in the set * The skewness measures the degree of asymmetry of a distribution, * and is related to the third central moment of a distribution. * A normal distribution has a skewness = 0 * A distribution with a tail off towards the high end of the scale * (positive skew) has a skewness > 0 * A distribution with a tail off towards the low end of the scale * (negative skew) has a skewness < 0 * Handles cummulative data sets correctly * * @access public * @return mixed the skewness value on success, a PEAR_Error object otherwise * @see __sumdiff() * @see count() * @see stDev() * @see calc() */ function skewness() {/*{{{*/ if (!array_key_exists('skewness', $this->_calculatedValues)) { $count = $this->count(); if (PEAR::isError($count)) { return $count; } $stDev = $this->stDev(); if (PEAR::isError($stDev)) { return $stDev; } $sumdiff3 = $this->__sumdiff(3); if (PEAR::isError($sumdiff3)) { return $sumdiff3; } $this->_calculatedValues['skewness'] = ($sumdiff3 / ($count * pow($stDev, 3))); } return $this->_calculatedValues['skewness']; }/*}}}*/ /** * Calculates the kurtosis of the data distribution in the set * The kurtosis measures the degrees of peakedness of a distribution. * It is also called the "excess" or "excess coefficient", and is * a normalized form of the fourth central moment of a distribution. * A normal distributions has kurtosis = 0 * A narrow and peaked (leptokurtic) distribution has a * kurtosis > 0 * A flat and wide (platykurtic) distribution has a kurtosis < 0 * Handles cummulative data sets correctly * * @access public * @return mixed the kurtosis value on success, a PEAR_Error object otherwise * @see __sumdiff() * @see count() * @see stDev() * @see calc() */ function kurtosis() {/*{{{*/ if (!array_key_exists('kurtosis', $this->_calculatedValues)) { $count = $this->count(); if (PEAR::isError($count)) { return $count; } $stDev = $this->stDev(); if (PEAR::isError($stDev)) { return $stDev; } $sumdiff4 = $this->__sumdiff(4); if (PEAR::isError($sumdiff4)) { return $sumdiff4; } $this->_calculatedValues['kurtosis'] = ($sumdiff4 / ($count * pow($stDev, 4))) - 3; } return $this->_calculatedValues['kurtosis']; }/*}}}*/ /** * Calculates the median of a data set. * The median is the value such that half of the points are below it * in a sorted data set. * If the number of values is odd, it is the middle item. * If the number of values is even, is the average of the two middle items. * Handles cummulative data sets correctly * * @access public * @return mixed the median value on success, a PEAR_Error object otherwise * @see count() * @see calc() */ function median() {/*{{{*/ if ($this->_data == null) { return PEAR::raiseError('data has not been set'); } if (!array_key_exists('median', $this->_calculatedValues)) { if ($this->_dataOption == STATS_DATA_CUMMULATIVE) { $arr =& $this->_dataExpanded; } else { $arr =& $this->_data; } $n = $this->count(); if (PEAR::isError($n)) { return $n; } $h = intval($n / 2); if ($n % 2 == 0) { $median = ($arr[$h] + $arr[$h - 1]) / 2; } else { $median = $arr[$h + 1]; } $this->_calculatedValues['median'] = $median; } return $this->_calculatedValues['median']; }/*}}}*/ /** * Calculates the mode of a data set. * The mode is the value with the highest frequency in the data set. * There can be more than one mode. * Handles cummulative data sets correctly * * @access public * @return mixed an array of mode value on success, a PEAR_Error object otherwise * @see frequency() * @see calc() */ function mode() {/*{{{*/ if ($this->_data == null) { return PEAR::raiseError('data has not been set'); } if (!array_key_exists('mode', $this->_calculatedValues)) { if ($this->_dataOption == STATS_DATA_CUMMULATIVE) { $arr = $this->_data; } else { $arr = $this->frequency(); } arsort($arr); $mcount = 1; foreach ($arr as $val=>$freq) { if ($mcount == 1) { $mode = array($val); $mfreq = $freq; ++$mcount; continue; } if ($mfreq == $freq) $mode[] = $val; if ($mfreq > $freq) break; } $this->_calculatedValues['mode'] = $mode; } return $this->_calculatedValues['mode']; }/*}}}*/ /** * Calculates the midrange of a data set. * The midrange is the average of the minimum and maximum of the data set. * Handles cummulative data sets correctly * * @access public * @return mixed the midrange value on success, a PEAR_Error object otherwise * @see min() * @see max() * @see calc() */ function midrange() {/*{{{*/ if (!array_key_exists('midrange', $this->_calculatedValues)) { $min = $this->min(); if (PEAR::isError($min)) { return $min; } $max = $this->max(); if (PEAR::isError($max)) { return $max; } $this->_calculatedValues['midrange'] = (($max + $min) / 2); } return $this->_calculatedValues['midrange']; }/*}}}*/ /** * Calculates the geometrical mean of the data points in the set * Handles cummulative data sets correctly * * @access public * @return mixed the geometrical mean value on success, a PEAR_Error object otherwise * @see calc() * @see product() * @see count() */ function geometricMean() {/*{{{*/ if (!array_key_exists('geometricMean', $this->_calculatedValues)) { $count = $this->count(); if (PEAR::isError($count)) { return $count; } $prod = $this->product(); if (PEAR::isError($prod)) { return $prod; } if ($prod == 0.0) { return 0.0; } if ($prod < 0) { return PEAR::raiseError('The product of the data set is negative, geometric mean undefined.'); } $this->_calculatedValues['geometricMean'] = pow($prod , 1 / $count); } return $this->_calculatedValues['geometricMean']; }/*}}}*/ /** * Calculates the harmonic mean of the data points in the set * Handles cummulative data sets correctly * * @access public * @return mixed the harmonic mean value on success, a PEAR_Error object otherwise * @see calc() * @see count() */ function harmonicMean() {/*{{{*/ if ($this->_data == null) { return PEAR::raiseError('data has not been set'); } if (!array_key_exists('harmonicMean', $this->_calculatedValues)) { $count = $this->count(); if (PEAR::isError($count)) { return $count; } $invsum = 0.0; if ($this->_dataOption == STATS_DATA_CUMMULATIVE) { foreach($this->_data as $val=>$freq) { if ($val == 0) { return PEAR::raiseError('cannot calculate a '. 'harmonic mean with data values of zero.'); } $invsum += $freq / $val; } } else { foreach($this->_data as $val) { if ($val == 0) { return PEAR::raiseError('cannot calculate a '. 'harmonic mean with data values of zero.'); } $invsum += 1 / $val; } } $this->_calculatedValues['harmonicMean'] = $count / $invsum; } return $this->_calculatedValues['harmonicMean']; }/*}}}*/ /** * Calculates the nth central moment (m{n}) of a data set. * * The definition of a sample central moment is: * * m{n} = 1/N * SUM { (xi - avg)^n } * * where: N = sample size, avg = sample mean. * * @access public * @param integer $n moment to calculate * @return mixed the numeric value of the moment on success, PEAR_Error otherwise */ function sampleCentralMoment($n) {/*{{{*/ if (!is_int($n) || $n < 1) { return PEAR::isError('moment must be a positive integer >= 1.'); } if ($n == 1) { return 0; } $count = $this->count(); if (PEAR::isError($count)) { return $count; } if ($count == 0) { return PEAR::raiseError("Cannot calculate {$n}th sample moment, ". 'there are zero data entries'); } $sum = $this->__sumdiff($n); if (PEAR::isError($sum)) { return $sum; } return ($sum / $count); }/*}}}*/ /** * Calculates the nth raw moment (m{n}) of a data set. * * The definition of a sample central moment is: * * m{n} = 1/N * SUM { xi^n } * * where: N = sample size, avg = sample mean. * * @access public * @param integer $n moment to calculate * @return mixed the numeric value of the moment on success, PEAR_Error otherwise */ function sampleRawMoment($n) {/*{{{*/ if (!is_int($n) || $n < 1) { return PEAR::isError('moment must be a positive integer >= 1.'); } $count = $this->count(); if (PEAR::isError($count)) { return $count; } if ($count == 0) { return PEAR::raiseError("Cannot calculate {$n}th raw moment, ". 'there are zero data entries.'); } $sum = $this->sumN($n); if (PEAR::isError($sum)) { return $sum; } return ($sum / $count); }/*}}}*/ /** * Calculates the coefficient of variation of a data set. * The coefficient of variation measures the spread of a set of data * as a proportion of its mean. It is often expressed as a percentage. * Handles cummulative data sets correctly * * @access public * @return mixed the coefficient of variation on success, a PEAR_Error object otherwise * @see stDev() * @see mean() * @see calc() */ function coeffOfVariation() {/*{{{*/ if (!array_key_exists('coeffOfVariation', $this->_calculatedValues)) { $mean = $this->mean(); if (PEAR::isError($mean)) { return $mean; } if ($mean == 0.0) { return PEAR::raiseError('cannot calculate the coefficient '. 'of variation, mean of sample is zero'); } $stDev = $this->stDev(); if (PEAR::isError($stDev)) { return $stDev; } $this->_calculatedValues['coeffOfVariation'] = $stDev / $mean; } return $this->_calculatedValues['coeffOfVariation']; }/*}}}*/ /** * Calculates the standard error of the mean. * It is the standard deviation of the sampling distribution of * the mean. The formula is: * * S.E. Mean = SD / (N)^(1/2) * * This formula does not assume a normal distribution, and shows * that the size of the standard error of the mean is inversely * proportional to the square root of the sample size. * * @access public * @return mixed the standard error of the mean on success, a PEAR_Error object otherwise * @see stDev() * @see count() * @see calc() */ function stdErrorOfMean() {/*{{{*/ if (!array_key_exists('stdErrorOfMean', $this->_calculatedValues)) { $count = $this->count(); if (PEAR::isError($count)) { return $count; } $stDev = $this->stDev(); if (PEAR::isError($stDev)) { return $stDev; } $this->_calculatedValues['stdErrorOfMean'] = $stDev / sqrt($count); } return $this->_calculatedValues['stdErrorOfMean']; }/*}}}*/ /** * Calculates the value frequency table of a data set. * Handles cummulative data sets correctly * * @access public * @return mixed an associative array of value=>frequency items on success, a PEAR_Error object otherwise * @see min() * @see max() * @see calc() */ function frequency() {/*{{{*/ if ($this->_data == null) { return PEAR::raiseError('data has not been set'); } if (!array_key_exists('frequency', $this->_calculatedValues)) { if ($this->_dataOption == STATS_DATA_CUMMULATIVE) { $freq = $this->_data; } else { $freq = array(); foreach ($this->_data as $val) { $freq["$val"]++; } ksort($freq); } $this->_calculatedValues['frequency'] = $freq; } return $this->_calculatedValues['frequency']; }/*}}}*/ /** * The quartiles are defined as the values that divide a sorted * data set into four equal-sized subsets, and correspond to the * 25th, 50th, and 75th percentiles. * * @access public * @return mixed an associative array of quartiles on success, a PEAR_Error otherwise * @see percentile() */ function quartiles() {/*{{{*/ if (!array_key_exists('quartiles', $this->_calculatedValues)) { $q1 = $this->percentile(25); if (PEAR::isError($q1)) { return $q1; } $q2 = $this->percentile(50); if (PEAR::isError($q2)) { return $q2; } $q3 = $this->percentile(75); if (PEAR::isError($q3)) { return $q3; } $this->_calculatedValues['quartiles'] = array ( '25' => $q1, '50' => $q2, '75' => $q3 ); } return $this->_calculatedValues['quartiles']; }/*}}}*/ /** * The interquartile mean is defined as the mean of the values left * after discarding the lower 25% and top 25% ranked values, i.e.: * * interquart mean = mean() * * where: P = percentile * * @todo need to double check the equation * @access public * @return mixed a numeric value on success, a PEAR_Error otherwise * @see quartiles() */ function interquartileMean() {/*{{{*/ if (!array_key_exists('interquartileMean', $this->_calculatedValues)) { $quart = $this->quartiles(); if (PEAR::isError($quart)) { return $quart; } $q3 = $quart['75']; $q1 = $quart['25']; $sum = 0; $n = 0; foreach ($this->getData(true) as $val) { if ($val >= $q1 && $val <= $q3) { $sum += $val; ++$n; } } if ($n == 0) { return PEAR::raiseError('error calculating interquartile mean, '. 'empty interquartile range of values.'); } $this->_calculatedValues['interquartileMean'] = $sum / $n; } return $this->_calculatedValues['interquartileMean']; }/*}}}*/ /** * The interquartile range is the distance between the 75th and 25th * percentiles. Basically the range of the middle 50% of the data set, * and thus is not affected by outliers or extreme values. * * interquart range = P(75) - P(25) * * where: P = percentile * * @access public * @return mixed a numeric value on success, a PEAR_Error otherwise * @see quartiles() */ function interquartileRange() {/*{{{*/ if (!array_key_exists('interquartileRange', $this->_calculatedValues)) { $quart = $this->quartiles(); if (PEAR::isError($quart)) { return $quart; } $q3 = $quart['75']; $q1 = $quart['25']; $this->_calculatedValues['interquartileRange'] = $q3 - $q1; } return $this->_calculatedValues['interquartileRange']; }/*}}}*/ /** * The quartile deviation is half of the interquartile range value * * quart dev = (P(75) - P(25)) / 2 * * where: P = percentile * * @access public * @return mixed a numeric value on success, a PEAR_Error otherwise * @see quartiles() * @see interquartileRange() */ function quartileDeviation() {/*{{{*/ if (!array_key_exists('quartileDeviation', $this->_calculatedValues)) { $iqr = $this->interquartileRange(); if (PEAR::isError($iqr)) { return $iqr; } $this->_calculatedValues['quartileDeviation'] = $iqr / 2; } return $this->_calculatedValues['quartileDeviation']; }/*}}}*/ /** * The quartile variation coefficient is defines as follows: * * quart var coeff = 100 * (P(75) - P(25)) / (P(75) + P(25)) * * where: P = percentile * * @todo need to double check the equation * @access public * @return mixed a numeric value on success, a PEAR_Error otherwise * @see quartiles() */ function quartileVariationCoefficient() {/*{{{*/ if (!array_key_exists('quartileVariationCoefficient', $this->_calculatedValues)) { $quart = $this->quartiles(); if (PEAR::isError($quart)) { return $quart; } $q3 = $quart['75']; $q1 = $quart['25']; $d = $q3 - $q1; $s = $q3 + $q1; $this->_calculatedValues['quartileVariationCoefficient'] = 100 * $d / $s; } return $this->_calculatedValues['quartileVariationCoefficient']; }/*}}}*/ /** * The quartile skewness coefficient (also known as Bowley Skewness), * is defined as follows: * * quart skewness coeff = (P(25) - 2*P(50) + P(75)) / (P(75) - P(25)) * * where: P = percentile * * @todo need to double check the equation * @access public * @return mixed a numeric value on success, a PEAR_Error otherwise * @see quartiles() */ function quartileSkewnessCoefficient() {/*{{{*/ if (!array_key_exists('quartileSkewnessCoefficient', $this->_calculatedValues)) { $quart = $this->quartiles(); if (PEAR::isError($quart)) { return $quart; } $q3 = $quart['75']; $q2 = $quart['50']; $q1 = $quart['25']; $d = $q3 - 2*$q2 + $q1; $s = $q3 - $q1; $this->_calculatedValues['quartileSkewnessCoefficient'] = $d / $s; } return $this->_calculatedValues['quartileSkewnessCoefficient']; }/*}}}*/ /** * The pth percentile is the value such that p% of the a sorted data set * is smaller than it, and (100 - p)% of the data is larger. * * A quick algorithm to pick the appropriate value from a sorted data * set is as follows: * * - Count the number of values: n * - Calculate the position of the value in the data list: i = p * (n + 1) * - if i is an integer, return the data at that position * - if i < 1, return the minimum of the data set * - if i > n, return the maximum of the data set * - otherwise, average the entries at adjacent positions to i * * The median is the 50th percentile value. * * @todo need to double check generality of the algorithm * * @access public * @param numeric $p the percentile to estimate, e.g. 25 for 25th percentile * @return mixed a numeric value on success, a PEAR_Error otherwise * @see quartiles() * @see median() */ function percentile($p) {/*{{{*/ $count = $this->count(); if (PEAR::isError($count)) { return $count; } if ($this->_dataOption == STATS_DATA_CUMMULATIVE) { $data =& $this->_dataExpanded; } else { $data =& $this->_data; } $obsidx = $p * ($count + 1) / 100; if (intval($obsidx) == $obsidx) { return $data[($obsidx - 1)]; } elseif ($obsidx < 1) { return $data[0]; } elseif ($obsidx > $count) { return $data[($count - 1)]; } else { $left = floor($obsidx - 1); $right = ceil($obsidx - 1); return ($data[$left] + $data[$right]) / 2; } }/*}}}*/ // private methods /** * Utility function to calculate: SUM { (xi - mean)^n } * * @access private * @param numeric $power the exponent * @param optional double $mean the data set mean value * @return mixed the sum on success, a PEAR_Error object otherwise * * @see stDev() * @see variaceWithMean(); * @see skewness(); * @see kurtosis(); */ function __sumdiff($power, $mean=null) {/*{{{*/ if ($this->_data == null) { return PEAR::raiseError('data has not been set'); } if (is_null($mean)) { $mean = $this->mean(); if (PEAR::isError($mean)) { return $mean; } } $sdiff = 0; if ($this->_dataOption == STATS_DATA_CUMMULATIVE) { foreach ($this->_data as $val=>$freq) { $sdiff += $freq * pow((double)($val - $mean), (double)$power); } } else { foreach ($this->_data as $val) $sdiff += pow((double)($val - $mean), (double)$power); } return $sdiff; }/*}}}*/ /** * Utility function to calculate the variance with or without * a fixed mean * * @access private * @param $mean the fixed mean to use, null as default * @return mixed a numeric value on success, a PEAR_Error otherwise * @see variance() * @see varianceWithMean() */ function __calcVariance($mean = null) {/*{{{*/ if ($this->_data == null) { return PEAR::raiseError('data has not been set'); } $sumdiff2 = $this->__sumdiff(2, $mean); if (PEAR::isError($sumdiff2)) { return $sumdiff2; } $count = $this->count(); if (PEAR::isError($count)) { return $count; } if ($count == 1) { return PEAR::raiseError('cannot calculate variance of a singe data point'); } return ($sumdiff2 / ($count - 1)); }/*}}}*/ /** * Utility function to calculate the absolute deviation with or without * a fixed mean * * @access private * @param $mean the fixed mean to use, null as default * @return mixed a numeric value on success, a PEAR_Error otherwise * @see absDev() * @see absDevWithMean() */ function __calcAbsoluteDeviation($mean = null) {/*{{{*/ if ($this->_data == null) { return PEAR::raiseError('data has not been set'); } $count = $this->count(); if (PEAR::isError($count)) { return $count; } $sumabsdev = $this->__sumabsdev($mean); if (PEAR::isError($sumabsdev)) { return $sumabsdev; } return $sumabsdev / $count; }/*}}}*/ /** * Utility function to calculate: SUM { | xi - mean | } * * @access private * @param optional double $mean the mean value for the set or population * @return mixed the sum on success, a PEAR_Error object otherwise * * @see absDev() * @see absDevWithMean() */ function __sumabsdev($mean=null) {/*{{{*/ if ($this->_data == null) { return PEAR::raiseError('data has not been set'); } if (is_null($mean)) { $mean = $this->mean(); } $sdev = 0; if ($this->_dataOption == STATS_DATA_CUMMULATIVE) { foreach ($this->_data as $val=>$freq) { $sdev += $freq * abs($val - $mean); } } else { foreach ($this->_data as $val) { $sdev += abs($val - $mean); } } return $sdev; }/*}}}*/ /** * Utility function to format a PEAR_Error to be used by calc(), * calcBasic() and calcFull() * * @access private * @param mixed $v value to be formatted * @param boolean $returnErrorObject whether the raw PEAR_Error (when true, default), * or only the error message will be returned (when false) * @return mixed if the value is a PEAR_Error object, and $useErrorObject * is false, then a string with the error message will be returned, * otherwise the value will not be modified and returned as passed. */ function __format($v, $useErrorObject=true) {/*{{{*/ if (PEAR::isError($v) && $useErrorObject == false) { return $v->getMessage(); } else { return $v; } }/*}}}*/ /** * Utility function to validate the data and modify it * according to the current null handling option * * @access private * @return mixed true on success, a PEAR_Error object otherwise * * @see setData() */ function _validate() {/*{{{*/ $flag = ($this->_dataOption == STATS_DATA_CUMMULATIVE); foreach ($this->_data as $key=>$value) { $d = ($flag) ? $key : $value; $v = ($flag) ? $value : $key; if (!is_numeric($d)) { switch ($this->_nullOption) { case STATS_IGNORE_NULL : unset($this->_data["$key"]); break; case STATS_USE_NULL_AS_ZERO: if ($flag) { unset($this->_data["$key"]); $this->_data[0] += $v; } else { $this->_data[$key] = 0; } break; case STATS_REJECT_NULL : default: return PEAR::raiseError('data rejected, contains NULL values'); break; } } } if ($flag) { ksort($this->_data); $this->_dataExpanded = array(); foreach ($this->_data as $val=>$freq) { $this->_dataExpanded = array_pad($this->_dataExpanded, count($this->_dataExpanded) + $freq, $val); } sort($this->_dataExpanded); } else { sort($this->_data); } return true; }/*}}}*/ }/*}}}*/ // vim: ts=4:sw=4:et: // vim6: fdl=1: fdm=marker: ?>