Statistics functions extended tests (#1099)

* Merge branch 'master' of C:\Projects\PHPOffice\PHPSpreadsheet\develop with conflicts.

* Further statistical tests

* Additional BINOMDIST tests
Extract boolean and string checking for Average and Count functions into separate methods

* Appease the great god PHPCS

* Yet more appeasement of the great god PHPCS

* Beginning to get really cheesed off with PHPCS, pulling me up over full stops in comments... I want to see this completed before going to bed; but it's nearly half past one in the morning, and phpcs has been pulling me up over trivialities for the past f***ing hour

* And a spurious line

* Further work on statistical tests
This commit is contained in:
Mark Baker 2019-07-21 13:07:18 +02:00 committed by GitHub
parent 554684720d
commit ba2b74c222
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 249 additions and 71 deletions

View File

@ -2713,7 +2713,7 @@ class Engineering
* @param string $fromUOM the units for value
* @param string $toUOM the units for the result
*
* @return float
* @return float|string
*/
public static function CONVERTUOM($value, $fromUOM, $toUOM)
{

View File

@ -518,6 +518,44 @@ class Statistical
return Functions::NULL();
}
/**
 * Normalise a Boolean argument to an integer when the active compatibility mode counts it.
 *
 * The conversion rules implemented here are:
 *  - MS Excel mode: a Boolean is converted only when it is NOT a cell value (i.e. it is a literal).
 *  - OpenOffice Calc mode: a Boolean is always converted.
 *  - Any other mode (e.g. Gnumeric): a Boolean is never converted.
 * Non-Boolean arguments are returned untouched.
 *
 * @param mixed $arg The value being examined
 * @param mixed $k   The key of the value in the flattened argument list; passed to
 *                   Functions::isCellValue() to decide whether the value came from a cell
 *
 * @return int|mixed The Boolean cast to 0/1 when it is accepted, otherwise the original value
 */
private static function testAcceptedBoolean($arg, $k)
{
    // Only Booleans are ever candidates for conversion.
    if (!is_bool($arg)) {
        return $arg;
    }

    $literalUnderExcel = !Functions::isCellValue($k)
        && Functions::getCompatibilityMode() === Functions::COMPATIBILITY_EXCEL;

    if ($literalUnderExcel || Functions::getCompatibilityMode() === Functions::COMPATIBILITY_OPENOFFICE) {
        return (int) $arg;
    }

    return $arg;
}
/**
 * Decide whether an argument should be treated as a countable numeric value.
 *
 * A value is accepted when it is numeric and either:
 *  - it is not a string (a genuine int/float), or
 *  - it is a numeric string that is NOT a cell value (i.e. a literal) and the
 *    compatibility mode is anything other than Gnumeric.
 *
 * @param mixed $arg The value being examined
 * @param mixed $k   The key of the value in the flattened argument list; passed to
 *                   Functions::isCellValue() to decide whether the value came from a cell
 *
 * @return bool True when the value should be included, false otherwise
 */
private static function isAcceptedCountable($arg, $k)
{
    // Anything non-numeric can never be counted.
    if (!is_numeric($arg)) {
        return false;
    }

    // Genuine numbers are always counted, whatever their origin or the compatibility mode.
    if (!is_string($arg)) {
        return true;
    }

    // Numeric strings: only literals qualify, and never under Gnumeric compatibility.
    return !Functions::isCellValue($k)
        && Functions::getCompatibilityMode() !== Functions::COMPATIBILITY_GNUMERIC;
}
/**
* AVEDEV.
*
@ -538,38 +576,38 @@ class Statistical
$aArgs = Functions::flattenArrayIndexed($args);
// Return value
$returnValue = null;
$returnValue = 0;
$aMean = self::AVERAGE(...$args);
if ($aMean === Functions::DIV0()) {
return Functions::NAN();
} elseif ($aMean === Functions::VALUE()) {
return Functions::VALUE();
}
$aMean = self::AVERAGE($aArgs);
if ($aMean != Functions::DIV0()) {
$aCount = 0;
foreach ($aArgs as $k => $arg) {
if ((is_bool($arg)) &&
((!Functions::isCellValue($k)) || (Functions::getCompatibilityMode() == Functions::COMPATIBILITY_OPENOFFICE))) {
$arg = (int) $arg;
}
$arg = self::testAcceptedBoolean($arg, $k);
// Is it a numeric value?
if ((is_numeric($arg)) && (!is_string($arg))) {
if ($returnValue === null) {
$returnValue = abs($arg - $aMean);
} else {
$returnValue += abs($arg - $aMean);
// Strings containing numeric values are only counted if they are string literals (not cell values)
// and then only in MS Excel and in Open Office, not in Gnumeric
if ((is_string($arg)) && (!is_numeric($arg)) && (!Functions::isCellValue($k))) {
return Functions::VALUE();
}
if (self::isAcceptedCountable($arg, $k)) {
$returnValue += abs($arg - $aMean);
++$aCount;
}
}
// Return
if ($aCount == 0) {
if ($aCount === 0) {
return Functions::DIV0();
}
return $returnValue / $aCount;
}
return Functions::NAN();
}
/**
* AVERAGE.
*
@ -590,12 +628,14 @@ class Statistical
// Loop through arguments
foreach (Functions::flattenArrayIndexed($args) as $k => $arg) {
if ((is_bool($arg)) &&
((!Functions::isCellValue($k)) || (Functions::getCompatibilityMode() == Functions::COMPATIBILITY_OPENOFFICE))) {
$arg = (int) $arg;
}
$arg = self::testAcceptedBoolean($arg, $k);
// Is it a numeric value?
if ((is_numeric($arg)) && (!is_string($arg))) {
// Strings containing numeric values are only counted if they are string literals (not cell values)
// and then only in MS Excel and in Open Office, not in Gnumeric
if ((is_string($arg)) && (!is_numeric($arg)) && (!Functions::isCellValue($k))) {
return Functions::VALUE();
}
if (self::isAcceptedCountable($arg, $k)) {
$returnValue += $arg;
++$aCount;
}
@ -814,16 +854,16 @@ class Statistical
* @param bool $cumulative
*
* @return float|string
*
* @todo Cumulative distribution function
*/
public static function BINOMDIST($value, $trials, $probability, $cumulative)
{
$value = floor(Functions::flattenSingleValue($value));
$trials = floor(Functions::flattenSingleValue($trials));
$value = Functions::flattenSingleValue($value);
$trials = Functions::flattenSingleValue($trials);
$probability = Functions::flattenSingleValue($probability);
if ((is_numeric($value)) && (is_numeric($trials)) && (is_numeric($probability))) {
$value = floor($value);
$trials = floor($trials);
if (($value < 0) || ($value > $trials)) {
return Functions::NAN();
}
@ -860,9 +900,10 @@ class Statistical
public static function CHIDIST($value, $degrees)
{
$value = Functions::flattenSingleValue($value);
$degrees = floor(Functions::flattenSingleValue($degrees));
$degrees = Functions::flattenSingleValue($degrees);
if ((is_numeric($value)) && (is_numeric($degrees))) {
$degrees = floor($degrees);
if ($degrees < 1) {
return Functions::NAN();
}
@ -893,9 +934,11 @@ class Statistical
public static function CHIINV($probability, $degrees)
{
$probability = Functions::flattenSingleValue($probability);
$degrees = floor(Functions::flattenSingleValue($degrees));
$degrees = Functions::flattenSingleValue($degrees);
if ((is_numeric($probability)) && (is_numeric($degrees))) {
$degrees = floor($degrees);
$xLo = 100;
$xHi = 0;
@ -905,7 +948,7 @@ class Statistical
while ((abs($dx) > Functions::PRECISION) && ($i++ < self::MAX_ITERATIONS)) {
// Apply Newton-Raphson step
$result = self::CHIDIST($x, $degrees);
$result = 1 - (self::incompleteGamma($degrees / 2, $x / 2) / self::gamma($degrees / 2));
$error = $result - $probability;
if ($error == 0.0) {
$dx = 0;
@ -953,9 +996,10 @@ class Statistical
{
$alpha = Functions::flattenSingleValue($alpha);
$stdDev = Functions::flattenSingleValue($stdDev);
$size = floor(Functions::flattenSingleValue($size));
$size = Functions::flattenSingleValue($size);
if ((is_numeric($alpha)) && (is_numeric($stdDev)) && (is_numeric($size))) {
$size = floor($size);
if (($alpha <= 0) || ($alpha >= 1)) {
return Functions::NAN();
}
@ -1022,20 +1066,11 @@ class Statistical
// Loop through arguments
$aArgs = Functions::flattenArrayIndexed($args);
foreach ($aArgs as $k => $arg) {
// MS Excel does not count Booleans if passed as cell values, but they are counted if passed as literals
// OpenOffice Calc always counts Booleans
// Gnumeric never counts Booleans
if ((is_bool($arg)) &&
((!Functions::isCellValue($k) && (Functions::getCompatibilityMode() === Functions::COMPATIBILITY_EXCEL)) ||
(Functions::getCompatibilityMode() === Functions::COMPATIBILITY_OPENOFFICE))) {
$arg = (int) $arg;
}
$arg = self::testAcceptedBoolean($arg, $k);
// Is it a numeric value?
// Strings containing numeric values are only counted if they are string literals (not cell values)
// and then only in MS Excel and in Open Office, not in Gnumeric
if (((is_numeric($arg)) && (!is_string($arg))) ||
((is_numeric($arg)) && (!Functions::isCellValue($k)) &&
(Functions::getCompatibilityMode() !== Functions::COMPATIBILITY_GNUMERIC))) {
if (self::isAcceptedCountable($arg, $k)) {
++$returnValue;
}
}
@ -1225,7 +1260,7 @@ class Statistical
* @param mixed $yValues array of mixed Data Series Y
* @param mixed $xValues array of mixed Data Series X
*
* @return float
* @return float|string
*/
public static function COVAR($yValues, $xValues)
{
@ -1258,7 +1293,7 @@ class Statistical
* @param float $probability probability of a success on each trial
* @param float $alpha criterion value
*
* @return int
* @return int|string
*
* @todo Warning. This implementation differs from the algorithm detailed on the MS
* web site in that $CumPGuessMinus1 = $CumPGuess - 1 rather than $CumPGuess - $PGuess
@ -1273,7 +1308,6 @@ class Statistical
if ((is_numeric($trials)) && (is_numeric($probability)) && (is_numeric($alpha))) {
$trials = (int) $trials;
$trialsApprox = $trials;
if ($trials < 0) {
return Functions::NAN();
} elseif (($probability < 0.0) || ($probability > 1.0)) {
@ -1437,7 +1471,7 @@ class Statistical
* @param float $lambda The parameter value
* @param bool $cumulative
*
* @return float
* @return float|string
*/
public static function EXPONDIST($value, $lambda, $cumulative)
{
@ -1470,7 +1504,7 @@ class Statistical
*
* @param float $value
*
* @return float
* @return float|string
*/
public static function FISHER($value)
{
@ -1496,7 +1530,7 @@ class Statistical
*
* @param float $value
*
* @return float
* @return float|string
*/
public static function FISHERINV($value)
{
@ -1552,7 +1586,7 @@ class Statistical
* @param float $b Parameter to the distribution
* @param bool $cumulative
*
* @return float
* @return float|string
*/
public static function GAMMADIST($value, $a, $b, $cumulative)
{
@ -1585,7 +1619,7 @@ class Statistical
* @param float $alpha Parameter to the distribution
* @param float $beta Parameter to the distribution
*
* @return float
* @return float|string
*/
public static function GAMMAINV($probability, $alpha, $beta)
{
@ -1646,7 +1680,7 @@ class Statistical
*
* @param float $value
*
* @return float
* @return float|string
*/
public static function GAMMALN($value)
{
@ -1677,7 +1711,7 @@ class Statistical
*
* @param mixed ...$args Data values
*
* @return float
* @return float|string
*/
public static function GEOMEAN(...$args)
{
@ -1739,7 +1773,7 @@ class Statistical
*
* @param mixed ...$args Data values
*
* @return float
* @return float|string
*/
public static function HARMEAN(...$args)
{
@ -1786,7 +1820,7 @@ class Statistical
* @param float $populationSuccesses Number of successes in the population
* @param float $populationNumber Population size
*
* @return float
* @return float|string
*/
public static function HYPGEOMDIST($sampleSuccesses, $sampleNumber, $populationSuccesses, $populationNumber)
{
@ -1822,7 +1856,7 @@ class Statistical
* @param mixed[] $yValues Data Series Y
* @param mixed[] $xValues Data Series X
*
* @return float
* @return float|string
*/
public static function INTERCEPT($yValues, $xValues)
{
@ -2754,7 +2788,7 @@ class Statistical
* @param int $numObjs Number of different objects
* @param int $numInSet Number of objects in each permutation
*
* @return int Number of permutations
* @return int|string Number of permutations
*/
public static function PERMUT($numObjs, $numInSet)
{
@ -2891,7 +2925,7 @@ class Statistical
* @param mixed[] $yValues Data Series Y
* @param mixed[] $xValues Data Series X
*
* @return float
* @return float|string
*/
public static function RSQ($yValues, $xValues)
{
@ -2959,7 +2993,7 @@ class Statistical
* @param mixed[] $yValues Data Series Y
* @param mixed[] $xValues Data Series X
*
* @return float
* @return float|string
*/
public static function SLOPE($yValues, $xValues)
{
@ -3261,7 +3295,7 @@ class Statistical
* @param mixed[] $yValues Data Series Y
* @param mixed[] $xValues Data Series X
*
* @return float
* @return float|string
*/
public static function STEYX($yValues, $xValues)
{

View File

@ -17,4 +17,29 @@ return [
61.504,
[10.5, 7.2, 200, 5.4, 8.1],
],
[
1.825,
[
// The index simulates a cell value
// Numbers and Booleans are both counted
'0.1.A' => 1,
'0.2.A' => '2',
'0.3.A' => 3.4,
'0.4.A' => true,
'0.5.A' => 5,
'0.6.A' => null,
'0.7.A' => 6.7,
'0.8.A' => 'STRING',
'0.9.A' => ''
],
],
[
1.85,
[1, '2', 3.4, true, 5, 6.7],
],
[
// When non-numeric strings are passed directly, then a #VALUE! error is raised
'#VALUE!',
[1, '2', 3.4, true, 5, null, 6.7, 'STRING', ''],
],
];

View File

@ -25,4 +25,29 @@ return [
46.24,
[10.5, 7.2, 200, 5.4, 8.1],
],
[
4.025,
[
// The index simulates a cell value
// Numbers and Booleans are both counted
'0.1.A' => 1,
'0.2.A' => '2',
'0.3.A' => 3.4,
'0.4.A' => true,
'0.5.A' => 5,
'0.6.A' => null,
'0.7.A' => 6.7,
'0.8.A' => 'STRING',
'0.9.A' => ''
],
],
[
3.183333333333,
[1, '2', 3.4, true, 5, 6.7],
],
[
// When non-numeric strings are passed directly, then a #VALUE! error is raised
'#VALUE!',
[1, '2', 3.4, true, 5, null, 6.7, 'STRING', ''],
],
];

View File

@ -1,6 +1,14 @@
<?php
return [
[
0.4059136,
0.4, 4, 5,
],
[
0.99596045887,
3, 5, 10, 1, 4,
],
[
0.960370937542,
3, 7.5, 9, 1, 4,

View File

@ -1,6 +1,10 @@
<?php
return [
[
1.862243320728,
0.52, 3, 4, 1, 3,
],
[
2.164759759129,
0.3, 7.5, 9, 1, 4,
@ -13,4 +17,8 @@ return [
2.0,
0.685470581055, 8, 10, 1, 3,
],
[
0.303225844664,
0.2, 4, 5, 0, 1,
],
];

View File

@ -5,6 +5,10 @@ return [
0.706399436132,
3, 8, 0.35, true,
],
[
0.2785857790625,
3, 8, 0.35, false,
],
[
0.538748204875,
50, 125, 0.4, true,
@ -17,4 +21,28 @@ return [
0.205078125,
6, 10, 0.5, false,
],
[
1.36554E-17,
10, 100, 0.5, false,
],
[
0.079589237387,
50, 100, 0.5, false,
],
[
0.0008638556657,
65, 100, 0.5, false,
],
[
1.53165E-17,
10, 100, 0.5, true,
],
[
0.539794618694,
50, 100, 0.5, true,
],
[
0.999105034804,
65, 100, 0.5, true,
],
];

View File

@ -13,4 +13,24 @@ return [
0.050000589092,
18.307, 10,
],
[
0.479500122187,
0.5, 1,
],
[
0.113846298007,
2.5, 1,
],
[
0.778800783071,
0.5, 2,
],
[
0.918891411655,
0.5, 3,
],
[
0.046011705689,
8, 3,
],
];

View File

@ -13,4 +13,24 @@ return [
18.30697345702,
0.050001, 10,
],
[
0.45493642312,
0.5, 1,
],
[
0.101531044268,
0.75, 1,
],
[
4.605170185988,
0.1, 2,
],
[
0.446287102628,
0.8, 2,
],
[
4.108344935632,
0.25, 3,
],
];

View File

@ -6,4 +6,9 @@ return [
[3, 2, 4, 5, 6],
[9, 7, 12, 15, 17],
],
[
0.870035103816,
[2, 10, 7, 17, 14, 16, 8, 12, 11, 15, 18, 3, 4, 1, 6, 5, 13, 19, 20, 9],
[22.9, 45.78, 33.49, 49.77, 40.94, 36.18, 21.05, 50.57, 31.32, 53.76, 55.66, 27.61, 11.15, 10.11, 37.9, 31.08, 45.48, 63.83, 63.6, 27.01],
],
];

View File

@ -10,5 +10,10 @@ return [
1.25,
[[1, 2], [3, 4]],
[[5, 6], [7, 8]],
]
],
[
16.633125,
[2, 7, 8, 3, 4, 1, 6, 5],
[22.9, 33.49, 34.5, 27.61, 19.5, 10.11, 37.9, 31.08],
],
];

View File

@ -43,6 +43,6 @@ return [
],
[
10068347520,
49,6,
49, 6,
],
];