MATCH function fix

- fix boolean search
- add support for excel expressions `*?~`

Fixes #1116
Closes #1122
This commit is contained in:
Rolands Usāns 2019-08-12 06:11:36 +03:00 committed by Adrien Crivelli
parent 2166458de3
commit 9df68f12e2
3 changed files with 207 additions and 25 deletions

View File

@ -13,6 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org).
- Add MAXIFS, MINIFS, COUNTIFS and Remove MINIF, MAXIF - [Issue #1056](https://github.com/PHPOffice/PhpSpreadsheet/issues/1056) - Add MAXIFS, MINIFS, COUNTIFS and Remove MINIF, MAXIF - [Issue #1056](https://github.com/PHPOffice/PhpSpreadsheet/issues/1056)
- HLookup needs an ordered list even if range_lookup is set to false [Issue #1055](https://github.com/PHPOffice/PhpSpreadsheet/issues/1055) and [PR #1076](https://github.com/PHPOffice/PhpSpreadsheet/pull/1076) - HLookup needs an ordered list even if range_lookup is set to false [Issue #1055](https://github.com/PHPOffice/PhpSpreadsheet/issues/1055) and [PR #1076](https://github.com/PHPOffice/PhpSpreadsheet/pull/1076)
- Improve performance of IF function calls via branch pruning to avoid resolution of every branch [#844](https://github.com/PHPOffice/PhpSpreadsheet/pull/844) - Improve performance of IF function calls via branch pruning to avoid resolution of every branch [#844](https://github.com/PHPOffice/PhpSpreadsheet/pull/844)
- MATCH function supports `*?~` Excel functionality, when match_type=0 - [Issue #1116](https://github.com/PHPOffice/PhpSpreadsheet/issues/1116)
### Fixed ### Fixed
@ -26,6 +27,7 @@ and this project adheres to [Semantic Versioning](https://semver.org).
- Cover `getSheetByName()` with tests for name with quote and spaces - [#739](https://github.com/PHPOffice/PhpSpreadsheet/issues/739) - Cover `getSheetByName()` with tests for name with quote and spaces - [#739](https://github.com/PHPOffice/PhpSpreadsheet/issues/739)
- Best effort to support invalid colspan values in HTML reader - [878](https://github.com/PHPOffice/PhpSpreadsheet/pull/878) - Best effort to support invalid colspan values in HTML reader - [878](https://github.com/PHPOffice/PhpSpreadsheet/pull/878)
- Fixes incorrect rows deletion [#868](https://github.com/PHPOffice/PhpSpreadsheet/issues/868) - Fixes incorrect rows deletion [#868](https://github.com/PHPOffice/PhpSpreadsheet/issues/868)
- MATCH function fix (value search by type, stop search when match_type=-1 and unordered element encountered) - [Issue #1116](https://github.com/PHPOffice/PhpSpreadsheet/issues/1116)
## [1.8.2] - 2019-07-08 ## [1.8.2] - 2019-07-08

View File

@ -464,9 +464,10 @@ class LookupRef
* *
* @param mixed $lookupValue The value that you want to match in lookup_array * @param mixed $lookupValue The value that you want to match in lookup_array
* @param mixed $lookupArray The range of cells being searched * @param mixed $lookupArray The range of cells being searched
* @param mixed $matchType The number -1, 0, or 1. -1 means above, 0 means exact match, 1 means below. If match_type is 1 or -1, the list has to be ordered. * @param mixed $matchType The number -1, 0, or 1. -1 means above, 0 means exact match, 1 means below.
* If match_type is 1 or -1, the list has to be ordered.
* *
* @return int The relative position of the found item * @return int|string The relative position of the found item
*/ */
public static function MATCH($lookupValue, $lookupArray, $matchType = 1) public static function MATCH($lookupValue, $lookupArray, $matchType = 1)
{ {
@ -474,9 +475,10 @@ class LookupRef
$lookupValue = Functions::flattenSingleValue($lookupValue); $lookupValue = Functions::flattenSingleValue($lookupValue);
$matchType = ($matchType === null) ? 1 : (int) Functions::flattenSingleValue($matchType); $matchType = ($matchType === null) ? 1 : (int) Functions::flattenSingleValue($matchType);
$initialLookupValue = $lookupValue; // MATCH is not case sensitive, so we convert lookup value to be lower cased in case it's string type.
// MATCH is not case sensitive if (is_string($lookupValue)) {
$lookupValue = StringHelper::strToLower($lookupValue); $lookupValue = StringHelper::strToLower($lookupValue);
}
// Lookup_value type has to be number, text, or logical values // Lookup_value type has to be number, text, or logical values
if ((!is_numeric($lookupValue)) && (!is_string($lookupValue)) && (!is_bool($lookupValue))) { if ((!is_numeric($lookupValue)) && (!is_string($lookupValue)) && (!is_bool($lookupValue))) {
@ -522,16 +524,54 @@ class LookupRef
// find the match // find the match
// ** // **
if ($matchType == 0 || $matchType == 1) { if ($matchType === 0 || $matchType === 1) {
foreach ($lookupArray as $i => $lookupArrayValue) { foreach ($lookupArray as $i => $lookupArrayValue) {
$onlyNumeric = is_numeric($lookupArrayValue) && is_numeric($lookupValue); $typeMatch = gettype($lookupValue) === gettype($lookupArrayValue);
$onlyNumericExactMatch = $onlyNumeric && $lookupArrayValue == $lookupValue; $exactTypeMatch = $typeMatch && $lookupArrayValue === $lookupValue;
$nonOnlyNumericExactMatch = !$onlyNumeric && $lookupArrayValue === $lookupValue; $nonOnlyNumericExactMatch = !$typeMatch && $lookupArrayValue === $lookupValue;
$exactMatch = $onlyNumericExactMatch || $nonOnlyNumericExactMatch; $exactMatch = $exactTypeMatch || $nonOnlyNumericExactMatch;
if (($matchType == 0) && $exactMatch) {
// exact match if ($matchType === 0) {
return $i + 1; if ($typeMatch && is_string($lookupValue) && (bool) preg_match('/([\?\*])/', $lookupValue)) {
} elseif (($matchType == 1) && ($lookupArrayValue <= $lookupValue)) { $splitString = $lookupValue;
$chars = array_map(function ($i) use ($splitString) {
return mb_substr($splitString, $i, 1);
}, range(0, mb_strlen($splitString) - 1));
$length = count($chars);
$pattern = '/^';
for ($j = 0; $j < $length; ++$j) {
if ($chars[$j] === '~') {
if (isset($chars[$j + 1])) {
if ($chars[$j + 1] === '*') {
$pattern .= preg_quote($chars[$j + 1], '/');
++$j;
} elseif ($chars[$j + 1] === '?') {
$pattern .= preg_quote($chars[$j + 1], '/');
++$j;
}
} else {
$pattern .= preg_quote($chars[$j], '/');
}
} elseif ($chars[$j] === '*') {
$pattern .= '.*';
} elseif ($chars[$j] === '?') {
$pattern .= '.{1}';
} else {
$pattern .= preg_quote($chars[$j], '/');
}
}
$pattern .= '$/';
if ((bool) preg_match($pattern, $lookupArrayValue)) {
// exact match
return $i + 1;
}
} elseif ($exactMatch) {
// exact match
return $i + 1;
}
} elseif (($matchType === 1) && $typeMatch && ($lookupArrayValue <= $lookupValue)) {
$i = array_search($i, $keySet); $i = array_search($i, $keySet);
// The current value is the (first) match // The current value is the (first) match
@ -539,26 +579,26 @@ class LookupRef
} }
} }
} else { } else {
// matchType = -1
// "Special" case: since the array it's supposed to be ordered in descending order, the
// Excel algorithm gives up immediately if the first element is smaller than the searched value
if ($lookupArray[0] < $lookupValue) {
return Functions::NA();
}
$maxValueKey = null; $maxValueKey = null;
// The basic algorithm is: // The basic algorithm is:
// Iterate and keep the highest match until the next element is smaller than the searched value. // Iterate and keep the highest match until the next element is smaller than the searched value.
// Return immediately if perfect match is found // Return immediately if perfect match is found
foreach ($lookupArray as $i => $lookupArrayValue) { foreach ($lookupArray as $i => $lookupArrayValue) {
if ($lookupArrayValue == $lookupValue) { $typeMatch = gettype($lookupValue) === gettype($lookupArrayValue);
$exactTypeMatch = $typeMatch && $lookupArrayValue === $lookupValue;
$nonOnlyNumericExactMatch = !$typeMatch && $lookupArrayValue === $lookupValue;
$exactMatch = $exactTypeMatch || $nonOnlyNumericExactMatch;
if ($exactMatch) {
// Another "special" case. If a perfect match is found, // Another "special" case. If a perfect match is found,
// the algorithm gives up immediately // the algorithm gives up immediately
return $i + 1; return $i + 1;
} elseif ($lookupArrayValue >= $lookupValue) { } elseif ($typeMatch & $lookupArrayValue >= $lookupValue) {
$maxValueKey = $i + 1; $maxValueKey = $i + 1;
} elseif ($typeMatch & $lookupArrayValue < $lookupValue) {
// Excel's algorithm stops searching as soon as it meets an element of the same type that is smaller than the searched value
break;
} }
} }

View File

@ -104,5 +104,145 @@ return [
[[0], [0], ['x'], ['x'], ['x']], [[0], [0], ['x'], ['x'], ['x']],
0 0
], ],
[
2,
'a',
[false, 'a',1],
-1
],
[
'#N/A', // Expected
0,
['x', true, false],
-1
],
[
'#N/A', // Expected
true,
['a', 'b', 'c'],
-1
],
[
'#N/A', // Expected
true,
[0,1,2],
-1
],
[
'#N/A', // Expected
true,
[0,1,2],
0
],
[
'#N/A', // Expected
true,
[0,1,2],
1
],
[
1, // Expected
true,
[true,true,true],
-1
],
[
1, // Expected
true,
[true,true,true],
0
],
[
3, // Expected
true,
[true,true,true],
1
],
// lookup stops when value < searched one
[
5, // Expected
6,
[true, false, 'a', 'z', 222222, 2, 99999999],
-1
],
// if the first element of the same data type is smaller than the searched value, the result is #N/A - no further processing
[
'#N/A', // Expected
6,
[true, false, 'a', 'z', 2, 888 ],
-1
],
[
'#N/A', // Expected
6,
['6'],
-1
],
// expression match
[
2, // Expected
'a?b',
['a', 'abb', 'axc'],
0
],
[
1, // Expected
'a*',
['aAAAAAAA', 'as', 'az'],
0
],
[
3, // Expected
'1*11*1',
['abc', 'efh', '1a11b1'],
0
],
[
3, // Expected
'1*11*1',
['abc', 'efh', '1a11b1'],
0
],
[
2, // Expected
'a*~*c',
['aAAAAA', 'a123456*c', 'az'],
0
],
[
3, // Expected
'a*123*b',
['aAAAAA', 'a123456*c', 'a99999123b'],
0
],
[
1, // Expected
'*',
['aAAAAA', 'a111123456*c', 'qq'],
0
],
[
2, // Expected
'?',
['aAAAAA', 'a', 'a99999123b'],
0
],
[
'#N/A', // Expected
'?',
[1, 22,333],
0
],
[
3, // Expected
'???',
[1, 22,'aaa'],
0
],
[
3, // Expected
'*',
[1, 22,'aaa'],
0
],
]; ];