Code Coverage for Shared\CodePage (#1491)
While investigating something else in Shared, I noticed that CodePage had poor test coverage and a high complexity rating. This change addresses both; Scrutinizer would love it, although its interface on GitHub seems broken at the moment (all PRs show "Waiting for External Code Coverage").
This commit is contained in:
parent
8ca7bfe53c
commit
84e03da5c7
|
@ -6,6 +6,65 @@ use PhpOffice\PhpSpreadsheet\Exception as PhpSpreadsheetException;
|
||||||
|
|
||||||
class CodePage
|
class CodePage
|
||||||
{
|
{
|
||||||
|
private static $pageArray = [
|
||||||
|
0 => 'CP1252', // CodePage is not always correctly set when the xls file was saved by Apple's Numbers program
|
||||||
|
367 => 'ASCII', // ASCII
|
||||||
|
437 => 'CP437', // OEM US
|
||||||
|
//720 => 'notsupported', // OEM Arabic
|
||||||
|
737 => 'CP737', // OEM Greek
|
||||||
|
775 => 'CP775', // OEM Baltic
|
||||||
|
850 => 'CP850', // OEM Latin I
|
||||||
|
852 => 'CP852', // OEM Latin II (Central European)
|
||||||
|
855 => 'CP855', // OEM Cyrillic
|
||||||
|
857 => 'CP857', // OEM Turkish
|
||||||
|
858 => 'CP858', // OEM Multilingual Latin I with Euro
|
||||||
|
860 => 'CP860', // OEM Portuguese
|
||||||
|
861 => 'CP861', // OEM Icelandic
|
||||||
|
862 => 'CP862', // OEM Hebrew
|
||||||
|
863 => 'CP863', // OEM Canadian (French)
|
||||||
|
864 => 'CP864', // OEM Arabic
|
||||||
|
865 => 'CP865', // OEM Nordic
|
||||||
|
866 => 'CP866', // OEM Cyrillic (Russian)
|
||||||
|
869 => 'CP869', // OEM Greek (Modern)
|
||||||
|
874 => 'CP874', // ANSI Thai
|
||||||
|
932 => 'CP932', // ANSI Japanese Shift-JIS
|
||||||
|
936 => 'CP936', // ANSI Chinese Simplified GBK
|
||||||
|
949 => 'CP949', // ANSI Korean (Wansung)
|
||||||
|
950 => 'CP950', // ANSI Chinese Traditional BIG5
|
||||||
|
1200 => 'UTF-16LE', // UTF-16 (BIFF8)
|
||||||
|
1250 => 'CP1250', // ANSI Latin II (Central European)
|
||||||
|
1251 => 'CP1251', // ANSI Cyrillic
|
||||||
|
1252 => 'CP1252', // ANSI Latin I (BIFF4-BIFF7)
|
||||||
|
1253 => 'CP1253', // ANSI Greek
|
||||||
|
1254 => 'CP1254', // ANSI Turkish
|
||||||
|
1255 => 'CP1255', // ANSI Hebrew
|
||||||
|
1256 => 'CP1256', // ANSI Arabic
|
||||||
|
1257 => 'CP1257', // ANSI Baltic
|
||||||
|
1258 => 'CP1258', // ANSI Vietnamese
|
||||||
|
1361 => 'CP1361', // ANSI Korean (Johab)
|
||||||
|
10000 => 'MAC', // Apple Roman
|
||||||
|
10001 => 'CP932', // Macintosh Japanese
|
||||||
|
10002 => 'CP950', // Macintosh Chinese Traditional
|
||||||
|
10003 => 'CP1361', // Macintosh Korean
|
||||||
|
10004 => 'MACARABIC', // Apple Arabic
|
||||||
|
10005 => 'MACHEBREW', // Apple Hebrew
|
||||||
|
10006 => 'MACGREEK', // Macintosh Greek
|
||||||
|
10007 => 'MACCYRILLIC', // Macintosh Cyrillic
|
||||||
|
10008 => 'CP936', // Macintosh - Simplified Chinese (GB 2312)
|
||||||
|
10010 => 'MACROMANIA', // Macintosh Romania
|
||||||
|
10017 => 'MACUKRAINE', // Macintosh Ukraine
|
||||||
|
10021 => 'MACTHAI', // Macintosh Thai
|
||||||
|
10029 => 'MACCENTRALEUROPE', // Macintosh Central Europe
|
||||||
|
10079 => 'MACICELAND', // Macintosh Icelandic
|
||||||
|
10081 => 'MACTURKISH', // Macintosh Turkish
|
||||||
|
10082 => 'MACCROATIAN', // Macintosh Croatian
|
||||||
|
21010 => 'UTF-16LE', // UTF-16 (BIFF8) This isn't correct, but some Excel writer libraries erroneously use Codepage 21010 for UTF-16LE
|
||||||
|
32768 => 'MAC', // Apple Roman
|
||||||
|
//32769 => 'unsupported', // ANSI Latin I (BIFF2-BIFF3)
|
||||||
|
65000 => 'UTF-7', // Unicode (UTF-7)
|
||||||
|
65001 => 'UTF-8', // Unicode (UTF-8)
|
||||||
|
];
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Convert Microsoft Code Page Identifier to Code Page Name which iconv
|
* Convert Microsoft Code Page Identifier to Code Page Name which iconv
|
||||||
* and mbstring understands.
|
* and mbstring understands.
|
||||||
|
@ -14,123 +73,20 @@ class CodePage
|
||||||
*
|
*
|
||||||
* @return string Code Page Name
|
* @return string Code Page Name
|
||||||
*/
|
*/
|
||||||
public static function numberToName($codePage)
|
public static function numberToName(int $codePage): string
|
||||||
{
|
{
|
||||||
switch ($codePage) {
|
if (array_key_exists($codePage, self::$pageArray)) {
|
||||||
case 367:
|
return self::$pageArray[$codePage];
|
||||||
return 'ASCII'; // ASCII
|
}
|
||||||
case 437:
|
if ($codePage == 720 || $codePage == 32769) {
|
||||||
return 'CP437'; // OEM US
|
throw new PhpSpreadsheetException("Code page $codePage not supported."); // 720 = OEM Arabic, 32769 = ANSI Latin I (BIFF2-BIFF3)
|
||||||
case 720:
|
|
||||||
throw new PhpSpreadsheetException('Code page 720 not supported.'); // OEM Arabic
|
|
||||||
case 737:
|
|
||||||
return 'CP737'; // OEM Greek
|
|
||||||
case 775:
|
|
||||||
return 'CP775'; // OEM Baltic
|
|
||||||
case 850:
|
|
||||||
return 'CP850'; // OEM Latin I
|
|
||||||
case 852:
|
|
||||||
return 'CP852'; // OEM Latin II (Central European)
|
|
||||||
case 855:
|
|
||||||
return 'CP855'; // OEM Cyrillic
|
|
||||||
case 857:
|
|
||||||
return 'CP857'; // OEM Turkish
|
|
||||||
case 858:
|
|
||||||
return 'CP858'; // OEM Multilingual Latin I with Euro
|
|
||||||
case 860:
|
|
||||||
return 'CP860'; // OEM Portuguese
|
|
||||||
case 861:
|
|
||||||
return 'CP861'; // OEM Icelandic
|
|
||||||
case 862:
|
|
||||||
return 'CP862'; // OEM Hebrew
|
|
||||||
case 863:
|
|
||||||
return 'CP863'; // OEM Canadian (French)
|
|
||||||
case 864:
|
|
||||||
return 'CP864'; // OEM Arabic
|
|
||||||
case 865:
|
|
||||||
return 'CP865'; // OEM Nordic
|
|
||||||
case 866:
|
|
||||||
return 'CP866'; // OEM Cyrillic (Russian)
|
|
||||||
case 869:
|
|
||||||
return 'CP869'; // OEM Greek (Modern)
|
|
||||||
case 874:
|
|
||||||
return 'CP874'; // ANSI Thai
|
|
||||||
case 932:
|
|
||||||
return 'CP932'; // ANSI Japanese Shift-JIS
|
|
||||||
case 936:
|
|
||||||
return 'CP936'; // ANSI Chinese Simplified GBK
|
|
||||||
case 949:
|
|
||||||
return 'CP949'; // ANSI Korean (Wansung)
|
|
||||||
case 950:
|
|
||||||
return 'CP950'; // ANSI Chinese Traditional BIG5
|
|
||||||
case 1200:
|
|
||||||
return 'UTF-16LE'; // UTF-16 (BIFF8)
|
|
||||||
case 1250:
|
|
||||||
return 'CP1250'; // ANSI Latin II (Central European)
|
|
||||||
case 1251:
|
|
||||||
return 'CP1251'; // ANSI Cyrillic
|
|
||||||
case 0:
|
|
||||||
// CodePage is not always correctly set when the xls file was saved by Apple's Numbers program
|
|
||||||
case 1252:
|
|
||||||
return 'CP1252'; // ANSI Latin I (BIFF4-BIFF7)
|
|
||||||
case 1253:
|
|
||||||
return 'CP1253'; // ANSI Greek
|
|
||||||
case 1254:
|
|
||||||
return 'CP1254'; // ANSI Turkish
|
|
||||||
case 1255:
|
|
||||||
return 'CP1255'; // ANSI Hebrew
|
|
||||||
case 1256:
|
|
||||||
return 'CP1256'; // ANSI Arabic
|
|
||||||
case 1257:
|
|
||||||
return 'CP1257'; // ANSI Baltic
|
|
||||||
case 1258:
|
|
||||||
return 'CP1258'; // ANSI Vietnamese
|
|
||||||
case 1361:
|
|
||||||
return 'CP1361'; // ANSI Korean (Johab)
|
|
||||||
case 10000:
|
|
||||||
return 'MAC'; // Apple Roman
|
|
||||||
case 10001:
|
|
||||||
return 'CP932'; // Macintosh Japanese
|
|
||||||
case 10002:
|
|
||||||
return 'CP950'; // Macintosh Chinese Traditional
|
|
||||||
case 10003:
|
|
||||||
return 'CP1361'; // Macintosh Korean
|
|
||||||
case 10004:
|
|
||||||
return 'MACARABIC'; // Apple Arabic
|
|
||||||
case 10005:
|
|
||||||
return 'MACHEBREW'; // Apple Hebrew
|
|
||||||
case 10006:
|
|
||||||
return 'MACGREEK'; // Macintosh Greek
|
|
||||||
case 10007:
|
|
||||||
return 'MACCYRILLIC'; // Macintosh Cyrillic
|
|
||||||
case 10008:
|
|
||||||
return 'CP936'; // Macintosh - Simplified Chinese (GB 2312)
|
|
||||||
case 10010:
|
|
||||||
return 'MACROMANIA'; // Macintosh Romania
|
|
||||||
case 10017:
|
|
||||||
return 'MACUKRAINE'; // Macintosh Ukraine
|
|
||||||
case 10021:
|
|
||||||
return 'MACTHAI'; // Macintosh Thai
|
|
||||||
case 10029:
|
|
||||||
return 'MACCENTRALEUROPE'; // Macintosh Central Europe
|
|
||||||
case 10079:
|
|
||||||
return 'MACICELAND'; // Macintosh Icelandic
|
|
||||||
case 10081:
|
|
||||||
return 'MACTURKISH'; // Macintosh Turkish
|
|
||||||
case 10082:
|
|
||||||
return 'MACCROATIAN'; // Macintosh Croatian
|
|
||||||
case 21010:
|
|
||||||
return 'UTF-16LE'; // UTF-16 (BIFF8) This isn't correct, but some Excel writer libraries erroneously use Codepage 21010 for UTF-16LE
|
|
||||||
case 32768:
|
|
||||||
return 'MAC'; // Apple Roman
|
|
||||||
case 32769:
|
|
||||||
throw new PhpSpreadsheetException('Code page 32769 not supported.'); // ANSI Latin I (BIFF2-BIFF3)
|
|
||||||
case 65000:
|
|
||||||
return 'UTF-7'; // Unicode (UTF-7)
|
|
||||||
case 65001:
|
|
||||||
return 'UTF-8'; // Unicode (UTF-8)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
throw new PhpSpreadsheetException('Unknown codepage: ' . $codePage);
|
throw new PhpSpreadsheetException('Unknown codepage: ' . $codePage);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static function getEncodings(): array
|
||||||
|
{
|
||||||
|
return self::$pageArray;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -24,6 +24,22 @@ class CodePageTest extends TestCase
|
||||||
return require 'tests/data/Shared/CodePage.php';
|
return require 'tests/data/Shared/CodePage.php';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public function testCoverage(): void
|
||||||
|
{
|
||||||
|
$covered = [];
|
||||||
|
$expected = CodePage::getEncodings();
|
||||||
|
foreach ($expected as $key => $val) {
|
||||||
|
$covered[$key] = 0;
|
||||||
|
}
|
||||||
|
$tests = $this->providerCodePage();
|
||||||
|
foreach ($tests as $test) {
|
||||||
|
$covered[$test[1]] = 1;
|
||||||
|
}
|
||||||
|
foreach ($covered as $key => $val) {
|
||||||
|
self::assertEquals(1, $val, "Codepage $key not tested");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public function testNumberToNameWithInvalidCodePage(): void
|
public function testNumberToNameWithInvalidCodePage(): void
|
||||||
{
|
{
|
||||||
$invalidCodePage = 12345;
|
$invalidCodePage = 12345;
|
||||||
|
|
|
@ -1,6 +1,11 @@
|
||||||
<?php
|
<?php
|
||||||
|
|
||||||
return [
|
return [
|
||||||
|
// Code page 0 (not always correctly set when the xls file was saved by Apple's Numbers) is treated as ANSI Latin I
|
||||||
|
[
|
||||||
|
'CP1252',
|
||||||
|
0,
|
||||||
|
],
|
||||||
// ASCII
|
// ASCII
|
||||||
[
|
[
|
||||||
'ASCII',
|
'ASCII',
|
||||||
|
@ -127,11 +132,6 @@ return [
|
||||||
1251,
|
1251,
|
||||||
],
|
],
|
||||||
// ANSI Latin I (BIFF4-BIFF7)
|
// ANSI Latin I (BIFF4-BIFF7)
|
||||||
[
|
|
||||||
'CP1252',
|
|
||||||
0,
|
|
||||||
],
|
|
||||||
// ANSI Latin I (BIFF4-BIFF7)
|
|
||||||
[
|
[
|
||||||
'CP1252',
|
'CP1252',
|
||||||
1252,
|
1252,
|
||||||
|
@ -176,6 +176,31 @@ return [
|
||||||
'MAC',
|
'MAC',
|
||||||
10000,
|
10000,
|
||||||
],
|
],
|
||||||
|
// Macintosh Japanese
|
||||||
|
[
|
||||||
|
'CP932',
|
||||||
|
10001,
|
||||||
|
],
|
||||||
|
// Macintosh Chinese Traditional
|
||||||
|
[
|
||||||
|
'CP950',
|
||||||
|
10002,
|
||||||
|
],
|
||||||
|
// Macintosh Korean
|
||||||
|
[
|
||||||
|
'CP1361',
|
||||||
|
10003,
|
||||||
|
],
|
||||||
|
// Apple Arabic
|
||||||
|
[
|
||||||
|
'MACARABIC',
|
||||||
|
10004,
|
||||||
|
],
|
||||||
|
// Apple Hebrew
|
||||||
|
[
|
||||||
|
'MACHEBREW',
|
||||||
|
10005,
|
||||||
|
],
|
||||||
// Macintosh Greek
|
// Macintosh Greek
|
||||||
[
|
[
|
||||||
'MACGREEK',
|
'MACGREEK',
|
||||||
|
@ -186,6 +211,26 @@ return [
|
||||||
'MACCYRILLIC',
|
'MACCYRILLIC',
|
||||||
10007,
|
10007,
|
||||||
],
|
],
|
||||||
|
// Macintosh - Simplified Chinese (GB 2312)
|
||||||
|
[
|
||||||
|
'CP936',
|
||||||
|
10008,
|
||||||
|
],
|
||||||
|
// Macintosh Romania
|
||||||
|
[
|
||||||
|
'MACROMANIA',
|
||||||
|
10010,
|
||||||
|
],
|
||||||
|
// Macintosh Ukraine
|
||||||
|
[
|
||||||
|
'MACUKRAINE',
|
||||||
|
10017,
|
||||||
|
],
|
||||||
|
// Macintosh Thai
|
||||||
|
[
|
||||||
|
'MACTHAI',
|
||||||
|
10021,
|
||||||
|
],
|
||||||
// Macintosh Central Europe
|
// Macintosh Central Europe
|
||||||
[
|
[
|
||||||
'MACCENTRALEUROPE',
|
'MACCENTRALEUROPE',
|
||||||
|
@ -201,6 +246,16 @@ return [
|
||||||
'MACTURKISH',
|
'MACTURKISH',
|
||||||
10081,
|
10081,
|
||||||
],
|
],
|
||||||
|
// Macintosh Croatian
|
||||||
|
[
|
||||||
|
'MACCROATIAN',
|
||||||
|
10082,
|
||||||
|
],
|
||||||
|
// UTF-16 (BIFF8): 21010 is accepted because some Excel writer libraries erroneously use it for UTF-16LE
|
||||||
|
[
|
||||||
|
'UTF-16LE',
|
||||||
|
21010,
|
||||||
|
],
|
||||||
// Apple Roman
|
// Apple Roman
|
||||||
[
|
[
|
||||||
'MAC',
|
'MAC',
|
||||||
|
|
Loading…
Reference in New Issue