add ability to set codepage explicitly for BIFF5 (#1484)

If a BIFF5 (Excel 95) file doesn't have a codepage record, the default codepage CP1252 is used and can't be changed.
That causes problems with decoding Cyrillic text.
This commit is contained in:
Pavel Alazankin 2020-06-28 17:39:38 +03:00 committed by GitHub
parent 93fbf8a938
commit 6caa0cb4f5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 18 additions and 1 deletions

View File

@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org).
- Add support for IFS() logical function [#1442](https://github.com/PHPOffice/PhpSpreadsheet/pull/1442)
- Add Cell Address Helper to provide conversions between the R1C1 and A1 address formats [#1558](https://github.com/PHPOffice/PhpSpreadsheet/pull/1558)
- Add ability to edit Html/Pdf before saving [#1499](https://github.com/PHPOffice/PhpSpreadsheet/pull/1499)
- Add ability to set codepage explicitly for BIFF5 [#1018](https://github.com/PHPOffice/PhpSpreadsheet/issues/1018)
### Fixed

View File

@ -439,6 +439,15 @@ class Xls extends BaseReader
}
}
/**
 * Explicitly choose the codepage this reader should use.
 *
 * Intended for BIFF5 (Excel 95) files that carry no codepage record of
 * their own. The name is checked with CodePage::validate() before it is
 * stored, so an unrecognised name never reaches the reader state.
 *
 * @param string $codepage Code page name, e.g. 'CP1252'
 *
 * @throws PhpSpreadsheetException When CodePage::validate() rejects the name
 */
public function setCodepage(string $codepage): void
{
    $isKnownCodepage = CodePage::validate($codepage);

    // Reject early so $this->codepage is only ever overwritten with a validated name.
    if ($isKnownCodepage === false) {
        throw new PhpSpreadsheetException('Unknown codepage: ' . $codepage);
    }

    $this->codepage = $codepage;
}
/**
* Reads names of the worksheets from a file, without parsing the whole file to a PhpSpreadsheet object.
*
@ -640,7 +649,7 @@ class Xls extends BaseReader
// initialize
$this->pos = 0;
$this->codepage = 'CP1252';
$this->codepage = $this->codepage ?: CodePage::DEFAULT_CODE_PAGE;
$this->formats = [];
$this->objFonts = [];
$this->palette = [];

View File

@ -6,6 +6,8 @@ use PhpOffice\PhpSpreadsheet\Exception as PhpSpreadsheetException;
class CodePage
{
public const DEFAULT_CODE_PAGE = 'CP1252';
private static $pageArray = [
0 => 'CP1252', // CodePage is not always correctly set when the xls file was saved by Apple's Numbers program
367 => 'ASCII', // ASCII
@ -65,6 +67,11 @@ class CodePage
65001 => 'UTF-8', // Unicode (UTF-8)
];
/**
 * Tell whether a code page name is one of the names listed in self::$pageArray.
 *
 * The comparison is strict (type and value), so the match is case-sensitive
 * and only the array's values — not its numeric identifiers — are considered.
 *
 * @param string $codePage Code page name to look up, e.g. 'CP1252'
 *
 * @return bool True when the name appears among the known code page names
 */
public static function validate(string $codePage): bool
{
    foreach (self::$pageArray as $knownName) {
        if ($knownName === $codePage) {
            return true;
        }
    }

    return false;
}
/**
* Convert Microsoft Code Page Identifier to Code Page Name which iconv
* and mbstring understands.