diff --git a/CHANGELOG.md b/CHANGELOG.md
index 610615d5..b97431c5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org).
 - Add support for IFS() logical function [#1442](https://github.com/PHPOffice/PhpSpreadsheet/pull/1442)
 - Add Cell Address Helper to provide conversions between the R1C1 and A1 address formats [#1558](https://github.com/PHPOffice/PhpSpreadsheet/pull/1558)
 - Add ability to edit Html/Pdf before saving [#1499](https://github.com/PHPOffice/PhpSpreadsheet/pull/1499)
+- Add ability to set codepage explicitly for BIFF5 [#1018](https://github.com/PHPOffice/PhpSpreadsheet/issues/1018)
 
 ### Fixed
 
diff --git a/src/PhpSpreadsheet/Reader/Xls.php b/src/PhpSpreadsheet/Reader/Xls.php
index c7c5f77f..11a6195c 100644
--- a/src/PhpSpreadsheet/Reader/Xls.php
+++ b/src/PhpSpreadsheet/Reader/Xls.php
@@ -439,6 +439,22 @@ class Xls extends BaseReader
         }
     }
 
+    /**
+     * Set the codepage explicitly instead of relying on CodePage::DEFAULT_CODE_PAGE.
+     *
+     * @param string $codepage Codepage name; must be accepted by CodePage::validate()
+     *
+     * @throws PhpSpreadsheetException when CodePage::validate() rejects the name
+     */
+    public function setCodepage(string $codepage): void
+    {
+        if (!CodePage::validate($codepage)) {
+            throw new PhpSpreadsheetException('Unknown codepage: ' . $codepage);
+        }
+
+        $this->codepage = $codepage;
+    }
+
     /**
      * Reads names of the worksheets from a file, without parsing the whole file to a PhpSpreadsheet object.
      *
@@ -640,7 +656,9 @@ class Xls extends BaseReader
 
         // initialize
         $this->pos = 0;
-        $this->codepage = 'CP1252';
+        // Keep any codepage already held (e.g. set via setCodepage()); otherwise use the default.
+        // NOTE(review): a value left over from a previous load would be kept as well - confirm intended.
+        $this->codepage = $this->codepage ?: CodePage::DEFAULT_CODE_PAGE;
         $this->formats = [];
         $this->objFonts = [];
         $this->palette = [];
diff --git a/src/PhpSpreadsheet/Shared/CodePage.php b/src/PhpSpreadsheet/Shared/CodePage.php
index 97cbfbbe..1d5d8933 100644
--- a/src/PhpSpreadsheet/Shared/CodePage.php
+++ b/src/PhpSpreadsheet/Shared/CodePage.php
@@ -6,6 +6,9 @@ use PhpOffice\PhpSpreadsheet\Exception as PhpSpreadsheetException;
 
 class CodePage
 {
+    /** Codepage name used as the fallback when no other codepage has been set. */
+    public const DEFAULT_CODE_PAGE = 'CP1252';
+
     private static $pageArray = [
         0 => 'CP1252', // CodePage is not always correctly set when the xls file was saved by Apple's Numbers program
         367 => 'ASCII', // ASCII
@@ -65,6 +68,20 @@ class CodePage
         65001 => 'UTF-8', // Unicode (UTF-8)
     ];
 
+    /**
+     * Check whether a codepage name is one of the names listed in $pageArray.
+     *
+     * The comparison is strict, so the name must match a listed value exactly (case included).
+     *
+     * @param string $codePage Codepage name to check
+     *
+     * @return bool true when the name is listed, false otherwise
+     */
+    public static function validate(string $codePage): bool
+    {
+        return in_array($codePage, self::$pageArray, true);
+    }
+
     /**
      * Convert Microsoft Code Page Identifier to Code Page Name which iconv
      * and mbstring understands.