From ba705448464ec38f3527cd3ddc1ace20609b40b3 Mon Sep 17 00:00:00 2001 From: Adrien Crivelli Date: Thu, 25 Aug 2016 11:14:03 +0900 Subject: [PATCH] Fix crash when reading HTML files --- composer.json | 1 + src/PhpSpreadsheet/Helper/HTML.php | 17 +++++++++++------ src/PhpSpreadsheet/Reader/HTML.php | 26 ++++++++++++++++---------- 3 files changed, 28 insertions(+), 16 deletions(-) diff --git a/composer.json b/composer.json index ca41778b..41864bfd 100644 --- a/composer.json +++ b/composer.json @@ -37,6 +37,7 @@ "suggest": { "ext-zip": "*", "ext-gd": "*", + "ext-dom": "Option to read and write HTML files", "mpdf/mpdf": "Option for rendering PDF with PDF Writer", "dompdf/dompdf": "Option for rendering PDF with PDF Writer", "tecnick.com/tcpdf": "Option for rendering PDF with PDF Writer", diff --git a/src/PhpSpreadsheet/Helper/HTML.php b/src/PhpSpreadsheet/Helper/HTML.php index 0baf0491..0e3de1d4 100644 --- a/src/PhpSpreadsheet/Helper/HTML.php +++ b/src/PhpSpreadsheet/Helper/HTML.php @@ -2,6 +2,11 @@ namespace PhpSpreadsheet\Helper; +use DOMDocument; +use DOMElement; +use DOMNode; +use DOMText; + /** * Copyright (c) 2006 - 2016 PhpSpreadsheet * @@ -612,7 +617,7 @@ class HTML $this->initialise(); // Create a new DOM object - $dom = new \DOMDocument(); + $dom = new DOMDocument(); // Load the HTML file into the DOM object // Note the use of error suppression, because typically this will be an html fragment, so not fully valid markup $loaded = @$dom->loadHTML($html); @@ -786,7 +791,7 @@ class HTML $this->stringData .= "\n"; } - protected function parseTextNode(\DOMText $textNode) + protected function parseTextNode(DOMText $textNode) { $domText = preg_replace( '/\s+/u', @@ -807,7 +812,7 @@ class HTML } } - protected function parseElementNode(\DOMElement $element) + protected function parseElementNode(DOMElement $element) { $callbackTag = strtolower($element->nodeName); $this->stack[] = $callbackTag; @@ -820,12 +825,12 @@ class HTML $this->handleCallback($element, $callbackTag, $this->endTagCallbacks); } - protected function parseElements(\DOMNode $element) + protected function parseElements(DOMNode $element) { foreach ($element->childNodes as $child) { - if ($child instanceof \DOMText) { + if ($child instanceof DOMText) { $this->parseTextNode($child); - } elseif ($child instanceof \DOMElement) { + } elseif ($child instanceof DOMElement) { $this->parseElementNode($child); } } diff --git a/src/PhpSpreadsheet/Reader/HTML.php b/src/PhpSpreadsheet/Reader/HTML.php index 2f85b977..668af2b5 100644 --- a/src/PhpSpreadsheet/Reader/HTML.php +++ b/src/PhpSpreadsheet/Reader/HTML.php @@ -2,6 +2,12 @@ namespace PhpSpreadsheet\Reader; +use DOMDocument; +use DOMElement; +use DOMNode; +use DOMText; +use PhpSpreadsheet\Spreadsheet; + /** * Copyright (c) 2006 - 2016 PhpSpreadsheet * @@ -131,16 +137,16 @@ class HTML extends BaseReader implements IReader } /** - * Loads PhpSpreadsheet from file + * Loads Spreadsheet from file * * @param string $pFilename * @throws Exception - * @return PhpSpreadsheet + * @return Spreadsheet */ public function load($pFilename) { - // Create new PhpSpreadsheet - $spreadsheet = new PhpSpreadsheet(); + // Create new Spreadsheet + $spreadsheet = new Spreadsheet(); // Load into this instance return $this->loadIntoExisting($pFilename, $spreadsheet); @@ -168,7 +174,7 @@ class HTML extends BaseReader implements IReader return $this->inputEncoding; } - // Data Array used for testing only, should write to PhpSpreadsheet object on completion of tests + // Data Array used for testing only, should write to Spreadsheet object on completion of tests protected $dataArray = []; protected $tableLevel = 0; protected $nestedColumn = ['A']; @@ -458,11 +464,11 @@ class HTML extends BaseReader implements IReader * Loads PhpSpreadsheet from file into PhpSpreadsheet instance * * @param string $pFilename - * @param \PhpSpreadsheet\Spreadsheet $spreadsheet + * @param Spreadsheet $spreadsheet * @throws Exception - * @return \PhpSpreadsheet\Spreadsheet + * @return Spreadsheet */ - public function loadIntoExisting($pFilename, \PhpSpreadsheet\Spreadsheet $spreadsheet) + public function loadIntoExisting($pFilename, Spreadsheet $spreadsheet) { // Open file to validate $this->openFile($pFilename); @@ -473,14 +479,14 @@ class HTML extends BaseReader implements IReader // Close after validating fclose($this->fileHandle); - // Create new PhpSpreadsheet + // Create new sheet while ($spreadsheet->getSheetCount() <= $this->sheetIndex) { $spreadsheet->createSheet(); } $spreadsheet->setActiveSheetIndex($this->sheetIndex); // Create a new DOM object - $dom = new domDocument(); + $dom = new DOMDocument(); // Reload the HTML file into the DOM object $loaded = $dom->loadHTML(mb_convert_encoding($this->securityScanFile($pFilename), 'HTML-ENTITIES', 'UTF-8')); if ($loaded === false) {