From f968a95ca5d6b9cb9a2165c5d02369a0d8e5bf55 Mon Sep 17 00:00:00 2001 From: MarkBaker Date: Wed, 15 Jul 2015 23:23:20 +0100 Subject: [PATCH] Bugfix: Work Item GH-554 - Whitespace after toRichTextObject() --- Examples/42richText.php | 60 ++++++++++++++++++------------ changelog.txt | 4 +- src/PhpSpreadsheet/Helper/HTML.php | 39 ++++++++++++++----- 3 files changed, 68 insertions(+), 35 deletions(-) diff --git a/Examples/42richText.php b/Examples/42richText.php index b8f994e6..acd09f2b 100644 --- a/Examples/42richText.php +++ b/Examples/42richText.php @@ -34,28 +34,28 @@ date_default_timezone_set('Europe/London'); define('EOL',(PHP_SAPI == 'cli') ? PHP_EOL : '
'); /** Include PHPExcel */ -require_once dirname(__FILE__) . '/../Classes/PHPExcel.php'; +require_once dirname(__FILE__) . '/../src/Bootstrap.php'; // Create new PHPExcel object echo date('H:i:s') , " Create new PHPExcel object" , EOL; -$objPHPExcel = new PHPExcel(); +$objPHPExcel = new \PHPExcel\Spreadsheet(); // Set document properties echo date('H:i:s') , " Set document properties" , EOL; $objPHPExcel->getProperties()->setCreator("Maarten Balliauw") - ->setLastModifiedBy("Maarten Balliauw") - ->setTitle("PHPExcel Test Document") - ->setSubject("PHPExcel Test Document") - ->setDescription("Test document for PHPExcel, generated using PHP classes.") - ->setKeywords("office PHPExcel php") - ->setCategory("Test result file"); + ->setLastModifiedBy("Maarten Balliauw") + ->setTitle("PHPExcel Test Document") + ->setSubject("PHPExcel Test Document") + ->setDescription("Test document for PHPExcel, generated using PHP classes.") + ->setKeywords("office PHPExcel php") + ->setCategory("Test result file"); // Add some data echo date('H:i:s') , " Add some data" , EOL; -$html1=' +$html1 = '

My very first example of rich text
generated from html markup

@@ -64,11 +64,11 @@ while this block uses an underline.

-I want to eat healthy foodpizza. +I want to eat healthy food pizza. '; -$html2='

+$html2 = '

100°C is a hot temperature @@ -78,43 +78,55 @@ $html2='

'; -$html3='23 equals 8'; +$html3 = '23 equals 8'; -$html4='H2SO4 is the chemical formula for Sulphuric acid'; +$html4 = 'H2SO4 is the chemical formula for Sulphuric acid'; +$html5 = 'bold, italic, bold+italic'; -$wizard = new PHPExcel_Helper_HTML; +$wizard = new \PHPExcel\Helper\HTML; $richText = $wizard->toRichTextObject($html1); -$objPHPExcel->setActiveSheetIndex(0) +$objPHPExcel->getActiveSheet() ->setCellValue('A1', $richText); -$objPHPExcel->getActiveSheet()->getColumnDimension('A')->setWidth(48); -$objPHPExcel->getActiveSheet()->getRowDimension(1)->setRowHeight(-1); +$objPHPExcel->getActiveSheet() + ->getColumnDimension('A') + ->setWidth(48); +$objPHPExcel->getActiveSheet() + ->getRowDimension(1) + ->setRowHeight(-1); $objPHPExcel->getActiveSheet()->getStyle('A1') ->getAlignment() ->setWrapText(true); $richText = $wizard->toRichTextObject($html2); -$objPHPExcel->setActiveSheetIndex(0) +$objPHPExcel->getActiveSheet() ->setCellValue('A2', $richText); -$objPHPExcel->getActiveSheet()->getRowDimension(1)->setRowHeight(-1); -$objPHPExcel->getActiveSheet()->getStyle('A2') +$objPHPExcel->getActiveSheet() + ->getRowDimension(1) + ->setRowHeight(-1); +$objPHPExcel->getActiveSheet() + ->getStyle('A2') ->getAlignment() ->setWrapText(true); $objPHPExcel->setActiveSheetIndex(0) - ->setCellValue('A3', $wizard->toRichTextObject($html3)); + ->setCellValue('A3', $wizard->toRichTextObject($html3)); $objPHPExcel->setActiveSheetIndex(0) ->setCellValue('A4', $wizard->toRichTextObject($html4)); +$objPHPExcel->setActiveSheetIndex(0) + ->setCellValue('A5', $wizard->toRichTextObject($html5)); + // Rename worksheet echo date('H:i:s') , " Rename worksheet" , EOL; -$objPHPExcel->getActiveSheet()->setTitle('Simple'); +$objPHPExcel->getActiveSheet() + ->setTitle('Rich Text Examples'); // Set active sheet index to the first sheet, so Excel opens this as the first sheet @@ -125,7 +137,7 @@ $objPHPExcel->setActiveSheetIndex(0); echo date('H:i:s') , " Write to Excel2007 format" , EOL; $callStartTime = microtime(true); -$objWriter = PHPExcel_IOFactory::createWriter($objPHPExcel, 'Excel2007'); +$objWriter = \PHPExcel\IOFactory::createWriter($objPHPExcel, 'Excel2007'); $objWriter->save(str_replace('.php', '.xlsx', __FILE__)); $callEndTime = microtime(true); $callTime = $callEndTime - $callStartTime; @@ -140,7 +152,7 @@ echo date('H:i:s') , ' Current memory usage: ' , (memory_get_usage(true) / 1024 echo date('H:i:s') , " Write to Excel5 format" , EOL; $callStartTime = microtime(true); -$objWriter = PHPExcel_IOFactory::createWriter($objPHPExcel, 'Excel5'); +$objWriter = \PHPExcel\IOFactory::createWriter($objPHPExcel, 'Excel5'); $objWriter->save(str_replace('.php', '.xls', __FILE__)); $callEndTime = microtime(true); $callTime = $callEndTime - $callStartTime; diff --git a/changelog.txt b/changelog.txt index 2f455c2d..14dae427 100644 --- a/changelog.txt +++ b/changelog.txt @@ -23,12 +23,12 @@ ************************************************************************************** -Planned for 1.8.2 +Planned for 1.9 - Bugfix: (MBaker) - Fix to getCell() method when cell reference includes a worksheet reference - Bugfix: (ncrypthic) Work Item GH-570 - Ignore inlineStr type if formula element exists +- Bugfix: (MBaker) Work Item GH-554 - Whitespace after toRichTextObject() - General: (umpirsky) Work Item GH-548 - Optimize vlookup() sort - 2015-04-30 (v1.8.1): - Bugfix: (goncons) Work Item GH-397 - Fix for Writing an Open Document cell with non-numeric formula - Bugfix: (sarciszewski) Work Item GH-329 - Avoid potential divide by zero in basedrawing diff --git a/src/PhpSpreadsheet/Helper/HTML.php b/src/PhpSpreadsheet/Helper/HTML.php index 0826d10b..703bf20b 100644 --- a/src/PhpSpreadsheet/Helper/HTML.php +++ b/src/PhpSpreadsheet/Helper/HTML.php @@ -615,7 +615,7 @@ class HTML $this->initialise(); // Create a new DOM object - $dom = new domDocument; + $dom = new \DOMDocument; // Load the HTML file into the DOM object // Note the use of error suppression, because typically this will be an html fragment, so not fully valid markup $loaded = @$dom->loadHTML($html); @@ -625,9 +625,27 @@ class HTML $this->richTextObject = new \PHPExcel\RichText(); $this->parseElements($dom); + + // Clean any further spurious whitespace + $this->cleanWhitespace(); + return $this->richTextObject; } + protected function cleanWhitespace() + { + foreach($this->richTextObject->getRichTextElements() as $key => $element) { + $text = $element->getText(); + // Trim any leading spaces on the first run + if ($key == 0) { + $text = ltrim($text); + } + // Trim any spaces immediately after a line break + $text = preg_replace('/\n */mu', "\n", $text); + $element->setText($text); + } + } + protected function buildTextRun() { $text = $this->stringData; @@ -767,12 +785,16 @@ class HTML protected function breakTag() { - $this->stringData .= PHP_EOL; + $this->stringData .= "\n"; } - protected function parseTextNode(DOMText $textNode) + protected function parseTextNode(\DOMText $textNode) { - $domText = preg_replace('/\s+/u', ' ', ltrim($textNode->nodeValue)); + $domText = preg_replace( + '/\s+/u', + ' ', + str_replace(["\r", "\n"], ' ', $textNode->nodeValue) + ); $this->stringData .= $domText; $this->buildTextRun(); } @@ -787,7 +809,7 @@ class HTML } } - protected function parseElementNode(DOMElement $element) + protected function parseElementNode(\DOMElement $element) { $callbackTag = strtolower($element->nodeName); $this->stack[] = $callbackTag; @@ -795,18 +817,17 @@ class HTML $this->handleCallback($element, $callbackTag, $this->startTagCallbacks); $this->parseElements($element); - $this->stringData .= ' '; array_pop($this->stack); $this->handleCallback($element, $callbackTag, $this->endTagCallbacks); } - protected function parseElements(DOMNode $element) + protected function parseElements(\DOMNode $element) { foreach ($element->childNodes as $child) { - if ($child instanceof DOMText) { + if ($child instanceof \DOMText) { $this->parseTextNode($child); - } elseif ($child instanceof DOMElement) { + } elseif ($child instanceof \DOMElement) { $this->parseElementNode($child); } }