diff --git a/CHANGELOG.md b/CHANGELOG.md
index 20d26f9e..3891842e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org).
- HLookup needs an ordered list even if range_lookup is set to false [Issue #1055](https://github.com/PHPOffice/PhpSpreadsheet/issues/1055) and [PR #1076](https://github.com/PHPOffice/PhpSpreadsheet/pull/1076)
- Improve performance of IF function calls via branch pruning to avoid resolution of every branch [#844](https://github.com/PHPOffice/PhpSpreadsheet/pull/844)
- MATCH function supports `*?~` Excel functionality, when match_type=0 - [Issue #1116](https://github.com/PHPOffice/PhpSpreadsheet/issues/1116)
+- Allow HTML Reader to accept HTML as a string [PR #1136](https://github.com/PHPOffice/PhpSpreadsheet/pull/1136)
### Fixed
diff --git a/docs/topics/reading-and-writing-to-file.md b/docs/topics/reading-and-writing-to-file.md
index 0b27f8c1..b26cc6a9 100644
--- a/docs/topics/reading-and-writing-to-file.md
+++ b/docs/topics/reading-and-writing-to-file.md
@@ -875,3 +875,31 @@ $writer->save('write.xls');
```
Notice that it is ok to load an xlsx file and generate an xls file.
+
+## Generating Excel files from HTML content
+
+If you are generating an Excel file from pre-rendered HTML content you can do so
+automatically using the HTML Reader. This is most useful when you are generating
+Excel files from web application content that would be downloaded/sent to a user.
+
+For example:
+
+```php
+$htmlString = '<table>
+                  <tr>
+                      <td>Hello World</td>
+                  </tr>
+                  <tr>
+                      <td>Hello World</td>
+                  </tr>
+                  <tr>
+                      <td>Hello World</td>
+                  </tr>
+              </table>';
+
+$reader = new \PhpOffice\PhpSpreadsheet\Reader\Html();
+$spreadsheet = $reader->loadFromString($htmlString);
+
+$writer = \PhpOffice\PhpSpreadsheet\IOFactory::createWriter($spreadsheet, 'Xls');
+$writer->save('write.xls');
+```
diff --git a/src/PhpSpreadsheet/Reader/Html.php b/src/PhpSpreadsheet/Reader/Html.php
index ff2c909e..bf9c6038 100644
--- a/src/PhpSpreadsheet/Reader/Html.php
+++ b/src/PhpSpreadsheet/Reader/Html.php
@@ -592,28 +592,64 @@ class Html extends BaseReader
throw new Exception($pFilename . ' is an Invalid HTML file.');
}
- // Create new sheet
- while ($spreadsheet->getSheetCount() <= $this->sheetIndex) {
- $spreadsheet->createSheet();
- }
- $spreadsheet->setActiveSheetIndex($this->sheetIndex);
-
- // Create a new DOM object
+ // Create a new DOM object
$dom = new DOMDocument();
- // Reload the HTML file into the DOM object
+ // Reload the HTML file into the DOM object
$loaded = $dom->loadHTML(mb_convert_encoding($this->securityScanner->scanFile($pFilename), 'HTML-ENTITIES', 'UTF-8'));
if ($loaded === false) {
throw new Exception('Failed to load ' . $pFilename . ' as a DOM Document');
}
- // Discard white space
- $dom->preserveWhiteSpace = false;
+ return $this->loadDocument($dom, $spreadsheet);
+ }
+
+ /**
+ * Builds a new Spreadsheet from HTML markup supplied as a string.
+ *
+ * The content is passed through $this->securityScanner->scan(), converted
+ * from UTF-8 to HTML entities, and parsed into a DOMDocument. The resulting
+ * document is handed to loadDocument() together with a fresh Spreadsheet.
+ *
+ * @param string $content HTML markup to parse
+ *
+ * @throws Exception if DOMDocument::loadHTML() returns false for the content
+ *
+ * @return Spreadsheet
+ */
+ public function loadFromString($content): Spreadsheet
+ {
+ // Create a new DOM object
+ $dom = new DOMDocument();
+ // Parse the scanned, entity-encoded content into the DOM object
+ $loaded = $dom->loadHTML(mb_convert_encoding($this->securityScanner->scan($content), 'HTML-ENTITIES', 'UTF-8'));
+ if ($loaded === false) {
+ throw new Exception('Failed to load content as a DOM Document');
+ }
+
+ // Unlike load(), this always populates a brand-new Spreadsheet instance
+ return $this->loadDocument($dom, new Spreadsheet());
+ }
+
+ /**
+ * Loads PhpSpreadsheet from DOMDocument into PhpSpreadsheet instance.
+ *
+ * @param DOMDocument $document
+ * @param Spreadsheet $spreadsheet
+ *
+ * @throws \PhpOffice\PhpSpreadsheet\Exception
+ *
+ * @return Spreadsheet
+ */
+ private function loadDocument(DOMDocument $document, Spreadsheet $spreadsheet): Spreadsheet
+ {
+ while ($spreadsheet->getSheetCount() <= $this->sheetIndex) {
+ $spreadsheet->createSheet();
+ }
+ $spreadsheet->setActiveSheetIndex($this->sheetIndex);
+
+ // Discard white space
+ $document->preserveWhiteSpace = false;
$row = 0;
$column = 'A';
$content = '';
$this->rowspan = [];
- $this->processDomElement($dom, $spreadsheet->getActiveSheet(), $row, $column, $content);
+ $this->processDomElement($document, $spreadsheet->getActiveSheet(), $row, $column, $content);
// Return
return $spreadsheet;
diff --git a/tests/PhpSpreadsheetTests/Reader/HtmlTest.php b/tests/PhpSpreadsheetTests/Reader/HtmlTest.php
index e8b00f1a..e9dd207f 100644
--- a/tests/PhpSpreadsheetTests/Reader/HtmlTest.php
+++ b/tests/PhpSpreadsheetTests/Reader/HtmlTest.php
@@ -299,6 +299,36 @@ class HtmlTest extends TestCase
unlink($filename);
}
+    /**
+     * Checks that Html::loadFromString() turns an HTML table held in a string
+     * into cells on the first sheet.
+     *
+     * Row 1 holds plain text and must not be wrapped; rows 2 and 3 contain a
+     * line break (self-closing and plain <br> forms) and must yield a cell
+     * value containing "\n" with wrap-text enabled.
+     */
+    public function testCanLoadFromString()
+    {
+        $html = '<table>
+                    <tr>
+                        <td>Hello World</td>
+                    </tr>
+                    <tr>
+                        <td>Hello<br />World</td>
+                    </tr>
+                    <tr>
+                        <td>Hello<br>World</td>
+                    </tr>
+                </table>';
+        $spreadsheet = (new Html())->loadFromString($html);
+        $firstSheet = $spreadsheet->getSheet(0);
+
+        // A1: no line break in the markup, so wrap-text stays off
+        $cellStyle = $firstSheet->getStyle('A1');
+        self::assertFalse($cellStyle->getAlignment()->getWrapText());
+
+        // A2: <br /> is expected to become a newline and enable wrap-text
+        $cellStyle = $firstSheet->getStyle('A2');
+        self::assertTrue($cellStyle->getAlignment()->getWrapText());
+        $cellValue = $firstSheet->getCell('A2')->getValue();
+        self::assertContains("\n", $cellValue);
+
+        // A3: same expectation for the non-self-closing <br> form
+        $cellStyle = $firstSheet->getStyle('A3');
+        self::assertTrue($cellStyle->getAlignment()->getWrapText());
+        $cellValue = $firstSheet->getCell('A3')->getValue();
+        self::assertContains("\n", $cellValue);
+    }
+
/**
* @param string $html
*