Improve Coverage for HTML Reader

Reader/Html is now covered except for 1 statement.
There is some coverage of RichText when you know in advance that the
html will expand into a single cell.
It is a tougher nut, one that I have not yet cracked,
to try to handle rich text while converting unknown html to multiple cells.
The original author left this as a TODO, and so for now must I.

It made sense to restructure some of the code. There are some changes.
- Issue #1532 is fixed (links are now saved when using rowspan).
- Colors can now be specified as html color name. To accomplish this,
  Helper/Html function colourNameLookup was changed from protected
  to public, and changed to static.
- Superfluous empty lines were eliminated in a number of places, e.g.
  <ul><li>A</li><li>B</li><li>C</li></ul>
  had formerly caused a wrapped cell to be created with 2 empty lines
  followed by A, B, and C on separate lines; it will now just have the
  3 A/B/C lines, which seems like a more sensible interpretation.
- Img alt attribute, which had been cast to float, is now used as a string.

Private member "encoding" is not used. Functions getInputEncoding and
setInputEncoding have therefore been marked deprecated. In fact, I was unable to get
SecurityScanner to pass *any* html which is not UTF-8. There are
possibly ways of getting around this (in Reader/Html - I have no
intention of messing with Security Scanner), as can be seen in my
companion pull request for Excel2003 Xml Reader. Doing this would be
easier for ASCII-compatible character sets (like ISO-8859-1),
than for non-compatible charsets (like UTF-16). I am not
convinced that the effort is worth it, but am willing to investigate
further.

I added a number of tests, creating an Html directory, and moving
HtmlTest to that directory.
This commit is contained in:
Owen Leibman 2020-06-25 22:42:38 -07:00
parent 79d024fec0
commit 6080c4561d
11 changed files with 978 additions and 514 deletions

View File

@ -30,7 +30,7 @@ $html1 = '<font color="#0000ff">
while this block uses an <u>underline</u>.
</font>
</p>
<p align="right"><font size="9" color="red">
<p align="right"><font size="9" color="red" face="Times New Roman, serif">
I want to eat <ins><del>healthy food</del> <strong>pizza</strong></ins>.
</font>
';

View File

@ -694,9 +694,9 @@ class Html
return implode('', $values[0]);
}
protected function colourNameLookup($rgb)
public static function colourNameLookup(string $rgb): string
{
return self::$colourMap[$rgb];
return self::$colourMap[$rgb] ?? '';
}
protected function startFontTag($tag): void

View File

@ -2,6 +2,7 @@
namespace PhpOffice\PhpSpreadsheet\Reader;
use PhpOffice\PhpSpreadsheet\Reader\Exception as ReaderException;
use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner;
use PhpOffice\PhpSpreadsheet\Shared\File;
@ -133,11 +134,7 @@ abstract class BaseReader implements IReader
public function getSecurityScanner()
{
if (property_exists($this, 'securityScanner')) {
return $this->securityScanner;
}
return null;
return $this->securityScanner;
}
/**
@ -147,12 +144,18 @@ abstract class BaseReader implements IReader
*/
protected function openFile($pFilename): void
{
File::assertFile($pFilename);
if ($pFilename) {
File::assertFile($pFilename);
// Open file
$this->fileHandle = fopen($pFilename, 'rb');
if ($this->fileHandle === false) {
throw new Exception('Could not open file ' . $pFilename . ' for reading.');
// Open file
$fileHandle = fopen($pFilename, 'rb');
} else {
$fileHandle = false;
}
if ($fileHandle !== false) {
$this->fileHandle = $fileHandle;
} else {
throw new ReaderException('Could not open file ' . $pFilename . ' for reading.');
}
}
}

View File

@ -16,6 +16,7 @@ use PhpOffice\PhpSpreadsheet\Style\Font;
use PhpOffice\PhpSpreadsheet\Style\Style;
use PhpOffice\PhpSpreadsheet\Worksheet\Drawing;
use PhpOffice\PhpSpreadsheet\Worksheet\Worksheet;
use Throwable;
/** PhpSpreadsheet root directory */
class Html extends BaseReader
@ -219,9 +220,13 @@ class Html extends BaseReader
/**
* Set input encoding.
*
* @deprecated no use is made of this property
*
* @param string $pValue Input encoding, eg: 'ANSI'
*
* @return $this
*
* @codeCoverageIgnore
*/
public function setInputEncoding($pValue)
{
@ -233,7 +238,11 @@ class Html extends BaseReader
/**
* Get input encoding.
*
* @deprecated no use is made of this property
*
* @return string
*
* @codeCoverageIgnore
*/
public function getInputEncoding()
{
@ -289,12 +298,319 @@ class Html extends BaseReader
$cellContent = (string) '';
}
/**
* @param int $row
* @param string $column
* @param string $cellContent
*/
protected function processDomElement(DOMNode $element, Worksheet $sheet, &$row, &$column, &$cellContent): void
/**
 * First link of the per-element handler chain.
 *
 * Collects the element's attributes into a name => value array, handles
 * <body> itself and hands every other element on to the <title> handler.
 */
private function processDomElementBody(DOMNode $element, Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child): void
{
    $attributeArray = [];
    foreach ($child->attributes as $domAttribute) {
        $attributeArray[$domAttribute->name] = $domAttribute->value;
    }

    if ($child->nodeName !== 'body') {
        $this->processDomElementTitle($element, $sheet, $row, $column, $cellContent, $child, $attributeArray);

        return;
    }

    // A <body> restarts the cursor at A1 with no pending text and no open table.
    $this->tableLevel = 0;
    $cellContent = '';
    $column = 'A';
    $row = 1;
    $this->processDomElement($child, $sheet, $row, $column, $cellContent);
}
/**
 * Handle <title>: the text gathered from its children becomes the sheet
 * title and is then discarded. Any other element goes to the inline-tag handler.
 */
private function processDomElementTitle(DOMNode $element, Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
{
    if ($child->nodeName !== 'title') {
        $this->processDomElementSpanEtc($element, $sheet, $row, $column, $cellContent, $child, $attributeArray);

        return;
    }

    $this->processDomElement($child, $sheet, $row, $column, $cellContent);
    $sheet->setTitle($cellContent, true, false);
    // The title text must not leak into the first cell.
    $cellContent = '';
}
/** Inline tags whose children are processed in place, without moving the row/column cursor. */
private static $spanEtc = ['span', 'div', 'font', 'i', 'em', 'strong', 'b'];

/**
 * Handle the inline tags listed in self::$spanEtc; anything else goes to the <hr> handler.
 *
 * An element carrying class="comment" is stored as a comment on the current
 * cell and nothing more: its text must not also be appended to the cell
 * content. The previous switch-based implementation stopped after recording
 * the comment, and this method keeps that behaviour.
 */
private function processDomElementSpanEtc(DOMNode $element, Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
{
    if (!in_array($child->nodeName, self::$spanEtc, true)) {
        $this->processDomElementHr($element, $sheet, $row, $column, $cellContent, $child, $attributeArray);

        return;
    }

    if (isset($attributeArray['class']) && $attributeArray['class'] === 'comment') {
        $sheet->getComment($column . $row)
            ->getText()
            ->createTextRun($child->textContent);

        // Comment text belongs to the comment only; do not descend into the children.
        return;
    }

    $this->processDomElement($child, $sheet, $row, $column, $cellContent);
    if (isset($this->formats[$child->nodeName])) {
        $sheet->getStyle($column . $row)->applyFromArray($this->formats[$child->nodeName]);
    }
}
/**
 * Handle <hr>: flush the pending cell, apply the configured 'hr' format
 * (if any) to the following row and step past that row. Every element,
 * <hr> included, is then handed to the <br> handler, which treats <hr> as a break.
 */
private function processDomElementHr(DOMNode $element, Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
{
    if ($child->nodeName === 'hr') {
        $this->flushCell($sheet, $column, $row, $cellContent);
        $row += 1;
        $ruleFormat = $this->formats['hr'] ?? null;
        if ($ruleFormat !== null) {
            $sheet->getStyle($column . $row)->applyFromArray($ruleFormat);
        }
        $row += 1;
    }

    // Deliberately unconditional: <hr> continues into the <br> handling.
    $this->processDomElementBr($element, $sheet, $row, $column, $cellContent, $child, $attributeArray);
}
/**
 * Handle a line break (<br>, or the break that follows an <hr>).
 *
 * Inside a table the break becomes a newline in a wrapped cell; outside a
 * table the pending content is flushed and the cursor moves to the next row.
 * Other elements go to the <a> handler.
 */
private function processDomElementBr(DOMNode $element, Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
{
    $tagName = $child->nodeName;
    if ($tagName !== 'br' && $tagName !== 'hr') {
        $this->processDomElementA($element, $sheet, $row, $column, $cellContent, $child, $attributeArray);

        return;
    }

    if ($this->tableLevel > 0) {
        // Stay in the same cell: add a newline and make the cell wrap.
        $cellContent .= "\n";
        $sheet->getStyle($column . $row)->getAlignment()->setWrapText(true);

        return;
    }

    // Not in a table: write out what we have and continue on the next row.
    $this->flushCell($sheet, $column, $row, $cellContent);
    ++$row;
}
/**
 * Handle <a>: an href attribute becomes the hyperlink of the current cell
 * (styled with the configured 'a' format when there is one), then the link's
 * children are processed. Other elements go to the heading/paragraph handler.
 */
private function processDomElementA(DOMNode $element, Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
{
    if ($child->nodeName !== 'a') {
        $this->processDomElementH1Etc($element, $sheet, $row, $column, $cellContent, $child, $attributeArray);

        return;
    }

    // Only href matters. A class attribute (e.g. "comment-indicator", which is
    // just a red square) is intentionally ignored.
    if (array_key_exists('href', $attributeArray)) {
        $cellAddress = $column . $row;
        $sheet->getCell($cellAddress)->getHyperlink()->setUrl($attributeArray['href']);
        if (isset($this->formats['a'])) {
            $sheet->getStyle($cellAddress)->applyFromArray($this->formats['a']);
        }
    }

    // No separator is added before the link text.
    $this->processDomElement($child, $sheet, $row, $column, $cellContent);
}
/** Block-level tags that start a new line (inside a table) or a new row (outside one). */
private static $h1Etc = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'ol', 'ul', 'p'];

/**
 * Handle headings, lists and paragraphs; anything else goes to the <li> handler.
 *
 * Inside a table the element's text is appended to the current (wrapped)
 * cell, separated from existing content by a newline. Outside a table the
 * element gets a row of its own and the cursor returns to column A.
 */
private function processDomElementH1Etc(DOMNode $element, Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
{
    if (!in_array($child->nodeName, self::$h1Etc, true)) {
        $this->processDomElementLi($element, $sheet, $row, $column, $cellContent, $child, $attributeArray);

        return;
    }

    if ($this->tableLevel > 0) {
        // Separate from existing content with a newline. Compare against ''
        // explicitly: a plain truthiness test would treat the content "0" as empty.
        if ($cellContent !== '') {
            $cellContent .= "\n";
        }
        $sheet->getStyle($column . $row)->getAlignment()->setWrapText(true);
        $this->processDomElement($child, $sheet, $row, $column, $cellContent);

        return;
    }

    if ($cellContent !== '') {
        // Pending text belongs to the row before this block element.
        $this->flushCell($sheet, $column, $row, $cellContent);
        ++$row;
    }
    $this->processDomElement($child, $sheet, $row, $column, $cellContent);
    $this->flushCell($sheet, $column, $row, $cellContent);
    if (isset($this->formats[$child->nodeName])) {
        $sheet->getStyle($column . $row)->applyFromArray($this->formats[$child->nodeName]);
    }
    ++$row;
    $column = 'A';
}
/**
 * Handle <li>; anything else goes to the <img> handler.
 *
 * Inside a table each item is appended to the current cell on its own line.
 * Outside a table pending content is flushed, the item is written on the
 * next row and the cursor returns to column A.
 */
private function processDomElementLi(DOMNode $element, Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
{
    if ($child->nodeName !== 'li') {
        $this->processDomElementImg($element, $sheet, $row, $column, $cellContent, $child, $attributeArray);

        return;
    }

    if ($this->tableLevel > 0) {
        // Separate from existing content with a newline. Compare against ''
        // explicitly: a plain truthiness test would treat the content "0" as empty.
        if ($cellContent !== '') {
            $cellContent .= "\n";
        }
        $this->processDomElement($child, $sheet, $row, $column, $cellContent);

        return;
    }

    if ($cellContent !== '') {
        $this->flushCell($sheet, $column, $row, $cellContent);
    }
    ++$row;
    $this->processDomElement($child, $sheet, $row, $column, $cellContent);
    $this->flushCell($sheet, $column, $row, $cellContent);
    $column = 'A';
}
/**
 * Handle <img> by inserting the image at the current cell; other elements
 * go to the <table> handler.
 */
private function processDomElementImg(DOMNode $element, Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
{
    if ($child->nodeName !== 'img') {
        $this->processDomElementTable($element, $sheet, $row, $column, $cellContent, $child, $attributeArray);

        return;
    }

    $this->insertImage($sheet, $column, $row, $attributeArray);
}
/**
 * Handle <table>; other elements go to the <tr> handler.
 *
 * Pending content is flushed, the table's start column is registered, the
 * table's children are processed and the start column is released again.
 * tableLevel is read afresh after each of those calls, so the two checks
 * below must stay exactly where they are.
 */
private function processDomElementTable(DOMNode $element, Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
{
    if ($child->nodeName !== 'table') {
        $this->processDomElementTr($element, $sheet, $row, $column, $cellContent, $child, $attributeArray);

        return;
    }

    $this->flushCell($sheet, $column, $row, $cellContent);
    $column = $this->setTableStartColumn($column);
    if ($this->tableLevel > 1) {
        $row -= 1;
    }

    $this->processDomElement($child, $sheet, $row, $column, $cellContent);

    $column = $this->releaseTableStartColumn();
    if ($this->tableLevel <= 1) {
        ++$row;
    } else {
        ++$column;
    }
}
/**
 * Handle <tr>: start at the table's first column with empty content,
 * process the cells, apply an optional height attribute to the row and
 * advance to the next row. Other elements go to the th/td dispatcher.
 */
private function processDomElementTr(DOMNode $element, Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
{
    if ($child->nodeName !== 'tr') {
        $this->processDomElementThTdOther($element, $sheet, $row, $column, $cellContent, $child, $attributeArray);

        return;
    }

    $cellContent = '';
    $column = $this->getTableStartColumn();
    $this->processDomElement($child, $sheet, $row, $column, $cellContent);
    // Same "height attribute => row height" rule that table cells use.
    $this->processDomElementHeight($sheet, $row, $attributeArray);
    ++$row;
}
/**
 * Last dispatcher of the chain: table cells (<td>/<th>) get the cell
 * handler, every remaining element simply has its children processed.
 */
private function processDomElementThTdOther(DOMNode $element, Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
{
    $isTableCell = $child->nodeName === 'td' || $child->nodeName === 'th';
    if ($isTableCell) {
        $this->processDomElementThTd($element, $sheet, $row, $column, $cellContent, $child, $attributeArray);

        return;
    }

    $this->processDomElement($child, $sheet, $row, $column, $cellContent);
}
/**
 * Apply a bgcolor attribute, when present, as a solid fill on the cell.
 * The attribute value is resolved through getStyleColor().
 */
private function processDomElementBgcolor(Worksheet $sheet, int $row, string $column, array $attributeArray): void
{
    if (!isset($attributeArray['bgcolor'])) {
        return;
    }

    $solidFill = [
        'fillType' => Fill::FILL_SOLID,
        'color' => ['rgb' => $this->getStyleColor($attributeArray['bgcolor'])],
    ];
    $sheet->getStyle($column . $row)->applyFromArray(['fill' => $solidFill]);
}
/**
 * Apply a width attribute, when present, as the width of the column.
 */
private function processDomElementWidth(Worksheet $sheet, string $column, array $attributeArray): void
{
    $width = $attributeArray['width'] ?? null;
    if ($width === null) {
        return;
    }

    $sheet->getColumnDimension($column)->setWidth($width);
}
/**
 * Apply a height attribute, when present, as the height of the row.
 */
private function processDomElementHeight(Worksheet $sheet, int $row, array $attributeArray): void
{
    $height = $attributeArray['height'] ?? null;
    if ($height === null) {
        return;
    }

    $sheet->getRowDimension($row)->setRowHeight($height);
}
/**
 * Apply an align attribute, when present, as the cell's horizontal alignment.
 */
private function processDomElementAlign(Worksheet $sheet, int $row, string $column, array $attributeArray): void
{
    $horizontal = $attributeArray['align'] ?? null;
    if ($horizontal === null) {
        return;
    }

    $sheet->getStyle($column . $row)->getAlignment()->setHorizontal($horizontal);
}
/**
 * Apply a valign attribute, when present, as the cell's vertical alignment.
 */
private function processDomElementVAlign(Worksheet $sheet, int $row, string $column, array $attributeArray): void
{
    $vertical = $attributeArray['valign'] ?? null;
    if ($vertical === null) {
        return;
    }

    $sheet->getStyle($column . $row)->getAlignment()->setVertical($vertical);
}
/**
 * Apply a data-format attribute, when present, as the cell's number format code.
 */
private function processDomElementDataFormat(Worksheet $sheet, int $row, string $column, array $attributeArray): void
{
    $formatCode = $attributeArray['data-format'] ?? null;
    if ($formatCode === null) {
        return;
    }

    $sheet->getStyle($column . $row)->getNumberFormat()->setFormatCode($formatCode);
}
/**
 * Handle a table cell (<td>/<th>).
 *
 * The cursor first skips columns already claimed by a rowspan from an
 * earlier row. The cell's children are then processed, its content is
 * flushed and its attributes are applied. rowspan/colspan attributes are
 * turned into a merged range. Finally the cursor moves to the column after
 * the cell (after the merged range when there is a colspan).
 */
private function processDomElementThTd(DOMNode $element, Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
{
    // Find the first column in this row not occupied by an earlier rowspan.
    while (isset($this->rowspan[$column . $row])) {
        ++$column;
    }
    $this->processDomElement($child, $sheet, $row, $column, $cellContent);

    // Inline style first, then the content, then the attribute-based formatting.
    $this->applyInlineStyle($sheet, $row, $column, $attributeArray);
    $this->flushCell($sheet, $column, $row, $cellContent);
    $this->processDomElementBgcolor($sheet, $row, $column, $attributeArray);
    $this->processDomElementWidth($sheet, $column, $attributeArray);
    $this->processDomElementHeight($sheet, $row, $attributeArray);
    $this->processDomElementAlign($sheet, $row, $column, $attributeArray);
    $this->processDomElementVAlign($sheet, $row, $column, $attributeArray);
    $this->processDomElementDataFormat($sheet, $row, $column, $attributeArray);

    $spansRows = isset($attributeArray['rowspan']);
    $spansColumns = isset($attributeArray['colspan']);
    if ($spansRows || $spansColumns) {
        // Right-hand edge of the merged range.
        $lastColumn = $column;
        if ($spansColumns) {
            for ($extra = (int) $attributeArray['colspan'] - 1; $extra > 0; --$extra) {
                ++$lastColumn;
            }
        }
        // Bottom edge of the merged range.
        $lastRow = $spansRows ? $row + (int) $attributeArray['rowspan'] - 1 : $row;
        $range = $column . $row . ':' . $lastColumn . $lastRow;

        if ($spansRows) {
            // Remember every covered cell so later rows skip over it.
            foreach (Coordinate::extractAllCellReferencesInRange($range) as $coordinate) {
                $this->rowspan[$coordinate] = true;
            }
        }
        $sheet->mergeCells($range);
        $column = $lastColumn;
    }
    ++$column;
}
protected function processDomElement(DOMNode $element, Worksheet $sheet, int &$row, string &$column, string &$cellContent): void
{
foreach ($element->childNodes as $child) {
if ($child instanceof DOMText) {
@ -306,267 +622,7 @@ class Html extends BaseReader
// but if we have a rich text run instead, we need to append it correctly
// TODO
} elseif ($child instanceof DOMElement) {
$attributeArray = [];
foreach ($child->attributes as $attribute) {
$attributeArray[$attribute->name] = $attribute->value;
}
switch ($child->nodeName) {
case 'meta':
foreach ($attributeArray as $attributeName => $attributeValue) {
// Extract character set, so we can convert to UTF-8 if required
if ($attributeName === 'charset') {
$this->setInputEncoding($attributeValue);
}
}
$this->processDomElement($child, $sheet, $row, $column, $cellContent);
break;
case 'title':
$this->processDomElement($child, $sheet, $row, $column, $cellContent);
$sheet->setTitle($cellContent, true, false);
$cellContent = '';
break;
case 'span':
case 'div':
case 'font':
case 'i':
case 'em':
case 'strong':
case 'b':
if (isset($attributeArray['class']) && $attributeArray['class'] === 'comment') {
$sheet->getComment($column . $row)
->getText()
->createTextRun($child->textContent);
break;
}
if ($cellContent > '') {
$cellContent .= ' ';
}
$this->processDomElement($child, $sheet, $row, $column, $cellContent);
if ($cellContent > '') {
$cellContent .= ' ';
}
if (isset($this->formats[$child->nodeName])) {
$sheet->getStyle($column . $row)->applyFromArray($this->formats[$child->nodeName]);
}
break;
case 'hr':
$this->flushCell($sheet, $column, $row, $cellContent);
++$row;
if (isset($this->formats[$child->nodeName])) {
$sheet->getStyle($column . $row)->applyFromArray($this->formats[$child->nodeName]);
} else {
$cellContent = '----------';
$this->flushCell($sheet, $column, $row, $cellContent);
}
++$row;
// Add a break after a horizontal rule, simply by allowing the code to dropthru
// no break
case 'br':
if ($this->tableLevel > 0) {
// If we're inside a table, replace with a \n and set the cell to wrap
$cellContent .= "\n";
$sheet->getStyle($column . $row)->getAlignment()->setWrapText(true);
} else {
// Otherwise flush our existing content and move the row cursor on
$this->flushCell($sheet, $column, $row, $cellContent);
++$row;
}
break;
case 'a':
foreach ($attributeArray as $attributeName => $attributeValue) {
switch ($attributeName) {
case 'href':
$sheet->getCell($column . $row)->getHyperlink()->setUrl($attributeValue);
if (isset($this->formats[$child->nodeName])) {
$sheet->getStyle($column . $row)->applyFromArray($this->formats[$child->nodeName]);
}
break;
case 'class':
if ($attributeValue === 'comment-indicator') {
break; // Ignore - it's just a red square.
}
}
}
$cellContent .= ' ';
$this->processDomElement($child, $sheet, $row, $column, $cellContent);
break;
case 'h1':
case 'h2':
case 'h3':
case 'h4':
case 'h5':
case 'h6':
case 'ol':
case 'ul':
case 'p':
if ($this->tableLevel > 0) {
// If we're inside a table, replace with a \n
$cellContent .= "\n";
$this->processDomElement($child, $sheet, $row, $column, $cellContent);
} else {
if ($cellContent > '') {
$this->flushCell($sheet, $column, $row, $cellContent);
++$row;
}
$this->processDomElement($child, $sheet, $row, $column, $cellContent);
$this->flushCell($sheet, $column, $row, $cellContent);
if (isset($this->formats[$child->nodeName])) {
$sheet->getStyle($column . $row)->applyFromArray($this->formats[$child->nodeName]);
}
++$row;
$column = 'A';
}
break;
case 'li':
if ($this->tableLevel > 0) {
// If we're inside a table, replace with a \n
$cellContent .= "\n";
$this->processDomElement($child, $sheet, $row, $column, $cellContent);
} else {
if ($cellContent > '') {
$this->flushCell($sheet, $column, $row, $cellContent);
}
++$row;
$this->processDomElement($child, $sheet, $row, $column, $cellContent);
$this->flushCell($sheet, $column, $row, $cellContent);
$column = 'A';
}
break;
case 'img':
$this->insertImage($sheet, $column, $row, $attributeArray);
break;
case 'table':
$this->flushCell($sheet, $column, $row, $cellContent);
$column = $this->setTableStartColumn($column);
if ($this->tableLevel > 1) {
--$row;
}
$this->processDomElement($child, $sheet, $row, $column, $cellContent);
$column = $this->releaseTableStartColumn();
if ($this->tableLevel > 1) {
++$column;
} else {
++$row;
}
break;
case 'thead':
case 'tbody':
$this->processDomElement($child, $sheet, $row, $column, $cellContent);
break;
case 'tr':
$column = $this->getTableStartColumn();
$cellContent = '';
$this->processDomElement($child, $sheet, $row, $column, $cellContent);
if (isset($attributeArray['height'])) {
$sheet->getRowDimension($row)->setRowHeight($attributeArray['height']);
}
++$row;
break;
case 'th':
case 'td':
$this->processDomElement($child, $sheet, $row, $column, $cellContent);
while (isset($this->rowspan[$column . $row])) {
++$column;
}
// apply inline style
$this->applyInlineStyle($sheet, $row, $column, $attributeArray);
$this->flushCell($sheet, $column, $row, $cellContent);
if (isset($attributeArray['rowspan'], $attributeArray['colspan'])) {
//create merging rowspan and colspan
$columnTo = $column;
for ($i = 0; $i < (int) $attributeArray['colspan'] - 1; ++$i) {
++$columnTo;
}
$range = $column . $row . ':' . $columnTo . ($row + (int) $attributeArray['rowspan'] - 1);
foreach (Coordinate::extractAllCellReferencesInRange($range) as $value) {
$this->rowspan[$value] = true;
}
$sheet->mergeCells($range);
$column = $columnTo;
} elseif (isset($attributeArray['rowspan'])) {
//create merging rowspan
$range = $column . $row . ':' . $column . ($row + (int) $attributeArray['rowspan'] - 1);
foreach (Coordinate::extractAllCellReferencesInRange($range) as $value) {
$this->rowspan[$value] = true;
}
$sheet->mergeCells($range);
} elseif (isset($attributeArray['colspan'])) {
//create merging colspan
$columnTo = $column;
for ($i = 0; $i < (int) $attributeArray['colspan'] - 1; ++$i) {
++$columnTo;
}
$sheet->mergeCells($column . $row . ':' . $columnTo . $row);
$column = $columnTo;
} elseif (isset($attributeArray['bgcolor'])) {
$sheet->getStyle($column . $row)->applyFromArray(
[
'fill' => [
'fillType' => Fill::FILL_SOLID,
'color' => ['rgb' => $attributeArray['bgcolor']],
],
]
);
}
if (isset($attributeArray['width'])) {
$sheet->getColumnDimension($column)->setWidth($attributeArray['width']);
}
if (isset($attributeArray['height'])) {
$sheet->getRowDimension($row)->setRowHeight($attributeArray['height']);
}
if (isset($attributeArray['align'])) {
$sheet->getStyle($column . $row)->getAlignment()->setHorizontal($attributeArray['align']);
}
if (isset($attributeArray['valign'])) {
$sheet->getStyle($column . $row)->getAlignment()->setVertical($attributeArray['valign']);
}
if (isset($attributeArray['data-format'])) {
$sheet->getStyle($column . $row)->getNumberFormat()->setFormatCode($attributeArray['data-format']);
}
++$column;
break;
case 'body':
$row = 1;
$column = 'A';
$cellContent = '';
$this->tableLevel = 0;
$this->processDomElement($child, $sheet, $row, $column, $cellContent);
break;
default:
$this->processDomElement($child, $sheet, $row, $column, $cellContent);
}
$this->processDomElementBody($element, $sheet, $row, $column, $cellContent, $child);
}
}
}
@ -588,7 +644,11 @@ class Html extends BaseReader
// Create a new DOM object
$dom = new DOMDocument();
// Reload the HTML file into the DOM object
$loaded = $dom->loadHTML(mb_convert_encoding($this->securityScanner->scanFile($pFilename), 'HTML-ENTITIES', 'UTF-8'));
try {
$loaded = $dom->loadHTML(mb_convert_encoding($this->securityScanner->scanFile($pFilename), 'HTML-ENTITIES', 'UTF-8'));
} catch (Throwable $e) {
$loaded = false;
}
if ($loaded === false) {
throw new Exception('Failed to load ' . $pFilename . ' as a DOM Document');
}
@ -606,7 +666,11 @@ class Html extends BaseReader
// Create a new DOM object
$dom = new DOMDocument();
// Reload the HTML file into the DOM object
$loaded = $dom->loadHTML(mb_convert_encoding($this->securityScanner->scan($content), 'HTML-ENTITIES', 'UTF-8'));
try {
$loaded = $dom->loadHTML(mb_convert_encoding($this->securityScanner->scan($content), 'HTML-ENTITIES', 'UTF-8'));
} catch (Throwable $e) {
$loaded = false;
}
if ($loaded === false) {
throw new Exception('Failed to load content as a DOM Document');
}
@ -837,7 +901,7 @@ class Html extends BaseReader
return substr($value, 1);
}
return null;
return \PhpOffice\PhpSpreadsheet\Helper\Html::colourNameLookup((string) $value);
}
/**
@ -853,7 +917,7 @@ class Html extends BaseReader
$src = urldecode($attributes['src']);
$width = isset($attributes['width']) ? (float) $attributes['width'] : null;
$height = isset($attributes['height']) ? (float) $attributes['height'] : null;
$name = isset($attributes['alt']) ? (float) $attributes['alt'] : null;
$name = $attributes['alt'] ?? null;
$drawing = new Drawing();
$drawing->setPath($src);
@ -884,6 +948,28 @@ class Html extends BaseReader
);
}
/**
 * Html border style name => PhpSpreadsheet Border::BORDER_* constant.
 *
 * Note that 'solid' maps to Border::BORDER_THIN.
 */
private static $borderMappings = [
'dash-dot' => Border::BORDER_DASHDOT,
'dash-dot-dot' => Border::BORDER_DASHDOTDOT,
'dashed' => Border::BORDER_DASHED,
'dotted' => Border::BORDER_DOTTED,
'double' => Border::BORDER_DOUBLE,
'hair' => Border::BORDER_HAIR,
'medium' => Border::BORDER_MEDIUM,
'medium-dashed' => Border::BORDER_MEDIUMDASHED,
'medium-dash-dot' => Border::BORDER_MEDIUMDASHDOT,
'medium-dash-dot-dot' => Border::BORDER_MEDIUMDASHDOTDOT,
'none' => Border::BORDER_NONE,
'slant-dash-dot' => Border::BORDER_SLANTDASHDOT,
'solid' => Border::BORDER_THIN,
'thick' => Border::BORDER_THICK,
];
/**
 * Return the complete html-to-PhpSpreadsheet border style map.
 *
 * @return array map of html border style name to Border::BORDER_* value
 */
public static function getBorderMappings(): array
{
return self::$borderMappings;
}
/**
* Map html border style to PhpSpreadsheet border style.
*
@ -893,38 +979,7 @@ class Html extends BaseReader
*/
public function getBorderStyle($style)
{
switch ($style) {
case 'solid':
return Border::BORDER_THIN;
case 'dashed':
return Border::BORDER_DASHED;
case 'dotted':
return Border::BORDER_DOTTED;
case 'medium':
return Border::BORDER_MEDIUM;
case 'thick':
return Border::BORDER_THICK;
case 'none':
return Border::BORDER_NONE;
case 'dash-dot':
return Border::BORDER_DASHDOT;
case 'dash-dot-dot':
return Border::BORDER_DASHDOTDOT;
case 'double':
return Border::BORDER_DOUBLE;
case 'hair':
return Border::BORDER_HAIR;
case 'medium-dash-dot':
return Border::BORDER_MEDIUMDASHDOT;
case 'medium-dash-dot-dot':
return Border::BORDER_MEDIUMDASHDOTDOT;
case 'medium-dashed':
return Border::BORDER_MEDIUMDASHED;
case 'slant-dash-dot':
return Border::BORDER_SLANTDASHDOT;
}
return null;
return (array_key_exists($style, self::$borderMappings)) ? self::$borderMappings[$style] : null;
}
/**

View File

@ -0,0 +1,110 @@
<?php
namespace PhpOffice\PhpSpreadsheetTests\Reader\Html;
use PhpOffice\PhpSpreadsheet\Reader\Html;
use PhpOffice\PhpSpreadsheet\Style\Border;
use PHPUnit\Framework\TestCase;
class HtmlBorderTest extends TestCase
{
    /**
     * Inline css border declarations on table cells must be turned into cell borders.
     */
    public function testCanApplyInlineBordersStyles(): void
    {
        $html = '<table>
<tr>
<td style="border: 1px solid #333333;">Thin border</td>
<td style="border-bottom: 1px dashed #333333;">Border bottom</td>
<td style="border-top: 1px solid #333333;">Border top</td>
<td style="border-left: 1px solid green;">Border left</td>
<td style="border-right: 1px solid #333333;">Border right</td>
<td style="border: none"></td>
</tr>
</table>';
        $sheet = HtmlHelper::loadHtmlIntoSpreadsheet(HtmlHelper::createHtml($html), true)->getSheet(0);

        // A1: the shorthand "border" applies the same thin border to all four sides.
        $allSides = $sheet->getCell('A1')->getStyle()->getBorders();
        /** @var Border $side */
        foreach ([$allSides->getTop(), $allSides->getBottom(), $allSides->getLeft(), $allSides->getRight()] as $side) {
            self::assertEquals('333333', $side->getColor()->getRGB());
            self::assertEquals(Border::BORDER_THIN, $side->getBorderStyle());
        }

        // B1-E1: one side is styled, and one other side is checked to be untouched.
        // cell => [getter of styled side, expected colour, expected style, getter of untouched side]
        $singleSideCases = [
            'B1' => ['getBottom', '333333', Border::BORDER_DASHED, 'getTop'],
            'C1' => ['getTop', '333333', Border::BORDER_THIN, 'getBottom'],
            'D1' => ['getLeft', '00ff00', Border::BORDER_THIN, 'getBottom'],
            'E1' => ['getRight', '333333', Border::BORDER_THIN, 'getBottom'],
        ];
        foreach ($singleSideCases as $coordinate => [$styledSide, $colour, $borderStyle, $untouchedSide]) {
            $cellStyle = $sheet->getCell($coordinate)->getStyle();
            $styled = $cellStyle->getBorders()->{$styledSide}();
            self::assertEquals($colour, $styled->getColor()->getRGB());
            self::assertEquals($borderStyle, $styled->getBorderStyle());
            self::assertEquals(Border::BORDER_NONE, $cellStyle->getBorders()->{$untouchedSide}()->getBorderStyle());
        }

        // F1: "border: none" leaves every side without a border.
        $noSides = $sheet->getCell('F1')->getStyle()->getBorders();
        foreach ([$noSides->getTop(), $noSides->getBottom(), $noSides->getLeft(), $noSides->getRight()] as $side) {
            self::assertEquals(Border::BORDER_NONE, $side->getBorderStyle());
        }
    }

    /**
     * Every html border style name maps to the expected PhpSpreadsheet style.
     *
     * @dataProvider providerBorderStyle
     */
    public function testBorderStyle(string $style, string $expectedResult): void
    {
        self::assertEquals($expectedResult, Html::getBorderMappings()[$style]);
    }

    /**
     * Guard against new mappings being added without a matching provider row.
     */
    public function testBorderStyleCoverage(): void
    {
        $covered = array_fill_keys(array_keys(Html::getBorderMappings()), 0);
        foreach ($this->providerBorderStyle() as [$styleName]) {
            $covered[$styleName] = 1;
        }
        foreach ($covered as $styleName => $seen) {
            self::assertEquals(1, $seen, "Borderstyle $styleName not tested");
        }
    }

    /**
     * @return array rows of [html border style name, expected Border::BORDER_* value]
     */
    public function providerBorderStyle(): array
    {
        return [
            ['dash-dot', Border::BORDER_DASHDOT],
            ['dash-dot-dot', Border::BORDER_DASHDOTDOT],
            ['dashed', Border::BORDER_DASHED],
            ['dotted', Border::BORDER_DOTTED],
            ['double', Border::BORDER_DOUBLE],
            ['hair', Border::BORDER_HAIR],
            ['medium', Border::BORDER_MEDIUM],
            ['medium-dashed', Border::BORDER_MEDIUMDASHED],
            ['medium-dash-dot', Border::BORDER_MEDIUMDASHDOT],
            ['medium-dash-dot-dot', Border::BORDER_MEDIUMDASHDOTDOT],
            ['none', Border::BORDER_NONE],
            ['slant-dash-dot', Border::BORDER_SLANTDASHDOT],
            ['solid', Border::BORDER_THIN],
            ['thick', Border::BORDER_THICK],
        ];
    }
}

View File

@ -0,0 +1,28 @@
<?php
namespace PhpOffice\PhpSpreadsheetTests\Reader\Html;
use PhpOffice\PhpSpreadsheet\Reader\Html;
use PhpOffice\PhpSpreadsheet\Spreadsheet;
/**
 * Test helper: writes html snippets to temporary files and loads them
 * through the Html reader.
 */
class HtmlHelper
{
    /**
     * Write $html to a new temporary file.
     *
     * @param string $html markup to store
     *
     * @return string path of the temporary file; the caller is responsible for removing it
     */
    public static function createHtml(string $html): string
    {
        $filename = tempnam(sys_get_temp_dir(), 'html');
        if ($filename === false) {
            // Fail with a clear message instead of a TypeError further down.
            throw new \RuntimeException('Unable to create a temporary file for the html fixture.');
        }
        if (file_put_contents($filename, $html) === false) {
            unlink($filename);

            throw new \RuntimeException('Unable to write the html fixture to ' . $filename);
        }

        return $filename;
    }

    /**
     * Load $filename with the Html reader.
     *
     * @param string $filename path of the html file to read
     * @param bool $unlink when true the file is removed afterwards, also if loading throws
     */
    public static function loadHtmlIntoSpreadsheet(string $filename, bool $unlink = false): Spreadsheet
    {
        $html = new Html();

        try {
            return $html->load($filename);
        } finally {
            // Clean up even when load() throws, so failing tests leave no temp files.
            // is_file() keeps a missing file from raising a warning that would
            // replace the reader's own exception.
            if ($unlink && is_file($filename)) {
                unlink($filename);
            }
        }
    }
}

View File

@ -0,0 +1,84 @@
<?php
namespace PhpOffice\PhpSpreadsheetTests\Reader\Html;
use PhpOffice\PhpSpreadsheet\Worksheet\Drawing;
use PHPUnit\Framework\TestCase;
class HtmlImageTest extends TestCase
{
    /**
     * Absolute path of the image fixture used by the tests.
     *
     * @return false|string
     */
    private function fixtureImagePath()
    {
        return realpath(__DIR__ . '/../../../data/Reader/HTML/image.jpg');
    }

    /**
     * Store $html in a temporary file, load it (removing the file) and
     * return the first worksheet of the resulting spreadsheet.
     */
    private function loadFirstSheet(string $html)
    {
        $filename = HtmlHelper::createHtml($html);

        return HtmlHelper::loadHtmlIntoSpreadsheet($filename, true)->getSheet(0);
    }

    /**
     * An <img> in a cell becomes a drawing anchored at that cell, named after its alt text.
     */
    public function testCanInsertImage(): void
    {
        $imagePath = $this->fixtureImagePath();
        $html = '<table>
<tr>
<td><img src="' . $imagePath . '" alt="test image"></td>
</tr>
</table>';
        /** @var Drawing $drawing */
        $drawing = $this->loadFirstSheet($html)->getDrawingCollection()[0];

        self::assertEquals($imagePath, $drawing->getPath());
        self::assertEquals('A1', $drawing->getCoordinates());
        self::assertEquals('test image', $drawing->getName());
        self::assertEquals('100', $drawing->getWidth());
        self::assertEquals('100', $drawing->getHeight());
    }

    /**
     * With only a width attribute, both the width and the height of the drawing are 50.
     */
    public function testCanInsertImageWidth(): void
    {
        $imagePath = $this->fixtureImagePath();
        $html = '<table>
<tr>
<td><img src="' . $imagePath . '" alt="test image" width="50"></td>
</tr>
</table>';
        /** @var Drawing $drawing */
        $drawing = $this->loadFirstSheet($html)->getDrawingCollection()[0];

        self::assertEquals('50', $drawing->getWidth());
        self::assertEquals('50', $drawing->getHeight());
    }

    /**
     * With only a height attribute, both dimensions are 75; without alt the name is empty.
     */
    public function testCanInsertImageHeight(): void
    {
        $imagePath = $this->fixtureImagePath();
        $html = '<table>
<tr>
<td><img src="' . $imagePath . '" height="75"></td>
</tr>
</table>';
        /** @var Drawing $drawing */
        $drawing = $this->loadFirstSheet($html)->getDrawingCollection()[0];

        self::assertEquals('', $drawing->getName());
        self::assertEquals('75', $drawing->getWidth());
        self::assertEquals('75', $drawing->getHeight());
    }

    /**
     * An <img> without a src attribute produces no drawing.
     */
    public function testImageWithourSrc(): void
    {
        $html = '<table>
<tr>
<td><img></td>
</tr>
</table>';

        self::assertCount(0, $this->loadFirstSheet($html)->getDrawingCollection());
    }
}

View File

@ -0,0 +1,92 @@
<?php
namespace PhpOffice\PhpSpreadsheetTests\Reader\Html;
use PhpOffice\PhpSpreadsheet\Reader\Exception as ReaderException;
use PhpOffice\PhpSpreadsheet\Reader\Html;
use PHPUnit\Framework\TestCase;
/**
 * Exercises Html::loadFromString(), both into a new spreadsheet and into an existing one.
 */
class HtmlLoadStringTest extends TestCase
{
    /**
     * Three one-cell rows: plain text, text with <br />, text with <br>.
     */
    private const THREE_ROW_TABLE = '<table>
<tr>
<td>Hello World</td>
</tr>
<tr>
<td>Hello<br />World</td>
</tr>
<tr>
<td>Hello<br>World</td>
</tr>
</table>';

    /**
     * Assert that A1 is not wrapped while A2 and A3 are wrapped and contain a newline.
     *
     * @param mixed $sheet worksheet loaded from THREE_ROW_TABLE
     */
    private static function assertLineBreakCellsWrap($sheet): void
    {
        self::assertFalse($sheet->getStyle('A1')->getAlignment()->getWrapText());

        foreach (['A2', 'A3'] as $coordinate) {
            self::assertTrue($sheet->getStyle($coordinate)->getAlignment()->getWrapText());
            self::assertStringContainsString("\n", $sheet->getCell($coordinate)->getValue());
        }
    }

    /**
     * Markup passed as a string is loaded and line breaks switch on text wrapping.
     */
    public function testCanLoadFromString(): void
    {
        $workbook = (new Html())->loadFromString(self::THREE_ROW_TABLE);

        self::assertLineBreakCellsWrap($workbook->getSheet(0));
    }

    /**
     * Malformed markup is expected to raise a reader exception.
     */
    public function testLoadInvalidString(): void
    {
        $this->expectException(ReaderException::class);

        $workbook = (new Html())->loadFromString('<table<>');
        $a1Style = $workbook->getSheet(0)->getStyle('A1');
        self::assertFalse($a1Style->getAlignment()->getWrapText());
    }

    /**
     * Loading a second string with sheet index 1 into the same spreadsheet adds a sheet.
     */
    public function testCanLoadFromStringIntoExistingSpreadsheet(): void
    {
        $reader = new Html();
        $workbook = $reader->loadFromString(self::THREE_ROW_TABLE);
        self::assertLineBreakCellsWrap($workbook->getSheet(0));

        $reader->setSheetIndex(1);
        $secondTable = '<table>
<tr>
<td>Goodbye World</td>
</tr>
</table>';

        self::assertEquals(1, $workbook->getSheetCount());
        $workbook = $reader->loadFromString($secondTable, $workbook);
        self::assertEquals(2, $workbook->getSheetCount());
    }
}

View File

@ -0,0 +1,236 @@
<?php
namespace PhpOffice\PhpSpreadsheetTests\Reader\Html;
use PhpOffice\PhpSpreadsheet\Reader\Html;
use PhpOffice\PhpSpreadsheet\Style\Border;
use PHPUnit\Framework\TestCase;
/**
 * Checks where the HTML reader places content for various tags: tables,
 * nested tables, lists, rules, row/column spans and hyperlinks.
 */
class HtmlTagsTest extends TestCase
{
    /**
     * Assert the value of several cells at once.
     *
     * @param mixed $sheet worksheet under test
     * @param array $expected map of cell coordinate => expected value (compared with assertEquals)
     */
    private static function assertCellValues($sheet, array $expected): void
    {
        foreach ($expected as $coordinate => $value) {
            self::assertEquals($value, $sheet->getCell($coordinate)->getValue());
        }
    }

    /**
     * Fetch the bottom border style of one cell.
     *
     * @param mixed $sheet worksheet under test
     * @param string $coordinate cell coordinate
     */
    private static function bottomBorderStyle($sheet, string $coordinate)
    {
        return $sheet->getCell($coordinate)->getStyle()->getBorders()->getBottom()->getBorderStyle();
    }

    /**
     * Mixed document: tables separated by <hr>, a list inside and outside a
     * table, a row height attribute, italics and one nested table.
     */
    public function testTags(): void
    {
        $markup = <<<'EOF'
<table><tbody>
<tr><td>1</td><td>2</td><td>3</td></tr>
<tr><td><a href='www.google.com'>hyperlink</a></td><td>5<hr></td><td>6</td></tr>
<tr><td>7</td><td>8</td><td>9</td></tr>
<tr><td>10</td><td>11</td><td>12</td></tr>
</tbody></table>
<hr>
<table><tbody>
<tr><td>1</td><td><i>2</i></td><td>3</td></tr>
<tr height='20'><td>4</td><td>5</td><td>6</td></tr>
<tr><td>7</td><td>8</td><td>9</td></tr>
<tr><td><ul><li>A</li><li>B</li><li>C</li></ul></td><td>11</td><td>12</td></tr>
</tbody></table>
<ul><li>D</li><li>E</li><li>F</li></ul>
<br>
<table><tbody>
<tr><td>M</td>
<td>
<table><tbody>
<tr><td>N</td><td>O</td></tr>
<tr><td>P</td><td>Q</td></tr>
</tbody></table>
</td>
<td>R</td>
</tr>
<tr><td>S</td><td>T</td><td>U</td></tr>
</tbody></table>
EOF;
        $sheet = (new Html())->loadFromString($markup)->getActiveSheet();

        self::assertEquals('www.google.com', $sheet->getCell('A2')->getHyperlink()->getUrl());

        foreach ([11 => -1, 12 => 20] as $row => $height) {
            self::assertEquals($height, $sheet->getRowDimension($row)->getRowHeight());
        }

        foreach (['B3', 'A9'] as $ruledCell) {
            self::assertEquals(Border::BORDER_THIN, self::bottomBorderStyle($sheet, $ruledCell));
        }

        self::assertCellValues($sheet, [
            'A2' => 'hyperlink',
            'B2' => 5,
            'C4' => 6,
            'B11' => 2,
            // list within table
            'A14' => "A\nB\nC",
            // list outside of table
            'A17' => 'D',
            'A18' => 'E',
            'A19' => 'F',
            // embedded table
            'A21' => 'M',
            'B20' => 'N',
            'C20' => 'O',
            'B21' => 'P',
            'C21' => 'Q',
            'C23' => 'R',
            'A24' => 'S',
        ]);

        self::assertTrue($sheet->getCell('B11')->getStyle()->getFont()->getItalic());
        self::assertTrue($sheet->getCell('A14')->getStyle()->getAlignment()->getWrapText());
    }

    /**
     * rowspan/colspan produce merged ranges and bgcolor becomes the fill colour.
     */
    public static function testTagsRowColSpans(): void
    {
        $markup = <<<'EOF'
<table>
<tr>
<th>Month</th>
<th>Savings</th>
<th>Expenses</th>
</tr>
<tr>
<td>January</td>
<td>$100</td>
<td rowspan="2">$50</td>
</tr>
<tr>
<td>February</td>
<td>$80</td>
</tr>
<tr>
<td rowspan="2" colspan="2" bgcolor="#00FFFF">Away in March</td>
<td>$30</td>
</tr>
<tr>
<td>$40</td>
</tr>
</table>
EOF;
        $sheet = (new Html())->loadFromString($markup)->getActiveSheet();

        $expectedMerges = ['C2:C3' => 'C2:C3', 'A4:B5' => 'A4:B5'];
        self::assertEquals($expectedMerges, $sheet->getMergeCells());

        $mergedCell = $sheet->getCell('A4');
        self::assertEquals('Away in March', $mergedCell->getValue());
        self::assertEquals('00FFFF', $sheet->getCell('A4')->getStyle()->getFill()->getEndColor()->getRGB());
    }

    /**
     * A table nested inside a table nested inside a table: pins the cell each value lands in.
     */
    public static function testDoublyEmbeddedTable(): void
    {
        $markup = <<<'EOF'
<table><tbody>
<tr><td>1</td><td>2</td><td>3</td></tr>
<tr><td>4</td><td>5</td><td>6</td></tr>
<tr><td>7</td><td>8</td><td>9</td></tr>
<tr><td></td><td></td><td></td></tr>
<tr><td></td><td></td><td></td></tr>
<tr><td></td><td></td><td></td></tr>
<tr><td>M</td>
<td>
<table><tbody>
<tr><td>N</td>
<td>
<table><tbody>
<tr><td>10</td><td>11</td></tr>
<tr><td>12</td><td>13</td></tr>
</tbody></table>
</td>
<td>Y</td>
</tr>
<tr><td>P</td><td>Q</td><td>X</td></tr>
</tbody></table>
</td>
<td>R</td>
</tr>
<tr><td>S</td><td>T</td><td>U</td></tr>
</tbody></table>
EOF;
        $sheet = (new Html())->loadFromString($markup)->getActiveSheet();

        self::assertCellValues($sheet, [
            'A1' => '1',
            'B1' => '2',
            'C1' => '3',
            'A2' => '4',
            'B2' => '5',
            'C2' => '6',
            'A3' => '7',
            'B3' => '8',
            'C3' => '9',
            'C5' => '10',
            'D5' => '11',
            'C6' => '12',
            'D6' => '13',
            'B6' => 'N',
            'A7' => 'M',
            'E7' => 'Y',
            'B8' => 'P',
            'C8' => 'Q',
            'D8' => 'X',
            'C10' => 'R',
            'A11' => 'S',
            'B11' => 'T',
            'C11' => 'U',
        ]);
    }

    /**
     * Headings, ordered/unordered lists and loose text outside any table each get their own row.
     */
    public static function testTagsOutsideTable(): void
    {
        $markup = <<<'EOF'
<h1>Here comes a list</h1>
<ol>
<li>Item 1</li>
<li>Item 2</li>
<li>Item 3</li>
<li>Item 4</li>
</ol>
And here's another
<ul>
<li>Item A</li>
<li>Item B</li>
</ul>
<ol>
Content before list
<li>Item I</li>
<li>Item II</li>
<li>This <i>is</i> <span style='color: #ff0000;'>rich</span> text</li>
</ol>
EOF;
        $sheet = (new Html())->loadFromString($markup)->getActiveSheet();

        self::assertTrue($sheet->getCell('A1')->getStyle()->getFont()->getBold());
        self::assertCellValues($sheet, [
            'A1' => 'Here comes a list',
            'A3' => 'Item 1',
            'A4' => 'Item 2',
            'A5' => 'Item 3',
            'A6' => 'Item 4',
            'A7' => 'And here\'s another',
            'A9' => 'Item A',
            'A10' => 'Item B',
            'A11' => 'Content before list',
            'A12' => 'Item I',
            'A13' => 'Item II',
        ]);
        // TODO Rich Text not yet supported
    }

    /**
     * Links in the rows covered by a rowspan cell must each keep their hyperlink.
     */
    public static function testHyperlinksWithRowspan(): void
    {
        $markup = <<<'EOF'
<table>
<tr>
<td rowspan="3">Title</td>
<td><a href="https://google.com">Link 1</a></td>
</tr>
<tr>
<td><a href="https://google.com">Link 2</a></td>
</tr>
<tr>
<td><a href="https://google.com">Link 3</a></td>
</tr>
</table>
EOF;
        $sheet = (new Html())->loadFromString($markup)->getActiveSheet();

        foreach (['B1', 'B2', 'B3'] as $linkCell) {
            self::assertEquals('https://google.com', $sheet->getCell($linkCell)->getHyperlink()->getUrl());
        }
    }
}

View File

@ -1,12 +1,11 @@
<?php
namespace PhpOffice\PhpSpreadsheetTests\Reader;
namespace PhpOffice\PhpSpreadsheetTests\Reader\Html;
use PhpOffice\PhpSpreadsheet\Reader\Exception as ReaderException;
use PhpOffice\PhpSpreadsheet\Reader\Html;
use PhpOffice\PhpSpreadsheet\Style\Alignment;
use PhpOffice\PhpSpreadsheet\Style\Border;
use PhpOffice\PhpSpreadsheet\Style\Font;
use PhpOffice\PhpSpreadsheet\Worksheet\Drawing;
use PHPUnit\Framework\TestCase;
class HtmlTest extends TestCase
@ -18,6 +17,33 @@ class HtmlTest extends TestCase
self::assertFalse($reader->canRead($filename));
}
/**
 * A file that passes canRead() but holds malformed markup must make load()
 * throw a reader exception.
 */
public function testBadHtml(): void
{
    $this->expectException(ReaderException::class);
    // Anchor the fixture to this file's directory so the test does not depend
    // on the working directory PHPUnit happens to be started from.
    $filename = __DIR__ . '/../../../data/Reader/HTML/badhtml.html';
    $reader = new Html();
    self::assertTrue($reader->canRead($filename));
    // expectException() already fails the test if nothing is thrown, so no
    // trailing always-false assertion is needed after this call.
    $reader->load($filename);
}
/**
 * This PHP source file is not HTML: canRead() must reject it and load()
 * is expected to throw a reader exception.
 */
public function testNonHtml(): void
{
    $this->expectException(ReaderException::class);

    $thisPhpFile = __FILE__;
    $htmlReader = new Html();
    self::assertFalse($htmlReader->canRead($thisPhpFile));

    $htmlReader->load($thisPhpFile);
    // Only reached when load() did not throw.
    self::assertTrue(false);
}
/**
 * A fresh reader starts at sheet index 0 and refuses an empty filename.
 */
public function testInvalidFilename(): void
{
    $htmlReader = new Html();

    $initialSheetIndex = $htmlReader->getSheetIndex();
    self::assertEquals(0, $initialSheetIndex);

    $readable = $htmlReader->canRead('');
    self::assertFalse($readable);
}
public function providerCanReadVerySmallFile()
{
$padding = str_repeat('a', 2048);
@ -38,7 +64,7 @@ class HtmlTest extends TestCase
*/
public function testCanReadVerySmallFile($expected, $content): void
{
$filename = $this->createHtml($content);
$filename = HtmlHelper::createHtml($content);
$reader = new Html();
$actual = $reader->canRead($filename);
@ -51,63 +77,21 @@ class HtmlTest extends TestCase
{
$html = '<table>
<tr>
<td style="background-color: #000000;color: #FFFFFF">Blue background</td>
<td style="background-color: #0000FF;color: #FFFFFF">Blue background</td>
<td style="background-color: unknown1;color: unknown2">Unknown fore/background</td>
</tr>
</table>';
$filename = $this->createHtml($html);
$spreadsheet = $this->loadHtmlIntoSpreadsheet($filename);
$filename = HtmlHelper::createHtml($html);
$spreadsheet = HtmlHelper::loadHtmlIntoSpreadsheet($filename, true);
$firstSheet = $spreadsheet->getSheet(0);
$style = $firstSheet->getCell('A1')->getStyle();
self::assertEquals('FFFFFF', $style->getFont()->getColor()->getRGB());
unlink($filename);
}
public function testCanApplyInlineBordersStyles(): void
{
$html = '<table>
<tr>
<td style="border: 1px solid #333333;">Thin border</td>
<td style="border-bottom: 1px solid #333333;">Border bottom</td>
<td style="border-top: 1px solid #333333;">Border top</td>
<td style="border-left: 1px solid #333333;">Border left</td>
<td style="border-right: 1px solid #333333;">Border right</td>
</tr>
</table>';
$filename = $this->createHtml($html);
$spreadsheet = $this->loadHtmlIntoSpreadsheet($filename);
$firstSheet = $spreadsheet->getSheet(0);
$style = $firstSheet->getCell('A1')->getStyle();
$borders = $style->getBorders();
/** @var Border $border */
foreach ([$borders->getTop(), $borders->getBottom(), $borders->getLeft(), $borders->getRight()] as $border) {
self::assertEquals('333333', $border->getColor()->getRGB());
self::assertEquals(Border::BORDER_THIN, $border->getBorderStyle());
}
self::assertEquals('0000FF', $style->getFill()->getStartColor()->getRGB());
self::assertEquals('0000FF', $style->getFill()->getEndColor()->getRGB());
$style = $firstSheet->getCell('B1')->getStyle();
$border = $style->getBorders()->getBottom();
self::assertEquals('333333', $border->getColor()->getRGB());
self::assertEquals(Border::BORDER_THIN, $border->getBorderStyle());
$style = $firstSheet->getCell('C1')->getStyle();
$border = $style->getBorders()->getTop();
self::assertEquals('333333', $border->getColor()->getRGB());
self::assertEquals(Border::BORDER_THIN, $border->getBorderStyle());
$style = $firstSheet->getCell('D1')->getStyle();
$border = $style->getBorders()->getLeft();
self::assertEquals('333333', $border->getColor()->getRGB());
self::assertEquals(Border::BORDER_THIN, $border->getBorderStyle());
$style = $firstSheet->getCell('E1')->getStyle();
$border = $style->getBorders()->getRight();
self::assertEquals('333333', $border->getColor()->getRGB());
self::assertEquals(Border::BORDER_THIN, $border->getBorderStyle());
unlink($filename);
self::assertEquals('000000', $style->getFont()->getColor()->getRGB());
self::assertEquals('000000', $style->getFill()->getEndColor()->getRGB());
self::assertEquals('FFFFFF', $style->getFill()->getstartColor()->getRGB());
}
public function testCanApplyInlineFontStyles(): void
@ -122,8 +106,8 @@ class HtmlTest extends TestCase
<td style="text-decoration: line-through;">Line through</td>
</tr>
</table>';
$filename = $this->createHtml($html);
$spreadsheet = $this->loadHtmlIntoSpreadsheet($filename);
$filename = HtmlHelper::createHtml($html);
$spreadsheet = HtmlHelper::loadHtmlIntoSpreadsheet($filename, true);
$firstSheet = $spreadsheet->getSheet(0);
$style = $firstSheet->getCell('A1')->getStyle();
@ -143,8 +127,6 @@ class HtmlTest extends TestCase
$style = $firstSheet->getCell('F1')->getStyle();
self::assertTrue($style->getFont()->getStrikethrough());
unlink($filename);
}
public function testCanApplyInlineWidth(): void
@ -155,8 +137,8 @@ class HtmlTest extends TestCase
<td style="width: 100px;">100px</td>
</tr>
</table>';
$filename = $this->createHtml($html);
$spreadsheet = $this->loadHtmlIntoSpreadsheet($filename);
$filename = HtmlHelper::createHtml($html);
$spreadsheet = HtmlHelper::loadHtmlIntoSpreadsheet($filename, true);
$firstSheet = $spreadsheet->getSheet(0);
$dimension = $firstSheet->getColumnDimension('A');
@ -164,8 +146,6 @@ class HtmlTest extends TestCase
$dimension = $firstSheet->getColumnDimension('B');
self::assertEquals(100, $dimension->getWidth());
unlink($filename);
}
public function testCanApplyInlineHeight(): void
@ -178,8 +158,8 @@ class HtmlTest extends TestCase
<td style="height: 100px;">2</td>
</tr>
</table>';
$filename = $this->createHtml($html);
$spreadsheet = $this->loadHtmlIntoSpreadsheet($filename);
$filename = HtmlHelper::createHtml($html);
$spreadsheet = HtmlHelper::loadHtmlIntoSpreadsheet($filename, true);
$firstSheet = $spreadsheet->getSheet(0);
$dimension = $firstSheet->getRowDimension(1);
@ -187,8 +167,6 @@ class HtmlTest extends TestCase
$dimension = $firstSheet->getRowDimension(2);
self::assertEquals(100, $dimension->getRowHeight());
unlink($filename);
}
public function testCanApplyAlignment(): void
@ -203,8 +181,8 @@ class HtmlTest extends TestCase
<td style="word-wrap: break-word;">Wraptext</td>
</tr>
</table>';
$filename = $this->createHtml($html);
$spreadsheet = $this->loadHtmlIntoSpreadsheet($filename);
$filename = HtmlHelper::createHtml($html);
$spreadsheet = HtmlHelper::loadHtmlIntoSpreadsheet($filename, true);
$firstSheet = $spreadsheet->getSheet(0);
$style = $firstSheet->getCell('A1')->getStyle();
@ -224,8 +202,6 @@ class HtmlTest extends TestCase
$style = $firstSheet->getCell('F1')->getStyle();
self::assertTrue($style->getAlignment()->getWrapText());
unlink($filename);
}
public function testCanApplyInlineDataFormat(): void
@ -235,35 +211,12 @@ class HtmlTest extends TestCase
<td data-format="mmm-yy">2019-02-02 12:34:00</td>
</tr>
</table>';
$filename = $this->createHtml($html);
$spreadsheet = $this->loadHtmlIntoSpreadsheet($filename);
$filename = HtmlHelper::createHtml($html);
$spreadsheet = HtmlHelper::loadHtmlIntoSpreadsheet($filename, true);
$firstSheet = $spreadsheet->getSheet(0);
$style = $firstSheet->getCell('A1')->getStyle();
self::assertEquals('mmm-yy', $style->getNumberFormat()->getFormatCode());
unlink($filename);
}
public function testCanInsertImage(): void
{
$imagePath = realpath(__DIR__ . '/../../data/Reader/HTML/image.jpg');
$html = '<table>
<tr>
<td><img src="' . $imagePath . '" alt=""></td>
</tr>
</table>';
$filename = $this->createHtml($html);
$spreadsheet = $this->loadHtmlIntoSpreadsheet($filename);
$firstSheet = $spreadsheet->getSheet(0);
/** @var Drawing $drawing */
$drawing = $firstSheet->getDrawingCollection()[0];
self::assertEquals($imagePath, $drawing->getPath());
self::assertEquals('A1', $drawing->getCoordinates());
unlink($filename);
}
public function testCanApplyCellWrapping(): void
@ -279,8 +232,8 @@ class HtmlTest extends TestCase
<td>Hello<br>World</td>
</tr>
</table>';
$filename = $this->createHtml($html);
$spreadsheet = $this->loadHtmlIntoSpreadsheet($filename);
$filename = HtmlHelper::createHtml($html);
$spreadsheet = HtmlHelper::loadHtmlIntoSpreadsheet($filename, true);
$firstSheet = $spreadsheet->getSheet(0);
$cellStyle = $firstSheet->getStyle('A1');
@ -295,103 +248,6 @@ class HtmlTest extends TestCase
self::assertTrue($cellStyle->getAlignment()->getWrapText());
$cellValue = $firstSheet->getCell('A3')->getValue();
self::assertStringContainsString("\n", $cellValue);
unlink($filename);
}
public function testCanLoadFromString(): void
{
$html = '<table>
<tr>
<td>Hello World</td>
</tr>
<tr>
<td>Hello<br />World</td>
</tr>
<tr>
<td>Hello<br>World</td>
</tr>
</table>';
$spreadsheet = (new Html())->loadFromString($html);
$firstSheet = $spreadsheet->getSheet(0);
$cellStyle = $firstSheet->getStyle('A1');
self::assertFalse($cellStyle->getAlignment()->getWrapText());
$cellStyle = $firstSheet->getStyle('A2');
self::assertTrue($cellStyle->getAlignment()->getWrapText());
$cellValue = $firstSheet->getCell('A2')->getValue();
self::assertStringContainsString("\n", $cellValue);
$cellStyle = $firstSheet->getStyle('A3');
self::assertTrue($cellStyle->getAlignment()->getWrapText());
$cellValue = $firstSheet->getCell('A3')->getValue();
self::assertStringContainsString("\n", $cellValue);
}
public function testCanLoadFromStringIntoExistingSpreadsheet(): void
{
$html = '<table>
<tr>
<td>Hello World</td>
</tr>
<tr>
<td>Hello<br />World</td>
</tr>
<tr>
<td>Hello<br>World</td>
</tr>
</table>';
$reader = new Html();
$spreadsheet = $reader->loadFromString($html);
$firstSheet = $spreadsheet->getSheet(0);
$cellStyle = $firstSheet->getStyle('A1');
self::assertFalse($cellStyle->getAlignment()->getWrapText());
$cellStyle = $firstSheet->getStyle('A2');
self::assertTrue($cellStyle->getAlignment()->getWrapText());
$cellValue = $firstSheet->getCell('A2')->getValue();
self::assertStringContainsString("\n", $cellValue);
$cellStyle = $firstSheet->getStyle('A3');
self::assertTrue($cellStyle->getAlignment()->getWrapText());
$cellValue = $firstSheet->getCell('A3')->getValue();
self::assertStringContainsString("\n", $cellValue);
$reader->setSheetIndex(1);
$html = '<table>
<tr>
<td>Goodbye World</td>
</tr>
</table>';
self::assertEquals(1, $spreadsheet->getSheetCount());
$spreadsheet = $reader->loadFromString($html, $spreadsheet);
self::assertEquals(2, $spreadsheet->getSheetCount());
}
/**
* @param string $html
*
* @return string
*/
private function createHtml($html)
{
$filename = tempnam(sys_get_temp_dir(), 'html');
file_put_contents($filename, $html);
return $filename;
}
/**
* @param $filename
*
* @return \PhpOffice\PhpSpreadsheet\Spreadsheet
*/
private function loadHtmlIntoSpreadsheet($filename)
{
return (new Html())->load($filename);
}
public function testRowspanInRendering(): void
@ -417,11 +273,10 @@ class HtmlTest extends TestCase
<td style="text-indent:10px">Text Indent</td>
</tr>
</table>';
$filename = $this->createHtml($html);
$spreadsheet = $this->loadHtmlIntoSpreadsheet($filename);
$filename = HtmlHelper::createHtml($html);
$spreadsheet = HtmlHelper::loadHtmlIntoSpreadsheet($filename, true);
$firstSheet = $spreadsheet->getSheet(0);
$style = $firstSheet->getCell('C2')->getStyle();
self::assertEquals(10, $style->getAlignment()->getIndent());
unlink($filename);
}
}

View File

@ -0,0 +1 @@
<table<>