In Xml reader throw exception in case of invalid XML (#222)

When the file is not well-formed XML, `simplexml_load_string` returns false, which then causes an error on the subsequent `$xml->getNamespaces(true);` call. Instead of surfacing that error, we now throw an exception.
This commit is contained in:
GreatHumorist 2017-09-20 13:20:12 +08:00 committed by Adrien Crivelli
parent febbe87172
commit 0477e6fcfe
4 changed files with 64 additions and 16 deletions

View File

@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
### Changed ### Changed
- Merge data-validations to reduce written worksheet size - @billblume [#131](https://github.com/PHPOffice/PhpSpreadSheet/issues/131) - Merge data-validations to reduce written worksheet size - @billblume [#131](https://github.com/PHPOffice/PhpSpreadSheet/issues/131)
- Throw an exception if an XML file is invalid - @GreatHumorist [#222](https://github.com/PHPOffice/PhpSpreadsheet/pull/222)
### Fixed ### Fixed

View File

@ -117,6 +117,30 @@ class Xml extends BaseReader implements IReader
return $valid; return $valid;
} }
/**
 * Load the contents of a file as a SimpleXMLElement, throwing if it cannot be parsed.
 *
 * The file contents are passed through securityScan() and then parsed with
 * the libxml options returned by Settings::getLibXmlLoaderOptions().
 *
 * @param string $pFilename Path of the file to read and parse
 *
 * @throws Exception if the contents cannot be parsed as XML
 *
 * @return \SimpleXMLElement
 */
public function trySimpleXMLLoadString($pFilename)
{
    try {
        $xml = simplexml_load_string(
            $this->securityScan(file_get_contents($pFilename)),
            'SimpleXMLElement',
            Settings::getLibXmlLoaderOptions()
        );
    } catch (\Exception $e) {
        // Reached only when something in the call chain actually throws, e.g. an
        // error handler that converts the libxml warning into an exception.
        throw new Exception('Cannot load invalid XML file: ' . $pFilename, 0, $e);
    }

    // simplexml_load_string() reports malformed XML by returning false rather
    // than throwing, so the failure must be detected explicitly; otherwise
    // callers would go on to call methods on a boolean.
    if ($xml === false) {
        throw new Exception('Cannot load invalid XML file: ' . $pFilename);
    }

    return $xml;
}
/** /**
* Reads names of the worksheets from a file, without parsing the whole file to a Spreadsheet object. * Reads names of the worksheets from a file, without parsing the whole file to a Spreadsheet object.
* *
@ -133,11 +157,8 @@ class Xml extends BaseReader implements IReader
$worksheetNames = []; $worksheetNames = [];
$xml = simplexml_load_string( $xml = $this->trySimpleXMLLoadString($pFilename);
$this->securityScan(file_get_contents($pFilename)),
'SimpleXMLElement',
Settings::getLibXmlLoaderOptions()
);
$namespaces = $xml->getNamespaces(true); $namespaces = $xml->getNamespaces(true);
$xml_ss = $xml->children($namespaces['ss']); $xml_ss = $xml->children($namespaces['ss']);
@ -162,11 +183,8 @@ class Xml extends BaseReader implements IReader
$worksheetInfo = []; $worksheetInfo = [];
$xml = simplexml_load_string( $xml = $this->trySimpleXMLLoadString($pFilename);
$this->securityScan(file_get_contents($pFilename)),
'SimpleXMLElement',
Settings::getLibXmlLoaderOptions()
);
$namespaces = $xml->getNamespaces(true); $namespaces = $xml->getNamespaces(true);
$worksheetID = 1; $worksheetID = 1;
@ -339,11 +357,8 @@ class Xml extends BaseReader implements IReader
throw new Exception($pFilename . ' is an Invalid Spreadsheet file.'); throw new Exception($pFilename . ' is an Invalid Spreadsheet file.');
} }
$xml = simplexml_load_string( $xml = $this->trySimpleXMLLoadString($pFilename);
$this->securityScan(file_get_contents($pFilename)),
'SimpleXMLElement',
Settings::getLibXmlLoaderOptions()
);
$namespaces = $xml->getNamespaces(true); $namespaces = $xml->getNamespaces(true);
$docProps = $spreadsheet->getProperties(); $docProps = $spreadsheet->getProperties();

View File

@ -3,6 +3,8 @@
namespace PhpOffice\PhpSpreadsheetTests\Reader; namespace PhpOffice\PhpSpreadsheetTests\Reader;
use PhpOffice\PhpSpreadsheet\Reader\BaseReader; use PhpOffice\PhpSpreadsheet\Reader\BaseReader;
use PhpOffice\PhpSpreadsheet\Reader\Exception;
use PhpOffice\PhpSpreadsheet\Reader\Xml;
use PHPUnit_Framework_TestCase; use PHPUnit_Framework_TestCase;
class XEEValidatorTest extends PHPUnit_Framework_TestCase class XEEValidatorTest extends PHPUnit_Framework_TestCase
@ -24,7 +26,29 @@ class XEEValidatorTest extends PHPUnit_Framework_TestCase
public function providerInvalidXML() public function providerInvalidXML()
{ {
$tests = []; $tests = [];
foreach (glob(__DIR__ . '/../../data/Reader/XEE/XEETestInvalid*.xml') as $file) { foreach (glob(__DIR__ . '/../../data/Reader/XEE/XEETestInvalidUTF*.xml') as $file) {
$tests[basename($file)] = [realpath($file)];
}
return $tests;
}
/**
 * Loading a fixture supplied by providerInvalidSimpleXML through the Xml reader
 * is expected to raise a reader Exception.
 *
 * @dataProvider providerInvalidSimpleXML
 * @expectedException \PhpOffice\PhpSpreadsheet\Reader\Exception
 *
 * @param $filename
 */
public function testInvalidSimpleXML($filename)
{
    (new Xml())->trySimpleXMLLoadString($filename);
}
public function providerInvalidSimpleXML()
{
$tests = [];
foreach (glob(__DIR__ . '/../../data/Reader/XEE/XEETestInvalidSimpleXML*.xml') as $file) {
$tests[basename($file)] = [realpath($file)]; $tests[basename($file)] = [realpath($file)];
} }

View File

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<root>
<data>R&d</data>
<data>R<d</data>
<data>R>d</data>
<data>R'd</data>
<data>R"d</data>
</root>