From 6c25b6f422668b569a65ff196812980de681ee6d Mon Sep 17 00:00:00 2001 From: Mark Baker Date: Mon, 10 Jun 2019 16:44:55 +0200 Subject: [PATCH] Refactor Xlsx Properties Reader code into a separate class (#1001) * Unit tests for refactoring Spreadsheet properties * Refactor Xlsx Properties Reader code into a separate class --- src/PhpSpreadsheet/Reader/Xlsx.php | 61 ++---------- src/PhpSpreadsheet/Reader/Xlsx/Properties.php | 91 ++++++++++++++++++ tests/PhpSpreadsheetTests/Reader/XlsxTest.php | 25 +++++ tests/data/Reader/XLSX/propertyTest.xlsx | Bin 0 -> 9076 bytes 4 files changed, 122 insertions(+), 55 deletions(-) create mode 100644 src/PhpSpreadsheet/Reader/Xlsx/Properties.php create mode 100644 tests/data/Reader/XLSX/propertyTest.xlsx diff --git a/src/PhpSpreadsheet/Reader/Xlsx.php b/src/PhpSpreadsheet/Reader/Xlsx.php index 79972e6a..b5ca97a9 100644 --- a/src/PhpSpreadsheet/Reader/Xlsx.php +++ b/src/PhpSpreadsheet/Reader/Xlsx.php @@ -8,6 +8,7 @@ use PhpOffice\PhpSpreadsheet\Document\Properties; use PhpOffice\PhpSpreadsheet\NamedRange; use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner; use PhpOffice\PhpSpreadsheet\Reader\Xlsx\Chart; +use PhpOffice\PhpSpreadsheet\Reader\Xlsx\Properties as PropertyReader; use PhpOffice\PhpSpreadsheet\ReferenceHelper; use PhpOffice\PhpSpreadsheet\RichText\RichText; use PhpOffice\PhpSpreadsheet\Settings; @@ -456,70 +457,20 @@ class Xlsx extends BaseReader 'SimpleXMLElement', Settings::getLibXmlLoaderOptions() ); + + $propertyReader = new PropertyReader($this->securityScanner, $excel->getProperties()); foreach ($rels->Relationship as $rel) { switch ($rel['Type']) { case 'http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties': - $xmlCore = simplexml_load_string( - $this->securityScanner->scan($this->getFromZipArchive($zip, "{$rel['Target']}")), - 'SimpleXMLElement', - Settings::getLibXmlLoaderOptions() - ); - if (is_object($xmlCore)) { - $xmlCore->registerXPathNamespace('dc', 'http://purl.org/dc/elements/1.1/'); - $xmlCore->registerXPathNamespace('dcterms', 'http://purl.org/dc/terms/'); - $xmlCore->registerXPathNamespace('cp', 'http://schemas.openxmlformats.org/package/2006/metadata/core-properties'); - $docProps = $excel->getProperties(); - $docProps->setCreator((string) self::getArrayItem($xmlCore->xpath('dc:creator'))); - $docProps->setLastModifiedBy((string) self::getArrayItem($xmlCore->xpath('cp:lastModifiedBy'))); - $docProps->setCreated(strtotime(self::getArrayItem($xmlCore->xpath('dcterms:created')))); //! respect xsi:type - $docProps->setModified(strtotime(self::getArrayItem($xmlCore->xpath('dcterms:modified')))); //! respect xsi:type - $docProps->setTitle((string) self::getArrayItem($xmlCore->xpath('dc:title'))); - $docProps->setDescription((string) self::getArrayItem($xmlCore->xpath('dc:description'))); - $docProps->setSubject((string) self::getArrayItem($xmlCore->xpath('dc:subject'))); - $docProps->setKeywords((string) self::getArrayItem($xmlCore->xpath('cp:keywords'))); - $docProps->setCategory((string) self::getArrayItem($xmlCore->xpath('cp:category'))); - } + $propertyReader->readCoreProperties($this->getFromZipArchive($zip, "{$rel['Target']}")); break; case 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties': - $xmlCore = simplexml_load_string( - $this->securityScanner->scan($this->getFromZipArchive($zip, "{$rel['Target']}")), - 'SimpleXMLElement', - Settings::getLibXmlLoaderOptions() - ); - if (is_object($xmlCore)) { - $docProps = $excel->getProperties(); - if (isset($xmlCore->Company)) { - $docProps->setCompany((string) $xmlCore->Company); - } - if (isset($xmlCore->Manager)) { - $docProps->setManager((string) $xmlCore->Manager); - } - } + $propertyReader->readExtendedProperties($this->getFromZipArchive($zip, "{$rel['Target']}")); break; case 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/custom-properties': - $xmlCore = simplexml_load_string( - $this->securityScanner->scan($this->getFromZipArchive($zip, "{$rel['Target']}")), - 'SimpleXMLElement', - Settings::getLibXmlLoaderOptions() - ); - if (is_object($xmlCore)) { - $docProps = $excel->getProperties(); - /** @var SimpleXMLElement $xmlProperty */ - foreach ($xmlCore as $xmlProperty) { - $cellDataOfficeAttributes = $xmlProperty->attributes(); - if (isset($cellDataOfficeAttributes['name'])) { - $propertyName = (string) $cellDataOfficeAttributes['name']; - $cellDataOfficeChildren = $xmlProperty->children('http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes'); - $attributeType = $cellDataOfficeChildren->getName(); - $attributeValue = (string) $cellDataOfficeChildren->{$attributeType}; - $attributeValue = Properties::convertProperty($attributeValue, $attributeType); - $attributeType = Properties::convertPropertyType($attributeType); - $docProps->setCustomProperty($propertyName, $attributeValue, $attributeType); - } - } - } + $propertyReader->readCustomProperties($this->getFromZipArchive($zip, "{$rel['Target']}")); break; //Ribbon diff --git a/src/PhpSpreadsheet/Reader/Xlsx/Properties.php b/src/PhpSpreadsheet/Reader/Xlsx/Properties.php new file mode 100644 index 00000000..bc6bba2c --- /dev/null +++ b/src/PhpSpreadsheet/Reader/Xlsx/Properties.php @@ -0,0 +1,91 @@ +securityScanner = $securityScanner; + $this->docProps = $docProps; + } + + private function extractPropertyData($propertyData) + { + return simplexml_load_string( + $this->securityScanner->scan($propertyData), + 'SimpleXMLElement', + Settings::getLibXmlLoaderOptions() + ); + } + + public function readCoreProperties($propertyData) + { + $xmlCore = $this->extractPropertyData($propertyData); + + if (is_object($xmlCore)) { + $xmlCore->registerXPathNamespace('dc', 'http://purl.org/dc/elements/1.1/'); + $xmlCore->registerXPathNamespace('dcterms', 'http://purl.org/dc/terms/'); + $xmlCore->registerXPathNamespace('cp', 'http://schemas.openxmlformats.org/package/2006/metadata/core-properties'); + + $this->docProps->setCreator((string) self::getArrayItem($xmlCore->xpath('dc:creator'))); + $this->docProps->setLastModifiedBy((string) self::getArrayItem($xmlCore->xpath('cp:lastModifiedBy'))); + $this->docProps->setCreated(strtotime(self::getArrayItem($xmlCore->xpath('dcterms:created')))); //! respect xsi:type + $this->docProps->setModified(strtotime(self::getArrayItem($xmlCore->xpath('dcterms:modified')))); //! respect xsi:type + $this->docProps->setTitle((string) self::getArrayItem($xmlCore->xpath('dc:title'))); + $this->docProps->setDescription((string) self::getArrayItem($xmlCore->xpath('dc:description'))); + $this->docProps->setSubject((string) self::getArrayItem($xmlCore->xpath('dc:subject'))); + $this->docProps->setKeywords((string) self::getArrayItem($xmlCore->xpath('cp:keywords'))); + $this->docProps->setCategory((string) self::getArrayItem($xmlCore->xpath('cp:category'))); + } + } + + public function readExtendedProperties($propertyData) + { + $xmlCore = $this->extractPropertyData($propertyData); + + if (is_object($xmlCore)) { + if (isset($xmlCore->Company)) { + $this->docProps->setCompany((string) $xmlCore->Company); + } + if (isset($xmlCore->Manager)) { + $this->docProps->setManager((string) $xmlCore->Manager); + } + } + } + + public function readCustomProperties($propertyData) + { + $xmlCore = $this->extractPropertyData($propertyData); + + if (is_object($xmlCore)) { + foreach ($xmlCore as $xmlProperty) { + /** @var \SimpleXMLElement $xmlProperty */ + $cellDataOfficeAttributes = $xmlProperty->attributes(); + if (isset($cellDataOfficeAttributes['name'])) { + $propertyName = (string) $cellDataOfficeAttributes['name']; + $cellDataOfficeChildren = $xmlProperty->children('http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes'); + + $attributeType = $cellDataOfficeChildren->getName(); + $attributeValue = (string) $cellDataOfficeChildren->{$attributeType}; + $attributeValue = DocumentProperties::convertProperty($attributeValue, $attributeType); + $attributeType = DocumentProperties::convertPropertyType($attributeType); + $this->docProps->setCustomProperty($propertyName, $attributeValue, $attributeType); + } + } + } + } + + private static function getArrayItem(array $array, $key = 0) + { + return isset($array[$key]) ? $array[$key] : null; + } +} diff --git a/tests/PhpSpreadsheetTests/Reader/XlsxTest.php b/tests/PhpSpreadsheetTests/Reader/XlsxTest.php index b92c0359..f9f8090c 100644 --- a/tests/PhpSpreadsheetTests/Reader/XlsxTest.php +++ b/tests/PhpSpreadsheetTests/Reader/XlsxTest.php @@ -2,12 +2,37 @@ namespace PhpOffice\PhpSpreadsheetTests\Reader; +use PhpOffice\PhpSpreadsheet\Document\Properties; use PhpOffice\PhpSpreadsheet\Reader\Xlsx; use PhpOffice\PhpSpreadsheet\Shared\File; use PHPUnit\Framework\TestCase; class XlsxTest extends TestCase { + public function testLoadWorkbookProperties() + { + $filename = './data/Reader/XLSX/propertyTest.xlsx'; + $reader = new Xlsx(); + $spreadsheet = $reader->load($filename); + + $properties = $spreadsheet->getProperties(); + // Core Properties + $this->assertEquals('Mark Baker', $properties->getCreator()); + $this->assertEquals('Unit Testing', $properties->getTitle()); + $this->assertEquals('Property Test', $properties->getSubject()); + // Extended Properties + $this->assertEquals('PHPOffice', $properties->getCompany()); + $this->assertEquals('The Big Boss', $properties->getManager()); + // Custom Properties + $customProperties = $properties->getCustomProperties(); + $this->assertInternalType('array', $customProperties); + $customProperties = array_flip($customProperties); + $this->assertArrayHasKey('Publisher', $customProperties); + $this->assertTrue($properties->isCustomPropertySet('Publisher')); + $this->assertEquals(Properties::PROPERTY_TYPE_STRING, $properties->getCustomPropertyType('Publisher')); + $this->assertEquals('PHPOffice Suite', $properties->getCustomPropertyValue('Publisher')); + } + /** * Test load Xlsx file without cell reference. */ diff --git a/tests/data/Reader/XLSX/propertyTest.xlsx b/tests/data/Reader/XLSX/propertyTest.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..3f3ac114954cf57bddfcb3380fd2d964d97aa268 GIT binary patch literal 9076 zcmeHNg;!K-*B?Nnr9 z`wQOt%v$HntaE;Qoq6{D?K~hkxJNjE#{fhC06-3~hxOpnfB^u49svN@07O`Aaj>nE zsjZW~n!BB;qb{?XjWt=$BUrjD04((Wf7bu-4wNPi+IFyFNM0meqsP^~G}_83W`ISR zW7fv|4yWTO#&jERo^^EL{MlGJkL=MqjWF|rhRjqq&RGrBQu3Vha^l+zuJrs~U_AB#RGTvpeUyl95;SJ+sfg`G`{}H|r3zzCU%DA&;KE zDQ9wv0suTbzyUyiA|$q6t0^p$kV{a$p+E_#?_g@}$in>N_zw~PhuQbbUoVYUkb`5x z2tM?GAphJVFwa0SZp|XOrC@Rg!^xA+jw^&CF+z7A@NB6=$h%CdS+Jj!R-Tv zrwi6wNhJStvxz444!OJfdIZb_2dSt%nGQb+>*C##Gma>RjqI5-;qcPcoIqykhc^Vy z((2v*LJc~vc{P$82(-S-q8C%^ByA7}FFgYdadr>OAPP9beJElYpS1zV+%@$jn=9<1TTs%z*j{FO3hO#n4gH~`@8 zF#v!Ly~E9##TD#eWdsIW{fJ+sFLjmXS#iAI*F0cc33Uy-QmRPItBD(F!&@JLkW#y- zNnWJSJ^_l|95yK;T2~-&tLNFLzax6@dB`_>-(Fr`s!fB7uMplxB86w=pnUX=Us8Yg ztxF_R-fPuSSxzQNSNZIK?Hkt{ZE|pVV0|SZOPQXb))u>0*}dPg_*`P}S*UejX;E-d zvx^$zQ=nRZN;zY1H?NcB03jzC)d?4YQS}O+c4%{n<_83feuiSdRtEPHsTzJ?G#!Nv zh3UD5LhU#8tGLsyLUdI-f_#m(fw`}6b6Y6=Y7^`Nk7x-L59+Z3^_X6s>L62dd!T1! z_?Nw>}xKwV3zGsoho!Dh+4Iz}2ayki?YletYL4?y_IHhAW|q)rh?=PrA_@uinmG zQ}X>OqI)2MdEiPH+UEqa$WB>~S~Y2Z3)g$!kKJ1m}%Ow<+|eft9GYOhVWAx-3o2>Fqg7ySW8`y!Zhu&7|k?( zZvp?z6zpAvYpI}5HBA;qC{IRhmgV|`v)*_kpE~pAsoA(_<{&dnX_AlatH_wKhFS& zFisQ*MG7-(7umGZy6O5+ACJAb4_LH7YE-a;5;5rda9Fs+a=DHJXdfGC11mv=Wk!j;DeIFfMLOH>D1Dn1}}$%j^i_f z)US!`^&+gEbPEx;dJEb~QCn!YBvqEE3&^!gamQwH8CYxIAK=us~&!kw{bv*;7{M zV2^Jm#D=BjJ5!;{%}W*o3nh*}heK=m9)m3F;)#X~v;r6c~Wek+H9rFZkve@VXI$vZtaduR+1Hp%r<+E(~}Bt3vf+wZ9uxX%w-lK7x*Kxoe(!0uc)wj74KmuSSn1 z`||Rmu-|_G&P>?ctdJtYY;mE=#NT$;oyC z{*w3Ercw;?+&mF+$`N^3^Lw_~)2gE`fKt&4O)KvyG(34OG#oAEheczZyrn5V-O$7pDy#9oc2dFC?5cgf)eY##tRO z%a#HzHaW6ht*XpMRhWK53&#q}=WQQXZL1WIBra(Z58ap+VqcZ72>X73@i8EDf?C|c zfLevQM%*ZLu)~xraPDMi(dJ`kq1BX-mkFgI^f_34LW?$f1}&JC_f+^K*tm#WBTUxW z9emrRY06osBs{|DPp^%c0_DE(hnzhVZ9_&~dvZgf+r=F(=dd;25JA8ZC*(2{1rl_e zlaKG~@}YS`E*a+<@B-f}W>!E#NSp{%#rCxh3DR+AMo|yHZkd)y8+vUzOkCL>=>16L!=XHRKrFY#4h*VUR?;CtJ$cH~jJ} z-2lckzS!yuy+bl0qv^$QqD_T4*xug_k1h;zDfjA`#Dc6Y??$?sM@IA$g=H?-4bt_9 z3HoXHE!Oy^wimlmh??ff9(V^{x^|2{;Obrj{Y9bu)<;jhET58{*PRbxC|mgp8U`aG zqv9|4hvMGrNMw{~Ssx_6n|a5L4u>P9-h!R59@RY(Rz-X3IzZwI86BVHPQtsla+@>=GRYl@uiMm++RBCONz_<6SYij_VOE` zIXv0QRBEo=;2=*t7=-U`5_9!_)I1i6z*;!3+(_GC2{TM_nZ}jE^fi$ZxMoiyLd!lE zGm5GuZo2R(JzV}w&5eU5%Q!d0nGyH>XxWY8sas6cgra#vX=7Ru3Vi&P8@*1Hc(QE% zmX^@rV%g-b&z732X7XdY*$Dzec{4&~X98vYCt^5y=7D;%sTyNW8e^cHe6IniJTILEyQTB% z;ugtVAZ4UebrkD3t3UnK~EpYssJeau}8Z3+m(P3{Q0vvXR*!a8DQcw!!``H0F`V^NJ)?jf%@QGh0cyM1NyQ-oU+c7iQQ`$ z9m7#cm)9SAr8FyZfXazG;LfggUo7$=pMBw zei+^R!XKnprhSO!u~0EaBzV+9iS}e}$`-WCgIw!Sr;TVyZR+zuvk)OkR-mws@Gk3) zjM3O&gWlVu6>wqM1bO(|wA?-Wjdda;@B#HdfGF>t1-q z7vuV1z-5gI2{Jf73EJw(BHk-88VoDci=;9i?zz8FbkQT?hm84zDQ-?Ujx0E;jK5Pv zt}*U$C}Stm4I*HBxf5QZ5Zoq=+m1@PTUcB;8gA+~!qnQo)FxLpk8)mb`kDLeVSYn327~2PT*8F7m3Zfx5bNB$*L?}GS4;%j?J$S+`g*0eOP?-zS%K!K=lT0 zzYw4l_#*SImPG)O3_`}PP`E6K}>EN4+G+E~J0D<#py?ycY^-<45s?1))O1n@L zdm~&=*yTRe*b%l5$TL|1cQs(L7XKH72G{kz5@6H?X?u45=huPmjbHNp`-Lyowgyz& zEfFNpmaOk~oZ#B5_K6DL7vL5UmK7f5p_0+U4bZ1z#Q0pdP2tbzMyWBPY#)@Mb_$&+ z@TP(K+KG;Wqk2g{pv{1TC3U^S&_gNb@N<>6D>9T8mKQc zz-PbubX79VvzZ9buUwf1=@7~gHoLU;HNpj(dWBzixXztnJQbct*DJEvajX~?(jZ{b zP_}Xl_FV*ue17FBN%Xm6gDFL)nkt2M@MWyLZF0#=HuSZl^XV-Zx6ZS}r4qrQh}sHR z5+p~ubI|JAZ1T#OS=Q1B-6)CU``(EoW2_)idt#|}6It_iYmbaal33j(+a~VGIQ^lX zCj;gbRLY!!yqR8Ab~^0sQ_3YD#Ee-&AI-Pnh(^oCp1v2oC;Yo?Qo75$dKhZxHqa^p z6Ix}OfQ>;8U^_<^W3Yqi50i$rI{zsyp*)XNRfw5j#c8HICN2G{O{-Zs7|h8C9L1iZ zsAa_9F>cGvSG1n}sBn5{pN_85?q|ICnQ-Wx{YimUSs#39LgTDNKYuhLrw+=j+Pkf@ z3f-2NQ1LH_`A%@}3gPgDaKTIKUrlthFD+|fBCv>#Wupe`XC{6edbzt(od=0&JHvXnsf3!o9>?K$I^OeaY>+4cp2}x$RU@Tmm zk5Lj1fDqyF^x?J4eK-Njf44Ws#$lfDvP`>?A7gD3T_DXni zRU%Q};wAA)G(bjJpRX{TS+k`W&|Aj2<;kMwC2I0<9%yi&($mQ2^}<);q03@XeS^bZFY$-GUwkDY^a{8%dD#V-e#RFd!ky$#apbDcYjv>0Gw4dqzGF7J6iP*GaJmVfd^)$I(OD+r>z) z?>a;`h^150t>7}4=(QCX3<+)%MY9>Z5uZ{ojK6 zc|`Xyda^eR;9hebnLY0mrZVyEt<;TJ$H$1~O(h`Hup_GMf$*iSLTH~{7of9DlCjf2 zB{Z`~PHq{T@%ge_ME*EA>O(s!7D{qx&<5UiZgmO8kk;LHV&~RD(gHN)6RIN8FD;|0`j?&tf>C!WYB!Hz z-yY`;k2v5>GAjz!eCUV3yfSv*xoTo%?mxZ7{(fzir8k@AB4Wk$|yaTGAZ-#oUj zv`jQ>pPjp=TKz+5**7Zb7>_mJy*hjH>|%zG*1%F!X5JTq-!u0>z9}xa9V`tm(!V}W)|HjedbeedG|RDW(!vq1P4sFgLJAjnfXCR* zRxu*&w}AQ3ci_4w*;HQ7Eb1-2M9M%N1C+kRM+q42z~U71S^t<&RPDN> z4S{Xy^1|R-B+t^+jpW+XKnLYMm6eRgS&{2ycJzus8=|}_(!06E&G|9-`InD{fdvBo zzZV>S?)<;L{mttRAh|ya`13&aR{?QQ`~GD-`#bRWvD8m!7c`6iZAkSy_|JX#pHOJ1 zK>h>#|8(Vlm-BnS_NOFP)c-!jU%I!yOZmO~_frbplfUf+{x0D6`sJs90^C0Y{H$hv zhyFQN{0RgAT%Zl;zhsWT!~Yy*e}#KOQ`7&#{~dFGNB`NC{fbs2`2}tKU&jWLgNG^? R06>O*e4z!~64{Sm{|E1YLFfPg literal 0 HcmV?d00001