/** * Builds an in-memory array containing all the shared strings of the sheet. * All the strings are stored in a XML file, located at 'xl/sharedStrings.xml'. * It is then accessed by the sheet data, via the string index in the built table. * * More documentation available here: http://msdn.microsoft.com/en-us/library/office/gg278314.aspx * * The XML file can be really big with sheets containing a lot of data. That is why * we need to use a XML reader that provides streaming like the XMLReader library. * Please note that SimpleXML does not provide such a functionality but since it is faster * and more handy to parse few XML nodes, it is used in combination with XMLReader for that purpose. * * @return void * @throws \Box\Spout\Common\Exception\IOException If sharedStrings.xml can't be read */ public function extractSharedStrings() { $xmlReader = new XMLReader(); $sharedStringIndex = 0; /** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */ $escaper = \Box\Spout\Common\Escaper\XLSX::getInstance(); $sharedStringsFilePath = $this->getSharedStringsFilePath(); if ($xmlReader->open($sharedStringsFilePath) === false) { throw new IOException('Could not open "' . self::SHARED_STRINGS_XML_FILE_PATH . '".'); } try { $sharedStringsUniqueCount = $this->getSharedStringsUniqueCount($xmlReader); $this->cachingStrategy = $this->getBestSharedStringsCachingStrategy($sharedStringsUniqueCount); $xmlReader->readUntilNodeFound('si'); while ($xmlReader->name === 'si') { $this->processSharedStringsItem($xmlReader, $sharedStringIndex, $escaper); $sharedStringIndex++; // jump to the next 'si' tag $xmlReader->next('si'); } $this->cachingStrategy->closeCache(); } catch (XMLProcessingException $exception) { throw new IOException("The sharedStrings.xml file is invalid and cannot be read. [{$exception->getMessage()}]"); } $xmlReader->close(); }
/** * Builds an in-memory array containing all the shared strings of the sheet. * All the strings are stored in a XML file, located at 'xl/sharedStrings.xml'. * It is then accessed by the sheet data, via the string index in the built table. * * More documentation available here: http://msdn.microsoft.com/en-us/library/office/gg278314.aspx * * The XML file can be really big with sheets containing a lot of data. That is why * we need to use a XML reader that provides streaming like the XMLReader library. * Please note that SimpleXML does not provide such a functionality but since it is faster * and more handy to parse few XML nodes, it is used in combination with XMLReader for that purpose. * * @return void * @throws \Box\Spout\Common\Exception\IOException If sharedStrings.xml can't be read */ public function extractSharedStrings() { $xmlReader = new XMLReader(); $sharedStringIndex = 0; /** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */ $escaper = \Box\Spout\Common\Escaper\XLSX::getInstance(); $sharedStringsFilePath = $this->getSharedStringsFilePath(); if ($xmlReader->open($sharedStringsFilePath) === false) { throw new IOException('Could not open "' . self::SHARED_STRINGS_XML_FILE_PATH . '".'); } try { $sharedStringsUniqueCount = $this->getSharedStringsUniqueCount($xmlReader); $this->cachingStrategy = $this->getBestSharedStringsCachingStrategy($sharedStringsUniqueCount); $xmlReader->readUntilNodeFound('si'); while ($xmlReader->name === 'si') { $node = $this->getSimpleXmlElementNodeFromXMLReader($xmlReader); $node->registerXPathNamespace('ns', self::MAIN_NAMESPACE_FOR_SHARED_STRINGS_XML); // removes nodes that should not be read, like the pronunciation of the Kanji characters $cleanNode = $this->removeSuperfluousTextNodes($node); // find all text nodes "t"; there can be multiple if the cell contains formatting $textNodes = $cleanNode->xpath('//ns:t'); $textValue = ''; foreach ($textNodes as $nodeIndex => $textNode) { if ($nodeIndex !== 0) { // add a space between each "t" node $textValue .= ' '; } if ($this->shouldPreserveWhitespace($textNode)) { $textValue .= $textNode->__toString(); } else { $textValue .= trim($textNode->__toString()); } } $unescapedTextValue = $escaper->unescape($textValue); $this->cachingStrategy->addStringForIndex($unescapedTextValue, $sharedStringIndex); $sharedStringIndex++; // jump to the next 'si' tag $xmlReader->next('si'); } } catch (XMLProcessingException $exception) { throw new IOException("The sharedStrings.xml file is invalid and cannot be read. [{$exception->getMessage()}]"); } $this->cachingStrategy->closeCache(); $xmlReader->close(); }
/** * Reads the styles.xml file and extract the relevant information from the file. * * @return void */ protected function extractRelevantInfo() { $this->customNumberFormats = []; $this->stylesAttributes = []; $stylesXmlFilePath = $this->filePath . '#' . self::STYLES_XML_FILE_PATH; $xmlReader = new XMLReader(); if ($xmlReader->open('zip://' . $stylesXmlFilePath)) { while ($xmlReader->read()) { if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_NUM_FMTS)) { $numFmtsNode = new SimpleXMLElement($xmlReader->readOuterXml()); $this->extractNumberFormats($numFmtsNode); } else { if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL_XFS)) { $cellXfsNode = new SimpleXMLElement($xmlReader->readOuterXml()); $this->extractStyleAttributes($cellXfsNode); } } } $xmlReader->close(); } }
/** * @param string $fileName * @return \DOMNode[] */ private function getCellElementsFromSheetXmlFile($fileName) { $cellElements = []; $resourcePath = $this->getGeneratedResourcePath($fileName); $pathToStylesXmlFile = $resourcePath . '#xl/worksheets/sheet1.xml'; $xmlReader = new \XMLReader(); $xmlReader->open('zip://' . $pathToStylesXmlFile); while ($xmlReader->read()) { if ($xmlReader->nodeType === \XMLReader::ELEMENT && $xmlReader->name === 'c') { $cellElements[] = $xmlReader->expand(); } } return $cellElements; }
/** * @param string $fileName * @param string $section * @return \DomElement */ private function getXmlSectionFromStylesXmlFile($fileName, $section) { $resourcePath = $this->getGeneratedResourcePath($fileName); $pathToStylesXmlFile = $resourcePath . '#styles.xml'; $xmlReader = new XMLReader(); $xmlReader->open('zip://' . $pathToStylesXmlFile); $xmlReader->readUntilNodeFound($section); return $xmlReader->expand(); }
/** * @param string $fileName * @param int $sheetIndex * @return XMLReader */ private function moveReaderToCorrectTableNode($fileName, $sheetIndex) { $resourcePath = $this->getGeneratedResourcePath($fileName); $pathToSheetFile = $resourcePath . '#content.xml'; $xmlReader = new XMLReader(); $xmlReader->open('zip://' . $pathToSheetFile); $xmlReader->readUntilNodeFound('table:table'); for ($i = 1; $i < $sheetIndex; $i++) { $xmlReader->readUntilNodeFound('table:table'); } return $xmlReader; }