/**
 * Moving the reader forward inside a document that triggers read errors
 * is expected to raise an XMLProcessingException.
 *
 * @expectedException \Box\Spout\Reader\Exception\XMLProcessingException
 *
 * @return void
 */
public function testNextShouldThrowExceptionOnError()
{
    // "attack_billion_laughs.xlsx" ships a sharedStrings.xml file that contains
    // a doctype element, which causes read errors
    $archivePath = $this->getResourcePath('attack_billion_laughs.xlsx');
    $sharedStringsUri = 'zip://' . $archivePath . '#xl/sharedStrings.xml';

    $reader = new XMLReader();
    $openResult = $reader->open($sharedStringsUri);

    if ($openResult === false) {
        // Nothing to advance over when the entry could not be opened
        return;
    }

    // The error-control operator silences PHP warnings emitted by this call
    @$reader->next('sst');
}
/**
 * Parses the styles.xml file and collects the information needed from it.
 *
 * The stored number formats and style attributes are reset first. Then the
 * number formats node and the cell formats node are each handed to their
 * dedicated extraction method as a SimpleXMLElement.
 *
 * @return void
 */
protected function extractRelevantInfo()
{
    $this->customNumberFormats = [];
    $this->stylesAttributes = [];

    $stylesReader = new XMLReader();
    $isOpened = $stylesReader->openFileInZip($this->filePath, self::STYLES_XML_FILE_PATH);

    if (!$isOpened) {
        // Without a readable styles file, both collections simply stay empty
        return;
    }

    while ($stylesReader->read()) {
        if ($stylesReader->isPositionedOnStartingNode(self::XML_NODE_NUM_FMTS)) {
            $this->extractNumberFormats(new SimpleXMLElement($stylesReader->readOuterXml()));
            continue;
        }

        if ($stylesReader->isPositionedOnStartingNode(self::XML_NODE_CELL_XFS)) {
            $this->extractStyleAttributes(new SimpleXMLElement($stylesReader->readOuterXml()));
        }
    }

    $stylesReader->close();
}
/**
 * Builds a SimpleXMLElement from the outer XML of the node the given reader
 * is currently positioned on, which makes the subtree easier to parse.
 *
 * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader
 * @return \Box\Spout\Reader\Wrapper\SimpleXMLElement
 * @throws \Box\Spout\Common\Exception\IOException If the current node cannot be read
 */
protected function getSimpleXmlElementNodeFromXMLReader($xmlReader)
{
    try {
        return new SimpleXMLElement($xmlReader->readOuterXml());
    } catch (XMLProcessingException $exception) {
        // Processing failures are reported to callers as an I/O problem
        throw new IOException("The sharedStrings.xml file contains unreadable data [{$exception->getMessage()}].");
    }
}
/**
 * Determines the column index of the cell the reader is positioned on.
 *
 * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<c>" tag
 * @return int
 * @throws \Box\Spout\Common\Exception\InvalidArgumentException When the given cell index is invalid
 */
protected function getCellIndex($xmlReader)
{
    // The "r" attribute carries the cell reference when present (e.g. <c r="A1"...>)
    $cellReference = $xmlReader->getAttribute(self::XML_ATTRIBUTE_CELL_INDEX);

    if ($cellReference === null) {
        // No explicit reference: use the column right after the last processed one
        return $this->lastColumnIndexProcessed + 1;
    }

    return CellHelper::getColumnIndexFromCellIndex($cellReference);
}
/**
 * Reads how many times the current cell is repeated across columns.
 *
 * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-cell>" starting node
 * @return int The value of "table:number-columns-repeated" attribute of the current node, or 1 if attribute missing
 */
protected function getNumColumnsRepeatedForCurrentNode($xmlReader)
{
    $repeatCount = $xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_COLUMNS_REPEATED);

    if ($repeatCount === null) {
        // Attribute missing: the cell occupies a single column
        return 1;
    }

    return (int) $repeatCount;
}
/**
 * Looks up, in the "workbook.xml.rels" file, the path of the XML file holding
 * the data of the sheet with the given ID.
 *
 * The first "Relationship" node whose "Id" attribute is identical to the given
 * sheet ID wins; scanning stops as soon as it is found.
 *
 * @param string $sheetId The sheet ID, as defined in "workbook.xml"
 * @return string The XML file path describing the sheet inside "workbook.xml.rels", for the given sheet ID.
 *                An empty string is returned when the file cannot be opened or no relationship matches.
 */
protected function getSheetDataXMLFilePathForSheetId($sheetId)
{
    $sheetDataXMLFilePath = '';

    // find the file path of the sheet, by looking at the "workbook.xml.rels" file
    $xmlReader = new XMLReader();
    if ($xmlReader->openFileInZip($this->filePath, self::WORKBOOK_XML_RELS_FILE_PATH)) {
        while ($xmlReader->read()) {
            if (!$xmlReader->isPositionedOnStartingNode('Relationship')) {
                continue;
            }

            $relationshipSheetId = $xmlReader->getAttribute('Id');
            if ($relationshipSheetId !== $sheetId) {
                continue;
            }

            // In workbook.xml.rels, it is only "worksheets/sheet1.xml"
            // In [Content_Types].xml, the path is "/xl/worksheets/sheet1.xml"
            $sheetDataXMLFilePath = $xmlReader->getAttribute('Target');

            // sometimes, the sheet data file path already contains "/xl/"...
            if (strpos($sheetDataXMLFilePath, '/xl/') !== 0) {
                $sheetDataXMLFilePath = '/xl/' . $sheetDataXMLFilePath;
            }

            // The relationship was found: stop reading whether or not the
            // path needed the "/xl/" prefix
            break;
        }

        $xmlReader->close();
    }

    return $sheetDataXMLFilePath;
}
/**
 * Collects every "c" element found in "xl/worksheets/sheet1.xml" of the
 * generated resource with the given file name.
 *
 * @param string $fileName
 * @return \DOMNode[]
 */
private function getCellElementsFromSheetXmlFile($fileName)
{
    $archivePath = $this->getGeneratedResourcePath($fileName);
    $sheetUri = 'zip://' . $archivePath . '#xl/worksheets/sheet1.xml';

    $reader = new \XMLReader();
    $reader->open($sheetUri);

    $cells = [];
    while ($reader->read()) {
        $isCellElement = ($reader->nodeType === \XMLReader::ELEMENT) && ($reader->name === 'c');
        if (!$isCellElement) {
            continue;
        }

        $cells[] = $reader->expand();
    }

    return $cells;
}
/**
 * Opens "styles.xml" inside the generated resource with the given file name,
 * advances to the requested section and returns it expanded.
 *
 * @param string $fileName
 * @param string $section
 * @return \DomElement
 */
private function getXmlSectionFromStylesXmlFile($fileName, $section)
{
    $archivePath = $this->getGeneratedResourcePath($fileName);
    $stylesUri = 'zip://' . $archivePath . '#styles.xml';

    $reader = new XMLReader();
    $reader->open($stylesUri);
    $reader->readUntilNodeFound($section);

    $sectionNode = $reader->expand();

    return $sectionNode;
}
/**
 * Opens "content.xml" inside the generated resource with the given file name
 * and advances the reader over "table:table" nodes: once unconditionally, then
 * once more for every sheet index above 1.
 *
 * @param string $fileName
 * @param int $sheetIndex
 * @return XMLReader
 */
private function moveReaderToCorrectTableNode($fileName, $sheetIndex)
{
    $archivePath = $this->getGeneratedResourcePath($fileName);
    $contentUri = 'zip://' . $archivePath . '#content.xml';

    $reader = new XMLReader();
    $reader->open($contentUri);

    // The first table node is always looked up
    $reader->readUntilNodeFound('table:table');

    // Then one extra lookup per sheet preceding the requested one
    $remainingLookups = $sheetIndex - 1;
    while ($remainingLookups > 0) {
        $reader->readUntilNodeFound('table:table');
        $remainingLookups--;
    }

    return $reader;
}
/**
 * Extracts style attributes from the "xf" nodes, inside the "cellXfs" section.
 * For simplicity, the styles attributes are kept in memory. This is possible thanks
 * to the reuse of styles. So 1 million cells should not use 1 million styles.
 *
 * For each "xf" node, one entry is appended to the stored style attributes, holding:
 *  - the number format ID as an int, or null when the attribute is missing
 *  - the "apply number format" flag as a bool, or null when the attribute is missing
 *
 * Reading stops once the ending "cellXfs" node is reached.
 *
 * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XML Reader positioned on the "cellXfs" node
 * @return void
 */
protected function extractStyleAttributes($xmlReader)
{
    while ($xmlReader->read()) {
        if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_XF)) {
            $numFmtId = $xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_FMT_ID);
            $normalizedNumFmtId = ($numFmtId !== null) ? intval($numFmtId) : null;

            $applyNumberFormat = $xmlReader->getAttribute(self::XML_ATTRIBUTE_APPLY_NUMBER_FORMAT);
            $normalizedApplyNumberFormat = null;
            if ($applyNumberFormat !== null) {
                // XML Schema booleans may be spelled "true"/"false" as well as "1"/"0".
                // A plain boolean cast turns the non-empty string "false" into true,
                // so that literal has to be handled explicitly.
                $normalizedApplyNumberFormat = ($applyNumberFormat !== 'false') && (bool) $applyNumberFormat;
            }

            $this->stylesAttributes[] = [
                self::XML_ATTRIBUTE_NUM_FMT_ID => $normalizedNumFmtId,
                self::XML_ATTRIBUTE_APPLY_NUMBER_FORMAT => $normalizedApplyNumberFormat,
            ];
        } elseif ($xmlReader->isPositionedOnEndingNode(self::XML_NODE_CELL_XFS)) {
            // Once done reading "cellXfs" node's children
            break;
        }
    }
}
/**
 * Extracts style attributes from the "xf" nodes, inside the "cellXfs" section.
 * For simplicity, the styles attributes are kept in memory. This is possible thanks
 * to the reuse of styles. So 1 million cells should not use 1 million styles.
 *
 * For each "xf" node, one entry is appended to the stored style attributes, holding
 * the number format ID as an int and the "apply number format" flag as a bool.
 * Missing attributes end up as 0 and false respectively.
 *
 * Reading stops once the ending "cellXfs" node is reached.
 *
 * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XML Reader positioned on the "cellXfs" node
 * @return void
 */
protected function extractStyleAttributes($xmlReader)
{
    while ($xmlReader->read()) {
        if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_XF)) {
            $numFmtId = $xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_FMT_ID);
            $applyNumberFormat = $xmlReader->getAttribute(self::XML_ATTRIBUTE_APPLY_NUMBER_FORMAT);

            // XML Schema booleans may be spelled "true"/"false" as well as "1"/"0".
            // A plain boolean cast turns the non-empty string "false" into true,
            // so that literal has to be handled explicitly.
            $shouldApplyNumberFormat = ($applyNumberFormat !== 'false') && (bool) $applyNumberFormat;

            $this->stylesAttributes[] = [
                self::XML_ATTRIBUTE_NUM_FMT_ID => intval($numFmtId),
                self::XML_ATTRIBUTE_APPLY_NUMBER_FORMAT => $shouldApplyNumberFormat,
            ];
        } elseif ($xmlReader->isPositionedOnEndingNode(self::XML_NODE_CELL_XFS)) {
            // Once done reading "cellXfs" node's children
            break;
        }
    }
}