Beispiel #1
0
 /**
  * Reads every shared string of the workbook and hands it to the caching strategy.
  * The strings live in a single XML file, located at 'xl/sharedStrings.xml', and are
  * later looked up by the sheet data through their index.
  *
  * More documentation available here: http://msdn.microsoft.com/en-us/library/office/gg278314.aspx
  *
  * The XML file can be really big with sheets containing a lot of data, which is why
  * it is streamed node by node instead of being loaded in one go.
  *
  * The caching strategy is selected from the unique count read at the start of the
  * file, and the reader is always closed, whether or not the extraction succeeds.
  *
  * @return void
  * @throws \Box\Spout\Common\Exception\IOException If sharedStrings.xml can't be opened or is invalid
  */
 public function extractSharedStrings()
 {
     $xmlReader = new XMLReader();
     $sharedStringIndex = 0;
     /** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */
     $escaper = \Box\Spout\Common\Escaper\XLSX::getInstance();
     $sharedStringsFilePath = $this->getSharedStringsFilePath();
     if ($xmlReader->open($sharedStringsFilePath) === false) {
         throw new IOException('Could not open "' . self::SHARED_STRINGS_XML_FILE_PATH . '".');
     }
     try {
         $sharedStringsUniqueCount = $this->getSharedStringsUniqueCount($xmlReader);
         $this->cachingStrategy = $this->getBestSharedStringsCachingStrategy($sharedStringsUniqueCount);
         $xmlReader->readUntilNodeFound('si');
         while ($xmlReader->name === 'si') {
             $this->processSharedStringsItem($xmlReader, $sharedStringIndex, $escaper);
             $sharedStringIndex++;
             // jump to the next 'si' tag
             $xmlReader->next('si');
         }
         $this->cachingStrategy->closeCache();
     } catch (XMLProcessingException $exception) {
         // Keep the original exception as "previous" so the root cause stays in the trace.
         // NOTE(review): assumes IOException keeps the standard \Exception constructor - confirm.
         throw new IOException(
             "The sharedStrings.xml file is invalid and cannot be read. [{$exception->getMessage()}]",
             0,
             $exception
         );
     } finally {
         // Release the underlying stream on both the success and the failure path.
         $xmlReader->close();
     }
 }
Beispiel #2
0
 /**
  * Reads every shared string of the workbook and hands it to the caching strategy.
  * The strings live in a single XML file, located at 'xl/sharedStrings.xml', and are
  * later looked up by the sheet data through their index.
  *
  * More documentation available here: http://msdn.microsoft.com/en-us/library/office/gg278314.aspx
  *
  * The XML file can be really big with sheets containing a lot of data, which is why
  * it is streamed with XMLReader. Each individual 'si' item is small, so it is converted
  * to a SimpleXML node to make the XPath lookup of its text nodes convenient.
  *
  * The reader is always closed, whether or not the extraction succeeds.
  *
  * @return void
  * @throws \Box\Spout\Common\Exception\IOException If sharedStrings.xml can't be opened or is invalid
  */
 public function extractSharedStrings()
 {
     $xmlReader = new XMLReader();
     $sharedStringIndex = 0;
     /** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */
     $escaper = \Box\Spout\Common\Escaper\XLSX::getInstance();
     $sharedStringsFilePath = $this->getSharedStringsFilePath();
     if ($xmlReader->open($sharedStringsFilePath) === false) {
         throw new IOException('Could not open "' . self::SHARED_STRINGS_XML_FILE_PATH . '".');
     }
     try {
         $sharedStringsUniqueCount = $this->getSharedStringsUniqueCount($xmlReader);
         $this->cachingStrategy = $this->getBestSharedStringsCachingStrategy($sharedStringsUniqueCount);
         $xmlReader->readUntilNodeFound('si');
         while ($xmlReader->name === 'si') {
             $node = $this->getSimpleXmlElementNodeFromXMLReader($xmlReader);
             $node->registerXPathNamespace('ns', self::MAIN_NAMESPACE_FOR_SHARED_STRINGS_XML);
             // removes nodes that should not be read, like the pronunciation of the Kanji characters
             $cleanNode = $this->removeSuperfluousTextNodes($node);
             // find all text nodes "t"; there can be multiple if the cell contains formatting
             $textNodes = $cleanNode->xpath('//ns:t');
             $textValue = '';
             foreach ($textNodes as $nodeIndex => $textNode) {
                 if ($nodeIndex !== 0) {
                     // add a space between each "t" node
                     $textValue .= ' ';
                 }
                 if ($this->shouldPreserveWhitespace($textNode)) {
                     $textValue .= $textNode->__toString();
                 } else {
                     $textValue .= trim($textNode->__toString());
                 }
             }
             $unescapedTextValue = $escaper->unescape($textValue);
             $this->cachingStrategy->addStringForIndex($unescapedTextValue, $sharedStringIndex);
             $sharedStringIndex++;
             // jump to the next 'si' tag
             $xmlReader->next('si');
         }
     } catch (XMLProcessingException $exception) {
         // Keep the original exception as "previous" so the root cause stays in the trace.
         // NOTE(review): assumes IOException keeps the standard \Exception constructor - confirm.
         throw new IOException(
             "The sharedStrings.xml file is invalid and cannot be read. [{$exception->getMessage()}]",
             0,
             $exception
         );
     } finally {
         // Release the underlying stream on both the success and the failure path.
         $xmlReader->close();
     }
     // Only reached when the whole file was read; kept outside the try block so that
     // the set of exceptions wrapped into IOException stays the same as before.
     $this->cachingStrategy->closeCache();
 }
Beispiel #3
0
 /**
  * Reads the styles.xml file and extracts the relevant information from it.
  *
  * Both $this->customNumberFormats and $this->stylesAttributes are reset to empty arrays
  * first. If the styles file cannot be opened they simply stay empty; no exception is
  * raised for that case. Once opened, the reader is always closed, even when parsing one
  * of the sections fails.
  *
  * @return void
  */
 protected function extractRelevantInfo()
 {
     $this->customNumberFormats = [];
     $this->stylesAttributes = [];
     $stylesXmlFilePath = $this->filePath . '#' . self::STYLES_XML_FILE_PATH;
     $xmlReader = new XMLReader();
     if (!$xmlReader->open('zip://' . $stylesXmlFilePath)) {
         // Nothing to extract: keep the empty defaults set above.
         return;
     }
     try {
         while ($xmlReader->read()) {
             if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_NUM_FMTS)) {
                 $numFmtsNode = new SimpleXMLElement($xmlReader->readOuterXml());
                 $this->extractNumberFormats($numFmtsNode);
             } elseif ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL_XFS)) {
                 $cellXfsNode = new SimpleXMLElement($xmlReader->readOuterXml());
                 $this->extractStyleAttributes($cellXfsNode);
             }
         }
     } finally {
         // SimpleXMLElement construction throws on malformed XML; make sure the
         // reader is released in that case too.
         $xmlReader->close();
     }
 }
 /**
  * Collects every "c" element found in 'xl/worksheets/sheet1.xml' of the generated file.
  *
  * Each element is returned as a DOM copy produced by XMLReader::expand(), so the
  * returned nodes remain usable after the reader has been closed.
  *
  * @param string $fileName Name of the generated file, resolved through getGeneratedResourcePath()
  * @return \DOMNode[] The "c" elements, in document order
  */
 private function getCellElementsFromSheetXmlFile($fileName)
 {
     $cellElements = [];
     $resourcePath = $this->getGeneratedResourcePath($fileName);
     $pathToSheetFile = $resourcePath . '#xl/worksheets/sheet1.xml';
     $xmlReader = new \XMLReader();
     $xmlReader->open('zip://' . $pathToSheetFile);
     try {
         while ($xmlReader->read()) {
             if ($xmlReader->nodeType === \XMLReader::ELEMENT && $xmlReader->name === 'c') {
                 $cellElements[] = $xmlReader->expand();
             }
         }
     } finally {
         // The reader was previously left open; release the zip stream once done.
         $xmlReader->close();
     }
     return $cellElements;
 }
 /**
  * Returns the first node named $section found in the 'styles.xml' entry of the generated file.
  *
  * @param string $fileName Name of the generated file, resolved through getGeneratedResourcePath()
  * @param string $section Name of the XML node to look for
  * @return \DOMElement The expanded node the reader stopped on
  */
 private function getXmlSectionFromStylesXmlFile($fileName, $section)
 {
     $resourcePath = $this->getGeneratedResourcePath($fileName);
     $pathToStylesXmlFile = $resourcePath . '#styles.xml';
     $xmlReader = new XMLReader();
     $xmlReader->open('zip://' . $pathToStylesXmlFile);
     try {
         $xmlReader->readUntilNodeFound($section);
         // NOTE(review): closing right after expand() relies on expand() returning a copy,
         // as the native \XMLReader::expand() does - confirm the wrapper does not override it.
         $xmlElement = $xmlReader->expand();
     } finally {
         // The reader was previously left open; release the zip stream once done.
         $xmlReader->close();
     }
     return $xmlElement;
 }
Beispiel #6
0
 /**
  * Opens the 'content.xml' entry of the generated file and advances the reader
  * from one 'table:table' node to the next until the requested sheet is reached.
  *
  * The reader is advanced at least once, so any index of 1 or less stops on the
  * first 'table:table' node. The reader is returned still open; closing it is left
  * to the caller.
  *
  * @param string $fileName Name of the generated file, resolved through getGeneratedResourcePath()
  * @param int $sheetIndex Position of the wanted sheet, counted from 1
  * @return XMLReader Reader left where the last 'table:table' search stopped
  */
 private function moveReaderToCorrectTableNode($fileName, $sheetIndex)
 {
     $contentXmlUri = 'zip://' . $this->getGeneratedResourcePath($fileName) . '#content.xml';

     $reader = new XMLReader();
     $reader->open($contentXmlUri);

     // One jump for the first table, plus one for every sheet preceding the wanted one.
     $jumpsLeft = max(1, $sheetIndex);
     while ($jumpsLeft-- > 0) {
         $reader->readUntilNodeFound('table:table');
     }

     return $reader;
 }