/**
 * Stores the injected helpers and the date-formatting flag, and obtains the
 * XLSX escaper through its static getInstance() accessor.
 *
 * @param SharedStringsHelper $sharedStringsHelper Helper to work with shared strings
 * @param StyleHelper $styleHelper Helper to work with styles
 * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
 */
public function __construct($sharedStringsHelper, $styleHelper, $shouldFormatDates)
{
    /** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */
    $this->escaper = \Box\Spout\Common\Escaper\XLSX::getInstance();

    $this->shouldFormatDates = $shouldFormatDates;
    $this->styleHelper = $styleHelper;
    $this->sharedStringsHelper = $sharedStringsHelper;
}
/**
 * Builds the Sheet object described by one sheet node of "workbook.xml".
 *
 * The node's "r:id" attribute is handed to getSheetDataXMLFilePathForSheetId()
 * to resolve the path of the XML file holding the sheet data, and the node's
 * "name" attribute is unescaped to obtain the sheet name.
 *
 * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReaderOnSheetNode XML Reader instance, pointing on the node describing the sheet, as defined in "workbook.xml"
 * @param int $sheetIndexZeroBased Index of the sheet, based on order of appearance in the workbook (zero-based)
 * @return \Box\Spout\Reader\XLSX\Sheet Sheet instance
 */
protected function getSheetFromSheetXMLNode($xmlReaderOnSheetNode, $sheetIndexZeroBased)
{
    /** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */
    $escaper = \Box\Spout\Common\Escaper\XLSX::getInstance();

    $relationshipId = $xmlReaderOnSheetNode->getAttribute('r:id');
    // the name is stored escaped in the XML attribute
    $sheetName = $escaper->unescape($xmlReaderOnSheetNode->getAttribute('name'));

    return new Sheet(
        $this->filePath,
        $this->getSheetDataXMLFilePathForSheetId($relationshipId),
        $this->sharedStringsHelper,
        $this->shouldFormatDates,
        $sheetIndexZeroBased,
        $sheetName
    );
}
/**
 * Stores the collaborators, derives the path of the worksheet XML file from
 * the lower-cased external sheet name, then calls startSheet().
 *
 * @param \Box\Spout\Writer\Common\Sheet $externalSheet The associated "external" sheet
 * @param string $worksheetFilesFolder Temporary folder where the files to create the XLSX will be stored
 * @param \Box\Spout\Writer\XLSX\Helper\SharedStringsHelper $sharedStringsHelper Helper for shared strings
 * @param bool $shouldUseInlineStrings Whether inline or shared strings should be used
 * @throws \Box\Spout\Common\Exception\IOException If the sheet data file cannot be opened for writing
 */
public function __construct($externalSheet, $worksheetFilesFolder, $sharedStringsHelper, $shouldUseInlineStrings)
{
    /** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */
    $this->stringsEscaper = \Box\Spout\Common\Escaper\XLSX::getInstance();

    $this->shouldUseInlineStrings = $shouldUseInlineStrings;
    $this->sharedStringsHelper = $sharedStringsHelper;
    $this->externalSheet = $externalSheet;

    // the worksheet file is named after the sheet: "<lower-cased name>.xml"
    $worksheetFileName = strtolower($externalSheet->getName()) . '.xml';
    $this->worksheetFilePath = $worksheetFilesFolder . '/' . $worksheetFileName;

    $this->startSheet();
}
/**
 * Opens the shared strings file inside the given "xl" folder for writing,
 * writes the first part of its header and obtains the XLSX escaper.
 *
 * @param string $xlFolder Path to the "xl" folder
 */
public function __construct($xlFolder)
{
    $this->sharedStringsFilePointer = fopen($xlFolder . '/' . self::SHARED_STRINGS_FILE_NAME, 'w');
    $this->throwIfSharedStringsFilePointerIsNotAvailable();

    // The header is written in separate parts so that the correct count and
    // uniqueCount can be put in later with fseek.
    $headerStart = sprintf(
        '%s %s>',
        self::SHARED_STRINGS_XML_FILE_FIRST_PART_HEADER,
        self::DEFAULT_STRINGS_COUNT_PART
    );
    fwrite($this->sharedStringsFilePointer, $headerStart);

    /** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */
    $this->stringsEscaper = \Box\Spout\Common\Escaper\XLSX::getInstance();
}
/**
 * Builds an in-memory array containing all the shared strings of the sheet.
 * All the strings are stored in a XML file, located at 'xl/sharedStrings.xml'.
 * It is then accessed by the sheet data, via the string index in the built table.
 *
 * More documentation available here: http://msdn.microsoft.com/en-us/library/office/gg278314.aspx
 *
 * The XML file can be really big with sheets containing a lot of data. That is why
 * we need to use a XML reader that provides streaming like the XMLReader library.
 * Please note that SimpleXML does not provide such a functionality but since it is faster
 * and more handy to parse few XML nodes, it is used in combination with XMLReader for that purpose.
 *
 * The XML reader is closed on the success path as well as when the file turns out
 * to be invalid, so the underlying file is never left open.
 *
 * @return void
 * @throws \Box\Spout\Common\Exception\IOException If sharedStrings.xml can't be read
 */
public function extractSharedStrings()
{
    $xmlReader = new XMLReader();
    $sharedStringIndex = 0;
    /** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */
    $escaper = \Box\Spout\Common\Escaper\XLSX::getInstance();

    $sharedStringsFilePath = $this->getSharedStringsFilePath();
    if ($xmlReader->open($sharedStringsFilePath) === false) {
        throw new IOException('Could not open "' . self::SHARED_STRINGS_XML_FILE_PATH . '".');
    }

    try {
        $sharedStringsUniqueCount = $this->getSharedStringsUniqueCount($xmlReader);
        $this->cachingStrategy = $this->getBestSharedStringsCachingStrategy($sharedStringsUniqueCount);

        $xmlReader->readUntilNodeFound('si');

        while ($xmlReader->name === 'si') {
            $node = $this->getSimpleXmlElementNodeFromXMLReader($xmlReader);
            $node->registerXPathNamespace('ns', self::MAIN_NAMESPACE_FOR_SHARED_STRINGS_XML);

            // removes nodes that should not be read, like the pronunciation of the Kanji characters
            $cleanNode = $this->removeSuperfluousTextNodes($node);

            // find all text nodes "t"; there can be multiple if the cell contains formatting
            $textNodes = $cleanNode->xpath('//ns:t');

            $textValue = '';
            foreach ($textNodes as $nodeIndex => $textNode) {
                if ($nodeIndex !== 0) {
                    // add a space between each "t" node
                    $textValue .= ' ';
                }

                if ($this->shouldPreserveWhitespace($textNode)) {
                    $textValue .= $textNode->__toString();
                } else {
                    $textValue .= trim($textNode->__toString());
                }
            }

            $unescapedTextValue = $escaper->unescape($textValue);
            $this->cachingStrategy->addStringForIndex($unescapedTextValue, $sharedStringIndex);

            $sharedStringIndex++;

            // jump to the next 'si' tag
            $xmlReader->next('si');
        }
    } catch (XMLProcessingException $exception) {
        // release the reader before propagating, otherwise the file stays open
        $xmlReader->close();

        // keep the original exception attached so the root cause is not lost
        throw new IOException(
            "The sharedStrings.xml file is invalid and cannot be read. [{$exception->getMessage()}]",
            0,
            $exception
        );
    }

    $this->cachingStrategy->closeCache();

    $xmlReader->close();
}
/**
 * Reads the shared strings item the given XML reader is positioned on and
 * stores its unescaped text in the caching strategy under the given index.
 *
 * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader
 * @param int $sharedStringIndex Index of the processed shared strings item
 * @param \Box\Spout\Common\Escaper\XLSX $escaper Helper to escape values
 * @return void
 */
protected function processSharedStringsItem($xmlReader, $sharedStringIndex, $escaper)
{
    $itemNode = $this->getSimpleXmlElementNodeFromXMLReader($xmlReader);
    $itemNode->registerXPathNamespace('ns', self::MAIN_NAMESPACE_FOR_SHARED_STRINGS_XML);

    // drop nodes that should not be read, like the pronunciation of the Kanji characters
    $strippedNode = $this->removeSuperfluousTextNodes($itemNode);

    // an item can hold several "t" text nodes when the cell contains formatting
    $escapedText = $this->extractTextValueForNodes($strippedNode->xpath('//ns:t'));

    $this->cachingStrategy->addStringForIndex($escaper->unescape($escapedText), $sharedStringIndex);
}
/**
 * Creates the "workbook.xml" file under the "xl" folder.
 * One "sheet" element is written per worksheet, carrying the escaped sheet
 * name, the worksheet ID and a relationship ID of the form "rIdSheet<ID>".
 *
 * @param Worksheet[] $worksheets
 * @return FileSystemHelper
 */
public function createWorkbookFile($worksheets)
{
    /** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */
    $escaper = \Box\Spout\Common\Escaper\XLSX::getInstance();

    $documentStart = <<<EOD
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<workbook xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships">
    <sheets>
EOD;

    $sheetElements = '';

    /** @var Worksheet $worksheet */
    foreach ($worksheets as $worksheet) {
        $sheetName = $worksheet->getExternalSheet()->getName();
        $sheetId = $worksheet->getId();

        $sheetElements .= '<sheet name="' . $escaper->escape($sheetName) . '"'
            . ' sheetId="' . $sheetId . '"'
            . ' r:id="rIdSheet' . $sheetId . '"/>';
    }

    $documentEnd = <<<EOD
    </sheets>
</workbook>
EOD;

    $this->createFileWithContents(
        $this->xlFolder,
        self::WORKBOOK_XML_FILE_NAME,
        $documentStart . $sheetElements . $documentEnd
    );

    return $this;
}
/**
 * Returns the cell String value, where string is stored in value node.
 * The node value is trimmed first, then unescaped.
 *
 * @param string $nodeValue
 * @param \Box\Spout\Common\Escaper\XLSX $escaper
 * @return string The trimmed and unescaped value associated with the cell
 */
protected function formatStrCellValue($nodeValue, $escaper)
{
    return $escaper->unescape(trim($nodeValue));
}