/**
 * Read styles.xml and register each `w:style` entry on the document.
 *
 * Dispatch is driven by the `w:type` attribute:
 * - `paragraph`: becomes a title style when the style name matches `Heading<digit>`,
 *   otherwise a font style (when font settings were read) or a plain paragraph style.
 * - `character`: registered as a font style when font settings were read.
 * - `table`: registered as a table style when table settings were read.
 * Any other type is ignored.
 *
 * @param \PhpOffice\PhpWord\PhpWord $phpWord
 * @return void
 */
public function read(PhpWord $phpWord)
{
    $xmlReader = new XMLReader();
    $xmlReader->getDomFromZip($this->docFile, $this->xmlFile);

    $styleNodes = $xmlReader->getElements('w:style');
    if ($styleNodes->length <= 0) {
        return;
    }

    foreach ($styleNodes as $styleNode) {
        $styleType = $xmlReader->getAttribute('w:type', $styleNode);

        // Prefer the style id; fall back to the value of the nested w:name element.
        $styleName = $xmlReader->getAttribute('w:styleId', $styleNode);
        if ($styleName === null) {
            $styleName = $xmlReader->getAttribute('w:val', $styleNode, 'w:name');
        }

        // A single digit after "Heading" is captured and used as the title depth.
        preg_match('/Heading(\\d)/', $styleName, $headingMatches);

        switch ($styleType) {
            case 'paragraph':
                $paragraphSettings = $this->readParagraphStyle($xmlReader, $styleNode);
                $fontSettings = $this->readFontStyle($xmlReader, $styleNode);

                if (!empty($headingMatches)) {
                    $phpWord->addTitleStyle($headingMatches[1], $fontSettings, $paragraphSettings);
                } elseif (!empty($fontSettings)) {
                    $phpWord->addFontStyle($styleName, $fontSettings, $paragraphSettings);
                } elseif (is_array($paragraphSettings)) {
                    $phpWord->addParagraphStyle($styleName, $paragraphSettings);
                }
                break;

            case 'character':
                $fontSettings = $this->readFontStyle($xmlReader, $styleNode);
                if (!empty($fontSettings)) {
                    $phpWord->addFontStyle($styleName, $fontSettings);
                }
                break;

            case 'table':
                $tableSettings = $this->readTableStyle($xmlReader, $styleNode);
                if (!empty($tableSettings)) {
                    $phpWord->addTableStyle($styleName, $tableSettings);
                }
                break;
        }
    }
}
/**
 * Read content.xml
 *
 * Walks the direct children of `office:body/office:text` and appends them to a single
 * new section: headings as titles, paragraphs as text, and the paragraphs of list items
 * as list items. Other node types are ignored. No section is created when the body has
 * no children.
 *
 * @param \PhpOffice\PhpWord\PhpWord $phpWord
 */
public function read(PhpWord &$phpWord)
{
    $xmlReader = new XMLReader();
    $xmlReader->getDomFromZip($this->docFile, $this->xmlFile);

    $bodyNodes = $xmlReader->getElements('office:body/office:text/*');
    if ($bodyNodes->length <= 0) {
        return;
    }

    $section = $phpWord->addSection();
    foreach ($bodyNodes as $bodyNode) {
        $tag = $bodyNode->nodeName;

        if ($tag === 'text:h') {
            // Heading: the outline level is passed on as the title depth.
            $level = $xmlReader->getAttribute('text:outline-level', $bodyNode);
            $section->addTitle($bodyNode->nodeValue, $level);
        } elseif ($tag === 'text:p') {
            // Paragraph
            $section->addText($bodyNode->nodeValue);
        } elseif ($tag === 'text:list') {
            // List: one list item per paragraph found inside a list item.
            $itemParagraphs = $xmlReader->getElements('text:list-item/text:p', $bodyNode);
            foreach ($itemParagraphs as $itemParagraph) {
                $section->addListItem($itemParagraph->nodeValue);
            }
        }
    }
}
/**
 * Read (footnotes|endnotes).xml.
 *
 * For every note in the XML part that has no `w:type` attribute and whose `w:id` is
 * already present in the document's note collection, the children of the note's
 * paragraphs are read into the existing element, which is then handed back to the
 * document through its `add<element>` method.
 *
 * @param \PhpOffice\PhpWord\PhpWord $phpWord
 * @return void
 */
public function read(PhpWord $phpWord)
{
    $getter = 'get' . $this->collection;
    $knownNotes = $phpWord->{$getter}()->getItems();

    $xmlReader = new XMLReader();
    $xmlReader->getDomFromZip($this->docFile, $this->xmlFile);

    $noteNodes = $xmlReader->getElements('*');
    if ($noteNodes->length <= 0) {
        return;
    }

    foreach ($noteNodes as $noteNode) {
        $noteId = $xmlReader->getAttribute('w:id', $noteNode);
        $noteType = $xmlReader->getAttribute('w:type', $noteNode);

        // Typed notes ("separator", "continuationSeparator") and notes that the
        // collection does not know about are skipped.
        if ($noteType !== null || !isset($knownNotes[$noteId])) {
            continue;
        }

        $noteElement = $knownNotes[$noteId];
        foreach ($xmlReader->getElements('w:p/*', $noteNode) as $runNode) {
            $this->readRun($xmlReader, $runNode, $noteElement, $this->collection);
        }

        $adder = 'add' . $this->element;
        $phpWord->{$adder}($noteElement);
    }
}
/**
 * Read header footer
 *
 * For each entry of `$settings['hf']` whose relationship id exists in the document
 * relationships, a header/footer object is added to the section and filled from the
 * referenced XML file. Only `w:p` and `w:tbl` children are read.
 *
 * @param array $settings
 * @param \PhpOffice\PhpWord\Element\Section $section
 */
private function readHeaderFooter($settings, Section &$section)
{
    // Node name => reader method used for that node.
    $readMethods = array('w:p' => 'readParagraph', 'w:tbl' => 'readTable');

    if (!is_array($settings) || !array_key_exists('hf', $settings)) {
        return;
    }

    foreach ($settings['hf'] as $rId => $hfSetting) {
        if (!array_key_exists($rId, $this->rels['document'])) {
            continue;
        }

        // The relationship entry is consumed positionally: type, target file, doc part.
        list($hfType, $xmlFile, $docPart) = array_values($this->rels['document'][$rId]);
        $adder = 'add' . $hfType;
        $container = $section->{$adder}($hfSetting['type']);

        // Read header/footer content
        $xmlReader = new XMLReader();
        $xmlReader->getDomFromZip($this->docFile, $xmlFile);
        $contentNodes = $xmlReader->getElements('*');
        if ($contentNodes->length <= 0) {
            continue;
        }

        foreach ($contentNodes as $contentNode) {
            if (!isset($readMethods[$contentNode->nodeName])) {
                continue;
            }
            $reader = $readMethods[$contentNode->nodeName];
            $this->{$reader}($xmlReader, $contentNode, $container, $docPart);
        }
    }
}
/**
 * Read (footnotes|endnotes).xml
 *
 * Normalises `$this->type` to either 'endnotes' or 'footnotes' (anything that is not
 * 'endnotes' becomes 'footnotes'), then, for every untyped note whose `w:id` is already
 * registered in the matching static collection class, reads the children of the note's
 * paragraphs into the registered element and stores it back under the same id.
 *
 * The `$phpWord` argument is not used by this method; it is kept for the shared reader
 * signature.
 *
 * @param \PhpOffice\PhpWord\PhpWord $phpWord
 */
public function read(PhpWord &$phpWord)
{
    $this->type = $this->type == 'endnotes' ? 'endnotes' : 'footnotes';
    $collectionClass = 'PhpOffice\\PhpWord\\' . ucfirst($this->type);
    $collection = $collectionClass::getElements();

    $xmlReader = new XMLReader();
    $xmlReader->getDomFromZip($this->docFile, $this->xmlFile);

    $nodes = $xmlReader->getElements('*');
    if ($nodes->length > 0) {
        foreach ($nodes as $node) {
            $id = $xmlReader->getAttribute('w:id', $node);
            $noteType = $xmlReader->getAttribute('w:type', $node);

            // Avoid w:type "separator" and "continuationSeparator"
            // Only look for <footnote> or <endnote> without w:type attribute
            if (is_null($noteType) && array_key_exists($id, $collection)) {
                $element = $collection[$id];
                $pNodes = $xmlReader->getElements('w:p/*', $node);
                foreach ($pNodes as $pNode) {
                    // Pass the part name ('footnotes'/'endnotes'). The node's w:type
                    // attribute was passed here before, but it is always null inside
                    // this branch, so it carried no information.
                    $this->readRun($xmlReader, $pNode, $element, $this->type);
                }
                $collectionClass::setElement($id, $element);
            }
        }
    }
}
/**
 * Read meta.xml
 *
 * Copies the standard metadata elements found under `office:meta` onto the document
 * properties, then processes every `meta:user-defined` element: Category, Company and
 * Manager go to their dedicated setters, everything else is stored as a custom property.
 *
 * @param \PhpOffice\PhpWord\PhpWord $phpWord
 * @todo Process property type
 */
public function read(PhpWord &$phpWord)
{
    $xmlReader = new XMLReader();
    $xmlReader->getDomFromZip($this->docFile, $this->xmlFile);
    $docProps = $phpWord->getDocumentProperties();

    $metaNode = $xmlReader->getElement('office:meta');

    // Standard properties: document property name => element path below office:meta
    $standardPaths = array(
        'title'          => 'dc:title',
        'subject'        => 'dc:subject',
        'description'    => 'dc:description',
        'keywords'       => 'meta:keyword',
        'creator'        => 'meta:initial-creator',
        'lastModifiedBy' => 'dc:creator',
    );
    foreach ($standardPaths as $propertyName => $elementPath) {
        $setter = 'set' . $propertyName;
        $valueNode = $xmlReader->getElement($elementPath, $metaNode);
        if ($valueNode === null || !method_exists($docProps, $setter)) {
            continue;
        }
        $docProps->{$setter}($valueNode->nodeValue);
    }

    // Custom properties
    $dedicatedNames = array('Category', 'Company', 'Manager');
    $userDefinedNodes = $xmlReader->getElements('meta:user-defined', $metaNode);
    foreach ($userDefinedNodes as $userDefinedNode) {
        $customName = $xmlReader->getAttribute('meta:name', $userDefinedNode);
        $customValue = $userDefinedNode->nodeValue;

        if (in_array($customName, $dedicatedNames)) {
            // Set category, company, and manager property
            $setter = 'set' . $customName;
            $docProps->{$setter}($customValue);
            continue;
        }

        // Set other custom properties
        $docProps->setCustomProperty($customName, $customValue);
    }
}
/**
 * getElement() must return null when the requested path matches nothing
 * below the given context node.
 */
public function testGetElementReturnsNull()
{
    $archive = __DIR__ . "/../_files/documents/reader.docx.zip";

    $reader = new XMLReader();
    $reader->getDomFromZip($archive, '[Content_Types].xml');

    // Use the first element of the part as the lookup context.
    $contextNode = $reader->getElements('*')->item(0);
    $lookupResult = $reader->getElement('yadayadaya', $contextNode);

    $this->assertNull($lookupResult);
}
/**
 * Read numbering.xml.
 *
 * Works in three passes:
 * 1. collect every `w:abstractNum` definition (multi-level type and levels),
 * 2. build one numbering per `w:num` by copying the abstract definition it refers to
 *    and applying its `w:lvlOverride/w:lvl` overrides key by key,
 * 3. register each numbering on the document as `PHPWordList<numId>`.
 *
 * @param \PhpOffice\PhpWord\PhpWord $phpWord
 * @return void
 */
public function read(PhpWord $phpWord)
{
    $abstracts = array();
    $numberings = array();

    $xmlReader = new XMLReader();
    $xmlReader->getDomFromZip($this->docFile, $this->xmlFile);

    // Abstract numbering definition
    $abstractNodes = $xmlReader->getElements('w:abstractNum');
    if ($abstractNodes->length > 0) {
        foreach ($abstractNodes as $abstractNode) {
            $abstractId = $xmlReader->getAttribute('w:abstractNumId', $abstractNode);
            $definition = array('levels' => array());

            foreach ($xmlReader->getElements('*', $abstractNode) as $childNode) {
                if ($childNode->nodeName === 'w:multiLevelType') {
                    $definition['type'] = $xmlReader->getAttribute('w:val', $childNode);
                } elseif ($childNode->nodeName === 'w:lvl') {
                    $levelId = $xmlReader->getAttribute('w:ilvl', $childNode);
                    $definition['levels'][$levelId] = $this->readLevel($xmlReader, $childNode, $levelId);
                }
            }

            $abstracts[$abstractId] = $definition;
        }
    }

    // Numbering instance definition
    $instanceNodes = $xmlReader->getElements('w:num');
    if ($instanceNodes->length > 0) {
        foreach ($instanceNodes as $instanceNode) {
            $numId = $xmlReader->getAttribute('w:numId', $instanceNode);
            $abstractId = $xmlReader->getAttribute('w:val', $instanceNode, 'w:abstractNumId');

            // Start from a copy of the referenced abstract definition.
            $instance = $abstracts[$abstractId];
            $instance['numId'] = $numId;

            // Level overrides replace individual keys of the copied level.
            $overrideNodes = $xmlReader->getElements('w:lvlOverride/w:lvl', $instanceNode);
            foreach ($overrideNodes as $overrideNode) {
                $levelId = $xmlReader->getAttribute('w:ilvl', $overrideNode);
                $levelOverrides = $this->readLevel($xmlReader, $overrideNode, $levelId);
                foreach ($levelOverrides as $key => $value) {
                    $instance['levels'][$levelId][$key] = $value;
                }
            }

            $numberings[$numId] = $instance;
        }
    }

    // Push to Style collection
    foreach ($numberings as $numId => $numbering) {
        $phpWord->addNumberingStyle("PHPWordList{$numId}", $numbering);
    }
}
/**
 * Read all relationship files
 *
 * Lists the `manifest:file-entry` elements of META-INF/manifest.xml inside the given
 * archive.
 *
 * @param string $docFile
 * @return array List of arrays with the keys 'type' (media type) and 'target' (full path)
 */
private function readRelationships($docFile)
{
    $xmlReader = new XMLReader();
    $xmlReader->getDomFromZip($docFile, 'META-INF/manifest.xml');

    $entries = array();
    foreach ($xmlReader->getElements('manifest:file-entry') as $entryNode) {
        $entries[] = array(
            'type'   => $xmlReader->getAttribute('manifest:media-type', $entryNode),
            'target' => $xmlReader->getAttribute('manifest:full-path', $entryNode),
        );
    }

    return $entries;
}
/**
 * Read custom document properties.
 *
 * Each top-level element of the part is treated as one custom property: its `name`
 * attribute is the property name, and its first child element supplies both the type
 * (the child's node name) and the raw value (the child's node value). Value and type
 * are passed through DocInfo::convertProperty() / DocInfo::convertPropertyType() before
 * being stored on the document info.
 *
 * Entries without a child element are skipped, because there is nothing to read the
 * type or value from.
 *
 * @param \PhpOffice\PhpWord\PhpWord $phpWord
 * @return void
 */
public function read(PhpWord $phpWord)
{
    $xmlReader = new XMLReader();
    $xmlReader->getDomFromZip($this->docFile, $this->xmlFile);
    $docProps = $phpWord->getDocInfo();

    $nodes = $xmlReader->getElements('*');
    if ($nodes->length > 0) {
        foreach ($nodes as $node) {
            $propertyName = $xmlReader->getAttribute('name', $node);

            // getElement() returns null when the property has no child element;
            // dereferencing it would raise an error, so skip the entry instead.
            $attributeNode = $xmlReader->getElement('*', $node);
            if ($attributeNode === null) {
                continue;
            }

            $attributeType = $attributeNode->nodeName;
            $attributeValue = $attributeNode->nodeValue;

            // The value is converted using the raw type name, so it must be converted
            // before the type itself is mapped.
            $attributeValue = DocInfo::convertProperty($attributeValue, $attributeType);
            $attributeType = DocInfo::convertPropertyType($attributeType);

            $docProps->setCustomProperty($propertyName, $attributeValue, $attributeType);
        }
    }
}
/**
 * Read core/extended document properties
 *
 * Every top-level element whose node name appears in `$this->mapping` is forwarded to
 * the mapped setter of the document properties. Empty values are passed as null, and a
 * per-node callback from `$this->callbacks`, when present, transforms the value first.
 *
 * @param \PhpOffice\PhpWord\PhpWord $phpWord
 */
public function read(PhpWord &$phpWord)
{
    $xmlReader = new XMLReader();
    $xmlReader->getDomFromZip($this->docFile, $this->xmlFile);
    $docProps = $phpWord->getDocumentProperties();

    $propertyNodes = $xmlReader->getElements('*');
    if ($propertyNodes->length <= 0) {
        return;
    }

    foreach ($propertyNodes as $propertyNode) {
        $nodeName = $propertyNode->nodeName;
        if (!array_key_exists($nodeName, $this->mapping)) {
            continue;
        }
        $setter = $this->mapping[$nodeName];

        // Empty content is normalised to null before any callback runs.
        $propertyValue = $propertyNode->nodeValue;
        if ($propertyValue == '') {
            $propertyValue = null;
        }

        if (array_key_exists($nodeName, $this->callbacks)) {
            $propertyValue = $this->callbacks[$nodeName]($propertyValue);
        }

        if (method_exists($docProps, $setter)) {
            $docProps->{$setter}($propertyValue);
        }
    }
}
/**
 * Get relationship array
 *
 * Reads every top-level element of a relationships part and returns the entries keyed
 * by relationship id, sorted by key. Each entry holds:
 * - 'type':    the relationship type with the well-known URL prefixes stripped,
 * - 'target':  the target, prefixed with $targetPrefix unless the type is 'hyperlink',
 * - 'docPart': the unprefixed target with '.xml' removed.
 *
 * @param string $docFile
 * @param string $xmlFile
 * @param string $targetPrefix
 * @return array
 */
private function getRels($docFile, $xmlFile, $targetPrefix = '')
{
    // URL prefixes removed from the type to make it easier to read
    $typePrefixes = array(
        'http://schemas.openxmlformats.org/package/2006/relationships/metadata/',
        'http://schemas.openxmlformats.org/officeDocument/2006/relationships/',
    );

    $xmlReader = new XMLReader();
    $xmlReader->getDomFromZip($docFile, $xmlFile);

    $relationships = array();
    foreach ($xmlReader->getElements('*') as $relNode) {
        $relId = $xmlReader->getAttribute('Id', $relNode);
        $relType = str_replace($typePrefixes, '', $xmlReader->getAttribute('Type', $relNode));
        $relTarget = $xmlReader->getAttribute('Target', $relNode);

        // The doc part is derived from the raw target, before any prefix is applied.
        $docPart = str_replace('.xml', '', $relTarget);

        // Do not add prefix to link source
        if ($relType !== 'hyperlink') {
            $relTarget = $targetPrefix . $relTarget;
        }

        $relationships[$relId] = array(
            'type'    => $relType,
            'target'  => $relTarget,
            'docPart' => $docPart,
        );
    }
    ksort($relationships);

    return $relationships;
}