/**
 * Filter a single element
 *
 * Assigns the type "para" to the element when it contains visible text and
 * no type has been assigned to it yet.
 *
 * @param DOMElement $element
 * @return void
 */
public function filterElement(DOMElement $element)
{
    // Only decorate non-empty paragraphs. The comparison against the empty
    // string is explicit on purpose: a paragraph whose whole text is "0" is
    // falsy in PHP, but it is still content and must be kept.
    if (trim($element->textContent) !== '' && $element->getProperty('type') === false) {
        $element->setProperty('type', 'para');
    }
}
/**
 * Filter a single element
 *
 * Translates a "rowspan" attribute greater than one into a "morerows"
 * entry (rowspan - 1) in the element's "attributes" property.
 *
 * @param DOMElement $element
 * @return void
 */
public function filterElement(DOMElement $element)
{
    // getAttribute() returns '' for a missing attribute, which casts to 0,
    // so no separate hasAttribute() check is needed. Casting up front also
    // keeps non-numeric values out of the subtraction below.
    $rowspan = (int) $element->getAttribute('rowspan');
    if ($rowspan <= 1) {
        return;
    }

    // The property may not hold an array yet; start from an empty one
    // instead of indexing into a non-array value.
    $attributes = $element->getProperty('attributes');
    if (!is_array($attributes)) {
        $attributes = array();
    }

    $attributes['morerows'] = $rowspan - 1;
    $element->setProperty('attributes', $attributes);

    // @todo: Handle colspan, too - even it is quite complex to express in
    // docbook.
}
/**
 * Filter a single element.
 *
 * Types the element as "ulink" and stores its xlink:href value under the
 * "url" key of the "attributes" property.
 *
 * @param DOMElement $element
 * @return void
 */
public function filterElement(DOMElement $element)
{
    $element->setProperty('type', 'ulink');

    // Fall back to an empty list when no attribute array is present.
    $linkAttributes = $element->getProperty('attributes');
    $linkAttributes = is_array($linkAttributes) ? $linkAttributes : array();

    // @todo: Can we convert more attributes here? Maybe <ulink type="…"/>?
    $linkAttributes['url'] = $element->getAttributeNS(ezcDocumentOdt::NS_XLINK, 'href');

    $element->setProperty('attributes', $linkAttributes);
}
/**
 * Filter a single element.
 *
 * Types the element as "footnote", copies the text of each contained
 * note-citation element into the "label" entry of the "attributes"
 * property, and removes the citation elements from the tree.
 *
 * @param DOMElement $element
 * @return void
 */
public function filterElement(DOMElement $element)
{
    $element->setProperty('type', 'footnote');

    $citations = $element->getElementsByTagNameNS(ezcDocumentOdt::NS_ODT_TEXT, 'note-citation');

    // Should be only 1, but remove all of them. The node list is live, so
    // it is copied into a plain array first; removing nodes while iterating
    // the live list would skip entries.
    foreach (iterator_to_array($citations, false) as $cite) {
        $attrs = $element->getProperty('attributes');
        if (!is_array($attrs)) {
            $attrs = array();
        }

        $attrs['label'] = $cite->nodeValue;
        $element->setProperty('attributes', $attrs);

        // The lookup above matches descendants at any depth, so detach the
        // citation from its actual parent rather than assuming it is a
        // direct child of $element.
        if ($cite->parentNode !== null) {
            $cite->parentNode->removeChild($cite);
        }
    }
}
/**
 * Shows a string representation of the current node.
 *
 * Prints the chain of ancestor elements followed by the given element,
 * each as "> tagName", with the element's "type" property in parentheses
 * when one is set. Is only there for debugging purposes.
 *
 * @param DOMElement $element
 * @param bool $newLine Terminate the output with a newline instead of a space
 * @access private
 */
protected function showCurrentNode(DOMElement $element, $newLine = true)
{
    // Print the ancestors first so the path reads from the root downwards.
    $parent = $element->parentNode;
    if ($parent instanceof DOMElement) {
        $this->showCurrentNode($parent, false);
    }

    echo '> ', $element->tagName;

    $type = $element->getProperty('type');
    if ($type !== false) {
        echo ' (', $type, ')';
    }

    if ($newLine) {
        echo "\n";
    } else {
        echo ' ';
    }
}
/**
 * Calculate content factors
 *
 * Computes a score for the given element from the amount of text directly
 * inside it, the scores of its child elements and the per-tag bonus values
 * in $this->bonus. While recursing, the highest scoring element whose
 * "type" property is "section" is remembered in $this->mostImportantNode
 * (its score in $this->maximumImportance).
 *
 * @param DOMElement $element
 * @return float
 */
protected function calculateContentFactors(DOMElement $element)
{
    $elementCount    = 0;
    $nestedFactorSum = 0;
    $bonusSum        = 0;
    $charCount       = 0;

    foreach ($element->childNodes as $node) {
        if ($node->nodeType === XML_ELEMENT_NODE) {
            $elementCount++;
            $nestedFactorSum += $this->calculateContentFactors($node);

            if (isset($this->bonus[$node->tagName])) {
                $bonusSum += $this->bonus[$node->tagName];
            }
        } elseif ($node->nodeType === XML_TEXT_NODE) {
            $charCount += strlen(trim($node->wholeText));
        }
    }

    // Use an exponential metric on text amount.
    $textWeight = max(1, pow($charCount / 50, 4));

    // The divisor grows with the distance of the child element count
    // from 10, and never drops below 1.
    $divisor = max(1, abs(10 - $elementCount));
    $factor  = $textWeight * (($nestedFactorSum + $bonusSum) / $divisor);

    if ($factor > $this->maximumImportance && $element->getProperty('type') === 'section') {
        $this->maximumImportance = $factor;
        $this->mostImportantNode = $element;
    }

    return $factor;
}
/**
 * Recursively transform annotated ODT elements to docbook
 *
 * Reads the "spaces", "type", "attributes" and "whitespace" properties of
 * the ODT element: "spaces" is emitted as a text node, "type" becomes a
 * new element that receives the "attributes" and acts as the parent for
 * all converted child nodes.
 *
 * @param DOMElement $odt
 * @param DOMElement $docbook
 * @param bool $significantWhitespace
 * @return void
 */
protected function transformToDocbook(DOMElement $odt, DOMElement $docbook, $significantWhitespace = false)
{
    if (($spaces = $odt->getProperty('spaces')) !== false) {
        $docbook->appendChild(new DOMText($spaces));
    }

    if (($tagName = $odt->getProperty('type')) !== false) {
        $node = new DOMElement($tagName);
        $docbook->appendChild($node);
        $docbook = $node;

        if (($attributes = $odt->getProperty('attributes')) !== false) {
            foreach ($attributes as $name => $value) {
                $node->setAttribute($name, $value);
            }
        }
    }

    // Identical for every child, so evaluate it once.
    $childWhitespace = $significantWhitespace || $odt->getProperty('whitespace') === 'significant';

    foreach ($odt->childNodes as $child) {
        switch ($child->nodeType) {
            case XML_ELEMENT_NODE:
                $this->transformToDocbook($child, $docbook, $childWhitespace);
                break;

            case XML_TEXT_NODE:
                $docbook->appendChild(new DOMText($child->data));
                break;

            case XML_CDATA_SECTION_NODE:
                $docbook->appendChild($docbook->ownerDocument->createCDATASection($child->data));
                break;

            // XML_ENTITY_NODE seems not required, as entities in the
            // source document are automatically transformed back to their
            // text targets.

            case XML_COMMENT_NODE:
                // Attach the comment text as a text node. Passing it as the
                // element value would have it interpreted as XML content,
                // which breaks on a raw "&" in the comment.
                $comment = new DOMElement('comment');
                $docbook->appendChild($comment);
                if ($child->data !== '') {
                    $comment->appendChild(new DOMText($child->data));
                }
                break;
        }
    }
}
/**
 * Recursively transform annotated XHtml elements to docbook
 *
 * An element with a "type" property becomes a new element of that name,
 * which receives the "attributes" property and acts as the parent for all
 * converted child nodes. Text nodes are copied verbatim where whitespace
 * is significant; elsewhere whitespace runs are collapsed and text outside
 * of an inline element is wrapped into a "para" element. CDATA sections,
 * entities and comments are not transferred.
 *
 * @param DOMElement $xhtml
 * @param DOMElement $docbook
 * @param bool $significantWhitespace
 * @return void
 */
protected function transformToDocbook(DOMElement $xhtml, DOMElement $docbook, $significantWhitespace = false)
{
    if (($tagName = $xhtml->getProperty('type')) !== false) {
        $node = new DOMElement($tagName);
        $docbook->appendChild($node);
        $docbook = $node;

        if (($attributes = $xhtml->getProperty('attributes')) !== false) {
            foreach ($attributes as $name => $value) {
                $node->setAttribute($name, htmlspecialchars($value));
            }
        }
    }

    // Identical for every child, so evaluate it once.
    $keepWhitespace = $significantWhitespace || $xhtml->getProperty('whitespace') === 'significant';

    foreach ($xhtml->childNodes as $child) {
        switch ($child->nodeType) {
            case XML_ELEMENT_NODE:
                $this->transformToDocbook($child, $docbook, $keepWhitespace);
                break;

            case XML_TEXT_NODE:
                $text = $child->data;

                if ($keepWhitespace) {
                    // Don't normalize inside nodes with significant
                    // whitespaces.
                    $docbook->appendChild(new DOMText($text));
                    break;
                }

                // Skip pure whitespace text nodes, except for
                // intentionally converted <br> elements (handled above).
                // Uses "break": a bare "continue" inside a switch acts
                // like "break" and raises a warning on PHP >= 7.3.
                if (trim($text) === '') {
                    break;
                }

                $text = preg_replace('(\\s+)', ' ', $text);

                if ($this->isInlineElement($docbook)) {
                    $docbook->appendChild(new DOMText($text));
                } else {
                    // Wrap contents into a paragraph, if we are yet
                    // outside of an inline element.
                    $para = $docbook->ownerDocument->createElement('para');
                    $para->appendChild(new DOMText(trim($text)));
                    $docbook->appendChild($para);
                }
                break;

            case XML_CDATA_SECTION_NODE:
                // CDATA sections are currently not transferred.
                break;

            case XML_ENTITY_NODE:
                // Seems not required, as entities in the source document
                // are automatically transformed back to their text
                // targets.
                break;

            case XML_COMMENT_NODE:
                // Ignore comments
                break;
        }
    }
}