Example #1
0
 /**
  * Filter a single element.
  *
  * Marks the element as a docbook "para" when it contains visible text and
  * no type has been assigned to it yet.
  *
  * @param DOMElement $element Element to inspect; it has to offer the
  *                            getProperty() / setProperty() methods used below.
  * @return void
  */
 public function filterElement(DOMElement $element)
 {
     // Only decorate non-empty paragraphs. The comparison against the empty
     // string is deliberate: PHP treats the string "0" as false, so a plain
     // truthiness check would skip a paragraph whose whole text is "0".
     if (trim($element->textContent) !== '' && $element->getProperty('type') === false) {
         $element->setProperty('type', 'para');
     }
 }
Example #2
0
 /**
  * Filter a single element.
  *
  * Translates a "rowspan" attribute greater than one into the "morerows"
  * entry of the element's "attributes" property (rowspan - 1).
  *
  * @param DOMElement $element Element to inspect; it has to offer the
  *                            getProperty() / setProperty() methods used below.
  * @return void
  */
 public function filterElement(DOMElement $element)
 {
     if ($element->hasAttribute('rowspan')) {
         // Cast once: the attribute is a string and arithmetic on a
         // non-numeric string throws a TypeError on PHP 8.
         $rowspan = (int) $element->getAttribute('rowspan');

         if ($rowspan > 1) {
             $attributes = $element->getProperty('attributes');
             // No attributes may have been stored yet; start from an empty
             // array instead of relying on false being converted implicitly.
             if (!is_array($attributes)) {
                 $attributes = array();
             }

             $attributes['morerows'] = $rowspan - 1;
             $element->setProperty('attributes', $attributes);
         }
     }
     // @todo: Handle colspan, too - even it is quite complex to express in
     // docbook.
 }
Example #3
0
 /**
  * Filter a single element.
  *
  * Types the element as "ulink" and stores the value of its xlink "href"
  * attribute under the "url" key of the "attributes" property.
  *
  * @param DOMElement $element Element to annotate.
  * @return void
  */
 public function filterElement(DOMElement $element)
 {
     $element->setProperty('type', 'ulink');

     // Start from the attributes already stored, or from scratch when the
     // property does not hold an array.
     $current = $element->getProperty('attributes');
     $linkAttributes = is_array($current) ? $current : array();

     // @todo: Can we convert more attributes here? Maybe <ulink type="…"/>?
     $linkAttributes['url'] = $element->getAttributeNS(ezcDocumentOdt::NS_XLINK, 'href');

     $element->setProperty('attributes', $linkAttributes);
 }
Example #4
0
 /**
  * Filter a single element.
  *
  * Types the element as "footnote". Every "note-citation" descendant is
  * removed from the tree and its text is stored as the "label" entry of the
  * element's "attributes" property.
  *
  * @param DOMElement $element Element to annotate.
  * @return void
  */
 public function filterElement(DOMElement $element)
 {
     $element->setProperty('type', 'footnote');

     // getElementsByTagNameNS() hands back a live list. Copy it before
     // removing nodes, otherwise the list shrinks while it is being iterated
     // and matches can be skipped.
     $citations = iterator_to_array(
         $element->getElementsByTagNameNS(ezcDocumentOdt::NS_ODT_TEXT, 'note-citation'),
         false
     );

     // Should be only 1, loop to remove all
     foreach ($citations as $cite) {
         $attrs = $element->getProperty('attributes');
         if (!is_array($attrs)) {
             $attrs = array();
         }
         $attrs['label'] = $cite->nodeValue;
         $element->setProperty('attributes', $attrs);

         // The lookup matches descendants at any depth, so detach the node
         // from its actual parent; removeChild() on $element would throw for
         // anything that is not a direct child.
         $cite->parentNode->removeChild($cite);
     }
 }
Example #5
0
 /**
  * Print the path from the outermost element ancestor down to a node.
  *
  * Debugging helper. Each step is echoed as "> tagName", followed by the
  * element's "type" property in parentheses when one is set.
  *
  * @param DOMElement $element Node whose path is printed.
  * @param bool $newLine       Finish the output with a line break (true) or
  *                            with a single space (false).
  * @access private
  */
 protected function showCurrentNode(DOMElement $element, $newLine = true)
 {
     // Ancestors go first, so the output reads from the root downwards.
     $parent = $element->parentNode;
     if ($parent instanceof DOMElement) {
         $this->showCurrentNode($parent, false);
     }

     echo '> ' . $element->tagName;
     if (false !== $element->getProperty('type')) {
         echo ' (' . $element->getProperty('type') . ')';
     }

     if ($newLine) {
         echo "\n";
     } else {
         echo ' ';
     }
 }
 /**
  * Calculate content factors
  *
  * Recursively scores an element as a candidate for being the root of the
  * actual document content. The score combines the amount of directly
  * contained text, the scores of the child elements, per-tag bonuses taken
  * from $this->bonus and the number of child elements.
  *
  * While recursing, the highest scoring element typed "section" is recorded
  * in $this->mostImportantNode, its score in $this->maximumImportance.
  *
  * @param DOMElement $element
  * @return float
  */
 protected function calculateContentFactors(DOMElement $element)
 {
     $elementCount = 0;
     $textBytes = 0;
     $nestedFactorSum = 0;
     $tagBonusSum = 0;

     foreach ($element->childNodes as $node) {
         if ($node->nodeType === XML_ELEMENT_NODE) {
             $elementCount++;
             $nestedFactorSum += $this->calculateContentFactors($node);

             $tag = $node->tagName;
             if (isset($this->bonus[$tag])) {
                 $tagBonusSum += $this->bonus[$tag];
             }
         } elseif ($node->nodeType === XML_TEXT_NODE) {
             $textBytes += strlen(trim($node->wholeText));
         }
     }

     // Text amount enters the score to the fourth power, but never scales it
     // down (lower bound of 1).
     $textWeight = max(1, pow($textBytes / 50, 4));

     // The divisor is smallest when the element has about ten child elements.
     $divisor = max(1, abs(10 - $elementCount));

     $factor = $textWeight * (($nestedFactorSum + $tagBonusSum) / $divisor);

     // Only elements typed as sections qualify as the content root.
     if ($factor > $this->maximumImportance && $element->getProperty('type') === 'section') {
         $this->maximumImportance = $factor;
         $this->mostImportantNode = $element;
     }

     return $factor;
 }
Example #7
0
File: odt.php Project: bmdevel/ezc
 /**
  * Recursively transform annotated ODT elements to docbook
  *
  * Elements carrying a "type" property become a docbook element of that
  * name, with the entries of their "attributes" property set as XML
  * attributes. Elements without a type are skipped, but their children are
  * still processed and end up in the current docbook parent.
  *
  * @param DOMElement $odt      Annotated source element.
  * @param DOMElement $docbook  Target element new nodes are appended to.
  * @param bool $significantWhitespace Passed on as true once an ancestor
  *                             has its "whitespace" property set to
  *                             "significant".
  * @return void
  */
 protected function transformToDocbook(DOMElement $odt, DOMElement $docbook, $significantWhitespace = false)
 {
     if (($spaces = $odt->getProperty('spaces')) !== false) {
         $docbook->appendChild(new DOMText($spaces));
     }

     if (($tagName = $odt->getProperty('type')) !== false) {
         // The new element has to be attached to the document before it can
         // be modified; from here on it is the parent for all children.
         $node = new DOMElement($tagName);
         $docbook->appendChild($node);
         $docbook = $node;

         if (($attributes = $odt->getProperty('attributes')) !== false) {
             foreach ($attributes as $name => $value) {
                 $node->setAttribute($name, $value);
             }
         }
     }

     foreach ($odt->childNodes as $child) {
         switch ($child->nodeType) {
             case XML_ELEMENT_NODE:
                 $this->transformToDocbook($child, $docbook, $significantWhitespace || $odt->getProperty('whitespace') === 'significant');
                 break;

             case XML_TEXT_NODE:
                 $docbook->appendChild(new DOMText($child->data));
                 break;

             case XML_CDATA_SECTION_NODE:
                 $docbook->appendChild($docbook->ownerDocument->createCDATASection($child->data));
                 break;

             // XML_ENTITY_NODE seems not required, as entities in the
             // source document are automatically transformed back to their
             // text targets.

             case XML_COMMENT_NODE:
                 // Add the comment text as a text node instead of passing it
                 // as the element value: the value argument is not escaped,
                 // so a "&" in the comment would be read as the start of an
                 // entity reference.
                 $comment = new DOMElement('comment');
                 $docbook->appendChild($comment);
                 if ($child->data !== '') {
                     $comment->appendChild(new DOMText($child->data));
                 }
                 break;
         }
     }
 }
Example #8
0
 /**
  * Recursively transform annotated XHtml elements to docbook
  *
  * Elements carrying a "type" property become a docbook element of that
  * name, with the entries of their "attributes" property set as XML
  * attributes. Text is copied verbatim where whitespace is significant;
  * elsewhere whitespace runs are collapsed, whitespace-only text is dropped
  * and text outside of inline elements is wrapped into a "para" element.
  * CDATA sections, entity nodes and comments are ignored.
  *
  * @param DOMElement $xhtml    Annotated source element.
  * @param DOMElement $docbook  Target element new nodes are appended to.
  * @param bool $significantWhitespace Passed on as true once an ancestor
  *                             has its "whitespace" property set to
  *                             "significant".
  * @return void
  */
 protected function transformToDocbook(DOMElement $xhtml, DOMElement $docbook, $significantWhitespace = false)
 {
     if (($tagName = $xhtml->getProperty('type')) !== false) {
         // The new element has to be attached to the document before it can
         // be modified; from here on it is the parent for all children.
         $node = new DOMElement($tagName);
         $docbook->appendChild($node);
         $docbook = $node;

         if (($attributes = $xhtml->getProperty('attributes')) !== false) {
             foreach ($attributes as $name => $value) {
                 // NOTE(review): setAttribute() appears to store the value
                 // literally, so htmlspecialchars() may double-escape here.
                 // Kept unchanged to preserve the current output - confirm.
                 $node->setAttribute($name, htmlspecialchars($value));
             }
         }
     }

     foreach ($xhtml->childNodes as $child) {
         switch ($child->nodeType) {
             case XML_ELEMENT_NODE:
                 $this->transformToDocbook($child, $docbook, $significantWhitespace || $xhtml->getProperty('whitespace') === 'significant');
                 break;

             case XML_TEXT_NODE:
                 $text = $child->data;
                 $keepWhitespace = $significantWhitespace || $xhtml->getProperty('whitespace') === 'significant';

                 if ($keepWhitespace) {
                     // Don't normalize inside nodes with significant
                     // whitespaces, e.g. intentionally converted <br>
                     // elements.
                     $docbook->appendChild(new DOMText($text));
                     break;
                 }

                 // Skip pure whitespace text nodes. A plain "continue" inside
                 // a switch behaves like "break" and raises a warning since
                 // PHP 7.3, so "break" is used to say what actually happens.
                 if (trim($text) === '') {
                     break;
                 }

                 if ($this->isInlineElement($docbook)) {
                     $docbook->appendChild(new DOMText(preg_replace('(\\s+)', ' ', $text)));
                 } else {
                     // Wrap contents into a paragraph, if we are yet
                     // outside of an inline element.
                     $para = $docbook->ownerDocument->createElement('para');
                     $para->appendChild(new DOMText(trim(preg_replace('(\\s+)', ' ', $text))));
                     $docbook->appendChild($para);
                 }
                 break;

             case XML_CDATA_SECTION_NODE:
                 // CDATA sections are not transferred.
                 break;

             case XML_ENTITY_NODE:
                 // Seems not required, as entities in the source document
                 // are automatically transformed back to their text
                 // targets.
                 break;

             case XML_COMMENT_NODE:
                 // Ignore comments
                 break;
         }
     }
 }