Beispiel #1
0
 /**
  * Return arrays of data from an xml.
  *
  * Every table (sheet) found in the spreadsheet becomes one array of rows, and
  * every row is a list of trimmed cell texts. Rows and cells flagged with
  * "table:number-rows-repeated" / "table:number-columns-repeated" are expanded
  * into as many copies. Rows whose cells are all empty are kept when data
  * follow them, but dropped when they end a table: spreadsheet applications
  * often pad a sheet with one empty row repeated up to the maximum sheet size.
  *
  * The encoded white spaces of OpenDocument ("text:s", "text:tab" and
  * "text:line-break") are converted into true spaces, tabulations and ends of
  * line. Other sub-elements (spans, links...) are reduced to their text.
  * XML entities (&amp;, &lt;, &gt;) written by saveXML() are left encoded.
  *
  * @param SimpleXMLElement $xml Parsed content of the spreadsheet.
  * @return array Cleaned array of arrays of data (tables > rows > cells).
  */
 protected function _getArraysFromXml($xml)
 {
     $tableNamespace = 'urn:oasis:names:tc:opendocument:xmlns:table:1.0';
     $textNamespace = 'urn:oasis:names:tc:opendocument:xmlns:text:1.0';

     // Patterns of the serialized white space elements. The lookahead avoids
     // to match longer names (text:span, text:soft-page-break, text:table...).
     $spacePattern = '~<text:s(?=[\s/])(?:[^>]*?\stext:c="(\d+)")?[^>]*/>~';
     $tabPattern = '~<text:tab(?=[\s/])[^>]*/>~';
     $lineBreakPattern = '~<text:line-break(?=[\s/])[^>]*/>~';

     // The content cannot be read directly, because extra spaces are encoded
     // and ends of line are needed, so each level is queried separately.
     $xml->registerXPathNamespace('office', 'urn:oasis:names:tc:opendocument:xmlns:office:1.0');
     $xml->registerXPathNamespace('table', $tableNamespace);
     $xml->registerXPathNamespace('text', $textNamespace);

     $arrays = array();
     $tables = $xml->xpath('/office:document-content/office:body/office:spreadsheet/table:table');
     // xpath() returns false or null on error: there is nothing to read then.
     if (!$tables) {
         return $arrays;
     }

     foreach ($tables as $table) {
         $array = array();
         // Empty rows are held back as array(row, count) until a row with
         // data is found, so the ones that end the table are never expanded.
         $pendingEmptyRows = array();

         // Namespaces are not inherited by the elements returned by xpath(),
         // so they are registered again on each element that is queried.
         $table->registerXPathNamespace('table', $tableNamespace);
         $table->registerXPathNamespace('text', $textNamespace);
         $rows = $table->xpath('table:table-row');
         if (!$rows) {
             $rows = array();
         }

         foreach ($rows as $row) {
             $currentRow = array();
             $rowHasContent = false;
             $row->registerXPathNamespace('table', $tableNamespace);
             $row->registerXPathNamespace('text', $textNamespace);

             $repeatedRows = $row->xpath('@table:number-rows-repeated');
             $repeatedRows = $repeatedRows ? (int) reset($repeatedRows) : 1;

             $cells = $row->xpath('table:table-cell');
             if (!$cells) {
                 $cells = array();
             }

             foreach ($cells as $cell) {
                 $text = '';
                 $cell->registerXPathNamespace('table', $tableNamespace);
                 $cell->registerXPathNamespace('text', $textNamespace);

                 $repeatedColumns = $cell->xpath('@table:number-columns-repeated');
                 $repeatedColumns = $repeatedColumns ? (int) reset($repeatedColumns) : 1;

                 // TODO Convert styles into html (via xsl or ods 2 html).
                 // All cells are paragraphs, even numbers.
                 $paragraphs = $cell->xpath('text:p');
                 if (!$paragraphs) {
                     $paragraphs = array();
                 }

                 foreach ($paragraphs as $paragraph) {
                     // __toString() is not used, because there can be
                     // sub-elements.
                     $paragraphXml = $paragraph->saveXML();

                     // <text:s text:c="N"/> stands for N spaces (one space
                     // when there is no count). The replacement contains no
                     // tag, so the loop ends once all of them are converted.
                     while (preg_match($spacePattern, $paragraphXml, $matches, PREG_OFFSET_CAPTURE)) {
                         $totalSpaces = isset($matches[1]) ? (int) $matches[1][0] : 1;
                         $paragraphXml = substr_replace(
                             $paragraphXml,
                             str_repeat(' ', $totalSpaces),
                             $matches[0][1],
                             strlen($matches[0][0])
                         );
                     }

                     // Split and join is used instead of preg_replace(), so
                     // the replacement is never read as a back reference.
                     $paragraphXml = implode("\t", preg_split($tabPattern, $paragraphXml));
                     $paragraphXml = implode($this->_endOfLine, preg_split($lineBreakPattern, $paragraphXml));

                     $text .= strip_tags($paragraphXml);
                     $text .= $this->_endOfLine;
                 }

                 $text = trim($text);
                 // A strict comparison is needed: "0" is a true value.
                 if ($text !== '') {
                     $rowHasContent = true;
                 }
                 for ($i = 1; $i <= $repeatedColumns; $i++) {
                     $currentRow[] = $text;
                 }
             }

             if (!$rowHasContent) {
                 $pendingEmptyRows[] = array($currentRow, $repeatedRows);
                 continue;
             }

             // Data follow the empty rows held back, so they are interior
             // rows and must be kept to preserve the position of the rows.
             foreach ($pendingEmptyRows as $pendingEmptyRow) {
                 for ($i = 1; $i <= $pendingEmptyRow[1]; $i++) {
                     $array[] = $pendingEmptyRow[0];
                 }
             }
             $pendingEmptyRows = array();

             for ($i = 1; $i <= $repeatedRows; $i++) {
                 $array[] = $currentRow;
             }
         }

         // The empty rows still pending are the padding of the end of the
         // table and are dropped.
         $arrays[] = $array;
     }

     return $arrays;
 }
 /**
  * Return the raw text from an xml.
  *
  * Only the paragraphs ("text:p") that are direct children of the body of the
  * text document are read. Each of them is reduced to its text and followed
  * by an end of line. The encoded white spaces of OpenDocument ("text:s",
  * "text:tab" and "text:line-break") are converted into true spaces,
  * tabulations and ends of line. XML entities (&amp;, &lt;, &gt;) written by
  * saveXML() are left encoded.
  *
  * @param SimpleXMLElement $xml Parsed content of the text document.
  * @return string Cleaned raw text. Empty when no paragraph is found.
  */
 protected function _getRawTextFromXml($xml)
 {
     $content = '';

     // Patterns of the serialized white space elements. The lookahead avoids
     // to match longer names (text:span, text:soft-page-break, text:table...).
     $spacePattern = '~<text:s(?=[\s/])(?:[^>]*?\stext:c="(\d+)")?[^>]*/>~';
     $tabPattern = '~<text:tab(?=[\s/])[^>]*/>~';
     $lineBreakPattern = '~<text:line-break(?=[\s/])[^>]*/>~';

     // The content cannot be read directly, because extra spaces are encoded
     // and ends of line are needed. So some processes are needed.
     $xml->registerXPathNamespace('office', 'urn:oasis:names:tc:opendocument:xmlns:office:1.0');
     $xml->registerXPathNamespace('text', 'urn:oasis:names:tc:opendocument:xmlns:text:1.0');

     $paragraphs = $xml->xpath('/office:document-content/office:body/office:text/text:p');
     // xpath() returns false or null on error: there is nothing to read then.
     if (!$paragraphs) {
         return $content;
     }

     foreach ($paragraphs as $paragraph) {
         // $paragraph->__toString() is not used, because there can be
         // sub-elements.
         $paragraphXml = $paragraph->saveXML();

         // <text:s text:c="N"/> stands for N spaces (one space when there is
         // no count). The replacement contains no tag, so the loop ends once
         // all of them are converted.
         while (preg_match($spacePattern, $paragraphXml, $matches, PREG_OFFSET_CAPTURE)) {
             $totalSpaces = isset($matches[1]) ? (int) $matches[1][0] : 1;
             $paragraphXml = substr_replace(
                 $paragraphXml,
                 str_repeat(' ', $totalSpaces),
                 $matches[0][1],
                 strlen($matches[0][0])
             );
         }

         // Split and join is used instead of preg_replace(), so the
         // replacement is never read as a back reference.
         $paragraphXml = implode("\t", preg_split($tabPattern, $paragraphXml));
         $paragraphXml = implode($this->_endOfLine, preg_split($lineBreakPattern, $paragraphXml));

         $content .= strip_tags($paragraphXml);
         $content .= $this->_endOfLine;
     }

     return $content;
 }