Esempio n. 1
0
 /**
  * Detect whether the document at $ps_filepath is a Word, Excel or PowerPoint document stored as a zipped Office Open XML package (.docx, .xlsx, .pptx).
  * When a document is recognized, its metadata is stored in $this->opa_metadata and its body text in $this->handle['content'] on a best-effort basis.
  *
  * @param string $ps_filepath The path to the file to analyze
  * @param string $ps_sig The signature (first 9 bytes) of the file
  * @return string|bool WORD if the document is a Word doc, EXCEL if it is an Excel doc, PPT if it is a PowerPoint doc, or boolean false if it is not a valid Word, Excel or PowerPoint XML file
  */
 private function isWordExcelorPPTXMLdoc($ps_filepath, $ps_sig)
 {
     // The supported formats are ZIP packages, and every ZIP archive starts with "PK".
     if (substr($ps_sig, 0, 2) != 'PK') {
         return false;
     }

     $o_archive = new UnZipFile($ps_filepath);
     $va_entries = $o_archive->getFileList();
     if (!is_array($va_entries)) {
         return false;
     }

     // The first archive entry living under word/, xl/ or ppt/ decides the document type.
     foreach (array_keys($va_entries) as $vs_entry) {
         $vs_entry = (string)$vs_entry;
         if (strncmp($vs_entry, 'word/', 5) === 0) {
             $vs_type = 'WORD';
         } elseif (strncmp($vs_entry, 'xl/', 3) === 0) {
             $vs_type = 'EXCEL';
         } elseif (strncmp($vs_entry, 'ppt/', 4) === 0) {
             $vs_type = 'PPT';
         } else {
             continue;
         }

         // Metadata and body text are extracted on a best-effort basis: a document
         // the loader cannot read is still reported by type, just without metadata.
         try {
             switch ($vs_type) {
                 case 'WORD':
                     $o_package = Zend_Search_Lucene_Document_Docx::loadDocxFile($ps_filepath);
                     $va_field_names = array('title', 'subject', 'creator', 'created', 'modified');
                     break;
                 case 'EXCEL':
                     $o_package = Zend_Search_Lucene_Document_Xlsx::loadXlsxFile($ps_filepath);
                     $va_field_names = array('title', 'creator', 'created', 'modified');
                     break;
                 default:
                     $o_package = Zend_Search_Lucene_Document_Pptx::loadPptxFile($ps_filepath);
                     $va_field_names = array('title', 'creator', 'created', 'modified');
                     break;
             }

             // Collect everything first so the metadata property is only replaced
             // once all fields have been read successfully.
             $va_field_values = array();
             foreach ($va_field_names as $vs_field_name) {
                 $va_field_values[$vs_field_name] = $o_package->getFieldUtf8Value($vs_field_name);
             }
             $this->opa_metadata = array($vs_type => $va_field_values);
             $this->handle['content'] = $o_package->getFieldUtf8Value('body');
         } catch (Exception $e) {
             // noop
         }

         return $vs_type;
     }

     return false;
 }
Esempio n. 2
0
 /**
  * Parse a KML file, or a KMZ archive containing "doc.kml", and collect its placemarks.
  *
  * On success $this->opa_filedata['placemarks'] holds one array per placemark:
  *  - Point placemarks: name, type 'POINT', description, latitude, longitude
  *  - LineString and Polygon (outer boundary) placemarks: name, type 'PATH', description,
  *    coordinates (a list of arrays with 'latitude' and 'longitude' keys)
  * Coordinate values are kept as the strings found in the file. Coordinate tuples that
  * do not contain at least "longitude,latitude" are skipped, and a placemark left without
  * any usable coordinates is not recorded.
  *
  * @param string $ps_filepath Path to the KML or KMZ file
  * @return bool|null True on success, false if the file could not be loaded as XML, null if SimpleXML is not available
  */
 public function parse($ps_filepath)
 {
     if (!function_exists("simplexml_load_file")) {
         return null;
     }
     $this->init();

     // Is file a KMZ file? If so, doc.kml is unpacked into a scratch directory and parsed from there.
     $o_unzip = new UnZipFile($ps_filepath);
     $vs_tmp_dirname = tempnam(caGetTempDirPath(), 'kml');
     @unlink($vs_tmp_dirname);
     @mkdir($vs_tmp_dirname);

     $vb_can_load = true;
     if ($o_unzip->extract($vs_tmp_dirname, 'doc.kml')) {
         if (file_exists($vs_tmp_dirname . '/doc.kml')) {
             $ps_filepath = $vs_tmp_dirname . '/doc.kml';
         } else {
             $vb_can_load = false;
         }
     }
     $o_kml = $vb_can_load ? @simplexml_load_file($ps_filepath) : false;

     // The document is fully in memory (or loading failed), so the scratch directory
     // is removed here on every path instead of only after a successful load.
     caRemoveDirectory($vs_tmp_dirname, true);
     if (!$o_kml) {
         return false;
     }

     //
     // Placemarks
     //
     // The default (unprefixed) namespace is registered as "g" so XPath can address it.
     $vb_g_registered = false;
     $va_namespaces = $o_kml->getNamespaces(true);
     foreach ($va_namespaces as $vs_prefix => $vs_schema_url) {
         $vs_xpath_prefix = $vs_prefix ? $vs_prefix : 'g';
         $o_kml->registerXPathNamespace($vs_xpath_prefix, $vs_schema_url);
         if ($vs_xpath_prefix === 'g') {
             $vb_g_registered = true;
         }
     }

     // Without a registered "g" prefix the prefixed query would fail; such documents
     // carry their elements in no namespace, so they are queried without a prefix.
     $va_placemarks = $o_kml->xpath($vb_g_registered ? '//g:Placemark' : '//Placemark');
     if (!is_array($va_placemarks)) {
         $va_placemarks = array();
     }

     $this->opa_filedata['placemarks'] = array();
     foreach ($va_placemarks as $va_placemark) {
         $vs_name = '' . $va_placemark->name[0];
         $vs_description = '' . $va_placemark->description[0];

         if (isset($va_placemark->Point)) {
             // KML stores tuples as "longitude,latitude[,altitude]"
             $va_tmp = explode(',', trim((string)$va_placemark->Point->coordinates));
             if (sizeof($va_tmp) >= 2) {
                 $this->opa_filedata['placemarks'][] = array('name' => $vs_name, 'type' => 'POINT', 'description' => $vs_description, 'latitude' => $va_tmp[1], 'longitude' => $va_tmp[0]);
             }
             continue;
         }

         // Start from an empty string for every placemark so that one without a
         // supported geometry never inherits the previous placemark's coordinates.
         $vs_coords = '';
         if (isset($va_placemark->LineString) && isset($va_placemark->LineString->coordinates)) {
             $vs_coords = (string)$va_placemark->LineString->coordinates;
         } elseif (isset($va_placemark->Polygon) && isset($va_placemark->Polygon->outerBoundaryIs) && isset($va_placemark->Polygon->outerBoundaryIs->LinearRing) && isset($va_placemark->Polygon->outerBoundaryIs->LinearRing->coordinates)) {
             $vs_coords = (string)$va_placemark->Polygon->outerBoundaryIs->LinearRing->coordinates;
         }

         // Tuples are separated by any whitespace; empty pieces are dropped.
         $va_coord_lines = preg_split('/\s+/', trim($vs_coords), -1, PREG_SPLIT_NO_EMPTY);
         if (!is_array($va_coord_lines)) {
             $va_coord_lines = array();
         }

         $va_coord_list = array();
         foreach ($va_coord_lines as $vs_coord_line) {
             $va_tmp = explode(',', $vs_coord_line);
             if (sizeof($va_tmp) < 2) {
                 continue;
             }
             $va_coord_list[] = array('latitude' => $va_tmp[1], 'longitude' => $va_tmp[0]);
         }

         if (sizeof($va_coord_list) > 0) {
             $this->opa_filedata['placemarks'][] = array('name' => $vs_name, 'type' => 'PATH', 'description' => $vs_description, 'coordinates' => $va_coord_list);
         }
     }
     return true;
 }