/**
 * Work out whether the file at $ps_filepath is a ZIP-packaged Word, Excel or PowerPoint XML document.
 *
 * Only files whose signature begins with the ZIP magic "PK" are inspected. The archive's file list
 * is walked in order and the first entry located under "word/", "xl/" or "ppt/" decides the type.
 * For that type the matching Zend_Search_Lucene document loader is invoked; on success the document
 * properties are stored in $this->opa_metadata (keyed by the type name) and the body text in
 * $this->handle['content']. A loader failure is ignored: the detected type is still returned, the
 * metadata and content are simply left as they were.
 *
 * @param string $ps_filepath The path to the file to analyze
 * @param string $ps_sig The signature (first 9 bytes) of the file
 * @return string|bool 'WORD', 'EXCEL' or 'PPT' depending on the kind of document found, or boolean
 *                     false when the file is not a ZIP archive, its file list cannot be read, or no
 *                     entry identifies it as one of the three document types
 */
private function isWordExcelorPPTXMLdoc($ps_filepath, $ps_sig) {
    // Anything without the ZIP magic cannot be one of these container formats.
    if (substr($ps_sig, 0, 2) != 'PK') {
        return false;
    }

    $o_archive = new UnZipFile($ps_filepath);
    $va_entries = $o_archive->getFileList();
    if (!is_array($va_entries)) {
        return false;
    }

    // Directory prefix inside the archive => document type it identifies.
    $va_type_by_prefix = array(
        'word/' => 'WORD',
        'xl/' => 'EXCEL',
        'ppt/' => 'PPT'
    );

    foreach (array_keys($va_entries) as $vs_entry_name) {
        $vs_type = null;
        foreach ($va_type_by_prefix as $vs_prefix => $vs_candidate) {
            if (substr($vs_entry_name, 0, strlen($vs_prefix)) == $vs_prefix) {
                $vs_type = $vs_candidate;
                break;
            }
        }
        if ($vs_type === null) {
            continue;
        }

        try {
            switch ($vs_type) {
                case 'WORD':
                    $o_doc = Zend_Search_Lucene_Document_Docx::loadDocxFile($ps_filepath);
                    // Word documents additionally expose a "subject" property.
                    $va_field_names = array('title', 'subject', 'creator', 'created', 'modified');
                    break;
                case 'EXCEL':
                    $o_doc = Zend_Search_Lucene_Document_Xlsx::loadXlsxFile($ps_filepath);
                    $va_field_names = array('title', 'creator', 'created', 'modified');
                    break;
                default:
                    $o_doc = Zend_Search_Lucene_Document_Pptx::loadPptxFile($ps_filepath);
                    $va_field_names = array('title', 'creator', 'created', 'modified');
                    break;
            }

            // Collect every property first so that a failure part-way through leaves
            // the previously stored metadata untouched.
            $va_properties = array();
            foreach ($va_field_names as $vs_field_name) {
                $va_properties[$vs_field_name] = $o_doc->getFieldUtf8Value($vs_field_name);
            }

            $this->opa_metadata = array($vs_type => $va_properties);
            $this->handle['content'] = $o_doc->getFieldUtf8Value('body');
        } catch (Exception $e) {
            // Metadata extraction is best-effort; the type detection result still stands.
        }

        return $vs_type;
    }

    return false;
}
/**
 * Parse a KML file, or a KMZ archive containing "doc.kml", and collect its placemarks.
 *
 * The file is first treated as a ZIP archive: if "doc.kml" can be extracted into a freshly
 * created temporary directory, that extracted file is parsed instead of $ps_filepath. The
 * temporary directory is removed again on every exit path once loading has been attempted.
 *
 * Each placemark found via the XPath "//g:Placemark" (the document's default namespace is
 * registered under the prefix "g") is appended to $this->opa_filedata['placemarks'] as either
 *   - array('name', 'type' => 'POINT', 'description', 'latitude', 'longitude') for a <Point>, or
 *   - array('name', 'type' => 'PATH', 'description', 'coordinates' => list of
 *     array('latitude', 'longitude')) for a <LineString> or the outer boundary of a <Polygon>.
 * Coordinate tuples are read as "longitude,latitude[,...]". Placemarks without any of these
 * geometries, or whose coordinate list is empty, are skipped.
 *
 * @param string $ps_filepath Path to the KML or KMZ file
 * @return bool|null True on success, false if the file could not be loaded as XML (or the
 *                   archive extraction reported success without producing doc.kml), null if the
 *                   SimpleXML extension is not available
 */
public function parse($ps_filepath) {
    if (!function_exists("simplexml_load_file")) {
        return null;
    }

    $this->init();

    // Is file a KMZ file?
    $o_unzip = new UnZipFile($ps_filepath);
    // tempnam() creates a uniquely named file; swap it for a directory of the same name.
    $vs_tmp_dirname = tempnam(caGetTempDirPath(), 'kml');
    @unlink($vs_tmp_dirname);
    @mkdir($vs_tmp_dirname);

    $vb_can_load = true;
    if ($o_unzip->extract($vs_tmp_dirname, 'doc.kml')) {
        if (file_exists($vs_tmp_dirname . '/doc.kml')) {
            $ps_filepath = $vs_tmp_dirname . '/doc.kml';
        } else {
            $vb_can_load = false;
        }
    }

    $o_kml = $vb_can_load ? @simplexml_load_file($ps_filepath) : false;

    // Clean up before any return so the temporary directory is never left behind
    // (previously it leaked on both failure paths).
    caRemoveDirectory($vs_tmp_dirname, true);

    if (!$o_kml) {
        return false;
    }

    //
    // Placemarks
    //
    $va_namespaces = $o_kml->getNamespaces(true);
    foreach ($va_namespaces as $vs_prefix => $vs_schema_url) {
        // The default (prefix-less) namespace has to be given a prefix to be usable in XPath.
        $o_kml->registerXPathNamespace($vs_prefix ? $vs_prefix : 'g', $vs_schema_url);
    }

    $va_placemarks = $o_kml->xpath('//g:Placemark');
    if (!is_array($va_placemarks)) {
        // xpath() does not return an array on error (e.g. no namespace was registered as "g").
        $va_placemarks = array();
    }

    $this->opa_filedata['placemarks'] = array();
    foreach ($va_placemarks as $va_placemark) {
        $vs_name = '' . $va_placemark->name[0];
        $vs_description = '' . $va_placemark->description[0];

        if (isset($va_placemark->Point)) {
            // Trim surrounding whitespace, as is done for line and polygon coordinates.
            $va_tmp = explode(',', trim((string)$va_placemark->Point->coordinates));
            $this->opa_filedata['placemarks'][] = array(
                'name' => $vs_name,
                'type' => 'POINT',
                'description' => $vs_description,
                'latitude' => isset($va_tmp[1]) ? $va_tmp[1] : null,
                'longitude' => $va_tmp[0]
            );
            continue;
        }

        // Determine where the coordinate list lives. This is decided afresh for every
        // placemark so that one without geometry cannot inherit the previous one's path.
        $vs_coords = null;
        if (isset($va_placemark->LineString) && isset($va_placemark->LineString->coordinates)) {
            $vs_coords = trim((string)$va_placemark->LineString->coordinates);
        } elseif (
            isset($va_placemark->Polygon)
            && isset($va_placemark->Polygon->outerBoundaryIs)
            && isset($va_placemark->Polygon->outerBoundaryIs->LinearRing)
            && isset($va_placemark->Polygon->outerBoundaryIs->LinearRing->coordinates)
        ) {
            $vs_coords = trim((string)$va_placemark->Polygon->outerBoundaryIs->LinearRing->coordinates);
        }
        if ($vs_coords === null) {
            continue;
        }

        // PREG_SPLIT_NO_EMPTY makes an empty <coordinates> element yield no tuples at all
        // instead of a single empty one.
        $va_coord_lines = preg_split("/[ \n\r]+/", $vs_coords, -1, PREG_SPLIT_NO_EMPTY);
        if (!is_array($va_coord_lines) || (sizeof($va_coord_lines) == 0)) {
            continue;
        }

        $va_coord_list = array();
        foreach ($va_coord_lines as $vs_coord_line) {
            $va_tmp = explode(',', $vs_coord_line);
            $va_coord_list[] = array(
                'latitude' => isset($va_tmp[1]) ? $va_tmp[1] : null,
                'longitude' => $va_tmp[0]
            );
        }

        $this->opa_filedata['placemarks'][] = array(
            'name' => $vs_name,
            'type' => 'PATH',
            'description' => $vs_description,
            'coordinates' => $va_coord_list
        );
    }

    return true;
}