/**
 * Determine whether the file at $ps_filepath is a zip-packaged Word, Excel or
 * PowerPoint XML document, and harvest its metadata when it is.
 *
 * The check only proceeds when the signature starts with "PK" (zip container).
 * The archive's entry names are then scanned in order; the first entry whose name
 * begins with "word/", "xl/" or "ppt/" decides the document type. For that type
 * the matching Zend_Search_Lucene_Document_* loader is invoked, the extracted
 * fields are stored in $this->opa_metadata (keyed by the type name) and the body
 * text is stored in $this->handle['content'].
 *
 * Metadata extraction is best-effort: any Exception raised while loading or
 * reading the document is ignored and the detected type is still returned.
 *
 * @param string $ps_filepath The path to the file to analyze
 * @param string $ps_sig The leading bytes (signature) of the file; only the first two are examined here
 * @return string|bool 'WORD', 'EXCEL' or 'PPT' depending on the detected document type, or boolean false if the file is not a zip archive or contains none of the expected folders
 */
private function isWordExcelorPPTXMLdoc($ps_filepath, $ps_sig) {
	// Anything that does not start with the zip magic "PK" cannot be one of these documents
	if (strncmp($ps_sig, 'PK', 2) !== 0) {
		return false;
	}

	$o_archive = new UnZipFile($ps_filepath);
	$va_entries = $o_archive->getFileList();
	if (!is_array($va_entries)) {
		return false;
	}

	// Folder prefix inside the archive => document type reported to the caller
	$va_types_by_prefix = array(
		'word/' => 'WORD',
		'xl/' => 'EXCEL',
		'ppt/' => 'PPT'
	);

	foreach (array_keys($va_entries) as $vs_entry) {
		$vs_type = null;
		foreach ($va_types_by_prefix as $vs_prefix => $vs_candidate) {
			if (strncmp($vs_entry, $vs_prefix, strlen($vs_prefix)) === 0) {
				$vs_type = $vs_candidate;
				break;
			}
		}

		if ($vs_type === null) {
			// Entry is not inside one of the recognised folders; keep scanning
			continue;
		}

		try {
			switch ($vs_type) {
				case 'WORD':
					$o_doc = Zend_Search_Lucene_Document_Docx::loadDocxFile($ps_filepath);
					// Word documents additionally expose a "subject" field
					$va_fields = array('title', 'subject', 'creator', 'created', 'modified');
					break;
				case 'EXCEL':
					$o_doc = Zend_Search_Lucene_Document_Xlsx::loadXlsxFile($ps_filepath);
					$va_fields = array('title', 'creator', 'created', 'modified');
					break;
				default:
					// Only 'PPT' remains at this point
					$o_doc = Zend_Search_Lucene_Document_Pptx::loadPptxFile($ps_filepath);
					$va_fields = array('title', 'creator', 'created', 'modified');
					break;
			}

			// Read every field before touching $this->opa_metadata so that a failure
			// part-way through leaves the previously stored metadata untouched
			$va_values = array();
			foreach ($va_fields as $vs_field) {
				$va_values[$vs_field] = $o_doc->getFieldUtf8Value($vs_field);
			}

			$this->opa_metadata = array($vs_type => $va_values);
			$this->handle['content'] = $o_doc->getFieldUtf8Value('body');
		} catch (Exception $e) {
			// noop
		}

		// The first recognised folder settles the type, even if extraction failed
		return $vs_type;
	}

	return false;
}