/**
 * Extracts the document's text through the parent extractor and, when the
 * conversion target is HTML, filters the result into a ".txt" target file.
 *
 * Documents without an extension, or with an extension Open Office cannot
 * handle, are removed from the indexing queue and reported as failed.
 * When the parent extraction fails with an error that suggests Open Office
 * can never index the file, an empty target file is written, the batch is
 * restarted, the document is unqueued and the call reports success.
 *
 * @return bool true when a target file (possibly empty) was produced,
 *              false when the extraction failed or the document was skipped.
 */
public function extractTextContent()
 {
     global $default;
     $docId = $this->document->getId();

     if (empty($this->extension)) {
         $default->log->info("DocumentId: {$docId} - Document does not have an extension");
         Indexer::unqueueDocument($docId, sprintf("Removing document from queue: documentId %d", $docId));
         return false;
     }

     // Open Office does not support the following files
     if (in_array($this->extension, array('xlt'), true)) {
         $default->log->info("DocumentId: {$docId} - Open Office does not support .xlt.");
         Indexer::unqueueDocument($docId, sprintf("Removing document from queue - Open Office does not support .xlt: documentId %d", $docId));
         return false;
     }

     if (false === parent::extractTextContent()) {
         // Open Office is not reachable: restart the batch and report the failure.
         if (strpos($this->output, 'OpenOffice process not found or not listening') !== false) {
             $indexer = Indexer::get();
             $indexer->restartBatch();
             return false;
         }

         // Output fragments that suggest the file itself cannot be indexed by Open Office.
         $unindexableMarkers = array(
             'Unexpected connection closure',
             '\'NoneType\' object has no attribute \'storeToURL\'',
             'The document could not be opened for conversion. This could indicate an unsupported mimetype.',
             'URL seems to be an unsupported one.',
             '__main__.com.sun.star.task.ErrorCodeIOException',
         );
         foreach ($unindexableMarkers as $marker) {
             if (strpos($this->output, $marker) !== false) {
                 $default->log->info("DocumentId: {$docId} - Suspect the file cannot be indexed by Open Office.");
                 // Leave an empty target file and drop the document from the queue.
                 file_put_contents($this->targetfile, '');
                 $indexer = Indexer::get();
                 $indexer->restartBatch();
                 Indexer::unqueueDocument($docId, sprintf(_kt("Removing document from queue: documentId %d"), $docId));
                 return true;
             }
         }

         return false;
     }

     // Only HTML output is filtered into text; any other target is emptied.
     if ($this->targetExtension != 'html') {
         file_put_contents($this->targetfile, '');
         return true;
     }

     $content = file_get_contents($this->targetfile);
     if ($content === false) {
         // A read failure must not be reported as a successful, empty extraction.
         $default->log->info("DocumentId: {$docId} - Could not read the converted file {$this->targetfile}");
         return false;
     }

     $this->setTargetFile($this->targetfile . '.txt');
     $content = $this->filter($content);

     // empty() also matches the string "0", which is legitimate document text.
     if (empty($content) && $content !== '0') {
         return touch($this->targetfile);
     }

     // Normalise the byte count returned by file_put_contents() to a boolean.
     return file_put_contents($this->targetfile, $content) !== false;
 }
Пример #2
0
 /**
  * Extracts the document's text with the configured command-line tool.
  *
  * When a command is configured, the parent extractor is tried first and
  * its success is returned directly. Otherwise, or when the parent fails,
  * the reason is logged, an empty target file is written and the call
  * still reports success. An Open Office fallback used to follow the
  * command attempt; it is currently disabled.
  *
  * @return bool always true on the paths visible here.
  */
 public function extractTextContent()
 {
     global $default;

     if ($this->cmd !== false) {
         // so we have catppt or something
         if (parent::extractTextContent() !== false) {
             // if it succeeded, we can bail
             return true;
         }
         // The command exists but could not extract anything.
         $reason = "{$this->cmd} failed to extract its content";
     } else {
         // Interpolating a false command would leave a blank in the log line.
         $reason = 'no extraction command is available';
     }

     $docId = $this->document->getId();
     $default->log->info("The document {$docId} cannot be indexed as {$reason} and OpenOffice is not in use.");
     file_put_contents($this->targetfile, '');
     return true;
 }