/**
  * Prepare the unzip command and its parameter template, then run the
  * parent constructor.
  *
  * The command is looked up through KTUtil::findCommand() under the
  * "import/unzip" key; the parameter template is read from the
  * "extractorParameters/unzip" configuration entry.
  */
 public function __construct()
 {
     $ktConfig = KTConfig::getSingleton();

     // Backslashes in the located command are replaced with forward slashes
     // (presumably to normalise Windows paths - confirm).
     $locatedCommand = KTUtil::findCommand("import/unzip", 'unzip');
     $this->unzip = str_replace('\\', '/', $locatedCommand);

     // The second argument looks like the fallback used when the config entry
     // is absent - verify against KTConfig::get().
     $fallbackParams = '"{source}" "{part}" -d "{target_dir}"';
     $this->unzip_params = $ktConfig->get('extractorParameters/unzip', $fallbackParams);

     parent::__construct();
 }
 /**
  * Extract the document's text through the parent extractor and store the
  * filtered result in a ".txt" target file.
  *
  * Flow:
  *  - Documents without an extension, and ".xlt" documents, are removed from
  *    the indexing queue and not processed.
  *  - If the parent extraction fails, the captured output is inspected. A
  *    missing/not-listening OpenOffice process restarts the batch. A known
  *    set of conversion errors writes an empty target file, restarts the
  *    batch, unqueues the document and reports success.
  *  - If the target extension is not "html", an empty target file is written.
  *  - Otherwise the converted file is read, the target file is switched to
  *    "<targetfile>.txt", and the filtered content is written there.
  *
  * @return bool True when a target file (possibly empty) was produced or the
  *              document was deliberately skipped as unindexable; false on
  *              failure.
  */
 public function extractTextContent()
 {
     global $default;

     $docId = $this->document->getId();

     if (empty($this->extension)) {
         $default->log->info("DocumentId: {$docId} - Document does not have an extension");
         Indexer::unqueueDocument($docId, sprintf("Removing document from queue: documentId %d", $docId));
         return false;
     }

     // Open Office does not support the following files
     if (in_array($this->extension, array('xlt'), true)) {
         $default->log->info("DocumentId: {$docId} - Open Office does not support .xlt.");
         Indexer::unqueueDocument($docId, sprintf("Removing document from queue - Open Office does not support .xlt: documentId %d", $docId));
         return false;
     }

     if (false === parent::extractTextContent()) {
         // The conversion service itself is unavailable: restart the batch and
         // leave the document queued.
         if (strpos($this->output, 'OpenOffice process not found or not listening') !== false) {
             Indexer::get()->restartBatch();
             return false;
         }

         // Output fragments that indicate this particular file cannot be
         // converted, as opposed to a transient failure.
         $unindexableMarkers = array(
             'Unexpected connection closure',
             "'NoneType' object has no attribute 'storeToURL'",
             'The document could not be opened for conversion. This could indicate an unsupported mimetype.',
             'URL seems to be an unsupported one.',
             '__main__.com.sun.star.task.ErrorCodeIOException',
         );

         foreach ($unindexableMarkers as $marker) {
             if (strpos($this->output, $marker) === false) {
                 continue;
             }

             $default->log->info("DocumentId: {$docId} - Suspect the file cannot be indexed by Open Office.");
             file_put_contents($this->targetfile, '');
             Indexer::get()->restartBatch();
             Indexer::unqueueDocument($docId, sprintf(_kt("Removing document from queue: documentId %d"), $docId));
             return true;
         }

         return false;
     }

     if ($this->targetExtension != 'html') {
         file_put_contents($this->targetfile, '');
         return true;
     }

     $content = file_get_contents($this->targetfile);
     if ($content === false) {
         // Previously a failed read was passed on to filter() and ended up
         // reported as a successful, empty extraction.
         $default->log->info("DocumentId: {$docId} - Could not read the converted file.");
         return false;
     }

     $this->setTargetFile($this->targetfile . '.txt');
     $content = $this->filter($content);

     // Explicit comparison instead of empty(): the literal text "0" is real
     // content and must be written out rather than discarded.
     if ($content === null || $content === false || $content === '') {
         return touch($this->targetfile);
     }

     // Normalise the byte count returned by file_put_contents() to a boolean
     // so every path of this method returns the same type.
     return file_put_contents($this->targetfile, $content) !== false;
 }
Exemplo n.º 3
0
 /**
  * Initialise the extractor for an external application.
  *
  * Runs the parent constructor, stores the supplied values on the instance
  * and resolves the application through KTUtil::findCommand() using the
  * key "<section>/<appname>".
  *
  * @param string $section     The section in the config file.
  * @param string $appname     The application name in the config file.
  * @param string $command     The command that can be run.
  * @param string $displayname Stored as-is on the instance as `displayname`.
  * @param string $params      Stored as-is on the instance as `params`.
  */
 public function __construct($section, $appname, $command, $displayname, $params)
 {
     parent::__construct();

     $this->command = $command;
     $this->displayname = $displayname;
     $this->params = $params;

     // $command is handed to findCommand() as the second argument
     // (presumably the fallback when nothing is configured - confirm).
     $configKey = $section . '/' . $appname;
     $this->application = KTUtil::findCommand($configKey, $command);
 }