/**
 * Schedule the indexing of a document.
 *
 * Any existing entry for the document is removed from the index queue and
 * from the processing queue before a fresh row is inserted into each
 * (tables index_files and process_queue respectively).
 *
 * Failures to resolve the document are logged and the method returns
 * without queuing anything.
 *
 * @param mixed  $document A document object, or a numeric document id which
 *                         is resolved through Document::get().
 * @param string $what     Value stored in the index_files.what column.
 *
 * @return void
 */
public static function index($document, $what = 'A')
{
    global $default;

    if (is_numeric($document)) {
        $document = Document::get($document + 0);
    }

    if (PEAR::isError($document)) {
        $default->log->error("index: Could not index document: " . $document->getMessage());
        return;
    }

    // Anything that is still not an object (null, a non-numeric string, ...)
    // would cause a fatal error on getId() below, so log and bail out instead.
    if (!is_object($document)) {
        $default->log->error('index: Could not index document: no valid document supplied');
        return;
    }

    // The ids are interpolated into SQL below, so force them to integers.
    $document_id = (int) $document->getId();

    // empty() tolerates a missing session key, so no notice is raised when
    // there is no logged-in user; user 1 is the fallback in that case.
    $userid = empty($_SESSION['userID']) ? 1 : (int) $_SESSION['userID'];
    if ($userid === 0) {
        // A non-numeric session value casts to 0; treat it like "no user".
        $userid = 1;
    }

    // Escape the flag before embedding it in a quoted SQL literal.
    // NOTE(review): if DBUtil::runQuery() supports bound parameters, prefer
    // those over manual escaping - confirm against DBUtil.
    $what = addslashes((string) $what);

    // we dequeue the document so that there are no issues when enqueuing
    Indexer::unqueueDocument($document_id);

    // enqueue item
    $sql = "INSERT INTO index_files(document_id, user_id, what) VALUES({$document_id}, {$userid}, '{$what}')";
    DBUtil::runQuery($sql);

    $default->log->debug("index: Queuing indexing of {$document_id}");

    // Appending the process queue to the index for convenience
    // Don't want to complicate matters by creating too many new classes and files
    Indexer::unqueueDocFromProcessing($document_id);

    // enqueue item
    $date = date('Y-m-d H:i:s');
    $sql = "INSERT INTO process_queue(document_id, date_added) VALUES({$document_id}, '{$date}')";
    DBUtil::runQuery($sql);

    $default->log->debug("Processing queue: Queuing document for processing - {$document_id}");
}
/**
 * Fetch the processing queue and run the enabled processors on each document in it.
 *
 * For every queued item the document is loaded, each processor whose
 * supported mime types match the item's mime types is run on it, and the
 * item is then removed from the processing queue. Items whose document
 * cannot be loaded are removed from the queue with an error message.
 *
 * @return void
 */
public function processQueue()
{
    global $default;

    $logger = $default->log;
    $logger->debug('documentProcessor: starting processing');

    if ($this->processors === false) {
        $logger->info('documentProcessor: stopping - no processors enabled');
        return;
    }

    // The batch size is shared with the indexer (for now), but capped at 500:
    // Open office leaks memory, so too many documents must not be handled in one run.
    $batchSize = $this->limit;
    if ($batchSize > 500) {
        $batchSize = 500;
    }

    $pending = $this->indexer->getDocumentProcessingQueue($batchSize);
    if (empty($pending)) {
        $logger->debug('documentProcessor: stopping - no documents in processing queue');
        return;
    }

    foreach ($pending as $entry) {
        $documentId = $entry['document_id'];
        $doc = Document::get($documentId);

        // A document that cannot be loaded is dropped from the queue with an error.
        if (PEAR::isError($doc)) {
            $reason = 'Cannot resolve document id: ' . $doc->getMessage();
            Indexer::unqueueDocFromProcessing($documentId, $reason, 'error');
            continue;
        }

        // Without processors the item is left in the queue untouched.
        if ($this->processors === false) {
            continue;
        }

        foreach ($this->processors as $proc) {
            $logger->debug('documentProcessor: running processor: ' . $proc->getNamespace());

            // Only run the processor when it supports the item's mime type.
            $supported = $this->isSupportedMimeType($entry['mimetypes'], $proc->getSupportedMimeTypes());
            if ($supported) {
                $proc->setDocument($doc);
                $proc->processDocument();
            } else {
                $logger->debug('documentProcessor: not a supported mimetype: ' . $entry['mimetypes']);
            }
        }

        Indexer::unqueueDocFromProcessing($documentId, "Document processed", 'debug');
    }

    $logger->debug('documentProcessor: stopping processing, batch completed');
}