예제 #1
0
 /**
  * Execute the console command.
  *
  * Registers inotify watches starting from the root path, then loops forever
  * reading events and re-indexing the affected path one level deep. Directories
  * that show up while running get a watch of their own. The loop has no exit
  * condition, so the command runs until the process is terminated.
  *
  * @return mixed
  */
 public function fire()
 {
     $watches = array();
     $in = inotify_init();
     // add watches starting from root directory
     $root = Path::fromRelative('');
     $this->addWatches($in, $root, $watches);
     printf("\nReading for events\n");
     while (true) {
         $events = inotify_read($in);
         // inotify_read() yields false instead of an array when nothing could be read
         if (!is_array($events)) {
             continue;
         }
         foreach ($events as $event) {
             // Events may carry a descriptor we never registered (a queue overflow
             // is reported with wd -1); there is no path to index for those.
             if (!isset($watches[$event['wd']])) {
                 continue;
             }
             $path = $watches[$event['wd']];
             // Compare against the empty string: a plain truthiness test would
             // treat an entry literally named "0" as "no name".
             $name = isset($event['name']) ? (string) $event['name'] : '';
             if ($name === '') {
                 // event must apply to this directory, so index it, 1 level deep
                 Indexer::index($path, 1);
                 continue;
             }
             // the event has a name attached, so index that entry
             $newPath = new Path($path->getPathname() . '/' . $name);
             Indexer::index($newPath, 1);
             // this may be a new directory, so add a watch to it anyway
             if ($newPath->exists() && $newPath->isDir()) {
                 try {
                     $wd = inotify_add_watch($in, $newPath->getPathname(), $this->computedMask);
                     // a failed registration returns false; do not store it as a watch key
                     if ($wd !== false) {
                         $watches[$wd] = $newPath;
                     }
                 } catch (Exception $e) {
                     echo 'Caught exception: ', $e->getMessage(), "\n";
                 }
             }
         }
     }
 }
예제 #2
0
 /**
  * Execute the console command.
  *
  * Runs the indexer over the path configured under 'app.manga_path' and
  * reports the counter that was handed to Indexer::index().
  *
  * @return mixed
  */
 public function fire()
 {
     $root = new Path(Config::get('app.manga_path'));
     $added = 0;
     Indexer::index($root, null, $added);
     $message = sprintf('Added %s new paths', $added);
     $this->info($message);
 }
예제 #3
0
 /**
  * Toggle the indexing-queue state of the current document, then redirect
  * back to the document view.
  *
  * Only system administrators trigger the toggle; everyone is redirected.
  */
 function do_main()
 {
     $document = $this->oDocument;
     $docid = $document->getId();
     if (Permission::userIsSystemAdministrator()) {
         $fullPath = $document->getFullPath();
         $alreadyQueued = Indexer::isDocumentScheduled($docid);
         if ($alreadyQueued) {
             Indexer::unqueueDocument($docid);
             $template = _kt("Document '%s' has been removed from the indexing queue.");
         } else {
             Indexer::index($document, 'A');
             $template = _kt("Document '%s' has been added to the indexing queue.");
         }
         $this->addInfoMessage(sprintf($template, $fullPath));
     }
     redirect("view.php?fDocumentId={$docid}");
     exit;
 }
예제 #4
0
 /**
  * Process a document - extract text and index it
  * Refactored from indexDocuments()
  *
  * Depending on $docinfo['what'] the document content ('A' or 'C') and/or its
  * discussion ('A' or 'D') is indexed. The document is removed from the queue
  * when indexing succeeds or when it can never be indexed; it is left queued
  * when extraction or indexing failed so that it can be retried.
  *
  * @param object $document document being indexed; must provide getFileSize(), getFileName(),
  *                         getName() and the major/minor version getters
  * @param array  $docinfo  queue row with the keys 'document_id', 'filetypes', 'mimetypes',
  *                         'extractor' and 'what'
  * @return void
  */
 public function processDocument($document, $docinfo)
 {
     global $default;
     // extractor instances are reused across calls, keyed by class name
     static $extractorCache = array();
     // increment indexed documents count
     Indexer::incrementCount();
     // if document is a zero byte file, let's just unqueue and return
     if ($document->getFileSize() == 0) {
         Indexer::unqueueDocument($docinfo['document_id'], sprintf(_kt("Zero Byte documents do not need to be indexed: %d"), $docinfo['document_id']));
         return;
     }
     $docId = $docinfo['document_id'];
     $extension = $docinfo['filetypes'];
     $mimeType = $docinfo['mimetypes'];
     $extractorClass = $docinfo['extractor'];
     $indexDocument = in_array($docinfo['what'], array('A', 'C'));
     $indexDiscussion = in_array($docinfo['what'], array('A', 'D'));
     $this->indexingHistory = '';
     $tempPath = $this->tempPath;
     $this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Indexing docid: %d extension: '%s' mimetype: '%s' extractor: '%s'"), $docId, $extension, $mimeType, $extractorClass), 'debug');
     if (empty($extractorClass)) {
         /*
         if no extractor is found and we don't need to index discussions, then we can remove the item from the queue.
         */
         if ($indexDiscussion) {
             $indexDocument = false;
             $this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Not indexing docid: %d content because extractor could not be resolve. Still indexing discussion."), $docId), 'info');
         } else {
             Indexer::unqueueDocument($docId, sprintf(_kt("No extractor for docid: %d"), $docId));
             return;
         }
     } else {
         /*
         If an extractor is available, we must ensure it is enabled.
         */
         if (!$this->isExtractorEnabled($extractorClass)) {
             $this->logPendingDocumentInfoStatus($docId, sprintf(_kt("diagnose: Not indexing docid: %d because extractor '%s' is disabled."), $docId, $extractorClass), 'info');
             return;
         }
     }
     if ($this->debug) {
         $this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Processing docid: %d.\n"), $docId), 'info');
     }
     if ($this->restartCurrentBatch) {
         // put the document back on the queue so it is picked up by the next batch
         Indexer::unqueueDocument($docId);
         Indexer::index($docId, 'A');
         return;
     }
     $filename = $document->getFileName();
     // a leading or trailing tilde marks the file as temporary
     if (substr($filename, 0, 1) == '~' || substr($filename, -1) == '~') {
         Indexer::unqueueDocument($docId, sprintf(_kt("indexDocuments: Filename for document id %d starts with a tilde (~). This is assumed to be a temporary file. This is ignored."), $docId), 'error');
         return;
     }
     $removeFromQueue = true;
     if ($indexDocument) {
         if (array_key_exists($extractorClass, $extractorCache)) {
             $extractor = $extractorCache[$extractorClass];
         } else {
             $extractor = $extractorCache[$extractorClass] = $this->getExtractor($extractorClass);
         }
         if (!$extractor instanceof DocumentExtractor) {
             $this->logPendingDocumentInfoStatus($docId, sprintf(_kt("indexDocuments: extractor '%s' is not a document extractor class."), $extractorClass), 'error');
             return;
         }
         $version = $document->getMajorVersionNumber() . '.' . $document->getMinorVersionNumber();
         $sourceFile = $this->storageManager->temporaryFile($document);
         if (empty($sourceFile) || !is_file($sourceFile)) {
             Indexer::unqueueDocument($docId, sprintf(_kt("indexDocuments: source file '%s' for document %d does not exist."), $sourceFile, $docId), 'error');
             // This method is no longer the body of a loop, so leave it with
             // return; a bare "continue" here is a fatal error.
             return;
         }
         if ($extractor->needsIntermediateSourceFile()) {
             $intermediate = $tempPath . '/' . $docId . '.' . $extension;
             $result = @copy($sourceFile, $intermediate);
             if ($result === false) {
                 $this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Could not create intermediate file from document %d"), $docId), 'error');
                 // problem. lets try again later. probably permission related. log the issue.
                 // The document stays queued; return instead of the former "continue".
                 return;
             }
             $sourceFile = $intermediate;
         }
         $extractor->setSourceFile($sourceFile);
         $extractor->setMimeType($mimeType);
         $extractor->setExtension($extension);
         $extractor->setDocument($document);
         $extractor->setIndexingStatus(null);
         $extractor->setExtractionStatus(null);
         $targetFile = tempnam($tempPath, 'ktindexer');
         $extractor->setTargetFile($targetFile);
         $this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Extra Info docid: %d Source File: '%s' Target File: '%s'"), $docId, $sourceFile, $targetFile), 'debug');
         $this->executeHook($extractor, 'pre_extract');
         $this->executeHook($extractor, 'pre_extract', $mimeType);
         // from here on the document is only unqueued once indexing reports success
         $removeFromQueue = false;
         if ($extractor->extractTextContent()) {
             // the extractor may need to create another target file
             $targetFile = $extractor->getTargetFile();
             $extractor->setExtractionStatus(true);
             $this->executeHook($extractor, 'pre_index');
             $this->executeHook($extractor, 'pre_index', $mimeType);
             $title = $document->getName();
             if (!$this->filterText($targetFile)) {
                 $this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Problem filtering document %d"), $docId), 'error');
             } else {
                 if ($indexDiscussion) {
                     $indexStatus = $this->indexDocumentAndDiscussion($docId, $targetFile, $title, $version);
                     if (!$indexStatus) {
                         $this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Problem indexing document %d - indexDocumentAndDiscussion"), $docId), 'error');
                     }
                 } else {
                     $indexStatus = $this->indexDocument($docId, $targetFile, $title, $version);
                     if (!$indexStatus) {
                         $this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Problem indexing document %d - indexDocument"), $docId), 'error');
                         $this->logPendingDocumentInfoStatus($docId, '<output>' . $extractor->output . '</output>', 'error');
                     }
                 }
                 $removeFromQueue = $indexStatus;
                 $extractor->setIndexingStatus($indexStatus);
             }
             $this->executeHook($extractor, 'post_index', $mimeType);
             $this->executeHook($extractor, 'post_index');
         } else {
             $extractor->setExtractionStatus(false);
             $this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Could not extract contents from document %d"), $docId), 'error');
             $this->logPendingDocumentInfoStatus($docId, '<output>' . $extractor->output . '</output>', 'error');
         }
         $this->executeHook($extractor, 'post_extract', $mimeType);
         $this->executeHook($extractor, 'post_extract');
         // clean up the working files created for this document
         if ($extractor->needsIntermediateSourceFile()) {
             @unlink($sourceFile);
         }
         @unlink($targetFile);
     } else {
         $indexStatus = $this->indexDiscussion($docId);
         $removeFromQueue = $indexStatus;
     }
     if ($removeFromQueue) {
         Indexer::unqueueDocument($docId, sprintf(_kt("Done indexing docid: %d"), $docId));
     } else {
         if ($this->debug) {
             $default->log->debug(sprintf(_kt("Document docid: %d was not removed from the queue as it looks like there was a problem with the extraction process"), $docId));
         }
     }
 }
    /**
     * Build one Lucene index per JSON file found in the "data/json" directory
     * that sits next to this source file.
     *
     * Each index is created under "data/index/<file name without .json>". Every
     * entry of the decoded JSON becomes a document with a "url" field (taken
     * from the entry's title) and an unstored "contents" field (the entry's text).
     * Files that are not *.json, cannot be read, or do not decode to a
     * collection are skipped and leave any existing index untouched.
     *
     * @return void
     */
    public function index()
    {
        $dir = realpath(dirname(__FILE__)) . DIRECTORY_SEPARATOR . "data" . DIRECTORY_SEPARATOR;
        $jsonDir = $dir . "json";
        $indexDir = $dir . "index";
        // read the JSON files
        $files = scandir($jsonDir);
        if ($files === false) {
            // scandir() has already raised a warning; there is nothing to iterate
            return;
        }
        foreach ($files as $file) {
            $jsonPath = $jsonDir . DIRECTORY_SEPARATOR . $file;
            // only regular files qualify (this also skips "." and "..")
            if (!is_file($jsonPath)) {
                continue;
            }
            // the index name is the file name minus ".json", so require that extension
            if (strtolower(pathinfo($file, PATHINFO_EXTENSION)) !== 'json') {
                continue;
            }
            $raw = file_get_contents($jsonPath);
            if ($raw === false) {
                continue;
            }
            $json = json_decode($raw);
            // json_decode() returns null on malformed input; scalars cannot be iterated either
            if (!is_array($json) && !is_object($json)) {
                continue;
            }
            $indexName = pathinfo($file, PATHINFO_FILENAME);
            // create the index
            $index = Lucene\Lucene::create($indexDir . DIRECTORY_SEPARATOR . $indexName);
            // create a document per entry and define the fields to index
            foreach ($json as $entry) {
                if (!is_object($entry) || !isset($entry->title, $entry->text)) {
                    // entry lacks the fields we index
                    continue;
                }
                $doc = new Lucene\Document();
                $doc->addField(Lucene\Document\Field::Text('url', $entry->title));
                $doc->addField(Lucene\Document\Field::UnStored('contents', $entry->text));
                $index->addDocument($doc);
            }
        }
    }
}
// Instantiate the Indexer and run its index() method as soon as this script is loaded.
$ix = new Indexer();
$ix->index();
// POST /reindex: rebuild the index for a single file when the request names
// one, otherwise drop the whole index and rebuild it.
$server->post('/reindex', function ($request, $response, $next) use($indexer) {
    echo "Starting reindex...\n";
    if ($request->file) {
        $file = realpath($request->file);
        // realpath() returns false when the path does not exist; do not hand
        // that to the indexer as if it were a file name.
        if ($file === false) {
            echo "File not found, nothing indexed.\n";
            $next();
            return;
        }
        $indexer->delete_file_index($file);
        $indexer->index_file($file);
    } else {
        $indexer->delete_index();
        $indexer->index();
    }
    echo "Indexing completed.\n";
    $next();
});
// POST /index-php: clear the PHP indexer's index and build it again.
$server->post(
    '/index-php',
    function ($request, $response, $next) use ($indexer_php) {
        $indexer_php->delete_index();
        $indexer_php->index();
        print "Indexing Complete\n";
        $next();
    }
);
// POST /updateindex: run the indexer again without deleting the existing index first.
$server->post(
    '/updateindex',
    function ($request, $response, $next) use ($indexer) {
        print "Updating reindex...\n";
        $indexer->index();
        print "Indexing completed.\n";
        $next();
    }
);
$server->get('/search', function ($request, $response, $next) use($db) {
    $params = $request->httpRequest->getQuery();
    $query = array();
    if (isset($params['namespace'])) {
        $query['namespace'] = $params['namespace'];
    }
예제 #7
0
파일: YASE.php 프로젝트: vichingo/yase
 /**
  * Rebuild the index for the account that belongs to the given domain:
  * the indexer is cleared first and then run again.
  *
  * @param mixed $domain value passed to getAccountId() to resolve the account
  */
 public function index($domain)
 {
     $accountId = $this->getAccountId($domain);
     $accountIndexer = new Indexer($accountId);
     $accountIndexer->clear();
     $accountIndexer->index();
 }
예제 #8
0
 /**
  * Hand the given document to Indexer::index().
  *
  * @param mixed $oDocument passed through unchanged (presumably a document object; confirm against callers)
  */
 function reindexDocument($oDocument)
 {
     Indexer::index($oDocument);
 }
<?php

require __DIR__ . '/vendor/autoload.php';
require __DIR__ . '/inc/DB.php';
require __DIR__ . '/inc/Indexer.php';
require __DIR__ . '/inc/IndexerNodeTraverserVisitor.php';
// The path to index comes from the first command-line argument. Validate it
// before constructing the indexer: a missing argument or a nonexistent path
// would otherwise reach Indexer as null/false.
if (!isset($argv[1])) {
    fwrite(STDERR, 'Usage: php ' . basename(__FILE__) . " <path>\n");
    exit(1);
}
$root = realpath($argv[1]);
if ($root === false) {
    fwrite(STDERR, 'Path not found: ' . $argv[1] . "\n");
    exit(1);
}
$indexer = new Indexer($root);
$indexer->index();
// Pass 1: walk every row, collect the ids of documents the indexer does not
// have, and print progress (a dot every 100 rows, a line break every 4000).
$i = 0;
foreach ($rows as $row) {
    $docId = $row['id'];
    if (!$indexer->isDocumentIndexed($docId)) {
        $notIndexed[] = $docId;
    }
    if ($i % 100 == 0) {
        print '.';
    }
    if ($i++ % 4000 == 0) {
        print "\n";
    }
}
// Pass 2: report the missing documents as CSV and optionally queue them again.
print "\nReporting...\n";
if (empty($notIndexed)) {
    print "All documents are indexed\n";
} else {
    print "\n-----START-----\n\"Document Id\",\"Title\",\"Full Path\"\n";
    // ids are placed directly into the SQL text, so force them to integers first
    $idList = implode(',', array_map('intval', $notIndexed));
    $sql = "select d.id, dm.name as title, d.full_path  from documents d inner join document_metadata_version dm on d.metadata_version_id = dm.id where d.id in ({$idList}) ";
    $rows = DBUtil::getResultArray($sql);
    foreach ($rows as $row) {
        // double any embedded quote so the quoted CSV fields stay well-formed
        $title = str_replace('"', '""', $row['title']);
        $fullPath = str_replace('"', '""', $row['full_path']);
        print '"' . $row['id'] . '","' . $title . '","' . $fullPath . '"' . "\n";
        if ($reindex) {
            // Queue the document of THIS report row. The previous code passed
            // $docId, which still held the id of the last row from pass 1.
            Indexer::index($row['id']);
            $GLOBALS["_OBJECTCACHE"] = array();
        }
    }
    print "-----END-----\n\nDone\n";
}
exit;