/**
 * Build a Lucene document for a file and hand it to the index.
 *
 * Content is only extracted when the file's storage reports itself as
 * local; for any other storage the document carries just the metadata
 * fields (fileId, path, mtime, size, mimetype).
 *
 * @param File $file the file to be indexed
 * @param bool $commit forwarded unchanged to the index's updateFile() call
 *
 * @return bool always true when the method returns normally
 * @throws NotIndexedException when a file on local storage matches none of
 *                             the handled mime types or file extensions
 */
public function indexFile(File $file, $commit = true)
{
    // The loader is picked by mime type first, then by (lower-cased) extension.
    $mime = $file->getMimeType();
    $extension = strtolower(pathinfo($file->getName(), PATHINFO_EXTENSION));

    // Start from an empty document; the specialised loaders below replace it.
    $document = new Document();

    $storage = $file->getStorage();
    if ($storage->isLocal()) {
        // The office/OpenDocument loaders read from a path on disk.
        $localPath = $storage->getLocalFile($file->getInternalPath());

        if ($mime === 'text/html') {
            // TODO could be indexed, even if not local
            $document = HTML::loadHTML($file->getContent());
        } elseif (substr($mime, 0, 5) === 'text/' || $mime === 'application/x-tex') {
            $text = $file->getContent();
            // Loose comparison kept on purpose: empty content adds no body field.
            if ($text != '') {
                $document->addField(Document\Field::UnStored('body', $text));
            }
        } elseif ($mime === 'application/pdf') {
            $document = Pdf::loadPdf($file->getContent());
        } else {
            // Remaining formats are recognised by extension only.
            // The zend classes only understand docx and not doc files.
            switch ($extension) {
                case 'docx':
                    $document = Document\Docx::loadDocxFile($localPath);
                    break;
                case 'xlsx':
                    $document = Document\Xlsx::loadXlsxFile($localPath);
                    break;
                case 'pptx':
                    $document = Document\Pptx::loadPptxFile($localPath);
                    break;
                case 'odt':
                    $document = Odt::loadOdtFile($localPath);
                    break;
                case 'ods':
                    $document = Ods::loadOdsFile($localPath);
                    break;
                default:
                    throw new NotIndexedException();
            }
        }
    }

    $id = $file->getId();

    // The filecache id is the unique key used to look the entry up again
    // (e.g. when deleting).
    $document->addField(Document\Field::Keyword('fileId', $id));

    // The path is what the search results display.
    $document->addField(Document\Field::Text('path', $file->getPath(), 'UTF-8'));
    $document->addField(Document\Field::unIndexed('mtime', $file->getMTime()));
    $document->addField(Document\Field::unIndexed('size', $file->getSize()));
    $document->addField(Document\Field::unIndexed('mimetype', $mime));

    $this->index->updateFile($document, $id, $commit);

    return true;
}