/**
 * Index a single file.
 *
 * Builds a Lucene document for the file (an HTML document for text/html,
 * a plain document for everything else), attaches the pk, path, size and
 * mimetype fields, runs self::extractMetadata() on it and passes it to
 * Lucene::updateFile().
 *
 * @author Jörn Dreyer <*****@*****.**>
 *
 * @param string      $path the path of the file
 * @param string|null $user user whose '/<user>/files' view is used; when
 *                          null, Filesystem::getView() and the current
 *                          user (\OCP\User::getUser()) are used instead
 *
 * @return bool true once the document was passed to Lucene::updateFile();
 *              false when the path is invalid or empty, no view could be
 *              resolved, or the file info has no mimetype
 */
public static function indexFile($path = '', $user = null)
{
    if (!Filesystem::isValidPath($path)) {
        // Return an explicit bool: a bare `return;` here used to yield null
        // although the method is documented to return bool.
        return false;
    }
    if ($path === '') {
        // ignore the empty path element
        return false;
    }

    if (is_null($user)) {
        $view = Filesystem::getView();
        $user = \OCP\User::getUser();
    } else {
        $view = new \OC\Files\View('/' . $user . '/files');
    }

    if (!$view) {
        Util::writeLog('search_lucene', 'could not resolve filesystem view', Util::WARN);
        return false;
    }

    // Unique key of the document: md5 over the view root plus the path.
    $root = $view->getRoot();
    $pk = md5($root . $path);

    // the cache already knows mime and other basic stuff
    $data = $view->getFileInfo($path);
    if (!isset($data['mimetype'])) {
        Util::writeLog('search_lucene', 'need mimetype for content extraction', Util::ERROR);
        return false;
    }
    $mimetype = $data['mimetype'];

    if ('text/html' === $mimetype) {
        $doc = \Zend_Search_Lucene_Document_Html::loadHTML($view->file_get_contents($path));
    } else {
        // FIXME application/msword gets no special treatment yet: the Zend
        // docx loader uses ZipArchive and still has to be made compatible
        // with OC\Files\Filesystem. Until then it is indexed like any other
        // type, as a plain document.
        $doc = new \Zend_Search_Lucene_Document();
    }

    // store the key as unique id to lookup by when deleting
    $doc->addField(\Zend_Search_Lucene_Field::Keyword('pk', $pk));

    // Store document path to identify it in the search results
    $doc->addField(\Zend_Search_Lucene_Field::Text('path', $path));

    $doc->addField(\Zend_Search_Lucene_Field::unIndexed('size', $data['size']));
    $doc->addField(\Zend_Search_Lucene_Field::unIndexed('mimetype', $mimetype));

    self::extractMetadata($doc, $path, $view, $mimetype);

    Lucene::updateFile($doc, $path, $user);

    return true;
}
/**
 * Index a single file.
 *
 * Starts from a plain Lucene document and, when the view can provide a
 * local copy of the file, replaces or extends it with extracted content
 * (plain text body, HTML, PDF, docx, xlsx, pptx, odt, ods). The pk,
 * filename, path, size and mimetype fields are then attached and the
 * document is passed to Lucene::updateFile().
 *
 * @author Jörn Dreyer <*****@*****.**>
 *
 * @param string      $path the path of the file
 * @param string|null $user user whose '/<user>/files' view is used; when
 *                          null, Filesystem::getView() and the current
 *                          user (\OCP\User::getUser()) are used instead
 *
 * @return bool true once the document was passed to Lucene::updateFile(),
 *              or when the file no longer exists in the view (nothing to
 *              do); false when the path is invalid or empty, no view could
 *              be resolved, or the file info has no mimetype
 */
public static function indexFile($path = '', $user = null)
{
    if (!Filesystem::isValidPath($path)) {
        // Return an explicit bool: a bare `return;` here used to yield null
        // although the method is documented to return bool.
        return false;
    }
    if ($path === '') {
        // ignore the empty path element
        return false;
    }

    if (is_null($user)) {
        $view = Filesystem::getView();
        $user = \OCP\User::getUser();
    } else {
        $view = new \OC\Files\View('/' . $user . '/files');
    }

    if (!$view) {
        Util::writeLog('search_lucene', 'could not resolve filesystem view', Util::WARN);
        return false;
    }

    if (!$view->file_exists($path)) {
        // Not treated as an error: the file is simply skipped.
        Util::writeLog('search_lucene', 'file vanished, ignoring', Util::DEBUG);
        return true;
    }

    // Unique key of the document: md5 over the view root plus the path.
    $root = $view->getRoot();
    $pk = md5($root . $path);

    // the cache already knows mime and other basic stuff
    $data = $view->getFileInfo($path);
    if (!isset($data['mimetype'])) {
        Util::writeLog('search_lucene', 'need mimetype for content extraction', Util::ERROR);
        return false;
    }
    $mimeType = $data['mimetype'];

    // initialize plain lucene document
    $doc = new \Zend_Search_Lucene_Document();

    // index content for local files only
    $localFile = $view->getLocalFile($path);
    if ($localFile) {
        // try to use special lucene document types
        if ('text/plain' === $mimeType) {
            $body = $view->file_get_contents($path);
            // Only a non-empty string is indexed; a failed read (false)
            // adds no body field, same as the former loose `!= ''` check.
            if (is_string($body) && $body !== '') {
                $doc->addField(\Zend_Search_Lucene_Field::UnStored('body', $body));
            }
        } elseif ('text/html' === $mimeType) {
            // TODO could be indexed, even if not local
            $doc = \Zend_Search_Lucene_Document_Html::loadHTML($view->file_get_contents($path));
        } elseif ('application/pdf' === $mimeType) {
            $doc = Pdf::loadPdf($view->file_get_contents($path));
        } else {
            // Office formats are matched by file extension instead of by
            // mimetype, as the zend classes only understand docx and not
            // doc files.
            // FIXME distinguish doc and docx, xls and xlsx, ppt and pptx,
            // in oc core mimetype helper ...
            $lowerName = strtolower($data['name']);
            $ext5 = substr($lowerName, -5);
            $ext4 = substr($lowerName, -4);

            if ($ext5 === '.docx') {
                $doc = \Zend_Search_Lucene_Document_Docx::loadDocxFile($localFile);
            } elseif ($ext5 === '.xlsx') {
                $doc = \Zend_Search_Lucene_Document_Xlsx::loadXlsxFile($localFile);
            } elseif ($ext5 === '.pptx') {
                $doc = \Zend_Search_Lucene_Document_Pptx::loadPptxFile($localFile);
            } elseif ($ext4 === '.odt') {
                $doc = Odt::loadOdtFile($localFile);
            } elseif ($ext4 === '.ods') {
                $doc = Ods::loadOdsFile($localFile);
            }
        }
    }

    // Store the key as unique id to lookup by when deleting
    $doc->addField(\Zend_Search_Lucene_Field::Keyword('pk', $pk));

    // Store filename
    $doc->addField(\Zend_Search_Lucene_Field::Text('filename', $data['name'], 'UTF-8'));

    // Store document path to identify it in the search results
    $doc->addField(\Zend_Search_Lucene_Field::Text('path', $path, 'UTF-8'));

    $doc->addField(\Zend_Search_Lucene_Field::unIndexed('size', $data['size']));
    $doc->addField(\Zend_Search_Lucene_Field::unIndexed('mimetype', $mimeType));

    // NOTE: the call to self::extractMetadata($doc, $path, $view, $mimeType)
    // is disabled in this revision.

    Lucene::updateFile($doc, $path, $user);

    return true;
}
/**
 * Remove a file from the index, driven by a JSON-encoded parameter.
 *
 * @param string $param JSON text that must decode to an object carrying
 *                      a "path" and a "user" member
 *
 * @return bool false (after logging a warning) when "path" or "user" is
 *              missing from the decoded data, true after
 *              Lucene::deleteFile() has been called
 */
public static function doDeleteFile($param)
{
    $data = json_decode($param);

    // isset() also covers the case where decoding did not produce an
    // object at all (e.g. malformed JSON decodes to null).
    if (!isset($data->path)) {
        Util::writeLog('search_lucene', 'missing path parameter', Util::WARN);
        return false;
    }
    if (!isset($data->user)) {
        Util::writeLog('search_lucene', 'missing user parameter', Util::WARN);
        return false;
    }

    Lucene::deleteFile($data->path, $data->user);

    // Return an explicit bool on success too; this path used to fall off
    // the end and yield null, which is indistinguishable from failure by
    // truthiness.
    return true;
}