Example #1
0
 /**
  * index a file
  *
  * @author Jörn Dreyer <*****@*****.**>
  *
  * @param string $path the path of the file
  *
  * @return bool
  */
 public static function indexFile($path = '', $user = null)
 {
     if (!Filesystem::isValidPath($path)) {
         return;
     }
     if ($path === '') {
         //ignore the empty path element
         return false;
     }
     if (is_null($user)) {
         $view = Filesystem::getView();
         $user = \OCP\User::getUser();
     } else {
         $view = new \OC\Files\View('/' . $user . '/files');
     }
     if (!$view) {
         Util::writeLog('search_lucene', 'could not resolve filesystem view', Util::WARN);
         return false;
     }
     $root = $view->getRoot();
     $pk = md5($root . $path);
     // the cache already knows mime and other basic stuff
     $data = $view->getFileInfo($path);
     if (isset($data['mimetype'])) {
         $mimetype = $data['mimetype'];
         if ('text/html' === $mimetype) {
             $doc = \Zend_Search_Lucene_Document_Html::loadHTML($view->file_get_contents($path));
         } else {
             if ('application/msword' === $mimetype) {
                 // FIXME uses ZipArchive ... make compatible with OC\Files\Filesystem
                 //$doc = Zend_Search_Lucene_Document_Docx::loadDocxFile(OC\Files\Filesystem::file_get_contents($path));
                 //no special treatment yet
                 $doc = new \Zend_Search_Lucene_Document();
             } else {
                 $doc = new \Zend_Search_Lucene_Document();
             }
         }
         // store fscacheid as unique id to lookup by when deleting
         $doc->addField(\Zend_Search_Lucene_Field::Keyword('pk', $pk));
         // Store document URL to identify it in the search results
         $doc->addField(\Zend_Search_Lucene_Field::Text('path', $path));
         $doc->addField(\Zend_Search_Lucene_Field::unIndexed('size', $data['size']));
         $doc->addField(\Zend_Search_Lucene_Field::unIndexed('mimetype', $mimetype));
         self::extractMetadata($doc, $path, $view, $mimetype);
         Lucene::updateFile($doc, $path, $user);
         return true;
     } else {
         Util::writeLog('search_lucene', 'need mimetype for content extraction', Util::ERROR);
         return false;
     }
 }
Example #2
0
 /**
  * index a file
  *
  * @author Jörn Dreyer <*****@*****.**>
  *
  * @param string $path the path of the file
  *
  * @return bool
  */
 public static function indexFile($path = '', $user = null)
 {
     if (!Filesystem::isValidPath($path)) {
         return;
     }
     if ($path === '') {
         //ignore the empty path element
         return false;
     }
     if (is_null($user)) {
         $view = Filesystem::getView();
         $user = \OCP\User::getUser();
     } else {
         $view = new \OC\Files\View('/' . $user . '/files');
     }
     if (!$view) {
         Util::writeLog('search_lucene', 'could not resolve filesystem view', Util::WARN);
         return false;
     }
     if (!$view->file_exists($path)) {
         Util::writeLog('search_lucene', 'file vanished, ignoring', Util::DEBUG);
         return true;
     }
     $root = $view->getRoot();
     $pk = md5($root . $path);
     // the cache already knows mime and other basic stuff
     $data = $view->getFileInfo($path);
     if (isset($data['mimetype'])) {
         $mimeType = $data['mimetype'];
         // initialize plain lucene document
         $doc = new \Zend_Search_Lucene_Document();
         // index content for local files only
         $localFile = $view->getLocalFile($path);
         if ($localFile) {
             //try to use special lucene document types
             if ('text/plain' === $mimeType) {
                 $body = $view->file_get_contents($path);
                 if ($body != '') {
                     $doc->addField(\Zend_Search_Lucene_Field::UnStored('body', $body));
                 }
             } else {
                 if ('text/html' === $mimeType) {
                     //TODO could be indexed, even if not local
                     $doc = \Zend_Search_Lucene_Document_Html::loadHTML($view->file_get_contents($path));
                 } else {
                     if ('application/pdf' === $mimeType) {
                         $doc = Pdf::loadPdf($view->file_get_contents($path));
                         // commented the mimetype checks, as the zend classes only understand docx and not doc files.
                         // FIXME distinguish doc and docx, xls and xlsx, ppt and pptx, in oc core mimetype helper ...
                         //} else if ('application/msword' === $mimeType) {
                     } else {
                         if (strtolower(substr($data['name'], -5)) === '.docx') {
                             $doc = \Zend_Search_Lucene_Document_Docx::loadDocxFile($localFile);
                             //} else if ('application/msexcel' === $mimeType) {
                         } else {
                             if (strtolower(substr($data['name'], -5)) === '.xlsx') {
                                 $doc = \Zend_Search_Lucene_Document_Xlsx::loadXlsxFile($localFile);
                                 //} else if ('application/mspowerpoint' === $mimeType) {
                             } else {
                                 if (strtolower(substr($data['name'], -5)) === '.pptx') {
                                     $doc = \Zend_Search_Lucene_Document_Pptx::loadPptxFile($localFile);
                                 } else {
                                     if (strtolower(substr($data['name'], -4)) === '.odt') {
                                         $doc = Odt::loadOdtFile($localFile);
                                     } else {
                                         if (strtolower(substr($data['name'], -4)) === '.ods') {
                                             $doc = Ods::loadOdsFile($localFile);
                                         }
                                     }
                                 }
                             }
                         }
                     }
                 }
             }
         }
         // Store filecache id as unique id to lookup by when deleting
         $doc->addField(\Zend_Search_Lucene_Field::Keyword('pk', $pk));
         // Store filename
         $doc->addField(\Zend_Search_Lucene_Field::Text('filename', $data['name'], 'UTF-8'));
         // Store document path to identify it in the search results
         $doc->addField(\Zend_Search_Lucene_Field::Text('path', $path, 'UTF-8'));
         $doc->addField(\Zend_Search_Lucene_Field::unIndexed('size', $data['size']));
         $doc->addField(\Zend_Search_Lucene_Field::unIndexed('mimetype', $mimeType));
         //self::extractMetadata($doc, $path, $view, $mimeType);
         Lucene::updateFile($doc, $path, $user);
         return true;
     } else {
         Util::writeLog('search_lucene', 'need mimetype for content extraction', Util::ERROR);
         return false;
     }
 }
Example #3
0
 public static function doDeleteFile($param)
 {
     $data = json_decode($param);
     if (!isset($data->path)) {
         Util::writeLog('search_lucene', 'missing path parameter', Util::WARN);
         return false;
     }
     if (!isset($data->user)) {
         Util::writeLog('search_lucene', 'missing user parameter', Util::WARN);
         return false;
     }
     Lucene::deleteFile($data->path, $data->user);
 }