Exemple #1
0
 /**
  * Index files attached to the document, ensuring the index matches the current document files.
  *
  * For documents that aren't known to be new, we check the index for existing files.
  * - New files we will add.
  * - Existing and unchanged files we will skip.
  * - Files that are in the index but not on the document will be deleted from the index.
  * - Files that have changed will be re-indexed.
  *
  * @param document $document
  */
 protected function process_document_files($document)
 {
     // Nothing to do when file indexing is switched off.
     if (!$this->file_indexing_enabled()) {
         return;
     }

     // Page size used when walking the files that are already in the index.
     $batchsize = 500;

     // Files currently attached to the document. Whatever is still in this array
     // once reconciliation is done gets (re-)indexed at the bottom of the method.
     $files = $document->get_files();

     // A document that may already be indexed has to be reconciled against the index first.
     if (!$document->get_is_new()) {
         $staleids = [];
         $offset = 0;

         // Walk the indexed files one page at a time so large file sets stay manageable.
         list($total, $batch) = $this->get_indexed_files($document, $offset, $batchsize);

         do {
             foreach ($batch as $indexed) {
                 $fileid = $indexed->solr_fileid;

                 if (!isset($files[$fileid])) {
                     // Indexed, but no longer attached to the document: queue it for removal.
                     // Removal is postponed until paging has finished, because deleting
                     // while paging could reorder the results.
                     $staleids[] = $indexed->id;
                     continue;
                 }

                 $attached = $files[$fileid];

                 // Any one of these differences means the file must be indexed again.
                 // The modification time alone is not trusted, so the file name and the
                 // content hash are compared too. The last clause covers an entry stored
                 // with INDEXED_FILE_FALSE whose file is indexable under current settings.
                 $needsreindex = $indexed->modified != $attached->get_timemodified()
                     || strcmp($indexed->title, $attached->get_filename()) !== 0
                     || $indexed->solr_filecontenthash != $attached->get_contenthash()
                     || ($indexed->solr_fileindexstatus == document::INDEXED_FILE_FALSE
                         && $this->file_is_indexable($attached));

                 if (!$needsreindex) {
                     // The index is already up to date for this file, so drop it from the work list.
                     unset($files[$fileid]);
                 }
             }

             $offset += $batchsize;

             // Fetch another page only while the reported total says there is more to read.
             // The total is refreshed by each fetch and then checked again.
             $hasmore = $offset < $total;
             if ($hasmore) {
                 list($total, $batch) = $this->get_indexed_files($document, $offset, $batchsize);
                 $hasmore = $offset < $total;
             }
         } while ($hasmore);

         // Remove the index entries whose files are gone. The search client is called
         // directly, as the engine's delete_by_id won't work on file docs.
         foreach ($staleids as $staleid) {
             $this->get_search_client()->deleteById($staleid);
         }
     }

     // Everything left over is new or changed: send it to the index.
     foreach ($files as $pending) {
         $this->add_stored_file($document, $pending);
     }
 }