/**
 * Synchronise the indexed files of a document with the files currently attached to it.
 *
 * Nothing happens when file indexing is disabled. Otherwise, unless the document reports
 * itself as new, the files already in the index are walked in batches and compared with
 * the attached files:
 * - Indexed files that are no longer attached are deleted from the index.
 * - Attached files whose indexed copy is unchanged are skipped.
 * - Attached files that are new or have changed are (re-)indexed.
 *
 * @param document $document The document whose attached files should be indexed.
 * @return void
 */
protected function process_document_files($document) {
    if (!$this->file_indexing_enabled()) {
        return;
    }

    // Files currently attached to the document, keyed by file id. Entries still present
    // once reconciliation is done are the ones that get sent to the index.
    $files = $document->get_files();

    if (!$document->get_is_new()) {
        // The document may already have files in the index, so reconcile against them.
        // Work through the index in fixed-size batches to cope with large numbers of files.
        $batchsize = 500;
        $offset = 0;
        $staleids = array();

        list($total, $batch) = $this->get_indexed_files($document, $offset, $batchsize);

        do {
            foreach ($batch as $indexed) {
                $fileid = $indexed->solr_fileid;

                if (!isset($files[$fileid])) {
                    // Indexed, but no longer attached. Only remember the id for now: deleting
                    // while still paging through the results could reorder them.
                    $staleids[] = $indexed->id;
                    continue;
                }

                $attached = $files[$fileid];

                // The modified time alone is not trusted to reflect every change, so the
                // filename and content hash are compared as well. A file recorded as not
                // indexed is retried when the current settings consider it indexable.
                $needsreindex = ($indexed->modified != $attached->get_timemodified())
                    || (strcmp($indexed->title, $attached->get_filename()) !== 0)
                    || ($indexed->solr_filecontenthash != $attached->get_contenthash())
                    || ($indexed->solr_fileindexstatus == document::INDEXED_FILE_FALSE
                        && $this->file_is_indexable($attached));

                if (!$needsreindex) {
                    // The indexed copy is up to date, so there is nothing to send for this file.
                    unset($files[$fileid]);
                }
            }

            $offset += $batchsize;

            if ($offset < $total) {
                // More indexed files remain, so pull in the next batch.
                list($total, $batch) = $this->get_indexed_files($document, $offset, $batchsize);
            }
        } while ($offset < $total);

        // Paging has finished, so the entries for detached files can now be removed safely.
        foreach ($staleids as $staleid) {
            // Deletion goes straight through the client, as the engine delete_by_id
            // won't work on file docs.
            $this->get_search_client()->deleteById($staleid);
        }
    }

    // Whatever is left is either new or changed, so index it.
    foreach ($files as $file) {
        $this->add_stored_file($document, $file);
    }
}