/**
 * Build the search context used while a document is being indexed.
 *
 * The context names this provider's class as the search provider, marks the
 * action as 'indexing', enables grouping of alternative forms and carries the
 * document's 'language' value.
 *
 * @param nc_search_document $document document whose 'language' value is put into the context
 * @return nc_search_context
 */
protected function create_document_context(nc_search_document $document) {
    $context_options = array(
        'search_provider' => get_class($this),
        'action' => 'indexing',
        'group_alternative_forms' => true,
        'language' => $document->get('language'),
    );

    return new nc_search_context($context_options);
}
/**
 * Fill in the site and subdivision related values of a document from its URL.
 *
 * Works in two passes. First the site is looked up by the host part of the
 * document URL; on failure the problem is logged and site id 1 is used.
 * Then the URL is resolved to a subdivision ("folder"); on failure the problem
 * is logged and the subdivision related values are left untouched.
 *
 * @param nc_search_document $doc document to update; it is modified in place
 * @return nc_search_document the same document instance that was passed in
 */
protected function apply_hierarchy_options(nc_search_document $doc) {
    $core = nc_Core::get_object();

    // Pass 1: determine the site from the URL host.
    try {
        $host = parse_url($doc->get('url'), PHP_URL_HOST);
        $site_row = $core->catalogue->get_by_host_name($host);
        $site_id = $site_row["Catalogue_ID"];
        $doc->set('language', $site_row["Language"]);
    } catch (Exception $error) {
        nc_search::log(nc_search::LOG_INDEXING_NO_SUB, "Cannot determine site of the document '{$doc->get('url')}': {$e_message = $error->getMessage()}");
        $site_id = 1; // arbitrary fallback
    }
    $doc->set('site_id', $site_id);

    // Pass 2: determine the subdivision.
    try {
        $route = nc_resolve_url($doc->get('url'), 'GET');
        if (!$route || !isset($route['folder_id'])) {
            throw new nc_search_exception();
        }

        $sub_id = $route['folder_id'];
        $sub_row = $core->subdivision->get_by_id($sub_id);

        // The parent tree includes the subdivision itself.
        $ancestor_keys = array();
        foreach ($core->subdivision->get_parent_tree($sub_id) as $node) {
            if (!isset($node["Subdivision_ID"])) {
                continue;
            }
            $ancestor_keys[] = 'sub' . $node['Subdivision_ID'];
        }

        $hierarchy_values = array(
            'sub_id' => $sub_id,
            'language' => $core->subdivision->get_lang($sub_id),
            'ancestor_ids' => implode(',', $ancestor_keys),
        );
        $doc->set_values($hierarchy_values);

        // Copy the sitemap settings only when the subdivision is flagged for sitemap inclusion.
        $page = $core->page;
        if ($sub_row[$page->get_field_name('sitemap_include')]) {
            $sitemap_values = array(
                'sitemap_include' => true,
                'sitemap_changefreq' => $sub_row[$page->get_field_name('sitemap_changefreq')],
                'sitemap_priority' => $sub_row[$page->get_field_name('sitemap_priority')],
            );
            $doc->set_values($sitemap_values);
        }
    } catch (Exception $error) {
        nc_search::log(nc_search::LOG_INDEXING_NO_SUB, "Cannot set subdivision data for the document '{$doc->get('url')}': {$error->getMessage()}");
    }

    return $doc;
}
/**
 * Process a document: add it to the index, or update it when a document with
 * the same site id and path is already stored and its hash differs.
 *
 * @param nc_search_document $document
 * @return bool false when the stored hash equals the freshly generated one
 *              (the document is only saved under the existing id, no index
 *              update is requested); true when the document was added or updated
 */
public function process_document(nc_search_document $document) {
    $doc_hash = $document->generate_hash();

    // Is a document with this path already stored?
    $stored = nc_search_document::get_hash_by_path($document->get('site_id'), $document->get('path'));

    if (!$stored) {
        $this->add_document($document);
        return true;
    }

    // Does the index need updating? Compare the hashes as strings, strictly:
    // a loose == treats numeric-looking strings (e.g. "0e123" and "0e456")
    // as equal numbers, which would make changed content look unchanged.
    if ((string)$stored->get('hash') === (string)$doc_hash) {
        // Unchanged: save the document under the existing id and skip the index update.
        $document->set_id($stored->get_id())->save();
        return false;
    }

    $document->set_id($stored->get_id());
    $this->update_document($document);
    return true;
}