/**
 * Send one annotation to the Solr index.
 *
 * A Solr document is built from the annotation's fields and passed through the
 * 'elgg_solr:index', 'annotation' plugin hook, which may modify it or cancel
 * indexing by returning a falsy value. Exceptions thrown by the Solr client
 * are written to the PHP error log and are not re-thrown.
 *
 * @param object $annotation annotation to index; reads id, name, owner_guid,
 *                           entity_guid, access_id, value, time_created, enabled
 * @return true|null true when a plugin cancelled the indexing, nothing otherwise
 */
function elgg_solr_index_annotation($annotation) {
    $solr = elgg_solr_get_client();

    // committing is skipped while the 'elgg_solr_nocommit' config flag is set
    $commit_now = !elgg_get_config('elgg_solr_nocommit');

    $update = $solr->createUpdate();

    // describe the annotation as a Solr document
    $document = $update->createDocument();
    $document->id = 'annotation:' . $annotation->id;
    $document->type = 'annotation';
    $document->subtype = $annotation->name;
    $document->owner_guid = $annotation->owner_guid;
    // the annotated entity is stored in the container_guid field
    $document->container_guid = $annotation->entity_guid;
    $document->access_id = $annotation->access_id;
    $document->description = elgg_strip_tags($annotation->value);
    $document->time_created = $annotation->time_created;
    $document->enabled = $annotation->enabled;

    $hook_params = array('annotation' => $annotation);
    $document = elgg_trigger_plugin_hook('elgg_solr:index', 'annotation', $hook_params, $document);
    if (!$document) {
        // a plugin has stopped the index
        return true;
    }

    $update->addDocument($document);

    if ($commit_now) {
        // NOTE(review): the flag is passed as addCommit()'s first argument,
        // which looks like Solarium's soft-commit switch - confirm intent
        $update->addCommit($commit_now);
    }

    // run the update; failures are only logged
    try {
        $solr->update($update);
    } catch (Exception $failure) {
        error_log($failure->getMessage());
    }
}
// Build the Solr delete query from $type / $subtype and the optional time
// window, run it with a commit, report the outcome and forward to the referrer.
if (!$type) {
    // no type given: match every document
    $delete_query = '*:*';
} else {
    // fix comments params
    if ($type == 'comments') {
        $type = 'annotation';
        $subtype = 'generic_comment';
    }

    $clauses = array("type:{$type}");
    if ($subtype) {
        $clauses[] = "subtype:{$subtype}";
    }
    $delete_query = implode(' AND ', $clauses);
}

// restrict to a time window only when both bounds are present
if ($starttime && $endtime) {
    $delete_query .= " AND time_created:[{$starttime} TO {$endtime}]";
}

// queue the delete query followed by a commit on a fresh update request
$solr = elgg_solr_get_client();
$request = $solr->createUpdate();
$request->addDeleteQuery($delete_query);
$request->addCommit();

// execute it and tell the user what happened
try {
    $solr->update($request);
    system_message(elgg_echo('elgg_solr:success:delete_index'));
} catch (Exception $failure) {
    register_error($failure->getTraceAsString());
}

forward(REFERER);
/**
 * Reindex annotations in Solr, one registered type at a time.
 *
 * Progress is appended as JSON lines to a log file under
 * <dataroot>/elgg_solr/<logtime>.txt. While running, the 'reindex_running'
 * plugin setting is set to 1. Every 200 annotations a progress line is written,
 * a commit is allowed on the next indexed annotation, and the 'stop_reindex'
 * plugin setting is checked; when it matches this run's log time the script
 * exits.
 *
 * Access is ignored and hidden entities are shown for the duration of the run;
 * both are restored on every return path.
 *
 * @return null|false false when the final commit fails, nothing otherwise
 */
function elgg_solr_annotation_reindex() {
    set_time_limit(0);

    $ia = elgg_set_ignore_access(true);
    $show_hidden = access_get_show_hidden_status();
    access_show_hidden_entities(true);

    // lock the function
    elgg_set_plugin_setting('reindex_running', 1, 'elgg_solr');

    $logdir = elgg_get_config('dataroot') . 'elgg_solr';
    if (!file_exists($logdir)) {
        mkdir($logdir);
    }

    $logtime = elgg_get_config('elgg_solr_restart_logtime');
    if (!$logtime) {
        $logtime = time();
    }

    $log = $logdir . '/' . $logtime . '.txt';
    elgg_set_plugin_setting('current_log', $logtime, 'elgg_solr');

    // every report is one JSON object per line in the log file
    $write_report = function (array $report) use ($log) {
        file_put_contents($log, json_encode($report) . "\n", FILE_APPEND);
    };

    // initialize the log
    $write_report(array(
        'percent' => '',
        'count' => 0,
        'typecount' => 0,
        'fullcount' => 0,
        'type' => '',
        'querytime' => 0,
        'message' => 'Initializing Reindex',
        'date' => date('Y-M-j H:i:s'),
        'logtime' => $logtime,
    ));

    $debug = get_input('debug', false);
    if ($debug) {
        elgg_set_config('elgg_solr_debug', 1);
    }

    $registered_types = elgg_get_config('elgg_solr_reindex_annotation_options');
    if (!$registered_types) {
        $registered_types = _elgg_services()->hooks->trigger('elgg_solr:can_index', 'annotation', [], []);
    }

    if (!$registered_types) {
        // nothing to reindex: release the lock and undo the access changes
        // so the early exit does not leave the request in an elevated state
        elgg_set_plugin_setting('reindex_running', 0, 'elgg_solr');
        access_show_hidden_entities($show_hidden);
        elgg_set_ignore_access($ia);
        return;
    }

    // build our options and cache them in case we need to restart it
    $cacheoptions = array('types' => $registered_types);
    $options = array();

    $time = elgg_get_config('elgg_solr_time_options');
    $has_time = $time && is_array($time);
    if ($has_time) {
        $options['wheres'] = array(
            "n_table.time_created >= {$time['starttime']}",
            "n_table.time_created <= {$time['endtime']}",
        );
        $cacheoptions['starttime'] = $time['starttime'];
        $cacheoptions['endtime'] = $time['endtime'];
    }
    // only read the end bound when time options really are an array
    $end_time = ($has_time && !empty($time['endtime'])) ? $time['endtime'] : null;

    $batch_size = (int) elgg_get_plugin_setting('reindex_batch_size', 'elgg_solr') ?: 1000;

    elgg_set_config('elgg_solr_nocommit', true); // tell our indexer not to commit right away

    $fullcount = 0;

    foreach ($registered_types as $type => $subtypes) {
        $options['limit'] = false;

        $restart_time = elgg_get_config('elgg_solr_restart_time');
        if ($restart_time) {
            // a restart bound applies to the first type processed only
            elgg_set_config('elgg_solr_restart_time', false);
            $options['wheres'][1] = "n_table.time_created <= {$restart_time}";
        } elseif ($end_time) {
            $options['wheres'][1] = "n_table.time_created <= {$end_time}";
        } else {
            // drop a restart bound left over from the previous type
            unset($options['wheres'][1]);
        }

        if ($subtypes) {
            $options['annotation_names'] = is_array($subtypes) ? $subtypes : array($subtypes);
        } else {
            // do not inherit the name filter of the previous type
            unset($options['annotation_names']);
        }

        // this iteration fixes a bug https://github.com/Elgg/Elgg/issues/7561
        // uses a custom getter which only fetches the ids in a single large-batch query
        $annotations = new ElggBatch('elgg_solr_get_annotation_ids', $options, null, $batch_size);
        $final_count = elgg_solr_get_annotation_ids(array_merge($options, array('count' => true)));

        elgg_set_config('elgg_solr_nocommit', true); // disable committing on each annotation for performance

        $count = 0;
        $fetch_time = 0; // defined up front so the first report never reads an unset value
        $fetch_time_start = microtime(true);

        foreach ($annotations as $a) {
            elgg_solr_debug_log($a->id);
            $count++;
            $fullcount++;

            $first_entity = (bool) ($count % $batch_size == 1);
            $last_entity = (bool) ($count % $batch_size == 0);

            if ($first_entity) {
                // first row of a new batch: time elapsed since the previous batch ended
                $fetch_time = microtime(true) - $fetch_time_start;
            }

            $annotation = elgg_get_annotation_from_id($a->id);
            if ($annotation) {
                elgg_solr_index_annotation($annotation);
                // a commit may have been allowed for this one; switch it back off
                elgg_set_config('elgg_solr_nocommit', true);
            }

            if (!($count % 200)) {
                $qtime = round($fetch_time, 4);
                $percent = $final_count ? round($count / $final_count * 100) : 0;
                if ($annotation) {
                    $restart_time = $annotation->time_created;
                }

                $report = array(
                    'percent' => $percent,
                    'count' => $count,
                    'typecount' => $final_count,
                    'fullcount' => $fullcount,
                    'type' => $type,
                    'querytime' => $qtime,
                    'message' => '',
                    'date' => date('Y-M-j H:i:s'),
                    'cacheoptions' => $cacheoptions,
                    'logtime' => $logtime,
                    'restart_time' => $restart_time,
                );
                $write_report($report);

                elgg_set_config('elgg_solr_nocommit', false); // push a commit on the next one

                // check for the termination signal
                if ($logtime == elgg_get_plugin_setting('stop_reindex', 'elgg_solr')) {
                    $report['message'] = 'Reindex has been stopped';
                    $report['date'] = date('Y-M-j H:i:s');
                    $write_report($report);

                    elgg_solr_debug_log('Stopping reindex due to termination signal');
                    // NOTE(review): the 'reindex_running' lock is left set here,
                    // presumably for the restart flow - confirm
                    exit;
                }
            }

            if ($last_entity) {
                $fetch_time_start = microtime(true);
            }
        }

        // we've finished this type, unset from the cache options
        unset($cacheoptions['types'][$type]);
    }

    $write_report(array(
        'percent' => '',
        'count' => 0,
        'typecount' => 0,
        'fullcount' => 0,
        'type' => '',
        'querytime' => 0,
        'message' => 'Reindex complete',
        'date' => date('Y-M-j H:i:s'),
        'logtime' => $logtime,
    ));

    elgg_set_plugin_setting('reindex_running', 0, 'elgg_solr');

    // commit the last of the annotations
    $client = elgg_solr_get_client();
    $query = $client->createUpdate();
    $query->addCommit();

    $committed = true;
    try {
        $client->update($query);
    } catch (Exception $e) {
        elgg_solr_debug_log($e->getMessage());
        $committed = false;
    }

    // restore the access state whether or not the commit succeeded
    access_show_hidden_entities($show_hidden);
    elgg_set_ignore_access($ia);

    if (!$committed) {
        return false;
    }
}
/**
 * Search hook handler that looks up comment annotations in Solr.
 *
 * NOTE - this is only used in Elgg 1.8 as comments are annotations
 *
 * Each hit is attached to the entity the comment was made on, as an entry in
 * that entity's 'search_comments_data' volatile data. When the entity cannot be
 * loaded a placeholder ElggObject flagged 'search_unavailable_entity' is used.
 *
 * @param string $hook   hook name (not used)
 * @param string $type   hook type (not used)
 * @param mixed  $return current hook return value (not used)
 * @param array  $params search parameters; reads 'query', 'offset', 'limit' and
 *                       the optional 'select', 'sorts', 'qf', 'fq', 'fragsize'
 * @return array|null array('entities' => array, 'count' => int), or null when
 *                    the Solr request fails
 */
function elgg_solr_comment_search($hook, $type, $return, $params) {
    $entities = array();

    $select = array(
        'start' => $params['offset'],
        'rows' => $params['limit'] ? $params['limit'] : 10,
        'fields' => array('id', 'container_guid', 'description', 'owner_guid', 'time_created', 'score'),
    );

    if (!empty($params['select']) && is_array($params['select'])) {
        $select = array_merge($select, $params['select']);
    }

    // create a client instance
    $client = elgg_solr_get_client();

    // get a select query instance
    $query = $client->createSelect($select);

    $default_sort = array('score' => 'desc', 'time_created' => 'desc');
    $sorts = !empty($params['sorts']) ? $params['sorts'] : $default_sort;
    $query->addSorts($sorts);

    $description_boost = elgg_solr_get_description_boost();

    // get the edismax component and set the query fields / boost query
    $dismax = $query->getEDisMax();
    $qf = "description^{$description_boost}";
    if (!empty($params['qf'])) {
        $qf = $params['qf'];
    }
    $dismax->setQueryFields($qf);

    $boostQuery = elgg_solr_get_boost_query();
    if ($boostQuery) {
        $dismax->setBoostQuery($boostQuery);
    }

    // this query is now a dismax query
    $query->setQuery($params['query']);

    // make sure we're only getting comments
    $params['fq']['type'] = 'type:annotation';
    $params['fq']['subtype'] = 'subtype:generic_comment';

    // $params['fq'] always holds the two entries above, so it is always merged
    $default_fq = elgg_solr_get_default_fq($params);
    $filter_queries = array_merge($default_fq, $params['fq']);

    if (!empty($filter_queries)) {
        foreach ($filter_queries as $key => $value) {
            $query->createFilterQuery($key)->setQuery($value);
        }
    }

    // get highlighting component and apply settings
    $hl = $query->getHighlighting();
    $hl->setFields(array('description'));
    $hl->setSimplePrefix('<span data-hl="elgg-solr">');
    $hl->setSimplePostfix('</span>');

    $fragsize = elgg_solr_get_fragsize();
    if (isset($params['fragsize'])) {
        $fragsize = (int) $params['fragsize'];
    }
    $hl->setFragSize($fragsize);

    // this executes the query and returns the result
    try {
        $resultset = $client->select($query);
    } catch (Exception $e) {
        error_log($e->getMessage());
        return null;
    }

    // Get the highlighted snippet
    try {
        $highlighting = $resultset->getHighlighting();
    } catch (Exception $e) {
        error_log($e->getMessage());
        return null;
    }

    // Count the total number of documents found by solr
    $count = $resultset->getNumFound();

    $hl_prefix = elgg_solr_get_hl_prefix();
    $hl_suffix = elgg_solr_get_hl_suffix();

    $show_score = elgg_get_plugin_setting('show_score', 'elgg_solr');

    $config = HTMLPurifier_Config::createDefault();
    $purifier = new HTMLPurifier($config);

    foreach ($resultset as $document) {
        // start every hit with an empty snippet so a hit without highlighting
        // falls back to its own excerpt instead of reusing the previous one
        $snippet = '';

        // comments entity_guid stored as container_guid in solr
        $entity = get_entity($document->container_guid);

        if (!$entity) {
            $entity = new ElggObject();
            $entity->setVolatileData('search_unavailable_entity', TRUE);
        }

        // highlighting results can be fetched by document id (the field defined as uniquekey in this schema)
        $highlightedDoc = $highlighting->getResult($document->id);

        if ($highlightedDoc) {
            foreach ($highlightedDoc as $highlight) {
                $snippet = implode(' (...) ', $highlight);

                // get our highlight based on the wrapped tokens
                // note, this is to prevent partial html from breaking page layouts
                $match = array();
                preg_match('/<span data-hl="elgg-solr">(.*)<\\/span>/', $snippet, $match);

                if (!empty($match[1])) {
                    $snippet = str_replace($match[1], $hl_prefix . $match[1] . $hl_suffix, $snippet);
                    $snippet = $purifier->purify($snippet);
                }
            }
        }

        if (!$snippet) {
            $snippet = search_get_highlighted_relevant_substrings(elgg_get_excerpt($document->description), $params['query']);
        }

        if ($show_score == 'yes' && elgg_is_admin_logged_in()) {
            $snippet .= elgg_view('output/longtext', array(
                'value' => elgg_echo('elgg_solr:relevancy', array($document->score)),
                'class' => 'elgg-subtext',
            ));
        }

        $comments_data = $entity->getVolatileData('search_comments_data');
        if (!$comments_data) {
            $comments_data = array();
        }

        $comments_data[] = array(
            // document ids look like "annotation:<id>"; keep the part after the colon
            'annotation_id' => substr(strstr(elgg_strip_tags($document->id), ':'), 1),
            'text' => $snippet,
            'owner_guid' => $document->owner_guid,
            'time_created' => $document->time_created,
        );
        $entity->setVolatileData('search_comments_data', $comments_data);

        $entities[] = $entity;
    }

    return array(
        'entities' => $entities,
        'count' => $count,
    );
}
/**
 * Search hook handler that looks up plugin_project objects in Solr.
 *
 * Results are restricted to type 'object' / subtype 'plugin_project' and, when
 * the 'category' input is set to something other than 'all', to that plugin
 * category tag. Matching entities are returned in Solr's result order with the
 * highlighted title and description stored as 'search_matched_title' and
 * 'search_matched_description' volatile data.
 *
 * @param string $hook   hook name (not used)
 * @param string $type   hook type (not used)
 * @param mixed  $return current hook return value (not used)
 * @param array  $params search parameters; reads 'query', 'offset', 'limit' and
 *                       the optional 'select', 'sorts', 'qf', 'fq'
 * @return array|null array('entities' => array, 'count' => int), or null when
 *                    the Solr request fails
 */
function plugin_search($hook, $type, $return, $params) {
    $select = array(
        'start' => $params['offset'],
        'rows' => $params['limit'],
        'fields' => array('id', 'title', 'description'),
    );

    if (!empty($params['select']) && is_array($params['select'])) {
        $select = array_merge($select, $params['select']);
    }

    // create a client instance
    $client = elgg_solr_get_client();

    // get a select query instance
    $query = $client->createSelect($select);

    $sorts = array('score' => 'desc', 'time_created' => 'desc');
    if (!empty($params['sorts']) && is_array($params['sorts'])) {
        $sorts = $params['sorts'];
    }
    $query->addSorts($sorts);

    $title_boost = elgg_solr_get_title_boost();
    $description_boost = elgg_solr_get_description_boost();

    // get the dismax component and set the query fields / boost query
    $dismax = $query->getDisMax();
    $qf = "title^{$title_boost} description^{$description_boost}";
    if (!empty($params['qf'])) {
        $qf = $params['qf'];
    }
    $dismax->setQueryFields($qf);
    $dismax->setQueryAlternative('*:*');

    $boostQuery = elgg_solr_get_boost_query();
    if ($boostQuery) {
        $dismax->setBoostQuery($boostQuery);
    }

    // this query is now a dismax query
    $query->setQuery($params['query']);

    // make sure we're only getting objects:plugin_project
    $params['fq']['type'] = 'type:object';
    $params['fq']['subtype'] = 'subtype:plugin_project';

    if (($category = get_input('category')) && $category != 'all') {
        $params['fq']['plugincat'] = 'tags:"' . elgg_solr_escape_special_chars('plugincat%%' . $category) . '"';
    }

    // $params['fq'] always holds at least the two entries above, so it is always merged
    $default_fq = elgg_solr_get_default_fq($params);
    $filter_queries = array_merge($default_fq, $params['fq']);

    if (!empty($filter_queries)) {
        foreach ($filter_queries as $key => $value) {
            $query->createFilterQuery($key)->setQuery($value);
        }
    }

    // get highlighting component and apply settings
    $hl = $query->getHighlighting();
    $hl->setFields(array('title', 'description'));
    $hl->setSimplePrefix('<strong class="search-highlight search-highlight-color1">');
    $hl->setSimplePostfix('</strong>');

    // this executes the query and returns the result
    try {
        $resultset = $client->select($query);
    } catch (Exception $e) {
        error_log($e->getMessage());
        return null;
    }

    // Get the highlighted snippet
    try {
        $highlighting = $resultset->getHighlighting();
    } catch (Exception $e) {
        error_log($e->getMessage());
        return null;
    }

    // Count the total number of documents found by solr
    $count = $resultset->getNumFound();

    // document id => (field => highlighted text), kept in result order
    $search_results = array();
    foreach ($resultset as $document) {
        $search_results[$document->id] = array();

        // highlighting results can be fetched by document id (the field defined as uniquekey in this schema)
        $highlightedDoc = $highlighting->getResult($document->id);

        if ($highlightedDoc) {
            foreach ($highlightedDoc as $field => $highlight) {
                $snippet = implode(' (...) ', $highlight);
                $snippet = search_get_highlighted_relevant_substrings(elgg_strip_tags($snippet), $params['query']);
                $search_results[$document->id][$field] = $snippet;
            }
        }
    }

    // get the entities
    $entities = array();
    $entities_unsorted = array();
    if ($search_results) {
        $entities_unsorted = elgg_get_entities(array(
            'guids' => array_keys($search_results),
            'limit' => false,
        ));
    }
    if (!is_array($entities_unsorted)) {
        $entities_unsorted = array();
    }

    // index the fetched entities by guid so they can be put back in result
    // order with one lookup per hit instead of a nested scan
    $entities_by_guid = array();
    foreach ($entities_unsorted as $e) {
        $entities_by_guid[$e->guid] = $e;
    }

    foreach ($search_results as $guid => $matches) {
        if (!isset($entities_by_guid[$guid])) {
            // the entity could not be loaded; leave it out of the results
            continue;
        }
        $e = $entities_by_guid[$guid];

        // a field that was not highlighted has no entry in $matches
        if (!empty($matches['title'])) {
            $e->setVolatileData('search_matched_title', $matches['title']);
        } else {
            $e->setVolatileData('search_matched_title', $e->title);
        }

        if (!empty($matches['description'])) {
            $e->setVolatileData('search_matched_description', $matches['description']);
        } else {
            $e->setVolatileData('search_matched_description', elgg_get_excerpt($e->description, 100));
        }

        $entities[] = $e;
    }

    return array(
        'entities' => $entities,
        'count' => $count,
    );
}
/**
 * Apply the annotation index changes queued in config.
 *
 * Annotation ids listed in the 'elgg_solr_annotation_sync' config value are
 * (re)indexed; the keys of the 'elgg_solr_annotation_delete' config value are
 * deleted from Solr. The two queues are independent: deletions are processed
 * even when nothing is queued for indexing. A deletion that fails is recorded
 * as an 'elgg_solr_delete_cache' annotation on the site entity.
 *
 * Hidden entities are shown while this runs and the previous setting is
 * restored before returning.
 *
 * @return true
 */
function elgg_solr_annotations_sync() {
    $access = access_get_show_hidden_status();
    access_show_hidden_entities(true);

    $ids = elgg_get_config('elgg_solr_annotation_sync');
    if ($ids && is_array($ids)) {
        foreach ($ids as $id) {
            $annotation = elgg_get_annotation_from_id($id);
            if (!$annotation) {
                continue;
            }

            elgg_solr_index_annotation($annotation);
        }
    }

    $delete_ids = elgg_get_config('elgg_solr_annotation_delete');
    if ($delete_ids && is_array($delete_ids)) {
        // one client serves all deletions
        $client = elgg_solr_get_client();

        // the annotation ids are the array keys; the values are not used
        foreach (array_keys($delete_ids) as $g) {
            $query = $client->createUpdate();
            $query->addDeleteById('annotation:' . $g);
            $query->addCommit();

            try {
                $client->update($query);
            } catch (Exception $exc) {
                // record the failed deletion on the site entity
                elgg_get_site_entity()->annotate('elgg_solr_delete_cache', 'annotation:' . $g, ACCESS_PUBLIC);
            }
        }
    }

    access_show_hidden_entities($access);

    return true;
}