/**
 * Pass an entity to the Solr indexing function registered for its type/subtype.
 *
 * The signature is that of an Elgg event handler. Entities that are not valid
 * Elgg entities, or whose type/subtype is not a registered entity type, are
 * skipped. When the 'elgg_solr_debug' config value is truthy, each decision is
 * written to the Solr debug log.
 *
 * @param string     $event  Event name (unused)
 * @param string     $type   Event type (unused)
 * @param ElggEntity $entity Entity to index
 *
 * @return true|null True when the entity was skipped as invalid/unregistered, otherwise nothing
 */
function elgg_solr_add_update_entity($event, $type, $entity) {
	$log_enabled = (bool) elgg_get_config('elgg_solr_debug');

	if (!elgg_instanceof($entity)) {
		if ($log_enabled) {
			elgg_solr_debug_log('Not a valid elgg entity');
		}
		return true;
	}

	if (!is_registered_entity_type($entity->type, $entity->getSubtype())) {
		if ($log_enabled) {
			elgg_solr_debug_log('Not a registered entity type');
		}
		return true;
	}

	$indexer = elgg_solr_get_solr_function($entity->type, $entity->getSubtype());

	// No usable indexing function: log (if requested) and stop
	if (!is_callable($indexer)) {
		if ($log_enabled) {
			elgg_solr_debug_log('Not a callable function - ' . $indexer);
		}
		return;
	}

	if ($log_enabled) {
		elgg_solr_debug_log('processing entity with function - ' . $indexer);
	}

	$indexer($entity);
}
/**
 * Resolve the Solr indexing callable for a type/subtype pair.
 *
 * Candidates are read from the 'solr_entities' config value and tried in this
 * order, returning the first one that is set and callable:
 *   1. $solr_entities[$type][$subtype]
 *   2. $solr_entities[$type]['default']
 *   3. $solr_entities['entity']['default']
 *
 * @param string $type    Entity type
 * @param string $subtype Entity subtype
 *
 * @return callable|false The indexing callable, or false when none is registered
 */
function elgg_solr_get_solr_function($type, $subtype) {
	// Always initialise the flag; previously it was only assigned when debugging
	// was enabled, so the check at the end read an undefined variable.
	$debug = (bool) elgg_get_config('elgg_solr_debug');

	$solr_entities = elgg_get_config('solr_entities');

	$candidates = array(
		array($type, $subtype),
		array($type, 'default'),
		array('entity', 'default'),
	);

	foreach ($candidates as $candidate) {
		$t = $candidate[0];
		$s = $candidate[1];

		if (isset($solr_entities[$t][$s]) && is_callable($solr_entities[$t][$s])) {
			return $solr_entities[$t][$s];
		}
	}

	if ($debug) {
		elgg_solr_debug_log('Solr function not callable for type: ' . $type . ', subtype: ' . $subtype);
	}

	return false;
}
/**
 * Reindex annotations into Solr.
 *
 * Runs with access checks ignored and hidden entities visible. Progress is
 * appended as one JSON object per line to <dataroot>/elgg_solr/<logtime>.txt,
 * and the 'reindex_running' plugin setting is used as a lock while the job runs.
 *
 * The annotation names to process come from the
 * 'elgg_solr_reindex_annotation_options' config value, falling back to the
 * 'elgg_solr:can_index', 'annotation' hook. An optional time window is read
 * from 'elgg_solr_time_options' and a restart point from
 * 'elgg_solr_restart_time'.
 *
 * Every 200 annotations a progress line is written, a commit is requested and
 * the 'stop_reindex' plugin setting is checked; when it equals the current log
 * time the script terminates via exit.
 *
 * @return void|false False when the final Solr commit fails, otherwise nothing
 */
function elgg_solr_annotation_reindex() {
	set_time_limit(0);

	$ia = elgg_set_ignore_access(true);
	$show_hidden = access_get_show_hidden_status();
	access_show_hidden_entities(true);

	// lock the function
	elgg_set_plugin_setting('reindex_running', 1, 'elgg_solr');

	$log_dir = elgg_get_config('dataroot') . 'elgg_solr';
	if (!file_exists($log_dir)) {
		mkdir($log_dir);
	}

	$logtime = elgg_get_config('elgg_solr_restart_logtime');
	if (!$logtime) {
		$logtime = time();
	}
	$log = $log_dir . '/' . $logtime . '.txt';
	elgg_set_plugin_setting('current_log', $logtime, 'elgg_solr');

	// initialize the log
	$report = array(
		'percent' => '',
		'count' => 0,
		'typecount' => 0,
		'fullcount' => 0,
		'type' => '',
		'querytime' => 0,
		'message' => 'Initializing Reindex',
		'date' => date('Y-M-j H:i:s'),
		'logtime' => $logtime,
	);
	file_put_contents($log, json_encode($report) . "\n", FILE_APPEND);

	$debug = get_input('debug', false);
	if ($debug) {
		elgg_set_config('elgg_solr_debug', 1);
	}

	$registered_types = elgg_get_config('elgg_solr_reindex_annotation_options');
	if (!$registered_types) {
		$registered_types = _elgg_services()->hooks->trigger('elgg_solr:can_index', 'annotation', [], []);
	}

	if (!$registered_types) {
		// nothing to reindex: release the lock and restore the access state
		// taken above, otherwise both stay altered after this early exit
		elgg_set_plugin_setting('reindex_running', 0, 'elgg_solr');
		access_show_hidden_entities($show_hidden);
		elgg_set_ignore_access($ia);
		return;
	}

	// build our options and cache them in case we need to restart it
	$cacheoptions = array('types' => $registered_types);
	$options = array();

	$time = elgg_get_config('elgg_solr_time_options');
	$has_time = $time && is_array($time);
	if ($has_time) {
		$options['wheres'] = array(
			"n_table.time_created >= {$time['starttime']}",
			"n_table.time_created <= {$time['endtime']}",
		);
		$cacheoptions['starttime'] = $time['starttime'];
		$cacheoptions['endtime'] = $time['endtime'];
	}

	elgg_set_config('elgg_solr_nocommit', true); // tell our indexer not to commit right away

	$fullcount = 0;
	foreach ($registered_types as $type => $subtypes) {
		$options['limit'] = false;

		$restart_time = elgg_get_config('elgg_solr_restart_time');
		if ($restart_time) {
			// a restart point is consumed once, then cleared
			elgg_set_config('elgg_solr_restart_time', false);
			$options['wheres'][1] = "n_table.time_created <= {$restart_time}";
		} elseif ($has_time && !empty($time['endtime'])) {
			$options['wheres'][1] = "n_table.time_created <= {$time['endtime']}";
		}

		// NOTE(review): when $subtypes is empty, 'annotation_names' from a
		// previous iteration is left in $options - confirm this is intended
		if ($subtypes) {
			$options['annotation_names'] = is_array($subtypes) ? $subtypes : array($subtypes);
		}

		// this iteration fixes a bug https://github.com/Elgg/Elgg/issues/7561
		// uses a custom getter which only fetches the ids in a single large-batch query
		// which is much more efficient than standard egef
		$batch_size = (int) elgg_get_plugin_setting('reindex_batch_size', 'elgg_solr');
		if ($batch_size < 1) {
			// unset or invalid setting; also protects the modulo operations below
			$batch_size = 1000;
		}

		$annotations = new ElggBatch('elgg_solr_get_annotation_ids', $options, null, $batch_size);
		$final_count = elgg_solr_get_annotation_ids(array_merge($options, array('count' => true)));

		elgg_set_config('elgg_solr_nocommit', true); // disable committing on each annotation for performance

		$count = 0;
		$fetch_time = 0;
		$fetch_time_start = microtime(true);
		foreach ($annotations as $a) {
			elgg_solr_debug_log($a->id);
			$count++;
			$fullcount++;

			// ($count - 1) rather than ($count % $batch_size == 1) so that a
			// batch size of 1 is still recognised as the start of a batch
			$first_entity = (($count - 1) % $batch_size == 0);
			$last_entity = ($count % $batch_size == 0);

			if ($first_entity) {
				// this is the first item in the new batch
				$fetch_time = microtime(true) - $fetch_time_start; // the query time in seconds
			}

			$annotation = elgg_get_annotation_from_id($a->id);
			if ($annotation) {
				elgg_solr_index_annotation($annotation);
				// a commit may have been requested for this item; switch it off again
				elgg_set_config('elgg_solr_nocommit', true);
			}

			if (!($count % 200)) {
				$qtime = round($fetch_time, 4);
				$percent = $final_count ? round($count / $final_count * 100) : 0;
				if ($annotation) {
					$restart_time = $annotation->time_created;
				}

				$report = array(
					'percent' => $percent,
					'count' => $count,
					'typecount' => $final_count,
					'fullcount' => $fullcount,
					'type' => $type,
					'querytime' => $qtime,
					'message' => '',
					'date' => date('Y-M-j H:i:s'),
					'cacheoptions' => $cacheoptions,
					'logtime' => $logtime,
					'restart_time' => $restart_time,
				);
				file_put_contents($log, json_encode($report) . "\n", FILE_APPEND);

				elgg_set_config('elgg_solr_nocommit', false); // push a commit on the next one

				// check for the termination signal
				if ($logtime == elgg_get_plugin_setting('stop_reindex', 'elgg_solr')) {
					$report['message'] = 'Reindex has been stopped';
					$report['date'] = date('Y-M-j H:i:s');
					file_put_contents($log, json_encode($report) . "\n", FILE_APPEND);

					elgg_solr_debug_log('Stopping reindex due to termination signal');
					// NOTE(review): 'reindex_running' is left set here, as before;
					// presumably whatever sets 'stop_reindex' clears it - confirm
					exit;
				}
			}

			if ($last_entity) {
				$fetch_time_start = microtime(true);
			}
		}

		// we've finished this type, unset from the cache options
		unset($cacheoptions['types'][$type]);
	}

	$report = array(
		'percent' => '',
		'count' => 0,
		'typecount' => 0,
		'fullcount' => 0,
		'type' => '',
		'querytime' => 0,
		'message' => 'Reindex complete',
		'date' => date('Y-M-j H:i:s'),
		'logtime' => $logtime,
	);
	file_put_contents($log, json_encode($report) . "\n", FILE_APPEND);
	elgg_set_plugin_setting('reindex_running', 0, 'elgg_solr');

	// commit the last of the annotations
	$client = elgg_solr_get_client();
	$query = $client->createUpdate();
	$query->addCommit();

	$committed = true;
	try {
		$client->update($query);
	} catch (Exception $e) {
		elgg_solr_debug_log($e->getMessage());
		$committed = false;
	}

	// restore the access state on both the success and the failure path
	access_show_hidden_entities($show_hidden);
	elgg_set_ignore_access($ia);

	if (!$committed) {
		return false;
	}
}
/**
 * Search comments stored in Solr as 'generic_comment' annotations.
 *
 * NOTE - this is only used in Elgg 1.8 as comments are annotations
 *
 * Each matching document is attached to the commented entity (stored in Solr
 * as container_guid) through the 'search_comments_data' volatile data. When the
 * entity cannot be loaded, a placeholder ElggObject flagged with
 * 'search_unavailable_entity' is used instead.
 *
 * Keys read from $params: 'query', 'offset', 'limit', 'select', 'sorts', 'qf',
 * 'fq' and 'fragsize'.
 *
 * @param string $hook   Hook name (unused)
 * @param string $type   Hook type (unused)
 * @param mixed  $return Current hook return value (unused)
 * @param array  $params Search parameters
 *
 * @return array|null array('entities' => ElggEntity[], 'count' => int), or null when the Solr request fails
 */
function elgg_solr_comment_search($hook, $type, $return, $params) {
	$entities = array();

	$select = array(
		'start' => isset($params['offset']) ? $params['offset'] : null,
		'rows' => !empty($params['limit']) ? $params['limit'] : 10,
		'fields' => array('id', 'container_guid', 'description', 'owner_guid', 'time_created', 'score'),
	);

	if (!empty($params['select']) && is_array($params['select'])) {
		$select = array_merge($select, $params['select']);
	}

	// create a client instance
	$client = elgg_solr_get_client();

	// get a select query instance
	$query = $client->createSelect($select);

	$default_sort = array('score' => 'desc', 'time_created' => 'desc');
	$sorts = !empty($params['sorts']) ? $params['sorts'] : $default_sort;
	$query->addSorts($sorts);

	$description_boost = elgg_solr_get_description_boost();

	// get the dismax component and set a boost query
	$dismax = $query->getEDisMax();
	$qf = "description^{$description_boost}";
	if (!empty($params['qf'])) {
		$qf = $params['qf'];
	}
	$dismax->setQueryFields($qf);

	$boostQuery = elgg_solr_get_boost_query();
	if ($boostQuery) {
		$dismax->setBoostQuery($boostQuery);
	}

	// this query is now a dismax query
	$query->setQuery($params['query']);

	// make sure we're only getting comments
	if (!isset($params['fq']) || !is_array($params['fq'])) {
		$params['fq'] = array();
	}
	$params['fq']['type'] = 'type:annotation';
	$params['fq']['subtype'] = 'subtype:generic_comment';

	// $params['fq'] always holds at least the two entries above, so the
	// caller-supplied filters are always merged over the defaults
	$default_fq = elgg_solr_get_default_fq($params);
	$filter_queries = array_merge($default_fq, $params['fq']);

	foreach ($filter_queries as $key => $value) {
		$query->createFilterQuery($key)->setQuery($value);
	}

	// get highlighting component and apply settings
	$hl = $query->getHighlighting();
	$hl->setFields(array('description'));
	$hl->setSimplePrefix(elgg_solr_get_hl_prefix());
	$hl->setSimplePostfix(elgg_solr_get_hl_suffix());

	$fragsize = elgg_solr_get_fragsize();
	if (isset($params['fragsize'])) {
		$fragsize = (int) $params['fragsize'];
	}
	$hl->setFragSize($fragsize);

	// this executes the query and returns the result
	try {
		$resultset = $client->select($query);
	} catch (Exception $e) {
		elgg_solr_debug_log($e->getMessage());
		return null;
	}

	// Get the highlighted snippet
	try {
		$highlighting = $resultset->getHighlighting();
	} catch (Exception $e) {
		elgg_solr_debug_log($e->getMessage());
		return null;
	}

	// Count the total number of documents found by solr
	$count = $resultset->getNumFound();

	$show_score = elgg_get_plugin_setting('show_score', 'elgg_solr');

	$config = HTMLPurifier_Config::createDefault();
	$purifier = new HTMLPurifier($config);

	foreach ($resultset as $document) {
		// start every document with an empty snippet so a result without
		// highlighting cannot inherit the previous document's text
		$snippet = '';

		// comments entity_guid stored as container_guid in solr
		$entity = get_entity($document->container_guid);

		if (!$entity) {
			$entity = new ElggObject();
			$entity->setVolatileData('search_unavailable_entity', true);
		}

		// highlighting results can be fetched by document id (the field defined as uniquekey in this schema)
		$highlightedDoc = $highlighting->getResult($document->id);

		if ($highlightedDoc) {
			foreach ($highlightedDoc as $highlight) {
				// fragments are joined with an ellipsis marker followed by a newline
				$snippet = implode(" (...) \n", $highlight);
				$snippet = $purifier->purify($snippet);
			}
		}

		if (!$snippet) {
			// no highlight available: fall back to an excerpt of the description
			$snippet = search_get_highlighted_relevant_substrings(elgg_get_excerpt($document->description), $params['query']);
		}

		if ($show_score == 'yes' && elgg_is_admin_logged_in()) {
			$snippet .= elgg_view('output/longtext', array(
				'value' => elgg_echo('elgg_solr:relevancy', array($document->score)),
				'class' => 'elgg-subtext',
			));
		}

		$comments_data = $entity->getVolatileData('search_comments_data');
		if (!$comments_data) {
			$comments_data = array();
		}

		$comments_data[] = array(
			// the annotation id is whatever follows the first ':' of the document id
			'annotation_id' => substr(strstr(elgg_strip_tags($document->id), ':'), 1),
			'text' => $snippet,
			'owner_guid' => $document->owner_guid,
			'time_created' => $document->time_created,
		);
		$entity->setVolatileData('search_comments_data', $comments_data);

		$entities[] = $entity;
	}

	return array(
		'entities' => $entities,
		'count' => $count,
	);
}
/**
 * Flush the deferred annotation queues to Solr.
 *
 * Annotation ids queued under the 'elgg_solr_annotation_update' config value
 * are re-indexed; ids queued under 'elgg_solr_annotation_delete' are deleted
 * from the index, one committed update per id. Both queues are keyed by
 * annotation id and their values are ignored. A failed delete is recorded as
 * an 'elgg_solr_delete_cache' annotation on the site entity and logged.
 *
 * Runs with access checks ignored and hidden entities visible, restoring the
 * values captured on entry before returning.
 *
 * @return void
 */
function elgg_solr_annotations_sync() {
	$previous_hidden = access_get_show_hidden_status();
	access_show_hidden_entities(true);
	$previous_ignore = elgg_set_ignore_access(true);

	$pending_updates = elgg_get_config('elgg_solr_annotation_update');
	if (is_array($pending_updates)) {
		foreach (array_keys($pending_updates) as $annotation_id) {
			$annotation = elgg_get_annotation_from_id($annotation_id);
			if ($annotation) {
				elgg_solr_index_annotation($annotation);
			}
		}
	}

	$pending_deletes = elgg_get_config('elgg_solr_annotation_delete');
	if (is_array($pending_deletes)) {
		foreach (array_keys($pending_deletes) as $annotation_id) {
			$doc_id = 'annotation:' . $annotation_id;

			$solr = elgg_solr_get_client();
			$update = $solr->createUpdate();
			$update->addDeleteById($doc_id);
			$update->addCommit();

			try {
				$solr->update($update);
			} catch (Exception $failure) {
				// remember the document that could not be deleted
				elgg_get_site_entity()->annotate('elgg_solr_delete_cache', $doc_id, ACCESS_PUBLIC);
				elgg_solr_debug_log($failure->getMessage());
			}
		}
	}

	access_show_hidden_entities($previous_hidden);
	elgg_set_ignore_access($previous_ignore);
}