Exemple #1
0
/**
 * Send one annotation to Solr as an update document.
 *
 * The document is built from the annotation's fields, handed to the
 * 'elgg_solr:index', 'annotation' plugin hook (which may alter it or
 * return a falsy value to cancel indexing), and then submitted. A commit
 * is attached to the update unless the 'elgg_solr_nocommit' config value
 * is truthy. Exceptions thrown by the update are logged, not rethrown.
 *
 * @param object $annotation annotation to index; its id, name, owner_guid,
 *                           entity_guid, access_id, value, time_created
 *                           and enabled properties are read
 *
 * @return bool|null true when a plugin cancelled the index, nothing otherwise
 */
function elgg_solr_index_annotation($annotation)
{
    $solr = elgg_solr_get_client();
    $commit = !elgg_get_config('elgg_solr_nocommit');
    $update = $solr->createUpdate();

    // describe the annotation as a solr document
    $document = $update->createDocument();
    $document->id = "annotation:{$annotation->id}";
    $document->type = 'annotation';
    $document->subtype = $annotation->name;
    $document->owner_guid = $annotation->owner_guid;
    // the annotated entity's guid is stored in the container_guid field
    $document->container_guid = $annotation->entity_guid;
    $document->access_id = $annotation->access_id;
    $document->description = elgg_strip_tags($annotation->value);
    $document->time_created = $annotation->time_created;
    $document->enabled = $annotation->enabled;

    // give plugins the chance to modify or veto the document
    $hook_params = array('annotation' => $annotation);
    $document = elgg_trigger_plugin_hook('elgg_solr:index', 'annotation', $hook_params, $document);
    if (!$document) {
        // a plugin has stopped the index
        return true;
    }

    $update->addDocument($document);
    if ($commit) {
        $update->addCommit($commit);
    }

    // run the update; failures are only written to the error log
    try {
        $solr->update($update);
    } catch (Exception $failure) {
        error_log($failure->getMessage());
    }
}
// Build the Solr delete query from the requested filter.
// NOTE(review): $type, $subtype, $starttime and $endtime are assigned before
// this excerpt and are interpolated into the query unescaped - confirm they
// are validated where they are read.
if ($type) {
    // fix comments params: comments are indexed as generic_comment annotations
    if ($type == 'comments') {
        $type = 'annotation';
        $subtype = 'generic_comment';
    }
    $q = "type:{$type}";
    if ($subtype) {
        $q .= " AND subtype:{$subtype}";
    }
} else {
    // no type given: match every document
    $q = '*:*';
}
if ($starttime && $endtime) {
    $q .= " AND time_created:[{$starttime} TO {$endtime}]";
}
// create a client instance
$client = elgg_solr_get_client();
// get an update query instance
$update = $client->createUpdate();
// add the delete query and a commit command to the update query
$update->addDeleteQuery($q);
$update->addCommit();
// this executes the query and returns the result
try {
    $client->update($update);
    system_message(elgg_echo('elgg_solr:success:delete_index'));
} catch (Exception $exc) {
    // keep the stack trace in the server log; only the message is shown to
    // the user so internal paths and call details are not exposed in the UI
    error_log($exc->getTraceAsString());
    register_error($exc->getMessage());
}
forward(REFERER);
/**
 * Reindex annotations into Solr, one registered type at a time.
 *
 * Runs with access checks ignored and hidden entities visible, and marks
 * itself as running through the 'reindex_running' plugin setting. Progress
 * is appended as one JSON object per line to
 * <dataroot>/elgg_solr/<logtime>.txt. Individual documents are indexed
 * without a commit; a commit is allowed after every 200th annotation and a
 * final commit is sent at the end.
 *
 * Every 200 annotations the 'stop_reindex' plugin setting is compared with
 * the current log time; on a match a "stopped" line is logged and the
 * script exits immediately.
 *
 * @return bool|null false when the final commit fails, nothing otherwise
 */
function elgg_solr_annotation_reindex()
{
    set_time_limit(0);
    $ia = elgg_set_ignore_access(true);
    $show_hidden = access_get_show_hidden_status();
    access_show_hidden_entities(true);

    // lock the function
    elgg_set_plugin_setting('reindex_running', 1, 'elgg_solr');

    $log_dir = elgg_get_config('dataroot') . 'elgg_solr';
    if (!file_exists($log_dir)) {
        mkdir($log_dir);
    }

    // a restarted reindex keeps writing to the log of the run it resumes
    $logtime = elgg_get_config('elgg_solr_restart_logtime');
    if (!$logtime) {
        $logtime = time();
    }
    $log = $log_dir . '/' . $logtime . '.txt';
    elgg_set_plugin_setting('current_log', $logtime, 'elgg_solr');

    // initialize the log
    $report = array('percent' => '', 'count' => 0, 'typecount' => 0, 'fullcount' => 0, 'type' => '', 'querytime' => 0, 'message' => 'Initializing Reindex', 'date' => date('Y-M-j H:i:s'), 'logtime' => $logtime);
    file_put_contents($log, json_encode($report) . "\n", FILE_APPEND);

    $debug = get_input('debug', false);
    if ($debug) {
        elgg_set_config('elgg_solr_debug', 1);
    }

    $registered_types = elgg_get_config('elgg_solr_reindex_annotation_options');
    if (!$registered_types) {
        $registered_types = _elgg_services()->hooks->trigger('elgg_solr:can_index', 'annotation', [], []);
    }
    if (!$registered_types) {
        // nothing to reindex: release the lock and restore the access state
        // so this early exit does not leave the site in reindex mode
        elgg_set_plugin_setting('reindex_running', 0, 'elgg_solr');
        access_show_hidden_entities($show_hidden);
        elgg_set_ignore_access($ia);
        return;
    }

    // build our options and cache them in case we need to restart it
    $cacheoptions = array('types' => $registered_types);
    $options = array();
    $time = elgg_get_config('elgg_solr_time_options');
    $has_time = $time && is_array($time);
    if ($has_time) {
        $options['wheres'] = array("n_table.time_created >= {$time['starttime']}", "n_table.time_created <= {$time['endtime']}");
        $cacheoptions['starttime'] = $time['starttime'];
        $cacheoptions['endtime'] = $time['endtime'];
    }

    // tell our indexer not to commit right away
    elgg_set_config('elgg_solr_nocommit', true);

    $fullcount = 0;
    foreach ($registered_types as $type => $subtypes) {
        $options['limit'] = false;

        // a restart time only applies to the first type processed after a restart
        $restart_time = elgg_get_config('elgg_solr_restart_time');
        if ($restart_time) {
            elgg_set_config('elgg_solr_restart_time', false);
            $options['wheres'][1] = "n_table.time_created <= {$restart_time}";
        } elseif ($has_time && !empty($time['endtime'])) {
            $options['wheres'][1] = "n_table.time_created <= {$time['endtime']}";
        }

        if ($subtypes) {
            $options['annotation_names'] = is_array($subtypes) ? $subtypes : array($subtypes);
        }

        // this iteration fixes a bug https://github.com/Elgg/Elgg/issues/7561
        // uses a custom getter which only fetches the ids in a single large-batch query
        // which is much more efficient than the standard getter
        $batch_size = elgg_get_plugin_setting('reindex_batch_size', 'elgg_solr');
        $batch_size = $batch_size ?: 1000;
        $annotations = new ElggBatch('elgg_solr_get_annotation_ids', $options, null, $batch_size);
        $final_count = elgg_solr_get_annotation_ids(array_merge($options, array('count' => true)));

        // disable committing on each annotation for performance
        elgg_set_config('elgg_solr_nocommit', true);

        $count = 0;
        // stays 0 until the first batch has been timed (e.g. with a batch size of 1)
        $fetch_time = 0;
        $fetch_time_start = microtime(true);
        foreach ($annotations as $a) {
            elgg_solr_debug_log($a->id);
            $count++;
            $fullcount++;
            $first_entity = (bool) ($count % $batch_size == 1);
            $last_entity = (bool) ($count % $batch_size == 0);
            if ($first_entity) {
                // first item of a new batch: the elapsed time is the batch query time in seconds
                $fetch_time = microtime(true) - $fetch_time_start;
            }

            $annotation = elgg_get_annotation_from_id($a->id);
            if ($annotation) {
                elgg_solr_index_annotation($annotation);
                // back to deferred commits after a possible commit-enabled index
                elgg_set_config('elgg_solr_nocommit', true);
            }

            if (!($count % 200)) {
                $qtime = round($fetch_time, 4);
                // guard against a zero count so the progress line can always be written
                $percent = $final_count ? round($count / $final_count * 100) : 0;
                if ($annotation) {
                    $restart_time = $annotation->time_created;
                }
                $report = array('percent' => $percent, 'count' => $count, 'typecount' => $final_count, 'fullcount' => $fullcount, 'type' => $type, 'querytime' => $qtime, 'message' => '', 'date' => date('Y-M-j H:i:s'), 'cacheoptions' => $cacheoptions, 'logtime' => $logtime, 'restart_time' => $restart_time);
                file_put_contents($log, json_encode($report) . "\n", FILE_APPEND);

                // push a commit on the next indexed annotation
                elgg_set_config('elgg_solr_nocommit', false);

                // check for the termination signal
                if ($logtime == elgg_get_plugin_setting('stop_reindex', 'elgg_solr')) {
                    $report['message'] = 'Reindex has been stopped';
                    $report['date'] = date('Y-M-j H:i:s');
                    file_put_contents($log, json_encode($report) . "\n", FILE_APPEND);
                    elgg_solr_debug_log('Stopping reindex due to termination signal');
                    exit;
                }
            }

            if ($last_entity) {
                // the next batch query starts now
                $fetch_time_start = microtime(true);
            }
        }

        // we've finished this type, unset from the cache options
        unset($cacheoptions['types'][$type]);
    }

    $report = array('percent' => '', 'count' => 0, 'typecount' => 0, 'fullcount' => 0, 'type' => '', 'querytime' => 0, 'message' => 'Reindex complete', 'date' => date('Y-M-j H:i:s'), 'logtime' => $logtime);
    file_put_contents($log, json_encode($report) . "\n", FILE_APPEND);
    elgg_set_plugin_setting('reindex_running', 0, 'elgg_solr');

    // commit the last of the annotations
    $committed = true;
    $client = elgg_solr_get_client();
    $query = $client->createUpdate();
    $query->addCommit();
    try {
        $client->update($query);
    } catch (Exception $e) {
        elgg_solr_debug_log($e->getMessage());
        $committed = false;
    }

    // always restore the access state, even when the final commit failed
    access_show_hidden_entities($show_hidden);
    elgg_set_ignore_access($ia);

    if (!$committed) {
        return false;
    }
}
Exemple #4
0
/**
 * Search hook handler that finds comments (generic_comment annotations) in Solr.
 *
 * NOTE - this is only used in Elgg 1.8 as comments are annotations
 *
 * Each matching comment yields the entity it was made on (looked up through
 * the document's container_guid). The comment's id, highlighted text, owner
 * and creation time are appended to that entity's 'search_comments_data'
 * volatile data. When the entity cannot be loaded, a placeholder ElggObject
 * flagged with 'search_unavailable_entity' is returned in its place.
 *
 * @param string $hook   hook name (not used)
 * @param string $type   hook type (not used)
 * @param mixed  $return current hook return value (not used)
 * @param array  $params search parameters; the keys read here are 'query',
 *                       'offset', 'limit' (defaults to 10), 'select',
 *                       'sorts', 'qf', 'fq' and 'fragsize'
 *
 * @return array|null array with 'entities' and 'count' keys, or null when
 *                    the Solr request fails
 */
function elgg_solr_comment_search($hook, $type, $return, $params)
{
    $entities = array();
    $select = array('start' => $params['offset'], 'rows' => !empty($params['limit']) ? $params['limit'] : 10, 'fields' => array('id', 'container_guid', 'description', 'owner_guid', 'time_created', 'score'));
    if (!empty($params['select']) && is_array($params['select'])) {
        $select = array_merge($select, $params['select']);
    }
    // create a client instance
    $client = elgg_solr_get_client();
    // get a select query instance
    $query = $client->createSelect($select);
    $default_sort = array('score' => 'desc', 'time_created' => 'desc');
    $sorts = !empty($params['sorts']) ? $params['sorts'] : $default_sort;
    $query->addSorts($sorts);
    $description_boost = elgg_solr_get_description_boost();
    // get the edismax component and set the query fields / boost query
    $dismax = $query->getEDisMax();
    $qf = "description^{$description_boost}";
    if (!empty($params['qf'])) {
        $qf = $params['qf'];
    }
    $dismax->setQueryFields($qf);
    $boostQuery = elgg_solr_get_boost_query();
    if ($boostQuery) {
        $dismax->setBoostQuery($boostQuery);
    }
    // this query is now a dismax query
    $query->setQuery($params['query']);
    // make sure we're only getting comments
    $params['fq']['type'] = 'type:annotation';
    $params['fq']['subtype'] = 'subtype:generic_comment';
    $default_fq = elgg_solr_get_default_fq($params);
    // $params['fq'] always holds the two filters set above, so it is always merged
    $filter_queries = array_merge($default_fq, $params['fq']);
    foreach ($filter_queries as $key => $value) {
        $query->createFilterQuery($key)->setQuery($value);
    }
    // get highlighting component and apply settings
    $hl = $query->getHighlighting();
    $hl->setFields(array('description'));
    $hl->setSimplePrefix('<span data-hl="elgg-solr">');
    $hl->setSimplePostfix('</span>');
    $fragsize = elgg_solr_get_fragsize();
    if (isset($params['fragsize'])) {
        $fragsize = (int) $params['fragsize'];
    }
    $hl->setFragSize($fragsize);
    // this executes the query and returns the result
    try {
        $resultset = $client->select($query);
    } catch (Exception $e) {
        error_log($e->getMessage());
        return null;
    }
    // Get the highlighted snippet
    try {
        $highlighting = $resultset->getHighlighting();
    } catch (Exception $e) {
        error_log($e->getMessage());
        return null;
    }
    // Count the total number of documents found by solr
    $count = $resultset->getNumFound();
    $hl_prefix = elgg_solr_get_hl_prefix();
    $hl_suffix = elgg_solr_get_hl_suffix();
    $show_score = elgg_get_plugin_setting('show_score', 'elgg_solr');
    $config = HTMLPurifier_Config::createDefault();
    $purifier = new HTMLPurifier($config);
    foreach ($resultset as $document) {
        // start every result with an empty snippet so a document without
        // highlighting never inherits the previous document's text
        $snippet = '';
        // comments entity_guid stored as container_guid in solr
        $entity = get_entity($document->container_guid);
        if (!$entity) {
            $entity = new ElggObject();
            $entity->setVolatileData('search_unavailable_entity', TRUE);
        }
        // highlighting results can be fetched by document id (the field defined as uniquekey in this schema)
        $highlightedDoc = $highlighting->getResult($document->id);
        if ($highlightedDoc) {
            foreach ($highlightedDoc as $highlight) {
                $snippet = implode(' (...) ', $highlight);
                // get our highlight based on the wrapped tokens
                // note, this is to prevent partial html from breaking page layouts
                $match = array();
                preg_match('/<span data-hl="elgg-solr">(.*)<\\/span>/', $snippet, $match);
                if (!empty($match[1])) {
                    $snippet = str_replace($match[1], $hl_prefix . $match[1] . $hl_suffix, $snippet);
                    $snippet = $purifier->purify($snippet);
                }
            }
        }
        if (!$snippet) {
            // no usable highlight from solr: fall back to highlighting an excerpt ourselves
            $snippet = search_get_highlighted_relevant_substrings(elgg_get_excerpt($document->description), $params['query']);
        }
        if ($show_score == 'yes' && elgg_is_admin_logged_in()) {
            $snippet .= elgg_view('output/longtext', array('value' => elgg_echo('elgg_solr:relevancy', array($document->score)), 'class' => 'elgg-subtext'));
        }
        $comments_data = $entity->getVolatileData('search_comments_data');
        if (!$comments_data) {
            $comments_data = array();
        }
        // the document id has the form "annotation:<id>"; keep the part after the colon
        $comments_data[] = array('annotation_id' => substr(strstr(elgg_strip_tags($document->id), ':'), 1), 'text' => $snippet, 'owner_guid' => $document->owner_guid, 'time_created' => $document->time_created);
        $entity->setVolatileData('search_comments_data', $comments_data);
        $entities[] = $entity;
    }
    return array('entities' => $entities, 'count' => $count);
}
Exemple #5
0
/**
 * Search hook handler that finds plugin_project objects in Solr.
 *
 * Runs a dismax query over title and description, restricted to
 * type:object / subtype:plugin_project and optionally to the category given
 * by the 'category' input. Matching entities are returned in Solr's result
 * order with 'search_matched_title' and 'search_matched_description'
 * volatile data set to the highlighted text, or to the plain title /
 * a description excerpt when Solr returned no highlight for that field.
 *
 * @param string $hook   hook name (not used)
 * @param string $type   hook type (not used)
 * @param mixed  $return current hook return value (not used)
 * @param array  $params search parameters; the keys read here are 'query',
 *                       'offset', 'limit', 'select', 'sorts', 'qf' and 'fq'
 *
 * @return array|null array with 'entities' and 'count' keys, or null when
 *                    the Solr request fails
 */
function plugin_search($hook, $type, $return, $params)
{
    $select = array('start' => $params['offset'], 'rows' => $params['limit'], 'fields' => array('id', 'title', 'description'));
    if (!empty($params['select']) && is_array($params['select'])) {
        $select = array_merge($select, $params['select']);
    }
    // create a client instance
    $client = elgg_solr_get_client();
    // get a select query instance
    $query = $client->createSelect($select);
    $sorts = array('score' => 'desc', 'time_created' => 'desc');
    if (!empty($params['sorts']) && is_array($params['sorts'])) {
        $sorts = $params['sorts'];
    }
    $query->addSorts($sorts);
    $title_boost = elgg_solr_get_title_boost();
    $description_boost = elgg_solr_get_description_boost();
    // get the dismax component and set the query fields / boost query
    $dismax = $query->getDisMax();
    $qf = "title^{$title_boost} description^{$description_boost}";
    if (!empty($params['qf'])) {
        $qf = $params['qf'];
    }
    $dismax->setQueryFields($qf);
    $dismax->setQueryAlternative('*:*');
    $boostQuery = elgg_solr_get_boost_query();
    if ($boostQuery) {
        $dismax->setBoostQuery($boostQuery);
    }
    // this query is now a dismax query
    $query->setQuery($params['query']);
    // make sure we're only getting objects:plugin_project
    $params['fq']['type'] = 'type:object';
    $params['fq']['subtype'] = 'subtype:plugin_project';
    if (($category = get_input('category')) && $category != 'all') {
        $params['fq']['plugincat'] = 'tags:"' . elgg_solr_escape_special_chars('plugincat%%' . $category) . '"';
    }
    $default_fq = elgg_solr_get_default_fq($params);
    // $params['fq'] always holds the filters set above, so it is always merged
    $filter_queries = array_merge($default_fq, $params['fq']);
    foreach ($filter_queries as $key => $value) {
        $query->createFilterQuery($key)->setQuery($value);
    }
    // get highlighting component and apply settings
    $hl = $query->getHighlighting();
    $hl->setFields(array('title', 'description'));
    $hl->setSimplePrefix('<strong class="search-highlight search-highlight-color1">');
    $hl->setSimplePostfix('</strong>');
    // this executes the query and returns the result
    try {
        $resultset = $client->select($query);
    } catch (Exception $e) {
        error_log($e->getMessage());
        return null;
    }
    // Get the highlighted snippet
    try {
        $highlighting = $resultset->getHighlighting();
    } catch (Exception $e) {
        error_log($e->getMessage());
        return null;
    }
    // Count the total number of documents found by solr
    $count = $resultset->getNumFound();
    // document id => array of field name => highlighted text, in solr result order
    $search_results = array();
    foreach ($resultset as $document) {
        $search_results[$document->id] = array();
        // highlighting results can be fetched by document id (the field defined as uniquekey in this schema)
        $highlightedDoc = $highlighting->getResult($document->id);
        if ($highlightedDoc) {
            foreach ($highlightedDoc as $field => $highlight) {
                $snippet = implode(' (...) ', $highlight);
                $snippet = search_get_highlighted_relevant_substrings(elgg_strip_tags($snippet), $params['query']);
                $search_results[$document->id][$field] = $snippet;
            }
        }
    }
    // get the entities
    $entities = array();
    if (!$search_results) {
        return array('entities' => $entities, 'count' => $count);
    }
    $fetched = elgg_get_entities(array('guids' => array_keys($search_results), 'limit' => false));
    // index the fetched entities by guid so they can be put back into solr's
    // order with one lookup per result instead of a nested scan
    $entities_by_guid = array();
    if (is_array($fetched)) {
        foreach ($fetched as $e) {
            $entities_by_guid[$e->guid] = $e;
        }
    }
    foreach ($search_results as $guid => $matches) {
        if (!isset($entities_by_guid[$guid])) {
            // indexed in solr but not loadable here; skip it
            continue;
        }
        $e = $entities_by_guid[$guid];
        if (!empty($matches['title'])) {
            $e->setVolatileData('search_matched_title', $matches['title']);
        } else {
            $e->setVolatileData('search_matched_title', $e->title);
        }
        if (!empty($matches['description'])) {
            $e->setVolatileData('search_matched_description', $matches['description']);
        } else {
            $e->setVolatileData('search_matched_description', elgg_get_excerpt($e->description, 100));
        }
        $entities[] = $e;
    }
    return array('entities' => $entities, 'count' => $count);
}
Exemple #6
0
/**
 * Flush the queued annotation index changes to Solr.
 *
 * Reads the annotation ids queued in the 'elgg_solr_annotation_sync' config
 * value and (re)indexes each one that can still be loaded, then removes the
 * documents for the ids found as keys of the 'elgg_solr_annotation_delete'
 * config value. A delete that fails is recorded as an
 * 'elgg_solr_delete_cache' annotation on the site entity.
 *
 * Hidden entities are made visible while this runs and the previous
 * setting is restored before returning. Queued deletions are processed
 * even when nothing is queued for indexing.
 *
 * @return bool always true
 */
function elgg_solr_annotations_sync()
{
    $access = access_get_show_hidden_status();
    access_show_hidden_entities(true);

    $ids = elgg_get_config('elgg_solr_annotation_sync');
    if ($ids) {
        foreach ($ids as $id) {
            $annotation = elgg_get_annotation_from_id($id);
            if (!$annotation) {
                // no longer loadable; nothing to index
                continue;
            }
            elgg_solr_index_annotation($annotation);
        }
    }

    $delete_ids = elgg_get_config('elgg_solr_annotation_delete');
    if (is_array($delete_ids) && $delete_ids) {
        $client = elgg_solr_get_client();
        // the annotation ids are the array keys; the values are not used
        foreach (array_keys($delete_ids) as $g) {
            $query = $client->createUpdate();
            $query->addDeleteById('annotation:' . $g);
            $query->addCommit();
            try {
                $client->update($query);
            } catch (Exception $exc) {
                // remember the failed delete on the site entity
                elgg_get_site_entity()->annotate('elgg_solr_delete_cache', 'annotation:' . $g, ACCESS_PUBLIC);
            }
        }
    }

    access_show_hidden_entities($access);
    return true;
}