Example #1
0
/**
 * Event handler: hand a created/updated entity to its registered Solr callback.
 *
 * The callback is resolved through elgg_solr_get_solr_function() using the
 * entity's type and subtype, and is invoked with the entity as sole argument.
 *
 * @param string $event  Event name (not used here)
 * @param string $type   Event type (not used here)
 * @param mixed  $entity Value passed by the event; skipped unless it is an Elgg entity
 *
 * @return bool|null True when the entity is skipped (invalid or unregistered type),
 *                   null once a callback lookup has been attempted
 */
function elgg_solr_add_update_entity($event, $type, $entity)
{
    $debug = (bool) elgg_get_config('elgg_solr_debug');

    if (!elgg_instanceof($entity)) {
        if ($debug) {
            elgg_solr_debug_log('Not a valid elgg entity');
        }
        return true;
    }

    if (!is_registered_entity_type($entity->type, $entity->getSubtype())) {
        if ($debug) {
            elgg_solr_debug_log('Not a registered entity type');
        }
        return true;
    }

    $function = elgg_solr_get_solr_function($entity->type, $entity->getSubtype());

    // $callable_name is a printable name for any callable form. Concatenating
    // $function directly would fail for closures and print "Array" for
    // array-style callables.
    $callable_name = '';
    if (is_callable($function, false, $callable_name)) {
        if ($debug) {
            elgg_solr_debug_log('processing entity with function - ' . $callable_name);
        }
        $function($entity);
    } else {
        if ($debug) {
            elgg_solr_debug_log('Not a callable function - ' . $callable_name);
        }
    }
}
Example #2
0
/**
 * Resolve the callback registered for a type/subtype pair.
 *
 * Reads the 'solr_entities' config and returns the first callable found,
 * going from most to least specific:
 *   1. $solr_entities[$type][$subtype]
 *   2. $solr_entities[$type]['default']
 *   3. $solr_entities['entity']['default']
 *
 * @param string $type    Entity type
 * @param string $subtype Entity subtype
 *
 * @return callable|false The registered callable, or false if none is callable
 */
function elgg_solr_get_solr_function($type, $subtype)
{
    // Always initialise the flag; it is read below even when debugging is off.
    $debug = (bool) elgg_get_config('elgg_solr_debug');

    $solr_entities = elgg_get_config('solr_entities');

    if (isset($solr_entities[$type][$subtype]) && is_callable($solr_entities[$type][$subtype])) {
        return $solr_entities[$type][$subtype];
    }

    if (isset($solr_entities[$type]['default']) && is_callable($solr_entities[$type]['default'])) {
        return $solr_entities[$type]['default'];
    }

    if (isset($solr_entities['entity']['default']) && is_callable($solr_entities['entity']['default'])) {
        return $solr_entities['entity']['default'];
    }

    if ($debug) {
        elgg_solr_debug_log('Solr function not callable for type: ' . $type . ', subtype: ' . $subtype);
    }

    return false;
}
Example #3
0
/**
 * Reindex annotations, writing JSON progress lines to a log file.
 *
 * Runs with access checks ignored and hidden entities visible, and sets the
 * 'reindex_running' plugin setting for the duration of the run. The log file
 * lives in <dataroot>/elgg_solr/<logtime>.txt and receives one JSON-encoded
 * report per line. Every 200 annotations a progress report is written and the
 * 'stop_reindex' plugin setting is checked; if it matches the current log
 * time the script exits.
 *
 * @return false|null False if the final commit throws, null otherwise
 */
function elgg_solr_annotation_reindex()
{
    set_time_limit(0);
    $ia = elgg_set_ignore_access(true);
    $show_hidden = access_get_show_hidden_status();
    access_show_hidden_entities(true);

    // lock the function
    elgg_set_plugin_setting('reindex_running', 1, 'elgg_solr');

    if (!file_exists(elgg_get_config('dataroot') . 'elgg_solr')) {
        mkdir(elgg_get_config('dataroot') . 'elgg_solr');
    }

    $logtime = elgg_get_config('elgg_solr_restart_logtime');
    if (!$logtime) {
        $logtime = time();
    }
    $log = elgg_get_config('dataroot') . 'elgg_solr/' . $logtime . '.txt';
    elgg_set_plugin_setting('current_log', $logtime, 'elgg_solr');

    // initialize the log (one JSON document per line)
    $report = array('percent' => '', 'count' => 0, 'typecount' => 0, 'fullcount' => 0, 'type' => '', 'querytime' => 0, 'message' => 'Initializing Reindex', 'date' => date('Y-M-j H:i:s'), 'logtime' => $logtime);
    file_put_contents($log, json_encode($report) . "\n", FILE_APPEND);

    $debug = get_input('debug', false);
    if ($debug) {
        elgg_set_config('elgg_solr_debug', 1);
    }

    $registered_types = elgg_get_config('elgg_solr_reindex_annotation_options');
    if (!$registered_types) {
        $registered_types = _elgg_services()->hooks->trigger('elgg_solr:can_index', 'annotation', [], []);
    }
    if (!$registered_types) {
        // nothing to reindex: release the lock and restore the access state
        // so this early exit does not leave the request in override mode
        elgg_set_plugin_setting('reindex_running', 0, 'elgg_solr');
        access_show_hidden_entities($show_hidden);
        elgg_set_ignore_access($ia);
        return;
    }

    // build our options and cache them in case we need to restart it
    $cacheoptions = array('types' => $registered_types);
    $options = array();
    $time = elgg_get_config('elgg_solr_time_options');
    if ($time && is_array($time)) {
        $options['wheres'] = array("n_table.time_created >= {$time['starttime']}", "n_table.time_created <= {$time['endtime']}");
        $cacheoptions['starttime'] = $time['starttime'];
        $cacheoptions['endtime'] = $time['endtime'];
    }

    // tell our indexer not to commit right away
    elgg_set_config('elgg_solr_nocommit', true);

    // the batch size does not change between types, so read it once
    $batch_size = elgg_get_plugin_setting('reindex_batch_size', 'elgg_solr');
    $batch_size = $batch_size ?: 1000;

    $fullcount = 0;
    foreach ($registered_types as $type => $subtypes) {
        $options['limit'] = false;
        $restart_time = elgg_get_config('elgg_solr_restart_time');
        if ($restart_time) {
            elgg_set_config('elgg_solr_restart_time', false);
            $options['wheres'][1] = "n_table.time_created <= {$restart_time}";
        } elseif (is_array($time) && !empty($time['endtime'])) {
            $options['wheres'][1] = "n_table.time_created <= {$time['endtime']}";
        }

        if ($subtypes) {
            if (!is_array($subtypes)) {
                $options['annotation_names'] = array($subtypes);
            } else {
                $options['annotation_names'] = $subtypes;
            }
        } else {
            // do not let the previous type's names leak into this query
            unset($options['annotation_names']);
        }

        // this iteration fixes a bug https://github.com/Elgg/Elgg/issues/7561
        // uses a custom getter which only fetches the guids in a single large-batch query
        // which is much more efficient than standard egef
        $annotations = new ElggBatch('elgg_solr_get_annotation_ids', $options, null, $batch_size);
        $final_count = elgg_solr_get_annotation_ids(array_merge($options, array('count' => true)));

        // disable committing on each entity for performance
        elgg_set_config('elgg_solr_nocommit', true);

        $count = 0;
        // defined up front: with a batch size of 1 no iteration is ever "first"
        $fetch_time = 0;
        $fetch_time_start = microtime(true);
        foreach ($annotations as $a) {
            elgg_solr_debug_log($a->id);
            $count++;
            $fullcount++;
            $first_entity = (bool) ($count % $batch_size == 1);
            $last_entity = (bool) ($count % $batch_size == 0);

            if ($first_entity) {
                // this is the first entity in the new batch;
                // the elapsed time is the query time in seconds
                $fetch_time = microtime(true) - $fetch_time_start;
            }

            $annotation = elgg_get_annotation_from_id($a->id);
            if ($annotation) {
                elgg_solr_index_annotation($annotation);
                elgg_set_config('elgg_solr_nocommit', true);
            }

            if (!($count % 200)) {
                $qtime = round($fetch_time, 4);
                // guard against a zero/failed count query
                $percent = $final_count ? round($count / $final_count * 100) : 0.0;
                if ($annotation) {
                    $restart_time = $annotation->time_created;
                }
                $report = array('percent' => $percent, 'count' => $count, 'typecount' => $final_count, 'fullcount' => $fullcount, 'type' => $type, 'querytime' => $qtime, 'message' => '', 'date' => date('Y-M-j H:i:s'), 'cacheoptions' => $cacheoptions, 'logtime' => $logtime, 'restart_time' => $restart_time);
                file_put_contents($log, json_encode($report) . "\n", FILE_APPEND);

                // push a commit on this one
                elgg_set_config('elgg_solr_nocommit', false);

                // check for the termination signal
                if ($logtime == elgg_get_plugin_setting('stop_reindex', 'elgg_solr')) {
                    $report = array('percent' => $percent, 'count' => $count, 'typecount' => $final_count, 'fullcount' => $fullcount, 'type' => $type, 'querytime' => $qtime, 'message' => 'Reindex has been stopped', 'date' => date('Y-M-j H:i:s'), 'cacheoptions' => $cacheoptions, 'logtime' => $logtime, 'restart_time' => $restart_time);
                    file_put_contents($log, json_encode($report) . "\n", FILE_APPEND);
                    elgg_solr_debug_log('Stopping reindex due to termination signal');
                    exit;
                }
            }

            if ($last_entity) {
                $fetch_time_start = microtime(true);
            }
        }

        // we've finished this type, unset from the cache options
        unset($cacheoptions['types'][$type]);
    }

    $report = array('percent' => '', 'count' => 0, 'typecount' => 0, 'fullcount' => 0, 'type' => '', 'querytime' => 0, 'message' => 'Reindex complete', 'date' => date('Y-M-j H:i:s'), 'logtime' => $logtime);
    file_put_contents($log, json_encode($report) . "\n", FILE_APPEND);
    elgg_set_plugin_setting('reindex_running', 0, 'elgg_solr');

    // commit the last of the entities
    $client = elgg_solr_get_client();
    $query = $client->createUpdate();
    $query->addCommit();
    try {
        $client->update($query);
    } catch (Exception $e) {
        elgg_solr_debug_log($e->getMessage());
        // restore the access state on the failure path as well
        access_show_hidden_entities($show_hidden);
        elgg_set_ignore_access($ia);
        return false;
    }

    access_show_hidden_entities($show_hidden);
    elgg_set_ignore_access($ia);
}
Example #4
0
/**
 * Search hook handler that looks up comments stored as annotations in Solr.
 *
 * NOTE - this is only used in Elgg 1.8 as comments are annotations
 *
 * Builds an eDisMax select query restricted to type:annotation /
 * subtype:generic_comment, runs it, and attaches each hit to the entity
 * found via the document's container_guid as 'search_comments_data'
 * volatile data.
 *
 * @param string $hook   Hook name (not used here)
 * @param string $type   Hook type (not used here)
 * @param mixed  $return Current hook return value (not used here)
 * @param array  $params Search parameters. Keys read here: 'query', 'offset',
 *                       'limit', 'select', 'sorts', 'qf', 'fq', 'fragsize'
 *
 * @return array|null array('entities' => ElggEntity[], 'count' => int),
 *                    or null when the Solr request fails
 */
function elgg_solr_comment_search($hook, $type, $return, $params)
{
    $entities = array();

    // optional params are checked for existence so absent keys do not raise notices
    $select = array(
        'start' => isset($params['offset']) ? $params['offset'] : 0,
        'rows' => !empty($params['limit']) ? $params['limit'] : 10,
        'fields' => array('id', 'container_guid', 'description', 'owner_guid', 'time_created', 'score'),
    );
    if (!empty($params['select']) && is_array($params['select'])) {
        $select = array_merge($select, $params['select']);
    }

    // create a client instance
    $client = elgg_solr_get_client();

    // get a select query instance
    $query = $client->createSelect($select);

    $default_sort = array('score' => 'desc', 'time_created' => 'desc');
    $sorts = !empty($params['sorts']) ? $params['sorts'] : $default_sort;
    $query->addSorts($sorts);

    $description_boost = elgg_solr_get_description_boost();

    // get the dismax component and set a boost query
    $dismax = $query->getEDisMax();
    $qf = "description^{$description_boost}";
    if (!empty($params['qf'])) {
        $qf = $params['qf'];
    }
    $dismax->setQueryFields($qf);

    $boostQuery = elgg_solr_get_boost_query();
    if ($boostQuery) {
        $dismax->setBoostQuery($boostQuery);
    }

    // this query is now a dismax query
    $query->setQuery($params['query']);

    // make sure we're only getting comments
    $params['fq']['type'] = 'type:annotation';
    $params['fq']['subtype'] = 'subtype:generic_comment';

    // $params['fq'] always holds the two entries above, so it is always merged
    $default_fq = elgg_solr_get_default_fq($params);
    $filter_queries = array_merge($default_fq, $params['fq']);
    foreach ($filter_queries as $key => $value) {
        $query->createFilterQuery($key)->setQuery($value);
    }

    // get highlighting component and apply settings
    $hl = $query->getHighlighting();
    $hl->setFields(array('description'));
    $hl_prefix = elgg_solr_get_hl_prefix();
    $hl_suffix = elgg_solr_get_hl_suffix();
    $hl->setSimplePrefix($hl_prefix);
    $hl->setSimplePostfix($hl_suffix);

    $fragsize = elgg_solr_get_fragsize();
    if (isset($params['fragsize'])) {
        $fragsize = (int) $params['fragsize'];
    }
    $hl->setFragSize($fragsize);

    // this executes the query and returns the result
    try {
        $resultset = $client->select($query);
    } catch (Exception $e) {
        elgg_solr_debug_log($e->getMessage());
        return null;
    }

    // Get the highlighted snippet
    try {
        $highlighting = $resultset->getHighlighting();
    } catch (Exception $e) {
        elgg_solr_debug_log($e->getMessage());
        return null;
    }

    // Count the total number of documents found by solr
    $count = $resultset->getNumFound();

    $show_score = elgg_get_plugin_setting('show_score', 'elgg_solr');

    $config = HTMLPurifier_Config::createDefault();
    $purifier = new HTMLPurifier($config);

    foreach ($resultset as $document) {
        // comments entity_guid stored as container_guid in solr
        $entity = get_entity($document->container_guid);
        if (!$entity) {
            $entity = new ElggObject();
            $entity->setVolatileData('search_unavailable_entity', true);
        }

        // start every document with an empty snippet so a hit without
        // highlighting cannot inherit the previous document's text
        $snippet = '';

        // highlighting results can be fetched by document id (the field defined as uniquekey in this schema)
        $highlightedDoc = $highlighting->getResult($document->id);
        if ($highlightedDoc) {
            foreach ($highlightedDoc as $highlight) {
                $snippet = implode(' (...) ', $highlight);
                $snippet = $purifier->purify($snippet);
            }
        }

        if (!$snippet) {
            $snippet = search_get_highlighted_relevant_substrings(elgg_get_excerpt($document->description), $params['query']);
        }

        if ($show_score == 'yes' && elgg_is_admin_logged_in()) {
            $snippet .= elgg_view('output/longtext', array('value' => elgg_echo('elgg_solr:relevancy', array($document->score)), 'class' => 'elgg-subtext'));
        }

        $comments_data = $entity->getVolatileData('search_comments_data');
        if (!$comments_data) {
            $comments_data = array();
        }
        // the annotation id is whatever follows the first ':' in the document id
        $comments_data[] = array('annotation_id' => substr(strstr(elgg_strip_tags($document->id), ':'), 1), 'text' => $snippet, 'owner_guid' => $document->owner_guid, 'time_created' => $document->time_created);
        $entity->setVolatileData('search_comments_data', $comments_data);

        $entities[] = $entity;
    }

    return array('entities' => $entities, 'count' => $count);
}
Example #5
0
/**
 * Flush the deferred annotation update and delete queues.
 *
 * The keys of the 'elgg_solr_annotation_update' config are annotation ids to
 * (re)index; the keys of 'elgg_solr_annotation_delete' are ids whose
 * "annotation:<id>" document is removed. A failed delete is recorded as an
 * 'elgg_solr_delete_cache' annotation on the site entity and logged.
 * Runs with hidden entities visible and access checks ignored, restoring
 * both states before returning.
 *
 * @return void
 */
function elgg_solr_annotations_sync()
{
    $previous_hidden_status = access_get_show_hidden_status();
    access_show_hidden_entities(true);
    $previous_ignore_access = elgg_set_ignore_access(true);

    // queued updates: only the array keys carry information
    $pending_updates = elgg_get_config('elgg_solr_annotation_update');
    if (is_array($pending_updates)) {
        foreach (array_keys($pending_updates) as $annotation_id) {
            $annotation = elgg_get_annotation_from_id($annotation_id);
            if ($annotation) {
                elgg_solr_index_annotation($annotation);
            }
        }
    }

    // queued deletions: one update request (with commit) per id
    $pending_deletes = elgg_get_config('elgg_solr_annotation_delete');
    if (is_array($pending_deletes)) {
        foreach (array_keys($pending_deletes) as $annotation_id) {
            $client = elgg_solr_get_client();
            $update = $client->createUpdate();
            $update->addDeleteById('annotation:' . $annotation_id);
            $update->addCommit();

            try {
                $client->update($update);
            } catch (Exception $failure) {
                // remember the failed delete on the site entity, then log it
                elgg_get_site_entity()->annotate('elgg_solr_delete_cache', 'annotation:' . $annotation_id, ACCESS_PUBLIC);
                elgg_solr_debug_log($failure->getMessage());
            }
        }
    }

    access_show_hidden_entities($previous_hidden_status);
    elgg_set_ignore_access($previous_ignore_access);
}