Example #1
0
/**
 * Reindex annotations in Solr, one registered annotation type at a time.
 *
 * While running, the 'reindex_running' plugin setting is set to 1 and access
 * restrictions / hidden-entity filtering are disabled; both are restored on
 * every return path. Progress is appended to
 * <dataroot>/elgg_solr/<logtime>.txt as one JSON-encoded report per line.
 *
 * The set of types comes from the 'elgg_solr_reindex_annotation_options'
 * config value, falling back to the 'elgg_solr:can_index', 'annotation' hook.
 * An optional time window is read from the 'elgg_solr_time_options' config
 * value, and a restart upper bound from 'elgg_solr_restart_time'.
 *
 * Every 200 annotations a progress report is written and the 'stop_reindex'
 * plugin setting is checked; if it equals the current log time the script
 * exits.
 *
 * @return void|false false when the final Solr commit throws, nothing otherwise
 */
function elgg_solr_annotation_reindex()
{
    set_time_limit(0);
    $ia = elgg_set_ignore_access(true);
    $show_hidden = access_get_show_hidden_status();
    access_show_hidden_entities(true);

    // lock the function
    elgg_set_plugin_setting('reindex_running', 1, 'elgg_solr');

    if (!file_exists(elgg_get_config('dataroot') . 'elgg_solr')) {
        mkdir(elgg_get_config('dataroot') . 'elgg_solr');
    }

    // reuse the log of an interrupted run when a restart log time is configured
    $logtime = elgg_get_config('elgg_solr_restart_logtime');
    if (!$logtime) {
        $logtime = time();
    }
    $log = elgg_get_config('dataroot') . 'elgg_solr/' . $logtime . '.txt';
    elgg_set_plugin_setting('current_log', $logtime, 'elgg_solr');

    // start the log: each line is one JSON-encoded progress report
    $report = array('percent' => '', 'count' => 0, 'typecount' => 0, 'fullcount' => 0, 'type' => '', 'querytime' => 0, 'message' => 'Initializing Reindex', 'date' => date('Y-M-j H:i:s'), 'logtime' => $logtime);
    file_put_contents($log, json_encode($report) . "\n", FILE_APPEND);

    $debug = get_input('debug', false);
    if ($debug) {
        elgg_set_config('elgg_solr_debug', 1);
    }

    $registered_types = elgg_get_config('elgg_solr_reindex_annotation_options');
    if (!$registered_types) {
        $registered_types = _elgg_services()->hooks->trigger('elgg_solr:can_index', 'annotation', [], []);
    }
    if (!$registered_types) {
        // nothing to reindex: release the lock and restore the access state
        // so this path does not leave the site flagged as "reindex running"
        elgg_set_plugin_setting('reindex_running', 0, 'elgg_solr');
        access_show_hidden_entities($show_hidden);
        elgg_set_ignore_access($ia);
        return;
    }

    // build our options and cache them in case we need to restart it
    $cacheoptions = array('types' => $registered_types);
    $options = array();
    $time = elgg_get_config('elgg_solr_time_options');
    $has_time = $time && is_array($time);
    if ($has_time) {
        $options['wheres'] = array("n_table.time_created >= {$time['starttime']}", "n_table.time_created <= {$time['endtime']}");
        $cacheoptions['starttime'] = $time['starttime'];
        $cacheoptions['endtime'] = $time['endtime'];
    }

    // tell our indexer not to commit right away
    elgg_set_config('elgg_solr_nocommit', true);

    $fullcount = 0;
    foreach ($registered_types as $type => $subtypes) {
        $options['limit'] = false;

        // a restart bound applies once, then the configured end time takes over
        $restart_time = elgg_get_config('elgg_solr_restart_time');
        if ($restart_time) {
            elgg_set_config('elgg_solr_restart_time', false);
            $options['wheres'][1] = "n_table.time_created <= {$restart_time}";
        } elseif ($has_time && !empty($time['endtime'])) {
            $options['wheres'][1] = "n_table.time_created <= {$time['endtime']}";
        }

        // set the names for this type only; never inherit the previous type's names
        unset($options['annotation_names']);
        if ($subtypes) {
            $options['annotation_names'] = is_array($subtypes) ? $subtypes : array($subtypes);
        }

        // this iteration fixes a bug https://github.com/Elgg/Elgg/issues/7561
        // uses a custom getter which only fetches the ids in a single large-batch query
        // which is much more efficient than standard egef
        $batch_size = (int) elgg_get_plugin_setting('reindex_batch_size', 'elgg_solr');
        $batch_size = $batch_size ?: 1000;
        $annotations = new ElggBatch('elgg_solr_get_annotation_ids', $options, null, $batch_size);
        $final_count = elgg_solr_get_annotation_ids(array_merge($options, array('count' => true)));

        // disable committing on each entity for performance
        elgg_set_config('elgg_solr_nocommit', true);

        $count = 0;
        $fetch_time = 0; // defined up front: with a batch size of 1 the "first in batch" test never fires
        $fetch_time_start = microtime(true);
        foreach ($annotations as $a) {
            elgg_solr_debug_log($a->id);
            $count++;
            $fullcount++;
            $first_entity = (bool) ($count % $batch_size == 1);
            $last_entity = (bool) ($count % $batch_size == 0);
            if ($first_entity) {
                // first entity of a new batch: time elapsed since the previous
                // batch ended is the query time in seconds
                $fetch_time = microtime(true) - $fetch_time_start;
            }

            $annotation = elgg_get_annotation_from_id($a->id);
            if ($annotation) {
                elgg_solr_index_annotation($annotation);
                elgg_set_config('elgg_solr_nocommit', true);
            }

            if (!($count % 200)) {
                $qtime = round($fetch_time, 4);
                $percent = $final_count ? round($count / $final_count * 100) : 0;
                if ($annotation) {
                    $restart_time = $annotation->time_created;
                }
                $report = array('percent' => $percent, 'count' => $count, 'typecount' => $final_count, 'fullcount' => $fullcount, 'type' => $type, 'querytime' => $qtime, 'message' => '', 'date' => date('Y-M-j H:i:s'), 'cacheoptions' => $cacheoptions, 'logtime' => $logtime, 'restart_time' => $restart_time);
                file_put_contents($log, json_encode($report) . "\n", FILE_APPEND);

                // push a commit on the next indexed annotation
                elgg_set_config('elgg_solr_nocommit', false);

                // check for the termination signal
                if ($logtime == elgg_get_plugin_setting('stop_reindex', 'elgg_solr')) {
                    $report['message'] = 'Reindex has been stopped';
                    $report['date'] = date('Y-M-j H:i:s');
                    file_put_contents($log, json_encode($report) . "\n", FILE_APPEND);
                    elgg_solr_debug_log('Stopping reindex due to termination signal');
                    // NOTE(review): 'reindex_running' is not reset on this path —
                    // presumably the stop/restart flow handles it; confirm.
                    exit;
                }
            }

            if ($last_entity) {
                $fetch_time_start = microtime(true);
            }
        }

        // we've finished this type, unset from the cache options
        unset($cacheoptions['types'][$type]);
    }

    $report = array('percent' => '', 'count' => 0, 'typecount' => 0, 'fullcount' => 0, 'type' => '', 'querytime' => 0, 'message' => 'Reindex complete', 'date' => date('Y-M-j H:i:s'), 'logtime' => $logtime);
    file_put_contents($log, json_encode($report) . "\n", FILE_APPEND);
    elgg_set_plugin_setting('reindex_running', 0, 'elgg_solr');

    // commit the last of the entities
    $committed = true;
    $client = elgg_solr_get_client();
    $query = $client->createUpdate();
    $query->addCommit();
    try {
        $client->update($query);
    } catch (Exception $e) {
        elgg_solr_debug_log($e->getMessage());
        $committed = false;
    }

    // restore the access state whether or not the commit succeeded
    access_show_hidden_entities($show_hidden);
    elgg_set_ignore_access($ia);

    if (!$committed) {
        return false;
    }
}
Example #2
0
/**
 * Reindex all 'generic_comment' annotations in Solr.
 *
 * While running, the 'reindex_running' plugin setting is set to 1 and access
 * restrictions / hidden-entity filtering are disabled; both are restored at
 * the end. Progress is appended to <dataroot>/elgg_solr/<timestamp>.txt as one
 * JSON-encoded report per line (every 200 comments, plus a start and a
 * completion line).
 *
 * An optional time window is read from the 'elgg_solr_time_options' config
 * value. Indexing runs with per-item commits disabled; a commit is allowed
 * every 10000 comments and a final commit is sent once the loop finishes.
 *
 * @return void
 */
function elgg_solr_comment_reindex()
{
    set_time_limit(0);
    $ia = elgg_set_ignore_access(true);
    $show_hidden = access_get_show_hidden_status();
    access_show_hidden_entities(true);

    $debug = get_input('debug', false);
    if ($debug) {
        elgg_set_config('elgg_solr_debug', 1);
    }

    // lock the function
    elgg_set_plugin_setting('reindex_running', 1, 'elgg_solr');

    if (!file_exists(elgg_get_config('dataroot') . 'elgg_solr')) {
        mkdir(elgg_get_config('dataroot') . 'elgg_solr');
    }

    $logtime = time();
    $log = elgg_get_config('dataroot') . 'elgg_solr/' . $logtime . '.txt';
    elgg_set_plugin_setting('current_log', $logtime, 'elgg_solr');

    // start the log: each line is one JSON-encoded progress report
    $report = array('percent' => '', 'count' => 0, 'typecount' => 0, 'fullcount' => 0, 'type' => 'Comments', 'querytime' => 0, 'message' => 'Initializing Reindex', 'date' => date('Y-M-j H:i:s'));
    file_put_contents($log, json_encode($report) . "\n", FILE_APPEND);

    // tell our indexer not to commit right away
    elgg_set_config('elgg_solr_nocommit', true);

    $count = 0;

    // index comments
    $options = array('annotation_name' => 'generic_comment', 'limit' => false);
    $time_options = elgg_get_config('elgg_solr_time_options');
    if ($time_options && is_array($time_options)) {
        $options['annotation_created_time_lower'] = $time_options['starttime'];
        $options['annotation_created_time_upper'] = $time_options['endtime'];
    }

    // default the batch size like the annotation reindex does; an unset
    // setting would otherwise make the modulo below divide by zero
    $batch_size = (int) elgg_get_plugin_setting('reindex_batch_size', 'elgg_solr');
    $batch_size = $batch_size ?: 1000;

    $comments = new ElggBatch('elgg_get_annotations', $options, null, $batch_size);
    $final_count = elgg_get_annotations(array_merge($options, array('count' => true)));

    $fetch_time = 0; // defined up front: with a batch size of 1 the "first in batch" test never fires
    $fetch_time_start = microtime(true);
    foreach ($comments as $comment) {
        $count++;
        $first_entity = (bool) ($count % $batch_size == 1);
        $last_entity = (bool) ($count % $batch_size == 0);
        if ($first_entity) {
            // first entity of a new batch: time elapsed since the previous
            // batch ended is the query time in seconds
            $fetch_time = microtime(true) - $fetch_time_start;
        }

        if (!($count % 10000)) {
            // push a commit on this one (every 10000th comment only)
            elgg_set_config('elgg_solr_nocommit', false);
        }

        if ($comment) {
            elgg_solr_index_annotation($comment);
            elgg_set_config('elgg_solr_nocommit', true);
        }

        if (!($count % 200)) {
            $qtime = round($fetch_time, 4);
            $percent = $final_count ? round($count / $final_count * 100) : 0;
            $report = array('percent' => $percent, 'count' => $count, 'typecount' => $final_count, 'fullcount' => $count, 'type' => 'Comments', 'querytime' => $qtime, 'message' => '', 'date' => date('Y-M-j H:i:s'));
            file_put_contents($log, json_encode($report) . "\n", FILE_APPEND);
        }

        if ($last_entity) {
            $fetch_time_start = microtime(true);
        }
    }

    // commit whatever was indexed since the last periodic commit
    $client = elgg_solr_get_client();
    $query = $client->createUpdate();
    $query->addCommit();
    try {
        $client->update($query);
    } catch (Exception $e) {
        // best effort: log and still finish the run so the lock is released
        elgg_solr_debug_log($e->getMessage());
    }

    if ($debug) {
        elgg_solr_debug_log($count . ' entities sent to Solr');
    }

    $report = array('percent' => 100, 'count' => $count, 'typecount' => $final_count, 'fullcount' => $count, 'type' => 'Comments', 'querytime' => 0, 'message' => 'Comment Reindex has been completed', 'date' => date('Y-M-j H:i:s'));
    file_put_contents($log, json_encode($report) . "\n", FILE_APPEND);

    elgg_set_ignore_access($ia);
    access_show_hidden_entities($show_hidden);
    elgg_set_plugin_setting('reindex_running', 0, 'elgg_solr');
}
Example #3
0
/**
 * Flush the queued annotation changes to Solr.
 *
 * Indexes every annotation id listed in the 'elgg_solr_annotation_sync'
 * config value, then deletes from Solr every id that is a key of the
 * 'elgg_solr_annotation_delete' config array. A delete that throws is
 * recorded as an 'elgg_solr_delete_cache' annotation on the site entity.
 *
 * Hidden entities are made visible for the duration of the call and the
 * previous setting is restored before returning, on every path.
 *
 * @return true|null true when there was nothing queued for indexing;
 *                   otherwise no value is returned
 */
function elgg_solr_annotations_sync()
{
    $access = access_get_show_hidden_status();
    access_show_hidden_entities(true);

    $ids = elgg_get_config('elgg_solr_annotation_sync');
    $nothing_to_index = !$ids;

    if (!$nothing_to_index) {
        foreach ($ids as $id) {
            $annotation = elgg_get_annotation_from_id($id);
            if (!$annotation) {
                // annotation no longer exists; nothing to index
                continue;
            }
            elgg_solr_index_annotation($annotation);
        }
    }

    // queued deletions are processed even when nothing was queued for indexing
    $delete_ids = elgg_get_config('elgg_solr_annotation_delete');
    if (is_array($delete_ids)) {
        foreach ($delete_ids as $g => $foo) {
            $client = elgg_solr_get_client();
            $query = $client->createUpdate();
            $query->addDeleteById('annotation:' . $g);
            $query->addCommit();
            try {
                $client->update($query);
            } catch (Exception $exc) {
                // record the failed delete on the site entity
                elgg_get_site_entity()->annotate('elgg_solr_delete_cache', 'annotation:' . $g, ACCESS_PUBLIC);
            }
        }
    }

    // always restore the caller's hidden-entity visibility
    access_show_hidden_entities($access);

    if ($nothing_to_index) {
        // keep the historical return value for the "nothing queued" case
        return true;
    }
}