/**
 * Reindex annotations of every registered annotation type into Solr.
 *
 * Progress is appended as one JSON document per line to
 * <dataroot>/elgg_solr/<logtime>.txt. The 'reindex_running' plugin setting is
 * set to 1 while the reindex runs and back to 0 when it finishes. Every 200
 * annotations a progress line is written, a commit is enabled for the next
 * indexed annotation, and the 'stop_reindex' plugin setting is checked; when it
 * equals the current log time the script exits.
 *
 * @return void|false False when the final Solr commit throws, otherwise nothing
 */
function elgg_solr_annotation_reindex() {
	set_time_limit(0);

	$ia = elgg_set_ignore_access(true);
	$show_hidden = access_get_show_hidden_status();
	access_show_hidden_entities(true);

	// lock the function
	elgg_set_plugin_setting('reindex_running', 1, 'elgg_solr');

	$log_dir = elgg_get_config('dataroot') . 'elgg_solr';
	if (!file_exists($log_dir)) {
		mkdir($log_dir);
	}

	// a restarted reindex keeps appending to the log of the run it resumes
	$logtime = elgg_get_config('elgg_solr_restart_logtime');
	if (!$logtime) {
		$logtime = time();
	}

	$log = $log_dir . '/' . $logtime . '.txt';
	elgg_set_plugin_setting('current_log', $logtime, 'elgg_solr');

	// initialize the log
	$report = array(
		'percent' => '',
		'count' => 0,
		'typecount' => 0,
		'fullcount' => 0,
		'type' => '',
		'querytime' => 0,
		'message' => 'Initializing Reindex',
		'date' => date('Y-M-j H:i:s'),
		'logtime' => $logtime
	);
	file_put_contents($log, json_encode($report) . "\n", FILE_APPEND);

	$debug = get_input('debug', false);
	if ($debug) {
		elgg_set_config('elgg_solr_debug', 1);
	}

	$registered_types = elgg_get_config('elgg_solr_reindex_annotation_options');
	if (!$registered_types) {
		$registered_types = _elgg_services()->hooks->trigger('elgg_solr:can_index', 'annotation', [], []);
	}

	if (!$registered_types) {
		// nothing to reindex: release the lock and restore the access state
		// that was changed above before bailing out
		elgg_set_plugin_setting('reindex_running', 0, 'elgg_solr');
		access_show_hidden_entities($show_hidden);
		elgg_set_ignore_access($ia);
		return;
	}

	// build our options and cache them in case we need to restart it
	$cacheoptions = array(
		'types' => $registered_types
	);

	$options = array();
	$time = elgg_get_config('elgg_solr_time_options');
	$has_time = $time && is_array($time);
	if ($has_time) {
		$options['wheres'] = array(
			"n_table.time_created >= {$time['starttime']}",
			"n_table.time_created <= {$time['endtime']}"
		);

		$cacheoptions['starttime'] = $time['starttime'];
		$cacheoptions['endtime'] = $time['endtime'];
	}

	elgg_set_config('elgg_solr_nocommit', true); // tell our indexer not to commit right away

	$fullcount = 0;
	foreach ($registered_types as $type => $subtypes) {
		$options['limit'] = false;

		$restart_time = elgg_get_config('elgg_solr_restart_time');
		if ($restart_time) {
			// the restart point only applies once, to the first type processed
			elgg_set_config('elgg_solr_restart_time', false);
			$options['wheres'][1] = "n_table.time_created <= {$restart_time}";
		} elseif ($has_time && !empty($time['endtime'])) {
			$options['wheres'][1] = "n_table.time_created <= {$time['endtime']}";
		}

		if ($subtypes) {
			$options['annotation_names'] = is_array($subtypes) ? $subtypes : array($subtypes);
		} else {
			// do not let the names of a previous type leak into this one
			unset($options['annotation_names']);
		}

		// this iteration fixes a bug https://github.com/Elgg/Elgg/issues/7561
		// uses a custom getter which only fetches the ids in a single large-batch query
		// which is much more efficient than standard egef
		$batch_size = elgg_get_plugin_setting('reindex_batch_size', 'elgg_solr');
		$batch_size = $batch_size ?: 1000;

		$annotations = new ElggBatch('elgg_solr_get_annotation_ids', $options, null, $batch_size);
		$final_count = elgg_solr_get_annotation_ids(array_merge($options, array('count' => true)));

		elgg_set_config('elgg_solr_nocommit', true); // disable committing on each entity for performance

		$count = 0;
		$fetch_time = 0; // defined up front so the report never reads an unset variable
		$fetch_time_start = microtime(true);
		foreach ($annotations as $a) {
			elgg_solr_debug_log($a->id);
			$count++;
			$fullcount++;

			$first_entity = (bool) ($count % $batch_size == 1);
			$last_entity = (bool) ($count % $batch_size == 0);

			if ($first_entity) {
				// this is the first entity in the new batch
				$fetch_time = microtime(true) - $fetch_time_start; // the query time in seconds
			}

			$annotation = elgg_get_annotation_from_id($a->id);
			if ($annotation) {
				elgg_solr_index_annotation($annotation);
				elgg_set_config('elgg_solr_nocommit', true);
			}

			if (!($count % 200)) {
				$qtime = round($fetch_time, 4);
				$percent = $final_count ? round($count / $final_count * 100) : 0;
				if ($annotation) {
					$restart_time = $annotation->time_created;
				}

				$report = array(
					'percent' => $percent,
					'count' => $count,
					'typecount' => $final_count,
					'fullcount' => $fullcount,
					'type' => $type,
					'querytime' => $qtime,
					'message' => '',
					'date' => date('Y-M-j H:i:s'),
					'cacheoptions' => $cacheoptions,
					'logtime' => $logtime,
					'restart_time' => $restart_time
				);
				file_put_contents($log, json_encode($report) . "\n", FILE_APPEND);

				elgg_set_config('elgg_solr_nocommit', false); // push a commit on this one

				// check for the termination signal
				if ($logtime == elgg_get_plugin_setting('stop_reindex', 'elgg_solr')) {
					$report['message'] = 'Reindex has been stopped';
					$report['date'] = date('Y-M-j H:i:s');
					file_put_contents($log, json_encode($report) . "\n", FILE_APPEND);

					elgg_solr_debug_log('Stopping reindex due to termination signal');
					exit;
				}
			}

			if ($last_entity) {
				$fetch_time_start = microtime(true);
			}
		}

		// we've finished this type, unset from the cache options
		unset($cacheoptions['types'][$type]);
	}

	$report = array(
		'percent' => '',
		'count' => 0,
		'typecount' => 0,
		'fullcount' => 0,
		'type' => '',
		'querytime' => 0,
		'message' => 'Reindex complete',
		'date' => date('Y-M-j H:i:s'),
		'logtime' => $logtime
	);
	file_put_contents($log, json_encode($report) . "\n", FILE_APPEND);

	elgg_set_plugin_setting('reindex_running', 0, 'elgg_solr');

	// commit the last of the entities
	$client = elgg_solr_get_client();
	$query = $client->createUpdate();
	$query->addCommit();

	$result = null;
	try {
		$client->update($query);
	} catch (Exception $e) {
		elgg_solr_debug_log($e->getMessage());
		$result = false;
	}

	// restore the access state on both the success and the failure path
	access_show_hidden_entities($show_hidden);
	elgg_set_ignore_access($ia);

	return $result;
}
/**
 * Reindex all 'generic_comment' annotations into Solr.
 *
 * Progress is appended as one JSON document per line to
 * <dataroot>/elgg_solr/<timestamp>.txt, with a progress line every 200
 * comments. The 'reindex_running' plugin setting is 1 while the reindex runs
 * and is reset to 0 at the end. Commits are suppressed while indexing except
 * on every 10000th comment, and a final commit is sent once the loop is done.
 *
 * @return void
 */
function elgg_solr_comment_reindex() {
	set_time_limit(0);

	$ia = elgg_set_ignore_access(true);
	$show_hidden = access_get_show_hidden_status();
	access_show_hidden_entities(true);

	$debug = get_input('debug', false);
	if ($debug) {
		elgg_set_config('elgg_solr_debug', 1);
	}

	// lock the function
	elgg_set_plugin_setting('reindex_running', 1, 'elgg_solr');

	$log_dir = elgg_get_config('dataroot') . 'elgg_solr';
	if (!file_exists($log_dir)) {
		mkdir($log_dir);
	}

	$logtime = time();
	$log = $log_dir . '/' . $logtime . '.txt';
	elgg_set_plugin_setting('current_log', $logtime, 'elgg_solr');

	// initialize the log
	$report = array(
		'percent' => '',
		'count' => 0,
		'typecount' => 0,
		'fullcount' => 0,
		'type' => 'Comments',
		'querytime' => 0,
		'message' => 'Initializing Reindex',
		'date' => date('Y-M-j H:i:s')
	);
	file_put_contents($log, json_encode($report) . "\n", FILE_APPEND);

	elgg_set_config('elgg_solr_nocommit', true); // tell our indexer not to commit right away

	$count = 0;

	// index comments
	$options = array(
		'annotation_name' => 'generic_comment',
		'limit' => false
	);

	$time_options = elgg_get_config('elgg_solr_time_options');
	if ($time_options && is_array($time_options)) {
		$options['annotation_created_time_lower'] = $time_options['starttime'];
		$options['annotation_created_time_upper'] = $time_options['endtime'];
	}

	// fall back to the same default as the annotation reindex; an unset
	// setting would otherwise make the modulo below divide by zero
	$batch_size = elgg_get_plugin_setting('reindex_batch_size', 'elgg_solr');
	$batch_size = $batch_size ?: 1000;

	$comments = new ElggBatch('elgg_get_annotations', $options, null, $batch_size);
	$final_count = elgg_get_annotations(array_merge($options, array('count' => true)));

	$fetch_time = 0; // defined up front so the report never reads an unset variable
	$fetch_time_start = microtime(true);
	foreach ($comments as $comment) {
		$count++;
		$first_entity = (bool) ($count % $batch_size == 1);
		$last_entity = (bool) ($count % $batch_size == 0);

		if ($first_entity) {
			// this is the first entity in the new batch
			$fetch_time = microtime(true) - $fetch_time_start; // the query time in seconds
		}

		// only every 10000th comment triggers a commit; committing on all
		// the others would defeat the nocommit batching
		if (!($count % 10000)) {
			elgg_set_config('elgg_solr_nocommit', false); // push a commit on this one
		}

		if ($comment) {
			elgg_solr_index_annotation($comment);
			elgg_set_config('elgg_solr_nocommit', true);
		}

		if (!($count % 200)) {
			$qtime = round($fetch_time, 4);
			$percent = $final_count ? round($count / $final_count * 100) : 0;
			$report = array(
				'percent' => $percent,
				'count' => $count,
				'typecount' => $final_count,
				'fullcount' => $count,
				'type' => 'Comments',
				'querytime' => $qtime,
				'message' => '',
				'date' => date('Y-M-j H:i:s')
			);
			file_put_contents($log, json_encode($report) . "\n", FILE_APPEND);
		}

		if ($last_entity) {
			$fetch_time_start = microtime(true);
		}
	}

	// commit the last of the comments, which were indexed without a commit
	try {
		$client = elgg_solr_get_client();
		$query = $client->createUpdate();
		$query->addCommit();
		$client->update($query);
	} catch (Exception $e) {
		elgg_solr_debug_log($e->getMessage());
	}

	if ($debug) {
		elgg_solr_debug_log($count . ' entities sent to Solr');
	}

	$report = array(
		'percent' => 100,
		'count' => $count,
		'typecount' => $final_count,
		'fullcount' => $count,
		'type' => 'Comments',
		'querytime' => 0,
		'message' => 'Comment Reindex has been completed',
		'date' => date('Y-M-j H:i:s')
	);
	file_put_contents($log, json_encode($report) . "\n", FILE_APPEND);

	elgg_set_ignore_access($ia);
	access_show_hidden_entities($show_hidden);

	elgg_set_plugin_setting('reindex_running', 0, 'elgg_solr');
}
/**
 * Flush the queued annotation changes to Solr.
 *
 * Indexes every annotation id found in the 'elgg_solr_annotation_sync' config
 * value, then deletes from Solr every id found as a key of the
 * 'elgg_solr_annotation_delete' config value. A delete that throws is recorded
 * as an 'elgg_solr_delete_cache' annotation on the site entity.
 *
 * Hidden entities are shown while this runs and the previous setting is
 * restored on every path before returning.
 *
 * @return true|null True when no annotations were queued for indexing,
 *                   otherwise null (kept for backward compatibility)
 */
function elgg_solr_annotations_sync() {
	$access = access_get_show_hidden_status();
	access_show_hidden_entities(true);

	$ids = elgg_get_config('elgg_solr_annotation_sync');
	if ($ids && is_array($ids)) {
		foreach ($ids as $id) {
			$annotation = elgg_get_annotation_from_id($id);
			if (!$annotation) {
				// the annotation no longer exists, nothing to index
				continue;
			}

			elgg_solr_index_annotation($annotation);
		}
	}

	// deletions are independent of the index queue and must be processed
	// even when there was nothing to index
	$delete_ids = elgg_get_config('elgg_solr_annotation_delete');
	if ($delete_ids && is_array($delete_ids)) {
		$client = elgg_solr_get_client();

		// the annotation ids are the array keys; the values are not used
		foreach (array_keys($delete_ids) as $delete_id) {
			$document_id = 'annotation:' . $delete_id;

			$query = $client->createUpdate();
			$query->addDeleteById($document_id);
			$query->addCommit();

			try {
				$client->update($query);
			} catch (Exception $exc) {
				// record the failed delete on the site entity
				elgg_get_site_entity()->annotate('elgg_solr_delete_cache', $document_id, ACCESS_PUBLIC);
			}
		}
	}

	access_show_hidden_entities($access);

	if (!$ids) {
		return true;
	}
}