if (isset($_SESSION['sniff_navigator']) && $_SESSION['sniff_navigator']!="checked") { $_SESSION['sniff_navigator']=Security::remove_XSS($_POST['sniff_navigator']); $_SESSION['sniff_screen_size_w']=Security::remove_XSS($_POST['sniff_navigator_screen_size_w']); $_SESSION['sniff__screen_size_h']=Security::remove_XSS($_POST['sniff_navigator_screen_size_h']); $_SESSION['sniff_type_mimetypes']=Security::remove_XSS($_POST['sniff_navigator_type_mimetypes']); $_SESSION['sniff_suffixes_mimetypes']=Security::remove_XSS($_POST['sniff_navigator_suffixes_mimetypes']); $_SESSION['sniff_list_plugins']=Security::remove_XSS($_POST['sniff_navigator_list_plugins']); $_SESSION['sniff_check_some_activex']=Security::remove_XSS($_POST['sniff_navigator_check_some_activex']); $_SESSION['sniff_check_some_plugins']=Security::remove_XSS($_POST['sniff_navigator_check_some_plugins']); $_SESSION['sniff_java']=Security::remove_XSS($_POST['sniff_navigator_java']); $_SESSION['sniff_java_sun_ver']=Security::remove_XSS($_POST['sniff_navigator_java_sun_ver']); } */ /* MAIN CODE */ $controller = new IndexManager(get_lang('MyCourses')); // Main courses and session list $courses_and_sessions = $controller->return_courses_and_sessions($user_id); //Show the chamilo mascot if (empty($courses_and_sessions) && !isset($_GET['history'])) { $controller->tpl->assign('welcome_to_course_block', $controller->return_welcome_to_course_block()); } $controller->tpl->assign('content', $courses_and_sessions); if (api_get_setting('allow_browser_sniffer') == 'true') { if ($_SESSION['sniff_navigator']!="checked") {
$cidReset = true; require_once 'main/inc/global.inc.php'; require_once 'main/chat/chat_functions.lib.php'; //require_once 'main/auth/external_login/facebook.inc.php'; // The section (for the tabs). $this_section = SECTION_CAMPUS; $header_title = null; if (!api_is_anonymous()) { $header_title = " "; } // Facebook connexion, if activated /*if (api_is_facebook_auth_activated() && !api_get_user_id()) { facebookConnect(); } */ $controller = new IndexManager($header_title); //Actions $loginFailed = isset($_GET['loginFailed']) ? true : isset($loginFailed); if (!empty($_GET['logout'])) { $redirect = !empty($_GET['no_redirect']) ? false : true; $controller->logout($redirect); } /* Table definitions */ /* Constants and CONFIGURATION parameters */ /** @todo these configuration settings should move to the Chamilo config settings. */ /** Defines wether or not anonymous visitors can see a list of the courses on the Chamilo homepage that are open to the world. */ $_setting['display_courses_to_anonymous_users'] = 'true'; /* LOGIN */ /** * Registers in the track_e_default table (view in important activities in admin * interface) a possible attempted break in, sending auth data through get.
*/ function changeMyCoursesView(inView) { $.cookie("defaultMyCourseView"+userId, inView, { expires: 365 }); if (inView == ' . IndexManager::VIEW_BY_SESSION . ') { $("#viewBySession").addClass("btn-primary"); $("#viewByDefault").removeClass("btn-primary"); } else { $("#viewByDefault").addClass("btn-primary"); $("#viewBySession").removeClass("btn-primary"); } } </script> '; } $controller = new IndexManager(get_lang('MyCourses')); // Main courses and session list //$courseAndSessions = $controller->returnCoursesAndSessions($userId); // Main courses and session list if (isset($_COOKIE['defaultMyCourseView' . $userId]) && $_COOKIE['defaultMyCourseView' . $userId] == IndexManager::VIEW_BY_SESSION && $displayMyCourseViewBySessionLink) { $courseAndSessions = $controller->returnCoursesAndSessionsViewBySession($userId); IndexManager::setDefaultMyCourseView(IndexManager::VIEW_BY_SESSION, $userId); } else { $courseAndSessions = $controller->returnCoursesAndSessions($userId); IndexManager::setDefaultMyCourseView(IndexManager::VIEW_BY_DEFAULT, $userId); } // if teacher, session coach or admin, display the button to change te course view if ($displayMyCourseViewBySessionLink && (api_is_drh() || api_is_course_coach() || api_is_platform_admin() || api_is_session_admin() || api_is_teacher())) { $courseAndSessions['html'] = "<div class='view-by-session-link'>\n\t\t<div class='btn-group pull-right'>\n\t\t<a class='btn btn-default' id='viewByDefault' href='user_portal.php' onclick='changeMyCoursesView(\"" . IndexManager::VIEW_BY_DEFAULT . "\")'>\n\t\t" . get_lang('MyCoursesDefaultView') . "\n\t\t</a>\n\t\t<a class='btn btn-default' id='viewBySession' href='user_portal.php' onclick='changeMyCoursesView(\"" . IndexManager::VIEW_BY_SESSION . "\")'>\n\t\t" . get_lang('MyCoursesSessionView') . "\n\t\t</a>\n\t\t</div>\n\t</div><br /><br />\n\t" . $courseAndSessions['html']; } // Check if a user is enrolled only in one course for going directly to the course after the login.
/**
 * Brings the indexes in the database in line with the ones declared here.
 *
 * The work is delegated to an IndexManager constructed around this object.
 *
 * @param boolean $delete (optional) true by default; forwarded to
 *      IndexManager::commit() to ask for unknown and old indexes to be dropped
 *
 * @return boolean the value reported by IndexManager::commit()
 *
 * @api
 */
public function ensureIndexes($delete = true)
{
    $manager = new IndexManager($this);
    $committed = $manager->commit($delete);

    return $committed;
}
/**
 * Pulls the phrases (runs of adjacent words) out of $string. Terms that
 * occur inside an extracted phrase are not extracted separately. The array
 * key of each returned entry indicates the position of the phrase.
 *
 * @param string $string subject to extract phrases from
 * @param string $lang locale tag for stemming
 * @param string $index_name name of the index consulted for document counts
 *      while deciding which phrases to keep; when NULL the stemmed terms are
 *      returned as they are
 * @param bool $exact_match whether the match has to be exact or not; exact
 *      matches skip the index based phrase selection
 * @param int $threshold document count at which a candidate phrase is
 *      accepted without looking for further candidates
 * @return array of phrases
 */
static function extractPhrases($string, $lang = NULL, $index_name = NULL,
    $exact_match = false, $threshold = 10)
{
    if (!isset(self::$programming_language_map[$lang])) {
        self::canonicalizePunctuatedTerms($string, $lang);
    } else {
        // strip the leading control word that marks a programming language
        $control_word = self::$programming_language_map[$lang] .
            self::CONTROL_WORD_INDICATOR;
        $string = trim(substr($string, strlen($control_word) + 1));
    }
    $terms = self::stemCharGramSegment($string, $lang);
    $num = count($terms);
    if ($index_name == NULL || $num <= 1) {
        return $terms;
    }
    /* for exact phrase search do not use suffix tree stuff for now */
    if ($exact_match) {
        return $terms;
    }
    // only the first MAX_QUERY_TERMS terms take part in phrase building
    $first_terms = ($num > MAX_QUERY_TERMS) ?
        array_slice($terms, 0, MAX_QUERY_TERMS) : $terms;
    $whole_phrase = implode(" ", $first_terms);
    $count_whole_phrase = IndexManager::numDocsTerm($whole_phrase,
        $index_name, $threshold);
    if ($count_whole_phrase >= $threshold || $num > SUFFIX_TREE_THRESHOLD) {
        return array($whole_phrase, $terms[0]);
    }
    if ($count_whole_phrase > 0) {
        // the phrase is rare: keep it if some single term is far more common
        foreach ($terms as $term) {
            $count_term = IndexManager::numDocsTerm($term, $index_name,
                5 * $threshold);
            if ($count_term > 50 * $count_whole_phrase) {
                return array($whole_phrase, $terms[0]);
            }
        }
    } else if ($num > 2) {
        // the whole phrase is unknown: try it without its last term ...
        $start_terms = $first_terms;
        $last_term = array_pop($start_terms);
        $start_phrase = implode(" ", $start_terms);
        $count_start = IndexManager::numDocsTerm($start_phrase,
            $index_name, $threshold);
        if ($count_start >= $threshold) {
            return array($start_phrase, $last_term, $terms[0]);
        }
        // ... and then without its first term
        $end_terms = $first_terms;
        $first_term = array_shift($end_terms);
        $end_phrase = implode(" ", $end_terms);
        $count_end = IndexManager::numDocsTerm($end_phrase, $index_name,
            $threshold);
        if ($count_end >= $threshold) {
            return array($first_term, $end_phrase);
        }
    }
    // both paths below hand back the same terms; the version lookup is kept
    // exactly as it was
    if ($index_name != 'feed' && IndexManager::getVersion($index_name) == 0) {
        return $terms; //old style index before max phrase extraction
    }
    return $terms;
}
exit; } $userResetPasswordSetting = api_get_setting('user_reset_password'); if ($userResetPasswordSetting === 'true') { $user = Database::getManager()->getRepository('ChamiloUserBundle:User')->find($user['uid']); Login::sendResetEmail($user, true); if (CustomPages::enabled() && CustomPages::exists(CustomPages::INDEX_UNLOGGED)) { CustomPages::display(CustomPages::INDEX_UNLOGGED, ['info' => get_lang('CheckYourEmailAndFollowInstructions')]); exit; } header('Location: ' . api_get_path(WEB_PATH)); exit; } $messageText = Login::handle_encrypted_password($user, true); if (CustomPages::enabled() && CustomPages::exists(CustomPages::INDEX_UNLOGGED)) { CustomPages::display(CustomPages::INDEX_UNLOGGED, ['info' => $messageText]); exit; } Display::addFlash(Display::return_message($messageText)); header('Location: ' . api_get_path(WEB_PATH)); exit; } if (CustomPages::enabled() && CustomPages::exists(CustomPages::LOST_PASSWORD)) { CustomPages::display(CustomPages::LOST_PASSWORD, ['form' => $form->returnForm()]); exit; } $controller = new IndexManager($tool_name); $controller->set_login_form(); $controller->tpl->assign('form', $form->returnForm()); $template = $controller->tpl->get_template('auth/lost_password.tpl'); $controller->tpl->display($template);
/**
 * Returns the number of documents in an index that a phrase occurs in.
 * If it occurs in more than threshold documents then cut off search.
 *
 * The phrase is canonicalized and stemmed, truncated to at most
 * MAX_QUERY_TERMS terms, re-joined with single spaces and then looked up
 * with IndexManager::numDocsTerm().
 *
 * @param string $phrase to look up in index
 * @param int $threshold once count in posting list for any word
 *      reaches to threshold then return the number
 * @param string $index_name selected index for search engine; when NULL
 *      nothing is looked up and 0 is returned
 * @param string $lang locale tag for the query
 * @return int number of documents phrase occurs in (the value is whatever
 *      IndexManager::numDocsTerm() reports for the normalized phrase)
 */
static function numDocsIndex($phrase, $threshold, $index_name, $lang)
{
    // Without an index there is nothing to count, so skip the parsing work.
    if ($index_name == NULL) {
        return 0;
    }
    PhraseParser::canonicalizePunctuatedTerms($phrase, $lang);
    $terms = PhraseParser::stemCharGramSegment($phrase, $lang);
    if (count($terms) > MAX_QUERY_TERMS) {
        $terms = array_slice($terms, 0, MAX_QUERY_TERMS);
    }
    $whole_phrase = implode(" ", $terms);
    return IndexManager::numDocsTerm($whole_phrase, $index_name, $threshold);
}
/**
 * Hook function used by currentDocsWithWord to compute the current block
 * of docs when it is not cached. The block found is also stored in
 * $this->pages and its size in $this->count_block.
 *
 * @return mixed array of doc ids => doc info and score if there are docs
 *      left; -1 if the iterator is already past the last generation or
 *      past the last offset of the last generation; NULL if the last
 *      generation yielded no docs after filtering
 */
function findDocsWithWord()
{
    if ($this->current_generation >= $this->num_generations) {
        return -1;
    }
    if ($this->current_generation == $this->num_generations - 1 &&
        $this->current_offset > $this->last_offset) {
        return -1;
    }
    $this->next_offset = $this->current_offset;
    $index = IndexManager::getIndex($this->index_name);
    $index->setCurrentShard($this->current_generation, true);
    //the next call also updates next offset
    $shard = $index->getCurrentShard();
    $this->getShardInfo($this->current_generation);
    $doc_key_len = IndexShard::DOC_KEY_LEN;
    $num_docs_or_links = $shard->num_docs + $shard->num_link_docs;

    // walk the doc records of the shard, at most one block's worth
    $pre_results = array();
    $num_docs_so_far = 0;
    while ($this->next_offset < $this->last_offset) {
        $posting = packPosting($this->next_offset >> 4, array(1));
        list($doc_id, $num_keys, $item) =
            $shard->makeItem($posting, $num_docs_or_links);
        if ($num_keys % 2 == 0) {
            $num_keys++;
        }
        $this->next_offset += ($num_keys + 1) * $doc_key_len;
        $pre_results[$doc_id] = $item;
        $num_docs_so_far++;
        if ($num_docs_so_far >= $this->results_per_block) {
            break;
        }
    }

    // drop filtered hosts and decorate what is left
    $filter = ($this->filter == NULL) ? array() : $this->filter;
    $results = array();
    foreach ($pre_results as $keys => $data) {
        $host_key = substr($keys, self::HOST_KEY_POS, self::KEY_LEN);
        if (!in_array($host_key, $filter)) {
            $data[self::KEY] = $keys;
            // inlinks is the domain of the inlink
            list($hash_url, $data[self::HASH], $data[self::INLINKS]) = str_split($keys, $doc_key_len);
            $data[self::CRAWL_TIME] = $this->index_name;
            $results[$keys] = $data;
        }
    }
    $this->count_block = count($results);
    if ($this->current_generation == $this->num_generations - 1 &&
        $results == array()) {
        $results = NULL;
    }
    $this->pages = $results;
    return $results;
}
/**
 * Using the supplied $word_structs, constructs an iterator for getting
 * results to a query.
 *
 * When $queue_servers is non-empty and is not just a single localhost, a
 * single NetworkIterator over those servers is used (NAME_SERVER is
 * appended to the list if it is missing). Otherwise one iterator is built
 * locally per word_struct and, if there is more than one, they are combined
 * with a UnionIterator.
 *
 * @param array $word_structs an array of word_structs. Here a word_struct
 *      is an associative array with at least the following fields
 *      KEYS -- an array of word keys
 *      QUOTE_POSITIONS -- an array of positions of words that appeared in
 *          quotes (so need to be matched exactly)
 *      DISALLOW_KEYS -- an array of keys of words the document must not
 *          contain
 *      WEIGHT -- a weight to multiply scores returned from this iterator by
 *      INDEX_NAME -- an index timestamp to get results from
 *      Entries of $word_structs that are not arrays are skipped.
 * @param array& $filter an array of hashes of domains to filter from
 *      results; it is handed to the iterators that get constructed
 * @param int $raw ($raw == 0) normal grouping, ($raw == 1)
 *      no grouping done on data also no summaries returned (only lookup
 *      info), $raw > 1 return summaries but no grouping. The value is
 *      passed through intval(); for any value > 0 the un-grouped iterator
 *      is returned directly.
 * @param int& $to_retrieve number of items to retrieve from location in
 *      in iterator. Passed by reference: it is set to -1 when a local
 *      IntersectIterator was built, no save timestamp is in use and no
 *      word or doc iterator with data was found.
 * @param array $queue_servers a list of urls of yioop machines which might
 *      be used during lookup
 * @param string $original_query if set, the original query that corresponds
 *      to $word_structs
 * @param string $save_timestamp_name if this timestamp is non empty, then
 *      when making iterator get sub-iterators to advance to gen doc_offset
 *      stored with respect to save_timestamp if exists.
 * @param bool $limit_news if true the number of media:news items to
 *      allow in search results is limited to WordIterator::LIMIT_NEWS_COUNT
 *
 * @return &object an iterator for iterating through results to the
 *      query, or NULL if no iterator could be constructed
 */
function getQueryIterator($word_structs, &$filter, $raw, &$to_retrieve, $queue_servers = array(), $original_query = "", $save_timestamp_name = "", $limit_news = true) { $iterators = array(); $total_iterators = 0; $network_flag = false; $min_group_flag = false; $min_group_override = false; if ($queue_servers != array() && !$this->isSingleLocalhost($queue_servers)) { $network_flag = true; $total_iterators = 1; if (!in_array(NAME_SERVER, $queue_servers)) { $queue_servers[] = NAME_SERVER; //name server might still have news } $num_servers = count($queue_servers); if ((!isset($this->index_name) || !$this->index_name) && isset($word_structs[0]["INDEX_NAME"])) { $index_name = $word_structs[0]["INDEX_NAME"]; } else { $index_name = $this->index_name; } $iterators[0] = new NetworkIterator($original_query, $queue_servers, $index_name, $filter, $save_timestamp_name, $limit_news); } if (!$network_flag) { $doc_iterate_hashes = array(substr(crawlHashWord("site:any"), 0, 9), substr(crawlHash("site:any"), 0, 9), substr(crawlHashWord("site:doc"), 0, 9), substr(crawlHash("site:doc"), 0, 9)); if ($save_timestamp_name != "") { // used for archive crawls of crawl mixes $save_file = CRAWL_DIR . '/schedules/' . self::save_point . $save_timestamp_name . ".txt"; if (file_exists($save_file)) { $save_point = unserialize(file_get_contents($save_file)); } $save_count = 0; } foreach ($word_structs as $word_struct) { if (!is_array($word_struct)) { continue; } $word_keys = $word_struct["KEYS"]; $distinct_word_keys = array(); $seen_keys = array(); foreach ($word_keys as $wkey) { if (is_string($wkey) || is_string($wkey[0])) { $tmp_key = is_string($wkey) ? 
$wkey : $wkey[0]; if (!isset($seen_keys[$tmp_key])) { $seen_keys[$tmp_key] = true; $distinct_word_keys[] = $wkey; } } else { $distinct_word_keys[] = $wkey; } } $quote_positions = $word_struct["QUOTE_POSITIONS"]; $disallow_keys = $word_struct["DISALLOW_KEYS"]; $index_name = $word_struct["INDEX_NAME"]; $weight = $word_struct["WEIGHT"]; $num_word_keys = count($word_keys); $total_iterators = count($distinct_word_keys); $word_iterators = array(); $word_iterator_map = array(); if ($num_word_keys < 1) { continue; } $sum = 0; for ($i = 0; $i < $total_iterators; $i++) { $current_key = is_string($distinct_word_keys[$i]) ? $distinct_word_keys[$i] : (is_string($distinct_word_keys[$i][0]) ? $distinct_word_keys[$i][0] : $distinct_word_keys[$i][0][0]); if (!is_string($current_key)) { $current_key = $current_key[0]; } if (in_array(substr($current_key, 0, 9), $doc_iterate_hashes)) { $word_iterators[$i] = new DocIterator($index_name, $filter, $to_retrieve); $min_group_override = true; } else { //can happen if exact phrase search suffix approach used if (isset($distinct_word_keys[$i][0][0]) && is_array($distinct_word_keys[$i][0][0])) { $distinct_keys = array($distinct_word_keys[$i][0][1]); } else { if (isset($distinct_word_keys[$i][0]) && is_array($distinct_word_keys[$i][0])) { $distinct_keys = $distinct_word_keys[$i]; } else { $distinct_keys = array($distinct_word_keys[$i]); } } $out_keys = array(); $old_distinct_key_id = ""; foreach ($distinct_keys as $distinct_key) { if (is_array($distinct_key)) { if (!isset($distinct_key[2]) && isset($distinct_key[1])) { $distinct_keys[] = $distinct_key[1]; } $shift = isset($distinct_key[1]) ? $distinct_key[1] : 0; $mask = isset($distinct_key[2]) ? $distinct_key[2] : "" . ""; if (isset($distinct_key[3])) { $old_distinct_key_id = unbase64Hash($distinct_key[3]); } $distinct_key_id = unbase64Hash($distinct_key[0]); } else { $shift = 0; $mask = "" . 
""; $distinct_key_id = unbase64Hash($distinct_key); } $lookup_cutoff = max(MIN_RESULTS_TO_GROUP, $to_retrieve); $info = IndexManager::getWordInfo($index_name, $distinct_key_id, $shift, $mask); if ($old_distinct_key_id != "") { $old_info = IndexManager::getWordInfo($index_name, $old_distinct_key_id, $shift, $mask); if ($info !== false && $old_info !== false) { $info = array_merge($info, $old_info); } else { if ($old_info !== false) { $info = $old_info; } } } if ($info != array()) { $tmp_keys = arrayColumnCount($info, 4, 3); $sum += array_sum($tmp_keys); $out_keys = array_merge($out_keys, $tmp_keys); } if ($sum > $lookup_cutoff) { break; } } arsort($out_keys); $out_keys = array_keys(array_slice($out_keys, 0, 50)); $tmp_word_iterators = array(); $m = 0; foreach ($out_keys as $distinct_key) { $tmp_word_iterators[$m] = new WordIterator($distinct_key, $index_name, true, $filter, $to_retrieve, $limit_news); if ($tmp_word_iterators[$m]->dictionary_info != array() || $tmp_word_iterators[$m]->feed_count > 0) { $min_group_override = true; $m++; } else { unset($tmp_word_iterators[$m]); } } if ($m == 1) { $word_iterators[$i] = $tmp_word_iterators[0]; } else { $word_iterators[$i] = new DisjointIterator($tmp_word_iterators); } } foreach ($word_keys as $index => $key) { if (isset($distinct_word_keys[$i]) && $key == $distinct_word_keys[$i]) { $word_iterator_map[$index] = $i; } } } $num_disallow_keys = count($disallow_keys); if ($num_disallow_keys > 0) { for ($i = 0; $i < $num_disallow_keys; $i++) { $disallow_iterator = new WordIterator($disallow_keys[$i], $index_name, false, $filter); $word_iterators[$num_word_keys + $i] = new NegationIterator($disallow_iterator); } } $num_word_keys += $num_disallow_keys; if ($num_word_keys == 1 && $weight == 1) { $base_iterator = $word_iterators[0]; } else { $base_iterator = new IntersectIterator($word_iterators, $word_iterator_map, $quote_positions, $weight); $min_group_flag = true; if ($save_timestamp_name == "") { $base_iterator->sync_timer_on 
= true; } else { $base_iterator->sync_timer_on = false; } } if ($save_timestamp_name != "") { if (isset($save_point[$save_count]) && $save_point[$save_count] != -1) { $base_iterator->advance($save_point[$save_count]); } $save_count++; } $iterators[] = $base_iterator; } } $num_iterators = count($iterators); //if network_flag should be 1 if ($num_iterators < 1) { return NULL; } else { if ($num_iterators == 1) { $union_iterator = $iterators[0]; } else { $union_iterator = new UnionIterator($iterators); } } $raw = intval($raw); if ($raw > 0) { $group_iterator = $union_iterator; } else { $group_iterator = new GroupIterator($union_iterator, $total_iterators, $this->current_machine, $network_flag); } if ($network_flag) { $union_iterator->results_per_block = ceil(SERVER_ALPHA * $group_iterator->results_per_block / $num_servers); } else { if ($save_timestamp_name != "") { $group_iterator->save_iterators = $iterators; } else { if ($min_group_flag && !$min_group_override) { $group_iterator->results_per_block = max(MIN_RESULTS_TO_GROUP / 20, 1); $to_retrieve = -1; } } } return $group_iterator; }
/**
 * Determines the offset into the summaries WebArchiveBundle and generation
 * of the provided url (or hash_url) so that the info:url
 * (info:base64_hash_url) summary can be retrieved. This assumes of course
 * that the info:url meta word has been stored.
 *
 * Only the first document returned for the info: word is used. As a side
 * effect the index archive's current shard is switched to the generation
 * of that document.
 *
 * @param string $url_or_key either info:base64_hash_url or just a url to
 *      lookup
 * @param string $index_name index into which to do the lookup; when empty
 *      the index_name of this object is used
 * @param bool $is_key whether the string is info:base64_hash_url or just a
 *      url
 * @return mixed array (offset, generation) into the web archive bundle, or
 *      false if the index, its active generation info, the dictionary
 *      entry or a matching document could not be found
 */
function lookupSummaryOffsetGeneration($url_or_key, $index_name = "",
    $is_key = false)
{
    if ($index_name == "") {
        $index_name = $this->index_name;
    }
    $index_archive = IndexManager::getIndex($index_name);
    if (!$index_archive) {
        return false;
    }
    if (!isset($index_archive->generation_info['ACTIVE'])) {
        return false;
    }
    $mask = "";
    $lookup_word = $is_key ? $url_or_key : "info:{$url_or_key}";
    $hash_key = crawlHashWord($lookup_word, true, $mask);
    $info = IndexManager::getWordInfo($index_name, $hash_key, 0, $mask, 1);
    if (!isset($info[0][4])) {
        return false;
    }
    $word_iterator = new WordIterator($info[0][4], $index_name, true);
    $next_docs = $word_iterator->nextDocsWithWord();
    if (!is_array($next_docs) || count($next_docs) == 0) {
        return false;
    }
    // only the first matching document is of interest
    $doc_info = reset($next_docs);
    $summary_offset = $doc_info[CrawlConstants::SUMMARY_OFFSET];
    $generation = $doc_info[CrawlConstants::GENERATION];
    $index_archive->setCurrentShard($generation, true);
    // NOTE(review): the fetched page is never used; the call is retained
    // only because its side effects cannot be seen from here -- confirm
    // and remove if it is pure I/O.
    @$index_archive->getPage($summary_offset);
    return array($summary_offset, $generation);
}
/**
 * Returns the number of documents that a given term or phrase appears in
 * in the given index
 *
 * Every hash path computed for the term or phrase is looked up in the
 * index dictionary and the per-generation document counts are summed.
 *
 * @param string $term_or_phrase what to look up in the indexes dictionary
 *      no mask is used for this look up
 * @param string $index_name index to look up term or phrase in
 * @param int $threshold if set and positive then once threshold many
 *      documents are found the search for more documents to add to the
 *      total is stopped
 * @return mixed int number of documents, or false if the index could not
 *      be obtained or has no dictionary
 */
static function numDocsTerm($term_or_phrase, $index_name, $threshold = -1)
{
    $index = IndexManager::getIndex($index_name);
    if (!$index || !$index->dictionary) {
        return false;
    }
    $total_num_docs = 0;
    $hashes = allCrawlHashPaths($term_or_phrase, array(), array(), true);
    if (!is_array($hashes)) {
        $hashes = array($hashes);
    }
    foreach ($hashes as $hash) {
        if (is_array($hash)) {
            $dictionary_info = IndexManager::getWordInfo($index_name,
                $hash[0], $hash[1], $hash[2], $threshold);
        } else {
            $dictionary_info = IndexManager::getWordInfo($index_name, $hash);
        }
        // getWordInfo can report failure with a non-array value; such a
        // lookup contributes no documents (count() on it would be an error)
        if (!is_array($dictionary_info)) {
            continue;
        }
        $num_generations = count($dictionary_info);
        // entries may start at index -1 rather than 0
        $start = isset($dictionary_info[-1]) ? -1 : 0;
        $end = ($start == -1) ? $num_generations - 1 : $num_generations;
        for ($i = $start; $i < $end; $i++) {
            if (!isset($dictionary_info[$i][3])) {
                continue;
            }
            // the fourth field of a dictionary record is its doc count
            $total_num_docs += $dictionary_info[$i][3];
            if ($threshold > 0 && $total_num_docs > $threshold) {
                return $total_num_docs;
            }
        }
    }
    return $total_num_docs;
}
/**
 * Writes to standard output a report about up to $num postings, starting
 * at the given $generation and $offset of an IndexArchiveBundle.
 *
 * For every document found its id, type, doc index, summary offset, the
 * score components, the raw posting in hex, the term positions (if any)
 * and the summary title and description (if any) are printed. The script
 * exits if no document corresponds to the generation and offset.
 *
 * @param string $archive_path the path of a directory that holds
 *      an IndexArchiveBundle
 * @param int $generation which index shard to use
 * @param int $offset offset into posting lists for that shard
 * @param int $num how many postings to print info for
 */
function outputPostingInfo($archive_path, $generation, $offset, $num = 1)
{
    $bundle_name = $this->getArchiveName($archive_path);
    echo "\nBundle Name: " . $bundle_name . "\n";
    $archive_type = $this->getArchiveKind($archive_path);
    echo "Bundle Type: " . $archive_type . "\n";
    echo "Generation: " . $generation . "\n";
    echo "Offset: " . $offset . "\n";
    if (0 != strcmp($archive_type, "IndexArchiveBundle")) {
        $this->badFormatMessageAndExit($archive_path, "index");
    }
    // the index timestamp is whatever follows the base name in the path
    $base_name_pos = strpos($archive_path, self::index_data_base_name);
    $base_name_len = strlen(self::index_data_base_name);
    $index_timestamp = substr($archive_path, $base_name_pos + $base_name_len);
    $index = IndexManager::getIndex($index_timestamp);
    $index->setCurrentShard($generation, true);
    $shard = $index->getCurrentShard();

    $cursor = $offset >> 2;
    $raw_postings = array();
    $doc_indexes = array();
    $documents = array();
    for ($fetched = 0; $fetched < $num; $fetched++) {
        $unpack_offset = 0;
        $posting_start = $cursor;
        $posting_end = $cursor;
        $byte_offset = $cursor << 2;
        $slice_offset = $byte_offset;
        $slice_start = $byte_offset;
        $slice_end = $byte_offset;
        $posting = $shard->getPostingAtOffset($cursor, $posting_start,
            $posting_end);
        $cursor = $posting_end + 1;
        if (!$posting) {
            break;
        }
        $slice = $shard->getPostingsSlice($slice_offset, $slice_start,
            $slice_end, 1);
        $documents = array_merge($documents, $slice);
        $raw_postings[] = $posting;
        $unpacked = unpackPosting($posting, $unpack_offset);
        $doc_indexes[] = $unpacked[0];
    }
    $end_offset = $cursor << 2;
    echo "Offset After Returned Results: " . $end_offset . "\n\n";
    if (!$documents || ($count = count($documents)) < 1) {
        echo "No documents correspond to generation and offset given\n\n";
        exit;
    }
    if ($count == 1) {
        $document_word = "Document";
    } else {
        $document_word = "Documents";
    }
    echo $count . " " . $document_word . " Found:\n";
    echo str_pad("", $count + 1, "=") . "================\n";

    $doc_number = 0;
    foreach ($documents as $key => $document) {
        $summary_offset = $document[self::SUMMARY_OFFSET];
        echo "\nDOC ID: " . toHexString($key);
        echo "\nTYPE: " . ($document[self::IS_DOC] ? "Document" : "Link");
        echo "\nDOC INDEX: " . $doc_indexes[$doc_number];
        echo "\nSUMMARY OFFSET: " . $summary_offset;
        echo "\nSCORE: " . $document[self::SCORE];
        echo "\nDOC RANK: " . $document[self::DOC_RANK];
        echo "\nRELEVANCE: " . $document[self::RELEVANCE];
        echo "\nPROXIMITY: " . $document[self::PROXIMITY];
        echo "\nHEX POSTING:\n";
        echo "------------\n";
        echo wordwrap(toHexString($raw_postings[$doc_number]), 80);
        if (isset($document[self::POSITION_LIST])) {
            echo "\nTERM OCCURRENCES IN DOCUMENT (Count starts at title):";
            echo "\n-------------------------" .
                "----------------------------\n";
            // five positions per output row
            $printed = 0;
            foreach ($document[self::POSITION_LIST] as $position) {
                printf("%09d ", $position);
                $printed++;
                if ($printed % 5 == 0) {
                    echo "\n";
                }
            }
            if ($printed % 5 != 0) {
                echo "\n";
            }
        }
        $page = @$index->getPage($summary_offset);
        if (isset($page[self::TITLE])) {
            echo "SUMMARY TITLE:\n";
            echo "--------------\n";
            echo wordwrap($page[self::TITLE], 80) . "\n";
        }
        if (isset($page[self::DESCRIPTION])) {
            echo "SUMMARY DESCRIPTION:\n";
            echo "--------------\n";
            echo $page[self::DESCRIPTION] . "\n";
        }
        $doc_number++;
    }
}
/**
 * Computes for each word in an array of words a count of the total number
 * of times it occurs in this crawl model's default index.
 *
 * If $machine_urls is given and is not just a single localhost, the
 * request is forwarded to those machines and their per-word counts are
 * added together; otherwise the local index is consulted.
 *
 * @param array $words words to find the counts for
 * @param array $machine_urls machines to invoke this command on
 * @return array associative array of word => counts; empty if the local
 *      index cannot be obtained
 */
function countWords($words, $machine_urls = NULL)
{
    if ($machine_urls != NULL && !$this->isSingleLocalhost($machine_urls)) {
        $count_strings = $this->execMachines("countWords", $machine_urls,
            serialize(array($words, $this->index_name)));
        $word_counts = array();
        if (!is_array($count_strings)) {
            return $word_counts;
        }
        foreach ($count_strings as $count_string) {
            if (!isset($count_string[self::PAGE])) {
                continue;
            }
            // NOTE(review): unserialize() is applied to a response received
            // from another machine; this is only safe if those machines are
            // trusted. Consider a data-only format for this exchange.
            $a_word_counts = unserialize(
                webdecode($count_string[self::PAGE]));
            if (!is_array($a_word_counts)) {
                continue;
            }
            foreach ($a_word_counts as $word => $count) {
                $word_counts[$word] = isset($word_counts[$word]) ?
                    $word_counts[$word] + $count : $count;
            }
        }
        return $word_counts;
    }
    $word_counts = array();
    $index_archive = IndexManager::getIndex($this->index_name);
    if (!$index_archive) {
        // no index to count in
        return $word_counts;
    }
    // counts come back keyed by hash, so remember which word made each hash
    $hashes = array();
    $lookup = array();
    foreach ($words as $word) {
        $word_hash = crawlHash($word);
        $hashes[] = $word_hash;
        $lookup[$word_hash] = $word;
    }
    $word_key_counts = $index_archive->countWordKeys($hashes);
    if (is_array($word_key_counts)) {
        foreach ($word_key_counts as $word_key => $count) {
            if (isset($lookup[$word_key])) {
                $word_counts[$lookup[$word_key]] = $count;
            }
        }
    }
    return $word_counts;
}