Пример #1
0
if (isset($_SESSION['sniff_navigator']) && $_SESSION['sniff_navigator']!="checked") {
	$_SESSION['sniff_navigator']=Security::remove_XSS($_POST['sniff_navigator']);
	$_SESSION['sniff_screen_size_w']=Security::remove_XSS($_POST['sniff_navigator_screen_size_w']);
	$_SESSION['sniff__screen_size_h']=Security::remove_XSS($_POST['sniff_navigator_screen_size_h']);
	$_SESSION['sniff_type_mimetypes']=Security::remove_XSS($_POST['sniff_navigator_type_mimetypes']);
	$_SESSION['sniff_suffixes_mimetypes']=Security::remove_XSS($_POST['sniff_navigator_suffixes_mimetypes']);
	$_SESSION['sniff_list_plugins']=Security::remove_XSS($_POST['sniff_navigator_list_plugins']);
	$_SESSION['sniff_check_some_activex']=Security::remove_XSS($_POST['sniff_navigator_check_some_activex']);
	$_SESSION['sniff_check_some_plugins']=Security::remove_XSS($_POST['sniff_navigator_check_some_plugins']);
	$_SESSION['sniff_java']=Security::remove_XSS($_POST['sniff_navigator_java']);
	$_SESSION['sniff_java_sun_ver']=Security::remove_XSS($_POST['sniff_navigator_java_sun_ver']);
}
*/
/* MAIN CODE */

// IndexManager instance built with the translated 'MyCourses' label
// (presumably used as the page title -- confirm against IndexManager).
$controller = new IndexManager(get_lang('MyCourses'));



// Main courses and session list for $user_id ($user_id is defined outside this excerpt)
$courses_and_sessions = $controller->return_courses_and_sessions($user_id);

// Show the Chamilo mascot (welcome block) only when the list above is empty
// and the request carries no 'history' GET parameter
if (empty($courses_and_sessions) && !isset($_GET['history'])) {
	$controller->tpl->assign('welcome_to_course_block', $controller->return_welcome_to_course_block());
}

// Hand the courses/sessions list to the template as its 'content' variable
$controller->tpl->assign('content', $courses_and_sessions);

if (api_get_setting('allow_browser_sniffer') == 'true') {
	if ($_SESSION['sniff_navigator']!="checked") {
Пример #2
0
$cidReset = true;
require_once 'main/inc/global.inc.php';
require_once 'main/chat/chat_functions.lib.php';
//require_once 'main/auth/external_login/facebook.inc.php';
// The section (for the tabs).
$this_section = SECTION_CAMPUS;
// Anonymous visitors get no header title; everybody else gets a single space.
$header_title = api_is_anonymous() ? null : " ";
// Facebook connection, if activated
/*if (api_is_facebook_auth_activated() && !api_get_user_id()) {
    facebookConnect();
}
*/
$controller = new IndexManager($header_title);
// Actions
// A login counts as failed when the GET flag is present or when an earlier
// script already defined $loginFailed.
$loginFailed = isset($_GET['loginFailed']) || isset($loginFailed);
if (!empty($_GET['logout'])) {
    // Redirect after logout unless the request explicitly asks not to.
    $redirect = empty($_GET['no_redirect']);
    $controller->logout($redirect);
}
/* Table definitions */
/* Constants and CONFIGURATION parameters */
/** @todo these configuration settings should move to the Chamilo config settings. */
/** Defines whether or not anonymous visitors can see a list of the courses on the Chamilo homepage that are open to the world. */
$_setting['display_courses_to_anonymous_users'] = 'true';
/* LOGIN */
/**
 * Registers in the track_e_default table (view in important activities in admin
 * interface) a possible attempted break in, sending auth data through get.
Пример #3
0
        */
        function changeMyCoursesView(inView)
        {
            $.cookie("defaultMyCourseView"+userId, inView, { expires: 365 });
            if (inView == ' . IndexManager::VIEW_BY_SESSION . ') {
                $("#viewBySession").addClass("btn-primary");
                $("#viewByDefault").removeClass("btn-primary");
            } else {
                $("#viewByDefault").addClass("btn-primary");
                $("#viewBySession").removeClass("btn-primary");
            }
        }
	</script>
';
}
// IndexManager instance built with the translated 'MyCourses' label
$controller = new IndexManager(get_lang('MyCourses'));
// Main courses and session list
//$courseAndSessions = $controller->returnCoursesAndSessions($userId);
// Pick the listing mode: the per-user cookie 'defaultMyCourseView<userId>' selects the
// "by session" view, but only when $displayMyCourseViewBySessionLink is truthy; in every
// other case the default view is used. The chosen mode is then stored again through
// IndexManager::setDefaultMyCourseView().
if (isset($_COOKIE['defaultMyCourseView' . $userId]) && $_COOKIE['defaultMyCourseView' . $userId] == IndexManager::VIEW_BY_SESSION && $displayMyCourseViewBySessionLink) {
    $courseAndSessions = $controller->returnCoursesAndSessionsViewBySession($userId);
    IndexManager::setDefaultMyCourseView(IndexManager::VIEW_BY_SESSION, $userId);
} else {
    $courseAndSessions = $controller->returnCoursesAndSessions($userId);
    IndexManager::setDefaultMyCourseView(IndexManager::VIEW_BY_DEFAULT, $userId);
}
// If the user is an HR manager (drh), course coach, platform admin, session admin or teacher,
// display the buttons to change the course view
if ($displayMyCourseViewBySessionLink && (api_is_drh() || api_is_course_coach() || api_is_platform_admin() || api_is_session_admin() || api_is_teacher())) {
    // The two-button switcher markup is prepended to the HTML that was already generated.
    $courseAndSessions['html'] = "<div class='view-by-session-link'>\n\t\t<div class='btn-group pull-right'>\n\t\t<a class='btn btn-default' id='viewByDefault' href='user_portal.php' onclick='changeMyCoursesView(\"" . IndexManager::VIEW_BY_DEFAULT . "\")'>\n\t\t" . get_lang('MyCoursesDefaultView') . "\n\t\t</a>\n\t\t<a class='btn btn-default' id='viewBySession' href='user_portal.php' onclick='changeMyCoursesView(\"" . IndexManager::VIEW_BY_SESSION . "\")'>\n\t\t" . get_lang('MyCoursesSessionView') . "\n\t\t</a>\n\t\t</div>\n\t</div><br /><br />\n\t" . $courseAndSessions['html'];
}
// Check if a user is enrolled only in one course for going directly to the course after the login.
Пример #4
0
 /**
  * Brings the database indexes in line with the expected ones.
  *
  * The work is delegated to a freshly created IndexManager bound to this
  * object; the value returned by its commit() call is passed through.
  *
  * @param boolean $delete (optional) forwarded to IndexManager::commit();
  *     true by default, meaning unknown and old indexes are dropped
  *
  * @return boolean the result of IndexManager::commit()
  *
  * @api
  */
 public function ensureIndexes($delete = true)
 {
     $manager = new IndexManager($this);
     $committed = $manager->commit($delete);
     return $committed;
 }
Пример #5
0
 /**
  * Extracts all phrases (sequences of adjacent words) from $string. Does
  * not extract terms within those phrases. Array key indicates position
  * of phrase.
  *
  * When no index is given, when the string yields at most one term, or when
  * an exact match is requested, the stemmed terms are returned unchanged.
  * Otherwise document counts from the index decide whether the terms are
  * replaced by the whole phrase, or by a leading/trailing sub-phrase plus
  * the remaining term.
  *
  * @param string $string subject to extract phrases from
  * @param string $lang locale tag for stemming
  * @param string $index_name name of index to be used as a reference
  *     when extracting phrases
  * @param bool $exact_match whether the match has to be exact or not
  * @param int $threshold number of documents a phrase has to occur in
  *     before it is used in place of the individual terms; it is also
  *     passed to IndexManager::numDocsTerm() as the count cut-off
  * @return array of phrases
  */
 static function extractPhrases($string, $lang = NULL, $index_name = NULL, $exact_match = false, $threshold = 10)
 {
     if (isset(self::$programming_language_map[$lang])) {
         // strip the leading control word (plus one separator character)
         $control_word = self::$programming_language_map[$lang] . self::CONTROL_WORD_INDICATOR;
         $string = trim(substr($string, strlen($control_word) + 1));
     } else {
         self::canonicalizePunctuatedTerms($string, $lang);
     }
     $terms = self::stemCharGramSegment($string, $lang);
     $num = count($terms);
     /* Nothing to combine without an index or with a single term; for exact
        phrase search do not use suffix tree stuff for now */
     if ($index_name == NULL || $num <= 1 || $exact_match) {
         return $terms;
     }
     // only the first MAX_QUERY_TERMS terms take part in phrase building
     $first_terms = ($num > MAX_QUERY_TERMS) ? array_slice($terms, 0, MAX_QUERY_TERMS) : $terms;
     $whole_phrase = implode(" ", $first_terms);
     $count_whole_phrase = IndexManager::numDocsTerm($whole_phrase, $index_name, $threshold);
     if ($count_whole_phrase >= $threshold || $num > SUFFIX_TREE_THRESHOLD) {
         return array($whole_phrase, $terms[0]);
     }
     if ($count_whole_phrase > 0) {
         // phrase is rare: still prefer it when some single term is more
         // than 50 times as frequent as the phrase
         foreach ($terms as $term) {
             $count_term = IndexManager::numDocsTerm($term, $index_name, 5 * $threshold);
             if ($count_term > 50 * $count_whole_phrase) {
                 return array($whole_phrase, $terms[0]);
             }
         }
         return $terms;
     }
     if ($num > 2) {
         // whole phrase not found: try everything but the last term ...
         $start_terms = $first_terms;
         $last_term = array_pop($start_terms);
         $start_phrase = implode(" ", $start_terms);
         $count_start = IndexManager::numDocsTerm($start_phrase, $index_name, $threshold);
         if ($count_start >= $threshold) {
             return array($start_phrase, $last_term, $terms[0]);
         }
         // ... and then everything but the first term
         $end_terms = $first_terms;
         $first_term = array_shift($end_terms);
         $end_phrase = implode(" ", $end_terms);
         $count_end = IndexManager::numDocsTerm($end_phrase, $index_name, $threshold);
         if ($count_end >= $threshold) {
             return array($first_term, $end_phrase);
         }
     }
     return $terms;
 }
Пример #6
0
        exit;
    }
    $userResetPasswordSetting = api_get_setting('user_reset_password');
    if ($userResetPasswordSetting === 'true') {
        $user = Database::getManager()->getRepository('ChamiloUserBundle:User')->find($user['uid']);
        Login::sendResetEmail($user, true);
        if (CustomPages::enabled() && CustomPages::exists(CustomPages::INDEX_UNLOGGED)) {
            CustomPages::display(CustomPages::INDEX_UNLOGGED, ['info' => get_lang('CheckYourEmailAndFollowInstructions')]);
            exit;
        }
        header('Location: ' . api_get_path(WEB_PATH));
        exit;
    }
    $messageText = Login::handle_encrypted_password($user, true);
    if (CustomPages::enabled() && CustomPages::exists(CustomPages::INDEX_UNLOGGED)) {
        CustomPages::display(CustomPages::INDEX_UNLOGGED, ['info' => $messageText]);
        exit;
    }
    Display::addFlash(Display::return_message($messageText));
    header('Location: ' . api_get_path(WEB_PATH));
    exit;
}
// When custom pages are enabled and a custom lost-password page exists, display it
// with the form and stop the script.
if (CustomPages::enabled() && CustomPages::exists(CustomPages::LOST_PASSWORD)) {
    CustomPages::display(CustomPages::LOST_PASSWORD, ['form' => $form->returnForm()]);
    exit;
}
// Otherwise render the form through the auth/lost_password.tpl template.
// $tool_name and $form are defined outside this excerpt.
$controller = new IndexManager($tool_name);
$controller->set_login_form();
$controller->tpl->assign('form', $form->returnForm());
$template = $controller->tpl->get_template('auth/lost_password.tpl');
$controller->tpl->display($template);
Пример #7
0
 /**
  * Counts the documents of an index in which a phrase occurs, giving up
  * early once the count passes a threshold.
  *
  * The phrase is canonicalized and stemmed first; at most MAX_QUERY_TERMS
  * of the resulting terms are joined with single spaces and looked up with
  * IndexManager::numDocsTerm().
  *
  * @param string $phrase to look up in index
  * @param int $threshold cut-off handed on to IndexManager::numDocsTerm()
  * @param string $index_name selected index for search engine; 0 is
  *     returned when it is NULL (loose comparison)
  * @param string $lang locale tag for the query
  * @return int number of documents phrase occurs in (or whatever
  *     IndexManager::numDocsTerm() returns)
  */
 static function numDocsIndex($phrase, $threshold, $index_name, $lang)
 {
     PhraseParser::canonicalizePunctuatedTerms($phrase, $lang);
     $stemmed_terms = PhraseParser::stemCharGramSegment($phrase, $lang);
     $term_count = count($stemmed_terms);
     if ($index_name == NULL) {
         return 0;
     }
     $query_terms = ($term_count > MAX_QUERY_TERMS)
         ? array_slice($stemmed_terms, 0, MAX_QUERY_TERMS)
         : $stemmed_terms;
     return IndexManager::numDocsTerm(implode(" ", $query_terms), $index_name, $threshold);
 }
Пример #8
0
 /**
  * Hook function used by currentDocsWithWord to return the current block
  * of docs if it is not cached
  *
  * Reads up to results_per_block entries starting at the current offset of
  * the current generation's shard, drops entries whose host key is in the
  * filter, and stores the remainder in $this->pages.
  *
  * @return mixed doc ids and score if there are docs left, -1 if the
  *     iterator is already past the last generation/offset, NULL if the
  *     last generation produced an empty block
  */
 function findDocsWithWord()
 {
     $last_generation = $this->num_generations - 1;
     $past_generations = $this->current_generation >= $this->num_generations;
     $past_last_offset = $this->current_generation == $last_generation && $this->current_offset > $this->last_offset;
     if ($past_generations || $past_last_offset) {
         return -1;
     }
     $this->next_offset = $this->current_offset;
     $index = IndexManager::getIndex($this->index_name);
     $index->setCurrentShard($this->current_generation, true);
     $shard = $index->getCurrentShard();
     $this->getShardInfo($this->current_generation);
     $doc_key_len = IndexShard::DOC_KEY_LEN;
     $num_docs_or_links = $shard->num_docs + $shard->num_link_docs;
     // collect raw items, advancing next_offset past each one
     $pre_results = array();
     $num_docs_so_far = 0;
     while ($this->next_offset < $this->last_offset) {
         $posting = packPosting($this->next_offset >> 4, array(1));
         list($doc_id, $num_keys, $item) = $shard->makeItem($posting, $num_docs_or_links);
         if ($num_keys % 2 == 0) {
             $num_keys++;
         }
         $this->next_offset += ($num_keys + 1) * $doc_key_len;
         $pre_results[$doc_id] = $item;
         $num_docs_so_far++;
         if ($num_docs_so_far >= $this->results_per_block) {
             break;
         }
     }
     $filter = $this->filter;
     if ($filter == NULL) {
         $filter = array();
     }
     $results = array();
     foreach ($pre_results as $keys => $data) {
         $host_key = substr($keys, self::HOST_KEY_POS, self::KEY_LEN);
         if (!in_array($host_key, $filter)) {
             $data[self::KEY] = $keys;
             // inlinks is the domain of the inlink
             list(, $data[self::HASH], $data[self::INLINKS]) = str_split($keys, $doc_key_len);
             $data[self::CRAWL_TIME] = $this->index_name;
             $results[$keys] = $data;
         }
     }
     $this->count_block = count($results);
     if ($this->current_generation == $last_generation && empty($results)) {
         $results = NULL;
     }
     $this->pages = $results;
     return $results;
 }
Пример #9
0
 /**
  * Using the supplied $word_structs, constructs an iterator for getting
  * results to a query
  *
  * When $queue_servers is non-empty and isSingleLocalhost() says it is not
  * just the local machine, a single NetworkIterator over those servers is
  * used. Otherwise one iterator is built locally per word_struct and, if
  * there is more than one, they are combined with a UnionIterator.
  *
  * @param array $word_structs an array of word_structs. Here a word_struct
  *     is an associative array with at least the following fields
  *     KEYS -- an array of word keys
  *     QUOTE_POSITIONS -- an array of positions of words that appeared in
  *         quotes (so need to be matched exactly)
  *     DISALLOW_KEYS -- an array of keys of words the document must not
  *         contain
  *     WEIGHT -- a weight to multiply scores returned from this iterator by
  *     INDEX_NAME -- an index timestamp to get results from
  * @param array& $filter an array of hashes of domains to filter from
  *     results and then potentially restored in cache
  * @param int $raw ($raw == 0) normal grouping, ($raw == 1)
  *     no grouping done on data also no summaries returned (only lookup
  *     info), $raw > 1 return summaries but no grouping
  * @param int& $to_retrieve number of items to retrieve from location in
  *     in iterator; passed by reference and set to -1 when a multi-iterator
  *     local query found no dictionary or feed entries for any word
  * @param array $queue_servers a list of urls of yioop machines which might
  *     be used during lookup
  * @param string $original_query if set, the original query that corresponds
  *     to $word_structs
  * @param string $save_timestamp_name if this timestamp is non empty, then
  *     when making iterator get sub-iterators to advance to gen doc_offset
  *     stored with respect to save_timestamp if exists.
  * @param bool $limit_news if true the number of media:news items to
  *     allow in search results is limited to WordIterator::LIMIT_NEWS_COUNT
  *
  * @return &object an iterator for iterating through results to the
  * query, or NULL when no iterator could be built
  */
 function getQueryIterator($word_structs, &$filter, $raw, &$to_retrieve, $queue_servers = array(), $original_query = "", $save_timestamp_name = "", $limit_news = true)
 {
     $iterators = array();
     $total_iterators = 0;
     $network_flag = false;
     $min_group_flag = false;
     $min_group_override = false;
     // Network case: one NetworkIterator handles the whole query remotely
     if ($queue_servers != array() && !$this->isSingleLocalhost($queue_servers)) {
         $network_flag = true;
         $total_iterators = 1;
         if (!in_array(NAME_SERVER, $queue_servers)) {
             $queue_servers[] = NAME_SERVER;
             //name server might still have news
         }
         $num_servers = count($queue_servers);
         // fall back to the first word_struct's index when this object has none
         if ((!isset($this->index_name) || !$this->index_name) && isset($word_structs[0]["INDEX_NAME"])) {
             $index_name = $word_structs[0]["INDEX_NAME"];
         } else {
             $index_name = $this->index_name;
         }
         $iterators[0] = new NetworkIterator($original_query, $queue_servers, $index_name, $filter, $save_timestamp_name, $limit_news);
     }
     // Local case: build one base iterator per word_struct
     if (!$network_flag) {
         // 9-character hash prefixes of the site:any / site:doc meta words;
         // a key starting with one of these gets a DocIterator below
         $doc_iterate_hashes = array(substr(crawlHashWord("site:any"), 0, 9), substr(crawlHash("site:any"), 0, 9), substr(crawlHashWord("site:doc"), 0, 9), substr(crawlHash("site:doc"), 0, 9));
         if ($save_timestamp_name != "") {
             // used for archive crawls of crawl mixes
             $save_file = CRAWL_DIR . '/schedules/' . self::save_point . $save_timestamp_name . ".txt";
             if (file_exists($save_file)) {
                 $save_point = unserialize(file_get_contents($save_file));
             }
             $save_count = 0;
         }
         foreach ($word_structs as $word_struct) {
             if (!is_array($word_struct)) {
                 continue;
             }
             $word_keys = $word_struct["KEYS"];
             // de-duplicate the keys; a key is either a string or an array
             // whose first element is a string, anything else is kept as is
             $distinct_word_keys = array();
             $seen_keys = array();
             foreach ($word_keys as $wkey) {
                 if (is_string($wkey) || is_string($wkey[0])) {
                     $tmp_key = is_string($wkey) ? $wkey : $wkey[0];
                     if (!isset($seen_keys[$tmp_key])) {
                         $seen_keys[$tmp_key] = true;
                         $distinct_word_keys[] = $wkey;
                     }
                 } else {
                     $distinct_word_keys[] = $wkey;
                 }
             }
             $quote_positions = $word_struct["QUOTE_POSITIONS"];
             $disallow_keys = $word_struct["DISALLOW_KEYS"];
             $index_name = $word_struct["INDEX_NAME"];
             $weight = $word_struct["WEIGHT"];
             $num_word_keys = count($word_keys);
             // NOTE(review): overwritten for every word_struct, so the value
             // later given to GroupIterator is the one from the last struct
             $total_iterators = count($distinct_word_keys);
             $word_iterators = array();
             $word_iterator_map = array();
             if ($num_word_keys < 1) {
                 continue;
             }
             $sum = 0;
             for ($i = 0; $i < $total_iterators; $i++) {
                 // dig out the first string found in the (possibly nested) key
                 $current_key = is_string($distinct_word_keys[$i]) ? $distinct_word_keys[$i] : (is_string($distinct_word_keys[$i][0]) ? $distinct_word_keys[$i][0] : $distinct_word_keys[$i][0][0]);
                 if (!is_string($current_key)) {
                     $current_key = $current_key[0];
                 }
                 if (in_array(substr($current_key, 0, 9), $doc_iterate_hashes)) {
                     $word_iterators[$i] = new DocIterator($index_name, $filter, $to_retrieve);
                     $min_group_override = true;
                 } else {
                     //can happen if exact phrase search suffix approach used
                     if (isset($distinct_word_keys[$i][0][0]) && is_array($distinct_word_keys[$i][0][0])) {
                         $distinct_keys = array($distinct_word_keys[$i][0][1]);
                     } else {
                         if (isset($distinct_word_keys[$i][0]) && is_array($distinct_word_keys[$i][0])) {
                             $distinct_keys = $distinct_word_keys[$i];
                         } else {
                             $distinct_keys = array($distinct_word_keys[$i]);
                         }
                     }
                     // $out_keys accumulates the output of arrayColumnCount()
                     // over the dictionary info of every distinct key
                     $out_keys = array();
                     $old_distinct_key_id = "";
                     foreach ($distinct_keys as $distinct_key) {
                         if (is_array($distinct_key)) {
                             // NOTE(review): under PHP 7+ a by-value foreach
                             // iterates a snapshot, so the element appended
                             // here is not visited by this loop -- confirm
                             // that this is intended
                             if (!isset($distinct_key[2]) && isset($distinct_key[1])) {
                                 $distinct_keys[] = $distinct_key[1];
                             }
                             $shift = isset($distinct_key[1]) ? $distinct_key[1] : 0;
                             $mask = isset($distinct_key[2]) ? $distinct_key[2] : "" . "";
                             if (isset($distinct_key[3])) {
                                 $old_distinct_key_id = unbase64Hash($distinct_key[3]);
                             }
                             $distinct_key_id = unbase64Hash($distinct_key[0]);
                         } else {
                             $shift = 0;
                             $mask = "" . "";
                             $distinct_key_id = unbase64Hash($distinct_key);
                         }
                         $lookup_cutoff = max(MIN_RESULTS_TO_GROUP, $to_retrieve);
                         $info = IndexManager::getWordInfo($index_name, $distinct_key_id, $shift, $mask);
                         // merge in the dictionary info stored under the old
                         // key id, if one was supplied; false means no info
                         if ($old_distinct_key_id != "") {
                             $old_info = IndexManager::getWordInfo($index_name, $old_distinct_key_id, $shift, $mask);
                             if ($info !== false && $old_info !== false) {
                                 $info = array_merge($info, $old_info);
                             } else {
                                 if ($old_info !== false) {
                                     $info = $old_info;
                                 }
                             }
                         }
                         if ($info != array()) {
                             $tmp_keys = arrayColumnCount($info, 4, 3);
                             $sum += array_sum($tmp_keys);
                             $out_keys = array_merge($out_keys, $tmp_keys);
                         }
                         // stop looking up further keys once enough was found
                         if ($sum > $lookup_cutoff) {
                             break;
                         }
                     }
                     // keep the (at most) 50 keys with the largest counts
                     arsort($out_keys);
                     $out_keys = array_keys(array_slice($out_keys, 0, 50));
                     $tmp_word_iterators = array();
                     $m = 0;
                     foreach ($out_keys as $distinct_key) {
                         $tmp_word_iterators[$m] = new WordIterator($distinct_key, $index_name, true, $filter, $to_retrieve, $limit_news);
                         // only iterators with dictionary info or feed items
                         // are kept; the others are discarded again
                         if ($tmp_word_iterators[$m]->dictionary_info != array() || $tmp_word_iterators[$m]->feed_count > 0) {
                             $min_group_override = true;
                             $m++;
                         } else {
                             unset($tmp_word_iterators[$m]);
                         }
                     }
                     if ($m == 1) {
                         $word_iterators[$i] = $tmp_word_iterators[0];
                     } else {
                         $word_iterators[$i] = new DisjointIterator($tmp_word_iterators);
                     }
                 }
                 // map every position of this key in the original key list
                 // to the iterator just built for it
                 foreach ($word_keys as $index => $key) {
                     if (isset($distinct_word_keys[$i]) && $key == $distinct_word_keys[$i]) {
                         $word_iterator_map[$index] = $i;
                     }
                 }
             }
             // one negated WordIterator per disallowed key, stored after
             // index $num_word_keys
             $num_disallow_keys = count($disallow_keys);
             if ($num_disallow_keys > 0) {
                 for ($i = 0; $i < $num_disallow_keys; $i++) {
                     $disallow_iterator = new WordIterator($disallow_keys[$i], $index_name, false, $filter);
                     $word_iterators[$num_word_keys + $i] = new NegationIterator($disallow_iterator);
                 }
             }
             $num_word_keys += $num_disallow_keys;
             if ($num_word_keys == 1 && $weight == 1) {
                 $base_iterator = $word_iterators[0];
             } else {
                 $base_iterator = new IntersectIterator($word_iterators, $word_iterator_map, $quote_positions, $weight);
                 $min_group_flag = true;
                 if ($save_timestamp_name == "") {
                     $base_iterator->sync_timer_on = true;
                 } else {
                     $base_iterator->sync_timer_on = false;
                 }
             }
             // resume from the stored save point for this word_struct, if any
             if ($save_timestamp_name != "") {
                 if (isset($save_point[$save_count]) && $save_point[$save_count] != -1) {
                     $base_iterator->advance($save_point[$save_count]);
                 }
                 $save_count++;
             }
             $iterators[] = $base_iterator;
         }
     }
     $num_iterators = count($iterators);
     //if network_flag should be 1
     if ($num_iterators < 1) {
         return NULL;
     } else {
         if ($num_iterators == 1) {
             $union_iterator = $iterators[0];
         } else {
             $union_iterator = new UnionIterator($iterators);
         }
     }
     // any $raw > 0 skips the GroupIterator wrapper
     $raw = intval($raw);
     if ($raw > 0) {
         $group_iterator = $union_iterator;
     } else {
         $group_iterator = new GroupIterator($union_iterator, $total_iterators, $this->current_machine, $network_flag);
     }
     if ($network_flag) {
         // $num_servers was set in the network branch above
         $union_iterator->results_per_block = ceil(SERVER_ALPHA * $group_iterator->results_per_block / $num_servers);
     } else {
         if ($save_timestamp_name != "") {
             $group_iterator->save_iterators = $iterators;
         } else {
             // an IntersectIterator was used and no word had dictionary or
             // feed entries: shrink the block size and signal the caller
             // through the by-reference $to_retrieve
             if ($min_group_flag && !$min_group_override) {
                 $group_iterator->results_per_block = max(MIN_RESULTS_TO_GROUP / 20, 1);
                 $to_retrieve = -1;
             }
         }
     }
     return $group_iterator;
 }
Пример #10
0
 /**
  * Determines the offset into the summaries WebArchiveBundle and generation
  * of the provided url (or hash_url) so that the info:url
  * (info:base64_hash_url) summary can be retrieved. This assumes of course
  * that the info:url meta word has been stored.
  *
  * Only the first document returned for the meta word is used.
  *
  * @param string $url_or_key either info:base64_hash_url or just a url to
  *     lookup
  * @param string $index_name index into which to do the lookup; when empty
  *     this object's index_name is used
  * @param bool $is_key whether the string is info:base64_hash_url or just a
  *     url
  * @return mixed array (offset, generation) into the web archive bundle, or
  *     false if the index, its active generation, the word info or a
  *     matching document cannot be found
  */
 function lookupSummaryOffsetGeneration($url_or_key, $index_name = "", $is_key = false)
 {
     if ($index_name == "") {
         $index_name = $this->index_name;
     }
     $index_archive = IndexManager::getIndex($index_name);
     if (!$index_archive || !isset($index_archive->generation_info['ACTIVE'])) {
         return false;
     }
     $mask = "";
     $lookup_word = $is_key ? $url_or_key : "info:{$url_or_key}";
     $hash_key = crawlHashWord($lookup_word, true, $mask);
     $info = IndexManager::getWordInfo($index_name, $hash_key, 0, $mask, 1);
     if (!isset($info[0][4])) {
         return false;
     }
     $word_iterator = new WordIterator($info[0][4], $index_name, true);
     $next_docs = $word_iterator->nextDocsWithWord();
     if (!is_array($next_docs)) {
         return false;
     }
     foreach ($next_docs as $doc_info) {
         $summary_offset = $doc_info[CrawlConstants::SUMMARY_OFFSET];
         $generation = $doc_info[CrawlConstants::GENERATION];
         $index_archive->setCurrentShard($generation, true);
         // the page itself is not needed here; the call is kept as before
         @$index_archive->getPage($summary_offset);
         // first document wins
         return array($summary_offset, $generation);
     }
     // the iterator produced an empty list
     return false;
 }
Пример #11
0
 /**
  * Returns the number of documents that a given term or phrase appears in
  * in the given index
  *
  * @param string $term_or_phrase what to look up in the indexes dictionary
  *     no mask is used for this look up
  * @param string $index_name index to look up term or phrase in
  * @param int $threshold if set and positive then once more than threshold
  *     many documents are found the search for more documents to add to the
  *     total is stopped
  * @return mixed int number of documents, or false if the index cannot be
  *     obtained or has no dictionary
  */
 static function numDocsTerm($term_or_phrase, $index_name, $threshold = -1)
 {
     $index = IndexManager::getIndex($index_name);
     // getIndex() may not yield an index object; do not read a property of it
     if (!$index || !$index->dictionary) {
         return false;
     }
     $total_num_docs = 0;
     $hashes = allCrawlHashPaths($term_or_phrase, array(), array(), true);
     if (!is_array($hashes)) {
         $hashes = array($hashes);
     }
     foreach ($hashes as $hash) {
         if (is_array($hash)) {
             $dictionary_info = IndexManager::getWordInfo($index_name, $hash[0], $hash[1], $hash[2], $threshold);
         } else {
             $dictionary_info = IndexManager::getWordInfo($index_name, $hash);
         }
         // getWordInfo() can return false when nothing is known about the
         // hash; count(false) is a TypeError on PHP 8
         if (!is_array($dictionary_info)) {
             continue;
         }
         $num_generations = count($dictionary_info);
         // entries are indexed consecutively, optionally starting at -1
         $start = isset($dictionary_info[-1]) ? -1 : 0;
         $end = $start == -1 ? $num_generations - 1 : $num_generations;
         for ($i = $start; $i < $end; $i++) {
             if (!isset($dictionary_info[$i][3])) {
                 continue;
             }
             // element 3 of each entry is the document count
             $total_num_docs += $dictionary_info[$i][3];
             if ($threshold > 0 && $total_num_docs > $threshold) {
                 return $total_num_docs;
             }
         }
     }
     return $total_num_docs;
 }
Пример #12
0
 /**
  * Prints information about $num many postings beginning at the
  * provided $generation and $offset
  *
  * Everything is written with echo/printf. If no document corresponds to
  * the given generation and offset a message is printed and the script is
  * terminated with exit.
  *
  * @param string $archive_path the path of a directory that holds
  *     an IndexArchiveBundle
  * @param int $generation which index shard to use
  * @param int $offset offset into posting lists for that shard
  * @param int $num how many postings to print info for
  */
 function outputPostingInfo($archive_path, $generation, $offset, $num = 1)
 {
     $bundle_name = $this->getArchiveName($archive_path);
     echo "\nBundle Name: {$bundle_name}\n";
     $archive_type = $this->getArchiveKind($archive_path);
     echo "Bundle Type: {$archive_type}\n";
     echo "Generation: {$generation}\n";
     echo "Offset: {$offset}\n";
     // only IndexArchiveBundle archives are supported here
     if (strcmp($archive_type, "IndexArchiveBundle") != 0) {
         $this->badFormatMessageAndExit($archive_path, "index");
     }
     // the index timestamp is whatever follows index_data_base_name in the path
     $index_timestamp = substr($archive_path, strpos($archive_path, self::index_data_base_name) + strlen(self::index_data_base_name));
     $index = IndexManager::getIndex($index_timestamp);
     $index->setCurrentShard($generation, true);
     $shard = $index->getCurrentShard();
     // $next is $offset divided by 4; it is multiplied by 4 again (<< 2)
     // wherever an offset is handed to getPostingsSlice() or printed
     $next = $offset >> 2;
     $raw_postings = array();
     $doc_indexes = array();
     $documents = array();
     // walk forward one posting at a time, collecting the raw posting, its
     // first unpacked value and the matching document data
     for ($i = 0; $i < $num; $i++) {
         $dummy_offset = 0;
         $posting_start = $next;
         $posting_end = $next;
         $old_offset = $next << 2;
         $old_start = $next << 2;
         $old_end = $next << 2;
         $tmp = $shard->getPostingAtOffset($next, $posting_start, $posting_end);
         $next = $posting_end + 1;
         // no posting at this position: stop collecting
         if (!$tmp) {
             break;
         }
         $documents = array_merge($documents, $shard->getPostingsSlice($old_offset, $old_start, $old_end, 1));
         $raw_postings[] = $tmp;
         $post_array = unpackPosting($tmp, $dummy_offset);
         $doc_indexes[] = $post_array[0];
     }
     $end_offset = $next << 2;
     echo "Offset After Returned Results: {$end_offset}\n\n";
     if (!$documents || ($count = count($documents)) < 1) {
         echo "No documents correspond to generation and offset given\n\n";
         exit;
     }
     $document_word = $count == 1 ? "Document" : "Documents";
     echo "{$count} {$document_word} Found:\n";
     echo str_pad("", $count + 1, "=") . "================\n";
     // $j indexes $doc_indexes and $raw_postings in step with $documents
     $j = 0;
     foreach ($documents as $key => $document) {
         echo "\nDOC ID: " . toHexString($key);
         echo "\nTYPE: " . ($document[self::IS_DOC] ? "Document" : "Link");
         echo "\nDOC INDEX: " . $doc_indexes[$j];
         $summary_offset = $document[self::SUMMARY_OFFSET];
         echo "\nSUMMARY OFFSET: " . $summary_offset;
         echo "\nSCORE: " . $document[self::SCORE];
         echo "\nDOC RANK: " . $document[self::DOC_RANK];
         echo "\nRELEVANCE: " . $document[self::RELEVANCE];
         echo "\nPROXIMITY: " . $document[self::PROXIMITY];
         echo "\nHEX POSTING:\n";
         echo "------------\n";
         echo wordwrap(toHexString($raw_postings[$j]), 80);
         if (isset($document[self::POSITION_LIST])) {
             echo "\nTERM OCCURRENCES IN DOCUMENT (Count starts at title):";
             echo "\n-------------------------" . "----------------------------\n";
             // print the positions five per line
             $i = 0;
             foreach ($document[self::POSITION_LIST] as $position) {
                 printf("%09d ", $position);
                 $i++;
                 if ($i >= 5) {
                     echo "\n";
                     $i = 0;
                 }
             }
             // finish a partially filled last line
             if ($i != 0) {
                 echo "\n";
             }
         }
         // errors from getPage() are suppressed; a missing page just means
         // that no summary is printed
         $page = @$index->getPage($summary_offset);
         if (isset($page[self::TITLE])) {
             echo "SUMMARY TITLE:\n";
             echo "--------------\n";
             echo wordwrap($page[self::TITLE], 80) . "\n";
         }
         if (isset($page[self::DESCRIPTION])) {
             echo "SUMMARY DESCRIPTION:\n";
             echo "--------------\n";
             echo $page[self::DESCRIPTION] . "\n";
         }
         $j++;
     }
 }
Пример #13
0
 /**
  * Computes for each word in an array of words a count of the total number
  * of times it occurs in this crawl model's default index.
  *
  * If $machine_urls names machines other than a single localhost, the
  * request is sent to those machines with execMachines() and the counts
  * they return are added up per word. Otherwise the counts come from the
  * local index archive.
  *
  * @param array $words words to find the counts for
  * @param array $machine_urls machines to invoke this command on
  * @return array associative array of word => counts; empty when the local
  *     index cannot be obtained or no counts are available
  */
 function countWords($words, $machine_urls = NULL)
 {
     if ($machine_urls != NULL && !$this->isSingleLocalhost($machine_urls)) {
         $count_strings = $this->execMachines("countWords", $machine_urls, serialize(array($words, $this->index_name)));
         $word_counts = array();
         if (!is_array($count_strings)) {
             return $word_counts;
         }
         foreach ($count_strings as $count_string) {
             if (!isset($count_string[self::PAGE])) {
                 continue;
             }
             // NOTE(review): unserialize() runs on data received from other
             // machines; this is only safe while every queried machine is
             // trusted -- consider a format such as JSON for this exchange
             $a_word_counts = unserialize(webdecode($count_string[self::PAGE]));
             if (is_array($a_word_counts)) {
                 foreach ($a_word_counts as $word => $count) {
                     $word_counts[$word] = isset($word_counts[$word]) ? $word_counts[$word] + $count : $count;
                 }
             }
         }
         return $word_counts;
     }
     $index_archive = IndexManager::getIndex($this->index_name);
     // without an index there is nothing to count
     if (!$index_archive) {
         return array();
     }
     // look the words up by hash, remembering which hash belongs to which word
     $hashes = array();
     $lookup = array();
     foreach ($words as $word) {
         $tmp = crawlHash($word);
         $hashes[] = $tmp;
         $lookup[$tmp] = $word;
     }
     $word_key_counts = $index_archive->countWordKeys($hashes);
     $word_counts = array();
     if (is_array($word_key_counts)) {
         foreach ($word_key_counts as $word_key => $count) {
             // ignore keys that were not asked for
             if (isset($lookup[$word_key])) {
                 $word_counts[$lookup[$word_key]] = $count;
             }
         }
     }
     return $word_counts;
 }