/**
 * Handles admin request related to the manage locale activity
 *
 * The manage locale activity allows a user to add/delete locales, view
 * statistics about a locale as well as edit the string for that locale.
 * The sub activity is chosen by $_REQUEST['arg'] (one of addlocale,
 * deletelocale, editlocale, editstrings, search).
 *
 * @return array $data info about current locales, statistics for each
 *     locale as well as potentially the currently set string of a
 *     locale and any messages about the success or failure of a
 *     sub activity.
 */
function manageLocales()
{
    $parent = $this->parent;
    $locale_model = $parent->model("locale");
    $possible_arguments = array("addlocale", "deletelocale", "editlocale",
        "editstrings", "search");
    // default ordering given to pagingLogic() unless a search replaces it
    $search_array = array(array("tag", "", "", "ASC"));
    $data = array();
    $data['SCRIPT'] = "";
    $data["ELEMENT"] = "managelocales";
    $data['CURRENT_LOCALE'] = array("localename" => "",
        'localetag' => "", 'writingmode' => '-1', 'active' => 1);
    $data['WRITING_MODES'] = array(
        -1 => tl('system_component_select_mode'),
        "lr-tb" => "lr-tb",
        "rl-tb" => "rl-tb",
        "tb-rl" => "tb-rl",
        "tb-lr" => "tb-lr");
    $data['FORM_TYPE'] = "addlocale";
    $paging = true;
    if (isset($_REQUEST['arg']) &&
        in_array($_REQUEST['arg'], $possible_arguments)) {
        $clean_fields = array('localename', 'localetag', 'writingmode',
            'selectlocale', 'active');
        $incomplete = false;
        $required = array('localename', 'localetag');
        /* Each cleaned field becomes a local variable of the same name
           ($localename, $localetag, $writingmode, $selectlocale, $active).
           Every one of them is always assigned, so later code must test
           for the empty string rather than use isset().
         */
        foreach ($clean_fields as $field) {
            ${$field} = ($field == 'active') ? 0 : "";
            $is_required = in_array($field, $required);
            if (!isset($_REQUEST[$field])) {
                if ($is_required) {
                    $incomplete = true;
                }
                continue;
            }
            $tmp = trim($parent->clean($_REQUEST[$field], "string"));
            // unknown or "please select" writing modes fall back to lr-tb
            if ($field == "writingmode" && ($tmp == -1 ||
                !isset($data['WRITING_MODES'][$tmp]))) {
                $tmp = "lr-tb";
            }
            if ($tmp == "" && $is_required) {
                $incomplete = true;
            }
            ${$field} = $tmp;
        }
        switch ($_REQUEST['arg']) {
            case "addlocale":
                if ($incomplete) {
                    $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >" .
                        tl('system_component_locale_missing_info') .
                        "</h1>');";
                } else {
                    $locale_model->addLocale($localename, $localetag,
                        $writingmode, $active);
                    $locale_model->extractMergeLocales();
                    $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >" .
                        tl('system_component_locale_added') . "</h1>');";
                }
                break;
            case "deletelocale":
                if (!$locale_model->checkLocaleExists($selectlocale)) {
                    $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >" .
                        tl('system_component_localename_doesnt_exists') .
                        "</h1>');";
                    // returns before pagingLogic() is run
                    return $data;
                }
                $locale_model->deleteLocale($selectlocale);
                $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >" .
                    tl('system_component_localename_deleted') . "</h1>');";
                break;
            case "editlocale":
                if (!$locale_model->checkLocaleExists($selectlocale)) {
                    $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >" .
                        tl('system_component_localename_doesnt_exists') .
                        "</h1>');";
                    // returns before pagingLogic() is run
                    return $data;
                }
                $data['FORM_TYPE'] = "editlocale";
                $info = $locale_model->getLocaleInfo($selectlocale);
                $change = false;
                if ($localetag != "") {
                    $info["LOCALE_TAG"] = $localetag;
                    $change = true;
                }
                if ($writingmode != "") {
                    $info["WRITING_MODE"] = $writingmode;
                    $change = true;
                }
                // active flag is only considered when the form was submitted
                if (isset($_REQUEST['update']) &&
                    $active != $info['ACTIVE']) {
                    $info['ACTIVE'] = $active;
                    $change = true;
                }
                $data['CURRENT_LOCALE']['active'] = $info['ACTIVE'];
                $data['CURRENT_LOCALE']['localename'] = $info["LOCALE_NAME"];
                $data['CURRENT_LOCALE']['localetag'] = $selectlocale;
                $data['CURRENT_LOCALE']['writingmode'] =
                    $info["WRITING_MODE"];
                if ($change) {
                    $locale_model->updateLocaleInfo($info);
                    $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >" .
                        tl('system_component_locale_updated') . "</h1>');";
                }
                break;
            case "editstrings":
                // $selectlocale is always set; empty means none was chosen
                if ($selectlocale == "") {
                    break;
                }
                $paging = false;
                $data["leftorright"] =
                    (getLocaleDirection() == 'ltr') ? "right" : "left";
                $data['PREVIOUS_ACTIVITY'] = "manageLocales";
                if (isset($_REQUEST['previous_activity']) &&
                    in_array($_REQUEST['previous_activity'],
                        array("security", "searchSources"))) {
                    $data['PREVIOUS_ACTIVITY'] =
                        $_REQUEST['previous_activity'];
                }
                $data["ELEMENT"] = "editlocales";
                $data['CURRENT_LOCALE_NAME'] =
                    $locale_model->getLocaleName($selectlocale);
                $data['CURRENT_LOCALE_TAG'] = $selectlocale;
                if (isset($_REQUEST['STRINGS'])) {
                    $safe_strings = array();
                    foreach ($_REQUEST['STRINGS'] as $key => $value) {
                        $clean_key = $parent->clean($key, "string");
                        $clean_value = $parent->clean($value, "string");
                        $safe_strings[$clean_key] = $clean_value;
                    }
                    $locale_model->updateStringData($selectlocale,
                        $safe_strings);
                    $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >" .
                        tl('system_component_localestrings_updated') .
                        "</h1>');";
                } else {
                    $locale_model->extractMergeLocales();
                }
                $data['STRINGS'] =
                    $locale_model->getStringData($selectlocale);
                $data['DEFAULT_STRINGS'] =
                    $locale_model->getStringData(DEFAULT_LOCALE);
                $data['show'] = "all";
                $data["show_strings"] = array(
                    "all" => tl('system_component_all_strings'),
                    "missing" => tl('system_component_missing_strings'));
                if (isset($_REQUEST['show']) &&
                    $_REQUEST['show'] == "missing") {
                    $data["show"] = "missing";
                    // keep only ids that have no translation yet
                    foreach ($data['STRINGS'] as
                        $string_id => $translation) {
                        if ($translation != "") {
                            unset($data['STRINGS'][$string_id]);
                            unset($data['DEFAULT_STRINGS'][$string_id]);
                        }
                    }
                }
                $data["filter"] = "";
                if (isset($_REQUEST['filter']) && $_REQUEST['filter']) {
                    $filter = $parent->clean($_REQUEST['filter'], "string");
                    $data["filter"] = $filter;
                    // keep only ids that contain the filter text
                    foreach ($data['STRINGS'] as
                        $string_id => $translation) {
                        if (strpos($string_id, $filter) === false) {
                            unset($data['STRINGS'][$string_id]);
                            unset($data['DEFAULT_STRINGS'][$string_id]);
                        }
                    }
                }
                break;
            case "search":
                $search_array = $parent->tableSearchRequestHandler($data,
                    array('name', 'tag', 'mode', 'active'),
                    array('active'));
                break;
        }
    }
    if ($paging) {
        $parent->pagingLogic($data, $locale_model, "LOCALES",
            DEFAULT_ADMIN_PAGING_NUM, $search_array);
    }
    return $data;
}
/**
 * Handles admin requests for creating, editing, and deleting classifiers.
 *
 * This activity implements the logic for the page that lists existing
 * classifiers, including the actions that can be performed on them.
 * The action is chosen by $_REQUEST['arg'] and the classifier it applies
 * to by $_REQUEST['name'] (or, failing that, $_REQUEST['class_label']).
 *
 * @return array $data fields for the view: the element to draw, the list
 *     of classifiers, any message script, and a 'reload' flag that is
 *     true while some classifier is still in the FINALIZING state
 */
function manageClassifiers()
{
    $parent = $this->parent;
    $crawl_model = $parent->model("crawl");
    $possible_arguments = array('createclassifier', 'editclassifier',
        'finalizeclassifier', 'deleteclassifier', 'search');
    $data = array();
    $data['ELEMENT'] = 'manageclassifiers';
    $data['SCRIPT'] = '';
    $data['FORM_TYPE'] = '';
    $search_array = array();
    $machine_urls = $parent->model("machine")->getQueueServerUrls();
    $num_machines = count($machine_urls);
    // no machines, or a single localhost machine, is passed on as NULL
    if ($num_machines < 1 || ($num_machines == 1 &&
        UrlParser::isLocalhostUrl($machine_urls[0]))) {
        $machine_urls = NULL;
    }
    $data['leftorright'] =
        (getLocaleDirection() == 'ltr') ? 'right' : 'left';
    $classifiers = Classifier::getClassifierList();
    $start_finalizing = false;
    if (isset($_REQUEST['arg']) &&
        in_array($_REQUEST['arg'], $possible_arguments)) {
        // the label may arrive under either request field
        if (isset($_REQUEST['name'])) {
            $name = substr($parent->clean($_REQUEST['name'], 'string'),
                0, NAME_LEN);
            $name = Classifier::cleanLabel($name);
        } else if (isset($_REQUEST['class_label'])) {
            $name = substr(
                $parent->clean($_REQUEST['class_label'], 'string'),
                0, NAME_LEN);
            $name = Classifier::cleanLabel($name);
        } else {
            $name = "";
        }
        switch ($_REQUEST['arg']) {
            case 'createclassifier':
                if (!isset($classifiers[$name])) {
                    $classifier = new Classifier($name);
                    Classifier::setClassifier($classifier);
                    $classifiers[$name] = $classifier;
                    $data['SCRIPT'] .= "doMessage('<h1 class=\"red\">" .
                        tl('crawl_component_new_classifier') .
                        '</h1>\');';
                } else {
                    $data['SCRIPT'] .= "doMessage('<h1 class=\"red\">" .
                        tl('crawl_component_classifier_exists') .
                        '</h1>\');';
                }
                break;
            case 'deleteclassifier':
                /*
                   In addition to deleting the classifier, we also want to
                   delete the associated crawl mix (if one exists) used to
                   iterate over existing indexes in search of new training
                   examples.
                 */
                if (isset($classifiers[$name])) {
                    unset($classifiers[$name]);
                    Classifier::deleteClassifier($name);
                    $mix_name = Classifier::getCrawlMixName($name);
                    $mix_time =
                        $crawl_model->getCrawlMixTimestamp($mix_name);
                    if ($mix_time) {
                        $crawl_model->deleteCrawlMixIteratorState(
                            $mix_time);
                        $crawl_model->deleteCrawlMix($mix_time);
                    }
                    $data['SCRIPT'] .= "doMessage('<h1 class=\"red\">" .
                        tl('crawl_component_classifier_deleted') .
                        '</h1>\');';
                } else {
                    $data['SCRIPT'] .= "doMessage('<h1 class=\"red\">" .
                        tl('crawl_component_no_classifier') .
                        '</h1>\');';
                }
                break;
            case 'editclassifier':
                if (isset($classifiers[$name])) {
                    $data['class_label'] = $name;
                    $this->editClassifier($data, $classifiers,
                        $machine_urls);
                } else {
                    $data['SCRIPT'] .= "doMessage('<h1 class=\"red\">" .
                        tl('crawl_component_no_classifier') .
                        '</h1>\');';
                }
                break;
            case 'finalizeclassifier':
                /*
                   Finalizing is too expensive to be done directly in the
                   controller that responds to the web request. Instead, a
                   daemon is launched to finalize the classifier
                   asynchronously and save it back to disk when it's done.
                   In the meantime, a flag is set to indicate the current
                   finalizing state.
                 */
                if (isset($classifiers[$name])) {
                    CrawlDaemon::start("classifier_trainer", $name, '',
                        -1);
                    $classifier = $classifiers[$name];
                    $classifier->finalized = Classifier::FINALIZING;
                    $start_finalizing = true;
                    $data['SCRIPT'] .= "doMessage('<h1 class=\"red\">" .
                        tl('crawl_component_finalizing_classifier') .
                        '</h1>\');';
                } else {
                    // never start a trainer for a label that is unknown
                    $data['SCRIPT'] .= "doMessage('<h1 class=\"red\">" .
                        tl('crawl_component_no_classifier') .
                        '</h1>\');';
                }
                break;
            case 'search':
                $search_array = $parent->tableSearchRequestHandler($data,
                    array('name'));
                break;
        }
    }
    $data['classifiers'] = $classifiers;
    if ($search_array == array()) {
        $search_array[] = array("name", "", "", "ASC");
    }
    $parent->pagingLogic($data, 'classifiers', 'classifiers',
        DEFAULT_ADMIN_PAGING_NUM, $search_array, "",
        array('name' => 'class_label'));
    $data['reload'] = false;
    foreach ($classifiers as $label => $classifier) {
        if ($classifier->finalized == Classifier::FINALIZING) {
            $data['reload'] = true;
            break;
        }
    }
    // the request that started finalizing has already queued this message
    if ($data['reload'] && !$start_finalizing) {
        $data['SCRIPT'] .= "doMessage('<h1 class=\"red\">" .
            tl('crawl_component_finalizing_classifier') . '</h1>\');';
    }
    return $data;
}
/**
 * Formats a cache of a web page (adds history ui and highlight keywords)
 *
 * @param array $cache_item details meta information about the cache page
 * @param string $cache_file contains current web page before formatting
 * @param string $url that cache web page was originally from
 * @param string $summary_string summary data that was extracted from the
 *     web page to be put in the actually inverted index
 * @param int $crawl_time timestamp of crawl cache page was from
 * @param array $all_crawl_times timestamps of all crawl times currently
 *     in Yioop system
 * @param string $terms from original query responsible for cache request
 * @param array $ui_flags array of ui features which
 *     should be added to the cache page. For example, "highlight"
 *     would say search terms should be highlighted, "history"
 *     says add history navigation for all copies of this cache page in
 *     yioop system.
 * @return string of formatted cached page
 */
function formatCachePage($cache_item, $cache_file, $url,
    $summary_string, $crawl_time, $all_crawl_times, $terms, $ui_flags)
{
    // forwarded unchanged to historyUI() further down
    $hist_ui_open = in_array("hist_ui_open", $ui_flags);
    $date = date("F d Y H:i:s", $cache_item[self::TIMESTAMP]);
    // strip meta word expressions from the query before splitting it up
    $meta_words = PhraseParser::$meta_words_list;
    foreach ($meta_words as $meta_word) {
        $pattern = "/(\\b)({$meta_word}(\\S)+)/";
        $terms = preg_replace($pattern, "", $terms);
    }
    $terms = str_replace("'", " ", $terms);
    $terms = str_replace('"', " ", $terms);
    $terms = str_replace('\\', " ", $terms);
    $terms = str_replace('|', " ", $terms);
    $terms = $this->clean($terms, "string");
    $phrase_string = mb_ereg_replace("[[:punct:]]", " ", $terms);
    $words = mb_split(" ", $phrase_string);
    if (!in_array("highlight", $ui_flags)) {
        $words = array();
    }
    $dom = new DOMDocument();
    /* The previous error handler is put back while the page is parsed
       and yioop_error_handler is re-installed right after; presumably so
       warnings about malformed HTML are not reported -- TODO confirm.
     */
    restore_error_handler();
    @$dom->loadHTML('<?xml encoding="UTF-8">' . $cache_file);
    set_error_handler("yioop_error_handler");
    foreach ($dom->childNodes as $item) {
        if ($item->nodeType == XML_PI_NODE) {
            $dom->removeChild($item); // remove hack
        }
    }
    $dom->encoding = "UTF-8"; // insert proper
    $head = $dom->getElementsByTagName('head')->item(0);
    if (is_object($head)) {
        // add a noindex nofollow robot directive to page
        $head_first_child = $head->firstChild;
        $robot_node = $dom->createElement('meta');
        $robot_node = $head->insertBefore($robot_node, $head_first_child);
        $robot_node->setAttribute("name", "ROBOTS");
        $robot_node->setAttribute("content", "NOINDEX,NOFOLLOW");
        $comment = $dom->createComment(
            tl('search_controller_cache_comment'));
        $head->insertBefore($comment, $robot_node);
        // make link and script links absolute
        $head = $this->canonicalizeLinks($head, $url);
    } else {
        /* No head element was found: keep only a whitelist of body level
           tags, wrap the result in a minimal document and parse again.
         */
        $body_tags = "<frameset><frame><noscript><img><span><b><i><em>" .
            "<strong><h1><h2><h3><h4><h5><h6><p><div>" .
            "<a><table><tr><td><th><dt><dir><dl><dd><pre>";
        $cache_file = strip_tags($cache_file, $body_tags);
        $cache_file = wordwrap($cache_file, 80);
        $cache_file = "<html><head><title>" .
            tl('search_controller_yioop_cache') . "</title></head>" .
            "<body>" . $cache_file . "</body></html>";
        $dom = new DOMDocument();
        restore_error_handler();
        @$dom->loadHTML($cache_file);
        set_error_handler("yioop_error_handler");
    }
    $body = $dom->getElementsByTagName('body')->item(0);
    //make tags in body absolute
    $body = $this->canonicalizeLinks($body, $url);
    $first_child = $body->firstChild;
    $text_align = (getLocaleDirection() == 'ltr') ? "left" : "right";
    /* Each optional box below is inserted before the node created by the
       previous step, so the boxes end up stacked at the top of the body.
     */
    // add information about what was extracted from page
    if (in_array("summaries", $ui_flags)) {
        $summary_toggle_node = $this->createSummaryAndToggleNodes($dom,
            $text_align, $body, $summary_string, $cache_item);
    } else {
        $summary_toggle_node = $first_child;
    }
    if (isset($cache_item[self::KEYWORD_LINKS]) &&
        count($cache_item[self::KEYWORD_LINKS]) > 0) {
        $keyword_node = $this->createDomBoxNode($dom, $text_align,
            "zIndex: 1");
        // placeholder text; addKeywordLinks() is called at the very end
        $text_node = $dom->createTextNode("Z@key_links@Z");
        $keyword_node->appendChild($text_node);
        $keyword_node = $body->insertBefore($keyword_node,
            $summary_toggle_node);
        $set_key_links = true;
    } else {
        $keyword_node = $summary_toggle_node;
        $set_key_links = false;
    }
    if (in_array("version", $ui_flags)) {
        $version_node = $this->createDomBoxNode($dom, $text_align,
            "zIndex: 1");
        // Z@url@Z is swapped for the real url after the DOM is serialized
        $textNode = $dom->createTextNode(
            tl('search_controller_cached_version', "Z@url@Z", $date));
        $version_node->appendChild($textNode);
        $brNode = $dom->createElement('br');
        $version_node->appendChild($brNode);
        $this->addCacheJavascriptTags($dom, $version_node);
        $version_node = $body->insertBefore($version_node, $keyword_node);
    } else {
        $version_node = $keyword_node;
    }
    //UI for showing history
    if (in_array("history", $ui_flags)) {
        $history_node = $this->historyUI($crawl_time, $all_crawl_times,
            $version_node, $dom, $terms, $hist_ui_open, $url);
    } else {
        $history_node = $dom->createElement('div');
    }
    if ($history_node) {
        $version_node->appendChild($history_node);
    }
    $body = $this->markChildren($body, $words, $dom);
    $new_doc = $dom->saveHTML();
    if (substr($url, 0, 7) != "record:") {
        $url = "<a href='{$url}'>{$url}</a>";
    }
    $new_doc = str_replace("Z@url@Z", $url, $new_doc);
    $colors = array("yellow", "orange", "gray", "cyan");
    $color_count = count($colors);
    $i = 0;
    foreach ($words as $word) {
        //only mark string of length at least 2
        if (mb_strlen($word) > 1) {
            /* Words and prefixes are quoted before being used in a
               pattern: an unquoted metacharacter could make preg_replace
               fail and return NULL, which would blank the whole page.
             */
            $quoted_word = preg_quote($word, "/");
            $mark_prefix = crawlHash($word);
            if (stristr($mark_prefix, $word) !== false) {
                $mark_prefix = preg_replace("/{$quoted_word}/i", '',
                    $mark_prefix);
            }
            $quoted_prefix = preg_quote($mark_prefix, "/");
            // wrap prefix + word in a colored span ...
            $new_doc = preg_replace(
                "/" . $quoted_prefix . $quoted_word . "/i",
                '<span style="background-color:' . $colors[$i] .
                '">$0</span>', $new_doc);
            $i = ($i + 1) % $color_count;
            // ... then drop every occurrence of the prefix itself
            $new_doc = preg_replace("/" . $quoted_prefix . "/", "",
                $new_doc);
        }
    }
    if ($set_key_links) {
        $new_doc = $this->addKeywordLinks($new_doc, $cache_item);
    }
    return $new_doc;
}
/**
 * Send the provided view to output, drawing it with the given
 * data variable, using the current locale for translation, and
 * writing mode
 *
 * When QUERY_STATISTICS is on, the query logs and elapsed times of every
 * model instance, of the locale system and of mail (if any was recorded)
 * are merged into $data before rendering.
 *
 * @param string $view the name of the view to draw
 * @param array $data an array of values to use in drawing the view
 */
function displayView($view, $data)
{
    $data['LOCALE_TAG'] = getLocaleTag();
    $data['LOCALE_DIR'] = getLocaleDirection();
    $data['BLOCK_PROGRESSION'] = getBlockProgression();
    $data['WRITING_MODE'] = getWritingMode();
    if (QUERY_STATISTICS) {
        $data['QUERY_STATISTICS'] = array();
        $machine = isset($_SERVER["HTTP_HOST"]) ?
            htmlentities($_SERVER["HTTP_HOST"]) : "localhost";
        $machine_uri = isset($_SERVER['REQUEST_URI']) ?
            htmlentities($_SERVER['REQUEST_URI']) : "/";
        /* HTTPS being set is not enough to mean https: some servers set
           it to "off" for plain http requests, so require a non-empty
           value other than "off".
         */
        $https = isset($_SERVER["HTTPS"]) ?
            strtolower($_SERVER["HTTPS"]) : "";
        $protocol = ($https != "" && $https != "off") ?
            "https://" : "http://";
        if ($machine == '::1') { //IPv6 :(
            /* used if the fetching and queue serving on the same machine
               NOTE(review): the trailing slash here plus a REQUEST_URI
               that starts with "/" gives "//" -- confirm this is wanted.
             */
            $machine = "[::1]/";
        }
        $data['YIOOP_INSTANCE'] = $protocol . $machine . $machine_uri;
        $data['TOTAL_ELAPSED_TIME'] = 0;
        foreach ($this->model_instances as $model_name => $model) {
            $data['QUERY_STATISTICS'] = array_merge(
                $model->db->query_log, $data['QUERY_STATISTICS']);
            $data['TOTAL_ELAPSED_TIME'] += $model->db->total_time;
        }
        $locale_info = getLocaleQueryStatistics();
        $data['QUERY_STATISTICS'] = array_merge($locale_info['QUERY_LOG'],
            $data['QUERY_STATISTICS']);
        $data['TOTAL_ELAPSED_TIME'] += $locale_info['TOTAL_ELAPSED_TIME'];
        $mail_total_time = AnalyticsManager::get("MAIL_TOTAL_TIME");
        $mail_messages = AnalyticsManager::get("MAIL_MESSAGES");
        if ($mail_total_time && $mail_messages) {
            $data['QUERY_STATISTICS'] = array_merge($mail_messages,
                $data['QUERY_STATISTICS']);
            $data['TOTAL_ELAPSED_TIME'] += $mail_total_time;
        }
    }
    $data['c'] = isset($_REQUEST['c']) ? $_REQUEST['c'] : NULL;
    // a message stored in the session is handed to the view only once
    if (isset($_SESSION['DISPLAY_MESSAGE'])) {
        $data['DISPLAY_MESSAGE'] = $_SESSION['DISPLAY_MESSAGE'];
        unset($_SESSION['DISPLAY_MESSAGE']);
    }
    $this->view($view)->render($data);
}
/**
 * Handles admin request related to the editing a crawl mix activity
 *
 * The mix is looked up by $_REQUEST['timestamp'] (or
 * $_REQUEST['mix']['TIMESTAMP']). When $_REQUEST['update'] is "update"
 * the mix sent from the browser is cleaned and saved. In every case
 * Javascript describing the mix fragments is appended to $data['SCRIPT'].
 *
 * @param array $data info about the fragments and their contents for a
 *     particular crawl mix (changed by this method)
 */
function editMix(&$data)
{
    $parent = $this->parent;
    $crawl_model = $parent->model("crawl");
    $data["leftorright"] =
        (getLocaleDirection() == 'ltr') ? "right" : "left";
    $data["ELEMENT"] = "editmix";
    $user_id = $_SESSION['USER_ID'];
    $timestamp = 0;
    if (isset($_REQUEST['timestamp'])) {
        $timestamp = substr($parent->clean($_REQUEST['timestamp'], "int"),
            0, TIMESTAMP_LEN);
    } else if (isset($_REQUEST['mix']['TIMESTAMP'])) {
        $timestamp = substr(
            $parent->clean($_REQUEST['mix']['TIMESTAMP'], "int"),
            0, TIMESTAMP_LEN);
    }
    if (!$crawl_model->isCrawlMix($timestamp)) {
        $_REQUEST['a'] = "mixCrawls";
        $parent->redirectWithMessage(
            tl('social_component_mix_invalid_timestamp'));
    }
    if (!$crawl_model->isMixOwner($timestamp, $user_id)) {
        $_REQUEST['a'] = "mixCrawls";
        $parent->redirectWithMessage(
            tl('social_component_mix_not_owner'));
    }
    $mix = $crawl_model->getCrawlMix($timestamp);
    // remembered so a browser supplied mix cannot change owner or parent
    $owner_id = $mix['OWNER_ID'];
    $parent_id = $mix['PARENT'];
    $data['MIX'] = $mix;
    $data['INCLUDE_SCRIPTS'] = array("mix");
    if (!isset($data['SCRIPT'])) {
        $data['SCRIPT'] = "";
    }
    //set up an array of translation for javascript-land
    $data['SCRIPT'] .= "tl = {" .
        'social_component_add_crawls:"' .
            tl('social_component_add_crawls') . '",' .
        'social_component_num_results:"' .
            tl('social_component_num_results') . '",' .
        'social_component_del_frag:"' .
            tl('social_component_del_frag') . '",' .
        'social_component_weight:"' .
            tl('social_component_weight') . '",' .
        'social_component_name:"' .
            tl('social_component_name') . '",' .
        'social_component_add_keywords:"' .
            tl('social_component_add_keywords') . '",' .
        'social_component_actions:"' .
            tl('social_component_actions') . '",' .
        'social_component_add_query:"' .
            tl('social_component_add_query') . '",' .
        'social_component_delete:"' .
            tl('social_component_delete') . '"' .
        '};';
    //clean and save the crawl mix sent from the browser
    if (isset($_REQUEST['update']) && $_REQUEST['update'] == "update") {
        $mix = (isset($_REQUEST['mix']) && is_array($_REQUEST['mix'])) ?
            $_REQUEST['mix'] : array();
        $mix['TIMESTAMP'] = $timestamp;
        $mix['OWNER_ID'] = $owner_id;
        $mix['PARENT'] = $parent_id;
        $mix['NAME'] = $parent->clean(
            isset($mix['NAME']) ? $mix['NAME'] : "", "string");
        $save_mix = false;
        $fragments = (isset($mix['FRAGMENTS']) &&
            is_array($mix['FRAGMENTS'])) ? $mix['FRAGMENTS'] : array();
        $num_fragments = count($fragments);
        if ($num_fragments > 0 && $num_fragments < MAX_MIX_FRAGMENTS) {
            foreach ($fragments as $fragment_id => $fragment_data) {
                if (!is_array($fragment_data)) {
                    // malformed fragment: treat it as an empty one
                    $fragment_data = array();
                    $mix['FRAGMENTS'][$fragment_id] = array();
                }
                if (isset($fragment_data['RESULT_BOUND'])) {
                    $mix['FRAGMENTS'][$fragment_id]['RESULT_BOUND'] =
                        $parent->clean($fragment_data['RESULT_BOUND'],
                            "int");
                } else {
                    // default belongs to this fragment, not to the list
                    $mix['FRAGMENTS'][$fragment_id]['RESULT_BOUND'] = 0;
                }
                $comp = array();
                if (isset($fragment_data['COMPONENTS']) &&
                    is_array($fragment_data['COMPONENTS'])) {
                    foreach ($fragment_data['COMPONENTS'] as $component) {
                        $row = array();
                        $row['CRAWL_TIMESTAMP'] = $parent->clean(
                            $component['CRAWL_TIMESTAMP'], "int");
                        $row['WEIGHT'] = $parent->clean(
                            $component['WEIGHT'], "float");
                        $row['KEYWORDS'] = $parent->clean(
                            $component['KEYWORDS'], "string");
                        $comp[] = $row;
                    }
                }
                $mix['FRAGMENTS'][$fragment_id]['COMPONENTS'] = $comp;
            }
            $save_mix = true;
        } else {
            // nothing usable was sent: fall back to the stored fragments
            $mix['FRAGMENTS'] = $data['MIX']['FRAGMENTS'];
            if ($num_fragments >= MAX_MIX_FRAGMENTS) {
                $parent->redirectWithMessage(
                    tl('social_component_too_many_fragments'));
            }
        }
        if ($save_mix) {
            $data['MIX'] = $mix;
            $crawl_model->setCrawlMix($mix);
            $parent->redirectWithMessage(tl('social_component_mix_saved'));
        }
    }
    /* Emit the fragments as a Javascript array literal for drawFragments().
       NOTE(review): crawl names and keywords are placed inside single
       quoted JS strings as-is -- confirm they cannot contain a quote.
     */
    $data['SCRIPT'] .= 'fragments = [';
    $not_first = "";
    foreach ($mix['FRAGMENTS'] as $fragment_id => $fragment_data) {
        $data['SCRIPT'] .= $not_first . '{';
        $not_first = ",";
        if (isset($fragment_data['RESULT_BOUND'])) {
            $data['SCRIPT'] .= "num_results:" .
                $fragment_data['RESULT_BOUND'];
        } else {
            $data['SCRIPT'] .= "num_results:1 ";
        }
        $data['SCRIPT'] .= ", components:[";
        if (isset($fragment_data['COMPONENTS'])) {
            $comma = "";
            foreach ($fragment_data['COMPONENTS'] as $component) {
                $crawl_ts = $component['CRAWL_TIMESTAMP'];
                // a crawl that is no longer listed gets an empty name
                $crawl_name = isset($data['available_crawls'][$crawl_ts]) ?
                    $data['available_crawls'][$crawl_ts] : "";
                $data['SCRIPT'] .= $comma .
                    " [{$crawl_ts}, '{$crawl_name}', " .
                    $component['WEIGHT'] . ", ";
                $comma = ",";
                $keywords = isset($component['KEYWORDS']) ?
                    $component['KEYWORDS'] : "";
                $data['SCRIPT'] .= "'{$keywords}'] ";
            }
        }
        $data['SCRIPT'] .= "] }";
    }
    $data['SCRIPT'] .= ']; drawFragments();';
}