/**
 * Handles admin requests related to managing the machines which perform
 * crawls
 *
 * With this activity an admin can add/delete machines to manage. For each
 * managed machine, the admin can stop and start fetchers/queue_servers
 * as well as look at their log files. The activity to carry out is chosen
 * by $_REQUEST['arg'], which must be one of: addmachine, deletemachine,
 * newsmode, log, update. Any other value (or none) just produces the
 * default machine listing data.
 *
 * @return array $data fields for the view: ELEMENT (managemachines or
 *     machinelog), SCRIPT (Javascript to run on page load, including any
 *     doMessage feedback), MACHINE_NAMES, FETCHER_NUMBERS drop-down data,
 *     FETCHER_NUMBER, plus whatever pagingLogic() adds for MACHINE rows
 *     and, for the log activity, the LOG_* / REFRESH_LOG fields
 */
function manageMachines()
{
    $parent = $this->parent;
    $machine_model = $parent->model("machine");
    $profile_model = $parent->model("profile");
    $data = array();
    $data["ELEMENT"] = "managemachines";
    $possible_arguments = array("addmachine", "deletemachine",
        "newsmode", "log", "update");
    $data['SCRIPT'] = "doUpdate();";
    $data["leftorright"] = (getLocaleDirection() == 'ltr') ?
        "right" : "left";
    $data['MACHINE_NAMES'] = array();
    $data['FETCHER_NUMBERS'] = array(0 => 0, 1 => 1, 2 => 2, 3 => 3,
        4 => 4, 5 => 5, 6 => 6, 7 => 7, 8 => 8, 16 => 16);
    // NOTE(review): $tmp is never read in this method. The call is kept
    // in case the tl() invocation itself is relied upon elsewhere --
    // confirm before removing.
    $tmp = tl('system_component_select_machine');

    // A replica never has a queue server or fetchers of its own; a
    // non-replica never has a parent. Normalize the request accordingly
    // before the fields are cleaned below.
    if (!isset($_REQUEST["has_queue_server"]) ||
        isset($_REQUEST['is_replica'])) {
        $_REQUEST["has_queue_server"] = false;
    }
    if (isset($_REQUEST['is_replica'])) {
        $_REQUEST['num_fetchers'] = 0;
    } else {
        $_REQUEST['parent'] = "";
    }

    // Clean each expected field into $r; $allset ends up true only if
    // every field was present and the required string fields non-empty.
    $request_fields = array("name" => "string", "url" => "string",
        "has_queue_server" => "bool", "num_fetchers" => "int",
        "parent" => "string");
    $r = array();
    $allset = true;
    foreach ($request_fields as $field => $type) {
        if (isset($_REQUEST[$field])) {
            $r[$field] = $parent->clean($_REQUEST[$field], $type);
            if ($type == "string") {
                $r[$field] = trim($r[$field]);
                // parent is the only string field allowed to be empty
                if ($r[$field] == "" && $field != "parent") {
                    $allset = false;
                }
            }
            if ($field == "url") {
                // make sure a non-empty url ends in a slash before it
                // is canonicalized
                if (isset($r[$field][strlen($r[$field]) - 1]) &&
                    $r[$field][strlen($r[$field]) - 1] != "/") {
                    $r[$field] .= "/";
                }
                $r[$field] = UrlParser::canonicalLink($r[$field],
                    NAME_SERVER);
                if (!$r[$field]) {
                    $allset = false;
                }
            }
        } else {
            $allset = false;
        }
    }

    // Only fetcher counts offered by the drop-down are accepted;
    // anything else falls back to 0.
    if (isset($r["num_fetchers"]) &&
        in_array($r["num_fetchers"], $data['FETCHER_NUMBERS'])) {
        $data['FETCHER_NUMBER'] = $r["num_fetchers"];
    } else {
        $data['FETCHER_NUMBER'] = 0;
        if (isset($r["num_fetchers"])) {
            $r["num_fetchers"] = 0;
        }
    }

    // $name_exists is tracked separately because deletion is keyed on
    // the machine name alone: a match on the URL must not be treated as
    // "the machine to delete exists". The URL lookup is still only done
    // when the name lookup fails.
    $name_exists = isset($r["name"]) &&
        $machine_model->checkMachineExists("NAME", $r["name"]);
    $machine_exists = $name_exists || (isset($r["url"]) &&
        $machine_model->checkMachineExists("URL", $r["url"]));

    if (isset($_REQUEST['arg']) &&
        in_array($_REQUEST['arg'], $possible_arguments)) {
        switch ($_REQUEST['arg']) {
            case "addmachine":
                if ($allset == true && !$machine_exists) {
                    $machine_model->addMachine($r["name"], $r["url"],
                        $r["has_queue_server"], $r["num_fetchers"],
                        $r["parent"]);
                    $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >" .
                        tl('system_component_machine_added') .
                        "</h1>');";
                    $data['MACHINE_NAMES'][] = $r["name"];
                    $data['DELETABLE_MACHINES'][$r["name"]] = $r["name"];
                    sort($data['MACHINE_NAMES']);
                } elseif ($allset && $machine_exists) {
                    $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >" .
                        tl('system_component_machine_exists') .
                        "</h1>');";
                } else {
                    $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >" .
                        tl('system_component_machine_incomplete') .
                        "</h1>');";
                }
                break;

            case "deletemachine":
                // Guard on the name specifically: otherwise a request
                // whose url matched some machine but whose name was
                // missing or unknown would reach deleteMachine() with
                // an undefined/non-existent name and still report
                // success.
                if (!$name_exists) {
                    $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >" .
                        tl('system_component_machine_doesnt_exists') .
                        "</h1>');";
                    break;
                }
                // $total_rows is presumably filled in by getRows() by
                // reference; it is not used here.
                $machines = $machine_model->getRows(0, 1, $total_rows,
                    array(array("name", "=", $r["name"], "")));
                // A machine with any reported statuses still has a
                // service running and must be stopped first.
                $service_in_use = false;
                foreach ($machines as $machine) {
                    if ($machine['NAME'] == $r["name"]) {
                        $service_in_use = isset($machine['STATUSES']) &&
                            is_array($machine['STATUSES']) &&
                            $machine['STATUSES'] != array();
                        break;
                    }
                }
                if ($service_in_use) {
                    $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >" .
                        tl('system_component_stop_service_first') .
                        "</h1>');";
                    break;
                }
                $machine_model->deleteMachine($r["name"]);
                $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >" .
                    tl('system_component_machine_deleted') . "</h1>');";
                break;

            case "newsmode":
                $profile = $profile_model->getProfile(WORK_DIRECTORY);
                $news_modes = array("news_off", "news_web",
                    "news_process");
                if (isset($_REQUEST['news_mode']) &&
                    in_array($_REQUEST['news_mode'], $news_modes)) {
                    $profile["NEWS_MODE"] = $_REQUEST['news_mode'];
                    // the news_updater daemon only runs in
                    // news_process mode
                    if ($profile["NEWS_MODE"] != "news_process") {
                        CrawlDaemon::stop("news_updater", "", false);
                    } else {
                        CrawlDaemon::start("news_updater", 'none', "",
                            -1);
                    }
                    $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >" .
                        tl('system_component_news_mode_updated') .
                        "</h1>');";
                    $profile_model->updateProfile(WORK_DIRECTORY,
                        array(), $profile);
                } else {
                    $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >" .
                        tl('system_component_news_update_failed') .
                        "</h1>');";
                }
                break;

            case "log":
                if (isset($_REQUEST["fetcher_num"])) {
                    $r["fetcher_num"] =
                        $parent->clean($_REQUEST["fetcher_num"], "int");
                }
                if (isset($_REQUEST["mirror_name"])) {
                    $r["mirror_name"] =
                        $parent->clean($_REQUEST["mirror_name"],
                        "string");
                }
                // time accumulates 30 per refresh so that refreshing
                // can be switched off below once it grows large enough
                if (isset($_REQUEST["time"])) {
                    $data["time"] =
                        $parent->clean($_REQUEST["time"], "int") + 30;
                } else {
                    $data["time"] = 30;
                }
                if (isset($_REQUEST["NO_REFRESH"])) {
                    $data["NO_REFRESH"] = $parent->clean(
                        $_REQUEST["NO_REFRESH"], "bool");
                } else {
                    $data["NO_REFRESH"] = false;
                }
                $data["ELEMENT"] = "machinelog";
                $filter = "";
                if (isset($_REQUEST['f'])) {
                    $filter = $parent->clean($_REQUEST['f'], "string");
                }
                $data['filter'] = $filter;
                $data["REFRESH_LOG"] = "&time=" . $data["time"];
                $data["LOG_TYPE"] = "";
                // Which log is shown: a fetcher's (name + fetcher_num),
                // else a mirror's, else the queue_server's for name.
                // NOTE(review): name is appended to the query string
                // without url-encoding -- confirm clean() makes this
                // safe for how the view uses REFRESH_LOG.
                if (isset($r['fetcher_num']) && isset($r['name'])) {
                    $data["LOG_FILE_DATA"] = $machine_model->getLog(
                        $r["name"], $r["fetcher_num"], $filter);
                    $data["LOG_TYPE"] = $r['name'] . " fetcher " .
                        $r["fetcher_num"];
                    $data["REFRESH_LOG"] .= "&arg=log&name=" .
                        $r['name'] . "&fetcher_num=" . $r['fetcher_num'];
                } elseif (isset($r["mirror_name"])) {
                    $data["LOG_TYPE"] = $r['mirror_name'] . " mirror";
                    $data["LOG_FILE_DATA"] = $machine_model->getLog(
                        $r["mirror_name"], NULL, $filter, true);
                } elseif (isset($r['name'])) {
                    $data["LOG_TYPE"] = $r['name'] . " queue_server";
                    if ($r['name'] == "news") {
                        $data["LOG_TYPE"] = "Name Server News Updater";
                    }
                    $data["LOG_FILE_DATA"] = $machine_model->getLog(
                        $r["name"], NULL, $filter);
                    $data["REFRESH_LOG"] .= "&arg=log&name=" . $r['name'];
                }
                // stop auto-refreshing once time reaches ONE_HOUR / 3
                if ($data["time"] >= ONE_HOUR / 3) {
                    $data["REFRESH_LOG"] = "";
                }
                if (!isset($data["LOG_FILE_DATA"]) ||
                    $data["LOG_FILE_DATA"] == "") {
                    $data["LOG_FILE_DATA"] =
                        tl('system_component_no_machine_log');
                }
                // reverse the line order of the log
                $lines = array_reverse(
                    explode("\n", $data["LOG_FILE_DATA"]));
                $data["LOG_FILE_DATA"] = implode("\n", $lines);
                break;

            case "update":
                if (isset($_REQUEST["fetcher_num"])) {
                    $r["fetcher_num"] =
                        $parent->clean($_REQUEST["fetcher_num"], "int");
                } else {
                    $r["fetcher_num"] = NULL;
                }
                $available_actions = array("start", "stop",
                    "mirror_start", "mirror_stop");
                if (isset($r["name"]) && isset($_REQUEST["action"]) &&
                    in_array($_REQUEST["action"], $available_actions)) {
                    $action = $_REQUEST["action"];
                    // mirror_* actions are plain start/stop with the
                    // mirror flag set
                    $is_mirror = false;
                    if ($action == "mirror_start") {
                        $action = "start";
                        $is_mirror = true;
                    } elseif ($action == "mirror_stop") {
                        $action = "stop";
                        $is_mirror = true;
                    }
                    $machine_model->update($r["name"], $action,
                        $r["fetcher_num"], $is_mirror);
                    $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >" .
                        tl('system_component_machine_servers_updated') .
                        "</h1>');";
                } else {
                    $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >" .
                        tl('system_component_machine_no_action') .
                        "</h1>');";
                }
                break;
        }
    }
    $parent->pagingLogic($data, $machine_model, "MACHINE",
        DEFAULT_ADMIN_PAGING_NUM);
    // the add-machine form is reset to non-replica mode except when the
    // log view is being shown
    if (!isset($_REQUEST['arg']) || $_REQUEST['arg'] != 'log') {
        $data['SCRIPT'] .= "toggleReplica(false);";
    }
    return $data;
}
/**
 * Entry point to call to get the classifier_trainer to train a logistic
 * regression instance for one particular classifier.
 *
 * The command-line arguments are first handed to CrawlDaemon::init().
 * The class label of the classifier to be finalized is then read from
 * the second command-line argument ($argv[2]). That classifier is
 * fetched, prepared, finalized and stored back, after which the
 * classifier_trainer daemon for that label is stopped.
 */
function start()
{
    global $argv;
    $daemon_name = "classifier_trainer";
    CrawlDaemon::init($argv, $daemon_name);
    $class_label = $argv[2];
    // log name is the class label followed by -classifier_trainer
    $log_name = $class_label . '-' . $daemon_name;
    crawlLog("Initializing classifier trainer log..", $log_name, true);
    $to_finalize = Classifier::getClassifier($class_label);
    $to_finalize->prepareToFinalize();
    $to_finalize->finalize();
    Classifier::setClassifier($to_finalize);
    crawlLog("Training complete.\n");
    CrawlDaemon::stop($daemon_name, $class_label);
}
/**
 * Used to send a message to the given daemon or to run the program in
 * the foreground.
 *
 * The command is taken from $argv[1]:
 *  - start: echoes a "Starting ..." line and calls CrawlDaemon::start()
 *    with any arguments from position 3 onward passed along as options
 *  - stop: calls CrawlDaemon::stop() for this name/subname
 *  - terminal: the program runs in the current process, with
 *    LOG_TO_FILES defined false so log messages go to the console
 *  - child: the program runs in daemon mode, with LOG_TO_FILES defined
 *    true
 *  - anything else: the script exits
 * For terminal and child, a messages file holding a serialized
 * WAITING_START_MESSAGE_STATE status is written and made world
 * accessible (0777).
 *
 * @param array $argv an array of command line arguments; $argv[1] is
 *     the command, $argv[2] an optional subname ("none" means no
 *     subname)
 * @param string $name the prefix to use for lock and message files
 * @param int $exit_type passed through unchanged to CrawlDaemon::start()
 *     (original documentation: whether this function should exit or
 *     return; by default a lock file is only written if exit, which
 *     allows both queue server processes, Indexer and Scheduler, to
 *     use the same lock file)
 */
static function init($argv, $name, $exit_type = 1)
{
    self::$name = $name;
    self::$subname = (isset($argv[2]) && $argv[2] != "none") ?
        $argv[2] : "";

    //don't let our script be run from apache
    if (isset($_SERVER['DOCUMENT_ROOT']) &&
        strlen($_SERVER['DOCUMENT_ROOT']) > 0) {
        echo "BAD REQUEST";
        exit;
    }

    // no command given: print usage and quit
    if (!isset($argv[1])) {
        echo "{$name} needs to be run with a command-line argument.\n",
            "For example,\n",
            "php {$name}.php start //starts the {$name} as a daemon\n",
            "php {$name}.php stop //stops the {$name} daemon\n",
            "php {$name}.php terminal //runs {$name} within the current " .
            "process, not as a daemon, output going to the terminal\n";
        exit;
    }

    $messages_file = self::getMesssageFileName(self::$name,
        self::$subname);
    switch ($argv[1]) {
        case "start":
            // everything after the subname is forwarded as options
            $num_args = count($argv);
            $options = "";
            for ($pos = 3; $pos < $num_args; $pos++) {
                $options .= " " . $argv[$pos];
            }
            if (!isset($argv[2]) || $argv[2] == 'none') {
                $subname = 'none';
            } else {
                $subname = self::$subname;
            }
            if (isset($argv[3])) {
                $name_prefix = $argv[3];
            } else {
                $name_prefix = self::$subname;
            }
            $name_string = CrawlDaemon::getNameString($name,
                $name_prefix);
            echo "Starting {$name_string}...\n";
            CrawlDaemon::start($name, $subname, $options, $exit_type);
            break;

        case "stop":
            CrawlDaemon::stop($name, self::$subname);
            break;

        case "terminal":
        case "child":
            // the two modes differ only in the recorded mode and in
            // where log messages go
            $as_daemon = ($argv[1] == "child");
            self::$mode = $as_daemon ? 'daemon' : 'terminal';
            $info = array(
                self::STATUS => self::WAITING_START_MESSAGE_STATE
            );
            file_put_contents($messages_file, serialize($info));
            chmod($messages_file, 0777);
            // if false log messages are sent to the console
            define("LOG_TO_FILES", $as_daemon);
            break;

        default:
            exit;
    }
}
/**
 * Used to start/stop a queue_server, mirror, or fetchers of the current
 * Yioop instance based on the queue_server, mirror and fetcher fields of
 * the current $_REQUEST.
 *
 * Each field carries the string "true" or "false". A daemon is started
 * only if "true" was requested and CrawlDaemon::statuses() does not
 * already list it; it is stopped only if "false" was requested and it
 * is listed. The fetcher field is an array keyed by fetcher index.
 */
function update()
{
    $running = CrawlDaemon::statuses();

    if (isset($_REQUEST['queue_server'])) {
        $wanted = $_REQUEST['queue_server'];
        $is_up = isset($running["queue_server"][-1]);
        if ($wanted == "true" && !$is_up) {
            // a queue server consists of an indexer and a scheduler
            // process
            CrawlDaemon::start("queue_server", 'none', self::INDEXER, 0);
            CrawlDaemon::start("queue_server", 'none', self::SCHEDULER,
                2);
        } elseif ($wanted == "false" && $is_up) {
            CrawlDaemon::stop("queue_server");
        }
    }

    if (isset($_REQUEST['mirror'])) {
        $wanted = $_REQUEST['mirror'];
        $is_up = isset($running["mirror"][-1]);
        if ($wanted == "true" && !$is_up) {
            CrawlDaemon::start("mirror");
        } elseif ($wanted == "false" && $is_up) {
            CrawlDaemon::stop("mirror");
        }
    }

    if (isset($_REQUEST['fetcher']) && is_array($_REQUEST['fetcher'])) {
        foreach ($_REQUEST['fetcher'] as $fetcher_index => $wanted) {
            $is_up = isset($running["fetcher"][$fetcher_index]);
            // the fetcher index is passed on as a string
            $fetcher_name = "{$fetcher_index}";
            if ($wanted == "true" && !$is_up) {
                CrawlDaemon::start("fetcher", $fetcher_name);
            } elseif ($wanted == "false" && $is_up) {
                CrawlDaemon::stop("fetcher", $fetcher_name);
            }
        }
    }
}