/**
 * Entry point used to get the queue_server running.
 *
 * Hands the command-line arguments to CrawlDaemon::init, sets up logging,
 * records which role this process runs as, deletes leftover message and
 * status files from the schedules folder, and finally enters the
 * queue_server's main loop.
 *
 * When the first argument is "start" and the second names one of the
 * roles self::INDEXER or self::SCHEDULER, only that role is initialized.
 * When the first argument is "start" with no recognized role,
 * CrawlDaemon::init is invoked once for each of the two roles. For any
 * other first argument the arguments are passed to CrawlDaemon::init
 * unchanged.
 */
function start()
{
    global $argv;
    $roles = array(self::INDEXER, self::SCHEDULER);
    if (!isset($argv[1]) || $argv[1] != "start") {
        CrawlDaemon::init($argv, "queue_server");
    } else if (isset($argv[2]) && in_array($argv[2], $roles)) {
        // move the requested role into slot 3; slot 2 becomes "none"
        $argv[3] = $argv[2];
        $argv[2] = "none";
        // 3 indicates force start
        CrawlDaemon::init($argv, "queue_server", 3);
    } else {
        // no single role requested: initialize each role in turn, each
        // paired with the third argument CrawlDaemon::init receives for it
        $launches = array(
            array(self::INDEXER, 0),
            array(self::SCHEDULER, 2),
        );
        foreach ($launches as $launch) {
            $argv[2] = "none";
            $argv[3] = $launch[0];
            CrawlDaemon::init($argv, "queue_server", $launch[1]);
        }
    }
    crawlLog("\n\nInitialize logger..", "queue_server", true);
    // default name; replaced below when running as a single-role child
    $this->server_name = "IndexerAndScheduler";
    $is_role_child = isset($argv[3]) && $argv[1] == "child" &&
        in_array($argv[3], $roles);
    if ($is_role_child) {
        $this->server_type = $argv[3];
        $this->server_name = $argv[3];
        crawlLog($argv[3] . " logging started.");
    }
    // clear out message/status files left in the schedules folder
    $schedules_folder = CRAWL_DIR . "/schedules/";
    $stale_names = array("queue_server_messages.txt",
        "scheduler_messages.txt", "crawl_status.txt",
        "schedule_status.txt");
    $found_stale = false;
    foreach ($stale_names as $stale_name) {
        $stale_path = $schedules_folder . $stale_name;
        if (!file_exists($stale_path)) {
            continue;
        }
        @unlink($stale_path);
        $found_stale = true;
    }
    if ($found_stale) {
        crawlLog("Remove old messages..", "queue_server");
    }
    $this->loop();
}
/**
 * Entry point used to get the fetcher running.
 *
 * Takes the fetcher number from the second command-line argument
 * (falling back to 0, and writing "0" back into $argv, when it is
 * absent), passes the arguments to CrawlDaemon::init, sets up logging
 * under the name "<fetcher number>-fetcher", and then enters the
 * fetcher's main loop.
 */
function start()
{
    global $argv;
    if (!isset($argv[2])) {
        // no fetcher number supplied: default to fetcher 0
        $argv[2] = "0";
        $this->fetcher_num = 0;
    } else {
        $this->fetcher_num = intval($argv[2]);
    }
    CrawlDaemon::init($argv, "fetcher");
    $log_name = $this->fetcher_num . "-fetcher";
    crawlLog("\n\nInitialize logger..", $log_name, true);
    $this->loop();
}
/**
 * Entry point used to get the classifier_trainer running.
 *
 * The class label of the classifier to finalize is read from the second
 * command-line argument. After CrawlDaemon::init has processed the
 * arguments, the classifier for that label is fetched, prepared for
 * finalization, finalized, and stored back via Classifier::setClassifier.
 * The daemon is then stopped for that label.
 */
function start()
{
    global $argv;
    CrawlDaemon::init($argv, "classifier_trainer");
    $class_label = $argv[2];
    crawlLog("Initializing classifier trainer log..",
        "{$class_label}-classifier_trainer", true);
    $trained = Classifier::getClassifier($class_label);
    $trained->prepareToFinalize();
    $trained->finalize();
    // store the finalized classifier back
    Classifier::setClassifier($trained);
    crawlLog("Training complete.\n");
    CrawlDaemon::stop('classifier_trainer', $class_label);
}
/**
 * Entry point used to get the mirror process syncing.
 *
 * Passes the command-line arguments to CrawlDaemon::init, sets up
 * logging under the name "mirror", and then enters the syncer's main
 * loop.
 */
function start()
{
    global $argv;
    $process_name = "mirror";
    CrawlDaemon::init($argv, $process_name);
    crawlLog("\n\nInitialize logger..", $process_name, true);
    $this->loop();
}
/**
 * Entry point used to get the news_updater to start updating.
 *
 * Passes the command-line arguments to CrawlDaemon::init, sets up
 * logging under the name "news_updater", creates the SourceModel the
 * updater works with, and then enters the news_updater's main loop.
 */
function start()
{
    global $argv;
    $process_name = "news_updater";
    CrawlDaemon::init($argv, $process_name);
    crawlLog("\n\nInitialize logger..", $process_name, true);
    $this->sourceModel = new SourceModel();
    $this->loop();
}