/**
 * Handles admin requests for creating, editing, finalizing, deleting, and
 * searching classifiers.
 *
 * This activity implements the logic for the page that lists existing
 * classifiers, including the actions that can be performed on them. The
 * action is chosen by $_REQUEST['arg']; the classifier it applies to is
 * taken from $_REQUEST['name'] or, failing that, $_REQUEST['class_label'].
 * Actions that need an existing classifier (edit, delete, finalize) report
 * a "no classifier" message when the label is unknown.
 *
 * @return array $data fields for the view: ELEMENT, SCRIPT, FORM_TYPE,
 *      leftorright, classifiers, reload, plus whatever editClassifier,
 *      tableSearchRequestHandler, and pagingLogic add to it
 */
function manageClassifiers()
{
    $parent = $this->parent;
    $crawl_model = $parent->model("crawl");
    $possible_arguments = array('createclassifier', 'editclassifier',
        'finalizeclassifier', 'deleteclassifier', 'search');
    $data = array();
    $data['ELEMENT'] = 'manageclassifiers';
    $data['SCRIPT'] = '';
    $data['FORM_TYPE'] = '';
    $search_array = array();
    // Every status message is emitted as the same doMessage(...) snippet.
    $message_open = "doMessage('<h1 class=\"red\">";
    $message_close = '</h1>\');';
    $machine_urls = $parent->model("machine")->getQueueServerUrls();
    $num_machines = count($machine_urls);
    // No queue servers, or only a single localhost one: pass NULL on to
    // editClassifier instead of a url list.
    if ($num_machines < 1 || ($num_machines == 1 &&
        UrlParser::isLocalhostUrl($machine_urls[0]))) {
        $machine_urls = NULL;
    }
    $data['leftorright'] =
        getLocaleDirection() == 'ltr' ? 'right' : 'left';
    $classifiers = Classifier::getClassifierList();
    $start_finalizing = false;
    if (isset($_REQUEST['arg']) &&
        in_array($_REQUEST['arg'], $possible_arguments, true)) {
        // 'name' takes precedence over 'class_label'; either is cleaned,
        // truncated to NAME_LEN, and normalized as a classifier label.
        if (isset($_REQUEST['name'])) {
            $name = substr($parent->clean($_REQUEST['name'], 'string'),
                0, NAME_LEN);
            $name = Classifier::cleanLabel($name);
        } else if (isset($_REQUEST['class_label'])) {
            $name = substr(
                $parent->clean($_REQUEST['class_label'], 'string'),
                0, NAME_LEN);
            $name = Classifier::cleanLabel($name);
        } else {
            $name = "";
        }
        switch ($_REQUEST['arg']) {
            case 'createclassifier':
                if (!isset($classifiers[$name])) {
                    $classifier = new Classifier($name);
                    Classifier::setClassifier($classifier);
                    $classifiers[$name] = $classifier;
                    $data['SCRIPT'] .= $message_open .
                        tl('crawl_component_new_classifier') .
                        $message_close;
                } else {
                    $data['SCRIPT'] .= $message_open .
                        tl('crawl_component_classifier_exists') .
                        $message_close;
                }
                break;

            case 'deleteclassifier':
                /*
                   In addition to deleting the classifier, we also want to
                   delete the associated crawl mix (if one exists) used to
                   iterate over existing indexes in search of new training
                   examples.
                 */
                if (isset($classifiers[$name])) {
                    unset($classifiers[$name]);
                    Classifier::deleteClassifier($name);
                    $mix_name = Classifier::getCrawlMixName($name);
                    $mix_time =
                        $crawl_model->getCrawlMixTimestamp($mix_name);
                    if ($mix_time) {
                        $crawl_model->deleteCrawlMixIteratorState(
                            $mix_time);
                        $crawl_model->deleteCrawlMix($mix_time);
                    }
                    $data['SCRIPT'] .= $message_open .
                        tl('crawl_component_classifier_deleted') .
                        $message_close;
                } else {
                    $data['SCRIPT'] .= $message_open .
                        tl('crawl_component_no_classifier') .
                        $message_close;
                }
                break;

            case 'editclassifier':
                if (isset($classifiers[$name])) {
                    $data['class_label'] = $name;
                    $this->editClassifier($data, $classifiers,
                        $machine_urls);
                } else {
                    $data['SCRIPT'] .= $message_open .
                        tl('crawl_component_no_classifier') .
                        $message_close;
                }
                break;

            case 'finalizeclassifier':
                /*
                   Finalizing is too expensive to be done directly in the
                   controller that responds to the web request. Instead, a
                   daemon is launched to finalize the classifier
                   asynchronously and save it back to disk when it's done.
                   In the meantime, a flag is set to indicate the current
                   finalizing state.
                 */
                if (isset($classifiers[$name])) {
                    CrawlDaemon::start("classifier_trainer", $name, '',
                        -1);
                    $classifier = $classifiers[$name];
                    $classifier->finalized = Classifier::FINALIZING;
                    $start_finalizing = true;
                    $data['SCRIPT'] .= $message_open .
                        tl('crawl_component_finalizing_classifier') .
                        $message_close;
                } else {
                    // Unknown label: do not launch a trainer daemon for a
                    // classifier that does not exist.
                    $data['SCRIPT'] .= $message_open .
                        tl('crawl_component_no_classifier') .
                        $message_close;
                }
                break;

            case 'search':
                $search_array = $parent->tableSearchRequestHandler($data,
                    array('name'));
                break;
        }
    }
    $data['classifiers'] = $classifiers;
    // Without a search request, fall back to an ascending listing by name.
    if ($search_array == array()) {
        $search_array[] = array("name", "", "", "ASC");
    }
    $parent->pagingLogic($data, 'classifiers', 'classifiers',
        DEFAULT_ADMIN_PAGING_NUM, $search_array, "",
        array('name' => 'class_label'));
    // Flag a reload while any classifier is still being finalized.
    $data['reload'] = false;
    foreach ($classifiers as $classifier) {
        if ($classifier->finalized == Classifier::FINALIZING) {
            $data['reload'] = true;
            break;
        }
    }
    // The finalizing message was already queued if this request started it.
    if ($data['reload'] && !$start_finalizing) {
        $data['SCRIPT'] .= $message_open .
            tl('crawl_component_finalizing_classifier') .
            $message_close;
    }
    return $data;
}
/**
 * Reads this tool's command-line flags.
 *
 * The -a (activity), -l (classifier label), and -d (dataset) flags are
 * mandatory; when one is absent a message is printed and the program exits
 * with status 1. The label is passed through Classifier::cleanLabel before
 * being returned. Any remaining -S, -I, -F, or -B values are forwarded to
 * the setOptions method: -S with no conversion argument, and -I, -F, -B
 * with 'intval', 'floatval', and 'boolval' respectively.
 *
 * @return array the parsed activity, dataset, and label
 */
function parseOptions()
{
    $parsed = getopt('l:a:d:S:I:F:B:');

    // Mandatory flags, checked in this order; the first one missing aborts.
    $required = array(
        'a' => "missing -a flag to choose activity to run\n",
        'l' => "missing -l flag to set classifier label\n",
        'd' => "missing -d flag to choose dataset to use\n",
    );
    foreach ($required as $flag => $complaint) {
        if (!isset($parsed[$flag])) {
            echo $complaint;
            exit(1);
        }
    }

    $activity = $parsed['a'];
    $label = Classifier::cleanLabel($parsed['l']);
    $dataset_name = $parsed['d'];
    unset($parsed['a'], $parsed['l'], $parsed['d']);

    // Conversion name handed to setOptions for each typed parameter flag.
    $converters = array(
        'I' => 'intval',
        'F' => 'floatval',
        'B' => 'boolval',
    );
    foreach ($parsed as $flag => $raw) {
        if ($flag === 'S') {
            // String parameters rely on setOptions' own default handling.
            $this->setOptions($raw);
        } else if (isset($converters[$flag])) {
            $this->setOptions($raw, $converters[$flag]);
        } else {
            echo "unsupported option: {$flag}\n";
        }
    }

    return array($activity, $dataset_name, $label);
}