/**
 * Overwrites selected crawl parameters stored in the description of an
 * existing crawl's index archive. This may be called while the crawl is
 * still running. Only the fields handled below are updatable; any other
 * entries of $new_info are ignored.
 *
 * @param string $timestamp timestamp of the crawl to change
 * @param array $new_info the new parameters, grouped by section
 *     (e.g. $new_info['seed_sites']['url'])
 * @param array $machine_urls an array of urls of yioop queue servers
 */
function setCrawlSeedInfo($timestamp, $new_info, $machine_urls = NULL)
{
    // When machine urls are given and they are not just a single localhost,
    // pass the same request on through execMachines. The local update below
    // is attempted in either case.
    if ($machine_urls != NULL &&
        !$this->isSingleLocalhost($machine_urls, $timestamp)) {
        $this->execMachines("setCrawlSeedInfo", $machine_urls,
            serialize(array($timestamp, $new_info)));
    }

    $bundle_dir = CRAWL_DIR . '/cache/' . self::index_data_base_name .
        $timestamp;
    if (!file_exists($bundle_dir)) {
        // no archive for this timestamp on this machine; nothing to update
        return;
    }

    $archive_info = IndexArchiveBundle::getArchiveInfo($bundle_dir);
    // the crawl parameters are kept serialized in the DESCRIPTION field
    $description = unserialize($archive_info['DESCRIPTION']);

    if (isset($new_info['general']["restrict_sites_by_url"])) {
        $description[self::RESTRICT_SITES_BY_URL] =
            $new_info['general']["restrict_sites_by_url"];
    }

    // section of $new_info => (key in the description, field in the section)
    $section_map = array(
        "allowed_sites" => array(self::ALLOWED_SITES, 'url'),
        "disallowed_sites" => array(self::DISALLOWED_SITES, 'url'),
        "seed_sites" => array(self::TO_CRAWL, "url"),
        "page_rules" => array(self::PAGE_RULES, 'rule'),
        "indexed_file_types" => array(self::INDEXED_FILE_TYPES, "extensions"),
        "active_classifiers" => array(self::ACTIVE_CLASSIFIERS, 'label'),
        "active_rankers" => array(self::ACTIVE_RANKERS, 'label')
    );
    foreach ($section_map as $section => $spec) {
        $description_key = $spec[0];
        $field = $spec[1];
        if (!isset($new_info[$section][$field])) {
            continue;
        }
        $description[$description_key] = $new_info[$section][$field];
    }

    if (isset($new_info['indexing_plugins']['plugins'])) {
        $description[self::INDEXING_PLUGINS] =
            $new_info['indexing_plugins']['plugins'];
    }

    // write the modified description back into the archive's info
    $archive_info['DESCRIPTION'] = serialize($description);
    IndexArchiveBundle::setArchiveInfo($bundle_dir, $archive_info);
}