/**
 * Handles a submitted "domain" form field and queues that domain for crawling.
 *
 * Does nothing when the posted value is empty. Otherwise the 'form_success'
 * body flag is set, the input is passed through CrawlerURL to obtain its
 * domain, and the following objects are looked up and created when missing:
 * the Domain, the Link for "<domain>/", an uncrawled CrawlSiteQueue entry for
 * the domain, and an uncrawled CrawlPageQueue entry for the domain/link pair.
 *
 * @return void
 */
private function process_form()
{
    $submitted = Request::getPost('domain');
    if (strlen($submitted) < 1) {
        // Nothing was posted; leave the page state untouched.
        return;
    }

    $this->set_body('form_success', true);

    // Reduce whatever the user typed to its domain part and derive the root link.
    $domainName = CrawlerURL::instance($submitted)->getDomain();
    $rootLink = $domainName . '/';

    // Find-or-create the Domain object.
    $domainEntity = Finder::instance('Domain')
        ->setNameFilter($domainName)
        ->getDomain()
        ?: Mutator::instance('Domain', 'Create')
            ->setData($domainName)
            ->execute();

    // Find-or-create the Link object for the domain root.
    $linkEntity = Finder::instance('Link')
        ->setNameFilter($rootLink)
        ->getLink()
        ?: Mutator::instance('Link', 'Create')
            ->setData($rootLink)
            ->execute();

    // Make sure an uncrawled site-queue entry exists for the domain.
    $pendingSite = Finder::instance('CrawlSiteQueue')
        ->setDomainFilter($domainEntity->getID())
        ->setStatusFilter(CrawlSiteQueue::$IS_UNCRAWLED)
        ->getCrawlSiteQueue();
    if (!$pendingSite) {
        Mutator::instance('CrawlSiteQueue', 'Create')
            ->setData($domainEntity, CrawlSiteQueue::$IS_UNCRAWLED)
            ->execute();
    }

    // Make sure an uncrawled page-queue entry exists for the root link.
    $pendingPage = Finder::instance('CrawlPageQueue')
        ->setDomainFilter($domainEntity->getID())
        ->setLinkFilter($linkEntity->getID())
        ->setStatusFilter(CrawlPageQueue::$IS_UNCRAWLED)
        ->getCrawlPageQueue();
    if (!$pendingPage) {
        Mutator::instance('CrawlPageQueue', 'Create')
            ->setData($domainEntity, $linkEntity, CrawlPageQueue::$IS_UNCRAWLED)
            ->execute();
    }
}
/**
 * Returns the links matched in $this->data, each wrapped by CrawlerURL::instance().
 *
 * The list is built on the first call by running self::$MATCH_LINK over
 * $this->data and taking capture group 1 of every match; it is then cached in
 * $this->links and returned as-is on later calls.
 *
 * A link that starts with "http://" or "https://" (case-insensitive) is used
 * unchanged. Any other link is treated as relative and has the domain of
 * $this->crawlerURL prepended.
 *
 * @return array List of CrawlerURL::instance() results; empty when nothing matched.
 */
private function get_links()
{
    if (!isset($this->links)) {
        $links = array();

        preg_match_all(self::$MATCH_LINK, $this->data, $match);

        if (isset($match[1]) && is_array($match[1])) {
            foreach ($match[1] as $link) {
                // Anchored scheme check. The previous stristr()/!= test treated
                // https:// links as relative (producing "<domain>https://...")
                // and, because of loose comparison against false, skipped the
                // domain prefix for falsy links such as "0".
                if (preg_match('#^https?://#i', $link) !== 1) {
                    // NOTE(review): the link is concatenated directly to the
                    // domain, so a relative path without a leading "/" yields
                    // "<domain>path" — confirm whether getDomain() or
                    // CrawlerURL::instance() accounts for that.
                    $link = $this->crawlerURL->getDomain() . $link;
                }
                $links[] = CrawlerURL::instance($link);
            }
        }

        $this->links = $links;
    }

    return $this->links;
}