/**
 * Handles a submitted "domain" form field by queuing that domain for crawling.
 *
 * Returns immediately when the posted value is empty. Otherwise the
 * 'form_success' body flag is set, the input is passed through CrawlerURL to
 * obtain the domain, and the following records are looked up and created
 * when the lookup yields nothing:
 *  - the Domain itself,
 *  - a Link named after the domain followed by "/",
 *  - a CrawlSiteQueue entry for the domain with the "uncrawled" status,
 *  - a CrawlPageQueue entry for the domain/link pair with the "uncrawled" status.
 */
private function process_form()
{
    $submitted = Request::getPost('domain');
    if (strlen($submitted) < 1) {
        // Nothing was posted; leave the body untouched.
        return;
    }

    $this->set_body('form_success', true);

    $domainName = CrawlerURL::instance($submitted)->getDomain();
    $rootLinkName = $domainName . '/';

    // Find-or-create the Domain record.
    $domainRecord = Finder::instance('Domain')
        ->setNameFilter($domainName)
        ->getDomain()
        ?: Mutator::instance('Domain', 'Create')
            ->setData($domainName)
            ->execute();

    // Find-or-create the Link record for the domain's root URL.
    $linkRecord = Finder::instance('Link')
        ->setNameFilter($rootLinkName)
        ->getLink()
        ?: Mutator::instance('Link', 'Create')
            ->setData($rootLinkName)
            ->execute();

    // Queue the site unless an uncrawled entry for this domain already exists.
    if (
        !Finder::instance('CrawlSiteQueue')
            ->setDomainFilter($domainRecord->getID())
            ->setStatusFilter(CrawlSiteQueue::$IS_UNCRAWLED)
            ->getCrawlSiteQueue()
    ) {
        Mutator::instance('CrawlSiteQueue', 'Create')
            ->setData($domainRecord, CrawlSiteQueue::$IS_UNCRAWLED)
            ->execute();
    }

    // Queue the root page unless an uncrawled entry for this domain/link already exists.
    if (
        !Finder::instance('CrawlPageQueue')
            ->setDomainFilter($domainRecord->getID())
            ->setLinkFilter($linkRecord->getID())
            ->setStatusFilter(CrawlPageQueue::$IS_UNCRAWLED)
            ->getCrawlPageQueue()
    ) {
        Mutator::instance('CrawlPageQueue', 'Create')
            ->setData($domainRecord, $linkRecord, CrawlPageQueue::$IS_UNCRAWLED)
            ->execute();
    }
}
/**
 * Persists this crawled page together with everything extracted from it.
 *
 * Each piece of data is looked up through a Finder and created through a
 * Mutator only when the lookup returns nothing. In order, the method handles:
 *  1. the Domain, Link, MetaTitle, MetaDescription and ContentPage records,
 *  2. the Page record combining those with the meta-redirect flag and HTTP
 *     code (the access date is only supplied when the Page is created),
 *     which is then stored on $this->page,
 *  3. one ContentH1 plus a PageContentH1Map per H1 heading,
 *  4. one ContentImageSource plus PageContentImageSourceMap per image, and,
 *     when the image has non-empty alternate text, a
 *     ContentImageAlternateText plus its source/alt-text map,
 *  5. one Link plus PageLinkMap per content link, flagged internal/external,
 *  6. one MetaKeyword plus PageMetaKeywordMap per meta keyword.
 */
public function save()
{
    // --- Core lookup records -------------------------------------------------
    $domain = Finder::instance('Domain')
        ->setNameFilter($this->crawlerURL->getDomain())
        ->getDomain();
    if (!isset($domain)) {
        $domain = Mutator::instance('Domain', 'Create')
            ->setData($this->crawlerURL->getDomain())
            ->execute();
    }

    $link = Finder::instance('Link')
        ->setNameFilter($this->crawlerURL->getAbsoluteURL())
        ->getLink();
    if (!isset($link)) {
        $link = Mutator::instance('Link', 'Create')
            ->setData($this->crawlerURL->getAbsoluteURL())
            ->execute();
    }

    $metaTitle = Finder::instance('MetaTitle')
        ->setNameFilter($this->getMetaTitle())
        ->getMetaTitle();
    if (!isset($metaTitle)) {
        $metaTitle = Mutator::instance('MetaTitle', 'Create')
            ->setData($this->getMetaTitle())
            ->execute();
    }

    $metaDescription = Finder::instance('MetaDescription')
        ->setNameFilter($this->getMetaDescription())
        ->getMetaDescription();
    if (!isset($metaDescription)) {
        $metaDescription = Mutator::instance('MetaDescription', 'Create')
            ->setData($this->getMetaDescription())
            ->execute();
    }

    $contentPage = Finder::instance('ContentPage')
        ->setNameFilter($this->getContentPage())
        ->getContentPage();
    if (!isset($contentPage)) {
        $contentPage = Mutator::instance('ContentPage', 'Create')
            ->setData($this->getContentPage())
            ->execute();
    }

    // --- The page itself -----------------------------------------------------
    $page = Finder::instance('Page')
        ->setDomainFilter($domain->getID())
        ->setLinkFilter($link->getID())
        ->setMetaTitleFilter($metaTitle->getID())
        ->setMetaDescriptionFilter($metaDescription->getID())
        ->setMetaRedirectFilter($this->hasMetaRedirect())
        ->setContentPageFilter($contentPage->getID())
        ->setHTTPCodeFilter($this->getHTTPCode())
        ->getPage();
    if (!isset($page)) {
        $page = Mutator::instance('Page', 'Create')
            ->setData(
                $domain,
                $link,
                $metaTitle,
                $metaDescription,
                $this->hasMetaRedirect(),
                $contentPage,
                $this->getHTTPCode(),
                $this->getDateAccessed()
            )
            ->execute();
    }
    $this->page = $page;

    // --- H1 headings ---------------------------------------------------------
    foreach ($this->getContentH1s() as $contentH1) {
        $contentH1Object = Finder::instance('ContentH1')
            ->setNameFilter($contentH1)
            ->getContentH1();
        if (!isset($contentH1Object)) {
            $contentH1Object = Mutator::instance('ContentH1', 'Create')
                ->setData($contentH1)
                ->execute();
        }

        // NOTE(review): unlike the other map records below, no lookup precedes
        // this create, so saving the same page twice may add duplicate map
        // rows - confirm whether that is intended.
        Mutator::instance('PageContentH1Map', 'Create')
            ->setData($page, $contentH1Object)
            ->execute();
    }

    // --- Images and their alternate texts ------------------------------------
    foreach ($this->getContentImages() as $image) {
        $contentImageSource = Finder::instance('ContentImageSource')
            ->setNameFilter($image->source)
            ->getContentImageSource();
        if (!isset($contentImageSource)) {
            $contentImageSource = Mutator::instance('ContentImageSource', 'Create')
                ->setData($image->source)
                ->execute();
        }

        $existingImageMap = Finder::instance('PageContentImageSourceMap')
            ->setPageFilter($page->getID())
            ->setContentImageSourceFilter($contentImageSource->getID())
            ->getPageContentImageSourceMap();
        if (!$existingImageMap) {
            Mutator::instance('PageContentImageSourceMap', 'Create')
                ->setData($page, $contentImageSource)
                ->execute();
        }

        // Images without alternate text get no alt-text records at all.
        if (strlen($image->alternate_text) > 0) {
            $contentImageAlternateText = Finder::instance('ContentImageAlternateText')
                ->setNameFilter($image->alternate_text)
                ->getContentImageAlternateText();
            if (!isset($contentImageAlternateText)) {
                $contentImageAlternateText = Mutator::instance('ContentImageAlternateText', 'Create')
                    ->setData($image->alternate_text)
                    ->execute();
            }

            $existingAltTextMap = Finder::instance('ContentImageSourceContentImageAlternateTextMap')
                ->setContentImageSourceFilter($contentImageSource->getID())
                ->setContentImageAlternateTextFilter($contentImageAlternateText->getID())
                ->getContentImageSourceContentImageAlternateTextMap();
            if (!isset($existingAltTextMap)) {
                Mutator::instance('ContentImageSourceContentImageAlternateTextMap', 'Create')
                    ->setData($contentImageSource, $contentImageAlternateText)
                    ->execute();
            }
        }
    }

    // --- Outgoing content links ----------------------------------------------
    // The host is the same for every link, so derive it once. Both the http
    // and https scheme prefixes are stripped; previously only 'http://' was
    // removed, which left 'https://' in the host handed to isInternalLink().
    $host = preg_replace('#^https?://#i', '', $domain->getName());

    foreach ($this->get_links() as $crawlerLink) {
        if (!$crawlerLink->isContentLink()) {
            continue;
        }

        // A separate local keeps the page's own $link from being overwritten.
        $targetLink = Finder::instance('Link')
            ->setNameFilter($crawlerLink->getAbsoluteURL())
            ->getLink();
        if (!isset($targetLink)) {
            $targetLink = Mutator::instance('Link', 'Create')
                ->setData($crawlerLink->getAbsoluteURL())
                ->execute();
        }

        // Evaluated once and reused for both the lookup and the create.
        $isInternal = $crawlerLink->isInternalLink($host);

        $existingLinkMap = Finder::instance('PageLinkMap')
            ->setPageFilter($page->getID())
            ->setLinkFilter($targetLink->getID())
            ->setInternalLinkFilter($isInternal)
            ->getPageLinkMap();
        if (!$existingLinkMap) {
            Mutator::instance('PageLinkMap', 'Create')
                ->setData($page, $targetLink, $isInternal)
                ->execute();
        }
    }

    // --- Meta keywords -------------------------------------------------------
    foreach ($this->getMetaKeywords() as $metaKeyword) {
        $keyword = Finder::instance('MetaKeyword')
            ->setNameFilter($metaKeyword)
            ->getMetaKeyword();
        if (!isset($keyword)) {
            $keyword = Mutator::instance('MetaKeyword', 'Create')
                ->setData($metaKeyword)
                ->execute();
        }

        $existingKeywordMap = Finder::instance('PageMetaKeywordMap')
            ->setPageFilter($page->getID())
            ->setMetaKeywordFilter($keyword->getID())
            ->getPageMetaKeywordMap();
        if (!$existingKeywordMap) {
            Mutator::instance('PageMetaKeywordMap', 'Create')
                ->setData($page, $keyword)
                ->execute();
        }
    }
}