/**
 * Run Crawler
 *
 * Visits every URL returned by getUrls(). When the crawler's scan option
 * equals '1', the links found on each successfully visited page are
 * visited as well. Afterwards the execution mode and execution date are
 * stored on the crawler and the crawler is saved.
 *
 * @todo Code review
 *
 * @param Maverick_Crawler_Model_Crawler $crawler Crawler being executed
 * @param mixed $mode Execution mode recorded on the crawler as its last execution mode
 * @return array Error messages collected during the run (empty when none occurred)
 */
public function run(Maverick_Crawler_Model_Crawler $crawler, $mode = Maverick_Crawler_Model_Crawler::MODE_MANUAL)
{
    $errors     = array();
    $urls       = $this->getUrls();
    $logEnabled = Mage::getStoreConfig('crawler/general/log_url');
    $helper     = Mage::helper('maverick_crawler');
    $maxTime    = Mage::getStoreConfig('crawler/general/max_time');
    // The start time is only tracked when a time limit is configured;
    // false disables every time-limit check below.
    $startTime  = Mage::getStoreConfigFlag('crawler/general/max_time') ? time() : false;

    // Log Start
    if ($logEnabled) {
        $helper->log($helper->__('###### Starting Crawler ID %s ######', $crawler->getId()));
    }

    foreach ($urls as $url) {
        if ($this->_isMaxTimeExceeded($startTime, $maxTime)) {
            $errMessage = $helper->__(
                '--> ### Stoping Crawler, Maximum Time Of Crawling Exceeded (%s secondes)',
                $maxTime
            );
            if ($logEnabled) {
                $helper->log($errMessage);
            }
            $errors[] = $errMessage;
            break;
        }

        if ($logEnabled) {
            $helper->log($helper->__('--> Warming Up %s (%s time(s))', $url, $this->_nbrOfVisits));
        }

        $crawlerObj = $this->_crawlerHelper->visit($url, $this->_nbrOfVisits);
        if (!is_object($crawlerObj)) {
            // A non-object result is treated as a failed visit; skip scanning this URL.
            $errors[] = $helper->__('Some errors encountered while crawling, check your log file');
            continue;
        }

        if ($crawler->getScan() != '1') {
            // Scan option disabled: only the configured URL itself is warmed up.
            continue;
        }

        $pageLinks = $this->_crawlerHelper->getPageLinks($crawlerObj);
        if ($logEnabled) {
            $helper->log($helper->__('--> Scan Option is enabled, scanning %s', $url));
            $helper->log($helper->__('--> Scan Option found %s urls', count($pageLinks)));
            $helper->log($helper->__('--> Crawling Them ...'));
        }

        foreach ($pageLinks as $link) {
            if ($this->_isMaxTimeExceeded($startTime, $maxTime)) {
                $errMessage = $helper->__(
                    '--> ### Stoping Scanning, Maximum Time Of Crawling Exceeded (%s secondes)',
                    $maxTime
                );
                if ($logEnabled) {
                    $helper->log($errMessage);
                }
                $errors[] = $errMessage;
                // Only leaves the scan loop; the outer loop re-checks the limit
                // before visiting the next URL.
                break;
            }

            $start = time();
            $this->_crawlerHelper->visit($link, $this->_nbrOfVisits);
            $end = time() - $start;

            if ($logEnabled) {
                $helper->log($helper->__(
                    ' --> Warming Up %s (%s time(s), Took %s secondes)',
                    $link,
                    $this->_nbrOfVisits,
                    $end
                ));
            }
        }
    }

    if ($logEnabled) {
        $helper->log($helper->__('###### End Process Crawler ID %s ######', $crawler->getId()));
    }

    $crawler->setLastExecutionMode($mode)
        ->setLastExecutionAt(Mage::getModel('core/date')->date('Y-m-d H:i:s'))
        ->save();

    return $errors;
}

/**
 * Tell whether the configured crawling time budget has been used up.
 *
 * @param int|bool $startTime Timestamp taken when the run started, or false when no limit applies
 * @param mixed $maxTime Maximum run time in seconds, as read from configuration
 * @return bool True when a limit applies and more than $maxTime seconds have elapsed
 */
protected function _isMaxTimeExceeded($startTime, $maxTime)
{
    return $startTime && $maxTime < time() - $startTime;
}
/**
 * Save crawler cms page relations
 *
 * Compares the page ids carried by the crawler with the ids returned by
 * getPageIds() for that crawler, inserts the missing relations and deletes
 * the obsolete ones. The crawler's "is changed pages" flag is reset first
 * and set to true only when at least one relation was added or removed;
 * in that case the affected page ids are stored on the crawler too.
 *
 * @param Maverick_Crawler_Model_Crawler $crawler
 * @return $this
 */
public function savePages($crawler)
{
    $pageIds = $crawler->getPageIds();
    $crawler->setIsChangedPages(false);

    if (!is_array($pageIds)) {
        // No usable page list on the crawler: array_diff() would fail on a
        // non-array argument, so leave the stored relations untouched.
        return $this;
    }

    $oldPageIds = $this->getPageIds($crawler);
    $insert     = array_diff($pageIds, $oldPageIds);
    $delete     = array_diff($oldPageIds, $pageIds);

    $write     = $this->_getWriteAdapter();
    $crawlerId = (int) $crawler->getId();

    if (!empty($insert)) {
        $data = array();
        foreach ($insert as $pageId) {
            // Skip blank entries rather than inserting a relation to page 0.
            if (empty($pageId)) {
                continue;
            }
            $data[] = array(
                'crawler_id' => $crawlerId,
                'page_id'    => (int) $pageId,
            );
        }
        if ($data) {
            $write->insertMultiple($this->_crawlerCmsTable, $data);
        }
    }

    if (!empty($delete)) {
        // Remove all obsolete relations with one statement instead of one
        // DELETE per page; the adapter expands the array into the IN list.
        $where = array(
            'crawler_id = ?' => $crawlerId,
            'page_id IN (?)' => array_map('intval', $delete),
        );
        $write->delete($this->_crawlerCmsTable, $where);
    }

    if (!empty($insert) || !empty($delete)) {
        $crawler->setAffectedPageIds(array_merge($insert, $delete));
        $crawler->setIsChangedPages(true);
    }

    return $this;
}