Exemple #1
0
 /**
  * Requests every rewrite URL of one store, page by page and in batches.
  *
  * Before each page is fetched the current paging offset is written to the
  * core session (presumably read back by getRequestPaths() - verify against
  * the resource model). URLs are handed to the adapter as soon as a batch
  * reaches the configured thread count; a trailing partial batch is sent last.
  *
  * @param array $info store data; reads 'store_id', 'base_url' and the optional 'cookie'
  * @param Varien_Http_Adapter_Curl $adapter
  */
 protected function _executeRequests(array $info, Varien_Http_Adapter_Curl $adapter)
 {
     $storeId = $info['store_id'];

     // Note: peer certificate verification is switched off for these requests.
     $curlOptions = array(CURLOPT_USERAGENT => self::USER_AGENT, CURLOPT_SSL_VERIFYPEER => 0);

     $batchSize = $this->_getCrawlerThreads($storeId);
     if (!$batchSize) {
         $batchSize = 1;
     }

     if (!empty($info['cookie'])) {
         $curlOptions[CURLOPT_COOKIE] = $info['cookie'];
     }

     $pending = array();
     for ($offset = 0; ; $offset += self::BATCH_SIZE) {
         // Publish the offset first, then ask for the page it points at.
         Mage::getSingleton('core/session')->setCrawlerOffset($offset);

         $rows = $this->_getResource()->getRequestPaths($storeId);
         if (!$rows) {
             // An empty (falsy) page ends the crawl for this store.
             break;
         }

         foreach ($rows as $row) {
             $pending[] = $this->_getUrlByRewriteRow($row, $info['base_url'], $storeId);
             if (count($pending) == $batchSize) {
                 $adapter->multiRequest($pending, $curlOptions);
                 $pending = array();
             }
         }
     }

     // Send whatever did not fill a complete batch.
     if (count($pending) > 0) {
         $adapter->multiRequest($pending, $curlOptions);
     }
 }
 /**
  * Requests the given URLs and syncs each URL record with the cache ID found
  * in its response.
  *
  * Each response (headers included) is searched for an "Fpc-Cache-Id" header.
  * When one is found, the matching fpc/crawler_url model gets the new cache ID
  * (saved only if it differs from the stored one); when none is found, the
  * model is deleted.
  *
  * @param array $urls    URLs to request; each key is used as the ID of the
  *                       fpc/crawler_url model to load for that response
  * @param bool  $verbose when true, echoes a "CACHED"/"REMOVED" line per URL
  * @return $this
  */
 public function requestUrls($urls, $verbose = true)
 {
     $adapter = new Varien_Http_Adapter_Curl();
     $options = array(CURLOPT_USERAGENT => self::USER_AGENT, CURLOPT_HEADER => true);

     // HTTP field names are case-insensitive and arrive lower-cased over HTTP/2,
     // so only the header name is matched case-insensitively. The ID itself keeps
     // its original, case-sensitive shape. The prefix is quoted so it is always
     // treated as literal text. Built once: it does not change per response.
     $cacheIdPattern = '/(?i:Fpc-Cache-Id): ('
         . preg_quote(Mirasvit_Fpc_Model_Processor::REQUEST_ID_PREFIX, '/')
         . '[a-z0-9]{32})/';

     $multiResult = $adapter->multiRequest($urls, $options);
     foreach ($multiResult as $urlId => $content) {
         $urlModel = Mage::getModel('fpc/crawler_url')->load($urlId);
         $this->_removeDublicates($urlModel);

         $matches = array();
         // Cast guards against a non-string result for a failed request.
         if (preg_match($cacheIdPattern, (string) $content, $matches) === 1) {
             $cacheId = $matches[1];
             if ($urlModel->getCacheId() != $cacheId) {
                 $urlModel->setCacheId($cacheId)->save();
             }
             if ($verbose) {
                 echo 'CACHED ' . $urls[$urlId] . PHP_EOL;
             }
         } else {
             // No cache ID header in the response: drop the URL record.
             if ($verbose) {
                 echo 'REMOVED ' . $urls[$urlId] . PHP_EOL;
             }
             $urlModel->delete();
         }
     }

     return $this;
 }
 /**
  * Requests search-autocomplete URLs for every growing prefix of the stored
  * search queries of each store - currently a no-op.
  *
  * NOTE(review): the method returns on its first statement, so everything
  * after it is unreachable. Whether that is a deliberate kill switch cannot
  * be told from here; confirm before removing either the return or the body.
  *
  * @return $this
  */
 public function crawl()
 {
     // Unconditional early exit: all code below is dead.
     return $this;
     Mage::register('custom_entry_point', true, true);
     // NOTE(review): $counter is only ever incremented, never read.
     $counter = 0;
     $timeStart = time();
     $storesInfo = $this->getStoresInfo();
     $adapter = new Varien_Http_Adapter_Curl();
     foreach ($storesInfo as $info) {
         $options = array(CURLOPT_USERAGENT => self::USER_AGENT);
         $storeId = $info['store_id'];
         // Batch size is fixed at one URL per multiRequest() call.
         $threads = 1;
         if (!empty($info['cookie'])) {
             $options[CURLOPT_COOKIE] = $info['cookie'];
         }
         $urls = array();
         $urlsCount = 0;
         // NOTE(review): $totalCount is only ever incremented, never read.
         $totalCount = 0;
         // Search queries of this store, ordered by popularity descending.
         $queries = Mage::getModel('catalogsearch/query')->getCollection()->addFieldToFilter('store_id', $storeId)->setOrder('popularity', 'desc');
         foreach ($queries as $query) {
             $queryText = $query->getQueryText();
             $part = '';
             // Builds one URL per prefix of the query text ("a", "ab", "abc", ...).
             // NOTE(review): strlen()/[$i] work on bytes, so a multi-byte character
             // is split across several prefixes - confirm before re-enabling.
             for ($i = 0; $i < strlen($queryText); $i++) {
                 $part .= $queryText[$i];
                 // NOTE(review): $part is put into the query string without URL encoding.
                 $url = $info['base_url'] . 'searchautocomplete/ajax/get/?q=' . $part . '&cat=0';
                 $urls[] = $url;
                 $urlsCount++;
                 $totalCount++;
                 $counter++;
                 if ($urlsCount == $threads) {
                     // NOTE(review): $result is assigned but never used.
                     $result = $adapter->multiRequest($urls, $options);
                     $urlsCount = 0;
                     $urls = array();
                 }
             }
             // Stop once more than one hour has elapsed; checked only between queries.
             if (time() - $timeStart > 1 * 60 * 60) {
                 return $this;
             }
         }
         // Send any URLs left over from an incomplete batch.
         if (!empty($urls)) {
             $adapter->multiRequest($urls, $options);
         }
     }
     return $this;
 }
Exemple #4
0
 /**
  * Crawl all system urls
  *
  * For every store returned by getStoresInfo() whose crawler-enabled config
  * is truthy, requests base_url + request_path for each row fetched from the
  * resource's URL statement. URLs are sent in batches of the configured
  * thread count; a trailing partial batch is sent last.
  *
  * @return Enterprise_PageCache_Model_Crawler
  */
 public function crawl()
 {
     $storesInfo = $this->getStoresInfo();
     $adapter = new Varien_Http_Adapter_Curl();
     foreach ($storesInfo as $info) {
         $storeId = $info['store_id'];
         $store = Mage::app()->getStore($storeId);
         if (!$store->getConfig(self::XML_PATH_CRAWLER_ENABLED)) {
             continue;
         }

         // Clamp to at least one. A zero, non-numeric or negative setting would
         // otherwise never match the batch counter, and every URL of the store
         // would be fired in a single unbounded multi-request.
         $threads = max(1, (int) $store->getConfig(self::XML_PATH_CRAWLER_THREADS));

         $options = array(CURLOPT_USERAGENT => self::USER_AGENT);
         if (!empty($info['cookie'])) {
             $options[CURLOPT_COOKIE] = $info['cookie'];
         }

         $stmt = $this->_getResource()->getUrlStmt($storeId);
         $baseUrl = $info['base_url'];
         $urls = array();
         $urlsCount = 0;
         while ($row = $stmt->fetch()) {
             $urls[] = $baseUrl . $row['request_path'];
             $urlsCount++;
             if ($urlsCount >= $threads) {
                 $adapter->multiRequest($urls, $options);
                 $urlsCount = 0;
                 $urls = array();
             }
         }

         // Send whatever did not fill a complete batch.
         if (!empty($urls)) {
             $adapter->multiRequest($urls, $options);
         }
     }
     return $this;
 }
Exemple #5
0
 /**
  * Requests each not-yet-visited URL of one store in batches.
  *
  * A URL is base_url followed by a request path from the resource model.
  * URLs whose md5 hash is already recorded in $this->_visitedUrls are skipped;
  * new ones are recorded and sent to the adapter in groups of the configured
  * thread count, with a trailing partial group sent last.
  *
  * @param array $info store data; reads 'store_id', 'base_url' and the optional 'cookie'
  * @param Varien_Http_Adapter_Curl $adapter
  */
 protected function _executeRequests(array $info, Varien_Http_Adapter_Curl $adapter)
 {
     $storeId = $info['store_id'];

     $curlOptions = array(CURLOPT_USERAGENT => self::USER_AGENT);
     if (!empty($info['cookie'])) {
         $curlOptions[CURLOPT_COOKIE] = $info['cookie'];
     }

     $batchLimit = $this->_getCrawlerThreads($storeId);
     if (!$batchLimit) {
         $batchLimit = 1;
     }

     $batch = array();
     $batchCount = 0;
     foreach ($this->_getResource()->getRequestPaths($storeId) as $requestPath) {
         $url = $info['base_url'] . $requestPath;
         $hash = md5($url);

         if (!isset($this->_visitedUrls[$hash])) {
             // First sighting: remember it and queue it for the next batch.
             $this->_visitedUrls[$hash] = true;
             $batch[] = $url;

             if (++$batchCount == $batchLimit) {
                 $adapter->multiRequest($batch, $curlOptions);
                 $batch = array();
                 $batchCount = 0;
             }
         }
     }

     // Send whatever did not fill a complete batch.
     if ($batchCount > 0) {
         $adapter->multiRequest($batch, $curlOptions);
     }
 }
Exemple #6
0
 /**
  * Crawl all system urls
  *
  * Returns immediately when the 'full_page' cache type is not in use.
  * Otherwise, for every store returned by getStoresInfo() whose
  * crawler-enabled config is truthy, requests base_url + path for each path
  * from the resource model, skipping URLs already requested for that store.
  * URLs are sent in batches of the configured thread count; a trailing
  * partial batch is sent last.
  *
  * @return Enterprise_PageCache_Model_Crawler
  */
 public function crawl()
 {
     if (!Mage::app()->useCache('full_page')) {
         return $this;
     }
     $storesInfo = $this->getStoresInfo();
     $adapter = new Varien_Http_Adapter_Curl();
     foreach ($storesInfo as $info) {
         $storeId = $info['store_id'];
         // The visited set is per store: reset it before looking at this store.
         $this->_visitedUrls = array();

         $store = Mage::app()->getStore($storeId);
         if (!$store->getConfig(self::XML_PATH_CRAWLER_ENABLED)) {
             continue;
         }

         // Clamp to at least one. A zero, non-numeric or negative setting would
         // otherwise never match the batch counter, and every URL of the store
         // would be fired in a single unbounded multi-request.
         $threads = max(1, (int) $store->getConfig(self::XML_PATH_CRAWLER_THREADS));

         $options = array(CURLOPT_USERAGENT => self::USER_AGENT);
         if (!empty($info['cookie'])) {
             $options[CURLOPT_COOKIE] = $info['cookie'];
         }

         $urls = array();
         $urlsCount = 0;
         $baseUrl = $info['base_url'];
         $urlsPaths = $this->_getResource()->getUrlsPaths($storeId);
         foreach ($urlsPaths as $urlPath) {
             $url = $baseUrl . $urlPath;
             $urlHash = md5($url);
             if (isset($this->_visitedUrls[$urlHash])) {
                 // Already requested for this store.
                 continue;
             }
             $urls[] = $url;
             $this->_visitedUrls[$urlHash] = true;
             $urlsCount++;
             if ($urlsCount >= $threads) {
                 $adapter->multiRequest($urls, $options);
                 $urlsCount = 0;
                 $urls = array();
             }
         }

         // Send whatever did not fill a complete batch.
         if (!empty($urls)) {
             $adapter->multiRequest($urls, $options);
         }
     }
     return $this;
 }