Пример #1
0
 /**
  * @inheritdoc
  *
  * Loads the listing page at $url, turns every link found inside a ".txt"
  * element into an object model and stores it, then moves on to the next
  * page until isEnoughCollect() reports true or the page contains "#moon".
  *
  * @param string $url listing page URL
  *
  * @return bool true once collecting has finished
  *
  * @throws CurlException when loading fails for a reason other than a timeout,
  *                       or when the page contains the IP-block notice
  */
 protected function collectObjects($url)
 {
     if (!array_key_exists($url, $this->collectedCount)) {
         $this->collectedCount[$url] = 0;
     }
     $host = 'http://' . parse_url($url, PHP_URL_HOST);

     // Retry timed-out requests in a loop instead of by recursion, so that
     // repeated timeouts do not keep deepening the call stack.
     while (true) {
         $dom = new Dom();
         try {
             $dom->loadFromUrl($url, [], GlabsController::$curl);
             break;
         } catch (CurlException $e) {
             if (false === strpos($e->getMessage(), 'timed out')) {
                 // Rethrow the original exception: same type and message,
                 // but its code and stack trace are preserved.
                 throw $e;
             }
             GlabsController::showMessage(' ...trying again', false);
         }
     }

     // Explicit cast: the search runs over the string form of the document.
     if (false !== strpos((string) $dom, 'This IP has been automatically blocked.')) {
         throw new CurlException('IP has been blocked.');
     }

     // end collect. no results
     if ($dom->find('#moon')[0]) {
         return true;
     }

     $this->checkTotalObjects($dom);

     /* @var \PHPHtmlParser\Dom\AbstractNode $span */
     foreach ($dom->find('.txt') as $span) {
         if ($this->isEnoughCollect()) {
             break;
         }

         /* @var \PHPHtmlParser\Dom\AbstractNode $link */
         $link = $span->find('a')[0];
         if (!$link) {
             continue;
         }

         // checkObjectLink() returns false for links that must be skipped.
         $href = $this->checkObjectLink($host, $link->getAttribute('href'));
         if (false === $href) {
             continue;
         }

         // Fall back to the tag-stripped inner HTML when the link has no direct text.
         $title = $link->text() ?: strip_tags($link->innerHtml());

         try {
             $object = $this->getObjectModel($url, $href, $title, $this->categoryId, $this->type);
             $object->setPrice($span);
         } catch (ObjectException $e) {
             // Objects that cannot be built or priced are skipped.
             continue;
         }

         $this->collected[] = $href;
         $this->objects[] = $object;
         $this->collectedCount[$url]++;
         BaseSite::$doneObjects++;
         BaseSite::progress();
     }

     if (!$this->isEnoughCollect()) {
         // Use the page just processed as the referer of the next request.
         $curl = GlabsController::$curl;
         $curl::$referer = $url;

         // Strip the current page parameter and the "#list" fragment before
         // building the URL of the next page; the page counter moves by 100.
         $url = str_replace([self::$pageParam . self::$page, '#list'], '', $url);
         self::$page += 100;

         return $this->collectObjects($this->getPagedUrl($url));
     }

     return true;
 }
Пример #2
0
 /**
  * @inheritdoc
  *
  * Loads the listing page at $url, turns the first link of every
  * ".summaryHeader" element into an Object and stores it, then moves on to
  * the next page until isEnoughCollect() reports true or the page contains
  * the "No matches found." notice.
  *
  * @param string $url listing page URL
  *
  * @return bool true once collecting has finished
  *
  * @throws CurlException when loading fails with a message that contains
  *                       neither "timed out" nor "525"
  */
 protected function collectObjects($url)
 {
     if (!array_key_exists($url, $this->collectedCount)) {
         $this->collectedCount[$url] = 0;
     }

     // Retry in a loop instead of by recursion, so that repeated failures
     // do not keep deepening the call stack.
     while (true) {
         $dom = new Dom();
         try {
             $dom->loadFromUrl($url, [], GlabsController::$curl);
             break;
         } catch (CurlException $e) {
             $message = $e->getMessage();
             // Retry when EITHER marker is present. The previous two
             // sequential guards required both at once, so a plain timeout
             // was always rethrown and never retried.
             $retryable = false !== strpos($message, 'timed out')
                 || false !== strpos($message, '525');
             if (!$retryable) {
                 // Rethrow the original exception: same type and message,
                 // but its code and stack trace are preserved.
                 throw $e;
             }
             GlabsController::showMessage(' ...trying again', false);
         }
     }

     // end collect. no results
     if (false !== strpos((string) $dom, 'No matches found.')) {
         return true;
     }

     $this->checkTotalObjects($dom);

     /* @var \PHPHtmlParser\Dom\AbstractNode $span */
     foreach ($dom->find('.summaryHeader') as $span) {
         if ($this->isEnoughCollect()) {
             break;
         }

         /* @var \PHPHtmlParser\Dom\AbstractNode $link */
         $link = $span->find('a', 0);
         if (!$link) {
             continue;
         }

         // Skip links that were already collected.
         $href = $link->getAttribute('href');
         if (in_array($href, $this->collected, true)) {
             continue;
         }

         $object = new Object($url, $href, $link->text(), $this->categoryId, $this->type);
         try {
             $object->setPrice();
         } catch (ObjectException $e) {
             // Objects whose price cannot be set are skipped.
             continue;
         }

         $this->collected[] = $href;
         $this->objects[] = $object;
         $this->collectedCount[$url]++;
         BaseSite::$doneObjects++;
         BaseSite::progress();
     }

     if (!$this->isEnoughCollect()) {
         // Use the page just processed as the referer of the next request.
         $curl = GlabsController::$curl;
         $curl::$referer = $url;

         // Strip the current page parameter before building the next URL.
         // The page counter jumps from 0 to 2 and advances by 1 afterwards.
         $url = str_replace(self::$pageParam . self::$page, '', $url);
         self::$page += self::$page ? 1 : 2;

         return $this->collectObjects($this->getPagedUrl($url));
     }

     return true;
 }
Пример #3
0
 /**
  * @inheritdoc
  *
  * Loads the listing page at $url, turns the first link of every ".cat"
  * element into an Object and stores it, then moves on to the next page
  * until isEnoughCollect() reports true or the page contains one of the
  * localized "no matches" notices.
  *
  * @param string $url listing page URL
  *
  * @return bool true once collecting has finished
  *
  * @throws CurlException when loading fails with a message that contains
  *                       neither "timed out" nor "525"
  */
 protected function collectObjects($url)
 {
     if (!array_key_exists($url, $this->collectedCount)) {
         $this->collectedCount[$url] = 0;
     }

     // Retry in a loop instead of by recursion, so that repeated failures
     // do not keep deepening the call stack.
     while (true) {
         $dom = new Dom();
         try {
             $dom->loadFromUrl($url, [], GlabsController::$curl);
             break;
         } catch (CurlException $e) {
             $message = $e->getMessage();
             // Retry when EITHER marker is present. The previous two
             // sequential guards required both at once, so a plain timeout
             // was always rethrown and never retried.
             $retryable = false !== strpos($message, 'timed out')
                 || false !== strpos($message, '525');
             if (!$retryable) {
                 // Rethrow the original exception: same type and message,
                 // but its code and stack trace are preserved.
                 throw $e;
             }
             GlabsController::showMessage(' ...trying again', false);
         }
     }

     // end collect. no results
     // Localized "no matches" notices; the document is converted to a string
     // once and scanned for each of them.
     $noResultMarkers = [
         'No matches found',
         'Keine Entsprechungen gefunden',
         'No hay resultados',
         'Nessuna corrispondenza trovata',
         'Aucune correspondance n’a été trouvée',
         'Nenhuma correspondência encontrada',
         'Совпадений нет',
         'Ingen match fundet',
         'Nebyly nalezeny žádné shody',
         'Ingen match funnet',
         'Nie znaleziono',
         'Eşleşme bulunamadı',
         'Eredmény nem található',
         'Δεν βρέθηκαν εγγραφές',
     ];
     $html = (string) $dom;
     foreach ($noResultMarkers as $marker) {
         if (false !== strpos($html, $marker)) {
             return true;
         }
     }

     $this->checkTotalObjects($dom);

     /* @var \PHPHtmlParser\Dom\AbstractNode $span */
     foreach ($dom->find('.cat') as $span) {
         if ($this->isEnoughCollect()) {
             break;
         }

         /* @var \PHPHtmlParser\Dom\AbstractNode $link */
         $link = $span->find('a', 0);
         if (!$link) {
             continue;
         }

         // Skip links that were already collected.
         $href = $link->getAttribute('href');
         if (in_array($href, $this->collected, true)) {
             continue;
         }

         try {
             $object = new Object($url, $href, $link->text(), $this->categoryId, $this->type);
         } catch (ObjectException $e) {
             // Objects that cannot be constructed are skipped.
             continue;
         }

         $this->collected[] = $href;
         $this->objects[] = $object;
         $this->collectedCount[$url]++;
         BaseSite::$doneObjects++;
         BaseSite::progress();
     }

     if (!$this->isEnoughCollect()) {
         // Use the page just processed as the referer of the next request.
         $curl = GlabsController::$curl;
         $curl::$referer = $url;

         // Strip the current page parameter before building the next URL.
         // The page counter jumps from 0 to 2 and advances by 1 afterwards.
         $url = str_replace(self::$pageParam . self::$page, '', $url);
         self::$page += self::$page ? 1 : 2;

         return $this->collectObjects($this->getPagedUrl($url));
     }

     return true;
 }
Пример #4
0
 /**
  * @inheritdoc
  *
  * Stores the start URL (the class URL constant when $url is empty/falsy)
  * and the class CATEGORIES constant as the category list, then delegates
  * to the parent constructor.
  *
  * @param array  $categories passed through to the parent constructor
  * @param mixed  $count      passed through to the parent constructor
  * @param string $url        optional start URL overriding the class default
  */
 public function __construct(array $categories, $count, $url = '')
 {
     // A falsy $url means "use the default URL of this class".
     if ($url) {
         $this->url = $url;
     } else {
         $this->url = self::URL;
     }

     $this->categoriesList = self::CATEGORIES;

     parent::__construct($categories, $count);
 }