/**
 * Gathers objects from the listing page at $url, then recurses into the
 * next paged URL until isEnoughCollect() is satisfied or the page carries
 * the "#moon" no-results marker.
 *
 * @inheritdoc
 */
protected function collectObjects($url)
{
    // Start a per-URL counter the first time this URL is seen.
    if (!array_key_exists($url, $this->collectedCount)) {
        $this->collectedCount[$url] = 0;
    }

    $document = new Dom();
    try {
        $document->loadFromUrl($url, [], GlabsController::$curl);
    } catch (CurlException $curlError) {
        $reason = $curlError->getMessage();
        $timedOut = false !== strpos($reason, 'timed out');
        if (!$timedOut) {
            throw new CurlException($reason);
        }

        // Time-outs are retried on the very same URL.
        GlabsController::showMessage(' ...trying again', false);

        return $this->collectObjects($url);
    }

    $isBlocked = false !== strpos($document, 'This IP has been automatically blocked.');
    if ($isBlocked) {
        throw new CurlException('IP has been blocked.');
    }

    // An element with id "moon" means there are no results: stop collecting.
    $noResultsMarker = $document->find('#moon')[0];
    if ($noResultsMarker) {
        return true;
    }

    $this->checkTotalObjects($document);

    // Scheme + host of the listing page, handed to checkObjectLink() with each href.
    $host = 'http://' . parse_url($url, PHP_URL_HOST);

    /** @var \PHPHtmlParser\Dom\AbstractNode $row */
    foreach ($document->find('.txt') as $row) {
        if ($this->isEnoughCollect()) {
            break;
        }

        /** @var \PHPHtmlParser\Dom\AbstractNode $anchor */
        $anchor = $row->find('a')[0];
        if (!$anchor) {
            continue;
        }

        $href = $this->checkObjectLink($host, $anchor->getAttribute('href'));
        if (false === $href) {
            continue;
        }

        // Fall back to the tag-stripped inner HTML when the link has no direct text.
        $title = $anchor->text();
        if (!$title) {
            $title = strip_tags($anchor->innerHtml());
        }

        try {
            $object = $this->getObjectModel($url, $href, $title, $this->categoryId, $this->type);
            $object->setPrice($row);
        } catch (ObjectException $skipped) {
            // Entries that cannot be turned into a priced object are skipped.
            continue;
        }

        $this->collected[] = $href;
        $this->objects[] = $object;
        $this->collectedCount[$url]++;
        BaseSite::$doneObjects++;
        BaseSite::progress();
    }

    if ($this->isEnoughCollect()) {
        return true;
    }

    // Not enough yet: record this page as referer and move on to the next page.
    $client = GlabsController::$curl;
    $client::$referer = $url;
    $baseUrl = str_replace([self::$pageParam . self::$page, '#list'], '', $url);
    self::$page += 100;

    return $this->collectObjects($this->getPagedUrl($baseUrl));
}
/**
 * Collects objects from the listing page at $url, then recurses into the
 * next page until isEnoughCollect() reports that enough were gathered or
 * the page says "No matches found.".
 *
 * A failed download is retried when the cURL error message mentions a
 * time-out or "525"; any other download error is rethrown.
 *
 * @inheritdoc
 *
 * @param string $url listing page URL
 *
 * @return bool true once collecting has stopped
 *
 * @throws CurlException when the page cannot be loaded for a non-retryable reason
 */
protected function collectObjects($url)
{
    // Start a per-URL counter the first time this URL is seen.
    if (!array_key_exists($url, $this->collectedCount)) {
        $this->collectedCount[$url] = 0;
    }

    $dom = new Dom();
    try {
        $dom->loadFromUrl($url, [], GlabsController::$curl);
    } catch (CurlException $e) {
        $message = $e->getMessage();
        // Retry when EITHER marker is present. The previous two sequential
        // "throw unless found" checks required both markers in one message,
        // so plain time-outs were never retried.
        // NOTE(review): "525" is presumably the HTTP 525 status - confirm.
        $isRetryable = false !== strpos($message, 'timed out')
            || false !== strpos($message, '525');
        if (!$isRetryable) {
            throw new CurlException($message);
        }
        GlabsController::showMessage(' ...trying again', false);

        return $this->collectObjects($url);
    }

    // end collect. no results
    if (false !== strpos($dom, 'No matches found.')) {
        return true;
    }

    $this->checkTotalObjects($dom);

    /* @var \PHPHtmlParser\Dom\AbstractNode $span */
    foreach ($dom->find('.summaryHeader') as $span) {
        if ($this->isEnoughCollect()) {
            break;
        }

        /* @var \PHPHtmlParser\Dom\AbstractNode $link */
        if ($link = $span->find('a', 0)) {
            $href = $link->getAttribute('href');
            // Skip links that were already collected.
            if (in_array($href, $this->collected, true)) {
                continue;
            }

            $object = new Object($url, $href, $link->text(), $this->categoryId, $this->type);
            try {
                $object->setPrice();
            } catch (ObjectException $e) {
                // Objects whose price cannot be set are skipped.
                continue;
            }

            $this->collected[] = $href;
            $this->objects[] = $object;
            $this->collectedCount[$url]++;
            BaseSite::$doneObjects++;
            BaseSite::progress();
        }
    }

    if (!$this->isEnoughCollect()) {
        // Record this page as referer, strip the current page parameter and
        // request the next page.
        $curl = GlabsController::$curl;
        $curl::$referer = $url;
        $url = str_replace(self::$pageParam . self::$page, '', $url);
        // The first advance (page counter still falsy) jumps by 2, later ones by 1.
        self::$page += self::$page ? 1 : 2;

        return $this->collectObjects($this->getPagedUrl($url));
    }

    return true;
}
/**
 * Collects objects from the listing page at $url, then recurses into the
 * next page until isEnoughCollect() reports that enough were gathered or
 * the page contains one of the localized "no matches" messages.
 *
 * A failed download is retried when the cURL error message mentions a
 * time-out or "525"; any other download error is rethrown.
 *
 * @inheritdoc
 *
 * @param string $url listing page URL
 *
 * @return bool true once collecting has stopped
 *
 * @throws CurlException when the page cannot be loaded for a non-retryable reason
 */
protected function collectObjects($url)
{
    // Start a per-URL counter the first time this URL is seen.
    if (!array_key_exists($url, $this->collectedCount)) {
        $this->collectedCount[$url] = 0;
    }

    $dom = new Dom();
    try {
        $dom->loadFromUrl($url, [], GlabsController::$curl);
    } catch (CurlException $e) {
        $message = $e->getMessage();
        // Retry when EITHER marker is present. The previous two sequential
        // "throw unless found" checks required both markers in one message,
        // so plain time-outs were never retried.
        // NOTE(review): "525" is presumably the HTTP 525 status - confirm.
        $isRetryable = false !== strpos($message, 'timed out')
            || false !== strpos($message, '525');
        if (!$isRetryable) {
            throw new CurlException($message);
        }
        GlabsController::showMessage(' ...trying again', false);

        return $this->collectObjects($url);
    }

    // end collect. no results
    // Stringify the document once instead of once per phrase.
    $html = (string) $dom;
    $noResultPhrases = [
        'No matches found',
        'Keine Entsprechungen gefunden',
        'No hay resultados',
        'Nessuna corrispondenza trovata',
        'Aucune correspondance n’a été trouvée',
        'Nenhuma correspondência encontrada',
        'Совпадений нет',
        'Ingen match fundet',
        'Nebyly nalezeny žádné shody',
        'Ingen match funnet',
        'Nie znaleziono',
        'Eşleşme bulunamadı',
        'Eredmény nem található',
        'Δεν βρέθηκαν εγγραφές',
    ];
    foreach ($noResultPhrases as $phrase) {
        if (false !== strpos($html, $phrase)) {
            return true;
        }
    }

    $this->checkTotalObjects($dom);

    /* @var \PHPHtmlParser\Dom\AbstractNode $span */
    foreach ($dom->find('.cat') as $span) {
        if ($this->isEnoughCollect()) {
            break;
        }

        /* @var \PHPHtmlParser\Dom\AbstractNode $link */
        if ($link = $span->find('a', 0)) {
            $href = $link->getAttribute('href');
            // Skip links that were already collected.
            if (in_array($href, $this->collected, true)) {
                continue;
            }

            try {
                $object = new Object($url, $href, $link->text(), $this->categoryId, $this->type);
            } catch (ObjectException $e) {
                // Entries that cannot be turned into an object are skipped.
                continue;
            }

            $this->collected[] = $href;
            $this->objects[] = $object;
            $this->collectedCount[$url]++;
            BaseSite::$doneObjects++;
            BaseSite::progress();
        }
    }

    if (!$this->isEnoughCollect()) {
        // Record this page as referer, strip the current page parameter and
        // request the next page.
        $curl = GlabsController::$curl;
        $curl::$referer = $url;
        $url = str_replace(self::$pageParam . self::$page, '', $url);
        // The first advance (page counter still falsy) jumps by 2, later ones by 1.
        self::$page += self::$page ? 1 : 2;

        return $this->collectObjects($this->getPagedUrl($url));
    }

    return true;
}
/**
 * Stores the site URL (self::URL when $url is falsy) and the category list
 * from self::CATEGORIES, then delegates to the parent constructor.
 *
 * @inheritdoc
 */
public function __construct(array $categories, $count, $url = '')
{
    if ($url) {
        $this->url = $url;
    } else {
        // No explicit URL given: use the class default.
        $this->url = self::URL;
    }
    $this->categoriesList = self::CATEGORIES;

    parent::__construct($categories, $count);
}