示例#1
0
 /**
  * Load the reply page of this object into the shared DOM.
  *
  * On the first call the reply URL is derived from the object URL: the
  * "<word>/<digits>.html" fragment provides the path segment and the numeric
  * id, and the id is also stored in $this->object_id. The reply URL is always
  * built with the "http://" scheme and the host of the object URL.
  *
  * @inheritdoc
  *
  * @return bool Always true once the page has been loaded.
  *
  * @throws ObjectException If the reply URL cannot be derived from the object
  *                         URL, or if loading fails for a reason other than
  *                         a timeout.
  */
 protected function loadDom()
 {
     if (!$this->reply_url) {
         $parts = [];
         preg_match('/(\\w+)\\/(\\d+)\\.html/', $this->url, $parts);
         if (!isset($parts[1])) {
             throw new ObjectException('Can\'t match reply URL.');
         }
         $this->object_id = (int) $parts[2];
         $host = parse_url($this->url, PHP_URL_HOST);
         $this->reply_url = 'http://' . $host . '/reply/lax/' . $parts[1] . '/' . $this->object_id;
     }
     // The object page itself is sent as the referer of the reply request.
     $curl = GlabsController::$curl;
     $curl::$referer = $this->url;
     try {
         self::$dom->loadFromUrl($this->reply_url, [], $curl);
     } catch (CurlException $error) {
         $reason = $error->getMessage();
         if (false === strpos($reason, 'timed out')) {
             throw new ObjectException($reason);
         }
         // Timeouts are retried; there is no limit on the number of attempts.
         GlabsController::showMessage(' ...trying again', false);
         return $this->loadDom();
     } catch (EmptyCollectionException $error) {
         throw new ObjectException($error->getMessage());
     }
     return true;
 }
示例#2
0
 /**
  * Collect objects from a listing page and, while more are still needed,
  * from the following pages.
  *
  * Every ".txt" element that contains a link is turned into an object model;
  * links rejected by checkObjectLink() and objects that raise an
  * ObjectException are skipped.
  *
  * @inheritdoc
  *
  * @param string $url Listing page URL.
  *
  * @return bool Always true once collecting has finished.
  *
  * @throws CurlException If the page cannot be loaded for a reason other than
  *                       a timeout, or if the response says the IP is blocked.
  */
 protected function collectObjects($url)
 {
     if (!array_key_exists($url, $this->collectedCount)) {
         $this->collectedCount[$url] = 0;
     }
     $host = 'http://' . parse_url($url, PHP_URL_HOST);
     $dom = new Dom();
     try {
         $dom->loadFromUrl($url, [], GlabsController::$curl);
     } catch (CurlException $e) {
         if (false === strpos($e->getMessage(), 'timed out')) {
             // Re-throw the original exception so its code and stack trace
             // are not lost.
             throw $e;
         }
         // Timeouts are retried; there is no limit on the number of attempts.
         GlabsController::showMessage(' ...trying again', false);
         return $this->collectObjects($url);
     }
     if (false !== strpos($dom, 'This IP has been automatically blocked.')) {
         throw new CurlException('IP has been blocked.');
     }
     // end collect. no results
     if ($dom->find('#moon')[0]) {
         return true;
     }
     $this->checkTotalObjects($dom);
     /* @var \PHPHtmlParser\Dom\AbstractNode $span */
     foreach ($dom->find('.txt') as $span) {
         if ($this->isEnoughCollect()) {
             break;
         }
         /* @var \PHPHtmlParser\Dom\AbstractNode $link */
         if ($link = $span->find('a')[0]) {
             $href = $this->checkObjectLink($host, $link->getAttribute('href'));
             if (false === $href) {
                 continue;
             }
             // Fall back to the tag-stripped inner HTML when the link has no
             // direct text.
             $title = $link->text() ?: strip_tags($link->innerHtml());
             try {
                 $object = $this->getObjectModel($url, $href, $title, $this->categoryId, $this->type);
                 $object->setPrice($span);
             } catch (ObjectException $e) {
                 continue;
             }
             $this->collected[] = $href;
             $this->objects[] = $object;
             $this->collectedCount[$url]++;
             BaseSite::$doneObjects++;
             BaseSite::progress();
         }
     }
     if (!$this->isEnoughCollect()) {
         // The current page becomes the referer of the next request.
         $curl = GlabsController::$curl;
         $curl::$referer = $url;
         // Strip the current page parameter and the "#list" anchor before
         // the next paged URL is built; the page value advances by 100.
         $url = str_replace([self::$pageParam . self::$page, '#list'], '', $url);
         self::$page += 100;
         return $this->collectObjects($this->getPagedUrl($url));
     }
     return true;
 }
示例#3
0
 /**
  * Collect objects from a listing page and, while more are still needed,
  * from the following pages.
  *
  * Every ".summaryHeader" element that contains a link not collected before
  * is turned into an object; objects whose setPrice() raises an
  * ObjectException are skipped.
  *
  * @inheritdoc
  *
  * @param string $url Listing page URL.
  *
  * @return bool Always true once collecting has finished.
  *
  * @throws CurlException If the page cannot be loaded and the error message
  *                       contains neither "timed out" nor "525".
  */
 protected function collectObjects($url)
 {
     if (!array_key_exists($url, $this->collectedCount)) {
         $this->collectedCount[$url] = 0;
     }
     $dom = new Dom();
     try {
         $dom->loadFromUrl($url, [], GlabsController::$curl);
     } catch (CurlException $e) {
         $message = $e->getMessage();
         $timedOut = false !== strpos($message, 'timed out');
         // NOTE(review): "525" is presumably an HTTP status code in the
         // error message - confirm against the curl wrapper.
         $has525 = false !== strpos($message, '525');
         // Retry when EITHER marker is present. The previous two sequential
         // checks required both, so plain timeouts were never retried.
         if (!$timedOut && !$has525) {
             // Re-throw the original exception to keep its code and trace.
             throw $e;
         }
         GlabsController::showMessage(' ...trying again', false);
         return $this->collectObjects($url);
     }
     // end collect. no results
     if (false !== strpos($dom, 'No matches found.')) {
         return true;
     }
     $this->checkTotalObjects($dom);
     /* @var \PHPHtmlParser\Dom\AbstractNode $span */
     foreach ($dom->find('.summaryHeader') as $span) {
         if ($this->isEnoughCollect()) {
             break;
         }
         /* @var \PHPHtmlParser\Dom\AbstractNode $link */
         if ($link = $span->find('a', 0)) {
             $href = $link->getAttribute('href');
             // Skip links that have already been collected.
             if (in_array($href, $this->collected, true)) {
                 continue;
             }
             $object = new Object($url, $href, $link->text(), $this->categoryId, $this->type);
             try {
                 $object->setPrice();
             } catch (ObjectException $e) {
                 continue;
             }
             $this->collected[] = $href;
             $this->objects[] = $object;
             $this->collectedCount[$url]++;
             BaseSite::$doneObjects++;
             BaseSite::progress();
         }
     }
     if (!$this->isEnoughCollect()) {
         // The current page becomes the referer of the next request.
         $curl = GlabsController::$curl;
         $curl::$referer = $url;
         $url = str_replace(self::$pageParam . self::$page, '', $url);
         // The first step goes from 0 straight to 2; later steps add 1.
         self::$page += self::$page ? 1 : 2;
         return $this->collectObjects($this->getPagedUrl($url));
     }
     return true;
 }
示例#4
0
 /**
  * Collect objects from a listing page and, while more are still needed,
  * from the following pages.
  *
  * Collecting stops when the page contains one of the known "no matches"
  * phrases. Otherwise every ".cat" element that contains a link not
  * collected before is turned into an object; objects whose construction
  * raises an ObjectException are skipped.
  *
  * @inheritdoc
  *
  * @param string $url Listing page URL.
  *
  * @return bool Always true once collecting has finished.
  *
  * @throws CurlException If the page cannot be loaded and the error message
  *                       contains neither "timed out" nor "525".
  */
 protected function collectObjects($url)
 {
     if (!array_key_exists($url, $this->collectedCount)) {
         $this->collectedCount[$url] = 0;
     }
     $dom = new Dom();
     try {
         $dom->loadFromUrl($url, [], GlabsController::$curl);
     } catch (CurlException $e) {
         $message = $e->getMessage();
         $timedOut = false !== strpos($message, 'timed out');
         // NOTE(review): "525" is presumably an HTTP status code in the
         // error message - confirm against the curl wrapper.
         $has525 = false !== strpos($message, '525');
         // Retry when EITHER marker is present. The previous two sequential
         // checks required both, so plain timeouts were never retried.
         if (!$timedOut && !$has525) {
             // Re-throw the original exception to keep its code and trace.
             throw $e;
         }
         GlabsController::showMessage(' ...trying again', false);
         return $this->collectObjects($url);
     }
     // end collect. no results
     // "No matches" phrases as they appear on the localized result pages.
     $noResultPhrases = [
         'No matches found',
         'Keine Entsprechungen gefunden',
         'No hay resultados',
         'Nessuna corrispondenza trovata',
         'Aucune correspondance n’a été trouvée',
         'Nenhuma correspondência encontrada',
         'Совпадений нет',
         'Ingen match fundet',
         'Nebyly nalezeny žádné shody',
         'Ingen match funnet',
         'Nie znaleziono',
         'Eşleşme bulunamadı',
         'Eredmény nem található',
         'Δεν βρέθηκαν εγγραφές',
     ];
     // Convert the DOM to a string once instead of once per phrase.
     $html = (string) $dom;
     foreach ($noResultPhrases as $phrase) {
         if (false !== strpos($html, $phrase)) {
             return true;
         }
     }
     $this->checkTotalObjects($dom);
     /* @var \PHPHtmlParser\Dom\AbstractNode $span */
     foreach ($dom->find('.cat') as $span) {
         if ($this->isEnoughCollect()) {
             break;
         }
         /* @var \PHPHtmlParser\Dom\AbstractNode $link */
         if ($link = $span->find('a', 0)) {
             $href = $link->getAttribute('href');
             // Skip links that have already been collected.
             if (in_array($href, $this->collected, true)) {
                 continue;
             }
             try {
                 $object = new Object($url, $href, $link->text(), $this->categoryId, $this->type);
             } catch (ObjectException $e) {
                 continue;
             }
             $this->collected[] = $href;
             $this->objects[] = $object;
             $this->collectedCount[$url]++;
             BaseSite::$doneObjects++;
             BaseSite::progress();
         }
     }
     if (!$this->isEnoughCollect()) {
         // The current page becomes the referer of the next request.
         $curl = GlabsController::$curl;
         $curl::$referer = $url;
         $url = str_replace(self::$pageParam . self::$page, '', $url);
         // The first step goes from 0 straight to 2; later steps add 1.
         self::$page += self::$page ? 1 : 2;
         return $this->collectObjects($this->getPagedUrl($url));
     }
     return true;
 }
示例#5
0
 /**
  * Parse every collected object of this category.
  *
  * Objects whose URL is already listed in $this->doneObjects are skipped.
  * Each remaining object is parsed; when parsing fails the reason is printed
  * and the object is skipped. Successfully parsed objects are counted as
  * sent and handed to GlabsController::saveMassmailLinks(). If fewer objects
  * than $this->needCount are done afterwards and the current batch was not
  * empty, a new batch is collected and the method runs again.
  *
  * @throws CurlException
  * @throws InvalidParamException
  * @throws ObjectException
  */
 public function parse()
 {
     GlabsController::showMessage("\n" . 'Parsing category "' . $this->title . '"');
     /** @var \app\models\glabs\objects\massmail\Craigslist $item */
     foreach ($this->objects as $item) {
         if (in_array($item->getUrl(), $this->doneObjects, true)) {
             continue;
         }
         ++$this->i;
         $heading = "\t" . $this->i . ') Parsing object "' . $item->getTitle() . '" (' . $item->getUrl() . ')';
         GlabsController::showMessage($heading);
         // Stays null unless parsing the object failed.
         $skipReason = null;
         try {
             $item->parse();
             $this->doneObjects[] = $item->getUrl();
         } catch (ObjectException $e) {
             $skipReason = $e->getMessage();
         } catch (EmptyCollectionException $e) {
             $skipReason = $e->getMessage();
         }
         if (null !== $skipReason) {
             GlabsController::showMessage("\t\t" . 'Object skipped because of reason: ' . $skipReason);
             continue;
         }
         GlabsController::showMessage("\t\t" . 'Sending object... ', false);
         try {
             // The actual send call is disabled; the object is only counted.
             //$item->send();
             GlabsController::$sentObjects++;
             GlabsController::showMessage('Success.');
         } catch (TransportException $e) {
             $item->removeFiles();
             GlabsController::showMessage('Fail with message: "' . $e->getMessage() . '"');
         }
         GlabsController::saveMassmailLinks($item);
     }
     $doneTotal = count($this->doneObjects);
     // Stop when enough objects are done or the last batch was empty.
     if ($doneTotal >= $this->needCount || !count($this->objects)) {
         return;
     }
     // Collect only the missing amount and process the new batch.
     $this->count = $this->needCount - $doneTotal;
     $this->objects = [];
     $this->collectObjects($this->getPagedUrl(reset($this->url)));
     $this->parse();
 }
示例#6
0
 /**
  * Print the current progress line.
  *
  * Formats the done-categories and done-objects counters with
  * self::$progressFormat and prints the result through
  * GlabsController::showMessage(), passing false as its second argument.
  */
 public static function progress()
 {
     $line = sprintf(self::$progressFormat, self::$doneCategories, self::$doneObjects);
     GlabsController::showMessage($line, false);
 }
示例#7
0
 /**
  * Send object to Chatapp.mobi
  *
  * When the transport reports that the username is already taken or the
  * email already exists, the corresponding value is regenerated and the send
  * is attempted again. Any other transport error is passed on to the caller.
  *
  * @param bool $isTest Passed through to TransportChatapp::send().
  *
  * @return bool Always true once the object has been sent.
  *
  * @throws TransportException
  * @throws InvalidParamException
  * @throws ImageException
  */
 public function send($isTest = false)
 {
     try {
         (new TransportChatapp($this))->send($isTest);
     } catch (TransportException $e) {
         $message = $e->getMessage();
         if ($message === 'Error retrieving: Username ' . $this->username . ' already taken') {
             GlabsController::showMessage('re-generate username and trying again... ', false);
             $this->setUsername();
             return $this->send($isTest);
         }
         if ($message === 'Error retrieving: Email "' . $this->email . '" already exist.') {
             GlabsController::showMessage('re-generate email and trying again... ', false);
             $this->setEmail();
             return $this->send($isTest);
         }
         // Re-throw the original exception so its code and stack trace are
         // kept, instead of building a new one from the message only.
         throw $e;
     }
     return true;
 }
示例#8
0
 /**
  * Load the object page into the shared DOM.
  *
  * The category URL is set as the referer before the request is made. A
  * request that fails with a "timed out" message is repeated; there is no
  * limit on the number of attempts.
  *
  * @return bool Always true once the page has been loaded.
  *
  * @throws ObjectException If loading fails for a reason other than a
  *                         timeout.
  */
 protected function loadDom()
 {
     try {
         $client = GlabsController::$curl;
         $client::$referer = $this->categoryUrl;
         self::$dom->loadFromUrl($this->url, [], GlabsController::$curl);
         return true;
     } catch (CurlException $error) {
         $reason = $error->getMessage();
         if (false === strpos($reason, 'timed out')) {
             throw new ObjectException($reason);
         }
         GlabsController::showMessage(' ...trying again', false);
         return $this->loadDom();
     } catch (EmptyCollectionException $error) {
         throw new ObjectException($error->getMessage());
     }
 }