/**
 * @inheritdoc
 *
 * Loads the reply page of this object into the shared DOM. The reply URL is
 * derived from the object URL on first use and then reused.
 *
 * @return bool Always true on success.
 *
 * @throws ObjectException When the reply URL cannot be derived, the request
 *                         fails for a reason other than a timeout, or the
 *                         parser reports an empty collection.
 */
protected function loadDom()
{
    if (!$this->reply_url) {
        // Expected URL shape: ".../<section>/<numeric id>.html".
        preg_match('/(\\w+)\\/(\\d+)\\.html/', $this->url, $parts);
        if (!isset($parts[1])) {
            throw new ObjectException('Can\'t match reply URL.');
        }

        $this->object_id = (int) $parts[2];
        $this->reply_url = 'http://' . parse_url($this->url, PHP_URL_HOST)
            . '/reply/lax/' . $parts[1] . '/' . $this->object_id;
    }

    // The object page is sent as referer for the reply request.
    $curl = GlabsController::$curl;
    $curl::$referer = $this->url;

    try {
        self::$dom->loadFromUrl($this->reply_url, [], $curl);
    } catch (CurlException $e) {
        $reason = $e->getMessage();
        if (false === strpos($reason, 'timed out')) {
            throw new ObjectException($reason);
        }

        // Timeouts are retried by calling this method again.
        GlabsController::showMessage(' ...trying again', false);

        return $this->loadDom();
    } catch (EmptyCollectionException $e) {
        throw new ObjectException($e->getMessage());
    }

    return true;
}
/**
 * @inheritdoc
 *
 * Collects objects from the listing page at $url and keeps following paged
 * URLs until isEnoughCollect() reports that enough were gathered or the page
 * signals that there are no results.
 *
 * @param string $url Listing page URL.
 *
 * @return bool Always true on success.
 *
 * @throws CurlException When the request fails for a reason other than a
 *                       timeout, or the page reports that the IP is blocked.
 */
protected function collectObjects($url)
{
    if (!array_key_exists($url, $this->collectedCount)) {
        $this->collectedCount[$url] = 0;
    }

    $host = 'http://' . parse_url($url, PHP_URL_HOST);
    $dom = new Dom();

    try {
        $dom->loadFromUrl($url, [], GlabsController::$curl);
    } catch (CurlException $e) {
        $reason = $e->getMessage();
        if (false !== strpos($reason, 'timed out')) {
            // Timeouts are retried by calling this method again with the same URL.
            GlabsController::showMessage(' ...trying again', false);

            return $this->collectObjects($url);
        }

        throw new CurlException($reason);
    }

    if (false !== strpos($dom, 'This IP has been automatically blocked.')) {
        throw new CurlException('IP has been blocked.');
    }

    // End of collection: a "#moon" element is treated as "no results".
    if ($dom->find('#moon')[0]) {
        return true;
    }

    $this->checkTotalObjects($dom);

    /* @var \PHPHtmlParser\Dom\AbstractNode $span */
    foreach ($dom->find('.txt') as $span) {
        if ($this->isEnoughCollect()) {
            break;
        }

        /* @var \PHPHtmlParser\Dom\AbstractNode $link */
        $link = $span->find('a')[0];
        if (!$link) {
            continue;
        }

        $href = $this->checkObjectLink($host, $link->getAttribute('href'));
        if (false === $href) {
            continue;
        }

        // Fall back to the tag-stripped inner HTML when the link has no plain text.
        $title = $link->text();
        if (!$title) {
            $title = strip_tags($link->innerHtml());
        }

        try {
            $object = $this->getObjectModel($url, $href, $title, $this->categoryId, $this->type);
            $object->setPrice($span);
        } catch (ObjectException $e) {
            // Objects that cannot be built or priced are skipped.
            continue;
        }

        $this->collected[] = $href;
        $this->objects[] = $object;
        $this->collectedCount[$url]++;
        BaseSite::$doneObjects++;
        BaseSite::progress();
    }

    if ($this->isEnoughCollect()) {
        return true;
    }

    // Not enough yet: strip the current page marker, advance the offset by
    // 100 and continue with the next paged URL, using this page as referer.
    $curl = GlabsController::$curl;
    $curl::$referer = $url;
    $url = str_replace([self::$pageParam . self::$page, '#list'], '', $url);
    self::$page += 100;

    return $this->collectObjects($this->getPagedUrl($url));
}
/**
 * @inheritdoc
 *
 * Collects objects from the listing page at $url and keeps following paged
 * URLs until isEnoughCollect() reports that enough were gathered or the page
 * contains the "No matches found." marker.
 *
 * @param string $url Listing page URL.
 *
 * @return bool Always true on success.
 *
 * @throws CurlException When the request fails for a reason that is neither
 *                       a timeout nor a "525" error.
 */
protected function collectObjects($url)
{
    if (!array_key_exists($url, $this->collectedCount)) {
        $this->collectedCount[$url] = 0;
    }

    $dom = new Dom();

    try {
        $dom->loadFromUrl($url, [], GlabsController::$curl);
    } catch (CurlException $e) {
        $reason = $e->getMessage();

        // Retry when EITHER marker is present. The previous two sequential
        // checks only retried when the message contained both "timed out"
        // and "525", so a plain timeout was rethrown instead of retried.
        // NOTE(review): "525" is presumably an HTTP status code — confirm.
        $isTransient = false !== strpos($reason, 'timed out')
            || false !== strpos($reason, '525');
        if (!$isTransient) {
            throw new CurlException($reason);
        }

        GlabsController::showMessage(' ...trying again', false);

        return $this->collectObjects($url);
    }

    // End of collection: the page reports no results.
    if (false !== strpos($dom, 'No matches found.')) {
        return true;
    }

    $this->checkTotalObjects($dom);

    /* @var \PHPHtmlParser\Dom\AbstractNode $span */
    foreach ($dom->find('.summaryHeader') as $span) {
        if ($this->isEnoughCollect()) {
            break;
        }

        /* @var \PHPHtmlParser\Dom\AbstractNode $link */
        $link = $span->find('a', 0);
        if (!$link) {
            continue;
        }

        $href = $link->getAttribute('href');
        if (in_array($href, $this->collected, true)) {
            // Already collected on an earlier page.
            continue;
        }

        $object = new Object($url, $href, $link->text(), $this->categoryId, $this->type);
        try {
            $object->setPrice();
        } catch (ObjectException $e) {
            // Objects whose price cannot be set are skipped.
            continue;
        }

        $this->collected[] = $href;
        $this->objects[] = $object;
        $this->collectedCount[$url]++;
        BaseSite::$doneObjects++;
        BaseSite::progress();
    }

    if ($this->isEnoughCollect()) {
        return true;
    }

    // Not enough yet: strip the current page marker and move to the next
    // page, using this page as referer. A page counter of 0 jumps to 2.
    $curl = GlabsController::$curl;
    $curl::$referer = $url;
    $url = str_replace(self::$pageParam . self::$page, '', $url);
    self::$page += self::$page ? 1 : 2;

    return $this->collectObjects($this->getPagedUrl($url));
}
/**
 * @inheritdoc
 *
 * Collects objects from the listing page at $url and keeps following paged
 * URLs until isEnoughCollect() reports that enough were gathered or the page
 * contains one of the localized "no matches" markers.
 *
 * @param string $url Listing page URL.
 *
 * @return bool Always true on success.
 *
 * @throws CurlException When the request fails for a reason that is neither
 *                       a timeout nor a "525" error.
 */
protected function collectObjects($url)
{
    if (!array_key_exists($url, $this->collectedCount)) {
        $this->collectedCount[$url] = 0;
    }

    $dom = new Dom();

    try {
        $dom->loadFromUrl($url, [], GlabsController::$curl);
    } catch (CurlException $e) {
        $reason = $e->getMessage();

        // Retry when EITHER marker is present. The previous two sequential
        // checks only retried when the message contained both "timed out"
        // and "525", so a plain timeout was rethrown instead of retried.
        // NOTE(review): "525" is presumably an HTTP status code — confirm.
        $isTransient = false !== strpos($reason, 'timed out')
            || false !== strpos($reason, '525');
        if (!$isTransient) {
            throw new CurlException($reason);
        }

        GlabsController::showMessage(' ...trying again', false);

        return $this->collectObjects($url);
    }

    // End of collection: the page reports no results in one of the
    // supported languages. The DOM is cast to a string once and each
    // marker is checked a single time (the list used to repeat the
    // French marker).
    $noMatchMarkers = [
        'No matches found',
        'Keine Entsprechungen gefunden',
        'No hay resultados',
        'Nessuna corrispondenza trovata',
        'Aucune correspondance n’a été trouvée',
        'Nenhuma correspondência encontrada',
        'Совпадений нет',
        'Ingen match fundet',
        'Nebyly nalezeny žádné shody',
        'Ingen match funnet',
        'Nie znaleziono',
        'Eşleşme bulunamadı',
        'Eredmény nem található',
        'Δεν βρέθηκαν εγγραφές',
    ];
    $html = (string) $dom;
    foreach ($noMatchMarkers as $marker) {
        if (false !== strpos($html, $marker)) {
            return true;
        }
    }

    $this->checkTotalObjects($dom);

    /* @var \PHPHtmlParser\Dom\AbstractNode $span */
    foreach ($dom->find('.cat') as $span) {
        if ($this->isEnoughCollect()) {
            break;
        }

        /* @var \PHPHtmlParser\Dom\AbstractNode $link */
        $link = $span->find('a', 0);
        if (!$link) {
            continue;
        }

        $href = $link->getAttribute('href');
        if (in_array($href, $this->collected, true)) {
            // Already collected on an earlier page.
            continue;
        }

        try {
            $object = new Object($url, $href, $link->text(), $this->categoryId, $this->type);
        } catch (ObjectException $e) {
            // Objects that cannot be constructed are skipped.
            continue;
        }

        $this->collected[] = $href;
        $this->objects[] = $object;
        $this->collectedCount[$url]++;
        BaseSite::$doneObjects++;
        BaseSite::progress();
    }

    if ($this->isEnoughCollect()) {
        return true;
    }

    // Not enough yet: strip the current page marker and move to the next
    // page, using this page as referer. A page counter of 0 jumps to 2.
    $curl = GlabsController::$curl;
    $curl::$referer = $url;
    $url = str_replace(self::$pageParam . self::$page, '', $url);
    self::$page += self::$page ? 1 : 2;

    return $this->collectObjects($this->getPagedUrl($url));
}
/**
 * Parse category page.
 *
 * Parses every collected object that has not been processed yet, reports
 * progress through GlabsController and saves the massmail links of each
 * parsed object. When fewer objects than needed were processed, more are
 * collected and the method runs again.
 *
 * @throws CurlException
 * @throws InvalidParamException
 * @throws ObjectException
 */
public function parse()
{
    GlabsController::showMessage("\n" . 'Parsing category "' . $this->title . '"');

    /** @var \app\models\glabs\objects\massmail\Craigslist $object */
    foreach ($this->objects as $object) {
        $alreadyDone = in_array($object->getUrl(), $this->doneObjects, true);
        if ($alreadyDone) {
            continue;
        }

        $this->i++;
        GlabsController::showMessage(
            "\t" . $this->i . ') Parsing object "' . $object->getTitle() . '" (' . $object->getUrl() . ')'
        );

        try {
            $object->parse();
            $this->doneObjects[] = $object->getUrl();
        } catch (ObjectException $e) {
            GlabsController::showMessage("\t\t" . 'Object skipped because of reason: ' . $e->getMessage());
            continue;
        } catch (EmptyCollectionException $e) {
            GlabsController::showMessage("\t\t" . 'Object skipped because of reason: ' . $e->getMessage());
            continue;
        }

        GlabsController::showMessage("\t\t" . 'Sending object... ', false);

        try {
            // The actual delivery call ($object->send()) is disabled here;
            // the object is only counted and reported as sent.
            GlabsController::$sentObjects++;
            GlabsController::showMessage('Success.');
        } catch (TransportException $e) {
            $object->removeFiles();
            GlabsController::showMessage('Fail with message: "' . $e->getMessage() . '"');
        }

        GlabsController::saveMassmailLinks($object);
    }

    $done_count = count($this->doneObjects);
    if ($done_count >= $this->needCount || !count($this->objects)) {
        // Either enough objects are done or the last pass had nothing to process.
        return;
    }

    // Collect the missing amount from the first configured URL and parse again.
    $this->count = $this->needCount - $done_count;
    $this->objects = [];
    $this->collectObjects($this->getPagedUrl(reset($this->url)));
    $this->parse();
}
/**
 * Show progress bar.
 *
 * Formats the done-categories and done-objects counters with the class
 * progress format and passes the result to GlabsController::showMessage().
 */
public static function progress()
{
    $line = sprintf(self::$progressFormat, self::$doneCategories, self::$doneObjects);

    GlabsController::showMessage($line, false);
}
/**
 * Send object to Chatapp.mobi
 *
 * When the transport rejects the object because the username or the email is
 * already in use, the conflicting value is regenerated and sending is
 * attempted again.
 *
 * @param bool $isTest
 *
 * @return bool Always true on success.
 *
 * @throws TransportException For any transport failure other than the two
 *                            retried conflicts.
 * @throws InvalidParamException
 * @throws ImageException
 */
public function send($isTest = false)
{
    try {
        $transport = new TransportChatapp($this);
        $transport->send($isTest);
    } catch (TransportException $e) {
        $message = $e->getMessage();

        if ($message === 'Error retrieving: Username ' . $this->username . ' already taken') {
            GlabsController::showMessage('re-generate username and trying again... ', false);
            $this->setUsername();

            return $this->send($isTest);
        }

        if ($message === 'Error retrieving: Email "' . $this->email . '" already exist.') {
            GlabsController::showMessage('re-generate email and trying again... ', false);
            $this->setEmail();

            return $this->send($isTest);
        }

        // Any other failure is passed on to the caller.
        throw new TransportException($message);
    }

    return true;
}
/**
 * Load DOM.
 *
 * Loads the object URL into the shared DOM, sending the category URL as
 * referer. Timed-out requests are retried by calling this method again.
 *
 * @return bool Always true on success.
 *
 * @throws ObjectException When the request fails for a reason other than a
 *                         timeout or the parser reports an empty collection.
 */
protected function loadDom()
{
    try {
        $curl = GlabsController::$curl;
        $curl::$referer = $this->categoryUrl;
        self::$dom->loadFromUrl($this->url, [], $curl);
    } catch (CurlException $e) {
        $reason = $e->getMessage();
        if (false === strpos($reason, 'timed out')) {
            throw new ObjectException($reason);
        }

        GlabsController::showMessage(' ...trying again', false);

        return $this->loadDom();
    } catch (EmptyCollectionException $e) {
        throw new ObjectException($e->getMessage());
    }

    return true;
}