/**
 * Fetch the page at $url and log the page numbers found in its pagination links.
 *
 * Only URLs whose path equals Urls::URL_ALBUM are processed. Anything else,
 * including a URL that parse_url() cannot split, is logged as an error and
 * the method returns without fetching.
 *
 * @param string $url URL of the page to fetch
 *
 * @return void
 */
public function parse($url)
{
    $chunks = parse_url($url);

    // parse_url() returns false for seriously malformed URLs and omits keys
    // for components that are absent, so check before indexing.
    if (!is_array($chunks) || !isset($chunks['path']) || $chunks['path'] !== Urls::URL_ALBUM) {
        $this->logger->error('Unknown URL received');

        return;
    }

    $host = isset($chunks['host']) ? $chunks['host'] : null;
    $schema = isset($chunks['scheme']) ? $chunks['scheme'] : null;
    $this->logger->info('Resolved host and schema', ['host' => $host, 'schema' => $schema]);

    $this->logger->info('Fetching url', ['url' => $url]);
    $response = $this->guzzle->get($url);
    $this->logger->info('Fetched first page', ['url' => $url]);

    $crawler = new Crawler((string) $response->getBody());
    $links = $crawler->filter(Selectors::SELECTOR_PAGINATION_NUMBER_LINKS);

    $pages = [];

    // Iterate over the matched pagination links, not over the root crawler:
    // the root selection's count is unrelated to the number of links.
    $linkCount = $links->count();
    for ($i = 0; $i < $linkCount; $i++) {
        $href = $links->eq($i)->attr('href');
        if (!is_string($href) || $href === '') {
            continue;
        }

        // Links without a query string, or without a "page" parameter,
        // carry no page number and are skipped.
        $queryString = parse_url($href, PHP_URL_QUERY);
        if (!is_string($queryString)) {
            continue;
        }

        parse_str($queryString, $query);
        if (isset($query['page'])) {
            $pages[] = $query['page'];
        }
    }

    // The original logged an empty message here; give the entry a description.
    $this->logger->info('Resolved pagination pages', ['pages' => $pages]);
}
/**
 * Read a value from a node selection, or fall back to a default when it is empty.
 *
 * When $node contains at least one element, the method named by $funcName
 * is called on it with $funcParam as its only argument, and that result is
 * handed to $callback. Whatever $callback returns is returned to the caller.
 *
 * @param Crawler  $node         Node selection to read from
 * @param mixed    $defaultValue Returned as-is when $node is empty
 * @param callable $callback     Invoked with the value read from $node
 * @param string   $funcName     Name of the method to call on $node
 * @param string   $funcParam    Argument passed to that method
 *
 * @return mixed
 */
private function getFieldValue(Crawler $node, $defaultValue, $callback, $funcName = 'text', $funcParam = '')
{
    if (!$node->count()) {
        return $defaultValue;
    }

    $rawValue = $node->{$funcName}($funcParam);

    return $callback($rawValue);
}
/**
 * Build pagination figures from the pagination summary node.
 *
 * The last line of the node's text is split on whitespace and read
 * positionally: token 0 is the first item on the page, token 2 the last
 * item on the page and token 4 the total number of results.
 * NOTE(review): this presumably matches text such as "21 - 40 of 123";
 * confirm against the real markup.
 *
 * The returned object always carries the properties total_results,
 * results_per_page, num_pages, first_on_page, last_on_page and current_page.
 * An empty selection yields the "no results" defaults. A selection with
 * several nodes is parsed exactly like a single one, since only text() is read.
 *
 * @param Crawler $info Selection holding the pagination summary
 *
 * @return \stdClass
 */
private function getPaginationInfo(Crawler $info)
{
    // "No results" defaults; also the fallback for anything not overwritten below.
    $result = new \stdClass();
    $result->total_results = 0;
    $result->results_per_page = 10;
    $result->num_pages = 0;
    $result->first_on_page = 0;
    $result->last_on_page = 0;
    $result->current_page = 0;

    if ($info->count() === 0) {
        return $result;
    }

    // Split on any line-break sequence rather than PHP_EOL, which reflects
    // the host platform and not the fetched document.
    $lines = preg_split('/\R/', trim($info->text())) ?: [''];
    $summary = trim((string) end($lines));
    $tokens = preg_split('/\s+/', $summary, -1, PREG_SPLIT_NO_EMPTY) ?: [];

    // Missing tokens count as 0 instead of raising undefined-offset notices.
    $firstOnPage = isset($tokens[0]) ? intval($tokens[0]) : 0;
    $lastOnPage = isset($tokens[2]) ? intval($tokens[2]) : 0;
    $totalResults = isset($tokens[4]) ? intval($tokens[4]) : 0;

    // (last item - first) + 1 => Items 21 -> 40 => 40-21+1 = 20 items.
    $resultsPerPage = $lastOnPage - $firstOnPage + 1;

    $result->total_results = $totalResults;
    $result->results_per_page = $resultsPerPage;
    $result->first_on_page = $firstOnPage;
    $result->last_on_page = $lastOnPage;

    // A non-positive page size means the text was not in the expected shape;
    // leave the page counters at 0 rather than dividing by zero.
    if ($resultsPerPage > 0) {
        $result->num_pages = intval(ceil($totalResults / $resultsPerPage));
        $result->current_page = intval(ceil($firstOnPage / $resultsPerPage));
    }

    return $result;
}
/**
 * Number of items in the document.
 *
 * Delegates to the wrapped crawler's count() and returns its result unchanged.
 *
 * @group Iterator
 */
public function count()
{
    $length = $this->crawler->count();

    return $length;
}