/**
 * Downloads the page at the given URL and hands it to the matching provider.
 *
 * The provider class name is resolved through findProvider() before anything
 * is fetched. If no page fetcher has been assigned yet, a CurlPageFetcher is
 * created and kept on the instance. The fetched HTML is wrapped in a Crawler,
 * a new provider instance is given the URL via setUrl(), and the result of
 * the provider's own extract() is returned.
 *
 * @param string $url Address of the page to extract from.
 * @return Extract Result produced by the resolved provider.
 * @throws Exception\PageNotFoundException
 * @throws Exception\ServerErrorException
 */
public function extract($url)
{
    $providerClass = $this->findProvider($url);

    // Fall back to the cURL-based fetcher when none was supplied.
    if ($this->pageFetcher === null) {
        $this->pageFetcher = new CurlPageFetcher();
    }

    $pageHtml = $this->pageFetcher->fetch($url);
    $crawler = new Crawler($pageHtml);

    /** @var ExtractProvider $provider */
    $provider = new $providerClass();
    $provider->setUrl($url);

    return $provider->extract($crawler);
}
/**
 * Fetches the "stats" view of the current URL and wraps the response in a Crawler.
 *
 * If no page fetcher has been assigned yet, a CurlPageFetcher is created and
 * kept on the instance. The request is sent with the header
 * "X-Requested-With: XMLHttpRequest"; the header is set on the shared page
 * fetcher and this method does not remove it afterwards.
 *
 * The "action=stats" parameter is appended with "?" when the URL has no query
 * string yet and with "&" when it already contains one, so an existing query
 * string is extended rather than corrupted.
 *
 * @return Crawler Crawler over the HTML returned for the stats request.
 * @throws PageNotFoundException
 * @throws ServerErrorException
 */
protected function getStatsPageCrawler()
{
    if ($this->pageFetcher === null) {
        $this->pageFetcher = new CurlPageFetcher();
    }

    $this->pageFetcher->setHeader('X-Requested-With', 'XMLHttpRequest');

    // A second "?" would fold "action=stats" into the value of the previous
    // parameter, so switch to "&" once a query string is present.
    $separator = strpos((string) $this->url, '?') === false ? '?' : '&';
    $statsHtml = $this->pageFetcher->fetch($this->url . $separator . 'action=stats');

    return new Crawler($statsHtml);
}
/**
 * Builds an ExtractedAlbum from an album page.
 *
 * When the provider URL contains "/gallery/", the crawler passed in is
 * discarded: the page is fetched again with "/gallery/" replaced by "/a/" and
 * the album is extracted from that HTML instead. A CurlPageFetcher is created
 * on demand for this request if no page fetcher has been assigned. Any
 * exception raised by the page fetcher during that request is not caught here.
 *
 * The title is taken from ".album-description h1" and the description from
 * ".album-description p"; each is only set when the selector matches at least
 * one node. Images, stats and owner are delegated to extractImages(),
 * extractStats() and extractOwner().
 *
 * @param Crawler $crawler Crawler over the already fetched page.
 * @return ExtractedAlbum
 */
public function extract(Crawler $crawler)
{
    if (StringHelper::contains($this->url, '/gallery/')) {
        if ($this->pageFetcher === null) {
            $this->pageFetcher = new CurlPageFetcher();
        }

        $albumUrl = str_replace('/gallery/', '/a/', $this->url);
        $crawler = new Crawler($this->pageFetcher->fetch($albumUrl));
    }

    $album = new ExtractedAlbum();

    // Run each selector once and reuse the node list for both the emptiness
    // check and the text lookup.
    $titleNodes = $crawler->filter('.album-description h1');
    if ($titleNodes->count() > 0) {
        $album->setTitle($titleNodes->text());
    }

    $descriptionNodes = $crawler->filter('.album-description p');
    if ($descriptionNodes->count() > 0) {
        $album->setDescription($descriptionNodes->text());
    }

    $album->setImages($this->extractImages($crawler));
    $album->setStats($this->extractStats($crawler));
    $album->setOwner($this->extractOwner($crawler));

    return $album;
}