/**
 * Crawls `$url`, records it, and recursively follows the links found on it.
 *
 * The method accumulates its results in `$this->allLinks`, which is keyed by
 * the hash produced by the client for each URL.
 *
 * @param int    $depth Number of further link levels to follow. `0` processes
 *                      only `$url` itself; a negative value never reaches `0`
 *                      and therefore places no limit on the recursion depth.
 * @param string $url   URL to crawl. An empty string means "use the client's
 *                      start URL".
 *
 * @return array An empty array when `$url` is rejected by the filters or
 *               yields no content; otherwise the accumulated
 *               `$this->allLinks`.
 */
public function go($depth = -1, $url = '')
{
    if ($url === '') {
        $url = $this->client->getStartUrl();
    }

    // The start URL is always crawled; every other URL must pass the filters.
    if ($url !== $this->client->getStartUrl() && !$this->urlPassesFilters($url)) {
        return array();
    }

    // Download the page; nothing to do when it comes back empty.
    $data = $this->client->downloadContent($url);
    if (empty($data)) {
        return array();
    }

    $this->processActions($url, $data);

    // Record the current URL (in absolute form) unless it is already known.
    $currentHash = $this->client->createHashFromUrl($url);
    if (!isset($this->allLinks[$currentHash])) {
        $this->allLinks[$currentHash] = $this->client->convertToAbsoluteUrl($url);
    }

    // Max. depth reached: do not parse or download anything deeper.
    if ($depth === 0) {
        return $this->allLinks;
    }
    $depth--;

    // Collect the sub links of this page.
    // NOTE(review): presumably filterUrls() returns a hash => url map and drops
    // links that were already crawled - confirm; if it does not, pages that
    // link to each other are downloaded again on every visit and a negative
    // $depth never terminates.
    $links = $this->filterUrls($this->findAllLinks($data));
    $this->allLinks = array_merge($this->allLinks, $links);

    // Each recursive call writes its findings straight into $this->allLinks,
    // so its return value does not need to be merged back in here.
    foreach ($links as $link) {
        $this->go($depth, $link);
    }

    return $this->allLinks;
}
/**
 * A root-relative path must resolve to the same result as the equivalent
 * absolute URL on the client's base host.
 *
 * Both values are produced by convertToAbsoluteUrl(), so this checks that the
 * two input forms agree rather than pinning a literal string.
 */
public function testConvertsToFullyQualifiedUrl()
{
    $httpClient = new HttpClient('http://google.com/');

    $fromAbsoluteUrl = $httpClient->convertToAbsoluteUrl('http://google.com/test');
    $fromRelativePath = $httpClient->convertToAbsoluteUrl('/test');

    $this->assertEquals($fromAbsoluteUrl, $fromRelativePath);
}