/**
 * Downloads a page and harvests the links found in it.
 *
 * The page at $url is fetched through $this->downloadPage(), the content
 * under the 'FILE' key of the result is split into fragments delimited by
 * $open_tag / $close_tag, and the "href" attribute of every fragment is
 * resolved against the page base address. Each resolved address is logged
 * through $this->logActivity().
 *
 * @param string $url       Url of the page to download and harvest links from
 * @param string $referer   Referer url to send when downloading the page
 * @param string $open_tag  Opening tag that delimits a link
 * @param string $close_tag Closing tag that delimits a link
 * @return array Resolved link addresses, in the order the tags were found
 */
protected function harvestLinks($url, $referer, $open_tag, $close_tag)
{
    $links = array();

    // Base address that the harvested hrefs are resolved against.
    $page_base = Resolver::getBasePageAddress($url);

    // Optionally resolve against the base domain instead of the base page.
    // The comparison is deliberately loose so that config values such as
    // 1 or "1" keep enabling the option.
    if ($this->config['webbot']['base_domain_relative_links'] == true) {
        $page_base = Resolver::getBaseDomainAddress($page_base);
    }

    // Download the web page.
    $downloaded_page = $this->downloadPage($url, $referer);

    // TODO: the open/close tags could be moved into the configuration.
    $anchor_tags = HtmlParser::parse2Array($downloaded_page['FILE'], $open_tag, $close_tag);

    // Iterate with foreach so that count() is not re-evaluated on every pass
    // and the loop does not depend on the parser returning contiguous
    // zero-based keys.
    foreach ($anchor_tags as $anchor_tag) {
        $href = HtmlParser::getAttribute($anchor_tag, "href");
        $resolved_address = Resolver::resolveAddress($href, $page_base);
        $links[] = $resolved_address;

        $this->logActivity("Harvested: " . $resolved_address);
    }

    return $links;
}