示例#1
0
 /**
  * Downloads a page and harvests the links found in it.
  *
  * The page is fetched with downloadPage(), its content is split into tags
  * delimited by $open_tag / $close_tag, and the "href" attribute of every
  * tag is resolved against the page base address. Each resolved address is
  * logged through logActivity().
  *
  * @param string $url        URL of the page to download and harvest links from
  * @param string $referer    Referer URL passed to downloadPage()
  * @param string $open_tag   Opening delimiter of the tags that hold the links
  * @param string $close_tag  Closing delimiter of the tags that hold the links
  * @return array             Resolved link addresses, one per parsed tag
  */
 protected function harvestLinks($url, $referer, $open_tag, $close_tag)
 {
     $links = array();

     # Get page base for $url; hrefs found on the page are resolved against it
     $page_base = Resolver::getBasePageAddress($url);
     # Loose comparison kept on purpose: the config value may not be a strict boolean
     if ($this->config['webbot']['base_domain_relative_links'] == true) {
         $page_base = Resolver::getBaseDomainAddress($page_base);
     }

     # Download webpage
     $downloaded_page = $this->downloadPage($url, $referer);

     # NOTE (translated from the original comment): this could be moved into a configuration setting
     $anchor_tags = HtmlParser::parse2Array($downloaded_page['FILE'], $open_tag, $close_tag);

     # Resolve the href of each tag; foreach avoids re-counting the array on
     # every iteration and does not depend on the keys being contiguous
     foreach ($anchor_tags as $anchor_tag) {
         $href = HtmlParser::getAttribute($anchor_tag, "href");
         $resolved_address = Resolver::resolveAddress($href, $page_base);
         $links[] = $resolved_address;

         # Log every harvested address
         $this->logActivity("Harvested: " . $resolved_address);
     }

     return $links;
 }