예제 #1
0
 /**
  * Collect the link URLs found on a listing page.
  *
  * The page is fetched with file_get_html() using a custom User-Agent header,
  * and the parsed document is stored in $this->html. Every element matched by
  * $this->sel_listing (a single selector or an array of selectors) contributes
  * its href, unless that href appears in $this->arr_prevent.
  *
  * @param string $url Address of the listing page.
  *
  * @return array|false False when $url or $this->sel_listing is empty;
  *                     otherwise a list of unique URLs in first-seen order.
  *                     The list is empty when the URL does not validate or
  *                     the page could not be loaded.
  */
 public function get_links($url = '')
 {
     if (!$url || !$this->sel_listing) {
         return false;
     }

     // Only two keys of the validation result are read: 'host' (prefix for
     // non-absolute hrefs) and 'full' (the address that is actually fetched).
     $arr_url = DLN_Helper_Source::validate_url($url);
     $host = isset($arr_url['host']) ? $arr_url['host'] : '';
     $url = isset($arr_url['full']) ? $arr_url['full'] : '';
     if (!$url) {
         return array();
     }

     $opts = array('http' => array('header' => "User-Agent:MyAgent/1.0\r\n"));
     $context = stream_context_create($opts);
     $this->html = file_get_html($url, false, $context);
     if (!is_object($this->html)) {
         // No document came back (simple_html_dom's file_get_html() returns
         // false on failure); calling find() on it would be a fatal error.
         return array();
     }

     // Gather the matching elements for every configured selector.
     $selectors = is_array($this->sel_listing) ? $this->sel_listing : array($this->sel_listing);
     $links = array();
     foreach ($selectors as $selector) {
         $found = $this->html->find($selector);
         if (is_array($found)) {
             $links = array_merge($links, $found);
         }
     }

     $arr_urls = array();
     foreach ($links as $link) {
         $href = $link->href;
         // Skip elements without an href and hrefs on the prevent list.
         if (!$href || in_array($href, $this->arr_prevent, true)) {
             continue;
         }
         // NOTE(review): non-absolute hrefs are joined by plain concatenation;
         // this assumes $host and the href supply the separating slash between
         // them - confirm against validate_url().
         $arr_urls[] = substr($href, 0, 4) === 'http' ? $href : $host . $href;
     }

     // Drop duplicates while keeping first-seen order and sequential keys.
     return array_values(array_unique($arr_urls));
 }
예제 #2
0
 /**
  * Collect the link URLs found on a listing page.
  *
  * The page is fetched and parsed with file_get_html(). Every element matched
  * by self::$sel_listing (a single selector or an array of selectors)
  * contributes its href, unless that href appears in self::$arr_prevent. The
  * '#box_comment' fragment is stripped from each collected URL.
  *
  * @param string $url Address of the listing page.
  *
  * @return array|false False when $url or self::$sel_listing is empty;
  *                     otherwise a list of unique URLs in first-seen order.
  *                     The list is empty when the URL does not validate or
  *                     the page could not be loaded.
  */
 public static function get_links($url = '')
 {
     if (!$url || !self::$sel_listing) {
         return false;
     }

     // Only two keys of the validation result are read: 'host' (prefix for
     // non-absolute hrefs) and 'full' (the address that is actually fetched).
     $arr_url = DLN_Helper_Source::validate_url($url);
     $host = isset($arr_url['host']) ? $arr_url['host'] : '';
     $url = isset($arr_url['full']) ? $arr_url['full'] : '';
     if (!$url) {
         return array();
     }

     $html = file_get_html($url);
     if (!is_object($html)) {
         // No document came back (simple_html_dom's file_get_html() returns
         // false on failure); calling find() on it would be a fatal error.
         return array();
     }

     // Gather the matching elements for every configured selector.
     $selectors = is_array(self::$sel_listing) ? self::$sel_listing : array(self::$sel_listing);
     $links = array();
     foreach ($selectors as $selector) {
         $found = $html->find($selector);
         if (is_array($found)) {
             $links = array_merge($links, $found);
         }
     }

     $arr_urls = array();
     foreach ($links as $link) {
         $href = $link->href;
         // Skip elements without an href and hrefs on the prevent list.
         if (!$href || in_array($href, self::$arr_prevent, true)) {
             continue;
         }
         // NOTE(review): non-absolute hrefs are joined by plain concatenation;
         // this assumes $host and the href supply the separating slash between
         // them - confirm against validate_url().
         $crawl_url = substr($href, 0, 4) === 'http' ? $href : $host . $href;
         // Stripping the comment-box anchor lets links that differ only by
         // that fragment collapse into one entry during de-duplication.
         $arr_urls[] = str_replace('#box_comment', '', $crawl_url);
     }

     // Drop duplicates while keeping first-seen order and sequential keys.
     return array_values(array_unique($arr_urls));
 }
예제 #3
0
 /**
  * Collect the link URLs found on a listing page.
  *
  * The page is fetched and parsed with file_get_html(). Every element matched
  * by self::$sel_listing (a single selector or an array of selectors)
  * contributes its href, unless that href appears in self::$arr_prevent.
  *
  * @param string $url Address of the listing page.
  *
  * @return array|false False when $url or self::$sel_listing is empty;
  *                     otherwise a list of unique URLs in first-seen order.
  *                     The list is empty when the URL does not validate or
  *                     the page could not be loaded.
  */
 public static function get_links($url = '')
 {
     if (!$url || !self::$sel_listing) {
         return false;
     }

     // Only two keys of the validation result are read: 'host' (prefix for
     // non-absolute hrefs) and 'full' (the address that is actually fetched).
     $arr_url = DLN_Helper_Source::validate_url($url);
     $host = isset($arr_url['host']) ? $arr_url['host'] : '';
     $url = isset($arr_url['full']) ? $arr_url['full'] : '';
     if (!$url) {
         return array();
     }

     $html = file_get_html($url);
     if (!is_object($html)) {
         // No document came back (simple_html_dom's file_get_html() returns
         // false on failure); calling find() on it would be a fatal error.
         return array();
     }

     // Gather the matching elements for every configured selector.
     $selectors = is_array(self::$sel_listing) ? self::$sel_listing : array(self::$sel_listing);
     $links = array();
     foreach ($selectors as $selector) {
         $found = $html->find($selector);
         if (is_array($found)) {
             $links = array_merge($links, $found);
         }
     }

     $arr_urls = array();
     foreach ($links as $link) {
         $href = $link->href;
         // Skip elements without an href and hrefs on the prevent list.
         if (!$href || in_array($href, self::$arr_prevent, true)) {
             continue;
         }
         // NOTE(review): non-absolute hrefs are joined by plain concatenation;
         // this assumes $host and the href supply the separating slash between
         // them - confirm against validate_url().
         $arr_urls[] = substr($href, 0, 4) === 'http' ? $href : $host . $href;
     }

     // Drop duplicates while keeping first-seen order and sequential keys.
     return array_values(array_unique($arr_urls));
 }