/**
 * Collect the unique link URLs found on a listing page.
 *
 * The page is fetched with a custom User-Agent header, the elements matching
 * $this->sel_listing (a single selector or an array of selectors) are looked
 * up, and the href of every match that is not listed in $this->arr_prevent is
 * returned. The parsed document is kept in $this->html.
 *
 * @param string $url Address of the listing page; it is passed through
 *                    DLN_Helper_Source::validate_url() before being fetched.
 *
 * @return array|false False when $url or $this->sel_listing is empty;
 *                     otherwise the list of unique URLs in order of first
 *                     appearance (empty when the URL does not validate or
 *                     the page could not be loaded).
 */
public function get_links($url = '') {
    if (!$url || !$this->sel_listing) {
        return false;
    }

    $arr_url  = DLN_Helper_Source::validate_url($url);
    $host     = isset($arr_url['host']) ? $arr_url['host'] : '';
    $full_url = isset($arr_url['full']) ? $arr_url['full'] : '';
    if (!$full_url) {
        return array();
    }

    // Fetch the page with an explicit User-Agent header.
    $opts       = array('http' => array('header' => "User-Agent:MyAgent/1.0\r\n"));
    $context    = stream_context_create($opts);
    $this->html = file_get_html($full_url, false, $context);

    // file_get_html() may hand back a non-object (e.g. false) when the page
    // cannot be loaded; calling find() on it would be a fatal error.
    if (!is_object($this->html)) {
        return array();
    }

    // Gather the matching elements for every configured selector.
    $selectors = is_array($this->sel_listing) ? $this->sel_listing : array($this->sel_listing);
    $links     = array();
    foreach ($selectors as $selector) {
        $found = $this->html->find($selector);
        if (is_array($found)) {
            $links = array_merge($links, $found);
        }
    }

    // Turn the elements into URLs, skipping empty and excluded hrefs.
    $arr_urls = array();
    foreach ($links as $link) {
        $href = $link->href;
        if (!$href || in_array($href, $this->arr_prevent, true)) {
            continue;
        }

        // NOTE(review): assumes the 'host' value from validate_url() can be
        // concatenated directly with a relative href - confirm its format.
        $arr_urls[] = (substr($href, 0, 4) === 'http') ? $href : $host . $href;
    }

    // Drop duplicates while keeping first-seen order and a 0-based index.
    return array_values(array_unique($arr_urls));
}
/**
 * Collect the unique link URLs found on a listing page.
 *
 * The page is fetched, the elements matching self::$sel_listing (a single
 * selector or an array of selectors) are looked up, and the href of every
 * match that is not listed in self::$arr_prevent is returned with any
 * '#box_comment' text removed.
 *
 * @param string $url Address of the listing page; it is passed through
 *                    DLN_Helper_Source::validate_url() before being fetched.
 *
 * @return array|false False when $url or self::$sel_listing is empty;
 *                     otherwise the list of unique URLs in order of first
 *                     appearance (empty when the URL does not validate or
 *                     the page could not be loaded).
 */
public static function get_links($url = '') {
    if (!$url || !self::$sel_listing) {
        return false;
    }

    $arr_url  = DLN_Helper_Source::validate_url($url);
    $host     = isset($arr_url['host']) ? $arr_url['host'] : '';
    $full_url = isset($arr_url['full']) ? $arr_url['full'] : '';
    if (!$full_url) {
        return array();
    }

    // file_get_html() may hand back a non-object (e.g. false) when the page
    // cannot be loaded; calling find() on it would be a fatal error.
    $html = file_get_html($full_url);
    if (!is_object($html)) {
        return array();
    }

    // Gather the matching elements for every configured selector.
    $selectors = is_array(self::$sel_listing) ? self::$sel_listing : array(self::$sel_listing);
    $links     = array();
    foreach ($selectors as $selector) {
        $found = $html->find($selector);
        if (is_array($found)) {
            $links = array_merge($links, $found);
        }
    }

    // Turn the elements into URLs, skipping empty and excluded hrefs.
    $arr_urls = array();
    foreach ($links as $link) {
        $href = $link->href;
        if (!$href || in_array($href, self::$arr_prevent, true)) {
            continue;
        }

        // NOTE(review): assumes the 'host' value from validate_url() can be
        // concatenated directly with a relative href - confirm its format.
        $crawl_url = (substr($href, 0, 4) === 'http') ? $href : $host . $href;

        // Strip the comment-box marker so such links collapse onto the
        // plain URL before duplicates are removed.
        $arr_urls[] = str_replace('#box_comment', '', $crawl_url);
    }

    // Drop duplicates while keeping first-seen order and a 0-based index.
    return array_values(array_unique($arr_urls));
}
/**
 * Collect the unique link URLs found on a listing page.
 *
 * The page is fetched, the elements matching self::$sel_listing (a single
 * selector or an array of selectors) are looked up, and the href of every
 * match that is not listed in self::$arr_prevent is returned.
 *
 * @param string $url Address of the listing page; it is passed through
 *                    DLN_Helper_Source::validate_url() before being fetched.
 *
 * @return array|false False when $url or self::$sel_listing is empty;
 *                     otherwise the list of unique URLs in order of first
 *                     appearance (empty when the URL does not validate or
 *                     the page could not be loaded).
 */
public static function get_links($url = '') {
    if (!$url || !self::$sel_listing) {
        return false;
    }

    $arr_url  = DLN_Helper_Source::validate_url($url);
    $host     = isset($arr_url['host']) ? $arr_url['host'] : '';
    $full_url = isset($arr_url['full']) ? $arr_url['full'] : '';
    if (!$full_url) {
        return array();
    }

    // file_get_html() may hand back a non-object (e.g. false) when the page
    // cannot be loaded; calling find() on it would be a fatal error.
    $html = file_get_html($full_url);
    if (!is_object($html)) {
        return array();
    }

    // Gather the matching elements for every configured selector.
    $selectors = is_array(self::$sel_listing) ? self::$sel_listing : array(self::$sel_listing);
    $links     = array();
    foreach ($selectors as $selector) {
        $found = $html->find($selector);
        if (is_array($found)) {
            $links = array_merge($links, $found);
        }
    }

    // Turn the elements into URLs, skipping empty and excluded hrefs.
    $arr_urls = array();
    foreach ($links as $link) {
        $href = $link->href;
        if (!$href || in_array($href, self::$arr_prevent, true)) {
            continue;
        }

        // NOTE(review): assumes the 'host' value from validate_url() can be
        // concatenated directly with a relative href - confirm its format.
        $arr_urls[] = (substr($href, 0, 4) === 'http') ? $href : $host . $href;
    }

    // Drop duplicates while keeping first-seen order and a 0-based index.
    return array_values(array_unique($arr_urls));
}