/**
 * Run a site-restricted Google search and scrape the result entries.
 *
 * Requests https://www.google.fr/search?q=site:<website>+<string> through
 * Curl::get() and builds one entry per `<div class="g">` block of the page.
 *
 * @param string $website Site the search is restricted to; URL-encoded before use.
 * @param string $string  Search terms; URL-encoded before use.
 *
 * @return array|null List of entries with the keys 'URL', 'Title', 'Data' and
 *                    'Cite', or null when the results container is not found.
 */
public function search($website, $string)
{
    $terms = urlencode($string);
    $site = urlencode($website);

    // Results come ten at a time; appending &start=10 to this URL would request page 2.
    $url = "https://www.google.fr/search?q=site:" . $site . "+" . $terms . "&hl=en&hs=Dqa&filter=0";
    $page = Curl::get($url);

    // The container is only probed for presence; the entries are read from the full page.
    $container = Grabber::getTagContent($page, '<div data-jibp="h" data-jiis="uc" id="search"', true);
    if (!$container) {
        return;
    }

    $results = [];
    foreach (Grabber::getTagContents($page, '<div class="g">', true) as $block) {
        $heading = Grabber::getTagContent($block, '<h3', true);

        $results[] = [
            'URL' => (string) Grabber::getTagAttributeValue($heading, 'href'),
            // Keep <b> so the highlighted search terms survive in the title.
            'Title' => strip_tags($heading, "<b>"),
            'Data' => Grabber::getTagContent($block, '<span class="st">', true),
            'Cite' => Grabber::getTagContent($block, '<cite>', true),
        ];
    }

    return $results;
}
/**
 * Parse a locally saved copy of the xeno-canto species list.
 *
 * Reads /var/www/xc/all_species.php.htm, extracts the rows of the table
 * matched by `<table class="results`, skips the first two rows and turns each
 * remaining row into a species record. Progress messages are echoed on the way.
 *
 * @return array|false List of records with the keys 'url', 'scientific_name',
 *                     'name_en', 'foreground' and 'background', or false when
 *                     no table row could be extracted.
 */
public static function getAllLink()
{
    // Online location of the page; it is not fetched here, the saved copy below is parsed instead.
    $url = "http://www.xeno-canto.org/all_species.php";

    $html = file_get_contents("/var/www/xc/all_species.php.htm");
    echo "start OK !\n";

    $table = Grabber::getTagContent($html, '<table class="results', true);
    echo "contents OK !\n";

    $rows = Grabber::getTagContents($table, '<tr>', true);
    echo "line OK !\n";

    if (!$rows) {
        return false;
    }

    $species_list = array();
    $row_number = 0;

    foreach ($rows as $row) {
        ++$row_number;
        echo "line : " . $row_number . "\n";

        // Rows 1 and 2 are skipped (presumably header rows — confirm against the page).
        if ($row_number <= 2) {
            continue;
        }

        $cells = Grabber::getTagContents($row, '<td', true);

        // The link target sits between href=" and the next double quote of the first cell.
        $after_href = explode('href="', $cells[0]);
        $href_parts = explode('"', $after_href[1]);

        $species_list[] = array(
            'url' => $href_parts[0],
            'scientific_name' => $cells[1],
            'name_en' => Grabber::getTagContent($cells[0], '<a', true),
            'foreground' => $cells[2],
            'background' => $cells[3],
        );
    }

    return $species_list;
}
/**
 * Fetch a profile page and return its experience entries.
 *
 * Downloads $generate_url with self::curl(), isolates the
 * "profile-experience" section, splits it on `<div class="position` and
 * removes the `<p class="orgstats` fragment from every entry.
 *
 * @param string $generate_url URL of the profile page to scrape.
 *
 * @return array One HTML fragment per experience entry; an empty array when
 *               the section or its entries cannot be found.
 */
public static function getExperience($generate_url)
{
    $page = self::curl($generate_url);
    $section = Grabber::getTagContent($page, '<div class="section subsection-reorder" id="profile-experience" style="display:block">', true);
    $experiences = Grabber::getTagContents($section, '<div class="position', true);

    // Other callers of getTagContents() in this file treat a non-array result as
    // "nothing found". Without this guard, count() and the index writes below ran
    // on a non-array, which is a TypeError since PHP 8.0.
    if (!is_array($experiences)) {
        return array();
    }

    foreach ($experiences as $index => $experience) {
        $to_del = Grabber::getTagContent($experience, '<p class="orgstats', false);

        // Only strip when a fragment was actually found; an empty or false needle removes nothing.
        if (is_string($to_del) && $to_del !== '') {
            $experiences[$index] = str_replace($to_del, '', $experience);
        }
    }

    return $experiences;
}
/**
 * Look up details about an IP address on whatismyipaddress.com.
 *
 * Downloads http://whatismyipaddress.com/ip/<address>, takes the first two
 * tables of the page and maps their <td> cells, by position, onto the keys of
 * the returned array.
 *
 * Changes compared with the previous version: the address is validated before
 * being placed in the URL, the leftover print_r() debug dumps are gone (the
 * method no longer writes to the output), and missing cells yield null instead
 * of raising undefined-offset notices.
 *
 * @param string $adresse_ip IPv4 or IPv6 address to look up.
 *
 * @return array|false Array with the keys 'ip', 'decimal', 'hostname', 'isp',
 *                     'organization', 'services', 'type', 'assignment', 'iso',
 *                     'country', 'area', 'city', 'latitude' and 'longitude';
 *                     false when the address is not a valid IP, the download
 *                     fails or the two expected tables are not found.
 */
public static function getInfoFromIp($adresse_ip)
{
    // The address is concatenated into the request URL, so refuse anything that is not a literal IP.
    if (filter_var($adresse_ip, FILTER_VALIDATE_IP) === false) {
        return false;
    }

    $url = "http://whatismyipaddress.com/ip/" . $adresse_ip;

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows NT 6.1; rv:14.0) Gecko/20100101 Firefox/14.0.1");
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_URL, $url);
    $data_brut = curl_exec($ch);
    curl_close($ch);

    if (!$data_brut) {
        return false;
    }

    $tab = Grabber::getTagContents($data_brut, '<table', true);
    if (!is_array($tab) || empty($tab[1])) {
        return false;
    }

    $tab1 = Grabber::getTagContents($tab[0], '<td', true);
    $tab2 = Grabber::getTagContents($tab[1], '<td', true);

    // Positional read that yields null for a missing cell instead of a notice.
    $cell = function ($cells, $index) {
        return (is_array($cells) && isset($cells[$index])) ? $cells[$index] : null;
    };

    $ip = array();
    $ip['ip'] = $adresse_ip;
    $ip['decimal'] = $cell($tab1, 1);
    $ip['hostname'] = $cell($tab1, 2);
    $ip['isp'] = $cell($tab1, 3);
    // NOTE(review): 'organization' reads the same cell as 'isp' while the next
    // fields continue at index 4. Kept as it was — confirm against the live page
    // whether a separate Organization row exists and the indexes must shift.
    $ip['organization'] = $cell($tab1, 3);
    $ip['services'] = $cell($tab1, 4);
    $ip['type'] = Grabber::getTagContent($cell($tab1, 5), '<a', true);
    $ip['assignment'] = Grabber::getTagContent($cell($tab1, 6), '<a', true);

    // The country cell embeds a flag image named flags/<iso>.png; the ISO code is taken from that file name.
    $country_cell = (string) $cell($tab2, 0);
    $tab_iso = explode("flags/", $country_cell);
    if (isset($tab_iso[1])) {
        $tab_iso = explode(".png", $tab_iso[1]);
        $ip['iso'] = $tab_iso[0];
    } else {
        $ip['iso'] = '';
    }

    $ip['country'] = trim(strip_tags($country_cell));
    $ip['area'] = $cell($tab2, 1);
    $ip['city'] = $cell($tab2, 2);
    $ip['latitude'] = $cell($tab2, 3);
    $ip['longitude'] = $cell($tab2, 4);

    return $ip;
}
/**
 * List the species of a bird family from oiseaux.net.
 *
 * Downloads http://www.oiseaux.net/oiseaux/<family>.html with browser-like
 * headers, takes the `<table class="tb_lite">` table and builds one record
 * per table row that contains a link.
 *
 * @param string $famliy Family slug used in the page URL (parameter name kept
 *                       as-is for backward compatibility).
 *
 * @return array|false List of records with the keys 'French', 'url',
 *                     'reference_id', 'scientific_name' and 'English'; false
 *                     when the species table is not found.
 */
public static function get_species_from_family($famliy = "estrildides")
{
    $url = "http://www.oiseaux.net/oiseaux/" . $famliy . ".html";

    // Pose as a desktop browser (the user agent below is Chrome 21).
    $user_agent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.79 Safari/537.1';
    $header = array(
        "Accept: text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5",
        "Cache-Control: max-age=0",
        "Connection: keep-alive",
        "Keep-Alive: 300",
        "Accept-Charset: utf-8",
        "Accept-Language: en",
        "Pragma: ",
    );

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
    curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);
    $content = curl_exec($ch);
    curl_close($ch);

    $content2 = Grabber::getTagContent($content, '<table class="tb_lite">', true);
    if (false === $content2) {
        return false;
    }

    $data = array();

    $tab_tr = Grabber::getTagContents($content2, '<tr', true);
    if (!is_array($tab_tr)) {
        // Table found but no row extracted: same empty result as before, without iterating a non-array.
        return $data;
    }

    foreach ($tab_tr as $tr) {
        // Rows without any link carry no species.
        if (strpos($tr, "<a") === false) {
            continue;
        }

        // Previously called through "\wlGrabber", a class name used nowhere else
        // in this file; every other extraction goes through Grabber.
        $link = Grabber::getTagContent($tr, '<a href="', false);
        $tab_url = explode('"', (string) $link);
        if (!isset($tab_url[1])) {
            // No href could be read: skip the row rather than emit a record pointing at the bare base URL.
            continue;
        }

        $tab_td = Grabber::getTagContents($tr, '<td', true);
        $resultat = pathinfo($tab_url[1]);

        $out = array();
        $out['French'] = Grabber::getTagContent($tr, '<a href="', true);
        $out['url'] = "http://www.oiseaux.net/oiseaux/" . $tab_url[1];
        // The page file name without its extension serves as the reference id.
        $out['reference_id'] = $resultat['filename'];
        $out['scientific_name'] = (is_array($tab_td) && isset($tab_td[1])) ? $tab_td[1] : null;
        $out['English'] = (is_array($tab_td) && isset($tab_td[2])) ? $tab_td[2] : null;

        $data[] = $out;
    }

    return $data;
}
/**
 * Translate a text through the Google Translate web page.
 *
 * Requests https://translate.google.fr/ with the text, `hl` set to
 * self::$_language and `langpair` set to $from, then reads the translation
 * out of the `<span id=result_box` element. The input is treated as one
 * sentence per line and the result must contain the same number of items.
 *
 * @param string $string Text to translate, one sentence per line.
 * @param string $from   Language code placed in the `langpair` parameter
 *                       (reported as the source language in the error message).
 *
 * @return array Translated fragments, one per input line.
 *
 * @throws \Exception GLI-059 when the request fails or when the number of
 *                    translated items differs from the number of input lines.
 */
public static function get_answer_from_google($string, $from)
{
    // Only this URL form was ever effective: the two earlier assignments were overwritten before use.
    $url = 'https://translate.google.fr/?text=' . urlencode($string) . '&hl=' . self::$_language . '&langpair=' . $from . '&tbb=1&ie=utf-8';

    // Without a user agent Google answers in the default charset of the requested language.
    $UA = 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.80 Safari/537.36';

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_USERAGENT, $UA);
    $body = curl_exec($ch);
    curl_close($ch);

    $failure = "GLI-059 : Problem with machine translation '" . trim($string) . "' [" . $from . "=>" . self::$_language . "]" . PHP_EOL;

    // A failed transfer used to fall through and, for single-line input, come
    // back as one empty "translation" because the line counts happened to match.
    if ($body === false) {
        throw new \Exception($failure);
    }

    $content = Grabber::getTagContent($body, '<span id=result_box', true);
    $content = str_replace('<br>', '', $content);

    // Prefer the per-sentence spans; fall back to the raw box content split by line.
    $out = Grabber::getTagContents($content, '<span title="', true);
    if (empty($out)) {
        $out = $content;
    }
    if (!is_array($out)) {
        $out = explode("\n", trim($out));
    }

    $nb = explode("\n", trim($string));

    // The caller relies on a one-to-one mapping between input lines and output items.
    if (count($nb) !== count($out)) {
        debug($from);
        debug($string);
        echo '<hr>';
        debug($nb);
        debug($out);
        echo $url . "<br>\n";

        throw new \Exception($failure);
    }

    return $out;
}
/**
 * Prototype scraper for Google Images: searches "White headed munia" and
 * prints the accepted and rejected thumbnails.
 *
 * Every <td> of the `images_table` result table is parsed into a record with
 * the keys 'url', 'ref', 'legend', 'file_size', 'width', 'height', 'ext' and
 * 'site'. A record is rejected when it is smaller than 250 pixels in either
 * dimension, comes from an excluded site, or its legend does not contain the
 * search term. The raw cells and both lists are printed inside <pre> blocks.
 *
 * No proxy is configured in code any more: libcurl honours the http_proxy /
 * https_proxy / all_proxy environment variables when CURLOPT_PROXY is not
 * set, so the proxy host and its credentials belong in the environment and
 * not in the source.
 *
 * @return void
 */
public function get_image()
{
    $var = "White headed munia";

    // Only this URL was ever effective: the four earlier assignments were overwritten before use.
    $url = "http://www.google.fr/search?q=%22" . urlencode($var) . "%22&hl=fr&client=firefox-a&rls=org.mozilla:fr:official&biw=1173&bih=775&gbv=1&tbm=isch&ei=jpzXTsqaE6bb4QSohJneDQ&start=0&sa=N";

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows NT 5.1; rv:8.0) Gecko/20100101 Firefox/8.0");
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_URL, $url);
    $content = curl_exec($ch);
    curl_close($ch);

    $tab = Grabber::getTagContent($content, '<table width="100%" class="images_table" style="table-layout:fixed"', true);
    $grid = Grabber::getTagContents($tab, '<td', true);

    echo "<pre>";
    print_r($grid);
    echo "</pre>";

    // 1: images from these sites are already collected through dedicated interfaces
    // 2: sites with low or bad quality images are dropped
    $exclude = array("flickr.com", "lonchuramyworld.monempire.net", "hofmann-photography.de", "ibc.lynxeds.com", "flickriver.com", "photozoo.org");

    $data = array();
    // Initialised up front: it used to be undefined at the final print_r() when no image was rejected.
    $error = array();

    // Positional read that yields '' for a missing fragment instead of a notice.
    $part = function (array $parts, $index) {
        return isset($parts[$index]) ? $parts[$index] : '';
    };

    foreach ((is_array($grid) ? $grid : array()) as $img) {
        $elem = array();

        // Image URL: first http… run up to an ampersand; it is encoded twice in the markup.
        $elem['url'] = '';
        if (preg_match('/http[^\\s]*&/i', $img, $match)) {
            $res = explode("&", $match[0]);
            $elem['url'] = urldecode(urldecode($res[0]));
        }

        // Referring page: value of imgrefurl= up to the next ampersand.
        $ref = (string) stristr($img, "imgrefurl=");
        $ref = (string) stristr($ref, "&", true);
        $elem['ref'] = urldecode((string) stristr($ref, "http"));

        // The cell is split on <br>: [1] legend, [2] "<width> × <height> - <size> - <ext>", [3] site link.
        $ff = explode("<br>", $img);
        $elem['legend'] = strip_tags($part($ff, 1));

        $gg = explode("-", strip_tags($part($ff, 2)));
        $elem['file_size'] = str_replace(" ", "", trim($part($gg, 1)));

        $size_img = explode("×", $part($gg, 0));
        $elem['width'] = trim($part($size_img, 0));
        $elem['height'] = trim($part($size_img, 1));
        $elem['ext'] = str_replace(" ", "", trim($part($gg, 2)));

        $site_parts = explode('"', trim($part($ff, 3)));
        $elem['site'] = $part($site_parts, 1);

        // Compare as integers so an unparsable size is rejected as too small.
        $too_small = (int) $elem['width'] < 250 || (int) $elem['height'] < 250;
        $excluded = in_array($elem['site'], $exclude, true);
        $off_topic = stristr($elem['legend'], $var) === false;

        if ($too_small || $excluded || $off_topic) {
            $error[] = $elem;
            continue;
        }

        $data[] = $elem;
    }

    echo "<pre>";
    print_r($data);
    print_r($error);
    echo "</pre>";
}
/**
 * Fetch the bibliography of a species from the IUCN Red List.
 *
 * Downloads http://www.iucnredlist.org/details/biblio/<id>/0 with
 * browser-like headers, takes the first `tab_data` table inside the
 * `detailsPage` block and collects its <p> paragraphs. A trailing paragraph
 * mentioning "IUCN" is dropped.
 *
 * @param int|string $id Species identifier used in the page URL.
 *
 * @return array Array with a single key 'book' holding the list of paragraphs;
 *               the list is empty when the page or the table cannot be read.
 */
public static function getSpeciesBibliography($id)
{
    $data = array();
    $url = "http://www.iucnredlist.org/details/biblio/" . $id . "/0";

    // Pose as a desktop browser (the user agent below is Chrome 21).
    $user_agent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.79 Safari/537.1';
    $header = array(
        "Accept: text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5",
        "Cache-Control: max-age=0",
        "Connection: keep-alive",
        "Keep-Alive: 300",
        "Accept-Charset: utf-8",
        "Accept-Language: en",
        "Pragma: ",
    );

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
    curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);
    $content = curl_exec($ch);
    curl_close($ch);

    $content = Grabber::getTagContent($content, '<div id="detailsPage"', true);
    $table = Grabber::getTagContents($content, '<table class="tab_data" cellpadding="0" cellspacing="0">', true);

    // The table offset and count() used to be applied without checking that the
    // scraper found anything; count() on a non-array is a TypeError since PHP 8.0.
    $book = array();
    if (is_array($table) && isset($table[0])) {
        $paragraphs = Grabber::getTagContents($table[0], '<p>', true);
        if (is_array($paragraphs)) {
            $book = $paragraphs;
        }
    }

    // Drop the last paragraph when it mentions IUCN rather than a reference.
    $last = count($book) - 1;
    if ($last >= 0 && isset($book[$last]) && stripos($book[$last], 'IUCN') !== false) {
        unset($book[$last]);
    }

    $data['book'] = $book;

    return $data;
}
/**
 * Scrape the public profile of a contributor on ibc.lynxeds.com.
 *
 * Downloads http://ibc.lynxeds.com/users/<link> with browser-like headers
 * and extracts the picture, name, origin, contribution localities, biography,
 * first posting date and the video / image / sound statistics.
 *
 * @param string $link User slug appended to the /users/ URL.
 *
 * @return array Array with the keys 'user_picture_url', 'username', 'from',
 *               'contributions_locations', 'biography', 'first_posted',
 *               'videos_posted', 'images_posted' and 'sounds_posted'; each
 *               *_posted entry holds 'total', 'species_covered' and
 *               'species_percentage'.
 */
public static function getUserInfos($link)
{
    $url = "http://ibc.lynxeds.com/users/" . $link;

    // Pose as a desktop browser.
    $user_agent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.79 Safari/537.1';
    $header = array(
        "Accept: text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5",
        "Cache-Control: max-age=0",
        "Connection: keep-alive",
        "Keep-Alive: 300",
        "Accept-Charset: utf-8",
        "Accept-Language: en",
        "Pragma: ",
    );

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
    curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);
    $content = curl_exec($ch);
    curl_close($ch);

    // The three statistics items share one layout: a total, a species count and
    // a percentage written between parentheses, each introduced by a <span>.
    $read_stats = function ($item_tag) use ($content) {
        $item = Grabber::getTagContent($content, $item_tag, true);
        $spans = explode('<span>', str_replace('</span>', '', $item));

        $total_words = explode(' ', $spans[1]);
        $covered_words = explode(' ', $spans[2]);
        $around_parentheses = preg_split('#[\\(\\)]#', $spans[3]);

        return array(
            'total' => $total_words[0],
            'species_covered' => $covered_words[0],
            'species_percentage' => $around_parentheses[1],
        );
    };

    $data = array();

    // Picture path: first quoted value inside the image block.
    $picture_parts = explode('"', Grabber::getTagContent($content, '<div id="image">', true));
    $data['user_picture_url'] = 'http://ibc.lynxeds.com' . $picture_parts[1];

    // The page title reads "<name> | …"; keep the part before the first pipe.
    $title_parts = explode('|', Grabber::getTagContent($content, '<title>', true));
    $data['username'] = trim($title_parts[0]);

    $origin = Grabber::getTagContent($content, '<h4>', true);
    $data['from'] = trim(str_replace('<br>', ', ', $origin));

    // Localities follow "from:" and are separated by middle dots.
    $localities = Grabber::getTagContent($content, '<div class="puntitos-block">');
    $localities = str_replace('View all the localities on a map!', '', $localities);
    $localities = explode('from:', $localities);
    $data['contributions_locations'] = explode('·', trim(strip_tags($localities[1])));

    // Biography: third paragraph of the info block, up to its first <strong>.
    $info_paragraphs = explode('</p>', Grabber::getTagContent($content, '<div class="info">', true));
    $biography_parts = explode('<strong>', $info_paragraphs[2]);
    $data['biography'] = trim(strip_tags($biography_parts[0]));

    $first_posted_item = Grabber::getTagContent($content, '<li id="first-posted">', true);
    $first_posted_text = Grabber::getTagContent($first_posted_item, '<span>', true);
    $data['first_posted'] = date('Y-m-d', strtotime($first_posted_text));

    $data['videos_posted'] = $read_stats('<li id="videos-posted">');
    $data['images_posted'] = $read_stats('<li id="images-posted">');
    $data['sounds_posted'] = $read_stats('<li id="sounds-posted">');

    return $data;
}
/**
 * Prototype scraper for a pbase.com image page: collects the image, its
 * description and its EXIF table, then prints the result.
 *
 * The page URL is hard-coded. The image itself is downloaded through the
 * global get_image() function and measured locally; title, location, legend,
 * camera and EXIF rows are read from the page requested with "&exif=Y".
 * The collected array is printed inside a <pre> block; nothing is returned.
 *
 * No proxy is configured in code any more: libcurl honours the http_proxy /
 * https_proxy / all_proxy environment variables when CURLOPT_PROXY is not
 * set, so the proxy host and its credentials belong in the environment and
 * not in the source.
 *
 * @return void
 */
public function getInfo()
{
    // Only this link was ever effective: the earlier assignments were overwritten before use.
    $link = "http://www.pbase.com/wongtsushi/image/80273259";
    $url = $link . "&exif=Y";

    $data = array();

    // http://www.pbase.com/<author>/image/<id> — the author is the fourth "/"-separated element.
    $ele = explode("/", $url);
    $data['author'] = $ele[3];
    $data['url_context'] = $link;
    $data['url_md5'] = md5($data['url_context']);

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows NT 5.1; rv:8.0) Gecko/20100101 Firefox/8.0");
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_URL, $url);
    $content = curl_exec($ch);
    curl_close($ch);

    $tab = Grabber::getTagContent($content, '<table width=0 border=0 align="center" class="imagetable">', true);
    $img = Grabber::getTagContent($tab, '<IMG');

    // The image address is the second quoted value of the <IMG tag.
    $elem = explode('"', (string) $img);
    $data['url_found'] = isset($elem[3]) ? $elem[3] : '';

    $img_name = pathinfo($data['url_found']);
    $data['name'] = $img_name['basename'];

    // NOTE(review): get_image() is called here as a plain two-argument function
    // that is not defined in this block; presumably it saves url_found under the
    // given file name, since the file is read right below — confirm.
    get_image($data['url_found'], $data['name']);

    // getimagesize() returns false when the file is missing or not an image.
    $info = getimagesize($data['name']);
    $data['width'] = $info ? $info[0] : null;
    $data['height'] = $info ? $info[1] : null;
    $data['md5'] = md5_file($data['name']);

    $title = Grabber::getTagContent($content, '<h3 class="title"', true);
    $data['title'] = trim(strip_tags($title));

    $location = Grabber::getTagContent($content, '<h3 class="location"', true);
    $data['location'] = trim(strip_tags($location));

    $legend = Grabber::getTagContent($content, '<div id="imagecaption" class="imagecaption">', true);
    $data['legend'] = trim(strip_tags($legend));

    $exif = Grabber::getTagContent($content, '<div id="techinfo" class="techinfo">', true);
    $camera = Grabber::getTagContent($exif, '<span class="camera">', true);
    $data['camera'] = trim(strip_tags($camera));

    // Each EXIF row holds a label cell followed by a value cell, both of class "lid".
    $hh = array();
    $data_exif = Grabber::getTagContents($exif, '<tr', true);
    foreach ((is_array($data_exif) ? $data_exif : array()) as $line) {
        $dd = Grabber::getTagContents($line, '<td class=lid', true);
        if (empty($dd) || !isset($dd[0])) {
            continue;
        }
        $hh[$dd[0]] = isset($dd[1]) ? $dd[1] : null;
    }
    $data['exif'] = $hh;

    echo "<pre>";
    print_r($data);
    echo "</pre>";
}
/**
 * Find the best allowed size of a photo from its "all sizes" page.
 *
 * Downloads $url with self::curl(), reads the size codes linked from the
 * `<ol class="sizes-list"` list and keeps the last one that appears in
 * self::$allowed. The thumbnail address shown on the page is then rewritten
 * to that size by replacing "_s.jpg" with the suffix from self::$size.
 *
 * @param string $url Address of the sizes page; its 4th to 7th "/"-separated
 *                    elements identify the photo in the size links.
 *
 * @return array|false Array with 'size_available' (all size codes found),
 *                     'best' (selected code) and 'url' => array('img' => …);
 *                     false when the list is missing, the URL is too short or
 *                     no allowed size is available.
 */
public static function get_all_size($url)
{
    $content = self::curl($url);
    $keys = explode('/', $url);

    $lis = Grabber::getTagContent($content, '<ol class="sizes-list"', true);

    // Fewer than seven elements means the photo path below cannot be built.
    if (!$lis || count($keys) < 7) {
        return false;
    }

    // The URL segments are data, not regex syntax: quote them so characters such
    // as ".", "+" or the "#" delimiter cannot alter or break the pattern.
    $photo_path = '/' . $keys[3] . '/' . $keys[4] . '/' . $keys[5] . '/' . $keys[6] . '/';
    $pattern = '#' . preg_quote($photo_path, '#') . '([a-z]{1,2})/#i';
    preg_match_all($pattern, $lis, $matches);

    $tmp = array();
    $tmp['size_available'] = $matches[1];

    // Later matches overwrite earlier ones, so the last allowed size wins.
    foreach ($tmp['size_available'] as $size) {
        if (in_array($size, self::$allowed)) {
            $tmp['best'] = $size;
        }
    }

    if (empty($tmp['best'])) {
        return false;
    }

    $brut_url = Grabber::getTagContent($content, '<div id="allsizes-photo"', true);
    $img = Grabber::getTagAttributeValue($brut_url, "src");
    $tmp['url']['img'] = str_replace("_s.jpg", self::$size[$tmp['best']] . ".jpg", $img);

    return $tmp;
}