/**
 * Scrape a web page and stash what was found in the session.
 *
 * The page at $array['url'] is downloaded with cURL and parsed with
 * DOMDocument. Depending on the entries of $array['get'] the method fills:
 *
 *  - 'image': $_SESSION['article_title'], $_SESSION['article_description'],
 *             $_SESSION['article_keyword'] (always an empty string for now)
 *             and $_SESSION['article_image'] (list of image URLs).
 *  - 'video': $_SESSION['article_video'] (list of <embed>/<iframe> sources).
 *
 * All five session keys are cleared first, so keys belonging to a part that
 * was not requested stay unset. The method also prints debug lines with
 * echo (url, limit, matched video sources); that output is kept as-is
 * except that dynamic values are now HTML-escaped.
 *
 * Recognised keys of $array:
 *  - 'get'          list of strings, any of 'image' / 'video'
 *  - 'url'          page to scrape
 *  - 'limit'        maximum number of images to keep (default 10)
 *  - 'accept_image' list of substrings an <img> src must contain
 *  - 'accept_video' list of substrings an <embed>/<iframe> src must contain
 *  - 'size'         accepted for backward compatibility; not used here
 *
 * NOTE(review): get_title_in_a_website(), add_img_url_if_dont_have() and
 * url_exists() live in postarticle, outside this block. The image URL
 * helper is now given the page URL; the previous code passed the accepted
 * extension because the loop variable shadowed $url - confirm against the
 * helper's implementation.
 *
 * @param array $array scraping options, see above
 * @return void
 */
public function web_scraping($array)
{
    $pa = new postarticle();

    // Start from a clean slate so stale results of a previous run never leak through.
    unset(
        $_SESSION['article_image'],
        $_SESSION['article_title'],
        $_SESSION['article_description'],
        $_SESSION['article_keyword'],
        $_SESSION['article_video']
    );

    // Which parts of the page the caller is interested in.
    $requested   = $this->web_scraping_option_list($array, 'get');
    $want_images = in_array('image', $requested, true);
    $want_videos = in_array('video', $requested, true);

    $page_url = !empty($array['url']) ? $array['url'] : '';
    echo ' url ' . htmlspecialchars((string) $page_url, ENT_QUOTES, 'UTF-8') . ' <br>';

    $limit = !empty($array['limit']) ? $array['limit'] : 10;
    echo ' limit ' . htmlspecialchars((string) $limit, ENT_QUOTES, 'UTF-8') . ' <br>';

    // Download and parse the page. A failed or empty download leaves the
    // document empty, so every lookup below simply yields nothing
    // (loadHTML() must not be called with an empty string on PHP 8).
    $dom  = new DOMDocument();
    $html = $this->web_scraping_fetch($page_url);
    if (trim($html) !== '') {
        // Real-world markup is rarely valid; collect parser errors
        // internally instead of emitting warnings, then restore the setting.
        $previous = libxml_use_internal_errors(true);
        $dom->loadHTML($html);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);
    }

    if ($want_images) {
        $keyword = '';

        // Title: prefer what the helper reports, otherwise fall back to the
        // longest of the first <h1>..<h4> headings.
        $title = $pa->get_title_in_a_website($page_url);
        if (empty($title)) {
            $headings = array();
            foreach (array('h1', 'h2', 'h3', 'h4') as $tag) {
                $headings[] = $this->web_scraping_first_text($dom, $tag);
            }
            $title = $this->web_scraping_longest($headings, $title);
        }

        // Description: the longest paragraph on the page.
        $paragraphs = array();
        foreach ($dom->getElementsByTagName('p') as $node) {
            $paragraphs[] = $node->nodeValue . ' <br> ';
        }
        $description = $this->web_scraping_longest($paragraphs, '');

        $_SESSION['article_title']       = $title;
        $_SESSION['article_description'] = $description;
        $_SESSION['article_keyword']     = $keyword;

        // Images: keep up to $limit sources that contain an accepted
        // fragment and that the helper reports as existing.
        $accepted = $this->web_scraping_option_list($array, 'accept_image');
        $img      = array();
        $found    = 0;
        foreach ($dom->getElementsByTagName('img') as $image) {
            if (!($found < $limit)) {
                break;
            }
            $src = $image->getAttribute('src');
            foreach ($accepted as $fragment) {
                if (!$this->web_scraping_contains($src, $fragment)) {
                    continue;
                }
                $candidate = $pa->add_img_url_if_dont_have($src, $page_url);
                if ($pa->url_exists($candidate)) {
                    $img[$found] = $candidate;
                    $found++;
                }
                // One entry per <img>, even if several fragments match.
                break;
            }
        }

        $_SESSION['article_image'] = $img;
        echo " after foreach ";
    }

    if ($want_videos) {
        $accepted = $this->web_scraping_option_list($array, 'accept_video');
        $vid      = array();

        // <embed> elements first, then <iframe>, same order as before.
        foreach (array('embed', 'iframe') as $tag) {
            foreach ($dom->getElementsByTagName($tag) as $node) {
                $src = $node->getAttribute('src');
                foreach ($accepted as $fragment) {
                    if (!$this->web_scraping_contains($src, $fragment)) {
                        continue;
                    }
                    echo " video src = " . htmlspecialchars($src, ENT_QUOTES, 'UTF-8') . '<br>';
                    $vid[] = $src;
                    // One entry per element, even if several fragments match.
                    break;
                }
            }
        }

        $_SESSION['article_video'] = $vid;
    }
}

/**
 * Return $options[$key] when it is a non-empty array, otherwise an empty array.
 */
private function web_scraping_option_list($options, $key)
{
    if (is_array($options) && !empty($options[$key]) && is_array($options[$key])) {
        return $options[$key];
    }

    return array();
}

/**
 * Download $url with cURL and return the body, or '' when the request fails.
 */
private function web_scraping_fetch($url)
{
    $handle = curl_init((string) $url);
    if ($handle === false) {
        return '';
    }

    curl_setopt($handle, CURLOPT_RETURNTRANSFER, true);
    $body = curl_exec($handle);

    // The handle is released when $handle goes out of scope on return.
    return is_string($body) ? $body : '';
}

/**
 * Text of the first <$tag> element followed by ' <br> ', or '' when there is none.
 */
private function web_scraping_first_text(DOMDocument $dom, $tag)
{
    $node = $dom->getElementsByTagName($tag)->item(0);

    return $node === null ? '' : $node->nodeValue . ' <br> ';
}

/**
 * Longest string of $candidates (first one wins on ties); $default when all are empty.
 */
private function web_scraping_longest(array $candidates, $default)
{
    $longest = $default;
    $max     = 0;
    foreach ($candidates as $candidate) {
        $length = strlen($candidate);
        if ($max < $length) {
            $max     = $length;
            $longest = $candidate;
        }
    }

    return $longest;
}

/**
 * Whether the non-empty scalar $needle occurs anywhere in $haystack, offset 0 included.
 */
private function web_scraping_contains($haystack, $needle)
{
    if (!is_scalar($needle) || (string) $needle === '') {
        return false;
    }

    return strpos((string) $haystack, (string) $needle) !== false;
}
require "../../../fs_folders/php_functions/connect.php"; require "../../../fs_folders/php_functions/function.php"; require "../../../fs_folders/php_functions/myclass.php"; require "../../../fs_folders/php_functions/library.php"; require "../../../fs_folders/php_functions/source.php"; $mc = new myclass(); $pa = new postarticle(); $ri = new resizeImage(); $url = $_GET['url']; $url = str_replace(' ', '.', $url); if (empty($url)) { $url = 'http://freelancersfashion.blogspot.com/'; } $bool = true; if ($bool) { if ($pa->url_exists($url)) { if ($pa->with_http($url)) { // echo "http" /* $content = file_get_contents($url); $imagesArray = $pa->retrieve_images_from_url( $content , $mc , $pa , $ri ); */ echo " url1 = {$url} <br>"; // $article_len = count($article); echo "<hr>"; if (strpos($url, 'youtube.com')) { // $pa->test_get_images_from_url( $url ); $video_array = $pa->video($url); // echo "youtube"; echo ' <h1> "' . $video_array[0]['vtitle'] . '"</h1>