示例#1
0
 /**
  * Scrapes a web page and stores what it finds in the session.
  *
  * Downloads $array['url'] with cURL, parses the response as HTML and,
  * depending on the entries of $array['get'], fills these session keys:
  *  - 'image': article_title, article_description, article_keyword (always
  *    an empty string, keyword extraction is not implemented) and
  *    article_image (list of image URLs);
  *  - 'video': article_video (list of <embed>/<iframe> src values).
  * All five session keys are cleared first. Progress text is echoed
  * directly to the output.
  *
  * Recognised keys of $array:
  *  - url          page to download (defaults to '').
  *  - get          list containing 'image' and/or 'video'.
  *  - limit        maximum number of images to keep (defaults to 10).
  *  - accept_image substrings an <img> src must contain to be considered.
  *  - accept_video substrings an <embed>/<iframe> src must contain.
  *  - size         accepted for backward compatibility; it never affects
  *                 the result.
  *
  * @param array $array Scraping options, see above.
  * @return void
  */
 public function web_scraping($array)
 {
     // NOTE(review): $ri is never used below; the instance is still created in
     // case the resizeImage constructor has side effects callers rely on.
     $ri = new resizeImage();
     $pa = new postarticle();

     // Start from a clean slate so results of a previous run never leak through.
     unset(
         $_SESSION['article_image'],
         $_SESSION['article_title'],
         $_SESSION['article_description'],
         $_SESSION['article_keyword'],
         $_SESSION['article_video']
     );

     // Normalise the options; every list falls back to an empty array so the
     // loops below never iterate over a missing or non-array value.
     $wanted = (!empty($array['get']) && is_array($array['get'])) ? $array['get'] : array();
     $want_image = in_array('image', $wanted, true);
     $want_video = in_array('video', $wanted, true);
     $accept_image = (!empty($array['accept_image']) && is_array($array['accept_image']))
         ? $array['accept_image']
         : array();
     $accept_video = (!empty($array['accept_video']) && is_array($array['accept_video']))
         ? $array['accept_video']
         : array();

     // NOTE(review): the values below are echoed unescaped; confirm this debug
     // output is never reachable with untrusted input.
     $url = !empty($array['url']) ? $array['url'] : '';
     echo " url {$url} <br>";
     $limit = !empty($array['limit']) ? $array['limit'] : 10;
     echo " limit {$limit} <br>";

     // Download the page. A failed request leaves $html empty instead of
     // handing `false` to the HTML parser.
     $html = '';
     $ch = curl_init("{$url}");
     if ($ch !== false) {
         curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
         $body = curl_exec($ch);
         if (is_string($body)) {
             $html = $body;
         }
     }

     // Parse leniently: real-world markup is rarely valid, so parser warnings
     // are collected internally and discarded rather than printed.
     $dom = new DOMDocument();
     if ($html !== '') {
         $previous = libxml_use_internal_errors(true);
         $dom->loadHTML($html);
         libxml_clear_errors();
         libxml_use_internal_errors($previous);
     }

     // modal image
     if ($want_image) {
         $keyword = '';

         // Title: ask the helper first, otherwise fall back to the longest of
         // the first <h1>..<h4> headings (the earliest heading wins on a tie).
         $title = $pa->get_title_in_a_website($url);
         if (empty($title)) {
             $best_len = 0;
             foreach (array('h1', 'h2', 'h3', 'h4') as $tag) {
                 $headings = $dom->getElementsByTagName($tag);
                 if ($headings->length === 0) {
                     continue;
                 }
                 $candidate = $headings->item(0)->nodeValue . ' <br> ';
                 if (strlen($candidate) > $best_len) {
                     $best_len = strlen($candidate);
                     $title = $candidate;
                 }
             }
         }

         // Description: the longest paragraph of the page, first one included.
         $description = '';
         $best_len = 0;
         foreach ($dom->getElementsByTagName('p') as $paragraph) {
             $candidate = $paragraph->nodeValue . ' <br> ';
             if (strlen($candidate) > $best_len) {
                 $best_len = strlen($candidate);
                 $description = $candidate;
             }
         }

         $_SESSION['article_title'] = $title;
         $_SESSION['article_description'] = $description;
         $_SESSION['article_keyword'] = $keyword;

         // Images: keep at most $limit sources that contain an accepted
         // substring and that the helper reports as reachable.
         $img = array();
         foreach ($dom->getElementsByTagName('img') as $image) {
             if (count($img) >= $limit) {
                 break;
             }
             $imgSrc = $image->getAttribute('src');
             foreach ($accept_image as $pattern) {
                 if (count($img) >= $limit) {
                     break 2;
                 }
                 // strpos() returns 0 for a match at the start of the string,
                 // so the result must be compared against false explicitly.
                 if (!is_string($pattern) || $pattern === '' || strpos($imgSrc, $pattern) === false) {
                     continue;
                 }
                 // NOTE(review): the accepted pattern (not the page URL) is
                 // what has always been passed as the second argument here;
                 // confirm that is what add_img_url_if_dont_have() expects.
                 $imgSrc = $pa->add_img_url_if_dont_have($imgSrc, $pattern);
                 if ($pa->url_exists($imgSrc)) {
                     $img[] = $imgSrc;
                 }
             }
         }
         $_SESSION['article_image'] = $img;
         echo " after foreach ";
     }

     // modal video
     if ($want_video) {
         // Sources look like e.g. //www.youtube.com/embed/1oju14nG5vo or
         // https://vimeo.com/22506211. <embed> elements are scanned before
         // <iframe> elements, matching the historical result order.
         $vid = array();
         $node_lists = array(
             $dom->getElementsByTagName('embed'),
             $dom->getElementsByTagName('iframe'),
         );
         foreach ($node_lists as $nodes) {
             foreach ($nodes as $node) {
                 $vidSrc = $node->getAttribute('src');
                 foreach ($accept_video as $pattern) {
                     if (!is_string($pattern) || $pattern === '' || strpos($vidSrc, $pattern) === false) {
                         continue;
                     }
                     echo " video src = " . $vidSrc . '<br>';
                     $vid[] = $vidSrc;
                 }
             }
         }
         // Always an array, even when nothing matched.
         $_SESSION['article_video'] = $vid;
     }
 }
示例#2
0
文件: test.php 项目: jesus143/fs-dev
require "../../../fs_folders/php_functions/connect.php";
require "../../../fs_folders/php_functions/function.php";
require "../../../fs_folders/php_functions/myclass.php";
require "../../../fs_folders/php_functions/library.php";
require "../../../fs_folders/php_functions/source.php";
$mc = new myclass();
$pa = new postarticle();
$ri = new resizeImage();
$url = $_GET['url'];
$url = str_replace(' ', '.', $url);
if (empty($url)) {
    $url = 'http://freelancersfashion.blogspot.com/';
}
$bool = true;
if ($bool) {
    if ($pa->url_exists($url)) {
        if ($pa->with_http($url)) {
            // echo "http"
            /*
            $content = file_get_contents($url);
            $imagesArray = $pa->retrieve_images_from_url( $content , $mc , $pa , $ri );
            */
            echo " url1 = {$url} <br>";
            // $article_len =  count($article);
            echo "<hr>";
            if (strpos($url, 'youtube.com')) {
                // $pa->test_get_images_from_url( $url );
                $video_array = $pa->video($url);
                // echo "youtube";
                echo ' 	
								<h1> "' . $video_array[0]['vtitle'] . '"</h1>