Example #1
0
 /**
  * Render the "Analyse des releases" page.
  *
  * Validates the `key` request parameter against the configured ingest key,
  * loads the 10 most recent releases whose category id is '0', runs a
  * Scrapper on each of them and displays the outcome (release name,
  * category label, fiche id) in a generated table.
  *
  * @throws Error Code 3003 when the key is missing or does not match.
  */
 public function procede()
 {
     if (!$this->oRequest->existParam('key')) {
         throw new Error('Vous devez renseigner la clé.', 3003);
     }
     // Compare as strings in constant time: a loose `!=` lets numeric-looking
     // keys juggle to equal values and leaks timing information.
     $sExpectedKey = (string) Config::get('ingestkey');
     $sGivenKey = (string) $this->oRequest->getParam('key', 'string');
     if (!hash_equals($sExpectedKey, $sGivenKey)) {
         throw new Error('La clé est invalide.', 3003);
     }
     // Set the page title
     $this->oView->addData('titre', 'Analyse des releases');
     // Fetch the 10 most recent releases of category '0', together with
     // their concatenated tag ids and torrent dates
     $oMysqli = Database::getInstance();
     $sSqlRequest = "SELECT r.*, \r\n                        (SELECT GROUP_CONCAT(t.id_regex ORDER BY t.id_regex SEPARATOR ';') FROM tks_tags t WHERE t.id_release = r.id) AS tags,\r\n                        (SELECT GROUP_CONCAT(f.date ORDER BY f.date SEPARATOR ';') FROM tks_torrents f WHERE f.id_release = r.id) AS dates\t\r\n                        FROM tks_releases r \r\n                        WHERE r.id_categorie = '0' \r\n                        ORDER BY r.id DESC \r\n                        LIMIT 10";
     $oResults = $oMysqli->query($sSqlRequest);
     $oTable = new TableGenerator();
     $oTable->setId(md5('Scrapper'));
     $oTable->addColumn('Release');
     $oTable->addColumn('Catégorie');
     $oTable->addColumn('ID Fiche');
     $aCategories = Categorie::getCategoriesSelect();
     // NOTE(review): assumes a mysqli-like handle whose query() yields false
     // on failure; in that case an empty table is rendered instead of
     // calling fetch_assoc() on a boolean.
     if ($oResults) {
         while ($aResult = $oResults->fetch_assoc()) {
             $oScrapper = new Scrapper($aResult['name'], $aResult['id']);
             $oScrapper->procede();
             $mCategorie = $oScrapper->getCategorie();
             // An unknown category id yields null rather than an undefined-index notice.
             $sCategorie = isset($aCategories[$mCategorie]) ? $aCategories[$mCategorie] : null;
             $oTable->addLine(array($aResult['name'], $sCategorie, $oScrapper->getFiche()));
         }
     }
     $oTable->setBottom('');
     $oTable->create();
     $this->oView->addData('content', $oTable->getCode());
     $this->oView->Create();
 }
Example #2
0
 /**
  * Collect front-page images from the ejes.com "tapas del día" portal.
  *
  * Every link in the `tapitas` list whose accent-stripped, space-free text
  * contains one of the known newspaper keywords is followed, and the first
  * `<img>` of the linked page that has a `src` is recorded.
  *
  * @return array Map of normalised newspaper name => image URL; empty when
  *               the portal page cannot be downloaded.
  */
 function ejes()
 {
     $ret = array();
     $portal = file_get_contents('http://portal.ejes.com/tapas-del-dia/');
     if ($portal === false) {
         // Download failed: nothing to parse.
         return $ret;
     }
     $s = new Scrapper($portal);
     $tapas = array('Nacion', 'Herald', 'Pagina', 'Tiempo', 'Pais', 'Clarin', 'Cronista', 'Ambito', 'BAE', 'Perfil', 'Economista', 'Estadista');
     foreach ($s->query('//ul[@class="tapitas"]//a') as $a) {
         // The normalised name depends only on the link, so compute it once.
         $diario = str_replace(" ", "", stripAccents($a->text()));
         if (isset($ret[$diario])) {
             // Already resolved: downloading the page again could not change the result.
             continue;
         }
         foreach ($tapas as $tapa) {
             if (strpos($diario, $tapa) === false) {
                 continue;
             }
             $page = file_get_contents($a->attr('href'));
             if ($page !== false) {
                 $t = new Scrapper($page);
                 foreach ($t->query('//img') as $img) {
                     $ret[$diario] = $img->attr('src');
                     if (isset($ret[$diario])) {
                         // Keep the first image that actually has a src.
                         break;
                     }
                 }
             }
             // One download per link is enough, however many keywords match.
             break;
         }
     }
     return $ret;
 }
Example #3
0
 /**
  * Get the current OMSZ radar map image.
  *
  * Passes the remote image URL returned by Scrapper::omsz() to
  * get_local_current_map_image_url(), tagged as type 'omsz' with a '.jpg'
  * extension, and returns whatever that helper returns.
  *
  * @link http://www.met.hu/en/idojaras/aktualis_idojaras/radar/
  *
  * @access public
  *
  * @return string Full URL of image.
  */
 public function omsz_radar()
 {
     return $this->get_local_current_map_image_url(array(
         'type' => 'omsz',
         'extension' => '.jpg',
         'remote_image_url' => Scrapper::omsz(),
     ));
 }
Example #4
0
 /**
  * Retrieve the text of a page through the FiveFilters full-text feed.
  *
  * The given URL is url-encoded and handed to makefulltextfeed.php; the
  * response is loaded into a Scrapper created with the 'xml' option and the
  * text of its `//item/description` node is returned.
  *
  * @param string $url Address of the page to fetch.
  *
  * @return mixed Result of text() on the description node.
  */
 function get_content($url)
 {
     $feedUrl = "http://ftr.fivefilters.org/makefulltextfeed.php?url=" . urlencode($url);
     $feed = new Scrapper($feedUrl, array('xml'));
     $description = $feed->node('//item/description');
     return $description->text();
 }
Example #5
0
 /**
  * Get forecast info from SerbianMeteo, formatted for display.
  *
  * When Scrapper::serbianmeteo() yields a truthy result, two formatted
  * timestamps ('w3c_time', 'human_time') are added and 'title' and
  * 'description' are replaced by their converted/formatted versions.
  * A falsy result is returned untouched.
  *
  * @link http://serbianmeteo.com/
  *
  * @access public
  *
  * @return array Title and content of forecast (or the falsy scraper result).
  */
 public function serbianmeteo_forecast()
 {
     $forecast = Scrapper::serbianmeteo();
     // Nothing scraped: hand the falsy value back as-is.
     if (!$forecast) {
         return $forecast;
     }
     // Machine-readable and localised renderings of the forecast time
     $forecast['w3c_time'] = date(DATE_W3C, $forecast['time']);
     $forecast['human_time'] = date_i18n('j. F Y. у H:i', $forecast['time']);
     // Text conversions
     $forecast['title'] = $this->to_cyrillic($forecast['title']);
     $forecast['description'] = $this->format_forecast($forecast['description']);
     return $forecast;
 }
 /**
  * Scrape the laboratory's publications, members and teams and assemble
  * them into a DB_Datas object.
  *
  * The three data sets are fetched through parent::RetrieveDatas() (the
  * time taken by each fetch is echoed as HTML), then converted into User,
  * Team, UserTeam, Publication and UserPublication records. Generated ids
  * are derived from $this->idLaboratory plus a multiple of 100.
  *
  * @return DB_Datas The populated data container.
  */
 public function ProcessScrappingSite()
 {
     // ##### Data retrieval #####
     echo "<h1> Recuperation des donnees </h1>";
     $time_all = microtime(true);
     $time_start = microtime(true);
     $Publications = parent::RetrieveDatas($this->urlPublications);
     $time_end = microtime(true);
     $time = round($time_end - $time_start, 2);
     echo "Temps Recuperation Publications: {$time} secondes <br>";
     $time_start = microtime(true);
     $Membres = parent::RetrieveDatas($this->urlMembres);
     $time_end = microtime(true);
     $time = round($time_end - $time_start, 2);
     echo "Temps Recuperation Membres: {$time} secondes <br>";
     $time_start = microtime(true);
     $Equipe = parent::RetrieveDatas($this->urlEquipe);
     $time_end = microtime(true);
     $time = round($time_end - $time_start, 2);
     echo "Temps Recuperation Equipe: {$time} secondes <br>";
     $time = round($time_end - $time_all, 2);
     echo "<br>Temps total : {$time} secondes <br>";
     // ##### Data processing #####
     echo "<h1> Traitement des donnees </h1>";
     $finalDatas = new DB_Datas();
     $indice = 0;
     $indiceTeam = 0;
     $indiceUserTeam = 0;
     foreach ($Membres['results']['collection1'] as $membre) {
         if ($indice > 0) {
             // The first element carries no real member data.
             // Trim first so stray leading/trailing blanks do not produce empty name parts.
             $nomPrenom = explode(" ", trim($membre['Nom']['text']));
             $nom = $nomPrenom[0];
             $prenom = isset($nomPrenom[1]) ? $nomPrenom[1] : "";
             $fonction = $membre['Profession'];
             $equipe = $membre['Equipe'];
             $descriptionEquipe = "";
             $url_site = "http://www-lipn.univ-paris13.fr/~{$this->formatStringUniforme($nom)}/";
             $telephone = $membre['Téléphone'];
             $email = $this->CorrectionEmail($membre["Email"]);
             $bureau = $membre['Salle'];
             $age = "";
             $idUser = $this->idLaboratory + 100 * $indice;
             if ($equipe != "") {
                 $idTeam = $this->idLaboratory + 100 * ($indiceTeam + 1);
                 $idTeamContained = $this->getIdElementByName($equipe, $finalDatas->teams);
                 if ($idTeamContained == -1) {
                     // Team not added yet: create it and link the user to it
                     $finalDatas->teams[$indiceTeam] = new Team($idTeam, $equipe, $descriptionEquipe, $this->idLaboratory);
                     $finalDatas->userTeams[$indiceUserTeam] = new UserTeam($idTeam, $idUser);
                     $indiceTeam++;
                 } else {
                     $finalDatas->userTeams[$indiceUserTeam] = new UserTeam($idTeamContained, $idUser);
                 }
                 $indiceUserTeam++;
             }
             $finalDatas->users[$indice] = new User($idUser, $prenom, $nom, $age, $url_site, $telephone, $email, $bureau, $fonction);
         }
         $indice++;
     }
     $finalDatas->university[0] = new University($this->idUniversity, "Paris 13", "99 Avenue J.B. Clément - 93430 Villetaneuse");
     $finalDatas->laboratory[0] = new Laboratory($this->idLaboratory, "LIPN", "Laboratoire d'Informatique de l'université Paris-Nord", $this->idUniversity);
     // ## Publications ##
     $indice = 0;
     $indiceUserPub = 0;
     foreach ($Publications['response']['docs'] as $publication) {
         $publicationName = $publication['title_s'][0];
         $date_publication = $publication['producedDate_s'];
         $authors = $publication['authFullName_s'];
         $description = "";
         $url = $publication['uri_s'];
         $idPublication = $this->idLaboratory + 100 * ($indice + 1);
         $finalDatas->publications[$indice] = new Publication($idPublication, $publicationName, $description, $url, $date_publication);
         foreach ($authors as $author) {
             $splitedName = explode(" ", $author);
             $authorFirstName = $splitedName[0];
             // Single-token author names have no second part; avoid the undefined offset.
             $authorLastName = isset($splitedName[1]) ? $splitedName[1] : "";
             $idUserExisting = $this->getIdElementByFirstNameAndLastName($authorFirstName, $authorLastName, $finalDatas->users);
             if ($idUserExisting != -1) {
                 $finalDatas->userPublications[$indiceUserPub++] = new UserPublication($idPublication, $idUserExisting);
             }
         }
         $indice++;
     }
     // ## Teams (description) ##
     foreach ($Equipe['results']['collection1'] as $equipe) {
         $indexExistingTeam = $this->getIndexElementByName($equipe['Nom']['text'], $finalDatas->teams);
         if ($indexExistingTeam != -1) {
             $finalDatas->teams[$indexExistingTeam]->Description = $equipe['Description'];
         }
     }
     return $finalDatas;
 }
Example #7
0
         $t = time() - 7 * 24 * 60 * 60;
         $y = date("Y", $t);
         $m = date("m", $t);
         $d = date("d", $t);
         $t1 = "{$d}/{$m}/{$y}";
         $t = time();
         $y = date("Y", $t);
         $m = date("m", $t);
         $d = date("d", $t);
         $t2 = "{$d}/{$m}/{$y}";
         $s = new Scrapper("{$url}/resultados.html", array('post' => array('search' => $nombre, 'desde' => $t1, 'hasta' => $t2)));
         $primero = true;
         foreach ($s->query('//*[@id="seccion-arriba"]/article//a') as $a) {
             $link = $url . $a->attr('href');
             $title = $a->text();
             $t = new Scrapper($link);
             $author = $t->node('//li[@class="autor"]')->text();
             if (strpos($author, $nombre) !== false) {
                 $content = $t->node('//*[@id="header-noticia"]/h3')->html();
                 $content .= $t->node('//figure[@id="foto"]')->html();
                 $content .= $t->node('//div[@itemprop="articleBody"]')->html();
                 $content = preg_replace('#(?:<br\\s*/?>\\s*?)+#', '</p><p>', $content);
                 $content = preg_replace('#src="/#', 'src="' . $url . '/', $content);
                 $content = preg_replace('#href="/#', 'href="' . $url . '/', $content);
                 $arts[] = new Article($link, $title, $author, $content);
             }
         }
     }
     return $arts;
 };
 $get_content = function (&$art) {
Example #8
0
<?php

/**
 * Index script (also callable from the command line): scrapes one Sainsbury's
 * category page and prints the resulting array as pretty-printed JSON wrapped
 * in <pre> tags.
 */
require_once 'include/class.scraper.php';
$url = "http://www.sainsburys.co.uk/webapp/wcs/stores/servlet/CategoryDisplay?msg=&langId=44&categoryId=185749&storeId=10151&krypto=XkNJuRN9KLq6n6uH5Mz8vPJfKTPApwG8ucakhoeAQfAFnb5qGoUjeNIcE37XJqsLFKgqJpn0KSE2%0A4jWWsyGCqL9MrFzaSCurcTmEROPW2THpmiThWdfCBMkVMSAzC8evzTjPcnXztC7gZGu6swc%2BYE%2Bg%0Ar8dazzckCO9eiVbSKD7I%2BqoK45FoyfB5vK58kXkI%2FokZVqWZgIcEc7yXITiZeunS4A409YSjTfwF%0A9Y8J5255LV9jsLZBkMAnB%2Fl6zy53JRXhDEg%2BB7w7Lls7d16DjbsU0i4zzK4W15E%2BSTHddb4%3D#langId=44&storeId=10151&catalogId=10137&categoryId=185749&parent_category_rn=12518&top_category=12518&pageSize=20&orderBy=FAVOURITES_FIRST&searchTerm=&beginIndex=0&hideFilters=true";
$pageScrapper = new Scrapper($url);
$scraped = $pageScrapper->createJsonArray();
// Comma-separated echo keeps the original output order: opening tag, JSON, closing tag.
echo '<pre>', json_encode($scraped, JSON_PRETTY_PRINT), "</pre>";
Example #9
0
 /**
  * Build an <img> tag for the La Nación (Paraguay) front page of the day
  * given by $this->t.
  *
  * Scans the links of the "tapa" category listing for the first href that
  * contains the Y/m/d date path, opens that page and reads the
  * `data-lazy-src` attribute of the image inside the post content.
  *
  * @return string|null HTML snippet, or null when no link matches the date.
  */
 function py_lanacion()
 {
     $datePath = date("Y/m/d", $this->t);
     $listing = new Scrapper('http://www.lanacion.com.py/category/tapa/', array('silence'));
     foreach ($listing->query('//article//a') as $link) {
         $href = $link->attr('href');
         if (strpos($href, $datePath) === false) {
             continue;
         }
         // First link for the requested day: fetch it and stop looking.
         $post = new Scrapper($href, array('silence'));
         $img = $post->node('//div[@class="newsstand-blog-single-content"]/p//img')->attr('data-lazy-src');
         return '<img src="' . $img . '" style="width:100%;"><br>';
     }
 }
 /**
  * Scrape the laboratory's members, publications and teams and assemble
  * them into a DB_Datas object.
  *
  * Each data set is fetched through parent::RetrieveDatas() right before it
  * is processed, and the time taken by each fetch is echoed as HTML. Record
  * ids are the plain loop indices. The resulting teams are echoed at the
  * end as a debugging aid.
  *
  * @return DB_Datas The populated data container.
  */
 public function ProcessScrappingSite()
 {
     // ##### Data retrieval #####
     echo "<h1> Recuperation des donnees </h1>";
     $time_all = microtime(true);
     $time_start = microtime(true);
     $Membres = parent::RetrieveDatas($this->urlMembres);
     $time_end = microtime(true);
     $time = round($time_end - $time_start, 2);
     echo "Temps Recuperation Membres: {$time} secondes <br>";
     // NOTE(review): this "total" only covers the members fetch; publications
     // and teams are retrieved (and timed) further down.
     $time = round($time_end - $time_all, 2);
     echo "<br>Temps total : {$time} secondes <br>";
     // ##### Data processing #####
     echo "<h1> Traitement des donnees </h1>";
     $finalDatas = new DB_Datas();
     $indice = 0;
     $indiceTeam = 0;
     $indiceUserTeam = 0;
     foreach ($Membres['results']['collection1'] as $membre) {
         if ($indice > 0) {
             // The first element carries no real member data.
             // Trim first so stray leading/trailing blanks do not produce empty name parts.
             $nomPrenom = explode(" ", trim($membre['Nom']['text']));
             $nom = $nomPrenom[0];
             // Single-token names have no second part; avoid the undefined offset.
             $prenom = isset($nomPrenom[1]) ? $nomPrenom[1] : "";
             $fonction = $membre['Profession'];
             $equipe = $membre['Equipe'];
             $descriptionEquipe = "";
             $url_site = $membre['url'];
             $telephone = $membre['Téléphone'];
             $email = $membre['Email'];
             $bureau = $membre['Salle'];
             $age = "";
             if ($equipe != "") {
                 $idTeamContained = $this->getIdElementByName($equipe, $finalDatas->teams);
                 if ($idTeamContained == -1) {
                     // Team not added yet: create it and link the user to it
                     $finalDatas->teams[$indiceTeam] = new Team($indiceTeam, $equipe, $descriptionEquipe, $this->idLaboratory);
                     $finalDatas->userTeams[$indiceUserTeam] = new UserTeam($indiceTeam, $indice);
                     $indiceTeam++;
                 } else {
                     $finalDatas->userTeams[$indiceUserTeam] = new UserTeam($idTeamContained, $indice);
                 }
                 $indiceUserTeam++;
             }
             $finalDatas->users[$indice] = new User($indice, $prenom, $nom, $age, $url_site, $telephone, $email, $bureau, $fonction);
         }
         $indice++;
     }
     $finalDatas->university[0] = new University($this->idUniversity, "Paris 13", "99 Avenue J.B. Clément - 93430 Villetaneuse");
     $finalDatas->laboratory[0] = new Laboratory($this->idLaboratory, "L2TI", "Laboratoire de traitement et transport de l'information ", $this->idUniversity);
     // ## Publications ##
     $time_start = microtime(true);
     $Publications = parent::RetrieveDatas($this->urlPublications);
     $time_end = microtime(true);
     $time = round($time_end - $time_start, 2);
     echo "Temps Recuperation Publications: {$time} secondes <br>";
     $indice = 0;
     $indiceUserPub = 0;
     foreach ($Publications['response']['docs'] as $publication) {
         $publicationName = $publication['title_s'][0];
         $date_publication = $publication['producedDate_s'];
         $authors = $publication['authFullName_s'];
         $description = "";
         $url = $publication['uri_s'];
         $finalDatas->publications[$indice] = new Publication($indice, $publicationName, $description, $url, $date_publication);
         foreach ($authors as $author) {
             $splitedName = explode(" ", $author);
             $authorFirstName = $splitedName[0];
             // Single-token author names have no second part; avoid the undefined offset.
             $authorLastName = isset($splitedName[1]) ? $splitedName[1] : "";
             $idUserExisting = $this->getIdElementByFirstNameAndLastName($authorFirstName, $authorLastName, $finalDatas->users);
             if ($idUserExisting != -1) {
                 $finalDatas->userPublications[$indiceUserPub++] = new UserPublication($indice, $idUserExisting);
             }
         }
         $indice++;
     }
     // ## Teams (description) ##
     $time_start = microtime(true);
     $Equipe = parent::RetrieveDatas($this->urlEquipe);
     $time_end = microtime(true);
     $time = round($time_end - $time_start, 2);
     echo "Temps Recuperation Equipe: {$time} secondes <br>";
     foreach ($Equipe['results']['collection1'] as $equipe) {
         $indexExistingTeam = $this->getIndexElementByName($equipe['Nom']['text'], $finalDatas->teams);
         if ($indexExistingTeam != -1) {
             $finalDatas->teams[$indexExistingTeam]->Description = $equipe['Description'];
         }
     }
     // ##### Debug output: list the teams that were built #####
     $indice = 0;
     foreach ($finalDatas->teams as $team) {
         echo "Team No {$indice} : { Id = '{$team->Id}', Name = '{$team->Name}', Description = '{$team->Description}', LaboratoryId = '{$team->LaboratoryId}' } <br>";
         $indice++;
     }
     return $finalDatas;
 }