/**
 * Crawls every category URL listed in ../file/cats_list.ccd and appends the
 * href of the first <a> found inside each element carrying the CSS class
 * "product-container" to ../file/url_list.ccd (one link per line).
 *
 * Every collected link is also echoed, followed by '<br/>'.
 * Warnings are masked for the rest of the request via error_reporting().
 */
function getProducts()
{
    // include_once so that a second call does not redeclare classes/functions.
    include_once "../classes/httpFile.php";
    include_once "../functions/crawler_functions.php";
    error_reporting(E_ALL ^ E_WARNING);

    $http = new HttpConnection();
    $http->setCookiePath("cookies/");
    $http->init();

    $DOM = new DOMDocument();

    $contents = file_get_contents('../file/cats_list.ccd');
    if ($contents === false) {
        // Unreadable list file: nothing to crawl.
        $contents = '';
    }

    // explode() replaces split(), which was deprecated in PHP 5.3 and removed in PHP 7.
    $url_array = explode(PHP_EOL, $contents);

    $classname = "product-container";
    // Match $classname as a whole token of the space-separated class attribute.
    $class_query = "//*[contains(concat(' ', normalize-space(@class), ' '), ' {$classname} ')]";

    while (count($url_array) > 0) {
        $url = trim(array_shift($url_array));
        if ($url === '') {
            // Blank lines (e.g. the trailing newline of the list file) are not URLs.
            continue;
        }

        $http->get($url, true);
        // get_dom() is defined in crawler_functions.php; presumably it loads the page into $DOM.
        get_dom($url, $DOM, $http);

        $finder = new DomXPath($DOM);
        $product = $finder->query($class_query);

        foreach ($product as $p) {
            $first_link = $p->getElementsByTagName('a')->item(0);
            if ($first_link === null) {
                // Container without any anchor: nothing to record.
                continue;
            }

            $enlace = $first_link->getAttribute('href');
            echo $enlace . '<br/>';
            file_put_contents("../file/url_list.ccd", $enlace . PHP_EOL, FILE_APPEND);
        }
    }

    $http->close();
}
/**
 * Crawls every category URL listed in ../file/cats_list.ccd and hands each
 * loaded page to getProductsLinks().
 *
 * When the page contains an element with id "pagination_next_bottom", the
 * trailing number of its first link's href is taken as the highest page to
 * visit, and pages 2..N are requested as "<url>?p=<i>".
 */
function getProducts()
{
    // include_once so that a second call does not redeclare classes/functions.
    include_once "../classes/httpFile.php";
    include_once "../functions/crawler_functions.php";
    error_reporting(E_ALL ^ E_WARNING);

    $http = new HttpConnection();
    $http->setCookiePath("cookies/");
    $http->init();

    $DOM = new DOMDocument();

    $contents = file_get_contents('../file/cats_list.ccd');
    if ($contents === false) {
        // Unreadable list file: nothing to crawl.
        $contents = '';
    }
    $url_array = explode(PHP_EOL, $contents);

    while (count($url_array) > 0) {
        $url = trim(array_shift($url_array));
        if ($url === '') {
            // Blank lines (e.g. the trailing newline of the list file) are not URLs.
            continue;
        }

        // get_dom() is defined in crawler_functions.php; presumably it loads the page into $DOM.
        get_dom($url, $DOM, $http);
        getProductsLinks($DOM);

        $paginator = $DOM->getElementById('pagination_next_bottom');
        if ($paginator === null) {
            // Single-page category.
            continue;
        }

        $first_link = $paginator->getElementsByTagName('a')->item(0);
        if ($first_link === null) {
            continue;
        }

        // Read the whole trailing number of the href as an integer. The
        // previous code kept only the last character and appended '<br/>',
        // which made the loop bound a non-numeric string.
        $pages = 0;
        if (preg_match('/(\d+)$/', $first_link->getAttribute('href'), $matches)) {
            $pages = (int) $matches[1];
        }

        for ($i = 2; $i <= $pages; $i++) {
            $page_url = $url . '?p=' . $i;
            $http->get($page_url, true);
            // Pass the paged URL so the DOM is built from the page that was just
            // requested rather than from the first page again.
            // NOTE(review): confirm get_dom() uses its first argument as the URL to load.
            get_dom($page_url, $DOM, $http);
            getProductsLinks($DOM);
        }
    }

    $http->close();
}
/**
 * Crawls every category URL listed in ../file/cats_list.ccd and, for each
 * element whose id contains "listado_producto_referencia", appends
 * $base_url + the href of its first <a> (spaces encoded as %20) to
 * ../file/url_list.ccd.
 *
 * Every extracted href is also echoed, followed by '<br />'.
 */
function getProducts()
{
    $base_url = 'http://www.marcapl.com/marca/';

    // include_once so that a second call does not redeclare classes/functions.
    include_once "../classes/httpFile.php";
    include_once "../functions/crawler_functions.php";

    $http = new HttpConnection();
    $http->init('galleta');

    $DOM = new DOMDocument();

    $contents = file_get_contents('../file/cats_list.ccd');
    if ($contents === false) {
        // Unreadable list file: nothing to crawl.
        $contents = '';
    }
    $url_array = explode(PHP_EOL, $contents);

    while (count($url_array) > 0) {
        // Trim so that blank, space-only and "\r"-only lines are all skipped.
        $url = trim(array_shift($url_array));
        if ($url === '') {
            echo "ENtrando";
            continue;
        }

        // Example of a category URL found in the list file:
        // http://www.marcapl.com/marca/index.php?seccion=productos&productos=listado&seccion1=Guantes%20de%20Trabajo&seccion2=Abrigo&i=0

        // get_dom() is defined in crawler_functions.php; presumably it loads the page into $DOM.
        get_dom($url, $DOM, $http);

        // An XPath contains() match is used instead of getElementById(), so every
        // element whose id includes the marker is returned, not just one.
        $finder = new DomXPath($DOM);
        $listado = $finder->query("//*[contains(@id, 'listado_producto_referencia')]");

        foreach ($listado as $item) {
            // Check for the anchor BEFORE dereferencing it; the old null check ran
            // after getAttribute() and tested the node list, which is never null.
            $first_link = $item->getElementsByTagName('a')->item(0);
            if ($first_link === null) {
                continue;
            }

            $enlace = $first_link->getAttribute('href');
            file_put_contents(
                "../file/url_list.ccd",
                $base_url . str_replace(" ", "%20", $enlace) . PHP_EOL,
                FILE_APPEND
            );
            echo $enlace . '<br />';
        }
    }

    $http->close();
}
/**
 * Crawls category URLs from ../file/cats_list.ccd in batches.
 *
 * Categories are processed until the list is exhausted or the running total
 * returned by getProductsLinks() reaches 400. For paginated categories the
 * last page number is read from the element sibling that precedes
 * #pagination_next_bottom, and pages 2..N are requested.
 *
 * If URLs remain unprocessed, ../file/cats_list.ccd is rewritten with only
 * those URLs so a later run can continue. A summary is echoed at the end.
 */
function getProducts()
{
    // include_once so that a second call does not redeclare classes/functions.
    include_once "../classes/httpFile.php";
    include_once "../functions/crawler_functions.php";
    error_reporting(E_ALL ^ E_WARNING);

    $http = new HttpConnection();
    $http->setCookiePath("cookies/");
    $http->init();

    $contador = 0;
    $DOM = new DOMDocument();

    $contents = file_get_contents('../file/cats_list.ccd');
    if ($contents === false) {
        // Unreadable list file: nothing to crawl.
        $contents = '';
    }
    $url_array = explode(PHP_EOL, $contents);

    while (count($url_array) > 0 && $contador < 400) {
        // The previous guard used "$url = ' '" (assignment), which is always
        // truthy and therefore skipped every single URL.
        $url = trim(array_shift($url_array));
        if ($url === '') {
            continue;
        }

        // get_dom() is defined in crawler_functions.php; presumably it loads the page into $DOM.
        get_dom($url, $DOM, $http);

        // Links of the first page count toward the total, paginated or not.
        $contador = $contador + getProductsLinks($DOM);

        $paginator = $DOM->getElementById('pagination_next_bottom');
        if ($paginator === null) {
            // Single-page category.
            continue;
        }

        // Walk back to the nearest preceding element node (skipping text and
        // comment nodes); its first link is read as the last page link.
        $link_paginator = $paginator->previousSibling;
        while ($link_paginator !== null && $link_paginator->nodeType !== XML_ELEMENT_NODE) {
            $link_paginator = $link_paginator->previousSibling;
        }
        if ($link_paginator === null) {
            continue;
        }

        $link_last = $link_paginator->getElementsByTagName('a')->item(0);
        if ($link_last === null) {
            continue;
        }

        // Read the whole trailing number of the href as an integer. The
        // previous code kept only the last character and appended '<br/>',
        // which made the loop bound a non-numeric string.
        $pages = 0;
        if (preg_match('/(\d+)$/', $link_last->getAttribute('href'), $matches)) {
            $pages = (int) $matches[1];
        }

        for ($i = 2; $i <= $pages; $i++) {
            echo $i . '<br />';
            // NOTE(review): id_category=58 and n=9 are hard-coded for every
            // category URL; confirm this is intended.
            $page_url = $url . '?id_category=58&n=9&p=' . $i;
            $http->get($page_url, true);
            get_dom($page_url, $DOM, $http);
            $contador = $contador + getProductsLinks($DOM);
        }
    }

    $http->close();

    // Capture the count before reporting; the old code drained the array
    // while rewriting the file and then always reported 0 remaining.
    $remaining = count($url_array);
    if ($remaining > 0) {
        // Persist only the unprocessed URLs, one per line.
        file_put_contents("../file/cats_list.ccd", implode(PHP_EOL, $url_array) . PHP_EOL);
    }

    echo "</br>Se han procesado: " . $contador . " urls</br>";
    echo "Quedan " . $remaining . " por procesar<br />";
}
// Twitter: fetch up to 25 tweets for #meat and append them to $data_post.
$getfield = '?q=%23meat&count=25';
$requestMethod = 'GET';
$twitter = new TwitterAPIExchange($settings);
$data_twitter = json_decode(
    $twitter->setGetfield($getfield)->buildOauth($url, $requestMethod)->performRequest(),
    true
);

// A failed request or an error payload has no 'statuses' list; skip it
// instead of iterating over null.
if (isset($data_twitter['statuses']) && is_array($data_twitter['statuses'])) {
    foreach ($data_twitter['statuses'] as $statuse) {
        // Format the date, shifted back 3 hours.
        $date = new DateTime($statuse['created_at']);
        $date->modify('-3 hours');
        $date_format = $date->format('d/m/Y H:i:s');

        // Build the entry for the JSON output.
        $data_post[] = array(
            'type' => 'twitter',
            'content' => $statuse['text'],
            'date' => $date_format,
            'likes' => $statuse['favorite_count'],
        );
    }
}
// end twitter

// Instagram: fetch up to 25 recent media items tagged "meat".
// NOTE(review): the access token is hard-coded in the request URL; consider
// loading it from configuration kept outside version control.
$http = new HttpConnection();
$http->init();
$data_instagram = json_decode(
    $http->get("https://api.instagram.com/v1/tags/meat/media/recent?access_token=44110995.1677ed0.6d87a7ce19f544c99e2912686465de59&count=25"),
    true
);
$http->close();

// Same guard as above: only iterate when the response carries a 'data' list.
if (isset($data_instagram['data']) && is_array($data_instagram['data'])) {
    foreach ($data_instagram['data'] as $data) {
        // Format the date: created_time is shifted back 4 hours (in seconds).
        // NOTE(review): Twitter entries are shifted 3 hours, these 4 — confirm
        // the difference is intentional.
        $horas_restantes = 4;
        $date_format = $data['created_time'] - $horas_restantes * 3600;
        $date_format = date('d/m/Y H:i:s', $date_format);

        // The caption may be missing; fall back to an empty string.
        $caption_text = isset($data['caption']['text']) ? $data['caption']['text'] : '';

        // Build the entry for the JSON output.
        $data_post[] = array(
            'type' => 'instagram',
            'content' => $data['images']['standard_resolution']['url'] . ' ' . $caption_text,
            'date' => $date_format,
            'likes' => $data['likes']['count'],
        );
    }
}
// end instagram

// Router
$router = new AltoRouter();
$router->setBasePath('');
$router->map('GET', '/posts', 'posts.php', 'posts');