Example #1
0
/**
 * Crawls every category URL listed in ../file/cats_list.ccd and harvests
 * product links from the fetched pages.
 *
 * For each element whose class list contains "product-container", the href
 * of its first <a> descendant is echoed (followed by <br/>) and appended to
 * ../file/url_list.ccd, one link per line.
 *
 * @return void
 */
function getProducts()
{
    include "../classes/httpFile.php";
    include "../functions/crawler_functions.php";
    // Report everything except warnings.
    error_reporting(E_ALL ^ E_WARNING);

    $http = new HttpConnection();
    $http->setCookiePath("cookies/");
    $http->init();
    $DOM = new DOMDocument();

    // explode() replaces split(), which was deprecated in PHP 5.3 and removed in PHP 7.
    $url_array = explode(PHP_EOL, file_get_contents('../file/cats_list.ccd'));

    // Canonical "has this class token" XPath test: pad the normalised class
    // list with spaces so the token matches at the start, middle or end.
    $classname = "product-container";
    $query = "//*[contains(concat(' ', normalize-space(@class), ' '), ' {$classname} ')]";

    while (count($url_array) > 0) {
        $url = trim(array_shift($url_array));
        if ($url === '') {
            // Blank lines (e.g. the one produced by a trailing newline) are not URLs.
            continue;
        }

        $http->get($url, true);
        get_dom($url, $DOM, $http);

        $finder = new DomXPath($DOM);
        $product = $finder->query($query);
        foreach ($product as $p) {
            $first_anchor = $p->getElementsByTagName('a')->item(0);
            if ($first_anchor === null) {
                // Container without any link: nothing to record.
                continue;
            }
            $enlace = $first_anchor->getAttribute('href');
            echo $enlace . '<br/>';
            file_put_contents("../file/url_list.ccd", $enlace . PHP_EOL, FILE_APPEND);
        }
    }

    $http->close();
}
Example #2
0
/**
 * Crawls every category URL listed in ../file/cats_list.ccd, following the
 * pagination when the page has a "pagination_next_bottom" element.
 *
 * The first page of each category is always handed to getProductsLinks().
 * When a paginator is present, the page count is read from the trailing
 * digits of the href of its first <a>, and pages 2..N are fetched with a
 * "p=<page>" query parameter and handed to getProductsLinks() as well.
 *
 * @return void
 */
function getProducts()
{
    include "../classes/httpFile.php";
    include "../functions/crawler_functions.php";
    // Report everything except warnings.
    error_reporting(E_ALL ^ E_WARNING);

    $http = new HttpConnection();
    $http->setCookiePath("cookies/");
    $http->init();
    $DOM = new DOMDocument();
    $url_array = explode(PHP_EOL, file_get_contents('../file/cats_list.ccd'));

    while (count($url_array) > 0) {
        $url = trim(array_shift($url_array));
        if ($url === '') {
            // Blank lines (e.g. the one produced by a trailing newline) are not URLs.
            continue;
        }

        get_dom($url, $DOM, $http);
        $paginator = $DOM->getElementById('pagination_next_bottom');

        // The first page is processed whether or not there is a paginator.
        getProductsLinks($DOM);
        if ($paginator === null) {
            continue;
        }

        $anchor = $paginator->getElementsByTagName('a')->item(0);
        if ($anchor === null) {
            continue;
        }

        // Read the page count as an integer from the trailing digits of the
        // href, so counts of 10 or more work and the loop bound is numeric.
        $pages = 0;
        if (preg_match('/(\d+)$/', $anchor->getAttribute('href'), $match)) {
            $pages = (int) $match[1];
        }

        // Use '&' when the category URL already carries a query string.
        $separator = (strpos($url, '?') === false) ? '?' : '&';
        for ($i = 2; $i <= $pages; $i++) {
            $page_url = $url . $separator . 'p=' . $i;
            $http->get($page_url, true);
            // Pass the paged URL to get_dom(), matching the $http->get() call above.
            get_dom($page_url, $DOM, $http);
            getProductsLinks($DOM);
        }
    }

    $http->close();
}
Example #3
0
/**
 * Crawls every listing URL in ../file/cats_list.ccd and records the product
 * links found on each page.
 *
 * For every element whose id contains "listado_producto_referencia", the
 * href of its first <a> descendant is prefixed with the site base URL,
 * has its spaces replaced by %20, and is appended to ../file/url_list.ccd.
 * The raw href is also echoed followed by <br />.
 *
 * @return void
 */
function getProducts()
{
    $base_url = 'http://www.marcapl.com/marca/';
    include "../classes/httpFile.php";
    include "../functions/crawler_functions.php";

    $http = new HttpConnection();
    $http->init('galleta');
    $DOM = new DOMDocument();
    $url_array = explode(PHP_EOL, file_get_contents('../file/cats_list.ccd'));

    while (count($url_array) > 0) {
        $url = trim(array_shift($url_array));
        if ($url === '') {
            // Empty or whitespace-only line: report it and move on.
            echo "ENtrando";
            continue;
        }

        get_dom($url, $DOM, $http);

        // contains() matches every element whose id includes the marker,
        // not just a single element with that exact id.
        $finder = new DomXPath($DOM);
        $listado = $finder->query("//*[contains(@id, 'listado_producto_referencia')]");

        foreach ($listado as $item) {
            // getElementsByTagName() always returns a list, so the presence
            // of a link has to be checked on the first item itself.
            $anchor = $item->getElementsByTagName('a')->item(0);
            if ($anchor === null) {
                continue;
            }
            $enlace = $anchor->getAttribute('href');
            file_put_contents("../file/url_list.ccd", $base_url . str_replace(" ", "%20", $enlace) . PHP_EOL, FILE_APPEND);
            echo $enlace . '<br />';
        }
    }

    $http->close();
}
Example #4
0
/**
 * Processes category URLs from ../file/cats_list.ccd in a bounded batch.
 *
 * URLs are taken from the front of the list until the list is empty or the
 * running total returned by getProductsLinks() reaches 400. For each URL the
 * first page is processed; when a "pagination_next_bottom" element exists,
 * the page count is read from the trailing digits of the first <a> href in
 * the nearest preceding element sibling, and pages 2..N are processed too.
 *
 * Any URLs left unprocessed are written back to ../file/cats_list.ccd so a
 * later run can continue with them. The file is only rewritten when URLs
 * are left over; a fully processed list is left untouched.
 *
 * @return void
 */
function getProducts()
{
    include "../classes/httpFile.php";
    include "../functions/crawler_functions.php";
    // Report everything except warnings.
    error_reporting(E_ALL ^ E_WARNING);

    $http = new HttpConnection();
    $http->setCookiePath("cookies/");
    $http->init();
    $contador = 0;
    $DOM = new DOMDocument();

    // Drop blank lines up front so they are neither fetched nor written back
    // nor counted as pending work.
    $url_array = array();
    foreach (explode(PHP_EOL, (string) file_get_contents('../file/cats_list.ccd')) as $line) {
        $line = trim($line);
        if ($line !== '') {
            $url_array[] = $line;
        }
    }

    // Query string appended to a category URL to request a specific page.
    $page_query = '?id_category=58&n=9&p=';

    while (count($url_array) > 0 && $contador < 400) {
        $url = array_shift($url_array);

        get_dom($url, $DOM, $http);
        $paginator = $DOM->getElementById('pagination_next_bottom');

        // The first page counts towards the total whether or not it is paginated.
        $contador = $contador + getProductsLinks($DOM);
        if ($paginator === null) {
            continue;
        }

        // Walk back to the nearest element sibling, skipping text/comment nodes.
        $node = $paginator->previousSibling;
        while ($node !== null && $node->nodeType !== XML_ELEMENT_NODE) {
            $node = $node->previousSibling;
        }

        // Read the page count as an integer from the trailing digits of the href.
        $pages = 0;
        if ($node !== null) {
            $anchor = $node->getElementsByTagName('a')->item(0);
            if ($anchor !== null && preg_match('/(\d+)$/', $anchor->getAttribute('href'), $match)) {
                $pages = (int) $match[1];
            }
        }

        for ($i = 2; $i <= $pages; $i++) {
            echo $i . '<br />';
            $page_url = $url . $page_query . $i;
            $http->get($page_url, true);
            get_dom($page_url, $DOM, $http);
            $contador = $contador + getProductsLinks($DOM);
        }
    }

    $http->close();

    // Capture the pending count before reporting; the list itself is written
    // back in a single call.
    $remaining = count($url_array);
    if ($remaining > 0) {
        file_put_contents("../file/cats_list.ccd", implode(PHP_EOL, $url_array) . PHP_EOL);
    }

    echo "</br>Se han procesado: " . $contador . "  urls</br>";
    echo "Quedan " . $remaining . " por procesar<br />";
}
// Twitter: query the API through TwitterAPIExchange and append one entry per
// status to $data_post.
// NOTE(review): $settings, $url and any initial value of $data_post are not
// defined in the visible part of the file — confirm they are set earlier.
// '%23' is the URL-encoded '#', so the query string asks for q=#meat with count=25.
$getfield = '?q=%23meat&count=25';
$requestMethod = 'GET';
$twitter = new TwitterAPIExchange($settings);
// Decode the response into associative arrays (second json_decode argument is true).
$data_twitter = json_decode($twitter->setGetfield($getfield)->buildOauth($url, $requestMethod)->performRequest(), true);
foreach ($data_twitter['statuses'] as $statuse) {
    // Format the date
    $date = new DateTime($statuse['created_at']);
    // Shift back 3 hours — presumably a fixed local-time offset; TODO confirm.
    $date->modify('-3 hours');
    $date_format = $date->format('d/m/Y H:i:s');
    // Build the array entry for the JSON output
    $data_post[] = array('type' => 'twitter', 'content' => $statuse['text'], 'date' => $date_format, 'likes' => $statuse['favorite_count']);
}
// end twitter
// Instagram: fetch recent media for the "meat" tag and append one entry per
// item to $data_post, using the same keys as the Twitter entries.
// NOTE(review): the access token is hard-coded in the request URL below —
// consider moving it to configuration.
$http = new HttpConnection();
$http->init();
$data_instagram = json_decode($http->get("https://api.instagram.com/v1/tags/meat/media/recent?access_token=44110995.1677ed0.6d87a7ce19f544c99e2912686465de59&count=25"), true);
$http->close();
foreach ($data_instagram['data'] as $data) {
    // Format the date
    // created_time is used as a Unix timestamp (it is passed to date() below);
    // 4 hours' worth of seconds are subtracted from it first.
    // NOTE(review): the Twitter loop above subtracts 3 hours, not 4 — confirm
    // which offset is intended.
    $horas_restantes = 4;
    $date_format = $data['created_time'] - $horas_restantes * 3600;
    $date_format = date('d/m/Y H:i:s', $date_format);
    // Build the array entry for the JSON output; content is the image URL
    // followed by a space and the caption text.
    $data_post[] = array('type' => 'instagram', 'content' => $data['images']['standard_resolution']['url'] . ' ' . $data['caption']['text'], 'date' => $date_format, 'likes' => $data['likes']['count']);
}
// end instagram
// Router
$router = new AltoRouter();
$router->setBasePath('');
// Register a GET route for /posts; 'posts.php' and 'posts' are presumably the
// route target and route name — confirm against AltoRouter's map() signature.
$router->map('GET', '/posts', 'posts.php', 'posts');