Пример #1
0
/**
 * Crawls every category URL listed in ../file/cats_list.ccd (one URL per
 * line) and passes each fetched page to getProductsLinks().
 *
 * For a category whose page contains an element with id
 * "pagination_next_bottom", the trailing number of that element's first
 * link is used as the page count and pages 2..N are requested as
 * "<url>?p=<i>".
 *
 * Side effects: changes the global error_reporting level (warnings are
 * hidden), performs HTTP requests through HttpConnection, and whatever
 * getProductsLinks() does with the links it finds.
 *
 * @return void
 */
function getProducts()
{
    // *_once: calling this function twice must not redeclare the class and
    // helper functions these files define.
    include_once "../classes/httpFile.php";
    include_once "../functions/crawler_functions.php";
    error_reporting(E_ALL ^ E_WARNING);

    $http = new HttpConnection();
    $http->setCookiePath("cookies/");
    $http->init();

    $DOM = new DOMDocument();

    // A missing/unreadable list yields no work instead of one bogus empty URL.
    $list = file_get_contents('../file/cats_list.ccd');
    $url_array = ($list === false) ? [] : explode(PHP_EOL, $list);

    foreach ($url_array as $line) {
        // trim() also strips a stray "\r"; blank lines (e.g. the one produced
        // by a trailing newline) are not URLs and are skipped.
        $url = trim($line);
        if ($url === '') {
            continue;
        }

        // presumably fetches $url through $http and loads it into $DOM — confirm in crawler_functions.php
        get_dom($url, $DOM, $http);
        $paginator = $DOM->getElementById('pagination_next_bottom');

        // The first page is harvested whether or not the category is paginated.
        getProductsLinks($DOM);

        if ($paginator === null) {
            continue;
        }

        $anchor = $paginator->getElementsByTagName('a')->item(0);
        if ($anchor === null) {
            // Paginator element without a link: nothing more to follow.
            continue;
        }

        // Take the whole trailing number of the href as an integer. The
        // previous code kept only the last character and appended '<br/>',
        // which made the loop bound a non-numeric string.
        // NOTE(review): the href comes from the "pagination_next_bottom"
        // element itself; if that is the "next page" link rather than the
        // last-page link, this is the next page number, not the total —
        // confirm against the target markup.
        $href = $anchor->getAttribute('href');
        $pages = (preg_match('/(\d+)$/', $href, $match) === 1) ? (int) $match[1] : 0;

        for ($i = 2; $i <= $pages; $i++) {
            $page_url = $url . '?p=' . $i;
            // NOTE(review): kept from the original; may be redundant if get_dom() performs the request itself.
            $http->get($page_url, true);
            // Load page $i (the original reloaded the base URL, i.e. page 1, every time).
            get_dom($page_url, $DOM, $http);
            getProductsLinks($DOM);
        }
    }

    $http->close();
}
Пример #2
0
/**
 * Processes a batch of category URLs from the queue file
 * ../file/cats_list.ccd (one URL per line) and writes the unprocessed
 * remainder back to that file.
 *
 * URLs are taken from the front of the list until the list is empty or the
 * running total returned by getProductsLinks() reaches 400. For a category
 * page containing an element with id "pagination_next_bottom", the nearest
 * preceding sibling element's first link supplies the page count (the
 * trailing number of its href) and pages 2..N are fetched as well.
 *
 * Side effects: changes the global error_reporting level (warnings are
 * hidden), performs HTTP requests, rewrites the queue file when URLs remain,
 * and echoes progress/summary HTML.
 *
 * @return void
 */
function getProducts()
{
    // *_once: calling this function twice must not redeclare the class and
    // helper functions these files define.
    include_once "../classes/httpFile.php";
    include_once "../functions/crawler_functions.php";
    error_reporting(E_ALL ^ E_WARNING);

    $http = new HttpConnection();
    $http->setCookiePath("cookies/");
    $http->init();

    $contador = 0;
    $DOM = new DOMDocument();

    // Drop blank lines up front. The original tried to skip them inside the
    // loop with `($url = " ")`, an assignment that is always truthy, so every
    // URL was skipped and nothing was ever crawled.
    $list = file_get_contents('../file/cats_list.ccd');
    $url_array = ($list === false)
        ? []
        : array_values(array_filter(array_map('trim', explode(PHP_EOL, $list)), 'strlen'));

    while (count($url_array) > 0 && $contador < 400) {
        $url = array_shift($url_array);

        // presumably fetches $url through $http and loads it into $DOM — confirm in crawler_functions.php
        get_dom($url, $DOM, $http);
        $paginator = $DOM->getElementById('pagination_next_bottom');

        // First page: counted in both the paginated and the single-page case
        // (the original left it out of the total for paginated categories).
        $contador = $contador + getProductsLinks($DOM);

        if ($paginator === null) {
            continue;
        }

        // Walk back to the nearest preceding *element* sibling, skipping
        // text/comment nodes, and stop cleanly if there is none.
        $node = $paginator->previousSibling;
        while ($node !== null && $node->nodeType !== XML_ELEMENT_NODE) {
            $node = $node->previousSibling;
        }
        $anchor = ($node === null) ? null : $node->getElementsByTagName('a')->item(0);
        if ($anchor === null) {
            continue;
        }

        // Take the whole trailing number of the href as an integer. The
        // previous code kept only the last character and appended '<br/>',
        // which made the loop bound a non-numeric string.
        $href = $anchor->getAttribute('href');
        $pages = (preg_match('/(\d+)$/', $href, $match) === 1) ? (int) $match[1] : 0;

        for ($i = 2; $i <= $pages; $i++) {
            echo $i . '<br />';
            // NOTE(review): id_category=58 and n=9 are hard-coded for every
            // category URL — confirm this is intended.
            $page_url = $url . '?id_category=58&n=9&p=' . $i;
            // NOTE(review): kept from the original; may be redundant if get_dom() performs the request itself.
            $http->get($page_url, true);
            get_dom($page_url, $DOM, $http);
            $contador = $contador + getProductsLinks($DOM);
        }
    }

    $http->close();

    // Remember how many URLs are left *before* reporting; the original
    // emptied the array while writing it back and then always printed 0.
    $remaining = count($url_array);
    if ($remaining > 0) {
        // Same file content as appending each URL followed by PHP_EOL.
        file_put_contents("../file/cats_list.ccd", implode(PHP_EOL, $url_array) . PHP_EOL);
    }
    // NOTE(review): when nothing remains the queue file is left untouched
    // (as in the original), so the next run starts over from the full list —
    // confirm whether it should be emptied instead.

    echo "</br>Se han procesado: " . $contador . "  urls</br>";
    echo "Quedan " . $remaining . " por procesar<br />";
}