コード例 #1
0
ファイル: cosmomedica.php プロジェクト: hesselek/parser
/**
 * Crawls every category URL listed in ../file/cats_list.ccd and passes each
 * loaded page to getProductsLinks().
 *
 * For a category whose page contains an element with id
 * "pagination_next_bottom", the page number found in that element's first
 * link is used as the upper bound and pages 2..N are requested with "?p=<i>"
 * appended to the category URL.
 *
 * Relies on HttpConnection, get_dom() and getProductsLinks(), which are
 * loaded from the two project files required below.
 *
 * @return void
 */
function getProducts()
{
    // *_once: a second call must not re-include the files and redeclare
    // their classes/functions.
    require_once "../classes/httpFile.php";
    require_once "../functions/crawler_functions.php";
    error_reporting(E_ALL ^ E_WARNING);

    $http = new HttpConnection();
    $http->setCookiePath("cookies/");
    $http->init();
    $DOM = new DOMDocument();

    // file() copes with both "\n" and "\r\n" lists and drops blank lines, so
    // a trailing newline no longer produces an empty URL to crawl.
    $lines = file('../file/cats_list.ccd', FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
    $url_array = ($lines === false) ? array() : $lines;

    foreach ($url_array as $line) {
        $url = trim($line);
        if ($url === '') {
            continue;
        }

        get_dom($url, $DOM, $http);
        $paginator = $DOM->getElementById('pagination_next_bottom');
        getProductsLinks($DOM);
        if ($paginator === null) {
            // Single-page category.
            continue;
        }

        $anchor = $paginator->getElementsByTagName('a')->item(0);
        if ($anchor === null) {
            // Pagination element without a link: nothing more to follow.
            continue;
        }
        $href = $anchor->getAttribute('href');

        // Read the page number as an integer. Prefer the "p" query parameter
        // (the parameter this crawler itself uses); fall back to the trailing
        // digits of the href. The previous code kept only the last character
        // and compared a non-numeric string against the loop counter.
        // NOTE(review): this link sits inside the "next" pagination element,
        // so it may point at the next page rather than the last one - confirm
        // against the site markup.
        $pages = 0;
        $query = parse_url($href, PHP_URL_QUERY);
        if (is_string($query)) {
            parse_str($query, $params);
            if (isset($params['p']) && is_scalar($params['p'])) {
                $pages = (int) $params['p'];
            }
        }
        if ($pages === 0 && preg_match('/(\d+)$/', $href, $matches)) {
            $pages = (int) $matches[1];
        }

        for ($i = 2; $i <= $pages; $i++) {
            $page_url = $url . '?p=' . $i;
            $http->get($page_url, true);
            // Parse the page that was just requested; the previous code
            // passed the base URL here and so re-read page 1 every time.
            // Assumes get_dom() loads the URL it is given into $DOM, as the
            // first-page call above relies on - TODO confirm.
            get_dom($page_url, $DOM, $http);
            getProductsLinks($DOM);
        }
    }

    $http->close();
}
コード例 #2
0
ファイル: megamenu.php プロジェクト: hesselek/parser
/**
 * Fetches every URL listed in ../file/cats_list.ccd, finds the elements whose
 * class list contains "product-container" and appends the href of the first
 * link inside each one to ../file/url_list.ccd (one per line), echoing it as
 * well.
 *
 * Relies on HttpConnection and get_dom(), which are loaded from the two
 * project files required below.
 *
 * @return void
 */
function getProducts()
{
    // *_once: a second call must not re-include the files and redeclare
    // their classes/functions.
    require_once "../classes/httpFile.php";
    require_once "../functions/crawler_functions.php";
    error_reporting(E_ALL ^ E_WARNING);

    $http = new HttpConnection();
    $http->setCookiePath("cookies/");
    $http->init();
    $DOM = new DOMDocument();

    // split() was removed in PHP 7.0. file() also copes with "\n" and "\r\n"
    // lists and drops blank lines, so a trailing newline is not crawled.
    $lines = file('../file/cats_list.ccd', FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
    $url_array = ($lines === false) ? array() : $lines;

    $classname = "product-container";
    // Whole-token class match: pad the normalised class list with spaces and
    // look for " name ". The previous four-clause expression measured the
    // un-normalised @class in one clause and missed padded attributes.
    $expression = "//*[contains(concat(' ', normalize-space(@class), ' '), ' {$classname} ')]";

    foreach ($url_array as $line) {
        $url = trim($line);
        if ($url === '') {
            continue;
        }

        $http->get($url, true);
        get_dom($url, $DOM, $http);

        // A new finder per page: $DOM is reloaded for every URL.
        $finder = new DomXPath($DOM);
        foreach ($finder->query($expression) as $p) {
            $anchor = $p->getElementsByTagName('a')->item(0);
            if ($anchor === null) {
                // Product container without any link: nothing to record.
                continue;
            }
            $enlace = $anchor->getAttribute('href');
            // The href comes from a scraped page, so escape it for HTML
            // output; the file receives the raw value.
            echo htmlspecialchars($enlace, ENT_QUOTES, 'UTF-8') . '<br/>';
            file_put_contents("../file/url_list.ccd", $enlace . PHP_EOL, FILE_APPEND);
        }
    }

    $http->close();
}
コード例 #3
0
ファイル: top_menu.php プロジェクト: hesselek/parser
/**
 * Crawls category URLs from ../file/cats_list.ccd until the list is exhausted
 * or the running total returned by getProductsLinks() reaches 400, then
 * writes the URLs that were not processed back to the same file and prints a
 * summary.
 *
 * For a category whose page contains an element with id
 * "pagination_next_bottom", the nearest preceding element sibling's first
 * link supplies the page count, and pages 2..N are requested with
 * "?id_category=58&n=9&p=<i>" appended to the category URL.
 *
 * Relies on HttpConnection, get_dom() and getProductsLinks(), which are
 * loaded from the two project files required below.
 *
 * @return void
 */
function getProducts()
{
    // *_once: a second call must not re-include the files and redeclare
    // their classes/functions.
    require_once "../classes/httpFile.php";
    require_once "../functions/crawler_functions.php";
    error_reporting(E_ALL ^ E_WARNING);

    $http = new HttpConnection();
    $http->setCookiePath("cookies/");
    $http->init();
    $contador = 0;
    $DOM = new DOMDocument();

    // file() copes with both "\n" and "\r\n" lists and drops blank lines.
    $lines = file('../file/cats_list.ccd', FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
    $url_array = ($lines === false) ? array() : $lines;

    while (count($url_array) > 0 && $contador < 400) {
        $url = trim(array_shift($url_array));
        // The previous check used an assignment, ($url = " "), which is
        // always truthy and therefore skipped every single URL.
        if ($url === '') {
            continue;
        }

        get_dom($url, $DOM, $http);
        $paginator = $DOM->getElementById('pagination_next_bottom');
        // Count the first page as well; previously its result was dropped
        // whenever the category was paginated.
        $contador = $contador + getProductsLinks($DOM);
        if ($paginator === null) {
            // Single-page category.
            continue;
        }

        // Walk back to the nearest preceding element node. The null guard
        // stops the walk when there is no such sibling instead of spinning
        // forever on null.
        $link_paginator = $paginator->previousSibling;
        while ($link_paginator !== null && $link_paginator->nodeType !== XML_ELEMENT_NODE) {
            $link_paginator = $link_paginator->previousSibling;
        }
        if ($link_paginator === null) {
            continue;
        }

        $link_last = $link_paginator->getElementsByTagName('a')->item(0);
        if ($link_last === null) {
            continue;
        }
        $href = $link_last->getAttribute('href');

        // Read the page count as an integer. Prefer the "p" query parameter;
        // fall back to the trailing digits of the href. The previous code
        // kept only the last character and compared a non-numeric string
        // against the loop counter.
        $pages = 0;
        $query = parse_url($href, PHP_URL_QUERY);
        if (is_string($query)) {
            parse_str($query, $params);
            if (isset($params['p']) && is_scalar($params['p'])) {
                $pages = (int) $params['p'];
            }
        }
        if ($pages === 0 && preg_match('/(\d+)$/', $href, $matches)) {
            $pages = (int) $matches[1];
        }

        for ($i = 2; $i <= $pages; $i++) {
            echo $i . '<br />';
            // NOTE(review): id_category=58 is hard-coded for every category
            // URL - confirm this is intended.
            $page_url = $url . '?id_category=58&n=9&p=' . $i;
            $http->get($page_url, true);
            get_dom($page_url, $DOM, $http);
            $contador = $contador + getProductsLinks($DOM);
        }
    }
    $http->close();

    // Count before writing: the previous code emptied the array while saving
    // it and then always reported 0 remaining.
    $remaining = count($url_array);
    if ($remaining > 0) {
        file_put_contents("../file/cats_list.ccd", implode(PHP_EOL, $url_array) . PHP_EOL);
    }
    // NOTE(review): when nothing remains the list file is left untouched, so
    // a later run starts over from the full list - confirm this is intended.

    echo "</br>Se han procesado: " . $contador . "  urls</br>";
    echo "Quedan " . $remaining . " por procesar<br />";
}