示例#1
0
<?php

include_once __DIR__ . '/../goutte.phar';
// Crawl the leggings category of sngapparelinc.com: walk the paginated listing
// and visit every product URL found on each listing page.
$goutte = new Goutte\Client();
$baseURL = 'http://sngapparelinc.com/';
// Category listing endpoints. Each ends in "?p=" so that a page number can be
// appended. Only the leggings endpoint is crawled by this script.
$leggingsEndpoint = 'leggings.html?p=';
$plusSizeEndpoint = 'plus-size-leggings-1215.html?p=';
$winterEndPoint = 'jeggings.html?p=';
$pantsEndpoint = 'pants.html?p=';
$shortsEndpoint = 'shorts.html?p=';
$kidsEndpoint = 'kids.html?p=';
$status_code = 200;
// Number of the listing page most recently requested.
$count = 1;
// Get master links
// NOTE(review): the first request leaves "p" empty, exactly as before; this
// presumably makes the site serve page 1 - confirm.
$data = getLinkEndPoints($goutte, $baseURL . $leggingsEndpoint);
// URLs seen on the previous listing page, used to detect a repeated page.
$previousUrls = null;
while ($data['status'] == 200) {
    $urls = getUrlFromArray($data['urls']);
    // Stop when a page yields nothing, or when the site keeps answering 200
    // with the same page once the page number runs past the end.
    if (empty($urls) || $urls === $previousUrls) {
        break;
    }
    // Get product urls
    foreach ($urls as $url) {
        $status_code = getLink($goutte, $url);
        // Pause 3-4 seconds between product requests.
        $sleep_time = rand(3 * 1000000, 4 * 1000000);
        echo "\tSleeping for " . number_format($sleep_time / 1000000, 2) . " sec\n";
        usleep($sleep_time);
    }
    // Move on to the next listing page. Previously $data was never refreshed,
    // so the loop re-crawled the same URLs forever after a 200 response.
    $previousUrls = $urls;
    $count++;
    $data = getLinkEndPoints($goutte, $baseURL . $leggingsEndpoint . $count);
}
function getUrlFromArray($data)
{
    if (!is_array($data)) {
        // nothing to do if it's not an array
        return array($data);
示例#2
0
<?php

require_once "config/default_config.php";
includeMyFiles('urlgenerator');
// Crawl the women's manufacturer pages of lashowroom.com.
$goutte = new Goutte\Client();
$baseURL = 'https://www.lashowroom.com/';
$womenManuEndpoint = 'women/manufacturers';
// XPath of the category list on the landing page:
//   //*[@id="lnv_level_content_cat"]/ul
$status_code = 200;

// Request the landing page once; the return value is not used here.
getLinkEndPoints($goutte, $baseURL . $womenManuEndpoint);

// Request "<endpoint><n>" for n = 1, 2, 3, ... until getLink() reports a
// status other than 200.
for ($count = 1; $status_code == 200; $count++) {
    $status_code = getLink($goutte, $baseURL . $womenManuEndpoint . $count);

    // Wait a random 3-4 seconds (in microseconds) before the next request.
    $sleep_time = rand(3000000, 4000000);
    $sleepSeconds = number_format($sleep_time / 1000000, 2);
    echo "\tSleeping for " . $sleepSeconds . " sec\n";
    usleep($sleep_time);
}
function getLinkEndPoints($goutte, $url)
{
    $crawler = $goutte->request('GET', $url);
    $status_code = $goutte->getResponse()->getStatus();
    if ($status_code == 200) {
        // Plus size
        $domSelector = '//*[@id="lnv_level_content_cat"]/ul';
        $crawler->filterXPath($domSelector)->each(function ($node) {
            $domS = '//li/a';
            $node->filterXPath($domS)->each(function ($node) {
                print_r($node->attr('href'));
                echo "\n";
            });