<?php
/**
 * Crawls the product pages linked from the paginated leggings listing on
 * sngapparelinc.com, pausing a few seconds between product requests.
 *
 * getLinkEndPoints() and getLink() are not defined in this part of the file.
 * Judging only from how they are used below, getLinkEndPoints() is expected to
 * return an array with a 'status' entry and a 'urls' entry, and getLink() to
 * return a status code -- TODO confirm against their definitions.
 */
include_once __DIR__ . '/../goutte.phar';

$goutte = new Goutte\Client();
$baseURL = 'http://sngapparelinc.com/';

// Listing endpoints; each one ends in "?p=" so a page number can be appended.
$leggingsEndpoint = 'leggings.html?p=';
$plusSizeEndpoint = 'plus-size-leggings-1215.html?p=';
$winterEndPoint = 'jeggings.html?p=';
$pantsEndpoint = 'pants.html?p=';
$shortsEndpoint = 'shorts.html?p=';
$kidsEndpoint = 'kids.html?p=';

$status_code = 200;
$count = 1; // page number of the listing currently being crawled

// Get master links
$data = getLinkEndPoints($goutte, $baseURL . $leggingsEndpoint . $count);
while (isset($data['status']) && $data['status'] == 200) {
    // A listing page without product links means there is nothing left to crawl.
    if (empty($data['urls'])) {
        break;
    }

    $urls = getUrlFromArray($data['urls']);

    // Get product urls
    foreach ($urls as $url) {
        $status_code = getLink($goutte, $url);

        // Wait a random 3-4 seconds (usleep takes microseconds) between requests.
        $sleep_time = rand(3 * 1000000, 4 * 1000000);
        echo "\tSleeping for " . number_format($sleep_time / 1000000, 2) . " sec\n";
        usleep($sleep_time);
    }

    // Move on to the next listing page. Without this re-fetch $data never
    // changes and the loop would crawl the same links forever.
    // NOTE(review): termination relies on getLinkEndPoints() reporting a
    // non-200 status or no urls once the page number passes the last page --
    // confirm the site behaves that way.
    $count++;
    $data = getLinkEndPoints($goutte, $baseURL . $leggingsEndpoint . $count);
}

function getUrlFromArray($data)
{
    if (!is_array($data)) {
        // nothing to do if it's not an array
        return array($data);
<?php
/**
 * Requests the LA Showroom women's manufacturer listing, then keeps requesting
 * numbered variants of that URL for as long as getLink() yields 200, sleeping
 * a few seconds after every request.
 *
 * getLink() is not defined in this part of the file; it is presumably
 * returning the HTTP status of the request -- TODO confirm.
 */
require_once "config/default_config.php";
includeMyFiles('urlgenerator');

$goutte = new Goutte\Client();
$baseURL = 'https://www.lashowroom.com/';
$womenManuEndpoint = 'women/manufacturers';
// XPath of the list that holds the links: //*[@id="lnv_level_content_cat"]/ul
$status_code = 200;
$count = 1;

// First pass over the un-numbered listing URL.
getLinkEndPoints($goutte, $baseURL . $womenManuEndpoint);

// NOTE(review): the counter is appended straight onto the endpoint, producing
// ".../women/manufacturers1", ".../women/manufacturers2", ... -- confirm this
// matches the site's URL scheme.
for (; $status_code == 200; $count++) {
    $status_code = getLink($goutte, $baseURL . $womenManuEndpoint . $count);

    // Random pause of 3-4 seconds, expressed in microseconds for usleep().
    $sleep_time = rand(3000000, 4000000);
    echo "\tSleeping for ", number_format($sleep_time / 1000000, 2), " sec\n";
    usleep($sleep_time);
}

function getLinkEndPoints($goutte, $url)
{
    $crawler = $goutte->request('GET', $url);
    $status_code = $goutte->getResponse()->getStatus();
    if ($status_code == 200) {
        // Plus size
        $domSelector = '//*[@id="lnv_level_content_cat"]/ul';
        $crawler->filterXPath($domSelector)->each(function ($node) {
            $domS = '//li/a';
            $node->filterXPath($domS)->each(function ($node) {
                print_r($node->attr('href'));
                echo "\n";
            });