示例#1
0
文件: test.php 项目: GeekAb/Crawlers
/**
 * Fetch a product page and collect its breadcrumb categories, image URLs
 * and raw detail markup.
 *
 * The page is requested through $goutte; when the response status is not 200
 * nothing is scraped and null is returned. Otherwise the scraped pieces are
 * handed to formatProductData() (defined outside this block; presumably it
 * returns an array, since its result is passed to array_merge()) and merged
 * on top of the 'category' / 'subCategory' entries.
 *
 * @param object $goutte Client exposing request() and getResponse()
 *                       (used here the way a Goutte client is used).
 * @param string $url    Address of the product page.
 *
 * @return array|null Merged result array, or null when the status is not 200.
 */
function getProductData($goutte, $url)
{
    $crawler = $goutte->request('GET', $url);
    $status_code = $goutte->getResponse()->getStatus();

    // Anything but a 200 yields no data (the implicit null of the original, made explicit).
    if ($status_code != 200) {
        return null;
    }

    $data = array();
    $result = array();

    // Breadcrumb links: inner HTML of each anchor, with spaces removed and trimmed.
    $filterNav = '//*[@class="item-detail-header"]/p/a';
    $catStruct = $crawler->filterXPath($filterNav)->each(function ($node) {
        return $node->html();
    });
    $catStruct = array_map(function ($catVal) {
        return trim(str_replace(' ', '', $catVal));
    }, $catStruct);

    // The 2nd and 3rd breadcrumb links are taken as category / sub-category.
    // Guard the lookups: a page with fewer links would otherwise raise an
    // undefined-offset notice (a warning on PHP 8). Missing entries stay null.
    $result['category'] = isset($catStruct[1]) ? $catStruct[1] : null;
    $result['subCategory'] = isset($catStruct[2]) ? $catStruct[2] : null;

    // Master image: src attribute of every matching element.
    $domSelector = '//*[@id="item_detail_image_display"]';
    $data['masterImage'] = $crawler->filterXPath($domSelector)->each(function ($node) {
        return $node->attr('src');
    });

    // Model text: inner HTML of the disclaimer paragraphs.
    $domSelector = '//*[@class="item-detail-main-content-image-thumbnail-disclaimer"]/p';
    $data['model_text'] = $crawler->filterXPath($domSelector)->each(function ($node) {
        return $node->html();
    });

    // Other images: href of every thumbnail link.
    $domSelector = '//*[@class="item-detail-main-content-image-thumbnail"]/ul/li/a';
    $data['otherImages'] = $crawler->filterXPath($domSelector)->each(function ($node) {
        return $node->attr('href');
    });

    // Product data: one entry per details container, holding its raw HTML
    // plus the trimmed text of every <h2> matched from it (style number).
    $domSelector = '//*[@class="item-detail-main-content-details clearfix"]';
    $data['data'] = $crawler->filterXPath($domSelector)->each(function ($node) {
        $details = array();
        $details['otherData'] = $node->html();

        $styleSelector = '//h2';
        $details['style_no'] = $node->filterXPath($styleSelector)->each(function ($heading) {
            return trim($heading->text());
        });

        return $details;
    });

    return array_merge($result, formatProductData($data));
}
示例#2
0
/**
 * Fetch a store item page and collect its breadcrumb categories, image URLs,
 * and the style / description / price / colour fragments of the detail panel.
 *
 * The page is requested through $goutte; when the response status is not 200
 * nothing is scraped and null is returned. Otherwise the scraped pieces are
 * handed to formatProductData() (defined outside this block; presumably it
 * returns an array, since its result is passed to array_merge()) and merged
 * on top of the 'category' / 'subCategory' entries.
 *
 * @param object $goutte Client exposing request() and getResponse()
 *                       (used here the way a Goutte client is used).
 * @param string $url    Address of the product page.
 *
 * @return array|null Merged result array, or null when the status is not 200.
 */
function getProductData($goutte, $url)
{
    $crawler = $goutte->request('GET', $url);
    $status_code = $goutte->getResponse()->getStatus();

    // Anything but a 200 yields no data (the implicit null of the original, made explicit).
    if ($status_code != 200) {
        return null;
    }

    $data = array();
    $result = array();

    // Breadcrumb links: inner HTML of each anchor, with spaces removed and trimmed.
    $filterNav = '//*[@id="store_item"]/div[1]/a';
    $catStruct = $crawler->filterXPath($filterNav)->each(function ($node) {
        return $node->html();
    });
    $catStruct = array_map(function ($catVal) {
        return trim(str_replace(' ', '', $catVal));
    }, $catStruct);

    // The 2nd and 3rd breadcrumb links are taken as category / sub-category.
    // Guard the lookups: a page with fewer links would otherwise raise an
    // undefined-offset notice (a warning on PHP 8). Missing entries stay null.
    $result['category'] = isset($catStruct[1]) ? $catStruct[1] : null;
    $result['subCategory'] = isset($catStruct[2]) ? $catStruct[2] : null;

    // Master image: src attribute of every matching element.
    $domSelector = '//*[@id="store_item_detail_image"]';
    $data['masterImage'] = $crawler->filterXPath($domSelector)->each(function ($node) {
        return $node->attr('src');
    });

    // Model text: inner HTML of the first div in the left detail column.
    $domSelector = '//*[@id="store_item_detail_l"]/div[1]';
    $data['model_text'] = $crawler->filterXPath($domSelector)->each(function ($node) {
        return $node->html();
    });

    // Other images: href of every "more views" link.
    $domSelector = '//*[@id="more_view_box"]/ul/li/a';
    $data['otherImages'] = $crawler->filterXPath($domSelector)->each(function ($node) {
        return $node->attr('href');
    });

    // Product data: one entry per right-hand detail panel.
    $domSelector = '//*[@id="store_item_detail_r"]';
    $data['data'] = $crawler->filterXPath($domSelector)->each(function ($node) {
        $details = array();

        // Style number: trimmed text of the description heading.
        $details['style_no'] = $node
            ->filterXPath('//*[@id="store_item_description"]/h1')
            ->each(function ($heading) {
                return trim($heading->text());
            });

        // Description: inner HTML of the description table.
        $details['description'] = $node
            ->filterXPath('//*[@id="store_item_description"]/table')
            ->each(function ($table) {
                return $table->html();
            });

        // Price info: inner HTML of the price container.
        $details['priceInfo'] = $node
            ->filterXPath('//*[@id="store_item_price"]')
            ->each(function ($price) {
                return $price->html();
            });

        // Colour options: text of each link in the order-form table header cells.
        $details['colors'] = $node
            ->filterXPath('//*[@id="item_order_form"]/div[2]/table/tbody/tr/th/a')
            ->each(function ($colorLink) {
                return $colorLink->text();
            });

        return $details;
    });

    return array_merge($result, formatProductData($data));
}