/**
 * Extracts a single product record from a product detail page.
 *
 * The field map is evaluated through xpSubQueries() with "productDesc" as the
 * root query and the first returned row is used as the record. Two further
 * values (numeric details and rating) are then added via page-level queries.
 *
 * @param XPathHelper $xph helper for the page being parsed
 * @return array field name => extracted value; if xpSubQueries() yields no
 *               row, only the two page-level fields are present
 */
public function parseProductDetails(XPathHelper $xph) {
    $fieldQueries = array(
        'product.name' => "//div[@class='RSTL_RightTitle_Product']",
        'product.price_retail' => "//div[@id='rightSideWrapper']/div[@class='RSTR_TopRetail_Product']",
        'product.price_listed' => "//div[@id='rightSideWrapper']/div[@class='RSTR_TopValue_Product']/div[@class='pOurPriceM']",
        'product.price_sale' => "//div[@id='rightSideWrapper']/div[@class='RSTR_TopValue_Product']/div[@class='pSalePrice']",
        'product.url' => "//link[@rel='canonical']/@href",
    );

    // reset() takes its argument by reference, so the rows must be held in a
    // variable; passing the call expression directly raises
    // "Only variables should be passed by reference".
    $rows = $xph->xpSubQueries("//div[@id='productDesc']", $fieldQueries);
    $record = is_array($rows) ? reset($rows) : false;
    if ($record === false) {
        // No row available: start from an explicit empty record instead of
        // relying on PHP turning `false` into an array on first write.
        $record = array();
    }

    // The -1 argument is handed to queryValue() unchanged; its meaning is
    // defined by XPathHelper.
    $record['product.numeric_details[]'] = $xph->queryValue("//div[@id='productDesc']//div[@id='RSTL_Right_Product']/div[@class='RSTL_RightCount_Product']/div", -1);
    $record['product.rating'] = $xph->queryValue("//span[@class='BVRRNumber BVRRRatingNumber']");

    return $record;
}
/**
 * Extracts a single product record from a product detail page.
 *
 * The field map is evaluated through xpSubQueries() with the "ProductDetails"
 * block content as the root query and the first returned row is used as the
 * record. The product description is then added via a page-level query.
 *
 * @param XPathHelper $xph helper for the page being parsed
 * @return array field name => extracted value; if xpSubQueries() yields no
 *               row, only 'product.description' is present
 */
public function parseProductDetails(XPathHelper $xph) {
    $fieldQueries = array(
        'product.name' => 'h2',
        'product.price_retail' => "div[@class='ProductMain']/div[@class='ProductDetailsGrid']/div[@class='DetailRow RetailPrice']/div[@class='Value']",
        'product.price_listed' => "div[@class='ProductMain']/div[@class='ProductDetailsGrid']/div[@class='DetailRow PriceRow']/div[@class='Value']/em[@class='ProductPrice VariationProductPrice']",
        'product.sku' => "div[@class='ProductMain']/div[@class='ProductDetailsGrid']/div[@class='DetailRow ProductSKU']/div[@class='Value']",
    );

    // reset() takes its argument by reference, so the rows must be held in a
    // variable; passing the call expression directly raises
    // "Only variables should be passed by reference".
    $rows = $xph->xpSubQueries("//div[@id='ProductDetails']//div[@class='BlockContent']", $fieldQueries);
    $record = is_array($rows) ? reset($rows) : false;
    if ($record === false) {
        // No row available: start from an explicit empty record instead of
        // relying on PHP turning `false` into an array on first write.
        $record = array();
    }

    $record['product.description'] = $xph->queryValue("//div[@id='ProductDescription']/div[@class='ProductDescriptionContainer']");

    /**
     * TODO: parse additional data fields for weight, quantity, brand, rating..
     * //div[@id='ProductDetails']//div[@class='BlockContent']/div[@class='ProductMain']/div[@class='ProductDetailsGrid']/div[@class='DetailRow']
     * will return a list of div[@class='Label'] and div[@class='Value'] children
     */
    return $record;
}
/**
 * Builds the product record for a product detail page.
 *
 * Three page states are distinguished:
 *  - an error page (text starting with "Exception") yields an empty array;
 *  - a discontinued item yields only the combined SKU/UPC code;
 *  - a regular item yields SKU, UPC, the collapsed label/value data, the
 *    prices and the page URL.
 *
 * @param XPathHelper $xph helper for the page being parsed
 * @return array field name => extracted value (empty for an error page)
 */
public function parseProductDetails(XPathHelper $xph) {
    // Broken page: the error message shows up as plain text in the wrapper.
    $errorProbe = $xph->queryValue("//div[@class='itemPageWrapper']/text()[5]");
    if (substr($errorProbe, 0, 9) === 'Exception') {
        return array();
    }

    $details = array();

    // Discontinued items have no price, so only the item code is collected.
    $apologyHeading = $xph->queryValue("//div[@id='unluckyInfo']/div[@id='apology']/h2");
    if ($apologyHeading === "We're Sorry, We No Longer Carry This Item") {
        $apologyRows = $xph->xpSubQueries(
            "//div[@id='unluckyInfo']/div[@id='apology']",
            array('product.sku_and_upc' => "//div[@class='itemDataCode']")
        );
        $details = $this->array_merge_first_record($details, $apologyRows);

        return $details;
    }

    // Regular items: item codes first.
    $codeRows = $xph->xpSubQueries(
        "//div[@class='imgBorderBgNew']",
        array(
            'product.sku' => "//div[@class='itemDataCode'][span]",
            'product.upc' => "//div[@class='itemDataCode'][not(span)]",
        )
    );
    $details = $this->array_merge_first_record($details, $codeRows);

    // TRICK: collapseLabelValuePairs() is wrapped in an array so that its
    // result can be passed to array_merge_first_record() like the other
    // query results.
    $labelValueRows = $xph->xpSubQueries(
        "//div[@id='tabInfoContent']/table/tbody/tr/td/table/tbody/tr/td/div[@class='itemData']",
        array(
            'label' => 'b',
            'value' => 'text()',
        )
    );
    $collapsed = $this->collapseLabelValuePairs($labelValueRows);
    $details = $this->array_merge_first_record($details, array($collapsed));

    // Pricing block.
    $priceRows = $xph->xpSubQueries(
        "//div[@id='tabPricingContent']/table/tbody/tr/td/div[@class='pricingDisplay']",
        array(
            'product.price_retail' => "table/tbody/tr/td/div[@class='retailPrice']",
            'product.price_listed' => "table/tbody/tr/td/div[@class='salePrice']|table/tbody/tr/td/div[@class='specialPrice']",
        )
    );
    $details = $this->array_merge_first_record($details, $priceRows);

    $details['product.url'] = $xph->getUrl();

    return $details;
}
/**
 * Queries name, brand, listed price and retail price from a product page.
 *
 * The result of xpSubQueries() is returned unchanged, using the "item-spc"
 * container as the root query.
 *
 * @param XPathHelper $xph helper for the page being parsed
 * @return mixed whatever xpSubQueries() returns for the field map below
 */
public function parseProductDetails(XPathHelper $xph) {
    $rootQuery = "//div[@id='item-spc']";

    $fieldQueries = array(
        'product.name' => "div[@id='item-summary']/h1[@class='item-name']",
        'product.brand' => "div[@id='item-summary']/h2[@class='item-brand']",
        'product.price_listed' => "div[@id='item-attributes']/div[@id='item-price']/div[@id='swanson-price']/b[@class='price']",
        'product.price_retail' => "div[@id='item-attributes']/div[@id='item-price']/div[@id='retail-price']/b[@class='price']",
    );

    return $xph->xpSubQueries($rootQuery, $fieldQueries);
}
/**
 * Queries seller name, seller URL and listed price for each seller row.
 *
 * The result of xpSubQueries() is returned unchanged, using the "os-row"
 * table rows as the root query.
 *
 * @param XPathHelper $xph helper for the page being parsed
 * @return mixed whatever xpSubQueries() returns for the field map below
 */
public function parseProductSellers(XPathHelper $xph) {
    $rootQuery = "//tr[@class='os-row']";

    $fieldQueries = array(
        'seller.name' => "td[@class='os-seller-name']",
        'seller.url' => "td[@class='os-seller-name']/span[@class='os-seller-name-primary']/a/@href",
        'product.price_listed' => "td[@class='os-price-col']/span[@class='os-base_price']",
    );

    return $xph->xpSubQueries($rootQuery, $fieldQueries);
}
/**
 * Parses an offer listing page into one row per offer and normalises the
 * seller information of every row.
 *
 * For each row this method:
 *  - attaches the product image URL found on the page;
 *  - turns a site-relative "about us" link into an absolute one;
 *  - tries to derive a merchant id from that link (the `merchantID`
 *    parameter for redirect links, then the `seller` parameter, then a
 *    "/shops/<id>" path segment) and, if found, replaces the link with the
 *    seller page URL built from the id and stores it as 'seller.seller_id';
 *  - if the seller name is still empty, opens additional pages to find it;
 *  - removes the temporary 'seller.department_url' field.
 *
 * @param XPathHelper $xph helper for the offer listing page
 * @return array the rows returned by xpSubQueries(), modified as described
 */
public function parseOfferMerchantList(XPathHelper $xph) {
    $imageUrl = $xph->queryValue('//*[@id="olpProductImage"]/a/img/@src');

    $result = $xph->xpSubQueries(
        "//div[@id='olpOfferList']/div[@id='olpTabContent']/div/div[2]/div[contains(@class, 'olpOffer')]",
        array(
            'offer.price' => "div[1]/span[contains(@class, 'olpOfferPrice')]",
            'offer.price-shipping' => "div[1]/p/span/span[@class='olpShippingPrice']",
            'seller.name' => "div[3]/p[contains(@class, 'olpSellerName')]/span/a",
            'seller.logo' => "div[3]/p[contains(@class, 'olpSellerName')]/a/img/@src|div[3]/p[contains(@class, 'olpSellerName')]/span/a/img/@src",
            'seller.aboutus' => "div[3]/p[contains(@class, 'olpSellerName')]/a/@href|div[3]/p[contains(@class, 'olpSellerName')]/span/a/@href",
            'seller.department_url' => "div[3]/p[contains(@class, 'olpSellerName')]/a/@href",
        )
    );

    foreach ($result as &$row) {
        // add the image url
        $row['product.image'] = $imageUrl;

        // Assure an absolute URL. The comparison must be strict: strpos()
        // returns false when there is no slash at all, and `false == 0` would
        // wrongly prefix empty or already-absolute values.
        if (strpos($row['seller.aboutus'], '/') === 0) {
            $row['seller.aboutus'] = 'http://amazon.com' . $row['seller.aboutus'];
        }

        // extract merchant id somehow
        $merchantId = "";
        if (strpos($row['seller.aboutus'], 'redirect') !== false) {
            // if aboutus URL has redirect, it is for a new seller with no reviews
            $merchantId = $this->extractUriParam('merchantID', $row['seller.aboutus']);
        }
        if (empty($merchantId)) {
            $merchantId = $this->extractUriParam('seller', $row['seller.aboutus']);
        }
        if (empty($merchantId)) {
            $matches = array();
            preg_match('|.*/shops/([^/]+)|', $row['seller.aboutus'], $matches);
            if (!empty($matches[1])) {
                $merchantId = $matches[1];
            }
        }

        // use the dependable page if we can
        if (!empty($merchantId)) {
            $row['seller.aboutus'] = "http://www.amazon.com/gp/aag/main?ie=UTF8&seller=" . $merchantId;
            $row['seller.seller_id'] = $merchantId;
        }

        // Seller Name is mandatory data.
        // Crawl an extra page if we must
        if (empty($row['seller.name'])) {
            if (!empty($merchantId)) {
                $xpath_helper = $this->openHref($row['seller.aboutus']);
                $row['seller.name'] = $xpath_helper->queryValue("//h1");
            }
            // follow the seller image link as a last resort
            if (empty($row['seller.name'])) {
                $xpath_helper = $this->openHref($row['seller.department_url']);
                $row['seller.name'] = $xpath_helper->queryValue("//*[@id='s-result-count']/span|//title|//h1");
                // Strip the boilerplate that surrounds the name in page titles.
                $row['seller.name'] = preg_replace('/ Storefront$/', '', $row['seller.name']);
                $row['seller.name'] = preg_replace('/: Online Shopping for Electronics, Apparel, Computers, Books, DVDs & more/', '', $row['seller.name']);
            }
        }

        unset($row['seller.department_url']);
    }
    // Break the by-reference binding so the last row cannot be modified
    // through a leftover $row reference.
    unset($row);

    return $result;
}
/**
 * Queries name, URL, listed price and retail price for each search result.
 *
 * The result of xpSubQueries() is returned unchanged, using the list items
 * inside the "category-products" container as the root query.
 *
 * @param XPathHelper $xph helper for the search result page
 * @return mixed whatever xpSubQueries() returns for the field map below
 */
public function parseSearchResult(XPathHelper $xph) {
    $rootQuery = "//div[@class='category-products']/ul[@class]/li";

    $fieldQueries = array(
        'product.name' => "h2[@class='product-name']",
        'product.url' => "h2[@class='product-name']/a/@href",
        'product.price_listed' => "div[@class='price-box']/p[@class='special-price']/span[@class='price']",
        'product.price_retail' => "div[@class='price-box']/p[@class='old-price']/span[@class='price']",
    );

    return $xph->xpSubQueries($rootQuery, $fieldQueries);
}
/**
 * Queries name, URL, listed price and retail price for each search result.
 *
 * The result of xpSubQueries() is returned unchanged, using the "row"
 * elements of the "categoryTable" container as the root query.
 *
 * @param XPathHelper $xph helper for the search result page
 * @return mixed whatever xpSubQueries() returns for the field map below
 */
public function parseSearchResult(XPathHelper $xph) {
    $rootQuery = "//div[@id='categoryTable']/div[@class='row']";

    $fieldQueries = array(
        'product.name' => "div[@class='rowInfo']/div[@class='rowWrapper']/div[@class='rowInfoA']/h3/a",
        'product.url' => "div[@class='rowInfo']/div[@class='rowWrapper']/div[@class='rowInfoA']/h3/a/@href",
        'product.price_listed' => "div[@class='rowInfo']/div[@class='rowWrapper']/div[@class='rowInfoB']/div/div[@id='priceLabelContainer']/div[@class='saleValue-Price-Search']",
        'product.price_retail' => "div[@class='rowInfo']/div[@class='rowWrapper']/div[@class='rowInfoB']/div/div[@id='priceLabelContainer']/div[@class='listRegular-Price'][1]",
    );

    return $xph->xpSubQueries($rootQuery, $fieldQueries);
}
/**
 * Queries name, URL, rating, listed price and retail price for each search
 * result.
 *
 * The result of xpSubQueries() is returned unchanged, using the
 * "prodSlotWide" elements of the results container as the root query.
 *
 * @param XPathHelper $xph helper for the search result page
 * @return mixed whatever xpSubQueries() returns for the field map below
 */
public function parseSearchResult(XPathHelper $xph) {
    $rootQuery = "//div[@id='display-results-content']/div[@class='prodSlotWide']";

    $fieldQueries = array(
        'product.name' => "p[@class='description']",
        'product.url' => "p[@class='description']/a/@href",
        'product.rating' => "div[@class='details']/div[@class='starsAndPrice']/span/a/img/@title",
        'product.price_listed' => "div[@class='details']/div[@class='starsAndPrice']/span[@class='price']/text()[1]",
        'product.price_retail' => "div[@class='details']/div[@class='starsAndPrice']/span[@class='price']/span[@class='crossed-out-price']",
    );

    return $xph->xpSubQueries($rootQuery, $fieldQueries);
}
/**
 * Queries name, URL, SKU text and rating for each search result.
 *
 * The result of xpSubQueries() is returned unchanged, using the "itemCell"
 * elements of the product list as the root query.
 *
 * @param XPathHelper $xph helper for the search result page
 * @return mixed whatever xpSubQueries() returns for the field map below
 */
public function parseSearchResult(XPathHelper $xph) {
    $rootQuery = "//div[@class='productList']/div[1]/div[@class='itemCell']";

    $fieldQueries = array(
        'product.name' => "div[@class='itemText']/div[@class='wrapper']/a/span[@class='itemDescription' and starts-with(@id, 'title')]",
        'product.url' => "div[@class='itemText']/div[@class='wrapper']/a/@href",
        'product.sku' => "div[@class='itemText']/ul[@class='featureList']/li[contains(., 'Model #:')]/text()",
        'product.rating' => "div[@class='itemGraphics']/a[@class='itemRating']/@title",
    );

    return $xph->xpSubQueries($rootQuery, $fieldQueries);
}