Ejemplo n.º 1
0
 /**
  * Build the product record for a product detail page.
  *
  * Runs the field sub-queries under the `productDesc` container, keeps the
  * first record returned, and then adds the numeric details and the rating
  * that are queried separately.
  *
  * @param XPathHelper $xph XPath helper for the page being parsed.
  *
  * @return array Record keyed by `product.*` field names.
  */
 public function parseProductDetails(XPathHelper $xph)
 {
     $fieldQueries = array(
         'product.name' => "//div[@class='RSTL_RightTitle_Product']",
         'product.price_retail' => "//div[@id='rightSideWrapper']/div[@class='RSTR_TopRetail_Product']",
         'product.price_listed' => "//div[@id='rightSideWrapper']/div[@class='RSTR_TopValue_Product']/div[@class='pOurPriceM']",
         'product.price_sale' => "//div[@id='rightSideWrapper']/div[@class='RSTR_TopValue_Product']/div[@class='pSalePrice']",
         'product.url' => "//link[@rel='canonical']/@href",
     );

     // reset() takes its argument by reference, so the query result must be
     // stored in a variable first; passing the call expression directly
     // raises "Only variables should be passed by reference".
     $rows = $xph->xpSubQueries("//div[@id='productDesc']", $fieldQueries);
     $record = reset($rows);
     if ($record === false) {
         // No record matched: start from an explicit empty array instead of
         // relying on false-to-array autovivification (deprecated in PHP 8.1).
         $record = array();
     }

     // NOTE(review): the meaning of the -1 argument is defined by
     // XPathHelper::queryValue(), which is not visible here - confirm.
     $record['product.numeric_details[]'] = $xph->queryValue("//div[@id='productDesc']//div[@id='RSTL_Right_Product']/div[@class='RSTL_RightCount_Product']/div", -1);
     $record['product.rating'] = $xph->queryValue("//span[@class='BVRRNumber BVRRRatingNumber']");

     return $record;
 }
Ejemplo n.º 2
0
 /**
  * Build the product record for a product detail page.
  *
  * Runs the field sub-queries under the `ProductDetails` block content,
  * keeps the first record returned, and adds the product description that
  * is queried separately.
  *
  * @param XPathHelper $xph XPath helper for the page being parsed.
  *
  * @return array Record keyed by `product.*` field names.
  */
 public function parseProductDetails(XPathHelper $xph)
 {
     $fieldQueries = array(
         'product.name' => 'h2',
         'product.price_retail' => "div[@class='ProductMain']/div[@class='ProductDetailsGrid']/div[@class='DetailRow RetailPrice']/div[@class='Value']",
         'product.price_listed' => "div[@class='ProductMain']/div[@class='ProductDetailsGrid']/div[@class='DetailRow PriceRow']/div[@class='Value']/em[@class='ProductPrice VariationProductPrice']",
         'product.sku' => "div[@class='ProductMain']/div[@class='ProductDetailsGrid']/div[@class='DetailRow ProductSKU']/div[@class='Value']",
     );

     // reset() takes its argument by reference, so the query result must be
     // stored in a variable first; passing the call expression directly
     // raises "Only variables should be passed by reference".
     $rows = $xph->xpSubQueries("//div[@id='ProductDetails']//div[@class='BlockContent']", $fieldQueries);
     $record = reset($rows);
     if ($record === false) {
         // No record matched: start from an explicit empty array instead of
         // relying on false-to-array autovivification (deprecated in PHP 8.1).
         $record = array();
     }

     $record['product.description'] = $xph->queryValue("//div[@id='ProductDescription']/div[@class='ProductDescriptionContainer']");

     /** TODO: parse additional data fields for weight, quantity, brand, rating..
      *	//div[@id='ProductDetails']//div[@class='BlockContent']/div[@class='ProductMain']/div[@class='ProductDetailsGrid']/div[@class='DetailRow']
      *	will return a list of div[@class='Label'] and div[@class='Value'] children
      */
     return $record;
 }
 /**
  * Parse a product detail page, including broken and discontinued pages.
  *
  * - A page whose wrapper text starts with "Exception" yields an empty array.
  * - A discontinued item yields only the combined SKU/UPC field.
  * - A regular item yields SKU, UPC, the collapsed label/value pairs, the
  *   prices and the page URL.
  *
  * @param XPathHelper $xph XPath helper for the page being parsed.
  *
  * @return array Product fields; empty for a broken page.
  */
 public function parseProductDetails(XPathHelper $xph)
 {
     // Broken page that only shows an error message.
     $wrapperText = $xph->queryValue("//div[@class='itemPageWrapper']/text()[5]");
     if (substr($wrapperText, 0, 9) === 'Exception') {
         return array();
     }

     $details = array();

     // Discontinued items have no price, so only the identifier block is read.
     $apologyHeading = $xph->queryValue("//div[@id='unluckyInfo']/div[@id='apology']/h2");
     if ($apologyHeading === "We're Sorry, We No Longer Carry This Item") {
         $apologyRows = $xph->xpSubQueries(
             "//div[@id='unluckyInfo']/div[@id='apology']",
             array('product.sku_and_upc' => "//div[@class='itemDataCode']")
         );

         return $this->array_merge_first_record($details, $apologyRows);
     }

     // Regular items: identifiers first.
     $identifierRows = $xph->xpSubQueries(
         "//div[@class='imgBorderBgNew']",
         array(
             'product.sku' => "//div[@class='itemDataCode'][span]",
             'product.upc' => "//div[@class='itemDataCode'][not(span)]",
         )
     );
     $details = $this->array_merge_first_record($details, $identifierRows);

     // TRICK: the collapsed pairs are wrapped in an array so that they have
     // the shape array_merge_first_record() accepts.
     $labelValueRows = $xph->xpSubQueries(
         "//div[@id='tabInfoContent']/table/tbody/tr/td/table/tbody/tr/td/div[@class='itemData']",
         array('label' => 'b', 'value' => 'text()')
     );
     $collapsedPairs = $this->collapseLabelValuePairs($labelValueRows);
     $details = $this->array_merge_first_record($details, array($collapsedPairs));

     // Pricing block.
     $priceRows = $xph->xpSubQueries(
         "//div[@id='tabPricingContent']/table/tbody/tr/td/div[@class='pricingDisplay']",
         array(
             'product.price_retail' => "table/tbody/tr/td/div[@class='retailPrice']",
             'product.price_listed' => "table/tbody/tr/td/div[@class='salePrice']|table/tbody/tr/td/div[@class='specialPrice']",
         )
     );
     $details = $this->array_merge_first_record($details, $priceRows);

     $details['product.url'] = $xph->getUrl();

     return $details;
 }
 /**
  * Query the product fields found under the `item-spc` container.
  *
  * @param XPathHelper $xph XPath helper for the page being parsed.
  *
  * @return mixed Whatever XPathHelper::xpSubQueries() returns for the
  *               name, brand, listed price and retail price queries.
  */
 public function parseProductDetails(XPathHelper $xph)
 {
     $containerQuery = "//div[@id='item-spc']";
     $fieldQueries = array(
         'product.name' => "div[@id='item-summary']/h1[@class='item-name']",
         'product.brand' => "div[@id='item-summary']/h2[@class='item-brand']",
         'product.price_listed' => "div[@id='item-attributes']/div[@id='item-price']/div[@id='swanson-price']/b[@class='price']",
         'product.price_retail' => "div[@id='item-attributes']/div[@id='item-price']/div[@id='retail-price']/b[@class='price']",
     );

     return $xph->xpSubQueries($containerQuery, $fieldQueries);
 }
Ejemplo n.º 5
0
 /**
  * Query seller name, seller URL and listed price for every `os-row`
  * table row on the page.
  *
  * @param XPathHelper $xph XPath helper for the page being parsed.
  *
  * @return mixed Whatever XPathHelper::xpSubQueries() returns for the
  *               seller row queries.
  */
 public function parseProductSellers(XPathHelper $xph)
 {
     $rowQuery = "//tr[@class='os-row']";
     $fieldQueries = array(
         'seller.name' => "td[@class='os-seller-name']",
         'seller.url' => "td[@class='os-seller-name']/span[@class='os-seller-name-primary']/a/@href",
         'product.price_listed' => "td[@class='os-price-col']/span[@class='os-base_price']",
     );

     return $xph->xpSubQueries($rowQuery, $fieldQueries);
 }
Ejemplo n.º 6
0
 /**
  * Parse the offer list of a product and normalise each seller entry.
  *
  * For every offer row this adds the product image, makes a root-relative
  * "about us" URL absolute, tries to work out the merchant id, replaces the
  * "about us" URL with the merchant page built from that id, and - when the
  * seller name is missing - opens additional pages to recover it. The
  * temporary `seller.department_url` field is removed before returning.
  *
  * @param XPathHelper $xph XPath helper for the offer listing page.
  *
  * @return mixed The offer rows from XPathHelper::xpSubQueries(), modified
  *               in place as described above.
  */
 public function parseOfferMerchantList(XPathHelper $xph)
 {
     $imageUrl = $xph->queryValue('//*[@id="olpProductImage"]/a/img/@src');
     $result = $xph->xpSubQueries("//div[@id='olpOfferList']/div[@id='olpTabContent']/div/div[2]/div[contains(@class, 'olpOffer')]", array(
         'offer.price' => "div[1]/span[contains(@class, 'olpOfferPrice')]",
         'offer.price-shipping' => "div[1]/p/span/span[@class='olpShippingPrice']",
         'seller.name' => "div[3]/p[contains(@class, 'olpSellerName')]/span/a",
         'seller.logo' => "div[3]/p[contains(@class, 'olpSellerName')]/a/img/@src|div[3]/p[contains(@class, 'olpSellerName')]/span/a/img/@src",
         'seller.aboutus' => "div[3]/p[contains(@class, 'olpSellerName')]/a/@href|div[3]/p[contains(@class, 'olpSellerName')]/span/a/@href",
         'seller.department_url' => "div[3]/p[contains(@class, 'olpSellerName')]/a/@href",
     ));

     foreach ($result as &$row) {
         // add the image url
         $row['product.image'] = $imageUrl;

         // Make a root-relative URL absolute. The comparison must be strict:
         // strpos() returns false when there is no slash at all, and
         // `false == 0` is true, which used to prefix empty or already
         // malformed values as well.
         if (strpos($row['seller.aboutus'], '/') === 0) {
             $row['seller.aboutus'] = 'http://amazon.com' . $row['seller.aboutus'];
         }

         // Extract the merchant id, trying the known URL shapes in turn.
         $merchantId = "";
         if (strpos($row['seller.aboutus'], 'redirect') !== false) {
             // if aboutus URL has redirect, it is for a new seller with no reviews
             $merchantId = $this->extractUriParam('merchantID', $row['seller.aboutus']);
         }
         if (empty($merchantId)) {
             $merchantId = $this->extractUriParam('seller', $row['seller.aboutus']);
         }
         if (empty($merchantId)) {
             // Last attempt: the path segment following "/shops/".
             preg_match('|.*/shops/([^/]+)|', $row['seller.aboutus'], $matches);
             if (!empty($matches[1])) {
                 $merchantId = $matches[1];
             }
         }

         // use the dependable page if we can
         if (!empty($merchantId)) {
             $row['seller.aboutus'] = "http://www.amazon.com/gp/aag/main?ie=UTF8&seller=" . $merchantId;
             $row['seller.seller_id'] = $merchantId;
         }

         // Seller Name is mandatory data.  Crawl an extra page if we must
         if (empty($row['seller.name'])) {
             if (!empty($merchantId)) {
                 $xpath_helper = $this->openHref($row['seller.aboutus']);
                 $row['seller.name'] = $xpath_helper->queryValue("//h1");
             }
             // follow the seller image link as a last resort
             if (empty($row['seller.name'])) {
                 $xpath_helper = $this->openHref($row['seller.department_url']);
                 $row['seller.name'] = $xpath_helper->queryValue("//*[@id='s-result-count']/span|//title|//h1");
                 // Strip the boilerplate that surrounds the name in page titles.
                 $row['seller.name'] = preg_replace('/ Storefront$/', '', $row['seller.name']);
                 $row['seller.name'] = preg_replace('/: Online Shopping for Electronics, Apparel, Computers, Books, DVDs & more/', '', $row['seller.name']);
             }
         }

         // Only needed for the name lookup above; not part of the output.
         unset($row['seller.department_url']);
     }
     // Break the reference left behind by the by-reference foreach so the
     // last row cannot be modified through $row afterwards.
     unset($row);

     return $result;
 }
 /**
  * Query name, URL, listed price and retail price for every product list
  * item under the `category-products` container.
  *
  * @param XPathHelper $xph XPath helper for the search result page.
  *
  * @return mixed Whatever XPathHelper::xpSubQueries() returns for the
  *               product item queries.
  */
 public function parseSearchResult(XPathHelper $xph)
 {
     $itemQuery = "//div[@class='category-products']/ul[@class]/li";
     $fieldQueries = array(
         'product.name' => "h2[@class='product-name']",
         'product.url' => "h2[@class='product-name']/a/@href",
         'product.price_listed' => "div[@class='price-box']/p[@class='special-price']/span[@class='price']",
         'product.price_retail' => "div[@class='price-box']/p[@class='old-price']/span[@class='price']",
     );

     return $xph->xpSubQueries($itemQuery, $fieldQueries);
 }
 /**
  * Query name, URL, listed price and retail price for every `row` element
  * of the `categoryTable` container.
  *
  * @param XPathHelper $xph XPath helper for the search result page.
  *
  * @return mixed Whatever XPathHelper::xpSubQueries() returns for the
  *               product row queries.
  */
 public function parseSearchResult(XPathHelper $xph)
 {
     $rowQuery = "//div[@id='categoryTable']/div[@class='row']";
     $fieldQueries = array(
         'product.name' => "div[@class='rowInfo']/div[@class='rowWrapper']/div[@class='rowInfoA']/h3/a",
         'product.url' => "div[@class='rowInfo']/div[@class='rowWrapper']/div[@class='rowInfoA']/h3/a/@href",
         'product.price_listed' => "div[@class='rowInfo']/div[@class='rowWrapper']/div[@class='rowInfoB']/div/div[@id='priceLabelContainer']/div[@class='saleValue-Price-Search']",
         'product.price_retail' => "div[@class='rowInfo']/div[@class='rowWrapper']/div[@class='rowInfoB']/div/div[@id='priceLabelContainer']/div[@class='listRegular-Price'][1]",
     );

     return $xph->xpSubQueries($rowQuery, $fieldQueries);
 }
Ejemplo n.º 9
0
 /**
  * Query name, URL, rating, listed price and retail price for every
  * `prodSlotWide` element of the `display-results-content` container.
  *
  * @param XPathHelper $xph XPath helper for the search result page.
  *
  * @return mixed Whatever XPathHelper::xpSubQueries() returns for the
  *               product slot queries.
  */
 public function parseSearchResult(XPathHelper $xph)
 {
     $slotQuery = "//div[@id='display-results-content']/div[@class='prodSlotWide']";
     $fieldQueries = array(
         'product.name' => "p[@class='description']",
         'product.url' => "p[@class='description']/a/@href",
         'product.rating' => "div[@class='details']/div[@class='starsAndPrice']/span/a/img/@title",
         'product.price_listed' => "div[@class='details']/div[@class='starsAndPrice']/span[@class='price']/text()[1]",
         'product.price_retail' => "div[@class='details']/div[@class='starsAndPrice']/span[@class='price']/span[@class='crossed-out-price']",
     );

     return $xph->xpSubQueries($slotQuery, $fieldQueries);
 }
Ejemplo n.º 10
0
 /**
  * Query name, URL, SKU and rating for every `itemCell` element in the
  * first child div of the `productList` container.
  *
  * @param XPathHelper $xph XPath helper for the search result page.
  *
  * @return mixed Whatever XPathHelper::xpSubQueries() returns for the
  *               item cell queries.
  */
 public function parseSearchResult(XPathHelper $xph)
 {
     $cellQuery = "//div[@class='productList']/div[1]/div[@class='itemCell']";
     $fieldQueries = array(
         'product.name' => "div[@class='itemText']/div[@class='wrapper']/a/span[@class='itemDescription' and starts-with(@id, 'title')]",
         'product.url' => "div[@class='itemText']/div[@class='wrapper']/a/@href",
         'product.sku' => "div[@class='itemText']/ul[@class='featureList']/li[contains(., 'Model #:')]/text()",
         'product.rating' => "div[@class='itemGraphics']/a[@class='itemRating']/@title",
     );

     return $xph->xpSubQueries($cellQuery, $fieldQueries);
 }