/**
 * Builds the product record for a product detail page.
 *
 * Queries the BlockContent node(s) under #ProductDetails for name, retail price, listed price
 * and SKU, keeps only the first row returned by xpSubQueries(), and then adds the description
 * text read from #ProductDescription.
 *
 * @param XPathHelper $xph helper for the page being parsed
 * @return array record keyed by 'product.*' field names; when no row matched it contains
 *               only 'product.description'
 */
public function parseProductDetails(XPathHelper $xph)
 {
     // Shared prefix of every field that lives in the details grid.
     $grid = "div[@class='ProductMain']/div[@class='ProductDetailsGrid']";
     $fields = array(
         'product.name' => 'h2',
         'product.price_retail' => $grid . "/div[@class='DetailRow RetailPrice']/div[@class='Value']",
         'product.price_listed' => $grid . "/div[@class='DetailRow PriceRow']/div[@class='Value']/em[@class='ProductPrice VariationProductPrice']",
         'product.sku' => $grid . "/div[@class='DetailRow ProductSKU']/div[@class='Value']",
     );
     $rows = $xph->xpSubQueries("//div[@id='ProductDetails']//div[@class='BlockContent']", $fields);
     // reset() takes its argument by reference, so it needs a real variable rather than a call
     // result. It also returns false for an empty array, which must not be indexed below.
     $record = (is_array($rows) && count($rows) > 0) ? reset($rows) : array();
     if (!is_array($record)) {
         $record = array();
     }
     $record['product.description'] = $xph->queryValue("//div[@id='ProductDescription']/div[@class='ProductDescriptionContainer']");
     /** TODO: parse additional data fields for weight, quantity, brand, rating..
      *	//div[@id='ProductDetails']//div[@class='BlockContent']/div[@class='ProductMain']/div[@class='ProductDetailsGrid']/div[@class='DetailRow']
      *	will return a list of div[@class='Label'] and div[@class='Value'] children
      */
     return $record;
 }
 /**
  * Classifies the page as a search result listing or a product page.
  *
  * A page with a div of class 'srListing' is reported as "searchResult"; every other page is
  * currently reported as "product" because real product / no-result detection is not
  * implemented yet.
  *
  * @param XPathHelper $xph helper for the page being classified
  * @return string "searchResult" or "product"
  */
 public function parsePageType(XPathHelper $xph)
 {
     $listing = $xph->xpQuery("//div[@class='srListing']");
     if (null !== $listing) {
         return "searchResult";
     }
     //FIXME detect product
     //FIXME: detect no result
     return "product";
 }
 /**
  * Opens $href through the parent implementation while a desktop Chrome user agent is forced.
  *
  * XPathHelper::$_curlopts is static, so the override is applied only for the duration of the
  * parent call and the previous options are put back afterwards, including when the parent
  * throws, so the user agent cannot leak into later requests.
  *
  * @param mixed $href    forwarded unchanged to parent::openHref()
  * @param mixed $pageUrl not used by this override.
  *                       NOTE(review): it is not forwarded to the parent either; confirm
  *                       whether that is intentional.
  * @return mixed whatever parent::openHref() returns
  * @throws Exception anything thrown by the parent, rethrown after the options are restored
  */
 public function openHref($href, $pageUrl = NULL)
 {
     $savedOpts = XPathHelper::$_curlopts;
     XPathHelper::$_curlopts[CURLOPT_USERAGENT] = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.97 Safari/537.11";
     try {
         $res = parent::openHref($href);
     } catch (Exception $e) {
         // Restore the shared options before propagating the failure.
         XPathHelper::$_curlopts = $savedOpts;
         throw $e;
     }
     XPathHelper::$_curlopts = $savedOpts;
     return $res;
 }
 /**
  * Builds the product record for a product detail page.
  *
  * Three page variants are handled:
  *  - a broken page whose fifth text node under itemPageWrapper starts with 'Exception'
  *    yields an empty array;
  *  - a discontinued item (apology headline) yields only 'product.sku_and_upc';
  *  - a regular item yields sku/upc, the collapsed label/value pairs from the info tab,
  *    retail/listed prices and 'product.url'.
  *
  * @param XPathHelper $xph helper for the page being parsed
  * @return array record keyed by field name; empty for a broken page
  */
 public function parseProductDetails(XPathHelper $xph)
 {
     // Broken page with an error message.
     $errorText = $xph->queryValue("//div[@class='itemPageWrapper']/text()[5]");
     if (substr($errorText, 0, 9) === 'Exception') {
         return array();
     }

     $result = array();

     // Discontinued items have no price, so only the combined code block is collected.
     $apologyTitle = $xph->queryValue("//div[@id='unluckyInfo']/div[@id='apology']/h2");
     if ($apologyTitle === "We're Sorry, We No Longer Carry This Item") {
         $codeRows = $xph->xpSubQueries(
             "//div[@id='unluckyInfo']/div[@id='apology']",
             array('product.sku_and_upc' => "//div[@class='itemDataCode']")
         );
         return $this->array_merge_first_record($result, $codeRows);
     }

     // Regular items: the code block containing a span is the SKU, the one without is the UPC.
     $codeRows = $xph->xpSubQueries(
         "//div[@class='imgBorderBgNew']",
         array(
             'product.sku' => "//div[@class='itemDataCode'][span]",
             'product.upc' => "//div[@class='itemDataCode'][not(span)]",
         )
     );
     $result = $this->array_merge_first_record($result, $codeRows);

     $labelValueRows = $xph->xpSubQueries(
         "//div[@id='tabInfoContent']/table/tbody/tr/td/table/tbody/tr/td/div[@class='itemData']",
         array('label' => 'b', 'value' => 'text()')
     );
     // TRICK: collapseLabelValuePairs() is wrapped in an array so that its result has the
     // list-of-records shape array_merge_first_record() is given everywhere else.
     $collapsed = array($this->collapseLabelValuePairs($labelValueRows));
     $result = $this->array_merge_first_record($result, $collapsed);

     $priceRows = $xph->xpSubQueries(
         "//div[@id='tabPricingContent']/table/tbody/tr/td/div[@class='pricingDisplay']",
         array(
             'product.price_retail' => "table/tbody/tr/td/div[@class='retailPrice']",
             'product.price_listed' => "table/tbody/tr/td/div[@class='salePrice']|table/tbody/tr/td/div[@class='specialPrice']",
         )
     );
     $result = $this->array_merge_first_record($result, $priceRows);

     $result['product.url'] = $xph->getUrl();
     return $result;
 }
 /**
  * Queries the #item-spc container for product name, brand, listed price and retail price.
  *
  * @param XPathHelper $xph helper for the page being parsed
  * @return mixed the unmodified result of XPathHelper::xpSubQueries()
  */
 public function parseProductDetails(XPathHelper $xph)
 {
     $summary = "div[@id='item-summary']";
     $priceBox = "div[@id='item-attributes']/div[@id='item-price']";
     $fields = array(
         'product.name' => $summary . "/h1[@class='item-name']",
         'product.brand' => $summary . "/h2[@class='item-brand']",
         'product.price_listed' => $priceBox . "/div[@id='swanson-price']/b[@class='price']",
         'product.price_retail' => $priceBox . "/div[@id='retail-price']/b[@class='price']",
     );
     return $xph->xpSubQueries("//div[@id='item-spc']", $fields);
 }
 /**
  * Looks up the href of the pagination link whose text contains 'Next'.
  *
  * @param XPathHelper $xph helper for the seller list page
  * @return mixed whatever XPathHelper::queryValue() yields for that href
  *               (presumably an empty/null value on the last page; verify against queryValue)
  */
 public function parseOfferSellerListPagination(XPathHelper $xph)
 {
     $nextHref = $xph->queryValue("//span[@id='online-pagination']/a[contains(.,'Next')]/@href");
     return $nextHref;
 }
 /**
  * Classifies a fetched page as "searchResult", "product" or "captcha".
  *
  * The search-result markers are probed in order, then the product buy box. Every page that
  * matches none of them is currently reported as "captcha" because the actual captcha test is
  * disabled.
  *
  * @param XPathHelper $xph helper for the page being classified
  * @return string "searchResult", "product" or "captcha"
  */
 public function parsePageType(XPathHelper $xph)
 {
     // Any one of these nodes marks a search result page (the first is the "no results" title).
     $searchMarkers = array(
         "//*[@id='noResultsTitle']",
         "//*[@id='s-result-count']",
         "//*[@id='atfResults']",
     );
     foreach ($searchMarkers as $marker) {
         if ($xph->xpQuery($marker) !== null) {
             return "searchResult";
         }
     }

     $buyBox = "//div[@id='product-title_feature_div']/div[@class='buying'] | //form[@id='handleBuy']/div[@class='buying']";
     if ($xph->xpQuery($buyBox) !== null) {
         return "product";
     }

     // TODO: fix this
     //if ('Robot Check' == $xph->queryValue("//html/body/title/text()", 1))
     return "captcha";

     // NOTE: everything below is unreachable while the return above is unconditional. It is the
     // intended default once the captcha check is restored: save the page for analysis and fail.
     $ci =& get_instance();
     $dump = "\n\n----------------------------------------------------------\n" . date('Y-m-d H:i:s') . "\n" . $xph->dump();
     file_put_contents($ci->config->item('file_root') . "output.amazon.failed.parse.txt", $dump, FILE_APPEND);
     throw new Exception(__FUNCTION__ . ": unexpected page type at " . $xph->__toString());
 }
 /**
  * Queries each list item of the category-products listing for name, URL and prices.
  *
  * @param XPathHelper $xph helper for the search result page
  * @return mixed the unmodified result of XPathHelper::xpSubQueries()
  */
 public function parseSearchResult(XPathHelper $xph)
 {
     $title = "h2[@class='product-name']";
     $priceBox = "div[@class='price-box']";
     $fields = array(
         'product.name' => $title,
         'product.url' => $title . "/a/@href",
         'product.price_listed' => $priceBox . "/p[@class='special-price']/span[@class='price']",
         'product.price_retail' => $priceBox . "/p[@class='old-price']/span[@class='price']",
     );
     return $xph->xpSubQueries("//div[@class='category-products']/ul[@class]/li", $fields);
 }
 /**
  * Queries each row of #categoryTable for product name, URL, listed price and retail price.
  *
  * @param XPathHelper $xph helper for the search result page
  * @return mixed the unmodified result of XPathHelper::xpSubQueries()
  */
 public function parseSearchResult(XPathHelper $xph)
 {
     // Both columns hang off the same wrapper; column A holds the title link, column B the prices.
     $wrapper = "div[@class='rowInfo']/div[@class='rowWrapper']";
     $titleLink = $wrapper . "/div[@class='rowInfoA']/h3/a";
     $priceBox = $wrapper . "/div[@class='rowInfoB']/div/div[@id='priceLabelContainer']";
     $fields = array(
         'product.name' => $titleLink,
         'product.url' => $titleLink . "/@href",
         'product.price_listed' => $priceBox . "/div[@class='saleValue-Price-Search']",
         'product.price_retail' => $priceBox . "/div[@class='listRegular-Price'][1]",
     );
     return $xph->xpSubQueries("//div[@id='categoryTable']/div[@class='row']", $fields);
 }
示例#10
0
 /**
  * Queries each 'prodSlotWide' slot of the results container for name, URL, rating and prices.
  *
  * @param XPathHelper $xph helper for the search result page
  * @return mixed the unmodified result of XPathHelper::xpSubQueries()
  */
 public function parseSearchResult(XPathHelper $xph)
 {
     $description = "p[@class='description']";
     $starsAndPrice = "div[@class='details']/div[@class='starsAndPrice']";
     $price = $starsAndPrice . "/span[@class='price']";
     $fields = array(
         'product.name' => $description,
         'product.url' => $description . "/a/@href",
         // The rating is read from the title attribute of the stars image.
         'product.rating' => $starsAndPrice . "/span/a/img/@title",
         'product.price_listed' => $price . "/text()[1]",
         'product.price_retail' => $price . "/span[@class='crossed-out-price']",
     );
     return $xph->xpSubQueries("//div[@id='display-results-content']/div[@class='prodSlotWide']", $fields);
 }
示例#11
0
    ?>
                        </tr>
                    </table>
                </td>
                <td title="<?php 
    echo $v_rkey;
    ?>
"><?php 
    echo $v_rdom;
    ?>
</td>
<?php 
    if ($sitenodelist->length != 0) {
        ?>
                <td title="<?php 
        echo XPathHelper::listSiteNodeValues($sitenodelist);
        ?>
"><?php 
        echo FLANG_H_SITE;
        ?>
</td>
<?php 
    } else {
        ?>
                <td></td>
<?php 
    }
    ?>
                <td><span class="IPv_<?php 
    echo $v_ipv;
    ?>
示例#12
0
文件: main.php 项目: kostya1017/our
/**
 * Command line entry point: parses the options and dispatches to the action named by -x.
 *
 * Options:
 *   -x action   mandatory, one of the case labels in the switch below
 *   -t target   passed to the action (null when omitted)
 *   -i file     input file, one entry per line; a single trailing empty line is dropped
 *   -k keyword  keyword passed to the action ('' when omitted)
 *   -s sleep    stored as int in XPathHelper::$_sleep
 *   -v level    stored as int in XPathHelper::$_debug
 *   -p proxy    stored in XPathHelper::$_proxy
 *   -h          print help through fatal_error()
 *
 * NOTE(review): the code relies on fatal_error() terminating the script (the switch reads
 * $opt['x'] right after the "mandatory" check); confirm against its definition.
 */
function main()
{
    // 's:' was missing from the spec although -s is read below, so -s could never be set.
    $opt = getopt('x:t:i:k:hv:p:s:');

    if (isset($opt['h'])) {
        // $help was never defined inside this function. Use a global of that name when the
        // script provides one, otherwise fall back to a usage line built from the options.
        $usage = "options: -x <action> [-t target] [-i input-file] [-k keyword] [-s sleep] [-v debug-level] [-p proxy] [-h]";
        fatal_error(isset($GLOBALS['help']) ? $GLOBALS['help'] : $usage);
    }
    if (isset($opt['v'])) {
        XPathHelper::$_debug = intval($opt['v']);
    }
    if (isset($opt['p'])) {
        XPathHelper::$_proxy = $opt['p'];
    }
    $target = isset($opt['t']) ? $opt['t'] : null;
    if (!isset($opt['x'])) {
        fatal_error("-x argument is mandatory");
    }
    if (isset($opt['s'])) {
        XPathHelper::$_sleep = (int) $opt['s'];
    }

    if (isset($opt['i'])) {
        $contents = file_get_contents($opt['i']);
        if ($contents === false) {
            // Previously an unreadable file was silently treated as empty input.
            fatal_error("cannot read input file: " . $opt['i']);
        }
        $input = explode("\n", (string) $contents);
        // Drop the empty element produced by a trailing newline.
        if (end($input) === '') {
            array_pop($input);
        }
    } else {
        $input = array($target);
    }
    $keyword = isset($opt['k']) ? $opt['k'] : '';

    switch ($opt['x']) {
        case 'product-search':
            productSearch($keyword, $target);
            break;
        case 'product-details':
            foreach ($input as $url) {
                productDetails($keyword, $url);
            }
            break;
        case 'product-details2':
            foreach ($input as $url) {
                productDetails2($url);
            }
            break;
        case 'product-parse':
            foreach ($input as $url) {
                productParse($url);
            }
            break;
        case 'product-listall':
            productfinder_listAll($target);
            break;
        case 'product-search-upc':
            // An explicit -k keyword takes precedence over the input list.
            priceCheck(isset($opt['k']) ? array($keyword) : $input, $target);
            break;
        case 'product-search-upc2':
            priceCheck2(isset($opt['k']) ? array($keyword) : $input, $target);
            break;
        case 'product-search-mpn':
            if (isset($opt['k'])) {
                fatal_error("unexpected -k option for action 'product-search-mpn'");
            } else {
                priceCheckMPN($input, $target);
            }
            break;
        case 'fetch':
            XPathHelper::$_curlopts[CURLOPT_FOLLOWLOCATION] = true;
            $xph = new XPathHelper($target);
            echo $xph->dump();
            break;
        default:
            fatal_error("unexpected -x action: " . $opt['x']);
    }
}
示例#13
0
 /**
  * Queries each 'itemCell' in the first child div of the product list for name, URL, SKU
  * and rating.
  *
  * @param XPathHelper $xph helper for the search result page
  * @return mixed the unmodified result of XPathHelper::xpSubQueries()
  */
 public function parseSearchResult(XPathHelper $xph)
 {
     $itemText = "div[@class='itemText']";
     $titleLink = $itemText . "/div[@class='wrapper']/a";
     $fields = array(
         'product.name' => $titleLink . "/span[@class='itemDescription' and starts-with(@id, 'title')]",
         'product.url' => $titleLink . "/@href",
         // The SKU is the text of the feature-list entry that mentions 'Model #:'.
         'product.sku' => $itemText . "/ul[@class='featureList']/li[contains(., 'Model #:')]/text()",
         'product.rating' => "div[@class='itemGraphics']/a[@class='itemRating']/@title",
     );
     return $xph->xpSubQueries("//div[@class='productList']/div[1]/div[@class='itemCell']", $fields);
 }