Ejemplo n.º 1
0
 /**
  * Classify a fetched page by probing it for known element ids.
  *
  * A probe counts as a hit when xpQuery() returns anything other than null.
  * Search-result markers are tried first, then the product "buying" block.
  * Anything else is currently reported as "captcha".
  *
  * @param XPathHelper $xph helper wrapping the page to classify
  *
  * @return string "searchResult", "product" or "captcha"
  *
  * @throws Exception only from the fallback at the bottom, which is
  *                   unreachable at the moment (see the note below)
  */
 public function parsePageType(XPathHelper $xph)
 {
     // Any one of these ids marks the page as a search result listing.
     $searchMarkers = array(
         "//*[@id='noResultsTitle']",
         "//*[@id='s-result-count']",
         "//*[@id='atfResults']",
     );
     foreach ($searchMarkers as $marker) {
         if ($xph->xpQuery($marker) !== null) {
             return "searchResult";
         }
     }

     $productMarker = "//div[@id='product-title_feature_div']/div[@class='buying'] | //form[@id='handleBuy']/div[@class='buying']";
     if ($xph->xpQuery($productMarker) !== null) {
         return "product";
     }

     // TODO: fix this
     //if ('Robot Check' == $xph->queryValue("//html/body/title/text()", 1))
     // NOTE(review): with the condition above disabled, this return is
     // unconditional, so every unrecognised page is reported as a captcha
     // and the failure logging below never runs.
     return "captcha";

     // Default: append the page dump to a log file for later analysis, then fail.
     $ci =& get_instance();
     $logEntry = "\n\n----------------------------------------------------------\n" . date('Y-m-d H:i:s') . "\n" . $xph->dump();
     $logPath = $ci->config->item('file_root') . "output.amazon.failed.parse.txt";
     file_put_contents($logPath, $logEntry, FILE_APPEND);
     throw new Exception(__FUNCTION__ . ": unexpected page type at " . $xph->__toString());
 }
Ejemplo n.º 2
0
/**
 * Command-line entry point: parse options and dispatch the requested action.
 *
 * Options read from the command line:
 *   -x ACTION  action to run (mandatory); see the switch below for valid names
 *   -t TARGET  target passed to the action (null when omitted)
 *   -i FILE    input file, one entry per line; a trailing empty line is dropped
 *   -k KEYWORD keyword passed to the search/price-check actions ('' when omitted)
 *   -v LEVEL   stored as an integer in XPathHelper::$_debug
 *   -p PROXY   stored in XPathHelper::$_proxy
 *   -s SECONDS stored as an integer in XPathHelper::$_sleep
 *   -h         report the help text through fatal_error()
 *
 * Without -i, the input list is a single element holding the target.
 */
function main()
{
    // $help is not defined inside this function; the original passed an
    // undefined local to fatal_error().
    // NOTE(review): presumably the usage text lives at file scope — confirm.
    global $help;

    // 's:' was missing from the option string, so the -s handling below
    // could never see a value.
    $opt = getopt('x:t:i:k:hv:p:s:');

    if (isset($opt['h'])) {
        fatal_error($help);
    }
    if (isset($opt['v'])) {
        XPathHelper::$_debug = intval($opt['v']);
    }
    if (isset($opt['p'])) {
        XPathHelper::$_proxy = $opt['p'];
    }
    $target = isset($opt['t']) ? $opt['t'] : null;
    if (!isset($opt['x'])) {
        fatal_error("-x argument is mandatory");
    }
    if (isset($opt['s'])) {
        XPathHelper::$_sleep = (int) $opt['s'];
    }

    if (isset($opt['i'])) {
        $contents = file_get_contents($opt['i']);
        if ($contents === false) {
            // Report the unreadable file instead of silently running with no input.
            fatal_error("cannot read input file: " . $opt['i']);
        }
        // The cast keeps the previous outcome (an empty list) should
        // fatal_error() return rather than terminate.
        $input = explode("\n", (string) $contents);
        // Drop the empty element produced by a trailing newline.
        if (end($input) === '') {
            array_pop($input);
        }
    } else {
        $input = array($target);
    }

    $keyword = isset($opt['k']) ? $opt['k'] : '';

    switch ($opt['x']) {
        case 'product-search':
            productSearch($keyword, $target);
            break;
        case 'product-details':
            foreach ($input as $url) {
                productDetails($keyword, $url);
            }
            break;
        case 'product-details2':
            foreach ($input as $url) {
                productDetails2($url);
            }
            break;
        case 'product-parse':
            foreach ($input as $url) {
                productParse($url);
            }
            break;
        case 'product-listall':
            productfinder_listAll($target);
            break;
        case 'product-search-upc':
            // An explicit -k keyword takes precedence over the input list.
            if (isset($opt['k'])) {
                priceCheck(array($keyword), $target);
            } else {
                priceCheck($input, $target);
            }
            break;
        case 'product-search-upc2':
            if (isset($opt['k'])) {
                priceCheck2(array($keyword), $target);
            } else {
                priceCheck2($input, $target);
            }
            break;
        case 'product-search-mpn':
            // This action only works from the input list; -k is rejected.
            if (isset($opt['k'])) {
                fatal_error("unexpected -k option for action 'product-search-mpn'");
            } else {
                priceCheckMPN($input, $target);
            }
            break;
        case 'fetch':
            // Follow redirects, then print the dump of the fetched target.
            XPathHelper::$_curlopts[CURLOPT_FOLLOWLOCATION] = true;
            $xph = new XPathHelper($target);
            echo $xph->dump();
            break;
        default:
            fatal_error("unexpected -x action: " . $opt['x']);
    }
}