/**
 * Classify a fetched page by probing it for known marker elements.
 *
 * The probes run in a fixed order and the first XPath query that yields a
 * non-null result decides the page type. If no probe matches, the page dump
 * is appended to a diagnostics file and an exception is raised.
 *
 * @param XPathHelper $xph helper wrapping the page to classify
 *
 * @return string "searchResult" or "product"
 *
 * @throws Exception when none of the known markers is present
 */
public function parsePageType(XPathHelper $xph) {
    // Ordered list of (XPath expression, resulting page type).
    $probes = array(
        array("//*[@id='noResultsTitle']", "searchResult"),
        array("//*[@id='s-result-count']", "searchResult"),
        array("//*[@id='atfResults']", "searchResult"),
        array("//div[@id='product-title_feature_div']/div[@class='buying'] | //form[@id='handleBuy']/div[@class='buying']", "product"),
    );

    foreach ($probes as $probe) {
        list($expression, $pageType) = $probe;
        // A null result from xpQuery() is treated as "marker not present".
        if ($xph->xpQuery($expression) !== null) {
            return $pageType;
        }
    }

    // TODO: fix this (captcha detection is currently disabled)
    //if ('Robot Check' == $xph->queryValue("//html/body/title/text()", 1)) return "captcha";

    // Unknown page: keep a timestamped dump so it can be analysed later.
    $ci =& get_instance();
    $separator = "\n\n----------------------------------------------------------\n";
    $html_contents = $separator . date('Y-m-d H:i:s') . "\n" . $xph->dump();
    $logFile = $ci->config->item('file_root') . "output.amazon.failed.parse.txt";
    file_put_contents($logFile, $html_contents, FILE_APPEND);

    throw new Exception(__FUNCTION__ . ": unexpected page type at " . $xph->__toString());
}
/**
 * Command-line entry point: parses the options and dispatches the requested action.
 *
 * Recognised options:
 *   -x <action>  action to run (mandatory), see the switch below for the accepted names
 *   -t <target>  target passed to the action (null when omitted)
 *   -i <file>    input file, one entry per line; a single trailing empty line is dropped
 *   -k <keyword> keyword passed to the action (empty string when omitted)
 *   -v <level>   stored as an integer in XPathHelper::$_debug
 *   -p <proxy>   stored in XPathHelper::$_proxy
 *   -s <value>   stored as an integer in XPathHelper::$_sleep
 *   -h           report the usage text through fatal_error()
 *
 * When -i is not given, the input list consists of the single -t target.
 */
function main() {
    // NOTE(review): $help is never defined inside this function; it is presumably a
    // file-level variable holding the usage text - confirm. Binding it as a global
    // avoids reading an undefined local when -h is given.
    global $help;

    // 's:' was missing from the option spec, so the -s branch below could never fire.
    $opt = getopt('x:t:i:k:hv:p:s:');

    if (isset($opt['h'])) {
        fatal_error($help);
    }
    if (isset($opt['v'])) {
        XPathHelper::$_debug = intval($opt['v']);
    }
    if (isset($opt['p'])) {
        XPathHelper::$_proxy = $opt['p'];
    }
    $target = isset($opt['t']) ? $opt['t'] : null;
    if (!isset($opt['x'])) {
        fatal_error("-x argument is mandatory");
    }
    if (isset($opt['s'])) {
        XPathHelper::$_sleep = (int) $opt['s'];
    }

    if (isset($opt['i'])) {
        $contents = file_get_contents($opt['i']);
        if ($contents === false) {
            // Previously an unreadable file silently produced an empty work list.
            // fatal_error() presumably terminates the script - TODO confirm; the cast
            // below keeps the original fall-through behavior if it does not.
            fatal_error("unable to read input file: " . $opt['i']);
        }
        $input = explode("\n", (string) $contents);
        // Drop the empty element produced by a trailing newline, and nothing else.
        if (($tail = array_pop($input)) !== '') {
            array_push($input, $tail);
        }
    } else {
        $input = array($target);
    }

    $keyword = isset($opt['k']) ? $opt['k'] : '';

    switch ($opt['x']) {
        case 'product-search':
            productSearch($keyword, $target);
            break;

        case 'product-details':
            foreach ($input as $url) {
                productDetails($keyword, $url);
            }
            break;

        case 'product-details2':
            foreach ($input as $url) {
                productDetails2($url);
            }
            break;

        case 'product-parse':
            foreach ($input as $url) {
                productParse($url);
            }
            break;

        case 'product-listall':
            productfinder_listAll($target);
            break;

        case 'product-search-upc':
            // An explicit -k keyword takes precedence over the input list.
            if (isset($opt['k'])) {
                priceCheck(array($keyword), $target);
            } else {
                priceCheck($input, $target);
            }
            break;

        case 'product-search-upc2':
            if (isset($opt['k'])) {
                priceCheck2(array($keyword), $target);
            } else {
                priceCheck2($input, $target);
            }
            break;

        case 'product-search-mpn':
            if (isset($opt['k'])) {
                fatal_error("unexpected -k option for action 'product-search-mpn'");
            } else {
                priceCheckMPN($input, $target);
            }
            break;

        case 'fetch':
            // Follow redirects, then print the helper's dump of the target.
            XPathHelper::$_curlopts[CURLOPT_FOLLOWLOCATION] = true;
            $xph = new XPathHelper($target);
            echo $xph->dump();
            break;

        default:
            fatal_error("unexpected -x action: " . $opt['x']);
    }
}