/**
 * Classifies the page wrapped by the given XPath helper.
 *
 * A page is reported as "searchResult" when the query for
 * div[@class='srListing'] yields a non-null result; every other page is
 * currently reported as "product" (see the FIXME notes below).
 *
 * @param XPathHelper $xph Helper wrapping the page to classify.
 * @return string Either "searchResult" or "product".
 */
public function parsePageType(XPathHelper $xph)
{
    $listing = $xph->xpQuery("//div[@class='srListing']");
    if (null !== $listing) {
        return "searchResult";
    }

    // FIXME: detect product pages explicitly; until then anything that is
    // not a listing is treated as a product.
    // FIXME: detect the "no result" page.
    return "product";
}
/**
 * Classifies the page wrapped by the given XPath helper.
 *
 * The probes below are tried in order and the first one whose query result
 * is non-null decides the page type. When none matches, the helper's dump is
 * appended (with a timestamp) to "output.amazon.failed.parse.txt" under the
 * configured 'file_root' and an exception is thrown.
 *
 * @param XPathHelper $xph Helper wrapping the page to classify.
 * @return string Either "searchResult" or "product".
 * @throws Exception When no probe matches the page.
 */
public function parsePageType(XPathHelper $xph)
{
    // Ordered probe table: XPath expression => page type it identifies.
    $probes = array(
        "//*[@id='noResultsTitle']" => "searchResult",
        "//*[@id='s-result-count']" => "searchResult",
        "//*[@id='atfResults']" => "searchResult",
        "//div[@id='product-title_feature_div']/div[@class='buying'] | //form[@id='handleBuy']/div[@class='buying']" => "product",
    );

    foreach ($probes as $expression => $pageType) {
        if ($xph->xpQuery($expression) !== null) {
            return $pageType;
        }
    }

    // TODO: fix this
    //if ('Robot Check' == $xph->queryValue("//html/body/title/text()", 1)) return "captcha";

    // Unrecognised page: keep a copy of it for later analysis before failing.
    $ci =& get_instance();
    $report = "\n\n----------------------------------------------------------\n"
        . date('Y-m-d H:i:s')
        . "\n"
        . $xph->dump();
    $logFile = $ci->config->item('file_root') . "output.amazon.failed.parse.txt";
    file_put_contents($logFile, $report, FILE_APPEND);

    throw new Exception(__FUNCTION__ . ": unexpected page type at " . $xph->__toString());
}