Ejemplo n.º 1
0
 /**
  * Classify a page as a search-result listing or a product page.
  *
  * Detection is incomplete: only the search-result listing is actually
  * probed for; everything else is currently reported as a product.
  *
  * @param XPathHelper $xph XPath helper queried through xpQuery().
  *
  * @return string "searchResult" when the srListing query yields a non-null
  *                result, "product" otherwise.
  */
 public function parsePageType(XPathHelper $xph)
 {
     $listing = $xph->xpQuery("//div[@class='srListing']");
     if (null !== $listing) {
         return "searchResult";
     }

     // FIXME: detect product pages explicitly; for now every page without
     // a listing falls through to "product".
     // FIXME: detect the "no result" page.
     return "product";
 }
Ejemplo n.º 2
0
 /**
  * Classify a page by probing it for known marker elements.
  *
  * The probes run in a fixed order and the first non-null xpQuery() result
  * decides the type. Pages matching none of the probes are currently always
  * reported as "captcha" (see the TODO in the body).
  *
  * @param XPathHelper $xph XPath helper queried through xpQuery().
  *
  * @return string One of "searchResult", "product" or "captcha".
  */
 public function parsePageType(XPathHelper $xph)
 {
     // Any one of these elements marks the page as a search result;
     // they are checked in this order.
     $searchResultMarkers = array(
         "//*[@id='noResultsTitle']",
         "//*[@id='s-result-count']",
         "//*[@id='atfResults']",
     );
     foreach ($searchResultMarkers as $marker) {
         if ($xph->xpQuery($marker) !== null) {
             return "searchResult";
         }
     }

     $buyingBlock = $xph->xpQuery("//div[@id='product-title_feature_div']/div[@class='buying'] | //form[@id='handleBuy']/div[@class='buying']");
     if ($buyingBlock !== null) {
         return "product";
     }

     // TODO: fix this - the captcha check below is disabled, so every
     // unrecognised page is reported as a captcha.
     //if ('Robot Check' == $xph->queryValue("//html/body/title/text()", 1))
     return "captcha";

     // NOTE(review): everything below is unreachable while the return above
     // is unconditional. It is the intended default path: append a dump of
     // the page to a log file for analysis, then fail loudly.
     $ci =& get_instance();
     $timestamp = date('Y-m-d H:i:s');
     $dumpEntry = "\n\n----------------------------------------------------------\n" . $timestamp . "\n" . $xph->dump();
     $logPath = $ci->config->item('file_root') . "output.amazon.failed.parse.txt";
     file_put_contents($logPath, $dumpEntry, FILE_APPEND);
     throw new Exception(__FUNCTION__ . ": unexpected page type at " . $xph->__toString());
 }