// Summary fields for this result, all read from the first cell of the row.
$id = trim($cols[0]->find('input', 0)->value);
$name = trim($cols[0]->find('.resultName', 0)->plaintext);
$url = html_entity_decode($cols[0]->find('.resultName', 0)->find('a', 0)->href);
$address = trim($cols[0]->find('.resultAddress', 0)->plaintext);
$postcode = trim($cols[0]->find('.resultPostcode', 0)->plaintext);

// The rating is parsed out of the alt text of the image in the second cell.
// sscanf() does not return an array when it cannot parse the input at all,
// so check before indexing; any non-numeric result is recorded as "Exempt".
$stars = sscanf($cols[1]->find('img', 0)->alt, "Food hygiene rating is '%d'");
$stars = is_array($stars) ? $stars[0] : null;
if (!is_numeric($stars)) {
    $stars = "Exempt";
}

// Fetch the premises detail page linked from the result and read the extra fields from it.
$premhtml = scraperWiki::scrape($url);
$premdom = new simple_html_dom();
$premdom->load($premhtml);
$businesstype = $premdom->find('h1', 0)->plaintext;
$date = $premdom->find('#ctl00_ContentPlaceHolder1_uxBusinessLastInspection', 0)->plaintext;
$address1 = $premdom->find('#ctl00_ContentPlaceHolder1_uxBusinessAddress1', 0)->plaintext;
$address2 = $premdom->find('#ctl00_ContentPlaceHolder1_uxBusinessAddress2', 0)->plaintext;
$address3 = $premdom->find('#ctl00_ContentPlaceHolder1_uxBusinessAddress3', 0)->plaintext;
$address4 = $premdom->find('#ctl00_ContentPlaceHolder1_uxBusinessAddress4', 0)->plaintext;

// Everything needed has been copied into plain strings, so release the parsed
// document now; one is created per result and they otherwise pile up in memory.
$premdom->clear();
unset($premdom);

$latlng = scraperWiki::gb_postcode_to_latlng($postcode);

// Parse the inspection date once. When it cannot be parsed, store null rather
// than letting date() silently format timestamp 0 (1 January 1970).
// NOTE(review): strtotime() reads slash-separated dates as month/day/year - confirm the site's date format.
$inspectionTime = strtotime((string) $date);
$rssdate = ($inspectionTime === false) ? null : date("r", $inspectionTime);
$date = ($inspectionTime === false) ? null : date("c", $inspectionTime);

$prem = array(
    'id' => $id,
    'name' => html_entity_decode($name),
    'address1' => html_entity_decode($address1),
    'address2' => html_entity_decode($address2),
    'address3' => html_entity_decode($address3),
    'address4' => html_entity_decode($address4),
    'postcode' => $postcode,
    'businesstype' => $businesstype,
    'rating' => $stars,
    // $url was already entity-decoded above; decoding a second time could alter it.
    'url' => $url,
    'rssdate' => $rssdate
);

// Save the record keyed on 'id', with its inspection date and location.
scraperwiki::save(array('id'), $prem, $date, $latlng);
} // closes the per-result block opened before this excerpt

# Those pesky form elements again!
// Re-read the hidden form-state fields from the current page and advance the page counter
// (presumably they are posted back to request the next page - the request is outside this excerpt).
$viewstate = $dom->find('#__VIEWSTATE', 0)->value;
$eventvalidation = $dom->find('#__EVENTVALIDATION', 0)->value;
$page++;
}
}