continue; } // Skip if already captured $cols = $row->find('td'); # Determine the URL of the linked page (e.g. the info for 'WRK' $href = html_entity_decode($row->find('a', 0)->href); $url = $domain . $href; # Get the postcode from the linked page $html2 = scraperWiki::scrape($url); $dom2 = new simple_html_dom(); $dom2->load($html2); $address = trim($dom2->find('address', 0)->plaintext); $lines = explode("\n", $address); $postcode = trim(array_pop($lines)); # Convert postcode to lat/lon list($lat, $lng) = scraperwiki::gb_postcode_to_latlng($postcode); //$postcodeTrimmed = str_replace (' ', '', $postcode); //$latlng = scraperwiki::select("* from ukp.swdata where postcode='{$postcodeTrimmed}';"); # Assemble the record $station = array('code' => trim($cols[1]->plaintext), 'name' => html_entity_decode(trim($cols[0]->plaintext)), 'postcode' => $postcode, 'latitude' => $lat, 'longitude' => $lng, 'url' => $url); # Save the record scraperwiki::save(array('code'), $station); print_r($record); # Limit while testing //if ($i == 10) {break;} # Save the current position scraperwiki::save_var('run_first', $i); } /* Useful pages: http://scraperwiki.com/scrapers/swansea_food_safety_inspections_1/edit/
<?php
# Scratch script: geocode one fixed UK postcode through the ScraperWiki helper,
# print the coordinates, then print a large integer constant.

$postcode = "CF14 2QW";

# Initialise up front so $values is always defined, even if every lookup fails.
$values = array();

# The original script ran the lookup-and-print sequence twice in a row; both
# passes are kept so the printed output is unchanged.
for ($pass = 0; $pass < 2; $pass++) {
    $lat_lng = scraperwiki::gb_postcode_to_latlng($postcode);

    # Only store and print coordinates when the lookup returned something;
    # previously the prints ran unconditionally and hit undefined indexes
    # whenever the lookup failed.
    if ($lat_lng) {
        $values["lat"] = $lat_lng[0];
        $values["lng"] = $lat_lng[1];
        print $values["lat"];
        print $values["lng"];
    }

    # Was "$nameindex = 0; $nameindex = +11111111111115;": the zero was a dead
    # store and the unary plus a no-op, so assign the value directly.
    $nameindex = 11111111111115;
    print $nameindex;
}
$date = time();
# NOTE(review): $latlng is read here before any assignment visible in this
# chunk - confirm it is set earlier in the file.
$arr = array("name", "breed", $date, $latlng);

# Metadata functions.
# The fallback value is passed directly; the original wrote
# "$default = 'No message yet'" inside the call, which also created a stray
# global variable named $default.
$latest_message = scraperwiki::get_metadata('keyname', 'No message yet');
print $latest_message;
$latest_message = 'Scraper input';
scraperwiki::save_metadata('latest_message', $latest_message);
$arr = array("breed", "name");

# Test scraper for PHP language.
# Should contain all our documented PHP functions.
# A fail in this scraper indicates a code failure somewhere.
require 'scraperwiki/simple_html_dom.php';

# Scrape function.
# TODO: Clarify, can we send POST parameters? Does not fail.
$arr = array("foo" => "bar");
$html = scraperwiki::scrape("http://scraperwiki.com/hello_world.html", $arr);
print $html;

# Geo function.
$latlng = scraperwiki::gb_postcode_to_latlng("E1 5AW");
# Guard the index access so a failed lookup does not raise an undefined-offset
# error on a non-array result.
if ($latlng) {
    print $latlng[0];
}

# Save function including date and latlng.
$arr = array('name' => 'Fluffles', 'breed' => 'Alsatian');
scraperwiki::save(array('name'), $arr);
$date = time();
# NOTE(review): this array is built but never passed to save() in the visible
# code - confirm whether a save call is missing.
$arr = array("name", "breed", $date, $latlng);

# Metadata functions.
# NOTE(review): the value is read under 'keyname' but written under
# 'latest_message' - confirm the two keys are meant to differ.
$latest_message = scraperwiki::get_metadata('keyname', 'No message yet');
print $latest_message;
$latest_message = 'Scraper input';
scraperwiki::save_metadata('latest_message', $latest_message);
$arr = array("breed", "name");
require 'scraperwiki/simple_html_dom.php'; $html = scraperwiki::scrape("http://www.nhs.uk/Services/Trusts/GPs/DefaultView.aspx?id=5NL"); print "Downloaded\n"; print $html . "\n"; $dom = new simple_html_dom(); $dom->load($html); print "Parsed\n"; print_r($dom->find('dt')); foreach ($dom->find('dt') as $data) { print "1\n"; $address = $data->next_sibling()->plaintext; $exp = explode(',', $address); $pc = array_pop($exp); //print $pc. "\n"; //print "\n"; $ll = scraperwiki::gb_postcode_to_latlng($pc); //$ll['lat'] = $ll[0]; //$ll['lng'] = $ll[1]; $url = 'http://www.nhs.uk' . $data->first_child()->href; print "2"; /* $doctor = scraperwiki::scrape($url); $docdom = new simple_html_dom(); $docdom->load($doctor); $dd = $docdom->find('ul[class=dr-list]', 0); // if ($dd && $dd->children()) // { // $numdocs = sizeof($dd->children()); //} else {
#if (!isset($qual[1][0])) { $qual[1][0] = '';} #if (!isset($country[1][0])) { $country[1][0] = '';} if (!isset($postcode[1][0])) { $postcode[1][0] = ''; } #if (!isset($postcodedistrict[1][0])) { $postcodedistrict[1][0] = '';} #if (!isset($postcodearea[1][0])) { $postcodearea[1][0] = '';} #if (!isset($address[1][0])) { $address[1][0] = '';} #if (!isset($tel[1][0])) { $tel[1][0] = '';} #if (!isset($mob[1][0])) { $mob[1][0] = '';} #if (!isset($email[1][0])) { $email[1][0] = '';} if (!isset($web[1][0])) { $web[1][0] = ''; } #if (!isset($facebook[1][0])) { $facebook[1][0] = '';} $latlng = scraperwiki::gb_postcode_to_latlng($postcode[1][0]); $lat = $latlng[0]; $lng = $latlng[1]; #if (!isset($lat[1][0])) { $lat[1][0] = '';} #if (!isset($lng[1][0])) { $lng[1][0] = '';} $postcode = str_replace('LL18 3EB', '', $postcode[1][0]); scraperwiki::save(array('business'), array('business' => clean($business[1][0]), 'postcode' => clean($postcode), 'lat' => $lat, 'lng' => $lng, 'web' => clean($web[1][0]), 'probody' => $profbody)); } } function clean($val) { $val = str_replace(' ', ' ', $val); $val = str_replace('&', '&', $val); $val = html_entity_decode($val); $val = strip_tags($val); $val = trim($val);
foreach ($rows as $row) { if ($row->children(0)->tag != "th") { echo "Found station..."; $insert = array(); $insert['name'] = $row->children(0)->children(0)->innertext; $r = $row->children(1)->children(0); if (isset($r)) { $insert['postcode'] = strip_tags($r->innertext); } $r = $row->children(2)->children(0); if (isset($r)) { $insert['code'] = $r->innertext; } if ($insert['postcode'] != "") { echo ".."; $ll = scraperwiki::gb_postcode_to_latlng($insert['postcode']); echo ".."; $insert['lat'] = $ll[0]; $insert['lng'] = $ll[1]; } else { $insert['lat'] = 0; $insert['lng'] = 0; } var_dump($insert); scraperwiki::save_sqlite(array('name'), $insert); echo " Saved\n"; } } echo "Page done... Starting next"; $c++; if (isset($scrape_list[$c])) {
$postcode_start = scraperwiki::gb_postcode_to_latlng("SW170EX"); ?> ); var directionsService = new google.maps.DirectionsService(); var myOptions = { zoom: 13, center: startplace, mapTypeId: google.maps.MapTypeId.ROADMAP }; map = new google.maps.Map(document.getElementById('map_canvas'), myOptions); <?php $counter = 0; foreach ($data as $gp) { $postcode = $gp["postcode"]; $long_lat = scraperwiki::gb_postcode_to_latlng($postcode); ?> var myLatLng<?php echo $counter; ?> = new google.maps.LatLng(<?php echo $long_lat[0] . "," . $long_lat[1]; ?> ); var marker<?php echo $counter; ?> = new google.maps.Marker({ position: myLatLng<?php echo $counter;
/**
 * Rebuild the r_locs table from a parsed page of recycling locations.
 *
 * Drops and recreates r_locs, then inserts one row for every <tr> that has at
 * least one <td>. The first cell is stored as the address. The second cell is
 * split on ", " into type codes, each translated through the map returned by
 * lbl_recyclebins_types() and joined as "description; description; ". When the
 * address contains a postcode-shaped token (two word characters, one or more
 * digits, whitespace, a digit and two word characters), that token is geocoded
 * and its latitude/longitude are stored; otherwise both are empty strings.
 *
 * @param object $dom Parsed document exposing find(); presumably a
 *                    simple_html_dom instance - confirm against the caller.
 * @return void
 */
function lbl_recyclebins_locs($dom)
{
    scraperwiki::sqliteexecute("drop table if exists r_locs");
    scraperwiki::sqlitecommit();
    scraperwiki::sqliteexecute("create table r_locs ('address' string, 'types' string, 'latitude' string, 'longitude' string)");
    scraperwiki::sqlitecommit();

    $descs = lbl_recyclebins_types();

    foreach ($dom->find("tr") as $data) {
        $tds = $data->find("td");
        if (!count($tds)) {
            // Rows without <td> cells carry no location data.
            continue;
        }

        $add = $tds[0]->plaintext;

        // A row may have only one cell; treat a missing second cell as
        // "no types" instead of dereferencing an undefined offset.
        $typeText = isset($tds[1]) ? $tds[1]->plaintext : '';

        $types = '';
        foreach (explode(', ', $typeText) as $code) {
            if (isset($descs[$code])) {
                $label = $descs[$code];
            } else {
                // Retry without surrounding whitespace (the last code often
                // carries trailing blanks), then fall back to the raw code so
                // an unknown type is still recorded rather than stored empty.
                $code = trim($code);
                if ($code === '') {
                    continue;
                }
                $label = isset($descs[$code]) ? $descs[$code] : $code;
            }
            $types .= trim($label) . "; ";
        }

        $lat = '';
        $lon = '';
        if (preg_match('/([\\w]{2}[\\d]+\\s\\d[\\w]{2})/', $add, $matches)) {
            try {
                $geo = scraperwiki::gb_postcode_to_latlng($matches[0]);
                // Assign only when both coordinates are present, so a failed
                // lookup leaves the empty-string defaults untouched.
                if (is_array($geo) && isset($geo[0], $geo[1])) {
                    $lat = $geo[0];
                    $lon = $geo[1];
                }
            } catch (Exception $e) {
                // Best effort: report the failure and store the row ungeocoded.
                print_r($e->getMessage());
            }
        }

        scraperwiki::sqliteexecute("insert into r_locs values (:address,:types,:latitude,:longitude)", array($add, $types, $lat, $lon));
        // Commit per row, as before, so rows already stored survive a later failure.
        scraperwiki::sqlitecommit();
    }
}
if (stristr($data->plaintext, "Site location")) { $values['source'] = "http://www.oxford.gov.uk" . $outerdata->href; $values['image'] = "http://www.oxford.gov.uk" . $data->find("img", 0)->src; #Use HTML Simple Dom to fetch the image; $lines = explode("<BR>", str_replace("<br>", "<BR>", str_replace("<br />", "<BR>", $data->innertext))); #Split on line-breaks. ->innertext returns the element with its HTML foreach ($lines as $line) { $line_detail = explode(':', strip_tags($line)); #Split out by ':' if (count($line_detail) > 1) { #Check we have a : in the line, and it's not an image line. $values[trim($line_detail[0])] = trim($line_detail[1]); #Use bit before : as array key } } list($length, $maxHeight, $clearanceHeight, $maxWidth, $clearanceWidth) = explode("m,", $values['Garage dimensions']); $values['length'] = preg_replace("/[a-zA-Z]+/", "", $length); $values['maxHeight'] = preg_replace("/[a-zA-Z]+/", "", $maxHeight); $values['maxWidth'] = preg_replace("/[a-zA-Z]+/", "", $maxWidth); $values['clearanceWidth'] = preg_replace("/[a-zA-Z]+/", "", $clearanceWidth); $lat_lng = scraperwiki::gb_postcode_to_latlng($values['Postcode']); if ($lat_lng) { $values["lat"] = $lat_lng[0]; $values["lng"] = $lat_lng[1]; } scraperwiki::save(array('Site location'), $values); unset($values); } } } }