# NOTE(review): the statements from here down to the "Test scraper" header are
# an exact repeat of the statements that close this script. They look like a
# paste/merge artifact -- confirm whether they are meant to run twice.
$date = time();
# NOTE(review): $latlng is read here before any assignment visible in this
# script (it is first assigned in the "Geo function" section below).
$arr = array("name", "breed", $date, $latlng);

# Metadata functions.
# Reads the metadata value stored under 'keyname', passing 'No message yet' as
# the second (fallback) argument. The `$default = ...` form is an assignment
# expression used as an argument, so it also creates a variable named $default.
$latest_message = scraperwiki::get_metadata('keyname', $default = 'No message yet');
print $latest_message;
# NOTE(review): the value is saved under 'latest_message', which is a different
# key from the 'keyname' read above -- verify that this is intentional.
$latest_message = 'Scraper input';
scraperwiki::save_metadata('latest_message', $latest_message);
# $arr is reassigned here without the previous value having been used.
$arr = array("breed", "name");

# Test scraper for PHP language.
# Should contain all our documented PHP functions.
# A fail in this scraper indicates a code failure somewhere.
require 'scraperwiki/simple_html_dom.php';

# Scrape function.
# TODO: Clarify, can we send POST parameters? Does not fail.
# The associative array is passed as the second argument to scrape().
$arr = array("foo" => "bar");
$html = scraperwiki::scrape("http://scraperwiki.com/hello_world.html", $arr);
print $html;

# Geo function.
# Prints only element 0 of the value returned for the postcode.
$latlng = scraperwiki::gb_postcode_to_latlng("E1 5AW");
print $latlng[0];

# Save function including date and latlng.
# save() receives array('name') as its first argument and the record as its
# second. NOTE(review): the saved record contains only 'name' and 'breed';
# $date and $latlng are not part of it despite the heading above.
$arr = array('name' => 'Fluffles', 'breed' => 'Alsatian');
scraperwiki::save(array('name'), $arr);
$date = time();
# Built but not used by any later statement visible in this script.
$arr = array("name", "breed", $date, $latlng);

# Metadata functions.
# Same read / print / save sequence as at the top of the script.
$latest_message = scraperwiki::get_metadata('keyname', $default = 'No message yet');
print $latest_message;
$latest_message = 'Scraper input';
scraperwiki::save_metadata('latest_message', $latest_message);
$arr = array("breed", "name");
$th = $tabletr->find("th"); $tds = $tabletr->find("td"); $record[$th[0]->plaintext] = $tds[1]->plaintext; unset($th); unset($tds); } unset($tabletrs); $dom->__destruct(); unset($dom); unset($html); scraperwiki::save(array('ID', 'sector'), $record); sleep(2); return $record; } $sourcescraper = 'tedscrapper'; $lasttime = scraperwiki::get_metadata('lasttime', -1); scraperwiki::attach($sourcescraper); print "Querying for data\n"; $data = scraperwiki::sqliteexecute("select distinct time, sector, url from tedscrapper.swdata where time >= '" . $lasttime . "' order by time"); print count($data->data) . " items to process\n"; $count = 0; foreach ($data->data as $ind => $item) { $time = $item[0]; $sector = $item[1]; $url = $item[2]; $record = scrapeTEDDataPage($url, $sector); print $count++ . " " . $record['TI'] . " " . memory_get_usage() / 1024 / 1024 . "MB\n"; scraperwiki::save_metadata('lasttime', $time); unset($item); unset($record); unset($time);
/**
 * Advance the persisted "datefrom" cursor by one calendar day and process it.
 *
 * Reads the "datefrom" and "dateto" metadata values (both default to
 * "1980-01-01"), computes the day after "datefrom", hands that day to
 * mainDate() as a 'Y-m-d' string, and then writes both metadata values back.
 * "dateto" is written back re-formatted as 'Y-m-d' but otherwise unchanged.
 *
 * @return void
 */
function begin()
{
    $dateFrom = scraperwiki::get_metadata("datefrom", "1980-01-01");
    $dateTo = scraperwiki::get_metadata("dateto", "1980-01-01");

    $dayFrom = $dateFrom;
    $dayTo = $dateTo;

    // Number of days processed per invocation (the original loop bound).
    $daysPerRun = 1;

    for ($i = 1; $i <= $daysPerRun; $i++) {
        // Parse the cursor once instead of once per date component.
        $fromTs = strtotime($dayFrom);

        // Use the four-digit year ("Y"). A two-digit year ("y") is remapped by
        // mktime() (0-69 => 2000-2069, 70-100 => 1970-2000), which moves any
        // date outside 1970-2069 into the wrong century.
        // Day + 1 may overflow the month; mktime() normalises it into the
        // following month/year.
        $nextDay = mktime(
            0,
            0,
            0,
            (int) date("m", $fromTs),
            (int) date("d", $fromTs) + 1,
            (int) date("Y", $fromTs)
        );
        $dayFrom = date('Y-m-d', $nextDay);

        mainDate($dayFrom);

        // Persist progress after each processed day.
        scraperwiki::save_metadata("datefrom", date('Y-m-d', strtotime($dayFrom)));
        scraperwiki::save_metadata("dateto", date('Y-m-d', strtotime($dayTo)));
    }
}
<?php require 'scraperwiki.php'; ###################################### # Basic PHP scraper ###################################### $max = 10032586; $counter = scraperwiki::get_metadata('counter'); for ($i = 0; $i < 1000; $i++) { $counter++; if ($counter == $max) { scraperwiki::save_metadata('counter', 10000000); $i = 1001; } $html = oneline(scraperwiki::scrape("http://www.ukrlp.co.uk/ukrlp/ukrlp_provider.page_pls_provDetails?x=&pn_p_id=" . $counter . "&pv_status=VERIFIED&pv_vis_code=L")); preg_match_all('|<div class="pod_main_body">(.*?<div )class="searchleft">|', $html, $arr); if (isset($arr[1][0])) { $code = $arr[1][0]; } else { $code = ''; } if ($code != '') { preg_match_all('|<div class="provhead">UKPRN: ([0-9]*?)</div>|', $code, $num); if (isset($num[1][0])) { $num = trim($num[1][0]); } else { $num = ''; } preg_match_all('|</div>.*?<div class="provhead">(.*?)<|', $code, $name); if (isset($name[1][0])) { $name = trim($name[1][0]);