} } } // closes scopes opened before this section

/**
 * Runs a select query against the scraper datastore.
 *
 * @param string $query Query text handed unchanged to scraperWiki::select().
 * @return mixed false when table_info("swdata") reports nothing (no data
 *               saved yet), otherwise whatever scraperWiki::select() returns.
 */
function getData($query)
{
    if (!count(scraperwiki::table_info("swdata"))) {
        return false;
    }

    return scraperWiki::select($query);
}

if (!count(scraperwiki::table_info("swvariables"))) {
    // if no stored variables are available, start from beginning
    $start = '';
} else {
    // else, load last disease from database
    $start = scraperWiki::get_var('disease', '');
}

// Download and parse the index page that lists the diseases.
$html = scraperWiki::scrape("http://www.who.int/csr/don/archive/disease/en/index.html");
$dom = new simple_html_dom();
$dom->load($html);

// $handleNextEntry says whether the entry currently being looped over
// should be processed.
$handleNextEntry = false;

// no previous entries -> start from beginning
if ($start == '') {
    $handleNextEntry = true;
} else {
    print "starting after: {$start}\n\n";
}

foreach ($dom->find("ul[@class='a_z'] li a") as $data) {
    $disease = trim($data->plaintext);
    print "fetching data for disease: {$disease}";

    if ($handleNextEntry) {
// Third source list: the distinct manufacturer / url / name rows of the
// "current_synths" datastore, requested as JSON.
$synthList3 = file_get_contents("https://api.scraperwiki.com/api/1.0/datastore/sqlite?format=jsondict&name=current_synths&query=select%20DISTINCT%20manufacturer%2C%20url%2C%20name%20from%20%60swdata%60");
if (!empty($synthList3)) {
    $synthList3 = json_decode($synthList3);
}

// Clean every source list and merge them; later lists are placed in front.
// $synthList1 and $synthList2 are presumably filled in earlier in the file.
// traverseList() returns an empty array for a list that failed to download
// or decode, so one bad source no longer breaks the merge.
$synths = traverseList($synthList1);
$synths = array_merge(traverseList($synthList2), $synths);
$synths = array_merge(traverseList($synthList3), $synths);

// Drop exact duplicate rows. array_unique() compares string values, so each
// row is serialized for the comparison and restored afterwards.
$synths = array_map('unserialize', array_unique(array_map('serialize', $synths)));

echo "Total synths: " . count($synths) . "\n";
//var_dump($synths);

if (!empty($synths)) {
    //$dbName = "vintagesynth-scrape-".$today = date("m-d-Y");
    $saveMessage = scraperWiki::save_sqlite(array('manufacturer', 'name', 'url'), $synths);
    //print strval($saveMessage);
    scraperwiki::save_var('total_results', count($synths));
    print scraperWiki::get_var('total_results');
}

/**
 * Removes HTML tags from every string field of each row in a list.
 *
 * Object rows are modified in place and the same objects are returned;
 * array rows are returned as cleaned copies. Non-string field values are
 * left untouched, and rows that are neither object nor array are passed
 * through unchanged.
 *
 * @param mixed $list Array (or iterable object) of rows. Anything else --
 *                    e.g. the false / null / '' left behind by a failed
 *                    download or json_decode() -- yields an empty array.
 * @return array The rows, in their original order.
 */
function traverseList($list)
{
    if (!is_array($list) && !is_object($list)) {
        return array();
    }

    $dataList = array();
    foreach ($list as $item) {
        //Clean up the data
        if (is_object($item) || is_array($item)) {
            foreach ($item as $key => $value) {
                if (!is_string($value)) {
                    continue;
                }

                // "<", then anything that is not ">", then ">": one whole tag.
                $cleaned = preg_replace('/<[^>]*>/', '', $value);
                if ($cleaned === null) {
                    // preg_replace() failed; keep the original value.
                    continue;
                }

                if (is_object($item)) {
                    $item->{$key} = $cleaned;
                } else {
                    $item[$key] = $cleaned;
                }
            }
        }
        $dataList[] = $item;
    }

    return $dataList;
}
scraperwiki::save(array('EMISNumber'), $tosave);
    } catch (Exception $e) {
        // Report the failure together with the current option's value and
        // carry on.
        print 'Caught exception (' . $data->value . '): ' . $e->getMessage() . "\n";
    }
    //break; //uncomment to just process one school
}

// Reset the stored resume marker to 'none'.
scraperWiki::save_var('place', 'none');

// Download and parse the school search page.
$html = scraperWiki::scrape("http://wcedemis.pgwc.gov.za/wced/findschoolO.shtml?2");
require 'scraperwiki/simple_html_dom.php';
$dom = new simple_html_dom();
$dom->load($html);

//scraperwiki::sqliteexecute("CREATE TABLE `swdata` (`EMISNumber` text)");
//scraperWiki::save_var('place','none');
//scraperWiki::save_var('place','0130333352');
//exit();

// 'none' means there is no saved position to resume from.
$place = scraperWiki::get_var('place', 'none');
$resume = true;
if ($place == 'none') {
    $resume = false;
}

foreach ($dom->find("select[name='EMIS_NO'] option") as $data) {
    //loop through the list of ordinary schools
    $d = http_build_query(array('EMIS_NO' => $data->value));
    $dlength = strlen($d);

    if ($resume) {
        //we need to resume, so find the correct place
        if ($place == $data->value) {
            $resume = false;
        } else {
            continue;
        }