/**
 * Scrapes the flight rows found inside the element with id "flight_info_area"
 * and stores one record per flight.
 *
 * The target table is dropped and rebuilt on every successful run, so it only
 * holds the rows seen in the most recent scrape. Records are keyed by
 * ("date", "count").
 *
 * @param string $url        Address of the page to scrape.
 * @param string $table_name SQLite table to rebuild. It is concatenated into
 *                           the DROP statement unescaped, so callers must pass
 *                           a trusted identifier.
 */
function grep_munich($url, $table_name)
{
    $html = scraperWiki::scrape($url);

    // Use the PHP Simple HTML DOM Parser to extract <td> tags
    $dom = new simple_html_dom();
    $dom->load($html);

    // Locate the flight table BEFORE touching the datastore: if the page
    // layout changed (or the download failed) the previously stored data is
    // kept instead of being dropped and then crashing on a null lookup.
    $table = $dom->getElementById('flight_info_area');
    if (!is_object($table)) {
        print "No element with id 'flight_info_area' found at " . $url . "\n";
        return;
    }

    // Drop all old information by dropping the table
    scraperwiki::sqliteexecute("drop table if exists " . $table_name);
    scraperwiki::sqlitecommit();

    // Computed once so every row of this run carries the same key date.
    $date = date("Y-m-d");
    $count = 0;

    foreach ($table->find('tr') as $data) {
        // Flight details. Read tds or ths
        $tds = $data->find("td");

        // Rows with fewer than 7 columns are not flight rows: skip them
        if (sizeof($tds) < 7) {
            continue;
        }

        // Build array of flight information
        $flight_data = array(
            "date" => $date,
            "count" => $count,
            "flightnr" => $tds[1]->plaintext,
            "from" => $tds[2]->plaintext,
            "time" => $tds[3]->plaintext,
            "expected_time" => $tds[4]->plaintext,
        );

        // Save the information of one flight
        scraperwiki::save_sqlite(array("date", "count"), $flight_data, $table_name);
        $count = $count + 1;
    }
}
/**
 * Inserts one six-column row into the given table and commits immediately.
 *
 * $objeto and $adjudicatario are passed through utf8_decode() before being
 * bound; the remaining values are bound unchanged.
 *
 * @param string $nombreTabla   Table name; interpolated into the SQL text as-is.
 * @param mixed  $idContrato    First column value.
 * @param mixed  $fecha         Second column value.
 * @param string $objeto        Third column value (decoded from UTF-8).
 * @param mixed  $importe       Fourth column value.
 * @param string $adjudicatario Fifth column value (decoded from UTF-8).
 * @param mixed  $NIF           Sixth column value.
 */
function insertar($nombreTabla, $idContrato, $fecha, $objeto, $importe, $adjudicatario, $NIF)
{
    $sql = "insert into {$nombreTabla} values (?,?,?,?,?,?)";

    // Values in the positional order expected by the six placeholders.
    $fila = array(
        $idContrato,
        $fecha,
        utf8_decode($objeto),
        $importe,
        utf8_decode($adjudicatario),
        $NIF,
    );

    scraperwiki::sqliteexecute($sql, $fila);
    scraperwiki::sqlitecommit();
}
$party = trim(str_replace(")", "", $partycell)); $name = trim(str_replace("Cllr. ", "", strip_tags($party))); $namecell = $nameparty[0]; $name = trim(str_replace("Cllr. ", "", strip_tags($namecell))); print $name; #$party = $row->find("p",0); //$party = $cell->find("p",0); print $party; $moredetails["name"] = $name; $moredetails["party"] = $party; return $moredetails; } } //$moredetails = array(); //$moredetails = get_details($content); $moredetails = get_nameparty($content); foreach ($moredetails as $moredetail) { $lea = "lea"; print $lea; $councillors["{$name}"] = array("LEA" => $lea, "Party" => $moredetails["party"]); } unset($dom, $html, $uri); scraperwiki::sqliteexecute("drop table councillors"); scraperwiki::sqliteexecute("create table if not exists councillors (`auth` string, `lea` string, `name` string, `party` string)"); #, `email` string, `address` string, `phone` string, `mobile` string, `image` string)"); scraperwiki::sqlitecommit(); foreach ($councillors as $name => $values) { scraperwiki::sqliteexecute("insert or replace into councillors values (:auth, :lea, :name, :party)", array("auth" => "Carlow County Council", "lea" => $values["LEA"], "name" => $name, "party" => $values["Party"])); } scraperwiki::sqlitecommit();
/**
 * Saves one record into the "hydro_authorities" table, keyed on its "name"
 * field, and commits the change.
 *
 * @param array $authority Record to store; presumably contains a "name" entry
 *                         since that is used as the unique key (confirm at caller).
 */
function saveHydroAuthority($authority)
{
    // Unique key "name", verbosity level 2
    scraperwiki::save_sqlite(array("name"), $authority, "hydro_authorities", 2);
    scraperwiki::sqlitecommit();
}
/**
 * Extracts the paper entries from one result page, fetches each paper's
 * abstract and stores everything in the "acmdata1" table.
 *
 * One record is written per "div.authors" element found in the first row of
 * each result table. Records are keyed by a running number that starts at
 * 1 + 20 * $pageNo.
 *
 * @param object $html1  Parsed page (simple_html_dom-style object exposing find()).
 * @param int    $pageNo Zero-based page index used to seed the record counter.
 *
 * @return int The record number that follows the last record written.
 */
function parsePage($html1, $pageNo)
{
    $numrecords = 1 + 20 * $pageNo;

    foreach ($html1->find("table[@style='padding: 5px; 5px; 5px; 5px;']") as $row) {
        $tr1 = $row->find("tr", 0);
        $tr2 = $row->find("tr", 1);
        $tr3 = $row->find("tr", 2);

        // Calling find() on a missing row is a fatal error, so tables that do
        // not have the expected three rows are skipped.
        if (!is_object($tr1) || !is_object($tr2) || !is_object($tr3)) {
            continue;
        }

        $title = $tr1->find("a[@target='_self']", 0);
        if (!is_object($title)) {
            continue;
        }
        $papertitle = "http://dl.acm.org/" . $title->href . "&preflayout=flat";

        // These cells do not depend on the author, so look them up once per table.
        $year = $tr2->find("td", 0);
        $addinfo = $tr2->find("td", 2);
        $publisher = $tr3->find("td", 0);

        $yearText = is_object($year) ? $year->innertext : null;
        $addinfoText = is_object($addinfo) ? $addinfo->plaintext : null;
        $publisherText = is_object($publisher) ? $publisher->plaintext : null;

        foreach ($tr1->find("div.authors") as $author) {
            $abstract = getAbstract($papertitle);
            print "abstract " . $abstract . "\n";

            // Pause after every abstract request (presumably to avoid
            // overloading the remote site).
            sleep(30);

            print $title->innertext . ", ";
            print $author->plaintext . ", ";
            print $yearText . ", ";
            print $addinfoText . ", ";
            print $publisherText . ", ";
            // Was "/n", which printed the two literal characters instead of a newline.
            print $abstract . "\n";
            flush();

            $record = array(
                "a" => $numrecords,
                "title" => $title->innertext,
                "author" => $author->plaintext,
                "year" => $yearText,
                "addinfo" => $addinfoText,
                "publisher" => $publisherText,
                "abstract" => $abstract,
            );
            scraperwiki::save_sqlite(array("a"), $record, "acmdata1", 2);
            scraperwiki::sqlitecommit();

            $numrecords++;
            flush();
        }
    }

    return $numrecords;
}
/**
 * Loads the HTML of $src_link into $htmlDOM, using a local SQLite cache.
 *
 * The page is scraped live when the "sources" table holds no dump for the
 * link or when $upd_flag is set; the serialized DOM is then written to the
 * cache together with the current Unix timestamp. Otherwise the cached dump
 * is loaded. Progress messages are echoed along the way.
 *
 * @param object $htmlDOM  DOM object exposing load() and save().
 * @param string $src_link URL of the page; also the cache key.
 * @param bool   $upd_flag When true, bypass the cache and re-scrape.
 *
 * @return object The same $htmlDOM instance, now populated.
 */
function populateDOM($htmlDOM, $src_link, $upd_flag = false)
{
    scraperwiki::sqliteexecute("CREATE TABLE IF NOT EXISTS sources (src_link TEXT PRIMARY KEY, timestamp DATETIME, src_dump TEXT)");
    echo "Checking local cache...<br>\n";

    $cached = scraperwiki::sqliteexecute(
        "SELECT src_link, timestamp, src_dump FROM sources WHERE src_link = :slnk",
        array("slnk" => $src_link)
    );

    // Cache hit: a non-empty dump exists and no refresh was requested.
    $hasDump = !empty($cached->data[0][2]);
    if ($hasDump && $upd_flag != true) {
        $cachedAt = date(DATE_RFC822, $cached->data[0][1]);
        echo "Using local cache, as cached data exists from '" . $cachedAt . ".'<br>\n";
        echo "Loading...<br>\n";
        $htmlDOM->load($cached->data[0][2]);
        echo "Populate DOM Complete.";
        return $htmlDOM;
    }

    echo "No Cache for this site (or force-update flag given), scraping live site for local cache...<br>\n";

    // Load the site and save it locally so that we don't end up crawling
    // their site a million times during development
    $htmlDOM->load(scraperWiki::scrape($src_link));
    $save_source = $htmlDOM->save();

    echo "Scrape complete, storing into cache...<br>\n";
    $cacheRow = array(
        "slnk" => $src_link,
        "stime" => time(),
        "sdmp" => $save_source,
    );
    scraperwiki::sqliteexecute("INSERT OR REPLACE INTO sources VALUES (:slnk, :stime, :sdmp)", $cacheRow);
    scraperwiki::sqlitecommit();
    echo "Cache saved.<br>\n";

    echo "Populate DOM Complete.";
    return $htmlDOM;
}
/**
 * Rebuilds the "r_locs" table from the table rows of the given DOM.
 *
 * For every <tr> that has at least one <td>, the first cell is stored as the
 * address and the second cell is read as a ", "-separated list of type codes.
 * Each code is translated through the map returned by lbl_recyclebins_types()
 * and the results are joined with "; ". If the address contains something
 * matching the postcode pattern, it is geocoded with
 * scraperwiki::gb_postcode_to_latlng(); latitude/longitude stay empty strings
 * when no postcode is found or geocoding fails.
 *
 * @param object $dom Parsed page (simple_html_dom-style object exposing find()).
 */
function lbl_recyclebins_locs($dom)
{
    scraperwiki::sqliteexecute("drop table if exists r_locs");
    scraperwiki::sqlitecommit();
    scraperwiki::sqliteexecute("create table r_locs ('address' string, 'types' string, 'latitude' string, 'longitude' string)");
    scraperwiki::sqlitecommit();

    $descs = lbl_recyclebins_types();

    foreach ($dom->find("tr") as $data) {
        $tds = $data->find("td");
        if (!count($tds)) {
            continue;
        }

        $add = $tds[0]->plaintext;

        // A row may carry only the address cell; treat the type list as empty then.
        $rawTypes = isset($tds[1]) ? $tds[1]->plaintext : '';
        $types = '';
        foreach (explode(', ', $rawTypes) as $code) {
            // Unknown codes contribute an empty description instead of
            // raising an undefined-index notice.
            $desc = isset($descs[$code]) ? trim($descs[$code]) : '';
            $types .= $desc . "; ";
        }

        $lat = '';
        $lon = '';
        if (preg_match('/([\\w]{2}[\\d]+\\s\\d[\\w]{2})/', $add, $matches)) {
            try {
                $geo = scraperwiki::gb_postcode_to_latlng($matches[0]);
                if (isset($geo[0], $geo[1])) {
                    $lat = $geo[0];
                    $lon = $geo[1];
                }
            } catch (Exception $e) {
                // Geocoding is best effort: report the problem and store the
                // row without coordinates.
                print_r($e->getMessage());
                $lat = '';
                $lon = '';
            }
        }

        // Named placeholders are bound by name so the statement and its
        // values cannot drift out of step.
        scraperwiki::sqliteexecute(
            "insert into r_locs values (:address,:types,:latitude,:longitude)",
            array(
                "address" => $add,
                "types" => $types,
                "latitude" => $lat,
                "longitude" => $lon,
            )
        );
        scraperwiki::sqlitecommit();
    }
}
/**
 * Creates the "tagtable" and "htmltable" tables, both keyed on "appname",
 * and commits.
 *
 * The statements use IF NOT EXISTS so the function can be called on every
 * run without failing once the tables are already present.
 */
function createTable()
{
    scraperwiki::sqliteexecute('create table if not exists tagtable(appname text primary key,url text, type text, function type, license type)');
    scraperwiki::sqliteexecute('create table if not exists htmltable(appname text primary key,html text)');
    scraperwiki::sqlitecommit();
}