// Fragment of a dam-level scraper. NOTE(review): $dam, $urlBase and
// $fmtSampleFixed are defined outside this chunk — presumably $dam is a
// simple_html_dom anchor node and $fmtSampleFixed maps column names to
// fixed-width field positions; confirm against the surrounding file.
// Only process links whose href contains a dotted triple (e.g. "12.34.56"),
// which serves as the dam's unique id.
if (preg_match("/[0-9]+\\.[0-9]+\\.[0-9]+/", $dam->href, $m)) {
    $uidDam = $m[0];                                  // dotted id taken from the href
    $titDam = utf8_encode($dam->plaintext);           // dam title, forced to UTF-8
    $urlInf = $urlBase . $dam->href;                  // info page for this dam
    $txtInf = utf8_encode(scraperWiki::scrape($urlInf));
    // Pull "<label> : <B>value<" pairs for the UTM position. The ".st :"
    // alternative is a single-char wildcard standing in for the Norwegian
    // "Øst :" (East) label — presumably to dodge an encoding mismatch in the
    // fetched page; TODO confirm the byte form of the captured key matches
    // the literal "Øst :" used below.
    preg_match_all("/((?:Sone :)|(?:.st :)|(?:Nord :)) <B>([^<]+)</u", $txtInf, $m, PREG_SET_ORDER);
    foreach ($m as $kv) { $pos[$kv[1]] = $kv[2]; }
    // Assemble "<zone>N <north> <east>" (UTM northern hemisphere).
    $utmDam = sprintf("%sN %s %s", $pos["Sone :"], $pos["Nord :"], $pos["Øst :"]);
    // The raw measurement series lives in basis.txt next to index.html.
    $urlDam = $urlBase . str_replace('index.html', 'basis.txt', $dam->href);
    $txtDam = scraperWiki::scrape($urlDam);
    $arrDam = explode("\n", $txtDam);
    $lenArr = count($arrDam);
    // Rows 0-1 are header lines; data starts at index 2.
    for ($i = 2; $i < $lenArr; $i++) {
        $row = array();
        $ln = $arrDam[$i];
        // Slice the fixed-width line into named columns. Note: this loop
        // reuses $pos as its value variable, clobbering the label map built
        // above — harmless only because the map is no longer needed here.
        foreach ($fmtSampleFixed as $col => $pos) {
            $row[$col] = trim(substr($ln, $pos["offset"], $pos["length"]));
        }
        // Skip rows without a numeric water level (blank / malformed lines).
        if (!is_numeric($row["level"])) { continue; }
        // Rebuild the timestamp from the raw field. Offsets imply a layout of
        // 2+2+4 date digits, then time digits at 9 and 11 — presumably
        // "DDMMYYYY HHMM"-like, producing "DD-MM-YYYY HH:MM:00"; TODO confirm
        // against an actual basis.txt sample.
        $row["time"] = sprintf("%s-%s-%s %s:%s:00", substr($row["time"], 0, 2), substr($row["time"], 2, 2), substr($row["time"], 4, 4), substr($row["time"], 9, 2), substr($row["time"], 11, 2));
        $row["id_dam"] = $uidDam;
        $row["dam"] = $titDam;
        // The literal string "NULL" is stored when no position was found.
        $row["utm_pos"] = empty($utmDam) ? "NULL" : $utmDam;
        // (id_dam, time) is the unique key: re-runs update rather than duplicate.
        scraperWiki::save(array('id_dam', 'time'), $row);
    }
}
<?php
// Snapshot scraper: fetch one Quora page and store its raw HTML under id 1.
//
// Fixes relative to the original:
//  - The whole script was pasted twice; the second `require` of
//    simple_html_dom.php would fatally redeclare its classes, so the work
//    is done exactly once and the include is `require_once`.
//  - scraperWiki::scrape() returns the page body as a *string*, so the
//    original `$html->outertext` (a simple_html_dom property) could never
//    work; the string is saved directly.
//  - scraperWiki::save() expects the unique-key argument to be a list of
//    column names, not a key=>value map.
require_once 'scraperwiki/simple_html_dom.php';

$i = 1;
$html = scraperWiki::scrape("http://www.quora.com/Placerville-CA/What-is-nightlife-like-in-Placerville-CA");
// 'id' is the unique key; re-running the scraper overwrites row 1.
scraperWiki::save(array('id'), array('id' => $i, 'html' => $html));
<?php
// Scrape every anchor URL from onewaytextlink.com (free links, page 1) and
// store each with a sequential id.
//
// Fix relative to the original: the entire script was pasted twice, and the
// second plain `require` of simple_html_dom.php would fatally re-declare its
// classes ("Cannot redeclare class simple_html_dom"). The page is now
// scraped exactly once and the library is loaded with require_once.
require_once 'scraperwiki/simple_html_dom.php';

$res = scraperWiki::scrape("http://www.onewaytextlink.com/links.php?type=free&pagenum=1");
$html = str_get_html($res);

$id = 0;
foreach ($html->find("a") as $data) {
    // 'id' is the unique key, so re-runs update rows instead of duplicating.
    $record = array('id' => $id, 'url' => $data->href);
    scraperWiki::save(array('id'), $record);
    $id++;
}
/**
 * Fetch a page, strip its anti-scraping decoy spans, then persist every
 * dotted-quad (IPv4-looking) number found in the remaining markup.
 *
 * @param string $url  Page to scrape.
 * @param string $name Label stored alongside each extracted address.
 * @return void  Side effect only: one scraperWiki::save() per address,
 *               keyed on 'ip' so repeats overwrite rather than duplicate.
 */
function grab12($url, $name) {
    $page = scraperWiki::scrape($url);
    // Invisible spans hold decoy digits meant to poison naive scrapers —
    // remove them entirely.
    $page = preg_replace('/<span style="display:none">(\\d+)<\\/span>/', '', $page);
    // Digits wrapped in numeric-class spans are real — unwrap to bare digits.
    $page = preg_replace('/<span class="\\d+">(\\d+)<\\/span>/', '\\1', $page);
    // Whatever dotted quads survive the cleanup are the addresses we want.
    preg_match_all('/\\d+\\.\\d+\\.\\d+\\.\\d+/', $page, $found);
    foreach ($found[0] as $addr) {
        scraperWiki::save(array('ip'), array('ip' => $addr, 'name' => $name));
    }
}
# Tail end of an earlier copy of this scraper (its loop opens before this
# chunk): save the current row, then continue with the next row...
scraperWiki::save(array("id"), $data);
}
# Done when there are no rows left in $rows.

# Pull in the external Simple HTML DOM library file:
require "scraperwiki/simple_html_dom.php";
# Fetch the HTML from the site and store it in $h:
$h = scraperWiki::scrape("http://car.espenandersen.no/?data=1");
# Construct an empty HTML tree and keep the object in $tree:
$tree = new simple_html_dom();
# Load the HTML from the site into the HTML tree:
$tree->load($h);
# Let $tbody represent <tbody>, using this selector path:
$tbody = $tree->find("html body table tbody", 0);
# The data rows are children of <tbody>; call the children() method:
$rows = $tbody->children();
# For each of the data rows...
foreach ($rows as $row) {
    # Prepare an empty array:
    $data = array();
    # Fill the array with scraped cell values, field names as keys
    # (columns: id, school name, municipality number, municipality,
    # indoor climate — keys kept in the original Norwegian):
    $data["id"] = $row->find("td", 0)->plaintext;
    $data["skolenavn"] = $row->find("td", 1)->plaintext;
    $data["kommunenr"] = $row->find("td", 2)->plaintext;
    $data["kommune"] = $row->find("td", 3)->plaintext;
    $data["inneklima"] = $row->find("td", 4)->plaintext;
    # Save the data row (the array) to the ScraperWiki datastore;
    # "id" is the unique key, so re-runs update rows in place:
    scraperWiki::save(array("id"), $data);
    # Continue with the next row...
}
# Done when there are no rows left in $rows.