/**
 * Scrape the flight table of a Munich airport page and rebuild a SQLite table from it.
 *
 * On every successful run the table $table_name is dropped and refilled, so it
 * mirrors the rows currently shown on the page. Each row is keyed by the run
 * date (Y-m-d) plus its position among the accepted rows.
 *
 * @param string $url        Page to fetch; it must contain an element with the
 *                           id "flight_info_area".
 * @param string $table_name SQLite table to rebuild. It is concatenated into the
 *                           DROP statement as-is, so pass trusted identifiers only.
 * @return void
 */
function grep_munich($url, $table_name)
{
    $html = scraperWiki::scrape($url);

    # Use the PHP Simple HTML DOM Parser to extract <td> tags
    $dom = new simple_html_dom();
    $dom->load($html);

    // Locate the flight table BEFORE touching the database: when the download
    // failed or the page layout changed, the previously stored rows are kept
    // instead of being dropped and never replaced.
    $table = $dom->getElementById('flight_info_area');
    if (!$table) {
        trigger_error("grep_munich: element 'flight_info_area' not found at " . $url, E_USER_WARNING);
        return;
    }

    // Drop all old information by dropping the table
    scraperwiki::sqliteexecute("drop table if exists " . $table_name);
    scraperwiki::sqlitecommit();

    // One date for the whole run, so ("date", "count") stays unique even when
    // the run crosses midnight.
    $date = date("Y-m-d");
    $count = 0;

    foreach ($table->find('tr') as $data) {
        // Flight details are read from the row's <td> cells
        $tds = $data->find("td");

        // Rows with fewer than 7 columns are skipped
        if (sizeof($tds) < 7) {
            continue;
        }

        // Build the record for one flight
        $flight_data = array(
            "date" => $date,
            "count" => $count,
            "flightnr" => $tds[1]->plaintext,
            "from" => $tds[2]->plaintext,
            "time" => $tds[3]->plaintext,
            "expected_time" => $tds[4]->plaintext,
        );

        // Save the information of one flight
        scraperwiki::save_sqlite(array("date", "count"), $flight_data, $table_name);
        $count = $count + 1;
    }
}
######################################
# PHP scraper for Seznam cirkvi
######################################
// require_once: the parser may already have been loaded earlier in the file.
require_once 'scraperwiki/simple_html_dom.php';

if (!function_exists('odstranDiakritiku')) {
    /**
     * Transliterate accented characters to ASCII so a label can be used as an array key.
     *
     * NOTE(review): the source encoding "windows-1250" is taken from the helper that
     * was commented out in the original script - confirm it matches the page encoding.
     *
     * @param string $text Label text as read from the page.
     * @return string Transliterated text, or $text unchanged when iconv cannot convert it.
     */
    function odstranDiakritiku($text)
    {
        $ascii = iconv("windows-1250", "ascii//TRANSLIT", $text);
        return $ascii === false ? $text : $ascii;
    }
}

$html = scraperwiki::scrape("http://www3.mkcr.cz/cns_internet/CNS/detail_cns.aspx?id_subj=147&str_zpet=Seznam_cns.aspx");

# Use the PHP Simple HTML DOM Parser to extract <td> tags
$dom = new simple_html_dom();
$dom->load($html);

$output = array();  // label => <td> node holding the value for that label
$index = "";        // label the following value cells are filed under
$nazev = false;     // becomes true once the first "Název:" label has been seen

$table = $dom->getElementById('Table3');
if (!$table) {
    trigger_error("Seznam cirkvi scraper: element 'Table3' not found", E_USER_WARNING);
}
$cells = $table ? $table->find('td') : array();

foreach ($cells as $data) {
    print $data;

    // Decide on the visible text: the cell's outer HTML ends with the closing
    // tag, so it can never end in a colon.
    $text = $data->plaintext;

    # When the text ends with a colon it is a label: index the following cells by it
    if (substr((string) $text, -1) === ':') {
        $label = $text;
        if ($text == "Název:") {
            if ($nazev) {
                // Second "Název:" on the page: file it under a distinct key.
                $label = "Nazev organu cirkve:";
            } else {
                $nazev = true;
            }
        }
        $index = odstranDiakritiku($label);
    } else {
        $output[$index] = $data;
    }
}
print "fbjksdhf\n";
// NOTE(review): the " //" between the PHP tags below (and after the final closing
// tag) is outside PHP and is emitted as literal output - it looks accidental.
?> //<?php
# Blank PHP
// require_once: the parser may already have been loaded earlier in the file.
require_once 'scraperwiki/simple_html_dom.php';

if (!function_exists('toIndex')) {
    /**
     * Turn a label into an ASCII array key: transliterate it, then replace spaces with "_".
     *
     * NOTE(review): the source encoding 'Windows-1252' is kept from the original;
     * confirm it matches the page encoding.
     *
     * @param string $text Label text as read from the page.
     * @return string Key built from the transliterated text, or from $text itself
     *                when iconv cannot convert it.
     */
    function toIndex($text)
    {
        $ascii = iconv('Windows-1252', 'ASCII//TRANSLIT', $text);
        if ($ascii === false) {
            $ascii = $text;
        }
        return str_replace(' ', '_', $ascii);
    }
}

$html = scraperwiki::scrape("http://www3.mkcr.cz/cns_internet/CNS/detail_cns.aspx?id_subj=147&str_zpet=Seznam_cns.aspx");
$dom = new simple_html_dom();
$dom->load($html);

$field = array();   // key derived from the label => text of the value cell
$curIndex = "";     // key the following value cells are filed under

$table = $dom->getElementById('Table3');
if (!$table) {
    trigger_error("Blank PHP scraper: element 'Table3' not found", E_USER_WARNING);
}
$cells = $table ? $table->find('td') : array();

foreach ($cells as $data) {
    $txt = $data->plaintext;
    // A cell whose text ends with a colon is a label (replaces the removed ereg()).
    if (substr((string) $txt, -1) === ':') {
        $curIndex = toIndex($txt);
    } else {
        $field[$curIndex] = $txt;
        print $field[$curIndex] . "\n";
    }
}

foreach ($field as $i => $a) {
    print $i . $a . "\n";
}
print "fbjksdhf\n";
?> //
} require 'scraperwiki/simple_html_dom.php'; $start = 207; $limit = 0; //this can enable, disable automatic runs $count = $start; for ($i = $start; $i <= $limit; $i += 100) { $gURL = scraperwiki::scrape("http://www.google.com/search?q=site%3Amappery.com&num=100&start=" . $i); $gDOM = new simple_html_dom(); $gDOM->load($gURL); foreach ($gDOM->find('a') as $a) { if (substr($a->href, 7, 8) == "webcache") { $rURL = scraperwiki::scrape($a->href); $rDOM = new simple_html_dom(); $rDOM->load($rURL); $imgDiv = $rDOM->getElementById('mapPic'); if (!$imgDiv) { continue; } $imgs = $imgDiv->find('img'); if (count($imgs) == 0) { continue; } $detDiv = $rDOM->getElementById('mapDetailInfo'); if (!$detDiv) { continue; } $as = $detDiv->find('a'); if (count($as) == 0) { continue; }
// -----------------------------------------------------------------------------
// old fashion camel naming conventions test
//
// Loads three checkbox inputs and asserts that the camelCase DOM-style accessors
// (getElementByTagName, getElementsByTagName, getElementById, getElementsById,
// getAttribute, setAttribute, removeAttribute, hasAttribute) give the same
// results as find() and property access.
// NOTE(review): $html is not created in this block; presumably it is a
// simple_html_dom instance set up earlier in the file - confirm.
$str = <<<HTML
<input type="checkbox" id="checkbox" name="checkbox" value="checkbox" checked>
<input type="checkbox" id="checkbox1" name="checkbox1" value="checkbox1">
<input type="checkbox" id="checkbox2" name="checkbox2" value="checkbox2" checked>
HTML;
$html->load($str);
// Loose comparison of the loaded document against the source markup.
assert($html == $str);
// First input carries "checked"; the input at index 1 does not.
assert($html->getElementByTagName('input')->hasAttribute('checked') == true);
assert($html->getElementsByTagName('input', 1)->hasAttribute('checked') == false);
assert($html->getElementsByTagName('input', 1)->hasAttribute('not_exist') == false);
// camelCase lookups must agree with the equivalent find() calls.
assert($html->find('input', 0)->value == $html->getElementByTagName('input')->getAttribute('value'));
assert($html->find('input', 1)->value == $html->getElementsByTagName('input', 1)->getAttribute('value'));
assert($html->find('#checkbox1', 0)->value == $html->getElementById('checkbox1')->getAttribute('value'));
assert($html->find('#checkbox2', 0)->value == $html->getElementsById('checkbox2', 0)->getAttribute('value'));
// Attribute reads on the first checkbox.
$e = $html->find('[name=checkbox]', 0);
assert($e->getAttribute('value') == 'checkbox');
assert($e->getAttribute('checked') == true);
assert($e->getAttribute('not_exist') == '');
// From here on every step mutates $e and the following assert compares $e with
// the expected markup, so the statement order matters.
$e->setAttribute('value', 'okok');
assert($e == '<input type="checkbox" id="checkbox" name="checkbox" value="okok" checked>');
// Setting "checked" to false makes the attribute disappear from the markup ...
$e->setAttribute('checked', false);
assert($e == '<input type="checkbox" id="checkbox" name="checkbox" value="okok">');
// ... and setting it to true brings it back as a bare attribute.
$e->setAttribute('checked', true);
assert($e == '<input type="checkbox" id="checkbox" name="checkbox" value="okok" checked>');
$e->removeAttribute('value');
assert($e == '<input type="checkbox" id="checkbox" name="checkbox" checked>');
$e->removeAttribute('checked');
assert($e == '<input type="checkbox" id="checkbox" name="checkbox">');
/**
 * Build the product-family navigation map from the page's "prdMenu_RadMenu" menu.
 *
 * @param simple_html_dom $DOM Parsed page.
 * @return array|false Map of link title (with "GeForce" removed) to
 *                     array('url' => family id), where the family id is the link's
 *                     href without the "ProductList.aspx?type=8&family=" prefix and
 *                     with "+" encoded as "%2B". Returns false when the menu, or
 *                     the family list inside it, is not present on the page.
 */
function getNavigation(simple_html_dom $DOM)
{
    $navigation = $DOM->getElementById('prdMenu_RadMenu');
    if (empty($navigation)) {
        return false;
    }

    // find() with an index yields null when nothing matches; treat a missing
    // family list like a missing menu instead of calling children() on null.
    $familyList = $navigation->find('li[class=rmItem rmLast] > div.rmSlide > ul.rmLevel1', 0);
    if (empty($familyList)) {
        return false;
    }

    $navbar = array();
    foreach ($familyList->children() as $navlist) {
        $navlink = $navlist->find('a', 0);
        if (empty($navlink)) {
            // Entry without a link: nothing to record.
            continue;
        }

        $navtitle = str_replace("GeForce", "", $navlink->title);

        // Strip the listing prefix first, then percent-encode "+" so the family
        // id survives being put back into a query string.
        $navurl = str_replace(
            array("ProductList.aspx?type=8&family=", '+'),
            array("", "%2B"),
            $navlink->href
        );

        $navbar[$navtitle] = array('url' => $navurl);
    }

    return $navbar;
}
/**
 * Parse the detail page of one Kita and save it as a record keyed by 'kitaID'.
 *
 * The rows of the 'TabelleKitaDetails' table are read as label/value pairs
 * (first and second child of each row). Known labels are copied into the
 * record; fields whose row is absent from the page are stored as null.
 * When the table itself is missing, a warning is raised and nothing is saved,
 * so an earlier record for the same id is not overwritten with empty data.
 *
 * @param string $name Display name of the Kita.
 * @param mixed  $id   Identifier stored as 'kitaID' (the unique key of the record).
 * @param string $in   HTML of the detail page.
 * @return void
 */
function parseDetails($name, $id, $in)
{
    // Row label on the page => key in the saved record.
    $fieldByLabel = array(
        "Adresse" => 'address',
        "Pädagogische Merkmale" => 'concept',
        "Telefon" => 'telefone',
        "Email" => 'mail',
        "Internet" => 'web',
        "Einrichtungsart" => 'type',
        "Träger" => 'head',
        "Adresse Träger" => 'head_address',
        "Trägerart" => 'head_type',
    );

    // Every field starts as null so rows missing from the page no longer
    // leave undefined variables behind when the record is built.
    $record = array(
        'kitaID' => $id,
        'kita' => $name,
        'address' => null,
        'telefone' => null,
        'mail' => null,
        'web' => null,
        'type' => null,
        'concept' => null,
        'head' => null,
        'head_address' => null,
        'head_type' => null,
    );

    $dom = new simple_html_dom();
    $dom->load($in);

    $data_table = $dom->getElementById('TabelleKitaDetails');
    if (!$data_table) {
        trigger_error("parseDetails: table 'TabelleKitaDetails' not found for kita " . $id, E_USER_WARNING);
        return;
    }

    foreach ($data_table->find("tbody tr") as $trs) {
        $labelCell = $trs->childNodes(0);
        $valueCell = $trs->childNodes(1);
        if (!$labelCell || !$valueCell) {
            // Row without a label/value pair.
            continue;
        }

        $label = $labelCell->plaintext;
        if (is_string($label) && isset($fieldByLabel[$label])) {
            $record[$fieldByLabel[$label]] = $valueCell->plaintext;
        }
    }

    scraperwiki::save(array('kitaID'), $record);
}
$date = date("m.d.y"); //Build array of flight informations $flight_data = array("date" => $date, "flightnr" => $flightnr, "from" => $from, "time" => $time, "expected_time" => $expected_time); //Save the informations of one flight scraperWiki::save_sqlite(array("date", "flightnr"), $flight_data); } # Exercise sheet 8 - Task 12 # www.munich-airport.de # we extracted information of the arivales of the munich airport # we wanted schedule time of 5 minutes but this is not alowed for standard acount (1 day) require "scraperwiki/simple_html_dom.php"; $html = scraperWiki::scrape("http://www.munich-airport.de/de/consumer/index.jsp"); # Use the PHP Simple HTML DOM Parser to extract <td> tags $dom = new simple_html_dom(); $dom->load($html); $table = $dom->getElementById('navigation_mainpage_flightinfo_table'); foreach ($table->find('tr') as $data) { print $data->plaintext . "\n"; // Flight details. Read tds or ths $tds = sizeof($data->find("td")) > 1 ? $data->find("td") : $data->find("th"); if (sizeof($tds) == 0) { break; } $flightnr = $tds[1]->plaintext; $from = $tds[2]->plaintext; $time = $tds[3]->plaintext; $expected_time = $tds[4]->plaintext; // Skip header if ($flightnr == "Flug") { continue; }