/**
 * Scrape a page and store every club name found in it.
 *
 * Each anchor inside a `td.cw` cell contributes one row with a single
 * `club` column, which is also used as the unique key for the save.
 *
 * @param string $url Address of the page to download.
 */
function scrapeTeams($url)
{
    $markup = scraperWiki::scrape($url);

    $document = new simple_html_dom();
    $document->load($markup);

    foreach ($document->find('td.cw a') as $anchor) {
        $row = array('club' => $anchor->plaintext);
        scraperWiki::save_sqlite(array('club'), $row);
    }
}
/**
 * Scrape a listing page and store one (name, count) row per entry.
 *
 * For every `td.nom` cell the name is read from the cell's first anchor and
 * the count from the `td.compte` cell in the same parent row. Rows are saved
 * keyed on `name`, and only when the count text is truthy.
 *
 * Entries whose anchor, parent row or count cell is missing are skipped
 * instead of dereferencing a null node.
 *
 * @param string $url Address of the page to download.
 */
function scrapePage($url)
{
    $html = scraperWiki::scrape($url);
    $dom = new simple_html_dom();
    $dom->load($html);

    foreach ($dom->find('td.nom') as $cell) {
        // find(..., 0) returns null when nothing matches, so check before use.
        $link = $cell->find('a', 0);
        $parent = $cell->parent();
        $countCell = $parent ? $parent->find('td.compte', 0) : null;

        if (!$link || !$countCell) {
            continue;
        }

        $name = $link->plaintext;
        $count = $countCell->plaintext;

        // Same truthiness filter as before: empty text (and the string "0") is not saved.
        if ($count) {
            $payload = array('name' => $name, 'count' => $count);
            scraperWiki::save_sqlite(array('name'), $payload);
        }
    }
}
/**
 * Scrape one category page and store every product listed on it.
 *
 * For each `div.product` the brand, product name, RRP, selling price and
 * description are read from the product's anchors and saved together with
 * the page URL. All six columns form the unique key of the saved row.
 *
 * Products missing any of the expected nodes are skipped; previously a
 * missing `p.productPrice` caused a method call on null.
 *
 * @param string $url Address of the category page to download.
 */
function scrapeSmellsCategory($url)
{
    $html = scraperWiki::scrape($url);
    $dom = new simple_html_dom();
    $dom->load($html);

    foreach ($dom->find('div.product') as $product) {
        $brand = $product->find('h3 > a', 0);
        $productName = $product->find('p.productName > a', 0);
        $prices = $product->find('p.productPrice', 0);
        $productDesc = $product->find('p.productDesc a', 0);

        // The price anchors live inside the price paragraph; without it there is nothing to read.
        $rrp = $prices ? $prices->find('a', 0) : null;
        $ourPrice = $prices ? $prices->find('a.ourPrice', 0) : null;

        if (!$brand || !$productName || !$rrp || !$ourPrice || !$productDesc) {
            continue;
        }

        echo $productName->innertext;

        // Drops the first 10 / 16 characters of the anchor text —
        // presumably a fixed label prefix before the amount; verify against the live markup.
        $fixedRrp = substr($rrp->innertext, 10);
        $fixedPrice = substr($ourPrice->innertext, 16);

        $data = array(
            'brand' => $brand->innertext,
            'product' => $productName->innertext,
            'rrp' => $fixedRrp,
            'price' => $fixedPrice,
            'desc' => $productDesc->innertext,
            'url' => $url,
        );
        scraperWiki::save_sqlite(array('brand', 'product', 'rrp', 'price', 'desc', 'url'), $data);
    }
}
/**
 * Store one flight record in the scraper's SQLite datastore.
 *
 * @param array $unique      Column names that identify a row.
 * @param array $flight_data Column => value map for the record.
 */
function saveData($unique, $flight_data)
{
    $keyColumns = $unique;
    $row = $flight_data;

    scraperWiki::save_sqlite($keyColumns, $row);
}
} //Current Synths $synthList3 = file_get_contents("https://api.scraperwiki.com/api/1.0/datastore/sqlite?format=jsondict&name=current_synths&query=select%20DISTINCT%20manufacturer%2C%20url%2C%20name%20from%20%60swdata%60"); if (!empty($synthList3)) { $synthList3 = json_decode($synthList3); } $synths = array(); $synths = traverseList($synthList1); $synths = array_merge(traverseList($synthList2), $synths); $synths = array_merge(traverseList($synthList3), $synths); $synths = array_map('unserialize', array_unique(array_map('serialize', $synths))); echo "Total synths: " . count($synths) . "\n"; //var_dump($synths); if (!empty($synths)) { //$dbName = "vintagesynth-scrape-".$today = date("m-d-Y"); $saveMessage = scraperWiki::save_sqlite(array('manufacturer', 'name', 'url'), $synths); //print strval($saveMessage); scraperwiki::save_var('total_results', count($synths)); print scraperWiki::get_var('total_results'); } function traverseList($list) { $dataList = array(); foreach ($list as $item) { //Clean up the data foreach ($item as $key => $value) { $item->{$key} = preg_replace("/<*.>/", "", $value); //echo $item->$key."\n"; } $dataList[] = $item; }
$dom = new simple_html_dom(); $dom->load($html); $position = 0; $result = array(); foreach ($dom->find("span[@class='topsites-label']") as $data) { $result = $data->plaintext; $target = scraperWiki::scrape($result); $ga = FALSE; if (strpos($target, 'ga.js') !== FALSE) { $ga = TRUE; } scraperWiki::save_sqlite(array("data"), array('data' => $result, 'ga' => $ga)); } } require 'scraperwiki/simple_html_dom.php'; for ($i = 0; $i < 20; $i++) { $html = scraperWiki::scrape("http://www.alexa.com/topsites/countries;{$i}/CZ"); $dom = new simple_html_dom(); $dom->load($html); $position = 0; $result = array(); foreach ($dom->find("span[@class='topsites-label']") as $data) { $result = $data->plaintext; $target = scraperWiki::scrape($result); $ga = FALSE; if (strpos($target, 'ga.js') !== FALSE) { $ga = TRUE; } scraperWiki::save_sqlite(array("data"), array('data' => $result, 'ga' => $ga)); } }
/**
 * Store one flight record, keyed on date, airline and flight number.
 *
 * @param mixed $unique      Accepted but not used; the key columns are fixed
 *                           to "date", "airline" and "flight_num".
 * @param array $flight_data Column => value map for the record.
 */
function saveData($unique, $flight_data)
{
    $keyColumns = array("date", "airline", "flight_num");

    scraperWiki::save_sqlite($keyColumns, $flight_data);
}
<?php
// Scrape the rugbydump blog front page and store one row per embedded video.
// Each <param name="movie"> inside an <object> becomes a row whose unique
// `id` wraps the movie URL in a "code:New_Script_Resolve('...')" string and
// whose `title` is "Video N" in page order.
//
// The script body used to appear twice in a row, including a second plain
// `require` of the DOM library. Loading a file that declares classes and
// functions a second time is a fatal redeclaration error, and the second
// pass would only have re-saved the same ids, so the script now runs once
// and loads the library with require_once.
require_once 'scraperwiki/simple_html_dom.php';

// NOTE(review): this drops a table named `ttt`, while save_sqlite() below is
// called without a table name — confirm which table was meant to be reset.
scraperWiki::sqliteexecute('drop table if exists ttt');

$url = 'http://www.rugbydump.blogspot.com/';
$html = scraperWiki::scrape($url);

$dom = new simple_html_dom();
$dom->load($html);

$movies = $dom->find('object param[name="movie"]');

$i = 0;
foreach ($movies as $movie) {
    $i++;
    $movieurl = $movie->value;
    $data = array(
        'id' => "code:New_Script_Resolve('" . $movieurl . "')",
        'title' => 'Video ' . $i,
    );
    scraperWiki::save_sqlite(array('id'), $data);
}
<?php
// Scrape the rugbydump blog front page and store the URL of every embedded
// video: each <param name="movie"> inside an <object> becomes one row with a
// single `video` column, which is also the unique key.
//
// The script body used to appear twice in a row, including a second plain
// `require` of the DOM library. Loading a file that declares classes and
// functions a second time is a fatal redeclaration error, and the second
// pass would only have re-saved the same keys, so the script now runs once
// and loads the library with require_once.
require_once 'scraperwiki/simple_html_dom.php';

$url = 'http://www.rugbydump.blogspot.com/';
$html = scraperWiki::scrape($url);

$dom = new simple_html_dom();
$dom->load($html);

$movies = $dom->find('object param[name="movie"]');

foreach ($movies as $movie) {
    $movieurl = $movie->value;
    $data = array('video' => $movieurl);
    scraperWiki::save_sqlite(array('video'), $data);
}
/**
 * Store one railway record in the scraper's SQLite datastore.
 *
 * @param array $unique  Column names that identify a row.
 * @param array $railway Column => value map for the record.
 */
function saveData($unique, $railway)
{
    $keyColumns = $unique;
    $row = $railway;

    scraperWiki::save_sqlite($keyColumns, $row);
}
foreach ($view_dom->find('div[id=left_col] img[class=imgcenter]') as $element) { $synthImages .= "http://www.vintagesynth.com" . $subDir . $element->src . ","; } $synthDescription = ""; foreach ($view_dom->find("div[class=grid_11] div[id=left_col] p") as $view_data) { $synthDescription .= $view_data->plaintext . "<br/>"; } $synths[] = array('name' => $cleanSynthName, 'manufacturer' => $navLink[0]->plaintext, 'url' => "http://www.vintagesynth.com" . $subNavLinkURL, 'description' => $synthDescription, 'images' => $synthImages); /*echo "<pre>"; print_r($synths); echo "</pre>";*/ } } else { break; } } } else { print "The scrape has completed at a depth level of {$depth}.\n"; break; } } } /*echo "<pre>"; print_r($synths); echo "</pre>";*/ //$dbName = "vintagesynth-scrape-".$today = date("m-d-Y"); //$saveMessage = scraperWiki::save_sqlite(array('manufacturer','name','url','description','images'), $synths,$table_name=$dbName); $saveMessage = scraperWiki::save_sqlite(array('manufacturer', 'name', 'url', 'description', 'images'), $synths); print strval($saveMessage); scraperwiki::save_var('total_results', count($synths)); print scraperWiki::get_var('total_results');
/**
 * Store one record, keyed on a fixed set of three columns.
 *
 * @param mixed $unique Accepted but not used; the key columns are fixed to
 *                      "Data_autocount", "data_blank" and "data_Feedback".
 * @param array $record Column => value map for the record.
 */
function saveData($unique, $record)
{
    $keyColumns = array("Data_autocount", "data_blank", "data_Feedback");

    scraperWiki::save_sqlite($keyColumns, $record);
}
# www.munich-airport.de
# Extracts the arrivals shown on the Munich airport home page.
# A schedule of every 5 minutes was wanted, but that is not allowed for a
# standard account (1 day).
require "scraperwiki/simple_html_dom.php";

$html = scraperWiki::scrape("http://www.munich-airport.de/de/consumer/index.jsp");

# Use the PHP Simple HTML DOM Parser to extract the flight table
$dom = new simple_html_dom();
$dom->load($html);

$table = $dom->getElementById('navigation_mainpage_flightinfo_table');

// When the table id is not present in the page there is nothing to read,
// so iterate over no rows rather than calling find() on null.
$rows = $table ? $table->find('tr') : array();

foreach ($rows as $data) {
    print $data->plaintext . "\n";

    // Flight details. Read tds, or ths when the row has at most one td.
    $tds = $data->find("td");
    if (sizeof($tds) <= 1) {
        $tds = $data->find("th");
    }

    if (sizeof($tds) == 0) {
        break;
    }

    // Cells 1..4 are read below; skip rows that do not have them.
    if (sizeof($tds) < 5) {
        continue;
    }

    $flightnr = $tds[1]->plaintext;
    $from = $tds[2]->plaintext;
    $time = $tds[3]->plaintext;
    $expected_time = $tds[4]->plaintext;

    // Skip header
    if ($flightnr == "Flug") {
        continue;
    }

    // Date of this run, formatted month.day.two-digit-year
    $date = date("m.d.y");

    // Build the record for one flight
    $flight_data = array(
        "date" => $date,
        "flightnr" => $flightnr,
        "from" => $from,
        "time" => $time,
        "expected_time" => $expected_time,
    );

    // Save the flight, keyed on run date and flight number
    scraperWiki::save_sqlite(array("date", "flightnr"), $flight_data);
}
if ($td == "Évjárat:") { $prevtd = "Évjárat:"; } elseif ($td == "Kivitel:") { $prevtd = "Kivitel:"; } elseif ($td == "Állapot:") { $prevtd = "Állapot:"; } elseif ($td == "Üzemanyag:") { $prevtd = "Üzemanyag:"; } elseif ($td == "Sebességváltó fajtája:") { $prevtd = "Sebességváltó fajtája:"; } elseif ($td == "Hengerűrtartalom:") { $prevtd = "Hengerűrtartalom:"; } elseif ($td == "Henger-elrendezés:") { $prevtd = "Henger-elrendezés:"; } elseif ($td == "Hajtás:") { $prevtd = "Hajtás:"; } elseif ($td == "Teljesítmény:") { $prevtd = "Teljesítmény:"; } elseif ($td == "Ajtók száma:") { $prevtd = "Ajtók száma:"; } elseif ($td == "Klíma fajtája:") { $prevtd = "Klíma fajtája:"; } elseif ($td == "Szín:") { $prevtd = "Szín:"; } else { $prevtd = "..."; } } } scraperWiki::save_sqlite(array('id'), array('id' => $kod, 'url' => $newurl, 'ar' => $ar, 'evjarat' => $evjarat, 'kivitel' => $kivitel, 'allapot' => $allapot, 'uzemanyag' => $uzemanyag, 'sebvalto' => $sebvalto, 'urtartalom' => $urtartalom, 'hengerelrend' => $hengerelr, 'hajtas' => $hajtas, 'teljesitmeny' => $teljesitmeny, 'ajtok' => $ajtok, 'klima' => $klima, 'szin' => $szin)); }
/**
 * Store one member-link record, keyed on "data_MemberID".
 *
 * The literal "membersLinks" is passed as the third argument of
 * save_sqlite() (the table name in ScraperWiki's PHP API).
 *
 * @param mixed $unique      Accepted but not used; the key column is fixed.
 * @param array $recordLinks Column => value map for the record.
 */
function saveMembersLinks($unique, $recordLinks)
{
    $keyColumns = array("data_MemberID");
    $destination = "membersLinks";

    scraperWiki::save_sqlite($keyColumns, $recordLinks, $destination);
}
// Walks the paginated hasznaltauto.hu result list and stores the id and URL
// of every listing, following the "Következő" (next) link until none is left.
//
// The target table alternates between runs: the stored 'which-table' variable
// is read, flipped between "1" and "2" for the next run, and this run writes
// to "swdata" followed by the value that was read.
require 'scraperwiki/simple_html_dom.php';

$page_counter = 0;

// Initial value of the table-name variable. To seed it, run once with:
//   scraperwiki::save_var('which-table', "1");
// and disable that line again after the first run.
$oldnap = scraperwiki::get_var('which-table');
if ($oldnap == "1") {
    scraperwiki::save_var('which-table', "2");
} else {
    scraperwiki::save_var('which-table', "1");
}

$current = "swdata" . $oldnap;
print "Writing table: " . $current . "\n";

do {
    $kovetkezo = "";
    $page_counter++;

    $pageurl = "http://www.hasznaltauto.hu/talalatilista/auto/YHUQECPJ75JDSHH4K11URJIL5UM5ZJ67OJ0PSDARHGCQOAM901FIFTJ117ST1HZ508UQIHTYLS04GGOQJWA9WSRZWSFHQTUKOPY2GKPLULCZGKD479JY3IPMERARL1J9HFGYQ12RUA692DHRT071D83FDDERAH5HL528M7LK8HZQR4ILRLOD6FKZ7QZEK7P5KFTJJAZI6R1YEG3KK5QPYDMRE6OERT170MUJUQMP9Y7MSZKYU58F9FSCYA1GM1TWJS96RGAI5CIPZCM20REO47E15K4ZP1M3CF8FOG3MOCGU93088QKU33WAPEQLT1STGHGGUAET3KLJ0CHTOY6Z5YK2U2OE0185S/page{$page_counter}";
    $html_content = scraperWiki::scrape($pageurl);
    $html = str_get_html($html_content);

    // str_get_html() yields false when it cannot build a document (for
    // example on empty input); stop instead of calling find() on false.
    if (!$html) {
        print "Could not parse page " . $page_counter . ", stopping.\n";
        break;
    }

    foreach ($html->find("div.talalati_lista") as $talalat) {
        // Reset per result so a result without a title link is not saved
        // with the previous result's values (or with undefined variables).
        $url = null;
        $kod = null;

        foreach ($talalat->find("h2 a") as $el) {
            $url = $el->href;
            // The listing id is taken from the last 7 characters of the link.
            $kod = substr($url, -7);
        }

        if ($url === null) {
            continue;
        }

        scraperWiki::save_sqlite(array('id'), array('id' => $kod, 'url' => $url), $current);
    }

    // Leaves $kovetkezo set to the last "next page" link, or "" when there is none.
    foreach ($html->find("div.oldalszamozas a[title=Következő]") as $kovetkezo) {
        // print $page_counter . "\n";
    }

    // For a bounded test run, use instead: while ($page_counter != 10);
} while ($kovetkezo != "");
$manufacturerCounter++; } else { break; } } } else { $synthManufacturer = $manufacturerQuery; //Search for a specific manufacturer $tManName = preg_replace("/\\s/", "_", $synthManufacturer); //Build a temp name for the array key $synthManufacturer = urlencode($synthManufacturer); echo "Searching manufacturer: " . $synthManufacturer . "\n"; $foundSynths[$state][$tManName] = findSynthsOnCraigslist($state, $jsonManufacturerScraper, $synthManufacturer, $ignoreWords, $cityDepth, $synthDepth, $depthOverride); if (!empty($foundSynths[$state][$tManName])) { //scraperwiki::save_var($state.'-'.$synthManufacturer.'-'.'synths', json_encode($foundSynths[$state][$synthManufacturer])); $saveMessage = scraperWiki::save_sqlite(array('manufacturer', 'synth_name', 'post_item_date', 'post_item_name', 'post_item_price', 'post_item_link', 'post_item_state', 'query', 'link', 'post_item_description', 'post_item_images'), $foundSynths[$state][$tManName]); print strval($saveMessage); } else { //Delete existing data $info = scraperwiki::table_info($name = "swdata"); if (!empty($info)) { scraperwiki::sqliteexecute("DELETE FROM swdata"); //Truncate the table before adding new results } } } } } /** * Parses craigslist and returns synths * @param String state
$jsonManufacturers = json_decode($jsonSearchData); //echo "<pre>"; //print_r($jsonManufacturers); //echo "</pre>"; foreach ($states as $state) { $state = trim($state); echo "Searching state: " . $state . "\n"; $state = urlencode($state); if (empty($foundItems)) { $foundItems = findOnCraigslist($state, $jsonManufacturers, $ignoreWords, $cityDepth, $depthOverride); } else { $foundItems = array_merge($foundItems, findOnCraigslist($state, $jsonManufacturers, $ignoreWords, $cityDepth, $depthOverride)); } } if (!empty($foundItems)) { $saveMessage = scraperWiki::save_sqlite(array('manufacturer', 'name', 'post_item_date', 'post_item_name', 'post_item_price', 'post_item_link', 'post_item_state', 'query', 'link'), $foundItems); print strval($saveMessage); } else { //Delete existing data $info = scraperwiki::table_info($name = "swdata"); if (!empty($info)) { scraperwiki::sqliteexecute("DELETE FROM swdata"); //Truncate the table before adding new results } } } /** * Parses craigslist and returns items * @param String state * @param String itemManufacturer * @param ignoreWords