Code example #1
function scrapeTeams($url)
{
    // Fetch the page and parse it with Simple HTML DOM.
    $html = scraperWiki::scrape($url);
    $dom = new simple_html_dom();
    $dom->load($html);
    // Each team name sits in a link inside a td.cw cell.
    $cells = $dom->find('td.cw a');
    foreach ($cells as $cell) {
        $name = $cell->plaintext;
        $team = array('club' => $name);
        // 'club' is the unique key column.
        scraperWiki::save_sqlite(array('club'), $team);
    }
}
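A minimal way to drive this function might look like the following sketch; the league URL is a placeholder, and the require path is the one used by the other examples in this listing.

// Hypothetical driver: load the parser, then scrape one teams page.
require 'scraperwiki/simple_html_dom.php';
scrapeTeams('http://example.com/league/teams'); // placeholder URL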
Code example #2
function scrapePage($url)
{
    $html = scraperWiki::scrape($url);
    $dom = new simple_html_dom();
    $dom->load($html);
    // Each td.nom cell holds a name link; the matching count sits in a
    // td.compte cell of the same table row.
    $cells = $dom->find('td.nom');
    foreach ($cells as $cell) {
        $name = $cell->find('a', 0)->plaintext;
        $parent = $cell->parent();
        $count = $parent->find('td.compte', 0)->plaintext;
        if ($count) {
            $payload = array('name' => $name, 'count' => $count);
            scraperWiki::save_sqlite(array('name'), $payload);
        }
    }
}
Code example #3
function scrapeSmellsCategory($url)
{
    $html = scraperWiki::scrape($url);
    $dom = new simple_html_dom();
    $dom->load($html);
    // Each product listing carries its brand, name, prices and description.
    $products = $dom->find('div.product');
    foreach ($products as $product) {
        $brand = $product->find('h3 > a', 0);
        $productName = $product->find('p.productName > a', 0);
        echo $productName->innertext;
        $prices = $product->find('p.productPrice', 0);
        $rrp = $prices->find('a', 0);
        $ourPrice = $prices->find('a.ourPrice', 0);
        $productDesc = $product->find('p.productDesc a', 0);
        // Strip the fixed-length label text that precedes each price figure.
        $fixedRrp = substr($rrp->innertext, 10);
        $fixedPrice = substr($ourPrice->innertext, 16);
        $data = array('brand' => $brand->innertext, 'product' => $productName->innertext, 'rrp' => $fixedRrp, 'price' => $fixedPrice, 'desc' => $productDesc->innertext, 'url' => $url);
        scraperWiki::save_sqlite(array('brand', 'product', 'rrp', 'price', 'desc', 'url'), $data);
    }
}
Code example #4
function saveData($unique, $flight_data)
{
    // $unique lists the unique-key columns; the row in $flight_data is upserted on them.
    scraperWiki::save_sqlite($unique, $flight_data);
}
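As a sketch of how a wrapper like this is typically called (the column names and values below are illustrative, not taken from the original scraper): the first argument lists the columns that form the unique key, and the second is the row to upsert.

// Illustrative call: 'date' and 'flight_num' together act as the unique key.
$flight_data = array('date' => date('m.d.y'), 'airline' => 'LH', 'flight_num' => 'LH123');
saveData(array('date', 'flight_num'), $flight_data);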
Code example #5
}
//Current Synths
$synthList3 = file_get_contents("https://api.scraperwiki.com/api/1.0/datastore/sqlite?format=jsondict&name=current_synths&query=select%20DISTINCT%20manufacturer%2C%20url%2C%20name%20from%20%60swdata%60");
if (!empty($synthList3)) {
    $synthList3 = json_decode($synthList3);
}
$synths = array();
// Merge the three source lists, then deduplicate: array_unique cannot compare
// rows directly, so each row is serialised for the comparison.
$synths = traverseList($synthList1);
$synths = array_merge(traverseList($synthList2), $synths);
$synths = array_merge(traverseList($synthList3), $synths);
$synths = array_map('unserialize', array_unique(array_map('serialize', $synths)));
echo "Total synths: " . count($synths) . "\n";
//var_dump($synths);
if (!empty($synths)) {
    //$dbName = "vintagesynth-scrape-".$today = date("m-d-Y");
    $saveMessage = scraperWiki::save_sqlite(array('manufacturer', 'name', 'url'), $synths);
    //print strval($saveMessage);
    scraperwiki::save_var('total_results', count($synths));
    print scraperWiki::get_var('total_results');
}
function traverseList($list)
{
    $dataList = array();
    foreach ($list as $item) {
        //Clean up the data
        foreach ($item as $key => $value) {
            // Strip any HTML tags left in the field values.
            $item->{$key} = preg_replace("/<[^>]*>/", "", $value);
            //echo $item->$key."\n";
        }
        $dataList[] = $item;
    }
    // Callers above expect the cleaned list back.
    return $dataList;
}
Code example #6
require 'scraperwiki/simple_html_dom.php';
// Walk the first 20 result pages of Alexa's top sites for the Czech Republic and
// record, for each listed site, whether its front page references ga.js
// (i.e. whether it appears to use Google Analytics).
for ($i = 0; $i < 20; $i++) {
    $html = scraperWiki::scrape("http://www.alexa.com/topsites/countries;{$i}/CZ");
    $dom = new simple_html_dom();
    $dom->load($html);
    foreach ($dom->find("span[@class='topsites-label']") as $data) {
        $result = $data->plaintext;
        // The label is usually a bare domain, so make sure it has a scheme before fetching.
        $targetUrl = (strpos($result, 'http') === 0) ? $result : "http://" . $result;
        $target = scraperWiki::scrape($targetUrl);
        $ga = FALSE;
        if (strpos($target, 'ga.js') !== FALSE) {
            $ga = TRUE;
        }
        scraperWiki::save_sqlite(array("data"), array('data' => $result, 'ga' => $ga));
    }
}
Code example #7
function saveData($unique, $flight_data)
{
    // Note: $unique is ignored here; the unique key is hard-coded to date/airline/flight_num.
    scraperWiki::save_sqlite(array("date", "airline", "flight_num"), $flight_data);
}
Code example #8
<?php

require 'scraperwiki/simple_html_dom.php';
scraperWiki::sqliteexecute('drop table if exists ttt');
$url = 'http://www.rugbydump.blogspot.com/';
$html = scraperWiki::scrape($url);
$dom = new simple_html_dom();
$dom->load($html);
// Each embedded player is an <object> whose "movie" <param> holds the video URL.
$movies = $dom->find('object param[name="movie"]');
$i = 0;
foreach ($movies as $movie) {
    $i++;
    $movieurl = $movie->value;
    $data = array('id' => "code:New_Script_Resolve('" . $movieurl . "')", 'title' => 'Video ' . $i);
    scraperWiki::save_sqlite(array('id'), $data);
}
Code example #9
<?php

require 'scraperwiki/simple_html_dom.php';
$url = 'http://www.rugbydump.blogspot.com/';
$html = scraperWiki::scrape($url);
$dom = new simple_html_dom();
$dom->load($html);
$movies = $dom->find('object param[name="movie"]');
foreach ($movies as $movie) {
    $movieurl = $movie->value;
    $data = array('video' => $movieurl);
    scraperWiki::save_sqlite(array('video'), $data);
}
Code example #10
function saveData($unique, $railway)
{
    scraperWiki::save_sqlite($unique, $railway);
}
Code example #11
                        foreach ($view_dom->find('div[id=left_col] img[class=imgcenter]') as $element) {
                            $synthImages .= "http://www.vintagesynth.com" . $subDir . $element->src . ",";
                        }
                        $synthDescription = "";
                        foreach ($view_dom->find("div[class=grid_11] div[id=left_col] p") as $view_data) {
                            $synthDescription .= $view_data->plaintext . "<br/>";
                        }
                        $synths[] = array('name' => $cleanSynthName, 'manufacturer' => $navLink[0]->plaintext, 'url' => "http://www.vintagesynth.com" . $subNavLinkURL, 'description' => $synthDescription, 'images' => $synthImages);
                        /*echo "<pre>";
                          print_r($synths);
                          echo "</pre>";*/
                    }
                } else {
                    break;
                }
            }
        } else {
            print "The scrape has completed at a depth level of {$depth}.\n";
            break;
        }
    }
}
/*echo "<pre>";
print_r($synths);
echo "</pre>";*/
//$dbName = "vintagesynth-scrape-".$today = date("m-d-Y");
//$saveMessage = scraperWiki::save_sqlite(array('manufacturer','name','url','description','images'), $synths,$table_name=$dbName);
$saveMessage = scraperWiki::save_sqlite(array('manufacturer', 'name', 'url', 'description', 'images'), $synths);
print strval($saveMessage);
scraperwiki::save_var('total_results', count($synths));
print scraperWiki::get_var('total_results');
Code example #12
function saveData($unique, $record)
{
    // Note: $unique is ignored; the unique key is hard-coded to these three columns.
    scraperWiki::save_sqlite(array("Data_autocount", "data_blank", "data_Feedback"), $record);
}
Code example #13
# www.munich-airport.de
# We extract arrival information from the Munich airport website.
# We wanted a schedule interval of 5 minutes, but that is not allowed on a standard account (1 day).
require "scraperwiki/simple_html_dom.php";
$html = scraperWiki::scrape("http://www.munich-airport.de/de/consumer/index.jsp");
# Use the PHP Simple HTML DOM Parser to extract <td> tags
$dom = new simple_html_dom();
$dom->load($html);
$table = $dom->getElementById('navigation_mainpage_flightinfo_table');
foreach ($table->find('tr') as $data) {
    print $data->plaintext . "\n";
    // Flight details. Read tds or ths
    $tds = sizeof($data->find("td")) > 1 ? $data->find("td") : $data->find("th");
    if (sizeof($tds) == 0) {
        break;
    }
    $flightnr = $tds[1]->plaintext;
    $from = $tds[2]->plaintext;
    $time = $tds[3]->plaintext;
    $expected_time = $tds[4]->plaintext;
    // Skip header
    if ($flightnr == "Flug") {
        continue;
    }
    //Create date
    $date = date("m.d.y");
    //Build an array of flight information
    $flight_data = array("date" => $date, "flightnr" => $flightnr, "from" => $from, "time" => $time, "expected_time" => $expected_time);
    //Save the information for this flight
    scraperWiki::save_sqlite(array("date", "flightnr"), $flight_data);
}
Code example #14
            if ($td == "Évjárat:") {
                $prevtd = "Évjárat:";
            } elseif ($td == "Kivitel:") {
                $prevtd = "Kivitel:";
            } elseif ($td == "Állapot:") {
                $prevtd = "Állapot:";
            } elseif ($td == "Üzemanyag:") {
                $prevtd = "Üzemanyag:";
            } elseif ($td == "Sebességváltó fajtája:") {
                $prevtd = "Sebességváltó fajtája:";
            } elseif ($td == "Hengerűrtartalom:") {
                $prevtd = "Hengerűrtartalom:";
            } elseif ($td == "Henger-elrendezés:") {
                $prevtd = "Henger-elrendezés:";
            } elseif ($td == "Hajtás:") {
                $prevtd = "Hajtás:";
            } elseif ($td == "Teljesítmény:") {
                $prevtd = "Teljesítmény:";
            } elseif ($td == "Ajtók száma:") {
                $prevtd = "Ajtók száma:";
            } elseif ($td == "Klíma fajtája:") {
                $prevtd = "Klíma fajtája:";
            } elseif ($td == "Szín:") {
                $prevtd = "Szín:";
            } else {
                $prevtd = "...";
            }
        }
    }
    scraperWiki::save_sqlite(array('id'), array('id' => $kod, 'url' => $newurl, 'ar' => $ar, 'evjarat' => $evjarat, 'kivitel' => $kivitel, 'allapot' => $allapot, 'uzemanyag' => $uzemanyag, 'sebvalto' => $sebvalto, 'urtartalom' => $urtartalom, 'hengerelrend' => $hengerelr, 'hajtas' => $hajtas, 'teljesitmeny' => $teljesitmeny, 'ajtok' => $ajtok, 'klima' => $klima, 'szin' => $szin));
}
Code example #15
function saveMembersLinks($unique, $recordLinks)
{
    scraperWiki::save_sqlite(array("data_MemberID"), $recordLinks, "membersLinks");
}
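The third argument to save_sqlite here, "membersLinks", sends the rows to a named table instead of the default swdata (the same optional parameter appears as $table_name in other examples in this listing), and the $unique parameter is ignored in favour of the hard-coded data_MemberID key. A small illustrative call, in which every field except data_MemberID is an assumption:

// Illustrative record: rows land in the "membersLinks" table, keyed on data_MemberID.
$recordLinks = array('data_MemberID' => 42, 'data_URL' => 'http://example.com/member/42');
saveMembersLinks(array('data_MemberID'), $recordLinks);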
Code example #16
require 'scraperwiki/simple_html_dom.php';
$page_counter = 0;
//Initial value of the table-name switch variable; enable the next line for the
//first run only, then disable it again.
//scraperwiki::save_var('which-table', "1");
// Alternate between the swdata1 and swdata2 tables on successive runs.
$oldnap = scraperwiki::get_var('which-table');
if ($oldnap == "1") {
    scraperwiki::save_var('which-table', "2");
} else {
    scraperwiki::save_var('which-table', "1");
}
$current = "swdata" . $oldnap;
print "Writing table: " . $current . "\n";
do {
    $kovetkezo = "";
    $page_counter++;
    $pageurl = "http://www.hasznaltauto.hu/talalatilista/auto/YHUQECPJ75JDSHH4K11URJIL5UM5ZJ67OJ0PSDARHGCQOAM901FIFTJ117ST1HZ508UQIHTYLS04GGOQJWA9WSRZWSFHQTUKOPY2GKPLULCZGKD479JY3IPMERARL1J9HFGYQ12RUA692DHRT071D83FDDERAH5HL528M7LK8HZQR4ILRLOD6FKZ7QZEK7P5KFTJJAZI6R1YEG3KK5QPYDMRE6OERT170MUJUQMP9Y7MSZKYU58F9FSCYA1GM1TWJS96RGAI5CIPZCM20REO47E15K4ZP1M3CF8FOG3MOCGU93088QKU33WAPEQLT1STGHGGUAET3KLJ0CHTOY6Z5YK2U2OE0185S/page{$page_counter}";
    $html_content = scraperWiki::scrape($pageurl);
    $html = str_get_html($html_content);
    foreach ($html->find("div.talalati_lista") as $talalat) {
        foreach ($talalat->find("h2 a") as $el) {
            $url = $el->href;
            $kod = substr($url, -7);
        }
        scraperWiki::save_sqlite(array('id'), array('id' => $kod, 'url' => $url), $table_name = $current);
    }
    foreach ($html->find("div.oldalszamozas a[title=Következő]") as $kovetkezo) {
        //    print $page_counter . "\n";
    }
    //} while ($page_counter != 10);
} while ($kovetkezo != "");
Code example #17
                    $manufacturerCounter++;
                } else {
                    break;
                }
            }
        } else {
            $synthManufacturer = $manufacturerQuery;
            //Search for a specific manufacturer
            $tManName = preg_replace("/\\s/", "_", $synthManufacturer);
            //Build a temp name for the array key
            $synthManufacturer = urlencode($synthManufacturer);
            echo "Searching manufacturer: " . $synthManufacturer . "\n";
            $foundSynths[$state][$tManName] = findSynthsOnCraigslist($state, $jsonManufacturerScraper, $synthManufacturer, $ignoreWords, $cityDepth, $synthDepth, $depthOverride);
            if (!empty($foundSynths[$state][$tManName])) {
                //scraperwiki::save_var($state.'-'.$synthManufacturer.'-'.'synths', json_encode($foundSynths[$state][$synthManufacturer]));
                $saveMessage = scraperWiki::save_sqlite(array('manufacturer', 'synth_name', 'post_item_date', 'post_item_name', 'post_item_price', 'post_item_link', 'post_item_state', 'query', 'link', 'post_item_description', 'post_item_images'), $foundSynths[$state][$tManName]);
                print strval($saveMessage);
            } else {
                //Delete existing data
                $info = scraperwiki::table_info($name = "swdata");
                if (!empty($info)) {
                    scraperwiki::sqliteexecute("DELETE FROM swdata");
                    //Truncate the table before adding new results
                }
            }
        }
    }
}
/**
* Parses craigslist and returns synths
* @param String state
Code example #18
    $jsonManufacturers = json_decode($jsonSearchData);
    //echo "<pre>";
    //print_r($jsonManufacturers);
    //echo "</pre>";
    foreach ($states as $state) {
        $state = trim($state);
        echo "Searching state: " . $state . "\n";
        $state = urlencode($state);
        if (empty($foundItems)) {
            $foundItems = findOnCraigslist($state, $jsonManufacturers, $ignoreWords, $cityDepth, $depthOverride);
        } else {
            $foundItems = array_merge($foundItems, findOnCraigslist($state, $jsonManufacturers, $ignoreWords, $cityDepth, $depthOverride));
        }
    }
    if (!empty($foundItems)) {
        $saveMessage = scraperWiki::save_sqlite(array('manufacturer', 'name', 'post_item_date', 'post_item_name', 'post_item_price', 'post_item_link', 'post_item_state', 'query', 'link'), $foundItems);
        print strval($saveMessage);
    } else {
        //Delete existing data
        $info = scraperwiki::table_info($name = "swdata");
        if (!empty($info)) {
            scraperwiki::sqliteexecute("DELETE FROM swdata");
            //Truncate the table before adding new results
        }
    }
}
/**
* Parses craigslist and returns items
* @param String state
* @param String itemManufacturer
* @param ignoreWords