// 4Networking member-profile scraper (fragment of a duplicated script body:
// the tail of one pasted copy, then a full second copy that ends mid-block).
// Resumes after the last processed profile number (saved var 'last') and
// stores company, phone and website per profile.
// NOTE(review): the "//*[@id='main']..." selectors are XPath-style; confirm
// the bundled simple_html_dom build actually supports them.
$company = $profile->find("//*[@id='main']/div[1]/div/div[1]/div[2]/div[1]/div[1]/span/span", 0)->plaintext;
}
$website = $profile->find("span.orange-text a", 0) ? $profile->find("span.orange-text a", 0)->href : '';
if ($profile->find("div.blue3-empty-box div.content div.word-wrap", 0)) {
    $info = $profile->find("div.blue3-empty-box div.content div.word-wrap", 0)->plaintext;
} else {
    $info = '';
}
$record = array('name' => $profile->find("//div/a/span", 1)->plaintext, 'company' => $company, 'phone' => $profile->find("strong.big-blue3-text span", 0)->plaintext, 'website' => $website);
scraperwiki::save(array('company'), $record);
//print json_encode($record) . "\n";
scraperwiki::save_var('last', $profile_no);
}
}
//scraperwiki::save_var('last', 0);
scraperwiki::attach("find_4n_profiles");
$links = scraperwiki::select("profile from find_4n_profiles.swdata");
require 'scraperwiki/simple_html_dom.php';
$profile = new simple_html_dom();
foreach ($links as $link) {
    set_time_limit(0);
    // Profile number is the numeric suffix of the member-details URL.
    $profile_no = intval(str_replace('http://www.4networking.biz/Members/Details/', '', $link['profile']));
    if ($profile_no > scraperwiki::get_var('last')) {
        $html = scraperWiki::scrape($link['profile']);
        $profile->load($html);
        // Prefer the span's title attribute for the company; fall back to its text.
        if (!($company = $profile->find("//*[@id='main']/div[1]/div/div[1]/div[2]/div[1]/div[1]/span/span", 0)->title)) {
            $company = $profile->find("//*[@id='main']/div[1]/div/div[1]/div[2]/div[1]/div[1]/span/span", 0)->plaintext;
        }
        $website = $profile->find("span.orange-text a", 0) ? $profile->find("span.orange-text a", 0)->href : '';
        if ($profile->find("div.blue3-empty-box div.content div.word-wrap", 0)) {
            $info = $profile->find("div.blue3-empty-box div.content div.word-wrap", 0)->plaintext;
// TfL bus-route aggregator (duplicated script body — the aggregation appears
// twice, with the attach/select that defines $routes between the copies).
// Collapses per-stop rows into one newline-separated "lat,lng,2357" coords
// string per route and saves them to SQLite keyed by a running id.
// Fix: the original used @-suppression to hide the undefined-index notice on
// the first coords append; initialise the accumulator explicitly instead.
$routemap = array();
foreach ($routes as $route) {
    $routemap[$route['route']]['route'] = $route['route'];
    if (!isset($routemap[$route['route']]['coords'])) {
        $routemap[$route['route']]['coords'] = '';
    }
    $routemap[$route['route']]['coords'] .= $route['latitude'] . ',' . $route['longitude'] . ',2357' . "\n";
}
$theroutes = array();
$count = 0;
foreach ($routemap as $a_route) {
    $count++;
    $theroutes[] = array('id' => $count, 'route' => $a_route['route'], 'coords' => $a_route['coords']);
}
scraperwiki::save_sqlite(array("id"), $theroutes);
//Whoops, seems that doing 600 queries in under 80 seconds isn't a smart idea. This scraper attempts to aggregate coordinates into something usable.
scraperwiki::attach("tfl_bus_routes_scraper", "src");
$routes = scraperwiki::select("route, stop_name, latitude, longitude from src.tfl_buses where run = 1 order by sequence asc");
// Second pasted copy of the same aggregation, now running on the freshly
// selected $routes.
$routemap = array();
foreach ($routes as $route) {
    $routemap[$route['route']]['route'] = $route['route'];
    if (!isset($routemap[$route['route']]['coords'])) {
        $routemap[$route['route']]['coords'] = '';
    }
    $routemap[$route['route']]['coords'] .= $route['latitude'] . ',' . $route['longitude'] . ',2357' . "\n";
}
$theroutes = array();
$count = 0;
foreach ($routemap as $a_route) {
    $count++;
    $theroutes[] = array('id' => $count, 'route' => $a_route['route'], 'coords' => $a_route['coords']);
}
scraperwiki::save_sqlite(array("id"), $theroutes);
// JSON endpoint view for the 'fys_api_1' datastore — emits {"items": ...}.
// The same three-statement script is pasted four times, so header() is called
// again after output has already been sent on the later copies (would warn).
// NOTE(review): scraperwiki::attach() is not documented to return the dataset;
// $s (and hence the JSON payload) may be null — confirm against the PHP library.
$sourcescraper = 'fys_api_1';
# scraperwiki::attach('irish-epa-licenses', 'lic');
# $licenses = scraperwiki::select("* from lic.swdata");
// $licenses = scraperwiki::getData('irish-epa-licenses');
$s = scraperwiki::attach($sourcescraper, $limit = 250);
header('Content-type: application/json');
print "{ \"items\": " . json_encode($s) . "}";
# Blank PHP
$sourcescraper = 'fys_api_1';
# scraperwiki::attach('irish-epa-licenses', 'lic');
# $licenses = scraperwiki::select("* from lic.swdata");
// $licenses = scraperwiki::getData('irish-epa-licenses');
$s = scraperwiki::attach($sourcescraper, $limit = 250);
header('Content-type: application/json');
print "{ \"items\": " . json_encode($s) . "}";
# Blank PHP
$sourcescraper = 'fys_api_1';
# scraperwiki::attach('irish-epa-licenses', 'lic');
# $licenses = scraperwiki::select("* from lic.swdata");
// $licenses = scraperwiki::getData('irish-epa-licenses');
$s = scraperwiki::attach($sourcescraper, $limit = 250);
header('Content-type: application/json');
print "{ \"items\": " . json_encode($s) . "}";
# Blank PHP
$sourcescraper = 'fys_api_1';
# scraperwiki::attach('irish-epa-licenses', 'lic');
# $licenses = scraperwiki::select("* from lic.swdata");
// $licenses = scraperwiki::getData('irish-epa-licenses');
$s = scraperwiki::attach($sourcescraper, $limit = 250);
header('Content-type: application/json');
print "{ \"items\": " . json_encode($s) . "}";
// Coast & Country cottage-detail scraper (fragment): tail of a text-cleanup
// helper, a key-lookup helper, then resume logic that skips rows already
// processed before the per-cottage loop (truncated below).
$txt = str_replace("</p>", "", $txt);
$txt = preg_replace('/\\s+/', ' ', $txt);
return $txt;
}

// Linear scan: returns the array key of the row whose COTTAGE_URL matches
// $id, or null when absent. Used to resume from the saved 'cottID' marker.
function searchForId($id, $array)
{
    foreach ($array as $key => $val) {
        if ($val['COTTAGE_URL'] === $id) {
            return $key;
        }
    }
    return null;
}

$blacklist = array();
$url = "http://www.coastandcountry.co.uk/cottage-details/";
scraperwiki::attach("coastandcountrycouk");
# get an array of the cottage data to scrape
$cottData = scraperwiki::select("COTTAGE_URL, PRICE_HIGH, PRICE_LOW from 'coastandcountrycouk'.SWDATA order by COTTAGE_URL");
$placeholder = scraperwiki::get_var("cottID");
if ($placeholder != "") {
    // Drop everything before the last-processed cottage so the run resumes.
    $index = searchForId($placeholder, $cottData);
    $cottData = array_splice($cottData, $index);
}
require 'scraperwiki/simple_html_dom.php';
$dom = new simple_html_dom();
foreach ($cottData as $value) {
    scraperwiki::save_var("cottID", $value['COTTAGE_URL']);
    // check the cottage url against the blacklist
    foreach ($blacklist as $blItem) {
        if ($value['COTTAGE_URL'] == $blItem) {
            continue 2;
// JSON/JSONP view over the last 7 days of Irish President engagements
// (jumbled script body: the output code precedes the attach/select that
// builds $alltrips).
//print "{ \"items\": ".json_encode($alltrips) ."}";
// Read the JSONP callback defensively: it is attacker-controlled, so guard
// the undefined index and restrict it to identifier characters to prevent
// reflected-script injection into the text/javascript response.
$callback = isset($_GET['callback']) ? $_GET['callback'] : '';
if (!preg_match('/^[A-Za-z0-9_.$]+$/', $callback)) {
    $callback = '';
}
if ($callback) {
    header("Content-Type: text/javascript; charset=utf8");
    echo $callback . "(" . json_encode($alltrips) . ");";
} else {
    header("Content-type: application/json");
    echo json_encode($alltrips);
}
// {label} {id} {type} {day} {date} {year} {time} {startdate} {latlng} {arasnotaras} {details} {place} {act} {issue} {constitutional} {destf} {address} {days} {destination}
?>
<?php
//$sourcescraper = 'irish_president_engagementstest';
//$s = scraperwiki::scrape($sourcescraper, $limit=250);
// = scraperwiki::attach($sourcescraper, $limit=250);
scraperwiki::attach('irish_president_engagementsjson');
$trips = scraperwiki::select("* from irish_president_engagementsjson.swdata where date > date('now','-7 day');");
$alltrips = array();
foreach ($trips as $trip) {
    // Strip parentheses from info/label: '(' removed, ')' becomes ','.
    $tripinfo = $trip["info"];
    $triplabel = $trip["label"];
    $tripinfo = str_replace('(', '', $tripinfo);
    $tripinfo = str_replace(')', ',', $tripinfo);
    $triplabel = str_replace('(', '', $triplabel);
    $triplabel = str_replace(')', ',', $triplabel);
    //print $triplabel;
    $trip["info"] = $tripinfo;
    $trip["label"] = $triplabel;
    $alltrips[] = $trip;
}
//header('Content-type: application/json');
</itunes:owner>
<?php
// Podcast RSS view over the 'exfm' datastore (jumbled script body: the <item>
// loop and channel close appear before the header/preamble below).
// NOTE(review): $item values are echoed into XML unescaped — titles containing
// '&' or '<' will yield invalid XML; loved_count doubles as the <guid>.
// .. BUILD THE ITEM LIST
foreach ($data as $item) {
    echo " <item>\n";
    echo " <title>" . $item['artist'] . " - " . $item['title'] . "</title>\n";
    echo " <enclosure url=\"" . $item['url'] . "\" type=\"audio/mpeg\" />\n";
    echo " <guid>" . $item['loved_count'] . "</guid>\n";
    echo " </item>\n";
}
?>
</channel>
</rss><?php
scraperwiki::httpresponseheader('Content-Type', 'application/atom+xml');
scraperwiki::attach("exfm");
$data = scraperwiki::select("* from exfm.swdata");
?>
<rss xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd" version="2.0">
<channel>
<title>TuMusika Evolution Podcast</title>
<link>http://www.tumusika.net/</link>
<language>es-es</language>
<itunes:owner>
<itunes:name>TuMusika Evolution</itunes:name>
<itunes:email>darkgiank@darkgiank.com</itunes:email>
</itunes:owner>
<?php
// .. BUILD THE ITEM LIST
<?php
// Extracts hotel name + URL from stored hotel-list HTML pages into the
// 'hotel_list' table, keyed on hotel name.
// NOTE(review): the whole script is pasted twice; the second plain `require`
// of simple_html_dom.php would re-declare its classes and fatal — presumably
// an artifact of how the view was saved, confirm before relying on a rerun.
require 'scraperwiki/simple_html_dom.php';
$dom = new simple_html_dom();
scraperwiki::attach("test_1_2");
$result = scraperwiki::sqliteexecute("select html from hotel_list_pages");
$hotel_list_pages_contents = $result->data;
foreach ($hotel_list_pages_contents as $contents) {
    $html = $contents[0];
    $dom->load($html);
    // One row per hotel; the first <td><h3><a> carries name and link.
    foreach ($dom->find("table.hotellist tr") as $data) {
        $tds = $data->find("td h3 a");
        $record = array('hotel' => $tds[0]->plaintext, 'url' => $tds[0]->href);
        scraperwiki::save_sqlite(array('hotel'), $record, $table_name = 'hotel_list');
    }
}
require 'scraperwiki/simple_html_dom.php';
$dom = new simple_html_dom();
scraperwiki::attach("test_1_2");
$result = scraperwiki::sqliteexecute("select html from hotel_list_pages");
$hotel_list_pages_contents = $result->data;
foreach ($hotel_list_pages_contents as $contents) {
    $html = $contents[0];
    $dom->load($html);
    foreach ($dom->find("table.hotellist tr") as $data) {
        $tds = $data->find("td h3 a");
        $record = array('hotel' => $tds[0]->plaintext, 'url' => $tds[0]->href);
        scraperwiki::save_sqlite(array('hotel'), $record, $table_name = 'hotel_list');
    }
}
// Communication-log view: lists contacts made on behalf of a person/org ($w,
// taken from the URL query) plus a per-subject summary. Jumbled script body:
// the subject-summary output precedes the attach and the definition of $w.
print '</ul>';
$data = scraperwiki::select("count(*) AS c, subject AS s FROM contact INNER JOIN contact_subject ON contact_subject.contact_id=contact.contact_id WHERE behalf='{$w}' GROUP BY subject ORDER BY subject DESC");
print "<h2>Subjects covered</h2><ul>";
foreach ($data as $row) {
    extract($row);
    ?> <li><?php echo $s; ?> (<?php echo $c; ?> )</li><?php
}
print "</ul>";
scraperwiki::attach("communication_log");
$who = $_SERVER['URLQUERY'];
// $w is interpolated straight into SQL below. The connection is read-only,
// but still double up single quotes (SQLite escaping) so quoted names can't
// break out of the string literal.
$w = str_replace("'", "''", urldecode($who));
$data = scraperwiki::select("contact.contact_id AS id, person, title, organization, uri, date_contact_h FROM contact INNER JOIN victim ON victim.contact_id=contact.contact_id WHERE behalf='{$w}' ORDER BY date_contact_c DESC");
$orgs = array();
print "<h2>Contacts on behalf of {$w}</h2><ul>";
foreach ($data as $row) {
    extract($row);
    // Collect this contact's subjects into a comma-joined string.
    $sub = scraperwiki::select("subject FROM contact_subject WHERE contact_id='{$id}' ORDER BY subject");
    $s = array();
    foreach ($sub as $sRow) {
        $s[] = $sRow['subject'];
    }
    $s = join(', ', $s);
    ?>
// Bluechip cottage scraper (fragment): end of a lookup helper, a 404-style
// content check, then resume logic before the per-cottage loop (truncated).
return $key;
}
}
return null;
}

# Check for pages with no usable data
function fourOhFour($html)
{
    // Looks for the 'Home not available' marker starting 1200 bytes in,
    // skipping the common page chrome.
    if (strpos($html, 'Home not available', 1200) !== false) {
        return true;
    }
    return false;
}

$blacklist = array();
# get an array of the cottage data to scrape
scraperwiki::attach("bluechip_summary");
$cottData = scraperwiki::select(" COTTAGE_ID, COTTAGE_URL, SLEEPS, BEDROOMS, FEATURES,COTTAGE_NAME, PRICE_LOW from 'bluechip_summary'.SWDATA order by COTTAGE_URL");
$placeholder = scraperwiki::get_var("cottURL");
if ($placeholder != "") {
    // Resume: drop rows before the last-processed cottage URL.
    $index = searchForId($placeholder, $cottData);
    $cottData = array_splice($cottData, $index);
}
require 'scraperwiki/simple_html_dom.php';
$dom = new simple_html_dom();
foreach ($cottData as $value) {
    $highPrice = "";
    $lowPrice = "";
    $found = 0;
    $count = 0;
    scraperwiki::save_var("cottURL", $value['COTTAGE_URL']);
    // check the cottage url against the blacklist
<?php
// NH General Court bill-votes view: selects vote rows newest-first (the
// MM/DD/YYYY date string is re-ordered into YYYYMMDD for sorting, then
// numeric vote number) and renders them as a Bootstrap table (truncated).
$sourcescraper = 'nh_gencourt_votes';
scraperwiki::attach('nh_gencourt_votes');
$data = scraperwiki::select("* from nh_gencourt_votes.bill_votes \n order by \n substr(date_of_vote, 7) || substr(date_of_vote, 1, 2) || substr(date_of_vote, 4, 2) desc, \n cast(vote_num as int) desc\n ");
?>
<!DOCTYPE html>
<html lang="en">
<head>
<!-- http://twitter.github.com/bootstrap/base-css.html -->
<link href="//netdna.bootstrapcdn.com/twitter-bootstrap/2.3.0/css/bootstrap-combined.min.css" rel="stylesheet">
<script src="//ajax.googleapis.com/ajax/libs/jquery/1.9.1/jquery.min.js"></script>
<script src="//netdna.bootstrapcdn.com/twitter-bootstrap/2.3.0/js/bootstrap.min.js"></script>
<style> th { white-space: nowrap; } </style>
</head>
<body>
<div class="container">
<div class="page-header"><h1>NH House Bills</h1></div>
<div class="row">
<div class="span12">
<table class="table table-striped table-bordered table-hover table-condensed">
<tr>
<th>Date of Vote</th>
<th>Vote #</th>
<th>Bill #</th>
<th>Bill Title</th>
<th>Question/Motion</th>
// Hoseasons cottage scraper (fragment): end of a lookup helper, a 404-style
// content check, then resume logic before the per-cottage loop (truncated).
return $key;
}
}
return null;
}

# Check for pages with no usable data
function fourOhFour($html)
{
    // Looks for the 'Home not available' marker starting 1200 bytes in,
    // skipping the common page chrome.
    if (strpos($html, 'Home not available', 1200) !== false) {
        return true;
    }
    return false;
}

$blacklist = array();
# get an array of the cottage data to scrape
scraperwiki::attach("hoseasons_summary");
$cottData = scraperwiki::select(" COTTAGE_ID, COTTAGE_URL, SLEEPS, BEDROOMS, PETS,COTTAGE_NAME, PRICE_LOW, PRICE_HIGH from 'hoseasons_summary'.SWDATA order by COTTAGE_URL");
$placeholder = scraperwiki::get_var("cottURL");
if ($placeholder != "") {
    // Resume: drop rows before the last-processed cottage URL.
    $index = searchForId($placeholder, $cottData);
    $cottData = array_splice($cottData, $index);
}
require 'scraperwiki/simple_html_dom.php';
$dom = new simple_html_dom();
foreach ($cottData as $value) {
    $highPrice = "";
    $lowPrice = "";
    $found = 0;
    $count = 0;
    scraperwiki::save_var("cottURL", $value['COTTAGE_URL']);
    // check the cottage url against the blacklist
// CZ public-organizations crawler (fragment): tail of a form-options parser
// (returns value/label pairs from the icoNam <select>), then setup that loads
// the saved lookup tables and resume counters for the nested
// dri/period/form/chapter/region crawl (truncated below).
//no form for given combinations
return array();
} else {
    //get dom
    $dom = new simple_html_dom();
    $dom->load($html);
    $orgs_obj = $dom->find('select[name=icoNam]', 0)->find('option');
    foreach ((array) $orgs_obj as $org) {
        $data[] = array('value' => $org->value, 'label' => trim($org->innertext));
    }
    return $data;
}
}

require 'scraperwiki/simple_html_dom.php';
//read the saved tables
scraperwiki::attach("cz_public_organizations_ufis_basics", "src");
$dris = scraperwiki::select("* from src.dri order by value");
$periods = scraperwiki::select("* from src.period order by value");
$forms = scraperwiki::select("* from src.form order by value");
$chapters = scraperwiki::select("* from src.chapter order by value");
$regions = scraperwiki::select("* from src.region order by value");
$periods = array('0' => array('value' => '12/2012')); //temp!!
//$forms = array('0' => array('value' => 50)); //temp
//scraperwiki::save_var('last_c',4); //temp
// Resume markers for each loop level of the crawl (default 0 = start over).
$d = scraperwiki::get_var('last_d', 0);
$p = scraperwiki::get_var('last_p', 0);
$f = scraperwiki::get_var('last_f', 0);
$c = scraperwiki::get_var('last_c', 0);
$r = scraperwiki::get_var('last_r', 0);
foreach ((array) $dris as $dkey => $dri) {
// CZ public-organizations detail extractor (duplicated script body): parses
// the stored HTML tables, saving each org's id, short name, chapter and the
// inner id embedded in the detail link's href.
//foreach row save info
foreach ((array) $rows as $row) {
    //inner org_id is in <a href= ... it is used for getting details from the system
    $as = $row->find("a");
    $tmp_text = $as[0]->href;
    $inner_org_id = substr($tmp_text, $len);
    //<td>
    $tds = $row->find("td");
    //save the data
    $out = array('org_id' => trim($tds[0]->plaintext), 'short_name' => trim($tds[1]->plaintext), 'inner_org_id' => $inner_org_id, 'chapter' => $data_row['chapter']);
    scraperwiki::save_sqlite(array('org_id'), $out);
}
}

require 'scraperwiki/simple_html_dom.php';
//read the data saved from downloader
scraperwiki::attach("cz_public_organizations_2_downloader", "src");
$data = scraperwiki::select("* from src.swdata");
//helper: the inner org id is everything in the href after this prefix
$len = strlen("/cgi-bin/ufisreg/detail.pl?org=");
foreach ((array) $data as $data_row) {
    //get dom from data
    $dom = new simple_html_dom();
    $dom->load($data_row['html']);
    //extract information
    $rows = $dom->find("tr");
    //first row is the header, removing it
    array_shift($rows);
    //foreach row save info
    foreach ((array) $rows as $row) {
        //inner org_id is in <a href= ... it is used for getting details from the system
        $as = $row->find("a");
// Prague assembly member-info scraper (fragment): tail of returnSubstrings(),
// a convenience wrapper, then the loop that scrapes each member's page for
// name, e-mail, party and club, saving into the 'info' table.
return $result;
}

/**
 * finds 1st substring between opening and closing markers
 * @return result 1st substring
 */
function get_first_string($text, $openingMarker, $closingMarker)
{
    $out_ar = returnSubstrings($text, $openingMarker, $closingMarker);
    $out = $out_ar[0];
    return $out;
}

//retrieves data about voting members of assembly from https://scraperwiki.com/scrapers/cz_praha_voting_records_retrieval/
//2010-2014
require 'scraperwiki/simple_html_dom.php';
scraperwiki::attach("cz_praha_voting_records_retrieval", "src");
$rows = scraperwiki::select("distinct(mp_id) from src.mp_vote");
foreach ($rows as $row) {
    $url = "http://www.praha.eu/jnp/cz/home/volene_organy/zastupitelstvo_hmp/slozeni_zastupitelstva/index.html?memberId=" . $row['mp_id'];
    $html = scraperwiki::scrape($url);
    $dom = new simple_html_dom();
    $dom->load($html);
    // Slice out the block between the heading and the next <div>, then pick
    // e-mail / party / club out of it by marker strings.
    $part = get_first_string($html, '</h2>', '<div>');
    $name = trim($dom->find('h2', 0)->plaintext);
    $email = get_first_string($part, 'mailto:', '"');
    $party = trim(get_first_string($part, 'Strana:</span>', '<br'));
    $club = trim(get_first_string(get_first_string($part, 'Klub:</span>', '</a') . '::', '">', '::'));
    // NOTE(review): $email is extracted but not included in the saved record.
    $data[] = array('id' => $row['mp_id'], 'name' => $name, 'party' => $party, 'club' => $club);
}
scraperwiki::save_sqlite(array('id'), $data, 'info');
/**
// Attaches the named hydro datastore under an alias, dumps its tables via the
// debug helper, then runs the application updater against that alias (the
// second argument '' presumably selects all applications — confirm against
// updateHydroApplications, which is defined outside this view).
function fetchRegionHydroData($dbname, $aliasname)
{
    scraperwiki::attach($dbname, $aliasname);
    debug_tables($aliasname);
    updateHydroApplications($aliasname, '');
}
// Lichfield Cathedral events RDF view (jumbled script body: the per-item tail
// and the closing </rdf:RDF> appear before the attach/select and preamble).
// Every event is pinned to the cathedral's fixed georss:point.
?>
</ev:startdate>
<ev:enddate><?php echo $enddate; ?>
</ev:enddate>
<ev:location>Lichfield Cathedral</ev:location>
<georss:point>52.685556 -1.830556</georss:point>
</item>
<?php
}
?>
</rdf:RDF><?php
// Attach the data
scraperwiki::attach("hhhlich-lichfield-arts-events");
// Get the data
$data = scraperwiki::select("* from swdata limit 10");
//print_r($data);
echo '<?xml version="1.0" encoding="utf-8"?>';
?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:ev="http://purl.org/rss/1.0/modules/event/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:georss="http://www.georss.org/georss" xmlns:creativeCommons="http://backend.userland.com/creativeCommonsRssModule" xmlns="http://purl.org/rss/1.0/">
<rdf:Description rdf:about="http://lichfieldlive.co.uk/">
<dc:title>Lichfield What's On Importer</dc:title>
// Plain-text iCal view over the ONS release schedule (duplicated script body:
// the event loop appears before and after the query-string parsing).
// Fix: the DESCRIPTION line previously ended with a literal backslash-n
// ("\\n") before END:VEVENT, leaving the component terminator on the same
// physical line — invalid per RFC 5545. Emit a real newline instead.
// NOTE(review): DTSTAMP/DTSTART are concatenated with no ':' separator — the
// stored values presumably begin with ':' or ';PARAM=...' themselves; confirm
// against the ons_release_schedule_ical.vevents table.
$vevents = scraperwiki::select("* from ons_release_schedule_ical.vevents limit {$limit} offset {$offset}");
$icalevents = array();
foreach ($vevents as $vevent) {
    $icalevent = "BEGIN:VEVENT\nDTSTAMP" . $vevent["DTSTAMP"] . "\nDTSTART" . $vevent["DTSTART"] . "\nX-TITLE:" . $vevent["Title"] . "\nSUMMARY:" . $vevent["Summary"] . "\nDESCRIPTION:Theme: " . $vevent["Theme"] . "\n" . $vevent["Summary"] . "\nEND:VEVENT\n";
    #print_r($icalevent);
    $icalevents[] = $icalevent;
}
print "BEGIN:VCALENDAR\nMETHOD:PUBLISH\nVERSION:2.0\nX-WR-CALNAME:ONS Release Calendar\nPRODID:-//Apple Inc.//iCal 4.0.4//EN\nX-APPLE-CALENDAR-COLOR:#B027AE\nX-WR-TIMEZONE:Europe/London\nCALSCALE:GREGORIAN\n" . implode("", $icalevents) . "\nEND:VCALENDAR";
// Derive an ical string of 10 eventss
// Parse limit/offset out of the raw query string, defaulting to 10/0.
$querylist = explode("&", getenv("QUERY_STRING"));
$limit = 10;
$offset = 0;
foreach ($querylist as $queryl) {
    $ql = explode("=", $queryl);
    if ($ql[0] == "limit" && count($ql) == 2) {
        $limit = intval($ql[1]);
    }
    if ($ql[0] == "offset" && count($ql) == 2) {
        $offset = intval($ql[1]);
    }
}
scraperwiki::httpresponseheader("Content-Type", "text/plain");
scraperwiki::attach('ons_release_schedule_ical');
$vevents = scraperwiki::select("* from ons_release_schedule_ical.vevents limit {$limit} offset {$offset}");
$icalevents = array();
foreach ($vevents as $vevent) {
    $icalevent = "BEGIN:VEVENT\nDTSTAMP" . $vevent["DTSTAMP"] . "\nDTSTART" . $vevent["DTSTART"] . "\nX-TITLE:" . $vevent["Title"] . "\nSUMMARY:" . $vevent["Summary"] . "\nDESCRIPTION:Theme: " . $vevent["Theme"] . "\n" . $vevent["Summary"] . "\nEND:VEVENT\n";
    #print_r($icalevent);
    $icalevents[] = $icalevent;
}
print "BEGIN:VCALENDAR\nMETHOD:PUBLISH\nVERSION:2.0\nX-WR-CALNAME:ONS Release Calendar\nPRODID:-//Apple Inc.//iCal 4.0.4//EN\nX-APPLE-CALENDAR-COLOR:#B027AE\nX-WR-TIMEZONE:Europe/London\nCALSCALE:GREGORIAN\n" . implode("", $icalevents) . "\nEND:VCALENDAR";
<?php
// P3 podcast OPML view: one <outline> per distinct programme, each with up to
// five episode outlines (jumbled script body: the loop appears before the
// preamble and again, truncated, after it).
// NOTE(review): descriptions/urls are printed into XML attributes unescaped.
foreach ($programs as $program) {
    print '<outline text="' . $program['progId'] . '">';
    $pods = scraperwiki::select("* from sr_p3_poddar.swdata where progId=" . $program['progId'] . " limit 5");
    foreach ($pods as $pod) {
        print '<outline text="' . $pod['description'] . '" URL="' . $pod['url'] . '" type="audio" />';
    }
    print '</outline>';
}
?>
</body>
</opml>
<?php
# Blank PHP
$sourcescraper = 'sr_p3_poddar';
scraperwiki::attach("sr_p3_poddar");
$programs = scraperwiki::select("distinct progId from sr_p3_poddar.swdata");
print '<?xml version="1.0" encoding="UTF-8"?>';
?>
<opml version="1.1">
<head>
<title>Podsändningar i P3</title>
</head>
<body>
<?php
foreach ($programs as $program) {
    print '<outline text="' . $program['progId'] . '">';
    $pods = scraperwiki::select("* from sr_p3_poddar.swdata where progId=" . $program['progId'] . " limit 5");
    foreach ($pods as $pod) {
        print '<outline text="' . $pod['description'] . '" URL="' . $pod['url'] . '" type="audio" />';
    }
// Appcelerator devlink profile post-processor (fragment): tail of a per-row
// loop that cleans LinkedIn handles and de-duplicates certifications, then
// the bootstrap that resumes from the saved 'last_page' offset.
$LinkedIn = str_replace("LinkedIn:", "", $link->data);
$LinkedIn = preg_replace("/\\s+/", "", $LinkedIn);
$OBJ['linkedIn'] = $LinkedIn;
}
}
// Clean certifications
$certifications = array_unique(json_decode($row['certifications']));
$OBJ['certifications'] = json_encode($certifications);
// Geo
scraperwiki::save_sqlite(array('id', 'name', 'company', 'location', 'date', 'url', 'profile', 'twitter', 'klout', 'profile_url', 'linkedIn', 'certifications'), $OBJ);
// Persist progress after each row so an interrupted run can resume.
scraperwiki::save_var('last_page', $counter);
$counter = $counter + 1;
print_r($counter);
}

print_r("start");
scraperwiki::attach("appcelerator_devlink");
// Bootstrap variables
if (!scraperwiki::table_info($name = "swvariables")) {
    scraperwiki::save_var('last_page', 0);
}
$lastPage = scraperwiki::get_var('last_page');
if ($lastPage > 0) {
    $offset = " OFFSET " . $lastPage;
    $counter = $lastPage;
} else {
    $offset = "";
    $counter = 0;
}
print_r($offset);
$data = scraperwiki::select("* from appcelerator_devlink.swdata LIMIT 1500" . $offset);
foreach ($data as $row) {
// Lichfield Cathedral events view, clearly mid-development: dumps the first
// record and die()s, so nothing after the first iteration ever runs. The
// commented-out block sketches the intended ical/event/venue mapping. The
// whole script is pasted twice; the second copy is unreachable.
scraperwiki::attach("lichfield_cathedral_events", "cathedral");
$cathedral = scraperwiki::select("* from cathedral.swdata");
foreach (scraperwiki::select("* from cathedral.swdata") as $record) {
    var_dump($record);
    die;
}
/* $insert_ical = array(); $insert_ical['link'] = $insert['link']; $insert_ical['DTSTART'] = $strt; $insert_ical['DTEND'] = $strt+86399; $insert_ical['FREQ'] = "DAILY"; $insert_ical['BYDAY'] = ""; $insert_ical['WKST'] = "MO"; $insert_ical['COUNT'] = round(($nd-$strt)/86400); $insert_event = array(); $insert_event['name'] = ""; $insert_event['link'] = ""; $insert_venue = array(); $insert_venue['name'] = ""; $insert_venue['postcode'] = ""; $insert_venue['lat'] = ""; $insert_venue['lng'] = ""; $insert_venue['picture'] = ""; */
scraperwiki::attach("lichfield_cathedral_events", "cathedral");
$cathedral = scraperwiki::select("* from cathedral.swdata");
foreach (scraperwiki::select("* from cathedral.swdata") as $record) {
    var_dump($record);
    die;
}
// Coast & Country special-offers scraper (fragment): tail of a lookup helper,
// price-tracking state, then resume logic before the per-cottage loop.
}
return null;
}

####################################################################################################################
####################################################################################################################
####################################################################################################################
####################################################################################################################
// Per-run price/discount accumulators, reset before the cottage loop.
$originalPrice = "";
$discountedPrice = "";
$calcPercentage = "";
$discountAmount = "";
$percentage = "";
$i = 0;
$blacklist = array();
$url = "http://www.coastandcountry.co.uk/cottage-details/";
scraperwiki::attach("special_offers_coast_and_country_summary_delete");
# get an array of the cottage data to scrape
$cottData = scraperwiki::select("COTTAGE_URL from 'special_offers_coast_and_country_summary_delete'.SWDATA order by COTTAGE_URL");
$placeholder = scraperwiki::get_var("cottID");
if ($placeholder != "") {
    // Resume: drop rows before the last-processed cottage URL.
    $index = searchForId($placeholder, $cottData);
    $cottData = array_splice($cottData, $index);
}
require 'scraperwiki/simple_html_dom.php';
$dom = new simple_html_dom();
foreach ($cottData as $value) {
    scraperwiki::save_var("cottID", $value['COTTAGE_URL']);
    // check the cottage url against the blacklist
    foreach ($blacklist as $blItem) {
        if ($value['COTTAGE_URL'] == $blItem) {
            continue 2;
<style type="text/css">
body { margin: 0; padding: 0; font:0.8em/1.5em "Lucida Grande", "Lucida Sans Unicode", Helvetica, Arial, sans-serif; }
</style>
</head>
<body>
<div id="map_canvas" style="width: 100%; height: 100%;"></div>
</body>
</html>
<?php
// Google-map view of Walsall/Warwickshire food-safety inspections (jumbled
// script body: the closing HTML above precedes the PHP that builds the
// marker list, followed by the page head).
scraperwiki::attach("walsall_warwickshire_food_safety_inspections");
$data = scraperwiki::select("* from walsall_warwickshire_food_safety_inspections.swdata");
// Fix: initialise explicitly so implode() below always receives an array —
// the original left $markers undefined when no row had a longitude.
$markers = array();
// NOTE(review): the loop variable shadows the $data result array, which the
// original also did; kept as-is since $data is not reused afterwards.
foreach ($data as $data) {
    if (strlen($data['latlng_lng']) > 0) {
        $markers[] = "['<h3>" . addslashes(trim($data['name'])) . "</h3><p>" . $data['rating'] . " stars</p>'," . $data['latlng_lat'] . "," . $data['latlng_lng'] . "]";
    }
}
$markers = implode(",", $markers);
?>
<html>
<head>
<script type="text/javascript" src="http://maps.google.com/maps/api/js?sensor=false"></script>
<script src="http://ajax.googleapis.com/ajax/libs/jquery/1.5.2/jquery.min.js" type="text/javascript"></script>
<script type="text/javascript">
jQuery.noConflict();
// Forum thread scraper (fragment, resumable): pulls each saved thread link,
// fetches the page and stores every sufficiently long post body
// (base64-encoded) keyed by position + thread URL. The leading loop is the
// tail of one pasted copy; the trailing loop (truncated) is the second.
$tr = $html->find("div.postmessage div.t_msgfont");
$j = 0;
foreach ($tr as $trr) {
    $noidung = $trr->find('div', 0)->innertext;
    //$noidung = utf8_encode($noidung);
    // Only keep substantial posts; @ hides save errors (e.g. duplicate keys).
    if (mb_strlen($noidung) > 1000) {
        $j++;
        @scraperwiki::save_sqlite(array('id'), array('id' => $j . '-' . $src[0]['url'], 'title' => $src[0]['title'], 'url' => $src[0]['url'], 'content' => base64_encode($noidung), 'order' => $j, 'num' => $src[0]['num'], 'reply' => $src[0]['reply']));
    }
}
$html->clear();
unset($html);
scraperwiki::save_var('last_id', $i);
}

require 'scraperwiki/simple_html_dom.php';
scraperwiki::attach("s-in-s", "src");
//scraperwiki::save_var('last_id', 1);
//exit();
$id = scraperwiki::get_var('last_id');
for ($i = $id; $i < 1900; $i++) {
    // LIMIT i,1: process one source row per iteration, resuming at last_id.
    $src = scraperwiki::select("* from src.swdata limit {$i},1");
    $url = $src[0]['link'];
    $url = 'http://sexinsex.net/bbs/' . $url;
    $html_content = scraperwiki::scrape($url);
    $html = str_get_html($html_content);
    $data = array();
    $tr = $html->find("div.postmessage div.t_msgfont");
    $j = 0;
    foreach ($tr as $trr) {
        $noidung = $trr->find('div', 0)->innertext;
        //$noidung = utf8_encode($noidung);
// eParlimen constituency social-link scraper (fragment): tail of the per-URL
// loop that classifies found links, then the main loop that re-scrapes every
// constituency page and extracts its code (truncated below).
if (stristr($url, "twitter")) {
    $results["twitter_url"] = $url;
} else {
    $results["website_url"] = $url;
}
}
// There are max 3 urls we are interested in
// ($results also holds 'id', so 4 entries means all three links were found)
if (sizeof($results) === 4) {
    break;
}
}
scraperwiki::save_sqlite(array("id"), $results, "eparlimen_social_links");
}

require 'scraperwiki/simple_html_dom.php';
// Rebuild the links table from scratch on every run.
scraperwiki::sqliteexecute("DELETE FROM eparlimen_social_links");
scraperwiki::attach("eparlimen-constituencies", "urls");
$urls = scraperwiki::select("* FROM urls.eparlimen_constituencies_links");
foreach ($urls as $url) {
    $url = str_replace(",%20", "", $url["url"]); // A hack for a known bad link. This should be in the link scraper, but it's xmas and I have better things to do :)
    $html = scraperwiki::scrape($url);
    $dom = new simple_html_dom();
    $dom->load($html);
    $node = $dom->find("ul.wrap_senarai li", 2);
    if (is_object($node)) {
        $code = $node->children(1)->plaintext;
    } else {
        echo "Unable to parse {$url}\n";
        continue;
    }
    $results = array("id" => $code);
// German Landkreise summary view (jumbled script body: the area/density
// tables are printed before the queries that define their data).
}
echo '</table>';
echo '<h2>Landkreise nach Fläche (km<sup>2</sup>)</h2>';
echo '<table>';
foreach ($all_by_area as $item) {
    echo '<tr><td>' . $item['name'] . '</td><td>' . $item['area'] . '</td></tr>' . "\n";
}
echo '</table>';
echo '<h2>Landkreise nach Bevölkerungsdichte (Einwohner/km<sup>2</sup>)</h2>';
echo '<table>';
foreach ($all_by_density as $item) {
    echo '<tr><td>' . $item['name'] . '</td><td>' . $item['inhab_density'] . '</td></tr>' . "\n";
}
echo '</table>';
$sourcescraper = 'german-landkreise';
scraperwiki::attach($sourcescraper);
// Four pre-aggregated orderings of the Landkreis table.
$grouped_by_state = scraperwiki::select("state, COUNT(*) AS num FROM swdata GROUP BY state");
$all_by_population = scraperwiki::select("name, inhabitants FROM swdata ORDER BY inhabitants DESC");
$all_by_area = scraperwiki::select("name, CAST(area AS NUMERIC) AS area FROM swdata ORDER BY area DESC");
$all_by_density = scraperwiki::select("name, inhab_density FROM swdata ORDER BY inhab_density DESC");
echo '<h2>Landkreise nach Bundesland</h2>';
echo '<table>';
foreach ($grouped_by_state as $item) {
    echo '<tr><td>' . $item['state'] . '</td><td>' . $item['num'] . '</td></tr>' . "\n";
}
echo '</table>';
echo '<h2>Landkreise nach Einwohnerzahl</h2>';
echo '<table>';
foreach ($all_by_population as $item) {
    echo '<tr><td>' . $item['name'] . '</td><td>' . $item['inhabitants'] . '</td></tr>' . "\n";
}
// CZ Senate voting-record parser (fragment, resumable via 'last_id'): reads
// stored vote-page HTML past the last processed id and extracts the session
// ("schůze") and vote ("hlasování") numbers from the <h1> heading.
// (The block comment closed just below was opened outside this view.)
} */
scraperwiki::save_var('last_id', $html['id']);
}
}

require 'scraperwiki/simple_html_dom.php';
//corrections:
//scraperwiki::save_var('last_id',55626); //55150
/*scraperwiki::sqliteexecute("delete from info where id>55652"); scraperwiki::sqlitecommit(); die();*/
//get last id
//scraperwiki::save_var('last_id',0);
$last_id = scraperwiki::get_var('last_id', 0);
echo $last_id;
//read the saved tables
scraperwiki::attach("cz_senate_voting_records_downloader_2", "src");
$rows = scraperwiki::select("id from src.swdata where id>{$last_id} order by id");
if (!empty($rows)) {
    foreach ($rows as $html) {
        //get dom
        $dom = new simple_html_dom();
        $html2 = scraperwiki::select("* from src.swdata where id={$html['id']}");
        $dom->load(str_replace(" ", " ", $html2[0]['html']));
        //common part
        $div = $dom->find("div[class=wysiwyg]", 0);
        //info
        $h1 = $div->find('h1', 0);
        preg_match('/([0-9]{1,}). schůze/', $h1->innertext, $matches);
        $schuze = $matches[1];
        preg_match('/([0-9]{1,}). hlasování/', $h1->innertext, $matches);
        $hlasovani = $matches[1];
// Tail of getPagerank() plus a cURL helper and the main loop that attaches
// each datastore in $scraper (defined outside this view), looks up the
// PageRank for every stored URL and saves it into the 'prank' table.
return $pagerank;
}
}

/**
 * Fetches a URL with cURL and returns the response body as a string
 * (false on failure, per curl_exec with CURLOPT_RETURNTRANSFER).
 */
function file_get_contents_curl($url)
{
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_HEADER, 0);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); //Set curl to return the data instead of printing it to the browser.
    // Bug fix: the original passed "http://" + CURLOPT_URL as the option id.
    // PHP's + on a non-numeric string is arithmetic (0 + CURLOPT_URL), so a
    // bogus option was set and the URL never reached the handle.
    curl_setopt($ch, CURLOPT_URL, $url);
    $data = curl_exec($ch);
    curl_close($ch);
    return $data;
}

foreach ($scraper as $scr) {
    scraperwiki::attach($scr);
    $qry = "* from " . $scr . ".swdata";
    //echo $qry;
    $arr = scraperwiki::select($qry);
    // print_r($arr);
    foreach ($arr as $d) {
        // print $d["key"];
        // print $d["site"];
        $pr = (int) getPagerank($d["url"]);
        if (1) {
            $d_key = $d["key"];
            $d_site = $d["site"];
            //print_r($d["url"]." PR is ". (string)$pr ." site is ".$d_site); // ." key is " . $d_key);
            $record = array('url' => utf8_encode($d["url"]), 'pr' => utf8_encode($pr), 'ar' => utf8_encode($d["rank"]), 'id' => utf8_encode($d_key), 'desc' => $d["site"]);
            #print_r($record);
            scraperwiki::save_sqlite(array("id"), $record, "prank");
<?php
// HobbyKing battery detail scraper: resumes from the saved 'currentId',
// fetches up to $maxPerRun product pages per run and parses the product-data
// span (truncated below — the parse/save logic continues past this view).
require 'scraperwiki.php';
require 'scraperwiki/simple_html_dom.php';
$startProductId = scraperwiki::get_var("currentId", -1);
if ($startProductId == -1) {
    print "No previous saved position found. Starting from scratch.";
} else {
    print "Resuming from product id {$startProductId}\n";
}
scraperwiki::attach("hobbyking_batteryidlist");
$batteries = scraperwiki::select("id from hobbyking_batteryidlist.data where id > {$startProductId} order by id asc");
$remainingCount = count($batteries);
print "Found {$remainingCount} batteries left to be scraped.";
$maxPerRun = 100;
$loopCount = 0;
foreach ($batteries as $bat) {
    // Cap the work per invocation so the run stays within platform time limits.
    if ($loopCount > $maxPerRun) {
        print "Ending run after {$maxPerRun} iterations.";
        break;
    }
    $productId = $bat['id'];
    print "Retrieving " . $productId . "\n";
    $html = scraperWiki::scrape("http://www.hobbyking.com/hobbyking/store/uh_viewItem.asp?idProduct={$productId}");
    //print $html . "\n";
    $dom = new simple_html_dom();
    $dom->load($html);
    // Get the product data (located in a span tag). Should only be one product data area!
    $productDataAreasDom = $dom->find("SPAN[id=prodDataArea]");
    $productDataDom = $productDataAreasDom[0];
    //print $productData . "\n";
// Synth-listing search scraper (fragment): tail of a helper, a month-name
// lookup, then run configuration pulled from the 'synthfilter_utils' vars.
}

//Returns the month as a number
// NOTE(review): zero-based (jan => 0 ... dec => 11) and returns an implicit
// null for unrecognised strings — callers must tolerate both.
function getMonthNum($monthString)
{
    $months = array('jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec');
    for ($i = 0; $i < count($months); $i++) {
        if ($months[$i] == strtolower($monthString)) {
            return $i;
        }
    }
}

// Reference list of manufacturers the search can target:
/* "Ampron","Atlantex","ARP","Access Music","Akai","Alesis","Analogue Systems","Applied Acoustics","Aries","Arturia","BOSS","BitHeadz","Bomb Factory Studios","Buchla","Casio","Chamberlin","Cheetah","Chimera Synthesis","Clavia","Con Brio","Creamware","Crumar","Dave Smith Instruments","Doepfer","E-mu Systems","EDP","EML","EMS","Electrix Pro","Electro Harmonix","Elektron","Elka","Encore Electronics","Ensoniq","FBT Electronica","Fairlight","Farfisa","Formanta","Future Retro","GForce Software","Generalmusic","Gleeman","Hammond","Hartmann","Hohner","IK Multimedia","Image Line","Jen Electronics","JoMoX","Kawai","Kenton Electronics","KeyFax Hardware","Koblo","Korg","Kurzweil","Linn Electronics","Logan Electronics","MAM","MOTU","MacBeth Studio Systems","Marion Systems","Metasonix","??Miscellaneous??","Moog Music","Mutronics","Native Instruments","New England Digital","Novation","OSC","Oberheim","Octave","PAiA","PPG","Propellerheads","Prosoniq","Quasimidi","Red Sound Systems","Rhodes","Roland" */
require 'scraperwiki/simple_html_dom.php';
scraperwiki::attach('synthfilter_utils');
$state = "Washington"; //Must be Titlecase e.g, Alabama, Michigan. or use an array separated by commas Alabama, Washington
$states = explode(",", $state);
print_r($states);
$jsonManufacturerScraper = scraperwiki::get_var('manufacturer_scraper');
//Get a unique list of synth manufacturers
$jsonManufacturerData = file_get_contents($jsonManufacturerScraper);
$manufacturerQuery = 'Digisound'; //You can choose to search only a specific manufacturer or if not, just leave it blank
$ignoreWords = explode(',', scraperwiki::get_var('iw_digisound'));
echo "Total ignored words: " . count($ignoreWords) . 
"\n";
// Search-depth knobs for the crawl (defined outside this view); 0 = no limit.
$cityDepth = 0; //Set to 0 to search all cities found in the database
$synthDepth = 0; //Set to 0 to search all synths found in the database
</item>
'; }
print '</channel> </rss> <!-- ScraperWiki insists on inserting this block, but its not valid XML, so we put it into a comment Thanks http://scraperwiki.com/views/galway-city-planning-feed <div id="scraperwikipane"/> -->';
?>
<?php
// Manchester pothole RSS view (jumbled script body: the per-item tail and
// feed close above precede the attach/select and channel preamble below).
# Blank PHP
#$sourcescraper = 'fill_that_hole';
scraperwiki::httpresponseheader('Content-Type', 'application/atom+xml');
date_default_timezone_set("Europe/London");
scraperwiki::attach("fill_that_hole_manchester");
//really this Manchester!
$data = scraperwiki::select("* from fill_that_hole_manchester.swdata order by dateadded desc limit 10");
//print_r($data);
print '<?xml version="1.0" encoding="utf-8"?> <rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom"> <channel> <atom:link href="http://scraperwikiviews.com/run/manchester_pot_holes_rss/" rel="self" type="application/rss+xml" /> <title>Potholes reported on Fill That Hole - Manchester</title> <link>http://www.fillthathole.org.uk/node/114/hazards</link> <description>RSS feed of latest pothole reports via Fill That Hole in Manchester</description> <language>en-gb</language>';
foreach ($data as $item) {
    print ' <item> <title>' . $item["road"] . '</title>