function scrape_page()
{
    $row = 0;
    $html = scraperWiki::scrape("http://asuntojen.hintatiedot.fi/haku/?c=" . $GLOBALS['c'] . "&s=" . $GLOBALS['s'] . "&r=" . $GLOBALS['r'] . "&amin=" . $GLOBALS['amin'] . "&amax=" . $GLOBALS['amax'] . "&z=" . $GLOBALS['z']);
    $dom = new simple_html_dom();
    $dom->load($html);
    foreach ($dom->find("tr") as $data) {
        $tds = $data->find("td");
        if (count($tds) > 8) {
            $row++;
            $GLOBALS['rowTotal']++;
            $apt = array("Uniikkiavain" => $GLOBALS['rowTotal'], "Kaupunginosa" => $tds[0]->plaintext, "Myyntihinta" => $tds[3]->plaintext, "Neliohinta" => $tds[4]->plaintext, "Tyyppi" => $tds[1]->plaintext, "Koko" => $tds[2]->plaintext);
            scraperwiki::save_sqlite(null, $apt, $table_name = $GLOBALS['c'] . " " . $GLOBALS['time']);
            print $GLOBALS['rowTotal'] . "\n";
            print $row . ". Sijainti: " . $tds[0]->plaintext . " Hinta: " . $tds[3]->plaintext . " Tyyppi: " . $tds[1]->plaintext . " Koko: " . $tds[2]->plaintext . " Neliöhinta: " . $tds[4]->plaintext . "€" . "\n";
        }
    }
    if ($row == 50) {
        print "Vielä jatkuu, haetaan seuraava sivu..." . "\n";
        $GLOBALS['z']++;
        scrape_page();
    } else {
        print "Skrääpiminen suoritettu." . "\n";
        print "Sivuja yhteensä: " . $GLOBALS['z'] . "\n";
        print "Rivejä yhteensä: " . $GLOBALS['rowTotal'] . "\n";
    }
}
Beispiel #2
0
 public static function scrape_all()
 {
     $words = [];
     for ($i = 1; $i < 30; $i = $i + 2) {
         $words = array_merge($words, scrape_page(self::$WEBSITE . "/" . "Books/Clear-English/words-" . make_two_digits($i) . "-" . make_two_digits($i + 1) . "-hundred.html"));
     }
     $total_matches = count($words);
     echo "Scraped " . $total_matches . " words from Paul Noll site.<br />";
     for ($i = 0; $i < $total_matches; $i++) {
         echo $words[$i];
         echo "<br />";
     }
     return $words;
 }