function scrape_page() { $row = 0; $html = scraperWiki::scrape("http://asuntojen.hintatiedot.fi/haku/?c=" . $GLOBALS['c'] . "&s=" . $GLOBALS['s'] . "&r=" . $GLOBALS['r'] . "&amin=" . $GLOBALS['amin'] . "&amax=" . $GLOBALS['amax'] . "&z=" . $GLOBALS['z']); $dom = new simple_html_dom(); $dom->load($html); foreach ($dom->find("tr") as $data) { $tds = $data->find("td"); if (count($tds) > 8) { $row++; $GLOBALS['rowTotal']++; $apt = array("Uniikkiavain" => $GLOBALS['rowTotal'], "Kaupunginosa" => $tds[0]->plaintext, "Myyntihinta" => $tds[3]->plaintext, "Neliohinta" => $tds[4]->plaintext, "Tyyppi" => $tds[1]->plaintext, "Koko" => $tds[2]->plaintext); scraperwiki::save_sqlite(null, $apt, $table_name = $GLOBALS['c'] . " " . $GLOBALS['time']); print $GLOBALS['rowTotal'] . "\n"; print $row . ". Sijainti: " . $tds[0]->plaintext . " Hinta: " . $tds[3]->plaintext . " Tyyppi: " . $tds[1]->plaintext . " Koko: " . $tds[2]->plaintext . " Neliöhinta: " . $tds[4]->plaintext . "€" . "\n"; } } if ($row == 50) { print "Vielä jatkuu, haetaan seuraava sivu..." . "\n"; $GLOBALS['z']++; scrape_page(); } else { print "Skrääpiminen suoritettu." . "\n"; print "Sivuja yhteensä: " . $GLOBALS['z'] . "\n"; print "Rivejä yhteensä: " . $GLOBALS['rowTotal'] . "\n"; } }
public static function scrape_all() { $words = []; for ($i = 1; $i < 30; $i = $i + 2) { $words = array_merge($words, scrape_page(self::$WEBSITE . "/" . "Books/Clear-English/words-" . make_two_digits($i) . "-" . make_two_digits($i + 1) . "-hundred.html")); } $total_matches = count($words); echo "Scraped " . $total_matches . " words from Paul Noll site.<br />"; for ($i = 0; $i < $total_matches; $i++) { echo $words[$i]; echo "<br />"; } return $words; }