コード例 #1
0
// Fetch the WHO disease-outbreak archive index page and parse it into a DOM.
$html = scraperWiki::scrape("http://www.who.int/csr/don/archive/disease/en/index.html");
$dom = new simple_html_dom();
$dom->load($html);

// Resume logic: $start names the last disease saved by a previous run.
// NOTE(review): $start is not assigned in the visible code; presumably it is
// loaded from the saved 'disease' variable earlier in the file -- confirm.
// An empty $start means there is nothing to resume from, so every entry is
// handled from the top of the list.
$handleNextEntry = ($start == '');
if (!$handleNextEntry) {
    print "starting after: {$start}\n\n";
}

// Walk every disease link in the A-Z list.
foreach ($dom->find("ul[@class='a_z'] li a") as $data) {
    $disease = trim($data->plaintext);
    print "fetching data for disease: {$disease}";

    if (!$handleNextEntry) {
        // Still catching up to the resume point: skip this entry. Once the
        // resume marker itself is reached, handling starts with the NEXT entry.
        print " - skipped\n";
        if ($disease == $start) {
            $handleNextEntry = true;
        }
        continue;
    }

    print "\n";

    // Scrape and store the archive entries for this disease.
    fetchGARArchive($data->href, $disease);

    // Remember the last disease scraped so a later run can resume after it.
    scraperWiki::save_var('disease', $disease);

    // Stop the whole run once the counter reaches the cap.
    // NOTE(review): $counter is not modified in the visible code; presumably
    // fetchGARArchive() updates it as a global -- confirm.
    if ($counter >= 600) {
        exit;
    }
}

// The full list was processed: clear the resume marker.
scraperWiki::save_var('disease', '');
    $tosave['PrimarySource'] = str_replace('PRIMARY SOURCE: ', '', $dom2->find("table", 1)->find("tr", 1)->plaintext);
    foreach ($dom2->find("table", 0)->find("tr") as $tr) {
        $tds = $tr->find("td");
        if (count($tds) == 2) {
            $add = $tds[1]->plaintext;
            $add = str_replace('  ', '', $add);
            //remove double spaces
            if ($add == ' ') {
                $add = '';
            }
            //format blanks correctly
            //$add=str_replace('ë','\u00CB',$add); //fix UTF error with one school
            //$add=str_replace('Ü','\u00DC',$add); //fix UTF error
            $add = utf8_encode($add);
            $key = $tds[0]->plaintext;
            $key = str_replace(' ', '', $key);
            //remove spaces from key
            $tosave[$key] = $add;
        }
    }
    //print_r($tosave);
    //print $data->value;
    try {
        scraperwiki::save(array('EMISNumber'), $tosave);
    } catch (Exception $e) {
        print 'Caught exception (' . $data->value . '): ' . $e->getMessage() . "\n";
    }
    //break; //uncomment to just process one school
}
scraperWiki::save_var('place', 'none');