// --- RateMyProfessors scraper (school sid=1381) ---
// NOTE(review): this file looks like several ScraperWiki scrapers concatenated
// together; fragments below are truncated at the chunk seams — confirm against
// the original scraper sources.

// Collect every professor's teacher id (tid) from the paginated
// RateMyProfessors search API, pages 1..130.
$array = []; // bug fix: $array was previously appended to without initialization
for ($i = 1; $i <= 130; $i++) {
    $url = "http://www.ratemyprofessors.com/find/professor/?department=&institution=&page="
        . (string) $i
        . "&query=*%3A*&queryoption=TEACHER&queryBy=schoolId&sid=1381&sortBy=";

    $json = file_get_contents($url);
    if ($json === false) {
        // Robustness fix: a network failure used to feed false into
        // json_decode() and silently produce no data; skip the page instead.
        echo "Failed to fetch page " . $i . "<br>";
        continue;
    }

    $data = json_decode($json, true);
    if (!is_array($data) || !isset($data["professors"]) || !is_array($data["professors"])) {
        // Malformed/unexpected payload: skip rather than fataling on array access.
        echo "Unexpected response for page " . $i . "<br>";
        continue;
    }

    // Loop through the professors of a single PAGE and record each tid.
    $pageProfessors = $data["professors"];
    for ($m = 0; $m < count($pageProfessors); $m++) {
        $professor = $pageProfessors[$m];
        $array[] = $professor["tid"];
    }
}

// Scrape each professor's individual ratings page by tid.
for ($j = 0; $j < count($array); $j++) {
    $teacherURL = "http://www.ratemyprofessors.com/ShowRatings.jsp?tid=" . $array[$j];
    scrapeURL($teacherURL, $array[$j], $conn);
}

/**
 * Insert one professor row into the `professors` table.
 *
 * All value parameters are scraped strings; $conn is an open mysqli
 * connection. Echoes a success or error message; returns nothing.
 *
 * Security fix: the original interpolated scraped (untrusted) values directly
 * into the SQL string — SQL injection. Rewritten with a mysqli prepared
 * statement; the column list, the '' placeholder for the auto id column, and
 * the echoed messages are preserved.
 */
function insertProfessor($tid, $profName, $profQuality, $profGrade, $chili, $profHelpfulness, $profClarity, $profEasiness, $conn) {
    $query = "INSERT INTO professors (id, tid, profName, profQuality, profGrade, chili, profHelpfulness, profClarity, profEasiness) "
        . "VALUES ('', ?, ?, ?, ?, ?, ?, ?, ?)";

    $stmt = $conn->prepare($query);
    if ($stmt === false) {
        echo "Error: " . $query . "<br>" . $conn->error;
        return;
    }

    $stmt->bind_param(
        "ssssssss",
        $tid,
        $profName,
        $profQuality,
        $profGrade,
        $chili,
        $profHelpfulness,
        $profClarity,
        $profEasiness
    );

    if ($stmt->execute()) {
        echo "New record created successfully";
    } else {
        echo "Error: " . $query . "<br>" . $stmt->error;
    }
    $stmt->close();
}

// NOTE(review): this function is truncated here in the source — the rest of
// its body is not visible in this chunk, so it is reproduced verbatim.
function scrapeURL($url, $id, $conn) {
    echo "Scraping<br>";
    $html = file_get_html($url);
    $tid = $id;
// NOTE(review): truncated fragment — the start of the enclosing function (and
// the condition this `else` belongs to) is not visible in this chunk. When the
// unseen condition is false, the function returns an empty string.
} else {
    $result = "";
}
return $result;
}

######################################
# SJ Station Situation scraper
######################################

// simple_html_dom is the third-party HTML parser bundled with ScraperWiki.
require 'scraperwiki/simple_html_dom.php';

// Hard-coded Trafikverket station codes to scrape.
$stationIDs = array('cst', 'g', 'lp', 'nr', 'm', 'sk'); // TODO: Add dynamic list of stations

// Scrape the departures grid for each station in turn.
foreach ($stationIDs as $station) {
    $stationID = $station;
    $theURL = "http://www5.trafikverket.se/trafikinformation/WebPage/TrafficSituationCity.aspx?JF=11&station=74," . $stationID . "&arrivals=0&nostat=1";
    print "Scraping " . $stationID . "\n";
    scrapeURL($theURL, "#TrafficSituationCityComponent_trafficSituationDepartureDataGrid", $stationID);
}

// Fetch the station landing page, follow the first <a> link it contains to
// reach the real data page, then select the departures table by CSS selector
// and hand it to scrapeTable() (defined elsewhere, not visible here).
// NOTE(review): truncated — the remainder of this function's body is cut off
// at the end of this chunk. Also note an earlier fragment in this file defines
// a different scrapeURL($url, $id, $conn); PHP would fatal on the
// redeclaration if both were loaded together — these appear to be separate
// scrapers concatenated into one file; confirm before reuse.
function scrapeURL($pageURL, $tableName, $stationID) {
    $html = scraperwiki::scrape($pageURL);
    $dom = new simple_html_dom();
    $dom->load($html);
    // The landing page holds a link (first <a>) to the actual results page;
    // get_href() is an unseen helper — presumably it extracts the href
    // attribute; verify against its definition.
    $newurl = $dom->find("a");
    $newhref = get_href($newurl[0]);
    # URL found, fetch it
    if ($newhref != "") {
        $html = scraperwiki::scrape(html_entity_decode($newhref));
        $dom->load($html);
        # parse it
        $grid = $dom->find($tableName, 0);
        scrapeTable($grid, $stationID);