<?php
/*
 * Ad-hoc check of a single planning feed: downloads one hard-coded page,
 * parses it as XML and walks its <applications>/<application> entries.
 * Everything here runs as soon as the file is loaded.
 */
require_once "config.php";

// Download the raw feed for one fixed date.
$xml = safe_scrape_page('http://www.wansbeck.gov.uk/planning.cfm?day=17&month=7&year=2007');

// simplexml_load_string() returns false when the document cannot be parsed.
$parsed_applications = simplexml_load_string($xml);

// Walk the <application> entries of the feed. The explicit false check keeps
// count() and the property access below away from a failed parse.
if ($parsed_applications !== false && count($parsed_applications) > 0) {
    foreach ($parsed_applications->applications->application as $parsed_application) {
        // No per-application work is done here yet.
    }
}

// Prints a fixed marker string once the loop is done.
print "ddd";
/**
 * Fetch a planning-application XML feed and turn every
 * <applications>/<application> entry into an application object.
 *
 * For each entry the function copies the basic fields from the XML,
 * normalises the received date to Y-m-d, asks tiny_url() for short versions
 * of the info and comment URLs, resolves a location (from the postcode when
 * present, otherwise from easting/northing) and builds a map URL.
 *
 * Problems are reported with error_log(): this is a plain function, so there
 * is no object to log through.
 *
 * @param string $feed_url     URL of the XML feed to fetch.
 * @param mixed  $authority_id Value stored as authority_id on every result.
 * @return array List of application objects; empty when the feed could not
 *               be fetched or parsed, or contains no applications.
 */
function parse_applications($feed_url, $authority_id) {
    $return_applications = array();

    //Grab the XML
    $xml = "";
    try {
        $xml = safe_scrape_page($feed_url);
    } catch (Exception $e) {
        error_log("ERROR: problem occured when grabbing feed: " . $feed_url . " ---->>>" . $e);
    }

    if ($xml == false) {
        error_log("ERROR: empty feed feed: " . $feed_url);
        // Nothing to parse, so skip simplexml_load_string() altogether.
        return $return_applications;
    }

    //Turn the xml into an object
    $parsed_applications = simplexml_load_string($xml);
    if ($parsed_applications === false) {
        error_log("ERROR: could not parse feed: " . $feed_url);
        return $return_applications;
    }

    // A well-formed document without any <application> entries yields nothing.
    if (!isset($parsed_applications->applications->application)) {
        return $return_applications;
    }

    //Loop through the applications, add tinyurl / google maps etc and add to array
    foreach ($parsed_applications->applications->application as $parsed_application) {
        $application = new application();

        //Grab basic data from the xml
        $application->authority_id = $authority_id;
        $application->council_reference = $parsed_application->council_reference;

        // Cast the XML node to a plain string before handing it to string functions.
        $date_received_raw = (string) $parsed_application->date_received;
        $date_received_dmy = explode("/", $date_received_raw);
        if (count($date_received_dmy) == 3) {
            // day/month/year -> year-month-day
            $application->date_received = "{$date_received_dmy[2]}-{$date_received_dmy[1]}-{$date_received_dmy[0]}";
        } else {
            // Make a best effort attempt to parse the date
            $ts = strtotime($date_received_raw);
            if ($ts !== false && $ts !== -1) {
                $application->date_received = date("Y-m-d", $ts);
            }
        }

        $application->address = $parsed_application->address;
        $application->description = $parsed_application->description;
        $application->info_url = $parsed_application->info_url;
        $application->comment_url = $parsed_application->comment_url;
        $application->date_scraped = mysql_date(time());

        //Make the urls
        $info_tiny_url = tiny_url($application->info_url);
        if ($info_tiny_url == "") {
            error_log("ERROR: Created blank info tiny url");
        }

        $comment_tiny_url = tiny_url($application->comment_url);
        if ($comment_tiny_url == "") {
            error_log("ERROR: Created blank comment tiny url");
        }

        if (isset($parsed_application->postcode)) {
            //Workout the XY location from postcode
            $xy = postcode_to_location($parsed_application->postcode);
            $application->postcode = $parsed_application->postcode;
            $application->x = $xy[0];
            $application->y = $xy[1];
        } else if (isset($parsed_application->easting) && isset($parsed_application->northing)) {
            // No postcode in the feed: derive one from the grid reference instead.
            $postcode = location_to_postcode($parsed_application->easting, $parsed_application->northing);
            $application->postcode = $postcode;
            $application->x = $parsed_application->easting;
            $application->y = $parsed_application->northing;
        }

        $application->info_tinyurl = $info_tiny_url;
        $application->comment_tinyurl = $comment_tiny_url;
        $application->map_url = googlemap_url_from_postcode($application->postcode);

        //Add to array
        array_push($return_applications, $application);
    }

    return $return_applications;
}
/**
 * Scrape planning applications from a search-results page and the detail
 * page of each result.
 *
 * Every result row found on the search page yields a council reference; the
 * detail page at $info_url_base . reference is then fetched to read the
 * proposal text and the main location. The first postcode-shaped substring of
 * the location becomes the application's postcode. Applications are returned
 * only when is_postcode() accepts that postcode.
 *
 * @param string $search_url       URL of the search-results page.
 * @param string $info_url_base    Prefix to which the council reference is appended
 *                                 to form the detail-page URL.
 * @param string $comment_url_base Prefix to which the council reference is appended
 *                                 to form the comment URL.
 * @return array List of Application objects that have a valid postcode.
 */
function scrape_applications_islington($search_url, $info_url_base, $comment_url_base) {
    $applications = array();

    $application_pattern = '/<TR>([^<]*)<TD class="lg" valign="top" >([^<]*)<a href([^<]*)<a href=wphappcriteria.display>Search Criteria(.*)([^<]*)<(.*)>([^<]*)<TD class="lg" >([^<]*)<\\/TD>([^<]*)<TD class="lg" >([^<]*)<INPUT TYPE=HIDDEN NAME([^>]*)([^<]*)/';
    $reference_pattern = '/Search Results<\\/a>">([^<]*)/';
    $full_detail_pattern = '/Proposal:<\\/label><\\/td>([^<]*)<td colspan="3">([^<]*)/';
    $address_pattern = '/Main location:<\\/label><\\/td>([^<]*)<td colspan="3">([^<]*)/';
    $postcode_pattern = "/[A-Z][A-Z]?[0-9][A-Z0-9]? ?[0-9][ABDEFGHJLNPQRSTUWXYZ]{2}/";

    //grab the page
    $html = safe_scrape_page($search_url);
    if ($html == false) {
        // Nothing was fetched, so there are no rows to look at.
        return $applications;
    }

    preg_match_all($application_pattern, $html, $application_matches, PREG_PATTERN_ORDER);

    foreach ($application_matches[0] as $application_match) {
        $application_string = str_replace("\n", "", $application_match);

        //match the application number: capture group 1 is the text that
        //follows the "Search Results" link inside the row
        if (!preg_match($reference_pattern, $application_string, $reference_match)
            || $reference_match[1] === "") {
            // Without a reference there is no detail page to fetch.
            continue;
        }

        $application = new Application();
        $application->council_reference = $reference_match[1];

        //Comment and info urls
        $application->info_url = $info_url_base . $application->council_reference;
        $application->comment_url = $comment_url_base . $application->council_reference;

        //get full details
        $details_html = (string) safe_scrape_page($application->info_url);
        $details_html = str_replace("\r\n", "", $details_html);

        //Details
        if (preg_match($full_detail_pattern, $details_html, $full_detail_matches)) {
            $application->description = $full_detail_matches[2];
        }

        //Address
        $address = "";
        if (preg_match($address_pattern, $details_html, $address_matches)) {
            $address = $address_matches[2];
            $application->address = $address;
        }

        //postcode: searched in the local address string so a missing address
        //never reaches preg_match() as an unset property
        if (!preg_match($postcode_pattern, $address, $postcode_matches)) {
            continue;
        }
        $application->postcode = $postcode_matches[0];

        //only add it if we have a postcode (bit useless otherwise)
        if (is_postcode($application->postcode)) {
            array_push($applications, $application);
        }
    }

    //return
    return $applications;
}