Esempio n. 1
0
<?php

require_once "config.php";
// Ad-hoc smoke test: download one day's Wansbeck planning feed and check that
// it can be parsed as XML and iterated.
$xml = safe_scrape_page('http://www.wansbeck.gov.uk/planning.cfm?day=17&month=7&year=2007');
// simplexml_load_string() returns false for empty or malformed input, and
// count(false) is a TypeError on PHP 8, so only parse (and count) real content.
$parsed_applications = (is_string($xml) && $xml !== '') ? simplexml_load_string($xml) : false;
//Loop through the applications, add tinyurl / google maps etc and add to array
if ($parsed_applications !== false && count($parsed_applications) > 0) {
    foreach ($parsed_applications->applications->application as $parsed_application) {
        // Intentionally empty: the loop only proves the feed structure can be walked.
    }
}
print "ddd";
 /**
  * Download a planning-application XML feed and turn every
  * <applications><application> entry into an application object.
  *
  * For each entry the basic fields are copied from the XML, the received date
  * is normalised to Y-m-d, short URLs are generated for the info and comment
  * links, and a location is derived either from the postcode or from the
  * easting/northing pair.
  *
  * Problems (unreachable feed, empty feed, blank short URLs) are reported via
  * error_log(); this is a plain function, so there is no object to log to.
  *
  * @param string $feed_url     URL of the XML feed to download.
  * @param mixed  $authority_id Identifier copied onto every returned application.
  *
  * @return array List of application objects; empty when the feed could not
  *               be fetched, was empty, or could not be parsed as XML.
  */
 function parse_applications($feed_url, $authority_id)
 {
     $return_applications = array();
     //Grab the XML
     $xml = "";
     try {
         $xml = safe_scrape_page($feed_url);
     } catch (Exception $e) {
         error_log("ERROR: problem occured when grabbing feed: " . $feed_url . " ---->>>" . $e);
     }
     // Loose comparison on purpose: treats false, null and "" alike.
     if ($xml == false) {
         error_log("ERROR: empty feed feed: " . $feed_url);
         return $return_applications;
     }
     //Turn the xml into an object
     $parsed_applications = simplexml_load_string($xml);
     // simplexml_load_string() yields false on malformed XML; counting false is a TypeError on PHP 8.
     if ($parsed_applications === false || count($parsed_applications) == 0) {
         return $return_applications;
     }
     //Loop through the applications, add tinyurl / google maps etc and add to array
     foreach ($parsed_applications->applications->application as $parsed_application) {
         $application = new application();
         //Grab basic data from the xml
         $application->authority_id = $authority_id;
         $application->council_reference = $parsed_application->council_reference;
         // explode() replaces split(), which was removed in PHP 7.0; "/" is a
         // literal separator either way, so the result is the same.
         $raw_date_received = (string) $parsed_application->date_received;
         $date_received_dmy = explode("/", $raw_date_received);
         if (count($date_received_dmy) == 3) {
             // d/m/Y in the feed -> Y-m-d
             list($day, $month, $year) = $date_received_dmy;
             $application->date_received = "{$year}-{$month}-{$day}";
         } else {
             // Make a best effort attempt to parse the date
             $ts = strtotime($raw_date_received);
             // false is the failure value; -1 was the failure value on very old PHP versions.
             if ($ts !== false && $ts !== -1) {
                 $application->date_received = date("Y-m-d", $ts);
             }
         }
         $application->address = $parsed_application->address;
         $application->description = $parsed_application->description;
         $application->info_url = $parsed_application->info_url;
         $application->comment_url = $parsed_application->comment_url;
         $application->date_scraped = mysql_date(time());
         //Make the urls
         $info_tiny_url = tiny_url($application->info_url);
         if ($info_tiny_url == "") {
             error_log("ERROR: Created blank info tiny url");
         }
         $comment_tiny_url = tiny_url($application->comment_url);
         if ($comment_tiny_url == "") {
             error_log("ERROR: Created blank comment tiny url");
         }
         if (isset($parsed_application->postcode)) {
             //Workout the XY location from postcode
             // NOTE(review): assumes postcode_to_location() returns an indexable [x, y] pair - confirm.
             $xy = postcode_to_location($parsed_application->postcode);
             $application->postcode = $parsed_application->postcode;
             $application->x = $xy[0];
             $application->y = $xy[1];
         } elseif (isset($parsed_application->easting) && isset($parsed_application->northing)) {
             // No postcode in the feed: derive one from the grid reference instead.
             $application->postcode = location_to_postcode($parsed_application->easting, $parsed_application->northing);
             $application->x = $parsed_application->easting;
             $application->y = $parsed_application->northing;
         }
         $application->info_tinyurl = $info_tiny_url;
         $application->comment_tinyurl = $comment_tiny_url;
         // The postcode may never have been assigned above (no postcode and no
         // easting/northing); pass null explicitly rather than reading an unset property.
         $application->map_url = googlemap_url_from_postcode(
             isset($application->postcode) ? $application->postcode : null
         );
         //Add to array
         $return_applications[] = $application;
     }
     return $return_applications;
 }
/**
 * Scrape planning applications from an Islington-style search results page.
 *
 * Each result row is matched with a regular expression, the council reference
 * is extracted from it, and the per-application details page
 * ($info_url_base . reference) is fetched to read the proposal text and the
 * main location. A postcode is then pulled out of the address.
 *
 * Rows are skipped when no reference can be extracted, when the details page
 * has no address, or when the address contains nothing accepted by
 * is_postcode() - an application without a postcode is of no use downstream.
 *
 * @param string $search_url       URL of the search results page to scrape.
 * @param string $info_url_base    Prefix to which the council reference is appended to build the details URL.
 * @param string $comment_url_base Prefix to which the council reference is appended to build the comment URL.
 *
 * @return array List of Application objects that have a valid postcode.
 */
function scrape_applications_islington($search_url, $info_url_base, $comment_url_base)
{
    $applications = array();
    $application_pattern = '/<TR>([^<]*)<TD class="lg" valign="top" >([^<]*)<a href([^<]*)<a href=wphappcriteria.display>Search Criteria(.*)([^<]*)<(.*)>([^<]*)<TD class="lg" >([^<]*)<\\/TD>([^<]*)<TD class="lg" >([^<]*)<INPUT TYPE=HIDDEN NAME([^>]*)([^<]*)/';
    $reference_pattern = '/Search Results<\\/a>">([^<]*)/';
    $full_detail_pattern = '/Proposal:<\\/label><\\/td>([^<]*)<td colspan="3">([^<]*)/';
    $address_pattern = '/Main location:<\\/label><\\/td>([^<]*)<td colspan="3">([^<]*)/';
    $postcode_pattern = "/[A-Z][A-Z]?[0-9][A-Z0-9]? ?[0-9][ABDEFGHJLNPQRSTUWXYZ]{2}/";

    //grab the page
    $html = safe_scrape_page($search_url);
    if (!is_string($html) || $html === '') {
        // Nothing was downloaded, so there are no rows to match.
        return $applications;
    }
    preg_match_all($application_pattern, $html, $application_matches, PREG_PATTERN_ORDER);
    foreach ($application_matches[0] as $application_match) {
        $application_string = str_replace("\n", "", $application_match);
        //match the application number
        // Group 1 is the text that follows the 'Search Results</a>">' marker,
        // i.e. the reference itself. Skip rows without one: building URLs from
        // an empty reference would only fetch the bare base page.
        if (!preg_match($reference_pattern, $application_string, $reference_match) || $reference_match[1] === '') {
            continue;
        }
        $application = new Application();
        $application->council_reference = $reference_match[1];
        //Comment and info urls
        $application->info_url = $info_url_base . $application->council_reference;
        $application->comment_url = $comment_url_base . $application->council_reference;
        //get full details
        // Cast so a failed download (false/null) becomes an empty string that simply matches nothing.
        $details_html = str_replace("\r\n", "", (string) safe_scrape_page($application->info_url));
        //Details
        if (preg_match($full_detail_pattern, $details_html, $full_detail_matches)) {
            $application->description = $full_detail_matches[2];
        }
        //Address
        if (!preg_match($address_pattern, $details_html, $address_matches)) {
            // No address means no postcode can be found, so the row cannot be used.
            continue;
        }
        $application->address = $address_matches[2];
        //postcode
        if (!preg_match($postcode_pattern, $application->address, $postcode_matches)) {
            continue;
        }
        $application->postcode = $postcode_matches[0];
        //only add it if we have a postcode (bit useless otherwise)
        if (is_postcode($application->postcode)) {
            $applications[] = $application;
        }
    }
    //return
    return $applications;
}