# Reporting window: MORPH_PERIOD may select 'thismonth' or 'lastmonth';
# any other value (including an unset variable) falls back to 'thisweek'.
$requested_period = getenv('MORPH_PERIOD');
$period = in_array($requested_period, array('thismonth', 'lastmonth'), true)
    ? $requested_period
    : 'thisweek';

$term_url = "http://datracking.wsc.nsw.gov.au/Modules/applicationmaster/Default.aspx";
$da_page = $url_base . "default.aspx?page=found&1=" . $period . "&4a=WLUA&6=F";
$comment_base = "mailto:wscmail@wsc.nsw.gov.au?subject=Development Application Enquiry: ";
$user_agent = "User-Agent:Mozilla/5.0 (Windows NT 6.1; WOW64) PlanningAlerts.org.au";

# Accept the terms page first; the session cookie it hands back is needed
# for every later request.
$cookies = accept_terms_get_cookies($term_url, "Agree");

# Manually set cookie's key and get the value from array
$request = array('http' => array(
    'header' => "Cookie: ASP.NET_SessionId=" . $cookies['ASP_NET_SessionId'] . "; path=/; HttpOnly\r\n"
        . "{$user_agent}\r\n",
));
$context = stream_context_create($request);
$dom = file_get_html($da_page, false, $context);

# Fail with a clear message instead of a fatal "member function on bool"
# when the page could not be fetched or parsed.
if (!$dom) {
    throw new \RuntimeException("Unable to load the application list from {$da_page}");
}

# By default, assume it is single page
$dataset = $dom->find("tr[class=rgRow], tr[class=rgAltRow]");

# One pager link per result page; no pager at all means a single page.
$NumPages = max(1, count($dom->find('div[class=rgWrap rgNumPart] a')));

for ($i = 1; $i <= $NumPages; $i++) {
    # If more than a single page, fetch the page
    if ($NumPages > 1) {
        # NOTE(review): offsets 25/61 presumably cut the postback target out of
        # the pager link's href -- confirm against the live markup.
        $eventtarget = substr($dom->find('div[class=rgWrap rgNumPart] a', $i - 1)->href, 25, 61);
        $request = array('http' => array(
            'method' => "POST",
            'header' => "Cookie: ASP.NET_SessionId=" . $cookies['ASP_NET_SessionId'] . "; path=/; HttpOnly\r\n"
                . "Content-Type: application/x-www-form-urlencoded\r\n"
                . "{$user_agent}\r\n",
            'content' => http_build_query(buildformdata($dom, $eventtarget)),
        ));
# Merge every matched cookie string into one name => value array.
# parse_str() rewrites dots in names to underscores, which is why callers
# read the session id under 'ASP_NET_SessionId'.
    $cookies = array();
    foreach ($matches[1] as $item) {
        parse_str($item, $cookie);
        $cookies = array_merge($cookies, $cookie);
    }

    return $cookies;
}

###
### Main code start here
###
$url_base = "https://services.greatlakes.nsw.gov.au/ePathway/Production/Web/GeneralEnquiry/";
$term_url = $url_base . "EnquiryLists.aspx";
$user_agent = "User-Agent:Mozilla/5.0 (Windows NT 6.1; WOW64) PlanningAlerts.org.au";
$da_page = $url_base . "EnquirySummaryView.aspx";
$comment_base = "mailto:council@greatlakes.nsw.gov.au?subject=";

# Submit the enquiry-list form ("Next") with the extra form field it needs,
# and keep the cookies the site returns.
$cookies = accept_terms_get_cookies(
    $term_url,
    "Next",
    array('mDataGrid:Column0:Property' => 'ctl00$MainBodyContent$mDataList$ctl02$mDataGrid$ctl03$ctl00')
);

# Manually set cookie's key and get the value from array
$request = array('http' => array(
    'header' => "Cookie: ASP.NET_SessionId=" . $cookies['ASP_NET_SessionId'] . "; path=/; HttpOnly\r\n"
        . "{$user_agent}\r\n",
));
$context = stream_context_create($request);
$dom = file_get_html($da_page, false, $context);

# Fail with a clear message instead of a fatal "member function on bool"
# when the page could not be fetched or parsed.
if (!$dom) {
    throw new \RuntimeException("Unable to load the application list from {$da_page}");
}

# Assume it is single page, the web site doesn't allow to select period like last month
$dataset = $dom->find("tr[class=ContentPanel], tr[class=AlternateContentPanel]");

# The usual, look for the data set and if needed, save it
foreach ($dataset as $record) {
    # Slow way to transform the date but it works: keep the text before the
    # first space, split it on '/', and reorder the parts as 2-1-0
    # (assumes the site prints d/m/Y -- TODO confirm).
    $received_text = trim($record->find('span', 0)->plaintext);
    $received_parts = explode(' ', $received_text, 2);
    $dmy = explode('/', $received_parts[0]);
    $date_received = date('Y-m-d', strtotime("{$dmy[2]}-{$dmy[1]}-{$dmy[0]}"));

    # Decode entities, collapse whitespace runs, then split at the first comma.
    $address = html_entity_decode($record->find('span', 1)->plaintext);
    $address = preg_replace('/\\s+/', ' ', trim($address));
    $address = explode(",", $address, 2);
# Reporting window: MORPH_PERIOD may select 'thismonth' or 'lastmonth';
# any other value (empty, 'thisweek', unset, unknown) means 'thisweek'.
$requested_period = getenv('MORPH_PERIOD');
$period = in_array($requested_period, array('thismonth', 'lastmonth'), true)
    ? $requested_period
    : 'thisweek';

$da_page = $url_base . "default.aspx?page=found&1=" . $period . "&4a=5,6,7,10,11,12,13,14,15,16,17,20&6=F";
$comment_base = "mailto:records@manly.nsw.gov.au?subject=Development Application Enquiry: ";

# Accept the terms page first; the session cookie it hands back is needed
# for every later request.
$cookies = accept_terms_get_cookies($url_base . "default.aspx");

# Manually set cookie's key and get the value from array
$request = array('http' => array(
    'header' => "Cookie: ASP.NET_SessionId=" . $cookies['ASP_NET_SessionId'] . "; path=/; HttpOnly\r\n",
));
$context = stream_context_create($request);
$dom = file_get_html($da_page, false, $context);

# Fail with a clear message instead of a fatal "member function on bool"
# when the page could not be fetched or parsed.
if (!$dom) {
    throw new \RuntimeException("Unable to load the application list from {$da_page}");
}

# By default, assume it is single page
$dataset = $dom->find("tr[class=rgRow], tr[class=rgAltRow]");

# One pager link per result page; no pager at all means a single page.
$NumPages = max(1, count($dom->find('div[class=rgWrap rgNumPart] a')));

for ($i = 1; $i <= $NumPages; $i++) {
    # If more than a single page, fetch the page
    if ($NumPages > 1) {
        # NOTE(review): offsets 25/61 presumably cut the postback target out of
        # the pager link's href -- confirm against the live markup.
        $eventtarget = substr($dom->find('div[class=rgWrap rgNumPart] a', $i - 1)->href, 25, 61);
        $request = array('http' => array(
            'method' => "POST",
            'header' => "Cookie: ASP.NET_SessionId=" . $cookies['ASP_NET_SessionId'] . "; path=/; HttpOnly\r\n"
                . "Content-Type: application/x-www-form-urlencoded\r\n",
            'content' => http_build_query(buildformdata($dom, $eventtarget)),
        ));
# Capture the name=value part of every Set-Cookie header in the response and
# merge them into one array. parse_str() rewrites dots in names to
# underscores, which is why callers read 'ASP_NET_SessionId'.
    preg_match_all('/^Set-Cookie:\\s*([^;]*)/mi', $terms_response, $matches);
    $cookies = array();
    foreach ($matches[1] as $item) {
        parse_str($item, $cookie);
        $cookies = array_merge($cookies, $cookie);
    }

    return $cookies;
}

###
### Main code start here
###
$url_base = "https://secure.auburn.nsw.gov.au/ePathway/Production/Web/GeneralEnquiry/";
$term_url = $url_base . "EnquiryLists.aspx";
$da_page = $url_base . "EnquirySummaryView.aspx";
$comment_base = "mailto:auburncouncil@auburn.nsw.gov.au?subject=Number ";

$cookies = accept_terms_get_cookies($term_url);

# Manually set cookie's key and get the value from array
$request = array('http' => array(
    'header' => "Cookie: ASP.NET_SessionId=" . $cookies['ASP_NET_SessionId'] . "; path=/; HttpOnly\r\n",
));
$context = stream_context_create($request);
$dom = file_get_html($da_page, false, $context);

# Fail with a clear message instead of a fatal "member function on bool"
# when the page could not be fetched or parsed.
if (!$dom) {
    throw new \RuntimeException("Unable to load the application list from {$da_page}");
}

# Assume it is single page, the web site doesn't allow to select period like last month
$dataset = $dom->find("tr[class=ContentPanel], tr[class=AlternateContentPanel]");

# The usual, look for the data set and if needed, save it
foreach ($dataset as $record) {
    # Slow way to transform the date but it works: keep the text before the
    # first space, split it on '/', and reorder the parts as 2-1-0
    # (assumes the site prints d/m/Y -- TODO confirm).
    $received_parts = explode(' ', trim($record->find('span', 0)->plaintext), 2);
    $dmy = explode('/', $received_parts[0]);
    $date_received = "{$dmy[2]}-{$dmy[1]}-{$dmy[0]}";

    # The reference and the whitespace-normalised address are each used twice.
    $reference = trim($record->find('a', 0)->plaintext);
    $street_address = preg_replace('/\\s+/', ' ', trim($record->find('span', 1)->plaintext));

    # Put all information in an array
    $application = array(
        'council_reference' => $reference,
        'address' => $street_address . ", Australia",
        'description' => preg_replace('/\\s+/', ' ', trim($record->find('span', 2)->plaintext)),
        'info_url' => 'https://secure.auburn.nsw.gov.au/ePathway/Production/Web/GeneralEnquiry/EnquiryLists.aspx?ModuleCode=LAP',
        'comment_url' => $comment_base . $reference . ', ' . $street_address,
        'date_scraped' => date('Y-m-d'),
        'date_received' => date('Y-m-d', strtotime($date_received)),
    );

    # Check if the record exists; if not, INSERT it, otherwise do nothing