Esempio n. 1
0
/**
 * Fetch one page of Wealden planning search results for a single date and
 * record every application found in the global $applications array.
 *
 * For each application the keys 'AppNo', 'Address', 'PostCode' and 'Info'
 * are filled in and parse_detail() is called with the application number.
 * When the page advertises a "Next" link the function recurses, stopping
 * once the page number reaches 6 (so pages 1 to 5 are the most that get read).
 *
 * @param string $date    Date used for both StartDate and EndDate in the query.
 * @param int    $page_no Result page to request (first page is 1).
 * @param string $AppRef  Value sent as the AppRef query parameter.
 *
 * @return void
 */
function parse_search($date, $page_no = 1, $AppRef = '')
{
    global $applications;

    // The search page and the copyright page take the same query string.
    $base = 'http://www.planning.wealden.gov.uk/aspxpages/';
    $query = '?pageno=' . $page_no . '&QueryType=9&WeekNo=&WeekStart=&WeekEnd=&CaseNo=&Add=&ShowInd=&DocId=&AppRef=' . $AppRef . '&Category=DC&DateType=R&StartDate=' . $date . '&EndDate=' . $date . '&Agent=&ParishCode=&WardCode=&Parish=&Ward=&AdvAppNo=&AdvAdd=&AdvProposal=&DecisionCode=&Det=';

    $data = fetch_page($base . 'SearchResults.aspx' . $query);

    // strpos() returns 0 for a match at the very start, so compare against
    // false explicitly instead of relying on truthiness.
    if (strpos($data, "<title>Wealden District Council's applications online - Copyright, disclaimer & personal data</title>") !== false) {
        // The site served its terms page: post the acceptance form back,
        // echoing the __VIEWSTATE token it handed us.
        $pieces = explode('<input type="hidden" name="__VIEWSTATE" value="', $data, 2);
        $viewstate = isset($pieces[1]) ? $pieces[1] : '';
        $pieces = explode('" />', $viewstate, 2);
        $viewstate = $pieces[0];
        $data = fetch_page($base . 'Copyright.aspx' . $query, 'btnCopyrightAccept=Accept&__VIEWSTATE=' . urlencode($viewstate) . '', 2);
    }

    // Without the results marker there is nothing to parse on this page.
    $pieces = explode('<span id="lblSearchResults">', $data);
    if (!isset($pieces[1])) {
        return;
    }

    // Everything before the pager holds the results; the pager itself tells
    // us whether another page exists.
    $pieces = explode('<div id="pagenumbers">', $pieces[1]);
    $results = $pieces[0];
    $next_page = isset($pieces[1]) ? $pieces[1] : '';

    $chunks = explode('</ul>', $results);
    // Drop the eleventh chunk - presumably the trailing markup that follows
    // a full page of ten results (TODO confirm against the live markup).
    unset($chunks[10]);

    foreach ($chunks as $chunk) {
        $fields = explode('</li>', $chunk);
        $AppNo = extract_data($fields[0]);
        if (empty($AppNo)) {
            continue;
        }

        $applications[$AppNo]['AppNo'] = $AppNo;

        $Loc = extract_data(isset($fields[1]) ? $fields[1] : null);
        $applications[$AppNo]['Address'] = $Loc;

        // Pull a UK-style postcode out of the address when one is present.
        preg_match("/([A-Z]{1,2}[0-9][0-9A-Z]?\\s?[0-9][A-Z]{2})/", $Loc, $PostCode);
        $applications[$AppNo]['PostCode'] = isset($PostCode[1]) ? $PostCode[1] : false;

        $applications[$AppNo]['Info'] = extract_data(isset($fields[2]) ? $fields[2] : null);
        parse_detail($AppNo);
    }

    if (strpos($next_page, 'Next</a></div></span> <br />') !== false) {
        $page_no++;
        if ($page_no < 6) {
            // NOTE(review): the last $AppNo seen on this page (possibly empty)
            // is forwarded as the AppRef filter rather than the incoming
            // $AppRef. Kept as in the original - confirm this is intended.
            parse_search($date, $page_no, $AppNo);
        }
    }
}
/**
 * Fetch one page of date-filtered planning applications and record each
 * result in the global $applications array.
 *
 * The request is built from the globals $xml['url'], $day, $month and $year.
 * For every result the keys 'AppNo', 'Info', 'Address' and 'PostCode' are
 * filled in and parse_detail() is called with the application number. If the
 * last result block contains the "Next 10 applications" image, the function
 * recurses for the following page.
 *
 * @param int|string $page Result page to request (first page is 1).
 *
 * @return void
 */
function parse_search($page = 1)
{
    global $applications, $day, $month, $year, $xml;

    // Translate the page number into the START_ROW / FIRST_TEN_SHOWN pair
    // sent in the query string.
    $start = $page * 10 - 19;
    if ($start < 0) {
        $start = 1;
    }
    if ($page == '2') {
        $shown = 'Y';
        $start = 1;
    } else {
        $shown = 'N';
    }

    $url = $xml['url'] . '?Controller=p2Controller&Action=FindApplicationsByDatesAction&START_DD=' . $day . '&START_MMM=' . $month . '&START_YYYY=' . $year . '&END_DD=' . $day . '&END_MMM=' . $month . '&END_YYYY=' . $year . '&WARD=ALL&CURR=&DECSN=&START_ROW=' . $start . '&FIRST_TEN_SHOWN=' . $shown . '&SEARCH_DIRECTION=F';

    $blocks = explode('<div class="result">', fetch_page($url));
    // The first piece is whatever precedes the first result block.
    unset($blocks[0]);

    // Third '</span>' segment of the most recent result block. The pager
    // check below reads it, so it must exist even when there are no results.
    $tail = '';

    foreach ($blocks as $block) {
        $app = explode('</span>', $block);
        $tail = isset($app[2]) ? $app[2] : '';

        $AppNo = trim(strip_tags($app[0]));
        if ($AppNo === '') {
            // A blank reference would create a bogus '' entry and a
            // pointless detail lookup.
            continue;
        }
        $applications[$AppNo]['AppNo'] = $AppNo;

        // The segment holds the description, a <br/>, then the address.
        $parts = explode('<br/>', $tail);
        $info = $parts[0];
        $address = isset($parts[1]) ? $parts[1] : '';

        $applications[$AppNo]['Info'] = trim(strip_tags($info));
        $applications[$AppNo]['Address'] = trim(strip_tags($address));

        // Pull a UK-style postcode out of the address when one is present.
        preg_match("/([A-Z]{1,2}[0-9][0-9A-Z]?\\s?[0-9][A-Z]{2})/", $address, $PostCode);
        $applications[$AppNo]['PostCode'] = isset($PostCode[1]) ? $PostCode[1] : false;

        parse_detail($AppNo);
    }

    // strpos() returns 0 for a match at the very start, so compare against
    // false explicitly instead of relying on truthiness.
    if (strpos($tail, 'alt="Next 10 applications"') !== false) {
        parse_search($page + 1);
    }
}
Esempio n. 3
0
                }
                if ($tmp && strpos($tmp, 'shop-title')) {
                    echo "[info] " . ($end - $start) . "s get " . $value . " success\n";
                    //红色模板
                    break;
                }
                sleep(1);
                $try_count--;
            }
            if (!$tmp) {
                echo "[error] get " . $value . " error \n";
                continue;
            }
            if ($tmp && !strpos($tmp, 'site-nav') && !strpos($tmp, 'shop-title')) {
                echo "[error] get " . $value . " error \n";
                continue;
            }
            file_put_contents('html/' . $city_key . '/' . substr(strrchr($value, '/'), 1) . '.html', $tmp);
        }
        echo "[info] get all success\n";
    } else {
        exit('执行出错');
    }
}
// Initialise the list of URLs to crawl.
$url_arr = init_grap();
// Build the shop URLs from the crawl list.
// NOTE(review): the meaning of the second argument (false) is not visible here - confirm against get_details_url().
$detail_url_arr = get_details_url($url_arr, false);
// Generate the cached HTML files.
parse_detail($detail_url_arr);