/**
 * Scrape one jurisdiction detail page and return or store the resulting record.
 *
 * The page at $url is fetched, the details table is located through a fixed
 * element path, and the individual fields are read from fixed row positions
 * (second cell of rows 5-17). Representative details are collected through
 * get_rep_details().
 *
 * If the page does not contain the expected table path or its <h2> heading,
 * the URL is echoed and the script stops. A missing row or cell yields null
 * for that field instead of a fatal error.
 *
 * @param string $url Address of the detail page to scrape.
 *
 * @return array|bool The record (including a 'reps' entry) when the global
 *                    $run_environment is 'dev'; otherwise true after the record
 *                    has been saved to the 'jurisdiction' table.
 */
function get_city_data($url) {
    global $run_environment;

    $html = scraperWiki::scrape($url);
    $dom = new simple_html_dom();
    $dom->load($html);

    // Walk /html/body/table/tbody/tr/td/div/section/div/table one step at a
    // time so a layout change is reported instead of dereferencing null.
    $content = $dom;
    foreach (array("table", "tr", "td", "div", "section", "div", "table") as $selector) {
        $content = $content->find($selector, 0);
        if (!$content) {
            break;
        }
    }
    $heading = $content ? $content->find("h2", 0) : null;

    // for debugging: report the page that does not match the expected layout
    if (!$heading) {
        echo $url;
        exit;
    }

    // Second cell of the given row, or null when the row/cell does not exist.
    $cell = function ($row_index) use ($content) {
        $row = $content->find("tr", $row_index);
        return $row ? $row->find("td", 1) : null;
    };

    // Trimmed text of that cell, or null when it does not exist.
    $cell_text = function ($row_index) use ($cell) {
        $td = $cell($row_index);
        return $td ? trim($td->plaintext) : null;
    };

    $city = array();
    $city['source'] = $url;
    $city['name_full'] = $heading->plaintext;

    // The heading is expected to read "<Type> of <Name>". strpos() returns
    // false when the separator is absent; adding 4 to that would silently cut
    // the first four characters off the name.
    $separator = strpos($city['name_full'], ' of ');
    if ($separator === false) {
        $city['name'] = $city['name_full'];
        $city['type'] = null;
    } else {
        $city['name'] = substr($city['name_full'], $separator + 4);
        $city['type'] = strtolower(substr($city['name_full'], 0, $separator));
    }

    $url_cell = $cell(5);
    $url_link = $url_cell ? $url_cell->find("a", 0) : null;
    $city['url'] = $url_link ? $url_link->href : null;

    // Field name => row index inside the details table.
    $field_rows = array(
        'region'               => 6,
        'county'               => 7,
        'address1'             => 8,
        'address2'             => 9,
        'phone'                => 10,
        'fax'                  => 11,
        'council_meeting_time' => 12,
        'year_incorporated'    => 13,
        'fiscal_year_start'    => 14,
        'population'           => 15,
        'government_type'      => 16,
        'civil_service'        => 17,
    );
    foreach ($field_rows as $field => $row_index) {
        $city[$field] = $cell_text($row_index);
    }

    // NOTE(review): called in every environment although the result is only
    // used in 'dev' - presumably get_rep_details() stores the reps itself; confirm.
    $rep_details = get_rep_details($content, $url, $city['name']);

    // Clear memory
    $dom->__destruct();
    $content->__destruct();

    if ($run_environment === 'dev') {
        $city['reps'] = $rep_details;
        return $city;
    }

    scraperwiki::save_sqlite(array('name_full', 'source'), $city, 'jurisdiction');
    return true;
}
/**
 * Scrape one city detail page and return or store the resulting record.
 *
 * The page at $url is fetched and the first table inside the
 * "ctl00_cphmain_pnlCityInfo" panel is read by fixed row positions (second
 * cell of each row). Representative details are collected through
 * get_rep_details().
 *
 * If the panel or its table is missing, the URL is echoed and the script
 * stops. A missing row, cell or link yields null for that field instead of a
 * fatal error.
 *
 * @param string $name City name stored in the record as 'name'.
 * @param string $url  Address of the detail page to scrape.
 *
 * @return array|bool The record (including a 'reps' entry) when the global
 *                    $run_environment is 'dev'; otherwise true after the record
 *                    has been saved to the 'city' table.
 */
function get_city_data($name, $url) {
    global $run_environment;

    $html = scraperWiki::scrape($url);
    $dom = new simple_html_dom();
    $dom->load($html);

    $panel = $dom->find("div[id=ctl00_cphmain_pnlCityInfo]", 0);
    $content = $panel ? $panel->find("table", 0) : null;

    // for debugging: report the page that does not match the expected layout
    if (!$content) {
        echo $url;
        exit;
    }

    // Second cell of the given row, or null when the row/cell does not exist.
    $cell = function ($row_index) use ($content) {
        $row = $content->find("tr", $row_index);
        return $row ? $row->find("td", 1) : null;
    };

    // Trimmed text of that cell, or null when it does not exist.
    $cell_text = function ($row_index) use ($cell) {
        $td = $cell($row_index);
        return $td ? trim($td->plaintext) : null;
    };

    // href of the first link inside that cell, or null when there is none.
    $cell_href = function ($row_index) use ($cell) {
        $td = $cell($row_index);
        $link = $td ? $td->find("a", 0) : null;
        return $link ? $link->href : null;
    };

    $city = array();
    $city['source'] = $url;
    $city['name'] = $name;
    $city['address'] = $cell_text(1);
    $city['address_shipping'] = $cell_text(2);
    $city['phone'] = $cell_text(4);
    $city['fax'] = $cell_text(5);
    $city['url'] = $cell_href(7);
    $city['email'] = $cell_href(8); // stored exactly as the link's href
    $city['year_incorporated'] = $cell_text(10);
    $city['population'] = $cell_text(11);
    $city['county'] = $cell_text(12);
    // The history row is optional; the row itself may be absent, not only its cell.
    $city['city_history'] = $cell_text(14);

    // Get reps
    // NOTE(review): called in every environment although the result is only
    // used in 'dev' - presumably get_rep_details() stores the reps itself; confirm.
    $rep_details = get_rep_details($dom, $url, $city['name']);

    // Clear memory
    $dom->__destruct();
    $content->__destruct();

    if ($run_environment === 'dev') {
        $city['reps'] = $rep_details;
        return $city;
    }

    scraperwiki::save_sqlite(array('name', 'source'), $city, 'city');
    return true;
}
/**
 * Scrape a paginated listing of representatives, following the pager links.
 *
 * Each data row of the first table links to a detail page; that page is
 * resolved through get_rep_details() and its result is the record that gets
 * collected (dev) or saved to the 'rep' table (any other environment). When
 * the pager offers a further page, the function calls itself for that page and
 * carries the collected records along.
 *
 * If the page contains no table at all, the URL is echoed and the script stops.
 *
 * @param string|null $html Markup to parse; replaced by the fetched page when
 *                          $url is non-empty.
 * @param string|null $url  Address of the listing page to fetch.
 * @param array|null  $reps Records collected so far (used while paginating).
 *
 * @return array|null|bool In 'dev': every record collected across all pages
 *                         (null when nothing was collected); otherwise true.
 */
function get_city_data($html = null, $url = null, $reps = null) {
    global $run_environment;

    if (!empty($url)) {
        $html = scraperWiki::scrape($url);
    }

    $dom = new simple_html_dom();
    $dom->load($html);

    $table = $dom->find("table", 0);

    // for debugging
    if (!$table) {
        echo $url;
        exit;
    }

    foreach ($table->find("tr") as $data) {
        $tds = $data->find("td");

        // Rows without cells (e.g. <th>-only rows) and the header row carry no rep.
        if (empty($tds) || trim($tds[0]->plaintext) === 'Full Name') {
            continue;
        }

        // Without a link there is no detail page to resolve.
        $link = $tds[0]->find('a', 0);
        if (!$link) {
            continue;
        }

        // NOTE(review): only the detail-page record is kept; the listing row's
        // name/city/title are assumed to be part of it - confirm.
        $rep = get_rep_details('http://events.cacities.org' . $link->href);

        if ($run_environment === 'prod') {
            sleep(1); // this may be needed on scraperwiki.com
        }

        if ($run_environment === 'dev') {
            $reps[] = $rep;
        } else {
            $reps = null;
            scraperwiki::save_sqlite(array('title', 'name_full', 'city'), $rep, 'rep');
        }
    }

    // Resolve the following page before the DOM is released.
    $next_url = null;
    $pager = $dom->find("div[class=prevNext]", 0);
    $next = $pager ? $pager->find("a[class=last]", 0) : null;
    if ($next) {
        $next_url = 'http://events.cacities.org' . str_replace(' ', '%20', $next->href);
    }

    // Clear memory before recursing so parsed pages do not pile up.
    $dom->__destruct();

    // A pager link pointing back at the page just processed would recurse forever.
    if ($next_url !== null && $next_url !== $url) {
        $result = get_city_data(null, $next_url, $reps);

        // $reps is passed by value, so the records of the following pages only
        // reach this level through the return value.
        if ($run_environment === 'dev') {
            $reps = $result;
        }
    }

    if ($run_environment === 'dev') {
        return $reps;
    }

    return true;
}