<?php
/**
 * Scrapes the Oregon air-monitoring page and collects one record per table
 * row, keyed 'Date', 'Portland', 'Corvallis' and 'Eureka', into $records.
 *
 * Two tables are read from the page: the 500px-wide table and the
 * 67.29%-wide table (named "april" by the variables below). Both are looked
 * up in the full page markup. Previously the second lookup ran against
 * $data after it had been overwritten with the first table's extract, so the
 * second table could never be found.
 */
include '../inc/pdoDB.php';
include 'cScrape.php';

$scrape = new Scrape();
$url = 'http://public.health.oregon.gov/Preparedness/CurrentHazards/Pages/AirMonitoring.aspx';
$scrape->fetch($url);

// Keep the complete page around: every table has to be located in it.
$page = $scrape->removeNewlines($scrape->result);

// First table and its rows.
$data = $scrape->fetchBetween('<table style="width:500px" class=ms-rteTable-1 summary=""><tbody>', '</tbody></table>', $page, true);
$rows = $scrape->fetchAllBetween('<tr', '</tr>', $data, true);

// Second ("april") table and its rows, searched in the full page, not in $data.
$aprildata = $scrape->fetchBetween('<table style="width:67.29%;height:73px" class=ms-rteTable-1 summary=""><tbody>', '</tbody></table>', $page, true);
$aprilrows = $scrape->fetchAllBetween('<tr', '</tr>', $aprildata, true);

// array_merge() needs arrays; fall back to "no rows" if a table was not found.
// NOTE(review): the return type of fetchAllBetween() on a miss is not visible here.
if (!is_array($rows)) {
    $rows = array();
}
if (!is_array($aprilrows)) {
    $aprilrows = array();
}
$totalrows = array_merge($rows, $aprilrows);

// 1-based position of the second table's first row inside $totalrows.
// NOTE(review): assumes the second table starts with a header row, exactly as
// the first table is assumed to; confirm against the live page.
$aprilHeader = count($rows) + 1;

// Cell index => record key for the numeric reading columns.
$cities = array(1 => 'Portland', 2 => 'Corvallis', 3 => 'Eureka');

$i = 0;
// NOTE(review): index 0 is seeded with NULL. This is kept because the code
// that follows this section may rely on the offset; confirm before removing.
$records[] = NULL;
foreach ($totalrows as $id => $row) {
    $i++;

    // Skip the first row of each table.
    if ($i == 1 || $i == $aprilHeader) {
        continue;
    }

    $cells = $scrape->fetchAllBetween('<td', '</td>', $row, true);

    // A usable row needs a date cell plus three reading cells; anything
    // shorter would only raise undefined-offset notices below.
    if (!is_array($cells) || count($cells) < 4) {
        continue;
    }

    $record = array();
    $record['Date'] = strip_tags($cells[0]);
    foreach ($cities as $col => $city) {
        // Keep only digits, dots and whitespace from the cell text.
        $record[$city] = preg_replace("/[^.0-9\\s]/", "", strip_tags($cells[$col]));
    }

    $records[] = $record;
}

/* if ($records[0]) { */
// print_r($records);
// insert into DB
<?php
/**
 * Crawl worker: loads a serialized list of links from crawl_lists/ (selected
 * by the "crawl" query parameter), fetches each link through Scrape::fetch()
 * with a random delay in between, and builds one $json_member array per page.
 *
 * The requested list file is resolved and must lie inside crawl_lists/;
 * a missing, unreadable or malformed list results in an empty link list.
 */

// Get Config
include 'config/config.php';
include 'classes/Scrape.class.php';

$silo = $_SESSION['crawler']['silo'];

// Credentials in "user:pass" form, or false when authentication is disabled.
$auth = $_SESSION['crawler']['auth'] == false
    ? false
    : "{$_SESSION['crawler']['user']}:{$_SESSION['crawler']['pass']}";

// The list name comes straight from the query string, so resolve it and make
// sure it cannot escape the crawl_lists/ directory (e.g. via "../").
$list_dir = realpath('crawl_lists');
$list_file = isset($_GET['crawl'])
    ? realpath('crawl_lists/' . urldecode($_GET['crawl']))
    : false;

$list = false;
if ($list_dir !== false
    && $list_file !== false
    && strpos($list_file, $list_dir . DIRECTORY_SEPARATOR) === 0
    && is_file($list_file)
) {
    $list = file_get_contents($list_file);
}

// NOTE(review): unserialize() can instantiate arbitrary objects if the list
// file is ever attacker-controlled; consider storing the lists as JSON.
$links = ($list === false) ? false : unserialize($list);
if (!is_array($links)) {
    // Nothing usable to crawl; iterate over an empty list instead of warning.
    $links = array();
}

$json = array();

Scrape::getCookies($_SESSION['crawler']['domain']);

foreach ($links as $idx => $link) {
    // Random wait-time between 1 second and MAX_CRAWL_DELAY seconds.
    $crawl_delay = mt_rand(1, MAX_CRAWL_DELAY);
    sleep($crawl_delay);

    $scrape = Scrape::fetch(
        $link,
        $auth,
        $_SESSION['crawler']['respect_robots_meta'],
        $_SESSION['crawler']['respect_canonical']
    );

    // A false result means there is nothing to record for this link.
    if ($scrape == false) {
        continue;
    }

    $title = $scrape['title'];
    $description = $scrape['description'];
    $content = $scrape['plaintext'];
    $keywords = $scrape['keywords'];

    $json_member = array();
    $json_member['silo'] = $_SESSION['crawler']['silo'];
    // The link serves as both the document id and the page URL.
    $json_member['id'] = $json_member['page_url'] = $link;
    // Hash of the plaintext content.
    $json_member['page_md5'] = md5($content);
    $json_member['page_title'] = $title;
    $json_member['page_meta_description'] = $description;
    $json_member['page_meta_keywords'] = $keywords;
    $json_member['page_content'] = $content;
    // (the loop body continues beyond the visible portion of this source)
<?php
// Created by @miajiao & @dcshi
//
// Collects the login fields for Twitter's OAuth authorize page from the
// current request, hands them to Scrape::fetch() together with the authorize
// URL, and echoes whatever the fetch produced.
// NOTE(review): presumably fetch() submits the fields as POST data; confirm
// in lib/Scrape.php.
include 'lib/utility.php';
require_once "lib/Scrape.php";

$url = 'https://twitter.com/oauth/authorize';

// Tokens arrive in the POST body.
$authenticity_token = $_POST['authenticity_token'];
$oauth_token = $_POST['oauth_token'];

// Credentials arrive in the query string; the password is decrypted with
// spDecrypt() and then URL-decoded.
$username = $_GET['username'];
$password = urldecode(spDecrypt($_GET['password']));

// All form fields in the order they are sent.
$data = array(
    'session[username_or_email]' => $username,
    'session[password]' => $password,
    'authenticity_token' => $authenticity_token,
    'oauth_token' => $oauth_token,
);

$scrape = new Scrape();
$scrape->fetch($url, $data);

echo $scrape->result;