/**
 * Renders the search page and, when the form was submitted, runs the lookup first.
 *
 * When the POST body carries a "title" field, that value is handed to
 * getRemoteData() on a fresh Scrape instance; the returned data and the
 * submitted title are then exposed on the view as `results` and
 * `search_title`. The 'index/search' template is rendered in every case.
 */
function search()
{
    // Test for the field that is actually read. A POST that carries other
    // fields but no "title" used to raise an undefined-index notice and
    // trigger a remote lookup with a null title.
    if (isset($_POST['title'])) {
        $scrape = new Scrape();
        $search_title = $_POST['title'];

        $this->view->results = $scrape->getRemoteData($search_title);
        $this->view->search_title = $search_title;
    }

    $this->view->render('index/search');
}
/**
 * Requests $for with cURL and stores the cookies the response sets in self::$cookies.
 *
 * Every "Set-Cookie:" line found in the response (headers are included in the
 * transfer via CURLOPT_HEADER) has its "Set-Cookie:" prefix replaced by
 * "Cookie: ", is followed by "; ", and the pieces are concatenated into one
 * string. self::$cookies becomes an empty string when the request fails or no
 * such line is present.
 *
 * @param string $for URL to request.
 * @return void
 */
public static function getCookies($for)
{
    $ch = curl_init($for);
    if ($ch === false) {
        // No handle, no request: keep the "nothing found" outcome.
        self::$cookies = '';
        return;
    }

    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_HEADER, 1);

    $result = curl_exec($ch);
    if (!is_string($result)) {
        // curl_exec() returns false on a failed transfer; do not feed that to the regex.
        self::$cookies = '';
        return;
    }

    // Header lines end in CRLF. Stop at "\r" as well as "\n" so the carriage
    // return does not end up inside the stored cookie string.
    preg_match_all('/(Set-Cookie:[^\r\n]*)/i', $result, $cookies);

    if (isset($cookies[0])) {
        $str = '';
        foreach ($cookies[0] as $cookie) {
            $str .= str_ireplace('Set-Cookie:', 'Cookie: ', $cookie) . '; ';
        }
        self::$cookies = $str;
    }
}
<?php // Get Config include 'config/config.php'; include 'classes/Scrape.class.php'; $silo = $_SESSION['crawler']['silo']; $auth = $_SESSION['crawler']['auth'] == false ? false : "{$_SESSION['crawler']['user']}:{$_SESSION['crawler']['pass']}"; $list = file_get_contents('crawl_lists/' . urldecode($_GET['crawl'])); $links = unserialize($list); $json = array(); Scrape::getCookies($_SESSION['crawler']['domain']); foreach ($links as $idx => $link) { $crawl_delay = mt_rand(1, MAX_CRAWL_DELAY); // random wait-time between 1 second and $max_delay seconds sleep($crawl_delay); $scrape = Scrape::fetch($link, $auth, $_SESSION['crawler']['respect_robots_meta'], $_SESSION['crawler']['respect_canonical']); if ($scrape == false) { continue; } $title = $scrape['title']; $description = $scrape['description']; $content = $scrape['plaintext']; $keywords = $scrape['keywords']; $json_member = array(); $json_member['silo'] = $_SESSION['crawler']['silo']; $json_member['id'] = $json_member['page_url'] = $link; $json_member['page_md5'] = md5($content); $json_member['page_title'] = $title; $json_member['page_meta_description'] = $description; $json_member['page_meta_keywords'] = $keywords; $json_member['page_content'] = $content;
<?php
include '../inc/pdoDB.php';
include 'cScrape.php';

// Fetch the Oregon air-monitoring page and turn the rows of its two readings
// tables into $records (keys: Date, Portland, Corvallis, Eureka).
$scrape = new Scrape();
$url = 'http://public.health.oregon.gov/Preparedness/CurrentHazards/Pages/AirMonitoring.aspx';
$scrape->fetch($url);

// Keep the complete page in its own variable: both tables must be located in
// it. Previously $data was overwritten with the first table's markup and the
// second table was then searched for inside that fragment.
$page = $scrape->removeNewlines($scrape->result);

$data = $scrape->fetchBetween('<table style="width:500px" class=ms-rteTable-1 summary=""><tbody>', '</tbody></table>', $page, true);
$rows = $scrape->fetchAllBetween('<tr', '</tr>', $data, true);

$aprildata = $scrape->fetchBetween('<table style="width:67.29%;height:73px" class=ms-rteTable-1 summary=""><tbody>', '</tbody></table>', $page, true);
$aprilrows = $scrape->fetchAllBetween('<tr', '</tr>', $aprildata, true);

$totalrows = array_merge($rows, $aprilrows);

$i = 0;
$records[] = NULL;
foreach ($totalrows as $row) {
    $i++;
    // The first row of the merged list is skipped.
    if ($i == 1) {
        continue;
    }

    $record = array();
    $cells = $scrape->fetchAllBetween('<td', '</td>', $row, true);

    $record['Date'] = strip_tags($cells[0]);
    // Keep only digits, dots and whitespace from each reading cell.
    $record['Portland'] = preg_replace("/[^.0-9\\s]/", "", strip_tags($cells[1]));
    $record['Corvallis'] = preg_replace("/[^.0-9\\s]/", "", strip_tags($cells[2]));
    $record['Eureka'] = preg_replace("/[^.0-9\\s]/", "", strip_tags($cells[3]));

    $records[] = $record;
}

// insert into DB
<?php include '../inc/pdoDB.php'; include 'cScrape.php'; $scrape = new Scrape(); $url = 'http://www.doh.wa.gov/Topics/japan/monitor-history.htm'; $scrape->fetch($url); $data = $scrape->removeNewlines($scrape->result); $data = $scrape->fetchBetween('<table border="0" cellpadding="4">', '</table>', $data, true); $rows = $scrape->fetchAllBetween('<tr', '</tr>', $data, true); $i = 0; foreach ($rows as $id => $row) { $i++; if ($i < 3) { continue; } $record = array(); // $cells = $scrape->fetchAllBetween('<font face="Arial" size="2">','</font>',$row,true); $cells = $scrape->fetchAllBetween('<font face=', '</font></td>', $row, true); $record['Date'] = strip_tags($cells[0]); $record['Richland'] = preg_replace("/[^.0-9\\s]/", "", strip_tags($cells[1])); $record['Seattle'] = preg_replace("/[^.0-9\\s]/", "", strip_tags($cells[2])); $record['Spokane'] = preg_replace("/[^.0-9\\s]/", "", strip_tags($cells[3])); $record['Tumwater'] = preg_replace("/[^.0-9\\s]/", "", strip_tags($cells[4])); $records[] = $record; } print_r($records); // insert into DB foreach ($records as $id => $record) { if (is_numeric($record['Richland'])) { $db->query("INSERT INTO government_readings (city_id, reading_value, reading_system, reading_date, radiation_type, is_verified) VALUES (4, '" . $record['Richland'] . "', '2', '" . date('Y-m-d 00:00:00', strtotime($record['Date'])) . "', 'beta', 1)");
<?php
// created by @miajiao & @dcshi
//
// Submits a login form to the Twitter OAuth authorize URL through the Scrape
// helper and echoes whatever the helper stored in its `result` property.
// The two tokens come from the POST body; the username and password come from
// the query string, the password being passed through spDecrypt() and then
// urldecode() before use.
include 'lib/utility.php';
require_once "lib/Scrape.php";

$endpoint = 'https://twitter.com/oauth/authorize';

$postedAuthenticityToken = $_POST['authenticity_token'];
$postedOauthToken = $_POST['oauth_token'];
$login = $_GET['username'];
$secret = urldecode(spDecrypt($_GET['password']));

// Field order matches the original request body: credentials first, tokens after.
$fields = array(
    'session[username_or_email]' => $login,
    'session[password]' => $secret,
    'authenticity_token' => $postedAuthenticityToken,
    'oauth_token' => $postedOauthToken,
);

$client = new Scrape();
$client->fetch($endpoint, $fields);

echo $client->result;
$conn = new mysqli(DB_HOST, DB_USER, DB_PASSWORD, DB_NAME); if ($conn->connect_error) { die("Connection failed: " . $conn->connect_error); } include_once dirname(__FILE__) . '/classes/class.proxy.php'; include_once dirname(__FILE__) . '/classes/simple_html_dom.php'; include_once dirname(__FILE__) . '/classes/class.scrape.php'; // Get the 19 proxies for each page $proxy = new Proxy(); $json = $proxy->get_random(349); $proxies = json_decode($json, true); #var_dump($json); #die(); unset($proxy); echo "<br>" . 'PROXIES: ' . count($proxies) . "<hr>"; // Loop to pages scrape and search asins and ranks $scrape = new Scrape(); $page = 0; $adc_log = 'Total Proxies' . count($proxies) . "\r\n"; $adc_log_cnt = 0; foreach ($proxies as $proxy) { $page++; if (!empty($proxy)) { $sql = "SELECT u_id, asin FROM amz_products "; $sql .= " UNION ALL Select '0423' as u_id, 'B002KRDGC0' as asin"; #$sql .= " WHERE asin = 'B00KY5S81O' "; $result = $conn->query($sql); if ($result->num_rows > 0) { $data = array(); while ($rec = $result->fetch_assoc()) { $sql = ""; $u_id = $rec["u_id"];
<?php if (!isset($_SESSION)) { session_start(); } //create by @miajiao require_once 'lib/twitese.php'; require_once "lib/Scrape.php"; include 'acl.php'; $scrape = new Scrape(); $time = time() + 3600 * 24 * 365; if (isset($_REQUEST['oauth_token'])) { if ($_SESSION['oauth_token'] !== $_REQUEST['oauth_token']) { $_SESSION['oauth_status'] = 'bad'; session_destroy(); header('Location: ./login.php'); } else { $connection = new TwitterOAuth(OAUTH_KEY, OAUTH_SECRET, $_COOKIE['oauth_token'], $_COOKIE['oauth_token_secret']); $access_token = $connection->getAccessToken($_REQUEST['oauth_verifier']); $_SESSION['access_token'] = $access_token; unset($_SESSION['oauth_token']); unset($_SESSION['oauth_token_secret']); setcookie('user_id', $access_token['user_id'], $time, '/'); setcookie('screen_name', $access_token['screen_name'], $time, '/'); setcookie('oauth_token', $access_token['oauth_token'], $time, '/'); setcookie('oauth_token_secret', $access_token['oauth_token_secret'], $time, '/'); if (200 == $connection->http_code) { $_SESSION['login_status'] = 'verified'; $t = getTwitter(); $user = $t->veverify(); setEncryptCookie('twitese_name', $t->screen_name, $time, '/');