Exemple #1
0
 function search()
 {
     // Controller action: when the search form was submitted, run the
     // remote lookup and expose the results to the view. The page is
     // rendered unconditionally, with or without results.
     if (!empty($_POST)) {
         $title = $_POST['title'];
         $scraper = new Scrape();
         $this->view->results = $scraper->getRemoteData($title);
         $this->view->search_title = $title;
     }
     $this->view->render('index/search');
 }
 /**
  * Fetch $for and cache its Set-Cookie response headers (rewritten into
  * request-header "Cookie:" form) in self::$cookies for later requests.
  *
  * @param string $for URL whose cookies should be captured.
  * @return void  Result is stored in self::$cookies, not returned.
  */
 public static function getCookies($for)
 {
     $ch = curl_init($for);
     curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
     // Include response headers in the body so Set-Cookie lines are visible.
     curl_setopt($ch, CURLOPT_HEADER, 1);
     $result = curl_exec($ch);
     // FIX: the handle was previously never closed (resource leak).
     curl_close($ch);
     if ($result === false) {
         // FIX: on transfer failure, bail out instead of running
         // preg_match_all() against boolean false.
         return;
     }
     preg_match_all('/(Set-Cookie:[^\\n]*)/i', $result, $cookies);
     // FIX: isset($cookies[0]) was always true after preg_match_all();
     // only (re)build the jar when at least one cookie actually matched.
     if (!empty($cookies[0])) {
         $str = '';
         foreach ($cookies[0] as $cookie) {
             // Turn "Set-Cookie: k=v" into "Cookie: k=v; " and append.
             $str .= str_ireplace('Set-Cookie:', 'Cookie: ', $cookie) . '; ';
         }
         self::$cookies = $str;
     }
 }
<?php

// Crawl a previously-saved link list and build index documents per page.
// Get Config
include 'config/config.php';
include 'classes/Scrape.class.php';
// Crawl settings were stashed in the session by an earlier request.
$silo = $_SESSION['crawler']['silo'];
// Build a "user:pass" HTTP auth string, or false when auth is disabled.
$auth = $_SESSION['crawler']['auth'] == false ? false : "{$_SESSION['crawler']['user']}:{$_SESSION['crawler']['pass']}";
// WARNING(review): $_GET['crawl'] flows straight into a filesystem path
// (path traversal risk) and the file content is fed to unserialize()
// (PHP object-injection risk). Whitelist the name and prefer JSON.
$list = file_get_contents('crawl_lists/' . urldecode($_GET['crawl']));
$links = unserialize($list);
$json = array();
// Prime Scrape's static cookie state for the target domain before crawling.
Scrape::getCookies($_SESSION['crawler']['domain']);
foreach ($links as $idx => $link) {
    $crawl_delay = mt_rand(1, MAX_CRAWL_DELAY);
    // random wait-time between 1 second and $max_delay seconds
    sleep($crawl_delay);
    $scrape = Scrape::fetch($link, $auth, $_SESSION['crawler']['respect_robots_meta'], $_SESSION['crawler']['respect_canonical']);
    // Skip pages that could not be fetched (presumably also pages excluded
    // by robots-meta/canonical rules — confirm in Scrape::fetch).
    if ($scrape == false) {
        continue;
    }
    $title = $scrape['title'];
    $description = $scrape['description'];
    $content = $scrape['plaintext'];
    $keywords = $scrape['keywords'];
    // Assemble one index document per crawled page; the page URL doubles
    // as the document id.
    $json_member = array();
    $json_member['silo'] = $_SESSION['crawler']['silo'];
    $json_member['id'] = $json_member['page_url'] = $link;
    $json_member['page_md5'] = md5($content);
    $json_member['page_title'] = $title;
    $json_member['page_meta_description'] = $description;
    $json_member['page_meta_keywords'] = $keywords;
    $json_member['page_content'] = $content;
<?php

// Scrape Oregon public-health air-monitoring readings into $records,
// one associative record per table row (Date + three city readings).
include '../inc/pdoDB.php';
include 'cScrape.php';
$scrape = new Scrape();
$url = 'http://public.health.oregon.gov/Preparedness/CurrentHazards/Pages/AirMonitoring.aspx';
$scrape->fetch($url);
// Collapse newlines so the marker-based extraction below is
// position-independent.
$data = $scrape->removeNewlines($scrape->result);
// Main readings table.
$data = $scrape->fetchBetween('<table style="width:500px" class=ms-rteTable-1 summary=""><tbody>', '</tbody></table>', $data, true);
$rows = $scrape->fetchAllBetween('<tr', '</tr>', $data, true);
// April readings live in a second table with different inline styling.
// NOTE(review): this searches inside the already-narrowed $data (the first
// table's tbody), not the full page — confirm that is intended.
$aprildata = $scrape->fetchBetween('<table style="width:67.29%;height:73px" class=ms-rteTable-1 summary=""><tbody>', '</tbody></table>', $data, true);
$aprilrows = $scrape->fetchAllBetween('<tr', '</tr>', $aprildata, true);
$totalrows = array_merge($rows, $aprilrows);
$i = 0;
// FIX: was "$records[] = NULL;", which seeded the list with a bogus NULL
// first element; start from an empty array instead.
$records = array();
foreach ($totalrows as $id => $row) {
    $i++;
    // Skip the header row of the merged row list.
    if ($i == 1) {
        continue;
    }
    $record = array();
    $cells = $scrape->fetchAllBetween('<td', '</td>', $row, true);
    $record['Date'] = strip_tags($cells[0]);
    // Keep only digits, dots and whitespace from each reading cell.
    $record['Portland'] = preg_replace("/[^.0-9\\s]/", "", strip_tags($cells[1]));
    $record['Corvallis'] = preg_replace("/[^.0-9\\s]/", "", strip_tags($cells[2]));
    $record['Eureka'] = preg_replace("/[^.0-9\\s]/", "", strip_tags($cells[3]));
    $records[] = $record;
}
// insert into DB
<?php

// Scrape Washington DOH historical radiation-monitoring readings into
// $records and insert verified numeric readings into government_readings.
include '../inc/pdoDB.php';
include 'cScrape.php';
$scrape = new Scrape();
$url = 'http://www.doh.wa.gov/Topics/japan/monitor-history.htm';
$scrape->fetch($url);
// Collapse newlines so the marker-based extraction below is
// position-independent.
$data = $scrape->removeNewlines($scrape->result);
$data = $scrape->fetchBetween('<table border="0" cellpadding="4">', '</table>', $data, true);
$rows = $scrape->fetchAllBetween('<tr', '</tr>', $data, true);
$i = 0;
foreach ($rows as $id => $row) {
    $i++;
    // Skip the first two rows (presumably table headers — confirm
    // against the live page markup).
    if ($i < 3) {
        continue;
    }
    $record = array();
    //    $cells = $scrape->fetchAllBetween('<font face="Arial" size="2">','</font>',$row,true);
    $cells = $scrape->fetchAllBetween('<font face=', '</font></td>', $row, true);
    $record['Date'] = strip_tags($cells[0]);
    // Keep only digits, dots and whitespace from each reading cell.
    $record['Richland'] = preg_replace("/[^.0-9\\s]/", "", strip_tags($cells[1]));
    $record['Seattle'] = preg_replace("/[^.0-9\\s]/", "", strip_tags($cells[2]));
    $record['Spokane'] = preg_replace("/[^.0-9\\s]/", "", strip_tags($cells[3]));
    $record['Tumwater'] = preg_replace("/[^.0-9\\s]/", "", strip_tags($cells[4]));
    $records[] = $record;
}
print_r($records);
// insert into DB
foreach ($records as $id => $record) {
    if (is_numeric($record['Richland'])) {
        // WARNING(review): SQL built via string concatenation — switch to
        // a prepared statement with bound parameters.
        $db->query("INSERT INTO government_readings (city_id, reading_value, reading_system, reading_date, radiation_type, is_verified) VALUES (4, '" . $record['Richland'] . "', '2', '" . date('Y-m-d 00:00:00', strtotime($record['Date'])) . "', 'beta', 1)");
<?php

//create by  @miajiao & @dcshi
// Posts Twitter web-login credentials plus the OAuth tokens to the
// authorize endpoint and echoes the raw response back to the client.
include 'lib/utility.php';
require_once "lib/Scrape.php";
$url = 'https://twitter.com/oauth/authorize';
$authenticity_token = $_POST['authenticity_token'];
$oauth_token = $_POST['oauth_token'];
// WARNING(review): credentials arrive via GET (they end up in server
// access logs and browser history); the password is only obfuscated by
// spDecrypt() — move these to POST over TLS.
$username = $_GET['username'];
$password = urldecode(spDecrypt($_GET['password']));
// FIX: removed a dead "$data = array();" that was immediately overwritten
// by the assignment below.
$data = array('session[username_or_email]' => $username, 'session[password]' => $password);
$scrape = new Scrape();
$data['authenticity_token'] = $authenticity_token;
$data['oauth_token'] = $oauth_token;
$scrape->fetch($url, $data);
// WARNING(review): echoing the remote response verbatim is an XSS vector.
echo $scrape->result;
// Connect to the product database; abort the script on failure.
$conn = new mysqli(DB_HOST, DB_USER, DB_PASSWORD, DB_NAME);
if ($conn->connect_error) {
    die("Connection failed: " . $conn->connect_error);
}
include_once dirname(__FILE__) . '/classes/class.proxy.php';
include_once dirname(__FILE__) . '/classes/simple_html_dom.php';
include_once dirname(__FILE__) . '/classes/class.scrape.php';
// Get the 19 proxies for each page
// Pull a random batch of proxies as JSON and decode into an array.
$proxy = new Proxy();
$json = $proxy->get_random(349);
$proxies = json_decode($json, true);
#var_dump($json); #die();
unset($proxy);
echo "<br>" . 'PROXIES: ' . count($proxies) . "<hr>";
// Loop to pages scrape and search asins and ranks
$scrape = new Scrape();
$page = 0;
// Running crawl log, seeded with the proxy count.
$adc_log = 'Total Proxies' . count($proxies) . "\r\n";
$adc_log_cnt = 0;
// One scraped page per usable proxy entry.
foreach ($proxies as $proxy) {
    $page++;
    if (!empty($proxy)) {
        // All tracked products, plus one hard-coded test row appended via
        // UNION ALL (presumably for debugging — confirm before shipping).
        $sql = "SELECT u_id, asin FROM amz_products ";
        $sql .= " UNION ALL Select '0423' as u_id, 'B002KRDGC0' as asin";
        #$sql .= "  WHERE asin = 'B00KY5S81O' ";
        $result = $conn->query($sql);
        if ($result->num_rows > 0) {
            $data = array();
            while ($rec = $result->fetch_assoc()) {
                $sql = "";
                $u_id = $rec["u_id"];
Exemple #8
0
<?php

// Twitter OAuth callback: exchanges the request token for an access token
// and persists the user's identity in the session and year-long cookies.
if (!isset($_SESSION)) {
    session_start();
}
//create by @miajiao
require_once 'lib/twitese.php';
require_once "lib/Scrape.php";
include 'acl.php';
$scrape = new Scrape();
// Cookie lifetime: one year from now.
$time = time() + 3600 * 24 * 365;
if (isset($_REQUEST['oauth_token'])) {
    if ($_SESSION['oauth_token'] !== $_REQUEST['oauth_token']) {
        // Token mismatch: possible CSRF or a stale session — reset and
        // send the user back to login.
        // NOTE(review): no exit after header('Location: ...'); execution
        // continues past this branch — confirm that is intended.
        $_SESSION['oauth_status'] = 'bad';
        session_destroy();
        header('Location: ./login.php');
    } else {
        // Exchange the verified request token for an access token.
        $connection = new TwitterOAuth(OAUTH_KEY, OAUTH_SECRET, $_COOKIE['oauth_token'], $_COOKIE['oauth_token_secret']);
        $access_token = $connection->getAccessToken($_REQUEST['oauth_verifier']);
        $_SESSION['access_token'] = $access_token;
        // Request tokens are single-use; drop them once exchanged.
        unset($_SESSION['oauth_token']);
        unset($_SESSION['oauth_token_secret']);
        setcookie('user_id', $access_token['user_id'], $time, '/');
        setcookie('screen_name', $access_token['screen_name'], $time, '/');
        setcookie('oauth_token', $access_token['oauth_token'], $time, '/');
        setcookie('oauth_token_secret', $access_token['oauth_token_secret'], $time, '/');
        if (200 == $connection->http_code) {
            $_SESSION['login_status'] = 'verified';
            $t = getTwitter();
            // NOTE(review): "veverify" looks like a typo for a
            // verify_credentials wrapper — confirm against lib/twitese.php.
            $user = $t->veverify();
            setEncryptCookie('twitese_name', $t->screen_name, $time, '/');