Exemplo n.º 1
0
 /**
  * Crawl a site from the submitted URL and run Archemy over every URL seen.
  *
  * When no "url" input is present, the 'crawlurl' view is rendered instead.
  * Otherwise the crawler is run from the submitted URL, the number of seen
  * URLs and each Archemy response are echoed, and the 'archemy.show' view is
  * assigned to the layout.
  *
  * @return mixed The 'crawlurl' view when no URL was submitted; nothing otherwise.
  */
 public function crawl()
 {
     if (!Input::has('url')) {
         return $this->layout->content = View::make('crawlurl');
     }

     $startURL = Input::get("url");
     // NOTE(review): "depth" is forwarded unvalidated; when the input is absent
     // this presumably passes null and overrides the crawler's own default.
     $depth = Input::get("depth");

     $crawler = new crawler($startURL, $depth);
     //$crawler->setHttpAuth($username, $password);
     $crawler->run();

     // Fetch the seen-URL map once and reuse it for both the count and the loop.
     $seenUrls = $crawler->getSeen();
     echo sizeof($seenUrls);

     // Only the keys (the URLs) are used; the associated value is ignored.
     foreach ($seenUrls as $url => $unused) {
         $archemy = new Archemy($url, 1);
         if ($archemy->status()) {
             $archemy->parseRelations();
         }
         echo $archemy->response();
     }

     // NOTE(review): no ->get() is called on this query, so the view presumably
     // receives a query builder rather than a result set — confirm that is intended.
     $relations = Sentence::where("full_text", $startURL);
     $this->layout->content = View::make('archemy.show', array("response" => "", "relations" => $relations, "data" => $startURL));
 }
Exemplo n.º 2
0
<?php

// This crawler searches through the links in a domain,
// makes an array from them and then searches them for emails.
// See it in action at http://jacksworkspace.com

// Raise the maximum execution time of this script to 6400 seconds.
set_time_limit(6400);
// Script-level arrays, initialised empty. Nothing in the visible code fills
// them; presumably they collect links and e-mail addresses — TODO confirm.
$array = array();
$emailarray = array();
// NOTE(review): $startURL and $depth are not assigned anywhere in the visible
// part of this file — confirm they are defined before this point.
$crawler = new crawler($startURL, $depth);
$crawler->run();
class crawler
{
    protected $_url;
    protected $_depth;
    protected $_host;
    protected $_useHttpAuth = false;
    protected $_user;
    protected $_pass;
    protected $_seen = array();
    protected $_filter = array();
    /**
     * Create a crawler for the given start URL.
     *
     * @param string $url   URL to start from; stored in $_url, and its host
     *                      component is stored in $_host.
     * @param int    $depth Stored in $_depth (defaults to 5).
     */
    public function __construct($url, $depth = 5)
    {
        $this->_url = $url;
        $this->_depth = $depth;

        // Ask parse_url() for the host component directly: it yields null when
        // the URL has no host and false when the URL is seriously malformed.
        // Both cases are stored as null, which is the value the previous
        // $parse['host'] lookup produced, but without raising an
        // undefined-index / array-offset notice.
        $host = parse_url($url, PHP_URL_HOST);
        $this->_host = is_string($host) ? $host : null;
    }
    protected function _processAnchors($content, $url, $depth)
    {
        $dom = new DOMDocument('1.0');
        @$dom->loadHTML($content);
Exemplo n.º 3
0
// Queue worker: pulls serialized keyword payloads from the 'q_crawler' AMQP
// queue and hands each one to the crawler until the queue returns no message.
require_once dirname(__FILE__) . '/class.proxy.php';
require_once dirname(__FILE__) . '/class.detector.php';

// NOTE(review): database host and credentials are hard-coded in source; they
// should be moved to configuration that is not committed with the code.
$mysqli = new mysqli('10.168.45.191', 'admin', 'txg19831210', 'crawler');
// Use set_charset() rather than a "SET NAMES" query so that the client
// library itself knows the connection character set.
$mysqli->set_charset('gbk');

$queueName = 'q_crawler';
$params = array('host' => '10.168.45.191', 'port' => 5672, 'login' => 'guest', 'password' => 'guest', 'vhost' => '/kwd');
$conn = new AMQPConnection($params);
$conn->connect();
$channel = new AMQPChannel($conn);
$queue = new AMQPQueue($channel);
$queue->setName($queueName);

$crawler = new crawler();

// Messages are fetched with AMQP_AUTOACK, so a message is acknowledged as soon
// as it is retrieved, before it has been processed.
while ($message = $queue->get(AMQP_AUTOACK)) {
    $kwd = $message->getBody();
    // NOTE(review): unserialize() on data received from a queue can instantiate
    // arbitrary classes; prefer a data-only format (e.g. JSON) if producers allow.
    $kwdArr = unserialize($kwd);
    if ($kwdArr === false && $kwd !== serialize(false)) {
        // The body was not valid serialized data: log it and move on instead
        // of handing boolean false to the crawler.
        error_log($queueName . ': skipping message with unserializable body');
        continue;
    }
    $crawler->run($kwdArr);
    // Disabled price-detection path, retained from the original script.
    // NOTE(review): it interpolates values straight into SQL; convert it to
    // prepared statements before re-enabling.
    #    print_r($kwdObj);
    #    $price = $detector->run($kwdObj);
    #    if ($price['start_price'] && $price['end_price']) {
    #        $sql = "SELECT * FROM price WHERE kid = {$kwdObj->id} LIMIT 1";
    #        $result = $mysqli->query($sql);
    #        if ($result->num_rows) {
    #            $sql = "UPDATE price SET min_price = '{$price['start_price']}', max_price = '{$price['end_price']}', region = '{$price['region']}', crawl_status = 2, last_update = " . time(). " WHERE kid = " . $kwdObj->id;
    #        }
    #        else {
    #            $sql = "INSERT INTO price SET kid = {$kwdObj->id}, shop_type = '{$kwdObj->shop_type}', min_price = '{$price['start_price']}', max_price = '{$price['end_price']}', region = '{$price['region']}', crawl_status = 2, last_update = " . time();
    #        }
    #        echo $sql . "\n";
    #        $mysqli->query($sql);
    #    }
}