コード例 #1
0
ファイル: Crawl.php プロジェクト: imdaqian/phpcrawler
 /**
  * Initialise the crawler by loading and instantiating the requested driver.
  *
  * @access public
  * @param  string $driver Name of the crawl driver, without the "Driver" suffix
  * @param  array  $option Constructor arguments forwarded to the driver class
  * @return void
  **/
 public static function init($driver, $option = array())
 {
     // The file to load is addressed by a normalised name:
     // first letter upper-case, everything else lower-case.
     $driverFile = ucfirst(strtolower($driver)) . 'Driver';
     Loader::load('crawl.driver.' . $driverFile);

     // The class name only upper-cases the first letter of what the caller passed.
     $driverClass = ucfirst($driver) . 'Driver';
     $reflector = new ReflectionClass($driverClass);

     // Keep the driver instance in the class-level static slot.
     self::$_driver = $reflector->newInstanceArgs($option);
 }
コード例 #2
0
ファイル: crawl.php プロジェクト: jimmytuc/CrawlPHP
 /**
  * Crawl the current target, then recurse into the URLs found on it.
  *
  * Stores the fetched content and the URL list on the instance, passes the
  * collected e-mail addresses to printResult(), and, while the recursion
  * level is still below the maximum, creates and starts a new Crawl
  * instance for every stored URL.
  *
  * @return void Nothing is returned; the e-mail addresses are only handed to printResult().
  */
 public function start()
 {
     $this->content = $this->getContent();
     $this->urls = $this->getURLArray();
     $this->printResult($this->getEmailArray());

     // Stop descending once the recursion level is no longer below the maximum.
     if (!($this->rlevel < $this->rmax)) {
         return;
     }

     foreach ($this->urls as $link) {
         // Each child runs one level deeper with the same depth limit.
         $child = new Crawl($link, $this->rlevel + 1, $this->rmax);
         $child->start();
     }
 }
コード例 #3
0
                        RrdTool::updateRouterBatmanAdvOriginatorsCountHistory($data['router_id'], $originator_count);
                        $average_link_quality = 0;
                        if (!empty($data['batman_adv_originators'])) {
                            foreach ($data['batman_adv_originators'] as $originator) {
                                if (ConfigLine::configByName('crawl_direct_originators_only') == 'true' and $originator['originator'] == $originator['nexthop']) {
                                    $originator_status = new OriginatorStatus(false, (int) $actual_crawl_cycle, (int) $data['router_id'], $originator['originator'], (int) $originator['link_quality'], $originator['nexthop'], $originator['outgoing_interface'], $originator['last_seen']);
                                    $originator_status->store();
                                    RrdTool::updateRouterBatmanAdvOriginatorLinkQuality($data['router_id'], $originator['originator'], $originator['link_quality'], time());
                                    $average_link_quality = $average_link_quality + $originator['link_quality'];
                                }
                            }
                        }
                        $average_link_quality = $average_link_quality / $originator_count;
                        RrdTool::updateRouterBatmanAdvOriginatorLinkQuality($data['router_id'], "average", $average_link_quality, time());
                        echo "\t\t\tInserting all other Data into DB\n";
                        Crawl::insertCrawlData($data);
                    } else {
                        echo "\t\t\tRouterStatus could not be inserted into DB\n";
                    }
                    break 2;
                } else {
                    echo "\t\t\tCrawl was not successfull trying to ping next address\n";
                }
            }
        }
    }
}
echo "The process took " . (time() - $starttime) . " seconds\n";
function simplexml2array($xml)
{
    if (!is_string($xml) and get_class($xml) == 'SimpleXMLElement') {
コード例 #4
0
            header("Location: /");
            exit;
        }
        break;
    case "XMLurl":
        try {
            $openSearch = new OpenSearch(array("content" => $data['content'], "type" => $data['type']));
            $url = $openSearch->getUrlEncodedUrl();
        } catch (Exception $e) {
            header("Location: /");
            exit;
        }
        break;
    case "url":
        try {
            $crawl = new Crawl($data['content']);
            $OpenSearchArray = array("content" => $crawl->getOSLink(), "type" => $data['type'], "title" => $crawl->getTitle());
            if ($data['ddgSuggestion']) {
                $OpenSearchArray["osSuggestions"] = "https://ac.duckduckgo.com/ac/?q={searchTerms}&type=list";
            }
            $openSearch = new OpenSearch($OpenSearchArray);
            $url = $crawl->getUrlEncodedUrl();
        } catch (Exception $e) {
            header("Location: /");
            exit;
        }
        break;
}
$xmlUrl = $data['baseUrl'] . 'opensearch.xml/' . $url;
if (isset($openSearch)) {
    if ($openSearch->getTitle()) {