예제 #1
0
<?php

/*
	[Destoon B2B System] Copyright (c) 2008-2013 Destoon.COM
	This is NOT a freeware, use is subject to license.txt
*/
// Refuse to run unless the IN_DESTOON constant has been defined.
defined('IN_DESTOON') or exit('Access Denied');
// Two menu entries: "enabled" (default listing) and "pending review" (status=2).
// NOTE(review): $file, $action, $word, $submit and $post are not assigned in this script;
// presumably the including framework provides them - confirm.
$menus = array(array('已启用', '?file=' . $file), array('待审核', '?file=' . $file . '&status=2'));
// Register two pseudo-modules under negative ids: -7 (label "quotation") reuses the
// link URL of module 5, and -9 (label "resume") reuses the link URL of module 9.
$MODULE[-7]['moduleid'] = -7;
$MODULE[-7]['name'] = '报价';
$MODULE[-7]['linkurl'] = $MODULE[5]['linkurl'];
$MODULE[-9]['moduleid'] = -9;
$MODULE[-9]['name'] = '简历';
$MODULE[-9]['linkurl'] = $MODULE[9]['linkurl'];
// Status falls back to 3 when none was supplied; otherwise it is forced to an integer.
$status = isset($status) ? intval($status) : 3;
$do = new keyword();
switch ($action) {
    case 'letter':
        // Nothing to do for an empty word: end the request with an empty response.
        if (!$word) {
            exit('');
        }
        // Only applies when the site charset is not UTF-8.
        // NOTE(review): the argument order suggests a UTF-8 -> DT_CHARSET conversion - confirm
        // against convert()'s signature.
        if (strtoupper(DT_CHARSET) != 'UTF-8') {
            $word = convert($word, 'UTF-8', DT_CHARSET);
        }
        // Output the result of gb2py() and stop; the break below is never reached.
        // NOTE(review): gb2py() presumably returns a pinyin transliteration - confirm.
        exit(gb2py($word));
        break;
    default:
        if ($submit) {
            // Apply the submitted changes, then report "update successful" with a link
            // back to the listing for the current status.
            $do->update($post);
            dmsg('更新成功', '?file=' . $file . '&status=' . $status);
        } else {
예제 #2
0
/**
 * Run one crawl step: load a keyword row, build the matching CasperJS command,
 * execute it and update the row's click bookkeeping.
 *
 * The search path (Taobao search, Taobao search on the Tmall tab, or Tmall search)
 * is chosen by comparing a number against the cumulative path1/path2/path3
 * thresholds stored on the row. The function returns early, without running
 * anything, when no row is found or when the chosen path's page value is 5 or more.
 *
 * Side effects: opens a MySQL connection, echoes progress to stdout, runs an
 * external command through system() and issues UPDATE statements on `keyword`.
 *
 * @return void
 */
function crawler()
{
    $proxyObj = new proxy();
    // NOTE(review): connection credentials are hard-coded; consider moving them to configuration.
    $mysqli = new mysqli('10.168.45.191', 'admin', 'txg19831210', 'crawler');
    $mysqli->query('SET NAMES gbk');
    $current = time();
    // NOTE(review): the selection is pinned to a single row, which looks like a debugging
    // leftover. The scheduling query it replaced is kept below for reference.
    //$sql = "SELECT * FROM keyword WHERE status = 'active' AND clicked_times < times AND ((last_click_time + click_interval) < {$current}) AND ((path1_page < 5 AND path1_page > 0) OR (path2_page < 5 AND path2_page > 0) OR (path3_page < 5 AND path3_page > 0)) ORDER BY last_click_time ASC LIMIT 1";
    $sql = "SELECT * FROM keyword WHERE id = 13 LIMIT 1";
    $result = $mysqli->query($sql);
    // Stays null when the query fails, so the check below never reads an undefined variable.
    $obj = null;
    if ($result) {
        $obj = $result->fetch_object();
        $result->close();
    }
    if (!$obj || !$obj->id) {
        echo "zz\n";
        sleep(1);
        // The surrounding endless loop is disabled, so "continue" would be a fatal error here;
        // returning ends this step and leaves any retrying to the caller.
        return;
    }

    $kwd = urlencode($obj->kwd);
    $nid = $obj->nid;
    $date = date('Ymd');
    $sleep_time = $obj->sleep_time;
    // Cumulative thresholds: path1 | path1 + path2 | path1 + path2 + path3.
    $path1 = (int) $obj->path1;
    $path2 = $path1 + (int) $obj->path2;
    $path3 = $path2 + (int) $obj->path3;
    $ua = 'aa';
    $keyword = new keyword();
    // NOTE(review): the random draw is immediately overridden with a fixed value, which looks
    // like a debugging leftover; it is preserved so behaviour does not change.
    $rand = rand(1, 100);
    $rand = 5;

    $casper = '/usr/bin/casperjs';
    $script = '/var/html/casperjs/pcntl/process.js';
    $encoding = '--output-encoding=gbk --script-encoding=gbk';

    if ($rand <= $path1) {
        // Taobao search.
        $data = array('path' => 'taobao', 'kwd' => $kwd, 'date' => $date, 'region' => $obj->path1_region, 'price_from' => $obj->path1_price_from, 'price_to' => $obj->path1_price_to);
        $search_url = $keyword->buildSearchUrl($data);
        echo $search_url . "\n";
        // NOTE(review): this exit ends the whole script, so the rest of this branch is currently
        // unreachable; it looks like a debugging leftover and is preserved as-is.
        exit;
        if ($obj->path1_page >= 5) {
            return;
        }
        // Fetch a proxy like the other branches do, so $proxy is defined once the exit above is removed.
        $proxy = $proxyObj->getProxy();
        $search_selector = ".item[nid='" . $nid . "'] h3 a";
        $next_selector = ".page-next";
        $cmd_prefix = $casper . ' ' . $encoding . ' --proxy=' . escapeshellarg($proxy) . ' ' . $script;
    } elseif ($rand <= $path2) {
        // Taobao search, Tmall tab.
        if ($obj->path2_page >= 5) {
            return;
        }
        $proxy = $proxyObj->getProxy();
        $search_url = 'http://s.taobao.com/search?spm=a230r.1.0.0.9nMSJu&initiative_id=tbindexz_' . $date . '&tab=mall&q=' . $kwd . '&suggest=0_2';
        $search_selector = ".item[nid='" . $nid . "'] h3 a";
        $next_selector = ".page-next";
        $cmd_prefix = $casper . ' ' . $encoding . ' --proxy=' . escapeshellarg($proxy) . ' ' . $script;
    } else {
        // Tmall search.
        if ($obj->path3_page >= 5) {
            return;
        }
        $proxy = $proxyObj->getProxy(true);
        $search_url = 'http://list.tmall.com/search_product.htm?q=' . $kwd . '&type=p&vmarket=&spm=3.7396704.a2227oh.d100&from=mallfp..pc_1_searchbutton';
        // No space inside the quoted value: an exact-match attribute selector would otherwise
        // only match a data-id that itself starts with a space.
        $search_selector = ".product[data-id='" . $nid . "'] div .productTitle a";
        $next_selector = "a.ui-page-s-next";
        $cmd_prefix = $casper . ' ' . $script . ' --ignore-ssl-errors=true --proxy=' . escapeshellarg($proxy) . ' ' . $encoding;
    }

    // Positional script arguments, each escaped so database-sourced values cannot break out
    // of the shell command.
    $cmd = $cmd_prefix
        . ' ' . escapeshellarg($search_url)
        . ' ' . escapeshellarg($search_selector)
        . ' ' . escapeshellarg($next_selector)
        . ' ' . escapeshellarg((string) $sleep_time)
        . ' ' . escapeshellarg($ua);

    // Record the attempt time before the command runs.
    $sql = "UPDATE keyword SET last_click_time = {$current} WHERE id = " . (int) $obj->id;
    $mysqli->query($sql);

    echo $cmd . "\n";
    system($cmd);

    $sql = "UPDATE keyword SET clicked_times = clicked_times + 1 WHERE id = " . (int) $obj->id;
    $mysqli->query($sql);
}
예제 #3
0
 /**
  * Download one feed, store the feed record and its items, and for every newly
  * stored item save its enclosure file and look up or create its keywords.
  *
  * @param mixed  $intChannelID Identifier stored as feed_id on the feed and on every item.
  * @param string $strUrl       Feed URL to download.
  *
  * @return bool true when the parsed result contained feed data (which was then saved);
  *              false when the download failed or the parser returned no feed data.
  */
 function spider_channel($intChannelID, $strUrl)
 {
     $download = new downloader();
     $feed = new agregator_feed();
     $data = new data();
     $keyword = new keyword();
     // Download the resource.
     $str_data = $download->get_resource($strUrl);
     if ($str_data == false) {
         return false;
     }
     // Parse the document.
     $arrData = $feed->parse($str_data);
     $arrFeed = isset($arrData['feed']) ? $arrData['feed'] : null;
     $arrItems = isset($arrData['items']) ? $arrData['items'] : array();
     // Nothing to store when the parser produced no feed data.
     if (!$arrFeed) {
         return false;
     }

     $arrFeed->feed_id = $intChannelID;
     $arrFeed->feed_url = $strUrl;
     // 24-hour "H": the 12-hour "h" would make morning and afternoon timestamps collide.
     $arrFeed->lastindex = date("YmdHis");
     // Hand the feed fields over for saving.
     $data->save_feed($arrFeed->feed_id, $arrFeed->feed_url, $arrFeed->lastindex, $arrFeed->lastbuilddate_int, $arrFeed->pubdate_int, null, $arrFeed->title, $arrFeed->link, $arrFeed->description, $arrFeed->language, $arrFeed->copyright, $arrFeed->managingeditor, $arrFeed->webmaster, $arrFeed->pubdate, $arrFeed->lastbuilddate, $arrFeed->category, $arrFeed->generator, $arrFeed->docs, $arrFeed->cloud, $arrFeed->ttl, $arrFeed->image_url, $arrFeed->image_title, $arrFeed->image_link);

     // Guard count(): a missing or non-countable item list is treated as empty.
     $intNumItems = (is_array($arrItems) || $arrItems instanceof Countable) ? count($arrItems) : 0;
     for ($intCountItems = 0; $intCountItems < $intNumItems; $intCountItems++) {
         // Items are objects, so $item refers to the same instance that is held in $arrItems.
         $item = $arrItems[$intCountItems];
         $item->feed_id = $intChannelID;
         $item_id = $data->save_item("null", $item->feed_id, $item->pubdate_int, $item->title, $item->link, $item->description, $item->author, $item->category, $item->comments, $item->enclousure, $item->guid, $item->pubdate, $item->source, addslashes(json_encode($item)));
         // Only items that received a positive id get enclosure and keyword processing.
         if (!isset($item_id) || !($item_id > 0)) {
             continue;
         }
         echo "  new item: " . $item_id . "\n";

         // Save the enclosure.
         if (isset($item->enclousure['URL'], $item->enclousure['LENGTH']) && $item->enclousure['LENGTH'] > 0) {
             $raw_url = $item->enclousure['URL'];
             $hash_32 = md5($raw_url);
             $hash_2 = substr($hash_32, 0, 2);
             $hash_1 = substr($hash_32, 0, 1);
             $length = $item->enclousure['LENGTH'];
             $type = addslashes(isset($item->enclousure['TYPE']) ? $item->enclousure['TYPE'] : '');
             // Escaped copy for the data layer only; the download below uses the raw URL.
             $url = addslashes($raw_url);

             // Target folder inside static: static/a/ab/
             $dir = "../public/static" . "/" . $hash_1 . "/" . $hash_2;
             $dir_ready = is_dir($dir) || mkdir($dir, 0777, true) || is_dir($dir);

             // The URL comes from the remote feed: only fetch remote schemes, so a feed cannot
             // make this copy local files (file://, php://, ...) into the public directory.
             $scheme = strtolower((string) parse_url($raw_url, PHP_URL_SCHEME));
             if ($dir_ready && in_array($scheme, array('http', 'https', 'ftp'), true)) {
                 $body = file_get_contents($raw_url);
                 // Skip the write on a failed download instead of leaving an empty file behind.
                 if ($body !== false) {
                     file_put_contents($dir . "/" . $hash_32, $body);
                 }
             }
             $data->feed_item_enclosure_add($item_id, $hash_1, $hash_2, $hash_32, $length, $type, $url);
         }

         // Resolve a keyword id for every keyword found in the title and description.
         $arr_keywords = $keyword->extract_keywords($item->title . " " . $item->description);
         foreach ($arr_keywords as $k) {
             if ($keyword->check($k) == false) {
                 $keyword_id = $keyword->save($k);
             } else {
                 $keyword_id = $keyword->get($k);
             }
             // All four checks must hold; joining them with || would be true for every value.
             if ($item_id !== 0 && $item_id !== '' && $keyword_id !== 0 && $keyword_id !== '') {
                 // NOTE(review): linking keyword and item is currently disabled:
                 //	mysql_query("INSERT INTO `feed_keyword_item` (`keyword_id`,`item_id`) VALUES ('{$keyword_id}','{$item_id}')");
             }
         }
         unset($arr_keywords);
     }
     return true;
 }