<?php
/* [Destoon B2B System] Copyright (c) 2008-2013 Destoon.COM This is NOT a freeware, use is subject to license.txt */
// Admin-side keyword management script (fragment: the final `else` branch
// continues beyond the part of the file shown here).

// Refuse direct requests: the IN_DESTOON constant must already be defined.
defined('IN_DESTOON') or exit('Access Denied');

// Two menu entries linking back to this file: the first label reads
// "enabled" (no status parameter), the second "pending review" (status=2).
$menus = array(array('已启用', '?file=' . $file), array('待审核', '?file=' . $file . '&status=2'));

// Register two extra module entries under negative ids. Each gets a display
// name ("quotes" for -7, "resume" for -9) and reuses the link URL of an
// existing module (5 and 9 respectively).
$MODULE[-7]['moduleid'] = -7;
$MODULE[-7]['name'] = '报价';
$MODULE[-7]['linkurl'] = $MODULE[5]['linkurl'];
$MODULE[-9]['moduleid'] = -9;
$MODULE[-9]['name'] = '简历';
$MODULE[-9]['linkurl'] = $MODULE[9]['linkurl'];

// Status filter: coerced to an integer, defaulting to 3 when none is set.
// NOTE(review): 3 presumably means "enabled" and 2 "pending", judging by the
// menu labels above -- confirm against the keyword class.
$status = isset($status) ? intval($status) : 3;
$do = new keyword();
switch ($action) {
    case 'letter':
        // Responds with gb2py() of the submitted word; an empty word yields
        // an empty response.
        if (!$word) {
            exit('');
        }
        // Re-encode only when the site charset is not UTF-8.
        // NOTE(review): presumably convert() takes (text, from, to) -- confirm.
        if (strtoupper(DT_CHARSET) != 'UTF-8') {
            $word = convert($word, 'UTF-8', DT_CHARSET);
        }
        exit(gb2py($word));
        break;
    default:
        if ($submit) {
            // Apply the posted changes, then show the "update successful"
            // message with a link back to the current status tab.
            $do->update($post);
            dmsg('更新成功', '?file=' . $file . '&status=' . $status);
        } else {
/**
 * Run one simulated search-and-click pass for a keyword row via CasperJS.
 *
 * Loads a row from the `keyword` table, chooses one of three search paths
 * (Taobao search, Taobao search on the Tmall tab, or Tmall search) by
 * comparing a number against the row's cumulative path1/path2 weights,
 * builds the matching CasperJS command line, stamps `last_click_time`,
 * echoes and runs the command, then increments `clicked_times`.
 *
 * NOTE(review): three debugging overrides from the original are preserved
 * and flagged inline -- the fixed row id 13, the fixed `$rand = 5`, and the
 * early `exit` in the Taobao branch. Confirm with the author before
 * removing them.
 *
 * @return void
 */
function crawler()
{
    $proxyObj = new proxy();

    // NOTE(review): connection credentials are hard-coded in source; they
    // belong in configuration.
    $mysqli = new mysqli('10.168.45.191', 'admin', 'txg19831210', 'crawler');
    if ($mysqli->connect_errno) {
        echo 'crawler: database connection failed: ' . $mysqli->connect_error . "\n";
        return;
    }
    $mysqli->query('SET NAMES gbk');

    $current = time();

    // Production selection query, disabled by the author:
    //$sql = "SELECT * FROM keyword WHERE status = 'active' AND clicked_times < times AND ((last_click_time + click_interval) < {$current}) AND ((path1_page < 5 AND path1_page > 0) OR (path2_page < 5 AND path2_page > 0) OR (path3_page < 5 AND path3_page > 0)) ORDER BY last_click_time ASC LIMIT 1";
    // NOTE(review): debugging override -- always loads row 13.
    $sql = "SELECT * FROM keyword WHERE id = 13 LIMIT 1";
    $result = $mysqli->query($sql);

    // Initialised up front so a failed query is handled like "no row found"
    // instead of reading an undefined variable.
    $obj = null;
    if ($result) {
        $obj = $result->fetch_object();
        $result->close();
    }

    // The original used `continue` here and below, but the surrounding
    // `for (;;)` loop is commented out, which is a compile-time fatal error.
    // Returning ends this pass instead.
    if (!$obj || !$obj->id) {
        echo "zz\n";
        sleep(1);
        return;
    }

    $id = (int) $obj->id;
    $kwd = urlencode($obj->kwd);
    $nid = $obj->nid;
    $date = date('Ymd');
    $sleep_time = (string) $obj->sleep_time;
    $ua = 'aa';
    $keyword = new keyword();

    // Cumulative thresholds: at or below $path1 selects Taobao search, at or
    // below $path2 selects the Tmall tab, anything above selects Tmall search.
    $path1 = (int) $obj->path1;
    $path2 = $path1 + (int) $obj->path2;

    // Every value placed on the command line comes from the database or the
    // proxy pool, so each one is escaped as a single shell argument.
    $quoteArgs = function (array $args) {
        return implode(' ', array_map('escapeshellarg', $args));
    };

    // NOTE(review): debugging override -- the original drew rand(1, 100) and
    // then immediately replaced it with 5.
    $rand = 5;

    if ($rand <= $path1) {
        // Taobao search
        $data = array(
            'path' => 'taobao',
            'kwd' => $kwd,
            'date' => $date,
            'region' => $obj->path1_region,
            'price_from' => $obj->path1_price_from,
            'price_to' => $obj->path1_price_to,
        );
        $search_url = $keyword->buildSearchUrl($data);

        // NOTE(review): debugging short-circuit kept from the original. It
        // prints the URL and terminates the script, so the rest of this
        // branch is unreachable until these two lines are removed.
        echo $search_url . "\n";
        exit;

        if ($obj->path1_page >= 5) {
            return;
        }
        // The original left this call commented out, leaving $proxy undefined.
        $proxy = $proxyObj->getProxy();
        $search_selector = ".item[nid='" . $nid . "'] h3 a";
        $next_selector = ".page-next";
        $cmd = '/usr/bin/casperjs --output-encoding=gbk --script-encoding=gbk --proxy=' . escapeshellarg($proxy)
            . ' /var/html/casperjs/pcntl/process.js '
            . $quoteArgs(array($search_url, $search_selector, $next_selector, $sleep_time, $ua));
    } elseif ($rand <= $path2) {
        // Taobao search, Tmall tab
        if ($obj->path2_page >= 5) {
            return;
        }
        $proxy = $proxyObj->getProxy();
        $search_url = 'http://s.taobao.com/search?spm=a230r.1.0.0.9nMSJu&initiative_id=tbindexz_' . $date . '&tab=mall&q=' . $kwd . '&suggest=0_2';
        $search_selector = ".item[nid='" . $nid . "'] h3 a";
        $next_selector = ".page-next";
        $cmd = '/usr/bin/casperjs --output-encoding=gbk --script-encoding=gbk --proxy=' . escapeshellarg($proxy)
            . ' /var/html/casperjs/pcntl/process.js '
            . $quoteArgs(array($search_url, $search_selector, $next_selector, $sleep_time, $ua));
    } else {
        // Tmall search
        if ($obj->path3_page >= 5) {
            return;
        }
        $proxy = $proxyObj->getProxy(true);
        $search_url = 'http://list.tmall.com/search_product.htm?q=' . $kwd . '&type=p&vmarket=&spm=3.7396704.a2227oh.d100&from=mallfp..pc_1_searchbutton';
        // The original had a stray space after the opening quote
        // (data-id=' 123'), so the attribute selector could never match.
        $search_selector = ".product[data-id='" . $nid . "'] div .productTitle a";
        $next_selector = "a.ui-page-s-next";
        $cmd = '/usr/bin/casperjs /var/html/casperjs/pcntl/process.js --ignore-ssl-errors=true --proxy=' . escapeshellarg($proxy)
            . ' --output-encoding=gbk --script-encoding=gbk '
            . $quoteArgs(array($search_url, $search_selector, $next_selector, $sleep_time, $ua));
    }

    // Stamp the attempt before launching the browser.
    $mysqli->query("UPDATE keyword SET last_click_time = {$current} WHERE id = {$id}");

    echo $cmd . "\n";
    system($cmd);

    $mysqli->query("UPDATE keyword SET clicked_times = clicked_times + 1 WHERE id = {$id}");
}
/**
 * Fetch one feed, store it and its items, cache item enclosures and make
 * sure every keyword extracted from the items exists.
 *
 * @param mixed  $intChannelID Feed id stored on the feed and on each item.
 * @param string $strUrl       Feed URL handed to downloader::get_resource().
 *
 * @return bool True when the parsed document contained a feed; false when
 *              the download failed or the parser returned no feed.
 */
function spider_channel($intChannelID, $strUrl)
{
    $download = new downloader();
    $feed = new agregator_feed();
    $data = new data();
    $keyword = new keyword();

    // Download the resource.
    $str_data = $download->get_resource($strUrl);
    if ($str_data == false) {
        return false;
    }

    // Parse the document.
    $arrData = $feed->parse($str_data);
    $arrFeed = isset($arrData['feed']) ? $arrData['feed'] : null;
    // A missing or non-array item list is treated as "no items" so the loop
    // below never counts or iterates a non-array.
    $arrItems = (isset($arrData['items']) && is_array($arrData['items'])) ? $arrData['items'] : array();

    // Nothing to store without a feed.
    if (!$arrFeed) {
        return false;
    }

    $arrFeed->feed_id = $intChannelID;
    $arrFeed->feed_url = $strUrl;
    // 24-hour "H": the original "h" is the 12-hour clock, which makes
    // morning and afternoon timestamps indistinguishable.
    $arrFeed->lastindex = date("YmdHis");

    // Hand the feed fields to the data layer for saving.
    $data->save_feed($arrFeed->feed_id, $arrFeed->feed_url, $arrFeed->lastindex, $arrFeed->lastbuilddate_int, $arrFeed->pubdate_int, null, $arrFeed->title, $arrFeed->link, $arrFeed->description, $arrFeed->language, $arrFeed->copyright, $arrFeed->managingeditor, $arrFeed->webmaster, $arrFeed->pubdate, $arrFeed->lastbuilddate, $arrFeed->category, $arrFeed->generator, $arrFeed->docs, $arrFeed->cloud, $arrFeed->ttl, $arrFeed->image_url, $arrFeed->image_title, $arrFeed->image_link);

    foreach ($arrItems as $item) {
        $item->feed_id = $intChannelID;

        $item_id = $data->save_item("null", $item->feed_id, $item->pubdate_int, $item->title, $item->link, $item->description, $item->author, $item->category, $item->comments, $item->enclousure, $item->guid, $item->pubdate, $item->source, addslashes(json_encode($item)));

        // Only items the data layer reports with a positive id are processed
        // further.
        if (!isset($item_id) || !($item_id > 0)) {
            continue;
        }
        echo " new item: " . $item_id . "\n";

        // Save the enclosure ("enclousure" is the property name used by the
        // parser and is kept as-is).
        if (isset($item->enclousure['URL'], $item->enclousure['LENGTH']) && $item->enclousure['LENGTH'] > 0) {
            $raw_url = $item->enclousure['URL'];
            $hash_32 = md5($raw_url);
            $hash_2 = substr($hash_32, 0, 2);
            $hash_1 = substr($hash_32, 0, 1);
            $type = isset($item->enclousure['TYPE']) ? $item->enclousure['TYPE'] : '';

            // Cached under static/<1 hex char>/<2 hex chars>/<md5>; the path
            // is relative to the current working directory. The download
            // uses the raw URL -- the original fetched the addslashes()'d
            // copy that is only meant for storage.
            $target_dir = "../public/static" . "/" . $hash_1 . "/" . $hash_2;
            if (!_spider_channel_cache_enclosure($raw_url, $target_dir, $hash_32)) {
                echo " enclosure not cached: " . $raw_url . "\n";
            }

            // The enclosure is recorded even when caching failed, as the
            // original did.
            $data->feed_item_enclosure_add($item_id, $hash_1, $hash_2, $hash_32, $item->enclousure['LENGTH'], addslashes($type), addslashes($raw_url));
        }

        $arr_keywords = $keyword->extract_keywords($item->title . " " . $item->description);
        if (!is_array($arr_keywords)) {
            continue;
        }
        foreach ($arr_keywords as $k) {
            // Create the keyword when it is unknown, otherwise look it up.
            if ($keyword->check($k) == false) {
                $keyword_id = $keyword->save($k);
            } else {
                $keyword_id = $keyword->get($k);
            }
            // TODO: store the ($keyword_id, $item_id) link. The original
            // INSERT into `feed_keyword_item` was commented out, and its
            // guard chained `!==` tests with `||`, which is always true, so
            // the empty block was dropped.
        }
    }

    return true;
}

/**
 * Download a feed-supplied enclosure URL into the static cache (best effort).
 *
 * @param string $url        Raw enclosure URL as found in the feed.
 * @param string $target_dir Directory that receives the file; created
 *                           recursively when missing.
 * @param string $file_name  File name inside $target_dir.
 *
 * @return bool True when the file was downloaded and written.
 */
function _spider_channel_cache_enclosure($url, $target_dir, $file_name)
{
    // The URL comes from remote feed content, so only network schemes are
    // accepted; a bare path or file:// URL would make file_get_contents()
    // read from the local filesystem.
    $scheme = strtolower((string) parse_url($url, PHP_URL_SCHEME));
    if (!in_array($scheme, array('http', 'https', 'ftp'), true)) {
        return false;
    }

    // The second is_dir() covers another process creating the directory
    // between the first check and mkdir().
    if (!is_dir($target_dir) && !mkdir($target_dir, 0777, true) && !is_dir($target_dir)) {
        return false;
    }

    // A failed download must not be written out as an empty file.
    $content = file_get_contents($url);
    if ($content === false) {
        return false;
    }

    return file_put_contents($target_dir . "/" . $file_name, $content) !== false;
}