// Fragment: finishes choosing $rootPath (the controlling `if` opens above this
// chunk), derives the image save paths, then starts iterating over stored
// articles whose images have not been fetched yet (img_geted = 0).
$rootPath = $_SERVER['DOCUMENT_ROOT'] . '/';
} else {
    $rootPath = $_SERVER['DOCUMENT_ROOT'];
}

$saveFullPath = $rootPath . $savePath;

// ereg() was removed in PHP 7.0. The old pattern '/$' only asked "does the
// string end with a slash?", so comparing the last character is equivalent
// (an empty string still counts as "no trailing slash").
if (substr($saveFullPath, -1) !== '/') {
    $saveFullPath = $saveFullPath . '/';
}
if (substr($savePath, -1) !== '/') {
    $newPath = '/' . $savePath . '/';
} else {
    $newPath = '/' . $savePath;
}

$NEAT_IMG = new NEAT_ProcessHttpImg($saveFullPath);
$NEAT_IMG->setAllowType();
$NBS = new NEATBulidSql(TB_DATA);

// The rule filter is optional. Give it an empty default only when nothing
// has defined it yet, so the concatenation below never reads an undefined
// variable when $rulesID is empty.
if (!isset($sqlRules)) {
    $sqlRules = '';
}
if ($rulesID) {
    // NOTE(review): $rulesID is concatenated unquoted; presumably it was
    // validated as an integer above this chunk - confirm.
    $sqlRules = 'AND rules = ' . $rulesID . ' ';
}

$sql = 'SELECT id, link_id, title, body, img_geted ';
$sql .= 'FROM ' . TB_DATA . ' ';
$sql .= 'WHERE img_geted = 0 ';
$sql .= $sqlRules;
// NOTE(review): the extra arguments (0, $eachTimes) look like offset/limit - confirm against MySQL::query().
$rs = $db->query($sql, 0, $eachTimes);
$allLocalFile = array();

while ($rs->next_record()) {
    // The key was a bare constant (`saved`), which is an Error on PHP 8; the
    // quoted key is what older PHP versions fell back to.
    // NOTE(review): $NEAT_IMG is an object - this only works if the class
    // supports array access; `$NEAT_IMG->saved` may have been intended.
    unset($NEAT_IMG['saved']);

    // Hand the article body to the image helper, then download what it finds.
    $bodyContent = $rs->get('body');
    $NEAT_IMG->setContents($bodyContent);
    $array = $NEAT_IMG->getImgUrl();
    $NEAT_IMG->saveImg($array, $maxSize, $minSize);
} else {
    // Reached when the branch above (outside this chunk) did not succeed.
    error('配置文件上传失败,请检查配置和目录设置');
}
} else {
    // A collector name is mandatory; it is normalised in place before the check.
    $_POST['ruleName'] = trim($_POST['ruleName']);
    if (!$_POST['ruleName']) {
        error('请输入采集器的名称');
    }

    // The rule definition is posted base64-encoded and parsed as XML.
    $content = base64_decode($_POST['contents']);
    $NX = new NEAT_XML();
    $NX->parse_document($content);
    $newArray = $NX->xml_array['nc_rules_config'];

    // Fall back to method 1 when the document carries no usable HTTP method.
    if (!intval($newArray['http']['method'])) {
        $newArray['http']['method'] = 1;
    }

    $NBS = new NEATBulidSql(TB_RULES);

    // Identity and index-page settings.
    $rulesFids['id']           = '';
    $rulesFids['name']         = addslashes($_POST['ruleName']);
    $rulesFids['index_type']   = addslashes(trim($newArray['index']['index_type']));
    $rulesFids['url']          = addslashes(trim(encodestring($newArray['index']['index_url'])));
    $rulesFids['page_start']   = intval($newArray['index']['page_start']);
    $rulesFids['page_end']     = intval($newArray['index']['page_end']);
    $rulesFids['link_replace'] = addslashes($newArray['index']['link_replace']);

    // HTTP request settings.
    $rulesFids['method']  = intval($newArray['http']['method']);
    $rulesFids['posts']   = addslashes($newArray['http']['posts']);
    $rulesFids['cookies'] = addslashes($newArray['http']['cookies']);

    // Extraction areas; each value goes through encodestring() before escaping.
    $rulesFids['area_link']           = addslashes(encodestring($newArray['area']['area_link']));
    $rulesFids['area_title']          = addslashes(encodestring($newArray['area']['area_title']));
    $rulesFids['area_body']           = addslashes(encodestring($newArray['area']['area_body']));
    $rulesFids['area_body_page']      = addslashes(encodestring($newArray['page']['page_area']));
    $rulesFids['area_body_page_link'] = addslashes(encodestring($newArray['page']['page_next']));
// Fragment: tail of the paged link-collection step. The first statements
// belong to an `if` branch opened above this chunk: they announce the current
// page and point the next request at the following page.
$alertTitle = $nextAlertTitle;
// The message must be built before ++$_GET['start'] below so it shows the
// page that was just processed.
$alertMessage = '当前任务:第' . $_GET['start'] . '页,一共' . $pageTotal . '页在任务队列中.' . $nextAlertMessage;
$gotoURL = $baseURL . '3&start=' . ++$_GET['start'] . '&dataCount=' . $dataCount . '&existsCount=' . $existsCount;
} else {
    // No further page: hand the running totals to the finish URL.
    $gotoURL = $finishBaseURL . '&dataCount=' . $dataCount . '&existsCount=' . $existsCount;
    $alertTitle = $finishAlertTitle;
    $alertMessage = $finishAlertMessage;
}
}
}
showloading($gotoURL, $alertTitle, $alertMessage, 1);
$tpShowBody = false;
} else {
    if ($_GET['action'] == 'finish') {
        // The counters arrive in the query string and end up in an arithmetic
        // SQL update and in the result template, so force them to integers
        // first (missing values become 0).
        $_GET['dataCount'] = isset($_GET['dataCount']) ? intval($_GET['dataCount']) : 0;
        $_GET['existsCount'] = isset($_GET['existsCount']) ? intval($_GET['existsCount']) : 0;

        if (0 < $_GET['dataCount']) {
            // Builds an update with link_num as the column, '+' as the method
            // and dataCount as the operand.
            // NOTE(review): presumably this renders as
            // `link_num = link_num + <dataCount>` - confirm in NEATBulidSql::update().
            $NBS = new NEATBulidSql(TB_RULES);
            $updateFids['link_num'] = 'link_num';
            $conditionFids['id'] = intval($_GET['ID']);
            $config['link_num']['method'] = '+';
            $config['link_num']['num'] = $_GET['dataCount'];
            $sql = $NBS->update($updateFids, $conditionFids, $config);
            $db->update($sql);
        }

        // Render the collection summary.
        $tp->set_templatefile('templates/link_collection_result.html');
        $tp->assign('existsCount', $_GET['existsCount']);
        $tp->assign('dataCount', $_GET['dataCount']);
        $moduleTemplate = $tp->result();
        $moduleTitle = '采集统计';
    }
}
}
// Fragment: end of the loop that joins the posted "recount" settings into
// '|'-separated lists (the loop header is above this chunk), followed by the
// field set for a new export configuration row.
$countRulesList .= $_POST['count_rules'][$k];
$countRulesValueList .= $_POST['count_rules_value'][$k];
if ($i < $num) {
    // Separator between entries, skipped after the last one.
    $countFieldsList .= '|';
    $countFieldsValueList .= '|';
    $countRulesList .= '|';
    $countRulesValueList .= '|';
}
++$i;
continue;
}
}
}

$name = trim($_POST['configName']);
// Current Unix timestamp; the former strtotime(date(...)) round trip gave the
// same value except around DST changes, where it could be off by an hour.
$time = time();

$NBS = new NEATBulidSql(TB_DB2DB);
$configFids['id'] = '';
$configFids['db_type'] = trim($_GET['type']);
// implode() throws a TypeError on PHP 8 when rulesID is missing or not an
// array, and `@` cannot suppress that. Keep the old outcome (null) explicitly.
$configFids['rules'] = (isset($_POST['rulesID']) && is_array($_POST['rulesID']))
    ? implode(',', $_POST['rulesID'])
    : null;
$configFids['name'] = $name;

// Target database credentials are taken from the session.
$configFids['host'] = $_SESSION['TARGET_DB_HOST'];
$configFids['user'] = $_SESSION['TARGET_DB_USER'];
$configFids['password'] = $_SESSION['TARGET_DB_PASS'];
$configFids['db_name'] = $_SESSION['TARGET_DB_NAME'];

// Table/field mapping lists assembled earlier in the script.
$configFids['article_table'] = $tableList;
$configFids['field_list'] = $fieldList;
$configFids['value_list'] = $dataList;
$configFids['recount_fields_list'] = $countFieldsList;
$configFids['recount_fields_value_list'] = $countFieldsValueList;
$configFids['recount_rules_list'] = $countRulesList;
$configFids['recount_rules_value_list'] = $countRulesValueList;
// Delete one collector rule together with its filters, collected links and
// collected articles. The rule id comes from the query string.

// --- validate the id -------------------------------------------------------
if (!$_GET['ID']) {
    error('采集器编号不能为空!');
}
if (!is_numeric($_GET['ID'])) {
    error('采集器编号只能是数字!');
}

// --- make sure the rule exists ---------------------------------------------
$sql = 'SELECT * FROM ' . TB_RULES . ' WHERE id = ' . $_GET['ID'];
$db = new MySQL(DB_SERVER, DB_USER, DB_PASSWORD, DB_DATABASE);
$rs = $db->query($sql);
if (!$rs->next_record()) {
    error('找不到编号为' . $_GET['ID'] . '的采集器规则!');
}

// --- remove the rule row, then everything that references it ----------------
$NBS = new NEATBulidSql(TB_RULES);
$rulesFids['id'] = $_GET['ID'];
$db->query($sql = $NBS->del($rulesFids));

// Filters attached to the rule.
$NBS->setTable(TB_FILTER);
$filterFids['rule_id'] = $_GET['ID'];
$db->query($sql = $NBS->del($filterFids));

// Links collected by the rule.
$NBS->setTable(TB_LINKS);
$linksFids['rules'] = $_GET['ID'];
$db->query($sql = $NBS->del($linksFids));

// Articles collected by the rule.
$NBS->setTable(TB_DATA);
$dataFids['rules'] = $_GET['ID'];
$db->query($sql = $NBS->del($dataFids));
// Fragment: the first branch (opened above this chunk) finishes a rule/category
// lookup and renders the "clear collector data" form; the else branch performs
// the requested clean-up.
$sql .= 'ON r.cid = c.id ';
// The id is concatenated into the statement, so force it to an integer; an
// empty or non-numeric value becomes 0 and falls into the "not found" error
// below instead of producing malformed SQL.
$sql .= 'WHERE r.id = ' . intval($_GET['ID']);
$rs = $NDB->query($sql);
if (!$rs->next_record()) {
    error('没找到您要清空的采集器');
}

$tp->set_templatefile('templates/rules_clear.html');
$tp->assign('id', $rs->get('id'));
$tp->assign('ruleName', $rs->get('name'));
$tp->assign('cateName', $rs->get('cateName'));
$tp->assign('linkNum', $rs->get('link_num'));
$tp->assign('importNum', $rs->get('import_num'));
$moduleTemplate = $tp->result();
$moduleTitle = '清空采集器数据';
} else {
    $NBS = new NEATBulidSql(TB_LINKS);

    // "link" ticked: drop the rule's collected links and reset its link counter.
    // !empty() keeps the original truthiness test without reading an
    // undefined index when the box was left unticked.
    if (!empty($_POST['link'])) {
        $NBS->setTable(TB_LINKS);
        $linkFids['rules'] = intval($_GET['ID']);
        $sql = $NBS->del($linkFids);
        $NDB->query($sql);

        $NBS->setTable(TB_RULES);
        $conditionFids['id'] = intval($_GET['ID']);
        $rulesFids['link_num'] = 0;
        $sql = $NBS->update($rulesFids, $conditionFids);
        $NDB->update($sql);
    }

    // "data" ticked: drop the rule's collected articles (the statement is
    // executed below this chunk).
    if (!empty($_POST['data'])) {
        $NBS->setTable(TB_DATA);
        $dataFids['rules'] = intval($_GET['ID']);
        $sql = $NBS->del($dataFids);
// Fragment: end of one import batch (the loop/branch it closes opens above
// this chunk), followed by the decision to finish or schedule the next batch.
$db->update($sql);
}

// Work out where the next batch starts and how many records remain.
$nextStart = $start + $eachTimes;
$geted = $nextStart - 1;
$leaveTotal = $total - $geted;
if ($leaveTotal < $eachTimes) {
    // Last batch: only ask for what is left.
    $eachTimes = $leaveTotal;
}

if ($total < $nextStart) {
    // Everything has been imported. The rule id is optional (the redirect
    // below already falls back to index.php without it), so the per-rule
    // recount must be skipped as well when it is absent; previously this
    // ran "... WHERE rules = " with nothing after it.
    if (!empty($_GET['rulesID'])) {
        $countSql = 'SELECT COUNT(*) AS total ';
        $countSql .= 'FROM ' . TB_DATA . ' ';
        // Cast: the id is concatenated straight into the statement.
        $countSql .= 'WHERE rules = ' . intval($_GET['rulesID']);
        $rs = $db->query($countSql);
        $rs->next_record();
        $importTotal = $rs->get('total');

        // Store the recounted total on the rule.
        $NBS = new NEATBulidSql(TB_RULES);
        $conditionFids['id'] = intval($_GET['rulesID']);
        $rulesFids['import_num'] = $importTotal;
        $sql = $NBS->update($rulesFids, $conditionFids);
        $db->update($sql);

        $url = '?module=updateRulesCount&ID=' . intval($_GET['rulesID']);
    } else {
        $url = 'index.php';
    }

    showloading($url, '数据入库任务完成...', '文章已经全部采集到本地数据库');
    $tpShowBody = false;
} else {
    // More to do: redirect to the next batch and report progress.
    $url = '?module=import&action=processing&total=' . $total . '&rulesID=' . $rulesID . '&eachTimes=' . $eachTimes . '&start=' . $nextStart;
    $message = '当前已经入库 : ' . $geted . ' 条,还有 ' . $leaveTotal . ' 条在任务队列中. (一共 ' . $total . ' 条)';
    showloading($url, '数据入库任务进行中...', $message, 1);
    $tpShowBody = false;
}
}
}
// Fragment: end of the loop that joins the posted "recount" settings into
// '|'-separated lists (the loop header is above this chunk), followed by the
// field set and condition for updating an existing export configuration.
$countRulesList .= $_POST['count_rules'][$k];
$countRulesValueList .= $_POST['count_rules_value'][$k];
if ($i < $num) {
    // Separator between entries, skipped after the last one.
    $countFieldsList .= '|';
    $countFieldsValueList .= '|';
    $countRulesList .= '|';
    $countRulesValueList .= '|';
}
++$i;
continue;
}
}
}

// Trimmed so that an edited name is normalised the same way as a newly
// created one.
$name = trim($_POST['configName']);
// Current Unix timestamp; the former strtotime(date(...)) round trip gave the
// same value except around DST changes, where it could be off by an hour.
$time = time();

$NBS = new NEATBulidSql(TB_DB2DB);
$configFids['name'] = $name;
$configFids['db_type'] = trim($_GET['type']);
// implode() throws a TypeError on PHP 8 when rulesID is missing or not an
// array, and `@` cannot suppress that. Keep the old outcome (null) explicitly.
$configFids['rules'] = (isset($_POST['rulesID']) && is_array($_POST['rulesID']))
    ? implode(',', $_POST['rulesID'])
    : null;

// Target database credentials are taken from the session.
$configFids['host'] = $_SESSION['TARGET_DB_HOST'];
$configFids['user'] = $_SESSION['TARGET_DB_USER'];
$configFids['password'] = $_SESSION['TARGET_DB_PASS'];
$configFids['db_name'] = $_SESSION['TARGET_DB_NAME'];

// Table/field mapping lists assembled earlier in the script.
$configFids['article_table'] = $tableList;
$configFids['field_list'] = $fieldList;
$configFids['value_list'] = $dataList;
$configFids['recount_fields_list'] = $countFieldsList;
$configFids['recount_fields_value_list'] = $countFieldsValueList;
$configFids['recount_rules_list'] = $countRulesList;
$configFids['recount_rules_value_list'] = $countRulesValueList;

// Row to update.
$configCondition['id'] = intval($_GET['ID']);
// Fragment: remaining cases of the target-database connection switch (its
// header is above this chunk), then the start of the loop that prepares each
// collected article for export.
break;

case 'ado_access':
    // Jet/Access connection string. The password segment had been replaced by
    // a literal '******' (a syntax error); it now uses $PASS like the MSSQL
    // case below.
    $myDSN = 'PROVIDER=Microsoft.Jet.OLEDB.4.0;DATA SOURCE=' . realpath($HOST) . ';' . 'USER ID=' . $USER . ';PASSWORD=' . $PASS . ';';
    $TDB = adonewconnection('ado_access');
    $TDB->Connect($myDSN, '', '', '');
    break;

case 'ado_mssql':
    $myDSN = 'Driver={SQL Server};Server=' . $HOST . ';Database=' . $NAME . ';Uid=' . $USER . ';Pwd=' . $PASS . ';';
    $TDB = adonewconnection('ado_mssql');
    $TDB->Connect($myDSN, '', '', '');
    break;

default:
    error('不可识别或者不支持的数据库类型');
    break;
}

$NBS = new NEATBulidSql($dbAllList['orderList'][0]);

while ($rs->next_record()) {
    switch ($DB_TYPE) {
        case 'mysql':
            // Text columns are escaped with addslashes(); date and id are
            // passed through as stored.
            $contents['title'] = addslashes($rs->get('title'));
            $contents['body'] = addslashes($rs->get('body'));
            $contents['date'] = $rs->get('date');
            $contents['url'] = addslashes($rs->get('url'));
            $contents['author'] = addslashes($rs->get('author'));
            $contents['from'] = addslashes($rs->get('data_from'));
            $contents['intro'] = addslashes($rs->get('intro'));
            $contents['currentDate'] = time();
            $contents['articleID'] = $rs->get('id');

            // Replacement templates that embed a date() call with a captured
            // format string (\1).
            // NOTE(review): these look like PHP expressions meant to be
            // evaluated by a later replace step; the stored `date` value is
            // inserted unquoted - confirm it is always numeric.
            $pregTagReplace['currentDateFormat'] = '@date(\'\\1\')';
            $pregTagReplace['dateFormat'] = '@date(\'\\1\', ' . $rs->get('date') . ')';
// Fragment: the first statement ends the branch that shows the edit form (it
// opens above this chunk); the else branch validates the posted form and
// collects the columns for updating the rule.
$moduleTitle = '编辑采集器规则';
} else {
    if (!trim($_POST['name'])) {
        error('请输入采集器名字');
    }
    if (!intval($_POST['pid'])) {
        error('请选择采集器分类');
    }

    // Merge the replaceRNT options from POST into one comma-separated value.
    $replaceRNT_ALL = trim($_POST['delr']) . ',' . trim($_POST['deln']) . ',' . trim($_POST['delt']) . ',' . trim($_POST['debugshow']) . ',' . trim($_POST['charset']);
    // Paging options, stored the same way.
    $page_rules = trim($_POST['page_mula']) . ',' . trim($_POST['page_add']) . ',' . trim($_POST['page_fill']);

    // The form carries one URL input per index type (url_I, url_II, url_III);
    // pick the input that matches the selected type.
    $urlType[1] = 'I';
    $urlType[2] = 'II';
    $urlType[3] = 'III';
    $url = 'url_' . $urlType[$_POST['indexType']];

    $NBS = new NEATBulidSql(TB_RULES);
    // Row to update; cast because the id comes straight from the query string.
    $conditionFids['id'] = intval($_GET['ID']);

    $rulesFids['cid'] = intval($_POST['pid']);
    $rulesFids['name'] = trim($_POST['name']);
    $rulesFids['index_type'] = trim($_POST['indexType']);
    $rulesFids['url'] = trim($_POST[$url]);

    // Additional option columns.
    $rulesFids['replaceRNT'] = $replaceRNT_ALL;
    $rulesFids['page_rules'] = $page_rules;

    // HTTP request settings.
    $rulesFids['useragent'] = trim($_POST['useragent']);
    $rulesFids['referer'] = trim($_POST['referer']);
    $rulesFids['method'] = trim($_POST['method']);
    $rulesFids['posts'] = trim($_POST['posts']);
    $rulesFids['cookies'] = trim($_POST['cookies']);

    // Index page range.
    $rulesFids['page_start'] = intval($_POST['page_start']);
    $rulesFids['page_end'] = intval($_POST['page_end']);