function do_save() { $id = (int) $_POST['id']; $uid = (int) $_POST['uid']; $rootid = (int) $_POST['rootid']; $cid = implode(',', (array) $_POST['cid']); $tcid = implode(',', (array) $_POST['tcid']); $pid = implode(',', (array) $_POST['pid']); $_cid = iS::escapeStr($_POST['_cid']); $_tcid = iS::escapeStr($_POST['_tcid']); $_pid = iS::escapeStr($_POST['_pid']); $name = iS::escapeStr($_POST['name']); $subtitle = iS::escapeStr($_POST['subtitle']); $tkey = iS::escapeStr($_POST['tkey']); $seotitle = iS::escapeStr($_POST['seotitle']); $keywords = iS::escapeStr($_POST['keywords']); $pic = iS::escapeStr($_POST['pic']); $bpic = iS::escapeStr($_POST['bpic']); $mpic = iS::escapeStr($_POST['mpic']); $spic = iS::escapeStr($_POST['spic']); $description = iS::escapeStr($_POST['description']); $url = iS::escapeStr($_POST['url']); $related = iS::escapeStr($_POST['related']); $tpl = iS::escapeStr($_POST['tpl']); $weight = _int($_POST['weight']); $ordernum = _int($_POST['ordernum']); $status = (int) $_POST['status']; $haspic = $pic ? '1' : '0'; $pubdate = time(); $metadata = $_POST['metadata']; $uid or $uid = iMember::$userid; if ($callback) { if (empty($name)) { echo '标签名称不能为空!'; return false; } } $name or iPHP::alert('标签名称不能为空!'); $cid or iPHP::alert('请选择标签所属栏目!'); if ($metadata) { if ($metadata['key']) { $md = array(); foreach ($metadata['key'] as $_mk => $_mval) { !preg_match("/[a-zA-Z0-9_\\-]/", $_mval) && iPHP::alert($this->name_text . '附加属性名称只能由英文字母、数字或_-组成(不支持中文)'); $md[$_mval] = $metadata['value'][$_mk]; } } else { $md = $metadata; } $metadata = addslashes(json_encode($md)); } if (empty($id)) { $hasNameId = iDB::value("SELECT `id` FROM `#iCMS@__tags` where `name` = '{$name}'"); if ($hasNameId) { if (isset($_POST['spider_update'])) { $id = $hasNameId; } else { iPHP::alert('该标签已经存在!请检查是否重复'); } } } if (empty($tkey) && $url) { $tkey = substr(md5($url), 8, 16); $hasTkey = iDB::value("SELECT `id` FROM `#iCMS@__tags` where `tkey` = '{$tkey}'"); if ($hasTkey) { if (isset($_POST['spider_check_tkey'])) { echo '该自定义链接已经存在!请检查是否重复'; return false; } else { iPHP::alert('该自定义链接已经存在!请检查是否重复'); } } } $tkey or $tkey = strtolower(pinyin($name)); iFS::$forceExt = "jpg"; iFS::checkHttp($pic) && ($pic = iFS::http($pic)); iFS::checkHttp($bpic) && ($bpic = iFS::http($bpic)); iFS::checkHttp($mpic) && ($mpic = iFS::http($mpic)); iFS::checkHttp($spic) && ($spic = iFS::http($spic)); iPHP::import(iPHP_APP_CORE . '/iMAP.class.php'); $fields = array('uid', 'rootid', 'cid', 'tcid', 'pid', 'tkey', 'name', 'seotitle', 'subtitle', 'keywords', 'description', 'metadata', 'haspic', 'pic', 'bpic', 'mpic', 'spic', 'url', 'related', 'count', 'weight', 'tpl', 'ordernum', 'pubdate', 'status'); $data = compact($fields); if (empty($id)) { $data['postime'] = $pubdate; $data['count'] = '0'; $data['comments'] = '0'; $id = iDB::insert('tags', $data); tag::cache($id, 'id'); map::init('prop', $this->appid); $pid && map::add($pid, $id); map::init('category', $this->appid); map::add($cid, $id); $tcid && map::add($tcid, $id); $msg = '标签添加完成'; } else { if (isset($_POST['spider_update'])) { // $data = array(); $hasTag = iDB::row("SELECT * FROM `#iCMS@__tags` where `id` = '{$id}'", ARRAY_A); $this->check_spider_data($data, $hasTag, 'subtitle', $subtitle); $this->check_spider_data($data, $hasTag, 'description', $description); $this->check_spider_data($data, $hasTag, 'seotitle', $seotitle); $this->check_spider_data($data, $hasTag, 'keywords', $keywords); $this->check_spider_data($data, $hasTag, 'related', $related); $hasTag['cid'] && $cid && ($data['cid'] = $cid); $_cid = $hasTag['cid']; $hasTag['tcid'] && $tcid && ($data['tcid'] = $tcid); $_tcid = $hasTag['tcid']; $hasTag['pid'] && $pid && ($data['pid'] = $pid); $_pid = $hasTag['pid']; } unset($data['count'], $data['comments']); iDB::update('tags', $data, array('id' => $id)); tag::cache($id, 'id'); map::init('prop', $this->appid); map::diff($pid, $_pid, $id); map::init('category', $this->appid); map::diff($cid, $_cid, $id); map::diff($tcid, $_tcid, $id); $msg = '标签更新完成'; } iACP::callback($id, $this); if ($this->callback['code']) { return array("code" => $this->callback['code'], 'indexid' => $id); } iPHP::success($msg, "url:" . APP_URI); }
function getpic($path) { $uri = parse_url(iCMS_FS_URL); $pic = iS::escapeStr($path); if (stripos($pic, $uri['host']) === false) { stripos($pic, 'http://') === false or $pic = iFS::http($pic); } else { $pic = iFS::fp($pic, "-http"); } return $pic; }
/** * 抓取资源 * @param [string] $html [抓取结果] * @param [array] $data [数据项] * @param [array] $rule [规则] * @param [array] $responses [已经抓取资源] * @return [array] [返回处理结果] */ public static function crawl($html, $data, $rule, $responses) { if (trim($data['rule']) === '') { return; } $name = $data['name']; if (spider::$dataTest) { print_r('<b>[' . $name . ']规则:</b>' . iS::escapeStr($data['rule'])); echo "<hr />"; } if (strpos($data['rule'], 'RULE@') !== false) { spider::$rid = str_replace('RULE@', '', $data['rule']); $_urls = trim($html); if (spider::$dataTest) { print_r('<b>使用[rid:' . spider::$rid . ']规则抓取</b>:' . $_urls); echo "<hr />"; } return spiderUrls::crawl('DATA@RULE', false, spider::$rid, $_urls); } /** * RAND@10,0 * 返回随机数 */ if (strpos($data['rule'], 'RAND@') !== false) { $random = str_replace('RAND@', '', $data['rule']); list($length, $numeric) = explode(',', $random); return random($length, empty($numeric) ? 0 : 1); } $contentArray = array(); $contentHash = array(); $_content = null; $_content = spiderContent::match($html, $data, $rule); $cmd5 = md5($_content); $contentArray[] = $_content; $contentHash[$cmd5] = true; if ($data['page']) { if (empty($rule['page_url'])) { $rule['page_url'] = $rule['list_url']; } if (empty(spider::$allHtml)) { $page_url_array = array(); $page_area_rule = trim($rule['page_area_rule']); if ($page_area_rule) { if (strpos($page_area_rule, 'DOM::') !== false) { iPHP::import(iPHP_LIB . '/phpQuery.php'); $doc = phpQuery::newDocumentHTML($html, 'UTF-8'); $pq_dom = str_replace('DOM::', '', $page_area_rule); $pq_array = phpQuery::pq($pq_dom); foreach ($pq_array as $pn => $pq_val) { $href = phpQuery::pq($pq_val)->attr('href'); if ($href) { if ($rule['page_url_rule']) { if (strpos($rule['page_url_rule'], '<%') !== false) { $page_url_rule = spiderTools::pregTag($rule['page_url_rule']); if (!preg_match('|' . $page_url_rule . '|is', $href)) { continue; } } else { $cleanhref = spiderTools::dataClean($rule['page_url_rule'], $href); if ($cleanhref) { $href = $cleanhref; unset($cleanhref); } else { continue; } } } $href = str_replace('<%url%>', $href, $rule['page_url']); $page_url_array[$pn] = spiderTools::url_complement($rule['__url__'], $href); } } phpQuery::unloadDocuments($doc->getDocumentID()); } else { $page_area_rule = spiderTools::pregTag($page_area_rule); if ($page_area_rule) { preg_match('|' . $page_area_rule . '|is', $html, $matches, $PREG_SET_ORDER); $page_area = $matches['content']; } else { $page_area = $html; } if ($rule['page_url_rule']) { $page_url_rule = spiderTools::pregTag($rule['page_url_rule']); preg_match_all('|' . $page_url_rule . '|is', $page_area, $page_url_matches, PREG_SET_ORDER); foreach ($page_url_matches as $pn => $row) { $href = str_replace('<%url%>', $row['url'], $rule['page_url']); $page_url_array[$pn] = spiderTools::url_complement($rule['__url__'], $href); gc_collect_cycles(); } } unset($page_area); } } else { // 逻辑方式 if ($rule['page_url_parse'] == '<%url%>') { $page_url = str_replace('<%url%>', $rule['__url__'], $rule['page_url']); } else { $page_url_rule = spiderTools::pregTag($rule['page_url_parse']); preg_match('|' . $page_url_rule . '|is', $rule['__url__'], $matches, $PREG_SET_ORDER); $page_url = str_replace('<%url%>', $matches['url'], $rule['page_url']); } if (stripos($page_url, '<%step%>') !== false) { for ($pn = $rule['page_no_start']; $pn <= $rule['page_no_end']; $pn = $pn + $rule['page_no_step']) { $page_url_array[$pn] = str_replace('<%step%>', $pn, $page_url); gc_collect_cycles(); } } } //URL去重清理 if ($page_url_array) { $page_url_array = array_filter($page_url_array); $page_url_array = array_unique($page_url_array); $puk = array_search($rule['__url__'], $page_url_array); if ($puk !== false) { unset($page_url_array[$puk]); } } if (spider::$dataTest) { echo "<b>内容页网址:</b>" . $rule['__url__'] . "<br />"; echo "<b>分页:</b>" . $rule['page_url'] . "<br />"; echo iS::escapeStr($page_url_rule); echo "<hr />"; } if (spider::$dataTest) { echo "<b>分页列表:</b><pre>"; print_r($page_url_array); echo "</pre><hr />"; } spider::$content_right_code = trim($rule['page_url_right']); spider::$content_error_code = trim($rule['page_url_error']); spider::$curl_proxy = $rule['proxy']; $pageurl = array(); foreach ($page_url_array as $pukey => $purl) { //usleep(100); $phtml = spiderTools::remote($purl); if (empty($phtml)) { break; } $md5 = md5($phtml); if ($pageurl[$md5]) { break; } $check_content = spiderTools::check_content_code($phtml); if ($check_content === false) { unset($check_content, $phtml); break; } $_content = spiderContent::match($phtml, $data, $rule); $cmd5 = md5($_content); if ($contentHash[$cmd5]) { break; } $contentArray[] = $_content; $contentHash[$cmd5] = true; $pageurl[$md5] = $purl; spider::$allHtml[$md5] = $phtml; } gc_collect_cycles(); unset($check_content, $phtml); if (spider::$dataTest) { echo "<b>最终分页列表:</b><pre>"; print_r($pageurl); echo "</pre><hr />"; } } else { foreach ((array) spider::$allHtml as $ahkey => $phtml) { $contentArray[] = spiderContent::match($phtml, $data, $rule); } } } $content = implode('#--iCMS.PageBreak--#', $contentArray); $html = null; unset($html, $contentArray, $contentHash, $_content); $content = stripslashes($content); if (spider::$dataTest) { print_r('<b>[' . $name . ']匹配结果:</b>' . htmlspecialchars($content)); echo "<hr />"; } if ($data['cleanbefor']) { $content = spiderTools::dataClean($data['cleanbefor'], $content); } /** * 在数据项里调用之前采集的数据[DATA@name][DATA@name.key] */ if (strpos($content, '[DATA@') !== false) { $content = spiderTools::getDATA($responses, $content); } if ($data['cleanhtml']) { $content = stripslashes($content); $content = preg_replace('/<[\\/\\!]*?[^<>]*?>/is', '', $content); } if ($data['format'] && $content) { $content = autoformat($content); } if ($data['img_absolute'] && $content) { // $content = stripslashes($content); preg_match_all("/<img.*?src\\s*=[\"|'](.*?)[\"|']/is", $content, $img_match); if ($img_match[1]) { $_img_array = array_unique($img_match[1]); $_img_urls = array(); foreach ((array) $_img_array as $_img_key => $_img_src) { $_img_urls[$_img_key] = spiderTools::url_complement($rule['__url__'], $_img_src); } $content = str_replace($_img_array, $_img_urls, $content); } unset($img_match, $_img_array, $_img_urls, $_img_src); } if ($data['trim']) { $content = trim($content); } if ($data['capture']) { // $content = stripslashes($content); $content = spiderTools::remote($content); } if ($data['download']) { // $content = stripslashes($content); $content = iFS::http($content); } if ($data['cleanafter']) { $content = spiderTools::dataClean($data['cleanafter'], $content); // $content = stripslashes($content); } if ($data['autobreakpage']) { $content = spiderTools::autoBreakPage($content); } if ($data['mergepage']) { $content = spiderTools::mergePage($content); } if ($data['empty'] && empty($content)) { $emptyMsg = '[' . $name . ']规则设置了不允许为空.当前抓取结果为空!请检查,规则是否正确!'; if (spider::$dataTest) { exit('<h1>' . $emptyMsg . '</h1>'); } if (spider::$work) { echo "\n{$emptyMsg}\n"; return false; } else { iPHP::alert($emptyMsg); } } if ($data['json_decode']) { $content = json_decode($content, true); } if ($data['array']) { return (array) $content; } if (spider::$callback['content'] && is_callable(spider::$callback['content'])) { $content = call_user_func_array(spider::$callback['content'], array($content)); } return $content; }
function do_catchimage() { $url_array = (array) $_POST['source']; /* 抓取远程图片 */ $list = array(); $uri = parse_url(iCMS_FS_URL); foreach ($url_array as $_k => $imgurl) { if (stripos($imgurl, $uri['host']) !== false) { unset($_array[$_k]); } $F = iFS::http($imgurl, 'array'); if ($F === false) { $a = iFS::$ERROR; } else { $F['path'] && ($url = iFS::fp($F['path'], '+http')); $a = array("state" => 'SUCCESS', "url" => $url, "size" => $F["size"], "title" => iS::escapeStr($info["title"]), "original" => iS::escapeStr($F["oname"]), "source" => iS::escapeStr($imgurl)); } array_push($list, $a); } /* 返回抓取数据 */ iPHP::json(array('code' => count($list) ? '1' : '0', 'state' => count($list) ? 'SUCCESS' : 'ERROR', 'list' => $list)); }
function remotepic($content, $remote = false, $aid = 0) { if (!$remote) { return $content; } iFS::$forceExt = "jpg"; $content = stripslashes($content); preg_match_all("/<img.*?src\\s*=[\"|'](.*?)[\"|']/is", $content, $match); $array = array_unique($match[1]); $uri = parse_url(iCMS_FS_URL); $fArray = array(); $fpArray = array(); foreach ($array as $key => $value) { $value = trim($value); if (stripos($value, $uri['host']) === false) { $filepath = iFS::http($value); if ($filepath) { if ($aid) { $filename = basename($filepath); $filename = substr($filename, 0, 32); $faid = articleTable::filedata_value($filename); empty($faid) && articleTable::filedata_update_indexid($aid, $filename); } $value = iFS::fp($filepath, '+http'); $fArray[$key] = $value; } } else { unset($array[$key]); } if ($remote === "autopic" && $key == 0) { return $value; } } if ($remote === "autopic" && empty($array)) { return; } if ($array && $fArray) { krsort($array); krsort($fArray); $content = str_replace($array, $fArray, $content); } return addslashes($content); }
function do_save() { $id = (int) $_POST['id']; $uid = (int) $_POST['uid']; $cid = implode(',', (array) $_POST['cid']); $tcid = implode(',', (array) $_POST['tcid']); $pid = implode(',', (array) $_POST['pid']); $_cid = iS::escapeStr($_POST['_cid']); $_tcid = iS::escapeStr($_POST['_tcid']); $_pid = iS::escapeStr($_POST['_pid']); $name = iS::escapeStr($_POST['name']); $subtitle = iS::escapeStr($_POST['subtitle']); $tkey = iS::escapeStr($_POST['tkey']); $seotitle = iS::escapeStr($_POST['seotitle']); $keywords = iS::escapeStr($_POST['keywords']); $pic = iS::escapeStr($_POST['pic']); $description = iS::escapeStr($_POST['description']); $url = iS::escapeStr($_POST['url']); $related = iS::escapeStr($_POST['related']); $tpl = iS::escapeStr($_POST['tpl']); $weight = _int($_POST['weight']); $ordernum = _int($_POST['ordernum']); $status = (int) $_POST['status']; $haspic = $pic ? '1' : '0'; $pubdate = time(); $metadata = iS::escapeStr($_POST['metadata']); $uid or $uid = iMember::$userid; $name or iPHP::alert('标签名称不能为空!'); $cid or iPHP::alert('请选择标签所属栏目!'); if ($metadata) { $md = array(); foreach ($metadata['key'] as $_mk => $_mval) { !preg_match("/[a-zA-Z0-9_\\-]/", $_mval) && iPHP::alert($this->name_text . '附加属性名称只能由英文字母、数字或_-组成(不支持中文)'); $md[$_mval] = $metadata['value'][$_mk]; } $metadata = addslashes(serialize($md)); } if (empty($id)) { iDB::value("SELECT `id` FROM `#iCMS@__tags` where `name` = '{$name}'") && iPHP::alert('该标签已经存在!请检查是否重复'); } if (empty($tkey) && $url) { $tkey = substr(md5($url), 8, 16); iDB::value("SELECT `id` FROM `#iCMS@__tags` where `tkey` = '{$tkey}'") && iPHP::alert('该自定义链接已经存在!请检查是否重复'); } $tkey or $tkey = strtolower(pinyin($name)); strstr($pic, 'http://') && ($pic = iFS::http($pic)); iPHP::import(iPHP_APP_CORE . '/iMAP.class.php'); $fields = array('uid', 'cid', 'tcid', 'pid', 'tkey', 'name', 'seotitle', 'subtitle', 'keywords', 'description', 'metadata', 'haspic', 'pic', 'url', 'related', 'count', 'weight', 'tpl', 'ordernum', 'pubdate', 'status'); $data = compact($fields); if (empty($id)) { $data['count'] = '0'; $data['comments'] = '0'; $id = iDB::insert('tags', $data); tag::cache($id, 'id'); map::init('prop', $this->appid); $pid && map::add($pid, $id); map::init('category', $this->appid); map::add($cid, $id); $tcid && map::add($tcid, $id); iPHP::success('标签添加完成', "url:" . APP_URI); } else { unset($data['count'], $data['comments']); iDB::update('tags', $data, array('id' => $id)); tag::cache($id, 'id'); map::init('prop', $this->appid); map::diff($pid, $_pid, $id); map::init('category', $this->appid); map::diff($cid, $_cid, $id); map::diff($tcid, $_tcid, $id); iPHP::success('标签更新完成', "url:" . APP_URI); } }