예제 #1
0
파일: tags.app.php 프로젝트: sunhk25/iCMS
 /**
  * Create or update a tag from the submitted POST form.
  *
  * Reads the tag fields from $_POST, validates name/category, normalises the
  * optional metadata into a JSON string, downloads remote pictures through
  * iFS::http(), then inserts or updates the row in the tags table and keeps
  * the 'prop' / 'category' maps in sync.
  *
  * When $_POST['spider_update'] is set and a tag with the same name already
  * exists, that tag is updated instead of raising a duplicate error.
  *
  * @return array|false|null array('code' => ..., 'indexid' => id) when
  *                          $this->callback['code'] is set; false when a
  *                          validation message was echoed instead of alerted;
  *                          otherwise nothing (iPHP::success() is invoked).
  */
 function do_save()
 {
     $id = (int) $_POST['id'];
     $uid = (int) $_POST['uid'];
     $rootid = (int) $_POST['rootid'];
     $cid = implode(',', (array) $_POST['cid']);
     $tcid = implode(',', (array) $_POST['tcid']);
     $pid = implode(',', (array) $_POST['pid']);
     // The underscore-prefixed values are the previous ids, used for map::diff() on update.
     $_cid = iS::escapeStr($_POST['_cid']);
     $_tcid = iS::escapeStr($_POST['_tcid']);
     $_pid = iS::escapeStr($_POST['_pid']);
     $name = iS::escapeStr($_POST['name']);
     $subtitle = iS::escapeStr($_POST['subtitle']);
     $tkey = iS::escapeStr($_POST['tkey']);
     $seotitle = iS::escapeStr($_POST['seotitle']);
     $keywords = iS::escapeStr($_POST['keywords']);
     $pic = iS::escapeStr($_POST['pic']);
     $bpic = iS::escapeStr($_POST['bpic']);
     $mpic = iS::escapeStr($_POST['mpic']);
     $spic = iS::escapeStr($_POST['spic']);
     $description = iS::escapeStr($_POST['description']);
     $url = iS::escapeStr($_POST['url']);
     $related = iS::escapeStr($_POST['related']);
     $tpl = iS::escapeStr($_POST['tpl']);
     $weight = _int($_POST['weight']);
     $ordernum = _int($_POST['ordernum']);
     $status = (int) $_POST['status'];
     $haspic = $pic ? '1' : '0';
     $pubdate = time();
     $metadata = $_POST['metadata'];
     $uid or $uid = iMember::$userid;
     // Callback mode: report the problem by echo/return instead of an alert.
     // The original tested an undefined local $callback, so this branch never ran;
     // the callback state lives on $this->callback (see the end of this method).
     if (!empty($this->callback)) {
         if (empty($name)) {
             echo '标签名称不能为空!';
             return false;
         }
     }
     $name or iPHP::alert('标签名称不能为空!');
     $cid or iPHP::alert('请选择标签所属栏目!');
     if ($metadata) {
         if ($metadata['key']) {
             // Form layout: parallel 'key' and 'value' lists, folded into key => value.
             $md = array();
             foreach ($metadata['key'] as $_mk => $_mval) {
                 // Anchored so the WHOLE key must consist of letters, digits, '_' or '-';
                 // the unanchored pattern accepted any key containing one such character.
                 !preg_match('/^[a-zA-Z0-9_\-]+\z/', $_mval) && iPHP::alert($this->name_text . '附加属性名称只能由英文字母、数字或_-组成(不支持中文)');
                 $md[$_mval] = $metadata['value'][$_mk];
             }
         } else {
             $md = $metadata;
         }
         $metadata = addslashes(json_encode($md));
     }
     if (empty($id)) {
         $hasNameId = iDB::value("SELECT `id` FROM `#iCMS@__tags` where `name` = '{$name}'");
         if ($hasNameId) {
             if (isset($_POST['spider_update'])) {
                 // Switch to updating the existing tag of the same name.
                 $id = $hasNameId;
             } else {
                 iPHP::alert('该标签已经存在!请检查是否重复');
             }
         }
     }
     if (empty($tkey) && $url) {
         // Derive the key from the custom URL and make sure it is not taken.
         $tkey = substr(md5($url), 8, 16);
         $hasTkey = iDB::value("SELECT `id` FROM `#iCMS@__tags` where `tkey` = '{$tkey}'");
         if ($hasTkey) {
             if (isset($_POST['spider_check_tkey'])) {
                 echo '该自定义链接已经存在!请检查是否重复';
                 return false;
             } else {
                 iPHP::alert('该自定义链接已经存在!请检查是否重复');
             }
         }
     }
     $tkey or $tkey = strtolower(pinyin($name));
     iFS::$forceExt = "jpg";
     iFS::checkHttp($pic) && ($pic = iFS::http($pic));
     iFS::checkHttp($bpic) && ($bpic = iFS::http($bpic));
     iFS::checkHttp($mpic) && ($mpic = iFS::http($mpic));
     iFS::checkHttp($spic) && ($spic = iFS::http($spic));
     iPHP::import(iPHP_APP_CORE . '/iMAP.class.php');
     // 'count' is intentionally absent: no $count variable exists in this scope, so
     // compact() could never include it. It is set explicitly on insert below.
     $fields = array('uid', 'rootid', 'cid', 'tcid', 'pid', 'tkey', 'name', 'seotitle', 'subtitle', 'keywords', 'description', 'metadata', 'haspic', 'pic', 'bpic', 'mpic', 'spic', 'url', 'related', 'weight', 'tpl', 'ordernum', 'pubdate', 'status');
     $data = compact($fields);
     if (empty($id)) {
         $data['postime'] = $pubdate;
         $data['count'] = '0';
         $data['comments'] = '0';
         $id = iDB::insert('tags', $data);
         tag::cache($id, 'id');
         map::init('prop', $this->appid);
         $pid && map::add($pid, $id);
         map::init('category', $this->appid);
         map::add($cid, $id);
         $tcid && map::add($tcid, $id);
         $msg = '标签添加完成';
     } else {
         if (isset($_POST['spider_update'])) {
             $hasTag = iDB::row("SELECT * FROM `#iCMS@__tags` where `id` = '{$id}'", ARRAY_A);
             $this->check_spider_data($data, $hasTag, 'subtitle', $subtitle);
             $this->check_spider_data($data, $hasTag, 'description', $description);
             $this->check_spider_data($data, $hasTag, 'seotitle', $seotitle);
             $this->check_spider_data($data, $hasTag, 'keywords', $keywords);
             $this->check_spider_data($data, $hasTag, 'related', $related);
             // The stored ids become the "previous" values for the map diffs below.
             $hasTag['cid'] && $cid && ($data['cid'] = $cid);
             $_cid = $hasTag['cid'];
             $hasTag['tcid'] && $tcid && ($data['tcid'] = $tcid);
             $_tcid = $hasTag['tcid'];
             $hasTag['pid'] && $pid && ($data['pid'] = $pid);
             $_pid = $hasTag['pid'];
         }
         // Counters are never overwritten by an edit.
         unset($data['count'], $data['comments']);
         iDB::update('tags', $data, array('id' => $id));
         tag::cache($id, 'id');
         map::init('prop', $this->appid);
         map::diff($pid, $_pid, $id);
         map::init('category', $this->appid);
         map::diff($cid, $_cid, $id);
         map::diff($tcid, $_tcid, $id);
         $msg = '标签更新完成';
     }
     iACP::callback($id, $this);
     if ($this->callback['code']) {
         return array("code" => $this->callback['code'], 'indexid' => $id);
     }
     iPHP::success($msg, "url:" . APP_URI);
 }
예제 #2
0
파일: push.app.php 프로젝트: World3D/iCMS
 /**
  * Normalise a picture reference.
  *
  * A reference containing the host of iCMS_FS_URL is passed through
  * iFS::fp(..., "-http"); any other reference containing "http://" is passed
  * through iFS::http(); everything else is returned escaped but otherwise as-is.
  *
  * @param  string $path Picture path or URL as submitted
  * @return mixed  Result of iFS::fp() / iFS::http(), or the escaped input
  */
 function getpic($path)
 {
     $picture = iS::escapeStr($path);
     $fsUrlParts = parse_url(iCMS_FS_URL);
     // Reference mentions the file-store host.
     if (stripos($picture, $fsUrlParts['host']) !== false) {
         return iFS::fp($picture, "-http");
     }
     // Some other http:// URL.
     if (stripos($picture, 'http://') !== false) {
         return iFS::http($picture);
     }
     return $picture;
 }
예제 #3
0
 /**
  * Extract one data item from a fetched page and post-process it.
  *
  * @param  string $html      Fetched page source the item rule is matched against
  * @param  array  $data      Data item definition (rule text plus processing flags)
  * @param  array  $rule      Spider rule settings (pagination, proxy, source URL, ...)
  * @param  array  $responses Items collected so far, used for [DATA@...] references
  * @return mixed  null when the item has no rule; the result of spiderUrls::crawl()
  *                for RULE@ items; the result of random() for RAND@ items; false when
  *                an empty result is rejected in worker mode; an array when
  *                $data['array'] is set; otherwise the processed content.
  */
 public static function crawl($html, $data, $rule, $responses)
 {
     if (trim($data['rule']) === '') {
         return;
     }
     $name = $data['name'];
     if (spider::$dataTest) {
         print_r('<b>[' . $name . ']规则:</b>' . iS::escapeStr($data['rule']));
         echo "<hr />";
     }
     // RULE@<rid>: delegate to another rule, treating the page text as the URL list.
     if (strpos($data['rule'], 'RULE@') !== false) {
         spider::$rid = str_replace('RULE@', '', $data['rule']);
         $_urls = trim($html);
         if (spider::$dataTest) {
             print_r('<b>使用[rid:' . spider::$rid . ']规则抓取</b>:' . $_urls);
             echo "<hr />";
         }
         return spiderUrls::crawl('DATA@RULE', false, spider::$rid, $_urls);
     }
     /**
      * RAND@10,0
      * Returns a random value.
      */
     if (strpos($data['rule'], 'RAND@') !== false) {
         $random = str_replace('RAND@', '', $data['rule']);
         // The second part is optional ("RAND@10"); read it without assuming it exists.
         $randParts = explode(',', $random);
         $length = $randParts[0];
         $numeric = isset($randParts[1]) ? $randParts[1] : null;
         return random($length, empty($numeric) ? 0 : 1);
     }
     $contentArray = array();
     $contentHash = array();
     $_content = null;
     $_content = spiderContent::match($html, $data, $rule);
     $cmd5 = md5($_content);
     $contentArray[] = $_content;
     $contentHash[$cmd5] = true;
     if ($data['page']) {
         if (empty($rule['page_url'])) {
             $rule['page_url'] = $rule['list_url'];
         }
         if (empty(spider::$allHtml)) {
             $page_url_array = array();
             // Not every branch below assigns this, but the test output echoes it.
             $page_url_rule = null;
             $page_area_rule = trim($rule['page_area_rule']);
             if ($page_area_rule) {
                 if (strpos($page_area_rule, 'DOM::') !== false) {
                     // Pagination links located with a phpQuery selector.
                     iPHP::import(iPHP_LIB . '/phpQuery.php');
                     $doc = phpQuery::newDocumentHTML($html, 'UTF-8');
                     $pq_dom = str_replace('DOM::', '', $page_area_rule);
                     $pq_array = phpQuery::pq($pq_dom);
                     foreach ($pq_array as $pn => $pq_val) {
                         $href = phpQuery::pq($pq_val)->attr('href');
                         if ($href) {
                             if ($rule['page_url_rule']) {
                                 if (strpos($rule['page_url_rule'], '<%') !== false) {
                                     $page_url_rule = spiderTools::pregTag($rule['page_url_rule']);
                                     if (!preg_match('|' . $page_url_rule . '|is', $href)) {
                                         continue;
                                     }
                                 } else {
                                     $cleanhref = spiderTools::dataClean($rule['page_url_rule'], $href);
                                     if ($cleanhref) {
                                         $href = $cleanhref;
                                         unset($cleanhref);
                                     } else {
                                         continue;
                                     }
                                 }
                             }
                             $href = str_replace('<%url%>', $href, $rule['page_url']);
                             $page_url_array[$pn] = spiderTools::url_complement($rule['__url__'], $href);
                         }
                     }
                     phpQuery::unloadDocuments($doc->getDocumentID());
                 } else {
                     // Pagination area located with a regular expression.
                     $page_area_rule = spiderTools::pregTag($page_area_rule);
                     if ($page_area_rule) {
                         // The original passed an undefined $PREG_SET_ORDER variable as flags;
                         // preg_match() takes no such flag, so it is simply omitted.
                         preg_match('|' . $page_area_rule . '|is', $html, $matches);
                         $page_area = isset($matches['content']) ? $matches['content'] : '';
                     } else {
                         $page_area = $html;
                     }
                     if ($rule['page_url_rule']) {
                         $page_url_rule = spiderTools::pregTag($rule['page_url_rule']);
                         preg_match_all('|' . $page_url_rule . '|is', $page_area, $page_url_matches, PREG_SET_ORDER);
                         foreach ($page_url_matches as $pn => $row) {
                             $href = str_replace('<%url%>', $row['url'], $rule['page_url']);
                             $page_url_array[$pn] = spiderTools::url_complement($rule['__url__'], $href);
                             gc_collect_cycles();
                         }
                     }
                     unset($page_area);
                 }
             } else {
                 // Computed mode: build page URLs from a template and a numeric range.
                 if ($rule['page_url_parse'] == '<%url%>') {
                     $page_url = str_replace('<%url%>', $rule['__url__'], $rule['page_url']);
                 } else {
                     $page_url_rule = spiderTools::pregTag($rule['page_url_parse']);
                     preg_match('|' . $page_url_rule . '|is', $rule['__url__'], $matches);
                     $matched_url = isset($matches['url']) ? $matches['url'] : '';
                     $page_url = str_replace('<%url%>', $matched_url, $rule['page_url']);
                 }
                 if (stripos($page_url, '<%step%>') !== false) {
                     // A missing, zero or negative step would never reach page_no_end,
                     // so force the increment to be at least 1.
                     $page_no_step = max(1, (int) $rule['page_no_step']);
                     for ($pn = $rule['page_no_start']; $pn <= $rule['page_no_end']; $pn = $pn + $page_no_step) {
                         $page_url_array[$pn] = str_replace('<%step%>', $pn, $page_url);
                         gc_collect_cycles();
                     }
                 }
             }
             // De-duplicate the URL list and drop the page that was already fetched.
             if ($page_url_array) {
                 $page_url_array = array_filter($page_url_array);
                 $page_url_array = array_unique($page_url_array);
                 $puk = array_search($rule['__url__'], $page_url_array);
                 if ($puk !== false) {
                     unset($page_url_array[$puk]);
                 }
             }
             if (spider::$dataTest) {
                 echo "<b>内容页网址:</b>" . $rule['__url__'] . "<br />";
                 echo "<b>分页:</b>" . $rule['page_url'] . "<br />";
                 echo iS::escapeStr($page_url_rule);
                 echo "<hr />";
             }
             if (spider::$dataTest) {
                 echo "<b>分页列表:</b><pre>";
                 print_r($page_url_array);
                 echo "</pre><hr />";
             }
             spider::$content_right_code = trim($rule['page_url_right']);
             spider::$content_error_code = trim($rule['page_url_error']);
             spider::$curl_proxy = $rule['proxy'];
             $pageurl = array();
             // Fetch each page in turn; stop at the first empty, repeated or rejected page.
             foreach ($page_url_array as $pukey => $purl) {
                 $phtml = spiderTools::remote($purl);
                 if (empty($phtml)) {
                     break;
                 }
                 $md5 = md5($phtml);
                 if (isset($pageurl[$md5])) {
                     break;
                 }
                 $check_content = spiderTools::check_content_code($phtml);
                 if ($check_content === false) {
                     unset($check_content, $phtml);
                     break;
                 }
                 $_content = spiderContent::match($phtml, $data, $rule);
                 $cmd5 = md5($_content);
                 if (isset($contentHash[$cmd5])) {
                     break;
                 }
                 $contentArray[] = $_content;
                 $contentHash[$cmd5] = true;
                 $pageurl[$md5] = $purl;
                 // Keep the page source so later data items can reuse it (see else branch).
                 spider::$allHtml[$md5] = $phtml;
             }
             gc_collect_cycles();
             unset($check_content, $phtml);
             if (spider::$dataTest) {
                 echo "<b>最终分页列表:</b><pre>";
                 print_r($pageurl);
                 echo "</pre><hr />";
             }
         } else {
             // Pages were already fetched for an earlier item: match against the stored sources.
             foreach ((array) spider::$allHtml as $ahkey => $phtml) {
                 $contentArray[] = spiderContent::match($phtml, $data, $rule);
             }
         }
     }
     $content = implode('#--iCMS.PageBreak--#', $contentArray);
     $html = null;
     unset($html, $contentArray, $contentHash, $_content);
     $content = stripslashes($content);
     if (spider::$dataTest) {
         print_r('<b>[' . $name . ']匹配结果:</b>' . htmlspecialchars($content));
         echo "<hr />";
     }
     if ($data['cleanbefor']) {
         $content = spiderTools::dataClean($data['cleanbefor'], $content);
     }
     /**
      * Reference previously collected items inside this item:
      * [DATA@name] / [DATA@name.key]
      */
     if (strpos($content, '[DATA@') !== false) {
         $content = spiderTools::getDATA($responses, $content);
     }
     if ($data['cleanhtml']) {
         $content = stripslashes($content);
         $content = preg_replace('/<[\\/\\!]*?[^<>]*?>/is', '', $content);
     }
     if ($data['format'] && $content) {
         $content = autoformat($content);
     }
     if ($data['img_absolute'] && $content) {
         // Rewrite every <img src> through spiderTools::url_complement() against the page URL.
         preg_match_all("/<img.*?src\\s*=[\"|'](.*?)[\"|']/is", $content, $img_match);
         if ($img_match[1]) {
             $_img_array = array_unique($img_match[1]);
             $_img_urls = array();
             foreach ((array) $_img_array as $_img_key => $_img_src) {
                 $_img_urls[$_img_key] = spiderTools::url_complement($rule['__url__'], $_img_src);
             }
             $content = str_replace($_img_array, $_img_urls, $content);
         }
         unset($img_match, $_img_array, $_img_urls, $_img_src);
     }
     if ($data['trim']) {
         $content = trim($content);
     }
     if ($data['capture']) {
         // The content is passed to spiderTools::remote() and replaced by its result.
         $content = spiderTools::remote($content);
     }
     if ($data['download']) {
         // The content is passed to iFS::http() and replaced by its result.
         $content = iFS::http($content);
     }
     if ($data['cleanafter']) {
         $content = spiderTools::dataClean($data['cleanafter'], $content);
     }
     if ($data['autobreakpage']) {
         $content = spiderTools::autoBreakPage($content);
     }
     if ($data['mergepage']) {
         $content = spiderTools::mergePage($content);
     }
     if ($data['empty'] && empty($content)) {
         $emptyMsg = '[' . $name . ']规则设置了不允许为空.当前抓取结果为空!请检查,规则是否正确!';
         if (spider::$dataTest) {
             exit('<h1>' . $emptyMsg . '</h1>');
         }
         if (spider::$work) {
             echo "\n{$emptyMsg}\n";
             return false;
         } else {
             iPHP::alert($emptyMsg);
         }
     }
     if ($data['json_decode']) {
         $content = json_decode($content, true);
     }
     if ($data['array']) {
         return (array) $content;
     }
     if (spider::$callback['content'] && is_callable(spider::$callback['content'])) {
         $content = call_user_func_array(spider::$callback['content'], array($content));
     }
     return $content;
 }
예제 #4
0
파일: editor.app.php 프로젝트: World3D/iCMS
 /**
  * Fetch the remote images listed in $_POST['source'] and report the results as JSON.
  *
  * Each URL is fetched through iFS::http(). The JSON payload carries one entry
  * per processed URL: either the iFS::$ERROR value or a SUCCESS record with the
  * stored URL, size, original name and source URL.
  *
  * @return void Output is produced by iPHP::json().
  */
 function do_catchimage()
 {
     $url_array = (array) $_POST['source'];
     /* Fetch the remote images */
     $list = array();
     $uri = parse_url(iCMS_FS_URL);
     foreach ($url_array as $_k => $imgurl) {
         // Images already on the file-store host need no fetching. The original tried
         // to drop them with unset() on an undefined $_array, which had no effect and
         // let them be fetched anyway.
         if (stripos($imgurl, $uri['host']) !== false) {
             continue;
         }
         $F = iFS::http($imgurl, 'array');
         if ($F === false) {
             $a = iFS::$ERROR;
         } else {
             // Reset per image so an entry without a path cannot inherit the
             // previous image's URL.
             $url = '';
             $F['path'] && ($url = iFS::fp($F['path'], '+http'));
             // "title" was read from an undefined $info variable; it is kept in the
             // response as an explicit empty value.
             $a = array("state" => 'SUCCESS', "url" => $url, "size" => $F["size"], "title" => '', "original" => iS::escapeStr($F["oname"]), "source" => iS::escapeStr($imgurl));
         }
         array_push($list, $a);
     }
     /* Return the fetch results */
     iPHP::json(array('code' => count($list) ? '1' : '0', 'state' => count($list) ? 'SUCCESS' : 'ERROR', 'list' => $list));
 }
예제 #5
0
 /**
  * Download the remote images referenced in a piece of HTML and point the
  * markup at the stored copies.
  *
  * @param  string      $content Slash-escaped HTML content
  * @param  bool|string $remote  Falsy: return $content untouched. "autopic": return
  *                              only the first image (see below). Any other truthy
  *                              value: rewrite all remote images.
  * @param  int         $aid     When non-zero, each downloaded file is linked to
  *                              this id via articleTable::filedata_update_indexid()
  *                              unless articleTable::filedata_value() already
  *                              returns a value for it.
  * @return mixed  In "autopic" mode: the first image URL (the stored URL if it was
  *                downloaded, otherwise the trimmed original), or null when the
  *                content has no image. Otherwise the rewritten, slash-escaped content.
  */
 function remotepic($content, $remote = false, $aid = 0)
 {
     if (!$remote) {
         return $content;
     }
     iFS::$forceExt = "jpg";
     $content = stripslashes($content);
     preg_match_all("/<img.*?src\\s*=[\"|'](.*?)[\"|']/is", $content, $match);
     $array = array_unique($match[1]);
     $uri = parse_url(iCMS_FS_URL);
     // Search/replace pairs, filled together under the same key so they always line
     // up. The original replaced $array with a separate $fArray that skipped failed
     // downloads; str_replace() pairs by position, so a single failure shifted every
     // later replacement onto the wrong image.
     $search = array();
     $replace = array();
     foreach ($array as $key => $value) {
         $original = $value;
         $value = trim($value);
         if (stripos($value, $uri['host']) === false) {
             $filepath = iFS::http($value);
             if ($filepath) {
                 if ($aid) {
                     $filename = basename($filepath);
                     $filename = substr($filename, 0, 32);
                     $faid = articleTable::filedata_value($filename);
                     empty($faid) && articleTable::filedata_update_indexid($aid, $filename);
                 }
                 $value = iFS::fp($filepath, '+http');
                 $search[$key] = $original;
                 $replace[$key] = $value;
             }
         } else {
             // Already on the file-store host: nothing to download or rewrite.
             unset($array[$key]);
         }
         if ($remote === "autopic" && $key == 0) {
             return $value;
         }
     }
     if ($remote === "autopic" && empty($array)) {
         return;
     }
     if ($search) {
         // Same descending-key order the original applied before replacing.
         krsort($search);
         krsort($replace);
         $content = str_replace($search, $replace, $content);
     }
     return addslashes($content);
 }
예제 #6
0
파일: tags.app.php 프로젝트: World3D/iCMS
 /**
  * Create or update a tag from the submitted POST form.
  *
  * Reads the tag fields from $_POST, validates name/category, serialises the
  * optional metadata, downloads a remote picture through iFS::http(), then
  * inserts or updates the row in the tags table and keeps the 'prop' /
  * 'category' maps in sync. Finishes through iPHP::success().
  *
  * @return void
  */
 function do_save()
 {
     $id = (int) $_POST['id'];
     $uid = (int) $_POST['uid'];
     $cid = implode(',', (array) $_POST['cid']);
     $tcid = implode(',', (array) $_POST['tcid']);
     $pid = implode(',', (array) $_POST['pid']);
     // The underscore-prefixed values are the previous ids, used for map::diff() on update.
     $_cid = iS::escapeStr($_POST['_cid']);
     $_tcid = iS::escapeStr($_POST['_tcid']);
     $_pid = iS::escapeStr($_POST['_pid']);
     $name = iS::escapeStr($_POST['name']);
     $subtitle = iS::escapeStr($_POST['subtitle']);
     $tkey = iS::escapeStr($_POST['tkey']);
     $seotitle = iS::escapeStr($_POST['seotitle']);
     $keywords = iS::escapeStr($_POST['keywords']);
     $pic = iS::escapeStr($_POST['pic']);
     $description = iS::escapeStr($_POST['description']);
     $url = iS::escapeStr($_POST['url']);
     $related = iS::escapeStr($_POST['related']);
     $tpl = iS::escapeStr($_POST['tpl']);
     $weight = _int($_POST['weight']);
     $ordernum = _int($_POST['ordernum']);
     $status = (int) $_POST['status'];
     $haspic = $pic ? '1' : '0';
     $pubdate = time();
     $metadata = iS::escapeStr($_POST['metadata']);
     $uid or $uid = iMember::$userid;
     $name or iPHP::alert('标签名称不能为空!');
     $cid or iPHP::alert('请选择标签所属栏目!');
     if ($metadata) {
         // Form layout: parallel 'key' and 'value' lists, folded into key => value.
         // Read defensively so a submission without a 'key' list (or with a scalar
         // metadata value) yields an empty map instead of an offset error.
         $metaKeys = (is_array($metadata) && isset($metadata['key'])) ? (array) $metadata['key'] : array();
         $md = array();
         foreach ($metaKeys as $_mk => $_mval) {
             // Anchored so the WHOLE key must consist of letters, digits, '_' or '-';
             // the unanchored pattern accepted any key containing one such character.
             !preg_match('/^[a-zA-Z0-9_\-]+\z/', $_mval) && iPHP::alert($this->name_text . '附加属性名称只能由英文字母、数字或_-组成(不支持中文)');
             $md[$_mval] = isset($metadata['value'][$_mk]) ? $metadata['value'][$_mk] : null;
         }
         $metadata = addslashes(serialize($md));
     }
     if (empty($id)) {
         iDB::value("SELECT `id` FROM `#iCMS@__tags` where `name` = '{$name}'") && iPHP::alert('该标签已经存在!请检查是否重复');
     }
     if (empty($tkey) && $url) {
         // Derive the key from the custom URL and make sure it is not taken.
         $tkey = substr(md5($url), 8, 16);
         iDB::value("SELECT `id` FROM `#iCMS@__tags` where `tkey` = '{$tkey}'") && iPHP::alert('该自定义链接已经存在!请检查是否重复');
     }
     $tkey or $tkey = strtolower(pinyin($name));
     strstr($pic, 'http://') && ($pic = iFS::http($pic));
     iPHP::import(iPHP_APP_CORE . '/iMAP.class.php');
     // 'count' is intentionally absent: no $count variable exists in this scope, so
     // compact() could never include it. It is set explicitly on insert below.
     $fields = array('uid', 'cid', 'tcid', 'pid', 'tkey', 'name', 'seotitle', 'subtitle', 'keywords', 'description', 'metadata', 'haspic', 'pic', 'url', 'related', 'weight', 'tpl', 'ordernum', 'pubdate', 'status');
     $data = compact($fields);
     if (empty($id)) {
         $data['count'] = '0';
         $data['comments'] = '0';
         $id = iDB::insert('tags', $data);
         tag::cache($id, 'id');
         map::init('prop', $this->appid);
         $pid && map::add($pid, $id);
         map::init('category', $this->appid);
         map::add($cid, $id);
         $tcid && map::add($tcid, $id);
         iPHP::success('标签添加完成', "url:" . APP_URI);
     } else {
         // Counters are never overwritten by an edit.
         unset($data['count'], $data['comments']);
         iDB::update('tags', $data, array('id' => $id));
         tag::cache($id, 'id');
         map::init('prop', $this->appid);
         map::diff($pid, $_pid, $id);
         map::init('category', $this->appid);
         map::diff($cid, $_cid, $id);
         map::diff($tcid, $_tcid, $id);
         iPHP::success('标签更新完成', "url:" . APP_URI);
     }
 }