// Tail of an SQL statement being assembled in $SqlStr; the beginning of the
// statement is outside this view (presumably an UPDATE ... SET on the
// collection-rule table - TODO confirm). This line appends the quoted
// $cateid value for a column named on a preceding line.
$SqlStr .= '\'' . $cateid . '\',';
// Start marker
$SqlStr .= '`flagstart`=';
$SqlStr .= '\'' . $flagstart . '\',';
// End marker
$SqlStr .= '`flagend`=';
$SqlStr .= '\'' . $flagend . '\',';
// Content start marker
$SqlStr .= '`flagcontentstart`=';
$SqlStr .= '\'' . $flagcontentstart . '\',';
// Content end marker
$SqlStr .= '`flagcontentend`=';
$SqlStr .= '\'' . $flagcontentend . '\',';
// Ad start markers (array), passed through EnCodeStr() before storage
$SqlStr .= '`flagadstart`=';
$SqlStr .= '\'' . EnCodeStr($flagadstartarray) . '\',';
// Ad end markers (array), passed through EnCodeStr() before storage
$SqlStr .= '`flagadend`=';
$SqlStr .= '\'' . EnCodeStr($flagadendarray) . '\',';
// Single-item replacements, passed through EnCodeStr() before storage
$SqlStr .= '`flagsingle`=';
$SqlStr .= '\'' . EnCodeStr($flagsinglearray) . '\',';
// Update date: today's date as Y-m-d (last column, so no trailing comma)
$SqlStr .= '`posttime`=';
$SqlStr .= '\'' . date("Y-m-d", time()) . '\'';
// NOTE(review): every value above and $id below are concatenated into the SQL
// text as-is; whether they were escaped/validated earlier is not visible here
// - confirm before trusting this against user input.
$SqlStr .= ' WHERE `id`=' . $id;
query($SqlStr);
// Message and target URL for the follow-up page; presumably consumed by the
// template required below - TODO confirm.
$refresh_msg = '采集规则[<font color="#FF0000">' . $title . '</font>],修改添加成功,返回修改页面。';
$refresh_url = 'source_edit.php?id=' . $id;
// Include the page script named by $page_name, then the debug include.
require $page_name . '.php';
require '../include/debug.inc.php';
Exemple #2
0
/**
 * Fetch the images referenced by a piece of HTML and rewrite their URLs to the
 * locally saved copies, then strip opening <a ...> tags and HTML comments.
 *
 * Every quoted src="..." / src='...' value is handed to saveFile(). Only
 * images for which saveFile() returns a non-empty name are rewritten; images
 * that could not be saved keep their original URL.
 *
 * @param  string $Content  Content to convert (slashes are stripped first).
 * @return string           The content with image URLs replaced and tags stripped.
 */
function getContent($Content)
{
    $Content = stripslashes($Content);

    // Collect every quoted src attribute value; de-duplicate so the same
    // image is not fetched more than once.
    preg_match_all("/src=(\"|')(.*?)(\"|')/i", $Content, $temp);
    $imageList = array_unique($temp[2]);

    // Images go into a date-based sub-folder "ym/d". Read the clock once so
    // both parts of the path come from the same instant.
    $now = time();
    $ImagePath = date("ym", $now) . '/' . date("d", $now);
    createFolder(IMAGEPATH, $ImagePath);

    // Public URL prefix that corresponds to the folder above.
    $ImageUrl = IMAGEURL . $ImagePath;

    // Map of original src => local URL, for successfully saved images only.
    $replacements = array();
    foreach ($imageList as $src) {
        if ($src === '') {
            // An empty src cannot be fetched (and is not a valid search key).
            continue;
        }
        $fName = saveFile($src, $ImagePath, $ImageUrl);
        if (!empty($fName)) {
            $replacements[$src] = $ImageUrl . $fName;
        }
    }

    // strtr() substitutes all pairs in one pass, longest key first, and never
    // re-scans text it has already replaced, so a src that is a substring of
    // another src cannot corrupt it.
    if (!empty($replacements)) {
        $Content = strtr($Content, $replacements);
    }

    // Remove opening anchor tags.
    // NOTE(review): the pattern is passed through EnCodeStr() although
    // $Content is not encoded in this function - confirm this is intended.
    $Content = preg_replace(EnCodeStr("@\\<a(.*?)\\>@is"), "", $Content);
    // Remove <!-- ... --> comments.
    $Content = preg_replace(EnCodeStr("@\\<!--(.*?)\\--\\>@is"), "", $Content);

    return $Content;
}
/**
 * Fetch a remote page, extract the article body, localise its images and
 * store the result in the article table.
 *
 * Steps: download $Url, encode it with EnCodeStr(), optionally convert
 * UTF-8 to GB2312, cut out the part between the content flags, cut the
 * ad sections, delete the single-item strings, save referenced images,
 * strip opening <a ...> tags and HTML comments, then UPDATE the article row.
 *
 * @param  string $Url               Address to fetch.
 * @param  int    $id                Id of the article row to update.
 * @param  string $ContentStartFlag  Marker where the article content starts.
 * @param  string $ContentEndFlag    Marker where the article content ends.
 * @param  string $FlagAdStart       Comma-separated ad start markers.
 * @param  string $FlagAdEnd         Comma-separated ad end markers (paired by position).
 * @param  string $FlagSingle        Comma-separated strings to delete from the content.
 * @param  string $ImagePath         Base folder in which images are saved.
 * @param  string $ImageUrl          Base URL under which saved images are shown.
 * @param  bool   $utf8              True when the source page is UTF-8 encoded.
 * @return bool   True after the row was updated; false when the page could not be fetched.
 */
function getContent($Url, $id, $ContentStartFlag, $ContentEndFlag, $FlagAdStart, $FlagAdEnd, $FlagSingle, $ImagePath, $ImageUrl, $utf8)
{
    // Fetch the source page. On failure stop here, so an unreachable page
    // does not overwrite the stored article with empty content.
    $Content = file_get_contents($Url);
    if ($Content === false) {
        return false;
    }
    $Content = EnCodeStr($Content);

    // Source pages flagged as UTF-8 are converted to GB2312.
    if ($utf8) {
        $Content = mb_convert_encoding($Content, "GB2312", "UTF-8");
    }

    // Keep only the part between the content markers.
    $Content = CutStr($Content, $ContentStartFlag, $ContentEndFlag);

    // Cut the ad sections; start and end markers are paired by position.
    $FlagAdStartArray = explode(",", $FlagAdStart);
    $FlagAdEndArray = explode(",", $FlagAdEnd);
    foreach ($FlagAdStartArray as $i => $adStart) {
        if (!isset($FlagAdEndArray[$i])) {
            // No matching end marker for this (and any later) start marker.
            break;
        }
        $Content = CutStr($Content, $adStart, $FlagAdEndArray[$i], 1);
    }

    // Single-item removal: str_replace() applies the search strings in order.
    $Content = str_replace(explode(",", $FlagSingle), '', $Content);

    // NOTE(review): this prints the intermediate content; it looks like
    // leftover debug output but is kept because callers may rely on it.
    echo $Content;

    // Collect every quoted src attribute value from the decoded content;
    // de-duplicate so the same image is not fetched more than once.
    preg_match_all("/src=(\"|')(.*?)(\"|')/i", DeCodeStr($Content), $temp);
    $imageList = array_unique($temp[2]);

    // Images are stored under <base>/<month>/<day>/. Read the clock once so
    // the folder and the URL always agree.
    $now = time();
    $datePart = date("m", $now) . '/' . date("d", $now) . '/';
    $ImagePath .= '/' . $datePart;
    if (!is_dir($ImagePath)) {
        // Recursive mkdir creates the base, month and day folders as needed.
        mkdir($ImagePath, 0777, true);
    }

    // Public URL prefix that corresponds to the folder above.
    $ImageUrl .= $datePart;

    // Map of original src => local URL, for successfully saved images only;
    // images that could not be saved keep their original URL.
    $replacements = array();
    foreach ($imageList as $src) {
        if ($src === '') {
            // An empty src cannot be fetched (and is not a valid search key).
            continue;
        }
        $fName = saveFile($src, $ImagePath, $ImageUrl);
        if (!empty($fName)) {
            $replacements[$src] = $ImageUrl . $fName;
        }
    }

    // strtr() substitutes all pairs in one pass, longest key first, and never
    // re-scans text it has already replaced.
    // NOTE(review): the srcs come from the decoded content but are replaced
    // in the encoded content; a src containing characters that EnCodeStr()
    // alters would not match - confirm.
    if (!empty($replacements)) {
        $Content = strtr($Content, $replacements);
    }

    // Remove opening anchor tags.
    $Content = preg_replace(EnCodeStr("@\\<a(.*?)\\>@is"), "", $Content);
    // Remove <!-- ... --> comments.
    $Content = preg_replace(EnCodeStr("@\\<!--(.*?)\\--\\>@is"), "", $Content);

    // Store the page content and mark the article (flag=1).
    // NOTE(review): $Content originates from a remote page and is placed in
    // the SQL text relying only on whatever EnCodeStr() escapes; switch to the
    // DB driver's escaping / prepared statements once query()'s driver is known.
    $SqlStr = 'UPDATE `' . table_pre . 'article` SET ';
    $SqlStr .= '`flag`=1,';
    $SqlStr .= '`content`=';
    $SqlStr .= '\'' . $Content . '\'';
    // The id is forced to an integer so it cannot alter the statement.
    $SqlStr .= ' WHERE `id`=' . (int) $id;
    query($SqlStr);

    return true;
}