コード例 #1
0
/**
 * Find articles related to the given article id(s) by shared title keywords.
 *
 * Keywords come from get_word_results(); matching rows are read from
 * tongji_article_title_words, scored per article as the sum of
 * (flag weight from $propertyArr) * (row count), ranked by score descending
 * and then by uv descending inside the same score, and cut to the top 12.
 *
 * @param int|array $docIdArr Article id, or list of article ids, the user viewed.
 *                            These ids are removed from the result.
 * @param int       $num      Not used by the ranking (kept for caller compatibility).
 *
 * @return array|false false when $docIdArr is empty; array() when there are no
 *                     keywords or no rows; otherwise
 *                     array('article' => array(articleId => array(
 *                         'word_power_val' => ..., 'uv' => ..., 'article_id' => ...,
 *                         'num' => ... (only present when the article matched more than one row)
 *                     )), 'bbs' => array(up to 2 raw rows having a bbsUrl)).
 */
function get_arti_by_word($docIdArr, $num = 12)
{
    if (!$docIdArr) {
        return false;
    }
    global $db_guess, $propertyArr;

    $wordArr = get_word_results($docIdArr);
    # Without keywords the query would be unfiltered; bail out before building it.
    if (!$wordArr || !is_array($wordArr)) {
        return array();
    }

    # NOTE(review): keywords are interpolated into the SQL without escaping.
    # If a keyword can contain a double quote or backslash this breaks the
    # query (or allows injection); escape via the DB wrapper's own API — TODO confirm.
    $wordStr = '"' . implode('","', $wordArr) . '"';
    $sqlWordStr = ' AND t.word in(' . $wordStr . ') ';

    # Selected columns; both halves of the UNION use the same list.
    $fields = ' t.article_id,t.title,t.uv,count(article_id) as cnt,t.bbsUrl,t.flag ';
    $wheres1 = $sqlWordStr . ' AND t.bbsUrl="" ';
    $wheres2 = $sqlWordStr . ' AND t.bbsUrl<>"" AND page_type_id<>4 ';
    $order = ' ORDER BY uv desc ';
    # First half: article rows (no bbsUrl), second half: forum rows (with bbsUrl).
    $sql = '(SELECT ' . $fields . ' from tongji_article_title_words t where 1 ' . $wheres1
        . ' GROUP BY t.article_id,t.flag ' . $order . " limit 120)\n\t\t\tUNION \n\t\t\t(SELECT "
        . $fields . ' from tongji_article_title_words t where 1 ' . $wheres2 . ' ' . $order . ' limit 2)';

    $rows = $db_guess->get_results($sql);
    if (!$rows || !is_array($rows)) {
        return array();
    }

    # Accumulate keyword score per article and collect forum rows.
    $bbsData = array();
    $scored = array();
    foreach ($rows as $row) {
        # Keep at most two forum rows (same cap the original loop applied).
        if ($row['bbsUrl'] && count($bbsData) < 2) {
            $bbsData[] = $row;
        }
        if ($row['article_id'] == 1) {
            continue;
        }
        $articleId = $row['article_id'];
        # Unknown flags contribute a weight of 0 instead of raising a notice.
        $weight = isset($propertyArr[$row['flag']]) ? $propertyArr[$row['flag']] : 0;
        $power = $weight * $row['cnt'];
        if (!isset($scored[$articleId])) {
            $scored[$articleId] = array('word_power_val' => $power);
        } else {
            # 'num' counts the additional matching rows of an article (starts at 1).
            $scored[$articleId]['num'] = isset($scored[$articleId]['num'])
                ? $scored[$articleId]['num'] + 1
                : 1;
            $scored[$articleId]['word_power_val'] += $power;
        }
        $scored[$articleId]['uv'] = $row['uv'];
        $scored[$articleId]['article_id'] = $articleId;
    }

    # Drop the articles the lookup started from; accept a single id as well as a list.
    $sourceIds = is_array($docIdArr) ? $docIdArr : array($docIdArr);
    foreach ($sourceIds as $sourceId) {
        unset($scored[$sourceId]);
    }

    # Order by keyword score, highest first.
    $scored = multi_array_sort($scored, 'word_power_val', SORT_DESC);

    # Bucket entries with the same score; the score is scaled and truncated to
    # an integer so it can serve as an array key.
    $groups = array();
    foreach ($scored as $entry) {
        $groupKey = (int) ($entry['word_power_val'] * 10000);
        $groups[$groupKey][] = $entry;
    }

    # Inside each bucket order by uv descending, then concatenate the buckets.
    $ranked = array();
    foreach ($groups as $group) {
        $ranked = array_merge($ranked, multi_array_sort($group, 'uv', SORT_DESC));
    }

    # Keep the top 12 and index them by article id.
    $articleData = array();
    foreach (array_slice($ranked, 0, 12) as $entry) {
        $articleData[$entry['article_id']] = $entry;
    }

    return array('article' => $articleData, 'bbs' => $bbsData);
}
コード例 #2
0
ファイル: get_rel_doc.php プロジェクト: suhanyujie/myFavorite
/**
 * Find articles related to the given article id(s) by shared title keywords.
 *
 * Keywords come from get_word_results(); matching article rows (empty bbsUrl)
 * are read from tongji_article_title_words, scored per article as the sum of
 * (flag weight from $propertyArr) * (row count), ranked by score descending
 * and then by uv descending inside the same score, and cut to the top 12.
 *
 * Original note: article pages should exclude 370 (Z超值); nothing in this
 * function enforces that.
 *
 * @param int|array $docIdArr Article id, or list of article ids, the user viewed.
 *                            These ids are removed from the result.
 * @param int       $num2     Not used by the ranking (kept for caller compatibility).
 *
 * @return array array() when $docIdArr is empty, there are no keywords, or the
 *               query yields no rows; otherwise
 *               array('article' => array(articleId => array(
 *                   'word_power_val' => ..., 'uv' => ..., 'article_id' => ...,
 *                   'num' => ... (only present when the article matched more than one row)
 *               )), 'bbs' => array(up to 2 raw rows having a bbsUrl)).
 */
function get_arti_by_word($docIdArr, $num2 = 36)
{
    global $db_doc_read, $propertyArr;
    if (!$docIdArr) {
        return array();
    }

    $wordArr = get_word_results($docIdArr);
    # Without keywords there is nothing to match on; bail out before building SQL.
    if (!$wordArr || !is_array($wordArr)) {
        return array();
    }

    # NOTE(review): keywords are interpolated into the SQL without escaping.
    # If a keyword can contain a double quote or backslash this breaks the
    # query (or allows injection); escape via the DB wrapper's own API — TODO confirm.
    $wordStr = '"' . implode('","', $wordArr) . '"';
    $sqlWordStr = ' AND t.word in(' . $wordStr . ') ';

    # Selected columns and filter: only rows without a forum URL.
    $fields = ' t.article_id,t.title,t.uv,count(t.article_id) as cnt,t.bbsUrl,t.flag ';
    $wheres = $sqlWordStr . ' AND t.bbsUrl="" ';
    $order = ' ORDER BY uv desc ';
    $sql = '(SELECT ' . $fields . ' from tongji_article_title_words t where 1 ' . $wheres
        . ' GROUP BY t.article_id,t.flag ' . $order . " limit 120)\n\t\t\t";

    $rows = $db_doc_read->get_results($sql);
    if (!$rows || !is_array($rows)) {
        return array();
    }

    # Accumulate keyword score per article and collect forum rows, if any.
    $bbsData = array();
    $scored = array();
    foreach ($rows as $row) {
        # Keep at most two forum rows (same cap the original loop applied).
        if ($row['bbsUrl'] && count($bbsData) < 2) {
            $bbsData[] = $row;
        }
        if ($row['article_id'] == 1) {
            continue;
        }
        $articleId = $row['article_id'];
        # Unknown flags contribute a weight of 0 instead of raising a notice.
        $weight = isset($propertyArr[$row['flag']]) ? $propertyArr[$row['flag']] : 0;
        $power = $weight * $row['cnt'];
        if (!isset($scored[$articleId])) {
            $scored[$articleId] = array('word_power_val' => $power);
        } else {
            # 'num' counts the additional matching rows of an article (starts at 1).
            $scored[$articleId]['num'] = isset($scored[$articleId]['num'])
                ? $scored[$articleId]['num'] + 1
                : 1;
            $scored[$articleId]['word_power_val'] += $power;
        }
        $scored[$articleId]['uv'] = $row['uv'];
        $scored[$articleId]['article_id'] = $articleId;
    }

    # Drop the articles the lookup started from; accept a single id as well as a list.
    $sourceIds = is_array($docIdArr) ? $docIdArr : array($docIdArr);
    foreach ($sourceIds as $sourceId) {
        unset($scored[$sourceId]);
    }

    # Order by keyword score, highest first.
    $scored = multi_array_sort($scored, 'word_power_val', SORT_DESC);

    # Bucket entries with the same score; the score is scaled and truncated to
    # an integer so it can serve as an array key.
    $groups = array();
    foreach ($scored as $entry) {
        $groupKey = (int) ($entry['word_power_val'] * 10000);
        $groups[$groupKey][] = $entry;
    }

    # Inside each bucket order by uv descending, then concatenate the buckets.
    $ranked = array();
    foreach ($groups as $group) {
        $ranked = array_merge($ranked, multi_array_sort($group, 'uv', SORT_DESC));
    }

    # Keep the top 12 and index them by article id.
    $articleData = array();
    foreach (array_slice($ranked, 0, 12) as $entry) {
        $articleData[$entry['article_id']] = $entry;
    }

    return array('article' => $articleData, 'bbs' => $bbsData);
}