示例#1
0
 /**
  * Split a search keyword into tokens using the remote segmentation service.
  *
  * Requests "<config base->split>/wd/<url-encoded keyword>" through Tools::curl()
  * and tries at most three times. On the first HTTP 200 response the body has every
  * "+" removed, is split on single spaces, and the non-empty tokens are returned as
  * a zero-based list. If no attempt returns 200, the untouched keyword is returned
  * as the only token.
  *
  * @Author tianyunzi
  * @DateTime 2015-10-16T10:38:46+0800
  *
  * @param string $keyword Keyword to segment.
  * @return array List of tokens; an empty array when the keyword is blank.
  */
 public function splitKeywordHelper($keyword)
 {
     // Blank input short-circuits, but "0" / 0 is a legitimate search term and
     // must not be swallowed by empty().
     if (empty($keyword) && !is_numeric($keyword)) {
         return array();
     }
     $url = $this->di['config']->base->split . "/wd/" . urlencode($keyword);
     for ($attempt = 0; $attempt < 3; $attempt++) {
         // NOTE(review): the trailing 5 is presumably a timeout in seconds — confirm in Tools::curl().
         $res = Tools::curl($url, '', '', true, 5);
         // Expect array(status, body); anything else counts as a failed attempt.
         if (!is_array($res) || !isset($res[0], $res[1])) {
             continue;
         }
         if ($res[0] == 200) {
             $tokens = explode(" ", str_replace("+", "", $res[1]));
             // Drop only empty strings (a bare array_filter() would also drop the
             // token "0") and reindex so callers can rely on $tokens[0].
             $tokens = array_filter($tokens, function ($token) {
                 return $token !== '';
             });
             return array_values($tokens);
         }
     }
     // Service unavailable: fall back to the raw keyword as a single token.
     return array($keyword);
 }
示例#2
0
 /**
  * Search the company index through SphinxQL and return the matching companies
  * together with facet counts.
  *
  * Steps, in order:
  *  - segment $data['wd'] with the remote split service (raw 'wd' is used when the
  *    service returns nothing);
  *  - build a SphinxQL select on "id" with keyword, category, area and founding-date
  *    filters plus facets on cate1, cate2 and areaid;
  *  - load the company records for the matched ids via CacheCombusiness::getMore();
  *  - read total_found / time from "show meta".
  *
  * @author 吕小虎
  * @param array $data Search parameters. Keys read here: wd, cate1id, cate2id, areaid,
  *                    foundstart, foundend, max, offset, limit.
  * @return array Keys: data, cate1, cate2, areaid (each facet as value => count),
  *               total_found, time, split.
  */
 public function getDataFromSpinx($data)
 {
     $this->data = $data;

     // Word segmentation of the search term.
     // NOTE(review): the 50 is presumably a timeout in milliseconds — confirm in curlGetContentMs().
     $splitUrl = $this->di['config']->base->split . '/wd/' . urlencode($this->data['wd']);
     $this->data['split'] = \Xz\Func\Common\Tools::curlGetContentMs($splitUrl, 50);
     if (empty($this->data['split'])) {
         $this->data['split'] = $data['wd'];
     }

     $sphinxCfg = $this->di["config"]["combusinessearchsphinx"];
     $connection = new Connection();
     $indexName = $sphinxCfg->table;
     $connection->setParams(array('host' => $sphinxCfg->host, 'port' => $sphinxCfg->port));
     $query = SphinxQL::create($connection)->select(array('id'))->from($indexName);

     // Keyword condition: company name / legal representative, or registration number.
     if (!empty($this->data['wd'])) {
         // Wrap every segmented term in double quotes, each followed by a space.
         $terms = array_filter(explode(' ', $this->data['split']));
         $quotedTerms = '';
         if (is_array($terms) && !empty($terms)) {
             foreach ($terms as $term) {
                 $quotedTerms .= '"' . $term . '" ';
             }
         }

         $looksLikeRegno = is_numeric($this->data['wd']) && mb_strlen($this->data['wd'], 'UTF-8') == 15;
         if ($looksLikeRegno) {
             // A 15-character numeric term is matched exactly against the registration number.
             $query->where('regno', '=', $this->data['wd']);
         } else {
             // Otherwise full-text match on company name and legal representative.
             $query->match(array('comname', 'legal'), $quotedTerms, true);
         }
     }

     // First- and second-level category filters.
     foreach (array('cate1id' => 'cate1', 'cate2id' => 'cate2') as $param => $attribute) {
         if (!empty($this->data[$param]) && intval($this->data[$param]) > 0) {
             $query->where($attribute, '=', intval($this->data[$param]));
         }
     }

     // Area filter: an id that is a multiple of 10000 or of 100 selects the whole
     // range below it; any other id is matched exactly.
     if (!empty($this->data['areaid']) && intval($this->data['areaid']) > 0) {
         $areaId = $this->data['areaid'];
         if ($areaId % 10000 == 0) {
             $span = 10000;
         } elseif ($areaId % 100 == 0) {
             $span = 100;
         } else {
             $span = 0;
         }
         if ($span > 0) {
             $lower = intval($areaId / $span) * $span;
             $upper = $lower + $span - 1;
             $query->where('areaid', 'BETWEEN', array($lower, $upper));
         } else {
             $query->where('areaid', '=', intval($areaId));
         }
     }

     // Founding-date filter; the upper bound only applies together with a lower bound.
     $hasFoundStart = isset($this->data['foundstart']) && $this->data['foundstart'] > 0;
     if ($hasFoundStart) {
         $query->where('startdate', '>=', intval($this->data['foundstart']));
         $hasFoundEnd = isset($this->data['foundend'])
             && $this->data['foundend'] > 0
             && $this->data['foundend'] > $this->data['foundstart'];
         if ($hasFoundEnd) {
             $query->where('startdate', '<=', intval($this->data['foundend']));
         }
     }

     // Result window.
     if (isset($data['max'])) {
         $query->option('max_matches', $data['max']);
     }
     if (isset($this->data['offset']) && isset($this->data['limit'])) {
         $query->limit($this->data['offset'], $this->data['limit']);
     }

     // Facets; their order fixes the position of each result set in the batch.
     $facetNames = array('cate1', 'cate2', 'areaid');
     foreach ($facetNames as $facetName) {
         $query->facet(Facet::create($connection)->facet($facetName));
     }

     $result = array('data' => array(), 'cate1' => array(), 'cate2' => array(), 'areaid' => array());
     $batch = $query->executeBatch();
     if (is_array($batch) && count($batch) > 0) {
         // Set 0 holds the matched rows, sets 1..3 the facets in the order added above.
         $result['data'] = $batch[0];
         foreach ($facetNames as $position => $facetName) {
             $setIndex = $position + 1;
             $result[$facetName] = isset($batch[$setIndex]) ? $batch[$setIndex] : array();
         }
     }

     // Collapse each facet result set into a "value => count" map.
     foreach ($facetNames as $facetName) {
         if (!empty($result[$facetName])) {
             $result[$facetName] = array_combine(
                 array_column($result[$facetName], $facetName),
                 array_column($result[$facetName], 'count(*)')
             );
         }
     }

     // Collect the matched ids.
     $companyIds = array();
     if (!empty($result['data'])) {
         foreach ($result['data'] as $row) {
             $companyIds[] = $row['id'];
         }
     }

     // Replace the id rows with the cached company records.
     $result['data'] = array();
     if (!empty($companyIds)) {
         $cache = new CacheCombusiness();
         $wantedFields = array('cid', 'comname', 'regno', 'province', 'address', 'legal', 'gccid', 'RegistCapi', 'businessstart', 'startdate');
         $result['data'] = $cache->getMore($companyIds, $wantedFields);
     }

     // A fresh SphinxQL instance on the same connection reads the totals of the last query.
     $metaQuery = new SphinxQL($connection);
     $meta = $this->formatSphQLArray($metaQuery->query('show meta')->execute());
     $result['total_found'] = $meta['total_found'];
     $result['time'] = $meta['time'];
     $result['split'] = $this->data['split'];

     return $result;
 }
 /**
  * Return the facet filter buckets (cate1, cate2, areaid) for a company search.
  *
  * The facet lookup is currently disabled: the method always returns three empty
  * buckets and does not touch Sphinx or the segmentation service. The earlier
  * implementation sat after an unconditional return and could never run, so it
  * was removed. It ran a SphinxQL facet query on cate1, cate2 and areaid, filtered
  * by the segmented keyword, category ids and area id. Restore it from version
  * control if the filters are re-enabled.
  *
  * @author 吕小虎
  * @param array $data Search parameters; unused while the lookup is disabled.
  * @return array Always array('cate1' => array(), 'cate2' => array(), 'areaid' => array()).
  */
 public function getDataFilterFromSpinx($data)
 {
     return array('cate1' => array(), 'cate2' => array(), 'areaid' => array());
 }
示例#4
0
 /**
  * Search the product index through SphinxQL and return matched product/company
  * id pairs together with JSON-encoded facet counts.
  *
  * The match text is $data['redword'] when given; otherwise $data['wd'] is sent to
  * the segmentation service and the raw 'wd' is kept if the service returns nothing.
  * With a positive category id ('cateid', or 'cate3' as a fallback) whose top-level
  * category can be resolved, the index "product_distri_<cate1>" is queried instead
  * of the default "product_m_distri".
  *
  * At most 200 rows are fetched (max_matches and limit are both fixed at 200).
  *
  * @author 刘建辉
  * @datetime 2015-08-19T18:05:19+0800
  * @param array $data Search parameters. Keys read here: redword, wd, cateid, cate3,
  *                    brand, province, city, iscertify, isprice, feature, sort.
  * @return array Keys: data (list of array(id, cid) after sortData()), province, city,
  *               brand, property (JSON strings, '[]' when empty), total_found, time, split.
  */
 public function getDataFromSpinx($data)
 {
     $this->data = $data;
     if (isset($this->data['redword']) && !empty($this->data['redword'])) {
         $this->data['split'] = $this->data['redword'];
     } else {
         // Word segmentation; keep the raw keyword when the service returns nothing.
         // NOTE(review): the 50 is presumably a timeout in milliseconds — confirm in curlGetContentMs().
         $this->data['split'] = $this->data['wd'];
         $splitWd = \Xz\Func\Common\Tools::curlGetContentMs($this->di['config']->base->split . '/wd/' . urlencode($this->data['wd']), 50);
         if ($splitWd) {
             $this->data['split'] = $splitWd;
         }
     }

     $sphinxConfig = $this->di["config"]["prosearchsphinx"];
     $conn = new Connection();
     $indexTable = "product_m_distri";

     // TODO index scope
     if (!isset($this->data['cateid']) && isset($this->data['cate3'])) {
         $this->data['cateid'] = $this->data['cate3'];
     }
     // The "> 0" check is required: a zero / non-positive id must not select a category index.
     if (isset($this->data['cateid']) && $this->data['cateid'] > 0) {
         $cateId = $this->data['cateid'];
         $cateinfo = \Xz\Lib\Cate::getCateInfo(array($cateId), array('nav'));
         // Only switch index when the top-level category is really present; otherwise
         // the index name would degrade to "product_distri_".
         if (isset($cateinfo[$cateId]['nav'][0]['cateid'])) {
             $this->data['cate1'] = $cateinfo[$cateId]['nav'][0]['cateid'];
             $indexTable = "product_distri" . '_' . $this->data['cate1'];
         }
     }

     $conn->setParams(array('host' => $sphinxConfig->host, 'port' => $sphinxConfig->port));
     // Custom ranking expression, exposed to ORDER BY as "gcpdweight".
     $gcdweight = "weight()+IF(id>900000000, tradenum*100, 0)+inquirynum*20+star*2+basescore*5+creditscore+IF(is_op=1, all_uv*10+all_pv, 0)+IF(id>900000000, weight()*0.1, 0) as gcpdweight";
     $query = SphinxQL::create($conn)->select('id', 'cid', $gcdweight)->from($indexTable);
     $query->match('*', $this->data['split']);
     $query->option('max_matches', 200);

     // Attribute filters, applied only for positive integer values.
     if (!empty($this->data['cateid']) && intval($this->data['cateid']) > 0) {
         $query->where('cate3', '=', intval($this->data['cateid']));
     }
     if (!empty($this->data['brand']) && intval($this->data['brand']) > 0) {
         $query->where('brand', '=', intval($this->data['brand']));
     }
     if (!empty($this->data['province']) && intval($this->data['province']) > 0) {
         $query->where('province', '=', intval($this->data['province']));
     }
     if (!empty($this->data['city']) && intval($this->data['city']) > 0) {
         $query->where('city', '=', intval($this->data['city']));
     }
     if (!empty($this->data['iscertify']) && intval($this->data['iscertify']) > 0) {
         $query->where('is_gccertify', '=', intval($this->data['iscertify']));
     }
     if (!empty($this->data['isprice']) && intval($this->data['isprice']) > 0) {
         $query->where('price', '>', 0);
     }
     // 'feature' is an underscore-separated id list; every id becomes its own condition.
     if (!empty($this->data['feature'])) {
         foreach (explode('_', $this->data['feature']) as $featureId) {
             $query->where('feature', '=', intval($featureId));
         }
     }

     // Ordering; anything unrecognised falls back to the custom weight.
     if (!empty($this->data['sort'])) {
         switch ($this->data['sort']) {
             case 1:
                 // sales volume
                 $query->orderBy('tradenum', 'DESC');
                 break;
             case 2:
                 // visits / popularity
                 $query->orderBy('visitnum', 'DESC');
                 break;
             case 3:
                 $query->orderBy("price", "DESC");
                 break;
             case 4:
                 $query->orderBy("price", "ASC");
                 break;
             case 6:
                 $query->orderBy("integral", "ASC");
                 break;
             default:
                 $query->orderBy("gcpdweight", "DESC");
                 break;
         }
     } else {
         $query->orderBy("gcpdweight", "DESC");
     }

     // Facets; their order fixes the result-set positions read below:
     // 1 = feature, 2 = brand, 3 = city (when a province is selected) or province.
     $facet = Facet::create($conn)->facet('feature')->limit(20)->orderby("count(*)", "desc");
     $query->facet($facet);
     $brand = Facet::create($conn)->facet('brand')->limit(20)->orderby("count(*)", "desc");
     $query->facet($brand);
     $regionAttr = !empty($this->data['province']) ? 'city' : 'province';
     $facet = Facet::create($conn)->facet($regionAttr)->limit(20)->orderby("count(*)", "desc");
     $query->facet($facet);

     // Row window and field weighting.
     $query->limit(0, 200);
     $query->option('field_weights', array('proname' => 300));

     $batchResult = $query->executeBatch();
     // Default 'data' to an empty list so a failed or empty batch does not leave it
     // undefined for the loop and sortData() below.
     $result = array('data' => array());
     if (is_array($batchResult) && count($batchResult) > 0) {
         $result['data'] = $batchResult[0];
         $result['property'] = isset($batchResult[1]) ? $batchResult[1] : array();
         $result['brand'] = isset($batchResult[2]) ? $batchResult[2] : array();
         $result[$regionAttr] = isset($batchResult[3]) ? $batchResult[3] : array();
     }

     // Reduce every row to array(product id, company id), keeping the row keys.
     $pairs = array();
     foreach ($result['data'] as $rowKey => $row) {
         $pairs[$rowKey] = array($row['id'], $row['cid']);
     }
     // Anti-domination reordering (original note: keep one company from flooding the list).
     $result['data'] = $this->sortData($pairs);

     // Encode each facet as a JSON "value => count" object, or '[]' when empty.
     // Map: result key => attribute column in the facet rows.
     $facetColumns = array('province' => 'province', 'city' => 'city', 'brand' => 'brand', 'property' => 'feature');
     foreach ($facetColumns as $resultKey => $column) {
         if (!empty($result[$resultKey])) {
             $result[$resultKey] = json_encode(array_combine(
                 array_column($result[$resultKey], $column),
                 array_column($result[$resultKey], 'count(*)')
             ));
         } else {
             $result[$resultKey] = '[]';
         }
     }

     // A fresh SphinxQL instance on the same connection reads the totals of the last query.
     $sphinxql = new SphinxQL($conn);
     $total = $this->formatSphQLArray($sphinxql->query('show meta')->execute());
     $result['total_found'] = $total['total_found'];
     $result['time'] = $total['time'];
     $result['split'] = $this->data['split'];
     return $result;
 }
示例#5
0
 /**
  * Fetch the segmentation ("split") or the core keyword ("core") of a string
  * from the remote jieba endpoint.
  *
  * The response is decoded as JSON. For type "split" the value of data.cut_all is
  * returned; for type "core" the first entry of data.TF-IDF_keyword_extract.
  * Whenever the request fails, the JSON cannot be decoded, the requested field is
  * missing, or $type is neither value, the input string is returned unchanged.
  *
  * @author 刘建辉
  * @datetime 2015-10-31T10:25:30+0800
  * @param string $string Text to analyse.
  * @param string $type   'split' or 'core'.
  * @return mixed data.cut_all as delivered by the service (its type is not visible
  *               here — TODO confirm), the first core keyword, or $string as fallback.
  */
 public static function splitWord($string, $type)
 {
     // NOTE(review): endpoint and secret are hard-coded; consider moving them to configuration.
     $url = 'http://tj.yw.gongchang.com/api/uid/?mark=jieba_output&type=cut_all&secret=7232275&content=';
     // NOTE(review): the 500 is presumably a timeout in milliseconds — confirm in curlGetContentMs().
     $response = \Xz\Func\Common\Tools::curlGetContentMs($url . urlencode($string), 500);

     // Normalise to the "data" payload, or an empty array when the call or the
     // decoding failed, so the lookups below never index into false/null.
     $decoded = $response ? json_decode($response, true) : null;
     $payload = (is_array($decoded) && isset($decoded['data']) && is_array($decoded['data']))
         ? $decoded['data']
         : array();

     if ($type == 'split' && isset($payload['cut_all'])) {
         return $payload['cut_all'];
     }
     if ($type == 'core' && isset($payload['TF-IDF_keyword_extract'][0])) {
         return $payload['TF-IDF_keyword_extract'][0];
     }
     // Nothing usable from the service: hand back the original text.
     return $string;
 }