SetGroupDistinct() public method

Sets the count-distinct attribute for group-by queries.
public SetGroupDistinct ( $attribute )
 /**
  * Push every setting carried by a search criteria object onto the Sphinx client.
  *
  * Order of application: match mode, rank mode, sort mode, select list,
  * limits, grouping, id range, field/index weights, filters, range filters,
  * max query time and, when the API version allows it, extra options.
  *
  * @param ESphinxSearchCriteria $criteria criteria to translate into client calls
  */
 protected function applyCriteria(ESphinxSearchCriteria $criteria)
 {
     $this->applyMatchMode($criteria->matchMode);
     $this->applyRankMode($criteria);

     // Extended sorting takes an "attr direction, attr direction" clause built
     // from the order map; every other mode takes the plain sort-by value.
     if ($criteria->sortMode == ESphinxSort::EXTENDED) {
         $clauses = array();
         $orderMap = $criteria->getOrders();
         if ($orderMap) {
             foreach ($orderMap as $attribute => $direction) {
                 $clauses[] = $attribute . ' ' . $direction;
             }
         }
         $sortArgument = implode(', ', $clauses);
     } else {
         $sortArgument = $criteria->getSortBy();
     }
     $this->applySortMode($criteria->sortMode, $sortArgument);

     // Select list: only sent when it is a non-empty string.
     if (strlen($criteria->select) > 0) {
         $this->sphinxClient->SetSelect($criteria->select);
     }

     // Limits: skipped entirely when no limit is set.
     if ($criteria->limit) {
         $this->sphinxClient->SetLimits(
             $criteria->offset,
             $criteria->limit,
             $criteria->maxMatches,
             $criteria->cutOff
         );
     }

     // Grouping and count-distinct are applied independently of each other.
     if ($criteria->groupBy) {
         $this->sphinxClient->SetGroupBy(
             $criteria->groupBy,
             $criteria->groupByFunc,
             $criteria->groupBySort
         );
     }
     if ($criteria->groupDistinct) {
         $this->sphinxClient->SetGroupDistinct($criteria->groupDistinct);
     }

     // Document id range.
     if ($criteria->getIsIdRangeSetted()) {
         $this->sphinxClient->SetIDRange($criteria->getMinId(), $criteria->getMaxId());
     }

     // Weights and filters.
     $this->applyFieldWeights($criteria->getFieldWeights());
     $this->applyIndexWeights($criteria->getIndexWeights());
     $this->applyFilters($criteria->getFilters());
     $this->applyRanges($criteria->getRangeFilters());

     // A timeout on the criteria wins; otherwise fall back to the component default.
     $maxQueryTime = $this->_queryTimeout;
     if ($criteria->queryTimeout !== null) {
         $maxQueryTime = $criteria->queryTimeout;
     }
     $this->sphinxClient->SetMaxQueryTime($maxQueryTime);

     // Extra options are only applied for search command version 0x11d and above.
     if (VER_COMMAND_SEARCH >= 0x11d) {
         $this->applyOptions($criteria);
     }
 }
Exemplo n.º 2
0
/**
 * Run a match-all Sphinx query against every index on localhost:9312 and
 * return the attributes of each matching document, keyed by document id.
 *
 * Multi-value attributes are rendered as "(v1,v2,...)" and timestamp
 * attributes as "Y-m-d H:i:s" strings; all other attributes are returned
 * as delivered by the client.
 *
 * @param string $search_str full-text query passed to SphinxClient::Query()
 * @return array|null map of document id => (attribute name => value), or null
 *                    when the query failed or returned no "matches" array
 */
function do_query($search_str)
{
    // Query configuration. Empty values switch the corresponding option off.
    $mode = SPH_MATCH_ALL;
    $host = "localhost";
    $port = 9312;
    $index = "*";
    $groupby = "";
    $groupsort = "@group desc";
    $filter = "group_id";
    $filtervals = array();
    $distinct = "";
    $sortby = "";
    $sortexpr = "";
    $limit = 20;
    $ranker = SPH_RANK_PROXIMITY_BM25;
    $select = "*";

    $cl = new SphinxClient();
    $cl->SetServer($host, $port);
    $cl->SetConnectTimeout(1);
    // Array results: each match carries its own "id" entry instead of being keyed by id.
    $cl->SetArrayResult(true);
    $cl->SetWeights(array(100, 1));
    $cl->SetMatchMode($mode);
    if (count($filtervals)) {
        $cl->SetFilter($filter, $filtervals);
    }
    if ($groupby) {
        $cl->SetGroupBy($groupby, SPH_GROUPBY_ATTR, $groupsort);
    }
    if ($sortby) {
        $cl->SetSortMode(SPH_SORT_EXTENDED, $sortby);
    }
    if ($sortexpr) {
        $cl->SetSortMode(SPH_SORT_EXPR, $sortexpr);
    }
    if ($distinct) {
        $cl->SetGroupDistinct($distinct);
    }
    if ($select) {
        $cl->SetSelect($select);
    }
    if ($limit) {
        $cl->SetLimits(0, $limit, $limit > 1000 ? $limit : 1000);
    }
    $cl->SetRankingMode($ranker);

    $res = $cl->Query($search_str, $index);

    // Query() returns false on failure, and a successful result may have no
    // "matches" entry. Check both before indexing, and keep returning null
    // in those cases so existing callers see no change.
    if (!is_array($res) || !isset($res["matches"]) || !is_array($res["matches"])) {
        return null;
    }

    $attr_types = isset($res["attrs"]) && is_array($res["attrs"]) ? $res["attrs"] : array();
    $results = array();
    foreach ($res["matches"] as $docinfo) {
        $attr_array = array();
        foreach ($attr_types as $attrname => $attrtype) {
            $value = $docinfo["attrs"][$attrname];
            if ($attrtype == SPH_ATTR_MULTI || $attrtype == SPH_ATTR_MULTI64) {
                $value = "(" . implode(",", $value) . ")";
            } elseif ($attrtype == SPH_ATTR_TIMESTAMP) {
                $value = date("Y-m-d H:i:s", $value);
            }
            $attr_array[$attrname] = $value;
        }
        // The key must be the quoted string "id": a bare `id` is an undefined
        // constant and raises an Error on PHP 8.
        $results[$docinfo["id"]] = $attr_array;
    }

    return $results;
}
Exemplo n.º 3
0
// Fixture generator fragment: for each client feature, configure the client,
// queue one "test " query with AddQuery(), write the entry of $client->_reqs
// at the returned index to a .bin file, then undo the setting before the next step.
// (presumably _reqs holds the serialized request packets — confirm against sphinxapi.php)

// Close the file handle left open by the step preceding this excerpt.
fclose($file);
$client->SetMatchMode(SPH_MATCH_ALL);
// filter: restrict on attribute "id" to the values 10, 100 and 1000
$client->SetFilter("id", array(10, 100, 1000));
$file = fopen("spec/fixtures/data/filter.bin", "w");
fwrite($file, $client->_reqs[$client->AddQuery("test ")]);
fclose($file);
// drop the filter so it does not leak into the following fixtures
$client->ResetFilters();
// group: group by attribute "id", sorting groups by "id"
$client->SetGroupBy("id", SPH_GROUPBY_ATTR, "id");
$file = fopen("spec/fixtures/data/group.bin", "w");
fwrite($file, $client->_reqs[$client->AddQuery("test ")]);
fclose($file);
$client->ResetGroupBy();
// distinct: count-distinct on attribute "id"
$client->SetGroupDistinct("id");
$file = fopen("spec/fixtures/data/distinct.bin", "w");
fwrite($file, $client->_reqs[$client->AddQuery("test ")]);
fclose($file);
// NOTE(review): the distinct setting is undone via ResetGroupBy(); confirm that
// call also clears the group-distinct attribute in the bundled sphinxapi.php.
$client->ResetGroupBy();
// weights: positional weights 100 and 1
$client->SetWeights(array(100, 1));
$file = fopen("spec/fixtures/data/weights.bin", "w");
fwrite($file, $client->_reqs[$client->AddQuery("test ")]);
fclose($file);
// reset weights to an empty list
$client->SetWeights(array());
// anchor: geo anchor on attributes "latitude"/"longitude" at (10.0, 95.0)
$client->SetGeoAnchor("latitude", "longitude", 10.0, 95.0);
$file = fopen("spec/fixtures/data/anchor.bin", "w");
fwrite($file, $client->_reqs[$client->AddQuery("test ")]);
fclose($file);
Exemplo n.º 4
0
// Excerpt: configure an existing SphinxClient ($cl) from variables defined
// outside this fragment, run the query, and start reporting the outcome.
$cl->SetWeights(array(100, 1));
$cl->SetMatchMode($mode);
// Each optional setting below is applied only when its variable is non-empty.
if (count($filtervals)) {
    $cl->SetFilter($filter, $filtervals);
}
if ($groupby) {
    $cl->SetGroupBy($groupby, SPH_GROUPBY_ATTR, $groupsort);
}
// If both $sortby and $sortexpr are set, the second SetSortMode() call is the
// one issued last (presumably it overrides the first — confirm in sphinxapi).
if ($sortby) {
    $cl->SetSortMode(SPH_SORT_EXTENDED, $sortby);
}
if ($sortexpr) {
    $cl->SetSortMode(SPH_SORT_EXPR, $sortexpr);
}
if ($distinct) {
    $cl->SetGroupDistinct($distinct);
}
if ($select) {
    $cl->SetSelect($select);
}
// Always start at offset 0; max matches is the larger of $limit and 1000.
if ($limit) {
    $cl->SetLimits(0, $limit, $limit > 1000 ? $limit : 1000);
}
$cl->SetRankingMode($ranker);
$res = $cl->Query($q, $index);
////////////////
// print me out
////////////////
// Query() signals failure with a strict false; the error text comes from GetLastError().
if ($res === false) {
    print "Query failed: " . $cl->GetLastError() . ".\n";
} else {
Exemplo n.º 5
0
 /**
  * Re-match one subject against the Sphinx index and copy every hit whose
  * post date lies inside the [$from, $to] window into the per-client
  * website_c<id> / twitter_c<id> / facebook_c<id> tables, then mark the
  * subject row as updated.
  *
  * @param mixed  $subject_id   id of the row in the `subject` table
  * @param bool   $clean        when truthy, first delete this subject's rows
  *                             from the three per-client tables
  * @param int    $query_offset offset used in the progress output and in the
  *                             "more results than this page" check
  * @param string $from         start of the date window, any strtotime() format
  * @param string $to           end of the date window, any strtotime() format
  * @return false|null false when the subject is already being matched or does
  *                    not exist; otherwise no value is returned
  */
 public function run($subject_id, $clean = true, $query_offset = 0, $from, $to)
 {
     $this->load->helper('sphinxapi');
     $this->load->helper('mood');

     // Skip if matching_status is "matching".
     // NOTE(review): this method never sets the status to "matching" itself,
     // so the guard only protects against runs flagged elsewhere.
     $matching_status = $this->custom_model->get_value('subject', 'matching_status', $subject_id);
     if ($matching_status == 'matching') {
         echo "subject is matching";
         return false;
     }

     // Secondary connection that receives the matched rows.
     // NOTE(review): host and credentials are hard-coded here; consider moving
     // them to the application's database configuration.
     $config = array(
         'hostname' => "192.168.1.102",
         'username' => "******",
         'password' => "******",
         'database' => "thothconnect",
         'dbdriver' => "mysql",
         'dbprefix' => "",
         'pconnect' => FALSE,
         'db_debug' => TRUE,
         'cache_on' => FALSE,
         'cachedir' => "",
         'char_set' => "utf8",
         'dbcollat' => "utf8_general_ci",
     );
     $thothconnect_db = $this->load->database($config, true);

     // Bound parameter instead of string concatenation, so $subject_id can
     // never alter the SQL statement.
     $query = $this->db->query("SELECT client_id FROM subject WHERE id = ?", array($subject_id));
     $row = $query->row();
     if (!$row) {
         // Without a subject row there is no client id to build table names from.
         echo "subject not found";
         return false;
     }
     $client_id = $row->client_id;

     // Clear all match records for this subject.
     if ($clean) {
         $thothconnect_db->delete('website_c' . $client_id, array('subject_id' => $subject_id));
         $thothconnect_db->delete('twitter_c' . $client_id, array('subject_id' => $subject_id));
         $thothconnect_db->delete('facebook_c' . $client_id, array('subject_id' => $subject_id));
     }

     //
     // begin re-matching this subject
     //
     // The search string is stored on the subject row.
     $q = $this->custom_model->get_value('subject', 'query', $subject_id);

     // sphinx init
     $cl = new SphinxClient();
     $mode = SPH_MATCH_EXTENDED;
     $host = "192.168.1.102";
     $port = 9312;
     $index = "*";
     $distinct = "";
     $sortby = "@id ASC";
     $offset = $query_offset;
     $limit = 1000000;
     $ranker = SPH_RANK_PROXIMITY_BM25;
     $select = "";
     echo 'limit=' . $limit . ' offset=' . $offset . PHP_EOL;

     // Extract subject keywords from the search string (used for mood scoring).
     $keywords = get_keywords($q);

     ////////////
     // do query
     ////////////
     $cl->SetServer($host, $port);
     $cl->SetConnectTimeout(1);
     $cl->SetArrayResult(true);
     $cl->SetWeights(array(100, 1));
     $cl->SetMatchMode($mode);
     if ($sortby) {
         $cl->SetSortMode(SPH_SORT_EXTENDED, $sortby);
     }
     if ($distinct) {
         $cl->SetGroupDistinct($distinct);
     }
     if ($select) {
         $cl->SetSelect($select);
     }
     if ($limit) {
         // NOTE(review): the query always starts at offset 0; $offset only
         // affects the paging check further down.
         $cl->SetLimits(0, $limit, $limit > 1000000 ? $limit : 1000000);
     }
     $cl->SetRankingMode($ranker);
     $res = $cl->Query($q, $index);

     // Matching date range. The caller's $from/$to strings are kept intact so
     // they can be forwarded unchanged to a follow-up run.
     $from_ts = strtotime($from);
     $to_ts = strtotime($to);

     ////////////
     // do Insert to DB
     ////////////
     if ($res === false) {
         echo "Query failed: " . $cl->GetLastError() . ".\n";
     } else {
         if ($cl->GetLastWarning()) {
             echo "WARNING: " . $cl->GetLastWarning() . "\n\n";
         }
         echo "Query '{$q}' \nretrieved {$res['total']} of {$res['total_found']} matches in {$res['time']} sec.\n";
         if ($res['total'] == 0) {
             echo "no result<br/>\n";
         } elseif ($res['total'] > $limit + $offset) {
             // More results than this page covers: run again with the advanced
             // offset. All five arguments are forwarded; the old two-argument
             // call put the offset into $clean and dropped $from/$to. Cleaning
             // has already happened above, so it is not repeated.
             $this->run($subject_id, false, $limit + $offset, $from, $to);
         } else {
             echo "Updating...";
             foreach ($res["matches"] as $docinfo) {
                 // Reset the PHP timeout to one minute for each document.
                 set_time_limit(60);
                 $post = new Post_model();
                 $post->init($docinfo["id"]);

                 // Skip posts outside the requested date range.
                 $post_date = strtotime($post->post_date);
                 if ($post_date < $from_ts || $post_date > $to_ts) {
                     continue;
                 }

                 $mood = get_mood($post->body, $keywords);
                 $subject = $post->get_subject($subject_id);

                 if ($post->type == "post" || $post->type == "comment") {
                     // Website posts and comments.
                     $postData = $post->get_post_website($post->id);
                     if ($postData != null) {
                         $data = array(
                             "post_id" => $postData->post_id,
                             "post_date" => $postData->post_date,
                             "title" => $postData->title,
                             "body" => $postData->body,
                             "type" => $postData->type,
                             "author_id" => $postData->author_id,
                             "author" => $postData->author,
                             "website_id" => $postData->website_id,
                             "website_name" => $postData->website_name,
                             "website_cate_id" => $postData->website_cate_id,
                             "website_cate" => $postData->website_cate,
                             "website_type_id" => $postData->website_type_id,
                             "website_type" => $postData->website_type,
                             "group_id" => $subject->group_id,
                             "group" => $subject->group,
                             // root_url loses its last character before the path is appended
                             // (presumably a trailing slash — confirm against the data).
                             "url" => substr($postData->root_url, 0, -1) . "" . $postData->url,
                             "page_id" => $postData->page_id,
                             "subject_id" => $subject->subject_id,
                             "subject_name" => $subject->subject_name,
                             "mood" => $mood,
                             "mood_by" => 'system',
                         );
                         $thothconnect_db->insert("website_c" . $subject->client_id, $data);
                         $post->insert_post_comment($postData->page_id, $subject->client_id, $thothconnect_db);
                     }
                 } elseif ($post->type == "tweet" || $post->type == "retweet") {
                     // Twitter.
                     $postData = $post->get_post_twitter($post->id);
                     if ($postData != null) {
                         $data = array(
                             "post_id" => $postData->post_id,
                             "post_date" => $postData->post_date,
                             "body" => $postData->body,
                             "type" => $postData->type,
                             "author_id" => $postData->author_id,
                             "author" => $postData->author,
                             "group_id" => $subject->group_id,
                             "group" => $subject->group,
                             "tweet_id" => $postData->tweet_id,
                             "subject_id" => $subject->subject_id,
                             "subject_name" => $subject->subject_name,
                             "mood" => $mood,
                             "mood_by" => 'system',
                         );
                         $thothconnect_db->insert("twitter_c" . $subject->client_id, $data);
                     }
                 } elseif ($post->type == "fb_post" || $post->type == "fb_comment") {
                     // Facebook.
                     $postData = $post->get_post_facebook($post->id);
                     if ($postData != null) {
                         $data = array(
                             "post_id" => $postData->post_id,
                             "post_date" => $postData->post_date,
                             "body" => $postData->body,
                             "type" => $postData->type,
                             "author_id" => $postData->author_id,
                             "author" => $postData->author,
                             "group_id" => $subject->group_id,
                             "group" => $subject->group,
                             "facebook_page_id" => $postData->facebook_page_id,
                             "facebook_page_name" => $postData->facebook_page_name,
                             "subject_id" => $subject->subject_id,
                             "subject_name" => $subject->subject_name,
                             "facebook_id" => $postData->facebook_id,
                             "parent_post_id" => $postData->parent_post_id,
                             "likes" => $postData->likes,
                             "shares" => $postData->shares,
                             "mood" => $mood,
                             "mood_by" => 'system',
                         );
                         $thothconnect_db->insert("facebook_c" . $subject->client_id, $data);
                     }
                 }
             }
         }
     }

     // Flag the subject as updated and record the window that was matched.
     $data = array(
         'matching_status' => 'update',
         'latest_matching' => mdate('%Y-%m-%d %H:%i:%s', time()),
         'from' => mdate('%Y-%m-%d %H:%i:%s', $from_ts),
         'to' => mdate('%Y-%m-%d %H:%i:%s', $to_ts),
     );
     $this->db->update('subject', $data, array('id' => $subject_id));
 }
Exemplo n.º 6
0
<?php

// Load the bundled Sphinx API and issue a single query that groups by day on
// one attribute while counting distinct values of that same attribute.
require 'spec/fixtures/sphinxapi.php';

$client = new SphinxClient();
$attribute = 'attr';
$client->SetGroupBy($attribute, SPH_GROUPBY_DAY);
$client->SetGroupDistinct($attribute);
$client->Query('query');
Exemplo n.º 7
0
 /**
  * Re-match one subject against the Sphinx index and record each new hit
  * inside the [$from, $to] date window as a row in the `matchs` table.
  *
  * The subject row is flagged 'matching' at the start and 'update' at the end.
  *
  * @param mixed  $subject_id   id of the row in the `subject` table
  * @param bool   $clean        when truthy, delete the subject's existing `matchs` rows first
  * @param int    $query_offset copied to $offset; used only in the progress output
  *                             and the paging comparison (SetLimits is called with offset 0)
  * @param string $from         start of the date window, parsed with strtotime()
  * @param string $to           end of the date window, parsed with strtotime()
  * @return false|null false when the subject is already flagged 'matching';
  *                    otherwise the method ends without an explicit return value
  */
 public function run($subject_id, $clean = true, $query_offset = 0, $from, $to)
 {
     $this->load->helper('sphinxapi');
     $this->load->helper('mood');
     // skip if matching_status is "matching"
     $matching_status = $this->custom_model->get_value('subject', 'matching_status', $subject_id);
     if ($matching_status == 'matching') {
         echo "subject is matching";
         return false;
     }
     // flag subject as matching so that other bot runs skip it (see the guard above)
     $this->db->update('subject', array('matching_status' => 'matching'), array('id' => $subject_id));
     // clear all match record for this subject
     if ($clean) {
         $this->db->delete('matchs', array('subject_id' => $subject_id));
     }
     //
     // begin re-matching this subject
     //
     // get search string from subject_id
     $query = $this->custom_model->get_value('subject', 'query', $subject_id);
     // sphinx init
     $cl = new SphinxClient();
     $q = $query;
     // NOTE(review): $sql, $groupby, $groupsort, $filter, $filtervals and $sortexpr
     // are only referenced from commented-out lines below.
     $sql = "";
     $mode = SPH_MATCH_EXTENDED;
     $host = "192.168.1.102";
     $port = 9312;
     $index = "*";
     $groupby = "";
     $groupsort = "@group desc";
     $filter = "group_id";
     $filtervals = array();
     $distinct = "";
     $sortby = "@id ASC";
     $sortexpr = "";
     $offset = $query_offset;
     $limit = 1000000;
     $ranker = SPH_RANK_PROXIMITY_BM25;
     $select = "";
     echo 'limit=' . $limit . ' offset=' . $offset . PHP_EOL;
     //Extract subject keyword from search string
     $keywords = get_keywords($q);
     ////////////
     // do query
     ////////////
     $cl->SetServer($host, $port);
     $cl->SetConnectTimeout(1);
     $cl->SetArrayResult(true);
     $cl->SetWeights(array(100, 1));
     $cl->SetMatchMode($mode);
     // if ( count($filtervals) )	$cl->SetFilter ( $filter, $filtervals );
     // if ( $groupby )				$cl->SetGroupBy ( $groupby, SPH_GROUPBY_ATTR, $groupsort );
     if ($sortby) {
         $cl->SetSortMode(SPH_SORT_EXTENDED, $sortby);
     }
     // if ( $sortexpr )			$cl->SetSortMode ( SPH_SORT_EXPR, $sortexpr );
     // $distinct and $select are empty strings above, so these two branches are skipped
     if ($distinct) {
         $cl->SetGroupDistinct($distinct);
     }
     if ($select) {
         $cl->SetSelect($select);
     }
     // NOTE(review): the query always starts at offset 0; $offset is not passed to SetLimits()
     if ($limit) {
         $cl->SetLimits(0, $limit, $limit > 1000000 ? $limit : 1000000);
     }
     $cl->SetRankingMode($ranker);
     $res = $cl->Query($q, $index);
     ////////////
     // do Insert to DB
     ////////////
     // Current matching: post ids already stored for this subject, used to skip duplicates
     $current_matching = array();
     $query_matchs = $this->db->get_where('matchs', array('subject_id' => $subject_id));
     if ($query_matchs->num_rows() > 0) {
         echo PHP_EOL . 'currents matching :' . $query_matchs->num_rows();
         foreach ($query_matchs->result() as $match) {
             $current_matching[] = $match->post_id;
         }
     }
     // set matching date range from-to (the string arguments are overwritten with timestamps)
     $from = strtotime($from);
     $to = strtotime($to);
     // Search and Update
     if ($res === false) {
         echo "Query failed: " . $cl->GetLastError() . ".\n";
     } else {
         if ($cl->GetLastWarning()) {
             echo "WARNING: " . $cl->GetLastWarning() . "\n\n";
         }
         echo "Query '{$q}' \nretrieved {$res['total']} of {$res['total_found']} matches in {$res['time']} sec.\n";
         if ($res['total'] == 0) {
             echo "no result<br/>\n";
         } else {
             if ($res['total'] > $limit + $offset) {
                 // NOTE(review): this call passes only two arguments, so $limit + $offset
                 // lands in the $clean position and $from/$to are not supplied. The subject
                 // was also flagged 'matching' above, so the nested call would presumably
                 // stop at the status guard — confirm the intended paging behaviour.
                 $this->run($subject_id, $limit + $offset);
             } else {
                 echo "Updating...";
                 foreach ($res["matches"] as $k => $docinfo) {
                     //					echo '('.$k.')'.$docinfo["id"]." ";
                     // if found in $current_matching then skip (loose comparison)
                     if (in_array($docinfo["id"], $current_matching)) {
                         continue;
                     } else {
                         // else insert new match
                         // Reset PHP Timeout to 1min for each document
                         set_time_limit(60);
                         $post = new Post_model();
                         $post->init($docinfo["id"]);
                         // if post_date is out of range then skip
                         $post_date = strtotime($post->post_date);
                         if ($post_date < $from || $post_date > $to) {
                             continue;
                         }
                         // the mood value is stored as both 'sentiment' and 'system_correct'
                         $mood = get_mood($post->body, $keywords);
                         $data = array('post_id' => $post->id, 'subject_id' => $subject_id, 'matching_date' => null, 'sentiment' => $mood, 'by' => 'system', 'system_correct' => $mood, 'system_correct_date' => mdate('%Y-%m-%d %H:%i', time()));
                         $this->db->insert('matchs', $data);
                     }
                 }
             }
         }
     }
     // flag subject as update, record the matched window and set bot_id to 0
     $data = array('matching_status' => 'update', 'latest_matching' => mdate('%Y-%m-%d %H:%i:%s', time()), 'from' => mdate('%Y-%m-%d %H:%i:%s', $from), 'to' => mdate('%Y-%m-%d %H:%i:%s', $to), 'bot_id' => 0);
     $this->db->update('subject', $data, array('id' => $subject_id));
 }
Exemplo n.º 8
0
 /**
  * Set the group-by attributes (and optionally a count-distinct attribute).
  *
  * @param array $group list of group definitions, for example
  * $group = array(
  *   0=>array("attrname" => "gender","func" => "attr","sort" => "@group desc"),
  * );
  * "func" must be one of: day, week, month, year, attr.
  * "sort" is optional and defaults to "@group desc".
  * @param string $distinct attribute passed to SetGroupDistinct(); skipped when empty
  */
 private function setGroups($group, $distinct = '')
 {
     // Reject anything that is not a non-empty array. The check must use "||":
     // with "&&" a non-empty non-array value (e.g. a string) slipped through
     // and reached the foreach below.
     if (!is_array($group) || empty($group)) {
         $this->halt('The argv must be an array and not null.', 1002);
         return;
     }
     $func = array('day' => SPH_GROUPBY_DAY, 'week' => SPH_GROUPBY_WEEK, 'month' => SPH_GROUPBY_MONTH, 'year' => SPH_GROUPBY_YEAR, 'attr' => SPH_GROUPBY_ATTR);
     foreach ($group as $v) {
         // Every entry needs an attribute name and a known grouping function;
         // otherwise SetGroupBy() would be called with null arguments.
         if (!is_array($v) || !isset($v['attrname'], $v['func']) || !isset($func[$v['func']])) {
             $this->halt('The argv must be an array and not null.', 1002);
             return;
         }
         // Fall back to the client's own default sort clause when none is given.
         $sort = isset($v['sort']) ? $v['sort'] : '@group desc';
         parent::SetGroupBy($v['attrname'], $func[$v['func']], $sort);
     }
     if ($distinct) {
         parent::SetGroupDistinct($distinct);
     }
 }