/**
 * Transfers every setting carried by a search criteria object to the Sphinx client.
 *
 * Settings are applied in this order: match mode, rank mode, sort mode, select
 * list, limits, grouping, id range, field weights, index weights, filters,
 * range filters, max query time and, when VER_COMMAND_SEARCH is at least
 * 0x11d, the additional options.
 *
 * @param ESphinxSearchCriteria $criteria criteria whose values are pushed to the client
 */
protected function applyCriteria(ESphinxSearchCriteria $criteria)
{
    $client = $this->sphinxClient;

    $this->applyMatchMode($criteria->matchMode);
    $this->applyRankMode($criteria);

    // The extended sort mode takes an "attr direction, attr direction" clause
    // assembled from the orders map; every other mode uses the sort-by value.
    if ($criteria->sortMode == ESphinxSort::EXTENDED) {
        $clauses = array();
        $orderMap = $criteria->getOrders();
        if ($orderMap) {
            foreach ($orderMap as $attribute => $direction) {
                $clauses[] = $attribute . ' ' . $direction;
            }
        }
        $sortArgument = implode(', ', $clauses);
    } else {
        $sortArgument = $criteria->getSortBy();
    }
    $this->applySortMode($criteria->sortMode, $sortArgument);

    // select list
    if (strlen($criteria->select) > 0) {
        $client->SetSelect($criteria->select);
    }

    // limits are only sent when a limit is set
    if ($criteria->limit) {
        $client->SetLimits(
            $criteria->offset,
            $criteria->limit,
            $criteria->maxMatches,
            $criteria->cutOff
        );
    }

    // grouping
    if ($criteria->groupBy) {
        $client->SetGroupBy(
            $criteria->groupBy,
            $criteria->groupByFunc,
            $criteria->groupBySort
        );
    }
    if ($criteria->groupDistinct) {
        $client->SetGroupDistinct($criteria->groupDistinct);
    }

    // id range
    if ($criteria->getIsIdRangeSetted()) {
        $client->SetIDRange($criteria->getMinId(), $criteria->getMaxId());
    }

    // weights, filters and ranges
    $this->applyFieldWeights($criteria->getFieldWeights());
    $this->applyIndexWeights($criteria->getIndexWeights());
    $this->applyFilters($criteria->getFilters());
    $this->applyRanges($criteria->getRangeFilters());

    // A timeout set on the criteria wins over the component-wide default.
    if ($criteria->queryTimeout !== null) {
        $maxQueryTime = $criteria->queryTimeout;
    } else {
        $maxQueryTime = $this->_queryTimeout;
    }
    $client->SetMaxQueryTime($maxQueryTime);

    if (VER_COMMAND_SEARCH >= 0x11d) {
        $this->applyOptions($criteria);
    }
}
/**
 * Runs a full-text query against the Sphinx daemon on localhost:9312 and
 * returns the attributes of every matching document.
 *
 * Multi-value attributes are rendered as "(v1,v2,...)" and timestamp
 * attributes as "Y-m-d H:i:s" strings; all other attribute values are
 * returned as delivered by the client.
 *
 * @param string $search_str query text handed to SphinxClient::Query()
 * @return array|null map of document id => (attribute name => value), or
 *                    null when the query failed or the response carries no
 *                    "matches" array
 */
function do_query($search_str)
{
    // Query settings. Empty values leave the corresponding client call out.
    $mode = SPH_MATCH_ALL;
    $host = "localhost";
    $port = 9312;
    $index = "*";
    $groupby = "";
    $groupsort = "@group desc";
    $filter = "group_id";
    $filtervals = array();
    $distinct = "";
    $sortby = "";
    $sortexpr = "";
    $limit = 20;
    $ranker = SPH_RANK_PROXIMITY_BM25;
    $select = "*";

    $cl = new SphinxClient();
    $cl->SetServer($host, $port);
    $cl->SetConnectTimeout(1);
    $cl->SetArrayResult(true);
    $cl->SetWeights(array(100, 1));
    $cl->SetMatchMode($mode);
    if (count($filtervals)) {
        $cl->SetFilter($filter, $filtervals);
    }
    if ($groupby) {
        $cl->SetGroupBy($groupby, SPH_GROUPBY_ATTR, $groupsort);
    }
    if ($sortby) {
        $cl->SetSortMode(SPH_SORT_EXTENDED, $sortby);
    }
    if ($sortexpr) {
        $cl->SetSortMode(SPH_SORT_EXPR, $sortexpr);
    }
    if ($distinct) {
        $cl->SetGroupDistinct($distinct);
    }
    if ($select) {
        $cl->SetSelect($select);
    }
    if ($limit) {
        $cl->SetLimits(0, $limit, $limit > 1000 ? $limit : 1000);
    }
    $cl->SetRankingMode($ranker);

    $res = $cl->Query($search_str, $index);

    // Guard against a failed query (non-array result) and against a response
    // without a "matches" array before indexing into it.
    if (!is_array($res) || !isset($res["matches"]) || !is_array($res["matches"])) {
        return null;
    }

    $attr_types = (isset($res["attrs"]) && is_array($res["attrs"])) ? $res["attrs"] : array();

    $results = array();
    foreach ($res["matches"] as $docinfo) {
        $attr_array = array();
        foreach ($attr_types as $attrname => $attrtype) {
            $value = $docinfo["attrs"][$attrname];
            if ($attrtype == SPH_ATTR_MULTI || $attrtype == SPH_ATTR_MULTI64) {
                $value = "(" . join(",", $value) . ")";
            } elseif ($attrtype == SPH_ATTR_TIMESTAMP) {
                $value = date("Y-m-d H:i:s", $value);
            }
            $attr_array[$attrname] = $value;
        }
        // The key must be the quoted string 'id'; a bare id is an undefined constant.
        $results[$docinfo["id"]] = $attr_array;
    }

    return $results;
}
// Close the handle left in $file by the preceding code (not visible here).
fclose($file);
$client->SetMatchMode(SPH_MATCH_ALL);

// Each section below changes one client setting, queues the query "test "
// with AddQuery(), writes the request stored in $client->_reqs to a fixture
// file and then resets the setting again.

// filter
$client->SetFilter("id", array(10, 100, 1000));
$file = fopen("spec/fixtures/data/filter.bin", "w");
$reqIndex = $client->AddQuery("test ");
fwrite($file, $client->_reqs[$reqIndex]);
fclose($file);
$client->ResetFilters();

// group
$client->SetGroupBy("id", SPH_GROUPBY_ATTR, "id");
$file = fopen("spec/fixtures/data/group.bin", "w");
$reqIndex = $client->AddQuery("test ");
fwrite($file, $client->_reqs[$reqIndex]);
fclose($file);
$client->ResetGroupBy();

// distinct
$client->SetGroupDistinct("id");
$file = fopen("spec/fixtures/data/distinct.bin", "w");
$reqIndex = $client->AddQuery("test ");
fwrite($file, $client->_reqs[$reqIndex]);
fclose($file);
$client->ResetGroupBy();

// weights
$client->SetWeights(array(100, 1));
$file = fopen("spec/fixtures/data/weights.bin", "w");
$reqIndex = $client->AddQuery("test ");
fwrite($file, $client->_reqs[$reqIndex]);
fclose($file);
$client->SetWeights(array());

// anchor
$client->SetGeoAnchor("latitude", "longitude", 10.0, 95.0);
$file = fopen("spec/fixtures/data/anchor.bin", "w");
$reqIndex = $client->AddQuery("test ");
fwrite($file, $client->_reqs[$reqIndex]);
fclose($file);
// Configure the client from the option variables set earlier in the script
// (not visible here). Each optional setting is sent only when its variable
// is non-empty.
$cl->SetWeights(array(100, 1));
$cl->SetMatchMode($mode);
if (count($filtervals)) {
    $cl->SetFilter($filter, $filtervals);
}
if ($groupby) {
    $cl->SetGroupBy($groupby, SPH_GROUPBY_ATTR, $groupsort);
}
if ($sortby) {
    $cl->SetSortMode(SPH_SORT_EXTENDED, $sortby);
}
// When both $sortby and $sortexpr are set, this second call is the one made last.
if ($sortexpr) {
    $cl->SetSortMode(SPH_SORT_EXPR, $sortexpr);
}
if ($distinct) {
    $cl->SetGroupDistinct($distinct);
}
if ($select) {
    $cl->SetSelect($select);
}
// The third argument is never below 1000, even for smaller limits.
if ($limit) {
    $cl->SetLimits(0, $limit, $limit > 1000 ? $limit : 1000);
}
$cl->SetRankingMode($ranker);
$res = $cl->Query($q, $index);

// Print the outcome: a false result is reported together with the client's
// last error message.
if ($res === false) {
    print "Query failed: " . $cl->GetLastError() . ".\n";
} else {
/**
 * Re-matches one subject against the Sphinx index and copies every matching
 * post into the per-client website / twitter / facebook tables of the
 * "thothconnect" database.
 *
 * Steps: bail out when the subject is already flagged "matching"; optionally
 * delete the subject's rows from the per-client tables; run the subject's
 * stored query through Sphinx; for each hit whose post_date lies inside
 * [$from, $to] compute its mood and insert it into the table that matches
 * the post type; finally mark the subject as "update" with the date range used.
 *
 * Progress and errors are echoed. Duplicate detection against previously
 * stored matches is disabled in this variant, so every in-range hit is inserted.
 *
 * NOTE(review): this variant does not flag the subject as "matching" while it
 * runs (that update is disabled), so concurrent runs are not blocked.
 * NOTE(review): $query_offset is echoed and used in the re-run check but is
 * never passed to SetLimits(), so a re-run repeats the same window — confirm
 * the intended paging behaviour.
 *
 * @param mixed  $subject_id   id of the row in the subject table
 * @param bool   $clean        when truthy, existing rows of this subject are deleted first
 * @param int    $query_offset offset used for the re-run check (see note above)
 * @param string $from         start of the accepted post date range, parsed with strtotime()
 * @param string $to           end of the accepted post date range, parsed with strtotime()
 * @return false|null false when the subject is already matching or cannot be found
 */
public function run($subject_id, $clean = true, $query_offset = 0, $from, $to)
{
    $this->load->helper('sphinxapi');
    $this->load->helper('mood');

    // skip if matching_status is "matching"
    $matching_status = $this->custom_model->get_value('subject', 'matching_status', $subject_id);
    if ($matching_status == 'matching') {
        echo "subject is matching";
        return false;
    }

    // Second connection: the database that holds the per-client result tables.
    $config = array(
        'hostname' => "192.168.1.102",
        'username' => "******",
        'password' => "******",
        'database' => "thothconnect",
        'dbdriver' => "mysql",
        'dbprefix' => "",
        'pconnect' => FALSE,
        'db_debug' => TRUE,
        'cache_on' => FALSE,
        'cachedir' => "",
        'char_set' => "utf8",
        'dbcollat' => "utf8_general_ci",
    );
    $thothconnect_db = $this->load->database($config, true);

    // Bound parameter instead of string concatenation, so $subject_id cannot alter the SQL.
    $query = $this->db->query("SELECT client_id FROM subject WHERE id = ?", array($subject_id));
    $row = $query->row();
    if (!$row) {
        // Without a subject row there is no client id to build the table names from.
        echo "subject not found";
        return false;
    }
    $client_id = $row->client_id;

    // clear all match records for this subject
    if ($clean) {
        $thothconnect_db->delete('website_c' . $client_id, array('subject_id' => $subject_id));
        $thothconnect_db->delete('twitter_c' . $client_id, array('subject_id' => $subject_id));
        $thothconnect_db->delete('facebook_c' . $client_id, array('subject_id' => $subject_id));
    }

    //
    // begin re-matching this subject
    //

    // get search string from subject_id
    $q = $this->custom_model->get_value('subject', 'query', $subject_id);

    // sphinx settings
    $mode = SPH_MATCH_EXTENDED;
    $host = "192.168.1.102";
    $port = 9312;
    $index = "*";
    $distinct = "";
    $sortby = "@id ASC";
    $offset = $query_offset;
    $limit = 1000000;
    $ranker = SPH_RANK_PROXIMITY_BM25;
    $select = "";

    echo 'limit=' . $limit . ' offset=' . $offset . PHP_EOL;

    // Extract subject keywords from the search string
    $keywords = get_keywords($q);

    // do query
    $cl = new SphinxClient();
    $cl->SetServer($host, $port);
    $cl->SetConnectTimeout(1);
    $cl->SetArrayResult(true);
    $cl->SetWeights(array(100, 1));
    $cl->SetMatchMode($mode);
    if ($sortby) {
        $cl->SetSortMode(SPH_SORT_EXTENDED, $sortby);
    }
    if ($distinct) {
        $cl->SetGroupDistinct($distinct);
    }
    if ($select) {
        $cl->SetSelect($select);
    }
    if ($limit) {
        $cl->SetLimits(0, $limit, $limit > 1000000 ? $limit : 1000000);
    }
    $cl->SetRankingMode($ranker);
    $res = $cl->Query($q, $index);

    // Matching date range as timestamps. The original strings stay untouched
    // in $from / $to so they can be handed to a re-run unchanged.
    $from_ts = strtotime($from);
    $to_ts = strtotime($to);

    // Search and Update
    if ($res === false) {
        echo "Query failed: " . $cl->GetLastError() . ".\n";
    } else {
        if ($cl->GetLastWarning()) {
            echo "WARNING: " . $cl->GetLastWarning() . "\n\n";
        }
        echo "Query '{$q}' \nretrieved {$res['total']} of {$res['total_found']} matches in {$res['time']} sec.\n";

        if ($res['total'] == 0) {
            echo "no result<br/>\n";
        } elseif ($res['total'] > $limit + $offset) {
            // Re-run with the advanced offset. Every argument is passed in its
            // own position; the rows were already cleaned above, so $clean is false.
            $this->run($subject_id, false, $limit + $offset, $from, $to);
        } else {
            echo "Updating...";
            foreach ($res["matches"] as $docinfo) {
                // Reset PHP timeout to 1 min for each processed match
                set_time_limit(60);

                $post = new Post_model();
                $post->init($docinfo["id"]);

                // if post_date is out of range then skip
                $post_date = strtotime($post->post_date);
                if ($post_date < $from_ts || $post_date > $to_ts) {
                    continue;
                }

                $mood = get_mood($post->body, $keywords);
                $subject = $post->get_subject($subject_id);

                if ($post->type == "post" || $post->type == "comment") {
                    $postData = $post->get_post_website($post->id);
                    if ($postData != null) {
                        $data = array();
                        $data["post_id"] = $postData->post_id;
                        $data["post_date"] = $postData->post_date;
                        $data["title"] = $postData->title;
                        $data["body"] = $postData->body;
                        $data["type"] = $postData->type;
                        $data["author_id"] = $postData->author_id;
                        $data["author"] = $postData->author;
                        $data["website_id"] = $postData->website_id;
                        $data["website_name"] = $postData->website_name;
                        $data["website_cate_id"] = $postData->website_cate_id;
                        $data["website_cate"] = $postData->website_cate;
                        $data["website_type_id"] = $postData->website_type_id;
                        $data["website_type"] = $postData->website_type;
                        $data["group_id"] = $subject->group_id;
                        $data["group"] = $subject->group;
                        // root_url loses its last character before the post url is appended
                        $data["url"] = substr($postData->root_url, 0, -1) . "" . $postData->url;
                        $data["page_id"] = $postData->page_id;
                        $data["subject_id"] = $subject->subject_id;
                        $data["subject_name"] = $subject->subject_name;
                        $data["mood"] = $mood;
                        $data["mood_by"] = 'system';
                        $thothconnect_db->insert("website_c" . $subject->client_id, $data);
                        $post->insert_post_comment($postData->page_id, $subject->client_id, $thothconnect_db);
                    }
                } elseif ($post->type == "tweet" || $post->type == "retweet") {
                    $postData = $post->get_post_twitter($post->id);
                    if ($postData != null) {
                        $data = array();
                        $data["post_id"] = $postData->post_id;
                        $data["post_date"] = $postData->post_date;
                        $data["body"] = $postData->body;
                        $data["type"] = $postData->type;
                        $data["author_id"] = $postData->author_id;
                        $data["author"] = $postData->author;
                        $data["group_id"] = $subject->group_id;
                        $data["group"] = $subject->group;
                        $data["tweet_id"] = $postData->tweet_id;
                        $data["subject_id"] = $subject->subject_id;
                        $data["subject_name"] = $subject->subject_name;
                        $data["mood"] = $mood;
                        $data["mood_by"] = 'system';
                        $thothconnect_db->insert("twitter_c" . $subject->client_id, $data);
                    }
                } elseif ($post->type == "fb_post" || $post->type == "fb_comment") {
                    $postData = $post->get_post_facebook($post->id);
                    if ($postData != null) {
                        $data = array();
                        $data["post_id"] = $postData->post_id;
                        $data["post_date"] = $postData->post_date;
                        $data["body"] = $postData->body;
                        $data["type"] = $postData->type;
                        $data["author_id"] = $postData->author_id;
                        $data["author"] = $postData->author;
                        $data["group_id"] = $subject->group_id;
                        $data["group"] = $subject->group;
                        $data["facebook_page_id"] = $postData->facebook_page_id;
                        $data["facebook_page_name"] = $postData->facebook_page_name;
                        $data["subject_id"] = $subject->subject_id;
                        $data["subject_name"] = $subject->subject_name;
                        $data["facebook_id"] = $postData->facebook_id;
                        $data["parent_post_id"] = $postData->parent_post_id;
                        $data["likes"] = $postData->likes;
                        $data["shares"] = $postData->shares;
                        $data["mood"] = $mood;
                        $data["mood_by"] = 'system';
                        $thothconnect_db->insert("facebook_c" . $subject->client_id, $data);
                    }
                }
            }
        }
    }

    // flag subject as updated, recording when it ran and which date range was used
    $data = array(
        'matching_status' => 'update',
        'latest_matching' => mdate('%Y-%m-%d %H:%i:%s', time()),
        'from' => mdate('%Y-%m-%d %H:%i:%s', $from_ts),
        'to' => mdate('%Y-%m-%d %H:%i:%s', $to_ts),
    );
    $this->db->update('subject', $data, array('id' => $subject_id));
}
<?php
// Issues a single query through the bundled Sphinx client with grouping by
// day on 'attr' and a distinct count on the same attribute.
require "spec/fixtures/sphinxapi.php";

$sphinx = new SphinxClient();
$sphinx->SetGroupBy('attr', SPH_GROUPBY_DAY);
$sphinx->SetGroupDistinct('attr');
$sphinx->Query('query');
/**
 * Re-matches one subject against the Sphinx index and stores every new
 * matching post in the matchs table.
 *
 * Steps: bail out when the subject is already flagged "matching"; flag it as
 * "matching"; optionally delete its existing rows from matchs; run the
 * subject's stored query through Sphinx; for each hit that is not stored yet
 * and whose post_date lies inside [$from, $to], compute its mood and insert a
 * matchs row; finally mark the subject as "update" with the date range used.
 *
 * Progress and errors are echoed.
 *
 * NOTE(review): $query_offset is echoed and used in the re-run check but is
 * never passed to SetLimits(), and a re-run starts while the subject is still
 * flagged "matching", so it returns at the status check — confirm the
 * intended paging behaviour.
 *
 * @param mixed  $subject_id   id of the row in the subject table
 * @param bool   $clean        when truthy, existing matchs rows of this subject are deleted first
 * @param int    $query_offset offset used for the re-run check (see note above)
 * @param string $from         start of the accepted post date range, parsed with strtotime()
 * @param string $to           end of the accepted post date range, parsed with strtotime()
 * @return false|null false when the subject is already being matched
 */
public function run($subject_id, $clean = true, $query_offset = 0, $from, $to)
{
    $this->load->helper('sphinxapi');
    $this->load->helper('mood');

    // skip if matching_status is "matching"
    $matching_status = $this->custom_model->get_value('subject', 'matching_status', $subject_id);
    if ($matching_status == 'matching') {
        echo "subject is matching";
        return false;
    }

    // flag subject as matching so other bot runs skip it
    $this->db->update('subject', array('matching_status' => 'matching'), array('id' => $subject_id));

    // clear all match records for this subject
    if ($clean) {
        $this->db->delete('matchs', array('subject_id' => $subject_id));
    }

    //
    // begin re-matching this subject
    //

    // get search string from subject_id
    $q = $this->custom_model->get_value('subject', 'query', $subject_id);

    // sphinx settings
    $mode = SPH_MATCH_EXTENDED;
    $host = "192.168.1.102";
    $port = 9312;
    $index = "*";
    $distinct = "";
    $sortby = "@id ASC";
    $offset = $query_offset;
    $limit = 1000000;
    $ranker = SPH_RANK_PROXIMITY_BM25;
    $select = "";

    echo 'limit=' . $limit . ' offset=' . $offset . PHP_EOL;

    // Extract subject keywords from the search string
    $keywords = get_keywords($q);

    // do query
    $cl = new SphinxClient();
    $cl->SetServer($host, $port);
    $cl->SetConnectTimeout(1);
    $cl->SetArrayResult(true);
    $cl->SetWeights(array(100, 1));
    $cl->SetMatchMode($mode);
    if ($sortby) {
        $cl->SetSortMode(SPH_SORT_EXTENDED, $sortby);
    }
    if ($distinct) {
        $cl->SetGroupDistinct($distinct);
    }
    if ($select) {
        $cl->SetSelect($select);
    }
    if ($limit) {
        $cl->SetLimits(0, $limit, $limit > 1000000 ? $limit : 1000000);
    }
    $cl->SetRankingMode($ranker);
    $res = $cl->Query($q, $index);

    // Post ids already stored for this subject, kept as array keys so the
    // per-hit membership test below is a constant-time lookup.
    $current_matching = array();
    $query_matchs = $this->db->get_where('matchs', array('subject_id' => $subject_id));
    if ($query_matchs->num_rows() > 0) {
        echo PHP_EOL . 'currents matching :' . $query_matchs->num_rows();
        foreach ($query_matchs->result() as $match) {
            $current_matching[$match->post_id] = true;
        }
    }

    // Matching date range as timestamps. The original strings stay untouched
    // in $from / $to so they can be handed to a re-run unchanged.
    $from_ts = strtotime($from);
    $to_ts = strtotime($to);

    // Search and Update
    if ($res === false) {
        echo "Query failed: " . $cl->GetLastError() . ".\n";
    } else {
        if ($cl->GetLastWarning()) {
            echo "WARNING: " . $cl->GetLastWarning() . "\n\n";
        }
        echo "Query '{$q}' \nretrieved {$res['total']} of {$res['total_found']} matches in {$res['time']} sec.\n";

        if ($res['total'] == 0) {
            echo "no result<br/>\n";
        } elseif ($res['total'] > $limit + $offset) {
            // Re-run with the advanced offset. Every argument is passed in its
            // own position; the rows were already cleaned above, so $clean is false.
            $this->run($subject_id, false, $limit + $offset, $from, $to);
        } else {
            echo "Updating...";
            foreach ($res["matches"] as $docinfo) {
                // already stored for this subject: skip
                if (isset($current_matching[$docinfo["id"]])) {
                    continue;
                }

                // Reset PHP timeout to 1 min for each new match
                set_time_limit(60);

                $post = new Post_model();
                $post->init($docinfo["id"]);

                // if post_date is out of range then skip
                $post_date = strtotime($post->post_date);
                if ($post_date < $from_ts || $post_date > $to_ts) {
                    continue;
                }

                $mood = get_mood($post->body, $keywords);
                $data = array(
                    'post_id' => $post->id,
                    'subject_id' => $subject_id,
                    'matching_date' => null,
                    'sentiment' => $mood,
                    'by' => 'system',
                    'system_correct' => $mood,
                    'system_correct_date' => mdate('%Y-%m-%d %H:%i', time()),
                );
                $this->db->insert('matchs', $data);
            }
        }
    }

    // flag subject as updated, recording when it ran and which date range was used
    $data = array(
        'matching_status' => 'update',
        'latest_matching' => mdate('%Y-%m-%d %H:%i:%s', time()),
        'from' => mdate('%Y-%m-%d %H:%i:%s', $from_ts),
        'to' => mdate('%Y-%m-%d %H:%i:%s', $to_ts),
        'bot_id' => 0,
    );
    $this->db->update('subject', $data, array('id' => $subject_id));
}
/**
 * Set the group-by attributes.
 *
 * Each entry of $group is forwarded to SetGroupBy(); the "func" name is
 * translated to the matching SPH_GROUPBY_* constant.
 * NOTE(review): SetGroupBy() is called once per entry, so with several
 * entries presumably only the last call stays in effect — confirm against
 * the client.
 *
 * @param array $group list of group definitions, e.g.
 *        $group = array(
 *            0 => array("attrname" => "gender", "func" => "attr", "sort" => "@group desc"),
 *        );
 *        "func" is one of: day, week, month, year, attr. When "sort" is
 *        missing, "@group desc" is used.
 * @param string $distinct attribute handed to SetGroupDistinct(); skipped when empty
 */
private function setGroups($group, $distinct = '')
{
    // Reject anything that is not an array or is an empty one, as the message states.
    if (!is_array($group) || empty($group)) {
        $this->halt('The argv must be an array and not null.', 1002);
        // Stop here in case halt() returns, so invalid input is never iterated.
        return;
    }

    $func = array(
        'day' => SPH_GROUPBY_DAY,
        'week' => SPH_GROUPBY_WEEK,
        'month' => SPH_GROUPBY_MONTH,
        'year' => SPH_GROUPBY_YEAR,
        'attr' => SPH_GROUPBY_ATTR,
    );

    foreach ($group as $v) {
        // Fall back to a default sort clause instead of passing null.
        $sort = isset($v['sort']) ? $v['sort'] : '@group desc';
        parent::SetGroupBy($v['attrname'], $func[$v['func']], $sort);
    }

    if ($distinct) {
        parent::SetGroupDistinct($distinct);
    }
}