/**
 * Run a query against the Sphinx client and wrap the outcome.
 *
 * A falsy response from the client is reported as an exception carrying the
 * client's last error. When the response carries a non-empty 'warning' entry,
 * the client's last warning text is returned instead of a result object.
 *
 * @param string $query Search expression handed to the client.
 * @param string $index Index name(s) to search; '*' by default.
 * @return SphinxResult|string Wrapped result, or the warning text.
 * @throws \Exception When the client returns a falsy response.
 */
public function search($query, $index = '*')
{
    $client = $this->_sphinx_client;
    $response = $client->Query($query, $index);

    if (!$response) {
        throw new \Exception("Sphinx client error: " . $client->GetLastError());
    }

    $hasWarning = !empty($response['warning']);

    return $hasWarning
        ? $client->GetLastWarning()
        : new SphinxResult($response);
}
/**
 * {@inheritdoc}
 */
public function query($query, $offset, $perPage, SearchEngineOptions $options = null)
{
    $options = null === $options ? new SearchEngineOptions() : $options;
    $this->applyOptions($options);

    assert(is_int($offset));
    assert($offset >= 0);
    assert(is_int($perPage));

    $query = $this->parseQuery($query);

    // A "recordid=N" / "storyid=N" expression becomes an attribute filter
    // and the full-text part of the query is dropped.
    $idLookup = preg_match('/\\s?(recordid|storyid)\\s?=\\s?([0-9]+)/i', $query, $matches, 0, 0);
    if ($idLookup > 0) {
        $this->sphinx->SetFilter('record_id', [$matches[2]]);
        $query = '';
    }

    $this->sphinx->SetLimits($offset, $perPage);
    $this->sphinx->SetMatchMode(SPH_MATCH_EXTENDED2);

    $index = $this->getQueryIndex($query, $options);
    $response = $this->sphinx->Query($query, $index);
    $records = new ArrayCollection();

    // Failed query: report the error with an empty, zeroed result.
    if (false === $response) {
        $error = true === $this->sphinx->IsConnectError()
            ? $this->app->trans('Sphinx server is offline')
            : $this->sphinx->GetLastError();
        $warning = $this->sphinx->GetLastWarning();

        return new SearchEngineResult($records, $query, 0, $offset, 0, 0, $error, $warning, new ArrayCollection(), [], $index);
    }

    $error = $response['error'];
    $warning = $response['warning'];
    $duration = $response['time'];
    $total = $response['total_found'];
    $available = $response['total'];

    // The position advances for every match, including those whose record
    // could not be instantiated.
    $position = $offset;
    $hits = isset($response['matches']) ? $response['matches'] : [];
    foreach ($hits as $hit) {
        try {
            $records->add(new \record_adapter($this->app, $hit['attrs']['sbas_id'], $hit['attrs']['record_id'], $position));
        } catch (\Exception $e) {
        }
        ++$position;
    }

    $suggestions = $this->getSuggestions($query, $options);

    return new SearchEngineResult($records, $query, $duration, $offset, $available, $total, $error, $warning, $suggestions, '', $index);
}
/**
 * Run all queued queries and wrap each response in an ESphinxResult.
 *
 * Any client-level error or warning, a non-array response, or a per-query
 * 'error' entry aborts with an ESphinxException.
 *
 * @return ESphinxResult[]
 * @throws ESphinxException
 */
protected function execute()
{
    $client = $this->sphinxClient;
    $responses = $client->RunQueries();

    $lastError = $client->GetLastError();
    if ($lastError) {
        throw new ESphinxException($lastError);
    }

    $lastWarning = $client->GetLastWarning();
    if ($lastWarning) {
        throw new ESphinxException($lastWarning);
    }

    if (!is_array($responses)) {
        throw new ESphinxException("Sphinx client returns result not array");
    }

    $wrapped = array();
    foreach ($responses as $response) {
        $failed = isset($response['error']) && strlen($response['error']) > 0;
        if ($failed) {
            throw new ESphinxException($response['error']);
        }
        $wrapped[] = new ESphinxResult($response);
    }

    return $wrapped;
}
} if ($select) { $cl->SetSelect($select); } if ($limit) { $cl->SetLimits(0, $limit, $limit > 1000 ? $limit : 1000); } $cl->SetRankingMode($ranker); $res = $cl->Query($q, $index); //////////////// // print me out //////////////// if ($res === false) { print "Query failed: " . $cl->GetLastError() . ".\n"; } else { if ($cl->GetLastWarning()) { print "WARNING: " . $cl->GetLastWarning() . "\n\n"; } print "Query '{$q}' retrieved {$res['total']} of {$res['total_found']} matches in {$res['time']} sec.\n"; print "Query stats:\n"; if (is_array($res["words"])) { foreach ($res["words"] as $word => $info) { print " '{$word}' found {$info['hits']} times in {$info['docs']} documents\n"; } } print "\n"; if (is_array($res["matches"])) { $n = 1; print "Matches:\n"; foreach ($res["matches"] as $docinfo) { print "{$n}. doc_id={$docinfo['id']}, weight={$docinfo['weight']}";
if ($forum = intval(trim($forum))) { $sp_fids[] = $forum; } } !empty($sp_fids) && $cl->setfilter('fid', $sp_fids); } //分页 $sp_page = isset($page) ? abs((int) $page) : 1; $sp_page = max(1, $sp_page); $sp_perpage = 10; $sp_start = ($page - 1) * $sp_perpage; $sp_total_result = 100; $cl->setlimits($sp_start, $sp_perpage); $res = $cl->query($sp_keyword, $sp_index); //* 显示sphinx错误信息,用于调试 if ($cl->GetLastWarning()) { var_dump($res); die("WARNING: " . $cl->GetLastWarning() . "\n\n"); } //*/ if (empty($res['matches'])) { include template('search_threads'); exit; } //分页 $query_string = 'search.php?' . preg_replace('|&page=\\d+|', '', $_SERVER['QUERY_STRING']); $multipage = multi($res['total'], $sp_perpage, $sp_page, $query_string); //从数据获取信息 $sp_res_keys = array_keys($res['matches']); $sp_find_ids = join(',', $sp_res_keys); $sp_res_order = array_flip($sp_res_keys);
/**
 * Re-match one subject against the Sphinx index and copy every matching post
 * inside the requested date range into the per-client tables of the
 * "thothconnect" database (website_c<ID>, twitter_c<ID>, facebook_c<ID>).
 *
 * Progress and errors are echoed. When done, the subject row is flagged as
 * 'update' together with the processed date range.
 *
 * @param int    $subject_id   Id of the subject row to re-match.
 * @param bool   $clean        When true, existing per-client rows for the subject are deleted first.
 * @param int    $query_offset Offset echoed and used in the paging check.
 * @param string $from         Start of the date range, any strtotime() format.
 * @param string $to           End of the date range, any strtotime() format.
 * @return false|null false when the run is skipped; otherwise nothing.
 */
public function run($subject_id, $clean = true, $query_offset = 0, $from, $to)
{
    $this->load->helper('sphinxapi');
    $this->load->helper('mood');

    // skip if matching_status is "matching"
    $matching_status = $this->custom_model->get_value('subject', 'matching_status', $subject_id);
    if ($matching_status == 'matching') {
        echo "subject is matching";
        return false;
    }

    // Connection to the database that holds the per-client result tables.
    $config['hostname'] = "192.168.1.102";
    $config['username'] = "******";
    $config['password'] = "******";
    $config['database'] = "thothconnect";
    $config['dbdriver'] = "mysql";
    $config['dbprefix'] = "";
    $config['pconnect'] = FALSE;
    $config['db_debug'] = TRUE;
    $config['cache_on'] = FALSE;
    $config['cachedir'] = "";
    $config['char_set'] = "utf8";
    $config['dbcollat'] = "utf8_general_ci";
    $thothconnect_db = $this->load->database($config, true);

    // Bound parameter instead of string concatenation: $subject_id comes from the caller.
    $query = $this->db->query("SELECT client_id FROM subject WHERE id = ?", array($subject_id));
    $row = $query->row();
    if (empty($row)) {
        // Without a subject row there is no client id, hence no target tables.
        echo "subject not found";
        return false;
    }
    $client_id = $row->client_id;

    // clear all match records for this subject
    if ($clean) {
        $thothconnect_db->delete('website_c' . $client_id, array('subject_id' => $subject_id));
        $thothconnect_db->delete('twitter_c' . $client_id, array('subject_id' => $subject_id));
        $thothconnect_db->delete('facebook_c' . $client_id, array('subject_id' => $subject_id));
    }

    //
    // begin re-matching this subject
    //
    // get search string from subject_id
    $q = $this->custom_model->get_value('subject', 'query', $subject_id);

    // sphinx init
    $mode = SPH_MATCH_EXTENDED;
    $host = "192.168.1.102";
    $port = 9312;
    $index = "*";
    $sortby = "@id ASC";
    $offset = $query_offset;
    $limit = 1000000;
    $ranker = SPH_RANK_PROXIMITY_BM25;
    echo 'limit=' . $limit . ' offset=' . $offset . PHP_EOL;

    // Extract subject keywords from the search string
    $keywords = get_keywords($q);

    ////////////
    // do query
    ////////////
    $cl = new SphinxClient();
    $cl->SetServer($host, $port);
    $cl->SetConnectTimeout(1);
    $cl->SetArrayResult(true);
    $cl->SetWeights(array(100, 1));
    $cl->SetMatchMode($mode);
    $cl->SetSortMode(SPH_SORT_EXTENDED, $sortby);
    // NOTE(review): the window always starts at 0; $offset is only echoed and
    // used in the paging check below - confirm whether it should be applied here.
    $cl->SetLimits(0, $limit, max($limit, 1000000));
    $cl->SetRankingMode($ranker);
    $res = $cl->Query($q, $index);

    // Keep the caller's raw range for the recursive call; work with timestamps locally.
    $from_ts = strtotime($from);
    $to_ts = strtotime($to);

    ////////////
    // do Insert to DB
    ////////////
    if ($res === false) {
        echo "Query failed: " . $cl->GetLastError() . ".\n";
    } else {
        if ($cl->GetLastWarning()) {
            echo "WARNING: " . $cl->GetLastWarning() . "\n\n";
        }
        echo "Query '{$q}' \nretrieved {$res['total']} of {$res['total_found']} matches in {$res['time']} sec.\n";

        if ($res['total'] == 0) {
            echo "no result<br/>\n";
        } elseif ($res['total'] > $limit + $offset) {
            // Arguments follow the declared order. The previous call passed the
            // new offset in the $clean slot and omitted $from/$to altogether.
            $this->run($subject_id, false, $limit + $offset, $from, $to);
        } else {
            echo "Updating...";
            foreach ($res["matches"] as $docinfo) {
                // Reset PHP timeout to 1 min for each post
                set_time_limit(60);

                $post = new Post_model();
                $post->init($docinfo["id"]);

                // skip posts outside the requested date range
                $post_date = strtotime($post->post_date);
                if ($post_date < $from_ts || $post_date > $to_ts) {
                    continue;
                }

                $mood = get_mood($post->body, $keywords);
                $subject = $post->get_subject($subject_id);

                if ($post->type == "post" || $post->type == "comment") {
                    $postData = $post->get_post_website($post->id);
                    if ($postData != null) {
                        $data = array(
                            "post_id" => $postData->post_id,
                            "post_date" => $postData->post_date,
                            "title" => $postData->title,
                            "body" => $postData->body,
                            "type" => $postData->type,
                            "author_id" => $postData->author_id,
                            "author" => $postData->author,
                            "website_id" => $postData->website_id,
                            "website_name" => $postData->website_name,
                            "website_cate_id" => $postData->website_cate_id,
                            "website_cate" => $postData->website_cate,
                            "website_type_id" => $postData->website_type_id,
                            "website_type" => $postData->website_type,
                            "group_id" => $subject->group_id,
                            "group" => $subject->group,
                            // root_url loses its last character before the post URL is appended
                            "url" => substr($postData->root_url, 0, -1) . "" . $postData->url,
                            "page_id" => $postData->page_id,
                            "subject_id" => $subject->subject_id,
                            "subject_name" => $subject->subject_name,
                            "mood" => $mood,
                            "mood_by" => 'system',
                        );
                        $thothconnect_db->insert("website_c" . $subject->client_id, $data);
                        $post->insert_post_comment($postData->page_id, $subject->client_id, $thothconnect_db);
                    }
                } elseif ($post->type == "tweet" || $post->type == "retweet") {
                    $postData = $post->get_post_twitter($post->id);
                    if ($postData != null) {
                        $data = array(
                            "post_id" => $postData->post_id,
                            "post_date" => $postData->post_date,
                            "body" => $postData->body,
                            "type" => $postData->type,
                            "author_id" => $postData->author_id,
                            "author" => $postData->author,
                            "group_id" => $subject->group_id,
                            "group" => $subject->group,
                            "tweet_id" => $postData->tweet_id,
                            "subject_id" => $subject->subject_id,
                            "subject_name" => $subject->subject_name,
                            "mood" => $mood,
                            "mood_by" => 'system',
                        );
                        $thothconnect_db->insert("twitter_c" . $subject->client_id, $data);
                    }
                } elseif ($post->type == "fb_post" || $post->type == "fb_comment") {
                    $postData = $post->get_post_facebook($post->id);
                    if ($postData != null) {
                        $data = array(
                            "post_id" => $postData->post_id,
                            "post_date" => $postData->post_date,
                            "body" => $postData->body,
                            "type" => $postData->type,
                            "author_id" => $postData->author_id,
                            "author" => $postData->author,
                            "group_id" => $subject->group_id,
                            "group" => $subject->group,
                            "facebook_page_id" => $postData->facebook_page_id,
                            "facebook_page_name" => $postData->facebook_page_name,
                            "subject_id" => $subject->subject_id,
                            "subject_name" => $subject->subject_name,
                            "facebook_id" => $postData->facebook_id,
                            "parent_post_id" => $postData->parent_post_id,
                            "likes" => $postData->likes,
                            "shares" => $postData->shares,
                            "mood" => $mood,
                            "mood_by" => 'system',
                        );
                        $thothconnect_db->insert("facebook_c" . $subject->client_id, $data);
                    }
                }
            }
        }
    }

    // flag subject as update..
    $data = array(
        'matching_status' => 'update',
        'latest_matching' => mdate('%Y-%m-%d %H:%i:%s', time()),
        'from' => mdate('%Y-%m-%d %H:%i:%s', $from_ts),
        'to' => mdate('%Y-%m-%d %H:%i:%s', $to_ts),
    );
    $this->db->update('subject', $data, array('id' => $subject_id));
}
/**
 * Expose the most recent warning reported by the wrapped Sphinx client.
 *
 * @return mixed Whatever the client's GetLastWarning() yields.
 */
function getLastWarning()
{
    $warning = $this->sphinx->GetLastWarning();

    return $warning;
}
/**
 * Fetch the warning left behind by the client's most recent operation.
 *
 * @return string
 */
public function getWarning()
{
    $lastWarning = $this->client->GetLastWarning();

    return $lastWarning;
}
/**
 * Search the 'suggested_titles' index and prepare the template variables
 * (results with excerpts, error/warning text and the paging bar).
 *
 * A failed query no longer dereferences the false result: the Sphinx error
 * is kept in 'error', the page count falls back to zero and 'results' stays
 * false, exactly as returned by the client.
 *
 * @param string $q    Query text as typed by the user.
 * @param int    $page 1-based page number; values below 1 are treated as 1.
 * @return array Keys: results, q, error, warning, page, page_size, paging.
 */
private function doSearch($q, $page)
{
    $mode = SPH_MATCH_ALL;
    $index = 'suggested_titles';
    $page_size = 20;
    $limit = 1000;
    $ranker = SPH_RANK_PROXIMITY_BM25;
    $host = 'localhost';
    $port = 9312;

    // A page below 1 would produce a negative offset.
    $page = max(1, (int) $page);

    $cl = new SphinxClient();
    $cl->SetServer($host, $port);
    $cl->SetSortMode(SPH_SORT_RELEVANCE);
    $cl->SetArrayResult(true);
    $cl->SetWeights(array('wst_title' => 5, 'wst_text' => 2));
    $cl->SetMatchMode($mode);
    $cl->SetRankingMode($ranker);
    $cl->SetLimits(($page - 1) * $page_size, $page_size, $limit);

    // don't search w/ leading "how to" if user added it
    $q_prime = preg_replace('@^\\s*how\\s+to\\s+@i', '', $q);
    $res = $cl->Query($q_prime, $index);
    $error = $res === false ? $cl->GetLastError() : '';
    $warning = $cl->GetLastWarning();

    $has_matches = is_array($res) && !empty($res['matches']) && is_array($res['matches']);
    if ($has_matches) {
        $titles = $this->getInfo($res['matches']);
        $keys = array_keys($titles);
        $raw_excerpts = $cl->BuildExcerpts($titles, 'suggested_titles', $q);

        // Re-key the positional excerpt list into a separate array. Rewriting
        // the list in place loses entries whenever a key equals a list index.
        $excerpts = array();
        if (is_array($raw_excerpts)) {
            foreach (array_values($raw_excerpts) as $i => $excerpt) {
                if (isset($keys[$i])) {
                    $excerpts[$keys[$i]] = $excerpt;
                }
            }
        }

        // Index-based write instead of a by-reference loop variable, which
        // would stay bound to the last element after the loop.
        foreach ($res['matches'] as $i => $docinfo) {
            $id = $docinfo['id'];
            $res['matches'][$i]['attrs']['excerpt'] = isset($excerpts[$id]) ? $excerpts[$id] : '';
        }
    } elseif ($res !== false) {
        // The query succeeded but matched nothing; a real Sphinx error is kept.
        $error = wfMsg('search-keywords-not-found', $q);
    }

    // construct paging bar
    $total_found = (is_array($res) && isset($res['total_found'])) ? $res['total_found'] : 0;
    $total = (int) ceil(1.0 * $total_found / $page_size);

    $paging = array();
    if ($page > 1) {
        $paging[] = 'prev';
        $paging[] = 1;
    }
    if ($page >= 5) {
        $paging[] = '...';
    }
    if ($page >= 4) {
        $paging[] = $page - 2;
    }
    if ($page >= 3) {
        $paging[] = $page - 1;
    }
    $paging[] = $page;
    if ($page < $total) {
        $paging[] = $page + 1;
    }
    if ($page + 1 < $total) {
        $paging[] = $page + 2;
    }
    if ($page + 2 < $total) {
        $paging[] = '...';
    }
    if ($page < $total) {
        $paging[] = 'next';
    }

    $vars = array(
        'results' => $res,
        'q' => $q,
        'error' => $error,
        'warning' => $warning,
        'page' => $page,
        'page_size' => $page_size,
        'paging' => $paging,
    );

    return $vars;
}
<?php
/**
 * Debug script: runs one query against the "feeds" index and dumps the raw
 * result, the last error and the last warning.
 *
 * User: yunsong
 * Date: 3/12/16
 * Time: 10:46 AM
 * Email:awebc@qq.com
 */
require "../../common/components/SphinxClient.php";

// $query is not defined anywhere in this script. Use the value supplied by an
// including script when there is one, otherwise fall back to an empty query.
$query = isset($query) ? (string) $query : '';

$sphinx = new SphinxClient();
$sphinx->SetServer("127.0.0.1", 9312);
// $sphinx->SetMatchMode(6); // SPH_MATCH_EXTENDED2
$sphinx->SetSelect("id");
$sphinx->SetArrayResult(true);
$sphinx->SetLimits(0, 20);

$res = $sphinx->Query($query, "feeds");

echo '<pre>';
// Query() returns false on failure; only dump the result when there is one.
if (is_array($res)) {
    if (isset($res['matches'])) {
        print_r($res['matches']);
    }
    print_r($res);
}
print_r($sphinx->GetLastError());
print_r($sphinx->GetLastWarning());
echo '</pre>';
echo 1;
/**
 * Phorum search hook: run the search through Sphinx instead of the default
 * search backend.
 *
 * Match types ALL, ANY, PHRASE and AUTHOR are handled; for any other type
 * $arrSearch is returned untouched so that Phorum can handle it. On return
 * 'results', 'totals', 'continue' and 'raw_body' are filled in.
 *
 * @param array $arrSearch Search request; the keys match_type, search, author,
 *                         offset, length, match_dates and match_forum are read.
 * @return array The search array, with results filled in when handled here.
 */
function sphinx_search_action($arrSearch)
{
    global $PHORUM;

    // No pecl class, try php version
    if (!class_exists('SphinxClient')) {
        // loads from php include_path
        require_once 'sphinxapi.php';
    }

    // Index names set in sphinx.conf - one for searching messages, the other
    // for searching by authors only. Both contain an additional index for the
    // deltas (changes done after the last full reindex).
    $index_name_msg = 'phorum5_msg_d phorum5_msg';
    $index_name_author = 'phorum5_author phorum5_author_d';
    // BuildExcerpts() accepts a single index only; it is used for determining
    // charsets / mapping tables, nothing more.
    $excerpts_index = 'phorum5_msg';

    // What an empty answer looks like: no results, default search disabled,
    // body left alone.
    $empty_result = array(
        'results' => array(),
        'totals' => 0,
        'continue' => 0,
        'raw_body' => 1,
    );

    $index = $index_name_msg;
    if ($arrSearch['match_type'] == 'ALL') {
        $match_mode = SPH_MATCH_ALL;
    } elseif ($arrSearch['match_type'] == 'ANY') {
        $match_mode = SPH_MATCH_ANY;
    } elseif ($arrSearch['match_type'] == 'PHRASE') {
        $match_mode = SPH_MATCH_PHRASE;
    } elseif ($arrSearch['match_type'] == 'AUTHOR') {
        $match_mode = SPH_MATCH_PHRASE;
        $index = $index_name_author;
    } else {
        // Return search control to Phorum in case the search type isn't
        // handled by the module.
        return $arrSearch;
    }

    if (empty($arrSearch['search']) && !empty($arrSearch['author'])) {
        $arrSearch['search'] = $arrSearch['author'];
        $index = $index_name_author;
    }

    $sphinx = new SphinxClient();
    $sphinx->SetServer($PHORUM['mod_sphinx_search']['hostname'], $PHORUM['mod_sphinx_search']['port']);
    $sphinx->SetMatchMode($match_mode);

    // set the limits for paging
    $sphinx->SetLimits($arrSearch['offset'], $arrSearch['length']);

    // set the timeframe to search
    if ($arrSearch['match_dates'] > 0) {
        $min_ts = time() - 86400 * $arrSearch['match_dates'];
        $max_ts = time();
        $sphinx->SetFilterRange('datestamp', $min_ts, $max_ts);
    }

    // Check what forums the active Phorum user can read.
    $allowed_forums = phorum_api_user_check_access(PHORUM_USER_ALLOW_READ, PHORUM_ACCESS_LIST);

    // If the user is not allowed to search any forum or the current
    // active forum, then return the empty search results array.
    if (empty($allowed_forums) || $PHORUM['forum_id'] > 0 && !in_array($PHORUM['forum_id'], $allowed_forums)) {
        return array_merge($arrSearch, $empty_result);
    }

    // Prepare forum_id restriction.
    $search_forums = array();
    foreach (explode(',', $arrSearch['match_forum']) as $forum_id) {
        if ($forum_id === 'ALL') {
            $search_forums = $allowed_forums;
            break;
        }
        if (isset($allowed_forums[$forum_id])) {
            $search_forums[] = $forum_id;
        }
    }

    // None of the requested forums is readable. An empty value list would add
    // no filter at all and the query would then cover every forum, so answer
    // with an empty result instead of searching unrestricted.
    if (empty($search_forums)) {
        return array_merge($arrSearch, $empty_result);
    }
    $sphinx->SetFilter('forum_id', array_map('intval', array_values($search_forums)));

    // set the sort-mode
    $sphinx->SetSortMode(SPH_SORT_ATTR_DESC, 'datestamp');

    // do the actual query
    $results = $sphinx->Query($arrSearch['search'], $index);
    $res = $sphinx->GetLastWarning();
    if ($res) {
        error_log("sphinx_search.php: WARNING: {$res}");
    }
    $res = $sphinx->GetLastError();
    if ($res) {
        error_log("sphinx_search.php: ERROR: {$res}");
    }

    // if no messages were found, then return empty handed.
    if (!isset($results['matches'])) {
        return array_merge($arrSearch, $empty_result);
    }
    $search_msg_ids = $results['matches'];

    // get the messages we found
    $found_messages = phorum_db_get_message(array_keys($search_msg_ids), 'message_id', true);

    // sort them in reverse order of the message_id to automagically sort them
    // by date desc this way
    krsort($found_messages);
    reset($found_messages);

    // prepare the array for building highlighted excerpts
    $docs = array();
    foreach ($found_messages as $id => $data) {
        // remove hidden text in the output - only added by the hidden_msg module
        $data['body'] = preg_replace("/(\\[hide=([\\#a-z0-9]+?)\\](.+?)\\[\\/hide\\])/is", '', $data['body']);
        $docs[] = htmlspecialchars(phorum_strip_body($data['body']));
    }

    $words = '';
    if (!empty($results['words'])) {
        $words = implode(' ', array_keys($results['words']));
    }
    $opts = array('chunk_separator' => ' [...] ');

    // build highlighted excerpts
    $highlighted = $sphinx->BuildExcerpts($docs, $excerpts_index, $words, $opts);
    $res = $sphinx->GetLastWarning();
    if ($res) {
        error_log("sphinx_search.php: WARNING: {$res}");
    }
    $res = $sphinx->GetLastError();
    if ($res) {
        error_log("sphinx_search.php: ERROR: {$res}");
    }

    // BuildExcerpts() yields false on failure; fall back to the escaped,
    // un-highlighted text rather than indexing into a non-array.
    $cnt = 0;
    foreach ($found_messages as $id => $content) {
        $found_messages[$id]['short_body'] = (is_array($highlighted) && isset($highlighted[$cnt]))
            ? $highlighted[$cnt]
            : $docs[$cnt];
        $cnt++;
    }
    $arrSearch['results'] = $found_messages;

    // we need the total results, capped at 1000
    $arrSearch['totals'] = $results['total_found'];
    if ($arrSearch['totals'] > 1000) {
        $arrSearch['totals'] = 1000;
    }

    // don't run the default search
    $arrSearch['continue'] = 0;
    // tell it to leave the body alone
    $arrSearch['raw_body'] = 1;

    return $arrSearch;
}
/**
 * Build the resume-search pager: work out the WHERE clause (through Sphinx
 * when ENABLE_SPHINX is on, otherwise through a boolean full-text SQL
 * expression), count the matching attachments and hand that count to the
 * parent pager.
 *
 * @param int    $rowsPerPage    Rows per page, forwarded to the parent pager.
 * @param int    $currentPage    Current page, forwarded to the parent pager.
 * @param int    $siteID         Site whose attachments are searched.
 * @param string $wildCardString Search expression entered by the user.
 * @param string $sortBy         Not read in this constructor.
 * @param string $sortDirection  Not read in this constructor.
 */
public function __construct($rowsPerPage, $currentPage, $siteID, $wildCardString, $sortBy, $sortDirection)
{
    $this->_db = DatabaseConnection::getInstance();
    $this->_siteID = $siteID;
    $this->_sortByFields = array(
        'firstName',
        'lastName',
        'city',
        'state',
        'dateModifiedSort',
        'dateCreatedSort',
        'ownerSort',
    );

    if (!ENABLE_SPHINX) {
        $booleanWhere = DatabaseSearch::makeBooleanSQLWhere(
            DatabaseSearch::fulltextEncode($wildCardString),
            $this->_db,
            'attachment.text'
        );
        $this->_WHERE = str_replace(") (", ") AND (", $booleanWhere);
    } else {
        /* The Sphinx API raises PHP errors *and* does its own error
         * handling, so assert() warnings are muted while it runs. */
        assert_options(ASSERT_WARNING, 0);

        $client = new SphinxClient();
        $client->SetServer(SPHINX_HOST, SPHINX_PORT);
        $client->SetWeights(array(0, 100, 0, 0, 50));
        $client->SetMatchMode(SPH_MATCH_EXTENDED);
        $client->SetLimits(0, 1000);
        $client->SetSortMode(SPH_SORT_TIME_SEGMENTS, 'date_added');

        // FIXME: This can be sped up a bit by grouping ranges of site IDs
        //        into their own indexes. Maybe every 500 or so at least on
        //        the Hosted system.
        $client->SetFilter('site_id', array($this->_siteID));

        /* Translate the user's expression into Sphinx boolean syntax. */
        $wildCardString = DatabaseSearch::humanToSphinxBoolean($wildCardString);

        /* Sphinx may ask for a retry when it is maxed out; wait one second
         * before every attempt after the first. */
        $attempt = 0;
        while (true) {
            ++$attempt;
            if ($attempt > 1) {
                sleep(1);
            }

            $response = $client->Query($wildCardString, SPHINX_INDEX);
            $lastError = $client->GetLastError();

            $retry = $response === false
                && strpos($lastError, 'server maxed out, retry') !== false
                && $attempt <= 5;
            if (!$retry) {
                break;
            }
        }

        /* Sphinx errors are fatal. */
        if ($response === false) {
            $this->fatal('Sphinx Error: ' . ucfirst($lastError) . '.');
        }

        /* Sphinx warnings are fatal as well (for now). */
        $warning = $client->GetLastWarning();
        if (!empty($warning)) {
            // FIXME: Just display a warning, and notify dev team.
            $this->fatal('Sphinx Warning: ' . ucfirst($warning) . '.');
        }

        /* Show warnings for assert()s again. */
        assert_options(ASSERT_WARNING, 1);

        if (!empty($response['matches'])) {
            $idList = implode(',', array_keys($response['matches']));
            $this->_WHERE = 'attachment.attachment_id IN(' . $idList . ')';
        } else {
            $this->_WHERE = '0';
        }
    }

    $searchBase = new SearchBase();
    $filterParts = $searchBase->buildFilter();
    $filter = $filterParts["where"];
    $column = $filterParts["extra_column"];
    $join = $filterParts["extra_join"];

    /* Bulk-resume searches also count attachments of the bulk-resume type. */
    $includeBulkResumes = !empty($_REQUEST["bulk_resume"]);

    /* How many matching attachments do we have? */
    if ($includeBulkResumes) {
        $sql = sprintf(
            "SELECT\r\n count(*) AS count\r\n FROM\r\n attachment\r\n LEFT JOIN candidate\r\n ON attachment.data_item_id = candidate.candidate_id\r\n AND attachment.site_id = candidate.site_id\r\n LEFT JOIN user AS owner_user\r\n ON candidate.owner = owner_user.user_id\r\n %s\r\n WHERE\r\n resume = 1\r\n AND\r\n %s\r\n AND\r\n (attachment.data_item_type = %s OR attachment.data_item_type = %s)\r\n AND\r\n attachment.site_id = %s\r\n AND\r\n (ISNULL(candidate.is_admin_hidden) OR (candidate.is_admin_hidden = 0))\r\n AND\r\n (ISNULL(candidate.is_active) OR (candidate.is_active = 1))\r\n %s\r\n",
            $join,
            $this->_WHERE,
            DATA_ITEM_CANDIDATE,
            DATA_ITEM_BULKRESUME,
            $this->_siteID,
            $filter
        );
    } else {
        $sql = sprintf(
            "SELECT\r\n count(*) AS count\r\n FROM\r\n attachment\r\n LEFT JOIN candidate\r\n ON attachment.data_item_id = candidate.candidate_id\r\n AND attachment.site_id = candidate.site_id\r\n LEFT JOIN user AS owner_user\r\n ON candidate.owner = owner_user.user_id\r\n %s\r\n WHERE\r\n resume = 1\r\n AND\r\n %s\r\n AND\r\n (attachment.data_item_type = %s)\r\n AND\r\n attachment.site_id = %s\r\n AND\r\n (ISNULL(candidate.is_admin_hidden) OR (candidate.is_admin_hidden = 0))\r\n AND\r\n (ISNULL(candidate.is_active) OR (candidate.is_active = 1))\r\n %s\r\n",
            $join,
            $this->_WHERE,
            DATA_ITEM_CANDIDATE,
            $this->_siteID,
            $filter
        );
    }

    $countRow = $this->_db->getAssoc($sql);

    /* Pass "Search By Resume"-specific parameters to Pager constructor. */
    parent::__construct($countRow['count'], $rowsPerPage, $currentPage);
}
/**
 * Re-match one subject against the Sphinx index and record every matching
 * post inside the requested date range in the 'matchs' table.
 *
 * The subject is flagged 'matching' while the run is in progress and
 * 'update' when it is done. Progress and errors are echoed.
 *
 * @param int    $subject_id   Id of the subject row to re-match.
 * @param bool   $clean        When true, existing 'matchs' rows of the subject are deleted first.
 * @param int    $query_offset Offset echoed and used in the paging check.
 * @param string $from         Start of the date range, any strtotime() format.
 * @param string $to           End of the date range, any strtotime() format.
 * @return false|null false when the subject is already being matched; otherwise nothing.
 */
public function run($subject_id, $clean = true, $query_offset = 0, $from, $to)
{
    $this->load->helper('sphinxapi');
    $this->load->helper('mood');

    // skip if matching_status is "matching"
    $matching_status = $this->custom_model->get_value('subject', 'matching_status', $subject_id);
    if ($matching_status == 'matching') {
        echo "subject is matching";
        return false;
    }

    // flag subject as matching so that other bots skip it
    $this->db->update('subject', array('matching_status' => 'matching'), array('id' => $subject_id));

    // clear all match records for this subject
    if ($clean) {
        $this->db->delete('matchs', array('subject_id' => $subject_id));
    }

    //
    // begin re-matching this subject
    //
    // get search string from subject_id
    $q = $this->custom_model->get_value('subject', 'query', $subject_id);

    // sphinx init
    $mode = SPH_MATCH_EXTENDED;
    $host = "192.168.1.102";
    $port = 9312;
    $index = "*";
    $sortby = "@id ASC";
    $offset = $query_offset;
    $limit = 1000000;
    $ranker = SPH_RANK_PROXIMITY_BM25;
    echo 'limit=' . $limit . ' offset=' . $offset . PHP_EOL;

    // Extract subject keywords from the search string
    $keywords = get_keywords($q);

    ////////////
    // do query
    ////////////
    $cl = new SphinxClient();
    $cl->SetServer($host, $port);
    $cl->SetConnectTimeout(1);
    $cl->SetArrayResult(true);
    $cl->SetWeights(array(100, 1));
    $cl->SetMatchMode($mode);
    $cl->SetSortMode(SPH_SORT_EXTENDED, $sortby);
    // NOTE(review): the window always starts at 0; $offset is only echoed and
    // used in the paging check below - confirm whether it should be applied here.
    $cl->SetLimits(0, $limit, max($limit, 1000000));
    $cl->SetRankingMode($ranker);
    $res = $cl->Query($q, $index);

    ////////////
    // do Insert to DB
    ////////////
    // Posts already matched, keyed by post id for constant-time lookups
    // (a linear in_array() scan per match does not scale to 1,000,000 matches).
    $current_matching = array();
    $query_matchs = $this->db->get_where('matchs', array('subject_id' => $subject_id));
    if ($query_matchs->num_rows() > 0) {
        echo PHP_EOL . 'currents matching :' . $query_matchs->num_rows();
        foreach ($query_matchs->result() as $match) {
            $current_matching[$match->post_id] = true;
        }
    }

    // Keep the caller's raw range for the recursive call; work with timestamps locally.
    $from_ts = strtotime($from);
    $to_ts = strtotime($to);

    // Search and Update
    if ($res === false) {
        echo "Query failed: " . $cl->GetLastError() . ".\n";
    } else {
        if ($cl->GetLastWarning()) {
            echo "WARNING: " . $cl->GetLastWarning() . "\n\n";
        }
        echo "Query '{$q}' \nretrieved {$res['total']} of {$res['total_found']} matches in {$res['time']} sec.\n";

        if ($res['total'] == 0) {
            echo "no result<br/>\n";
        } elseif ($res['total'] > $limit + $offset) {
            // Arguments follow the declared order. The previous call passed the
            // new offset in the $clean slot and omitted $from/$to altogether.
            // NOTE(review): this call has already flagged the subject as
            // 'matching', so the nested call stops at the guard above - decide
            // how paging should interact with that flag.
            $this->run($subject_id, false, $limit + $offset, $from, $to);
        } else {
            echo "Updating...";
            foreach ($res["matches"] as $docinfo) {
                // skip posts that are already matched
                if (isset($current_matching[$docinfo["id"]])) {
                    continue;
                }

                // Reset PHP timeout to 1 min for each new match
                set_time_limit(60);

                $post = new Post_model();
                $post->init($docinfo["id"]);

                // skip posts outside the requested date range
                $post_date = strtotime($post->post_date);
                if ($post_date < $from_ts || $post_date > $to_ts) {
                    continue;
                }

                $mood = get_mood($post->body, $keywords);
                $data = array(
                    'post_id' => $post->id,
                    'subject_id' => $subject_id,
                    'matching_date' => null,
                    'sentiment' => $mood,
                    'by' => 'system',
                    'system_correct' => $mood,
                    'system_correct_date' => mdate('%Y-%m-%d %H:%i', time()),
                );
                $this->db->insert('matchs', $data);
            }
        }
    }

    // flag subject as update..
    $data = array(
        'matching_status' => 'update',
        'latest_matching' => mdate('%Y-%m-%d %H:%i:%s', time()),
        'from' => mdate('%Y-%m-%d %H:%i:%s', $from_ts),
        'to' => mdate('%Y-%m-%d %H:%i:%s', $to_ts),
        'bot_id' => 0,
    );
    $this->db->update('subject', $data, array('id' => $subject_id));
}
<?php
// Demo script: query the local Sphinx daemon for 'Codeine' and dump the
// matches of the first page (25 results).
require 'sphinxapi.php';

$cl = new SphinxClient();
$cl->SetServer('localhost');
$cl->SetConnectTimeout(5);
$cl->SetMatchMode(SPH_MATCH_ANY);
$cl->SetLimits(0, 25, 1000);
$cl->SetArrayResult(true);

$result = $cl->Query('Codeine');

if ($result === false) {
    echo "Query failed: " . $cl->GetLastError() . ".\n";
} else {
    if ($cl->GetLastWarning()) {
        echo "WARNING: " . $cl->GetLastWarning();
    }

    // Do not assume the 'matches' key exists; dump an empty list when it does not.
    $matches = isset($result["matches"]) ? $result["matches"] : array();

    print '<pre>';
    print_r($matches);
    // Close the block that was previously left open.
    print '</pre>';
}
/**
 * Build the resume-search pager: work out the WHERE clause (through Sphinx
 * when ENABLE_SPHINX is on, otherwise through a boolean full-text SQL
 * expression), count the matching attachments and hand that count to the
 * parent pager.
 *
 * @param int    $rowsPerPage    Rows per page, forwarded to the parent pager.
 * @param int    $currentPage    Current page, forwarded to the parent pager.
 * @param int    $siteID         Site whose attachments are searched.
 * @param string $wildCardString Search expression entered by the user.
 * @param string $sortBy         Not read in this constructor.
 * @param string $sortDirection  Not read in this constructor.
 */
public function __construct($rowsPerPage, $currentPage, $siteID, $wildCardString, $sortBy, $sortDirection)
{
    $this->_db = DatabaseConnection::getInstance();
    $this->_siteID = $siteID;
    $this->_sortByFields = array(
        'firstName',
        'lastName',
        'city',
        'state',
        'dateModifiedSort',
        'dateCreatedSort',
        'ownerSort',
    );

    if (!ENABLE_SPHINX) {
        $this->_WHERE = DatabaseSearch::makeBooleanSQLWhere(
            DatabaseSearch::fulltextEncode($wildCardString),
            $this->_db,
            'attachment.text'
        );
    } else {
        /* The Sphinx API raises PHP errors *and* does its own error
         * handling, so assert() warnings are muted while it runs. */
        assert_options(ASSERT_WARNING, 0);

        $client = new SphinxClient();
        $client->SetServer(SPHINX_HOST, SPHINX_PORT);
        $client->SetWeights(array(0, 100, 0, 0, 50));
        $client->SetMatchMode(SPH_MATCH_EXTENDED);
        $client->SetLimits(0, 1000);
        $client->SetSortMode(SPH_SORT_TIME_SEGMENTS, 'date_added');

        // FIXME: This can be sped up a bit by grouping ranges of site IDs
        //        into their own indexes. Maybe every 500 or so at least on
        //        the Hosted system.
        $client->SetFilter('site_id', array($this->_siteID));

        /* Translate the user's expression into Sphinx boolean syntax. */
        $wildCardString = DatabaseSearch::humanToSphinxBoolean($wildCardString);

        /* Sphinx may ask for a retry when it is maxed out; wait one second
         * before every attempt after the first. */
        $attempt = 0;
        while (true) {
            ++$attempt;
            if ($attempt > 1) {
                sleep(1);
            }

            $response = $client->Query($wildCardString, SPHINX_INDEX);
            $lastError = $client->GetLastError();

            $retry = $response === false
                && strpos($lastError, 'server maxed out, retry') !== false
                && $attempt <= 5;
            if (!$retry) {
                break;
            }
        }

        /* Sphinx errors are fatal. */
        if ($response === false) {
            $this->fatal('Sphinx Error: ' . ucfirst($lastError) . '.');
        }

        /* Sphinx warnings are fatal as well (for now). */
        $warning = $client->GetLastWarning();
        if (!empty($warning)) {
            // FIXME: Just display a warning, and notify dev team.
            $this->fatal('Sphinx Warning: ' . ucfirst($warning) . '.');
        }

        /* Show warnings for assert()s again. */
        assert_options(ASSERT_WARNING, 1);

        if (!empty($response['matches'])) {
            $idList = implode(',', array_keys($response['matches']));
            $this->_WHERE = 'attachment.attachment_id IN(' . $idList . ')';
        } else {
            $this->_WHERE = '0';
        }
    }

    /* How many matching attachments do we have? */
    $countSql = sprintf(
        "SELECT\n COUNT(*) AS count\n FROM\n attachment\n LEFT JOIN candidate\n ON attachment.data_item_id = candidate.candidate_id\n AND attachment.data_item_type = %s\n AND attachment.site_id = candidate.site_id\n LEFT JOIN user AS owner_user\n ON candidate.owner = owner_user.user_id\n WHERE\n resume = 1\n AND\n %s\n AND\n (ISNULL(candidate.is_admin_hidden) OR (candidate.is_admin_hidden = 0))\n AND\n (ISNULL(candidate.is_active) OR (candidate.is_active = 1))\n AND\n attachment.site_id = %s",
        DATA_ITEM_CANDIDATE,
        $this->_WHERE,
        $this->_siteID
    );
    $countRow = $this->_db->getAssoc($countSql);

    /* Pass "Search By Resume"-specific parameters to Pager constructor. */
    parent::__construct($countRow['count'], $rowsPerPage, $currentPage);
}
/* Sphinx self-test: run TEST_QUERY against SPHINX_INDEX and exit with 0 when
 * the daemon answers without error or warning, 1 otherwise. */
$sphinx->SetServer(SPHINX_HOST, SPHINX_PORT);
$sphinx->SetWeights(array(0, 100, 0, 0, 50));
$sphinx->SetMatchMode(SPH_MATCH_BOOLEAN);
$sphinx->SetLimits(0, 10);
$sphinx->SetSortMode(SPH_SORT_TIME_SEGMENTS, 'date_added');

/* SetFilter() expects a list of values; wrap a scalar site id in an array. */
$sphinx->SetFilter('site_id', is_array(TEST_SITE_ID) ? TEST_SITE_ID : array(TEST_SITE_ID));

/* Execute the Sphinx query. Sphinx can ask us to retry if it is maxed out.
 * Retry up to 5 times. */
$tries = 0;
do {
    /* Wait for one second if this isn't our first attempt. */
    if (++$tries > 1) {
        sleep(1);
    }

    $results = $sphinx->Query(TEST_QUERY, SPHINX_INDEX);
    $errorMessage = $sphinx->GetLastError();
} while (
    $results === false
    && strpos($errorMessage, 'server maxed out, retry') !== false
    && $tries <= 5
);

/* Report a fatal error if Sphinx errors occurred. */
if ($results === false) {
    fwrite($stderr, 'Sphinx Error: ' . ucfirst($errorMessage) . ".\n");
    exit(1);
}

/* Report a fatal error (for now) if Sphinx warnings occurred. */
$lastWarning = $sphinx->GetLastWarning();
if (!empty($lastWarning)) {
    fwrite($stderr, 'Sphinx Warning: ' . ucfirst($lastWarning) . ".\n");
    exit(1);
}

fwrite($stdout, "Sphinx appears to be working properly.\n");
exit(0);
/**
 * RunQueries() + validation.
 *
 * - Single query: the result of that query.
 * - Multi query: an array with the result of every query.
 *
 * NOTE(review): the previous documentation described each result as a plain
 * list of matches ('id', 'weight', 'attrs'). This method returns the entries
 * exactly as RunQueries() supplied them - confirm the expected shape with
 * the callers.
 *
 * @param \SphinxClient $sphinxClient Client with the queries already queued.
 *
 * @throws \Exception When RunQueries() fails, when the client reports a
 *                    warning, or when an individual query carries an error.
 *
 * @return array
 */
protected function getResult(\SphinxClient $sphinxClient)
{
    $result = $sphinxClient->RunQueries();

    if (false === $result) {
        throw new \Exception($sphinxClient->getLastError());
    }

    $warning = $sphinxClient->GetLastWarning();
    if ($warning) {
        throw new \Exception($warning);
    }

    // A batch can succeed as a whole while a single query failed; such a
    // failure is only visible in that query's own 'error' entry.
    if (is_array($result)) {
        foreach ($result as $queryResult) {
            if (isset($queryResult['error']) && '' !== (string) $queryResult['error']) {
                throw new \Exception($queryResult['error']);
            }
        }
    }

    // Support for the original single-query format
    if (count($result) === 1) {
        return current($result);
    }

    return $result;
}