public function run($subject_id, $clean = true, $query_offset = 0, $from, $to) { $this->load->helper('sphinxapi'); $this->load->helper('mood'); // skip if matching_status is "matching" $matching_status = $this->custom_model->get_value('subject', 'matching_status', $subject_id); if ($matching_status == 'matching') { echo "subject is matching"; return false; } // flag subject as matching.. do other bot runs this queue. //$this->db->update('subject',array('matching_status'=>'matching'),array('id'=>$subject_id)); // clear all match record for this subject $config['hostname'] = "192.168.1.102"; $config['username'] = "******"; $config['password'] = "******"; $config['database'] = "thothconnect"; $config['dbdriver'] = "mysql"; $config['dbprefix'] = ""; $config['pconnect'] = FALSE; $config['db_debug'] = TRUE; $config['cache_on'] = FALSE; $config['cachedir'] = ""; $config['char_set'] = "utf8"; $config['dbcollat'] = "utf8_general_ci"; $thothconnect_db = $this->load->database($config, true); $query = $this->db->query("SELECT client_id FROM subject WHERE id = " . $subject_id); $row = $query->row(); $client_id = $row->client_id; if ($clean) { $thothconnect_db->delete('website_c' . $client_id, array('subject_id' => $subject_id)); $thothconnect_db->delete('twitter_c' . $client_id, array('subject_id' => $subject_id)); $thothconnect_db->delete('facebook_c' . $client_id, array('subject_id' => $subject_id)); } // // begin re-matching this subject // // get search string from subject_id $query = $this->custom_model->get_value('subject', 'query', $subject_id); // sphinx init $cl = new SphinxClient(); $q = $query; $sql = ""; $mode = SPH_MATCH_EXTENDED; $host = "192.168.1.102"; $port = 9312; $index = "*"; $groupby = ""; $groupsort = "@group desc"; $filter = "group_id"; $filtervals = array(); $distinct = ""; $sortby = "@id ASC"; $sortexpr = ""; $offset = $query_offset; $limit = 1000000; $ranker = SPH_RANK_PROXIMITY_BM25; $select = ""; echo 'limit=' . $limit . ' offset=' . $offset . PHP_EOL; //Extract subject keyword from search string $keywords = get_keywords($q); //////////// // do query //////////// $cl->SetServer($host, $port); $cl->SetConnectTimeout(1); $cl->SetArrayResult(true); $cl->SetWeights(array(100, 1)); $cl->SetMatchMode($mode); // if ( count($filtervals) ) $cl->SetFilter ( $filter, $filtervals ); // if ( $groupby ) $cl->SetGroupBy ( $groupby, SPH_GROUPBY_ATTR, $groupsort ); if ($sortby) { $cl->SetSortMode(SPH_SORT_EXTENDED, $sortby); } // if ( $sortexpr ) $cl->SetSortMode ( SPH_SORT_EXPR, $sortexpr ); if ($distinct) { $cl->SetGroupDistinct($distinct); } if ($select) { $cl->SetSelect($select); } if ($limit) { $cl->SetLimits(0, $limit, $limit > 1000000 ? $limit : 1000000); } $cl->SetRankingMode($ranker); $res = $cl->Query($q, $index); //$res = true; //////////// // do Insert to DB //////////// // Current matching $current_matching = array(); /*$query_matchs = $this->db->get_where('matchs',array('subject_id'=>$subject_id)); if($query_matchs->num_rows() > 0) { echo PHP_EOL.'currents matching :'.$query_matchs->num_rows(); foreach($query_matchs->result() as $match) { $current_matching[] = $match->post_id; } }*/ // set matching date range from-to $from = strtotime($from); $to = strtotime($to); // Search and Update if ($res === false) { echo "Query failed: " . $cl->GetLastError() . ".\n"; } else { if ($cl->GetLastWarning()) { echo "WARNING: " . $cl->GetLastWarning() . "\n\n"; } echo "Query '{$q}' \nretrieved {$res['total']} of {$res['total_found']} matches in {$res['time']} sec.\n"; if ($res['total'] == 0) { echo "no result<br/>\n"; } else { if ($res['total'] > $limit + $offset) { $this->run($subject_id, $limit + $offset); } else { echo "Updating..."; foreach ($res["matches"] as $k => $docinfo) { // echo '('.$k.')'.$docinfo["id"]." "; // Reset PHP Timeout to 1min // if found in $current_matching then skip if (in_array($docinfo["id"], $current_matching)) { continue; } else { // else insert new match set_time_limit(60); $post = new Post_model(); $post->init($docinfo["id"]); // if post_date is our of range then skip $post_date = strtotime($post->post_date); if ($post_date < $from || $post_date > $to) { continue; } $mood = get_mood($post->body, $keywords); //----------------------------------------------------- $subject = $post->get_subject($subject_id); //print_r($subject); if ($post->type == "post" || $post->type == "comment") { $postData = $post->get_post_website($post->id); if ($postData != null) { $data = array(); $data["post_id"] = $postData->post_id; $data["post_date"] = $postData->post_date; $data["title"] = $postData->title; $data["body"] = $postData->body; $data["type"] = $postData->type; $data["author_id"] = $postData->author_id; $data["author"] = $postData->author; $data["website_id"] = $postData->website_id; $data["website_name"] = $postData->website_name; $data["website_cate_id"] = $postData->website_cate_id; $data["website_cate"] = $postData->website_cate; $data["website_type_id"] = $postData->website_type_id; $data["website_type"] = $postData->website_type; $data["group_id"] = $subject->group_id; $data["group"] = $subject->group; $data["url"] = substr($postData->root_url, 0, -1) . "" . $postData->url; $data["page_id"] = $postData->page_id; $data["subject_id"] = $subject->subject_id; $data["subject_name"] = $subject->subject_name; $data["mood"] = $mood; $data["mood_by"] = 'system'; $thothconnect_db->insert("website_c" . $subject->client_id, $data); $post->insert_post_comment($postData->page_id, $subject->client_id, $thothconnect_db); } } else { if ($post->type == "tweet" || $post->type == "retweet") { $postData = $post->get_post_twitter($post->id); if ($postData != null) { $data = array(); $data["post_id"] = $postData->post_id; $data["post_date"] = $postData->post_date; $data["body"] = $postData->body; $data["type"] = $postData->type; $data["author_id"] = $postData->author_id; $data["author"] = $postData->author; $data["group_id"] = $subject->group_id; $data["group"] = $subject->group; $data["tweet_id"] = $postData->tweet_id; $data["subject_id"] = $subject->subject_id; $data["subject_name"] = $subject->subject_name; $data["mood"] = $mood; $data["mood_by"] = 'system'; $thothconnect_db->insert("twitter_c" . $subject->client_id, $data); } } else { if ($post->type == "fb_post" || $post->type == "fb_comment") { $postData = $post->get_post_facebook($post->id); if ($postData != null) { $data = array(); $data["post_id"] = $postData->post_id; $data["post_date"] = $postData->post_date; $data["body"] = $postData->body; $data["type"] = $postData->type; $data["author_id"] = $postData->author_id; $data["author"] = $postData->author; $data["group_id"] = $subject->group_id; $data["group"] = $subject->group; $data["facebook_page_id"] = $postData->facebook_page_id; $data["facebook_page_name"] = $postData->facebook_page_name; $data["subject_id"] = $subject->subject_id; $data["subject_name"] = $subject->subject_name; $data["facebook_id"] = $postData->facebook_id; $data["parent_post_id"] = $postData->parent_post_id; $data["likes"] = $postData->likes; $data["shares"] = $postData->shares; $data["mood"] = $mood; $data["mood_by"] = 'system'; $thothconnect_db->insert("facebook_c" . $subject->client_id, $data); } } } } /* $data = array( 'post_id'=> $post->id, 'subject_id' => $subject_id , 'matching_date' => null, 'sentiment' => $mood, 'by' => 'system', 'system_correct' => $mood, 'system_correct_date' => mdate('%Y-%m-%d %H:%i',time()) ); $this->db->insert('matchs',$data); */ //--------------------------------------- } } } } } // flag subject as update.. $data = array('matching_status' => 'update', 'latest_matching' => mdate('%Y-%m-%d %H:%i:%s', time()), 'from' => mdate('%Y-%m-%d %H:%i:%s', $from), 'to' => mdate('%Y-%m-%d %H:%i:%s', $to)); $this->db->update('subject', $data, array('id' => $subject_id)); }