/**
 * Merge a batch of documents into the posting list of a term and return
 * the updated index.
 *
 * The current posting list is read through get_docs_from_index(); every
 * entry of $docs_to_add that is not already present (loose comparison) is
 * appended to it. The stored hit count is the supplied $hits plus the size
 * of the posting list as it was before the merge.
 *
 * @param mixed $term        Key of the index entry to update.
 * @param mixed $hits        Hit count to add to the size of the existing posting list.
 * @param array $docs_to_add Documents to merge into the posting list.
 * @param array $index       Index to update; modified copy is returned.
 * @return array The index with $index[$term]["docs"] and $index[$term]["hits"] set.
 */
function save_term_to_index($term, $hits, $docs_to_add, $index)
{
    global $conn; // declared here but not read or written by this function

    $posting = get_docs_from_index($term, $index);

    // The hit total is based on the posting list size *before* new documents are merged in.
    $total_hits = $hits + count($posting);

    foreach ($docs_to_add as $candidate) {
        // Already listed for this term: nothing to do.
        if (in_array($candidate, $posting)) {
            continue;
        }
        $posting[] = $candidate;
    }

    $index[$term]["docs"] = $posting;
    $index[$term]["hits"] = $total_hits;

    return $index;
}
/**
 * Run a keyword query against the index and return the matching documents.
 *
 * Steps, in order:
 *  1. Split the query on spaces and lowercase each token.
 *  2. Strip double quotes from quoted tokens; drop unquoted tokens that are
 *     listed in the global $stopwords.
 *  3. Pass the tokens through convert_to_npr(); an empty result ends the
 *     search with an empty array.
 *  4. For every non-operator token, look up its documents and the documents
 *     of its synonyms.
 *  5. Build an expression tree with create_tree(), evaluate it, append the
 *     synonym documents and resolve every id through get_doc_details().
 *
 * Side effects: overwrites the globals $total_docs (with get_docs_id()) and
 * $has_quotes.
 *
 * @param string $q     Raw query string.
 * @param array  $index Index handed to get_docs_from_index().
 * @return array Map of document id => value returned by get_doc_details().
 */
function search_words($q, $index)
{
    global $total_docs;
    global $stopwords;
    global $operators_dictionary;
    global $has_quotes;

    $total_docs = get_docs_id();

    // Repeated, leading or trailing spaces make explode() emit empty strings;
    // drop them so an empty string is never looked up as a search term.
    $query_vars = array_values(array_filter(explode(" ", $q), function ($part) {
        return $part !== '';
    }));

    // Normalise every token: lowercase it, strip surrounding quotes and
    // discard unquoted stop words.
    foreach ($query_vars as $key => $value) {
        $token = strtolower($value);

        if (mb_substr($token, 0, 1) != '"' && mb_substr($token, -1) != '"') {
            // NOTE(review): $has_quotes is one global flag rewritten on every
            // iteration, so after this loop it only describes the last token,
            // yet the lookup loop below applies it to all tokens. Confirm
            // whether per-token tracking was intended.
            $has_quotes = false;

            if (in_array($token, $stopwords)) {
                // In case of an operator command with a stop word: remove the
                // operator words from the query as well.
                // NOTE(review): on PHP 7+ foreach walks a snapshot of the
                // array, so an operator located after this stop word is still
                // visited later and may be written back by the assignment at
                // the bottom of the loop (at a different position). Behaviour
                // kept as-is; confirm the intended semantics.
                $query_vars = array_diff($query_vars, array('and', 'or', 'not', 'And', 'Or', 'Not', 'OR', 'AND', 'NOT'));
                unset($query_vars[$key]);
                continue;
            }
        } else {
            // The token starts and/or ends with a quote. Remove only the quote
            // characters that are really there: a phrase such as "hello world"
            // is split into `"hello` and `world"`, and cutting one character
            // off both ends would damage the words themselves.
            if (mb_substr($token, 0, 1) == '"') {
                $token = mb_substr($token, 1);
            }
            if (mb_substr($token, -1) == '"') {
                $token = mb_substr($token, 0, -1);
            }
            $has_quotes = true;
        }

        $query_vars[$key] = $token;
    }

    // Reorder the tokens for expression evaluation and validate parentheses.
    $query_vars = convert_to_npr($query_vars);
    if (empty($query_vars)) {
        return array();
    }

    $added_docs = array(); // posting lists of the synonyms
    $temp_index = array(); // one entry per token, in convert_to_npr() order

    // For every token of the query, look up the word itself and its synonyms.
    foreach ($query_vars as $token) {
        if (!in_array($token, $operators_dictionary) || $has_quotes) {
            $synonyms = get_synonyms($token);
            if ($synonyms != null) {
                // Remember the documents of every synonym of the current word.
                foreach ($synonyms as $synonym) {
                    $added_docs[] = get_docs_from_index($synonym, $index);
                }
            }
            // Leaf entry: the word and the ids of the documents it appears in.
            $temp_index[] = array("term" => $token, "docs" => get_docs_from_index($token, $index));
        } else {
            // Operator entry: carries no documents of its own.
            $temp_index[] = array("term" => $token, "docs" => null);
        }
    }

    // Build the expression tree (operators as inner nodes, words as leaves)
    // and evaluate it.
    $root = create_tree(new ArrayIterator($temp_index));
    $result = $root->evaluate();

    // Append the synonym documents to the final result.
    foreach ($added_docs as $doc) {
        foreach ($doc as $d) {
            $result[] = $d;
        }
    }

    $docs = array();
    foreach ($result as $id) {
        // The same id can occur several times (term and synonym postings
        // overlap); fetch the details only once per document.
        if (isset($docs[$id])) {
            continue;
        }
        $docs[$id] = get_doc_details($id);
    }

    return $docs;
}