/**
 * Fuzzy multi-word search: suggests phrases close to the searched one and
 * returns the documents matching the selected phrase.
 *
 * Steps:
 *  1. Split the searched string on spaces and keep the first three words.
 *  2. For each word, fetch fuzzy-similar words from the "word" index.
 *  3. Build candidate phrases from those words (full cartesian product when
 *     it is small, otherwise vary one word at a time).
 *  4. Keep the candidate phrases that actually occur in the "document" index,
 *     together with the paths of the documents the current user may see.
 *
 * @param string $searchedTerm Raw search string.
 * @param string $selectedTerm Phrase whose documents are wanted; falls back
 *                             to $searchedTerm when empty.
 *
 * @return array Two-element array:
 *               [0] list of document paths for the selected phrase (or for
 *                   the searched phrase when the selected one was not found),
 *                   array("mot_introuvable.pdf") when there is none;
 *               [1] list of phrases (strings), the searched phrase first.
 */
public function mutipleSearchWithElasticSearch($searchedTerm, $selectedTerm)
 {
     // Work on strings only so every comparison below can be strict.
     $searchedTerm = (string) $searchedTerm;
     $selectedTerm = (string) $selectedTerm;
     if ($selectedTerm === "") {
         $selectedTerm = $searchedTerm;
     }

     // Fuzziness grows with the word length. This is a closure rather than a
     // nested named function: a named function declared inside a method is
     // registered globally on first call and makes the second call to this
     // method fail with "Cannot redeclare".
     $fuzzinessFor = function ($term) {
         $length = mb_strlen($term);
         if ($length <= 2) {
             return 0;
         }
         if ($length < 10) {
             return 1;
         }
         if ($length < 17) {
             return 2;
         }
         if ($length <= 23) {
             return 3;
         }
         return 4;
     };

     // Split the input on spaces, dropping the empty pieces produced by
     // repeated/leading/trailing spaces, and keep only the first three words.
     $terms = array_filter(explode(" ", $searchedTerm), function ($piece) {
         return $piece !== "";
     });
     $terms = array_slice(array_values($terms), 0, 3);

     // Find the words "similar" to each term with Elasticsearch.
     $wordFinder = $this->container->get('fos_elastica.finder.shlml.word');
     $found = array();
     foreach ($terms as $term) {
         $query = new \Elastica\Query\Fuzzy();
         $query->setField("content", $term);
         $query->setFieldOption("fuzziness", $fuzzinessFor($term));
         $found[] = $wordFinder->find($query, 10000000);
     }

     // Pick the combination strategy from the size of the cartesian product,
     // to avoid a combinatorial explosion.
     $nbCombi = 1;
     foreach ($found as $candidates) {
         $nbCombi *= count($candidates);
     }

     $combinations = array();
     if (empty($found)) {
         // No usable word in the input: nothing to combine.
     } elseif ($nbCombi < 1000) {
         // Full cartesian product of the similar words, built left to right.
         foreach ($found[0] as $candidate) {
             $combinations[] = $candidate->getContent();
         }
         foreach (array_slice($found, 1) as $candidates) {
             $extended = array();
             foreach ($candidates as $candidate) {
                 $suffix = $candidate->getContent();
                 foreach ($combinations as $prefix) {
                     $extended[] = $prefix . " " . $suffix;
                 }
             }
             $combinations = $extended;
         }
     } else {
         // Too many combinations: vary a single word at a time and keep the
         // other words exactly as typed.
         foreach ($found as $position => $candidates) {
             foreach ($candidates as $candidate) {
                 $words = $terms;
                 $words[$position] = $candidate->getContent();
                 $combinations[] = implode(" ", $words);
             }
         }
     }
     // The same phrase can be produced several times; querying it once is
     // enough since results are keyed by phrase. First occurrence order is kept.
     $combinations = array_unique($combinations);

     // Keep the combinations that really occur in the documents.
     $documentFinder = $this->container->get('fos_elastica.finder.shlml.document');
     $results = array();
     $isUser = null; // resolved lazily, once, when the first document shows up
     foreach ($combinations as $phrase) {
         $query = new \Elastica\Query\Match();
         $query->setFieldQuery('content', $phrase);
         $query->setFieldType('content', 'phrase');

         // The selected phrase needs its document list; for the others a
         // single hit is enough to know the phrase exists.
         $limit = ($phrase === $selectedTerm) ? 100 : 1;
         $docs = $documentFinder->find($query, $limit);
         if (empty($docs)) {
             continue;
         }

         if ($isUser === null) {
             $isUser = (bool) $this->get('security.context')->isGranted('ROLE_USER');
         }

         // Logged-in users see every document, anonymous ones only public documents.
         $paths = array();
         foreach ($docs as $doc) {
             if ($isUser || $doc->getPublic()) {
                 $paths[] = $doc->getPath();
             }
         }
         $results[$phrase] = $paths;
     }

     $notFound = array("mot_introuvable.pdf");

     if (empty($results)) {
         $wordList = array($searchedTerm);
         if ($searchedTerm !== $selectedTerm) {
             $wordList[] = $selectedTerm;
         }
         return array($notFound, $wordList);
     }

     // Numeric-looking phrases become integer array keys; turn them back into
     // strings so the strict search below is reliable.
     $wordList = array_map('strval', array_keys($results));

     // The searched phrase always comes first, without being listed twice.
     $position = array_search($searchedTerm, $wordList, true);
     if ($position !== false) {
         unset($wordList[$position]);
     }
     array_unshift($wordList, $searchedTerm);

     // Prefer the documents of the selected phrase, then those of the searched one.
     $selectedDocList = null;
     if (array_key_exists($selectedTerm, $results)) {
         $selectedDocList = $results[$selectedTerm];
     } elseif (array_key_exists($searchedTerm, $results)) {
         $selectedDocList = $results[$searchedTerm];
     }
     // Also covers a phrase that exists but has no document visible to this user.
     if (empty($selectedDocList)) {
         $selectedDocList = $notFound;
     }

     return array($selectedDocList, $wordList);
 }
Example #2
0
 /**
  * Runs a phrase search on the "content" field, restricted to one library.
  *
  * Double quotes are removed from the search text and the remainder is
  * matched as a single phrase. The query is filtered on the "libraryID" term
  * and routed by the library id; the round-trip time is reported to StatsD.
  *
  * @param int    $libraryID  Library to search in (also used as routing value).
  * @param string $searchText Text to look for.
  *
  * @return string[] For each hit, the second "/"-separated segment of its id
  *                  (presumably the item key, i.e. ids look like
  *                  "<libraryID>/<key>" - TODO confirm). Empty array when
  *                  there is no hit.
  *
  * @throws Exception When the search response reports an error.
  */
 public static function searchInLibrary($libraryID, $searchText)
 {
     // TEMP: For now, strip double-quotes and make everything a phrase search
     $phrase = str_replace('"', '', $searchText);

     $type = self::getReadType();

     // Phrase match on the document content...
     $phraseQuery = new \Elastica\Query\Match();
     $phraseQuery->setFieldQuery('content', $phrase);
     $phraseQuery->setFieldType('content', 'phrase');

     // ...restricted to the requested library.
     $libraryFilter = new \Elastica\Filter\Term();
     $libraryFilter->setTerm("libraryID", $libraryID);

     $filteredQuery = new \Elastica\Query\Filtered($phraseQuery, $libraryFilter);

     // Time the search call and report it in milliseconds.
     $startedAt = microtime(true);
     $resultSet = $type->search($filteredQuery, ['routing' => $libraryID]);
     $elapsedMs = (microtime(true) - $startedAt) * 1000;
     StatsD::timing("elasticsearch.client.item_fulltext.search", $elapsedMs);

     $response = $resultSet->getResponse();
     if ($response->hasError()) {
         throw new Exception($response->getError());
     }

     // Keep only the part of each hit id that follows the first "/".
     $keys = array();
     foreach ($resultSet->getResults() as $hit) {
         $idParts = explode("/", $hit->getId());
         $keys[] = $idParts[1];
     }

     return $keys;
 }