/**
 * Full-text search: returns the ids of the definitions that match every word.
 *
 * For each word, the matching lexems are looked up with
 * Lexem::searchInflectedForms(); the definition ids indexed for those lexems
 * are then intersected across all the words. Each surviving definition gets a
 * score from util_findSnippet() (computed over the per-word position lists)
 * and the result is ordered ascending by that score.
 *
 * @param array $words         The words to search for.
 * @param mixed $hasDiacritics Passed through to Lexem::searchInflectedForms().
 * @return array Definition ids, ordered ascending by their util_findSnippet() value.
 */
public static function searchFullText($words, $hasDiacritics)
 {
     $intersection = null;
     $matchingLexems = array();
     foreach ($words as $i => $word) {
         $lexems = Lexem::searchInflectedForms($word, $hasDiacritics);
         $lexemIds = array();
         foreach ($lexems as $lexem) {
             $lexemIds[] = $lexem->id;
         }
         // Store under the word's own key so that the lookup in the next loop
         // stays aligned even when $words is not a 0-based list.
         $matchingLexems[$i] = $lexemIds;
     }
     foreach ($words as $i => $word) {
         // Load all the definitions for any possible lexem for this word.
         $lexemIds = $matchingLexems[$i];
         $defIds = FullTextIndex::loadDefinitionIdsForLexems($lexemIds);
         DebugInfo::resetClock();
         $intersection = $intersection === null ? $defIds : util_intersectArrays($intersection, $defIds);
         DebugInfo::stopClock("Intersected with lexems for {$word}");
     }
     if ($intersection === null) {
         // This can happen when the query is all stopwords
         $intersection = array();
     }
     $shortestIntervals = array();
     DebugInfo::resetClock();
     // Now compute a score for every definition
     foreach ($intersection as $defId) {
         // Compute the position matrix (for every word, load all the matching
         // positions)
         $p = array();
         foreach ($matchingLexems as $lexemIds) {
             $p[] = FullTextIndex::loadPositionsByLexemIdsDefinitionId($lexemIds, $defId);
         }
         $shortestIntervals[] = util_findSnippet($p);
     }
     if ($intersection) {
         // Sort the definition ids in step with their scores (ascending).
         array_multisort($shortestIntervals, $intersection);
     }
     DebugInfo::stopClock("Computed score for every definition");
     return $intersection;
 }
示例#2
0
 /**
  * Full-text search restricted by source: returns the matching definition ids
  * together with the query words that were recognised as stop words.
  *
  * For each word, the LexemModels generating that form are looked up and the
  * definition ids indexed for them are loaded. A word with no indexed
  * definitions is checked against Lexem.stopWord; stop words are reported
  * separately and do not take part in the intersection. For multi-word queries
  * the surviving definitions are ordered ascending by their util_findSnippet()
  * value.
  *
  * @param array $words         The words to search for.
  * @param mixed $hasDiacritics Truthy to match on 'formNoAccent', falsy to match on 'formUtf8General'.
  * @param mixed $sourceId      Passed through to FullTextIndex::loadDefinitionIdsForLexemModels().
  * @return array Two-element array: the definition ids and the stop words found in the query.
  */
 public static function searchFullText($words, $hasDiacritics, $sourceId)
 {
     $field = $hasDiacritics ? 'formNoAccent' : 'formUtf8General';
     $intersection = null;
     $stopWords = array();
     $lmMap = array();
     foreach ($words as $word) {
         // Get all LexemModels generating this form
         $lms = Model::factory('LexemModel')
             ->table_alias('L')
             ->select('L.id')
             ->distinct()
             ->join('InflectedForm', 'I.lexemModelId = L.id', 'I')
             ->where("I.{$field}", $word)
             ->find_many();
         $lmIds = util_objectProperty($lms, 'id');
         // NOTE(review): this entry is recorded before the stop-word check, so
         // $lmMap also holds the ids for words later classified as stop words
         // and the position lookup below receives them too -- confirm intended.
         $lmMap[] = $lmIds;
         // Get the FullTextIndex records for each LexemModels. Note that the FTI excludes stop words.
         $defIds = FullTextIndex::loadDefinitionIdsForLexemModels($lmIds, $sourceId);
         // Determine whether the word is a stop word. Only words without any
         // indexed definitions need the extra query.
         $isStopWord = false;
         if (empty($defIds)) {
             $isStopWord = (bool) Model::factory('InflectedForm')
                 ->table_alias('I')
                 ->join('LexemModel', 'I.lexemModelId = LM.id', 'LM')
                 ->join('Lexem', 'LM.lexemId = L.id', 'L')
                 ->where("I.{$field}", $word)
                 ->where('L.stopWord', 1)
                 ->count();
         }
         if ($isStopWord) {
             $stopWords[] = $word;
         } else {
             $intersection = $intersection === null ? $defIds : util_intersectArrays($intersection, $defIds);
         }
     }
     if (empty($intersection)) {
         // This can happen when the query is all stopwords or the source selection produces no results
         return array(array(), $stopWords);
     }
     if (count($words) === 1) {
         // For single-word queries, skip the ordering part.
         // We could sort the definitions by lexicon, but it is very expensive.
         return array($intersection, $stopWords);
     }
     // Now compute a score for every definition
     DebugInfo::resetClock();
     $positionMap = FullTextIndex::loadPositionsByLexemIdsDefinitionIds($lmMap, $intersection);
     $shortestIntervals = array();
     foreach ($intersection as $defId) {
         $shortestIntervals[] = util_findSnippet($positionMap[$defId]);
     }
     // $intersection is known to be non-empty here (see the early return above),
     // so the two arrays can be sorted in step without a further guard.
     array_multisort($shortestIntervals, $intersection);
     DebugInfo::stopClock("Computed score for every definition");
     return array($intersection, $stopWords);
 }
示例#3
0
// FlexStringUtil::placeAccent()
assertEquals("ș'aibă", FlexStringUtil::placeAccent("șaibă", 2, 'a'));
assertEquals("ș'aibă", FlexStringUtil::placeAccent("șaibă", 3, 'a'));
assertEquals("șa'ibă", FlexStringUtil::placeAccent("șaibă", 2, 'i'));
assertEquals("șa'ibă", FlexStringUtil::placeAccent("șaibă", 3, 'i'));

// FlexStringUtil::insert()
// The inserted literal must spell out the text that appears in the expected
// value; a masked literal ("f*****g") can never produce the expected string.
assertEquals("unfuckingbelievable", FlexStringUtil::insert("unbelievable", "fucking", 2));
assertEquals("abcdef", FlexStringUtil::insert("cdef", "ab", 0));
assertEquals("abcdef", FlexStringUtil::insert("abcd", "ef", 4));

// AdminStringUtil::padRight(): strings already at or over the requested
// length are expected back unchanged.
assertEquals('mamă      ', AdminStringUtil::padRight('mamă', 10));
assertEquals('mama      ', AdminStringUtil::padRight('mama', 10));
assertEquals('ăâîșț   ', AdminStringUtil::padRight('ăâîșț', 8));
assertEquals('ăâîșț', AdminStringUtil::padRight('ăâîșț', 5));
assertEquals('ăâîșț', AdminStringUtil::padRight('ăâîșț', 3));

// AdminStringUtil::unicodeExplode()
assertEqualArrays(array('c', 'a', 'r'), AdminStringUtil::unicodeExplode('car'));
assertEqualArrays(array('ă', 'a', 'â', 'ș', 'ț'), AdminStringUtil::unicodeExplode('ăaâșț'));

// util_intersectArrays()
assertEqualArrays(array(1, 5, 10), util_intersectArrays(array(1, 3, 5, 7, 9, 10), array(1, 2, 4, 5, 6, 8, 10)));
assertEqualArrays(array(), util_intersectArrays(array(2, 4, 6, 8), array(1, 3, 5, 7)));

// Lock: these assertions are order-dependent -- each one relies on the lock
// state left behind by the previous call.
assert(!Lock::release('test'));
assert(!Lock::exists('test'));
assert(Lock::acquire('test'));
assert(Lock::exists('test'));
assert(!Lock::acquire('test'));
assert(Lock::release('test'));
assert(!Lock::exists('test'));
assert(!Lock::release('test'));

// util_findSnippet(): takes one list of positions per word.
assertEquals(0, util_findSnippet(array(array(1, 2, 10))));
assertEquals(1, util_findSnippet(array(array(1, 2, 10), array(5, 6, 9))));
assertEquals(2, util_findSnippet(array(array(1, 2, 10), array(5, 6, 8))));
assertEquals(4, util_findSnippet(array(array(1, 2, 10), array(6, 20), array(8, 15))));

// AdminStringUtil::formatLexem()
assertEquals('$abc$ @def@', AdminStringUtil::formatLexem('$abc$ @def@'));
// This is intentional -- lexem formatting is very lenient.
assertEquals("m'amă m'are", AdminStringUtil::formatLexem("m'am~a máre  "));