/**
 * Finds the definitions that match every word of a query and orders them
 * by the value util_findSnippet() computes from the word positions.
 *
 * @param array $words         Query words; each is expanded via Lexem::searchInflectedForms().
 * @param mixed $hasDiacritics Passed through unchanged to Lexem::searchInflectedForms().
 * @return array Definition IDs common to all words, sorted ascending by their
 *               util_findSnippet() value (ties broken by ascending ID, per array_multisort()).
 */
public static function searchFullText($words, $hasDiacritics) {
  $intersection = null;

  // First pass: collect, per word, the IDs of all lexems having that inflected form.
  $matchingLexems = array();
  foreach ($words as $word) {
    $lexems = Lexem::searchInflectedForms($word, $hasDiacritics);
    $lexemIds = array();
    foreach ($lexems as $lexem) {
      $lexemIds[] = $lexem->id;
    }
    $matchingLexems[] = $lexemIds;
  }

  // Second pass: intersect the definition sets of all the words.
  foreach ($words as $i => $word) {
    // Load all the definitions for any possible lexem for this word.
    $lexemIds = $matchingLexems[$i];
    $defIds = FullTextIndex::loadDefinitionIdsForLexems($lexemIds);
    DebugInfo::resetClock();
    $intersection = $intersection === null
      ? $defIds
      : util_intersectArrays($intersection, $defIds);
    DebugInfo::stopClock("Intersected with lexems for {$word}");
  }

  if ($intersection === null) {
    // No word was processed ($words is empty). Per the original note, this
    // can happen when the query is all stopwords.
    $intersection = array();
  }

  // One score per definition, kept parallel to $intersection.
  // (Previously initialised under the misspelled name $shortestInvervals.)
  $shortestIntervals = array();
  DebugInfo::resetClock();

  // Now compute a score for every definition
  foreach ($intersection as $defId) {
    // Compute the position matrix (for every word, load all the matching
    // positions)
    $p = array();
    foreach ($matchingLexems as $lexemIds) {
      $p[] = FullTextIndex::loadPositionsByLexemIdsDefinitionId($lexemIds, $defId);
    }
    $shortestIntervals[] = util_findSnippet($p);
  }

  // array_multisort() must not be called with empty arrays, hence the guard.
  if ($intersection) {
    array_multisort($shortestIntervals, $intersection);
  }
  DebugInfo::stopClock("Computed score for every definition");

  return $intersection;
}
/**
 * Full-text search: returns the definitions matching every non-stop word
 * of the query, together with the stop words that were ignored.
 *
 * @param array $words         Query words, matched against InflectedForm rows.
 * @param mixed $hasDiacritics When truthy, words are matched on the formNoAccent
 *                             column; otherwise on formUtf8General.
 * @param mixed $sourceId      Passed through to FullTextIndex::loadDefinitionIdsForLexemModels().
 * @return array Two-element array: [0] the matching definition IDs (ordered by
 *               util_findSnippet() value for multi-word queries), [1] the words
 *               detected as stop words.
 */
public static function searchFullText($words, $hasDiacritics, $sourceId) {
  $formField = $hasDiacritics ? 'formNoAccent' : 'formUtf8General';
  $commonDefIds = null;
  $stopWords = array();
  $lexemModelIdsByWord = array();

  foreach ($words as $word) {
    // Get all LexemModels generating this form
    $lexemModels = Model::factory('LexemModel')
      ->table_alias('L')
      ->select('L.id')
      ->distinct()
      ->join('InflectedForm', 'I.lexemModelId = L.id', 'I')
      ->where("I.{$formField}", $word)
      ->find_many();
    $lexemModelIds = util_objectProperty($lexemModels, 'id');
    $lexemModelIdsByWord[] = $lexemModelIds;

    // Get the FullTextIndex records for these LexemModels.
    // Note that the FTI excludes stop words.
    $defIds = FullTextIndex::loadDefinitionIdsForLexemModels($lexemModelIds, $sourceId);

    // Only a word without any indexed definition can be a stop word; in that
    // case, count the matching forms whose lexem is flagged as a stop word.
    $isStopWord = false;
    if (empty($defIds)) {
      $isStopWord = Model::factory('InflectedForm')
        ->table_alias('I')
        ->join('LexemModel', 'I.lexemModelId = LM.id', 'LM')
        ->join('Lexem', 'LM.lexemId = L.id', 'L')
        ->where("I.{$formField}", $word)
        ->where('L.stopWord', 1)
        ->count();
    }

    if ($isStopWord) {
      $stopWords[] = $word;
      continue;
    }

    if ($commonDefIds === null) {
      $commonDefIds = $defIds;
    } else {
      $commonDefIds = util_intersectArrays($commonDefIds, $defIds);
    }
  }

  // This can happen when the query is all stopwords or the source selection
  // produces no results
  if (empty($commonDefIds)) {
    return array(array(), $stopWords);
  }

  // For single-word queries, skip the ordering part.
  // We could sort the definitions by lexicon, but it is very expensive.
  if (count($words) == 1) {
    return array($commonDefIds, $stopWords);
  }

  // Now compute a score for every definition
  DebugInfo::resetClock();
  $positionMap = FullTextIndex::loadPositionsByLexemIdsDefinitionIds($lexemModelIdsByWord, $commonDefIds);

  $scores = array();
  foreach ($commonDefIds as $defId) {
    $scores[] = util_findSnippet($positionMap[$defId]);
  }

  // Sort the definition IDs in step with their scores.
  if ($commonDefIds) {
    array_multisort($scores, $commonDefIds);
  }
  DebugInfo::stopClock("Computed score for every definition");

  return array($commonDefIds, $stopWords);
}
// FlexStringUtil::placeAccent
assertEquals("ș'aibă", FlexStringUtil::placeAccent("șaibă", 2, 'a'));
assertEquals("ș'aibă", FlexStringUtil::placeAccent("șaibă", 3, 'a'));
assertEquals("șa'ibă", FlexStringUtil::placeAccent("șaibă", 2, 'i'));
assertEquals("șa'ibă", FlexStringUtil::placeAccent("șaibă", 3, 'i'));

// FlexStringUtil::insert -- insertion in the middle, at the start and at the end.
// The expected value of the first case must contain the inserted literal verbatim.
assertEquals("unf*****gbelievable", FlexStringUtil::insert("unbelievable", "f*****g", 2));
assertEquals("abcdef", FlexStringUtil::insert("cdef", "ab", 0));
assertEquals("abcdef", FlexStringUtil::insert("abcd", "ef", 4));

// AdminStringUtil::padRight -- the expected padding is spelled out with
// str_repeat() so the target width stays explicit (and survives whitespace
// normalization of this file). Assumes padRight pads with spaces up to the
// requested number of characters and never truncates.
assertEquals('mamă' . str_repeat(' ', 6), AdminStringUtil::padRight('mamă', 10));
assertEquals('mama' . str_repeat(' ', 6), AdminStringUtil::padRight('mama', 10));
assertEquals('ăâîșț' . str_repeat(' ', 3), AdminStringUtil::padRight('ăâîșț', 8));
assertEquals('ăâîșț', AdminStringUtil::padRight('ăâîșț', 5));
assertEquals('ăâîșț', AdminStringUtil::padRight('ăâîșț', 3));

// AdminStringUtil::unicodeExplode
assertEqualArrays(array('c', 'a', 'r'), AdminStringUtil::unicodeExplode('car'));
assertEqualArrays(array('ă', 'a', 'â', 'ș', 'ț'), AdminStringUtil::unicodeExplode('ăaâșț'));

// util_intersectArrays
assertEqualArrays(array(1, 5, 10),
                  util_intersectArrays(array(1, 3, 5, 7, 9, 10), array(1, 2, 4, 5, 6, 8, 10)));
assertEqualArrays(array(), util_intersectArrays(array(2, 4, 6, 8), array(1, 3, 5, 7)));

// Lock: nothing to release before acquiring; a held lock cannot be acquired
// again; after release the lock no longer exists.
assert(!Lock::release('test'));
assert(!Lock::exists('test'));
assert(Lock::acquire('test'));
assert(Lock::exists('test'));
assert(!Lock::acquire('test'));
assert(Lock::release('test'));
assert(!Lock::exists('test'));
assert(!Lock::release('test'));

// util_findSnippet
assertEquals(0, util_findSnippet(array(array(1, 2, 10))));
assertEquals(1, util_findSnippet(array(array(1, 2, 10), array(5, 6, 9))));
assertEquals(2, util_findSnippet(array(array(1, 2, 10), array(5, 6, 8))));
assertEquals(4, util_findSnippet(array(array(1, 2, 10), array(6, 20), array(8, 15))));

// AdminStringUtil::formatLexem
assertEquals('$abc$ @def@', AdminStringUtil::formatLexem('$abc$ @def@')); // This is intentional -- lexem formatting is very lenient.
assertEquals("m'amă m'are", AdminStringUtil::formatLexem("m'am~a máre "));