/**
 * Returns the 'sourceId' values of the LexemSource records attached to a lexem.
 *
 * @param object $lexem Lexem whose ->id is used to look up LexemSource records.
 * @return mixed Result of util_objectProperty() over those records for the
 *               'sourceId' property (presumably a list of source IDs -- verify
 *               against util_objectProperty()).
 */
public static function getForLexem($lexem) {
  return util_objectProperty(
    LexemSource::get_all_by_lexemId($lexem->id),
    'sourceId'
  );
}
/**
 * Runs a full-text search for a list of words.
 *
 * For every word, the LexemModels generating that form are looked up, then the
 * definition IDs indexed for those LexemModels. Words that yield no definitions
 * and belong to a lexem flagged as stop word are reported separately; the
 * definition ID sets of all other words are intersected. For multi-word
 * queries the surviving definitions are ordered by the value util_findSnippet()
 * computes from their position data.
 *
 * @param array $words         Words to search for.
 * @param bool  $hasDiacritics Truthy: match on InflectedForm.formNoAccent;
 *                             falsy: match on InflectedForm.formUtf8General.
 * @param mixed $sourceId      Passed through to
 *                             FullTextIndex::loadDefinitionIdsForLexemModels().
 * @return array Two-element array: (matching definition IDs, stop words found).
 */
public static function searchFullText($words, $hasDiacritics, $sourceId) {
  $field = $hasDiacritics ? 'formNoAccent' : 'formUtf8General';
  $matchingDefIds = null; // null until the first non-stop word has been processed
  $stopWords = array();
  $lexemModelIdsPerWord = array();

  foreach ($words as $word) {
    // Get all LexemModels generating this form
    $lexemModels = Model::factory('LexemModel')
      ->table_alias('L')
      ->select('L.id')
      ->distinct()
      ->join('InflectedForm', 'I.lexemModelId = L.id', 'I')
      ->where("I.{$field}", $word)
      ->find_many();
    $lexemModelIds = util_objectProperty($lexemModels, 'id');
    $lexemModelIdsPerWord[] = $lexemModelIds;

    // Get the FullTextIndex records for each LexemModels. Note that the FTI excludes stop words.
    $defIds = FullTextIndex::loadDefinitionIdsForLexemModels($lexemModelIds, $sourceId);

    // Determine whether the word is a stop word. Only words without any
    // indexed definitions are checked.
    $isStopWord = false;
    if (empty($defIds)) {
      $isStopWord = Model::factory('InflectedForm')
        ->table_alias('I')
        ->join('LexemModel', 'I.lexemModelId = LM.id', 'LM')
        ->join('Lexem', 'LM.lexemId = L.id', 'L')
        ->where("I.{$field}", $word)
        ->where('L.stopWord', 1)
        ->count();
    }

    if ($isStopWord) {
      $stopWords[] = $word;
      continue;
    }

    $matchingDefIds = ($matchingDefIds === null)
      ? $defIds
      : util_intersectArrays($matchingDefIds, $defIds);
  }

  // This can happen when the query is all stopwords or the source selection produces no results
  if (empty($matchingDefIds)) {
    return array(array(), $stopWords);
  }

  // For single-word queries, skip the ordering part.
  // We could sort the definitions by lexicon, but it is very expensive.
  if (count($words) == 1) {
    return array($matchingDefIds, $stopWords);
  }

  // Now compute a score for every definition
  DebugInfo::resetClock();
  $positionMap = FullTextIndex::loadPositionsByLexemIdsDefinitionIds($lexemModelIdsPerWord, $matchingDefIds);

  $shortestIntervals = array();
  foreach ($matchingDefIds as $defId) {
    $shortestIntervals[] = util_findSnippet($positionMap[$defId]);
  }

  // $matchingDefIds is known to be non-empty here (see the early return above),
  // so the sort can run unconditionally. Definitions are reordered in step with
  // their scores.
  array_multisort($shortestIntervals, $matchingDefIds);
  DebugInfo::stopClock("Computed score for every definition");

  return array($matchingDefIds, $stopWords);
}
/**
 * Load all lexems having the same form as one of the given lexems, but exclude the given lexems.
 *
 * @param array $lexems Lexems to find homonyms for; their 'formNoAccent' and
 *                      'id' properties are read.
 * @return array Lexem records whose formNoAccent matches one of the given
 *               lexems and whose id is not among them. Empty array when
 *               $lexems is empty.
 **/
function loadSetHomonyms($lexems) {
  // Nothing to match against: skip the query entirely.
  if (!count($lexems)) {
    return array();
  }

  $forms = util_objectProperty($lexems, 'formNoAccent');
  $excludedIds = util_objectProperty($lexems, 'id');

  return Model::factory('Lexem')
    ->where_in('formNoAccent', $forms)
    ->where_not_in('id', $excludedIds)
    ->find_many();
}