Exemple #1
0
 /**
  * Collects the 'sourceId' property of the LexemSource records attached to a lexem.
  *
  * @param object $lexem Object exposing an `id` property.
  * @return mixed Whatever util_objectProperty() yields for the 'sourceId' property
  *               (presumably a list of source IDs -- confirm against the helper).
  */
 public static function getForLexem($lexem)
 {
     $lexemId = $lexem->id;
     $sourcesOfLexem = LexemSource::get_all_by_lexemId($lexemId);
     $sourceIds = util_objectProperty($sourcesOfLexem, 'sourceId');

     return $sourceIds;
 }
Exemple #2
0
 /**
  * Full-text search: finds the definitions that match every non-stop word of the query.
  *
  * For each word, the LexemModels generating that form are looked up, then the
  * definition IDs indexed for those LexemModels. Per-word results are intersected.
  * Multi-word results are additionally ordered by a per-definition score.
  *
  * @param array $words         Query words (iterated and counted).
  * @param bool  $hasDiacritics Picks the InflectedForm column compared against each word:
  *                             'formNoAccent' when truthy, 'formUtf8General' otherwise.
  * @param mixed $sourceId      Forwarded unchanged to
  *                             FullTextIndex::loadDefinitionIdsForLexemModels().
  * @return array Two-element array: the matching definition IDs (empty array when
  *               nothing matches) and the query words identified as stop words.
  */
 public static function searchFullText($words, $hasDiacritics, $sourceId)
 {
     if ($hasDiacritics) {
         $formColumn = 'formNoAccent';
     } else {
         $formColumn = 'formUtf8General';
     }
     $qualifiedColumn = "I.{$formColumn}";

     $matchingDefIds = null;          // null until the first non-stop word is processed
     $stopWords = array();
     $lexemModelIdsPerWord = array(); // one entry per query word, stop words included

     foreach ($words as $word) {
         // All LexemModels that generate this form.
         $lexemModels = Model::factory('LexemModel')
             ->table_alias('L')
             ->select('L.id')
             ->distinct()
             ->join('InflectedForm', 'I.lexemModelId = L.id', 'I')
             ->where($qualifiedColumn, $word)
             ->find_many();
         $lexemModelIds = util_objectProperty($lexemModels, 'id');
         $lexemModelIdsPerWord[] = $lexemModelIds;

         // FullTextIndex records for those LexemModels. Note that the FTI excludes stop words.
         $definitionIds = FullTextIndex::loadDefinitionIdsForLexemModels($lexemModelIds, $sourceId);

         // Only a word with no indexed definitions can be a stop word; in that case
         // count the matching forms whose lexem is flagged as a stop word.
         $isStopWord = false;
         if (empty($definitionIds)) {
             $isStopWord = Model::factory('InflectedForm')
                 ->table_alias('I')
                 ->join('LexemModel', 'I.lexemModelId = LM.id', 'LM')
                 ->join('Lexem', 'LM.lexemId = L.id', 'L')
                 ->where($qualifiedColumn, $word)
                 ->where('L.stopWord', 1)
                 ->count();
         }

         if ($isStopWord) {
             $stopWords[] = $word;
             continue;
         }

         if ($matchingDefIds === null) {
             $matchingDefIds = $definitionIds;
         } else {
             $matchingDefIds = util_intersectArrays($matchingDefIds, $definitionIds);
         }
     }

     // Nothing left: the query was all stop words or the source selection produced no results.
     if (empty($matchingDefIds)) {
         return array(array(), $stopWords);
     }

     // Single-word queries skip the ordering step.
     // We could sort the definitions by lexicon, but it is very expensive.
     if (count($words) == 1) {
         return array($matchingDefIds, $stopWords);
     }

     // Compute a score for every definition, then order the definitions by it.
     DebugInfo::resetClock();
     $positionMap = FullTextIndex::loadPositionsByLexemIdsDefinitionIds($lexemModelIdsPerWord, $matchingDefIds);
     $scores = array();
     foreach ($matchingDefIds as $defId) {
         // NOTE(review): presumably the shortest interval covering the query words -- confirm in util_findSnippet().
         $scores[] = util_findSnippet($positionMap[$defId]);
     }
     // NOTE(review): this guard looks redundant after the empty() early return above; kept as-is.
     if (!empty($matchingDefIds)) {
         // Sorts $scores ascending and reorders $matchingDefIds in step with it.
         array_multisort($scores, $matchingDefIds);
     }
     DebugInfo::stopClock("Computed score for every definition");

     return array($matchingDefIds, $stopWords);
 }
Exemple #3
0
/**
 * Finds the homonyms of a set of lexems: every Lexem whose formNoAccent equals that
 * of one of the given lexems, leaving out the given lexems themselves (matched by id).
 *
 * @param array $lexems Lexems to find homonyms for; each is read for 'formNoAccent' and 'id'.
 * @return array Empty array when $lexems is empty, otherwise the result of find_many().
 **/
function loadSetHomonyms($lexems)
{
    // No input lexems: nothing to query.
    if (!count($lexems)) {
        return array();
    }

    $forms = util_objectProperty($lexems, 'formNoAccent');
    $excludedIds = util_objectProperty($lexems, 'id');

    $query = Model::factory('Lexem');
    $query = $query->where_in('formNoAccent', $forms);
    $query = $query->where_not_in('id', $excludedIds);

    return $query->find_many();
}