/**
 * Builds (on first use) and returns the abbreviation lookup table, cached in self::$ABBREVS.
 *
 * The result is a map($sourceId => map($from => $info)), where $from is the abbreviated
 * text (with any leading '*' marker removed) and $info is an array with the keys:
 *   - 'to':        the expanded text;
 *   - 'ambiguous': true when the raw key was prefixed with '*';
 *   - 'regexp':    a delimiter-less pattern that captures the abbreviation when it is
 *                  surrounded by characters that are not letters, '-' or '.';
 *   - 'numWords':  the number of space-separated words in $from;
 *   - 'hasCaps':   true when $from differs from its mb_strtolower() form.
 *
 * An ambiguous abbreviation such as "top" or "gen" also has a meaning as an inflected form.
 * Ambiguous abbreviations should be expanded carefully, or with human approval.
 *
 * @return array the per-source abbreviation map described above
 */
private static function loadAbbreviations() {
    if (!self::$ABBREVS) {
        $raw = self::loadRawAbbreviations();
        $result = array();

        foreach ($raw['sources'] as $sourceId => $sectionList) {
            // Each source lists the sections it uses as a comma-separated string.
            $sections = preg_split('/, */', $sectionList);

            $list = array();
            foreach ($sections as $section) {
                // If an abbreviation is defined in several sections, use the one that's defined later
                $list = array_merge($list, $raw[$section]);
            }

            $result[$sourceId] = array();
            foreach ($list as $from => $to) {
                // PHP turns integer-like array keys into ints; work on a string so the
                // prefix test below never indexes into a non-string.
                $from = (string) $from;

                // A leading '*' marks the abbreviation as ambiguous. strncmp() is safe on
                // an empty string, unlike $from[0].
                $ambiguous = strncmp($from, '*', 1) === 0;
                if ($ambiguous) {
                    // The cast covers PHP < 8, where substr('*', 1) returns false.
                    $from = (string) substr($from, 1);
                }

                // An empty abbreviation would yield an empty capture group that matches
                // at every boundary, so it is ignored.
                if ($from === '') {
                    continue;
                }

                $numWords = 1 + substr_count($from, ' ');

                // Dots are matched literally and each space matches any run of spaces.
                // NOTE(review): other regex metacharacters in $from are not escaped;
                // confirm the abbreviation data never contains them.
                $regexp = str_replace(array('.', ' '), array("\\.", ' *'), $from);
                $pattern = "[^-a-zăâîșțáéíóúA-ZĂÂÎȘȚÁÉÍÓÚ.]({$regexp})([^-a-zăâîșțáéíóúA-ZĂÂÎȘȚÁÉÍÓÚ.]|\$)";

                $hasCaps = $from !== mb_strtolower($from);

                $result[$sourceId][$from] = array(
                    'to' => $to,
                    'ambiguous' => $ambiguous,
                    'regexp' => $pattern,
                    'numWords' => $numWords,
                    'hasCaps' => $hasCaps,
                );
            }

            // Sort the list by number of words, then by ambiguous (ordering is defined by
            // self::abbrevCmp). The array form of the callable is used because the
            // 'self::method' string form is deprecated as of PHP 8.2.
            uasort($result[$sourceId], array(__CLASS__, 'abbrevCmp'));
        }

        self::$ABBREVS = $result;
    }

    return self::$ABBREVS;
}