/**
 * Normalizes a raw time expression string before it is parsed.
 *
 * The steps run in a fixed order because later steps depend on the markers
 * ('#', '_', ' - ') inserted by earlier ones:
 *  1. "preprocess before" plugin hook
 *  2. whole-expression substitution from the "expressions" table of the datetime settings
 *  3. comma removal and collapsing of repeated spaces
 *  4. removal of definite/indefinite articles
 *  5. replacement of configured multi-word keywords ("wordsWithSpaces")
 *  6. separation of '?' from adjacent text
 *  7. protection of '-' inside dates (rewritten to '#' or '/') and inside
 *     decade/century notation (rewritten to '_')
 *  8. spacing of the remaining dashes so they stand alone as " - "
 *  9. separation of era and meridian suffixes glued to a number
 * 10. rewriting of leading qualifier variants (after/before/born/died) to the first
 *     entry of their configured list
 * 11. spacing of non-alphanumeric range conjunctions
 * 12. splitting of the "T" between digits, yyyy/yyyy ranges and date/date ranges
 * 13. "preprocess after" plugin hook
 *
 * @param string $ps_expression Raw expression
 * @return string Normalized expression, trimmed
 */
private function preprocess($ps_expression) {
    // Let plugins rewrite the raw expression before any built-in normalization runs.
    $o_app_plugin_manager = new ApplicationPluginManager();
    $va_hook_result = $o_app_plugin_manager->hookTimeExpressionParserPreprocessBefore(array("expression" => $ps_expression));
    if (is_array($va_hook_result) && isset($va_hook_result["expression"])) {
        // Assigned unconditionally: a loose "!=" test would treat numeric-looking strings
        // such as "0100" and "100" as equal and silently drop the plugin's change.
        $ps_expression = $va_hook_result["expression"];
    }

    # replace the whole expression when its lower-cased form is a key of the "expressions" table
    $va_dict = $this->opo_datetime_settings->getAssoc("expressions");
    $vs_lc_expression = mb_strtolower($ps_expression);
    if (isset($va_dict[$vs_lc_expression])) {
        $ps_expression = $va_dict[$vs_lc_expression];
    }

    # remove commas, then collapse runs of spaces
    $ps_expression = str_replace(',', ' ', $ps_expression);
    $ps_expression = preg_replace('![ ]+!', ' ', $ps_expression);

    # remove articles (definite first, then indefinite)
    foreach (array("definiteArticles", "indefiniteArticles") as $vs_article_setting) {
        $va_articles = $this->opo_language_settings->getList($vs_article_setting);
        if (!is_array($va_articles) || !sizeof($va_articles)) {
            continue;
        }

        // Pad with spaces so an article at the very start or end is matched as a whole word.
        $ps_expression = " " . $ps_expression . " ";
        foreach ($va_articles as $vs_article) {
            $ps_expression = str_ireplace(" " . $vs_article . " ", " ", $ps_expression);
        }
    }
    $ps_expression = trim($ps_expression);

    # replace time keywords containing spaces with the configured replacement,
    # allowing treatment of expressions like "av. J.-C." in French
    $va_words_with_spaces = $this->opo_language_settings->getList("wordsWithSpaces");
    $va_words_replacements = $this->opo_language_settings->getList("wordsWithSpacesReplacements");
    if (
        is_array($va_words_with_spaces) && is_array($va_words_replacements)
        && sizeof($va_words_with_spaces) && sizeof($va_words_replacements)
    ) {
        $ps_expression = str_replace($va_words_with_spaces, $va_words_replacements, $ps_expression);
    }

    # separate '?' from words
    $ps_expression = preg_replace('!([^\\?\\/]+)\\?{1}([^\\?]+)!', '\\1 ? \\2', $ps_expression);
    $ps_expression = preg_replace('!([^\\?\\/]+)\\?{1}$!', '\\1 ?', $ps_expression);

    # remove UTC offset if present
    # NOTE(review): only offsets introduced by '-' are stripped; "+hh:mm" is left as-is. Confirm that is intended.
    $ps_expression = preg_replace("/(T[\\d]{1,2}:[\\d]{2}:[\\d]{2})-[\\d]{1,2}:[\\d]{2}/i", "\$1", $ps_expression);

    # distinguish w3cdtf dates (yyyy-mm-dd) since we already use '-' for ranges
    $ps_expression = preg_replace("/([\\d]{4})-([\\d]{2})-([\\d]{2})/", "\$1#\$2#\$3", $ps_expression);

    # same for yyyy-mm when it is followed by something other than a digit, '-', '/' or '.'
    $ps_expression = preg_replace("/([\\d]{4})-([\\d]{2})([^\\d\\-\\/\\.]+)/", "\$1#\$2\$3", $ps_expression);

    # distinguish dd-MMM-yy and dd-MMM-yyyy dates since we already use '-' for ranges (ex. 10-JUN-80 or 10-JUN-1980)
    $ps_expression = preg_replace("/([\\d]{1,2})-([A-Za-z]{3,15})-([\\d]{2,4})/", "\$1#\$2#\$3", $ps_expression);

    # convert dd-mm-yyyy dates to dd/mm/yyyy so that the range conjunction code below doesn't mangle them
    $ps_expression = preg_replace("/([\\d]{2})-([\\d]{2})-([\\d]{4})/", "\$1/\$2/\$3", $ps_expression);

    # trailing yyyy-nn: nn > 12 cannot be a month, so it is expanded to a year in the same
    # century (6-number year ranges, ex. 1950-60 becomes 1950-1960); otherwise it is marked as yyyy#mm
    if (preg_match("/([\\d]{4})-([\\d]{2})\$/", $ps_expression, $va_matches)) {
        if (intval($va_matches[2]) > 12) {
            $vs_replacement = "\$1-" . substr($va_matches[1], 0, 2) . "\$2";
        } else {
            $vs_replacement = "\$1#\$2";
        }
        $ps_expression = preg_replace("/([\\d]{4})-([\\d]{2})\$/", $vs_replacement, $ps_expression);
    }

    # replace '-' used to express decades (eg. 192-) and centuries (eg. 19--) with underscores
    # since we use '-' for ranges; skipped when a four-digit number is directly followed by '-'
    if (preg_match('![\\d]{4}\\-!', $ps_expression)) {
        $ps_expression = preg_replace("![\\-]{1}!", " - ", $ps_expression);
    } else {
        $ps_expression = preg_replace('!([\\d]{2})[\\-]{2}!', '\\1__', $ps_expression);
        $ps_expression = preg_replace('!([\\d]{3})[\\-]{1}$!', '\\1_', $ps_expression);
        $ps_expression = preg_replace('!([\\d]{3})[\\-]{1}[\\D]+!', '\\1_', $ps_expression);
    }

    # put spaces around hyphen, en dash and em dash, unless the whole expression is a negative number
    if (!preg_match("!^[\\-]{1}[\\d]+\$!", $ps_expression)) {
        // The "u" modifier is required: without it the en/em dash are matched byte by byte,
        // which triples the replacement and corrupts other multi-byte characters that
        // happen to share one of those bytes.
        $vs_spaced = preg_replace('![\\-\\x{2013}\\x{2014}]!u', ' - ', $ps_expression);
        if ($vs_spaced === null) {
            // preg_replace() returns null when the subject is not valid UTF-8;
            // in that case only the ASCII hyphen can be handled safely.
            $vs_spaced = str_replace('-', ' - ', $ps_expression);
        }
        $ps_expression = $vs_spaced;
    }

    # separate era (AD/BC) and meridian (AM/PM) suffixes from the number they are attached to
    $va_suffix_settings = array(
        array("ADBCTable", "dateADIndicator", "dateBCIndicator"),
        array("meridianTable", "timeAMMeridian", "timePMMeridian"),
    );
    foreach ($va_suffix_settings as $va_setting_keys) {
        list($vs_table_key, $vs_first_key, $vs_second_key) = $va_setting_keys;

        $va_table = $this->opo_language_settings->getAssoc($vs_table_key);
        $va_suffix_list = array_merge(
            is_array($va_table) ? array_keys($va_table) : array(),
            array($this->opo_language_settings->get($vs_first_key), $this->opo_language_settings->get($vs_second_key))
        );

        foreach ($va_suffix_list as $vs_suffix) {
            $vs_suffix = (string)$vs_suffix;
            if ($vs_suffix === '') {
                // An empty suffix would make the pattern match every number in the expression.
                continue;
            }
            // preg_quote() keeps characters such as '.' or '/' in the configured value literal.
            $ps_expression = preg_replace("/([\\d]+)" . preg_quote($vs_suffix, "/") . "[ ]*/i", "\$1 {$vs_suffix} ", $ps_expression);
        }
    }

    # rewrite leading qualifier variants to the first ("primary") entry of their list
    foreach (array("afterQualifier", "beforeQualifier", "bornQualifier", "diedQualifier") as $vs_qualifier_setting) {
        $va_qualifiers = $this->opo_language_settings->getList($vs_qualifier_setting);
        if (!is_array($va_qualifiers)) {
            continue;
        }

        $vs_primary_qualifier = array_shift($va_qualifiers);
        foreach ($va_qualifiers as $vs_qualifier) {
            $vs_qualifier = (string)$vs_qualifier;
            if ($vs_qualifier === '') {
                // An empty variant would match any leading run of spaces.
                continue;
            }
            $ps_expression = preg_replace("/^" . preg_quote($vs_qualifier, "/") . "[ ]+/i", "{$vs_primary_qualifier} ", $ps_expression);
        }
    }

    # only add spaces around non-alphanumeric range conjunctions
    $va_conjunction_list = $this->opo_language_settings->getList("rangeConjunctions");
    if (is_array($va_conjunction_list)) {
        foreach ($va_conjunction_list as $vs_conjunction) {
            if (!preg_match("/^[A-Za-z0-9\\-]+\$/", $vs_conjunction)) {
                $ps_expression = str_replace($vs_conjunction, ' ' . $vs_conjunction . ' ', $ps_expression);
            }
        }
    }

    // check for ISO 8601 date/times... if we find one split the time off into a separate token
    $va_datetime_conjunctions = $this->opo_language_settings->getList('dateTimeConjunctions');
    $vs_datetime_conjunction = (is_array($va_datetime_conjunctions) && isset($va_datetime_conjunctions[0]))
        ? $va_datetime_conjunctions[0]
        : '';
    $ps_expression = preg_replace("/([\\d]+)T([\\d]+)/i", "\$1 " . $vs_datetime_conjunction . " \$2", $ps_expression);

    // support year ranges in the form yyyy/yyyy
    $ps_expression = preg_replace("!^([\\d]{4})/([\\d]{4})\$!", "\$1 - \$2", trim($ps_expression));

    // support date entry in the form yyyy-mm-dd/yyyy-mm-dd (HSP); the dashes were turned into '#' above
    $ps_expression = preg_replace("/([\\d]{4}#[\\d]{2}#[\\d]{2})\\/([\\d]{4}#[\\d]{2}#[\\d]{2})/", "\$1 - \$2", $ps_expression);

    // Let plugins adjust the normalized expression.
    $va_hook_result = $o_app_plugin_manager->hookTimeExpressionParserPreprocessAfter(array("expression" => $ps_expression));
    if (is_array($va_hook_result) && isset($va_hook_result["expression"])) {
        $ps_expression = $va_hook_result["expression"];
    }

    return trim($ps_expression);
}