/**
 * Loads a hyphenation ruleset and publishes it under a freshly generated key in $GLOBALS.
 *
 * The config file is parsed with sk_parse_config(). Its 'compiled' entry names the
 * serialized cache file and its 'rules' entries name the pattern files; both are
 * resolved relative to the directory of the config file. The ruleset is rebuilt from
 * the pattern files when the cache file is missing, when $recompile equals
 * ALWAYS_RECOMPILE, or when $recompile equals AUTO_RECOMPILE and the config file or
 * any rules file has a newer mtime than the cache. Otherwise the cache is unserialized
 * and used as is.
 *
 * The ruleset is an array with the keys 'alph', 'alphUC', 'trans', 'll', 'rl', 'enc'
 * and 'dict' ('dict' maps letter combinations to digit masks; prefixes that exist only
 * to keep the lookup chain unbroken are stored with the integer value 0).
 *
 * @param string $conffile  Path of the config file.
 * @param mixed  $recompile AUTO_RECOMPILE or ALWAYS_RECOMPILE; any other value means
 *                          "use the cache whenever the cache file exists".
 *
 * @return string|false Key in $GLOBALS under which the ruleset was stored, or false when
 *                      the config file is missing, cannot be parsed, or has no
 *                      'compiled' entry.
 */
function hypher_load($conffile, $recompile = AUTO_RECOMPILE)
{
    // The ruleset ends up in $GLOBALS, so that is the scope in which the generated
    // name has to be unused.
    do {
        $dname = 'hy' . rand(100000, 999999);
    } while (isset($GLOBALS[$dname]));

    if (!is_file($conffile)) {
        return false;
    }

    $conf = sk_parse_config($conffile);
    if (!$conf) {
        return false;
    }

    $path = dirname($conffile);

    if (!isset($conf['compiled'][0])) {
        return false;
    }
    $conf['compiled'][0] = $path . '/' . $conf['compiled'][0];

    // Without a cache file there is nothing to load, so a rebuild is forced.
    if (!is_file($conf['compiled'][0])) {
        $recompile = ALWAYS_RECOMPILE;
    }

    // A config without rules is treated as an empty rule list so the loops below
    // never iterate over an undefined value.
    if (!isset($conf['rules']) || !is_array($conf['rules'])) {
        $conf['rules'] = array();
    }
    foreach ($conf['rules'] as $key => $val) {
        $conf['rules'][$key] = $path . '/' . $val;
    }

    // Decide whether the dictionary has to be rebuilt: it is stale when the config
    // file or any rules file was modified after the cache was written.
    if ($recompile == AUTO_RECOMPILE) {
        $date_out = sk_array_value(stat($conf['compiled'][0]), 'mtime');
        $date_in = sk_array_value(stat($conffile), 'mtime');
        foreach ($conf['rules'] as $val) {
            $date_in = max($date_in, sk_array_value(stat($val), 'mtime'));
        }
        if ($date_in > $date_out) {
            $recompile = ALWAYS_RECOMPILE;
        }
    }

    if ($recompile == ALWAYS_RECOMPILE) {
        $ret = array();

        // Alphabet entries of the form "(x>y)" contribute "x" to the alphabet string
        // and the pair y => x to the translation table. This used to be done with the
        // /e (eval) regex modifier, which no longer exists in PHP 7+.
        $alphabet = $conf['alphabet'][0];
        $ret['alph'] = preg_replace('/\\((.+)\\>(.+)\\)/U', '$1', $alphabet);
        $ret['trans'] = array();
        if (preg_match_all('/\\((.+)\\>(.+)\\)/U', $alphabet, $matches, PREG_PATTERN_ORDER)) {
            foreach ($matches[1] as $key => $val) {
                $ret['trans'][$matches[2][$key]] = $val;
            }
        }

        $ret['alphUC'] = $conf['alphabetUC'][0];
        $ret['ll'] = $conf['left_limit'][0];
        $ret['rl'] = $conf['right_limit'][0];
        $ret['enc'] = $conf['internal_encoding'][0];
        // Always present, even when no pattern could be loaded.
        $ret['dict'] = array();

        foreach ($conf['rules'] as $fnm) {
            if (!is_file($fnm)) {
                continue;
            }

            $in_file = explode("\n", sk_clean_config(file_get_contents($fnm)));

            // The first line of a rules file is the encoding of that file.
            $encoding = $in_file[0];
            unset($in_file[0]);
            $needs_conversion = strcasecmp($encoding, $ret['enc']) != 0;

            // Build the rules array: keys are letter combinations, values are digit masks.
            foreach ($in_file as $str) {
                // Translate the rule to the internal encoding.
                if ($needs_conversion) {
                    $str = @iconv($encoding, $ret['enc'], $str);
                    if ($str === false) {
                        // An unconvertible line would otherwise be compiled into a
                        // bogus ".." pattern; skip it instead.
                        continue;
                    }
                }

                // Patterns without digits and dots are dictionary words: '-' marks an
                // allowed break (9), every other letter gap a forbidden one (8), and
                // the dots anchor the word at both ends.
                if (!preg_match('/[\\d\\.]/', $str)) {
                    $str = str_replace('-', '9', $str);
                    $str = preg_replace('/(?<=\\D)(?=\\D)/', '8', $str);
                    $str = '.' . $str . '.';
                }

                // Insert a zero between adjacent letters ...
                $str = preg_replace('/(?<=\\D)(?=\\D)/', '0', $str);
                // ... and at the beginning and the end, so every gap has a digit.
                if (preg_match('/^\\D/', $str)) {
                    $str = '0' . $str;
                }
                if (preg_match('/\\D$/', $str)) {
                    $str .= '0';
                }

                // Split the pattern into its letters (key) and its digits (mask).
                $ind = preg_replace('/[\\d\\n\\s]/', '', $str);
                $vl = preg_replace('/\\D/', '', $str);
                if ($ind == '' || $vl == '') {
                    continue;
                }

                // Warn about duplicates; a stored 0 is only a prefix placeholder.
                if (isset($ret['dict'][$ind]) && $ret['dict'][$ind] !== 0) {
                    trigger_error('Duplicated pattern ' . $ind . ' in file ' . $fnm);
                }
                $ret['dict'][$ind] = $vl;

                // For a combination such as "abcde" the prefixes "abcd", "abc", "ab"
                // and "a" must be present as well. Stop at the first prefix that
                // already exists, because its own prefixes were added earlier.
                $sb = $ind;
                do {
                    $sb = substr($sb, 0, strlen($sb) - 1);
                    if (isset($ret['dict'][$sb])) {
                        break;
                    }
                    $ret['dict'][$sb] = 0;
                } while (strlen($sb) > 1);
            }
        }

        // Persist the cache. A failed write only emits PHP's own warning; the freshly
        // built ruleset is still returned to the caller.
        file_put_contents($conf['compiled'][0], serialize($ret));
        $GLOBALS[$dname] = $ret;
    } else {
        // NOTE(review): unserialize() must only ever see a trusted, locally written
        // cache file.
        $GLOBALS[$dname] = unserialize(file_get_contents($conf['compiled'][0]));
    }

    return $dname;
}
/**
 * Builds the hyphenator from a config file, using a serialized cache when possible.
 *
 * The config file is parsed with sk_parse_config(). Its 'rules' entries name the
 * pattern files and its optional 'compiled' entry names the cache file; both are
 * resolved relative to the directory of the config file. The ruleset is rebuilt from
 * the pattern files when there is no usable cache file, when $recompile equals
 * self::ALWAYS_RECOMPILE, when $recompile equals self::AUTO_RECOMPILE and the config
 * or any rules file is newer than the cache, or when the cache was written by a
 * different self::VERSION. After a rebuild the cache is rewritten if a 'compiled'
 * entry is configured.
 *
 * When the config file is missing, cannot be parsed, or has no 'rules' entry the
 * constructor returns early. `new` ignores the returned value, so in that case the
 * object exists but none of its properties have been assigned and check_limits()
 * has not been called.
 *
 * @param string $conffile  filename of the config file
 * @param int    $recompile necessity to recompile the ruleset
 */
function __construct($conffile, $recompile = self::AUTO_RECOMPILE)
{
    if (!is_file($conffile)) {
        return false;
    }

    $conf = sk_parse_config($conffile);
    if (!$conf) {
        return false;
    }

    if (!isset($conf['rules'])) {
        return false;
    }

    $path = dirname($conffile);

    // The cache file is optional: null means "never read or write a cache".
    $compiled = isset($conf['compiled'][0]) ? $path . '/' . $conf['compiled'][0] : null;
    if ($compiled === null || !is_file($compiled)) {
        $recompile = self::ALWAYS_RECOMPILE;
    }

    foreach ($conf['rules'] as $key => $val) {
        $conf['rules'][$key] = $path . '/' . $val;
    }

    // Decide whether the dictionary has to be rebuilt: it is stale when the config
    // file or any rules file was modified after the cache was written.
    if ($recompile == self::AUTO_RECOMPILE) {
        $date_out = sk_array_value(stat($compiled), 'mtime');
        $date_in = sk_array_value(stat($conffile), 'mtime');
        foreach ($conf['rules'] as $val) {
            $date_in = max($date_in, sk_array_value(stat($val), 'mtime'));
        }
        if ($date_in > $date_out) {
            $recompile = self::ALWAYS_RECOMPILE;
        }
    }

    // Load the cache and force a rebuild when it is unreadable or was written by
    // another version of this class.
    $ret = null;
    if ($recompile != self::ALWAYS_RECOMPILE) {
        // NOTE(review): unserialize() must only ever see a trusted, locally written
        // cache file.
        $ret = unserialize(file_get_contents($compiled));
        if (!is_array($ret) || !isset($ret['ver']) || $ret['ver'] !== self::VERSION) {
            $recompile = self::ALWAYS_RECOMPILE;
        }
    }

    // Recompile and save the dictionary.
    if ($recompile == self::ALWAYS_RECOMPILE) {
        $ret = array();

        // Alphabet entries of the form "(x>y)" contribute "x" to the alphabet string
        // and the pair x => y to the translation table.
        $alphabet = $conf['alphabet'][0];
        $ret['alph'] = preg_replace('/\\((.+)\\>(.+)\\)/U', '$1', $alphabet);
        $ret['alphUC'] = $conf['alphabetUC'][0];
        $ret['trans'] = array();
        if (preg_match_all('/\\((.+)\\>(.+)\\)/U', $alphabet, $matches, PREG_PATTERN_ORDER)) {
            foreach ($matches[1] as $key => $val) {
                $ret['trans'][$val] = $matches[2][$key];
            }
        }

        $ret['ll'] = $conf['left_limit'][0];
        $ret['rl'] = $conf['right_limit'][0];
        $ret['enc'] = $conf['internal_encoding'][0];
        $ret['ver'] = self::VERSION;
        // Always present, even when no pattern could be loaded.
        $ret['dict'] = array();

        foreach ($conf['rules'] as $fnm) {
            if (!is_file($fnm)) {
                continue;
            }

            $in_file = explode("\n", sk_clean_config(file_get_contents($fnm)));

            // The first line of a rules file is the encoding of that file.
            $encoding = $in_file[0];
            unset($in_file[0]);
            $needs_conversion = strcasecmp($encoding, $ret['enc']) != 0;

            // Build the rules array: keys are letter combinations, values are digit masks.
            foreach ($in_file as $str) {
                // Translate the rule to the internal encoding.
                if ($needs_conversion) {
                    $str = @iconv($encoding, $ret['enc'], $str);
                    if ($str === false) {
                        // An unconvertible line would otherwise be compiled into a
                        // bogus ".." pattern; skip it instead.
                        continue;
                    }
                }

                // Patterns without digits and dots are dictionary words: '-' marks an
                // allowed break (9), every other letter gap a forbidden one (8), and
                // the dots anchor the word at both ends.
                if (!preg_match('/[\\d\\.]/', $str)) {
                    $str = str_replace('-', '9', $str);
                    $str = preg_replace('/(?<=\\D)(?=\\D)/', '8', $str);
                    $str = '.' . $str . '.';
                }

                // Insert a zero between adjacent letters ...
                $str = preg_replace('/(?<=\\D)(?=\\D)/', '0', $str);
                // ... and at the beginning and the end, so every gap has a digit.
                if (preg_match('/^\\D/', $str)) {
                    $str = '0' . $str;
                }
                if (preg_match('/\\D$/', $str)) {
                    $str .= '0';
                }

                // Split the pattern into its letters (key) and its digits (mask).
                $ind = preg_replace('/[\\d\\n\\s]/', '', $str);
                $vl = preg_replace('/\\D/', '', $str);
                if ($ind == '' || $vl == '') {
                    continue;
                }

                $ret['dict'][$ind] = $vl;

                // For a pattern such as "abcde" the prefixes "abcd", "abc", "ab" and
                // "a" must be present as well. Stop at the first prefix that already
                // exists, because its own prefixes were added earlier.
                $sb = $ind;
                do {
                    $sb = substr($sb, 0, strlen($sb) - 1);
                    if (isset($ret['dict'][$sb])) {
                        break;
                    }
                    $ret['dict'][$sb] = 0;
                } while (strlen($sb) > 1);
            }
        }

        if ($compiled !== null) {
            file_put_contents($compiled, serialize($ret));
        }
    }

    $this->internal_encoding = $ret['enc'];
    $this->alphabet = $ret['alph'];
    $this->alphabet_uc = $ret['alphUC'];
    $this->translation = $ret['trans'];
    // A cache file may lack 'dict' if it was built without any pattern.
    $this->dictionary = isset($ret['dict']) ? $ret['dict'] : array();
    $this->min_left_limit = $ret['ll'];
    $this->min_right_limit = $ret['rl'];

    $this->check_limits();
}