Esempio n. 1
0
File: hypher.php Progetto: GGF/baza4
/**
 * Load a hyphenation ruleset and register it in $GLOBALS.
 *
 * The config file is parsed with sk_parse_config(). The paths found in its
 * "compiled" and "rules" entries are resolved relative to the directory of
 * the config file. The compiled (serialized) ruleset is rebuilt from the
 * rules files when any of the following holds:
 *   - $recompile equals ALWAYS_RECOMPILE;
 *   - the compiled file does not exist or does not unserialize to an array;
 *   - $recompile equals AUTO_RECOMPILE and the config file or one of the
 *     rules files has a newer mtime than the compiled file.
 * Otherwise the compiled file is loaded as is.
 *
 * @param string $conffile  path of the ruleset config file
 * @param int    $recompile recompilation mode; compared against
 *                          AUTO_RECOMPILE and ALWAYS_RECOMPILE
 * @return string|false key of the $GLOBALS entry that holds the ruleset
 *                      array, or false when the config file is missing,
 *                      sk_parse_config() returns an empty value, or the
 *                      config names no compiled file
 */
function hypher_load($conffile, $recompile = AUTO_RECOMPILE)
{
    if (!is_file($conffile)) {
        return false;
    }
    $conf = sk_parse_config($conffile);
    if (!$conf) {
        return false;
    }
    if (!isset($conf['compiled'][0])) {
        return false;
    }
    $path = dirname($conffile);
    $compiled = $path . '/' . $conf['compiled'][0];
    if (!is_file($compiled)) {
        $recompile = ALWAYS_RECOMPILE;
    }
    // A config without a "rules" entry is treated as having no rules files
    // instead of iterating over an undefined index.
    $rules = array();
    if (isset($conf['rules']) && is_array($conf['rules'])) {
        foreach ($conf['rules'] as $key => $val) {
            $rules[$key] = $path . '/' . $val;
        }
    }
    // determine whether the dictionary has to be rebuilt
    if ($recompile == AUTO_RECOMPILE) {
        $date_out = sk_array_value(stat($compiled), 'mtime');
        $date_in = sk_array_value(stat($conffile), 'mtime');
        foreach ($rules as $val) {
            $date_in = max($date_in, sk_array_value(stat($val), 'mtime'));
        }
        if ($date_in > $date_out) {
            $recompile = ALWAYS_RECOMPILE;
        }
    }
    // Try the compiled file first; an unreadable or corrupt one forces a
    // rebuild instead of registering a non-array value.
    $ret = null;
    if ($recompile != ALWAYS_RECOMPILE) {
        $ret = unserialize(file_get_contents($compiled));
        if (!is_array($ret)) {
            $recompile = ALWAYS_RECOMPILE;
        }
    }
    if ($recompile == ALWAYS_RECOMPILE) {
        $ret = array();
        // Make the alphabet string and the translation table. Every "(a>b)"
        // group of the alphabet is reduced to "a" and recorded as
        // trans['b'] = 'a'. This replaces the former /e (eval) replacement,
        // which is no longer available since PHP 7.
        $pair = '/\\((.+)\\>(.+)\\)/U';
        $ret['trans'] = array();
        if (preg_match_all($pair, $conf['alphabet'][0], $matches, PREG_PATTERN_ORDER)) {
            foreach ($matches[2] as $key => $from) {
                $ret['trans'][$from] = $matches[1][$key];
            }
        }
        $ret['alph'] = preg_replace($pair, '$1', $conf['alphabet'][0]);
        $ret['alphUC'] = $conf['alphabetUC'][0];
        $ret['ll'] = $conf['left_limit'][0];
        $ret['rl'] = $conf['right_limit'][0];
        $ret['enc'] = $conf['internal_encoding'][0];
        // always present, even when no rules file yields a pattern
        $ret['dict'] = array();
        foreach ($rules as $fnm) {
            if (!is_file($fnm)) {
                continue;
            }
            $in_file = explode("\n", sk_clean_config(file_get_contents($fnm)));
            // first string of the rules file is the encoding of this file
            $encoding = $in_file[0];
            unset($in_file[0]);
            // create rules array: keys -- letters combinations; values -- digital masks
            foreach ($in_file as $str) {
                // translate rules to internal encoding
                if (strcasecmp($encoding, $ret['enc']) != 0) {
                    $str = @iconv($encoding, $ret['enc'], $str);
                    if ($str === false) {
                        // conversion failed: skip the line rather than
                        // compiling it into a meaningless ".." pattern
                        continue;
                    }
                }
                // patterns not containing digits and dots are treated as dictionary words
                // converting ones to pattern
                if (!preg_match('/[\\d\\.]/', $str)) {
                    $str = str_replace('-', '9', $str);
                    $str = preg_replace('/(?<=\\D)(?=\\D)/', '8', $str);
                    $str = '.' . $str . '.';
                }
                // insert zero between the letters
                $str = preg_replace('/(?<=\\D)(?=\\D)/', '0', $str);
                // insert zero on beginning and on the end
                if (preg_match('/^\\D/', $str)) {
                    $str = '0' . $str;
                }
                if (preg_match('/\\D$/', $str)) {
                    $str .= '0';
                }
                // make array
                $ind = preg_replace('/[\\d\\n\\s]/', '', $str);
                $vl = preg_replace('/\\D/', '', $str);
                if ($ind != '' && $vl != '') {
                    // duplicated pattern warning
                    if (isset($ret['dict'][$ind]) && $ret['dict'][$ind] !== 0) {
                        trigger_error('Duplicated pattern ' . $ind . ' in file ' . $fnm);
                    }
                    $ret['dict'][$ind] = $vl;
                    // optimize: if there is, for example, "abcde" letters combination
                    // then we need "abcd", "abc", "ab" and "a" combinations
                    // to be presented
                    $sb = $ind;
                    do {
                        $sb = substr($sb, 0, strlen($sb) - 1);
                        if (!isset($ret['dict'][$sb])) {
                            $ret['dict'][$sb] = 0;
                        } else {
                            break;
                        }
                    } while (strlen($sb) > 1);
                }
            }
        }
        // Best-effort cache write: a failure only emits a warning and the
        // freshly built ruleset is still registered below.
        file_put_contents($compiled, serialize($ret));
    }
    // Pick a registry key that is not yet used in $GLOBALS (the old check
    // looked at a local variable and therefore never detected a collision).
    do {
        $dname = 'hy' . rand(100000, 999999);
    } while (isset($GLOBALS[$dname]));
    $GLOBALS[$dname] = $ret;
    return $dname;
}
Esempio n. 2
0
 /**
  * Builds the hyphenator state from a ruleset config file.
  *
  * The config file is parsed with sk_parse_config(); the "compiled" and
  * "rules" paths it names are resolved relative to the config file's
  * directory. The ruleset is rebuilt from the rules files when any of the
  * following holds:
  *   - $recompile equals self::ALWAYS_RECOMPILE;
  *   - the config names no compiled file, or that file does not exist;
  *   - $recompile equals self::AUTO_RECOMPILE and the config file or one of
  *     the rules files has a newer mtime than the compiled file;
  *   - the compiled file does not unserialize to an array whose 'ver' entry
  *     is identical to self::VERSION.
  * A rebuilt ruleset is written back to the compiled file when one is named.
  *
  * NOTE: when the config file is missing, parses to an empty value, or has
  * no "rules" entry, the constructor returns early and none of the ruleset
  * properties below are assigned.
  *
  * @param string $conffile  filename of config file
  * @param int    $recompile necessity to recompile ruleset
  */
 function __construct($conffile, $recompile = self::AUTO_RECOMPILE)
 {
     if (!is_file($conffile)) {
         return;
     }
     $conf = sk_parse_config($conffile);
     if (!$conf) {
         return;
     }
     $path = dirname($conffile);
     // The compiled file is optional: without it the ruleset is rebuilt on
     // every construction and never saved.
     $compiled = isset($conf['compiled'][0]) ? $path . '/' . $conf['compiled'][0] : null;
     if ($compiled === null || !is_file($compiled)) {
         $recompile = self::ALWAYS_RECOMPILE;
     }
     if (!isset($conf['rules'])) {
         return;
     }
     foreach ($conf['rules'] as $key => $val) {
         $conf['rules'][$key] = $path . '/' . $val;
     }
     // determine whether the dictionary has to be rebuilt
     if ($recompile == self::AUTO_RECOMPILE) {
         $date_out = sk_array_value(stat($compiled), 'mtime');
         $date_in = sk_array_value(stat($conffile), 'mtime');
         foreach ($conf['rules'] as $val) {
             $date_in = max($date_in, sk_array_value(stat($val), 'mtime'));
         }
         if ($date_in > $date_out) {
             $recompile = self::ALWAYS_RECOMPILE;
         }
     }
     // recompile the dictionary in case of version mismatch or a compiled
     // file that does not hold a ruleset array
     $ret = null;
     if ($recompile != self::ALWAYS_RECOMPILE) {
         $ret = unserialize(file_get_contents($compiled));
         if (!is_array($ret) || !isset($ret['ver']) || $ret['ver'] !== self::VERSION) {
             $recompile = self::ALWAYS_RECOMPILE;
         }
     }
     // recompile and save the dictionary
     if ($recompile == self::ALWAYS_RECOMPILE) {
         $ret = array();
         // parse alphabet: every "(a>b)" group is reduced to "a"
         $pair = '/\\((.+)\\>(.+)\\)/U';
         $ret['alph'] = preg_replace($pair, '$1', $conf['alphabet'][0]);
         $ret['alphUC'] = $conf['alphabetUC'][0];
         // make translation table: "(a>b)" is recorded as trans['a'] = 'b'
         $ret['trans'] = array();
         if (preg_match_all($pair, $conf['alphabet'][0], $matches, PREG_PATTERN_ORDER)) {
             foreach ($matches[1] as $key => $val) {
                 $ret['trans'][$val] = $matches[2][$key];
             }
         }
         $ret['ll'] = $conf['left_limit'][0];
         $ret['rl'] = $conf['right_limit'][0];
         $ret['enc'] = $conf['internal_encoding'][0];
         $ret['ver'] = self::VERSION;
         // always present, even when no rules file yields a pattern
         $ret['dict'] = array();
         foreach ($conf['rules'] as $fnm) {
             if (!is_file($fnm)) {
                 continue;
             }
             $in_file = explode("\n", sk_clean_config(file_get_contents($fnm)));
             // first string of the rules file is the encoding of this file
             $encoding = $in_file[0];
             unset($in_file[0]);
             // create rules array: keys -- letters combinations; values -- digital masks
             foreach ($in_file as $str) {
                 // translate rules to internal encoding
                 if (strcasecmp($encoding, $ret['enc']) != 0) {
                     $str = @iconv($encoding, $ret['enc'], $str);
                     if ($str === false) {
                         // conversion failed: skip the line rather than
                         // compiling it into a meaningless ".." pattern
                         continue;
                     }
                 }
                 // patterns not containing digits and dots are treated as dictionary words
                 // converting ones to pattern
                 if (!preg_match('/[\\d\\.]/', $str)) {
                     $str = str_replace('-', '9', $str);
                     $str = preg_replace('/(?<=\\D)(?=\\D)/', '8', $str);
                     $str = '.' . $str . '.';
                 }
                 // insert zero between the letters
                 $str = preg_replace('/(?<=\\D)(?=\\D)/', '0', $str);
                 // insert zero on beginning and on the end
                 if (preg_match('/^\\D/', $str)) {
                     $str = '0' . $str;
                 }
                 if (preg_match('/\\D$/', $str)) {
                     $str .= '0';
                 }
                 // make array
                 $ind = preg_replace('/[\\d\\n\\s]/', '', $str);
                 $vl = preg_replace('/\\D/', '', $str);
                 if ($ind != '' && $vl != '') {
                     $ret['dict'][$ind] = $vl;
                     // optimize: if there is, for example, "abcde" pattern
                     // then we need "abcd", "abc", "ab" and "a" patterns
                     // to be presented
                     $sb = $ind;
                     do {
                         $sb = substr($sb, 0, strlen($sb) - 1);
                         if (!isset($ret['dict'][$sb])) {
                             $ret['dict'][$sb] = 0;
                         } else {
                             break;
                         }
                     } while (strlen($sb) > 1);
                 }
             }
         }
         if ($compiled !== null) {
             file_put_contents($compiled, serialize($ret));
         }
     }
     $this->internal_encoding = $ret['enc'];
     $this->alphabet = $ret['alph'];
     $this->alphabet_uc = $ret['alphUC'];
     $this->translation = $ret['trans'];
     // a cache written without any pattern may lack the 'dict' entry
     $this->dictionary = isset($ret['dict']) ? $ret['dict'] : array();
     $this->min_left_limit = $ret['ll'];
     $this->min_right_limit = $ret['rl'];
     $this->check_limits();
 }