/**
 * Runs every test case listed in the damerau_levenshtein_mod.txt data file.
 *
 * Each usable line has five pipe-separated fields:
 *   string 1 | string 2 | max distance | block size | expected result
 * Lines that start with "#" (after optional whitespace) and lines that do not
 * have exactly five non-empty fields are skipped. An expected result of
 * "null" means DamerauLevenshteinMod::distance() is expected to return null.
 */
function testRunTestsFromFile()
 {
     $file = file(dirname(__FILE__) . "/../test_data_files/damerau_levenshtein_mod.txt");
     if ($file === false) {
         // Without this guard an unreadable data file would let the test pass with zero assertions.
         $this->assertTrue(false, 'Could not read test data file damerau_levenshtein_mod.txt');
         return;
     }
     foreach ($file as $line => $test_case) {
         if (!preg_match("/^\\s*#/", $test_case) && preg_match("/^([^\\|]+)\\|([^\\|]+)\\|([^\\|]+)\\|([^\\|]+)\\|([^\\|]+)\$/", $test_case, $arr)) {
             // The two input strings are used verbatim; only the numeric fields are trimmed.
             $test_value_1 = $arr[1];
             $test_value_2 = $arr[2];
             $test_max_distance = trim($arr[3]);
             $test_block_size = trim($arr[4]);
             $test_result = trim($arr[5]);
             if ($test_result === 'null') {
                 $test_result = null;
             }
             $res = DamerauLevenshteinMod::distance($test_value_1, $test_value_2, $test_block_size, $test_max_distance);
             echo $res . "/{$test_result} ### {$test_value_1}, {$test_value_2}, {$test_block_size}, {$test_max_distance}<br/>";
             if ($test_result === null) {
                 // Strict check: with a loose comparison, null == 0 would wrongly accept a distance of 0.
                 $passed = $res === null;
             } else {
                 // The expected value is a string read from the file, so compare numerically,
                 // but never let null/false/'' stand in for a number.
                 $passed = is_numeric($res) && $test_result == $res;
             }
             // The message carries only the line number; the echo above already prints the inputs.
             $this->assertTrue($passed, 'Unexpected distance for the test case on line ' . ($line + 1) . ' of damerau_levenshtein_mod.txt');
         }
     }
 }
Beispiel #2
0
 /**
  * Modified Damerau-Levenshtein Distance (MDLD) between two strings.
  *
  * Thin wrapper that loads class.damerau_levenshtein_mod.php, delegates to
  * DamerauLevenshteinMod::distance() and records the call in $this->debug['mdld'].
  *
  * @param string $p_str1 first string to compare
  * @param string $p_str2 second string to compare
  * @param integer $p_block_limit limit on the length of a transposed block to be searched for
  * @param integer $max_distance forwarded as the fourth argument of DamerauLevenshteinMod::distance(); defaults to 4
  * @return integer computed edit distance (0 = identical on this measure, 1..n = increasing dissimilarity);
  *   NOTE(review): the value is returned exactly as distance() produced it - confirm whether it can be non-integer
  */
 public function mdld($p_str1, $p_str2, $p_block_limit, $max_distance = 4)
 {
     require_once 'class.damerau_levenshtein_mod.php';

     $distance = DamerauLevenshteinMod::distance($p_str1, $p_str2, $p_block_limit, $max_distance);

     // Keep a trace of the inputs and the result for later inspection.
     $this->debug['mdld'][] = '1 (p_str1:' . $p_str1 . ') (p_str2:' . $p_str2 . ') (p_block_limit:' . $p_block_limit . ') (value:' . $distance . ')';

     return $distance;
 }
Beispiel #3
0
 /**
  * Decides whether two author words should be treated as the same author.
  *
  * The words match when either
  *  - their NearMatch::near_match() forms are equal (a phonetic match), or
  *  - their edit distance passes the author post-filter: it is at most 3, it is
  *    smaller than half the length of the shorter word (min. 51% "good" chars),
  *    and for a distance of 2 or more the first characters are equal.
  *
  * @param string $author1
  * @param string $author2
  * @return array with keys 'match' (bool), 'phonetic_match' (bool) and
  *   'edit_distance' (the value returned by DamerauLevenshteinMod::distance(), unchanged)
  */
 public static function match_author_words($author1, $author2)
 {
     $nm = new NearMatch();
     $phonetic_match = $nm->near_match($author1) == $nm->near_match($author2);

     $ed = DamerauLevenshteinMod::distance($author1, $author2, 2, 3);

     // A non-numeric distance (e.g. null - presumably returned when the maximum
     // distance is exceeded; confirm) must not pass the filter: in PHP
     // "null <= 3" and "null < 2" are both true, which would accept any pair.
     // NOTE(review): strlen()/substr() work on bytes, so for multi-byte names the
     // length and first-character checks are byte-based.
     $ed_match = is_numeric($ed)
         && $ed <= 3
         && min(strlen($author1), strlen($author2)) > $ed * 2
         && ($ed < 2 || substr($author1, 0, 1) == substr($author2, 0, 1));

     return array(
         'match' => $ed_match || $phonetic_match,
         'phonetic_match' => $phonetic_match,
         'edit_distance' => $ed,
     );
 }
Beispiel #4
0
         $data = $tm->getXML();
     }
     $debug = $tm->debug;
     break;
 case 'ngram':
     require_once 'classes/class.taxamatch.php';
     $tm = new Taxamatch();
     $data = $tm->ngram($str, $str2);
     if ($output == 'xml') {
         $data = $tm->getXML();
     }
     $debug = $tm->debug;
     break;
 case 'mdld':
     require_once 'classes/class.damerau_levenshtein_mod.php';
     $mdld = new DamerauLevenshteinMod();
     $data = $mdld->mdld_php($str, $str2, 10, 1);
     break;
 case 'taxamatch':
     require_once 'classes/class.taxamatch.php';
     if ($cache) {
         $output = 'rest';
     }
     $db = select_source($source, $classification);
     $data = array();
     $names = preg_split("/[\r\n;]+/", $str);
     if (is_array($names)) {
         foreach ($names as $name) {
             $tm = new Taxamatch($db);
             $tm->set('debug_flag', $debug);
             $tm->set('output_type', strtolower($output));