/**
 * Runs every test case listed in test_data_files/damerau_levenshtein_mod.txt
 * through DamerauLevenshteinMod::distance().
 *
 * Each usable line of the data file holds five pipe-separated, non-empty fields:
 *
 *     string 1 | string 2 | max distance | block size | expected result
 *
 * Lines whose first non-blank character is '#' and lines that do not have
 * exactly five fields are skipped. An expected result spelled 'null' stands
 * for a null return value.
 */
function testRunTestsFromFile() {
    $path = dirname(__FILE__) . "/../test_data_files/damerau_levenshtein_mod.txt";
    $file = file($path);
    if ($file === false) {
        // An unreadable data file must fail the test; otherwise the loop below
        // would run zero assertions and the test would pass vacuously.
        $this->assertTrue(false, "Unable to read test data file {$path}");
        return;
    }

    foreach ($file as $line => $test_case) {
        // Comment line.
        if (preg_match("/^\\s*#/", $test_case)) {
            continue;
        }
        // Not a five-field test case.
        if (!preg_match("/^([^\\|]+)\\|([^\\|]+)\\|([^\\|]+)\\|([^\\|]+)\\|([^\\|]+)\$/", $test_case, $arr)) {
            continue;
        }

        // The two strings are used exactly as written (not trimmed).
        $test_value_1 = $arr[1];
        $test_value_2 = $arr[2];
        $test_max_distance = trim($arr[3]);
        $test_block_size = trim($arr[4]);
        $test_result = trim($arr[5]);
        if ($test_result == 'null') {
            $test_result = null;
        }

        // Note the argument order: block size comes before max distance.
        $res = DamerauLevenshteinMod::distance($test_value_1, $test_value_2, $test_block_size, $test_max_distance);

        echo $res . "/{$test_result} ### {$test_value_1}, {$test_value_2}, {$test_block_size}, {$test_max_distance}<br/>";

        if ($test_result === null) {
            // A loose comparison would accept 0 (or false) here, because
            // null == 0 is true in PHP; an expected null must really be null.
            $passed = ($res === null);
        } else {
            // The expected value is read as a string from the file, so the
            // numeric comparison itself stays loose.
            $passed = ($res !== null && $test_result == $res);
        }

        $expected_label = ($test_result === null) ? 'null' : $test_result;
        $this->assertTrue(
            $passed,
            "{$test_value_1} with {$test_value_2}, block_size {$test_block_size} and max_distance {$test_max_distance} should give {$expected_label} on line (" . ($line + 1) . ")"
        );
    }
}
/**
 * Modified Damerau-Levenshtein Distance (MDLD) between two strings, with
 * support for transpositions of multi-character blocks.
 *
 * The computation is delegated to DamerauLevenshteinMod::distance(); a trace
 * entry describing the call and its result is appended to $this->debug['mdld'].
 *
 * @param string  $p_str1        first string to compare
 * @param string  $p_str2        second string to compare
 * @param integer $p_block_limit limit on the length of a transposed block to be searched for
 * @param integer $max_distance  passed to distance() as its fourth argument (default 4)
 * @return integer : computed edit distance between the input strings
 *                   (0 = identical on this measure, 1..n = increasing dissimilarity)
 */
public function mdld($p_str1, $p_str2, $p_block_limit, $max_distance = 4) {
    require_once 'class.damerau_levenshtein_mod.php';

    $distance = DamerauLevenshteinMod::distance($p_str1, $p_str2, $p_block_limit, $max_distance);

    // Record the inputs and the outcome for later inspection.
    $this->debug['mdld'][] = '1 (p_str1:' . $p_str1 . ') (p_str2:' . $p_str2
        . ') (p_block_limit:' . $p_block_limit . ') (value:' . $distance . ')';

    return $distance;
}
/**
 * Decides whether two author words match.
 *
 * The words match when their NearMatch phonetic forms are equal, or when
 * their edit distance passes the author post-filter:
 *   - the distance is at most 3,
 *   - the shorter word is more than twice as long as the distance
 *     (i.e. at least 51% "good" characters),
 *   - for a distance of 2 or more, the first characters are equal.
 *
 * @param string $author1
 * @param string $author2
 * @return array with keys 'match' (bool), 'phonetic_match' (bool) and
 *               'edit_distance' (the value returned by DamerauLevenshteinMod::distance())
 */
public static function match_author_words($author1, $author2) {
    $nm = new NearMatch();
    $author1_phonetic = $nm->near_match($author1);
    $author2_phonetic = $nm->near_match($author2);
    $phonetic_match = ($author1_phonetic == $author2_phonetic);

    // Block size 2, max distance 3.
    $ed = DamerauLevenshteinMod::distance($author1, $author2, 2, 3);
    $shorter_length = min(strlen($author1), strlen($author2));

    // The is_numeric() guard matters: with a null (or false) distance every
    // loose comparison below would succeed (null <= 3, length > null * 2,
    // null < 2), reporting a match for words that are too far apart.
    // NOTE(review): distance() presumably returns null once the max distance
    // is exceeded - confirm against DamerauLevenshteinMod.
    $distance_match = is_numeric($ed)
        && $ed <= 3
        && $shorter_length > $ed * 2
        && ($ed < 2 || substr($author1, 0, 1) == substr($author2, 0, 1));

    return array(
        'match' => ($distance_match || $phonetic_match),
        'phonetic_match' => $phonetic_match,
        'edit_distance' => $ed,
    );
}
$data = $tm->getXML(); } $debug = $tm->debug; break; case 'ngram': require_once 'classes/class.taxamatch.php'; $tm = new Taxamatch(); $data = $tm->ngram($str, $str2); if ($output == 'xml') { $data = $tm->getXML(); } $debug = $tm->debug; break; case 'mdld': require_once 'classes/class.damerau_levenshtein_mod.php'; $mdld = new DamerauLevenshteinMod(); $data = $mdld->mdld_php($str, $str2, 10, 1); break; case 'taxamatch': require_once 'classes/class.taxamatch.php'; if ($cache) { $output = 'rest'; } $db = select_source($source, $classification); $data = array(); $names = preg_split("/[\r\n;]+/", $str); if (is_array($names)) { foreach ($names as $name) { $tm = new Taxamatch($db); $tm->set('debug_flag', $debug); $tm->set('output_type', strtolower($output));