<?php
/**
 * Ad-hoc comparison script.
 *
 * Builds a list of word pairs (either the single pair given through the
 * `w1` / `w2` query parameters, or a built-in sample set), strips diacritics
 * from every word, and prints the Levenshtein, Metaphone and Soundex results
 * for each pair together with the combined multi-word comparison score.
 */

require 'libs/misc.php';

// Relative weights of the three similarity measures; they add up to 1
// (2/3 + 2/9 + 1/9).
// NOTE(review): these are not read anywhere in this script - presumably the
// compare* helpers in libs/misc.php pick them up as globals; confirm there.
$weightMetaphone = 2 / 3;
$weightLevenshtein = 2 / 3 * 1 / 3;
$weightSoundex = 1 / 3 * 1 / 3;

$matrix = array();

if (!empty($_GET['w1']) && !empty($_GET['w2'])) {
    // Compare the two words supplied by the caller. The first element must be
    // w1; using w2 twice would compare the second word with itself.
    $matrix[] = array($_GET['w1'], $_GET['w2']);
} else {
    // No (complete) input given: fall back to the built-in sample pairs.
    $matrix[] = array('Maria Kirilenko', 'Masha Kirilenko');
    $matrix[] = array('Andrei Neculau', 'Andrei N.');
    $matrix[] = array('Neculau', 'N.');
    $matrix[] = array('Andrei Neculau', 'Neculau Andrei');
    $matrix[] = array('Luminita', 'Luminița');
}

/**
 * array_walk_recursive() callback: replaces the item, in place, with the
 * result of noDiacritics() for that item.
 *
 * @param mixed $item Leaf value of the walked array, modified by reference.
 */
function noDiacriticsParse(&$item)
{
    $item = noDiacritics($item);
}

array_walk_recursive($matrix, 'noDiacriticsParse');

$result = array();

foreach ($matrix as $row) {
    list($w1, $w2) = $row;

    $result[] = array(
        'w1' => $w1,
        'w2' => $w2,
        'levenshtein' => levenshtein($w1, $w2),
        'metaphone' => metaphone($w1) . " - " . metaphone($w2),
        'metaphone_compare' => compareMetaphone($w1, $w2),
        'soundex' => soundex($w1) . " - " . soundex($w2),
        'soundex_compare' => compareSoundex($w1, $w2),
        // The combined score works on the individual space-separated words.
        'compare' => compareComplexMulti(explode(' ', $w1), explode(' ', $w2)),
    );
}

// NOTE(review): when w1/w2 come from the query string, $result contains
// user-supplied text - confirm that print_a() escapes its output.
print_a($result);
/**
 * Decide whether two contact records look like the same person.
 *
 * The checks run in this order, and the first one that succeeds wins:
 *   1. any cleaned name of $c1 is identical to a cleaned name of $c2;
 *   2. the first usernames of both contacts have a compareComplex() score
 *      above the global $thresholdUsernameSimilarity;
 *   3. the best compareComplexMulti() score over all name pairs (names split
 *      on spaces) is above the global $thresholdNameSimilarity.
 *
 * @param array $c1 Contact record; the keys read here are 'email', 'name',
 *                  'secondaryNames' (list of names) and 'usernames' (list,
 *                  only the first entry is used).
 * @param array $c2 Second contact record, same keys as $c1.
 *
 * @return int 1 when the contacts are considered a match, 0 otherwise.
 */
function compareTwoContacts($c1, $c2)
{
    global $thresholdUsernameSimilarity, $thresholdNameSimilarity;

    logMsg('DEBUG', "Comparing " . $c1['email'] . " with " . $c2['email']);

    // Merge the primary name with the secondary names. A missing or non-array
    // 'secondaryNames' is treated as "no secondary names" so the count()
    // calls below always receive an array.
    $c1Names = (isset($c1['secondaryNames']) && is_array($c1['secondaryNames'])) ? $c1['secondaryNames'] : array();
    $c2Names = (isset($c2['secondaryNames']) && is_array($c2['secondaryNames'])) ? $c2['secondaryNames'] : array();
    if (!empty($c1['name'])) {
        $c1Names[] = $c1['name'];
    }
    if (!empty($c2['name'])) {
        $c2Names[] = $c2['name'];
    }

    $commonNames = array();
    if (count($c1Names) && count($c2Names)) {
        // Clean the names in place.
        array_walk($c1Names, 'cleanName');
        array_walk($c2Names, 'cleanName');

        // Drop names that are empty after cleaning; otherwise two blank names
        // would be reported as an exact match by array_intersect().
        $c1Names = array_filter($c1Names);
        $c2Names = array_filter($c2Names);

        // Names present, verbatim, on both sides.
        $commonNames = array_intersect($c1Names, $c2Names);
    }

    // Only the first username of each contact takes part in the comparison.
    $c1Username = isset($c1['usernames'][0]) ? $c1['usernames'][0] : null;
    cleanName($c1Username);
    $c2Username = isset($c2['usernames'][0]) ? $c2['usernames'][0] : null;
    cleanName($c2Username);

    // 1. Full names match.
    if (!empty($commonNames)) {
        return 1;
    }

    // 2. Very high username similarity.
    if ($c1Username && $c2Username) {
        $usernameSimilarity = compareComplex($c1Username, $c2Username);
        if ($usernameSimilarity && $usernameSimilarity > $thresholdUsernameSimilarity) {
            logMsg('DEBUG', "Username similarity between " . $c1Username . " and " . $c2Username . " is {$usernameSimilarity}");
            return 1;
        }
    }

    // 3. Name similarity: nothing to compare unless both sides have a name.
    if (!count($c1Names) || !count($c2Names)) {
        return 0;
    }

    // Keep the best score over ALL name pairs. The maximum must not be reset
    // per $c1 name, or only the last name of $c1 would decide the result.
    $nameSimilarity = 0;
    foreach ($c1Names as $c1Name) {
        $c1NamesSplit = explode(' ', $c1Name);
        foreach ($c2Names as $c2Name) {
            $similarity = compareComplexMulti($c1NamesSplit, explode(' ', $c2Name));
            if ($similarity > $nameSimilarity) {
                logMsg('DEBUG', "Name similarity between " . $c1Name . " and " . $c2Name . " is {$similarity}");
                $nameSimilarity = $similarity;
            }
        }
    }

    // Names are judged against the name threshold. When that global has not
    // been configured, fall back to the username threshold so an unset value
    // never turns every non-zero score into a match.
    $nameThreshold = isset($thresholdNameSimilarity) ? $thresholdNameSimilarity : $thresholdUsernameSimilarity;

    return $nameSimilarity > $nameThreshold ? 1 : 0;
}