Пример #1
0
<?php

require 'libs/misc.php';
// Relative weights of the three similarity measures. The expressions evaluate
// to 2/3, 2/9 and 1/9 respectively, so together they sum to 1.
// NOTE(review): nothing in this script reads them directly; presumably the
// compare* helpers loaded from libs/misc.php pick them up as globals - confirm.
$weightMetaphone = 2 / 3;
$weightLevenshtein = 2 / 3 * 1 / 3;
$weightSoundex = 1 / 3 * 1 / 3;

// Word pairs to compare. When both "w1" and "w2" are passed in the query
// string that single pair is used; otherwise a built-in sample set is used.
$matrix = array();
// empty() keeps the original truthiness test but does not raise a notice or
// warning when a parameter is missing from the request.
if (!empty($_GET['w1']) && !empty($_GET['w2'])) {
    // The pair must be (w1, w2); using w2 for both slots would compare the
    // second word with itself and ignore the first one entirely.
    $matrix[] = array($_GET['w1'], $_GET['w2']);
} else {
    $matrix[] = array('Maria Kirilenko', 'Masha Kirilenko');
    $matrix[] = array('Andrei Neculau', 'Andrei N.');
    $matrix[] = array('Neculau', 'N.');
    $matrix[] = array('Andrei Neculau', 'Neculau Andrei');
    $matrix[] = array('Luminita', 'Luminița');
}
/**
 * In-place adapter around noDiacritics() so it can be used as an
 * array_walk()/array_walk_recursive() callback.
 *
 * @param mixed $item Value to convert; overwritten with the result of
 *                    noDiacritics($item).
 *
 * @return void
 */
function noDiacriticsParse(&$item)
{
    $converted = noDiacritics($item);
    $item = $converted;
}
// Pass every word of every pair through noDiacriticsParse(), modifying the
// matrix in place.
array_walk_recursive($matrix, 'noDiacriticsParse');

// Build one report row per word pair. The keys are filled in the same order
// as the helper calls so the printed structure keeps a stable layout.
$result = array();
foreach ($matrix as $pair) {
    $w1 = $pair[0];
    $w2 = $pair[1];

    $report = array('w1' => $w1, 'w2' => $w2);
    $report['levenshtein'] = levenshtein($w1, $w2);
    $report['metaphone'] = metaphone($w1) . " - " . metaphone($w2);
    $report['metaphone_compare'] = compareMetaphone($w1, $w2);
    $report['soundex'] = soundex($w1) . " - " . soundex($w2);
    $report['soundex_compare'] = compareSoundex($w1, $w2);
    // The combined comparison works on the individual space-separated words.
    $report['compare'] = compareComplexMulti(explode(' ', $w1), explode(' ', $w2));

    $result[] = $report;
}
print_a($result);
Пример #2
0
/**
 * Decide whether two contact records appear to describe the same person.
 *
 * Checks are applied in this order, and the first one that succeeds wins:
 *   1. any cleaned name (primary or secondary) is shared by both contacts;
 *   2. the first usernames of both contacts score above
 *      $thresholdUsernameSimilarity in compareComplex();
 *   3. the best compareComplexMulti() score over all pairs of names is above
 *      $thresholdNameSimilarity.
 *
 * Reads the globals $thresholdUsernameSimilarity and $thresholdNameSimilarity.
 * If $thresholdNameSimilarity is not set, the username threshold is used for
 * names as well.
 *
 * @param array $c1 Contact record; the keys read are 'email', 'name',
 *                  'secondaryNames' (list of names) and 'usernames' (list,
 *                  only index 0 is used).
 * @param array $c2 Second contact record, same keys as $c1.
 *
 * @return int 1 when the contacts are considered a match, 0 otherwise.
 */
function compareTwoContacts($c1, $c2)
{
    global $thresholdUsernameSimilarity, $thresholdNameSimilarity;
    logMsg('DEBUG', "Comparing " . $c1['email'] . " with " . $c2['email']);

    // Merge the primary name with the secondary names. A missing or empty
    // 'secondaryNames' becomes an empty array so count()-style checks below
    // never operate on null.
    $c1Names = !empty($c1['secondaryNames']) ? (array) $c1['secondaryNames'] : array();
    $c2Names = !empty($c2['secondaryNames']) ? (array) $c2['secondaryNames'] : array();
    if (!empty($c1['name'])) {
        $c1Names[] = $c1['name'];
    }
    if (!empty($c2['name'])) {
        $c2Names[] = $c2['name'];
    }

    // Names only matter when both contacts have at least one.
    $commonNames = array();
    if ($c1Names && $c2Names) {
        array_walk($c1Names, 'cleanName');
        array_walk($c2Names, 'cleanName');
        // Drop names that are empty after cleaning: two empty strings must
        // not count as an exact name match.
        $c1Names = array_filter($c1Names);
        $c2Names = array_filter($c2Names);
        $commonNames = array_intersect($c1Names, $c2Names);
    }

    // Only the first username of each contact is compared.
    $c1Username = isset($c1['usernames'][0]) ? $c1['usernames'][0] : null;
    cleanName($c1Username);
    $c2Username = isset($c2['usernames'][0]) ? $c2['usernames'][0] : null;
    cleanName($c2Username);

    if (!empty($commonNames)) {
        // full names match
        return 1;
    }

    if ($c1Username && $c2Username) {
        $usernameSimilarity = compareComplex($c1Username, $c2Username);
        if ($usernameSimilarity && $usernameSimilarity > $thresholdUsernameSimilarity) {
            // very high username similarity
            logMsg('DEBUG', "Username similarity between " . $c1Username . " and " . $c2Username . " is {$usernameSimilarity}");
            return 1;
        }
    }

    if (!$c1Names || !$c2Names) {
        return 0;
    }

    // Best similarity over ALL name pairs. The running maximum is initialised
    // once, outside both loops, so a strong match on an early name is not
    // discarded when later names are examined.
    $nameSimilarity = 0;
    foreach ($c1Names as $c1Name) {
        $c1NamesSplit = explode(' ', $c1Name);
        foreach ($c2Names as $c2Name) {
            $similarity = compareComplexMulti($c1NamesSplit, explode(' ', $c2Name));
            if ($similarity > $nameSimilarity) {
                logMsg('DEBUG', "Name similarity between " . $c1Name . " and " . $c2Name . " is {$similarity}");
                $nameSimilarity = $similarity;
            }
        }
    }

    // Names are judged against the name threshold; fall back to the username
    // threshold when the name threshold has not been configured.
    $nameThreshold = isset($thresholdNameSimilarity)
        ? $thresholdNameSimilarity
        : $thresholdUsernameSimilarity;

    return $nameSimilarity > $nameThreshold ? 1 : 0;
}