/**
 * Remove the authors that two author lists have in common.
 *
 * Every entry of $author_words1 is compared with every entry of
 * $author_words2 after both have been passed through
 * Normalize::normalize_author_string(). For each pair:
 *
 *  - identical normalised strings remove both entries;
 *  - if one string is a case-insensitive prefix of the other (an
 *    abbreviation), the shorter entry is removed, and the longer entry is
 *    removed as well when the shorter one is at least 3 bytes long;
 *  - otherwise self::match_author_words() decides, and a positive result
 *    removes both entries.
 *
 * @param array $author_words1 First list of author strings.
 * @param array $author_words2 Second list of author strings.
 *
 * @return array Two-element array holding the remaining entries of
 *               $author_words1 and $author_words2 (in that order). The
 *               surviving entries keep their original keys and their
 *               original, un-normalised values.
 */
public static function remove_duplicate_authors($author_words1, $author_words2)
{
    $unique_authors1 = $author_words1;
    $unique_authors2 = $author_words2;

    // Normalise the second list once instead of once per entry of the first list.
    // NOTE(review): assumes normalize_author_string() is a pure function — confirm.
    $normalized2 = array();
    foreach ($author_words2 as $key2 => $raw_author2) {
        $normalized2[$key2] = Normalize::normalize_author_string($raw_author2);
    }

    foreach ($author_words1 as $key1 => $raw_author1) {
        $author1 = Normalize::normalize_author_string($raw_author1);

        foreach ($normalized2 as $key2 => $author2) {
            // The match state is evaluated fresh for every pair. Carrying a
            // flag over from an earlier $author2 would make unrelated entries
            // of the second list look like matches of $author1.
            $is_equal = ($author1 === $author2);
            $author1_is_prefix = false;
            $author2_is_prefix = false;

            if (!$is_equal) {
                if (preg_match("/^" . preg_quote($author1, "/") . "/i", $author2)) {
                    $author1_is_prefix = true;
                } elseif (preg_match("/^" . preg_quote($author2, "/") . "/i", $author1)) {
                    $author2_is_prefix = true;
                }
            }

            if ($is_equal
                || ($author1_is_prefix && strlen($author1) >= 3)
                || ($author2_is_prefix && strlen($author2) >= 3)
            ) {
                // Equal, or one is a sufficiently long prefix of the other:
                // treat it as a match for both terms.
                unset($unique_authors1[$key1]);
                unset($unique_authors2[$key2]);
            } elseif ($author1_is_prefix) {
                // author1 is a short abbreviation of author2: drop only the abbreviation.
                unset($unique_authors1[$key1]);
            } elseif ($author2_is_prefix) {
                // author2 is a short abbreviation of author1: drop only the abbreviation.
                unset($unique_authors2[$key2]);
            } else {
                // Neither equal nor an abbreviation, so try a fuzzy match.
                $match = self::match_author_words($author1, $author2);
                if ($match['match']) {
                    unset($unique_authors1[$key1]);
                    unset($unique_authors2[$key2]);
                }
            }
        }
    }

    return array($unique_authors1, $unique_authors2);
}