コード例 #1
0
ファイル: class.authormatch.php プロジェクト: kyleLesack/TNRS
 /**
  * Removes from two lists of author words the words that match a word in the other list.
  *
  * Every word of the first list is compared with every word of the second list after
  * both have been passed through Normalize::normalize_author_string(). For each pair:
  *   - identical words, or a prefix match where the shorter word is at least 3
  *     characters long: the words are removed from both lists;
  *   - a prefix match where the shorter word has fewer than 3 characters (treated as
  *     an abbreviation): only the abbreviation is removed from its own list;
  *   - otherwise the pair is handed to self::match_author_words() and both words are
  *     removed when its result has a truthy 'match' entry.
  *
  * Prefix comparison is case-insensitive. Removal is done with unset(), so the
  * surviving elements keep their original keys and original (un-normalized) values.
  *
  * @param array $author_words1 author words of the first name
  * @param array $author_words2 author words of the second name
  * @return array two-element array: the remaining words of the first list, then the
  *               remaining words of the second list
  */
 public static function remove_duplicate_authors($author_words1, $author_words2)
 {
     $unique_authors1 = $author_words1;
     $unique_authors2 = $author_words2;

     foreach ($author_words1 as $key1 => $word1) {
         $author1 = Normalize::normalize_author_string($word1);

         foreach ($author_words2 as $key2 => $word2) {
             $author2 = Normalize::normalize_author_string($word2);

             // Both flags describe the CURRENT pair only. They must be recomputed on
             // every inner iteration; a flag carried over from an earlier pair would
             // make unrelated words look like matches.
             // Strict comparison avoids numeric-string juggling ("1e1" == "10").
             $is_equal = ($author1 === $author2);
             $author1_matches = $is_equal;
             $author2_matches = $is_equal;

             if (!$is_equal) {
                 if (preg_match("/^" . preg_quote($author1, "/") . "/i", $author2)) {
                     // author1 is a prefix of author2
                     $author1_matches = true;
                 } elseif (preg_match("/^" . preg_quote($author2, "/") . "/i", $author1)) {
                     // author2 is a prefix of author1
                     $author2_matches = true;
                 }
             }

             if ($is_equal
                 || (strlen($author1) >= 3 && $author1_matches)
                 || (strlen($author2) >= 3 && $author2_matches)
             ) {
                 // equal, or one is contained in the other and long enough to be a
                 // real word: consider it a match for both terms
                 unset($unique_authors1[$key1]);
                 unset($unique_authors2[$key2]);
             } elseif ($author1_matches) {
                 // author1 was an abbreviation of author2
                 unset($unique_authors1[$key1]);
             } elseif ($author2_matches) {
                 // author2 was an abbreviation of author1
                 unset($unique_authors2[$key2]);
             } else {
                 // no match or abbreviation, so try a fuzzy match
                 $match = self::match_author_words($author1, $author2);
                 if ($match['match']) {
                     unset($unique_authors1[$key1]);
                     unset($unique_authors2[$key2]);
                 }
             }
         }
     }

     return array($unique_authors1, $unique_authors2);
 }