示例#1
0
文件: api.php 项目: TaiBIF/MyMatch
     if (round($score0 / 3.5, 3) == round($score1 / 3.5, 3)) {
         $undecide = true;
     }
     //var_dump($undecide);
 } else {
     $onlyOne = true;
 }
 if ($undecide || $onlyOne) {
     $comb = array();
     $comb_string = array();
     foreach ($matched_only as $m_idx => $mo) {
         $comb_dmin = 999;
         $comb[$m_idx] = array('[whatever]', '[whatever]', '[whatever]');
         $comb_string[$m_idx] = "";
         $comb_common = array();
         $parts1 = explode(" ", canonical_form($mo, true));
         $parts2 = explode(" ", $name_cleaned);
         $parts_bc_1 = array_slice($parts1, 1);
         $parts_bc_2 = array_slice($parts2, 1);
         $diff_rank = false;
         foreach ($parts_bc_1 as $idx1 => $pbc1) {
             foreach ($parts_bc_2 as $idx2 => $pbc2) {
                 // var_dump(levenshtein($pbc1, $pbc2));
                 if (levenshtein($pbc1, $pbc2) < $comb_dmin) {
                     // $comb = $parts1[0] . " " . $pbc1;
                     $comb[$m_idx][0] = $parts1[0];
                     $comb_common = array('idx' => $idx1, 'name' => $pbc1);
                     $comb_dmin = levenshtein($pbc1, $pbc2);
                     if ($idx1 != $idx2) {
                         $diff_rank = true;
                     }
示例#2
0
/**
 * Look up a scientific name through a Solr "select" endpoint using
 * progressively looser matching stages.
 *
 * Stages, in order:
 *   - Type 1: exact canonical-name query, plus retries built from spell-check
 *     suggestions.
 *   - Type 2: genus (first word) combined with any of the remaining words,
 *     plus spell-corrected epithets and a spell-corrected genus.
 *   - Type 3: the same idea on the treat_word() "sound" forms of the words.
 *
 * Type 2 and Type 3 only run when the results gathered so far contain a
 * non-empty entry under the '' key, or when $best is 'no'.
 * NOTE(review): presumably the '' key is the "nothing matched" placeholder
 * produced by extract_results() -- confirm against that function.
 *
 * @param string $name    Raw name to look up.
 * @param mixed  $against Passed through unchanged to extract_results().
 * @param string $best    'no' forces every stage to run.
 * @param string $ep      Endpoint base URL; '/select?wt=json&q=*:*' is appended.
 *
 * @return array|false false when $ep is empty; a single 'N/A' record when the
 *                     cleaned name has no second word; otherwise the
 *                     accumulated results keyed by their 'matched' value, each
 *                     merged with 'name' and 'name_cleaned' (an empty array
 *                     when nothing was accumulated).
 */
function queryNames($name, $against, $best, $ep)
{
    if (empty($ep)) {
        return false;
    }
    $ep .= '/select?wt=json&q=*:*';

    // Local URL builders; every user-derived fragment is urlencoded in one place.
    $canonical_url = function ($canonical) use ($ep) {
        return $ep . '&fq=canonical_name:"' . urlencode($canonical) . '"';
    };
    $spellcheck_url = function ($text) use ($ep) {
        return $ep . "&rows=0&spellcheck.q=" . urlencode($text);
    };
    $genus_epithet_url = function ($genus, array $epithets) use ($ep) {
        return $ep . '&fq=latin_part_a:' . urlencode($genus) . '&fq=latin_part_bc:(' . urlencode(implode(' OR ', $epithets)) . ")";
    };
    // First word of a suggestion, or null when there is none. The previous
    // array_shift(explode(...)) always produced a string, so the "no suggestion"
    // checks below could never succeed.
    $first_word = function ($text) {
        $words = explode(" ", trim((string) $text));
        return ($words[0] === '') ? null : $words[0];
    };

    // Third argument asks extract_results() to reset before a new lookup.
    extract_results("", "", true);

    // mix2: words after the genus; sound_mix2: their treat_word() forms.
    $mix2 = array();
    $sound_mix2 = array();
    $suggestions = array();
    $long_suggestions = array();
    $all_matched = array();

    $name_cleaned = canonical_form($name, true);
    $parts = explode(" ", $name_cleaned);
    $lpa2 = $parts[0];
    $lpb2 = isset($parts[1]) ? $parts[1] : null;
    $lpc2 = isset($parts[2]) ? $parts[2] : null;
    $spa2 = treat_word($lpa2);
    $spb2 = treat_word($lpb2);
    $spc2 = treat_word($lpc2);

    if (empty($parts[1])) {
        // Without a second word there is nothing to match on: return a placeholder record.
        return array('N/A' => array('name' => $name, 'name_cleaned' => $name_cleaned, 'matched' => 'N/A', 'matched_clean' => 'N/A', 'accepted_namecode' => array(), 'namecode' => array(), 'source' => array(), 'url_id' => array(), 'a_url_id' => array(), 'kingdom' => array(), 'phylum' => array(), 'class' => array(), 'order' => array(), 'family' => array(), 'higher_than_family' => array(), 'type' => 'N/A'));
    }
    $mix2[] = $parts[1];
    if (!empty($parts[2])) {
        $mix2[] = $parts[2];
    }
    if (!empty($spb2)) {
        $sound_mix2[] = $spb2;
    }
    if (!empty($spc2)) {
        $sound_mix2[] = $spc2;
    }

    // Type 1: exact canonical name.
    extract_results($canonical_url($name_cleaned), TYPE_1, false, $against);

    // Type 1 with a minor spelling error in the words after the genus.
    $suggestion = extract_suggestion($spellcheck_url(implode(" ", $mix2)), TYPE_1_E);
    if (!empty($suggestion)) {
        extract_results($canonical_url("{$lpa2} {$suggestion}"), TYPE_1_E, false, $against);
    }

    // Type 1 with a spelling suggestion for the whole cleaned name.
    $long_suggestion = extract_suggestion($spellcheck_url($name_cleaned), TYPE_1_E);
    if (!empty($long_suggestion)) {
        // The genus is urlencoded here as in every other query (it was raw before).
        $query_url_1_err = $ep . '&fq=latin_part_a:' . urlencode($lpa2) . '&fq=canonical_name:"' . urlencode($long_suggestion) . '"';
        extract_results($query_url_1_err, TYPE_1_E, false, $against);
    }

    $all_matched_tmp = extract_results();

    if (!empty($all_matched_tmp['']) || $best == 'no') {
        // Type 2: same genus, any of the remaining words.
        extract_results($genus_epithet_url($lpa2, $mix2), TYPE_2, false, $against);

        // Collect spelling suggestions per word, alone and prefixed with the genus.
        foreach (array_unique($mix2) as $p) {
            $suggestion = extract_suggestion($spellcheck_url($p), TYPE_2_E);
            if (!empty($suggestion)) {
                $suggestions[] = $suggestion;
            }
            $long_suggestion = extract_suggestion($spellcheck_url("{$lpa2} {$p}"), TYPE_2_E);
            if (!empty($long_suggestion)) {
                $long_suggestions[] = $long_suggestion;
            }
        }
        if (!empty($suggestions)) {
            $suggestions = array_unique(array_merge($suggestions, $mix2));
            extract_results($genus_epithet_url($lpa2, $suggestions), TYPE_2_E, false, $against);
        }
        if (!empty($long_suggestions) && count($mix2) > 1) {
            foreach ($long_suggestions as $long_suggestion) {
                extract_results($canonical_url($long_suggestion), TYPE_2_E, false, $against);
            }
        }

        // Genus spelling error: ask for a suggestion on the genus alone, then on
        // the whole name, then on "genus word" for each remaining word.
        $suggestion = extract_suggestion($spellcheck_url($lpa2), TYPE_2_GE);
        if (empty($suggestion)) {
            $suggestion = $first_word(extract_suggestion($spellcheck_url($name_cleaned), TYPE_2_GE));
            if ($suggestion === null) {
                foreach ($mix2 as $mp) {
                    // String concatenation; the former `+` performed arithmetic.
                    $suggestion = $first_word(extract_suggestion($spellcheck_url("{$lpa2} {$mp}"), TYPE_2_GE));
                    if ($suggestion !== null) {
                        break;
                    }
                }
            }
        }

        // Only compare against a genus suggestion when one was actually found.
        if (!empty($suggestion)) {
            if (treat_word($lpa2, true) == treat_word($suggestion, true)) {
                extract_results($genus_epithet_url($suggestion, $mix2), TYPE_2_GS, false, $against);
            } elseif (levenshtein($lpa2, $suggestion) == 1) {
                if (strlen($lpa2) == strlen($suggestion)) {
                    // Equal length and distance 1 means exactly one substituted
                    // character: find it and accept it only if similar_char() agrees.
                    $len = strlen($lpa2);
                    for ($i = 0; $i < $len; $i++) {
                        if ($lpa2[$i] != $suggestion[$i]) {
                            $next_a = isset($lpa2[$i + 1]) ? $lpa2[$i + 1] : '';
                            $next_b = isset($suggestion[$i + 1]) ? $suggestion[$i + 1] : '';
                            if (similar_char($lpa2[$i], $suggestion[$i], $next_a, $next_b)) {
                                extract_results($genus_epithet_url($suggestion, $mix2), TYPE_2_GL, false, $against);
                            }
                            break;
                        }
                    }
                } else {
                    // One inserted or deleted character in the genus.
                    extract_results($genus_epithet_url($suggestion, $mix2), TYPE_2_GL2, false, $against);
                }
            }
        }

        $all_matched_tmp = extract_results();
    }

    if (!empty($all_matched_tmp['']) || $best == 'no') {
        // Type 3: treat_word() form of the whole name.
        $sound = treat_word($name_cleaned);
        $query_url_3 = $ep . '&fq=sound_name:"' . urlencode($sound) . '"';
        extract_results($query_url_3, TYPE_3_S, false, $against);

        // Type 3 mix: treat_word() genus with any treat_word() epithet.
        $query_url_3 = $ep . '&fq=sound_part_a:' . urlencode($spa2) . '&fq=sound_part_bc:(' . urlencode(implode(' OR ', $sound_mix2)) . ")";
        extract_results($query_url_3, TYPE_3_S2, false, $against);

        // Same, with treat_word($word, true) applied to the epithets.
        $sound_mix2_strip_ending = array_map("treat_word", $mix2, array_fill(0, count($mix2), true));
        $stripped_bc = urlencode(implode(' OR ', $sound_mix2_strip_ending));
        $query_url_3_strip_bc_ending = $ep . '&fq=sound_part_a:' . urlencode($spa2) . '&fq=sound_part_bc_strip_ending:(' . $stripped_bc . ")";
        // NOTE(review): TYPE3_S3 breaks the TYPE_3_* naming of its siblings --
        // confirm the constant is really defined under this name.
        extract_results($query_url_3_strip_bc_ending, TYPE3_S3, false, $against);

        // Loosest guess: treat_word(..., true) on both genus and epithets.
        $query_url_3_strip_all_ending = $ep . '&fq=sound_part_a_strip_ending:' . urlencode(treat_word($spa2, true)) . '&fq=sound_part_bc_strip_ending:(' . $stripped_bc . ")";
        extract_results($query_url_3_strip_all_ending, TYPE_3_GUESS, false, $against);

        $all_matched_tmp = extract_results();
    }

    // Re-key by matched name and attach the original and cleaned input.
    foreach ($all_matched_tmp as $m) {
        $all_matched[$m['matched']] = array_merge(array('name' => $name, 'name_cleaned' => $name_cleaned), $m);
    }
    return $all_matched;
}
示例#3
0
 $rec['phylum'] = array_shift(explode(" ", $vals[8]));
 $rec['kingdom'] = array_shift(explode(" ", $vals[9]));
 $rec['sound_family'] = treat_word($rec['family']);
 $rec['sound_order'] = treat_word($rec['order']);
 $rec['sound_class'] = treat_word($rec['class']);
 $rec['sound_phylum'] = treat_word($rec['phylum']);
 $rec['sound_kingdom'] = treat_word($rec['kingdom']);
 $rec['namecode'] = $vals[0];
 //	$rec['taibnet_url'] = "http://taibnet.sinica.edu.tw/chi/taibnet_species_detail.php?name_code=" . $vals[0];
 if (!empty($vals[1])) {
     $rec['accepted_namecode'] = $vals[1];
 } else {
     $rec['accepted_namecode'] = $vals[0];
 }
 $rec['original_name'] = $vals[2];
 $rec['canonical_name'] = canonical_form($vals[2], true);
 //if ($rec['canonical_name'] == 'Bombyx pernyi') {
 //var_dump($rec);
 //}
 $rec['sound_name'] = treat_word($rec['canonical_name']);
 $frags = explode(" ", $rec['canonical_name']);
 $rec['latin_part_a'] = $frags[0];
 $rec['genus'] = $frags[0];
 $rec['sound_part_a'] = treat_word($frags[0]);
 $rec['sound_genus'] = $frags[0];
 $rec['sound_part_a_strip_ending'] = treat_word($frags[0], true);
 $rec['nameSpell'][] = $frags[0];
 if (!empty($frags[1])) {
     $rec['latin_part_bc'][] = $frags[1];
     $rec['nameSpell'][] = $frags[1];
     $rec['nameSpell'][] = $frags[0] . " " . $frags[1];