if (round($score0 / 3.5, 3) == round($score1 / 3.5, 3)) { $undecide = true; } //var_dump($undecide); } else { $onlyOne = true; } if ($undecide || $onlyOne) { $comb = array(); $comb_string = array(); foreach ($matched_only as $m_idx => $mo) { $comb_dmin = 999; $comb[$m_idx] = array('[whatever]', '[whatever]', '[whatever]'); $comb_string[$m_idx] = ""; $comb_common = array(); $parts1 = explode(" ", canonical_form($mo, true)); $parts2 = explode(" ", $name_cleaned); $parts_bc_1 = array_slice($parts1, 1); $parts_bc_2 = array_slice($parts2, 1); $diff_rank = false; foreach ($parts_bc_1 as $idx1 => $pbc1) { foreach ($parts_bc_2 as $idx2 => $pbc2) { // var_dump(levenshtein($pbc1, $pbc2)); if (levenshtein($pbc1, $pbc2) < $comb_dmin) { // $comb = $parts1[0] . " " . $pbc1; $comb[$m_idx][0] = $parts1[0]; $comb_common = array('idx' => $idx1, 'name' => $pbc1); $comb_dmin = levenshtein($pbc1, $pbc2); if ($idx1 != $idx2) { $diff_rank = true; }
/**
 * Look up a scientific name against a Solr endpoint in progressively fuzzier stages.
 *
 * Stage 1 queries the exact canonical name (plus spell-check suggestions),
 * stage 2 queries genus + any epithet (plus epithet / genus spell-check
 * suggestions), stage 3 queries the sound-encoded forms produced by
 * treat_word(). Stages 2 and 3 run only when the accumulated results still
 * hold a non-empty entry under the '' key, or when $best is 'no'.
 * NOTE(review): the '' key is presumably the placeholder extract_results()
 * keeps while nothing has matched - confirm against extract_results().
 *
 * @param string $name    Raw name as supplied by the caller.
 * @param mixed  $against Passed through unchanged to extract_results().
 * @param string $best    'no' forces every stage to run; any other value stops
 *                        escalating once the '' entry is gone.
 * @param string $ep      Solr core base URL; '/select?wt=json&q=*:*' is appended.
 *
 * @return array|false|null false when $ep is empty; a single 'N/A' record when
 *                          the cleaned name has no second word; otherwise the
 *                          extract_results() records keyed by their 'matched'
 *                          value, each merged with 'name' and 'name_cleaned'.
 *                          null when extract_results() yields nothing (this is
 *                          what the previous implementation returned, via an
 *                          undefined variable).
 */
function queryNames($name, $against, $best, $ep) {
    if (empty($ep)) {
        return false;
    }

    $ep .= '/select?wt=json&q=*:*';

    // Third argument is the "reset" flag (the original call spelled it
    // `$reset = true`); presumably drops results left over from a previous name.
    extract_results("", "", true);

    $mix2 = array();        // epithets (2nd / 3rd word) of the cleaned name
    $sound_mix2 = array();  // treat_word() forms of those epithets
    $suggestions = array();
    $long_suggestions = array();

    $name_cleaned = canonical_form($name, true);
    $parts = explode(" ", $name_cleaned);

    $lpa2 = $parts[0];
    $lpb2 = isset($parts[1]) ? $parts[1] : null;
    $lpc2 = isset($parts[2]) ? $parts[2] : null;

    $spa2 = treat_word($lpa2);
    $spb2 = treat_word($lpb2);
    $spc2 = treat_word($lpc2);

    if (empty($parts[1])) {
        // A single word cannot be matched; report it as N/A.
        return array('N/A' => array(
            'name' => $name,
            'name_cleaned' => $name_cleaned,
            'matched' => 'N/A',
            'matched_clean' => 'N/A',
            'accepted_namecode' => array(),
            'namecode' => array(),
            'source' => array(),
            'url_id' => array(),
            'a_url_id' => array(),
            'kingdom' => array(),
            'phylum' => array(),
            'class' => array(),
            'order' => array(),
            'family' => array(),
            'higher_than_family' => array(),
            'type' => 'N/A',
        ));
    }

    $mix2[] = $parts[1];
    if (!empty($parts[2])) {
        $mix2[] = $parts[2];
    }
    if (!empty($spb2)) {
        $sound_mix2[] = $spb2;
    }
    if (!empty($spc2)) {
        $sound_mix2[] = $spc2;
    }

    // Query fragments reused by several stages; $mix2 is not modified below.
    $spell_base = $ep . "&rows=0&spellcheck.q=";
    $genus_filter = '&fq=latin_part_a:' . urlencode($lpa2);
    $bc_filter = '&fq=latin_part_bc:(' . urlencode(implode(' OR ', $mix2)) . ")";

    // ---- Type 1: exact canonical name ----
    $query_url_1 = $ep . '&fq=canonical_name:"' . urlencode($name_cleaned) . '"';
    extract_results($query_url_1, TYPE_1, false, $against);

    // Type 1 with a minor spelling error in the epithets.
    $suggestion = extract_suggestion($spell_base . urlencode(implode(" ", $mix2)), TYPE_1_E);
    if (!empty($suggestion)) {
        $query_url_1_err = $ep . '&fq=canonical_name:"' . urlencode("{$lpa2} {$suggestion}") . '"';
        extract_results($query_url_1_err, TYPE_1_E, false, $against);
    }

    // Type 1 with a suggestion for the whole name, restricted to the given genus.
    $long_suggestion = extract_suggestion($spell_base . urlencode($name_cleaned), TYPE_1_E);
    if (!empty($long_suggestion)) {
        // The genus is URL-encoded here like in every other latin_part_a filter.
        $query_url_1_err = $ep . $genus_filter . '&fq=canonical_name:"' . urlencode("{$long_suggestion}") . '"';
        extract_results($query_url_1_err, TYPE_1_E, false, $against);
    }

    $all_matched_tmp = extract_results();

    if (!empty($all_matched_tmp['']) || $best == 'no') {
        // ---- Type 2: same genus, any of the epithets ----
        extract_results($ep . $genus_filter . $bc_filter, TYPE_2, false, $against);

        // Collect spell-check suggestions per epithet, alone and prefixed by the genus.
        foreach (array_unique($mix2) as $p) {
            $suggestion = extract_suggestion($spell_base . urlencode($p), TYPE_2_E);
            if (!empty($suggestion)) {
                $suggestions[] = $suggestion;
            }
            $long_suggestion = extract_suggestion($spell_base . urlencode("{$lpa2} {$p}"), TYPE_2_E);
            if (!empty($long_suggestion)) {
                $long_suggestions[] = $long_suggestion;
            }
        }

        if (!empty($suggestions)) {
            $suggestions = array_unique(array_merge($suggestions, $mix2));
            $query_url_2_err = $ep . $genus_filter
                . '&fq=latin_part_bc:(' . urlencode(implode(' OR ', $suggestions)) . ")";
            extract_results($query_url_2_err, TYPE_2_E, false, $against);
        }

        if (!empty($long_suggestions) && count($mix2) > 1) {
            foreach ($long_suggestions as $long_suggestion) {
                $query_url_2_err = $ep . '&fq=canonical_name:"' . urlencode($long_suggestion) . '"';
                extract_results($query_url_2_err, TYPE_2_E, false, $against);
            }
        }

        // Genus spelling error: ask for a suggestion for the genus alone, then
        // for the whole name, then for "genus epithet" pairs, keeping only the
        // first word of multi-word suggestions.
        $suggestion = extract_suggestion($spell_base . urlencode($lpa2), TYPE_2_GE);
        if (empty($suggestion)) {
            // explode() of an empty answer yields array(''), so normalise that
            // to null; otherwise the fallback below could never run.
            $words = explode(" ", (string) extract_suggestion($spell_base . urlencode($name_cleaned), TYPE_2_GE));
            $suggestion = ($words[0] !== '') ? $words[0] : null;

            if (is_null($suggestion)) {
                foreach ($mix2 as $mp) {
                    // String concatenation ('.'), not arithmetic '+'.
                    $words = explode(" ", (string) extract_suggestion($spell_base . urlencode($lpa2 . ' ' . $mp), TYPE_2_GE));
                    $suggestion = ($words[0] !== '') ? $words[0] : null;
                    if (!is_null($suggestion)) {
                        break;
                    }
                }
            }
        }

        // Without a suggested genus there is nothing to compare or query.
        if (!empty($suggestion)) {
            $query_url_2_genus_err = $ep . '&fq=latin_part_a:' . urlencode($suggestion) . $bc_filter;

            if (treat_word($lpa2, true) == treat_word($suggestion, true)) {
                // Both genera reduce to the same treat_word() form.
                extract_results($query_url_2_genus_err, TYPE_2_GS, false, $against);
            } elseif (levenshtein($lpa2, $suggestion) == 1) {
                if (strlen($lpa2) == strlen($suggestion)) {
                    // Same length and distance 1: exactly one substituted
                    // character. Accept it only when similar_char() approves.
                    $len = strlen($lpa2);
                    for ($i = 0; $i < $len; $i++) {
                        if ($lpa2[$i] != $suggestion[$i]) {
                            $next_given = isset($lpa2[$i + 1]) ? $lpa2[$i + 1] : '';
                            $next_suggested = isset($suggestion[$i + 1]) ? $suggestion[$i + 1] : '';
                            if (similar_char($lpa2[$i], $suggestion[$i], $next_given, $next_suggested)) {
                                extract_results($query_url_2_genus_err, TYPE_2_GL, false, $against);
                            }
                            break; // only one position can differ
                        }
                    }
                } else {
                    // Distance 1 with different length: one inserted or deleted character.
                    extract_results($query_url_2_genus_err, TYPE_2_GL2, false, $against);
                }
            }
        }

        $all_matched_tmp = extract_results();
    }

    if (!empty($all_matched_tmp['']) || $best == 'no') {
        // ---- Type 3: sound-alike matching on treat_word() forms ----
        $sound = treat_word($name_cleaned);
        $query_url_3 = $ep . '&fq=sound_name:"' . urlencode($sound) . '"';
        extract_results($query_url_3, TYPE_3_S, false, $against);

        // Sound of the genus plus sound of any epithet.
        $sound_genus_filter = '&fq=sound_part_a:' . urlencode($spa2);
        $query_url_3 = $ep . $sound_genus_filter
            . '&fq=sound_part_bc:(' . urlencode(implode(' OR ', $sound_mix2)) . ")";
        extract_results($query_url_3, TYPE_3_S2, false, $against);

        // Same, with treat_word($word, true) applied to every epithet.
        $sound_mix2_strip_ending = array_map("treat_word", $mix2, array_fill(0, count($mix2), true));
        $stripped_bc_filter = '&fq=sound_part_bc_strip_ending:('
            . urlencode(implode(' OR ', $sound_mix2_strip_ending)) . ")";

        // NOTE(review): TYPE3_S3 breaks the TYPE_3_* naming used by its
        // siblings; kept as-is because the constant's definition is not
        // visible here - confirm it is not meant to be TYPE_3_S3.
        extract_results($ep . $sound_genus_filter . $stripped_bc_filter, TYPE3_S3, false, $against);

        $query_url_3_strip_all_ending = $ep
            . '&fq=sound_part_a_strip_ending:' . urlencode(treat_word($spa2, true))
            . $stripped_bc_filter;
        extract_results($query_url_3_strip_all_ending, TYPE_3_GUESS, false, $against);

        $all_matched_tmp = extract_results();
    }

    // Initialised explicitly so an empty result set no longer reads an
    // undefined variable; null keeps the value callers received before.
    $all_matched = null;
    foreach ($all_matched_tmp as $m) {
        $all_matched[$m['matched']] = array_merge(
            array('name' => $name, 'name_cleaned' => $name_cleaned),
            $m
        );
    }

    return $all_matched;
}
$rec['phylum'] = array_shift(explode(" ", $vals[8])); $rec['kingdom'] = array_shift(explode(" ", $vals[9])); $rec['sound_family'] = treat_word($rec['family']); $rec['sound_order'] = treat_word($rec['order']); $rec['sound_class'] = treat_word($rec['class']); $rec['sound_phylum'] = treat_word($rec['phylum']); $rec['sound_kingdom'] = treat_word($rec['kingdom']); $rec['namecode'] = $vals[0]; // $rec['taibnet_url'] = "http://taibnet.sinica.edu.tw/chi/taibnet_species_detail.php?name_code=" . $vals[0]; if (!empty($vals[1])) { $rec['accepted_namecode'] = $vals[1]; } else { $rec['accepted_namecode'] = $vals[0]; } $rec['original_name'] = $vals[2]; $rec['canonical_name'] = canonical_form($vals[2], true); //if ($rec['canonical_name'] == 'Bombyx pernyi') { //var_dump($rec); //} $rec['sound_name'] = treat_word($rec['canonical_name']); $frags = explode(" ", $rec['canonical_name']); $rec['latin_part_a'] = $frags[0]; $rec['genus'] = $frags[0]; $rec['sound_part_a'] = treat_word($frags[0]); $rec['sound_genus'] = $frags[0]; $rec['sound_part_a_strip_ending'] = treat_word($frags[0], true); $rec['nameSpell'][] = $frags[0]; if (!empty($frags[1])) { $rec['latin_part_bc'][] = $frags[1]; $rec['nameSpell'][] = $frags[1]; $rec['nameSpell'][] = $frags[0] . " " . $frags[1];