Пример #1
0
 /**
  * Query the BioStor search API for $text and store every close match.
  *
  * Colons and double quotes are removed from $text, the cleaned text is sent
  * to the search endpoint, and at most the first three returned rows are
  * compared with the query. For each row the finger_print() of the query and
  * of the row's "default" field are scored with LongestCommonSequence; the
  * score is normalised by the longer of the two fingerprints, and rows
  * scoring above 0.8 are handed to $this->StoreHit().
  *
  * @param mixed  $query_key  Key passed through unchanged to StoreHit().
  * @param string $text       Text to search for.
  * @param int    $limit      Not used by this method; the number of rows
  *                           examined is fixed at three.
  * @param mixed  $properties Not used by this method.
  *
  * @return void
  */
 function OneQuery($query_key, $text, $limit = 1, $properties = null)
 {
     // Clean text: drop colons and double quotes before building the query.
     $text = str_replace(array(':', '"'), '', $text);

     // BioStor search API
     $url = 'http://biostor.org/api.php?q=' . urlencode($text);
     $json = get($url);
     if ($json == '') {
         return;
     }

     $obj = json_decode($json);
     if (!isset($obj->rows) || !is_array($obj->rows)) {
         return;
     }

     // The query fingerprint does not change between rows, so compute it once.
     $v1 = finger_print($text);
     $len1 = strlen($v1);
     if ($len1 === 0) {
         // Nothing to compare against; also avoids dividing by zero below.
         return;
     }

     // Look at the first three rows at most, but never past the end of the
     // result list (the API may return fewer than three rows).
     $max_rows = min(3, count($obj->rows));
     for ($i = 0; $i < $max_rows; $i++) {
         $row = $obj->rows[$i];
         if (!isset($row->fields->default)) {
             continue;
         }

         $v2 = finger_print($row->fields->default);
         $len2 = strlen($v2);
         if ($len2 === 0) {
             // An empty fingerprint cannot match and would divide by zero.
             continue;
         }

         $lcs = new LongestCommonSequence($v1, $v2);
         $d = $lcs->score();

         // Normalise by both lengths and keep the smaller ratio, so a short
         // string contained in a long one does not count as a match.
         $score = min($d / $len1, $d / $len2);
         if ($score > 0.8) {
             $hit = new stdclass();
             $hit->id = str_replace('biostor/', '', $row->id);
             $hit->name = $row->doc->title;
             $hit->score = $score;
             $hit->match = true;
             $this->StoreHit($query_key, $hit);
         }
     }
 }
Пример #2
0
/**
 * Group similar strings into clusters using approximate string matching.
 *
 * Every string is trimmed and normalised with finger_print(), all pairs are
 * compared, and pairs judged similar are linked in a symmetric adjacency
 * matrix. The components returned by get_components() for that matrix become
 * the clusters.
 *
 * A pair (v1, v2) is linked when one of the following holds:
 *  - LongestCommonSubstring() reports a length of at least 4, the common
 *    substring is not purely numeric (e.g. just a date), and the
 *    LongestCommonSequence score is at least 70% of the length of v1 OR of v2;
 *  - the reported length is below 4, both strings are non-empty, and either
 *    the score equals strlen(v1) and v1 occurs in v2, or (when the score
 *    differs from strlen(v1)) v2 occurs in v1. This is meant to catch
 *    abbreviations.
 *
 * NOTE(review): the original comments describe the 70% rule as applying to
 * "both" strings and the abbreviation rule as matching at the "start" of the
 * string; the code accepts either string and any position. The code's
 * behaviour is kept here - confirm which was intended.
 *
 * @param array $strings Strings to cluster. Keys may be arbitrary; they are
 *                       preserved and reported as member ids.
 *
 * @return stdClass Object with two properties:
 *                  - graph: text of the form "graph { ... }" listing one node
 *                    per string and one edge per linked pair (looks like
 *                    Graphviz DOT input);
 *                  - clusters: list of clusters, each a list of objects with
 *                    ->id (original key) and ->string (cleaned string).
 */
function cluster($strings)
{
    $result = new stdclass();
    $n = count($strings);

    // Clean each string under its own key, so that input that is not a plain
    // 0..n-1 list is handled without touching undefined offsets.
    foreach ($strings as $k => $v) {
        $strings[$k] = finger_print(trim($v));
    }

    // Position <-> key lookups: matrix rows/columns are positions 0..n-1.
    $inv_map = array_keys($strings);
    $values = array_values($strings);

    // Create adjacency matrix and fill with 0's
    $X = array();
    for ($i = 0; $i < $n; $i++) {
        $X[$i] = ($n > 0) ? array_fill(0, $n, 0) : array();
    }

    $nodes = '';
    $edges = '';

    // Compare names using approximate string matching (each unordered pair once)
    for ($i = 0; $i < $n; $i++) {
        $v1 = $values[$i];
        $len1 = strlen($v1);
        $nodes .= "node {$i} [label=\"" . $v1 . "\"];\n";

        for ($j = $i + 1; $j < $n; $j++) {
            $v2 = $values[$j];
            $len2 = strlen($v2);

            // Longest common subsequence for this pair of cleaned names
            $lcs = new LongestCommonSequence($v1, $v2);
            $d = $lcs->score();

            // Longest common substring, used to require some continuity in
            // the match. $str presumably receives the substring itself by
            // reference - verify against LongestCommonSubstring().
            $str = '';
            $lcstr = LongestCommonSubstring($v1, $v2, $str);

            $linked = false;
            if ($lcstr >= 4) {
                // Ignore matches just on a number (e.g. a date); otherwise
                // accept when the subsequence covers >= 70% of either string.
                if (!is_numeric(trim($str)) && $len1 > 0 && $len2 > 0) {
                    $linked = ($d / $len1 >= 0.7 || $d / $len2 >= 0.7);
                }
            } elseif ($len1 > 0 && $len2 > 0) {
                // Only a short common run: accept if one string occurs inside
                // the other (e.g. an abbreviation). Empty strings are skipped
                // because strpos() with an empty needle reports a match at
                // offset 0 on PHP 8, which would link them to everything.
                if ($len1 == $d) {
                    $linked = (strpos($v2, $v1, 0) !== false);
                } else {
                    $linked = (strpos($v1, $v2, 0) !== false);
                }
            }

            if ($linked) {
                $X[$i][$j] = 1;
                $X[$j][$i] = 1;
                $edges .= $i . " -- " . $j . " [label=\"" . $lcstr . "\"];\n";
            }
        }
    }

    $result->graph = "graph {\n" . $nodes . $edges . "}\n";

    // Get components of adjacency matrix; each component is a list of
    // matrix positions, which are translated back to the caller's keys.
    $components = get_components($X);
    $result->clusters = array();
    foreach ($components as $component) {
        $cluster = array();
        foreach ($component as $index) {
            $member = new stdclass();
            $member->id = $inv_map[$index];
            $member->string = $strings[$inv_map[$index]];
            $cluster[] = $member;
        }
        $result->clusters[] = $cluster;
    }

    return $result;
}
Пример #3
0
        $html = '<div>' . $this->left . '<br/>' . $this->right . '</div>';
        return $html;
    }
    /**
     * Recursively walk the table $C back from cell ($i, $j) and append the
     * aligned output to $this->left (for $X) and $this->right (for $Y).
     *
     * The recursive call is made before appending, so output is produced in
     * forward order. Elements equal in both sequences are wrapped in a
     * green-highlighted <span> and appended to both sides; an element taken
     * only from $Y is appended to the right side, and an element taken only
     * from $X is appended to the left side.
     *
     * @param array        $C Table indexed as $C[$i][$j] (presumably the LCS
     *                        length table - confirm with the code that builds it).
     * @param string|array $X First sequence, indexed from 0.
     * @param string|array $Y Second sequence, indexed from 0.
     * @param int          $i Number of leading elements of $X still to process.
     * @param int          $j Number of leading elements of $Y still to process.
     *
     * @return void
     */
    function printDiff($C, $X, $Y, $i, $j)
    {
        // Case 1: the current elements match - emit on both sides, highlighted.
        $sameElement = $i > 0 && $j > 0 && $X[$i - 1] == $Y[$j - 1];
        if ($sameElement) {
            $this->printDiff($C, $X, $Y, $i - 1, $j - 1);
            $highlighted = '<span style="background:rgb(100,255,100);color:black;">'
                . $X[$i - 1]
                . '</span>';
            $this->left .= $highlighted;
            $this->right .= $highlighted;
            return;
        }

        // Case 2: step back in $Y - the element exists only on the right.
        if ($j > 0 && ($i == 0 || $C[$i][$j - 1] >= $C[$i - 1][$j])) {
            $this->printDiff($C, $X, $Y, $i, $j - 1);
            $this->right .= $Y[$j - 1];
            return;
        }

        // Case 3: step back in $X - the element exists only on the left.
        if ($i > 0 && ($j == 0 || $C[$i][$j - 1] < $C[$i - 1][$j])) {
            $this->printDiff($C, $X, $Y, $i - 1, $j);
            $this->left .= $X[$i - 1];
        }
    }
}
// Smoke test: build a LongestCommonSequence for two sample phrases and print
// the result of score() followed by the result of display().
$s1 = "hello blue marine";
$s2 = "yellow blue submarine";
$lcs = new LongestCommonSequence($s1, $s2);
echo $lcs->score(), $lcs->display();