Ejemplo n.º 1
0
 /**
  *
  */
 public function __construct()
 {
     global $g_information_service_settings_AAT;
     WLPlugInformationServiceAAT::$s_settings = $g_information_service_settings_AAT;
     parent::__construct();
     $this->info['NAME'] = 'AAT';
     $this->description = _t('Provides access to Getty Linked Open Data AAT service');
 }
Ejemplo n.º 2
0
/**
 * Try to match given (partial) hierarchy path to a single subject in getty linked data AAT service
 * @param array $pa_hierarchy_path
 * @param int $pn_threshold
 * @param array $pa_options
 * 		removeParensFromLabels = Remove parens from labels for search and string comparison. This can improve results in specific cases.
 * @return bool|string
 */
function caMatchAAT($pa_hierarchy_path, $pn_threshold = 180, $pa_options = array())
{
    $vs_cache_key = md5(print_r($pa_hierarchy_path, true));
    if (MemoryCache::contains($vs_cache_key, 'AATMatches')) {
        return MemoryCache::fetch($vs_cache_key, 'AATMatches');
    }
    if (!is_array($pa_hierarchy_path)) {
        return false;
    }
    $pb_remove_parens_from_labels = caGetOption('removeParensFromLabels', $pa_options, false);
    // search the bottom-most component (the actual term)
    $vs_bot = trim(array_pop($pa_hierarchy_path));
    if ($pb_remove_parens_from_labels) {
        $vs_lookup = trim(preg_replace("/\\([\\p{L}\\-\\_\\s]+\\)/", '', $vs_bot));
    } else {
        $vs_lookup = $vs_bot;
    }
    $o_service = new WLPlugInformationServiceAAT();
    $va_hits = $o_service->lookup(array(), $vs_lookup, array('phrase' => true, 'raw' => true, 'limit' => 2000));
    if (!is_array($va_hits)) {
        return false;
    }
    $vn_best_distance = 0;
    $vn_pick = -1;
    foreach ($va_hits as $vn_i => $va_hit) {
        if (stripos($va_hit['TermPrefLabel']['value'], $vs_lookup) !== false) {
            // only consider terms that match what we searched
            // calculate similarity as a number by comparing both the term and the parent string
            $vs_label_with_parens = $va_hit['TermPrefLabel']['value'];
            $vs_label_without_parens = trim(preg_replace("/\\([\\p{L}\\s]+\\)/", '', $vs_label_with_parens));
            $va_label_percentages = array();
            // we try every combination with and without parens on both sides
            // unfortunately this code gets rather ugly because getting the similarity
            // as percentage is only possible by passing a reference parameter :-(
            similar_text($vs_label_with_parens, $vs_bot, $vn_label_percent);
            $va_label_percentages[] = $vn_label_percent;
            similar_text($vs_label_with_parens, $vs_lookup, $vn_label_percent);
            $va_label_percentages[] = $vn_label_percent;
            similar_text($vs_label_without_parens, $vs_bot, $vn_label_percent);
            $va_label_percentages[] = $vn_label_percent;
            similar_text($vs_label_without_parens, $vs_lookup, $vn_label_percent);
            $va_label_percentages[] = $vn_label_percent;
            // similarity to parent path
            similar_text($va_hit['ParentsFull']['value'], join(' ', array_reverse($pa_hierarchy_path)), $vn_parent_percent);
            // it's a weighted sum because the term label is more important than the exact path
            $vn_tmp = 2 * max($va_label_percentages) + $vn_parent_percent;
            //var_dump($va_hit); var_dump($vn_tmp);
            if ($vn_tmp > $vn_best_distance) {
                $vn_best_distance = $vn_tmp;
                $vn_pick = $vn_i;
            }
        }
    }
    if ($vn_pick >= 0 && $vn_best_distance > $pn_threshold) {
        $va_pick = $va_hits[$vn_pick];
        MemoryCache::save($vs_cache_key, $va_pick['ID']['value'], 'AATMatches');
        return $va_pick['ID']['value'];
    }
    return false;
}
 public function testGetDisplayLabelFromLookupText()
 {
     $o_service = new WLPlugInformationServiceAAT();
     $this->assertEquals('dump trucks', $o_service->getDisplayValueFromLookupText('[300022372] dump trucks [trucks, cargo vehicles by form]'));
 }