Esempio n. 1
0
/**
 * Fetch a CiNii RDF record and convert it into a reference object.
 *
 * The RDF at $rdf_url is retrieved with get(), transformed with the
 * stylesheet xsl/cinii.xsl (whose output is decoded as JSON), and the
 * decoded object is then tidied up: ISSN and journal title are filled in
 * where missing, the publisher id is derived from the record URL, author
 * names are split into lastname/forename, a leading "p" is stripped from
 * the start page, and the Japanese article title is used when no other
 * article title is present.
 *
 * @param string $rdf_url URL of the RDF document to fetch.
 * @param object $item    Receives the decoded record (by reference). It is
 *                        left untouched when the RDF cannot be fetched,
 *                        parsed, transformed or decoded.
 * @param string $issn    ISSN to use when the record does not supply one.
 * @param int    $debug   Non-zero to echo the intermediate JSON and dump
 *                        the decoded and the cleaned object.
 *
 * @return int Always 0 (kept for backward compatibility with callers).
 */
function cinii_rdf($rdf_url, &$item, $issn = '', $debug = 0)
{
    $result = 0;

    $rdf = get($rdf_url);

    // Nothing to parse: DOMDocument::loadXML() rejects an empty string
    // (warning on older PHP, ValueError on PHP 8).
    if (!is_string($rdf) || trim($rdf) == '') {
        return $result;
    }

    $xml_doc = new DOMDocument();
    if (!$xml_doc->loadXML($rdf)) {
        return $result;
    }

    // Convert the RDF to JSON via the stylesheet
    $xsl = new DOMDocument();
    if (!$xsl->load('xsl/cinii.xsl')) {
        return $result;
    }
    $xp = new XSLTProcessor();
    $xp->importStylesheet($xsl);
    $json = $xp->transformToXML($xml_doc);

    if ($debug) {
        echo $json;
    }

    // Only hand the record back to the caller if it decoded to an object;
    // every step below assigns properties on it.
    $decoded = is_string($json) ? json_decode($json) : null;
    if (!is_object($decoded)) {
        return $result;
    }
    $item = $decoded;

    if ($debug) {
        print_r($item);
    }

    // Ensure we have ISSN (might not be in the metadata)
    if (!isset($item->issn)) {
        if ($issn != '') {
            $item->issn = $issn;
        } elseif (isset($item->title)) {
            $issn = issn_from_journal_title($item->title);
            if ($issn != '') {
                $item->issn = $issn;
            }
        }
    }

    // Check we have journal name; title is always defined afterwards
    if (!isset($item->title) || $item->title == '') {
        $item->title = isset($item->issn) ? journal_title_from_issn($item->issn) : '';
    }

    // Id: the NAID is whatever follows the CiNii prefix in the record URL
    $item->publisher_id = isset($item->url)
        ? str_replace('http://ci.nii.ac.jp/naid/', '', $item->url)
        : '';

    // Do some cleaning of authors
    if (isset($item->authors) && (is_array($item->authors) || is_object($item->authors))) {
        foreach ($item->authors as $a) {
            if (!is_object($a) || !isset($a->author)) {
                continue;
            }

            // Last name in ALL CAPS
            if (preg_match('/^(?<lastname>[A-Z]+),?\\s*(?<forename>[A-Z](.*)$)/', $a->author, $matches)) {
                // mb_detect_encoding() returns false when it cannot decide;
                // fall back to UTF-8 rather than pass false as an encoding.
                $last_enc = mb_detect_encoding($matches['lastname']);
                if ($last_enc === false) {
                    $last_enc = 'UTF-8';
                }
                $fore_enc = mb_detect_encoding($matches['forename']);
                if ($fore_enc === false) {
                    $fore_enc = 'UTF-8';
                }
                $a->lastname = mb_convert_case($matches['lastname'], MB_CASE_TITLE, $last_enc);
                $a->forename = mb_convert_case($matches['forename'], MB_CASE_TITLE, $fore_enc);
            } else {
                // "Lastname, Forename"; a name without a comma has no forename part
                $parts = explode(",", $a->author);
                $a->lastname = trim($parts[0]);
                $a->forename = isset($parts[1]) ? trim($parts[1]) : '';
            }
        }
    }

    // Clean pages
    if (isset($item->spage)) {
        $item->spage = preg_replace('/^p/', '', $item->spage);
    }

    // Title if not english...
    if ((!isset($item->atitle) || $item->atitle == '') && isset($item->jp_atitle)) {
        $item->atitle = $item->jp_atitle;
    }

    if ($debug) {
        print_r($item);
    }

    return $result;
}
Esempio n. 2
0
/**
 * Look up a JSTOR article by SICI and populate $item with its metadata.
 *
 * The SICI is resolved through links.jstor.org. When a proxy is configured
 * ($config['proxy_name'] non-empty) the stable identifier is extracted from
 * the returned page and the stable "info" page is fetched instead. Dublin
 * Core <meta> tags are passed to parseDcMeta(), then ISSN, year, volume,
 * issue and start page are taken from the SICI itself via unpack_sici().
 * If a DOI was found it is checked with doi_metadata(): a valid DOI
 * contributes the journal title, an invalid one is removed. As a last
 * resort the journal title is looked up from the ISSN.
 *
 * Reads the globals $config and $debug.
 *
 * @param string $sici SICI identifying the article.
 * @param object $item Record object to fill in (passed by reference).
 *
 * @return bool False if JSTOR returned its "We're Sorry" page, true otherwise.
 */
function jstor_metadata($sici, &$item)
{
    global $config;
    global $debug;

    $url = 'http://links.jstor.org/sici?sici=' . urlencode($sici);
    $html = get($url);

    if ($debug) {
        echo '<pre style="text-align: left;border: 1px solid #c7cfd5;background: #f1f5f9;padding:15px;">';
        echo $url . "\n";
        echo htmlentities($html);
        echo "</pre>";
    }

    // Check for any error messages
    if (preg_match("/<h2>We're Sorry<\\/h2>/", $html)) {
        return false;
    }
    $found = true;

    // Outside Glasgow (no proxy) we get the metadata directly. Inside
    // Glasgow we are licensed, so we need one more step: extract the
    // stable identifier and harvest the stable "info" page.
    $has_proxy = isset($config['proxy_name']) && '' != $config['proxy_name'];
    if ($has_proxy && preg_match('/stable\\/info\\/(?<jstorid>\\d+)\\?/', $html, $match)) {
        $item->url = 'http://www.jstor.org/stable/' . $match['jstorid'];
        $html = get('http://www.jstor.org/stable/info/' . $match['jstorid']);
    }

    // Add line feeds so the regular expression works ('.' does not cross
    // newlines, which keeps each greedy match within a single tag)
    $html = str_replace('<meta', "\n<meta", $html);

    // Pull out the meta tags
    preg_match_all("|<meta\\s*name=\"(dc.[A-Za-z]*)\"\\s*(scheme=\"(.*)\")?\\s*(content=\"(.*)\")><\\/meta>|", $html, $out, PREG_PATTERN_ORDER);
    parseDcMeta($out, $item);

    if ($debug) {
        echo '<h3>metadata</h3>';
        print_r($out);
    }

    // Metadata encoded in the SICI itself
    $parts = unpack_sici($sici);

    if (isset($parts['issn'])) {
        $item->issn = $parts['issn'];
    }
    if (isset($parts['year'])) {
        $item->year = $parts['year'];
    }

    // Some JSTOR articles, such as Copeia, have all three elements in the enumeration,
    // so that the volume and issue are the second and third elements
    if (isset($parts['locn'])) {
        // Guard each assignment on the key that is actually read
        if (isset($parts['issue'])) {
            $item->volume = $parts['issue'];
        }
        $item->issue = $parts['locn'];
    } else {
        if (isset($parts['volume'])) {
            $item->volume = $parts['volume'];
        }
        if (isset($parts['issue'])) {
            $item->issue = $parts['issue'];
        }
    }

    if (isset($parts['site'])) {
        $item->spage = $parts['site'];
    }

    // Handle identifiers
    if (isset($item->doi)) {
        // Make stable URL: the part after the 10.2307/ prefix is used as the stable id
        $item->url = 'http://www.jstor.org/stable/' . str_replace("10.2307/", "", $item->doi);

        // Is the DOI valid? (not all DOIs in the HTML metadata are valid)
        $crossref_item = new stdClass();
        if (doi_metadata($item->doi, $crossref_item)) {
            // DOI is cool, so add journal name
            if (isset($crossref_item->title)) {
                $item->title = $crossref_item->title;
            }
        } else {
            // Dud DOI, so remove it from the metadata
            unset($item->doi);
        }
    }

    // Might not have journal name
    if (!isset($item->title) && isset($item->issn)) {
        $title = journal_title_from_issn($item->issn);
        if ($title != '') {
            $item->title = $title;
        }
    }

    return $found;
}