/**
 * Fetch a CiNii RDF record, convert it to an object with the xsl/cinii.xsl
 * stylesheet, and clean up the resulting metadata.
 *
 * The RDF is retrieved with get(), transformed to JSON by the stylesheet and
 * decoded with json_decode(). The decoded object then gets:
 *  - an ISSN (from $issn, or looked up from the journal title) if missing,
 *  - a journal title looked up from the ISSN if missing,
 *  - a publisher_id derived from the record URL,
 *  - lastname/forename fields on every author,
 *  - a start page without a leading "p",
 *  - the Japanese article title as a fallback for an empty atitle.
 *
 * If the RDF cannot be retrieved or does not decode to an object, $item is
 * left untouched.
 *
 * @param string $rdf_url URL of the RDF record.
 * @param object $item    Receives the decoded and cleaned record (by reference).
 * @param string $issn    ISSN to use when the record itself does not carry one.
 * @param int    $debug   Non-zero echoes the intermediate JSON and the item.
 *
 * @return int Always 0; the function never changes its result code.
 */
function cinii_rdf($rdf_url, &$item, $issn = '', $debug = 0)
{
    $result = 0;

    $rdf = get($rdf_url);

    // DOMDocument::loadXML() rejects empty input (ValueError on PHP 8), so
    // bail out before touching the XML machinery.
    if (!is_string($rdf) || trim($rdf) === '') {
        return $result;
    }

    // Get JSON by running the RDF through the CiNii stylesheet
    $xsl = new DOMDocument();
    $xsl->load('xsl/cinii.xsl');

    $xp = new XSLTProcessor();
    $xp->importStylesheet($xsl);

    $xml_doc = new DOMDocument();
    $xml_doc->loadXML($rdf);

    $json = $xp->transformToXML($xml_doc);

    if ($debug) {
        echo $json;
    }

    // A failed transform yields a non-string; a malformed one yields JSON
    // that does not decode to an object. Either way there is nothing to clean.
    $decoded = is_string($json) ? json_decode($json) : null;
    if (!is_object($decoded)) {
        return $result;
    }
    $item = $decoded;

    if ($debug) {
        print_r($item);
    }

    // Ensure we have ISSN (might not be in the metadata)
    if (!isset($item->issn) || $item->issn == '') {
        if ($issn != '') {
            $item->issn = $issn;
        } elseif (isset($item->title)) {
            $issn = issn_from_journal_title($item->title);
            if ($issn != '') {
                $item->issn = $issn;
            }
        }
    }

    // Check we have journal name
    if (!isset($item->title) || $item->title == '') {
        if (isset($item->issn) && $item->issn != '') {
            $item->title = journal_title_from_issn($item->issn);
        } else {
            // No ISSN to look the title up with; keep the property defined.
            $item->title = '';
        }
    }

    // Id
    $item->publisher_id = str_replace(
        'http://ci.nii.ac.jp/naid/',
        '',
        isset($item->url) ? $item->url : ''
    );

    // Do some cleaning of authors
    if (isset($item->authors) && (is_array($item->authors) || is_object($item->authors))) {
        foreach ($item->authors as $a) {
            if (!is_object($a) || !isset($a->author)) {
                continue;
            }

            // Last name in ALL CAPS
            if (preg_match('/^(?<lastname>[A-Z]+),?\\s*(?<forename>[A-Z](.*)$)/', $a->author, $matches)) {
                $a->lastname = mb_convert_case(
                    $matches['lastname'],
                    MB_CASE_TITLE,
                    mb_detect_encoding($matches['lastname'])
                );
                $a->forename = mb_convert_case(
                    $matches['forename'],
                    MB_CASE_TITLE,
                    mb_detect_encoding($matches['forename'])
                );
            } else {
                // "Last, First"; a name without a comma has no forename part
                $parts = explode(",", $a->author);
                $a->lastname = trim($parts[0]);
                $a->forename = isset($parts[1]) ? trim($parts[1]) : '';
            }
        }
    }

    // Clean pages
    if (isset($item->spage)) {
        $item->spage = preg_replace('/^p/', '', $item->spage);
    }

    // Title if not english...
    if (!isset($item->atitle) || $item->atitle == '') {
        if (isset($item->jp_atitle)) {
            $item->atitle = $item->jp_atitle;
        }
    }

    if ($debug) {
        print_r($item);
    }

    return $result;
}
/**
 * Look up an article on JSTOR by SICI and fill $item with its metadata.
 *
 * The SICI resolver page is fetched with get(). If it shows the
 * "We're Sorry" error heading, nothing else is done. Otherwise the dc.*
 * <meta> tags are handed to parseDcMeta(), the SICI itself is unpacked for
 * ISSN / year / volume / issue / start page, a stable URL is built from the
 * DOI, the DOI is checked with doi_metadata() (and dropped if it does not
 * resolve), and the journal title is looked up from the ISSN when still
 * missing.
 *
 * Reads the globals $config (key 'proxy_name') and $debug.
 *
 * @param string $sici SICI identifying the article.
 * @param object $item Metadata object to populate (by reference).
 *
 * @return bool False when JSTOR reports an error page, true otherwise.
 */
function jstor_metadata($sici, &$item)
{
    global $config;
    global $debug;

    $found = false;

    $url = 'http://links.jstor.org/sici?sici=' . urlencode($sici);
    $html = get($url);

    if ($debug) {
        echo '<pre style="text-align: left;border: 1px solid #c7cfd5;background: #f1f5f9;padding:15px;">';
        echo $url . "\n";
        echo htmlentities($html);
        echo "</pre>";
    }

    // Check for any error messages
    if (preg_match("/<h2>We're Sorry<\\/h2>/", $html)) {
        return $found;
    }
    $found = true;

    // An empty or missing proxy name means we are outside Glasgow and get
    // the metadata directly from the page we already have.
    $proxy_name = isset($config['proxy_name']) ? $config['proxy_name'] : '';
    if ('' != $proxy_name) {
        // Inside Glasgow, we are licensed, so we need one more step:
        // extract the stable identifier and harvest the info page.
        if (preg_match('/stable\\/info\\/(?<jstorid>\\d+)\\?/', $html, $match)) {
            $item->url = 'http://www.jstor.org/stable/' . $match['jstorid'];
            $html = get('http://www.jstor.org/stable/info/' . $match['jstorid']);
        }
    }

    // Add line feeds so the regular expression works (each <meta> tag ends
    // up on its own line, which keeps the greedy .* from spanning tags)
    $html = str_replace('<meta', "\n<meta", $html);

    // Pull out the meta tags
    preg_match_all(
        "|<meta\\s*name=\"(dc.[A-Za-z]*)\"\\s*(scheme=\"(.*)\")?\\s*(content=\"(.*)\")><\\/meta>|",
        $html,
        $out,
        PREG_PATTERN_ORDER
    );
    parseDcMeta($out, $item);

    if ($debug) {
        echo '<h3>metadata</h3>';
        print_r($out);
    }

    $out = unpack_sici($sici);

    if (isset($out['issn'])) {
        $item->issn = $out['issn'];
    }
    if (isset($out['year'])) {
        $item->year = $out['year'];
    }

    // Some JSTOR articles, such as Copeia, have all three elements in the enumeration,
    // so that the volume and issue are the second and third elements
    if (isset($out['locn'])) {
        // Guard on the element actually read ('issue'), not only on 'volume'
        if (isset($out['volume']) && isset($out['issue'])) {
            $item->volume = $out['issue'];
        }
        if (isset($out['issue'])) {
            $item->issue = $out['locn'];
        }
    } else {
        if (isset($out['volume'])) {
            $item->volume = $out['volume'];
        }
        if (isset($out['issue'])) {
            $item->issue = $out['issue'];
        }
    }

    if (isset($out['site'])) {
        $item->spage = $out['site'];
    }

    // Handle identifiers

    // Make stable URL
    if (isset($item->doi)) {
        $stable = str_replace("10.2307/", "", $item->doi);
        $item->url = 'http://www.jstor.org/stable/' . $stable;
    }

    // Is the DOI valid? (not all DOIs in the HTML metadata are valid)
    if (isset($item->doi)) {
        $crossref_item = new stdClass();
        $exists = doi_metadata($item->doi, $crossref_item);
        if ($exists) {
            // DOI is cool, so add journal name
            if (isset($crossref_item->title)) {
                $item->title = $crossref_item->title;
            }
        } else {
            // Dud DOI, so remove it from the metadata
            unset($item->doi);
        }
    }

    // Might not have journal name; only look it up when an ISSN is known
    if (!isset($item->title) && isset($item->issn)) {
        $title = journal_title_from_issn($item->issn);
        if ($title != '') {
            $item->title = $title;
        }
    }

    return $found;
}