/**
 * Harvest article links from every configured feed and resolve each one.
 *
 * For each feed URL the raw feed is fetched with GetRSS(), known
 * publisher-specific junk is stripped, the XML is parsed, and item links are
 * collected for RSS 2.0 and RSS 1.0 layouts. Each link is then either handed
 * to parse_nuytsia() (Nuytsia links) or looked up through the OpenURL
 * resolver, whose JSON reply is decoded and dumped.
 *
 * Progress and debugging output is written with echo/print_r.
 * Feeds that cannot be fetched or parsed are skipped so that one bad feed
 * does not abort the whole run.
 *
 * NOTE(review): all feed URLs, namespace URIs and the resolver base URL are
 * empty strings in this file. With empty namespace URIs the prefixed RSS 1.0
 * query probably cannot match anything -- confirm and restore real values.
 */
function main()
{
    // Feed URLs to harvest; each entry is passed to GetRSS() unchanged.
    $feeds = array('', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '');

    // Publisher-specific junk that has to be removed before the XML will parse.
    $cleanups = array(
        // Clean up Zookeys ATOM feed
        '/<\\/feed><br(.*)$/' => '</feed>',
        // Clean up Zookeys RSS1 feed
        '/<\\/rdf:RDF><br(.*)$/' => '</rdf:RDF>',
        // Clean up Zotero ATOM feed
        '/^Content-Type: application\\/atom\\+xml/' => '',
    );

    // Namespace prefixes registered with XPath so the prefixed queries work.
    $namespaces = array(
        'rdf'      => "",
        'annotate' => "",
        'content'  => "",
        'rss'      => "",
        'slash'    => "",
        'dcterms'  => "",
        'dc'       => "",
        'atom'     => "",
    );

    // Item link locations, tried in this order (results are concatenated).
    $link_queries = array(
        "//rss/channel/item/link",      // RSS 2.0
        "//rdf:RDF/rss:item/rss:link",  // RSS 1.0
    );

    foreach ($feeds as $url) {
        // GetRSS() is expected to fill $rss (presumably by reference -- confirm)
        // and the code below treats a result of 0 as success.
        $result = GetRSS($url, $rss, true);
        echo $result . "\n";

        if ($result != 0) {
            continue;
        }

        echo $rss;

        // Collapse the feed onto a single line so the `$`-anchored cleanup
        // patterns see the real end of the document.
        $rss = str_replace("\n", '', $rss);
        $rss = str_replace("\r", '', $rss);

        foreach ($cleanups as $pattern => $replacement) {
            $cleaned = preg_replace($pattern, $replacement, $rss);
            // preg_replace() yields null on a PCRE error; keep the text we had.
            if ($cleaned !== null) {
                $rss = $cleaned;
            }
        }

        // Nothing to parse: loadXML() rejects an empty source (PHP 8 throws).
        if ((string) $rss === '') {
            echo "Empty feed, skipping\n";
            continue;
        }

        $dom = new DOMDocument();
        if (!$dom->loadXML($rss)) {
            echo "Could not parse feed, skipping\n";
            continue;
        }

        $xpath = new DOMXPath($dom);

        // Add namespaces to XPath to ensure our queries work
        foreach ($namespaces as $prefix => $uri) {
            $xpath->registerNamespace($prefix, $uri);
        }

        // Extract links (feed-type specific)
        $links = array();
        foreach ($link_queries as $xpath_query) {
            $nodeCollection = $xpath->query($xpath_query);

            // query() returns false for an expression it cannot evaluate
            // (e.g. an unusable namespace prefix); treat that as "no links".
            if ($nodeCollection === false) {
                continue;
            }

            foreach ($nodeCollection as $node) {
                // textContent is safe for empty elements (no child node) and
                // for content split across several text/CDATA nodes.
                $link = trim($node->textContent);
                if ($link !== '') {
                    $links[] = $link;
                }
            }
        }

        // Feed-level metadata (title, description, link) for RSS 1.0 and Atom
        // used to be sketched here as disabled code; it is not needed for link
        // harvesting. Note that no item links are extracted for Atom feeds.

        // Add to bioguid via OpenURL
        print_r($links);

        foreach ($links as $link) {
            echo "Link={$link}\n";

            // Journal-specific handling.
            // Nuytsia is complicated as the link is a database query that may
            // return multiple papers (e.g., the same author may have more than
            // one paper in a volume).
            if (preg_match('/http:\\/\\/\\/nuytsia\\//', $link)) {
                parse_nuytsia($link);
                continue;
            }

            // Default, link is URL of a single article...
            // (separate variable so the feed loop's $url is not clobbered)
            $lookup_url = "" . urlencode($link) . "&display=json";
            $json = get($lookup_url);
            $obj = json_decode($json);
            print_r($obj);
        }
    }
}
} } if (preg_match('/<a href="(?<pdf>(http:\\/\\/*)\\.pdf))/', $pp, $match)) { $item->pdf = $match['pdf']; } if (preg_match('/\\([0-9]{4}\\).(?<atitle>(.*))<i>Nuytsia<\\/i>/', $pp, $match)) { $item->atitle = strip_tags($match['atitle']); } // <i>Nuytsia</i> <u>19</u> (1) : 191–196 // page separator is en dash 2013 if (preg_match('/<i>(Nuytsia)<\\/i> <u>(?<volume>(.*))<\\/u>\\s*\\((?<issue>(.*))\\)\\s*:\\s*(?<spage>[0-9]+)–(?<epage>(.*))\\.<\\/p>/', $pp, $match)) { $item->title = 'Nuytsia'; $item->volume = $match['volume']; $item->issue = $match['issue']; $item->spage = $match['spage']; $item->epage = $match['epage']; $item->issn = '0085-4417'; } print_r($item); // Store reference here... if (find_in_cache($item) == 0) { store_in_cache($item); } } } } // test if (0) { $url = ''; parse_nuytsia($url); }