/**
 * Harvest article links from a fixed list of journal feeds and hand each one
 * to the bioguid OpenURL resolver.
 *
 * For every feed URL this function:
 *   1. calls GetRSS() and only continues when the result compares equal to 0;
 *   2. strips line breaks and known junk surrounding the XML document;
 *   3. collects item links using the RSS 2.0 and RSS 1.0 (RDF) layouts;
 *   4. passes Nuytsia links to parse_nuytsia() and every other link to
 *      http://bioguid.info/openurl, printing the decoded JSON response.
 *
 * Feeds that are empty or cannot be parsed as XML are reported and skipped so
 * that one bad feed does not stop the remaining feeds from being processed.
 * Atom feeds are cleaned up but no links are extracted from them: only the
 * RSS 2.0 and RSS 1.0 item paths are queried.
 *
 * Progress and results are echoed as plain text.
 *
 * @return void
 */
function main()
{
    // Each feed is listed once; a duplicated entry would be fetched and
    // resolved twice.
    $feeds = array(
        // www.bioone.org
        'http://www.bioone.org/action/showFeed?type=etoc&feed=rss&jc=harv',
        'http://www.bioone.org/action/showFeed?type=etoc&feed=rss&jc=novi',
        'http://www.bioone.org/action/showFeed?type=etoc&feed=rss&jc=afzo',
        'http://www.bioone.org/action/showFeed?type=etoc&feed=rss&jc=mobt',
        'http://www.bioone.org/action/showFeed?type=etoc&feed=rss&jc=cara',
        'http://www.bioone.org/action/showFeed?type=etoc&feed=rss&jc=esaa',
        'http://www.bioone.org/action/showFeed?type=etoc&feed=rss&jc=brvo',
        'http://www.bioone.org/action/showFeed?type=etoc&feed=rss&jc=brit',
        'http://www.bioone.org/action/showFeed?type=etoc&feed=rss&jc=amnb',
        'http://www.bioone.org/action/showFeed?type=etoc&feed=rss&jc=acta',
        'http://www.bioone.org/action/showFeed?type=etoc&feed=rss&jc=jzoo',

        // api.ingentaconnect.com
        'http://api.ingentaconnect.com/content/rbsb/bjb/latest?format=rss',
        'http://api.ingentaconnect.com/content/iapt/tax/latest?format=rss',
        'http://api.ingentaconnect.com/content/schweiz/novh/latest?format=rss',
        'http://api.ingentaconnect.com/content/nhn/pimj/latest?format=rss',
        'http://api.ingentaconnect.com/content/rssa/trssa/latest?format=rss',
        'http://api.ingentaconnect.com/content/esa/jme/latest?format=rss',
        'http://api.ingentaconnect.com/content/esa/aesa/latest?format=rss',

        // www.scielo.br
        'http://www.scielo.br/rss.php?pid=0085-562620090001&lang=en',
        'http://www.scielo.br/rss.php?pid=0100-8404&lang=en',
        'http://www.scielo.br/rss.php?pid=0074-0276&lang=en',
        'http://www.scielo.br/rss.php?pid=0031-1049&lang=en',
        'http://www.scielo.br/rss.php?pid=1519-566X&lang=en',
        'http://www.scielo.br/rss.php?pid=0101-817520080004&lang=en',
        'http://www.scielo.br/rss.php?pid=1679-6225&lang=en',
        'http://www.scielo.br/rss.php?pid=0102-330620080004&lang=en',

        // www3.interscience.wiley.com
        'http://www3.interscience.wiley.com/rss/journal/118902517',
        'http://www3.interscience.wiley.com/rss/journal/118506135',
        'http://www3.interscience.wiley.com/rss/journal/117964631',

        // Other publishers
        'http://www.publish.csiro.au/RSS_Feed/CSIRO_Publishing_Recent_SB.xml',
        'http://www.publish.csiro.au/RSS_Feed/CSIRO_Publishing_Recent_IS.xml',
        'http://rss.sciencedirect.com/publication/science/6963',
        'http://www.springerlink.com/content/0165-5752?sortorder=asc&export=rss',
        'http://www.akademiai.com/content/jw080595p305/?sortorder=asc&export=rss',
        'http://www.hindawi.com/journals/psyche/rss.xml',
        'http://science.dec.wa.gov.au/nuytsia/nuytsia.rss.xml',
        'http://pensoftonline.net/zookeys/index.php/journal/gateway/plugin/WebFeedGatewayPlugin/rss',
    );

    // Junk that some providers wrap around the XML document, keyed by the
    // pattern that matches it and mapped to what should remain.
    $cleanups = array(
        // Zookeys ATOM feed: HTML appended after the closing tag
        '/<\\/feed><br(.*)$/' => '</feed>',
        // Zookeys RSS1 feed: HTML appended after the closing tag
        '/<\\/rdf:RDF><br(.*)$/' => '</rdf:RDF>',
        // Zotero ATOM feed: HTTP header line left in front of the document
        '/^Content-Type: application\\/atom\\+xml/' => '',
    );

    // Prefixes made available to XPath queries on the parsed feed.
    $namespaces = array(
        'rdf'      => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
        'annotate' => 'http://purl.org/rss/1.0/modules/annotate/',
        'content'  => 'http://purl.org/rss/1.0/modules/content/',
        'rss'      => 'http://purl.org/rss/1.0/',
        'slash'    => 'http://purl.org/rss/1.0/modules/slash/',
        'dcterms'  => 'http://purl.org/dc/terms/',
        'dc'       => 'http://purl.org/dc/elements/1.1/',
        'atom'     => 'http://www.w3.org/2005/Atom',
    );

    // Item link locations, tried in this order; results are concatenated.
    $link_queries = array(
        '//rss/channel/item/link',     // RSS 2.0
        '//rdf:RDF/rss:item/rss:link', // RSS 1.0
    );

    foreach ($feeds as $url) {
        // NOTE(review): GetRSS() is defined elsewhere; $rss is presumably
        // filled by reference and the meaning of the third argument is not
        // visible here - confirm against its definition.
        $result = GetRSS($url, $rss, true);
        echo $result . "\n";

        // Only a result comparing equal to 0 is processed.
        if ($result != 0) {
            continue;
        }

        echo $rss;

        // Flatten the document so the end-of-string anchored cleanups below
        // see the whole feed as one line.
        $rss = str_replace("\n", '', $rss);
        $rss = str_replace("\r", '', $rss);

        foreach ($cleanups as $pattern => $replacement) {
            $cleaned = preg_replace($pattern, $replacement, $rss);
            // preg_replace() yields null on a PCRE error; keep the text
            // untouched in that case instead of discarding the feed.
            if ($cleaned !== null) {
                $rss = $cleaned;
            }
        }

        // loadXML() rejects an empty string (a ValueError on PHP 8), which
        // would abort the whole run, so skip such feeds up front.
        if ($rss === '') {
            echo "Empty feed: {$url}\n";
            continue;
        }

        $dom = new DOMDocument();
        if (!$dom->loadXML($rss)) {
            echo "Could not parse feed: {$url}\n";
            continue;
        }

        $xpath = new DOMXPath($dom);
        foreach ($namespaces as $prefix => $uri) {
            $xpath->registerNamespace($prefix, $uri);
        }

        // Extract links (feed-type specific)
        $links = array();
        foreach ($link_queries as $query) {
            foreach ($xpath->query($query) as $node) {
                // nodeValue of the element itself is safe for an empty
                // <link/>, where there is no first child to read from.
                $link = trim($node->nodeValue);
                if ($link !== '') {
                    $links[] = $link;
                }
            }
        }

        // Add to bioguid via OpenURL
        print_r($links);

        foreach ($links as $link) {
            echo "Link={$link}\n";

            // Journal-specific handling.
            // Nuytsia is complicated as the link is a database query that may
            // return multiple papers (e.g., the same author may have more
            // than one paper in a volume).
            if (strpos($link, 'http://science.dec.wa.gov.au/nuytsia/') !== false) {
                parse_nuytsia($link);
                continue;
            }

            // Default: the link is the URL of a single article. A separate
            // variable is used so the feed URL in $url is not overwritten.
            $openurl = "http://bioguid.info/openurl?id=" . urlencode($link) . "&display=json";
            $json = get($openurl);
            $obj = json_decode($json);
            print_r($obj);
        }
    }
}
} } if (preg_match('/<a href="(?<pdf>(http:\\/\\/www.dec.wa.gov.au(.*)\\.pdf))/', $pp, $match)) { $item->pdf = $match['pdf']; } if (preg_match('/\\([0-9]{4}\\).(?<atitle>(.*))<i>Nuytsia<\\/i>/', $pp, $match)) { $item->atitle = strip_tags($match['atitle']); } // <i>Nuytsia</i> <u>19</u> (1) : 191–196 // page separator is en dash 2013 if (preg_match('/<i>(Nuytsia)<\\/i> <u>(?<volume>(.*))<\\/u>\\s*\\((?<issue>(.*))\\)\\s*:\\s*(?<spage>[0-9]+)–(?<epage>(.*))\\.<\\/p>/', $pp, $match)) { $item->title = 'Nuytsia'; $item->volume = $match['volume']; $item->issue = $match['issue']; $item->spage = $match['spage']; $item->epage = $match['epage']; $item->issn = '0085-4417'; } print_r($item); // Store reference here... if (find_in_cache($item) == 0) { store_in_cache($item); } } } } // test if (0) { $url = 'http://science.dec.wa.gov.au/nuytsia/search.php?authors=Rye&volume=19&part=1'; parse_nuytsia($url); }