array_push($refs, 'Edinburgh J. Bot. 66(1): 110 (-113; fig. 2, map). 2009 [Mar 2009]'); array_push($refs, 'Taxon 55(2): 467 (466; fig. 1) 2006 [22 Jun 2006]'); array_push($refs, 'Blumea 50(1): 58 (-60; fig. 9) 2005'); array_push($refs, 'Pl. Syst. Evol. 246(3-4): 241 2004[27 July 2004]'); array_push($refs, 'Novon 13(4): 384 (5 Dec. 2003)'); array_push($refs, 'Amer J. Bot. 89(4): 702 (699-706; figs. 1-4) 2002'); array_push($refs, 'Brittonia 54(4): 354 (-356; fig. 2A-D) 2002 [16 Apr 2003]'); array_push($refs, 'Brittonia 53(4): 559 (2001 publ. 2002)'); // check what is actual publication date... array_push($refs, 'Bradleya 26: 92. 2008 [18 Jul 2008]'); array_push($refs, 'Austral Syst Biol 18(2): 202 (-203, 195; fig. 10d (map)) 2005'); array_push($refs, 'Bot. J. Linn. Soc. 159(3): 430 (fig. 5, map). 2009 [12 Mar 2009] '); array_push($refs, 'Pl. Syst. Evol. 278(1-2): 120. 2009 [Mar 2009]'); array_push($refs, 'Taxon 58(1): 317. 2009'); array_push($refs, 'Acta Bot. Hung. 51(1-2): 21 (-23). 2009 [Mar 2009'); array_push($refs, 'Acta Bot. Hung. 51(1-2): 11 (-14; fig. 1). 2009 [Mar 2009]'); array_push($refs, 'Madro–o 55(3):188. 2008 [Jul 208]'); $ok = 0; foreach ($refs as $str) { $matched = parse_ipni_ref($str, $matches, 1); if ($matched) { $ok++; } else { array_push($failed, $str); } } // report echo "--------------------------\n"; echo count($refs) . ' references, ' . (count($refs) - $ok) . ' failed' . "\n"; print_r($failed); }
/**
 * Harvest names recently added to IPNI for the family held in $this->title.
 *
 * Steps:
 *  1. Query the IPNI advanced search for records of that family added since
 *     two months ago, requesting the "delimited-minimal" output format
 *     ('%'-separated columns, first line is the header), e.g.
 *       Id%Version%Family%Full name without family and authors%Authors
 *       77096980-1%1.2%Begoniaceae%Begonia hekensis%D.C.Thomas
 *  2. Build one feed item per data row (title, LSID, link, HTML description).
 *  3. Fetch the RDF for the LSID via ResolveGuid() and pull out the
 *     publication string plus the created/modified dates.
 *  4. If parse_ipni_ref() understands the publication string, ask the
 *     bioguid.info OpenURL resolver for identifiers (DOI, PMID, handle, URL)
 *     and attach them to the item.
 *  5. Hand every item to $this->StoreFeedItem().
 *
 * An item is stored even when its metadata could not be retrieved or parsed.
 *
 * NOTE(review): names, authors and publication strings are concatenated into
 * the HTML description without escaping - confirm whether StoreFeedItem() or
 * the feed writer escapes them.
 */
function Harvest()
{
    global $debug;

    // Only ask for records added during the last two months.
    $addedSince = date("Y-m-d", strtotime("now - 2 months"));

    // The repeated on/off (and on/false) pairs are part of the query string
    // this harvester has always sent; they are kept exactly as they were.
    $searchUrl = 'http://www.ipni.org/ipni/advPlantNameSearch.do?find_family=' . urlencode($this->title)
        . '&find_genus=&find_species=&find_infrafamily=&find_infragenus=&find_infraspecies='
        . '&find_authorAbbrev='
        . '&find_includePublicationAuthors=on&find_includePublicationAuthors=off'
        . '&find_includeBasionymAuthors=on&find_includeBasionymAuthors=off'
        . '&find_publicationTitle=&show_extras=on&find_geoUnit='
        . '&find_addedSince=' . $addedSince
        . '&find_modifiedSince='
        . '&find_isAPNIRecord=on&find_isAPNIRecord=false'
        . '&find_isGCIRecord=on&find_isGCIRecord=false'
        . '&find_isIKRecord=on&find_isIKRecord=false'
        . '&find_rankToReturn=all&output_format=delimited-minimal'
        . '&find_sortByFamily=on&find_sortByFamily=off'
        . '&query_type=by_query&back_page=plantsearch';

    // Cast so that a failed fetch (false/null) is treated as an empty response.
    $text = trim((string) get($searchUrl));

    if ($debug) {
        echo $searchUrl . "\n";
        echo $text . "\n";
    }

    // Get array of individual lines
    $lines = explode("\n", $text);

    // Map each column name in the header row to its position.
    // rtrim() drops a stray "\r" in case the response uses CRLF line endings.
    $heading = array();
    foreach (explode('%', rtrim($lines[0], "\r")) as $index => $column) {
        $heading[$column] = $index;
    }

    // Without the expected columns the response is not a result list
    // (empty result, error page, ...), so there is nothing to harvest.
    $nameColumn = 'Full name without family and authors';
    foreach (array('Id', $nameColumn, 'Authors') as $required) {
        if (!isset($heading[$required])) {
            return;
        }
    }
    $idIndex      = $heading['Id'];
    $nameIndex    = $heading[$nameColumn];
    $authorsIndex = $heading['Authors'];

    // Read each remaining line
    $lineCount = count($lines);
    for ($i = 1; $i < $lineCount; $i++) {
        $line = rtrim($lines[$i], "\r");
        if ($line === '') {
            continue;
        }

        $parts = explode('%', $line);
        if (!isset($parts[$idIndex], $parts[$nameIndex], $parts[$authorsIndex])) {
            // Truncated or malformed row.
            continue;
        }

        // Add elements to the feed item
        $lsid = 'urn:lsid:ipni.org:names:' . $parts[$idIndex];

        $item = new stdClass();
        $item->title = $parts[$nameIndex];
        $item->id    = $lsid;
        $item->link  = 'http://www.ipni.org/ipni/idPlantNameSearch.do?id=' . $parts[$idIndex];

        // Name in italics followed by the authors; rank markers are pulled
        // back out of the italics.
        $item->description = '<i>' . $parts[$nameIndex] . '</i> ' . $parts[$authorsIndex];
        $item->description = str_replace('subsp.', '</i>subsp.<i>', $item->description);
        $item->description = str_replace('var.', '</i>var.<i>', $item->description);

        // Identifiers
        $item->links = array();

        // Retrieve metadata for the LSID.
        $rdf   = ResolveGuid($lsid);
        $xpath = null;

        if (is_string($rdf) && $rdf !== '') {
            // Fix IPNI bug: a bare " & " is not well-formed XML and would
            // make loadXML() fail, so turn it into an entity first.
            $rdf = str_replace(' & ', ' &amp; ', $rdf);

            if ($debug) {
                echo $rdf;
            }

            $dom = new DOMDocument();
            if ($dom->loadXML($rdf)) {
                $xpath = new DOMXPath($dom);
            }
        }

        if ($xpath !== null) {
            // Publication string(s), each possibly resolvable to identifiers.
            $publishedNodes = $xpath->query('//tcom:publishedIn');
            if ($publishedNodes !== false) {
                foreach ($publishedNodes as $node) {
                    if ($node->firstChild === null) {
                        continue;
                    }

                    $publishedIn = $node->firstChild->nodeValue;
                    $item->description .= '<br/>' . $publishedIn;

                    // Can we get any GUIDs for this...?
                    $matches = array();
                    if (!parse_ipni_ref($publishedIn, $matches)) {
                        // Don't understand this reference at all...
                        continue;
                    }

                    // We parsed it OK, now find a GUID.
                    // Assumes parse_ipni_ref() fills 'journal', 'volume' and
                    // 'page' on success - TODO confirm against its definition.
                    $lookupUrl = 'http://bioguid.info/openurl/?genre=article'
                        . '&title=' . urlencode($matches['journal'])
                        . '&volume=' . urlencode($matches['volume'])
                        . '&pages=' . urlencode($matches['page'])
                        . '&display=json';

                    $ref = json_decode((string) get($lookupUrl));
                    if (!is_object($ref) || !isset($ref->status) || $ref->status !== 'ok') {
                        // No guid found (or no usable answer), but we did parse it OK...
                        continue;
                    }

                    if (isset($ref->doi)) {
                        $item->links[] = array('doi' => $ref->doi);
                        $item->description .= '<br/><a href="http://dx.doi.org/' . $ref->doi . '">doi:' . $ref->doi . '</a>';
                    }
                    if (isset($ref->pmid)) {
                        $item->links[] = array('pmid' => $ref->pmid);
                    }
                    if (isset($ref->hdl)) {
                        $item->links[] = array('hdl' => $ref->hdl);
                        // Label handles as "hdl:", not "doi:".
                        $item->description .= '<br/><a href="http://hdl.handle.net/' . $ref->hdl . '">hdl:' . $ref->hdl . '</a>';
                    }
                    if (isset($ref->url)) {
                        $item->links[] = array('url' => $ref->url);
                        $item->description .= '<br/><a href="' . $ref->url . '">' . $ref->url . '</a>';
                    }
                }
            }

            // Record dates; if several nodes match, the last one wins.
            $dateQueries = array(
                'created' => '//dcterms:created',
                'updated' => '//dcterms:modified',
            );
            foreach ($dateQueries as $property => $query) {
                $dateNodes = $xpath->query($query);
                if ($dateNodes === false) {
                    continue;
                }
                foreach ($dateNodes as $node) {
                    if ($node->firstChild !== null) {
                        $item->$property = $node->firstChild->nodeValue;
                    }
                }
            }
        }

        $this->StoreFeedItem($item);
    }
}