}

// Test cases for ION
// Disabled (if (0)) ad-hoc harness: runs each sample citation through
// parse_ion_ref() and reports how many could not be parsed.
if (0) {
    $refs = array();
    $failed = array();

    // Further sample citations, currently switched off:
    /*
    array_push($refs, 'A new species of glassfrog from the elfin forests of the Cordillera del Condor, southeastern Ecuador. (Anura: Centrolenidae). Herpetozoa, 21(1-2), 30 Juni 2008: 49-56.');
    array_push($refs, 'A new species of mermithid nematode parasite Romanomermis narayani n. sp. from Culex sp. Mosquito larvae in the rice fields of A.P. Current Nematology, 17(1-2), June-December 2006: 7-15.');
    array_push($refs, 'A review of the Telorchiinae, a group of Distomid Trematodes. Parasitology Cambridge, 20 1928: pp. 336-356.');
    array_push($refs, 'Mas datos para el conocimiento de las esponjas de las costas espanolas. Boletin de Pescas Madrid, 7 1922: pp. 247-272.');
    array_push($refs, 'Caruncle in Megalomma Johansson, 1925 (Polychaeta: Sabellidae) and the description of a new species from the eastern Tropical Pacific. Journal of Natural History, 42(29-30) 2008: 1951-1973.');
    */

    $refs[] = '. Memoirs of the Queensland Museum, 50(2), 10 January 2005: 133-194.';
    $refs[] = '. Venus (Tokyo), 63(3-4), January 2005: 109-119.';

    // Count successes; remember every citation the parser rejected.
    $ok = 0;
    foreach ($refs as $str) {
        if (parse_ion_ref($str, $matches, 1)) {
            $ok++;
            continue;
        }
        $failed[] = $str;
    }

    // report
    echo "--------------------------\n";
    echo count($refs), ' references, ', count($refs) - $ok, ' failed', "\n";
    print_r($failed);
}

// Test cases for IPNI
if (0) {
    $refs = array();
    $failed = array();
/**
 * @brief Populate record for one name with details from ION web site using screen scraping
 *
 * Steps, in order:
 *  1. Fetch the HTML at $item->link with get().
 *  2. Extract the taxon author (paragraph directly after the closing </h1>).
 *  3. Extract the "Original Description Reference" citation, cut off any
 *     trailing "[Zoological Record ..." note, remove the article title and
 *     store the remainder in $item->full_publication.
 *  4. Parse that citation with parse_ion_ref() into $item->publication
 *     (journal, volume, issue, year, spage, epage, date, actualyear) and add
 *     yyyy_mm_dd when format_date() returns a non-empty value.
 *  5. If journal, volume and start page are known, query the bioguid.info
 *     OpenURL resolver and copy doi / pmid / hdl / url when it answers 'ok'.
 *  6. Persist the record with store_item().
 *
 * @param item ION record, modified in place. Reads ->link and
 *             ->publicationTitle; writes ->taxonAuthor, ->full_publication
 *             and fields of ->publication.
 *
 */
function ion_process(&$item)
{
    // NOTE(review): debug output is hard-coded on, so every resolver response
    // is dumped with print_r(). Left unchanged to keep the output as before.
    $debug = true;

    $url = $item->link;
    $html = get($url);

    // --- Taxon author -----------------------------------------------------
    $matches = array();
    if (preg_match('/<\\/h1><p>(.*)<\\/p><div class="documentContent">/', $html, $matches)) {
        $author = html_entity_decode($matches[1]);

        // The greedy capture can run on into following list markup; if so,
        // keep only the text before the first tag.
        if (preg_match('/(.*)<\\/li>/', $author)) {
            $pos = strpos($author, "<");
            // Strict comparison: a tag at offset 0 is a valid match, not "not found".
            if ($pos !== false) {
                $author = substr($author, 0, $pos);
            }
        }

        $item->taxonAuthor = $author;
    }

    // --- Original description reference -------------------------------------
    // Earlier pattern, kept for reference:
    // if (preg_match('/<h4>Original Description Reference<\/h4><ul><li>(.*)\s*\[Zoological/', $html, $matches))
    if (preg_match('/<h3>Original Description Reference<\\/h3><ul><li>([^<]+|(?R))*<\\/li>/', $html, $matches)) {
        // Group 1 is absent when the <li> is empty (the repeated group matched zero times).
        $description = html_entity_decode(isset($matches[1]) ? $matches[1] : '');

        // Drop the trailing "[Zoological Record ...]" note, wherever it starts
        // (including offset 0).
        $pos = strpos($description, "[Zoological Record");
        if ($pos !== false) {
            $description = substr($description, 0, $pos);
        }

        // Remove article title (only when there is one to remove).
        if (isset($item->publicationTitle) && $item->publicationTitle !== '') {
            $description = str_replace($item->publicationTitle, '', $description);
        }
        $description = trim($description);

        $item->full_publication = $description;

        // Example of what is left at this point:
        // Natuurwetenschappelijke Studiekring voor Suriname en de Nederlandse Antillen, No. 112 1984: 1-167.

        // Extract bibliographic details
        if (parse_ion_ref($description, $matches)) {
            // Make sure there is an object to write to; assigning a property
            // on null is a fatal Error in PHP 8.
            if (!isset($item->publication) || !is_object($item->publication)) {
                $item->publication = new stdClass();
            }

            // Copy each field; a key the parser did not supply becomes null,
            // which is the value the unchecked array read produced before,
            // minus the undefined-index warning.
            $fields = array('journal', 'volume', 'issue', 'year', 'spage', 'epage', 'date', 'actualyear');
            foreach ($fields as $field) {
                $item->publication->$field = isset($matches[$field]) ? $matches[$field] : null;
            }

            if (isset($item->publication->date)) {
                $d = format_date($item->publication->date);
                // Loose comparison kept on purpose: format_date()'s failure
                // value is not visible here (could be '', null or false).
                if ($d != '') {
                    $item->publication->yyyy_mm_dd = $d;
                }
            }
        }
    }

    // --- Do stuff for this record... (such as get DOI if it exists) ---------
    if (isset($item->publication->journal)
        && isset($item->publication->volume)
        && isset($item->publication->spage)
    ) {
        // Every value is URL-encoded so characters such as spaces, '&' or
        // parentheses cannot corrupt the query string.
        $url = 'http://bioguid.info/openurl?genre=article'
            . '&title=' . urlencode($item->publication->journal)
            . '&volume=' . urlencode($item->publication->volume)
            . '&spage=' . urlencode($item->publication->spage)
            . '&display=json';

        $j = json_decode(get($url));
        if ($debug) {
            print_r($j);
        }

        // json_decode() yields null on an empty or invalid response; only
        // read the result when it really is an object with a status.
        if (is_object($j) && isset($j->status) && $j->status == 'ok') {
            if (isset($j->doi)) {
                $item->publication->doi = $j->doi;
            }
            if (isset($j->pmid)) {
                $item->publication->pmid = $j->pmid;
            }
            if (isset($j->hdl)) {
                $item->publication->hdl = $j->hdl;
            }
            if (isset($j->url)) {
                $item->publication->url = $j->url;
            }
        }
    }

    // Store
    store_item($item);
}