/**
 * @brief Populate record for one name with details from ION web site using screen scraping
 *
 * Steps, in order:
 *  - fetch the page at $item->link via get() (helper defined elsewhere),
 *  - scrape the taxon author into $item->taxonAuthor,
 *  - scrape the "Original Description Reference" into $item->full_publication
 *    (with the trailing "[Zoological Record ..." note and the article title removed),
 *  - hand that reference to parse_ion_ref() and copy the parsed fields into
 *    $item->publication,
 *  - if journal, volume and start page are known, query the bioguid.info OpenURL
 *    resolver and copy any doi / pmid / hdl / url it reports,
 *  - pass the record to store_item().
 *
 * @param object $item ION record; read from (link, publicationTitle) and modified in place
 *
 */
function ion_process(&$item)
{
	// NOTE(review): debug output is hard-wired on, so the resolver response is always printed.
	$debug = true;

	$html = get($item->link);

	$matches = array();

	// Taxon author: the paragraph between the page heading and the document body.
	if (preg_match('/<\\/h1><p>(.*)<\\/p><div class="documentContent">/', $html, $matches))
	{
		$author = html_entity_decode($matches[1]);

		// When the capture has run on into list markup, keep only the text before
		// the first tag. A "</li>" being present guarantees a "<" exists, and a tag
		// at offset 0 correctly yields an empty author (the old "!= false" test
		// skipped truncation in that case).
		if (strpos($author, '</li>') !== false)
		{
			$author = substr($author, 0, strpos($author, '<'));
		}

		$item->taxonAuthor = $author;
	}

	// Original description reference, e.g.
	// "Natuurwetenschappelijke Studiekring voor Suriname en de Nederlandse Antillen, No. 112 1984: 1-167."
	if (preg_match('/<h3>Original Description Reference<\\/h3><ul><li>([^<]+|(?R))*<\\/li>/', $html, $matches))
	{
		// The repeated group may match zero times, leaving index 1 unset.
		$description = html_entity_decode(isset($matches[1]) ? $matches[1] : '');

		// Drop the trailing "[Zoological Record ...]" note; strict comparison so a
		// marker at offset 0 is still recognised.
		$pos = strpos($description, '[Zoological Record');
		if ($pos !== false)
		{
			$description = substr($description, 0, $pos);
		}

		// Remove article title (only when there is one to remove)
		if (isset($item->publicationTitle) && $item->publicationTitle !== '')
		{
			$description = str_replace($item->publicationTitle, '', $description);
		}
		$description = trim($description);

		$item->full_publication = $description;

		// Extract bibliographic details
		if (parse_ion_ref($description, $matches))
		{
			// Assigning through a missing property is a fatal Error on PHP 8,
			// so make sure the container object exists first.
			if (!isset($item->publication))
			{
				$item->publication = new stdClass();
			}

			$fields = array('journal', 'volume', 'issue', 'year', 'spage', 'epage', 'date', 'actualyear');
			foreach ($fields as $field)
			{
				// Fields the parser did not capture are stored as null, as before,
				// but without an undefined-index notice.
				$item->publication->$field = isset($matches[$field]) ? $matches[$field] : null;
			}

			if (isset($item->publication->date))
			{
				$d = format_date($item->publication->date);
				if ($d != '')
				{
					$item->publication->yyyy_mm_dd = $d;
				}
			}
		}
	}

	// Look up external identifiers (DOI etc.) once enough of the citation is known.
	if (isset($item->publication->journal)
		&& isset($item->publication->volume)
		&& isset($item->publication->spage))
	{
		// Every user-derived component is URL-encoded, not just the title.
		$url = 'http://bioguid.info/openurl?genre=article'
			. '&title=' . urlencode($item->publication->journal)
			. '&volume=' . urlencode($item->publication->volume)
			. '&spage=' . urlencode($item->publication->spage)
			. '&display=json';

		$j = json_decode(get($url));

		if ($debug)
		{
			print_r($j);
		}

		// json_decode() yields null on an empty or invalid response; only trust
		// a decoded object that explicitly reports success.
		if (is_object($j) && isset($j->status) && $j->status === 'ok')
		{
			foreach (array('doi', 'pmid', 'hdl', 'url') as $identifier)
			{
				if (isset($j->$identifier))
				{
					$item->publication->$identifier = $j->$identifier;
				}
			}
		}
	}

	// Store
	store_item($item);
}
$item->doi = $j->doi; } if (isset($j->pmid)) { $item->pmid = $j->pmid; } if (isset($j->hdl)) { $item->hdl = $j->hdl; } if (isset($j->url)) { $item->url = $j->url; } } } } print_r($item); $item_id = store_item($item); // if ($item->pmid != '') exit(); //----------------------------------------------------------------------------------------------- // Handle names // keywords are taxon names uBio has extracted from articles/abstracts foreach ($item->keywords as $k) { echo $k, "\n"; } // Do our thang $annotations = extract_new_names(strip_tags($item->title), $item->keywords); echo "Names--------------------\n"; print_r($annotations); // store names foreach ($annotations as $k => $v) { // lookup name $namebankID = 0;