Example #1
0
/**
 * @brief Populate record for one name with details from ION web site using screen scraping
 *
 * Fetches the page at $item->link, scrapes the taxon author and the
 * "Original Description Reference" entry out of the HTML, asks parse_ion_ref()
 * to split that reference into bibliographic fields, queries the bioguid.info
 * OpenURL resolver for identifiers (DOI, PMID, handle, URL) and finally hands
 * the record to store_item().
 *
 * Fields written on $item (only when the corresponding data is found):
 * taxonAuthor, full_publication, publication->{journal, volume, issue, year,
 * spage, epage, date, actualyear, yyyy_mm_dd, doi, pmid, hdl, url}.
 *
 * @param item ION record (modified in place); must expose a `link` property
 *
 */
function ion_process(&$item)
{
    $debug = true;

    // A failed fetch must not reach preg_match() as a non-string value.
    $html = get($item->link);
    if (!is_string($html)) {
        $html = '';
    }

    // --- Taxon author -------------------------------------------------------
    $matches = array();
    if (preg_match('/<\\/h1><p>(.*)<\\/p><div class="documentContent">/', $html, $matches)) {
        $author = html_entity_decode($matches[1]);

        // When list markup leaked into the capture, keep only the text that
        // precedes the first tag.
        if (strpos($author, '</li>') !== false) {
            $pos = strpos($author, '<');
            // Strict comparison: offset 0 is a valid match, not "not found".
            if ($pos !== false) {
                $author = substr($author, 0, $pos);
            }
        }

        $item->taxonAuthor = $author;
    }

    // --- Original description reference -------------------------------------
    if (preg_match('/<h3>Original Description Reference<\\/h3><ul><li>([^<]+|(?R))*<\\/li>/', $html, $matches)) {
        // Group 1 is absent from $matches when the <li> element is empty.
        $description = html_entity_decode(isset($matches[1]) ? $matches[1] : '');

        // Drop the trailing "[Zoological Record ...]" citation marker.
        $pos = strpos($description, '[Zoological Record');
        if ($pos !== false) {
            $description = substr($description, 0, $pos);
        }

        // Remove article title (skipped when the record carries no title,
        // since str_replace() with an empty needle is a no-op anyway).
        if (isset($item->publicationTitle) && $item->publicationTitle !== '') {
            $description = str_replace($item->publicationTitle, '', $description);
        }
        $description = trim($description);

        $item->full_publication = $description;

        // Extract bibliographic details, e.g. from
        // "Natuurwetenschappelijke Studiekring voor Suriname en de Nederlandse Antillen, No. 112 1984: 1-167."
        if (parse_ion_ref($description, $matches)) {
            // Assigning a property on an unset/null value is a fatal error on
            // PHP 8, so make sure the container exists first.
            if (!isset($item->publication) || !is_object($item->publication)) {
                $item->publication = new stdClass();
            }

            $fields = array('journal', 'volume', 'issue', 'year', 'spage', 'epage', 'date', 'actualyear');
            foreach ($fields as $field) {
                // Missing keys become null, which keeps the isset() checks
                // below behaving as "not available".
                $item->publication->$field = isset($matches[$field]) ? $matches[$field] : null;
            }

            if (isset($item->publication->date)) {
                $d = format_date($item->publication->date);
                if ($d != '') {
                    $item->publication->yyyy_mm_dd = $d;
                }
            }
        }
    }

    // --- Identifier lookup (DOI, PMID, handle, URL) -------------------------
    if (isset($item->publication->journal) && isset($item->publication->volume) && isset($item->publication->spage)) {
        // Every value is URL-encoded so characters such as '&', '#' or spaces
        // in a volume/page string cannot corrupt the query.
        $url = 'http://bioguid.info/openurl?genre=article'
            . '&title=' . urlencode($item->publication->journal)
            . '&volume=' . urlencode($item->publication->volume)
            . '&spage=' . urlencode($item->publication->spage)
            . '&display=json';

        $response = get($url);
        // json_decode() yields null on malformed input; treat that as "no result".
        $j = is_string($response) ? json_decode($response) : null;

        if ($debug) {
            print_r($j);
        }

        if (is_object($j) && isset($j->status) && $j->status === 'ok') {
            foreach (array('doi', 'pmid', 'hdl', 'url') as $identifier) {
                if (isset($j->$identifier)) {
                    $item->publication->$identifier = $j->$identifier;
                }
            }
        }
    }

    // Store
    store_item($item);
}
Example #2
0
                 $item->doi = $j->doi;
             }
             if (isset($j->pmid)) {
                 $item->pmid = $j->pmid;
             }
             if (isset($j->hdl)) {
                 $item->hdl = $j->hdl;
             }
             if (isset($j->url)) {
                 $item->url = $j->url;
             }
         }
     }
 }
 print_r($item);
 $item_id = store_item($item);
 //	if ($item->pmid != '') exit();
 //-----------------------------------------------------------------------------------------------
 // Handle names
 // keywords are taxon names uBio has extracted from articles/abstracts
 foreach ($item->keywords as $k) {
     echo $k, "\n";
 }
 // Do our thang
 $annotations = extract_new_names(strip_tags($item->title), $item->keywords);
 echo "Names--------------------\n";
 print_r($annotations);
 // store names
 foreach ($annotations as $k => $v) {
     // lookup name
     $namebankID = 0;