Beispiel #1
0
/**
 * @brief Parse OpenURL parameters and populate referent
 *
 * Every parameter value is expected to be an array of strings: repeatable
 * keys (rft_id, rft.au) are iterated, all other keys use entry 0. Keys that
 * have no dedicated handler are copied onto the referent under the key name
 * with any "rft." removed. After the key loop the date, page, series, volume
 * and author fields are normalised.
 *
 * @param params Array of OpenURL parameters (key => array of values)
 * @param referent Referent object to populate (modified in place)
 *
 */
function parse_openurl($params, &$referent)
{
    $referent->authors = array();
    foreach ($params as $key => $value) {
        switch ($key) {
            case 'rft_val_fmt':
                // Values arrive as arrays; switching on the array itself can never
                // equal a string label, so compare the first entry instead.
                $format = is_array($value) ? (isset($value[0]) ? $value[0] : '') : $value;
                switch ($format) {
                    case 'info:ofi/fmt:kev:mtx:journal':
                        $referent->genre = 'article';
                        break;
                    case 'info:ofi/fmt:kev:mtx:book':
                        $referent->genre = 'book';
                        break;
                    default:
                        // Do not overwrite a genre already inferred from another key
                        if (!isset($referent->genre)) {
                            $referent->genre = 'unknown';
                        }
                        break;
                }
                break;
            // Article title
            case 'rft.atitle':
            case 'atitle':
                $title = $value[0];
                // Drop a trailing full stop, then markup, then decode entities
                $title = preg_replace('/\\.$/', '', $title);
                $title = strip_tags($title);
                $title = html_entity_decode($title, ENT_NOQUOTES, 'UTF-8');
                $referent->title = $title;
                $referent->genre = 'article';
                break;
            // Book title
            case 'rft.btitle':
            case 'btitle':
                $referent->title = $value[0];
                $referent->genre = 'book';
                break;
            // Journal title
            case 'rft.jtitle':
            case 'rft.title':
            case 'title':
                $secondary_title = trim($value[0]);
                // Strip wiki-style [[ ]] wrapping
                $secondary_title = preg_replace('/^\\[\\[/', '', $secondary_title);
                $secondary_title = preg_replace('/\\]\\]$/', '', $secondary_title);
                $referent->secondary_title = $secondary_title;
                $referent->genre = 'article';
                break;
            case 'rft.issn':
            case 'issn':
                // Only keep ISSNs that the classifier reports as "checksumOK"
                $ISSN_proto = $value[0];
                $clean = ISN_clean($ISSN_proto);
                $class = ISSN_classifier($clean);
                if ($class == "checksumOK") {
                    $referent->issn = canonical_ISSN($ISSN_proto);
                    $referent->genre = 'article';
                }
                break;
            // Identifiers
            case 'rft_id':
            case 'id':
                foreach ($value as $v) {
                    // DOI
                    if (preg_match('/^(info:doi\\/|doi:)(?<doi>.*)/', $v, $match)) {
                        $referent->doi = $match['doi'];
                    }
                    // URL (http or https)
                    if (preg_match('/^https?:\\/\\//', $v)) {
                        $referent->url = $v;
                    }
                    // LSID
                    if (preg_match('/^urn:lsid:/', $v)) {
                        $referent->lsid = $v;
                    }
                }
                break;
            // Authors
            case 'rft.au':
            case 'au':
                foreach ($value as $v) {
                    $parts = parse_name($v);
                    $author = new stdClass();
                    if (isset($parts['last'])) {
                        $author->lastname = $parts['last'];
                    }
                    if (isset($parts['suffix'])) {
                        $author->suffix = $parts['suffix'];
                    }
                    if (isset($parts['first'])) {
                        $author->forename = $parts['first'];
                        if (array_key_exists('middle', $parts)) {
                            $author->forename .= ' ' . $parts['middle'];
                        }
                    }
                    $referent->authors[] = $author;
                }
                break;
            default:
                // Anything else is stored under the key name without "rft."
                $k = str_replace("rft.", '', $key);
                $referent->{$k} = $value[0];
                break;
        }
    }
    // Clean
    // Dates
    if (isset($referent->date)) {
        // A bare year is padded to YYYY-00-00
        if (preg_match('/^[0-9]{4}$/', $referent->date)) {
            $referent->year = $referent->date;
            $referent->date = $referent->date . '-00-00';
        }
        if (preg_match('/^(?<year>[0-9]{4})-(?<month>[0-9]{2})-(?<day>[0-9]{2})$/', $referent->date, $match)) {
            $referent->year = $match['year'];
            $referent->date = $match['year'] . '-' . $match['month'] . '-' . $match['day'];
        }
    }
    // Zotero
    if (isset($referent->pages)) {
        // Note "u" option in regular expression, so that we match UTF-8 characters such as –
        if (preg_match('/(?<spage>[0-9]+)[\\-|–](?<epage>[0-9]+)/u', $referent->pages, $match)) {
            $referent->spage = $match['spage'];
            $referent->epage = $match['epage'];
            unset($referent->pages);
        }
    }
    // Endnote epage may have leading "-" as it splits spage-epage to generate OpenURL
    if (isset($referent->epage)) {
        $referent->epage = preg_replace('/^\\-/', '', $referent->epage);
    }
    // Single page
    if (isset($referent->pages) && is_numeric($referent->pages)) {
        $referent->spage = $referent->pages;
        $referent->epage = $referent->pages;
        unset($referent->pages);
    }
    // Journal titles with series numbers are split into title,series fields.
    // The title group is lazy so the optional comma and the whitespace before
    // "series" are not left on the end of the title.
    if (isset($referent->secondary_title)
        && preg_match('/(?<title>.*?),?\\s+series\\s+(?<series>[0-9]+)$/i', $referent->secondary_title, $match)) {
        $referent->secondary_title = $match['title'];
        $referent->series = $match['series'];
    }
    // Volume is optional; only inspect it when it was supplied
    if (isset($referent->volume)) {
        // Volume might have series information
        if (preg_match('/^series\\s+(?<series>[0-9]+),\\s*(?<volume>[0-9]+)$/i', $referent->volume, $match)) {
            $referent->volume = $match['volume'];
            $referent->series = $match['series'];
        }
        // Roman to Arabic volume
        if (!is_numeric($referent->volume) && preg_match('/^[ivxicl]+$/', $referent->volume)) {
            $referent->volume = arabic($referent->volume);
        }
    }
    // aulast/aufirst, when both present, always define the first author: this
    // both fills an empty author list and replaces a first author parsed from
    // rft.au, so the first author's name is taken from the explicit fields.
    if (isset($referent->aulast) && isset($referent->aufirst)) {
        $author = new stdClass();
        $author->lastname = $referent->aulast;
        $author->forename = $referent->aufirst;
        $referent->authors[0] = $author;
    }
    // EndNote encodes accented characters, which break journal names
    if (isset($referent->secondary_title)) {
        $referent->secondary_title = preg_replace('/%9F/', 'ü', $referent->secondary_title);
    }
}
Beispiel #2
0
/**
 * @brief Classify an identifier string
 *
 * Runs the identifier through a fixed sequence of regular expressions. Each
 * successful match records the identifier type and replaces the working
 * string with the matched (prefix-stripped) portion, so later checks see the
 * cleaned value and a later match overrides an earlier one. The ISSN, URL and
 * OCLC checks only run while the type is still IDENTIFIER_UNKNOWN.
 *
 * @param id Identifier string to classify
 *
 * @return Array with keys 'identifier_type' (one of the IDENTIFIER_* constants)
 *         and 'identifier_string' (the cleaned identifier)
 */
function IdentifierKind($id)
{
    $kind = IDENTIFIER_UNKNOWN;
    $text = $id;
    $m = array();

    // DOI, with or without a resolver / scheme prefix
    if (preg_match('/^(http:\\/\\/dx.doi.org\\/|doi:|info:doi\\/|info:doi\\/http:\\/\\/dx.doi.org\\/)?(10.[0-9]*\\/(.*))/i', $text, $m)) {
        $kind = IDENTIFIER_DOI;
        $text = $m[2];
    }

    // Handle (skipped once a DOI has been recognised)
    if (IDENTIFIER_DOI != $kind
        && preg_match('/^(http:\\/\\/hdl.handle.net\\/|hdl:|info:hdl\\/)?(([0-9][0-9]*(.[0-9]*)?)\\/(.*))/i', $text, $m)) {
        $kind = IDENTIFIER_HANDLE;
        $text = $m[2];
    }

    // SICI
    if (preg_match('/^(http:\\/\\/links.jstor.org\\/sici\\?sici=|info:sici\\/|sici:)(.*)/i', $text, $m)) {
        $kind = IDENTIFIER_SICI;
        $text = $m[2];
    }

    // PubMed, as an NCBI URL: either a list_uids query parameter or a /pubmed/<id> path
    if (preg_match('/http:\\/\\/www.ncbi.nlm.nih.gov/', $text)) {
        if (preg_match('/list_uids=([0-9]+)/', $text, $m)) {
            $kind = IDENTIFIER_PUBMED;
            $text = $m[1];
        }
        // Checked against the working string, which the step above may have reduced to digits
        if (preg_match('/http:\\/\\/www.ncbi.nlm.nih.gov\\/pubmed\\/([0-9]+)/', $text, $m)) {
            $kind = IDENTIFIER_PUBMED;
            $text = $m[1];
        }
    }

    // PubMed, as a prefixed id
    if (preg_match('/^(pmid:|info:pmid\\/)([0-9]*)/i', $text, $m)) {
        $kind = IDENTIFIER_PUBMED;
        $text = $m[2];
    }

    // GenBank accession
    if (preg_match('/^(genbank:|info:ddbj-embl-genbank\\/)(.*)/i', $text, $m)) {
        $kind = IDENTIFIER_GENBANK;
        $text = $m[2];
    }

    // GenBank GI number
    if (preg_match('/^(gi:)([0-9]*)/i', $text, $m)) {
        $kind = IDENTIFIER_GI;
        $text = $m[2];
    }

    // LSIDs (group 3 is the authority, group 4 is the namespace)
    if (preg_match("/^(lsidres:)?([uU][rR][nN]:[lL][sS][iI][dD]:([A-Za-z0-9][\\w\\(\\)\\+\\,\\-\\.\\=\\@\\;\$\"\\!\\*\\']*):([A-Za-z0-9][\\w\\(\\)\\+\\,\\-\\.\\=\\@\\;\$\"\\!\\*\\']*):[A-Za-z0-9][\\w\\(\\)\\+\\,\\-\\.\\=\\@\\;\$\"\\!\\*\\']*(:[A-Za-z0-9][\\w\\(\\)\\+\\,\\-\\.\\=\\@\\;\$\"\\!\\*\\']*)?)\$/", $text, $m)) {
        $kind = IDENTIFIER_LSID;
        $text = $m[2];
    }

    // ISSN: tested on the original input (minus any "issn:" prefix), and only
    // accepted when the classifier reports a valid checksum
    if (IDENTIFIER_UNKNOWN == $kind) {
        $candidate = preg_replace("/^issn:/i", '', $id);
        if (ISSN_classifier(ISN_clean($candidate)) == "checksumOK") {
            $kind = IDENTIFIER_ISSN;
            $text = canonical_ISSN($candidate);
        }
    }

    // URL: the stored string is capture group 2, i.e. the part after "http://"
    if (IDENTIFIER_UNKNOWN == $kind && preg_match('/^(http:\\/\\/(.*))/i', $text, $m)) {
        $kind = IDENTIFIER_URL;
        $text = $m[2];
    }

    // OCLC
    if (IDENTIFIER_UNKNOWN == $kind && preg_match('/^(info:oclcnum\\/)(.*)/i', $text, $m)) {
        $kind = IDENTIFIER_OCLC;
        $text = $m[2];
    }

    return array(
        'identifier_type' => $kind,
        'identifier_string' => $text,
    );
}
Beispiel #3
0
     if (preg_match('/^http:\\/\\/dx.doi.org\\//', $url)) {
         $obj->doi = $url;
         $obj->doi = str_replace('http://dx.doi.org/', '', $obj->doi);
         $done = true;
     }
     if (!$done) {
         $obj->url = $url;
     }
 }
 // issn
 $nc = $xpath->query("atom:content/xhtml:div/xhtml:table/xhtml:tr[@class='ISSN']/xhtml:td", $node);
 foreach ($nc as $n) {
     $issn = $n->firstChild->nodeValue;
     //	Format ISSN
     $clean = ISN_clean($issn);
     $class = ISSN_classifier($clean);
     if ($class == "checksumOK") {
         $obj->issn = canonical_ISSN($issn);
     }
 }
 // volume
 $nc = $xpath->query("atom:content/xhtml:div/xhtml:table/xhtml:tr[@class='volume']/xhtml:td", $node);
 foreach ($nc as $n) {
     $obj->volume = $n->firstChild->nodeValue;
 }
 // issue
 $nc = $xpath->query("atom:content/xhtml:div/xhtml:table/xhtml:tr[@class='issue']/xhtml:td", $node);
 foreach ($nc as $n) {
     $obj->issue = $n->firstChild->nodeValue;
 }
 // pages
Beispiel #4
0
/**
 * Look up a journal title for an ISSN in the `issn` table.
 *
 * The ISSN is first run through ISN_clean() / ISSN_classifier(); unless the
 * classifier answers "checksumOK" no query is made. Otherwise the canonical
 * form of the ISSN is looked up for the requested language and the longest
 * matching title is used. Terminates the script via die() if the query fails.
 *
 * @param issn ISSN to look up
 * @param language_code Language code of the wanted title (default 'en')
 *
 * @return Journal title, or an empty string when the ISSN is rejected or no row matches
 */
function journal_title_from_issn($issn, $language_code = 'en')
{
    global $db;

    // Reject anything whose checksum does not validate
    if (ISSN_classifier(ISN_clean($issn)) != "checksumOK") {
        return '';
    }

    $canonical = canonical_ISSN($issn);
    $sql = 'SELECT * FROM issn WHERE (issn = ' . $db->Quote($canonical) . ') 
		AND (language_code=' . $db->Quote($language_code) . ') ORDER BY LENGTH(title) DESC LIMIT 1';

    $rows = $db->Execute($sql);
    if ($rows == false) {
        die("failed: " . $sql);
    }

    // LIMIT 1 means there is either exactly one row or none
    return ($rows->NumRows() == 1) ? $rows->fields['title'] : '';
}
Beispiel #5
0
/**
 * Insert an article record into the article_cache table.
 *
 * Builds a single INSERT from whichever of the known fields are set on $item.
 * Column names and quoted values are collected in parallel lists and joined
 * at the end, so a field that is present but rejected (an ISSN or eISSN that
 * fails its checksum) is simply left out instead of leaving a stray comma in
 * the statement. Terminates the script via die() if the INSERT fails.
 *
 * @param item Object carrying article fields (issn, eissn, volume, issue,
 *             spage, epage, year, date, atitle, title, url, doi, hdl, sici,
 *             pmid, pdf, publisher_id, xml_url, abstract, availability, authors)
 *
 * @return Value of $db->Insert_ID() for the new row
 */
function store_in_cache($item)
{
    global $db;

    // sanity check (to do)

    // Column names and their SQL-ready values, kept in step with each other
    $columns = array();
    $values = array();

    // ISSN and eISSN are stored in canonical form, and only if the checksum is valid
    foreach (array('issn', 'eissn') as $field) {
        if (!isset($item->{$field})) {
            continue;
        }
        $clean = ISN_clean($item->{$field});
        if (ISSN_classifier($clean) == "checksumOK") {
            $columns[] = $field;
            $values[] = $db->qstr(canonical_ISSN($item->{$field}));
        }
    }

    // Volume, issue, start page
    foreach (array('volume', 'issue', 'spage') as $field) {
        if (isset($item->{$field})) {
            $columns[] = $field;
            $values[] = $db->qstr($item->{$field});
        }
    }

    // End page; a record with an end page also gets the "hard" column set to 1
    if (isset($item->epage)) {
        $columns[] = 'epage';
        $values[] = $db->qstr($item->epage);
        $columns[] = 'hard';
        $values[] = '1';
    }

    // Year, date
    foreach (array('year', 'date') as $field) {
        if (isset($item->{$field})) {
            $columns[] = $field;
            $values[] = $db->qstr($item->{$field});
        }
    }

    // Article title: remove markup, then decode entities to UTF-8.
    // (Both steps must start from $item->atitle; decoding a not-yet-assigned
    // local and then overwriting it would lose the decoding.)
    if (isset($item->atitle)) {
        $atitle = strip_tags($item->atitle);
        $atitle = html_entity_decode($atitle, ENT_QUOTES, "utf-8");
        $columns[] = 'atitle';
        $values[] = $db->qstr(trim($atitle));
    }

    // Journal title
    if (isset($item->title)) {
        $columns[] = 'title';
        $values[] = $db->qstr(trim($item->title));
    }

    // URL, identifiers (doi, handle, sici, pmid), pdf,
    // publisher id (such as OAI urn) and XML url (e.g., from Scielo)
    foreach (array('url', 'doi', 'hdl', 'sici', 'pmid', 'pdf', 'publisher_id', 'xml_url') as $field) {
        if (isset($item->{$field})) {
            $columns[] = $field;
            $values[] = $db->qstr($item->{$field});
        }
    }

    // Abstract: same clean-up as the article title, but not trimmed
    if (isset($item->abstract)) {
        $abstract = strip_tags($item->abstract);
        $abstract = html_entity_decode($abstract, ENT_QUOTES, "utf-8");
        $columns[] = 'abstract';
        $values[] = $db->qstr($abstract);
    }

    // Set open_access flag if item is Open access
    if (isset($item->availability) && $item->availability == 'Open access') {
        $columns[] = 'open_access';
        $values[] = $db->qstr('Y');
    }

    $sql = 'INSERT INTO article_cache(' . implode(',', $columns) . ') VALUES (' . implode(',', $values) . ');';

    // Store
    $result = $db->Execute($sql);
    if ($result == false) {
        die("failed [" . __LINE__ . "]: " . $sql);
    }
    $id = $db->Insert_ID();

    // Authors (don't actually need them, but seems a shame to throw this information away)
    if (isset($item->authors)) {
        store_authors($id, $item->authors);
    }
    return $id;
}
Beispiel #6
0
/**
 * @brief Parse OpenURL parameters and populate context object
 *
 * Creates $context_object->referring_entity and $context_object->referent and
 * fills them from the parameters. Every parameter value is expected to be an
 * array of strings: repeatable keys (rft_id, rft.au) are iterated, all other
 * keys use entry 0. Journal-level fields (name, ISSN identifiers, volume,
 * issue, pages, series) are stored on referent->journal, which is created on
 * first use. Keys without a dedicated handler are copied onto the referent
 * under the key name with any "rft." removed.
 *
 * @param params Array of OpenURL parameters (key => array of values)
 * @param context_object Context object to populate (modified in place)
 *
 */
function parse_openurl($params, &$context_object)
{
    $context_object->referring_entity = new stdClass();
    $context_object->referent = new stdClass();
    $context_object->referent->type = 'unknown';

    // Identifier schemes recognised in rft_id by prefix: type => pattern
    $prefixed_identifiers = array(
        'doi' => '/^(info:doi\\/|doi:)(?<id>.*)/',
        'handle' => '/^(info:hdl\\/|hdl:)(?<id>.*)/',
        'pmid' => '/^(info:pmid\\/|pmid:)(?<id>.*)/',
        'pmc' => '/^(pmc:)(?<id>.*)/',
    );

    foreach ($params as $key => $value) {
        switch ($key) {
            case 'ctx_ver':
                $context_object->version = $value[0];
                break;
            case 'rfe_id':
                $context_object->referring_entity->id = $value[0];
                break;
            case 'rft_val_fmt':
                // Values arrive as arrays; switching on the array itself can never
                // equal a string label, so compare the first entry instead.
                $format = is_array($value) ? (isset($value[0]) ? $value[0] : '') : $value;
                switch ($format) {
                    case 'info:ofi/fmt:kev:mtx:journal':
                        $context_object->referent->type = 'article';
                        break;
                    case 'info:ofi/fmt:kev:mtx:book':
                        $context_object->referent->type = 'book';
                        break;
                    default:
                        // Any other format leaves the type as it is (initially 'unknown')
                        break;
                }
                break;
            // Article title
            case 'rft.atitle':
            case 'atitle':
                $title = $value[0];
                // Drop a trailing full stop, then markup, then decode entities
                $title = preg_replace('/\\.$/', '', $title);
                $title = strip_tags($title);
                $title = html_entity_decode($title, ENT_NOQUOTES, 'UTF-8');
                $context_object->referent->title = $title;
                $context_object->referent->type = 'article';
                break;
            // Book title
            case 'rft.btitle':
            case 'btitle':
                $context_object->referent->title = $value[0];
                $context_object->referent->type = 'book';
                break;
            // Journal title
            case 'rft.jtitle':
            case 'rft.title':
            case 'title':
                $publication_outlet = trim($value[0]);
                // Strip wiki-style [[ ]] wrapping
                $publication_outlet = preg_replace('/^\\[\\[/', '', $publication_outlet);
                $publication_outlet = preg_replace('/\\]\\]$/', '', $publication_outlet);
                if (!isset($context_object->referent->journal)) {
                    $context_object->referent->journal = new stdclass();
                }
                $context_object->referent->journal->name = $publication_outlet;
                $context_object->referent->type = 'article';
                break;
            case 'rft.issn':
            case 'issn':
                // Only keep ISSNs that the classifier reports as "checksumOK"
                $ISSN_proto = $value[0];
                $clean = ISN_clean($ISSN_proto);
                $class = ISSN_classifier($clean);
                if ($class == "checksumOK") {
                    $identifier = new stdclass();
                    $identifier->type = 'issn';
                    $identifier->id = canonical_ISSN($ISSN_proto);
                    if (!isset($context_object->referent->journal)) {
                        $context_object->referent->journal = new stdclass();
                    }
                    $context_object->referent->journal->identifier[] = $identifier;
                }
                break;
            // Identifiers
            case 'rft_id':
            case 'id':
                foreach ($value as $v) {
                    // DOI, Handle, PMID, PMC (in that order)
                    foreach ($prefixed_identifiers as $type => $pattern) {
                        if (preg_match($pattern, $v, $match)) {
                            $identifier = new stdclass();
                            $identifier->type = $type;
                            $identifier->id = $match['id'];
                            $context_object->referent->identifier[] = $identifier;
                        }
                    }
                    // Without INFO-URI prefix
                    // LSID
                    if (preg_match('/^urn:lsid:/', $v)) {
                        $identifier = new stdclass();
                        $identifier->type = 'lsid';
                        $identifier->id = $v;
                        $context_object->referent->identifier[] = $identifier;
                    }
                    // URL (http or https): PDF link, BioStor reference, or generic link
                    if (preg_match('/^https?:\\/\\//', $v)) {
                        if (preg_match('/\\.pdf/', $v)) {
                            $context_object->referent->pdf = $v;
                        } elseif (preg_match('/https?:\\/\\/biostor.org\\/reference\\/(?<id>\\d+)$/', $v, $match)) {
                            $identifier = new stdclass();
                            $identifier->type = 'biostor';
                            $identifier->id = $match['id'];
                            $context_object->referent->identifier[] = $identifier;
                        } else {
                            $context_object->referent->link = new stdclass();
                            $context_object->referent->link->url = $v;
                        }
                    }
                }
                break;
            // Authors
            case 'rft.au':
            case 'au':
                foreach ($value as $v) {
                    $parts = parse_name($v);
                    $author = new stdClass();
                    if (isset($parts['last'])) {
                        $author->lastname = $parts['last'];
                    }
                    if (isset($parts['suffix'])) {
                        $author->suffix = $parts['suffix'];
                    }
                    if (isset($parts['first'])) {
                        $author->forename = $parts['first'];
                        if (array_key_exists('middle', $parts)) {
                            $author->forename .= ' ' . $parts['middle'];
                        }
                    }
                    $context_object->referent->author[] = $author;
                }
                break;
            // article details
            case 'rft.volume':
            case 'volume':
                if (!isset($context_object->referent->journal)) {
                    $context_object->referent->journal = new stdclass();
                }
                $context_object->referent->journal->volume = $value[0];
                break;
            case 'rft.issue':
            case 'issue':
                if (!isset($context_object->referent->journal)) {
                    $context_object->referent->journal = new stdclass();
                }
                $context_object->referent->journal->issue = $value[0];
                break;
            case 'rft.spage':
            case 'spage':
                if (!isset($context_object->referent->journal)) {
                    $context_object->referent->journal = new stdclass();
                }
                $context_object->referent->journal->pages = $value[0];
                break;
            case 'rft.epage':
            case 'epage':
                if (!isset($context_object->referent->journal)) {
                    $context_object->referent->journal = new stdclass();
                }
                // Endnote epage may have leading "-" as it splits spage-epage to
                // generate OpenURL; strip it here, because the end page is folded
                // into journal->pages and never reaches referent->epage.
                $epage = preg_replace('/^\\-/', '', $value[0]);
                if (isset($context_object->referent->journal->pages)) {
                    $context_object->referent->journal->pages .= '--' . $epage;
                } else {
                    $context_object->referent->journal->pages = $epage;
                }
                break;
            case 'rft.pages':
            case 'pages':
                if (!isset($context_object->referent->journal)) {
                    $context_object->referent->journal = new stdclass();
                }
                $context_object->referent->journal->pages = $value[0];
                break;
            default:
                // Anything else is stored under the key name without "rft."
                $k = str_replace("rft.", '', $key);
                $context_object->referent->{$k} = $value[0];
                break;
        }
    }
    // Clean
    // Dates
    if (isset($context_object->referent->date)) {
        // A bare year is padded to YYYY-00-00
        if (preg_match('/^[0-9]{4}$/', $context_object->referent->date)) {
            $context_object->referent->year = $context_object->referent->date;
            $context_object->referent->date = $context_object->referent->date . '-00-00';
        }
        if (preg_match('/^(?<year>[0-9]{4})-(?<month>[0-9]{2})-(?<day>[0-9]{2})$/', $context_object->referent->date, $match)) {
            $context_object->referent->year = $match['year'];
            $context_object->referent->date = $match['year'] . '-' . $match['month'] . '-' . $match['day'];
        }
    }
    // Zotero
    // NOTE(review): the pages/epage keys are routed to referent->journal->pages
    // above, so these two referent-level clean-ups look effectively unused — confirm.
    if (isset($context_object->referent->pages)) {
        // Note "u" option in regular expression, so that we match UTF-8 characters such as –
        if (preg_match('/(?<spage>[0-9]+)[\\-|–](?<epage>[0-9]+)/u', $context_object->referent->pages, $match)) {
            $context_object->referent->spage = $match['spage'];
            $context_object->referent->epage = $match['epage'];
            unset($context_object->referent->pages);
        }
    }
    if (isset($context_object->referent->epage)) {
        $context_object->referent->epage = preg_replace('/^\\-/', '', $context_object->referent->epage);
    }
    // Journal titles with series numbers are split into title,series fields.
    // Guarded because the journal (or its name) may never have been supplied;
    // the title group is lazy so the optional comma and the whitespace before
    // "series" are not left on the end of the name.
    if (isset($context_object->referent->journal->name)
        && preg_match('/(?<title>.*?),?\\s+series\\s+(?<series>[0-9]+)$/i', $context_object->referent->journal->name, $match)) {
        $context_object->referent->journal->name = $match['title'];
        $context_object->referent->journal->series = $match['series'];
    }
    // Volume might have series information
    if (isset($context_object->referent->journal->volume)
        && preg_match('/^series\\s+(?<series>[0-9]+),\\s*(?<volume>[0-9]+)$/i', $context_object->referent->journal->volume, $match)) {
        $context_object->referent->journal->volume = $match['volume'];
        $context_object->referent->journal->series = $match['series'];
    }
    // aulast/aufirst, when both present, always define the first author: this
    // both creates the author list when rft.au was absent and replaces a first
    // author parsed from rft.au.
    // NOTE(review): this author uses "surname" while authors parsed from rft.au
    // use "lastname" — confirm which property consumers expect.
    if (isset($context_object->referent->aulast) && isset($context_object->referent->aufirst)) {
        $author = new stdClass();
        $author->surname = $context_object->referent->aulast;
        $author->forename = $context_object->referent->aufirst;
        $context_object->referent->author[0] = $author;
    }
    // EndNote encodes accented characters, which break journal names.
    // The journal title is stored in journal->name, so that is what gets repaired.
    if (isset($context_object->referent->journal->name)) {
        $context_object->referent->journal->name = preg_replace('/%9F/', 'ü', $context_object->referent->journal->name);
    }
}
Beispiel #7
0
 /**
  * @brief Populate $this->values from a set of OpenURL query parameters
  *
  * Key handling:
  *  - "submit" is ignored.
  *  - "rft_id" is passed to $this->StoreIdentifier().
  *  - "rft_val_fmt" sets the genre to "article", "rft_jtitle" is stored as "title",
  *    and any other key starting with "rft_" is stored with "rft_" removed.
  *  - Keys starting with "rfr" (referrer information) are ignored.
  *  - Any other key is only processed when $this->version is 0.1: "title" is mapped
  *    according to the "genre" parameter, "id" is URL-decoded and passed to
  *    $this->StoreIdentifier(), "vol" is stored as "volume", and everything else is
  *    stored under its own name.
  *
  * After all keys are read the values are cleaned: leading hyphens are removed from
  * "epage", a lone "issue" is promoted to "volume", and "issn" is either rewritten by
  * canonical_ISSN() or dropped, depending on the result of ISSN_classifier().
  *
  * @param parameters Array of OpenURL parameters (key => value)
  *
  */
 function GetParameters($parameters)
 {
     foreach ($parameters as $k => $v) {
         switch ($k) {
             case 'submit':
                 // Nothing to store for the submit key
                 break;
             case 'rft_id':
                 $this->StoreIdentifier($v);
                 break;
             default:
                 if (preg_match('/^rft_/', $k)) {
                     switch ($k) {
                         case 'rft_val_fmt':
                             // Every referent format (journal or otherwise) is recorded as an article
                             $this->values['genre'] = 'article';
                             break;
                         case 'rft_jtitle':
                             $this->values['title'] = trim($v);
                             break;
                         default:
                             $key = str_replace("rft_", '', $k);
                             $this->values[$key] = trim($v);
                             break;
                     }
                 } else {
                     if (preg_match('/^rfr/', $k)) {
                         // eat referrer info
                     } else {
                         if ($this->version == 0.1) {
                             // An empty $key means "do not store this parameter"
                             $key = '';
                             switch ($k) {
                                 case 'title':
                                     if (isset($parameters['genre'])) {
                                         // NOTE(review): any genre other than book/article/journal
                                         // leaves $key empty, so the title is dropped - confirm intended
                                         if ($parameters['genre'] == 'book') {
                                             $key = 'btitle';
                                         }
                                         if ($parameters['genre'] == 'article') {
                                             $key = 'title';
                                         }
                                         if ($parameters['genre'] == 'journal') {
                                             $key = 'title';
                                         }
                                     } else {
                                         // assume it's an article
                                         $key = 'title';
                                         $this->values['genre'] = 'article';
                                     }
                                     break;
                                 case 'id':
                                     $this->StoreIdentifier(urldecode($v));
                                     break;
                                 case 'vol':
                                     $key = 'volume';
                                     // liberal input (catch case of vol=)
                                     break;
                                 default:
                                     $key = $k;
                                     break;
                             }
                             if ($key != '') {
                                 $this->values[$key] = trim($v);
                             }
                         }
                     }
                 }
                 break;
         }
     }
     // Clean
     // Endnote may have leading "-" as it splits spage-epage to generate OpenURL.
     // Only leading hyphens are removed, so hyphens inside a page label (e.g. "S-15") survive.
     if (isset($this->values['epage'])) {
         $this->values['epage'] = ltrim($this->values['epage'], '-');
     }
     // If an article has an issue but not a volume, we will use the issue as the volume
     if (!isset($this->values['volume'])) {
         if (isset($this->values['issue'])) {
             $this->values['volume'] = $this->values['issue'];
             unset($this->values['issue']);
         }
     }
     // Ensure ISSN is formatted correctly (may lack hyphen if coming from Wiki)
     if (isset($this->values['issn'])) {
         $ISSN_proto = $this->values['issn'];
         $clean = ISN_clean($ISSN_proto);
         $class = ISSN_classifier($clean);
         if ($class == "checksumOK") {
             $this->values['issn'] = canonical_ISSN($ISSN_proto);
         } else {
             // Classifier did not report "checksumOK": discard the ISSN
             unset($this->values['issn']);
         }
     }
 }