/**
 * Exercises DateStringNormalizerFilter::execute() with a handful of
 * free-form date strings and checks the normalized result.
 *
 * @covers DateStringNormalizerFilter
 */
public function testExecute() {
    $normalizer = new DateStringNormalizerFilter();

    // Each entry is array(input string, expected normalized string).
    // The last entry documents that a year followed by a bare number
    // is expected to come back as the year only.
    $expectations = array(
        array(' 2003 ', '2003'),
        array(' 2003 Jul ', '2003-07'),
        array(' 2003 Jul 5 ', '2003-07-05'),
        array(' 2003 5 ', '2003'),
    );
    foreach ($expectations as $expectation) {
        // The input is handed over in a real variable (as the inline
        // assignments did before) so that the call also works if
        // execute() takes its argument by reference.
        list($dateString, $expectedDate) = $expectation;
        self::assertEquals($expectedDate, $normalizer->execute($dateString));
    }

    // Input that cannot be interpreted as a date yields null.
    $dateString = 'unparsable string';
    self::assertNull($normalizer->execute($dateString));
}
/**
 * Custom implementation of Form::validate() that validates
 * meta-data form data and injects it into the internal citation
 * object.
 *
 * NB: The configuration of the internal citation object
 * would normally be done in readInputData(). Validation and
 * injection can easily be done in one step. It therefore avoids
 * code duplication and improves performance to do both here.
 *
 * For every property of every meta-data schema supported by the
 * citation the corresponding form field is read, converted to the
 * most specific allowed type and added to a fresh meta-data
 * description. Fields that are mandatory but empty, that cannot be
 * converted, or that are rejected by the description are flagged as
 * form errors.
 *
 * @return boolean the result of isValid() after all fields were processed
 */
function validate() {
    // Make sure that this method is not called twice which
    // would corrupt internal state.
    assert(empty($this->_metadataDescriptions));

    parent::validate();

    // Validate form data and inject it into
    // the associated citation object.
    $citation =& $this->getCitation();
    $citation->setRawCitation($this->getData('rawCitation'));
    if ($this->getData('citationApproved') == 'citationApproved') {
        // Editor's shortcut to the approved state, e.g. for manually edited citations.
        $citation->setCitationState(CITATION_APPROVED);
    } elseif (in_array($this->getData('citationState'), Citation::_getSupportedCitationStates())) {
        // Reset citation state if necessary
        if ($this->getData('citationState') == CITATION_APPROVED) {
            $this->setData('citationState', CITATION_LOOKED_UP);
        }
        $citation->setCitationState($this->getData('citationState'));
    }

    // Extract data from citation form fields and inject it into the citation
    import('lib.pkp.classes.metadata.MetadataDescription');
    $metadataSchemas = $citation->getSupportedMetadataSchemas();
    foreach ($metadataSchemas as $metadataSchema) { /* @var $metadataSchema MetadataSchema */
        // Instantiate a meta-data description for the given schema
        $metadataDescription = new MetadataDescription($metadataSchema->getClassName(), ASSOC_TYPE_CITATION);

        // Set the meta-data statements
        foreach ($metadataSchema->getProperties() as $propertyName => $property) {
            $fieldName = $metadataSchema->getNamespacedPropertyId($propertyName);
            $fieldValue = trim($this->getData($fieldName));

            // Only a really empty field counts as "not set". empty() must
            // not be used here because it would also discard the
            // legitimate value "0".
            if ($fieldValue === '' || is_null($fieldValue)) {
                // Delete empty statements so that previously set
                // statements (if any) will be deleted.
                $metadataDescription->removeStatement($propertyName);

                if ($property->getMandatory()) {
                    // A mandatory field is missing - add a validation error.
                    $this->addError($fieldName, __($property->getValidationMessage()));
                    $this->addErrorField($fieldName);
                }
                continue;
            }

            // Try to convert the field value to (a) strongly
            // typed object(s) if applicable. Start with the most
            // specific allowed type so that we always get the
            // most strongly typed result possible.
            // $typedFieldValues stays null if no conversion was possible.
            $allowedTypes = $property->getAllowedTypes();
            $typedFieldValues = null;
            switch (true) {
                case isset($allowedTypes[METADATA_PROPERTY_TYPE_VOCABULARY]) && is_numeric($fieldValue):
                case isset($allowedTypes[METADATA_PROPERTY_TYPE_INTEGER]) && is_numeric($fieldValue):
                    $typedFieldValues = array((int) $fieldValue);
                    break;

                case isset($allowedTypes[METADATA_PROPERTY_TYPE_DATE]):
                    import('lib.pkp.classes.metadata.DateStringNormalizerFilter');
                    $dateStringFilter = new DateStringNormalizerFilter();
                    assert($dateStringFilter->supportsAsInput($fieldValue));
                    $typedFieldValues = array($dateStringFilter->execute($fieldValue));
                    break;

                case isset($allowedTypes[METADATA_PROPERTY_TYPE_COMPOSITE]):
                    // We currently only support name composites
                    $allowedAssocIds = $allowedTypes[METADATA_PROPERTY_TYPE_COMPOSITE];
                    // Reset explicitly so that a value left over from a
                    // previously processed property can never be re-used.
                    $assocType = null;
                    if (in_array(ASSOC_TYPE_AUTHOR, $allowedAssocIds)) {
                        $assocType = ASSOC_TYPE_AUTHOR;
                    } elseif (in_array(ASSOC_TYPE_EDITOR, $allowedAssocIds)) {
                        $assocType = ASSOC_TYPE_EDITOR;
                    } else {
                        assert(false);
                    }

                    if (!is_null($assocType)) {
                        // Try to transform the field to a name composite.
                        import('lib.pkp.plugins.metadata.nlm30.filter.PersonStringNlm30NameSchemaFilter');
                        $personStringFilter = new PersonStringNlm30NameSchemaFilter($assocType, PERSON_STRING_FILTER_MULTIPLE);
                        assert($personStringFilter->supportsAsInput($fieldValue));
                        $typedFieldValues =& $personStringFilter->execute($fieldValue);
                    }
                    break;

                default:
                    $typedFieldValues = array($fieldValue);
            }

            if (is_array($typedFieldValues)) {
                // Inject data into the meta-data description and thereby
                // implicitly validate the field value.
                foreach ($typedFieldValues as $typedFieldValue) {
                    if (!$metadataDescription->addStatement($propertyName, $typedFieldValue)) {
                        // Add form field error
                        $this->addError($fieldName, __($property->getValidationMessage()));
                        $this->addErrorField($fieldName);
                    }
                    unset($typedFieldValue);
                }
            } else {
                // The value could not be converted into statements at all
                // (unsupported composite or a filter that returned no
                // list) - report this instead of silently iterating
                // over a non-array.
                $this->addError($fieldName, __($property->getValidationMessage()));
                $this->addErrorField($fieldName);
            }

            // Break a possible reference to the filter output before the
            // variable is re-used for the next property.
            unset($typedFieldValues);
        }

        // Inject the meta-data into the citation.
        $citation->injectMetadata($metadataDescription);

        // Save the meta-data description for later usage.
        $this->_metadataDescriptions[] =& $metadataDescription;
        unset($metadataDescription);
    }

    return $this->isValid();
}
/**
 * Looks up a PubMed ID via the eFetch web service and converts the
 * returned XML into an NLM citation description. A second request
 * to the eLink web service is used to find a full text link that is
 * not marked as requiring a subscription, membership or fee.
 *
 * Elements that are missing from the PubMed response are simply left
 * out of the meta-data array.
 *
 * @param $pmid string
 * @return MetadataDescription null if the eFetch request returned no result,
 *  otherwise whatever getNlm30CitationDescriptionFromMetadataArray() returns
 */
function &_lookup($pmid) {
    $nullVar = null;

    // Use eFetch to get XML metadata for the given PMID
    $lookupParams = array(
        'db' => 'pubmed',
        'mode' => 'xml',
        'tool' => 'pkp-wal',
        'id' => $pmid
    );
    if (!is_null($this->getEmail())) {
        $lookupParams['email'] = $this->getEmail();
    }

    // Call the eFetch URL and get an XML result
    if (is_null($resultDOM = $this->callWebService(PUBMED_WEBSERVICE_EFETCH, $lookupParams))) {
        return $nullVar;
    }

    $metadata = array('pub-id[@pub-id-type="pmid"]' => $pmid);

    // Copy simple text elements. Every element is optional: the value
    // is only read when the element is actually present so that we
    // never dereference a missing node or an undefined variable.
    $simpleElements = array(
        'ArticleTitle' => 'article-title',
        'MedlineTA' => 'source',
        'Volume' => 'volume',
        'Issue' => 'issue'
    );
    foreach ($simpleElements as $tagName => $propertyName) {
        $elementNodes = $resultDOM->getElementsByTagName($tagName);
        if ($elementNodes->length > 0) {
            $elementFirstNode = $elementNodes->item(0);
            $metadata[$propertyName] = $elementFirstNode->textContent;
        }
    }

    // Get list of author full names
    foreach ($resultDOM->getElementsByTagName("Author") as $authorNode) {
        if (!isset($metadata['person-group[@person-group-type="author"]'])) {
            $metadata['person-group[@person-group-type="author"]'] = array();
        }

        // Instantiate an NLM name description
        $authorDescription = new MetadataDescription('lib.pkp.plugins.metadata.nlm30.schema.Nlm30NameSchema', ASSOC_TYPE_AUTHOR);

        // Surname (not present e.g. for authors that only carry a
        // collective name)
        $lastNameNodes = $authorNode->getElementsByTagName("LastName");
        if ($lastNameNodes->length > 0) {
            $lastNameFirstNode = $lastNameNodes->item(0);
            $authorDescription->addStatement('surname', $lastNameFirstNode->textContent);
        }

        // Given names: prefer <FirstName>, fall back to <ForeName>
        $givenNamesString = '';
        $firstNameNodes = $authorNode->getElementsByTagName("FirstName");
        if ($firstNameNodes->length > 0) {
            $firstNameFirstNode = $firstNameNodes->item(0);
            $givenNamesString = $firstNameFirstNode->textContent;
        } else {
            $foreNameNodes = $authorNode->getElementsByTagName("ForeName");
            if ($foreNameNodes->length > 0) {
                $foreNameFirstNode = $foreNameNodes->item(0);
                $givenNamesString = $foreNameFirstNode->textContent;
            }
        }
        if (!empty($givenNamesString)) {
            foreach (explode(' ', $givenNamesString) as $givenName) {
                $authorDescription->addStatement('given-names', String::trimPunctuation($givenName));
            }
        }

        // Suffix
        $suffixNodes = $authorNode->getElementsByTagName("Suffix");
        if ($suffixNodes->length > 0) {
            $suffixFirstNode = $suffixNodes->item(0);
            $authorDescription->addStatement('suffix', $suffixFirstNode->textContent);
        }

        // Include collective names
        // FIXME: This corresponds to an NLM-citation <collab> tag and should be part of the Metadata implementation
        /*if ($resultDOM->getElementsByTagName("CollectiveName")->length > 0 && $authorNode->getElementsByTagName("CollectiveName")->item(0)->textContent != '') {
        }*/

        $metadata['person-group[@person-group-type="author"]'][] =& $authorDescription;
        unset($authorDescription);
    }

    // Extract pagination
    $medlinePgnNodes = $resultDOM->getElementsByTagName("MedlinePgn");
    if ($medlinePgnNodes->length > 0) {
        $medlinePgnFirstNode = $medlinePgnNodes->item(0);
        if (String::regexp_match_get("/^[:p\\.\\s]*(?P<fpage>[Ee]?\\d+)(-(?P<lpage>\\d+))?/", $medlinePgnFirstNode->textContent, $pages)) {
            // NOTE(review): the pattern accepts an "E"/"e" prefix but the
            // integer cast below turns such a page into 0 - confirm
            // whether electronic page numbers should be kept as strings.
            $fPage = (int) $pages['fpage'];
            $metadata['fpage'] = $fPage;
            if (!empty($pages['lpage'])) {
                $lPage = (int) $pages['lpage'];

                // Deal with shortcuts like '382-7': replace the trailing
                // digits of the first page with the abbreviated last page.
                if ($lPage < $fPage) {
                    $lPage = (int) (String::substr($pages['fpage'], 0, -String::strlen($pages['lpage'])) . $pages['lpage']);
                }

                $metadata['lpage'] = $lPage;
            }
        }
    }

    // Get publication date (can be in several places in PubMed).
    $dateNode = null;
    $articleDateNodes = $resultDOM->getElementsByTagName("ArticleDate");
    if ($articleDateNodes->length > 0) {
        $dateNode = $articleDateNodes->item(0);
    } else {
        $pubDateNodes = $resultDOM->getElementsByTagName("PubDate");
        if ($pubDateNodes->length > 0) {
            $dateNode = $pubDateNodes->item(0);
        }
    }

    // Retrieve the data parts and assemble date.
    if (!is_null($dateNode)) {
        $publicationDate = '';
        $requiresNormalization = false;
        foreach (array('Year' => 4, 'Month' => 2, 'Day' => 2) as $dateElement => $padding) {
            $dateElementNodes = $dateNode->getElementsByTagName($dateElement);
            if ($dateElementNodes->length == 0) {
                // A less significant part is meaningless without the
                // more significant one, so stop at the first gap.
                break;
            }

            if (!empty($publicationDate)) {
                $publicationDate .= '-';
            }
            $dateElementFirstNode = $dateElementNodes->item(0);
            $datePart = str_pad($dateElementFirstNode->textContent, $padding, '0', STR_PAD_LEFT);
            if (!is_numeric($datePart)) {
                // E.g. a textual month name.
                $requiresNormalization = true;
            }
            $publicationDate .= $datePart;
        }

        // Normalize the date to NLM standard if necessary.
        if ($requiresNormalization) {
            $dateFilter = new DateStringNormalizerFilter();
            $publicationDate = $dateFilter->execute($publicationDate);
        }

        if (!empty($publicationDate)) {
            $metadata['date'] = $publicationDate;
        }
    }

    // Get publication type
    $publicationTypeNodes = $resultDOM->getElementsByTagName("PublicationType");
    if ($publicationTypeNodes->length > 0) {
        foreach ($publicationTypeNodes as $publicationType) {
            // The vast majority of items on PubMed are articles so catch these...
            if (String::strpos(String::strtolower($publicationType->textContent), 'article') !== false) {
                $metadata['[@publication-type]'] = NLM30_PUBLICATION_TYPE_JOURNAL;
                break;
            }
        }
    }

    // Get DOI if it exists
    $articleIdNodes = $resultDOM->getElementsByTagName("ArticleId");
    foreach ($articleIdNodes as $idNode) {
        if ($idNode->getAttribute('IdType') == 'doi') {
            $metadata['pub-id[@pub-id-type="doi"]'] = $idNode->textContent;
        }
    }

    // Use eLink utility to find fulltext links
    $lookupParams = array(
        'dbfrom' => 'pubmed',
        'cmd' => 'llinks',
        'tool' => 'pkp-wal',
        'id' => $pmid
    );
    if (!is_null($resultDOM = $this->callWebService(PUBMED_WEBSERVICE_ELINK, $lookupParams))) {
        // Get a list of possible links
        $links = array();
        foreach ($resultDOM->getElementsByTagName("ObjUrl") as $linkOut) {
            $attributes = '';
            foreach ($linkOut->getElementsByTagName("Attribute") as $attribute) {
                $attributes .= String::strtolower($attribute->textContent) . ' / ';
            }

            // Only add links to open access resources. Links without
            // any attribute are skipped as well.
            if (String::strpos($attributes, "subscription") === false
                    && String::strpos($attributes, "membership") === false
                    && String::strpos($attributes, "fee") === false
                    && $attributes != "") {
                $urlNodes = $linkOut->getElementsByTagName("Url");
                if ($urlNodes->length > 0) {
                    $urlFirstNode = $urlNodes->item(0);
                    $links[] = $urlFirstNode->textContent;
                }
            }
        }

        // Take the first link if we have any left (presumably pubmed returns them in preferential order)
        if (isset($links[0])) {
            $metadata['uri'] = $links[0];
        }
    }

    return $this->getNlm30CitationDescriptionFromMetadataArray($metadata);
}
/**
 * Post processes an NLM meta-data array: cleans it, trims punctuation,
 * turns author/editor strings into NLM name descriptions, joins
 * comments, normalizes dates, casts page/size values to integers,
 * renames keys that are attributes in the NLM citation schema and
 * finally fixes up the publication type and the book title.
 *
 * @param $preliminaryNlm30Array array
 * @return array
 */
function &postProcessMetadataArray(&$preliminaryNlm30Array) {
    // Clean array
    $preliminaryNlm30Array = arrayClean($preliminaryNlm30Array);

    // Trim punctuation
    $preliminaryNlm30Array =& $this->_recursivelyTrimPunctuation($preliminaryNlm30Array);

    // Parse (=filter) author/editor strings into NLM name descriptions
    foreach (array('author' => ASSOC_TYPE_AUTHOR, 'editor' => ASSOC_TYPE_EDITOR) as $personType => $personAssocType) {
        if (isset($preliminaryNlm30Array[$personType])) {
            // Get the author/editor strings from the result
            $personStrings = $preliminaryNlm30Array[$personType];
            unset($preliminaryNlm30Array[$personType]);

            // Parse the author/editor strings into NLM name descriptions
            if (is_scalar($personStrings)) {
                // Interpret a scalar as a textual authors list
                $personStringFilter = new PersonStringNlm30NameSchemaFilter($personAssocType, PERSON_STRING_FILTER_MULTIPLE);
                $persons =& $personStringFilter->execute($personStrings);
            } else {
                // Otherwise every entry is treated as a single person string.
                // array_map() does not return a reference, so its result
                // is assigned by value.
                $personStringFilter = new PersonStringNlm30NameSchemaFilter($personAssocType, PERSON_STRING_FILTER_SINGLE);
                $persons = array_map(array($personStringFilter, 'execute'), $personStrings);
            }

            $preliminaryNlm30Array['person-group[@person-group-type="' . $personType . '"]'] = $persons;
            unset($persons);
        }
    }

    // Join comments
    if (isset($preliminaryNlm30Array['comment']) && is_array($preliminaryNlm30Array['comment'])) {
        // Implode comments from the result into a single string
        // as required by the NLM citation schema.
        $preliminaryNlm30Array['comment'] = implode("\n", $preliminaryNlm30Array['comment']);
    }

    // Normalize date strings
    foreach (array('date', 'conf-date', 'access-date') as $dateProperty) {
        if (isset($preliminaryNlm30Array[$dateProperty])) {
            $dateFilter = new DateStringNormalizerFilter();
            $normalizedDate = $dateFilter->execute($preliminaryNlm30Array[$dateProperty]);
            if (is_null($normalizedDate)) {
                // The date could not be parsed. Drop the entry instead
                // of keeping a null value under a key that the rename
                // step below would no longer recognize as set.
                unset($preliminaryNlm30Array[$dateProperty]);
            } else {
                $preliminaryNlm30Array[$dateProperty] = $normalizedDate;
            }
            unset($normalizedDate);
        }
    }

    // Cast strings to integers where necessary
    foreach (array('fpage', 'lpage', 'size') as $integerProperty) {
        if (isset($preliminaryNlm30Array[$integerProperty]) && is_numeric($preliminaryNlm30Array[$integerProperty])) {
            $preliminaryNlm30Array[$integerProperty] = (int) $preliminaryNlm30Array[$integerProperty];
        }
    }

    // Rename elements that are stored in attributes in NLM citation
    $elementToAttributeMap = array(
        'access-date' => 'date-in-citation[@content-type="access-date"]',
        'issn-ppub' => 'issn[@pub-type="ppub"]',
        'issn-epub' => 'issn[@pub-type="epub"]',
        'pub-id-doi' => 'pub-id[@pub-id-type="doi"]',
        'pub-id-publisher-id' => 'pub-id[@pub-id-type="publisher-id"]',
        'pub-id-coden' => 'pub-id[@pub-id-type="coden"]',
        'pub-id-sici' => 'pub-id[@pub-id-type="sici"]',
        'pub-id-pmid' => 'pub-id[@pub-id-type="pmid"]',
        'publication-type' => '[@publication-type]'
    );
    foreach ($elementToAttributeMap as $elementName => $nlm30PropertyName) {
        if (isset($preliminaryNlm30Array[$elementName])) {
            $preliminaryNlm30Array[$nlm30PropertyName] = $preliminaryNlm30Array[$elementName];
            unset($preliminaryNlm30Array[$elementName]);
        }
    }

    // Guess a publication type if none has been set by the
    // citation service.
    $this->_guessPublicationType($preliminaryNlm30Array);

    // Some services return the title as article-title although
    // the publication type is a book. Move it to 'source' unless
    // a source has already been set.
    if (isset($preliminaryNlm30Array['[@publication-type]']) && $preliminaryNlm30Array['[@publication-type]'] == 'book') {
        if (isset($preliminaryNlm30Array['article-title']) && !isset($preliminaryNlm30Array['source'])) {
            $preliminaryNlm30Array['source'] = $preliminaryNlm30Array['article-title'];
            unset($preliminaryNlm30Array['article-title']);
        }
    }

    return $preliminaryNlm30Array;
}
/**
 * Save citation
 *
 * Copies the edited citation string and (if supported) the citation
 * state from the form into the citation object, converts the
 * meta-data form fields into statements of one meta-data description
 * per supported adapter, injects these into the citation and finally
 * inserts or updates the citation through the CitationDAO.
 *
 * Field values for which no valid interpretation can be found are
 * not added to the description.
 *
 * @return boolean always true
 */
function execute() {
    $citation =& $this->getCitation();
    $citation->setEditedCitation($this->getData('editedCitation'));
    if (in_array($this->getData('citationState'), Citation::_getSupportedCitationStates())) {
        $citation->setCitationState($this->getData('citationState'));
    }

    // Extract data from citation form fields and inject it into the citation
    import('metadata.MetadataDescription');
    $metadataAdapters = $citation->getSupportedMetadataAdapters();
    foreach ($metadataAdapters as $metadataAdapter) {
        // Instantiate a meta-data description for the given schema
        $metadataSchema =& $metadataAdapter->getMetadataSchema();
        $metadataDescription = new MetadataDescription($metadataSchema, ASSOC_TYPE_CITATION);

        // Set the meta-data statements
        foreach ($metadataSchema->getProperties() as $propertyName => $property) {
            $fieldName = $metadataSchema->getNamespacedPropertyId($propertyName);
            $fieldValue = trim($this->getData($fieldName));

            // Only a really empty field removes the statement. empty()
            // must not be used here because it would also discard the
            // legitimate value "0".
            if ($fieldValue === '' || is_null($fieldValue)) {
                $metadataDescription->removeStatement($propertyName);
                continue;
            }

            // Try the property's types in order until one of them
            // yields a list of statements. $statements stays null as
            // long as no valid interpretation was found.
            $statements = null;
            foreach ($property->getTypes() as $type) {
                // Some property types need to be converted first
                switch ($type) {
                    // We currently only support name composites
                    case array(METADATA_PROPERTY_TYPE_COMPOSITE => ASSOC_TYPE_AUTHOR):
                    case array(METADATA_PROPERTY_TYPE_COMPOSITE => ASSOC_TYPE_EDITOR):
                        import('metadata.nlm.PersonStringNlmNameSchemaFilter');
                        $personStringFilter = new PersonStringNlmNameSchemaFilter($type[METADATA_PROPERTY_TYPE_COMPOSITE], PERSON_STRING_FILTER_MULTIPLE);
                        assert($personStringFilter->supportsAsInput($fieldValue));
                        $personDescriptions =& $personStringFilter->execute($fieldValue);
                        if (is_array($personDescriptions)) {
                            $statements = $personDescriptions;
                        }
                        // Break the reference to the filter output before
                        // the variable is re-used.
                        unset($personDescriptions);
                        break;

                    case METADATA_PROPERTY_TYPE_INTEGER:
                        // Only numeric input is a valid integer. Anything
                        // else would silently be cast to 0.
                        if (is_numeric($fieldValue)) {
                            $statements = array((int) $fieldValue);
                        }
                        break;

                    case METADATA_PROPERTY_TYPE_DATE:
                        import('metadata.DateStringNormalizerFilter');
                        $dateStringFilter = new DateStringNormalizerFilter();
                        assert($dateStringFilter->supportsAsInput($fieldValue));
                        $statements = array($dateStringFilter->execute($fieldValue));
                        break;

                    default:
                        if ($property->isValid($fieldValue)) {
                            $statements = array($fieldValue);
                        }
                }

                // Break the outer loop once we found a valid
                // interpretation for our form field.
                if (is_array($statements)) {
                    break;
                }
            }

            // Without a valid interpretation there is nothing to add.
            // Iterating over the raw string would only raise a warning.
            if (is_array($statements)) {
                foreach ($statements as $statement) {
                    $metadataDescription->addStatement($propertyName, $statement);
                    // Presumably addStatement() takes the value by
                    // reference - unset the loop variable so that the
                    // next iteration cannot overwrite the stored value.
                    unset($statement);
                }
            }
            unset($statements);
        }

        // Inject the meta-data into the citation
        $citation->injectMetadata($metadataDescription, true);
    }

    // Persist citation: update when the citation already has a
    // numeric id, insert otherwise.
    $citationDAO =& DAORegistry::getDAO('CitationDAO');
    if (is_numeric($citation->getId())) {
        $citationDAO->updateCitation($citation);
    } else {
        $citationDAO->insertCitation($citation);
    }

    return true;
}