/**
 * Build the OpenURL 1.0 fixture used by the tests.
 *
 * The fixture is a description based on the Openurl10JournalSchema,
 * associated with ASSOC_TYPE_CITATION and filled with a fixed set of
 * author and publication statements. The test fails if the description
 * does not accept the statements.
 *
 * @return MetadataDescription
 */
protected function getTestOpenurl10Description() {
	// Author related statements.
	$authorStatements = array(
		'aulast' => 'von Surname1',
		'aufirst' => 'Given1 P',
		'auinit1' => 'G',
		'auinitm' => 'P',
		'auinit' => 'GP',
		'ausuffix' => 'suff',
		'au' => array(
			'Surname1 suff, P. (Given1) von',
			'Surname2, (Given2)'
		)
	);

	// Statements describing the publication itself.
	$publicationStatements = array(
		'genre' => 'article',
		'jtitle' => 'Some Journal Title',
		'atitle' => 'Some Article Title',
		'date' => '2005-07-03',
		'issn' => '0694760949645',
		'spage' => 17,
		'epage' => 33,
		'volume' => '7',
		'issue' => '5',
		'eissn' => '3049674960475',
		'artnum' => '45',
		'coden' => 'coden',
		'sici' => 'sici'
	);

	// The two groups share no keys, so the union keeps every entry
	// in its original order (author statements first).
	$fixtureStatements = $authorStatements + $publicationStatements;

	$description = new MetadataDescription(
		'lib.pkp.plugins.metadata.openurl10.schema.Openurl10JournalSchema',
		ASSOC_TYPE_CITATION
	);
	$statementsAccepted = $description->setStatements($fixtureStatements);
	self::assertTrue($statementsAccepted);

	return $description;
}
/**
 * This implementation of the CrosswalkFilter builds a new description
 * for the target schema that contains only those statements of the
 * incoming meta-data description whose properties are set in the input
 * and known to the target description.
 *
 * The incoming description itself is left untouched: the statements
 * are filtered on a copy.
 *
 * @see Filter::process()
 * @param $input MetadataDescription
 * @return MetadataDescription a new description based on the target schema
 */
function &process(&$input) {
	// Create the target description
	$output = new MetadataDescription($this->_toSchema);

	// Compare the property names of the incoming description with
	// the property names allowed in the target schema.
	$sourceProperties = $input->getSetPropertyNames();
	$targetProperties = $output->getPropertyNames();
	$propertiesToBeRemoved = array_diff($sourceProperties, $targetProperties);

	// Work on a by-value copy of the statements. Binding the result of
	// getStatements() by reference would - if that method returns its
	// internal array by reference - let the unset() calls below strip
	// statements from the caller's input description.
	$statements = $input->getStatements();

	// Remove statements for properties that are not in the target schema.
	foreach ($propertiesToBeRemoved as $propertyToBeRemoved) {
		assert(isset($statements[$propertyToBeRemoved]));
		unset($statements[$propertyToBeRemoved]);
	}

	// Set the remaining statements in the target description
	$success = $output->setStatements($statements);
	assert($success);

	return $output;
}
/**
 * Build a meta-data description from the meta-data fields of a citation.
 *
 * Every meta-data field present on the data object becomes a statement
 * whose property name is the field name without its name space prefix.
 * Entries of the author and editor person groups that are arrays are
 * wrapped in name descriptions; any other entry is expected to be the
 * et-al marker and is passed on unchanged.
 *
 * @copydoc MetadataDataObjectAdapter::extractMetadataFromDataObject()
 * @param $dataObject Citation
 * @return MetadataDescription
 */
function extractMetadataFromDataObject(&$dataObject) {
	$description = $this->instantiateMetadataDescription();

	// Tie the description to the citation it was extracted from.
	$description->setAssocId($dataObject->getId());

	// Field names carry the name space plus one separator character
	// in front of the actual property name.
	$prefixLength = strlen($this->getMetadataNamespace()) + 1;

	// Properties whose values are lists of person names.
	$authorProperty = 'person-group[@person-group-type="author"]';
	$editorProperty = 'person-group[@person-group-type="editor"]';
	$personGroupProperties = array($authorProperty, $editorProperty);

	// Collect the field names for both flavours of meta-data fields.
	$allFieldNames = array_merge(
		$this->getDataObjectMetadataFieldNames(false),
		$this->getDataObjectMetadataFieldNames(true)
	);

	$statements = array();
	foreach ($allFieldNames as $fieldName) {
		// Fields without data do not produce a statement.
		if (!$dataObject->hasData($fieldName)) continue;

		// Strip the name space prefix.
		$propertyName = substr($fieldName, $prefixLength);

		// Everything but the person groups is taken over as is.
		if (!in_array($propertyName, $personGroupProperties)) {
			$statements[$propertyName] =& $dataObject->getData($fieldName);
			continue;
		}

		// Fetch the names by value so that the conversion below
		// cannot alter the data stored in the citation object.
		$names = $dataObject->getData($fieldName);
		foreach ($names as $key => $name) {
			if (!is_array($name)) {
				// The only unstructured entry allowed in a person
				// group is the et-al string (the import presumably
				// provides the constant - confirm against the filter).
				import('lib.pkp.plugins.metadata.nlm30.filter.Nlm30PersonStringFilter');
				assert($name == PERSON_STRING_FILTER_ETAL);
				continue;
			}

			// Only the two person group properties reach this point, so
			// anything that is not the author group is the editor group.
			$assocType = ($propertyName == $authorProperty ? ASSOC_TYPE_AUTHOR : ASSOC_TYPE_EDITOR);

			// Replace the key/value array with a name description.
			$nameDescription = new MetadataDescription('lib.pkp.plugins.metadata.nlm30.schema.Nlm30NameSchema', $assocType);
			$nameDescription->setStatements($name);
			$names[$key] =& $nameDescription;
			unset($nameDescription);
		}

		$statements[$propertyName] =& $names;
		// Break the binding so that the next person group does not
		// overwrite the statement we just stored.
		unset($names);
	}

	// Hand the collected statements over to the description.
	$success = $description->setStatements($statements);
	assert($success);

	return $description;
}
/**
 * Derive one citation with a "best" value per meta-data property from
 * a list of scored citation parse/lookup results.
 *
 * Only citations with a score at or above the threshold take part.
 * For every property the value that occurs most often among these
 * citations wins. When several values share the highest frequency
 * the one with the highest score is preferred; among values that still
 * tie, the one the sort places first is taken.
 *
 * Note that the incoming array is sorted in place (by key, descending).
 *
 * @param $scoredCitations array citations grouped by score: the key is
 *  the score, the value a list of citations with that score
 * @param $scoreThreshold integer a number between 0 (=no threshold) and 100
 * @return Citation one citation with the "best" values set
 */
function &_guessValues(&$scoredCitations, $scoreThreshold) {
	assert($scoreThreshold >= 0 && $scoreThreshold <= 100);

	// The description that will receive the winning values.
	$targetDescription = new MetadataDescription('lib.pkp.plugins.metadata.nlm30.schema.Nlm30CitationSchema', ASSOC_TYPE_CITATION);

	// Phase 1: gather, per property, every (serialized) value found in
	// citations above the threshold together with the best score that
	// value was seen with.
	$candidatesByProperty = array();
	$topScoreByPropertyAndValue = array();

	// Highest scores first.
	krsort($scoredCitations);
	foreach ($scoredCitations as $score => $citationsWithScore) {
		// Due to the ordering everything from here on is below the
		// threshold, too, so we are done collecting.
		if ($score < $scoreThreshold) break;

		foreach ($citationsWithScore as $citation) {
			foreach ($citation->getStatements() as $propertyName => $value) {
				// Values are serialized so that they can be counted
				// and used as array keys. Duplicates are kept on
				// purpose: they are what the frequency count is
				// based on.
				$serialized = serialize($value);
				$candidatesByProperty[$propertyName][] = $serialized;

				// Scores are visited in descending order, the first
				// score recorded for a value is therefore its maximum.
				if (!isset($topScoreByPropertyAndValue[$propertyName][$serialized])) {
					$topScoreByPropertyAndValue[$propertyName][$serialized] = $score;
				}
			}
		}
	}

	// Phase 2: pick the winner for every property.
	foreach ($candidatesByProperty as $propertyName => $serializedValues) {
		// How often does each value occur? Most frequent ones first.
		$frequencies = array_count_values($serializedValues);
		arsort($frequencies);

		// Keep only the values that share the highest frequency and
		// remember their scores.
		$highestFrequency = reset($frequencies);
		$scoresOfLeaders = array();
		foreach ($frequencies as $serialized => $frequency) {
			if ($frequency < $highestFrequency) break;
			$scoresOfLeaders[$serialized] = $topScoreByPropertyAndValue[$propertyName][$serialized];
		}

		// Among the leaders the highest score decides; the first key
		// after sorting is the winner.
		arsort($scoresOfLeaders);
		reset($scoresOfLeaders);
		$bestValue = unserialize(key($scoresOfLeaders));

		// Store the winner in the target description.
		$statements = array($propertyName => $bestValue);
		$success = $targetDescription->setStatements($statements);
		assert($success);
	}

	// Wrap the result in a citation object.
	$targetCitation = new Citation();
	$targetCitation->injectMetadata($targetDescription);

	return $targetCitation;
}
/**
 * Turn an array of property/value pairs into a new NLM citation
 * description.
 *
 * The array is passed through arrayClean() first; as it is taken by
 * reference the caller's array is replaced with the cleaned version.
 * If the description rejects the statements an error is added to the
 * filter and null is returned.
 *
 * @param $metadataArray array
 * @return MetadataDescription|null null if the meta-data was rejected
 */
function &getNlm30CitationDescriptionFromMetadataArray(&$metadataArray) {
	$description = new MetadataDescription('lib.pkp.plugins.metadata.nlm30.schema.Nlm30CitationSchema', ASSOC_TYPE_CITATION);

	// Clean the incoming meta-data, then try to add it.
	$metadataArray = arrayClean($metadataArray);
	$statementsAccepted = $description->setStatements($metadataArray);

	if ($statementsAccepted) {
		// Label the description with the filter's display name so
		// that we can later tell which filter produced which result.
		$description->setDisplayName($this->getDisplayName());
		return $description;
	}

	// The meta-data was rejected: report the problem and return
	// nothing (via a variable, as we return by reference).
	$messageParams = array('filterName' => $this->getDisplayName());
	$errorMessage = __('submission.citations.filter.invalidMetadata', $messageParams);
	$this->addError($errorMessage);
	$noDescription = null;
	return $noDescription;
}
/**
 * Convert a list of person entries into NLM name descriptions.
 *
 * Entries equal to the et-al marker become the string 'et-al', all
 * other entries are used as statements of a new name description.
 * The keys of the incoming array are preserved.
 *
 * @param $personArray array
 * @param $assocType integer
 * @return array a list of MetadataDescription objects and/or 'et-al' strings
 */
private function &instantiateNlm30NameDescriptions(&$personArray, $assocType) {
	$descriptions = array();
	foreach ($personArray as $index => $personData) {
		// The et-al marker is represented by a plain string.
		if ($personData == PERSON_STRING_FILTER_ETAL) {
			$descriptions[$index] = 'et-al';
			continue;
		}

		// Everything else is test data for a name description which
		// must be accepted by the name schema.
		$nameDescription = new MetadataDescription('lib.pkp.plugins.metadata.nlm30.schema.Nlm30NameSchema', $assocType);
		$statementsAccepted = $nameDescription->setStatements($personData);
		self::assertTrue($statementsAccepted);

		$descriptions[$index] = $nameDescription;
	}
	return $descriptions;
}
/**
 * Add an array of property/value pairs as statements to an NLM
 * citation description.
 *
 * When the caller does not provide a description a fresh one, based
 * on the NlmCitationSchema and associated with ASSOC_TYPE_CITATION,
 * is created.
 *
 * @param $metadataArray array
 * @param $citationDescription MetadataDescription
 * @return MetadataDescription|null null if the statements were rejected
 */
function &addMetadataArrayToNlmCitationDescription(&$metadataArray, $citationDescription = null) {
	// Fall back to a new citation description.
	if ($citationDescription === null) {
		$citationSchema = new NlmCitationSchema();
		$citationDescription = new MetadataDescription($citationSchema, ASSOC_TYPE_CITATION);
	}

	// Try to add the meta-data to the description.
	$statementsAccepted = $citationDescription->setStatements($metadataArray);
	if ($statementsAccepted) {
		return $citationDescription;
	}

	// We return by reference so null has to go through a variable.
	$noDescription = null;
	return $noDescription;
}
/**
 * Build a meta-data description from the meta-data fields of a citation.
 *
 * Every meta-data field present on the data object becomes a statement
 * whose property name is the field name without its name space prefix.
 * The entries of the author and editor person groups are converted
 * to name descriptions.
 *
 * The citation object is only read: all values are taken by value so
 * that the conversion of names cannot change the data stored in the
 * citation.
 *
 * @see MetadataDataObjectAdapter::extractMetadataFromDataObject()
 * @param $dataObject Citation
 * @return MetadataDescription
 */
function &extractMetadataFromDataObject(&$dataObject) {
	$metadataDescription =& $this->instantiateMetadataDescription();

	// Identify the length of the name space prefix
	// (name space plus one separator character).
	$namespacePrefixLength = strlen($this->getMetadataNamespace()) + 1;

	// Get all meta-data field names
	$fieldNames = array_merge(
		$this->getDataObjectMetadataFieldNames(false),
		$this->getDataObjectMetadataFieldNames(true)
	);

	// Retrieve the statements from the data object
	$statements = array();
	$nameSchema = new NlmNameSchema();
	foreach ($fieldNames as $fieldName) {
		if (!$dataObject->hasData($fieldName)) continue;

		// Remove the name space prefix
		$propertyName = substr($fieldName, $namespacePrefixLength);

		if (in_array($propertyName, array('person-group[@person-group-type="author"]', 'person-group[@person-group-type="editor"]'))) {
			// The association type only depends on the person group.
			switch ($propertyName) {
				case 'person-group[@person-group-type="author"]':
					$assocType = ASSOC_TYPE_AUTHOR;
					break;

				case 'person-group[@person-group-type="editor"]':
					$assocType = ASSOC_TYPE_EDITOR;
					break;
			}

			// Retrieve the names array by value. Binding it by reference
			// would - if getData() returns a reference to the object's
			// internal data - replace the name arrays stored in the
			// citation with the description objects created below.
			$names = $dataObject->getData($fieldName);

			// Convert key/value arrays to MetadataDescription objects.
			foreach ($names as $key => $name) {
				$nameDescription = new MetadataDescription($nameSchema, $assocType);
				$nameDescription->setStatements($name);
				$names[$key] = $nameDescription;
			}

			// Plain assignment: each property gets its own copy, so
			// re-using $names for the next person group cannot
			// overwrite a statement that was already stored.
			$statements[$propertyName] = $names;
		} else {
			// Copy the value as well, the statements must not alias
			// the data object's internal data.
			$statements[$propertyName] = $dataObject->getData($fieldName);
		}
	}

	// Set the statements in the meta-data description
	$metadataDescription->setStatements($statements);

	return $metadataDescription;
}
/**
 * Convert a list of person arrays into NLM name descriptions.
 *
 * All descriptions share one NlmNameSchema instance and the given
 * association type. The keys of the incoming array are preserved.
 * The test fails if a description does not accept its statements.
 *
 * @param $personArray array
 * @param $assocType integer
 * @return array a list of MetadataDescription objects
 */
private function &instantiateNlmNameDescriptions(&$personArray, $assocType) {
	// One schema instance serves all name descriptions.
	$sharedNameSchema = new NlmNameSchema();

	$descriptions = array();
	foreach ($personArray as $index => $personData) {
		// Fill a fresh name description with the test data.
		$nameDescription = new MetadataDescription($sharedNameSchema, $assocType);
		$statementsAccepted = $nameDescription->setStatements($personData);
		self::assertTrue($statementsAccepted);

		$descriptions[$index] = $nameDescription;
	}
	return $descriptions;
}