/**
 * Check that a citation carrying NLM meta-data can be inserted
 * through the citation DAO.
 *
 * An author name description is nested into a citation description,
 * the citation description is injected into a fresh citation and the
 * citation is inserted. The test passes when the DAO hands back a
 * positive numeric id.
 */
public function testCitationCrud() {
    // NOTE(review): literal values are wrapped in "$value = ..."
    // expressions; presumably addStatement() receives its value by
    // reference - confirm before simplifying these calls.

    // Describe the author of the cited work.
    $authorSchema = new NlmNameSchema();
    $authorDescription = new MetadataDescription($authorSchema, ASSOC_TYPE_AUTHOR);
    $authorDescription->addStatement('given-names', $value = 'Peter');
    $authorDescription->addStatement('given-names', $value = 'B');
    $authorDescription->addStatement('surname', $value = 'Bork');
    $authorDescription->addStatement('prefix', $value = 'Mr.');

    // Describe the cited work itself, including the author from above.
    $workSchema = new NlmCitationSchema();
    $workDescription = new MetadataDescription($workSchema, ASSOC_TYPE_CITATION);
    $workDescription->addStatement('person-group[@person-group-type="author"]', $authorDescription);
    $workDescription->addStatement('article-title', $value = 'PHPUnit in a nutshell', 'en_US');
    $workDescription->addStatement('article-title', $value = 'PHPUnit in Kürze', 'de_DE');
    $workDescription->addStatement('date', $value = '2009-08-17');
    $workDescription->addStatement('size', $value = 320);
    $workDescription->addStatement('uri', $value = 'http://phpunit.org/nutshell');

    // Assemble the citation under test.
    $newCitation = new Citation('raw citation');
    $newCitation->setAssocType(ASSOC_TYPE_ARTICLE);
    $newCitation->setAssocId(5);
    $newCitation->setEditedCitation('edited citation');
    $newCitation->setParseScore(50);
    $newCitation->injectMetadata($workDescription);

    // Insert it and make sure we got a usable id back.
    $insertedId = $this->citationDAO->insertCitation($newCitation);
    self::assertTrue(is_numeric($insertedId));
    self::assertTrue($insertedId > 0);
}
/**
 * Run a citation through its full persistence life cycle:
 * insert, retrieve (by id and by association), update and delete.
 */
public function testCitationCrud() {
    // Association ids used before and after the update step.
    $initialAssocId = 999999;
    $updatedAssocId = 999998;

    // NOTE(review): literal values are wrapped in "$value = ..."
    // expressions; presumably addStatement() receives its value by
    // reference - confirm before simplifying these calls.

    // Describe the author of the cited work.
    $authorSchema = new NlmNameSchema();
    $authorDescription = new MetadataDescription($authorSchema, ASSOC_TYPE_AUTHOR);
    $authorDescription->addStatement('given-names', $value = 'Peter');
    $authorDescription->addStatement('given-names', $value = 'B');
    $authorDescription->addStatement('surname', $value = 'Bork');
    $authorDescription->addStatement('prefix', $value = 'Mr.');

    // Describe the cited work itself, including the author from above.
    $workSchema = new NlmCitationSchema();
    $workDescription = new MetadataDescription($workSchema, ASSOC_TYPE_CITATION);
    $workDescription->addStatement('person-group[@person-group-type="author"]', $authorDescription);
    $workDescription->addStatement('article-title', $value = 'PHPUnit in a nutshell', 'en_US');
    $workDescription->addStatement('article-title', $value = 'PHPUnit in Kürze', 'de_DE');
    $workDescription->addStatement('date', $value = '2009-08-17');
    $workDescription->addStatement('size', $value = 320);
    $workDescription->addStatement('uri', $value = 'http://phpunit.org/nutshell');

    // Assemble the citation under test.
    $originalCitation = new Citation('raw citation');
    $originalCitation->setAssocType(ASSOC_TYPE_ARTICLE);
    $originalCitation->setAssocId($initialAssocId);
    $originalCitation->setEditedCitation('edited citation');
    $originalCitation->setParseScore(50);
    $originalCitation->injectMetadata($workDescription);

    // --- Create ---
    $storedId = $this->citationDAO->insertCitation($originalCitation);
    self::assertTrue(is_numeric($storedId));
    self::assertTrue($storedId > 0);

    // --- Retrieve by id ---
    $fetchedById = $this->citationDAO->getCitation($storedId);
    // Initializes internal state for comparison.
    $fetchedById->getMetadataFieldNames();
    self::assertEquals($originalCitation, $fetchedById);

    // --- Retrieve by association ---
    $matchFactory = $this->citationDAO->getCitationsByAssocId(ASSOC_TYPE_ARTICLE, $initialAssocId);
    $matches = $matchFactory->toArray();
    self::assertEquals(1, count($matches));
    // Initializes internal state for comparison.
    $matches[0]->getMetadataFieldNames();
    self::assertEquals($originalCitation, $matches[0]);

    // --- Update ---
    // Change the meta-data: drop the date, add one more title translation.
    $workDescription->removeStatement('date');
    $workDescription->addStatement('article-title', $value = 'PHPUnit rápido', 'pt_BR');

    // Build a replacement citation that re-uses the stored id.
    $replacementCitation = new Citation('another raw citation');
    $replacementCitation->setId($storedId);
    $replacementCitation->setAssocType(ASSOC_TYPE_ARTICLE);
    $replacementCitation->setAssocId($updatedAssocId);
    $replacementCitation->setEditedCitation('another edited citation');
    $replacementCitation->setParseScore(50);
    $replacementCitation->injectMetadata($workDescription);
    $this->citationDAO->updateCitation($replacementCitation);

    $fetchedAfterUpdate = $this->citationDAO->getCitation($storedId);
    // Initializes internal state for comparison.
    $fetchedAfterUpdate->getMetadataFieldNames();
    self::assertEquals($replacementCitation, $fetchedAfterUpdate);

    // --- Delete ---
    $this->citationDAO->deleteCitationsByAssocId(ASSOC_TYPE_ARTICLE, $updatedAssocId);
    self::assertNull($this->citationDAO->getCitation($storedId));
}
/**
 * Build a new citation and inject the given meta-data into it.
 *
 * The citation is created with the raw text 'raw citation' and is
 * associated with the assoc type and assoc id stored on this object.
 *
 * @param $citationDescription MetadataDescription the meta-data to inject
 * @return Citation the newly created citation
 */
protected function &getCitation($citationDescription) {
    // Make the citation class available before instantiating it.
    import('lib.pkp.classes.citation.Citation');

    $newCitation = new Citation('raw citation');
    $newCitation->setAssocType($this->assocType);
    $newCitation->setAssocId($this->assocId);
    $newCitation->injectMetadata($citationDescription);

    // Returned via a local variable because the method returns by reference.
    return $newCitation;
}
/**
 * Run a citation (including one source description) through its full
 * persistence life cycle: insert, retrieve (by id and by association),
 * update and delete.
 *
 * Before each comparison the in-memory and the retrieved objects are
 * brought into the same internal state, see the "align state" sections.
 *
 * @covers CitationDAO
 */
public function testCitationCrud() {
    $dao = DAORegistry::getDAO('CitationDAO'); /* @var $dao CitationDAO */

    // Association ids used before and after the update step.
    $initialAssocId = 999999;
    $updatedAssocId = 999998;

    // NOTE(review): literal values are wrapped in "$value = ..."
    // expressions; presumably addStatement() receives its value by
    // reference - confirm before simplifying these calls.

    // Describe the author of the cited work.
    $authorSchemaName = 'lib.pkp.plugins.metadata.nlm30.schema.Nlm30NameSchema';
    $authorDescription = new MetadataDescription($authorSchemaName, ASSOC_TYPE_AUTHOR);
    $authorDescription->addStatement('given-names', $value = 'Peter');
    $authorDescription->addStatement('given-names', $value = 'B');
    $authorDescription->addStatement('surname', $value = 'Bork');
    $authorDescription->addStatement('prefix', $value = 'Mr.');

    // Describe the cited work itself, including the author from above.
    $workSchemaName = 'lib.pkp.plugins.metadata.nlm30.schema.Nlm30CitationSchema';
    $workDescription = new MetadataDescription($workSchemaName, ASSOC_TYPE_CITATION);
    $workDescription->addStatement('person-group[@person-group-type="author"]', $authorDescription);
    $workDescription->addStatement('article-title', $value = 'PHPUnit in a nutshell', 'en_US');
    $workDescription->addStatement('article-title', $value = 'PHPUnit in Kürze', 'de_DE');
    $workDescription->addStatement('date', $value = '2009-08-17');
    $workDescription->addStatement('size', $value = 320);
    $workDescription->addStatement('uri', $value = 'http://phpunit.org/nutshell');

    // A simple source description, displayed (and later looked up) as 'test'.
    $sourceInfo = new MetadataDescription($workSchemaName, ASSOC_TYPE_CITATION);
    $sourceInfo->setDisplayName('test');
    $sourceInfo->addStatement('article-title', $value = 'a simple source description', 'en_US');
    $sourceInfo->setSeq(0);

    // Assemble the citation under test.
    $originalCitation = new Citation('raw citation');
    $originalCitation->setAssocType(ASSOC_TYPE_ARTICLE);
    $originalCitation->setAssocId($initialAssocId);
    $originalCitation->setSeq(50);
    $originalCitation->addSourceDescription($sourceInfo);
    $originalCitation->injectMetadata($workDescription);

    // --- Create ---
    $storedId = $dao->insertObject($originalCitation);
    self::assertTrue(is_numeric($storedId));
    self::assertTrue($storedId > 0);

    // --- Retrieve by id ---
    $fetchedById = $dao->getObjectById($storedId);

    // Align state of expected and actual objects for comparison.
    $originalCitation->removeSupportedMetadataAdapter($workSchemaName);
    $fetchedById->removeSupportedMetadataAdapter($workSchemaName);
    $fetchedById->_extractionAdaptersLoaded = true;
    $fetchedById->_injectionAdaptersLoaded = true;
    $sourceInfo->setAssocId($storedId);
    $sourceInfo->removeSupportedMetadataAdapter($workSchemaName);
    $fetchedSources = $fetchedById->getSourceDescriptions();
    // This will instantiate the meta-data schema internally.
    $fetchedSources['test']->getMetadataSchema();

    self::assertEquals($originalCitation, $fetchedById);

    // --- Retrieve by association ---
    $matchFactory = $dao->getObjectsByAssocId(ASSOC_TYPE_ARTICLE, $initialAssocId);
    $matches = $matchFactory->toArray();
    self::assertEquals(1, count($matches));

    // Align state of the retrieved object for comparison.
    $matches[0]->_extractionAdaptersLoaded = true;
    $matches[0]->_injectionAdaptersLoaded = true;
    $matches[0]->removeSupportedMetadataAdapter($workSchemaName);
    $matchedSources = $matches[0]->getSourceDescriptions();
    // This will instantiate the meta-data schema internally.
    $matchedSources['test']->getMetadataSchema();

    self::assertEquals($originalCitation, $matches[0]);

    // --- Update ---
    // Change the citation meta-data: drop the date, add one more title translation.
    $workDescription->removeStatement('date');
    $workDescription->addStatement('article-title', $value = 'PHPUnit rápido', 'pt_BR');

    // Change the source description as well.
    $sourceInfo->addStatement('article-title', $value = 'edited source description', 'en_US', true);

    // Build a replacement citation that re-uses the stored id.
    $replacementCitation = new Citation('another raw citation');
    $replacementCitation->setId($storedId);
    $replacementCitation->setAssocType(ASSOC_TYPE_ARTICLE);
    $replacementCitation->setAssocId($updatedAssocId);
    $replacementCitation->setSeq(50);
    $replacementCitation->addSourceDescription($sourceInfo);
    $replacementCitation->injectMetadata($workDescription);
    $dao->updateObject($replacementCitation);

    $fetchedAfterUpdate = $dao->getObjectById($storedId);

    // Align state of expected and actual objects for comparison.
    $replacementCitation->removeSupportedMetadataAdapter($workSchemaName);
    $fetchedAfterUpdate->removeSupportedMetadataAdapter($workSchemaName);
    $fetchedAfterUpdate->_extractionAdaptersLoaded = true;
    $fetchedAfterUpdate->_injectionAdaptersLoaded = true;
    $updatedSources = $fetchedAfterUpdate->getSourceDescriptions();
    // This will instantiate the meta-data schema internally.
    $updatedSources['test']->getMetadataSchema();
    $sourceInfo->removeSupportedMetadataAdapter($workSchemaName);

    self::assertEquals($replacementCitation, $fetchedAfterUpdate);

    // --- Delete ---
    $dao->deleteObjectsByAssocId(ASSOC_TYPE_ARTICLE, $updatedAssocId);
    self::assertNull($dao->getObjectById($storedId));
}
/**
 * Take an array of citation parse/lookup results and derive a citation
 * with one "best" set of values.
 *
 * We determine the best values within the citations that have a score at
 * or above the given threshold. Citations with a score below the threshold
 * will be ignored.
 *
 * For these citations we count the frequency of values per meta-data
 * property. The most frequent value will be chosen as "best" value.
 *
 * If two values have the same frequency then we decide based on the
 * (maximum) score they were found with. If this is still ambiguous then
 * the value that was encountered first wins. Values are encountered in
 * descending score order and, within one score, in the order in which
 * the citations are listed.
 *
 * @param $scoredCitations array citation descriptions grouped by score
 *  (score => list of descriptions). NB: this array is passed by reference
 *  and will be sorted in place by descending score.
 * @param $scoreThreshold integer a number between 0 (=no threshold) and 100
 * @return Citation one citation with the "best" values set
 */
function &_guessValues(&$scoredCitations, $scoreThreshold) {
    assert($scoreThreshold >= 0 && $scoreThreshold <= 100);

    // Create the target citation description.
    $targetDescription = new MetadataDescription('lib.pkp.plugins.metadata.nlm30.schema.Nlm30CitationSchema', ASSOC_TYPE_CITATION);

    // Step 1: List all values and max scores that have been identified
    // for a given property but only include values from results at or
    // above the scoring threshold.
    $valuesByPropertyName = array();
    $maxScoresByPropertyNameAndValue = array();

    // Sort the scored citations by score with the highest score first.
    krsort($scoredCitations);

    foreach ($scoredCitations as $currentScore => $citationsForCurrentScore) {
        // The citations are ordered by descending score, so as soon as
        // we drop below the threshold all remaining citations can be
        // ignored.
        if ($currentScore < $scoreThreshold) {
            break;
        }

        foreach ($citationsForCurrentScore as $citationForCurrentScore) {
            $statements = $citationForCurrentScore->getStatements();

            // Add the property values and scores of this citation
            // to the overall property lists.
            foreach ($statements as $propertyName => $value) {
                // Initialize sub-arrays if necessary.
                if (!isset($valuesByPropertyName[$propertyName])) {
                    $valuesByPropertyName[$propertyName] = array();
                }
                if (!isset($maxScoresByPropertyNameAndValue[$propertyName])) {
                    $maxScoresByPropertyNameAndValue[$propertyName] = array();
                }

                // Values are serialized so that they can be used as array
                // keys. As we want to count value frequencies later, we
                // explicitly allow duplicates here.
                $serializedValue = serialize($value);
                $valuesByPropertyName[$propertyName][] = $serializedValue;

                // As we have ordered our citations descending by score,
                // the first score found for a value is also its maximum
                // score.
                if (!isset($maxScoresByPropertyNameAndValue[$propertyName][$serializedValue])) {
                    $maxScoresByPropertyNameAndValue[$propertyName][$serializedValue] = $currentScore;
                }
            }
        }
    }

    // Step 2: For each property pick the value that occurs most
    // frequently, using the score and then the order of first occurrence
    // as tie-breakers.
    foreach ($valuesByPropertyName as $propertyName => $serializedValues) {
        // Count the occurrences of each value. The keys of the result
        // appear in the order in which the values were first encountered.
        $valueFrequencies = array_count_values($serializedValues);

        // A single scan with strict "greater than" comparisons keeps the
        // first candidate on a full tie. This does not depend on the
        // stability of PHP's sort functions (which is only guaranteed
        // as of PHP 8.0).
        $bestSerializedValue = null;
        $bestFrequency = 0;
        $bestScore = null;
        foreach ($valueFrequencies as $serializedValue => $valueFrequency) {
            $valueScore = $maxScoresByPropertyNameAndValue[$propertyName][$serializedValue];
            $isBetter = is_null($bestSerializedValue)
                || $valueFrequency > $bestFrequency
                || ($valueFrequency == $bestFrequency && $valueScore > $bestScore);
            if ($isBetter) {
                $bestSerializedValue = $serializedValue;
                $bestFrequency = $valueFrequency;
                $bestScore = $valueScore;
            }
        }

        // Every property listed in step 1 has at least one value.
        assert(!is_null($bestSerializedValue));
        $bestValue = unserialize($bestSerializedValue);

        // Set the found "best" value in the result description.
        // NOTE(review): setStatements() is invoked once per property;
        // this assumes that it adds to rather than resets the description
        // - confirm against MetadataDescription::setStatements().
        // The statements are handed over in a variable as in the original
        // code (presumably because the parameter is taken by reference).
        $statements = array($propertyName => $bestValue);
        $success = $targetDescription->setStatements($statements);
        assert($success);
    }

    // Instantiate the target citation.
    $targetCitation = new Citation();
    $targetCitation->injectMetadata($targetDescription);
    return $targetCitation;
}