Example #1
 // CRUD test for the citation DAO. This excerpt only covers the "create" step:
 // it builds name and citation meta-data, inserts a citation and checks the
 // returned ID.
 // NOTE(review): the method's braces are not present in this excerpt.
 public function testCitationCrud()
     // Describe the author's name. The "$value = ..." assignments inside the
     // argument lists only label the argument; $value is not read afterwards
     // in the visible code.
     $nameSchema = new NlmNameSchema();
     $nameDescription = new MetadataDescription($nameSchema, ASSOC_TYPE_AUTHOR);
     // 'given-names' is added twice - presumably the property accepts several
     // values; confirm against NlmNameSchema.
     $nameDescription->addStatement('given-names', $value = 'Peter');
     $nameDescription->addStatement('given-names', $value = 'B');
     $nameDescription->addStatement('surname', $value = 'Bork');
     $nameDescription->addStatement('prefix', $value = 'Mr.');
     // Describe the citation itself; the name description above is nested
     // into it as the author person-group.
     $citationSchema = new NlmCitationSchema();
     $citationDescription = new MetadataDescription($citationSchema, ASSOC_TYPE_CITATION);
     $citationDescription->addStatement('person-group[@person-group-type="author"]', $nameDescription);
     // The third argument looks like a locale key for translated values -
     // confirm against MetadataDescription::addStatement().
     $citationDescription->addStatement('article-title', $value = 'PHPUnit in a nutshell', 'en_US');
     $citationDescription->addStatement('article-title', $value = 'PHPUnit in Kürze', 'de_DE');
     $citationDescription->addStatement('date', $value = '2009-08-17');
     $citationDescription->addStatement('size', $value = 320);
     $citationDescription->addStatement('uri', $value = 'http://phpunit.org/nutshell');
     // Build the citation object under test.
     // NOTE(review): $citationDescription is never attached to $citation in
     // the visible lines - check whether a statement is missing here.
     $citation = new Citation('raw citation');
     $citation->setEditedCitation('edited citation');
     // Create: the value returned by insertCitation() is treated as the new
     // citation's ID and must be positive.
     $citationId = $this->citationDAO->insertCitation($citation);
     self::assertTrue($citationId > 0);
Example #2
 // CRUD test for the citation DAO: inserts a citation, reads it back by ID
 // and by association, compares it after an update and finally deletes it.
 // NOTE(review): the method's braces are not present in this excerpt and
 // several steps described by the comments below have no visible statement.
 public function testCitationCrud()
     // Describe the author's name. The "$value = ..." assignments inside the
     // argument lists only label the argument; $value is not read afterwards
     // in the visible code.
     $nameSchema = new NlmNameSchema();
     $nameDescription = new MetadataDescription($nameSchema, ASSOC_TYPE_AUTHOR);
     // 'given-names' is added twice - presumably the property accepts several
     // values; confirm against NlmNameSchema.
     $nameDescription->addStatement('given-names', $value = 'Peter');
     $nameDescription->addStatement('given-names', $value = 'B');
     $nameDescription->addStatement('surname', $value = 'Bork');
     $nameDescription->addStatement('prefix', $value = 'Mr.');
     // Describe the citation itself; the name description above is nested
     // into it as the author person-group.
     $citationSchema = new NlmCitationSchema();
     $citationDescription = new MetadataDescription($citationSchema, ASSOC_TYPE_CITATION);
     $citationDescription->addStatement('person-group[@person-group-type="author"]', $nameDescription);
     // The third argument looks like a locale key for translated values -
     // confirm against MetadataDescription::addStatement().
     $citationDescription->addStatement('article-title', $value = 'PHPUnit in a nutshell', 'en_US');
     $citationDescription->addStatement('article-title', $value = 'PHPUnit in Kürze', 'de_DE');
     $citationDescription->addStatement('date', $value = '2009-08-17');
     $citationDescription->addStatement('size', $value = 320);
     $citationDescription->addStatement('uri', $value = 'http://phpunit.org/nutshell');
     // Build the citation object under test.
     // NOTE(review): neither $citationDescription nor an association
     // type/ID is attached to $citation in the visible lines, although the
     // lookups below rely on ASSOC_TYPE_ARTICLE / 999999 - check for missing
     // statements.
     $citation = new Citation('raw citation');
     $citation->setEditedCitation('edited citation');
     // Create citation
     // The value returned by insertCitation() is treated as the new
     // citation's ID and must be positive.
     $citationId = $this->citationDAO->insertCitation($citation);
     self::assertTrue($citationId > 0);
     // Retrieve citation
     $citationById = $this->citationDAO->getCitation($citationId);
     // Initializes internal state for comparison.
     // NOTE(review): no initializing call is visible before this assertion.
     self::assertEquals($citation, $citationById);
     // Retrieve the same citation through its association; exactly one
     // result is expected.
     $citationsByAssocIdDaoFactory = $this->citationDAO->getCitationsByAssocId(ASSOC_TYPE_ARTICLE, 999999);
     $citationsByAssocId = $citationsByAssocIdDaoFactory->toArray();
     self::assertEquals(1, count($citationsByAssocId));
     // Initializes internal state for comparison.
     // NOTE(review): no initializing call is visible before this assertion.
     self::assertEquals($citation, $citationsByAssocId[0]);
     // Update citation
     $citationDescription->addStatement('article-title', $value = 'PHPUnit rápido', 'pt_BR');
     // NOTE(review): $updatedCitation is built but never given an ID nor
     // passed to the DAO in the visible lines, so as shown the re-fetched
     // record is compared against an object that was never stored.
     $updatedCitation = new Citation('another raw citation');
     $updatedCitation->setEditedCitation('another edited citation');
     $citationAfterUpdate = $this->citationDAO->getCitation($citationId);
     // Initializes internal state for comparison.
     // NOTE(review): no initializing call is visible before this assertion.
     self::assertEquals($updatedCitation, $citationAfterUpdate);
     // Delete citation
     // NOTE(review): the association ID used here (999998) differs from the
     // one used for the lookup above (999999) - presumably the update step
     // changes the association ID; confirm.
     $this->citationDAO->deleteCitationsByAssocId(ASSOC_TYPE_ARTICLE, 999998);
  * Take an array of citation parse/lookup results and derive a citation
  * with one "best" set of values.
  * We determine the best values within the citations that have a score at or
  * above the given threshold. Citations with a score below the threshold will
  * be ignored.
  * For these citations we count the frequency of values per meta-data property.
  * The most frequent value will be chosen as "best" value.
  * If two values have the same frequency then decide based on the score. If
  * this is still ambiguous then return the first of the remaining values.
  * This method will also calculate the overall parsing score for the target
  * citation.
  * @param $scoredCitations array citation results grouped by score, in the
  *  form array(score => array(result, ...))
  * @param $scoreThreshold integer a number between 0 (=no threshold) and 100,
  *  default: no threshold
  * @return Citation one citation with the "best" values set
 function &_guessValues(&$scoredCitations, $scoreThreshold = 0)
     // NOTE(review): the braces of this function and of its control
     // structures are not present in this excerpt, and some statements the
     // comments refer to (sorting, leaving loops early) are not visible.
     // The threshold is a percentage-like value; anything outside 0..100 is
     // a programming error.
     assert($scoreThreshold >= 0 && $scoreThreshold <= 100);
     // Create the target citation description.
     $metadataSchema = new NlmCitationSchema();
     $targetDescription = new MetadataDescription($metadataSchema, ASSOC_TYPE_CITATION);
     // Step 1: List all values and max scores that have been identified for a given element
     //         but only include values from results above a given scoring threshold
     // Initialize variables for the first step.
     // $valuesByPropertyName: property name => list of serialized values
     // (duplicates kept). $maxScoresByPropertyNameAndValue: property name =>
     // serialized value => score under which that value was first seen.
     $valuesByPropertyName = array();
     $maxScoresByPropertyNameAndValue = array();
     // Sort the scored citations by score with the highest score first.
     // NOTE(review): no sort call is visible here; the loop below depends on
     // $scoredCitations being ordered by descending score.
     foreach ($scoredCitations as $currentScore => $citationsForCurrentScore) {
         // Check whether the current score is below the threshold, if so
         // stop the loop. We've sorted our citations by score so the remaining
         // citations all have scores below the threshold and we can forget
         // about them.
         // NOTE(review): the statement that leaves the loop is not visible
         // in this excerpt.
         if ($currentScore < $scoreThreshold) {
         foreach ($citationsForCurrentScore as $citationForCurrentScore) {
             $statements = $citationForCurrentScore->getStatements();
             // Add the property values and scores of this citation
             // to the overall property lists
             foreach ($statements as $propertyName => $value) {
                 // Initialize sub-arrays if necessary
                 if (!isset($valuesByPropertyName[$propertyName])) {
                     $valuesByPropertyName[$propertyName] = array();
                 if (!isset($maxScoresByPropertyNameAndValue[$propertyName])) {
                     $maxScoresByPropertyNameAndValue[$propertyName] = array();
                 // Add the value for the given property, as we want to count
                 // value frequencies later, we explicitly allow duplicates.
                 // Values are serialized so that they can be counted with
                 // array_count_values() and used as array keys below.
                 $valuesByPropertyName[$propertyName][] = serialize($value);
                 // As we have ordered our citations descending by score, the
                 // first score found for a value is also the maximum score.
                 if (!isset($maxScoresByPropertyNameAndValue[$propertyName][serialize($value)])) {
                     $maxScoresByPropertyNameAndValue[$propertyName][serialize($value)] = $currentScore;
     // Step 2: Find out the values that occur most frequently for each element
     //         and order these by score.
     foreach ($valuesByPropertyName as $propertyName => $values) {
         // Count the occurrences of each value within the given element
         $valueFrequencies = array_count_values($values);
         // Order the most frequent values to the beginning of the array
         // NOTE(review): no sort call is visible here; the loop below depends
         // on $valueFrequencies being ordered by descending frequency.
         // Get the most frequent values (may be several if there are more than one
         // with the same frequency).
         $scoresOfMostFrequentValues = array();
         $previousValueFrequency = 0;
         foreach ($valueFrequencies as $value => $valueFrequency) {
             // Only extract the most frequent values, jump out of the
             // loop when less frequent values start.
             // NOTE(review): the statement that leaves the loop is not
             // visible in this excerpt.
             if ($previousValueFrequency > $valueFrequency) {
             $previousValueFrequency = $valueFrequency;
             $scoresOfMostFrequentValues[$value] = $maxScoresByPropertyNameAndValue[$propertyName][$value];
         // Now we can order the most frequent values by score, starting
         // with the highest score.
         // NOTE(review): no sort call is visible here either.
         // Now get the first key which represents the value with the
         // highest frequency and the highest score.
         // key() returns the key at the array's current internal pointer
         // position, so this relies on the pointer being at the first entry.
         $bestValue = unserialize(key($scoresOfMostFrequentValues));
         // Set the found "best" element value in the result citation.
         // NOTE(review): $success is not checked in the visible code.
         $statements = array($propertyName => $bestValue);
         $success = $targetDescription->setStatements($statements);
     // Calculate the average of all scores
     // The average is weighted by the number of citations per score and
     // covers all citations, including those below the threshold.
     $overallScoreSum = 0;
     $overallScoreCount = 0;
     foreach ($scoredCitations as $currentScore => $citationsForCurrentScore) {
         $countCitationsForCurrentScore = count($citationsForCurrentScore);
         $overallScoreSum += $countCitationsForCurrentScore * $currentScore;
         $overallScoreCount += $countCitationsForCurrentScore;
     // NOTE(review): if $scoredCitations contains no citations,
     // $overallScoreCount is 0 and this is a division by zero.
     $averageScore = $overallScoreSum / $overallScoreCount;
     // Get the max score (= the first key from scoredCitations
     // as these are sorted by score).
     // NOTE(review): key() reads the array's internal pointer; this only
     // yields the first key if the pointer is at the start of the array.
     $maxScore = key($scoredCitations);
     // Calculate the overall parse score as by weighing
     // the max score and the average score 50% each.
     // FIXME: This algorithm seems a bit arbitrary.
     $parseScore = ($maxScore + $averageScore) / 2;
     // Instantiate the target citation
     // NOTE(review): neither $targetDescription nor $parseScore is attached
     // to $targetCitation in the visible lines, so as shown an empty
     // citation is returned - check for missing statements.
     $targetCitation = new Citation();
     return $targetCitation;