diff() static public method

The calculation is optimized to identify the largest common substring. The return value is an array of the following format: array( array( diff-type => substring ), array(...) ), where diff-type is one of: -1 = deletion, 0 = common substring, 1 = addition.
static public diff ( $originalString, $editedString ) : array
$originalString string
$editedString string
return array
 /**
  * Runs PKPString::diff() over a table of fixtures and checks that each
  * result matches the expected list of single-entry arrays, each keyed by
  * diff type (-1 = deletion, 0 = common substring, 1 = addition).
  * @covers PKPString::diff
  */
 public function testDiff()
 {
     // Each fixture is array(original string, edited string, expected diff).
     $fixtures = array(
         // Two strings that share common substrings.
         array(
             'The original string.',
             'The edited original.',
             array(array(0 => 'The'), array(1 => ' edited'), array(0 => ' original'), array(-1 => ' string'), array(0 => '.')),
         ),
         // Two strings with nothing in common.
         array(
             'abc',
             'def',
             array(array(-1 => 'abc'), array(1 => 'def')),
         ),
         // A more realistic sample taken from the citation editor use case.
         array(
             'Willinsky, B. (2006). The access principle: The case for open acces to research and scholarship. Cambridge, MA: MIT Press.',
             'Willinsky, J. (2006). The access principle: The case for open access to research and scholarship. Cambridge, MA: MIT Press.',
             array(array(0 => 'Willinsky, '), array(-1 => 'B'), array(1 => 'J'), array(0 => '. (2006). The access principle: The case for open acce'), array(1 => 's'), array(0 => 's to research and scholarship. Cambridge, MA: MIT Press.')),
         ),
     );

     // Fixtures are checked in declaration order; the first mismatch fails the test.
     foreach ($fixtures as $fixture) {
         list($before, $after, $wantedDiff) = $fixture;
         $actualDiff = PKPString::diff($before, $after);
         self::assertEquals($wantedDiff, $actualDiff);
     }
 }
 /**
  * Derive a confidence score calculated as the similarity of the
  * original raw citation and the citation text generated from the
  * citation description.
  *
  * The score is the percentage of characters of the original citation
  * that were not reported as deleted by PKPString::diff(). Additions are
  * ignored on purpose. An empty original citation yields a score of 0
  * because there is nothing to compare against.
  * @param $metadataDescription MetadataDescription
  * @return integer filter confidence score between 0 and 100
  */
 function _filterConfidenceScore(&$metadataDescription)
 {
     // Retrieve the original plain text citation.
     $originalCitation = $this->getOriginalRawCitation();
     // Generate the formatted citation output from the description.
     $citationOutputFilter =& $this->getCitationOutputFilter();
     $generatedCitation = $citationOutputFilter->execute($metadataDescription);
     // Strip formatting and the Google Scholar tag so that we get a plain
     // text string that is comparable with the raw citation.
     $generatedCitation = trim(str_replace(GOOGLE_SCHOLAR_TAG, '', strip_tags($generatedCitation)));
     // Compare the original to the generated citation.
     $citationDiff = PKPString::diff($originalCitation, $generatedCitation);
     // Calculate similarity as the number of deleted characters in relation to the
     // number of characters in the original citation. This intentionally excludes
     // additions as these can represent useful data like a DOI or an external link.
     $deletedCharacters = 0;
     foreach ($citationDiff as $diffPart) {
         // Identify deletions. Integer-like array keys are always stored as
         // integers, so a strict comparison is safe here.
         if (key($diffPart) === -1) {
             $deletedCharacters += PKPString::strlen(current($diffPart));
         }
     }
     $originalCharacters = PKPString::strlen($originalCitation);
     // Guard against a division by zero: without original text there is no
     // basis for a similarity ratio, so report no confidence at all.
     if ($originalCharacters == 0) {
         return 0;
     }
     $partOfCommonCharacters = ($originalCharacters - $deletedCharacters) / $originalCharacters;
     $filterConfidenceScore = (int) round(min($partOfCommonCharacters * 100, 100));
     return $filterConfidenceScore;
 }