/**
  * Check if the uploaded file has a similar name to an existing
  * file which would then be a candidate for a revised file.
  * @param $uploadedFile SubmissionFile
  * @param $submissionFiles array a list of submission files to
  *  check the uploaded file against.
  * @return integer the id of the possibly revised file or null
  *  if no matches were found.
  */
 function &_checkForRevision(&$uploadedFile, &$submissionFiles)
 {
     // File names are transliterated to ASCII before they are compared --
     // the similar_text function can't handle UTF-8. Neither the transcoder
     // nor the transliterated name of the uploaded file changes between
     // iterations, so both are prepared once, ahead of the loop.
     // NOTE(review): assumes Transcoder::trans() yields the same output for
     // the same input on every call -- confirm against the Transcoder class.
     import('lib.pkp.classes.core.Transcoder');
     $transcoder = new Transcoder('UTF-8', 'ASCII', true);
     $uploadedFileName = $transcoder->trans($uploadedFile->getOriginalFileName());

     // Start with the minimal required similarity. A candidate has to
     // score strictly above this threshold to be accepted.
     $minPercentage = SUBMISSION_MIN_SIMILARITY_OF_REVISION;

     // Find out whether one of the given files matches the uploaded
     // file's name. The result stays null when nothing matches.
     $possibleRevisedFileId = null;
     $matchedPercentage = 0;
     foreach ($submissionFiles as $submissionFile) {
         /* @var $submissionFile SubmissionFile */
         // Do not consider the uploaded file itself.
         if ($uploadedFile->getFileId() == $submissionFile->getFileId()) {
             continue;
         }
         // Do not consider files from different publication formats.
         if ($uploadedFile->getAssocType() == ASSOC_TYPE_REPRESENTATION && $submissionFile->getAssocType() == ASSOC_TYPE_REPRESENTATION && $uploadedFile->getAssocId() != $submissionFile->getAssocId()) {
             continue;
         }
         // Test whether the current submission file is similar to the
         // uploaded file; similar_text() writes the similarity (in
         // percent) into $matchedPercentage.
         $candidateFileName = $transcoder->trans($submissionFile->getOriginalFileName());
         similar_text($uploadedFileName, $candidateFileName, $matchedPercentage);
         if ($matchedPercentage > $minPercentage && !$this->_onlyNumbersDiffer($uploadedFileName, $candidateFileName)) {
             // We found a file that might be a possible revision.
             $possibleRevisedFileId = $submissionFile->getFileId();
             // Raise the threshold to this comparison's percentage so that
             // only strictly better matches are considered from now on
             // (on a tie, the earlier candidate is kept).
             $minPercentage = $matchedPercentage;
         }
     }
     // Return the id of the most similar file (or null). The function
     // returns by reference, hence the result is handed back as a variable.
     return $possibleRevisedFileId;
 }
Ejemplo n.º 2
0
 /**
  * Normalize a string in an unknown (non-UTF8) encoding into a valid UTF-8 sequence
  * @param $str string input string
  * @return string
  */
 static function utf8_normalize($str)
 {
     import('lib.pkp.classes.core.Transcoder');

     // Step 1: work out which encoding the input most likely uses.
     if (String::hasMBString()) {
         // NB: CP-1252 often segfaults; it is left out of the candidate list but will detect as 'ISO-8859-1'
         $candidateEncodings = 'UTF-8, UTF-7, ASCII, ISO-8859-1, EUC-JP, SJIS, eucJP-win, SJIS-win, JIS, ISO-2022-JP';
         $sourceEncoding = mb_detect_encoding($str, $candidateEncodings, false);
     } else {
         // Without mbstring: assume ISO-8859-1 (the PHP default) ...
         $sourceEncoding = 'ISO-8859-1';
         // ... unless iconv is available and converting the input as CP-1252
         // gives a different byte length than converting it as ISO-8859-1.
         if (function_exists('iconv') && strlen(iconv('CP1252', 'UTF-8', $str)) != strlen(iconv('ISO-8859-1', 'UTF-8', $str))) {
             $sourceEncoding = 'CP1252';
         }
     }

     // Step 2: CP-1252/ISO-8859-1 input is first turned into HTML entities;
     // this works because CP-1252 is mapped onto ISO-8859-1.
     $isLatin1Family = 'CP1252' == $sourceEncoding || 'ISO-8859-1' == $sourceEncoding;
     if ($isLatin1Family) {
         $entityTranscoder = new Transcoder('CP1252', 'HTML-ENTITIES');
         $str = $entityTranscoder->trans($str);
     }

     // Step 3: transcode from the detected encoding to UTF-8.
     $utf8Transcoder = new Transcoder($sourceEncoding, 'UTF-8');
     return $utf8Transcoder->trans($str);
 }