Author: Sebastian Bergmann (sb@sebastian-bergmann.de)
Inheritance: implements Countable, implements Iterator
Example #1
0
 /**
  * Copy & Paste Detection (CPD).
  *
  * Tokenizes $file and builds a fixed-width signature (5 bytes for every
  * token that is not in $this->tokensIgnoreList). A window of $minTokens
  * tokens is then slid over that signature; each window hash is looked up
  * in $this->hashes. Consecutive hits form one clone candidate, misses are
  * registered in $this->hashes for later lookups. A candidate is added to
  * $result when it spans at least $minLines lines and does not point back
  * at its own location.
  *
  * If $file cannot be read the method returns without touching $result.
  *
  * @param  string       $file      Path of the file to scan.
  * @param  integer      $minLines  Minimum number of lines a clone must span.
  * @param  integer      $minTokens Number of tokens in each hashed window.
  * @param  CodeCloneMap $result    Receives the line count and the clones found.
  * @param  boolean      $fuzzy     When true, every T_VARIABLE token is hashed
  *                                 as the same text, so variable names are ignored.
  * @author Johann-Peter Hartmann <*****@*****.**>
  */
 public function processFile($file, $minLines, $minTokens, CodeCloneMap $result, $fuzzy = false)
 {
     $buffer = file_get_contents($file);

     // An unreadable file has no tokens and no lines to account for; stop
     // here instead of handing a boolean to the tokenizer.
     if ($buffer === false) {
         return;
     }

     $currentTokenPositions = array();
     $currentTokenRealPositions = array();
     $currentSignature = '';
     $tokens = token_get_all($buffer);
     $tokenNr = 0;
     $lastTokenLine = 0;
     $position = 0;

     $result->setNumLines($result->getNumLines() + substr_count($buffer, "\n"));
     unset($buffer);

     foreach ($tokens as $token) {
         // token_get_all() returns single-character tokens as plain strings;
         // they carry no line number and are not part of the signature.
         if (!is_array($token)) {
             continue;
         }

         if (!isset($this->tokensIgnoreList[$token[0]])) {
             // Normalized position: running sum of the line distance to the
             // directly preceding array token (ignored or not).
             $position += $token[2] - $lastTokenLine;
             $currentTokenPositions[$tokenNr] = $position;
             $currentTokenRealPositions[$tokenNr] = $token[2];
             $tokenNr++;

             if ($fuzzy && $token[0] == T_VARIABLE) {
                 $token[1] = 'variable';
             }

             // 1 byte token id + 4 bytes CRC32 of the token text = 5 bytes.
             $currentSignature .= chr($token[0] & 255) . pack('N*', crc32($token[1]));
         }

         $lastTokenLine = $token[2];
     }

     $count = count($currentTokenPositions);
     $firstLine = 0;
     $firstRealLine = 0;
     $firstHash = null;
     $firstToken = 0;
     $found = false;
     $tokenNr = 0;

     while ($tokenNr <= $count - $minTokens) {
         $line = $currentTokenPositions[$tokenNr];
         $realLine = $currentTokenRealPositions[$tokenNr];
         $hash = substr(md5(substr($currentSignature, $tokenNr * 5, $minTokens * 5), true), 0, 8);

         if (isset($this->hashes[$hash])) {
             // Remember where the run of matching windows started. $found is
             // used as the marker because a normalized position of 0 is a
             // valid value and must not be mistaken for "no run yet".
             if (!$found) {
                 $found = true;
                 $firstLine = $line;
                 $firstRealLine = $realLine;
                 $firstHash = $hash;
                 $firstToken = $tokenNr;
             }
         } else {
             if ($found) {
                 // The run ended with the window that started at
                 // $tokenNr - 1; its last token closes the clone.
                 $fileA = $this->hashes[$firstHash][0];
                 $firstLineA = $this->hashes[$firstHash][1];
                 $lastToken = $tokenNr - 1 + $minTokens - 1;
                 $lastLine = $currentTokenPositions[$lastToken];
                 $lastRealLine = $currentTokenRealPositions[$lastToken];
                 $numLines = $lastLine + 1 - $firstLine;
                 $realNumLines = $lastRealLine + 1 - $firstRealLine;

                 // Skip candidates that are too short or that match their
                 // own location.
                 if ($numLines >= $minLines && ($fileA != $file || $firstLineA != $firstRealLine)) {
                     $result->addClone(new CodeClone(new CodeCloneFile($fileA, $firstLineA), new CodeCloneFile($file, $firstRealLine), $realNumLines, $lastToken + 1 - $firstToken));
                 }

                 $found = false;
             }

             $this->hashes[$hash] = array($file, $realLine);
         }

         $tokenNr++;
     }

     // A run that is still open reaches the last full window of the file;
     // $tokenNr is one past that window's start, so the same arithmetic applies.
     if ($found) {
         $fileA = $this->hashes[$firstHash][0];
         $firstLineA = $this->hashes[$firstHash][1];
         $lastToken = $tokenNr - 1 + $minTokens - 1;
         $lastLine = $currentTokenPositions[$lastToken];
         $lastRealLine = $currentTokenRealPositions[$lastToken];
         $numLines = $lastLine + 1 - $firstLine;
         $realNumLines = $lastRealLine + 1 - $firstRealLine;

         if ($numLines >= $minLines && ($fileA != $file || $firstLineA != $firstRealLine)) {
             $result->addClone(new CodeClone(new CodeCloneFile($fileA, $firstLineA), new CodeCloneFile($file, $firstRealLine), $realNumLines, $lastToken + 1 - $firstToken));
         }
     }
 }
Example #2
0
 /**
  * Copy & Paste Detection (CPD).
  *
  * Tokenizes $file, records the line of every token that is not listed in
  * $this->tokensIgnoreList and appends 5 bytes per such token to a
  * signature string. For every token index the signature slice of up to
  * $minTokens tokens is hashed and looked up in $this->hashes: consecutive
  * hits are collected into one candidate, a miss closes the candidate and
  * stores the new hash. A candidate is added to $result when its line span
  * is strictly greater than $minLines and it does not refer to its own
  * starting location.
  *
  * @param  string       $file      Path of the file to scan.
  * @param  integer      $minLines  Line span a clone has to exceed.
  * @param  integer      $minTokens Number of tokens hashed per window.
  * @param  CodeCloneMap $result    Receives the line count and the clones found.
  * @author Johann-Peter Hartmann <*****@*****.**>
  */
 public function processFile($file, $minLines, $minTokens, CodeCloneMap $result)
 {
     $source = file_get_contents($file);
     $tokens = token_get_all($source);

     $result->setNumLines($result->getNumLines() + substr_count($source, "\n"));
     unset($source);

     // Pass 1: line number and signature entry for every relevant token.
     $tokenLines = array();
     $signature = '';
     $currentLine = 1;

     foreach ($tokens as $token) {
         if (is_string($token)) {
             $text = $token;
         } else {
             $text = $token[1];

             if (!isset($this->tokensIgnoreList[$token[0]])) {
                 $tokenLines[] = $currentLine;
                 // 1 byte token id followed by the 4-byte CRC32 of its text.
                 $signature .= chr($token[0] & 255) . pack('N*', crc32($text));
             }
         }

         // Line numbers are tracked by counting newlines in the token text.
         $currentLine += substr_count($text, "\n");
     }

     // Pass 2: hash a window at every token index and chain the hits.
     $total = count($tokenLines);
     $windowBytes = $minTokens * 5;
     $startLine = 0;
     $inClone = false;

     // NOTE(review): the loop visits every token index, so windows near the
     // end of the file cover fewer than $minTokens tokens.
     for ($index = 0; $index < $total; $index++) {
         $line = $tokenLines[$index];
         $hash = substr(md5(substr($signature, $index * 5, $windowBytes), true), 0, 8);

         if (isset($this->hashes[$hash])) {
             $inClone = true;

             // $startLine === 0 marks "no candidate open yet".
             if ($startLine === 0) {
                 $startLine = $line;
                 $startHash = $hash;
                 $startToken = $index;
             }

             continue;
         }

         if ($inClone) {
             $otherFile = $this->hashes[$startHash][0];
             $otherLine = $this->hashes[$startHash][1];
             $span = $line + 1 - $startLine;

             if ($span > $minLines && !($otherFile == $file && $otherLine == $startLine)) {
                 $result->addClone(new CodeClone($otherFile, $otherLine, $file, $startLine, $span, $index + 1 - $startToken));
             }

             $inClone = false;
             $startLine = 0;
         }

         $this->hashes[$hash] = array($file, $line);
     }

     // A candidate that is still open ran up to the final token; here
     // $line is that token's line and $index equals the token count.
     if ($inClone) {
         $otherFile = $this->hashes[$startHash][0];
         $otherLine = $this->hashes[$startHash][1];
         $span = $line + 1 - $startLine;

         if ($span > $minLines && !($otherFile == $file && $otherLine == $startLine)) {
             $result->addClone(new CodeClone($otherFile, $otherLine, $file, $startLine, $span, $index + 1 - $startToken));
         }
     }
 }