/**
 * Copy & Paste Detection (CPD).
 *
 * Tokenizes $file and turns every token that is not listed in
 * $this->tokensIgnoreList into a 5-byte signature chunk (1 byte token id,
 * 4 bytes CRC32 of the token text). A window of $minTokens tokens is then
 * slid over that signature and each window is hashed:
 *
 *  - a window whose hash already exists in $this->hashes starts or extends
 *    a run of matching windows;
 *  - any other window ends the current run (which is then reported) and is
 *    stored in $this->hashes together with its file and line.
 *
 * The number of "\n" characters in the file is added to the line total of
 * $result. Nothing is scanned when the file cannot be read, when $minTokens
 * is smaller than 1, or when the file has fewer than $minTokens counted
 * tokens.
 *
 * @param string       $file      Path of the file to scan.
 * @param integer      $minLines  Minimum number of lines a clone must span.
 * @param integer      $minTokens Number of tokens per hashed window.
 * @param CodeCloneMap $result    Receives the line count and the clones found.
 * @param boolean      $fuzzy     When true, every T_VARIABLE token is hashed
 *                                as the text 'variable', so differing
 *                                variable names still match.
 * @author Johann-Peter Hartmann <*****@*****.**>
 */
public function processFile($file, $minLines, $minTokens, CodeCloneMap $result, $fuzzy = false)
{
    $buffer = file_get_contents($file);

    // file_get_contents() already raised a warning; there is nothing to scan.
    if ($buffer === false) {
        return;
    }

    $tokens = token_get_all($buffer);

    $result->setNumLines($result->getNumLines() + substr_count($buffer, "\n"));
    unset($buffer);

    $currentTokenPositions     = array();
    $currentTokenRealPositions = array();
    $currentSignature          = '';
    $position                  = 0;
    $lastTokenLine             = 0;

    foreach ($tokens as $token) {
        // Tokens returned as plain strings carry no line number and are not
        // part of the signature.
        if (!is_array($token)) {
            continue;
        }

        if (!isset($this->tokensIgnoreList[$token[0]])) {
            // Running position: grows by the line distance between this
            // token and the previous array token (ignored ones included).
            $position += $token[2] - $lastTokenLine;

            $currentTokenPositions[]     = $position;
            $currentTokenRealPositions[] = $token[2];

            $text = ($fuzzy && $token[0] === T_VARIABLE) ? 'variable' : $token[1];

            // Exactly 5 bytes per token: the low byte of the token id plus
            // the 32-bit CRC of its text. The window arithmetic below
            // relies on this fixed width.
            $currentSignature .= chr($token[0] & 255) . pack('N*', crc32($text));
        }

        $lastTokenLine = $token[2];
    }

    unset($tokens);

    $count = count($currentTokenPositions);

    // A window size below 1 would walk past the end of the position arrays;
    // a file shorter than one window cannot contain a clone.
    if ($minTokens < 1 || $count < $minTokens) {
        return;
    }

    $lastStart  = $count - $minTokens;
    $found      = false;
    $firstHash  = null;
    $firstToken = 0;

    for ($tokenNr = 0; $tokenNr <= $lastStart; $tokenNr++) {
        // First 8 raw bytes of the MD5 of the current window.
        $hash = substr(md5(substr($currentSignature, $tokenNr * 5, $minTokens * 5), true), 0, 8);

        if (isset($this->hashes[$hash])) {
            if (!$found) {
                $found      = true;
                $firstHash  = $hash;
                $firstToken = $tokenNr;
            }

            continue;
        }

        if ($found) {
            // The last matching window started at $tokenNr - 1, so the run
            // ends on that window's final token.
            $this->addCloneForRun(
                $result,
                $file,
                $minLines,
                $firstHash,
                $firstToken,
                $tokenNr + $minTokens - 2,
                $currentTokenPositions,
                $currentTokenRealPositions
            );

            $found = false;
        }

        $this->hashes[$hash] = array($file, $currentTokenRealPositions[$tokenNr]);
    }

    // A run that is still open reaches the last token of the file.
    if ($found) {
        $this->addCloneForRun(
            $result,
            $file,
            $minLines,
            $firstHash,
            $firstToken,
            $count - 1,
            $currentTokenPositions,
            $currentTokenRealPositions
        );
    }
}

/**
 * Reports the clone covered by one run of matching windows, if it qualifies.
 *
 * The clone is added to $result when it spans at least $minLines lines
 * (measured on the running positions) and is not the stored origin itself.
 *
 * @param CodeCloneMap $result        Receives the clone.
 * @param string       $file          File currently being scanned.
 * @param integer      $minLines      Minimum number of lines a clone must span.
 * @param string       $firstHash     Hash of the first matching window of the run.
 * @param integer      $firstToken    Index of the first token of the run.
 * @param integer      $lastToken     Index of the last token of the run.
 * @param array        $positions     Running position per counted token.
 * @param array        $realPositions Line number in $file per counted token.
 */
private function addCloneForRun(CodeCloneMap $result, $file, $minLines, $firstHash, $firstToken, $lastToken, array $positions, array $realPositions)
{
    list($fileA, $firstLineA) = $this->hashes[$firstHash];

    $firstRealLine = $realPositions[$firstToken];
    $numLines      = $positions[$lastToken] + 1 - $positions[$firstToken];
    $realNumLines  = $realPositions[$lastToken] + 1 - $firstRealLine;

    if ($numLines < $minLines) {
        return;
    }

    // Same file and same start line: the run matched its own origin.
    if ($fileA === $file && $firstLineA === $firstRealLine) {
        return;
    }

    $result->addClone(
        new CodeClone(
            new CodeCloneFile($fileA, $firstLineA),
            new CodeCloneFile($file, $firstRealLine),
            $realNumLines,
            $lastToken + 1 - $firstToken
        )
    );
}
/**
 * Copy & Paste Detection (CPD).
 *
 * Tokenizes $file and turns every token that is not listed in
 * $this->tokensIgnoreList into a 5-byte signature chunk (1 byte token id,
 * 4 bytes CRC32 of the token text). A window of $minTokens tokens is then
 * slid over that signature and each complete window is hashed:
 *
 *  - a window whose hash already exists in $this->hashes starts or extends
 *    a run of matching windows;
 *  - any other window ends the current run (which is then reported) and is
 *    stored in $this->hashes together with its file and line.
 *
 * Only complete windows are hashed, so a clone never consists of fewer
 * than $minTokens tokens. The number of "\n" characters in the file is
 * added to the line total of $result. Nothing is scanned when the file
 * cannot be read, when $minTokens is smaller than 1, or when the file has
 * fewer than $minTokens counted tokens.
 *
 * @param string       $file      Path of the file to scan.
 * @param integer      $minLines  Line threshold a clone has to exceed.
 * @param integer      $minTokens Number of tokens per hashed window.
 * @param CodeCloneMap $result    Receives the line count and the clones found.
 * @author Johann-Peter Hartmann <*****@*****.**>
 */
public function processFile($file, $minLines, $minTokens, CodeCloneMap $result)
{
    $buffer = file_get_contents($file);

    // file_get_contents() already raised a warning; there is nothing to scan.
    if ($buffer === false) {
        return;
    }

    $tokens = token_get_all($buffer);

    $result->setNumLines($result->getNumLines() + substr_count($buffer, "\n"));
    unset($buffer);

    $currentTokenPositions = array();
    $currentSignature      = '';
    $line                  = 1;

    foreach ($tokens as $token) {
        // The current line is tracked by counting the newlines in the text
        // of every token, whether it is a plain string or an array token.
        if (is_string($token)) {
            $line += substr_count($token, "\n");
            continue;
        }

        if (!isset($this->tokensIgnoreList[$token[0]])) {
            $currentTokenPositions[] = $line;

            // Exactly 5 bytes per token: the low byte of the token id plus
            // the 32-bit CRC of its text. The window arithmetic below
            // relies on this fixed width.
            $currentSignature .= chr($token[0] & 255) . pack('N*', crc32($token[1]));
        }

        $line += substr_count($token[1], "\n");
    }

    unset($tokens);

    $count = count($currentTokenPositions);

    // A window size below 1 is meaningless, and a file shorter than one
    // window cannot contain a clone.
    if ($minTokens < 1 || $count < $minTokens) {
        return;
    }

    // Last index at which a full window of $minTokens tokens still fits.
    // Hashing shorter tail windows would match clones below $minTokens.
    $lastStart  = $count - $minTokens;
    $found      = false;
    $firstHash  = null;
    $firstToken = 0;

    for ($tokenNr = 0; $tokenNr <= $lastStart; $tokenNr++) {
        // First 8 raw bytes of the MD5 of the current window.
        $hash = substr(md5(substr($currentSignature, $tokenNr * 5, $minTokens * 5), true), 0, 8);

        if (isset($this->hashes[$hash])) {
            if (!$found) {
                $found      = true;
                $firstHash  = $hash;
                $firstToken = $tokenNr;
            }

            continue;
        }

        if ($found) {
            // The last matching window started at $tokenNr - 1, so the run
            // ends on that window's final token.
            $this->reportCloneRun(
                $result,
                $file,
                $minLines,
                $firstHash,
                $firstToken,
                $tokenNr + $minTokens - 2,
                $currentTokenPositions
            );

            $found = false;
        }

        $this->hashes[$hash] = array($file, $currentTokenPositions[$tokenNr]);
    }

    // A run that is still open reaches the last token of the file.
    if ($found) {
        $this->reportCloneRun(
            $result,
            $file,
            $minLines,
            $firstHash,
            $firstToken,
            $count - 1,
            $currentTokenPositions
        );
    }
}

/**
 * Reports the clone covered by one run of matching windows, if it qualifies.
 *
 * The clone is added to $result when it spans more than $minLines lines
 * and is not the stored origin itself.
 *
 * @param CodeCloneMap $result     Receives the clone.
 * @param string       $file       File currently being scanned.
 * @param integer      $minLines   Line threshold a clone has to exceed.
 * @param string       $firstHash  Hash of the first matching window of the run.
 * @param integer      $firstToken Index of the first token of the run.
 * @param integer      $lastToken  Index of the last token of the run.
 * @param array        $positions  Line number in $file per counted token.
 */
private function reportCloneRun(CodeCloneMap $result, $file, $minLines, $firstHash, $firstToken, $lastToken, array $positions)
{
    list($fileA, $firstLineA) = $this->hashes[$firstHash];

    $firstLine = $positions[$firstToken];
    $numLines  = $positions[$lastToken] + 1 - $firstLine;

    // NOTE(review): the strict ">" means a clone of exactly $minLines lines
    // is not reported - confirm this is the intended threshold.
    if ($numLines <= $minLines) {
        return;
    }

    // Same file and same start line: the run matched its own origin.
    if ($fileA === $file && $firstLineA === $firstLine) {
        return;
    }

    $result->addClone(
        new CodeClone(
            $fileA,
            $firstLineA,
            $file,
            $firstLine,
            $numLines,
            $lastToken + 1 - $firstToken
        )
    );
}