if ($old_class != "") { $old_class = ""; array_push($diff_content_lines, "<tr class='headline'><th class='index'>{$c1_idx}</th><td><pre><span>{$c1_line}</span></pre></td><th class='index'>{$c2_idx}</th><td><pre><span>{$c2_line}</span></pre></td></tr>"); } else { array_push($diff_content_lines, "<tr><th class='index'>{$c1_idx}</th><td><pre><span>{$c1_line}</span></pre></td><th class='index'>{$c2_idx}</th><td><pre><span>{$c2_line}</span></pre></td></tr>"); } } } } } array_push($diff_content_lines, "</table>\n<br>"); $index++; } $url_html_str = implode("\n", $url_html_lines); $diff_content_str = implode("\n", $diff_content_lines); echo "<div id='filelist' class='content'>\n"; echo $url_html_str . "\n"; echo "</div>\n"; echo "<br>\n"; echo "<div id='diff_content'>\n"; echo $diff_content_str . "\n"; echo "</div>\n"; echo "<br>\n"; echo "<div id='filelist2' class='content'>\n"; echo $url_html_str . "\n"; echo "</div>\n"; echo "<br>\n"; } $lines = get_diff_lines($ldap, $passwd, $url, $version1, $version2); $diffObjList = get_diff($lines); output_html_result($diffObjList);
} if ($found_it == true) { array_push($temp_lines, $line); } } return $temp_lines; } save_log($ldap, $code_v0, $code_v1, $code_v2); $lines1 = get_diff_summary($ldap, $passwd, $code_v0, $code_v1, $code_v1_raw); $lines2 = get_diff_summary($ldap, $passwd, $code_v0, $code_v2, $code_v2_raw); if ($simple_view == "1") { simple_output($lines1, $lines2); } else { if ($same_update_view == "1") { $file_list = array(); foreach (array_keys($lines1) as $file) { if (array_key_exists($file, $lines2)) { array_push($file_list, $file); } } $lines = get_diff_lines($ldap, $passwd, $code_v0, $code_v1); $temp_lines = get_care_lines($lines, $file_list); $diffObjList = get_diff($temp_lines); $lines2 = get_diff_lines($ldap, $passwd, $code_v0, $code_v2); $temp_lines2 = get_care_lines($lines2, $file_list); $diffObjList2 = get_diff($temp_lines2); output_html_result($diffObjList, "1"); echo ""; output_html_result($diffObjList2, "2"); } }
/**
 * from https://github.com/wikigit/Duplication-Detector
 *
 * Copyright (c) 2011, Derrick Coetzee (User:Dcoetzee)
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 *  * Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

/**
 * "copycat" action: compare two documents and list the phrases they share.
 *
 * The first document is taken from the first of these that is present:
 * $params['source'] (pasted text), $params['url0'] (http/https URL), the
 * uploaded file 'file0', or the raw body of the current wiki page.
 * The second document is taken from $params['url'], the uploaded file 'file'
 * or $params['target']; when none of them is given the input form is sent
 * instead and the function returns.
 *
 * When $params['btn_diff'] is set the result of get_diff() is shown (as
 * text/plain when $params['raw'] is also set); otherwise the matching phrases
 * are listed, longest first, each with a few words of context.
 *
 * All output is written directly with echo/print and through $formatter.
 *
 * @param object $formatter  page formatter; send_header(), send_title(),
 *                           send_footer(), processor_repl() and ->page are used
 * @param array  $params     request parameters (see above); also read:
 *                           'minwords', 'minchars', 'removequotations',
 *                           'removenumbers'
 * @return void
 */
function do_copycat($formatter, $params = array())
{
    global $DBInfo;

    $charset = $DBInfo->charset;

    // Get starting time to measure time elapsed later.
    $time_start = microtime_float();

    // ---- first document -------------------------------------------------
    $source1 = 'Downloaded';
    if (!empty($params['source'])) {
        $source1 = 'Text';
        $name1 = 'source';
        $filecontents1 = $params['source'];
    } elseif (!empty($params['url0']) && preg_match('@^https?://@', $params['url0'])) {
        $name1 = $params['url0'];
        $filecontents1 = wget($params['url0']);
    } elseif (!empty($_FILES['file0']['tmp_name'])) {
        $source1 = 'Uploaded';
        $name1 = $_FILES['file0']['name'];
        $filecontents1 = read_file_contents($_FILES['file0']['tmp_name']);
    } else {
        $source1 = 'Wiki';
        $name1 = $formatter->page->name;
        $filecontents1 = $formatter->page->_get_raw_body();
    }

    // ---- second document (or the input form when there is none) ---------
    $source2 = 'Downloaded';
    if (!empty($params['url']) && preg_match('@^https?://@', $params['url'])) {
        $name2 = $params['url'];
        $filecontents2 = wget($params['url']);
    } elseif (!empty($_FILES['file']['tmp_name'])) {
        $source2 = 'Uploaded';
        $name2 = $_FILES['file']['name'];
        $filecontents2 = read_file_contents($_FILES['file']['tmp_name']);
    } elseif (!empty($params['target'])) {
        $source2 = 'Text';
        $name2 = 'target';
        $filecontents2 = $params['target'];
    } else {
        $params['.title'] = _("Copycat detector");
        $formatter->send_header('', $params);
        $formatter->send_title('', '', $params);

        $comp_btn = _("Detect");
        $diff_btn = _("Diff");
        $raw_check = _("Raw");
        $source_lab = _("Source (URL or PageName)");
        $target_lab = _("Target (URL)");
        $min_num_words = _("Minimum number of words");
        $min_num_chars = _("Minimum number of characters");
        $remove_quotes = _("Remove quotations");
        $remove_number = _("Remove numbers");
        $or = _("or");

        // $name1 may be a user supplied URL, an uploaded file name or a page
        // name: escape it before putting it inside the value="" attribute.
        $name1_attr = htmlspecialchars($name1);

        echo <<<FORM
<form name="comparesimple" method="post">
<input type="hidden" name="action" value="copycat" />
{$source_lab}: <input style="width: 99%" type="text" name="url0" value="{$name1_attr}" size="120"><br/>
{$or} <br />
<textarea style="width: 100%; height: 408px; box-sizing: border-box" name="source" cols="80" rows="10">
</textarea>
<br/>
{$target_lab}: <input style="width: 99%" type="text" name="url" size="120"><br />
{$or} <br />
<textarea style="width: 100%; height: 408px; box-sizing: border-box" name="target" cols="80" rows="10">
</textarea>
<br/>
{$min_num_words}: <input type="text" name="minwords" value="2"><br/>
{$min_num_chars}: <input type="text" name="minchars" value="13"><br/>
<input type="checkbox" name="removequotations" checked="checked"> {$remove_quotes}<br/>
<input type="checkbox" name="removenumbers" checked="checked"> {$remove_number}<br/>
<br/>
<input type="submit" name="btn_compare" value="{$comp_btn}" /> {$or}
<input type="submit" name="btn_diff" value="{$diff_btn}" />
<input type="checkbox" name="raw" /> {$raw_check}
</form>
FORM;

        $formatter->send_footer('', $params);
        return;
    }

    $shorturl1 = htmlspecialchars(shorten_url($name1));
    $shorturl2 = htmlspecialchars(shorten_url($name2));

    // ---- diff mode ------------------------------------------------------
    if (!empty($params['btn_diff'])) {
        $diff = get_diff($filecontents1, $filecontents2, $params);

        if (!empty($params['raw'])) {
            header("Content-Type: text/plain");
            echo $diff;
            return;
        }

        $params['.title'] = "Diff: {$shorturl1} v {$shorturl2}";
        $formatter->send_header('', $params);
        $formatter->send_title('', '', $params);
        if (isset($diff[0])) {
            echo "<div id='wikiDiffPreview'>\n";
            echo $formatter->processor_repl('diff', $diff, $params);
            echo "</div>\n";
        }
        $formatter->send_footer('', $params);
        return;
    }

    // ---- duplicate detection --------------------------------------------
    // The thresholds arrive as request strings (or not at all): normalise
    // them to integers so the comparisons below are numeric and no
    // undefined-index notice is raised.
    $minwords = isset($params['minwords']) ? (int) $params['minwords'] : 0;
    if ($minwords < 2) {
        $minwords = 2;
    }
    $minchars = isset($params['minchars']) ? (int) $params['minchars'] : 0;
    if (!$minchars) {
        $minchars = 13;
    }
    // Unchecked checkboxes are simply absent from the request.
    $removequotations = isset($params['removequotations']) ? $params['removequotations'] : null;
    $removenumbers = isset($params['removenumbers']) ? $params['removenumbers'] : null;

    $params['.title'] = "Duplicate Detector: {$shorturl1} v {$shorturl2}";
    $formatter->send_header('', $params);
    $formatter->send_title('', '', $params);

    print '<p><b>Warning</b>: Duplication Detector may in some cases give no results or incomplete results.'
        . ' This does not necessarily indicate copying has not occurred.'
        . ' Manually examine the source document to verify.</p>';
    print "<h2>Comparing documents for duplicated text:</h2>";

    print '<ul>';
    foreach (array($name1, $name2) as $doc_name) {
        $doc_label = htmlspecialchars($doc_name);
        if (preg_match('@^https?://@', $doc_name)) {
            print '<li><a href="' . $doc_label . '">' . $doc_label . '</a></li>';
        } else {
            print '<li>' . $doc_label . '</li>';
        }
    }
    print '</ul>';

    // NOTE(review): get_terms() is not visible here; it receives the document
    // name and source kind, so it presumably prints status text. The
    // paragraph is kept open around both calls for that reason - confirm.
    echo "<p>", "\n";
    $terms1 = get_terms($name1, $source1, $filecontents1, $removenumbers, $removequotations, $charset);
    $terms2 = get_terms($name2, $source2, $filecontents2, $removenumbers, $removequotations, $charset);
    echo "</p>", "\n";

    $terms1_posts = compute_posts($terms1, $minwords);
    $matches1 = compute_matches($terms1, $terms2, $terms1_posts, $minwords);

    // A heading may not live inside a paragraph, so the heading and its
    // remark are emitted as siblings.
    echo "<h3>Total match candidates found: ", count($matches1), "</h3>", "\n";
    echo "<p>(before eliminating redundant matches)</p>", "\n";

    usort($matches1, 'cmp_by_length_desc');

    $already_matched_phrases = array();
    $num_matches = 0;
    $context_words = 30;
    $min_context_words = 6;
    $max_context_words = 20;

    echo "<h2>Matched phrases:</h2>", "\n";

    // NOTE(review): $phrase and the context words come from get_terms() and
    // are printed without HTML escaping. Confirm that get_terms() strips or
    // escapes markup; if it does not, they must be escaped here.
    foreach ($matches1 as $value) {
        list($pos1, $pos2, $length, $phrase) = $value;

        // Matches are processed longest first: a phrase contained in one that
        // was already seen is redundant and is not reported again.
        $skip = false;
        foreach ($already_matched_phrases as $already_phrase) {
            if (strpos($already_phrase, $phrase) !== false) {
                $skip = true;
                break;
            }
        }

        $characters = mb_strlen($phrase, $charset);
        if (!$skip && $characters >= $minchars) {
            if ($length > $context_words - $min_context_words) {
                // Long match: there is no room left for context words.
                print "<p><ins class='diff-added'>{$phrase}</ins>"
                    . " ({$length} words, {$characters} characters)</p>";
            } else {
                // Whole number of context words per side. A fractional value
                // would be truncated separately for the slice offset and the
                // slice length, dropping the word right before the match.
                $context_len = min(
                    (int) floor(($context_words - $length) / 2),
                    (int) floor($max_context_words / 2)
                );

                list($phraseprefix1, $phrasesuffix1) = _copycat_context($terms1, $pos1, $length, $context_len);
                list($phraseprefix2, $phrasesuffix2) = _copycat_context($terms2, $pos2, $length, $context_len);

                print "<p>{$phraseprefix1} <ins class='diff-added'>{$phrase}</ins> {$phrasesuffix1}<br/>";
                print "{$phraseprefix2} <ins class='diff-added'>{$phrase}</ins> {$phrasesuffix2}<br/>";
                print "({$length} words, {$characters} characters)</p>";
            }
            $num_matches++;
        }

        // Remembered even when skipped, exactly like reported phrases.
        $already_matched_phrases[] = $phrase;
    }

    echo "<h3>Matching phrases found: ", $num_matches, "</h3>\n";

    $time_delta = microtime_float() - $time_start;
    echo '<p>', 'Elapsed time:', $time_delta, '</p>';

    $formatter->send_footer('', $params);
    return;
}

/**
 * Collect the words surrounding a matched phrase inside a word list.
 *
 * @param array $terms        word list of one document
 * @param int   $pos          index of the first word of the match
 * @param int   $length       number of words in the match
 * @param int   $context_len  maximum number of words to take on each side
 * @return array              array($prefix, $suffix), each joined with spaces
 */
function _copycat_context($terms, $pos, $length, $context_len)
{
    $start = max($pos - $context_len, 0);
    $end = $pos + $length;

    $prefix = join(' ', array_slice($terms, $start, $pos - $start));
    $suffix = join(' ', array_slice($terms, $end, min(count($terms) - $end, $context_len)));

    return array($prefix, $suffix);
}