/**
 * Check the current text of an article for plagiarism using Copyscape.
 *
 * The article text is flattened with Wikitext::flatten() and submitted to
 * copyscape_api_text_search_internet(). Every returned hit whose URL is not
 * on one of our own / whitelisted hosts is reported.
 *
 * NOTE: hits are reported regardless of how many words matched. A
 * minwordsmatched / querywords ratio filter used to sit inside the loop but
 * was commented out, so it stays disabled here.
 *
 * @param mixed $t Handed straight to Revision::newFromTitle()
 *                 (presumably a MediaWiki Title - confirm against callers).
 * @return string 'No such article' when no revision can be loaded, an empty
 *                string when nothing is reported, otherwise an HTML fragment
 *                with one "Plagiarized" link line per offending URL.
 */
private static function copyCheck($t) {
	$rev = Revision::newFromTitle($t);
	if (!$rev) {
		return 'No such article';
	}

	$text = Wikitext::flatten($rev->getText());
	$res = copyscape_api_text_search_internet($text, 'ISO-8859-1', 2);

	// A response without a positive count or without a result list has
	// nothing to report; checking with empty() also avoids undefined-index
	// notices when the response lacks these keys.
	if (empty($res['count']) || empty($res['result']) || !is_array($res['result'])) {
		return '';
	}

	// Hosts we never report. The dots are escaped and the host is anchored
	// at its end so that look-alikes such as "wikihowXcom.example.net" or
	// "wikihow.com.example.net" are not whitelisted; https is accepted too.
	$ownSites = '@^https?://([a-z0-9-]+\.)*(wikihow|whstatic|youtube)\.com(?=[/:?#]|$)@i';

	$result = '';
	foreach ($res['result'] as $hit) {
		if (!isset($hit['url']) || preg_match($ownSites, $hit['url'])) {
			continue;
		}

		// The URL comes from an external service: escape it before it is
		// placed into an attribute and into the link text.
		$url = htmlspecialchars($hit['url'], ENT_QUOTES);
		$result .= '<b>Plagiarized:</b> <a href="' . $url . '">' . $url . '</a><br />';
	}

	return $result;
}
$params['h'] = $handle; return copyscape_api_call('pindexdel', $params); }

/* D. Some examples of use */

// The demo below only runs when the including script has set $run_examples
// to a truthy value. !empty() gives the same result as the former
// "@$run_examples" test without relying on error suppression.
// my_echo_title() and my_print_r() are output helpers defined outside this
// section.
if (!empty($run_examples)) {
	// Sample text submitted by the text-based requests below.
	$exampletext =
		'We hold these truths to be self-evident, that all men are created equal, that they are endowed by their ' .
		'Creator with certain unalienable rights, that among these are Life, Liberty, and the pursuit of Happiness. That to ' .
		'secure these rights, Governments are instituted among Men, deriving their just powers from the consent of the ' .
		'governed. That whenever any Form of Government becomes destructive of these ends, it is the Right of the People to ' .
		'alter or to abolish it, and to institute new Government, laying its foundation on such principles and organizing ' .
		'its powers in such form, as to them shall seem most likely to effect their Safety and Happiness. Prudence, indeed, ' .
		'will dictate that Governments long established should not be changed for light and transient causes; and ' .
		'accordingly all experience hath shown, that mankind are more disposed to suffer, while evils are sufferable, than ' .
		'to right themselves by abolishing the forms to which they are accustomed. But when a long train of abuses and ' .
		'usurpations, pursuing invariably the same Object evinces a design to reduce them under absolute Despotism, it is ' .
		'their right, it is their duty, to throw off such Government, and to provide new Guards for their future security. ' .
		'Such has been the patient sufferance of these Colonies; and such is now the necessity which constrains them to ' .
		'alter their former Systems of Government. The history of the present King of Great Britain is a history of ' .
		'repeated injuries and usurpations, all having in direct object the establishment of an absolute Tyranny over these ' .
		'States. To prove this, let Facts be submitted to a candid world. He has refused his Assent to Laws, the most ' .
		'wholesome and necessary for the public good. ' .
		'We, therefore, the Representatives of the United States of America, in General Congress, Assembled, ' .
		'appealing to the Supreme Judge of the world for the rectitude of our intentions, do, in the Name, and by Authority ' .
		'of the good People of these Colonies, solemnly publish and declare, That these United Colonies are, and of Right ' .
		'ought to be free and independent states; that they are Absolved from all Allegiance to the British Crown, and that ' .
		'all political connection between them and the State of Great Britain, is and ought to be totally dissolved; and ' .
		'that as Free and Independent States, they have full Power to levy War, conclude Peace, contract Alliances, ' .
		'establish Commerce, and to do all other Acts and Things which Independent States may of right do. And for the ' .
		'support of this Declaration, with a firm reliance on the Protection of Divine Providence, we mutually pledge to ' .
		'each other our Lives, our Fortunes, and our sacred Honor.';

	my_echo_title('Response for a simple URL Internet search');
	my_print_r(copyscape_api_url_search_internet('http://www.copyscape.com/example.html'));

	my_echo_title('Response for a URL Internet search with full comparisons for the first two results');
	my_print_r(copyscape_api_url_search_internet('http://www.copyscape.com/example.html', 2));

	my_echo_title('Response for a simple text Internet search');
	my_print_r(copyscape_api_text_search_internet($exampletext, 'ISO-8859-1'));

	my_echo_title('Response for a text Internet search with full comparisons for the first two results');
	my_print_r(copyscape_api_text_search_internet($exampletext, 'ISO-8859-1', 2));

	my_echo_title('Response for a check balance request');
	my_print_r(copyscape_api_check_balance());

	my_echo_title('Response for a URL add to private index request');
	my_print_r(copyscape_api_url_add_to_private('http://www.copyscape.com/example.html'));

	my_echo_title('Response for a text add to private index request');
	$response = copyscape_api_text_add_to_private(
		$exampletext,
		'ISO-8859-1',
		'Extract from Declaration of Independence',
		'EXAMPLE_1234'
	);
	my_print_r($response);

	// The add request may not have produced a handle (for example when the
	// call failed); remember that instead of reading an undefined index.
	$handle = isset($response['handle']) ? $response['handle'] : null;

	my_echo_title('Response for a URL private index search');
	my_print_r(copyscape_api_url_search_private('http://www.copyscape.com/example.html'));

	my_echo_title('Response for a delete from private index request');
	if ($handle !== null) {
		my_print_r(copyscape_api_delete_from_private($handle));
	} else {
		// Nothing was added above, so there is nothing to delete.
		my_print_r(array('error' => 'No handle was returned by the add request; delete skipped'));
	}

	my_echo_title('Response for a text search of both Internet and private index with full comparisons for the first result (of each type)');
	my_print_r(copyscape_api_text_search_internet_and_private($exampletext, 'ISO-8859-1', 1));
}
/**
 * Check a document for plagiarism using Copyscape.
 *
 * The document is read with file_get_contents() and submitted to
 * copyscape_api_text_search_internet(). A hit counts as plagiarism when its
 * URL is not on one of our own / whitelisted hosts and the share of matched
 * words (minwordsmatched / querywords) exceeds the threshold. Every such hit
 * is reported through self::error_log().
 *
 * @param string $doc    Location of the document, as accepted by
 *                       file_get_contents().
 * @param mixed  $sample Passed through unchanged as the first argument of
 *                       self::error_log().
 * @return bool True when at least one hit exceeds the threshold, false
 *              otherwise (including when the document cannot be read or the
 *              search yields no usable results).
 */
private static function is_plagiarized($doc, $sample) {
	$threshold = 0.25;

	$text = file_get_contents($doc);
	if ($text === false || $text === '') {
		// Nothing to search for; do not spend an API call on it.
		return false;
	}

	$res = copyscape_api_text_search_internet($text, 'ISO-8859-1', 2);
	if (empty($res['count']) || empty($res['result']) || !is_array($res['result'])) {
		return false;
	}

	// The match ratio is undefined without a positive query word count.
	$words = isset($res['querywords']) ? (int)$res['querywords'] : 0;
	if ($words <= 0) {
		return false;
	}

	// Hosts we never report. The dots are escaped and the host is anchored
	// at its end so that look-alike domains are not whitelisted; https is
	// accepted too.
	$ownSites = '@^https?://([a-z0-9-]+\.)*(wikihow|whstatic|youtube)\.com(?=[/:?#]|$)@i';

	// Initialised up front: previously the flag stayed undefined when results
	// existed but none of them passed the filters below.
	$is_plagiarized = false;
	foreach ($res['result'] as $hit) {
		if (!isset($hit['url'], $hit['minwordsmatched'])) {
			continue;
		}
		if (preg_match($ownSites, $hit['url'])) {
			continue;
		}
		if ($hit['minwordsmatched'] / $words > $threshold) {
			// Keep going after the first match so every source is logged.
			$is_plagiarized = true;
			self::error_log($sample, 'Possibly plagiarized from here: ' . $hit['url']);
		}
	}

	return $is_plagiarized;
}
} /* debugging $text = preg_replace("@\{\{[^\}]*\}\}@", "", $text); if (strpos($text, "{{") !== false) { echo $text; exit; } if (preg_match("@\[\[@", $text)) { echo $text; exit; } */ // do the search $copyviourl = null; $match = null; $results = copyscape_api_text_search_internet($text, 'ISO-8859-1', 2); $checkstoday++; if ($results['count']) { $words = $results['querywords']; $index = 0; foreach ($results['result'] as $r) { if (!preg_match("@^http://[a-z0-9]*.(wikihow|whstatic|youtube).com@i", $r['url'])) { if ($r['minwordsmatched'] / $words > $threshold) { // can we find a reference to us? $f = file_get_contents($r['url']); if (strpos($f, $t->getFullURL()) !== false) { echo "Got a reference to {$t->getFullURL()} on {$r['url']}\n"; continue; } $match = number_format($r['minwordsmatched'] / $words, 2); echo "{$t->getFullURL()}\t{$r['url']}: {$words},{$r['minwordsmatched']}, {$match}\n";