/**
  * Check the revision found for a title for plagiarism with Copyscape.
  *
  * The flattened wikitext of the revision is sent to the Copyscape internet
  * text search. Every result whose URL is not on a whitelisted host
  * (wikihow.com, whstatic.com, youtube.com or a subdomain of those) is
  * reported. No matched-word ratio threshold is applied: any non-whitelisted
  * hit counts.
  *
  * @param mixed $t title handed to Revision::newFromTitle()
  * @return string 'No such article' when no revision is found; otherwise an
  *                HTML fragment with one "Plagiarized" line per offending URL,
  *                or '' when there is nothing to report
  */
 private static function copyCheck($t)
 {
     $revision = Revision::newFromTitle($t);
     if (!$revision) {
         return 'No such article';
     }

     $text = Wikitext::flatten($revision->getText());
     $res = copyscape_api_text_search_internet($text, 'ISO-8859-1', 2);

     // Guard against a response that carries no 'count'/'result' keys
     // (presumably what an API error looks like -- TODO confirm).
     if (empty($res['count']) || empty($res['result']) || !is_array($res['result'])) {
         return '';
     }

     // Dots are escaped, http and https are both accepted, and the host must
     // end right after ".com" so "wikihow.com.evil.example" is not whitelisted.
     $whitelist = '@^https?://(?:[a-z0-9-]+\.)*(?:wikihow|whstatic|youtube)\.com(?:[/:?#]|$)@i';

     $result = '';
     foreach ($res['result'] as $hit) {
         if (empty($hit['url'])) {
             continue;
         }
         if (preg_match($whitelist, $hit['url'])) {
             continue;
         }
         // The URL comes from an external service: escape it before it is
         // placed in an attribute and in element content.
         $safeUrl = htmlspecialchars($hit['url'], ENT_QUOTES, 'UTF-8');
         $result .= '<b>Plagiarized:</b> <a href="' . $safeUrl . '">' . $safeUrl . '</a><br />';
     }

     return $result;
 }
    $params['h'] = $handle;
    return copyscape_api_call('pindexdel', $params);
}
/*
	D. Some examples of use
*/
// Demo driver: runs one call of each Copyscape API helper and prints the
// response. It only executes when the including script has set $run_examples
// to a truthy value; !empty() gives the same result as the former @-suppressed
// read without silencing unrelated errors.
if (!empty($run_examples)) {
    // Sample query text (extract from the Declaration of Independence).
    $exampletext = 'We hold these truths to be self-evident, that all men are created equal, that they are endowed by their '
        . 'Creator with certain unalienable rights, that among these are Life, Liberty, and the pursuit of Happiness. That to '
        . 'secure these rights, Governments are instituted among Men, deriving their just powers from the consent of the '
        . 'governed. That whenever any Form of Government becomes destructive of these ends, it is the Right of the People to '
        . 'alter or to abolish it, and to institute new Government, laying its foundation on such principles and organizing '
        . 'its powers in such form, as to them shall seem most likely to effect their Safety and Happiness. Prudence, indeed, '
        . 'will dictate that Governments long established should not be changed for light and transient causes; and '
        . 'accordingly all experience hath shown, that mankind are more disposed to suffer, while evils are sufferable, than '
        . 'to right themselves by abolishing the forms to which they are accustomed. But when a long train of abuses and '
        . 'usurpations, pursuing invariably the same Object evinces a design to reduce them under absolute Despotism, it is '
        . 'their right, it is their duty, to throw off such Government, and to provide new Guards for their future security. '
        . 'Such has been the patient sufferance of these Colonies; and such is now the necessity which constrains them to '
        . 'alter their former Systems of Government. The history of the present King of Great Britain is a history of '
        . 'repeated injuries and usurpations, all having in direct object the establishment of an absolute Tyranny over these '
        . 'States. To prove this, let Facts be submitted to a candid world. He has refused his Assent to Laws, the most '
        . 'wholesome and necessary for the public good. '
        . 'We, therefore, the Representatives of the United States of America, in General Congress, Assembled, '
        . 'appealing to the Supreme Judge of the world for the rectitude of our intentions, do, in the Name, and by Authority '
        . 'of the good People of these Colonies, solemnly publish and declare, That these United Colonies are, and of Right '
        . 'ought to be free and independent states; that they are Absolved from all Allegiance to the British Crown, and that '
        . 'all political connection between them and the State of Great Britain, is and ought to be totally dissolved; and '
        . 'that as Free and Independent States, they have full Power to levy War, conclude Peace, contract Alliances, '
        . 'establish Commerce, and to do all other Acts and Things which Independent States may of right do. And for the '
        . 'support of this Declaration, with a firm reliance on the Protection of Divine Providence, we mutually pledge to '
        . 'each other our Lives, our Fortunes, and our sacred Honor.';

    // Internet searches by URL.
    my_echo_title('Response for a simple URL Internet search');
    my_print_r(copyscape_api_url_search_internet('http://www.copyscape.com/example.html'));

    my_echo_title('Response for a URL Internet search with full comparisons for the first two results');
    my_print_r(copyscape_api_url_search_internet('http://www.copyscape.com/example.html', 2));

    // Internet searches by text.
    my_echo_title('Response for a simple text Internet search');
    my_print_r(copyscape_api_text_search_internet($exampletext, 'ISO-8859-1'));

    my_echo_title('Response for a text Internet search with full comparisons for the first two results');
    my_print_r(copyscape_api_text_search_internet($exampletext, 'ISO-8859-1', 2));

    // Account balance.
    my_echo_title('Response for a check balance request');
    my_print_r(copyscape_api_check_balance());

    // Private index: add a URL, add text, search, then delete the text entry
    // again using the handle returned by the add call.
    my_echo_title('Response for a URL add to private index request');
    my_print_r(copyscape_api_url_add_to_private('http://www.copyscape.com/example.html'));

    my_echo_title('Response for a text add to private index request');
    $response = copyscape_api_text_add_to_private($exampletext, 'ISO-8859-1', 'Extract from Declaration of Independence', 'EXAMPLE_1234');
    my_print_r($response);
    $handle = $response['handle'];

    my_echo_title('Response for a URL private index search');
    my_print_r(copyscape_api_url_search_private('http://www.copyscape.com/example.html'));

    my_echo_title('Response for a delete from private index request');
    my_print_r(copyscape_api_delete_from_private($handle));

    // Combined Internet + private index search.
    my_echo_title('Response for a text search of both Internet and private index with full comparisons for the first result (of each type)');
    my_print_r(copyscape_api_text_search_internet_and_private($exampletext, 'ISO-8859-1', 1));
}
Example #3
0
 /**
  * Check a document for plagiarism with Copyscape.
  *
  * The document's contents are sent to the Copyscape internet text search.
  * A result counts as plagiarism when its URL is not on a whitelisted host
  * (wikihow.com, whstatic.com, youtube.com or a subdomain of those) and the
  * ratio minwordsmatched / querywords exceeds the threshold (0.25). Every
  * such result is reported through self::error_log().
  *
  * @param string $doc    location readable by file_get_contents()
  * @param mixed  $sample passed through to self::error_log() with each finding
  * @return bool true if at least one result qualifies, false otherwise
  *              (including when the document cannot be read or the search
  *              returns nothing usable)
  */
 private static function is_plagiarized($doc, $sample)
 {
     $threshold = 0.25;
     // Start from "clean" so that every path returns a real boolean; before,
     // results that all stayed under the threshold left this variable unset.
     $is_plagiarized = false;

     $text = file_get_contents($doc);
     if ($text === false) {
         self::error_log($sample, 'Could not read document for plagiarism check: ' . $doc);
         return false;
     }

     $res = copyscape_api_text_search_internet($text, 'ISO-8859-1', 2);

     // Guard against a response that carries no 'count'/'result' keys
     // (presumably what an API error looks like -- TODO confirm).
     if (empty($res['count']) || empty($res['result']) || !is_array($res['result'])) {
         return false;
     }

     // Without a positive query word count the ratio below is undefined.
     $words = isset($res['querywords']) ? (int) $res['querywords'] : 0;
     if ($words <= 0) {
         return false;
     }

     // Dots are escaped, http and https are both accepted, and the host must
     // end right after ".com" so "wikihow.com.evil.example" is not whitelisted.
     $whitelist = '@^https?://(?:[a-z0-9-]+\.)*(?:wikihow|whstatic|youtube)\.com(?:[/:?#]|$)@i';

     foreach ($res['result'] as $hit) {
         if (empty($hit['url']) || !isset($hit['minwordsmatched'])) {
             continue;
         }
         if (preg_match($whitelist, $hit['url'])) {
             continue;
         }
         if ($hit['minwordsmatched'] / $words > $threshold) {
             // Keep scanning after the first hit so every source gets logged.
             $is_plagiarized = true;
             self::error_log($sample, 'Possibly plagiarized from here: ' . $hit['url']);
         }
     }

     return $is_plagiarized;
 }
 }
 /* debugging 
 		$text = preg_replace("@\{\{[^\}]*\}\}@", "", $text);
 		if (strpos($text, "{{") !== false) {
 			echo $text;
 			exit;
 		}
 		if (preg_match("@\[\[@", $text)) {
 			echo $text;
 			exit;
 		}
 		*/
 // do the search
 $copyviourl = null;
 $match = null;
 $results = copyscape_api_text_search_internet($text, 'ISO-8859-1', 2);
 $checkstoday++;
 if ($results['count']) {
     $words = $results['querywords'];
     $index = 0;
     foreach ($results['result'] as $r) {
         if (!preg_match("@^http://[a-z0-9]*.(wikihow|whstatic|youtube).com@i", $r['url'])) {
             if ($r['minwordsmatched'] / $words > $threshold) {
                 // can we find a reference to us?
                 $f = file_get_contents($r['url']);
                 if (strpos($f, $t->getFullURL()) !== false) {
                     echo "Got a reference to {$t->getFullURL()} on {$r['url']}\n";
                     continue;
                 }
                 $match = number_format($r['minwordsmatched'] / $words, 2);
                 echo "{$t->getFullURL()}\t{$r['url']}: {$words},{$r['minwordsmatched']}, {$match}\n";