Пример #1
0
 /**
  * Handles the manual training form.
  *
  * Reads the following POST fields:
  *  - 'ham' / 'spam': which class the submitted text belongs to ('ham' wins when both are set);
  *  - 'nowords':      when non-empty, the text itself is neither added to nor tested by the Bayes filter;
  *  - 'stuff':        the text to learn from / test;
  *  - 'howmuch':      integer passed through to addText()/addURIs() (treated as 0 when absent).
  *
  * @return array Keys: 'title', 'isspam' (bool|null), 'addtext' (bool), 'linksadded' (normalised
  *               URIs handed to addURIs()), 'spamvertresult' and 'bayesresult' (null when the
  *               corresponding test was not run).
  */
 function post_index()
 {
     // The result was never used; the call itself is kept in case getSblam() has
     // initialisation side effects -- TODO confirm and drop if it has none.
     $this->getSblam();

     $bayes = new SblamTestBayes(array(), $this->services);
     $spamverts = new SblamTestSpamvertises(array(), $this->services);

     // Tri-state: false = ham, true = spam, NULL = no class chosen (nothing is learned).
     $isspam = NULL;
     if (!empty($_POST['ham'])) {
         $isspam = false;
     } elseif (!empty($_POST['spam'])) {
         $isspam = true;
     }

     $addtext = empty($_POST['nowords']);
     // Avoid an undefined-index notice when the field is missing; 0 matches what the cast of NULL gave.
     $howmuch = isset($_POST['howmuch']) ? (int) $_POST['howmuch'] : 0;

     $linkstoadd = array();
     $spamvertresult = NULL;
     $bayesresult = NULL;

     if (!empty($_POST['stuff']) && NULL !== $isspam) {
         if ($addtext) {
             $bayes->addText($_POST['stuff'], $isspam, $howmuch);
         }
         if (preg_match_all('@(?:https?://|www\\.)([a-z0-9.-]+\\.[a-z]{2,4}(?:/[^\\s]{1,15})?)@', $_POST['stuff'], $links)) {
             // Build the normalised list by value instead of iterating by reference,
             // so no dangling reference to the last element is left behind.
             foreach ($links[0] as $link) {
                 if (!preg_match('@^https?://@', $link)) {
                     // Matches that started with "www." get an explicit scheme.
                     $link = 'http://' . $link;
                 }
                 $linkstoadd[] = $link;
             }
             $spamverts->addURIs($linkstoadd, $isspam, $howmuch);
             $spamvertresult = $spamverts->testURIs($linkstoadd);
         } else {
             d("no links found");
         }
     }

     // The text is scored whenever it was submitted (even if empty) and word learning is enabled.
     if (isset($_POST['stuff']) && $addtext) {
         $bayesresult = $bayes->testText($_POST['stuff']);
     }

     return array('title' => 'Added to bayes base', 'isspam' => $isspam, 'addtext' => $addtext, 'linksadded' => $linkstoadd, 'spamvertresult' => $spamvertresult, 'bayesresult' => $bayesresult);
 }
Пример #2
0
 /**
  * Feeds already-scored posts into the Bayes filter in throttled batches.
  *
  * Truncates the bayeswordsh_s and linkswordsh_s tables, then repeatedly selects up to
  * $batchsize not-yet-added posts from posts_meta, adds each one via SblamTestBayes::addPost()
  * and marks it as added. After every post the method sleeps for a time derived from the
  * system load average and from how long the post took, and echoes a progress line.
  *
  * @param int $max       Maximum number of batches to run (non-positive values run no batch).
  * @param int $batchsize Posts selected per batch; values below 5 are raised to 5.
  *
  * @return array Keys: 'done' (posts processed), 'failed', 'waited' (rounded 0.1 * accumulated wait)
  *               and 'waitperpost' (0 when no post was processed).
  */
 function post_index($max = 12500, $batchsize = 300)
 {
     if ($this->is_active()) {
         die("Another process is active!");
     }
     $this->ping();
     ignore_user_abort(true);

     // A negative $max used to make "while ($max--)" run practically without bound.
     $max = max(0, intval($max));
     $batchsize = max(5, intval($batchsize));

     $pdo = $this->services->getDB();
     $pdo->exec("truncate bayeswordsh_s");
     $pdo->exec("truncate linkswordsh_s");
     $base = new SblamBase($pdo);
     $bayes = new SblamTestBayes(array(), $this->services);

     $done = 0;
     $failures = 0;
     $wait = 0;
     // Upper bound on spamscore for selected posts; widened when a batch comes back
     // empty and slowly narrowed again while batches keep returning rows.
     $maxspam = 600;

     while ($max-- > 0) {
         $this->ping();
         // Randomly alternate the scan direction between batches.
         $sort = rand() & 64 ? 'DESC' : '';
         $doneinbatch = 0;

         $stmt = $pdo->query("/*maxtime20*/SELECT id FROM posts_meta\n                WHERE (added IS NULL OR added = 0) AND (manualspam IS NOT NULL OR (abs(spamscore)>20 AND spamcert > 90)) AND spamscore < {$maxspam}\n                ORDER BY id {$sort} LIMIT {$batchsize}");
         if (!$stmt) {
             // query() returns false on failure when the connection is not in exception mode;
             // calling fetchAll() on it would be a fatal error, so stop cleanly instead.
             $failures++;
             warn("Failed to select posts to add");
             break;
         }

         foreach ($stmt->fetchAll(PDO::FETCH_ASSOC) as $res) {
             $doneinbatch++;
             $post = $base->getPostById($res['id']);
             if (!$post) {
                 $failures++;
                 warn($res['id'], "Can't find post");
                 continue;
             }
             $poststarttime = microtime(true);
             $this->ping();
             // Only the score is used below; the certainty is returned alongside it.
             list($spamscore, $spamcert) = $post->getSpamScore();
             $howmuch = 1;
             if (!$post->bayesadded && ($post->manualspam !== NULL || abs($spamscore) > 0.9)) {
                 // A manual verdict takes precedence over the sign of the computed score.
                 $isspam = $post->manualspam !== NULL ? $post->manualspam : ($spamscore > 0 ? 1 : 0);
                 // Manually confirmed ham and strongly negative scores are added with weight 3.
                 if (($post->manualspam !== NULL && $post->manualspam == 0) || $spamscore < -2.5) {
                     $howmuch = 3;
                 }
                 if (!$bayes->addPost($post, $isspam, $howmuch)) {
                     $failures++;
                     warn("Failed to add post " . $post->getPostId());
                     continue;
                 }
                 // Stores 11 or 13 in "added": the literal 1 followed by the weight used.
                 if (!$pdo->exec("/*maxtime15*/UPDATE posts_meta set added=1{$howmuch}\n                                                    WHERE (added=0 or added is null) and id= '" . addslashes($post->getPostId()) . "'")) {
                     warn($post->getPostId(), "update of post failed");
                     // Abandons the rest of this batch only; the outer loop selects a new one.
                     break;
                 }
             }
             $done++;
             $postchecktime = microtime(true) - $poststarttime;

             $load = sys_getloadavg();
             if (!is_array($load)) {
                 // sys_getloadavg() returns false on failure; fall back to "no load".
                 $load = array(0, 0, 0);
             }
             $load = max($load[0] - 0.4, $load[1] / 2, $load[2] / 3, 0);
             if ($load < 1) {
                 $load /= 3;
             } elseif ($load >= 2.2) {
                 $load *= 3;
             }
             // Never wait less than the time the post itself took to process.
             $load = max($load, $postchecktime);
             $wait += $load;
             $this->ping();
             echo "#{$done}; {$failures} fail; id{$res['id']}; score {$spamscore} * {$howmuch}; load {$load}; wait " . round(0.1 * $wait, 1) . "\n<br>";
             flush();
             // usleep() takes an int; truncate explicitly instead of relying on implicit
             // float-to-int conversion (deprecated for fractional values since PHP 8.1).
             usleep((int) (100000 * $load));
         }

         if (!$doneinbatch) {
             sleep(5);
             $maxspam += 40 + $maxspam / 10;
             if ($maxspam > 1500) {
                 break;
             }
         } elseif ($maxspam > 400) {
             $maxspam -= 10;
         }
     }

     $this->ended();

     // Guard against division by zero when nothing was processed.
     $waitperpost = $done ? round(0.1 * $wait / $done, 2) : 0;

     return array('done' => $done, 'failed' => $failures, 'waited' => round(0.1 * $wait), 'waitperpost' => $waitperpost);
 }