コード例 #1
0
 /**
  * Fetch recent article overviews for one or more newsgroups, collapse the
  * multi-part posts into binaries keyed by subject, run the group's regexes
  * over them and store the result in the releaseregextesting table.
  *
  * @param string|int $groupname         A full group name (e.g. "alt.binaries.x"), the integer 0 to use
  *                                      every group returned by Groups::getAll(), or any other value,
  *                                      which is used as a regex against the server's group list.
  * @param int        $numarticles       How many of the newest articles to inspect per group.
  * @param bool       $clearexistingbins When true, releaseregextesting is truncated first.
  *
  * @return array Human-readable status/error messages.
  */
 public function fetchTestBinaries($groupname, $numarticles, $clearexistingbins)
 {
     $db = new DB();
     $nntp = new Nntp();
     $binaries = new Binaries();
     $groups = new Groups();
     $ret = array();
     if ($clearexistingbins == true) {
         $db->exec('truncate releaseregextesting');
     }
     $nntp->doConnect();
     $groupsToFetch = array();
     if (preg_match('/^[a-z]{2,3}(\\.[a-z0-9\\-]+)+$/', $groupname)) {
         $groupsToFetch[] = array('name' => $groupname);
     } elseif ($groupname === 0) {
         $groupsToFetch = $groups->getAll();
     } else {
         // Anything else is treated as a regex fragment supplied by the caller.
         $newsgroups = $nntp->getGroups();
         foreach ($newsgroups as $ngroup) {
             if (preg_match('/' . $groupname . '/', $ngroup['group'])) {
                 $groupsToFetch[] = array('name' => $ngroup['group']);
             }
         }
     }
     foreach ($groupsToFetch as $groupArr) {
         $group = $groupArr['name'];
         $data = $nntp->selectGroup($group);
         if ($nntp->isError($data)) {
             $ret[] = "Could not select group (doesnt exist on USP): {$group}";
             continue;
         }
         $rangeStart = $data['last'] - $numarticles;
         $rangeEnd = $groupEnd = $data['last'];
         $rangeTotal = $rangeEnd - $rangeStart;
         $done = false;
         // Walk the requested range in windows of at most messagebuffer articles.
         while ($done === false) {
             if ($rangeTotal > $binaries->messagebuffer) {
                 if ($rangeStart + $binaries->messagebuffer > $groupEnd) {
                     $rangeEnd = $groupEnd;
                 } else {
                     $rangeEnd = $rangeStart + $binaries->messagebuffer;
                 }
             }
             if ($binaries->compressedHeaders) {
                 $msgs = $nntp->getXOverview($rangeStart . "-" . $rangeEnd, true, false);
             } else {
                 $msgs = $nntp->getOverview($rangeStart . "-" . $rangeEnd, true, false);
             }
             if ($nntp->isError($msgs)) {
                 $ret[] = "Error {$msgs->code}: {$msgs->message} on " . $group;
                 continue 2;
             }
             if (!is_array($msgs)) {
                 // A bare "continue" here would retry the same window forever;
                 // give up on this group and move on to the next one.
                 $ret[] = "Can't get parts from server (msgs not array) on " . $group;
                 continue 2;
             }
             // Loop headers, figure out parts.
             $headers = array();
             $pattern = '|\\((\\d+)[\\/](\\d+)\\)|i';
             foreach ($msgs as $msg) {
                 if (!isset($msg['Number']) || !isset($msg['Subject'])) {
                     continue;
                 }
                 $matchcnt = preg_match_all($pattern, $msg['Subject'], $matches, PREG_PATTERN_ORDER);
                 if (!$matchcnt) {
                     // No "(part/total)" marker: most likely not a binary post.
                     continue;
                 }
                 // The last "(x/y)" in the subject wins. Kept as strings because
                 // they are spliced back into the preg_replace pattern below.
                 $msgPart = $matches[1][$matchcnt - 1];
                 $msgTotalParts = $matches[2][$matchcnt - 1];
                 if ((int) $msgPart > 0 && (int) $msgTotalParts > 0) {
                     $subject = utf8_encode(trim(preg_replace('|\\(' . $msgPart . '[\\/]' . $msgTotalParts . '\\)|i', '', $msg['Subject'])));
                     if (!isset($headers[$subject])) {
                         $headers[$subject]['Subject'] = $subject;
                         $headers[$subject]['From'] = $msg['From'];
                         $headers[$subject]['Date'] = strtotime($msg['Date']);
                         $headers[$subject]['Message-ID'] = $msg['Message-ID'];
                         $headers[$subject]['Size'] = $msg['Bytes'];
                     } else {
                         $headers[$subject]['Size'] += $msg['Bytes'];
                     }
                 }
             }
             unset($msgs);
             if (count($headers) > 0) {
                 $groupRegexes = $this->getForGroup($group);
                 $binSetData = array();
                 foreach ($headers as $subject => $header) {
                     $binData = array('name' => $subject, 'fromname' => $header['From'], 'date' => $header['Date'], 'binaryhash' => md5($subject . $header['From'] . $group), 'groupname' => $group, 'regexID' => "null", 'categoryID' => "null", 'reqID' => "null", 'blacklistID' => 0, 'size' => $header['Size'], 'relname' => "null", 'relpart' => "null", 'reltotalpart' => "null");
                     // Filter binaries based on black/white list.
                     if ($binaries->isBlackListed($header, $group)) {
                         $binData['blacklistID'] = 1;
                     }
                     // Apply regexes; the first one that matches wins.
                     foreach ($groupRegexes as $groupRegex) {
                         $regexCheck = $this->performMatch($groupRegex, $subject, $header['From']);
                         if ($regexCheck !== false) {
                             $binData['regexID'] = $regexCheck['regexID'];
                             $binData['categoryID'] = $regexCheck['regcatid'];
                             $binData['reqID'] = empty($regexCheck['reqID']) ? "null" : $regexCheck['reqID'];
                             $binData['relname'] = $regexCheck['name'];
                             break;
                         }
                     }
                     $binSetData[] = $binData;
                 }
                 // Insert 500 bins at a time.
                 foreach (array_chunk($binSetData, 500) as $binChunk) {
                     // Start every statement with a fresh list so rows from earlier
                     // chunks, windows or groups are not sent again.
                     $binParams = array();
                     foreach ($binChunk as $chunk) {
                         // The three ID columns are written unquoted, so only an integer
                         // or the literal null may reach the SQL text (reqID can come
                         // from untrusted article subjects).
                         $regexId = is_numeric($chunk['regexID']) ? (int) $chunk['regexID'] : 'null';
                         $categoryId = is_numeric($chunk['categoryID']) ? (int) $chunk['categoryID'] : 'null';
                         $reqId = is_numeric($chunk['reqID']) ? (int) $chunk['reqID'] : 'null';
                         $binParams[] = sprintf("(%s, %s, FROM_UNIXTIME(%s), %s, %s, %s, %s, %s, %d, %d, now())", $db->escapeString($chunk['name']), $db->escapeString($chunk['fromname']), $db->escapeString($chunk['date']), $db->escapeString($chunk['binaryhash']), $db->escapeString($chunk['groupname']), $regexId, $categoryId, $reqId, $chunk['blacklistID'], $chunk['size']);
                     }
                     $binSql = "INSERT IGNORE INTO releaseregextesting (name, fromname, date, binaryhash, groupname, regexID, categoryID, reqID, blacklistID, size, dateadded) VALUES " . implode(', ', $binParams);
                     $db->exec($binSql);
                 }
                 $ret[] = "Fetched " . number_format($numarticles) . " articles from " . $group;
             } else {
                 // Nothing usable in this window; fall through so the range still
                 // advances (a bare "continue" here looped forever).
                 $ret[] = "No headers found on " . $group;
             }
             if ($rangeEnd == $groupEnd) {
                 $done = true;
             }
             $rangeStart = $rangeEnd + 1;
         }
     }
     $nntp->doQuit();
     return $ret;
 }
コード例 #2
0
 /**
  * Process headers and store in database for a group.
  *
  * When no connection is supplied the method opens its own and closes it
  * again on every exit path, including the early "skip group" returns.
  *
  * @param Nntp|null $nntp     An open NNTP connection, or null to open a temporary one.
  * @param array     $groupArr Group row; the keys read here are name, ID, regexmatchonly,
  *                            first_record, last_record, first_record_postdate and
  *                            last_record_postdate.
  *
  * @return void
  */
 function updateGroup($nntp = null, $groupArr)
 {
     $blnDoDisconnect = false;
     if ($nntp == null) {
         $nntp = new Nntp();
         if (!$nntp->doConnect()) {
             echo "Failed to get NNTP connection.";
             return;
         }
         $this->message = array();
         $blnDoDisconnect = true;
     }
     // All the work (and all of its early returns) lives in the helper so the
     // connection opened above is always released afterwards.
     $this->updateGroupOnConnection($nntp, $groupArr);
     if ($blnDoDisconnect) {
         $nntp->doQuit();
     }
 }

 /**
  * Do the actual header update for one group on an already-open connection.
  *
  * @param Nntp  $nntp     Connected NNTP client; never closed here.
  * @param array $groupArr Group row, see updateGroup().
  *
  * @return void
  */
 private function updateGroupOnConnection($nntp, $groupArr)
 {
     $db = new DB();
     $backfill = new Backfill();
     $n = $this->n;
     $this->startGroup = microtime(true);
     $this->startLoop = microtime(true);
     echo 'Processing ' . $groupArr['name'] . $n;
     // Select the group on the server
     $data = $nntp->selectGroup($groupArr['name']);
     if ($nntp->isError($data)) {
         echo "Could not select group (bad name?): {$groupArr['name']}{$n} {$n}";
         return;
     }
     if ($groupArr['regexmatchonly'] == 1) {
         $this->onlyProcessRegexBinaries = true;
         echo "Note: Discarding parts that do not match a regex" . $n;
     } else {
         $this->onlyProcessRegexBinaries = false;
     }
     // Attempt to repair any missing parts before grabbing new ones
     $this->partRepair($nntp, $groupArr);
     // Get first and last part numbers from newsgroup
     $last = $grouplast = $data['last'];
     // For new newsgroups - determine here how far you want to go back.
     if ($groupArr['last_record'] == 0) {
         if ($this->NewGroupScanByDays) {
             $first = $backfill->daytopost($nntp, $groupArr['name'], $this->NewGroupDaysToScan, true);
             if ($first == '') {
                 echo "Skipping group: {$groupArr['name']}{$n}";
                 return;
             }
         } else {
             if ($data['first'] > $data['last'] - $this->NewGroupMsgsToScan) {
                 $first = $data['first'];
             } else {
                 $first = $data['last'] - $this->NewGroupMsgsToScan;
             }
         }
         $first_record_postdate = $backfill->postdate($nntp, $first, false);
         if ($first_record_postdate != "") {
             $db->exec(sprintf("update groups SET first_record = %s, first_record_postdate = FROM_UNIXTIME(%s) WHERE ID = %d", $db->escapeString($first), $first_record_postdate, $groupArr['ID']));
         }
     } else {
         if ($data['last'] < $groupArr['last_record']) {
             echo "Warning: Server's last num {$data['last']} is lower than the local last num {$groupArr['last_record']}" . $n;
             return;
         }
         $first = $groupArr['last_record'] + 1;
     }
     // Generate postdates for first and last records, for those that upgraded
     if ((is_null($groupArr['first_record_postdate']) || is_null($groupArr['last_record_postdate'])) && ($groupArr['last_record'] != "0" && $groupArr['first_record'] != "0")) {
         $firstPostdate = $backfill->postdate($nntp, $groupArr['first_record'], false);
         $lastPostdate = $backfill->postdate($nntp, $groupArr['last_record'], false);
         // An empty postdate would produce "FROM_UNIXTIME()", which is invalid
         // SQL; the other postdate updates in this method are guarded the same way.
         if ($firstPostdate != "" && $lastPostdate != "") {
             $db->exec(sprintf("update groups SET first_record_postdate = FROM_UNIXTIME(%s), last_record_postdate = FROM_UNIXTIME(%s) WHERE ID = %d", $firstPostdate, $lastPostdate, $groupArr['ID']));
         }
     }
     // Deactivate empty groups
     if ($data['last'] - $data['first'] <= 5) {
         $db->exec(sprintf("update groups SET active = %s, last_updated = now() WHERE ID = %d", $db->escapeString('0'), $groupArr['ID']));
     }
     // Calculate total number of parts
     $total = $grouplast - $first + 1;
     // If total is bigger than 0 it means we have new parts in the newsgroup
     if ($total > 0) {
         echo "Group " . $data["group"] . " has " . number_format($total) . " new parts." . $n;
         if ($total > $this->MaxMsgsPerRun) {
             $grouplast = $first + $this->MaxMsgsPerRun;
             echo "NOTICE: Only processing first " . number_format($this->MaxMsgsPerRun) . " parts." . $n;
         }
         echo "First: " . $data['first'] . " Last: " . $data['last'] . " Local last: " . $groupArr['last_record'] . $n;
         if ($groupArr['last_record'] == 0) {
             echo "New group starting with " . ($this->NewGroupScanByDays ? $this->NewGroupDaysToScan . " days" : $this->NewGroupMsgsToScan . " messages") . " worth." . $n;
         }
         $done = false;
         // Get all the parts (in portions of $this->messagebuffer to not use too much memory)
         while ($done === false) {
             $this->startLoop = microtime(true);
             // Always clamp the window to the (possibly capped) $grouplast. Only doing
             // so when $total exceeded the buffer left $last at the server's last
             // article whenever MaxMsgsPerRun capped a small run, so the loop's exit
             // condition could never become true.
             $last = min($first + $this->messagebuffer, $grouplast);
             echo "Getting " . number_format($last - $first + 1) . " parts (" . $first . " to " . $last . ") - " . number_format($grouplast - $last) . " in queue" . $n;
             flush();
             // Get headers from newsgroup
             $lastId = $this->scan($nntp, $groupArr, $first, $last);
             if ($lastId === false) {
                 // Scan failed - skip group
                 return;
             }
             $db->exec(sprintf("update groups SET last_record = %s, last_updated = now() WHERE ID = %d", $db->escapeString($lastId), $groupArr['ID']));
             if ($last == $grouplast) {
                 $done = true;
             } else {
                 $last = $lastId;
                 $first = $last + 1;
             }
         }
         // Set group's last postdate
         $last_record_postdate = $backfill->postdate($nntp, $last, false);
         if ($last_record_postdate != "") {
             $db->exec(sprintf("update groups SET last_record_postdate = FROM_UNIXTIME(%s), last_updated = now() WHERE ID = %d", $last_record_postdate, $groupArr['ID']));
         }
         $timeGroup = number_format(microtime(true) - $this->startGroup, 2);
         echo "Group processed in {$timeGroup} seconds {$n} {$n}";
     } else {
         echo "No new records for " . $data["group"] . " (first {$first} last {$last} total {$total}) grouplast " . $groupArr['last_record'] . $n . $n;
     }
 }
コード例 #3
0
 /**
  * Update a group back to a specified date.
  *
  * Two NNTP connections are used: $nntp for the short look-ups before and
  * after the scan, and $nntpc for the long-running scan itself. Both are
  * closed on every exit path.
  *
  * @param array $groupArr     Group row; the keys read here are name, ID, first_record
  *                            and backfill_target.
  * @param mixed $backfillDate Optional target date. When set it overrides
  *                            $groupArr['backfill_target'] and is passed to
  *                            dateToDays() and date().
  * @param bool  $regexOnly    When true only binaries matching a regex are kept.
  *
  * @return string|null Empty string when there was nothing to do or the scan failed,
  *                     null otherwise (kept as in the original for callers).
  */
 function backfillGroup($groupArr, $backfillDate = null, $regexOnly = false)
 {
     $db = new DB();
     $binaries = new Binaries();
     $n = $this->n;
     if ($regexOnly === true) {
         echo "Only inserting binaries which match regex{$n}";
         $binaries->onlyProcessRegexBinaries = true;
     }
     $this->startGroup = microtime(true);
     $nntp = new Nntp();
     $nntpc = new Nntp();
     // Make sure we actually have a connection going before doing anything.
     if (!$nntp->doConnect(5, false, true)) {
         echo "Failed to get NNTP connection.{$n}";
         return;
     }
     echo 'Processing ' . $groupArr['name'] . $n;
     $data = $nntp->selectGroup($groupArr['name']);
     if ($nntp->isError($data)) {
         echo "Could not select group (bad name?): {$groupArr['name']}{$n}";
         $nntp->doQuit();
         return;
     }
     // Validate the local numbers before spending a server round-trip on them.
     if ($groupArr['first_record'] == 0 || ($groupArr['backfill_target'] == 0 && !$backfillDate)) {
         echo "Group " . $groupArr['name'] . " has invalid numbers.  Have you run update on it?  Have you set the backfill days amount?{$n}";
         $nntp->doQuit();
         return;
     }
     // Get targetpost based on days target
     if ($backfillDate) {
         $targetpost = $this->daytopost($nntp, $groupArr['name'], $this->dateToDays($backfillDate), true);
     } else {
         $targetpost = $this->daytopost($nntp, $groupArr['name'], $groupArr['backfill_target'], true);
     }
     echo "Group " . $data["group"] . ": server has " . $data['first'] . " - " . $data['last'] . ", or ~";
     echo (int) (($this->postdate($nntp, $data['last'], false) - $this->postdate($nntp, $data['first'], false)) / 86400);
     echo " days." . $n . "Local first = " . $groupArr['first_record'] . " (";
     echo (int) ((date('U') - $this->postdate($nntp, $groupArr['first_record'], false)) / 86400);
     echo " days).  Backfill target of " . ($backfillDate ? date('Y-m-d', $backfillDate) : $groupArr['backfill_target'] . " days") . " is post {$targetpost}.{$n}";
     if ($targetpost >= $groupArr['first_record']) {
         echo "Nothing to do, we already have the target post.{$n} {$n}";
         $nntp->doQuit();
         return "";
     }
     // NOTE(review): the first message says the target is reset to the server's
     // first article, but the group is in fact skipped - confirm which is intended.
     if ($targetpost < $data['first']) {
         echo "WARNING: Backfill came back as before server's first.  Setting targetpost to server first.{$n}";
         echo "Skipping Group {$n}";
         $nntp->doQuit();
         return "";
     }
     echo $binaries->onlyProcessRegexBinaries === true ? "Note: Discarding parts that do not match a regex" . $n : "";
     // Done with $nntp for now, close it to avoid timeouts.
     $nntp->doQuit();
     if (!$nntpc->doConnect()) {
         echo "Failed to get NNTP connection.{$n}";
         return;
     }
     $datac = $nntpc->selectGroup($groupArr['name']);
     if ($nntpc->isError($datac)) {
         echo "Could not select group (bad name?): {$groupArr['name']}{$n}";
         $nntpc->doQuit();
         return;
     }
     $done = false;
     // Set first and last, moving the window backwards by messagebuffer articles.
     $last = $groupArr['first_record'] - 1;
     $first = $last - $binaries->messagebuffer + 1;
     // Set initial "chunk"
     if ($targetpost > $first) {
         // Just in case this is the last chunk we needed
         $first = $targetpost;
     }
     while ($done === false) {
         $binaries->startLoop = microtime(true);
         echo "Getting " . ($last - $first + 1) . " parts (" . number_format($first - $targetpost) . " in queue)" . $n;
         flush();
         $success = $binaries->scan($nntpc, $groupArr, $first, $last, 'backfill');
         if (!$success) {
             $nntpc->doQuit();
             return "";
         }
         $db->exec(sprintf("update groups SET first_record = %s, last_updated = now() WHERE ID = %d", $db->escapeString($first), $groupArr['ID']));
         if ($first == $targetpost) {
             $done = true;
         } else {
             // Keep going: set new last, new first, check for last chunk.
             $last = $first - 1;
             $first = $last - $binaries->messagebuffer + 1;
             if ($targetpost > $first) {
                 $first = $targetpost;
             }
         }
     }
     // Done with $nntpc
     $nntpc->doQuit();
     // Just need $nntp for a quick check on the first_record_postdate; skip the
     // lookup (and the update) if the reconnect fails.
     $first_record_postdate = "";
     if ($nntp->doConnect()) {
         //$nntp->selectGroup($groupArr['name']); // some users report having this in keeps backfill working
         $first_record_postdate = $this->postdate($nntp, $first, false);
         // All done with NNTP.
         $nntp->doQuit();
     }
     // Set group's first postdate
     if ($first_record_postdate != "") {
         $db->exec(sprintf("update groups SET first_record_postdate = FROM_UNIXTIME(%s), last_updated = now() WHERE ID = %d", $first_record_postdate, $groupArr['ID']));
     }
     $timeGroup = number_format(microtime(true) - $this->startGroup, 2);
     echo "Group processed in {$timeGroup} seconds {$n}";
 }
コード例 #4
0
 /**
  * Get nzpre data from usenet and parse.
  *
  * Reads the newest articles of the configured nzpre group, picks those whose
  * subject and poster match the configured patterns, extracts the configured
  * header field from each and hands its value to nzpreParse()/updatePreDB().
  *
  * @return false|null False when nzpre is not fully configured or an NNTP step
  *                    fails; null after a completed run (unchanged from the original).
  */
 public function nzpreUpdate()
 {
     require_once WWW_DIR . "/lib/nntp.php";
     $s = new Sites();
     $site = $s->get();
     if (empty($site->nzpregroup) || empty($site->nzpresubject) || empty($site->nzpreposter) || empty($site->nzprefield) || empty($site->nzprekey)) {
         return false;
     }
     if ($this->echooutput) {
         echo "Predb   : Checking for new pre data ";
     }
     $db = new DB();
     $nntp = new Nntp();
     if (!$nntp->doConnect()) {
         echo "Failed to get NNTP connection\n";
         return false;
     }
     $groupData = $nntp->selectGroup($site->nzpregroup);
     if ($nntp->isError($groupData)) {
         echo "Predb   : Error " . $groupData->getMessage() . "\n";
         $nntp->disconnect();
         return false;
     }
     // Number of newest articles to inspect; 500 unless the site overrides it.
     $articleCount = !empty($site->nzprearticles) ? $site->nzprearticles : 500;
     // Parenthesised: mixing "-" and "." without parentheses is deprecated in
     // PHP 7.4 and parsed differently from PHP 8 onwards.
     $groupMsgs = $nntp->getOverview(($groupData['last'] - $articleCount) . '-' . $groupData['last']);
     if ($nntp->isError($groupMsgs)) {
         echo "Predb   : Error " . $groupMsgs->getMessage() . "\n";
         $nntp->disconnect();
         return false;
     }
     $added_updated = 0;
     // Repeat the key until it is at least 1024 bytes long (it is non-empty here,
     // see the configuration check above).
     $nzprekey = $site->nzprekey;
     while (strlen($nzprekey) < 1024) {
         $nzprekey = $nzprekey . $nzprekey;
     }
     // The field name is matched literally as a line prefix, so regex
     // metacharacters in it cannot change the match and only the leading
     // "Field: " is removed from the value.
     $fieldPrefix = $site->nzprefield . ': ';
     $prefixLength = strlen($fieldPrefix);
     $cnt = $articleCount;
     foreach ($groupMsgs as $groupMsg) {
         if ($cnt % 50 == 0 && $cnt != 0 && $this->echooutput) {
             echo $cnt . "..";
         }
         $cnt--;
         if (preg_match('/^' . $site->nzpresubject . '$/', $groupMsg['Subject']) && preg_match('/^' . $site->nzpreposter . '$/', $groupMsg['From'])) {
             $msgHeader = $nntp->getHeader($groupMsg['Message-ID']);
             if ($nntp->isError($msgHeader)) {
                 continue;
             }
             foreach ($msgHeader as $headerLine) {
                 if (strncmp($headerLine, $fieldPrefix, $prefixLength) === 0) {
                     $nzpreParse = $this->nzpreParse((string) substr($headerLine, $prefixLength), $nzprekey);
                     if ($nzpreParse && $this->updatePreDB($db, $nzpreParse)) {
                         $added_updated++;
                     }
                     // Only the first occurrence of the field is considered.
                     break;
                 }
             }
         }
     }
     $nntp->disconnect();
     if ($this->echooutput) {
         echo "\nPredb   : Added/Updated " . $added_updated . " records\n";
     }
 }