/**
 * Download a range of usenet messages. Store binaries with subjects matching a
 * specific pattern in the database.
 *
 * Fetches the overview headers for articles $first..$last through $nntp,
 * groups multi-part posts by subject (with the "(part/total)" marker
 * stripped), then inserts new binaries and their parts, or appends parts to
 * binaries that already exist. Article numbers that were requested but not
 * returned are handed to addMissingParts() unless $type is 'backfill'.
 *
 * @param object $nntp     NNTP client used to fetch the overview; presumably the
 *                         group has already been selected by the caller.
 * @param array  $groupArr Group row; the 'name' and 'ID' keys are read.
 * @param int    $first    First article number of the requested range (inclusive).
 * @param int    $last     Last article number of the requested range (inclusive).
 * @param string $type     'update' (default), 'backfill' or 'partrepair'.
 *
 * @return int|false $last on success; false when the headers could not be
 *                   fetched (including a failed reconnect) or were unusable.
 */
function scan($nntp, $groupArr, $first, $last, $type = 'update')
{
    $db = new Db();
    $releaseRegex = new ReleaseRegex();
    $n = $this->n;
    $this->startHeaders = microtime(true);

    $msgs = $this->scanFetchOverview($nntp, $first, $last);

    // Error codes 400 and 503 are treated as a dropped connection:
    // reconnect once, re-select the group and retry the same range.
    if ($nntp->isError($msgs) && ($msgs->code == 400 || $msgs->code == 503)) {
        echo "NNTP connection timed out. Reconnecting...{$n}";
        if (!$nntp->doConnect()) {
            echo "Failed to get NNTP connection.{$n}";
            // Report failure the same way every other error path does.
            return false;
        }
        $nntp->selectGroup($groupArr['name']);
        $msgs = $this->scanFetchOverview($nntp, $first, $last);
    }

    $rangerequested = range($first, $last);
    $msgsreceived = array();
    $msgsblacklisted = array();
    $msgsignored = array();
    $msgsnotinserted = array();
    $timeHeaders = number_format(microtime(true) - $this->startHeaders, 2);

    if ($nntp->isError($msgs)) {
        echo "Error {$msgs->code}: {$msgs->message}{$n}";
        echo "Skipping group{$n}";
        return false;
    }

    $this->startUpdate = microtime(true);

    if (!is_array($msgs)) {
        echo "Error: Can't get parts from server (msgs not array) {$n}";
        echo "Skipping group{$n}";
        return false;
    }

    // Loop headers, figure out parts.
    $pattern = '|\\((\\d+)[\\/](\\d+)\\)|i';
    foreach ($msgs as $msg) {
        if (!isset($msg['Number'])) {
            continue;
        }
        $msgsreceived[] = $msg['Number'];

        // Check for the subject BEFORE handing it to the regex engine.
        if (!isset($msg['Subject'])) {
            $msgsignored[] = $msg['Number'];
            continue;
        }

        preg_match_all($pattern, $msg['Subject'], $matches, PREG_PATTERN_ORDER);
        $matchcnt = sizeof($matches[0]);
        if ($matchcnt == 0) {
            $msgsignored[] = $msg['Number'];
            continue;
        }

        // When a subject carries several "(x/y)" markers the last one wins.
        // The raw strings are kept for the replace pattern so that markers
        // with leading zeros such as "(01/10)" are still stripped.
        $msgPart = $matches[1][$matchcnt - 1];
        $msgTotalParts = $matches[2][$matchcnt - 1];
        $partNo = (int) $msgPart;

        if ($partNo > 0 && (int) $msgTotalParts > 0) {
            $subject = utf8_encode(trim(preg_replace('|\\(' . $msgPart . '[\\/]' . $msgTotalParts . '\\)|i', '', $msg['Subject'])));

            if (!isset($this->message[$subject])) {
                $this->message[$subject] = $msg;
                $this->message[$subject]['MaxParts'] = (int) $msgTotalParts;
                $this->message[$subject]['Date'] = strtotime($this->message[$subject]['Date']);
            }

            $this->message[$subject]['Parts'][$partNo] = array(
                'Message-ID' => substr($msg['Message-ID'], 1, -1),
                'number' => $msg['Number'],
                'part' => $partNo,
                'size' => $msg['Bytes']
            );
            $this->message[$subject]['PartNumbers'][$partNo] = $msg['Number'];
        }
    }
    unset($msg);
    unset($msgs);

    $count = 0;
    $updatecount = 0;
    $partcount = 0;
    $rangenotreceived = array_diff($rangerequested, $msgsreceived);

    if ($type != 'partrepair') {
        echo "Received " . sizeof($msgsreceived) . " articles of " . ($last - $first + 1) . " requested, " . sizeof($msgsignored) . " not binaries {$n}";
    }

    if ($type == 'update' && sizeof($msgsreceived) == 0) {
        echo "Error: Server did not return any articles.{$n}";
        echo "Skipping group{$n}";
        return false;
    }

    if (sizeof($rangenotreceived) > 0) {
        // Backfill deliberately does not record missing articles.
        if ($type != 'backfill') {
            $this->addMissingParts($rangenotreceived, $groupArr['ID']);
        }
        echo "Server did not return " . count($rangenotreceived) . " article(s).{$n}";
    }

    if (isset($this->message) && count($this->message)) {
        $groupRegexes = $releaseRegex->getForGroup($groupArr['name']);

        // Insert binaries and parts into the database. When a binary already
        // exists, only its new parts are inserted.
        foreach ($this->message as $subject => $data) {
            // Filter binaries based on black/white list.
            if ($this->isBlackListed($data, $groupArr['name'])) {
                $msgsblacklisted[] = count($data['Parts']);
                if ($type == 'partrepair') {
                    $this->scanClearPartRepair($db, $data['Parts'], $groupArr['ID']);
                }
                continue;
            }

            if (!isset($data['Parts']) || count($data['Parts']) == 0 || $subject == '') {
                continue;
            }

            // Check for existing binary.
            $binaryID = 0;
            $binaryHash = md5($subject . $data['From'] . $groupArr['ID']);
            $res = $db->queryOneRow(sprintf("SELECT ID FROM binaries WHERE binaryhash = %s", $db->escapeString($binaryHash)));

            if (!$res) {
                // Apply regexes; the first one that matches wins.
                $regexMatches = array();
                foreach ($groupRegexes as $groupRegex) {
                    $regexCheck = $releaseRegex->performMatch($groupRegex, $subject);
                    if ($regexCheck !== false) {
                        $regexMatches = $regexCheck;
                        break;
                    }
                }

                $sql = '';
                if (!empty($regexMatches)) {
                    // Pad so a 'parts' value without a "/" yields 0 for the
                    // total instead of an undefined-offset warning.
                    $relparts = array_pad(explode("/", $regexMatches['parts']), 2, 0);
                    $sql = sprintf(
                        "INSERT INTO binaries (name, fromname, date, xref, totalparts, groupID, procstat, categoryID, regexID, reqID, relpart, reltotalpart, binaryhash, relname, dateadded) VALUES (%s, %s, FROM_UNIXTIME(%s), %s, %s, %d, %d, %s, %d, %s, %d, %d, %s, %s, now())",
                        $db->escapeString($subject),
                        $db->escapeString(utf8_encode($data['From'])),
                        $db->escapeString($data['Date']),
                        $db->escapeString($data['Xref']),
                        $db->escapeString($data['MaxParts']),
                        $groupArr['ID'],
                        Releases::PROCSTAT_TITLEMATCHED,
                        $regexMatches['regcatid'],
                        $regexMatches['regexID'],
                        $db->escapeString($regexMatches['reqID']),
                        $relparts[0],
                        $relparts[1],
                        $db->escapeString($binaryHash),
                        $db->escapeString(str_replace('_', ' ', $regexMatches['name']))
                    );
                } elseif ($this->onlyProcessRegexBinaries === false) {
                    $sql = sprintf(
                        "INSERT INTO binaries (name, fromname, date, xref, totalparts, groupID, binaryhash, dateadded) VALUES (%s, %s, FROM_UNIXTIME(%s), %s, %s, %d, %s, now())",
                        $db->escapeString($subject),
                        $db->escapeString(utf8_encode($data['From'])),
                        $db->escapeString($data['Date']),
                        $db->escapeString($data['Xref']),
                        $db->escapeString($data['MaxParts']),
                        $groupArr['ID'],
                        $db->escapeString($binaryHash)
                    );
                } elseif ($type == 'partrepair') {
                    // Nothing will be stored for this binary, so drop its
                    // article numbers from the repair queue.
                    $this->scanClearPartRepair($db, $data['Parts'], $groupArr['ID']);
                    continue;
                }

                if ($sql != '') {
                    $binaryID = $db->queryInsert($sql);
                    $count++;
                    if ($count % 500 == 0) {
                        echo "{$count} bin adds...";
                    }
                }
            } else {
                $binaryID = $res["ID"];
                $updatecount++;
                if ($updatecount % 500 == 0) {
                    echo "{$updatecount} bin updates...";
                }
            }

            if ($binaryID != 0) {
                $partParams = array();
                $partNumbers = array();
                $totsize = 0;
                foreach ($data['Parts'] as $partdata) {
                    $partcount++;
                    $totsize += $partdata['size'];
                    $partParams[] = sprintf(
                        "(%d, %s, %s, %s, %s)",
                        $binaryID,
                        $db->escapeString($partdata['Message-ID']),
                        $db->escapeString($partdata['number']),
                        $db->escapeString(round($partdata['part'])),
                        $db->escapeString($partdata['size'])
                    );
                    $partNumbers[] = $partdata['number'];
                }

                $partSql = "INSERT INTO parts (binaryID, messageID, number, partnumber, size) VALUES " . implode(', ', $partParams);
                $pidata = $db->queryInsert($partSql, false);
                if (!$pidata) {
                    $msgsnotinserted = array_merge($msgsnotinserted, $partNumbers);
                }

                // Update bin size.
                $upsql = sprintf("update binaries set size = size + %d where ID = %d", $totsize, $binaryID);
                $db->exec($upsql);
            }
        }

        //TODO: determine whether to add to missing articles if insert failed
        if (sizeof($msgsnotinserted) > 0) {
            echo 'WARNING: ' . count($msgsnotinserted) . ' Parts failed to insert' . $n;
            $this->addMissingParts($msgsnotinserted, $groupArr['ID']);
        }

        // Line break to terminate the "bin adds/updates..." progress output.
        if ($count >= 500 || $updatecount >= 500) {
            echo $n;
        }
    }

    $timeUpdate = number_format(microtime(true) - $this->startUpdate, 2);
    // startLoop is not assigned in this method; presumably set by the caller.
    $timeLoop = number_format(microtime(true) - $this->startLoop, 2);

    if (sizeof($msgsblacklisted) > 0) {
        echo "Blacklisted " . array_sum($msgsblacklisted) . " parts in " . sizeof($msgsblacklisted) . " binaries" . $n;
    }

    if ($type != 'partrepair') {
        echo number_format($count) . ' new, ' . number_format($updatecount) . ' updated, ' . number_format($partcount) . ' parts.';
        echo " {$timeHeaders} headers, {$timeUpdate} update, {$timeLoop} range.{$n}";
    }

    unset($this->message);
    unset($data);

    return $last;
}

/**
 * Fetch the overview for $first-$last, choosing the overview call according
 * to the compressed-header probe.
 *
 * When compressedHeaders is enabled, a second short-lived connection is
 * opened to probe the server: if 'XFEATURE COMPRESS GZIP' does not answer
 * 290 and 'XZVER' answers 412, getXOverview() is used; in every other case
 * getOverview() is used. The probe connection is closed after the fetch.
 *
 * @param object $nntp  NNTP client the overview is fetched through.
 * @param int    $first First article number of the range.
 * @param int    $last  Last article number of the range.
 *
 * @return mixed Whatever getOverview()/getXOverview() returns (the caller
 *               checks it with isError() and is_array()).
 */
private function scanFetchOverview($nntp, $first, $last)
{
    $range = $first . "-" . $last;

    if (!$this->compressedHeaders) {
        return $nntp->getOverview($range, true, false);
    }

    $probe = new Nntp();
    $probe->doConnect(5, false, true);

    $useXOverview = false;
    $response = $probe->_sendCommand('XFEATURE COMPRESS GZIP');
    if ($probe->isError($response) || $response != 290) {
        // NOTE(review): 412 presumably means the server recognises XZVER
        // but no group is selected on the probe connection; confirm.
        $response2 = $probe->_sendCommand('XZVER');
        $useXOverview = !($probe->isError($response2) || $response2 != 412);
    }

    if ($useXOverview) {
        $msgs = $nntp->getXOverview($range, true, false);
    } else {
        $msgs = $nntp->getOverview($range, true, false);
    }
    $probe->doQuit();

    return $msgs;
}

/**
 * Remove the article numbers of the given parts from the partrepair table.
 *
 * @param object $db      Database handle (exec() and escapeString() are used).
 * @param array  $parts   Part records; the 'number' key of each is read.
 * @param mixed  $groupID Group ID the repair rows belong to.
 *
 * @return void
 */
private function scanClearPartRepair($db, $parts, $groupID)
{
    $partIds = array();
    foreach ($parts as $partdata) {
        // Article numbers originate from the remote server; escape them like
        // the parts insert does rather than interpolating them raw.
        $partIds[] = $db->escapeString($partdata['number']);
    }

    // "IN ()" is a SQL syntax error, so skip the query when nothing to delete.
    if (count($partIds) == 0) {
        return;
    }

    $db->exec(sprintf("DELETE FROM partrepair WHERE numberID IN (%s) AND groupID=%d", implode(',', $partIds), $groupID));
}