Пример #1
0
 /**
  * Download a range of article headers for one group and store them in the DB.
  *
  * Outline:
  *  1. Fetch the headers (getOverview() in part repair mode, getXOVER() otherwise). If
  *     getXOVER() fails, reconnect with doConnect(false) and retry once.
  *  2. Record the first/last article number and date found in the response.
  *  3. Inside a transaction, insert/update one collection and one binary row per new
  *     subject and queue one part row per article.
  *  4. Flush the aggregated binary counters and the queued parts, then commit or roll back.
  *  5. Maintain the part repair table: remove repaired articles (part repair mode), or add
  *     articles that failed to insert / were not returned (update mode with part repair on).
  *
  * @param array      $groupMySQL   The group info from mysql ('id' and 'name' are read).
  * @param int        $first        The oldest wanted header.
  * @param int        $last         The newest wanted header.
  * @param string     $type         Is this partrepair or update or backfill?
  * @param null|array $missingParts If we are running in partrepair, the list of missing article numbers.
  *
  * @return array Empty on failure or when no headers were returned. Otherwise it may hold
  *               'firstArticleNumber', 'firstArticleDate', 'lastArticleNumber' and
  *               'lastArticleDate'.
  */
 public function scan($groupMySQL, $first, $last, $type = 'update', $missingParts = null)
 {
     // Start time of scan method and of fetching headers.
     $startLoop = microtime(true);

     // Check if MySQL tables exist, create if they do not, get their names at the same time.
     $tableNames = $this->_groups->getCBPTableNames($this->_tablePerGroup, $groupMySQL['id']);

     $returnArray = [];
     $partRepair = $type === 'partrepair';
     $addToPartRepair = $type === 'update' && $this->_partRepair;

     // Download the headers.
     $articleRange = $first . '-' . $last;
     if ($partRepair === true) {
         // This is slower but possibly is better with missing headers.
         $headers = $this->_nntp->getOverview($articleRange, true, false);
     } else {
         $headers = $this->_nntp->getXOVER($articleRange);
     }

     // If there was an error, try to reconnect.
     if ($this->_nntp->isError($headers)) {
         // In part repair mode just count the failed attempt and give up.
         if ($partRepair === true) {
             // NOTE(review): the table name is hard-coded here while the rest of this method
             // uses $tableNames['prname'] - confirm both refer to the same table.
             $this->_pdo->queryExec(
                 sprintf(
                     'UPDATE missed_parts SET attempts = attempts + 1 WHERE group_id = %d AND numberid %s',
                     $groupMySQL['id'],
                     $first == $last ? '= ' . $first : 'IN (' . implode(',', range($first, $last)) . ')'
                 )
             );

             return $returnArray;
         }

         // This is usually a compression error, so try disabling compression.
         $this->_nntp->doQuit();
         if ($this->_nntp->doConnect(false) !== true) {
             return $returnArray;
         }

         // Re-select group, download headers again without compression and re-enable compression.
         $this->_nntp->selectGroup($groupMySQL['name']);
         $headers = $this->_nntp->getXOVER($articleRange);
         $this->_nntp->enableCompression();

         // Check if the non-compression headers have an error.
         if ($this->_nntp->isError($headers)) {
             $message = $headers->code == 0 ? 'Unknown error' : $headers->message;
             $this->log("Code {$headers->code}: {$message}\nSkipping group: {$groupMySQL['name']}", __FUNCTION__, Logger::LOG_WARNING, 'error');

             return $returnArray;
         }
     }

     // Start of processing headers.
     $startCleaning = microtime(true);
     // End of the getting data from usenet.
     $timeHeaders = number_format($startCleaning - $startLoop, 2);

     // Check if we got headers.
     $msgCount = count($headers);
     if ($msgCount < 1) {
         return $returnArray;
     }

     // Get highest and lowest article numbers/dates: walk inwards from both ends of the
     // response until an entry with a 'Number' was found on each side.
     $iterator1 = 0;
     $iterator2 = $msgCount - 1;
     while (true) {
         if (!isset($returnArray['firstArticleNumber']) && isset($headers[$iterator1]['Number'])) {
             $returnArray['firstArticleNumber'] = $headers[$iterator1]['Number'];
             $returnArray['firstArticleDate'] = $headers[$iterator1]['Date'];
         }
         if (!isset($returnArray['lastArticleNumber']) && isset($headers[$iterator2]['Number'])) {
             $returnArray['lastArticleNumber'] = $headers[$iterator2]['Number'];
             $returnArray['lastArticleDate'] = $headers[$iterator2]['Date'];
         }
         // Break if we found non empty articles.
         if (isset($returnArray['firstArticleNumber']) && isset($returnArray['lastArticleNumber'])) {
             break;
         }
         // Break out if we couldn't find anything.
         if ($iterator1++ >= $msgCount - 1 || $iterator2-- <= 0) {
             break;
         }
     }

     $headersRepaired = $articles = $rangeNotReceived = $collectionIDs = $binariesUpdate = $headersReceived = $headersNotInserted = [];
     $notYEnc = $headersBlackListed = 0;
     // $partsCheck keeps the bare prefix so we can later tell whether any part was queued.
     $partsQuery = $partsCheck = sprintf('INSERT IGNORE INTO %s (binaryid, number, messageid, partnumber, size) VALUES ', $tableNames['pname']);

     $this->_pdo->beginTransaction();

     // Loop articles, figure out files/parts.
     foreach ($headers as $header) {
         // An entry without an article number cannot be processed, and since the number is
         // unknown it cannot be queued for part repair here either. Missing numbers are
         // derived after the loop by diffing the requested range against $headersReceived.
         if (!isset($header['Number'])) {
             continue;
         }
         $headersReceived[] = $header['Number'];

         // If set we are running in partRepair mode.
         if ($partRepair === true && !is_null($missingParts)) {
             // If article isn't one that is missing skip it.
             if (!in_array($header['Number'], $missingParts)) {
                 continue;
             }
             // We got the part this time. Remove article from part repair.
             $headersRepaired[] = $header['Number'];
         }

         /*
          * Find part / total parts. Ignore if no part count found.
          *
          * \s* Trims the leading space.
          * (?!"Usenet Index Post) ignores these types of articles, they are useless.
          * (.+) Fetches the subject.
          * \s+ Trims trailing space after the subject.
          * \((\d+)\/(\d+)\) Gets the part count.
          * No ending ($) as there are cases of subjects with extra data after the part count.
          */
         if (!preg_match('/^\\s*(?!"Usenet Index Post)(.+)\\s+\\((\\d+)\\/(\\d+)\\)/', $header['Subject'], $matches)) {
             if ($this->_showDroppedYEncParts === true && strpos($header['Subject'], '"Usenet Index Post') !== 0) {
                 file_put_contents(nZEDb_LOGS . 'not_yenc' . $groupMySQL['name'] . '.dropped.log', $header['Subject'] . PHP_EOL, FILE_APPEND);
             }
             $notYEnc++;
             continue;
         }

         // Add yEnc to subjects that do not have them, but have the part number at the end of the header.
         if (stripos($header['Subject'], 'yEnc') === false) {
             $matches[1] .= ' yEnc';
         }

         // Filter subject based on black/white list.
         if ($this->isBlackListed($header, $groupMySQL['name'])) {
             $headersBlackListed++;
             continue;
         }

         // Normalise the article size to an integer, falling back to ':bytes' and then 0.
         if (!isset($header['Bytes'])) {
             $header['Bytes'] = isset($header[':bytes']) ? $header[':bytes'] : 0;
         }
         $header['Bytes'] = (int) $header['Bytes'];

         // Set up the info for inserting into parts/binaries/collections tables.
         if (isset($articles[$matches[1]])) {
             // Subject already seen in this batch: reuse its IDs and only accumulate the
             // counters, which are flushed in a single query after the loop.
             $binaryID = $articles[$matches[1]]['BinaryID'];
             $collectionID = $articles[$matches[1]]['CollectionID'];
             $binariesUpdate[$binaryID]['Size'] += $header['Bytes'];
             $binariesUpdate[$binaryID]['Parts']++;
         } else {
             // Check whether file count should be ignored (XXX packs for now only),
             // otherwise attempt to find the file count. If it is not found, set it to 0.
             if ($this->_ignoreFileCount($groupMySQL['name'], $matches[1])) {
                 $fileCount = [1 => 0, 3 => 0];
             } elseif (!preg_match('/[[(\\s](\\d{1,5})(\\/|[\\s_]of[\\s_]|-)(\\d{1,5})[])\\s$:]/i', $matches[1], $fileCount)) {
                 $fileCount = [1 => 0, 3 => 0];
                 if ($this->_showDroppedYEncParts === true) {
                     file_put_contents(nZEDb_LOGS . 'no_files' . $groupMySQL['name'] . '.log', $header['Subject'] . PHP_EOL, FILE_APPEND);
                 }
             }

             // Used to group articles together when forming the release/nzb.
             $header['CollectionKey'] = $this->_collectionsCleaning->collectionsCleaner($matches[1], $groupMySQL['name']) . $header['From'] . $groupMySQL['id'] . $fileCount[3];

             if (isset($collectionIDs[$header['CollectionKey']])) {
                 $collectionID = $collectionIDs[$header['CollectionKey']];
             } else {
                 /* Date from header should be a string this format:
                  * 31 Mar 2014 15:36:04 GMT or 6 Oct 1998 04:38:40 -0500
                  * Still make sure it's not unix time, convert it to unix time if it is.
                  */
                 $header['Date'] = is_numeric($header['Date']) ? $header['Date'] : strtotime($header['Date']);

                 // Use the header date unless it is unparsable or lies in the future.
                 $now = time();
                 $collectionDate = $now;
                 if (is_numeric($header['Date']) && $header['Date'] <= $now) {
                     $collectionDate = $header['Date'];
                 }

                 $collectionID = $this->_pdo->queryInsert(
                     sprintf(
                         "\n\t\t\t\t\t\t\tINSERT INTO %s (subject, fromname, date, xref, group_id,\n\t\t\t\t\t\t\t\ttotalfiles, collectionhash, dateadded)\n\t\t\t\t\t\t\tVALUES (%s, %s, FROM_UNIXTIME(%s), %s, %d, %d, '%s', NOW())\n\t\t\t\t\t\t\tON DUPLICATE KEY UPDATE dateadded = NOW(), noise = '%s'",
                         $tableNames['cname'],
                         $this->_pdo->escapeString(substr(utf8_encode($matches[1]), 0, 255)),
                         $this->_pdo->escapeString(utf8_encode($header['From'])),
                         $collectionDate,
                         $this->_pdo->escapeString(substr($header['Xref'], 0, 255)),
                         $groupMySQL['id'],
                         $fileCount[3],
                         sha1($header['CollectionKey']),
                         bin2hex(openssl_random_pseudo_bytes(16))
                     )
                 );

                 if ($collectionID === false) {
                     if ($addToPartRepair) {
                         $headersNotInserted[] = $header['Number'];
                     }
                     // NOTE(review): if the tables are transactional this also undoes every
                     // row inserted earlier in this batch, while $collectionIDs, $articles and
                     // $partsQuery still reference them - confirm this is intended.
                     $this->_pdo->Rollback();
                     $this->_pdo->beginTransaction();
                     continue;
                 }
                 $collectionIDs[$header['CollectionKey']] = $collectionID;
             }

             // The insert itself accounts for this first part (currentparts = 1 / + 1 and
             // partsize), so the batch counters for the binary start at zero below.
             $binaryID = $this->_pdo->queryInsert(
                 sprintf(
                     "\n\t\t\t\t\t\tINSERT INTO %s (binaryhash, name, collection_id, totalparts, currentparts, filenumber, partsize)\n\t\t\t\t\t\tVALUES (UNHEX('%s'), %s, %d, %d, 1, %d, %d)\n\t\t\t\t\t\tON DUPLICATE KEY UPDATE currentparts = currentparts + 1, partsize = partsize + %d",
                     $tableNames['bname'],
                     md5($matches[1] . $header['From'] . $groupMySQL['id']),
                     $this->_pdo->escapeString(utf8_encode($matches[1])),
                     $collectionID,
                     $matches[3],
                     $fileCount[1],
                     $header['Bytes'],
                     $header['Bytes']
                 )
             );

             if ($binaryID === false) {
                 if ($addToPartRepair) {
                     $headersNotInserted[] = $header['Number'];
                 }
                 // NOTE(review): same caveat as for the collection insert failure above.
                 $this->_pdo->Rollback();
                 $this->_pdo->beginTransaction();
                 continue;
             }

             $binariesUpdate[$binaryID] = ['Size' => 0, 'Parts' => 0];
             $articles[$matches[1]] = ['CollectionID' => $collectionID, 'BinaryID' => $binaryID];
         }

         // Strip the leading character (normally '<') and trailing '>', saves space in DB.
         // The Message-ID comes from the article header, so it is escaped rather than being
         // wrapped in hand-written quotes; escapeString() output is used unquoted in SQL,
         // the same way the collection insert above uses it.
         $messageID = rtrim((string) substr($header['Message-ID'], 1), '>');
         $partsQuery .= '(' . $binaryID . ',' . $header['Number'] . ',' . $this->_pdo->escapeString($messageID) . ',' . $matches[2] . ',' . $header['Bytes'] . '),';
     }

     // Reclaim memory.
     unset($headers);

     // Start of inserting into SQL.
     $startUpdate = microtime(true);
     // End of processing headers.
     $timeCleaning = number_format($startUpdate - $startCleaning, 2);

     // Build one multi-row statement that adds the counters accumulated for each binary.
     $binariesQuery = sprintf('INSERT INTO %s (id, partsize, currentparts) VALUES ', $tableNames['bname']);
     foreach ($binariesUpdate as $binaryID => $binary) {
         $binariesQuery .= '(' . $binaryID . ',' . $binary['Size'] . ',' . $binary['Parts'] . '),';
     }
     $binariesQuery = rtrim($binariesQuery, ',') . ' ON DUPLICATE KEY UPDATE partsize = VALUES(partsize) + partsize, currentparts = VALUES(currentparts) + currentparts';

     // Nothing to send counts as success; otherwise the query must succeed.
     $inserted = empty($binariesUpdate) || $this->_pdo->queryExec($binariesQuery);
     if ($inserted) {
         if ($this->_debug) {
             $this->_colorCLI->doEcho($this->_colorCLI->debug('Sending ' . round(strlen($partsQuery) / 1024, 2) . ' KB of parts to MySQL'));
         }
         // $partsQuery still equal to its prefix means no part was queued.
         $inserted = $partsQuery === $partsCheck || $this->_pdo->queryExec(rtrim($partsQuery, ','));
     }

     if ($inserted) {
         $this->_pdo->Commit();
     } else {
         if ($addToPartRepair) {
             // Both arrays are lists, so the union operator (+) would silently drop every
             // received header whose numeric key already exists on the left. Merge instead
             // and drop duplicates, as rows that failed individually are in both lists.
             $headersNotInserted = array_values(array_unique(array_merge($headersNotInserted, $headersReceived)));
         }
         $this->_pdo->Rollback();
     }

     if ($this->_echoCLI && $partRepair === false) {
         $this->_colorCLI->doEcho($this->_colorCLI->primary('Received ' . count($headersReceived) . ' articles of ' . number_format($last - $first + 1) . ' requested, ' . $headersBlackListed . ' blacklisted, ' . $notYEnc . ' not yEnc.'));
     }

     // Start of part repair.
     $startPR = microtime(true);
     // End of inserting.
     $timeInsert = number_format($startPR - $startUpdate, 2);

     if ($partRepair && count($headersRepaired) > 0) {
         $this->removeRepairedParts($headersRepaired, $tableNames['prname'], $groupMySQL['id']);
     }

     if ($addToPartRepair) {
         $notInsertedCount = count($headersNotInserted);
         if ($notInsertedCount > 0) {
             $this->addMissingParts($headersNotInserted, $tableNames['prname'], $groupMySQL['id']);
             $this->log($notInsertedCount . ' articles failed to insert!', __FUNCTION__, Logger::LOG_WARNING, 'warning');
         }

         // Check if we have any missing headers. $headersReceived already contains the
         // non-yEnc and blacklisted articles, so the requested count is compared as is.
         if (count($headersReceived) < $last - $first + 1) {
             $rangeNotReceived = array_values(array_diff(range($first, $last), $headersReceived));
         }

         $notReceivedCount = count($rangeNotReceived);
         if ($notReceivedCount > 0) {
             $this->addMissingParts($rangeNotReceived, $tableNames['prname'], $groupMySQL['id']);
             if ($this->_echoCLI) {
                 $this->_colorCLI->doEcho($this->_colorCLI->alternate('Server did not return ' . $notReceivedCount . ' articles from ' . $groupMySQL['name'] . '.'), true);
             }
         }
     }

     $currentMicroTime = microtime(true);
     if ($this->_echoCLI) {
         $this->_colorCLI->doEcho(
             $this->_colorCLI->alternateOver($timeHeaders . 's') .
             $this->_colorCLI->primaryOver(' to download articles, ') .
             $this->_colorCLI->alternateOver($timeCleaning . 's') .
             $this->_colorCLI->primaryOver(' to process collections, ') .
             $this->_colorCLI->alternateOver($timeInsert . 's') .
             $this->_colorCLI->primaryOver(' to insert binaries/parts, ') .
             $this->_colorCLI->alternateOver(number_format($currentMicroTime - $startPR, 2) . 's') .
             $this->_colorCLI->primaryOver(' for part repair, ') .
             $this->_colorCLI->alternateOver(number_format($currentMicroTime - $startLoop, 2) . 's') .
             $this->_colorCLI->primary(' total.')
         );
     }

     return $returnArray;
 }