/**
 * Recover from a failed selectGroup() call by restarting the NNTP connection
 * and selecting the group again. If the second attempt also fails, the error
 * is logged/echoed and the connection is closed.
 *
 * @param NNTP   $nntp  Instance of class NNTP.
 * @param string $group Name of the group.
 * @param bool   $comp  Use compression or not?
 *
 * @return mixed On success : (array)  The group summary returned by selectGroup().
 *               On failure : (object) PEAR_Error (either from the reconnect
 *                            attempt or from selectGroup()).
 *
 * @access public
 */
public function dataError($nntp, $group, $comp = true)
{
    // Disconnect.
    $nntp->doQuit();

    // Try reconnecting. This uses another round of max retries.
    if ($nntp->doConnect($comp) !== true) {
        if ($this->_debugBool) {
            // __CLASS__ replaces the argument-less get_class() call, which is deprecated since PHP 8.3.
            $this->_debugging->log(__CLASS__, __FUNCTION__, 'Unable to reconnect to usenet!', Logger::LOG_NOTICE);
        }

        return $this->throwError('Unable to reconnect to usenet!');
    }

    // Try re-selecting the group.
    $data = $nntp->selectGroup($group);
    if ($this->isError($data)) {
        $message = "Code {$data->code}: {$data->message}\nSkipping group: {$group}";

        if ($this->_debugBool) {
            $this->_debugging->log(__CLASS__, __FUNCTION__, $message, Logger::LOG_NOTICE);
        }

        if ($this->_echo) {
            $this->pdo->log->doEcho($this->pdo->log->error($message), true);
        }

        // The group is being skipped, so drop the connection again.
        $nntp->doQuit();
    }

    // Either the group summary or the error object from selectGroup().
    return $data;
}
/**
 * Add every newsgroup on the usenet server whose name matches a regex and
 * that is not yet present in the groups table.
 *
 * @param string $groupList Regex body (no delimiters); it is wrapped in /.../i and
 *                          matched against each group name reported by the server.
 * @param int    $active    Value written to the "active" column of new groups.
 * @param int    $backfill  Value written to the "backfill" column of new groups.
 *
 * @return array|string List of ['group' => name, 'msg' => 'Created'] entries for the
 *                      groups that were inserted, or a message string when the input
 *                      was rejected, usenet could not be reached, or nothing was added.
 */
public function addBulk($groupList, $active = 1, $backfill = 1)
{
    // Reject input that is empty or contains a whitespace-only line
    // (the /m flag makes ^ and $ match per line).
    if (preg_match('/^\\s*$/m', $groupList)) {
        return "No group list provided.";
    }

    $usenet = new NNTP(['Echo' => false]);
    if ($usenet->doConnect() !== true) {
        return 'Problem connecting to usenet.';
    }

    // Fetch the server's group list, then close the connection before inspecting the result.
    $serverGroups = $usenet->getGroups();
    $usenet->doQuit();
    if ($usenet->isError($serverGroups)) {
        return 'Problem fetching groups from usenet.';
    }

    $pattern = '/' . $groupList . '/i';
    $created = [];

    foreach ($serverGroups as $serverGroup) {
        $name = $serverGroup['group'];

        // Skip names the pattern does not match (also covers a failed preg_match()).
        if (preg_match($pattern, $name) !== 1) {
            continue;
        }

        // Skip groups that already have a row.
        $existing = $this->pdo->queryOneRow(
            sprintf('SELECT id FROM groups WHERE name = %s', $this->pdo->escapeString($name))
        );
        if ($existing !== false) {
            continue;
        }

        $this->pdo->queryInsert(
            sprintf(
                'INSERT INTO groups (name, active, backfill) VALUES (%s, %d, %d)',
                $this->pdo->escapeString($name),
                $active,
                $backfill
            )
        );
        $created[] = ['group' => $name, 'msg' => 'Created'];
    }

    if ($created === []) {
        return 'No groups found with your regex, try again!';
    }

    return $created;
}
/**
 * Download a range of article headers from usenet and store them in the
 * collections / binaries / parts tables of the given group.
 *
 * Steps: fetch the headers (retrying once without compression on error),
 * record the first/last article seen, group the headers into collections and
 * binaries, bulk-insert the parts inside a transaction, and finally update the
 * part-repair table (remove repaired parts, add missing / failed ones).
 *
 * @param array      $groupMySQL   The group info from mysql (uses keys 'id' and 'name').
 * @param int        $first        The oldest wanted header.
 * @param int        $last         The newest wanted header.
 * @param string     $type         Is this partrepair or update or backfill?
 * @param null|array $missingParts If we are running in partrepair, the list of missing article numbers.
 *
 * @return array Empty on failure or when no headers were returned; otherwise it may contain
 *               'firstArticleNumber', 'firstArticleDate', 'lastArticleNumber' and 'lastArticleDate'.
 */
public function scan($groupMySQL, $first, $last, $type = 'update', $missingParts = null)
{
    // Start time of scan method and of fetching headers.
    $startLoop = microtime(true);

    // Check if MySQL tables exist, create if they do not, get their names at the same time.
    $tableNames = $this->_groups->getCBPTableNames($this->_tablePerGroup, $groupMySQL['id']);

    $returnArray = [];

    $partRepair = ($type === 'partrepair');
    $addToPartRepair = ($type === 'update' && $this->_partRepair);

    // Download the headers.
    if ($partRepair === true) {
        // This is slower but possibly is better with missing headers.
        $headers = $this->_nntp->getOverview($first . '-' . $last, true, false);
    } else {
        $headers = $this->_nntp->getXOVER($first . '-' . $last);
    }

    // If there was an error, try to reconnect.
    if ($this->_nntp->isError($headers)) {

        // In part repair mode just count the failed attempt and give up (returns the empty array).
        if ($partRepair === true) {
            $this->_pdo->queryExec(
                sprintf(
                    'UPDATE missed_parts SET attempts = attempts + 1 WHERE group_id = %d AND numberid %s',
                    $groupMySQL['id'],
                    $first == $last ? '= ' . $first : 'IN (' . implode(',', range($first, $last)) . ')'
                )
            );

            return $returnArray;
        }

        // This is usually a compression error, so try disabling compression.
        $this->_nntp->doQuit();
        if ($this->_nntp->doConnect(false) !== true) {
            return $returnArray;
        }

        // Re-select group, download headers again without compression and re-enable compression.
        $this->_nntp->selectGroup($groupMySQL['name']);
        $headers = $this->_nntp->getXOVER($first . '-' . $last);
        $this->_nntp->enableCompression();

        // Check if the non-compression headers have an error.
        if ($this->_nntp->isError($headers)) {
            $message = $headers->code == 0 ? 'Unknown error' : $headers->message;
            $this->log(
                "Code {$headers->code}: {$message}\nSkipping group: {$groupMySQL['name']}",
                __FUNCTION__,
                Logger::LOG_WARNING,
                'error'
            );

            return $returnArray;
        }
    }

    // Start of processing headers.
    $startCleaning = microtime(true);

    // End of the getting data from usenet.
    $timeHeaders = number_format($startCleaning - $startLoop, 2);

    // Check if we got headers.
    $msgCount = count($headers);
    if ($msgCount < 1) {
        return $returnArray;
    }

    // Get highest and lowest article numbers/dates by walking in from both ends
    // until a header that carries a 'Number' is found on each side.
    $iterator1 = 0;
    $iterator2 = $msgCount - 1;
    while (true) {
        if (!isset($returnArray['firstArticleNumber']) && isset($headers[$iterator1]['Number'])) {
            $returnArray['firstArticleNumber'] = $headers[$iterator1]['Number'];
            $returnArray['firstArticleDate'] = $headers[$iterator1]['Date'];
        }

        if (!isset($returnArray['lastArticleNumber']) && isset($headers[$iterator2]['Number'])) {
            $returnArray['lastArticleNumber'] = $headers[$iterator2]['Number'];
            $returnArray['lastArticleDate'] = $headers[$iterator2]['Date'];
        }

        // Break if we found non empty articles.
        if (isset($returnArray['firstArticleNumber']) && isset($returnArray['lastArticleNumber'])) {
            break;
        }

        // Break out if we couldn't find anything.
        if ($iterator1++ >= $msgCount - 1 || $iterator2-- <= 0) {
            break;
        }
    }

    $headersRepaired = [];    // Article numbers fetched successfully during part repair.
    $articles = [];           // Subject => ['CollectionID' => ..., 'BinaryID' => ...] cache.
    $rangeNotReceived = [];   // Requested article numbers the server did not return.
    $collectionIDs = [];      // Collection key => collection id cache.
    $binariesUpdate = [];     // Binary id => accumulated ['Size', 'Parts'] for extra parts.
    $headersReceived = [];    // Every article number the server returned.
    $headersNotInserted = []; // Article numbers whose rows could not be stored.
    $notYEnc = 0;
    $headersBlackListed = 0;

    $partsCheck = sprintf(
        'INSERT IGNORE INTO %s (binaryid, number, messageid, partnumber, size) VALUES ',
        $tableNames['pname']
    );
    $partsQuery = $partsCheck;

    $this->_pdo->beginTransaction();

    // Loop articles, figure out files/parts.
    foreach ($headers as $header) {

        // Check if we got the article or not. A header without a number cannot be
        // recorded individually; it is picked up by the range comparison further down.
        if (!isset($header['Number'])) {
            continue;
        }
        $headersReceived[] = $header['Number'];

        // If set we are running in partRepair mode.
        if ($partRepair === true && !is_null($missingParts)) {
            if (!in_array($header['Number'], $missingParts)) {
                // If article isn't one that is missing skip it.
                continue;
            }
            // We got the part this time. Remove article from part repair.
            $headersRepaired[] = $header['Number'];
        }

        /*
         * Find part / total parts. Ignore if no part count found.
         *
         * \s* Trims the leading space.
         * (?!"Usenet Index Post) ignores these types of articles, they are useless.
         * (.+) Fetches the subject.
         * \s+ Trims trailing space after the subject.
         * \((\d+)\/(\d+)\) Gets the part count.
         * No ending ($) as there are cases of subjects with extra data after the part count.
         */
        if (preg_match('/^\\s*(?!"Usenet Index Post)(.+)\\s+\\((\\d+)\\/(\\d+)\\)/', $header['Subject'], $matches)) {
            // Add yEnc to subjects that do not have them, but have the part number at the end of the header.
            if (!stristr($header['Subject'], 'yEnc')) {
                $matches[1] .= ' yEnc';
            }
        } else {
            if ($this->_showDroppedYEncParts === true && strpos($header['Subject'], '"Usenet Index Post') !== 0) {
                file_put_contents(
                    nZEDb_LOGS . 'not_yenc' . $groupMySQL['name'] . '.dropped.log',
                    $header['Subject'] . PHP_EOL,
                    FILE_APPEND
                );
            }
            $notYEnc++;
            continue;
        }

        // Filter subject based on black/white list.
        if ($this->isBlackListed($header, $groupMySQL['name'])) {
            $headersBlackListed++;
            continue;
        }

        if (!isset($header['Bytes'])) {
            $header['Bytes'] = isset($header[':bytes']) ? $header[':bytes'] : 0;
        }
        $header['Bytes'] = (int) $header['Bytes'];

        // Set up the info for inserting into parts/binaries/collections tables.
        if (!isset($articles[$matches[1]])) {

            // check whether file count should be ignored (XXX packs for now only).
            $whitelistMatch = false;
            if ($this->_ignoreFileCount($groupMySQL['name'], $matches[1])) {
                $whitelistMatch = true;
                $fileCount[1] = $fileCount[3] = 0;
            }

            // Attempt to find the file count. If it is not found, set it to 0.
            if (!$whitelistMatch &&
                !preg_match('/[[(\\s](\\d{1,5})(\\/|[\\s_]of[\\s_]|-)(\\d{1,5})[])\\s$:]/i', $matches[1], $fileCount)
            ) {
                $fileCount[1] = $fileCount[3] = 0;

                if ($this->_showDroppedYEncParts === true) {
                    file_put_contents(
                        nZEDb_LOGS . 'no_files' . $groupMySQL['name'] . '.log',
                        $header['Subject'] . PHP_EOL,
                        FILE_APPEND
                    );
                }
            }

            // Used to group articles together when forming the release/nzb.
            $header['CollectionKey'] = (
                $this->_collectionsCleaning->collectionsCleaner($matches[1], $groupMySQL['name']) .
                $header['From'] .
                $groupMySQL['id'] .
                $fileCount[3]
            );

            if (!isset($collectionIDs[$header['CollectionKey']])) {

                /* Date from header should be a string this format:
                 * 31 Mar 2014 15:36:04 GMT or 6 Oct 1998 04:38:40 -0500
                 * Still make sure it's not unix time, convert it to unix time if it is.
                 */
                $header['Date'] = is_numeric($header['Date']) ? $header['Date'] : strtotime($header['Date']);

                // Get the current unixtime from PHP.
                $now = time();

                // Dates in the future, or dates that could not be parsed, are replaced by "now".
                $collectionDate = is_numeric($header['Date'])
                    ? ($header['Date'] > $now ? $now : $header['Date'])
                    : $now;

                $collectionID = $this->_pdo->queryInsert(
                    sprintf(
                        "\n\t\t\t\t\t\t\tINSERT INTO %s (subject, fromname, date, xref, group_id,\n\t\t\t\t\t\t\t\ttotalfiles, collectionhash, dateadded)\n\t\t\t\t\t\t\tVALUES (%s, %s, FROM_UNIXTIME(%s), %s, %d, %d, '%s', NOW())\n\t\t\t\t\t\t\tON DUPLICATE KEY UPDATE dateadded = NOW(), noise = '%s'",
                        $tableNames['cname'],
                        $this->_pdo->escapeString(substr(utf8_encode($matches[1]), 0, 255)),
                        $this->_pdo->escapeString(utf8_encode($header['From'])),
                        $collectionDate,
                        $this->_pdo->escapeString(substr($header['Xref'], 0, 255)),
                        $groupMySQL['id'],
                        $fileCount[3],
                        sha1($header['CollectionKey']),
                        bin2hex(openssl_random_pseudo_bytes(16))
                    )
                );

                if ($collectionID === false) {
                    if ($addToPartRepair) {
                        $headersNotInserted[] = $header['Number'];
                    }
                    // NOTE(review): this rolls back everything inserted so far in the transaction while
                    // $collectionIDs / $articles keep the ids already handed out - confirm this is intended.
                    $this->_pdo->Rollback();
                    $this->_pdo->beginTransaction();
                    continue;
                }
                $collectionIDs[$header['CollectionKey']] = $collectionID;
            } else {
                $collectionID = $collectionIDs[$header['CollectionKey']];
            }

            $binaryID = $this->_pdo->queryInsert(
                sprintf(
                    "\n\t\t\t\t\t\tINSERT INTO %s (binaryhash, name, collection_id, totalparts, currentparts, filenumber, partsize)\n\t\t\t\t\t\tVALUES (UNHEX('%s'), %s, %d, %d, 1, %d, %d)\n\t\t\t\t\t\tON DUPLICATE KEY UPDATE currentparts = currentparts + 1, partsize = partsize + %d",
                    $tableNames['bname'],
                    md5($matches[1] . $header['From'] . $groupMySQL['id']),
                    $this->_pdo->escapeString(utf8_encode($matches[1])),
                    $collectionID,
                    $matches[3],
                    $fileCount[1],
                    $header['Bytes'],
                    $header['Bytes']
                )
            );

            if ($binaryID === false) {
                if ($addToPartRepair) {
                    $headersNotInserted[] = $header['Number'];
                }
                $this->_pdo->Rollback();
                $this->_pdo->beginTransaction();
                continue;
            }

            // The insert above already counted this first part; only later parts accumulate here.
            $binariesUpdate[$binaryID]['Size'] = 0;
            $binariesUpdate[$binaryID]['Parts'] = 0;

            $articles[$matches[1]]['CollectionID'] = $collectionID;
            $articles[$matches[1]]['BinaryID'] = $binaryID;
        } else {
            $binaryID = $articles[$matches[1]]['BinaryID'];
            $collectionID = $articles[$matches[1]]['CollectionID'];
            $binariesUpdate[$binaryID]['Size'] += $header['Bytes'];
            $binariesUpdate[$binaryID]['Parts']++;
        }

        // Strip the < and >, saves space in DB. The id comes straight from usenet, so it is
        // quoted through escapeString() instead of being wrapped in hand-written quotes.
        $messageID = rtrim((string) substr($header['Message-ID'], 1), '>');

        $partsQuery .=
            '(' . $binaryID . ',' . $header['Number'] . ',' . $this->_pdo->escapeString($messageID) . ',' .
            $matches[2] . ',' . $header['Bytes'] . '),';
    }

    unset($headers); // Reclaim memory.

    // Start of inserting into SQL.
    $startUpdate = microtime(true);

    // End of processing headers.
    $timeCleaning = number_format($startUpdate - $startCleaning, 2);

    $binariesQuery = sprintf('INSERT INTO %s (id, partsize, currentparts) VALUES ', $tableNames['bname']);
    foreach ($binariesUpdate as $binaryID => $binary) {
        $binariesQuery .= '(' . $binaryID . ',' . $binary['Size'] . ',' . $binary['Parts'] . '),';
    }
    $binariesEnd = ' ON DUPLICATE KEY UPDATE partsize = VALUES(partsize) + partsize, currentparts = VALUES(currentparts) + currentparts';
    $binariesQuery = rtrim($binariesQuery, ',') . $binariesEnd;

    // Check if we got any binaries. If we did, try to insert them.
    $committed = false;
    if (empty($binariesUpdate) ? true : $this->_pdo->queryExec($binariesQuery)) {
        if ($this->_debug) {
            $this->_colorCLI->doEcho(
                $this->_colorCLI->debug(
                    'Sending ' . round(strlen($partsQuery) / 1024, 2) . ' KB of parts to MySQL'
                )
            );
        }

        // Only run the parts query when at least one value tuple was appended to it.
        if ($partsQuery === $partsCheck ? true : $this->_pdo->queryExec(rtrim($partsQuery, ','))) {
            $this->_pdo->Commit();
            $committed = true;
        }
    }

    if (!$committed) {
        if ($addToPartRepair) {
            // Nothing was stored, so every received article has to be retried. array_merge() is
            // used because the "+" union on two lists would drop entries that share an index.
            $headersNotInserted = array_values(
                array_unique(array_merge($headersNotInserted, $headersReceived))
            );
        }
        $this->_pdo->Rollback();
    }

    $requestedCount = $last - $first + 1;

    if ($this->_echoCLI && $partRepair === false) {
        $this->_colorCLI->doEcho(
            $this->_colorCLI->primary(
                'Received ' . count($headersReceived) .
                ' articles of ' . number_format($requestedCount) . ' requested, ' .
                $headersBlackListed . ' blacklisted, ' . $notYEnc . ' not yEnc.'
            )
        );
    }

    // Start of part repair.
    $startPR = microtime(true);

    // End of inserting.
    $timeInsert = number_format($startPR - $startUpdate, 2);

    if ($partRepair && count($headersRepaired) > 0) {
        $this->removeRepairedParts($headersRepaired, $tableNames['prname'], $groupMySQL['id']);
    }

    if ($addToPartRepair) {

        $notInsertedCount = count($headersNotInserted);
        if ($notInsertedCount > 0) {
            $this->addMissingParts($headersNotInserted, $tableNames['prname'], $groupMySQL['id']);

            $this->log(
                $notInsertedCount . ' articles failed to insert!',
                __FUNCTION__,
                Logger::LOG_WARNING,
                'warning'
            );
        }

        // Check if we have any missing headers. $headersReceived already contains the
        // non-yEnc and blacklisted articles, so it is compared to the full requested range.
        if ($requestedCount > count($headersReceived)) {
            $rangeNotReceived = array_values(array_diff(range($first, $last), $headersReceived));
        }

        $notReceivedCount = count($rangeNotReceived);
        if ($notReceivedCount > 0) {
            $this->addMissingParts($rangeNotReceived, $tableNames['prname'], $groupMySQL['id']);

            if ($this->_echoCLI) {
                $this->_colorCLI->doEcho(
                    $this->_colorCLI->alternate(
                        'Server did not return ' . $notReceivedCount .
                        ' articles from ' . $groupMySQL['name'] . '.'
                    ),
                    true
                );
            }
        }
    }

    $currentMicroTime = microtime(true);
    if ($this->_echoCLI) {
        $this->_colorCLI->doEcho(
            $this->_colorCLI->alternateOver($timeHeaders . 's') .
            $this->_colorCLI->primaryOver(' to download articles, ') .
            $this->_colorCLI->alternateOver($timeCleaning . 's') .
            $this->_colorCLI->primaryOver(' to process collections, ') .
            $this->_colorCLI->alternateOver($timeInsert . 's') .
            $this->_colorCLI->primaryOver(' to insert binaries/parts, ') .
            $this->_colorCLI->alternateOver(number_format($currentMicroTime - $startPR, 2) . 's') .
            $this->_colorCLI->primaryOver(' for part repair, ') .
            $this->_colorCLI->alternateOver(number_format($currentMicroTime - $startLoop, 2) . 's') .
            $this->_colorCLI->primary(' total.')
        );
    }

    return $returnArray;
}