Ejemplo n.º 1
0
 /**
  * Download a range of article headers for a group and store them as collections, binaries and parts.
  *
  * The method downloads the overview for the article range, groups the articles by cleaned
  * subject into collections/binaries, queues one bulk insert for the parts and finally records
  * articles that were not received or could not be inserted so part repair can retry them.
  *
  * @param array      $groupMySQL   The group info from mysql (the 'id' and 'name' keys are read).
  * @param int        $first        The oldest wanted header.
  * @param int        $last         The newest wanted header.
  * @param string     $type         Is this partrepair or update or backfill?
  * @param null|array $missingParts If we are running in partrepair, the list of missing article numbers.
  *
  * @return array Empty on failure or when no usable header was found, otherwise it may contain the
  *               keys firstArticleNumber, firstArticleDate, lastArticleNumber and lastArticleDate.
  */
 public function scan($groupMySQL, $first, $last, $type = 'update', $missingParts = null)
 {
     // Start time of scan method and of fetching headers.
     $startLoop = microtime(true);

     // Check if MySQL tables exist, create if they do not, get their names at the same time.
     $tableNames = $this->_groups->getCBPTableNames($this->_tablePerGroup, $groupMySQL['id']);

     $returnArray = [];
     $partRepair = $type === 'partrepair';
     $addToPartRepair = $type === 'update' && $this->_partRepair;

     // Download the headers.
     if ($partRepair === true) {
         // This is slower but possibly is better with missing headers.
         $headers = $this->_nntp->getOverview($first . '-' . $last, true, false);
     } else {
         $headers = $this->_nntp->getXOVER($first . '-' . $last);
     }

     // If there was an error, try to reconnect.
     if ($this->_nntp->isError($headers)) {
         // In part repair mode only count the failed attempt and give up.
         if ($partRepair === true) {
             $this->_pdo->queryExec(sprintf('UPDATE missed_parts SET attempts = attempts + 1 WHERE group_id = %d AND numberid %s', $groupMySQL['id'], $first == $last ? '= ' . $first : 'IN (' . implode(',', range($first, $last)) . ')'));

             return $returnArray;
         }

         // This is usually a compression error, so try disabling compression.
         $this->_nntp->doQuit();
         if ($this->_nntp->doConnect(false) !== true) {
             return $returnArray;
         }

         // Re-select group, download headers again without compression and re-enable compression.
         $this->_nntp->selectGroup($groupMySQL['name']);
         $headers = $this->_nntp->getXOVER($first . '-' . $last);
         $this->_nntp->enableCompression();

         // Check if the non-compression headers have an error.
         if ($this->_nntp->isError($headers)) {
             $message = $headers->code == 0 ? 'Unknown error' : $headers->message;
             $this->log("Code {$headers->code}: {$message}\nSkipping group: {$groupMySQL['name']}", __FUNCTION__, Logger::LOG_WARNING, 'error');

             return $returnArray;
         }
     }

     // Start of processing headers.
     $startCleaning = microtime(true);
     // End of the getting data from usenet.
     $timeHeaders = number_format($startCleaning - $startLoop, 2);

     // Check if we got headers.
     $msgCount = count($headers);
     if ($msgCount < 1) {
         return $returnArray;
     }

     // Get highest and lowest article numbers/dates by walking inwards from both ends
     // until a header carrying an article number is found on each side.
     $iterator1 = 0;
     $iterator2 = $msgCount - 1;
     while (true) {
         if (!isset($returnArray['firstArticleNumber']) && isset($headers[$iterator1]['Number'])) {
             $returnArray['firstArticleNumber'] = $headers[$iterator1]['Number'];
             $returnArray['firstArticleDate'] = $headers[$iterator1]['Date'];
         }
         if (!isset($returnArray['lastArticleNumber']) && isset($headers[$iterator2]['Number'])) {
             $returnArray['lastArticleNumber'] = $headers[$iterator2]['Number'];
             $returnArray['lastArticleDate'] = $headers[$iterator2]['Date'];
         }
         // Break if we found non empty articles.
         if (isset($returnArray['firstArticleNumber']) && isset($returnArray['lastArticleNumber'])) {
             break;
         }
         // Break out if we couldn't find anything.
         if ($iterator1++ >= $msgCount - 1 || $iterator2-- <= 0) {
             break;
         }
     }

     $headersRepaired = $articles = $rangeNotReceived = $collectionIDs = $binariesUpdate = $headersReceived = $headersNotInserted = [];
     $notYEnc = $headersBlackListed = 0;
     // $partsCheck keeps the bare prefix so we can later tell whether any row was appended.
     $partsQuery = $partsCheck = sprintf('INSERT IGNORE INTO %s (binaryid, number, messageid, partnumber, size) VALUES ', $tableNames['pname']);

     $this->_pdo->beginTransaction();

     // Loop articles, figure out files/parts.
     foreach ($headers as $header) {
         // Check if we got the article or not. An entry without a number cannot be recorded
         // here; missing numbers are derived from the requested range after the loop.
         if (!isset($header['Number'])) {
             continue;
         }
         $headersReceived[] = $header['Number'];

         // If set we are running in partRepair mode.
         if ($partRepair === true && !is_null($missingParts)) {
             if (!in_array($header['Number'], $missingParts)) {
                 // If article isn't one that is missing skip it.
                 continue;
             }
             // We got the part this time. Remove article from part repair.
             $headersRepaired[] = $header['Number'];
         }

         /*
          * Find part / total parts. Ignore if no part count found.
          *
          * \s* Trims the leading space.
          * (?!"Usenet Index Post) ignores these types of articles, they are useless.
          * (.+) Fetches the subject.
          * \s+ Trims trailing space after the subject.
          * \((\d+)\/(\d+)\) Gets the part count.
          * No ending ($) as there are cases of subjects with extra data after the part count.
          */
         if (preg_match('/^\\s*(?!"Usenet Index Post)(.+)\\s+\\((\\d+)\\/(\\d+)\\)/', $header['Subject'], $matches)) {
             // Add yEnc to subjects that do not have them, but have the part number at the end of the header.
             if (!stristr($header['Subject'], 'yEnc')) {
                 $matches[1] .= ' yEnc';
             }
         } else {
             if ($this->_showDroppedYEncParts === true && strpos($header['Subject'], '"Usenet Index Post') !== 0) {
                 file_put_contents(nZEDb_LOGS . 'not_yenc' . $groupMySQL['name'] . '.dropped.log', $header['Subject'] . PHP_EOL, FILE_APPEND);
             }
             $notYEnc++;
             continue;
         }

         // Filter subject based on black/white list.
         if ($this->isBlackListed($header, $groupMySQL['name'])) {
             $headersBlackListed++;
             continue;
         }

         // Some servers report the size as ":bytes" instead of "Bytes".
         if (!isset($header['Bytes'])) {
             $header['Bytes'] = isset($header[':bytes']) ? $header[':bytes'] : 0;
         }
         $header['Bytes'] = (int) $header['Bytes'];

         // Set up the info for inserting into parts/binaries/collections tables.
         if (!isset($articles[$matches[1]])) {
             // check whether file count should be ignored (XXX packs for now only).
             $whitelistMatch = false;
             if ($this->_ignoreFileCount($groupMySQL['name'], $matches[1])) {
                 $whitelistMatch = true;
                 $fileCount[1] = $fileCount[3] = 0;
             }

             // Attempt to find the file count. If it is not found, set it to 0.
             if (!$whitelistMatch && !preg_match('/[[(\\s](\\d{1,5})(\\/|[\\s_]of[\\s_]|-)(\\d{1,5})[])\\s$:]/i', $matches[1], $fileCount)) {
                 $fileCount[1] = $fileCount[3] = 0;
                 if ($this->_showDroppedYEncParts === true) {
                     file_put_contents(nZEDb_LOGS . 'no_files' . $groupMySQL['name'] . '.log', $header['Subject'] . PHP_EOL, FILE_APPEND);
                 }
             }

             // Used to group articles together when forming the release/nzb.
             $header['CollectionKey'] = $this->_collectionsCleaning->collectionsCleaner($matches[1], $groupMySQL['name']) . $header['From'] . $groupMySQL['id'] . $fileCount[3];

             if (!isset($collectionIDs[$header['CollectionKey']])) {
                 /* Date from header should be a string this format:
                  * 31 Mar 2014 15:36:04 GMT or 6 Oct 1998 04:38:40 -0500
                  * Still make sure it's not unix time, convert it to unix time if it is.
                  */
                 $header['Date'] = is_numeric($header['Date']) ? $header['Date'] : strtotime($header['Date']);

                 // Get the current unixtime from PHP.
                 $now = time();
                 // Never store a date in the future; fall back to "now" when the date could not be parsed.
                 if (is_numeric($header['Date'])) {
                     $collectionDate = $header['Date'] > $now ? $now : $header['Date'];
                 } else {
                     $collectionDate = $now;
                 }

                 $collectionID = $this->_pdo->queryInsert(
                     sprintf(
                         "\n\t\t\t\t\t\t\tINSERT INTO %s (subject, fromname, date, xref, group_id,\n\t\t\t\t\t\t\t\ttotalfiles, collectionhash, dateadded)\n\t\t\t\t\t\t\tVALUES (%s, %s, FROM_UNIXTIME(%s), %s, %d, %d, '%s', NOW())\n\t\t\t\t\t\t\tON DUPLICATE KEY UPDATE dateadded = NOW(), noise = '%s'",
                         $tableNames['cname'],
                         $this->_pdo->escapeString(substr(utf8_encode($matches[1]), 0, 255)),
                         $this->_pdo->escapeString(utf8_encode($header['From'])),
                         $collectionDate,
                         $this->_pdo->escapeString(substr($header['Xref'], 0, 255)),
                         $groupMySQL['id'],
                         $fileCount[3],
                         sha1($header['CollectionKey']),
                         bin2hex(openssl_random_pseudo_bytes(16))
                     )
                 );

                 if ($collectionID === false) {
                     if ($addToPartRepair) {
                         $headersNotInserted[] = $header['Number'];
                     }
                     $this->_pdo->Rollback();
                     $this->_pdo->beginTransaction();
                     continue;
                 }
                 $collectionIDs[$header['CollectionKey']] = $collectionID;
             } else {
                 $collectionID = $collectionIDs[$header['CollectionKey']];
             }

             $binaryID = $this->_pdo->queryInsert(
                 sprintf(
                     "\n\t\t\t\t\t\tINSERT INTO %s (binaryhash, name, collection_id, totalparts, currentparts, filenumber, partsize)\n\t\t\t\t\t\tVALUES (UNHEX('%s'), %s, %d, %d, 1, %d, %d)\n\t\t\t\t\t\tON DUPLICATE KEY UPDATE currentparts = currentparts + 1, partsize = partsize + %d",
                     $tableNames['bname'],
                     md5($matches[1] . $header['From'] . $groupMySQL['id']),
                     $this->_pdo->escapeString(utf8_encode($matches[1])),
                     $collectionID,
                     $matches[3],
                     $fileCount[1],
                     $header['Bytes'],
                     $header['Bytes']
                 )
             );

             if ($binaryID === false) {
                 if ($addToPartRepair) {
                     $headersNotInserted[] = $header['Number'];
                 }
                 $this->_pdo->Rollback();
                 $this->_pdo->beginTransaction();
                 continue;
             }

             // The insert above already accounted for this first part, so start the deltas at zero.
             $binariesUpdate[$binaryID]['Size'] = 0;
             $binariesUpdate[$binaryID]['Parts'] = 0;

             $articles[$matches[1]]['CollectionID'] = $collectionID;
             $articles[$matches[1]]['BinaryID'] = $binaryID;
         } else {
             // Another part of a binary seen earlier in this batch: only accumulate the deltas.
             $binaryID = $articles[$matches[1]]['BinaryID'];
             $collectionID = $articles[$matches[1]]['CollectionID'];
             $binariesUpdate[$binaryID]['Size'] += $header['Bytes'];
             $binariesUpdate[$binaryID]['Parts']++;
         }

         // Strip the < and >, saves space in DB. The Message-ID comes from the server, so it is
         // escaped instead of being wrapped in quotes by hand.
         $messageID = $this->_pdo->escapeString(rtrim((string) substr($header['Message-ID'], 1), '>'));
         $partsQuery .= '(' . $binaryID . ',' . $header['Number'] . ',' . $messageID . ',' . $matches[2] . ',' . $header['Bytes'] . '),';
     }

     // Reclaim memory.
     unset($headers);

     // Start of inserting into SQL.
     $startUpdate = microtime(true);
     // End of processing headers.
     $timeCleaning = number_format($startUpdate - $startCleaning, 2);

     $binariesQuery = $binariesCheck = sprintf('INSERT INTO %s (id, partsize, currentparts) VALUES ', $tableNames['bname']);
     foreach ($binariesUpdate as $binaryID => $binary) {
         $binariesQuery .= '(' . $binaryID . ',' . $binary['Size'] . ',' . $binary['Parts'] . '),';
     }
     $binariesEnd = ' ON DUPLICATE KEY UPDATE partsize = VALUES(partsize) + partsize, currentparts = VALUES(currentparts) + currentparts';
     $binariesQuery = rtrim($binariesQuery, ',') . $binariesEnd;

     // Check if we got any binaries. If we did, try to insert them.
     // An unchanged query length means no rows were appended, which counts as success.
     $binariesOk = strlen($binariesCheck . $binariesEnd) === strlen($binariesQuery) ? true : $this->_pdo->queryExec($binariesQuery);
     $partsOk = false;
     if ($binariesOk) {
         if ($this->_debug) {
             $this->_colorCLI->doEcho($this->_colorCLI->debug('Sending ' . round(strlen($partsQuery) / 1024, 2) . ' KB of parts to MySQL'));
         }
         $partsOk = strlen($partsQuery) === strlen($partsCheck) ? true : $this->_pdo->queryExec(rtrim($partsQuery, ','));
     }

     if ($binariesOk && $partsOk) {
         $this->_pdo->Commit();
     } else {
         if ($addToPartRepair) {
             // array_merge (not the + union operator) so every received article is queued for
             // repair even when earlier failures already occupy the same numeric keys.
             $headersNotInserted = array_values(array_unique(array_merge($headersNotInserted, $headersReceived)));
         }
         $this->_pdo->Rollback();
     }

     if ($this->_echoCLI && $partRepair === false) {
         $this->_colorCLI->doEcho($this->_colorCLI->primary('Received ' . count($headersReceived) . ' articles of ' . number_format($last - $first + 1) . ' requested, ' . $headersBlackListed . ' blacklisted, ' . $notYEnc . ' not yEnc.'));
     }

     // Start of part repair.
     $startPR = microtime(true);
     // End of inserting.
     $timeInsert = number_format($startPR - $startUpdate, 2);

     if ($partRepair && count($headersRepaired) > 0) {
         $this->removeRepairedParts($headersRepaired, $tableNames['prname'], $groupMySQL['id']);
     }

     if ($addToPartRepair) {
         $notInsertedCount = count($headersNotInserted);
         if ($notInsertedCount > 0) {
             $this->addMissingParts($headersNotInserted, $tableNames['prname'], $groupMySQL['id']);
             $this->log($notInsertedCount . ' articles failed to insert!', __FUNCTION__, Logger::LOG_WARNING, 'warning');
         }

         // Check if we have any missing headers. $headersReceived also contains the not-yEnc and
         // blacklisted articles, so it is compared against the full size of the requested range.
         if ($last - $first + 1 > count($headersReceived)) {
             $rangeNotReceived = array_merge($rangeNotReceived, array_diff(range($first, $last), $headersReceived));
         }

         $notReceivedCount = count($rangeNotReceived);
         if ($notReceivedCount > 0) {
             $this->addMissingParts($rangeNotReceived, $tableNames['prname'], $groupMySQL['id']);
             if ($this->_echoCLI) {
                 $this->_colorCLI->doEcho($this->_colorCLI->alternate('Server did not return ' . $notReceivedCount . ' articles from ' . $groupMySQL['name'] . '.'), true);
             }
         }
     }

     $currentMicroTime = microtime(true);
     if ($this->_echoCLI) {
         $this->_colorCLI->doEcho(
             $this->_colorCLI->alternateOver($timeHeaders . 's') .
             $this->_colorCLI->primaryOver(' to download articles, ') .
             $this->_colorCLI->alternateOver($timeCleaning . 's') .
             $this->_colorCLI->primaryOver(' to process collections, ') .
             $this->_colorCLI->alternateOver($timeInsert . 's') .
             $this->_colorCLI->primaryOver(' to insert binaries/parts, ') .
             $this->_colorCLI->alternateOver(number_format($currentMicroTime - $startPR, 2) . 's') .
             $this->_colorCLI->primaryOver(' for part repair, ') .
             $this->_colorCLI->alternateOver(number_format($currentMicroTime - $startLoop, 2) . 's') .
             $this->_colorCLI->primary(' total.')
         );
     }

     return $returnArray;
 }
Ejemplo n.º 2
0
    /**
     * Get all new comments from usenet.
     *
     * Selects the sharing group, works out the article range to scan from the stored
     * "last_article" setting, downloads the overview for that range and inserts every
     * comment article that comes from another, enabled site. The last handled article
     * number is written back to the sharing table so the next run resumes after it.
     *
     * @access protected
     */
    protected function fetchAll()
    {
        // Get NNTP group data.
        $group = $this->nntp->selectGroup(self::group, false, true);

        // Check if there's an issue.
        if ($this->nntp->isError($group)) {
            return;
        }

        // Check if this is the first time, set our oldest article.
        if ($this->siteSettings['last_article'] == 0) {
            if ($this->siteSettings['start_position'] === true) {
                // The user picked to start from the oldest, so get the oldest.
                $this->siteSettings['last_article'] = $ourOldest = $group['first'];
            } else {
                // Else start max_download articles before the newest, clamped to the group's oldest.
                $this->siteSettings['last_article'] = $ourOldest = (string) ($group['last'] - $this->siteSettings['max_download']);
                if ($ourOldest < $group['first']) {
                    $this->siteSettings['last_article'] = $ourOldest = $group['first'];
                }
            }
        } else {
            $ourOldest = (string) ($this->siteSettings['last_article'] + 1);
        }

        // Set our newest to our oldest wanted + max pull setting.
        $newest = (string) ($ourOldest + $this->siteSettings['max_pull']);

        // Check if our newest wanted is newer than the group's newest, set to group's newest.
        if ($newest >= $group['last']) {
            $newest = $group['last'];
        }

        // We have nothing to do, so return.
        if ($ourOldest > $newest) {
            return;
        }

        if (nZEDb_ECHOCLI) {
            echo '(Sharing) Starting to fetch new comments.' . PHP_EOL;
        }

        // Get the wanted articles.
        $headers = $this->nntp->getOverview($ourOldest . '-' . $newest, true, false);

        // Check if we received nothing or there was an error.
        if ($this->nntp->isError($headers) || count($headers) === 0) {
            return;
        }

        $found = $total = $currentArticle = 0;

        // Loop over NNTP headers until we find comments.
        foreach ($headers as $header) {
            // Check if the article is missing.
            if (!isset($header['Number'])) {
                continue;
            }

            // Stop once we have downloaded as many comments as the user wants. This is checked
            // before the article number is recorded, so the article we stop at is not marked
            // as handled and will be looked at again on the next run.
            if ($found >= $this->siteSettings['max_download']) {
                break;
            }

            // Get the current article number.
            $currentArticle = $header['Number'];

            $matches = [];
            // Subject format matched below:
            // (_nZEDb_)<site name>_<40 hex char site guid> - [1/1] "<40 hex char share id>" yEnc (1/1)
            if ($header['From'] === '<*****@*****.**>' && preg_match('/^\\(_nZEDb_\\)(?P<site>.+?)_(?P<guid>[a-f0-9]{40}) - \\[1\\/1\\] "(?P<sid>[a-f0-9]{40})" yEnc \\(1\\/1\\)$/i', $header['Subject'], $matches)) {
                // Check if this is from our own site.
                if ($matches['guid'] === $this->siteSettings['site_guid']) {
                    continue;
                }

                // Check if we already have the comment.
                $check = $this->pdo->queryOneRow(sprintf('SELECT id FROM release_comments WHERE shareid = %s', $this->pdo->escapeString($matches['sid'])));

                // We don't have it, so insert it.
                if ($check === false) {
                    // Check if we have the site and if it is enabled.
                    $check = $this->pdo->queryOneRow(sprintf('SELECT enabled FROM sharing_sites WHERE site_guid = %s', $this->pdo->escapeString($matches['guid'])));

                    if ($check === false) {
                        // Check if the user has auto enable on.
                        if ($this->siteSettings['auto_enable'] === false) {
                            // Insert the site so the admin can enable it later on.
                            $this->pdo->queryExec(sprintf('
									INSERT INTO sharing_sites
									(site_name, site_guid, last_time, first_time, enabled, comments)
									VALUES (%s, %s, NOW(), NOW(), 0, 0)', $this->pdo->escapeString($matches['site']), $this->pdo->escapeString($matches['guid'])));
                            continue;
                        }

                        // Insert the site as enabled since the user has auto enabled on.
                        $this->pdo->queryExec(sprintf('
									INSERT INTO sharing_sites
									(site_name, site_guid, last_time, first_time, enabled, comments)
									VALUES (%s, %s, NOW(), NOW(), 1, 0)', $this->pdo->escapeString($matches['site']), $this->pdo->escapeString($matches['guid'])));
                    } elseif ($check['enabled'] == 0) {
                        // The user has disabled this site, so continue.
                        continue;
                    }

                    // Insert the comment, if we got it, update the site to increment comment count.
                    if ($this->insertNewComment($header['Message-ID'], $matches['guid'])) {
                        $this->pdo->queryExec(sprintf('
								UPDATE sharing_sites SET comments = comments + 1, last_time = NOW(), site_name = %s WHERE site_guid = %s', $this->pdo->escapeString($matches['site']), $this->pdo->escapeString($matches['guid'])));
                        $found++;
                        if (nZEDb_ECHOCLI) {
                            echo '.';
                            if ($found % 40 == 0) {
                                echo '[' . $found . ']' . PHP_EOL;
                            }
                        }
                    }
                }
            }

            // Update once in a while in case the user cancels the script.
            if ($total++ % 10 == 0) {
                $this->siteSettings['last_article'] = $currentArticle;
                $this->pdo->queryExec(sprintf('UPDATE sharing SET last_article = %d', $currentArticle));
            }
        }

        if ($currentArticle > 0) {
            // Update sharing's last article number, both in memory and in the database.
            $this->siteSettings['last_article'] = $currentArticle;
            $this->pdo->queryExec(sprintf('UPDATE sharing SET last_article = %d', $currentArticle));
        }

        if (nZEDb_ECHOCLI) {
            if ($found > 0) {
                echo PHP_EOL . '(Sharing) Fetched ' . $found . ' new comments.' . PHP_EOL;
            } else {
                echo '(Sharing) Finish looking for new comments, but did not find any.' . PHP_EOL;
            }
        }
    }