/**
 * Hands out a Services_Nntp_Engine for the requested usage type while keeping
 * the number of distinct engine objects (and hence connections) as low as
 * possible: an engine created earlier for the same type is handed out again.
 *
 * The 'bin' and 'post' types share the 'hdr' engine when their own settings
 * ('nntp_nzb' and 'nntp_post' respectively) do not carry a host.
 *
 * @param Services_Settings_Container $settings Settings holding the 'nntp_*' entries
 * @param string $type One of 'hdr', 'bin' or 'post'
 *
 * @return Services_Nntp_Engine Instance of Services_Nntp_Engine
 *
 * @throws MissingNntpConfigurationException when the 'nntp_hdr' setting is empty
 * @throws Exception when $type is not a known engine type
 */
public static function pool(Services_Settings_Container $settings, $type)
{
    SpotDebug::msg(SpotDebug::DEBUG, __CLASS__ . '::pool:(' . $type . ') called');

    if (!isset(self::$_instances[$type])) {
        /*
         * Without header server settings nothing can be created at all
         */
        $hdrConfig = $settings->get('nntp_hdr');
        if (empty($hdrConfig)) {
            throw new MissingNntpConfigurationException();
        }

        /*
         * Build (or borrow) the engine first, store it in the pool afterwards
         */
        switch ($type) {
            case 'hdr':
                $engine = new Services_Nntp_Engine($hdrConfig);
                break;

            case 'bin':
                $binConfig = $settings->get('nntp_nzb');
                $engine = empty($binConfig['host'])
                    ? self::pool($settings, 'hdr')
                    : new Services_Nntp_Engine($binConfig);
                break;

            case 'post':
                $postConfig = $settings->get('nntp_post');
                $engine = empty($postConfig['host'])
                    ? self::pool($settings, 'hdr')
                    : new Services_Nntp_Engine($postConfig);
                break;

            default:
                throw new Exception("Unknown NNTP type engine (" . $type . ") for pool creation");
        }

        self::$_instances[$type] = $engine;
    }

    return self::$_instances[$type];
}
/**
 * Performs an uncached HTTP request (GET, or POST when the configured method
 * is 'POST') and returns the outcome.
 *
 * Redirects are followed by cURL itself unless open_basedir or safe_mode is
 * active; in that case 'Location' headers are followed manually. HTML
 * meta-refresh redirects are always followed manually. At most 20 manual
 * redirects are followed.
 *
 * @param string   $url         URL to retrieve
 * @param int|null $lastModTime Last modification time (unix timestamp), can be null
 * @param int      $redirTries  Amount of redirects already followed
 *
 * @return array Associative array with the keys 'http_code', 'data',
 *               'finalurl', 'successful', 'errorstr' and 'curl_info'
 */
public function perform($url, $lastModTime = null, $redirTries = 0)
{
    $timerName = __CLASS__ . '::' . __FUNCTION__;
    SpotTiming::start($timerName);

    /*
     * Default our effectiveUrl to be the current URL,
     * so this way we can always return the effectiveUrl
     */
    $effectiveUrl = $url;

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 6.2; WOW64; rv:26.0) Gecko/20100101 Firefox/26.0');
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5);
    curl_setopt($ch, CURLOPT_TIMEOUT, 15);
    curl_setopt($ch, CURLOPT_ENCODING, '');
    /*
     * CURLOPT_FAILONERROR is deliberately not set: when a site returns
     * e.g. a 400 we still want to be able to see the response body.
     */
    curl_setopt($ch, CURLOPT_HEADER, 1);
    /*
     * NOTE(review): certificate and host verification are switched off, so
     * HTTPS responses are not authenticated. Left unchanged because callers
     * may rely on it -- confirm whether this is still required.
     */
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
    curl_setopt($ch, CURLINFO_HEADER_OUT, true);
    curl_setopt($ch, CURLOPT_VERBOSE, true);

    // send a cookie with the request if defined
    if ($this->getCookie() !== null) {
        curl_setopt($ch, CURLOPT_COOKIE, $this->getCookie());
    }

    /*
     * cURL is only asked to follow redirects itself when neither
     * open_basedir nor safe_mode is active; otherwise we do it by hand.
     */
    $manualRedirect = false;
    if (ini_get('open_basedir') != '' || ini_get('safe_mode')) {
        $manualRedirect = true;
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
        curl_setopt($ch, CURLOPT_MAXREDIRS, 1);
    } else {
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    }

    /*
     * If specified, pass authorization for this request
     */
    $username = $this->getUsername();
    if (!empty($username)) {
        curl_setopt($ch, CURLOPT_HTTPAUTH, CURLAUTH_BASIC);
        curl_setopt($ch, CURLOPT_USERPWD, $username . ':' . $this->getPassword());
    }

    /*
     * OAuth 2.0 uses 'Bearer' authentication, we support this by manually
     * sending the HTTP header field.
     * NOTE(review): this runs again on every manual redirect; whether
     * addHttpHeaders() de-duplicates is not visible here -- confirm.
     */
    $bearerAuth = $this->getBearerAuth();
    if (!empty($bearerAuth)) {
        $this->addHttpHeaders(array('Authorization: Bearer ' . $bearerAuth));
    }

    /*
     * Should we be posting?
     */
    if ($this->getMethod() == 'POST') {
        curl_setopt($ch, CURLOPT_POST, true);
    }

    /*
     * If we are passed fields to post to the server, actually post them
     */
    if (($this->getPostContent() != null || $this->getUploadFiles() != null || $this->getRawPostData() != null)
        && $this->getMethod() == 'POST'
    ) {
        $this->addPostFieldsToCurl($ch, $this->getPostContent(), $this->getUploadFiles(), $this->getRawPostData());
    }

    /*
     * If we already have content stored in our cache, just ask the server
     * if the content is modified since the time it was stored
     */
    if ($lastModTime != null && $lastModTime > 0) {
        curl_setopt($ch, CURLOPT_TIMECONDITION, CURL_TIMECOND_IFMODSINCE);
        curl_setopt($ch, CURLOPT_TIMEVALUE, $lastModTime);
    }

    /*
     * Send our custom HTTP headers
     */
    $httpHeaders = $this->getHttpHeaders();
    if (!empty($httpHeaders)) {
        curl_setopt($ch, CURLOPT_HTTPHEADER, $httpHeaders);
    }

    $response = curl_exec($ch);
    $errorStr = curl_error($ch);

    /*
     * Curl returns false on some unspecified errors (eg: a timeout)
     */
    if ($response !== false) {
        $curl_info = curl_getinfo($ch);
        $http_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);

        /*
         * A 304 (Resource not modified) carries no usable body
         */
        if ($http_code != 304) {
            $data = substr($response, $curl_info['header_size']);
        } else {
            $data = '';
        }

        /*
         * We also follow redirects, but PHP's safemode doesn't allow
         * for redirects, so fix those as well.
         */
        $effectiveUrl = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
        if (($effectiveUrl != $url || $http_code == 301 || $http_code == 302) && $manualRedirect) {
            /*
             * Header names are case-insensitive (HTTP/2 sends them in
             * lowercase), and only the header block may be inspected,
             * never the body.
             */
            $responseHeaders = substr($response, 0, $curl_info['header_size']);
            if (preg_match('/^Location:(.*?)$/mi', $responseHeaders, $matches)) {
                $redirUrl = trim(array_pop($matches));

                $redirTries++;
                if ($redirTries < 20) {
                    // release this request's handle and timer before recursing
                    curl_close($ch);
                    SpotTiming::stop($timerName, array($url));

                    return $this->perform($redirUrl, $lastModTime, $redirTries);
                }
            }
        }

        // Look for an HTML meta-refresh redirect
        if (preg_match('/meta.+?http-equiv\\W+?refresh/i', $response)) {
            preg_match('/content.+?url\\W+?(.+?)\\"/i', $response, $matches);
            if (isset($matches[1])) {
                SpotDebug::msg(SpotDebug::DEBUG, __CLASS__ . '-perform(), matches[1]= ' . $matches[1]);

                /*
                 * We can get either a relative redirect, or a fully
                 * qualified redirect. Hideref, for example, uses a
                 * relative redirect. Look for those.
                 *
                 * parse_url() doesn't support relative url's, so we have
                 * to do a guess ourselves.
                 */
                $redirUrl = $matches[1];
                if (stripos($redirUrl, 'http://') !== 0
                    && stripos($redirUrl, 'https://') !== 0
                    && stripos($redirUrl, '//') !== 0
                ) {
                    SpotDebug::msg(SpotDebug::DEBUG, __CLASS__ . '->perform(), we have gotten an correct url');

                    $urlParts = parse_url($url);
                    SpotDebug::msg(SpotDebug::DEBUG, __CLASS__ . '->perform(), parse_url: ' . json_encode($urlParts));

                    if ($redirUrl[0] == '/') {
                        $redirUrl = $urlParts['scheme'] . '://' . $urlParts['host'] . $redirUrl;
                    } else {
                        if (!isset($urlParts['path'])) {
                            $urlParts['path'] = '';
                        }

                        $redirUrl = $urlParts['scheme'] . '://' . $urlParts['host'] . $urlParts['path'] . $redirUrl;
                    }
                }

                // log the URL we are about to follow, not the one we came from
                SpotDebug::msg(SpotDebug::DEBUG, __CLASS__ . '->perform(), after metafresh, url = : ' . $redirUrl);

                $redirTries++;
                if ($redirTries < 20) {
                    // release this request's handle and timer before recursing
                    curl_close($ch);
                    SpotTiming::stop($timerName, array($url));

                    return $this->perform($redirUrl, $lastModTime, $redirTries);
                }
            }
        }
    } else {
        $http_code = 700; # Curl returned an error
        $curl_info = curl_getinfo($ch);
        $data = '';
    }

    curl_close($ch);

    /*
     * Sometimes we get an HTTP error of 0 back, which
     * probably means a timeout or something, so fix up
     * the error string manually.
     */
    if ($errorStr == '' && $http_code == 0) {
        $errorStr = 'unable to connect to URL: ' . $url;
    }

    SpotTiming::stop($timerName, array($url));

    return array(
        'http_code'  => $http_code,
        'data'       => $data,
        'finalurl'   => $effectiveUrl,
        'successful' => $http_code == 200 || $http_code == 304,
        'errorstr'   => 'http returncode: ' . $http_code . ' / ' . $errorStr,
        'curl_info'  => $curl_info,
    );
}
/**
 * Processes one batch of comment headers: stores the headers that are not in
 * the database yet, optionally retrieves the full comments, and afterwards
 * updates the maximum article id, the spot ratings and the comment counts.
 *
 * @param array $hdrList   Headers as retrieved from the server; each entry is
 *                         read for 'Message-ID', 'Number' and 'Date'
 * @param int   $curArtNr  Article number this batch started at (status output only)
 * @param int   $increment Article number this batch runs till (status output only)
 * @param float $timer     microtime(true) value taken when the batch started
 *
 * @return array Array with the keys 'count', 'headercount' and 'lastmsgid'
 *
 * @throws Exception any retrieval error other than code 430 or an XML parse failure
 */
function process($hdrList, $curArtNr, $increment, $timer)
{
    $timingPrefix = __CLASS__ . '::' . __FUNCTION__;

    $this->displayStatus("progress", $curArtNr . " till " . $increment);

    $lastProcessedId = '';
    $lastProcessedArtNr = 0;
    $commentDbList = array();
    $fullCommentDbList = array();

    /*
     * Determine the cutoff date (unixtimestamp) from whereon we do not want to
     * load the comments
     */
    if ($this->_settings->get('retention') > 0) {
        $retentionStamp = time() - $this->_settings->get('retention') * 24 * 60 * 60;
    } else {
        $retentionStamp = 0;
    }

    /*
     * We ask the database to match our messageid's we just retrieved with
     * the list of id's already stored
     */
    SpotTiming::start($timingPrefix . ':matchCommentMessageIds');
    $dbIdList = $this->_commentDao->matchCommentMessageIds($hdrList);
    SpotTiming::stop($timingPrefix . ':matchCommentMessageIds');

    /*
     * We keep a separate list of messageid's for updating the amount of
     * comments of each spot.
     */
    $spotMsgIdList = array();

    /*
     * and a different list for comments with a rating, this way we won't
     * calculate the rating for a spot when a comment has no rating
     */
    $spotMsgIdRatingList = array();

    # Process each header
    SpotTiming::start($timingPrefix . ':forEach-hdrList');
    foreach ($hdrList as $msgheader) {
        SpotDebug::msg(SpotDebug::DEBUG, 'foreach-loop: iter-start');

        # Reset timelimit
        set_time_limit(120);

        $commentId = $msgheader['Message-ID'];
        $artNr = $msgheader['Number'];

        SpotDebug::msg(SpotDebug::DEBUG, 'foreach-loop: processing: ' . $commentId . ', artNr=' . $artNr);

        /*
         * We prepare some variables so we don't have to perform an array
         * lookup for each check and the code is easier to read.
         */
        $header_isInDb = isset($dbIdList['comment'][$commentId]);
        $fullcomment_isInDb = isset($dbIdList['fullcomment'][$commentId]);

        SpotDebug::msg(SpotDebug::DEBUG, 'foreach-loop: headerIsInDb: ' . (int) $header_isInDb . ', fullComment=' . (int) $fullcomment_isInDb . ', retrieveFull= ' . (int) $this->_retrieveFull);

        /*
         * Do we have the comment in the database already? If not, lets process it
         */
        if (!$header_isInDb || (!$fullcomment_isInDb && $this->_retrieveFull)) {
            SpotTiming::start($timingPrefix . ':forEach-hdrList->retrieveFull');

            /*
             * Because not all usenet servers pass the reference field properly,
             * we manually create this reference field by using the messageid of
             * the comment
             */
            $msgIdParts = explode(".", $commentId);
            $msgheader['References'] = $msgIdParts[0] . substr($commentId, strpos($commentId, '@'));
            $msgheader['stamp'] = strtotime($msgheader['Date']);

            SpotDebug::msg(SpotDebug::TRACE, 'foreach-loop: msgHeader=' . serialize($msgheader));

            /*
             * Don't add older comments than specified for the retention stamp.
             * The timer is stopped before skipping so it does not stay open.
             */
            if ($retentionStamp > 0
                && $msgheader['stamp'] < $retentionStamp
                && $this->_settings->get('retentiontype') == 'everything'
            ) {
                SpotTiming::stop($timingPrefix . ':forEach-hdrList->retrieveFull');
                continue;
            }

            if ($msgheader['stamp'] < $this->_settings->get('retrieve_newer_than')) {
                SpotTiming::stop($timingPrefix . ':forEach-hdrList->retrieveFull');
                continue;
            }

            /*
             * Newer kind of comments contain a rating as second part of the
             * messageid. Some older comments contain a non-numeric string
             * on this position; those count as 'no rating given' (zero).
             */
            if (count($msgIdParts) == 5 && is_numeric($msgIdParts[1])) {
                $msgheader['rating'] = (int) $msgIdParts[1];
            } else {
                $msgheader['rating'] = 0;
            }

            /*
             * Determine whether we need to add the header to the database
             * and extract the required fields
             */
            if (!$header_isInDb) {
                $commentDbList[] = array(
                    'messageid' => $commentId,
                    'nntpref'   => $msgheader['References'],
                    'stamp'     => $msgheader['stamp'],
                    'rating'    => $msgheader['rating'],
                );

                /*
                 * Some buggy NNTP servers give us the same messageid
                 * in one XOVER statement, hence we update the list of
                 * messageid's we already have retrieved and are ready
                 * to be added to the database
                 */
                $dbIdList['comment'][$commentId] = 1;
                $spotMsgIdList[$msgheader['References']] = 1;

                /*
                 * If this comment contains a rating, mark the spot to
                 * have its rating be recalculated
                 */
                if ($msgheader['rating'] >= 1 && $msgheader['rating'] <= 10) {
                    $spotMsgIdRatingList[$msgheader['References']] = 1;
                }

                $header_isInDb = true;
                $lastProcessedId = $commentId;
                $lastProcessedArtNr = $artNr;
            }

            SpotTiming::stop($timingPrefix . ':forEach-hdrList->retrieveFull');
        } else {
            $lastProcessedId = $commentId;
            $lastProcessedArtNr = $artNr;
        }

        /*
         * We don't want to retrieve the full comment body if we don't have the header
         * in the database. Because we try to add headers in the above code we just have
         * to check if the header is in the database.
         *
         * We cannot collapse this code with the header fetching code because we want to
         * be able to add the full body to a system after all the headers are retrieved
         */
        if ($header_isInDb && !$fullcomment_isInDb) {
            /*
             * Don't add older fullcomments than specified for the retention stamp
             */
            if ($retentionStamp > 0 && strtotime($msgheader['Date']) < $retentionStamp) {
                continue;
            }

            if ($this->_retrieveFull) {
                try {
                    SpotDebug::msg(SpotDebug::DEBUG, 'foreach-loop: readFullComment start:' . $commentId);
                    $fullComment = $this->_svcNntpTextReading->readComments(array(array('messageid' => $commentId)));
                    SpotDebug::msg(SpotDebug::DEBUG, 'foreach-loop: readFullComment finished:' . $commentId);

                    # Add this comment to the database and mark it as such
                    $fullCommentDbList[] = $fullComment;
                    $fullcomment_isInDb = true;

                    /*
                     * Some buggy NNTP servers give us the same messageid
                     * in one XOVER statement, hence we update the list of
                     * messageid's we already have retrieved and are ready
                     * to be added to the database
                     */
                    $dbIdList['fullcomment'][$commentId] = 1;
                } catch (ParseSpotXmlException $x) {
                    # swallow error
                } catch (Exception $x) {
                    /*
                     * Sometimes we get an 'No such article' error for a header we just retrieved,
                     * if we want to retrieve the full article. This is messed up, but let's just
                     * swallow the error
                     */
                    if ($x->getCode() == 430) {
                        /*
                         * Reset error count, so other errors are actually re-tried
                         */
                        $this->_svcNntpText->resetErrorCount();
                        $this->_svcNntpBin->resetErrorCount();
                    } elseif ($x->getMessage() != 'String could not be parsed as XML') {
                        throw $x;
                    }
                }
            }
        }

        SpotDebug::msg(SpotDebug::DEBUG, 'foreach-loop: iter-stop');
    }
    SpotTiming::stop($timingPrefix . ':forEach-hdrList');
    SpotDebug::msg(SpotDebug::DEBUG, 'foreach-loop: done');

    $this->displayStatus("loopcount", count($hdrList));
    $this->displayStatus("timer", round(microtime(true) - $timer, 2));

    /*
     * Flatten the per-message results into one list. A plain foreach is used
     * on purpose: an empty result for one message must not end the merge and
     * drop the full comments that were retrieved after it.
     */
    $fullComments = array();
    foreach ($fullCommentDbList as $fullComment) {
        if (is_array($fullComment) && !empty($fullComment)) {
            $fullComments = array_merge($fullComments, $fullComment);
        }
    }

    /*
     * Add the comments to the database and update the last article
     * number found
     */
    SpotTiming::start($timingPrefix . ':addComments() call');
    $this->_commentDao->addComments($commentDbList, $fullComments);
    SpotTiming::stop($timingPrefix . ':addComments() call');

    # update the maximum article id
    if (!empty($lastProcessedId) && $lastProcessedArtNr > 0) {
        $this->_usenetStateDao->setMaxArticleId(Dao_UsenetState::State_Comments, $lastProcessedArtNr, $lastProcessedId);
    }

    /*
     * Recalculate the average spotrating and update the amount
     * of unverified comments
     */
    SpotTiming::start($timingPrefix . ':updateSpotRating()');
    $this->_spotDao->updateSpotRating($spotMsgIdRatingList);
    SpotTiming::stop($timingPrefix . ':updateSpotRating()');

    SpotTiming::start($timingPrefix . ':updateSpotCommentCount()');
    $this->_spotDao->updateSpotCommentCount($spotMsgIdList);
    SpotTiming::stop($timingPrefix . ':updateSpotCommentCount()');

    return array(
        'count'       => count($hdrList),
        'headercount' => count($hdrList),
        'lastmsgid'   => $lastProcessedId,
    );
}
/**
 * Executes a POST to the given url and returns the response body when that
 * body is well-formed XML.
 *
 * @param string $url      URL to post to
 * @param array  $postdata Fields to post
 *
 * @return bool|mixed The raw response body, or false when the request was
 *                    not successful or the body is not well-formed XML
 */
protected function postAndDownloadNzb($url, array $postdata)
{
    // Initialize download retrieval class
    $svcHttp = new Services_Providers_Http($this->_cacheDao);
    $svcHttp->setPostContent($postdata);
    $svcHttp->setMethod('POST');
    $result = $svcHttp->perform($url);

    // Check if any error occurred
    if (!$result['successful']) {
        SpotDebug::msg(SpotDebug::DEBUG, __CLASS__ . '->postAndDownloadNzb(), not succesful=' . $result['errorstr']);

        return false;
    }

    /*
     * Load the body into simplexml purely to test whether it is well formed;
     * warnings are suppressed because a malformed body is an expected case.
     *
     * The result must be compared against false explicitly: a SimpleXMLElement
     * for an empty root element evaluates to false in a boolean context even
     * though the document parsed fine.
     */
    if (@simplexml_load_string($result['data']) !== false) {
        return $result['data'];
    }

    return false;
}
/**
 * Processes one batch of spot headers: parses and stores new headers,
 * optionally retrieves the full spots, prefetches images and NZB files,
 * applies moderator commands and updates the maximum article id.
 *
 * @param array $hdrList   Headers as retrieved from the server; each entry is
 *                         read for 'Message-ID', 'Number', 'Subject', 'From' and 'Date'
 * @param int   $curArtNr  Article number this batch started at (status output only)
 * @param int   $increment Article number this batch runs till (status/debug output only)
 * @param float $timer     microtime(true) value taken when the batch started
 *
 * @return array Array with the keys 'count', 'headercount' and 'lastmsgid'
 *
 * @throws Exception any retrieval error other than code 430 or an XML parse failure
 */
function process($hdrList, $curArtNr, $increment, $timer)
{
    $timingPrefix = __CLASS__ . '::' . __FUNCTION__;

    $this->displayStatus("progress", $curArtNr . " till " . $increment);

    $signedCount = 0;
    $hdrsParsed = 0;
    $fullsRetrieved = 0;
    $invalidCount = 0;
    $msgCounter = 0;
    $modCount = 0;
    $headerInDbCount = 0;
    $skipCount = 0;
    $lastProcessedId = '';
    $lastProcessedArtNr = 0;
    $fullSpotDbList = array();
    $spotDbList = array();
    $moderationList = array();
    $processingStartTime = time();

    /*
     * Determine the cutoff date (unixtimestamp) from whereon we do not want to
     * load the spots
     */
    if ($this->_settings->get('retention') > 0) {
        $retentionStamp = time() - $this->_settings->get('retention') * 24 * 60 * 60;
    } else {
        $retentionStamp = 0;
    }
    SpotDebug::msg(SpotDebug::DEBUG, 'retentionStamp=' . $retentionStamp);
    SpotDebug::msg(SpotDebug::TRACE, 'hdrList=' . serialize($hdrList));

    /*
     * We ask the database to match our messageid's we just retrieved with
     * the list of id's already stored
     */
    SpotTiming::start($timingPrefix . ':matchSpotMessageIds');
    $dbIdList = $this->_spotDao->matchSpotMessageIds($hdrList);
    SpotTiming::stop($timingPrefix . ':matchSpotMessageIds');

    SpotTiming::start($timingPrefix . ':getMassCacheRecords');
    $cachedIdList = $this->_cacheDao->getMassCacheRecords($hdrList);
    SpotTiming::stop($timingPrefix . ':getMassCacheRecords');

    SpotDebug::msg(SpotDebug::TRACE, 'dbIdList=' . serialize($dbIdList));

    /*
     * We get a list of spots which have been blacklisted before,
     * we do this because when the 'buggy' flag is set, we else keep
     * retrieving the same spots, nzb's and images over and over again
     */
    $preModdedList = $this->_modListDao->matchAgainst($hdrList);

    SpotTiming::start($timingPrefix . ':forEach');
    foreach ($hdrList as $msgheader) {
        SpotTiming::start($timingPrefix . ':forEach-to-ParseHeader');

        $msgCounter++;
        SpotDebug::msg(SpotDebug::DEBUG, 'foreach-loop, start. msgId= ' . $msgCounter);

        /*
         * Keep the usenet server alive when processing is slow: send a NOOP
         * once more than 30 seconds passed since the start / the last NOOP.
         */
        if (time() - $processingStartTime > 30) {
            $this->_svcNntpText->sendNoop();
            $this->_svcNntpBin->sendNoop();

            $processingStartTime = time();
        }

        /*
         * We keep track whether we actually fetched this header and fullspot
         * to add it to the database, because only then we can update the
         * title from the spots title or rely on our database to fetch
         * the fullspot
         */
        $didFetchHeader = false;
        $didFetchFullSpot = false;

        # Reset timelimit
        set_time_limit(120);

        # messageid to check
        $msgId = $msgheader['Message-ID'];
        $artNr = $msgheader['Number'];

        /*
         * If this message was already deleted in a previous run,
         * let's not even consider it
         */
        if (isset($preModdedList[$msgId])) {
            SpotTiming::stop($timingPrefix . ':forEach-to-ParseHeader');
            $skipCount++;
            continue;
        }

        /*
         * We prepare some variables so we don't have to perform an array
         * lookup for each check and the code is easier to read.
         */
        $header_isInDb = isset($dbIdList['spot'][$msgId]);
        $fullspot_isInDb = isset($dbIdList['fullspot'][$msgId]);

        /*
         * If the spotheader is not yet added to the database, parse the header
         * information.
         *
         * If the header is present, but we don't have the fullspot yet or we are
         * running in 'retro' mode, parse the header as well because some fields
         * are only in the header and not in the full.
         *
         * We need some of those fields (for example KeyID)
         */
        if (!$header_isInDb || ((!$fullspot_isInDb || $this->_retro) && $this->_retrieveFull)) {
            $hdrsParsed++;

            SpotDebug::msg(SpotDebug::TRACE, 'foreach-loop, parsingXover, start. msgId= ' . $msgCounter);
            SpotTiming::start($timingPrefix . ':parseHeader');
            $spot = $this->_svcSpotParser->parseHeader(
                $msgheader['Subject'],
                $msgheader['From'],
                $msgheader['Date'],
                $msgheader['Message-ID'],
                $this->_rsakeys
            );
            SpotTiming::stop($timingPrefix . ':parseHeader');
            SpotDebug::msg(SpotDebug::TRACE, 'foreach-loop, parsingXover, done. msgId= ' . $msgCounter);

            /*
             * When a parse error occurred, we ignore the spot, also unverified
             * spots are ignored
             */
            if ($spot === false || !$spot['verified']) {
                SpotTiming::stop($timingPrefix . ':forEach-to-ParseHeader');
                $invalidCount++;
                continue;
            }

            /*
             * Special moderator commands always have keyid 2
             */
            if ($spot['keyid'] == 2) {
                $commandAr = explode(' ', $spot['title']);
                $validCommands = array('delete', 'dispose', 'remove');

                /*
                 * Only act on one of the defined commands, and only when it
                 * actually names a target messageid.
                 */
                if (in_array(strtolower($commandAr[0]), $validCommands, true) && isset($commandAr[1])) {
                    $moderationList[$commandAr[1]] = 1;
                    $modCount++;
                }
            } else {
                /*
                 * Don't add spots older than specified for the retention stamp
                 */
                if ($retentionStamp > 0
                    && $spot['stamp'] < $retentionStamp
                    && $this->_settings->get('retentiontype') == 'everything'
                ) {
                    SpotTiming::stop($timingPrefix . ':forEach-to-ParseHeader');
                    $skipCount++;
                    continue;
                } elseif ($spot['stamp'] < $this->_settings->get('retrieve_newer_than')) {
                    $skipCount++;
                } elseif (!$header_isInDb) {
                    /*
                     * We do not have the header in the database yet, lets add it
                     */
                    $spotDbList[] = $spot;

                    /*
                     * Some buggy NNTP servers give us the same messageid
                     * in one XOVER statement, hence we update the list of
                     * messageid's we already have retrieved and are ready
                     * to be added to the database
                     */
                    $dbIdList['spot'][$msgId] = 1;
                    $header_isInDb = true;
                    $lastProcessedId = $msgId;
                    $lastProcessedArtNr = $artNr;
                    $didFetchHeader = true;

                    if ($spot['wassigned']) {
                        $signedCount++;
                    }
                }
            }
        } else {
            $lastProcessedId = $msgId;
            $lastProcessedArtNr = $artNr;
            $headerInDbCount++;
        }

        SpotTiming::start($timingPrefix . ':forEach-getFullSpot');

        /*
         * We don't want to retrieve the fullspot if we don't have the header
         * in the database. Because we try to add headers in the above code we just have
         * to check if the header is in the database.
         *
         * We cannot collapse this code with the header fetching code because we want to
         * be able to add the fullspot to a system after all the headers are retrieved
         */
        if ($header_isInDb && !$fullspot_isInDb) {
            /*
             * Don't add older fullspots than specified for the retention stamp
             */
            if ($retentionStamp > 0 && strtotime($msgheader['Date']) < $retentionStamp) {
                SpotTiming::stop($timingPrefix . ':forEach-to-ParseHeader');
                SpotTiming::stop($timingPrefix . ':forEach-getFullSpot');
                continue;
            }

            if ($this->_retrieveFull) {
                $fullSpot = array();

                try {
                    $fullsRetrieved++;
                    SpotDebug::msg(SpotDebug::TRACE, 'foreach-loop, getFullSpot, start. msgId= ' . $msgId);
                    $fullSpot = $this->_svcNntpTextReading->readFullSpot($msgId);
                    SpotDebug::msg(SpotDebug::TRACE, 'foreach-loop, getFullSpot, done. msgId= ' . $msgId);

                    /*
                     * Did we fail to parse the spot? If so, skip this one; the
                     * running timers are stopped first so they do not stay open.
                     */
                    if (empty($fullSpot)) {
                        SpotTiming::stop($timingPrefix . ':forEach-to-ParseHeader');
                        SpotTiming::stop($timingPrefix . ':forEach-getFullSpot');
                        $invalidCount++;
                        continue;
                    }

                    # add this spot to the database
                    $fullSpotDbList[] = $fullSpot;
                    $fullspot_isInDb = true;
                    $didFetchFullSpot = true;

                    /*
                     * Some buggy NNTP servers give us the same messageid
                     * in the same XOVER statement, hence we update the list of
                     * messageid's we already have retrieved and are ready
                     * to be added to the database
                     */
                    $dbIdList['fullspot'][$msgId] = 1;

                    /*
                     * Overwrite the spots' title because the fullspot contains the title in
                     * UTF-8 format.
                     * We also overwrite the spotterid from the spotsfull because the spotterid
                     * is only in the header in more recent spots.
                     */
                    if ($didFetchHeader) {
                        $lastSpotIdx = count($spotDbList) - 1;
                        $spotDbList[$lastSpotIdx]['title'] = $fullSpot['title'];
                        $spotDbList[$lastSpotIdx]['spotterid'] = $fullSpot['spotterid'];
                    }
                } catch (ParseSpotXmlException $x) {
                    # swallow error
                } catch (Exception $x) {
                    /*
                     * Sometimes we get an 'No such article' error for a header we just retrieved,
                     * if we want to retrieve the full article. This is messed up, but let's just
                     * swallow the error
                     */
                    if ($x->getCode() == 430) {
                        /*
                         * Reset error count, so other errors are actually re-tried
                         */
                        $this->_svcNntpText->resetErrorCount();
                        $this->_svcNntpBin->resetErrorCount();
                    } elseif ($x->getMessage() != 'String could not be parsed as XML') {
                        throw $x;
                    }
                }
            }
        }
        SpotTiming::stop($timingPrefix . ':forEach-getFullSpot');
        SpotTiming::start($timingPrefix . ':forEach-getNzbOrImage');

        /*
         * If both the image and the NZB file are already in the cache,
         * or we are set to not prefetch them, don't bother to retrieve
         * the full spot either from the database
         */
        $needPrefetch = $this->_prefetch_image || $this->_prefetch_nzb;
        if (!$this->_retrieveFull || !$header_isInDb) {
            $needPrefetch = false;
        }

        if ($needPrefetch) {
            $needPrefetch = !isset($cachedIdList[Dao_Cache::SpotImage][$msgId])
                || !isset($cachedIdList[Dao_Cache::SpotNzb][$msgId]);
        }

        if ($needPrefetch) {
            try {
                /*
                 * If we are running in 'retro' mode, it is possible both the header and spot are in the
                 * database already, however -- we need the information from the fullspot so we retrieve it
                 * again
                 */
                if (!$didFetchFullSpot) {
                    SpotTiming::start($timingPrefix . ':daoGetFullSpot');
                    $fullSpot = $this->_spotDao->getFullSpot($msgId, SPOTWEB_ANONYMOUS_USERID);
                    SpotTiming::start($timingPrefix . ':retrieveParseFullSpot');
                    $fullSpot = array_merge($this->_svcSpotParser->parseFull($fullSpot['fullxml']), $fullSpot);
                    SpotTiming::stop($timingPrefix . ':retrieveParseFullSpot', array());
                    SpotTiming::stop($timingPrefix . ':daoGetFullSpot');
                }

                /*
                 * Prefetch (cache) the spots' image
                 */
                SpotTiming::start($timingPrefix . ':forEach-getImage');
                if ($this->_prefetch_image) {
                    SpotDebug::msg(SpotDebug::TRACE, 'foreach-loop, getImage(), start. msgId= ' . $msgId);

                    if (!isset($cachedIdList[Dao_Cache::SpotImage][$fullSpot['messageid']])) {
                        $this->_svcProvImage->fetchSpotImage($fullSpot);
                    }

                    SpotDebug::msg(SpotDebug::TRACE, 'foreach-loop, getImage(), done. msgId= ' . $msgId);
                }
                SpotTiming::stop($timingPrefix . ':forEach-getImage');

                SpotTiming::start($timingPrefix . ':forEach-getNzb');
                /*
                 * Prefetch (cache) the spots' NZB file, but only do so
                 * if we can expect an NZB file
                 */
                if ($this->_prefetch_nzb && !empty($fullSpot['nzb']) && $fullSpot['stamp'] > 1290578400) {
                    SpotDebug::msg(SpotDebug::TRACE, 'foreach-loop, getNzb(), start. msgId= ' . $msgId);

                    if (!isset($cachedIdList[Dao_Cache::SpotNzb][$fullSpot['messageid']])) {
                        $this->_svcProvNzb->fetchNzb($fullSpot);
                    }

                    SpotDebug::msg(SpotDebug::TRACE, 'foreach-loop, getNzb(), done. msgId= ' . $msgId);
                }
                SpotTiming::stop($timingPrefix . ':forEach-getNzb');
            } catch (ParseSpotXmlException $x) {
                # swallow error
            } catch (Exception $x) {
                SpotTiming::stop($timingPrefix . ':retrieveParseFullSpot', array());
                SpotTiming::stop($timingPrefix . ':daoGetFullSpot');

                /*
                 * Sometimes we get an 'No such article' error for a header we just retrieved,
                 * if we want to retrieve the full article. This is messed up, but let's just
                 * swallow the error
                 */
                if ($x->getCode() == 430) {
                    /*
                     * Reset error count, so other errors are actually re-tried
                     */
                    $this->_svcNntpText->resetErrorCount();
                    $this->_svcNntpBin->resetErrorCount();
                } elseif ($x->getMessage() != 'String could not be parsed as XML') {
                    throw $x;
                }
            }
        }
        SpotTiming::stop($timingPrefix . ':forEach-getNzbOrImage');
        SpotTiming::stop($timingPrefix . ':forEach-to-ParseHeader');

        /*
         * If we are under memory pressure, flush the cache to disk in advance so we
         * can free up memory. This is slower, but might avoid ballooning memory.
         */
        if ($this->hasMemoryPressure()) {
            SpotDebug::msg(SpotDebug::DEBUG, 'we are under memory pressure, flushing to disk');

            echo "We are under memory pressure... ";

            $this->_spotDao->addSpots($spotDbList, $fullSpotDbList);
            $spotDbList = array();
            $fullSpotDbList = array();
        }

        SpotDebug::msg(SpotDebug::DEBUG, 'foreach-loop, done. msgId= ' . $msgCounter);
    }
    SpotTiming::stop($timingPrefix . ':forEach');

    /*
     * All counters are still zero when the header list was empty, so the
     * same status lines serve both the empty and the non-empty case.
     */
    $this->displayStatus("hdrparsed", $hdrsParsed);
    $this->displayStatus("hdrindbcount", $headerInDbCount);
    $this->displayStatus("verified", $signedCount);
    $this->displayStatus("invalidcount", $invalidCount);
    $this->displayStatus("skipcount", $skipCount);
    $this->displayStatus("modcount", $modCount);
    $this->displayStatus("fullretrieved", $fullsRetrieved);
    $this->displayStatus("loopcount", count($hdrList));

    /*
     * Add the spots to the database and update the last article
     * number found
     */
    $this->_spotDao->addSpots($spotDbList, $fullSpotDbList);
    SpotDebug::msg(SpotDebug::TRACE, 'added Spots, spotDbList=' . serialize($spotDbList));
    SpotDebug::msg(SpotDebug::TRACE, 'added Spots, fullSpotDbList=' . serialize($fullSpotDbList));

    /*
     * Actually act on the moderation settings. We cannot process this inline
     * because a spot can be added and moderated within the same iteration
     */
    switch ($this->_settings->get('spot_moderation')) {
        case 'disable':
            break;

        case 'markspot':
            $this->_commentDao->markCommentsModerated($moderationList);
            $this->_spotDao->markSpotsModerated($moderationList);
            break;

        default:
            $this->_spotDao->removeSpots($moderationList);
            $this->_commentDao->removeComments($moderationList);

            /*
             * If the spots actually get removed, we want to make
             * sure we write the deleted spots down. This prevents
             * us from retrieving and deleting them over and over again
             */
            $this->_modListDao->addToRingBuffer($moderationList);
            break;
    }

    # update the maximum article id
    if (!empty($lastProcessedId) && $lastProcessedArtNr > 0) {
        $this->_usenetStateDao->setMaxArticleId(Dao_UsenetState::State_Spots, $lastProcessedArtNr, $lastProcessedId);
    }
    SpotDebug::msg(SpotDebug::DEBUG, 'loop finished, setMaxArticleId=' . serialize($increment));

    /*
     * And remove old list of moderated spots
     */
    $this->_modListDao->deleteOldest();

    $this->displayStatus("timer", round(microtime(true) - $timer, 2));

    return array(
        'count'       => count($hdrList),
        'headercount' => $hdrsParsed,
        'lastmsgid'   => $lastProcessedId,
    );
}
/**
 * Checks that the server can be used by selecting a newsgroup and then
 * closing the session again.
 *
 * A group has to be selected because authentication is not always
 * asked for up front, but is sometimes required nonetheless.
 */
public function validateServer()
{
    SpotDebug::msg(SpotDebug::TRACE, __CLASS__ . "->validateServer()");

    $probeGroup = 'free.pt';
    $this->selectGroup($probeGroup);

    $this->quit();
}
/**
 * Walks the group from $curArticleNr up to the last known article, fetching
 * the overview in steps of $increment and handing every batch to process().
 *
 * @param int $curArticleNr Article number to start retrieving from
 * @param int $increment    Amount of article numbers requested per overview call
 *
 * @return int Sum of the 'headercount' values reported by process()
 */
function loopTillEnd($curArticleNr, $increment = 1000)
{
    $timingPrefix = __CLASS__ . '::' . __FUNCTION__;

    $totalProcessed = 0;
    $totalHeaders = 0;
    $lastSeenMessageId = '';

    # article numbers can wrap around, never start before the first article
    $curArticleNr = ($curArticleNr < $this->_msgdata['first']) ? $this->_msgdata['first'] : $curArticleNr;

    $this->displayStatus("groupmessagecount", $this->_msgdata['last'] - $this->_msgdata['first']);
    $this->displayStatus("firstmsg", $this->_msgdata['first']);
    $this->displayStatus("lastmsg", $this->_msgdata['last']);
    $this->displayStatus("curartnr", $curArticleNr);
    $this->displayStatus("", "");

    SpotTiming::start($timingPrefix . ':whileLoop');
    while ($curArticleNr < $this->_msgdata['last']) {
        $batchStarted = microtime(true);

        # fetch the headers (XOVER) for this batch
        SpotTiming::start($timingPrefix . ':getOverview');
        $hdrList = $this->_svcNntpText->getOverview($curArticleNr, $curArticleNr + $increment);
        SpotTiming::stop($timingPrefix . ':getOverview');

        $batchStartNr = $curArticleNr;

        /*
         * Continue right after the last header we received. When nothing
         * (usable) was returned, step forward by the increment instead so
         * we are guaranteed to advance.
         */
        $headerCount = count($hdrList);
        $canFollowLastHeader = $headerCount >= 1
            && !($hdrList[$headerCount - 1]['Number'] < $curArticleNr);
        if ($canFollowLastHeader) {
            $curArticleNr = $hdrList[$headerCount - 1]['Number'] + 1;
        } else {
            $curArticleNr += $increment;
        }

        # hand the batch over to the processing method
        SpotTiming::start($timingPrefix . ':callProcess');
        $batchResult = $this->process($hdrList, $batchStartNr, $curArticleNr, $batchStarted);
        SpotTiming::stop($timingPrefix . ':callProcess');

        $totalProcessed += $batchResult['count'];
        $totalHeaders += $batchResult['headercount'];
        $lastSeenMessageId = $batchResult['lastmsgid'];

        # refresh the running marker so no other retriever starts during
        # an initial retrieve, which can take many hours
        $this->_usenetStateDao->setRetrieverRunning(true);

        /*
         * With timing enabled we stop early, as fetching too many
         * spots would make us run out of memory
         */
        if ($totalProcessed > 3000 && SpotTiming::isEnabled()) {
            break;
        }
    }
    SpotTiming::stop($timingPrefix . ':whileLoop');

    # we are done updating; if the newsserver deleted earlier retrieved
    # messages, remove them from our database as well
    if ($lastSeenMessageId != '') {
        SpotDebug::msg(SpotDebug::DEBUG, 'loopTillEnd() finished, highestMessageId = ' . $lastSeenMessageId);

        $this->removeTooNewRecords($lastSeenMessageId);
    }

    $this->displayStatus("totalprocessed", $totalProcessed);

    return $totalHeaders;
}