コード例 #1
0
 /**
  * Returns the full spot for a message id.
  *
  * The database is consulted first. When it has no full spot, the spot is
  * read through the NNTP reader, stored, and then re-read from the database
  * so the result always has the same shape. The stored 'fullxml' is parsed
  * and merged into the returned record.
  *
  * @param string $msgId     Message id of the spot to fetch
  * @param mixed  $ourUserId User id handed to the DAO's getFullSpot()
  *
  * @return array|false The database record merged with the parsed full XML
  *                     (database values win on key clashes), or false when
  *                     the NNTP reader returned nothing
  *
  * @throws Exception When no spot header exists in the database for $msgId
  */
 function fetchFullSpot($msgId, $ourUserId)
 {
     $timerName = __CLASS__ . '::' . __FUNCTION__;
     SpotTiming::start($timerName);
     /*
      * First try the database for the spot, because if it
      * is already cached in the database we don't need
      * anything else
      */
     $fullSpot = $this->_spotDao->getFullSpot($msgId, $ourUserId);
     if (empty($fullSpot)) {
         /*
          * When we retrieve a fullspot entry but there is no spot entry, the join in our DB query
          * causes us to never get the spot, hence we throw this exception
          */
         $spotHeader = $this->_spotDao->getSpotHeader($msgId);
         if (empty($spotHeader)) {
             # Stop the timer on this exit too, exactly like the 'return false' exit below
             SpotTiming::stop($timerName, array($msgId, $ourUserId, $fullSpot));
             throw new Exception("Spot is not in our Spotweb database");
         }
         /*
          * Retrieve a fully loaded spot from the NNTP server
          */
         $newFullSpot = $this->_nntpSpotReading->readFullSpot($msgId);
         if (empty($newFullSpot)) {
             SpotTiming::stop($timerName, array($msgId, $ourUserId, $fullSpot));
             return false;
         }
         $this->_spotDao->addFullSpots(array($newFullSpot));
         /*
          * If the current spotterid is empty, we probably now have a spotterid because
          * we have the fullspot.
          *
          * We now update the 'basic' spot information, like the spotterid but also the
          * title. This is necessary because the XML contains better encoding.
          *
          * For example take the title from spot bdZZdJ3gPxTAmSE@spot.net.
          *
          * We cannot use all information from the XML because some information just
          * isn't present in the XML file
          */
         $this->_spotDao->updateSpotInfoFromFull($newFullSpot);
         /*
          * We ask our DB to retrieve the fullspot again, this ensures
          * all information is present and always in the same format
          */
         $fullSpot = $this->_spotDao->getFullSpot($msgId, $ourUserId);
     }
     /*
      * We always have to parse the full spot because the database
      * does not contain all information
      */
     $spotParser = new Services_Format_Parsing();
     $parsedXml = $spotParser->parseFull($fullSpot['fullxml']);
     # Database values take precedence over the parsed XML on duplicate keys
     $fullSpot = array_merge($parsedXml, $fullSpot);
     SpotTiming::stop($timerName, array($msgId, $ourUserId, $fullSpot));
     return $fullSpot;
 }
コード例 #2
0
 /**
  * Parses an updated fullspot XML document and hands the result to the
  * spot DAO for storage, together with the username from the current session.
  *
  * @param string $messageId   Message id of the spot being updated
  * @param string $fullSpotXml The updated fullspot XML
  *
  * @return void
  */
 public function updateSpot($messageId, $fullSpotXml)
 {
     # Turn the raw XML into a spot record
     $parser = new Services_Format_Parsing();
     $spotRecord = $parser->parseFull($fullSpotXml);

     /*
      * Parsing does not put the message id or the raw XML into the
      * record, so add both by hand before storing it
      */
     $spotRecord['messageid'] = $messageId;
     $spotRecord['fullxml'] = $fullSpotXml;

     # Persist the record, attributed to the user of the current session
     $this->_daoFactory->getSpotDao()->updateSpot(
         $spotRecord,
         $this->_currentSession['user']['username']
     );
 }
コード例 #3
0
 /**
  * Reads one spot's header through the NNTP engine and builds a full spot
  * record from it: header fields, signature verification result, spotter id
  * (verified spots only) and the fields parsed out of the full XML.
  *
  * @param string $msgId Message id without the surrounding angle brackets
  *
  * @return array|false The full spot record, or false when the full XML is
  *                     larger than 50 KiB
  */
 public function readFullSpot($msgId)
 {
     $signer = Services_Signing_Base::factory();

     # Defaults for every field; values parsed from the header overwrite them
     $defaults = array(
         'fullxml' => '',
         'user-signature' => '',
         'user-key' => '',
         'verified' => false,
         'messageid' => $msgId,
         'spotterid' => '',
         'xml-signature' => '',
         'moderated' => 0,
         'user-avatar' => '',
         'newsreader' => '',
     );

     # Fetch the header of the article and fold its fields into the record
     $rawHeader = $this->_nntpEngine->getHeader('<' . $msgId . '>');
     $headerFields = $this->parseHeader($rawHeader, $defaults);
     $record = array_merge($defaults, $headerFields);

     # Check the XML signature of the spot
     $record['verified'] = $signer->verifyFullSpot($record);

     /*
      * Only a verified spot gets a spotterid calculated, so that it
      * can safely be stored in the database
      */
     if ($record['verified']) {
         $modulo = $record['user-key']['modulo'];
         $record['spotterid'] = $this->_spotParseUtil->calculateSpotterId($modulo);
     }

     /*
      * Very large spots tend to be spam; refuse them to prevent
      * memory and database issues
      */
     $maxXmlBytes = 1024 * 50;
     if (strlen($record['fullxml']) > $maxXmlBytes) {
         return false;
     }

     /*
      * Parse the XML structure of the spot; values already in the
      * record win over the parsed ones on duplicate keys
      */
     $xmlParser = new Services_Format_Parsing();
     $parsedFields = $xmlParser->parseFull($record['fullxml']);

     return array_merge($parsedFields, $record);
 }
コード例 #4
0
 /**
  * Processes one batch of retrieved message headers.
  *
  * For every header this parses the spot header when needed, optionally
  * retrieves the full spot, optionally prefetches the image and NZB, and
  * collects moderation commands. Afterwards the collected spots are written
  * to the database, the moderation list is applied according to the
  * 'spot_moderation' setting, and the maximum article id is updated.
  *
  * @param array $hdrList   Headers to process; each entry is read for the keys
  *                         'Message-ID', 'Number', 'Subject', 'From' and 'Date'
  * @param mixed $curArtNr  Only used in the "progress" status message
  * @param mixed $increment Only used in the "progress" status and a debug message
  * @param float $timer     Start time compared against microtime(true) to report elapsed seconds
  *
  * @return array Keys 'count' (number of headers given), 'headercount'
  *               (number of headers parsed) and 'lastmsgid' (last processed message id)
  *
  * @throws Exception Re-thrown when retrieving a full spot, image or NZB fails with
  *                   anything other than code 430 or an XML parse failure
  */
 function process($hdrList, $curArtNr, $increment, $timer)
 {
     # Common prefix for every timer name used in this method
     $timerBase = __CLASS__ . '::' . __FUNCTION__;
     $this->displayStatus("progress", $curArtNr . " till " . $increment);
     $signedCount = 0;
     $hdrsParsed = 0;
     $fullsRetrieved = 0;
     $invalidCount = 0;
     $msgCounter = 0;
     $modCount = 0;
     $headerInDbCount = 0;
     $skipCount = 0;
     $lastProcessedId = '';
     $lastProcessedArtNr = 0;
     $fullSpotDbList = array();
     $spotDbList = array();
     $moderationList = array();
     $processingStartTime = time();
     # Moderator commands we act upon (compared against the lowercased first word of the title)
     $validCommands = array('delete', 'dispose', 'remove');
     /*
      * Determine the cutoff date (unixtimestamp) from whereon we do not want to
      * load the spots
      */
     if ($this->_settings->get('retention') > 0) {
         $retentionStamp = time() - $this->_settings->get('retention') * 24 * 60 * 60;
     } else {
         $retentionStamp = 0;
     }
     SpotDebug::msg(SpotDebug::DEBUG, 'retentionStamp=' . $retentionStamp);
     SpotDebug::msg(SpotDebug::TRACE, 'hdrList=' . serialize($hdrList));
     /*
      * We ask the database to match the messageid's we just retrieved
      * from the server against the ones it already has
      */
     SpotTiming::start($timerBase . ':matchSpotMessageIds');
     $dbIdList = $this->_spotDao->matchSpotMessageIds($hdrList);
     SpotTiming::stop($timerBase . ':matchSpotMessageIds');
     SpotTiming::start($timerBase . ':getMassCacheRecords');
     $cachedIdList = $this->_cacheDao->getMassCacheRecords($hdrList);
     SpotTiming::stop($timerBase . ':getMassCacheRecords');
     SpotDebug::msg(SpotDebug::TRACE, 'dbIdList=' . serialize($dbIdList));
     /*
      * We get a list of spots which have been blacklisted before,
      * we do this because when the 'buggy' flag is set, we otherwise keep
      * retrieving the same spots, nzb's and images over and over again
      */
     $preModdedList = $this->_modListDao->matchAgainst($hdrList);
     SpotTiming::start($timerBase . ':forEach');
     foreach ($hdrList as $msgheader) {
         SpotTiming::start($timerBase . ':forEach-to-ParseHeader');
         $msgCounter++;
         SpotDebug::msg(SpotDebug::DEBUG, 'foreach-loop, start. msgId= ' . $msgCounter);
         /*
          * Keep the usenet server alive when processing is slow: send a NOOP
          * once more than 30 seconds have elapsed since the last one.
          * The elapsed time is "now minus start"; the reverse subtraction is
          * never positive and would never trigger the keep-alive.
          */
         if (time() - $processingStartTime > 30) {
             $this->_svcNntpText->sendNoop();
             $this->_svcNntpBin->sendNoop();
             $processingStartTime = time();
         }
         /*
          * We keep track whether we actually fetched this header and fullspot
          * to add it to the database, because only then we can update the
          * title from the spots title or rely on our database to fetch
          * the fullspot
          */
         $didFetchHeader = false;
         $didFetchFullSpot = false;
         # Reset timelimit
         set_time_limit(120);
         # messageid to check
         $msgId = $msgheader['Message-ID'];
         $artNr = $msgheader['Number'];
         /*
          * If this message was already deleted in a previous run,
          * let's not even consider it
          */
         if (isset($preModdedList[$msgId])) {
             SpotTiming::stop($timerBase . ':forEach-to-ParseHeader');
             $skipCount++;
             continue;
         }
         /*
          * We prepare some variables so we don't have to perform an array
          * lookup for each check and the code is easier to read.
          */
         $header_isInDb = isset($dbIdList['spot'][$msgId]);
         $fullspot_isInDb = isset($dbIdList['fullspot'][$msgId]);
         /*
          * If the spotheader is not yet added to the database, parse the header
          * information.
          *
          * If the header is present, but we don't have the fullspot yet or we are
          * running in 'retro' mode, parse the header as well because some fields
          * are only in the header and not in the full.
          *
          * We need some of those fields (for example KeyID)
          */
         if (!$header_isInDb || (!$fullspot_isInDb || $this->_retro) && $this->_retrieveFull) {
             $hdrsParsed++;
             SpotDebug::msg(SpotDebug::TRACE, 'foreach-loop, parsingXover, start. msgId= ' . $msgCounter);
             SpotTiming::start($timerBase . ':parseHeader');
             $spot = $this->_svcSpotParser->parseHeader($msgheader['Subject'], $msgheader['From'], $msgheader['Date'], $msgheader['Message-ID'], $this->_rsakeys);
             SpotTiming::stop($timerBase . ':parseHeader');
             SpotDebug::msg(SpotDebug::TRACE, 'foreach-loop, parsingXover, done. msgId= ' . $msgCounter);
             /*
              * When a parse error occurred, we ignore the spot, also unverified
              * spots are ignored
              */
             if ($spot === false || !$spot['verified']) {
                 SpotTiming::stop($timerBase . ':forEach-to-ParseHeader');
                 $invalidCount++;
                 continue;
             }
             /*
              * Special moderator commands always have keyid 2
              */
             if ($spot['keyid'] == 2) {
                 $commandAr = explode(' ', $spot['title']);
                 /*
                  * Only act on one of the defined valid commands, and only when
                  * the title actually carries a second word naming the target
                  */
                 if (isset($commandAr[1]) && in_array(strtolower($commandAr[0]), $validCommands, true)) {
                     $moderationList[$commandAr[1]] = 1;
                     $modCount++;
                 }
             } else {
                 /*
                  * Don't add spots older than specified for the retention stamp
                  */
                 if ($retentionStamp > 0 && $spot['stamp'] < $retentionStamp && $this->_settings->get('retentiontype') == 'everything') {
                     SpotTiming::stop($timerBase . ':forEach-to-ParseHeader');
                     $skipCount++;
                     continue;
                 } elseif ($spot['stamp'] < $this->_settings->get('retrieve_newer_than')) {
                     # Counted as skipped; the header is not queued, the rest of the iteration still runs
                     $skipCount++;
                 } else {
                     /*
                      * Do we have the header in the database? If not, let's add it
                      */
                     if (!$header_isInDb) {
                         $spotDbList[] = $spot;
                         /*
                          * Some buggy NNTP servers give us the same messageid
                          * in one XOVER statement, hence we update the list of
                          * messageid's we already have retrieved and are ready
                          * to be added to the database
                          */
                         $dbIdList['spot'][$msgId] = 1;
                         $header_isInDb = true;
                         $lastProcessedId = $msgId;
                         $lastProcessedArtNr = $artNr;
                         $didFetchHeader = true;
                         if ($spot['wassigned']) {
                             $signedCount++;
                         }
                     }
                 }
             }
         } else {
             $lastProcessedId = $msgId;
             $lastProcessedArtNr = $artNr;
             $headerInDbCount++;
         }
         SpotTiming::start($timerBase . ':forEach-getFullSpot');
         /*
          * We don't want to retrieve the fullspot if we don't have the header
          * in the database. Because we try to add headers in the above code we just have
          * to check if the header is in the database.
          *
          * We cannot collapse this code with the header fetching code because we want to
          * be able to add the fullspot to a system after all the headers are retrieved
          */
         if ($header_isInDb && !$fullspot_isInDb) {
             /*
              * Don't add older fullspots than specified for the retention stamp
              */
             if ($retentionStamp > 0 && strtotime($msgheader['Date']) < $retentionStamp) {
                 SpotTiming::stop($timerBase . ':forEach-to-ParseHeader');
                 SpotTiming::stop($timerBase . ':forEach-getFullSpot');
                 continue;
             }
             if ($this->_retrieveFull) {
                 $fullSpot = array();
                 try {
                     $fullsRetrieved++;
                     SpotDebug::msg(SpotDebug::TRACE, 'foreach-loop, getFullSpot, start. msgId= ' . $msgId);
                     $fullSpot = $this->_svcNntpTextReading->readFullSpot($msgId);
                     SpotDebug::msg(SpotDebug::TRACE, 'foreach-loop, getFullSpot, done. msgId= ' . $msgId);
                     # did we fail to parse the spot? if so, skip this one
                     if (empty($fullSpot)) {
                         $invalidCount++;
                         # Close the timers opened for this iteration, like every other early exit
                         SpotTiming::stop($timerBase . ':forEach-to-ParseHeader');
                         SpotTiming::stop($timerBase . ':forEach-getFullSpot');
                         continue;
                     }
                     # add this spot to the database
                     $fullSpotDbList[] = $fullSpot;
                     $fullspot_isInDb = true;
                     $didFetchFullSpot = true;
                     /*
                      * Some buggy NNTP servers give us the same messageid
                      * in the same XOVER statement, hence we update the list of
                      * messageid's we already have retrieved and are ready
                      * to be added to the database
                      */
                     $dbIdList['fullspot'][$msgId] = 1;
                     /*
                      * Overwrite the spots' title because the fullspot contains the title in
                      * UTF-8 format.
                      * We also overwrite the spotterid from the spotsfull because the spotterid
                      * is only in the header in more recent spots.
                      *
                      * The header of this iteration is the last entry of $spotDbList.
                      */
                     if ($didFetchHeader) {
                         $spotDbList[count($spotDbList) - 1]['title'] = $fullSpot['title'];
                         $spotDbList[count($spotDbList) - 1]['spotterid'] = $fullSpot['spotterid'];
                     }
                 } catch (ParseSpotXmlException $x) {
                     # swallow error
                 } catch (Exception $x) {
                     /**
                      * Sometimes we get an 'No such article' error for a header we just retrieved,
                      * if we want to retrieve the full article. This is messed up, but let's just
                      * swallow the error
                      */
                     if ($x->getCode() == 430) {
                         /*
                          * Reset error count, so other errors are actually re-tried
                          */
                         $this->_svcNntpText->resetErrorCount();
                         $this->_svcNntpBin->resetErrorCount();
                     } elseif ($x->getMessage() == 'String could not be parsed as XML') {
                         # deliberately ignored: unparsable XML is treated like a parse exception
                     } else {
                         throw $x;
                     }
                 }
             }
         }
         SpotTiming::stop($timerBase . ':forEach-getFullSpot');
         SpotTiming::start($timerBase . ':forEach-getNzbOrImage');
         /*
          * If both the image and the NZB file are already in the cache,
          * or we are set to not prefetch them, don't bother to retrieve
          * the full spot either from the database
          */
         $needPrefetch = $this->_prefetch_image || $this->_prefetch_nzb;
         if (!$this->_retrieveFull || !$header_isInDb) {
             $needPrefetch = false;
         }
         if ($needPrefetch) {
             $needPrefetch = !isset($cachedIdList[Dao_Cache::SpotImage][$msgId]) || !isset($cachedIdList[Dao_Cache::SpotNzb][$msgId]);
         }
         if ($needPrefetch) {
             try {
                 /*
                  * If we are running in 'retro' mode, it is possible both the header and spot are in the
                  * database already, however -- we need the information from the fullspot so we retrieve it
                  * again
                  */
                 if (!$didFetchFullSpot) {
                     SpotTiming::start($timerBase . ':daoGetFullSpot');
                     $fullSpot = $this->_spotDao->getFullSpot($msgId, SPOTWEB_ANONYMOUS_USERID);
                     SpotTiming::start($timerBase . ':retrieveParseFullSpot');
                     $fullSpot = array_merge($this->_svcSpotParser->parseFull($fullSpot['fullxml']), $fullSpot);
                     SpotTiming::stop($timerBase . ':retrieveParseFullSpot', array());
                     SpotTiming::stop($timerBase . ':daoGetFullSpot');
                 }
                 /*
                  * Prefetch (cache) the spots' image
                  */
                 SpotTiming::start($timerBase . ':forEach-getImage');
                 if ($this->_prefetch_image) {
                     SpotDebug::msg(SpotDebug::TRACE, 'foreach-loop, getImage(), start. msgId= ' . $msgId);
                     if (!isset($cachedIdList[Dao_Cache::SpotImage][$fullSpot['messageid']])) {
                         $this->_svcProvImage->fetchSpotImage($fullSpot);
                     }
                     SpotDebug::msg(SpotDebug::TRACE, 'foreach-loop, getImage(), done. msgId= ' . $msgId);
                 }
                 SpotTiming::stop($timerBase . ':forEach-getImage');
                 SpotTiming::start($timerBase . ':forEach-getNzb');
                 /*
                  * Prefetch (cache) the spots' NZB file
                  */
                 if ($this->_prefetch_nzb) {
                     /*
                      * Only do so if we can expect an NZB file
                      */
                     if (!empty($fullSpot['nzb']) && $fullSpot['stamp'] > 1290578400) {
                         SpotDebug::msg(SpotDebug::TRACE, 'foreach-loop, getNzb(), start. msgId= ' . $msgId);
                         if (!isset($cachedIdList[Dao_Cache::SpotNzb][$fullSpot['messageid']])) {
                             $this->_svcProvNzb->fetchNzb($fullSpot);
                         }
                         SpotDebug::msg(SpotDebug::TRACE, 'foreach-loop, getNzb(), done. msgId= ' . $msgId);
                     }
                 }
                 SpotTiming::stop($timerBase . ':forEach-getNzb');
             } catch (ParseSpotXmlException $x) {
                 # swallow error
             } catch (Exception $x) {
                 # NOTE(review): these timers may not be running at this point -- confirm SpotTiming tolerates that
                 SpotTiming::stop($timerBase . ':retrieveParseFullSpot', array());
                 SpotTiming::stop($timerBase . ':daoGetFullSpot');
                 /**
                  * Sometimes we get an 'No such article' error for a header we just retrieved,
                  * if we want to retrieve the full article. This is messed up, but let's just
                  * swallow the error
                  */
                 if ($x->getCode() == 430) {
                     /*
                      * Reset error count, so other errors are actually re-tried
                      */
                     $this->_svcNntpText->resetErrorCount();
                     $this->_svcNntpBin->resetErrorCount();
                 } elseif ($x->getMessage() == 'String could not be parsed as XML') {
                     # deliberately ignored: unparsable XML is treated like a parse exception
                 } else {
                     throw $x;
                 }
             }
         }
         SpotTiming::stop($timerBase . ':forEach-getNzbOrImage');
         SpotTiming::stop($timerBase . ':forEach-to-ParseHeader');
         /*
          * If we are under memory pressure, flush the cache to disk in advance so we
          * can free up memory. This is slower, but might avoid ballooning memory.
          */
         if ($this->hasMemoryPressure()) {
             SpotDebug::msg(SpotDebug::DEBUG, 'we are under memory pressure, flushing to disk');
             echo "We are under memory pressure... ";
             $this->_spotDao->addSpots($spotDbList, $fullSpotDbList);
             $spotDbList = array();
             $fullSpotDbList = array();
         }
         SpotDebug::msg(SpotDebug::DEBUG, 'foreach-loop, done. msgId= ' . $msgCounter);
     }
     SpotTiming::stop($timerBase . ':forEach');
     /*
      * Report the counters. With an empty header list the loop never ran, so
      * every counter is still 0 and no separate "empty" branch is needed.
      */
     $this->displayStatus("hdrparsed", $hdrsParsed);
     $this->displayStatus("hdrindbcount", $headerInDbCount);
     $this->displayStatus("verified", $signedCount);
     $this->displayStatus("invalidcount", $invalidCount);
     $this->displayStatus("skipcount", $skipCount);
     $this->displayStatus("modcount", $modCount);
     $this->displayStatus("fullretrieved", $fullsRetrieved);
     $this->displayStatus("loopcount", count($hdrList));
     /*
      * Add the spots to the database and update the last article
      * number found
      */
     $this->_spotDao->addSpots($spotDbList, $fullSpotDbList);
     SpotDebug::msg(SpotDebug::TRACE, 'added Spots, spotDbList=' . serialize($spotDbList));
     SpotDebug::msg(SpotDebug::TRACE, 'added Spots, fullSpotDbList=' . serialize($fullSpotDbList));
     /*
      * Actually act on the moderation settings. We cannot process this inline
      * because a spot can be added and moderated within the same iteration
      */
     switch ($this->_settings->get('spot_moderation')) {
         case 'disable':
             break;
         case 'markspot':
             $this->_commentDao->markCommentsModerated($moderationList);
             $this->_spotDao->markSpotsModerated($moderationList);
             break;
         default:
             $this->_spotDao->removeSpots($moderationList);
             $this->_commentDao->removeComments($moderationList);
             /*
              * If the spots actually get removed, we want to make
              * sure we write the deleted spots down. This prevents
              * us from retrieving and deleting them over and over again
              */
             $this->_modListDao->addToRingBuffer($moderationList);
             break;
     }
     # update the maximum article id
     if (!empty($lastProcessedId) && $lastProcessedArtNr > 0) {
         $this->_usenetStateDao->setMaxArticleId(Dao_UsenetState::State_Spots, $lastProcessedArtNr, $lastProcessedId);
     }
     SpotDebug::msg(SpotDebug::DEBUG, 'loop finished, setMaxArticleId=' . serialize($increment));
     /*
      * And remove old list of moderated spots
      */
     $this->_modListDao->deleteOldest();
     $this->displayStatus("timer", round(microtime(true) - $timer, 2));
     return array('count' => count($hdrList), 'headercount' => $hdrsParsed, 'lastmsgid' => $lastProcessedId);
 }