Ejemplo n.º 1
0
 /**
  * Hands out a shared Services_Nntp_Engine for the requested connection type.
  *
  * Engines are cached per type in self::$_instances, so repeated calls for
  * the same type return the same object instead of opening a new connection.
  * The 'bin' and 'post' types reuse the 'hdr' engine when their own settings
  * ('nntp_nzb' and 'nntp_post') do not define a host.
  *
  * @param Services_Settings_Container $settings Settings to read the NNTP configuration from
  * @param string $type Engine type: 'hdr', 'bin' or 'post'
  * @return Services_Nntp_Engine Pooled engine instance for $type
  * @throws MissingNntpConfigurationException When the 'nntp_hdr' setting is empty
  * @throws Exception When $type is not a known engine type
  */
 public static function pool(Services_Settings_Container $settings, $type)
 {
     SpotDebug::msg(SpotDebug::DEBUG, __CLASS__ . '::pool:(' . $type . ') called');

     // An engine of this type was handed out before: reuse it.
     if (isset(self::$_instances[$type])) {
         return self::$_instances[$type];
     }

     // Every engine type depends on a usable header server configuration.
     $headerConfig = $settings->get('nntp_hdr');
     if (empty($headerConfig)) {
         throw new MissingNntpConfigurationException();
     }

     // Loose comparison on purpose, it mirrors the matching rules of a switch.
     if ($type == 'hdr') {
         $engine = new Services_Nntp_Engine($headerConfig);
     } elseif ($type == 'bin' || $type == 'post') {
         // 'bin' is configured under 'nntp_nzb', 'post' under 'nntp_post'.
         $settingName = $type == 'bin' ? 'nntp_nzb' : 'nntp_post';
         $dedicatedConfig = $settings->get($settingName);

         if (empty($dedicatedConfig['host'])) {
             // No dedicated server configured, share the header engine.
             $engine = self::pool($settings, 'hdr');
         } else {
             $engine = new Services_Nntp_Engine($dedicatedConfig);
         }
     } else {
         throw new Exception("Unknown NNTP type engine (" . $type . ") for pool creation");
     }

     self::$_instances[$type] = $engine;

     return self::$_instances[$type];
 }
Ejemplo n.º 2
0
 /**
  * Performs an uncached HTTP request (GET, or POST when the method is set to
  * 'POST') and follows redirects, including HTML meta-refresh redirects.
  *
  * When open_basedir or safe_mode is active, HTTP redirects are followed
  * manually by calling this method recursively, up to 20 hops in total.
  *
  * @param string $url URL to retrieve
  * @param int|null $lastModTime Last modification time (unix timestamp) used for
  *                              an If-Modified-Since request, or null to skip it
  * @param int $redirTries Number of redirects already followed for this request
  * @return array Associative array with the keys 'http_code', 'data',
  *               'finalurl', 'successful', 'errorstr' and 'curl_info'.
  *               'http_code' is 700 when curl itself reported a failure.
  */
 public function perform($url, $lastModTime = null, $redirTries = 0)
 {
     $timerName = __CLASS__ . '::' . __FUNCTION__;
     SpotTiming::start($timerName);

     // Upper bound on the number of redirects (HTTP and meta-refresh) we follow.
     $maxRedirects = 20;

     /*
      * Default our effectiveUrl to be the current URL,
      * so this way we can always return the effectiveUrl
      */
     $effectiveUrl = $url;

     $ch = curl_init();
     curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 6.2; WOW64; rv:26.0) Gecko/20100101 Firefox/26.0');
     curl_setopt($ch, CURLOPT_URL, $url);
     curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
     curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5);
     curl_setopt($ch, CURLOPT_TIMEOUT, 15);
     curl_setopt($ch, CURLOPT_ENCODING, '');

     // CURLOPT_FAILONERROR is deliberately not set: for an error response
     // (eg. a 400) we still want to see the body to know why it failed.

     // The response headers are part of the returned string, they are
     // split off again using the reported header size.
     curl_setopt($ch, CURLOPT_HEADER, 1);

     // NOTE(review): certificate verification is disabled, which allows
     // man-in-the-middle attacks on HTTPS requests. Left unchanged because
     // callers may depend on reaching hosts with self-signed certificates.
     curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
     curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
     curl_setopt($ch, CURLINFO_HEADER_OUT, true);
     curl_setopt($ch, CURLOPT_VERBOSE, true);

     // Send a cookie with the request if defined
     $cookie = $this->getCookie();
     if ($cookie !== null) {
         curl_setopt($ch, CURLOPT_COOKIE, $cookie);
     }

     // curl refuses to follow redirects itself when open_basedir or
     // safe_mode is active, in that case we follow them manually below.
     $manualRedirect = (ini_get('open_basedir') != '' || ini_get('safe_mode'));
     if ($manualRedirect) {
         curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
         curl_setopt($ch, CURLOPT_MAXREDIRS, 1);
     } else {
         curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
     }

     /*
      * If specified, pass authorization for this request
      */
     $username = $this->getUsername();
     if (!empty($username)) {
         curl_setopt($ch, CURLOPT_HTTPAUTH, CURLAUTH_BASIC);
         curl_setopt($ch, CURLOPT_USERPWD, $username . ':' . $this->getPassword());
     }

     /*
      * OAuth 2.0 uses 'Bearer' authentication, we support this by manually
      * sending the HTTP header field.
      *
      * NOTE(review): this runs again on every redirect hop, so the header
      * may be added more than once unless addHttpHeaders() de-duplicates;
      * verify against its implementation.
      */
     $bearerAuth = $this->getBearerAuth();
     if (!empty($bearerAuth)) {
         $this->addHttpHeaders(array('Authorization: Bearer ' . $bearerAuth));
     }

     /*
      * Should we be posting? If so, also pass along any fields, uploads or
      * raw data we were given.
      */
     if ($this->getMethod() == 'POST') {
         curl_setopt($ch, CURLOPT_POST, true);

         $postContent = $this->getPostContent();
         $uploadFiles = $this->getUploadFiles();
         $rawPostData = $this->getRawPostData();
         if ($postContent != null || $uploadFiles != null || $rawPostData != null) {
             $this->addPostFieldsToCurl($ch, $postContent, $uploadFiles, $rawPostData);
         }
     }

     /*
      * If we already have content stored in our cache, just ask
      * the server if the content is modified since our last
      * time this was stored in the cache
      */
     if ($lastModTime != null && $lastModTime > 0) {
         curl_setopt($ch, CURLOPT_TIMECONDITION, CURL_TIMECOND_IFMODSINCE);
         curl_setopt($ch, CURLOPT_TIMEVALUE, $lastModTime);
     }

     /*
      * Send our custom HTTP headers
      */
     $httpHeaders = $this->getHttpHeaders();
     if (!empty($httpHeaders)) {
         curl_setopt($ch, CURLOPT_HTTPHEADER, $httpHeaders);
     }

     $response = curl_exec($ch);
     $errorStr = curl_error($ch);
     $curl_info = curl_getinfo($ch);

     // Target of a redirect we still have to follow ourselves, if any.
     $redirUrl = null;

     /*
      * Curl returns false on some unspecified errors (eg: a timeout)
      */
     if ($response !== false) {
         $http_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);

         // A 304 (Resource not modified) carries no usable body.
         if ($http_code != 304) {
             $data = substr($response, $curl_info['header_size']);
         } else {
             $data = '';
         }

         /*
          * We also follow redirects, but PHP's safemode doesn't allow
          * for redirects, so fix those as well.
          */
         $effectiveUrl = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
         if ($manualRedirect && ($effectiveUrl != $url || $http_code == 301 || $http_code == 302)) {
             // Only look at the header section, and match the field name
             // case-insensitively (HTTP/2 servers send it in lowercase).
             $headerSection = substr($response, 0, $curl_info['header_size']);
             if (preg_match('/^Location:(.*)$/mi', (string) $headerSection, $matches)) {
                 $location = trim($matches[1]);
                 if ($location !== '') {
                     $redirUrl = $location;
                 }
             }
         }

         // No HTTP redirect pending, look for a meta-refresh redirect instead.
         if ($redirUrl === null && preg_match('/meta.+?http-equiv\\W+?refresh/i', $response)) {
             preg_match('/content.+?url\\W+?(.+?)\\"/i', $response, $matches);
             if (isset($matches[1])) {
                 SpotDebug::msg(SpotDebug::DEBUG, __CLASS__ . '-perform(), matches[1]= ' . $matches[1]);

                 /*
                  * We can get either a relative redirect, or a fully
                  * qualified redirect. Hideref, for example, uses a
                  * relative redirect. Look for those.
                  *
                  * parse_url() doesn't support relative url's, so we have
                  * to resolve them against the current URL ourselves.
                  */
                 $redirUrl = $matches[1];
                 if (stripos($redirUrl, 'http://') !== 0 && stripos($redirUrl, 'https://') !== 0 && stripos($redirUrl, '//') !== 0) {
                     SpotDebug::msg(SpotDebug::DEBUG, __CLASS__ . '->perform(), we have gotten an correct url');
                     $urlParts = parse_url($url);
                     SpotDebug::msg(SpotDebug::DEBUG, __CLASS__ . '->perform(), parse_url: ' . json_encode($urlParts));

                     if (is_array($urlParts) && isset($urlParts['scheme'], $urlParts['host'])) {
                         $baseUrl = $urlParts['scheme'] . '://' . $urlParts['host'];
                         if (isset($urlParts['port'])) {
                             $baseUrl .= ':' . $urlParts['port'];
                         }

                         if ($redirUrl[0] == '/') {
                             // Absolute path on the same host
                             $redirUrl = $baseUrl . $redirUrl;
                         } else {
                             // Path relative to the directory of the current
                             // document: drop the last path segment first.
                             $basePath = isset($urlParts['path']) ? $urlParts['path'] : '';
                             $lastSlash = strrpos($basePath, '/');
                             $baseDir = ($lastSlash === false) ? '/' : substr($basePath, 0, $lastSlash + 1);
                             $redirUrl = $baseUrl . $baseDir . $redirUrl;
                         }
                     } else {
                         // Without a usable base URL the relative target
                         // cannot be resolved, so do not follow it.
                         $redirUrl = null;
                     }
                 }

                 SpotDebug::msg(SpotDebug::DEBUG, __CLASS__ . '->perform(), after metafresh, url = : ' . $url);
             }
         }
     } else {
         // Curl returned an error
         $http_code = 700;
         $data = '';
     }

     // Always release the handle, also when we are about to follow a redirect.
     curl_close($ch);

     if ($redirUrl !== null) {
         $redirTries++;
         if ($redirTries < $maxRedirects) {
             // Stop our own timer before recursing so every start() of this
             // method is paired with a stop().
             SpotTiming::stop($timerName, array($url));

             return $this->perform($redirUrl, $lastModTime, $redirTries);
         }
     }

     /*
      * Sometimes we get an HTTP error of 0 back, which
      * probably means a timeout or something, so fix up
      * the error string manually.
      */
     if ($errorStr == '' && $http_code == 0) {
         $errorStr = 'unable to connect to URL: ' . $url;
     }

     SpotTiming::stop($timerName, array($url));

     return array(
         'http_code' => $http_code,
         'data' => $data,
         'finalurl' => $effectiveUrl,
         'successful' => $http_code == 200 || $http_code == 304,
         'errorstr' => 'http returncode: ' . $http_code . ' / ' . $errorStr,
         'curl_info' => $curl_info,
     );
 }
 /**
  * Processes a batch of comment headers: decides which headers and full
  * comments are still missing from the database, retrieves the full
  * comments when enabled, stores everything and updates the per-spot
  * comment count and rating.
  *
  * @param array $hdrList Headers to process; each entry is read for the keys
  *                       'Message-ID', 'Number' and 'Date'
  * @param mixed $curArtNr Current article number, only used for the progress display
  * @param mixed $increment End of the current range, only used for the progress display
  * @param float $timer Start time as returned by microtime(true)
  * @return array Array with the keys 'count', 'headercount' and 'lastmsgid'
  * @throws Exception Any retrieval error other than a 430 ('No such article')
  *                   or an XML parse failure
  */
 function process($hdrList, $curArtNr, $increment, $timer)
 {
     $this->displayStatus("progress", $curArtNr . " till " . $increment);

     $timerPrefix = __CLASS__ . '::' . __FUNCTION__;
     $lastProcessedId = '';
     $lastProcessedArtNr = 0;
     $commentDbList = array();
     $fullCommentDbList = array();

     /*
      * Determine the cutoff date (unixtimestamp) from whereon we do not want to
      * load the comments
      */
     $retentionDays = $this->_settings->get('retention');
     if ($retentionDays > 0) {
         $retentionStamp = time() - $retentionDays * 24 * 60 * 60;
     } else {
         $retentionStamp = 0;
     }

     /*
      * We ask the database to match our messageid's we just retrieved with
      * the list of id's we have just retrieved from the server
      */
     SpotTiming::start($timerPrefix . ':matchCommentMessageIds');
     $dbIdList = $this->_commentDao->matchCommentMessageIds($hdrList);
     SpotTiming::stop($timerPrefix . ':matchCommentMessageIds');

     /*
      * We keep a separate list of messageid's for updating the amount of
      * comments each spot.
      */
     $spotMsgIdList = array();

     /*
      * and a different list for comments with a rating, this way we won't
      * calculate the rating for a spot when a comment has no rating
      */
     $spotMsgIdRatingList = array();

     // Process each header
     $headerTimer = $timerPrefix . ':forEach-hdrList->retrieveFull';
     SpotTiming::start($timerPrefix . ':forEach-hdrList');
     foreach ($hdrList as $msgheader) {
         SpotDebug::msg(SpotDebug::DEBUG, 'foreach-loop: iter-start');

         // Reset timelimit
         set_time_limit(120);

         $commentId = $msgheader['Message-ID'];
         $artNr = $msgheader['Number'];
         SpotDebug::msg(SpotDebug::DEBUG, 'foreach-loop: processing: ' . $commentId . ', artNr=' . $artNr);

         /*
          * We prepare some variables so we don't have to perform an array
          * lookup for each check and the code is easier to read.
          */
         $header_isInDb = isset($dbIdList['comment'][$commentId]);
         $fullcomment_isInDb = isset($dbIdList['fullcomment'][$commentId]);
         SpotDebug::msg(SpotDebug::DEBUG, 'foreach-loop: headerIsInDb: ' . (int) $header_isInDb . ', fullComment=' . (int) $fullcomment_isInDb . ', retrieveFull= ' . (int) $this->_retrieveFull);

         /*
          * Do we have the comment in the database already? If not, lets process it
          */
         if (!$header_isInDb || (!$fullcomment_isInDb && $this->_retrieveFull)) {
             SpotTiming::start($headerTimer);

             /*
              * Because not all usenet servers pass the reference field properly,
              * we manually create this reference field by using the messageid of
              * the comment
              */
             $msgIdParts = explode(".", $commentId);
             $msgheader['References'] = $msgIdParts[0] . substr($commentId, strpos($commentId, '@'));
             $msgheader['stamp'] = strtotime($msgheader['Date']);
             SpotDebug::msg(SpotDebug::TRACE, 'foreach-loop: msgHeader=' . serialize($msgheader));

             /*
              * Don't add older comments than specified for the retention stamp,
              * nor comments older than the 'retrieve_newer_than' setting.
              * The timer is stopped first so it stays balanced when we skip.
              */
             $isPastRetention = $retentionStamp > 0
                 && $msgheader['stamp'] < $retentionStamp
                 && $this->_settings->get('retentiontype') == 'everything';
             if ($isPastRetention || $msgheader['stamp'] < $this->_settings->get('retrieve_newer_than')) {
                 SpotTiming::stop($headerTimer);
                 continue;
             }

             /*
              * Newer kind of comments contain a rating in the second part of
              * the messageid. Some older comments contain a non-numeric string
              * on this position, those are treated as 'no rating given' (zero).
              */
             if (count($msgIdParts) == 5 && is_numeric($msgIdParts[1])) {
                 $msgheader['rating'] = (int) $msgIdParts[1];
             } else {
                 $msgheader['rating'] = 0;
             }

             /*
              * Determine whether we need to add the header to the database
              * and extract the required fields
              */
             if (!$header_isInDb) {
                 $commentDbList[] = array(
                     'messageid' => $commentId,
                     'nntpref' => $msgheader['References'],
                     'stamp' => $msgheader['stamp'],
                     'rating' => $msgheader['rating'],
                 );

                 /*
                  * Some buggy NNTP servers give us the same messageid
                  * in one XOVER statement, hence we update the list of
                  * messageid's we already have retrieved and are ready
                  * to be added to the database
                  */
                 $dbIdList['comment'][$commentId] = 1;
                 $spotMsgIdList[$msgheader['References']] = 1;

                 /*
                  * If this comment contains a rating, mark the spot to
                  * have its rating be recalculated
                  */
                 if ($msgheader['rating'] >= 1 && $msgheader['rating'] <= 10) {
                     $spotMsgIdRatingList[$msgheader['References']] = 1;
                 }

                 $header_isInDb = true;
                 $lastProcessedId = $commentId;
                 $lastProcessedArtNr = $artNr;
             }

             SpotTiming::stop($headerTimer);
         } else {
             $lastProcessedId = $commentId;
             $lastProcessedArtNr = $artNr;
         }

         /*
          * We don't want to retrieve the full comment body if we don't have the header
          * in the database. Because we try to add headers in the above code we just have
          * to check if the header is in the database.
          *
          * We cannot collapse this code with the header fetching code because we want to
          * be able to add the full body to a system after all the headers are retrieved
          */
         if ($header_isInDb && !$fullcomment_isInDb) {
             /*
              * Don't add older fullcomments than specified for the retention stamp
              */
             if ($retentionStamp > 0 && strtotime($msgheader['Date']) < $retentionStamp) {
                 continue;
             }

             if ($this->_retrieveFull) {
                 try {
                     SpotDebug::msg(SpotDebug::DEBUG, 'foreach-loop: readFullComment start:' . $commentId);
                     $fullComment = $this->_svcNntpTextReading->readComments(array(array('messageid' => $commentId)));
                     SpotDebug::msg(SpotDebug::DEBUG, 'foreach-loop: readFullComment finished:' . $commentId);

                     // Queue this comment for the database and mark it as such
                     $fullCommentDbList[] = $fullComment;
                     $fullcomment_isInDb = true;

                     /*
                      * Some buggy NNTP servers give us the same messageid
                      * in one XOVER statement, hence we update the list of
                      * messageid's we already have retrieved and are ready
                      * to be added to the database
                      */
                     $dbIdList['fullcomment'][$commentId] = 1;
                 } catch (ParseSpotXmlException $x) {
                     // An unparsable comment is skipped on purpose
                 } catch (Exception $x) {
                     /*
                      * Sometimes we get a 'No such article' error for a header we just retrieved,
                      * if we want to retrieve the full article. This is messed up, but let's just
                      * swallow the error
                      */
                     if ($x->getCode() == 430) {
                         /*
                          * Reset error count, so other errors are actually re-tried
                          */
                         $this->_svcNntpText->resetErrorCount();
                         $this->_svcNntpBin->resetErrorCount();
                     } elseif ($x->getMessage() != 'String could not be parsed as XML') {
                         // XML parse failures are skipped on purpose, anything
                         // else is a real error the caller has to deal with.
                         throw $x;
                     }
                 }
             }
         }

         SpotDebug::msg(SpotDebug::DEBUG, 'foreach-loop: iter-stop');
     }
     SpotTiming::stop($timerPrefix . ':forEach-hdrList');
     SpotDebug::msg(SpotDebug::DEBUG, 'foreach-loop: done');

     $this->displayStatus("loopcount", count($hdrList));
     $this->displayStatus("timer", round(microtime(true) - $timer, 2));

     /*
      * Flatten the per-message results into one list. Every entry is
      * visited: an empty result for one message must not cause the
      * results of the messages after it to be dropped.
      */
     $fullComments = array();
     foreach ($fullCommentDbList as $fullComment) {
         if (is_array($fullComment) && !empty($fullComment)) {
             $fullComments = array_merge($fullComments, $fullComment);
         }
     }

     /*
      * Add the comments to the database and update the last article
      * number found
      */
     SpotTiming::start($timerPrefix . ':addComments() call');
     $this->_commentDao->addComments($commentDbList, $fullComments);
     SpotTiming::stop($timerPrefix . ':addComments() call');

     // Update the maximum article id
     if (!empty($lastProcessedId) && $lastProcessedArtNr > 0) {
         $this->_usenetStateDao->setMaxArticleId(Dao_UsenetState::State_Comments, $lastProcessedArtNr, $lastProcessedId);
     }

     /*
      * Recalculate the average spotrating and update the amount
      * of comments per spot
      */
     SpotTiming::start($timerPrefix . ':updateSpotRating()');
     $this->_spotDao->updateSpotRating($spotMsgIdRatingList);
     SpotTiming::stop($timerPrefix . ':updateSpotRating()');

     SpotTiming::start($timerPrefix . ':updateSpotCommentCount()');
     $this->_spotDao->updateSpotCommentCount($spotMsgIdList);
     SpotTiming::stop($timerPrefix . ':updateSpotCommentCount()');

     return array('count' => count($hdrList), 'headercount' => count($hdrList), 'lastmsgid' => $lastProcessedId);
 }
Ejemplo n.º 4
0
 /**
  * Executes a POST to the given url and returns the response body, but
  * only when that body is well-formed XML.
  *
  * @param string $url URL to post to
  * @param array $postdata Fields to send as the POST content
  * @return string|false The raw response body, or false when the request
  *                      was not successful or the body is not well-formed XML
  */
 protected function postAndDownloadNzb($url, array $postdata)
 {
     // Initialize download retrieval class
     $svcHttp = new Services_Providers_Http($this->_cacheDao);
     $svcHttp->setPostContent($postdata);
     $svcHttp->setMethod('POST');
     $result = $svcHttp->perform($url);

     // Check if any error occurred
     if (!$result['successful']) {
         SpotDebug::msg(SpotDebug::DEBUG, __CLASS__ . '->postAndDownloadNzb(), not succesful=' . $result['errorstr']);
         return false;
     }

     $body = $result['data'];
     if (!is_string($body) || $body === '') {
         return false;
     }

     /*
      * We only parse the body to test whether it is well formed. Parse
      * errors are collected internally instead of being emitted as
      * warnings, and the previous libxml setting is restored afterwards.
      */
     $previousSetting = libxml_use_internal_errors(true);
     $xml = simplexml_load_string($body);
     libxml_clear_errors();
     libxml_use_internal_errors($previousSetting);

     /*
      * Compare strictly against false: a successfully parsed element
      * without children or attributes evaluates to false in a boolean
      * context, even though the XML is well formed.
      */
     if ($xml === false) {
         return false;
     }

     return $body;
 }
Ejemplo n.º 5
0
SpotClassAutoload::register();
#- main() -#
try {
    SpotTranslation::initialize('en_US');
    SpotTiming::enable();
    SpotTiming::start('total');
    /*
     * Initialize the Spotweb base classes
     */
    $bootstrap = new Bootstrap();
    list($settings, $daoFactory, $req) = $bootstrap->boot();
    /*
     * Enable debug logging mechanism if timing is enabled
     */
    if ($settings->get('enable_timing')) {
        SpotDebug::enable(SpotDebug::TRACE, $daoFactory->getDebugLogDao());
    }
    # if
    # helper functions for passed variables
    $page = $req->getDef('page', 'index');
    # Retrieve the users object of the user which is logged on
    SpotTiming::start('auth');
    $svcUserAuth = new Services_User_Authentication($daoFactory, $settings);
    if ($req->doesExist('apikey')) {
        $currentSession = $svcUserAuth->verifyApi($req->getDef('apikey', ''));
    } else {
        $currentSession = $svcUserAuth->useOrStartSession(false);
    }
    # if
    /*
     * If there is no user object, we don't have a security system
Ejemplo n.º 6
0
 function process($hdrList, $curArtNr, $increment, $timer)
 {
     $this->displayStatus("progress", $curArtNr . " till " . $increment);
     $signedCount = 0;
     $hdrsParsed = 0;
     $fullsRetrieved = 0;
     $invalidCount = 0;
     $msgCounter = 0;
     $modCount = 0;
     $headerInDbCount = 0;
     $skipCount = 0;
     $lastProcessedId = '';
     $lastProcessedArtNr = 0;
     $fullSpotDbList = array();
     $spotDbList = array();
     $moderationList = array();
     $processingStartTime = time();
     /*
      * Determine the cutoff date (unixtimestamp) from whereon we do not want to 
      * load the spots
      */
     if ($this->_settings->get('retention') > 0) {
         $retentionStamp = time() - $this->_settings->get('retention') * 24 * 60 * 60;
     } else {
         $retentionStamp = 0;
     }
     # else
     SpotDebug::msg(SpotDebug::DEBUG, 'retentionStamp=' . $retentionStamp);
     SpotDebug::msg(SpotDebug::TRACE, 'hdrList=' . serialize($hdrList));
     /**
      * We ask the database to match our messageid's we just retrieved with
      * the list of id's we have just retrieved from the server
      */
     SpotTiming::start(__CLASS__ . '::' . __FUNCTION__ . ':matchSpotMessageIds');
     $dbIdList = $this->_spotDao->matchSpotMessageIds($hdrList);
     SpotTiming::stop(__CLASS__ . '::' . __FUNCTION__ . ':matchSpotMessageIds');
     SpotTiming::start(__CLASS__ . '::' . __FUNCTION__ . ':getMassCacheRecords');
     $cachedIdList = $this->_cacheDao->getMassCacheRecords($hdrList);
     SpotTiming::stop(__CLASS__ . '::' . __FUNCTION__ . ':getMassCacheRecords');
     SpotDebug::msg(SpotDebug::TRACE, 'dbIdList=' . serialize($dbIdList));
     /*
      * We get a list of spots which have been blacklisted before,
      * we do this because when the 'buggy' flag is set, we else keep
      * retrieving the same spots, nzb's and images over and over again
      */
     $preModdedList = $this->_modListDao->matchAgainst($hdrList);
     SpotTiming::start(__CLASS__ . '::' . __FUNCTION__ . ':forEach');
     foreach ($hdrList as $msgheader) {
         SpotTiming::start(__CLASS__ . '::' . __FUNCTION__ . ':forEach-to-ParseHeader');
         $msgCounter++;
         SpotDebug::msg(SpotDebug::DEBUG, 'foreach-loop, start. msgId= ' . $msgCounter);
         /* 
          * Keep the usenet server alive when processing is slow.
          */
         if ($processingStartTime - time() > 30) {
             $this->_svcNntpText->sendNoop();
             $this->_svcNntpBin->sendNoop();
             $processingStartTime = time();
         }
         # if
         /*
          * We keep track whether we actually fetched this header and fullspot
          * to add it to the database, because only then we can update the
          * title from the spots title or rely on our database to fetch
          * the fullspot
          */
         $didFetchHeader = false;
         $didFetchFullSpot = false;
         # Reset timelimit
         set_time_limit(120);
         # messageid to check
         $msgId = $msgheader['Message-ID'];
         $artNr = $msgheader['Number'];
         /*
          * If this message was already deleted in a previous run,
          * let's not even consider it
          */
         if (isset($preModdedList[$msgId])) {
             SpotTiming::stop(__CLASS__ . '::' . __FUNCTION__ . ':forEach-to-ParseHeader');
             $skipCount++;
             continue;
         }
         # if
         /*
          * We prepare some variables to we don't have to perform an array
          * lookup for each check and the code is easier to read.
          */
         $header_isInDb = isset($dbIdList['spot'][$msgId]);
         $fullspot_isInDb = isset($dbIdList['fullspot'][$msgId]);
         /*
          * If the spotheader is not yet added to the database, parse the header
          * information.
          *
          * If the header is present, but we don't have the fullspot yet or we are
          * running in 'retro' mode, parse the header as well because some fields
          * are only in the header and not in the full.
          * 
          * We need some of those fields (for example KeyID)
          */
         if (!$header_isInDb || (!$fullspot_isInDb || $this->_retro) && $this->_retrieveFull) {
             $hdrsParsed++;
             SpotDebug::msg(SpotDebug::TRACE, 'foreach-loop, parsingXover, start. msgId= ' . $msgCounter);
             SpotTiming::start(__CLASS__ . '::' . __FUNCTION__ . ':parseHeader');
             $spot = $this->_svcSpotParser->parseHeader($msgheader['Subject'], $msgheader['From'], $msgheader['Date'], $msgheader['Message-ID'], $this->_rsakeys);
             SpotTiming::stop(__CLASS__ . '::' . __FUNCTION__ . ':parseHeader');
             SpotDebug::msg(SpotDebug::TRACE, 'foreach-loop, parsingXover, done. msgId= ' . $msgCounter);
             /*
              * When a parse error occurred, we ignore the spot, also unverified
              * spots are ignored
              */
             if ($spot === false || !$spot['verified']) {
                 SpotTiming::stop(__CLASS__ . '::' . __FUNCTION__ . ':forEach-to-ParseHeader');
                 $invalidCount++;
                 continue;
             }
             # if
             /*
              * Special moderator commands always have keyid 2
              */
             if ($spot['keyid'] == 2) {
                 $commandAr = explode(' ', $spot['title']);
                 $validCommands = array('delete', 'dispose', 'remove');
                 # is this one of the defined valid commands?
                 if (in_array(strtolower($commandAr[0]), $validCommands) !== false) {
                     $moderationList[$commandAr[1]] = 1;
                     $modCount++;
                 }
                 # if
             } else {
                 /*
                  * Don't add spots older than specified for the retention stamp
                  */
                 if ($retentionStamp > 0 && $spot['stamp'] < $retentionStamp && $this->_settings->get('retentiontype') == 'everything') {
                     SpotTiming::stop(__CLASS__ . '::' . __FUNCTION__ . ':forEach-to-ParseHeader');
                     $skipCount++;
                     continue;
                 } elseif ($spot['stamp'] < $this->_settings->get('retrieve_newer_than')) {
                     $skipCount++;
                 } else {
                     /*
                      * Do we have the header in the database? If not, lets add it
                      */
                     if (!$header_isInDb) {
                         $spotDbList[] = $spot;
                         /*
                          * Some buggy NNTP servers give us the same messageid
                          * in one XOVER statement, hence we update the list of
                          * messageid's we already have retrieved and are ready
                          * to be added to the database
                          */
                         $dbIdList['spot'][$msgId] = 1;
                         $header_isInDb = true;
                         $lastProcessedId = $msgId;
                         $lastProcessedArtNr = $artNr;
                         $didFetchHeader = true;
                         if ($spot['wassigned']) {
                             $signedCount++;
                         }
                         # if
                     }
                     # if
                 }
                 # if
             }
             # else
         } else {
             $lastProcessedId = $msgId;
             $lastProcessedArtNr = $artNr;
             $headerInDbCount++;
         }
         # else
         SpotTiming::start(__CLASS__ . '::' . __FUNCTION__ . ':forEach-getFullSpot');
         /*
          * We don't want to retrieve the fullspot if we don't have the header
          * in the database. Because we try to add headers in the above code we just have
          * to check if the header is in the database.
          *
          * We cannot collapse this code with the header fetching code because we want to
          * be able to add the fullspot to a system after all the headers are retrieved
          */
         if ($header_isInDb && !$fullspot_isInDb) {
             /*
              * Don't add older fullspots than specified for the retention stamp
              */
             if ($retentionStamp > 0 && strtotime($msgheader['Date']) < $retentionStamp) {
                 SpotTiming::stop(__CLASS__ . '::' . __FUNCTION__ . ':forEach-to-ParseHeader');
                 SpotTiming::stop(__CLASS__ . '::' . __FUNCTION__ . ':forEach-getFullSpot');
                 continue;
             }
             # if
             if ($this->_retrieveFull) {
                 $fullSpot = array();
                 try {
                     $fullsRetrieved++;
                     SpotDebug::msg(SpotDebug::TRACE, 'foreach-loop, getFullSpot, start. msgId= ' . $msgId);
                     $fullSpot = $this->_svcNntpTextReading->readFullSpot($msgId);
                     SpotDebug::msg(SpotDebug::TRACE, 'foreach-loop, getFullSpot, done. msgId= ' . $msgId);
                     # did we fail to parse the spot? if so, skip this one
                     if (empty($fullSpot)) {
                         $invalidCount++;
                         continue;
                     }
                     // if
                     # add this spot to the database
                     $fullSpotDbList[] = $fullSpot;
                     $fullspot_isInDb = true;
                     $didFetchFullSpot = true;
                     /*
                      * Some buggy NNTP servers give us the same messageid
                      * in the same XOVER statement, hence we update the list of
                      * messageid's we already have retrieved and are ready
                      * to be added to the database
                      */
                     $dbIdList['fullspot'][$msgId] = 1;
                     /*
                      * Overwrite the spots' title because the fullspot contains the title in
                      * UTF-8 format.
                      * We also overwrite the spotterid from the spotsfull because the spotterid
                      * is only in the header in more recent spots.
                      */
                     if ($didFetchHeader) {
                         $spotDbList[count($spotDbList) - 1]['title'] = $fullSpot['title'];
                         $spotDbList[count($spotDbList) - 1]['spotterid'] = $fullSpot['spotterid'];
                     }
                     # if
                 } catch (ParseSpotXmlException $x) {
                     # swallow error
                 } catch (Exception $x) {
                     /**
                      * Sometimes we get an 'No such article' error for a header we just retrieved,
                      * if we want to retrieve the full article. This is messed up, but let's just
                      * swallow the error
                      */
                     if ($x->getCode() == 430) {
                         /*
                          * Reset error count, so other errors are actually re-tried
                          */
                         $this->_svcNntpText->resetErrorCount();
                         $this->_svcNntpBin->resetErrorCount();
                     } elseif ($x->getMessage() == 'String could not be parsed as XML') {
                     } else {
                         throw $x;
                     }
                     # else
                 }
                 # catch
             }
             # if retrievefull
         }
         # if fullspot is not in db yet
         SpotTiming::stop(__CLASS__ . '::' . __FUNCTION__ . ':forEach-getFullSpot');
         SpotTiming::start(__CLASS__ . '::' . __FUNCTION__ . ':forEach-getNzbOrImage');
         /*
          * If both the image and the NZB file are already in the cache,
          * or we are set to not prefetch them, don't bother to retrieve
          * the full spot either from the database
          */
         $needPrefetch = $this->_prefetch_image || $this->_prefetch_nzb;
         if (!$this->_retrieveFull || !$header_isInDb) {
             $needPrefetch = false;
         }
         # if
         if ($needPrefetch) {
             $needPrefetch = !isset($cachedIdList[Dao_Cache::SpotImage][$msgId]) || !isset($cachedIdList[Dao_Cache::SpotNzb][$msgId]);
         }
         # if
         if ($needPrefetch) {
             try {
                 /*
                  * If we are running in 'retro' mode, it is possible both the header and spot are in the
                  * database already, however -- we need the information from the fullspot so we retrieve it
                  * again
                  */
                 if (!$didFetchFullSpot) {
                     SpotTiming::start(__CLASS__ . '::' . __FUNCTION__ . ':daoGetFullSpot');
                     $fullSpot = $this->_spotDao->getFullSpot($msgId, SPOTWEB_ANONYMOUS_USERID);
                     SpotTiming::start(__CLASS__ . '::' . __FUNCTION__ . ':retrieveParseFullSpot');
                     $fullSpot = array_merge($this->_svcSpotParser->parseFull($fullSpot['fullxml']), $fullSpot);
                     SpotTiming::stop(__CLASS__ . '::' . __FUNCTION__ . ':retrieveParseFullSpot', array());
                     SpotTiming::stop(__CLASS__ . '::' . __FUNCTION__ . ':daoGetFullSpot');
                 }
                 # if
                 /*
                  * Prefetch (cache) the spots' image
                  */
                 SpotTiming::start(__CLASS__ . '::' . __FUNCTION__ . ':forEach-getImage');
                 if ($this->_prefetch_image) {
                     SpotDebug::msg(SpotDebug::TRACE, 'foreach-loop, getImage(), start. msgId= ' . $msgId);
                     if (!isset($cachedIdList[Dao_Cache::SpotImage][$fullSpot['messageid']])) {
                         $this->_svcProvImage->fetchSpotImage($fullSpot);
                     }
                     # if
                     SpotDebug::msg(SpotDebug::TRACE, 'foreach-loop, getImage(), done. msgId= ' . $msgId);
                 }
                 # if
                 SpotTiming::stop(__CLASS__ . '::' . __FUNCTION__ . ':forEach-getImage');
                 SpotTiming::start(__CLASS__ . '::' . __FUNCTION__ . ':forEach-getNzb');
                 /*
                  * Prefetch (cache) the spots' NZB file
                  */
                 if ($this->_prefetch_nzb) {
                     /*
                      * Only do so if we can expect an NZB file
                      */
                     if (!empty($fullSpot['nzb']) && $fullSpot['stamp'] > 1290578400) {
                         SpotDebug::msg(SpotDebug::TRACE, 'foreach-loop, getNzb(), start. msgId= ' . $msgId);
                         if (!isset($cachedIdList[Dao_Cache::SpotNzb][$fullSpot['messageid']])) {
                             $this->_svcProvNzb->fetchNzb($fullSpot);
                         }
                         # if
                         SpotDebug::msg(SpotDebug::TRACE, 'foreach-loop, getNzb(), done. msgId= ' . $msgId);
                     }
                     # if
                 }
                 # if
                 SpotTiming::stop(__CLASS__ . '::' . __FUNCTION__ . ':forEach-getNzb');
             } catch (ParseSpotXmlException $x) {
                 # swallow error
             } catch (Exception $x) {
                 SpotTiming::stop(__CLASS__ . '::' . __FUNCTION__ . ':retrieveParseFullSpot', array());
                 SpotTiming::stop(__CLASS__ . '::' . __FUNCTION__ . ':daoGetFullSpot');
                 /**
                  * Sometimes we get an 'No such article' error for a header we just retrieved,
                  * if we want to retrieve the full article. This is messed up, but let's just
                  * swallow the error
                  */
                 if ($x->getCode() == 430) {
                     /*
                      * Reset error count, so other errors are actually re-tried
                      */
                     $this->_svcNntpText->resetErrorCount();
                     $this->_svcNntpBin->resetErrorCount();
                 } elseif ($x->getMessage() == 'String could not be parsed as XML') {
                 } else {
                     throw $x;
                 }
                 # else
             }
             # catch
         }
         # if prefetch image and/or nzb
         SpotTiming::stop(__CLASS__ . '::' . __FUNCTION__ . ':forEach-getNzbOrImage');
         SpotTiming::stop(__CLASS__ . '::' . __FUNCTION__ . ':forEach-to-ParseHeader');
         /*
          * If we are under memory pressure, flush the cache to disk in advance so we
          * can free up memory. This is slower, but might avoid ballooning memory.
          */
         if ($this->hasMemoryPressure()) {
             SpotDebug::msg(SpotDebug::DEBUG, 'we are under memory pressure, flushing to disk');
             echo "We are under memory pressure... ";
             $this->_spotDao->addSpots($spotDbList, $fullSpotDbList);
             $spotDbList = array();
             $fullSpotDbList = array();
         }
         // if
         SpotDebug::msg(SpotDebug::DEBUG, 'foreach-loop, done. msgId= ' . $msgCounter);
     }
     # foreach
     SpotTiming::stop(__CLASS__ . '::' . __FUNCTION__ . ':forEach');
     if (count($hdrList) > 0) {
         $this->displayStatus("hdrparsed", $hdrsParsed);
         $this->displayStatus("hdrindbcount", $headerInDbCount);
         $this->displayStatus("verified", $signedCount);
         $this->displayStatus("invalidcount", $invalidCount);
         $this->displayStatus("skipcount", $skipCount);
         $this->displayStatus("modcount", $modCount);
         $this->displayStatus("fullretrieved", $fullsRetrieved);
         $this->displayStatus("loopcount", count($hdrList));
     } else {
         $this->displayStatus("hdrparsed", 0);
         $this->displayStatus("hdrindbcount", 0);
         $this->displayStatus("verified", 0);
         $this->displayStatus("invalidcount", 0);
         $this->displayStatus("skipcount", 0);
         $this->displayStatus("modcount", 0);
         $this->displayStatus("fullretrieved", 0);
         $this->displayStatus("loopcount", 0);
     }
     # else
     /* 
      * Add the spots to the database and update the last article
      * number found
      */
     $this->_spotDao->addSpots($spotDbList, $fullSpotDbList);
     SpotDebug::msg(SpotDebug::TRACE, 'added Spots, spotDbList=' . serialize($spotDbList));
     SpotDebug::msg(SpotDebug::TRACE, 'added Spots, fullSpotDbList=' . serialize($fullSpotDbList));
     /*
      * Actually act on the moderation settings. We cannot process this inline
      * because a spot can be added and moderated within the same iteration
      */
     switch ($this->_settings->get('spot_moderation')) {
         case 'disable':
             break;
         case 'markspot':
             $this->_commentDao->markCommentsModerated($moderationList);
             $this->_spotDao->markSpotsModerated($moderationList);
             break;
             # case 'markspot'
         # case 'markspot'
         default:
             $this->_spotDao->removeSpots($moderationList);
             $this->_commentDao->removeComments($moderationList);
             /*
              * If the spots actually get removed, we want to make
              * sure we write the deleted spots down. This prevents
              * us from retrieving and deleting them over and over again
              */
             $this->_modListDao->addToRingBuffer($moderationList);
             break;
             # default
     }
     # switch
     # update the maximum article id
     if (!empty($lastProcessedId) && $lastProcessedArtNr > 0) {
         $this->_usenetStateDao->setMaxArticleId(Dao_UsenetState::State_Spots, $lastProcessedArtNr, $lastProcessedId);
     }
     # if
     SpotDebug::msg(SpotDebug::DEBUG, 'loop finished, setMaxArticleId=' . serialize($increment));
     /*
      * And remove old list of moderated spots
      */
     $this->_modListDao->deleteOldest();
     $this->displayStatus("timer", round(microtime(true) - $timer, 2));
     return array('count' => count($hdrList), 'headercount' => $hdrsParsed, 'lastmsgid' => $lastProcessedId);
 }
Ejemplo n.º 7
0
 /**
  * Checks that the server is usable by selecting a newsgroup and then
  * closing the connection again.
  *
  * Nothing is returned; NOTE(review): failures presumably surface as
  * exceptions thrown by selectGroup() or quit() -- confirm against those methods.
  */
 public function validateServer()
 {
     $traceLabel = __CLASS__ . "->validateServer()";
     SpotDebug::msg(SpotDebug::TRACE, $traceLabel);

     /*
      * Credentials are not always filled in, yet authentication is
      * sometimes required; selecting a group is what brings that
      * situation to light.
      */
     $probeGroup = 'free.pt';
     $this->selectGroup($probeGroup);

     # done probing, hang up
     $this->quit();
 }
Ejemplo n.º 8
0
 /**
  * Sets the class-wide static $_level to the DISABLED constant.
  *
  * Only $_level is touched here; no other state is reset.
  * NOTE(review): presumably this silences all debug output -- confirm
  * against the code that reads $_level.
  */
 static function disable()
 {
     self::$_level = self::DISABLED;
 }
Ejemplo n.º 9
0
 # if
 /*
  * We normally check whether we are not running already, because
  * running twice would mess up all sorts of things like
  * comment calculation, but a user can force our hand with
  * the 'force' command line option
  */
 $forceMode = SpotCommandline::get('force');
 /*
  * Do we need to debuglog this session? Generates loads of
  * output. When the 'debug' option is set, debugging is enabled at
  * TRACE level using the debug log DAO from the DAO factory;
  * otherwise it is disabled explicitly
  */
 $debugLog = SpotCommandline::get('debug');
 if ($debugLog) {
     SpotDebug::enable(SpotDebug::TRACE, $daoFactory->getDebugLogDao());
 } else {
     SpotDebug::disable();
 }
 # if
 /*
  * Retro mode will allow us to start from the beginning and retrieve
  * all spots starting from scratch
  */
 $retroMode = SpotCommandline::get('retro');
 /*
  * Retention cleanup. Basically when we ask for Spotweb to only
  * keep spots for 'xx' days (eg: 30 days), we either have to delete
  * everything older than 'xx' days, or delete all 'full' resources
  * older than the specified time period.
  *
  * The full resources are everything beyond the bare minimum to 
  * display the spots, so we delete nzb's, images, comments, etc.
Ejemplo n.º 10
0
 /**
  * Walks the newsgroup from $curArticleNr up to the group's last article
  * number. Each pass requests the overview for the range
  * [$curArticleNr, $curArticleNr + $increment] and hands the result to
  * process().
  *
  * After the loop, removeTooNewRecords() is called with the most recent
  * non-empty message-id reported by process(), if any batch reported one.
  *
  * @param int $curArticleNr Article number to start at; raised to the group's
  *                          first article number when it lies below it
  * @param int $increment    How far past $curArticleNr each overview request reaches
  *
  * @return int Sum of the 'headercount' values returned by process()
  */
 function loopTillEnd($curArticleNr, $increment = 1000)
 {
     $processed = 0;
     $headersProcessed = 0;
     $highestMessageId = '';
     # make sure we handle articlenumber wrap arounds
     if ($curArticleNr < $this->_msgdata['first']) {
         $curArticleNr = $this->_msgdata['first'];
     }
     # if
     $this->displayStatus("groupmessagecount", $this->_msgdata['last'] - $this->_msgdata['first']);
     $this->displayStatus("firstmsg", $this->_msgdata['first']);
     $this->displayStatus("lastmsg", $this->_msgdata['last']);
     $this->displayStatus("curartnr", $curArticleNr);
     $this->displayStatus("", "");
     SpotTiming::start(__CLASS__ . '::' . __FUNCTION__ . ':whileLoop');
     while ($curArticleNr < $this->_msgdata['last']) {
         $timer = microtime(true);
         # get the list of headers (XOVER)
         SpotTiming::start(__CLASS__ . '::' . __FUNCTION__ . ':getOverview');
         $hdrList = $this->_svcNntpText->getOverview($curArticleNr, $curArticleNr + $increment);
         SpotTiming::stop(__CLASS__ . '::' . __FUNCTION__ . ':getOverview');
         $saveCurArtNr = $curArticleNr;
         # If no spots were found (or the last header lies before our
         # current position), just manually increase the messagenumber
         # with the increment to make sure we advance
         $hdrCount = count($hdrList);
         if ($hdrCount < 1 || $hdrList[$hdrCount - 1]['Number'] < $curArticleNr) {
             $curArticleNr += $increment;
         } else {
             $curArticleNr = $hdrList[$hdrCount - 1]['Number'] + 1;
         }
         # else
         # run the processing method
         SpotTiming::start(__CLASS__ . '::' . __FUNCTION__ . ':callProcess');
         $processOutput = $this->process($hdrList, $saveCurArtNr, $curArticleNr, $timer);
         SpotTiming::stop(__CLASS__ . '::' . __FUNCTION__ . ':callProcess');
         $processed += $processOutput['count'];
         $headersProcessed += $processOutput['headercount'];
         /*
          * Only remember a message-id when this batch actually reported one.
          * A later batch without a usable id (for example an empty overview
          * range) would otherwise wipe the id found by an earlier batch, and
          * the cleanup after the loop would silently be skipped.
          */
         if (!empty($processOutput['lastmsgid'])) {
             $highestMessageId = $processOutput['lastmsgid'];
         }
         # if
         # reset the start time to prevent another retriever from starting
         # during the initial retrieve which can take many hours
         $this->_usenetStateDao->setRetrieverRunning(true);
         /*
          * Make sure if we run with timing on, we do not fetch too many
          * spots as that would make us run out of memory
          */
         if ($processed > 3000 && SpotTiming::isEnabled()) {
             break;
         }
         # if
     }
     # while
     SpotTiming::stop(__CLASS__ . '::' . __FUNCTION__ . ':whileLoop');
     # we are done updating, make sure that if the newsserver deleted
     # earlier retrieved messages, we remove them from our database
     if ($highestMessageId != '') {
         SpotDebug::msg(SpotDebug::DEBUG, 'loopTillEnd() finished, highestMessageId = ' . $highestMessageId);
         $this->removeTooNewRecords($highestMessageId);
     }
     # if
     $this->displayStatus("totalprocessed", $processed);
     return $headersProcessed;
 }