/**
 * Processes one batch of XOVER headers: parses every spot header the database
 * does not know yet, acts on moderator commands, stores new spots and, when
 * full retrieval is enabled, fetches and stores the matching full spot.
 *
 * @param array $hdrList XOVER headers; each entry is read for its 'Subject',
 *                       'From' and 'Message-ID' fields
 * @param mixed $curMsg  start of the batch; shown in the progress line and
 *                       stored as the maximum article id for this server
 * @param mixed $endMsg  end of the batch; only shown in the progress line
 * @return int number of headers in $hdrList
 * @throws Exception when fetching a full spot fails with anything other than
 *                   the two known, ignorable error messages
 */
function process($hdrList, $curMsg, $endMsg) {
    $this->displayStatus("progress", $curMsg . " till " . $endMsg);

    # NOTE(review): an exception rethrown from the loop below skips the commit
    # at the end of this method -- confirm the caller handles the open transaction.
    $this->_db->beginTransaction();

    $signedCount = 0;
    $hdrsRetrieved = 0;
    $fullsRetrieved = 0;
    $modCount = 0;
    $skipCount = 0;

    $validCommands = array('delete', 'dispose', 'remove');
    $ignorableErrors = array('No such article found', 'String could not be parsed as XML');

    # Ask the database which of these message ids are already stored
    $dbIdList = $this->_db->matchMessageIds($hdrList);

    foreach ($hdrList as $msgheader) {
        # Reset the time limit for every header
        set_time_limit(120);

        # Message-ID without the surrounding angle brackets
        $msgId = substr($msgheader['Message-ID'], 1, -1);

        if (!in_array($msgId, $dbIdList['spot'])) {
            # The spot header is not in the database yet, so parse it
            $hdrsRetrieved++;
            $spotParser = new SpotParser();
            $spot = $spotParser->parseXover($msgheader['Subject'],
                                            $msgheader['From'],
                                            $msgheader['Message-ID'],
                                            $this->_rsakeys);

            # Parse errors and unverified spots are dropped entirely
            if ($spot === false || !$spot['Verified']) {
                continue;
            } # if

            if ($spot['KeyID'] == 2) {
                # KeyID 2 marks a moderator post: "<command> <target>"
                $commandAr = explode(' ', strtolower($spot['Title']));

                # Only act on a known command that actually names a target;
                # a bare command would otherwise pass an undefined index on
                # to the database layer.
                $hasTarget = isset($commandAr[1]) && $commandAr[1] !== '';
                if ($hasTarget && in_array($commandAr[0], $validCommands, true)) {
                    switch ($this->_settings['spot_moderation']) {
                        case 'disable':
                            break;
                        case 'markspot':
                            $this->_db->markSpotModerated($commandAr[1]);
                            break;
                        default:
                            $this->_db->deleteSpot($commandAr[1]);
                            break;
                    } # switch

                    $modCount++;
                } # if
            } else {
                # Do not add spots older than the retention period, so they
                # do not have to be removed again later
                $retentionDays = $this->_settings['retention'];
                if ($retentionDays > 0 && $spot['Stamp'] < time() - $retentionDays * 24 * 60 * 60) {
                    $skipCount++;
                } else {
                    $this->_db->addSpot($spot);
                    $dbIdList['spot'][] = $msgId;

                    if ($spot['WasSigned']) {
                        $signedCount++;
                    } # if
                } # else
            } # else
        } else {
            # Header is already stored; load it because it is needed below
            $spot = $this->_db->getFullSpot($msgId);
        } # else

        # The full spot is only fetched when its header is in the database
        # (possibly added just above) and the full spot itself is not.
        $needsFull = in_array($msgId, $dbIdList['spot']) && !in_array($msgId, $dbIdList['fullspot']);

        # KeyID 2 is a moderator post, for which no full spot is retrieved
        if ($needsFull && $this->_retrieveFull && $spot['KeyID'] != 2) {
            try {
                $fullsRetrieved++;
                $fullSpot = $this->_spotnntp->getFullSpot($msgId);

                # and add it to the database
                $this->_db->addFullSpot($fullSpot);
            } catch (ParseSpotXmlException $x) {
                # swallow error
            } catch (Exception $x) {
                # The server sometimes lists an article it cannot deliver or
                # delivers unparsable XML; those are ignored, the rest is not.
                if (!in_array($x->getMessage(), $ignorableErrors, true)) {
                    throw $x;
                } # if
            } # catch
        } # if
    } # foreach

    # For an empty batch every counter is still 0, so no special case is needed
    $headerCount = count($hdrList);
    $this->displayStatus("hdrparsed", $hdrsRetrieved);
    $this->displayStatus("fullretrieved", $fullsRetrieved);
    $this->displayStatus("verified", $signedCount);
    $this->displayStatus("modcount", $modCount);
    $this->displayStatus("skipcount", $skipCount);
    $this->displayStatus("loopcount", $headerCount);

    $this->_db->setMaxArticleid($this->_server['host'], $curMsg);
    $this->_db->commitTransaction();

    return $headerCount;
}
/**
 * Processes one batch of XOVER headers: parses each header and stores every
 * spot that parsed and verified successfully.
 *
 * @param array $hdrList   XOVER headers; each entry is read for its 'Subject',
 *                         'From' and 'Message-ID' fields
 * @param mixed $curMsg    start of the batch; shown in the progress line and
 *                         stored as the maximum article id for this server
 * @param mixed $increment batch size; only used to compute the end shown in
 *                         the progress line
 * @return int number of headers in $hdrList
 */
function process($hdrList, $curMsg, $increment) {
    $this->displayStatus("progress", $curMsg . " till " . ($curMsg + $increment));

    $this->_db->beginTransaction();
    $signedCount = 0;

    foreach ($hdrList as $msgheader) {
        # Reset the time limit for every header
        set_time_limit(120);

        $spotParser = new SpotParser();
        $spot = $spotParser->parseXover($msgheader['Subject'],
                                        $msgheader['From'],
                                        $msgheader['Message-ID'],
                                        $this->_rsakeys);

        # Skip failed parses and unverified spots in one place, so a failed
        # parse result is never indexed afterwards.
        if ($spot == null || !$spot['Verified']) {
            continue;
        } # if

        $this->_db->addSpot($spot);

        if ($spot['WasSigned']) {
            $signedCount++;
        } # if
    } # foreach

    # For an empty batch $signedCount is still 0, so no special case is needed
    $headerCount = count($hdrList);
    $this->displayStatus("verified", $signedCount);
    $this->displayStatus("loopcount", $headerCount);

    $this->_db->setMaxArticleid($this->_server['host'], $curMsg);
    $this->_db->commitTransaction();

    return $headerCount;
}
/**
 * Processes one batch of XOVER headers. New spot headers and full spots are
 * collected and written to the database in one call at the end, moderator
 * commands are collected and applied after that, and images / NZB files are
 * prefetched when configured.
 *
 * @param array $hdrList XOVER headers; each entry is read for its 'Subject',
 *                       'From', 'Date' and 'Message-ID' fields
 * @param mixed $curMsg  start of the batch; only shown in the progress line
 * @param mixed $endMsg  end of the batch; stored as the new maximum article id
 * @param float $timer   microtime(true)-style start time; the elapsed time
 *                       since it is reported through displayStatus("timer")
 * @return array with keys 'count' (headers in the batch), 'headercount'
 *               (headers parsed) and 'lastmsgid' (last message id handled)
 * @throws Exception when an NNTP/prefetch error is neither code 430 nor the
 *                   known XML parse message
 */
function process($hdrList, $curMsg, $endMsg, $timer) {
    $this->displayStatus("progress", $curMsg . " till " . $endMsg);

    $spotParser = new SpotParser();
    $signedCount = 0;
    $hdrsRetrieved = 0;
    $fullsRetrieved = 0;
    $msgCounter = 0;
    $modCount = 0;
    $skipCount = 0;
    $lastProcessedId = '';
    $fullSpotDbList = array();
    $spotDbList = array();
    $moderationList = array();
    $validCommands = array('delete', 'dispose', 'remove');

    # Moment the NNTP connection(s) were last known to be active
    $lastKeepAlive = time();

    /*
     * Determine the cutoff date (unix timestamp) before which spots are
     * not loaded; 0 means no retention limit.
     */
    $retentionDays = $this->_settings->get('retention');
    $retentionStamp = ($retentionDays > 0) ? time() - $retentionDays * 24 * 60 * 60 : 0;
    $this->debug('retentionStamp=' . $retentionStamp);
    $this->debug('hdrList=' . serialize($hdrList));

    /*
     * Ask the database which of the message ids just retrieved from the
     * server it already has.
     */
    $dbIdList = $this->_db->matchSpotMessageIds($hdrList);
    $this->debug('dbIdList=' . serialize($dbIdList));

    /*
     * Prefetching needs the full spot, so it only happens when full
     * retrieval is on AND at least one prefetch option is set. The explicit
     * parentheses matter: without them '&&' binds tighter than '||'.
     */
    $prefetchWanted = $this->_retrieveFull && ($this->_prefetch_image || $this->_prefetch_nzb);
    if ($prefetchWanted) {
        $spotsOverview = new SpotsOverview($this->_db, $this->_settings);
        $spotsOverview->setActiveRetriever(true);

        /*
         * Only open a new NZB connection if that server differs from the
         * header host, otherwise re-use the existing connection.
         */
        $settings_nntp_nzb = $this->_settings->get('nntp_nzb');
        if ($this->_server['host'] == $settings_nntp_nzb['host']) {
            $nntp_nzb = $this->_spotnntp;
        } else {
            $nntp_nzb = new SpotNntp($settings_nntp_nzb);
            $nntp_nzb->selectGroup($this->_settings->get('nzb_group'));
        } # else
    } # if

    foreach ($hdrList as $msgheader) {
        $msgCounter++;
        $this->debug('foreach-loop, start. msgId= ' . $msgCounter);

        /*
         * Keep the usenet server(s) alive when processing is slow: send a
         * NOOP once more than 30 seconds have passed since the last one.
         */
        if (time() - $lastKeepAlive > 30) {
            $this->_spotnntp->sendNoop();

            # Identity check: only ping the NZB connection when it really
            # is a separate connection object.
            if (isset($nntp_nzb) && $nntp_nzb !== $this->_spotnntp) {
                $nntp_nzb->sendNoop();
            } # if

            $lastKeepAlive = time();
        } # if

        /*
         * Track whether the header and full spot were fetched in this
         * iteration; only then can the title be taken from the full spot,
         * and only otherwise does the full spot have to come from the database.
         */
        $didFetchHeader = false;
        $didFetchFullSpot = false;

        # Reset the time limit for every header
        set_time_limit(120);

        # Message-ID without the surrounding angle brackets
        $msgId = substr($msgheader['Message-ID'], 1, -1);

        $header_isInDb = isset($dbIdList['spot'][$msgId]);
        $fullspot_isInDb = isset($dbIdList['fullspot'][$msgId]);

        /*
         * Parse the header when it is not stored yet, or when full retrieval
         * is on and either the full spot is missing or we run in 'retro'
         * mode: some fields (for example the key id) only come from the header.
         */
        $mustParseHeader = !$header_isInDb
            || ((!$fullspot_isInDb || $this->_retro) && $this->_retrieveFull);

        if ($mustParseHeader) {
            $hdrsRetrieved++;

            $this->debug('foreach-loop, parsingXover, start. msgId= ' . $msgCounter);
            $spot = $spotParser->parseXover($msgheader['Subject'],
                                            $msgheader['From'],
                                            $msgheader['Date'],
                                            $msgheader['Message-ID'],
                                            $this->_rsakeys);
            $this->debug('foreach-loop, parsingXover, done. msgId= ' . $msgCounter);

            # Parse errors and unverified spots are ignored
            if ($spot === false || !$spot['verified']) {
                $this->debug('foreach-loop, spot is either false or not verified');
                continue;
            } # if

            if ($spot['keyid'] == 2) {
                # Special moderator commands always have key id 2
                $this->debug('foreach-loop, spot is a moderation spot');

                $commandAr = explode(' ', strtolower($spot['title']));

                # Only record a known command that actually names a target;
                # a bare command would otherwise add an empty key to the list.
                $hasTarget = isset($commandAr[1]) && $commandAr[1] !== '';
                if ($hasTarget && in_array($commandAr[0], $validCommands, true)) {
                    $moderationList[$commandAr[1]] = 1;
                    $modCount++;
                } # if
            } else {
                if ($retentionStamp > 0 && $spot['stamp'] < $retentionStamp && $this->_settings->get('retentiontype') == 'everything') {
                    # Older than the retention stamp: do not add
                    $this->debug('foreach-loop, spot is expired: ' . $spot['stamp']);
                    continue;
                } elseif ($spot['stamp'] < $this->_settings->get('retrieve_newer_than')) {
                    $this->debug('foreach-loop, spot is too old: ' . $spot['stamp']);
                    $skipCount++;
                } elseif (!$header_isInDb) {
                    # Header is not in the database yet, queue it
                    $spotDbList[] = $spot;

                    /*
                     * Some buggy NNTP servers return the same message id
                     * more than once in one XOVER reply, so remember that
                     * this id is already queued.
                     */
                    $dbIdList['spot'][$msgId] = 1;
                    $header_isInDb = true;
                    $lastProcessedId = $msgId;
                    $didFetchHeader = true;

                    if ($spot['wassigned']) {
                        $signedCount++;
                    } # if
                } # elseif
            } # else
        } else {
            $lastProcessedId = $msgId;
        } # else

        /*
         * A full spot is only retrieved when its header is (now) known.
         * This stays separate from the header code above so full spots can
         * be added to a system after all headers have been retrieved.
         */
        if ($header_isInDb && !$fullspot_isInDb) {
            # Do not add full spots older than the retention stamp
            if ($retentionStamp > 0 && strtotime($msgheader['Date']) < $retentionStamp) {
                continue;
            } # if

            if ($this->_retrieveFull) {
                try {
                    $fullsRetrieved++;
                    $this->debug('foreach-loop, getFullSpot, start. msgId= ' . $msgId);
                    $fullSpot = $this->_spotnntp->getFullSpot($msgId);
                    $this->debug('foreach-loop, getFullSpot, done. msgId= ' . $msgId);

                    # Queue this full spot for the database
                    $fullSpotDbList[] = $fullSpot;
                    $fullspot_isInDb = true;
                    $didFetchFullSpot = true;

                    # Same duplicate-message-id protection as for headers
                    $dbIdList['fullspot'][$msgId] = 1;

                    /*
                     * Overwrite title and spotterid of the header queued in
                     * this iteration with the values from the full spot.
                     */
                    if ($didFetchHeader) {
                        $lastIdx = count($spotDbList) - 1;
                        $spotDbList[$lastIdx]['title'] = $fullSpot['title'];
                        $spotDbList[$lastIdx]['spotterid'] = $fullSpot['spotterid'];
                    } # if
                } catch (ParseSpotXmlException $x) {
                    # swallow error
                } catch (Exception $x) {
                    /*
                     * Code 430 (article not found for a header we were just
                     * given) and unparsable XML are swallowed; anything else
                     * is passed on.
                     */
                    $ignorable = $x->getCode() == 430
                        || $x->getMessage() == 'String could not be parsed as XML';
                    if (!$ignorable) {
                        throw $x;
                    } # if
                } # catch
            } # if retrievefull
        } # if fullspot is not in db yet

        if ($prefetchWanted && $header_isInDb) {
            try {
                /*
                 * When the full spot was not fetched in this iteration (for
                 * example in 'retro' mode, where header and full spot may
                 * both be stored already) it is loaded from the database.
                 */
                if (!$didFetchFullSpot) {
                    $fullSpot = $this->_db->getFullSpot($msgId, SPOTWEB_ANONYMOUS_USERID);
                    $fullSpot = array_merge($spotParser->parseFull($fullSpot['fullxml']), $fullSpot);
                } # if

                # Prefetch (cache) the spot's image
                if ($this->_prefetch_image) {
                    # Skipped only when 'image' is not an array AND the spot
                    # is older than 30 days
                    if (is_array($fullSpot['image']) || $fullSpot['stamp'] > (int) time() - 30 * 24 * 60 * 60) {
                        $this->debug('foreach-loop, getImage(), start. msgId= ' . $msgId);
                        $spotsOverview->getImage($fullSpot, $nntp_nzb);
                        $this->debug('foreach-loop, getImage(), done. msgId= ' . $msgId);
                    } # if
                } # if

                # Prefetch (cache) the spot's NZB file, if one can be expected
                if ($this->_prefetch_nzb) {
                    if (!empty($fullSpot['nzb']) && $fullSpot['stamp'] > 1290578400) {
                        $this->debug('foreach-loop, getNzb(), start. msgId= ' . $msgId);
                        $spotsOverview->getNzb($fullSpot, $nntp_nzb);
                        $this->debug('foreach-loop, getNzb(), done. msgId= ' . $msgId);
                    } # if
                } # if
            } catch (ParseSpotXmlException $x) {
                # swallow error
            } catch (Exception $x) {
                # Same ignorable errors as for the full spot retrieval above
                $ignorable = $x->getCode() == 430
                    || $x->getMessage() == 'String could not be parsed as XML';
                if (!$ignorable) {
                    throw $x;
                } # if
            } # catch
        } # if prefetch image and/or nzb

        $this->debug('foreach-loop, done. msgId= ' . $msgCounter);
    } # foreach

    # For an empty batch every counter is still 0, so no special case is needed
    $headerCount = count($hdrList);
    $this->displayStatus("hdrparsed", $hdrsRetrieved);
    $this->displayStatus("fullretrieved", $fullsRetrieved);
    $this->displayStatus("verified", $signedCount);
    $this->displayStatus("modcount", $modCount);
    $this->displayStatus("skipcount", $skipCount);
    $this->displayStatus("loopcount", $headerCount);

    # Write the collected headers and full spots to the database
    $this->_db->addSpots($spotDbList, $fullSpotDbList);
    $this->debug('added Spots, spotDbList=' . serialize($spotDbList));
    $this->debug('added Spots, fullSpotDbList=' . serialize($fullSpotDbList));

    /*
     * Act on the moderation commands only now: a spot can be added and
     * moderated within the same batch, so this cannot happen inline.
     */
    switch ($this->_settings->get('spot_moderation')) {
        case 'disable':
            break;

        case 'markspot':
            $this->_db->markCommentsModerated($moderationList);
            $this->_db->markSpotsModerated($moderationList);
            break;

        default:
            $this->_db->removeSpots($moderationList);
            $this->_db->removeComments($moderationList);
            break;
    } # switch

    # Update the maximum article id; retro mode is stored under its own key
    $maxIdKey = $this->_retro ? 'spots_retro' : $this->_server['host'];
    $this->_db->setMaxArticleid($maxIdKey, $endMsg);
    $this->debug('loop finished, setMaxArticleId=' . serialize($endMsg));

    $this->displayStatus("timer", round(microtime(true) - $timer, 2));

    return array('count' => $headerCount, 'headercount' => $hdrsRetrieved, 'lastmsgid' => $lastProcessedId);
}
# Stand-alone debugging script: connects to the database and to the header
# NNTP server, fetches a fixed XOVER range and dumps the parsed result of
# every header in it.
require_once "lib/SpotSecurity.php";
require_once "lib/SpotTiming.php";
require_once "settings.php";
require_once "lib/SpotDb.php";

# NOTE(review): USERID is defined here but not referenced anywhere in this script
define('USERID', 30);

/* -------------------------------------------------------------------- */
echo "Included PHP classes... " . PHP_EOL;

# $settings is not defined in this script; presumably settings.php provides it -- confirm
$db = new SpotDb($settings['db']);
$db->connect();
echo "Connected to the database.." . PHP_EOL;

# NOTE(review): $spotSigning, $spotPosting and $spotUserSystem are created
# but not used further down in this script
$spotSettings = SpotSettings::singleton($db, $settings);
$spotSigning = new SpotSigning();
$spotPosting = new SpotPosting($db, $spotSettings);
$spotUserSystem = new SpotUserSystem($db, $spotSettings);
echo "Initialized classes.." . PHP_EOL;

# Connect the retriever to the header group and dump what connect() returns
$rsaKeys = $spotSettings->get('rsa_keys');
$retriever = new SpotRetriever_Spots($spotSettings->get('nntp_hdr'), $db, $spotSettings, $rsaKeys, '', $spotSettings->get('retrieve_full'));
$msgdata = $retriever->connect($spotSettings->get('hdr_group'));
var_dump($msgdata);

# Connect through our own NNTP session to the server so we have an XOVER list
# NOTE(review): $headerServer is assigned but not used afterwards
$headerServer = $spotSettings->get('nntp_hdr');
$spotnntp = new SpotNntp($spotSettings->get('nntp_hdr'));
$spotnntp->selectGroup($spotSettings->get('hdr_group'));

# Hard-coded article range to inspect
$hdrList = $spotnntp->getOverview(3244937, 3244938);

# Parse every header in the range and dump the result
foreach ($hdrList as $msgid => $msgheader) {
    $spotParser = new SpotParser();
    $spot = $spotParser->parseXover($msgheader['Subject'], $msgheader['From'], $msgheader['Date'], $msgheader['Message-ID'], $rsaKeys);
    var_dump($spot);
}

# and signal quit
$retriever->quit();
/**
 * Processes one batch of XOVER headers: parses headers that are new or whose
 * full spot is missing, acts on moderator commands, stores new spots and,
 * when full retrieval is enabled, fetches and stores the matching full spot.
 *
 * @param array $hdrList XOVER headers; each entry is read for its 'Subject',
 *                       'From', 'Date' and 'Message-ID' fields
 * @param mixed $curMsg  start of the batch; shown in the progress line and
 *                       stored as the maximum article id for this server
 * @param mixed $endMsg  end of the batch; only shown in the progress line
 * @return array with keys 'count' (headers in the batch), 'headercount'
 *               (headers parsed) and 'lastmsgid' (last message id handled)
 * @throws Exception when fetching a full spot fails with anything other than
 *                   the two known, ignorable error messages
 */
function process($hdrList, $curMsg, $endMsg) {
    $this->displayStatus("progress", $curMsg . " till " . $endMsg);

    # NOTE(review): an exception rethrown from the loop below skips the commit
    # at the end of this method -- confirm the caller handles the open transaction.
    $this->_db->beginTransaction();

    $signedCount = 0;
    $hdrsRetrieved = 0;
    $fullsRetrieved = 0;
    $modCount = 0;
    $skipCount = 0;
    $lastProcessedId = '';

    $validCommands = array('delete', 'dispose', 'remove');
    $ignorableErrors = array('No such article found', 'String could not be parsed as XML');

    # Ask the database which of these message ids are already stored
    $dbIdList = $this->_db->matchSpotMessageIds($hdrList);

    $spotParser = new SpotParser();

    foreach ($hdrList as $msgheader) {
        # Reset the time limit for every header
        set_time_limit(120);

        # Message-ID without the surrounding angle brackets
        $msgId = substr($msgheader['Message-ID'], 1, -1);

        # Booleans so the array lookups are not repeated and the code reads easier
        $header_isInDb = isset($dbIdList['spot'][$msgId]);
        $fullspot_isInDb = isset($dbIdList['fullspot'][$msgId]);

        # Parse the header when it is not stored yet, and also when only the
        # full spot is missing: some header fields (such as 'keyid') are
        # needed below.
        if (!$header_isInDb || !$fullspot_isInDb) {
            $hdrsRetrieved++;
            $spot = $spotParser->parseXover($msgheader['Subject'],
                                            $msgheader['From'],
                                            $msgheader['Date'],
                                            $msgheader['Message-ID'],
                                            $this->_rsakeys);

            # Parse errors and unverified spots are dropped entirely
            if ($spot === false || !$spot['verified']) {
                continue;
            } # if

            if ($spot['keyid'] == 2) {
                # keyid 2 marks a moderator post: "<command> <target>"
                $commandAr = explode(' ', strtolower($spot['title']));

                # FIXME: the target Message-ID can also belong to a comment,
                # the code below assumes it is a spot.
                #
                # Only act on a known command that actually names a target;
                # a bare command would otherwise pass an undefined index on
                # to the database layer.
                $hasTarget = isset($commandAr[1]) && $commandAr[1] !== '';
                if ($hasTarget && in_array($commandAr[0], $validCommands, true)) {
                    switch ($this->_settings->get('spot_moderation')) {
                        case 'disable':
                            break;
                        case 'markspot':
                            $this->_db->markSpotModerated($commandAr[1]);
                            break;
                        default:
                            $this->_db->deleteSpot($commandAr[1]);
                            break;
                    } # switch

                    $modCount++;
                } # if
            } else {
                $retentionDays = $this->_settings->get('retention');

                if ($retentionDays > 0 && $spot['stamp'] < time() - $retentionDays * 24 * 60 * 60) {
                    # Do not add spots older than the retention period, so
                    # they do not have to be removed again later
                    $skipCount++;
                } elseif ($spot['stamp'] < $this->_settings->get('retrieve_newer_than')) {
                    $skipCount++;
                } elseif (!$header_isInDb) {
                    $this->_db->addSpot($spot);

                    # Mark the header as stored; the id list is updated too
                    # because the same message id sometimes shows up more
                    # than once in one XOVER reply.
                    $dbIdList['spot'][$msgId] = 1;
                    $header_isInDb = true;
                    $lastProcessedId = $msgId;

                    if ($spot['wassigned']) {
                        $signedCount++;
                    } # if
                } # elseif
            } # else
        } else {
            $lastProcessedId = $msgId;
        } # else

        # The full spot is only fetched when its header is in the database
        # (possibly added just above) and the full spot itself is not.
        # keyid 2 is a moderator post, for which no full spot is retrieved.
        if ($header_isInDb && !$fullspot_isInDb && $this->_retrieveFull && $spot['keyid'] != 2) {
            try {
                $fullsRetrieved++;
                $fullSpot = $this->_spotnntp->getFullSpot($msgId);

                # and add it to the database
                $this->_db->addFullSpot($fullSpot);
                $fullspot_isInDb = true;

                # Same duplicate-message-id protection as for headers
                $dbIdList['fullspot'][$msgId] = 1;
            } catch (ParseSpotXmlException $x) {
                # swallow error
            } catch (Exception $x) {
                # The server sometimes lists an article it cannot deliver or
                # delivers unparsable XML; those are ignored, the rest is not.
                if (!in_array($x->getMessage(), $ignorableErrors, true)) {
                    throw $x;
                } # if
            } # catch
        } # if
    } # foreach

    # For an empty batch every counter is still 0, so no special case is needed
    $headerCount = count($hdrList);
    $this->displayStatus("hdrparsed", $hdrsRetrieved);
    $this->displayStatus("fullretrieved", $fullsRetrieved);
    $this->displayStatus("verified", $signedCount);
    $this->displayStatus("modcount", $modCount);
    $this->displayStatus("skipcount", $skipCount);
    $this->displayStatus("loopcount", $headerCount);

    $this->_db->setMaxArticleid($this->_server['host'], $curMsg);
    $this->_db->commitTransaction();

    return array('count' => $headerCount, 'headercount' => $hdrsRetrieved, 'lastmsgid' => $lastProcessedId);
}