Beispiel #1
0
 /**
  * Scrape P2000 notification pages and index them into the database.
  *
  * Old entries are purged first, then pages are scraped one after another
  * until entriesInDatabase() reports at least one entry for the cutoff date
  * (now minus $daysBack days). Nothing is returned; progress is written to
  * STDOUT, a constant PHP only defines under the CLI SAPI.
  *
  * NOTE(review): the loop has no upper bound on the number of pages; it only
  * stops once the cutoff-date check succeeds.
  *
  * @param int|float $daysBack Age of the cutoff date, in days.
  * @param float     $delay    Minimum time in seconds each iteration spends
  *                            between finishing a scrape and loading the next
  *                            page (indexing time plus sleep).
  *
  * @return void
  */
 private function getAndIndexNotifications($daysBack = 14, $delay = 0.5)
 {
     // Cutoff date: today minus $daysBack days.
     $date = new DateTime();
     date_sub($date, date_interval_create_from_date_string($daysBack . ' days'));
     $p = 0;
     // remove database entries older than given date
     $this->deleteEntriesInDatabase($date);
     $Scraper = new P2000Scraper("http://www.p2000-online.net/alleregiosf.html");
     while ($this->entriesInDatabase($date) == 0) {
         $Scraper->scrapePage();
         // Timestamps are kept in whole milliseconds.
         $now = round(microtime(true) * 1000);
         // Called for its side effect; the return value is not needed here.
         $this->indexNotifications($Scraper->getRawNotifications());
         $elapsed = round(microtime(true) * 1000) - $now;
         if ($elapsed < $delay * 1000.0) {
             // Ensure proper delay between requests. usleep() expects an int
             // number of microseconds; cast explicitly, because implicitly
             // converting a fractional float is deprecated since PHP 8.1.
             usleep((int) (($delay - $elapsed / 1000.0) * 1000000));
         }
         // Total iteration time, including any sleep above.
         $end = round(microtime(true) * 1000) - $now;
         $Scraper->clearRawNotifications();
         $Scraper->loadNextPage();
         $p++;
         // Progress report for the CLI.
         fwrite(STDOUT, "\n\tScraped " . $p . " pages - Time elapsed: " . $end . "[ms]\n");
         $amount = $this->entriesInDatabase($date);
         fwrite(STDOUT, $amount . " pages indexed of date: " . $date->format('d-m-Y') . "\n");
     }
 }
Beispiel #2
0
<?php

// Demo script: scrape a batch of pages and print the first raw notification
// (HTML-escaped) followed by the total number of notifications collected.
include_once 'P2000Scraper.php';

$Scraper = new P2000Scraper("http://www.p2000-online.net/alleregiosf.html");
// 60 / 100.0 evaluates to 0.6 — presumably the delay in seconds between
// requests; confirm against P2000Scraper::scrapePages().
$Scraper->scrapePages(10, 60 / 100.0);

// Fetch the list once and reuse it for both outputs.
$rawNotifications = $Scraper->getRawNotifications();

// Guard against an empty result: reading index 0 of an empty list raises an
// undefined-offset warning and would hand null to htmlspecialchars().
echo htmlspecialchars(isset($rawNotifications[0]) ? $rawNotifications[0] : '');
echo "<br/>";
echo "Count: " . count($rawNotifications);