/**
 * Cleans up the crawler after it has finished.
 *
 * Two outcomes, depending on what checkForAbort() reports:
 *  - An abort reason other than PHPCrawlerAbortReasons::ABORTREASON_PASSEDTHROUGH:
 *    the counters of the crawler-status (documents_received, bytes_received,
 *    links_followed) are reset to 0, the reason is stored as previous_abort_reason,
 *    abort_reason is cleared, and the status is written back. Caches, working
 *    directory and semaphore are left untouched in this case.
 *  - Otherwise: the cookie- and link-cache are cleaned up, the working directory
 *    is deleted and, if a multiprocess-mode is active, the semaphore is removed.
 */
protected function cleanup()
{
    $abort_reason = $this->checkForAbort();

    // Loose comparisons are kept on purpose: the return type of checkForAbort()
    // is not visible here, so "no abort" may be null or another empty value.
    if ($abort_reason != null && $abort_reason != PHPCrawlerAbortReasons::ABORTREASON_PASSEDTHROUGH) {
        $crawler_status = $this->CrawlerStatusHandler->getCrawlerStatus();

        // Reset the progress counters
        $crawler_status->documents_received = 0;
        $crawler_status->bytes_received = 0;
        $crawler_status->links_followed = 0;

        // Remember why the crawler stopped, then clear the active abort reason
        $crawler_status->previous_abort_reason = $abort_reason;
        $crawler_status->abort_reason = null;

        $this->CrawlerStatusHandler->setCrawlerStatus($crawler_status);

        return;
    }

    // Free/unlock caches
    $this->CookieCache->cleanup();
    $this->LinkCache->cleanup();

    // Delete working-dir
    PHPCrawlerUtils::rmDir($this->working_directory);

    // Remove semaphore (if multiprocess-mode)
    if ($this->multiprocess_mode != PHPCrawlerMultiProcessModes::MPMODE_NONE) {
        $sem_key = sem_get($this->crawler_uniqid);

        // sem_get() returns false on failure; handing that to sem_remove() raises
        // a TypeError on PHP 8 (a warning on older versions), so only remove a
        // semaphore that was actually obtained.
        if ($sem_key !== false) {
            sem_remove($sem_key);
        }
    }
}
/**
 * Holds back the next request according to the setRequestDelayTime()-setting
 * and records the current time as the last-request-time afterwards.
 *
 * Does nothing at all (no waiting, no status update) when no request-delay is set.
 */
protected function delayRequest()
{
    // Without a request-delay there is nothing to wait for and nothing to record
    if ($this->request_delay_time == null) {
        return;
    }

    do {
        // The status is re-read on every pass
        // NOTE(review): presumably because other processes may update it - confirm
        $status = $this->CrawlerStatusHandler->getCrawlerStatus();

        $earliest_next_request = $status->last_request_time + $this->request_delay_time;
        $too_early = $earliest_next_request > PHPCrawlerBenchmark::getmicrotime();

        if ($too_early) {
            // Sleep for half of the delay (converted to microseconds), then check again
            usleep($this->request_delay_time * 1000000 / 2);
        }
    } while ($too_early);

    // Update last-request-time
    $this->CrawlerStatusHandler->updateCrawlerStatus(null, null, null, PHPCrawlerBenchmark::getmicrotime());
}