コード例 #1
0
 /**
  * Runs the crawling loop of a crawler process.
  *
  * After initialisation the method repeatedly fetches the next URL from the
  * link cache and hands it to processUrl() until a stop condition is met:
  *  - processUrl() returned a truthy value, or
  *  - (only when the multiprocess mode is NOT MPMODE_PARENT_EXECUTES_USERCODE)
  *    the link cache contains no more URLs, or checkForAbort() returned a
  *    non-null value.
  *
  * When the loop ends, a child process either returns (mode
  * MPMODE_PARENT_EXECUTES_USERCODE) or terminates via exit. A non-child
  * process stores the final crawler status, runs cleanup() and stops the
  * "crawling_process" benchmark.
  */
 protected function startChildProcessLoop()
 {
     $this->initCrawlerProcess();

     // Overridable hook.
     $this->initChildProcess();

     // The benchmark is only started when this is not a child process.
     if (!$this->is_chlid_process) {
         PHPCrawlerBenchmark::start("crawling_process");
     }

     $finished = false;

     // Main loop; the body always runs at least once.
     do {
         $nextUrl = $this->LinkCache->getNextUrl();

         if ($nextUrl == null) {
             // The cache handed out nothing: wait half a second before the next attempt.
             usleep(500000);
         } else {
             // processUrl() reports whether crawling has to stop.
             $finished = $this->processUrl($nextUrl);
         }

         if ($this->multiprocess_mode != PHPCrawlerMultiProcessModes::MPMODE_PARENT_EXECUTES_USERCODE) {
             // Nothing left in the link cache -> the crawl passed through completely.
             if (!$this->LinkCache->containsURLs()) {
                 $finished = true;
                 $this->CrawlerStatusHandler->updateCrawlerStatus(null, PHPCrawlerAbortReasons::ABORTREASON_PASSEDTHROUGH);
             }

             // An abort may have been signalled from another process.
             if ($this->checkForAbort() !== null) {
                 $finished = true;
             }
         }
     } while (!$finished);

     // The loop has ended. A child process stops here and never reaches the cleanup below.
     if ($this->is_chlid_process) {
         if ($this->multiprocess_mode != PHPCrawlerMultiProcessModes::MPMODE_PARENT_EXECUTES_USERCODE) {
             exit;
         }

         return;
     }

     $this->crawlerStatus = $this->CrawlerStatusHandler->getCrawlerStatus();

     // Clean up the crawler.
     $this->cleanup();

     // Stop the benchmark (again only when this is not a child process).
     if (!$this->is_chlid_process) {
         PHPCrawlerBenchmark::stop("crawling_process");
     }
 }
コード例 #2
0
ファイル: PHPCrawler.php プロジェクト: dawid-z/phpcrawl
 /**
  * Runs the loop of the controller process (main process).
  *
  * The loop is only entered in multiprocess mode
  * MPMODE_PARENT_EXECUTES_USERCODE; in every other mode the process exits
  * immediately. Inside the loop, document-info objects are taken from the
  * document-info queue and passed to the user callbacks handlePageData()
  * (only if such a method exists) and handleDocumentInfo(). A negative
  * return value from either callback is recorded as a user abort.
  *
  * The loop is left as soon as checkForAbort() returns a non-null value;
  * at that point all child processes are killed.
  */
 protected function startControllerProcessLoop()
 {
     // Any mode other than MPMODE_PARENT_EXECUTES_USERCODE -> terminate the process.
     if ($this->multiprocess_mode != PHPCrawlerMultiProcessModes::MPMODE_PARENT_EXECUTES_USERCODE) {
         exit;
     }

     $this->initCrawlerProcess();
     $this->initChildProcess();

     for (;;) {
         // Abort requested -> kill the children and leave.
         if ($this->checkForAbort() !== null) {
             $this->ProcessHandler->killChildProcesses();
             return;
         }

         // Fetch the next document-info object from the queue.
         $documentInfo = $this->DocumentInfoQueue->getNextDocumentInfo();

         if ($documentInfo == null) {
             // No links left in the cache AND no document infos left in the queue
             // -> flag the crawl as passed through.
             if (!$this->LinkCache->containsURLs() && $this->DocumentInfoQueue->getDocumentInfoCount() == 0) {
                 $this->CrawlerStatusHandler->updateCrawlerStatus(null, PHPCrawlerAbortReasons::ABORTREASON_PASSEDTHROUGH);
             }

             // Wait a tenth of a second before checking again.
             usleep(100000);
             continue;
         }

         // Update the crawler status with the fetched document.
         $this->CrawlerStatusHandler->updateCrawlerStatus($documentInfo);

         $abortedByUser = false;

         // The old handlePageData() callback is only invoked when it has been defined
         // (the original author notes it is skipped otherwise because of high memory usage).
         if (method_exists($this, "handlePageData")) {
             // Kept in a variable on purpose: a user-defined handlePageData() may take
             // its argument by reference.
             $pageData = $documentInfo->toArray();
             $callbackResult = $this->handlePageData($pageData);
             if ($callbackResult < 0) {
                 $abortedByUser = true;
             }
         }

         // The handleDocumentInfo() callback is always invoked.
         $callbackResult = $this->handleDocumentInfo($documentInfo);
         if ($callbackResult < 0) {
             $abortedByUser = true;
         }

         // Record the user abort in the crawler status.
         // NOTE(review): the loop is presumably left on the next iteration via checkForAbort() - confirm.
         if ($abortedByUser) {
             $this->CrawlerStatusHandler->updateCrawlerStatus(null, PHPCrawlerAbortReasons::ABORTREASON_USERABORT);
         }
     }
 }