Example #1
0
 /**
  * Builds the job via the parent constructor and verifies that a leader node is configured.
  *
  * @param mixed $nodeName  Forwarded unchanged to the parent constructor.
  * @param array $jobConfig Job configuration; must contain a non-empty "leader" entry.
  *
  * @throws Scalr_System_Cronjob_Exception When the "leader" entry is missing or empty.
  */
 function __construct($nodeName, $jobConfig)
 {
     parent::__construct($nodeName, $jobConfig);
     // empty() treats a missing key like any other falsy value, so an absent
     // "leader" entry leads straight to the exception below instead of first
     // raising an "undefined index" diagnostic.
     if (empty($jobConfig["leader"])) {
         throw new Scalr_System_Cronjob_Exception("Configuration array must have a key for 'leader' " . "that names the leader node apointed by administrator");
     }
     $this->logger = Logger::getLogger(__CLASS__);
 }
Example #2
0
 /**
  * Forks the coordinator process.
  *
  * In the parent, the child's PID is stored in $this->coordinatorPid and the
  * method returns. In the child, the method never returns: it runs the
  * coordination loop (leader tracking, zombie node cleanup, heartbeat and
  * work-queue polling) until $this->coordinatorLoop becomes false or the
  * parent process can no longer be signalled, and then calls exit.
  *
  * @return void
  *
  * @throws Scalr_System_Cronjob_Exception When pcntl_fork() reports failure (-1).
  */
 private function forkCoordinator()
 {
     $this->logger->info("Forking coordinator process");
     $pid = pcntl_fork();
     if ($pid == -1) {
         throw new Scalr_System_Cronjob_Exception("Cannot fork coordinator process");
     } elseif ($pid > 0) {
         // Parent process: remember the child's PID and return to the caller.
         $this->coordinatorPid = $pid;
     } elseif ($pid == 0) {
         // Child process: becomes the coordinator and never returns from here.
         $this->coordinatorLoop = true;
         $this->coordinatorPid = posix_getpid();
         // Captured once right after the fork, so it always identifies the
         // process that actually spawned this coordinator.
         $ppid = posix_getppid();
         $this->nodeRegistry->set(self::REGKEY_COORDINATOR_PROCESS_PID, posix_getpid());
         $leaderPath = "{$this->jobZPath}/leader";
         $leaderTimeout = new Scalr_Util_Timeout($this->leaderTimeout);
         $zombyTimeout = new Scalr_Util_Timeout((int) $this->config["tickTime"] * 10);
         $heartbeatTimeout = new Scalr_Util_Timeout((int) $this->config["tickTime"]);
         // Track mtime of this node's own registry entry; it is the reference
         // point for the zombie check below.
         $lastMtime = $this->zookeeper->get("{$this->nodeRegistry->path}/{$this->nodeRegistry->node}")->mtime;
         while ($this->coordinatorLoop) {
             $leaderTimeout->reset();
             try {
                 // Counts exceptions caught during the current leader-timeout window.
                 $exceptionCounter = 0;
                 // NOTE(review): reached() is called without an argument here, while the
                 // other timeouts pass false; presumably this variant throws
                 // Scalr_Util_TimeoutException (handled by the outer catch) once the
                 // timeout elapses - confirm against Scalr_Util_Timeout.
                 while (!$leaderTimeout->reached() && $this->coordinatorLoop) {
                     try {
                         // Terminate myself if the parent can no longer be signalled
                         if (!posix_kill($ppid, 0)) {
                             $this->coordinatorLoop = false;
                             break 2;
                         }
                         // Leader election may have been initiated
                         if ($this->leaderElection->isInitiated()) {
                             $this->logger->info("[coordinator] Someone has initiated leader election");
                             $this->doLeaderElection();
                         }
                         // Leader may have changed
                         $leaderNodeName = $this->zookeeper->getData($leaderPath);
                         $oldIsLeader = $this->isLeader;
                         $this->isLeader = $leaderNodeName == $this->nodeName;
                         if (!$this->isLeader && $oldIsLeader) {
                             $this->logger->info("[coordinator] I am not longer a leader ('{$this->nodeName}'). " . "Leader is '{$leaderNodeName}'");
                         }
                         // Check leader znode mtime
                         $leaderStat = $this->zookeeper->get($leaderPath);
                         if ($leaderStat->mtime != $this->leaderMtime) {
                             // Leader has updated its state, so restart the leader timeout
                             $leaderTimeout->reset();
                             $this->logger->info("[coordinator] Leader is the same");
                             $this->leaderMtime = $leaderStat->mtime;
                         }
                         if ($this->isLeader) {
                             // Process returned nodes.
                             // Administrator's configured leader may be here
                             if ($c = $this->returnedNodesQueue->capacity()) {
                                 $this->logger->info(sprintf("%d node(s) have returned back online", $c));
                                 $votes = array($this->elector->getElectionData());
                                 // NOTE(review): peek() is used as the loop condition, so it
                                 // presumably removes the item it returns - confirm.
                                 while ($vote = $this->returnedNodesQueue->peek()) {
                                     $votes[] = $vote;
                                 }
                                 $this->checkElectionResults($votes, false);
                             }
                             // Check zombie nodes
                             if ($zombyTimeout->reached(false)) {
                                 $childData = $this->zookeeper->getChildren($this->nodeRegistry->path);
                                 foreach ($childData->children as $childName) {
                                     $childStat = $this->zookeeper->get("{$this->nodeRegistry->path}/{$childName}");
                                     // A node not modified since our own mtime recorded at the
                                     // previous check is treated as a zombie.
                                     if ($childStat->mtime < $lastMtime) {
                                         // Zombie detected
                                         $this->logger->info(sprintf("[coordinator] Cleanup zomby node '%s'", $childName));
                                         $this->zookeeper->deleteRecursive("{$this->nodeRegistry->path}/{$childName}");
                                     }
                                 }
                                 $zombyTimeout->reset();
                                 $lastMtime = $this->zookeeper->get("{$this->nodeRegistry->path}/{$this->nodeRegistry->node}")->mtime;
                             }
                         }
                         // Node heartbeat
                         if ($heartbeatTimeout->reached(false)) {
                             $this->logger->debug(sprintf("[coordinator] '%s' heartbeat", $this->nodeName));
                             $this->nodeRegistry->touchNode();
                             $heartbeatTimeout->reset();
                         }
                         // Poll the global work queue and hand messages to the local one
                         while ($message = $this->globalWorkQueue->peek()) {
                             $this->logger->info("[coordinator] Put received message into local queue");
                             $this->processPool->workQueue->put($message);
                         }
                         Scalr_Util_Timeout::sleep(1000);
                     } catch (Exception $e) {
                         $this->logger->error(sprintf("[coordinator] Caught in message loop <%s> %s", get_class($e), $e->getMessage()));
                         if (++$exceptionCounter > $this->coordinatorSlippageLimit) {
                             $this->logger->fatal("[coordinator] Got too many consistent exceptions in main loop. " . "Slippage limit: {$this->coordinatorSlippageLimit} exceed");
                             // Signal the parent captured at fork time. If the parent has
                             // already died, posix_getppid() would return the process that
                             // adopted this child (e.g. init), which must not receive SIGTERM.
                             posix_kill($ppid, SIGTERM);
                             exit;
                         }
                     }
                 }
             } catch (Scalr_Util_TimeoutException $e) {
                 // No leader update was seen within the leader timeout: start an election.
                 $this->logger->warn("[coordinator] Caught leader timeout exception ({$leaderTimeout->format()})");
                 $this->logger->info("[coordinator] Start new leader election procedure");
                 try {
                     $this->leaderElection->initiate($this->nodeRegistry->nodesCapacity());
                 } catch (Exception $e) {
                     $this->logger->error(sprintf("[coordinator] Caught in leader election <%s> %s", get_class($e), $e->getMessage()));
                 }
             }
         }
         $this->logger->info("[coordinator] Done");
         exit;
     }
 }