public function xLockAction() { $this->request->defineParams(array('serverId')); $dbServer = DBServer::LoadByID($this->getParam('serverId')); $this->user->getPermissions()->validate($dbServer); if ($dbServer->platform != SERVER_PLATFORMS::EC2) { throw new Exception("Server lock supported ONLY by EC2"); } $env = Scalr_Environment::init()->loadById($dbServer->envId); $ec2 = $env->aws($dbServer->GetCloudLocation())->ec2; $newValue = !$ec2->instance->describeAttribute($dbServer->GetCloudServerID(), InstanceAttributeType::disableApiTermination()); $ec2->instance->modifyAttribute($dbServer->GetCloudServerID(), InstanceAttributeType::disableApiTermination(), $newValue); $dbServer->SetProperty(EC2_SERVER_PROPERTIES::IS_LOCKED, $newValue); $this->response->success(); }
public function xLockAction() { $this->request->defineParams(array('serverId')); $dbServer = DBServer::LoadByID($this->getParam('serverId')); $this->user->getPermissions()->validate($dbServer); if ($dbServer->platform != SERVER_PLATFORMS::EC2) { throw new Exception("Server lock supported ONLY by EC2"); } $env = Scalr_Environment::init()->loadById($dbServer->envId); $ec2 = $env->aws($dbServer->GetCloudLocation())->ec2; if ($dbServer->GetRealStatus(true)->isTerminated()) { $dbServer->SetProperty(EC2_SERVER_PROPERTIES::IS_LOCKED, 0); $this->response->warning('Server was terminated. Clearing disableAPITermination flag.'); } else { $newValue = !$ec2->instance->describeAttribute($dbServer->GetCloudServerID(), InstanceAttributeType::disableApiTermination()); $ec2->instance->modifyAttribute($dbServer->GetCloudServerID(), InstanceAttributeType::disableApiTermination(), $newValue); $dbServer->SetProperties([EC2_SERVER_PROPERTIES::IS_LOCKED_LAST_CHECK_TIME => 0, EC2_SERVER_PROPERTIES::IS_LOCKED => $newValue]); $this->response->success(); } }
/** * {@inheritdoc} * @see \Scalr\System\Zmq\Cron\TaskInterface::worker() */ public function worker($request) { $dtNow = new DateTime('now'); try { $dbServer = DBServer::LoadByID($request->serverId); } catch (ServerNotFoundException $e) { $this->log('INFO', "Server:%s does not exist:%s", $request->serverId, $e->getMessage()); return false; } if (!in_array($dbServer->status, [SERVER_STATUS::PENDING_TERMINATE, SERVER_STATUS::PENDING_SUSPEND, SERVER_STATUS::TERMINATED, SERVER_STATUS::SUSPENDED])) { return false; } // Check and skip locked instances if ($dbServer->status == SERVER_STATUS::PENDING_TERMINATE && $dbServer->GetProperty(EC2_SERVER_PROPERTIES::IS_LOCKED) == 1) { if (($checkDateTime = $dbServer->GetProperty(EC2_SERVER_PROPERTIES::IS_LOCKED_LAST_CHECK_TIME)) <= time()) { if (!$dbServer->GetRealStatus(true)->isTerminated()) { $isLocked = $dbServer->GetEnvironmentObject()->aws($dbServer->GetCloudLocation())->ec2->instance->describeAttribute($dbServer->GetCloudServerID(), InstanceAttributeType::disableApiTermination()); if ($isLocked) { \Scalr::getContainer()->logger(LOG_CATEGORY::FARM)->warn(new FarmLogMessage($dbServer->GetFarmObject()->ID, sprintf("Server '%s' has disableAPITermination flag and can't be terminated (Platform: %s) (ServerTerminate).", $dbServer->serverId, $dbServer->platform), $dbServer->serverId)); $startTime = strtotime($dbServer->dateShutdownScheduled); // 1, 2, 3, 4, 5, 6, 9, 14, ... 60 $diff = round((($checkDateTime < $startTime ? $startTime : $checkDateTime) - $startTime) / 60 * 0.5) * 60; $diff = $diff == 0 ? 60 : ($diff > 3600 ? 3600 : $diff); $dbServer->SetProperty(EC2_SERVER_PROPERTIES::IS_LOCKED_LAST_CHECK_TIME, time() + $diff); return false; } else { $dbServer->SetProperty(EC2_SERVER_PROPERTIES::IS_LOCKED, $isLocked); } } } else { return false; } } //Warming up static DI cache \Scalr::getContainer()->warmup(); // Reconfigure observers \Scalr::ReconfigureObservers(); if ($dbServer->status == SERVER_STATUS::TERMINATED || $dbServer->dateShutdownScheduled <= $dtNow->format('Y-m-d H:i:s')) { try { $p = PlatformFactory::NewPlatform($dbServer->platform); $environment = $dbServer->GetEnvironmentObject(); if (!$environment->isPlatformEnabled($dbServer->platform)) { throw new Exception(sprintf("%s platform is not enabled in the '%s' (%d) environment.", $dbServer->platform, $environment->name, $environment->id)); } if ($dbServer->GetCloudServerID()) { $serverHistory = $dbServer->getServerHistory(); $isTermination = in_array($dbServer->status, [SERVER_STATUS::TERMINATED, SERVER_STATUS::PENDING_TERMINATE]); $isSuspension = in_array($dbServer->status, [SERVER_STATUS::SUSPENDED, SERVER_STATUS::PENDING_SUSPEND]); /* @var $terminationData TerminationData */ $terminationData = null; //NOTE: in any case, after call, be sure to set callback to null $this->setupClientCallback($p, function ($request, $response) use($dbServer, &$terminationData) { $terminationData = new TerminationData(); $terminationData->serverId = $dbServer->serverId; if ($request instanceof \http\Client\Request) { $terminationData->requestUrl = $request->getRequestUrl(); $terminationData->requestQuery = $request->getQuery(); $terminationData->request = $request->toString(); } if ($response instanceof \http\Client\Response) { $terminationData->response = $response->toString(); $terminationData->responseCode = $response->getResponseCode(); $terminationData->responseStatus = $response->getResponseStatus(); } }, $dbServer); try { $status = $dbServer->GetRealStatus(); } catch (Exception $e) { //eliminate callback $this->setupClientCallback($p, null, $dbServer); throw $e; } //eliminate callback $this->setupClientCallback($p, null, $dbServer); if ($dbServer->isCloudstack()) { //Workaround for when expunge flag not working and servers stuck in Destroyed state. $isTerminated = $status->isTerminated() && $status->getName() != 'Destroyed'; } else { $isTerminated = $status->isTerminated(); } if ($isTermination && !$isTerminated || $isSuspension && !$dbServer->GetRealStatus()->isSuspended()) { try { if ($dbServer->farmId != 0) { try { if ($dbServer->GetFarmRoleObject()->GetRoleObject()->hasBehavior(ROLE_BEHAVIORS::RABBITMQ)) { $serversCount = count($dbServer->GetFarmRoleObject()->GetServersByFilter([], ['status' => [SERVER_STATUS::TERMINATED, SERVER_STATUS::SUSPENDED]])); if ($dbServer->index == 1 && $serversCount > 1) { \Scalr::getContainer()->logger(LOG_CATEGORY::FARM)->warn(new FarmLogMessage($dbServer->GetFarmObject()->ID, sprintf("RabbitMQ role. Main DISK node should be terminated after all other nodes. Waiting... (Platform: %s) (ServerTerminate).", $dbServer->serverId, $dbServer->platform), $dbServer->serverId)); return false; } } } catch (Exception $e) { } \Scalr::getContainer()->logger(LOG_CATEGORY::FARM)->warn(new FarmLogMessage($dbServer->GetFarmObject()->ID, sprintf("Terminating server '%s' (Platform: %s) (ServerTerminate).", $dbServer->serverId, $dbServer->platform), $dbServer->serverId)); } } catch (Exception $e) { $this->getLogger()->warn("Server: %s caused exception: %s", $request->serverId, $e->getMessage()); } $terminationTime = $dbServer->GetProperty(SERVER_PROPERTIES::TERMINATION_REQUEST_UNIXTIME); if (!$terminationTime || time() - $terminationTime > 180) { if ($isTermination) { $p->TerminateServer($dbServer); } else { $p->SuspendServer($dbServer); } $dbServer->SetProperty(SERVER_PROPERTIES::TERMINATION_REQUEST_UNIXTIME, time()); if ($dbServer->farmId) { $wasHostDownFired = \Scalr::getDb()->GetOne("\n SELECT id FROM events WHERE event_server_id = ? AND type = ? AND is_suspend = '0'", array($request->serverId, 'HostDown')); if (!$wasHostDownFired) { $event = new HostDownEvent($dbServer); $event->isSuspended = !$isTermination; \Scalr::FireEvent($dbServer->farmId, $event); } } } } else { if ($dbServer->status == SERVER_STATUS::TERMINATED) { if (!$dbServer->dateShutdownScheduled || time() - strtotime($dbServer->dateShutdownScheduled) > 600) { $errorResolution = true; $serverHistory->setTerminated(); $dbServer->Remove(); if (isset($terminationData)) { $terminationData->save(); } } } else { if ($dbServer->status == SERVER_STATUS::PENDING_TERMINATE) { $dbServer->updateStatus(SERVER_STATUS::TERMINATED); $errorResolution = true; } else { if ($dbServer->status == SERVER_STATUS::PENDING_SUSPEND) { $dbServer->update(['status' => SERVER_STATUS::SUSPENDED, 'remoteIp' => '', 'localIp' => '']); $errorResolution = true; } } } if (!empty($errorResolution) && $request->attempts > 0 && ($ste = ServerTerminationError::findPk($request->serverId))) { //Automatic error resolution $ste->delete(); } } } else { //If there is no cloudserverID we don't need to add this server into server history. //$serverHistory->setTerminated(); $dbServer->Remove(); } } catch (InstanceNotFound $e) { if ($serverHistory) { $serverHistory->setTerminated(); } $dbServer->Remove(); if (isset($terminationData)) { $terminationData->save(); } } catch (Exception $e) { if ($request->serverId && stripos($e->getMessage(), "modify its 'disableApiTermination' instance attribute and try again") !== false && $dbServer && $dbServer->platform == \SERVER_PLATFORMS::EC2) { // server has disableApiTermination flag on cloud, update server properties $dbServer->SetProperty(EC2_SERVER_PROPERTIES::IS_LOCKED, 1); } else { if ($request->serverId && ($e instanceof InvalidCloudCredentialsException || CloudPlatformSuspensionInfo::isSuspensionException($e) || stripos($e->getMessage(), "tenant is disabled") !== false || stripos($e->getMessage(), "was not able to validate the provided access credentials") !== false || stripos($e->getMessage(), "platform is not enabled") !== false || stripos($e->getMessage(), "neither api key nor password was provided for the openstack config") !== false || stripos($e->getMessage(), "refreshing the OAuth2 token") !== false || strpos($e->getMessage(), "Cannot obtain endpoint url. Unavailable service") !== false)) { //Postpones unsuccessful task for 30 minutes. $ste = new ServerTerminationError($request->serverId, isset($request->attempts) ? $request->attempts + 1 : 1, $e->getMessage()); $minutes = rand(30, 40); $ste->retryAfter = new \DateTime('+' . $minutes . ' minutes'); if ($ste->attempts > self::MAX_ATTEMPTS && in_array($dbServer->status, [SERVER_STATUS::PENDING_TERMINATE, SERVER_STATUS::TERMINATED])) { //We are going to remove those with Pending terminate status from Scalr after 1 month of unsuccessful attempts $dbServer->Remove(); if (isset($terminationData)) { $terminationData->save(); } } $ste->save(); } else { $this->log('ERROR', "Server:%s, failed - Exeption:%s %s", $request->serverId, get_class($e), $e->getMessage()); throw $e; } } } } return $request; }
/** * {@inheritdoc} * @see Scalr_System_Cronjob_MultiProcess_DefaultWorker::handleWork() */ function handleWork($farmId) { $this->cleanup(); $DBFarm = DBFarm::LoadByID($farmId); $account = Scalr_Account::init()->loadById($DBFarm->ClientID); $payAsYouGoTime = $account->getSetting(Scalr_Account::SETTING_BILLING_PAY_AS_YOU_GO_DATE); $GLOBALS["SUB_TRANSACTIONID"] = abs(crc32(posix_getpid() . $farmId)); $GLOBALS["LOGGER_FARMID"] = $farmId; $this->logger->info("[" . $GLOBALS["SUB_TRANSACTIONID"] . "] Begin polling farm (ID: {$DBFarm->ID}, Name: {$DBFarm->Name}, Status: {$DBFarm->Status})"); // Collect information from database $servers_count = $this->db->GetOne("\n SELECT COUNT(*) FROM servers WHERE farm_id = ? AND status NOT IN (?,?)\n ", array($DBFarm->ID, SERVER_STATUS::TERMINATED, SERVER_STATUS::SUSPENDED)); $this->logger->info("[FarmID: {$DBFarm->ID}] Found {$servers_count} farm instances in database"); if ($DBFarm->Status == FARM_STATUS::TERMINATED && $servers_count == 0) { return; } foreach ($DBFarm->GetServersByFilter(array(), array('status' => SERVER_STATUS::PENDING_LAUNCH)) as $DBServer) { /** @var DBServer $DBServer */ try { if ($DBServer->cloudLocation) { try { Logger::getLogger(LOG_CATEGORY::FARM)->info(sprintf("Initializing cloud cache: {$DBServer->cloudLocation} ({$DBServer->serverId})")); $p = PlatformFactory::NewPlatform($DBServer->platform); $list = $p->GetServersList($DBServer->GetEnvironmentObject(), $DBServer->cloudLocation); } catch (Exception $e) { Logger::getLogger(LOG_CATEGORY::FARM)->info(sprintf("Initializing cloud cache: FAILED")); } } if ($DBServer->status != SERVER_STATUS::PENDING && $DBServer->status != SERVER_STATUS::PENDING_TERMINATE) { if (!$p->IsServerExists($DBServer)) { try { $serverInfo = $p->GetServerExtendedInformation($DBServer); } catch (Exception $e) { Logger::getLogger(LOG_CATEGORY::FARM)->error(sprintf("[CRASH][FarmID: %d] Crash check for server '%s' failed: %s", $DBFarm->ID, $DBServer->serverId, $e->getMessage())); } if (!$serverInfo) { if ($p->debugLog) { Logger::getLogger('Openstack')->fatal($p->debugLog); } if (!in_array($DBServer->status, array(SERVER_STATUS::PENDING_TERMINATE, SERVER_STATUS::TERMINATED, SERVER_STATUS::SUSPENDED))) { if ($DBServer->GetProperty(SERVER_PROPERTIES::CRASHED) == 1) { if (PlatformFactory::isOpenstack($DBServer->platform)) { $DBServer->SetProperty(SERVER_PROPERTIES::MISSING, 1); } else { $DBServer->terminate(DBServer::TERMINATE_REASON_CRASHED); Scalr::FireEvent($DBFarm->ID, new HostCrashEvent($DBServer)); } } else { $DBServer->SetProperties([SERVER_PROPERTIES::REBOOTING => 0, SERVER_PROPERTIES::CRASHED => 1, SERVER_PROPERTIES::MISSING => 1]); Logger::getLogger(LOG_CATEGORY::FARM)->warn(new FarmLogMessage($DBFarm->ID, sprintf("Server '%s' found in database but not found on %s. Crashed.", $DBServer->serverId, $DBServer->platform))); } continue; } } else { Logger::getLogger(LOG_CATEGORY::FARM)->error(sprintf("[CRASH][FarmID: %d] False-positive crash check: %s (EnvID: %d)", $DBFarm->ID, $DBServer->serverId, $DBServer->envId)); } } else { $DBServer->SetProperty(SERVER_PROPERTIES::CRASHED, "0"); $DBServer->SetProperty(SERVER_PROPERTIES::MISSING, "0"); } } } catch (Exception $e) { if (stristr($e->getMessage(), "AWS was not able to validate the provided access credentials") || stristr($e->getMessage(), "Unable to sign AWS API request. Please, check your X.509")) { $env = Scalr_Environment::init()->LoadById($DBFarm->EnvID); $env->status = Scalr_Environment::STATUS_INACTIVE; $env->save(); $env->setPlatformConfig(array('system.auto-disable-reason' => $e->getMessage()), false); return; } if (stristr($e->getMessage(), "Could not connect to host")) { continue; } print "[0][Farm: {$farmId}] {$e->getMessage()} at {$e->getFile()}:{$e->getLine()}\n\n"; continue; } /* try { $realStatus = $DBServer->GetRealStatus()->getName(); if ($realStatus == 'stopped') { $DBServer->SetProperty(SERVER_PROPERTIES::SUB_STATUS, $realStatus); continue; } else { if ($DBServer->GetProperty(SERVER_PROPERTIES::SUB_STATUS) == 'stopped') $DBServer->SetProperty(SERVER_PROPERTIES::SUB_STATUS, ""); } } catch (Exception $e) {} */ try { if (!in_array($DBServer->status, array(SERVER_STATUS::SUSPENDED, SERVER_STATUS::TERMINATED, SERVER_STATUS::PENDING_TERMINATE, SERVER_STATUS::PENDING_SUSPEND))) { $openstackErrorState = false; if (PlatformFactory::isOpenstack($DBServer->platform)) { if ($DBServer->GetRealStatus()->getName() === 'ERROR') { $openstackErrorState = true; } } if ($DBServer->GetRealStatus()->isTerminated() || $openstackErrorState) { Logger::getLogger(LOG_CATEGORY::FARM)->warn(new FarmLogMessage($DBFarm->ID, sprintf("Server '%s' (Platform: %s) not running (Real state: %s, Scalr status: %s).", $DBServer->serverId, $DBServer->platform, $DBServer->GetRealStatus()->getName(), $DBServer->status))); $DBServer->terminate(DBServer::TERMINATE_REASON_CRASHED); $DBServer->SetProperties([SERVER_PROPERTIES::REBOOTING => 0, SERVER_PROPERTIES::RESUMING => 0]); Scalr::FireEvent($DBFarm->ID, new HostDownEvent($DBServer)); continue; } elseif ($DBServer->GetRealStatus()->isSuspended()) { // if server was suspended when it was running if ($DBServer->status == SERVER_STATUS::RUNNING) { Logger::getLogger(LOG_CATEGORY::FARM)->warn(new FarmLogMessage($DBFarm->ID, sprintf("Server '%s' (Platform: %s) not running (Real state: %s, Scalr status: %s).", $DBServer->serverId, $DBServer->platform, $DBServer->GetRealStatus()->getName(), $DBServer->status))); $DBServer->SetProperties([SERVER_PROPERTIES::REBOOTING => 0, SERVER_PROPERTIES::RESUMING => 0, SERVER_PROPERTIES::SUB_STATUS => ""]); $DBServer->remoteIp = ""; $DBServer->localIp = ""; $DBServer->status = SERVER_STATUS::SUSPENDED; $DBServer->Save(); Scalr::FireEvent($DBFarm->ID, new HostDownEvent($DBServer)); continue; } else { // If server was suspended during initialization - we do not support this and need to terminate this instance $DBServer->terminate(DBServer::TERMINATE_REASON_CRASHED); continue; } } } if ($DBServer->status != SERVER_STATUS::RUNNING && $DBServer->GetRealStatus()->IsRunning()) { if ($DBServer->status == SERVER_STATUS::SUSPENDED) { //TODO: Depends on resume strategy need to set server status // For Openstack we need to re-accociate IPs $DBServer->SetProperty(\SERVER_PROPERTIES::RESUMING, 0); try { if ($DBServer->isOpenstack()) { $this->openstackSetFloatingIp($DBServer); } } catch (Exception $e) { if (!$DBServer->GetProperty(SERVER_PROPERTIES::SZR_IS_INIT_FAILED)) { $DBServer->SetProperties([SERVER_PROPERTIES::SZR_IS_INIT_FAILED => 1, SERVER_PROPERTIES::SZR_IS_INIT_ERROR_MSG => $e->getMessage()]); } } Scalr::FireEvent($DBFarm->ID, new HostUpEvent($DBServer, "")); continue; } elseif (!in_array($DBServer->status, array(SERVER_STATUS::TERMINATED, SERVER_STATUS::TROUBLESHOOTING))) { if ($DBServer->platform == SERVER_PLATFORMS::EC2) { if ($DBServer->status == SERVER_STATUS::PENDING && $DBFarm->GetSetting(DBFarm::SETTING_EC2_VPC_ID)) { if ($DBServer->GetFarmRoleObject()->GetSetting(DBFarmRole::SETTING_AWS_VPC_INTERNET_ACCESS) != 'outbound-only') { $ipAddress = \Scalr\Modules\Platforms\Ec2\Helpers\EipHelper::setEipForServer($DBServer); if ($ipAddress) { $DBServer->remoteIp = $ipAddress; $DBServer->Save(); } } } } try { if ($DBServer->isOpenstack()) { $this->openstackSetFloatingIp($DBServer); } } catch (Exception $e) { if (!$DBServer->GetProperty(SERVER_PROPERTIES::SZR_IS_INIT_FAILED)) { $DBServer->SetProperties([SERVER_PROPERTIES::SZR_IS_INIT_FAILED => 1, SERVER_PROPERTIES::SZR_IS_INIT_ERROR_MSG => $e->getMessage()]); } } if ($DBServer->isCloudstack()) { if ($DBServer->status == SERVER_STATUS::PENDING) { $jobId = $DBServer->GetProperty(CLOUDSTACK_SERVER_PROPERTIES::LAUNCH_JOB_ID); try { $cs = $DBServer->GetEnvironmentObject()->cloudstack($DBServer->platform); $res = $cs->queryAsyncJobResult($jobId); if ($res->jobstatus == 1) { $DBServer->SetProperties([CLOUDSTACK_SERVER_PROPERTIES::TMP_PASSWORD => $res->virtualmachine->password, CLOUDSTACK_SERVER_PROPERTIES::SERVER_NAME => $res->virtualmachine->name]); } //TODO: handle failed job: $res->jobresult->jobstatus == 2 } catch (Exception $e) { if ($DBServer->farmId) { Logger::getLogger("CloudStack")->error(new FarmLogMessage($DBServer->farmId, $e->getMessage())); } } } } try { $dtadded = strtotime($DBServer->dateAdded); $DBFarmRole = $DBServer->GetFarmRoleObject(); $launch_timeout = $DBFarmRole->GetSetting(DBFarmRole::SETTING_SYSTEM_LAUNCH_TIMEOUT) > 0 ? $DBFarmRole->GetSetting(DBFarmRole::SETTING_SYSTEM_LAUNCH_TIMEOUT) : 900; } catch (Exception $e) { if (stristr($e->getMessage(), "not found")) { $DBServer->terminate(DBServer::TERMINATE_REASON_ROLE_REMOVED); } } $scripting_event = false; if ($DBServer->status == SERVER_STATUS::PENDING) { $event = "hostInit"; $scripting_event = EVENT_TYPE::HOST_INIT; } elseif ($DBServer->status == SERVER_STATUS::INIT) { $event = "hostUp"; $scripting_event = EVENT_TYPE::HOST_UP; } if ($scripting_event && $dtadded) { $scripting_timeout = (int) $this->db->GetOne("\n SELECT sum(timeout)\n FROM farm_role_scripts\n WHERE event_name=? AND\n farm_roleid=? AND issync='1'\n ", array($scripting_event, $DBServer->farmRoleId)); if ($scripting_timeout) { $launch_timeout = $launch_timeout + $scripting_timeout; } if ($dtadded + $launch_timeout < time() && !$DBFarmRole->GetRoleObject()->hasBehavior(ROLE_BEHAVIORS::MONGODB)) { //Add entry to farm log Logger::getLogger(LOG_CATEGORY::FARM)->warn(new FarmLogMessage($DBFarm->ID, sprintf("Server '%s' did not send '%s' event in %s seconds after launch (Try increasing timeouts in role settings). Considering it broken. Terminating instance.", $DBServer->serverId, $event, $launch_timeout))); try { $DBServer->terminate(array(DBServer::TERMINATE_REASON_SERVER_DID_NOT_SEND_EVENT, $event, $launch_timeout), false); } catch (Exception $err) { $this->logger->fatal($err->getMessage()); } } elseif ($DBFarmRole->GetRoleObject()->hasBehavior(ROLE_BEHAVIORS::MONGODB)) { //DO NOT TERMINATE MONGODB INSTANCES BY TIMEOUT! IT'S NOT SAFE //THINK ABOUT WORKAROUND } } // Is IP address changed? if (!$DBServer->IsRebooting()) { $ipaddresses = PlatformFactory::NewPlatform($DBServer->platform)->GetServerIPAddresses($DBServer); if ($ipaddresses['remoteIp'] && $DBServer->remoteIp && $DBServer->remoteIp != $ipaddresses['remoteIp'] || $ipaddresses['localIp'] && $DBServer->localIp && $DBServer->localIp != $ipaddresses['localIp']) { Logger::getLogger(LOG_CATEGORY::FARM)->warn(new FarmLogMessage($DBFarm->ID, sprintf("RemoteIP: %s (%s), LocalIp: %s (%s) (Poller).", $DBServer->remoteIp, $ipaddresses['remoteIp'], $DBServer->localIp, $ipaddresses['localIp']))); Scalr::FireEvent($DBServer->farmId, new IPAddressChangedEvent($DBServer, $ipaddresses['remoteIp'], $ipaddresses['localIp'])); } //TODO: Check health: } } } elseif ($DBServer->status == SERVER_STATUS::RUNNING && $DBServer->GetRealStatus()->isRunning()) { // Is IP address changed? if (!$DBServer->IsRebooting()) { $ipaddresses = PlatformFactory::NewPlatform($DBServer->platform)->GetServerIPAddresses($DBServer); if ($ipaddresses['remoteIp'] && $DBServer->remoteIp != $ipaddresses['remoteIp'] || $ipaddresses['localIp'] && $DBServer->localIp != $ipaddresses['localIp']) { Scalr::FireEvent($DBServer->farmId, new IPAddressChangedEvent($DBServer, $ipaddresses['remoteIp'], $ipaddresses['localIp'])); } if ($payAsYouGoTime) { $initTime = $DBServer->GetProperty(SERVER_PROPERTIES::INITIALIZED_TIME); if ($initTime < $payAsYouGoTime) { $initTime = $payAsYouGoTime; } $runningHours = ceil((time() - $initTime) / 3600); $scuUsed = $runningHours * Scalr_Billing::getSCUByInstanceType($DBServer->GetFlavor(), $DBServer->platform); $this->db->Execute("UPDATE servers_history SET scu_used = ?, scu_updated = 0 WHERE server_id = ?", array($scuUsed, $DBServer->serverId)); } if ($DBServer->platform == SERVER_PLATFORMS::EC2) { $env = Scalr_Environment::init()->loadById($DBServer->envId); $ec2 = $env->aws($DBServer->GetCloudLocation())->ec2; //TODO: $time = $DBServer->GetProperty(EC2_SERVER_PROPERTIES::IS_LOCKED_LAST_CHECK_TIME); if (!$time || time() < $time + 1200) { $isEnabled = $ec2->instance->describeAttribute($DBServer->GetCloudServerID(), InstanceAttributeType::disableApiTermination()); $DBServer->SetProperties([EC2_SERVER_PROPERTIES::IS_LOCKED => $isEnabled, EC2_SERVER_PROPERTIES::IS_LOCKED_LAST_CHECK_TIME => time()]); } } } else { //TODO: Check reboot timeout } } } catch (Exception $e) { if (stristr($e->getMessage(), "not found")) { print $e->getTraceAsString() . "\n"; } elseif (stristr($e->getMessage(), "Request limit exceeded")) { sleep(10); print "[1:SLEEP][Farm: {$farmId}] {$e->getMessage()} at {$e->getFile()}:{$e->getLine()}\n\n"; } else { print "[1][Farm: {$farmId}] {$e->getMessage()} at {$e->getFile()}:{$e->getLine()}\n\n"; } } } }
function handleWork($farmId) { $this->cleanup(); $DBFarm = DBFarm::LoadByID($farmId); $account = Scalr_Account::init()->loadById($DBFarm->ClientID); $payAsYouGoTime = $account->getSetting(Scalr_Account::SETTING_BILLING_PAY_AS_YOU_GO_DATE); $GLOBALS["SUB_TRANSACTIONID"] = abs(crc32(posix_getpid() . $farmId)); $GLOBALS["LOGGER_FARMID"] = $farmId; $this->logger->info("[" . $GLOBALS["SUB_TRANSACTIONID"] . "] Begin polling farm (ID: {$DBFarm->ID}, Name: {$DBFarm->Name}, Status: {$DBFarm->Status})"); // // Collect information from database // $servers_count = $this->db->GetOne("SELECT COUNT(*) FROM servers WHERE farm_id = ? AND status != ?", array($DBFarm->ID, SERVER_STATUS::TERMINATED)); $this->logger->info("[FarmID: {$DBFarm->ID}] Found {$servers_count} farm instances in database"); if ($DBFarm->Status == FARM_STATUS::TERMINATED && $servers_count == 0) { return; } foreach ($DBFarm->GetServersByFilter(array(), array('status' => SERVER_STATUS::PENDING_LAUNCH)) as $DBServer) { try { if ($DBServer->status != SERVER_STATUS::PENDING && $DBServer->status != SERVER_STATUS::PENDING_TERMINATE) { $p = PlatformFactory::NewPlatform($DBServer->platform); if (!$p->IsServerExists($DBServer)) { try { $serverInfo = $p->GetServerExtendedInformation($DBServer); } catch (Exception $e) { Logger::getLogger(LOG_CATEGORY::FARM)->error(sprintf("[CRASH][FarmID: {$DBFarm->ID}] Crash check for server '{$DBServer->serverId}' failed: {$e->getMessage()}")); } if (!$serverInfo) { if ($DBServer->status != SERVER_STATUS::PENDING_TERMINATE && $DBServer->status != SERVER_STATUS::TERMINATED) { if ($DBServer->GetProperty("system.crashed") == 1) { $DBServer->terminate(DBServer::TERMINATE_REASON_CRASHED); Scalr::FireEvent($DBFarm->ID, new HostCrashEvent($DBServer)); } else { $DBServer->SetProperty(SERVER_PROPERTIES::REBOOTING, 0); Logger::getLogger(LOG_CATEGORY::FARM)->warn(new FarmLogMessage($DBFarm->ID, sprintf("Server '%s' found in database but not found on {$DBServer->platform}. Crashed.", $DBServer->serverId))); $DBServer->SetProperty("system.crashed", "1"); } continue; } } else { Logger::getLogger(LOG_CATEGORY::FARM)->error(sprintf("[CRASH][FarmID: {$DBFarm->ID}] False-positive crash check: {$DBServer->serverId}")); if ($DBServer->platform == SERVER_PLATFORMS::EC2) { Logger::getLogger(LOG_CATEGORY::FARM)->fatal(sprintf("[CRASH][FarmID: {$DBFarm->ID}] InstanceID: %s, List: %s", $DBServer->GetCloudServerID(), json_encode($p->instancesListCache))); } } } else { $DBServer->SetProperty("system.crashed", "0"); } } } catch (Exception $e) { if (stristr($e->getMessage(), "AWS was not able to validate the provided access credentials") || stristr($e->getMessage(), "Unable to sign AWS API request. Please, check your X.509")) { $env = Scalr_Environment::init()->LoadById($DBFarm->EnvID); $env->status = Scalr_Environment::STATUS_INACTIVE; $env->save(); $env->setPlatformConfig(array('system.auto-disable-reason' => $e->getMessage()), false); return; } if (stristr($e->getMessage(), "Could not connect to host")) { continue; } print "[0][Farm: {$farmId}] {$e->getMessage()} at {$e->getFile()}:{$e->getLine()}\n\n"; continue; } try { $realStatus = $DBServer->GetRealStatus()->getName(); if ($realStatus == 'stopped') { $DBServer->SetProperty(SERVER_PROPERTIES::SUB_STATUS, $realStatus); continue; } else { if ($DBServer->GetProperty(SERVER_PROPERTIES::SUB_STATUS) == 'stopped') { $DBServer->SetProperty(SERVER_PROPERTIES::SUB_STATUS, ""); } } } catch (Exception $e) { } try { if (!in_array($DBServer->status, array(SERVER_STATUS::TERMINATED, SERVER_STATUS::PENDING_TERMINATE)) && $DBServer->GetRealStatus()->isTerminated()) { Logger::getLogger(LOG_CATEGORY::FARM)->warn(new FarmLogMessage($DBFarm->ID, sprintf("Server '%s' (Platform: %s) not running (Real state: %s, Scalr status: %s).", $DBServer->serverId, $DBServer->platform, $DBServer->GetRealStatus()->getName(), $DBServer->status))); $DBServer->terminate(DBServer::TERMINATE_REASON_CRASHED); $DBServer->SetProperty(SERVER_PROPERTIES::REBOOTING, 0); Scalr::FireEvent($DBFarm->ID, new HostDownEvent($DBServer)); continue; } elseif ($DBServer->status != SERVER_STATUS::RUNNING && $DBServer->GetRealStatus()->IsRunning()) { if ($DBServer->status != SERVER_STATUS::TERMINATED && $DBServer->status != SERVER_STATUS::TROUBLESHOOTING) { /* if ($DBServer->platform == SERVER_PLATFORMS::NIMBULA) { if (!$DBServer->GetProperty(NIMBULA_SERVER_PROPERTIES::USER_DATA_INJECTED)) { $dbRole = $DBServer->GetFarmRoleObject()->GetRoleObject(); $ssh2Client = new Scalr_Net_Ssh2_Client(); $ssh2Client->addPassword( $dbRole->getProperty(DBRole::PROPERTY_NIMBULA_INIT_ROOT_USER), $dbRole->getProperty(DBRole::PROPERTY_NIMBULA_INIT_ROOT_PASS) ); $info = PlatformFactory::NewPlatform($DBServer->platform)->GetServerIPAddresses($DBServer); $port = $dbRole->getProperty(DBRole::PROPERTY_SSH_PORT); if (!$port) $port = 22; try { $ssh2Client->connect($info['remoteIp'], $port); foreach ($DBServer->GetCloudUserData() as $k=>$v) $u_data .= "{$k}={$v};"; $u_data = trim($u_data, ";"); $ssh2Client->sendFile('/etc/scalr/private.d/.user-data', $u_data, "w+", false); $DBServer->SetProperty(NIMBULA_SERVER_PROPERTIES::USER_DATA_INJECTED, 1); } catch(Exception $e) { Logger::getLogger(LOG_CATEGORY::FARM)->error(new FarmLogMessage($DBFarm->ID, $e->getMessage())); } } } */ if ($DBServer->platform == SERVER_PLATFORMS::EC2) { if ($DBServer->status == SERVER_STATUS::PENDING && $DBFarm->GetSetting(DBFarm::SETTING_EC2_VPC_ID)) { if ($DBServer->GetFarmRoleObject()->GetSetting(DBFarmRole::SETTING_AWS_VPC_INTERNET_ACCESS) != 'outbound-only') { $ipAddress = Modules_Platforms_Ec2_Helpers_Eip::setEipForServer($DBServer); if ($ipAddress) { $DBServer->remoteIp = $ipAddress; $DBServer->Save(); } } } } if ($DBServer->isOpenstack()) { $ipPool = $DBServer->GetFarmRoleObject()->GetSetting(DBFarmRole::SETTING_OPENSTACK_IP_POOL); if (!$DBServer->remoteIp && ($DBServer->status == SERVER_STATUS::PENDING || $DBServer->status == SERVER_STATUS::INIT) && $ipPool) { $osClient = $DBServer->GetEnvironmentObject()->openstack($DBServer->platform, $DBServer->GetProperty(OPENSTACK_SERVER_PROPERTIES::CLOUD_LOCATION)); if ($osClient->hasService('network')) { $platform = PlatformFactory::NewPlatform($DBServer->platform); $serverIps = $platform->GetServerIPAddresses($DBServer); /********* USE Quantum (Neuron) NETWORK *******/ $ips = $osClient->network->floatingIps->list(); //Check free existing IP $ipAssigned = false; $ipAddress = false; $ipId = false; $ipInfo = false; foreach ($ips as $ip) { if ($ip->fixed_ip_address == $serverIps['localIp'] && $ip->port_id) { $ipAddress = $ip->floating_ip_address; $ipId = $ip->id; $ipAssigned = true; $ipInfo = $ip; break; } /* if (!$ip->fixed_ip_address && !$ipAddress) { $ipAddress = $ip->floating_ip_address; $ipId = $ip->id; $ipInfo = $ip; } */ } if ($ipInfo) { Logger::getLogger("Openstack")->warn(new FarmLogMessage($DBServer->farmId, "Found free floating IP: {$ipAddress} for use (" . json_encode($ipInfo) . ")")); } if (!$ipAssigned) { // Get instance port $ports = $osClient->network->ports->list(); foreach ($ports as $port) { if ($port->device_id == $DBServer->GetProperty(OPENSTACK_SERVER_PROPERTIES::SERVER_ID)) { $serverNetworkPort = $port->id; break; } } if (!$serverNetworkPort) { Logger::getLogger("Openstack")->error(new FarmLogMessage($DBServer->farmId, "Unable to identify network port of instance")); } else { if (!$ipAddress) { $publicNetworkId = $ipPool; /* $networks = $osClient->network->listNetworks(); foreach ($networks as $network) { if ($network->{"router:external"} == true) { $publicNetworkId = $network->id; } } if (!$publicNetworkId) { Logger::getLogger("Openstack")->error(new FarmLogMessage($DBServer->farmId, "Unable to identify public network to allocate" )); } else { */ $ip = $osClient->network->floatingIps->create($publicNetworkId, $serverNetworkPort); $ipAddress = $ip->floating_ip_address; $DBServer->SetProperties(array(OPENSTACK_SERVER_PROPERTIES::FLOATING_IP => $ip->floating_ip_address, OPENSTACK_SERVER_PROPERTIES::FLOATING_IP_ID => $ip->id)); Logger::getLogger("Openstack")->warn(new FarmLogMessage($DBServer->farmId, "Allocated new IP {$ipAddress} for port: {$serverNetworkPort}")); //} } else { /* Logger::getLogger("Openstack")->warn(new FarmLogMessage($DBServer->farmId, "Updating IP {$ipAddress} ({$ipId}) with port: {$serverNetworkPort}" )); $osClient->network->floatingIps->update($ipId, $serverNetworkPort); */ } } } else { Logger::getLogger("Openstack")->warn(new FarmLogMessage($DBServer->farmId, "IP: {$ipAddress} already assigned")); } } else { /********* USE NOVA NETWORK *******/ //Check free existing IP $ipAssigned = false; $ipAddress = false; $ips = $osClient->servers->floatingIps->list($ipPool); foreach ($ips as $ip) { //if (!$ip->instance_id) { // $ipAddress = $ip->ip; //} if ($ip->instance_id == $DBServer->GetProperty(OPENSTACK_SERVER_PROPERTIES::SERVER_ID)) { $ipAddress = $ip->ip; $ipAssigned = true; } } //If no free IP allocate new from pool if (!$ipAddress) { $ip = $osClient->servers->floatingIps->create($ipPool); $ipAddress = $ip->ip; $DBServer->SetProperties(array(OPENSTACK_SERVER_PROPERTIES::FLOATING_IP => $ip->ip, OPENSTACK_SERVER_PROPERTIES::FLOATING_IP_ID => $ip->id)); } if (!$ipAssigned) { //Associate floating IP with Instance $osClient->servers->addFloatingIp($DBServer->GetCloudServerID(), $ipAddress); } } if ($ipAddress) { $DBServer->remoteIp = $ipAddress; $DBServer->Save(); $DBServer->SetProperty(SERVER_PROPERTIES::SYSTEM_IGNORE_INBOUND_MESSAGES, null); } } } if ($DBServer->isCloudstack()) { if ($DBServer->status == SERVER_STATUS::PENDING) { $jobId = $DBServer->GetProperty(CLOUDSTACK_SERVER_PROPERTIES::LAUNCH_JOB_ID); try { $platform = PlatformFactory::NewPlatform($DBServer->platform); $cs = Scalr_Service_Cloud_Cloudstack::newCloudstack($platform->getConfigVariable(Modules_Platforms_Cloudstack::API_URL, $DBServer->GetEnvironmentObject()), $platform->getConfigVariable(Modules_Platforms_Cloudstack::API_KEY, $DBServer->GetEnvironmentObject()), $platform->getConfigVariable(Modules_Platforms_Cloudstack::SECRET_KEY, $DBServer->GetEnvironmentObject()), $DBServer->platform); $res = $cs->queryAsyncJobResult($jobId); if ($res->jobresult->jobstatus == 1) { $DBServer->SetProperty(CLOUDSTACK_SERVER_PROPERTIES::TMP_PASSWORD, $res->jobresult->virtualmachine->password); $DBServer->SetProperty(CLOUDSTACK_SERVER_PROPERTIES::SERVER_NAME, $res->jobresult->virtualmachine->name); } //TODO: handle failed job: $res->jobresult->jobstatus == 2 } catch (Exception $e) { if ($DBServer->farmId) { Logger::getLogger("CloudStack")->error(new FarmLogMessage($DBServer->farmId, $e->getMessage())); } } } } try { $dtadded = strtotime($DBServer->dateAdded); $DBFarmRole = $DBServer->GetFarmRoleObject(); $launch_timeout = $DBFarmRole->GetSetting(DBFarmRole::SETTING_SYSTEM_LAUNCH_TIMEOUT) > 0 ? $DBFarmRole->GetSetting(DBFarmRole::SETTING_SYSTEM_LAUNCH_TIMEOUT) : 900; } catch (Exception $e) { if (stristr($e->getMessage(), "not found")) { $DBServer->terminate('FARM_ROLE_NOT_FOUND'); } } $scripting_event = false; if ($DBServer->status == SERVER_STATUS::PENDING) { $event = "hostInit"; $scripting_event = EVENT_TYPE::HOST_INIT; } elseif ($DBServer->status == SERVER_STATUS::INIT) { $event = "hostUp"; $scripting_event = EVENT_TYPE::HOST_UP; } if ($scripting_event && $dtadded) { $scripting_timeout = (int) $this->db->GetOne("\n SELECT sum(timeout)\n FROM farm_role_scripts\n WHERE event_name=? AND\n farm_roleid=? AND issync='1'\n ", array($scripting_event, $DBServer->farmRoleId)); if ($scripting_timeout) { $launch_timeout = $launch_timeout + $scripting_timeout; } if ($dtadded + $launch_timeout < time() && !$DBFarmRole->GetRoleObject()->hasBehavior(ROLE_BEHAVIORS::MONGODB)) { //Add entry to farm log Logger::getLogger(LOG_CATEGORY::FARM)->warn(new FarmLogMessage($DBFarm->ID, sprintf("Server '%s' did not send '%s' event in %s seconds after launch (Try increasing timeouts in role settings). Considering it broken. Terminating instance.", $DBServer->serverId, $event, $launch_timeout))); try { $DBServer->terminate(array('SERVER_DID_NOT_SEND_EVENT', $event, $launch_timeout), false); } catch (Exception $err) { $this->logger->fatal($err->getMessage()); } } elseif ($DBFarmRole->GetRoleObject()->hasBehavior(ROLE_BEHAVIORS::MONGODB)) { //DO NOT TERMINATE MONGODB INSTANCES BY TIMEOUT! IT'S NOT SAFE //THINK ABOUT WORKAROUND } } // Is IP address changed? if (!$DBServer->IsRebooting()) { $ipaddresses = PlatformFactory::NewPlatform($DBServer->platform)->GetServerIPAddresses($DBServer); if ($ipaddresses['remoteIp'] && $DBServer->remoteIp && $DBServer->remoteIp != $ipaddresses['remoteIp'] || $ipaddresses['localIp'] && $DBServer->localIp && $DBServer->localIp != $ipaddresses['localIp']) { Logger::getLogger(LOG_CATEGORY::FARM)->warn(new FarmLogMessage($DBFarm->ID, sprintf("RemoteIP: %s (%s), LocalIp: %s (%s) (Poller).", $DBServer->remoteIp, $ipaddresses['remoteIp'], $DBServer->localIp, $ipaddresses['localIp']))); Scalr::FireEvent($DBServer->farmId, new IPAddressChangedEvent($DBServer, $ipaddresses['remoteIp'], $ipaddresses['localIp'])); } //TODO: Check health: } } } elseif ($DBServer->status == SERVER_STATUS::RUNNING && $DBServer->GetRealStatus()->isRunning()) { // Is IP address changed? if (!$DBServer->IsRebooting()) { $ipaddresses = PlatformFactory::NewPlatform($DBServer->platform)->GetServerIPAddresses($DBServer); if ($ipaddresses['remoteIp'] && $DBServer->remoteIp && $DBServer->remoteIp != $ipaddresses['remoteIp'] || $ipaddresses['localIp'] && $DBServer->localIp && $DBServer->localIp != $ipaddresses['localIp']) { Scalr::FireEvent($DBServer->farmId, new IPAddressChangedEvent($DBServer, $ipaddresses['remoteIp'], $ipaddresses['localIp'])); } if ($payAsYouGoTime) { $initTime = $DBServer->GetProperty(SERVER_PROPERTIES::INITIALIZED_TIME); if ($initTime < $payAsYouGoTime) { $initTime = $payAsYouGoTime; } $runningHours = ceil((time() - $initTime) / 3600); $scuUsed = $runningHours * Scalr_Billing::getSCUByInstanceType($DBServer->GetFlavor()); $this->db->Execute("UPDATE servers_history SET scu_used = ?, scu_updated = 0 WHERE server_id = ?", array($scuUsed, $DBServer->serverId)); } //Update GCE ServerID if ($DBServer->platform == SERVER_PLATFORMS::GCE) { if ($DBServer->GetProperty(GCE_SERVER_PROPERTIES::SERVER_ID) == $DBServer->serverId) { $info = PlatformFactory::NewPlatform($DBServer->platform)->GetServerExtendedInformation($DBServer); $DBServer->SetProperty(GCE_SERVER_PROPERTIES::SERVER_ID, $info['Cloud Server ID']); } } if ($DBServer->platform == SERVER_PLATFORMS::EC2) { $env = Scalr_Environment::init()->loadById($DBServer->envId); $ec2 = $env->aws($DBServer->GetCloudLocation())->ec2; //TODO: $isEnabled = $ec2->instance->describeAttribute($DBServer->GetCloudServerID(), InstanceAttributeType::disableApiTermination()); $DBServer->SetProperty(EC2_SERVER_PROPERTIES::IS_LOCKED, $isEnabled); } } else { //TODO: Check reboot timeout } } } catch (Exception $e) { if (stristr($e->getMessage(), "not found")) { print $e->getTraceAsString() . "\n"; } else { print "[1][Farm: {$farmId}] {$e->getMessage()} at {$e->getFile()}:{$e->getLine()}\n\n"; } } } }
/** * {@inheritdoc} * @see \Scalr\System\Zmq\Cron\TaskInterface::worker() */ public function worker($request) { $db = \Scalr::getDb(); //Speed up poller if ($this->config()->daemon) { //Warming up static DI cache \Scalr::getContainer()->warmup(); } // Reconfigure observers \Scalr::ReconfigureObservers(); $DBFarm = DBFarm::LoadByID($request->farmId); $account = Scalr_Account::init()->loadById($DBFarm->ClientID); $payAsYouGoTime = $account->getSetting(Scalr_Account::SETTING_BILLING_PAY_AS_YOU_GO_DATE); $transactionId = abs(crc32(posix_getpid() . $request->farmId)); $this->getLogger()->info("[%s] Begin polling farm (ID: %d, Name: %s, Status: %s)", $transactionId, $DBFarm->ID, $DBFarm->Name, $DBFarm->Status); //Retrieves the number of either terminated or suspended servers for the farm $servers_count = $db->GetOne("\n SELECT COUNT(*) AS cnt FROM servers WHERE farm_id = ? AND status NOT IN (?,?)\n ", [$DBFarm->ID, SERVER_STATUS::TERMINATED, SERVER_STATUS::SUSPENDED]); if ($DBFarm->Status == FARM_STATUS::TERMINATED && $servers_count == 0) { //There are no servers for this farm return; } $this->getLogger()->info("%d server%s for the farm: %d", $servers_count, $servers_count == 1 ? '' : 's', $DBFarm->ID); $config = \Scalr::getContainer()->config; foreach ($DBFarm->GetServersByFilter(array(), array('status' => SERVER_STATUS::PENDING_LAUNCH)) as $DBServer) { /* @var $DBServer \DBServer */ try { if ($DBServer->cloudLocation) { try { $this->getLogger()->info("Retrieving the list of the instances for %s, server: %s", $DBServer->cloudLocation, $DBServer->serverId); $p = PlatformFactory::NewPlatform($DBServer->platform); $p->GetServersList($DBServer->GetEnvironmentObject(), $DBServer->cloudLocation); } catch (Exception $e) { $this->getLogger()->error("[Server: %s] Could not retrieve the list of the instances: %s", $DBServer->serverId, $e->getMessage()); continue; } } if ($DBServer->status != SERVER_STATUS::PENDING && $DBServer->status != SERVER_STATUS::PENDING_TERMINATE) { if (!$p->IsServerExists($DBServer)) { try { $serverInfo = $p->GetServerExtendedInformation($DBServer); } catch (Exception $e) { $this->getLogger()->error("[CRASH][FarmID: %d] Crash check for server '%s' failed: %s", $DBFarm->ID, $DBServer->serverId, $e->getMessage()); } if (!$serverInfo) { if (!in_array($DBServer->status, [SERVER_STATUS::PENDING_TERMINATE, SERVER_STATUS::TERMINATED])) { if ($DBServer->isOpenstack() && $DBServer->status == SERVER_STATUS::SUSPENDED) { continue; } elseif ($DBServer->platform == \SERVER_PLATFORMS::GCE && $DBServer->status == SERVER_STATUS::SUSPENDED) { $DBServer->terminate(DBServer::TERMINATE_REASON_CRASHED); Logger::getLogger(LOG_CATEGORY::FARM)->warn(new FarmLogMessage($DBFarm->ID, sprintf("Server '%s' was terminated", $DBServer->serverId), $DBServer->serverId)); continue; } if ($DBServer->GetProperty(SERVER_PROPERTIES::CRASHED) == 1) { $action = 'terminate'; if ($config->defined("scalr.{$DBServer->platform}.action_on_missing_server")) { $action = $config->get("scalr.{$DBServer->platform}.action_on_missing_server"); } if ($action == 'flag') { $DBServer->SetProperty(SERVER_PROPERTIES::MISSING, 1); } else { $DBServer->terminate(DBServer::TERMINATE_REASON_CRASHED); \Scalr::FireEvent($DBFarm->ID, new HostCrashEvent($DBServer)); } } else { $DBServer->SetProperties([SERVER_PROPERTIES::REBOOTING => 0, SERVER_PROPERTIES::CRASHED => 1, SERVER_PROPERTIES::MISSING => 1]); Logger::getLogger(LOG_CATEGORY::FARM)->warn(new FarmLogMessage($DBFarm->ID, sprintf("Server '%s' found in database but not found on %s. Crashed.", $DBServer->serverId, $DBServer->platform), $DBServer->serverId)); } continue; } } else { //http.persistent.handles.limit must be set to 0 for pecl-http version 1 $this->getLogger()->error("[CRASH][FarmID: %d] False-positive crash check: %s (EnvID: %d). Please verify current scalr install with app/www/testenvironment.php", $DBFarm->ID, $DBServer->serverId, $DBServer->envId); //More debug $this->getLogger()->error(sprintf("[CRASH][FarmID: %d] Debug: %s", $DBFarm->ID, json_encode($p->instancesListCache))); $this->getLogger()->error(sprintf("[CRASH][FarmID: %d] {$DBServer->GetCloudServerID()}")); } } else { $DBServer->SetProperties([SERVER_PROPERTIES::CRASHED => 0, SERVER_PROPERTIES::MISSING => 0]); } } } catch (Exception $e) { if (stristr($e->getMessage(), "AWS was not able to validate the provided access credentials") || stristr($e->getMessage(), "You are not authorized to perform this operation") || stristr($e->getMessage(), "Unable to sign AWS API request. Please, check your X.509")) { /* @var $env \Scalr_Environment */ $env = Scalr_Environment::init()->LoadById($DBFarm->EnvID); $env->status = Scalr_Environment::STATUS_INACTIVE; $env->save(); //Saving the reason why this environment is disabled $env->setPlatformConfig(['system.auto-disable-reason' => $e->getMessage()]); return; } elseif (stristr($e->getMessage(), "Could not connect to host")) { continue; } $this->getLogger()->warn("Exception for farm: %d with the message: %s, in the %s:%s", $request->farmId, $e->getMessage(), $e->getFile(), $e->getLine()); continue; } try { if (!in_array($DBServer->status, [SERVER_STATUS::SUSPENDED, SERVER_STATUS::TERMINATED, SERVER_STATUS::PENDING_TERMINATE, SERVER_STATUS::PENDING_SUSPEND])) { $openstackErrorState = false; if (PlatformFactory::isOpenstack($DBServer->platform) && $DBServer->GetRealStatus()->getName() === 'ERROR') { $openstackErrorState = true; } if ($DBServer->GetRealStatus()->isTerminated() || $openstackErrorState) { if ($openstackErrorState) { try { $info = PlatformFactory::NewPlatform($DBServer->platform)->GetServerExtendedInformation($DBServer); Logger::getLogger(LOG_CATEGORY::FARM)->warn(new FarmLogMessage($DBFarm->ID, sprintf("Server '%s' (Platform: %s) is not running. Status: %s. Terminating.", $DBServer->serverId, $DBServer->platform, $info['Status']), $DBServer->serverId)); } catch (Exception $e) { } } else { Logger::getLogger(LOG_CATEGORY::FARM)->warn(new FarmLogMessage($DBFarm->ID, sprintf("Server '%s' (Platform: %s) is not running (Real state: %s, Scalr status: %s).", $DBServer->serverId, $DBServer->platform, $DBServer->GetRealStatus()->getName(), $DBServer->status), $DBServer->serverId)); } $DBServer->terminate(DBServer::TERMINATE_REASON_CRASHED); $DBServer->SetProperties([SERVER_PROPERTIES::REBOOTING => 0, SERVER_PROPERTIES::RESUMING => 0]); \Scalr::FireEvent($DBFarm->ID, new HostDownEvent($DBServer)); continue; } elseif ($DBServer->GetRealStatus()->isSuspended()) { //In case the server was suspended when it was running if ($DBServer->status == SERVER_STATUS::RUNNING) { Logger::getLogger(LOG_CATEGORY::FARM)->warn(new FarmLogMessage($DBFarm->ID, sprintf("Server '%s' (Platform: %s) is not running (Status in cloud: %s, Status in Scalr: %s).", $DBServer->serverId, $DBServer->platform, $DBServer->GetRealStatus()->getName(), $DBServer->status), $DBServer->serverId)); $DBServer->SetProperties([SERVER_PROPERTIES::REBOOTING => 0, SERVER_PROPERTIES::RESUMING => 0]); $DBServer->remoteIp = ""; $DBServer->localIp = ""; $DBServer->status = SERVER_STATUS::SUSPENDED; $DBServer->Save(); $event = new HostDownEvent($DBServer); $event->isSuspended = true; \Scalr::FireEvent($DBFarm->ID, $event); continue; } else { //If the server was suspended during initialization //we do not support this and need to terminate this instance $DBServer->terminate(DBServer::TERMINATE_REASON_CRASHED); continue; } } } if ($DBServer->status != SERVER_STATUS::RUNNING && $DBServer->GetRealStatus()->IsRunning()) { if ($DBServer->status == SERVER_STATUS::SUSPENDED) { $platform = PlatformFactory::NewPlatform($DBServer->platform); if ($DBServer->platform == \SERVER_PLATFORMS::GCE) { if ($platform->GetServerRealStatus($DBServer)->getName() == 'STOPPING') { continue; } } // For Openstack we need to re-accociate IPs try { if ($DBServer->isOpenstack()) { $this->openstackSetFloatingIp($DBServer); } } catch (Exception $e) { if (!$DBServer->GetProperty(SERVER_PROPERTIES::SZR_IS_INIT_FAILED)) { $DBServer->SetProperties([SERVER_PROPERTIES::SZR_IS_INIT_FAILED => 1, SERVER_PROPERTIES::SZR_IS_INIT_ERROR_MSG => "Scalr is unable to allocate/associate floating IP with server: " . $e->getMessage()]); } } if ($DBServer->platform == \SERVER_PLATFORMS::CLOUDSTACK) { if (!$DBServer->remoteIp) { $DBServer->remoteIp = CloudstackHelper::getSharedIP($DBServer); $DBServer->Save(); } } if ($platform->getResumeStrategy() == \Scalr_Role_Behavior::RESUME_STRATEGY_INIT) { $DBServer->status = \SERVER_STATUS::PENDING; $DBServer->SetProperty(\SERVER_PROPERTIES::RESUMING, 1); $DBServer->dateAdded = date("Y-m-d H:i:s"); $DBServer->Save(); } else { $DBServer->SetProperty(\SERVER_PROPERTIES::RESUMING, 0); \Scalr::FireEvent($DBFarm->ID, new HostUpEvent($DBServer, "")); } continue; } elseif (!in_array($DBServer->status, array(SERVER_STATUS::TERMINATED, SERVER_STATUS::TROUBLESHOOTING))) { if ($DBServer->platform == SERVER_PLATFORMS::EC2) { if ($DBServer->status == SERVER_STATUS::PENDING) { if (!$DBServer->remoteIp && !$DBServer->localIp) { $ipaddresses = PlatformFactory::NewPlatform($DBServer->platform)->GetServerIPAddresses($DBServer); if ($ipaddresses['remoteIp'] && !$DBServer->remoteIp || $ipaddresses['localIp'] && !$DBServer->localIp) { $DBServer->remoteIp = $ipaddresses['remoteIp']; $DBServer->localIp = $ipaddresses['localIp']; $DBServer->Save(); } //Add tags Ec2Helper::createServerTags($DBServer); } if ($DBFarm->GetSetting(DBFarm::SETTING_EC2_VPC_ID)) { if ($DBServer->GetFarmRoleObject()->GetSetting(DBFarmRole::SETTING_AWS_VPC_INTERNET_ACCESS) != 'outbound-only') { $ipAddress = Ec2EipHelper::setEipForServer($DBServer); if ($ipAddress) { $DBServer->remoteIp = $ipAddress; $DBServer->Save(); } } } } } try { if ($DBServer->isOpenstack()) { $this->openstackSetFloatingIp($DBServer); } } catch (Exception $e) { if (!$DBServer->GetProperty(SERVER_PROPERTIES::SZR_IS_INIT_FAILED)) { $DBServer->SetProperties([SERVER_PROPERTIES::SZR_IS_INIT_FAILED => 1, SERVER_PROPERTIES::SZR_IS_INIT_ERROR_MSG => "Scalr is unable to allocate/associate floating IP with server:" . $e->getMessage()]); } } if ($DBServer->isCloudstack()) { if ($DBServer->status == SERVER_STATUS::PENDING) { $jobId = $DBServer->GetProperty(CLOUDSTACK_SERVER_PROPERTIES::LAUNCH_JOB_ID); try { $cs = $DBServer->GetEnvironmentObject()->cloudstack($DBServer->platform); $res = $cs->queryAsyncJobResult($jobId); if ($res->jobstatus == 1) { $DBServer->SetProperties([CLOUDSTACK_SERVER_PROPERTIES::TMP_PASSWORD => $res->virtualmachine->password, CLOUDSTACK_SERVER_PROPERTIES::SERVER_NAME => $res->virtualmachine->name]); } //TODO handle failed job: $res->jobresult->jobstatus == 2 } catch (Exception $e) { if ($DBServer->farmId) { Logger::getLogger("CloudStack")->error(new FarmLogMessage($DBServer->farmId, $e->getMessage(), $DBServer->serverId)); } } } } try { $dtadded = strtotime($DBServer->dateAdded); $DBFarmRole = $DBServer->GetFarmRoleObject(); $launch_timeout = $DBFarmRole->GetSetting(DBFarmRole::SETTING_SYSTEM_LAUNCH_TIMEOUT) > 0 ? $DBFarmRole->GetSetting(DBFarmRole::SETTING_SYSTEM_LAUNCH_TIMEOUT) : 900; } catch (Exception $e) { if (stristr($e->getMessage(), "not found")) { $DBServer->terminate(DBServer::TERMINATE_REASON_ROLE_REMOVED); } } $scripting_event = false; if ($DBServer->status == SERVER_STATUS::PENDING) { $event = "hostInit"; $scripting_event = EVENT_TYPE::HOST_INIT; } elseif ($DBServer->status == SERVER_STATUS::INIT) { $event = "hostUp"; $scripting_event = EVENT_TYPE::HOST_UP; } if ($scripting_event && $dtadded) { $scripting_timeout = (int) $db->GetOne("\n SELECT SUM(timeout)\n FROM farm_role_scripts\n WHERE event_name = ? AND farm_roleid = ? AND issync = '1'\n ", [$scripting_event, $DBServer->farmRoleId]); if ($scripting_timeout) { $launch_timeout = $launch_timeout + $scripting_timeout; } if ($dtadded + $launch_timeout < time() && !$DBFarmRole->GetRoleObject()->hasBehavior(ROLE_BEHAVIORS::MONGODB)) { //Add entry to farm log Logger::getLogger(LOG_CATEGORY::FARM)->warn(new FarmLogMessage($DBFarm->ID, sprintf("Server '%s' did not send '%s' event in %s seconds after launch (Try increasing timeouts in role settings). Considering it broken. Terminating instance.", $DBServer->serverId, $event, $launch_timeout), $DBServer->serverId)); try { $DBServer->terminate(array(DBServer::TERMINATE_REASON_SERVER_DID_NOT_SEND_EVENT, $event, $launch_timeout), false); } catch (Exception $err) { $this->getLogger()->fatal($err->getMessage()); } } elseif ($DBFarmRole->GetRoleObject()->hasBehavior(ROLE_BEHAVIORS::MONGODB)) { //DO NOT TERMINATE MONGODB INSTANCES BY TIMEOUT! IT'S NOT SAFE //THINK ABOUT WORKAROUND } } //Whether IP address is changed if (!$DBServer->IsRebooting()) { $ipaddresses = PlatformFactory::NewPlatform($DBServer->platform)->GetServerIPAddresses($DBServer); if ($ipaddresses['remoteIp'] && $DBServer->remoteIp && $DBServer->remoteIp != $ipaddresses['remoteIp'] || $ipaddresses['localIp'] && $DBServer->localIp && $DBServer->localIp != $ipaddresses['localIp']) { Logger::getLogger(LOG_CATEGORY::FARM)->warn(new FarmLogMessage($DBFarm->ID, sprintf("RemoteIP: %s (%s), LocalIp: %s (%s) (Poller).", $DBServer->remoteIp, $ipaddresses['remoteIp'], $DBServer->localIp, $ipaddresses['localIp']), $DBServer->serverId)); \Scalr::FireEvent($DBServer->farmId, new IPAddressChangedEvent($DBServer, $ipaddresses['remoteIp'], $ipaddresses['localIp'])); } //TODO Check health } } } elseif ($DBServer->status == SERVER_STATUS::SUSPENDED && $DBServer->GetRealStatus()->isTerminated()) { if ($DBServer->platform == SERVER_PLATFORMS::EC2) { $DBServer->terminate(DBServer::TERMINATE_REASON_CRASHED); \Scalr::FireEvent($DBFarm->ID, new HostCrashEvent($DBServer)); } elseif ($DBServer->platform == SERVER_PLATFORMS::GCE) { \Scalr::FireEvent($DBFarm->ID, new HostDownEvent($DBServer)); } } elseif ($DBServer->status == SERVER_STATUS::RUNNING && $DBServer->GetRealStatus()->isRunning()) { // Is IP address changed? if (!$DBServer->IsRebooting()) { $ipaddresses = PlatformFactory::NewPlatform($DBServer->platform)->GetServerIPAddresses($DBServer); if ($ipaddresses['remoteIp'] && $DBServer->remoteIp != $ipaddresses['remoteIp'] || $ipaddresses['localIp'] && $DBServer->localIp != $ipaddresses['localIp']) { \Scalr::FireEvent($DBServer->farmId, new IPAddressChangedEvent($DBServer, $ipaddresses['remoteIp'], $ipaddresses['localIp'])); } if ($payAsYouGoTime) { $initTime = $DBServer->GetProperty(SERVER_PROPERTIES::INITIALIZED_TIME); if ($initTime < $payAsYouGoTime) { $initTime = $payAsYouGoTime; } $runningHours = ceil((time() - $initTime) / 3600); $scuUsed = $runningHours * Scalr_Billing::getSCUByInstanceType($DBServer->GetFlavor(), $DBServer->platform); $db->Execute("UPDATE servers_history SET scu_used = ?, scu_updated = 0 WHERE server_id = ?", [$scuUsed, $DBServer->serverId]); } if ($DBServer->platform == SERVER_PLATFORMS::EC2) { $env = Scalr_Environment::init()->loadById($DBServer->envId); $ec2 = $env->aws($DBServer->GetCloudLocation())->ec2; $time = $DBServer->GetProperty(EC2_SERVER_PROPERTIES::IS_LOCKED_LAST_CHECK_TIME); if (!$time || time() < $time + 1200) { $isEnabled = $ec2->instance->describeAttribute($DBServer->GetCloudServerID(), InstanceAttributeType::disableApiTermination()); $DBServer->SetProperties([EC2_SERVER_PROPERTIES::IS_LOCKED => $isEnabled, EC2_SERVER_PROPERTIES::IS_LOCKED_LAST_CHECK_TIME => time()]); } } } else { //TODO Check reboot timeout } } } catch (Exception $e) { if (stristr($e->getMessage(), "not found")) { $this->getLogger()->fatal($e->getMessage()); } elseif (stristr($e->getMessage(), "Request limit exceeded")) { sleep(5); $this->getLogger()->error("[Farm: %d] sleep due to exception: %s", $request->farmId, $e->getMessage()); } else { $this->getLogger()->error("[Farm: %d] Exception: %s", $request->farmId, $e->getMessage()); } } } return $request; }
/** * {@inheritdoc} * @see \Scalr\System\Zmq\Cron\TaskInterface::worker() */ public function worker($request) { $db = \Scalr::getDb(); //The list of the suspension information about cloud platforms $this->aSuspensionInfo = []; //Speed up poller if ($this->config()->daemon) { //Warming up static DI cache \Scalr::getContainer()->warmup(); } // Reconfigure observers \Scalr::ReconfigureObservers(); $DBFarm = DBFarm::LoadByID($request->farmId); $account = Scalr_Account::init()->loadById($DBFarm->ClientID); $payAsYouGoTime = $account->getSetting(Scalr_Account::SETTING_BILLING_PAY_AS_YOU_GO_DATE); $transactionId = abs(crc32(posix_getpid() . $request->farmId)); $this->getLogger()->info("[%s] Begin polling farm (ID: %d, Name: %s, Status: %s, Platform:%s)", $transactionId, $DBFarm->ID, $DBFarm->Name, $DBFarm->Status, $request->platform); $jobStartTime = microtime(true); //Retrieves the number of either terminated or suspended servers for the farm $servers_count = $db->GetOne("\n SELECT COUNT(*) AS cnt FROM servers\n WHERE farm_id = ? AND platform = ? AND status NOT IN (?,?)\n ", [$DBFarm->ID, $request->platform, SERVER_STATUS::TERMINATED, SERVER_STATUS::SUSPENDED]); if ($DBFarm->Status == FARM_STATUS::TERMINATED && $servers_count == 0) { //There are no servers for this farm and platform return; } $this->getLogger()->info("%d server%s for the farm: %d and platform: %s", $servers_count, $servers_count == 1 ? '' : 's', $DBFarm->ID, $request->platform); $config = \Scalr::getContainer()->config; /* if ($request->platform) { $p = PlatformFactory::NewPlatform($request->platform); $p->ClearCache(); } */ $p = PlatformFactory::NewPlatform($request->platform); foreach ($DBFarm->GetServersByFilter(['platform' => $request->platform], ['status' => SERVER_STATUS::PENDING_LAUNCH]) as $DBServer) { /* @var $DBServer \DBServer */ //Get platform suspension info $suspensionInfo = $this->getSuspensionInfo($DBServer->platform, $DBServer->envId); //If the cloud platform is suspended we should not process it if ($suspensionInfo->isSuspended()) { continue; } try { //1. We need to check that server is exists in cloud and not missed. // (On Openstack server can be missed and should not be terminated) $cacheKey = sprintf('%s:%s', $DBServer->envId, $DBServer->cloudLocation); if ($DBServer->cloudLocation && count($p->instancesListCache[$cacheKey]) == 0) { try { $this->getLogger()->info("Retrieving the list of the instances for %s, server: %s, platform: %s", $DBServer->cloudLocation, $DBServer->serverId, $request->platform); if ($DBServer->platform == \SERVER_PLATFORMS::AZURE) { //For Azure we need to pass resource group instead of cloudLocation $p->GetServersList($DBServer->GetEnvironmentObject(), $DBServer->GetProperty(\AZURE_SERVER_PROPERTIES::RESOURCE_GROUP)); } else { $p->GetServersList($DBServer->GetEnvironmentObject(), $DBServer->cloudLocation); } //We successfully polled cloud so can resume suspension status for the cloud platform if ($suspensionInfo->isPendingSuspend()) { $suspensionInfo->resume(); } } catch (Exception $e) { if (CloudPlatformSuspensionInfo::isSuspensionException($e)) { $suspensionInfo->registerError($e->getMessage()); } $this->getLogger()->error("[Server: %s] Could not retrieve the list of the instances: %s", $DBServer->serverId, $e->getMessage()); continue; } } if ($DBServer->status != SERVER_STATUS::PENDING && $DBServer->status != SERVER_STATUS::PENDING_TERMINATE) { if (!$p->IsServerExists($DBServer)) { try { $serverInfo = $p->GetServerExtendedInformation($DBServer); } catch (Exception $e) { $this->getLogger()->error("[CRASH][FarmID: %d] Crash check for server '%s' failed: %s", $DBFarm->ID, $DBServer->serverId, $e->getMessage()); continue; } if (!$serverInfo) { if (!in_array($DBServer->status, [SERVER_STATUS::PENDING_TERMINATE, SERVER_STATUS::TERMINATED])) { if ($DBServer->isOpenstack() && $DBServer->status == SERVER_STATUS::SUSPENDED) { continue; } elseif ($DBServer->platform == \SERVER_PLATFORMS::GCE && $DBServer->status == SERVER_STATUS::SUSPENDED) { $DBServer->terminate(DBServer::TERMINATE_REASON_CRASHED); \Scalr::getContainer()->logger(LOG_CATEGORY::FARM)->warn(new FarmLogMessage($DBFarm->ID, sprintf(_("Server '%s' was terminated"), $DBServer->serverId), $DBServer->serverId)); continue; } $action = 'terminate'; if ($config->defined("scalr.{$DBServer->platform}.action_on_missing_server")) { $action = $config->get("scalr.{$DBServer->platform}.action_on_missing_server"); } if ($action == 'flag' && !$DBServer->GetProperty(SERVER_PROPERTIES::MISSING)) { \Scalr::getContainer()->logger(LOG_CATEGORY::FARM)->warn(new FarmLogMessage($DBFarm->ID, sprintf("Server '%s' found in Scalr but not found in the cloud (%s). Marking as Missing.", $DBServer->serverId, $DBServer->platform), $DBServer->serverId)); $DBServer->SetProperties([SERVER_PROPERTIES::REBOOTING => 0, SERVER_PROPERTIES::MISSING => 1]); } else { \Scalr::getContainer()->logger(LOG_CATEGORY::FARM)->warn(new FarmLogMessage($DBFarm->ID, sprintf("Server '%s' found in Scalr but not found in the cloud (%s). Terminating.", $DBServer->serverId, $DBServer->platform), $DBServer->serverId)); $DBServer->terminate(DBServer::TERMINATE_REASON_CRASHED); } continue; } } else { //http.persistent.handles.limit must be set to 0 for pecl-http version 1 $this->getLogger()->error("[CRASH][FarmID: %d] False-positive crash check: %s (EnvID: %d). Please verify current scalr install with app/www/testenvironment.php", $DBFarm->ID, $DBServer->serverId, $DBServer->envId); } } else { $DBServer->SetProperties([SERVER_PROPERTIES::MISSING => 0]); } } } catch (Exception $e) { if (CloudPlatformSuspensionInfo::isSuspensionException($e)) { $suspensionInfo->registerError($e->getMessage()); } $this->getLogger()->warn("Exception for Farm: %d, Platform: %s with the message: %s, in the %s:%s", $request->farmId, $request->platform, $e->getMessage(), $e->getFile(), $e->getLine()); continue; } try { if (!in_array($DBServer->status, [SERVER_STATUS::SUSPENDED, SERVER_STATUS::TERMINATED, SERVER_STATUS::PENDING_TERMINATE, SERVER_STATUS::PENDING_SUSPEND])) { $openstackErrorState = false; if (PlatformFactory::isOpenstack($DBServer->platform) && $DBServer->GetRealStatus()->getName() === 'ERROR') { $openstackErrorState = true; } if ($DBServer->GetRealStatus()->isTerminated() || $openstackErrorState) { // If openstack server is in ERROR state we need more details if ($openstackErrorState) { try { $info = $p->GetServerExtendedInformation($DBServer); $status = empty($info['Status']) ? false : $info['Status']; } catch (Exception $e) { } } if (empty($status)) { $status = $DBServer->GetRealStatus()->getName(); } \Scalr::getContainer()->logger(LOG_CATEGORY::FARM)->warn(new FarmLogMessage($DBFarm->ID, sprintf("Server '%s' (Platform: %s) was terminated in cloud or from within an OS. Status: %s.", $DBServer->serverId, $DBServer->platform, $status), $DBServer->serverId)); $DBServer->terminate(DBServer::TERMINATE_REASON_CRASHED); continue; } elseif ($DBServer->GetRealStatus()->isSuspended()) { //In case the server was suspended when it was running if ($DBServer->status == SERVER_STATUS::RUNNING) { \Scalr::getContainer()->logger(LOG_CATEGORY::FARM)->warn(new FarmLogMessage($DBFarm->ID, sprintf("Server '%s' (Platform: %s) is not running (Status in cloud: %s, Status in Scalr: %s).", $DBServer->serverId, $DBServer->platform, $DBServer->GetRealStatus()->getName(), $DBServer->status), $DBServer->serverId)); $event = new HostDownEvent($DBServer); $event->isSuspended = true; \Scalr::FireEvent($DBFarm->ID, $event); continue; } else { if ($DBServer->status != \SERVER_STATUS::RESUMING) { //If the server was suspended during initialization //we do not support this and need to terminate this instance if ($DBServer->platform == \SERVER_PLATFORMS::EC2) { try { $info = $p->GetServerExtendedInformation($DBServer); $realStatus = !empty($info['Instance state']) ? $info['Instance state'] : ''; } catch (\Exception $e) { // no need to do anything here; } $this->getLogger()->error("[SUSPEND_RESUME_ISSUE][ServerID: %s][2] Cached Cloud Status: %s (Cache age: %d seconds), Status: %s, Real status: %s", $DBServer->serverId, $DBServer->GetRealStatus()->getName(), time() - $p->instancesListCache[$cacheKey][$DBServer->GetCloudServerID()]['_timestamp'], $DBServer->status, $realStatus); } $DBServer->terminate(DBServer::TERMINATE_REASON_CRASHED); continue; } else { // Need to clear cache, because this situation happens only when cache is stale. $p->ClearCache(); } } } } if ($DBServer->status != SERVER_STATUS::RUNNING && $DBServer->GetRealStatus()->IsRunning()) { if ($DBServer->status == SERVER_STATUS::SUSPENDED) { if ($DBServer->platform == \SERVER_PLATFORMS::GCE) { if ($p->GetServerRealStatus($DBServer)->getName() == 'STOPPING') { continue; } } $update = []; // For Openstack we need to re-accociate IPs try { if ($DBServer->isOpenstack()) { OpenstackHelper::setServerFloatingIp($DBServer); } } catch (Exception $e) { if (!$DBServer->GetProperty(SERVER_PROPERTIES::SZR_IS_INIT_FAILED)) { $DBServer->SetProperties([\SERVER_PROPERTIES::SZR_IS_INIT_FAILED => 1, \SERVER_PROPERTIES::SZR_IS_INIT_ERROR_MSG => "Scalr is unable to allocate/associate floating IP with server: " . $e->getMessage()]); } } if ($DBServer->platform == \SERVER_PLATFORMS::CLOUDSTACK) { if (!$DBServer->remoteIp) { $update['remoteIp'] = CloudstackHelper::getSharedIP($DBServer); } } if ($DBServer->platform == \SERVER_PLATFORMS::EC2) { try { $info = $p->GetServerExtendedInformation($DBServer); $realStatus = !empty($info['Instance state']) ? $info['Instance state'] : ''; } catch (\Exception $e) { // no need to do anything here; } $this->getLogger()->error("[SUSPEND_RESUME_ISSUE][ServerID: %s][1] Cached Cloud Status: %s (Cache age: %d seconds), Status: %s, Real status: %s", $DBServer->serverId, $DBServer->GetRealStatus()->getName(), time() - $p->instancesListCache[$cacheKey][$DBServer->GetCloudServerID()]['_timestamp'], $DBServer->status, $realStatus); } $update['status'] = \SERVER_STATUS::RESUMING; $update['dateAdded'] = date("Y-m-d H:i:s"); $DBServer->update($update); unset($update); continue; } elseif (!in_array($DBServer->status, array(SERVER_STATUS::TERMINATED))) { $elasticIpAssigned = false; if ($DBServer->platform == SERVER_PLATFORMS::EC2) { if ($DBServer->status == SERVER_STATUS::PENDING) { if (!$DBServer->remoteIp && !$DBServer->localIp) { $ipaddresses = $p->GetServerIPAddresses($DBServer); $elasticIpAddress = Ec2EipHelper::setEipForServer($DBServer); if ($elasticIpAddress) { $ipaddresses['remoteIp'] = $elasticIpAddress; $DBServer->remoteIp = $elasticIpAddress; $elasticIpAssigned = true; } if ($ipaddresses['remoteIp'] && !$DBServer->remoteIp || $ipaddresses['localIp'] && !$DBServer->localIp || $elasticIpAssigned) { $DBServer->update(['remoteIp' => $ipaddresses['remoteIp'], 'localIp' => $ipaddresses['localIp']]); } //Add tags Ec2Helper::createObjectTags($DBServer); } } } if ($DBServer->platform == \SERVER_PLATFORMS::AZURE) { if ($DBServer->GetProperty(\AZURE_SERVER_PROPERTIES::SZR_EXTENSION_DEPLOYED)) { if (!$DBServer->GetProperty(SERVER_PROPERTIES::SZR_IS_INIT_FAILED)) { // Check scalarizr deployment status $env = $DBServer->GetEnvironmentObject(); $azure = $env->azure(); $info = $azure->compute->virtualMachine->getInstanceViewInfo($env->cloudCredentials(SERVER_PLATFORMS::AZURE)->properties[Entity\CloudCredentialsProperty::AZURE_SUBSCRIPTION_ID], $DBServer->GetProperty(\AZURE_SERVER_PROPERTIES::RESOURCE_GROUP), $DBServer->GetProperty(\AZURE_SERVER_PROPERTIES::SERVER_NAME)); $extensions = !empty($info->extensions) ? $info->extensions : []; foreach ($extensions as $extension) { /* @var $extension ExtensionData */ if ($extension->name == 'scalarizr') { $extStatus = $extension->statuses[0]; /* @var $extStatus StatusData */ if ($extStatus->level == 'Error') { $DBServer->SetProperties([\SERVER_PROPERTIES::SZR_IS_INIT_FAILED => 1, \SERVER_PROPERTIES::SZR_IS_INIT_ERROR_MSG => "Azure resource extension failed to provision scalr agent. Status: {$extStatus->code} ({$extStatus->message})"]); } } } } } else { AzureHelper::setupScalrAgent($DBServer); } } try { if ($DBServer->isOpenstack()) { OpenstackHelper::setServerFloatingIp($DBServer); } } catch (Exception $e) { if (!$DBServer->GetProperty(\SERVER_PROPERTIES::SZR_IS_INIT_FAILED)) { $DBServer->SetProperties([\SERVER_PROPERTIES::SZR_IS_INIT_FAILED => 1, \SERVER_PROPERTIES::SZR_IS_INIT_ERROR_MSG => "Scalr is unable to allocate/associate floating IP with server:" . $e->getMessage()]); } } if ($DBServer->isCloudstack()) { if ($DBServer->status == SERVER_STATUS::PENDING) { $jobId = $DBServer->GetProperty(CLOUDSTACK_SERVER_PROPERTIES::LAUNCH_JOB_ID); try { $cs = $DBServer->GetEnvironmentObject()->cloudstack($DBServer->platform); $res = $cs->queryAsyncJobResult($jobId); if ($res->jobstatus == 1) { $DBServer->SetProperties([CLOUDSTACK_SERVER_PROPERTIES::TMP_PASSWORD => $res->virtualmachine->password, CLOUDSTACK_SERVER_PROPERTIES::SERVER_NAME => $res->virtualmachine->name]); } //TODO handle failed job: $res->jobresult->jobstatus == 2 } catch (Exception $e) { if ($DBServer->farmId) { \Scalr::getContainer()->logger("CloudStack")->error(new FarmLogMessage($DBServer->farmId, $e->getMessage(), $DBServer->serverId)); } } } } try { $dtadded = strtotime($DBServer->dateAdded); $DBFarmRole = $DBServer->GetFarmRoleObject(); $launchTimeout = $DBFarmRole->GetSetting(Entity\FarmRoleSetting::SYSTEM_LAUNCH_TIMEOUT) > 0 ? $DBFarmRole->GetSetting(Entity\FarmRoleSetting::SYSTEM_LAUNCH_TIMEOUT) : 900; } catch (Exception $e) { if (stristr($e->getMessage(), "not found")) { $DBServer->terminate(DBServer::TERMINATE_REASON_ROLE_REMOVED); } } $scriptingEvent = false; $eventName = null; if ($DBServer->status == SERVER_STATUS::PENDING) { $eventName = "hostInit"; $scriptingEvent = EVENT_TYPE::HOST_INIT; } elseif ($DBServer->status == SERVER_STATUS::INIT) { $eventName = "hostUp"; $scriptingEvent = EVENT_TYPE::HOST_UP; } if ($scriptingEvent && $dtadded) { $hasPendingMessages = !!$db->GetOne("\n SELECT EXISTS(SELECT 1 FROM messages WHERE type='in' AND status='0' AND server_id = ?)\n ", [$DBServer->serverId]); $scriptingTimeout = (int) $db->GetOne("\n SELECT SUM(timeout)\n FROM farm_role_scripts\n WHERE event_name = ? AND farm_roleid = ? AND issync = '1'\n ", [$scriptingEvent, $DBServer->farmRoleId]); if ($scriptingTimeout) { $launchTimeout = $launchTimeout + $scriptingTimeout; } if (!$hasPendingMessages && $dtadded + $launchTimeout < time() && !$DBFarmRole->GetRoleObject()->hasBehavior(ROLE_BEHAVIORS::MONGODB)) { //Add entry to farm log \Scalr::getContainer()->logger(LOG_CATEGORY::FARM)->warn(new FarmLogMessage($DBFarm->ID, sprintf("Server '%s' did not send '%s' event in %s seconds after launch (Try increasing timeouts in role settings). Considering it broken. Terminating instance.", $DBServer->serverId, $eventName, $launchTimeout), $DBServer->serverId)); try { $DBServer->terminate(array(DBServer::TERMINATE_REASON_SERVER_DID_NOT_SEND_EVENT, $eventName, $launchTimeout), false); } catch (Exception $err) { $this->getLogger()->fatal($err->getMessage()); } } elseif ($DBFarmRole->GetRoleObject()->hasBehavior(ROLE_BEHAVIORS::MONGODB)) { //DO NOT TERMINATE MONGODB INSTANCES BY TIMEOUT! IT'S NOT SAFE //THINK ABOUT WORKAROUND } } //Whether IP address is changed if (!$DBServer->IsRebooting() && !$elasticIpAssigned) { $ipaddresses = $p->GetServerIPAddresses($DBServer); if ($ipaddresses['remoteIp'] && $DBServer->remoteIp && $DBServer->remoteIp != $ipaddresses['remoteIp'] || $ipaddresses['localIp'] && $DBServer->localIp && $DBServer->localIp != $ipaddresses['localIp']) { \Scalr::getContainer()->logger(LOG_CATEGORY::FARM)->warn(new FarmLogMessage($DBFarm->ID, sprintf("RemoteIP: %s (%s), LocalIp: %s (%s) (Poller).", $DBServer->remoteIp, $ipaddresses['remoteIp'], $DBServer->localIp, $ipaddresses['localIp']), $DBServer->serverId)); \Scalr::FireEvent($DBServer->farmId, new IPAddressChangedEvent($DBServer, $ipaddresses['remoteIp'], $ipaddresses['localIp'])); } //TODO Check health } } } elseif ($DBServer->status == SERVER_STATUS::SUSPENDED && $DBServer->GetRealStatus()->isTerminated()) { //TODO: Terminated outside scalr while in SUSPENDED state $DBServer->terminate(DBServer::TERMINATE_REASON_CRASHED); } elseif ($DBServer->status == SERVER_STATUS::RUNNING && $DBServer->GetRealStatus()->isRunning()) { // Is IP address changed? if (!$DBServer->IsRebooting()) { $ipaddresses = $p->GetServerIPAddresses($DBServer); // Private IP cannot be removed (only changed). if ($DBServer->remoteIp != $ipaddresses['remoteIp'] || $ipaddresses['localIp'] && $DBServer->localIp != $ipaddresses['localIp']) { \Scalr::FireEvent($DBServer->farmId, new IPAddressChangedEvent($DBServer, $ipaddresses['remoteIp'], $ipaddresses['localIp'])); } if ($payAsYouGoTime) { $initTime = $DBServer->dateInitialized ? strtotime($DBServer->dateInitialized) : null; if ($initTime < $payAsYouGoTime) { $initTime = $payAsYouGoTime; } $runningHours = ceil((time() - $initTime) / 3600); $scuUsed = $runningHours * Scalr_Billing::getSCUByInstanceType($DBServer->getType(), $DBServer->platform); $db->Execute("UPDATE servers_history SET scu_used = ?, scu_updated = 0 WHERE server_id = ?", [$scuUsed, $DBServer->serverId]); } if ($DBServer->platform == SERVER_PLATFORMS::EC2) { $env = Scalr_Environment::init()->loadById($DBServer->envId); $ec2 = $env->aws($DBServer->GetCloudLocation())->ec2; $time = $DBServer->GetProperty(EC2_SERVER_PROPERTIES::IS_LOCKED_LAST_CHECK_TIME); if (!$time || time() > $time + 1200) { $isEnabled = $ec2->instance->describeAttribute($DBServer->GetCloudServerID(), InstanceAttributeType::disableApiTermination()); $DBServer->SetProperties([EC2_SERVER_PROPERTIES::IS_LOCKED => $isEnabled, EC2_SERVER_PROPERTIES::IS_LOCKED_LAST_CHECK_TIME => time()]); } } } else { //TODO Check reboot timeout } } } catch (Exception $e) { if (CloudPlatformSuspensionInfo::isSuspensionException($e)) { $suspensionInfo->registerError($e->getMessage()); } if (stristr($e->getMessage(), "not found")) { $this->getLogger()->fatal($e->getMessage()); } elseif (stristr($e->getMessage(), "Request limit exceeded")) { sleep(5); $this->getLogger()->error("[Farm: %d] sleep due to exception: %s", $request->farmId, $e->getMessage()); } else { $this->getLogger()->error("[Farm: %d] Exception: %s", $request->farmId, $e->getMessage()); } } } $this->getLogger()->info("[%s] Finished farm polling (ID: %d, Name: %s, Status: %s, Platform:%s). Time: %s", $transactionId, $DBFarm->ID, $DBFarm->Name, $DBFarm->Status, $request->platform, microtime(true) - $jobStartTime); return $request; }