/**
 * {@inheritdoc}
 *
 * Polls the servers of one farm on one cloud platform and reconciles the
 * status stored in Scalr with the status reported by the cloud.
 *
 * For each server of the farm (as returned by DBFarm::GetServersByFilter) it:
 *  - skips the server when its cloud platform is currently suspended;
 *  - refreshes the platform instances list when nothing is cached for the
 *    server's environment and cloud location;
 *  - flags or terminates servers that are no longer found in the cloud;
 *  - handles servers terminated or suspended outside of Scalr;
 *  - moves resumed servers to the RESUMING status;
 *  - performs platform specific initialization (EC2, Azure, Openstack, Cloudstack);
 *  - terminates servers which did not send hostInit/hostUp within the launch timeout;
 *  - fires IPAddressChangedEvent when the addresses reported by the cloud differ.
 *
 * @param   object $request  The job request. Only the farmId and platform properties are read here.
 * @return  object|null      Returns the same request object when polling has finished, or
 *                           nothing when the farm is terminated and has no servers left to poll.
 * @see \Scalr\System\Zmq\Cron\TaskInterface::worker()
 */
public function worker($request)
{
    $db = \Scalr::getDb();

    //The list of the suspension information about cloud platforms
    $this->aSuspensionInfo = [];

    //Speed up poller
    if ($this->config()->daemon) {
        //Warming up static DI cache
        \Scalr::getContainer()->warmup();
    }

    // Reconfigure observers
    \Scalr::ReconfigureObservers();

    $DBFarm = DBFarm::LoadByID($request->farmId);

    $account = Scalr_Account::init()->loadById($DBFarm->ClientID);
    $payAsYouGoTime = $account->getSetting(Scalr_Account::SETTING_BILLING_PAY_AS_YOU_GO_DATE);

    $transactionId = abs(crc32(posix_getpid() . $request->farmId));

    $this->getLogger()->info(
        "[%s] Begin polling farm (ID: %d, Name: %s, Status: %s, Platform:%s)",
        $transactionId,
        $DBFarm->ID,
        $DBFarm->Name,
        $DBFarm->Status,
        $request->platform
    );

    $jobStartTime = microtime(true);

    //Retrieves the number of the farm's servers on this platform which are neither terminated nor suspended
    $servers_count = $db->GetOne("\n SELECT COUNT(*) AS cnt FROM servers\n WHERE farm_id = ? AND platform = ? AND status NOT IN (?,?)\n ", [
        $DBFarm->ID,
        $request->platform,
        SERVER_STATUS::TERMINATED,
        SERVER_STATUS::SUSPENDED,
    ]);

    if ($DBFarm->Status == FARM_STATUS::TERMINATED && $servers_count == 0) {
        //There are no servers for this farm and platform
        return;
    }

    $this->getLogger()->info(
        "%d server%s for the farm: %d and platform: %s",
        $servers_count,
        $servers_count == 1 ? '' : 's',
        $DBFarm->ID,
        $request->platform
    );

    $config = \Scalr::getContainer()->config;

    $p = PlatformFactory::NewPlatform($request->platform);

    foreach ($DBFarm->GetServersByFilter(['platform' => $request->platform], ['status' => SERVER_STATUS::PENDING_LAUNCH]) as $DBServer) {
        /* @var $DBServer \DBServer */

        //Get platform suspension info
        $suspensionInfo = $this->getSuspensionInfo($DBServer->platform, $DBServer->envId);

        //If the cloud platform is suspended we should not process it
        if ($suspensionInfo->isSuspended()) {
            continue;
        }

        try {
            //1. We need to check that the server exists in the cloud and is not missing.
            //   (On Openstack server can be missed and should not be terminated)
            $cacheKey = sprintf('%s:%s', $DBServer->envId, $DBServer->cloudLocation);

            //empty() is used instead of count() because the cache entry may not be set at all yet.
            //count() on an unset entry triggers a warning on PHP 7.2+ and a TypeError on PHP 8.
            if ($DBServer->cloudLocation && empty($p->instancesListCache[$cacheKey])) {
                try {
                    $this->getLogger()->info(
                        "Retrieving the list of the instances for %s, server: %s, platform: %s",
                        $DBServer->cloudLocation,
                        $DBServer->serverId,
                        $request->platform
                    );

                    if ($DBServer->platform == \SERVER_PLATFORMS::AZURE) {
                        //For Azure we need to pass resource group instead of cloudLocation
                        $p->GetServersList($DBServer->GetEnvironmentObject(), $DBServer->GetProperty(\AZURE_SERVER_PROPERTIES::RESOURCE_GROUP));
                    } else {
                        $p->GetServersList($DBServer->GetEnvironmentObject(), $DBServer->cloudLocation);
                    }

                    //We successfully polled cloud so can resume suspension status for the cloud platform
                    if ($suspensionInfo->isPendingSuspend()) {
                        $suspensionInfo->resume();
                    }
                } catch (Exception $e) {
                    if (CloudPlatformSuspensionInfo::isSuspensionException($e)) {
                        $suspensionInfo->registerError($e->getMessage());
                    }

                    $this->getLogger()->error(
                        "[Server: %s] Could not retrieve the list of the instances: %s",
                        $DBServer->serverId,
                        $e->getMessage()
                    );

                    continue;
                }
            }

            if ($DBServer->status != SERVER_STATUS::PENDING && $DBServer->status != SERVER_STATUS::PENDING_TERMINATE) {
                if (!$p->IsServerExists($DBServer)) {
                    //Double check with a direct request before considering the server as missing
                    try {
                        $serverInfo = $p->GetServerExtendedInformation($DBServer);
                    } catch (Exception $e) {
                        $this->getLogger()->error(
                            "[CRASH][FarmID: %d] Crash check for server '%s' failed: %s",
                            $DBFarm->ID,
                            $DBServer->serverId,
                            $e->getMessage()
                        );

                        continue;
                    }

                    if (!$serverInfo) {
                        if (!in_array($DBServer->status, [SERVER_STATUS::PENDING_TERMINATE, SERVER_STATUS::TERMINATED])) {
                            if ($DBServer->isOpenstack() && $DBServer->status == SERVER_STATUS::SUSPENDED) {
                                continue;
                            } elseif ($DBServer->platform == \SERVER_PLATFORMS::GCE && $DBServer->status == SERVER_STATUS::SUSPENDED) {
                                $DBServer->terminate(DBServer::TERMINATE_REASON_CRASHED);

                                \Scalr::getContainer()->logger(LOG_CATEGORY::FARM)->warn(new FarmLogMessage(
                                    $DBFarm->ID,
                                    sprintf(_("Server '%s' was terminated"), $DBServer->serverId),
                                    $DBServer->serverId
                                ));

                                continue;
                            }

                            //Default action can be overridden per platform in the config
                            $action = 'terminate';

                            if ($config->defined("scalr.{$DBServer->platform}.action_on_missing_server")) {
                                $action = $config->get("scalr.{$DBServer->platform}.action_on_missing_server");
                            }

                            if ($action == 'flag' && !$DBServer->GetProperty(SERVER_PROPERTIES::MISSING)) {
                                \Scalr::getContainer()->logger(LOG_CATEGORY::FARM)->warn(new FarmLogMessage(
                                    $DBFarm->ID,
                                    sprintf("Server '%s' found in Scalr but not found in the cloud (%s). Marking as Missing.", $DBServer->serverId, $DBServer->platform),
                                    $DBServer->serverId
                                ));

                                $DBServer->SetProperties([
                                    SERVER_PROPERTIES::REBOOTING => 0,
                                    SERVER_PROPERTIES::MISSING   => 1,
                                ]);
                            } else {
                                \Scalr::getContainer()->logger(LOG_CATEGORY::FARM)->warn(new FarmLogMessage(
                                    $DBFarm->ID,
                                    sprintf("Server '%s' found in Scalr but not found in the cloud (%s). Terminating.", $DBServer->serverId, $DBServer->platform),
                                    $DBServer->serverId
                                ));

                                $DBServer->terminate(DBServer::TERMINATE_REASON_CRASHED);
                            }

                            continue;
                        }
                    } else {
                        //http.persistent.handles.limit must be set to 0 for pecl-http version 1
                        $this->getLogger()->error(
                            "[CRASH][FarmID: %d] False-positive crash check: %s (EnvID: %d). Please verify current scalr install with app/www/testenvironment.php",
                            $DBFarm->ID,
                            $DBServer->serverId,
                            $DBServer->envId
                        );
                    }
                } else {
                    $DBServer->SetProperties([SERVER_PROPERTIES::MISSING => 0]);
                }
            }
        } catch (Exception $e) {
            if (CloudPlatformSuspensionInfo::isSuspensionException($e)) {
                $suspensionInfo->registerError($e->getMessage());
            }

            $this->getLogger()->warn(
                "Exception for Farm: %d, Platform: %s with the message: %s, in the %s:%s",
                $request->farmId,
                $request->platform,
                $e->getMessage(),
                $e->getFile(),
                $e->getLine()
            );

            continue;
        }

        try {
            if (!in_array($DBServer->status, [
                SERVER_STATUS::SUSPENDED,
                SERVER_STATUS::TERMINATED,
                SERVER_STATUS::PENDING_TERMINATE,
                SERVER_STATUS::PENDING_SUSPEND,
            ])) {
                $openstackErrorState = false;

                if (PlatformFactory::isOpenstack($DBServer->platform) && $DBServer->GetRealStatus()->getName() === 'ERROR') {
                    $openstackErrorState = true;
                }

                if ($DBServer->GetRealStatus()->isTerminated() || $openstackErrorState) {
                    //Must be reset for each server, otherwise the status of a previously
                    //processed server would be reported in the log message below.
                    $status = null;

                    // If openstack server is in ERROR state we need more details
                    if ($openstackErrorState) {
                        try {
                            $info = $p->GetServerExtendedInformation($DBServer);
                            $status = empty($info['Status']) ? false : $info['Status'];
                        } catch (Exception $e) {
                            //Best effort: falling back to the cached real status below
                        }
                    }

                    if (empty($status)) {
                        $status = $DBServer->GetRealStatus()->getName();
                    }

                    \Scalr::getContainer()->logger(LOG_CATEGORY::FARM)->warn(new FarmLogMessage(
                        $DBFarm->ID,
                        sprintf("Server '%s' (Platform: %s) was terminated in cloud or from within an OS. Status: %s.", $DBServer->serverId, $DBServer->platform, $status),
                        $DBServer->serverId
                    ));

                    $DBServer->terminate(DBServer::TERMINATE_REASON_CRASHED);

                    continue;
                } elseif ($DBServer->GetRealStatus()->isSuspended()) {
                    //In case the server was suspended when it was running
                    if ($DBServer->status == SERVER_STATUS::RUNNING) {
                        \Scalr::getContainer()->logger(LOG_CATEGORY::FARM)->warn(new FarmLogMessage(
                            $DBFarm->ID,
                            sprintf(
                                "Server '%s' (Platform: %s) is not running (Status in cloud: %s, Status in Scalr: %s).",
                                $DBServer->serverId,
                                $DBServer->platform,
                                $DBServer->GetRealStatus()->getName(),
                                $DBServer->status
                            ),
                            $DBServer->serverId
                        ));

                        $event = new HostDownEvent($DBServer);
                        $event->isSuspended = true;

                        \Scalr::FireEvent($DBFarm->ID, $event);

                        continue;
                    } else {
                        if ($DBServer->status != \SERVER_STATUS::RESUMING) {
                            //If the server was suspended during initialization
                            //we do not support this and need to terminate this instance
                            if ($DBServer->platform == \SERVER_PLATFORMS::EC2) {
                                //Default value for the case when the request below fails
                                $realStatus = '';

                                try {
                                    $info = $p->GetServerExtendedInformation($DBServer);
                                    $realStatus = !empty($info['Instance state']) ? $info['Instance state'] : '';
                                } catch (\Exception $e) {
                                    // no need to do anything here;
                                }

                                $this->getLogger()->error(
                                    "[SUSPEND_RESUME_ISSUE][ServerID: %s][2] Cached Cloud Status: %s (Cache age: %d seconds), Status: %s, Real status: %s",
                                    $DBServer->serverId,
                                    $DBServer->GetRealStatus()->getName(),
                                    time() - $p->instancesListCache[$cacheKey][$DBServer->GetCloudServerID()]['_timestamp'],
                                    $DBServer->status,
                                    $realStatus
                                );
                            }

                            $DBServer->terminate(DBServer::TERMINATE_REASON_CRASHED);

                            continue;
                        } else {
                            // Need to clear cache, because this situation happens only when cache is stale.
                            $p->ClearCache();
                        }
                    }
                }
            }

            if ($DBServer->status != SERVER_STATUS::RUNNING && $DBServer->GetRealStatus()->IsRunning()) {
                if ($DBServer->status == SERVER_STATUS::SUSPENDED) {
                    //The server is running in the cloud whereas it is suspended in Scalr, so it is being resumed
                    if ($DBServer->platform == \SERVER_PLATFORMS::GCE) {
                        if ($p->GetServerRealStatus($DBServer)->getName() == 'STOPPING') {
                            continue;
                        }
                    }

                    $update = [];

                    // For Openstack we need to re-associate IPs
                    try {
                        if ($DBServer->isOpenstack()) {
                            OpenstackHelper::setServerFloatingIp($DBServer);
                        }
                    } catch (Exception $e) {
                        if (!$DBServer->GetProperty(SERVER_PROPERTIES::SZR_IS_INIT_FAILED)) {
                            $DBServer->SetProperties([
                                \SERVER_PROPERTIES::SZR_IS_INIT_FAILED    => 1,
                                \SERVER_PROPERTIES::SZR_IS_INIT_ERROR_MSG => "Scalr is unable to allocate/associate floating IP with server: " . $e->getMessage(),
                            ]);
                        }
                    }

                    if ($DBServer->platform == \SERVER_PLATFORMS::CLOUDSTACK) {
                        if (!$DBServer->remoteIp) {
                            $update['remoteIp'] = CloudstackHelper::getSharedIP($DBServer);
                        }
                    }

                    if ($DBServer->platform == \SERVER_PLATFORMS::EC2) {
                        //Default value for the case when the request below fails
                        $realStatus = '';

                        try {
                            $info = $p->GetServerExtendedInformation($DBServer);
                            $realStatus = !empty($info['Instance state']) ? $info['Instance state'] : '';
                        } catch (\Exception $e) {
                            // no need to do anything here;
                        }

                        $this->getLogger()->error(
                            "[SUSPEND_RESUME_ISSUE][ServerID: %s][1] Cached Cloud Status: %s (Cache age: %d seconds), Status: %s, Real status: %s",
                            $DBServer->serverId,
                            $DBServer->GetRealStatus()->getName(),
                            time() - $p->instancesListCache[$cacheKey][$DBServer->GetCloudServerID()]['_timestamp'],
                            $DBServer->status,
                            $realStatus
                        );
                    }

                    $update['status'] = \SERVER_STATUS::RESUMING;
                    $update['dateAdded'] = date("Y-m-d H:i:s");

                    $DBServer->update($update);

                    unset($update);

                    continue;
                } elseif (!in_array($DBServer->status, [SERVER_STATUS::TERMINATED])) {
                    $elasticIpAssigned = false;

                    if ($DBServer->platform == SERVER_PLATFORMS::EC2) {
                        if ($DBServer->status == SERVER_STATUS::PENDING) {
                            if (!$DBServer->remoteIp && !$DBServer->localIp) {
                                $ipaddresses = $p->GetServerIPAddresses($DBServer);

                                $elasticIpAddress = Ec2EipHelper::setEipForServer($DBServer);

                                if ($elasticIpAddress) {
                                    $ipaddresses['remoteIp'] = $elasticIpAddress;
                                    $DBServer->remoteIp = $elasticIpAddress;
                                    $elasticIpAssigned = true;
                                }

                                if ($ipaddresses['remoteIp'] && !$DBServer->remoteIp
                                    || $ipaddresses['localIp'] && !$DBServer->localIp
                                    || $elasticIpAssigned) {
                                    $DBServer->update([
                                        'remoteIp' => $ipaddresses['remoteIp'],
                                        'localIp'  => $ipaddresses['localIp'],
                                    ]);
                                }

                                //Add tags
                                Ec2Helper::createObjectTags($DBServer);
                            }
                        }
                    }

                    if ($DBServer->platform == \SERVER_PLATFORMS::AZURE) {
                        if ($DBServer->GetProperty(\AZURE_SERVER_PROPERTIES::SZR_EXTENSION_DEPLOYED)) {
                            if (!$DBServer->GetProperty(SERVER_PROPERTIES::SZR_IS_INIT_FAILED)) {
                                // Check scalarizr deployment status
                                $env = $DBServer->GetEnvironmentObject();
                                $azure = $env->azure();

                                $info = $azure->compute->virtualMachine->getInstanceViewInfo(
                                    $env->cloudCredentials(SERVER_PLATFORMS::AZURE)->properties[Entity\CloudCredentialsProperty::AZURE_SUBSCRIPTION_ID],
                                    $DBServer->GetProperty(\AZURE_SERVER_PROPERTIES::RESOURCE_GROUP),
                                    $DBServer->GetProperty(\AZURE_SERVER_PROPERTIES::SERVER_NAME)
                                );

                                $extensions = !empty($info->extensions) ? $info->extensions : [];

                                foreach ($extensions as $extension) {
                                    /* @var $extension ExtensionData */
                                    if ($extension->name == 'scalarizr') {
                                        $extStatus = $extension->statuses[0];
                                        /* @var $extStatus StatusData */
                                        if ($extStatus->level == 'Error') {
                                            $DBServer->SetProperties([
                                                \SERVER_PROPERTIES::SZR_IS_INIT_FAILED    => 1,
                                                \SERVER_PROPERTIES::SZR_IS_INIT_ERROR_MSG => "Azure resource extension failed to provision scalr agent. Status: {$extStatus->code} ({$extStatus->message})",
                                            ]);
                                        }
                                    }
                                }
                            }
                        } else {
                            AzureHelper::setupScalrAgent($DBServer);
                        }
                    }

                    try {
                        if ($DBServer->isOpenstack()) {
                            OpenstackHelper::setServerFloatingIp($DBServer);
                        }
                    } catch (Exception $e) {
                        if (!$DBServer->GetProperty(\SERVER_PROPERTIES::SZR_IS_INIT_FAILED)) {
                            $DBServer->SetProperties([
                                \SERVER_PROPERTIES::SZR_IS_INIT_FAILED    => 1,
                                \SERVER_PROPERTIES::SZR_IS_INIT_ERROR_MSG => "Scalr is unable to allocate/associate floating IP with server:" . $e->getMessage(),
                            ]);
                        }
                    }

                    if ($DBServer->isCloudstack()) {
                        if ($DBServer->status == SERVER_STATUS::PENDING) {
                            $jobId = $DBServer->GetProperty(CLOUDSTACK_SERVER_PROPERTIES::LAUNCH_JOB_ID);

                            try {
                                $cs = $DBServer->GetEnvironmentObject()->cloudstack($DBServer->platform);

                                $res = $cs->queryAsyncJobResult($jobId);

                                if ($res->jobstatus == 1) {
                                    $DBServer->SetProperties([
                                        CLOUDSTACK_SERVER_PROPERTIES::TMP_PASSWORD => $res->virtualmachine->password,
                                        CLOUDSTACK_SERVER_PROPERTIES::SERVER_NAME  => $res->virtualmachine->name,
                                    ]);
                                }
                                //TODO handle failed job: $res->jobresult->jobstatus == 2
                            } catch (Exception $e) {
                                if ($DBServer->farmId) {
                                    \Scalr::getContainer()->logger("CloudStack")->error(new FarmLogMessage(
                                        $DBServer->farmId,
                                        $e->getMessage(),
                                        $DBServer->serverId
                                    ));
                                }
                            }
                        }
                    }

                    //These values must be reset for each server. Without it, when the lookup below
                    //fails, the farm role and the timeout of a previously processed server would be
                    //used to decide whether this server should be terminated.
                    $dtadded = false;
                    $DBFarmRole = null;
                    $launchTimeout = 900;

                    try {
                        $dtadded = strtotime($DBServer->dateAdded);
                        $DBFarmRole = $DBServer->GetFarmRoleObject();

                        $configuredTimeout = $DBFarmRole->GetSetting(Entity\FarmRoleSetting::SYSTEM_LAUNCH_TIMEOUT);
                        $launchTimeout = $configuredTimeout > 0 ? $configuredTimeout : 900;
                    } catch (Exception $e) {
                        //Farm role settings are not available so launch timeout check has to be skipped
                        $DBFarmRole = null;

                        if (stristr($e->getMessage(), "not found")) {
                            $DBServer->terminate(DBServer::TERMINATE_REASON_ROLE_REMOVED);
                        }
                    }

                    $scriptingEvent = false;
                    $eventName = null;

                    if ($DBServer->status == SERVER_STATUS::PENDING) {
                        $eventName = "hostInit";
                        $scriptingEvent = EVENT_TYPE::HOST_INIT;
                    } elseif ($DBServer->status == SERVER_STATUS::INIT) {
                        $eventName = "hostUp";
                        $scriptingEvent = EVENT_TYPE::HOST_UP;
                    }

                    if ($scriptingEvent && $dtadded && $DBFarmRole !== null) {
                        $hasPendingMessages = !!$db->GetOne("\n SELECT EXISTS(SELECT 1 FROM messages WHERE type='in' AND status='0' AND server_id = ?)\n ", [
                            $DBServer->serverId,
                        ]);

                        //Synchronous scripts of the event extend the time the server is allowed to spend on launch
                        $scriptingTimeout = (int) $db->GetOne("\n SELECT SUM(timeout)\n FROM farm_role_scripts\n WHERE event_name = ? AND farm_roleid = ? AND issync = '1'\n ", [
                            $scriptingEvent,
                            $DBServer->farmRoleId,
                        ]);

                        if ($scriptingTimeout) {
                            $launchTimeout = $launchTimeout + $scriptingTimeout;
                        }

                        if (!$hasPendingMessages && $dtadded + $launchTimeout < time() && !$DBFarmRole->GetRoleObject()->hasBehavior(ROLE_BEHAVIORS::MONGODB)) {
                            //Add entry to farm log
                            \Scalr::getContainer()->logger(LOG_CATEGORY::FARM)->warn(new FarmLogMessage(
                                $DBFarm->ID,
                                sprintf(
                                    "Server '%s' did not send '%s' event in %s seconds after launch (Try increasing timeouts in role settings). Considering it broken. Terminating instance.",
                                    $DBServer->serverId,
                                    $eventName,
                                    $launchTimeout
                                ),
                                $DBServer->serverId
                            ));

                            try {
                                $DBServer->terminate([DBServer::TERMINATE_REASON_SERVER_DID_NOT_SEND_EVENT, $eventName, $launchTimeout], false);
                            } catch (Exception $err) {
                                $this->getLogger()->fatal($err->getMessage());
                            }
                        } elseif ($DBFarmRole->GetRoleObject()->hasBehavior(ROLE_BEHAVIORS::MONGODB)) {
                            //DO NOT TERMINATE MONGODB INSTANCES BY TIMEOUT! IT'S NOT SAFE
                            //THINK ABOUT WORKAROUND
                        }
                    }

                    //Whether IP address is changed
                    if (!$DBServer->IsRebooting() && !$elasticIpAssigned) {
                        $ipaddresses = $p->GetServerIPAddresses($DBServer);

                        if ($ipaddresses['remoteIp'] && $DBServer->remoteIp && $DBServer->remoteIp != $ipaddresses['remoteIp']
                            || $ipaddresses['localIp'] && $DBServer->localIp && $DBServer->localIp != $ipaddresses['localIp']) {
                            \Scalr::getContainer()->logger(LOG_CATEGORY::FARM)->warn(new FarmLogMessage(
                                $DBFarm->ID,
                                sprintf(
                                    "RemoteIP: %s (%s), LocalIp: %s (%s) (Poller).",
                                    $DBServer->remoteIp,
                                    $ipaddresses['remoteIp'],
                                    $DBServer->localIp,
                                    $ipaddresses['localIp']
                                ),
                                $DBServer->serverId
                            ));

                            \Scalr::FireEvent(
                                $DBServer->farmId,
                                new IPAddressChangedEvent($DBServer, $ipaddresses['remoteIp'], $ipaddresses['localIp'])
                            );
                        }

                        //TODO Check health
                    }
                }
            } elseif ($DBServer->status == SERVER_STATUS::SUSPENDED && $DBServer->GetRealStatus()->isTerminated()) {
                //TODO: Terminated outside scalr while in SUSPENDED state
                $DBServer->terminate(DBServer::TERMINATE_REASON_CRASHED);
            } elseif ($DBServer->status == SERVER_STATUS::RUNNING && $DBServer->GetRealStatus()->isRunning()) {
                // Is IP address changed?
                if (!$DBServer->IsRebooting()) {
                    $ipaddresses = $p->GetServerIPAddresses($DBServer);

                    // Private IP cannot be removed (only changed).
                    if ($DBServer->remoteIp != $ipaddresses['remoteIp']
                        || $ipaddresses['localIp'] && $DBServer->localIp != $ipaddresses['localIp']) {
                        \Scalr::FireEvent(
                            $DBServer->farmId,
                            new IPAddressChangedEvent($DBServer, $ipaddresses['remoteIp'], $ipaddresses['localIp'])
                        );
                    }

                    if ($payAsYouGoTime) {
                        //Usage is counted from the later of the server initialization time and the pay-as-you-go date
                        $initTime = $DBServer->dateInitialized ? strtotime($DBServer->dateInitialized) : null;

                        if ($initTime < $payAsYouGoTime) {
                            $initTime = $payAsYouGoTime;
                        }

                        $runningHours = ceil((time() - $initTime) / 3600);
                        $scuUsed = $runningHours * Scalr_Billing::getSCUByInstanceType($DBServer->getType(), $DBServer->platform);

                        $db->Execute("UPDATE servers_history SET scu_used = ?, scu_updated = 0 WHERE server_id = ?", [
                            $scuUsed,
                            $DBServer->serverId,
                        ]);
                    }

                    if ($DBServer->platform == SERVER_PLATFORMS::EC2) {
                        $env = Scalr_Environment::init()->loadById($DBServer->envId);
                        $ec2 = $env->aws($DBServer->GetCloudLocation())->ec2;

                        $time = $DBServer->GetProperty(EC2_SERVER_PROPERTIES::IS_LOCKED_LAST_CHECK_TIME);

                        //Termination protection attribute is refreshed at most once in 1200 seconds
                        if (!$time || time() > $time + 1200) {
                            $isEnabled = $ec2->instance->describeAttribute(
                                $DBServer->GetCloudServerID(),
                                InstanceAttributeType::disableApiTermination()
                            );

                            $DBServer->SetProperties([
                                EC2_SERVER_PROPERTIES::IS_LOCKED                 => $isEnabled,
                                EC2_SERVER_PROPERTIES::IS_LOCKED_LAST_CHECK_TIME => time(),
                            ]);
                        }
                    }
                } else {
                    //TODO Check reboot timeout
                }
            }
        } catch (Exception $e) {
            if (CloudPlatformSuspensionInfo::isSuspensionException($e)) {
                $suspensionInfo->registerError($e->getMessage());
            }

            if (stristr($e->getMessage(), "not found")) {
                $this->getLogger()->fatal($e->getMessage());
            } elseif (stristr($e->getMessage(), "Request limit exceeded")) {
                //Backing off before the next server is processed
                sleep(5);
                $this->getLogger()->error("[Farm: %d] sleep due to exception: %s", $request->farmId, $e->getMessage());
            } else {
                $this->getLogger()->error("[Farm: %d] Exception: %s", $request->farmId, $e->getMessage());
            }
        }
    }

    $this->getLogger()->info(
        "[%s] Finished farm polling (ID: %d, Name: %s, Status: %s, Platform:%s). Time: %s",
        $transactionId,
        $DBFarm->ID,
        $DBFarm->Name,
        $DBFarm->Status,
        $request->platform,
        microtime(true) - $jobStartTime
    );

    return $request;
}