diff --git a/src/applications/drydock/blueprint/DrydockBlueprintImplementation.php b/src/applications/drydock/blueprint/DrydockBlueprintImplementation.php index 88bc4d935a..c5e9f811b5 100644 --- a/src/applications/drydock/blueprint/DrydockBlueprintImplementation.php +++ b/src/applications/drydock/blueprint/DrydockBlueprintImplementation.php @@ -1,526 +1,567 @@ getCustomFieldSpecifications(); if ($this->shouldUseConcurrentResourceLimit()) { $fields += array( 'allocator.limit' => array( 'name' => pht('Limit'), 'caption' => pht( 'Maximum number of resources this blueprint can have active '. 'concurrently.'), 'type' => 'int', ), ); } return $fields; } protected function getCustomFieldSpecifications() { return array(); } public function getViewer() { return PhabricatorUser::getOmnipotentUser(); } /* -( Lease Acquisition )-------------------------------------------------- */ /** * Enforce basic checks on lease/resource compatibility. Allows resources to * reject leases if they are incompatible, even if the resource types match. * * For example, if a resource represents a 32-bit host, this method might * reject leases that need a 64-bit host. The blueprint might also reject * a resource if the lease needs 8GB of RAM and the resource only has 6GB * free. * * This method should not acquire locks or expect anything to be locked. This * is a coarse compatibility check between a lease and a resource. * * @param DrydockBlueprint Concrete blueprint to allocate for. * @param DrydockResource Candidate resource to allocate the lease on. * @param DrydockLease Pending lease that wants to allocate here. * @return bool True if the resource and lease are compatible. * @task lease */ abstract public function canAcquireLeaseOnResource( DrydockBlueprint $blueprint, DrydockResource $resource, DrydockLease $lease); /** * Acquire a lease. Allows resources to perform setup as leases are brought * online. * * If acquisition fails, throw an exception. 
* * @param DrydockBlueprint Blueprint which built the resource. * @param DrydockResource Resource to acquire a lease on. * @param DrydockLease Requested lease. * @return void * @task lease */ abstract public function acquireLease( DrydockBlueprint $blueprint, DrydockResource $resource, DrydockLease $lease); /** * @return void * @task lease */ public function activateLease( DrydockBlueprint $blueprint, DrydockResource $resource, DrydockLease $lease) { throw new PhutilMethodNotImplementedException(); } /** * React to a lease being released. * * This callback is primarily useful for automatically releasing resources * once all leases are released. * * @param DrydockBlueprint Blueprint which built the resource. * @param DrydockResource Resource a lease was released on. * @param DrydockLease Recently released lease. * @return void * @task lease */ abstract public function didReleaseLease( DrydockBlueprint $blueprint, DrydockResource $resource, DrydockLease $lease); /** * Destroy any temporary data associated with a lease. * * If a lease creates temporary state while held, destroy it here. * * @param DrydockBlueprint Blueprint which built the resource. * @param DrydockResource Resource the lease is acquired on. * @param DrydockLease The lease being destroyed. * @return void * @task lease */ abstract public function destroyLease( DrydockBlueprint $blueprint, DrydockResource $resource, DrydockLease $lease); /** * Return true to try to allocate a new resource and expand the resource * pool instead of permitting an otherwise valid acquisition on an existing * resource. * * This allows the blueprint to provide a soft hint about when the resource * pool should grow. * * Returning "true" in all cases generally makes sense when a blueprint * controls a fixed pool of resources, like a particular number of physical * hosts: you want to put all the hosts in service, so whenever it is * possible to allocate a new host you want to do this. 
* * Returning "false" in all cases generally makes sense when a blueprint * has a flexible pool of expensive resources and you want to pack leases * onto them as tightly as possible. * * @param DrydockBlueprint The blueprint for an existing resource being * acquired. * @param DrydockResource The resource being acquired, which we may want to * build a supplemental resource for. * @param DrydockLease The current lease performing acquisition. * @return bool True to prefer allocating a supplemental resource. * * @task lease */ public function shouldAllocateSupplementalResource( DrydockBlueprint $blueprint, DrydockResource $resource, DrydockLease $lease) { return false; } /* -( Resource Allocation )------------------------------------------------ */ /** * Enforce fundamental implementation/lease checks. Allows implementations to * reject a lease which no concrete blueprint can ever satisfy. * * For example, if an implementation only builds ARM hosts and the lease needs a * PowerPC host, it may be rejected here. * * This is the earliest rejection phase, and is followed by * @{method:canEverAllocateResourceForLease}. * * This method should not actually check if a resource can be allocated * right now, or even if a blueprint which can allocate a suitable resource * really exists, only if some blueprint may conceivably exist which could * plausibly be able to build a suitable resource. * * @param DrydockLease Requested lease. * @return bool True if some concrete blueprint of this implementation's * type might ever be able to build a resource for the lease. * @task resource */ abstract public function canAnyBlueprintEverAllocateResourceForLease( DrydockLease $lease); /** * Enforce basic blueprint/lease checks. Allows blueprints to reject a lease * which they can not build a resource for. * * This is the second rejection phase. It follows * @{method:canAnyBlueprintEverAllocateResourceForLease} and is followed by * @{method:canAllocateResourceForLease}. 
* * This method should not check if a resource can be built right now, only * if the blueprint as configured may, at some time, be able to build a * suitable resource. * * @param DrydockBlueprint Blueprint which may be asked to allocate a * resource. * @param DrydockLease Requested lease. * @return bool True if this blueprint can eventually build a suitable * resource for the lease, as currently configured. * @task resource */ abstract public function canEverAllocateResourceForLease( DrydockBlueprint $blueprint, DrydockLease $lease); /** * Enforce basic availability limits. Allows blueprints to reject resource * allocation if they are currently overallocated. * * This method should perform basic capacity/limit checks. For example, if * it has a limit of 6 resources and currently has 6 resources allocated, * it might reject new leases. * * This method should not acquire locks or expect locks to be acquired. This * is a coarse check to determine if the operation is likely to succeed * right now without needing to acquire locks. * * It is expected that this method will sometimes return `true` (indicating * that a resource can be allocated) but find that another allocator has * eaten up free capacity by the time it actually tries to build a resource. * This is normal and the allocator will recover from it. * * @param DrydockBlueprint The blueprint which may be asked to allocate a * resource. * @param DrydockLease Requested lease. * @return bool True if this blueprint appears likely to be able to allocate * a suitable resource. * @task resource */ abstract public function canAllocateResourceForLease( DrydockBlueprint $blueprint, DrydockLease $lease); /** * Allocate a suitable resource for a lease. * * This method MUST acquire, hold, and manage locks to prevent multiple * allocations from racing. World state is not locked before this method is * called. Blueprints are entirely responsible for any lock handling they * need to perform. 
* * @param DrydockBlueprint The blueprint which should allocate a resource. * @param DrydockLease Requested lease. * @return DrydockResource Allocated resource. * @task resource */ abstract public function allocateResource( DrydockBlueprint $blueprint, DrydockLease $lease); /** * @task resource */ public function activateResource( DrydockBlueprint $blueprint, DrydockResource $resource) { throw new PhutilMethodNotImplementedException(); } /** * Destroy any temporary data associated with a resource. * * If a resource creates temporary state when allocated, destroy that state * here. For example, you might shut down a virtual host or destroy a working * copy on disk. * * @param DrydockBlueprint Blueprint which built the resource. * @param DrydockResource Resource being destroyed. * @return void * @task resource */ abstract public function destroyResource( DrydockBlueprint $blueprint, DrydockResource $resource); /** * Get a human readable name for a resource. * * @param DrydockBlueprint Blueprint which built the resource. * @param DrydockResource Resource to get the name of. * @return string Human-readable resource name. * @task resource */ abstract public function getResourceName( DrydockBlueprint $blueprint, DrydockResource $resource); /* -( Resource Interfaces )------------------------------------------------ */ abstract public function getInterface( DrydockBlueprint $blueprint, DrydockResource $resource, DrydockLease $lease, $type); /* -( Logging )------------------------------------------------------------ */ public static function getAllBlueprintImplementations() { return id(new PhutilClassMapQuery()) ->setAncestorClass(__CLASS__) ->execute(); } + + /** + * Get all the @{class:DrydockBlueprintImplementation}s which can possibly + * build a resource to satisfy a lease. + * + * This method returns blueprints which might, at some time, be able to + * build a resource which can satisfy the lease. They may not be able to + * build that resource right now. 
+ * + * @param DrydockLease Requested lease. + * @return list List of qualifying blueprint + * implementations. + */ + public static function getAllForAllocatingLease( + DrydockLease $lease) { + + $impls = self::getAllBlueprintImplementations(); + + $keep = array(); + foreach ($impls as $key => $impl) { + // Don't use disabled blueprint types. + if (!$impl->isEnabled()) { + continue; + } + + // Don't use blueprint types which can't allocate the correct kind of + // resource. + if ($impl->getType() != $lease->getResourceType()) { + continue; + } + + if (!$impl->canAnyBlueprintEverAllocateResourceForLease($lease)) { + continue; + } + + $keep[$key] = $impl; + } + + return $keep; + } + public static function getNamedImplementation($class) { return idx(self::getAllBlueprintImplementations(), $class); } protected function newResourceTemplate(DrydockBlueprint $blueprint) { $resource = id(new DrydockResource()) ->setBlueprintPHID($blueprint->getPHID()) ->attachBlueprint($blueprint) ->setType($this->getType()) ->setStatus(DrydockResourceStatus::STATUS_PENDING); // Pre-allocate the resource PHID. $resource->setPHID($resource->generatePHID()); return $resource; } protected function newLease(DrydockBlueprint $blueprint) { return DrydockLease::initializeNewLease() ->setAuthorizingPHID($blueprint->getPHID()); } protected function requireActiveLease(DrydockLease $lease) { $lease_status = $lease->getStatus(); switch ($lease_status) { case DrydockLeaseStatus::STATUS_PENDING: case DrydockLeaseStatus::STATUS_ACQUIRED: throw new PhabricatorWorkerYieldException(15); case DrydockLeaseStatus::STATUS_ACTIVE: return; default: throw new Exception( pht( 'Lease ("%s") is in bad state ("%s"), expected "%s".', $lease->getPHID(), $lease_status, DrydockLeaseStatus::STATUS_ACTIVE)); } } /** * Does this implementation use concurrent resource limits? * * Implementations can override this method to opt into standard limit * behavior, which provides a simple concurrent resource limit. 
* * @return bool True to use limits. */ protected function shouldUseConcurrentResourceLimit() { return false; } /** * Get the effective concurrent resource limit for this blueprint. * * @param DrydockBlueprint Blueprint to get the limit for. * @return int|null Limit, or `null` for no limit. */ protected function getConcurrentResourceLimit(DrydockBlueprint $blueprint) { if ($this->shouldUseConcurrentResourceLimit()) { $limit = $blueprint->getFieldValue('allocator.limit'); $limit = (int)$limit; if ($limit > 0) { return $limit; } else { return null; } } return null; } protected function getConcurrentResourceLimitSlotLock( DrydockBlueprint $blueprint) { $limit = $this->getConcurrentResourceLimit($blueprint); if ($limit === null) { return; } $blueprint_phid = $blueprint->getPHID(); // TODO: This logic shouldn't do anything awful, but is a little silly. It // would be nice to unify the "huge limit" and "small limit" cases // eventually but it's a little tricky. // If the limit is huge, just pick a random slot. This is just stopping // us from exploding if someone types a billion zillion into the box. if ($limit > 1024) { $slot = mt_rand(0, $limit - 1); return "allocator({$blueprint_phid}).limit({$slot})"; } // For reasonable limits, actually check for an available slot. $slots = range(0, $limit - 1); shuffle($slots); $lock_names = array(); foreach ($slots as $slot) { $lock_names[] = "allocator({$blueprint_phid}).limit({$slot})"; } $locks = DrydockSlotLock::loadHeldLocks($lock_names); $locks = mpull($locks, null, 'getLockKey'); foreach ($lock_names as $lock_name) { if (empty($locks[$lock_name])) { return $lock_name; } } // If we found no free slot, just return whatever we checked last (which // is just a random slot). There's a small chance we'll get lucky and the // lock will be free by the time we try to take it, but usually we'll just // fail to grab the lock, throw an appropriate lock exception, and get back // on the right path to retry later. 
return $lock_name; } /** * Apply standard limits on resource allocation rate. * * @param DrydockBlueprint The blueprint requesting an allocation. * @return bool True if further allocations should be limited. */ protected function shouldLimitAllocatingPoolSize( DrydockBlueprint $blueprint) { // TODO: If this mechanism sticks around, these values should be // configurable by the blueprint implementation. // Limit on total number of active resources. $total_limit = $this->getConcurrentResourceLimit($blueprint); // Always allow at least this many allocations to be in flight at once. $min_allowed = 1; // Allow this fraction of allocating resources as a fraction of active // resources. $growth_factor = 0.25; $resource = new DrydockResource(); $conn_r = $resource->establishConnection('r'); $counts = queryfx_all( $conn_r, 'SELECT status, COUNT(*) N FROM %T WHERE blueprintPHID = %s AND status != %s GROUP BY status', $resource->getTableName(), $blueprint->getPHID(), DrydockResourceStatus::STATUS_DESTROYED); $counts = ipull($counts, 'N', 'status'); $n_alloc = idx($counts, DrydockResourceStatus::STATUS_PENDING, 0); $n_active = idx($counts, DrydockResourceStatus::STATUS_ACTIVE, 0); $n_broken = idx($counts, DrydockResourceStatus::STATUS_BROKEN, 0); $n_released = idx($counts, DrydockResourceStatus::STATUS_RELEASED, 0); // If we're at the limit on total active resources, limit additional // allocations. if ($total_limit !== null) { $n_total = ($n_alloc + $n_active + $n_broken + $n_released); if ($n_total >= $total_limit) { return true; } } // If the number of in-flight allocations is fewer than the minimum number // of allowed allocations, don't impose a limit. if ($n_alloc < $min_allowed) { return false; } $allowed_alloc = (int)ceil($n_active * $growth_factor); // If the number of in-flight allocations is fewer than the number of // allowed allocations according to the pool growth factor, don't impose // a limit. 
if ($n_alloc < $allowed_alloc) { return false; } return true; } } diff --git a/src/applications/drydock/management/DrydockManagementLeaseWorkflow.php b/src/applications/drydock/management/DrydockManagementLeaseWorkflow.php index fa9f83ad1e..e38b381cc5 100644 --- a/src/applications/drydock/management/DrydockManagementLeaseWorkflow.php +++ b/src/applications/drydock/management/DrydockManagementLeaseWorkflow.php @@ -1,263 +1,288 @@ setName('lease') ->setSynopsis(pht('Lease a resource.')) ->setArguments( array( array( 'name' => 'type', 'param' => 'resource_type', 'help' => pht('Resource type.'), ), array( 'name' => 'until', 'param' => 'time', 'help' => pht('Set lease expiration time.'), ), array( 'name' => 'attributes', 'param' => 'file', 'help' => pht( 'JSON file with lease attributes. Use "-" to read attributes '. 'from stdin.'), ), array( 'name' => 'count', 'param' => 'N', 'default' => 1, 'help' => pht('Lease a given number of identical resources.'), ), )); } public function execute(PhutilArgumentParser $args) { $viewer = $this->getViewer(); $resource_type = $args->getArg('type'); if (!phutil_nonempty_string($resource_type)) { throw new PhutilArgumentUsageException( pht( 'Specify a resource type with "--type".')); } $until = $args->getArg('until'); if (phutil_nonempty_string($until)) { $until = strtotime($until); if ($until <= 0) { throw new PhutilArgumentUsageException( pht( 'Unable to parse argument to "--until".')); } } $count = $args->getArgAsInteger('count'); if ($count < 1) { throw new PhutilArgumentUsageException( pht( 'Value provided to "--count" must be a nonzero, positive '. 
'number.')); } $attributes_file = $args->getArg('attributes'); if (phutil_nonempty_string($attributes_file)) { if ($attributes_file == '-') { echo tsprintf( "%s\n", pht('Reading JSON attributes from stdin...')); $data = file_get_contents('php://stdin'); } else { $data = Filesystem::readFile($attributes_file); } $attributes = phutil_json_decode($data); } else { $attributes = array(); } + $blueprint_phids = null; + $leases = array(); for ($idx = 0; $idx < $count; $idx++) { $lease = id(new DrydockLease()) ->setResourceType($resource_type); $drydock_phid = id(new PhabricatorDrydockApplication())->getPHID(); $lease->setAuthorizingPHID($drydock_phid); if ($attributes) { $lease->setAttributes($attributes); } - // TODO: This is not hugely scalable, although this is a debugging - // workflow so maybe it's fine. Do we even need `bin/drydock lease` in - // the long run? - $all_blueprints = id(new DrydockBlueprintQuery()) - ->setViewer($viewer) - ->execute(); - $allowed_phids = mpull($all_blueprints, 'getPHID'); - if (!$allowed_phids) { - throw new Exception( - pht( - 'No blueprints exist which can plausibly allocate resources to '. - 'satisfy the requested lease.')); + if ($blueprint_phids === null) { + $blueprint_phids = $this->newAllowedBlueprintPHIDs($lease); } - $lease->setAllowedBlueprintPHIDs($allowed_phids); + + $lease->setAllowedBlueprintPHIDs($blueprint_phids); if ($until) { $lease->setUntil($until); } // If something fatals or the user interrupts the process (for example, // with "^C"), release the lease. We'll cancel this below, if the lease // actually activates. $lease->setReleaseOnDestruction(true); $leases[] = $lease; } // TODO: This would probably be better handled with PhutilSignalRouter, // but it currently doesn't route SIGINT. We're initializing it to setup // SIGTERM handling and make eventual migration easier. 
$router = PhutilSignalRouter::getRouter(); pcntl_signal(SIGINT, array($this, 'didReceiveInterrupt')); $t_start = microtime(true); echo tsprintf( "%s\n\n", pht('Leases queued for activation:')); foreach ($leases as $lease) { $lease->queueForActivation(); echo tsprintf( " __%s__\n", PhabricatorEnv::getProductionURI($lease->getURI())); } echo tsprintf( "\n%s\n\n", pht('Waiting for daemons to activate leases...')); foreach ($leases as $lease) { $this->waitUntilActive($lease); } // Now that we've survived activation and the lease is good, make it // durable. foreach ($leases as $lease) { $lease->setReleaseOnDestruction(false); } $t_end = microtime(true); echo tsprintf( "\n%s\n\n", pht( 'Activation complete. Leases are permanent until manually '. 'released with:')); foreach ($leases as $lease) { echo tsprintf( " %s\n", pht('$ ./bin/drydock release-lease --id %d', $lease->getID())); } echo tsprintf( "\n%s\n", pht( 'Leases activated in %sms.', new PhutilNumber((int)(($t_end - $t_start) * 1000)))); return 0; } public function didReceiveInterrupt($signo) { // Doing this makes us run destructors, particularly the "release on // destruction" trigger on the lease. exit(128 + $signo); } private function waitUntilActive(DrydockLease $lease) { $viewer = $this->getViewer(); $log_cursor = 0; $log_types = DrydockLogType::getAllLogTypes(); $is_active = false; while (!$is_active) { $lease->reload(); $pager = id(new AphrontCursorPagerView()) ->setBeforeID($log_cursor); // While we're waiting, show the user any logs which the daemons have // generated to give them some clue about what's going on. 
$logs = id(new DrydockLogQuery()) ->setViewer($viewer) ->withLeasePHIDs(array($lease->getPHID())) ->executeWithCursorPager($pager); if ($logs) { $logs = mpull($logs, null, 'getID'); ksort($logs); $log_cursor = last_key($logs); } foreach ($logs as $log) { $type_key = $log->getType(); if (isset($log_types[$type_key])) { $type_object = id(clone $log_types[$type_key]) ->setLog($log) ->setViewer($viewer); $log_data = $log->getData(); $type = $type_object->getLogTypeName(); $data = $type_object->renderLogForText($log_data); } else { $type = pht('Unknown ("%s")', $type_key); $data = null; } echo tsprintf( "(Lease #%d) <%s> %B\n", $lease->getID(), $type, $data); } $status = $lease->getStatus(); switch ($status) { case DrydockLeaseStatus::STATUS_ACTIVE: $is_active = true; break; case DrydockLeaseStatus::STATUS_RELEASED: throw new Exception(pht('Lease has already been released!')); case DrydockLeaseStatus::STATUS_DESTROYED: throw new Exception(pht('Lease has already been destroyed!')); case DrydockLeaseStatus::STATUS_BROKEN: throw new Exception(pht('Lease has been broken!')); case DrydockLeaseStatus::STATUS_PENDING: case DrydockLeaseStatus::STATUS_ACQUIRED: break; default: throw new Exception( pht( 'Lease has unknown status "%s".', $status)); } if ($is_active) { break; } else { sleep(1); } } } + private function newAllowedBlueprintPHIDs(DrydockLease $lease) { + $viewer = $this->getViewer(); + + $impls = DrydockBlueprintImplementation::getAllForAllocatingLease($lease); + + if (!$impls) { + throw new PhutilArgumentUsageException( + pht( + 'No known blueprint class can ever allocate the specified '. + 'lease. 
Check that the resource type is spelled correctly.')); + } + + $classes = array_keys($impls); + + $blueprints = id(new DrydockBlueprintQuery()) + ->setViewer($viewer) + ->withBlueprintClasses($classes) + ->withDisabled(false) + ->execute(); + + if (!$blueprints) { + throw new PhutilArgumentUsageException( + pht( + 'No enabled blueprints exist with a blueprint class that can '. + 'plausibly allocate resources to satisfy the requested lease.')); + } + + $phids = mpull($blueprints, 'getPHID'); + + return $phids; + } + } diff --git a/src/applications/drydock/worker/DrydockLeaseUpdateWorker.php b/src/applications/drydock/worker/DrydockLeaseUpdateWorker.php index b83022f720..83d56554c1 100644 --- a/src/applications/drydock/worker/DrydockLeaseUpdateWorker.php +++ b/src/applications/drydock/worker/DrydockLeaseUpdateWorker.php @@ -1,1026 +1,984 @@ getTaskDataValue('leasePHID'); $hash = PhabricatorHash::digestForIndex($lease_phid); $lock_key = 'drydock.lease:'.$hash; $lock = PhabricatorGlobalLock::newLock($lock_key) ->lock(1); try { $lease = $this->loadLease($lease_phid); $this->handleUpdate($lease); } catch (Exception $ex) { $lock->unlock(); $this->flushDrydockTaskQueue(); throw $ex; } $lock->unlock(); } /* -( Updating Leases )---------------------------------------------------- */ /** * @task update */ private function handleUpdate(DrydockLease $lease) { try { $this->updateLease($lease); } catch (DrydockAcquiredBrokenResourceException $ex) { // If this lease acquired a resource but failed to activate, we don't // need to break the lease. We can throw it back in the pool and let // it take another shot at acquiring a new resource. // Before we throw it back, release any locks the lease is holding. 
DrydockSlotLock::releaseLocks($lease->getPHID()); $lease ->setStatus(DrydockLeaseStatus::STATUS_PENDING) ->setResourcePHID(null) ->save(); $lease->logEvent( DrydockLeaseReacquireLogType::LOGCONST, array( 'class' => get_class($ex), 'message' => $ex->getMessage(), )); $this->yieldLease($lease, $ex); } catch (Exception $ex) { if ($this->isTemporaryException($ex)) { $this->yieldLease($lease, $ex); } else { $this->breakLease($lease, $ex); } } } /** * @task update */ private function updateLease(DrydockLease $lease) { $this->processLeaseCommands($lease); $lease_status = $lease->getStatus(); switch ($lease_status) { case DrydockLeaseStatus::STATUS_PENDING: $this->executeAllocator($lease); break; case DrydockLeaseStatus::STATUS_ACQUIRED: $this->activateLease($lease); break; case DrydockLeaseStatus::STATUS_ACTIVE: // Nothing to do. break; case DrydockLeaseStatus::STATUS_RELEASED: case DrydockLeaseStatus::STATUS_BROKEN: $this->destroyLease($lease); break; case DrydockLeaseStatus::STATUS_DESTROYED: break; } $this->yieldIfExpiringLease($lease); } /** * @task update */ private function yieldLease(DrydockLease $lease, Exception $ex) { $duration = $this->getYieldDurationFromException($ex); $lease->logEvent( DrydockLeaseActivationYieldLogType::LOGCONST, array( 'duration' => $duration, )); throw new PhabricatorWorkerYieldException($duration); } /* -( Processing Commands )------------------------------------------------ */ /** * @task command */ private function processLeaseCommands(DrydockLease $lease) { if (!$lease->canReceiveCommands()) { return; } $this->checkLeaseExpiration($lease); $commands = $this->loadCommands($lease->getPHID()); foreach ($commands as $command) { if (!$lease->canReceiveCommands()) { break; } $this->processLeaseCommand($lease, $command); $command ->setIsConsumed(true) ->save(); } } /** * @task command */ private function processLeaseCommand( DrydockLease $lease, DrydockCommand $command) { switch ($command->getCommand()) { case 
DrydockCommand::COMMAND_RELEASE: $this->releaseLease($lease); break; } } /* -( Drydock Allocator )-------------------------------------------------- */ /** * Find or build a resource which can satisfy a given lease request, then * acquire the lease. * * @param DrydockLease Requested lease. * @return void * @task allocator */ private function executeAllocator(DrydockLease $lease) { $blueprints = $this->loadBlueprintsForAllocatingLease($lease); // If we get nothing back, that means no blueprint is defined which can // ever build the requested resource. This is a permanent failure, since // we don't expect to succeed no matter how many times we try. if (!$blueprints) { throw new PhabricatorWorkerPermanentFailureException( pht( 'No active Drydock blueprint exists which can ever allocate a '. 'resource for lease "%s".', $lease->getPHID())); } // First, try to find a suitable open resource which we can acquire a new // lease on. $resources = $this->loadResourcesForAllocatingLease($blueprints, $lease); // If no resources exist yet, see if we can build one. if (!$resources) { $usable_blueprints = $this->removeOverallocatedBlueprints( $blueprints, $lease); // If we get nothing back here, some blueprint claims it can eventually // satisfy the lease, just not right now. This is a temporary failure, // and we expect allocation to succeed eventually. if (!$usable_blueprints) { $blueprints = $this->rankBlueprints($blueprints, $lease); // Try to actively reclaim unused resources. If we succeed, jump back // into the queue in an effort to claim it. 
foreach ($blueprints as $blueprint) { $reclaimed = $this->reclaimResources($blueprint, $lease); if ($reclaimed) { $lease->logEvent( DrydockLeaseReclaimLogType::LOGCONST, array( 'resourcePHIDs' => array($reclaimed->getPHID()), )); throw new PhabricatorWorkerYieldException(15); } } $lease->logEvent( DrydockLeaseWaitingForResourcesLogType::LOGCONST, array( 'blueprintPHIDs' => mpull($blueprints, 'getPHID'), )); throw new PhabricatorWorkerYieldException(15); } $usable_blueprints = $this->rankBlueprints($usable_blueprints, $lease); $exceptions = array(); foreach ($usable_blueprints as $blueprint) { try { $resources[] = $this->allocateResource($blueprint, $lease); // Bail after allocating one resource, we don't need any more than // this. break; } catch (Exception $ex) { // This failure is not normally expected, so log it. It can be // caused by something mundane and recoverable, however (see below // for discussion). // We log to the blueprint separately from the log to the lease: // the lease is not attached to a blueprint yet so the lease log // will not show up on the blueprint; more than one blueprint may // fail; and the lease is not really impacted (and won't log) if at // least one blueprint actually works. $blueprint->logEvent( DrydockResourceAllocationFailureLogType::LOGCONST, array( 'class' => get_class($ex), 'message' => $ex->getMessage(), )); $exceptions[] = $ex; } } if (!$resources) { // If one or more blueprints claimed that they would be able to // allocate resources but none are actually able to allocate resources, // log the failure and yield so we try again soon. // This can happen if some unexpected issue occurs during allocation // (for example, a call to build a VM fails for some reason) or if we // raced another allocator and the blueprint is now full. $ex = new PhutilAggregateException( pht( 'All blueprints failed to allocate a suitable new resource when '. 
'trying to allocate lease ("%s").', $lease->getPHID()), $exceptions); $lease->logEvent( DrydockLeaseAllocationFailureLogType::LOGCONST, array( 'class' => get_class($ex), 'message' => $ex->getMessage(), )); throw new PhabricatorWorkerYieldException(15); } $resources = $this->removeUnacquirableResources($resources, $lease); if (!$resources) { // If we make it here, we just built a resource but aren't allowed // to acquire it. We expect this during routine operation if the // resource prevents acquisition until it activates. Yield and wait // for activation. throw new PhabricatorWorkerYieldException(15); } // NOTE: We have not acquired the lease yet, so it is possible that the // resource we just built will be snatched up by some other lease before // we can acquire it. This is not problematic: we'll retry a little later // and should succeed eventually. } $resources = $this->rankResources($resources, $lease); $exceptions = array(); $yields = array(); $allocated = false; foreach ($resources as $resource) { try { $resource = $this->newResourceForAcquisition($resource, $lease); $this->acquireLease($resource, $lease); $allocated = true; break; } catch (DrydockResourceLockException $ex) { // We need to lock the resource to actually acquire it. If we aren't // able to acquire the lock quickly enough, we can yield and try again // later. $yields[] = $ex; } catch (DrydockAcquiredBrokenResourceException $ex) { // If a resource was reclaimed or destroyed by the time we actually // got around to acquiring it, we just got unlucky. We can yield and // try again later. $yields[] = $ex; } catch (PhabricatorWorkerYieldException $ex) { // We can be told to yield, particularly by the supplemental allocator // trying to give us a supplemental resource. 
$yields[] = $ex; } catch (Exception $ex) { $exceptions[] = $ex; } } if (!$allocated) { if ($yields) { throw new PhabricatorWorkerYieldException(15); } else { throw new PhutilAggregateException( pht( 'Unable to acquire lease "%s" on any resource.', $lease->getPHID()), $exceptions); } } } - /** - * Get all the @{class:DrydockBlueprintImplementation}s which can possibly - * build a resource to satisfy a lease. - * - * This method returns blueprints which might, at some time, be able to - * build a resource which can satisfy the lease. They may not be able to - * build that resource right now. - * - * @param DrydockLease Requested lease. - * @return list List of qualifying blueprint - * implementations. - * @task allocator - */ - private function loadBlueprintImplementationsForAllocatingLease( - DrydockLease $lease) { - - $impls = DrydockBlueprintImplementation::getAllBlueprintImplementations(); - - $keep = array(); - foreach ($impls as $key => $impl) { - // Don't use disabled blueprint types. - if (!$impl->isEnabled()) { - continue; - } - - // Don't use blueprint types which can't allocate the correct kind of - // resource. - if ($impl->getType() != $lease->getResourceType()) { - continue; - } - - if (!$impl->canAnyBlueprintEverAllocateResourceForLease($lease)) { - continue; - } - - $keep[$key] = $impl; - } - - return $keep; - } - - /** * Get all the concrete @{class:DrydockBlueprint}s which can possibly * build a resource to satisfy a lease. * * @param DrydockLease Requested lease. * @return list List of qualifying blueprints. 
* @task allocator */
  private function loadBlueprintsForAllocatingLease(
    DrydockLease $lease) {
    $viewer = $this->getViewer();

    // Start from the blueprint implementation classes that may serve this
    // lease. The keys of this map are passed to the query below as the
    // permitted blueprint classes.
-    $impls = $this->loadBlueprintImplementationsForAllocatingLease($lease);
+    $impls = DrydockBlueprintImplementation::getAllForAllocatingLease($lease);
    if (!$impls) {
      return array();
    }

    // Only blueprints the lease explicitly allows are considered. If the
    // lease allows none, record that on the lease log and stop.
    $blueprint_phids = $lease->getAllowedBlueprintPHIDs();
    if (!$blueprint_phids) {
      $lease->logEvent(DrydockLeaseNoBlueprintsLogType::LOGCONST);
      return array();
    }

    // Base query without any authorization constraint. It is cloned below so
    // the unconstrained form stays available for the diagnostic re-query.
    $query = id(new DrydockBlueprintQuery())
      ->setViewer($viewer)
      ->withPHIDs($blueprint_phids)
      ->withBlueprintClasses(array_keys($impls))
      ->withDisabled(false);

    // The Drydock application itself is allowed to authorize anything. This
    // is primarily used for leases generated by CLI administrative tools.
    $drydock_phid = id(new PhabricatorDrydockApplication())->getPHID();
    $authorizing_phid = $lease->getAuthorizingPHID();
    if ($authorizing_phid != $drydock_phid) {
      $blueprints = id(clone $query)
        ->withAuthorizedPHIDs(array($authorizing_phid))
        ->execute();
      if (!$blueprints) {
        // If we didn't hit any blueprints, check if this is an authorization
        // problem: re-execute the query without the authorization constraint.
        // If the second query hits blueprints, the overall configuration is
        // fine but this is an authorization problem. If the second query also
        // comes up blank, this is some other kind of configuration issue so
        // we fall through to the default pathway.
        $all_blueprints = $query->execute();
        if ($all_blueprints) {
          $lease->logEvent(
            DrydockLeaseNoAuthorizationsLogType::LOGCONST,
            array(
              'authorizingPHID' => $authorizing_phid,
            ));
          return array();
        }
      }
    } else {
      $blueprints = $query->execute();
    }

    // Let each concrete blueprint rule itself out for this lease. The keys
    // of the query result are preserved in the returned map.
    $keep = array();
    foreach ($blueprints as $key => $blueprint) {
      if (!$blueprint->canEverAllocateResourceForLease($lease)) {
        continue;
      }

      $keep[$key] = $blueprint;
    }

    return $keep;
  }


  /**
   * Load a list of all resources which a given lease can possibly be
   * allocated against.
*
   * @param list<DrydockBlueprint> Blueprints which may produce suitable
   *   resources.
   * @param DrydockLease Requested lease.
   * @return list<DrydockResource> Resources which may be able to allocate
   *   the lease.
   * @task allocator
   */
  private function loadResourcesForAllocatingLease(
    array $blueprints,
    DrydockLease $lease) {
    assert_instances_of($blueprints, 'DrydockBlueprint');

    $viewer = $this->getViewer();

    // Constrain the search to resources built by the given blueprints, of
    // the type the lease asks for, in either the pending or active status.
    $blueprint_phids = mpull($blueprints, 'getPHID');
    $resource_types = array($lease->getResourceType());
    $candidate_statuses = array(
      DrydockResourceStatus::STATUS_PENDING,
      DrydockResourceStatus::STATUS_ACTIVE,
    );

    $candidates = id(new DrydockResourceQuery())
      ->setViewer($viewer)
      ->withBlueprintPHIDs($blueprint_phids)
      ->withTypes($resource_types)
      ->withStatuses($candidate_statuses)
      ->execute();

    return $this->removeUnacquirableResources($candidates, $lease);
  }


  /**
   * Filter a list of resources down to the ones whose blueprint reports
   * that the lease can be acquired on them.
   *
   * Keys of the input list are preserved in the result.
   *
   * @param list<DrydockResource> Candidate resources.
   * @param DrydockLease Acquiring lease.
   * @return list<DrydockResource> Resources which the lease may be able to
   *   acquire.
   * @task allocator
   */
  private function removeUnacquirableResources(
    array $resources,
    DrydockLease $lease) {

    $acquirable = array();
    foreach ($resources as $key => $resource) {
      $owner = $resource->getBlueprint();

      if ($owner->canAcquireLeaseOnResource($resource, $lease)) {
        $acquirable[$key] = $resource;
      }
    }

    return $acquirable;
  }


  /**
   * Filter a list of blueprints down to the ones which report that they can
   * allocate a resource for the lease.
   *
   * Keys of the input list are preserved in the result.
   *
   * @param list<DrydockBlueprint> List of blueprints.
   * @param DrydockLease Requested lease.
   * @return list<DrydockBlueprint> List with blueprints that can not
   *   allocate a resource for the lease right now removed.
   * @task allocator
   */
  private function removeOverallocatedBlueprints(
    array $blueprints,
    DrydockLease $lease) {
    assert_instances_of($blueprints, 'DrydockBlueprint');

    $available = array();
    foreach ($blueprints as $key => $blueprint) {
      if ($blueprint->canAllocateResourceForLease($lease)) {
        $available[$key] = $blueprint;
      }
    }

    return $available;
  }


  /**
   * Rank blueprints by suitability for building a new resource for a
   * particular lease.
   *
   * @param list<DrydockBlueprint> List of blueprints.
* @param DrydockLease Requested lease.
   * @return list<DrydockBlueprint> Ranked list of blueprints.
   * @task allocator
   */
  private function rankBlueprints(array $blueprints, DrydockLease $lease) {
    assert_instances_of($blueprints, 'DrydockBlueprint');

    // TODO: There is no real ranking yet; the order is simply randomized.
    // Replace this if a better ranking algorithm becomes available.
    shuffle($blueprints);

    return $blueprints;
  }


  /**
   * Order candidate resources for a lease acquisition attempt.
   *
   * @param list<DrydockResource> List of resources.
   * @param DrydockLease Requested lease.
   * @return list<DrydockResource> Ranked list of resources.
   * @task allocator
   */
  private function rankResources(array $resources, DrydockLease $lease) {
    assert_instances_of($resources, 'DrydockResource');

    // TODO: There is no real ranking yet; the order is simply randomized.
    // Replace this if a better ranking algorithm becomes available.
    shuffle($resources);

    return $resources;
  }


  /**
   * Ask a blueprint to allocate a resource for a lease, validate what it
   * returned, and schedule activation if the resource is still pending.
   *
   * @param DrydockBlueprint The blueprint to allocate a resource from.
   * @param DrydockLease Requested lease.
   * @return DrydockResource Allocated resource.
   * @task allocator
   */
  private function allocateResource(
    DrydockBlueprint $blueprint,
    DrydockLease $lease) {

    $resource = $blueprint->allocateResource($lease);
    $this->validateAllocatedResource($blueprint, $resource, $lease);

    // Only resources which were allocated in the pending status need a
    // follow-up task to activate them.
    $status = $resource->getStatus();
    if ($status != DrydockResourceStatus::STATUS_PENDING) {
      return $resource;
    }

    $resource_phid = $resource->getPHID();

    $task_data = array(
      'resourcePHID' => $resource_phid,

      // The current task will generally yield while the resource activates,
      // so ask for it to be woken up once the resource comes online. Most
      // of the time, the newly activated resource can then be leased.
      'awakenOnActivation' => array(
        $this->getCurrentWorkerTaskID(),
      ),
    );

    $task_options = array(
      'objectPHID' => $resource_phid,
    );

    PhabricatorWorker::scheduleTask(
      'DrydockResourceUpdateWorker',
      $task_data,
      $task_options);

    return $resource;
  }


  /**
   * Check that the resource a blueprint allocated is roughly the sort of
   * object we expect.
*
   * @param DrydockBlueprint Blueprint which built the resource.
   * @param wild Thing which the blueprint claims is a valid resource.
   * @param DrydockLease Lease the resource was allocated for.
   * @return void
   * @task allocator
   */
  private function validateAllocatedResource(
    DrydockBlueprint $blueprint,
    $resource,
    DrydockLease $lease) {

    // The blueprint must hand back a real resource object.
    $is_resource = ($resource instanceof DrydockResource);
    if (!$is_resource) {
      throw new Exception(
        pht(
          'Blueprint "%s" (of type "%s") is not properly implemented: %s must '.
          'return an object of type %s or throw, but returned something else.',
          $blueprint->getBlueprintName(),
          $blueprint->getClassName(),
          'allocateResource()',
          'DrydockResource'));
    }

    // The resource must report itself as allocated.
    $is_allocated = $resource->isAllocatedResource();
    if (!$is_allocated) {
      throw new Exception(
        pht(
          'Blueprint "%s" (of type "%s") is not properly implemented: %s '.
          'must actually allocate the resource it returns.',
          $blueprint->getBlueprintName(),
          $blueprint->getClassName(),
          'allocateResource()'));
    }

    // The resource type must be exactly the type the lease requested.
    $resource_type = $resource->getType();
    $lease_type = $lease->getResourceType();
    if ($resource_type === $lease_type) {
      return;
    }

    throw new Exception(
      pht(
        'Blueprint "%s" (of type "%s") is not properly implemented: it '.
        'built a resource of type "%s" to satisfy a lease requesting a '.
        'resource of type "%s".',
        $blueprint->getBlueprintName(),
        $blueprint->getClassName(),
        $resource_type,
        $lease_type));
  }


  /**
   * Try to reclaim one active resource of a blueprint on behalf of a lease.
   *
   * @param DrydockBlueprint Blueprint whose resources may be reclaimed.
   * @param DrydockLease Lease which wants a resource reclaimed.
   * @return DrydockResource|null The resource which was reclaimed, or null
   *   if nothing was reclaimed.
   */
  private function reclaimResources(
    DrydockBlueprint $blueprint,
    DrydockLease $lease) {
    $viewer = $this->getViewer();

    // A lease may only have one reclaim in flight at a time. If the lease
    // has recorded a resource it is reclaiming, and that resource is still
    // active or released, wait for that reclaim to finish first. This keeps
    // a single lease from reclaiming many resources when reclaims are slow.
    $pending_phid = $lease->getAttribute('drydock.reclaimingPHID');
    if ($pending_phid) {
      $pending_statuses = array(
        DrydockResourceStatus::STATUS_ACTIVE,
        DrydockResourceStatus::STATUS_RELEASED,
      );

      $pending_resource = id(new DrydockResourceQuery())
        ->setViewer($viewer)
        ->withPHIDs(array($pending_phid))
        ->withStatuses($pending_statuses)
        ->executeOne();
      if ($pending_resource) {
        return null;
      }
    }

    $active_statuses = array(
      DrydockResourceStatus::STATUS_ACTIVE,
    );

    $candidates = id(new DrydockResourceQuery())
      ->setViewer($viewer)
      ->withBlueprintPHIDs(array($blueprint->getPHID()))
      ->withStatuses($active_statuses)
      ->execute();

    // TODO: The choice of victim is random. This could prefer resources
    // which are long-unused, have many similar copies, are old, or are
    // cheap to rebuild.
    shuffle($candidates);

    foreach ($candidates as $candidate) {
      if (!$this->canReclaimResource($candidate)) {
        continue;
      }

      $this->reclaimResource($candidate, $lease);
      return $candidate;
    }

    return null;
  }


/* -( Acquiring Leases )--------------------------------------------------- */


  /**
   * Have a resource's blueprint acquire a lease on it, validate the result,
   * and queue activation if the lease is acquired but not yet active.
   *
   * @param DrydockResource Resource to acquire a lease on.
   * @param DrydockLease Lease to acquire.
   * @return void
   * @task acquire
   */
  private function acquireLease(
    DrydockResource $resource,
    DrydockLease $lease) {

    $blueprint = $resource->getBlueprint();
    $blueprint->acquireLease($resource, $lease);

    $this->validateAcquiredLease($blueprint, $resource, $lease);

    // Nothing more to do unless the lease stopped at the acquired status.
    if ($lease->getStatus() != DrydockLeaseStatus::STATUS_ACQUIRED) {
      return;
    }

    // Queue another run of this worker for the lease so it gets activated.
    $task_data = array(
      'leasePHID' => $lease->getPHID(),
    );

    $task_options = array(
      'objectPHID' => $lease->getPHID(),
    );

    $this->queueTask(__CLASS__, $task_data, $task_options);
  }


  /**
   * Make sure that a lease was really acquired properly.
   *
   * @param DrydockBlueprint Blueprint which created the resource.
   * @param DrydockResource Resource which was acquired.
   * @param DrydockLease The lease which was supposedly acquired.
* @return void
   * @task acquire
   */
  private function validateAcquiredLease(
    DrydockBlueprint $blueprint,
    DrydockResource $resource,
    DrydockLease $lease) {

    if (!$lease->isAcquiredLease()) {
      throw new Exception(
        pht(
          'Blueprint "%s" (of type "%s") is not properly implemented: it '.
          'returned from "%s" without acquiring a lease.',
          $blueprint->getBlueprintName(),
          $blueprint->getClassName(),
          'acquireLease()'));
    }

    // The lease must point at exactly the resource we asked it to acquire.
    $lease_phid = $lease->getResourcePHID();
    $resource_phid = $resource->getPHID();

    if ($lease_phid !== $resource_phid) {
      throw new Exception(
        pht(
          'Blueprint "%s" (of type "%s") is not properly implemented: it '.
          'returned from "%s" with a lease acquired on the wrong resource.',
          $blueprint->getBlueprintName(),
          $blueprint->getClassName(),
          'acquireLease()'));
    }
  }


  /**
   * Choose the resource a lease should actually be acquired on: either the
   * candidate resource itself, or a newly allocated supplemental resource
   * if the blueprint asks for one and one can be built.
   *
   * Falls back to the candidate resource when it has no pending, acquired
   * or active leases, when the blueprint does not want a supplemental
   * resource, when the blueprint is overallocated, or when allocating the
   * new resource throws (the failure is logged on the blueprint).
   *
   * @param DrydockResource Candidate resource.
   * @param DrydockLease Lease being acquired.
   * @return DrydockResource Resource to acquire the lease on.
   * @throws PhabricatorWorkerYieldException If a new resource was built but
   *   can not be acquired yet.
   */
  private function newResourceForAcquisition(
    DrydockResource $resource,
    DrydockLease $lease) {

    // If the resource has no leases against it, never build a new one. This is
    // likely already a new resource that just activated.
    $viewer = $this->getViewer();

    $statuses = array(
      DrydockLeaseStatus::STATUS_PENDING,
      DrydockLeaseStatus::STATUS_ACQUIRED,
      DrydockLeaseStatus::STATUS_ACTIVE,
    );

    // We only need to know whether any such lease exists, so one row is
    // enough.
    $leases = id(new DrydockLeaseQuery())
      ->setViewer($viewer)
      ->withResourcePHIDs(array($resource->getPHID()))
      ->withStatuses($statuses)
      ->setLimit(1)
      ->execute();
    if (!$leases) {
      return $resource;
    }

    // If we're about to get a lease on a resource, check if the blueprint
    // wants to allocate a supplemental resource. If it does, try to perform a
    // new allocation instead.
    $blueprint = $resource->getBlueprint();
    if (!$blueprint->shouldAllocateSupplementalResource($resource, $lease)) {
      return $resource;
    }

    // If the blueprint is already overallocated, we can't allocate a new
    // resource. Just return the existing resource.
    $remaining = $this->removeOverallocatedBlueprints(
      array($blueprint),
      $lease);
    if (!$remaining) {
      return $resource;
    }

    // Try to build a new resource.
    try {
      $new_resource = $this->allocateResource($blueprint, $lease);
    } catch (Exception $ex) {
      $blueprint->logEvent(
        DrydockResourceAllocationFailureLogType::LOGCONST,
        array(
          'class' => get_class($ex),
          'message' => $ex->getMessage(),
        ));

      return $resource;
    }

    // If we can't actually acquire the new resource yet, just yield.
    // (We could try to move forward with the original resource instead.)
    $acquirable = $this->removeUnacquirableResources(
      array($new_resource),
      $lease);
    if (!$acquirable) {
      throw new PhabricatorWorkerYieldException(15);
    }

    return $new_resource;
  }


/* -( Activating Leases )-------------------------------------------------- */


  /**
   * Activate a lease on the resource it has acquired by delegating to the
   * resource's blueprint, then validate the result.
   *
   * @param DrydockLease Lease to activate.
   * @return void
   * @throws Exception If the lease has no resource.
   * @throws PhabricatorWorkerYieldException If the resource is still
   *   pending.
   * @throws DrydockAcquiredBrokenResourceException If the resource is in
   *   any status other than pending or active.
   * @task activate
   */
  private function activateLease(DrydockLease $lease) {
    $resource = $lease->getResource();
    if (!$resource) {
      throw new Exception(
        pht('Trying to activate lease with no resource.'));
    }

    $resource_status = $resource->getStatus();

    // The resource has not come online yet: try again later.
    if ($resource_status == DrydockResourceStatus::STATUS_PENDING) {
      throw new PhabricatorWorkerYieldException(15);
    }

    if ($resource_status != DrydockResourceStatus::STATUS_ACTIVE) {
      throw new DrydockAcquiredBrokenResourceException(
        pht(
          'Trying to activate lease ("%s") on a resource ("%s") in '.
          'the wrong status ("%s").',
          $lease->getPHID(),
          $resource->getPHID(),
          $resource_status));
    }

    // NOTE: We can race resource destruction here. Between the time we
    // performed the read above and now, the resource might have closed, so
    // we may activate leases on dead resources. At least for now, this seems
    // fine: a resource dying right before we activate a lease on it should not
    // be distinguishable from a resource dying right after we activate a lease
    // on it. We end up with an active lease on a dead resource either way, and
    // can not prevent resources dying from lightning strikes.

    $blueprint = $resource->getBlueprint();
    $blueprint->activateLease($resource, $lease);
    $this->validateActivatedLease($blueprint, $resource, $lease);
  }


  /**
   * Make sure that a blueprint really activated a lease.
   *
   * @param DrydockBlueprint Blueprint which was asked to activate the lease.
   * @param DrydockResource Resource the lease is held on.
   * @param DrydockLease The lease which was supposedly activated.
   * @return void
   * @throws Exception If the lease does not report itself as activated.
   * @task activate
   */
  private function validateActivatedLease(
    DrydockBlueprint $blueprint,
    DrydockResource $resource,
    DrydockLease $lease) {

    if (!$lease->isActivatedLease()) {
      // This runs after the blueprint's activateLease(), so that is the
      // method the message must name.
      throw new Exception(
        pht(
          'Blueprint "%s" (of type "%s") is not properly implemented: it '.
          'returned from "%s" without activating a lease.',
          $blueprint->getBlueprintName(),
          $blueprint->getClassName(),
          'activateLease()'));
    }

  }


/* -( Releasing Leases )--------------------------------------------------- */


  /**
   * Release a lease: mark it released and log that, notify the blueprint of
   * its resource (if it has one), then destroy the lease.
   *
   * @param DrydockLease Lease to release.
   * @return void
   * @task release
   */
  private function releaseLease(DrydockLease $lease) {
    $lease
      ->setStatus(DrydockLeaseStatus::STATUS_RELEASED)
      ->save();

    $lease->logEvent(DrydockLeaseReleasedLogType::LOGCONST);

    $resource = $lease->getResource();
    if ($resource) {
      $blueprint = $resource->getBlueprint();
      $blueprint->didReleaseLease($resource, $lease);
    }

    $this->destroyLease($lease);
  }


/* -( Breaking Leases )---------------------------------------------------- */


  /**
   * Mark a lease as broken after a failure and fail the current task
   * permanently.
   *
   * If the lease is already broken, released or destroyed, the original
   * exception is rethrown wrapped in a proxy exception and nothing is
   * modified. Otherwise the lease is saved in the broken status, the
   * failure is logged, waiting tasks are awakened and a follow-up task of
   * this worker class is queued for the lease.
   *
   * This method always throws.
   *
   * @param DrydockLease Lease to break.
   * @param Exception Failure which caused the lease to break.
   * @return void
   * @throws PhutilProxyException If the lease is already broken, released
   *   or destroyed.
   * @throws PhabricatorWorkerPermanentFailureException Otherwise, after the
   *   lease has been marked broken.
   * @task break
   */
  protected function breakLease(DrydockLease $lease, Exception $ex) {
    switch ($lease->getStatus()) {
      case DrydockLeaseStatus::STATUS_BROKEN:
      case DrydockLeaseStatus::STATUS_RELEASED:
      case DrydockLeaseStatus::STATUS_DESTROYED:
        throw new PhutilProxyException(
          pht(
            'Unexpected failure while destroying lease ("%s").',
            $lease->getPHID()),
          $ex);
    }

    $lease
      ->setStatus(DrydockLeaseStatus::STATUS_BROKEN)
      ->save();

    $lease->logEvent(
      DrydockLeaseActivationFailureLogType::LOGCONST,
      array(
        'class' => get_class($ex),
        'message' => $ex->getMessage(),
      ));

    $lease->awakenTasks();

    // NOTE(review): presumably this follow-up run cleans up the now-broken
    // lease; confirm against the worker's update logic.
    $this->queueTask(
      __CLASS__,
      array(
        'leasePHID' => $lease->getPHID(),
      ),
      array(
        'objectPHID' => $lease->getPHID(),
      ));

    throw new PhabricatorWorkerPermanentFailureException(
      pht(
        'Permanent failure while activating lease ("%s"): %s',
        $lease->getPHID(),
        $ex->getMessage()));
  }


/* -( Destroying Leases
)-------------------------------------------------- */


  /**
   * Tear down a lease: let the blueprint of its resource (if it has one)
   * destroy any lease state, release the slot locks held under the lease's
   * PHID, save the lease in the destroyed status, log that, and awaken
   * tasks waiting on the lease.
   *
   * @param DrydockLease Lease to destroy.
   * @return void
   * @task destroy
   */
  private function destroyLease(DrydockLease $lease) {
    $held_resource = $lease->getResource();

    if ($held_resource) {
      $owner = $held_resource->getBlueprint();
      $owner->destroyLease($held_resource, $lease);
    }

    $lease_phid = $lease->getPHID();
    DrydockSlotLock::releaseLocks($lease_phid);

    $lease
      ->setStatus(DrydockLeaseStatus::STATUS_DESTROYED)
      ->save();

    $lease->logEvent(DrydockLeaseDestroyedLogType::LOGCONST);

    $lease->awakenTasks();
  }

}