관리-도구
편집 파일: adjust.py
# coding=utf-8
#
# Copyright © Cloud Linux GmbH & Cloud Linux Software, Inc 2010-2023 All Rights Reserved
#
# Licensed under CLOUD LINUX LICENSE AGREEMENT
# http://cloudlinux.com/docs/LICENSE.TXT
import itertools
import math
from typing import Any, Callable

from .utils import bootstrap_gen
from ._logs import logger
from .common import (
    AdjustStepData,
    empty_stats,
    empty_usage,
    GetNormalLimits,
    InvalidStateError,
)
from .overload import OverloadCheckResult
from .lves_tracker import LvesTracker
from .lve_sm import LveStateManager


class StepCalculator:
    """
    Compute how many LVEs should be bursted or unbursted on one adjust step.

    The calculator is stateful: consecutive "enable" requests grow the step
    linearly, consecutive "disable" requests grow it exponentially, and
    requesting one kind of step resets the progress of the other kind.
    """

    # Hard upper bound for the enable step size.
    MAX_STEP = 50

    def __init__(self, overload_threshold: float) -> None:
        """
        :param float overload_threshold: server load value at which
            the enable step upper bound reaches 0
        """
        self._last_enable_step = 0
        self._last_disable_step = 0
        self._overload_threshold = overload_threshold

    def get_enable_step(self, server_load: float) -> int:
        """
        Get current step size

        We start with 1 and increase step size by 1 on each iteration.
        Step size is bounded by two factors:
        1. MAX_STEP
        2. descending linear function (y = k * x + b) depending on server_load:
           step_size = -1 * MAX_STEP / overload_threshold * server_load + MAX_STEP

        Calling this method resets the disable step progression.

        :param float server_load: server load estimation (from 0 to 1)
        :return int: step size
        """
        self._last_disable_step = 0
        next_step_size = min(self._last_enable_step + 1, self.MAX_STEP)
        # step size bounded by descending linear function
        # (MAX_STEP at 0, 0 at overload_threshold)
        upper_bound = math.ceil(
            -1 * self.MAX_STEP / self._overload_threshold * server_load + self.MAX_STEP
        )
        # step size 0 <= step_size <= MAX_STEP
        upper_bound = min(max(upper_bound, 0), self.MAX_STEP)
        step_size = min(next_step_size, upper_bound)
        self._last_enable_step = step_size
        return step_size

    def get_disable_step(self, lves_count: int) -> int:
        """
        Get the number of LVEs to unburst on this step.

        The step doubles on every consecutive call (starting from 1) and
        is capped by ``lves_count``. Calling this method resets the enable
        step progression.

        :param int lves_count: upper bound for the returned step
        :return int: step size
        """
        self._last_enable_step = 0
        doubled = max(self._last_disable_step * 2, 1)
        self._last_disable_step = min(doubled, lves_count)
        return self._last_disable_step


class Adjuster:
    """
    Decide on every step which LVEs get bursted and which get unbursted.

    The algorithm itself lives in a generator created once in ``__init__``;
    ``step`` feeds it the next ``AdjustStepData`` message.
    """

    def __init__(
        self,
        lves_tracker: LvesTracker,
        get_normal_limits: GetNormalLimits,
        step_calculator: StepCalculator,
        is_server_overloaded: Callable[[], OverloadCheckResult],
        fail_fast: bool = True
    ) -> None:
        """
        :param lves_tracker: tracker updated on every step and queried
            for bursted / unbursted / overusing / quota-exceeded LVEs
        :param get_normal_limits: callable returning normal limits by LVE
        :param step_calculator: provides burst/unburst step sizes
        :param is_server_overloaded: callable returning overload check result
        :param fail_fast: when true, conflicting decisions and invalid
            state transitions are raised instead of being only logged
        """
        self._step = self._create_algorithm_gen(
            lves_tracker=lves_tracker,
            step_calculator=step_calculator,
            get_normal_limits=get_normal_limits,
            is_server_overloaded=is_server_overloaded,
            fail_fast=fail_fast,
        ).send
        self._step_exception: Exception | None = None

    def step(self, adjust_step_data: AdjustStepData) -> Any:
        """
        Run one iteration of the adjusting algorithm.

        :param adjust_step_data: message passed to the algorithm generator
        :raises Exception: any exception captured by the generator while
            processing this message is re-raised here
        """
        self._step(adjust_step_data)
        if self._step_exception is not None:
            # Clear the stored exception before raising so the next step starts clean.
            exc, self._step_exception = self._step_exception, None
            raise exc

    @staticmethod
    def _min_unutilized_ratio(stats, usage) -> float:
        """
        Return min(unutilized_cpu_ratio, unutilized_io_ratio).

        unutilized_*_ratio = (*_limit - *_usage) / *_limit

        NOTE(review): raises ZeroDivisionError when a cpu/io limit is 0 -
        confirm whether ``empty_stats`` or real stats can carry zero limits.
        """
        cpu_ratio = (stats.cpu - usage.cpu_usage) / stats.cpu
        io_ratio = (stats.io - usage.io_usage) / stats.io
        return min(cpu_ratio, io_ratio)

    @bootstrap_gen
    def _create_algorithm_gen(
        self,
        lves_tracker: LvesTracker,
        step_calculator: StepCalculator,
        get_normal_limits: GetNormalLimits,
        is_server_overloaded: Callable[[], OverloadCheckResult],
        fail_fast: bool,
    ):
        """
        Generator implementing the adjusting algorithm.

        Each value sent into the generator must be an ``AdjustStepData``.
        Any exception raised while handling a message is stored in
        ``self._step_exception`` (and re-raised by ``step``) so that
        the loop keeps running and can accept the next message.
        """
        while True:
            try:
                # TODO(vlebedev): Implement cooldown period for LVEs that were unbursted externally?
                msg = yield
                assert isinstance(msg, AdjustStepData)

                try:
                    normal_limits_by_lve = get_normal_limits()
                except Exception:
                    # Best effort: continue the step with no normal limits.
                    logger.exception('Failed to get normal limits')
                    normal_limits_by_lve = {}

                # TODO(vlebedev): This update call does not belong to adjuster - move it out of here.
                lves_tracker.update(
                    now=msg.now,
                    normal_limits_by_id=normal_limits_by_lve,
                    stats_by_id=msg.stats,
                    usages_by_id=msg.lve_usages_by_id,
                )

                to_burst = set[LveStateManager]()
                # LVEs which exceeded quota and are not unbursted yet are always unbursted.
                to_unburst = set[LveStateManager](
                    lves_tracker.quota_exceeded - lves_tracker.unbursted,
                )

                is_overloaded: OverloadCheckResult = is_server_overloaded()
                if is_overloaded:
                    currently_bursted = lves_tracker.bursted
                    currently_overusing = lves_tracker.overusing
                    step = step_calculator.get_disable_step(len(currently_bursted))
                    logger.info('Server is overloaded - searching %s candidates to unburst', step)
                    if step > len(to_unburst):
                        missing_num = step - len(to_unburst)
                        logger.debug(
                            'Server is overloaded and there is not enough LVEs going to be unbursted '
                            'due to quota being exceeded - trying to find %s more candidates to unburst',
                            missing_num,
                        )
                        # Overusing LVEs go first, then the rest of the bursted ones.
                        # NOTE(review): candidates may already be present in `to_unburst`,
                        # so fewer than `step` LVEs may end up selected - confirm intent.
                        overusing = set(currently_overusing)
                        candidates = itertools.chain(
                            overusing,
                            set(currently_bursted) - overusing,
                        )
                        to_unburst.update(itertools.islice(candidates, missing_num))
                elif (burst_candidates := lves_tracker.unbursted - lves_tracker.quota_exceeded):
                    # sort by min(unutilized_cpu_ratio, unutilized_io_ratio)
                    # The more from allowed io or cpu capacity is already used,
                    # the higher priority for bursting
                    burst_candidates = sorted(
                        burst_candidates,
                        key=lambda x: self._min_unutilized_ratio(
                            msg.stats.get(x.lve_id, empty_stats),
                            msg.lve_usages_by_id.get(x.lve_id, empty_usage),
                        ),
                    )
                    step = step_calculator.get_enable_step(is_overloaded.server_load)
                    logger.debug('Server has spare resources - trying to find %s more candidates to burst', step)
                    to_burst.update(itertools.islice(burst_candidates, step))

                if fail_fast and not to_burst.isdisjoint(to_unburst):
                    raise AssertionError('LVE can`t be bursted and unbursted simultaneously!')

                for managers, cmd in [
                    (to_burst, LveStateManager.Burst(now=msg.now)),
                    (to_unburst, LveStateManager.Unburst(now=msg.now)),
                ]:
                    for manager in managers:
                        try:
                            manager.step(cmd)
                        except InvalidStateError as e:
                            if fail_fast:
                                raise
                            logger.exception('LVE "%s": Failed to execute "%s"!', manager.lve_id, cmd, exc_info=e)
            except Exception as e:
                # Keep the generator alive; `step` re-raises this to the caller.
                self._step_exception = e