mirror of
https://github.com/ansible-collections/hetzner.hcloud
synced 2024-11-10 06:34:13 +00:00
feat: use exponential backoff algorithm when polling actions (#524)
##### SUMMARY Replace the constant poll interval of 1 second, with a truncated exponential back off algorithm with jitter. Below is a suite of poll interval (in seconds) generated by the new algorithm: ``` 1.49 2.14 5.46 6.51 6.57 5.57 5.98 7.13 6.59 7.10 5.54 5.03 6.56 5.96 6.72 7.21 7.05 5.31 5.60 6.33 6.82 5.42 6.08 6.60 TOTAL: 140.77 ```
This commit is contained in:
parent
ecaeac1175
commit
19e586fa22
6 changed files with 56 additions and 11 deletions
|
@ -0,0 +1,2 @@
|
||||||
|
minor_changes:
|
||||||
|
- Use a truncated exponential backoff algorithm when polling actions from the API.
|
|
@ -3,6 +3,7 @@
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from contextlib import contextmanager
|
from contextlib import contextmanager
|
||||||
|
from random import random
|
||||||
|
|
||||||
from ansible.module_utils.basic import missing_required_lib
|
from ansible.module_utils.basic import missing_required_lib
|
||||||
|
|
||||||
|
@ -106,3 +107,22 @@ class Client(ClientBase):
|
||||||
yield
|
yield
|
||||||
finally:
|
finally:
|
||||||
self._requests_session = requests.Session()
|
self._requests_session = requests.Session()
|
||||||
|
|
||||||
|
|
||||||
|
def exponential_backoff_poll_interval(*, base: float, multiplier: int, cap: float, jitter: float):
|
||||||
|
"""
|
||||||
|
Return a poll interval function, implementing a truncated exponential backoff with jitter.
|
||||||
|
|
||||||
|
:param base: Base for the exponential backoff algorithm.
|
||||||
|
:param multiplier: Multiplier for the exponential backoff algorithm.
|
||||||
|
:param cap: Value at which the interval is truncated.
|
||||||
|
:param jitter: Proportion of the interval to add as random jitter.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def func(retries: int) -> float:
|
||||||
|
interval = base * multiplier**retries # Exponential backoff
|
||||||
|
interval = min(cap, interval) # Cap backoff
|
||||||
|
interval += random() * interval * jitter # Add jitter
|
||||||
|
return interval
|
||||||
|
|
||||||
|
return func
|
||||||
|
|
|
@ -15,7 +15,12 @@ from ansible.module_utils.common.validation import (
|
||||||
check_required_one_of,
|
check_required_one_of,
|
||||||
)
|
)
|
||||||
|
|
||||||
from .client import ClientException, client_check_required_lib, client_get_by_name_or_id
|
from .client import (
|
||||||
|
ClientException,
|
||||||
|
client_check_required_lib,
|
||||||
|
client_get_by_name_or_id,
|
||||||
|
exponential_backoff_poll_interval,
|
||||||
|
)
|
||||||
from .vendor.hcloud import APIException, Client, HCloudException
|
from .vendor.hcloud import APIException, Client, HCloudException
|
||||||
from .vendor.hcloud.actions import ActionException
|
from .vendor.hcloud.actions import ActionException
|
||||||
from .version import version
|
from .version import version
|
||||||
|
@ -81,6 +86,9 @@ class AnsibleHCloud:
|
||||||
api_endpoint=self.module.params["api_endpoint"],
|
api_endpoint=self.module.params["api_endpoint"],
|
||||||
application_name="ansible-module",
|
application_name="ansible-module",
|
||||||
application_version=version,
|
application_version=version,
|
||||||
|
# Total waiting time before timeout is > 117.0
|
||||||
|
poll_interval=exponential_backoff_poll_interval(base=1.0, multiplier=2, cap=5.0, jitter=0.5),
|
||||||
|
poll_max_retries=25,
|
||||||
)
|
)
|
||||||
|
|
||||||
def _client_get_by_name_or_id(self, resource: str, param: str | int):
|
def _client_get_by_name_or_id(self, resource: str, param: str | int):
|
||||||
|
|
|
@ -204,7 +204,7 @@ class AnsibleHCloudCertificate(AnsibleHCloud):
|
||||||
resp = self.client.certificates.create_managed(**params)
|
resp = self.client.certificates.create_managed(**params)
|
||||||
# Action should take 60 to 90 seconds on average, wait for 5m to
|
# Action should take 60 to 90 seconds on average, wait for 5m to
|
||||||
# allow DNS or Let's Encrypt slowdowns.
|
# allow DNS or Let's Encrypt slowdowns.
|
||||||
resp.action.wait_until_finished(max_retries=300)
|
resp.action.wait_until_finished(max_retries=62) # 62 retries >= 302 seconds
|
||||||
except HCloudException as exception:
|
except HCloudException as exception:
|
||||||
self.fail_json_hcloud(exception)
|
self.fail_json_hcloud(exception)
|
||||||
|
|
||||||
|
|
|
@ -471,7 +471,7 @@ class AnsibleHCloudServer(AnsibleHCloud):
|
||||||
self.result["root_password"] = resp.root_password
|
self.result["root_password"] = resp.root_password
|
||||||
# Action should take 60 to 90 seconds on average, but can be >10m when creating a
|
# Action should take 60 to 90 seconds on average, but can be >10m when creating a
|
||||||
# server from a custom images
|
# server from a custom images
|
||||||
resp.action.wait_until_finished(max_retries=1800)
|
resp.action.wait_until_finished(max_retries=362) # 362 retries >= 1802 seconds
|
||||||
for action in resp.next_actions:
|
for action in resp.next_actions:
|
||||||
action.wait_until_finished()
|
action.wait_until_finished()
|
||||||
|
|
||||||
|
@ -671,17 +671,18 @@ class AnsibleHCloudServer(AnsibleHCloud):
|
||||||
|
|
||||||
self.stop_server_if_forced()
|
self.stop_server_if_forced()
|
||||||
|
|
||||||
upgrade_disk = self.module.params.get("upgrade_disk")
|
|
||||||
# Upgrading a server takes 160 seconds on average, upgrading the disk should
|
|
||||||
# take more time
|
|
||||||
upgrade_timeout = 600 if upgrade_disk else 180
|
|
||||||
|
|
||||||
if not self.module.check_mode:
|
if not self.module.check_mode:
|
||||||
|
upgrade_disk = self.module.params.get("upgrade_disk")
|
||||||
|
|
||||||
action = self.hcloud_server.change_type(
|
action = self.hcloud_server.change_type(
|
||||||
server_type=self._get_server_type(),
|
server_type=self._get_server_type(),
|
||||||
upgrade_disk=upgrade_disk,
|
upgrade_disk=upgrade_disk,
|
||||||
)
|
)
|
||||||
action.wait_until_finished(max_retries=upgrade_timeout)
|
# Upgrading a server takes 160 seconds on average, upgrading the disk should
|
||||||
|
# take more time
|
||||||
|
# 122 retries >= 602 seconds
|
||||||
|
# 38 retries >= 182 seconds
|
||||||
|
action.wait_until_finished(max_retries=122 if upgrade_disk else 38)
|
||||||
self._mark_as_changed()
|
self._mark_as_changed()
|
||||||
|
|
||||||
def _update_server_ip(self, kind: Literal["ipv4", "ipv6"]) -> None:
|
def _update_server_ip(self, kind: Literal["ipv4", "ipv6"]) -> None:
|
||||||
|
@ -867,9 +868,9 @@ class AnsibleHCloudServer(AnsibleHCloud):
|
||||||
try:
|
try:
|
||||||
if not self.module.check_mode:
|
if not self.module.check_mode:
|
||||||
image = self._get_image(self.hcloud_server.server_type)
|
image = self._get_image(self.hcloud_server.server_type)
|
||||||
# When we rebuild the server progress takes some more time.
|
|
||||||
resp = self.client.servers.rebuild(self.hcloud_server, image)
|
resp = self.client.servers.rebuild(self.hcloud_server, image)
|
||||||
resp.action.wait_until_finished(1000)
|
# When we rebuild the server progress takes some more time.
|
||||||
|
resp.action.wait_until_finished(max_retries=202) # 202 retries >= 1002 seconds
|
||||||
self._mark_as_changed()
|
self._mark_as_changed()
|
||||||
|
|
||||||
self._get_server()
|
self._get_server()
|
||||||
|
|
14
tests/unit/module_utils/test_client.py
Normal file
14
tests/unit/module_utils/test_client.py
Normal file
|
@ -0,0 +1,14 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from ansible_collections.hetzner.hcloud.plugins.module_utils.client import (
|
||||||
|
exponential_backoff_poll_interval,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_exponential_backoff_poll_interval():
|
||||||
|
poll_interval = exponential_backoff_poll_interval(base=1.0, multiplier=2, cap=5.0, jitter=0.0)
|
||||||
|
poll_max_retries = 25
|
||||||
|
|
||||||
|
results = [poll_interval(i) for i in range(poll_max_retries)]
|
||||||
|
assert sum(results) == 117.0
|
||||||
|
assert results[:6] == [1.0, 2.0, 4.0, 5.0, 5.0, 5.0]
|
Loading…
Reference in a new issue