From 19e586fa22708348eca056377d9b8c51401c7cbb Mon Sep 17 00:00:00 2001 From: Jonas L Date: Thu, 4 Jul 2024 15:07:05 +0200 Subject: [PATCH] feat: use exponential backoff algorithm when polling actions (#524) ##### SUMMARY Replace the constant poll interval of 1 second, with a truncated exponential back off algorithm with jitter. Below is a suite of poll interval (in seconds) generated by the new algorithm: ``` 1.49 2.14 5.46 6.51 6.57 5.57 5.98 7.13 6.59 7.10 5.54 5.03 6.56 5.96 6.72 7.21 7.05 5.31 5.60 6.33 6.82 5.42 6.08 6.60 TOTAL: 140.77 ``` --- .../exponential-actions-polling-interval.yml | 2 ++ plugins/module_utils/client.py | 20 +++++++++++++++++++ plugins/module_utils/hcloud.py | 10 +++++++++- plugins/modules/certificate.py | 2 +- plugins/modules/server.py | 19 +++++++++--------- tests/unit/module_utils/test_client.py | 14 +++++++++++++ 6 files changed, 56 insertions(+), 11 deletions(-) create mode 100644 changelogs/fragments/exponential-actions-polling-interval.yml create mode 100644 tests/unit/module_utils/test_client.py diff --git a/changelogs/fragments/exponential-actions-polling-interval.yml b/changelogs/fragments/exponential-actions-polling-interval.yml new file mode 100644 index 0000000..bcb32c9 --- /dev/null +++ b/changelogs/fragments/exponential-actions-polling-interval.yml @@ -0,0 +1,2 @@ +minor_changes: + - Use a truncated exponential backoff algorithm when polling actions from the API. diff --git a/plugins/module_utils/client.py b/plugins/module_utils/client.py index d82a630..7301d8a 100644 --- a/plugins/module_utils/client.py +++ b/plugins/module_utils/client.py @@ -3,6 +3,7 @@ from __future__ import annotations from contextlib import contextmanager +from random import random from ansible.module_utils.basic import missing_required_lib @@ -106,3 +107,22 @@ class Client(ClientBase): yield finally: self._requests_session = requests.Session() + + +def exponential_backoff_poll_interval(*, base: float, multiplier: int, cap: float, jitter: float): + """ + Return a poll interval function, implementing a truncated exponential backoff with jitter. + + :param base: Base for the exponential backoff algorithm. + :param multiplier: Multiplier for the exponential backoff algorithm. + :param cap: Value at which the interval is truncated. + :param jitter: Proportion of the interval to add as random jitter. + """ + + def func(retries: int) -> float: + interval = base * multiplier**retries # Exponential backoff + interval = min(cap, interval) # Cap backoff + interval += random() * interval * jitter # Add jitter + return interval + + return func diff --git a/plugins/module_utils/hcloud.py b/plugins/module_utils/hcloud.py index 6039130..f8f410c 100644 --- a/plugins/module_utils/hcloud.py +++ b/plugins/module_utils/hcloud.py @@ -15,7 +15,12 @@ from ansible.module_utils.common.validation import ( check_required_one_of, ) -from .client import ClientException, client_check_required_lib, client_get_by_name_or_id +from .client import ( + ClientException, + client_check_required_lib, + client_get_by_name_or_id, + exponential_backoff_poll_interval, +) from .vendor.hcloud import APIException, Client, HCloudException from .vendor.hcloud.actions import ActionException from .version import version @@ -81,6 +86,9 @@ class AnsibleHCloud: api_endpoint=self.module.params["api_endpoint"], application_name="ansible-module", application_version=version, + # Total waiting time before timeout is > 117.0 + poll_interval=exponential_backoff_poll_interval(base=1.0, multiplier=2, cap=5.0, jitter=0.5), + poll_max_retries=25, ) def _client_get_by_name_or_id(self, resource: str, param: str | int): diff --git a/plugins/modules/certificate.py b/plugins/modules/certificate.py index 53466e4..353191c 100644 --- a/plugins/modules/certificate.py +++ b/plugins/modules/certificate.py @@ -204,7 +204,7 @@ class AnsibleHCloudCertificate(AnsibleHCloud): resp = self.client.certificates.create_managed(**params) # Action should take 60 to 90 seconds on average, wait for 5m to # allow DNS or Let's Encrypt slowdowns. - resp.action.wait_until_finished(max_retries=300) + resp.action.wait_until_finished(max_retries=62) # 62 retries >= 302 seconds except HCloudException as exception: self.fail_json_hcloud(exception) diff --git a/plugins/modules/server.py b/plugins/modules/server.py index 765b358..eb623e1 100644 --- a/plugins/modules/server.py +++ b/plugins/modules/server.py @@ -471,7 +471,7 @@ class AnsibleHCloudServer(AnsibleHCloud): self.result["root_password"] = resp.root_password # Action should take 60 to 90 seconds on average, but can be >10m when creating a # server from a custom images - resp.action.wait_until_finished(max_retries=1800) + resp.action.wait_until_finished(max_retries=362) # 362 retries >= 1802 seconds for action in resp.next_actions: action.wait_until_finished() @@ -671,17 +671,18 @@ class AnsibleHCloudServer(AnsibleHCloud): self.stop_server_if_forced() - upgrade_disk = self.module.params.get("upgrade_disk") - # Upgrading a server takes 160 seconds on average, upgrading the disk should - # take more time - upgrade_timeout = 600 if upgrade_disk else 180 - if not self.module.check_mode: + upgrade_disk = self.module.params.get("upgrade_disk") + action = self.hcloud_server.change_type( server_type=self._get_server_type(), upgrade_disk=upgrade_disk, ) - action.wait_until_finished(max_retries=upgrade_timeout) + # Upgrading a server takes 160 seconds on average, upgrading the disk should + # take more time + # 122 retries >= 602 seconds + # 38 retries >= 182 seconds + action.wait_until_finished(max_retries=122 if upgrade_disk else 38) self._mark_as_changed() def _update_server_ip(self, kind: Literal["ipv4", "ipv6"]) -> None: @@ -867,9 +868,9 @@ class AnsibleHCloudServer(AnsibleHCloud): try: if not self.module.check_mode: image = self._get_image(self.hcloud_server.server_type) - # When we rebuild the server progress takes some more time. resp = self.client.servers.rebuild(self.hcloud_server, image) - resp.action.wait_until_finished(1000) + # When we rebuild the server progress takes some more time. + resp.action.wait_until_finished(max_retries=202) # 202 retries >= 1002 seconds self._mark_as_changed() self._get_server() diff --git a/tests/unit/module_utils/test_client.py b/tests/unit/module_utils/test_client.py new file mode 100644 index 0000000..6988f6b --- /dev/null +++ b/tests/unit/module_utils/test_client.py @@ -0,0 +1,14 @@ +from __future__ import annotations + +from ansible_collections.hetzner.hcloud.plugins.module_utils.client import ( + exponential_backoff_poll_interval, +) + + +def test_exponential_backoff_poll_interval(): + poll_interval = exponential_backoff_poll_interval(base=1.0, multiplier=2, cap=5.0, jitter=0.0) + poll_max_retries = 25 + + results = [poll_interval(i) for i in range(poll_max_retries)] + assert sum(results) == 117.0 + assert results[:6] == [1.0, 2.0, 4.0, 5.0, 5.0, 5.0]