mirror of
https://github.com/prometheus-community/ansible
synced 2024-11-21 19:33:04 +00:00
Add nvidia_gpu_exporter
Signed-off-by: Jack <jack4zhang@gmail.com>
This commit is contained in:
parent
e2d8b745dc
commit
5e6c551656
18 changed files with 483 additions and 0 deletions
54
roles/nvidia_gpu_exporter/README.md
Normal file
54
roles/nvidia_gpu_exporter/README.md
Normal file
|
@ -0,0 +1,54 @@
|
||||||
|
<p><img src="https://www.circonus.com/wp-content/uploads/2015/03/sol-icon-itOps.png" alt="graph logo" title="graph" align="right" height="60" /></p>
|
||||||
|
|
||||||
|
# Ansible Role: Nvidia GPU exporter
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
Deploy prometheus [Nvidia GPU exporter](https://github.com/utkuozdemir/nvidia_gpu_exporter) using ansible.
|
||||||
|
|
||||||
|
## Requirements
|
||||||
|
|
||||||
|
- Ansible >= 2.9 (It might work on previous versions, but we cannot guarantee it)
|
||||||
|
- gnu-tar on Mac deployer host (`brew install gnu-tar`)
|
||||||
|
- Passlib is required when using the basic authentication feature (`pip install passlib[bcrypt]`)
|
||||||
|
|
||||||
|
## Role Variables
|
||||||
|
|
||||||
|
All variables which can be overridden are stored in the [defaults/main.yml](defaults/main.yml) file as well as in [meta/argument_specs.yml](meta/argument_specs.yml).
|
||||||
|
Please refer to the [collection docs](https://prometheus-community.github.io/ansible/branch/main/nvidia_gpu_exporter_role.html) for description and default values of the variables.
|
||||||
|
|
||||||
|
## Example
|
||||||
|
|
||||||
|
### Playbook
|
||||||
|
|
||||||
|
Use it in a playbook as follows:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
- hosts: all
|
||||||
|
roles:
|
||||||
|
- prometheus.prometheus.nvidia_gpu_exporter
|
||||||
|
```
|
||||||
|
|
||||||
|
### Demo site
|
||||||
|
|
||||||
|
We provide an example site that demonstrates a full monitoring solution based on prometheus and grafana. The repository with code and links to running instances is [available on github](https://github.com/prometheus/demo-site) and the site is hosted on [DigitalOcean](https://digitalocean.com).
|
||||||
|
|
||||||
|
## Local Testing
|
||||||
|
|
||||||
|
The preferred way of locally testing the role is to use Docker and [molecule](https://github.com/ansible-community/molecule) (v3.x). You will have to install Docker on your system. See "Get started" for a Docker package suitable for your system. Running your tests is as simple as executing `molecule test`.
|
||||||
|
|
||||||
|
## Continuous Integration
|
||||||
|
|
||||||
|
Combining molecule and circle CI allows us to test how new PRs will behave when used with multiple ansible versions and multiple operating systems. This also allows us to create test scenarios for different role configurations. As a result we have quite a large test matrix which can take more time than local testing, so please be patient.
|
||||||
|
|
||||||
|
## Contributing
|
||||||
|
|
||||||
|
See [contributor guideline](CONTRIBUTING.md).
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
See [troubleshooting](TROUBLESHOOTING.md).
|
||||||
|
|
||||||
|
## License
|
||||||
|
|
||||||
|
This project is licensed under MIT License. See [LICENSE](/LICENSE) for more details.
|
16
roles/nvidia_gpu_exporter/defaults/main.yml
Normal file
16
roles/nvidia_gpu_exporter/defaults/main.yml
Normal file
|
@ -0,0 +1,16 @@
|
||||||
|
---
|
||||||
|
nvidia_gpu_exporter_version: 1.2.1
|
||||||
|
nvidia_gpu_exporter_binary_url: "https://github.com/{{ _nvidia_gpu_exporter_repo }}/releases/download/v{{ nvidia_gpu_exporter_version }}/\
|
||||||
|
nvidia_gpu_exporter_{{ nvidia_gpu_exporter_version }}_{{ ansible_system | lower }}_{{ _nvidia_gpu_exporter_go_ansible_arch }}.tar.gz"
|
||||||
|
nvidia_gpu_exporter_checksums_url: "https://github.com/{{ _nvidia_gpu_exporter_repo }}/releases/download/v{{ nvidia_gpu_exporter_version }}/checksums.txt"
|
||||||
|
|
||||||
|
nvidia_gpu_exporter_web_listen_address: "0.0.0.0:9835"
|
||||||
|
nvidia_gpu_exporter_web_telemetry_path: "/metrics"
|
||||||
|
|
||||||
|
nvidia_gpu_exporter_binary_install_dir: "/usr/local/bin"
|
||||||
|
nvidia_gpu_exporter_system_group: "nvidia-gpu-exp"
|
||||||
|
nvidia_gpu_exporter_system_user: "{{ nvidia_gpu_exporter_system_group }}"
|
||||||
|
|
||||||
|
nvidia_gpu_exporter_config_dir: "/etc/nvidia_gpu_exporter"
|
||||||
|
# Local path to stash the archive and its extraction
|
||||||
|
nvidia_gpu_exporter_local_cache_path: "/tmp/nvidia_gpu_exporter-{{ ansible_system | lower }}-{{ _nvidia_gpu_exporter_go_ansible_arch }}/{{ nvidia_gpu_exporter_version }}"
|
10
roles/nvidia_gpu_exporter/handlers/main.yml
Normal file
10
roles/nvidia_gpu_exporter/handlers/main.yml
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
---
|
||||||
|
- name: Restart nvidia_gpu_exporter
|
||||||
|
listen: "restart nvidia_gpu_exporter"
|
||||||
|
become: true
|
||||||
|
ansible.builtin.systemd:
|
||||||
|
daemon_reload: true
|
||||||
|
name: nvidia_gpu_exporter
|
||||||
|
state: restarted
|
||||||
|
when:
|
||||||
|
- not ansible_check_mode
|
46
roles/nvidia_gpu_exporter/meta/argument_specs.yml
Normal file
46
roles/nvidia_gpu_exporter/meta/argument_specs.yml
Normal file
|
@ -0,0 +1,46 @@
|
||||||
|
---
|
||||||
|
# yamllint disable rule:line-length
|
||||||
|
argument_specs:
|
||||||
|
main:
|
||||||
|
short_description: "Prometheus Nvidia GPU Exporter"
|
||||||
|
description:
|
||||||
|
- "Deploy prometheus L(Nvidia GPU exporter,https://github.com/utkuozdemir/nvidia_gpu_exporter) using ansible"
|
||||||
|
author:
|
||||||
|
- "Prometheus Community"
|
||||||
|
options:
|
||||||
|
nvidia_gpu_exporter_version:
|
||||||
|
description: "Nvidia GPU exporter package version. Also accepts latest as parameter."
|
||||||
|
default: "1.2.1"
|
||||||
|
nvidia_gpu_exporter_binary_url:
|
||||||
|
description: "URL of the Nvidia GPU exporter binaries .tar.gz file"
|
||||||
|
default: "https://github.com/{{ _nvidia_gpu_exporter_repo }}/releases/download/v{{ nvidia_gpu_exporter_version }}/nvidia_gpu_exporter_{{ nvidia_gpu_exporter_version }}_{{ ansible_system | lower }}_{{ _nvidia_gpu_exporter_go_ansible_arch }}.tar.gz"
|
||||||
|
nvidia_gpu_exporter_checksums_url:
|
||||||
|
description: "URL of the Nvidia GPU exporter checksums file"
|
||||||
|
# Keep in sync with nvidia_gpu_exporter_checksums_url in defaults/main.yml (checksums.txt).
default: "https://github.com/{{ _nvidia_gpu_exporter_repo }}/releases/download/v{{ nvidia_gpu_exporter_version }}/checksums.txt"
|
||||||
|
nvidia_gpu_exporter_web_listen_address:
|
||||||
|
description: "Address on which Nvidia GPU exporter will listen"
|
||||||
|
default: "0.0.0.0:9835"
|
||||||
|
nvidia_gpu_exporter_web_telemetry_path:
|
||||||
|
description: "Path under which to expose metrics"
|
||||||
|
default: "/metrics"
|
||||||
|
nvidia_gpu_exporter_binary_install_dir:
|
||||||
|
description:
|
||||||
|
- "I(Advanced)"
|
||||||
|
- "Directory to install nvidia_gpu_exporter binary"
|
||||||
|
default: "/usr/local/bin"
|
||||||
|
nvidia_gpu_exporter_system_group:
|
||||||
|
description:
|
||||||
|
- "I(Advanced)"
|
||||||
|
- "System group for Nvidia GPU exporter"
|
||||||
|
default: "nvidia-gpu-exp"
|
||||||
|
nvidia_gpu_exporter_system_user:
|
||||||
|
description:
|
||||||
|
- "I(Advanced)"
|
||||||
|
- "Nvidia GPU exporter user"
|
||||||
|
default: "nvidia-gpu-exp"
|
||||||
|
nvidia_gpu_exporter_local_cache_path:
|
||||||
|
description: "Local path to stash the archive and its extraction"
|
||||||
|
default: "/tmp/nvidia_gpu_exporter-{{ ansible_system | lower }}-{{ _nvidia_gpu_exporter_go_ansible_arch }}/{{ nvidia_gpu_exporter_version }}"
|
||||||
|
nvidia_gpu_exporter_config_dir:
|
||||||
|
description: "Path to directory with nvidia_gpu_exporter configuration"
|
||||||
|
default: "/etc/nvidia_gpu_exporter"
|
25
roles/nvidia_gpu_exporter/meta/main.yml
Normal file
25
roles/nvidia_gpu_exporter/meta/main.yml
Normal file
|
@ -0,0 +1,25 @@
|
||||||
|
---
|
||||||
|
galaxy_info:
|
||||||
|
author: "Prometheus Community"
|
||||||
|
description: "Nvidia GPU exporter"
|
||||||
|
license: "Apache"
|
||||||
|
min_ansible_version: "2.9"
|
||||||
|
platforms:
|
||||||
|
- name: "Ubuntu"
|
||||||
|
versions:
|
||||||
|
- "focal"
|
||||||
|
- "jammy"
|
||||||
|
- "noble"
|
||||||
|
- name: "Debian"
|
||||||
|
versions:
|
||||||
|
- "bullseye"
|
||||||
|
- name: "EL"
|
||||||
|
versions:
|
||||||
|
- "8"
|
||||||
|
- "9"
|
||||||
|
galaxy_tags:
|
||||||
|
- "monitoring"
|
||||||
|
- "prometheus"
|
||||||
|
- "exporter"
|
||||||
|
- "metrics"
|
||||||
|
- "system"
|
14
roles/nvidia_gpu_exporter/molecule/alternative/molecule.yml
Normal file
14
roles/nvidia_gpu_exporter/molecule/alternative/molecule.yml
Normal file
|
@ -0,0 +1,14 @@
|
||||||
|
---
|
||||||
|
provisioner:
|
||||||
|
playbooks:
|
||||||
|
prepare: "${MOLECULE_PROJECT_DIRECTORY}/../../.config/molecule/alternative/prepare.yml"
|
||||||
|
inventory:
|
||||||
|
group_vars:
|
||||||
|
all:
|
||||||
|
nvidia_gpu_exporter_local_cache_path: "/tmp/nvidia_gpu_exporter-linux_x86_64"
|
||||||
|
nvidia_gpu_exporter_web_listen_address:
|
||||||
|
- '127.0.0.1:9835'
|
||||||
|
- '127.0.1.1:9835'
|
||||||
|
nvidia_gpu_exporter_version: 1.2.1
|
||||||
|
nvidia_gpu_exporter_binary_url: "https://github.com/utkuozdemir/nvidia_gpu_exporter/releases/download/v{{\
|
||||||
|
\ nvidia_gpu_exporter_version }}/nvidia_gpu_exporter_{{ nvidia_gpu_exporter_version }}_linux_x86_64.tar.gz"
|
|
@ -0,0 +1,33 @@
|
||||||
|
from __future__ import (absolute_import, division, print_function)
|
||||||
|
__metaclass__ = type
|
||||||
|
|
||||||
|
from testinfra_helpers import get_target_hosts
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
testinfra_hosts = get_target_hosts()
|
||||||
|
|
||||||
|
|
||||||
|
def test_service(host):
|
||||||
|
s = host.service("nvidia_gpu_exporter")
|
||||||
|
try:
|
||||||
|
assert s.is_running
|
||||||
|
except AssertionError:
|
||||||
|
# Capture service logs
|
||||||
|
journal_output = host.run('journalctl -u nvidia_gpu_exporter --since "1 hour ago"')
|
||||||
|
print("\n==== journalctl -u nvidia_gpu_exporter Output ====\n")
|
||||||
|
print(journal_output)
|
||||||
|
print("\n============================================\n")
|
||||||
|
raise # Re-raise the original assertion error
|
||||||
|
|
||||||
|
|
||||||
|
def test_protecthome_property(host):
|
||||||
|
s = host.service("nvidia_gpu_exporter")
|
||||||
|
p = s.systemd_properties
|
||||||
|
assert p.get("ProtectHome") == "yes"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("sockets", [
|
||||||
|
"tcp://127.0.1.1:9835",
|
||||||
|
])
|
||||||
|
def test_socket(host, sockets):
|
||||||
|
assert host.socket(sockets).is_listening
|
6
roles/nvidia_gpu_exporter/molecule/default/molecule.yml
Normal file
6
roles/nvidia_gpu_exporter/molecule/default/molecule.yml
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
---
|
||||||
|
provisioner:
|
||||||
|
inventory:
|
||||||
|
group_vars:
|
||||||
|
all:
|
||||||
|
nvidia_gpu_exporter_web_listen_address: "127.0.0.1:9835"
|
|
@ -0,0 +1,66 @@
|
||||||
|
from __future__ import (absolute_import, division, print_function)
|
||||||
|
__metaclass__ = type
|
||||||
|
|
||||||
|
from testinfra_helpers import get_target_hosts
|
||||||
|
|
||||||
|
testinfra_hosts = get_target_hosts()
|
||||||
|
|
||||||
|
|
||||||
|
def test_files(host):
|
||||||
|
files = [
|
||||||
|
"/etc/systemd/system/nvidia_gpu_exporter.service",
|
||||||
|
"/usr/local/bin/nvidia_gpu_exporter"
|
||||||
|
]
|
||||||
|
for file in files:
|
||||||
|
f = host.file(file)
|
||||||
|
assert f.exists
|
||||||
|
assert f.is_file
|
||||||
|
|
||||||
|
|
||||||
|
def test_permissions_didnt_change(host):
|
||||||
|
dirs = [
|
||||||
|
"/etc",
|
||||||
|
"/root",
|
||||||
|
"/usr",
|
||||||
|
"/var"
|
||||||
|
]
|
||||||
|
for file in dirs:
|
||||||
|
f = host.file(file)
|
||||||
|
assert f.exists
|
||||||
|
assert f.is_directory
|
||||||
|
assert f.user == "root"
|
||||||
|
assert f.group == "root"
|
||||||
|
|
||||||
|
|
||||||
|
def test_user(host):
|
||||||
|
assert host.group("nvidia-gpu-exp").exists
|
||||||
|
assert "nvidia-gpu-exp" in host.user("nvidia-gpu-exp").groups
|
||||||
|
assert host.user("nvidia-gpu-exp").shell == "/usr/sbin/nologin"
|
||||||
|
|
||||||
|
|
||||||
|
def test_service(host):
|
||||||
|
s = host.service("nvidia_gpu_exporter")
|
||||||
|
try:
|
||||||
|
assert s.is_running
|
||||||
|
except AssertionError:
|
||||||
|
# Capture service logs
|
||||||
|
journal_output = host.run('journalctl -u nvidia_gpu_exporter --since "1 hour ago"')
|
||||||
|
print("\n==== journalctl -u nvidia_gpu_exporter Output ====\n")
|
||||||
|
print(journal_output)
|
||||||
|
print("\n============================================\n")
|
||||||
|
raise # Re-raise the original assertion error
|
||||||
|
|
||||||
|
|
||||||
|
def test_protecthome_property(host):
|
||||||
|
s = host.service("nvidia_gpu_exporter")
|
||||||
|
p = s.systemd_properties
|
||||||
|
assert p.get("ProtectHome") == "yes"
|
||||||
|
|
||||||
|
|
||||||
|
def test_socket(host):
|
||||||
|
sockets = [
|
||||||
|
"tcp://127.0.0.1:9835"
|
||||||
|
]
|
||||||
|
for socket in sockets:
|
||||||
|
s = host.socket(socket)
|
||||||
|
assert s.is_listening
|
6
roles/nvidia_gpu_exporter/molecule/latest/molecule.yml
Normal file
6
roles/nvidia_gpu_exporter/molecule/latest/molecule.yml
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
---
|
||||||
|
provisioner:
|
||||||
|
inventory:
|
||||||
|
group_vars:
|
||||||
|
all:
|
||||||
|
nvidia_gpu_exporter_version: latest
|
|
@ -0,0 +1,41 @@
|
||||||
|
from __future__ import (absolute_import, division, print_function)
|
||||||
|
__metaclass__ = type
|
||||||
|
|
||||||
|
from testinfra_helpers import get_target_hosts
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
testinfra_hosts = get_target_hosts()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("files", [
|
||||||
|
"/etc/systemd/system/nvidia_gpu_exporter.service",
|
||||||
|
"/usr/local/bin/nvidia_gpu_exporter"
|
||||||
|
])
|
||||||
|
def test_files(host, files):
|
||||||
|
f = host.file(files)
|
||||||
|
assert f.exists
|
||||||
|
assert f.is_file
|
||||||
|
|
||||||
|
|
||||||
|
def test_service(host):
|
||||||
|
s = host.service("nvidia_gpu_exporter")
|
||||||
|
try:
|
||||||
|
assert s.is_running
|
||||||
|
except AssertionError:
|
||||||
|
# Capture service logs
|
||||||
|
journal_output = host.run('journalctl -u nvidia_gpu_exporter --since "1 hour ago"')
|
||||||
|
print("\n==== journalctl -u nvidia_gpu_exporter Output ====\n")
|
||||||
|
print(journal_output)
|
||||||
|
print("\n============================================\n")
|
||||||
|
raise # Re-raise the original assertion error
|
||||||
|
|
||||||
|
|
||||||
|
def test_protecthome_property(host):
|
||||||
|
s = host.service("nvidia_gpu_exporter")
|
||||||
|
p = s.systemd_properties
|
||||||
|
assert p.get("ProtectHome") == "yes"
|
||||||
|
|
||||||
|
|
||||||
|
def test_socket(host):
|
||||||
|
s = host.socket("tcp://0.0.0.0:9835")
|
||||||
|
assert s.is_listening
|
69
roles/nvidia_gpu_exporter/tasks/main.yml
Normal file
69
roles/nvidia_gpu_exporter/tasks/main.yml
Normal file
|
@ -0,0 +1,69 @@
|
||||||
|
---
|
||||||
|
- name: Preflight
|
||||||
|
ansible.builtin.include_tasks:
|
||||||
|
file: preflight.yml
|
||||||
|
tags:
|
||||||
|
- nvidia_gpu_exporter
|
||||||
|
- install
|
||||||
|
- configure
|
||||||
|
- run
|
||||||
|
- nvidia_gpu_exporter_install
|
||||||
|
- nvidia_gpu_exporter_configure
|
||||||
|
- nvidia_gpu_exporter_run
|
||||||
|
|
||||||
|
- name: Install
|
||||||
|
ansible.builtin.include_role:
|
||||||
|
name: prometheus.prometheus._common
|
||||||
|
tasks_from: install.yml
|
||||||
|
vars:
|
||||||
|
_common_local_cache_path: "{{ nvidia_gpu_exporter_local_cache_path }}"
|
||||||
|
_common_binaries: "{{ _nvidia_gpu_exporter_binaries }}"
|
||||||
|
_common_binary_install_dir: "{{ nvidia_gpu_exporter_binary_install_dir }}"
|
||||||
|
_common_binary_url: "{{ nvidia_gpu_exporter_binary_url }}"
|
||||||
|
_common_checksums_url: "{{ nvidia_gpu_exporter_checksums_url }}"
|
||||||
|
_common_system_group: "{{ nvidia_gpu_exporter_system_group }}"
|
||||||
|
_common_system_user: "{{ nvidia_gpu_exporter_system_user }}"
|
||||||
|
_common_config_dir: "{{ nvidia_gpu_exporter_config_dir }}"
|
||||||
|
tags:
|
||||||
|
- nvidia_gpu_exporter
|
||||||
|
- install
|
||||||
|
- nvidia_gpu_exporter_install
|
||||||
|
|
||||||
|
- name: SELinux
|
||||||
|
ansible.builtin.include_role:
|
||||||
|
name: prometheus.prometheus._common
|
||||||
|
tasks_from: selinux.yml
|
||||||
|
vars:
|
||||||
|
_common_selinux_port: "{{ nvidia_gpu_exporter_web_listen_address | urlsplit('port') }}"
|
||||||
|
when: ansible_selinux.status == "enabled"
|
||||||
|
tags:
|
||||||
|
- nvidia_gpu_exporter
|
||||||
|
- configure
|
||||||
|
- nvidia_gpu_exporter_configure
|
||||||
|
|
||||||
|
- name: Configure
|
||||||
|
ansible.builtin.include_role:
|
||||||
|
name: prometheus.prometheus._common
|
||||||
|
tasks_from: configure.yml
|
||||||
|
vars:
|
||||||
|
_common_system_user: "{{ nvidia_gpu_exporter_system_user }}"
|
||||||
|
_common_system_group: "{{ nvidia_gpu_exporter_system_group }}"
|
||||||
|
_common_config_dir: "{{ nvidia_gpu_exporter_config_dir }}"
|
||||||
|
tags:
|
||||||
|
- nvidia_gpu_exporter
|
||||||
|
- configure
|
||||||
|
- nvidia_gpu_exporter_configure
|
||||||
|
|
||||||
|
- name: Ensure Nvidia GPU Exporter is enabled on boot
|
||||||
|
become: true
|
||||||
|
ansible.builtin.systemd:
|
||||||
|
daemon_reload: true
|
||||||
|
name: nvidia_gpu_exporter
|
||||||
|
enabled: true
|
||||||
|
state: started
|
||||||
|
when:
|
||||||
|
- not ansible_check_mode
|
||||||
|
tags:
|
||||||
|
- nvidia_gpu_exporter
|
||||||
|
- run
|
||||||
|
- nvidia_gpu_exporter_run
|
31
roles/nvidia_gpu_exporter/tasks/preflight.yml
Normal file
31
roles/nvidia_gpu_exporter/tasks/preflight.yml
Normal file
|
@ -0,0 +1,31 @@
|
||||||
|
---
|
||||||
|
- name: Common preflight
|
||||||
|
ansible.builtin.include_role:
|
||||||
|
name: prometheus.prometheus._common
|
||||||
|
tasks_from: preflight.yml
|
||||||
|
|
||||||
|
- name: Naive assertion of proper listen address
|
||||||
|
ansible.builtin.assert:
|
||||||
|
that:
|
||||||
|
- >-
|
||||||
|
[nvidia_gpu_exporter_web_listen_address] |
|
||||||
|
flatten |
|
||||||
|
reject('match', '.+:\\d+$') |
|
||||||
|
list |
|
||||||
|
length == 0
|
||||||
|
|
||||||
|
- name: Discover latest version
|
||||||
|
ansible.builtin.set_fact:
|
||||||
|
# Query the GitHub "latest release" endpoint and turn its tag_name (e.g. "v1.2.1") into a bare version.
# The URL is built with "~" concatenation because "{{ }}" must not be nested inside a Jinja expression.
# Only the leading "v" is stripped, so a "v" elsewhere in the tag is preserved.
nvidia_gpu_exporter_version: "{{ (lookup('url', 'https://api.github.com/repos/' ~ _nvidia_gpu_exporter_repo ~ '/releases/latest', headers=_nvidia_gpu_exporter_github_api_headers,
  split_lines=False) | from_json).get('tag_name') | regex_replace('^v', '') }}"
|
||||||
|
run_once: true
|
||||||
|
until: nvidia_gpu_exporter_version is version('0.0.0', '>=')
|
||||||
|
retries: 10
|
||||||
|
when:
|
||||||
|
- nvidia_gpu_exporter_version == "latest"
|
||||||
|
tags:
|
||||||
|
- nvidia_gpu_exporter
|
||||||
|
- install
|
||||||
|
- nvidia_gpu_exporter_install
|
||||||
|
- download
|
||||||
|
- nvidia_gpu_exporter_download
|
|
@ -0,0 +1,45 @@
|
||||||
|
{{ ansible_managed | comment }}
|
||||||
|
|
||||||
|
[Unit]
|
||||||
|
Description=Nvidia GPU Exporter
|
||||||
|
After=network-online.target
|
||||||
|
|
||||||
|
[Service]
|
||||||
|
Type=simple
|
||||||
|
User={{ nvidia_gpu_exporter_system_user }}
|
||||||
|
Group={{ nvidia_gpu_exporter_system_group }}
|
||||||
|
ExecStart={{ nvidia_gpu_exporter_binary_install_dir }}/nvidia_gpu_exporter \
|
||||||
|
{% if nvidia_gpu_exporter_web_listen_address is iterable and
|
||||||
|
nvidia_gpu_exporter_web_listen_address is not mapping and
|
||||||
|
nvidia_gpu_exporter_web_listen_address is not string %}
|
||||||
|
{% for address in nvidia_gpu_exporter_web_listen_address %}
|
||||||
|
'--web.listen-address={{ address }}' \
|
||||||
|
{% endfor %}
|
||||||
|
{% else %}
|
||||||
|
'--web.listen-address={{ nvidia_gpu_exporter_web_listen_address }}' \
|
||||||
|
{% endif %}
|
||||||
|
'--web.telemetry-path={{ nvidia_gpu_exporter_web_telemetry_path }}'
|
||||||
|
|
||||||
|
SyslogIdentifier=nvidia_gpu_exporter
|
||||||
|
Restart=always
|
||||||
|
RestartSec=1
|
||||||
|
StartLimitInterval=0
|
||||||
|
|
||||||
|
{% set ns = namespace(protect_home = 'yes') %}
|
||||||
|
{% for m in ansible_mounts if m.mount.startswith('/home') %}
|
||||||
|
{% set ns.protect_home = 'read-only' %}
|
||||||
|
{% endfor %}
|
||||||
|
ProtectHome={{ ns.protect_home }}
|
||||||
|
NoNewPrivileges=yes
|
||||||
|
|
||||||
|
{% if (ansible_facts.packages.systemd | first).version is version('232', '>=') %}
|
||||||
|
ProtectSystem=strict
|
||||||
|
ProtectControlGroups=true
|
||||||
|
ProtectKernelModules=true
|
||||||
|
ProtectKernelTunables=yes
|
||||||
|
{% else %}
|
||||||
|
ProtectSystem=full
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
[Install]
|
||||||
|
WantedBy=multi-user.target
|
9
roles/nvidia_gpu_exporter/vars/main.yml
Normal file
9
roles/nvidia_gpu_exporter/vars/main.yml
Normal file
|
@ -0,0 +1,9 @@
|
||||||
|
---
|
||||||
|
_nvidia_gpu_exporter_go_ansible_arch: "{{ {'i386': 'i386',
|
||||||
|
'x86_64': 'x86_64',
|
||||||
|
'aarch64': 'arm64',
|
||||||
|
'armv7l': 'armv7',
|
||||||
|
'armv6l': 'armv6'}.get(ansible_architecture, ansible_architecture) }}"
|
||||||
|
_nvidia_gpu_exporter_repo: "utkuozdemir/nvidia_gpu_exporter"
|
||||||
|
# Headers for GitHub API requests: when GITHUB_TOKEN is set in the controller's environment, send it
# in the Authorization header (the header GitHub reads credentials from); otherwise send no extra headers.
_nvidia_gpu_exporter_github_api_headers: "{{ {'Authorization': 'Bearer ' ~ lookup('ansible.builtin.env', 'GITHUB_TOKEN')} if (lookup('ansible.builtin.env', 'GITHUB_TOKEN')) else {} }}"
|
||||||
|
_nvidia_gpu_exporter_binaries: ['nvidia_gpu_exporter']
|
|
@ -0,0 +1,4 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
collection_root=$(pwd | grep -oP ".+\/ansible_collections\/\w+?\/\w+")
|
||||||
|
source "$collection_root/tests/integration/molecule.sh"
|
4
tests/integration/targets/molecule-nvidia_gpu_exporter-default/runme.sh
Executable file
4
tests/integration/targets/molecule-nvidia_gpu_exporter-default/runme.sh
Executable file
|
@ -0,0 +1,4 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
collection_root=$(pwd | grep -oP ".+\/ansible_collections\/\w+?\/\w+")
|
||||||
|
source "$collection_root/tests/integration/molecule.sh"
|
4
tests/integration/targets/molecule-nvidia_gpu_exporter-latest/runme.sh
Executable file
4
tests/integration/targets/molecule-nvidia_gpu_exporter-latest/runme.sh
Executable file
|
@ -0,0 +1,4 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
collection_root=$(pwd | grep -oP ".+\/ansible_collections\/\w+?\/\w+")
|
||||||
|
source "$collection_root/tests/integration/molecule.sh"
|
Loading…
Reference in a new issue