feat: Add cadvisor role

Add a role to deploy https://github.com/google/cadvisor.

Signed-off-by: SuperQ <superq@gmail.com>
This commit is contained in:
SuperQ 2024-01-07 15:00:38 +01:00
parent 6b1847fb23
commit 9e8cdccba6
No known key found for this signature in database
GPG key ID: C646B23C9E3245F1
24 changed files with 639 additions and 0 deletions

53
roles/cadvisor/README.md Normal file
View file

@ -0,0 +1,53 @@
<p><img src="https://www.circonus.com/wp-content/uploads/2015/03/sol-icon-itOps.png" alt="graph logo" title="graph" align="right" height="60" /></p>
# Ansible Role: cadvisor
## Description
Deploy [cadvisor](https://github.com/google/cadvisor) using ansible.
## Requirements
- Ansible >= 2.9 (It might work on previous versions, but we cannot guarantee it)
- gnu-tar on Mac deployer host (`brew install gnu-tar`)
- Passlib is required when using the basic authentication feature (`pip install passlib[bcrypt]`)
## Role Variables
All variables which can be overridden are stored in [defaults/main.yml](defaults/main.yml) file as well as in [meta/argument_specs.yml](meta/argument_specs.yml).
Please refer to the [collection docs](https://prometheus-community.github.io/ansible/branch/main/cadvisor_role.html) for description and default values of the variables.
## Example
### Playbook
Use it in a playbook as follows:
```yaml
- hosts: all
roles:
- prometheus.prometheus.cadvisor
```
### Demo site
We provide an example site that demonstrates a full monitoring solution based on prometheus and grafana. The repository with code and links to running instances is [available on github](https://github.com/superq/demo-site) and the site is hosted on [DigitalOcean](https://digitalocean.com).
## Local Testing
The preferred way of locally testing the role is to use Docker and [molecule](https://github.com/ansible-community/molecule) (v3.x). You will have to install Docker on your system. See "Get started" for a Docker package suitable for your system. Running your tests is as simple as executing `molecule test`.
## Continuous Integration
Combining molecule and circle CI allows us to test how new PRs will behave when used with multiple ansible versions and multiple operating systems. This also allows us to create test scenarios for different role configurations. As a result we have quite a large test matrix which can take more time than local testing, so please be patient.
## Contributing
See [contributor guideline](CONTRIBUTING.md).
## Troubleshooting
See [troubleshooting](TROUBLESHOOTING.md).
## License
This project is licensed under MIT License. See [LICENSE](/LICENSE) for more details.

View file

@ -0,0 +1,20 @@
# Troubleshooting
## Bad requests (HTTP 400)
This role downloads checksums from the Github project to verify the integrity of artifacts installed on your servers. When downloading the checksums, a "bad request" error might occur.
This happens in environments which (knowingly or unknowingly) use the [netrc mechanism](https://www.gnu.org/software/inetutils/manual/html_node/The-_002enetrc-file.html) to auto-login into servers.
Unless netrc is needed by your playbook and ansible roles, please unset the var like so:
```
$ NETRC= ansible-playbook ...
```
Or:
```
$ export NETRC=
$ ansible-playbook ...
```

View file

@ -0,0 +1,14 @@
---
# Default values for the cadvisor role.

# --- What to install and where it comes from ---
# Release to install; the value "latest" is resolved in tasks/preflight.yml.
cadvisor_version: "0.47.2"
# Non-empty: copy the binary from this controller directory instead of downloading.
cadvisor_binary_local_dir: ""
cadvisor_binary_url: "https://github.com/{{ _cadvisor_repo }}/releases/download/v{{ cadvisor_version }}/\
                      cadvisor-v{{ cadvisor_version }}-linux-{{ go_arch }}"
cadvisor_skip_install: false
cadvisor_binary_install_dir: "/usr/local/bin"

# --- Flags passed to the service ---
cadvisor_listen_ip: "0.0.0.0"
cadvisor_port: "8080"
cadvisor_prometheus_endpoint: "/metrics"

# --- Account the service runs as ---
cadvisor_system_group: "root"
cadvisor_system_user: "{{ cadvisor_system_group }}"

View file

@ -0,0 +1,10 @@
---
# Handlers for the cadvisor role.

# Reloads systemd units and restarts the service; does nothing in check mode.
- name: Restart cadvisor
  listen: "restart cadvisor"
  become: true
  when: not ansible_check_mode
  ansible.builtin.systemd:
    name: cadvisor
    state: restarted
    daemon_reload: true

View file

@ -0,0 +1,49 @@
---
# yamllint disable rule:line-length
# Argument specification for the cadvisor role. The defaults documented here
# mirror the values in defaults/main.yml.
argument_specs:
  main:
    short_description: "cAdvisor"
    description:
      - "Deploy L(cadvisor,https://github.com/google/cadvisor) using ansible"
    author:
      - "Prometheus Community"
    options:
      cadvisor_version:
        description: "cAdvisor package version. Also accepts latest as parameter."
        default: "0.47.2"
      cadvisor_skip_install:
        description: "cAdvisor installation tasks gets skipped when set to true."
        type: bool
        default: false
      cadvisor_binary_local_dir:
        description:
          - "Enables the use of local packages instead of those distributed on github."
          - "The parameter may be set to a directory where the C(cadvisor) binary is stored on the host where ansible is run."
          - "This overrides the I(cadvisor_version) parameter"
      cadvisor_binary_url:
        description: "URL of the cadvisor binary file"
        # The file name carries a "v" before the version, matching defaults/main.yml.
        default: "https://github.com/{{ _cadvisor_repo }}/releases/download/v{{ cadvisor_version }}/cadvisor-v{{ cadvisor_version }}-linux-{{ go_arch }}"
      cadvisor_listen_ip:
        description: "Address on which cadvisor will listen"
        default: "0.0.0.0"
      cadvisor_port:
        description: "Port on which cadvisor will listen"
        default: "8080"
      cadvisor_prometheus_endpoint:
        description: "Path under which to expose metrics"
        default: "/metrics"
      cadvisor_binary_install_dir:
        description:
          - "I(Advanced)"
          - "Directory to install cadvisor binary"
        default: "/usr/local/bin"
      cadvisor_system_group:
        description:
          - "I(Advanced)"
          - "System group for cadvisor"
        default: "root"
      cadvisor_system_user:
        description:
          - "I(Advanced)"
          - "cAdvisor user"
        default: "root"

View file

@ -0,0 +1,30 @@
---
# Galaxy metadata for the cadvisor role.
galaxy_info:
  author: "cAdvisor Authors"
  description: "cAdvisor"
  license: "Apache"
  min_ansible_version: "2.9"
  # Platforms and releases listed for this role.
  platforms:
    - name: "Ubuntu"
      versions:
        - "focal"
        - "jammy"
    - name: "Debian"
      versions:
        - "bullseye"
        - "buster"
    - name: "EL"
      versions:
        - "7"
        - "8"
        - "9"
    - name: "Fedora"
      versions:
        - "37"
        - "38"
  galaxy_tags:
    - "monitoring"
    - "prometheus"
    - "exporter"
    - "metrics"
    - "system"

View file

@ -0,0 +1,9 @@
---
# Inventory variables for this scenario: install from a local directory,
# listen on a non-default port, and pin the architecture and release.
provisioner:
  inventory:
    group_vars:
      all:
        cadvisor_version: "0.47.0"
        go_arch: "amd64"
        cadvisor_binary_local_dir: "/tmp"
        cadvisor_port: "8000"

View file

@ -0,0 +1,24 @@
---
# Stage a cadvisor binary on the controller so the role can be exercised with
# cadvisor_binary_local_dir instead of downloading during the converge.
- name: Run local preparation
  hosts: localhost
  gather_facts: false
  tasks:
    - name: Download cadvisor binary to local folder
      become: false
      ansible.builtin.get_url:
        url: "https://github.com/google/cadvisor/releases/download/v{{ cadvisor_version }}/cadvisor-v{{ cadvisor_version }}-linux-{{ go_arch }}"
        dest: "/tmp/cadvisor-v{{ cadvisor_version }}-linux-{{ go_arch }}"
        # Quoted so the mode is always read as an octal string.
        mode: "0644"
      register: _download_binary
      until: _download_binary is succeeded
      retries: 5
      delay: 2
      check_mode: false

    # Expose the versioned download under the fixed name "cadvisor".
    - name: Link to cadvisor binaries directory
      become: false
      ansible.builtin.file:
        src: "/tmp/cadvisor-v{{ cadvisor_version }}-linux-{{ go_arch }}"
        dest: "/tmp/cadvisor"
        state: link
      check_mode: false

View file

@ -0,0 +1,35 @@
from __future__ import (absolute_import, division, print_function)
__metaclass__ = type
import os
import testinfra.utils.ansible_runner
import pytest
# Hosts to test: every host in the inventory file named by MOLECULE_INVENTORY_FILE.
testinfra_hosts = testinfra.utils.ansible_runner.AnsibleRunner(
os.environ['MOLECULE_INVENTORY_FILE']).get_hosts('all')
def test_service(host):
s = host.service("cadvisor")
try:
assert s.is_running
except AssertionError:
# Capture service logs
journal_output = host.run('journalctl -u cadvisor --since "1 hour ago"')
print("\n==== journalctl -u cadvisor Output ====\n")
print(journal_output)
print("\n============================================\n")
raise # Re-raise the original assertion error
def test_protecthome_property(host):
s = host.service("cadvisor")
p = s.systemd_properties
assert p.get("ProtectHome") == "yes"
@pytest.mark.parametrize("sockets", ["tcp://127.0.0.1:8000"])
def test_socket(host, sockets):
    """Check that the parametrized socket address is listening."""
    listener = host.socket(sockets)
    assert listener.is_listening

View file

@ -0,0 +1,6 @@
---
# Inventory variables for this scenario: only the listen port is set explicitly.
provisioner:
  inventory:
    group_vars:
      all:
        cadvisor_port: "8080"

View file

@ -0,0 +1,69 @@
from __future__ import (absolute_import, division, print_function)
__metaclass__ = type
import os
import testinfra.utils.ansible_runner
# Hosts to test: every host in the inventory file named by MOLECULE_INVENTORY_FILE.
testinfra_hosts = testinfra.utils.ansible_runner.AnsibleRunner(
os.environ['MOLECULE_INVENTORY_FILE']).get_hosts('all')
def test_files(host):
files = [
"/etc/systemd/system/cadvisor.service",
"/usr/local/bin/cadvisor"
]
for file in files:
f = host.file(file)
assert f.exists
assert f.is_file
def test_permissions_didnt_change(host):
dirs = [
"/etc",
"/root",
"/usr",
"/var"
]
for file in dirs:
f = host.file(file)
assert f.exists
assert f.is_directory
assert f.user == "root"
assert f.group == "root"
def test_user(host):
assert host.group("root").exists
assert "root" in host.user("root").groups
assert host.user("root").shell == "/bin/bash"
assert host.user("root").home == "/root"
def test_service(host):
s = host.service("cadvisor")
try:
assert s.is_running
except AssertionError:
# Capture service logs
journal_output = host.run('journalctl -u cadvisor --since "1 hour ago"')
print("\n==== journalctl -u cadvisor Output ====\n")
print(journal_output)
print("\n============================================\n")
raise # Re-raise the original assertion error
def test_protecthome_property(host):
s = host.service("cadvisor")
p = s.systemd_properties
assert p.get("ProtectHome") == "yes"
def test_socket(host):
sockets = [
"tcp://127.0.0.1:8080"
]
for socket in sockets:
s = host.socket(socket)
assert s.is_listening

View file

@ -0,0 +1,6 @@
---
# Inventory variables for this scenario: request the "latest" release,
# which tasks/preflight.yml resolves to a concrete version.
provisioner:
  inventory:
    group_vars:
      all:
        cadvisor_version: "latest"

View file

@ -0,0 +1,43 @@
from __future__ import (absolute_import, division, print_function)
__metaclass__ = type
import os
import testinfra.utils.ansible_runner
import pytest
# Hosts to test: every host in the inventory file named by MOLECULE_INVENTORY_FILE.
testinfra_hosts = testinfra.utils.ansible_runner.AnsibleRunner(
os.environ['MOLECULE_INVENTORY_FILE']).get_hosts('all')
@pytest.mark.parametrize(
    "files",
    ["/etc/systemd/system/cadvisor.service", "/usr/local/bin/cadvisor"],
)
def test_files(host, files):
    """Check that the parametrized path exists and is a regular file."""
    entry = host.file(files)
    assert entry.exists
    assert entry.is_file
def test_service(host):
s = host.service("cadvisor")
try:
assert s.is_running
except AssertionError:
# Capture service logs
journal_output = host.run('journalctl -u cadvisor --since "1 hour ago"')
print("\n==== journalctl -u cadvisor Output ====\n")
print(journal_output)
print("\n============================================\n")
raise # Re-raise the original assertion error
def test_protecthome_property(host):
s = host.service("cadvisor")
p = s.systemd_properties
assert p.get("ProtectHome") == "yes"
def test_socket(host):
s = host.socket("tcp://0.0.0.0:8080")
assert s.is_listening

View file

@ -0,0 +1,19 @@
---
# Render the systemd unit and, where SELinux is enabled, label the listen port.
- name: Copy the cadvisor systemd service file
  ansible.builtin.template:
    src: cadvisor.service.j2
    dest: /etc/systemd/system/cadvisor.service
    owner: root
    group: root
    # Quoted so the mode is always read as an octal string.
    mode: "0644"
  notify: restart cadvisor

- name: Allow cadvisor port in SELinux on RedHat OS family
  community.general.seport:
    ports: "{{ cadvisor_port }}"
    proto: tcp
    setype: http_port_t
    state: present
  when:
    # "version" is the test name used throughout the rest of this role.
    - ansible_version.full is version('2.4', '>=')
    - ansible_selinux.status == "enabled"

View file

@ -0,0 +1,59 @@
---
# Create the service account (skipped when it is root), then install the
# cadvisor binary either from a download or from a local directory.
- name: Create the cadvisor group
  ansible.builtin.group:
    name: "{{ cadvisor_system_group }}"
    state: present
    system: true
  when: cadvisor_system_group != "root"

- name: Create the cadvisor user
  ansible.builtin.user:
    name: "{{ cadvisor_system_user }}"
    groups: "{{ cadvisor_system_group }}"
    append: true
    shell: /usr/sbin/nologin
    system: true
    create_home: false
    home: /
  when: cadvisor_system_user != "root"

# Download path: used only when no local binary directory is configured.
- name: Get binary
  when:
    - cadvisor_binary_local_dir | length == 0
    - not cadvisor_skip_install
  block:
    # Fetched once on the controller, then copied to the managed host below.
    - name: Download cadvisor binary to local folder
      become: false
      ansible.builtin.get_url:
        url: "{{ cadvisor_binary_url }}"
        dest: "/tmp/cadvisor-v{{ cadvisor_version }}-linux-{{ go_arch }}"
        mode: '0644'
      register: _download_binary
      until: _download_binary is succeeded
      retries: 5
      delay: 2
      delegate_to: localhost
      check_mode: false

    - name: Propagate cadvisor binaries
      ansible.builtin.copy:
        src: "/tmp/cadvisor-v{{ cadvisor_version }}-linux-{{ go_arch }}"
        dest: "{{ cadvisor_binary_install_dir }}/cadvisor"
        # Quoted, like the download mode above, so it is always an octal string.
        mode: '0755'
        owner: root
        group: root
      notify: restart cadvisor
      when: not ansible_check_mode

# Local path: copy a binary already present on the controller.
- name: Propagate locally distributed cadvisor binary
  ansible.builtin.copy:
    src: "{{ cadvisor_binary_local_dir }}/cadvisor"
    dest: "{{ cadvisor_binary_install_dir }}/cadvisor"
    mode: '0755'
    owner: root
    group: root
  when:
    - cadvisor_binary_local_dir | length > 0
    - not cadvisor_skip_install
  notify: restart cadvisor

View file

@ -0,0 +1,63 @@
---
# Role entry point: preflight checks, install, SELinux prerequisites,
# configuration, then make sure the service is enabled and started.
- name: Preflight
  tags:
    - cadvisor_install
    - cadvisor_configure
    - cadvisor_run
  ansible.builtin.include_tasks:
    file: preflight.yml
    apply:
      tags:
        - cadvisor_install
        - cadvisor_configure
        - cadvisor_run

# Runs when the binary is missing, when the version it reports (on stderr or
# stdout) differs from cadvisor_version, or when a local binary dir is set.
- name: Install
  tags:
    - cadvisor_install
  ansible.builtin.include_tasks:
    file: install.yml
    apply:
      become: true
      tags:
        - cadvisor_install
  when: >-
    ( not __cadvisor_is_installed.stat.exists ) or
    ( (__cadvisor_current_version_output.stderr_lines | length > 0)
    and ((__cadvisor_current_version_output.stderr_lines[0].split(" ")[2] | replace('v', '')) != cadvisor_version) ) or
    ( (__cadvisor_current_version_output.stdout_lines | length > 0)
    and ((__cadvisor_current_version_output.stdout_lines[0].split(" ")[2] | replace('v', '')) != cadvisor_version) ) or
    ( cadvisor_binary_local_dir | length > 0 )

- name: SELinux
  tags:
    - cadvisor_configure
  ansible.builtin.include_tasks:
    file: selinux.yml
    apply:
      become: true
      tags:
        - cadvisor_configure
  when: ansible_selinux.status == "enabled"

- name: Configure
  tags:
    - cadvisor_configure
  ansible.builtin.include_tasks:
    file: configure.yml
    apply:
      become: true
      tags:
        - cadvisor_configure

- name: Ensure cAdvisor is enabled on boot
  tags:
    - cadvisor_run
  become: true
  when: not ansible_check_mode
  ansible.builtin.systemd:
    name: cadvisor
    state: started
    enabled: true
    daemon_reload: true

View file

@ -0,0 +1,50 @@
---
# Preflight: check the init system, gather package facts, detect an existing
# cadvisor install and resolve "latest" to a concrete version.
- name: Assert usage of systemd as an init system
  ansible.builtin.assert:
    that: ansible_service_mgr == 'systemd'
    msg: "This role only works with systemd"

# On apt-based hosts the python apt bindings are installed before gathering
# package facts; on other package managers the name is empty and this is skipped.
- name: Install package fact dependencies
  become: true
  ansible.builtin.package:
    name: "{{ _pkg_fact_req }}"
    state: present
  when: (_pkg_fact_req)
  vars:
    _pkg_fact_req: "{% if (ansible_pkg_mgr == 'apt') %}\
      {{ ('python-apt' if ansible_python_version is version('3', '<') else 'python3-apt') }}
      {% else %}\
      {% endif %}"

- name: Gather package facts
  ansible.builtin.package_facts:
  when: "not 'packages' in ansible_facts"

- name: Check if cadvisor is installed
  ansible.builtin.stat:
    path: "{{ cadvisor_binary_install_dir }}/cadvisor"
  register: __cadvisor_is_installed
  check_mode: false
  tags:
    - cadvisor_install

- name: Gather currently installed cadvisor version (if any)
  ansible.builtin.command: "{{ cadvisor_binary_install_dir }}/cadvisor --version"
  changed_when: false
  register: __cadvisor_current_version_output
  check_mode: false
  when: __cadvisor_is_installed.stat.exists
  tags:
    - cadvisor_install

# The repository name is joined with "~" rather than a nested "{{ }}" inside
# the string literal, so the URL does not depend on lookup terms being
# templated a second time.
- name: Discover latest version
  ansible.builtin.set_fact:
    cadvisor_version: "{{ (lookup('url', 'https://api.github.com/repos/' ~ _cadvisor_repo ~ '/releases/latest', headers=_github_api_headers,
                       split_lines=False) | from_json).get('tag_name') | replace('v', '') }}"
  run_once: true
  until: cadvisor_version is version('0.0.0', '>=')
  retries: 10
  when:
    - cadvisor_version == "latest"
    - cadvisor_binary_local_dir | length == 0
    - not cadvisor_skip_install

View file

@ -0,0 +1,23 @@
---
# Install the packages with the SELinux Python bindings, per distribution.

# RedHat family: package names depend on the Python major version in use.
- name: Install selinux python packages [RedHat]
  when: ansible_os_family | lower == "redhat"
  ansible.builtin.package:
    state: present
    name: "{{ ['libselinux-python', 'policycoreutils-python']
           if ansible_python_version is version('3', '<') else
           ['python3-libselinux', 'python3-policycoreutils'] }}"
  register: _install_selinux_packages
  until: _install_selinux_packages is success
  retries: 5
  delay: 2

- name: Install selinux python packages [clearlinux]
  when: ansible_distribution | lower == "clearlinux"
  ansible.builtin.package:
    state: present
    name: sysadmin-basic
  register: _install_selinux_packages
  until: _install_selinux_packages is success
  retries: 5
  delay: 2

View file

@ -0,0 +1,34 @@
{{ ansible_managed | comment }}
{# systemd unit for cAdvisor. The user, group, binary path and command-line flags are filled in from the cadvisor_* role variables. #}
[Unit]
Description=cAdvisor cgroup/container metrics server
After=network-online.target
[Service]
Type=simple
User={{ cadvisor_system_user }}
Group={{ cadvisor_system_group }}
ExecStart={{ cadvisor_binary_install_dir }}/cadvisor \
'--listen_ip={{ cadvisor_listen_ip }}' \
'--port={{ cadvisor_port }}' \
'--prometheus_endpoint={{ cadvisor_prometheus_endpoint }}'
SyslogIdentifier=cadvisor
Restart=always
RestartSec=1
StartLimitInterval=0
ProtectHome=yes
NoNewPrivileges=yes
{# Choose the hardening directives by the installed systemd package version, read from ansible_facts.packages: 232 or newer gets the stricter set, older versions get ProtectSystem=full only. #}
{% if (ansible_facts.packages.systemd | first).version is version('232', '>=') %}
ProtectSystem=strict
ProtectControlGroups=true
ProtectKernelModules=true
ProtectKernelTunables=yes
{% else %}
ProtectSystem=full
{% endif %}
[Install]
WantedBy=multi-user.target

View file

@ -0,0 +1 @@
bcrypt

View file

@ -0,0 +1,10 @@
---
# Internal role variables.

# Maps the architecture reported by Ansible to the suffix used in release file names.
go_arch_map:
  x86_64: 'amd64'
  aarch64: 'arm64'
  armv7l: 'arm'
  armv6l: 'arm'
# Falls back to the raw architecture string when it is not in the map.
go_arch: "{{ go_arch_map[ansible_architecture] | default(ansible_architecture) }}"
_cadvisor_repo: "google/cadvisor"
# Headers for GitHub API requests. When GITHUB_TOKEN is set in the controller
# environment it is sent as an Authorization bearer token; otherwise no extra
# headers are sent.
_github_api_headers: "{{ {'Authorization': 'Bearer ' ~ lookup('ansible.builtin.env', 'GITHUB_TOKEN')} if (lookup('ansible.builtin.env', 'GITHUB_TOKEN')) else {} }}"

View file

@ -0,0 +1,4 @@
#!/usr/bin/env bash
# Take the part of the current directory path up to and including
# ansible_collections/<name>/<name>, then source the shared molecule runner
# found under that directory.
collection_root="$(pwd | grep -oP ".+\/ansible_collections\/\w+?\/\w+")"
. "${collection_root}/tests/integration/molecule.sh"

View file

@ -0,0 +1,4 @@
#!/usr/bin/env bash
# Take the part of the current directory path up to and including
# ansible_collections/<name>/<name>, then source the shared molecule runner
# found under that directory.
collection_root="$(pwd | grep -oP ".+\/ansible_collections\/\w+?\/\w+")"
. "${collection_root}/tests/integration/molecule.sh"

View file

@ -0,0 +1,4 @@
#!/usr/bin/env bash
# Take the part of the current directory path up to and including
# ansible_collections/<name>/<name>, then source the shared molecule runner
# found under that directory.
collection_root="$(pwd | grep -oP ".+\/ansible_collections\/\w+?\/\w+")"
. "${collection_root}/tests/integration/molecule.sh"