add health stats counters to machine models

This commit is contained in:
Nick Sweeting 2024-10-03 03:11:04 -07:00
parent 3b9e48ead8
commit 161afc7297
No known key found for this signature in database
2 changed files with 104 additions and 48 deletions

View file

@ -783,16 +783,17 @@ class CustomWebhookAdmin(WebhookAdmin, ABIDModelAdmin):
@admin.register(Machine, site=archivebox_admin)
class MachineAdmin(ABIDModelAdmin):
list_display = ('abid', 'created_at', 'hostname', 'ips', 'os_platform', 'hw_in_docker', 'hw_in_vm', 'hw_manufacturer', 'hw_product', 'os_arch', 'os_family', 'os_release', 'hw_uuid')
list_display = ('abid', 'created_at', 'hostname', 'ips', 'os_platform', 'hw_in_docker', 'hw_in_vm', 'hw_manufacturer', 'hw_product', 'os_arch', 'os_family', 'os_release', 'hw_uuid', 'health')
sort_fields = ('abid', 'created_at', 'hostname', 'ips', 'os_platform', 'hw_in_docker', 'hw_in_vm', 'hw_manufacturer', 'hw_product', 'os_arch', 'os_family', 'os_release', 'hw_uuid')
# search_fields = ('id', 'abid', 'guid', 'hostname', 'hw_manufacturer', 'hw_product', 'hw_uuid', 'os_arch', 'os_family', 'os_platform', 'os_kernel', 'os_release')
readonly_fields = ('guid', 'created_at', 'modified_at', 'abid_info', 'ips')
fields = (*readonly_fields, 'hostname', 'hw_in_docker', 'hw_in_vm', 'hw_manufacturer', 'hw_product', 'hw_uuid', 'os_arch', 'os_family', 'os_platform', 'os_kernel', 'os_release', 'stats')
fields = (*readonly_fields, 'hostname', 'hw_in_docker', 'hw_in_vm', 'hw_manufacturer', 'hw_product', 'hw_uuid', 'os_arch', 'os_family', 'os_platform', 'os_kernel', 'os_release', 'stats', 'num_uses_succeeded', 'num_uses_failed')
list_filter = ('hw_in_docker', 'hw_in_vm', 'os_arch', 'os_family', 'os_platform')
ordering = ['-created_at']
list_per_page = 100
actions = ["delete_selected"]
@admin.display(
description='Public IP',
@ -807,16 +808,17 @@ class MachineAdmin(ABIDModelAdmin):
@admin.register(NetworkInterface, site=archivebox_admin)
class NetworkInterfaceAdmin(ABIDModelAdmin):
list_display = ('abid', 'created_at', 'machine_info', 'ip_public', 'dns_server', 'isp', 'country', 'region', 'city', 'iface', 'ip_local', 'mac_address')
list_display = ('abid', 'created_at', 'machine_info', 'ip_public', 'dns_server', 'isp', 'country', 'region', 'city', 'iface', 'ip_local', 'mac_address', 'health')
sort_fields = ('abid', 'created_at', 'machine_info', 'ip_public', 'dns_server', 'isp', 'country', 'region', 'city', 'iface', 'ip_local', 'mac_address')
search_fields = ('abid', 'machine__abid', 'iface', 'ip_public', 'ip_local', 'mac_address', 'dns_server', 'hostname', 'isp', 'city', 'region', 'country')
readonly_fields = ('machine', 'created_at', 'modified_at', 'abid_info', 'mac_address', 'ip_public', 'ip_local', 'dns_server')
fields = (*readonly_fields, 'iface', 'hostname', 'isp', 'city', 'region', 'country')
fields = (*readonly_fields, 'iface', 'hostname', 'isp', 'city', 'region', 'country', 'num_uses_succeeded', 'num_uses_failed')
list_filter = ('isp', 'country', 'region')
ordering = ['-created_at']
list_per_page = 100
actions = ["delete_selected"]
@admin.display(
description='Machine',

View file

@ -1,39 +1,88 @@
__package__ = 'archivebox.machine'
import socket
from datetime import timedelta
from pathlib import Path
from django.db import models
from django.utils import timezone
from django.utils.functional import cached_property
from pydantic_pkgr import Binary
import abx.archivebox.use
from abx.archivebox.base_binary import BaseBinary, BaseBinProvider
from archivebox.abid_utils.models import ABIDModel, ABIDField, AutoDateTimeField
from .detect import get_host_guid, get_os_info, get_vm_info, get_host_network, get_host_stats
# NOTE(review): this view is a rendered diff, so the two cache globals appear twice (old line, then new commented line) — confirm against the real file
CURRENT_MACHINE = None
CURRENT_INTERFACE = None
CURRENT_MACHINE = None # global cache for the current machine
CURRENT_INTERFACE = None # global cache for the current network interface
CURRENT_BINARIES = {} # global cache for the currently installed binaries
# recheck intervals are expressed in seconds (the machine/interface ones are passed to timedelta(seconds=...))
MACHINE_RECHECK_INTERVAL = 7 * 24 * 60 * 60 # 1 week (how often should we check for OS/hardware changes?)
NETWORK_INTERFACE_RECHECK_INTERVAL = 1 * 60 * 60 # 1 hour (how often should we check for public IP/private IP/DNS changes?)
INSTALLED_BINARY_RECHECK_INTERVAL = 1 * 30 * 60 # 30min (how often should we check for changes to locally installed binaries?)
class ModelWithHealthStats(models.Model):
    """Abstract base model that tracks how often an object was used successfully or not.

    Two counters are stored on the row (failures and successes); the
    ``health`` property turns them into a rounded success percentage.
    """

    num_uses_failed = models.PositiveIntegerField(default=0)
    num_uses_succeeded = models.PositiveIntegerField(default=0)

    class Meta:
        abstract = True

    def record_health_failure(self) -> None:
        """Add one to the failure counter and save the model."""
        self.num_uses_failed = self.num_uses_failed + 1
        self.save()

    def record_health_success(self) -> None:
        """Add one to the success counter and save the model."""
        self.num_uses_succeeded = self.num_uses_succeeded + 1
        self.save()

    def reset_health(self) -> None:
        """Clear the failure counter without losing the total use count, then save."""
        # past failures are folded into the success counter so that
        # failed + succeeded still adds up to the same total afterwards
        self.num_uses_succeeded += self.num_uses_failed
        self.num_uses_failed = 0
        self.save()

    @property
    def health(self) -> int:
        """Return the share of successful uses as a whole-number percentage."""
        attempts = self.num_uses_failed + self.num_uses_succeeded
        if attempts < 1:
            # nothing recorded yet: divide by 1 instead of 0 (result is then 0)
            attempts = 1
        success_ratio = self.num_uses_succeeded / attempts
        return round(success_ratio * 100)
# NOTE(review): this span is a rendered diff — removed and added lines appear together and indentation is stripped; confirm statement order against the real file
class MachineManager(models.Manager):
def current(self) -> 'Machine':
"""Get the current machine that ArchiveBox is running on."""
global CURRENT_MACHINE
if CURRENT_MACHINE:
return CURRENT_MACHINE
# the cached row is considered fresh until modified_at + MACHINE_RECHECK_INTERVAL seconds
expires_at = CURRENT_MACHINE.modified_at + timedelta(seconds=MACHINE_RECHECK_INTERVAL)
if timezone.now() < expires_at:
# assume current machine can't change *while archivebox is actively running on it*
# it's not strictly impossible to swap hardware while code is running,
# but it's rare and unusual so we check only once per week
# (e.g. VMWare can live-migrate a VM to a new host while it's running)
return CURRENT_MACHINE
else:
CURRENT_MACHINE = None
guid = get_host_guid()
try:
CURRENT_MACHINE = self.get(guid=guid)
return CURRENT_MACHINE
except self.model.DoesNotExist:
pass
# create the row for this host's guid, or refresh it with freshly detected hostname/OS/VM info and stats
CURRENT_MACHINE, _created = self.update_or_create(
guid=get_host_guid(),
defaults={
'hostname': socket.gethostname(),
**get_os_info(),
**get_vm_info(),
'stats': get_host_stats(),
},
)
CURRENT_MACHINE.save() # populate ABID
CURRENT_MACHINE = self.model(
guid=guid,
hostname=socket.gethostname(),
**get_os_info(),
**get_vm_info(),
stats=get_host_stats(),
)
CURRENT_MACHINE.save()
return CURRENT_MACHINE
class Machine(ABIDModel):
class Machine(ABIDModel, ModelWithHealthStats):
abid_prefix = 'mxn_'
abid_ts_src = 'self.created_at'
abid_uri_src = 'self.guid'
@ -48,13 +97,12 @@ class Machine(ABIDModel):
modified_at = models.DateTimeField(auto_now=True)
# IMMUTABLE PROPERTIES
guid = models.CharField(max_length=64, default=None, null=False, unique=True, editable=False)
guid = models.CharField(max_length=64, default=None, null=False, unique=True, editable=False) # 64char sha256 hash of machine's unique hardware ID
# MUTABLE PROPERTIES
hostname = models.CharField(max_length=63, default=None, null=False)
hw_in_docker = models.BooleanField(default=False, null=False)
hw_in_vm = models.BooleanField(default=False, null=False)
hostname = models.CharField(max_length=63, default=None, null=False) # e.g. somehost.subdomain.example.com
hw_in_docker = models.BooleanField(default=False, null=False) # e.g. False
hw_in_vm = models.BooleanField(default=False, null=False) # e.g. False
hw_manufacturer = models.CharField(max_length=63, default=None, null=False) # e.g. Apple
hw_product = models.CharField(max_length=63, default=None, null=False) # e.g. Mac Studio Mac13,1
hw_uuid = models.CharField(max_length=255, default=None, null=False) # e.g. 39A12B50-...-...-...-...
@ -65,43 +113,49 @@ class Machine(ABIDModel):
os_release = models.CharField(max_length=63, default=None, null=False) # e.g. macOS 14.6.1
os_kernel = models.CharField(max_length=255, default=None, null=False) # e.g. Darwin Kernel Version 23.6.0: Mon Jul 29 21:14:30 PDT 2024; root:xnu-10063.141.2~1/RELEASE_ARM64_T6000
stats = models.JSONField(default=None, null=False)
# STATS COUNTERS
stats = models.JSONField(default=dict, null=False) # e.g. {"cpu_load": [1.25, 2.4, 1.4], "mem_swap_used_pct": 56, ...}
# num_uses_failed = models.PositiveIntegerField(default=0) # from ModelWithHealthStats
# num_uses_succeeded = models.PositiveIntegerField(default=0)
objects = MachineManager()
objects: MachineManager = MachineManager()
networkinterface_set: models.Manager['NetworkInterface']
# NOTE(review): this span is a rendered diff — removed and added lines appear together and indentation is stripped; confirm statement order against the real file
class NetworkInterfaceManager(models.Manager):
def current(self) -> 'NetworkInterface':
"""Get the current network interface for the current machine."""
global CURRENT_INTERFACE
if CURRENT_INTERFACE:
return CURRENT_INTERFACE
# assume the current network interface (public IP, DNS servers, etc.) won't change more than once per hour
expires_at = CURRENT_INTERFACE.modified_at + timedelta(seconds=NETWORK_INTERFACE_RECHECK_INTERVAL)
if timezone.now() < expires_at:
return CURRENT_INTERFACE
else:
CURRENT_INTERFACE = None
machine = Machine.objects.current()
# detected network details; the lookups below read the ip_public/ip_local/mac_address/dns_server keys from it
net_info = get_host_network()
try:
CURRENT_INTERFACE = self.get(
machine=machine,
ip_public=net_info['ip_public'],
ip_local=net_info['ip_local'],
mac_address=net_info['mac_address'],
dns_server=net_info['dns_server'],
)
return CURRENT_INTERFACE
except self.model.DoesNotExist:
pass
CURRENT_INTERFACE = self.model(
CURRENT_INTERFACE, _created = self.update_or_create(
machine=machine,
**get_host_network(),
ip_public=net_info.pop('ip_public'),
ip_local=net_info.pop('ip_local'),
mac_address=net_info.pop('mac_address'),
dns_server=net_info.pop('dns_server'),
defaults=net_info, # only the keys left after the pops above
)
CURRENT_INTERFACE.save()
CURRENT_INTERFACE.save() # populate ABID
return CURRENT_INTERFACE
class NetworkInterface(ABIDModel):
class NetworkInterface(ABIDModel, ModelWithHealthStats):
abid_prefix = 'ixf_'
abid_ts_src = 'self.machine.created_at'
abid_uri_src = 'self.machine.guid'
@ -115,7 +169,7 @@ class NetworkInterface(ABIDModel):
created_at = AutoDateTimeField(default=None, null=False, db_index=True)
modified_at = models.DateTimeField(auto_now=True)
machine = models.ForeignKey(Machine, on_delete=models.CASCADE, default=None, null=False)
machine = models.ForeignKey(Machine, on_delete=models.CASCADE, default=None, null=False) # e.g. Machine(id=...)
# IMMUTABLE PROPERTIES
mac_address = models.CharField(max_length=17, default=None, null=False, editable=False) # e.g. ab:cd:ef:12:34:56