monitor

Monitoring script
git clone git://git.bain.cz/monitor.git
Log | Files | Refs | README

commit 3f4faf42e3b4a916b60a4aa39561ad64e4b0022c
parent 7897adeb7a8f531266d371fb305ed33f4e462b84
Author: bain <bain@bain.cz>
Date:   Tue, 28 Dec 2021 01:50:13 +0100

fix: typing; add: retry decoration function
additionally add retries to ip_requirement

Diffstat:
M monitor.py | 55 ++++++++++++++++++++++++++++++++++++++-----------------
1 file changed, 38 insertions(+), 17 deletions(-)

diff --git a/monitor.py b/monitor.py @@ -6,6 +6,7 @@ import datetime import socket import json import os +from typing import Callable import requests import pydig @@ -19,7 +20,10 @@ logger.setLevel(logging.DEBUG) REPO_ROOT = "status-repo" # last states of services to keep from detecting downtime repeatedly -last_states = {} +last_states: dict[str, bool] = {} + +RequirementCheck = Callable[..., bool] +MonitorDict = dict[str, dict[RequirementCheck, dict]] # publish a failed service, no dependents so edit at will @@ -36,6 +40,7 @@ def fail(service_name: str, failed_requirements: list): logger.warning("Failed to pull from origin! Aborting!") return + # noinspection PyShadowingNames with open(REPO_ROOT + "/" + filename, 'w+') as f: lines = [ "---\n", @@ -60,7 +65,7 @@ def fail(service_name: str, failed_requirements: list): logger.warning(f"service {service_name} failed {[r.__name__ for r in failed_requirements]}") -def self_check(): +def self_check() -> bool: try: if requests.get("https://google.com/").status_code != 200: return False @@ -69,18 +74,33 @@ def self_check(): return True -def http_requirement(url: str, code: int) -> bool: - passed = False - for i in range(2): - try: - resp = requests.head(url) - except requests.exceptions.ConnectionError: +def retry(n: int = 3, sleep: int = 5) -> Callable[[RequirementCheck], RequirementCheck]: + """Decorator maker for calling a function multiple times with sleep time between calls.""" + + def inner_retry(func: RequirementCheck) -> RequirementCheck: + def inner(*args, **kwargs) -> bool: passed = False - else: - passed = resp.status_code == code - if passed: - break - return passed + for i in range(n - 1): + passed = func(*args, **kwargs) + if passed: + break + time.sleep(sleep) + return passed + + inner.__name__ = func.__name__ # preserve names in log (instead of each requirement being called "inner") + return inner + + return inner_retry + + +@retry() +def http_requirement(url: str, code: int) -> bool: + try: + resp = 
requests.head(url) + except requests.exceptions.ConnectionError: + return False + else: + return resp.status_code == code def dns_requirement(name: str, ip: str) -> bool: @@ -91,6 +111,7 @@ def dns_requirement(name: str, ip: str) -> bool: return query and (ip == "*" or ip in query) +@retry() def ip_requirement(ip: str, port: int, prot: str) -> bool: protocol = socket.SOCK_STREAM if prot == "tcp" else socket.SOCK_DGRAM sock = socket.socket(type=protocol) @@ -102,7 +123,7 @@ def ip_requirement(ip: str, port: int, prot: str) -> bool: return True -def check(monitors: dict): +def check(monitors: MonitorDict): for service, requirements in monitors.items(): logger.debug(f"Checking service {service}") failed = [] @@ -122,7 +143,7 @@ def check(monitors: dict): logger.debug("check complete") -monitors = { +monitors_: MonitorDict = { "f.bain.cz": { http_requirement: {"url": "https://f.bain.cz/status", "code": 200}, # dns_requirement: {"name": "f.bain.cz", "ip": "*"}, @@ -140,13 +161,13 @@ monitors = { } if __name__ == '__main__': - # we assume this is gonna be run in a cron job as the gitpython + # we assume this is going to be run in a cron job as the gitpython # library is slowly leaking memory apparently if os.path.exists("last-state"): with open("last-state", 'r') as f: last_states = json.load(f) - check(monitors) + check(monitors_) with open("last-state", 'w+') as f: json.dump(last_states, f)