monitor

Monitoring script
git clone git://git.bain.cz/monitor.git
Log | Files | Refs | README

commit b92cf90a0b29ec2b9d6eec984f2bfbbcbf78e782
Author: bain <bain@bain.cz>
Date:   Sat, 27 Nov 2021 23:30:16 +0100

Initial Commit

Diffstat:
A README | 10 ++++++++++
A monitor.py | 182 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A requirements.txt | 5 +++++
3 files changed, 197 insertions(+), 0 deletions(-)

diff --git a/README b/README
@@ -0,0 +1,10 @@
+A simple monitoring script for online services. Includes self-checks to be more resistant
+to unstable networks as it is meant to be run on a home device (nothing keeps you from
+running it on a server tho).
+
+Currently it publishes all downtime to a repository but the fail function can be easily
+modified to suit one's needs (nothing depends on it). Information about all services is
+in the monitors dictionary.
+
+The requirements are modular, so adding new ways of checking for status is as easy as
+writing a function that returns a bool if it passed.
diff --git a/monitor.py b/monitor.py
@@ -0,0 +1,182 @@
+#!/bin/python3
+# HTTP, DNS, and IP monitoring script
+import time
+import logging
+import datetime
+import socket
+
+import requests
+import pydig
+import git
+import pytz
+
+logger = logging.getLogger(__name__)
+logger.addHandler(logging.StreamHandler())
+logger.setLevel(logging.DEBUG)
+
+REPO_ROOT = "status-repo"
+
+# seconds to wait on a single network operation, so that a stalled
+# connection cannot hang the whole run
+NETWORK_TIMEOUT = 10
+
+# last states of services to keep from detecting downtime repeatedly
+# NOTE: kept in memory only, so it is empty at the start of every run
+last_states = {}
+
+
+# publish a failed service, no dependents so edit at will
+def fail(service_name: str, failed_requirements: list):
+    """Publish a downtime report for a service to the status repository.
+
+    Writes a markdown report into REPO_ROOT, commits it and pushes it to
+    origin. Does nothing if last_states says the service was already down.
+
+    service_name: key of the service in the monitors dictionary
+    failed_requirements: requirement functions that did not pass
+    """
+    if not last_states.get(service_name, True):
+        return
+    failed_names = [r.__name__ for r in failed_requirements]
+    now = datetime.datetime.now(tz=pytz.timezone("Europe/Prague"))
+    filename = f"src/content/{now.strftime('%Y-%m-%d-%f')}-downtime.md"
+    repo = git.Repo(REPO_ROOT)
+    origin = repo.remote('origin')
+    try:
+        origin.pull(kill_after_timeout=10)
+    except git.exc.CommandError:
+        logger.warning("Failed to pull from origin! Aborting!")
+        return
+
+    with open(REPO_ROOT + "/" + filename, 'w') as f:
+        lines = [
+            "---\n",
+            f"title: {service_name} downtime\n",
+            f"date: {now.strftime('%Y-%m-%d %H:%M:%S %z')}\n",
+            "severity: down\n",
+            "affected:\n",
+            f" - {service_name}\n",
+            "---\n",
+            f"Automatic checks for {service_name} have failed. "
+            f"Requirements {failed_names} failed.\n"
+        ]
+        f.writelines(lines)
+    repo.git.add(filename)
+    repo.git.commit('-m', f'{service_name} downtime')
+    try:
+        origin.push(kill_after_timeout=10)
+    except git.exc.CommandError:
+        logger.warning("Push to origin failed! Aborting and resetting!")
+        # "working_tree" is not an option of the git reset command; a hard
+        # reset is what discards the unpushed commit and the report file
+        repo.git.reset("--hard", "origin/HEAD")
+
+    logger.warning(f"service {service_name} failed {failed_names}")
+
+
+def self_check():
+    """Return True if https://google.com/ answers with status 200.
+
+    Used to tell a failing service apart from our own connection being down.
+    """
+    try:
+        resp = requests.get("https://google.com/", timeout=NETWORK_TIMEOUT)
+    except requests.exceptions.RequestException:
+        # requests raises its own exception types, which the builtin
+        # ConnectionError does not cover
+        return False
+    return resp.status_code == 200
+
+
+def http_requirement(url: str, code: int) -> bool:
+    """Return True if a GET of url answers with status code.
+
+    The request is tried at most twice before the requirement fails.
+    """
+    for _ in range(2):
+        try:
+            resp = requests.get(url, timeout=NETWORK_TIMEOUT)
+        except requests.exceptions.RequestException:
+            continue
+        if resp.status_code == code:
+            return True
+    return False
+
+
+def dns_requirement(name: str, ip: str) -> bool:
+    """Return True if name has an A record matching ip ("*" matches any)."""
+    try:
+        query = pydig.query(name, "A")
+    except ConnectionError:
+        return False
+    # bool() so that an empty answer yields False rather than the empty list
+    return bool(query) and (ip == "*" or ip in query)
+
+
+def ip_requirement(ip: str, port: int, prot: str) -> bool:
+    """Return True if a socket can be connected to ip:port.
+
+    prot selects the transport: "tcp" for a stream socket, anything else
+    for a datagram socket. Connecting a datagram socket sends nothing, so
+    a UDP requirement only proves that the address could be resolved.
+    """
+    protocol = socket.SOCK_STREAM if prot == "tcp" else socket.SOCK_DGRAM
+    # the with block closes the socket on the failure path as well
+    with socket.socket(type=protocol) as sock:
+        sock.settimeout(NETWORK_TIMEOUT)
+        try:
+            sock.connect((ip, port))
+        except OSError:
+            # also covers failed name resolution and timeouts, which are
+            # not ConnectionError subclasses
+            return False
+    return True
+
+
+def check(monitors: dict):
+    """Run every requirement of every service and publish the failures.
+
+    monitors maps a service name to a dict of requirement function ->
+    keyword arguments for that function. If a requirement fails and the
+    self-check fails too, the whole run is aborted without publishing.
+    """
+    for service, requirements in monitors.items():
+        logger.debug(f"Checking service {service}")
+        failed = []
+        for requirement, args in requirements.items():
+            logger.debug(f" checking requirement {requirement.__name__}")
+            passed = requirement(**args)
+            if not passed:
+                if not self_check():
+                    logger.warning("Self-check failed, assuming bad connection and aborting")
+                    return
+                logger.info(f"{service} failed requirement {requirement.__name__}")
+                failed.append(requirement)
+            time.sleep(1)
+        if failed:
+            fail(service, failed)
+        last_states[service] = not failed
+    logger.debug("check complete")
+
+
+monitors = {
+    "f.bain.cz": {
+        http_requirement: {"url": "https://f.bain.cz/status", "code": 200},
+        # dns_requirement: {"name": "f.bain.cz", "ip": "*"},
+        # ip_requirement: {"ip": "f.bain.cz", "port": 80, "prot": "tcp"}
+    },
+    "s.bain.cz": {
+        http_requirement: {"url": "https://s.bain.cz/", "code": 200},
+    },
+    "git.bain.cz": {
+        http_requirement: {"url": "https://git.bain.cz/", "code": 200},
+    },
+    "ts3.bain.cz": {
+        ip_requirement: {"ip": "ts3.bain.cz", "port": 9987, "prot": "udp"}
+    }
+}
+
+if __name__ == '__main__':
+    # we assume this is gonna be run in a cron job as the gitpython
+    # library is slowly leaking memory apparently
+    check(monitors)
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,4 @@
+requests
+pydig
+gitpython
+pytz
\ No newline at end of file