Diffstat (limited to 'autoBackup')
-rw-r--r--   autoBackup/.env.example         3
-rw-r--r--   autoBackup/Dockerfile          12
m---------   autoBackup/TasmotaCLI           0
-rw-r--r--   autoBackup/autoBackup.py      236
-rw-r--r--   autoBackup/autoScrub.py        71
-rw-r--r--   autoBackup/docker-compose.yml   8
-rw-r--r--   autoBackup/requirements.txt     1
7 files changed, 291 insertions, 40 deletions
diff --git a/autoBackup/.env.example b/autoBackup/.env.example
index a7c0436..af58add 100644
--- a/autoBackup/.env.example
+++ b/autoBackup/.env.example
@@ -4,7 +4,10 @@
 MASTER_REPLICATION_TASKS=replicateSpinningRust,autoReplicateTheVault
 
 SLAVE_HOST=192.168.69.4
 SLAVE_KEY==*****************************************************************
+SLAVE_USERNAME=************
+SLAVE_PASSWORD=***************
 SLAVE_REPLICATION_TASKS=localVMs/localVMs - fivehundred/localVMs,ReplicateDatabaseBackups
+SLAVE_SCRUB_POOLS=fivehundred,localVMs,chud
 
 POLLING_RATE=300
diff --git a/autoBackup/Dockerfile b/autoBackup/Dockerfile
index 1ae96da..15a144d 100644
--- a/autoBackup/Dockerfile
+++ b/autoBackup/Dockerfile
@@ -1,13 +1,13 @@
-FROM ubuntu:20.04
-ENV TZ=Europe/London
-RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
-RUN apt-get update -y
-RUN apt-get install -y python3-pip iputils-ping cron
+FROM reg.reaweb.uk/cron
+# https://github.com/jwansek/cron/
+
+RUN git clone https://github.com/monitorjbl/api_client.git /api_client && cd /api_client && python3 setup.py install && rm -rf /api_client
 COPY . /app
 WORKDIR /app
 RUN pip3 install -r requirements.txt
 RUN pip3 install -r TasmotaCLI/requirements.txt
 RUN echo "0 21 * * sat,wed root python3 /app/autoBackup.py > /proc/1/fd/1 2>/proc/1/fd/2" > /etc/crontab
+RUN echo "@monthly root python3 /app/autoScrub.py > /proc/1/fd/1 2>/proc/1/fd/2" >> /etc/crontab
 
 ENTRYPOINT ["bash"]
-CMD ["entrypoint.sh"]
\ No newline at end of file
+CMD ["entrypoint.sh"]
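Both crontab entries redirect job output to /proc/1/fd/1 and /proc/1/fd/2 because docker logs only captures the streams of PID 1, and a cron-spawned process is not PID 1, so it borrows those descriptors. A minimal Python sketch of the same trick (illustrative only, not part of the commit):

    # Write to the container's PID 1 stdout so "docker logs" picks it up,
    # mirroring the "> /proc/1/fd/1" redirection in the crontab lines above.
    with open("/proc/1/fd/1", "w") as pid1_stdout:
        print("cron job output, now visible via docker logs", file=pid1_stdout)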
diff --git a/autoBackup/TasmotaCLI b/autoBackup/TasmotaCLI
-Subproject dd7790dab8d3fbea8f2b58eb4d5aaffc36b3cb0
+Subproject 96b7ce92db31f70072eb7949f76b120fd542b55
diff --git a/autoBackup/autoBackup.py b/autoBackup/autoBackup.py
index a0cd841..7f1bc76 100644
--- a/autoBackup/autoBackup.py
+++ b/autoBackup/autoBackup.py
@@ -1,5 +1,9 @@
+import truenas_api_client
+import subprocess
+import datetime
 import requests
 import logging
+import pickle
 import dotenv
 import json
 import time
@@ -22,7 +26,124 @@ logging.basicConfig(
     ]
 )
 
+class TrueNASWebsocketsClient(truenas_api_client.JSONRPCClient):
+    """Using API keys with a self-signed certificate automatically invalidates them now? So
+    we have to use less secure username and password authentication instead....
+    And attempting to set up a reverse proxy with HAProxy apparently means then the sockets API
+    doesn't work... (see bottom)
+
+    Also, despite what the documentation says, it seems the websockets API doesn't like calls
+    over a long time with the same authentication, so we make a new session and re-authenticate
+    every time we poll the jobs. Yes this is directly contradicting what the documentation tells us
+    to do. Therefore, we serialize the dictionary of currently running jobs so we can make lots of new
+    instances of this object.
+
+    The HTTP API is better in every way, but apparently it will be removed in a future version of TrueNAS
+    (25.10?) hence we have this instead.
+
+    This implementation of the websockets API only works in 25.04 and onwards.
+    """
+    def __init__(self, host, username, password, replication_task_names = None, *args, **kwargs):
+        super().__init__(uri = "ws://%s/api/current" % host, *args, **kwargs)
+        self.host = host
+        self.username = username
+        self.password = password
+
+        if replication_task_names is None:
+            self.replication_task_names = []
+        else:
+            self.replication_task_names = replication_task_names
+
+    def __enter__(self):
+        o = super().__enter__()
+        # We are forced to use username/password instead of API keys if we're using self-certified certificates
+        auth = self.call("auth.login", self.username, self.password)
+        return o
+
+    def __exit__(self, *args, **kwargs):
+        super().__exit__(*args, **kwargs)
+        # logging.info("%s Websocket disconnected" % self.host)
+
+    def _get_job_serialized_name(self, job_type):
+        return os.path.join(os.path.dirname(__file__), ".%s_%s_jobs.pickle" % (self.host, job_type))
+
+    def _get_serialized_jobs(self, job_type):
+        if os.path.exists(self._get_job_serialized_name(job_type)):
+            with open(self._get_job_serialized_name(job_type), "rb") as f:
+                return pickle.load(f)
+        else:
+            return {}
+
+    def _set_serialized_jobs(self, jobs, job_name):
+        with open(self._get_job_serialized_name(job_name), "wb") as f:
+            pickle.dump(jobs, f)
+
+    def get_replication_tasks(self):
+        return list(filter(lambda a: a["name"] in self.replication_task_names, self.call("replication.query")))
+
+    def run_replication_task(self, task_id):
+        return self.call("replication.run", task_id)
+
+    @staticmethod
+    def is_ready(host, username, password, *args, **kwargs):
+        try:
+            with truenas_api_client.JSONRPCClient(uri = "ws://%s/api/current" % host, *args, **kwargs) as c:
+                c.call("auth.login", username, password)
+                return c.call("system.ready")
+        except OSError:
+            raise ConnectionError("No route to host")
+
+    def shutdown(self):
+        return self.call("system.shutdown", "Automatic autoBackup shutdown")
+
+    def run_all_replication_tasks(self):
+        running_replication_jobs = self._get_serialized_jobs("replication")
+
+        for task in self.get_replication_tasks():
+            job_id = self.run_replication_task(task["id"])
+            running_replication_jobs[job_id] = task["name"]
+            logging.info("Started replication task '%s' on '%s' with job id %d" % (task["name"], self.host, job_id))
+
+        self._set_serialized_jobs(running_replication_jobs, "replication")
+
+    def scrub_pools(self, pools):
+        running_jobs = self._get_serialized_jobs("scrub")
+
+        for pool_name in pools:
+            job_id = self.call("pool.scrub.scrub", pool_name)
+            running_jobs[job_id] = pool_name
+            logging.info("Started scrub job on pool '%s' on host '%s' with job id %d" % (pool_name, self.host, job_id))
+
+        self._set_serialized_jobs(running_jobs, "scrub")
+
+    def get_jobs(self):
+        return self.call("core.get_jobs")
+
+    def get_state_of_replication_jobs(self):
+        return self.get_state_of_jobs("replication")
+
+    def get_state_of_jobs(self, job_type):
+        running_jobs = self._get_serialized_jobs(job_type)
+        all_complete = True
+        for job in self.get_jobs():
+            if job["id"] in running_jobs.keys():
+                if job["state"] == "RUNNING":
+                    all_complete = False
+                logging.info("%s job '%s' on '%s' is currently '%s' (%d%%)" % (
+                    job_type, running_jobs[job["id"]], self.host, job["state"], job["progress"]["percent"]
+                ))
+
+        if all_complete:
+            if os.path.exists(self._get_job_serialized_name(job_type)):
+                os.remove(self._get_job_serialized_name(job_type))
+                logging.info("All %s jobs on '%s' completed" % (job_type, self.host))
+            else:
+                logging.info("There were no %s jobs on '%s'. Perhaps they already all finished." % (job_type, self.host))
+        return all_complete
+
 class TrueNASAPIClient:
+    """Class for the REST HTTP API, which sadly will be removed soon :c
+    """
     def __init__(self, host, api_key, replication_task_names = None):
         self.host = host
         self.base_url = "http://%s/api/v2.0" % host
@@ -39,7 +160,7 @@ class TrueNASAPIClient:
     @staticmethod
     def filter_running_jobs(jobs):
         return list(filter(
-            lambda i: i["method"] == "replication.run" and i["progress"]["percent"] != 100 and not i["state"] == "FAILED",
+            lambda i: i["progress"]["percent"] != 100 and not i["state"] == "FAILED",
             jobs
         ))
@@ -61,9 +182,6 @@ class TrueNASAPIClient:
     def get_jobs(self):
         return self.base_get("/core/get_jobs")
 
-    def get_running_replication_jobs(self):
-        return [i for i in self.get_jobs() if i["method"] == "replication.run" and i["progress"]["percent"] != 100 and not i["state"] == "FAILED"]
-
     def get_replication_tasks(self):
         return list(filter(lambda a: a["name"] in self.replication_task_names, self.base_get("/replication")))
@@ -104,7 +222,6 @@ class TrueNASAPIClient:
         return all_complete
 
 def check_if_all_complete(truenasclients):
-    logging.info("Slave plug '%s' is using %dw of power" % (os.environ["SLAVE_PLUG_FRIENDLYNAME"], get_mqtt().switch_energy['Power']))
     all_complete = True
     for truenas in truenasclients:
         if not truenas.get_state_of_replication_jobs():
@@ -117,20 +234,49 @@ def get_mqtt(message = None):
         username = os.environ["MQTT_USER"],
         password = os.environ["MQTT_PASSWORD"],
         friendlyname = os.environ["SLAVE_PLUG_FRIENDLYNAME"],
+        verbose = False,
         message = message
     )
 
 def wait_for_slave(slave):
+    """Wait for a TrueNAS REST HTTP Client to be ready
+
+    Args:
+        slave (TrueNASAPIClient): A TrueNAS REST client
+    """
     while True:
         time.sleep(int(os.environ["POLLING_RATE"]))
         try:
-            logging.info("Slave is ready: " + str(slave.is_ready()))
-        except requests.exceptions.ConnectionError:
+            ready = slave.is_ready()
+            logging.info("Slave is ready: " + str(ready))
+            if not ready:
+                continue
+        except (requests.exceptions.ConnectionError, truenas_api_client.exc.ClientException):
             logging.info("'%s' hasn't booted, waiting for %d more seconds" % (slave.host, int(os.environ["POLLING_RATE"])))
         else:
             break
     logging.info("Slave TrueNAS has booted and is ready for API requests")
 
+def wait_for_sockets_slave():
+    """Wait for the slave's websockets API to be ready
+    """
+    while True:
+        time.sleep(int(os.environ["POLLING_RATE"]))
+        try:
+            ready = TrueNASWebsocketsClient.is_ready(
+                host = os.environ["SLAVE_HOST"],
+                username = os.environ["SLAVE_USERNAME"],
+                password = os.environ["SLAVE_PASSWORD"]
+            )
+            logging.info("Slave is ready: " + str(ready))
+            if not ready:
+                continue
+        except ConnectionError:
+            logging.info("'%s' hasn't booted, waiting for %d more seconds" % (os.environ["SLAVE_HOST"], int(os.environ["POLLING_RATE"])))
+        else:
+            break
+    logging.info("Slave TrueNAS has booted and is ready for API requests")
+
 def wait_till_idle_power():
     while True:
         p = get_mqtt().switch_energy['Power']
@@ -139,26 +285,19 @@ def main():
+    start_time = datetime.datetime.now()
+    subprocess.run(["rm", "-f", os.path.join(os.path.dirname(__file__), "*_replication_jobs.pickle")])
+
     if os.environ["MASTER_REPLICATION_TASKS"] != "":
-        tasks = os.environ["MASTER_REPLICATION_TASKS"].split(",")
+        master_tasks = os.environ["MASTER_REPLICATION_TASKS"].split(",")
     else:
-        tasks = []
-    master = TrueNASAPIClient(
-        host = os.environ["MASTER_HOST"],
-        api_key = os.environ["MASTER_KEY"],
-        replication_task_names = tasks
-    )
+        master_tasks = []
 
     if os.environ["SLAVE_REPLICATION_TASKS"] != "":
-        tasks = os.environ["SLAVE_REPLICATION_TASKS"].split(",")
+        slave_tasks = os.environ["SLAVE_REPLICATION_TASKS"].split(",")
     else:
-        tasks = []
-    slave = TrueNASAPIClient(
-        host = os.environ["SLAVE_HOST"],
-        api_key = os.environ["SLAVE_KEY"],
-        replication_task_names = tasks
-    )
+        slave_tasks = []
 
-    logging.info("Began autoBackup procedure")
+    logging.info("\n\nBegan autoBackup procedure")
     m = get_mqtt()
     logging.info("Slave plug '%s' is currently %s" % (m.friendlyname, m.switch_power))
     if m.switch_power == "ON":
@@ -167,20 +306,57 @@ def main():
         was_already_on = False
         get_mqtt("ON")
         logging.info("Turned on the slave plug. Now waiting for it to boot")
-    wait_for_slave(slave)
+    # wait_for_slave(slave)
+    wait_for_sockets_slave()
+
+    with (TrueNASWebsocketsClient(
+        host = os.environ["SLAVE_HOST"],
+        username = os.environ["SLAVE_USERNAME"],
+        password = os.environ["SLAVE_PASSWORD"],
+        replication_task_names = slave_tasks
+    ) as slave, TrueNASWebsocketsClient(
+        host = os.environ["MASTER_HOST"],
+        username = os.environ["MASTER_USERNAME"],
+        password = os.environ["MASTER_PASSWORD"],
+        replication_task_names = master_tasks
+    ) as master
+    ):
+        master.run_all_replication_tasks()
+        slave.run_all_replication_tasks()
+
+    while True:
+        with (TrueNASWebsocketsClient(
+            host = os.environ["SLAVE_HOST"],
+            username = os.environ["SLAVE_USERNAME"],
+            password = os.environ["SLAVE_PASSWORD"],
+            replication_task_names = slave_tasks
+        ) as slave, TrueNASWebsocketsClient(
+            host = os.environ["MASTER_HOST"],
+            username = os.environ["MASTER_USERNAME"],
+            password = os.environ["MASTER_PASSWORD"],
+            replication_task_names = master_tasks
+        ) as master
+        ):
+            if check_if_all_complete([master, slave]):
+                break
 
-    master.run_all_replication_tasks()
-    slave.run_all_replication_tasks()
-    # while (not master.get_state_of_replication_jobs()) or (not slave.get_state_of_replication_jobs()):
-    while not check_if_all_complete([master, slave]):
+        logging.info("Slave plug '%s' is using %dw of power" % (os.environ["SLAVE_PLUG_FRIENDLYNAME"], get_mqtt().switch_energy['Power']))
         time.sleep(int(os.environ["POLLING_RATE"]))
+    logging.info("All replication jobs on all hosts complete")
 
     if was_already_on:
         logging.info("The slave TrueNAS was turned on not by us, so stopping here")
     else:
         logging.info("The slave TrueNAS was turned on by us, so starting the shutdown procedure")
-        logging.info(json.dumps(slave.shutdown(), indent = 4))
+        with TrueNASWebsocketsClient(
+            host = os.environ["SLAVE_HOST"],
+            username = os.environ["SLAVE_USERNAME"],
+            password = os.environ["SLAVE_PASSWORD"],
+            replication_task_names = slave_tasks
+        ) as slave:
+            slave.shutdown()
+            # logging.info(json.dumps(slave.shutdown(), indent = 4))
 
     # wait until the slave TrueNAS is using 0w of power, which implies it has finished shutting down,
     # then turn off the power to it
@@ -188,8 +364,10 @@ def main():
     wait_till_idle_power()
     get_mqtt("OFF")
     logging.info("Turned off the slave's plug")
 
-    logging.info("autoBackup procedure completed\n\n")
+    logging.info("autoBackup backup procedure completed. Took %s" % str(datetime.datetime.now() - start_time))
 
 if __name__ == "__main__":
     main()
+
+
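As the TrueNASWebsocketsClient docstring explains, long-lived websocket sessions proved unreliable, so every poll opens a fresh connection, re-authenticates, and re-reads the pickled job IDs written by the previous run. A condensed sketch of that reconnect-per-poll pattern, using only calls that appear in the diff (auth.login, core.get_jobs); the host, credentials, and pickle path are placeholders:

    import pickle
    import time
    import truenas_api_client

    JOBS_FILE = ".example_jobs.pickle"  # placeholder standing in for _get_job_serialized_name()

    def all_jobs_finished(host, username, password):
        # fresh session and authentication on every poll, per the docstring's advice
        with truenas_api_client.JSONRPCClient(uri = "ws://%s/api/current" % host) as c:
            c.call("auth.login", username, password)
            with open(JOBS_FILE, "rb") as f:
                watched = pickle.load(f)  # {job_id: task_name} written by an earlier run
            return all(
                job["state"] != "RUNNING"
                for job in c.call("core.get_jobs")
                if job["id"] in watched
            )

    while not all_jobs_finished("192.0.2.10", "user", "pass"):  # placeholder credentials
        time.sleep(300)  # POLLING_RATE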
diff --git a/autoBackup/autoScrub.py b/autoBackup/autoScrub.py
new file mode 100644
index 0000000..c34f5aa
--- /dev/null
+++ b/autoBackup/autoScrub.py
@@ -0,0 +1,71 @@
+import subprocess
+from autoBackup import TrueNASWebsocketsClient, get_mqtt, wait_for_sockets_slave, wait_till_idle_power
+import datetime
+import logging
+import time
+import json
+import os
+
+def main():
+    start_time = datetime.datetime.now()
+    subprocess.run(["rm", "-f", os.path.join(os.path.dirname(__file__), "*_scrub_jobs.pickle")])
+
+    if os.environ["SLAVE_SCRUB_POOLS"] != "":
+        scrub_pools = os.environ["SLAVE_SCRUB_POOLS"].split(",")
+    else:
+        scrub_pools = []
+
+    logging.info("Began autoScrub scrub procedure")
+    m = get_mqtt()
+    logging.info("Slave plug '%s' is currently %s" % (m.friendlyname, m.switch_power))
+    if m.switch_power == "ON":
+        was_already_on = True
+    else:
+        was_already_on = False
+        get_mqtt("ON")
+        logging.info("Turned on the slave plug. Now waiting for it to boot")
+    # wait_for_slave(slave)
+    wait_for_sockets_slave()
+
+    with TrueNASWebsocketsClient(
+        host = os.environ["SLAVE_HOST"],
+        username = os.environ["SLAVE_USERNAME"],
+        password = os.environ["SLAVE_PASSWORD"]
+    ) as slave:
+        slave.scrub_pools(scrub_pools)
+
+    while True:
+        with TrueNASWebsocketsClient(
+            host = os.environ["SLAVE_HOST"],
+            username = os.environ["SLAVE_USERNAME"],
+            password = os.environ["SLAVE_PASSWORD"]
+        ) as slave:
+            if slave.get_state_of_jobs("scrub"):
+                break
+
+        logging.info("Slave plug '%s' is using %dw of power" % (os.environ["SLAVE_PLUG_FRIENDLYNAME"], get_mqtt().switch_energy['Power']))
+        time.sleep(int(os.environ["POLLING_RATE"]))
+
+    logging.info("All scrub jobs on all hosts complete")
+
+    if was_already_on:
+        logging.info("The slave TrueNAS was turned on not by us, so stopping here")
+    else:
+        logging.info("The slave TrueNAS was turned on by us, so starting the shutdown procedure")
+        with TrueNASWebsocketsClient(
+            host = os.environ["SLAVE_HOST"],
+            username = os.environ["SLAVE_USERNAME"],
+            password = os.environ["SLAVE_PASSWORD"],
+        ) as slave:
+            logging.info(json.dumps(slave.shutdown(), indent = 4))
+
+    # wait until the slave TrueNAS is using 0w of power, which implies it has finished shutting down,
+    # then turn off the power to it
+    wait_till_idle_power()
+    get_mqtt("OFF")
+    logging.info("Turned off the slave's plug")
+
+    logging.info("autoScrub scrub procedure completed. Took %s\n\n" % str(datetime.datetime.now() - start_time))
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
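autoScrub.py drives the same machinery for scrubs: SLAVE_SCRUB_POOLS is split on commas, and each pool name becomes one pool.scrub.scrub call whose returned job id is then tracked through core.get_jobs. A stripped-down sketch of that flow (placeholder environment; the smart-plug power handling and polling loop are omitted):

    import os
    import truenas_api_client

    pools = [p for p in os.environ.get("SLAVE_SCRUB_POOLS", "").split(",") if p]

    with truenas_api_client.JSONRPCClient(uri = "ws://%s/api/current" % os.environ["SLAVE_HOST"]) as c:
        c.call("auth.login", os.environ["SLAVE_USERNAME"], os.environ["SLAVE_PASSWORD"])
        for pool_name in pools:
            job_id = c.call("pool.scrub.scrub", pool_name)  # job id, trackable via core.get_jobs
            print("started scrub of pool %s as job %d" % (pool_name, job_id))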
diff --git a/autoBackup/docker-compose.yml b/autoBackup/docker-compose.yml
index f23ecb8..71ec0a0 100644
--- a/autoBackup/docker-compose.yml
+++ b/autoBackup/docker-compose.yml
@@ -1,14 +1,12 @@
-version: "3"
-
 services:
-  autoReplicate:
-    container_name: auto_replicate
+  autoBackup:
+    container_name: auto_backup
     volumes:
       - ./logs/:/app/logs
     env_file:
       - ./.env
     restart: unless-stopped
-    image: reg.reaweb.uk/auto_replicate
+    image: reg.reaweb.uk/auto_backup
     build:
       context: .
       dockerfile: Dockerfile
diff --git a/autoBackup/requirements.txt b/autoBackup/requirements.txt
index 2e7e54b..14abc09 100644
--- a/autoBackup/requirements.txt
+++ b/autoBackup/requirements.txt
@@ -1,3 +1,4 @@
 python-dotenv
 requests
 docker
+websocket-client>1.3.2
\ No newline at end of file
