about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
l--------- .dockerignore 1
-rw-r--r-- .gitignore 1
-rw-r--r-- .gitmodules 6
-rw-r--r-- README.md 2
m--------- api_client 0
-rw-r--r-- autoBackup/.env.example 3
-rw-r--r-- autoBackup/Dockerfile 12
m--------- autoBackup/TasmotaCLI 0
-rw-r--r-- autoBackup/autoBackup.py 238
-rw-r--r-- autoBackup/autoScrub.py 71
-rw-r--r-- autoBackup/docker-compose.yml 8
-rw-r--r-- autoBackup/requirements.txt 1
-rwxr-xr-x [-rw-r--r--] do_replicate.sh 0
13 files changed, 298 insertions, 45 deletions
diff --git a/.dockerignore b/.dockerignore
new file mode 120000
index 0000000..3e4e48b
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1 @@
+.gitignore
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 290c87d..72628ee 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
autoBackup/.env
*.env
+*.pickle
# Byte-compiled / optimized / DLL files
__pycache__/
diff --git a/.gitmodules b/.gitmodules
index 457e4a0..a1b2dfe 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,6 +1,6 @@
-[submodule "autoBackup/api_client"]
- path = autoBackup/api_client
- url = git@github.com:truenas/api_client.git
[submodule "autoBackup/TasmotaCLI"]
path = autoBackup/TasmotaCLI
url = git@github.com:jwansek/TasmotaCLI.git
+[submodule "api_client"]
+ path = api_client
+ url = git@github.com:monitorjbl/api_client.git
diff --git a/README.md b/README.md
index 5561a8d..dbbc2b4 100644
--- a/README.md
+++ b/README.md
@@ -20,7 +20,7 @@ it is shut down, and once the plug is pulling 0w, which implies the TrueNAS has
If the Tasmota MQTT plug was already on when the script starts, it implies that the `slave` TrueNAS was started manually, so it won't automatically
be switched off.
-It is recommended to run ZFS scrub tasks manually occasionally, since they probably won't be run automatically by TrueNAS.
+~~It is recommended to run ZFS scrub tasks manually occasionally, since they probably won't be run automatically by TrueNAS.~~ A script to automatically run ZFS scrub jobs every month has been added.
A Dockerfile is provided so you can automatically run this script as a cronjob.
diff --git a/api_client b/api_client
new file mode 160000
+Subproject f8c11612625ff253aac664afcc9eb81806f9920
diff --git a/autoBackup/.env.example b/autoBackup/.env.example
index a7c0436..af58add 100644
--- a/autoBackup/.env.example
+++ b/autoBackup/.env.example
@@ -4,7 +4,10 @@ MASTER_REPLICATION_TASKS=replicateSpinningRust,autoReplicateTheVault
SLAVE_HOST=192.168.69.4
SLAVE_KEY==*****************************************************************
+SLAVE_USERNAME=************
+SLAVE_PASSWORD=***************
SLAVE_REPLICATION_TASKS=localVMs/localVMs - fivehundred/localVMs,ReplicateDatabaseBackups
+SLAVE_SCRUB_POOLS=fivehundred,localVMs,chud
POLLING_RATE=300
diff --git a/autoBackup/Dockerfile b/autoBackup/Dockerfile
index 1ae96da..15a144d 100644
--- a/autoBackup/Dockerfile
+++ b/autoBackup/Dockerfile
@@ -1,13 +1,13 @@
-FROM ubuntu:20.04
-ENV TZ=Europe/London
-RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
-RUN apt-get update -y
-RUN apt-get install -y python3-pip iputils-ping cron
+FROM reg.reaweb.uk/cron
+# https://github.com/jwansek/cron/
+
+RUN git clone https://github.com/monitorjbl/api_client.git /api_client && cd /api_client && python3 setup.py install && rm -rf /api_client
COPY . /app
WORKDIR /app
RUN pip3 install -r requirements.txt
RUN pip3 install -r TasmotaCLI/requirements.txt
RUN echo "0 21 * * sat,wed root python3 /app/autoBackup.py > /proc/1/fd/1 2>/proc/1/fd/2" > /etc/crontab
+RUN echo "@monthly root python3 /app/autoScrub.py > /proc/1/fd/1 2>/proc/1/fd/2" >> /etc/crontab
ENTRYPOINT ["bash"]
-CMD ["entrypoint.sh"]
\ No newline at end of file
+CMD ["entrypoint.sh"]
diff --git a/autoBackup/TasmotaCLI b/autoBackup/TasmotaCLI
-Subproject dd7790dab8d3fbea8f2b58eb4d5aaffc36b3cb0
+Subproject 96b7ce92db31f70072eb7949f76b120fd542b55
diff --git a/autoBackup/autoBackup.py b/autoBackup/autoBackup.py
index 166ad5a..7f1bc76 100644
--- a/autoBackup/autoBackup.py
+++ b/autoBackup/autoBackup.py
@@ -1,5 +1,9 @@
+import truenas_api_client
+import subprocess
+import datetime
import requests
import logging
+import pickle
import dotenv
import json
import time
@@ -22,7 +26,124 @@ logging.basicConfig(
]
)
+class TrueNASWebsocketsClient(truenas_api_client.JSONRPCClient):
+    """Client for the TrueNAS websockets (JSON-RPC) API using username/password login.
+
+    Using API keys with a self-signed certificate automatically invalidates them now? So
+    we have to use less secure username and password authentication instead....
+    And attempting to set up a reverse proxy with HAProxy apparently means then the sockets API
+    doesn't work... (see bottom)
+
+    Also, despite what the documentation says, it seems the websockets API doesn't like calls
+    over a long time with the same authentication, so we make a new session and re-authenticate
+    every time we poll the jobs. Yes, this directly contradicts what the documentation tells us
+    to do. Therefore, we serialize the dictionary of currently running jobs to disk so that many
+    short-lived instances of this object can share it.
+
+    The HTTP API is better in every way, but apparently it will be removed in a future version of TrueNAS
+    (25.10?) hence we have this instead.
+
+    This implementation of the websockets API only works in 25.04 and onwards.
+
+    Args:
+        host (str): Host (and optional port) of the TrueNAS machine
+        username (str): Username passed to ``auth.login``
+        password (str): Password passed to ``auth.login``
+        replication_task_names (list, optional): Names of the replication tasks this
+            client is allowed to start. Defaults to no tasks.
+    """
+    def __init__(self, host, username, password, replication_task_names = None, *args, **kwargs):
+        super().__init__(uri = "ws://%s/api/current" % host, *args, **kwargs)
+        self.host = host
+        self.username = username
+        self.password = password
+        # A fresh list per instance, never a shared mutable default
+        self.replication_task_names = [] if replication_task_names is None else replication_task_names
+
+    def __enter__(self):
+        """Open the websocket and log in straight away."""
+        o = super().__enter__()
+        # We are forced to use username/password instead of API keys if we're using self-certified certificates
+        self.call("auth.login", self.username, self.password)
+        return o
+
+    def __exit__(self, *args, **kwargs):
+        super().__exit__(*args, **kwargs)
+        # logging.info("%s Websocket disconnected" % self.host)
+
+    def _get_job_serialized_name(self, job_type):
+        """Return the path of the hidden state file for this host and job type."""
+        return os.path.join(os.path.dirname(__file__), ".%s_%s_jobs.pickle" % (self.host, job_type))
+
+    def _get_serialized_jobs(self, job_type):
+        """Load the ``{job id: name}`` mapping for ``job_type``, or ``{}`` if none was saved."""
+        # NOTE: pickle must only ever be used on files written by _set_serialized_jobs();
+        # never point this at a file that something else could have produced.
+        try:
+            with open(self._get_job_serialized_name(job_type), "rb") as f:
+                return pickle.load(f)
+        except FileNotFoundError:
+            return {}
+
+    def _set_serialized_jobs(self, jobs, job_name):
+        """Persist the ``{job id: name}`` mapping; ``job_name`` is the job type, e.g. "scrub"."""
+        with open(self._get_job_serialized_name(job_name), "wb") as f:
+            pickle.dump(jobs, f)
+
+    def get_replication_tasks(self):
+        """Return the replication tasks whose name is in ``replication_task_names``."""
+        return [task for task in self.call("replication.query") if task["name"] in self.replication_task_names]
+
+    def run_replication_task(self, task_id):
+        """Start one replication task and return the result of ``replication.run`` (used as the job id)."""
+        return self.call("replication.run", task_id)
+
+    @staticmethod
+    def is_ready(host, username, password, *args, **kwargs):
+        """Log in with a throwaway connection and return the result of ``system.ready``.
+
+        Raises:
+            ConnectionError: If the connection attempt failed with an OSError
+        """
+        try:
+            with truenas_api_client.JSONRPCClient(uri = "ws://%s/api/current" % host, *args, **kwargs) as c:
+                c.call("auth.login", username, password)
+                return c.call("system.ready")
+        except OSError as e:
+            # Keep the underlying error attached so the real cause shows up in tracebacks
+            raise ConnectionError("No route to host") from e
+
+    def shutdown(self):
+        """Ask the host to shut down and return the result of ``system.shutdown``."""
+        return self.call("system.shutdown", "Automatic autoBackup shutdown")
+
+    def run_all_replication_tasks(self):
+        """Start every configured replication task and record the job ids on disk."""
+        running_replication_jobs = self._get_serialized_jobs("replication")
+
+        for task in self.get_replication_tasks():
+            job_id = self.run_replication_task(task["id"])
+            running_replication_jobs[job_id] = task["name"]
+            logging.info("Started replication task '%s' on '%s' with job id %d" % (task["name"], self.host, job_id))
+
+        self._set_serialized_jobs(running_replication_jobs, "replication")
+
+    def scrub_pools(self, pools):
+        """Start a scrub on each pool in ``pools`` and record the job ids on disk."""
+        running_jobs = self._get_serialized_jobs("scrub")
+
+        for pool_name in pools:
+            job_id = self.call("pool.scrub.scrub", pool_name)
+            running_jobs[job_id] = pool_name
+            logging.info("Started scrub job on pool '%s' on host '%s' with job id %d" % (pool_name, self.host, job_id))
+
+        self._set_serialized_jobs(running_jobs, "scrub")
+
+    def get_jobs(self):
+        """Return the result of ``core.get_jobs``."""
+        return self.call("core.get_jobs")
+
+    def get_state_of_replication_jobs(self):
+        """Shortcut for ``get_state_of_jobs("replication")``."""
+        return self.get_state_of_jobs("replication")
+
+    def get_state_of_jobs(self, job_type):
+        """Log the state of every tracked job of ``job_type`` and report whether all are done.
+
+        Once nothing is pending any more, the state file for ``job_type`` is deleted.
+
+        Returns:
+            bool: False while any tracked job is still queued or running, otherwise True
+        """
+        running_jobs = self._get_serialized_jobs(job_type)
+        all_complete = True
+        for job in self.get_jobs():
+            if job["id"] in running_jobs:
+                # A job that is still queued ("WAITING") has not finished either. Treating it
+                # as done would let the caller shut the host down before the job even starts.
+                if job["state"] in ("WAITING", "RUNNING"):
+                    all_complete = False
+                # NOTE(review): percent may be unset before a job reports progress, which
+                # would break the %d formatting - confirm against the API and drop if not needed
+                logging.info("%s job '%s' on '%s' is currently '%s' (%d%%)" % (
+                    job_type, running_jobs[job["id"]], self.host, job["state"], job["progress"]["percent"] or 0
+                ))
+
+        if all_complete:
+            if os.path.exists(self._get_job_serialized_name(job_type)):
+                os.remove(self._get_job_serialized_name(job_type))
+                logging.info("All %s jobs on '%s' completed" % (job_type, self.host))
+            else:
+                logging.info("There were no %s jobs on '%s'. Perhaps they already all finished." % (job_type, self.host))
+        return all_complete
+
+
class TrueNASAPIClient:
+ """Class for the REST HTTP API, which sadly will be removed soon :c
+ """
def __init__(self, host, api_key, replication_task_names = None):
self.host = host
self.base_url = "http://%s/api/v2.0" % host
@@ -39,7 +160,7 @@ class TrueNASAPIClient:
@staticmethod
def filter_running_jobs(jobs):
return list(filter(
- lambda i: i["method"] == "replication.run" and i["progress"]["percent"] != 100 and not i["state"] == "FAILED",
+ lambda i: i["progress"]["percent"] != 100 and not i["state"] == "FAILED",
jobs
))
@@ -61,9 +182,6 @@ class TrueNASAPIClient:
def get_jobs(self):
return self.base_get("/core/get_jobs")
- def get_running_replication_jobs(self):
- return [i for i in self.get_jobs() if i["method"] == "replication.run" and i["progress"]["percent"] != 100 and not i["state"] == "FAILED"]
-
def get_replication_tasks(self):
return list(filter(lambda a: a["name"] in self.replication_task_names, self.base_get("/replication")))
@@ -77,7 +195,7 @@ class TrueNASAPIClient:
return self.base_get("/system/ready")
def shutdown(self):
- req = requests.post(self.base_url + "/system/shutdown", headers = self.headers)
+ req = requests.post(self.base_url + "/system/shutdown", headers = self.headers, json = {"reason": "Automatic autoBackup shutdown"})
if not req.status_code == 200:
raise ConnectionError("API call failed (%d): '%s'" % (req.status_code, req.content.decode()))
return req.json()
@@ -104,7 +222,6 @@ class TrueNASAPIClient:
return all_complete
def check_if_all_complete(truenasclients):
- logging.info("Slave plug '%s' is using %dw of power" % (os.environ["SLAVE_PLUG_FRIENDLYNAME"], get_mqtt().switch_energy['Power']))
all_complete = True
for truenas in truenasclients:
if not truenas.get_state_of_replication_jobs():
@@ -117,20 +234,49 @@ def get_mqtt(message = None):
username = os.environ["MQTT_USER"],
password = os.environ["MQTT_PASSWORD"],
friendlyname = os.environ["SLAVE_PLUG_FRIENDLYNAME"],
+ verbose = False,
message = message
)
def wait_for_slave(slave):
+ """Wait for a TrueNAS REST HTTP Client to be ready
+
+ Args:
+ slave (TrueNASAPIClient): A TrueNAS REST client
+ """
while True:
time.sleep(int(os.environ["POLLING_RATE"]))
try:
- logging.info("Slave is ready: " + str(slave.is_ready()))
- except requests.exceptions.ConnectionError:
+ ready = slave.is_ready()
+ logging.info("Slave is ready: " + str(ready))
+ if not ready:
+ continue
+ except (requests.exceptions.ConnectionError, truenas_api_client.exc.ClientException):
logging.info("'%s' hasn't booted, waiting for %d more seconds" % (slave.host, int(os.environ["POLLING_RATE"])))
else:
break
logging.info("Slave TrueNAS has booted and is ready for API requests")
+def wait_for_sockets_slave():
+    """Block until the slave's websockets API reports that the system is ready.
+
+    Sleeps for POLLING_RATE seconds before every attempt. A failed connection or a
+    client-level API error is treated as "not booted yet" and retried, the same way
+    wait_for_slave() treats them.
+    """
+    polling_rate = int(os.environ["POLLING_RATE"])
+    while True:
+        time.sleep(polling_rate)
+        try:
+            ready = TrueNASWebsocketsClient.is_ready(
+                host = os.environ["SLAVE_HOST"],
+                username = os.environ["SLAVE_USERNAME"],
+                password = os.environ["SLAVE_PASSWORD"]
+            )
+        except (ConnectionError, truenas_api_client.exc.ClientException):
+            # The middleware can accept connections before it can answer calls, so a client
+            # error here means "keep waiting", not "give up"
+            logging.info("'%s' hasn't booted, waiting for %d more seconds" % (os.environ["SLAVE_HOST"], polling_rate))
+            continue
+
+        logging.info("Slave is ready: " + str(ready))
+        if ready:
+            break
+    logging.info("Slave TrueNAS has booted and is ready for API requests")
+
+
def wait_till_idle_power():
while True:
p = get_mqtt().switch_energy['Power']
@@ -139,26 +285,19 @@ def wait_till_idle_power():
break
def main():
+ start_time = datetime.datetime.now()
+ subprocess.run(["rm", "-f", os.path.join(os.path.dirname(__file__), "*_replication_jobs.pickle")])
+
if os.environ["MASTER_REPLICATION_TASKS"] != "":
- tasks = os.environ["MASTER_REPLICATION_TASKS"].split(",")
+ master_tasks = os.environ["MASTER_REPLICATION_TASKS"].split(",")
else:
- tasks = []
- master = TrueNASAPIClient(
- host = os.environ["MASTER_HOST"],
- api_key = os.environ["MASTER_KEY"],
- replication_task_names = tasks
- )
+ master_tasks = []
if os.environ["SLAVE_REPLICATION_TASKS"] != "":
- tasks = os.environ["SLAVE_REPLICATION_TASKS"].split(",")
+ slave_tasks = os.environ["SLAVE_REPLICATION_TASKS"].split(",")
else:
- tasks = []
- slave = TrueNASAPIClient(
- host = os.environ["SLAVE_HOST"],
- api_key = os.environ["SLAVE_KEY"],
- replication_task_names = tasks
- )
+ slave_tasks = []
- logging.info("Began autoBackup procedure")
+ logging.info("\n\nBegan autoBackup procedure")
m = get_mqtt()
logging.info("Slave plug '%s' is currently %s" % (m.friendlyname, m.switch_power))
if m.switch_power == "ON":
@@ -167,20 +306,57 @@ def main():
was_already_on = False
get_mqtt("ON")
logging.info("Turned on the slave plug. Now waiting for it to boot")
- wait_for_slave(slave)
+ # wait_for_slave(slave)
+ wait_for_sockets_slave()
+
+ with (TrueNASWebsocketsClient(
+ host = os.environ["SLAVE_HOST"],
+ username = os.environ["SLAVE_USERNAME"],
+ password = os.environ["SLAVE_PASSWORD"],
+ replication_task_names = slave_tasks
+ ) as slave, TrueNASWebsocketsClient(
+ host = os.environ["MASTER_HOST"],
+ username = os.environ["MASTER_USERNAME"],
+ password = os.environ["MASTER_PASSWORD"],
+ replication_task_names = master_tasks
+ ) as master
+ ):
+ master.run_all_replication_tasks()
+ slave.run_all_replication_tasks()
+
+ while True:
+ with (TrueNASWebsocketsClient(
+ host = os.environ["SLAVE_HOST"],
+ username = os.environ["SLAVE_USERNAME"],
+ password = os.environ["SLAVE_PASSWORD"],
+ replication_task_names = slave_tasks
+ ) as slave, TrueNASWebsocketsClient(
+ host = os.environ["MASTER_HOST"],
+ username = os.environ["MASTER_USERNAME"],
+ password = os.environ["MASTER_PASSWORD"],
+ replication_task_names = master_tasks
+ ) as master
+ ):
+ if check_if_all_complete([master, slave]):
+ break
- master.run_all_replication_tasks()
- slave.run_all_replication_tasks()
- # while (not master.get_state_of_replication_jobs()) or (not slave.get_state_of_replication_jobs()):
- while not check_if_all_complete([master, slave]):
+ logging.info("Slave plug '%s' is using %dw of power" % (os.environ["SLAVE_PLUG_FRIENDLYNAME"], get_mqtt().switch_energy['Power']))
time.sleep(int(os.environ["POLLING_RATE"]))
+
logging.info("All replication jobs on all hosts complete")
if was_already_on:
logging.info("The slave TrueNAS was turned on not by us, so stopping here")
else:
logging.info("The slave TrueNAS was turned on by us, so starting the shutdown procedure")
- logging.info(json.dumps(slave.shutdown(), indent = 4))
+ with TrueNASWebsocketsClient(
+ host = os.environ["SLAVE_HOST"],
+ username = os.environ["SLAVE_USERNAME"],
+ password = os.environ["SLAVE_PASSWORD"],
+ replication_task_names = slave_tasks
+ ) as slave:
+ slave.shutdown()
+ # logging.info(json.dumps(slave.shutdown(), indent = 4))
# wait until the slave TrueNAS is using 0w of power, which implies it has finished shutting down,
# then turn off the power to it
@@ -188,8 +364,10 @@ def main():
get_mqtt("OFF")
logging.info("Turned off the slave's plug")
- logging.info("autoBackup procedure completed\n\n")
+ logging.info("autoBackup backup procedure completed. Took %s" % str(datetime.datetime.now() - start_time))
if __name__ == "__main__":
main()
+
+
diff --git a/autoBackup/autoScrub.py b/autoBackup/autoScrub.py
new file mode 100644
index 0000000..c34f5aa
--- /dev/null
+++ b/autoBackup/autoScrub.py
@@ -0,0 +1,71 @@
+import subprocess
+from autoBackup import TrueNASWebsocketsClient, get_mqtt, wait_for_sockets_slave, wait_till_idle_power
+import datetime
+import logging
+import time
+import json
+import os
+
+def main():
+    """Power on the slave TrueNAS if needed, scrub the configured pools, then power it off again.
+
+    Reads SLAVE_SCRUB_POOLS, SLAVE_HOST, SLAVE_USERNAME, SLAVE_PASSWORD,
+    SLAVE_PLUG_FRIENDLYNAME and POLLING_RATE from the environment. The slave is only
+    shut down and unplugged if this script was the one that switched the plug on.
+    """
+    start_time = datetime.datetime.now()
+    # Clear state left over from a previous run. Calling `rm -f <dir>/*_scrub_jobs.pickle`
+    # through subprocess without a shell never expands the "*", so nothing was deleted.
+    # The state files are also dot-prefixed, which a shell "*" would not match anyway.
+    script_dir = os.path.dirname(__file__) or "."
+    for filename in os.listdir(script_dir):
+        if filename.endswith("_scrub_jobs.pickle"):
+            os.remove(os.path.join(script_dir, filename))
+
+    if os.environ["SLAVE_SCRUB_POOLS"] != "":
+        scrub_pools = os.environ["SLAVE_SCRUB_POOLS"].split(",")
+    else:
+        scrub_pools = []
+
+    logging.info("Began autoScrub scrub procedure")
+    m = get_mqtt()
+    logging.info("Slave plug '%s' is currently %s" % (m.friendlyname, m.switch_power))
+    if m.switch_power == "ON":
+        # Someone else turned the slave on, so it must not be shut down at the end
+        was_already_on = True
+    else:
+        was_already_on = False
+        get_mqtt("ON")
+        logging.info("Turned on the slave plug. Now waiting for it to boot")
+        # wait_for_slave(slave)
+        wait_for_sockets_slave()
+
+    with TrueNASWebsocketsClient(
+        host = os.environ["SLAVE_HOST"],
+        username = os.environ["SLAVE_USERNAME"],
+        password = os.environ["SLAVE_PASSWORD"]
+    ) as slave:
+        slave.scrub_pools(scrub_pools)
+
+    while True:
+        # A new, freshly authenticated connection is opened for every poll
+        with TrueNASWebsocketsClient(
+            host = os.environ["SLAVE_HOST"],
+            username = os.environ["SLAVE_USERNAME"],
+            password = os.environ["SLAVE_PASSWORD"]
+        ) as slave:
+            if slave.get_state_of_jobs("scrub"):
+                break
+
+        logging.info("Slave plug '%s' is using %dw of power" % (os.environ["SLAVE_PLUG_FRIENDLYNAME"], get_mqtt().switch_energy['Power']))
+        time.sleep(int(os.environ["POLLING_RATE"]))
+
+    logging.info("All scrub jobs on all hosts complete")
+
+    if was_already_on:
+        logging.info("The slave TrueNAS was turned on not by us, so stopping here")
+    else:
+        logging.info("The slave TrueNAS was turned on by us, so starting the shutdown procedure")
+        with TrueNASWebsocketsClient(
+            host = os.environ["SLAVE_HOST"],
+            username = os.environ["SLAVE_USERNAME"],
+            password = os.environ["SLAVE_PASSWORD"],
+        ) as slave:
+            logging.info(json.dumps(slave.shutdown(), indent = 4))
+
+        # wait until the slave TrueNAS is using 0w of power, which implies it has finished shutting down,
+        # then turn off the power to it
+        wait_till_idle_power()
+        get_mqtt("OFF")
+        logging.info("Turned off the slave's plug")
+
+    logging.info("autoScrub scrub procedure completed. Took %s\n\n" % str(datetime.datetime.now() - start_time))
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/autoBackup/docker-compose.yml b/autoBackup/docker-compose.yml
index f23ecb8..71ec0a0 100644
--- a/autoBackup/docker-compose.yml
+++ b/autoBackup/docker-compose.yml
@@ -1,14 +1,12 @@
-version: "3"
-
services:
- autoReplicate:
- container_name: auto_replicate
+ autoBackup:
+ container_name: auto_backup
volumes:
- ./logs/:/app/logs
env_file:
- ./.env
restart: unless-stopped
- image: reg.reaweb.uk/auto_replicate
+ image: reg.reaweb.uk/auto_backup
build:
context: .
dockerfile: Dockerfile
diff --git a/autoBackup/requirements.txt b/autoBackup/requirements.txt
index 2e7e54b..14abc09 100644
--- a/autoBackup/requirements.txt
+++ b/autoBackup/requirements.txt
@@ -1,3 +1,4 @@
python-dotenv
requests
docker
+websocket-client>1.3.2
\ No newline at end of file
diff --git a/do_replicate.sh b/do_replicate.sh
index 19560dc..19560dc 100644..100755
--- a/do_replicate.sh
+++ b/do_replicate.sh