Repository: perrymanuk/hashi-homelab
Branch: master
Commit: c1705ab65354
Files: 180
Total size: 450.4 KB
Directory structure:
gitextract_ikark14b/
├── .bootstrap.mk
├── .gitattributes
├── .github/
│ └── workflows/
│ ├── build-gcp-dns-updater.yaml
│ ├── nomad.yaml
│ ├── update-kideo.yaml
│ ├── update-minecraftmath.yaml
│ ├── update-radbot-dev.yaml
│ └── update-radbot.yaml
├── .gitignore
├── LICENSE
├── Makefile
├── README.md
├── ansible/
│ ├── configs/
│ │ ├── consul.hcl.j2
│ │ ├── consul.service
│ │ ├── docker-daemon.json.j2
│ │ ├── nomad.hcl.j2
│ │ └── nomad.service
│ ├── playbook.yml
│ └── zsh.yml
├── docker_images/
│ ├── gcp-dns-updater/
│ │ ├── Dockerfile
│ │ ├── README.md
│ │ ├── requirements.txt
│ │ └── update_dns.py
│ └── update-metadata/
│ ├── Dockerfile
│ ├── README.md
│ ├── requirements.txt
│ └── update_job_metadata.py
├── envrc
├── nomad_jobs/
│ ├── TEMPLATE-volume.hcl
│ ├── TEMPLATE.job
│ ├── ai-ml/
│ │ ├── cognee/
│ │ │ └── nomad.job
│ │ ├── crawl4ai/
│ │ │ ├── nomad.job
│ │ │ └── volume.hcl
│ │ ├── litellm/
│ │ │ ├── nomad.job
│ │ │ └── volume.hcl
│ │ ├── manyfold/
│ │ │ ├── 3dprints-volume.hcl
│ │ │ ├── nomad.job
│ │ │ ├── prints_volume.hcl
│ │ │ └── volume.hcl
│ │ ├── ollama/
│ │ │ └── nomad.job
│ │ ├── open-webui/
│ │ │ └── nomad.job
│ │ ├── paperless-ai/
│ │ │ └── nomad.job
│ │ ├── pgvector-client/
│ │ │ └── nomad.job
│ │ └── radbot/
│ │ ├── nomad-dev.job
│ │ └── nomad.job
│ ├── core-infra/
│ │ ├── coredns/
│ │ │ ├── README.md
│ │ │ └── nomad.job
│ │ ├── github-runner/
│ │ │ └── nomad.job
│ │ ├── haproxy/
│ │ │ └── nomad.job
│ │ ├── iscsi-csi-plugin/
│ │ │ ├── controller.job
│ │ │ └── node.job
│ │ ├── keepalived/
│ │ │ ├── TODO.md
│ │ │ └── nomad.job
│ │ ├── nfs-csi-plugin/
│ │ │ ├── controller.job
│ │ │ └── nodes.job
│ │ ├── pihole/
│ │ │ ├── nomad.job
│ │ │ └── volume.hcl
│ │ ├── smtp/
│ │ │ └── nomad.job
│ │ ├── tailscale/
│ │ │ ├── nomad.job
│ │ │ └── volume.hcl
│ │ ├── tailscale-este/
│ │ │ ├── nomad.job
│ │ │ └── volume.hcl
│ │ ├── traefik/
│ │ │ ├── config/
│ │ │ │ ├── consul-catalog.yml
│ │ │ │ ├── consul.yml
│ │ │ │ ├── traefik.toml
│ │ │ │ ├── traefik.toml.new
│ │ │ │ └── traefik.toml.test
│ │ │ └── nomad.job
│ │ ├── traefik-forward-auth/
│ │ │ └── nomad.job
│ │ └── vault/
│ │ └── secrets_template.yaml
│ ├── gaming/
│ │ ├── minecraft-1.21/
│ │ │ └── nomad.job
│ │ ├── minecraft-avaritia/
│ │ │ └── nomad.job
│ │ ├── minecraft-axiom/
│ │ │ └── nomad.job
│ │ ├── minecraft-fiskheroes/
│ │ │ └── nomad.job
│ │ └── minecraft-forge/
│ │ └── nomad.job
│ ├── media-stack/
│ │ ├── audioserve/
│ │ │ └── nomad.job
│ │ ├── flaresolverr/
│ │ │ └── nomad.job
│ │ ├── jackett/
│ │ │ └── nomad.job
│ │ ├── lazylibrarian/
│ │ │ └── nomad.job
│ │ ├── lidarr/
│ │ │ ├── nomad.job
│ │ │ └── volume.hcl
│ │ ├── lidify/
│ │ │ └── nomad.job
│ │ ├── maintainerr/
│ │ │ └── nomad.job
│ │ ├── mediasage/
│ │ │ └── nomad.job
│ │ ├── multi-scrobbler/
│ │ │ └── nomad.job
│ │ ├── navidrome/
│ │ │ ├── nomad.job
│ │ │ └── volume.hcl
│ │ ├── ombi/
│ │ │ ├── nomad.job
│ │ │ └── volume.hcl
│ │ ├── overseerr/
│ │ │ ├── nomad.job
│ │ │ └── volume.hcl
│ │ ├── plex/
│ │ │ ├── nomad.job
│ │ │ └── volume.hcl
│ │ ├── prowlarr/
│ │ │ ├── nomad.job
│ │ │ └── volume.hcl
│ │ ├── qbittorrent/
│ │ │ └── nomad.job
│ │ ├── radarr/
│ │ │ ├── nomad.job
│ │ │ └── volume.hcl
│ │ ├── requestrr/
│ │ │ └── nomad.job
│ │ ├── sabnzbd/
│ │ │ └── nomad.job
│ │ ├── sickchill/
│ │ │ └── nomad.job
│ │ ├── sonarr/
│ │ │ └── nomad.job
│ │ ├── synclounge/
│ │ │ └── nomad.job
│ │ ├── tautulli/
│ │ │ └── nomad.job
│ │ └── tdarr/
│ │ ├── nomad.job
│ │ └── volume.hcl
│ ├── misc/
│ │ ├── adb/
│ │ │ └── nomad.job
│ │ ├── gcp-dns-updater/
│ │ │ ├── Dockerfile
│ │ │ ├── README.md
│ │ │ ├── nomad.job
│ │ │ ├── requirements.txt
│ │ │ └── update_dns.py
│ │ ├── gitea/
│ │ │ └── nomad.job
│ │ ├── linuxgsm/
│ │ │ └── nomad.job
│ │ ├── murmur/
│ │ │ └── nomad.job
│ │ ├── octoprint/
│ │ │ └── nomad.job
│ │ └── uploader/
│ │ └── nomad.job
│ ├── observability/
│ │ ├── alertmanager/
│ │ │ ├── nomad.job
│ │ │ └── volume.hcl
│ │ ├── blackbox-exporter/
│ │ │ └── nomad.job
│ │ ├── grafana/
│ │ │ ├── nomad.job
│ │ │ └── volume.hcl
│ │ ├── loki/
│ │ │ ├── nomad.job
│ │ │ └── volume.hcl
│ │ ├── oom-test/
│ │ │ └── nomad.job
│ │ ├── prometheus/
│ │ │ ├── README.md
│ │ │ ├── nomad.job
│ │ │ └── volume.hcl
│ │ ├── telegraf/
│ │ │ └── nomad.job
│ │ ├── truenas-graphite-exporter/
│ │ │ └── nomad.job
│ │ └── vector/
│ │ └── nomad.job
│ ├── personal-cloud/
│ │ ├── actualbudget/
│ │ │ ├── nomad.job
│ │ │ └── volume.hcl
│ │ ├── bitwarden/
│ │ │ └── nomad.job
│ │ ├── nextcloud/
│ │ │ └── nomad.job
│ │ ├── ntfy/
│ │ │ ├── nomad.job
│ │ │ └── volume.hcl
│ │ ├── paperless/
│ │ │ └── nomad.job
│ │ └── radicale/
│ │ └── nomad.job
│ ├── security/
│ │ ├── suricata/
│ │ │ └── nomad.job
│ │ ├── suricata-update/
│ │ │ └── nomad.job
│ │ ├── wazuh-agent/
│ │ │ └── nomad.job
│ │ └── wazuh-server/
│ │ ├── nomad.job
│ │ ├── volume-dashboard.hcl
│ │ ├── volume-indexer.hcl
│ │ └── volume-manager.hcl
│ ├── smart-home/
│ │ ├── deconz/
│ │ │ ├── nomad.job
│ │ │ └── volume.hcl
│ │ ├── home-assistant/
│ │ │ ├── nomad.job
│ │ │ └── volume.hcl
│ │ ├── mqtt/
│ │ │ └── nomad.job
│ │ ├── owntracks-recorder/
│ │ │ └── nomad.job
│ │ └── zigbee2mqtt/
│ │ └── nomad.job
│ ├── storage-backends/
│ │ ├── docker-registry/
│ │ │ ├── nomad.job
│ │ │ └── volume.hcl
│ │ ├── mariadb/
│ │ │ └── nomad.job
│ │ ├── neo4j/
│ │ │ ├── nomad.job
│ │ │ ├── setup.job
│ │ │ └── volume.hcl
│ │ ├── pgvector/
│ │ │ ├── nomad.job
│ │ │ └── pgvector-setup.job
│ │ ├── postgres/
│ │ │ ├── nomad.job
│ │ │ └── postgres-setup.job
│ │ ├── qdrant/
│ │ │ ├── nomad.job
│ │ │ └── volume.hcl
│ │ ├── redis/
│ │ │ ├── nomad.job
│ │ │ └── volume.hcl
│ │ └── volumes/
│ │ └── nfs-example.hcl
│ ├── system/
│ │ └── docker-cleanup/
│ │ └── nomad.job
│ └── web-apps/
│ ├── alertmanager-dashboard/
│ │ └── nomad.job
│ ├── firecrawl/
│ │ └── nomad.job
│ ├── heimdall/
│ │ └── nomad.job
│ ├── homepage/
│ │ └── nomad.job
│ ├── kideo/
│ │ └── nomad.job
│ ├── minecraftmath/
│ │ └── nomad.job
│ ├── wordpress/
│ │ └── nomad.job
│ └── www/
│ ├── Dockerfile
│ └── nomad.job
├── renovate.json
└── services/
└── beefcake.json
================================================
FILE CONTENTS
================================================
================================================
FILE: .bootstrap.mk
================================================
# Git short SHA of HEAD; used to tag built images.
export VERSION_TAG=$(shell git rev-parse --short HEAD)
# Name of the current directory, used as the default Nomad job name.
# NOTE: must be $(PWD), not $PWD — make expands `$PWD` as `$(P)WD` (an empty
# variable `P` followed by the literal text `WD`), which breaks basename.
export JOB_NAME=$(shell basename $(PWD))
# dash-split: return word $2 of $1 split on '-' (e.g. $(call dash-split,a-b,2) -> b).
dash-split = $(word $2,$(subst -, ,$1))
# First/second dash-separated component of the pattern stem $* in pattern rules.
dash-1 = $(call dash-split,$*,1)
dash-2 = $(call dash-split,$*,2)

# Print every target line carrying a '##' description from all loaded makefiles.
help:##............Show this help.
	@echo ""
	@fgrep -h "##" $(MAKEFILE_LIST) | fgrep -v fgrep | sed -e 's/\\$$//' | sed -e 's/##//' | sed 's/^/ /'
	@echo ""
	@echo ""
================================================
FILE: .gitattributes
================================================
*.job linguist-language=HCL
================================================
FILE: .github/workflows/build-gcp-dns-updater.yaml
================================================
# .github/workflows/build-gcp-dns-updater.yaml
name: Build GCP DNS Updater Image
on:
push:
branches:
- main
paths:
- 'docker_images/gcp-dns-updater/**'
workflow_dispatch:
jobs:
build-and-push:
runs-on: ubuntu-latest
permissions:
contents: read
packages: write # Only needed when pushing to GitHub Packages (ghcr.io); the custom registry below does not require it
steps:
- name: Checkout Code
uses: actions/checkout@v6
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v4
- name: Login to Docker Registry
uses: docker/login-action@v4
with:
registry: docker.${{ secrets.NOMAD_VAR_tld }}
username: ${{ secrets.DOCKER_REGISTRY_USER }}
password: ${{ secrets.DOCKER_REGISTRY_PASSWORD }}
- name: Build Image using Makefile
env:
NOMAD_VAR_tld: ${{ secrets.NOMAD_VAR_tld }}
run: make build-gcp-dns-updater
- name: Push Image
run: docker push docker.${{ secrets.NOMAD_VAR_tld }}/gcp-dns-updater:latest
================================================
FILE: .github/workflows/nomad.yaml
================================================
on:
push:
branches:
- master
jobs:
# JOB to run change detection
changes:
runs-on: ubuntu-latest
permissions:
pull-requests: read
outputs:
jobs: ${{ steps.filter.outputs.nomadjobs_files }}
volumes: ${{ steps.filter_volumes.outputs.volumes_files }}
steps:
- name: 'Checkout'
uses: 'actions/checkout@v6'
- uses: dorny/paths-filter@v4
id: filter_volumes
with:
list-files: 'json'
filters: |
volumes:
- 'nomad_jobs/**/volume.hcl'
- 'nomad_jobs/**/*-volume.hcl'
- uses: dorny/paths-filter@v4
id: filter
with:
list-files: 'json'
filters: |
nomadjobs:
# Updated paths based on directory restructure
- 'nomad_jobs/media-stack/plex/*.job'
- 'nomad_jobs/media-stack/radarr/*.job'
- 'nomad_jobs/media-stack/lidarr/*.job'
- 'nomad_jobs/media-stack/overseerr/*.job'
- 'nomad_jobs/storage-backends/postgres/*.job'
- 'nomad_jobs/storage-backends/redis/*.job'
- 'nomad_jobs/storage-backends/pgvector/*.job'
- 'nomad_jobs/core-infra/coredns/*.job'
- 'nomad_jobs/storage-backends/iscsi-csi-plugin/*.job'
- 'nomad_jobs/media-stack/sabnzbd/*.job'
- 'nomad_jobs/media-stack/qbittorrent/*.job'
- 'nomad_jobs/media-stack/prowlarr/*.job'
- 'nomad_jobs/media-stack/tdarr/*.job'
- 'nomad_jobs/core-infra/smtp/*.job'
- 'nomad_jobs/ai-ml/ollama/*.job'
- 'nomad_jobs/ai-ml/open-webui/*.job'
- 'nomad_jobs/misc/gcp-dns-updater/*.job'
- 'nomad_jobs/core-infra/tailscale-este/*.job'
- 'nomad_jobs/core-infra/traefik/*.job'
- 'nomad_jobs/core-infra/iscsi-csi-plugin/*.job'
- 'nomad_jobs/observability/alertmanager/*.job'
- 'nomad_jobs/observability/prometheus/*.job'
- 'nomad_jobs/ai-ml/radbot/*.job'
- 'nomad_jobs/personal-cloud/ntfy/*.job'
- 'nomad_jobs/web-apps/homepage/*.job'
- 'nomad_jobs/media-stack/multi-scrobbler/*.job'
- 'nomad_jobs/media-stack/lidify/*.job'
- 'nomad_jobs/media-stack/mediasage/*.job'
- 'nomad_jobs/core-infra/netboot-xyz/*.job'
- 'nomad_jobs/web-apps/kideo/*.job'
- 'nomad_jobs/web-apps/minecraftmath/*.job'
add_volumes:
runs-on: ubuntu-latest
needs: changes
if: needs.changes.outputs.volumes != '[]'
continue-on-error: true
strategy:
matrix:
job: ${{ fromJSON(needs.changes.outputs.volumes ) }}
steps:
- name: 'Checkout'
uses: 'actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd' # v6
- name: Connect to Tailscale
uses: tailscale/github-action@v4
with:
oauth-client-id: ${{ secrets.TAILSCALE_OAUTH_CLIENT_ID }}
oauth-secret: ${{ secrets.TAILSCALE_OAUTH_SECRET }}
tags: tag:github-actions
args: --accept-dns=true
- name: Setup Nomad
uses: hashicorp/setup-nomad@v1.0.0
with:
version: "1.10.4"
- name: deploy
shell: bash
run: |
# Extract volume ID from the HCL file
VOLUME_ID=$(grep '^id' ${{ matrix.job }} | head -1 | sed 's/.*= *"\(.*\)"/\1/')
# Skip if volume already exists
if nomad volume status "$VOLUME_ID" > /dev/null 2>&1; then
echo "Volume '$VOLUME_ID' already exists, skipping creation"
else
echo "Creating volume '$VOLUME_ID'"
nomad volume create ${{ matrix.job }}
fi
env:
NOMAD_ADDR: '${{ secrets.NOMAD_ADDR }}'
deploy_jobs:
runs-on: ubuntu-latest
needs: changes
if: needs.changes.outputs.jobs != '[]'
continue-on-error: true
strategy:
matrix:
job: ${{ fromJSON(needs.changes.outputs.jobs ) }}
steps:
- name: 'Checkout'
uses: 'actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd' # v6
- name: Connect to Tailscale
uses: tailscale/github-action@v4
with:
oauth-client-id: ${{ secrets.TAILSCALE_OAUTH_CLIENT_ID }}
oauth-secret: ${{ secrets.TAILSCALE_OAUTH_SECRET }}
tags: tag:github-actions
args: --accept-dns=true
- name: Setup Nomad
uses: hashicorp/setup-nomad@v1.0.0
with:
version: "1.10.4"
- name: deploy
shell: bash
run: |
nomad job run ${{ matrix.job }} # Removed -var flags
env:
NOMAD_ADDR: '${{ secrets.NOMAD_ADDR }}'
NOMAD_VAR_region: 'home'
NOMAD_VAR_tld: '${{ secrets.NOMAD_VAR_tld }}' # Corrected case
NOMAD_VAR_shared_dir: '/home/shared/'
NOMAD_VAR_downloads_dir: '/home/sabnzbd/downloads'
NOMAD_VAR_music_dir: '/home/media/Music'
NOMAD_VAR_movies_dir: '/home/media/Movies'
NOMAD_VAR_books_dir: '/home/media/Books'
NOMAD_VAR_tv_dir: '/home/media/TV'
NOMAD_VAR_media_dir: '/home/media'
NOMAD_VAR_hass_key: '${{ secrets.NOMAD_VAR_hass_key }}' # Corrected case
NOMAD_VAR_hass_ip: '${{ secrets.NOMAD_VAR_hass_ip }}'
NOMAD_VAR_github_pat: ${{ secrets.NOMAD_VAR_github_pat }} # Corrected case
NOMAD_VAR_datacenters_all: '["dc1", "public"]'
NOMAD_VAR_datacenters_dc1: '["dc1"]'
NOMAD_VAR_datacenters_public: '["public"]'
NOMAD_VAR_tailscale_auth: '${{ secrets.NOMAD_VAR_tailscale_auth }}' # Corrected case
NOMAD_VAR_tailscale_auth_este: '${{ secrets.NOMAD_VAR_tailscale_auth_este }}' # Corrected case
NOMAD_VAR_oauth_client_id: '${{ secrets.NOMAD_VAR_oauth_client_id }}' # Corrected case
NOMAD_VAR_oauth_client_secret: '${{ secrets.NOMAD_VAR_oauth_client_secret }}' # Corrected case
NOMAD_VAR_oauth_secret: '${{ secrets.NOMAD_VAR_oauth_secret }}' # Corrected case
NOMAD_VAR_oauth_emails: '${{ secrets.NOMAD_VAR_oauth_emails }}' # Corrected case
NOMAD_VAR_ssh_id: '${{ secrets.NOMAD_VAR_ssh_id }}' # Corrected case
NOMAD_VAR_truenas_api_key: '${{ secrets.NOMAD_VAR_truenas_api_key }}' # Corrected case
NOMAD_VAR_gh_access_token: '${{ secrets.NOMAD_VAR_gh_access_token }}' # Corrected case
NOMAD_VAR_ollama_data_dir: '/home/shared/ollama'
NOMAD_VAR_ollama_base_url: 'http://ollama.service.consul:11434'
NOMAD_VAR_webui_secret_key: '${{ secrets.NOMAD_VAR_webui_secret_key }}' # Corrected case
NOMAD_VAR_datacenter: 'dc1'
NOMAD_VAR_dns_server_ip: '192.168.50.2'
# Added missing variables
NOMAD_VAR_aws_access_key: ${{ secrets.NOMAD_VAR_aws_access_key }}
NOMAD_VAR_aws_secret_key: ${{ secrets.NOMAD_VAR_aws_secret_key }}
NOMAD_VAR_bedrock_aws_region: ${{ secrets.NOMAD_VAR_bedrock_aws_region }}
NOMAD_VAR_gcp_dns_admin: ${{ secrets.NOMAD_VAR_gcp_dns_admin }}
NOMAD_VAR_gemini_api_key: ${{ secrets.NOMAD_VAR_gemini_api_key }}
NOMAD_VAR_litellm_master_key: ${{ secrets.NOMAD_VAR_litellm_master_key }}
NOMAD_VAR_manyfold_secret_key: ${{ secrets.NOMAD_VAR_manyfold_secret_key }}
NOMAD_VAR_postgres_pass: ${{ secrets.NOMAD_VAR_postgres_pass }}
NOMAD_VAR_truenas_iscsi_pass: ${{ secrets.NOMAD_VAR_truenas_iscsi_pass }}
# Added gcp_project_id
NOMAD_VAR_gcp_project_id: ${{ secrets.NOMAD_VAR_gcp_project_id }}
# GitHub PAT is now stored securely in secrets
NOMAD_VAR_truenass_iscsi_pass: ${{ secrets.NOMAD_VAR_truenass_iscsi_pass }} # 'truenass' is a typo duplicating NOMAD_VAR_truenas_iscsi_pass above; kept until no job references the misspelled name
NOMAD_VAR_dns_zone: ${{ secrets.NOMAD_VAR_dns_zone }}
NOMAD_VAR_ingress_ip: ${{ secrets.NOMAD_VAR_ingress_ip }}
NOMAD_VAR_radbot_credential_key: ${{ secrets.NOMAD_VAR_radbot_credential_key }}
NOMAD_VAR_radbot_admin_token: ${{ secrets.NOMAD_VAR_radbot_admin_token }}
NOMAD_VAR_radbot_mcp_token: ${{ secrets.NOMAD_VAR_radbot_mcp_token }}
NOMAD_VAR_mullvad_wireguard_key: ${{ secrets.NOMAD_VAR_mullvad_wireguard_key }}
NOMAD_VAR_mullvad_wireguard_addr: ${{ secrets.NOMAD_VAR_mullvad_wireguard_addr }}
NOMAD_VAR_sonarr_api_key: ${{ secrets.NOMAD_VAR_sonarr_api_key }}
NOMAD_VAR_radarr_api_key: ${{ secrets.NOMAD_VAR_radarr_api_key }}
NOMAD_VAR_curseforge_api_key: ${{ secrets.NOMAD_VAR_curseforge_api_key }}
NOMAD_VAR_pgvector_pass: ${{ secrets.NOMAD_VAR_pgvector_pass }}
NOMAD_VAR_pgvector_admin_password: ${{ secrets.NOMAD_VAR_pgvector_admin_password }}
NOMAD_VAR_postgres_admin_password: ${{ secrets.NOMAD_VAR_postgres_admin_password }}
NOMAD_VAR_litellm_crawl4ai_key: ${{ secrets.NOMAD_VAR_litellm_crawl4ai_key }}
NOMAD_VAR_litellm_salt_key: ${{ secrets.NOMAD_VAR_litellm_salt_key }}
NOMAD_VAR_wazuh_api_password: ${{ secrets.NOMAD_VAR_wazuh_api_password }}
NOMAD_VAR_wazuh_dashboard_password: ${{ secrets.NOMAD_VAR_wazuh_dashboard_password }}
NOMAD_VAR_wazuh_indexer_password: ${{ secrets.NOMAD_VAR_wazuh_indexer_password }}
NOMAD_VAR_otr_pass: ${{ secrets.NOMAD_VAR_otr_pass }}
NOMAD_VAR_plex_token: ${{ secrets.NOMAD_VAR_plex_token }}
NOMAD_VAR_listenbrainz_token: ${{ secrets.NOMAD_VAR_listenbrainz_token }}
NOMAD_VAR_listenbrainz_username: ${{ secrets.NOMAD_VAR_listenbrainz_username }}
NOMAD_VAR_lastfm_api_key: ${{ secrets.NOMAD_VAR_lastfm_api_key }}
NOMAD_VAR_lastfm_api_secret: ${{ secrets.NOMAD_VAR_lastfm_api_secret }}
NOMAD_VAR_lidarr_api_key: ${{ secrets.NOMAD_VAR_lidarr_api_key }}
NOMAD_VAR_kideo_jwt_secret: ${{ secrets.NOMAD_VAR_kideo_jwt_secret }}
NOMAD_VAR_kideo_youtube_cookies: ${{ secrets.NOMAD_VAR_kideo_youtube_cookies }}
NOMAD_VAR_kideo_curiositystream_user: ${{ secrets.NOMAD_VAR_kideo_curiositystream_user }}
NOMAD_VAR_kideo_curiositystream_pass: ${{ secrets.NOMAD_VAR_kideo_curiositystream_pass }}
NOMAD_VAR_minecraftmath_jwt_secret: ${{ secrets.NOMAD_VAR_minecraftmath_jwt_secret }}
================================================
FILE: .github/workflows/update-kideo.yaml
================================================
name: Update kideo image tag
on:
repository_dispatch:
types: [update-kideo]
jobs:
update-and-deploy:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v6
with:
token: ${{ secrets.ACTIONS_PAT }}
- name: Update image tag in Nomad job
run: |
TAG="${{ github.event.client_payload.tag }}"
sed -i "s|ghcr.io/perrymanuk/kideo:[^ \"]*|ghcr.io/perrymanuk/kideo:${TAG}|" \
nomad_jobs/web-apps/kideo/nomad.job
echo "Updated kideo image tag to ${TAG}"
- name: Commit and push
run: |
git config user.name "github-actions[bot]"
git config user.email "github-actions[bot]@users.noreply.github.com"
TAG="${{ github.event.client_payload.tag }}"
git add nomad_jobs/web-apps/kideo/nomad.job
git commit -m "chore: bump kideo to ${TAG}"
git push
================================================
FILE: .github/workflows/update-minecraftmath.yaml
================================================
name: Update minecraftmath image tag
on:
repository_dispatch:
types: [update-minecraftmath]
jobs:
update-and-deploy:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v6
with:
token: ${{ secrets.ACTIONS_PAT }}
- name: Update image tag in Nomad job
run: |
TAG="${{ github.event.client_payload.tag }}"
sed -i "s|ghcr.io/perrymanuk/minecraftmath:[^ \"]*|ghcr.io/perrymanuk/minecraftmath:${TAG}|" \
nomad_jobs/web-apps/minecraftmath/nomad.job
echo "Updated minecraftmath image tag to ${TAG}"
- name: Commit and push
run: |
git config user.name "github-actions[bot]"
git config user.email "github-actions[bot]@users.noreply.github.com"
TAG="${{ github.event.client_payload.tag }}"
git add nomad_jobs/web-apps/minecraftmath/nomad.job
git commit -m "chore: bump minecraftmath to ${TAG}"
git push
================================================
FILE: .github/workflows/update-radbot-dev.yaml
================================================
name: Update radbot-dev image tag
on:
repository_dispatch:
types: [update-radbot-dev]
jobs:
update-and-deploy:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v6
with:
token: ${{ secrets.ACTIONS_PAT }}
- name: Update image tag in dev Nomad job
run: |
TAG="${{ github.event.client_payload.tag }}"
sed -i "s|ghcr.io/perrymanuk/radbot:[^ \"]*|ghcr.io/perrymanuk/radbot:${TAG}|" \
nomad_jobs/ai-ml/radbot/nomad-dev.job
echo "Updated radbot-dev image tag to ${TAG}"
- name: Commit and push
run: |
git config user.name "github-actions[bot]"
git config user.email "github-actions[bot]@users.noreply.github.com"
TAG="${{ github.event.client_payload.tag }}"
git add nomad_jobs/ai-ml/radbot/nomad-dev.job
git commit -m "chore: deploy radbot-dev with ${TAG}"
git push
================================================
FILE: .github/workflows/update-radbot.yaml
================================================
name: Update radbot image tag
on:
repository_dispatch:
types: [update-radbot]
jobs:
update-and-deploy:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v6
with:
token: ${{ secrets.ACTIONS_PAT }}
- name: Update image tag in Nomad job
run: |
TAG="${{ github.event.client_payload.tag }}"
sed -i "s|ghcr.io/perrymanuk/radbot:[^ \"]*|ghcr.io/perrymanuk/radbot:${TAG}|" \
nomad_jobs/ai-ml/radbot/nomad.job
echo "Updated radbot image tag to ${TAG}"
- name: Commit and push
run: |
git config user.name "github-actions[bot]"
git config user.email "github-actions[bot]@users.noreply.github.com"
TAG="${{ github.event.client_payload.tag }}"
git add nomad_jobs/ai-ml/radbot/nomad.job
git commit -m "chore: bump radbot to ${TAG}"
git push
================================================
FILE: .gitignore
================================================
.envrc
.env
*-pub
.passwords
.envrc*
vault/secrets.yaml
vault/*.hcl
www/main.jpg
ssl
levant/*
!levant/defaults.yml
hosts
*.swp
.ra-aid
CLAUDE.md
scripts/*
================================================
FILE: LICENSE
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================
FILE: Makefile
================================================
# Load .env files
#include .envrc
include ./.bootstrap.mk
# Base infrastructure services deployed by the deploy-base target.
base_deployments = coredns docker-registry haproxy
#help: # Placeholder for potential future help generation
# Find the nomad job file for a given service name ($1) within nomad_jobs/ structure.
# Usage: $(call find_job_file, service_name)
# Matches either a file named after the service ('<name>.job' / '<name>.nomad')
# or — the layout this repo actually uses — a file named 'nomad.job' inside a
# directory named after the service, e.g. nomad_jobs/core-infra/coredns/nomad.job.
# Without the -path alternative the helper finds nothing, since every job file
# in this repo is named 'nomad.job'.
find_job_file = $(shell find nomad_jobs/ -mindepth 2 -maxdepth 3 -type f \( -name '$1.job' -o -name '$1.nomad' -o -path '*/$1/nomad.job' \) -print -quit)
.PHONY: dc1-%
# Pattern target: `make dc1-<service>` locates <service>'s job file via
# find_job_file and deploys it with the `datacenters` variable pinned to dc1.
dc1-%: ## Deploy specific job to dc1 (searches within nomad_jobs/ structure)
@JOB_FILE=$(call find_job_file,$*); \
if [ -z "$$JOB_FILE" ]; then \
echo "Error: Could not find nomad job file for '$*' in nomad_jobs/."; \
exit 1; \
fi; \
echo "Found job file: $$JOB_FILE"; \
nomad job run -var datacenters='["dc1"]' $$JOB_FILE
.PHONY: all-%
# Pattern target: `make all-<service>` deploys <service> to every datacenter
# (here the concrete list ["dc1", "hetzner"], despite the generic help text).
all-%: ## Deploy specific job to all DCs (searches within nomad_jobs/ structure)
@JOB_FILE=$(call find_job_file,$*); \
if [ -z "$$JOB_FILE" ]; then \
echo "Error: Could not find nomad job file for '$*' in nomad_jobs/."; \
exit 1; \
fi; \
echo "Found job file: $$JOB_FILE"; \
nomad job run -var datacenters='["dc1", "hetzner"]' $$JOB_FILE
# Pattern rule: `make deploy-<service>` deploys one job with the job file's
# own datacenter defaults (no -var datacenters override).
.PHONY: deploy-%
deploy-%: ## Deploy specific job (searches within nomad_jobs/ structure)
	@JOB_FILE=$(call find_job_file,$*); \
	if [ -z "$$JOB_FILE" ]; then \
		echo "Error: Could not find nomad job file for '$*' in nomad_jobs/."; \
		exit 1; \
	fi; \
	echo "Found job file: $$JOB_FILE"; \
	nomad job run $$JOB_FILE
# Deploy the foundational services (DNS, registry, load balancer) that
# everything else depends on, one at a time, aborting on the first failure.
#
# The $(foreach ...) is expanded by make at recipe-parse time into a single
# shell command list, so `exit 1` stops the whole deployment.
#
# FIX: the previous version wrote `@JOB_FILE=$$(call find_job_file,$(var))`,
# which (a) escaped the `$` so the *shell*, not make, tried to expand
# `$(call ...)` -- the lookup never ran and JOB_FILE was always empty -- and
# (b) emitted a literal `@` mid-line for every iteration after the first,
# which is a shell syntax error. The call is now expanded at make time and
# the single `@` prefix sits before the foreach.
.PHONY: deploy-base
deploy-base: ## Deploys base jobs (coredns, docker-registry, haproxy) to dc1
	@echo "Deploying base services to dc1: $(base_deployments)"
	@$(foreach var,$(base_deployments), \
		JOB_FILE="$(call find_job_file,$(var))"; \
		if [ -z "$$JOB_FILE" ]; then \
			echo "Error: Could not find nomad job file for base deployment '$(var)' in nomad_jobs/."; \
			exit 1; \
		fi; \
		echo "Deploying $(var) from $$JOB_FILE..."; \
		nomad job run -var datacenters='["dc1"]' $$JOB_FILE; \
	)
# Render TLS certs from Vault via consul-template (one-shot run).
.PHONY: sslkeys
sslkeys: ## Generate certs if you have SSL enabled
	consul-template -config ssl/consul-template.hcl -once -vault-renew-token=false
# Bundle the hetzner server cert/key into a PKCS#12 file that can be
# imported into a browser keychain. openssl prompts for an export password.
.PHONY: ssl-browser-cert
ssl-browser-cert: ## Generate browser cert if you have SSL enabled
	sudo openssl pkcs12 -export -out browser_cert.p12 -inkey ssl/hetzner/server-key.pem -in ssl/hetzner/server.pem -certfile ssl/hetzner/nomad-ca.pem
# Push every NOMAD_VAR_* variable exported by .envrc into the current repo's
# GitHub Actions secrets via the gh CLI (must already be authenticated).
# NOTE(review): `env` output is parsed line-by-line, so a value containing a
# newline would be truncated at the first line -- fine for typical .envrc
# variables, but worth confirming if multi-line secrets are ever added.
.PHONY: sync-github-secrets
sync-github-secrets: ## Sync NOMAD_VAR variables from .envrc to GitHub secrets using gh CLI
	@echo "Syncing NOMAD_VAR variables from .envrc to GitHub secrets..."
	@bash -c 'source .envrc && env | grep "^NOMAD_VAR_" | while read -r line; do \
		name="$${line%%=*}"; \
		value="$${line#*=}"; \
		echo "Setting $$name"; \
		printf "%s" "$$value" | gh secret set "$$name"; \
	done'
	@echo "✅ All NOMAD_VAR secrets synced to GitHub"
# Build the update-metadata image for amd64 (cluster nodes are x86_64, so the
# platform is pinned even when building on an ARM workstation).
.PHONY: build-update-metadata
build-update-metadata: ## Build the update-metadata Docker image
	@echo "Building update-metadata Docker image..."
	# Assumes update-metadata is in docker_images/update-metadata/
	docker build --platform linux/amd64 -t update-metadata:latest docker_images/update-metadata/
# Build the gcp-dns-updater image, tagged for the private registry at
# docker.<tld>. $$NOMAD_VAR_tld is resolved by the shell from the
# environment (typically exported by .envrc/direnv).
.PHONY: build-gcp-dns-updater
build-gcp-dns-updater: ## Build the gcp-dns-updater Docker image
	@echo "Building gcp-dns-updater Docker image..."
	# Assumes gcp-dns-updater is in docker_images/gcp-dns-updater/
	docker build --platform linux/amd64 -t docker.$$NOMAD_VAR_tld/gcp-dns-updater:latest docker_images/gcp-dns-updater/
# Example deployment target for gcp-dns-updater (if needed, uncomment and adjust)
#.PHONY: deploy-gcp-dns-updater
#deploy-gcp-dns-updater: ## Deploy gcp-dns-updater job using generic target
# $(MAKE) deploy-gcp-dns-updater
================================================
FILE: README.md
================================================
# Hashi-Homelab
### UPDATE - September 2nd 2025
This repo has gone through some major changes since the last update. I've completely reorganized the job structure into 10 clean categories (77 services total now!), added a comprehensive AI/ML stack with Ollama and Open-WebUI, enhanced the monitoring with Loki and Vector for log aggregation, modernized the alertmanager with better persistence and pushover notifications, added weekly docker cleanup automation, redesigned CoreDNS and Traefik for proper HA deployment, and implemented comprehensive Nomad allocation monitoring. The GitHub Actions deployment has been refined with better change detection and the whole thing just runs much more smoothly now. Also added a bunch of new services like smart home integration, personal cloud apps, and storage backends including pgvector for AI workloads, plus a few other bits and bobs that make the whole setup more robust.
### Background
The hashi-homelab was born of a desire to have a simple to maintain but very flexible homelab setup. While designed to work as a cohesive whole, each individual job can be taken and deployed on any Nomad cluster with minimal adjustments - they're built to be portable and self-contained.
The main goals were to keep the resources required to run the base lab setup small and to have all of the parts be easily exchangeable.
`make deploy-base` will deploy coredns, docker-registry and haproxy - these are needed for everything else to work but aside from these you can pick and choose what to deploy with `make deploy-SERVICE_NAME` to deploy any of the 77 services organized across 10 categories. `make deploy-prometheus` or `make deploy-ollama` for example. You can also target specific datacenters with `make dc1-traefik` or `make all-postgres`.
The whole thing is organized much better now with services grouped into logical categories like ai-ml, media-stack, smart-home, observability, etc. Makes it way easier to find what you're looking for and deploy related services together.
In the future I would like to provide a ready to boot image for a raspberry pi where you can run all of this as the resources needed are really minimal. With just the basics you can get away with one pi4 4gb model with plenty of room to spare.
### Core Components:
* **Scheduler**: Nomad *...with proper allocation monitoring now*
* **Service Catalog/Registry**: Consul
* **Service Mesh**: Traefik *...redesigned for HA deployment, much more robust*
* **VPN**: Tailscale *...can't say enough good things about tailscale, it's integral to my homelab now*
* **DNS**: CoreDNS *...now with HA setup and proper failover*
* **Keepalived**: Assign a floating IP for DNS to not lose it if a node goes down
* **Monitoring**: Prometheus, Alertmanager, Telegraf, Blackbox-exporter, and Grafana *...plus Loki and Vector for log aggregation*
* **Container Registry**: Docker-Registry *...because sometimes you don't want to rely on Docker Hub being up*
* **AI/ML**: Ollama for local LLM serving, Open-WebUI for chat interface, LiteLLM for API compatibility
* **Vector Database**: PostgreSQL with pgvector extension for AI/ML vector embeddings storage and similarity search
* **Storage**: NFS and iSCSI CSI plugins for persistent storage across the cluster
### Service Categories (77 total):
* **ai-ml** (8): ollama, open-webui, litellm, cognee, crawl4ai, manyfold, paperless-ai, pgvector-client
* **core-infra** (13): coredns, traefik, haproxy, keepalived, tailscale, github-runner, csi plugins, etc.
* **media-stack** (16): plex, sonarr, radarr, lidarr, sabnzbd, qbittorrent, overseerr, navidrome, etc.
* **personal-cloud** (4): nextcloud, bitwarden, paperless, radicale
* **smart-home** (5): home-assistant, deconz, zigbee2mqtt, mqtt, owntracks-recorder
* **observability** (7): prometheus, grafana, alertmanager, loki, vector, telegraf, blackbox-exporter
* **storage-backends** (9): postgres, pgvector, redis, mariadb, neo4j, qdrant, docker-registry, etc.
* **web-apps** (5): heimdall, wordpress, firecrawl, alertmanager-dashboard, www
* **misc** (7): gitea, uploader, murmur, octoprint, adb, linuxgsm, gcp-dns-updater
* **system** (3): docker-cleanup, volumes
### Setup
You need to have Nomad and Consul already running, a simple setup with the -dev flag will suffice for testing but you'll want a proper cluster for real usage. If you don't already have a Nomad and Consul cluster, there are some excellent guides here...
https://www.nomadproject.io/guides/install/production/deployment-guide.html
https://learn.hashicorp.com/consul/datacenter-deploy/deployment-guide
There are also some files in the `config` folder to help you get started and also one with some services to announce so the Consul and Nomad UI are available over the service mesh.
This repo relies on a `.envrc` file and direnv installed or setting the environment variables manually.
There is an `envrc` example file located in the repo that you can fill in and move to `.envrc`
The secret values from the `.envrc` also need to be put into your github secrets if you plan on deploying via the automated workflow. You can use `make sync-github-secrets` to sync them all at once which is pretty handy.
Once this is done, you simply run a `make deploy-base` and point your DNS to resolve via one of the Nomad nodes' IP address.
One of the more specific parts of the setup that you may need to adjust is that I use several NFS mounts to provide persistent storage mounted on each client at `/home/shared` for configs and `/home/media` for images, video, audio, etc. Depending on which parts of this you are planning to deploy you will just need to adjust this persistent storage to meet the setup of your clients. The CSI plugins help make this more flexible now.
Services are exposed by their task name in the nomad job and whatever you configure your TLD to be in the `.envrc`. The whole thing works really well with the automated GitHub Actions deployment now - just push changes and they get deployed automatically to your cluster. This requires tailscale for the GitHub Actions to connect to your cluster.
================================================
FILE: ansible/configs/consul.hcl.j2
================================================
#jinja2: trim_blocks:False
# Consul agent configuration template. Ansible's Jinja pass runs first;
# {% raw %} sections protect Consul's own go-sockaddr templates
# ({{ GetInterfaceIP ... }} etc.) so they are evaluated by Consul at startup.
server = {% if "lan-client-server" in group_names %}true{% else %}false{% endif %}
ui = {% if "lan-client-server" in group_names %}true{% else %}false{% endif %}
{% if "wan-clients" in group_names %}
{% raw %}
# WAN clients are reachable only over Tailscale, so bind everything to it.
client_addr = "{{GetInterfaceIP \"tailscale0\"}}"
advertise_addr = "{{GetInterfaceIP \"tailscale0\"}}"
bind_addr = "{{GetInterfaceIP \"tailscale0\"}}"
{% endraw %}
{% else %}
{% raw %}
# LAN nodes listen everywhere but advertise their 192.168.50.0/24 address.
client_addr = "0.0.0.0"
advertise_addr = "{{ GetPrivateInterfaces | include \"network\" \"192.168.50.0/24\" | attr \"address\" }}"
bind_addr = "0.0.0.0"
{% endraw %}
{% endif %}
{% raw %}
advertise_addr_wan = "{{ GetPrivateInterfaces | include \"network\" \"192.168.50.0/24\" | attr \"address\" }}"
{% endraw %}
translate_wan_addrs = true
data_dir = "/var/lib/consul"
datacenter = "homelab"
enable_syslog = true
leave_on_terminate = true
log_level = "WARN"
# The three LAN server nodes (same trio as Nomad's retry_join).
retry_join = ["192.168.50.39", "192.168.50.113", "192.168.50.85"]
# Only server nodes expect a quorum of three.
{% if "lan-client-server" in group_names %}bootstrap_expect = 3{% else %}{% endif %}
telemetry {
  prometheus_retention_time = "60s"
}
================================================
FILE: ansible/configs/consul.service
================================================
# systemd unit for the Consul agent. Ordered after tailscaled because WAN
# clients bind Consul to the tailscale0 interface (see consul.hcl.j2).
[Unit]
Description=consul agent
Requires=network-online.target tailscaled.service
After=network-online.target tailscaled.service

[Service]
# Crude grace period so network interfaces (tailscale0 in particular) exist
# before Consul tries to resolve its bind/advertise addresses.
ExecStartPre=/bin/sleep 30
EnvironmentFile=-/etc/default/consul
Restart=always
ExecStart=/usr/bin/consul agent -domain consul -ui -config-dir=/etc/consul.d
ExecReload=/bin/kill -HUP $MAINPID
# SIGINT makes the agent leave gracefully (pairs with leave_on_terminate=true
# in the Consul config).
KillSignal=SIGINT

[Install]
WantedBy=multi-user.target
================================================
FILE: ansible/configs/docker-daemon.json.j2
================================================
{
"dns": ["192.168.50.2", "192.168.50.1", "8.8.8.8"]{% if 'cheese' in group_names %},
"runtimes": {
"nvidia": {
"args": [],
"path": "nvidia-container-runtime"
}
}
{% endif %}
}
================================================
FILE: ansible/configs/nomad.hcl.j2
================================================
#jinja2: trim_blocks:False
# Nomad agent configuration template. Ansible's Jinja pass renders the
# group/host specific values; {% raw %} sections protect Nomad's own
# go-sockaddr templates so they are evaluated by the agent at startup.
data_dir = "/var/lib/nomad/"
# The GPU host(s) and the minecraft host get dedicated datacenters so jobs
# can be pinned to them; everything else lands in dc1.
datacenter = {% if "cheese" in group_names %}"cheese"{% elif "minecraft" in group_names %}"minecraft"{% else %}"dc1"{% endif %}
log_level = "warn"
bind_addr = "0.0.0.0"
region = "home"

server {
  # Only hosts in the lan-client-server group run the Nomad server role.
  enabled = {% if "lan-client-server" in group_names %}true{% else %}false{% endif %}
  bootstrap_expect = 3
  server_join {
    retry_join = ["192.168.50.39", "192.168.50.113", "192.168.50.85"]
    retry_max = 3
    retry_interval = "15s"
  }
  authoritative_region = "home"
  # Generous heartbeat settings so brief network blips don't cause
  # allocations to be rescheduled.
  heartbeat_grace = "300s"
  min_heartbeat_ttl = "20s"
}

client {
  # Every node (servers included) also runs the client role.
  enabled = true
{% raw %}
  network_interface = "{{ GetPrivateInterfaces | include \"network\" \"192.168.50.0/24\" | attr \"name\" }}"
{% endraw %}
  options {
    docker.auth.config = "/root/.docker/config.json"
    docker.privileged.enabled = true
    driver.raw_exec.enable = "1"
    docker.volumes.enabled = true
  }
  meta {
    # WAN clients have neither the NFS mounts nor LAN DNS; jobs use these
    # meta keys in constraints to avoid being placed there.
    shared_mount = {% if "wan-clients" in group_names %}"false"{% else %}"true"{% endif %}
    dns = {% if "wan-clients" in group_names %}"false"{% else %}"true"{% endif %}
    # keepalived priorities: klo01 is pinned low (preferred for the primary
    # VIPs); every other node gets a random priority in 101-200.
{%- if ansible_hostname == "klo01" %}
    keepalived_priority = "100"
    keepalived_priority_dns1 = "100"
    keepalived_priority_dns2 = "{{ 200 | random(start=101) }}"
{%- else %}
    keepalived_priority = "{{ 200 | random(start=101) }}"
    keepalived_priority_dns1 = "{{ 200 | random(start=101) }}"
    keepalived_priority_dns2 = "{{ 200 | random(start=101) }}"
{%- endif %}
  }
  host_network "lan" {
    cidr = "192.168.50.0/24"
    reserved_ports = "22"
  }
  host_network "tailscale" {
    cidr = "100.0.0.0/8"
    reserved_ports = "22"
  }
{% if "wan-clients" in group_names %}
  host_network "public" {
    cidr = "78.47.90.68/32"
    reserved_ports = "22"
  }
{%- endif %}
{%- if ansible_hostname == "klo01" %}
  # NOTE(review): memory reserved away from the scheduler on klo01 --
  # presumably for host-level services; confirm before changing.
  reserved {
    memory = 3072
  }
{%- endif %}
}

telemetry {
  disable_hostname = true
  prometheus_metrics = true
  publish_allocation_metrics = true
  publish_node_metrics = true
  use_node_name = false
}

{% raw %}
advertise {
  http = "{{ GetPrivateInterfaces | include \"network\" \"192.168.50.0/24\" | attr \"address\" }}:4646"
  rpc = "{{ GetPrivateInterfaces | include \"network\" \"192.168.50.0/24\" | attr \"address\" }}:4647"
  serf = "{{ GetPrivateInterfaces | include \"network\" \"192.168.50.0/24\" | attr \"address\" }}:4648"
}
{% endraw %}

consul {
  # The address to the Consul agent.
{%- raw %}
  address = "127.0.0.1:8500"
{%- endraw %}
  # The service name to register the server and client with Consul.
  client_service_name = "nomad-client"
  # Enables automatically registering the services.
  auto_advertise = true
  # Enabling the server and client to bootstrap using Consul.
  server_auto_join = true
  client_auto_join = true
}

#vault {
#  enabled = true
#  address = "http://vault.service.home:8200"
#  allow_unauthenticated = true
#  create_from_role = "nomad-cluster"
#}

plugin "docker" {
  config {
    allow_caps = ["CHOWN","DAC_OVERRIDE","FSETID","FOWNER","MKNOD","NET_RAW","SETGID","SETUID","SETFCAP","SETPCAP","NET_BIND_SERVICE","SYS_CHROOT","KILL","AUDIT_WRITE","NET_ADMIN","NET_BROADCAST","SYS_NICE"]
    # extra Docker labels to be set by Nomad on each Docker container with the appropriate value
    extra_labels = ["job_name", "task_group_name", "task_name", "namespace", "node_name"]
    allow_privileged = true
    volumes {
      enabled = true
      selinuxlabel = "z"
    }
  }
}
================================================
FILE: ansible/configs/nomad.service
================================================
# systemd unit for the Nomad agent. Ordered after tailscaled and the NFS
# mounts so tasks never start against an empty mount point.
[Unit]
Description=nomad.agent
Requires=network-online.target tailscaled.service
After=network-online.target tailscaled.service remote-fs.target
# Hard requirement: Nomad must not start until NFS mounts are ready
RequiresMountsFor=/home/shared /home/media/TV /home/media/Music /home/media/Movies /home/media/Books

[Service]
EnvironmentFile=-/etc/default/nomad
Restart=on-failure
RestartSec=10
ExecStart=/usr/bin/nomad agent $OPTIONS -config=/etc/nomad.d/nomad.hcl
ExecReload=/bin/kill -HUP $MAINPID
# SIGINT asks the agent to shut down gracefully; KillMode=process leaves
# task child processes running so allocations survive an agent restart.
KillSignal=SIGINT
KillMode=process

[Install]
WantedBy=multi-user.target
================================================
FILE: ansible/playbook.yml
================================================
---
- name: network mounts
hosts:
- lan-client-server
- lan-client
- cheese
- minecraft
become: true
remote_user: root
tasks:
- name: Configure static IP via netplan
copy:
dest: /etc/netplan/00-installer-config.yaml
content: |
network:
version: 2
ethernets:
ens3:
addresses:
- {{ inventory_hostname }}/24
routes:
- to: default
via: 192.168.50.1
nameservers:
addresses:
- 192.168.50.1
notify: Apply netplan
- name: Ensure directories exist
file:
path: "{{ item }}"
state: directory
mode: '0755'
with_items:
- /home/shared
- /home/media/TV
- /home/media/Music
- /home/media/Movies
- /home/media/Books
- name: makesure multipath.conf exists
copy:
content: ""
dest: /etc/multipath.conf
force: no
backup: yes
ignore_errors: yes
- name: Manage /etc/multipath.conf
blockinfile:
path: /etc/multipath.conf
block: |
defaults {
user_friendly_names yes
find_multipaths yes
}
- name: Install Apt packages
apt:
name:
- nfs-common
- avahi-daemon
- docker.io
- open-iscsi
- lsscsi
- sg3-utils
- multipath-tools
- scsitools
- name: Ensure /etc/docker directory exists
file:
path: /etc/docker
state: directory
mode: '0755'
- name: Add NVIDIA Container Toolkit GPG key
apt_key:
url: https://nvidia.github.io/libnvidia-container/gpgkey
state: present
keyring: /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
when: "'cheese' in group_names"
- name: Add NVIDIA Container Toolkit repository
apt_repository:
repo: "deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://nvidia.github.io/libnvidia-container/stable/deb/$(ARCH) /"
state: present
filename: nvidia-container-toolkit
when: "'cheese' in group_names"
- name: Install NVIDIA Container Toolkit
apt:
name: nvidia-container-toolkit
state: present
update_cache: yes
when: "'cheese' in group_names"
- name: Configure Docker daemon with fallback DNS and nvidia runtime
template:
src: configs/docker-daemon.json.j2
dest: /etc/docker/daemon.json
notify: Restart Docker
- name: Remove old NFS fstab entries
lineinfile:
path: /etc/fstab
regexp: '^192\.168\.50\.208:/mnt/.*'
state: absent
- name: Add NFS fstab entries with proper options
blockinfile:
path: /etc/fstab
marker: "# {mark} ANSIBLE MANAGED NFS MOUNTS"
block: |
192.168.50.208:/mnt/pool0/share /home/shared nfs4 _netdev,hard,timeo=600,retrans=5,x-systemd.mount-timeout=90,x-systemd.requires=network-online.target,x-systemd.after=network-online.target 0 0
192.168.50.208:/mnt/pool1/media/TV /home/media/TV nfs4 _netdev,hard,timeo=600,retrans=5,x-systemd.mount-timeout=90,x-systemd.requires=network-online.target,x-systemd.after=network-online.target 0 0
192.168.50.208:/mnt/pool0/media/music /home/media/Music nfs4 _netdev,hard,timeo=600,retrans=5,x-systemd.mount-timeout=90,x-systemd.requires=network-online.target,x-systemd.after=network-online.target 0 0
192.168.50.208:/mnt/pool1/media/Movies /home/media/Movies nfs4 _netdev,hard,timeo=600,retrans=5,x-systemd.mount-timeout=90,x-systemd.requires=network-online.target,x-systemd.after=network-online.target 0 0
192.168.50.208:/mnt/pool0/media/audiobooks /home/media/Books nfs4 _netdev,hard,timeo=600,retrans=5,x-systemd.mount-timeout=90,x-systemd.requires=network-online.target,x-systemd.after=network-online.target 0 0
notify:
- Reload systemd fstab
- Mount Filesystems
- name: Enable services
systemd:
name: "{{ item }}"
enabled: yes
state: started
with_items:
- open-iscsi
- multipath-tools
handlers:
- name: Apply netplan
command: netplan apply
- name: Reload systemd fstab
systemd:
daemon_reload: yes
- name: Mount Filesystems
command: mount -a
- name: Restart Docker
service:
name: docker
state: restarted
- name: Update configuration, execute command, and install packages
hosts:
- lan-client-server
- lan-client
- wan-clients
- cheese
- minecraft
remote_user: root
#roles:
# - role: artis3n.tailscale
# vars:
# # Example pulling the API key from the env vars on the host running Ansible
# tailscale_authkey: "{{ lookup('env', 'NOMAD_VAR_tailscale_auth') }}"
# tailscale_args: "{% if 'wan-clients' in group_names %}--accept-routes=true{% else %}--accept-routes=false{% endif %}"
tasks:
- name: Ensure directories exist
file:
path: "{{ item }}"
state: directory
mode: '0755'
with_items:
- /var/lib/nomad
- /var/lib/consul
- /etc/nomad.d
- /etc/consul.d
- name: Manage systemd service file nomad
copy:
src: configs/nomad.service
dest: /lib/systemd/system/nomad.service
notify: Reload systemd
- name: Manage systemd service file consul
copy:
src: configs/consul.service
dest: /lib/systemd/system/consul.service
notify: Reload systemd
- name: manage nomad config
template:
src: configs/nomad.hcl.j2
dest: /etc/nomad.d/nomad.hcl
notify: Restart Service
- name: manage consul config
template:
src: configs/consul.hcl.j2
dest: /etc/consul.d/server.hcl
- name: Add HashiCorp APT repository key
apt_key:
url: https://apt.releases.hashicorp.com/gpg
state: present
validate_certs: no
keyring: /usr/share/keyrings/hashicorp-archive-keyring.gpg
- name: Configure HashiCorp APT repository
apt_repository:
repo: "deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] https://apt.releases.hashicorp.com {{ ansible_distribution_release }} main"
- name: Install Apt packages
apt:
name:
- nomad=1.10.4-1
- consul=1.19.1-1
dpkg_options: 'force-confdef,force-confold'
update_cache: true
state: latest
allow_downgrade: true
- name: Modify sysctl entry for net.ipv4.ip_nonlocal_bind
sysctl:
name: "{{ item.name }}"
value: "{{ item.value }}"
state: present
with_items:
- { name: "net.ipv4.ip_nonlocal_bind", value: "1" }
- { name: "net.ipv4.conf.all.forwarding", value: "1" }
notify: Apply Sysctl Changes
- name: Enable services
systemd:
name: "{{ item }}"
enabled: yes
state: started
with_items:
- nomad
- consul
- tailscaled
handlers:
- name: Restart Service
service:
name: nomad
state: restarted
- name: Reload systemd
systemd:
daemon_reload: yes
- name: Mount Filesystems
command: mount -a
- name: Apply Sysctl Changes
command: sysctl -p /etc/sysctl.conf
- name: Install and configure Tailscale
hosts:
- all
become: yes
remote_user: root
gather_facts: yes
tags: tailscale
vars:
# Read authkey from environment variable; default to 'MISSING' if not set
tailscale_auth_key: "{{ lookup('env', 'NOMAD_VAR_tailscale_auth') | default('MISSING') }}"
# Optionally customize your Tailscale hostname
tailscale_hostname: "{{ inventory_hostname }}"
# Tag to advertise (must match OAuth client tag)
tailscale_tags: "tag:nomad"
tasks:
- name: Download Tailscale GPG key via curl
shell: >
curl -fsSL https://pkgs.tailscale.com/stable/ubuntu/noble.noarmor.gpg
| tee /usr/share/keyrings/tailscale-archive-keyring.gpg
>/dev/null
changed_when: true
- name: Update apt cache
apt:
update_cache: yes
- name: Configure Tailscale apt repository
copy:
dest: /etc/apt/sources.list.d/tailscale.list
content: |
deb [signed-by=/usr/share/keyrings/tailscale-archive-keyring.gpg arch=amd64] https://pkgs.tailscale.com/stable/ubuntu/ noble main
- name: Update apt cache (after adding Tailscale repo)
apt:
update_cache: yes
- name: Install Tailscale
apt:
name: tailscale
state: latest
- name: Enable and start tailscaled service
service:
name: tailscaled
state: started
enabled: yes
- name: Bring Tailscale interface up using authkey
# "command" used because there's no official Ansible module for "tailscale up".
# This is not strictly idempotent; see notes below for advanced usage.
command: >
tailscale up
--authkey={{ tailscale_auth_key }}
--hostname={{ tailscale_hostname }}
--advertise-tags={{ tailscale_tags }}
--accept-dns=false
--reset
register: tailscale_up
changed_when: "'Success' in tailscale_up.stdout or 'Success' in tailscale_up.stderr or tailscale_up.rc == 0"
- name: Show tailscale status
command: tailscale status
register: tailscale_status
changed_when: false
- debug:
var: tailscale_status.stdout
- name: Install Zsh and Oh My Zsh with Agnoster theme
hosts: all
become: yes
remote_user: root
gather_facts: yes
vars:
my_zsh_user: "root" # Change this to the desired user
tasks:
- name: Install zsh
apt:
name: zsh
state: present
update_cache: yes
- name: Ensure home directory path is known
user:
name: "{{ my_zsh_user }}"
register: user_info # This captures the user details, including home directory.
- name: Check if Oh My Zsh is already installed
stat:
path: "/root/.oh-my-zsh"
register: oh_my_zsh_stat
- name: Check if zshrc exists
stat:
path: "/root/.zshrc"
register: zshrc_stat
- name: Clone Oh My Zsh
git:
repo: "https://github.com/ohmyzsh/ohmyzsh.git"
dest: "/root/.oh-my-zsh"
become_user: "{{ my_zsh_user }}"
when: not oh_my_zsh_stat.stat.exists
- name: Copy the default .zshrc template if not present
copy:
src: "/root/.oh-my-zsh/templates/zshrc.zsh-template"
dest: "/root/.zshrc"
remote_src: yes
become_user: "{{ my_zsh_user }}"
when: not zshrc_stat.stat.exists
- name: Set Oh My Zsh theme to agnoster
# Uses a regex replace to ensure 'ZSH_THEME="agnoster"'
replace:
path: "/root/.zshrc"
regexp: '^ZSH_THEME="[^"]+"'
replace: 'ZSH_THEME="agnoster"'
become_user: "{{ my_zsh_user }}"
- name: Change default shell to zsh for the user
user:
name: "{{ my_zsh_user }}"
shell: /usr/bin/zsh
================================================
FILE: ansible/zsh.yml
================================================
---
# Standalone convenience playbook: install Zsh plus Oh My Zsh with the
# Agnoster theme on the "cheese" host. (playbook.yml carries an equivalent
# play targeting all hosts.)
- name: Install Zsh and Oh My Zsh with Agnoster theme
  hosts: cheese
  become: yes
  remote_user: root
  gather_facts: yes
  vars:
    my_zsh_user: "root" # Change this to the desired user
  tasks:
    - name: Install zsh
      apt:
        name: zsh
        state: present
        update_cache: yes
    - name: Ensure home directory path is known
      user:
        name: "{{ my_zsh_user }}"
      register: user_info # This captures the user details, including home directory.
    # Existence checks make the clone/copy tasks below idempotent.
    - name: Check if Oh My Zsh is already installed
      stat:
        path: "/root/.oh-my-zsh"
      register: oh_my_zsh_stat
    - name: Check if zshrc exists
      stat:
        path: "/root/.zshrc"
      register: zshrc_stat
    - name: Clone Oh My Zsh
      git:
        repo: "https://github.com/ohmyzsh/ohmyzsh.git"
        dest: "/root/.oh-my-zsh"
      become_user: "{{ my_zsh_user }}"
      when: not oh_my_zsh_stat.stat.exists
    - name: Copy the default .zshrc template if not present
      copy:
        src: "/root/.oh-my-zsh/templates/zshrc.zsh-template"
        dest: "/root/.zshrc"
        remote_src: yes
      become_user: "{{ my_zsh_user }}"
      when: not zshrc_stat.stat.exists
    - name: Set Oh My Zsh theme to agnoster
      # Uses a regex replace to ensure 'ZSH_THEME="agnoster"'
      replace:
        path: "/root/.zshrc"
        regexp: '^ZSH_THEME="[^"]+"'
        replace: 'ZSH_THEME="agnoster"'
      become_user: "{{ my_zsh_user }}"
    - name: Change default shell to zsh for the user
      user:
        name: "{{ my_zsh_user }}"
        shell: /usr/bin/zsh
================================================
FILE: docker_images/gcp-dns-updater/Dockerfile
================================================
# Minimal runtime image for the GCP dynamic-DNS updater script.
FROM python:3.14-slim
# Set the working directory in the container
WORKDIR /app
# Copy the requirements file into the container at /app
# (copied before the source so the pip layer is cached across code-only edits)
COPY requirements.txt .
# Install any needed packages specified in requirements.txt
# Using --no-cache-dir to reduce image size
RUN pip install --no-cache-dir -r requirements.txt
# Copy the current directory contents into the container at /app
COPY update_dns.py .
# Define the command to run the application
CMD ["python", "update_dns.py"]
================================================
FILE: docker_images/gcp-dns-updater/README.md
================================================
# GCP Dynamic DNS Updater Service
This service periodically checks the public IPv4 address of the node it's running on and updates a specified A record in a Google Cloud DNS managed zone. It's designed to run as a Nomad job within the Hashi-Homelab environment, utilizing a **pre-built Docker image**.
## Features
* Fetches the current public IPv4 address from `https://v4.ifconfig.co/ip`.
* Uses the `google-cloud-dns` Python SDK to interact with Google Cloud DNS.
* Authenticates using a GCP Service Account key provided via an environment variable.
* Checks the specified DNS record:
* If it's a CNAME, it deletes the CNAME record.
* If it's an A record, it updates the IP address if it has changed.
* If it doesn't exist (or after deleting a CNAME), it creates the A record with the specified TTL.
* Runs periodically via a Nomad job, executing the Python script within the pre-built Docker container.
## Prerequisites
1. **Docker:** Docker must be installed locally to build the service image.
2. **GCP Service Account:** You need a Google Cloud Platform service account with the necessary permissions to manage DNS records.
* Go to the GCP Console -> IAM & Admin -> Service Accounts.
* Create a new service account (e.g., `gcp-dns-updater-sa`).
* Grant this service account the `DNS Administrator` role (`roles/dns.admin`) on the project containing your managed zone.
* Create a JSON key file for this service account and download it securely. You will need the *contents* of this file, not the file itself.
3. **Nomad Environment:** A running Nomad cluster where this job can be scheduled. The Nomad clients must have Docker installed and configured.
## Configuration
The service is configured via environment variables passed to the Nomad task, which are then consumed by the `update_dns.py` script running inside the Docker container:
* `GCP_DNS_ZONE_NAME`: The name of the managed zone in GCP DNS (e.g., `demonsafe-com`). The script derives the Project ID from the credentials.
* `GCP_DNS_RECORD_NAME`: The DNS record name to update (e.g., `*.demonsafe.com`). **Note:** The script expects the base name; the trailing dot is handled internally if needed by the SDK.
* `RECORD_TTL`: (Optional) The Time-To-Live (in seconds) for the created/updated A record. Defaults to 300 if not set.
* `GCP_PROJECT_ID`: The Google Cloud Project ID containing the DNS zone.
* `GCP_SERVICE_ACCOUNT_KEY_B64`: **Required.** The base64-encoded *content* of the GCP service account JSON key file.
**Generating the Base64 Key:**
You need to encode the *content* of your downloaded JSON key file into a single-line base64 string.
On Linux/macOS, you can use:
```bash
base64 -w 0 < /path/to/your/gcp_key.json
```
*(Ensure you use `-w 0` or an equivalent flag for your `base64` command to prevent line wrapping)*
Copy the resulting string.
**Setting Environment Variables in Nomad:**
These variables are defined within the `env` block of the `nomad.job` file using Go templating to read runtime environment variables provided by the Nomad agent (which in turn are often sourced from the deployment mechanism, like GitHub Actions):
```hcl
# Example within nomad.job task config
env {
GCP_DNS_ZONE_NAME = < {existing_a_record.rrdatas}")
elif record_set.record_type == 'CNAME' and record_set.name == fqdn:
existing_cname_record = record_set
logging.info(f"Found existing CNAME record: {existing_cname_record.name} -> {existing_cname_record.rrdatas}")
changes = zone.changes()
needs_update = False
# Handle existing CNAME (delete it to replace with A)
if existing_cname_record:
logging.warning(f"Deleting existing CNAME record {fqdn} to replace with A record.")
changes.delete_record_set(existing_cname_record)
needs_update = True
# Ensure we don't try to delete an A record if we just deleted a CNAME
existing_a_record = None
# Define the new A record we want
new_a_record = zone.resource_record_set(fqdn, "A", 300, [ip_address])
# Handle existing A record
if existing_a_record:
if existing_a_record.rrdatas == [ip_address]:
logging.info(f"Existing A record {fqdn} already points to {ip_address}. No update needed.")
return # Nothing to do
else:
logging.info(f"Existing A record {fqdn} points to {existing_a_record.rrdatas}. Updating to {ip_address}.")
changes.delete_record_set(existing_a_record)
changes.add_record_set(new_a_record)
needs_update = True
# Handle case where no A record (and no CNAME was found/deleted)
elif not existing_cname_record: # Only add if we didn't already decide to replace CNAME
logging.info(f"No existing A or CNAME record found for {fqdn}. Creating new A record pointing to {ip_address}.")
changes.add_record_set(new_a_record)
needs_update = True
# Handle case where CNAME was found and deleted - we still need to add the A record
elif existing_cname_record:
logging.info(f"Adding A record for {fqdn} pointing to {ip_address} after CNAME deletion.")
changes.add_record_set(new_a_record)
# needs_update should already be True
# Execute the changes if any were queued
if needs_update:
logging.info(f"Executing DNS changes for {fqdn} in zone {gcp_zone_name}...")
changes.create()
# Wait until the changes are finished.
while changes.status != 'done':
logging.info(f"Waiting for DNS changes to complete (status: {changes.status})...")
time.sleep(5) # Wait 5 seconds before checking again
changes.reload()
logging.info(f"Successfully updated DNS record {fqdn} to {ip_address} in zone {gcp_zone_name}.")
else:
# This case should only be hit if an A record existed and was correct
logging.info("No DNS changes were necessary.")
except GoogleAPIError as e:
logging.error(f"GCP API Error updating DNS record {fqdn} in zone {gcp_zone_name}: {e}")
except Exception as e:
logging.error(f"An unexpected error occurred during DNS update for {fqdn} in zone {gcp_zone_name}: {e}")
def update_spf_record(client: dns.Client, project_id: str, zone_name: str, record_name: str, ip_address: str):
    """Ensure the bare domain's SPF TXT record authorizes the current public IP.

    Args:
        client: Authenticated google.cloud.dns client.
        project_id: GCP project containing the managed zone.
        zone_name: DNS zone name, dot-separated (e.g. "example.com").
        record_name: Record this updater manages; a leading "*." is stripped
            to derive the bare domain the SPF record lives on.
        ip_address: Public IPv4 address to publish in the SPF "ip4:" mechanism.

    Best-effort: all errors are logged, never raised to the caller.
    """
    try:
        # GCP managed-zone names use dashes where the DNS name has dots.
        gcp_zone_name = zone_name.replace('.', '-')
        logging.info(f"Updating SPF record in zone: {gcp_zone_name}")
        zone = client.zone(gcp_zone_name, project_id)
        if not zone.exists():
            logging.error(f"DNS zone '{gcp_zone_name}' not found in project '{project_id}'.")
            return
        # Derive bare domain from record_name (e.g. "*.demonsafe.com" -> "demonsafe.com.").
        # BUGFIX: str.lstrip('*.') strips *characters* (any run of '*' and '.'),
        # not the "*." prefix, so it could also eat leading chars of the domain
        # itself. Slice off the two-character prefix instead.
        domain = record_name[2:] if record_name.startswith('*.') else record_name
        fqdn = domain if domain.endswith('.') else f"{domain}."
        logging.info(f"Checking TXT records for: {fqdn}")
        spf_value = f'"v=spf1 ip4:{ip_address} ~all"'
        record_sets = list(zone.list_resource_record_sets(filter_=f"name={fqdn}"))
        existing_txt = None
        for rs in record_sets:
            if rs.record_type == 'TXT' and rs.name == fqdn:
                existing_txt = rs
                logging.info(f"Found existing TXT record: {rs.name} -> {rs.rrdatas}")
                break
        changes = zone.changes()
        needs_update = False
        if existing_txt:
            new_rrdatas = []
            spf_found = False
            for rd in existing_txt.rrdatas:
                if 'v=spf1' in rd:
                    spf_found = True
                    # BUGFIX: token-wise comparison. A plain substring test
                    # would match "1.2.3.4" inside "ip4:1.2.3.45" and wrongly
                    # skip a needed update.
                    if f'ip4:{ip_address}' in rd.replace('"', ' ').split():
                        logging.info(f"SPF record already contains {ip_address}. No update needed.")
                        return
                    logging.info(f"Replacing SPF entry: {rd} -> {spf_value}")
                    new_rrdatas.append(spf_value)
                else:
                    # Preserve unrelated TXT entries (DKIM, site verification, ...).
                    new_rrdatas.append(rd)
            if not spf_found:
                logging.info(f"No existing SPF entry found. Adding: {spf_value}")
                new_rrdatas.append(spf_value)
            # Cloud DNS has no in-place update: delete the old record set and
            # add the rebuilt one in a single atomic change set.
            changes.delete_record_set(existing_txt)
            new_txt = zone.resource_record_set(fqdn, "TXT", 300, new_rrdatas)
            changes.add_record_set(new_txt)
            needs_update = True
        else:
            logging.info(f"No TXT record found for {fqdn}. Creating with SPF: {spf_value}")
            new_txt = zone.resource_record_set(fqdn, "TXT", 300, [spf_value])
            changes.add_record_set(new_txt)
            needs_update = True
        if needs_update:
            logging.info(f"Executing SPF TXT changes for {fqdn}...")
            changes.create()
            # Poll until Cloud DNS reports the change set as applied.
            while changes.status != 'done':
                logging.info(f"Waiting for SPF changes to complete (status: {changes.status})...")
                time.sleep(5)
                changes.reload()
            logging.info(f"Successfully updated SPF record for {fqdn} with ip4:{ip_address}")
    except GoogleAPIError as e:
        logging.error(f"GCP API Error updating SPF record: {e}")
    except Exception as e:
        logging.error(f"Unexpected error updating SPF record: {e}")
if __name__ == "__main__":
    logging.info("Starting DNS update script.")
    # Required configuration comes from environment variables; see get_env_vars.
    project_id, zone_name, record_name, key_b64 = get_env_vars()
    public_ip = get_public_ip()
    # DNS Pre-check logic: resolve a known hostname first so we can skip the
    # GCP client setup and API round-trips entirely when DNS is already current.
    if public_ip:
        # NOTE(review): pre-check hostname is hardcoded rather than derived
        # from record_name/zone_name -- confirm this is intentional.
        hostname_to_check = 'asdf.demonsafe.com'
        logging.info(f"Performing pre-check for hostname: {hostname_to_check}")
        try:
            resolved_ip = socket.gethostbyname(hostname_to_check)
            logging.info(f"Resolved IP for {hostname_to_check}: {resolved_ip}")
            if resolved_ip == public_ip:
                logging.info(f'DNS record for {hostname_to_check} ({resolved_ip}) already matches public IP ({public_ip}). No update needed.')
                sys.exit(0)  # Success: nothing to change.
            else:
                logging.info(f'Resolved IP for {hostname_to_check} ({resolved_ip}) does not match public IP ({public_ip}). Proceeding with potential update.')
        except socket.gaierror as e:
            # Resolution failure is not fatal -- the record may not exist yet.
            logging.warning(f'Could not resolve IP for {hostname_to_check}: {e}. Proceeding with potential update.')
        except Exception as e:
            logging.warning(f'An unexpected error occurred during DNS pre-check for {hostname_to_check}: {e}. Proceeding with potential update.')
    if public_ip:
        dns_client = get_dns_client(key_b64, project_id)
        if dns_client:
            # Update both the A record and the SPF TXT record to the new IP.
            update_dns_record(dns_client, project_id, zone_name, record_name, public_ip)
            update_spf_record(dns_client, project_id, zone_name, record_name, public_ip)
            logging.info("DNS update script finished.")
        else:
            logging.error("Exiting due to DNS client initialization failure.")
            sys.exit(1)
    else:
        logging.error("Exiting due to inability to fetch public IP.")
        sys.exit(1)
================================================
FILE: docker_images/update-metadata/Dockerfile
================================================
# Minimal runtime image for the secret-sync script.
FROM python:3.14-slim

WORKDIR /app

# Install dependencies first so Docker layer caching survives script-only edits.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# NOTE(review): this directory contains update_job_metadata.py, but the build
# copies sync_secrets.py -- confirm the expected script name at build time.
COPY sync_secrets.py .
ENTRYPOINT ["python", "sync_secrets.py"]
================================================
FILE: docker_images/update-metadata/README.md
================================================
# GitHub Secret Synchronization Script (Containerized)
## Purpose
This script (`sync_secrets.py`), running inside a Docker container, reads environment variables defined in the project's root `.envrc` file and synchronizes them as GitHub secrets to the `perrymanuk/hashi-homelab` repository using the `PyGithub` library.
## Requirements
* **Docker:** Docker must be installed and running to build and execute the container.
* **`NOMAD_VAR_github_pat` Environment Variable:** A GitHub Personal Access Token (PAT) with the `repo` scope must be available as an environment variable named `NOMAD_VAR_github_pat` in the **host shell** where you run the `make` command. The Makefile target (`sync-secrets`) will handle passing this token into the container under the name `GITHUB_TOKEN` for the script to use.
* **`.envrc` File:** An `.envrc` file must exist at the project root (`/Users/perry.manuk/git/perrymanuk/hashi-homelab/.envrc`) containing the secrets to sync.
## Usage
1. **Ensure `NOMAD_VAR_github_pat` is set:** Export your GitHub PAT in your current host shell session:
```bash
export NOMAD_VAR_github_pat="your_github_pat_here"
```
2. **Navigate to the project root directory:**
```bash
cd /Users/perry.manuk/git/perrymanuk/hashi-homelab
```
3. **Run the Makefile target:**
```bash
make sync-secrets
```
This command will:
* Build the Docker image defined in `docker_images/update-metadata/Dockerfile`.
* Run a container from the image.
* Mount the host's `.envrc` file into the container.
* Pass the **host's** `NOMAD_VAR_github_pat` environment variable into the container as `GITHUB_TOKEN`.
* Execute the `sync_secrets.py` script within the container.
The script will output the status of each secret synchronization attempt (created, updated, or failed).
**Important:** Running the script will overwrite any existing secrets in the GitHub repository that have the same name as variables found in the `.envrc` file.
## `.envrc` Format
The script expects the `.envrc` file to follow this format:
```bash
export VARIABLE_NAME=value
export ANOTHER_VARIABLE='value with spaces'
export YET_ANOTHER="double quoted value"
# This is a comment and will be ignored
# Empty lines are also ignored
export SECRET_KEY=a_very_secret_value_here
```
* Lines must start with `export`.
* Variable names and values are separated by `=`.
* Values can be unquoted, single-quoted (`'...'`), or double-quoted (`"..."`). Quotes are stripped before syncing.
* Lines starting with `#` (comments) and empty lines are ignored.
================================================
FILE: docker_images/update-metadata/requirements.txt
================================================
PyGithub
hcl2
================================================
FILE: docker_images/update-metadata/update_job_metadata.py
================================================
import argparse
import logging
import pathlib
import re
import sys
# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
def find_job_block(content):
    """Locate the top-level 'job "<name>" {' block in an HCL document.

    Returns a (start, end) index pair such that content[start:end] spans the
    whole job block including its closing brace, or (None, None) when the
    block header (or its matching closing brace) cannot be found.
    """
    header = re.search(r'^job\s+"[^"]+"\s*\{', content, re.MULTILINE)
    if header is None:
        logging.warning("Could not find job block start.")
        return None, None
    begin = header.start()
    depth = 0
    inside_quotes = False
    skip_next = False
    # Scan forward balancing braces, ignoring any that appear inside
    # double-quoted strings (backslash escapes are honoured everywhere).
    for offset, ch in enumerate(content[begin:]):
        if skip_next:
            skip_next = False
        elif ch == '\\':
            skip_next = True
        elif ch == '"':
            inside_quotes = not inside_quotes
        elif not inside_quotes:
            if ch == '{':
                depth += 1
            elif ch == '}':
                depth -= 1
                if depth == 0:
                    # Span includes the closing brace itself.
                    return begin, begin + offset + 1
    logging.warning("Could not find matching closing brace for job block.")
    return None, None
def find_meta_block(content):
    """Locate a 'meta {' block within the given content.

    Returns (start, end) indices spanning the block including its closing
    brace, or (None, None) if no meta block / no matching brace is found.
    A missing opener is not logged: meta blocks are optional.
    """
    opener = re.search(r'^\s*meta\s*\{', content, re.MULTILINE)
    if not opener:
        return None, None
    start = opener.start()
    level = 0
    quoted = False
    pending_escape = False
    pos = start
    # Walk character by character, balancing braces while skipping anything
    # inside double-quoted strings (with backslash-escape handling).
    while pos < len(content):
        ch = content[pos]
        pos += 1
        if pending_escape:
            pending_escape = False
            continue
        if ch == '\\':
            pending_escape = True
        elif ch == '"':
            quoted = not quoted
        elif not quoted:
            if ch == '{':
                level += 1
            elif ch == '}':
                level -= 1
                if level == 0:
                    # pos already points one past the closing brace.
                    return start, pos
    logging.warning("Could not find matching closing brace for meta block.")
    return None, None
def update_job_metadata(repo_root):
    """Ensure every Nomad job file advertises its own repo path in meta.job_file.

    Walks <repo_root>/nomad_jobs for *.nomad and *.job files and, for each,
    inserts or corrects a `job_file = "<relative path>"` entry inside the
    job's meta block (creating the meta block when absent). Files are
    rewritten in place; a summary count is logged at the end.

    Args:
        repo_root: Path (str or path-like) to the repository root.

    Exits the process with status 1 if the nomad_jobs directory is missing.
    """
    repo_path = pathlib.Path(repo_root).resolve()
    nomad_jobs_path = repo_path / 'nomad_jobs'
    if not nomad_jobs_path.is_dir():
        logging.error(f"'nomad_jobs' directory not found in {repo_path}")
        sys.exit(1)
    logging.info(f"Scanning for job files in {nomad_jobs_path}...")
    job_files = list(nomad_jobs_path.rglob('*.nomad')) + list(nomad_jobs_path.rglob('*.job'))
    if not job_files:
        logging.warning("No *.nomad or *.job files found.")
        return
    modified_count = 0
    for job_file in job_files:
        try:
            relative_path = job_file.relative_to(repo_path).as_posix()
            logging.debug(f"Processing file: {relative_path}")
            content = job_file.read_text()
            original_content = content  # Compared later to avoid no-op writes.
            job_start, job_end = find_job_block(content)
            if job_start is None or job_end is None:
                logging.warning(f"Skipping {relative_path}: Could not find main job block.")
                continue
            job_block_content = content[job_start:job_end]
            job_opening_line_match = re.match(r'^job\s+"[^"]+"\s*\{\s*\n?', job_block_content, re.MULTILINE)
            if not job_opening_line_match:
                logging.warning(f"Skipping {relative_path}: Could not match job opening line format.")
                continue
            # Position just after the `job "..." {` line: insertion point for
            # a brand-new meta block.
            job_insert_pos = job_start + job_opening_line_match.end()
            meta_start_rel, meta_end_rel = find_meta_block(job_block_content)
            new_job_file_line = f'    job_file = "{relative_path}"'
            modified = False
            if meta_start_rel is not None and meta_end_rel is not None:
                # An existing meta block: update or insert the job_file entry.
                meta_start_abs = job_start + meta_start_rel
                meta_end_abs = job_start + meta_end_rel
                meta_block_content = content[meta_start_abs:meta_end_abs]
                meta_opening_line_match = re.match(r'^\s*meta\s*\{\s*\n?', meta_block_content, re.MULTILINE)
                if not meta_opening_line_match:
                    logging.warning(f"Skipping {relative_path}: Could not match meta opening line format.")
                    continue
                meta_insert_pos = meta_start_abs + meta_opening_line_match.end()
                job_file_line_match = re.search(r'^(\s*)job_file\s*=\s*".*?"$\n?', meta_block_content, re.MULTILINE)
                if job_file_line_match:
                    existing_line = job_file_line_match.group(0)
                    indent = job_file_line_match.group(1)
                    new_line_with_indent = f'{indent}job_file = "{relative_path}"\n'  # Ensure newline
                    if existing_line.strip() != new_line_with_indent.strip():
                        # Replace the stale job_file line in place.
                        start = meta_start_abs + job_file_line_match.start()
                        end = meta_start_abs + job_file_line_match.end()
                        content = content[:start] + new_line_with_indent + content[end:]
                        modified = True
                else:
                    # Insert a job_file line at the top of the meta block.
                    content = content[:meta_insert_pos] + new_job_file_line + '\n' + content[meta_insert_pos:]
                    modified = True
            else:
                # No meta block at all: create one right after the job header.
                new_meta_block = f'\n  meta {{\n{new_job_file_line}\n  }}\n'
                content = content[:job_insert_pos] + new_meta_block + content[job_insert_pos:]
                modified = True
            if modified and content != original_content:
                job_file.write_text(content)
                logging.info(f"Updated metadata in: {relative_path}")
                modified_count += 1
            elif not modified:
                logging.debug(f"No changes needed for: {relative_path}")
        except Exception as e:
            # BUGFIX: report job_file, which is always bound, instead of
            # relative_path -- the latter is unbound (raising NameError and
            # masking the real error) if the first statement of the try raised.
            logging.error(f"Failed to process {job_file}: {e}")
    logging.info(f"Metadata update complete. {modified_count} files modified.")
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Update Nomad job files with job_file metadata.")
    # Default to the parent directory of the script's directory (../)
    # NOTE(review): this assumes the script lives one level below the repo
    # root; in this repo it sits at docker_images/update-metadata/, two levels
    # down -- confirm callers pass --repo-root explicitly.
    script_dir = pathlib.Path(__file__).parent.resolve()
    default_repo_root = script_dir.parent
    parser.add_argument(
        "--repo-root",
        type=str,
        default=str(default_repo_root),
        help="Path to the root of the repository."
    )
    args = parser.parse_args()
    update_job_metadata(args.repo_root)
================================================
FILE: envrc
================================================
# direnv environment template -- replace the FILL_IN_* placeholders.
# HashiCorp cluster endpoints
export CONSUL_HTTP_ADDR=http://FILL_IN_IP:8500
# Consul TLS material (used when Consul is TLS-enabled)
export CONSUL_CACERT=/etc/consul.d/ssl/ca.cert
export CONSUL_CLIENT_CERT=/etc/consul.d/ssl/consul.cert
export CONSUL_CLIENT_KEY=/etc/consul.d/ssl/consul.key
export VAULT_ADDR=http://FILL_IN_IP:8200
export VAULT_TOKEN=FILL_IN_TOKEN
export NOMAD_ADDR=http://FILL_IN_IP:4646
# NOMAD_VAR_* values are surfaced to Nomad jobs as var.* inputs
export NOMAD_VAR_region='home'
export NOMAD_VAR_tld='home'
# Host paths bind-mounted into containers by the job specs
export NOMAD_VAR_shared_dir='/home/shared/'
export NOMAD_VAR_downloads_dir='/home/sabnzbd/downloads'
export NOMAD_VAR_music_dir='/home/media/Music'
export NOMAD_VAR_movies_dir='/home/media/Movies'
export NOMAD_VAR_tv_dir='/home/media/TV'
export NOMAD_VAR_media_dir='/home/media'
================================================
FILE: nomad_jobs/TEMPLATE-volume.hcl
================================================
// =============================================================================
// Nomad CSI Volume Template
// =============================================================================
//
// Usage:
// 1. Copy this file to nomad_jobs/<category>/<service>/volume.hcl
// 2. Replace __VOL_NAME__ with the volume name (usually same as service name)
// 3. Replace __SIZE__ with capacity (e.g. "5GiB", "10GiB", "50GiB")
// 4. Set access_mode based on your needs (see below)
// 5. Volume is auto-created by CI when pushed (if path is in workflow filter)
//
// Access modes:
// single-node-writer : one node read/write (most services)
// single-node-reader-only : one node read-only
// multi-node-single-writer : multiple nodes can mount, one writes (HA failover)
//
// Size guide:
// Config-only (app state): 1-5 GiB
// Small databases: 5-10 GiB
// Media metadata/indexes: 10-20 GiB
// Time-series / logs: 50-100 GiB
//
// =============================================================================
# Volume identity -- all three should normally share the service name.
id          = "__VOL_NAME__"
external_id = "__VOL_NAME__"
name        = "__VOL_NAME__"
type        = "csi"
plugin_id   = "org.democratic-csi.iscsi"

# min == max pins the volume at an exact size (see size guide above).
capacity_min = "__SIZE__"
capacity_max = "__SIZE__"

capability {
  access_mode     = "single-node-writer"
  attachment_mode = "file-system"
}

mount_options {
  fs_type     = "ext4"
  mount_flags = ["noatime"]
}
================================================
FILE: nomad_jobs/TEMPLATE.job
================================================
// =============================================================================
// Nomad Job Template
// =============================================================================
//
// Usage:
// 1. Copy this file to nomad_jobs/<category>/<service>/nomad.job
// 2. Find/replace the following placeholders:
// - __JOB_NAME__ : lowercase service name (e.g. "sonarr")
// - __GROUP_NAME__ : group name (e.g. "downloaders", "monitoring", "ai")
// - __CATEGORY__ : directory category (e.g. "media-stack", "ai-ml")
// - __IMAGE__ : docker image with tag (e.g. "linuxserver/sonarr:4.0.16")
// - __PORT__ : container port number (e.g. "8989")
// - __HEALTH_PATH__ : HTTP health check path (e.g. "/ping", "/-/healthy", "/api/health")
// - __CPU__ : CPU MHz allocation (see guide below)
// - __MEMORY__ : Memory MB allocation (see guide below)
// 3. Remove any optional sections you don't need (marked with OPTIONAL)
// 4. Update the variable declarations at the bottom
// 5. Add any job-specific secrets to .envrc as NOMAD_VAR_<secret_name>
// 6. Add the job path to .github/workflows/nomad.yaml if it should auto-deploy
//
// Resource guide:
// Light services (static sites, proxies): cpu = 100-200, memory = 128-256
// Medium services (APIs, web apps): cpu = 500-1000, memory = 512-1024
// Heavy services (.NET apps, databases, Java): cpu = 1000+, memory = 1024-2048
// GPU / ML workloads: cpu = 200+, memory = 4096-8192
//
// =============================================================================
job "__JOB_NAME__" {
region = var.region
datacenters = ["dc1"]
type = "service"
meta {
job_file = "nomad_jobs/__CATEGORY__/__JOB_NAME__/nomad.job"
version = "1"
}
// Ensures scheduling on nodes with NFS shared mount available.
// Remove if the service has no need for shared storage or config dirs.
constraint {
attribute = "${meta.shared_mount}"
operator = "="
value = "true"
}
group "__GROUP_NAME__" {
count = 1
network {
port "http" {
host_network = "lan"
to = "__PORT__"
}
}
// --- OPTIONAL: CSI Volume ------------------------------------------------
// Use for services that need persistent block storage (databases, stateful apps).
// Requires a matching volume.hcl deployed first.
// Remove this block and the prep-disk task + volume_mount if not needed.
//
// volume "__JOB_NAME__" {
// type = "csi"
// read_only = false
// source = "__JOB_NAME__"
// access_mode = "single-node-writer"
// attachment_mode = "file-system"
// }
// -------------------------------------------------------------------------
restart {
attempts = 3
delay = "15s"
interval = "10m"
mode = "delay"
}
update {
max_parallel = 1
min_healthy_time = "30s"
healthy_deadline = "5m"
progress_deadline = "10m"
auto_revert = true
}
// --- OPTIONAL: Prep-disk task --------------------------------------------
// Required when using CSI volumes to fix ownership before the main task runs.
// Set UID:GID to match the user the main container runs as.
// Common values:
// linuxserver images: 65534:65534 (nobody)
// prometheus: 1000:2000
// grafana: 472:472
// loki: 10001:10001
//
// task "prep-disk" {
// driver = "docker"
//
// lifecycle {
// hook = "prestart"
// sidecar = false
// }
//
// volume_mount {
// volume = "__JOB_NAME__"
// destination = "/volume/"
// read_only = false
// }
//
// config {
// image = "busybox:latest"
// command = "sh"
// args = ["-c", "chown -R UID:GID /volume/"]
// }
//
// resources {
// cpu = 200
// memory = 128
// }
// }
// -------------------------------------------------------------------------
task "__JOB_NAME__" {
driver = "docker"
config {
image = "__IMAGE__"
ports = ["http"]
// --- Bind mount pattern (shared NFS config dir) ---
// Use for services that store config on shared NFS.
// volumes = [
// "${var.shared_dir}__JOB_NAME__:/config",
// ]
// --- Template mount pattern (config rendered by Nomad) ---
// Use when config is templated inline below.
// volumes = [
// "local/config.yaml:/app/config.yaml",
// ]
}
// --- OPTIONAL: CSI volume mount ----------------------------------------
// volume_mount {
// volume = "__JOB_NAME__"
// destination = "/data"
// read_only = false
// }
// -----------------------------------------------------------------------
env {
TZ = "Etc/UTC"
// PUID = "65534" // common for linuxserver images
// PGID = "65534"
}
// --- OPTIONAL: Config template -----------------------------------------
// Use for services that need a rendered config file.
// Reference secrets with ${var.secret_name} syntax.
//
// template {
// data = < 768MB
}
group "web" {
network {
mode = "host"
port "web" {
to = "8080"
host_network = "lan"
}
}
restart {
attempts = 3
delay = "15s"
interval = "10m"
mode = "delay"
}
task "open-webui" {
driver = "docker"
config {
image = "ghcr.io/open-webui/open-webui:v0.8.12"
dns_servers = [var.dns_server_ip]
volumes = [
"${var.shared_dir}open-webui:/app/backend/data",
]
ports = ["web"]
}
env {
OLLAMA_BASE_URL= var.ollama_base_url
WEBUI_SECRET_KEY = var.webui_secret_key
}
service {
name = "${NOMAD_JOB_NAME}"
tags = ["traefik.enable=true"]
port = "web"
check {
type = "tcp"
port = "web"
interval = "30s"
timeout = "2s"
}
}
resources {
cpu = "200"
memory = "768"
}
}
}
}
variable "region" {
type = string
}
variable "shared_dir" {
type = string
}
variable "ollama_base_url" {
type = string
}
variable "webui_secret_key" {
type = string
}
variable "datacenter" {
type = string
}
variable "dns_server_ip" {
type = string
}
================================================
FILE: nomad_jobs/ai-ml/paperless-ai/nomad.job
================================================
# Paperless-AI: AI-assisted document analysis companion for paperless-ngx.
job "paperless-ai" {
  region      = var.region
  datacenters = ["dc1"]
  type        = "service"

  meta {
    job_file = "nomad_jobs/ai-ml/paperless-ai/nomad.job"
    version  = "2"
  }

  group "web" {
    network {
      mode = "host"
      port "web" {
        # Container listens on 3000; exposed on the LAN host network.
        to           = "3000"
        host_network = "lan"
      }
    }

    restart {
      attempts = 3
      delay    = "15s"
      interval = "10m"
      mode     = "delay"
    }

    task "paperless-ai" {
      driver = "docker"

      config {
        # NOTE(review): image tag is unpinned (implicit :latest) -- consider
        # pinning a version as most other jobs in this repo do.
        image = "clusterzx/paperless-ai"
        # NOTE(review): hardcoded DNS IP; sibling jobs use var.dns_server_ip --
        # confirm whether this should be parameterized too.
        dns_servers = ["192.168.50.2"]
        volumes = [
          "${var.shared_dir}paperless-ai:/app/data",
        ]
        ports = ["web"]
      }

      service {
        name = "${NOMAD_JOB_NAME}"
        tags = ["traefik.enable=true"]
        port = "web"
        check {
          type     = "tcp"
          port     = "web"
          interval = "30s"
          timeout  = "2s"
        }
      }

      resources {
        cpu    = "200"
        memory = "2048"
      }
    }
  }
}

# Deployment-time variables (supplied via NOMAD_VAR_* environment).
variable "region" {
  type = string
}

variable "shared_dir" {
  type = string
}
================================================
FILE: nomad_jobs/ai-ml/pgvector-client/nomad.job
================================================
job "pgvector-client-example" {
region = var.region
datacenters = ["dc1"]
type = "batch"
meta {
job_file = "nomad_jobs/ai-ml/pgvector-client/nomad.job"
version = "1" // Initial version
}
group "client" {
restart {
attempts = 3
delay = "15s"
interval = "10m"
mode = "delay"
}
task "embedding-example" {
driver = "docker"
config {
image = "python:3.14-slim"
command = "python"
args = [
"/local/embedding-example.py"
]
}
env {
PGVECTOR_HOST = "pgvector.service.consul"
PGVECTOR_PORT = "5433"
PGVECTOR_USER = "postgres"
PGVECTOR_PASSWORD = "${var.pgvector_pass}"
PGVECTOR_DB = "embeddings"
}
template {
data = < %s) AS similarity
FROM documents
ORDER BY embedding <=> %s
LIMIT 3
""", (query_embedding, query_embedding))
results = cursor.fetchall()
print("\nTop 3 most similar documents:")
for id, content, similarity in results:
print(f"ID: {id}, Similarity: {similarity:.4f}")
print(f"Content: {content}")
print("-" * 50)
# Commit and close
conn.commit()
cursor.close()
conn.close()
print("Example completed successfully!")
EOH
destination = "local/embedding-example.py"
}
resources {
cpu = 500
memory = 512
}
}
}
}
variable "region" {
type = string
}
variable "pgvector_pass" {
type = string
description = "Admin password for pgvector PostgreSQL server"
}
================================================
FILE: nomad_jobs/ai-ml/radbot/nomad-dev.job
================================================
job "radbot-dev" {
region = var.region
datacenters = ["dc1"]
type = "service"
meta {
job_file = "nomad_jobs/ai-ml/radbot/nomad-dev.job"
version = "1"
}
constraint {
attribute = "${meta.shared_mount}"
operator = "="
value = "true"
}
group "web" {
count = 1
network {
port "http" {
host_network = "lan"
to = 8000
}
}
restart {
attempts = 3
delay = "15s"
interval = "10m"
mode = "delay"
}
update {
max_parallel = 1
min_healthy_time = "60s"
healthy_deadline = "5m"
auto_revert = true
}
task "radbot-dev" {
driver = "docker"
config {
image = "ghcr.io/perrymanuk/radbot:dev"
dns_servers = [var.dns_server_ip]
ports = ["http"]
volumes = [
"local/config.yaml:/app/config.yaml",
]
}
env {
RADBOT_CREDENTIAL_KEY = var.radbot_credential_key
RADBOT_ADMIN_TOKEN = var.radbot_admin_token
RADBOT_CONFIG_FILE = "/app/config.yaml"
RADBOT_ENV = "dev"
}
template {
data = <
postgres
${var.postgres_pass}
5432
postgres.service.consul
lidarr_main
lidarr_logs
info
*
8686
6868
False
False
${var.lidarr_api_key}
External
DisabledForLocalAddresses
100.64.0.0/10,192.168.50.0/24
master
Lidarr
Docker
EOH
destination = "local/config.xml"
perms = "644"
}
service {
port = "http"
name = "lidarr"
tags = [
"traefik.enable=true",
"traefik.http.routers.${NOMAD_TASK_NAME}.tls.domains[0].sans=${NOMAD_TASK_NAME}.${var.tld}",
]
check {
type = "http"
path = "/ping"
interval = "10s"
timeout = "2s"
check_restart {
limit = 3
grace = "60s"
ignore_warnings = false
}
}
}
resources {
cpu = 100
memory = 256
}
}
}
}
variable "region" {
type = string
}
variable "tld" {
type = string
}
variable "shared_dir" {
type = string
}
variable "downloads_dir" {
type = string
}
variable "music_dir" {
type = string
}
variable "postgres_pass" {
type = string
description = "Admin password for PostgreSQL"
}
variable "lidarr_api_key" {
type = string
description = "API key for Lidarr"
}
================================================
FILE: nomad_jobs/media-stack/lidarr/volume.hcl
================================================
# CSI volume backing Lidarr's /config (iSCSI via democratic-csi).
id          = "lidarr2"
external_id = "lidarr2"
name        = "lidarr2"
type        = "csi"
plugin_id   = "org.democratic-csi.iscsi"

# min == max pins the volume at exactly 10 GiB.
capacity_min = "10GiB"
capacity_max = "10GiB"

capability {
  access_mode = "single-node-writer"
  # NOTE(review): "block-device" here while mount_options below assume a
  # filesystem; the repo TEMPLATE uses "file-system" -- confirm intended.
  attachment_mode = "block-device"
}

mount_options {
  fs_type     = "ext4"
  mount_flags = ["noatime"]
}
================================================
FILE: nomad_jobs/media-stack/lidify/nomad.job
================================================
# Lidify: music-discovery companion that feeds suggestions into Lidarr.
job "lidify" {
  region      = var.region
  datacenters = ["dc1"]
  type        = "service"

  meta {
    job_file = "nomad_jobs/media-stack/lidify/nomad.job"
    version  = "1"
  }

  # Schedule only on nodes exposing the shared NFS mount.
  constraint {
    attribute = "${meta.shared_mount}"
    operator  = "="
    value     = "true"
  }

  group "discovery" {
    count = 1

    network {
      port "http" {
        host_network = "lan"
        to           = "5000"
      }
    }

    restart {
      attempts = 3
      delay    = "15s"
      interval = "10m"
      mode     = "delay"
    }

    update {
      max_parallel     = 1
      min_healthy_time = "30s"
      auto_revert      = true
    }

    task "lidify" {
      driver = "docker"

      config {
        # NOTE(review): floating ":latest" tag -- consider pinning a version.
        image = "thewicklowwolf/lidify:latest"
        ports = ["http"]
        # NOTE(review): hardcoded DNS IP; sibling jobs use var.dns_server_ip.
        dns_servers = ["192.168.50.2"]
        volumes = [
          "${var.shared_dir}lidify:/lidify/config",
        ]
      }

      env {
        # Lidarr is reached through Consul DNS service discovery.
        lidarr_address      = "http://lidarr.service.consul:8686"
        lidarr_api_key      = var.lidarr_api_key
        lastfm_api_key      = var.lastfm_api_key
        root_folder_path    = "/music"
        quality_profile_id  = "1"
        metadata_profile_id = "1"
        # Seconds between discovery runs.
        sleep_interval      = "3600"
      }

      service {
        port = "http"
        name = "lidify"
        tags = [
          "traefik.enable=true",
        ]
        check {
          type     = "http"
          path     = "/"
          interval = "10s"
          timeout  = "2s"
          check_restart {
            limit           = 3
            grace           = "60s"
            ignore_warnings = false
          }
        }
      }

      resources {
        cpu    = 100
        memory = 256
      }
    }
  }
}

# Deployment-time variables (NOMAD_VAR_*).
variable "region" {
  type = string
}

# NOTE(review): declared but not referenced in this job -- confirm whether
# callers still pass NOMAD_VAR_tld.
variable "tld" {
  type = string
}

variable "shared_dir" {
  type = string
}

variable "lidarr_api_key" {
  type        = string
  description = "API key for Lidarr"
}

variable "lastfm_api_key" {
  type        = string
  description = "Last.fm API key"
}
================================================
FILE: nomad_jobs/media-stack/maintainerr/nomad.job
================================================
# Maintainerr: rule-based cleanup of Plex media via the *arr stack.
job "maintainerr" {
  region      = var.region
  datacenters = ["dc1"]
  type        = "service"

  meta {
    job_file = "nomad_jobs/media-stack/maintainerr/nomad.job"
    version  = "2"
  }

  # Schedule only on nodes exposing the shared NFS mount.
  constraint {
    attribute = "${meta.shared_mount}"
    operator  = "="
    value     = "true"
  }

  group "media" {
    count = 1

    network {
      port "http" {
        host_network = "lan"
        to           = 6246
      }
    }

    # Added for consistency with every other media-stack job; without an
    # explicit restart stanza the group fell back to scheduler defaults.
    restart {
      attempts = 3
      delay    = "15s"
      interval = "10m"
      mode     = "delay"
    }

    update {
      max_parallel     = 1
      min_healthy_time = "30s"
      auto_revert      = true
    }

    task "maintainerr" {
      driver = "docker"

      config {
        image = "ghcr.io/maintainerr/maintainerr:3.7.0"
        ports = ["http"]
        volumes = [
          "${var.shared_dir}maintainerr:/opt/data",
        ]
      }

      env {
        TZ = "Etc/UTC"
      }

      # Run as non-root; /opt/data on the share must be writable by 1000:1000.
      user = "1000:1000"

      service {
        port = "http"
        name = "maintainerr"
        tags = [
          "traefik.enable=true"
        ]
        check {
          type     = "http"
          path     = "/"
          interval = "30s"
          timeout  = "5s"
          check_restart {
            limit           = 3
            grace           = "60s"
            ignore_warnings = false
          }
        }
      }

      resources {
        cpu    = 200
        memory = 512
      }
    }
  }
}
variable "region" {
type = string
}
variable "tld" {
type = string
}
variable "shared_dir" {
type = string
}
================================================
FILE: nomad_jobs/media-stack/mediasage/nomad.job
================================================
# MediaSage: AI-generated Plex playlists backed by a local Ollama instance.
job "mediasage" {
  region      = var.region
  datacenters = ["dc1"]
  type        = "service"

  meta {
    job_file = "nomad_jobs/media-stack/mediasage/nomad.job"
    version  = "1"
  }

  # Schedule only on nodes exposing the shared NFS mount.
  constraint {
    attribute = "${meta.shared_mount}"
    operator  = "="
    value     = "true"
  }

  group "playlists" {
    count = 1

    network {
      port "http" {
        host_network = "lan"
        to           = "5765"
      }
    }

    restart {
      attempts = 3
      delay    = "15s"
      interval = "10m"
      mode     = "delay"
    }

    update {
      max_parallel     = 1
      min_healthy_time = "30s"
      auto_revert      = true
    }

    # Pre-start task: ensure the shared data dir exists and is writable
    # before the app container mounts it.
    task "prep-disk" {
      driver = "docker"

      lifecycle {
        hook    = "prestart"
        sidecar = false
      }

      config {
        image   = "busybox:latest"
        command = "sh"
        args    = ["-c", "mkdir -p /data && chmod 777 /data"]
        volumes = [
          "${var.shared_dir}mediasage:/data",
        ]
      }

      resources {
        cpu    = 50
        memory = 32
      }
    }

    task "mediasage" {
      driver = "docker"

      config {
        # NOTE(review): floating ":latest" tag -- consider pinning.
        image = "ghcr.io/ecwilsonaz/mediasage:latest"
        ports = ["http"]
        # NOTE(review): hardcoded DNS IP; sibling jobs use var.dns_server_ip.
        dns_servers = ["192.168.50.2"]
        volumes = [
          "${var.shared_dir}mediasage:/app/data",
        ]
      }

      env {
        # Plex and Ollama are reached via Consul DNS service discovery.
        PLEX_URL    = "http://plex.service.consul:32400"
        PLEX_TOKEN  = var.plex_token
        AI_PROVIDER = "ollama"
        OLLAMA_URL  = "http://ollama.service.consul:11434"
      }

      service {
        port = "http"
        name = "mediasage"
        tags = [
          "traefik.enable=true",
        ]
        check {
          type     = "http"
          path     = "/"
          interval = "10s"
          timeout  = "2s"
          check_restart {
            limit           = 3
            grace           = "60s"
            ignore_warnings = false
          }
        }
      }

      resources {
        cpu    = 200
        memory = 512
      }
    }
  }
}

# Deployment-time variables (NOMAD_VAR_*).
variable "region" {
  type = string
}

# NOTE(review): declared but not referenced in this job.
variable "tld" {
  type = string
}

variable "shared_dir" {
  type = string
}

variable "plex_token" {
  type        = string
  description = "Plex authentication token"
}
================================================
FILE: nomad_jobs/media-stack/multi-scrobbler/nomad.job
================================================
job "multi-scrobbler" {
region = var.region
datacenters = ["dc1"]
type = "service"
meta {
job_file = "nomad_jobs/media-stack/multi-scrobbler/nomad.job"
version = "1"
}
constraint {
attribute = "${meta.shared_mount}"
operator = "="
value = "true"
}
group "scrobbler" {
count = 1
network {
port "http" {
host_network = "lan"
to = "9078"
}
}
restart {
attempts = 3
delay = "15s"
interval = "10m"
mode = "delay"
}
update {
max_parallel = 1
min_healthy_time = "30s"
auto_revert = true
}
task "multi-scrobbler" {
driver = "docker"
config {
image = "foxxmd/multi-scrobbler:latest"
ports = ["http"]
dns_servers = ["192.168.50.2"]
volumes = [
"${var.shared_dir}multi-scrobbler:/config",
"local/config.json:/config/config.json",
]
}
env {
TZ = "Etc/UTC"
}
template {
data = </dev/null || true)" ]; then
echo "Database volume is empty, copying existing databases if any..."
if [ -d "$DB_DIR" ] && [ -n "$(ls -A "$DB_DIR" 2>/dev/null || true)" ]; then
cp -a "$DB_DIR"/* /opt/plex-db/
echo "Copied existing databases to persistent volume"
fi
fi
# Set up link to optimized database storage (only if not already linked)
if [ ! -L "$DB_DIR" ] || [ "$(readlink "$DB_DIR")" != "/opt/plex-db" ]; then
echo "Setting up database symlink..."
rm -rf "$DB_DIR"
ln -sf /opt/plex-db "$DB_DIR"
fi
# Install SQLite3 if needed
if ! command -v sqlite3 &>/dev/null; then
echo "Installing SQLite3..."
apt-get update && apt-get install -y sqlite3
fi
# Set environment variables for SQLite
export SQLITE_TMPDIR=/tmp/plex_sqlite
mkdir -p "$SQLITE_TMPDIR"
# Apply optimizations to all databases
echo "Applying SQLite optimizations to databases..."
find /opt/plex-db -name "*.db" -type f 2>/dev/null | while read -r db; do
echo "Optimizing $db"
sqlite3 "$db" < 256
}
group "downloaders" {
  count = 1

  network {
    # Prowlarr web UI on a fixed host port.
    port "http" {
      host_network = "lan"
      static       = 9696
    }
    # FlareSolverr proxy for Cloudflare-protected indexers.
    port "flaresolverr" {
      host_network = "lan"
      static       = 8191
    }
  }

  # Persistent /config on a CSI (iSCSI) volume; see volume.hcl alongside.
  volume "prowlarr" {
    type            = "csi"
    read_only       = false
    source          = "prowlarr"
    access_mode     = "single-node-writer"
    attachment_mode = "file-system"
  }

  update {
    max_parallel     = 1
    min_healthy_time = "30s"
    auto_revert      = true
  }

  task "prowlarr" {
    driver = "docker"

    config {
      # NOTE(review): image tag unpinned (implicit :latest).
      image       = "linuxserver/prowlarr"
      dns_servers = ["192.168.50.2"]
      ports       = ["http"]
    }

    volume_mount {
      volume      = "prowlarr"
      destination = "/config"
      read_only   = false
    }

    env {
      # linuxserver convention: 65534 = nobody.
      PUID = "65534"
      PGID = "65534"
      TZ   = "Etc/UTC"
    }

    service {
      port = "http"
      name = "prowlarr"
      tags = [
        "traefik.enable=true",
      ]
      check {
        type     = "http"
        path     = "/ping"
        interval = "10s"
        timeout  = "2s"
        check_restart {
          limit           = 3
          grace           = "60s"
          ignore_warnings = false
        }
      }
    }

    resources {
      cpu    = 500
      memory = 256
    }
  }

  task "flaresolverr" {
    driver = "docker"

    config {
      image = "ghcr.io/flaresolverr/flaresolverr:v3.4.6"
      ports = ["flaresolverr"]
    }

    env {
      LOG_LEVEL = "info"
      LOG_HTML  = "false"
      TZ        = "Etc/UTC"
    }

    service {
      port = "flaresolverr"
      name = "flaresolverr"
      check {
        type     = "http"
        path     = "/"
        interval = "30s"
        timeout  = "5s"
      }
    }

    resources {
      cpu    = 500
      memory = 512
    }

    # Started before the prowlarr task and kept running alongside it.
    lifecycle {
      hook    = "prestart"
      sidecar = true
    }
  }
}
}
variable "region" {}
variable "tld" {}
================================================
FILE: nomad_jobs/media-stack/prowlarr/volume.hcl
================================================
# CSI volume registration for Prowlarr's /config data,
# provisioned by democratic-csi over iSCSI.
id          = "prowlarr"
external_id = "prowlarr"
name        = "prowlarr"
type        = "csi"
plugin_id   = "org.democratic-csi.iscsi"

# Fixed 5 GiB allocation (min == max).
capacity_min = "5GiB"
capacity_max = "5GiB"

capability {
  access_mode     = "single-node-writer"
  attachment_mode = "block-device"
}

# Block device is formatted/mounted as ext4 by the plugin.
mount_options {
  fs_type     = "ext4"
  mount_flags = ["noatime"]
}
================================================
FILE: nomad_jobs/media-stack/qbittorrent/nomad.job
================================================
# qBittorrent with all traffic forced through a Mullvad WireGuard
# tunnel provided by a gluetun sidecar (shared network namespace).
job "qbittorrent" {
  region      = var.region
  datacenters = ["dc1"]
  type        = "service"

  meta {
    job_file = "nomad_jobs/media-stack/qbittorrent/nomad.job"
    version  = "5"
  }

  # Only schedule on nodes that mount the shared storage directory.
  constraint {
    attribute = "${meta.shared_mount}"
    operator  = "="
    value     = "true"
  }

  group "downloaders" {
    count = 1

    network {
      # Web UI, published on the LAN interface.
      port "http" {
        host_network = "lan"
        static       = 8081
      }
    }

    update {
      max_parallel     = 1
      min_healthy_time = "30s"
      auto_revert      = true
    }

    # VPN sidecar: owns the network namespace that qbittorrent joins below.
    task "gluetun" {
      driver = "docker"

      lifecycle {
        hook    = "prestart"
        sidecar = true
      }

      config {
        image   = "qmcgaw/gluetun"
        cap_add = ["NET_ADMIN"] # needed to create the WireGuard tunnel
        ports   = ["http"]
        mounts = [
          {
            type     = "tmpfs"
            target   = "/tmp/gluetun"
            readonly = false
          },
        ]
      }

      env {
        VPN_SERVICE_PROVIDER  = "mullvad"
        VPN_TYPE              = "wireguard"
        WIREGUARD_PRIVATE_KEY = var.mullvad_wireguard_key
        WIREGUARD_ADDRESSES   = var.mullvad_wireguard_addr
        SERVER_COUNTRIES      = "Netherlands"
        # Let the qBittorrent web UI through gluetun's firewall.
        FIREWALL_VPN_INPUT_PORTS = "8081"
      }

      resources {
        cpu    = 500
        memory = 512
      }
    }

    task "qbittorrent" {
      driver = "docker"

      config {
        image = "linuxserver/qbittorrent"
        # Join the gluetun task's network namespace so every connection
        # (including trackers/peers) goes through the VPN.
        network_mode = "container:gluetun-${NOMAD_ALLOC_ID}"
        mounts = [
          {
            type     = "bind"
            target   = "/config"
            source   = "${var.shared_dir}qbittorrent"
            readonly = false
            bind_options = {
              propagation = "rshared"
            }
          },
          {
            type     = "bind"
            target   = "/downloads"
            source   = "${var.downloads_dir}"
            readonly = false
            bind_options = {
              propagation = "rshared"
            }
          },
          {
            type     = "bind"
            target   = "/media"
            source   = "${var.media_dir}"
            readonly = false
            bind_options = {
              propagation = "rshared"
            }
          },
        ]
      }

      env {
        # Run as nobody/nogroup.
        PUID       = "65534"
        PGID       = "65534"
        TZ         = "Etc/UTC"
        WEBUI_PORT = "8081"
      }

      service {
        port = "http"
        name = "qbittorrent"
        tags = [
          "traefik.enable=true",
        ]
        # TCP check only: the UI requires auth, so HTTP checks would 401/403.
        check {
          type     = "tcp"
          interval = "10s"
          timeout  = "2s"
          check_restart {
            limit           = 3
            grace           = "90s"
            ignore_warnings = false
          }
        }
      }

      resources {
        cpu    = 1000
        memory = 1024
      }
    }
  }
}

variable "region" {}
variable "tld" {}
variable "shared_dir" {}
variable "downloads_dir" {}
variable "media_dir" {}

variable "mullvad_wireguard_key" {
  type        = string
  description = "Mullvad WireGuard private key"
}

variable "mullvad_wireguard_addr" {
  type        = string
  description = "Mullvad WireGuard interface address"
}
================================================
FILE: nomad_jobs/media-stack/radarr/nomad.job
================================================
# Radarr movie manager: PostgreSQL-backed, config.xml templated at deploy
# time, /config persisted on a CSI (iSCSI) volume.
job "radarr" {
  region      = var.region
  datacenters = ["dc1"]
  type        = "service"

  meta {
    job_file = "nomad_jobs/media-stack/radarr/nomad.job"
    version  = "10" // Full config.xml template with API key
  }

  group "downloaders" {
    count = 1

    network {
      # Dynamic host port mapped to container port 7878 (Radarr default).
      port "http" {
        host_network = "lan"
        to           = "7878"
      }
    }

    # Persistent /config backed by the "radarr2" CSI volume.
    volume "radarr" {
      type            = "csi"
      read_only       = false
      source          = "radarr2"
      access_mode     = "single-node-writer"
      attachment_mode = "file-system"
    }

    update {
      max_parallel     = 1
      min_healthy_time = "30s"
      auto_revert      = true
    }

    task "radarr" {
      driver = "docker"

      config {
        image       = "linuxserver/radarr:6.1.1"
        dns_servers = ["192.168.50.2"]
        ports       = ["http"]
        volumes = [
          "${var.downloads_dir}:/downloads",
          "${var.movies_dir}:/media/Movies",
          "local/config.xml:/config/config.xml",
        ]
      }

      volume_mount {
        volume      = "radarr"
        destination = "/config"
        read_only   = false
      }

      env {
        UMASK_SET = "022"
        TZ        = "UTC"
        # Run as nobody/nogroup.
        PUID = "65534"
        PGID = "65534"
      }

      # NOTE(review): the heredoc opener after `data = <` and the XML tags of
      # this config.xml template appear to have been lost in extraction
      # (expected something like `data = <<EOH` followed by <Config> markup).
      # Restore the template from version control before deploying.
      template {
        data = <
postgres
${var.postgres_pass}
5432
postgres.service.consul
radarr_main
radarr_logs
info
*
7878
9898
False
False
${var.radarr_api_key}
External
DisabledForLocalAddresses
100.64.0.0/10,192.168.50.0/24
master
Radarr
Docker
EOH
        destination = "local/config.xml"
        perms       = "644"
      }

      service {
        port = "http"
        name = "radarr"
        tags = [
          "traefik.enable=true"
        ]
        # /ping is Radarr's unauthenticated health endpoint.
        check {
          type     = "http"
          path     = "/ping"
          interval = "10s"
          timeout  = "2s"
          check_restart {
            limit           = 3
            grace           = "60s"
            ignore_warnings = false
          }
        }
      }

      resources {
        cpu    = 1000
        memory = 512
      }
    }
  }
}

variable "region" {
  type = string
}
variable "tld" {
  type = string
}
variable "downloads_dir" {
  type = string
}
variable "tv_dir" {
  type = string
}
variable "movies_dir" {
  type = string
}
variable "postgres_pass" {
  type        = string
  description = "Admin password for PostgreSQL"
}
variable "radarr_api_key" {
  type        = string
  description = "API key for Radarr"
}
================================================
FILE: nomad_jobs/media-stack/radarr/volume.hcl
================================================
# CSI volume registration for Radarr's /config data,
# provisioned by democratic-csi over iSCSI.
id          = "radarr2"
external_id = "radarr2"
name        = "radarr2"
type        = "csi"
plugin_id   = "org.democratic-csi.iscsi"

# Fixed 10 GiB allocation (min == max).
capacity_min = "10GiB"
capacity_max = "10GiB"

capability {
  access_mode     = "single-node-writer"
  attachment_mode = "block-device"
}

# Block device is formatted/mounted as ext4 by the plugin.
mount_options {
  fs_type     = "ext4"
  mount_flags = ["noatime"]
}
================================================
FILE: nomad_jobs/media-stack/requestrr/nomad.job
================================================
# Requestrr chat bot (media request bridge), config on shared storage.
job "requestrr" {
  region      = var.region
  datacenters = ["dc1"]
  type        = "service"

  meta {
    job_file = "nomad_jobs/media-stack/requestrr/nomad.job"
    version  = "1"
  }

  # Only schedule on nodes that mount the shared storage directory.
  constraint {
    attribute = "${meta.shared_mount}"
    operator  = "="
    value     = "true"
  }

  group "media" {
    count = 1

    network {
      # Dynamic host port mapped to container port 4545 (Requestrr default).
      port "http" {
        host_network = "lan"
        to           = 4545
      }
    }

    restart {
      attempts = 3
      delay    = "15s"
      interval = "10m"
      mode     = "delay"
    }

    update {
      max_parallel     = 1
      min_healthy_time = "30s"
      auto_revert      = true
    }

    task "requestrr" {
      driver = "docker"

      config {
        dns_servers = ["192.168.50.2"]
        image       = "thomst08/requestrr:v2.1.9"
        ports       = ["http"]
        volumes = [
          "${var.shared_dir}requestrr:/root/config",
        ]
      }

      env {
        TZ = "Etc/UTC"
      }

      service {
        port = "http"
        name = "requestrr"
        tags = [
          "traefik.enable=true"
        ]
        check {
          type     = "tcp"
          interval = "10s"
          timeout  = "2s"
        }
      }

      resources {
        cpu    = 200
        memory = 256
      }
    }
  }
}

variable "region" {
  type = string
}
variable "tld" {
  type = string
}
variable "shared_dir" {
  type = string
}
================================================
FILE: nomad_jobs/media-stack/sabnzbd/nomad.job
================================================
# SABnzbd usenet downloader, host-networked, config on shared storage.
job "sabnzbd" {
  region      = var.region
  datacenters = ["dc1"]
  type        = "service"

  meta {
    job_file = "nomad_jobs/media-stack/sabnzbd/nomad.job"
    version  = "6"
  }

  # Only schedule on nodes that mount the shared storage directory.
  constraint {
    attribute = "${meta.shared_mount}"
    operator  = "="
    value     = "true"
  }

  group "downloaders" {
    count = 1

    network {
      port "http" {
        host_network = "lan"
        static       = "8080"
      }
    }

    update {
      max_parallel     = 1
      min_healthy_time = "30s"
      auto_revert      = true
    }

    task "sabnzbd" {
      driver = "docker"

      config {
        image        = "linuxserver/sabnzbd"
        network_mode = "host"
        ports        = ["http"]
        mounts = [
          {
            type     = "bind"
            target   = "/config"
            source   = "${var.shared_dir}sabnzbd",
            readonly = false
            bind_options = {
              propagation = "rshared"
            }
          },
          {
            # NOTE(review): /downloads is bound to node-local /tmp rather than
            # var.downloads_dir (declared below but unused) — presumably fast
            # local scratch for unpacking; confirm this is intentional.
            type     = "bind"
            target   = "/downloads"
            source   = "/tmp"
            readonly = false
            bind_options = {
              propagation = "rshared"
            }
          },
          {
            type     = "bind"
            target   = "/media"
            source   = "${var.media_dir}"
            readonly = false
            bind_options = {
              propagation = "rshared"
            }
          }
        ]
      }

      env {
        # Run as nobody/nogroup.
        PUID = "65534"
        PGID = "65534"
        TZ   = "Etc/UTC"
      }

      service {
        port = "http"
        name = "${NOMAD_TASK_NAME}"
        tags = [
          "traefik.enable=true"
        ]
        # The auth-mode API endpoint responds without an API key.
        check {
          type     = "http"
          path     = "/api?mode=auth"
          interval = "10s"
          timeout  = "2s"
          check_restart {
            limit           = 3
            grace           = "60s"
            ignore_warnings = false
          }
        }
      }

      resources {
        cpu        = 1000 # Match actual usage (952 MHz observed)
        memory     = 3072 # Accommodate 2GB cache + 1GB overhead
        memory_max = 4096 # Hard limit for burst usage
      }
    }
  }
}

variable "region" {}
variable "tld" {}
variable "shared_dir" {}
variable "media_dir" {}
variable "downloads_dir" {}
================================================
FILE: nomad_jobs/media-stack/sickchill/nomad.job
================================================
# SickChill TV show manager, config on shared storage.
job "sickchill" {
  region      = var.region
  datacenters = ["dc1"]
  type        = "service"

  meta {
    job_file = "nomad_jobs/media-stack/sickchill/nomad.job"
    version  = "4"
  }

  # Only schedule on nodes that mount the shared storage directory.
  constraint {
    attribute = "${meta.shared_mount}"
    operator  = "="
    value     = "true"
  }

  group "downloaders" {
    count = 1

    network {
      # Dynamic host port mapped to container port 8081.
      port "http" {
        host_network = "lan"
        to           = "8081"
      }
    }

    update {
      max_parallel     = 1
      min_healthy_time = "30s"
      auto_revert      = true
    }

    task "sickchill" {
      driver = "docker"

      config {
        image       = "linuxserver/sickchill:2024.3.1"
        dns_servers = ["192.168.50.2"]
        ports       = ["http"]
        volumes = [
          "${var.downloads_dir}:/downloads",
          "${var.tv_dir}:/tv",
          "${var.shared_dir}sickchill:/config",
        ]
      }

      env {
        # Run as nobody/nogroup.
        PUID = "65534"
        PGID = "65534"
        TZ   = "Etc/UTC"
      }

      service {
        port = "http"
        name = "sickchill"
        tags = [
          "traefik.enable=true"
        ]
        check {
          type     = "http"
          path     = "/"
          interval = "10s"
          timeout  = "2s"
          check_restart {
            limit           = 3
            grace           = "60s"
            ignore_warnings = false
          }
        }
      }

      resources {
        cpu    = 1000
        memory = 256
      }
    }
  }
}

variable "region" {
  type = string
}
variable "tld" {
  type = string
}
variable "downloads_dir" {
  type = string
}
variable "tv_dir" {
  type = string
}
variable "shared_dir" {
  type = string
}
================================================
FILE: nomad_jobs/media-stack/sonarr/nomad.job
================================================
# Sonarr TV series manager: PostgreSQL-backed, config.xml templated at
# deploy time, config on shared storage.
job "sonarr" {
  region      = var.region
  datacenters = ["dc1"]
  type        = "service"

  meta {
    job_file = "nomad_jobs/media-stack/sonarr/nomad.job"
    version  = "11" // Full config.xml template with API key
  }

  # Only schedule on nodes that mount the shared storage directory.
  constraint {
    attribute = "${meta.shared_mount}"
    operator  = "="
    value     = "true"
  }

  group "downloaders" {
    count = 1

    network {
      # Dynamic host port mapped to container port 8989 (Sonarr default).
      port "http" {
        host_network = "lan"
        to           = "8989"
      }
    }

    update {
      max_parallel     = 1
      min_healthy_time = "30s"
      auto_revert      = true
    }

    task "sonarr" {
      driver = "docker"

      config {
        image       = "linuxserver/sonarr:4.0.17"
        dns_servers = ["192.168.50.2"]
        ports       = ["http"]
        volumes = [
          "${var.shared_dir}sonarr:/config",
          "${var.downloads_dir}:/downloads",
          "${var.tv_dir}:/media/TV",
          "local/config.xml:/config/config.xml",
        ]
      }

      env {
        # Run as nobody/nogroup.
        PUID = "65534"
        PGID = "65534"
        TZ   = "Etc/UTC"
      }

      # NOTE(review): the heredoc opener after `data = <` and the XML tags of
      # this config.xml template appear to have been lost in extraction
      # (expected something like `data = <<EOH` followed by <Config> markup).
      # Restore the template from version control before deploying.
      template {
        data = <
postgres
${var.postgres_pass}
5432
postgres.service.consul
sonarr_main
sonarr_logs
info
*
8989
9898
False
False
${var.sonarr_api_key}
External
DisabledForLocalAddresses
100.64.0.0/10,192.168.50.0/24
main
Sonarr
Docker
EOH
        destination = "local/config.xml"
        perms       = "644"
      }

      service {
        port = "http"
        name = "sonarr"
        tags = [
          "traefik.enable=true",
        ]
        # /ping is Sonarr's unauthenticated health endpoint.
        check {
          type     = "http"
          path     = "/ping"
          interval = "10s"
          timeout  = "2s"
          check_restart {
            limit           = 3
            grace           = "60s"
            ignore_warnings = false
          }
        }
      }

      resources {
        cpu    = 1000
        memory = 512
      }
    }
  }
}

variable "region" {
  type = string
}
variable "tld" {
  type = string
}
variable "shared_dir" {
  type = string
}
variable "downloads_dir" {
  type = string
}
variable "tv_dir" {
  type = string
}
variable "postgres_pass" {
  type        = string
  description = "Admin password for PostgreSQL"
}
variable "sonarr_api_key" {
  type        = string
  description = "API key for Sonarr"
}
================================================
FILE: nomad_jobs/media-stack/synclounge/nomad.job
================================================
# SyncLounge (synchronized Plex watching), exposed over Tailscale.
job "synclounge" {
  region      = var.region
  datacenters = ["dc1"]
  type        = "service"

  meta {
    job_file = "nomad_jobs/media-stack/synclounge/nomad.job"
    version  = "4"
  }

  group "synclounge" {
    count = 1

    network {
      # Web client; with host networking the `to` values are informational.
      port "http" {
        host_network = "tailscale"
        to           = "8088"
      }
      # SyncLounge server (websocket) port.
      port "server" {
        host_network = "tailscale"
        to           = "8089"
      }
    }

    restart {
      attempts = 3
      delay    = "15s"
      interval = "10m"
      mode     = "delay"
    }

    update {
      max_parallel     = 1
      min_healthy_time = "30s"
      auto_revert      = true
    }

    task "plexlounge" {
      driver = "docker"

      config {
        image        = "starbix/synclounge"
        network_mode = "host"
        force_pull   = "true"
        ports        = ["http", "server"]
      }

      env {
        DOMAIN = "${NOMAD_TASK_NAME}.${var.tld}"
      }

      # Web-client service, routed by Traefik behind forward-auth.
      service {
        port = "http"
        name = "plexlounge"
        tags = [
          "traefik.enable=true",
          "traefik.http.middlewares.httpsRedirect.redirectscheme.scheme=https",
          "traefik.http.routers.${NOMAD_TASK_NAME}.tls.domains[0].sans=${NOMAD_TASK_NAME}.${var.tld}",
          "traefik.http.routers.${NOMAD_TASK_NAME}.middlewares=forward-auth"
        ]
        check {
          type     = "tcp"
          interval = "10s"
          timeout  = "2s"
        }
      }

      # Sync-server service under its own hostname.
      service {
        port = "server"
        name = "syncserver"
        tags = [
          "traefik.enable=true",
          "traefik.http.middlewares.httpsRedirect.redirectscheme.scheme=https",
          "traefik.http.routers.syncserver.tls.domains[0].sans=syncserver.${var.tld}",
          "traefik.http.routers.syncserver.middlewares=forward-auth"
        ]
        check {
          type     = "tcp"
          interval = "10s"
          timeout  = "2s"
        }
      }

      resources {
        cpu    = 3500
        memory = 512
      }
    }
  }
}

variable "region" {
  type = string
}
variable "tld" {
  type = string
}
================================================
FILE: nomad_jobs/media-stack/tautulli/nomad.job
================================================
# Tautulli Plex monitoring/statistics, exposed over Tailscale.
job "tautulli" {
  region      = var.region
  datacenters = ["dc1"]
  type        = "service"

  meta {
    job_file = "nomad_jobs/media-stack/tautulli/nomad.job"
    version  = "3"
  }

  # Only schedule on nodes that mount the shared storage directory.
  constraint {
    attribute = "${meta.shared_mount}"
    operator  = "="
    value     = "true"
  }

  group "metrics" {
    count = 1

    network {
      port "http" {
        host_network = "tailscale"
        to           = "8181"
      }
    }

    update {
      max_parallel     = 1
      min_healthy_time = "30s"
      auto_revert      = true
    }

    task "tautulli" {
      driver = "docker"

      config {
        image = "tautulli/tautulli"
        ports = ["http"]
        volumes = [
          "${var.shared_dir}tautulli:/config",
          # NOTE(review): "[[ .dirs.plexlogs ]]" is a leftover Levant
          # placeholder in an HCL2 var-based job — Docker will treat it as a
          # literal host path. Replace with an HCL2 variable (and the target
          # /media/TV looks mislabeled for Plex logs); confirm intended path.
          "[[ .dirs.plexlogs ]]:/media/TV",
        ]
      }

      service {
        port = "http"
        name = "tautulli"
        # Legacy tag scheme (differs from the traefik.enable tags used by
        # sibling jobs in this stack).
        tags = ["net-internal", "net-external", "tautulli", "net.frontend.entryPoints=https"]
        check {
          type     = "http"
          path     = "/"
          interval = "10s"
          timeout  = "2s"
          check_restart {
            limit           = 3
            grace           = "60s"
            ignore_warnings = false
          }
        }
      }

      resources {
        cpu    = 100
        memory = 128
      }
    }
  }
}

variable "region" {
  type = string
}
variable "tld" {
  type = string
}
variable "shared_dir" {
  type = string
}
================================================
FILE: nomad_jobs/media-stack/tdarr/nomad.job
================================================
# Tdarr transcoding server+node, pinned to the GPU host for NVENC.
job "tdarr" {
  region      = var.region
  datacenters = ["cheese"]
  type        = "service"
  priority    = 50

  meta {
    job_file = "nomad_jobs/media-stack/tdarr/nomad.job"
    version  = "4" // Move to cheese01 for NVENC GPU transcoding
  }

  group "tdarr" {
    count = 1

    # Pin to the node with the NVIDIA GPU.
    constraint {
      attribute = "${attr.unique.hostname}"
      value     = "cheese01"
    }

    network {
      # Web UI.
      port "http" {
        host_network = "lan"
        static       = 8265
      }
      # Tdarr server port (nodes connect here).
      port "server" {
        host_network = "lan"
        static       = 8266
      }
    }

    # Persistent server state backed by the "tdarr" CSI volume.
    volume "tdarr" {
      type            = "csi"
      read_only       = false
      source          = "tdarr"
      access_mode     = "single-node-writer"
      attachment_mode = "file-system"
    }

    restart {
      attempts = 3
      delay    = "15s"
      interval = "10m"
      mode     = "delay"
    }

    # Generous deadlines: first boot pulls the image and attaches the volume.
    update {
      max_parallel      = 1
      min_healthy_time  = "30s"
      healthy_deadline  = "9m"
      progress_deadline = "15m"
      auto_revert       = true
    }

    task "tdarr" {
      driver = "docker"

      config {
        image        = "ghcr.io/haveagitgat/tdarr:latest"
        network_mode = "host"
        privileged   = true
        runtime      = "nvidia" # expose the GPU for NVENC transcoding
        force_pull   = true
        ports        = ["http", "server"]
        volumes = [
          "/tmp/tdarr:/temp", # node-local transcode scratch space
          "${var.shared_dir}tdarr/configs:/app/configs",
          "${var.shared_dir}tdarr/logs:/app/logs",
          "${var.media_dir}:/media",
        ]
      }

      volume_mount {
        volume      = "tdarr"
        destination = "/app/server"
        read_only   = false
      }

      env {
        PUID                   = "1000"
        PGID                   = "1000"
        NVIDIA_VISIBLE_DEVICES = "all"
        serverIP               = "0.0.0.0"
        serverPort             = "8266"
        webUIPort              = "8265"
        # Run an internal worker node alongside the server.
        internalNode = "true"
        nodeName     = "cheese01"
      }

      service {
        port = "http"
        name = "tdarr"
        tags = [
          "traefik.enable=true",
        ]
        check {
          type     = "tcp"
          interval = "10s"
          timeout  = "2s"
          check_restart {
            limit           = 3
            grace           = "120s"
            ignore_warnings = false
          }
        }
      }

      resources {
        cpu    = 2000
        memory = 2048
      }
    }
  }
}

variable "region" {}
variable "tld" {}
variable "shared_dir" {}
variable "media_dir" {}
================================================
FILE: nomad_jobs/media-stack/tdarr/volume.hcl
================================================
# CSI volume registration for Tdarr's /app/server state,
# provisioned by democratic-csi over iSCSI.
id          = "tdarr"
external_id = "tdarr"
name        = "tdarr"
type        = "csi"
plugin_id   = "org.democratic-csi.iscsi"

# Fixed 10 GiB allocation (min == max).
capacity_min = "10GiB"
capacity_max = "10GiB"

capability {
  access_mode     = "single-node-writer"
  attachment_mode = "block-device"
}

# Block device is formatted/mounted as ext4 by the plugin.
mount_options {
  fs_type     = "ext4"
  mount_flags = ["noatime"]
}
================================================
FILE: nomad_jobs/misc/adb/nomad.job
================================================
job "adb" {
region = var.region
datacenters = ["dc1"]
type = "service"
meta {
job_file = "nomad_jobs/misc/adb/nomad.job"
version = "4"
}
constraint {
attribute = "${meta.shared_mount}"
operator = "="
value = "true"
}
constraint {
attribute = "${meta.zigbee}"
operator = "="
value = "true"
}
group "downloaders" {
count = 1
network {
mode = "host"
port "tcp" {
static = "5037"
host_network = "lan"
}
}
restart {
attempts = 3
delay = "15s"
interval = "10m"
mode = "delay"
}
update {
max_parallel = 1
min_healthy_time = "30s"
auto_revert = true
}
task "adb" {
driver = "docker"
config {
image = "docker-registry.demonsafe.com/adb"
entrypoint = ["/local/start.sh"]
network_mode = "host"
extra_hosts = ["hassio:127.0.0.1"]
args = ["&", "adb", "-a", "-P", "5037", "server", "nodaemon"]
volumes = [
"${var.shared_dir}home-assistant/android:/root/.android",
]
}
env {
log_level = "warning"
}
service {
port = "tcp"
name = "adb"
tags = ["net-internal", "adb"]
check {
type = "tcp"
interval = "10s"
timeout = "2s"
}
}
template {
data = < IAM & Admin -> Service Accounts.
* Create a new service account (e.g., `gcp-dns-updater-sa`).
* Grant this service account the `DNS Administrator` role (`roles/dns.admin`) on the project containing your managed zone.
* Create a JSON key file for this service account and download it securely. You will need the *contents* of this file, not the file itself.
3. **Nomad Environment:** A running Nomad cluster where this job can be scheduled. The Nomad clients must have Docker installed and configured.
## Configuration
The service is configured via environment variables passed to the Nomad task, which are then consumed by the `update_dns.py` script running inside the Docker container:
* `GCP_DNS_ZONE_NAME`: The name of the managed zone in GCP DNS (e.g., `demonsafe-com`). The script derives the Project ID from the credentials.
* `GCP_DNS_RECORD_NAME`: The DNS record name to update (e.g., `*.demonsafe.com`). **Note:** The script expects the base name; the trailing dot is handled internally if needed by the SDK.
* `RECORD_TTL`: (Optional) The Time-To-Live (in seconds) for the created/updated A record. Defaults to 300 if not set.
* `GCP_PROJECT_ID`: The Google Cloud Project ID containing the DNS zone.
* `GCP_SERVICE_ACCOUNT_KEY_B64`: **Required.** The base64-encoded *content* of the GCP service account JSON key file.
**Generating the Base64 Key:**
You need to encode the *content* of your downloaded JSON key file into a single-line base64 string.
On Linux/macOS, you can use:
```bash
base64 -w 0 < /path/to/your/gcp_key.json
```
*(Ensure you use `-w 0` or an equivalent flag for your `base64` command to prevent line wrapping)*
Copy the resulting string.
**Setting Environment Variables in Nomad:**
These variables are defined within the `env` block of the `nomad.job` file using Go templating to read runtime environment variables provided by the Nomad agent (which in turn are often sourced from the deployment mechanism, like GitHub Actions):
```hcl
# Example within nomad.job task config
env {
GCP_DNS_ZONE_NAME = < 50 else ''}")
# Clean the base64 string - remove any whitespace/newlines
key_b64 = key_b64.strip().replace('\n', '').replace('\r', '').replace(' ', '')
logging.info(f"Cleaned key length: {len(key_b64)}")
logging.info(f"Cleaned key content (first 50 chars): {key_b64[:50]}{'...' if len(key_b64) > 50 else ''}")
# Check if this looks like a valid base64 string
if len(key_b64) < 100:
logging.warning(f"Service account key seems too short ({len(key_b64)} chars). Expected several thousand characters.")
logging.warning(f"Full key content: '{key_b64}'")
logging.error("The GCP_SERVICE_ACCOUNT_KEY_B64 environment variable appears to contain invalid or incomplete data.")
sys.exit(1)
# Fix base64 padding if needed
missing_padding = len(key_b64) % 4
if missing_padding:
padding_needed = 4 - missing_padding
key_b64 += '=' * padding_needed
logging.info(f"Added {padding_needed} padding characters")
logging.info(f"Final key length: {len(key_b64)}")
decoded_key = base64.b64decode(key_b64, validate=True)
logging.info("Base64 key decoded successfully.")
logging.info("Parsing service account key JSON...")
key_info = json.loads(decoded_key)
logging.info("Service account key JSON parsed successfully.")
credentials = service_account.Credentials.from_service_account_info(key_info)
client = dns.Client(project=project_id, credentials=credentials)
logging.info(f"Successfully created DNS client for project {project_id}")
return client
except binascii.Error as e:
logging.error(f"Failed to decode base64 service account key: {e}")
sys.exit(1)
except json.JSONDecodeError as e:
logging.error(f"Failed to parse service account key JSON: {e}")
sys.exit(1)
except Exception as e:
logging.error(f"Failed to create DNS client from service account info: {e}")
sys.exit(1)
def update_traefik_whitelist(ip_address: str):
    """Write the Traefik dynamic IP-whitelist middleware config.

    Renders a TOML ``ipAllowList`` middleware whose sourceRange contains the
    current public IP (/32) plus the private (RFC1918) and CGNAT/Tailscale
    ranges, and writes it where the Traefik file provider watches.

    Best-effort: failures are logged, never raised, so a whitelist problem
    does not abort the surrounding DNS update run.

    Args:
        ip_address: Current public IPv4 address, e.g. "203.0.113.7".
    """
    # Fix: the original also built an equivalent `traefik_config` dict that
    # was never serialized or used anywhere (dead code); only the hand-written
    # TOML below is ever persisted, so the dict has been removed.
    try:
        logging.info(f"Updating Traefik whitelist with IP: {ip_address}")
        config_path = "/shared/traefik-ingress/dynamic-whitelist.toml"
        # Write as TOML format (picked up dynamically by Traefik's file provider).
        toml_content = f"""[http.middlewares.home-ip-whitelist.ipAllowList]
sourceRange = ["{ip_address}/32", "192.168.0.0/16", "10.0.0.0/8", "172.16.0.0/12", "100.64.0.0/10"]
"""
        with open(config_path, 'w') as f:
            f.write(toml_content)
        logging.info(f"Successfully updated Traefik whitelist configuration at {config_path}")
    except Exception as e:
        logging.error(f"Failed to update Traefik whitelist: {e}")
def update_dns_record(client: dns.Client, project_id: str, zone_name: str, record_name: str, ip_address: str):
    """Ensure an A record for ``record_name`` points at ``ip_address``.

    Deletes a conflicting CNAME of the same name if present, then creates or
    replaces the A record (TTL 300) and polls until the change reports done.
    Errors are logged and swallowed so one failed record does not abort the run.

    Args:
        client: Authenticated google.cloud.dns client.
        project_id: GCP project that owns the zone.
        zone_name: GCP managed-zone name, used as-is (e.g. "demonsafe-com").
        record_name: Record to manage, e.g. "*.demonsafe.com" (trailing dot optional).
        ip_address: IPv4 address the A record should hold.
    """
    # Fix: bind these BEFORE the try block. The except handlers below log
    # `fqdn`/`gcp_zone_name`; previously `fqdn` was only assigned mid-try, so
    # an exception from zone()/zone.exists() raised NameError inside the handler.
    gcp_zone_name = zone_name
    fqdn = record_name if record_name.endswith('.') else f"{record_name}."
    try:
        logging.info(f"Targeting GCP DNS Zone: {gcp_zone_name}")
        zone = client.zone(gcp_zone_name, project_id)
        if not zone.exists():
            logging.error(f"DNS zone '{gcp_zone_name}' not found in project '{project_id}'.")
            return
        logging.info(f"Checking DNS records for: {fqdn} in zone {gcp_zone_name}")
        record_sets = list(zone.list_resource_record_sets())
        existing_a_record = None
        existing_cname_record = None
        for record_set in record_sets:
            if record_set.record_type == 'A' and record_set.name == fqdn:
                existing_a_record = record_set
                logging.info(f"Found existing A record: {existing_a_record.name} -> {existing_a_record.rrdatas}")
            elif record_set.record_type == 'CNAME' and record_set.name == fqdn:
                existing_cname_record = record_set
                logging.info(f"Found existing CNAME record: {existing_cname_record.name} -> {existing_cname_record.rrdatas}")
        changes = zone.changes()
        needs_update = False
        if existing_cname_record:
            # An A and a CNAME of the same name cannot coexist: drop the CNAME.
            logging.warning(f"Deleting existing CNAME record {fqdn} to replace with A record.")
            changes.delete_record_set(existing_cname_record)
            needs_update = True
            existing_a_record = None
        new_a_record = zone.resource_record_set(fqdn, "A", 300, [ip_address])
        if existing_a_record:
            if existing_a_record.rrdatas == [ip_address]:
                logging.info(f"Existing A record {fqdn} already points to {ip_address}. No update needed.")
                return
            else:
                # Replace: rrsets are immutable, so delete the old and add the new.
                logging.info(f"Existing A record {fqdn} points to {existing_a_record.rrdatas}. Updating to {ip_address}.")
                changes.delete_record_set(existing_a_record)
                changes.add_record_set(new_a_record)
                needs_update = True
        elif not existing_cname_record:
            logging.info(f"No existing A or CNAME record found for {fqdn}. Creating new A record pointing to {ip_address}.")
            changes.add_record_set(new_a_record)
            needs_update = True
        elif existing_cname_record:
            logging.info(f"Adding A record for {fqdn} pointing to {ip_address} after CNAME deletion.")
            changes.add_record_set(new_a_record)
        if needs_update:
            logging.info(f"Executing DNS changes for {fqdn} in zone {gcp_zone_name}...")
            changes.create()
            # Poll the change set until Cloud DNS reports it as propagated.
            while changes.status != 'done':
                logging.info(f"Waiting for DNS changes to complete (status: {changes.status})...")
                time.sleep(5)
                changes.reload()
            logging.info(f"Successfully updated DNS record {fqdn} to {ip_address} in zone {gcp_zone_name}.")
        else:
            logging.info("No DNS changes were necessary.")
    except GoogleAPIError as e:
        logging.error(f"GCP API Error updating DNS record {fqdn} in zone {gcp_zone_name}: {e}")
    except Exception as e:
        logging.error(f"An unexpected error occurred during DNS update for {fqdn} in zone {gcp_zone_name}: {e}")
def update_spf_record(client: dns.Client, project_id: str, zone_name: str, record_name: str, ip_address: str):
    """Keep the bare domain's SPF TXT record pointing at the current IP.

    Derives the apex domain from ``record_name`` (dropping a leading "*."),
    then rewrites the ``v=spf1`` entry inside the domain's TXT rrset to
    ``"v=spf1 ip4:<ip> ~all"``, preserving any non-SPF TXT values. Creates
    the TXT record if none exists. Errors are logged, not raised.

    Args:
        client: Authenticated google.cloud.dns client.
        project_id: GCP project that owns the zone.
        zone_name: GCP managed-zone name, used as-is.
        record_name: Managed record, e.g. "*.demonsafe.com".
        ip_address: IPv4 address to advertise in the SPF entry.
    """
    try:
        gcp_zone_name = zone_name
        logging.info(f"Updating SPF record in zone: {gcp_zone_name}")
        zone = client.zone(gcp_zone_name, project_id)
        if not zone.exists():
            logging.error(f"DNS zone '{gcp_zone_name}' not found in project '{project_id}'.")
            return
        # Derive bare domain from record_name (e.g., "*.demonsafe.com" -> "demonsafe.com.")
        # Fix: str.lstrip('*.') strips a *character set* (every leading '*' or
        # '.'), not the "*." prefix — slice off exactly the first two chars.
        domain = record_name[2:] if record_name.startswith('*.') else record_name
        fqdn = domain if domain.endswith('.') else f"{domain}."
        logging.info(f"Checking TXT records for: {fqdn}")
        spf_value = f'"v=spf1 ip4:{ip_address} ~all"'
        record_sets = list(zone.list_resource_record_sets())
        existing_txt = None
        for rs in record_sets:
            if rs.record_type == 'TXT' and rs.name == fqdn:
                existing_txt = rs
                logging.info(f"Found existing TXT record: {rs.name} -> {rs.rrdatas}")
                break
        changes = zone.changes()
        needs_update = False
        if existing_txt:
            # Rebuild the rrdatas list, swapping only the SPF entry.
            new_rrdatas = []
            spf_found = False
            for rd in existing_txt.rrdatas:
                if 'v=spf1' in rd:
                    spf_found = True
                    if ip_address in rd:
                        logging.info(f"SPF record already contains {ip_address}. No update needed.")
                        return
                    logging.info(f"Replacing SPF entry: {rd} -> {spf_value}")
                    new_rrdatas.append(spf_value)
                else:
                    new_rrdatas.append(rd)
            if not spf_found:
                logging.info(f"No existing SPF entry found. Adding: {spf_value}")
                new_rrdatas.append(spf_value)
            # TXT rrsets are replaced wholesale: delete old set, add rebuilt set.
            changes.delete_record_set(existing_txt)
            new_txt = zone.resource_record_set(fqdn, "TXT", 300, new_rrdatas)
            changes.add_record_set(new_txt)
            needs_update = True
        else:
            logging.info(f"No TXT record found for {fqdn}. Creating with SPF: {spf_value}")
            new_txt = zone.resource_record_set(fqdn, "TXT", 300, [spf_value])
            changes.add_record_set(new_txt)
            needs_update = True
        if needs_update:
            logging.info(f"Executing SPF TXT changes for {fqdn}...")
            changes.create()
            # Poll the change set until Cloud DNS reports it as propagated.
            while changes.status != 'done':
                logging.info(f"Waiting for SPF changes to complete (status: {changes.status})...")
                time.sleep(5)
                changes.reload()
            logging.info(f"Successfully updated SPF record for {fqdn} with ip4:{ip_address}")
    except GoogleAPIError as e:
        logging.error(f"GCP API Error updating SPF record: {e}")
    except Exception as e:
        logging.error(f"Unexpected error updating SPF record: {e}")
if __name__ == "__main__":
    # Entry point: resolve public IP, then push it to DNS (A + SPF) and to
    # the Traefik whitelist. Guard clauses replace the original nested ifs;
    # every failure path still exits with status 1 before the final sleep.
    logging.info("Starting DNS update script.")
    project_id, zone_name, record_name, key_b64 = get_env_vars()
    logging.info(f"Environment variables loaded - zone_name: '{zone_name}', record_name: '{record_name}'")
    public_ip = get_public_ip()
    if not public_ip:
        logging.error("Exiting due to inability to fetch public IP.")
        sys.exit(1)
    dns_client = get_dns_client(key_b64, project_id)
    if not dns_client:
        logging.error("Exiting due to DNS client initialization failure.")
        sys.exit(1)
    update_dns_record(dns_client, project_id, zone_name, record_name, public_ip)
    update_spf_record(dns_client, project_id, zone_name, record_name, public_ip)
    update_traefik_whitelist(public_ip)
    logging.info("DNS, SPF, and Traefik whitelist update script finished.")
    # Sleep to allow log viewing before container exits
    logging.info("Sleeping for 10 seconds to allow log viewing...")
    time.sleep(10)
EOF
destination = "local/update_dns.py"
}
resources {
cpu = 100
memory = 128
}
}
}
}
variable "gcp_project_id" {}
variable "dns_zone" {}
variable "tld" {}
variable "gcp_dns_admin" {}
variable "shared_dir" {}
================================================
FILE: nomad_jobs/misc/gcp-dns-updater/requirements.txt
================================================
google-cloud-dns
requests
google-auth
================================================
FILE: nomad_jobs/misc/gcp-dns-updater/update_dns.py
================================================
import os
import requests
import logging
import sys
import base64
import json
import time # Moved import to top
# Import GCP specific libraries
from google.cloud import dns
from google.oauth2 import service_account
from google.api_core.exceptions import GoogleAPIError
# Setup logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
def get_env_vars():
    """Load the required configuration from the environment.

    Returns:
        Tuple ``(project_id, zone_name, record_name, key_b64)`` read from
        GCP_PROJECT_ID, GCP_DNS_ZONE_NAME, GCP_DNS_RECORD_NAME and
        GCP_SERVICE_ACCOUNT_KEY_B64 respectively.

    Exits:
        With status 1 (after logging the missing names) when any of the
        variables is unset or empty.
    """
    required = (
        'GCP_PROJECT_ID',              # GCP project owning the DNS zone
        'GCP_DNS_ZONE_NAME',           # zone, e.g. "demonsafe.com"
        'GCP_DNS_RECORD_NAME',         # record to manage, e.g. "*.demonsafe.com"
        'GCP_SERVICE_ACCOUNT_KEY_B64', # base64-encoded service-account JSON key
    )
    values = {name: os.environ.get(name) for name in required}
    missing = [name for name in required if not values[name]]
    if missing:
        logging.error(f"Missing required environment variables: {', '.join(missing)}")
        sys.exit(1)
    return tuple(values[name] for name in required)
def get_public_ip():
    """Return the host's public IPv4 address as a string.

    Queries https://v4.ifconfig.me/ip with a 10-second timeout. Exits the
    process with status 1 when the lookup fails or returns a bad status.
    """
    try:
        response = requests.get('https://v4.ifconfig.me/ip', timeout=10)
        # Treat any non-2xx response as a failure.
        response.raise_for_status()
        ip_address = response.text.strip()
        logging.info(f"Successfully fetched public IP: {ip_address}")
    except requests.exceptions.RequestException as e:
        logging.error(f"Error fetching public IP: {e}")
        sys.exit(1)
    return ip_address
def get_dns_client(key_b64: str, project_id: str):
    """Build an authenticated google.cloud.dns client.

    Args:
        key_b64: Base64-encoded content of a GCP service-account JSON key.
        project_id: Project the client should operate on (used instead of the
            project embedded in the key, for consistency).

    Returns:
        A ``dns.Client`` on success; exits the process with status 1 on any
        decode/parse/auth failure.
    """
    # Fix: b64decode raises binascii.Error on malformed input. The original
    # caught it via the undocumented `base64.binascii` attribute (an
    # implementation detail of the base64 module); import binascii explicitly,
    # matching the sibling gcp-dns-updater copy of this script.
    import binascii
    try:
        logging.info("Decoding base64 service account key...")
        decoded_key = base64.b64decode(key_b64)
        logging.info("Base64 key decoded successfully.")
        logging.info("Parsing service account key JSON...")
        key_info = json.loads(decoded_key)
        logging.info("Service account key JSON parsed successfully.")
        credentials = service_account.Credentials.from_service_account_info(key_info)
        # Use the provided project_id, not the one from credentials, to ensure consistency
        client = dns.Client(project=project_id, credentials=credentials)
        logging.info(f"Successfully created DNS client for project {project_id}")
        return client
    except binascii.Error as e:
        logging.error(f"Failed to decode base64 service account key: {e}")
        sys.exit(1)
    except json.JSONDecodeError as e:
        logging.error(f"Failed to parse service account key JSON: {e}")
        sys.exit(1)
    except Exception as e:
        logging.error(f"Failed to create DNS client from service account info: {e}")
        sys.exit(1)
def update_dns_record(client: dns.Client, project_id: str, zone_name: str, record_name: str, ip_address: str):
    """
    Checks and updates/creates an A record for the given name in the specified zone,
    replacing a CNAME if necessary.

    Args:
        client: Authenticated DNS client.
        project_id: GCP project ID.
        zone_name: The domain TLD (e.g., "demonsafe.com"). This will be converted
            to the GCP zone name format (e.g., "demonsafe-com").
        record_name: The specific record to update (e.g., "*.demonsafe.com").
        ip_address: The public IP address to set.

    Errors are logged rather than raised; the function returns normally either way.
    """
    # Compute these BEFORE the try block: the except handlers below interpolate
    # them into log messages, and if the exception fired before they were
    # assigned the handler itself would raise NameError, masking the real error.
    gcp_zone_name = zone_name.replace('.', '-')
    # Ensure record_name ends with a dot for FQDN matching
    fqdn = record_name if record_name.endswith('.') else f"{record_name}."
    try:
        logging.info(f"Targeting GCP DNS Zone: {gcp_zone_name}")
        zone = client.zone(gcp_zone_name, project_id)
        if not zone.exists():
            logging.error(f"DNS zone '{gcp_zone_name}' not found in project '{project_id}'.")
            return  # Cannot proceed without the zone
        logging.info(f"Checking DNS records for: {fqdn} in zone {gcp_zone_name}")
        record_sets = list(zone.list_resource_record_sets(filter_=f"name={fqdn}"))
        existing_a_record = None
        existing_cname_record = None
        for record_set in record_sets:
            if record_set.record_type == 'A' and record_set.name == fqdn:
                existing_a_record = record_set
                logging.info(f"Found existing A record: {existing_a_record.name} -> {existing_a_record.rrdatas}")
            elif record_set.record_type == 'CNAME' and record_set.name == fqdn:
                existing_cname_record = record_set
                logging.info(f"Found existing CNAME record: {existing_cname_record.name} -> {existing_cname_record.rrdatas}")
        changes = zone.changes()
        needs_update = False
        # Handle existing CNAME (delete it to replace with A)
        if existing_cname_record:
            logging.warning(f"Deleting existing CNAME record {fqdn} to replace with A record.")
            changes.delete_record_set(existing_cname_record)
            needs_update = True
            # Ensure we don't try to delete an A record if we just deleted a CNAME
            existing_a_record = None
        # Define the new A record we want (TTL 300s)
        new_a_record = zone.resource_record_set(fqdn, "A", 300, [ip_address])
        # Handle existing A record
        if existing_a_record:
            if existing_a_record.rrdatas == [ip_address]:
                logging.info(f"Existing A record {fqdn} already points to {ip_address}. No update needed.")
                return  # Nothing to do
            else:
                logging.info(f"Existing A record {fqdn} points to {existing_a_record.rrdatas}. Updating to {ip_address}.")
                changes.delete_record_set(existing_a_record)
                changes.add_record_set(new_a_record)
                needs_update = True
        # Handle case where no A record (and no CNAME was found/deleted)
        elif not existing_cname_record:  # Only add if we didn't already decide to replace CNAME
            logging.info(f"No existing A or CNAME record found for {fqdn}. Creating new A record pointing to {ip_address}.")
            changes.add_record_set(new_a_record)
            needs_update = True
        # Handle case where CNAME was found and deleted - we still need to add the A record
        elif existing_cname_record:
            logging.info(f"Adding A record for {fqdn} pointing to {ip_address} after CNAME deletion.")
            changes.add_record_set(new_a_record)
            # needs_update should already be True
        # Execute the changes if any were queued
        if needs_update:
            logging.info(f"Executing DNS changes for {fqdn} in zone {gcp_zone_name}...")
            changes.create()
            # Wait until the changes are finished.
            while changes.status != 'done':
                logging.info(f"Waiting for DNS changes to complete (status: {changes.status})...")
                time.sleep(5)  # Wait 5 seconds before checking again
                changes.reload()
            logging.info(f"Successfully updated DNS record {fqdn} to {ip_address} in zone {gcp_zone_name}.")
        else:
            # This case should only be hit if an A record existed and was correct
            logging.info("No DNS changes were necessary.")
    except GoogleAPIError as e:
        logging.error(f"GCP API Error updating DNS record {fqdn} in zone {gcp_zone_name}: {e}")
    except Exception as e:
        logging.error(f"An unexpected error occurred during DNS update for {fqdn} in zone {gcp_zone_name}: {e}")
def update_spf_record(client: dns.Client, project_id: str, zone_name: str, record_name: str, ip_address: str):
    """Updates the SPF TXT record on the bare domain with the current public IP.

    Args:
        client: Authenticated DNS client.
        project_id: GCP project ID.
        zone_name: The domain TLD (e.g., "demonsafe.com"), converted to GCP
            zone name format internally.
        record_name: The record the A-record updater targets (e.g.,
            "*.demonsafe.com"); the leading "*." is dropped to get the bare domain.
        ip_address: The public IP to embed as "ip4:<ip>" in the SPF value.

    Errors are logged rather than raised; the function returns normally either way.
    """
    try:
        gcp_zone_name = zone_name.replace('.', '-')
        logging.info(f"Updating SPF record in zone: {gcp_zone_name}")
        zone = client.zone(gcp_zone_name, project_id)
        if not zone.exists():
            logging.error(f"DNS zone '{gcp_zone_name}' not found in project '{project_id}'.")
            return
        # Derive the bare domain (e.g., "*.demonsafe.com" -> "demonsafe.com").
        # Slice off the literal "*." prefix; str.lstrip strips a character SET,
        # not a prefix, and could eat leading domain characters on other inputs.
        domain = record_name[2:] if record_name.startswith('*.') else record_name
        fqdn = domain if domain.endswith('.') else f"{domain}."
        logging.info(f"Checking TXT records for: {fqdn}")
        spf_value = f'"v=spf1 ip4:{ip_address} ~all"'
        record_sets = list(zone.list_resource_record_sets(filter_=f"name={fqdn}"))
        existing_txt = None
        for rs in record_sets:
            if rs.record_type == 'TXT' and rs.name == fqdn:
                existing_txt = rs
                logging.info(f"Found existing TXT record: {rs.name} -> {rs.rrdatas}")
                break
        changes = zone.changes()
        needs_update = False
        if existing_txt:
            # Rebuild the rrdatas list, replacing only the SPF entry and keeping
            # any unrelated TXT values (e.g. domain verification strings).
            new_rrdatas = []
            spf_found = False
            for rd in existing_txt.rrdatas:
                if 'v=spf1' in rd:
                    spf_found = True
                    if ip_address in rd:
                        logging.info(f"SPF record already contains {ip_address}. No update needed.")
                        return
                    logging.info(f"Replacing SPF entry: {rd} -> {spf_value}")
                    new_rrdatas.append(spf_value)
                else:
                    new_rrdatas.append(rd)
            if not spf_found:
                logging.info(f"No existing SPF entry found. Adding: {spf_value}")
                new_rrdatas.append(spf_value)
            # TXT record sets are replaced wholesale: delete the old set and
            # re-add it with the updated rrdatas in a single change batch.
            changes.delete_record_set(existing_txt)
            new_txt = zone.resource_record_set(fqdn, "TXT", 300, new_rrdatas)
            changes.add_record_set(new_txt)
            needs_update = True
        else:
            logging.info(f"No TXT record found for {fqdn}. Creating with SPF: {spf_value}")
            new_txt = zone.resource_record_set(fqdn, "TXT", 300, [spf_value])
            changes.add_record_set(new_txt)
            needs_update = True
        if needs_update:
            logging.info(f"Executing SPF TXT changes for {fqdn}...")
            changes.create()
            while changes.status != 'done':
                logging.info(f"Waiting for SPF changes to complete (status: {changes.status})...")
                time.sleep(5)
                changes.reload()
            logging.info(f"Successfully updated SPF record for {fqdn} with ip4:{ip_address}")
    except GoogleAPIError as e:
        logging.error(f"GCP API Error updating SPF record: {e}")
    except Exception as e:
        logging.error(f"Unexpected error updating SPF record: {e}")
if __name__ == "__main__":
    # Entry point: resolve config from the environment, discover the public
    # IP, then reconcile the A record and the SPF TXT record.
    logging.info("Starting DNS update script.")
    project_id, zone_name, record_name, key_b64 = get_env_vars()
    public_ip = get_public_ip()
    if not public_ip:
        # get_public_ip() already exits on failure; this guard is defensive.
        logging.error("Exiting due to inability to fetch public IP.")
        sys.exit(1)  # Explicit exit for clarity
    dns_client = get_dns_client(key_b64, project_id)
    if not dns_client:
        # get_dns_client() already exits on failure; this guard is defensive.
        logging.error("Exiting due to DNS client initialization failure.")
        sys.exit(1)  # Explicit exit for clarity
    update_dns_record(dns_client, project_id, zone_name, record_name, public_ip)
    update_spf_record(dns_client, project_id, zone_name, record_name, public_ip)
    logging.info("DNS update script finished.")
================================================
FILE: nomad_jobs/misc/gitea/nomad.job
================================================
job "gitea" {
meta {
job_file = "nomad_jobs/misc/gitea/nomad.job"
}
region = var.region
datacenters = ["dc1"]
type = "service"
group "svc" {
count = 1
volume "gitea-data" {
type = "host"
source = "gitea-data"
read_only = false
}
volume "gitea-db" {
type = "host"
source = "gitea-db"
read_only = false
}
restart {
attempts = 5
delay = "30s"
}
task "app" {
driver = "docker"
volume_mount {
volume = "gitea-data"
destination = "/data"
read_only = false
}
config {
image = "gitea/gitea"
port_map {
http = 3000
ssh_pass = 22
}
}
env = {
"APP_NAME" = "Gitea: Git with a cup of tea"
"RUN_MODE" = "prod"
"SSH_DOMAIN" = "git.${var.tld}"
"SSH_PORT" = "22"
"ROOT_URL" = "http://git.${var.tld}/"
"USER_UID" = "1002"
"USER_GID" = "1002"
"DB_TYPE" = "postgres"
"DB_NAME" = "gitea"
"DB_USER" = "gitea"
"DB_PASSWD" = "gitea"
"SHOW_REGISTRATION_BUTTON" = "false"
}
template {
data = <'
; before setting it here, to get a feel for which cipher suites you will get.
;
; After setting this option, it is recommend that you inspect your Murmur log
; to ensure that Murmur is using the cipher suites that you expected it to.
;
; Note: Changing this option may impact the backwards compatibility of your
; Murmur server, and can remove the ability for older Mumble clients to be able
; to connect to it.
;sslCiphers=EECDH+AESGCM:EDH+aRSA+AESGCM:DHE-RSA-AES256-SHA:DHE-RSA-AES128-SHA:AES256-SHA:AES128-SHA
; If Murmur is started as root, which user should it switch to?
; This option is ignored if Murmur isn't started with root privileges.
;uname=
; By default, in log files and in the user status window for privileged users,
; Mumble will show IP addresses - in some situations you may find this unwanted
; behavior. If obfuscate is set to true, Murmur will randomize the IP addresses
; of connecting users.
;
; The obfuscate function only affects the log file and DOES NOT affect the user
; information section in the client window.
;obfuscate=false
; If this options is enabled, only clients which have a certificate are allowed
; to connect.
;certrequired=False
; If enabled, clients are sent information about the servers version and operating
; system.
;sendversion=True
; You can set a recommended minimum version for your server, and clients will
; be notified in their log when they connect if their client does not meet the
; minimum requirements. suggestVersion expects the version in the format X.X.X.
;
; Note that the suggest* options appeared after 1.2.3 and will have no effect
; on client versions 1.2.3 and earlier.
;
;suggestVersion=
; Setting this to "true" will alert any user who does not have positional audio
; enabled that the server administrators recommend enabling it. Setting it to
; "false" will have the opposite effect - if you do not care whether the user
; enables positional audio or not, set it to blank. The message will appear in
; the log window upon connection, but only if the user's settings do not match
; what the server requests.
;
; Note that the suggest* options appeared after 1.2.3 and will have no effect
; on client versions 1.2.3 and earlier.
;
;suggestPositional=
; Setting this to "true" will alert any user who does not have Push-To-Talk
; enabled that the server administrators recommend enabling it. Setting it to
; "false" will have the opposite effect - if you do not care whether the user
; enables PTT or not, set it to blank. The message will appear in the log
; window upon connection, but only if the user's settings do not match what the
; server requests.
;
; Note that the suggest* options appeared after 1.2.3 and will have no effect
; on client versions 1.2.3 and earlier.
;
;suggestPushToTalk=
; This sets password hash storage to legacy mode (1.2.4 and before)
; (Note that setting this to true is insecure and should not be used unless absolutely necessary)
;legacyPasswordHash=false
; By default a strong amount of PBKDF2 iterations are chosen automatically. If >0 this setting
; overrides the automatic benchmark and forces a specific number of iterations.
; (Note that you should only change this value if you know what you are doing)
;kdfIterations=-1
; In order to prevent misconfigured, impolite or malicious clients from
; affecting the low-latency of other users, Murmur has a rudimentary global-ban
; system. It's configured using the autobanAttempts, autobanTimeframe and
; autobanTime settings.
;
; If a client attempts autobanAttempts connections in autobanTimeframe seconds,
; they will be banned for autobanTime seconds. This is a global ban, from all
; virtual servers on the Murmur process. It will not show up in any of the
; ban-lists on the server, and they can't be removed without restarting the
; Murmur process - just let them expire. A single, properly functioning client
; should not trip these bans.
;
; To disable, set autobanAttempts or autobanTimeframe to 0. Commenting these
; settings out will cause Murmur to use the defaults:
;
; To avoid autobanning successful connection attempts from the same IP address,
; set autobanSuccessfulConnections=False.
;
;autobanAttempts=10
;autobanTimeframe=120
;autobanTime=300
;autobanSuccessfulConnections=True
; Enables logging of group changes. This means that every time a group in a
; channel changes, the server will log all groups and their members from before
; the change and after the change. Default is false. This option was introduced
; with Murmur 1.4.0.
;
;loggroupchanges=false
; Enables logging of ACL changes. This means that every time the ACL in a
; channel changes, the server will log all ACLs from before the change and
; after the change. Default is false. This option was introduced with Murmur
; 1.4.0.
;
;logaclchanges=false
; You can configure any of the configuration options for Ice here. We recommend
; leaving the defaults as they are.
; Please note that this section has to be last in the configuration file.
;
[Ice]
Ice.Warn.UnknownProperties=1
Ice.MessageSizeMax=65536
EOH
destination = "local/murmur-config"
env = false
}
resources {
cpu = 100
memory = 128
network {
port "0" {}
}
}
}
}
}
variable "region" {
type = string
}
variable "tld" {
type = string
}
variable "shared_dir" {
type = string
}
variable "auth" {
type = string
}
================================================
FILE: nomad_jobs/misc/octoprint/nomad.job
================================================
# Nomad service job running OctoPrint (3D-printer web UI), pinned to the node
# that has the printer attached and exposes the shared storage mount.
job "octoprint" {
  region      = var.region
  datacenters = ["dc1"]
  type        = "service"

  meta {
    job_file = "nomad_jobs/misc/octoprint/nomad.job"
    version  = "6"
  }

  # Only schedule on the node physically connected to the 3D printer.
  constraint {
    attribute = "${meta.3d_printer}"
    operator  = "="
    value     = "true"
  }

  # The node must also expose the shared NFS mount used for config persistence.
  constraint {
    attribute = "${meta.shared_mount}"
    operator  = "="
    value     = "true"
  }

  group "3dprinter" {
    count = 1

    network {
      # OctoPrint web UI (container port 5000) exposed on the tailscale interface.
      port "web" {
        host_network = "tailscale"
        to           = "5000"
      }
    }

    update {
      max_parallel     = 1
      min_healthy_time = "30s"
      auto_revert      = true
    }

    task "octoprint" {
      driver = "docker"

      config {
        image      = "octoprint/octoprint"
        force_pull = true
        #network_mode = "host"
        # Privileged is required for direct access to the USB serial device below.
        privileged = true
        ports      = ["web"]
        volumes = [
          "${var.shared_dir}octoprint:/home/octoprint/.octoprint",
          "/dev/ttyUSB0:/dev/ttyUSB0",
        ]
      }

      service {
        port = "web"
        name = "octoprint"
        tags = [
          "traefik.enable=true",
          "traefik.http.middlewares.cors.headers.accesscontrolallowmethods=GET,OPTIONS,PUT",
          "traefik.http.middlewares.cors.headers.accesscontrolalloworigin=origin-list-or-null",
          "traefik.http.middlewares.cors.headers.accesscontrolmaxage=100",
          "traefik.http.middlewares.cors.headers.addvaryheader=true",
          # NOTE(review): malpotAuth is defined here but the router below attaches
          # forward-auth instead - confirm which middleware is actually intended.
          "traefik.http.middlewares.malpotAuth.basicauth.users=${var.auth}",
          "traefik.http.routers.${NOMAD_TASK_NAME}.middlewares=forward-auth"
        ]

        check {
          type     = "http"
          path     = "/"
          interval = "10s"
          timeout  = "2s"

          check_restart {
            limit           = 3
            grace           = "60s"
            ignore_warnings = false
          }
        }
      }

      env {
        TZ = "Europe/Amsterdam"
      }

      resources {
        cpu    = 100
        memory = 1024
      }
    }
  }
}

variable "region" {
  type = string
}

variable "tld" {
  type = string
}

variable "shared_dir" {
  type = string
}

# Fix: the job interpolates ${var.auth} in the traefik tags above but never
# declared the variable, which makes `nomad job run` fail with an
# undeclared-variable error.
variable "auth" {
  type = string
}
================================================
FILE: nomad_jobs/misc/uploader/nomad.job
================================================
job "uploader" {
region = var.region
datacenters = ["dc1"]
type = "service"
meta {
job_file = "nomad_jobs/misc/uploader/nomad.job"
version = "5"
}
group "webserver" {
count = 1
restart {
attempts = 3
delay = "15s"
interval = "10m"
mode = "delay"
}
update {
max_parallel = 1
min_healthy_time = "30s"
auto_revert = true
}
task "uploader" {
driver = "docker"
service {
name = "uploader"
tags = [
"traefik.enable=true",
"traefik.http.middlewares.httpsRedirect.redirectscheme.scheme=https",
"traefik.http.routers.${NOMAD_TASK_NAME}.tls.domains[0].sans=${NOMAD_TASK_NAME}.${var.tld}",
"traefik.http.routers.${NOMAD_TASK_NAME}.middlewares=forward-auth"
]
port = "http"
check {
type = "tcp"
interval = "10s"
timeout = "2s"
}
}
config {
image = "docker-registry.${var.tld}/uploader:latest"
network_mode = "host"
volumes = [
"${var.shared_dir}uploader:/data",
]
}
template {
data = <5 minutes.
- alert: InstanceDown
expr: up{job!="hass"} == 0
for: 5m
labels:
severity: page
annotations:
summary: "Instance {{ $labels.instance }} down"
description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes."
- alert: HomeAssistantDown
expr: up{job="hass"} == 0
for: 10m
labels:
severity: warning
annotations:
summary: "Home Assistant is down"
description: "Home Assistant at {{ $labels.instance }} has been down for more than 10 minutes."
# Alert for any device that is over 80% capacity
- alert: DiskUsage
expr: avg(nomad_client_host_disk_used_percent) by (host, device) > 80
for: 5m
labels:
severity: page
annotations:
summary: "Host {{ $labels.host }} disk {{ $labels.device }} usage alert"
description: "{{ $labels.host }} is using over 80% of its device: {{ $labels.device }}"
- name: nomad_allocation_alerts
rules:
- alert: NomadJobFailureRate
expr: rate(nomad_nomad_job_summary_failed[5m]) > 0
for: 2m
labels:
severity: critical
alertname: "NomadJobFailureRate"
annotations:
summary: "Nomad job {{ $labels.exported_job }} is experiencing failures"
description: "Job {{ $labels.exported_job }} is failing allocations at a rate of {{ $value | printf \"%.2f\" }} per second"
service: "nomad"
- alert: NomadJobLostRate
expr: rate(nomad_nomad_job_summary_lost[5m]) > 0
for: 2m
labels:
severity: warning
alertname: "NomadJobLostRate"
annotations:
summary: "Nomad job {{ $labels.exported_job }} is losing allocations"
description: "Job {{ $labels.exported_job }} is losing allocations at a rate of {{ $value | printf \"%.2f\" }} per second"
service: "nomad"
- alert: NomadJobQueued
expr: nomad_nomad_job_summary_queued > 0
for: 5m
labels:
severity: warning
alertname: "NomadJobQueued"
annotations:
summary: "Nomad job {{ $labels.exported_job }} has queued allocations"
description: "Job {{ $labels.exported_job }} has {{ $value }} allocations queued for over 5 minutes"
service: "nomad"
- alert: NomadAllocationsRestarting
expr: rate(nomad_client_allocs_restart[5m]) > 0.1
for: 2m
labels:
severity: warning
alertname: "NomadAllocationsRestarting"
annotations:
summary: "High allocation restart rate on {{ $labels.host }}"
description: "Allocation restart rate is {{ $value }} per second on {{ $labels.host }}"
service: "nomad"
- alert: NomadAllocationsOOMKilled
expr: nomad_client_allocs_oom_killed > 0
for: 0s
labels:
severity: critical
alertname: "NomadAllocationsOOMKilled"
annotations:
summary: "Allocation killed due to OOM on {{ $labels.host }}"
description: "{{ $value }} allocations were killed due to out-of-memory on {{ $labels.host }}"
service: "nomad"
EOH
}
config {
image = "prom/prometheus:v3.11.2"
network_mode = "host"
args = ["--storage.tsdb.path", "/opt/prometheus", "--web.listen-address", "0.0.0.0:9090", "--storage.tsdb.retention.time", "90d"]
force_pull = true
ports = ["http"]
dns_servers = ["192.168.50.2"]
volumes = [
"local/alerts.yml:/prometheus/alerts.yml",
"local/prometheus.yml:/prometheus/prometheus.yml",
]
}
resources {
cpu = 1000
memory = 512
}
}
}
}
variable "region" {}
variable "tld" {}
variable "shared_dir" {}
variable "hass_key" {}
variable "hass_ip" {}
================================================
FILE: nomad_jobs/observability/prometheus/volume.hcl
================================================
# CSI volume definition for Prometheus TSDB storage, backed by the
# democratic-csi iSCSI plugin. Registered via `nomad volume create`.
id = "prometheus"
external_id = "prometheus"
name = "prometheus"
type = "csi"
plugin_id = "org.democratic-csi.iscsi"
# Fixed-size 50GiB volume (min == max pins the provisioned capacity).
capacity_min = "50GiB"
capacity_max = "50GiB"
capability {
# Multi-node attach allowed, but only one writer at a time.
access_mode = "multi-node-single-writer"
attachment_mode = "file-system"
}
mount_options {
fs_type = "ext4"
# noatime avoids metadata writes on every TSDB read.
mount_flags = ["noatime"]
}
================================================
FILE: nomad_jobs/observability/telegraf/nomad.job
================================================
job "telegraf" {
region = var.region
datacenters = ["dc1", "public", "system"]
type = "system"
priority = 100
meta {
job_file = "nomad_jobs/observability/telegraf/nomad.job"
version = "4"
}
group "telegraf-exporter" {
network {
port "http" {
host_network = "tailscale"
to = "9273"
}
}
restart {
attempts = 3
delay = "15s"
interval = "10m"
mode = "delay"
}
update {
min_healthy_time = "30s"
auto_revert = true
}
task "telegraf" {
driver = "docker"
service {
name = "telegraf"
port = "http"
tags = ["metrics"]
check {
type = "tcp"
interval = "5s"
timeout = "2s"
}
}
config {
image = "telegraf:1.38.2"
privileged = "true"
ports = ["http"]
args = [
"--config=/local/config.yaml",
]
}
template {
data = < /dev/null || pgrep -x suricata > /dev/null"]
interval = "30s"
timeout = "5s"
}
}
}
}
}
variable "region" {
type = string
}
variable "shared_dir" {
type = string
}
================================================
FILE: nomad_jobs/security/suricata-update/nomad.job
================================================
# Periodic batch job that refreshes Suricata rule sets via `suricata-update`,
# writing them to shared storage consumed by the Suricata IDS job.
job "suricata-update" {
region = var.region
datacenters = ["dc1"]
type = "batch"
priority = 80
meta {
job_file = "nomad_jobs/security/suricata-update/nomad.job"
version = "3" // Single instance with shared NFS storage
}
# Run daily at 4am
periodic {
crons = ["0 4 * * *"]
# Never start a new run while the previous one is still active.
prohibit_overlap = true
}
# Must land on a node that has the shared mount available.
constraint {
attribute = "${meta.shared_mount}"
operator = "="
value = "true"
}
group "update" {
count = 1
restart {
attempts = 3
delay = "15s"
interval = "10m"
mode = "delay"
}
task "suricata-update" {
driver = "docker"
config {
image = "jasonish/suricata:8.0"
# Run the rule updater instead of the suricata daemon entrypoint.
command = "suricata-update"
volumes = [
"${var.shared_dir}suricata/rules:/var/lib/suricata",
]
}
resources {
cpu = 500
memory = 1024
}
}
}
}
variable "region" {
type = string
}
variable "shared_dir" {
type = string
}
================================================
FILE: nomad_jobs/security/wazuh-agent/nomad.job
================================================
job "wazuh-agent" {
region = var.region
datacenters = ["dc1"]
type = "system"
priority = 100
meta {
job_file = "nomad_jobs/security/wazuh-agent/nomad.job"
version = "6" // Fix client.keys file permissions for persistence
}
group "agent" {
network {
mode = "host"
}
restart {
attempts = 3
delay = "15s"
interval = "10m"
mode = "delay"
}
update {
min_healthy_time = "30s"
auto_revert = true
}
# Ensure agent data directory exists on host
task "prep-agent-dir" {
driver = "docker"
config {
image = "busybox:latest"
command = "sh"
args = ["-c", "mkdir -p /host/var/lib/wazuh-agent; test -f /host/var/lib/wazuh-agent/client.keys || touch /host/var/lib/wazuh-agent/client.keys; chmod 666 /host/var/lib/wazuh-agent/client.keys"]
volumes = [
"/var/lib:/host/var/lib",
]
}
resources {
cpu = 100
memory = 32
}
lifecycle {
hook = "prestart"
sidecar = false
}
}
task "wazuh-agent" {
driver = "docker"
config {
image = "wazuh/wazuh-agent:4.14.4"
network_mode = "host"
force_pull = true
privileged = true
# Mount host directories for monitoring and config
volumes = [
"/var/log:/host/var/log:ro",
"/var/run/docker.sock:/var/run/docker.sock:ro",
"/:/host:ro",
"/var/lib/wazuh-agent/client.keys:/var/ossec/etc/client.keys",
"local/ossec.conf:/var/ossec/etc/ossec.conf",
]
}
# Configuration template for the agent
# Uses Consul service discovery to automatically find Wazuh manager
template {
data = <
{{- if service "wazuh-agent-comm" -}}
{{- with index (service "wazuh-agent-comm") 0 }}
{{ .Address }}
{{ .Port }}
{{- end -}}
{{- else }}
127.0.0.1
1514
{{- end }}
tcp
ubuntu, ubuntu20, ubuntu20.04
10
60
yes
no
5000
500
syslog
/host/var/log/syslog
syslog
/host/var/log/auth.log
syslog
/host/var/log/kern.log
syslog
/host/var/log/dpkg.log
json
/host/var/log/nomad/*.log
json
/host/var/log/consul/*.log
syslog
/host/var/log/docker.log
audit
/host/var/log/audit/audit.log
syslog
/host/var/log/secure
journald
journald
json
/host/var/log/suricata/eve.json
syslog
/host/var/log/suricata/suricata.log
no
21600
yes
yes
/host/etc
/host/usr/bin
/host/usr/sbin
/host/bin
/host/sbin
/host/etc/nomad.d
/host/etc/consul.d
/host/root/.ssh
/host/home/*/.ssh
/host/etc/mtab
/host/etc/hosts.deny
/host/etc/mail/statistics
/host/etc/random-seed
/host/etc/adjtime
/host/etc/httpd/logs
/host/etc/resolv.conf
.log$|.tmp$|.swp$
no
yes
yes
yes
yes
yes
yes
yes
43200
/var/ossec/etc/shared/rootkit_files.txt
/var/ossec/etc/shared/rootkit_trojans.txt
no
1h
yes
yes
yes
yes
yes
yes
yes
yes
yes
12h
yes
no
10m
5
yes
yes
EOH
destination = "local/ossec.conf"
change_mode = "restart"
}
resources {
cpu = 300
memory = 512
}
service {
name = "wazuh-agent"
tags = ["security", "monitoring"]
# Use a simple script check that runs inside the container
check {
type = "script"
name = "agent-status"
command = "/var/ossec/bin/wazuh-control"
args = ["status"]
interval = "30s"
timeout = "10s"
}
}
}
}
}
variable "region" {
type = string
}
================================================
FILE: nomad_jobs/security/wazuh-server/nomad.job
================================================
job "wazuh-server" {
region = var.region
datacenters = ["dc1"]
type = "service"
constraint {
attribute = "${meta.shared_mount}"
operator = "="
value = "true"
}
group "wazuh-stack" {
count = 1
network {
port "indexer" {
host_network = "lan"
to = 9200
}
port "manager" {
static = 1514
host_network = "lan"
to = 1514
}
port "manager_reg" {
static = 1515
host_network = "lan"
to = 1515
}
port "manager_api" {
host_network = "lan"
to = 55000
}
port "dashboard" {
host_network = "lan"
to = 443
}
}
# Persistent volumes for Wazuh components
volume "wazuh-indexer" {
type = "csi"
read_only = false
source = "wazuh-indexer"
access_mode = "single-node-writer"
attachment_mode = "file-system"
}
volume "wazuh-manager" {
type = "csi"
read_only = false
source = "wazuh-manager"
access_mode = "single-node-writer"
attachment_mode = "file-system"
}
volume "wazuh-dashboard" {
type = "csi"
read_only = false
source = "wazuh-dashboard"
access_mode = "single-node-writer"
attachment_mode = "file-system"
}
restart {
attempts = 3
delay = "15s"
interval = "10m"
mode = "delay"
}
# Prep disk task for indexer volume permissions
task "prep-indexer-disk" {
driver = "docker"
volume_mount {
volume = "wazuh-indexer"
destination = "/volume/"
read_only = false
}
config {
image = "busybox:latest"
command = "sh"
args = ["-c", "chown -R 1000:1000 /volume/"]
}
resources {
cpu = 200
memory = 128
}
lifecycle {
hook = "prestart"
sidecar = false
}
}
# Prep disk task for manager volume permissions
task "prep-manager-disk" {
driver = "docker"
volume_mount {
volume = "wazuh-manager"
destination = "/volume/"
read_only = false
}
config {
image = "busybox:latest"
command = "sh"
args = ["-c", "chown -R 999:999 /volume/"]
}
resources {
cpu = 200
memory = 128
}
lifecycle {
hook = "prestart"
sidecar = false
}
}
# Prep disk task for dashboard volume permissions
task "prep-dashboard-disk" {
driver = "docker"
volume_mount {
volume = "wazuh-dashboard"
destination = "/volume/"
read_only = false
}
config {
image = "busybox:latest"
command = "sh"
args = ["-c", "rm -rf /volume/wazuh && mkdir -p /volume/wazuh/config && chown -R 1000:1000 /volume/"]
}
resources {
cpu = 200
memory = 128
}
lifecycle {
hook = "prestart"
sidecar = false
}
}
# Wazuh Indexer (OpenSearch-based)
task "wazuh-indexer" {
driver = "docker"
volume_mount {
volume = "wazuh-indexer"
destination = "/var/lib/wazuh-indexer"
read_only = false
}
config {
image = "wazuh/wazuh-indexer:4.14.4"
force_pull = true
ports = ["indexer"]
volumes = [
"local/opensearch.yml:/usr/share/wazuh-indexer/config/opensearch.yml",
]
ulimit {
nofile = "65536:65536"
memlock = "-1:-1"
}
}
env {
OPENSEARCH_JAVA_OPTS = "-Xms1g -Xmx1g"
}
template {
data = <
86601
Suppressed: Suricata STREAM ESTABLISHED invalid ack
EOH
destination = "local/local_rules.xml"
perms = "0644"
}
# Wazuh ossec.conf with log_alert_level=8 (only high/critical alerts)
# Wazuh levels: 0=ignored, 1-4=low, 5-7=medium, 8-10=high, 11-15=critical
template {
data = <
yes
yes
no
no
no
smtp.example.wazuh.com
wazuh@example.wazuh.com
recipient@example.wazuh.com
12
alerts.log
15m
0
yes
8
12
plain
secure
1514
tcp
131072
no
yes
yes
yes
yes
yes
yes
yes
43200
etc/rootcheck/rootkit_files.txt
etc/rootcheck/rootkit_trojans.txt
yes
/var/lib/containerd
/var/lib/docker/overlay2
yes
1800
1d
yes
wodles/java
wodles/ciscat
yes
yes
/var/log/osquery/osqueryd.results.log
/etc/osquery/osquery.conf
yes
no
1h
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
10
yes
yes
12h
yes
yes
yes
60m
yes
https://{{ env "NOMAD_IP_indexer" }}:{{ env "NOMAD_HOST_PORT_indexer" }}
/etc/filebeat/certs/root-ca.pem
/etc/filebeat/certs/filebeat.pem
/etc/filebeat/certs/filebeat-key.pem
no
43200
yes
yes
no
/etc,/usr/bin,/usr/sbin
/bin,/sbin,/boot
/etc/mtab
/etc/hosts.deny
/etc/mail/statistics
/etc/random-seed
/etc/random.seed
/etc/adjtime
/etc/httpd/logs
/etc/utmpx
/etc/wtmpx
/etc/cups/certs
/etc/dumpdates
/etc/svc/volatile
.log$|.swp$
/etc/ssl/private.key
yes
yes
yes
yes
10
50
yes
5m
10
127.0.0.1
^localhost.localdomain$
168.63.129.16
disable-account
disable-account
yes
restart-wazuh
restart-wazuh
firewall-drop
firewall-drop
yes
host-deny
host-deny
yes
route-null
route-null
yes
win_route-null
route-null.exe
yes
netsh
netsh.exe
yes
command
df -P
360
full_command
netstat -tulpn | sed 's/\([[:alnum:]]\+\)\ \+[[:digit:]]\+\ \+[[:digit:]]\+\ \+\(.*\):\([[:digit:]]*\)\ \+\([0-9\.\:\*]\+\).\+\ \([[:digit:]]*\/[[:alnum:]\-]*\).*/\1 \2 == \3 == \4 \5/' | sort -k 4 -g | sed 's/ == \(.*\) ==/:\1/' | sed 1,2d
netstat listening ports
360
full_command
last -n 20
360
ruleset/decoders
ruleset/rules
0215-policy_rules.xml
etc/lists/audit-keys
etc/lists/amazon/aws-eventnames
etc/lists/security-eventchannel
etc/lists/malicious-ioc/malware-hashes
etc/lists/malicious-ioc/malicious-ip
etc/lists/malicious-ioc/malicious-domains
etc/decoders
etc/rules
yes
1
64
15m
no
1515
no
yes
no
HIGH:!ADH:!EXP:!MD5:!RC4:!3DES:!CAMELLIA:@STRENGTH
no
etc/sslmanager.cert
etc/sslmanager.key
no
wazuh
node01
master
1516
0.0.0.0
NODE_IP
no
yes
syslog
/var/ossec/logs/active-responses.log
EOH
destination = "local/ossec.conf"
perms = "0644"
}
resources {
cpu = 1000
memory = 1024
}
service {
name = "wazuh-manager"
port = "manager_api"
tags = ["metrics"]
meta {
api_port = "${NOMAD_HOST_PORT_manager_api}"
}
check {
type = "tcp"
port = "manager_api"
interval = "10s"
timeout = "2s"
}
}
service {
name = "wazuh-agent-comm"
port = "manager"
tags = ["agent-communication"]
check {
type = "tcp"
port = "manager"
interval = "10s"
timeout = "2s"
}
}
service {
name = "wazuh-agent-reg"
port = "manager_reg"
tags = ["agent-registration"]
check {
type = "tcp"
port = "manager_reg"
interval = "10s"
timeout = "2s"
}
}
}
# Wazuh Dashboard (Web UI)
task "wazuh-dashboard" {
driver = "docker"
volume_mount {
volume = "wazuh-dashboard"
destination = "/usr/share/wazuh-dashboard/data"
read_only = false
}
config {
image = "wazuh/wazuh-dashboard:4.14.4"
force_pull = true
ports = ["dashboard"]
volumes = [
"local/opensearch_dashboards.yml:/usr/share/wazuh-dashboard/config/opensearch_dashboards.yml",
"local/wazuh.yml:/usr/share/wazuh-dashboard/data/wazuh/config/wazuh.yml:ro",
]
}
template {
data = < 128MB
}
group "db" {
network {
mode = "host"
port "pgvector" {
static = "5432"
host_network = "lan"
}
}
restart {
attempts = 3
delay = "15s"
interval = "10m"
mode = "delay"
}
update {
max_parallel = 1
min_healthy_time = "30s"
auto_revert = true
}
task "pgvector" {
driver = "docker"
config {
image = "pgvector/pgvector:pg16"
volumes = [
"${var.shared_dir}pgvector-data:/var/lib/postgresql/data",
]
ports = ["pgvector"]
}
env {
POSTGRES_DB = "vectordb"
POSTGRES_USER = "postgres"
POSTGRES_PASSWORD = "${var.postgres_pass}"
PGDATA = "/var/lib/postgresql/data"
}
service {
name = "${NOMAD_JOB_NAME}"
tags = ["pgvector", "database", "vector-database"]
port = "pgvector"
check {
type = "tcp"
port = "pgvector"
interval = "30s"
timeout = "2s"
}
}
resources {
cpu = "100"
memory = "128"
}
}
}
}
variable "region" {}
variable "shared_dir" {}
variable "pgvector_admin_password" {}
================================================
FILE: nomad_jobs/storage-backends/pgvector/pgvector-setup.job
================================================
# One-shot batch job that bootstraps the pgvector server: enables the vector
# extension in cognee_db, creates the embeddings database if absent, and
# enables the vector extension there too.
job "pgvector-setup" {
type = "batch"
datacenters = ["dc1"]
meta {
job_file = "nomad_jobs/storage-backends/pgvector/pgvector-setup.job"
version = "1"
}
group "setup" {
restart {
attempts = 3
delay = "15s"
interval = "10m"
mode = "delay"
}
task "initialize-pgvector" {
driver = "docker"
config {
image = "pgvector/pgvector:pg16"
command = "sh"
# NOTE(review): the pgvector host (192.168.50.120) is hard-coded here —
# consider resolving it via Consul service discovery instead.
# The DO $$ ... $$ block makes CREATE DATABASE idempotent (it cannot be
# wrapped in IF NOT EXISTS directly in PostgreSQL).
args = [
"-c",
"PGPASSWORD=$PGVECTOR_PASSWORD psql -h 192.168.50.120 -p 5432 -U postgres -d cognee_db -c \"CREATE EXTENSION IF NOT EXISTS vector;\" && PGPASSWORD=$PGVECTOR_PASSWORD psql -h 192.168.50.120 -p 5432 -U postgres -c \"DO \\$\\$ BEGIN CREATE DATABASE embeddings; EXCEPTION WHEN duplicate_database THEN RAISE NOTICE 'embeddings database exists'; END \\$\\$;\" && PGPASSWORD=$PGVECTOR_PASSWORD psql -h 192.168.50.120 -p 5432 -U postgres -d embeddings -c \"CREATE EXTENSION IF NOT EXISTS vector;\""
]
}
env {
PGVECTOR_PASSWORD = "${var.pgvector_pass}"
}
resources {
cpu = 200
memory = 256
}
}
}
}
variable "pgvector_pass" {
type = string
description = "Admin password for the pgvector PostgreSQL server"
}
================================================
FILE: nomad_jobs/storage-backends/postgres/nomad.job
================================================
job "postgres" {
region = var.region
datacenters = ["dc1"]
type = "service"
meta {
job_file = "nomad_jobs/storage-backends/postgres/nomad.job"
version = "5" // Fixed postgres password variable
}
group "db" {
network {
mode = "host"
port "postgres" {
static = "5432"
host_network = "lan"
}
}
restart {
attempts = 3
delay = "15s"
interval = "10m"
mode = "delay"
}
update {
max_parallel = 1
min_healthy_time = "30s"
auto_revert = true
}
task "postgres" {
driver = "docker"
config {
image = "postgres:15.17"
volumes = [
"${var.shared_dir}paperless-postgres:/appdata/postgres",
]
ports = ["postgres"]
}
env {
POSTGRES_DB = "paperless"
POSTGRES_USER = "postgres"
POSTGRES_PASSWORD = "${var.postgres_pass}"
PGDATA = "/appdata/postgres"
}
service {
name = "${NOMAD_JOB_NAME}"
tags = ["postgres"]
port = "postgres"
check {
type = "tcp"
port = "postgres"
interval = "30s"
timeout = "2s"
}
}
resources {
cpu = "200"
memory = "512"
}
}
}
}
variable "region" {
type = string
}
variable "shared_dir" {
type = string
}
variable "postgres_pass" {
type = string
description = "Admin password for PostgreSQL"
}
================================================
FILE: nomad_jobs/storage-backends/postgres/postgres-setup.job
================================================
job "postgres-setup" {
type = "batch"
datacenters = ["dc1"]
meta {
job_file = "nomad_jobs/storage-backends/postgres/postgres-setup.job"
version = "2"
}
group "setup" {
restart {
attempts = 3
delay = "15s"
interval = "10m"
mode = "delay"
}
task "create-dbs" {
driver = "docker"
config {
image = "postgres:15"
command = "sh"
args = [
"-c",
"PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres.service.consul -U postgres -c \"DO \\$\\$ BEGIN CREATE DATABASE sonarr_main; EXCEPTION WHEN duplicate_database THEN RAISE NOTICE 'sonarr_main exists'; END \\$\\$;\" -c \"DO \\$\\$ BEGIN CREATE DATABASE sonarr_logs; EXCEPTION WHEN duplicate_database THEN RAISE NOTICE 'sonarr_logs exists'; END \\$\\$;\" -c \"DO \\$\\$ BEGIN CREATE DATABASE radarr_main; EXCEPTION WHEN duplicate_database THEN RAISE NOTICE 'radarr_main exists'; END \\$\\$;\" -c \"DO \\$\\$ BEGIN CREATE DATABASE radarr_logs; EXCEPTION WHEN duplicate_database THEN RAISE NOTICE 'radarr_logs exists'; END \\$\\$;\" -c \"DO \\$\\$ BEGIN CREATE DATABASE lidarr_main; EXCEPTION WHEN duplicate_database THEN RAISE NOTICE 'lidarr_main exists'; END \\$\\$;\" -c \"DO \\$\\$ BEGIN CREATE DATABASE lidarr_logs; EXCEPTION WHEN duplicate_database THEN RAISE NOTICE 'lidarr_logs exists'; END \\$\\$;\" -c \"DO \\$\\$ BEGIN CREATE DATABASE litellm; EXCEPTION WHEN duplicate_database THEN RAISE NOTICE 'litellm exists'; END \\$\\$;\" -c \"DO \\$\\$ BEGIN CREATE DATABASE nextcloud; EXCEPTION WHEN duplicate_database THEN RAISE NOTICE 'nextcloud exists'; END \\$\\$;\" -c \"DO \\$\\$ BEGIN CREATE DATABASE paperless; EXCEPTION WHEN duplicate_database THEN RAISE NOTICE 'paperless exists'; END \\$\\$;\" "
]
}
env {
POSTGRES_PASSWORD = "${var.postgres_pass}"
}
resources {
cpu = 200
memory = 256
}
}
}
}
variable "postgres_pass" {
type = string
description = "Admin password for the PostgreSQL server"
}
================================================
FILE: nomad_jobs/storage-backends/qdrant/nomad.job
================================================
job "qdrant" {
region = var.region
datacenters = ["dc1"]
type = "service"
meta {
job_file = "nomad_jobs/storage-backends/qdrant/nomad.job"
version = "3"
}
group "qdrant" {
count = 1
network {
mode = "host"
port "http" {
static = 6333
to = 6333
host_network = "lan"
}
port "grpc" {
static = 6334
to = 6334
host_network = "lan"
}
}
volume "qdrant-data" {
type = "csi"
read_only = false
source = "qdrant-data"
access_mode = "single-node-writer"
attachment_mode = "file-system"
}
restart {
attempts = 3
delay = "15s"
interval = "10m"
mode = "delay"
}
update {
max_parallel = 1
min_healthy_time = "30s"
auto_revert = true
}
task "qdrant" {
driver = "docker"
config {
image = "qdrant/qdrant:v1.17"
ports = ["http", "grpc"]
}
volume_mount {
volume = "qdrant-data"
destination = "/qdrant/storage"
read_only = false
}
resources {
cpu = 500
memory = 128
}
service {
name = "qdrant"
tags = ["vector-db", "ai", "http"]
port = "http"
check {
type = "tcp"
port = "http"
interval = "30s"
timeout = "2s"
}
}
}
}
}
variable "region" {
type = string
default = "global"
}
================================================
FILE: nomad_jobs/storage-backends/qdrant/volume.hcl
================================================
# Qdrant vector database storage volume
# Registered against the democratic-csi iSCSI plugin; fixed-size LUN
# (capacity_min == capacity_max), formatted ext4.
id = "qdrant-data"
name = "qdrant-data"
type = "csi"
plugin_id = "org.democratic-csi.iscsi"
capacity_min = "10GiB"
capacity_max = "10GiB"
capability {
access_mode = "single-node-writer"
# NOTE(review): registered as "block-device" while the qdrant job's volume
# stanza requests attachment_mode "file-system" — confirm the plugin and
# scheduler accept this combination.
attachment_mode = "block-device"
}
mount_options {
fs_type = "ext4"
mount_flags = ["noatime"]
}
================================================
FILE: nomad_jobs/storage-backends/redis/nomad.job
================================================
job "redis" {
region = var.region
datacenters = ["dc1"]
type = "service"
meta {
job_file = "nomad_jobs/storage-backends/redis/nomad.job"
version = "4" // Reduced memory 512MB -> 128MB
}
constraint {
attribute = "${meta.shared_mount}"
operator = "="
value = "true"
}
group "db" {
count = 1
network {
mode = "host"
port "redis" {
static = 6379
host_network = "lan"
}
}
volume "redis" {
type = "csi"
read_only = false
source = "redis-data"
access_mode = "single-node-writer"
attachment_mode = "file-system"
}
restart {
attempts = 3
delay = "15s"
interval = "10m"
mode = "delay"
}
update {
max_parallel = 1
min_healthy_time = "30s"
auto_revert = true
}
task "prep-disk" {
driver = "docker"
volume_mount {
volume = "redis"
destination = "/volume/"
read_only = false
}
config {
image = "busybox:latest"
command = "sh"
args = ["-c", "chmod 777 /volume/"]
}
resources {
cpu = 200
memory = 128
}
lifecycle {
hook = "prestart"
sidecar = false
}
}
task "redis" {
driver = "docker"
config {
image = "redis:8.6.2-alpine"
ports = ["redis"]
}
volume_mount {
volume = "redis"
destination = "/data"
read_only = false
}
env {
# Save settings - save to disk every 60 seconds if at least 1 change
REDIS_SAVE_TO_DISK = "60 1"
# Set appendonly for durability
REDIS_APPENDONLY = "yes"
}
service {
name = "redis"
port = "redis"
check {
type = "tcp"
port = "redis"
interval = "10s"
timeout = "2s"
}
}
resources {
cpu = 300
memory = 128
}
}
}
}
variable "region" {
type = string
}
variable "shared_dir" {
type = string
}
================================================
FILE: nomad_jobs/storage-backends/redis/volume.hcl
================================================
id = "redis-data"
external_id = "redis-data"
name = "redis-data"
type = "csi"
plugin_id = "org.democratic-csi.iscsi"
capacity_min = "5GiB"
capacity_max = "5GiB"
capability {
access_mode = "single-node-writer"
attachment_mode = "block-device"
}
mount_options {
fs_type = "ext4"
mount_flags = ["noatime", "nodiratime", "data=ordered"]
}
================================================
FILE: nomad_jobs/storage-backends/volumes/nfs-example.hcl
================================================
type = "csi"
id = "example"
name = "example"
plugin_id = "nfsofficial"
external_id = "example"
capability {
access_mode = "multi-node-multi-writer"
attachment_mode = "file-system"
}
context {
server = "192.168.50.208"
share = "/mnt/pool0/share/example"
mountPermissions = "0"
}
mount_options {
fs_type = "nfs"
mount_flags = [ "timeo=30", "intr", "vers=3", "_netdev" , "nolock" ]
}
================================================
FILE: nomad_jobs/system/docker-cleanup/nomad.job
================================================
job "docker-cleanup" {
region = var.region
datacenters = ["dc1"]
type = "sysbatch"
meta {
job_file = "nomad_jobs/system/docker-cleanup/nomad.job"
version = "1"
}
# Run weekly on Sundays at 2 AM
periodic {
crons = ["0 2 * * 0"]
prohibit_overlap = true
time_zone = "UTC"
}
group "cleanup" {
# sysbatch will automatically run on all eligible nodes
restart {
attempts = 3
delay = "15s"
interval = "10m"
mode = "delay"
}
task "docker-prune" {
driver = "raw_exec"
config {
command = "/bin/bash"
args = ["-c", <&1 >/dev/null; do echo '.'; sleep 2; done"]
network_mode = "host"
}
resources {
cpu = 200
memory = 128
}
lifecycle {
hook = "prestart"
sidecar = false
}
}
update {
max_parallel = 1
min_healthy_time = "30s"
auto_revert = true
}
task "wordpress" {
driver = "docker"
template {
data = <
