Repository: perrymanuk/hashi-homelab Branch: master Commit: c1705ab65354 Files: 180 Total size: 450.4 KB Directory structure: gitextract_ikark14b/ ├── .bootstrap.mk ├── .gitattributes ├── .github/ │ └── workflows/ │ ├── build-gcp-dns-updater.yaml │ ├── nomad.yaml │ ├── update-kideo.yaml │ ├── update-minecraftmath.yaml │ ├── update-radbot-dev.yaml │ └── update-radbot.yaml ├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── ansible/ │ ├── configs/ │ │ ├── consul.hcl.j2 │ │ ├── consul.service │ │ ├── docker-daemon.json.j2 │ │ ├── nomad.hcl.j2 │ │ └── nomad.service │ ├── playbook.yml │ └── zsh.yml ├── docker_images/ │ ├── gcp-dns-updater/ │ │ ├── Dockerfile │ │ ├── README.md │ │ ├── requirements.txt │ │ └── update_dns.py │ └── update-metadata/ │ ├── Dockerfile │ ├── README.md │ ├── requirements.txt │ └── update_job_metadata.py ├── envrc ├── nomad_jobs/ │ ├── TEMPLATE-volume.hcl │ ├── TEMPLATE.job │ ├── ai-ml/ │ │ ├── cognee/ │ │ │ └── nomad.job │ │ ├── crawl4ai/ │ │ │ ├── nomad.job │ │ │ └── volume.hcl │ │ ├── litellm/ │ │ │ ├── nomad.job │ │ │ └── volume.hcl │ │ ├── manyfold/ │ │ │ ├── 3dprints-volume.hcl │ │ │ ├── nomad.job │ │ │ ├── prints_volume.hcl │ │ │ └── volume.hcl │ │ ├── ollama/ │ │ │ └── nomad.job │ │ ├── open-webui/ │ │ │ └── nomad.job │ │ ├── paperless-ai/ │ │ │ └── nomad.job │ │ ├── pgvector-client/ │ │ │ └── nomad.job │ │ └── radbot/ │ │ ├── nomad-dev.job │ │ └── nomad.job │ ├── core-infra/ │ │ ├── coredns/ │ │ │ ├── README.md │ │ │ └── nomad.job │ │ ├── github-runner/ │ │ │ └── nomad.job │ │ ├── haproxy/ │ │ │ └── nomad.job │ │ ├── iscsi-csi-plugin/ │ │ │ ├── controller.job │ │ │ └── node.job │ │ ├── keepalived/ │ │ │ ├── TODO.md │ │ │ └── nomad.job │ │ ├── nfs-csi-plugin/ │ │ │ ├── controller.job │ │ │ └── nodes.job │ │ ├── pihole/ │ │ │ ├── nomad.job │ │ │ └── volume.hcl │ │ ├── smtp/ │ │ │ └── nomad.job │ │ ├── tailscale/ │ │ │ ├── nomad.job │ │ │ └── volume.hcl │ │ ├── tailscale-este/ │ │ │ ├── nomad.job │ │ │ └── volume.hcl │ │ ├── traefik/ 
│ │ │ ├── config/ │ │ │ │ ├── consul-catalog.yml │ │ │ │ ├── consul.yml │ │ │ │ ├── traefik.toml │ │ │ │ ├── traefik.toml.new │ │ │ │ └── traefik.toml.test │ │ │ └── nomad.job │ │ ├── traefik-forward-auth/ │ │ │ └── nomad.job │ │ └── vault/ │ │ └── secrets_template.yaml │ ├── gaming/ │ │ ├── minecraft-1.21/ │ │ │ └── nomad.job │ │ ├── minecraft-avaritia/ │ │ │ └── nomad.job │ │ ├── minecraft-axiom/ │ │ │ └── nomad.job │ │ ├── minecraft-fiskheroes/ │ │ │ └── nomad.job │ │ └── minecraft-forge/ │ │ └── nomad.job │ ├── media-stack/ │ │ ├── audioserve/ │ │ │ └── nomad.job │ │ ├── flaresolverr/ │ │ │ └── nomad.job │ │ ├── jackett/ │ │ │ └── nomad.job │ │ ├── lazylibrarian/ │ │ │ └── nomad.job │ │ ├── lidarr/ │ │ │ ├── nomad.job │ │ │ └── volume.hcl │ │ ├── lidify/ │ │ │ └── nomad.job │ │ ├── maintainerr/ │ │ │ └── nomad.job │ │ ├── mediasage/ │ │ │ └── nomad.job │ │ ├── multi-scrobbler/ │ │ │ └── nomad.job │ │ ├── navidrome/ │ │ │ ├── nomad.job │ │ │ └── volume.hcl │ │ ├── ombi/ │ │ │ ├── nomad.job │ │ │ └── volume.hcl │ │ ├── overseerr/ │ │ │ ├── nomad.job │ │ │ └── volume.hcl │ │ ├── plex/ │ │ │ ├── nomad.job │ │ │ └── volume.hcl │ │ ├── prowlarr/ │ │ │ ├── nomad.job │ │ │ └── volume.hcl │ │ ├── qbittorrent/ │ │ │ └── nomad.job │ │ ├── radarr/ │ │ │ ├── nomad.job │ │ │ └── volume.hcl │ │ ├── requestrr/ │ │ │ └── nomad.job │ │ ├── sabnzbd/ │ │ │ └── nomad.job │ │ ├── sickchill/ │ │ │ └── nomad.job │ │ ├── sonarr/ │ │ │ └── nomad.job │ │ ├── synclounge/ │ │ │ └── nomad.job │ │ ├── tautulli/ │ │ │ └── nomad.job │ │ └── tdarr/ │ │ ├── nomad.job │ │ └── volume.hcl │ ├── misc/ │ │ ├── adb/ │ │ │ └── nomad.job │ │ ├── gcp-dns-updater/ │ │ │ ├── Dockerfile │ │ │ ├── README.md │ │ │ ├── nomad.job │ │ │ ├── requirements.txt │ │ │ └── update_dns.py │ │ ├── gitea/ │ │ │ └── nomad.job │ │ ├── linuxgsm/ │ │ │ └── nomad.job │ │ ├── murmur/ │ │ │ └── nomad.job │ │ ├── octoprint/ │ │ │ └── nomad.job │ │ └── uploader/ │ │ └── nomad.job │ ├── observability/ │ │ ├── alertmanager/ │ │ │ 
├── nomad.job │ │ │ └── volume.hcl │ │ ├── blackbox-exporter/ │ │ │ └── nomad.job │ │ ├── grafana/ │ │ │ ├── nomad.job │ │ │ └── volume.hcl │ │ ├── loki/ │ │ │ ├── nomad.job │ │ │ └── volume.hcl │ │ ├── oom-test/ │ │ │ └── nomad.job │ │ ├── prometheus/ │ │ │ ├── README.md │ │ │ ├── nomad.job │ │ │ └── volume.hcl │ │ ├── telegraf/ │ │ │ └── nomad.job │ │ ├── truenas-graphite-exporter/ │ │ │ └── nomad.job │ │ └── vector/ │ │ └── nomad.job │ ├── personal-cloud/ │ │ ├── actualbudget/ │ │ │ ├── nomad.job │ │ │ └── volume.hcl │ │ ├── bitwarden/ │ │ │ └── nomad.job │ │ ├── nextcloud/ │ │ │ └── nomad.job │ │ ├── ntfy/ │ │ │ ├── nomad.job │ │ │ └── volume.hcl │ │ ├── paperless/ │ │ │ └── nomad.job │ │ └── radicale/ │ │ └── nomad.job │ ├── security/ │ │ ├── suricata/ │ │ │ └── nomad.job │ │ ├── suricata-update/ │ │ │ └── nomad.job │ │ ├── wazuh-agent/ │ │ │ └── nomad.job │ │ └── wazuh-server/ │ │ ├── nomad.job │ │ ├── volume-dashboard.hcl │ │ ├── volume-indexer.hcl │ │ └── volume-manager.hcl │ ├── smart-home/ │ │ ├── deconz/ │ │ │ ├── nomad.job │ │ │ └── volume.hcl │ │ ├── home-assistant/ │ │ │ ├── nomad.job │ │ │ └── volume.hcl │ │ ├── mqtt/ │ │ │ └── nomad.job │ │ ├── owntracks-recorder/ │ │ │ └── nomad.job │ │ └── zigbee2mqtt/ │ │ └── nomad.job │ ├── storage-backends/ │ │ ├── docker-registry/ │ │ │ ├── nomad.job │ │ │ └── volume.hcl │ │ ├── mariadb/ │ │ │ └── nomad.job │ │ ├── neo4j/ │ │ │ ├── nomad.job │ │ │ ├── setup.job │ │ │ └── volume.hcl │ │ ├── pgvector/ │ │ │ ├── nomad.job │ │ │ └── pgvector-setup.job │ │ ├── postgres/ │ │ │ ├── nomad.job │ │ │ └── postgres-setup.job │ │ ├── qdrant/ │ │ │ ├── nomad.job │ │ │ └── volume.hcl │ │ ├── redis/ │ │ │ ├── nomad.job │ │ │ └── volume.hcl │ │ └── volumes/ │ │ └── nfs-example.hcl │ ├── system/ │ │ └── docker-cleanup/ │ │ └── nomad.job │ └── web-apps/ │ ├── alertmanager-dashboard/ │ │ └── nomad.job │ ├── firecrawl/ │ │ └── nomad.job │ ├── heimdall/ │ │ └── nomad.job │ ├── homepage/ │ │ └── nomad.job │ ├── kideo/ │ │ └── 
nomad.job │ ├── minecraftmath/ │ │ └── nomad.job │ ├── wordpress/ │ │ └── nomad.job │ └── www/ │ ├── Dockerfile │ └── nomad.job ├── renovate.json └── services/ └── beefcake.json ================================================ FILE CONTENTS ================================================ ================================================ FILE: .bootstrap.mk ================================================ export VERSION_TAG=$(shell git rev-parse --short HEAD) export JOB_NAME=$(shell basename $PWD) dash-split = $(word $2,$(subst -, ,$1)) dash-1 = $(call dash-split,$*,1) dash-2 = $(call dash-split,$*,2) help:##............Show this help. @echo "" @fgrep -h "##" $(MAKEFILE_LIST) | fgrep -v fgrep | sed -e 's/\\$$//' | sed -e 's/##//' | sed 's/^/ /' @echo "" @echo "" ================================================ FILE: .gitattributes ================================================ *.job linguist-language=HCL ================================================ FILE: .github/workflows/build-gcp-dns-updater.yaml ================================================ # .github/workflows/build-gcp-dns-updater.yaml name: Build GCP DNS Updater Image on: push: branches: - main paths: - 'docker_images/gcp-dns-updater/**' workflow_dispatch: jobs: build-and-push: runs-on: ubuntu-latest permissions: contents: read packages: write # Required for pushing to GitHub Packages if used, good practice anyway steps: - name: Checkout Code uses: actions/checkout@v6 - name: Set up Docker Buildx uses: docker/setup-buildx-action@v4 - name: Login to Docker Registry uses: docker/login-action@v4 with: registry: docker.${{ secrets.NOMAD_VAR_tld }} username: ${{ secrets.DOCKER_REGISTRY_USER }} password: ${{ secrets.DOCKER_REGISTRY_PASSWORD }} - name: Build Image using Makefile env: NOMAD_VAR_tld: ${{ secrets.NOMAD_VAR_tld }} run: make build-gcp-dns-updater - name: Push Image run: docker push docker.${{ secrets.NOMAD_VAR_tld }}/gcp-dns-updater:latest ================================================ FILE: 
.github/workflows/nomad.yaml ================================================ on: push: branches: - master jobs: # JOB to run change detection changes: runs-on: ubuntu-latest permissions: pull-requests: read outputs: jobs: ${{ steps.filter.outputs.nomadjobs_files }} volumes: ${{ steps.filter_volumes.outputs.volumes_files }} steps: - name: 'Checkout' uses: 'actions/checkout@v6' - uses: dorny/paths-filter@v4 id: filter_volumes with: list-files: 'json' filters: | volumes: - 'nomad_jobs/**/volume.hcl' - 'nomad_jobs/**/*-volume.hcl' - uses: dorny/paths-filter@v4 id: filter with: list-files: 'json' filters: | nomadjobs: # Updated paths based on directory restructure - 'nomad_jobs/media-stack/plex/*.job' - 'nomad_jobs/media-stack/radarr/*.job' - 'nomad_jobs/media-stack/lidarr/*.job' - 'nomad_jobs/media-stack/overseerr/*.job' - 'nomad_jobs/storage-backends/postgres/*.job' - 'nomad_jobs/storage-backends/redis/*.job' - 'nomad_jobs/storage-backends/pgvector/*.job' - 'nomad_jobs/core-infra/coredns/*.job' - 'nomad_jobs/storage-backends/iscsi-csi-plugin/*.job' - 'nomad_jobs/media-stack/sabnzbd/*.job' - 'nomad_jobs/media-stack/qbittorrent/*.job' - 'nomad_jobs/media-stack/prowlarr/*.job' - 'nomad_jobs/media-stack/tdarr/*.job' - 'nomad_jobs/core-infra/smtp/*.job' - 'nomad_jobs/ai-ml/ollama/*.job' - 'nomad_jobs/ai-ml/open-webui/*.job' - 'nomad_jobs/misc/gcp-dns-updater/*.job' - 'nomad_jobs/core-infra/tailscale-este/*.job' - 'nomad_jobs/core-infra/traefik/*.job' - 'nomad_jobs/core-infra/iscsi-csi-plugin/*.job' - 'nomad_jobs/observability/alertmanager/*.job' - 'nomad_jobs/observability/prometheus/*.job' - 'nomad_jobs/ai-ml/radbot/*.job' - 'nomad_jobs/personal-cloud/ntfy/*.job' - 'nomad_jobs/web-apps/homepage/*.job' - 'nomad_jobs/media-stack/multi-scrobbler/*.job' - 'nomad_jobs/media-stack/lidify/*.job' - 'nomad_jobs/media-stack/mediasage/*.job' - 'nomad_jobs/core-infra/netboot-xyz/*.job' - 'nomad_jobs/web-apps/kideo/*.job' - 'nomad_jobs/web-apps/minecraftmath/*.job' add_volumes: 
runs-on: ubuntu-latest needs: changes if: needs.changes.outputs.volumes != '[]' continue-on-error: true strategy: matrix: job: ${{ fromJSON(needs.changes.outputs.volumes ) }} steps: - name: 'Checkout' uses: 'actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd' # v6 - name: Connect to Tailscale uses: tailscale/github-action@v4 with: oauth-client-id: ${{ secrets.TAILSCALE_OAUTH_CLIENT_ID }} oauth-secret: ${{ secrets.TAILSCALE_OAUTH_SECRET }} tags: tag:github-actions args: --accept-dns=true - name: Setup Nomad uses: hashicorp/setup-nomad@v1.0.0 with: version: "1.10.4" - name: deploy shell: bash run: | # Extract volume ID from the HCL file VOLUME_ID=$(grep '^id' ${{ matrix.job }} | head -1 | sed 's/.*= *"\(.*\)"/\1/') # Skip if volume already exists if nomad volume status "$VOLUME_ID" > /dev/null 2>&1; then echo "Volume '$VOLUME_ID' already exists, skipping creation" else echo "Creating volume '$VOLUME_ID'" nomad volume create ${{ matrix.job }} fi env: NOMAD_ADDR: '${{ secrets.NOMAD_ADDR }}' deploy_jobs: runs-on: ubuntu-latest needs: changes if: needs.changes.outputs.jobs != '[]' continue-on-error: true strategy: matrix: job: ${{ fromJSON(needs.changes.outputs.jobs ) }} steps: - name: 'Checkout' uses: 'actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd' # v6 - name: Connect to Tailscale uses: tailscale/github-action@v4 with: oauth-client-id: ${{ secrets.TAILSCALE_OAUTH_CLIENT_ID }} oauth-secret: ${{ secrets.TAILSCALE_OAUTH_SECRET }} tags: tag:github-actions args: --accept-dns=true - name: Setup Nomad uses: hashicorp/setup-nomad@v1.0.0 with: version: "1.10.4" - name: deploy shell: bash run: | nomad job run ${{ matrix.job }} # Removed -var flags env: NOMAD_ADDR: '${{ secrets.NOMAD_ADDR }}' NOMAD_VAR_region: 'home' NOMAD_VAR_tld: '${{ secrets.NOMAD_VAR_tld }}' # Corrected case NOMAD_VAR_shared_dir: '/home/shared/' NOMAD_VAR_downloads_dir: '/home/sabnzbd/downloads' NOMAD_VAR_music_dir: '/home/media/Music' NOMAD_VAR_movies_dir: '/home/media/Movies' 
NOMAD_VAR_books_dir: '/home/media/Books' NOMAD_VAR_tv_dir: '/home/media/TV' NOMAD_VAR_media_dir: '/home/media' NOMAD_VAR_hass_key: '${{ secrets.NOMAD_VAR_hass_key }}' # Corrected case NOMAD_VAR_hass_ip: '${{ secrets.NOMAD_VAR_hass_ip }}' NOMAD_VAR_github_pat: ${{ secrets.NOMAD_VAR_github_pat }} # Corrected case NOMAD_VAR_datacenters_all: '["dc1", "public"]' NOMAD_VAR_datacenters_dc1: '["dc1"]' NOMAD_VAR_datacenters_public: '["public"]' NOMAD_VAR_tailscale_auth: '${{ secrets.NOMAD_VAR_tailscale_auth }}' # Corrected case NOMAD_VAR_tailscale_auth_este: '${{ secrets.NOMAD_VAR_tailscale_auth_este }}' # Corrected case NOMAD_VAR_oauth_client_id: '${{ secrets.NOMAD_VAR_oauth_client_id }}' # Corrected case NOMAD_VAR_oauth_client_secret: '${{ secrets.NOMAD_VAR_oauth_client_secret }}' # Corrected case NOMAD_VAR_oauth_secret: '${{ secrets.NOMAD_VAR_oauth_secret }}' # Corrected case NOMAD_VAR_oauth_emails: '${{ secrets.NOMAD_VAR_oauth_emails }}' # Corrected case NOMAD_VAR_ssh_id: '${{ secrets.NOMAD_VAR_ssh_id }}' # Corrected case NOMAD_VAR_truenas_api_key: '${{ secrets.NOMAD_VAR_truenas_api_key }}' # Corrected case NOMAD_VAR_gh_access_token: '${{ secrets.NOMAD_VAR_gh_access_token }}' # Corrected case NOMAD_VAR_ollama_data_dir: '/home/shared/ollama' NOMAD_VAR_ollama_base_url: 'http://ollama.service.consul:11434' NOMAD_VAR_webui_secret_key: '${{ secrets.NOMAD_VAR_webui_secret_key }}' # Corrected case NOMAD_VAR_datacenter: 'dc1' NOMAD_VAR_dns_server_ip: '192.168.50.2' # Added missing variables NOMAD_VAR_aws_access_key: ${{ secrets.NOMAD_VAR_aws_access_key }} NOMAD_VAR_aws_secret_key: ${{ secrets.NOMAD_VAR_aws_secret_key }} NOMAD_VAR_bedrock_aws_region: ${{ secrets.NOMAD_VAR_bedrock_aws_region }} NOMAD_VAR_gcp_dns_admin: ${{ secrets.NOMAD_VAR_gcp_dns_admin }} NOMAD_VAR_gemini_api_key: ${{ secrets.NOMAD_VAR_gemini_api_key }} NOMAD_VAR_litellm_master_key: ${{ secrets.NOMAD_VAR_litellm_master_key }} NOMAD_VAR_manyfold_secret_key: ${{ secrets.NOMAD_VAR_manyfold_secret_key }} 
NOMAD_VAR_postgres_pass: ${{ secrets.NOMAD_VAR_postgres_pass }} NOMAD_VAR_truenas_iscsi_pass: ${{ secrets.NOMAD_VAR_truenas_iscsi_pass }} # Added gcp_project_id NOMAD_VAR_gcp_project_id: ${{ secrets.NOMAD_VAR_gcp_project_id }} # GitHub PAT is now stored securely in secrets NOMAD_VAR_truenass_iscsi_pass: ${{ secrets.NOMAD_VAR_truenass_iscsi_pass }} # Note potential typo in name NOMAD_VAR_dns_zone: ${{ secrets.NOMAD_VAR_dns_zone }} NOMAD_VAR_ingress_ip: ${{ secrets.NOMAD_VAR_ingress_ip }} NOMAD_VAR_radbot_credential_key: ${{ secrets.NOMAD_VAR_radbot_credential_key }} NOMAD_VAR_radbot_admin_token: ${{ secrets.NOMAD_VAR_radbot_admin_token }} NOMAD_VAR_radbot_mcp_token: ${{ secrets.NOMAD_VAR_radbot_mcp_token }} NOMAD_VAR_mullvad_wireguard_key: ${{ secrets.NOMAD_VAR_mullvad_wireguard_key }} NOMAD_VAR_mullvad_wireguard_addr: ${{ secrets.NOMAD_VAR_mullvad_wireguard_addr }} NOMAD_VAR_sonarr_api_key: ${{ secrets.NOMAD_VAR_sonarr_api_key }} NOMAD_VAR_radarr_api_key: ${{ secrets.NOMAD_VAR_radarr_api_key }} NOMAD_VAR_curseforge_api_key: ${{ secrets.NOMAD_VAR_curseforge_api_key }} NOMAD_VAR_pgvector_pass: ${{ secrets.NOMAD_VAR_pgvector_pass }} NOMAD_VAR_pgvector_admin_password: ${{ secrets.NOMAD_VAR_pgvector_admin_password }} NOMAD_VAR_postgres_admin_password: ${{ secrets.NOMAD_VAR_postgres_admin_password }} NOMAD_VAR_litellm_crawl4ai_key: ${{ secrets.NOMAD_VAR_litellm_crawl4ai_key }} NOMAD_VAR_litellm_salt_key: ${{ secrets.NOMAD_VAR_litellm_salt_key }} NOMAD_VAR_wazuh_api_password: ${{ secrets.NOMAD_VAR_wazuh_api_password }} NOMAD_VAR_wazuh_dashboard_password: ${{ secrets.NOMAD_VAR_wazuh_dashboard_password }} NOMAD_VAR_wazuh_indexer_password: ${{ secrets.NOMAD_VAR_wazuh_indexer_password }} NOMAD_VAR_otr_pass: ${{ secrets.NOMAD_VAR_otr_pass }} NOMAD_VAR_plex_token: ${{ secrets.NOMAD_VAR_plex_token }} NOMAD_VAR_listenbrainz_token: ${{ secrets.NOMAD_VAR_listenbrainz_token }} NOMAD_VAR_listenbrainz_username: ${{ secrets.NOMAD_VAR_listenbrainz_username }} NOMAD_VAR_lastfm_api_key: 
${{ secrets.NOMAD_VAR_lastfm_api_key }} NOMAD_VAR_lastfm_api_secret: ${{ secrets.NOMAD_VAR_lastfm_api_secret }} NOMAD_VAR_lidarr_api_key: ${{ secrets.NOMAD_VAR_lidarr_api_key }} NOMAD_VAR_kideo_jwt_secret: ${{ secrets.NOMAD_VAR_kideo_jwt_secret }} NOMAD_VAR_kideo_youtube_cookies: ${{ secrets.NOMAD_VAR_kideo_youtube_cookies }} NOMAD_VAR_kideo_curiositystream_user: ${{ secrets.NOMAD_VAR_kideo_curiositystream_user }} NOMAD_VAR_kideo_curiositystream_pass: ${{ secrets.NOMAD_VAR_kideo_curiositystream_pass }} NOMAD_VAR_minecraftmath_jwt_secret: ${{ secrets.NOMAD_VAR_minecraftmath_jwt_secret }} ================================================ FILE: .github/workflows/update-kideo.yaml ================================================ name: Update kideo image tag on: repository_dispatch: types: [update-kideo] jobs: update-and-deploy: runs-on: ubuntu-latest steps: - name: Checkout uses: actions/checkout@v6 with: token: ${{ secrets.ACTIONS_PAT }} - name: Update image tag in Nomad job run: | TAG="${{ github.event.client_payload.tag }}" sed -i "s|ghcr.io/perrymanuk/kideo:[^ \"]*|ghcr.io/perrymanuk/kideo:${TAG}|" \ nomad_jobs/web-apps/kideo/nomad.job echo "Updated kideo image tag to ${TAG}" - name: Commit and push run: | git config user.name "github-actions[bot]" git config user.email "github-actions[bot]@users.noreply.github.com" TAG="${{ github.event.client_payload.tag }}" git add nomad_jobs/web-apps/kideo/nomad.job git commit -m "chore: bump kideo to ${TAG}" git push ================================================ FILE: .github/workflows/update-minecraftmath.yaml ================================================ name: Update minecraftmath image tag on: repository_dispatch: types: [update-minecraftmath] jobs: update-and-deploy: runs-on: ubuntu-latest steps: - name: Checkout uses: actions/checkout@v6 with: token: ${{ secrets.ACTIONS_PAT }} - name: Update image tag in Nomad job run: | TAG="${{ github.event.client_payload.tag }}" sed -i "s|ghcr.io/perrymanuk/minecraftmath:[^ 
\"]*|ghcr.io/perrymanuk/minecraftmath:${TAG}|" \ nomad_jobs/web-apps/minecraftmath/nomad.job echo "Updated minecraftmath image tag to ${TAG}" - name: Commit and push run: | git config user.name "github-actions[bot]" git config user.email "github-actions[bot]@users.noreply.github.com" TAG="${{ github.event.client_payload.tag }}" git add nomad_jobs/web-apps/minecraftmath/nomad.job git commit -m "chore: bump minecraftmath to ${TAG}" git push ================================================ FILE: .github/workflows/update-radbot-dev.yaml ================================================ name: Update radbot-dev image tag on: repository_dispatch: types: [update-radbot-dev] jobs: update-and-deploy: runs-on: ubuntu-latest steps: - name: Checkout uses: actions/checkout@v6 with: token: ${{ secrets.ACTIONS_PAT }} - name: Update image tag in dev Nomad job run: | TAG="${{ github.event.client_payload.tag }}" sed -i "s|ghcr.io/perrymanuk/radbot:[^ \"]*|ghcr.io/perrymanuk/radbot:${TAG}|" \ nomad_jobs/ai-ml/radbot/nomad-dev.job echo "Updated radbot-dev image tag to ${TAG}" - name: Commit and push run: | git config user.name "github-actions[bot]" git config user.email "github-actions[bot]@users.noreply.github.com" TAG="${{ github.event.client_payload.tag }}" git add nomad_jobs/ai-ml/radbot/nomad-dev.job git commit -m "chore: deploy radbot-dev with ${TAG}" git push ================================================ FILE: .github/workflows/update-radbot.yaml ================================================ name: Update radbot image tag on: repository_dispatch: types: [update-radbot] jobs: update-and-deploy: runs-on: ubuntu-latest steps: - name: Checkout uses: actions/checkout@v6 with: token: ${{ secrets.ACTIONS_PAT }} - name: Update image tag in Nomad job run: | TAG="${{ github.event.client_payload.tag }}" sed -i "s|ghcr.io/perrymanuk/radbot:[^ \"]*|ghcr.io/perrymanuk/radbot:${TAG}|" \ nomad_jobs/ai-ml/radbot/nomad.job echo "Updated radbot image tag to ${TAG}" - name: Commit and push run: 
| git config user.name "github-actions[bot]" git config user.email "github-actions[bot]@users.noreply.github.com" TAG="${{ github.event.client_payload.tag }}" git add nomad_jobs/ai-ml/radbot/nomad.job git commit -m "chore: bump radbot to ${TAG}" git push ================================================ FILE: .gitignore ================================================ .envrc .env *-pub .passwords .envrc* vault/secrets.yaml vault/*.hcl www/main.jpg ssl levant/* !levant/defaults.yml hosts *.swp .ra-aid CLAUDE.md scripts/* ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. 
"Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. 
Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
================================================
FILE: Makefile
================================================
# Load .env files
#include .envrc
include ./.bootstrap.mk

# Define base deployments using their service names
base_deployments = coredns docker-registry haproxy

#help: # Placeholder for potential future help generation

# Find the nomad job file for a given service name ($1) within nomad_jobs/ structure
# Usage: $(call find_job_file, service_name)
# Example: $(call find_job_file, coredns) -> nomad_jobs/core-infra/coredns/coredns.job (or .nomad)
find_job_file = $(shell find nomad_jobs/ -mindepth 2 -maxdepth 3 -type f \( -name '$1.job' -o -name '$1.nomad' \) -print -quit)

.PHONY: dc1-%
dc1-%: ## Deploy specific job to dc1 (searches within nomad_jobs/ structure)
	@JOB_FILE=$(call find_job_file,$*); \
	if [ -z "$$JOB_FILE" ]; then \
		echo "Error: Could not find nomad job file for '$*' in nomad_jobs/."; \
		exit 1; \
	fi; \
	echo "Found job file: $$JOB_FILE"; \
	nomad job run -var datacenters='["dc1"]' $$JOB_FILE

.PHONY: all-%
all-%: ## Deploy specific job to all DCs (searches within nomad_jobs/ structure)
	@JOB_FILE=$(call find_job_file,$*); \
	if [ -z "$$JOB_FILE" ]; then \
		echo "Error: Could not find nomad job file for '$*' in nomad_jobs/."; \
		exit 1; \
	fi; \
	echo "Found job file: $$JOB_FILE"; \
	nomad job run -var datacenters='["dc1", "hetzner"]' $$JOB_FILE

.PHONY: deploy-%
deploy-%: ## Deploy specific job (searches within nomad_jobs/ structure)
	@JOB_FILE=$(call find_job_file,$*); \
	if [ -z "$$JOB_FILE" ]; then \
		echo "Error: Could not find nomad job file for '$*' in nomad_jobs/."; \
		exit 1; \
	fi; \
	echo "Found job file: $$JOB_FILE"; \
	nomad job run $$JOB_FILE

.PHONY: deploy-base
deploy-base: ## Deploys base jobs (coredns, docker-registry, haproxy) to dc1
# NOTE: this used to be a $(foreach ...) loop whose recipe contained
# "@JOB_FILE=$$(call find_job_file,$(var))". The doubled "$$" escaped make's
# expansion, so the shell received a literal "$(call find_job_file,...)" —
# invalid shell — and the "@" silencer appeared mid-line where make does not
# accept it. Rewritten as a plain shell for-loop with the find inlined, since
# make functions cannot be called with a shell variable as the argument.
	@echo "Deploying base services to dc1: $(base_deployments)"
	@for svc in $(base_deployments); do \
		JOB_FILE=$$(find nomad_jobs/ -mindepth 2 -maxdepth 3 -type f \( -name "$$svc.job" -o -name "$$svc.nomad" \) -print -quit); \
		if [ -z "$$JOB_FILE" ]; then \
			echo "Error: Could not find nomad job file for base deployment '$$svc' in nomad_jobs/."; \
			exit 1; \
		fi; \
		echo "Deploying $$svc from $$JOB_FILE..."; \
		nomad job run -var datacenters='["dc1"]' $$JOB_FILE; \
	done

.PHONY: sslkeys
sslkeys: ## Generate certs if you have SSL enabled
	consul-template -config ssl/consul-template.hcl -once -vault-renew-token=false

.PHONY: ssl-browser-cert
ssl-browser-cert: ## Generate browser cert if you have SSL enabled
	sudo openssl pkcs12 -export -out browser_cert.p12 -inkey ssl/hetzner/server-key.pem -in ssl/hetzner/server.pem -certfile ssl/hetzner/nomad-ca.pem

.PHONY: sync-github-secrets
sync-github-secrets: ## Sync NOMAD_VAR variables from .envrc to GitHub secrets using gh CLI
	@echo "Syncing NOMAD_VAR variables from .envrc to GitHub secrets..."
	@bash -c 'source .envrc && env | grep "^NOMAD_VAR_" | while read -r line; do \
		name="$${line%%=*}"; \
		value="$${line#*=}"; \
		echo "Setting $$name"; \
		printf "%s" "$$value" | gh secret set "$$name"; \
	done'
	@echo "✅ All NOMAD_VAR secrets synced to GitHub"

.PHONY: build-update-metadata
build-update-metadata: ## Build the update-metadata Docker image
	@echo "Building update-metadata Docker image..."
# Assumes update-metadata is in docker_images/update-metadata/
	docker build --platform linux/amd64 -t update-metadata:latest docker_images/update-metadata/

.PHONY: build-gcp-dns-updater
build-gcp-dns-updater: ## Build the gcp-dns-updater Docker image
	@echo "Building gcp-dns-updater Docker image..."
# Assumes gcp-dns-updater is in docker_images/gcp-dns-updater/
	docker build --platform linux/amd64 -t docker.$$NOMAD_VAR_tld/gcp-dns-updater:latest docker_images/gcp-dns-updater/

# Example deployment target for gcp-dns-updater (if needed, uncomment and adjust)
#.PHONY: deploy-gcp-dns-updater
#deploy-gcp-dns-updater: ## Deploy gcp-dns-updater job using generic target
#	$(MAKE) deploy-gcp-dns-updater
================================================
FILE: README.md
================================================
# Hashi-Homelab

### UPDATE - September 2nd 2025 This repo has gone through some major changes since the last update. I've completely reorganized the job structure into 10 clean categories (77 services total now!), added a comprehensive AI/ML stack with Ollama and Open-WebUI, enhanced the monitoring with Loki and Vector for log aggregation, modernized the alertmanager with better persistence and pushover notifications, added weekly docker cleanup automation, redesigned CoreDNS and Traefik for proper HA deployment, and implemented comprehensive Nomad allocation monitoring. The GitHub Actions deployment has been refined with better change detection and the whole thing just runs much more smoothly now. Also added a bunch of new services like smart home integration, personal cloud apps, and storage backends including pgvector for AI workloads, plus a few other bits and bobs that make the whole setup more robust. ### Background The hashi-homelab was born of a desire to have a simple to maintain but very flexible homelab setup. While designed to work as a cohesive whole, each individual job can be taken and deployed on any Nomad cluster with minimal adjustments - they're built to be portable and self-contained. The main goals were to keep the resources required to run the base lab setup small and to have all of the parts be easily exchangeable. `make deploy-base` will deploy coredns, docker-registry and haproxy - these are needed for everything else to work but aside from these you can pick and choose what to deploy with `make deploy-SERVICE_NAME` to deploy any of the 77 services organized across 10 categories. `make deploy-prometheus` or `make deploy-ollama` for example. You can also target specific datacenters with `make dc1-traefik` or `make all-postgres`. The whole thing is organized much better now with services grouped into logical categories like ai-ml, media-stack, smart-home, observability, etc. 
Makes it way easier to find what you're looking for and deploy related services together. In the future I would like to provide a ready to boot image for a raspberry pi where you can run all of this as the resources needed are really minimal. With just the basics you can get away with one pi4 4gb model with plenty of room to spare. ### Core Components: * **Scheduler**: Nomad *...with proper allocation monitoring now* * **Service Catalog/Registry**: Consul * **Service Mesh**: Traefik *...redesigned for HA deployment, much more robust* * **VPN**: Tailscale *...can't say enough good things about tailscale, its integral for my homelab now* * **DNS**: CoreDNS *...now with HA setup and proper failover* * **Keepalived**: Assign a floating IP for DNS to not lose it if a node goes down * **Monitoring**: Prometheus, Alertmanager, Telegraf, Blackbox-exporter, and Grafana *...plus Loki and Vector for log aggregation* * **Container Registry**: Docker-Registry *...because sometimes you don't want to rely on Docker Hub being up* * **AI/ML**: Ollama for local LLM serving, Open-WebUI for chat interface, LiteLLM for API compatibility * **Vector Database**: PostgreSQL with pgvector extension for AI/ML vector embeddings storage and similarity search * **Storage**: NFS and iSCSI CSI plugins for persistent storage across the cluster ### Service Categories (77 total): * **ai-ml** (8): ollama, open-webui, litellm, cognee, crawl4ai, manyfold, paperless-ai, pgvector-client * **core-infra** (13): coredns, traefik, haproxy, keepalived, tailscale, github-runner, csi plugins, etc. * **media-stack** (16): plex, sonarr, radarr, lidarr, sabnzbd, qbittorrent, overseerr, navidrome, etc. 
* **personal-cloud** (4): nextcloud, bitwarden, paperless, radicale * **smart-home** (5): home-assistant, deconz, zigbee2mqtt, mqtt, owntracks-recorder * **observability** (7): prometheus, grafana, alertmanager, loki, vector, telegraf, blackbox-exporter * **storage-backends** (9): postgres, pgvector, redis, mariadb, neo4j, qdrant, docker-registry, etc. * **web-apps** (5): heimdall, wordpress, firecrawl, alertmanager-dashboard, www * **misc** (7): gitea, uploader, murmur, octoprint, adb, linuxgsm, gcp-dns-updater * **system** (3): docker-cleanup, volumes ### Setup You need to have Nomad and Consul already running; a simple setup with the -dev flag will suffice for testing, but you'll want a proper cluster for real usage. If you don't already have a Nomad and Consul cluster, there are some excellent guides here... https://www.nomadproject.io/guides/install/production/deployment-guide.html https://learn.hashicorp.com/consul/datacenter-deploy/deployment-guide There are also some files in the `config` folder to help you get started and also one with some services to announce so the Consul and Nomad UI are available over the service mesh. This repo relies on a `.envrc` file and direnv installed, or setting the environment variables manually. There is an `envrc` example file located in the repo that you can fill in and move to `.envrc`. The secret values from the `.envrc` also need to be put into your github secrets if you plan on deploying via the automated workflow. You can use `make sync-github-secrets` to sync them all at once which is pretty handy. Once this is done, you simply run a `make deploy-base` and point your DNS to resolve via one of the Nomad nodes' IP address. One of the more specific parts of the setup that you may need to adjust is that I use several NFS mounts to provide persistent storage mounted on each client at `/home/shared` for configs and `/home/media` for images, video, audio, etc. 
Depending on which parts of this you are planning to deploy you will just need to adjust this persistent storage to meet the setup of your clients. The CSI plugins help make this more flexible now. Services are exposed by their task name in the nomad job and whatever you configure your TLD to be in the `.envrc`. The whole thing works really well with the automated GitHub Actions deployment now - just push changes and they get deployed automatically to your cluster. This requires tailscale for the GitHub Actions to connect to your cluster. ================================================ FILE: ansible/configs/consul.hcl.j2 ================================================ #jinja2: trim_blocks:False server = {% if "lan-client-server" in group_names %}true{% else %}false{% endif %} ui = {% if "lan-client-server" in group_names %}true{% else %}false{% endif %} {% if "wan-clients" in group_names %} {% raw %} client_addr = "{{GetInterfaceIP \"tailscale0\"}}" advertise_addr = "{{GetInterfaceIP \"tailscale0\"}}" bind_addr = "{{GetInterfaceIP \"tailscale0\"}}" {% endraw %} {% else %} {% raw %} client_addr = "0.0.0.0" advertise_addr = "{{ GetPrivateInterfaces | include \"network\" \"192.168.50.0/24\" | attr \"address\" }}" bind_addr = "0.0.0.0" {% endraw %} {% endif %} {% raw %} advertise_addr_wan = "{{ GetPrivateInterfaces | include \"network\" \"192.168.50.0/24\" | attr \"address\" }}" {% endraw %} translate_wan_addrs = true data_dir = "/var/lib/consul" datacenter = "homelab" enable_syslog = true leave_on_terminate = true log_level = "WARN" retry_join = ["192.168.50.39", "192.168.50.113", "192.168.50.85"] {% if "lan-client-server" in group_names %}bootstrap_expect = 3{% else %}{% endif %} telemetry { prometheus_retention_time = "60s" } ================================================ FILE: ansible/configs/consul.service ================================================ [Unit] Description=consul agent Requires=network-online.target tailscaled.service 
After=network-online.target tailscaled.service [Service] ExecStartPre=/bin/sleep 30 EnvironmentFile=-/etc/default/consul Restart=always ExecStart=/usr/bin/consul agent -domain consul -ui -config-dir=/etc/consul.d ExecReload=/bin/kill -HUP $MAINPID KillSignal=SIGINT [Install] WantedBy=multi-user.target ================================================ FILE: ansible/configs/docker-daemon.json.j2 ================================================ { "dns": ["192.168.50.2", "192.168.50.1", "8.8.8.8"]{% if 'cheese' in group_names %}, "runtimes": { "nvidia": { "args": [], "path": "nvidia-container-runtime" } } {% endif %} } ================================================ FILE: ansible/configs/nomad.hcl.j2 ================================================ #jinja2: trim_blocks:False data_dir = "/var/lib/nomad/" datacenter = {% if "cheese" in group_names %}"cheese"{% elif "minecraft" in group_names %}"minecraft"{% else %}"dc1"{% endif %} log_level = "warn" bind_addr = "0.0.0.0" region = "home" server { enabled = {% if "lan-client-server" in group_names %}true{% else %}false{% endif %} bootstrap_expect = 3 server_join { retry_join = ["192.168.50.39", "192.168.50.113", "192.168.50.85"] retry_max = 3 retry_interval = "15s" } authoritative_region = "home" heartbeat_grace = "300s" min_heartbeat_ttl = "20s" } client { enabled = true {% raw %} network_interface = "{{ GetPrivateInterfaces | include \"network\" \"192.168.50.0/24\" | attr \"name\" }}" {% endraw %} options { docker.auth.config = "/root/.docker/config.json" docker.privileged.enabled = true driver.raw_exec.enable = "1" docker.volumes.enabled = true } meta { shared_mount = {% if "wan-clients" in group_names %}"false"{% else %}"true"{% endif %} dns = {% if "wan-clients" in group_names %}"false"{% else %}"true"{% endif %} {%- if ansible_hostname == "klo01" %} keepalived_priority = "100" keepalived_priority_dns1 = "100" keepalived_priority_dns2 = "{{ 200 | random(start=101) }}" {%- else %} keepalived_priority = "{{ 200 | 
random(start=101) }}" keepalived_priority_dns1 = "{{ 200 | random(start=101) }}" keepalived_priority_dns2 = "{{ 200 | random(start=101) }}" {%- endif %} } host_network "lan" { cidr = "192.168.50.0/24" reserved_ports = "22" } host_network "tailscale" { cidr = "100.0.0.0/8" reserved_ports = "22" } {% if "wan-clients" in group_names %} host_network "public" { cidr = "78.47.90.68/32" reserved_ports = "22" } {%- endif %} {%- if ansible_hostname == "klo01" %} reserved { memory = 3072 } {%- endif %} } telemetry { disable_hostname = true prometheus_metrics = true publish_allocation_metrics = true publish_node_metrics = true use_node_name = false } {% raw %} advertise { http = "{{ GetPrivateInterfaces | include \"network\" \"192.168.50.0/24\" | attr \"address\" }}:4646" rpc = "{{ GetPrivateInterfaces | include \"network\" \"192.168.50.0/24\" | attr \"address\" }}:4647" serf = "{{ GetPrivateInterfaces | include \"network\" \"192.168.50.0/24\" | attr \"address\" }}:4648" } {% endraw %} consul { # The address to the Consul agent. {%- raw %} address = "127.0.0.1:8500" {%- endraw %} # The service name to register the server and client with Consul. client_service_name = "nomad-client" # Enables automatically registering the services. auto_advertise = true # Enabling the server and client to bootstrap using Consul. 
server_auto_join = true client_auto_join = true } #vault { # enabled = true # address = "http://vault.service.home:8200" # allow_unauthenticated = true # create_from_role = "nomad-cluster" #} plugin "docker" { config { allow_caps = ["CHOWN","DAC_OVERRIDE","FSETID","FOWNER","MKNOD","NET_RAW","SETGID","SETUID","SETFCAP","SETPCAP","NET_BIND_SERVICE","SYS_CHROOT","KILL","AUDIT_WRITE","NET_ADMIN","NET_BROADCAST","SYS_NICE"] # extra Docker labels to be set by Nomad on each Docker container with the appropriate value extra_labels = ["job_name", "task_group_name", "task_name", "namespace", "node_name"] allow_privileged = true volumes { enabled = true selinuxlabel = "z" } } } ================================================ FILE: ansible/configs/nomad.service ================================================ [Unit] Description=nomad.agent Requires=network-online.target tailscaled.service After=network-online.target tailscaled.service remote-fs.target # Hard requirement: Nomad must not start until NFS mounts are ready RequiresMountsFor=/home/shared /home/media/TV /home/media/Music /home/media/Movies /home/media/Books [Service] EnvironmentFile=-/etc/default/nomad Restart=on-failure RestartSec=10 ExecStart=/usr/bin/nomad agent $OPTIONS -config=/etc/nomad.d/nomad.hcl ExecReload=/bin/kill -HUP $MAINPID KillSignal=SIGINT KillMode=process [Install] WantedBy=multi-user.target ================================================ FILE: ansible/playbook.yml ================================================ --- - name: network mounts hosts: - lan-client-server - lan-client - cheese - minecraft become: true remote_user: root tasks: - name: Configure static IP via netplan copy: dest: /etc/netplan/00-installer-config.yaml content: | network: version: 2 ethernets: ens3: addresses: - {{ inventory_hostname }}/24 routes: - to: default via: 192.168.50.1 nameservers: addresses: - 192.168.50.1 notify: Apply netplan - name: Ensure directories exist file: path: "{{ item }}" state: directory mode: '0755' 
with_items: - /home/shared - /home/media/TV - /home/media/Music - /home/media/Movies - /home/media/Books - name: makesure multipath.conf exists copy: content: "" dest: /etc/multipath.conf force: no backup: yes ignore_errors: yes - name: Manage /etc/multipath.conf blockinfile: path: /etc/multipath.conf block: | defaults { user_friendly_names yes find_multipaths yes } - name: Install Apt packages apt: name: - nfs-common - avahi-daemon - docker.io - open-iscsi - lsscsi - sg3-utils - multipath-tools - scsitools - name: Ensure /etc/docker directory exists file: path: /etc/docker state: directory mode: '0755' - name: Add NVIDIA Container Toolkit GPG key apt_key: url: https://nvidia.github.io/libnvidia-container/gpgkey state: present keyring: /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg when: "'cheese' in group_names" - name: Add NVIDIA Container Toolkit repository apt_repository: repo: "deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://nvidia.github.io/libnvidia-container/stable/deb/$(ARCH) /" state: present filename: nvidia-container-toolkit when: "'cheese' in group_names" - name: Install NVIDIA Container Toolkit apt: name: nvidia-container-toolkit state: present update_cache: yes when: "'cheese' in group_names" - name: Configure Docker daemon with fallback DNS and nvidia runtime template: src: configs/docker-daemon.json.j2 dest: /etc/docker/daemon.json notify: Restart Docker - name: Remove old NFS fstab entries lineinfile: path: /etc/fstab regexp: '^192\.168\.50\.208:/mnt/.*' state: absent - name: Add NFS fstab entries with proper options blockinfile: path: /etc/fstab marker: "# {mark} ANSIBLE MANAGED NFS MOUNTS" block: | 192.168.50.208:/mnt/pool0/share /home/shared nfs4 _netdev,hard,timeo=600,retrans=5,x-systemd.mount-timeout=90,x-systemd.requires=network-online.target,x-systemd.after=network-online.target 0 0 192.168.50.208:/mnt/pool1/media/TV /home/media/TV nfs4 
_netdev,hard,timeo=600,retrans=5,x-systemd.mount-timeout=90,x-systemd.requires=network-online.target,x-systemd.after=network-online.target 0 0 192.168.50.208:/mnt/pool0/media/music /home/media/Music nfs4 _netdev,hard,timeo=600,retrans=5,x-systemd.mount-timeout=90,x-systemd.requires=network-online.target,x-systemd.after=network-online.target 0 0 192.168.50.208:/mnt/pool1/media/Movies /home/media/Movies nfs4 _netdev,hard,timeo=600,retrans=5,x-systemd.mount-timeout=90,x-systemd.requires=network-online.target,x-systemd.after=network-online.target 0 0 192.168.50.208:/mnt/pool0/media/audiobooks /home/media/Books nfs4 _netdev,hard,timeo=600,retrans=5,x-systemd.mount-timeout=90,x-systemd.requires=network-online.target,x-systemd.after=network-online.target 0 0 notify: - Reload systemd fstab - Mount Filesystems - name: Enable services systemd: name: "{{ item }}" enabled: yes state: started with_items: - open-iscsi - multipath-tools handlers: - name: Apply netplan command: netplan apply - name: Reload systemd fstab systemd: daemon_reload: yes - name: Mount Filesystems command: mount -a - name: Restart Docker service: name: docker state: restarted - name: Update configuration, execute command, and install packages hosts: - lan-client-server - lan-client - wan-clients - cheese - minecraft remote_user: root #roles: # - role: artis3n.tailscale # vars: # # Example pulling the API key from the env vars on the host running Ansible # tailscale_authkey: "{{ lookup('env', 'NOMAD_VAR_tailscale_auth') }}" # tailscale_args: "{% if 'wan-clients' in group_names %}--accept-routes=true{% else %}--accept-routes=false{% endif %}" tasks: - name: Ensure directories exist file: path: "{{ item }}" state: directory mode: '0755' with_items: - /var/lib/nomad - /var/lib/consul - /etc/nomad.d - /etc/consul.d - name: Manage systemd service file nomad copy: src: configs/nomad.service dest: /lib/systemd/system/nomad.service notify: Reload systemd - name: Manage systemd service file consul copy: src: 
configs/consul.service dest: /lib/systemd/system/consul.service notify: Reload systemd - name: manage nomad config template: src: configs/nomad.hcl.j2 dest: /etc/nomad.d/nomad.hcl notify: Restart Service - name: manage consul config template: src: configs/consul.hcl.j2 dest: /etc/consul.d/server.hcl - name: Add HashiCorp APT repository key apt_key: url: https://apt.releases.hashicorp.com/gpg state: present validate_certs: no keyring: /usr/share/keyrings/hashicorp-archive-keyring.gpg - name: Configure HashiCorp APT repository apt_repository: repo: "deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] https://apt.releases.hashicorp.com {{ ansible_distribution_release }} main" - name: Install Apt packages apt: name: - nomad=1.10.4-1 - consul=1.19.1-1 dpkg_options: 'force-confdef,force-confold' update_cache: true state: latest allow_downgrade: true - name: Modify sysctl entry for net.ipv4.ip_nonlocal_bind sysctl: name: "{{ item.name }}" value: "{{ item.value }}" state: present with_items: - { name: "net.ipv4.ip_nonlocal_bind", value: "1" } - { name: "net.ipv4.conf.all.forwarding", value: "1" } notify: Apply Sysctl Changes - name: Enable services systemd: name: "{{ item }}" enabled: yes state: started with_items: - nomad - consul - tailscaled handlers: - name: Restart Service service: name: nomad state: restarted - name: Reload systemd systemd: daemon_reload: yes - name: Mount Filesystems command: mount -a - name: Apply Sysctl Changes command: sysctl -p /etc/sysctl.conf - name: Install and configure Tailscale hosts: - all become: yes remote_user: root gather_facts: yes tags: tailscale vars: # Read authkey from environment variable; default to 'MISSING' if not set tailscale_auth_key: "{{ lookup('env', 'NOMAD_VAR_tailscale_auth') | default('MISSING') }}" # Optionally customize your Tailscale hostname tailscale_hostname: "{{ inventory_hostname }}" # Tag to advertise (must match OAuth client tag) tailscale_tags: "tag:nomad" tasks: - name: Download Tailscale GPG 
key via curl shell: > curl -fsSL https://pkgs.tailscale.com/stable/ubuntu/noble.noarmor.gpg | tee /usr/share/keyrings/tailscale-archive-keyring.gpg >/dev/null changed_when: true - name: Update apt cache apt: update_cache: yes - name: Configure Tailscale apt repository copy: dest: /etc/apt/sources.list.d/tailscale.list content: | deb [signed-by=/usr/share/keyrings/tailscale-archive-keyring.gpg arch=amd64] https://pkgs.tailscale.com/stable/ubuntu/ noble main - name: Update apt cache (after adding Tailscale repo) apt: update_cache: yes - name: Install Tailscale apt: name: tailscale state: latest - name: Enable and start tailscaled service service: name: tailscaled state: started enabled: yes - name: Bring Tailscale interface up using authkey # "command" used because there's no official Ansible module for "tailscale up". # This is not strictly idempotent; see notes below for advanced usage. command: > tailscale up --authkey={{ tailscale_auth_key }} --hostname={{ tailscale_hostname }} --advertise-tags={{ tailscale_tags }} --accept-dns=false --reset register: tailscale_up changed_when: "'Success' in tailscale_up.stdout or 'Success' in tailscale_up.stderr or tailscale_up.rc == 0" - name: Show tailscale status command: tailscale status register: tailscale_status changed_when: false - debug: var: tailscale_status.stdout - name: Install Zsh and Oh My Zsh with Agnoster theme hosts: all become: yes remote_user: root gather_facts: yes vars: my_zsh_user: "root" # Change this to the desired user tasks: - name: Install zsh apt: name: zsh state: present update_cache: yes - name: Ensure home directory path is known user: name: "{{ my_zsh_user }}" register: user_info # This captures the user details, including home directory. 
- name: Check if Oh My Zsh is already installed stat: path: "/root/.oh-my-zsh" register: oh_my_zsh_stat - name: Check if zshrc exists stat: path: "/root/.zshrc" register: zshrc_stat - name: Clone Oh My Zsh git: repo: "https://github.com/ohmyzsh/ohmyzsh.git" dest: "/root/.oh-my-zsh" become_user: "{{ my_zsh_user }}" when: not oh_my_zsh_stat.stat.exists - name: Copy the default .zshrc template if not present copy: src: "/root/.oh-my-zsh/templates/zshrc.zsh-template" dest: "/root/.zshrc" remote_src: yes become_user: "{{ my_zsh_user }}" when: not zshrc_stat.stat.exists - name: Set Oh My Zsh theme to agnoster # Uses a regex replace to ensure 'ZSH_THEME="agnoster"' replace: path: "/root/.zshrc" regexp: '^ZSH_THEME="[^"]+"' replace: 'ZSH_THEME="agnoster"' become_user: "{{ my_zsh_user }}" - name: Change default shell to zsh for the user user: name: "{{ my_zsh_user }}" shell: /usr/bin/zsh ================================================ FILE: ansible/zsh.yml ================================================ --- - name: Install Zsh and Oh My Zsh with Agnoster theme hosts: cheese become: yes remote_user: root gather_facts: yes vars: my_zsh_user: "root" # Change this to the desired user tasks: - name: Install zsh apt: name: zsh state: present update_cache: yes - name: Ensure home directory path is known user: name: "{{ my_zsh_user }}" register: user_info # This captures the user details, including home directory. 
- name: Check if Oh My Zsh is already installed stat: path: "/root/.oh-my-zsh" register: oh_my_zsh_stat - name: Check if zshrc exists stat: path: "/root/.zshrc" register: zshrc_stat - name: Clone Oh My Zsh git: repo: "https://github.com/ohmyzsh/ohmyzsh.git" dest: "/root/.oh-my-zsh" become_user: "{{ my_zsh_user }}" when: not oh_my_zsh_stat.stat.exists - name: Copy the default .zshrc template if not present copy: src: "/root/.oh-my-zsh/templates/zshrc.zsh-template" dest: "/root/.zshrc" remote_src: yes become_user: "{{ my_zsh_user }}" when: not zshrc_stat.stat.exists - name: Set Oh My Zsh theme to agnoster # Uses a regex replace to ensure 'ZSH_THEME="agnoster"' replace: path: "/root/.zshrc" regexp: '^ZSH_THEME="[^"]+"' replace: 'ZSH_THEME="agnoster"' become_user: "{{ my_zsh_user }}" - name: Change default shell to zsh for the user user: name: "{{ my_zsh_user }}" shell: /usr/bin/zsh ================================================ FILE: docker_images/gcp-dns-updater/Dockerfile ================================================ FROM python:3.14-slim # Set the working directory in the container WORKDIR /app # Copy the requirements file into the container at /app COPY requirements.txt . # Install any needed packages specified in requirements.txt # Using --no-cache-dir to reduce image size RUN pip install --no-cache-dir -r requirements.txt # Copy the current directory contents into the container at /app COPY update_dns.py . # Define the command to run the application CMD ["python", "update_dns.py"] ================================================ FILE: docker_images/gcp-dns-updater/README.md ================================================ # GCP Dynamic DNS Updater Service This service periodically checks the public IPv4 address of the node it's running on and updates a specified A record in a Google Cloud DNS managed zone. It's designed to run as a Nomad job within the Hashi-Homelab environment, utilizing a **pre-built Docker image**. 
## Features * Fetches the current public IPv4 address from `https://v4.ifconfig.co/ip`. * Uses the `google-cloud-dns` Python SDK to interact with Google Cloud DNS. * Authenticates using a GCP Service Account key provided via an environment variable. * Checks the specified DNS record: * If it's a CNAME, it deletes the CNAME record. * If it's an A record, it updates the IP address if it has changed. * If it doesn't exist (or after deleting a CNAME), it creates the A record with the specified TTL. * Runs periodically via a Nomad job, executing the Python script within the pre-built Docker container. ## Prerequisites 1. **Docker:** Docker must be installed locally to build the service image. 2. **GCP Service Account:** You need a Google Cloud Platform service account with the necessary permissions to manage DNS records. * Go to the GCP Console -> IAM & Admin -> Service Accounts. * Create a new service account (e.g., `gcp-dns-updater-sa`). * Grant this service account the `DNS Administrator` role (`roles/dns.admin`) on the project containing your managed zone. * Create a JSON key file for this service account and download it securely. You will need the *contents* of this file, not the file itself. 3. **Nomad Environment:** A running Nomad cluster where this job can be scheduled. The Nomad clients must have Docker installed and configured. ## Configuration The service is configured via environment variables passed to the Nomad task, which are then consumed by the `update_dns.py` script running inside the Docker container: * `GCP_DNS_ZONE_NAME`: The name of the managed zone in GCP DNS (e.g., `demonsafe-com`). The script derives the Project ID from the credentials. * `GCP_DNS_RECORD_NAME`: The DNS record name to update (e.g., `*.demonsafe.com`). **Note:** The script expects the base name; the trailing dot is handled internally if needed by the SDK. * `RECORD_TTL`: (Optional) The Time-To-Live (in seconds) for the created/updated A record. Defaults to 300 if not set. 
* `GCP_PROJECT_ID`: The Google Cloud Project ID containing the DNS zone. * `GCP_SERVICE_ACCOUNT_KEY_B64`: **Required.** The base64-encoded *content* of the GCP service account JSON key file. **Generating the Base64 Key:** You need to encode the *content* of your downloaded JSON key file into a single-line base64 string. On Linux/macOS, you can use: ```bash base64 -w 0 < /path/to/your/gcp_key.json ``` *(Ensure you use `-w 0` or an equivalent flag for your `base64` command to prevent line wrapping)* Copy the resulting string. **Setting Environment Variables in Nomad:** These variables are defined within the `env` block of the `nomad.job` file using Go templating to read runtime environment variables provided by the Nomad agent (which in turn are often sourced from the deployment mechanism, like GitHub Actions): ```hcl # Example within nomad.job task config env { GCP_DNS_ZONE_NAME = < {existing_a_record.rrdatas}") elif record_set.record_type == 'CNAME' and record_set.name == fqdn: existing_cname_record = record_set logging.info(f"Found existing CNAME record: {existing_cname_record.name} -> {existing_cname_record.rrdatas}") changes = zone.changes() needs_update = False # Handle existing CNAME (delete it to replace with A) if existing_cname_record: logging.warning(f"Deleting existing CNAME record {fqdn} to replace with A record.") changes.delete_record_set(existing_cname_record) needs_update = True # Ensure we don't try to delete an A record if we just deleted a CNAME existing_a_record = None # Define the new A record we want new_a_record = zone.resource_record_set(fqdn, "A", 300, [ip_address]) # Handle existing A record if existing_a_record: if existing_a_record.rrdatas == [ip_address]: logging.info(f"Existing A record {fqdn} already points to {ip_address}. No update needed.") return # Nothing to do else: logging.info(f"Existing A record {fqdn} points to {existing_a_record.rrdatas}. 
def update_spf_record(client: dns.Client, project_id: str, zone_name: str, record_name: str, ip_address: str):
    """Update the SPF TXT record on the bare domain with the current public IP.

    Looks up the TXT record set for the bare domain derived from
    ``record_name``, replaces any existing ``v=spf1`` entry with one
    authorizing ``ip_address`` (preserving all non-SPF TXT entries), or
    creates a fresh TXT record when none exists. All failures are logged,
    never raised, so a DNS error does not abort the caller.

    Args:
        client: Authenticated google.cloud.dns client.
        project_id: GCP project containing the managed zone.
        zone_name: DNS zone domain name (dots converted to dashes to form
            the GCP managed-zone name, matching update_dns_record).
        record_name: A/wildcard record name, e.g. "*.demonsafe.com";
            the leading "*." is removed to obtain the bare domain.
        ip_address: Public IPv4 address to authorize via ip4: mechanism.
    """
    try:
        gcp_zone_name = zone_name.replace('.', '-')
        logging.info(f"Updating SPF record in zone: {gcp_zone_name}")
        zone = client.zone(gcp_zone_name, project_id)
        if not zone.exists():
            logging.error(f"DNS zone '{gcp_zone_name}' not found in project '{project_id}'.")
            return

        # Derive bare domain from record_name (e.g., "*.demonsafe.com" -> "demonsafe.com.")
        # BUGFIX: str.lstrip('*.') strips any run of '*' and '.' CHARACTERS,
        # not the literal "*." prefix; slice off exactly the two-char prefix.
        domain = record_name[2:] if record_name.startswith('*.') else record_name
        fqdn = domain if domain.endswith('.') else f"{domain}."
        logging.info(f"Checking TXT records for: {fqdn}")

        # SPF policy: allow this IP, soft-fail everything else.
        spf_value = f'"v=spf1 ip4:{ip_address} ~all"'

        record_sets = list(zone.list_resource_record_sets(filter_=f"name={fqdn}"))
        existing_txt = None
        for rs in record_sets:
            if rs.record_type == 'TXT' and rs.name == fqdn:
                existing_txt = rs
                logging.info(f"Found existing TXT record: {rs.name} -> {rs.rrdatas}")
                break

        changes = zone.changes()
        needs_update = False

        if existing_txt:
            # Rebuild the rrdatas list, swapping only the SPF entry and
            # keeping every other TXT value (e.g. domain verifications).
            new_rrdatas = []
            spf_found = False
            for rd in existing_txt.rrdatas:
                if 'v=spf1' in rd:
                    spf_found = True
                    if ip_address in rd:
                        logging.info(f"SPF record already contains {ip_address}. No update needed.")
                        return
                    logging.info(f"Replacing SPF entry: {rd} -> {spf_value}")
                    new_rrdatas.append(spf_value)
                else:
                    new_rrdatas.append(rd)
            if not spf_found:
                logging.info(f"No existing SPF entry found. Adding: {spf_value}")
                new_rrdatas.append(spf_value)
            # Cloud DNS has no in-place update: delete old set, add new set
            # in one atomic Changes transaction.
            changes.delete_record_set(existing_txt)
            new_txt = zone.resource_record_set(fqdn, "TXT", 300, new_rrdatas)
            changes.add_record_set(new_txt)
            needs_update = True
        else:
            logging.info(f"No TXT record found for {fqdn}. Creating with SPF: {spf_value}")
            new_txt = zone.resource_record_set(fqdn, "TXT", 300, [spf_value])
            changes.add_record_set(new_txt)
            needs_update = True

        if needs_update:
            logging.info(f"Executing SPF TXT changes for {fqdn}...")
            changes.create()
            # Poll until the change set is propagated server-side.
            while changes.status != 'done':
                logging.info(f"Waiting for SPF changes to complete (status: {changes.status})...")
                time.sleep(5)
                changes.reload()
            logging.info(f"Successfully updated SPF record for {fqdn} with ip4:{ip_address}")
    except GoogleAPIError as e:
        logging.error(f"GCP API Error updating SPF record: {e}")
    except Exception as e:
        logging.error(f"Unexpected error updating SPF record: {e}")
Proceeding with potential update.') if public_ip: dns_client = get_dns_client(key_b64, project_id) if dns_client: update_dns_record(dns_client, project_id, zone_name, record_name, public_ip) update_spf_record(dns_client, project_id, zone_name, record_name, public_ip) logging.info("DNS update script finished.") else: logging.error("Exiting due to DNS client initialization failure.") sys.exit(1) else: logging.error("Exiting due to inability to fetch public IP.") sys.exit(1) ================================================ FILE: docker_images/update-metadata/Dockerfile ================================================ FROM python:3.14-slim WORKDIR /app COPY requirements.txt . RUN pip install --no-cache-dir -r requirements.txt COPY sync_secrets.py . ENTRYPOINT ["python", "sync_secrets.py"] ================================================ FILE: docker_images/update-metadata/README.md ================================================ # GitHub Secret Synchronization Script (Containerized) ## Purpose This script (`sync_secrets.py`), running inside a Docker container, reads environment variables defined in the project's root `.envrc` file and synchronizes them as GitHub secrets to the `perrymanuk/hashi-homelab` repository using the `PyGithub` library. ## Requirements * **Docker:** Docker must be installed and running to build and execute the container. * **`NOMAD_VAR_github_pat` Environment Variable:** A GitHub Personal Access Token (PAT) with the `repo` scope must be available as an environment variable named `NOMAD_VAR_github_pat` in the **host shell** where you run the `make` command. The Makefile target (`sync-secrets`) will handle passing this token into the container under the name `GITHUB_TOKEN` for the script to use. * **`.envrc` File:** An `.envrc` file must exist at the project root (`/Users/perry.manuk/git/perrymanuk/hashi-homelab/.envrc`) containing the secrets to sync. ## Usage 1. 
**Ensure `NOMAD_VAR_github_pat` is set:** Export your GitHub PAT in your current host shell session: ```bash export NOMAD_VAR_github_pat="your_github_pat_here" ``` 2. **Navigate to the project root directory:** ```bash cd /Users/perry.manuk/git/perrymanuk/hashi-homelab ``` 3. **Run the Makefile target:** ```bash make sync-secrets ``` This command will: * Build the Docker image defined in `docker_images/update-metadata/Dockerfile`. * Run a container from the image. * Mount the host's `.envrc` file into the container. * Pass the **host's** `NOMAD_VAR_github_pat` environment variable into the container as `GITHUB_TOKEN`. * Execute the `sync_secrets.py` script within the container. The script will output the status of each secret synchronization attempt (created, updated, or failed). **Important:** Running the script will overwrite any existing secrets in the GitHub repository that have the same name as variables found in the `.envrc` file. ## `.envrc` Format The script expects the `.envrc` file to follow this format: ```bash export VARIABLE_NAME=value export ANOTHER_VARIABLE='value with spaces' export YET_ANOTHER="double quoted value" # This is a comment and will be ignored # Empty lines are also ignored export SECRET_KEY=a_very_secret_value_here ``` * Lines must start with `export`. * Variable names and values are separated by `=`. * Values can be unquoted, single-quoted (`'...'`), or double-quoted (`"..."`). Quotes are stripped before syncing. * Lines starting with `#` (comments) and empty lines are ignored. 
"""Stamp every Nomad job file with a `job_file` meta attribute.

Scans nomad_jobs/ for *.nomad and *.job files and ensures each job's
`meta` block contains a `job_file = "<repo-relative path>"` entry,
creating the meta block (or the line inside it) when missing and
rewriting it when stale.
"""
import argparse
import logging
import pathlib
import re
import sys

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')


def _matching_brace_span(content, start_index, label):
    """Scan forward from start_index and return the balanced-brace span.

    Shared brace matcher for job/meta blocks (the two callers previously
    duplicated this loop verbatim). Braces inside double-quoted strings are
    ignored; a backslash escapes the next character (toggled even outside
    strings, matching the original scanner's behavior).

    Returns:
        (start_index, index-one-past-closing-brace), or (None, None) when
        no matching closing brace is found.
    """
    brace_level = 0
    in_string = False
    escaped = False
    for i, char in enumerate(content[start_index:]):
        if escaped:
            escaped = False
            continue
        if char == '\\':
            escaped = True
            continue
        if char == '"':
            in_string = not in_string
            continue
        if not in_string:
            if char == '{':
                brace_level += 1
            elif char == '}':
                brace_level -= 1
                if brace_level == 0:
                    return start_index, start_index + i + 1
    logging.warning(f"Could not find matching closing brace for {label} block.")
    return None, None


def find_job_block(content):
    """Find the start and end indices of the main 'job' block."""
    job_match = re.search(r'^job\s+"[^"]+"\s*\{', content, re.MULTILINE)
    if not job_match:
        logging.warning("Could not find job block start.")
        return None, None
    return _matching_brace_span(content, job_match.start(), "job")


def find_meta_block(content):
    """Find the start and end indices of the 'meta' block within the given content."""
    meta_match = re.search(r'^\s*meta\s*\{', content, re.MULTILINE)
    if not meta_match:
        # Missing meta is normal (caller inserts one); no warning here.
        return None, None
    return _matching_brace_span(content, meta_match.start(), "meta")


def update_job_metadata(repo_root):
    """Finds Nomad job files and updates their meta block with job_file path.

    Args:
        repo_root: Path to the repository root; must contain nomad_jobs/.

    Exits with status 1 when nomad_jobs/ is missing. Per-file failures are
    logged and skipped so one bad file does not abort the whole run.
    """
    repo_path = pathlib.Path(repo_root).resolve()
    nomad_jobs_path = repo_path / 'nomad_jobs'
    if not nomad_jobs_path.is_dir():
        logging.error(f"'nomad_jobs' directory not found in {repo_path}")
        sys.exit(1)

    logging.info(f"Scanning for job files in {nomad_jobs_path}...")
    job_files = list(nomad_jobs_path.rglob('*.nomad')) + list(nomad_jobs_path.rglob('*.job'))
    if not job_files:
        logging.warning("No *.nomad or *.job files found.")
        return

    modified_count = 0
    for job_file in job_files:
        try:
            relative_path = job_file.relative_to(repo_path).as_posix()
            logging.debug(f"Processing file: {relative_path}")
            content = job_file.read_text()
            original_content = content  # Keep a copy for comparison

            job_start, job_end = find_job_block(content)
            if job_start is None or job_end is None:
                logging.warning(f"Skipping {relative_path}: Could not find main job block.")
                continue

            job_block_content = content[job_start:job_end]
            job_opening_line_match = re.match(r'^job\s+"[^"]+"\s*\{\s*\n?', job_block_content, re.MULTILINE)
            if not job_opening_line_match:
                logging.warning(f"Skipping {relative_path}: Could not match job opening line format.")
                continue
            # Insertion point for a brand-new meta block: right after `job "..." {`.
            job_insert_pos = job_start + job_opening_line_match.end()

            meta_start_rel, meta_end_rel = find_meta_block(job_block_content)
            # NOTE(review): source dump collapsed whitespace; 4-space indent
            # assumed for meta entries — confirm against existing job files.
            new_job_file_line = f'    job_file = "{relative_path}"'
            modified = False

            if meta_start_rel is not None and meta_end_rel is not None:
                # Offsets from find_meta_block are relative to the job block.
                meta_start_abs = job_start + meta_start_rel
                meta_end_abs = job_start + meta_end_rel
                meta_block_content = content[meta_start_abs:meta_end_abs]
                meta_opening_line_match = re.match(r'^\s*meta\s*\{\s*\n?', meta_block_content, re.MULTILINE)
                if not meta_opening_line_match:
                    logging.warning(f"Skipping {relative_path}: Could not match meta opening line format.")
                    continue
                meta_insert_pos = meta_start_abs + meta_opening_line_match.end()

                job_file_line_match = re.search(r'^(\s*)job_file\s*=\s*".*?"$\n?', meta_block_content, re.MULTILINE)
                if job_file_line_match:
                    existing_line = job_file_line_match.group(0)
                    indent = job_file_line_match.group(1)
                    new_line_with_indent = f'{indent}job_file = "{relative_path}"\n'  # Ensure newline
                    if existing_line.strip() != new_line_with_indent.strip():
                        # Replace existing (stale) line in place.
                        start = meta_start_abs + job_file_line_match.start()
                        end = meta_start_abs + job_file_line_match.end()
                        # Ensure we capture the trailing newline if present in match
                        content = content[:start] + new_line_with_indent + content[end:]
                        modified = True
                else:
                    # Insert new job_file line inside meta block
                    content = content[:meta_insert_pos] + new_job_file_line + '\n' + content[meta_insert_pos:]
                    modified = True
            else:
                # Insert new meta block
                new_meta_block = f'\n  meta {{\n{new_job_file_line}\n  }}\n'
                content = content[:job_insert_pos] + new_meta_block + content[job_insert_pos:]
                modified = True

            if modified and content != original_content:
                job_file.write_text(content)
                logging.info(f"Updated metadata in: {relative_path}")
                modified_count += 1
            elif not modified:
                logging.debug(f"No changes needed for: {relative_path}")
        except Exception as e:
            # BUGFIX: log job_file, not relative_path — relative_path may be
            # unbound here if the first statement in the try raised, which
            # would turn the diagnostic into a NameError.
            logging.error(f"Failed to process {job_file}: {e}")

    logging.info(f"Metadata update complete. {modified_count} files modified.")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Update Nomad job files with job_file metadata.")
    # Default to the parent directory of the script's directory (../)
    script_dir = pathlib.Path(__file__).parent.resolve()
    default_repo_root = script_dir.parent
    parser.add_argument(
        "--repo-root",
        type=str,
        default=str(default_repo_root),
        help="Path to the root of the repository."
    )
    args = parser.parse_args()
    update_job_metadata(args.repo_root)
Volume is auto-created by CI when pushed (if path is in workflow filter) // // Access modes: // single-node-writer : one node read/write (most services) // single-node-reader-only : one node read-only // multi-node-single-writer : multiple nodes can mount, one writes (HA failover) // // Size guide: // Config-only (app state): 1-5 GiB // Small databases: 5-10 GiB // Media metadata/indexes: 10-20 GiB // Time-series / logs: 50-100 GiB // // ============================================================================= id = "__VOL_NAME__" external_id = "__VOL_NAME__" name = "__VOL_NAME__" type = "csi" plugin_id = "org.democratic-csi.iscsi" capacity_min = "__SIZE__" capacity_max = "__SIZE__" capability { access_mode = "single-node-writer" attachment_mode = "file-system" } mount_options { fs_type = "ext4" mount_flags = ["noatime"] } ================================================ FILE: nomad_jobs/TEMPLATE.job ================================================ // ============================================================================= // Nomad Job Template // ============================================================================= // // Usage: // 1. Copy this file to nomad_jobs///nomad.job // 2. Find/replace the following placeholders: // - __JOB_NAME__ : lowercase service name (e.g. "sonarr") // - __GROUP_NAME__ : group name (e.g. "downloaders", "monitoring", "ai") // - __CATEGORY__ : directory category (e.g. "media-stack", "ai-ml") // - __IMAGE__ : docker image with tag (e.g. "linuxserver/sonarr:4.0.16") // - __PORT__ : container port number (e.g. "8989") // - __HEALTH_PATH__ : HTTP health check path (e.g. "/ping", "/-/healthy", "/api/health") // - __CPU__ : CPU MHz allocation (see guide below) // - __MEMORY__ : Memory MB allocation (see guide below) // 3. Remove any optional sections you don't need (marked with OPTIONAL) // 4. Update the variable declarations at the bottom // 5. Add any job-specific secrets to .envrc as NOMAD_VAR_ // 6. 
Add the job path to .github/workflows/nomad.yaml if it should auto-deploy // // Resource guide: // Light services (static sites, proxies): cpu = 100-200, memory = 128-256 // Medium services (APIs, web apps): cpu = 500-1000, memory = 512-1024 // Heavy services (.NET apps, databases, Java): cpu = 1000+, memory = 1024-2048 // GPU / ML workloads: cpu = 200+, memory = 4096-8192 // // ============================================================================= job "__JOB_NAME__" { region = var.region datacenters = ["dc1"] type = "service" meta { job_file = "nomad_jobs/__CATEGORY__/__JOB_NAME__/nomad.job" version = "1" } // Ensures scheduling on nodes with NFS shared mount available. // Remove if the service has no need for shared storage or config dirs. constraint { attribute = "${meta.shared_mount}" operator = "=" value = "true" } group "__GROUP_NAME__" { count = 1 network { port "http" { host_network = "lan" to = "__PORT__" } } // --- OPTIONAL: CSI Volume ------------------------------------------------ // Use for services that need persistent block storage (databases, stateful apps). // Requires a matching volume.hcl deployed first. // Remove this block and the prep-disk task + volume_mount if not needed. // // volume "__JOB_NAME__" { // type = "csi" // read_only = false // source = "__JOB_NAME__" // access_mode = "single-node-writer" // attachment_mode = "file-system" // } // ------------------------------------------------------------------------- restart { attempts = 3 delay = "15s" interval = "10m" mode = "delay" } update { max_parallel = 1 min_healthy_time = "30s" healthy_deadline = "5m" progress_deadline = "10m" auto_revert = true } // --- OPTIONAL: Prep-disk task -------------------------------------------- // Required when using CSI volumes to fix ownership before the main task runs. // Set UID:GID to match the user the main container runs as. 
// Common values: // linuxserver images: 65534:65534 (nobody) // prometheus: 1000:2000 // grafana: 472:472 // loki: 10001:10001 // // task "prep-disk" { // driver = "docker" // // lifecycle { // hook = "prestart" // sidecar = false // } // // volume_mount { // volume = "__JOB_NAME__" // destination = "/volume/" // read_only = false // } // // config { // image = "busybox:latest" // command = "sh" // args = ["-c", "chown -R UID:GID /volume/"] // } // // resources { // cpu = 200 // memory = 128 // } // } // ------------------------------------------------------------------------- task "__JOB_NAME__" { driver = "docker" config { image = "__IMAGE__" ports = ["http"] // --- Bind mount pattern (shared NFS config dir) --- // Use for services that store config on shared NFS. // volumes = [ // "${var.shared_dir}__JOB_NAME__:/config", // ] // --- Template mount pattern (config rendered by Nomad) --- // Use when config is templated inline below. // volumes = [ // "local/config.yaml:/app/config.yaml", // ] } // --- OPTIONAL: CSI volume mount ---------------------------------------- // volume_mount { // volume = "__JOB_NAME__" // destination = "/data" // read_only = false // } // ----------------------------------------------------------------------- env { TZ = "Etc/UTC" // PUID = "65534" // common for linuxserver images // PGID = "65534" } // --- OPTIONAL: Config template ----------------------------------------- // Use for services that need a rendered config file. // Reference secrets with ${var.secret_name} syntax. 
// // template { // data = < 768MB } group "web" { network { mode = "host" port "web" { to = "8080" host_network = "lan" } } restart { attempts = 3 delay = "15s" interval = "10m" mode = "delay" } task "open-webui" { driver = "docker" config { image = "ghcr.io/open-webui/open-webui:v0.8.12" dns_servers = [var.dns_server_ip] volumes = [ "${var.shared_dir}open-webui:/app/backend/data", ] ports = ["web"] } env { OLLAMA_BASE_URL= var.ollama_base_url WEBUI_SECRET_KEY = var.webui_secret_key } service { name = "${NOMAD_JOB_NAME}" tags = ["traefik.enable=true"] port = "web" check { type = "tcp" port = "web" interval = "30s" timeout = "2s" } } resources { cpu = "200" memory = "768" } } } } variable "region" { type = string } variable "shared_dir" { type = string } variable "ollama_base_url" { type = string } variable "webui_secret_key" { type = string } variable "datacenter" { type = string } variable "dns_server_ip" { type = string } ================================================ FILE: nomad_jobs/ai-ml/paperless-ai/nomad.job ================================================ job "paperless-ai" { region = var.region datacenters = ["dc1"] type = "service" meta { job_file = "nomad_jobs/ai-ml/paperless-ai/nomad.job" version = "2" } group "web" { network { mode = "host" port "web" { to = "3000" host_network = "lan" } } restart { attempts = 3 delay = "15s" interval = "10m" mode = "delay" } task "paperless-ai" { driver = "docker" config { image = "clusterzx/paperless-ai" dns_servers = ["192.168.50.2"] volumes = [ "${var.shared_dir}paperless-ai:/app/data", ] ports = ["web"] } service { name = "${NOMAD_JOB_NAME}" tags = ["traefik.enable=true"] port = "web" check { type = "tcp" port = "web" interval = "30s" timeout = "2s" } } resources { cpu = "200" memory = "2048" } } } } variable "region" { type = string } variable "shared_dir" { type = string } ================================================ FILE: nomad_jobs/ai-ml/pgvector-client/nomad.job 
================================================ job "pgvector-client-example" { region = var.region datacenters = ["dc1"] type = "batch" meta { job_file = "nomad_jobs/ai-ml/pgvector-client/nomad.job" version = "1" // Initial version } group "client" { restart { attempts = 3 delay = "15s" interval = "10m" mode = "delay" } task "embedding-example" { driver = "docker" config { image = "python:3.14-slim" command = "python" args = [ "/local/embedding-example.py" ] } env { PGVECTOR_HOST = "pgvector.service.consul" PGVECTOR_PORT = "5433" PGVECTOR_USER = "postgres" PGVECTOR_PASSWORD = "${var.pgvector_pass}" PGVECTOR_DB = "embeddings" } template { data = < %s) AS similarity FROM documents ORDER BY embedding <=> %s LIMIT 3 """, (query_embedding, query_embedding)) results = cursor.fetchall() print("\nTop 3 most similar documents:") for id, content, similarity in results: print(f"ID: {id}, Similarity: {similarity:.4f}") print(f"Content: {content}") print("-" * 50) # Commit and close conn.commit() cursor.close() conn.close() print("Example completed successfully!") EOH destination = "local/embedding-example.py" } resources { cpu = 500 memory = 512 } } } } variable "region" { type = string } variable "pgvector_pass" { type = string description = "Admin password for pgvector PostgreSQL server" } ================================================ FILE: nomad_jobs/ai-ml/radbot/nomad-dev.job ================================================ job "radbot-dev" { region = var.region datacenters = ["dc1"] type = "service" meta { job_file = "nomad_jobs/ai-ml/radbot/nomad-dev.job" version = "1" } constraint { attribute = "${meta.shared_mount}" operator = "=" value = "true" } group "web" { count = 1 network { port "http" { host_network = "lan" to = 8000 } } restart { attempts = 3 delay = "15s" interval = "10m" mode = "delay" } update { max_parallel = 1 min_healthy_time = "60s" healthy_deadline = "5m" auto_revert = true } task "radbot-dev" { driver = "docker" config { image = 
"ghcr.io/perrymanuk/radbot:dev" dns_servers = [var.dns_server_ip] ports = ["http"] volumes = [ "local/config.yaml:/app/config.yaml", ] } env { RADBOT_CREDENTIAL_KEY = var.radbot_credential_key RADBOT_ADMIN_TOKEN = var.radbot_admin_token RADBOT_CONFIG_FILE = "/app/config.yaml" RADBOT_ENV = "dev" } template { data = < postgres ${var.postgres_pass} 5432 postgres.service.consul lidarr_main lidarr_logs info * 8686 6868 False False ${var.lidarr_api_key} External DisabledForLocalAddresses 100.64.0.0/10,192.168.50.0/24 master Lidarr Docker EOH destination = "local/config.xml" perms = "644" } service { port = "http" name = "lidarr" tags = [ "traefik.enable=true", "traefik.http.routers.${NOMAD_TASK_NAME}.tls.domains[0].sans=${NOMAD_TASK_NAME}.${var.tld}", ] check { type = "http" path = "/ping" interval = "10s" timeout = "2s" check_restart { limit = 3 grace = "60s" ignore_warnings = false } } } resources { cpu = 100 memory = 256 } } } } variable "region" { type = string } variable "tld" { type = string } variable "shared_dir" { type = string } variable "downloads_dir" { type = string } variable "music_dir" { type = string } variable "postgres_pass" { type = string description = "Admin password for PostgreSQL" } variable "lidarr_api_key" { type = string description = "API key for Lidarr" } ================================================ FILE: nomad_jobs/media-stack/lidarr/volume.hcl ================================================ id = "lidarr2" external_id = "lidarr2" name = "lidarr2" type = "csi" plugin_id = "org.democratic-csi.iscsi" capacity_min = "10GiB" capacity_max = "10GiB" capability { access_mode = "single-node-writer" attachment_mode = "block-device" } mount_options { fs_type = "ext4" mount_flags = ["noatime"] } ================================================ FILE: nomad_jobs/media-stack/lidify/nomad.job ================================================ job "lidify" { region = var.region datacenters = ["dc1"] type = "service" meta { job_file = 
"nomad_jobs/media-stack/lidify/nomad.job" version = "1" } constraint { attribute = "${meta.shared_mount}" operator = "=" value = "true" } group "discovery" { count = 1 network { port "http" { host_network = "lan" to = "5000" } } restart { attempts = 3 delay = "15s" interval = "10m" mode = "delay" } update { max_parallel = 1 min_healthy_time = "30s" auto_revert = true } task "lidify" { driver = "docker" config { image = "thewicklowwolf/lidify:latest" ports = ["http"] dns_servers = ["192.168.50.2"] volumes = [ "${var.shared_dir}lidify:/lidify/config", ] } env { lidarr_address = "http://lidarr.service.consul:8686" lidarr_api_key = var.lidarr_api_key lastfm_api_key = var.lastfm_api_key root_folder_path = "/music" quality_profile_id = "1" metadata_profile_id = "1" sleep_interval = "3600" } service { port = "http" name = "lidify" tags = [ "traefik.enable=true", ] check { type = "http" path = "/" interval = "10s" timeout = "2s" check_restart { limit = 3 grace = "60s" ignore_warnings = false } } } resources { cpu = 100 memory = 256 } } } } variable "region" { type = string } variable "tld" { type = string } variable "shared_dir" { type = string } variable "lidarr_api_key" { type = string description = "API key for Lidarr" } variable "lastfm_api_key" { type = string description = "Last.fm API key" } ================================================ FILE: nomad_jobs/media-stack/maintainerr/nomad.job ================================================ job "maintainerr" { region = var.region datacenters = ["dc1"] type = "service" meta { job_file = "nomad_jobs/media-stack/maintainerr/nomad.job" version = "2" } constraint { attribute = "${meta.shared_mount}" operator = "=" value = "true" } group "media" { count = 1 network { port "http" { host_network = "lan" to = 6246 } } update { max_parallel = 1 min_healthy_time = "30s" auto_revert = true } task "maintainerr" { driver = "docker" config { image = "ghcr.io/maintainerr/maintainerr:3.7.0" ports = ["http"] volumes = [ 
"${var.shared_dir}maintainerr:/opt/data", ] } env { TZ = "Etc/UTC" } user = "1000:1000" service { port = "http" name = "maintainerr" tags = [ "traefik.enable=true" ] check { type = "http" path = "/" interval = "30s" timeout = "5s" check_restart { limit = 3 grace = "60s" ignore_warnings = false } } } resources { cpu = 200 memory = 512 } } } } variable "region" { type = string } variable "tld" { type = string } variable "shared_dir" { type = string } ================================================ FILE: nomad_jobs/media-stack/mediasage/nomad.job ================================================ job "mediasage" { region = var.region datacenters = ["dc1"] type = "service" meta { job_file = "nomad_jobs/media-stack/mediasage/nomad.job" version = "1" } constraint { attribute = "${meta.shared_mount}" operator = "=" value = "true" } group "playlists" { count = 1 network { port "http" { host_network = "lan" to = "5765" } } restart { attempts = 3 delay = "15s" interval = "10m" mode = "delay" } update { max_parallel = 1 min_healthy_time = "30s" auto_revert = true } task "prep-disk" { driver = "docker" lifecycle { hook = "prestart" sidecar = false } config { image = "busybox:latest" command = "sh" args = ["-c", "mkdir -p /data && chmod 777 /data"] volumes = [ "${var.shared_dir}mediasage:/data", ] } resources { cpu = 50 memory = 32 } } task "mediasage" { driver = "docker" config { image = "ghcr.io/ecwilsonaz/mediasage:latest" ports = ["http"] dns_servers = ["192.168.50.2"] volumes = [ "${var.shared_dir}mediasage:/app/data", ] } env { PLEX_URL = "http://plex.service.consul:32400" PLEX_TOKEN = var.plex_token AI_PROVIDER = "ollama" OLLAMA_URL = "http://ollama.service.consul:11434" } service { port = "http" name = "mediasage" tags = [ "traefik.enable=true", ] check { type = "http" path = "/" interval = "10s" timeout = "2s" check_restart { limit = 3 grace = "60s" ignore_warnings = false } } } resources { cpu = 200 memory = 512 } } } } variable "region" { type = string } variable 
"tld" { type = string } variable "shared_dir" { type = string } variable "plex_token" { type = string description = "Plex authentication token" } ================================================ FILE: nomad_jobs/media-stack/multi-scrobbler/nomad.job ================================================ job "multi-scrobbler" { region = var.region datacenters = ["dc1"] type = "service" meta { job_file = "nomad_jobs/media-stack/multi-scrobbler/nomad.job" version = "1" } constraint { attribute = "${meta.shared_mount}" operator = "=" value = "true" } group "scrobbler" { count = 1 network { port "http" { host_network = "lan" to = "9078" } } restart { attempts = 3 delay = "15s" interval = "10m" mode = "delay" } update { max_parallel = 1 min_healthy_time = "30s" auto_revert = true } task "multi-scrobbler" { driver = "docker" config { image = "foxxmd/multi-scrobbler:latest" ports = ["http"] dns_servers = ["192.168.50.2"] volumes = [ "${var.shared_dir}multi-scrobbler:/config", "local/config.json:/config/config.json", ] } env { TZ = "Etc/UTC" } template { data = </dev/null || true)" ]; then echo "Database volume is empty, copying existing databases if any..." if [ -d "$DB_DIR" ] && [ -n "$(ls -A "$DB_DIR" 2>/dev/null || true)" ]; then cp -a "$DB_DIR"/* /opt/plex-db/ echo "Copied existing databases to persistent volume" fi fi # Set up link to optimized database storage (only if not already linked) if [ ! -L "$DB_DIR" ] || [ "$(readlink "$DB_DIR")" != "/opt/plex-db" ]; then echo "Setting up database symlink..." rm -rf "$DB_DIR" ln -sf /opt/plex-db "$DB_DIR" fi # Install SQLite3 if needed if ! command -v sqlite3 &>/dev/null; then echo "Installing SQLite3..." apt-get update && apt-get install -y sqlite3 fi # Set environment variables for SQLite export SQLITE_TMPDIR=/tmp/plex_sqlite mkdir -p "$SQLITE_TMPDIR" # Apply optimizations to all databases echo "Applying SQLite optimizations to databases..." 
find /opt/plex-db -name "*.db" -type f 2>/dev/null | while read -r db; do echo "Optimizing $db" sqlite3 "$db" < 256 } group "downloaders" { count = 1 network { port "http" { host_network = "lan" static = 9696 } port "flaresolverr" { host_network = "lan" static = 8191 } } volume "prowlarr" { type = "csi" read_only = false source = "prowlarr" access_mode = "single-node-writer" attachment_mode = "file-system" } update { max_parallel = 1 min_healthy_time = "30s" auto_revert = true } task "prowlarr" { driver = "docker" config { image = "linuxserver/prowlarr" dns_servers = ["192.168.50.2"] ports = ["http"] } volume_mount { volume = "prowlarr" destination = "/config" read_only = false } env { PUID = "65534" PGID = "65534" TZ = "Etc/UTC" } service { port = "http" name = "prowlarr" tags = [ "traefik.enable=true", ] check { type = "http" path = "/ping" interval = "10s" timeout = "2s" check_restart { limit = 3 grace = "60s" ignore_warnings = false } } } resources { cpu = 500 memory = 256 } } task "flaresolverr" { driver = "docker" config { image = "ghcr.io/flaresolverr/flaresolverr:v3.4.6" ports = ["flaresolverr"] } env { LOG_LEVEL = "info" LOG_HTML = "false" TZ = "Etc/UTC" } service { port = "flaresolverr" name = "flaresolverr" check { type = "http" path = "/" interval = "30s" timeout = "5s" } } resources { cpu = 500 memory = 512 } lifecycle { hook = "prestart" sidecar = true } } } } variable "region" {} variable "tld" {} ================================================ FILE: nomad_jobs/media-stack/prowlarr/volume.hcl ================================================ id = "prowlarr" external_id = "prowlarr" name = "prowlarr" type = "csi" plugin_id = "org.democratic-csi.iscsi" capacity_min = "5GiB" capacity_max = "5GiB" capability { access_mode = "single-node-writer" attachment_mode = "block-device" } mount_options { fs_type = "ext4" mount_flags = ["noatime"] } ================================================ FILE: nomad_jobs/media-stack/qbittorrent/nomad.job 
================================================ job "qbittorrent" { region = var.region datacenters = ["dc1"] type = "service" meta { job_file = "nomad_jobs/media-stack/qbittorrent/nomad.job" version = "5" } constraint { attribute = "${meta.shared_mount}" operator = "=" value = "true" } group "downloaders" { count = 1 network { port "http" { host_network = "lan" static = 8081 } } update { max_parallel = 1 min_healthy_time = "30s" auto_revert = true } task "gluetun" { driver = "docker" lifecycle { hook = "prestart" sidecar = true } config { image = "qmcgaw/gluetun" cap_add = ["NET_ADMIN"] ports = ["http"] mounts = [ { type = "tmpfs" target = "/tmp/gluetun" readonly = false }, ] } env { VPN_SERVICE_PROVIDER = "mullvad" VPN_TYPE = "wireguard" WIREGUARD_PRIVATE_KEY = var.mullvad_wireguard_key WIREGUARD_ADDRESSES = var.mullvad_wireguard_addr SERVER_COUNTRIES = "Netherlands" FIREWALL_VPN_INPUT_PORTS = "8081" } resources { cpu = 500 memory = 512 } } task "qbittorrent" { driver = "docker" config { image = "linuxserver/qbittorrent" network_mode = "container:gluetun-${NOMAD_ALLOC_ID}" mounts = [ { type = "bind" target = "/config" source = "${var.shared_dir}qbittorrent" readonly = false bind_options = { propagation = "rshared" } }, { type = "bind" target = "/downloads" source = "${var.downloads_dir}" readonly = false bind_options = { propagation = "rshared" } }, { type = "bind" target = "/media" source = "${var.media_dir}" readonly = false bind_options = { propagation = "rshared" } }, ] } env { PUID = "65534" PGID = "65534" TZ = "Etc/UTC" WEBUI_PORT = "8081" } service { port = "http" name = "qbittorrent" tags = [ "traefik.enable=true", ] check { type = "tcp" interval = "10s" timeout = "2s" check_restart { limit = 3 grace = "90s" ignore_warnings = false } } } resources { cpu = 1000 memory = 1024 } } } } variable "region" {} variable "tld" {} variable "shared_dir" {} variable "downloads_dir" {} variable "media_dir" {} variable "mullvad_wireguard_key" { type = string 
description = "Mullvad WireGuard private key" } variable "mullvad_wireguard_addr" { type = string description = "Mullvad WireGuard interface address" } ================================================ FILE: nomad_jobs/media-stack/radarr/nomad.job ================================================ job "radarr" { region = var.region datacenters = ["dc1"] type = "service" meta { job_file = "nomad_jobs/media-stack/radarr/nomad.job" version = "10" // Full config.xml template with API key } group "downloaders" { count = 1 network { port "http" { host_network = "lan" to = "7878" } } volume "radarr" { type = "csi" read_only = false source = "radarr2" access_mode = "single-node-writer" attachment_mode = "file-system" } update { max_parallel = 1 min_healthy_time = "30s" auto_revert = true } task "radarr" { driver = "docker" config { image = "linuxserver/radarr:6.1.1" dns_servers = ["192.168.50.2"] ports = ["http"] volumes = [ "${var.downloads_dir}:/downloads", "${var.movies_dir}:/media/Movies", "local/config.xml:/config/config.xml", ] } volume_mount { volume = "radarr" destination = "/config" read_only = false } env { UMASK_SET = "022" TZ = "UTC" PUID = "65534" PGID = "65534" } template { data = < postgres ${var.postgres_pass} 5432 postgres.service.consul radarr_main radarr_logs info * 7878 9898 False False ${var.radarr_api_key} External DisabledForLocalAddresses 100.64.0.0/10,192.168.50.0/24 master Radarr Docker EOH destination = "local/config.xml" perms = "644" } service { port = "http" name = "radarr" tags = [ "traefik.enable=true" ] check { type = "http" path = "/ping" interval = "10s" timeout = "2s" check_restart { limit = 3 grace = "60s" ignore_warnings = false } } } resources { cpu = 1000 memory = 512 } } } } variable "region" { type = string } variable "tld" { type = string } variable "downloads_dir" { type = string } variable "tv_dir" { type = string } variable "movies_dir" { type = string } variable "postgres_pass" { type = string description = "Admin password for 
PostgreSQL" } variable "radarr_api_key" { type = string description = "API key for Radarr" } ================================================ FILE: nomad_jobs/media-stack/radarr/volume.hcl ================================================ id = "radarr2" external_id = "radarr2" name = "radarr2" type = "csi" plugin_id = "org.democratic-csi.iscsi" capacity_min = "10GiB" capacity_max = "10GiB" capability { access_mode = "single-node-writer" attachment_mode = "block-device" } mount_options { fs_type = "ext4" mount_flags = ["noatime"] } ================================================ FILE: nomad_jobs/media-stack/requestrr/nomad.job ================================================ job "requestrr" { region = var.region datacenters = ["dc1"] type = "service" meta { job_file = "nomad_jobs/media-stack/requestrr/nomad.job" version = "1" } constraint { attribute = "${meta.shared_mount}" operator = "=" value = "true" } group "media" { count = 1 network { port "http" { host_network = "lan" to = 4545 } } restart { attempts = 3 delay = "15s" interval = "10m" mode = "delay" } update { max_parallel = 1 min_healthy_time = "30s" auto_revert = true } task "requestrr" { driver = "docker" config { dns_servers = ["192.168.50.2"] image = "thomst08/requestrr:v2.1.9" ports = ["http"] volumes = [ "${var.shared_dir}requestrr:/root/config", ] } env { TZ = "Etc/UTC" } service { port = "http" name = "requestrr" tags = [ "traefik.enable=true" ] check { type = "tcp" interval = "10s" timeout = "2s" } } resources { cpu = 200 memory = 256 } } } } variable "region" { type = string } variable "tld" { type = string } variable "shared_dir" { type = string } ================================================ FILE: nomad_jobs/media-stack/sabnzbd/nomad.job ================================================ job "sabnzbd" { region = var.region datacenters = ["dc1"] type = "service" meta { job_file = "nomad_jobs/media-stack/sabnzbd/nomad.job" version = "6" } constraint { attribute = "${meta.shared_mount}" operator = 
"=" value = "true" } group "downloaders" { count = 1 network { port "http" { host_network = "lan" static = "8080" } } update { max_parallel = 1 min_healthy_time = "30s" auto_revert = true } task "sabnzbd" { driver = "docker" config { image = "linuxserver/sabnzbd" network_mode = "host" ports = ["http"] mounts = [ { type = "bind" target = "/config" source = "${var.shared_dir}sabnzbd", readonly = false bind_options = { propagation = "rshared" } }, { type = "bind" target = "/downloads" source = "/tmp" readonly = false bind_options = { propagation = "rshared" } }, { type = "bind" target = "/media" source = "${var.media_dir}" readonly = false bind_options = { propagation = "rshared" } } ] } env { PUID = "65534" PGID = "65534" TZ = "Etc/UTC" } service { port = "http" name = "${NOMAD_TASK_NAME}" tags = [ "traefik.enable=true" ] check { type = "http" path = "/api?mode=auth" interval = "10s" timeout = "2s" check_restart { limit = 3 grace = "60s" ignore_warnings = false } } } resources { cpu = 1000 # Match actual usage (952 MHz observed) memory = 3072 # Accommodate 2GB cache + 1GB overhead memory_max = 4096 # Hard limit for burst usage } } } } variable "region" {} variable "tld" {} variable "shared_dir" {} variable "media_dir" {} variable "downloads_dir" {} ================================================ FILE: nomad_jobs/media-stack/sickchill/nomad.job ================================================ job "sickchill" { region = var.region datacenters = ["dc1"] type = "service" meta { job_file = "nomad_jobs/media-stack/sickchill/nomad.job" version = "4" } constraint { attribute = "${meta.shared_mount}" operator = "=" value = "true" } group "downloaders" { count = 1 network { port "http" { host_network = "lan" to = "8081" } } update { max_parallel = 1 min_healthy_time = "30s" auto_revert = true } task "sickchill" { driver = "docker" config { image = "linuxserver/sickchill:2024.3.1" dns_servers = ["192.168.50.2"] ports = ["http"] volumes = [ "${var.downloads_dir}:/downloads", 
"${var.tv_dir}:/tv", "${var.shared_dir}sickchill:/config", ] } env { PUID = "65534" PGID = "65534" TZ = "Etc/UTC" } service { port = "http" name = "sickchill" tags = [ "traefik.enable=true" ] check { type = "http" path = "/" interval = "10s" timeout = "2s" check_restart { limit = 3 grace = "60s" ignore_warnings = false } } } resources { cpu = 1000 memory = 256 } } } } variable "region" { type = string } variable "tld" { type = string } variable "downloads_dir" { type = string } variable "tv_dir" { type = string } variable "shared_dir" { type = string } ================================================ FILE: nomad_jobs/media-stack/sonarr/nomad.job ================================================ job "sonarr" { region = var.region datacenters = ["dc1"] type = "service" meta { job_file = "nomad_jobs/media-stack/sonarr/nomad.job" version = "11" // Full config.xml template with API key } constraint { attribute = "${meta.shared_mount}" operator = "=" value = "true" } group "downloaders" { count = 1 network { port "http" { host_network = "lan" to = "8989" } } update { max_parallel = 1 min_healthy_time = "30s" auto_revert = true } task "sonarr" { driver = "docker" config { image = "linuxserver/sonarr:4.0.17" dns_servers = ["192.168.50.2"] ports = ["http"] volumes = [ "${var.shared_dir}sonarr:/config", "${var.downloads_dir}:/downloads", "${var.tv_dir}:/media/TV", "local/config.xml:/config/config.xml", ] } env { PUID = "65534" PGID = "65534" TZ = "Etc/UTC" } template { data = < postgres ${var.postgres_pass} 5432 postgres.service.consul sonarr_main sonarr_logs info * 8989 9898 False False ${var.sonarr_api_key} External DisabledForLocalAddresses 100.64.0.0/10,192.168.50.0/24 main Sonarr Docker EOH destination = "local/config.xml" perms = "644" } service { port = "http" name = "sonarr" tags = [ "traefik.enable=true", ] check { type = "http" path = "/ping" interval = "10s" timeout = "2s" check_restart { limit = 3 grace = "60s" ignore_warnings = false } } } resources { cpu = 1000 
memory = 512 } } } } variable "region" { type = string } variable "tld" { type = string } variable "shared_dir" { type = string } variable "downloads_dir" { type = string } variable "tv_dir" { type = string } variable "postgres_pass" { type = string description = "Admin password for PostgreSQL" } variable "sonarr_api_key" { type = string description = "API key for Sonarr" } ================================================ FILE: nomad_jobs/media-stack/synclounge/nomad.job ================================================ job "synclounge" { region = var.region datacenters = ["dc1"] type = "service" meta { job_file = "nomad_jobs/media-stack/synclounge/nomad.job" version = "4" } group "synclounge" { count = 1 network { port "http" { host_network = "tailscale" to = "8088" } port "server" { host_network = "tailscale" to = "8089" } } restart { attempts = 3 delay = "15s" interval = "10m" mode = "delay" } update { max_parallel = 1 min_healthy_time = "30s" auto_revert = true } task "plexlounge" { driver = "docker" config { image = "starbix/synclounge" network_mode = "host" force_pull = "true" ports = ["http", "server"] } env { DOMAIN = "${NOMAD_TASK_NAME}.${var.tld}" } service { port = "http" name = "plexlounge" tags = [ "traefik.enable=true", "traefik.http.middlewares.httpsRedirect.redirectscheme.scheme=https", "traefik.http.routers.${NOMAD_TASK_NAME}.tls.domains[0].sans=${NOMAD_TASK_NAME}.${var.tld}", "traefik.http.routers.${NOMAD_TASK_NAME}.middlewares=forward-auth" ] check { type = "tcp" interval = "10s" timeout = "2s" } } service { port = "server" name = "syncserver" tags = [ "traefik.enable=true", "traefik.http.middlewares.httpsRedirect.redirectscheme.scheme=https", "traefik.http.routers.syncserver.tls.domains[0].sans=syncserver.${var.tld}", "traefik.http.routers.syncserver.middlewares=forward-auth" ] check { type = "tcp" interval = "10s" timeout = "2s" } } resources { cpu = 3500 memory = 512 } } } } variable "region" { type = string } variable "tld" { type = string } 
================================================ FILE: nomad_jobs/media-stack/tautulli/nomad.job ================================================
# Tautulli: Plex monitoring/statistics UI, exposed on the tailscale host network.
job "tautulli" {
  region      = var.region
  datacenters = ["dc1"]
  type        = "service"

  meta {
    job_file = "nomad_jobs/media-stack/tautulli/nomad.job"
    version  = "3"
  }

  # Only schedule on nodes that have the shared NFS/SMB mount available.
  constraint {
    attribute = "${meta.shared_mount}"
    operator  = "="
    value     = "true"
  }

  group "metrics" {
    count = 1

    network {
      # Tautulli's default web UI port, bound to the tailscale interface.
      port "http" {
        host_network = "tailscale"
        to           = "8181"
      }
    }

    update {
      max_parallel     = 1
      min_healthy_time = "30s"
      auto_revert      = true
    }

    task "tautulli" {
      driver = "docker"

      config {
        image = "tautulli/tautulli"
        ports = ["http"]
        volumes = [
          "${var.shared_dir}tautulli:/config",
          # NOTE(review): "[[ .dirs.plexlogs ]]" is a leftover Levant template
          # token; this job otherwise uses HCL2 variables (${var.*}), so this
          # mount source will not be interpolated — confirm the intended host
          # path for the Plex logs and whether "/media/TV" is the right target.
          "[[ .dirs.plexlogs ]]:/media/TV",
        ]
      }

      service {
        port = "http"
        name = "tautulli"
        # NOTE(review): these tags use the older net-internal/net.frontend
        # convention; most sibling jobs in this stack use "traefik.enable=true".
        tags = ["net-internal", "net-external", "tautulli", "net.frontend.entryPoints=https"]

        check {
          type     = "http"
          path     = "/"
          interval = "10s"
          timeout  = "2s"

          check_restart {
            limit           = 3
            grace           = "60s"
            ignore_warnings = false
          }
        }
      }

      resources {
        cpu    = 100
        memory = 128
      }
    }
  }
}

variable "region" { type = string }

variable "tld" { type = string }

variable "shared_dir" { type = string }
progress_deadline = "15m" auto_revert = true } task "tdarr" { driver = "docker" config { image = "ghcr.io/haveagitgat/tdarr:latest" network_mode = "host" privileged = true runtime = "nvidia" force_pull = true ports = ["http", "server"] volumes = [ "/tmp/tdarr:/temp", "${var.shared_dir}tdarr/configs:/app/configs", "${var.shared_dir}tdarr/logs:/app/logs", "${var.media_dir}:/media", ] } volume_mount { volume = "tdarr" destination = "/app/server" read_only = false } env { PUID = "1000" PGID = "1000" NVIDIA_VISIBLE_DEVICES = "all" serverIP = "0.0.0.0" serverPort = "8266" webUIPort = "8265" internalNode = "true" nodeName = "cheese01" } service { port = "http" name = "tdarr" tags = [ "traefik.enable=true", ] check { type = "tcp" interval = "10s" timeout = "2s" check_restart { limit = 3 grace = "120s" ignore_warnings = false } } } resources { cpu = 2000 memory = 2048 } } } } variable "region" {} variable "tld" {} variable "shared_dir" {} variable "media_dir" {} ================================================ FILE: nomad_jobs/media-stack/tdarr/volume.hcl ================================================ id = "tdarr" external_id = "tdarr" name = "tdarr" type = "csi" plugin_id = "org.democratic-csi.iscsi" capacity_min = "10GiB" capacity_max = "10GiB" capability { access_mode = "single-node-writer" attachment_mode = "block-device" } mount_options { fs_type = "ext4" mount_flags = ["noatime"] } ================================================ FILE: nomad_jobs/misc/adb/nomad.job ================================================ job "adb" { region = var.region datacenters = ["dc1"] type = "service" meta { job_file = "nomad_jobs/misc/adb/nomad.job" version = "4" } constraint { attribute = "${meta.shared_mount}" operator = "=" value = "true" } constraint { attribute = "${meta.zigbee}" operator = "=" value = "true" } group "downloaders" { count = 1 network { mode = "host" port "tcp" { static = "5037" host_network = "lan" } } restart { attempts = 3 delay = "15s" interval = "10m" mode = 
"delay" } update { max_parallel = 1 min_healthy_time = "30s" auto_revert = true } task "adb" { driver = "docker" config { image = "docker-registry.demonsafe.com/adb" entrypoint = ["/local/start.sh"] network_mode = "host" extra_hosts = ["hassio:127.0.0.1"] args = ["&", "adb", "-a", "-P", "5037", "server", "nodaemon"] volumes = [ "${var.shared_dir}home-assistant/android:/root/.android", ] } env { log_level = "warning" } service { port = "tcp" name = "adb" tags = ["net-internal", "adb"] check { type = "tcp" interval = "10s" timeout = "2s" } } template { data = < IAM & Admin -> Service Accounts. * Create a new service account (e.g., `gcp-dns-updater-sa`). * Grant this service account the `DNS Administrator` role (`roles/dns.admin`) on the project containing your managed zone. * Create a JSON key file for this service account and download it securely. You will need the *contents* of this file, not the file itself. 3. **Nomad Environment:** A running Nomad cluster where this job can be scheduled. The Nomad clients must have Docker installed and configured. ## Configuration The service is configured via environment variables passed to the Nomad task, which are then consumed by the `update_dns.py` script running inside the Docker container: * `GCP_DNS_ZONE_NAME`: The name of the managed zone in GCP DNS (e.g., `demonsafe-com`). The script derives the Project ID from the credentials. * `GCP_DNS_RECORD_NAME`: The DNS record name to update (e.g., `*.demonsafe.com`). **Note:** The script expects the base name; the trailing dot is handled internally if needed by the SDK. * `RECORD_TTL`: (Optional) The Time-To-Live (in seconds) for the created/updated A record. Defaults to 300 if not set. * `GCP_PROJECT_ID`: The Google Cloud Project ID containing the DNS zone. * `GCP_SERVICE_ACCOUNT_KEY_B64`: **Required.** The base64-encoded *content* of the GCP service account JSON key file. 
**Generating the Base64 Key:** You need to encode the *content* of your downloaded JSON key file into a single-line base64 string. On Linux/macOS, you can use: ```bash base64 -w 0 < /path/to/your/gcp_key.json ``` *(Ensure you use `-w 0` or an equivalent flag for your `base64` command to prevent line wrapping)* Copy the resulting string. **Setting Environment Variables in Nomad:** These variables are defined within the `env` block of the `nomad.job` file using Go templating to read runtime environment variables provided by the Nomad agent (which in turn are often sourced from the deployment mechanism, like GitHub Actions): ```hcl # Example within nomad.job task config env { GCP_DNS_ZONE_NAME = < 50 else ''}") # Clean the base64 string - remove any whitespace/newlines key_b64 = key_b64.strip().replace('\n', '').replace('\r', '').replace(' ', '') logging.info(f"Cleaned key length: {len(key_b64)}") logging.info(f"Cleaned key content (first 50 chars): {key_b64[:50]}{'...' if len(key_b64) > 50 else ''}") # Check if this looks like a valid base64 string if len(key_b64) < 100: logging.warning(f"Service account key seems too short ({len(key_b64)} chars). 
def update_traefik_whitelist(ip_address: str, config_path: str = "/shared/traefik-ingress/dynamic-whitelist.toml"):
    """Writes a Traefik dynamic-config file whitelisting the given public IP.

    Emits a TOML fragment for Traefik's file provider defining the
    "home-ip-whitelist" ipAllowList middleware, allowing the current public
    IP plus the private/CGNAT ranges used inside the homelab.

    Args:
        ip_address: Public IPv4 address to whitelist (bare address, no CIDR).
        config_path: Destination file; defaults to the shared Traefik mount.

    Failures are logged but never raised (best-effort: a whitelist failure
    must not abort the surrounding DNS update run).
    """
    try:
        logging.info(f"Updating Traefik whitelist with IP: {ip_address}")
        # Static ranges that must always remain whitelisted alongside the
        # dynamic public IP: RFC1918 ranges plus the 100.64.0.0/10 CGNAT range.
        static_ranges = ["192.168.0.0/16", "10.0.0.0/8", "172.16.0.0/12", "100.64.0.0/10"]
        source_range = [f"{ip_address}/32"] + static_ranges
        quoted = ", ".join(f'"{r}"' for r in source_range)
        toml_content = (
            "[http.middlewares.home-ip-whitelist.ipAllowList]\n"
            f"sourceRange = [{quoted}]\n"
        )
        with open(config_path, 'w') as f:
            f.write(toml_content)
        logging.info(f"Successfully updated Traefik whitelist configuration at {config_path}")
    except Exception as e:
        logging.error(f"Failed to update Traefik whitelist: {e}")
def update_spf_record(client: dns.Client, project_id: str, zone_name: str, record_name: str, ip_address: str):
    """Upserts the SPF TXT record on the bare domain with the current public IP.

    Derives the bare domain from record_name (a leading "*." wildcard label is
    stripped), then ensures the domain's TXT record set contains a single SPF
    entry of the form "v=spf1 ip4:<ip> ~all", preserving any non-SPF TXT data.

    Args:
        client: Authenticated google.cloud.dns client.
        project_id: GCP project containing the zone.
        zone_name: GCP managed-zone name, used as-is (e.g. "demonsafe-com").
        record_name: Record targeted by the A update (e.g. "*.demonsafe.com").
        ip_address: Current public IPv4 address.

    All failures are logged, never raised (best-effort, like the A-record path).
    """
    try:
        gcp_zone_name = zone_name
        logging.info(f"Updating SPF record in zone: {gcp_zone_name}")
        zone = client.zone(gcp_zone_name, project_id)
        if not zone.exists():
            logging.error(f"DNS zone '{gcp_zone_name}' not found in project '{project_id}'.")
            return
        # Strip the "*." wildcard prefix by slicing; str.lstrip('*.') strips a
        # *character set* (all leading '*' and '.' chars), not the prefix, and
        # would over-strip names such as "*..example.com".
        domain = record_name[2:] if record_name.startswith('*.') else record_name
        fqdn = domain if domain.endswith('.') else f"{domain}."
        logging.info(f"Checking TXT records for: {fqdn}")
        spf_value = f'"v=spf1 ip4:{ip_address} ~all"'
        record_sets = list(zone.list_resource_record_sets())
        existing_txt = None
        for rs in record_sets:
            if rs.record_type == 'TXT' and rs.name == fqdn:
                existing_txt = rs
                logging.info(f"Found existing TXT record: {rs.name} -> {rs.rrdatas}")
                break
        changes = zone.changes()
        needs_update = False
        if existing_txt:
            # Rewrite only the SPF entry; keep every other TXT value intact.
            new_rrdatas = []
            spf_found = False
            for rd in existing_txt.rrdatas:
                if 'v=spf1' in rd:
                    spf_found = True
                    if ip_address in rd:
                        logging.info(f"SPF record already contains {ip_address}. No update needed.")
                        return
                    logging.info(f"Replacing SPF entry: {rd} -> {spf_value}")
                    new_rrdatas.append(spf_value)
                else:
                    new_rrdatas.append(rd)
            if not spf_found:
                logging.info(f"No existing SPF entry found. Adding: {spf_value}")
                new_rrdatas.append(spf_value)
            # Cloud DNS record sets are immutable: delete + re-add to modify.
            changes.delete_record_set(existing_txt)
            new_txt = zone.resource_record_set(fqdn, "TXT", 300, new_rrdatas)
            changes.add_record_set(new_txt)
            needs_update = True
        else:
            logging.info(f"No TXT record found for {fqdn}. Creating with SPF: {spf_value}")
            new_txt = zone.resource_record_set(fqdn, "TXT", 300, [spf_value])
            changes.add_record_set(new_txt)
            needs_update = True
        if needs_update:
            logging.info(f"Executing SPF TXT changes for {fqdn}...")
            changes.create()
            # Poll until the change set is applied.
            while changes.status != 'done':
                logging.info(f"Waiting for SPF changes to complete (status: {changes.status})...")
                time.sleep(5)
                changes.reload()
            logging.info(f"Successfully updated SPF record for {fqdn} with ip4:{ip_address}")
    except GoogleAPIError as e:
        logging.error(f"GCP API Error updating SPF record: {e}")
    except Exception as e:
        logging.error(f"Unexpected error updating SPF record: {e}")
def get_env_vars():
    """Loads the required configuration from the environment.

    Returns:
        Tuple (project_id, zone_name, record_name, key_b64) read from
        GCP_PROJECT_ID, GCP_DNS_ZONE_NAME, GCP_DNS_RECORD_NAME and
        GCP_SERVICE_ACCOUNT_KEY_B64 respectively.

    Exits the process with status 1 (after logging the missing names) when
    any of the four variables is unset or empty.
    """
    required = (
        'GCP_PROJECT_ID',
        'GCP_DNS_ZONE_NAME',
        'GCP_DNS_RECORD_NAME',
        'GCP_SERVICE_ACCOUNT_KEY_B64',
    )
    values = {name: os.environ.get(name) for name in required}
    absent = [name for name in required if not values[name]]
    if absent:
        logging.error(f"Missing required environment variables: {', '.join(absent)}")
        sys.exit(1)
    return (
        values['GCP_PROJECT_ID'],
        values['GCP_DNS_ZONE_NAME'],
        values['GCP_DNS_RECORD_NAME'],
        values['GCP_SERVICE_ACCOUNT_KEY_B64'],
    )
service account key...") decoded_key = base64.b64decode(key_b64) logging.info("Base64 key decoded successfully.") # Parse the decoded JSON key logging.info("Parsing service account key JSON...") key_info = json.loads(decoded_key) logging.info("Service account key JSON parsed successfully.") # Create credentials from the parsed key info credentials = service_account.Credentials.from_service_account_info(key_info) # Use the provided project_id, not the one from credentials, to ensure consistency client = dns.Client(project=project_id, credentials=credentials) logging.info(f"Successfully created DNS client for project {project_id}") return client except base64.binascii.Error as e: logging.error(f"Failed to decode base64 service account key: {e}") sys.exit(1) except json.JSONDecodeError as e: logging.error(f"Failed to parse service account key JSON: {e}") sys.exit(1) except Exception as e: logging.error(f"Failed to create DNS client from service account info: {e}") sys.exit(1) def update_dns_record(client: dns.Client, project_id: str, zone_name: str, record_name: str, ip_address: str): """ Checks and updates/creates an A record for the given name in the specified zone, replacing a CNAME if necessary. Args: client: Authenticated DNS client. project_id: GCP project ID. zone_name: The domain TLD (e.g., "demonsafe.com"). This will be converted to the GCP zone name format (e.g., "demonsafe-com"). record_name: The specific record to update (e.g., "*.demonsafe.com"). ip_address: The public IP address to set. 
""" try: # Convert the TLD zone name (e.g., "demonsafe.com") to GCP zone name format (e.g., "demonsafe-com") gcp_zone_name = zone_name.replace('.', '-') logging.info(f"Targeting GCP DNS Zone: {gcp_zone_name}") zone = client.zone(gcp_zone_name, project_id) if not zone.exists(): logging.error(f"DNS zone '{gcp_zone_name}' not found in project '{project_id}'.") return # Cannot proceed without the zone # Ensure record_name ends with a dot for FQDN matching fqdn = record_name if record_name.endswith('.') else f"{record_name}." logging.info(f"Checking DNS records for: {fqdn} in zone {gcp_zone_name}") record_sets = list(zone.list_resource_record_sets(filter_=f"name={fqdn}")) existing_a_record = None existing_cname_record = None for record_set in record_sets: if record_set.record_type == 'A' and record_set.name == fqdn: existing_a_record = record_set logging.info(f"Found existing A record: {existing_a_record.name} -> {existing_a_record.rrdatas}") elif record_set.record_type == 'CNAME' and record_set.name == fqdn: existing_cname_record = record_set logging.info(f"Found existing CNAME record: {existing_cname_record.name} -> {existing_cname_record.rrdatas}") changes = zone.changes() needs_update = False # Handle existing CNAME (delete it to replace with A) if existing_cname_record: logging.warning(f"Deleting existing CNAME record {fqdn} to replace with A record.") changes.delete_record_set(existing_cname_record) needs_update = True # Ensure we don't try to delete an A record if we just deleted a CNAME existing_a_record = None # Define the new A record we want new_a_record = zone.resource_record_set(fqdn, "A", 300, [ip_address]) # Handle existing A record if existing_a_record: if existing_a_record.rrdatas == [ip_address]: logging.info(f"Existing A record {fqdn} already points to {ip_address}. No update needed.") return # Nothing to do else: logging.info(f"Existing A record {fqdn} points to {existing_a_record.rrdatas}. 
Updating to {ip_address}.") changes.delete_record_set(existing_a_record) changes.add_record_set(new_a_record) needs_update = True # Handle case where no A record (and no CNAME was found/deleted) elif not existing_cname_record: # Only add if we didn't already decide to replace CNAME logging.info(f"No existing A or CNAME record found for {fqdn}. Creating new A record pointing to {ip_address}.") changes.add_record_set(new_a_record) needs_update = True # Handle case where CNAME was found and deleted - we still need to add the A record elif existing_cname_record: logging.info(f"Adding A record for {fqdn} pointing to {ip_address} after CNAME deletion.") changes.add_record_set(new_a_record) # needs_update should already be True # Execute the changes if any were queued if needs_update: logging.info(f"Executing DNS changes for {fqdn} in zone {gcp_zone_name}...") changes.create() # Wait until the changes are finished. while changes.status != 'done': logging.info(f"Waiting for DNS changes to complete (status: {changes.status})...") time.sleep(5) # Wait 5 seconds before checking again changes.reload() logging.info(f"Successfully updated DNS record {fqdn} to {ip_address} in zone {gcp_zone_name}.") else: # This case should only be hit if an A record existed and was correct logging.info("No DNS changes were necessary.") except GoogleAPIError as e: logging.error(f"GCP API Error updating DNS record {fqdn} in zone {gcp_zone_name}: {e}") except Exception as e: logging.error(f"An unexpected error occurred during DNS update for {fqdn} in zone {gcp_zone_name}: {e}") def update_spf_record(client: dns.Client, project_id: str, zone_name: str, record_name: str, ip_address: str): """Updates the SPF TXT record on the bare domain with the current public IP.""" try: gcp_zone_name = zone_name.replace('.', '-') logging.info(f"Updating SPF record in zone: {gcp_zone_name}") zone = client.zone(gcp_zone_name, project_id) if not zone.exists(): logging.error(f"DNS zone '{gcp_zone_name}' not found in 
project '{project_id}'.") return # Derive bare domain from record_name (e.g., "*.demonsafe.com" -> "demonsafe.com.") domain = record_name.lstrip('*.') if record_name.startswith('*.') else record_name fqdn = domain if domain.endswith('.') else f"{domain}." logging.info(f"Checking TXT records for: {fqdn}") spf_value = f'"v=spf1 ip4:{ip_address} ~all"' record_sets = list(zone.list_resource_record_sets(filter_=f"name={fqdn}")) existing_txt = None for rs in record_sets: if rs.record_type == 'TXT' and rs.name == fqdn: existing_txt = rs logging.info(f"Found existing TXT record: {rs.name} -> {rs.rrdatas}") break changes = zone.changes() needs_update = False if existing_txt: new_rrdatas = [] spf_found = False for rd in existing_txt.rrdatas: if 'v=spf1' in rd: spf_found = True if ip_address in rd: logging.info(f"SPF record already contains {ip_address}. No update needed.") return logging.info(f"Replacing SPF entry: {rd} -> {spf_value}") new_rrdatas.append(spf_value) else: new_rrdatas.append(rd) if not spf_found: logging.info(f"No existing SPF entry found. Adding: {spf_value}") new_rrdatas.append(spf_value) changes.delete_record_set(existing_txt) new_txt = zone.resource_record_set(fqdn, "TXT", 300, new_rrdatas) changes.add_record_set(new_txt) needs_update = True else: logging.info(f"No TXT record found for {fqdn}. 
Creating with SPF: {spf_value}") new_txt = zone.resource_record_set(fqdn, "TXT", 300, [spf_value]) changes.add_record_set(new_txt) needs_update = True if needs_update: logging.info(f"Executing SPF TXT changes for {fqdn}...") changes.create() while changes.status != 'done': logging.info(f"Waiting for SPF changes to complete (status: {changes.status})...") time.sleep(5) changes.reload() logging.info(f"Successfully updated SPF record for {fqdn} with ip4:{ip_address}") except GoogleAPIError as e: logging.error(f"GCP API Error updating SPF record: {e}") except Exception as e: logging.error(f"Unexpected error updating SPF record: {e}") if __name__ == "__main__": logging.info("Starting DNS update script.") project_id, zone_name, record_name, key_b64 = get_env_vars() public_ip = get_public_ip() if public_ip: dns_client = get_dns_client(key_b64, project_id) if dns_client: update_dns_record(dns_client, project_id, zone_name, record_name, public_ip) update_spf_record(dns_client, project_id, zone_name, record_name, public_ip) logging.info("DNS update script finished.") else: # Error handled in get_dns_client, it exits logging.error("Exiting due to DNS client initialization failure.") sys.exit(1) # Explicit exit for clarity else: # Error handled in get_public_ip, it exits logging.error("Exiting due to inability to fetch public IP.") sys.exit(1) # Explicit exit for clarity ================================================ FILE: nomad_jobs/misc/gitea/nomad.job ================================================ job "gitea" { meta { job_file = "nomad_jobs/misc/gitea/nomad.job" } region = var.region datacenters = ["dc1"] type = "service" group "svc" { count = 1 volume "gitea-data" { type = "host" source = "gitea-data" read_only = false } volume "gitea-db" { type = "host" source = "gitea-db" read_only = false } restart { attempts = 5 delay = "30s" } task "app" { driver = "docker" volume_mount { volume = "gitea-data" destination = "/data" read_only = false } config { image = 
"gitea/gitea" port_map { http = 3000 ssh_pass = 22 } } env = { "APP_NAME" = "Gitea: Git with a cup of tea" "RUN_MODE" = "prod" "SSH_DOMAIN" = "git.${var.tld}" "SSH_PORT" = "22" "ROOT_URL" = "http://git.${var.tld}/" "USER_UID" = "1002" "USER_GID" = "1002" "DB_TYPE" = "postgres" "DB_NAME" = "gitea" "DB_USER" = "gitea" "DB_PASSWD" = "gitea" "SHOW_REGISTRATION_BUTTON" = "false" } template { data = <' ; before setting it here, to get a feel for which cipher suites you will get. ; ; After setting this option, it is recommend that you inspect your Murmur log ; to ensure that Murmur is using the cipher suites that you expected it to. ; ; Note: Changing this option may impact the backwards compatibility of your ; Murmur server, and can remove the ability for older Mumble clients to be able ; to connect to it. ;sslCiphers=EECDH+AESGCM:EDH+aRSA+AESGCM:DHE-RSA-AES256-SHA:DHE-RSA-AES128-SHA:AES256-SHA:AES128-SHA ; If Murmur is started as root, which user should it switch to? ; This option is ignored if Murmur isn't started with root privileges. ;uname= ; By default, in log files and in the user status window for privileged users, ; Mumble will show IP addresses - in some situations you may find this unwanted ; behavior. If obfuscate is set to true, Murmur will randomize the IP addresses ; of connecting users. ; ; The obfuscate function only affects the log file and DOES NOT effect the user ; information section in the client window. ;obfuscate=false ; If this options is enabled, only clients which have a certificate are allowed ; to connect. ;certrequired=False ; If enabled, clients are sent information about the servers version and operating ; system. ;sendversion=True ; You can set a recommended minimum version for your server, and clients will ; be notified in their log when they connect if their client does not meet the ; minimum requirements. suggestVersion expects the version in the format X.X.X. 
; ; Note that the suggest* options appeared after 1.2.3 and will have no effect ; on client versions 1.2.3 and earlier. ; ;suggestVersion= ; Setting this to "true" will alert any user who does not have positional audio ; enabled that the server administrators recommend enabling it. Setting it to ; "false" will have the opposite effect - if you do not care whether the user ; enables positional audio or not, set it to blank. The message will appear in ; the log window upon connection, but only if the user's settings do not match ; what the server requests. ; ; Note that the suggest* options appeared after 1.2.3 and will have no effect ; on client versions 1.2.3 and earlier. ; ;suggestPositional= ; Setting this to "true" will alert any user who does not have Push-To-Talk ; enabled that the server administrators recommend enabling it. Setting it to ; "false" will have the opposite effect - if you do not care whether the user ; enables PTT or not, set it to blank. The message will appear in the log ; window upon connection, but only if the user's settings do not match what the ; server requests. ; ; Note that the suggest* options appeared after 1.2.3 and will have no effect ; on client versions 1.2.3 and earlier. ; ;suggestPushToTalk= ; This sets password hash storage to legacy mode (1.2.4 and before) ; (Note that setting this to true is insecure and should not be used unless absolutely necessary) ;legacyPasswordHash=false ; By default a strong amount of PBKDF2 iterations are chosen automatically. If >0 this setting ; overrides the automatic benchmark and forces a specific number of iterations. ; (Note that you should only change this value if you know what you are doing) ;kdfIterations=-1 ; In order to prevent misconfigured, impolite or malicious clients from ; affecting the low-latency of other users, Murmur has a rudimentary global-ban ; system. It's configured using the autobanAttempts, autobanTimeframe and ; autobanTime settings. 
; ; If a client attempts autobanAttempts connections in autobanTimeframe seconds, ; they will be banned for autobanTime seconds. This is a global ban, from all ; virtual servers on the Murmur process. It will not show up in any of the ; ban-lists on the server, and they can't be removed without restarting the ; Murmur process - just let them expire. A single, properly functioning client ; should not trip these bans. ; ; To disable, set autobanAttempts or autobanTimeframe to 0. Commenting these ; settings out will cause Murmur to use the defaults: ; ; To avoid autobanning successful connection attempts from the same IP address, ; set autobanSuccessfulConnections=False. ; ;autobanAttempts=10 ;autobanTimeframe=120 ;autobanTime=300 ;autobanSuccessfulConnections=True ; Enables logging of group changes. This means that every time a group in a ; channel changes, the server will log all groups and their members from before ; the change and after the change. Deault is false. This option was introduced ; with Murmur 1.4.0. ; ;loggroupchanges=false ; Enables logging of ACL changes. This means that every time the ACL in a ; channel changes, the server will log all ACLs from before the change and ; after the change. Default is false. This option was introduced with Murmur ; 1.4.0. ; ;logaclchanges=false ; You can configure any of the configuration options for Ice here. We recommend ; leave the defaults as they are. ; Please note that this section has to be last in the configuration file. 
; [Ice] Ice.Warn.UnknownProperties=1 Ice.MessageSizeMax=65536 EOH destination = "local/murmur-config" env = false } resources { cpu = 100 memory = 128 network { port "0" {} } } } } } variable "region" { type = string } variable "tld" { type = string } variable "shared_dir" { type = string } variable "auth" { type = string } ================================================ FILE: nomad_jobs/misc/octoprint/nomad.job ================================================ job "octoprint" { region = var.region datacenters = ["dc1"] type = "service" meta { job_file = "nomad_jobs/misc/octoprint/nomad.job" version = "6" } constraint { attribute = "${meta.3d_printer}" operator = "=" value = "true" } constraint { attribute = "${meta.shared_mount}" operator = "=" value = "true" } group "3dprinter" { count = 1 network { port "web" { host_network = "tailscale" to = "5000" } } update { max_parallel = 1 min_healthy_time = "30s" auto_revert = true } task "octoprint" { driver = "docker" config { image = "octoprint/octoprint" force_pull = true #network_mode = "host" privileged = true ports = ["web"] volumes = [ "${var.shared_dir}octoprint:/home/octoprint/.octoprint", "/dev/ttyUSB0:/dev/ttyUSB0", ] } service { port = "web" name = "octoprint" tags = [ "traefik.enable=true", "traefik.http.middlewares.cors.headers.accesscontrolallowmethods=GET,OPTIONS,PUT", "traefik.http.middlewares.cors.headers.accesscontrolalloworigin=origin-list-or-null", "traefik.http.middlewares.cors.headers.accesscontrolmaxage=100", "traefik.http.middlewares.cors.headers.addvaryheader=true", "traefik.http.middlewares.malpotAuth.basicauth.users=${var.auth}", "traefik.http.routers.${NOMAD_TASK_NAME}.middlewares=forward-auth" ] check { type = "http" path = "/" interval = "10s" timeout = "2s" check_restart { limit = 3 grace = "60s" ignore_warnings = false } } } env { TZ = "Europe/Amsterdam" } resources { cpu = 100 memory = 1024 } } } } variable "region" { type = string } variable "tld" { type = string } variable "shared_dir" 
{ type = string } ================================================ FILE: nomad_jobs/misc/uploader/nomad.job ================================================ job "uploader" { region = var.region datacenters = ["dc1"] type = "service" meta { job_file = "nomad_jobs/misc/uploader/nomad.job" version = "5" } group "webserver" { count = 1 restart { attempts = 3 delay = "15s" interval = "10m" mode = "delay" } update { max_parallel = 1 min_healthy_time = "30s" auto_revert = true } task "uploader" { driver = "docker" service { name = "uploader" tags = [ "traefik.enable=true", "traefik.http.middlewares.httpsRedirect.redirectscheme.scheme=https", "traefik.http.routers.${NOMAD_TASK_NAME}.tls.domains[0].sans=${NOMAD_TASK_NAME}.${var.tld}", "traefik.http.routers.${NOMAD_TASK_NAME}.middlewares=forward-auth" ] port = "http" check { type = "tcp" interval = "10s" timeout = "2s" } } config { image = "docker-registry.${var.tld}/uploader:latest" network_mode = "host" volumes = [ "${var.shared_dir}uploader:/data", ] } template { data = <5 minutes. - alert: InstanceDown expr: up{job!="hass"} == 0 for: 5m labels: severity: page annotations: summary: "Instance {{ $labels.instance }} down" description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes." - alert: HomeAssistantDown expr: up{job="hass"} == 0 for: 10m labels: severity: warning annotations: summary: "Home Assistant is down" description: "Home Assistant at {{ $labels.instance }} has been down for more than 10 minutes." 
# Alert for any device that is over 80% capacity - alert: DiskUsage expr: avg(nomad_client_host_disk_used_percent) by (host, device) > 80 for: 5m labels: severity: page annotations: summary: "Host {{ $labels.host }} disk {{ $labels.device }} usage alert" description: "{{ $labels.host }} is using over 80% of its device: {{ $labels.device }}" - name: nomad_allocation_alerts rules: - alert: NomadJobFailureRate expr: rate(nomad_nomad_job_summary_failed[5m]) > 0 for: 2m labels: severity: critical alertname: "NomadJobFailureRate" annotations: summary: "Nomad job {{ $labels.exported_job }} is experiencing failures" description: "Job {{ $labels.exported_job }} is failing allocations at a rate of {{ $value | printf \"%.2f\" }} per second" service: "nomad" - alert: NomadJobLostRate expr: rate(nomad_nomad_job_summary_lost[5m]) > 0 for: 2m labels: severity: warning alertname: "NomadJobLostRate" annotations: summary: "Nomad job {{ $labels.exported_job }} is losing allocations" description: "Job {{ $labels.exported_job }} is losing allocations at a rate of {{ $value | printf \"%.2f\" }} per second" service: "nomad" - alert: NomadJobQueued expr: nomad_nomad_job_summary_queued > 0 for: 5m labels: severity: warning alertname: "NomadJobQueued" annotations: summary: "Nomad job {{ $labels.exported_job }} has queued allocations" description: "Job {{ $labels.exported_job }} has {{ $value }} allocations queued for over 5 minutes" service: "nomad" - alert: NomadAllocationsRestarting expr: rate(nomad_client_allocs_restart[5m]) > 0.1 for: 2m labels: severity: warning alertname: "NomadAllocationsRestarting" annotations: summary: "High allocation restart rate on {{ $labels.host }}" description: "Allocation restart rate is {{ $value }} per second on {{ $labels.host }}" service: "nomad" - alert: NomadAllocationsOOMKilled expr: nomad_client_allocs_oom_killed > 0 for: 0s labels: severity: critical alertname: "NomadAllocationsOOMKilled" annotations: summary: "Allocation killed due to OOM on {{ 
$labels.host }}" description: "{{ $value }} allocations were killed due to out-of-memory on {{ $labels.host }}" service: "nomad" EOH } config { image = "prom/prometheus:v3.11.2" network_mode = "host" args = ["--storage.tsdb.path", "/opt/prometheus", "--web.listen-address", "0.0.0.0:9090", "--storage.tsdb.retention.time", "90d"] force_pull = true ports = ["http"] dns_servers = ["192.168.50.2"] volumes = [ "local/alerts.yml:/prometheus/alerts.yml", "local/prometheus.yml:/prometheus/prometheus.yml", ] } resources { cpu = 1000 memory = 512 } } } } variable "region" {} variable "tld" {} variable "shared_dir" {} variable "hass_key" {} variable "hass_ip" {} ================================================ FILE: nomad_jobs/observability/prometheus/volume.hcl ================================================ id = "prometheus" external_id = "prometheus" name = "prometheus" type = "csi" plugin_id = "org.democratic-csi.iscsi" capacity_min = "50GiB" capacity_max = "50GiB" capability { access_mode = "multi-node-single-writer" attachment_mode = "file-system" } mount_options { fs_type = "ext4" mount_flags = ["noatime"] } ================================================ FILE: nomad_jobs/observability/telegraf/nomad.job ================================================ job "telegraf" { region = var.region datacenters = ["dc1", "public", "system"] type = "system" priority = 100 meta { job_file = "nomad_jobs/observability/telegraf/nomad.job" version = "4" } group "telegraf-exporter" { network { port "http" { host_network = "tailscale" to = "9273" } } restart { attempts = 3 delay = "15s" interval = "10m" mode = "delay" } update { min_healthy_time = "30s" auto_revert = true } task "telegraf" { driver = "docker" service { name = "telegraf" port = "http" tags = ["metrics"] check { type = "tcp" interval = "5s" timeout = "2s" } } config { image = "telegraf:1.38.2" privileged = "true" ports = ["http"] args = [ "--config=/local/config.yaml", ] } template { data = < /dev/null || pgrep -x 
suricata > /dev/null"] interval = "30s" timeout = "5s" } } } } } variable "region" { type = string } variable "shared_dir" { type = string } ================================================ FILE: nomad_jobs/security/suricata-update/nomad.job ================================================ job "suricata-update" { region = var.region datacenters = ["dc1"] type = "batch" priority = 80 meta { job_file = "nomad_jobs/security/suricata-update/nomad.job" version = "3" // Single instance with shared NFS storage } # Run daily at 4am periodic { crons = ["0 4 * * *"] prohibit_overlap = true } constraint { attribute = "${meta.shared_mount}" operator = "=" value = "true" } group "update" { count = 1 restart { attempts = 3 delay = "15s" interval = "10m" mode = "delay" } task "suricata-update" { driver = "docker" config { image = "jasonish/suricata:8.0" command = "suricata-update" volumes = [ "${var.shared_dir}suricata/rules:/var/lib/suricata", ] } resources { cpu = 500 memory = 1024 } } } } variable "region" { type = string } variable "shared_dir" { type = string } ================================================ FILE: nomad_jobs/security/wazuh-agent/nomad.job ================================================ job "wazuh-agent" { region = var.region datacenters = ["dc1"] type = "system" priority = 100 meta { job_file = "nomad_jobs/security/wazuh-agent/nomad.job" version = "6" // Fix client.keys file permissions for persistence } group "agent" { network { mode = "host" } restart { attempts = 3 delay = "15s" interval = "10m" mode = "delay" } update { min_healthy_time = "30s" auto_revert = true } # Ensure agent data directory exists on host task "prep-agent-dir" { driver = "docker" config { image = "busybox:latest" command = "sh" args = ["-c", "mkdir -p /host/var/lib/wazuh-agent; test -f /host/var/lib/wazuh-agent/client.keys || touch /host/var/lib/wazuh-agent/client.keys; chmod 666 /host/var/lib/wazuh-agent/client.keys"] volumes = [ "/var/lib:/host/var/lib", ] } resources { cpu = 100 
memory = 32 } lifecycle { hook = "prestart" sidecar = false } } task "wazuh-agent" { driver = "docker" config { image = "wazuh/wazuh-agent:4.14.4" network_mode = "host" force_pull = true privileged = true # Mount host directories for monitoring and config volumes = [ "/var/log:/host/var/log:ro", "/var/run/docker.sock:/var/run/docker.sock:ro", "/:/host:ro", "/var/lib/wazuh-agent/client.keys:/var/ossec/etc/client.keys", "local/ossec.conf:/var/ossec/etc/ossec.conf", ] } # Configuration template for the agent # Uses Consul service discovery to automatically find Wazuh manager template { data = < {{- if service "wazuh-agent-comm" -}} {{- with index (service "wazuh-agent-comm") 0 }}
{{ .Address }}
{{ .Port }} {{- end -}} {{- else }}
127.0.0.1
1514 {{- end }} tcp
ubuntu, ubuntu20, ubuntu20.04 10 60 yes
no 5000 500 syslog /host/var/log/syslog syslog /host/var/log/auth.log syslog /host/var/log/kern.log syslog /host/var/log/dpkg.log json /host/var/log/nomad/*.log json /host/var/log/consul/*.log syslog /host/var/log/docker.log audit /host/var/log/audit/audit.log syslog /host/var/log/secure journald journald json /host/var/log/suricata/eve.json syslog /host/var/log/suricata/suricata.log no 21600 yes yes /host/etc /host/usr/bin /host/usr/sbin /host/bin /host/sbin /host/etc/nomad.d /host/etc/consul.d /host/root/.ssh /host/home/*/.ssh /host/etc/mtab /host/etc/hosts.deny /host/etc/mail/statistics /host/etc/random-seed /host/etc/adjtime /host/etc/httpd/logs /host/etc/resolv.conf .log$|.tmp$|.swp$ no yes yes yes yes yes yes yes 43200 /var/ossec/etc/shared/rootkit_files.txt /var/ossec/etc/shared/rootkit_trojans.txt no 1h yes yes yes yes yes yes yes yes yes 12h yes no 10m 5 yes yes EOH destination = "local/ossec.conf" change_mode = "restart" } resources { cpu = 300 memory = 512 } service { name = "wazuh-agent" tags = ["security", "monitoring"] # Use a simple script check that runs inside the container check { type = "script" name = "agent-status" command = "/var/ossec/bin/wazuh-control" args = ["status"] interval = "30s" timeout = "10s" } } } } } variable "region" { type = string } ================================================ FILE: nomad_jobs/security/wazuh-server/nomad.job ================================================ job "wazuh-server" { region = var.region datacenters = ["dc1"] type = "service" constraint { attribute = "${meta.shared_mount}" operator = "=" value = "true" } group "wazuh-stack" { count = 1 network { port "indexer" { host_network = "lan" to = 9200 } port "manager" { static = 1514 host_network = "lan" to = 1514 } port "manager_reg" { static = 1515 host_network = "lan" to = 1515 } port "manager_api" { host_network = "lan" to = 55000 } port "dashboard" { host_network = "lan" to = 443 } } # Persistent volumes for Wazuh components volume "wazuh-indexer" { 
type = "csi" read_only = false source = "wazuh-indexer" access_mode = "single-node-writer" attachment_mode = "file-system" } volume "wazuh-manager" { type = "csi" read_only = false source = "wazuh-manager" access_mode = "single-node-writer" attachment_mode = "file-system" } volume "wazuh-dashboard" { type = "csi" read_only = false source = "wazuh-dashboard" access_mode = "single-node-writer" attachment_mode = "file-system" } restart { attempts = 3 delay = "15s" interval = "10m" mode = "delay" } # Prep disk task for indexer volume permissions task "prep-indexer-disk" { driver = "docker" volume_mount { volume = "wazuh-indexer" destination = "/volume/" read_only = false } config { image = "busybox:latest" command = "sh" args = ["-c", "chown -R 1000:1000 /volume/"] } resources { cpu = 200 memory = 128 } lifecycle { hook = "prestart" sidecar = false } } # Prep disk task for manager volume permissions task "prep-manager-disk" { driver = "docker" volume_mount { volume = "wazuh-manager" destination = "/volume/" read_only = false } config { image = "busybox:latest" command = "sh" args = ["-c", "chown -R 999:999 /volume/"] } resources { cpu = 200 memory = 128 } lifecycle { hook = "prestart" sidecar = false } } # Prep disk task for dashboard volume permissions task "prep-dashboard-disk" { driver = "docker" volume_mount { volume = "wazuh-dashboard" destination = "/volume/" read_only = false } config { image = "busybox:latest" command = "sh" args = ["-c", "rm -rf /volume/wazuh && mkdir -p /volume/wazuh/config && chown -R 1000:1000 /volume/"] } resources { cpu = 200 memory = 128 } lifecycle { hook = "prestart" sidecar = false } } # Wazuh Indexer (OpenSearch-based) task "wazuh-indexer" { driver = "docker" volume_mount { volume = "wazuh-indexer" destination = "/var/lib/wazuh-indexer" read_only = false } config { image = "wazuh/wazuh-indexer:4.14.4" force_pull = true ports = ["indexer"] volumes = [ "local/opensearch.yml:/usr/share/wazuh-indexer/config/opensearch.yml", ] ulimit { 
nofile = "65536:65536" memlock = "-1:-1" } } env { OPENSEARCH_JAVA_OPTS = "-Xms1g -Xmx1g" } template { data = < 86601 Suppressed: Suricata STREAM ESTABLISHED invalid ack EOH destination = "local/local_rules.xml" perms = "0644" } # Wazuh ossec.conf with log_alert_level=8 (only high/critical alerts) # Wazuh levels: 0=ignored, 1-4=low, 5-7=medium, 8-10=high, 11-15=critical template { data = < yes yes no no no smtp.example.wazuh.com wazuh@example.wazuh.com recipient@example.wazuh.com 12 alerts.log 15m 0 yes 8 12 plain secure 1514 tcp 131072 no yes yes yes yes yes yes yes 43200 etc/rootcheck/rootkit_files.txt etc/rootcheck/rootkit_trojans.txt yes /var/lib/containerd /var/lib/docker/overlay2 yes 1800 1d yes wodles/java wodles/ciscat yes yes /var/log/osquery/osqueryd.results.log /etc/osquery/osquery.conf yes no 1h yes yes yes yes yes yes yes yes yes yes yes 10 yes yes 12h yes yes yes 60m yes https://{{ env "NOMAD_IP_indexer" }}:{{ env "NOMAD_HOST_PORT_indexer" }} /etc/filebeat/certs/root-ca.pem /etc/filebeat/certs/filebeat.pem /etc/filebeat/certs/filebeat-key.pem no 43200 yes yes no /etc,/usr/bin,/usr/sbin /bin,/sbin,/boot /etc/mtab /etc/hosts.deny /etc/mail/statistics /etc/random-seed /etc/random.seed /etc/adjtime /etc/httpd/logs /etc/utmpx /etc/wtmpx /etc/cups/certs /etc/dumpdates /etc/svc/volatile .log$|.swp$ /etc/ssl/private.key yes yes yes yes 10 50 yes 5m 10 127.0.0.1 ^localhost.localdomain$ 168.63.129.16 disable-account disable-account yes restart-wazuh restart-wazuh firewall-drop firewall-drop yes host-deny host-deny yes route-null route-null yes win_route-null route-null.exe yes netsh netsh.exe yes command df -P 360 full_command netstat -tulpn | sed 's/\([[:alnum:]]\+\)\ \+[[:digit:]]\+\ \+[[:digit:]]\+\ \+\(.*\):\([[:digit:]]*\)\ \+\([0-9\.\:\*]\+\).\+\ \([[:digit:]]*\/[[:alnum:]\-]*\).*/\1 \2 == \3 == \4 \5/' | sort -k 4 -g | sed 's/ == \(.*\) ==/:\1/' | sed 1,2d netstat listening ports 360 full_command last -n 20 360 ruleset/decoders ruleset/rules 
0215-policy_rules.xml etc/lists/audit-keys etc/lists/amazon/aws-eventnames etc/lists/security-eventchannel etc/lists/malicious-ioc/malware-hashes etc/lists/malicious-ioc/malicious-ip etc/lists/malicious-ioc/malicious-domains etc/decoders etc/rules yes 1 64 15m no 1515 no yes no HIGH:!ADH:!EXP:!MD5:!RC4:!3DES:!CAMELLIA:@STRENGTH no etc/sslmanager.cert etc/sslmanager.key no wazuh node01 master 1516 0.0.0.0 NODE_IP no yes syslog /var/ossec/logs/active-responses.log EOH destination = "local/ossec.conf" perms = "0644" } resources { cpu = 1000 memory = 1024 } service { name = "wazuh-manager" port = "manager_api" tags = ["metrics"] meta { api_port = "${NOMAD_HOST_PORT_manager_api}" } check { type = "tcp" port = "manager_api" interval = "10s" timeout = "2s" } } service { name = "wazuh-agent-comm" port = "manager" tags = ["agent-communication"] check { type = "tcp" port = "manager" interval = "10s" timeout = "2s" } } service { name = "wazuh-agent-reg" port = "manager_reg" tags = ["agent-registration"] check { type = "tcp" port = "manager_reg" interval = "10s" timeout = "2s" } } } # Wazuh Dashboard (Web UI) task "wazuh-dashboard" { driver = "docker" volume_mount { volume = "wazuh-dashboard" destination = "/usr/share/wazuh-dashboard/data" read_only = false } config { image = "wazuh/wazuh-dashboard:4.14.4" force_pull = true ports = ["dashboard"] volumes = [ "local/opensearch_dashboards.yml:/usr/share/wazuh-dashboard/config/opensearch_dashboards.yml", "local/wazuh.yml:/usr/share/wazuh-dashboard/data/wazuh/config/wazuh.yml:ro", ] } template { data = < 128MB } group "db" { network { mode = "host" port "pgvector" { static = "5432" host_network = "lan" } } restart { attempts = 3 delay = "15s" interval = "10m" mode = "delay" } update { max_parallel = 1 min_healthy_time = "30s" auto_revert = true } task "pgvector" { driver = "docker" config { image = "pgvector/pgvector:pg16" volumes = [ "${var.shared_dir}pgvector-data:/var/lib/postgresql/data", ] ports = ["pgvector"] } env { 
POSTGRES_DB = "vectordb" POSTGRES_USER = "postgres" POSTGRES_PASSWORD = "${var.postgres_pass}" PGDATA = "/var/lib/postgresql/data" } service { name = "${NOMAD_JOB_NAME}" tags = ["pgvector", "database", "vector-database"] port = "pgvector" check { type = "tcp" port = "pgvector" interval = "30s" timeout = "2s" } } resources { cpu = "100" memory = "128" } } } } variable "region" {} variable "shared_dir" {} variable "pgvector_admin_password" {} ================================================ FILE: nomad_jobs/storage-backends/pgvector/pgvector-setup.job ================================================ job "pgvector-setup" { type = "batch" datacenters = ["dc1"] meta { job_file = "nomad_jobs/storage-backends/pgvector/pgvector-setup.job" version = "1" } group "setup" { restart { attempts = 3 delay = "15s" interval = "10m" mode = "delay" } task "initialize-pgvector" { driver = "docker" config { image = "pgvector/pgvector:pg16" command = "sh" args = [ "-c", "PGPASSWORD=$PGVECTOR_PASSWORD psql -h 192.168.50.120 -p 5432 -U postgres -d cognee_db -c \"CREATE EXTENSION IF NOT EXISTS vector;\" && PGPASSWORD=$PGVECTOR_PASSWORD psql -h 192.168.50.120 -p 5432 -U postgres -c \"DO \\$\\$ BEGIN CREATE DATABASE embeddings; EXCEPTION WHEN duplicate_database THEN RAISE NOTICE 'embeddings database exists'; END \\$\\$;\" && PGPASSWORD=$PGVECTOR_PASSWORD psql -h 192.168.50.120 -p 5432 -U postgres -d embeddings -c \"CREATE EXTENSION IF NOT EXISTS vector;\"" ] } env { PGVECTOR_PASSWORD = "${var.pgvector_pass}" } resources { cpu = 200 memory = 256 } } } } variable "pgvector_pass" { type = string description = "Admin password for the pgvector PostgreSQL server" } ================================================ FILE: nomad_jobs/storage-backends/postgres/nomad.job ================================================ job "postgres" { region = var.region datacenters = ["dc1"] type = "service" meta { job_file = "nomad_jobs/storage-backends/postgres/nomad.job" version = "5" // Fixed postgres password 
variable }

group "db" {
  network {
    mode = "host"
    port "postgres" {
      static       = "5432"
      host_network = "lan"
    }
  }

  restart {
    attempts = 3
    delay    = "15s"
    interval = "10m"
    mode     = "delay"
  }

  update {
    max_parallel     = 1
    min_healthy_time = "30s"
    auto_revert      = true
  }

  task "postgres" {
    driver = "docker"

    config {
      image = "postgres:15.17"
      volumes = [
        "${var.shared_dir}paperless-postgres:/appdata/postgres",
      ]
      ports = ["postgres"]
    }

    env {
      POSTGRES_DB       = "paperless"
      POSTGRES_USER     = "postgres"
      POSTGRES_PASSWORD = "${var.postgres_pass}"
      PGDATA            = "/appdata/postgres"
    }

    service {
      name = "${NOMAD_JOB_NAME}"
      tags = ["postgres"]
      port = "postgres"

      check {
        type     = "tcp"
        port     = "postgres"
        interval = "30s"
        timeout  = "2s"
      }
    }

    resources {
      cpu    = "200"
      memory = "512"
    }
  }
}
}

variable "region" { type = string }
variable "shared_dir" { type = string }
variable "postgres_pass" {
  type        = string
  description = "Admin password for PostgreSQL"
}

================================================
FILE: nomad_jobs/storage-backends/postgres/postgres-setup.job
================================================
# One-shot batch job that idempotently creates the application databases on the
# shared PostgreSQL server (discovered via Consul DNS at postgres.service.consul).
job "postgres-setup" {
  type        = "batch"
  datacenters = ["dc1"]

  meta {
    job_file = "nomad_jobs/storage-backends/postgres/postgres-setup.job"
    version  = "3" // Create databases outside DO blocks
  }

  group "setup" {
    restart {
      attempts = 3
      delay    = "15s"
      interval = "10m"
      mode     = "delay"
    }

    task "create-dbs" {
      driver = "docker"

      config {
        image   = "postgres:15"
        command = "sh"
        # CREATE DATABASE cannot be executed inside a transaction block, so the
        # former DO $$ ... $$ wrappers always failed. Loop over the database
        # names, probe pg_database, and create each one only when missing.
        # ($db is plain shell expansion; HCL only interpolates on "${".)
        args = [
          "-c",
          "set -e; for db in sonarr_main sonarr_logs radarr_main radarr_logs lidarr_main lidarr_logs litellm nextcloud paperless; do PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres.service.consul -U postgres -tAc \"SELECT 1 FROM pg_database WHERE datname = '$db'\" | grep -q 1 || PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres.service.consul -U postgres -c \"CREATE DATABASE $db\"; done"
        ]
      }

      env {
        POSTGRES_PASSWORD = "${var.postgres_pass}"
      }

      resources {
        cpu    = 200
        memory = 256
      }
    }
  }
}

variable "postgres_pass" {
  type        = string
  description = "Admin password for the PostgreSQL server"
}

================================================
FILE: nomad_jobs/storage-backends/qdrant/nomad.job
================================================
job "qdrant" {
  region      = var.region
  datacenters = ["dc1"]
  type        = "service"

  meta {
    job_file = "nomad_jobs/storage-backends/qdrant/nomad.job"
    version  = "3"
  }

  group "qdrant" {
    count = 1

    network {
      mode = "host"
      port "http" {
        static       = 6333
        to           = 6333
        host_network = "lan"
      }
      port "grpc" {
        static       = 6334
        to           = 6334
        host_network = "lan"
      }
    }

    volume "qdrant-data" {
      type            = "csi"
      read_only       = false
      source          = "qdrant-data"
      access_mode     = "single-node-writer"
      attachment_mode = "file-system"
    }

    restart {
      attempts = 3
      delay    = "15s"
      interval = "10m"
      mode     = "delay"
    }

    update {
      max_parallel     = 1
      min_healthy_time = "30s"
      auto_revert      = true
    }

    task "qdrant" {
      driver = "docker"

      config {
        image = "qdrant/qdrant:v1.17"
        ports = ["http", "grpc"]
      }

      volume_mount {
        volume      = "qdrant-data"
        destination = "/qdrant/storage"
        read_only   = false
      }

      resources {
        cpu    = 500
        memory = 128
      }

      service {
        name = "qdrant"
        tags = ["vector-db", "ai", "http"]
        port = "http"
        check { type =
"tcp"
          port     = "http"
          interval = "30s"
          timeout  = "2s"
        }
      }
    }
  }
}

variable "region" {
  type    = string
  default = "global"
}

================================================
FILE: nomad_jobs/storage-backends/qdrant/volume.hcl
================================================
# Qdrant vector database storage volume
id        = "qdrant-data"
name      = "qdrant-data"
type      = "csi"
plugin_id = "org.democratic-csi.iscsi"

capacity_min = "10GiB"
capacity_max = "10GiB"

# NOTE(review): this registers a "block-device" capability, but the qdrant job
# claims the volume with attachment_mode = "file-system" — confirm the volume
# also advertises a file-system capability or the claim may not match.
capability {
  access_mode     = "single-node-writer"
  attachment_mode = "block-device"
}

mount_options {
  fs_type     = "ext4"
  mount_flags = ["noatime"]
}

================================================
FILE: nomad_jobs/storage-backends/redis/nomad.job
================================================
job "redis" {
  region      = var.region
  datacenters = ["dc1"]
  type        = "service"

  meta {
    job_file = "nomad_jobs/storage-backends/redis/nomad.job"
    version  = "5" // Pass persistence settings as redis-server arguments
  }

  constraint {
    attribute = "${meta.shared_mount}"
    operator  = "="
    value     = "true"
  }

  group "db" {
    count = 1

    network {
      mode = "host"
      port "redis" {
        static       = 6379
        host_network = "lan"
      }
    }

    volume "redis" {
      type            = "csi"
      read_only       = false
      source          = "redis-data"
      access_mode     = "single-node-writer"
      attachment_mode = "file-system"
    }

    restart {
      attempts = 3
      delay    = "15s"
      interval = "10m"
      mode     = "delay"
    }

    update {
      max_parallel     = 1
      min_healthy_time = "30s"
      auto_revert      = true
    }

    # Pre-start chore: open up permissions on the CSI mount before redis starts.
    task "prep-disk" {
      driver = "docker"

      volume_mount {
        volume      = "redis"
        destination = "/volume/"
        read_only   = false
      }

      config {
        image   = "busybox:latest"
        command = "sh"
        args    = ["-c", "chmod 777 /volume/"]
      }

      resources {
        cpu    = 200
        memory = 128
      }

      lifecycle {
        hook    = "prestart"
        sidecar = false
      }
    }

    task "redis" {
      driver = "docker"

      config {
        image = "redis:8.6.2-alpine"
        ports = ["redis"]
        # The official redis image does not read REDIS_SAVE_TO_DISK or
        # REDIS_APPENDONLY environment variables, so the previous env block
        # silently configured nothing. Pass the intended settings as server
        # arguments: snapshot every 60s if >=1 key changed, and enable AOF.
        args = ["redis-server", "--save", "60 1", "--appendonly", "yes"]
      }

      volume_mount {
        volume      = "redis"
        destination = "/data"
        read_only   = false
      }

      service {
        name = "redis"
        port = "redis"
        check {
          type     = "tcp"
          port     = "redis"
          interval = "10s"
          timeout  = "2s"
        }
      }

      resources {
        cpu    = 300
        memory = 128
      }
    }
  }
}

variable "region" { type = string }
variable "shared_dir" { type = string }

================================================
FILE: nomad_jobs/storage-backends/redis/volume.hcl
================================================
id          = "redis-data"
external_id = "redis-data"
name        = "redis-data"
type        = "csi"
plugin_id   = "org.democratic-csi.iscsi"

capacity_min = "5GiB"
capacity_max = "5GiB"

capability {
  access_mode     = "single-node-writer"
  attachment_mode = "block-device"
}

mount_options {
  fs_type     = "ext4"
  mount_flags = ["noatime", "nodiratime", "data=ordered"]
}

================================================
FILE: nomad_jobs/storage-backends/volumes/nfs-example.hcl
================================================
type        = "csi"
id          = "example"
name        = "example"
plugin_id   = "nfsofficial"
external_id = "example"

capability {
  access_mode     = "multi-node-multi-writer"
  attachment_mode = "file-system"
}

context {
  server           = "192.168.50.208"
  share            = "/mnt/pool0/share/example"
  mountPermissions = "0"
}

mount_options {
  fs_type     = "nfs"
  mount_flags = ["timeo=30", "intr", "vers=3", "_netdev", "nolock"]
}

================================================
FILE: nomad_jobs/system/docker-cleanup/nomad.job
================================================
job "docker-cleanup" {
  region      = var.region
  datacenters = ["dc1"]
  type        = "sysbatch"

  meta {
    job_file = "nomad_jobs/system/docker-cleanup/nomad.job"
    version  = "1"
  }

  # Run weekly on Sundays at 2 AM
  periodic {
    crons            = ["0 2 * * 0"]
    prohibit_overlap = true
    time_zone        = "UTC"
  }

  group "cleanup" {
    # sysbatch will automatically run on all eligible nodes
    restart {
      attempts = 3
      delay    = "15s"
      interval = "10m"
      mode     = "delay"
    }

    task "docker-prune" {
      driver = "raw_exec"

      config {
        command = "/bin/bash"
        # NOTE(review): everything between the "<" below and the wordpress job
        # content further down was destroyed in extraction (a "<<EOF" heredoc
        # and subsequent text up to a later ">" were stripped). The prune
        # script, the rest of this job, and the wordpress job header are lost
        # here and must be restored from the repository.
        args = ["-c", <&1 >/dev/null; do echo '.'; sleep 2; done"]
        network_mode = "host"
      }
      resources { cpu = 200
memory = 128 } lifecycle { hook = "prestart" sidecar = false } } update { max_parallel = 1 min_healthy_time = "30s" auto_revert = true } task "wordpress" { driver = "docker" template { data = <
# NOTE(review): the template body that belonged to the "<<EOH" heredoc (closed
# by the "EOH" on the following content line) was lost when this extract
# stripped angle-bracket spans; recover local/index.html's contents from the
# repository before relying on this job file. Code above kept byte-identical.

EOH destination = "local/index.html" env = false perms = 755 change_mode = "signal" change_signal = "SIGHUP" left_delimiter = "{{" right_delimiter = "}}" } resources { cpu = 100 memory = 128 } } } } variable "region" { type = string } variable "tld" { type = string } variable "shared_dir" { type = string } ================================================ FILE: renovate.json ================================================ { "$schema": "https://docs.renovatebot.com/renovate-schema.json", "extends": [ "config:recommended" ], "dependencyDashboard": true, "dependencyDashboardTitle": "🔄 Dependency Dashboard - Homelab Updates", "assignees": [ "perrymanuk" ], "reviewers": [ "perrymanuk" ], "packageRules": [ { "matchPackageNames": [ "prom/prometheus" ], "allowedVersions": "/^v[0-9]+\\.[0-9]+\\.[2-9]+$/", "automerge": true, "ignoreTests": true }, { "matchUpdateTypes": [ "minor", "patch", "pin", "digest" ], "automerge": true, "ignoreTests": true }, { "matchPackageNames": [ "postgres", "mariadb", "mysql" ], "matchUpdateTypes": [ "major" ], "enabled": false, "description": "Block major database version updates - require manual approval", "dependencyDashboardApproval": true, "prCreation": "approval" }, { "versioning": "regex:^(?<compatibility>.*?)-(?<major>\\d+)\\.(?<minor>\\d+)\\.(?<patch>\\d+)$", "matchPackageNames": [ "/^lscr.io\\/linuxserver\\//" ] }, { "matchPackageNames": [ "ghcr.io/perrymanuk/radbot" ], "enabled": false, "description": "Managed by radbot CI dispatch workflow" } ], "customManagers": [ { "customType": "regex", "managerFilePatterns": [ "/(^|/)*\\.job$/" ], "matchStrings": [ "\\s*image\\s*=\\s*\"(?<depName>.*?)[@:](?<currentValue>.*?)\"\\n" ], "datasourceTemplate": "docker" } ] } ================================================ FILE: services/beefcake.json ================================================ { "Service": { "Name": "beefcake", "ID": "beefcake-instance-1", "Address": "192.168.50.208", "Port": 80, "Check": { "HTTP": "http://192.168.50.208:80", "Interval": "10s", "Timeout": "5s" }, "Tags": 
["traefik.enable=true"] } }