ci + ansible

2026-06-20 11:06:27 +07:00
parent a6d74d9316
commit 966578ed58
15 changed files with 637 additions and 30 deletions
@@ -0,0 +1,70 @@
+# GitHub Actions self-hosted runners — Ansible
+
+Provisions self-hosted runners as **systemd services**. One host can run runners for
+multiple projects (e.g. `gatehouse-api` + `gatehouse-ui`); each project gets its own
+runner name, install dir, label, repo URL, and registration token.
+
+## Layout
+
+```
+ansible.cfg                 inventory + ssh defaults
+inventory.ini               stage / prod host groups (set ansible_host)
+group_vars/all.yml          pinned runner version + sha256, paths, env_file
+host_vars/<host>.yml        runner_env + per-project `runners` matrix
+tasks/install_project.yml   reads token, expands count -> N instances
+tasks/install_one_runner.yml  download/verify/extract/register/service one runner
+install-runner.yml          the playbook
+```
+
+## Runner naming
+
+- name: `<host>-<project>-<env>-<N>` e.g. `stage01-gatehouse-api-stage-1`
+- dir:  `/home/github-runner/actions-runner-<project>-<N>`
+- label: `project_spec.label` (e.g. `stage-secuird-runner`, `stage-gatehouse-ui`)
+- service: `actions.runner.<owner>-<repo>.<name>.service`
+
+## Tokens
+
+Registration tokens are **per-repo and expire ~1h after minting**. Set one key per
+project in the repo `.env` (control node), referenced by `token_env` in `host_vars`:
+
+```
+RUNNER_TOKEN_GATEHOUSE_API=AXXX...
+RUNNER_TOKEN_GATEHOUSE_UI=AYYY...
+```
+
+Mint each from the repo's **Settings > Actions > Runners > New self-hosted runner**
+(the value after `--token`), then run the playbook within the hour.
+
+## Run
+
+```bash
+cd deploy/ansible
+
+# 1. Edit inventory.ini (set ansible_host / ansible_user) and host_vars/*.yml.
+# 2. Refresh RUNNER_TOKEN_* in ../../.env.
+
+ansible-playbook --syntax-check install-runner.yml
+ansible-playbook install-runner.yml --check --diff --limit stage   # dry run
+ansible-playbook install-runner.yml --limit stage                  # apply
+```
+
+## Verify
+
+```bash
+# on the host
+systemctl list-units 'actions.runner.*'
+ls /home/github-runner/                # actions-runner-gatehouse-api-1, -2, ...
+```
+
+Each repo's Settings > Actions > Runners should list the runners as **Idle** with the
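+expected label. Idempotent: re-running skips already-configured runners (`creates:`
+guards extract and registration, and the download is skipped once the tarball on disk
+matches the pinned sha256; `--replace` lets a fresh install take over a stale
+registration with the same name).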
+
+## Notes
+
+- Bump `runner_version` + `runner_sha256` together in `group_vars/all.yml`
+  (sha256 from the GitHub release page).
+- `deploy/deploy.sh` needs the runner user to have passwordless sudo for every command
+  it runs via `sudo` (`sed -i` on the nginx site config, `nginx -t`, `nginx -s reload`) —
+  add that sudoers drop-in separately (not yet automated here).
+- `gatehouse-ui` repo URL + label in host_vars are placeholders — confirm before first run.
@@ -0,0 +1,9 @@
+# Ansible defaults for the runner-provisioning playbook in this directory.
+[defaults]
+# Default inventory, so `-i` is not needed on the command line.
+inventory = inventory.ini
+# NOTE(review): turns off SSH host key verification — convenient for freshly
+# provisioned hosts, but confirm this is acceptable for the prod group.
+host_key_checking = False
+# Do not leave *.retry files behind after a failed run.
+retry_files_enabled = False
+# Render task results as YAML.
+# NOTE(review): presumably relies on the `yaml` stdout callback being available
+# on the control node — confirm against the installed Ansible/collections.
+stdout_callback = yaml
+# Auto-discover the remote Python interpreter without discovery warnings.
+interpreter_python = auto_silent
+
+[ssh_connection]
+# Reduce SSH round-trips per task.
+pipelining = True
@@ -0,0 +1,14 @@
+---
+# Shared constants for all runner hosts.
+# runner_user owns every runner directory and is the account passed to
+# `svc.sh install`; runner_home is the parent of all actions-runner-* dirs.
+runner_user: github-runner
+runner_home: /home/github-runner
+
+# Pinned runner release. Bump version + sha256 together.
+# sha256 from the GitHub release page for actions-runner-linux-x64-<version>.tar.gz
+# The tarball name below hard-codes linux-x64, so non-x64 hosts would need a
+# different tarball and checksum.
+runner_version: "2.335.1"
+runner_sha256: "4ef2f25285f0ae4477f1fe1e346db76d2f3ebf03824e2ddd1973a2819bf6c8cf"
+runner_tarball: "actions-runner-linux-x64-{{ runner_version }}.tar.gz"
+runner_download_url: "https://github.com/actions/runner/releases/download/v{{ runner_version }}/{{ runner_tarball }}"
+
+# Repo .env on the control node holding per-project RUNNER_TOKEN_<PROJECT> keys.
+# Resolved relative to the playbook directory (two levels up).
+env_file: "{{ playbook_dir }}/../../.env"
@@ -0,0 +1,15 @@
+---
+# Environment tag embedded in every runner name (<host>-<project>-<env>-<N>).
+runner_env: prod
+
+# One entry per project; the playbook loops over this list.
+#   project   - used in the runner name and the install directory name
+#   url       - repository URL passed to `config.sh --url`
+#   label     - passed to `config.sh --labels`
+#   token_env - key looked up in the control node's .env (registration token)
+#   count     - number of runner instances installed for the project
+runners:
+  - project: gatehouse-api
+    url: https://github.com/CoryHawkless/gatehouse-api
+    label: prod-secuird-runner             # matches runs-on: in push-main.yml
+    token_env: RUNNER_TOKEN_GATEHOUSE_API
+    count: 1
+
+  - project: gatehouse-ui
+    url: https://github.com/CoryHawkless/gatehouse-ui   # TODO: confirm UI repo URL
+    label: prod-gatehouse-ui                             # TODO: confirm UI workflow runs-on label
+    token_env: RUNNER_TOKEN_GATEHOUSE_UI
+    count: 1
@@ -0,0 +1,17 @@
+---
+# Environment tag embedded in every runner name (<host>-<project>-<env>-<N>).
+runner_env: stage
+
+# One entry per project. A host runs runners for every project listed.
+# token_env = key read from .env on the control node (registration token, ~1h TTL).
+# Other fields:
+#   project - used in the runner name and the install directory name
+#   url     - repository URL passed to `config.sh --url`
+#   label   - passed to `config.sh --labels`
+#   count   - number of runner instances installed for the project
+runners:
+  - project: gatehouse-api
+    url: https://github.com/CoryHawkless/gatehouse-api
+    label: stage-secuird-runner            # matches runs-on: in this repo's workflows
+    token_env: RUNNER_TOKEN_GATEHOUSE_API
+    count: 1
+
+  - project: gatehouse-ui
+    url: https://github.com/CoryHawkless/gatehouse-ui   # TODO: confirm UI repo URL
+    label: stage-gatehouse-ui                            # TODO: confirm UI workflow runs-on label
+    token_env: RUNNER_TOKEN_GATEHOUSE_UI
+    count: 1
@@ -0,0 +1,28 @@
+---
+# Entry-point playbook: creates the runner service account, then installs the
+# runners described by each host's `runners` list (one include per project).
+- name: Install GitHub Actions self-hosted runners
+  # Targets every inventory host; narrow the run with --limit.
+  hosts: all
+  become: true
+
+  pre_tasks:
+    # Fail early, with a readable message, when a host has no host_vars.
+    - name: Assert host defines a runners matrix
+      ansible.builtin.assert:
+        that:
+          - runners is defined
+          - runners | length > 0
+          - runner_env is defined
+        fail_msg: "Host {{ inventory_hostname }} is missing host_vars (runners / runner_env)."
+
+  tasks:
+    # Account that owns the runner directories and runs the registration step.
+    - name: Ensure runner service user exists
+      ansible.builtin.user:
+        name: "{{ runner_user }}"
+        shell: /bin/bash
+        create_home: true
+        home: "{{ runner_home }}"
+
+    # Each list entry is exposed to the included tasks as `project_spec`;
+    # the loop label keeps the output to the project name only.
+    - name: Install runners for each project
+      ansible.builtin.include_tasks: tasks/install_project.yml
+      loop: "{{ runners }}"
+      loop_control:
+        loop_var: project_spec
+        label: "{{ project_spec.project }}"
@@ -0,0 +1,9 @@
+# Self-hosted GitHub Actions runner hosts.
+# Per-project runner matrix lives in host_vars/<host>.yml.
+# Set ansible_host (and ansible_user if not root) to real values.
+
+[stage]
+stage01 ansible_host=CHANGE_ME ansible_user=ubuntu
+
+[prod]
+prod01 ansible_host=CHANGE_ME ansible_user=ubuntu
@@ -0,0 +1,67 @@
+---
+# Installs + registers + services a single runner instance.
+# Inputs: project_spec (dict), idx (int), project_token (str).
+#
+# Flow: derive name/dir -> create dir -> download (sha256 verified) -> extract
+# -> register with GitHub -> install the systemd unit -> start it.
+- name: Set per-runner facts
+  ansible.builtin.set_fact:
+    runner_name: "{{ inventory_hostname }}-{{ project_spec.project }}-{{ runner_env }}-{{ idx }}"
+    runner_dir: "{{ runner_home }}/actions-runner-{{ project_spec.project }}-{{ idx }}"
+
+- name: "Create runner dir {{ runner_dir }}"
+  ansible.builtin.file:
+    path: "{{ runner_dir }}"
+    state: directory
+    owner: "{{ runner_user }}"
+    group: "{{ runner_user }}"
+    mode: "0755"
+
+# The tarball is kept inside the runner dir; the checksum pins the release.
+- name: Download runner tarball (sha256 verified)
+  ansible.builtin.get_url:
+    url: "{{ runner_download_url }}"
+    dest: "{{ runner_dir }}/{{ runner_tarball }}"
+    checksum: "sha256:{{ runner_sha256 }}"
+    owner: "{{ runner_user }}"
+    group: "{{ runner_user }}"
+    mode: "0644"
+
+# Skipped once config.sh exists, i.e. after the first successful extract.
+- name: Extract runner
+  ansible.builtin.unarchive:
+    src: "{{ runner_dir }}/{{ runner_tarball }}"
+    dest: "{{ runner_dir }}"
+    remote_src: true
+    owner: "{{ runner_user }}"
+    group: "{{ runner_user }}"
+    creates: "{{ runner_dir }}/config.sh"
+
+# argv passes every value as its own argument, so a URL or label containing
+# whitespace or shell metacharacters cannot be re-split into extra arguments.
+# no_log keeps the registration token out of Ansible output and logs; drop it
+# temporarily when debugging a failed registration.
+# Skipped once .runner exists, i.e. after the first successful registration.
+- name: "Register runner {{ runner_name }}"
+  ansible.builtin.command:
+    argv:
+      - ./config.sh
+      - "--unattended"
+      - "--url"
+      - "{{ project_spec.url }}"
+      - "--token"
+      - "{{ project_token }}"
+      - "--name"
+      - "{{ runner_name }}"
+      - "--labels"
+      - "{{ project_spec.label }}"
+      - "--work"
+      - _work
+      - "--replace"
+    chdir: "{{ runner_dir }}"
+    creates: "{{ runner_dir }}/.runner"
+  become_user: "{{ runner_user }}"
+  no_log: true
+
+# A hidden `.service` file in the runner dir is used as the "already installed"
+# marker for the systemd unit.
+- name: "Check if service installed for {{ runner_name }}"
+  ansible.builtin.find:
+    paths: "{{ runner_dir }}"
+    patterns: ".service"
+    hidden: true
+  register: runner_svc_marker
+
+- name: "Install systemd service for {{ runner_name }}"
+  ansible.builtin.command:
+    cmd: "./svc.sh install {{ runner_user }}"
+    chdir: "{{ runner_dir }}"
+  when: runner_svc_marker.matched == 0
+
+# Runs on every play; reported as changed only when the command output does
+# not contain 'active (running)'.
+- name: "Start + enable service for {{ runner_name }}"
+  ansible.builtin.command:
+    cmd: "./svc.sh start"
+    chdir: "{{ runner_dir }}"
+  register: svc_start
+  changed_when: "'active (running)' not in svc_start.stdout"
@@ -0,0 +1,22 @@
+---
+# Expands one project entry into `count` runner instances.
+# Input: project_spec (dict) — one element of the host's `runners` list.
+
+# The token is read on the control node from env_file (key = project_spec.token_env).
+# no_log keeps the secret out of task output at any verbosity.
+- name: "Read registration token for {{ project_spec.project }} from .env"
+  ansible.builtin.set_fact:
+    project_token: >-
+      {{ lookup('ansible.builtin.ini', project_spec.token_env,
+                file=env_file, type='properties') }}
+  no_log: true
+
+# Only the token's length is checked, so the failure output never shows its value.
+- name: "Fail if token missing for {{ project_spec.project }}"
+  ansible.builtin.assert:
+    that:
+      - project_token | length > 0
+    fail_msg: >-
+      {{ project_spec.token_env }} not found in {{ env_file }}.
+      Mint a fresh registration token (Settings > Actions > Runners) and set it.
+
+# idx runs from 1 to count inclusive; the label keeps loop output free of the
+# full project_spec.
+- name: "Install {{ project_spec.count }} runner(s) for {{ project_spec.project }}"
+  ansible.builtin.include_tasks: install_one_runner.yml
+  loop: "{{ range(1, project_spec.count | int + 1) | list }}"
+  loop_control:
+    loop_var: idx
+    label: "{{ project_spec.project }}-{{ idx }}"
@@ -0,0 +1,134 @@
+#!/usr/bin/env bash
+# Rolling deploy: drains each api container from nginx before replacing it,
+# so users see zero downtime. Job workers are restarted last without draining.
+#
+# Usage:
+#   ./deploy.sh <image-tag>    e.g. ./deploy.sh a1b2c3d
+#
+# Overridable via env vars (defaults below are gatehouse-api):
+#   IMAGE_NAME   docker image name
+#   SVC1/SVC2    compose service names
+#   SVC1_PORT    host port for SVC1
+#   SVC2_PORT    host port for SVC2
+#   HEALTH_PATH  HTTP path for health check
+#   NGINX_CONF   path to nginx site config
+#   COMPOSE_DIR  directory with docker-compose.yml and .env
+#
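+# The runner user needs passwordless sudo for nginx. Replace "runner" with the
+# account the runner service actually runs as (deploy/ansible provisions
+# "github-runner"):
+#   echo "runner ALL=(ALL) NOPASSWD: /usr/sbin/nginx, /bin/systemctl reload nginx" \
+#     | sudo tee /etc/sudoers.d/runner-nginx
+# NOTE: nginx_drain/nginx_restore below also run `sudo sed -i` on NGINX_CONF,
+# which the rule above does not cover — extend the sudoers entry accordingly.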
+set -euo pipefail
+
+TAG="${1:?Usage: deploy.sh <image-tag>  (e.g. deploy.sh a1b2c3d)}"
+
+IMAGE_NAME="${IMAGE_NAME:-gatehouse-api}"
+NGINX_CONF="${NGINX_CONF:-/etc/nginx/conf.d/gatehouse-api.conf}"
+COMPOSE_DIR="${COMPOSE_DIR:-/opt/gatehouse-api}"
+SVC1="${SVC1:-api1}"
+SVC2="${SVC2:-api2}"
+SVC1_PORT="${SVC1_PORT:-5000}"
+SVC2_PORT="${SVC2_PORT:-5001}"
+HEALTH_PATH="${HEALTH_PATH:-/api/health}"
+HEALTH_RETRIES=18     # 18 × 5 s = 90 s max
+HEALTH_INTERVAL=5
+
+# ── helpers ───────────────────────────────────────────────────────────────────
+
+log()  { echo "[$(date '+%H:%M:%S')] $*"; }
+die()  { log "ERROR: $*" >&2; exit 1; }
+step() { log ""; log "── $* ──"; }
+
+health_check() {
+    local port=$1 label=$2 attempt=0
+    log "Waiting for ${label} on :${port}${HEALTH_PATH} ..."
+    while (( attempt < HEALTH_RETRIES )); do
+        if curl -sf "http://127.0.0.1:${port}${HEALTH_PATH}" -o /dev/null; then
+            log "✓ ${label} healthy"
+            return 0
+        fi
+        attempt=$(( attempt + 1 ))
+        log "  [${attempt}/${HEALTH_RETRIES}] not ready, retrying in ${HEALTH_INTERVAL}s"
+        sleep "${HEALTH_INTERVAL}"
+    done
+    log "ERROR: ${label} failed health check after $((HEALTH_RETRIES * HEALTH_INTERVAL))s"
+    return 1
+}
+
+get_service_tag() {
+    docker compose ps -q "$1" 2>/dev/null \
+        | xargs -r docker inspect --format '{{.Config.Image}}' 2>/dev/null \
+        | cut -d: -f2
+}
+
+rollback() {
+    local service=$1 port=$2 old_tag=$3
+    if [[ -z "${old_tag}" ]]; then
+        nginx_restore "${port}"
+        die "Deploy aborted — ${service} failed health check, no previous tag to roll back to"
+    fi
+    log "Rolling back ${service} to ${old_tag}..."
+    IMAGE_TAG="${old_tag}" docker compose up -d --no-deps --force-recreate "${service}"
+    nginx_restore "${port}"
+    die "Deploy aborted — ${service} rolled back to ${old_tag}"
+}
+
+nginx_drain() {
+    local port=$1
+    sudo sed -i "s|server 127.0.0.1:${port};|server 127.0.0.1:${port} down;|" "$NGINX_CONF"
+    sudo nginx -t 2>&1 | tail -2
+    sudo nginx -s reload
+    log "nginx: drained :${port}"
+}
+
+nginx_restore() {
+    local port=$1
+    sudo sed -i "s|server 127.0.0.1:${port} down;|server 127.0.0.1:${port};|" "$NGINX_CONF"
+    sudo nginx -t 2>&1 | tail -2
+    sudo nginx -s reload
+    log "nginx: restored :${port}"
+}
+
+# ── pre-flight ────────────────────────────────────────────────────────────────
+
+cd "${COMPOSE_DIR}"
+pwd; ls -la
+log "Deploying ${IMAGE_NAME}:${TAG}"
+
+docker image inspect "${IMAGE_NAME}:${TAG}" > /dev/null 2>&1 \
+    || die "Image ${IMAGE_NAME}:${TAG} not found locally — build it first."
+
+# ── roll SVC1 ─────────────────────────────────────────────────────────────────
+
+step "${SVC1} → ${TAG}  (traffic: ${SVC2} only)"
+old_svc1=$(get_service_tag "${SVC1}")
+nginx_drain "${SVC1_PORT}"
+log "Waiting 15s for in-flight requests to drain..."
+sleep 15
+
+IMAGE_TAG="${TAG}" docker compose up -d --no-deps --force-recreate "${SVC1}"
+
+health_check "${SVC1_PORT}" "${SVC1}" || rollback "${SVC1}" "${SVC1_PORT}" "${old_svc1}"
+nginx_restore "${SVC1_PORT}"
+
+# ── roll SVC2 ─────────────────────────────────────────────────────────────────
+
+step "${SVC2} → ${TAG}  (traffic: ${SVC1} only)"
+old_svc2=$(get_service_tag "${SVC2}")
+nginx_drain "${SVC2_PORT}"
+log "Waiting 15s for in-flight requests to drain..."
+sleep 15
+
+IMAGE_TAG="${TAG}" docker compose up -d --no-deps --force-recreate "${SVC2}"
+
+health_check "${SVC2_PORT}" "${SVC2}" || rollback "${SVC2}" "${SVC2_PORT}" "${old_svc2}"
+nginx_restore "${SVC2_PORT}"
+
+# ── job workers ───────────────────────────────────────────────────────────────
+
+step "job workers → ${TAG}"
+IMAGE_TAG="${TAG}" docker compose up -d --no-deps --force-recreate zerotier-reconciler mfa-compliance
+
+# ── done ──────────────────────────────────────────────────────────────────────
+
+log ""
+log "Deploy complete ✓  ${IMAGE_NAME}:${TAG}"