ci + ansible
This commit is contained in:
@@ -0,0 +1,70 @@
|
||||
# GitHub Actions self-hosted runners — Ansible
|
||||
|
||||
Provisions self-hosted runners as **systemd services**. One host can run runners for
|
||||
multiple projects (e.g. `gatehouse-api` + `gatehouse-ui`); each project gets its own
|
||||
runner name, install dir, label, repo URL, and registration token.
|
||||
|
||||
## Layout
|
||||
|
||||
```
|
||||
ansible.cfg inventory + ssh defaults
|
||||
inventory.ini stage / prod host groups (set ansible_host)
|
||||
group_vars/all.yml pinned runner version + sha256, paths, env_file
|
||||
host_vars/<host>.yml runner_env + per-project `runners` matrix
|
||||
tasks/install_project.yml reads token, expands count -> N instances
|
||||
tasks/install_one_runner.yml download/verify/extract/register/service one runner
|
||||
install-runner.yml the playbook
|
||||
```
|
||||
|
||||
## Runner naming
|
||||
|
||||
- name: `<host>-<project>-<env>-<N>` e.g. `stage01-gatehouse-api-stage-1`
|
||||
- dir: `/home/github-runner/actions-runner-<project>-<N>`
|
||||
- label: `project_spec.label` (e.g. `stage-secuird-runner`, `stage-gatehouse-ui`)
|
||||
- service: `actions.runner.<owner>-<repo>.<name>.service`
|
||||
|
||||
## Tokens
|
||||
|
||||
Registration tokens are **per-repo and expire ~1h after minting**. Set one key per
|
||||
project in the repo `.env` (control node), referenced by `token_env` in `host_vars`:
|
||||
|
||||
```
|
||||
RUNNER_TOKEN_GATEHOUSE_API=AXXX...
|
||||
RUNNER_TOKEN_GATEHOUSE_UI=AYYY...
|
||||
```
|
||||
|
||||
Mint each from the repo's **Settings > Actions > Runners > New self-hosted runner**
|
||||
(the value after `--token`), then run the playbook within the hour.
|
||||
|
||||
## Run
|
||||
|
||||
```bash
|
||||
cd deploy/ansible
|
||||
|
||||
# 1. Edit inventory.ini (set ansible_host / ansible_user) and host_vars/*.yml.
|
||||
# 2. Refresh RUNNER_TOKEN_* in ../../.env.
|
||||
|
||||
ansible-playbook --syntax-check install-runner.yml
|
||||
ansible-playbook install-runner.yml --check --diff --limit stage # dry run
|
||||
ansible-playbook install-runner.yml --limit stage # apply
|
||||
```
|
||||
|
||||
## Verify
|
||||
|
||||
```bash
|
||||
# on the host
|
||||
systemctl list-units 'actions.runner.*'
|
||||
ls /home/github-runner/ # actions-runner-gatehouse-api-1, -2, ...
|
||||
```
|
||||
|
||||
Each repo's Settings > Actions > Runners should list the runners as **Idle** with the
|
||||
expected label. Idempotent: re-running skips already-configured runners (`--replace`
|
||||
re-registers safely; `creates:` guards extract/config, and the checksum-verified download is skipped when the tarball is already present).
|
||||
|
||||
## Notes
|
||||
|
||||
- Bump `runner_version` + `runner_sha256` together in `group_vars/all.yml`
|
||||
(sha256 from the GitHub release page).
|
||||
- `deploy/deploy.sh` needs the runner user to have passwordless sudo for the nginx commands it runs (`sed -i` on the site config, `nginx -t`, `nginx -s reload`) —
|
||||
add that sudoers drop-in separately (not yet automated here).
|
||||
- `gatehouse-ui` repo URL + label in host_vars are placeholders — confirm before first run.
|
||||
@@ -0,0 +1,9 @@
|
||||
[defaults]
inventory = inventory.ini
# NOTE(review): host key checking is disabled, so SSH will not detect a changed
# or spoofed host key. Consider pre-seeding known_hosts and enabling it.
host_key_checking = False
retry_files_enabled = False
# YAML-formatted task results from the built-in default callback
# (ansible-core >= 2.13). Replaces `stdout_callback = yaml`, which depends on
# the deprecated community.general `yaml` callback plugin.
callback_result_format = yaml
interpreter_python = auto_silent

[ssh_connection]
pipelining = True
|
||||
@@ -0,0 +1,14 @@
|
||||
---
# Constants shared by every runner host.

# Account that owns the runner directories and runs the services.
runner_user: github-runner
runner_home: /home/github-runner

# Pinned runner release. Always bump version + sha256 together; the sha256 is
# published on the GitHub release page for
# actions-runner-linux-x64-<version>.tar.gz
runner_version: '2.335.1'
runner_sha256: '4ef2f25285f0ae4477f1fe1e346db76d2f3ebf03824e2ddd1973a2819bf6c8cf'
runner_tarball: "actions-runner-linux-x64-{{ runner_version }}.tar.gz"
runner_download_url: "https://github.com/actions/runner/releases/download/v{{ runner_version }}/{{ runner_tarball }}"

# Repo .env on the control node; holds the per-project RUNNER_TOKEN_<PROJECT> keys.
env_file: "{{ playbook_dir }}/../../.env"
|
||||
@@ -0,0 +1,15 @@
|
||||
---
# Runner matrix for the prod host: one entry per project it serves.
runner_env: prod

runners:
  # label matches runs-on: in push-main.yml
  - project: gatehouse-api
    url: "https://github.com/CoryHawkless/gatehouse-api"
    label: prod-secuird-runner
    token_env: RUNNER_TOKEN_GATEHOUSE_API
    count: 1

  # TODO: confirm UI repo URL
  # TODO: confirm UI workflow runs-on label
  - project: gatehouse-ui
    url: "https://github.com/CoryHawkless/gatehouse-ui"
    label: prod-gatehouse-ui
    token_env: RUNNER_TOKEN_GATEHOUSE_UI
    count: 1
|
||||
@@ -0,0 +1,17 @@
|
||||
---
runner_env: stage

# One entry per project; the host runs runners for every project listed here.
# token_env names the key read from .env on the control node
# (registration token, ~1h TTL).
runners:
  # label matches runs-on: in this repo's workflows
  - project: gatehouse-api
    url: "https://github.com/CoryHawkless/gatehouse-api"
    label: stage-secuird-runner
    token_env: RUNNER_TOKEN_GATEHOUSE_API
    count: 1

  # TODO: confirm UI repo URL
  # TODO: confirm UI workflow runs-on label
  - project: gatehouse-ui
    url: "https://github.com/CoryHawkless/gatehouse-ui"
    label: stage-gatehouse-ui
    token_env: RUNNER_TOKEN_GATEHOUSE_UI
    count: 1
|
||||
@@ -0,0 +1,28 @@
|
||||
---
# Playbook: create the runner service account, then install every project's
# runners as listed in the host's `runners` matrix (host_vars/<host>.yml).
- name: Install GitHub Actions self-hosted runners
  hosts: all
  become: true

  pre_tasks:
    # Stop early when a host has no host_vars instead of failing mid-install.
    - name: Assert host defines a runners matrix
      ansible.builtin.assert:
        that:
          - runners is defined
          - runners | length > 0
          - runner_env is defined
        fail_msg: "Host {{ inventory_hostname }} is missing host_vars (runners / runner_env)."

  tasks:
    - name: Ensure runner service user exists
      ansible.builtin.user:
        name: "{{ runner_user }}"
        home: "{{ runner_home }}"
        create_home: true
        shell: /bin/bash

    # One include per project entry; install_project.yml expands it to
    # `count` runner instances.
    - name: Install runners for each project
      ansible.builtin.include_tasks: tasks/install_project.yml
      loop: "{{ runners }}"
      loop_control:
        loop_var: project_spec
        label: "{{ project_spec.project }}"
|
||||
@@ -0,0 +1,9 @@
|
||||
# Hosts that run self-hosted GitHub Actions runners.
# The per-project runner matrix for each host lives in host_vars/<host>.yml.
# Replace CHANGE_ME with the real ansible_host (and adjust ansible_user if the
# login user is not the one shown).

[stage]
stage01 ansible_host=CHANGE_ME ansible_user=ubuntu

[prod]
prod01 ansible_host=CHANGE_ME ansible_user=ubuntu
|
||||
@@ -0,0 +1,67 @@
|
||||
---
# Installs + registers + services a single runner instance.
# Inputs: project_spec (dict), idx (int), project_token (str).
- name: Set per-runner facts
  ansible.builtin.set_fact:
    runner_name: "{{ inventory_hostname }}-{{ project_spec.project }}-{{ runner_env }}-{{ idx }}"
    runner_dir: "{{ runner_home }}/actions-runner-{{ project_spec.project }}-{{ idx }}"

- name: "Create runner dir {{ runner_dir }}"
  ansible.builtin.file:
    path: "{{ runner_dir }}"
    state: directory
    owner: "{{ runner_user }}"
    group: "{{ runner_user }}"
    mode: "0755"

# get_url skips the download when the file already exists at dest and matches
# the checksum, so this task is idempotent without a `creates:` guard.
- name: Download runner tarball (sha256 verified)
  ansible.builtin.get_url:
    url: "{{ runner_download_url }}"
    dest: "{{ runner_dir }}/{{ runner_tarball }}"
    checksum: "sha256:{{ runner_sha256 }}"
    owner: "{{ runner_user }}"
    group: "{{ runner_user }}"
    mode: "0644"

- name: Extract runner
  ansible.builtin.unarchive:
    src: "{{ runner_dir }}/{{ runner_tarball }}"
    dest: "{{ runner_dir }}"
    remote_src: true
    owner: "{{ runner_user }}"
    group: "{{ runner_user }}"
    creates: "{{ runner_dir }}/config.sh"

# The registration token is passed on the command line, so the task result
# (which echoes the full command) must be hidden. If registration fails,
# temporarily drop no_log to see the error, then rotate the token.
- name: "Register runner {{ runner_name }}"
  ansible.builtin.command:
    cmd: >-
      ./config.sh --unattended
      --url {{ project_spec.url }}
      --token {{ project_token }}
      --name {{ runner_name }}
      --labels {{ project_spec.label }}
      --work _work
      --replace
    chdir: "{{ runner_dir }}"
    creates: "{{ runner_dir }}/.runner"
  become_user: "{{ runner_user }}"
  no_log: true

# A `.service` marker file in the runner dir is used as the "service already
# installed" signal, so svc.sh install only runs once per runner.
- name: "Check if service installed for {{ runner_name }}"
  ansible.builtin.find:
    paths: "{{ runner_dir }}"
    patterns: ".service"
    hidden: true
  register: runner_svc_marker

- name: "Install systemd service for {{ runner_name }}"
  ansible.builtin.command:
    cmd: "./svc.sh install {{ runner_user }}"
    chdir: "{{ runner_dir }}"
  when: runner_svc_marker.matched == 0

- name: "Start + enable service for {{ runner_name }}"
  ansible.builtin.command:
    cmd: "./svc.sh start"
    chdir: "{{ runner_dir }}"
  register: svc_start
  changed_when: "'active (running)' not in svc_start.stdout"
|
||||
@@ -0,0 +1,22 @@
|
||||
---
# Expands one project entry into `count` runner instances.
# Inputs: project_spec (one item of the host's `runners` list), env_file.
# Sets: project_token, consumed by install_one_runner.yml.
- name: "Read registration token for {{ project_spec.project }} from .env"
  ansible.builtin.set_fact:
    project_token: >-
      {{ lookup('ansible.builtin.ini', project_spec.token_env,
                file=env_file, type='properties') }}
  # The token is a credential: keep it out of task output and logs.
  no_log: true

- name: "Fail if token missing for {{ project_spec.project }}"
  ansible.builtin.assert:
    that:
      - project_token | length > 0
    fail_msg: >-
      {{ project_spec.token_env }} not found in {{ env_file }}.
      Mint a fresh registration token (Settings > Actions > Runners) and set it.

# range() is 1-based here so runner names and dirs are numbered 1..count.
- name: "Install {{ project_spec.count }} runner(s) for {{ project_spec.project }}"
  ansible.builtin.include_tasks: install_one_runner.yml
  loop: "{{ range(1, project_spec.count | int + 1) | list }}"
  loop_control:
    loop_var: idx
    label: "{{ project_spec.project }}-{{ idx }}"
|
||||
Executable
+134
@@ -0,0 +1,134 @@
|
||||
#!/usr/bin/env bash
|
||||
# Rolling deploy: drains each api container from nginx before replacing it,
|
||||
# so users see zero downtime. Job workers are restarted last without draining.
|
||||
#
|
||||
# Usage:
|
||||
# ./deploy.sh <image-tag> e.g. ./deploy.sh a1b2c3d
|
||||
#
|
||||
# Overridable via env vars (defaults below are gatehouse-api):
|
||||
# IMAGE_NAME docker image name
|
||||
# SVC1/SVC2 compose service names
|
||||
# SVC1_PORT host port for SVC1
|
||||
# SVC2_PORT host port for SVC2
|
||||
# HEALTH_PATH HTTP path for health check
|
||||
# NGINX_CONF path to nginx site config
|
||||
# COMPOSE_DIR directory with docker-compose.yml and .env
|
||||
#
|
||||
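# The runner user needs passwordless sudo for the commands this script runs
# via sudo (`sed -i` on NGINX_CONF, `nginx -t`, `nginx -s reload`), e.g. for nginx: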
|
||||
# echo "github-runner ALL=(ALL) NOPASSWD: /usr/sbin/nginx, /bin/systemctl reload nginx" \
|
||||
# | sudo tee /etc/sudoers.d/runner-nginx
|
||||
set -euo pipefail

# Required positional argument: the image tag to deploy.
TAG="${1:?Usage: deploy.sh <image-tag> (e.g. deploy.sh a1b2c3d)}"

# Every setting below can be overridden from the environment; the defaults
# target gatehouse-api.
IMAGE_NAME="${IMAGE_NAME:-gatehouse-api}"
NGINX_CONF="${NGINX_CONF:-/etc/nginx/conf.d/gatehouse-api.conf}"
COMPOSE_DIR="${COMPOSE_DIR:-/opt/gatehouse-api}"
SVC1="${SVC1:-api1}"
SVC2="${SVC2:-api2}"
SVC1_PORT="${SVC1_PORT:-5000}"
SVC2_PORT="${SVC2_PORT:-5001}"
HEALTH_PATH="${HEALTH_PATH:-/api/health}"
# Health-check budget: attempts × interval (default 18 × 5 s = 90 s max).
HEALTH_RETRIES="${HEALTH_RETRIES:-18}"
HEALTH_INTERVAL="${HEALTH_INTERVAL:-5}"
|
||||
|
||||
# ── helpers ───────────────────────────────────────────────────────────────────
|
||||
|
||||
# Print the arguments on one line, prefixed with the current time (HH:MM:SS).
log() {
  printf '[%s] %s\n' "$(date '+%H:%M:%S')" "$*"
}

# Log an error on stderr and terminate the script with exit status 1.
die() {
  log "ERROR: $*" >&2
  exit 1
}

# Print a blank log line followed by a section banner.
step() {
  log ""
  log "── $* ──"
}
|
||||
|
||||
# Poll http://127.0.0.1:<port>${HEALTH_PATH} until it answers successfully.
#   $1  host port to probe
#   $2  label used in log messages
# Tries HEALTH_RETRIES times, sleeping HEALTH_INTERVAL seconds after each
# failure. Each probe is bounded by HEALTH_TIMEOUT seconds (default 5) so a
# connection that is accepted but never answered cannot stall the deploy.
# Returns 0 on the first successful probe, 1 once all attempts are used up.
health_check() {
  local port=$1 label=$2 attempt=0
  local url="http://127.0.0.1:${port}${HEALTH_PATH}"
  local probe_timeout="${HEALTH_TIMEOUT:-5}"
  log "Waiting for ${label} on :${port}${HEALTH_PATH} ..."
  while (( attempt < HEALTH_RETRIES )); do
    # -f makes HTTP error statuses count as failure; -s silences progress output.
    if curl -sf --max-time "${probe_timeout}" "${url}" -o /dev/null; then
      log "✓ ${label} healthy"
      return 0
    fi
    attempt=$(( attempt + 1 ))
    log "  [${attempt}/${HEALTH_RETRIES}] not ready, retrying in ${HEALTH_INTERVAL}s"
    sleep "${HEALTH_INTERVAL}"
  done
  log "ERROR: ${label} failed health check after $((HEALTH_RETRIES * HEALTH_INTERVAL))s" >&2
  return 1
}
|
||||
|
||||
# Print the image tag of the container(s) currently backing compose service $1.
# Prints nothing when the service has no container or its image reference has
# no tag, so callers can treat empty output as "no previous tag".
#
# The tag is whatever follows the LAST ':' provided it contains no '/'. This
# keeps a registry port from being mistaken for the tag
# ("registry:5000/app:abc" -> "abc") and yields nothing for an untagged
# reference ("app", "registry:5000/app") rather than echoing the image name.
get_service_tag() {
  docker compose ps -q "$1" 2>/dev/null \
    | xargs -r docker inspect --format '{{.Config.Image}}' 2>/dev/null \
    | sed -n -E 's#^.*:([^/:]+)$#\1#p'
}
|
||||
|
||||
# Abort the deploy after a failed health check.
#   $1  compose service name
#   $2  host port of that service in the nginx upstream
#   $3  image tag the service ran before the deploy (may be empty)
# With a previous tag the service is recreated on it first; in both cases the
# nginx upstream entry is restored and the script exits via die.
rollback() {
  local svc=$1 svc_port=$2 prev_tag=$3

  if [[ -n "${prev_tag}" ]]; then
    log "Rolling back ${svc} to ${prev_tag}..."
    IMAGE_TAG="${prev_tag}" docker compose up -d --no-deps --force-recreate "${svc}"
    nginx_restore "${svc_port}"
    die "Deploy aborted — ${svc} rolled back to ${prev_tag}"
  fi

  # Nothing to roll back to: just put the upstream entry back and stop.
  nginx_restore "${svc_port}"
  die "Deploy aborted — ${svc} failed health check, no previous tag to roll back to"
}
|
||||
|
||||
# Take the upstream on host port $1 out of rotation: mark its
# `server 127.0.0.1:<port>;` line in NGINX_CONF as `down`, then test and
# reload the nginx configuration.
nginx_drain() {
  local upstream_port=$1
  local mark_down="s|server 127.0.0.1:${upstream_port};|server 127.0.0.1:${upstream_port} down;|"

  sudo sed -i "${mark_down}" "$NGINX_CONF"
  # Show only the last two lines of the config test output.
  sudo nginx -t 2>&1 | tail -2
  sudo nginx -s reload
  log "nginx: drained :${upstream_port}"
}
|
||||
|
||||
# Put the upstream on host port $1 back into rotation: strip the `down` flag
# from its `server 127.0.0.1:<port> down;` line in NGINX_CONF, then test and
# reload the nginx configuration.
nginx_restore() {
  local upstream_port=$1
  local mark_up="s|server 127.0.0.1:${upstream_port} down;|server 127.0.0.1:${upstream_port};|"

  sudo sed -i "${mark_up}" "$NGINX_CONF"
  # Show only the last two lines of the config test output.
  sudo nginx -t 2>&1 | tail -2
  sudo nginx -s reload
  log "nginx: restored :${upstream_port}"
}
|
||||
|
||||
# ── pre-flight ────────────────────────────────────────────────────────────────
|
||||
|
||||
# Seconds to wait after draining an upstream so in-flight requests can finish.
DRAIN_SECONDS="${DRAIN_SECONDS:-15}"

# Roll one api service to ${TAG} while its peer keeps serving traffic.
#   $1  compose service to replace
#   $2  host port of that service in the nginx upstream
#   $3  name of the peer service (used in the banner only)
# Sequence: record the current tag, drain the upstream, wait, recreate the
# container on the new tag, health-check it (rollback exits the script on
# failure), then restore the upstream.
roll_service() {
  local service=$1 port=$2 peer=$3 old_tag

  step "${service} → ${TAG} (traffic: ${peer} only)"
  # Assigned separately from `local` so a failing lookup is not masked.
  old_tag=$(get_service_tag "${service}")
  nginx_drain "${port}"
  log "Waiting ${DRAIN_SECONDS}s for in-flight requests to drain..."
  sleep "${DRAIN_SECONDS}"

  IMAGE_TAG="${TAG}" docker compose up -d --no-deps --force-recreate "${service}"

  health_check "${port}" "${service}" || rollback "${service}" "${port}" "${old_tag}"
  nginx_restore "${port}"
}

cd "${COMPOSE_DIR}"
pwd; ls -la
log "Deploying ${IMAGE_NAME}:${TAG}"

docker image inspect "${IMAGE_NAME}:${TAG}" > /dev/null 2>&1 \
  || die "Image ${IMAGE_NAME}:${TAG} not found locally — build it first."

# ── roll SVC1, then SVC2 ──────────────────────────────────────────────────────

roll_service "${SVC1}" "${SVC1_PORT}" "${SVC2}"
roll_service "${SVC2}" "${SVC2_PORT}" "${SVC1}"

# ── job workers ───────────────────────────────────────────────────────────────

# Space-separated compose services restarted without draining. Defaults to the
# gatehouse-api workers; set JOB_SERVICES="" for a project that has none.
step "job workers → ${TAG}"
read -r -a job_services <<< "${JOB_SERVICES-zerotier-reconciler mfa-compliance}"
if (( ${#job_services[@]} > 0 )); then
  IMAGE_TAG="${TAG}" docker compose up -d --no-deps --force-recreate "${job_services[@]}"
else
  log "No job workers configured (JOB_SERVICES is empty), skipping"
fi

# ── done ──────────────────────────────────────────────────────────────────────

log ""
log "Deploy complete ✓  ${IMAGE_NAME}:${TAG}"
|
||||
Reference in New Issue
Block a user