ci + ansible

This commit is contained in:
sangnn
2026-06-20 11:06:27 +07:00
parent a6d74d9316
commit 966578ed58
15 changed files with 637 additions and 30 deletions
+70
View File
@@ -0,0 +1,70 @@
# GitHub Actions self-hosted runners — Ansible
Provisions self-hosted runners as **systemd services**. One host can run runners for
multiple projects (e.g. `gatehouse-api` + `gatehouse-ui`); each project gets its own
runner name, install dir, label, repo URL, and registration token.
## Layout
```
ansible.cfg                    inventory + ssh defaults
inventory.ini                  stage / prod host groups (set ansible_host)
group_vars/all.yml             pinned runner version + sha256, paths, env_file
host_vars/<host>.yml           runner_env + per-project `runners` matrix
tasks/install_project.yml      reads token, expands count -> N instances
tasks/install_one_runner.yml   download/verify/extract/register/service one runner
install-runner.yml             the playbook
```
## Runner naming
- name: `<host>-<project>-<env>-<N>` e.g. `stage01-gatehouse-api-stage-1`
- dir: `/home/github-runner/actions-runner-<project>-<N>`
- label: `project_spec.label` (e.g. `stage-secuird-runner`, `stage-gatehouse-ui`)
- service: `actions.runner.<owner>-<repo>.<name>.service`
## Tokens
Registration tokens are **per-repo and expire ~1h after minting**. Set one key per
project in the repo `.env` (control node), referenced by `token_env` in `host_vars`:
```
RUNNER_TOKEN_GATEHOUSE_API=AXXX...
RUNNER_TOKEN_GATEHOUSE_UI=AYYY...
```
Mint each from the repo's **Settings > Actions > Runners > New self-hosted runner**
(the value after `--token`), then run the playbook within the hour.
## Run
```bash
cd deploy/ansible
# 1. Edit inventory.ini (set ansible_host / ansible_user) and host_vars/*.yml.
# 2. Refresh RUNNER_TOKEN_* in ../../.env.
ansible-playbook --syntax-check install-runner.yml
ansible-playbook install-runner.yml --check --diff --limit stage # dry run
ansible-playbook install-runner.yml --limit stage # apply
```
## Verify
```bash
# on the host
systemctl list-units 'actions.runner.*'
ls /home/github-runner/ # actions-runner-gatehouse-api-1, -2, ...
```
Each repo's Settings > Actions > Runners should list the runners as **Idle** with the
expected label. Idempotent: re-running skips already-configured runners — `creates:`
guards extract and registration (`--replace` only matters when registration actually
runs), and the download is skipped when a tarball matching the pinned sha256 is already
in place.
## Notes
- Bump `runner_version` + `runner_sha256` together in `group_vars/all.yml`
(sha256 from the GitHub release page).
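- `deploy/deploy.sh` edits the nginx site config with `sudo sed -i` and then runs
  `sudo nginx -t` / `sudo nginx -s reload`, so the runner user (`github-runner`) needs
  passwordless sudo for those commands — add that sudoers drop-in separately (not yet
  automated here).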
- `gatehouse-ui` repo URL + label in host_vars are placeholders — confirm before first run.
+9
View File
@@ -0,0 +1,9 @@
# Ansible settings for the runner playbook. The README runs every command from
# this directory (`cd deploy/ansible`), which is how this file gets picked up.
[defaults]
# Default inventory, so `-i` is not needed on the command line.
inventory = inventory.ini
# NOTE(review): this turns off SSH host-key verification for every host in the
# inventory. Convenient for first contact with fresh machines, but it removes
# protection against a spoofed host — confirm this is acceptable for prod.
host_key_checking = False
# Do not leave *.retry files behind after a failed run.
retry_files_enabled = False
# NOTE(review): the `yaml` stdout callback is provided by a collection rather
# than ansible-core and is, as far as I know, deprecated in recent releases in
# favour of `callback_result_format = yaml` — confirm against the installed version.
stdout_callback = yaml
# Auto-discover the remote Python interpreter without printing the discovery warning.
interpreter_python = auto_silent
[ssh_connection]
# Fewer SSH round-trips per task. The playbook uses `become`, so this presumably
# relies on sudo not enforcing `requiretty` on the targets — TODO confirm.
pipelining = True
+14
View File
@@ -0,0 +1,14 @@
---
# Shared constants for all runner hosts.

# Service account the playbook creates; it owns every runner directory and is
# the user the runner is registered and installed as a service for.
runner_user: github-runner
# Home directory of that account; each runner lives in
# <runner_home>/actions-runner-<project>-<N>.
runner_home: /home/github-runner

# Pinned runner release. Bump version + sha256 together.
# sha256 from the GitHub release page for actions-runner-linux-x64-<version>.tar.gz
# The download task verifies the tarball against this checksum.
runner_version: "2.335.1"
runner_sha256: "4ef2f25285f0ae4477f1fe1e346db76d2f3ebf03824e2ddd1973a2819bf6c8cf"
# Tarball name is hard-wired to linux-x64; other architectures need a different
# name and checksum.
runner_tarball: "actions-runner-linux-x64-{{ runner_version }}.tar.gz"
runner_download_url: "https://github.com/actions/runner/releases/download/v{{ runner_version }}/{{ runner_tarball }}"

# Repo .env on the control node holding per-project RUNNER_TOKEN_<PROJECT> keys.
# Resolved relative to the playbook directory and read with the `ini` lookup
# (type=properties) in tasks/install_project.yml.
env_file: "{{ playbook_dir }}/../../.env"
+15
View File
@@ -0,0 +1,15 @@
---
# Runner matrix for a prod host.
# runner_env becomes the <env> segment of every runner name on this host.
runner_env: prod

# One entry per project; `count` runner instances are installed for each entry.
# token_env names the key in the control node's .env that holds the
# registration token for that project's repository.
runners:
  - project: gatehouse-api
    count: 1
    url: "https://github.com/CoryHawkless/gatehouse-api"
    token_env: RUNNER_TOKEN_GATEHOUSE_API
    label: prod-secuird-runner  # matches runs-on: in push-main.yml

  - project: gatehouse-ui
    count: 1
    url: "https://github.com/CoryHawkless/gatehouse-ui"  # TODO: confirm UI repo URL
    token_env: RUNNER_TOKEN_GATEHOUSE_UI
    label: prod-gatehouse-ui  # TODO: confirm UI workflow runs-on label
+17
View File
@@ -0,0 +1,17 @@
---
# Runner matrix for a stage host.
# runner_env becomes the <env> segment of every runner name on this host.
runner_env: stage

# One entry per project. A host runs runners for every project listed.
# token_env = key read from .env on the control node (registration token, ~1h TTL).
runners:
  - project: gatehouse-api
    count: 1
    url: "https://github.com/CoryHawkless/gatehouse-api"
    token_env: RUNNER_TOKEN_GATEHOUSE_API
    label: stage-secuird-runner  # matches runs-on: in this repo's workflows

  - project: gatehouse-ui
    count: 1
    url: "https://github.com/CoryHawkless/gatehouse-ui"  # TODO: confirm UI repo URL
    token_env: RUNNER_TOKEN_GATEHOUSE_UI
    label: stage-gatehouse-ui  # TODO: confirm UI workflow runs-on label
+28
View File
@@ -0,0 +1,28 @@
---
# Provisions GitHub Actions self-hosted runners on every targeted host.
#
# Per host it validates the host_vars runner matrix up front, makes sure the
# service account exists, then includes tasks/install_project.yml once per
# entry in `runners` (exposed to the included tasks as `project_spec`).
- name: Install GitHub Actions self-hosted runners
  hosts: all
  become: true

  pre_tasks:
    - name: Assert host defines a runners matrix
      ansible.builtin.assert:
        that:
          - runners is defined
          - runners | length > 0
          - runner_env is defined
        fail_msg: "Host {{ inventory_hostname }} is missing host_vars (runners / runner_env)."

    # Fail before anything is installed instead of half-way through the loop:
    # every entry needs the keys the task files dereference, and `project`
    # must be unique because it is what the runner directory name is built from.
    - name: Assert every runners entry is complete and unique
      ansible.builtin.assert:
        that:
          - runners | rejectattr('project', 'defined') | list | length == 0
          - runners | rejectattr('url', 'defined') | list | length == 0
          - runners | rejectattr('label', 'defined') | list | length == 0
          - runners | rejectattr('token_env', 'defined') | list | length == 0
          - runners | map(attribute='project') | unique | length == runners | length
        fail_msg: >-
          Host {{ inventory_hostname }}: every `runners` entry needs project, url,
          label and token_env, and `project` values must be unique per host.

  tasks:
    - name: Ensure runner service user exists
      ansible.builtin.user:
        name: "{{ runner_user }}"
        shell: /bin/bash
        create_home: true
        home: "{{ runner_home }}"

    - name: Install runners for each project
      ansible.builtin.include_tasks: tasks/install_project.yml
      loop: "{{ runners }}"
      loop_control:
        loop_var: project_spec
        label: "{{ project_spec.project }}"
+9
View File
@@ -0,0 +1,9 @@
# Self-hosted GitHub Actions runner hosts.
# Per-project runner matrix lives in host_vars/<host>.yml.
# Set ansible_host (and ansible_user if not root) to real values.
# The playbook targets `hosts: all`, so pick an environment with
# `--limit stage` / `--limit prod` as shown in the README.
# CHANGE_ME is a placeholder and must be replaced before the first run.
[stage]
stage01 ansible_host=CHANGE_ME ansible_user=ubuntu
[prod]
prod01 ansible_host=CHANGE_ME ansible_user=ubuntu
@@ -0,0 +1,67 @@
---
# Installs + registers + services a single runner instance.
# Expects from the caller: project_spec (dict), idx (int), project_token (str).

# Derive the runner's registered name and its private install directory.
- name: Set per-runner facts
  ansible.builtin.set_fact:
    runner_dir: "{{ runner_home }}/actions-runner-{{ project_spec.project }}-{{ idx }}"
    runner_name: "{{ inventory_hostname }}-{{ project_spec.project }}-{{ runner_env }}-{{ idx }}"

- name: "Create runner dir {{ runner_dir }}"
  ansible.builtin.file:
    state: directory
    path: "{{ runner_dir }}"
    mode: "0755"
    owner: "{{ runner_user }}"
    group: "{{ runner_user }}"

# The tarball is fetched into the runner's own directory and checked against
# the pinned sha256.
- name: Download runner tarball (sha256 verified)
  ansible.builtin.get_url:
    dest: "{{ runner_dir }}/{{ runner_tarball }}"
    url: "{{ runner_download_url }}"
    checksum: "sha256:{{ runner_sha256 }}"
    mode: "0644"
    owner: "{{ runner_user }}"
    group: "{{ runner_user }}"

# Skipped once config.sh exists, i.e. once the archive has been unpacked.
- name: Extract runner
  ansible.builtin.unarchive:
    remote_src: true
    src: "{{ runner_dir }}/{{ runner_tarball }}"
    dest: "{{ runner_dir }}"
    creates: "{{ runner_dir }}/config.sh"
    owner: "{{ runner_user }}"
    group: "{{ runner_user }}"
# Registers the runner with its repository as the runner user.
# - argv (not a free-form string) so a URL or label containing whitespace or
#   shell metacharacters reaches config.sh as a single argument.
# - The registration token is on the command line, so the task result is
#   hidden by default. Run with `-e runner_no_log=false` to see config.sh
#   output when debugging a failed registration (e.g. an expired token).
# - Skipped once `.runner` exists in the runner directory.
- name: "Register runner {{ runner_name }}"
  ansible.builtin.command:
    argv:
      - ./config.sh
      - --unattended
      - --url
      - "{{ project_spec.url }}"
      - --token
      - "{{ project_token }}"
      - --name
      - "{{ runner_name }}"
      - --labels
      - "{{ project_spec.label }}"
      - --work
      - _work
      - --replace
    chdir: "{{ runner_dir }}"
    creates: "{{ runner_dir }}/.runner"
  become_user: "{{ runner_user }}"
  no_log: "{{ runner_no_log | default(true) }}"
# A `.service` file in the runner directory is used as the marker that
# `svc.sh install` has already been run for this runner.
- name: "Check if service installed for {{ runner_name }}"
  ansible.builtin.find:
    paths: "{{ runner_dir }}"
    patterns: ".service"
    hidden: true
  register: runner_svc_marker

- name: "Install systemd service for {{ runner_name }}"
  ansible.builtin.command:
    cmd: "./svc.sh install {{ runner_user }}"
    chdir: "{{ runner_dir }}"
  when: runner_svc_marker.matched == 0

# Read-only probe. A non-zero exit (service stopped / not installed) is
# expected here and must not fail the play.
- name: "Check service state for {{ runner_name }}"
  ansible.builtin.command:
    cmd: "./svc.sh status"
    chdir: "{{ runner_dir }}"
  register: svc_status
  changed_when: false
  failed_when: false

# Start only when the probe did not report a running service, so the task
# shows "changed" exactly when it started something and is skipped otherwise.
# default('') covers check mode, where the probe is skipped and has no stdout.
- name: "Start + enable service for {{ runner_name }}"
  ansible.builtin.command:
    cmd: "./svc.sh start"
    chdir: "{{ runner_dir }}"
  when: "'active (running)' not in (svc_status.stdout | default(''))"
+22
View File
@@ -0,0 +1,22 @@
---
# Expands one project entry into `count` runner instances.
# Inputs: project_spec (one entry of the host's `runners` list) and env_file
# (path of the .env file on the control node).

# The token is a secret: keep it out of task output. Run with
# `-e runner_no_log=false` if the lookup itself needs debugging.
- name: "Read registration token for {{ project_spec.project }} from .env"
  ansible.builtin.set_fact:
    project_token: >-
      {{ lookup('ansible.builtin.ini', project_spec.token_env,
                file=env_file, type='properties') }}
  no_log: "{{ runner_no_log | default(true) }}"

- name: "Fail if token missing for {{ project_spec.project }}"
  ansible.builtin.assert:
    that:
      - project_token | length > 0
    fail_msg: >-
      {{ project_spec.token_env }} not found in {{ env_file }}.
      Mint a fresh registration token (Settings > Actions > Runners) and set it.

# `count` is optional and defaults to a single runner; instances are numbered
# from 1 and passed to the included tasks as `idx`.
- name: "Install {{ project_spec.count | default(1) }} runner(s) for {{ project_spec.project }}"
  ansible.builtin.include_tasks: install_one_runner.yml
  loop: "{{ range(1, (project_spec.count | default(1) | int) + 1) | list }}"
  loop_control:
    loop_var: idx
    label: "{{ project_spec.project }}-{{ idx }}"
+134
View File
@@ -0,0 +1,134 @@
#!/usr/bin/env bash
# Rolling deploy: each api container is drained from nginx before it is
# replaced, so users see zero downtime. Job workers are restarted last,
# without draining.
#
# Usage:
#   ./deploy.sh <image-tag>        e.g. ./deploy.sh a1b2c3d
#
# Settings that can be overridden from the environment (defaults target
# gatehouse-api):
#   IMAGE_NAME    docker image name
#   SVC1 / SVC2   compose service names
#   SVC1_PORT     host port for SVC1
#   SVC2_PORT     host port for SVC2
#   HEALTH_PATH   HTTP path for the health check
#   NGINX_CONF    path to the nginx site config
#   COMPOSE_DIR   directory with docker-compose.yml and .env
#
# Privileges: the user running this script needs passwordless sudo for
# everything the script invokes through sudo — `sed -i` on NGINX_CONF,
# `nginx -t` and `nginx -s reload`. Example drop-in (adjust the user name and
# binary paths to the host):
#   echo "github-runner ALL=(ALL) NOPASSWD: /usr/sbin/nginx, /usr/bin/sed" \
#     | sudo tee /etc/sudoers.d/runner-nginx
# NOTE(review): passwordless sudo for an unrestricted `sed` is broad — consider
# restricting the rule to the exact command lines used below.
set -euo pipefail

# The image tag is the only positional argument and is mandatory.
TAG="${1:?Usage: deploy.sh <image-tag> (e.g. deploy.sh a1b2c3d)}"

# Environment overrides win; otherwise fall back to the gatehouse-api defaults.
: "${IMAGE_NAME:=gatehouse-api}"
: "${NGINX_CONF:=/etc/nginx/conf.d/gatehouse-api.conf}"
: "${COMPOSE_DIR:=/opt/gatehouse-api}"
: "${SVC1:=api1}"
: "${SVC2:=api2}"
: "${SVC1_PORT:=5000}"
: "${SVC2_PORT:=5001}"
: "${HEALTH_PATH:=/api/health}"

# Health-check budget: 18 attempts, 5 s apart = 90 s max.
HEALTH_RETRIES=18
HEALTH_INTERVAL=5
# ── helpers ───────────────────────────────────────────────────────────────────
# Print a message on stdout, prefixed with the current time as [HH:MM:SS].
log() {
  local stamp
  stamp=$(date '+%H:%M:%S')
  printf '[%s] %s\n' "${stamp}" "$*"
}

# Log an error message on stderr and terminate the script with status 1.
die() {
  log "ERROR: $*" >&2
  exit 1
}

# Announce a new deploy phase: an empty log line followed by a banner line.
step() {
  log ""
  log "── $* ──"
}
# Poll a service's health endpoint on localhost until it answers successfully.
#
# Arguments:
#   $1  host port the service listens on
#   $2  label used in log messages
# Reads HEALTH_PATH, HEALTH_RETRIES and HEALTH_INTERVAL; HEALTH_PROBE_TIMEOUT
# (seconds, default 5) optionally caps a single probe.
# Returns 0 as soon as one probe succeeds, 1 once all attempts are used up.
health_check() {
  local port=$1 label=$2 attempt=0
  local url="http://127.0.0.1:${port}${HEALTH_PATH}"
  # Without a cap, a backend that accepts the connection but never answers
  # would block a single probe forever and defeat the retry budget.
  local probe_timeout="${HEALTH_PROBE_TIMEOUT:-5}"

  log "Waiting for ${label} on :${port}${HEALTH_PATH} ..."
  while (( attempt < HEALTH_RETRIES )); do
    if curl -sf --max-time "${probe_timeout}" -o /dev/null "${url}"; then
      log "${label} healthy"
      return 0
    fi
    attempt=$(( attempt + 1 ))
    log "  [${attempt}/${HEALTH_RETRIES}] not ready, retrying in ${HEALTH_INTERVAL}s"
    sleep "${HEALTH_INTERVAL}"
  done
  log "ERROR: ${label} failed health check after $((HEALTH_RETRIES * HEALTH_INTERVAL))s"
  return 1
}
# Print the image tag the given compose service's container currently runs.
#
# Arguments:
#   $1  compose service name
# Prints nothing when the tag cannot be determined (no container, docker
# error, untagged or digest-only image). Always returns 0, so that a caller
# using `var=$(get_service_tag ...)` under `set -e` is not aborted and can
# treat an empty result as "no previous tag".
get_service_tag() {
  local container_id image_ref last_segment

  container_id=$(docker compose ps -q "$1" 2>/dev/null) || return 0
  container_id="${container_id%%$'\n'*}"   # first container if there are several
  [[ -n "${container_id}" ]] || return 0

  image_ref=$(docker inspect --format '{{.Config.Image}}' "${container_id}" 2>/dev/null) || return 0
  image_ref="${image_ref%%@*}"             # drop an optional @sha256:... digest

  # Only a colon in the last path segment is a tag separator; an earlier one
  # belongs to a registry host:port (e.g. registry:5000/name:tag).
  last_segment="${image_ref##*/}"
  if [[ "${last_segment}" == *:* ]]; then
    printf '%s\n' "${last_segment##*:}"
  fi
  return 0
}
# Abort the deploy after a failed health check, reverting the service to its
# previous image tag when one is known. This function never returns: every
# path ends in `die`.
#
# Arguments:
#   $1  compose service name
#   $2  host port of the service (its nginx upstream entry)
#   $3  previous image tag; empty when there is nothing to roll back to
rollback() {
  local service=$1 port=$2 old_tag=$3

  if [[ -z "${old_tag}" ]]; then
    nginx_restore "${port}"
    die "Deploy aborted — ${service} failed health check, no previous tag to roll back to"
  fi

  log "Rolling back ${service} to ${old_tag}..."
  if ! IMAGE_TAG="${old_tag}" docker compose up -d --no-deps --force-recreate "${service}"; then
    die "Deploy aborted — could not recreate ${service} with ${old_tag}; :${port} left drained in nginx"
  fi

  # Only put the port back into rotation once the rolled-back container
  # actually answers; otherwise keep it drained so nginx does not send traffic
  # to a broken upstream.
  if ! health_check "${port}" "${service}"; then
    die "Deploy aborted — ${service} unhealthy after rollback to ${old_tag}; :${port} left drained in nginx"
  fi

  nginx_restore "${port}"
  die "Deploy aborted — ${service} rolled back to ${old_tag}"
}
# Validate the edited nginx config and reload nginx.
# $1 = action word for the error message, $2 = port that was just edited.
# Prints the last two lines of `nginx -t`; aborts via `die` when the test fails.
_nginx_test_and_reload() {
  local action=$1 port=$2 test_output
  if ! test_output=$(sudo nginx -t 2>&1); then
    printf '%s\n' "${test_output}" | tail -2
    die "nginx: config test failed after ${action} :${port} — nginx not reloaded, ${NGINX_CONF} left as edited"
  fi
  printf '%s\n' "${test_output}" | tail -2
  sudo nginx -s reload
}

# Take the upstream `server 127.0.0.1:<port>;` out of rotation by marking it
# `down` in NGINX_CONF, then reload nginx.
# $1 = host port of the upstream to drain.
nginx_drain() {
  local port=$1
  # sed succeeds even when nothing matches, which would let the deploy replace
  # a container that is still receiving traffic. Refuse when the upstream line
  # is missing (an already-drained line is accepted so a re-run still works).
  if [[ -r "$NGINX_CONF" ]] \
      && ! grep -Eq "server 127\.0\.0\.1:${port}( down)?;" "$NGINX_CONF"; then
    die "nginx: no 'server 127.0.0.1:${port};' upstream in ${NGINX_CONF} — refusing to deploy without draining"
  fi
  sudo sed -i "s|server 127\.0\.0\.1:${port};|server 127.0.0.1:${port} down;|" "$NGINX_CONF"
  _nginx_test_and_reload "draining" "${port}"
  log "nginx: drained :${port}"
}

# Put the upstream `server 127.0.0.1:<port>` back into rotation by removing
# its `down` marker in NGINX_CONF, then reload nginx. A no-op edit when the
# upstream is not currently drained.
# $1 = host port of the upstream to restore.
nginx_restore() {
  local port=$1
  sudo sed -i "s|server 127\.0\.0\.1:${port} down;|server 127.0.0.1:${port};|" "$NGINX_CONF"
  _nginx_test_and_reload "restoring" "${port}"
  log "nginx: restored :${port}"
}
# ── pre-flight ────────────────────────────────────────────────────────────────
cd "${COMPOSE_DIR}"
# Leave a record of the working directory and its contents in the job log.
pwd; ls -la
log "Deploying ${IMAGE_NAME}:${TAG}"
docker image inspect "${IMAGE_NAME}:${TAG}" > /dev/null 2>&1 \
  || die "Image ${IMAGE_NAME}:${TAG} not found locally — build it first."

# Replace one api container while the other keeps serving:
# drain it from nginx, wait for in-flight requests, recreate it on the new
# tag, health-check it (rolling back on failure) and put it back in rotation.
#   $1 service to replace   $2 its host port   $3 the service left serving
# DRAIN_WAIT (seconds, default 15) overrides the in-flight wait.
roll_service() {
  local service=$1 port=$2 peer=$3
  local drain_wait="${DRAIN_WAIT:-15}"
  local previous_tag

  step "${service} → ${TAG} (traffic: ${peer} only)"
  previous_tag=$(get_service_tag "${service}")
  nginx_drain "${port}"
  log "Waiting ${drain_wait}s for in-flight requests to drain..."
  sleep "${drain_wait}"
  IMAGE_TAG="${TAG}" docker compose up -d --no-deps --force-recreate "${service}"
  health_check "${port}" "${service}" || rollback "${service}" "${port}" "${previous_tag}"
  nginx_restore "${port}"
}

# ── roll SVC1, then SVC2 ──────────────────────────────────────────────────────
roll_service "${SVC1}" "${SVC1_PORT}" "${SVC2}"
roll_service "${SVC2}" "${SVC2_PORT}" "${SVC1}"

# ── job workers ───────────────────────────────────────────────────────────────
# JOB_SERVICES: space-separated compose services restarted without draining.
# Defaults to the gatehouse-api workers; set it to an empty string for
# projects that have none.
read -r -a job_services <<< "${JOB_SERVICES-zerotier-reconciler mfa-compliance}"
if (( ${#job_services[@]} > 0 )); then
  step "job workers → ${TAG}"
  IMAGE_TAG="${TAG}" docker compose up -d --no-deps --force-recreate "${job_services[@]}"
fi

# ── done ──────────────────────────────────────────────────────────────────────
log ""
log "Deploy complete ✓ ${IMAGE_NAME}:${TAG}"