ci + ansible

This commit is contained in:
sangnn
2026-06-20 11:06:27 +07:00
parent a6d74d9316
commit 966578ed58
15 changed files with 637 additions and 30 deletions
+8
View File
@@ -144,3 +144,11 @@ ZEROTIER_API_URL=
# OIDC_RATE_LIMIT_AUTHORIZE=10/minute
# OIDC_RATE_LIMIT_TOKEN=20/minute
# OIDC_RATE_LIMIT_USERINFO=60/minute
# ═════════════════════════════════════════════════════════════════════════════
# GitHub Actions self-hosted runners (deploy/ansible/install-runner.yml)
# Per-project registration tokens. Short-lived (~1h) — mint fresh from each repo's
# Settings > Actions > Runners > New self-hosted runner right before running the playbook.
# ═════════════════════════════════════════════════════════════════════════════
# RUNNER_TOKEN_GATEHOUSE_API=
# RUNNER_TOKEN_GATEHOUSE_UI=
+54
View File
@@ -0,0 +1,54 @@
# Pull-request gate: secret scan (Gitleaks) and filesystem CVE scan (Trivy).
# NOTE(review): the name says "develop" but the trigger also covers PRs that
# target main — confirm whether the name should be widened.
name: PR -> develop

on:
  pull_request:
    branches:
      - main
      - develop

# Both jobs only read the checked-out tree, so keep the token read-only.
permissions:
  contents: read

env:
  GITLEAKS_VERSION: "8.30.1"

jobs:
  # ── 1. Secret scan ────────────────────────────────────────────────────────────
  gitleaks:
    name: Scan for secrets (Gitleaks)
    runs-on: stage-secuird-runner
    steps:
      - uses: actions/checkout@v4
        with:
          # 0 = fetch full history instead of a shallow clone.
          fetch-depth: 0

      - name: Install Gitleaks
        run: |
          # Mirror the Trivy step: skip the download when the pinned version is
          # already present on the runner.
          if command -v gitleaks >/dev/null 2>&1 \
            && gitleaks version 2>/dev/null | grep -qF "${GITLEAKS_VERSION}"; then
            echo "gitleaks ${GITLEAKS_VERSION} already installed"
            exit 0
          fi
          # Unpack outside the checkout so the binary never lands in the tree
          # that is about to be scanned.
          tmpdir="$(mktemp -d)"
          trap 'rm -rf "${tmpdir}"' EXIT
          curl -sSfL \
            "https://github.com/gitleaks/gitleaks/releases/download/v${GITLEAKS_VERSION}/gitleaks_${GITLEAKS_VERSION}_linux_x64.tar.gz" \
            | tar xz -C "${tmpdir}" gitleaks
          mv "${tmpdir}/gitleaks" /usr/local/bin/gitleaks

      - name: Run secret scan
        run: gitleaks detect --source . --exit-code 1 --redact --verbose --log-level debug

  # ── 2. CVE scan ───────────────────────────────────────────────────────────────
  trivy:
    name: Scan for CVEs (Trivy)
    runs-on: stage-secuird-runner
    steps:
      - uses: actions/checkout@v4

      - name: Install Trivy
        run: |
          # Installs only when trivy is not already on PATH.
          # NOTE(review): the installer is fetched from the `main` branch and is
          # unpinned — consider pinning a release.
          command -v trivy >/dev/null 2>&1 || \
            curl -sSfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh \
            | sh -s -- -b /usr/local/bin

      - name: Run filesystem scan
        run: |
          trivy fs \
            --exit-code 1 \
            --severity HIGH,CRITICAL \
            --no-progress \
            .
+82
View File
@@ -0,0 +1,82 @@
# Staging pipeline: build the images, roll them out with deploy/deploy.sh,
# then report the deploy result.
name: Push -> develop

on:
  push:
    branches:
      - develop
      - ci/deploy

jobs:
  # ── 1. Build ──────────────────────────────────────────────────────────────────
  build:
    name: Build Docker images
    runs-on: stage-secuird-runner
    outputs:
      # Short commit SHA; the deploy job passes it to deploy.sh as the image tag.
      tag: ${{ steps.sha.outputs.tag }}
    steps:
      - uses: actions/checkout@v4

      - name: Set image tag
        id: sha
        run: echo "tag=$(git rev-parse --short HEAD)" >> "$GITHUB_OUTPUT"

      - name: Build api image
        run: |
          docker build \
            -t "gatehouse-api:${{ steps.sha.outputs.tag }}" \
            -t "gatehouse-api:latest" \
            .

      - name: Build job image
        run: |
          docker build \
            -f Dockerfile.job \
            -t "gatehouse-api-job:${{ steps.sha.outputs.tag }}" \
            -t "gatehouse-api-job:latest" \
            .

      - name: Scan api image for vulnerabilities (Trivy)
        run: |
          command -v trivy >/dev/null 2>&1 || \
            curl -sSfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh \
            | sh -s -- -b /usr/local/bin
          # --exit-code 0: findings are reported but do not fail the build.
          trivy image \
            --exit-code 0 \
            --severity HIGH,CRITICAL \
            --no-progress \
            "gatehouse-api:${{ steps.sha.outputs.tag }}"

  # ── 2. Deploy ─────────────────────────────────────────────────────────────────
  deploy:
    name: Rolling deploy
    runs-on: stage-secuird-runner
    needs: build
    # deploy.sh rewrites the nginx site config in place, so never let two
    # deploys to the same environment overlap.
    concurrency:
      group: deploy-stage-gatehouse-api
      cancel-in-progress: false
    env:
      COMPOSE_DIR: /home/ubuntu/secuird/gatehouse-api
    steps:
      - uses: actions/checkout@v4

      - name: Deploy (rolling restart)
        run: |
          cp docker-compose.yml "${COMPOSE_DIR}/docker-compose.yml"
          bash deploy/deploy.sh "${{ needs.build.outputs.tag }}"

  # ── 3. Alert ──────────────────────────────────────────────────────────────────
  alert:
    name: Notify on result
    runs-on: stage-secuird-runner
    # `build` must be listed for needs.build.outputs.tag to resolve; `always()`
    # keeps the notification running when build or deploy fails.
    needs: [build, deploy]
    if: always()
    steps:
      - name: Send notification
        # Values go through env: expressions are expanded even inside shell
        # comments, so keep them out of the script body.
        env:
          STATUS: ${{ needs.deploy.result }}
          TAG: ${{ needs.build.outputs.tag }}
          ALERT_WEBHOOK: ${{ secrets.ALERT_WEBHOOK }}
        run: |
          echo "TODO: send alert — deploy status: ${STATUS}"
          # curl -X POST "${ALERT_WEBHOOK}" \
          #   -H 'Content-Type: application/json' \
          #   -d "{\"text\": \"[gatehouse-api] Deploy ${STATUS} — tag: ${TAG}\"}"
+81
View File
@@ -0,0 +1,81 @@
# Production pipeline: build the images, roll them out with deploy/deploy.sh,
# then report the deploy result.
name: Push -> main

on:
  push:
    branches:
      - main

jobs:
  # ── 1. Build ──────────────────────────────────────────────────────────────────
  build:
    name: Build Docker images
    runs-on: prod-secuird-runner
    outputs:
      # Short commit SHA; the deploy job passes it to deploy.sh as the image tag.
      tag: ${{ steps.sha.outputs.tag }}
    steps:
      - uses: actions/checkout@v4

      - name: Set image tag
        id: sha
        run: echo "tag=$(git rev-parse --short HEAD)" >> "$GITHUB_OUTPUT"

      - name: Build api image
        run: |
          docker build \
            -t "gatehouse-api:${{ steps.sha.outputs.tag }}" \
            -t "gatehouse-api:latest" \
            .

      - name: Build job image
        run: |
          docker build \
            -f Dockerfile.job \
            -t "gatehouse-api-job:${{ steps.sha.outputs.tag }}" \
            -t "gatehouse-api-job:latest" \
            .

      - name: Scan api image for vulnerabilities (Trivy)
        run: |
          command -v trivy >/dev/null 2>&1 || \
            curl -sSfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh \
            | sh -s -- -b /usr/local/bin
          # --exit-code 0: findings are reported but do not fail the build.
          trivy image \
            --exit-code 0 \
            --severity HIGH,CRITICAL \
            --no-progress \
            "gatehouse-api:${{ steps.sha.outputs.tag }}"

  # ── 2. Deploy ─────────────────────────────────────────────────────────────────
  deploy:
    name: Rolling deploy
    runs-on: prod-secuird-runner
    needs: build
    # deploy.sh rewrites the nginx site config in place, so never let two
    # deploys to the same environment overlap.
    concurrency:
      group: deploy-prod-gatehouse-api
      cancel-in-progress: false
    env:
      COMPOSE_DIR: /home/ubuntu/secuird/gatehouse-api # TODO: confirm prod path
    steps:
      - uses: actions/checkout@v4

      - name: Deploy (rolling restart)
        run: |
          cp docker-compose.yml "${COMPOSE_DIR}/docker-compose.yml"
          bash deploy/deploy.sh "${{ needs.build.outputs.tag }}"

  # ── 3. Alert ──────────────────────────────────────────────────────────────────
  alert:
    name: Notify on result
    runs-on: prod-secuird-runner
    # `build` must be listed for needs.build.outputs.tag to resolve; `always()`
    # keeps the notification running when build or deploy fails.
    needs: [build, deploy]
    if: always()
    steps:
      - name: Send notification
        # Values go through env: expressions are expanded even inside shell
        # comments, so keep them out of the script body.
        env:
          STATUS: ${{ needs.deploy.result }}
          TAG: ${{ needs.build.outputs.tag }}
          ALERT_WEBHOOK: ${{ secrets.ALERT_WEBHOOK }}
        run: |
          echo "TODO: send alert — deploy status: ${STATUS}"
          # curl -X POST "${ALERT_WEBHOOK}" \
          #   -H 'Content-Type: application/json' \
          #   -d "{\"text\": \"[gatehouse-api] Deploy ${STATUS} — tag: ${TAG}\"}"
+70
View File
@@ -0,0 +1,70 @@
# GitHub Actions self-hosted runners — Ansible
Provisions self-hosted runners as **systemd services**. One host can run runners for
multiple projects (e.g. `gatehouse-api` + `gatehouse-ui`); each project gets its own
runner name, install dir, label, repo URL, and registration token.
## Layout
```
ansible.cfg inventory + ssh defaults
inventory.ini stage / prod host groups (set ansible_host)
group_vars/all.yml pinned runner version + sha256, paths, env_file
host_vars/<host>.yml runner_env + per-project `runners` matrix
tasks/install_project.yml reads token, expands count -> N instances
tasks/install_one_runner.yml download/verify/extract/register/service one runner
install-runner.yml the playbook
```
## Runner naming
- name: `<host>-<project>-<env>-<N>` e.g. `stage01-gatehouse-api-stage-1`
- dir: `/home/github-runner/actions-runner-<project>-<N>`
- label: `project_spec.label` (e.g. `stage-secuird-runner`, `stage-gatehouse-ui`)
- service: `actions.runner.<owner>-<repo>.<name>.service`
## Tokens
Registration tokens are **per-repo and expire ~1h after minting**. Set one key per
project in the repo `.env` (control node), referenced by `token_env` in `host_vars`:
```
RUNNER_TOKEN_GATEHOUSE_API=AXXX...
RUNNER_TOKEN_GATEHOUSE_UI=AYYY...
```
Mint each from the repo's **Settings > Actions > Runners > New self-hosted runner**
(the value after `--token`), then run the playbook within the hour.
## Run
```bash
cd deploy/ansible
# 1. Edit inventory.ini (set ansible_host / ansible_user) and host_vars/*.yml.
# 2. Refresh RUNNER_TOKEN_* in ../../.env.
ansible-playbook --syntax-check install-runner.yml
ansible-playbook install-runner.yml --check --diff --limit stage # dry run
ansible-playbook install-runner.yml --limit stage # apply
```
## Verify
```bash
# on the host
systemctl list-units 'actions.runner.*'
ls /home/github-runner/ # actions-runner-gatehouse-api-1, -2, ...
```
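Each repo's Settings > Actions > Runners should list the runners as **Idle** with the
expected label. Idempotent: re-running skips already-configured runners (`creates:`
guards extract and registration, the download is skipped when the tarball already
matches the pinned sha256, and `--replace` lets a first-time registration take over
an existing runner of the same name).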
## Notes
- Bump `runner_version` + `runner_sha256` together in `group_vars/all.yml`
(sha256 from the GitHub release page).
- `deploy/deploy.sh` runs `sudo sed -i` on the nginx site config, `sudo nginx -t` and
`sudo nginx -s reload`, so the runner user needs passwordless sudo for those commands —
add that sudoers drop-in separately (not yet automated here).
- `gatehouse-ui` repo URL + label in host_vars are placeholders — confirm before first run.
+9
View File
@@ -0,0 +1,9 @@
# Ansible settings for the runner playbook in this directory.
[defaults]
# Default inventory, so -i is not needed on the command line.
inventory = inventory.ini
# NOTE(review): turns off SSH host key verification — confirm this is acceptable
# for the prod host group.
host_key_checking = False
retry_files_enabled = False
# NOTE(review): the `yaml` stdout callback comes from community.general and is
# reportedly deprecated there — confirm it exists in the installed version.
stdout_callback = yaml
interpreter_python = auto_silent
[ssh_connection]
pipelining = True
+14
View File
@@ -0,0 +1,14 @@
---
# Shared constants for all runner hosts.
# Account that owns the runner directories and runs config.sh.
runner_user: github-runner
# Home of runner_user; each runner's install dir is created beneath it.
runner_home: /home/github-runner
# Pinned runner release. Bump version + sha256 together.
# sha256 from the GitHub release page for actions-runner-linux-x64-<version>.tar.gz
runner_version: "2.335.1"
runner_sha256: "4ef2f25285f0ae4477f1fe1e346db76d2f3ebf03824e2ddd1973a2819bf6c8cf"
# Tarball file name and download URL, both derived from runner_version.
runner_tarball: "actions-runner-linux-x64-{{ runner_version }}.tar.gz"
runner_download_url: "https://github.com/actions/runner/releases/download/v{{ runner_version }}/{{ runner_tarball }}"
# Repo .env on the control node holding per-project RUNNER_TOKEN_<PROJECT> keys.
# Resolved two directories above the playbook directory.
env_file: "{{ playbook_dir }}/../../.env"
+15
View File
@@ -0,0 +1,15 @@
---
# Production runner host: environment name plus the per-project runner matrix.
runner_env: prod

# Each entry registers `count` runner(s) for one repository.
runners:
  - project: gatehouse-api
    url: https://github.com/CoryHawkless/gatehouse-api
    # Must match runs-on: in push-main.yml.
    label: prod-secuird-runner
    token_env: RUNNER_TOKEN_GATEHOUSE_API
    count: 1

  - project: gatehouse-ui
    # TODO: confirm UI repo URL
    url: https://github.com/CoryHawkless/gatehouse-ui
    # TODO: confirm UI workflow runs-on label
    label: prod-gatehouse-ui
    token_env: RUNNER_TOKEN_GATEHOUSE_UI
    count: 1
+17
View File
@@ -0,0 +1,17 @@
---
# Staging runner host: environment name plus the per-project runner matrix.
runner_env: stage

# One entry per project; the host runs runners for every project listed.
# token_env names the key read from .env on the control node
# (registration token, ~1h TTL).
runners:
  - project: gatehouse-api
    url: https://github.com/CoryHawkless/gatehouse-api
    # Must match runs-on: in this repo's workflows.
    label: stage-secuird-runner
    token_env: RUNNER_TOKEN_GATEHOUSE_API
    count: 1

  - project: gatehouse-ui
    # TODO: confirm UI repo URL
    url: https://github.com/CoryHawkless/gatehouse-ui
    # TODO: confirm UI workflow runs-on label
    label: stage-gatehouse-ui
    token_env: RUNNER_TOKEN_GATEHOUSE_UI
    count: 1
+28
View File
@@ -0,0 +1,28 @@
---
# Playbook entry point: validates host_vars, ensures the runner account exists,
# then includes tasks/install_project.yml once per entry in `runners`.
- name: Install GitHub Actions self-hosted runners
  hosts: all
  become: true

  pre_tasks:
    # Fail early with a readable message when host_vars are incomplete.
    - name: Assert host defines a runners matrix
      ansible.builtin.assert:
        that:
          - runners is defined
          - runners | length > 0
          - runner_env is defined
        fail_msg: "Host {{ inventory_hostname }} is missing host_vars (runners / runner_env)."

  tasks:
    - name: Ensure runner service user exists
      ansible.builtin.user:
        name: "{{ runner_user }}"
        home: "{{ runner_home }}"
        create_home: true
        shell: /bin/bash

    # Each iteration exposes the current matrix entry as `project_spec`.
    - name: Install runners for each project
      ansible.builtin.include_tasks: tasks/install_project.yml
      loop: "{{ runners }}"
      loop_control:
        loop_var: project_spec
        label: "{{ project_spec.project }}"
+9
View File
@@ -0,0 +1,9 @@
# Self-hosted GitHub Actions runner hosts.
# Per-project runner matrix lives in host_vars/<host>.yml.
# Set ansible_host (and ansible_user if not root) to real values.
# CHANGE_ME is a placeholder and must be replaced before running the playbook.
[stage]
stage01 ansible_host=CHANGE_ME ansible_user=ubuntu
[prod]
prod01 ansible_host=CHANGE_ME ansible_user=ubuntu
@@ -0,0 +1,67 @@
---
# Installs + registers + services a single runner instance.
# Inputs: project_spec (dict), idx (int), project_token (str).

- name: Set per-runner facts
  ansible.builtin.set_fact:
    runner_name: "{{ inventory_hostname }}-{{ project_spec.project }}-{{ runner_env }}-{{ idx }}"
    runner_dir: "{{ runner_home }}/actions-runner-{{ project_spec.project }}-{{ idx }}"

- name: "Create runner dir {{ runner_dir }}"
  ansible.builtin.file:
    path: "{{ runner_dir }}"
    state: directory
    owner: "{{ runner_user }}"
    group: "{{ runner_user }}"
    mode: "0755"

# The checksum both verifies the download and lets get_url skip it when the
# tarball is already present and matches.
- name: Download runner tarball (sha256 verified)
  ansible.builtin.get_url:
    url: "{{ runner_download_url }}"
    dest: "{{ runner_dir }}/{{ runner_tarball }}"
    checksum: "sha256:{{ runner_sha256 }}"
    owner: "{{ runner_user }}"
    group: "{{ runner_user }}"
    mode: "0644"

- name: Extract runner
  ansible.builtin.unarchive:
    src: "{{ runner_dir }}/{{ runner_tarball }}"
    dest: "{{ runner_dir }}"
    remote_src: true
    owner: "{{ runner_user }}"
    group: "{{ runner_user }}"
    creates: "{{ runner_dir }}/config.sh"

# argv passes each value as its own argument (no word-splitting on the token,
# URL or label). no_log keeps the registration token out of task output.
# `creates` skips the task once .runner exists in the runner dir.
- name: "Register runner {{ runner_name }}"
  ansible.builtin.command:
    argv:
      - ./config.sh
      - --unattended
      - --url
      - "{{ project_spec.url }}"
      - --token
      - "{{ project_token }}"
      - --name
      - "{{ runner_name }}"
      - --labels
      - "{{ project_spec.label }}"
      - --work
      - _work
      - --replace
    chdir: "{{ runner_dir }}"
    creates: "{{ runner_dir }}/.runner"
  become_user: "{{ runner_user }}"
  no_log: true

# Looks for a hidden `.service` file in the runner dir; the install task below
# runs only when none is found.
- name: "Check if service installed for {{ runner_name }}"
  ansible.builtin.find:
    paths: "{{ runner_dir }}"
    patterns: ".service"
    hidden: true
  register: runner_svc_marker

- name: "Install systemd service for {{ runner_name }}"
  ansible.builtin.command:
    cmd: "./svc.sh install {{ runner_user }}"
    chdir: "{{ runner_dir }}"
  when: runner_svc_marker.matched == 0

# NOTE(review): reports "changed" only when the output lacks 'active (running)'
# — confirm this matches the intended idempotence signal.
- name: "Start + enable service for {{ runner_name }}"
  ansible.builtin.command:
    cmd: "./svc.sh start"
    chdir: "{{ runner_dir }}"
  register: svc_start
  changed_when: "'active (running)' not in svc_start.stdout"
+22
View File
@@ -0,0 +1,22 @@
---
# Expands one project entry into `count` runner instances.
# Input: project_spec (dict with project, token_env, count, ...).

# no_log: the fact holds a registration token, so keep it out of task output.
- name: "Read registration token for {{ project_spec.project }} from .env"
  ansible.builtin.set_fact:
    project_token: >-
      {{ lookup('ansible.builtin.ini', project_spec.token_env,
                file=env_file, type='properties') }}
  no_log: true

- name: "Fail if token missing for {{ project_spec.project }}"
  ansible.builtin.assert:
    that:
      - project_token | length > 0
    fail_msg: >-
      {{ project_spec.token_env }} not found in {{ env_file }}.
      Mint a fresh registration token (Settings > Actions > Runners) and set it.

# idx runs from 1 to count inclusive and is consumed by install_one_runner.yml.
- name: "Install {{ project_spec.count }} runner(s) for {{ project_spec.project }}"
  ansible.builtin.include_tasks: install_one_runner.yml
  loop: "{{ range(1, project_spec.count | int + 1) | list }}"
  loop_control:
    loop_var: idx
    label: "{{ project_spec.project }}-{{ idx }}"
+134
View File
@@ -0,0 +1,134 @@
#!/usr/bin/env bash
# Rolling deploy: drains each api container from nginx before replacing it,
# so users see zero downtime. Job workers are restarted last without draining.
#
# Usage:
# ./deploy.sh <image-tag> e.g. ./deploy.sh a1b2c3d
#
# Overridable via env vars (defaults below are gatehouse-api):
# IMAGE_NAME docker image name
# SVC1/SVC2 compose service names
# SVC1_PORT host port for SVC1
# SVC2_PORT host port for SVC2
# HEALTH_PATH HTTP path for health check
# NGINX_CONF path to nginx site config
# COMPOSE_DIR directory with docker-compose.yml and .env
#
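# The runner user needs passwordless sudo for every command this script runs
# through sudo: `sed -i` on NGINX_CONF, `nginx -t` and `nginx -s reload`.
# Example (replace "runner" with the real runner account and verify the paths):
# echo "runner ALL=(ALL) NOPASSWD: /usr/sbin/nginx, /usr/bin/sed, /bin/systemctl reload nginx" \
# | sudo tee /etc/sudoers.d/runner-nginx
# NOTE: unrestricted `sudo sed` is effectively root access; prefer a narrow
# wrapper script if that is a concern.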
# Abort on any command failure, unset variable, or failed pipeline stage.
set -euo pipefail
# Required first argument: the image tag to deploy; prints usage and exits if missing.
TAG="${1:?Usage: deploy.sh <image-tag> (e.g. deploy.sh a1b2c3d)}"
# Each setting below keeps a value from the environment and otherwise falls
# back to the gatehouse-api default shown.
IMAGE_NAME="${IMAGE_NAME:-gatehouse-api}"
NGINX_CONF="${NGINX_CONF:-/etc/nginx/conf.d/gatehouse-api.conf}"
COMPOSE_DIR="${COMPOSE_DIR:-/opt/gatehouse-api}"
SVC1="${SVC1:-api1}"
SVC2="${SVC2:-api2}"
SVC1_PORT="${SVC1_PORT:-5000}"
SVC2_PORT="${SVC2_PORT:-5001}"
HEALTH_PATH="${HEALTH_PATH:-/api/health}"
# Health polling budget used by health_check (not overridable via env).
HEALTH_RETRIES=18 # 18 × 5 s = 90 s max
HEALTH_INTERVAL=5
# ── helpers ───────────────────────────────────────────────────────────────────
# log MSG... — print the arguments on stdout behind an [HH:MM:SS] timestamp.
log() {
  echo "[$(date '+%H:%M:%S')] $*"
}

# die MSG... — log the message as an ERROR on stderr and exit with status 1.
die() {
  log "ERROR: $*" >&2
  exit 1
}

# step TITLE... — print an empty log line followed by a section banner.
step() {
  log ""
  log "── $* ──"
}
# health_check PORT LABEL
# Polls http://127.0.0.1:PORT${HEALTH_PATH} until curl succeeds.
#   PORT   host port of the service to probe
#   LABEL  name used in log messages
# Makes up to HEALTH_RETRIES attempts, sleeping HEALTH_INTERVAL seconds after
# each failure. Each request is capped at HEALTH_TIMEOUT seconds (default 5) so
# a backend that accepts the connection but never answers cannot stall the
# deploy indefinitely.
# Returns 0 on the first successful probe, 1 once all attempts are used up.
health_check() {
  local port=$1 label=$2 attempt=0
  local timeout="${HEALTH_TIMEOUT:-5}"
  log "Waiting for ${label} on :${port}${HEALTH_PATH} ..."
  while (( attempt < HEALTH_RETRIES )); do
    # -f turns HTTP error statuses into a non-zero exit; the body is discarded.
    if curl -sf --max-time "${timeout}" "http://127.0.0.1:${port}${HEALTH_PATH}" -o /dev/null; then
      log "${label} healthy"
      return 0
    fi
    attempt=$(( attempt + 1 ))
    log " [${attempt}/${HEALTH_RETRIES}] not ready, retrying in ${HEALTH_INTERVAL}s"
    sleep "${HEALTH_INTERVAL}"
  done
  log "ERROR: ${label} failed health check after $((HEALTH_RETRIES * HEALTH_INTERVAL))s"
  return 1
}
# get_service_tag SERVICE
# Prints the image tag of the container currently backing compose SERVICE.
# Prints nothing when there is no container, the lookup fails, or the image
# reference has no tag. Always returns 0, so under `set -euo pipefail` a caller
# doing `var=$(get_service_tag ...)` gets an empty value instead of a silent
# script abort.
get_service_tag() {
  local container_id image name
  container_id="$(docker compose ps -q "$1" 2>/dev/null | head -n 1)" || true
  if [[ -z "${container_id}" ]]; then
    return 0
  fi
  image="$(docker inspect --format '{{.Config.Image}}' "${container_id}" 2>/dev/null)" || true
  # Drop any registry/path prefix first so a registry port such as
  # "registry:5000/app" is not mistaken for a tag.
  name="${image##*/}"
  # Digest references (name@sha256:...) carry no tag that can be rolled back to.
  if [[ "${name}" == *:* && "${name}" != *@* ]]; then
    printf '%s\n' "${name##*:}"
  fi
  return 0
}
# rollback SERVICE PORT OLD_TAG
# Failure path after an unsuccessful health check; never returns (ends in die).
# With a non-empty OLD_TAG the service is recreated on that tag first. In both
# cases the nginx upstream for PORT is restored before aborting.
rollback() {
  local svc=$1 upstream_port=$2 previous_tag=$3
  if [[ -n "${previous_tag}" ]]; then
    log "Rolling back ${svc} to ${previous_tag}..."
    IMAGE_TAG="${previous_tag}" docker compose up -d --no-deps --force-recreate "${svc}"
    nginx_restore "${upstream_port}"
    die "Deploy aborted — ${svc} rolled back to ${previous_tag}"
  else
    nginx_restore "${upstream_port}"
    die "Deploy aborted — ${svc} failed health check, no previous tag to roll back to"
  fi
}
# Shared tail of drain/restore: show the last two lines of `nginx -t`, then reload.
_nginx_test_and_reload() {
  sudo nginx -t 2>&1 | tail -2
  sudo nginx -s reload
}

# nginx_drain PORT — mark the upstream `server 127.0.0.1:PORT;` line in
# NGINX_CONF as `down`, then test and reload nginx.
nginx_drain() {
  local upstream_port=$1
  local active="server 127.0.0.1:${upstream_port};"
  local drained="server 127.0.0.1:${upstream_port} down;"
  sudo sed -i "s|${active}|${drained}|" "$NGINX_CONF"
  _nginx_test_and_reload
  log "nginx: drained :${upstream_port}"
}

# nginx_restore PORT — reverse of nginx_drain: remove the `down` marker from the
# upstream line for PORT, then test and reload nginx.
nginx_restore() {
  local upstream_port=$1
  local active="server 127.0.0.1:${upstream_port};"
  local drained="server 127.0.0.1:${upstream_port} down;"
  sudo sed -i "s|${drained}|${active}|" "$NGINX_CONF"
  _nginx_test_and_reload
  log "nginx: restored :${upstream_port}"
}
# roll_service SERVICE PORT PEER
# Replaces one api service with ${TAG} while PEER keeps serving traffic:
# drain PORT in nginx, wait for in-flight requests, recreate the container,
# then health-check it. On a failed health check rollback() aborts the script;
# otherwise the upstream is put back into rotation.
roll_service() {
  local service=$1 port=$2 peer=$3 previous_tag
  step "${service} → ${TAG} (traffic: ${peer} only)"
  # Tag currently running, kept for rollback. A failed lookup is treated as
  # "no previous tag" rather than aborting under `set -e`.
  previous_tag=$(get_service_tag "${service}") || previous_tag=""
  nginx_drain "${port}"
  log "Waiting 15s for in-flight requests to drain..."
  sleep 15
  IMAGE_TAG="${TAG}" docker compose up -d --no-deps --force-recreate "${service}"
  health_check "${port}" "${service}" || rollback "${service}" "${port}" "${previous_tag}"
  nginx_restore "${port}"
}

# ── pre-flight ────────────────────────────────────────────────────────────────
cd "${COMPOSE_DIR}"
pwd; ls -la
log "Deploying ${IMAGE_NAME}:${TAG}"
# The image must already exist locally; this script never pulls or builds.
docker image inspect "${IMAGE_NAME}:${TAG}" > /dev/null 2>&1 \
  || die "Image ${IMAGE_NAME}:${TAG} not found locally — build it first."

# ── roll SVC1, then SVC2 ──────────────────────────────────────────────────────
roll_service "${SVC1}" "${SVC1_PORT}" "${SVC2}"
roll_service "${SVC2}" "${SVC2_PORT}" "${SVC1}"

# ── job workers ───────────────────────────────────────────────────────────────
# Recreated last, without draining or a health check.
step "job workers → ${TAG}"
IMAGE_TAG="${TAG}" docker compose up -d --no-deps --force-recreate zerotier-reconciler mfa-compliance

# ── done ──────────────────────────────────────────────────────────────────────
log ""
log "Deploy complete ✓ ${IMAGE_NAME}:${TAG}"
+27 -30
View File
@@ -1,14 +1,12 @@
version: '3.8'
services:
api:
build:
context: .
dockerfile: Dockerfile
api1:
image: gatehouse-api:${IMAGE_TAG:-latest}
env_file:
- .env
ports:
- "${API_PORT:-5000}:5000"
- "127.0.0.1:5000:5000"
depends_on:
db:
condition: service_healthy
@@ -18,7 +16,28 @@ services:
- authy2-network
restart: unless-stopped
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:5000/api/health"]
test: ["CMD", "curl", "-f", "http://127.0.0.1:5000/api/health"]
interval: 30s
timeout: 10s
retries: 3
start_period: 40s
api2:
image: gatehouse-api:${IMAGE_TAG:-latest}
env_file:
- .env
ports:
- "127.0.0.1:5001:5000"
depends_on:
db:
condition: service_healthy
redis:
condition: service_healthy
networks:
- authy2-network
restart: unless-stopped
healthcheck:
test: ["CMD", "curl", "-f", "http://127.0.0.1:5000/api/health"]
interval: 30s
timeout: 10s
retries: 3
@@ -60,28 +79,8 @@ services:
ports:
- "6379:6379"
nginx:
image: nginx:1.27-alpine
volumes:
- ./docker/nginx.conf:/etc/nginx/nginx.conf:ro
ports:
- "${HTTP_PORT:-80}:80"
- "${HTTPS_PORT:-443}:443"
depends_on:
- api
networks:
- authy2-network
restart: unless-stopped
healthcheck:
test: ["CMD", "nginx", "-t"]
interval: 30s
timeout: 10s
retries: 3
zerotier-reconciler:
build:
context: .
dockerfile: Dockerfile.job
image: gatehouse-api-job:${IMAGE_TAG:-latest}
env_file:
- .env
environment:
@@ -97,9 +96,7 @@ services:
restart: unless-stopped
mfa-compliance:
build:
context: .
dockerfile: Dockerfile.job
image: gatehouse-api-job:${IMAGE_TAG:-latest}
env_file:
- .env
environment: