1 Commits

Author SHA1 Message Date
sangnn a3b230e65d ci: add ansible and CICD deployment
PR -> develop / Scan for secrets (Gitleaks) (pull_request) Failing after 4s
PR -> develop / Scan for CVEs (Trivy) (pull_request) Successful in 2s
2026-06-23 07:16:42 +00:00
24 changed files with 1077 additions and 31 deletions
+8
View File
@@ -144,3 +144,11 @@ ZEROTIER_API_URL=
# OIDC_RATE_LIMIT_AUTHORIZE=10/minute
# OIDC_RATE_LIMIT_TOKEN=20/minute
# OIDC_RATE_LIMIT_USERINFO=60/minute
# ═════════════════════════════════════════════════════════════════════════════
# Gitea Actions self-hosted runners (deploy/ansible/install-runner.yml)
# Per-project registration tokens. Short-lived (~1h) — mint fresh from each repo's
# Settings > Actions > Runners > Create new runner token. The Ansible playbook reads
# these as EXPORTED ENV VARS on the control node, not from this file:
# export RUNNER_TOKEN_GATEHOUSE_API=xxx
# export RUNNER_TOKEN_GATEHOUSE_UI=yyy
+58
View File
@@ -0,0 +1,58 @@
# Pull-request gate: two independent jobs scan the change for committed
# secrets (Gitleaks) and for HIGH/CRITICAL CVEs (Trivy).
name: PR -> develop

on:
  pull_request:
    branches: [main, develop]

env:
  # Gitleaks release downloaded when the runner has no gitleaks binary yet.
  GITLEAKS_VERSION: '8.30.1'

jobs:
  # ── 1. Secret scan ──────────────────────────────────────────────────────────
  gitleaks:
    name: Scan for secrets (Gitleaks)
    runs-on: stage-gatehouse-api
    steps:
      - uses: actions/checkout@v4
        with:
          # Fetch the complete history rather than a shallow clone.
          fetch-depth: 0

      - name: Install Gitleaks
        run: |
          # Download only when the binary is not already on PATH.
          if ! command -v gitleaks >/dev/null 2>&1; then
            curl -sSfL \
              "https://github.com/gitleaks/gitleaks/releases/download/v${GITLEAKS_VERSION}/gitleaks_${GITLEAKS_VERSION}_linux_x64.tar.gz" \
              | tar xz gitleaks
            mv gitleaks /usr/local/bin/gitleaks
          else
            echo "gitleaks already installed: $(gitleaks version)"
          fi

      - name: Run secret scan
        # --exit-code 1 fails the job when a finding is reported.
        run: >-
          gitleaks detect --source . --exit-code 1
          --redact --verbose --log-level debug

  # ── 2. CVE scan ─────────────────────────────────────────────────────────────
  trivy:
    name: Scan for CVEs (Trivy)
    runs-on: stage-gatehouse-api
    steps:
      - uses: actions/checkout@v4

      - name: Install Trivy
        run: |
          # Run the upstream install script only when trivy is missing.
          if ! command -v trivy >/dev/null 2>&1; then
            curl -sSfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh \
              | sh -s -- -b /usr/local/bin
          fi

      - name: Run filesystem scan
        # Scans the checked-out tree; HIGH/CRITICAL findings fail the job.
        run: trivy fs --exit-code 1 --severity HIGH,CRITICAL --no-progress .
+91
View File
@@ -0,0 +1,91 @@
# Stage pipeline: on every push to develop, build the api and job images on
# the stage runner, report their HIGH/CRITICAL CVEs, roll the compose stack
# onto the new tag, then report the outcome.
name: Push -> develop

on:
  push:
    branches:
      - develop

jobs:
  # ── 1. Build ────────────────────────────────────────────────────────────────
  build:
    name: Build Docker images
    runs-on: stage-gatehouse-api
    outputs:
      # Short commit SHA; the deploy job passes it to deploy.sh as the image tag.
      tag: ${{ steps.sha.outputs.tag }}
    steps:
      - uses: actions/checkout@v4

      - name: Set image tag
        id: sha
        run: echo "tag=$(git rev-parse --short HEAD)" >> "$GITHUB_OUTPUT"

      - name: Build api image
        run: |
          docker build \
            -t "gatehouse-api:${{ steps.sha.outputs.tag }}" \
            -t "gatehouse-api:latest" \
            .

      - name: Build job image
        run: |
          docker build \
            -f Dockerfile.job \
            -t "gatehouse-api-job:${{ steps.sha.outputs.tag }}" \
            -t "gatehouse-api-job:latest" \
            .

      # Report-only: --exit-code 0 means findings never fail the build.
      - name: Scan api image for vulnerabilities (Trivy)
        run: |
          command -v trivy >/dev/null 2>&1 || \
            curl -sSfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh \
            | sh -s -- -b /usr/local/bin
          trivy image \
            --exit-code 0 \
            --severity HIGH,CRITICAL \
            --no-progress \
            "gatehouse-api:${{ steps.sha.outputs.tag }}"

      # Relies on the previous step having made trivy available.
      - name: Scan job image for vulnerabilities (Trivy)
        run: |
          trivy image \
            --exit-code 0 \
            --severity HIGH,CRITICAL \
            --no-progress \
            "gatehouse-api-job:${{ steps.sha.outputs.tag }}"

  # ── 2. Deploy ───────────────────────────────────────────────────────────────
  deploy:
    name: Deploy
    runs-on: stage-gatehouse-api
    needs: build
    env:
      # Inherited by deploy.sh, which reads COMPOSE_DIR from its environment.
      COMPOSE_DIR: /opt/gatehouse-api
    steps:
      - uses: actions/checkout@v4

      - name: Deploy (rolling, zero-downtime)
        run: |
          # Publish the compose file and nginx config, then roll the stack.
          cp docker-compose.yml "${COMPOSE_DIR}/docker-compose.yml"
          mkdir -p "${COMPOSE_DIR}/docker"
          cp docker/nginx.conf "${COMPOSE_DIR}/docker/nginx.conf"
          bash deploy/deploy.sh "${{ needs.build.outputs.tag }}"

  # ── 3. Alert ────────────────────────────────────────────────────────────────
  alert:
    name: Notify on result
    runs-on: stage-gatehouse-api
    # `build` is listed next to `deploy` because the needs context only exposes
    # direct dependencies, and the notification template below reads
    # needs.build.outputs.tag. Ordering is unchanged: deploy already needs build.
    needs: [build, deploy]
    # Run even when an upstream job failed, so failures are reported too.
    if: always()
    steps:
      - name: Send notification
        run: |
          STATUS="${{ needs.deploy.result }}"
          echo "TODO: send alert — deploy status: ${STATUS}"
          # curl -X POST "${{ secrets.ALERT_WEBHOOK }}" \
          #   -H 'Content-Type: application/json' \
          #   -d "{\"text\": \"[gatehouse-api] Deploy ${STATUS} — tag: ${{ needs.build.outputs.tag }}\"}"
+91
View File
@@ -0,0 +1,91 @@
# Production pipeline: on every push to main, build the api and job images on
# the prod runner, report their HIGH/CRITICAL CVEs, roll the compose stack onto
# the new tag, then report the outcome.
name: Push -> main

on:
  push:
    branches:
      - main

jobs:
  # ── 1. Build ────────────────────────────────────────────────────────────────
  build:
    name: Build Docker images
    runs-on: prod-gatehouse-api
    outputs:
      # Short commit SHA; the deploy job passes it to deploy.sh as the image tag.
      tag: ${{ steps.sha.outputs.tag }}
    steps:
      - uses: actions/checkout@v4

      - name: Set image tag
        id: sha
        run: echo "tag=$(git rev-parse --short HEAD)" >> "$GITHUB_OUTPUT"

      - name: Build api image
        run: |
          docker build \
            -t "gatehouse-api:${{ steps.sha.outputs.tag }}" \
            -t "gatehouse-api:latest" \
            .

      - name: Build job image
        run: |
          docker build \
            -f Dockerfile.job \
            -t "gatehouse-api-job:${{ steps.sha.outputs.tag }}" \
            -t "gatehouse-api-job:latest" \
            .

      # Report-only: --exit-code 0 means findings never fail the build.
      - name: Scan api image for vulnerabilities (Trivy)
        run: |
          command -v trivy >/dev/null 2>&1 || \
            curl -sSfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh \
            | sh -s -- -b /usr/local/bin
          trivy image \
            --exit-code 0 \
            --severity HIGH,CRITICAL \
            --no-progress \
            "gatehouse-api:${{ steps.sha.outputs.tag }}"

      # Relies on the previous step having made trivy available.
      - name: Scan job image for vulnerabilities (Trivy)
        run: |
          trivy image \
            --exit-code 0 \
            --severity HIGH,CRITICAL \
            --no-progress \
            "gatehouse-api-job:${{ steps.sha.outputs.tag }}"

  # ── 2. Deploy ───────────────────────────────────────────────────────────────
  deploy:
    name: Deploy
    runs-on: prod-gatehouse-api
    needs: build
    env:
      # Inherited by deploy.sh, which reads COMPOSE_DIR from its environment.
      COMPOSE_DIR: /opt/gatehouse-api
    steps:
      - uses: actions/checkout@v4

      - name: Deploy (rolling, zero-downtime)
        run: |
          # Publish the compose file and nginx config, then roll the stack.
          cp docker-compose.yml "${COMPOSE_DIR}/docker-compose.yml"
          mkdir -p "${COMPOSE_DIR}/docker"
          cp docker/nginx.conf "${COMPOSE_DIR}/docker/nginx.conf"
          bash deploy/deploy.sh "${{ needs.build.outputs.tag }}"

  # ── 3. Alert ────────────────────────────────────────────────────────────────
  alert:
    name: Notify on result
    runs-on: prod-gatehouse-api
    # `build` is listed next to `deploy` because the needs context only exposes
    # direct dependencies, and the notification template below reads
    # needs.build.outputs.tag. Ordering is unchanged: deploy already needs build.
    needs: [build, deploy]
    # Run even when an upstream job failed, so failures are reported too.
    if: always()
    steps:
      - name: Send notification
        run: |
          STATUS="${{ needs.deploy.result }}"
          echo "TODO: send alert — deploy status: ${STATUS}"
          # curl -X POST "${{ secrets.ALERT_WEBHOOK }}" \
          #   -H 'Content-Type: application/json' \
          #   -d "{\"text\": \"[gatehouse-api] Deploy ${STATUS} — tag: ${{ needs.build.outputs.tag }}\"}"
+54
View File
@@ -0,0 +1,54 @@
# Pull-request gate: two independent jobs scan the change for committed
# secrets (Gitleaks) and for HIGH/CRITICAL CVEs (Trivy).
name: PR -> develop

on:
  pull_request:
    branches:
      - main
      - develop

env:
  # Gitleaks release downloaded when the runner has no gitleaks binary yet.
  GITLEAKS_VERSION: "8.30.1"

jobs:
  # ── 1. Secret scan ──────────────────────────────────────────────────────────
  gitleaks:
    name: Scan for secrets (Gitleaks)
    runs-on: stage-secuird-runner
    steps:
      - uses: actions/checkout@v4
        with:
          # Fetch the complete history rather than a shallow clone.
          fetch-depth: 0

      - name: Install Gitleaks
        run: |
          # Skip the download when the binary is already on PATH. Without this
          # guard the step always tries to write /usr/local/bin, which fails
          # when the runner user is not allowed to write there.
          if command -v gitleaks >/dev/null 2>&1; then
            echo "gitleaks already installed: $(gitleaks version)"
            exit 0
          fi
          curl -sSfL \
            "https://github.com/gitleaks/gitleaks/releases/download/v${GITLEAKS_VERSION}/gitleaks_${GITLEAKS_VERSION}_linux_x64.tar.gz" \
            | tar xz gitleaks
          mv gitleaks /usr/local/bin/gitleaks

      - name: Run secret scan
        # --exit-code 1 fails the job when a finding is reported.
        run: gitleaks detect --source . --exit-code 1 --redact --verbose --log-level debug

  # ── 2. CVE scan ─────────────────────────────────────────────────────────────
  trivy:
    name: Scan for CVEs (Trivy)
    runs-on: stage-secuird-runner
    steps:
      - uses: actions/checkout@v4

      - name: Install Trivy
        run: |
          # Run the upstream install script only when trivy is missing.
          command -v trivy >/dev/null 2>&1 || \
            curl -sSfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh \
            | sh -s -- -b /usr/local/bin

      - name: Run filesystem scan
        # Scans the checked-out tree; HIGH/CRITICAL findings fail the job.
        run: |
          trivy fs \
            --exit-code 1 \
            --severity HIGH,CRITICAL \
            --no-progress \
            .
+82
View File
@@ -0,0 +1,82 @@
# Stage pipeline: on pushes to develop (and the ci/deploy branch), build the
# api and job images, report the api image's HIGH/CRITICAL CVEs, roll the
# compose stack onto the new tag, then report the outcome.
name: Push -> develop

on:
  push:
    branches:
      - develop
      - ci/deploy

jobs:
  # ── 1. Build ────────────────────────────────────────────────────────────────
  build:
    name: Build Docker images
    runs-on: stage-secuird-runner
    outputs:
      # Short commit SHA; the deploy job passes it to deploy.sh as the image tag.
      tag: ${{ steps.sha.outputs.tag }}
    steps:
      - uses: actions/checkout@v4

      - name: Set image tag
        id: sha
        run: echo "tag=$(git rev-parse --short HEAD)" >> "$GITHUB_OUTPUT"

      - name: Build api image
        run: |
          docker build \
            -t "gatehouse-api:${{ steps.sha.outputs.tag }}" \
            -t "gatehouse-api:latest" \
            .

      - name: Build job image
        run: |
          docker build \
            -f Dockerfile.job \
            -t "gatehouse-api-job:${{ steps.sha.outputs.tag }}" \
            -t "gatehouse-api-job:latest" \
            .

      # Report-only: --exit-code 0 means findings never fail the build.
      - name: Scan api image for vulnerabilities (Trivy)
        run: |
          command -v trivy >/dev/null 2>&1 || \
            curl -sSfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh \
            | sh -s -- -b /usr/local/bin
          trivy image \
            --exit-code 0 \
            --severity HIGH,CRITICAL \
            --no-progress \
            "gatehouse-api:${{ steps.sha.outputs.tag }}"

  # ── 2. Deploy ───────────────────────────────────────────────────────────────
  deploy:
    name: Rolling deploy
    runs-on: stage-secuird-runner
    needs: build
    env:
      # Inherited by deploy.sh, which reads COMPOSE_DIR from its environment.
      COMPOSE_DIR: /home/ubuntu/secuird/gatehouse-api
    steps:
      - uses: actions/checkout@v4

      - name: Deploy (rolling restart)
        run: |
          cp docker-compose.yml "${COMPOSE_DIR}/docker-compose.yml"
          bash deploy/deploy.sh "${{ needs.build.outputs.tag }}"

  # ── 3. Alert ────────────────────────────────────────────────────────────────
  alert:
    name: Notify on result
    runs-on: stage-secuird-runner
    # `build` is listed next to `deploy` because the needs context only exposes
    # direct dependencies, and the notification template below reads
    # needs.build.outputs.tag. Ordering is unchanged: deploy already needs build.
    needs: [build, deploy]
    # Run even when an upstream job failed, so failures are reported too.
    if: always()
    steps:
      - name: Send notification
        run: |
          STATUS="${{ needs.deploy.result }}"
          echo "TODO: send alert — deploy status: ${STATUS}"
          # curl -X POST "${{ secrets.ALERT_WEBHOOK }}" \
          #   -H 'Content-Type: application/json' \
          #   -d "{\"text\": \"[gatehouse-api] Deploy ${STATUS} — tag: ${{ needs.build.outputs.tag }}\"}"
+81
View File
@@ -0,0 +1,81 @@
# Production pipeline: on every push to main, build the api and job images,
# report the api image's HIGH/CRITICAL CVEs, roll the compose stack onto the
# new tag, then report the outcome.
name: Push -> main

on:
  push:
    branches:
      - main

jobs:
  # ── 1. Build ────────────────────────────────────────────────────────────────
  build:
    name: Build Docker images
    runs-on: prod-secuird-runner
    outputs:
      # Short commit SHA; the deploy job passes it to deploy.sh as the image tag.
      tag: ${{ steps.sha.outputs.tag }}
    steps:
      - uses: actions/checkout@v4

      - name: Set image tag
        id: sha
        run: echo "tag=$(git rev-parse --short HEAD)" >> "$GITHUB_OUTPUT"

      - name: Build api image
        run: |
          docker build \
            -t "gatehouse-api:${{ steps.sha.outputs.tag }}" \
            -t "gatehouse-api:latest" \
            .

      - name: Build job image
        run: |
          docker build \
            -f Dockerfile.job \
            -t "gatehouse-api-job:${{ steps.sha.outputs.tag }}" \
            -t "gatehouse-api-job:latest" \
            .

      # Report-only: --exit-code 0 means findings never fail the build.
      - name: Scan api image for vulnerabilities (Trivy)
        run: |
          command -v trivy >/dev/null 2>&1 || \
            curl -sSfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh \
            | sh -s -- -b /usr/local/bin
          trivy image \
            --exit-code 0 \
            --severity HIGH,CRITICAL \
            --no-progress \
            "gatehouse-api:${{ steps.sha.outputs.tag }}"

  # ── 2. Deploy ───────────────────────────────────────────────────────────────
  deploy:
    name: Rolling deploy
    runs-on: prod-secuird-runner
    needs: build
    env:
      # Inherited by deploy.sh, which reads COMPOSE_DIR from its environment.
      COMPOSE_DIR: /home/ubuntu/secuird/gatehouse-api  # TODO: confirm prod path
    steps:
      - uses: actions/checkout@v4

      - name: Deploy (rolling restart)
        run: |
          cp docker-compose.yml "${COMPOSE_DIR}/docker-compose.yml"
          bash deploy/deploy.sh "${{ needs.build.outputs.tag }}"

  # ── 3. Alert ────────────────────────────────────────────────────────────────
  alert:
    name: Notify on result
    runs-on: prod-secuird-runner
    # `build` is listed next to `deploy` because the needs context only exposes
    # direct dependencies, and the notification template below reads
    # needs.build.outputs.tag. Ordering is unchanged: deploy already needs build.
    needs: [build, deploy]
    # Run even when an upstream job failed, so failures are reported too.
    if: always()
    steps:
      - name: Send notification
        run: |
          STATUS="${{ needs.deploy.result }}"
          echo "TODO: send alert — deploy status: ${STATUS}"
          # curl -X POST "${{ secrets.ALERT_WEBHOOK }}" \
          #   -H 'Content-Type: application/json' \
          #   -d "{\"text\": \"[gatehouse-api] Deploy ${STATUS} — tag: ${{ needs.build.outputs.tag }}\"}"
+17 -5
View File
@@ -19,19 +19,31 @@ COPY requirements/base.txt requirements/base.txt
COPY requirements/production.txt requirements/production.txt
# Install dependencies
RUN pip install --no-cache-dir --upgrade pip wheel && \
# Upgrade build tooling too: clears CVE-2026-24049 (wheel) and CVE-2026-23949 (jaraco.context)
RUN pip install --no-cache-dir --upgrade pip setuptools wheel && \
pip install --no-cache-dir -r requirements/production.txt
# Production stage
FROM python:3.11-slim
# Install runtime dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
# apt-get upgrade pulls patched openssl/openssh/etc. so the image isn't pinned to
# whatever was current when the base layer was published.
# curl intentionally omitted: it was only used by HEALTHCHECK (now a stdlib Python
# check), and dropping it removes libcurl4t64 + libssh2 and their unfixed CVEs.
# NOTE: openssh-client retained for SSH CA workflows; drop it too if nothing shells
# out to ssh/scp (sshkey-tools signing is pure Python).
RUN apt-get update && apt-get upgrade -y && apt-get install -y --no-install-recommends \
libpq5 \
curl \
openssh-client \
&& rm -rf /var/lib/apt/lists/*
# Patch the base image's system-level build tooling that Trivy flags in
# /usr/local site-packages: wheel (CVE-2026-24049) and the jaraco.context
# (CVE-2026-23949) vendored by setuptools. Runs against system pip before the
# venv takes over PATH below.
RUN pip install --no-cache-dir --upgrade pip setuptools wheel
# Create non-root user
RUN groupadd --gid 1000 appgroup && \
useradd --uid 1000 --gid appgroup --shell /bin/bash --create-home appuser
@@ -53,9 +65,9 @@ USER appuser
# Expose port
EXPOSE 5000
# Health check
# Health check (stdlib urllib — avoids shipping curl)
HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
CMD curl -f http://localhost:5000/api/health || exit 1
CMD ["python", "-c", "import urllib.request,sys; sys.exit(0 if urllib.request.urlopen('http://localhost:5000/api/health', timeout=5).getcode()==200 else 1)"]
# Run gunicorn with gevent workers
CMD ["gunicorn", "--bind", "0.0.0.0:5000", \
+6 -2
View File
@@ -12,15 +12,19 @@ WORKDIR /app
COPY requirements/base.txt requirements/base.txt
COPY requirements/production.txt requirements/production.txt
RUN pip install --no-cache-dir --upgrade pip wheel && \
RUN pip install --no-cache-dir --upgrade pip setuptools wheel && \
pip install --no-cache-dir -r requirements/production.txt
FROM python:3.11-slim
RUN apt-get update && apt-get install -y --no-install-recommends \
RUN apt-get update && apt-get upgrade -y && apt-get install -y --no-install-recommends \
libpq5 \
&& rm -rf /var/lib/apt/lists/*
# Patch system-level wheel (CVE-2026-24049) + setuptools-vendored jaraco.context
# (CVE-2026-23949) that Trivy flags in /usr/local site-packages.
RUN pip install --no-cache-dir --upgrade pip setuptools wheel
RUN groupadd --gid 1000 appgroup && \
useradd --uid 1000 --gid appgroup --shell /bin/bash --create-home appuser
+84
View File
@@ -0,0 +1,84 @@
# Gitea Actions self-hosted runners — Ansible
Provisions `act_runner` instances as **systemd services**. One host can run runners for
multiple projects (e.g. `gatehouse-api` + `gatehouse-ui`); each project gets its own
runner name, install dir, label set, repo URL, and registration token.
## Layout
```
ansible.cfg inventory + ssh defaults
inventory.ini stage / prod host groups (set ansible_host)
group_vars/all.yml pinned act_runner version + sha256, gitea_instance URL
host_vars/<host>.yml runner_env + per-project `runners` matrix
tasks/install_project.yml reads token, expands count -> N instances
tasks/install_one_runner.yml download/register/service one runner
install-runner.yml the playbook
```
## Runner naming
- name: `<host>-<project>-<env>-<N>` e.g. `stage01-gatehouse-api-stage-1`
- dir: `/home/gitea-runner/act-runner-<project>-<N>`
- labels: `project_spec.labels` (comma-separated `<name>:<executor>` pairs, e.g. `self-hosted:host,linux:host,stage:host`)
- service: `gitea-runner-<project>-<N>.service`
## Before first run — pin act_runner version
1. Check [act_runner releases](https://gitea.com/gitea/act_runner/releases) for the latest stable version.
2. Download `act_runner-<version>-linux-amd64.sha256` from the release page.
3. Update `act_runner_version` and `act_runner_sha256` in `group_vars/all.yml`.
## Tokens
Registration tokens are **per-repo and expire ~1h after minting**. Exported as env
vars on the control node, named by each project's `token_env` in `host_vars`.
Mint each from the repo's **Settings → Actions → Runners → Create new runner token**,
then run the playbook within the hour.
## Run
```bash
cd deploy/ansible
# 1. Edit inventory.ini (set ansible_host / ansible_user) and host_vars/*.yml.
# 2. Fill in act_runner_version + act_runner_sha256 in group_vars/all.yml.
# 3. Export fresh tokens (start each command with a space to keep it out of shell
#    history; this only works when HISTCONTROL contains ignorespace or ignoreboth):
export RUNNER_TOKEN_GATEHOUSE_API=xxx
export RUNNER_TOKEN_GATEHOUSE_UI=yyy
ansible-playbook --syntax-check install-runner.yml
ansible-playbook install-runner.yml --check --diff --limit stage # dry run
ansible-playbook install-runner.yml --limit stage # apply
```
## Verify
```bash
# on the host
systemctl list-units 'gitea-runner-*'
ls /home/gitea-runner/ # act-runner-gatehouse-api-1, ...
```
Each repo's **Settings → Actions → Runners** should list the runners as **Idle** with
the expected labels. Idempotent: re-running skips already-registered runners
(`creates: .runner` guards re-registration; binary download is checksum-gated).
## Migrating from GitHub Actions runners
Stop and remove the old runners on any existing host before re-running:
```bash
sudo su - github-runner -c "cd ~/actions-runner-*/; ./svc.sh stop && ./svc.sh uninstall"
sudo userdel -r github-runner # optional — removes home dir too
```
## Notes
- Bump `act_runner_version` + `act_runner_sha256` together in `group_vars/all.yml`.
- Labels in `host_vars/<host>.yml` must match the `runs-on:` values in workflow files.
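- `deploy/deploy.sh` drives everything through `docker compose` (nginx runs as a
container), so the runner user needs docker group membership — which the playbook
adds — rather than passwordless sudo. If a host still carries the older sudoers
drop-in for host-nginx reloads, it names `github-runner`: remove it, or update the
username to `gitea-runner` if something still depends on it (not automated here).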
- `prod01.ansible_host` in `inventory.ini` is still `CHANGE_ME` — fill in before running prod.
+10
View File
@@ -0,0 +1,10 @@
# Ansible settings for the runner playbook (picked up when run from this directory).
[defaults]
inventory = inventory.ini
# NOTE(review): SSH host-key verification is switched off, so the control node
# will not notice a substituted host. Confirm this is acceptable for prod.
host_key_checking = False
retry_files_enabled = False
stdout_callback = default
# The default callback reads its output format from `callback_result_format`;
# a bare `result_format` key is not a recognised [defaults] setting.
callback_result_format = yaml
interpreter_python = auto_silent

[ssh_connection]
pipelining = True
+40
View File
@@ -0,0 +1,40 @@
---
# Variables shared by every runner host.

# Service account the runners execute as, and its home directory.
runner_user: gitea-runner
runner_home: /home/gitea-runner

# Parent of the per-project deploy directories: each project deploys into
# <app_base_dir>/<project> (e.g. /opt/gatehouse-api). The directory is owned by
# the runner so the deploy job can write the compose file into it, and it has
# to agree with COMPOSE_DIR in that project's workflow files.
app_base_dir: /opt

gitea_instance: 'https://source.hawkless.id.au'

# Runner release pin — change the version and its sha256 in the same commit.
# Releases: https://gitea.com/gitea/act_runner/releases
# The release page publishes the checksum for the linux-amd64 binary
# (act_runner-<version>-linux-amd64.sha256).
act_runner_version: '1.0.8'
act_runner_sha256: '027d726127bb67e191d57052fdb66e74ec7f76966f790a18727147fa2b8005e5'
act_runner_binary: "gitea-runner-{{ act_runner_version }}-linux-amd64"
act_runner_download_url: "https://gitea.com/gitea/runner/releases/download/v{{ act_runner_version }}/{{ act_runner_binary }}"

# Major version of Node.js installed for the host executor. JavaScript actions
# such as actions/checkout@v4 run under `node`; when it is absent act_runner
# stops with "Cannot find: node in PATH".
node_major_version: '26'

# Scanner versions installed on the host ahead of time, so workflow steps find
# a local binary and never try to write /usr/local/bin as the runner user
# (which ends in "Permission denied"). Keep gitleaks_version equal to
# GITLEAKS_VERSION in pr-security-check.yml.
trivy_version: '0.71.2'
gitleaks_version: '8.30.1'

# Docker CLI plugin pins. A bare engine has neither `docker compose` (needed by
# deploy.sh, which otherwise exits 125) nor `docker buildx`; both are dropped
# into the host cli-plugins directory at these versions.
docker_compose_version: '5.1.4'
docker_buildx_version: '0.35.0'

# Registration tokens are not stored here. Each project's `token_env` names an
# environment variable (e.g. RUNNER_TOKEN_GATEHOUSE_API) that must be exported
# on the control node before the run. Mint tokens in Gitea under
# repo → Settings → Actions → Runners → Create new runner token.
+15
View File
@@ -0,0 +1,15 @@
---
# Runner matrix for the prod host: one entry per project.
#   labels:    comma-separated "<name>:<executor>" pairs; a workflow's
#              `runs-on:` value has to appear here for the job to be picked up.
#   token_env: control-node env var that holds the registration token.
#   count:     number of runner instances to install for the project.
runner_env: prod

runners:
  - project: gatehouse-api
    url: https://source.hawkless.id.au/coryHawkvelt/gatehouse-api
    labels: "self-hosted:host,linux:host,prod:host,prod-gatehouse-api:host"
    token_env: RUNNER_TOKEN_GATEHOUSE_API
    count: 1

  - project: gatehouse-ui
    url: https://source.hawkless.id.au/coryHawkvelt/gatehouse-ui
    # Carries the per-project `prod-gatehouse-ui` label, mirroring
    # `stage-gatehouse-ui` on the stage host and `prod-gatehouse-api` above, so
    # workflows can target this runner by its project-specific name.
    labels: "self-hosted:host,linux:host,prod:host,prod-gatehouse-ui:host"
    token_env: RUNNER_TOKEN_GATEHOUSE_UI
    count: 1
+19
View File
@@ -0,0 +1,19 @@
---
# Runner matrix for the stage host. Every project listed gets its own
# runner(s) on this machine.
#
# Fields per entry:
#   labels     "<name>:<executor>" pairs joined by commas. `:host` runs jobs
#              natively, `:docker://<image>` runs them in Docker. They have to
#              line up with the `runs-on:` values used by the workflows.
#   token_env  name of the control-node environment variable carrying a fresh
#              registration token (valid for roughly one hour).
runner_env: stage

runners:
  - project: gatehouse-api
    url: 'https://source.hawkless.id.au/coryHawkvelt/gatehouse-api'
    labels: 'self-hosted:host,linux:host,stage:host,stage-gatehouse-api:host'
    token_env: RUNNER_TOKEN_GATEHOUSE_API
    count: 1

  - project: gatehouse-ui
    url: 'https://source.hawkless.id.au/coryHawkvelt/gatehouse-ui'
    labels: 'self-hosted:host,linux:host,stage:host,stage-gatehouse-ui:host'
    token_env: RUNNER_TOKEN_GATEHOUSE_UI
    count: 1
+160
View File
@@ -0,0 +1,160 @@
---
# Prepares a host for Gitea Actions jobs and installs its runners:
#   1. service user, git and Node.js for the host executor
#   2. Trivy and Gitleaks, pre-installed so workflows do not install them
#   3. Docker Compose and Buildx CLI plugins
#   4. one deploy directory per project
#   5. the runners themselves (tasks/install_project.yml, once per project)
#   6. docker group membership for the runner user
- name: Install Gitea Actions self-hosted runners
  hosts: all
  become: true

  pre_tasks:
    - name: Assert host defines a runners matrix
      ansible.builtin.assert:
        that:
          - runners is defined
          - runners | length > 0
          - runner_env is defined
        fail_msg: "Host {{ inventory_hostname }} is missing host_vars (runners / runner_env)."

  tasks:
    - name: Ensure runner service user exists
      ansible.builtin.user:
        name: "{{ runner_user }}"
        shell: /bin/bash
        create_home: true
        home: "{{ runner_home }}"

    # JS actions (actions/checkout@v4, etc.) execute with `node` on the host
    # executor. Without it act_runner fails: "Cannot find: node in PATH".
    # git is needed by checkout for its fetch step.
    - name: Ensure git is present
      ansible.builtin.apt:
        name: git
        state: present
        update_cache: true

    - name: Install Node.js {{ node_major_version }}.x (NodeSource)
      block:
        # Key is ASCII-armored, so store it as .asc — apt reads .gpg as binary
        # and .asc as armored; a mismatch fails repo signature verification.
        - name: Add NodeSource apt key
          ansible.builtin.get_url:
            url: https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key
            dest: /usr/share/keyrings/nodesource.asc
            mode: "0644"

        - name: Add NodeSource apt repo
          ansible.builtin.apt_repository:
            repo: "deb [signed-by=/usr/share/keyrings/nodesource.asc] https://deb.nodesource.com/node_{{ node_major_version }}.x nodistro main"
            filename: nodesource

        - name: Install nodejs
          ansible.builtin.apt:
            name: nodejs
            state: present
            update_cache: true

    # Security scanners used by the CI workflows. Pre-installing them (as root)
    # means the workflow steps find them on PATH and skip their runtime install,
    # which would otherwise fail writing to /usr/local/bin as the runner user.
    #
    # The version probes below are read-only, so they run in --check mode too
    # (check_mode: false). Otherwise a dry run would see no version output and
    # report every install as pending.
    - name: Check installed Trivy version
      ansible.builtin.command: trivy --version
      register: trivy_check
      changed_when: false
      failed_when: false
      check_mode: false

    - name: Install Trivy {{ trivy_version }}
      ansible.builtin.shell: |
        set -o pipefail
        curl -sSfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh \
          | sh -s -- -b /usr/local/bin v{{ trivy_version }}
      args:
        executable: /bin/bash
      when: trivy_version not in (trivy_check.stdout | default(''))

    - name: Check installed Gitleaks version
      ansible.builtin.command: gitleaks version
      register: gitleaks_check
      changed_when: false
      failed_when: false
      check_mode: false

    - name: Install Gitleaks {{ gitleaks_version }}
      ansible.builtin.unarchive:
        src: "https://github.com/gitleaks/gitleaks/releases/download/v{{ gitleaks_version }}/gitleaks_{{ gitleaks_version }}_linux_x64.tar.gz"
        dest: /usr/local/bin
        remote_src: true
        include:
          - gitleaks
        mode: "0755"
      when: gitleaks_version not in (gitleaks_check.stdout | default(''))

    # The Docker engine ships without CLI plugins, so `docker compose` (used by
    # deploy.sh) and `docker buildx` are absent. Install them host-wide; this
    # path is searched by both docker.io and docker-ce CLIs.
    - name: Ensure Docker CLI plugins dir exists
      ansible.builtin.file:
        path: /usr/libexec/docker/cli-plugins
        state: directory
        mode: "0755"

    - name: Check installed Docker Compose plugin version
      ansible.builtin.command: docker compose version --short
      register: compose_check
      changed_when: false
      failed_when: false
      check_mode: false

    - name: Install Docker Compose v2 plugin {{ docker_compose_version }}
      ansible.builtin.get_url:
        url: "https://github.com/docker/compose/releases/download/v{{ docker_compose_version }}/docker-compose-linux-x86_64"
        dest: /usr/libexec/docker/cli-plugins/docker-compose
        mode: "0755"
        force: true
      when: docker_compose_version not in (compose_check.stdout | default(''))

    - name: Check installed Docker Buildx plugin version
      ansible.builtin.command: docker buildx version
      register: buildx_check
      changed_when: false
      failed_when: false
      check_mode: false

    - name: Install Docker Buildx plugin {{ docker_buildx_version }}
      ansible.builtin.get_url:
        url: "https://github.com/docker/buildx/releases/download/v{{ docker_buildx_version }}/buildx-v{{ docker_buildx_version }}.linux-amd64"
        dest: /usr/libexec/docker/cli-plugins/docker-buildx
        mode: "0755"
        force: true
      when: docker_buildx_version not in (buildx_check.stdout | default(''))

    # Deploy target for each project's compose stack. Owned by the runner so the
    # deploy job can `cp docker-compose.yml` here; the host-managed .env lives
    # here too. Basename matches the compose project name, preserving volumes.
    - name: Ensure app deploy dir exists for each project
      ansible.builtin.file:
        path: "{{ app_base_dir }}/{{ item.project }}"
        state: directory
        owner: "{{ runner_user }}"
        group: "{{ runner_user }}"
        mode: "0755"
      loop: "{{ runners }}"
      loop_control:
        label: "{{ item.project }}"

    - name: Install runners for each project
      ansible.builtin.include_tasks: tasks/install_project.yml
      loop: "{{ runners }}"
      loop_control:
        loop_var: project_spec
        label: "{{ project_spec.project }}"

    # The build job runs `docker build` on the host, talking to the daemon via
    # /var/run/docker.sock. Without docker group membership the runner user gets
    # "permission denied ... unix:///var/run/docker.sock".
    # NOTE(review): this presumes the `docker` group already exists on the host
    # (nothing in this play creates it) — confirm Docker is installed first.
    - name: Add runner user to the docker group
      ansible.builtin.user:
        name: "{{ runner_user }}"
        groups: docker
        append: true
      register: runner_docker_group

    # Group membership is only read at process start, so already-running runner
    # services must be restarted to gain socket access. No shell features are
    # needed: systemctl expands the unit glob itself, so the command module with
    # an argv list is used instead of shell.
    - name: Restart runner services to apply docker group membership
      ansible.builtin.command:
        argv:
          - systemctl
          - restart
          - gitea-runner-*.service
      when: runner_docker_group is changed
      changed_when: true
+9
View File
@@ -0,0 +1,9 @@
# Self-hosted Gitea Actions runner hosts.
# Per-project runner matrix lives in host_vars/<host>.yml.
# Set ansible_host (and ansible_user if not root) to real values.
[stage]
stage01 ansible_host=172.25.25.209 ansible_user=ubuntu
[prod]
prod01 ansible_host=CHANGE_ME ansible_user=ubuntu
@@ -0,0 +1,68 @@
---
# Brings up one act_runner instance: directory, binary, registration, service.
# Expected variables: project_spec (dict), idx (int), project_token (str).

# Derive the names every later task in this file uses.
- name: Set per-runner facts
  ansible.builtin.set_fact:
    svc_name: "gitea-runner-{{ project_spec.project }}-{{ idx }}"
    runner_dir: "{{ runner_home }}/act-runner-{{ project_spec.project }}-{{ idx }}"
    runner_name: "{{ inventory_hostname }}-{{ project_spec.project }}-{{ runner_env }}-{{ idx }}"

# Working directory of this instance, owned by the runner account.
- name: "Create runner dir {{ runner_dir }}"
  ansible.builtin.file:
    state: directory
    path: "{{ runner_dir }}"
    mode: '0755'
    owner: "{{ runner_user }}"
    group: "{{ runner_user }}"

# The download is checked against the pinned sha256.
- name: Download act_runner binary (sha256 verified)
  ansible.builtin.get_url:
    url: "{{ act_runner_download_url }}"
    checksum: "sha256:{{ act_runner_sha256 }}"
    dest: "{{ runner_dir }}/gitea-runner"
    mode: '0755'
    owner: "{{ runner_user }}"
    group: "{{ runner_user }}"
# Registers the instance with Gitea as the runner user. `creates` makes this a
# one-off: once `.runner` exists in the runner dir the task is skipped.
#
# argv is used instead of a free-form command string so that each value is
# passed as exactly one argument, whatever characters it contains.
# no_log keeps the registration token out of task output and logs; to debug a
# failed registration, re-run the command by hand on the host.
- name: "Register runner {{ runner_name }}"
  ansible.builtin.command:
    argv:
      - "{{ runner_dir }}/gitea-runner"
      - register
      - --no-interactive
      - --instance
      - "{{ gitea_instance }}"
      - --token
      - "{{ project_token }}"
      - --name
      - "{{ runner_name }}"
      - --labels
      - "{{ project_spec.labels }}"
    chdir: "{{ runner_dir }}"
    creates: "{{ runner_dir }}/.runner"
  become_user: "{{ runner_user }}"
  no_log: true
# Writes the unit file for this runner instance. The service runs
# `gitea-runner daemon` as the runner user from the instance's own directory,
# with HOME set to the runner's home, and systemd restarts it 5 s after any
# exit (Restart=always, RestartSec=5s).
- name: "Write systemd unit for {{ svc_name }}"
ansible.builtin.copy:
dest: "/etc/systemd/system/{{ svc_name }}.service"
owner: root
group: root
mode: "0644"
content: |
[Unit]
Description=Gitea Actions Runner ({{ runner_name }})
After=network.target
[Service]
Type=simple
User={{ runner_user }}
WorkingDirectory={{ runner_dir }}
ExecStart={{ runner_dir }}/gitea-runner daemon
Restart=always
RestartSec=5s
Environment=HOME={{ runner_home }}
[Install]
WantedBy=multi-user.target
# Reloads systemd so the unit file is picked up, enables the service at boot
# and makes sure it is running.
# NOTE(review): `state: started` leaves an already-running service untouched,
# so a later change to the unit file only takes effect after a restart —
# confirm that is intended.
- name: "Enable + start {{ svc_name }}"
ansible.builtin.systemd:
name: "{{ svc_name }}"
enabled: true
state: started
daemon_reload: true
+20
View File
@@ -0,0 +1,20 @@
---
# Expands one project entry into `count` runner instances.
# Input: project_spec (dict with project, token_env, count, ...).

# The token is read from the control node's environment. no_log keeps its
# value out of verbose (-v) task output.
- name: "Read registration token for {{ project_spec.project }} from env var"
  ansible.builtin.set_fact:
    project_token: "{{ lookup('ansible.builtin.env', project_spec.token_env) }}"
  no_log: true

# An unset variable yields an empty string, in which case the project is
# skipped with a hint instead of failing the whole play.
- name: "Warn and skip {{ project_spec.project }} — token missing"
  ansible.builtin.debug:
    msg: >-
      Skipping {{ project_spec.project }}: env var {{ project_spec.token_env }} is empty/unset.
      Export a fresh registration token (Gitea repo → Settings → Actions → Runners → Create new runner token).
  when: project_token | length == 0

# idx runs from 1 to count inclusive and becomes part of each runner's name.
- name: "Install {{ project_spec.count }} runner(s) for {{ project_spec.project }}"
  ansible.builtin.include_tasks: install_one_runner.yml
  loop: "{{ range(1, project_spec.count | int + 1) | list }}"
  loop_control:
    loop_var: idx
    label: "{{ project_spec.project }}-{{ idx }}"
  when: project_token | length > 0
+122
View File
@@ -0,0 +1,122 @@
#!/usr/bin/env bash
# deploy.sh — rolling deploy of the dockerised stack.
#
# The stack is two api containers (api1, api2) behind a containerised nginx.
# The api containers are recreated one after the other; while one is down,
# nginx sends traffic to its peer through the upstream's passive failover
# (max_fails/fail_timeout + proxy_next_upstream), so users see effectively no
# downtime. There is no host nginx and no sudo involved — every action is a
# `docker compose` call.
#
# Usage:
#   ./deploy.sh <image-tag>        e.g. ./deploy.sh a1b2c3d
#
# Environment overrides (the defaults target gatehouse-api):
#   IMAGE_NAME    docker image name
#   SVC1 / SVC2   compose service names of the two api containers
#   SVC1_PORT     host port for SVC1
#   SVC2_PORT     host port for SVC2
#   JOB_SVCS      job service names, separated by spaces
#   HEALTH_PATH   HTTP path used for the health check
#   COMPOSE_DIR   directory holding docker-compose.yml, docker/nginx.conf and .env
set -euo pipefail

# The image tag is mandatory; without it the script prints the usage and stops.
TAG="${1:?Usage: deploy.sh <image-tag> (e.g. deploy.sh a1b2c3d)}"

# Fill in a default for every setting that is unset or empty.
: "${IMAGE_NAME:=gatehouse-api}"
: "${COMPOSE_DIR:=/opt/gatehouse-api}"
: "${SVC1:=api1}"
: "${SVC2:=api2}"
: "${SVC1_PORT:=5000}"
: "${SVC2_PORT:=5001}"
: "${JOB_SVCS:=zerotier-reconciler mfa-compliance}"
: "${HEALTH_PATH:=/api/health}"

# Health polling budget: 18 attempts, 5 s apart (90 s of waiting at most).
HEALTH_RETRIES=18
HEALTH_INTERVAL=5

# Exported so that child processes such as `docker compose` see the tag.
IMAGE_TAG="${TAG}"
export IMAGE_TAG
# ── helpers ───────────────────────────────────────────────────────────────────
# log MESSAGE... — write the arguments to stdout behind an [HH:MM:SS] timestamp.
log() {
  printf '[%s] %s\n' "$(date '+%H:%M:%S')" "$*"
}
# die MESSAGE... — log the message as an ERROR on stderr, then exit with status 1.
die() {
  log "ERROR: $*" >&2
  exit 1
}
# step TITLE... — log an empty line followed by a "── TITLE ──" section banner.
step() {
  log ""
  log "── $* ──"
}
# health_check PORT LABEL
# Poll http://127.0.0.1:PORT${HEALTH_PATH} until curl gets a response without an
# HTTP error status (curl -f). Makes at most HEALTH_RETRIES attempts and sleeps
# HEALTH_INTERVAL seconds between attempts.
#   PORT   host port the service is published on
#   LABEL  name used in log messages
# Each probe is capped at HEALTH_TIMEOUT seconds (default 5) so an endpoint that
# accepts the connection but never answers cannot stall the deploy indefinitely.
# Returns 0 as soon as one probe succeeds, 1 when every attempt failed.
health_check() {
  local port=$1 label=$2 attempt=0
  local probe_timeout="${HEALTH_TIMEOUT:-5}"
  log "Waiting for ${label} on :${port}${HEALTH_PATH} ..."
  while (( attempt < HEALTH_RETRIES )); do
    if curl -sf --connect-timeout "${probe_timeout}" --max-time "${probe_timeout}" \
        "http://127.0.0.1:${port}${HEALTH_PATH}" -o /dev/null; then
      log "${label} healthy"
      return 0
    fi
    attempt=$(( attempt + 1 ))
    if (( attempt < HEALTH_RETRIES )); then
      log " [${attempt}/${HEALTH_RETRIES}] not ready, retrying in ${HEALTH_INTERVAL}s"
      sleep "${HEALTH_INTERVAL}"
    else
      # Last attempt: do not sleep, the caller should react to the failure now.
      log " [${attempt}/${HEALTH_RETRIES}] not ready"
    fi
  done
  # Errors go to stderr, matching die().
  log "ERROR: ${label} failed health check after ${HEALTH_RETRIES} attempts" >&2
  return 1
}
# get_service_tag SERVICE
# Print the image tag of the container currently backing compose service
# SERVICE, as recorded in the container's .Config.Image.
# Prints nothing when the service has no container, the container cannot be
# inspected, or the image reference carries no tag. Always returns 0, so a
# failed lookup cannot abort the script through errexit/pipefail; callers test
# for an empty result instead.
get_service_tag() {
  local container_id image name_part
  container_id=$(docker compose ps -q "$1" 2>/dev/null) || true
  # Use only the first container if the service has several.
  container_id=${container_id%%$'\n'*}
  [[ -n "${container_id}" ]] || return 0
  image=$(docker inspect --format '{{.Config.Image}}' "${container_id}" 2>/dev/null) || return 0
  # Drop a trailing @sha256:... digest, then look for the tag only in the last
  # path component so a registry port (host:5000/name:tag) is not mistaken for it.
  image=${image%%@*}
  name_part=${image##*/}
  if [[ "${name_part}" == *:* ]]; then
    printf '%s\n' "${name_part##*:}"
  fi
  return 0
}
# roll_api SERVICE PORT
# Recreate one api service on the new tag, then health-check it; roll back to the
# previous tag on failure. The peer api keeps serving traffic throughout.
#   SERVICE  compose service name to recreate
#   PORT     host port used for the health check
# Returns 0 when the service is healthy on ${TAG}. In every failure case the
# script is terminated through die(), after attempting a rollback when a
# previous tag is known.
roll_api() {
  local service=$1 port=$2 old_tag
  step "${service} → ${TAG}"
  # Remember what is running now; a failed lookup means "nothing to roll back to".
  old_tag=$(get_service_tag "${service}") || old_tag=""
  # A failed `up` is handled like a failed health check so it also gets rolled back.
  if docker compose up -d --no-deps --force-recreate "${service}" \
      && health_check "${port}" "${service}"; then
    return 0
  fi
  if [[ -z "${old_tag}" ]]; then
    die "Deploy aborted — ${service} failed health check, no previous tag to roll back to"
  fi
  log "Rolling back ${service} to ${old_tag}..."
  IMAGE_TAG="${old_tag}" docker compose up -d --no-deps --force-recreate "${service}" \
    || die "Deploy aborted — rollback of ${service} to ${old_tag} failed"
  # Confirm the rollback actually restored service before reporting it as done.
  health_check "${port}" "${service}" \
    || die "Deploy aborted — ${service} rolled back to ${old_tag} but is still unhealthy"
  die "Deploy aborted — ${service} rolled back to ${old_tag}"
}
# ── pre-flight ────────────────────────────────────────────────────────────────
# All compose commands below run from the compose directory.
cd "${COMPOSE_DIR}"
# Record the working directory and its contents in the deploy log.
pwd
ls -la
log "Deploying ${IMAGE_NAME}:${TAG}"
# The image must already exist locally; stop before touching any container if not.
if ! docker image inspect "${IMAGE_NAME}:${TAG}" > /dev/null 2>&1; then
  die "Image ${IMAGE_NAME}:${TAG} not found locally — build it first."
fi
# Start db and redis before the api containers are rolled. The call is
# idempotent and also covers a first-ever deploy.
step "backing services (db, redis)"
docker compose up -d db redis
# ── roll api containers one at a time ─────────────────────────────────────────
# SVC1 is rolled first, then SVC2; roll_api aborts the script on failure.
roll_api "${SVC1}" "${SVC1_PORT}"
roll_api "${SVC2}" "${SVC2_PORT}"
# ── nginx + job workers ───────────────────────────────────────────────────────
# Bring nginx up (created on first deploy) and refresh job workers to the new
# tag. api1/api2 are already at the desired tag, so they are left untouched.
step "nginx + job workers → ${TAG}"
docker compose up -d --remove-orphans
# Apply any nginx.conf change without dropping connections (bind-mounted config
# is not re-read on `up`). Skipped if nginx isn't running yet.
if [[ -n "$(docker compose ps -q nginx 2>/dev/null)" ]]; then
  # Capture the config test output instead of discarding it, so the reason for
  # a failed test shows up in the deploy log next to the warning.
  if nginx_test_output=$(docker compose exec -T nginx nginx -t 2>&1); then
    # Report a failed reload explicitly rather than silently skipping the log line.
    if docker compose exec -T nginx nginx -s reload; then
      log "nginx: reloaded"
    else
      log "WARNING: nginx reload failed — still running with previous config" >&2
    fi
  else
    log "WARNING: nginx config test failed — left running with previous config"
    log "${nginx_test_output}"
  fi
fi
# ── done ──────────────────────────────────────────────────────────────────────
log ""
log "Deploy complete ✓ ${IMAGE_NAME}:${TAG}"
+29 -14
View File
@@ -1,14 +1,12 @@
version: '3.8'
services:
api:
build:
context: .
dockerfile: Dockerfile
api1:
image: gatehouse-api:${IMAGE_TAG:-latest}
env_file:
- .env
ports:
- "${API_PORT:-5000}:5000"
- "0.0.0.0:5000:5000"
depends_on:
db:
condition: service_healthy
@@ -18,7 +16,28 @@ services:
- authy2-network
restart: unless-stopped
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:5000/api/health"]
test: ["CMD", "curl", "-f", "http://127.0.0.1:5000/api/health"]
interval: 30s
timeout: 10s
retries: 3
start_period: 40s
api2:
image: gatehouse-api:${IMAGE_TAG:-latest}
env_file:
- .env
ports:
- "0.0.0.0:5001:5000"
depends_on:
db:
condition: service_healthy
redis:
condition: service_healthy
networks:
- authy2-network
restart: unless-stopped
healthcheck:
test: ["CMD", "curl", "-f", "http://127.0.0.1:5000/api/health"]
interval: 30s
timeout: 10s
retries: 3
@@ -66,9 +85,9 @@ services:
- ./docker/nginx.conf:/etc/nginx/nginx.conf:ro
ports:
- "${HTTP_PORT:-80}:80"
- "${HTTPS_PORT:-443}:443"
depends_on:
- api
- api1
- api2
networks:
- authy2-network
restart: unless-stopped
@@ -79,9 +98,7 @@ services:
retries: 3
zerotier-reconciler:
build:
context: .
dockerfile: Dockerfile.job
image: gatehouse-api-job:${IMAGE_TAG:-latest}
env_file:
- .env
environment:
@@ -97,9 +114,7 @@ services:
restart: unless-stopped
mfa-compliance:
build:
context: .
dockerfile: Dockerfile.job
image: gatehouse-api-job:${IMAGE_TAG:-latest}
env_file:
- .env
environment:
+2 -1
View File
@@ -34,7 +34,8 @@ http {
application/xml application/xml+rss text/javascript application/x-javascript;
upstream api {
server api:5000;
server api1:5000 max_fails=2 fail_timeout=10s;
server api2:5000 max_fails=2 fail_timeout=10s;
}
server {
+8 -6
View File
@@ -1,6 +1,6 @@
# Core Flask
Flask==3.0.0
Werkzeug==3.0.1
Werkzeug==3.0.6 # CVE-2024-34069 (debug-server RCE); stays <3.1 for Flask 3.0 compat
# Database
SQLAlchemy==2.0.23
@@ -19,15 +19,17 @@ Flask-Bcrypt==1.0.1
pyotp==2.9.0
# WebAuthn / FIDO2
fido2==1.1.2
cbor2==5.6.0
# fido2 removed: unused in the codebase (WebAuthn is parsed directly via cbor2),
# and it pinned cryptography<44, blocking the CVE-2026-26007 fix. Re-add fido2>=2.2.0
# if migrating to the official library.
cbor2==5.9.0 # CVE-2024-26134, CVE-2026-26209 (DoS via recursion)
# JWT / OIDC
PyJWT==2.8.0
cryptography==42.0.7
PyJWT==2.13.0 # CVE-2026-48526 (auth bypass via forged JWT), CVE-2026-32597
cryptography==43.0.3 # capped <44 by sshkey-tools 0.11.3; see .trivyignore for CVE-2026-26007
# CORS
Flask-CORS==4.0.0
Flask-CORS==6.0.0 # CVE-2024-6221 (ACAO handling)
# Environment variables
python-dotenv==1.0.0
+2 -2
View File
@@ -36,8 +36,8 @@ requests==2.31.0
pytz==2023.3
python-dotenv==1.0.0
pydantic==2.5.0
PyJWT==2.8.0
cryptography==42.0.7
PyJWT==2.13.0
cryptography==43.0.3 # capped <44 by sshkey-tools 0.11.3
pycryptodome==3.20.0
psycopg2-binary==2.9.9
sshkey-tools==0.11.3
+1 -1
View File
@@ -1,7 +1,7 @@
-r base.txt
# Production WSGI server
gunicorn==21.2.0
gunicorn==23.0.0 # CVE-2024-1135, CVE-2024-6827 (HTTP request smuggling)
# Monitoring & logging
sentry-sdk[flask]==1.39.1