ci: update failover
Push -> develop / Build Docker images (push) Successful in 9s
Push -> develop / Deploy (push) Successful in 19s
Push -> develop / Notify on result (push) Successful in 0s

This commit is contained in:
sangnn
2026-06-23 03:04:59 +00:00
parent f58ec37b8a
commit f971c4cdaa
5 changed files with 78 additions and 69 deletions
+4 -3
View File
@@ -60,11 +60,12 @@ jobs:
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- name: Deploy (docker compose up) - name: Deploy (rolling, zero-downtime)
run: | run: |
cp docker-compose.yml "${COMPOSE_DIR}/docker-compose.yml" cp docker-compose.yml "${COMPOSE_DIR}/docker-compose.yml"
cd "${COMPOSE_DIR}" mkdir -p "${COMPOSE_DIR}/docker"
IMAGE_TAG="${{ needs.build.outputs.tag }}" docker compose up -d --remove-orphans cp docker/nginx.conf "${COMPOSE_DIR}/docker/nginx.conf"
bash deploy/deploy.sh "${{ needs.build.outputs.tag }}"
# ── 3. Alert ────────────────────────────────────────────────────────────────── # ── 3. Alert ──────────────────────────────────────────────────────────────────
alert: alert:
+4 -3
View File
@@ -59,11 +59,12 @@ jobs:
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- name: Deploy (docker compose up) - name: Deploy (rolling, zero-downtime)
run: | run: |
cp docker-compose.yml "${COMPOSE_DIR}/docker-compose.yml" cp docker-compose.yml "${COMPOSE_DIR}/docker-compose.yml"
cd "${COMPOSE_DIR}" mkdir -p "${COMPOSE_DIR}/docker"
IMAGE_TAG="${{ needs.build.outputs.tag }}" docker compose up -d --remove-orphans cp docker/nginx.conf "${COMPOSE_DIR}/docker/nginx.conf"
bash deploy/deploy.sh "${{ needs.build.outputs.tag }}"
# ── 3. Alert ────────────────────────────────────────────────────────────────── # ── 3. Alert ──────────────────────────────────────────────────────────────────
alert: alert:
+43 -55
View File
@@ -1,37 +1,38 @@
#!/usr/bin/env bash #!/usr/bin/env bash
# Rolling deploy: drains each api container from nginx before replacing it, # Rolling deploy for the dockerised stack (api1 + api2 behind a containerised
# so users see zero downtime. Job workers are restarted last without draining. # nginx). api containers are recreated one at a time; while one restarts, nginx
# routes to the other via the upstream's passive failover (max_fails/fail_timeout
# + proxy_next_upstream), so users see effectively no downtime. No host nginx and
# no sudo required — everything goes through `docker compose`.
# #
# Usage: # Usage:
# ./deploy.sh <image-tag> e.g. ./deploy.sh a1b2c3d # ./deploy.sh <image-tag> e.g. ./deploy.sh a1b2c3d
# #
# Overridable via env vars (defaults below are gatehouse-api): # Overridable via env vars (defaults below are gatehouse-api):
# IMAGE_NAME docker image name # IMAGE_NAME docker image name
# SVC1/SVC2 compose service names # SVC1/SVC2 api compose service names
# SVC1_PORT host port for SVC1 # SVC1_PORT host port for SVC1
# SVC2_PORT host port for SVC2 # SVC2_PORT host port for SVC2
# JOB_SVCS space-separated job service names
# HEALTH_PATH HTTP path for health check # HEALTH_PATH HTTP path for health check
# NGINX_CONF path to nginx site config # COMPOSE_DIR directory with docker-compose.yml, docker/nginx.conf and .env
# COMPOSE_DIR directory with docker-compose.yml and .env
#
# The runner user needs passwordless sudo for nginx:
# echo "runner ALL=(ALL) NOPASSWD: /usr/sbin/nginx, /bin/systemctl reload nginx" \
# | sudo tee /etc/sudoers.d/runner-nginx
set -euo pipefail set -euo pipefail
TAG="${1:?Usage: deploy.sh <image-tag> (e.g. deploy.sh a1b2c3d)}" TAG="${1:?Usage: deploy.sh <image-tag> (e.g. deploy.sh a1b2c3d)}"
IMAGE_NAME="${IMAGE_NAME:-gatehouse-api}" IMAGE_NAME="${IMAGE_NAME:-gatehouse-api}"
NGINX_CONF="${NGINX_CONF:-/etc/nginx/conf.d/gatehouse-api.conf}"
COMPOSE_DIR="${COMPOSE_DIR:-/opt/gatehouse-api}" COMPOSE_DIR="${COMPOSE_DIR:-/opt/gatehouse-api}"
SVC1="${SVC1:-api1}" SVC1="${SVC1:-api1}"
SVC2="${SVC2:-api2}" SVC2="${SVC2:-api2}"
SVC1_PORT="${SVC1_PORT:-5000}" SVC1_PORT="${SVC1_PORT:-5000}"
SVC2_PORT="${SVC2_PORT:-5001}" SVC2_PORT="${SVC2_PORT:-5001}"
JOB_SVCS="${JOB_SVCS:-zerotier-reconciler mfa-compliance}"
HEALTH_PATH="${HEALTH_PATH:-/api/health}" HEALTH_PATH="${HEALTH_PATH:-/api/health}"
HEALTH_RETRIES=18 # 18 × 5 s = 90 s max HEALTH_RETRIES=18 # 18 × 5 s = 90 s max
HEALTH_INTERVAL=5 HEALTH_INTERVAL=5
export IMAGE_TAG="${TAG}"
# ── helpers ─────────────────────────────────────────────────────────────────── # ── helpers ───────────────────────────────────────────────────────────────────
log() { echo "[$(date '+%H:%M:%S')] $*"; } log() { echo "[$(date '+%H:%M:%S')] $*"; }
@@ -60,32 +61,23 @@ get_service_tag() {
| cut -d: -f2 | cut -d: -f2
} }
rollback() { # Recreate one api service on the new tag, then health-check it; roll back to the
local service=$1 port=$2 old_tag=$3 # previous tag on failure. The peer api keeps serving traffic throughout.
roll_api() {
local service=$1 port=$2 old_tag
step "${service} → ${TAG}"
old_tag=$(get_service_tag "${service}")
docker compose up -d --no-deps --force-recreate "${service}"
if ! health_check "${port}" "${service}"; then
if [[ -z "${old_tag}" ]]; then if [[ -z "${old_tag}" ]]; then
nginx_restore "${port}"
die "Deploy aborted — ${service} failed health check, no previous tag to roll back to" die "Deploy aborted — ${service} failed health check, no previous tag to roll back to"
fi fi
log "Rolling back ${service} to ${old_tag}..." log "Rolling back ${service} to ${old_tag}..."
IMAGE_TAG="${old_tag}" docker compose up -d --no-deps --force-recreate "${service}" IMAGE_TAG="${old_tag}" docker compose up -d --no-deps --force-recreate "${service}"
nginx_restore "${port}"
die "Deploy aborted — ${service} rolled back to ${old_tag}" die "Deploy aborted — ${service} rolled back to ${old_tag}"
} fi
nginx_drain() {
local port=$1
sudo sed -i "s|server 127.0.0.1:${port};|server 127.0.0.1:${port} down;|" "$NGINX_CONF"
sudo nginx -t 2>&1 | tail -2
sudo nginx -s reload
log "nginx: drained :${port}"
}
nginx_restore() {
local port=$1
sudo sed -i "s|server 127.0.0.1:${port} down;|server 127.0.0.1:${port};|" "$NGINX_CONF"
sudo nginx -t 2>&1 | tail -2
sudo nginx -s reload
log "nginx: restored :${port}"
} }
# ── pre-flight ──────────────────────────────────────────────────────────────── # ── pre-flight ────────────────────────────────────────────────────────────────
@@ -97,36 +89,32 @@ log "Deploying ${IMAGE_NAME}:${TAG}"
docker image inspect "${IMAGE_NAME}:${TAG}" > /dev/null 2>&1 \ docker image inspect "${IMAGE_NAME}:${TAG}" > /dev/null 2>&1 \
|| die "Image ${IMAGE_NAME}:${TAG} not found locally — build it first." || die "Image ${IMAGE_NAME}:${TAG} not found locally — build it first."
# ── roll SVC1 ───────────────────────────────────────────────────────────────── # Ensure backing services are up before rolling the api (idempotent; also brings
# the stack up cleanly on a first-ever deploy).
step "backing services (db, redis)"
docker compose up -d db redis
step "${SVC1} → ${TAG} (traffic: ${SVC2} only)" # ── roll api containers one at a time ─────────────────────────────────────────
old_svc1=$(get_service_tag "${SVC1}")
nginx_drain "${SVC1_PORT}"
log "Waiting 15s for in-flight requests to drain..."
sleep 15
IMAGE_TAG="${TAG}" docker compose up -d --no-deps --force-recreate "${SVC1}" roll_api "${SVC1}" "${SVC1_PORT}"
roll_api "${SVC2}" "${SVC2_PORT}"
health_check "${SVC1_PORT}" "${SVC1}" || rollback "${SVC1}" "${SVC1_PORT}" "${old_svc1}" # ── nginx + job workers ───────────────────────────────────────────────────────
nginx_restore "${SVC1_PORT}"
# ── roll SVC2 ───────────────────────────────────────────────────────────────── # Bring nginx up (created on first deploy) and refresh job workers to the new
# tag. api1/api2 are already at the desired tag, so they are left untouched.
step "nginx + job workers → ${TAG}"
docker compose up -d --remove-orphans
step "${SVC2} → ${TAG} (traffic: ${SVC1} only)" # Apply any nginx.conf change without dropping connections (bind-mounted config
old_svc2=$(get_service_tag "${SVC2}") # is not re-read on `up`). Skipped if nginx isn't running yet.
nginx_drain "${SVC2_PORT}" if [[ -n "$(docker compose ps -q nginx 2>/dev/null)" ]]; then
log "Waiting 15s for in-flight requests to drain..." if docker compose exec -T nginx nginx -t 2>/dev/null; then
sleep 15 docker compose exec -T nginx nginx -s reload && log "nginx: reloaded"
else
IMAGE_TAG="${TAG}" docker compose up -d --no-deps --force-recreate "${SVC2}" log "WARNING: nginx config test failed — left running with previous config"
fi
health_check "${SVC2_PORT}" "${SVC2}" || rollback "${SVC2}" "${SVC2_PORT}" "${old_svc2}" fi
nginx_restore "${SVC2_PORT}"
# ── job workers ───────────────────────────────────────────────────────────────
step "job workers → ${TAG}"
IMAGE_TAG="${TAG}" docker compose up -d --no-deps --force-recreate zerotier-reconciler mfa-compliance
# ── done ────────────────────────────────────────────────────────────────────── # ── done ──────────────────────────────────────────────────────────────────────
+20 -2
View File
@@ -6,7 +6,7 @@ services:
env_file: env_file:
- .env - .env
ports: ports:
- "127.0.0.1:5000:5000" - "0.0.0.0:5000:5000"
depends_on: depends_on:
db: db:
condition: service_healthy condition: service_healthy
@@ -27,7 +27,7 @@ services:
env_file: env_file:
- .env - .env
ports: ports:
- "127.0.0.1:5001:5000" - "0.0.0.0:5001:5000"
depends_on: depends_on:
db: db:
condition: service_healthy condition: service_healthy
@@ -79,6 +79,24 @@ services:
ports: ports:
- "6379:6379" - "6379:6379"
nginx:
image: nginx:1.27-alpine
volumes:
- ./docker/nginx.conf:/etc/nginx/nginx.conf:ro
ports:
- "${HTTP_PORT:-80}:80"
depends_on:
- api1
- api2
networks:
- authy2-network
restart: unless-stopped
healthcheck:
test: ["CMD", "nginx", "-t"]
interval: 30s
timeout: 10s
retries: 3
zerotier-reconciler: zerotier-reconciler:
image: gatehouse-api-job:${IMAGE_TAG:-latest} image: gatehouse-api-job:${IMAGE_TAG:-latest}
env_file: env_file:
+2 -1
View File
@@ -34,7 +34,8 @@ http {
application/xml application/xml+rss text/javascript application/x-javascript; application/xml application/xml+rss text/javascript application/x-javascript;
upstream api { upstream api {
server api:5000; server api1:5000 max_fails=2 fail_timeout=10s;
server api2:5000 max_fails=2 fail_timeout=10s;
} }
server { server {