From f971c4cdaa4e247e060da2524888541469059696 Mon Sep 17 00:00:00 2001
From: sangnn
Date: Tue, 23 Jun 2026 03:04:59 +0000
Subject: [PATCH] ci: update failover

---
 .gitea/workflows/push-develop.yml |   7 +-
 .gitea/workflows/push-main.yml    |   7 +-
 deploy/deploy.sh                  | 122 ++++++++++++++----------------
 docker-compose.yml                |  22 +++++-
 docker/nginx.conf                 |   3 +-
 5 files changed, 92 insertions(+), 69 deletions(-)

diff --git a/.gitea/workflows/push-develop.yml b/.gitea/workflows/push-develop.yml
index b66f9bf..1553572 100644
--- a/.gitea/workflows/push-develop.yml
+++ b/.gitea/workflows/push-develop.yml
@@ -60,11 +60,12 @@ jobs:
     steps:
       - uses: actions/checkout@v4

-      - name: Deploy (docker compose up)
+      - name: Deploy (rolling, zero-downtime)
         run: |
           cp docker-compose.yml "${COMPOSE_DIR}/docker-compose.yml"
-          cd "${COMPOSE_DIR}"
-          IMAGE_TAG="${{ needs.build.outputs.tag }}" docker compose up -d --remove-orphans
+          mkdir -p "${COMPOSE_DIR}/docker"
+          cp docker/nginx.conf "${COMPOSE_DIR}/docker/nginx.conf"
+          bash deploy/deploy.sh "${{ needs.build.outputs.tag }}"

   # ── 3. Alert ──────────────────────────────────────────────────────────────────
   alert:
diff --git a/.gitea/workflows/push-main.yml b/.gitea/workflows/push-main.yml
index d2222ef..dab173a 100644
--- a/.gitea/workflows/push-main.yml
+++ b/.gitea/workflows/push-main.yml
@@ -59,11 +59,12 @@ jobs:
     steps:
       - uses: actions/checkout@v4

-      - name: Deploy (docker compose up)
+      - name: Deploy (rolling, zero-downtime)
         run: |
           cp docker-compose.yml "${COMPOSE_DIR}/docker-compose.yml"
-          cd "${COMPOSE_DIR}"
-          IMAGE_TAG="${{ needs.build.outputs.tag }}" docker compose up -d --remove-orphans
+          mkdir -p "${COMPOSE_DIR}/docker"
+          cp docker/nginx.conf "${COMPOSE_DIR}/docker/nginx.conf"
+          bash deploy/deploy.sh "${{ needs.build.outputs.tag }}"

   # ── 3. Alert ──────────────────────────────────────────────────────────────────
   alert:
diff --git a/deploy/deploy.sh b/deploy/deploy.sh
index 5077356..19b2da3 100755
--- a/deploy/deploy.sh
+++ b/deploy/deploy.sh
@@ -1,37 +1,38 @@
 #!/usr/bin/env bash
-# Rolling deploy: drains each api container from nginx before replacing it,
-# so users see zero downtime. Job workers are restarted last without draining.
+# Rolling deploy for the dockerised stack (api1 + api2 behind a containerised
+# nginx). api containers are recreated one at a time; while one restarts, nginx
+# routes to the other via the upstream's passive failover (max_fails/fail_timeout
+# + proxy_next_upstream), so users see effectively no downtime. No host nginx and
+# no sudo required — everything goes through `docker compose`.
 #
 # Usage:
 #   ./deploy.sh <tag>       e.g.  ./deploy.sh a1b2c3d
 #
 # Overridable via env vars (defaults below are gatehouse-api):
 #   IMAGE_NAME    docker image name
-#   SVC1/SVC2     compose service names
+#   SVC1/SVC2     api compose service names
 #   SVC1_PORT     host port for SVC1
 #   SVC2_PORT     host port for SVC2
+#   JOB_SVCS      space-separated job service names
 #   HEALTH_PATH   HTTP path for health check
-#   NGINX_CONF    path to nginx site config
-#   COMPOSE_DIR   directory with docker-compose.yml and .env
-#
-# The runner user needs passwordless sudo for nginx:
-#   echo "runner ALL=(ALL) NOPASSWD: /usr/sbin/nginx, /bin/systemctl reload nginx" \
-#     | sudo tee /etc/sudoers.d/runner-nginx
+#   COMPOSE_DIR   directory with docker-compose.yml, docker/nginx.conf and .env

 set -euo pipefail

 TAG="${1:?Usage: deploy.sh <tag> (e.g. deploy.sh a1b2c3d)}"
 IMAGE_NAME="${IMAGE_NAME:-gatehouse-api}"
-NGINX_CONF="${NGINX_CONF:-/etc/nginx/conf.d/gatehouse-api.conf}"
 COMPOSE_DIR="${COMPOSE_DIR:-/opt/gatehouse-api}"
 SVC1="${SVC1:-api1}"
 SVC2="${SVC2:-api2}"
 SVC1_PORT="${SVC1_PORT:-5000}"
 SVC2_PORT="${SVC2_PORT:-5001}"
+JOB_SVCS="${JOB_SVCS:-zerotier-reconciler mfa-compliance}"
 HEALTH_PATH="${HEALTH_PATH:-/api/health}"
 HEALTH_RETRIES=18   # 18 × 5 s = 90 s max
 HEALTH_INTERVAL=5

+export IMAGE_TAG="${TAG}"
+
 # ── helpers ───────────────────────────────────────────────────────────────────

 log() { echo "[$(date '+%H:%M:%S')] $*"; }
@@ -60,32 +61,43 @@ get_service_tag() {
     | cut -d: -f2
 }

-rollback() {
-  local service=$1 port=$2 old_tag=$3
-  if [[ -z "${old_tag}" ]]; then
-    nginx_restore "${port}"
-    die "Deploy aborted — ${service} failed health check, no previous tag to roll back to"
+# Test and reload the containerised nginx if it is running (no-op otherwise,
+# e.g. on a first-ever deploy). nginx resolves upstream hostnames only when it
+# loads its config, so it must be reloaded after an api container is recreated
+# (the new container may get a different IP) and after docker/nginx.conf changes
+# (a bind-mounted config is not re-read on `up`). Best-effort: never aborts.
+nginx_reload() {
+  [[ -n "$(docker compose ps -q nginx 2>/dev/null)" ]] || return 0
+  if ! docker compose exec -T nginx nginx -t 2>/dev/null; then
+    log "WARNING: nginx config test failed — left running with previous config"
+  elif docker compose exec -T nginx nginx -s reload; then
+    log "nginx: reloaded"
+  else
+    log "WARNING: nginx reload failed — left running with previous config"
+  fi
+}
+
+# Recreate one api service on the new tag, then health-check it; roll back to the
+# previous tag on failure. The peer api keeps serving traffic throughout.
+roll_api() {
+  local service=$1 port=$2 old_tag
+  step "${service} → ${TAG}"
+  old_tag=$(get_service_tag "${service}")
+
+  docker compose up -d --no-deps --force-recreate "${service}"
+
+  if ! health_check "${port}" "${service}"; then
+    if [[ -z "${old_tag}" ]]; then
+      die "Deploy aborted — ${service} failed health check, no previous tag to roll back to"
+    fi
+    log "Rolling back ${service} to ${old_tag}..."
+    IMAGE_TAG="${old_tag}" docker compose up -d --no-deps --force-recreate "${service}"
+    nginx_reload
+    die "Deploy aborted — ${service} rolled back to ${old_tag}"
   fi
-  log "Rolling back ${service} to ${old_tag}..."
-  IMAGE_TAG="${old_tag}" docker compose up -d --no-deps --force-recreate "${service}"
-  nginx_restore "${port}"
-  die "Deploy aborted — ${service} rolled back to ${old_tag}"
-}
-
-nginx_drain() {
-  local port=$1
-  sudo sed -i "s|server 127.0.0.1:${port};|server 127.0.0.1:${port} down;|" "$NGINX_CONF"
-  sudo nginx -t 2>&1 | tail -2
-  sudo nginx -s reload
-  log "nginx: drained :${port}"
-}
-
-nginx_restore() {
-  local port=$1
-  sudo sed -i "s|server 127.0.0.1:${port} down;|server 127.0.0.1:${port};|" "$NGINX_CONF"
-  sudo nginx -t 2>&1 | tail -2
-  sudo nginx -s reload
-  log "nginx: restored :${port}"
+
+  # Point nginx at the recreated container before the peer is taken down.
+  nginx_reload
 }

 # ── pre-flight ────────────────────────────────────────────────────────────────
@@ -97,36 +109,26 @@ log "Deploying ${IMAGE_NAME}:${TAG}"
 docker image inspect "${IMAGE_NAME}:${TAG}" > /dev/null 2>&1 \
   || die "Image ${IMAGE_NAME}:${TAG} not found locally — build it first."

-# ── roll SVC1 ─────────────────────────────────────────────────────────────────
+# Ensure backing services are up before rolling the api (idempotent; also brings
+# the stack up cleanly on a first-ever deploy).
+step "backing services (db, redis)"
+docker compose up -d db redis

-step "${SVC1} → ${TAG}  (traffic: ${SVC2} only)"
-old_svc1=$(get_service_tag "${SVC1}")
-nginx_drain "${SVC1_PORT}"
-log "Waiting 15s for in-flight requests to drain..."
-sleep 15
+# ── roll api containers one at a time ─────────────────────────────────────────

-IMAGE_TAG="${TAG}" docker compose up -d --no-deps --force-recreate "${SVC1}"
+roll_api "${SVC1}" "${SVC1_PORT}"
+roll_api "${SVC2}" "${SVC2_PORT}"

-health_check "${SVC1_PORT}" "${SVC1}" || rollback "${SVC1}" "${SVC1_PORT}" "${old_svc1}"
-nginx_restore "${SVC1_PORT}"
+# ── nginx + job workers ───────────────────────────────────────────────────────

-# ── roll SVC2 ─────────────────────────────────────────────────────────────────
+# Bring nginx up (created on first deploy) and refresh job workers to the new
+# tag. api1/api2 are already at the desired tag, so they are left untouched.
+step "nginx + job workers → ${TAG}"
+docker compose up -d --remove-orphans

-step "${SVC2} → ${TAG}  (traffic: ${SVC1} only)"
-old_svc2=$(get_service_tag "${SVC2}")
-nginx_drain "${SVC2_PORT}"
-log "Waiting 15s for in-flight requests to drain..."
-sleep 15
-
-IMAGE_TAG="${TAG}" docker compose up -d --no-deps --force-recreate "${SVC2}"
-
-health_check "${SVC2_PORT}" "${SVC2}" || rollback "${SVC2}" "${SVC2_PORT}" "${old_svc2}"
-nginx_restore "${SVC2_PORT}"
-
-# ── job workers ───────────────────────────────────────────────────────────────
-
-step "job workers → ${TAG}"
-IMAGE_TAG="${TAG}" docker compose up -d --no-deps --force-recreate zerotier-reconciler mfa-compliance
+# Pick up any docker/nginx.conf change and the final upstream addresses without
+# dropping connections.
+nginx_reload

 # ── done ──────────────────────────────────────────────────────────────────────

diff --git a/docker-compose.yml b/docker-compose.yml
index 3953f00..dbb7ca5 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -6,7 +6,7 @@ services:
     env_file:
       - .env
     ports:
-      - "127.0.0.1:5000:5000"
+      - "0.0.0.0:5000:5000"
     depends_on:
       db:
         condition: service_healthy
@@ -27,7 +27,7 @@ services:
     env_file:
       - .env
     ports:
-      - "127.0.0.1:5001:5000"
+      - "0.0.0.0:5001:5000"
     depends_on:
       db:
         condition: service_healthy
@@ -79,6 +79,24 @@ services:
     ports:
       - "6379:6379"

+  nginx:
+    image: nginx:1.27-alpine
+    volumes:
+      - ./docker/nginx.conf:/etc/nginx/nginx.conf:ro
+    ports:
+      - "${HTTP_PORT:-80}:80"
+    depends_on:
+      - api1
+      - api2
+    networks:
+      - authy2-network
+    restart: unless-stopped
+    healthcheck:
+      test: ["CMD", "nginx", "-t"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+
   zerotier-reconciler:
     image: gatehouse-api-job:${IMAGE_TAG:-latest}
     env_file:
diff --git a/docker/nginx.conf b/docker/nginx.conf
index ff9c194..6def141 100644
--- a/docker/nginx.conf
+++ b/docker/nginx.conf
@@ -34,7 +34,8 @@ http {
                application/xml application/xml+rss text/javascript application/x-javascript;

     upstream api {
-        server api:5000;
+        server api1:5000 max_fails=2 fail_timeout=10s;
+        server api2:5000 max_fails=2 fail_timeout=10s;
     }

     server {