ci: update failover
This commit is contained in:
+48
-60
@@ -1,37 +1,38 @@
|
||||
#!/usr/bin/env bash
|
||||
# Rolling deploy: drains each api container from nginx before replacing it,
|
||||
# so users see zero downtime. Job workers are restarted last without draining.
|
||||
# Rolling deploy for the dockerised stack (api1 + api2 behind a containerised
|
||||
# nginx). api containers are recreated one at a time; while one restarts, nginx
|
||||
# routes to the other via the upstream's passive failover (max_fails/fail_timeout
|
||||
# + proxy_next_upstream), so users see effectively no downtime. No host nginx and
|
||||
# no sudo required — everything goes through `docker compose`.
|
||||
#
|
||||
# Usage:
|
||||
# ./deploy.sh <image-tag> e.g. ./deploy.sh a1b2c3d
|
||||
#
|
||||
# Overridable via env vars (defaults below are gatehouse-api):
|
||||
# IMAGE_NAME docker image name
|
||||
# SVC1/SVC2 compose service names
|
||||
# SVC1/SVC2 api compose service names
|
||||
# SVC1_PORT host port for SVC1
|
||||
# SVC2_PORT host port for SVC2
|
||||
# JOB_SVCS space-separated job service names
|
||||
# HEALTH_PATH HTTP path for health check
|
||||
# NGINX_CONF path to nginx site config
|
||||
# COMPOSE_DIR directory with docker-compose.yml and .env
|
||||
#
|
||||
# The runner user needs passwordless sudo for nginx:
|
||||
# echo "runner ALL=(ALL) NOPASSWD: /usr/sbin/nginx, /bin/systemctl reload nginx" \
|
||||
# | sudo tee /etc/sudoers.d/runner-nginx
|
||||
# COMPOSE_DIR directory with docker-compose.yml, docker/nginx.conf and .env
|
||||
set -euo pipefail
|
||||
|
||||
TAG="${1:?Usage: deploy.sh <image-tag> (e.g. deploy.sh a1b2c3d)}"
|
||||
|
||||
IMAGE_NAME="${IMAGE_NAME:-gatehouse-api}"
|
||||
NGINX_CONF="${NGINX_CONF:-/etc/nginx/conf.d/gatehouse-api.conf}"
|
||||
COMPOSE_DIR="${COMPOSE_DIR:-/opt/gatehouse-api}"
|
||||
SVC1="${SVC1:-api1}"
|
||||
SVC2="${SVC2:-api2}"
|
||||
SVC1_PORT="${SVC1_PORT:-5000}"
|
||||
SVC2_PORT="${SVC2_PORT:-5001}"
|
||||
JOB_SVCS="${JOB_SVCS:-zerotier-reconciler mfa-compliance}"
|
||||
HEALTH_PATH="${HEALTH_PATH:-/api/health}"
|
||||
HEALTH_RETRIES=18 # 18 × 5 s = 90 s max
|
||||
HEALTH_INTERVAL=5
|
||||
|
||||
export IMAGE_TAG="${TAG}"
|
||||
|
||||
# ── helpers ───────────────────────────────────────────────────────────────────
|
||||
|
||||
# Print a message to stdout prefixed with the current wall-clock time as
# [HH:MM:SS]. All arguments are joined with single spaces into one line.
log() {
  printf '[%s] %s\n' "$(date '+%H:%M:%S')" "$*"
}
|
||||
@@ -60,32 +61,23 @@ get_service_tag() {
|
||||
| cut -d: -f2
|
||||
}
|
||||
|
||||
rollback() {
|
||||
local service=$1 port=$2 old_tag=$3
|
||||
if [[ -z "${old_tag}" ]]; then
|
||||
nginx_restore "${port}"
|
||||
die "Deploy aborted — ${service} failed health check, no previous tag to roll back to"
|
||||
# Recreate one api service on the new tag, then health-check it; roll back to the
|
||||
# previous tag on failure. The peer api keeps serving traffic throughout.
|
||||
roll_api() {
|
||||
local service=$1 port=$2 old_tag
|
||||
step "${service} → ${TAG}"
|
||||
old_tag=$(get_service_tag "${service}")
|
||||
|
||||
docker compose up -d --no-deps --force-recreate "${service}"
|
||||
|
||||
if ! health_check "${port}" "${service}"; then
|
||||
if [[ -z "${old_tag}" ]]; then
|
||||
die "Deploy aborted — ${service} failed health check, no previous tag to roll back to"
|
||||
fi
|
||||
log "Rolling back ${service} to ${old_tag}..."
|
||||
IMAGE_TAG="${old_tag}" docker compose up -d --no-deps --force-recreate "${service}"
|
||||
die "Deploy aborted — ${service} rolled back to ${old_tag}"
|
||||
fi
|
||||
log "Rolling back ${service} to ${old_tag}..."
|
||||
IMAGE_TAG="${old_tag}" docker compose up -d --no-deps --force-recreate "${service}"
|
||||
nginx_restore "${port}"
|
||||
die "Deploy aborted — ${service} rolled back to ${old_tag}"
|
||||
}
|
||||
|
||||
# Take the upstream on 127.0.0.1:<port> out of rotation: mark its `server` line
# in $NGINX_CONF as `down`, validate the config with `nginx -t`, then reload.
# Globals:   NGINX_CONF (read) - path of the nginx config file that is edited
# Arguments: $1 - host port of the upstream server to drain
# Outputs:   last two lines of the `nginx -t` report, then a log line on success
# Returns:   non-zero if validation fails; the substitution is reversed first so
#            a rejected config is not left on disk.
nginx_drain() {
  local port=$1 check
  # Dots are escaped so only the literal address 127.0.0.1 matches.
  sudo sed -i "s|server 127\.0\.0\.1:${port};|server 127.0.0.1:${port} down;|" "$NGINX_CONF"
  # Capture the report so the exit status of `nginx -t` is checked directly
  # instead of relying on `pipefail` through the `tail` pipeline.
  if ! check=$(sudo nginx -t 2>&1); then
    printf '%s\n' "${check}" | tail -2
    sudo sed -i "s|server 127\.0\.0\.1:${port} down;|server 127.0.0.1:${port};|" "$NGINX_CONF"
    return 1
  fi
  printf '%s\n' "${check}" | tail -2
  sudo nginx -s reload
  log "nginx: drained :${port}"
}
|
||||
|
||||
# Put the upstream on 127.0.0.1:<port> back into rotation: remove the `down`
# flag from its `server` line in $NGINX_CONF, validate with `nginx -t`, reload.
# Globals:   NGINX_CONF (read) - path of the nginx config file that is edited
# Arguments: $1 - host port of the upstream server to restore
# Outputs:   last two lines of the `nginx -t` report, then a log line on success
# Returns:   non-zero if validation fails; the substitution is reversed first so
#            a rejected config is not left on disk.
nginx_restore() {
  local port=$1 check
  # Dots are escaped so only the literal address 127.0.0.1 matches.
  sudo sed -i "s|server 127\.0\.0\.1:${port} down;|server 127.0.0.1:${port};|" "$NGINX_CONF"
  # Capture the report so the exit status of `nginx -t` is checked directly
  # instead of relying on `pipefail` through the `tail` pipeline.
  if ! check=$(sudo nginx -t 2>&1); then
    printf '%s\n' "${check}" | tail -2
    sudo sed -i "s|server 127\.0\.0\.1:${port};|server 127.0.0.1:${port} down;|" "$NGINX_CONF"
    return 1
  fi
  printf '%s\n' "${check}" | tail -2
  sudo nginx -s reload
  log "nginx: restored :${port}"
}
|
||||
|
||||
# ── pre-flight ────────────────────────────────────────────────────────────────
|
||||
@@ -97,36 +89,32 @@ log "Deploying ${IMAGE_NAME}:${TAG}"
|
||||
docker image inspect "${IMAGE_NAME}:${TAG}" > /dev/null 2>&1 \
|
||||
|| die "Image ${IMAGE_NAME}:${TAG} not found locally — build it first."
|
||||
|
||||
# ── roll SVC1 ─────────────────────────────────────────────────────────────────
|
||||
# Ensure backing services are up before rolling the api (idempotent; also brings
|
||||
# the stack up cleanly on a first-ever deploy).
|
||||
step "backing services (db, redis)"
|
||||
docker compose up -d db redis
|
||||
|
||||
step "${SVC1} → ${TAG} (traffic: ${SVC2} only)"
|
||||
old_svc1=$(get_service_tag "${SVC1}")
|
||||
nginx_drain "${SVC1_PORT}"
|
||||
log "Waiting 15s for in-flight requests to drain..."
|
||||
sleep 15
|
||||
# ── roll api containers one at a time ─────────────────────────────────────────
|
||||
|
||||
IMAGE_TAG="${TAG}" docker compose up -d --no-deps --force-recreate "${SVC1}"
|
||||
roll_api "${SVC1}" "${SVC1_PORT}"
|
||||
roll_api "${SVC2}" "${SVC2_PORT}"
|
||||
|
||||
health_check "${SVC1_PORT}" "${SVC1}" || rollback "${SVC1}" "${SVC1_PORT}" "${old_svc1}"
|
||||
nginx_restore "${SVC1_PORT}"
|
||||
# ── nginx + job workers ───────────────────────────────────────────────────────
|
||||
|
||||
# ── roll SVC2 ─────────────────────────────────────────────────────────────────
|
||||
# Bring nginx up (created on first deploy) and refresh job workers to the new
|
||||
# tag. api1/api2 are already at the desired tag, so they are left untouched.
|
||||
step "nginx + job workers → ${TAG}"
|
||||
docker compose up -d --remove-orphans
|
||||
|
||||
step "${SVC2} → ${TAG} (traffic: ${SVC1} only)"
|
||||
old_svc2=$(get_service_tag "${SVC2}")
|
||||
nginx_drain "${SVC2_PORT}"
|
||||
log "Waiting 15s for in-flight requests to drain..."
|
||||
sleep 15
|
||||
|
||||
IMAGE_TAG="${TAG}" docker compose up -d --no-deps --force-recreate "${SVC2}"
|
||||
|
||||
health_check "${SVC2_PORT}" "${SVC2}" || rollback "${SVC2}" "${SVC2_PORT}" "${old_svc2}"
|
||||
nginx_restore "${SVC2_PORT}"
|
||||
|
||||
# ── job workers ───────────────────────────────────────────────────────────────
|
||||
|
||||
step "job workers → ${TAG}"
|
||||
IMAGE_TAG="${TAG}" docker compose up -d --no-deps --force-recreate zerotier-reconciler mfa-compliance
|
||||
# Apply any nginx.conf change without dropping connections (bind-mounted config
# is not re-read on `up`). Skipped if nginx isn't running yet.
# Both failure paths are best-effort: they warn and let the deploy continue.
if [[ -n "$(docker compose ps -q nginx 2>/dev/null)" ]]; then
  # Capture the config-test report instead of discarding it, so a failed test
  # can be diagnosed from the deploy log.
  if nginx_test_output=$(docker compose exec -T nginx nginx -t 2>&1); then
    # Check the reload explicitly: in an `a && b` list a failure of `a` is
    # ignored by `set -e` and would otherwise pass unnoticed.
    if docker compose exec -T nginx nginx -s reload; then
      log "nginx: reloaded"
    else
      log "WARNING: nginx reload failed"
    fi
  else
    log "WARNING: nginx config test failed — left running with previous config"
    printf '%s\n' "${nginx_test_output}" >&2
  fi
fi
|
||||
|
||||
# ── done ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
Reference in New Issue
Block a user