#!/usr/bin/env bash
# Rolling deploy for the dockerised stack (api1 + api2 behind a containerised
# nginx). api containers are recreated one at a time; while one restarts, nginx
# routes to the other via the upstream's passive failover (max_fails/fail_timeout
# + proxy_next_upstream), so users see effectively no downtime. No host nginx and
# no sudo required — everything goes through `docker compose`.
#
# Usage:
#   ./deploy.sh TAG        e.g. ./deploy.sh a1b2c3d
#
# Overridable via env vars (defaults below are gatehouse-api):
#   IMAGE_NAME    docker image name
#   SVC1/SVC2     api compose service names
#   SVC1_PORT     host port for SVC1
#   SVC2_PORT     host port for SVC2
#   JOB_SVCS      space-separated job service names
#   HEALTH_PATH   HTTP path for health check
#   COMPOSE_DIR   directory with docker-compose.yml, docker/nginx.conf and .env
#
# Exit status: 0 when the deploy completes; non-zero (via die or `set -e`) when
# any step fails.

set -euo pipefail

TAG="${1:?Usage: deploy.sh TAG (e.g. deploy.sh a1b2c3d)}"
IMAGE_NAME="${IMAGE_NAME:-gatehouse-api}"
COMPOSE_DIR="${COMPOSE_DIR:-/opt/gatehouse-api}"
SVC1="${SVC1:-api1}"
SVC2="${SVC2:-api2}"
SVC1_PORT="${SVC1_PORT:-5000}"
SVC2_PORT="${SVC2_PORT:-5001}"
# Kept as a documented knob, but not referenced anywhere in this script: the job
# services are refreshed by the final `docker compose up -d` below.
# shellcheck disable=SC2034
JOB_SVCS="${JOB_SVCS:-zerotier-reconciler mfa-compliance}"
HEALTH_PATH="${HEALTH_PATH:-/api/health}"
readonly HEALTH_RETRIES=18    # 18 × 5 s = 90 s nominal wait between first and last probe
readonly HEALTH_INTERVAL=5    # seconds to sleep between probes
readonly HEALTH_TIMEOUT=5     # per-probe curl cap so a hung connection cannot stall the deploy

# Exported so that `docker compose` sees it when it reads the compose file.
export IMAGE_TAG="${TAG}"

# ── helpers ───────────────────────────────────────────────────────────────────

# Print a timestamped message to stdout.
log() { echo "[$(date '+%H:%M:%S')] $*"; }

# Print a timestamped error to stderr and abort the script with status 1.
die() {
  log "ERROR: $*" >&2
  exit 1
}

# Print a blank line followed by a section banner.
step() {
  log ""
  log "── $* ──"
}

#######################################
# Poll the health endpoint on a local port until it answers successfully.
# Globals:   HEALTH_PATH, HEALTH_RETRIES, HEALTH_INTERVAL, HEALTH_TIMEOUT (read)
# Arguments: $1 - host port to probe on 127.0.0.1
#            $2 - label used in log messages
# Returns:   0 as soon as one probe succeeds, 1 once all retries are used up
#######################################
health_check() {
  local port=$1 label=$2 attempt=0
  log "Waiting for ${label} on :${port}${HEALTH_PATH} ..."
  while (( attempt < HEALTH_RETRIES )); do
    # -f turns HTTP error statuses into a failing exit code; --max-time bounds
    # a probe that connects but never gets an answer.
    if curl -sf --max-time "${HEALTH_TIMEOUT}" -o /dev/null \
      "http://127.0.0.1:${port}${HEALTH_PATH}"; then
      log "✓ ${label} healthy"
      return 0
    fi
    attempt=$(( attempt + 1 ))
    log " [${attempt}/${HEALTH_RETRIES}] not ready, retrying in ${HEALTH_INTERVAL}s"
    sleep "${HEALTH_INTERVAL}"
  done
  log "ERROR: ${label} failed health check after $((HEALTH_RETRIES * HEALTH_INTERVAL))s" >&2
  return 1
}

#######################################
# Print the image tag of a compose service's current container.
# Best-effort: prints nothing (and still returns 0) when the service has no
# container, when docker fails, or when the image reference carries no tag, so
# a lookup failure can never abort the deploy under `set -e`.
# Arguments: $1 - compose service name
# Outputs:   the tag on stdout, or nothing
# Returns:   0 always
#######################################
get_service_tag() {
  local service=$1 ids image name
  ids=$(docker compose ps -q "${service}" 2>/dev/null) || return 0
  [[ -n "${ids}" ]] || return 0
  # If several containers are listed, inspect only the first one.
  image=$(docker inspect --format '{{.Config.Image}}' "${ids%%$'\n'*}" 2>/dev/null) \
    || return 0
  image=${image%%@*}   # drop an "@sha256:…" digest suffix, if any
  name=${image##*/}    # last path component, so "registry:5000/…" is not mistaken for a tag
  if [[ "${name}" == *:* ]]; then
    printf '%s\n' "${name##*:}"
  fi
}

#######################################
# Recreate one api service on the new tag, then health-check it; roll back to
# the previous tag if the container fails to start or never becomes healthy.
# The peer api service is not touched by this function.
# Globals:   TAG (read); IMAGE_TAG (overridden for the rollback command only)
# Arguments: $1 - compose service name
#            $2 - host port used for the health check
# Returns:   0 on success; on failure the script exits through die
#######################################
roll_api() {
  local service=$1 port=$2 old_tag reason=""
  step "${service} → ${TAG}"

  # Record the running tag before the container is replaced.
  old_tag=$(get_service_tag "${service}") || old_tag=""

  # Run both steps inside conditions so that `set -e` cannot abort before the
  # rollback below has had a chance to run.
  if ! docker compose up -d --no-deps --force-recreate "${service}"; then
    reason="failed to start"
  elif ! health_check "${port}" "${service}"; then
    reason="failed health check"
  fi
  if [[ -z "${reason}" ]]; then
    return 0
  fi

  if [[ -z "${old_tag}" ]]; then
    die "Deploy aborted — ${service} ${reason}, no previous tag to roll back to"
  fi
  log "Rolling back ${service} to ${old_tag}..."
  IMAGE_TAG="${old_tag}" docker compose up -d --no-deps --force-recreate "${service}" \
    || die "Deploy aborted — ${service} ${reason} and the rollback to ${old_tag} also failed"
  die "Deploy aborted — ${service} rolled back to ${old_tag}"
}

# ── pre-flight ────────────────────────────────────────────────────────────────
cd "${COMPOSE_DIR}" || die "Cannot change to COMPOSE_DIR: ${COMPOSE_DIR}"
# Record the working directory and its contents in the deploy output.
pwd
ls -la

log "Deploying ${IMAGE_NAME}:${TAG}"
docker image inspect "${IMAGE_NAME}:${TAG}" > /dev/null 2>&1 \
  || die "Image ${IMAGE_NAME}:${TAG} not found locally — build it first."

# Ensure backing services are up before rolling the api (idempotent; also brings
# the stack up cleanly on a first-ever deploy).
step "backing services (db, redis)"
docker compose up -d db redis

# ── roll api containers one at a time ─────────────────────────────────────────
# If the second service fails and is rolled back, the first one stays on the
# new tag; the script aborts at that point.
roll_api "${SVC1}" "${SVC1_PORT}"
roll_api "${SVC2}" "${SVC2_PORT}"

# ── nginx + job workers ───────────────────────────────────────────────────────
# Bring nginx up (created on first deploy) and refresh job workers to the new
# tag. api1/api2 are already at the desired tag, so they are left untouched.
step "nginx + job workers → ${TAG}"
docker compose up -d --remove-orphans

# Apply any nginx.conf change without dropping connections (bind-mounted config
# is not re-read on `up`). Skipped if nginx isn't running yet. Problems here
# only produce warnings: the api containers are already on the new tag.
if [[ -n "$(docker compose ps -q nginx 2>/dev/null)" ]]; then
  # Capture the test output so it stays quiet on success but is shown on failure.
  if nginx_test_output=$(docker compose exec -T nginx nginx -t 2>&1); then
    if docker compose exec -T nginx nginx -s reload; then
      log "nginx: reloaded"
    else
      log "WARNING: nginx reload failed — still running with previous config" >&2
    fi
  else
    log "WARNING: nginx config test failed — left running with previous config" >&2
    printf '%s\n' "${nginx_test_output}" >&2
  fi
fi

# ── done ──────────────────────────────────────────────────────────────────────
log ""
log "Deploy complete ✓ ${IMAGE_NAME}:${TAG}"