From ad2ba12b5b2e5e02e024a9cf8d898176a26cbb2f Mon Sep 17 00:00:00 2001
From: Michal
Date: Tue, 16 Jun 2026 22:26:04 +0100
Subject: [PATCH] feat(deploy): versioned, reversible Pulumi-driven k8s deploy script

scripts/deploy-k8s.sh replaces fulldeploy.sh's rollout-restart-:latest
pattern (which bypassed Pulumi and left no rollback target). It:

- gates on pnpm test:run
- captures the current prod images as immutable rollback tags (skopeo)
  + records digests
- pg_dumps the prod DB before the destructive-capable `prisma db push`
- builds/pushes mcpd+mcplocal tagged with the git short-sha
- pins the sha in ../kubernetes-deployment/Pulumi.homelab.yaml and runs
  `pulumi up --target` the mcpd/mcplocal Deployments only (avoids the
  SOGo docker-image resource that needs a local docker daemon)
- waits for rollout + /healthz, installs the CLI RPM, runs smoke tests
- prints an exact rollback recipe on post-cutover failure

--dry-run validated: tests/pg_dump/targeted preview run read-only.

Co-Authored-By: Claude Opus 4.8 (1M context)
---
 scripts/deploy-k8s.sh | 181 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 181 insertions(+)
 create mode 100755 scripts/deploy-k8s.sh

diff --git a/scripts/deploy-k8s.sh b/scripts/deploy-k8s.sh
new file mode 100755
index 0000000..40da9d4
--- /dev/null
+++ b/scripts/deploy-k8s.sh
@@ -0,0 +1,181 @@
+#!/usr/bin/env bash
+# Versioned, reversible deploy of mcpd + mcplocal to Kubernetes via Pulumi.
+#
+# Replaces the old `fulldeploy.sh` "kubectl rollout restart :latest" pattern
+# (which bypassed Pulumi and left nothing to roll back to). This script:
+#
+#   1. Gates on the unit test suite.
+#   2. Snapshots the registry's current :latest images as immutable rollback
+#      tags (skopeo registry->registry copy) + records the running mcpd digest.
+#   3. Takes a pg_dump of the production DB (schema push is destructive-capable).
+#   4. Builds + pushes new images tagged with the git short-sha.
+#   5. Pins that sha in ../kubernetes-deployment/Pulumi.homelab.yaml and runs
+#      `pulumi preview` then `pulumi up`, both --target'ed at mcpd/mcplocal.
+#   6. Waits for rollout + /healthz, installs the CLI RPM, runs smoke tests.
+#   7. On any failure after the cutover, prints the exact rollback recipe.
+#
+# Usage:
+#   scripts/deploy-k8s.sh [--dry-run] [--skip-tests] [--yes] [TAG]
+#
+#   --dry-run     Do everything read-only: tests, pg_dump, `pulumi preview`.
+#                 No image build/push, no retag, no `pulumi up`, no RPM.
+#   --skip-tests  Skip the unit-test gate (NOT recommended).
+#   --yes         Don't prompt before `pulumi up`.
+#   TAG           Override the image tag (default: git short-sha).
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+ROOT="$(dirname "$SCRIPT_DIR")"
+cd "$ROOT"
+[ -f .env ] && { set -a; source ./.env; set +a; } # ./ so `source` cannot pick up a .env from $PATH
+
+export PATH="$HOME/.npm-global/bin:$PATH"
+
+KUBE_CONTEXT="${KUBE_CONTEXT:-worker0-k8s0}"
+NS="${KUBE_NAMESPACE:-mcpctl}"
+PULUMI_DIR="${PULUMI_DIR:-$ROOT/../kubernetes-deployment}"
+PULUMI_STACK="${PULUMI_STACK:-homelab}"
+PULUMI_YAML="$PULUMI_DIR/Pulumi.$PULUMI_STACK.yaml"
+REG_INTERNAL="${REG_INTERNAL:-10.0.0.194:3012}" # push target (no body-size limit)
+REG_PUBLIC="${REG_PUBLIC:-mysources.co.uk}" # what k8s pulls from (same backend)
+# Target ONLY the mcpd/mcplocal Deployments. A full `pulumi up` would try to
+# configure the docker provider for the unrelated SOGo/courier-mta image build,
+# which needs a local docker daemon this box doesn't have. Targeting the k8s
+# Deployments avoids that provider entirely.
+MCPD_URN="urn:pulumi:${PULUMI_STACK}::k8s-deployments::kubernetes:core/v1:Namespace\$kubernetes:apps/v1:Deployment::mcpd"
+MCPLOCAL_URN="urn:pulumi:${PULUMI_STACK}::k8s-deployments::kubernetes:core/v1:Namespace\$kubernetes:apps/v1:Deployment::mcplocal"
+BACKUP_DIR="$HOME/tmp/mcpctl-backup"
+DATE="$(date +%Y%m%d-%H%M%S)"
+
+DRY_RUN=false; SKIP_TESTS=false; ASSUME_YES=false; TAG=""
+while [ $# -gt 0 ]; do
+  case "$1" in
+    --dry-run) DRY_RUN=true ;;
+    --skip-tests) SKIP_TESTS=true ;;
+    --yes) ASSUME_YES=true ;;
+    *) [[ "$1" != -* ]] || { printf 'ERROR: unknown option: %s\n' "$1" >&2; exit 2; }; TAG="$1" ;; # a mistyped flag must not become the image tag
+  esac
+  shift
+done
+TAG="${TAG:-$(git rev-parse --short HEAD)}"
+ROLLBACK_TAG="rollback-$DATE"
+
+say() { printf '\n\033[1;36m>>> %s\033[0m\n' "$*"; }
+warn() { printf '\033[1;33m ! %s\033[0m\n' "$*"; }
+die() { printf '\033[1;31mERROR: %s\033[0m\n' "$*" >&2; exit 1; }
+
+mkdir -p "$BACKUP_DIR"
+say "Deploy plan" # NOTE(review): the text after "cat <" on the next line (apparently the plan heredoc and the start of the test gate) is missing from this copy of the patch; restore it from the repository
+cat < /tmp/deploy-test.log 2>&1 || { tail -30 /tmp/deploy-test.log; die "tests failed — aborting"; }
+  grep -E "Tests " /tmp/deploy-test.log | tail -1
+fi
+
+# ── 2. Record current images + create immutable rollback tags ──
+say "2/7 Capture rollback target (current prod images)"
+CUR_MCPD_DIGEST="$(kubectl --context "$KUBE_CONTEXT" -n "$NS" get pods -l 'app in (mcpd)' -o jsonpath='{.items[0].status.containerStatuses[0].imageID}' 2>/dev/null || true)"
+[ -z "$CUR_MCPD_DIGEST" ] && CUR_MCPD_DIGEST="$(kubectl --context "$KUBE_CONTEXT" -n "$NS" get pods -o jsonpath='{range .items[*]}{.metadata.name}{" "}{.status.containerStatuses[0].imageID}{"\n"}{end}' | awk '/^mcpd-/{print $2; exit}')"
+{
+  echo "# deploy $DATE new-tag=$TAG"
+  echo "mcpd current digest: $CUR_MCPD_DIGEST"
+  echo "rollback tag (mcpd/mcplocal): $ROLLBACK_TAG"
+} | tee "$BACKUP_DIR/deploy-$DATE.txt"
+
+retag() { # $1 = image name (mcpd|mcplocal); copies :latest to :$ROLLBACK_TAG in the internal registry
+  local img="$1"
+  say " skopeo copy $img:latest -> $img:$ROLLBACK_TAG"
+  if [ "$DRY_RUN" = true ]; then warn "dry-run: skip retag"; return; fi
+  skopeo copy --src-tls-verify=false --dest-tls-verify=false \
+    --src-creds "michal:${GITEA_TOKEN:?GITEA_TOKEN must be set (e.g. in .env)}" --dest-creds "michal:$GITEA_TOKEN" \
+    "docker://$REG_INTERNAL/michal/$img:latest" \
+    "docker://$REG_INTERNAL/michal/$img:$ROLLBACK_TAG"
+}
+retag mcpd
+retag mcplocal
+
+# ── 3. pg_dump production DB ──
+say "3/7 pg_dump production DB"
+DUMP="$BACKUP_DIR/predeploy-db-$DATE.sql"
+kubectl --context "$KUBE_CONTEXT" -n "$NS" exec mcpctl-db-0 -- \
+  pg_dump -U mcpctl -d mcpctl --clean --if-exists > "$DUMP" || die "pg_dump failed — aborting" # stderr left visible so the cause is shown
+ls -lh "$DUMP" | awk '{print " wrote "$NF" ("$5")"}'
+[ -s "$DUMP" ] || die "pg_dump produced an empty file — aborting"
+
+if [ "$DRY_RUN" = true ]; then
+  say "DRY-RUN: build/push + pulumi up skipped. Running targeted pulumi preview only."
+  ( cd "$PULUMI_DIR" && ./scripts/pulumi.sh preview --stack "$PULUMI_STACK" \
+      --target "$MCPD_URN" --target "$MCPLOCAL_URN" --non-interactive 2>&1 | tail -25 ) || true
+  say "DRY-RUN complete. Re-run without --dry-run to deploy."
+  exit 0
+fi
+
+# ── 4. Build + push versioned images ──
+say "4/7 Build + push mcpd:$TAG and mcplocal:$TAG"
+bash scripts/build-mcpd.sh "$TAG"
+bash scripts/build-mcplocal.sh "$TAG"
+
+# ── 5. Pin sha in Pulumi + preview + up ──
+say "5/7 Pin image in Pulumi and roll out"
+cp "$PULUMI_YAML" "$BACKUP_DIR/Pulumi.$PULUMI_STACK.yaml.$DATE.bak"
+sed -i -E "s#($REG_PUBLIC/michal/mcpd):[^[:space:]]+#\1:$TAG#; s#($REG_PUBLIC/michal/mcplocal):[^[:space:]]+#\1:$TAG#" "$PULUMI_YAML"
+grep -nE "$REG_PUBLIC/michal/(mcpd|mcplocal):" "$PULUMI_YAML" | sed 's/^/ pinned: /'
+( cd "$PULUMI_DIR" && ./scripts/pulumi.sh preview --stack "$PULUMI_STACK" \
+    --target "$MCPD_URN" --target "$MCPLOCAL_URN" --non-interactive --diff 2>&1 | tail -30 )
+if [ "$ASSUME_YES" != true ]; then
+  read -r -p $'\n Proceed with pulumi up (targeted: mcpd + mcplocal)? [y/N] ' ans || ans="" # EOF on stdin counts as "no" rather than a silent set -e exit
+  [ "$ans" = y ] || [ "$ans" = Y ] || die "aborted before pulumi up (no prod change made; images pushed, Pulumi.yaml edited locally)"
+fi
+( cd "$PULUMI_DIR" && ./scripts/pulumi.sh up --stack "$PULUMI_STACK" \
+    --target "$MCPD_URN" --target "$MCPLOCAL_URN" --yes )
+
+# ── 6. Wait for rollout + health ──
+say "6/7 Wait for rollout + health"
+rollback_recipe() { # NOTE(review): the text after "cat <" on the next line (apparently the recipe heredoc, the closing brace and the ERR trap setup) is missing from this copy of the patch; restore it from the repository
+  cat </dev/null 2>&1 && \
+    kubectl --context "$KUBE_CONTEXT" -n "$NS" rollout status deployment/mcplocal --timeout=4m || true
+for i in $(seq 1 30); do
+  code="$(curl -s --max-time 10 -o /dev/null -w '%{http_code}' "https://mcpctl.ad.itaz.eu/healthz" || true)" # bounded so one hung connection cannot stall the loop
+  [ "$code" = 200 ] && { echo " /healthz OK"; break; }
+  [ "$i" = 30 ] && die "mcpd /healthz never returned 200"
+  sleep 4
+done
+trap - ERR
+
+# ── 7. RPM + smoke ──
+say "7/7 Build/install CLI RPM + smoke tests"
+bash scripts/release.sh
+systemctl --user restart mcplocal && sleep 2
+if pnpm test:smoke > /tmp/deploy-smoke.log 2>&1; then
+  grep -E "Tests |passed" /tmp/deploy-smoke.log | tail -2
+  say "Deploy complete — $TAG live. Rollback tag: $ROLLBACK_TAG"
+else
+  tail -40 /tmp/deploy-smoke.log
+  warn "SMOKE TESTS FAILED — system may be unhealthy. Consider rollback:"
+  rollback_recipe
+  exit 1
+fi