Files
mcpctl/scripts/deploy-k8s.sh
Michal 4c7e648771
Some checks failed
CI/CD / lint (pull_request) Successful in 1m15s
CI/CD / test (pull_request) Successful in 1m17s
CI/CD / typecheck (pull_request) Successful in 2m52s
CI/CD / smoke (pull_request) Failing after 1m54s
CI/CD / build (pull_request) Successful in 4m49s
CI/CD / publish (pull_request) Has been skipped
fix(smoke): read CLI credentials from ~/.mcpctl (active path)
passwd smoke read ~/.config/mcpctl/credentials and silently skipped; the CLI
stores creds at ~/.mcpctl/credentials. Now verified live against prod (4/4).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-16 22:41:30 +01:00

187 lines
8.6 KiB
Bash
Executable File

#!/usr/bin/env bash
# Versioned, reversible deploy of mcpd + mcplocal to Kubernetes via Pulumi.
#
# Replaces the old `fulldeploy.sh` "kubectl rollout restart :latest" pattern
# (which bypassed Pulumi and left nothing to roll back to). This script:
#
# 1. Gates on the unit test suite.
# 2. Captures the CURRENTLY-running images as immutable rollback tags
# (skopeo registry->registry copy) + records their digests.
# 3. Takes a pg_dump of the production DB (schema push is destructive-capable).
# 4. Builds + pushes new images tagged with the git short-sha.
# 5. Pins that sha in ../kubernetes-deployment/Pulumi.homelab.yaml and runs
# `pulumi preview` then `pulumi up` (Pulumi is the source of truth).
# 6. Waits for rollout + /healthz, installs the CLI RPM, runs smoke tests.
# 7. On any failure after the cutover, prints the exact rollback recipe.
#
# Usage:
# scripts/deploy-k8s.sh [--dry-run] [--skip-tests] [--yes] [TAG]
#
# --dry-run Do everything read-only: tests, pg_dump, `pulumi preview`.
# No image build/push, no retag, no `pulumi up`, no RPM.
# --skip-tests Skip the unit-test gate (NOT recommended).
# --yes Don't prompt before `pulumi up`.
# TAG Override the image tag (default: git short-sha).
# Strict mode: abort on unhandled errors, unset variables and failed pipeline stages.
set -o errexit -o nounset -o pipefail

# Resolve the directory holding this script, then work from its parent (the repo root).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ROOT="$(dirname "$SCRIPT_DIR")"
cd "$ROOT"

# Optional .env in the repo root: auto-export everything it defines.
if [ -f .env ]; then
  set -a
  source .env
  set +a
fi

# Put the per-user npm global bin directory first on PATH.
export PATH="$HOME/.npm-global/bin:$PATH"
# ── Configuration (environment overrides: KUBE_CONTEXT, KUBE_NAMESPACE,
#    PULUMI_DIR, PULUMI_STACK) ──
KUBE_CONTEXT="${KUBE_CONTEXT:-worker0-k8s0}"
NS="${KUBE_NAMESPACE:-mcpctl}"
PULUMI_DIR="${PULUMI_DIR:-$ROOT/../kubernetes-deployment}"
PULUMI_STACK="${PULUMI_STACK:-homelab}"
PULUMI_YAML="$PULUMI_DIR/Pulumi.$PULUMI_STACK.yaml"
REG_INTERNAL="10.0.0.194:3012" # push target (no body-size limit)
REG_PUBLIC="mysources.co.uk" # what k8s pulls from (same backend)
# Target ONLY the mcpd/mcplocal Deployments. A full `pulumi up` would try to
# configure the docker provider for the unrelated SOGo/courier-mta image build,
# which needs a local docker daemon this box doesn't have. Targeting the k8s
# Deployments avoids that provider entirely.
# The stack name is the second URN component, so derive it from PULUMI_STACK:
# a hard-coded "homelab" would make --target match nothing on any other stack.
MCPD_URN="urn:pulumi:${PULUMI_STACK}::k8s-deployments::kubernetes:core/v1:Namespace\$kubernetes:apps/v1:Deployment::mcpd"
MCPLOCAL_URN="urn:pulumi:${PULUMI_STACK}::k8s-deployments::kubernetes:core/v1:Namespace\$kubernetes:apps/v1:Deployment::mcplocal"
# Prior kubectl-based deploys (old fulldeploy.sh) took server-side-apply
# ownership of the Deployment .image field under the `kubectl-set` field
# manager, which makes a plain pulumi apply conflict. Force-apply lets Pulumi
# reclaim the field and become the single source of truth going forward.
export PULUMI_K8S_ENABLE_PATCH_FORCE=true
# Where rollback records, the Pulumi YAML backup and the DB dump are written.
BACKUP_DIR="$HOME/tmp/mcpctl-backup"
# Timestamp shared by every artefact of this run (and by the rollback tag).
DATE="$(date +%Y%m%d-%H%M%S)"
# ── Command-line parsing ──
DRY_RUN=false; SKIP_TESTS=false; ASSUME_YES=false; TAG=""

# Print the one-line synopsis.
usage() {
  printf 'Usage: scripts/deploy-k8s.sh [--dry-run] [--skip-tests] [--yes] [TAG]\n'
}

# Parse the script arguments into DRY_RUN / SKIP_TESTS / ASSUME_YES / TAG.
# Unknown options are rejected (exit 2): previously a mistyped flag such as
# `--dryrun` was silently taken as the image TAG and a LIVE deploy proceeded.
# A bare word is the TAG; if several are given the last one wins.
parse_args() {
  while [ $# -gt 0 ]; do
    case "$1" in
      --dry-run) DRY_RUN=true ;;
      --skip-tests) SKIP_TESTS=true ;;
      --yes) ASSUME_YES=true ;;
      -h|--help) usage; exit 0 ;;
      -*)
        printf 'ERROR: unknown option: %s\n' "$1" >&2
        usage >&2
        exit 2
        ;;
      *) TAG="$1" ;;
    esac
    shift
  done
}
parse_args "$@"

# Default tag is the short sha of HEAD; the rollback tag is keyed by this run's timestamp.
TAG="${TAG:-$(git rev-parse --short HEAD)}"
ROLLBACK_TAG="rollback-$DATE"
# Print a bold-cyan section banner, preceded by a blank line, on stdout.
say() {
  local on=$'\033[1;36m' off=$'\033[0m'
  printf '\n%s>>> %s%s\n' "$on" "$*" "$off"
}

# Print a bold-yellow warning line on stdout.
warn() {
  local on=$'\033[1;33m' off=$'\033[0m'
  printf '%s ! %s%s\n' "$on" "$*" "$off"
}

# Print a bold-red error on stderr and terminate the script with status 1.
die() {
  local on=$'\033[1;31m' off=$'\033[0m'
  printf '%sERROR: %s%s\n' "$on" "$*" "$off" >&2
  exit 1
}
# Make sure the backup directory exists before anything is written to it.
mkdir -p "$BACKUP_DIR"

# Show the operator what this run is about to do.
say "Deploy plan"
if [ "$DRY_RUN" = true ]; then
  run_mode=DRY-RUN
else
  run_mode=LIVE
fi
printf '%s\n' \
  "context : $KUBE_CONTEXT / $NS" \
  "image tag : $TAG (rollback tag for current prod: $ROLLBACK_TAG)" \
  "pulumi : $PULUMI_YAML (stack $PULUMI_STACK)" \
  "backup dir: $BACKUP_DIR" \
  "mode : $run_mode"

# Nothing can be pinned or previewed without the stack config file.
if [ ! -f "$PULUMI_YAML" ]; then
  die "Pulumi config not found: $PULUMI_YAML"
fi
# ── 1. Test gate ──
# Runs the unit tests unless --skip-tests was given. The full output goes to
# /tmp/deploy-test.log; on failure its last 30 lines are shown and the deploy aborts.
if [ "$SKIP_TESTS" = true ]; then
  warn "skipping unit tests (--skip-tests)"
else
  say "1/7 Unit tests (pnpm test:run)"
  if ! pnpm test:run > /tmp/deploy-test.log 2>&1; then
    tail -30 /tmp/deploy-test.log
    die "tests failed — aborting"
  fi
  # The summary line is informational only. Without `|| true`, a log with no
  # "Tests " line makes grep exit 1, and under `set -eo pipefail` that aborts
  # the deploy silently even though the tests passed.
  grep -E "Tests " /tmp/deploy-test.log | tail -1 || true
fi
# ── 2. Record current images + create immutable rollback tags ──
say "2/7 Capture rollback target (current prod images)"

# Print the imageID of the first container of a running pod of app $1.
# First tries the `app in ($1)` label selector; if that yields nothing, falls
# back to the first pod whose name starts with "$1-". Prints nothing when no
# such pod exists. Returns non-zero only if the fallback kubectl call fails.
# NOTE(review): the label selector for mcplocal is assumed to mirror mcpd's —
# the name-prefix fallback covers the case where it does not.
current_image_id() {
  local app="$1" id
  # Best-effort: jsonpath errors on an empty item list, hence the `|| true`.
  id="$(kubectl --context "$KUBE_CONTEXT" -n "$NS" get pods -l "app in ($app)" \
    -o jsonpath='{.items[0].status.containerStatuses[0].imageID}' 2>/dev/null || true)"
  if [ -z "$id" ]; then
    # awk reads the whole listing instead of exiting early, so kubectl can
    # never be killed by SIGPIPE (which pipefail would turn into a failure).
    id="$(kubectl --context "$KUBE_CONTEXT" -n "$NS" get pods \
      -o jsonpath='{range .items[*]}{.metadata.name}{" "}{.status.containerStatuses[0].imageID}{"\n"}{end}' \
      | awk -v prefix="$app-" 'index($1, prefix) == 1 && !found { print $2; found = 1 }')" || return 1
  fi
  printf '%s' "$id"
}

CUR_MCPD_DIGEST="$(current_image_id mcpd)"
CUR_MCPLOCAL_DIGEST="$(current_image_id mcplocal)"
# An empty digest means the rollback record cannot identify what was running.
[ -n "$CUR_MCPD_DIGEST" ] || warn "could not determine the running mcpd image digest — rollback record will be incomplete"
[ -n "$CUR_MCPLOCAL_DIGEST" ] || warn "could not determine the running mcplocal image digest"
{
  echo "# deploy $DATE new-tag=$TAG"
  echo "mcpd current digest: $CUR_MCPD_DIGEST"
  echo "mcplocal current digest: $CUR_MCPLOCAL_DIGEST"
  echo "rollback tag (mcpd/mcplocal): $ROLLBACK_TAG"
} | tee "$BACKUP_DIR/deploy-$DATE.txt"
# Copy <image>:latest to <image>:$ROLLBACK_TAG inside the internal registry
# (registry-to-registry, nothing is pulled locally).
# Arguments: $1 = image name (mcpd|mcplocal)
# Globals:   DRY_RUN, ROLLBACK_TAG, REG_INTERNAL, GITEA_TOKEN (all read)
# In dry-run mode only the plan line is printed and nothing is copied.
# NOTE(review): this snapshots the `:latest` tag, which is only the running
# image if every build also moves `:latest` — confirm against the build scripts.
retag() { # $1 = image name (mcpd|mcplocal)
  local img="$1"
  say " skopeo copy $img:latest -> $img:$ROLLBACK_TAG"
  if [ "$DRY_RUN" = true ]; then
    warn "dry-run: skip retag"
    return
  fi
  # Fail with a clear message instead of letting skopeo report an opaque
  # auth error (empty token) or bash an "unbound variable" (unset token).
  [ -n "${GITEA_TOKEN:-}" ] \
    || die "GITEA_TOKEN is not set (expected from the environment or .env) — cannot retag $img"
  skopeo copy --src-tls-verify=false --dest-tls-verify=false \
    --src-creds "michal:$GITEA_TOKEN" --dest-creds "michal:$GITEA_TOKEN" \
    "docker://$REG_INTERNAL/michal/$img:latest" \
    "docker://$REG_INTERNAL/michal/$img:$ROLLBACK_TAG"
}
# Snapshot both images under the rollback tag, mcpd first.
for image in mcpd mcplocal; do
  retag "$image"
done
# ── 3. pg_dump production DB ──
# Dumps the mcpctl database from pod mcpctl-db-0 into $DUMP before anything is
# changed. Any failure here aborts the deploy.
say "3/7 pg_dump production DB"
DUMP="$BACKUP_DIR/predeploy-db-$DATE.sql"
DUMP_ERR="$DUMP.err"
# stderr is captured rather than discarded: previously a failing dump aborted
# the script under `set -e` with no message at all.
if ! kubectl --context "$KUBE_CONTEXT" -n "$NS" exec mcpctl-db-0 -- \
    pg_dump -U mcpctl -d mcpctl --clean --if-exists > "$DUMP" 2> "$DUMP_ERR"; then
  tail -20 "$DUMP_ERR" >&2 || true
  die "pg_dump failed (stderr kept in $DUMP_ERR) — aborting"
fi
# Drop the stderr capture when there was nothing in it.
[ -s "$DUMP_ERR" ] || rm -f -- "$DUMP_ERR"
[ -s "$DUMP" ] || die "pg_dump produced an empty file — aborting"
# Only the size column is taken from ls; the path comes from the shell, so a
# backup directory containing spaces is reported correctly.
printf ' wrote %s (%s)\n' "$DUMP" "$(ls -lh -- "$DUMP" | awk '{print $5}')"
# Dry-run stops here: show a targeted preview (best-effort) and exit successfully.
if [[ "$DRY_RUN" == true ]]; then
  say "DRY-RUN: build/push + pulumi up skipped. Running targeted pulumi preview only."
  preview_args=(
    preview --stack "$PULUMI_STACK"
    --target "$MCPD_URN" --target "$MCPLOCAL_URN" --non-interactive
  )
  # A failing preview must not fail the dry run, hence the trailing `|| true`.
  ( cd "$PULUMI_DIR" && ./scripts/pulumi.sh "${preview_args[@]}" 2>&1 | tail -25 ) || true
  say "DRY-RUN complete. Re-run without --dry-run to deploy."
  exit 0
fi
# ── 4. Build + push versioned images ──
say "4/7 Build + push mcpd:$TAG and mcplocal:$TAG"
# One build script per component, each invoked with the tag to publish.
for component in mcpd mcplocal; do
  bash "scripts/build-$component.sh" "$TAG"
done
# ── 5. Pin sha in Pulumi + preview + up ──
# Backs up the stack YAML, rewrites the mcpd/mcplocal image tags to $TAG,
# shows a targeted preview, asks for confirmation (unless --yes) and applies.
say "5/7 Pin image in Pulumi and roll out"
cp "$PULUMI_YAML" "$BACKUP_DIR/Pulumi.$PULUMI_STACK.yaml.$DATE.bak"
# Escape the dots of the registry host so they match literally in the regexes.
reg_re="${REG_PUBLIC//./\\.}"
# The old tag ends at whitespace OR a quote: stopping only at whitespace would
# swallow the closing quote of a quoted YAML value and corrupt the file.
sed -i -E "s#($reg_re/michal/mcpd):[^[:space:]\"']+#\1:$TAG#; s#($reg_re/michal/mcplocal):[^[:space:]\"']+#\1:$TAG#" "$PULUMI_YAML"
# Fail loudly when nothing was pinned (previously grep's non-zero status
# aborted the script under pipefail without any message).
pinned="$(grep -nE "$reg_re/michal/(mcpd|mcplocal):" "$PULUMI_YAML" || true)"
[ -n "$pinned" ] || die "no mcpd/mcplocal image reference found in $PULUMI_YAML — nothing was pinned"
printf '%s\n' "$pinned" | sed 's/^/ pinned: /'
( cd "$PULUMI_DIR" && ./scripts/pulumi.sh preview --stack "$PULUMI_STACK" \
  --target "$MCPD_URN" --target "$MCPLOCAL_URN" --non-interactive --diff 2>&1 | tail -30 )
if [ "$ASSUME_YES" != true ]; then
  ans=""
  # EOF on stdin (e.g. run from CI without --yes) counts as "no" instead of
  # killing the script silently through `set -e`.
  read -r -p $'\n Proceed with pulumi up (targeted: mcpd + mcplocal)? [y/N] ' ans || true
  [ "$ans" = y ] || [ "$ans" = Y ] || die "aborted before pulumi up (no prod change made; images pushed, Pulumi.yaml edited locally)"
fi
( cd "$PULUMI_DIR" && ./scripts/pulumi.sh up --stack "$PULUMI_STACK" \
  --target "$MCPD_URN" --target "$MCPLOCAL_URN" --yes )
# ── 6. Wait for rollout + health ──
say "6/7 Wait for rollout + health"

# Print copy-pasteable rollback instructions on stdout.
# Globals (read): PULUMI_YAML, ROLLBACK_TAG, BACKUP_DIR, PULUMI_STACK, DATE,
#                 PULUMI_DIR, MCPD_URN, MCPLOCAL_URN, KUBE_CONTEXT, NS, DUMP
# The pulumi command is targeted at the two Deployments and force-patched,
# exactly like the forward deploy: an untargeted `pulumi up` would also try to
# configure the docker provider, which does not work on this box.
rollback_recipe() {
  cat <<EOF
════════════ ROLLBACK RECIPE ════════════
1) Image: edit $PULUMI_YAML — set mcpd/mcplocal image tag to ':$ROLLBACK_TAG'
(or restore $BACKUP_DIR/Pulumi.$PULUMI_STACK.yaml.$DATE.bak), then:
cd $PULUMI_DIR && PULUMI_K8S_ENABLE_PATCH_FORCE=true ./scripts/pulumi.sh up --stack $PULUMI_STACK \\
--target '$MCPD_URN' \\
--target '$MCPLOCAL_URN' --yes
2) DB (only if schema changed): restore the pre-deploy dump:
kubectl --context $KUBE_CONTEXT -n $NS exec -i mcpctl-db-0 -- \\
psql -U mcpctl -d mcpctl < $DUMP
══════════════════════════════════════════
EOF
}
# From here on prod has changed: any failing command prints the rollback recipe.
trap 'warn "deploy failed after cutover"; rollback_recipe' ERR
kubectl --context "$KUBE_CONTEXT" -n "$NS" rollout status deployment/mcpd --timeout=4m
# mcplocal is optional. When it exists, a rollout that does not finish stays
# non-fatal (as before) but is now reported instead of silently swallowed.
if kubectl --context "$KUBE_CONTEXT" -n "$NS" get deployment/mcplocal >/dev/null 2>&1; then
  kubectl --context "$KUBE_CONTEXT" -n "$NS" rollout status deployment/mcplocal --timeout=4m \
    || warn "mcplocal rollout did not complete within 4m — continuing"
fi
# Poll /healthz up to 30 times, 4 s apart. --max-time bounds each probe so a
# hung connection cannot stall the deploy indefinitely.
healthy=false
for i in {1..30}; do
  code="$(curl -s --max-time 10 -o /dev/null -w '%{http_code}' "https://mcpctl.ad.itaz.eu/healthz" || true)"
  if [ "$code" = 200 ]; then
    echo " /healthz OK"
    healthy=true
    break
  fi
  if [ "$i" -lt 30 ]; then sleep 4; fi
done
if [ "$healthy" != true ]; then
  # `die` calls exit, and exit does not fire the ERR trap — print the recipe
  # explicitly so a health failure after cutover still shows it.
  warn "deploy failed after cutover"
  rollback_recipe
  die "mcpd /healthz never returned 200"
fi
trap - ERR
# ── 7. RPM + smoke ──
# Builds/installs the CLI via scripts/release.sh, restarts the user-level
# mcplocal service and runs the smoke suite (log: /tmp/deploy-smoke.log).
say "7/7 Build/install CLI RPM + smoke tests"
bash scripts/release.sh
if systemctl --user restart mcplocal; then
  sleep 2
else
  # Previously ignored without a word; smoke tests then ran against a stale service.
  warn "could not restart mcplocal (systemctl --user) — smoke tests may hit a stale instance"
fi
if pnpm test:smoke > /tmp/deploy-smoke.log 2>&1; then
  # Informational summary: a log without these lines must not fail a deploy
  # that succeeded (grep exit 1 + pipefail + set -e would otherwise abort here).
  grep -E "Tests |passed" /tmp/deploy-smoke.log | tail -2 || true
  say "Deploy complete — $TAG live. Rollback tag: $ROLLBACK_TAG"
else
  tail -40 /tmp/deploy-smoke.log
  warn "SMOKE TESTS FAILED — system may be unhealthy. Consider rollback:"
  rollback_recipe
  exit 1
fi