From fac14b6d4a23be1664c15a988756c34cc1b0defd Mon Sep 17 00:00:00 2001 From: Michal Date: Tue, 17 Mar 2026 02:40:40 +0000 Subject: [PATCH] feat: server kickstart with LVM, user creation, progress callbacks, reprovision - LVM partition layout: /, /var, /var/log, /home, /srv, swap, tmpfs /tmp plus /var/lib/longhorn for worker role (grows to fill disk) - Reprovision preserves /home, /srv, /var/lib/longhorn via %pre detection - Admin user created matching the user running the bastion script with SSH keys from authorized_keys + local pubkeys, passwordless sudo - Progress callbacks from %pre and %post to /api/progress endpoint with IP reported on completion (ssh command printed) - Installed machines boot from local disk (iPXE exit) instead of re-entering discovery mode - --role worker|infra flag (infra skips longhorn partition) - reprovision subcommand: queues install + SSH reboot into PXE - Self-cleanup: kills old bastion instances on start - Domain config (DOMAIN env, default ad.itaz.eu) - efibootmgr in %post to set local disk first in boot order - k3s prereqs: kernel modules, sysctl, firewalld disabled, chrony - VM reprovision test script (test-reprovision.sh) Co-Authored-By: Claude Opus 4.6 (1M context) --- bastion.sh | 581 +++++++++++++++++++++++++++++++++++++++----- test-reprovision.sh | 279 +++++++++++++++++++++ 2 files changed, 804 insertions(+), 56 deletions(-) create mode 100755 test-reprovision.sh diff --git a/bastion.sh b/bastion.sh index 2acdfe1..ef60609 100755 --- a/bastion.sh +++ b/bastion.sh @@ -27,6 +27,7 @@ HTTP_PORT="${HTTP_PORT:-8080}" TIMEZONE="${TIMEZONE:-Europe/London}" LOCALE="${LOCALE:-en_GB.UTF-8}" BASTION_DIR="${BASTION_DIR:-/tmp/lab-bastion}" +DOMAIN="${DOMAIN:-ad.itaz.eu}" # internal domain for hostnames DHCP_MODE="${DHCP_MODE:-proxy}" # proxy (alongside existing DHCP) or full (bastion IS the DHCP server) DHCP_RANGE_START="${DHCP_RANGE_START:-}" # only for full mode, auto-derived if empty DHCP_RANGE_END="${DHCP_RANGE_END:-}" @@ -45,13 +46,19 
@@ CMD="${1:-serve}" case "$CMD" in install) - [[ $# -ge 3 ]] || { echo "Usage: bastion.sh install [--disk ]"; exit 1; } + [[ $# -ge 3 ]] || { echo "Usage: bastion.sh install [--role worker|infra] [--disk ]"; exit 1; } MAC="$2" HOSTNAME="$3" - DISK="${5:-}" # --disk - PAYLOAD="{\"mac\":\"$MAC\",\"hostname\":\"$HOSTNAME\"" - [[ -n "$DISK" ]] && PAYLOAD="$PAYLOAD,\"disk\":\"$DISK\"" - PAYLOAD="$PAYLOAD}" + shift 3 + DISK="" ROLE="worker" + while [[ $# -gt 0 ]]; do + case "$1" in + --disk) DISK="$2"; shift 2 ;; + --role) ROLE="$2"; shift 2 ;; + *) echo "Unknown option: $1"; exit 1 ;; + esac + done + PAYLOAD=$(python3 -c "import json; print(json.dumps({k:v for k,v in {'mac':'$MAC','hostname':'$HOSTNAME','disk':'$DISK','role':'$ROLE'}.items() if v}))") RESULT=$(curl -sf -X POST "http://localhost:${HTTP_PORT}/api/install" \ -H "Content-Type: application/json" \ -d "$PAYLOAD" 2>&1) || die "Cannot reach bastion at localhost:${HTTP_PORT}. Is it running?" @@ -93,16 +100,62 @@ print() print('\033[1mINSTALLED\033[0m') if installed: for mac, info in installed.items(): - print(f' {mac:<20} → {info.get(\"hostname\",\"?\")} ({info.get(\"installed_at\",\"?\")})') + ip = info.get('ip', '') + ip_str = f' ip={ip}' if ip else '' + print(f' {mac:<20} → {info.get(\"hostname\",\"?\")} role={info.get(\"role\",\"?\")}{ip_str} ({info.get(\"installed_at\",\"?\")})') else: print(' (none)') print() " 2>/dev/null || echo "$RESULT" exit 0 ;; + reprovision) + [[ $# -ge 3 ]] || { echo "Usage: bastion.sh reprovision [--role worker|infra] [--disk ]"; exit 1; } + MAC="$2" + HOSTNAME="$3" + shift 3 + DISK="" ROLE="worker" + while [[ $# -gt 0 ]]; do + case "$1" in + --disk) DISK="$2"; shift 2 ;; + --role) ROLE="$2"; shift 2 ;; + *) echo "Unknown option: $1"; exit 1 ;; + esac + done + + # Queue the install + PAYLOAD=$(python3 -c "import json; print(json.dumps({k:v for k,v in {'mac':'$MAC','hostname':'$HOSTNAME','disk':'$DISK','role':'$ROLE'}.items() if v}))") + RESULT=$(curl -sf -X POST 
"http://localhost:${HTTP_PORT}/api/install" \ + -H "Content-Type: application/json" \ + -d "$PAYLOAD" 2>&1) || die "Cannot reach bastion at localhost:${HTTP_PORT}. Is it running?" + echo "$RESULT" | python3 -m json.tool 2>/dev/null || echo "$RESULT" + + # Try to find IP from installed state and SSH in to trigger PXE reboot + IP=$(curl -sf "http://localhost:${HTTP_PORT}/api/machines" 2>/dev/null | \ + python3 -c "import sys,json; d=json.load(sys.stdin); print(d.get('installed',{}).get('${MAC}',{}).get('ip',''))" 2>/dev/null || echo "") + ADMIN_USER="${SUDO_USER:-$USER}" + [[ "$ADMIN_USER" == "root" ]] && ADMIN_USER="" + + if [[ -n "$IP" && -n "$ADMIN_USER" ]]; then + echo "" + echo "Attempting SSH reboot into PXE ($ADMIN_USER@$IP)..." + ssh -o StrictHostKeyChecking=no -o ConnectTimeout=5 "$ADMIN_USER@$IP" \ + 'sudo efibootmgr 2>/dev/null; PXE_ENTRY=$(sudo efibootmgr | grep -iE "pxe|network|ipv4" | head -1 | grep -oP "Boot\K[0-9A-F]+"); if [ -n "$PXE_ENTRY" ]; then sudo efibootmgr --bootnext "$PXE_ENTRY" && echo "PXE set as next boot" && sudo reboot; else echo "No PXE boot entry found, rebooting anyway..." && sudo reboot; fi' 2>&1 && { + echo "" + echo "Machine is rebooting into PXE. Install will start automatically." + } || { + echo "" + echo "SSH failed. Reboot the machine manually into PXE (e.g. via IPMI/KVM)." + } + else + echo "" + echo "No IP known for this machine. Reboot it manually into PXE." 
+ fi + exit 0 + ;; serve) ;; # continue below *) - echo "Usage: bastion.sh [serve|install |list]" + echo "Usage: bastion.sh [serve|install|reprovision|list]" exit 1 ;; esac @@ -111,6 +164,17 @@ esac # SERVE MODE — start the bastion # ══════════════════════════════════════════════════════════════════ +# ──── Kill old instances ────────────────────────────────────────── +# Find and kill any previous bastion dnsmasq and HTTP server +OLD_DNSMASQ=$(pgrep -f 'dnsmasq --no-daemon --conf-file=/tmp/lab-bastion' 2>/dev/null || true) +OLD_HTTP=$(pgrep -f 'python3 /tmp/lab-bastion/server.py' 2>/dev/null || true) +if [[ -n "$OLD_DNSMASQ" || -n "$OLD_HTTP" ]]; then + warn "Killing old bastion processes..." + [[ -n "$OLD_DNSMASQ" ]] && kill $OLD_DNSMASQ 2>/dev/null && log " Stopped old dnsmasq (PID $OLD_DNSMASQ)" + [[ -n "$OLD_HTTP" ]] && kill $OLD_HTTP 2>/dev/null && log " Stopped old HTTP server (PID $OLD_HTTP)" + sleep 1 +fi + # ──── Preflight ─────────────────────────────────────────────────── [[ $EUID -eq 0 ]] || die "Must run as root (need DHCP/TFTP ports). 
Use: sudo bash bastion.sh" @@ -143,23 +207,59 @@ GATEWAY="$(ip route | awk '/default/ {print $3; exit}')" [[ -n "$SERVER_IP" ]] || die "Cannot detect IP on interface $IFACE" log "Interface: ${BOLD}$IFACE${NC} IP: ${BOLD}$SERVER_IP${NC} Network: ${BOLD}$NETWORK${NC}" -# ──── Auto-detect SSH pubkey ────────────────────────────────────── -SSH_PUBKEY="${SSH_PUBKEY:-}" -if [[ -z "$SSH_PUBKEY" ]]; then - REAL_HOME="${HOME}" - [[ -n "${SUDO_USER:-}" ]] && REAL_HOME="$(getent passwd "$SUDO_USER" | cut -d: -f6)" - for keyfile in "$REAL_HOME/.ssh/id_ed25519.pub" "$REAL_HOME/.ssh/id_rsa.pub" "$REAL_HOME/.ssh/id_ecdsa.pub"; do - [[ -f "$keyfile" ]] && { SSH_PUBKEY="$keyfile"; break; } - done +# ──── Auto-detect SSH keys ─────────────────────────────────────── +REAL_HOME="${HOME}" +[[ -n "${SUDO_USER:-}" ]] && REAL_HOME="$(getent passwd "$SUDO_USER" | cut -d: -f6)" + +SSH_KEYS_CONTENT="" +SSH_KEY_SOURCE="" + +# Collect SSH keys from authorized_keys + local pubkeys (deduplicated) +SSH_KEY_SOURCE="" +if [[ -f "$REAL_HOME/.ssh/authorized_keys" ]]; then + SSH_KEYS_CONTENT="$(grep -v '^#' "$REAL_HOME/.ssh/authorized_keys" | grep -v '^$')" + SSH_KEY_SOURCE="$REAL_HOME/.ssh/authorized_keys" fi -SSH_KEY_CONTENT="" -if [[ -n "$SSH_PUBKEY" && -f "$SSH_PUBKEY" ]]; then - SSH_KEY_CONTENT="$(cat "$SSH_PUBKEY")" - log "SSH key: ${BOLD}$SSH_PUBKEY${NC}" -else - warn "No SSH public key found. Set SSH_PUBKEY=/path/to/key.pub" - warn "Install mode will use root password 'changeme' as fallback." +# Also include local pubkey files (they may not be in authorized_keys) +for keyfile in "$REAL_HOME/.ssh/id_ed25519.pub" "$REAL_HOME/.ssh/id_rsa.pub" "$REAL_HOME/.ssh/id_ecdsa.pub"; do + if [[ -f "$keyfile" ]]; then + KEY_DATA="$(cat "$keyfile")" + KEY_FP="$(awk '{print $2}' "$keyfile")" + if [[ -n "$SSH_KEYS_CONTENT" ]]; then + # Add only if not already present + if ! 
echo "$SSH_KEYS_CONTENT" | grep -qF "$KEY_FP"; then + SSH_KEYS_CONTENT="$SSH_KEYS_CONTENT"$'\n'"$KEY_DATA" + SSH_KEY_SOURCE="${SSH_KEY_SOURCE} + $keyfile" + fi + else + SSH_KEYS_CONTENT="$KEY_DATA" + SSH_KEY_SOURCE="$keyfile" + fi + fi +done + +# Priority 3: generate a keypair +if [[ -z "$SSH_KEYS_CONTENT" ]]; then + GENERATED_KEY="$BASTION_DIR/bastion_ed25519" + if [[ ! -f "$GENERATED_KEY" ]]; then + log "No SSH keys found — generating ed25519 keypair..." + ssh-keygen -t ed25519 -f "$GENERATED_KEY" -N "" -C "bastion-generated@$(hostname)" >/dev/null 2>&1 + fi + SSH_KEYS_CONTENT="$(cat "${GENERATED_KEY}.pub")" + SSH_KEY_SOURCE="$GENERATED_KEY (generated)" + warn "Using generated keypair: ${BOLD}$GENERATED_KEY${NC}" + warn "Save this private key — it's the only way to access installed machines." +fi + +SSH_KEY_COUNT="$(echo "$SSH_KEYS_CONTENT" | wc -l)" +log "SSH keys: ${BOLD}${SSH_KEY_COUNT} key(s)${NC} from ${BOLD}${SSH_KEY_SOURCE}${NC}" + +# ──── Detect admin username ────────────────────────────────────── +ADMIN_USER="${SUDO_USER:-$USER}" +[[ "$ADMIN_USER" == "root" ]] && ADMIN_USER="" +if [[ -n "$ADMIN_USER" ]]; then + log "Admin user: ${BOLD}${ADMIN_USER}${NC} (will be created on installed machines)" fi # ──── Prepare directories ──────────────────────────────────────── @@ -264,13 +364,8 @@ FEDORA_MIRROR="https://download.fedoraproject.org/pub/fedora/linux/releases/${FE log "Preparing boot artifacts (Fedora ${FEDORA_VERSION} ${ARCH})..." 
copy_if_missing "/usr/share/ipxe/undionly.kpxe" "$TFTPDIR/undionly.kpxe" "iPXE BIOS" -# UEFI x86_64: two-stage PXE boot -# Stage 1: tiny PXE loader stub (<20KB) fits in constrained TFTP buffers -# Stage 2: full iPXE binary downloaded via UEFI PXE protocol (no size limit) -PXELOADER_SRC="$(cd "$(dirname "$0")" && pwd)/pxeloader.c" -[[ -f "$PXELOADER_SRC" ]] || PXELOADER_SRC="$(dirname "${BASH_SOURCE[0]}")/pxeloader.c" -build_pxeloader "$PXELOADER_SRC" "$TFTPDIR/ipxe.efi" "PXE loader stub (stage 1)" -copy_if_missing "/usr/share/ipxe/ipxe-snponly-x86_64.efi" "$TFTPDIR/ipxe-real.efi" "iPXE UEFI x86_64 (stage 2)" +# UEFI x86_64: serve iPXE directly via TFTP (UEFI has no TFTP size limit) +copy_if_missing "/usr/share/ipxe/ipxe-snponly-x86_64.efi" "$TFTPDIR/ipxe.efi" "iPXE UEFI x86_64" copy_if_missing "/usr/share/ipxe/arm64-efi/snponly.efi" "$TFTPDIR/ipxe-arm64.efi" "iPXE UEFI arm64" download "${FEDORA_MIRROR}/images/pxeboot/vmlinuz" "$HTTPDIR/vmlinuz" "Fedora kernel" @@ -375,25 +470,29 @@ except Exception as e: " fi -# ── Power off — do NOT let Anaconda proceed ── +# ── Reboot — do NOT let Anaconda proceed ── echo "" -echo "=== Discovery complete, powering off ===" +echo "=== Discovery complete, rebooting ===" echo "" sleep 3 echo 1 > /proc/sys/kernel/sysrq -echo o > /proc/sysrq-trigger +echo b > /proc/sysrq-trigger sleep 5 -poweroff -f +reboot -f %end # Anaconda should never get here, but just in case: -poweroff +reboot DISCOVER_KS # Patch in the bastion URL sed -i "s|__BASTION_URL__|http://${SERVER_IP}:${HTTP_PORT}|g" "$HTTPDIR/discover.ks" +# Save SSH keys and admin user for the HTTP server to use +echo "$SSH_KEYS_CONTENT" > "$BASTION_DIR/ssh_keys" +echo "$ADMIN_USER" > "$BASTION_DIR/admin_user" + # ──── Generate iPXE boot script ─────────────────────────────────── # Initial iPXE script chains to /dispatch with the MAC, so the server # can route to discover or install mode per machine. 
@@ -431,9 +530,17 @@ SERVER_IP = sys.argv[3] HTTP_PORT = int(sys.argv[4]) FEDORA_VER = sys.argv[5] FEDORA_MIRROR = sys.argv[6] -SSH_KEY = sys.argv[7] if len(sys.argv) > 7 else "" +SSH_KEYS_FILE = sys.argv[7] if len(sys.argv) > 7 else "" TIMEZONE = sys.argv[8] if len(sys.argv) > 8 else "Europe/London" LOCALE = sys.argv[9] if len(sys.argv) > 9 else "en_GB.UTF-8" +DOMAIN = sys.argv[10] if len(sys.argv) > 10 else "ad.itaz.eu" +ADMIN_USER = sys.argv[11] if len(sys.argv) > 11 else "" + +# Load SSH keys from file +SSH_KEYS = [] +if SSH_KEYS_FILE and os.path.isfile(SSH_KEYS_FILE): + with open(SSH_KEYS_FILE) as f: + SSH_KEYS = [l.strip() for l in f if l.strip() and not l.startswith('#')] # ── State management (file-backed, lock-protected) ─────────────── @@ -452,19 +559,66 @@ def save_state(state): # ── Kickstart generation ───────────────────────────────────────── -def generate_kickstart(hostname, disk="", ssh_key=""): - disk_cmds = "clearpart --all --initlabel\nautopart --type=plain" - if disk: - disk_cmds = f"ignoredisk --only-use={disk}\nclearpart --all --initlabel --drives={disk}\nautopart --type=plain" +def generate_kickstart(hostname, disk="", ssh_keys=None, domain="", role="worker", admin_user=""): + ssh_keys = ssh_keys or [] + fqdn = f"{hostname}.{domain}" if domain else hostname + vg = "labvg" - if ssh_key: - auth = f'rootpw --lock\nsshkey --username=root "{ssh_key}"' + # ── Auth ── + if ssh_keys: + auth = f'rootpw --lock\nsshkey --username=root "{ssh_keys[0]}"' else: auth = 'rootpw --plaintext changeme' - return f"""# Lab Bastion — Fedora {FEDORA_VER} install + # ── Admin user (kickstart directive) ── + user_directive = "" + if admin_user: + user_directive = f'user --name={admin_user} --groups=wheel --lock' + + # ── SSH keys for %post (root + admin user) ── + all_keys = "\n".join(ssh_keys) + ssh_post_block = "" + if ssh_keys: + ssh_post_block = f""" +# Set up SSH keys for root +mkdir -p /root/.ssh && chmod 700 /root/.ssh +cat > /root/.ssh/authorized_keys << 
'SSHKEYS' +{all_keys} +SSHKEYS +chmod 600 /root/.ssh/authorized_keys""" + + if admin_user and ssh_keys: + ssh_post_block += f""" + +# Set up SSH keys for {admin_user} +ADMIN_HOME=$(getent passwd {admin_user} | cut -d: -f6) +mkdir -p "$ADMIN_HOME/.ssh" && chmod 700 "$ADMIN_HOME/.ssh" +cp /root/.ssh/authorized_keys "$ADMIN_HOME/.ssh/authorized_keys" +chown -R {admin_user}:{admin_user} "$ADMIN_HOME/.ssh" +chmod 600 "$ADMIN_HOME/.ssh/authorized_keys" + +# Fix SELinux contexts for SSH +restorecon -R /root/.ssh "$ADMIN_HOME/.ssh" 2>/dev/null || true + +# Passwordless sudo for {admin_user} +echo '{admin_user} ALL=(ALL) NOPASSWD: ALL' > /etc/sudoers.d/{admin_user} +chmod 440 /etc/sudoers.d/{admin_user}""" + + # ── Determine disk (auto-detect first NVMe/SDA if not specified) ── + disk_line = f'DISK="{disk}"' if disk else ''' +DISK="" +for d in /dev/nvme0n1 /dev/sda /dev/vda; do + [ -b "$d" ] && { DISK="$(basename $d)"; break; } +done +[ -z "$DISK" ] && { echo "ERROR: no disk found"; exit 1; } +''' + + # ── LVM layout sizes (MB) ── + has_longhorn = (role == "worker") + + return f"""# Lab Bastion -- Fedora {FEDORA_VER} server install # Generated: {datetime.now().isoformat()} -# Target: {hostname} +# Target: {fqdn} (role={role}) text reboot @@ -473,39 +627,266 @@ lang {LOCALE} keyboard uk timezone {TIMEZONE} --utc -network --bootproto=dhcp --activate --hostname={hostname} +network --bootproto=dhcp --activate --hostname={fqdn} {auth} - -{disk_cmds} +{user_directive} bootloader --append="console=tty0 console=ttyS0,115200n8" url --mirrorlist=https://mirrors.fedoraproject.org/mirrorlist?repo=fedora-$releasever&arch=$basearch +# Partitioning is generated dynamically by %pre (supports longhorn preservation) +%include /tmp/part.ks + +%pre --log=/tmp/pre-partition.log +#!/bin/bash +set -x + +# Progress callback helper +bastion_progress() {{ + local stage="$1" detail="${{2:-}}" + local mac=$(ip link show | awk '/ether/ && !/00:00:00:00/ {{print $2; exit}}') + curl -sf -X POST 
"http://{SERVER_IP}:{HTTP_PORT}/api/progress" \ + -H "Content-Type: application/json" \ + -d "{{\\"mac\\":\\"$mac\\",\\"stage\\":\\"$stage\\",\\"detail\\":\\"$detail\\"}}" 2>/dev/null || true +}} + +bastion_progress "partitioning" "preparing disk layout" + +VG="{vg}" +{disk_line} + +REPROVISION=no + +# Check if VG exists (reprovision scenario) +if vgs $VG &>/dev/null; then + echo "=== Existing VG found - reprovision mode ===" + REPROVISION=yes + + # Detect which data LVs to preserve + PRESERVE_LONGHORN=no; PRESERVE_SRV=no; PRESERVE_HOME=no + lvs $VG/longhorn &>/dev/null && PRESERVE_LONGHORN=yes + lvs $VG/srv &>/dev/null && PRESERVE_SRV=yes + lvs $VG/home &>/dev/null && PRESERVE_HOME=yes + + echo "Preserving: longhorn=$PRESERVE_LONGHORN srv=$PRESERVE_SRV home=$PRESERVE_HOME" + + # Remove only OS logical volumes (keep data LVs) + for lv in root var varlog swap; do + lvremove -f $VG/$lv 2>/dev/null || true + done +fi + +if [ "$REPROVISION" = "yes" ]; then + # Find existing boot partitions by type + EFI_PART=$(blkid -t TYPE=vfat -o device /dev/${{DISK}}* 2>/dev/null | head -1) + BOOT_PART=$(blkid -t TYPE=ext4 -o device /dev/${{DISK}}* 2>/dev/null | head -1) + EFI_PART=${{EFI_PART:-/dev/${{DISK}}1}} + BOOT_PART=${{BOOT_PART:-/dev/${{DISK}}2}} + echo "Reusing EFI=$EFI_PART BOOT=$BOOT_PART" + + # Build partition config reusing existing PV/VG + cat > /tmp/part.ks << PARTEOF +ignoredisk --only-use=$DISK +clearpart --none +part /boot/efi --onpart=$EFI_PART --fstype=efi +part /boot --onpart=$BOOT_PART --fstype=ext4 +volgroup {vg} --useexisting --noformat +logvol swap --vgname={vg} --name=swap --fstype=swap --size=27648 +logvol / --vgname={vg} --name=root --fstype=xfs --size=33792 +logvol /var --vgname={vg} --name=var --fstype=xfs --size=102400 +logvol /var/log --vgname={vg} --name=varlog --fstype=xfs --size=10240 +PARTEOF + + # Preserve or recreate data LVs + if [ "$PRESERVE_HOME" = "yes" ]; then + echo "logvol /home --vgname={vg} --name=home --useexisting --noformat" >> 
/tmp/part.ks + else + echo "logvol /home --vgname={vg} --name=home --fstype=xfs --size=10240" >> /tmp/part.ks + fi + + if [ "$PRESERVE_SRV" = "yes" ]; then + echo "logvol /srv --vgname={vg} --name=srv --useexisting --noformat" >> /tmp/part.ks + else + echo "logvol /srv --vgname={vg} --name=srv --fstype=xfs --size=20480" >> /tmp/part.ks + fi + + if [ "$PRESERVE_LONGHORN" = "yes" ]; then + echo "logvol /var/lib/longhorn --vgname={vg} --name=longhorn --useexisting --noformat" >> /tmp/part.ks + fi + +else + # Fresh install + cat > /tmp/part.ks << PARTEOF +ignoredisk --only-use=$DISK +clearpart --all --initlabel --drives=$DISK +part /boot/efi --fstype=efi --size=600 --ondisk=$DISK +part /boot --fstype=ext4 --size=3072 --ondisk=$DISK +part pv.01 --size=1 --grow --ondisk=$DISK +volgroup {vg} pv.01 +logvol swap --vgname={vg} --name=swap --fstype=swap --size=27648 +logvol / --vgname={vg} --name=root --fstype=xfs --size=33792 +logvol /var --vgname={vg} --name=var --fstype=xfs --size=102400 +logvol /var/log --vgname={vg} --name=varlog --fstype=xfs --size=10240 +logvol /home --vgname={vg} --name=home --fstype=xfs --size=10240 +logvol /srv --vgname={vg} --name=srv --fstype=xfs --size=20480 +{"logvol /var/lib/longhorn --vgname=" + vg + " --name=longhorn --fstype=xfs --grow --size=1" if has_longhorn else ""} +PARTEOF +fi + +echo "=== Generated partition config ===" +cat /tmp/part.ks +echo "===================================" + +bastion_progress "partitioning" "layout ready, starting install" + +%end + %packages @core -@server-product openssh-server vim-enhanced tmux git curl +wget python3 lshw dmidecode dnf-plugins-core + +# Networking and diagnostics +NetworkManager +bind-utils +net-tools +iproute +iputils +traceroute +tcpdump +htop +iotop +strace +jq + +# k3s prerequisites +container-selinux +iptables-nft +nftables +policycoreutils-python-utils +chrony +tar +socat +conntrack-tools +ethtool + +# Boot management +efibootmgr + +# Puppet prerequisites +ruby +ruby-libs + +# Exclude 
desktop +-@workstation-product +-@gnome-desktop +-gnome-shell +-gdm +-PackageKit +-PackageKit-glib %end %post --log=/root/bastion-post-install.log #!/bin/bash set -x + +# Progress callback helper +bastion_progress() {{ + local stage="$1" detail="${{2:-}}" + local mac=$(ip link show | awk '/ether/ && !/00:00:00:00/ {{print $2; exit}}') + curl -sf -X POST "http://{SERVER_IP}:{HTTP_PORT}/api/progress" \ + -H "Content-Type: application/json" \ + -d "{{\\"mac\\":\\"$mac\\",\\"stage\\":\\"$stage\\",\\"detail\\":\\"$detail\\"}}" 2>/dev/null || true +}} + +bastion_progress "post-install" "configuring system" + +# ── SSH ── systemctl enable --now sshd sed -i 's/^#\\?PermitRootLogin.*/PermitRootLogin prohibit-password/' /etc/ssh/sshd_config sed -i 's/^#\\?PasswordAuthentication.*/PasswordAuthentication no/' /etc/ssh/sshd_config -hostnamectl set-hostname {hostname} -echo "Provisioned by lab-bastion on $(date -Iseconds)" > /etc/lab-provisioned -echo "# Lab node — puppet enrollment pending" > /root/README +{ssh_post_block} + +# ── Hostname and domain ── +hostnamectl set-hostname {fqdn} + +# ── tmpfs for /tmp ── +echo "tmpfs /tmp tmpfs defaults,noatime,nosuid,nodev,size=4G 0 0" >> /etc/fstab + +# ── Kernel modules for k3s ── +cat > /etc/modules-load.d/k3s.conf << 'MODULES' +br_netfilter +overlay +ip_conntrack +MODULES +modprobe br_netfilter || true +modprobe overlay || true + +# ── Sysctl for k3s networking ── +cat > /etc/sysctl.d/90-k3s.conf << 'SYSCTL' +net.bridge.bridge-nf-call-iptables = 1 +net.bridge.bridge-nf-call-ip6tables = 1 +net.ipv4.ip_forward = 1 +net.ipv6.conf.all.forwarding = 1 +fs.inotify.max_user_instances = 524288 +fs.inotify.max_user_watches = 1048576 +SYSCTL +sysctl --system || true + +# ── Disable firewalld (k3s manages its own iptables rules) ── +systemctl disable --now firewalld || true + +# ── Enable chronyd for time sync ── +systemctl enable --now chronyd + +# ── Set boot order: local disk first, PXE after ── +if command -v efibootmgr >/dev/null 2>&1; 
then + # Find the Fedora boot entry and move it first + FEDORA_ENTRY=$(efibootmgr | grep -i fedora | head -1 | grep -oP 'Boot\\K[0-9A-F]+') + if [ -n "$FEDORA_ENTRY" ]; then + CURRENT_ORDER=$(efibootmgr | grep BootOrder | cut -d: -f2 | tr -d ' ') + # Put Fedora first, keep rest + NEW_ORDER="$FEDORA_ENTRY,$(echo "$CURRENT_ORDER" | sed "s/$FEDORA_ENTRY,\\?//;s/,$//")" + efibootmgr -o "$NEW_ORDER" || true + echo "Boot order set: Fedora first ($NEW_ORDER)" + fi +fi + +# ── Provisioning metadata ── +cat > /etc/lab-provisioned << PROVEOF +hostname: {fqdn} +role: {role} +provisioned: $(date -Iseconds) +bastion: {SERVER_IP} +PROVEOF + +cat > /root/README << 'README' +# Lab Node -- {fqdn} (role: {role}) +# +# Next steps: +# 1. Install puppet agent: +# dnf install -y puppet-agent +# +# 2. Install k3s: +# curl -sfL https://get.k3s.io | sh - +# +# 3. Or join existing cluster: +# curl -sfL https://get.k3s.io | K3S_URL=https://:6443 K3S_TOKEN= sh - +README + +IP_ADDR=$(ip -4 addr show | awk '/inet / && !/127.0.0/ {{split($2,a,"/"); print a[1]; exit}}') +bastion_progress "complete" "ready at $IP_ADDR" + %end """ @@ -562,6 +943,25 @@ def print_install_started(mac, hostname): print(f" Serving Fedora {FEDORA_VER} installer + kickstart...") print(f"\n{'─' * 60}\n", flush=True) +PROGRESS_ICONS = { + "partitioning": "◆", + "installing": "◆◆", + "post-install": "◆◆◆", + "complete": "✔", + "error": "✘", +} + +def print_progress(mac, stage, detail=""): + icon = PROGRESS_ICONS.get(stage, "·") + color = GREEN if stage == "complete" else (RED if stage == "error" else YELLOW) + detail_str = f" -- {detail}" if detail else "" + print(f" {color}{icon}{RESET} {mac} {BOLD}{stage}{RESET}{detail_str}", flush=True) + if stage == "complete" and detail: + ip = detail.replace("ready at ", "").strip() + if ip: + admin = ADMIN_USER or "root" + print(f"\n {GREEN}{BOLD} ssh {admin}@{ip}{RESET}\n", flush=True) + # ── HTTP Handler ────────────────────────────────────────────────── class 
BastionHandler(SimpleHTTPRequestHandler): @@ -603,7 +1003,7 @@ class BastionHandler(SimpleHTTPRequestHandler): echo echo ============================================= -echo Lab PXE Bastion — INSTALLING Fedora {FEDORA_VER} +echo Lab PXE Bastion - INSTALLING Fedora {FEDORA_VER} echo Target: {hostname} echo MAC: {mac} echo ============================================= @@ -614,13 +1014,31 @@ initrd http://{SERVER_IP}:{HTTP_PORT}/initrd.img boot """ self.send_text(200, script) + + elif mac in state.get("installed", {}): + info = state["installed"][mac] + hostname = info.get("hostname", "?") + print(f" {GREEN}PXE request from {mac} ({hostname}) - already installed, booting local disk{RESET}", flush=True) + script = f"""#!ipxe + +echo +echo ============================================= +echo Lab PXE Bastion - {hostname} +echo Already installed, booting from local disk +echo ============================================= +echo +sleep 3 +exit +""" + self.send_text(200, script) + else: print(f" {YELLOW}PXE request from {mac} → discovery mode{RESET}", flush=True) script = f"""#!ipxe echo echo ============================================= -echo Lab PXE Bastion — DISCOVERY MODE +echo Lab PXE Bastion - DISCOVERY MODE echo MAC: {mac} echo Collecting hardware info... 
echo ============================================= @@ -642,7 +1060,10 @@ boot ks = generate_kickstart( hostname=cfg.get("hostname", "lab-node"), disk=cfg.get("disk", ""), - ssh_key=SSH_KEY, + ssh_keys=SSH_KEYS, + domain=DOMAIN, + role=cfg.get("role", "worker"), + admin_user=ADMIN_USER, ) self.send_text(200, ks) return @@ -710,15 +1131,21 @@ boot mac = data.get("mac", "").lower().replace("-", ":") hostname = data.get("hostname", "lab-node") disk = data.get("disk", "") + role = data.get("role", "worker") if not mac: self.send_json(400, {"error": "mac is required"}) return + if role not in ("worker", "infra"): + self.send_json(400, {"error": "role must be 'worker' or 'infra'"}) + return + state = load_state() state.setdefault("install_queue", {})[mac] = { "hostname": hostname, "disk": disk, + "role": role, "queued_at": datetime.now().isoformat(), } save_state(state) @@ -729,10 +1156,49 @@ boot "status": "queued", "mac": mac, "hostname": hostname, - "message": "PXE boot the machine to start installation", + "role": role, + "message": f"PXE boot the machine to start installation (role={role})", }) return + # ── Install progress callback from kickstart ── + if parsed.path == "/api/progress": + try: + data = json.loads(body) + except json.JSONDecodeError: + self.send_json(400, {"error": "invalid JSON"}) + return + + mac = data.get("mac", "unknown").lower() + stage = data.get("stage", "unknown") + detail = data.get("detail", "") + + print_progress(mac, stage, detail) + + # Update state with progress + state = load_state() + if mac in state.get("install_queue", {}): + state["install_queue"][mac]["progress"] = stage + state["install_queue"][mac]["progress_at"] = datetime.now().isoformat() + if detail: + state["install_queue"][mac]["progress_detail"] = detail + + # Move to installed on completion + if stage == "complete": + cfg = state["install_queue"].pop(mac) + ip = detail.replace("ready at ", "").strip() if detail else "" + state.setdefault("installed", {})[mac] = { + 
"hostname": cfg.get("hostname", "?"), + "role": cfg.get("role", "?"), + "ip": ip, + "installed_at": datetime.now().isoformat(), + } + + save_state(state) + + self.send_json(200, {"status": "ok"}) + return + self.send_json(404, {"error": "not found"}) @@ -850,9 +1316,11 @@ python3 "$BASTION_DIR/server.py" \ "$HTTP_PORT" \ "$FEDORA_VERSION" \ "$FEDORA_MIRROR" \ - "$SSH_KEY_CONTENT" \ + "$BASTION_DIR/ssh_keys" \ "$TIMEZONE" \ - "$LOCALE" & + "$LOCALE" \ + "$DOMAIN" \ + "$ADMIN_USER" & HTTP_PID=$! sleep 1 @@ -871,6 +1339,7 @@ echo -e " Network: ${BOLD}${NETWORK}/24${NC} via ${BOLD}${IFACE}${NC}" echo -e " DHCP: ${BOLD}${DHCP_MODE}${NC}$(if [[ "$DHCP_MODE" == "full" ]]; then echo " (${DHCP_RANGE_START}–${DHCP_RANGE_END})"; else echo " (alongside existing DHCP)"; fi)" echo -e " HTTP: ${BOLD}http://${SERVER_IP}:${HTTP_PORT}/${NC}" echo -e " OS: ${BOLD}Fedora ${FEDORA_VERSION} (${ARCH})${NC}" +echo -e " Domain: ${BOLD}${DOMAIN}${NC}" echo -e " State: ${BOLD}${STATEFILE}${NC}" echo "" echo -e " ${YELLOW}PXE boot any machine on this network.${NC}" diff --git a/test-reprovision.sh b/test-reprovision.sh new file mode 100755 index 0000000..7469893 --- /dev/null +++ b/test-reprovision.sh @@ -0,0 +1,279 @@ +#!/usr/bin/env bash +# ───────────────────────────────────────────────────────────── +# Test: reprovision preserves /home, /srv, /var/lib/longhorn +# +# Usage: sudo bash test-reprovision.sh +# sudo bash test-reprovision.sh --skip-first-install # if disk already has a first install +# sudo bash test-reprovision.sh --cleanup # just remove the VM and disk +# ───────────────────────────────────────────────────────────── +set -euo pipefail + +VM_NAME="test-bastion-ks" +DISK_PATH="/var/lib/libvirt/images/test-reprovision.qcow2" +DISK_SIZE=20 # GB +KS_PATH="/tmp/test-vm.ks" +FEDORA_MIRROR="https://download.fedoraproject.org/pub/fedora/linux/releases/43/Everything/x86_64/os/" +OVMF_CODE="/usr/share/edk2/ovmf/OVMF_CODE.fd" +OVMF_VARS="/usr/share/OVMF/OVMF_VARS.fd" + +RED='\033[0;31m'; 
GREEN='\033[0;32m'; YELLOW='\033[1;33m'
CYAN='\033[0;36m'; BOLD='\033[1m'; NC='\033[0m'

# ── Logging helpers ──
# log  : informational message on stdout.
# err  : error message on stderr (uses RED, which is not defined in this part
#        of the script and is expected to be set earlier in the file).
# step : banner that separates the phases of the test.
log()  { echo -e "${GREEN}[test]${NC} $*"; }
err()  { echo -e "${RED}[test]${NC} $*" >&2; }
step() { echo -e "\n${CYAN}${BOLD}══ $* ══${NC}\n"; }

# Stop and undefine the test VM (including its NVRAM file).
# Best-effort by design: the VM may not exist or may not be running, so
# failures of either virsh call are ignored.
cleanup_vm() {
    virsh destroy "$VM_NAME" 2>/dev/null || true
    virsh undefine "$VM_NAME" --nvram 2>/dev/null || true
}

# Remove the test VM and delete its disk image.
cleanup_all() {
    cleanup_vm
    rm -f -- "$DISK_PATH"
    log "Cleaned up VM and disk"
}

# ── Handle args ──
#   --skip-first-install  reuse the existing disk image, skip the fresh install
#   --cleanup             remove the VM and its disk, then exit
SKIP_FIRST=false
for arg in "$@"; do
    case "$arg" in
        --skip-first-install) SKIP_FIRST=true ;;
        --cleanup) cleanup_all; exit 0 ;;
        # Unknown arguments are still ignored, but no longer silently: a
        # mistyped --skip-first-install would otherwise wipe the disk and
        # reinstall without any hint as to why.
        *) err "Ignoring unknown argument: $arg" ;;
    esac
done

[[ $EUID -eq 0 ]] || { err "Must run as root"; exit 1; }

# ── Generate kickstart ──
# Write the kickstart file to $KS_PATH.
# The heredoc delimiter is quoted, so nothing inside is expanded by this
# script; the %pre section runs inside the installer. When the "labvg" volume
# group already exists, %pre removes the root/var/varlog/swap LVs and emits a
# partition layout that reuses the existing home/srv/longhorn LVs without
# formatting them; otherwise it emits a layout that clears the disk.
generate_kickstart() {
    cat > "$KS_PATH" << 'KSEOF'
text
reboot
lang en_GB.UTF-8
keyboard uk
timezone Europe/London --utc
network --bootproto=dhcp --activate --hostname=test-vm.ad.itaz.eu
rootpw --plaintext testpass
user --name=michal --groups=wheel
bootloader --append="console=ttyS0,115200n8"
url --mirrorlist=https://mirrors.fedoraproject.org/mirrorlist?repo=fedora-43&arch=x86_64
%include /tmp/part.ks

%pre --log=/tmp/pre-partition.log
#!/bin/bash
set -x
VG="labvg"
DISK="vda"

REPROVISION=no
if vgs $VG &>/dev/null; then
    echo "=== REPROVISION MODE ==="
    REPROVISION=yes
    PRESERVE_LONGHORN=no; PRESERVE_SRV=no; PRESERVE_HOME=no
    lvs $VG/longhorn &>/dev/null && PRESERVE_LONGHORN=yes
    lvs $VG/srv &>/dev/null && PRESERVE_SRV=yes
    lvs $VG/home &>/dev/null && PRESERVE_HOME=yes
    echo "Preserving: longhorn=$PRESERVE_LONGHORN srv=$PRESERVE_SRV home=$PRESERVE_HOME"
    for lv in root var varlog swap; do
        lvremove -f $VG/$lv 2>/dev/null || true
    done
fi

if [ "$REPROVISION" = "yes" ]; then
    EFI_PART=$(blkid -t TYPE=vfat -o device /dev/${DISK}* 2>/dev/null | head -1)
    BOOT_PART=$(blkid -t TYPE=ext4 -o device /dev/${DISK}* 2>/dev/null | head -1)
    EFI_PART=${EFI_PART:-/dev/${DISK}1}
    BOOT_PART=${BOOT_PART:-/dev/${DISK}2}
    echo "Reusing EFI=$EFI_PART BOOT=$BOOT_PART"

    cat > /tmp/part.ks << PARTEOF
ignoredisk --only-use=$DISK
clearpart --none
part /boot/efi --onpart=$EFI_PART --fstype=efi
part /boot --onpart=$BOOT_PART --fstype=ext4
volgroup labvg --useexisting --noformat
logvol swap --vgname=labvg --name=swap --fstype=swap --size=1024
logvol / --vgname=labvg --name=root --fstype=xfs --size=4096
logvol /var --vgname=labvg --name=var --fstype=xfs --size=3072
logvol /var/log --vgname=labvg --name=varlog --fstype=xfs --size=1024
PARTEOF
    if [ "$PRESERVE_HOME" = "yes" ]; then
        echo "logvol /home --vgname=labvg --name=home --useexisting --noformat" >> /tmp/part.ks
    else
        echo "logvol /home --vgname=labvg --name=home --fstype=xfs --size=1024" >> /tmp/part.ks
    fi
    if [ "$PRESERVE_SRV" = "yes" ]; then
        echo "logvol /srv --vgname=labvg --name=srv --useexisting --noformat" >> /tmp/part.ks
    else
        echo "logvol /srv --vgname=labvg --name=srv --fstype=xfs --size=1024" >> /tmp/part.ks
    fi
    if [ "$PRESERVE_LONGHORN" = "yes" ]; then
        echo "logvol /var/lib/longhorn --vgname=labvg --name=longhorn --useexisting --noformat" >> /tmp/part.ks
    fi
else
    cat > /tmp/part.ks << PARTEOF
ignoredisk --only-use=$DISK
clearpart --all --initlabel --drives=$DISK
part /boot/efi --fstype=efi --size=600 --ondisk=$DISK
part /boot --fstype=ext4 --size=1024 --ondisk=$DISK
part pv.01 --size=1 --grow --ondisk=$DISK
volgroup labvg pv.01
logvol swap --vgname=labvg --name=swap --fstype=swap --size=1024
logvol / --vgname=labvg --name=root --fstype=xfs --size=4096
logvol /var --vgname=labvg --name=var --fstype=xfs --size=3072
logvol /var/log --vgname=labvg --name=varlog --fstype=xfs --size=1024
logvol /home --vgname=labvg --name=home --fstype=xfs --size=1024
logvol /srv --vgname=labvg --name=srv --fstype=xfs --size=1024
logvol /var/lib/longhorn --vgname=labvg --name=longhorn --fstype=xfs --grow --size=1
PARTEOF
fi

echo "=== Generated partition config ==="
cat /tmp/part.ks
%end

%packages
@core
openssh-server
%end

%post
echo "Installed $(date -Iseconds)" > /etc/lab-provisioned
echo "testpass" | passwd --stdin michal
%end
KSEOF
}

# ── Install helper ──
# Run one unattended installation of the test VM with virt-install.
# Arguments:
#   $1 - label used in log messages ("fresh" / "reprovision")
#   $2 - value passed to virt-install --disk
# Returns:
#   virt-install's exit status. The VM is destroyed afterwards either way so
#   the disk image can be inspected offline.
run_install() {
    local label="$1"
    local disk_args="$2"
    local rc=0

    log "Running virt-install ($label)..."
    virt-install \
        --name "$VM_NAME" \
        --ram 4096 \
        --vcpus 2 \
        --disk "$disk_args" \
        --os-variant fedora-unknown \
        --network network=default \
        --location "$FEDORA_MIRROR" \
        --initrd-inject "$KS_PATH" \
        --extra-args "inst.ks=file:///test-vm.ks console=ttyS0,115200n8 inst.text" \
        --boot loader="$OVMF_CODE",loader.readonly=yes,loader.type=pflash,nvram.template="$OVMF_VARS" \
        --noautoconsole \
        --wait -1 || rc=$?

    # Previously "install complete" was logged whatever virt-install returned.
    if (( rc == 0 )); then
        log "virt-install exited — install complete"
    else
        err "virt-install ($label) exited with status $rc"
    fi

    virsh destroy "$VM_NAME" 2>/dev/null || true
    return "$rc"
}

# ── Main test flow ──

generate_kickstart
log "Kickstart generated at $KS_PATH"

# Counters are bumped with plain assignments rather than ((VAR++)): the
# arithmetic command returns status 1 when the old value is 0, which would
# abort the script if errexit is enabled.
PASS=0
FAIL=0

if ! $SKIP_FIRST; then
    # ── Step 1: Fresh install ──
    step "Step 1/4: Fresh install"
    cleanup_all
    run_install "fresh" "path=$DISK_PATH,size=$DISK_SIZE,bus=virtio"

    # Verify fresh install
    log "Verifying fresh install..."
    FILESYSTEMS=$(guestfish --ro -a "$DISK_PATH" -i list-filesystems 2>/dev/null) || FILESYSTEMS=""
    for lv in root var varlog home srv longhorn swap; do
        # Anchor the end of the LV name: a bare "labvg/var" substring match is
        # also satisfied by "labvg/varlog", hiding a missing var LV.
        if grep -qE "labvg/${lv}(:|[[:space:]]|\$)" <<< "$FILESYSTEMS"; then
            log " ✔ labvg/$lv exists"
            PASS=$((PASS + 1))
        else
            err " ✘ labvg/$lv MISSING"
            FAIL=$((FAIL + 1))
        fi
    done
else
    step "Skipping first install (--skip-first-install)"
    [[ -f "$DISK_PATH" ]] || { err "Disk not found at $DISK_PATH"; exit 1; }
fi

# ── Step 2: Write marker files ──
step "Step 2/4: Writing marker files to preserved partitions"
# Without the markers the preservation checks below are meaningless, so a
# guestfish failure here aborts the run instead of being reported as success.
if ! guestfish -a "$DISK_PATH" -i << 'GF'
write /home/michal/PRESERVE_TEST.txt "MARKER: home partition preserved\n"
write /srv/PRESERVE_TEST.txt "MARKER: srv partition preserved\n"
write /var/lib/longhorn/PRESERVE_TEST.txt "MARKER: longhorn partition preserved\n"
write /var/SHOULD_BE_WIPED.txt "This file should NOT survive reprovision\n"
GF
then
    err "Failed to write marker files to $DISK_PATH"
    exit 1
fi
log "Marker files written:"
log " /home/michal/PRESERVE_TEST.txt"
log " /srv/PRESERVE_TEST.txt"
log " /var/lib/longhorn/PRESERVE_TEST.txt"
log " /var/SHOULD_BE_WIPED.txt (should be wiped)"

# ── Step 3: Reprovision ──
step "Step 3/4: Reprovisioning (reinstall on same disk)"
cleanup_vm
run_install "reprovision" "path=$DISK_PATH,bus=virtio"

# ── Step 4: Verify ──
step "Step 4/4: Verifying preservation"

# Check one marker file on the (offline) disk image and update PASS/FAIL.
# Arguments:
#   $1 - path of the file inside the guest
#   $2 - "exists": file must be present and contain "MARKER"
#        "gone"  : file must not exist
#   $3 - label used in the result line
check_file() {
    local path="$1" expect="$2" label="$3"
    local content present

    case "$expect" in
        exists)
            content=$(guestfish --ro -a "$DISK_PATH" -i cat "$path" 2>/dev/null) || content=""
            if [[ "$content" == *"MARKER"* ]]; then
                log " ✔ $label — PRESERVED: ${content%%$'\n'*}"
                PASS=$((PASS + 1))
            else
                err " ✘ $label — LOST (file missing or empty)"
                FAIL=$((FAIL + 1))
            fi
            ;;
        gone)
            # Ask guestfish explicitly whether the path exists. Inferring
            # "wiped" from empty `cat` output also passes when guestfish
            # itself fails (e.g. the disk cannot be inspected).
            present=$(guestfish --ro -a "$DISK_PATH" -i exists "$path" 2>/dev/null) || present=""
            case "$present" in
                false)
                    log " ✔ $label — correctly wiped"
                    PASS=$((PASS + 1))
                    ;;
                true)
                    err " ✘ $label — should have been wiped but still exists"
                    FAIL=$((FAIL + 1))
                    ;;
                *)
                    err " ✘ $label — could not inspect disk to verify wipe"
                    FAIL=$((FAIL + 1))
                    ;;
            esac
            ;;
        *)
            err " ✘ $label — unknown expectation '$expect'"
            FAIL=$((FAIL + 1))
            ;;
    esac
}

check_file "/home/michal/PRESERVE_TEST.txt" "exists" "/home (preserved)"
check_file "/srv/PRESERVE_TEST.txt" "exists" "/srv (preserved)"
check_file "/var/lib/longhorn/PRESERVE_TEST.txt" "exists" "/var/lib/longhorn (preserved)"
check_file "/var/SHOULD_BE_WIPED.txt" "gone" "/var (wiped)"

# Also verify OS was actually reinstalled
PROV_DATE=$(guestfish --ro -a "$DISK_PATH" -i cat /etc/lab-provisioned 2>/dev/null || echo "")
if [[ -n "$PROV_DATE" ]]; then
    log " ✔ OS reinstalled: $PROV_DATE"
    PASS=$((PASS + 1))
else
    err " ✘ /etc/lab-provisioned missing — OS not installed?"
    FAIL=$((FAIL + 1))
fi

# ── Summary ──
echo ""
echo -e "${BOLD}════════════════════════════════════════${NC}"
if [[ $FAIL -eq 0 ]]; then
    echo -e "${GREEN}${BOLD} ALL TESTS PASSED ($PASS/$((PASS+FAIL)))${NC}"
else
    echo -e "${RED}${BOLD} $FAIL TESTS FAILED ($PASS passed, $FAIL failed)${NC}"
fi
echo -e "${BOLD}════════════════════════════════════════${NC}"
echo ""

# ── Cleanup ──
log "Cleaning up VM (disk preserved at $DISK_PATH)"
cleanup_vm

# Exit status is the number of failed checks (0 = success).
exit "$FAIL"