feat: server kickstart with LVM, user creation, progress callbacks, reprovision

- LVM partition layout: /, /var, /var/log, /home, /srv, swap, tmpfs /tmp
  plus /var/lib/longhorn for worker role (grows to fill disk)
- Reprovision preserves /home, /srv, /var/lib/longhorn via %pre detection
- Admin user created matching the user running the bastion script
  with SSH keys from authorized_keys + local pubkeys, passwordless sudo
- Progress callbacks from %pre and %post to /api/progress endpoint
  with IP reported on completion (ssh command printed)
- Installed machines boot from local disk (iPXE exit) instead of
  re-entering discovery mode
- --role worker|infra flag (infra skips longhorn partition)
- reprovision subcommand: queues install + SSH reboot into PXE
- Self-cleanup: kills old bastion instances on start
- Domain config (DOMAIN env, default ad.itaz.eu)
- efibootmgr in %post to set local disk first in boot order
- k3s prereqs: kernel modules, sysctl, firewalld disabled, chrony
- VM reprovision test script (test-reprovision.sh)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Michal
2026-03-17 02:40:40 +00:00
parent 75d17eb87c
commit fac14b6d4a
2 changed files with 804 additions and 56 deletions

View File

@@ -27,6 +27,7 @@ HTTP_PORT="${HTTP_PORT:-8080}"
TIMEZONE="${TIMEZONE:-Europe/London}"
LOCALE="${LOCALE:-en_GB.UTF-8}"
BASTION_DIR="${BASTION_DIR:-/tmp/lab-bastion}"
DOMAIN="${DOMAIN:-ad.itaz.eu}" # internal domain for hostnames
DHCP_MODE="${DHCP_MODE:-proxy}" # proxy (alongside existing DHCP) or full (bastion IS the DHCP server)
DHCP_RANGE_START="${DHCP_RANGE_START:-}" # only for full mode, auto-derived if empty
DHCP_RANGE_END="${DHCP_RANGE_END:-}"
@@ -45,13 +46,19 @@ CMD="${1:-serve}"
case "$CMD" in
install)
[[ $# -ge 3 ]] || { echo "Usage: bastion.sh install <mac> <hostname> [--disk <dev>]"; exit 1; }
[[ $# -ge 3 ]] || { echo "Usage: bastion.sh install <mac> <hostname> [--role worker|infra] [--disk <dev>]"; exit 1; }
MAC="$2"
HOSTNAME="$3"
DISK="${5:-}" # --disk <dev>
PAYLOAD="{\"mac\":\"$MAC\",\"hostname\":\"$HOSTNAME\""
[[ -n "$DISK" ]] && PAYLOAD="$PAYLOAD,\"disk\":\"$DISK\""
PAYLOAD="$PAYLOAD}"
shift 3
DISK="" ROLE="worker"
while [[ $# -gt 0 ]]; do
case "$1" in
--disk) DISK="$2"; shift 2 ;;
--role) ROLE="$2"; shift 2 ;;
*) echo "Unknown option: $1"; exit 1 ;;
esac
done
PAYLOAD=$(python3 -c "import json; print(json.dumps({k:v for k,v in {'mac':'$MAC','hostname':'$HOSTNAME','disk':'$DISK','role':'$ROLE'}.items() if v}))")
RESULT=$(curl -sf -X POST "http://localhost:${HTTP_PORT}/api/install" \
-H "Content-Type: application/json" \
-d "$PAYLOAD" 2>&1) || die "Cannot reach bastion at localhost:${HTTP_PORT}. Is it running?"
@@ -93,16 +100,62 @@ print()
print('\033[1mINSTALLED\033[0m')
if installed:
for mac, info in installed.items():
print(f' {mac:<20} → {info.get(\"hostname\",\"?\")} ({info.get(\"installed_at\",\"?\")})')
ip = info.get('ip', '')
ip_str = f' ip={ip}' if ip else ''
print(f' {mac:<20} → {info.get(\"hostname\",\"?\")} role={info.get(\"role\",\"?\")}{ip_str} ({info.get(\"installed_at\",\"?\")})')
else:
print(' (none)')
print()
" 2>/dev/null || echo "$RESULT"
exit 0
;;
reprovision)
  # Queue a reinstall for <mac>, then try to reboot the machine into PXE over
  # SSH so the install starts without anyone touching the hardware.
  [[ $# -ge 3 ]] || { echo "Usage: bastion.sh reprovision <mac> <hostname> [--role worker|infra] [--disk <dev>]"; exit 1; }
  MAC="$2"
  HOSTNAME="$3"
  shift 3
  DISK="" ROLE="worker"
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --disk|--role)
        # A trailing flag without a value would make "shift 2" fail and the
        # loop would never advance, so reject it explicitly.
        [[ $# -ge 2 ]] || { echo "Option $1 requires a value"; exit 1; }
        if [[ "$1" == "--disk" ]]; then DISK="$2"; else ROLE="$2"; fi
        shift 2
        ;;
      *) echo "Unknown option: $1"; exit 1 ;;
    esac
  done

  # Queue the install. Values travel as argv rather than being spliced into
  # the Python source, so a quote in a hostname cannot break or inject code.
  # Empty fields are omitted from the JSON body.
  PAYLOAD=$(python3 -c 'import json, sys; print(json.dumps({k: v for k, v in zip(("mac", "hostname", "disk", "role"), sys.argv[1:]) if v}))' \
    "$MAC" "$HOSTNAME" "$DISK" "$ROLE")
  RESULT=$(curl -sf -X POST "http://localhost:${HTTP_PORT}/api/install" \
    -H "Content-Type: application/json" \
    -d "$PAYLOAD" 2>&1) || die "Cannot reach bastion at localhost:${HTTP_PORT}. Is it running?"
  echo "$RESULT" | python3 -m json.tool 2>/dev/null || echo "$RESULT"

  # Look up the IP recorded when the machine was last installed. The server
  # stores MACs lower-cased with ':' separators, so normalise the same way
  # before the lookup; otherwise "AA-BB-..." never matches.
  IP=$(curl -sf "http://localhost:${HTTP_PORT}/api/machines" 2>/dev/null | \
    python3 -c 'import json, sys; mac = sys.argv[1].lower().replace("-", ":"); print(json.load(sys.stdin).get("installed", {}).get(mac, {}).get("ip", ""))' "$MAC" 2>/dev/null || echo "")

  # The admin account on the target matches whoever ran the bastion; a plain
  # root session has no such account, so SSH is skipped in that case.
  ADMIN_USER="${SUDO_USER:-$USER}"
  [[ "$ADMIN_USER" == "root" ]] && ADMIN_USER=""

  if [[ -n "$IP" && -n "$ADMIN_USER" ]]; then
    echo ""
    echo "Attempting SSH reboot into PXE ($ADMIN_USER@$IP)..."
    # Remote side: pick the first PXE/network boot entry, set it as BootNext
    # and reboot; reboot anyway when no such entry exists.
    if ssh -o StrictHostKeyChecking=no -o ConnectTimeout=5 "$ADMIN_USER@$IP" \
      'sudo efibootmgr 2>/dev/null; PXE_ENTRY=$(sudo efibootmgr | grep -iE "pxe|network|ipv4" | head -1 | grep -oP "Boot\K[0-9A-F]+"); if [ -n "$PXE_ENTRY" ]; then sudo efibootmgr --bootnext "$PXE_ENTRY" && echo "PXE set as next boot" && sudo reboot; else echo "No PXE boot entry found, rebooting anyway..." && sudo reboot; fi' 2>&1; then
      echo ""
      echo "Machine is rebooting into PXE. Install will start automatically."
    else
      echo ""
      echo "SSH failed. Reboot the machine manually into PXE (e.g. via IPMI/KVM)."
    fi
  else
    echo ""
    echo "No IP known for this machine. Reboot it manually into PXE."
  fi
  exit 0
  ;;
serve) ;; # continue below
*)
echo "Usage: bastion.sh [serve|install <mac> <hostname>|list]"
echo "Usage: bastion.sh [serve|install|reprovision|list]"
exit 1
;;
esac
@@ -111,6 +164,17 @@ esac
# SERVE MODE — start the bastion
# ══════════════════════════════════════════════════════════════════
# ──── Kill old instances ──────────────────────────────────────────
# Stop any dnsmasq / HTTP server left over from a previous bastion run.
# The match uses $BASTION_DIR rather than a literal /tmp/lab-bastion so an
# overridden BASTION_DIR is honoured (the HTTP server is started as
# python3 "$BASTION_DIR/server.py").
# NOTE(review): assumes dnsmasq is launched with --conf-file under
# $BASTION_DIR as well — confirm against the dnsmasq start line.
OLD_DNSMASQ=$(pgrep -f "dnsmasq --no-daemon --conf-file=${BASTION_DIR}" 2>/dev/null || true)
OLD_HTTP=$(pgrep -f "python3 ${BASTION_DIR}/server.py" 2>/dev/null || true)
if [[ -n "$OLD_DNSMASQ" || -n "$OLD_HTTP" ]]; then
  warn "Killing old bastion processes..."
  # pgrep prints one PID per line. The unquoted expansion passed to kill is
  # deliberate (one argument per PID); the log line joins them with spaces.
  # shellcheck disable=SC2086
  [[ -n "$OLD_DNSMASQ" ]] && kill $OLD_DNSMASQ 2>/dev/null && log " Stopped old dnsmasq (PID ${OLD_DNSMASQ//$'\n'/ })"
  # shellcheck disable=SC2086
  [[ -n "$OLD_HTTP" ]] && kill $OLD_HTTP 2>/dev/null && log " Stopped old HTTP server (PID ${OLD_HTTP//$'\n'/ })"
  # Give the old processes a moment to release their ports.
  sleep 1
fi
# ──── Preflight ───────────────────────────────────────────────────
[[ $EUID -eq 0 ]] || die "Must run as root (need DHCP/TFTP ports). Use: sudo bash bastion.sh"
@@ -143,23 +207,59 @@ GATEWAY="$(ip route | awk '/default/ {print $3; exit}')"
[[ -n "$SERVER_IP" ]] || die "Cannot detect IP on interface $IFACE"
log "Interface: ${BOLD}$IFACE${NC} IP: ${BOLD}$SERVER_IP${NC} Network: ${BOLD}$NETWORK${NC}"
# ──── Auto-detect SSH pubkey ──────────────────────────────────────
SSH_PUBKEY="${SSH_PUBKEY:-}"
if [[ -z "$SSH_PUBKEY" ]]; then
# ──── Auto-detect SSH keys ───────────────────────────────────────
REAL_HOME="${HOME}"
[[ -n "${SUDO_USER:-}" ]] && REAL_HOME="$(getent passwd "$SUDO_USER" | cut -d: -f6)"
for keyfile in "$REAL_HOME/.ssh/id_ed25519.pub" "$REAL_HOME/.ssh/id_rsa.pub" "$REAL_HOME/.ssh/id_ecdsa.pub"; do
[[ -f "$keyfile" ]] && { SSH_PUBKEY="$keyfile"; break; }
done
SSH_KEYS_CONTENT=""
SSH_KEY_SOURCE=""
# Collect SSH keys from authorized_keys + local pubkeys (deduplicated)
SSH_KEY_SOURCE=""
if [[ -f "$REAL_HOME/.ssh/authorized_keys" ]]; then
SSH_KEYS_CONTENT="$(grep -v '^#' "$REAL_HOME/.ssh/authorized_keys" | grep -v '^$')"
SSH_KEY_SOURCE="$REAL_HOME/.ssh/authorized_keys"
fi
SSH_KEY_CONTENT=""
if [[ -n "$SSH_PUBKEY" && -f "$SSH_PUBKEY" ]]; then
SSH_KEY_CONTENT="$(cat "$SSH_PUBKEY")"
log "SSH key: ${BOLD}$SSH_PUBKEY${NC}"
# Also include local pubkey files (they may not be in authorized_keys)
for keyfile in "$REAL_HOME/.ssh/id_ed25519.pub" "$REAL_HOME/.ssh/id_rsa.pub" "$REAL_HOME/.ssh/id_ecdsa.pub"; do
if [[ -f "$keyfile" ]]; then
KEY_DATA="$(cat "$keyfile")"
KEY_FP="$(awk '{print $2}' "$keyfile")"
if [[ -n "$SSH_KEYS_CONTENT" ]]; then
# Add only if not already present
if ! echo "$SSH_KEYS_CONTENT" | grep -qF "$KEY_FP"; then
SSH_KEYS_CONTENT="$SSH_KEYS_CONTENT"$'\n'"$KEY_DATA"
SSH_KEY_SOURCE="${SSH_KEY_SOURCE} + $keyfile"
fi
else
warn "No SSH public key found. Set SSH_PUBKEY=/path/to/key.pub"
warn "Install mode will use root password 'changeme' as fallback."
SSH_KEYS_CONTENT="$KEY_DATA"
SSH_KEY_SOURCE="$keyfile"
fi
fi
done
# Priority 3: generate a keypair
# Used only when neither authorized_keys nor a local pubkey produced any key.
if [[ -z "$SSH_KEYS_CONTENT" ]]; then
  GENERATED_KEY="$BASTION_DIR/bastion_ed25519"
  if [[ ! -f "$GENERATED_KEY" ]]; then
    log "No SSH keys found — generating ed25519 keypair..."
    # This runs before the "Prepare directories" section, so on a first run
    # $BASTION_DIR may not exist yet; create it so ssh-keygen has a target.
    mkdir -p "$BASTION_DIR"
    # Output stays suppressed, but a failure must stop here: otherwise the
    # only symptom is a confusing "No such file" from the cat below.
    ssh-keygen -t ed25519 -f "$GENERATED_KEY" -N "" -C "bastion-generated@$(hostname)" >/dev/null 2>&1 \
      || die "ssh-keygen failed; cannot create $GENERATED_KEY"
  fi
  SSH_KEYS_CONTENT="$(cat "${GENERATED_KEY}.pub")"
  SSH_KEY_SOURCE="$GENERATED_KEY (generated)"
  warn "Using generated keypair: ${BOLD}$GENERATED_KEY${NC}"
  warn "Save this private key — it's the only way to access installed machines."
fi

# One key per line, so the line count is the key count.
SSH_KEY_COUNT="$(echo "$SSH_KEYS_CONTENT" | wc -l)"
log "SSH keys: ${BOLD}${SSH_KEY_COUNT} key(s)${NC} from ${BOLD}${SSH_KEY_SOURCE}${NC}"

# ──── Detect admin username ──────────────────────────────────────
# The invoking (pre-sudo) user is mirrored onto installed machines; a plain
# root session yields an empty ADMIN_USER, meaning no extra account.
ADMIN_USER="${SUDO_USER:-$USER}"
[[ "$ADMIN_USER" == "root" ]] && ADMIN_USER=""
if [[ -n "$ADMIN_USER" ]]; then
  log "Admin user: ${BOLD}${ADMIN_USER}${NC} (will be created on installed machines)"
fi
# ──── Prepare directories ────────────────────────────────────────
@@ -264,13 +364,8 @@ FEDORA_MIRROR="https://download.fedoraproject.org/pub/fedora/linux/releases/${FE
log "Preparing boot artifacts (Fedora ${FEDORA_VERSION} ${ARCH})..."
copy_if_missing "/usr/share/ipxe/undionly.kpxe" "$TFTPDIR/undionly.kpxe" "iPXE BIOS"
# UEFI x86_64: two-stage PXE boot
# Stage 1: tiny PXE loader stub (<20KB) fits in constrained TFTP buffers
# Stage 2: full iPXE binary downloaded via UEFI PXE protocol (no size limit)
PXELOADER_SRC="$(cd "$(dirname "$0")" && pwd)/pxeloader.c"
[[ -f "$PXELOADER_SRC" ]] || PXELOADER_SRC="$(dirname "${BASH_SOURCE[0]}")/pxeloader.c"
build_pxeloader "$PXELOADER_SRC" "$TFTPDIR/ipxe.efi" "PXE loader stub (stage 1)"
copy_if_missing "/usr/share/ipxe/ipxe-snponly-x86_64.efi" "$TFTPDIR/ipxe-real.efi" "iPXE UEFI x86_64 (stage 2)"
# UEFI x86_64: serve iPXE directly via TFTP (UEFI has no TFTP size limit)
copy_if_missing "/usr/share/ipxe/ipxe-snponly-x86_64.efi" "$TFTPDIR/ipxe.efi" "iPXE UEFI x86_64"
copy_if_missing "/usr/share/ipxe/arm64-efi/snponly.efi" "$TFTPDIR/ipxe-arm64.efi" "iPXE UEFI arm64"
download "${FEDORA_MIRROR}/images/pxeboot/vmlinuz" "$HTTPDIR/vmlinuz" "Fedora kernel"
@@ -375,25 +470,29 @@ except Exception as e:
"
fi
# ── Power off — do NOT let Anaconda proceed ──
# ── Reboot — do NOT let Anaconda proceed ──
echo ""
echo "=== Discovery complete, powering off ==="
echo "=== Discovery complete, rebooting ==="
echo ""
sleep 3
echo 1 > /proc/sys/kernel/sysrq
echo o > /proc/sysrq-trigger
echo b > /proc/sysrq-trigger
sleep 5
poweroff -f
reboot -f
%end
# Anaconda should never get here, but just in case:
poweroff
reboot
DISCOVER_KS
# Patch in the bastion URL
sed -i "s|__BASTION_URL__|http://${SERVER_IP}:${HTTP_PORT}|g" "$HTTPDIR/discover.ks"
# Save SSH keys and admin user for the HTTP server to use
echo "$SSH_KEYS_CONTENT" > "$BASTION_DIR/ssh_keys"
echo "$ADMIN_USER" > "$BASTION_DIR/admin_user"
# ──── Generate iPXE boot script ───────────────────────────────────
# Initial iPXE script chains to /dispatch with the MAC, so the server
# can route to discover or install mode per machine.
@@ -431,9 +530,17 @@ SERVER_IP = sys.argv[3]
HTTP_PORT = int(sys.argv[4])
FEDORA_VER = sys.argv[5]
FEDORA_MIRROR = sys.argv[6]
SSH_KEY = sys.argv[7] if len(sys.argv) > 7 else ""
SSH_KEYS_FILE = sys.argv[7] if len(sys.argv) > 7 else ""
TIMEZONE = sys.argv[8] if len(sys.argv) > 8 else "Europe/London"
LOCALE = sys.argv[9] if len(sys.argv) > 9 else "en_GB.UTF-8"
DOMAIN = sys.argv[10] if len(sys.argv) > 10 else "ad.itaz.eu"
ADMIN_USER = sys.argv[11] if len(sys.argv) > 11 else ""
# Load SSH keys from file
SSH_KEYS = []
if SSH_KEYS_FILE and os.path.isfile(SSH_KEYS_FILE):
with open(SSH_KEYS_FILE) as f:
SSH_KEYS = [l.strip() for l in f if l.strip() and not l.startswith('#')]
# ── State management (file-backed, lock-protected) ───────────────
@@ -452,19 +559,66 @@ def save_state(state):
# ── Kickstart generation ─────────────────────────────────────────
def generate_kickstart(hostname, disk="", ssh_key=""):
disk_cmds = "clearpart --all --initlabel\nautopart --type=plain"
if disk:
disk_cmds = f"ignoredisk --only-use={disk}\nclearpart --all --initlabel --drives={disk}\nautopart --type=plain"
def generate_kickstart(hostname, disk="", ssh_keys=None, domain="", role="worker", admin_user=""):
ssh_keys = ssh_keys or []
fqdn = f"{hostname}.{domain}" if domain else hostname
vg = "labvg"
if ssh_key:
auth = f'rootpw --lock\nsshkey --username=root "{ssh_key}"'
# ── Auth ──
if ssh_keys:
auth = f'rootpw --lock\nsshkey --username=root "{ssh_keys[0]}"'
else:
auth = 'rootpw --plaintext changeme'
return f"""# Lab Bastion — Fedora {FEDORA_VER} install
# ── Admin user (kickstart directive) ──
user_directive = ""
if admin_user:
user_directive = f'user --name={admin_user} --groups=wheel --lock'
# ── SSH keys for %post (root + admin user) ──
all_keys = "\n".join(ssh_keys)
ssh_post_block = ""
if ssh_keys:
ssh_post_block = f"""
# Set up SSH keys for root
mkdir -p /root/.ssh && chmod 700 /root/.ssh
cat > /root/.ssh/authorized_keys << 'SSHKEYS'
{all_keys}
SSHKEYS
chmod 600 /root/.ssh/authorized_keys"""
if admin_user and ssh_keys:
ssh_post_block += f"""
# Set up SSH keys for {admin_user}
ADMIN_HOME=$(getent passwd {admin_user} | cut -d: -f6)
mkdir -p "$ADMIN_HOME/.ssh" && chmod 700 "$ADMIN_HOME/.ssh"
cp /root/.ssh/authorized_keys "$ADMIN_HOME/.ssh/authorized_keys"
chown -R {admin_user}:{admin_user} "$ADMIN_HOME/.ssh"
chmod 600 "$ADMIN_HOME/.ssh/authorized_keys"
# Fix SELinux contexts for SSH
restorecon -R /root/.ssh "$ADMIN_HOME/.ssh" 2>/dev/null || true
# Passwordless sudo for {admin_user}
echo '{admin_user} ALL=(ALL) NOPASSWD: ALL' > /etc/sudoers.d/{admin_user}
chmod 440 /etc/sudoers.d/{admin_user}"""
# ── Determine disk (auto-detect first NVMe/SDA if not specified) ──
disk_line = f'DISK="{disk}"' if disk else '''
DISK=""
for d in /dev/nvme0n1 /dev/sda /dev/vda; do
[ -b "$d" ] && { DISK="$(basename $d)"; break; }
done
[ -z "$DISK" ] && { echo "ERROR: no disk found"; exit 1; }
'''
# ── LVM layout sizes (MB) ──
has_longhorn = (role == "worker")
return f"""# Lab Bastion -- Fedora {FEDORA_VER} server install
# Generated: {datetime.now().isoformat()}
# Target: {hostname}
# Target: {fqdn} (role={role})
text
reboot
@@ -473,39 +627,266 @@ lang {LOCALE}
keyboard uk
timezone {TIMEZONE} --utc
network --bootproto=dhcp --activate --hostname={hostname}
network --bootproto=dhcp --activate --hostname={fqdn}
{auth}
{disk_cmds}
{user_directive}
bootloader --append="console=tty0 console=ttyS0,115200n8"
url --mirrorlist=https://mirrors.fedoraproject.org/mirrorlist?repo=fedora-$releasever&arch=$basearch
# Partitioning is generated dynamically by %pre (supports longhorn preservation)
%include /tmp/part.ks
%pre --log=/tmp/pre-partition.log
#!/bin/bash
set -x
# Progress callback helper
bastion_progress() {{
local stage="$1" detail="${{2:-}}"
local mac=$(ip link show | awk '/ether/ && !/00:00:00:00/ {{print $2; exit}}')
curl -sf -X POST "http://{SERVER_IP}:{HTTP_PORT}/api/progress" \
-H "Content-Type: application/json" \
-d "{{\\"mac\\":\\"$mac\\",\\"stage\\":\\"$stage\\",\\"detail\\":\\"$detail\\"}}" 2>/dev/null || true
}}
bastion_progress "partitioning" "preparing disk layout"
VG="{vg}"
{disk_line}
REPROVISION=no
# Check if VG exists (reprovision scenario)
if vgs $VG &>/dev/null; then
echo "=== Existing VG found - reprovision mode ==="
REPROVISION=yes
# Detect which data LVs to preserve
PRESERVE_LONGHORN=no; PRESERVE_SRV=no; PRESERVE_HOME=no
lvs $VG/longhorn &>/dev/null && PRESERVE_LONGHORN=yes
lvs $VG/srv &>/dev/null && PRESERVE_SRV=yes
lvs $VG/home &>/dev/null && PRESERVE_HOME=yes
echo "Preserving: longhorn=$PRESERVE_LONGHORN srv=$PRESERVE_SRV home=$PRESERVE_HOME"
# Remove only OS logical volumes (keep data LVs)
for lv in root var varlog swap; do
lvremove -f $VG/$lv 2>/dev/null || true
done
fi
if [ "$REPROVISION" = "yes" ]; then
# Find existing boot partitions by type
EFI_PART=$(blkid -t TYPE=vfat -o device /dev/${{DISK}}* 2>/dev/null | head -1)
BOOT_PART=$(blkid -t TYPE=ext4 -o device /dev/${{DISK}}* 2>/dev/null | head -1)
EFI_PART=${{EFI_PART:-/dev/${{DISK}}1}}
BOOT_PART=${{BOOT_PART:-/dev/${{DISK}}2}}
echo "Reusing EFI=$EFI_PART BOOT=$BOOT_PART"
# Build partition config reusing existing PV/VG
cat > /tmp/part.ks << PARTEOF
ignoredisk --only-use=$DISK
clearpart --none
part /boot/efi --onpart=$EFI_PART --fstype=efi
part /boot --onpart=$BOOT_PART --fstype=ext4
volgroup {vg} --useexisting --noformat
logvol swap --vgname={vg} --name=swap --fstype=swap --size=27648
logvol / --vgname={vg} --name=root --fstype=xfs --size=33792
logvol /var --vgname={vg} --name=var --fstype=xfs --size=102400
logvol /var/log --vgname={vg} --name=varlog --fstype=xfs --size=10240
PARTEOF
# Preserve or recreate data LVs
if [ "$PRESERVE_HOME" = "yes" ]; then
echo "logvol /home --vgname={vg} --name=home --useexisting --noformat" >> /tmp/part.ks
else
echo "logvol /home --vgname={vg} --name=home --fstype=xfs --size=10240" >> /tmp/part.ks
fi
if [ "$PRESERVE_SRV" = "yes" ]; then
echo "logvol /srv --vgname={vg} --name=srv --useexisting --noformat" >> /tmp/part.ks
else
echo "logvol /srv --vgname={vg} --name=srv --fstype=xfs --size=20480" >> /tmp/part.ks
fi
if [ "$PRESERVE_LONGHORN" = "yes" ]; then
echo "logvol /var/lib/longhorn --vgname={vg} --name=longhorn --useexisting --noformat" >> /tmp/part.ks
fi
else
# Fresh install
cat > /tmp/part.ks << PARTEOF
ignoredisk --only-use=$DISK
clearpart --all --initlabel --drives=$DISK
part /boot/efi --fstype=efi --size=600 --ondisk=$DISK
part /boot --fstype=ext4 --size=3072 --ondisk=$DISK
part pv.01 --size=1 --grow --ondisk=$DISK
volgroup {vg} pv.01
logvol swap --vgname={vg} --name=swap --fstype=swap --size=27648
logvol / --vgname={vg} --name=root --fstype=xfs --size=33792
logvol /var --vgname={vg} --name=var --fstype=xfs --size=102400
logvol /var/log --vgname={vg} --name=varlog --fstype=xfs --size=10240
logvol /home --vgname={vg} --name=home --fstype=xfs --size=10240
logvol /srv --vgname={vg} --name=srv --fstype=xfs --size=20480
{"logvol /var/lib/longhorn --vgname=" + vg + " --name=longhorn --fstype=xfs --grow --size=1" if has_longhorn else ""}
PARTEOF
fi
echo "=== Generated partition config ==="
cat /tmp/part.ks
echo "==================================="
bastion_progress "partitioning" "layout ready, starting install"
%end
%packages
@core
@server-product
openssh-server
vim-enhanced
tmux
git
curl
wget
python3
lshw
dmidecode
dnf-plugins-core
# Networking and diagnostics
NetworkManager
bind-utils
net-tools
iproute
iputils
traceroute
tcpdump
htop
iotop
strace
jq
# k3s prerequisites
container-selinux
iptables-nft
nftables
policycoreutils-python-utils
chrony
tar
socat
conntrack-tools
ethtool
# Boot management
efibootmgr
# Puppet prerequisites
ruby
ruby-libs
# Exclude desktop
-@workstation-product
-@gnome-desktop
-gnome-shell
-gdm
-PackageKit
-PackageKit-glib
%end
%post --log=/root/bastion-post-install.log
#!/bin/bash
set -x
# Progress callback helper
bastion_progress() {{
local stage="$1" detail="${{2:-}}"
local mac=$(ip link show | awk '/ether/ && !/00:00:00:00/ {{print $2; exit}}')
curl -sf -X POST "http://{SERVER_IP}:{HTTP_PORT}/api/progress" \
-H "Content-Type: application/json" \
-d "{{\\"mac\\":\\"$mac\\",\\"stage\\":\\"$stage\\",\\"detail\\":\\"$detail\\"}}" 2>/dev/null || true
}}
bastion_progress "post-install" "configuring system"
# ── SSH ──
systemctl enable --now sshd
sed -i 's/^#\\?PermitRootLogin.*/PermitRootLogin prohibit-password/' /etc/ssh/sshd_config
sed -i 's/^#\\?PasswordAuthentication.*/PasswordAuthentication no/' /etc/ssh/sshd_config
hostnamectl set-hostname {hostname}
echo "Provisioned by lab-bastion on $(date -Iseconds)" > /etc/lab-provisioned
echo "# Lab node — puppet enrollment pending" > /root/README
{ssh_post_block}
# ── Hostname and domain ──
hostnamectl set-hostname {fqdn}
# ── tmpfs for /tmp ──
echo "tmpfs /tmp tmpfs defaults,noatime,nosuid,nodev,size=4G 0 0" >> /etc/fstab
# ── Kernel modules for k3s ──
cat > /etc/modules-load.d/k3s.conf << 'MODULES'
br_netfilter
overlay
ip_conntrack
MODULES
modprobe br_netfilter || true
modprobe overlay || true
# ── Sysctl for k3s networking ──
cat > /etc/sysctl.d/90-k3s.conf << 'SYSCTL'
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.ipv4.ip_forward = 1
net.ipv6.conf.all.forwarding = 1
fs.inotify.max_user_instances = 524288
fs.inotify.max_user_watches = 1048576
SYSCTL
sysctl --system || true
# ── Disable firewalld (k3s manages its own iptables rules) ──
systemctl disable --now firewalld || true
# ── Enable chronyd for time sync ──
systemctl enable --now chronyd
# ── Set boot order: local disk first, PXE after ──
if command -v efibootmgr >/dev/null 2>&1; then
# Find the Fedora boot entry and move it first
FEDORA_ENTRY=$(efibootmgr | grep -i fedora | head -1 | grep -oP 'Boot\\K[0-9A-F]+')
if [ -n "$FEDORA_ENTRY" ]; then
CURRENT_ORDER=$(efibootmgr | grep BootOrder | cut -d: -f2 | tr -d ' ')
# Put Fedora first, keep rest
NEW_ORDER="$FEDORA_ENTRY,$(echo "$CURRENT_ORDER" | sed "s/$FEDORA_ENTRY,\\?//;s/,$//")"
efibootmgr -o "$NEW_ORDER" || true
echo "Boot order set: Fedora first ($NEW_ORDER)"
fi
fi
# ── Provisioning metadata ──
cat > /etc/lab-provisioned << PROVEOF
hostname: {fqdn}
role: {role}
provisioned: $(date -Iseconds)
bastion: {SERVER_IP}
PROVEOF
cat > /root/README << 'README'
# Lab Node -- {fqdn} (role: {role})
#
# Next steps:
# 1. Install puppet agent:
# dnf install -y puppet-agent
#
# 2. Install k3s:
# curl -sfL https://get.k3s.io | sh -
#
# 3. Or join existing cluster:
# curl -sfL https://get.k3s.io | K3S_URL=https://<server>:6443 K3S_TOKEN=<token> sh -
README
IP_ADDR=$(ip -4 addr show | awk '/inet / && !/127.0.0/ {{split($2,a,"/"); print a[1]; exit}}')
bastion_progress "complete" "ready at $IP_ADDR"
%end
"""
@@ -562,6 +943,25 @@ def print_install_started(mac, hostname):
print(f" Serving Fedora {FEDORA_VER} installer + kickstart...")
print(f"\n{'─' * 60}\n", flush=True)
# Marker printed in front of each progress line, keyed by install stage.
PROGRESS_ICONS = {
    "partitioning": "◆",
    "installing": "◆◆",
    "post-install": "◆◆◆",
    "complete": "✔",
    "error": "✘",
}


def print_progress(mac, stage, detail=""):
    """Print one coloured progress line for a machine.

    mac    -- MAC address shown on the line
    stage  -- stage name; selects the icon (unknown stages get a dot) and
              the colour (green for complete, red for error, else yellow)
    detail -- optional free text appended after the stage

    When the stage is "complete" and detail carries an address (with any
    "ready at " text removed), a ready-to-copy ssh command line is printed
    as well, using ADMIN_USER or "root" as the login.
    """
    if stage == "complete":
        color = GREEN
    elif stage == "error":
        color = RED
    else:
        color = YELLOW
    icon = PROGRESS_ICONS.get(stage, "·")
    suffix = f" -- {detail}" if detail else ""
    print(f" {color}{icon}{RESET} {mac} {BOLD}{stage}{RESET}{suffix}", flush=True)

    # Only a completed install with a reported address gets the ssh hint.
    if stage != "complete" or not detail:
        return
    ip = detail.replace("ready at ", "").strip()
    if not ip:
        return
    login = ADMIN_USER or "root"
    print(f"\n {GREEN}{BOLD} ssh {login}@{ip}{RESET}\n", flush=True)
# ── HTTP Handler ──────────────────────────────────────────────────
class BastionHandler(SimpleHTTPRequestHandler):
@@ -603,7 +1003,7 @@ class BastionHandler(SimpleHTTPRequestHandler):
echo
echo =============================================
echo Lab PXE Bastion INSTALLING Fedora {FEDORA_VER}
echo Lab PXE Bastion - INSTALLING Fedora {FEDORA_VER}
echo Target: {hostname}
echo MAC: {mac}
echo =============================================
@@ -614,13 +1014,31 @@ initrd http://{SERVER_IP}:{HTTP_PORT}/initrd.img
boot
"""
self.send_text(200, script)
elif mac in state.get("installed", {}):
info = state["installed"][mac]
hostname = info.get("hostname", "?")
print(f" {GREEN}PXE request from {mac} ({hostname}) - already installed, booting local disk{RESET}", flush=True)
script = f"""#!ipxe
echo
echo =============================================
echo Lab PXE Bastion - {hostname}
echo Already installed, booting from local disk
echo =============================================
echo
sleep 3
exit
"""
self.send_text(200, script)
else:
print(f" {YELLOW}PXE request from {mac} → discovery mode{RESET}", flush=True)
script = f"""#!ipxe
echo
echo =============================================
echo Lab PXE Bastion DISCOVERY MODE
echo Lab PXE Bastion - DISCOVERY MODE
echo MAC: {mac}
echo Collecting hardware info...
echo =============================================
@@ -642,7 +1060,10 @@ boot
ks = generate_kickstart(
hostname=cfg.get("hostname", "lab-node"),
disk=cfg.get("disk", ""),
ssh_key=SSH_KEY,
ssh_keys=SSH_KEYS,
domain=DOMAIN,
role=cfg.get("role", "worker"),
admin_user=ADMIN_USER,
)
self.send_text(200, ks)
return
@@ -710,15 +1131,21 @@ boot
mac = data.get("mac", "").lower().replace("-", ":")
hostname = data.get("hostname", "lab-node")
disk = data.get("disk", "")
role = data.get("role", "worker")
if not mac:
self.send_json(400, {"error": "mac is required"})
return
if role not in ("worker", "infra"):
self.send_json(400, {"error": "role must be 'worker' or 'infra'"})
return
state = load_state()
state.setdefault("install_queue", {})[mac] = {
"hostname": hostname,
"disk": disk,
"role": role,
"queued_at": datetime.now().isoformat(),
}
save_state(state)
@@ -729,10 +1156,49 @@ boot
"status": "queued",
"mac": mac,
"hostname": hostname,
"message": "PXE boot the machine to start installation",
"role": role,
"message": f"PXE boot the machine to start installation (role={role})",
})
return
# ── Install progress callback from kickstart ──
if parsed.path == "/api/progress":
try:
data = json.loads(body)
except json.JSONDecodeError:
self.send_json(400, {"error": "invalid JSON"})
return
mac = data.get("mac", "unknown").lower()
stage = data.get("stage", "unknown")
detail = data.get("detail", "")
print_progress(mac, stage, detail)
# Update state with progress
state = load_state()
if mac in state.get("install_queue", {}):
state["install_queue"][mac]["progress"] = stage
state["install_queue"][mac]["progress_at"] = datetime.now().isoformat()
if detail:
state["install_queue"][mac]["progress_detail"] = detail
# Move to installed on completion
if stage == "complete":
cfg = state["install_queue"].pop(mac)
ip = detail.replace("ready at ", "").strip() if detail else ""
state.setdefault("installed", {})[mac] = {
"hostname": cfg.get("hostname", "?"),
"role": cfg.get("role", "?"),
"ip": ip,
"installed_at": datetime.now().isoformat(),
}
save_state(state)
self.send_json(200, {"status": "ok"})
return
self.send_json(404, {"error": "not found"})
@@ -850,9 +1316,11 @@ python3 "$BASTION_DIR/server.py" \
"$HTTP_PORT" \
"$FEDORA_VERSION" \
"$FEDORA_MIRROR" \
"$SSH_KEY_CONTENT" \
"$BASTION_DIR/ssh_keys" \
"$TIMEZONE" \
"$LOCALE" &
"$LOCALE" \
"$DOMAIN" \
"$ADMIN_USER" &
HTTP_PID=$!
sleep 1
@@ -871,6 +1339,7 @@ echo -e " Network: ${BOLD}${NETWORK}/24${NC} via ${BOLD}${IFACE}${NC}"
echo -e " DHCP: ${BOLD}${DHCP_MODE}${NC}$(if [[ "$DHCP_MODE" == "full" ]]; then echo " (${DHCP_RANGE_START}${DHCP_RANGE_END})"; else echo " (alongside existing DHCP)"; fi)"
echo -e " HTTP: ${BOLD}http://${SERVER_IP}:${HTTP_PORT}/${NC}"
echo -e " OS: ${BOLD}Fedora ${FEDORA_VERSION} (${ARCH})${NC}"
echo -e " Domain: ${BOLD}${DOMAIN}${NC}"
echo -e " State: ${BOLD}${STATEFILE}${NC}"
echo ""
echo -e " ${YELLOW}PXE boot any machine on this network.${NC}"

279
test-reprovision.sh Executable file
View File

@@ -0,0 +1,279 @@
#!/usr/bin/env bash
# ─────────────────────────────────────────────────────────────
# Test: reprovision preserves /home, /srv, /var/lib/longhorn
#
# Usage: sudo bash test-reprovision.sh
# sudo bash test-reprovision.sh --skip-first-install # if disk already has a first install
# sudo bash test-reprovision.sh --cleanup # just remove the VM and disk
# ─────────────────────────────────────────────────────────────
# Strict mode: abort on errors, unset variables and failed pipeline stages.
set -euo pipefail

# libvirt domain and the disk image that is installed, then reinstalled.
VM_NAME='test-bastion-ks'
DISK_PATH='/var/lib/libvirt/images/test-reprovision.qcow2'
DISK_SIZE=20 # GB

# Kickstart written by generate_kickstart and injected into the installer.
KS_PATH='/tmp/test-vm.ks'

# Installer tree handed to virt-install --location.
FEDORA_MIRROR='https://download.fedoraproject.org/pub/fedora/linux/releases/43/Everything/x86_64/os/'

# UEFI firmware image and NVRAM template.
# NOTE(review): the two files come from different directories — confirm both
# paths exist on the test host.
OVMF_CODE='/usr/share/edk2/ovmf/OVMF_CODE.fd'
OVMF_VARS='/usr/share/OVMF/OVMF_VARS.fd'

# ANSI colour escapes (interpreted later by echo -e).
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
CYAN='\033[0;36m'
BOLD='\033[1m'
NC='\033[0m'
# log <words...>: informational line on stdout with a green "[test]" tag.
log() {
  local msg="$*"
  echo -e "${GREEN}[test]${NC} ${msg}"
}

# err <words...>: error line on stderr with a red "[test]" tag.
err() {
  local msg="$*"
  echo -e "${RED}[test]${NC} ${msg}" >&2
}

# step <words...>: section banner on stdout, with a blank line before and after.
step() {
  local title="$*"
  echo -e "\n${CYAN}${BOLD}══ ${title} ══${NC}\n"
}
# Remove the test domain: force it off, then drop its definition together
# with its NVRAM file. Both steps are best-effort; a failing virsh call
# (its stderr is discarded) is ignored.
cleanup_vm() {
  local domain="$VM_NAME"
  virsh destroy "$domain" 2>/dev/null || :
  virsh undefine "$domain" --nvram 2>/dev/null || :
}
# Full reset: remove the VM (via cleanup_vm) and delete its disk image,
# then log that both are gone.
cleanup_all() {
  local disk_image="$DISK_PATH"
  cleanup_vm
  rm -f "$disk_image"
  log "Cleaned up VM and disk"
}
# ── Handle args ──
#   --skip-first-install  reuse the existing disk image instead of installing
#   --cleanup             remove the VM and disk, then exit
# Anything else is rejected: a mistyped --skip-first-install would otherwise
# be ignored and the run would start by deleting the existing disk image.
SKIP_FIRST=false
for arg in "$@"; do
  case "$arg" in
    --skip-first-install) SKIP_FIRST=true ;;
    --cleanup) cleanup_all; exit 0 ;;
    *)
      err "Unknown option: $arg"
      err "Usage: sudo bash test-reprovision.sh [--skip-first-install | --cleanup]"
      exit 2
      ;;
  esac
done
[[ $EUID -eq 0 ]] || { err "Must run as root"; exit 1; }
# ── Generate kickstart ──
# Writes the test kickstart to $KS_PATH. The heredoc delimiter is quoted, so
# nothing inside is expanded by this script: every $VAR below is evaluated
# later, inside the installer.
#
# The %pre script writes /tmp/part.ks, which the kickstart pulls in through
# %include:
#   - if volume group "labvg" already exists (reprovision), the root, var,
#     varlog and swap LVs are removed and re-created, the existing EFI and
#     /boot partitions are reused, and home/srv/longhorn are kept with
#     --useexisting --noformat when present;
#   - otherwise the disk is cleared and the full layout is created, with
#     longhorn growing to fill the volume group.
# %post writes /etc/lab-provisioned and sets a password for user michal.
generate_kickstart() {
cat > "$KS_PATH" << 'KSEOF'
text
reboot
lang en_GB.UTF-8
keyboard uk
timezone Europe/London --utc
network --bootproto=dhcp --activate --hostname=test-vm.ad.itaz.eu
rootpw --plaintext testpass
user --name=michal --groups=wheel
bootloader --append="console=ttyS0,115200n8"
url --mirrorlist=https://mirrors.fedoraproject.org/mirrorlist?repo=fedora-43&arch=x86_64
%include /tmp/part.ks
%pre --log=/tmp/pre-partition.log
#!/bin/bash
set -x
VG="labvg"
DISK="vda"
REPROVISION=no
if vgs $VG &>/dev/null; then
echo "=== REPROVISION MODE ==="
REPROVISION=yes
PRESERVE_LONGHORN=no; PRESERVE_SRV=no; PRESERVE_HOME=no
lvs $VG/longhorn &>/dev/null && PRESERVE_LONGHORN=yes
lvs $VG/srv &>/dev/null && PRESERVE_SRV=yes
lvs $VG/home &>/dev/null && PRESERVE_HOME=yes
echo "Preserving: longhorn=$PRESERVE_LONGHORN srv=$PRESERVE_SRV home=$PRESERVE_HOME"
for lv in root var varlog swap; do
lvremove -f $VG/$lv 2>/dev/null || true
done
fi
if [ "$REPROVISION" = "yes" ]; then
EFI_PART=$(blkid -t TYPE=vfat -o device /dev/${DISK}* 2>/dev/null | head -1)
BOOT_PART=$(blkid -t TYPE=ext4 -o device /dev/${DISK}* 2>/dev/null | head -1)
EFI_PART=${EFI_PART:-/dev/${DISK}1}
BOOT_PART=${BOOT_PART:-/dev/${DISK}2}
echo "Reusing EFI=$EFI_PART BOOT=$BOOT_PART"
cat > /tmp/part.ks << PARTEOF
ignoredisk --only-use=$DISK
clearpart --none
part /boot/efi --onpart=$EFI_PART --fstype=efi
part /boot --onpart=$BOOT_PART --fstype=ext4
volgroup labvg --useexisting --noformat
logvol swap --vgname=labvg --name=swap --fstype=swap --size=1024
logvol / --vgname=labvg --name=root --fstype=xfs --size=4096
logvol /var --vgname=labvg --name=var --fstype=xfs --size=3072
logvol /var/log --vgname=labvg --name=varlog --fstype=xfs --size=1024
PARTEOF
if [ "$PRESERVE_HOME" = "yes" ]; then
echo "logvol /home --vgname=labvg --name=home --useexisting --noformat" >> /tmp/part.ks
else
echo "logvol /home --vgname=labvg --name=home --fstype=xfs --size=1024" >> /tmp/part.ks
fi
if [ "$PRESERVE_SRV" = "yes" ]; then
echo "logvol /srv --vgname=labvg --name=srv --useexisting --noformat" >> /tmp/part.ks
else
echo "logvol /srv --vgname=labvg --name=srv --fstype=xfs --size=1024" >> /tmp/part.ks
fi
if [ "$PRESERVE_LONGHORN" = "yes" ]; then
echo "logvol /var/lib/longhorn --vgname=labvg --name=longhorn --useexisting --noformat" >> /tmp/part.ks
fi
else
cat > /tmp/part.ks << PARTEOF
ignoredisk --only-use=$DISK
clearpart --all --initlabel --drives=$DISK
part /boot/efi --fstype=efi --size=600 --ondisk=$DISK
part /boot --fstype=ext4 --size=1024 --ondisk=$DISK
part pv.01 --size=1 --grow --ondisk=$DISK
volgroup labvg pv.01
logvol swap --vgname=labvg --name=swap --fstype=swap --size=1024
logvol / --vgname=labvg --name=root --fstype=xfs --size=4096
logvol /var --vgname=labvg --name=var --fstype=xfs --size=3072
logvol /var/log --vgname=labvg --name=varlog --fstype=xfs --size=1024
logvol /home --vgname=labvg --name=home --fstype=xfs --size=1024
logvol /srv --vgname=labvg --name=srv --fstype=xfs --size=1024
logvol /var/lib/longhorn --vgname=labvg --name=longhorn --fstype=xfs --grow --size=1
PARTEOF
fi
echo "=== Generated partition config ==="
cat /tmp/part.ks
%end
%packages
@core
openssh-server
%end
%post
echo "Installed $(date -Iseconds)" > /etc/lab-provisioned
echo "testpass" | passwd --stdin michal
%end
KSEOF
}
# ── Install helper ──
# run_install <label> <disk-spec>
#   label      free text used only in the log line
#   disk-spec  value passed verbatim to virt-install --disk
# Runs an unattended UEFI install of $VM_NAME from $FEDORA_MIRROR with the
# kickstart at $KS_PATH injected into the initrd, waits for virt-install to
# return, then force-stops the domain (best-effort).
run_install() {
  local label="$1"
  local disk_args="$2"
  # --initrd-inject places the file at the initrd root under its base name,
  # so inst.ks must reference that name. Deriving it from KS_PATH keeps the
  # two in sync if KS_PATH is ever changed.
  local ks_name="${KS_PATH##*/}"
  log "Running virt-install ($label)..."
  virt-install \
    --name "$VM_NAME" \
    --ram 4096 \
    --vcpus 2 \
    --disk "$disk_args" \
    --os-variant fedora-unknown \
    --network network=default \
    --location "$FEDORA_MIRROR" \
    --initrd-inject "$KS_PATH" \
    --extra-args "inst.ks=file:///${ks_name} console=ttyS0,115200n8 inst.text" \
    --boot loader="$OVMF_CODE",loader.readonly=yes,loader.type=pflash,nvram.template="$OVMF_VARS" \
    --noautoconsole \
    --wait -1
  log "virt-install exited — install complete"
  # Make sure the domain is off before the disk image is inspected.
  virsh destroy "$VM_NAME" 2>/dev/null || true
}
# ── Main test flow ──
generate_kickstart
log "Kickstart generated at $KS_PATH"

# Running totals of passed / failed checks.
# Counters are bumped with VAR=$((VAR + 1)): under `set -e` the form
# ((VAR++)) returns status 1 when VAR is 0 and would abort the script on
# the very first check.
PASS=0
FAIL=0

if ! $SKIP_FIRST; then
  # ── Step 1: Fresh install ──
  step "Step 1/4: Fresh install"
  cleanup_all
  run_install "fresh" "path=$DISK_PATH,size=$DISK_SIZE,bus=virtio"

  # Verify fresh install: every expected LV must show up in the image.
  log "Verifying fresh install..."
  FILESYSTEMS=$(guestfish --ro -a "$DISK_PATH" -i list-filesystems 2>/dev/null) || {
    # Report the failure instead of letting `set -e` exit silently; the
    # loop below then records every LV as missing.
    err "guestfish could not inspect $DISK_PATH"
    FILESYSTEMS=""
  }
  for lv in root var varlog home srv longhorn swap; do
    # Require a delimiter after the LV name so "labvg/var" is not satisfied
    # by "labvg/varlog". The here-string also avoids a pipe into grep -q.
    if grep -qE "labvg/${lv}(:|[[:space:]]|\$)" <<<"$FILESYSTEMS"; then
      log " ✔ labvg/$lv exists"
      PASS=$((PASS + 1))
    else
      err " ✘ labvg/$lv MISSING"
      FAIL=$((FAIL + 1))
    fi
  done
else
  step "Skipping first install (--skip-first-install)"
  [[ -f "$DISK_PATH" ]] || { err "Disk not found at $DISK_PATH"; exit 1; }
fi
# ── Step 2: Write marker files ──
# Three markers go onto the volumes that must survive, one onto /var, which
# is expected to be recreated by the reinstall.
step "Step 2/4: Writing marker files to preserved partitions"
guestfish -a "$DISK_PATH" -i << 'GF'
write /home/michal/PRESERVE_TEST.txt "MARKER: home partition preserved\n"
write /srv/PRESERVE_TEST.txt "MARKER: srv partition preserved\n"
write /var/lib/longhorn/PRESERVE_TEST.txt "MARKER: longhorn partition preserved\n"
write /var/SHOULD_BE_WIPED.txt "This file should NOT survive reprovision\n"
GF
log "Marker files written:"
for marker in \
  "/home/michal/PRESERVE_TEST.txt" \
  "/srv/PRESERVE_TEST.txt" \
  "/var/lib/longhorn/PRESERVE_TEST.txt" \
  "/var/SHOULD_BE_WIPED.txt (should be wiped)"; do
  log " $marker"
done

# ── Step 3: Reprovision ──
# Same disk image, no size= in the disk spec, so the existing image is reused.
step "Step 3/4: Reprovisioning (reinstall on same disk)"
cleanup_vm
run_install "reprovision" "path=$DISK_PATH,bus=virtio"

# ── Step 4: Verify ──
step "Step 4/4: Verifying preservation"
# check_file <path> <exists|gone> <label>
# Reads <path> from the disk image with guestfish and records one result in
# the global PASS / FAIL counters:
#   exists  passes when the file content contains "MARKER"
#   gone    passes when nothing could be read (file absent or unreadable)
# Any other expectation is reported and counted as a failure.
#
# Counters use VAR=$((VAR + 1)); under `set -e` the form ((VAR++)) returns
# status 1 when VAR is 0 and would terminate the whole script.
check_file() {
  local path="$1" expect="$2" label="$3"
  local content
  # A failed read (missing file, uninspectable image) is treated as empty.
  content=$(guestfish --ro -a "$DISK_PATH" -i cat "$path" 2>/dev/null) || content=""
  case "$expect" in
    exists)
      if [[ "$content" == *"MARKER"* ]]; then
        # Show only the first line of the marker file.
        log "$label — PRESERVED: ${content%%$'\n'*}"
        PASS=$((PASS + 1))
      else
        err "$label — LOST (file missing or empty)"
        FAIL=$((FAIL + 1))
      fi
      ;;
    gone)
      if [[ -z "$content" ]]; then
        log "$label — correctly wiped"
        PASS=$((PASS + 1))
      else
        err "$label — should have been wiped but still exists"
        FAIL=$((FAIL + 1))
      fi
      ;;
    *)
      err "$label — unknown expectation '$expect'"
      FAIL=$((FAIL + 1))
      ;;
  esac
}
check_file "/home/michal/PRESERVE_TEST.txt" "exists" "/home (preserved)"
check_file "/srv/PRESERVE_TEST.txt" "exists" "/srv (preserved)"
check_file "/var/lib/longhorn/PRESERVE_TEST.txt" "exists" "/var/lib/longhorn (preserved)"
check_file "/var/SHOULD_BE_WIPED.txt" "gone" "/var (wiped)"

# Also verify OS was actually reinstalled: %post writes this file, and the
# root LV it lives on is recreated during reprovision.
PROV_DATE=$(guestfish --ro -a "$DISK_PATH" -i cat /etc/lab-provisioned 2>/dev/null || echo "")
# Counters use VAR=$((VAR + 1)): under `set -e`, ((VAR++)) returns status 1
# when VAR is 0 and would abort before the summary is printed.
if [[ -n "$PROV_DATE" ]]; then
  log " ✔ OS reinstalled: $PROV_DATE"
  PASS=$((PASS + 1))
else
  err " ✘ /etc/lab-provisioned missing — OS not installed?"
  FAIL=$((FAIL + 1))
fi

# ── Summary ──
echo ""
echo -e "${BOLD}════════════════════════════════════════${NC}"
if [[ $FAIL -eq 0 ]]; then
  echo -e "${GREEN}${BOLD} ALL TESTS PASSED ($PASS/$((PASS+FAIL)))${NC}"
else
  echo -e "${RED}${BOLD} $FAIL TESTS FAILED ($PASS passed, $FAIL failed)${NC}"
fi
echo -e "${BOLD}════════════════════════════════════════${NC}"
echo ""

# ── Cleanup ──
log "Cleaning up VM (disk preserved at $DISK_PATH)"
cleanup_vm

# Exit status is the number of failed checks (0 = success).
exit "$FAIL"