fix: PXE boot debugging — bisect root cause, syslog logging, serial console #3

Merged
michal merged 31 commits from wip/ks-debugging into main 2026-03-29 00:50:05 +00:00
8 changed files with 221 additions and 3 deletions
Showing only changes of commit 86cd961ee4 - Show all commits

View File

@@ -0,0 +1,12 @@
# Runtime settings for the bastion, stored as plain key/value strings.
# The "bastion" Deployment pulls in every key below as an environment
# variable through envFrom -> configMapRef.
apiVersion: v1
kind: ConfigMap
metadata:
  name: bastion-config
  namespace: lab-infra
data:
  # Values stay quoted so that YAML reads them as strings rather than numbers.
  # Same port number the Deployment declares as containerPort and probes.
  HTTP_PORT: "8080"
  DOMAIN: "ad.itaz.eu"
  FEDORA_VERSION: "43"
  # NOTE(review): "proxy" presumably selects proxy-DHCP for PXE -- confirm
  # against the bastion's config parser.
  DHCP_MODE: "proxy"
  TIMEZONE: "Europe/London"
  LOCALE: "en_GB.UTF-8"

View File

@@ -0,0 +1,65 @@
# Single-replica Deployment that runs the lab bastion on the host network.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: bastion
  namespace: lab-infra
  labels:
    app: bastion
spec:
  replicas: 1
  # Recreate instead of the default RollingUpdate: the pod uses the host
  # network (port 8080) and a ReadWriteOnce volume, so a replacement pod
  # cannot start while the old one is still running. With RollingUpdate and
  # one replica the rollout would wait for the new pod first and can stall.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: bastion
  template:
    metadata:
      labels:
        app: bastion
    spec:
      # Shares the node's network namespace.
      # NOTE(review): presumably required so DHCP/PXE traffic reaches the
      # bastion directly -- confirm.
      hostNetwork: true
      containers:
        - name: bastion
          image: mysources.co.uk/michal/lab-bastion:latest
          # Starts the bastion in standalone mode through the bundled CLI.
          command:
            - node
            - src/cli/dist/index.js
            - init
            - bastion
            - standalone
            - start
          # Every key of the bastion-config ConfigMap becomes an env variable.
          envFrom:
            - configMapRef:
                name: bastion-config
          ports:
            - containerPort: 8080
              name: http
          volumeMounts:
            # Persistent state backed by the bastion-state claim.
            - name: state
              mountPath: /data
            # Host's root SSH directory, mounted read-only.
            - name: ssh-keys
              mountPath: /root/.ssh
              readOnly: true
          securityContext:
            capabilities:
              # NOTE(review): presumably needed for raw/broadcast DHCP
              # sockets -- confirm these are the minimum required.
              add:
                - NET_ADMIN
                - NET_RAW
          # Both probes hit the same HTTP endpoint; liveness is checked less
          # often and starts later than readiness.
          livenessProbe:
            httpGet:
              path: /api/machines
              port: 8080
            initialDelaySeconds: 15
            periodSeconds: 30
          readinessProbe:
            httpGet:
              path: /api/machines
              port: 8080
            initialDelaySeconds: 5
            periodSeconds: 10
      volumes:
        - name: state
          persistentVolumeClaim:
            claimName: bastion-state
        - name: ssh-keys
          hostPath:
            path: /root/.ssh
            # "Directory" makes the pod fail to start if the path is missing
            # on the node instead of silently creating it.
            type: Directory

View File

@@ -0,0 +1,7 @@
# Kustomize entry point: bundles the namespace, config, storage claim and
# workload manifests so they can be applied together with `kubectl apply -k`.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - namespace.yaml
  - configmap.yaml
  - pvc.yaml
  - deployment.yaml

View File

@@ -0,0 +1,4 @@
# Namespace that the bastion ConfigMap, PVC and Deployment are created in.
apiVersion: v1
kind: Namespace
metadata:
  name: lab-infra

View File

@@ -0,0 +1,12 @@
# Storage claim for the bastion's persistent state; the bastion Deployment
# mounts it at /data.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: bastion-state
  namespace: lab-infra
spec:
  # Mountable read-write by a single node at a time.
  accessModes:
    - ReadWriteOnce
  # NOTE(review): presumably the local-path provisioner bundled with k3s --
  # confirm the class exists on the target cluster.
  storageClassName: local-path
  resources:
    requests:
      storage: 10Gi

75
bastion/scripts/release.sh Executable file
View File

@@ -0,0 +1,75 @@
#!/bin/bash
# Release pipeline for lab-bastion.
#
# Runs, in order: package build, RPM publish, DEB publish, container image
# build/push, an optional local RPM install (Fedora/RHEL hosts only), and
# finally prints install instructions. Any failing step aborts the release.
#
# Optional environment (may also come from a .env file in the project root):
#   GITEA_PUBLIC_URL  base URL shown in the instructions (default https://mysources.co.uk)
#   GITEA_OWNER       package/image owner               (default michal)
#   GITEA_REGISTRY    container registry host           (default mysources.co.uk)
set -e

# Always operate from the project root (the parent of this script's directory),
# regardless of where the script was invoked from.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
cd "$PROJECT_ROOT"

# Load .env if present. `set -a` exports every variable it defines so the
# child scripts below see them. The explicit ./ prevents bash from searching
# PATH for a file named ".env" before looking in the current directory.
if [ -f .env ]; then
    set -a; source ./.env; set +a
fi

echo "=== lab-bastion release ==="
echo ""

# 1. Build binaries & packages
bash scripts/build-rpm.sh
echo ""

# 2. Publish RPM
bash scripts/publish-rpm.sh
echo ""

# 3. Publish DEB
bash scripts/publish-deb.sh
echo ""

# 4. Build & push Docker image
bash scripts/build-bastion.sh
echo ""

# 5. Install locally (Fedora/RHEL only)
if [ -f /etc/fedora-release ] || [ -f /etc/redhat-release ]; then
    echo "==> Installing locally..."
    # Pick the first matching RPM via a glob instead of parsing `ls` output;
    # nullglob makes the array empty (rather than a literal pattern) when
    # nothing matches.
    shopt -s nullglob
    rpm_candidates=(dist/lab-*.rpm)
    shopt -u nullglob
    RPM_FILE="${rpm_candidates[0]:-}"
    if [ -n "$RPM_FILE" ]; then
        sudo rpm -U --force "$RPM_FILE"
        echo ""
        echo "==> Installed:"
        # A failing version query must not abort the release at this point.
        lab --version || echo "(lab binary installed)"
    else
        echo "==> WARNING: No RPM found in dist/, skipping local install."
    fi
else
    echo "==> Not Fedora/RHEL — skipping local RPM install."
fi
echo ""

# 6. Summary
GITEA_PUBLIC_URL="${GITEA_PUBLIC_URL:-https://mysources.co.uk}"
GITEA_OWNER="${GITEA_OWNER:-michal}"
REGISTRY="${GITEA_REGISTRY:-mysources.co.uk}"
VERSION=$(node -p "require('./package.json').version")

echo "=== Done! ==="
echo ""
echo "RPM install:"
echo " sudo dnf config-manager --add-repo ${GITEA_PUBLIC_URL}/api/packages/${GITEA_OWNER}/rpm.repo"
echo " sudo dnf install lab"
echo ""
echo "DEB install (Debian/Ubuntu):"
echo " echo \"deb ${GITEA_PUBLIC_URL}/api/packages/${GITEA_OWNER}/debian trixie main\" | sudo tee /etc/apt/sources.list.d/lab.list"
echo " curl -fsSL ${GITEA_PUBLIC_URL}/api/packages/${GITEA_OWNER}/debian/repository.key | sudo gpg --dearmor -o /etc/apt/keyrings/lab.gpg"
echo " sudo apt update && sudo apt install lab"
echo ""
echo "Docker image:"
# Use the configured owner here as well, consistent with the package URLs
# above (previously hard-coded, so an overridden GITEA_OWNER printed a wrong
# image path).
# NOTE(review): assumes scripts/build-bastion.sh pushes under GITEA_OWNER -- confirm.
echo " podman pull ${REGISTRY}/${GITEA_OWNER}/lab-bastion:${VERSION}"
echo ""
echo "k3s deployment:"
echo " kubectl apply -k deploy/k3s/"

View File

@@ -35,6 +35,7 @@ export function renderInstallKickstart(params: InstallKickstartParams): string {
const vg = "labvg";
const now = new Date().toISOString();
const hasLonghorn = role === "worker";
const hasRancher = role === "infra";
// -- Auth section --
const auth = sshKeys.length > 0
@@ -91,6 +92,11 @@ done
? `logvol /var/lib/longhorn --vgname=${vg} --name=longhorn --fstype=xfs --grow --size=1`
: "";
// -- Rancher LV for fresh install (infra role) --
const rancherFreshLine = hasRancher
? `logvol /var/lib/rancher --vgname=${vg} --name=rancher --fstype=xfs --size=20480`
: "";
return `# Lab Bastion -- Fedora ${fedoraVersion} server install
# Generated: ${now}
# Target: ${fqdn} (role=${role})
@@ -140,12 +146,13 @@ if vgs $VG &>/dev/null; then
REPROVISION=yes
# Detect which data LVs to preserve
PRESERVE_LONGHORN=no; PRESERVE_SRV=no; PRESERVE_HOME=no
PRESERVE_LONGHORN=no; PRESERVE_SRV=no; PRESERVE_HOME=no; PRESERVE_RANCHER=no
lvs $VG/longhorn &>/dev/null && PRESERVE_LONGHORN=yes
lvs $VG/srv &>/dev/null && PRESERVE_SRV=yes
lvs $VG/home &>/dev/null && PRESERVE_HOME=yes
lvs $VG/rancher &>/dev/null && PRESERVE_RANCHER=yes
echo "Preserving: longhorn=$PRESERVE_LONGHORN srv=$PRESERVE_SRV home=$PRESERVE_HOME"
echo "Preserving: longhorn=$PRESERVE_LONGHORN srv=$PRESERVE_SRV home=$PRESERVE_HOME rancher=$PRESERVE_RANCHER"
# Remove only OS logical volumes (keep data LVs)
for lv in root var varlog swap; do
@@ -191,6 +198,10 @@ PARTEOF
echo "logvol /var/lib/longhorn --vgname=${vg} --name=longhorn --useexisting --noformat" >> /tmp/part.ks
fi
if [ "$PRESERVE_RANCHER" = "yes" ]; then
echo "logvol /var/lib/rancher --vgname=${vg} --name=rancher --useexisting --noformat" >> /tmp/part.ks
fi
else
# Fresh install
cat > /tmp/part.ks << PARTEOF
@@ -207,6 +218,7 @@ logvol /var/log --vgname=${vg} --name=varlog --fstype=xfs --size=10240
logvol /home --vgname=${vg} --name=home --fstype=xfs --size=10240
logvol /srv --vgname=${vg} --name=srv --fstype=xfs --size=20480
${longhornFreshLine}
${rancherFreshLine}
PARTEOF
fi
@@ -357,7 +369,9 @@ cat > /root/README << 'README'
# curl -sfL https://get.k3s.io | K3S_URL=https://<server>:6443 K3S_TOKEN=<token> sh -
README
IP_ADDR=$(ip -4 addr show | awk '/inet / && !/127.0.0/ {split($2,a,"/"); print a[1]; exit}')
${hasRancher ? `# Install k3s server (skip start - will be configured manually)
curl -sfL https://get.k3s.io | INSTALL_K3S_SKIP_START=true sh -
` : ""}IP_ADDR=$(ip -4 addr show | awk '/inet / && !/127.0.0/ {split($2,a,"/"); print a[1]; exit}')
bastion_progress "complete" "ready at $IP_ADDR"
%end

View File

@@ -93,6 +93,35 @@ describe("renderInstallKickstart", () => {
expect(ks).toContain("http://10.0.0.5:9090/api/progress");
});
// A fresh install for the infra role must declare the dedicated rancher logical volume.
it("infra role has /var/lib/rancher partition", () => {
  const expectedLogvol =
    "logvol /var/lib/rancher --vgname=labvg --name=rancher --fstype=xfs --size=20480";
  const rendered = renderInstallKickstart(baseParams({ role: "infra" }));
  expect(rendered).toContain(expectedLogvol);
});
// The infra kickstart must fetch the k3s installer with auto-start disabled.
it("infra role has k3s install", () => {
  const installCommand = "curl -sfL https://get.k3s.io | INSTALL_K3S_SKIP_START=true sh -";
  const rendered = renderInstallKickstart(baseParams({ role: "infra" }));
  expect(rendered).toContain(installCommand);
});
// Only the fresh-install logvol line is checked here; the reprovision branch
// of the script mentions the rancher LV for every role.
it("worker role does NOT have /var/lib/rancher partition in fresh install", () => {
  const freshRancherLogvol =
    "logvol /var/lib/rancher --vgname=labvg --name=rancher --fstype=xfs --size=20480";
  const rendered = renderInstallKickstart(baseParams({ role: "worker" }));
  expect(rendered).not.toContain(freshRancherLogvol);
});
// Worker kickstarts must not contain the k3s server install step at all.
it("worker role does NOT have k3s install", () => {
  const rendered = renderInstallKickstart(baseParams({ role: "worker" }));
  const k3sInstallMarker = "INSTALL_K3S_SKIP_START";
  expect(rendered).not.toContain(k3sInstallMarker);
});
// The reprovision path must detect an existing rancher LV and reuse it unformatted.
it("reprovision preserves rancher partition", () => {
  const rendered = renderInstallKickstart(baseParams({ role: "infra" }));
  const requiredFragments = [
    "PRESERVE_RANCHER=no", // flag defaults to "no"
    "lvs $VG/rancher", // probe for the existing LV
    "PRESERVE_RANCHER=yes", // flag flipped when the LV is found
    "logvol /var/lib/rancher --vgname=labvg --name=rancher --useexisting --noformat",
  ];
  for (const fragment of requiredFragments) {
    expect(rendered).toContain(fragment);
  }
});
it("partition sizes are correct", () => {
const ks = renderInstallKickstart(baseParams());
// root = 33792