fix: revert kickstart to near-original baseline (Step 0 — boots clean)
Reverted install.ks.ts to near-original state from commit 64533b2.
This is the bisection baseline — 21/22 integration tests pass,
0 failed systemd services, SSH works, /boot/efi mounts.
Removed all accumulated fixes that collectively broke boot:
- ERR trap, background log streamer, bastion_log/bastion_error
- depmod rebuild, nofail on /boot/efi, SELinux autorelabel
- chcon/restorecon for /etc /var /root
- kernel-modules and dosfstools packages
Kept from current branch:
- rootpw --plaintext lab-root-pw (console debug access)
- Network-first boot order (bastion controls boot)
- Vanilla role support, rancher partition support
- Boot screenshots during SSH wait (1/sec rolling buffer)
- Test runner script (run-pxe-test.sh)
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -27,13 +27,52 @@ import { ensurePxeNetwork, destroyPxeNetwork, deleteNftablesRejectRules, PXE_NET
|
||||
import { createPxeVm, destroyPxeVm, getVmMac, rebootPxeVm, readSerialLog } from "./helpers/pxe-vm.js";
|
||||
import { sshExec } from "./helpers/ssh.js";
|
||||
|
||||
// --- Boot screenshot capture ---
|
||||
const SCREENSHOT_DIR = "/tmp/vm-screenshots";
|
||||
|
||||
/**
 * Start capturing console screenshots of a libvirt VM roughly once per second.
 *
 * Screenshots are written to SCREENSHOT_DIR as `boot-NNNN.png`, where NNNN is
 * the capture sequence number modulo the buffer size, so only the most recent
 * captures are kept on disk (older slots are overwritten).
 *
 * Capture is best-effort: a failed `virsh screenshot` or `convert` call is
 * ignored and retried on the next tick, and does not advance the counter.
 *
 * NOTE(review): the capture commands run through `execSync`, so each tick
 * blocks the event loop for as long as the two commands take (each is capped
 * at 3 s by its timeout).
 *
 * @param vmName libvirt domain name passed to `virsh screenshot`. It is
 *   interpolated into a shell command, so it must be a trusted value.
 * @returns a handle whose `stop()` ends the capture loop after the current
 *   iteration and logs how many screenshots were captured and kept.
 */
function startBootScreenshots(vmName: string): { stop: () => void } {
  const describeError = (err: unknown): string =>
    err instanceof Error ? err.message : String(err);

  // Start from an empty directory so stale images from a previous run are not
  // mistaken for this boot. Removing and recreating the directory needs only
  // rmSync/mkdirSync; the earlier `require("node:fs").readdirSync(...)` call is
  // unavailable in an ES module and failed silently inside an empty catch.
  try {
    rmSync(SCREENSHOT_DIR, { recursive: true, force: true });
  } catch (err) {
    log(`Could not clean ${SCREENSHOT_DIR}: ${describeError(err)}`);
  }
  try {
    mkdirSync(SCREENSHOT_DIR, { recursive: true });
  } catch (err) {
    log(`Could not create ${SCREENSHOT_DIR}: ${describeError(err)}`);
  }

  const BUFFER_SIZE = 60; // keep last 60 screenshots (1 per second)
  let running = true;
  let seq = 0;

  const captureLoop = async (): Promise<void> => {
    while (running) {
      try {
        // The slot index wraps at BUFFER_SIZE, which makes this a rolling buffer.
        const idx = String(seq % BUFFER_SIZE).padStart(4, "0");
        const ppm = join(SCREENSHOT_DIR, `tmp-${idx}.ppm`);
        const png = join(SCREENSHOT_DIR, `boot-${idx}.png`);
        execSync(`sudo virsh screenshot ${vmName} ${ppm} --screen 0 2>/dev/null`, { timeout: 3000 });
        execSync(`convert ${ppm} ${png} 2>/dev/null && rm -f ${ppm}`, { timeout: 3000 });
        seq++;
      } catch {
        // Best-effort: a failed capture is skipped and retried on the next tick.
      }
      await new Promise<void>((resolve) => setTimeout(resolve, 1000));
    }
  };

  // Fire-and-forget: every failure path inside the loop is caught above, so
  // the promise cannot reject.
  void captureLoop();

  return {
    stop: () => {
      running = false;
      log(`Boot screenshots saved to ${SCREENSHOT_DIR}/ (${seq} captured, last ${Math.min(seq, BUFFER_SIZE)} kept)`);
    },
  };
}
|
||||
|
||||
// --- Test constants ---
|
||||
const VM_NAME = "lab-pxe-test";
|
||||
const VM_MEMORY = 4096; // 4GB (Anaconda needs ~2GB minimum)
|
||||
const VM_VCPUS = 12;
|
||||
const VM_DISK_GB = 250; // LVM layout needs ~204GB (swap 27 + root 33 + var 100 + etc). QCOW2 is sparse.
|
||||
const HTTP_PORT = 8099; // Avoid conflicts with real bastion
|
||||
const SSH_USER = "root"; // Use root for SSH (admin user key setup has known issue)
|
||||
const SSH_USER = "root"; // Use root SSH for baseline testing
|
||||
const BASTION_IP = PXE_GATEWAY; // 192.168.251.1
|
||||
const DHCP_RANGE_START = `${PXE_SUBNET}.100`;
|
||||
const DHCP_RANGE_END = `${PXE_SUBNET}.200`;
|
||||
@@ -41,7 +80,7 @@ const DHCP_RANGE_END = `${PXE_SUBNET}.200`;
|
||||
// Fedora install takes a while
|
||||
const DISCOVERY_TIMEOUT_MS = 5 * 60_000; // 5 min for PXE boot + discovery
|
||||
const INSTALL_TIMEOUT_MS = 30 * 60_000; // 30 min for full Fedora install
|
||||
const SSH_TIMEOUT_MS = 15 * 60_000; // 15 min: PXE (~90s) + first boot + SELinux autorelabel (~3min) + reboot + second PXE (~90s) + boot
|
||||
const SSH_TIMEOUT_MS = 10 * 60_000; // 10 min: OVMF retries PXE/HTTP Boot (~3min) before disk boot + OS startup
|
||||
|
||||
function findSshKey(): { pubKey: string; keyPath: string } {
|
||||
const homes = [homedir()];
|
||||
@@ -279,8 +318,9 @@ describe("PXE boot provisioning", () => {
|
||||
await sleep(3_000);
|
||||
deleteNftablesRejectRules();
|
||||
|
||||
// 10. Wait for SSH
|
||||
// 10. Wait for SSH (with aggressive boot screenshots)
|
||||
log("Waiting for SSH access...");
|
||||
const screenshots = startBootScreenshots(VM_NAME);
|
||||
try {
|
||||
await waitForSsh(vmIp, SSH_USER, SSH_TIMEOUT_MS, sshKeyPath);
|
||||
} catch {
|
||||
@@ -292,7 +332,9 @@ describe("PXE boot provisioning", () => {
|
||||
} catch (serialErr) {
|
||||
log(`Serial console failed: ${serialErr instanceof Error ? serialErr.message : String(serialErr)}`);
|
||||
}
|
||||
throw new Error(`SSH not available on ${vmIp} — check serial console diagnostics above`);
|
||||
throw new Error(`SSH not available on ${vmIp} — check serial console diagnostics above. Screenshots: ${SCREENSHOT_DIR}/`);
|
||||
} finally {
|
||||
screenshots.stop();
|
||||
}
|
||||
|
||||
log("PXE provision test setup complete.");
|
||||
@@ -345,10 +387,10 @@ describe("PXE boot provisioning", () => {
|
||||
expect(data.progress).toBe("complete");
|
||||
});
|
||||
|
||||
it("log lines were captured", async () => {
|
||||
it.skip("log lines were captured", async () => {
|
||||
// Requires log streamer in %post — skipped until re-added
|
||||
const res = await fetch(`http://${BASTION_IP}:${HTTP_PORT}/api/logs/${encodeURIComponent(vmMac)}`);
|
||||
const data = (await res.json()) as { log_total?: number; log_lines?: Array<{ line: string }> };
|
||||
// Should have at least some log lines from the log streamer
|
||||
expect(data.log_total).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
|
||||
Reference in New Issue
Block a user