{ "master": { "tasks": [ { "id": 72, "title": "Expand Prisma Schema with Resource Relationships", "description": "Add Network, ServerNic, ServerDisk, and ClusterMember models to the Prisma schema. Add bastionId foreign key to Server model to track which bastion owns each server.", "details": "Edit `bastion/src/labd/prisma/schema.prisma` to add:\n\n1. **Server model changes**:\n - Add `bastionId String?` with relation to Bastion\n - Add `hardwareInfo Json?` for storing raw HardwareInfo\n - Add `os String?` for installed OS\n\n2. **Network model**:\n```prisma\nmodel Network {\n id String @id @default(uuid())\n name String @unique\n cidr String\n vlan Int?\n gateway String?\n domain String?\n dhcpEnabled Boolean @default(false)\n createdAt DateTime @default(now())\n updatedAt DateTime @updatedAt\n \n nics ServerNic[]\n}\n```\n\n3. **ServerNic model**:\n```prisma\nmodel ServerNic {\n id String @id @default(uuid())\n serverId String\n server Server @relation(fields: [serverId], references: [id], onDelete: Cascade)\n networkId String?\n network Network? @relation(fields: [networkId], references: [id])\n mac String\n ip String?\n name String\n state String @default(\"DOWN\")\n \n @@unique([serverId, mac])\n @@index([networkId])\n}\n```\n\n4. **ServerDisk model**:\n```prisma\nmodel ServerDisk {\n id String @id @default(uuid())\n serverId String\n server Server @relation(fields: [serverId], references: [id], onDelete: Cascade)\n name String\n sizeGb Float\n model String?\n \n @@unique([serverId, name])\n}\n```\n\n5. 
**ClusterMember model**:\n```prisma\nmodel ClusterMember {\n id String @id @default(uuid())\n clusterId String\n cluster Cluster @relation(fields: [clusterId], references: [id], onDelete: Cascade)\n serverId String\n server Server @relation(fields: [serverId], references: [id], onDelete: Cascade)\n role String @default(\"worker\") // control-plane, worker\n joinedAt DateTime @default(now())\n \n @@unique([clusterId, serverId])\n @@index([clusterId])\n @@index([serverId])\n}\n```\n\n6. Update Server model with relations to nics, disks, clusterMemberships, and bastion.\n\nRun `pnpm prisma generate` and `pnpm prisma migrate dev --name add-resource-models`.", "testStrategy": "1. Run `pnpm prisma validate` to verify schema syntax\n2. Run `pnpm prisma generate` to confirm client generation\n3. Create migration and verify it applies cleanly to local CockroachDB\n4. Write unit tests that create/read/delete each new model\n5. Verify cascade deletes work (deleting Server removes its NICs and Disks)", "priority": "high", "dependencies": [], "status": "pending", "subtasks": [] }, { "id": 73, "title": "Implement State Persistence Service in labd", "description": "Create a new service in labd that persists bastion state syncs to the Server table in CockroachDB. 
When bastion-state-sync messages arrive, upsert machines into Server with their hardware info, status, and ownership.", "details": "Create `bastion/src/labd/src/services/state-persistence.ts`:\n\n```typescript\nimport type { PrismaClient } from \"@prisma/client\";\nimport type { BastionState, HardwareInfo, InstallConfig, InstalledInfo } from \"@lab/shared\";\nimport { logger } from \"./logger.js\";\n\nexport class StatePersistence {\n constructor(private readonly db: PrismaClient) {}\n\n async syncBastionState(bastionId: string, state: BastionState): Promise<void> {\n // Process discovered machines\n for (const [mac, hw] of Object.entries(state.discovered)) {\n await this.upsertDiscoveredServer(bastionId, mac, hw);\n }\n \n // Process queued machines (update status to provisioning)\n for (const [mac, cfg] of Object.entries(state.install_queue)) {\n await this.upsertQueuedServer(bastionId, mac, cfg);\n }\n \n // Process installed machines\n for (const [mac, info] of Object.entries(state.installed)) {\n await this.upsertInstalledServer(bastionId, mac, info);\n }\n }\n\n private async upsertDiscoveredServer(bastionId: string, mac: string, hw: HardwareInfo): Promise<void> {\n const normalized = mac.toLowerCase();\n \n await this.db.server.upsert({\n where: { mac: normalized },\n create: {\n hostname: `unknown-${normalized.replace(/:/g, \"\").slice(-6)}`,\n mac: normalized,\n bastionId,\n status: \"discovered\",\n hardwareInfo: hw as any,\n labels: {\n arch: hw.arch,\n cpu_model: hw.cpu_model,\n cpu_cores: hw.cpu_cores,\n memory_gb: hw.memory_gb,\n },\n },\n update: {\n bastionId,\n status: \"discovered\", // only if not already provisioning/installed\n hardwareInfo: hw as any,\n },\n });\n \n // Sync NICs and Disks\n await this.syncServerHardware(normalized, hw);\n }\n \n private async syncServerHardware(mac: string, hw: HardwareInfo): Promise<void> {\n const server = await this.db.server.findUnique({ where: { mac } });\n if (!server) return;\n \n // Upsert NICs\n for (const nic of 
hw.nics) {\n await this.db.serverNic.upsert({\n where: { serverId_mac: { serverId: server.id, mac: nic.mac.toLowerCase() } },\n create: { serverId: server.id, mac: nic.mac.toLowerCase(), name: nic.name, state: nic.state },\n update: { name: nic.name, state: nic.state },\n });\n }\n \n // Upsert Disks\n for (const disk of hw.disks) {\n await this.db.serverDisk.upsert({\n where: { serverId_name: { serverId: server.id, name: disk.name } },\n create: { serverId: server.id, name: disk.name, sizeGb: disk.size_gb, model: disk.model },\n update: { sizeGb: disk.size_gb, model: disk.model },\n });\n }\n }\n \n // Similar methods for upsertQueuedServer and upsertInstalledServer...\n}\n```\n\nIntegrate into `server.ts` WebSocket handler by calling `statePersistence.syncBastionState()` when `bastion-state-sync` messages arrive.", "testStrategy": "1. Unit test StatePersistence with mocked PrismaClient\n2. Integration test: simulate bastion-state-sync message, verify Server rows created\n3. Test idempotency: send same state twice, verify no duplicates\n4. Test status transitions: discovered -> provisioning -> installed\n5. Verify hardware info (NICs, Disks) is correctly persisted", "priority": "high", "dependencies": [ 72 ], "status": "pending", "subtasks": [] }, { "id": 74, "title": "Add State Loading from labd on Bastion Startup", "description": "Modify bastion startup to request its persisted state from labd before using the local JSON cache. This ensures bastions restore their state after pod restarts.", "details": "1. 
Add new labd API endpoint `GET /api/bastions/:id/state` that returns the aggregated state for a specific bastion from the Server table:\n\n```typescript\n// bastion/src/labd/src/routes/bastions.ts\napp.get<{ Params: { id: string } }>(\"/api/bastions/:id/state\", async (request, reply) => {\n const { id } = request.params;\n \n const servers = await db.server.findMany({\n where: { bastionId: id },\n include: { nics: true, disks: true },\n });\n \n // Transform back to BastionState format\n const state: BastionState = { discovered: {}, install_queue: {}, installed: {} };\n for (const server of servers) {\n const mac = server.mac;\n if (!mac) continue;\n \n switch (server.status) {\n case \"discovered\":\n state.discovered[mac] = transformToHardwareInfo(server);\n break;\n case \"provisioning\":\n state.install_queue[mac] = transformToInstallConfig(server);\n break;\n case \"installed\":\n state.installed[mac] = transformToInstalledInfo(server);\n break;\n }\n }\n \n return reply.send(state);\n});\n```\n\n2. Modify `BastionConnection.connect()` in `labd-connection.ts` to fetch state after enrollment:\n\n```typescript\nprivate async loadRemoteState(): Promise<BastionState | null> {\n if (!this.bastionId || !this.config.labdUrl) return null;\n try {\n const resp = await fetch(`${this.config.labdUrl}/api/bastions/${this.bastionId}/state`);\n if (resp.ok) return await resp.json();\n } catch { /* fall back to local */ }\n return null;\n}\n```\n\n3. In bastion `main.ts`, after establishing labd connection, merge remote state with local state (remote takes precedence for installed machines, local wins for in-progress installs).", "testStrategy": "1. Integration test: start bastion, let it persist state, restart bastion, verify state restored\n2. Test merge logic: local has in-progress install, remote has discovered - verify install preserved\n3. Test offline mode: labd unavailable, bastion falls back to local JSON\n4. 
Test fresh start: no local state, no remote state - bastion starts with empty state", "priority": "high", "dependencies": [ 73 ], "status": "pending", "subtasks": [] }, { "id": 75, "title": "Fix Bastion --dir Environment Variable Default", "description": "Fix the bug where CLI's --dir default overrides the BASTION_DIR environment variable. The CLI option should use the env var as its default.", "details": "Edit `bastion/src/cli/src/commands/serve.ts`:\n\n```typescript\n// Before (line 14):\n.option(\"--dir <path>\", \"Bastion data directory\", \"/tmp/lab-bastion\")\n\n// After:\n.option(\n \"--dir <path>\",\n \"Bastion data directory\",\n process.env[\"BASTION_DIR\"] ?? \"/tmp/lab-bastion\"\n)\n```\n\nThis ensures:\n1. If `BASTION_DIR` env var is set (e.g., in k8s deployment), it's used as default\n2. Explicit `--dir` flag still overrides both\n3. Falls back to `/tmp/lab-bastion` if neither is set\n\nAlso update the k8s deployment manifest `bastion/deploy/k3s/deployment.yaml` to ensure `BASTION_DIR=/data` is properly set.", "testStrategy": "1. Unit test: verify option default reads from process.env\n2. Integration test: set BASTION_DIR, run labctl without --dir, verify correct dir used\n3. Integration test: set BASTION_DIR, run labctl with --dir /custom, verify /custom used\n4. Test no env var: verify default /tmp/lab-bastion used", "priority": "high", "dependencies": [], "status": "pending", "subtasks": [] }, { "id": 76, "title": "Create Resource Type Registry with Aliases", "description": "Create a centralized resource type registry that maps resource names, plurals, and short aliases to canonical types. 
This enables kubectl-style resource resolution.", "details": "Create `bastion/src/cli/src/utils/resources.ts`:\n\n```typescript\nexport interface ResourceDefinition {\n kind: string; // Canonical type: \"Server\", \"Cluster\", etc.\n singular: string; // \"server\"\n plural: string; // \"servers\"\n aliases: string[]; // [\"srv\"]\n apiPath: string; // \"/api/servers\"\n columns: TableColumn[]; // Default columns for 'get' output\n wideColumns?: TableColumn[]; // Extra columns for -o wide\n}\n\nconst RESOURCE_DEFINITIONS: ResourceDefinition[] = [\n {\n kind: \"Server\",\n singular: \"server\",\n plural: \"servers\",\n aliases: [\"srv\"],\n apiPath: \"/api/servers\",\n columns: serverColumns,\n wideColumns: serverWideColumns,\n },\n {\n kind: \"Cluster\",\n singular: \"cluster\",\n plural: \"clusters\",\n aliases: [],\n apiPath: \"/api/clusters\",\n columns: clusterColumns,\n },\n {\n kind: \"Network\",\n singular: \"network\",\n plural: \"networks\",\n aliases: [\"net\"],\n apiPath: \"/api/networks\",\n columns: networkColumns,\n },\n // ... bastion, role, user, token, audit\n];\n\nconst aliasMap = new Map<string, ResourceDefinition>();\nfor (const def of RESOURCE_DEFINITIONS) {\n aliasMap.set(def.singular, def);\n aliasMap.set(def.plural, def);\n for (const alias of def.aliases) {\n aliasMap.set(alias, def);\n }\n}\n\nexport function resolveResourceType(input: string): ResourceDefinition {\n const normalized = input.toLowerCase();\n const def = aliasMap.get(normalized);\n if (!def) {\n const valid = RESOURCE_DEFINITIONS.map(d => d.plural).join(\", \");\n throw new Error(`Unknown resource type \"${input}\". Valid types: ${valid}`);\n }\n return def;\n}\n\nexport function resolveResourceIdentifier(input: string): {\n type: ResourceDefinition;\n name?: string;\n} {\n // Handle \"server/labmaster\" or just \"servers\"\n const parts = input.split(\"/\");\n const type = resolveResourceType(parts[0]);\n const name = parts.length > 1 ? 
parts.slice(1).join(\"/\") : undefined;\n return { type, name };\n}\n```\n\nUpdate `bastion/src/cli/src/utils/resource.ts` to use the new registry.", "testStrategy": "1. Unit test resolveResourceType with all aliases: server, servers, srv -> Server\n2. Test unknown resource type throws descriptive error\n3. Test case insensitivity: SERVER, Server, server all resolve correctly\n4. Test resolveResourceIdentifier parses \"server/labmaster\" correctly", "priority": "high", "dependencies": [], "status": "pending", "subtasks": [] }, { "id": 77, "title": "Implement 'labctl get' Command", "description": "Create the core 'labctl get [name]' command that lists resources with filtering and output format support. This is the foundation of the kubectl-style CLI.", "details": "Create `bastion/src/cli/src/commands/get.ts`:\n\n```typescript\nimport { Command } from \"commander\";\nimport { resolveResourceType, type ResourceDefinition } from \"../utils/resources.js\";\nimport { getLabdClient } from \"../api/config.js\";\nimport { formatOutput, type TableColumn } from \"../utils/table.js\";\n\nexport function registerGetCommand(program: Command): void {\n program\n .command(\"get [name]\")\n .description(\"List resources or get a specific resource by name\")\n .option(\"--status \", \"Filter by status\")\n .option(\"--role \", \"Filter by role (servers only)\")\n .option(\"--cloud \", \"Filter by cloud\")\n .option(\"--env \", \"Filter by environment\")\n .option(\"-l, --label