fix: tolerate incomplete LLM title arrays in paginate stage

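Qwen 7B sometimes returns fewer titles than there are pages (e.g. 12
titles for 14 pages). Instead of rejecting the entire response, pad
missing entries with generic "Page N" titles and truncate extras; blank
or non-string entries also fall back to "Page N". An empty or non-array
response is still rejected. Also emphasize the exact count in the prompt.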

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Michal
2026-03-03 22:10:56 +00:00
parent 4cfdd805d8
commit 89f869f460

View File

@@ -64,19 +64,25 @@ async function generatePageTitles(pages: string[], ctx: StageContext): Promise<s
}).join('\n\n');
const result = await ctx.llm.complete(
`Generate a short descriptive title (max 60 chars) for each page based on its content preview. ` +
`Return ONLY a JSON array of strings, one title per page. No markdown, no explanation.\n\n` +
`Generate exactly ${pages.length} short descriptive titles (max 60 chars each) for the following ${pages.length} pages. ` +
`Return ONLY a JSON array of ${pages.length} strings. No markdown, no explanation.\n\n` +
`${previews}`,
{ maxTokens: pages.length * 30 },
);
// Parse JSON array from response
// Parse JSON array from response, pad/truncate to match page count
const match = result.match(/\[[\s\S]*\]/);
if (!match) throw new Error('No JSON array in response');
const titles = JSON.parse(match[0]) as string[];
if (!Array.isArray(titles) || titles.length !== pages.length) {
throw new Error(`Expected ${pages.length} titles, got ${titles.length}`);
const raw = JSON.parse(match[0]) as string[];
if (!Array.isArray(raw) || raw.length === 0) {
throw new Error('Empty or invalid title array');
}
// Pad with generic titles if model returned fewer, truncate if more
const titles = pages.map((_, i) =>
(i < raw.length && typeof raw[i] === 'string' && raw[i]!.trim())
? raw[i]!.trim().slice(0, 80)
: `Page ${i + 1}`,
);
return JSON.stringify(titles);
});