Improve job status notifications
This commit is contained in:
@@ -5,7 +5,8 @@ import type {
|
||||
SearchAnswerResponse,
|
||||
SearchCitation,
|
||||
SearchResult,
|
||||
SearchSource
|
||||
SearchSource,
|
||||
TaskStageContext
|
||||
} from '@/lib/types';
|
||||
import { runAiAnalysis, runAiEmbeddings } from '@/lib/server/ai';
|
||||
import { __dbInternals, getSqliteClient } from '@/lib/server/db';
|
||||
@@ -90,7 +91,11 @@ type IndexSearchDocumentsInput = {
|
||||
journalEntryId?: number | null;
|
||||
sourceKinds?: SearchDocumentSourceKind[];
|
||||
deleteSourceRefs?: DeleteSourceRef[];
|
||||
onStage?: (stage: 'collect' | 'fetch' | 'chunk' | 'embed' | 'persist', detail: string) => Promise<void> | void;
|
||||
onStage?: (
|
||||
stage: 'collect' | 'fetch' | 'chunk' | 'embed' | 'persist',
|
||||
detail: string,
|
||||
context?: TaskStageContext | null
|
||||
) => Promise<void> | void;
|
||||
};
|
||||
|
||||
type SearchInput = {
|
||||
@@ -834,22 +839,94 @@ export async function indexSearchDocuments(input: IndexSearchDocumentsInput) {
|
||||
let skipped = 0;
|
||||
let deleted = 0;
|
||||
let chunksEmbedded = 0;
|
||||
const totalDocuments = materialized.length;
|
||||
|
||||
const stageContext = (current: number, subject?: TaskStageContext['subject'] | null): TaskStageContext => ({
|
||||
progress: {
|
||||
current,
|
||||
total: totalDocuments || 1,
|
||||
unit: 'sources'
|
||||
},
|
||||
counters: {
|
||||
sourcesCollected: totalDocuments,
|
||||
indexed,
|
||||
skipped,
|
||||
deleted,
|
||||
chunksEmbedded
|
||||
},
|
||||
subject: subject ?? (input.ticker ? { ticker: input.ticker } : input.accessionNumber ? { accessionNumber: input.accessionNumber } : null)
|
||||
});
|
||||
|
||||
if (input.deleteSourceRefs && input.deleteSourceRefs.length > 0) {
|
||||
deleted += deleteSourceRefs(client, input.deleteSourceRefs);
|
||||
}
|
||||
|
||||
for (const document of materialized) {
|
||||
await input.onStage?.('fetch', `Preparing ${document.sourceKind} ${document.sourceRef}`);
|
||||
await input.onStage?.(
|
||||
'collect',
|
||||
`Collected ${materialized.length} source records for search indexing`,
|
||||
{
|
||||
counters: {
|
||||
sourcesCollected: materialized.length,
|
||||
deleted
|
||||
},
|
||||
subject: input.ticker ? { ticker: input.ticker } : input.accessionNumber ? { accessionNumber: input.accessionNumber } : null
|
||||
}
|
||||
);
|
||||
|
||||
for (let index = 0; index < materialized.length; index += 1) {
|
||||
const document = materialized[index];
|
||||
|
||||
await input.onStage?.(
|
||||
'fetch',
|
||||
`Preparing ${document.sourceKind} ${document.sourceRef}`,
|
||||
stageContext(index + 1, {
|
||||
ticker: document.ticker ?? undefined,
|
||||
accessionNumber: document.accessionNumber ?? undefined,
|
||||
label: document.sourceRef
|
||||
})
|
||||
);
|
||||
const chunks = chunkDocument(document);
|
||||
if (chunks.length === 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
await input.onStage?.('chunk', `Chunking ${document.sourceKind} ${document.sourceRef}`);
|
||||
await input.onStage?.('embed', `Embedding ${chunks.length} chunks for ${document.sourceRef}`);
|
||||
await input.onStage?.(
|
||||
'chunk',
|
||||
`Chunking ${document.sourceKind} ${document.sourceRef}`,
|
||||
stageContext(index + 1, {
|
||||
ticker: document.ticker ?? undefined,
|
||||
accessionNumber: document.accessionNumber ?? undefined,
|
||||
label: document.sourceRef
|
||||
})
|
||||
);
|
||||
await input.onStage?.(
|
||||
'embed',
|
||||
`Embedding ${chunks.length} chunks for ${document.sourceRef}`,
|
||||
{
|
||||
...stageContext(index + 1, {
|
||||
ticker: document.ticker ?? undefined,
|
||||
accessionNumber: document.accessionNumber ?? undefined,
|
||||
label: document.sourceRef
|
||||
}),
|
||||
counters: {
|
||||
sourcesCollected: totalDocuments,
|
||||
indexed,
|
||||
skipped,
|
||||
deleted,
|
||||
chunksEmbedded
|
||||
}
|
||||
}
|
||||
);
|
||||
const embeddings = await runAiEmbeddings(chunks.map((chunk) => chunk.chunkText));
|
||||
await input.onStage?.('persist', `Persisting indexed chunks for ${document.sourceRef}`);
|
||||
await input.onStage?.(
|
||||
'persist',
|
||||
`Persisting indexed chunks for ${document.sourceRef}`,
|
||||
stageContext(index + 1, {
|
||||
ticker: document.ticker ?? undefined,
|
||||
accessionNumber: document.accessionNumber ?? undefined,
|
||||
label: document.sourceRef
|
||||
})
|
||||
);
|
||||
const result = persistDocumentIndex(client, document, chunks, embeddings);
|
||||
|
||||
if (result.skipped) {
|
||||
|
||||
Reference in New Issue
Block a user