Run playwright UI tests

This commit is contained in:
2026-03-06 14:40:43 -05:00
parent 610fce8db3
commit 8e62c66677
37 changed files with 4430 additions and 643 deletions

View File

@@ -8,7 +8,6 @@ import type {
Holding,
FinancialHistoryWindow,
FinancialStatementKind,
FinancialStatementMode,
PortfolioInsight,
PortfolioSummary,
Task,
@@ -199,24 +198,32 @@ export async function getCompanyAnalysis(ticker: string) {
export async function getCompanyFinancialStatements(input: {
ticker: string;
mode: FinancialStatementMode;
statement: FinancialStatementKind;
window: FinancialHistoryWindow;
includeDimensions?: boolean;
includeFacts?: boolean;
factsCursor?: string | null;
factsLimit?: number;
cursor?: string | null;
limit?: number;
}) {
const query = {
ticker: input.ticker.trim().toUpperCase(),
mode: input.mode,
statement: input.statement,
window: input.window,
includeDimensions: input.includeDimensions ? 'true' : 'false',
includeFacts: input.includeFacts ? 'true' : 'false',
...(typeof input.cursor === 'string' && input.cursor.trim().length > 0
? { cursor: input.cursor.trim() }
: {}),
...(typeof input.limit === 'number' && Number.isFinite(input.limit)
? { limit: input.limit }
: {}),
...(typeof input.factsCursor === 'string' && input.factsCursor.trim().length > 0
? { factsCursor: input.factsCursor.trim() }
: {}),
...(typeof input.factsLimit === 'number' && Number.isFinite(input.factsLimit)
? { factsLimit: input.factsLimit }
: {})
};

View File

@@ -2,13 +2,15 @@ export const queryKeys = {
companyAnalysis: (ticker: string) => ['analysis', ticker] as const,
companyFinancialStatements: (
ticker: string,
mode: string,
statement: string,
window: string,
includeDimensions: boolean,
includeFacts: boolean,
factsCursor: string | null,
factsLimit: number,
cursor: string | null,
limit: number
) => ['financials-v2', ticker, mode, statement, window, includeDimensions ? 'dims' : 'no-dims', cursor ?? '', limit] as const,
) => ['financials-v3', ticker, statement, window, includeDimensions ? 'dims' : 'no-dims', includeFacts ? 'facts' : 'rows', factsCursor ?? '', factsLimit, cursor ?? '', limit] as const,
filings: (ticker: string | null, limit: number) => ['filings', ticker ?? '', limit] as const,
report: (accessionNumber: string) => ['report', accessionNumber] as const,
watchlist: () => ['watchlist'] as const,

View File

@@ -15,8 +15,7 @@ import {
import { queryKeys } from '@/lib/query/keys';
import type {
FinancialHistoryWindow,
FinancialStatementKind,
FinancialStatementMode
FinancialStatementKind
} from '@/lib/types';
export function companyAnalysisQueryOptions(ticker: string) {
@@ -31,34 +30,43 @@ export function companyAnalysisQueryOptions(ticker: string) {
export function companyFinancialStatementsQueryOptions(input: {
ticker: string;
mode: FinancialStatementMode;
statement: FinancialStatementKind;
window: FinancialHistoryWindow;
includeDimensions?: boolean;
includeFacts?: boolean;
factsCursor?: string | null;
factsLimit?: number;
cursor?: string | null;
limit?: number;
}) {
const normalizedTicker = input.ticker.trim().toUpperCase();
const includeDimensions = input.includeDimensions ?? false;
const includeFacts = input.includeFacts ?? false;
const factsCursor = input.factsCursor ?? null;
const factsLimit = input.factsLimit ?? 500;
const cursor = input.cursor ?? null;
const limit = input.limit ?? 40;
return queryOptions({
queryKey: queryKeys.companyFinancialStatements(
normalizedTicker,
input.mode,
input.statement,
input.window,
includeDimensions,
includeFacts,
factsCursor,
factsLimit,
cursor,
limit
),
queryFn: () => getCompanyFinancialStatements({
ticker: normalizedTicker,
mode: input.mode,
statement: input.statement,
window: input.window,
includeDimensions,
includeFacts,
factsCursor,
factsLimit,
cursor,
limit
}),

View File

@@ -4,7 +4,6 @@ import type {
Filing,
FinancialHistoryWindow,
FinancialStatementKind,
FinancialStatementMode,
TaskStatus
} from '@/lib/types';
import { auth } from '@/lib/auth';
@@ -13,8 +12,8 @@ import { asErrorMessage, jsonError } from '@/lib/server/http';
import { buildPortfolioSummary } from '@/lib/server/portfolio';
import {
defaultFinancialSyncLimit,
getCompanyFinancialStatements
} from '@/lib/server/financial-statements';
getCompanyFinancialTaxonomy
} from '@/lib/server/financial-taxonomy';
import { redactInternalFilingAnalysisFields } from '@/lib/server/api/filing-redaction';
import { getFilingByAccession, listFilingsRecords } from '@/lib/server/repos/filings';
import {
@@ -44,8 +43,7 @@ import {
const ALLOWED_STATUSES: TaskStatus[] = ['queued', 'running', 'completed', 'failed'];
const FINANCIAL_FORMS: ReadonlySet<Filing['filing_type']> = new Set(['10-K', '10-Q']);
const AUTO_FILING_SYNC_LIMIT = 20;
const FINANCIALS_V2_ENABLED = process.env.FINANCIALS_V2?.trim().toLowerCase() !== 'false';
const FINANCIAL_STATEMENT_MODES: FinancialStatementMode[] = ['standardized', 'filing_faithful'];
const FINANCIALS_V3_ENABLED = process.env.FINANCIALS_V3?.trim().toLowerCase() !== 'false';
const FINANCIAL_STATEMENT_KINDS: FinancialStatementKind[] = [
'income',
'balance',
@@ -120,12 +118,6 @@ function asTags(value: unknown) {
return [...unique];
}
/**
 * Coerces an untrusted query value into a supported statement mode.
 *
 * @param value - Raw value taken from the request query.
 * @returns The value itself when it is listed in `FINANCIAL_STATEMENT_MODES`,
 *   otherwise the `'standardized'` fallback.
 */
function asStatementMode(value: unknown): FinancialStatementMode {
  const candidate = value as FinancialStatementMode;
  if (FINANCIAL_STATEMENT_MODES.includes(candidate)) {
    return candidate;
  }

  return 'standardized';
}
function asStatementKind(value: unknown): FinancialStatementKind {
return FINANCIAL_STATEMENT_KINDS.includes(value as FinancialStatementKind)
? value as FinancialStatementKind
@@ -613,8 +605,8 @@ export const app = new Elysia({ prefix: '/api' })
return response;
}
if (!FINANCIALS_V2_ENABLED) {
return jsonError('Financial statements v2 is disabled', 404);
if (!FINANCIALS_V3_ENABLED) {
return jsonError('Financial statements v3 is disabled', 404);
}
const ticker = typeof query.ticker === 'string'
@@ -624,26 +616,34 @@ export const app = new Elysia({ prefix: '/api' })
return jsonError('ticker is required');
}
const mode = asStatementMode(query.mode);
const statement = asStatementKind(query.statement);
const window = asHistoryWindow(query.window);
const includeDimensions = asBoolean(query.includeDimensions, false);
const includeFacts = asBoolean(query.includeFacts, false);
const cursor = typeof query.cursor === 'string' && query.cursor.trim().length > 0
? query.cursor.trim()
: null;
const limit = Number.isFinite(Number(query.limit))
? Number(query.limit)
: undefined;
const factsCursor = typeof query.factsCursor === 'string' && query.factsCursor.trim().length > 0
? query.factsCursor.trim()
: null;
const factsLimit = Number.isFinite(Number(query.factsLimit))
? Number(query.factsLimit)
: undefined;
let payload = await getCompanyFinancialStatements({
let payload = await getCompanyFinancialTaxonomy({
ticker,
mode,
statement,
window,
includeDimensions,
includeFacts,
factsCursor,
factsLimit,
cursor,
limit,
v2Enabled: FINANCIALS_V2_ENABLED,
v3Enabled: FINANCIALS_V3_ENABLED,
queuedSync: false
});
@@ -671,7 +671,7 @@ export const app = new Elysia({ prefix: '/api' })
});
queuedSync = true;
} catch (error) {
console.error(`[financials-v2-sync] failed for ${ticker}:`, error);
console.error(`[financials-v3-sync] failed for ${ticker}:`, error);
}
}
@@ -689,7 +689,6 @@ export const app = new Elysia({ prefix: '/api' })
}, {
query: t.Object({
ticker: t.String({ minLength: 1 }),
mode: t.Optional(t.Union([t.Literal('standardized'), t.Literal('filing_faithful')])),
statement: t.Optional(t.Union([
t.Literal('income'),
t.Literal('balance'),
@@ -699,8 +698,11 @@ export const app = new Elysia({ prefix: '/api' })
])),
window: t.Optional(t.Union([t.Literal('10y'), t.Literal('all')])),
includeDimensions: t.Optional(t.Union([t.String(), t.Boolean()])),
includeFacts: t.Optional(t.Union([t.String(), t.Boolean()])),
cursor: t.Optional(t.String()),
limit: t.Optional(t.Numeric())
limit: t.Optional(t.Numeric()),
factsCursor: t.Optional(t.String()),
factsLimit: t.Optional(t.Numeric())
})
})
.get('/analysis/reports/:accessionNumber', async ({ params }) => {

View File

@@ -86,7 +86,8 @@ function applySqlMigrations(client: { exec: (query: string) => void }) {
'0001_glossy_statement_snapshots.sql',
'0002_workflow_task_projection_metadata.sql',
'0003_task_stage_event_timeline.sql',
'0004_watchlist_company_taxonomy.sql'
'0004_watchlist_company_taxonomy.sql',
'0005_financial_taxonomy_v3.sql'
];
for (const file of migrationFiles) {

View File

@@ -15,6 +15,19 @@ type FilingMetrics = {
debt: number | null;
};
/**
 * Classification of a file attached to a taxonomy snapshot.
 * Used as the TypeScript type of the `filing_taxonomy_asset.asset_type` column.
 */
type TaxonomyAssetType =
| 'instance'
| 'schema'
| 'presentation'
| 'label'
| 'calculation'
| 'definition'
| 'pdf'
| 'other';
/** Parse outcome stored in the `filing_taxonomy_snapshot.parse_status` column. */
type TaxonomyParseStatus = 'ready' | 'partial' | 'failed';
/**
 * Status of a taxonomy-vs-LLM metric comparison. Shared by the per-metric check,
 * the aggregated validation result, and the `filing_taxonomy_metric_validation.status` column.
 */
type TaxonomyMetricValidationStatus = 'not_run' | 'matched' | 'mismatch' | 'error';
type FilingAnalysis = {
provider?: string;
model?: string;
@@ -47,6 +60,7 @@ type FilingStatementPeriod = {
filingId: number;
accessionNumber: string;
filingDate: string;
periodStart: string | null;
periodEnd: string | null;
filingType: '10-K' | '10-Q';
periodLabel: string;
@@ -97,6 +111,55 @@ type DimensionStatementBundle = {
statements: Record<FinancialStatementKind, DimensionStatementSnapshotRow[]>;
};
/**
 * One axis/member pair qualifying a fact.
 * Arrays of these are persisted as JSON in `filing_taxonomy_fact.dimensions`.
 */
type TaxonomyDimensionMember = {
axis: string;
member: string;
};
/**
 * A single statement line item as stored inside a taxonomy snapshot's
 * `statement_rows` JSON, carrying both concept identity and presentation placement.
 */
type TaxonomyStatementSnapshotRow = {
key: string;
label: string;
conceptKey: string;
qname: string;
namespaceUri: string;
localName: string;
isExtension: boolean;
statement: FinancialStatementKind;
roleUri: string | null;
order: number;
depth: number;
parentKey: string | null;
// presumably keyed by period id (the test fixtures key both maps that way) — confirm against the writer
values: Record<string, number | null>;
units: Record<string, string | null>;
hasDimensions: boolean;
// NOTE(review): looks like these are `filing_taxonomy_fact.id` values — confirm
sourceFactIds: number[];
};
/**
 * Periods plus per-statement rows for one filing.
 * The `statements` member is the shape persisted in `filing_taxonomy_snapshot.statement_rows`.
 */
type TaxonomyStatementBundle = {
periods: FilingStatementPeriod[];
statements: Record<FinancialStatementKind, TaxonomyStatementSnapshotRow[]>;
};
/**
 * Comparison of one derived metric between the taxonomy value and the LLM value.
 * Field-for-field camelCase counterpart of the `filing_taxonomy_metric_validation` columns.
 */
type TaxonomyMetricValidationCheck = {
metricKey: keyof FilingMetrics;
taxonomyValue: number | null;
llmValue: number | null;
absoluteDiff: number | null;
relativeDiff: number | null;
status: TaxonomyMetricValidationStatus;
evidencePages: number[];
pdfUrl: string | null;
provider: string | null;
model: string | null;
error: string | null;
};
/**
 * Aggregated validation outcome for a snapshot: an overall status plus the
 * individual metric checks. Persisted as JSON in `filing_taxonomy_snapshot.validation_result`.
 */
type TaxonomyMetricValidationResult = {
status: TaxonomyMetricValidationStatus;
checks: TaxonomyMetricValidationCheck[];
validatedAt: string | null;
};
const authDateColumn = {
mode: 'timestamp_ms'
} as const;
@@ -273,6 +336,121 @@ export const filingStatementSnapshot = sqliteTable('filing_statement_snapshot',
filingStatementStatusIndex: index('filing_stmt_status_idx').on(table.parse_status)
}));
/**
 * Table `filing_taxonomy_snapshot`: the parsed taxonomy result for a filing.
 * Rows are removed automatically when the parent `filing` row is deleted (cascade).
 */
export const filingTaxonomySnapshot = sqliteTable('filing_taxonomy_snapshot', {
id: integer('id').primaryKey({ autoIncrement: true }),
filing_id: integer('filing_id').notNull().references(() => filing.id, { onDelete: 'cascade' }),
ticker: text('ticker').notNull(),
filing_date: text('filing_date').notNull(),
filing_type: text('filing_type').$type<'10-K' | '10-Q'>().notNull(),
parse_status: text('parse_status').$type<TaxonomyParseStatus>().notNull(),
parse_error: text('parse_error'),
source: text('source').$type<'xbrl_instance' | 'xbrl_instance_with_linkbase' | 'legacy_html_fallback'>().notNull(),
// JSON-encoded payload columns; all four are nullable at the SQL level.
periods: text('periods', { mode: 'json' }).$type<FilingStatementPeriod[]>(),
statement_rows: text('statement_rows', { mode: 'json' }).$type<TaxonomyStatementBundle['statements'] | null>(),
derived_metrics: text('derived_metrics', { mode: 'json' }).$type<FilingMetrics | null>(),
validation_result: text('validation_result', { mode: 'json' }).$type<TaxonomyMetricValidationResult | null>(),
// Counters default to 0.
facts_count: integer('facts_count').notNull().default(0),
concepts_count: integer('concepts_count').notNull().default(0),
dimensions_count: integer('dimensions_count').notNull().default(0),
created_at: text('created_at').notNull(),
updated_at: text('updated_at').notNull()
}, (table) => ({
// Unique on filing_id: at most one snapshot per filing.
filingTaxonomySnapshotFilingUnique: uniqueIndex('filing_taxonomy_snapshot_filing_uidx').on(table.filing_id),
filingTaxonomySnapshotTickerDateIndex: index('filing_taxonomy_snapshot_ticker_date_idx').on(table.ticker, table.filing_date),
filingTaxonomySnapshotStatusIndex: index('filing_taxonomy_snapshot_status_idx').on(table.parse_status)
}));
/**
 * Table `filing_taxonomy_asset`: files associated with a taxonomy snapshot,
 * classified by `asset_type`. Rows cascade-delete with their snapshot.
 */
export const filingTaxonomyAsset = sqliteTable('filing_taxonomy_asset', {
id: integer('id').primaryKey({ autoIncrement: true }),
snapshot_id: integer('snapshot_id').notNull().references(() => filingTaxonomySnapshot.id, { onDelete: 'cascade' }),
asset_type: text('asset_type').$type<TaxonomyAssetType>().notNull(),
name: text('name').notNull(),
url: text('url').notNull(),
size_bytes: integer('size_bytes'),
// NOTE(review): score / is_selected presumably record asset ranking and the chosen asset — confirm against the writer
score: numeric('score'),
is_selected: integer('is_selected', { mode: 'boolean' }).notNull().default(false),
created_at: text('created_at').notNull()
}, (table) => ({
filingTaxonomyAssetSnapshotIndex: index('filing_taxonomy_asset_snapshot_idx').on(table.snapshot_id),
filingTaxonomyAssetTypeIndex: index('filing_taxonomy_asset_type_idx').on(table.snapshot_id, table.asset_type)
}));
/**
 * Table `filing_taxonomy_concept`: concepts belonging to a taxonomy snapshot,
 * with optional presentation placement (statement kind, role, order, depth, parent).
 * Rows cascade-delete with their snapshot.
 */
export const filingTaxonomyConcept = sqliteTable('filing_taxonomy_concept', {
id: integer('id').primaryKey({ autoIncrement: true }),
snapshot_id: integer('snapshot_id').notNull().references(() => filingTaxonomySnapshot.id, { onDelete: 'cascade' }),
concept_key: text('concept_key').notNull(),
qname: text('qname').notNull(),
namespace_uri: text('namespace_uri').notNull(),
local_name: text('local_name').notNull(),
label: text('label'),
is_extension: integer('is_extension', { mode: 'boolean' }).notNull().default(false),
statement_kind: text('statement_kind').$type<FinancialStatementKind>(),
role_uri: text('role_uri'),
presentation_order: numeric('presentation_order'),
presentation_depth: integer('presentation_depth'),
parent_concept_key: text('parent_concept_key'),
is_abstract: integer('is_abstract', { mode: 'boolean' }).notNull().default(false),
created_at: text('created_at').notNull()
}, (table) => ({
filingTaxonomyConceptSnapshotIndex: index('filing_taxonomy_concept_snapshot_idx').on(table.snapshot_id),
filingTaxonomyConceptStatementIndex: index('filing_taxonomy_concept_statement_idx').on(table.snapshot_id, table.statement_kind),
// NOTE(review): role_uri and presentation_order are nullable, and SQLite treats NULLs as
// distinct in UNIQUE indexes, so rows with NULL in either column are never deduplicated
// by this index — confirm that is intended.
filingTaxonomyConceptUnique: uniqueIndex('filing_taxonomy_concept_uidx').on(
table.snapshot_id,
table.concept_key,
table.role_uri,
table.presentation_order
)
}));
/**
 * Table `filing_taxonomy_fact`: individual numeric facts belonging to a taxonomy
 * snapshot, each tied to a concept, a context, and optional period/dimension data.
 * Rows cascade-delete with their snapshot.
 */
export const filingTaxonomyFact = sqliteTable('filing_taxonomy_fact', {
id: integer('id').primaryKey({ autoIncrement: true }),
snapshot_id: integer('snapshot_id').notNull().references(() => filingTaxonomySnapshot.id, { onDelete: 'cascade' }),
concept_key: text('concept_key').notNull(),
qname: text('qname').notNull(),
namespace_uri: text('namespace_uri').notNull(),
local_name: text('local_name').notNull(),
statement_kind: text('statement_kind').$type<FinancialStatementKind>(),
role_uri: text('role_uri'),
context_id: text('context_id').notNull(),
unit: text('unit'),
decimals: text('decimals'),
// value_num is NOT NULL, so only facts with a numeric value can be stored.
value_num: numeric('value_num').notNull(),
period_start: text('period_start'),
period_end: text('period_end'),
period_instant: text('period_instant'),
dimensions: text('dimensions', { mode: 'json' }).$type<TaxonomyDimensionMember[]>().notNull(),
// NOTE(review): presumably true exactly when `dimensions` is empty — confirm the writer keeps them in sync
is_dimensionless: integer('is_dimensionless', { mode: 'boolean' }).notNull().default(true),
source_file: text('source_file'),
created_at: text('created_at').notNull()
}, (table) => ({
filingTaxonomyFactSnapshotIndex: index('filing_taxonomy_fact_snapshot_idx').on(table.snapshot_id),
filingTaxonomyFactConceptIndex: index('filing_taxonomy_fact_concept_idx').on(table.snapshot_id, table.concept_key),
filingTaxonomyFactPeriodIndex: index('filing_taxonomy_fact_period_idx').on(table.snapshot_id, table.period_end, table.period_instant),
filingTaxonomyFactStatementIndex: index('filing_taxonomy_fact_statement_idx').on(table.snapshot_id, table.statement_kind)
}));
/**
 * Table `filing_taxonomy_metric_validation`: per-metric comparison between the
 * taxonomy value and the LLM value for a snapshot.
 * Rows cascade-delete with their snapshot.
 */
export const filingTaxonomyMetricValidation = sqliteTable('filing_taxonomy_metric_validation', {
id: integer('id').primaryKey({ autoIncrement: true }),
snapshot_id: integer('snapshot_id').notNull().references(() => filingTaxonomySnapshot.id, { onDelete: 'cascade' }),
metric_key: text('metric_key').$type<keyof FilingMetrics>().notNull(),
taxonomy_value: numeric('taxonomy_value'),
llm_value: numeric('llm_value'),
absolute_diff: numeric('absolute_diff'),
relative_diff: numeric('relative_diff'),
status: text('status').$type<TaxonomyMetricValidationStatus>().notNull(),
evidence_pages: text('evidence_pages', { mode: 'json' }).$type<number[]>().notNull(),
pdf_url: text('pdf_url'),
provider: text('provider'),
model: text('model'),
error: text('error'),
created_at: text('created_at').notNull(),
updated_at: text('updated_at').notNull()
}, (table) => ({
filingTaxonomyMetricValidationSnapshotIndex: index('filing_taxonomy_metric_validation_snapshot_idx').on(table.snapshot_id),
filingTaxonomyMetricValidationStatusIndex: index('filing_taxonomy_metric_validation_status_idx').on(table.snapshot_id, table.status),
// Unique on (snapshot_id, metric_key): one validation row per metric per snapshot.
filingTaxonomyMetricValidationUnique: uniqueIndex('filing_taxonomy_metric_validation_uidx').on(table.snapshot_id, table.metric_key)
}));
export const filingLink = sqliteTable('filing_link', {
id: integer('id').primaryKey({ autoIncrement: true }),
filing_id: integer('filing_id').notNull().references(() => filing.id, { onDelete: 'cascade' }),
@@ -357,6 +535,11 @@ export const appSchema = {
holding,
filing,
filingStatementSnapshot,
filingTaxonomySnapshot,
filingTaxonomyAsset,
filingTaxonomyConcept,
filingTaxonomyFact,
filingTaxonomyMetricValidation,
filingLink,
taskRun,
taskStageEvent,

View File

@@ -1,137 +1,7 @@
import { describe, expect, it } from 'bun:test';
import { __financialStatementsInternals } from './financial-statements';
import type { FilingStatementSnapshotRecord } from '@/lib/server/repos/filing-statements';
/**
 * Test fixture: one "ready" MSFT 10-K snapshot (filing 44) with a single period,
 * a single income row in both the filing-faithful and standardized bundles,
 * and one segment-dimension breakdown for that row.
 *
 * Every call returns freshly built objects, so tests may mutate the result freely.
 */
function sampleSnapshot(): FilingStatementSnapshotRecord {
  const periodId = '2025-12-31-0001';
  const revenueConcept = 'us-gaap:Revenues';

  // Factory rather than a shared constant: the two bundles must not alias each other.
  const makePeriods = () => [
    {
      id: periodId,
      filingId: 44,
      accessionNumber: '0001',
      filingDate: '2025-12-31',
      periodEnd: '2025-12-31',
      filingType: '10-K' as const,
      periodLabel: 'Fiscal Year End'
    }
  ];

  // Wraps the given income rows and leaves every other statement kind empty.
  const incomeOnly = <Row>(incomeRows: Row[]) => ({
    income: incomeRows,
    balance: [] as Row[],
    cash_flow: [] as Row[],
    equity: [] as Row[],
    comprehensive_income: [] as Row[]
  });

  const filingFaithfulRow = {
    key: 'revenue-line',
    label: 'Revenue',
    concept: revenueConcept,
    order: 1,
    depth: 0,
    isSubtotal: false,
    values: { [periodId]: 120_000 }
  };

  const standardizedRow = {
    key: 'revenue',
    label: 'Revenue',
    concept: revenueConcept,
    category: 'core' as const,
    sourceConcepts: [revenueConcept],
    values: { [periodId]: 120_000 }
  };

  const segmentDimension = {
    rowKey: 'revenue-line',
    concept: revenueConcept,
    periodId,
    axis: 'srt:StatementBusinessSegmentsAxis',
    member: 'acme:CloudMember',
    value: 55_000,
    unit: 'USD'
  };

  return {
    id: 10,
    filing_id: 44,
    ticker: 'MSFT',
    filing_date: '2025-12-31',
    filing_type: '10-K',
    period_end: '2025-12-31',
    statement_bundle: {
      periods: makePeriods(),
      statements: incomeOnly([filingFaithfulRow])
    },
    standardized_bundle: {
      periods: makePeriods(),
      statements: incomeOnly([standardizedRow])
    },
    dimension_bundle: {
      statements: incomeOnly([segmentDimension])
    },
    parse_status: 'ready',
    parse_error: null,
    source: 'sec_filing_summary',
    created_at: '2026-01-01T00:00:00.000Z',
    updated_at: '2026-01-01T00:00:00.000Z'
  };
}
describe('financial statements service internals', () => {
it('builds sorted periods for selected mode/statement', () => {
const snapshot = sampleSnapshot();
const periods = __financialStatementsInternals.buildPeriods(
[snapshot],
'standardized',
'income'
);
expect(periods.length).toBe(1);
expect(periods[0]?.id).toBe('2025-12-31-0001');
});
it('builds standardized rows and includes dimensions when requested', () => {
const snapshot = sampleSnapshot();
const periods = __financialStatementsInternals.buildPeriods(
[snapshot],
'standardized',
'income'
);
const result = __financialStatementsInternals.buildRows(
[snapshot],
periods,
'standardized',
'income',
true
);
expect(result.rows.length).toBe(1);
expect(result.rows[0]?.hasDimensions).toBe(true);
expect(result.dimensions).not.toBeNull();
expect(result.dimensions?.['revenue-line']?.length).toBe(1);
});
it('returns default sync limits by window', () => {
expect(__financialStatementsInternals.defaultFinancialSyncLimit('10y')).toBe(60);
expect(__financialStatementsInternals.defaultFinancialSyncLimit('all')).toBe(120);

View File

@@ -1,315 +1,48 @@
import type {
CompanyFinancialStatementsResponse,
DimensionBreakdownRow,
FilingFaithfulStatementRow,
FinancialHistoryWindow,
FinancialStatementKind,
FinancialStatementMode,
FinancialStatementPeriod,
StandardizedStatementRow
FinancialStatementKind
} from '@/lib/types';
import { listFilingsRecords } from '@/lib/server/repos/filings';
import {
countFilingStatementSnapshotStatuses,
type DimensionStatementSnapshotRow,
type FilingFaithfulStatementSnapshotRow,
type FilingStatementSnapshotRecord,
listFilingStatementSnapshotsByTicker,
type StandardizedStatementSnapshotRow
} from '@/lib/server/repos/filing-statements';
defaultFinancialSyncLimit,
getCompanyFinancialTaxonomy
} from '@/lib/server/financial-taxonomy';
type GetCompanyFinancialStatementsInput = {
ticker: string;
mode: FinancialStatementMode;
statement: FinancialStatementKind;
window: FinancialHistoryWindow;
includeDimensions: boolean;
includeFacts?: boolean;
factsCursor?: string | null;
factsLimit?: number;
cursor?: string | null;
limit?: number;
v2Enabled: boolean;
v2Enabled?: boolean;
v3Enabled?: boolean;
queuedSync: boolean;
};
/** Row shape produced for either statement mode: standardized or filing-faithful. */
type FinancialStatementRowByMode = StandardizedStatementRow | FilingFaithfulStatementRow;
/**
 * Normalizes a ticker symbol: strips surrounding whitespace, then upper-cases it.
 *
 * @param input - Ticker as supplied by the caller.
 * @returns The trimmed, upper-cased ticker.
 */
function safeTicker(input: string) {
  const trimmed = input.trim();
  return trimmed.toUpperCase();
}
/**
 * Type guard for the two filing forms that carry financial statements.
 *
 * @param type - Filing form type to test (exact, case-sensitive match).
 * @returns `true` only for `'10-K'` or `'10-Q'`.
 */
function isFinancialForm(type: string): type is '10-K' | '10-Q' {
  switch (type) {
    case '10-K':
    case '10-Q':
      return true;
    default:
      return false;
  }
}
/**
 * Decides whether a dimension entry belongs to a statement row.
 *
 * A match is either an identical row key, or a case-insensitive concept match
 * where both concepts are non-empty.
 *
 * @param row - Row identity (key plus optional concept).
 * @param item - Dimension entry to test against the row.
 * @returns `true` when the entry should be attributed to the row.
 */
function rowDimensionMatcher(row: { key: string; concept: string | null }, item: DimensionStatementSnapshotRow) {
  const rowConcept = (row.concept ?? '').toLowerCase();
  const itemConcept = (item.concept ?? '').toLowerCase();

  const sameKey = item.rowKey === row.key;
  const sameConcept = rowConcept !== '' && itemConcept !== '' && rowConcept === itemConcept;

  return sameKey || sameConcept;
}
/**
 * Comparator ordering periods by filing date (oldest first).
 * When the dates are equal or either one cannot be parsed, falls back to
 * comparing the period ids with `localeCompare`.
 *
 * @returns Negative, zero, or positive, as expected by `Array.prototype.sort`.
 */
function periodSorter(left: FinancialStatementPeriod, right: FinancialStatementPeriod) {
  const dateDelta = Date.parse(left.filingDate) - Date.parse(right.filingDate);
  // NaN (unparseable date) and 0 (same date) both defer to the id tie-breaker.
  const dateDecides = Number.isFinite(dateDelta) && dateDelta !== 0;
  return dateDecides ? dateDelta : left.id.localeCompare(right.id);
}
/**
 * Maps a raw period reference from a dimension entry onto a known period id.
 *
 * An exact id match anywhere in `periods` takes precedence; otherwise the first
 * period whose filing date or period end equals the raw value is used.
 *
 * @param rawPeriodId - Period reference as stored on the dimension entry.
 * @param periods - Candidate periods, searched in order.
 * @returns The resolved period id, or `null` when nothing matches.
 */
function resolveDimensionPeriodId(rawPeriodId: string, periods: FinancialStatementPeriod[]) {
  let firstDateMatch: FinancialStatementPeriod | undefined;

  for (const period of periods) {
    if (period.id === rawPeriodId) {
      return period.id;
    }

    // Remember only the earliest date-based candidate; keep scanning for an exact id.
    if (!firstDateMatch && (period.filingDate === rawPeriodId || period.periodEnd === rawPeriodId)) {
      firstDateMatch = period;
    }
  }

  return firstDateMatch?.id ?? null;
}
/**
 * Picks the rows of one statement kind out of a snapshot.
 *
 * @param snapshot - Snapshot holding both bundles.
 * @param mode - `'standardized'` reads `standardized_bundle`; any other mode reads `statement_bundle`.
 * @param statement - Statement kind to extract.
 * @returns The matching rows, or an empty array when the bundle or statement is missing.
 */
function getRowsForSnapshot(
  snapshot: FilingStatementSnapshotRecord,
  mode: FinancialStatementMode,
  statement: FinancialStatementKind
) {
  const bundle = mode === 'standardized'
    ? snapshot.standardized_bundle
    : snapshot.statement_bundle;

  return bundle?.statements?.[statement] ?? [];
}
/**
 * Collects the distinct periods of every snapshot that has at least one row for
 * the requested mode/statement, and returns them sorted with `periodSorter`.
 *
 * When the same period id appears more than once, the first occurrence is kept.
 *
 * @param snapshots - Snapshots to scan, in priority order.
 * @param mode - Selects which bundle's rows and periods are read.
 * @param statement - Statement kind that must have rows for a snapshot to contribute.
 * @returns De-duplicated, sorted periods.
 */
function buildPeriods(
  snapshots: FilingStatementSnapshotRecord[],
  mode: FinancialStatementMode,
  statement: FinancialStatementKind
) {
  const periodsById = new Map<string, FinancialStatementPeriod>();

  const contributing = snapshots.filter(
    (snapshot) => getRowsForSnapshot(snapshot, mode, statement).length > 0
  );

  for (const snapshot of contributing) {
    const bundle = mode === 'standardized'
      ? snapshot.standardized_bundle
      : snapshot.statement_bundle;

    for (const source of bundle?.periods ?? []) {
      if (periodsById.has(source.id)) {
        continue;
      }

      // Copy only the fields of the response period shape.
      const { id, filingId, accessionNumber, filingDate, periodEnd, filingType, periodLabel } = source;
      periodsById.set(id, {
        id,
        filingId,
        accessionNumber,
        filingDate,
        periodEnd,
        filingType,
        periodLabel
      });
    }
  }

  return [...periodsById.values()].sort(periodSorter);
}
/**
 * Merges the rows of one statement kind across snapshots into a single row list,
 * optionally collecting dimension breakdowns.
 *
 * Merge rules visible below:
 * - standardized rows are merged by `sourceRow.key`; filing-faithful rows by
 *   `concept-<lowercased concept>` or, without a concept, `label-<key>`;
 * - for each period, the first snapshot that supplies a value wins;
 * - `hasDimensions` is OR-ed across snapshots; filing-faithful `order`/`depth`
 *   take the minimum and `isSubtotal` is OR-ed.
 *
 * @param snapshots - Snapshots to merge, in priority order.
 * @param periods - Known periods, used to resolve dimension period references.
 * @param mode - Selects the bundle and the merge/sort strategy.
 * @param statement - Statement kind to build.
 * @param includeDimensions - When false, no breakdown is collected and `dimensions` is null.
 * @returns `rows` (sorted) and `dimensions` (record keyed by the dimension entry's own `rowKey`, or null).
 */
function buildRows(
snapshots: FilingStatementSnapshotRecord[],
periods: FinancialStatementPeriod[],
mode: FinancialStatementMode,
statement: FinancialStatementKind,
includeDimensions: boolean
) {
const rowMap = new Map<string, FinancialStatementRowByMode>();
const dimensionMap = includeDimensions
? new Map<string, DimensionBreakdownRow[]>()
: null;
for (const snapshot of snapshots) {
const rows = getRowsForSnapshot(snapshot, mode, statement);
const dimensions = snapshot.dimension_bundle?.statements?.[statement] ?? [];
if (mode === 'standardized') {
for (const sourceRow of rows as StandardizedStatementSnapshotRow[]) {
const existing = rowMap.get(sourceRow.key) as StandardizedStatementRow | undefined;
const hasDimensions = dimensions.some((item) => rowDimensionMatcher(sourceRow, item));
if (!existing) {
// First sighting of this key: copy arrays/objects so later merges never mutate the snapshot.
rowMap.set(sourceRow.key, {
key: sourceRow.key,
label: sourceRow.label,
concept: sourceRow.concept,
category: sourceRow.category,
sourceConcepts: [...sourceRow.sourceConcepts],
values: { ...sourceRow.values },
hasDimensions
});
continue;
}
existing.hasDimensions = existing.hasDimensions || hasDimensions;
for (const concept of sourceRow.sourceConcepts) {
if (!existing.sourceConcepts.includes(concept)) {
existing.sourceConcepts.push(concept);
}
}
// Only fill periods that have no value yet; earlier snapshots take precedence.
for (const [periodId, value] of Object.entries(sourceRow.values)) {
if (!(periodId in existing.values)) {
existing.values[periodId] = value;
}
}
}
} else {
for (const sourceRow of rows as FilingFaithfulStatementSnapshotRow[]) {
// Rows with a concept merge across filings by concept; rows without one stay keyed by their own key.
const rowKey = sourceRow.concept ? `concept-${sourceRow.concept.toLowerCase()}` : `label-${sourceRow.key}`;
const existing = rowMap.get(rowKey) as FilingFaithfulStatementRow | undefined;
const hasDimensions = dimensions.some((item) => rowDimensionMatcher(sourceRow, item));
if (!existing) {
rowMap.set(rowKey, {
key: rowKey,
label: sourceRow.label,
concept: sourceRow.concept,
order: sourceRow.order,
depth: sourceRow.depth,
isSubtotal: sourceRow.isSubtotal,
values: { ...sourceRow.values },
hasDimensions
});
continue;
}
existing.hasDimensions = existing.hasDimensions || hasDimensions;
existing.order = Math.min(existing.order, sourceRow.order);
existing.depth = Math.min(existing.depth, sourceRow.depth);
existing.isSubtotal = existing.isSubtotal || sourceRow.isSubtotal;
// Only fill periods that have no value yet; earlier snapshots take precedence.
for (const [periodId, value] of Object.entries(sourceRow.values)) {
if (!(periodId in existing.values)) {
existing.values[periodId] = value;
}
}
}
}
if (dimensionMap) {
for (const item of dimensions) {
// Entries whose period cannot be mapped to a known period are dropped.
const periodId = resolveDimensionPeriodId(item.periodId, periods);
if (!periodId) {
continue;
}
const entry: DimensionBreakdownRow = {
rowKey: item.rowKey,
concept: item.concept,
periodId,
axis: item.axis,
member: item.member,
value: item.value,
unit: item.unit
};
// Grouped by the dimension entry's rowKey, which is not necessarily the merged row key above.
const group = dimensionMap.get(item.rowKey);
if (group) {
group.push(entry);
} else {
dimensionMap.set(item.rowKey, [entry]);
}
}
}
}
// Standardized rows sort by label; filing-faithful rows sort by zero-padded order, then label.
// NOTE(review): the 5-digit padStart key assumes `order` is a non-negative integer below 100000 — confirm.
const rows = [...rowMap.values()].sort((a, b) => {
const left = mode === 'standardized' ? a.label : `${(a as FilingFaithfulStatementRow).order.toString().padStart(5, '0')}::${a.label}`;
const right = mode === 'standardized' ? b.label : `${(b as FilingFaithfulStatementRow).order.toString().padStart(5, '0')}::${b.label}`;
return left.localeCompare(right);
});
if (mode === 'standardized') {
// Core rows are listed first; each group keeps the label ordering from the sort above.
const standardized = rows as StandardizedStatementRow[];
const core = standardized.filter((row) => row.category === 'core');
const nonCore = standardized.filter((row) => row.category !== 'core');
const orderedRows = [...core, ...nonCore];
return {
rows: orderedRows,
dimensions: dimensionMap ? Object.fromEntries(dimensionMap.entries()) : null
};
}
return {
rows: rows as FilingFaithfulStatementRow[],
dimensions: dimensionMap ? Object.fromEntries(dimensionMap.entries()) : null
};
}
/**
 * Default number of filings to sync for a history window.
 *
 * @param window - Requested history window.
 * @returns 120 for `'all'`, 60 for every other window.
 */
export function defaultFinancialSyncLimit(window: FinancialHistoryWindow) {
  if (window === 'all') {
    return 120;
  }

  return 60;
}
export async function getCompanyFinancialStatements(input: GetCompanyFinancialStatementsInput): Promise<CompanyFinancialStatementsResponse> {
const ticker = safeTicker(input.ticker);
const snapshotResult = await listFilingStatementSnapshotsByTicker({
ticker,
window: input.window,
limit: input.limit,
cursor: input.cursor
});
const statuses = await countFilingStatementSnapshotStatuses(ticker);
const filings = await listFilingsRecords({
ticker,
limit: input.window === 'all' ? 250 : 120
});
const financialFilings = filings.filter((filing) => isFinancialForm(filing.filing_type));
const periods = buildPeriods(snapshotResult.snapshots, input.mode, input.statement);
const rowResult = buildRows(
snapshotResult.snapshots,
periods,
input.mode,
input.statement,
input.includeDimensions
);
const latestFiling = filings[0] ?? null;
return {
company: {
ticker,
companyName: latestFiling?.company_name ?? ticker,
cik: latestFiling?.cik ?? null
},
mode: input.mode,
export async function getCompanyFinancialStatements(
input: GetCompanyFinancialStatementsInput
): Promise<CompanyFinancialStatementsResponse> {
return await getCompanyFinancialTaxonomy({
ticker: input.ticker,
statement: input.statement,
window: input.window,
periods,
rows: rowResult.rows,
nextCursor: snapshotResult.nextCursor,
coverage: {
filings: periods.length,
rows: rowResult.rows.length,
dimensions: rowResult.dimensions
? Object.values(rowResult.dimensions).reduce((total, rows) => total + rows.length, 0)
: 0
},
dataSourceStatus: {
enabled: input.v2Enabled,
hydratedFilings: statuses.ready,
partialFilings: statuses.partial,
failedFilings: statuses.failed,
pendingFilings: Math.max(0, financialFilings.length - statuses.ready - statuses.partial - statuses.failed),
queuedSync: input.queuedSync
},
dimensionBreakdown: rowResult.dimensions
};
includeDimensions: input.includeDimensions,
includeFacts: input.includeFacts ?? false,
factsCursor: input.factsCursor,
factsLimit: input.factsLimit,
cursor: input.cursor,
limit: input.limit,
v3Enabled: input.v3Enabled ?? input.v2Enabled ?? true,
queuedSync: input.queuedSync
});
}
export { defaultFinancialSyncLimit };
/** Bundle of module-private helpers; the accompanying bun test suite imports it to call them directly. */
export const __financialStatementsInternals = {
buildPeriods,
buildRows,
defaultFinancialSyncLimit
};

View File

@@ -0,0 +1,142 @@
import { describe, expect, it } from 'bun:test';
import { __financialTaxonomyInternals } from './financial-taxonomy';
import type { FilingTaxonomySnapshotRecord } from './repos/filing-taxonomy';
import type { FinancialStatementKind, TaxonomyStatementRow } from '@/lib/types';
/**
 * Test fixture: a revenue income-statement row with one value per period id.
 *
 * Values start at 100 and increase by one per period, every unit is
 * `iso4217:USD`, and `sourceFactIds` is the 1-based position of each period.
 *
 * @param periodIds - Period ids to populate, in order.
 */
function createRow(periodIds: string[]): TaxonomyStatementRow {
  const concept = 'us-gaap:RevenueFromContractWithCustomerExcludingAssessedTax';

  const valueEntries: Array<[string, number]> = [];
  const unitEntries: Array<[string, string]> = [];
  const sourceFactIds: number[] = [];
  periodIds.forEach((periodId, index) => {
    valueEntries.push([periodId, 100 + index]);
    unitEntries.push([periodId, 'iso4217:USD']);
    sourceFactIds.push(index + 1);
  });

  return {
    key: concept,
    label: 'Revenue From Contract With Customer Excluding Assessed Tax',
    conceptKey: concept,
    qname: concept,
    namespaceUri: 'http://fasb.org/us-gaap/2021-01-31',
    localName: 'RevenueFromContractWithCustomerExcludingAssessedTax',
    isExtension: false,
    statement: 'income',
    roleUri: 'income',
    order: 1,
    depth: 0,
    parentKey: null,
    values: Object.fromEntries(valueEntries),
    units: Object.fromEntries(unitEntries),
    hasDimensions: false,
    sourceFactIds
  };
}
/**
 * Test fixture: a "ready" MSFT taxonomy snapshot for one filing.
 *
 * The filing id doubles as the snapshot id, the accession number is
 * `0000-<filingId>`, and a single fixture row (covering every supplied period)
 * is placed under `income` or `balance` depending on `input.statement`;
 * all other statement kinds stay empty.
 *
 * @param input - Filing identity, its periods, and the statement kind that receives the row.
 */
function createSnapshot(input: {
  filingId: number;
  filingType: '10-K' | '10-Q';
  filingDate: string;
  periods: Array<{
    id: string;
    periodStart: string | null;
    periodEnd: string;
    periodLabel: string;
  }>;
  statement: FinancialStatementKind;
}) {
  const { filingId, filingType, filingDate, statement } = input;
  const row = createRow(input.periods.map((period) => period.id));

  const periods = input.periods.map(({ id, periodStart, periodEnd, periodLabel }) => ({
    id,
    filingId,
    accessionNumber: `0000-${filingId}`,
    filingDate,
    periodStart,
    periodEnd,
    filingType,
    periodLabel
  }));

  return {
    id: filingId,
    filing_id: filingId,
    ticker: 'MSFT',
    filing_date: filingDate,
    filing_type: filingType,
    parse_status: 'ready',
    parse_error: null,
    source: 'xbrl_instance',
    periods,
    statement_rows: {
      income: statement === 'income' ? [row] : [],
      // The balance variant reuses the same row, relabelled with the balance statement kind.
      balance: statement === 'balance' ? [{ ...row, statement: 'balance' }] : [],
      cash_flow: [],
      equity: [],
      comprehensive_income: []
    },
    derived_metrics: null,
    validation_result: null,
    facts_count: 0,
    concepts_count: 0,
    dimensions_count: 0,
    created_at: filingDate,
    updated_at: filingDate
  } satisfies FilingTaxonomySnapshotRecord;
}
// Unit tests for the period-selection helpers exposed via `__financialTaxonomyInternals`.
describe('financial taxonomy internals', () => {
// A 10-Q snapshot offers an instant, a quarter and a year-to-date period that all
// end on the same date; the quarter-length duration must be chosen.
it('selects the primary quarter duration for 10-Q income statements', () => {
const snapshot = createSnapshot({
filingId: 1,
filingType: '10-Q',
filingDate: '2026-01-28',
statement: 'income',
periods: [
{ id: 'instant', periodStart: null, periodEnd: '2025-12-31', periodLabel: 'Instant' },
{ id: 'quarter', periodStart: '2025-10-01', periodEnd: '2025-12-31', periodLabel: '2025-10-01 to 2025-12-31' },
{ id: 'ytd', periodStart: '2025-07-01', periodEnd: '2025-12-31', periodLabel: '2025-07-01 to 2025-12-31' }
]
});
const selection = __financialTaxonomyInternals.selectPrimaryPeriods([snapshot], 'income');
expect(selection.periods).toHaveLength(1);
expect(selection.periods[0]?.id).toBe('quarter');
});
// Balance sheets use instants; with two instants the later `periodEnd` wins.
it('selects the latest instant for balance sheets', () => {
const snapshot = createSnapshot({
filingId: 2,
filingType: '10-K',
filingDate: '2025-07-30',
statement: 'balance',
periods: [
{ id: 'prior', periodStart: null, periodEnd: '2024-06-30', periodLabel: 'Instant' },
{ id: 'current', periodStart: null, periodEnd: '2025-06-30', periodLabel: 'Instant' }
]
});
const selection = __financialTaxonomyInternals.selectPrimaryPeriods([snapshot], 'balance');
expect(selection.periods).toHaveLength(1);
expect(selection.periods[0]?.id).toBe('current');
});
// Two filings (one 10-K, one 10-Q) must each contribute exactly one period,
// returned in ascending period-end order.
it('builds one reporting period per filing for the selected statement', () => {
const annual = createSnapshot({
filingId: 10,
filingType: '10-K',
filingDate: '2025-07-30',
statement: 'income',
periods: [
{ id: 'annual', periodStart: '2024-07-01', periodEnd: '2025-06-30', periodLabel: '2024-07-01 to 2025-06-30' },
{ id: 'quarter', periodStart: '2025-04-01', periodEnd: '2025-06-30', periodLabel: '2025-04-01 to 2025-06-30' }
]
});
const quarterly = createSnapshot({
filingId: 11,
filingType: '10-Q',
filingDate: '2025-10-29',
statement: 'income',
periods: [
{ id: 'instant', periodStart: null, periodEnd: '2025-09-30', periodLabel: 'Instant' },
{ id: 'quarter', periodStart: '2025-07-01', periodEnd: '2025-09-30', periodLabel: '2025-07-01 to 2025-09-30' },
{ id: 'ytd', periodStart: '2025-01-01', periodEnd: '2025-09-30', periodLabel: '2025-01-01 to 2025-09-30' }
]
});
const periods = __financialTaxonomyInternals.buildPeriods([annual, quarterly], 'income');
expect(periods.map((period) => period.id)).toEqual(['annual', 'quarter']);
});
});

View File

@@ -0,0 +1,384 @@
import type {
CompanyFinancialStatementsResponse,
DimensionBreakdownRow,
FinancialHistoryWindow,
FinancialStatementKind,
FinancialStatementPeriod,
TaxonomyStatementRow
} from '@/lib/types';
import { listFilingsRecords } from '@/lib/server/repos/filings';
import {
countFilingTaxonomySnapshotStatuses,
listFilingTaxonomySnapshotsByTicker,
listTaxonomyFactsByTicker,
type FilingTaxonomySnapshotRecord
} from '@/lib/server/repos/filing-taxonomy';
/** Arguments accepted by `getCompanyFinancialTaxonomy`. */
type GetCompanyFinancialTaxonomyInput = {
// Ticker symbol; trimmed and upper-cased before use.
ticker: string;
// Which statement's rows and periods to assemble.
statement: FinancialStatementKind;
// History window forwarded to the snapshot and fact queries.
window: FinancialHistoryWindow;
// When true, a dimension breakdown is built from dimensional facts.
includeDimensions: boolean;
// When true, a page of raw facts is returned alongside the rows.
includeFacts: boolean;
// Paging for the raw facts page (only used when `includeFacts` is true).
factsCursor?: string | null;
factsLimit?: number;
// Paging for the snapshot query.
cursor?: string | null;
limit?: number;
// Echoed into `dataSourceStatus.enabled` of the response.
v3Enabled: boolean;
// Echoed into `dataSourceStatus.queuedSync` of the response.
queuedSync: boolean;
};
/** Normalizes a ticker symbol: surrounding whitespace removed, letters upper-cased. */
function safeTicker(input: string) {
  const trimmed = input.trim();
  return trimmed.toUpperCase();
}
/** Type guard: true only for the two form types handled here, `10-K` and `10-Q`. */
function isFinancialForm(type: string): type is '10-K' | '10-Q' {
  switch (type) {
    case '10-K':
    case '10-Q':
      return true;
    default:
      return false;
  }
}
/**
 * Parses a date string into epoch milliseconds via `Date.parse`.
 * Returns `NaN` for `null` or an empty string (and whenever `Date.parse` does).
 */
function parseEpoch(value: string | null) {
  return value ? Date.parse(value) : Number.NaN;
}
/**
 * Ascending comparator for periods.
 *
 * Compares by `periodEnd` (falling back to `filingDate`); when either date
 * cannot be parsed, or both dates are equal, falls back to comparing `id`.
 */
function periodSorter(left: FinancialStatementPeriod, right: FinancialStatementPeriod) {
  const leftEpoch = parseEpoch(left.periodEnd ?? left.filingDate);
  const rightEpoch = parseEpoch(right.periodEnd ?? right.filingDate);
  const bothParsed = Number.isFinite(leftEpoch) && Number.isFinite(rightEpoch);

  return bothParsed && leftEpoch !== rightEpoch
    ? leftEpoch - rightEpoch
    : left.id.localeCompare(right.id);
}
/** A period counts as an instant when it has no start date (`periodStart` is `null`). */
function isInstantPeriod(period: FinancialStatementPeriod) {
  const { periodStart } = period;
  return periodStart === null;
}
/**
 * Inclusive length of a period in days (start and end day both counted).
 *
 * Returns `null` when either bound is missing or unparseable, or when the end
 * precedes the start.
 */
function periodDurationDays(period: FinancialStatementPeriod) {
  const { periodStart, periodEnd } = period;
  if (!periodStart || !periodEnd) {
    return null;
  }

  const startMs = Date.parse(periodStart);
  const endMs = Date.parse(periodEnd);
  const isValidRange = Number.isFinite(startMs) && Number.isFinite(endMs) && endMs >= startMs;
  if (!isValidRange) {
    return null;
  }

  const msPerDay = 86_400_000;
  return Math.round((endMs - startMs) / msPerDay) + 1;
}
/** Target period length in days for a filing type: 365 for `10-K`, otherwise 90. */
function preferredDurationDays(filingType: FinancialStatementPeriod['filingType']) {
  if (filingType === '10-K') {
    return 365;
  }
  return 90;
}
/**
 * Picks one primary period per filing for the requested statement.
 *
 * For each snapshot, only periods referenced by that statement's row values are
 * considered. Selection rules:
 * - `balance`: the newest instant period (or the newest of any period when no
 *   instant exists).
 * - other statements: among duration periods, the newest period end wins; ties
 *   go to the duration closest to the filing type's preferred length, then to
 *   the lower `id`. When no duration period exists, the newest period is used.
 *
 * @returns the selected periods in ascending order, plus lookup structures
 *   keyed by period id and by filing id.
 */
function selectPrimaryPeriods(
  snapshots: FilingTaxonomySnapshotRecord[],
  statement: FinancialStatementKind
) {
  const primaryByFiling = new Map<number, FinancialStatementPeriod>();
  const newestFirst = (left: FinancialStatementPeriod, right: FinancialStatementPeriod) =>
    periodSorter(right, left);

  for (const snapshot of snapshots) {
    const statementRows = snapshot.statement_rows?.[statement] ?? [];
    if (statementRows.length === 0) {
      continue;
    }

    // Only periods that actually carry values for this statement are eligible.
    const referencedIds = new Set<string>(statementRows.flatMap((row) => Object.keys(row.values)));
    const eligible = (snapshot.periods ?? []).filter((period) => referencedIds.has(period.id));
    if (eligible.length === 0) {
      continue;
    }

    const instants = eligible.filter(isInstantPeriod);
    const durations = eligible.filter((period) => !isInstantPeriod(period));
    const targetDays = preferredDurationDays(snapshot.filing_type);

    // Newest period end first; ties broken by closeness to the preferred length, then by id.
    const byRecencyThenLength = (left: FinancialStatementPeriod, right: FinancialStatementPeriod) => {
      const leftEpoch = parseEpoch(left.periodEnd ?? left.filingDate);
      const rightEpoch = parseEpoch(right.periodEnd ?? right.filingDate);
      if (Number.isFinite(leftEpoch) && Number.isFinite(rightEpoch) && leftEpoch !== rightEpoch) {
        return rightEpoch - leftEpoch;
      }

      const leftGap = Math.abs((periodDurationDays(left) ?? targetDays) - targetDays);
      const rightGap = Math.abs((periodDurationDays(right) ?? targetDays) - targetDays);
      return leftGap !== rightGap ? leftGap - rightGap : left.id.localeCompare(right.id);
    };

    const primary =
      statement === 'balance'
        ? ((instants.length > 0 ? instants : eligible).sort(newestFirst)[0] ?? null)
        : durations.length === 0
          ? (eligible.sort(newestFirst)[0] ?? null)
          : (durations.sort(byRecencyThenLength)[0] ?? null);

    if (primary) {
      primaryByFiling.set(primary.filingId, primary);
    }
  }

  const periods = Array.from(primaryByFiling.values()).sort(periodSorter);
  const selectedPeriodIds = new Set(periods.map((period) => period.id));
  const periodByFilingId = new Map(periods.map((period) => [period.filingId, period]));

  return { periods, selectedPeriodIds, periodByFilingId };
}
/** Convenience wrapper returning only the ordered period list from `selectPrimaryPeriods`. */
function buildPeriods(
  snapshots: FilingTaxonomySnapshotRecord[],
  statement: FinancialStatementKind
) {
  const { periods } = selectPrimaryPeriods(snapshots, statement);
  return periods;
}
/**
 * Merges statement rows across snapshots into one row per `row.key`, keeping
 * only values and units for the selected periods.
 *
 * The first snapshot that supplies a key with at least one selected-period
 * value seeds the merged row; later snapshots only fill in periods that are
 * still missing, widen `hasDimensions`, lower `order`/`depth`, supply a
 * missing `parentKey`, and append unseen source fact ids.
 *
 * @returns merged rows sorted by `order`, then by `label`.
 */
function buildRows(
  snapshots: FilingTaxonomySnapshotRecord[],
  statement: FinancialStatementKind,
  selectedPeriodIds: Set<string>
) {
  const isSelectedEntry = ([periodId]: [string, unknown]) => selectedPeriodIds.has(periodId);
  const mergedByKey = new Map<string, TaxonomyStatementRow>();

  for (const snapshot of snapshots) {
    for (const row of snapshot.statement_rows?.[statement] ?? []) {
      const merged = mergedByKey.get(row.key);

      if (!merged) {
        const seed = {
          ...row,
          values: Object.fromEntries(Object.entries(row.values).filter(isSelectedEntry)),
          units: Object.fromEntries(Object.entries(row.units).filter(isSelectedEntry)),
          sourceFactIds: [...row.sourceFactIds]
        };
        // A row with no value in any selected period is not kept.
        if (Object.keys(seed.values).length > 0) {
          mergedByKey.set(row.key, seed);
        }
        continue;
      }

      if (row.hasDimensions) {
        merged.hasDimensions = true;
      }
      merged.order = Math.min(merged.order, row.order);
      merged.depth = Math.min(merged.depth, row.depth);
      if (!merged.parentKey && row.parentKey) {
        merged.parentKey = row.parentKey;
      }

      // Earlier snapshots win: only fill periods the merged row does not have yet.
      for (const [periodId, value] of Object.entries(row.values)) {
        if (!selectedPeriodIds.has(periodId) || periodId in merged.values) {
          continue;
        }
        merged.values[periodId] = value;
      }
      for (const [periodId, unit] of Object.entries(row.units)) {
        if (!selectedPeriodIds.has(periodId) || periodId in merged.units) {
          continue;
        }
        merged.units[periodId] = unit;
      }

      for (const factId of row.sourceFactIds) {
        if (!merged.sourceFactIds.includes(factId)) {
          merged.sourceFactIds.push(factId);
        }
      }
    }
  }

  const orderedRows = Array.from(mergedByKey.values());
  orderedRows.sort((left, right) => {
    if (left.order !== right.order) {
      return left.order - right.order;
    }
    return left.label.localeCompare(right.label);
  });
  return orderedRows;
}
/**
 * Groups dimensional facts by concept key for the selected periods.
 *
 * A fact contributes only when it has at least one dimension, its filing has a
 * selected period, and its dates match that period (start and end for
 * durations; instant, or end as fallback, for instants). One output row is
 * produced per dimension on the fact.
 *
 * @returns an object keyed by concept key, or `null` when nothing matched.
 */
function buildDimensionBreakdown(
  facts: Awaited<ReturnType<typeof listTaxonomyFactsByTicker>>['facts'],
  periods: FinancialStatementPeriod[]
) {
  const periodByFilingId = new Map<number, FinancialStatementPeriod>(
    periods.map((period): [number, FinancialStatementPeriod] => [period.filingId, period])
  );
  const grouped = new Map<string, DimensionBreakdownRow[]>();

  for (const fact of facts) {
    if (fact.dimensions.length === 0) {
      continue;
    }

    const period = periodByFilingId.get(fact.filingId) ?? null;
    if (!period) {
      continue;
    }

    let samePeriod: boolean;
    if (period.periodStart) {
      samePeriod = fact.periodStart === period.periodStart && fact.periodEnd === period.periodEnd;
    } else {
      samePeriod = (fact.periodInstant ?? fact.periodEnd) === period.periodEnd;
    }
    if (!samePeriod) {
      continue;
    }

    let bucket = grouped.get(fact.conceptKey);
    if (!bucket) {
      bucket = [];
      grouped.set(fact.conceptKey, bucket);
    }

    for (const dimension of fact.dimensions) {
      bucket.push({
        rowKey: fact.conceptKey,
        concept: fact.qname,
        periodId: period.id,
        axis: dimension.axis,
        member: dimension.member,
        value: fact.value,
        unit: fact.unit
      });
    }
  }

  if (grouped.size === 0) {
    return null;
  }
  return Object.fromEntries(grouped.entries());
}
/**
 * Returns the derived metrics and validation result of the first snapshot (in
 * the given order) that has derived metrics, or nulls when none does.
 */
function latestMetrics(snapshots: FilingTaxonomySnapshotRecord[]) {
  const firstWithMetrics = snapshots.find((snapshot) => snapshot.derived_metrics);

  if (firstWithMetrics?.derived_metrics) {
    return {
      taxonomy: firstWithMetrics.derived_metrics,
      validation: firstWithMetrics.validation_result
    };
  }

  return {
    taxonomy: null,
    validation: null
  };
}
/** Default sync limit for a history window: 120 for `'all'`, otherwise 60. */
export function defaultFinancialSyncLimit(window: FinancialHistoryWindow) {
  if (window === 'all') {
    return 120;
  }
  return 60;
}
/**
 * Assembles the financial statements response for one ticker and statement.
 *
 * Loads a page of taxonomy snapshots, snapshot status counts and recent
 * filings, then derives the primary period per filing, the merged statement
 * rows, and (optionally) a raw facts page and a dimension breakdown.
 *
 * @param input ticker, statement, window and paging/feature switches.
 * @returns the response payload including coverage and data-source status.
 */
export async function getCompanyFinancialTaxonomy(input: GetCompanyFinancialTaxonomyInput): Promise<CompanyFinancialStatementsResponse> {
  const { statement, window, includeDimensions, includeFacts } = input;
  const ticker = safeTicker(input.ticker);

  const snapshotPage = await listFilingTaxonomySnapshotsByTicker({
    ticker,
    window,
    limit: input.limit,
    cursor: input.cursor
  });
  const statusCounts = await countFilingTaxonomySnapshotStatuses(ticker);
  const filings = await listFilingsRecords({
    ticker,
    limit: window === 'all' ? 250 : 120
  });

  const { snapshots } = snapshotPage;
  const financialFilings = filings.filter((filing) => isFinancialForm(filing.filing_type));
  const { periods, selectedPeriodIds } = selectPrimaryPeriods(snapshots, statement);
  const rows = buildRows(snapshots, statement, selectedPeriodIds);

  // Raw facts page, only fetched on request.
  const factsPage = includeFacts
    ? await listTaxonomyFactsByTicker({
        ticker,
        window,
        statement,
        cursor: input.factsCursor,
        limit: input.factsLimit
      })
    : { facts: [], nextCursor: null };

  // Separate, un-paged fact fetch that feeds the dimension breakdown.
  const dimensionPage = includeDimensions
    ? await listTaxonomyFactsByTicker({
        ticker,
        window,
        statement,
        limit: 1200
      })
    : { facts: [], nextCursor: null };

  const newestFiling = filings[0] ?? null;
  const metrics = latestMetrics(snapshots);
  const dimensionBreakdown = includeDimensions
    ? buildDimensionBreakdown(dimensionPage.facts, periods)
    : null;

  let dimensionsCount = 0;
  if (dimensionBreakdown) {
    for (const entries of Object.values(dimensionBreakdown)) {
      dimensionsCount += entries.length;
    }
  }

  // With a facts page the coverage is the page size; otherwise sum the per-snapshot counts.
  let factsCoverage = 0;
  if (includeFacts) {
    factsCoverage = factsPage.facts.length;
  } else {
    for (const snapshot of snapshots) {
      factsCoverage += snapshot.facts_count;
    }
  }

  const accountedFilings = financialFilings.length - statusCounts.ready - statusCounts.partial - statusCounts.failed;

  return {
    company: {
      ticker,
      companyName: newestFiling?.company_name ?? ticker,
      cik: newestFiling?.cik ?? null
    },
    statement,
    window,
    periods,
    rows,
    nextCursor: snapshotPage.nextCursor,
    facts: includeFacts
      ? {
          rows: factsPage.facts,
          nextCursor: factsPage.nextCursor
        }
      : null,
    coverage: {
      filings: periods.length,
      rows: rows.length,
      dimensions: dimensionsCount,
      facts: factsCoverage
    },
    dataSourceStatus: {
      enabled: input.v3Enabled,
      hydratedFilings: statusCounts.ready,
      partialFilings: statusCounts.partial,
      failedFilings: statusCounts.failed,
      pendingFilings: Math.max(0, accountedFilings),
      queuedSync: input.queuedSync
    },
    metrics,
    dimensionBreakdown
  };
}
/**
 * Module-private helpers re-exported as a single object so the unit tests can
 * call them directly.
 */
export const __financialTaxonomyInternals = {
buildPeriods,
isInstantPeriod,
periodDurationDays,
selectPrimaryPeriods
};

View File

@@ -16,6 +16,7 @@ export type FilingStatementSnapshotPeriod = {
filingId: number;
accessionNumber: string;
filingDate: string;
periodStart: string | null;
periodEnd: string | null;
filingType: '10-K' | '10-Q';
periodLabel: string;

View File

@@ -0,0 +1,676 @@
import { and, desc, eq, gte, inArray, lt, sql } from 'drizzle-orm';
import type { Filing, FinancialStatementKind, MetricValidationResult, TaxonomyDimensionMember, TaxonomyFactRow, TaxonomyStatementRow } from '@/lib/types';
import { db } from '@/lib/server/db';
import {
filingTaxonomyAsset,
filingTaxonomyConcept,
filingTaxonomyFact,
filingTaxonomyMetricValidation,
filingTaxonomySnapshot
} from '@/lib/server/db/schema';
/** Parse outcome stored in a snapshot's `parse_status` field. */
export type FilingTaxonomyParseStatus = 'ready' | 'partial' | 'failed';
/** Value stored in a snapshot's `source` field, naming where its data came from. */
export type FilingTaxonomySource = 'xbrl_instance' | 'xbrl_instance_with_linkbase' | 'legacy_html_fallback';
/** Category stored in an asset's `asset_type` field. */
export type FilingTaxonomyAssetType =
| 'instance'
| 'schema'
| 'presentation'
| 'label'
| 'calculation'
| 'definition'
| 'pdf'
| 'other';
/** One reporting period stored in a snapshot's `periods` list. */
export type FilingTaxonomyPeriod = {
id: string;
filingId: number;
accessionNumber: string;
filingDate: string;
// Nullable start date of the period.
periodStart: string | null;
periodEnd: string | null;
filingType: '10-K' | '10-Q';
periodLabel: string;
};
/** Snapshot row as returned by this repo (see `toSnapshotRecord`). */
export type FilingTaxonomySnapshotRecord = {
id: number;
// Filing this snapshot belongs to; used as the upsert conflict target.
filing_id: number;
ticker: string;
filing_date: string;
filing_type: '10-K' | '10-Q';
parse_status: FilingTaxonomyParseStatus;
parse_error: string | null;
source: FilingTaxonomySource;
periods: FilingTaxonomyPeriod[];
// Statement rows grouped by statement kind.
statement_rows: Record<FinancialStatementKind, TaxonomyStatementRow[]>;
derived_metrics: Filing['metrics'];
validation_result: MetricValidationResult | null;
facts_count: number;
concepts_count: number;
dimensions_count: number;
created_at: string;
updated_at: string;
};
/** Asset row attached to a snapshot, as returned by `toAssetRecord`. */
export type FilingTaxonomyAssetRecord = {
id: number;
snapshot_id: number;
asset_type: FilingTaxonomyAssetType;
name: string;
url: string;
size_bytes: number | null;
// Stored as numeric text and parsed back to a number on read.
score: number | null;
is_selected: boolean;
created_at: string;
};
/** Concept row attached to a snapshot, as returned by `toConceptRecord`. */
export type FilingTaxonomyConceptRecord = {
id: number;
snapshot_id: number;
concept_key: string;
qname: string;
namespace_uri: string;
local_name: string;
label: string | null;
is_extension: boolean;
statement_kind: FinancialStatementKind | null;
role_uri: string | null;
// Stored as numeric text and parsed back to a number on read.
presentation_order: number | null;
presentation_depth: number | null;
parent_concept_key: string | null;
is_abstract: boolean;
created_at: string;
};
/** Fact row attached to a snapshot, as returned by `toFactRecord`. */
export type FilingTaxonomyFactRecord = {
id: number;
snapshot_id: number;
concept_key: string;
qname: string;
namespace_uri: string;
local_name: string;
statement_kind: FinancialStatementKind | null;
role_uri: string | null;
context_id: string;
unit: string | null;
decimals: string | null;
// Always a finite number; `toFactRecord` throws when the stored value cannot be parsed.
value_num: number;
period_start: string | null;
period_end: string | null;
period_instant: string | null;
dimensions: TaxonomyDimensionMember[];
is_dimensionless: boolean;
source_file: string | null;
created_at: string;
};
/** Metric validation row attached to a snapshot, as returned by `toMetricValidationRecord`. */
export type FilingTaxonomyMetricValidationRecord = {
id: number;
snapshot_id: number;
metric_key: keyof NonNullable<Filing['metrics']>;
// The four numeric fields below are stored as numeric text and parsed on read.
taxonomy_value: number | null;
llm_value: number | null;
absolute_diff: number | null;
relative_diff: number | null;
status: 'not_run' | 'matched' | 'mismatch' | 'error';
// Defaults to an empty list when the stored value is missing.
evidence_pages: number[];
pdf_url: string | null;
provider: string | null;
model: string | null;
error: string | null;
created_at: string;
updated_at: string;
};
/**
 * Payload for `upsertFilingTaxonomySnapshot`: the snapshot's own columns plus
 * the complete sets of child rows (assets, concepts, facts, metric
 * validations) that replace whatever is currently stored for the snapshot.
 */
export type UpsertFilingTaxonomySnapshotInput = {
// Conflict target of the upsert: one snapshot per filing.
filing_id: number;
ticker: string;
filing_date: string;
filing_type: '10-K' | '10-Q';
parse_status: FilingTaxonomyParseStatus;
parse_error: string | null;
source: FilingTaxonomySource;
periods: FilingTaxonomyPeriod[];
statement_rows: Record<FinancialStatementKind, TaxonomyStatementRow[]>;
derived_metrics: Filing['metrics'];
validation_result: MetricValidationResult | null;
facts_count: number;
concepts_count: number;
dimensions_count: number;
// Child rows for the asset table.
assets: Array<{
asset_type: FilingTaxonomyAssetType;
name: string;
url: string;
size_bytes: number | null;
score: number | null;
is_selected: boolean;
}>;
// Child rows for the concept table.
concepts: Array<{
concept_key: string;
qname: string;
namespace_uri: string;
local_name: string;
label: string | null;
is_extension: boolean;
statement_kind: FinancialStatementKind | null;
role_uri: string | null;
presentation_order: number | null;
presentation_depth: number | null;
parent_concept_key: string | null;
is_abstract: boolean;
}>;
// Child rows for the fact table.
facts: Array<{
concept_key: string;
qname: string;
namespace_uri: string;
local_name: string;
statement_kind: FinancialStatementKind | null;
role_uri: string | null;
context_id: string;
unit: string | null;
decimals: string | null;
value_num: number;
period_start: string | null;
period_end: string | null;
period_instant: string | null;
dimensions: TaxonomyDimensionMember[];
is_dimensionless: boolean;
source_file: string | null;
}>;
// Child rows for the metric validation table.
metric_validations: Array<{
metric_key: keyof NonNullable<Filing['metrics']>;
taxonomy_value: number | null;
llm_value: number | null;
absolute_diff: number | null;
relative_diff: number | null;
status: 'not_run' | 'matched' | 'mismatch' | 'error';
evidence_pages: number[];
pdf_url: string | null;
provider: string | null;
model: string | null;
error: string | null;
}>;
};
/** Returns the UTC calendar date ten years before now, formatted `YYYY-MM-DD`. */
function tenYearsAgoIso() {
  const cutoff = new Date();
  const targetYear = cutoff.getUTCFullYear() - 10;
  cutoff.setUTCFullYear(targetYear);
  return cutoff.toISOString().substring(0, 10);
}
/**
 * Coerces a stored value to a finite number.
 *
 * @param value a number, numeric text, or anything else.
 * @returns the finite number, or `null` when `value` is not a finite number
 *   or not numeric text. Empty and whitespace-only strings yield `null`
 *   (plain `Number('')` would otherwise report them as `0`).
 */
function asNumber(value: unknown) {
  if (typeof value === 'number') {
    return Number.isFinite(value) ? value : null;
  }
  if (typeof value === 'string') {
    // `Number('')` and `Number('   ')` are 0; blank text carries no value.
    if (value.trim().length === 0) {
      return null;
    }
    const parsed = Number(value);
    return Number.isFinite(parsed) ? parsed : null;
  }
  return null;
}
/** Serializes a finite number to text; `null` and non-finite numbers become `null`. */
function asNumericText(value: number | null) {
  return value !== null && Number.isFinite(value) ? String(value) : null;
}
/** Creates a fresh statement-rows map with an empty list for every statement kind. */
function emptyStatementRows(): Record<FinancialStatementKind, TaxonomyStatementRow[]> {
  const rowsByStatement: Record<FinancialStatementKind, TaxonomyStatementRow[]> = {
    income: [],
    balance: [],
    cash_flow: [],
    equity: [],
    comprehensive_income: []
  };
  return rowsByStatement;
}
/**
 * Maps a snapshot table row to a `FilingTaxonomySnapshotRecord`, substituting
 * defaults for missing `periods`, `statement_rows`, `derived_metrics` and
 * `validation_result`.
 */
function toSnapshotRecord(row: typeof filingTaxonomySnapshot.$inferSelect): FilingTaxonomySnapshotRecord {
  const {
    periods,
    statement_rows: statementRows,
    derived_metrics: derivedMetrics,
    validation_result: validationResult
  } = row;

  const record: FilingTaxonomySnapshotRecord = {
    id: row.id,
    filing_id: row.filing_id,
    ticker: row.ticker,
    filing_date: row.filing_date,
    filing_type: row.filing_type,
    parse_status: row.parse_status,
    parse_error: row.parse_error,
    source: row.source,
    periods: periods ?? [],
    statement_rows: statementRows ?? emptyStatementRows(),
    derived_metrics: derivedMetrics ?? null,
    validation_result: validationResult ?? null,
    facts_count: row.facts_count,
    concepts_count: row.concepts_count,
    dimensions_count: row.dimensions_count,
    created_at: row.created_at,
    updated_at: row.updated_at
  };
  return record;
}
/** Maps an asset table row to a `FilingTaxonomyAssetRecord`, parsing `score` to a number. */
function toAssetRecord(row: typeof filingTaxonomyAsset.$inferSelect): FilingTaxonomyAssetRecord {
  const { id, snapshot_id, asset_type, name, url, size_bytes, score, is_selected, created_at } = row;
  return {
    id,
    snapshot_id,
    asset_type,
    name,
    url,
    size_bytes,
    score: asNumber(score),
    is_selected,
    created_at
  };
}
/**
 * Maps a concept table row to a `FilingTaxonomyConceptRecord`, parsing
 * `presentation_order` to a number and defaulting `statement_kind` to `null`.
 */
function toConceptRecord(row: typeof filingTaxonomyConcept.$inferSelect): FilingTaxonomyConceptRecord {
  const {
    id,
    snapshot_id,
    concept_key,
    qname,
    namespace_uri,
    local_name,
    label,
    is_extension,
    statement_kind,
    role_uri,
    presentation_order,
    presentation_depth,
    parent_concept_key,
    is_abstract,
    created_at
  } = row;

  return {
    id,
    snapshot_id,
    concept_key,
    qname,
    namespace_uri,
    local_name,
    label,
    is_extension,
    statement_kind: statement_kind ?? null,
    role_uri,
    presentation_order: asNumber(presentation_order),
    presentation_depth,
    parent_concept_key,
    is_abstract,
    created_at
  };
}
/**
 * Maps a fact table row to a `FilingTaxonomyFactRecord`.
 *
 * @throws Error when the stored `value_num` cannot be parsed to a finite number.
 */
function toFactRecord(row: typeof filingTaxonomyFact.$inferSelect): FilingTaxonomyFactRecord {
  const numericValue = asNumber(row.value_num);
  if (numericValue === null) {
    throw new Error(`Invalid value_num for taxonomy fact row ${row.id}`);
  }

  const {
    id,
    snapshot_id,
    concept_key,
    qname,
    namespace_uri,
    local_name,
    statement_kind,
    role_uri,
    context_id,
    unit,
    decimals,
    period_start,
    period_end,
    period_instant,
    dimensions,
    is_dimensionless,
    source_file,
    created_at
  } = row;

  return {
    id,
    snapshot_id,
    concept_key,
    qname,
    namespace_uri,
    local_name,
    statement_kind: statement_kind ?? null,
    role_uri,
    context_id,
    unit,
    decimals,
    value_num: numericValue,
    period_start,
    period_end,
    period_instant,
    dimensions,
    is_dimensionless,
    source_file,
    created_at
  };
}
/**
 * Maps a metric validation table row to a `FilingTaxonomyMetricValidationRecord`,
 * parsing the four numeric columns and defaulting `evidence_pages` to `[]`.
 */
function toMetricValidationRecord(row: typeof filingTaxonomyMetricValidation.$inferSelect): FilingTaxonomyMetricValidationRecord {
  const { taxonomy_value, llm_value, absolute_diff, relative_diff, evidence_pages } = row;

  return {
    id: row.id,
    snapshot_id: row.snapshot_id,
    metric_key: row.metric_key,
    taxonomy_value: asNumber(taxonomy_value),
    llm_value: asNumber(llm_value),
    absolute_diff: asNumber(absolute_diff),
    relative_diff: asNumber(relative_diff),
    status: row.status,
    evidence_pages: evidence_pages ?? [],
    pdf_url: row.pdf_url,
    provider: row.provider,
    model: row.model,
    error: row.error,
    created_at: row.created_at,
    updated_at: row.updated_at
  };
}
/** Loads the taxonomy snapshot stored for a filing, or `null` when there is none. */
export async function getFilingTaxonomySnapshotByFilingId(filingId: number) {
  const matches = await db
    .select()
    .from(filingTaxonomySnapshot)
    .where(eq(filingTaxonomySnapshot.filing_id, filingId))
    .limit(1);

  const first = matches[0];
  if (!first) {
    return null;
  }
  return toSnapshotRecord(first);
}
/** Lists the assets of one snapshot, highest id first. */
export async function listFilingTaxonomyAssets(snapshotId: number) {
  const belongsToSnapshot = eq(filingTaxonomyAsset.snapshot_id, snapshotId);
  const assetRows = await db
    .select()
    .from(filingTaxonomyAsset)
    .where(belongsToSnapshot)
    .orderBy(desc(filingTaxonomyAsset.id));

  return assetRows.map((assetRow) => toAssetRecord(assetRow));
}
/** Lists the concepts of one snapshot, highest id first. */
export async function listFilingTaxonomyConcepts(snapshotId: number) {
  const belongsToSnapshot = eq(filingTaxonomyConcept.snapshot_id, snapshotId);
  const conceptRows = await db
    .select()
    .from(filingTaxonomyConcept)
    .where(belongsToSnapshot)
    .orderBy(desc(filingTaxonomyConcept.id));

  return conceptRows.map((conceptRow) => toConceptRecord(conceptRow));
}
/**
 * Lists the facts of one snapshot, highest id first.
 * Rejects if any stored fact value cannot be parsed (see `toFactRecord`).
 */
export async function listFilingTaxonomyFacts(snapshotId: number) {
  const belongsToSnapshot = eq(filingTaxonomyFact.snapshot_id, snapshotId);
  const factRows = await db
    .select()
    .from(filingTaxonomyFact)
    .where(belongsToSnapshot)
    .orderBy(desc(filingTaxonomyFact.id));

  return factRows.map((factRow) => toFactRecord(factRow));
}
/** Lists the metric validations of one snapshot, highest id first. */
export async function listFilingTaxonomyMetricValidations(snapshotId: number) {
  const belongsToSnapshot = eq(filingTaxonomyMetricValidation.snapshot_id, snapshotId);
  const validationRows = await db
    .select()
    .from(filingTaxonomyMetricValidation)
    .where(belongsToSnapshot)
    .orderBy(desc(filingTaxonomyMetricValidation.id));

  return validationRows.map((validationRow) => toMetricValidationRecord(validationRow));
}
/** Maximum number of rows sent in a single child-table INSERT statement. */
const TAXONOMY_INSERT_BATCH_SIZE = 500;

/** Splits `rows` into consecutive slices of at most `size` items; an empty input yields no slices. */
function chunkTaxonomyRows<T>(rows: T[], size: number): T[][] {
  const batches: T[][] = [];
  for (let offset = 0; offset < rows.length; offset += size) {
    batches.push(rows.slice(offset, offset + size));
  }
  return batches;
}

/**
 * Inserts or updates the taxonomy snapshot for a filing (conflict target:
 * `filing_id`) and replaces all of its child rows (assets, concepts, facts,
 * metric validations) with the ones supplied in `input`.
 *
 * Child rows are written in batches of `TAXONOMY_INSERT_BATCH_SIZE` so that a
 * filing with many facts does not produce one statement with an unbounded
 * number of bound parameters.
 *
 * NOTE(review): the statements below are not wrapped in a transaction, so a
 * failure part-way through can leave the snapshot with incomplete child rows —
 * confirm whether the configured driver supports async transactions.
 *
 * @param input snapshot columns plus the full replacement sets of child rows.
 * @returns the saved snapshot record.
 * @throws Error when the upsert statement returns no row.
 */
export async function upsertFilingTaxonomySnapshot(input: UpsertFilingTaxonomySnapshotInput) {
  const now = new Date().toISOString();

  const snapshotColumns = {
    ticker: input.ticker,
    filing_date: input.filing_date,
    filing_type: input.filing_type,
    parse_status: input.parse_status,
    parse_error: input.parse_error,
    source: input.source,
    periods: input.periods,
    statement_rows: input.statement_rows,
    derived_metrics: input.derived_metrics,
    validation_result: input.validation_result,
    facts_count: input.facts_count,
    concepts_count: input.concepts_count,
    dimensions_count: input.dimensions_count,
    updated_at: now
  };

  const [saved] = await db
    .insert(filingTaxonomySnapshot)
    .values({
      filing_id: input.filing_id,
      ...snapshotColumns,
      created_at: now
    })
    .onConflictDoUpdate({
      target: filingTaxonomySnapshot.filing_id,
      // `created_at` is intentionally left untouched on update.
      set: snapshotColumns
    })
    .returning();

  if (!saved) {
    throw new Error(`Taxonomy snapshot upsert returned no row for filing ${input.filing_id}`);
  }
  const snapshotId = saved.id;

  // Replace children wholesale: clear everything for this snapshot, then re-insert.
  await db.delete(filingTaxonomyAsset).where(eq(filingTaxonomyAsset.snapshot_id, snapshotId));
  await db.delete(filingTaxonomyConcept).where(eq(filingTaxonomyConcept.snapshot_id, snapshotId));
  await db.delete(filingTaxonomyFact).where(eq(filingTaxonomyFact.snapshot_id, snapshotId));
  await db.delete(filingTaxonomyMetricValidation).where(eq(filingTaxonomyMetricValidation.snapshot_id, snapshotId));

  const assetRows = input.assets.map((asset) => ({
    snapshot_id: snapshotId,
    asset_type: asset.asset_type,
    name: asset.name,
    url: asset.url,
    size_bytes: asset.size_bytes,
    score: asNumericText(asset.score),
    is_selected: asset.is_selected,
    created_at: now
  }));
  for (const batch of chunkTaxonomyRows(assetRows, TAXONOMY_INSERT_BATCH_SIZE)) {
    await db.insert(filingTaxonomyAsset).values(batch);
  }

  const conceptRows = input.concepts.map((concept) => ({
    snapshot_id: snapshotId,
    concept_key: concept.concept_key,
    qname: concept.qname,
    namespace_uri: concept.namespace_uri,
    local_name: concept.local_name,
    label: concept.label,
    is_extension: concept.is_extension,
    statement_kind: concept.statement_kind,
    role_uri: concept.role_uri,
    presentation_order: asNumericText(concept.presentation_order),
    presentation_depth: concept.presentation_depth,
    parent_concept_key: concept.parent_concept_key,
    is_abstract: concept.is_abstract,
    created_at: now
  }));
  for (const batch of chunkTaxonomyRows(conceptRows, TAXONOMY_INSERT_BATCH_SIZE)) {
    await db.insert(filingTaxonomyConcept).values(batch);
  }

  const factRows = input.facts.map((fact) => ({
    snapshot_id: snapshotId,
    concept_key: fact.concept_key,
    qname: fact.qname,
    namespace_uri: fact.namespace_uri,
    local_name: fact.local_name,
    statement_kind: fact.statement_kind,
    role_uri: fact.role_uri,
    context_id: fact.context_id,
    unit: fact.unit,
    decimals: fact.decimals,
    value_num: String(fact.value_num),
    period_start: fact.period_start,
    period_end: fact.period_end,
    period_instant: fact.period_instant,
    dimensions: fact.dimensions,
    is_dimensionless: fact.is_dimensionless,
    source_file: fact.source_file,
    created_at: now
  }));
  for (const batch of chunkTaxonomyRows(factRows, TAXONOMY_INSERT_BATCH_SIZE)) {
    await db.insert(filingTaxonomyFact).values(batch);
  }

  const validationRows = input.metric_validations.map((check) => ({
    snapshot_id: snapshotId,
    metric_key: check.metric_key,
    taxonomy_value: asNumericText(check.taxonomy_value),
    llm_value: asNumericText(check.llm_value),
    absolute_diff: asNumericText(check.absolute_diff),
    relative_diff: asNumericText(check.relative_diff),
    status: check.status,
    evidence_pages: check.evidence_pages,
    pdf_url: check.pdf_url,
    provider: check.provider,
    model: check.model,
    error: check.error,
    created_at: now,
    updated_at: now
  }));
  for (const batch of chunkTaxonomyRows(validationRows, TAXONOMY_INSERT_BATCH_SIZE)) {
    await db.insert(filingTaxonomyMetricValidation).values(batch);
  }

  return toSnapshotRecord(saved);
}
/**
 * Decodes a snapshot page cursor.
 *
 * Accepts the composite form `<filing_date>|<id>` as well as a bare numeric id
 * (legacy form). Returns `null` when the cursor is absent or its id part is
 * not a positive integer.
 */
function parseSnapshotCursor(cursor: string | null | undefined) {
  if (!cursor) {
    return null;
  }
  const separatorIndex = cursor.lastIndexOf('|');
  const id = Number.parseInt(cursor.slice(separatorIndex + 1), 10);
  if (!Number.isFinite(id) || id <= 0) {
    return null;
  }
  const filingDate = separatorIndex > 0 ? cursor.slice(0, separatorIndex) : null;
  return { id, filingDate };
}

/**
 * Lists taxonomy snapshots for a ticker, newest filing date first (ties broken
 * by highest id), one page at a time.
 *
 * The page cursor encodes both sort keys (`<filing_date>|<id>`) so that the
 * "next page" filter follows the same order as the query. Filtering on id
 * alone would repeat or skip rows whenever ids do not increase with filing
 * date. A bare numeric cursor is still accepted and filters on id only, as
 * before.
 *
 * @param input.ticker ticker symbol (trimmed and upper-cased here).
 * @param input.window `'10y'` restricts to filings dated within the last ten years.
 * @param input.limit page size; defaults to 40, clamped to 1..120. `NaN` falls back to the default.
 * @param input.cursor opaque cursor from a previous page's `nextCursor`.
 * @returns the page of snapshots and the cursor for the next page (`null` on the last page).
 */
export async function listFilingTaxonomySnapshotsByTicker(input: {
  ticker: string;
  window: '10y' | 'all';
  limit?: number;
  cursor?: string | null;
}) {
  const requestedLimit = input.limit ?? 40;
  // `Math.max(NaN, 1)` is NaN, so a NaN limit must be replaced before clamping.
  const safeLimit = Math.min(Math.max(Math.trunc(Number.isNaN(requestedLimit) ? 40 : requestedLimit), 1), 120);

  const constraints = [eq(filingTaxonomySnapshot.ticker, input.ticker.trim().toUpperCase())];
  if (input.window === '10y') {
    constraints.push(gte(filingTaxonomySnapshot.filing_date, tenYearsAgoIso()));
  }

  const cursor = parseSnapshotCursor(input.cursor);
  if (cursor) {
    constraints.push(
      cursor.filingDate === null
        ? lt(filingTaxonomySnapshot.id, cursor.id)
        : sql`(${filingTaxonomySnapshot.filing_date} < ${cursor.filingDate} or (${filingTaxonomySnapshot.filing_date} = ${cursor.filingDate} and ${filingTaxonomySnapshot.id} < ${cursor.id}))`
    );
  }

  // Fetch one extra row to learn whether another page exists.
  const rows = await db
    .select()
    .from(filingTaxonomySnapshot)
    .where(and(...constraints))
    .orderBy(desc(filingTaxonomySnapshot.filing_date), desc(filingTaxonomySnapshot.id))
    .limit(safeLimit + 1);

  const hasMore = rows.length > safeLimit;
  const usedRows = hasMore ? rows.slice(0, safeLimit) : rows;
  const lastRow = usedRows[usedRows.length - 1];
  const nextCursor = hasMore && lastRow
    ? `${lastRow.filing_date}|${lastRow.id}`
    : null;

  return {
    snapshots: usedRows.map(toSnapshotRecord),
    nextCursor
  };
}
/**
 * Counts a ticker's taxonomy snapshots per parse status.
 * Statuses with no snapshots are reported as 0.
 */
export async function countFilingTaxonomySnapshotStatuses(ticker: string) {
  const normalizedTicker = ticker.trim().toUpperCase();
  const groupedCounts = await db
    .select({
      status: filingTaxonomySnapshot.parse_status,
      count: sql<string>`count(*)`
    })
    .from(filingTaxonomySnapshot)
    .where(eq(filingTaxonomySnapshot.ticker, normalizedTicker))
    .groupBy(filingTaxonomySnapshot.parse_status);

  const totals: Record<FilingTaxonomyParseStatus, number> = {
    ready: 0,
    partial: 0,
    failed: 0
  };
  for (const { status, count } of groupedCounts) {
    totals[status] = Number(count);
  }
  return totals;
}
/**
 * Lists taxonomy facts for a ticker across its snapshots, highest fact id
 * first, one page at a time.
 *
 * @param input.ticker ticker symbol (trimmed and upper-cased here).
 * @param input.window `'10y'` restricts to snapshots whose filing date is within the last ten years.
 * @param input.statement optional statement kind filter.
 * @param input.cursor fact id from a previous page's `nextCursor`; only facts with a lower id are returned.
 * @param input.limit page size; defaults to 500, clamped to 1..2000. `NaN` falls back to the default.
 * @returns the page of facts and the cursor for the next page (`null` on the last page).
 * @throws Error when a stored fact value cannot be parsed to a finite number.
 */
export async function listTaxonomyFactsByTicker(input: {
  ticker: string;
  window: '10y' | 'all';
  statement?: FinancialStatementKind;
  cursor?: string | null;
  limit?: number;
}) {
  const requestedLimit = input.limit ?? 500;
  // `Math.max(NaN, 1)` is NaN, so a NaN limit must be replaced before clamping.
  const safeLimit = Math.min(Math.max(Math.trunc(Number.isNaN(requestedLimit) ? 500 : requestedLimit), 1), 2000);
  const cursorId = input.cursor ? Number.parseInt(input.cursor, 10) : null;

  const conditions = [eq(filingTaxonomySnapshot.ticker, input.ticker.trim().toUpperCase())];
  if (input.window === '10y') {
    conditions.push(gte(filingTaxonomySnapshot.filing_date, tenYearsAgoIso()));
  }
  if (input.statement) {
    conditions.push(eq(filingTaxonomyFact.statement_kind, input.statement));
  }
  if (cursorId && Number.isFinite(cursorId) && cursorId > 0) {
    conditions.push(lt(filingTaxonomyFact.id, cursorId));
  }

  // Fetch one extra row to learn whether another page exists.
  const rows = await db
    .select({
      id: filingTaxonomyFact.id,
      snapshot_id: filingTaxonomyFact.snapshot_id,
      filing_id: filingTaxonomySnapshot.filing_id,
      filing_date: filingTaxonomySnapshot.filing_date,
      statement_kind: filingTaxonomyFact.statement_kind,
      role_uri: filingTaxonomyFact.role_uri,
      concept_key: filingTaxonomyFact.concept_key,
      qname: filingTaxonomyFact.qname,
      namespace_uri: filingTaxonomyFact.namespace_uri,
      local_name: filingTaxonomyFact.local_name,
      value_num: filingTaxonomyFact.value_num,
      context_id: filingTaxonomyFact.context_id,
      unit: filingTaxonomyFact.unit,
      decimals: filingTaxonomyFact.decimals,
      period_start: filingTaxonomyFact.period_start,
      period_end: filingTaxonomyFact.period_end,
      period_instant: filingTaxonomyFact.period_instant,
      dimensions: filingTaxonomyFact.dimensions,
      is_dimensionless: filingTaxonomyFact.is_dimensionless,
      source_file: filingTaxonomyFact.source_file
    })
    .from(filingTaxonomyFact)
    .innerJoin(filingTaxonomySnapshot, eq(filingTaxonomyFact.snapshot_id, filingTaxonomySnapshot.id))
    .where(and(...conditions))
    .orderBy(desc(filingTaxonomyFact.id))
    .limit(safeLimit + 1);

  const hasMore = rows.length > safeLimit;
  const used = hasMore ? rows.slice(0, safeLimit) : rows;
  const nextCursor = hasMore ? String(used[used.length - 1]?.id ?? '') : null;

  const facts: TaxonomyFactRow[] = used.map((row) => {
    const value = asNumber(row.value_num);
    if (value === null) {
      throw new Error(`Invalid value_num in taxonomy fact ${row.id}`);
    }
    return {
      id: row.id,
      snapshotId: row.snapshot_id,
      filingId: row.filing_id,
      filingDate: row.filing_date,
      statement: row.statement_kind,
      roleUri: row.role_uri,
      conceptKey: row.concept_key,
      qname: row.qname,
      namespaceUri: row.namespace_uri,
      localName: row.local_name,
      value,
      contextId: row.context_id,
      unit: row.unit,
      decimals: row.decimals,
      periodStart: row.period_start,
      periodEnd: row.period_end,
      periodInstant: row.period_instant,
      dimensions: row.dimensions,
      isDimensionless: row.is_dimensionless,
      sourceFile: row.source_file
    };
  });

  return {
    facts,
    nextCursor
  };
}
/**
 * Lists the assets belonging to any of the given snapshots, highest id first.
 * An empty id list short-circuits to an empty result without querying.
 */
export async function listTaxonomyAssetsBySnapshotIds(snapshotIds: number[]) {
  const nothingRequested = snapshotIds.length === 0;
  if (nothingRequested) {
    return [];
  }

  const assetRows = await db
    .select()
    .from(filingTaxonomyAsset)
    .where(inArray(filingTaxonomyAsset.snapshot_id, snapshotIds))
    .orderBy(desc(filingTaxonomyAsset.id));

  return assetRows.map((assetRow) => toAssetRecord(assetRow));
}

View File

@@ -170,3 +170,19 @@ export async function saveFilingAnalysis(
return updated ? toFiling(updated) : null;
}
/**
 * Overwrites the `metrics` of one filing and bumps its `updated_at` timestamp.
 *
 * @returns the updated filing, or `null` when no filing has the given id.
 */
export async function updateFilingMetricsById(
  filingId: number,
  metrics: Filing['metrics']
) {
  const updatedAt = new Date().toISOString();
  const updatedRows = await db
    .update(filing)
    .set({
      metrics,
      updated_at: updatedAt
    })
    .where(eq(filing.id, filingId))
    .returning();

  const updated = updatedRows[0];
  if (!updated) {
    return null;
  }
  return toFiling(updated);
}

View File

@@ -1378,6 +1378,7 @@ export async function hydrateFilingStatementSnapshot(
filingId: input.filingId,
accessionNumber: input.accessionNumber,
filingDate: input.filingDate,
periodStart: null,
periodEnd: input.filingDate,
filingType: input.filingType,
periodLabel: input.filingType === '10-Q' ? 'Quarter End' : 'Fiscal Year End'

View File

@@ -13,12 +13,13 @@ import {
getFilingByAccession,
listFilingsRecords,
saveFilingAnalysis,
updateFilingMetricsById,
upsertFilingsRecords
} from '@/lib/server/repos/filings';
import {
getFilingStatementSnapshotByFilingId,
upsertFilingStatementSnapshot
} from '@/lib/server/repos/filing-statements';
getFilingTaxonomySnapshotByFilingId,
upsertFilingTaxonomySnapshot
} from '@/lib/server/repos/filing-taxonomy';
import {
applyRefreshedPrices,
listHoldingsForPriceRefresh,
@@ -27,11 +28,10 @@ import {
import { createPortfolioInsight } from '@/lib/server/repos/insights';
import { updateTaskStage } from '@/lib/server/repos/tasks';
import {
fetchFilingMetricsForFilings,
fetchPrimaryFilingText,
fetchRecentFilings,
hydrateFilingStatementSnapshot
fetchRecentFilings
} from '@/lib/server/sec';
import { hydrateFilingTaxonomySnapshot } from '@/lib/server/taxonomy/engine';
const EXTRACTION_REQUIRED_KEYS = [
'summary',
@@ -88,6 +88,10 @@ const COMPANY_SPECIFIC_PATTERNS = [
/** Union of the property names of a filing's non-null `metrics` object. */
type FilingMetricKey = keyof NonNullable<Filing['metrics']>;
/**
 * Type guard that narrows a form type string to `'10-K' | '10-Q'`.
 * Any other value (including different casing) yields `false`.
 */
function isFinancialMetricsForm(filingType: string): filingType is '10-K' | '10-Q' {
  switch (filingType) {
    case '10-K':
    case '10-Q':
      return true;
    default:
      return false;
  }
}
const METRIC_CHECK_PATTERNS: Array<{
key: FilingMetricKey;
label: string;
@@ -120,10 +124,6 @@ const METRIC_CHECK_PATTERNS: Array<{
}
];
/** Returns true when the filing form is `10-K` or `10-Q`. */
function isFinancialMetricsForm(form: Filing['filing_type']) {
  const metricForms: ReadonlyArray<string> = ['10-K', '10-Q'];
  return metricForms.includes(form);
}
function toTaskResult(value: unknown): Record<string, unknown> {
if (!value || typeof value !== 'object' || Array.isArray(value)) {
return { value };
@@ -565,40 +565,6 @@ async function processSyncFilings(task: Task) {
`Fetching up to ${limit} filings for ${ticker}${scopeLabel ? ` (${scopeLabel})` : ''}`
);
const filings = await fetchRecentFilings(ticker, limit);
const metricsByAccession = new Map<string, Filing['metrics']>();
const filingsByCik = new Map<string, typeof filings>();
for (const filing of filings) {
const group = filingsByCik.get(filing.cik);
if (group) {
group.push(filing);
continue;
}
filingsByCik.set(filing.cik, [filing]);
}
await setProjectionStage(task, 'sync.fetch_metrics', `Computing financial metrics for ${filings.length} filings`);
for (const [cik, filingsForCik] of filingsByCik) {
const filingsForFinancialMetrics = filingsForCik.filter((filing) => isFinancialMetricsForm(filing.filingType));
if (filingsForFinancialMetrics.length === 0) {
continue;
}
const metricsMap = await fetchFilingMetricsForFilings(
cik,
filingsForCik[0]?.ticker ?? ticker,
filingsForFinancialMetrics.map((filing) => ({
accessionNumber: filing.accessionNumber,
filingDate: filing.filingDate,
filingType: filing.filingType
}))
);
for (const [accessionNumber, metrics] of metricsMap.entries()) {
metricsByAccession.set(accessionNumber, metrics);
}
}
await setProjectionStage(task, 'sync.persist_filings', 'Persisting filings and links');
const saveResult = await upsertFilingsRecords(
@@ -612,24 +578,24 @@ async function processSyncFilings(task: Task) {
filing_url: filing.filingUrl,
submission_url: filing.submissionUrl,
primary_document: filing.primaryDocument,
metrics: metricsByAccession.get(filing.accessionNumber) ?? null,
metrics: null,
links: filingLinks(filing)
}))
);
let statementSnapshotsHydrated = 0;
let statementSnapshotsFailed = 0;
let taxonomySnapshotsHydrated = 0;
let taxonomySnapshotsFailed = 0;
const hydrateCandidates = (await listFilingsRecords({
ticker,
limit: Math.min(Math.max(limit * 3, 40), STATEMENT_HYDRATION_MAX_FILINGS)
}))
.filter((filing): filing is Filing & { filing_type: '10-K' | '10-Q' } => {
return filing.filing_type === '10-K' || filing.filing_type === '10-Q';
return isFinancialMetricsForm(filing.filing_type);
});
await setProjectionStage(task, 'sync.hydrate_statements', `Hydrating statement snapshots for ${hydrateCandidates.length} candidate filings`);
await setProjectionStage(task, 'sync.discover_assets', `Discovering taxonomy assets for ${hydrateCandidates.length} candidate filings`);
for (const filing of hydrateCandidates) {
const existingSnapshot = await getFilingStatementSnapshotByFilingId(filing.id);
const existingSnapshot = await getFilingTaxonomySnapshotByFilingId(filing.id);
const shouldRefresh = !existingSnapshot
|| Date.parse(existingSnapshot.updated_at) < Date.parse(filing.updated_at);
@@ -638,7 +604,8 @@ async function processSyncFilings(task: Task) {
}
try {
const snapshot = await hydrateFilingStatementSnapshot({
await setProjectionStage(task, 'sync.extract_taxonomy', `Extracting XBRL taxonomy for ${filing.accession_number}`);
const snapshot = await hydrateFilingTaxonomySnapshot({
filingId: filing.id,
ticker: filing.ticker,
cik: filing.cik,
@@ -646,27 +613,50 @@ async function processSyncFilings(task: Task) {
filingDate: filing.filing_date,
filingType: filing.filing_type,
filingUrl: filing.filing_url,
primaryDocument: filing.primary_document ?? null,
metrics: filing.metrics
primaryDocument: filing.primary_document ?? null
});
await upsertFilingStatementSnapshot(snapshot);
statementSnapshotsHydrated += 1;
await setProjectionStage(task, 'sync.normalize_taxonomy', `Materializing statements for ${filing.accession_number}`);
await setProjectionStage(task, 'sync.derive_metrics', `Deriving taxonomy metrics for ${filing.accession_number}`);
await setProjectionStage(task, 'sync.validate_pdf_metrics', `Validating metrics via PDF + LLM for ${filing.accession_number}`);
await setProjectionStage(task, 'sync.persist_taxonomy', `Persisting taxonomy snapshot for ${filing.accession_number}`);
await upsertFilingTaxonomySnapshot(snapshot);
await updateFilingMetricsById(filing.id, snapshot.derived_metrics);
taxonomySnapshotsHydrated += 1;
} catch (error) {
await upsertFilingStatementSnapshot({
const now = new Date().toISOString();
await upsertFilingTaxonomySnapshot({
filing_id: filing.id,
ticker: filing.ticker,
filing_date: filing.filing_date,
filing_type: filing.filing_type,
period_end: filing.filing_date,
statement_bundle: null,
standardized_bundle: null,
dimension_bundle: null,
parse_status: 'failed',
parse_error: error instanceof Error ? error.message : 'Statement hydration failed',
source: 'companyfacts_fallback'
parse_error: error instanceof Error ? error.message : 'Taxonomy hydration failed',
source: 'legacy_html_fallback',
periods: [],
statement_rows: {
income: [],
balance: [],
cash_flow: [],
equity: [],
comprehensive_income: []
},
derived_metrics: filing.metrics ?? null,
validation_result: {
status: 'error',
checks: [],
validatedAt: now
},
facts_count: 0,
concepts_count: 0,
dimensions_count: 0,
assets: [],
concepts: [],
facts: [],
metric_validations: []
});
statementSnapshotsFailed += 1;
taxonomySnapshotsFailed += 1;
}
await Bun.sleep(STATEMENT_HYDRATION_DELAY_MS);
@@ -679,8 +669,8 @@ async function processSyncFilings(task: Task) {
fetched: filings.length,
inserted: saveResult.inserted,
updated: saveResult.updated,
statementSnapshotsHydrated,
statementSnapshotsFailed
taxonomySnapshotsHydrated,
taxonomySnapshotsFailed
};
}

View File

@@ -0,0 +1,73 @@
import { describe, expect, it } from 'bun:test';
import { discoverFilingAssets } from '@/lib/server/taxonomy/asset-discovery';
// Exercises discoverFilingAssets against stubbed fetch implementations; no network access.
describe('taxonomy asset discovery', () => {
it('classifies assets and selects ranked instance/pdf candidates', async () => {
// Stub: every request answers with a directory listing of three XML files and four PDFs.
const fetchImpl = (async () => {
return new Response(JSON.stringify({
directory: {
item: [
{ name: 'abc_htm.xml', size: '900000' },
{ name: 'abc_pre.xml', size: '250000' },
{ name: 'abc_lab.xml', size: '120000' },
{ name: '10k_financial_statements.pdf', size: '400000' },
{ name: 'annual_report.pdf', size: '300000' },
{ name: 'quarter_statement.pdf', size: '200000' },
{ name: 'exhibit99.pdf', size: '500000' }
]
}
}), {
status: 200,
headers: {
'content-type': 'application/json'
}
});
}) as unknown as typeof fetch;
const result = await discoverFilingAssets({
cik: '0000123456',
accessionNumber: '0000123456-26-000001',
filingUrl: 'https://www.sec.gov/Archives/edgar/data/123456/000012345626000001/abc.htm',
primaryDocument: 'abc.htm',
fetchImpl
});
// The directory URL is the filing URL truncated after its last slash.
expect(result.directoryUrl).toBe('https://www.sec.gov/Archives/edgar/data/123456/000012345626000001/');
const selectedInstance = result.assets.find((asset) => asset.asset_type === 'instance' && asset.is_selected);
expect(selectedInstance?.name).toBe('abc_htm.xml');
const selectedPdfs = result.assets
.filter((asset) => asset.asset_type === 'pdf' && asset.is_selected)
.map((asset) => asset.name);
// Only three PDFs are selected; the exhibit is the one expected to be left out.
expect(selectedPdfs.length).toBe(3);
expect(selectedPdfs).toContain('10k_financial_statements.pdf');
expect(selectedPdfs).toContain('annual_report.pdf');
expect(selectedPdfs).toContain('quarter_statement.pdf');
expect(selectedPdfs).not.toContain('exhibit99.pdf');
});
it('falls back to filing url when SEC directory assets are unavailable', async () => {
// Stub: the directory listing request fails with 404.
const fetchImpl = (async () => {
return new Response('not found', { status: 404 });
}) as unknown as typeof fetch;
const result = await discoverFilingAssets({
cik: '0000123456',
accessionNumber: '0000123456-26-000001',
filingUrl: 'https://www.sec.gov/Archives/edgar/data/123456/000012345626000001/abc.xml',
primaryDocument: 'abc.xml',
fetchImpl
});
// With no listing, the filing URL itself is expected as the single (selected) instance asset.
expect(result.assets.length).toBe(1);
expect(result.assets[0]).toEqual({
asset_type: 'instance',
name: 'abc.xml',
url: 'https://www.sec.gov/Archives/edgar/data/123456/000012345626000001/abc.xml',
size_bytes: null,
score: 6,
is_selected: true
});
});
});

View File

@@ -0,0 +1,283 @@
import type { TaxonomyAsset } from '@/lib/server/taxonomy/types';
/** Arguments accepted by `discoverFilingAssets`. */
type FilingAssetDiscoveryInput = {
// Used with the accession number to build the directory URL when `filingUrl` cannot provide one.
cik: string;
accessionNumber: string;
// When present, the directory URL is derived from it; it is also the fallback asset URL.
filingUrl: string | null;
// Used as the fallback asset name and to boost instance files that contain its base name.
primaryDocument: string | null;
// Optional fetch override (the tests inject a stub); the global `fetch` is used when omitted.
fetchImpl?: typeof fetch;
};
/**
 * Shape read from the `index.json` document requested under the filing directory URL.
 * Every level is optional because the payload is not validated before use.
 */
type FilingDirectoryJson = {
directory?: {
item?: Array<{
name?: string;
type?: string;
// Accepted as either a numeric string or a number; normalised by `parseSize`.
size?: string | number;
}>;
};
};
/**
 * User-Agent header value for SEC requests: the `SEC_USER_AGENT` environment
 * variable when it is set to a non-empty string, otherwise a built-in default.
 */
function envUserAgent() {
  const configured = process.env.SEC_USER_AGENT;
  if (configured) {
    return configured;
  }
  return 'Fiscal Clone <support@fiscal.local>';
}
/** Removes every hyphen from an accession number (e.g. `0000123456-26-000001` -> `000012345626000001`). */
function compactAccessionNumber(value: string) {
  return value.split('-').join('');
}
/**
 * Converts a CIK into the form used as a URL path segment: non-digit
 * characters are dropped and leading zeros are removed by a numeric round trip.
 *
 * @returns The normalised digits, or `null` when the input contains no digits
 * or the digits do not convert to a finite number.
 */
function normalizeCikForPath(value: string) {
  const digitsOnly = value.replace(/\D/g, '');
  if (digitsOnly.length === 0) {
    return null;
  }

  const asNumber = Number(digitsOnly);
  return Number.isFinite(asNumber) ? String(asNumber) : null;
}
/**
 * Works out the directory URL that holds a filing's documents.
 *
 * Preference order:
 * 1. The filing URL truncated just after its last `/`, provided that slash
 *    sits beyond the `https://` scheme prefix.
 * 2. An EDGAR archive URL assembled from the normalised CIK and the
 *    hyphen-free accession number.
 *
 * @returns The directory URL (always ending in `/`), or `null` when neither
 * source yields one.
 */
function resolveFilingDirectoryUrl(input: {
  filingUrl: string | null;
  cik: string;
  accessionNumber: string;
}) {
  const trimmedUrl = input.filingUrl?.trim();
  if (trimmedUrl) {
    const cutAt = trimmedUrl.lastIndexOf('/');
    if (cutAt > 'https://'.length) {
      return trimmedUrl.slice(0, cutAt + 1);
    }
  }

  const cikSegment = normalizeCikForPath(input.cik);
  const accessionSegment = compactAccessionNumber(input.accessionNumber);

  return cikSegment && accessionSegment
    ? `https://www.sec.gov/Archives/edgar/data/${cikSegment}/${accessionSegment}/`
    : null;
}
/**
 * Classifies a filing directory entry by file name (case-insensitive).
 *
 * `.pdf` and `.xsd` map to `pdf` and `schema`. For `.xml` files the linkbase
 * rules are tried in order (presentation, label, calculation, definition),
 * each matching either a `_xxx.xml` / `-xxx.xml` suffix or the full keyword
 * anywhere in the name; an `.xml` file matching none of them is an `instance`.
 * Everything else is `other`.
 */
function classifyAssetType(name: string): TaxonomyAsset['asset_type'] {
  const lower = name.toLowerCase();

  if (lower.endsWith('.pdf')) {
    return 'pdf';
  }
  if (lower.endsWith('.xsd')) {
    return 'schema';
  }
  if (!lower.endsWith('.xml')) {
    return 'other';
  }

  // Order matters: the first matching rule wins.
  const linkbaseRules: Array<{
    suffix: RegExp;
    keyword: RegExp;
    kind: TaxonomyAsset['asset_type'];
  }> = [
    { suffix: /(_|-)pre\.xml$/, keyword: /presentation/, kind: 'presentation' },
    { suffix: /(_|-)lab\.xml$/, keyword: /label/, kind: 'label' },
    { suffix: /(_|-)cal\.xml$/, keyword: /calculation/, kind: 'calculation' },
    { suffix: /(_|-)def\.xml$/, keyword: /definition/, kind: 'definition' }
  ];

  for (const rule of linkbaseRules) {
    if (rule.suffix.test(lower) || rule.keyword.test(lower)) {
      return rule.kind;
    }
  }

  return 'instance';
}
/**
 * Ranks a PDF by how likely its name suggests financial statements.
 *
 * +8 when the name mentions a statement/report keyword, -2 when it looks like
 * an exhibit, +1 when the known size is strictly above 100,000 bytes.
 */
function scorePdf(name: string, sizeBytes: number | null) {
  const lower = name.toLowerCase();

  const keywordBonus = /financial|statement|annual|quarter|10k|10q/.test(lower) ? 8 : 0;
  const exhibitPenalty = /exhibit|ex-\d+/.test(lower) ? -2 : 0;
  const sizeBonus = sizeBytes && sizeBytes > 100_000 ? 1 : 0;

  return keywordBonus + exhibitPenalty + sizeBonus;
}
/**
 * Ranks a candidate XBRL instance file.
 *
 * Starts at 1, then: +4 for an `_htm.xml` suffix, +4 for an `_ins.xml` suffix,
 * +5 when the name contains the primary document's extension-less base name,
 * and -3 when the name contains `cal`, `def`, `lab` or `pre` anywhere.
 */
function scoreInstance(name: string, primaryDocument: string | null) {
  const lower = name.toLowerCase();
  const primaryStem = (primaryDocument ?? '').replace(/\.[a-z0-9]+$/i, '').toLowerCase();

  const adjustments: number[] = [
    /_htm\.xml$/.test(lower) ? 4 : 0,
    /_ins\.xml$/.test(lower) ? 4 : 0,
    primaryStem && lower.includes(primaryStem) ? 5 : 0,
    /cal|def|lab|pre/.test(lower) ? -3 : 0
  ];

  return adjustments.reduce((total, delta) => total + delta, 1);
}
/**
 * Normalises a directory-listing `size` value to a number of bytes.
 *
 * @param raw - A number or a numeric string; anything else is treated as unknown.
 * @returns The finite numeric value, or `null` when the size is missing,
 * blank, or not a finite number.
 */
function parseSize(raw: unknown) {
  if (typeof raw === 'number') {
    return Number.isFinite(raw) ? raw : null;
  }

  if (typeof raw === 'string') {
    const trimmed = raw.trim();
    // `Number('')` is 0, which would record an unknown size as "0 bytes".
    // NOTE(review): directory listings appear to use a blank size for some entries — confirm.
    if (trimmed.length === 0) {
      return null;
    }

    const parsed = Number(trimmed);
    return Number.isFinite(parsed) ? parsed : null;
  }

  return null;
}
/**
 * Issues a GET for `url` with the SEC User-Agent and a JSON `Accept` header,
 * bypassing caches, and returns the parsed body.
 *
 * The body is cast to `T` without validation.
 *
 * @throws Error with the HTTP status when the response is not OK.
 */
async function fetchJson<T>(url: string, fetchImpl: typeof fetch): Promise<T> {
  const response = await fetchImpl(url, {
    headers: {
      'User-Agent': envUserAgent(),
      Accept: 'application/json'
    },
    cache: 'no-store'
  });

  if (response.ok) {
    return (await response.json()) as T;
  }

  throw new Error(`SEC request failed (${response.status})`);
}
/**
 * Lists the files in a filing's directory, classifies them, and marks which
 * ones later pipeline stages should use.
 *
 * - The directory listing is read from `<directoryUrl>index.json`; any failure
 *   there is swallowed and treated as an empty listing.
 * - When nothing is listed and a filing URL exists, the filing URL itself is
 *   used as the single asset (an `instance` if its name ends in `.xml`).
 * - Selection: the highest-scoring instance, the three highest-scoring PDFs,
 *   and every presentation/label/calculation/definition/schema file.
 *   Assets of type `other` always end up unselected.
 *
 * @returns The resolved directory URL (or `null` with no assets when it cannot
 * be resolved) and the scored asset list in listing order.
 */
export async function discoverFilingAssets(input: FilingAssetDiscoveryInput): Promise<{
  directoryUrl: string | null;
  assets: TaxonomyAsset[];
}> {
  const fetchImpl = input.fetchImpl ?? fetch;
  const directoryUrl = resolveFilingDirectoryUrl({
    filingUrl: input.filingUrl,
    cik: input.cik,
    accessionNumber: input.accessionNumber
  });

  if (!directoryUrl) {
    return { directoryUrl: null, assets: [] };
  }

  // Best effort: a missing or unreadable listing falls through to the fallback below.
  const listing: FilingDirectoryJson | null = await fetchJson<FilingDirectoryJson>(
    `${directoryUrl}index.json`,
    fetchImpl
  ).catch(() => null);

  const discovered: TaxonomyAsset[] = (listing?.directory?.item ?? []).flatMap(
    (entry): TaxonomyAsset[] => {
      const name = (entry.name ?? '').trim();
      if (!name) {
        return [];
      }

      return [
        {
          asset_type: classifyAssetType(name),
          name,
          url: `${directoryUrl}${name.replace(/^\/+/, '')}`,
          size_bytes: parseSize(entry.size),
          score: null,
          is_selected: false
        }
      ];
    }
  );

  if (discovered.length === 0 && input.filingUrl) {
    const fallbackName = input.primaryDocument ?? input.filingUrl.split('/').pop() ?? 'primary_document';
    discovered.push({
      asset_type: fallbackName.toLowerCase().endsWith('.xml') ? 'instance' : 'other',
      name: fallbackName,
      url: input.filingUrl,
      size_bytes: null,
      score: null,
      is_selected: true
    });
  }

  const instanceScore = (asset: TaxonomyAsset) => scoreInstance(asset.name, input.primaryDocument);
  const pdfScore = (asset: TaxonomyAsset) => scorePdf(asset.name, asset.size_bytes);

  const rankedInstances = discovered
    .filter((asset) => asset.asset_type === 'instance')
    .map((asset) => ({ asset, score: instanceScore(asset) }))
    .sort((left, right) => right.score - left.score);
  const selectedInstanceUrl = rankedInstances[0]?.asset.url ?? null;

  const selectedPdfUrls = discovered
    .filter((asset) => asset.asset_type === 'pdf')
    .map((asset) => ({ asset, score: pdfScore(asset) }))
    .sort((left, right) => right.score - left.score)
    .slice(0, 3)
    .map((ranked) => ranked.asset.url);

  // Linkbase and schema files are always selected; they carry no score.
  const alwaysSelectedTypes = new Set<string>([
    'presentation',
    'label',
    'calculation',
    'definition',
    'schema'
  ]);

  const assets = discovered.map((asset) => {
    switch (asset.asset_type) {
      case 'instance':
        return {
          ...asset,
          score: instanceScore(asset),
          is_selected: asset.url === selectedInstanceUrl
        };
      case 'pdf':
        return {
          ...asset,
          score: pdfScore(asset),
          is_selected: selectedPdfUrls.includes(asset.url)
        };
      default:
        return {
          ...asset,
          score: null,
          is_selected: alwaysSelectedTypes.has(asset.asset_type)
        };
    }
  });

  return { directoryUrl, assets };
}

View File

@@ -0,0 +1,185 @@
import type { FinancialStatementKind } from '@/lib/types';
import { discoverFilingAssets } from '@/lib/server/taxonomy/asset-discovery';
import { parseLabelLinkbase, parsePresentationLinkbase } from '@/lib/server/taxonomy/linkbase-parser';
import { deriveTaxonomyMetrics } from '@/lib/server/taxonomy/metrics';
import { materializeTaxonomyStatements } from '@/lib/server/taxonomy/materialize';
import { validateMetricsWithPdfLlm } from '@/lib/server/taxonomy/pdf-validation';
import type { TaxonomyHydrationInput, TaxonomyHydrationResult } from '@/lib/server/taxonomy/types';
import { parseXbrlInstance } from '@/lib/server/taxonomy/xbrl-parser';
/**
 * Builds a record with one entry per statement kind, calling `factory` once
 * for each kind (income, balance, cash_flow, equity, comprehensive_income, in
 * that order) so every entry receives its own value.
 */
function createStatementRecord<T>(factory: () => T): Record<FinancialStatementKind, T> {
  const income = factory();
  const balance = factory();
  const cashFlow = factory();
  const equity = factory();
  const comprehensiveIncome = factory();

  return {
    income,
    balance,
    cash_flow: cashFlow,
    equity,
    comprehensive_income: comprehensiveIncome
  };
}
/**
 * User-Agent header value for SEC requests: the `SEC_USER_AGENT` environment
 * variable when it is set to a non-empty string, otherwise a built-in default.
 */
function envUserAgent() {
  const configured = process.env.SEC_USER_AGENT;
  if (configured) {
    return configured;
  }
  return 'Fiscal Clone <support@fiscal.local>';
}
/**
 * Issues a GET for `url` with the SEC User-Agent and an XML/text-leaning
 * `Accept` header, bypassing caches, and returns the body as text.
 *
 * @throws Error with the HTTP status when the response is not OK.
 */
async function fetchText(url: string, fetchImpl: typeof fetch) {
  const response = await fetchImpl(url, {
    headers: {
      'User-Agent': envUserAgent(),
      Accept: 'text/xml, text/plain, text/html;q=0.8, */*;q=0.5'
    },
    cache: 'no-store'
  });

  if (response.ok) {
    return await response.text();
  }

  throw new Error(`SEC request failed (${response.status})`);
}
/**
 * Builds a taxonomy snapshot for one filing from its XBRL assets.
 *
 * Steps, in order: discover the filing's assets, fetch and parse the selected
 * XBRL instance, fetch the selected presentation and label linkbases,
 * materialize statement rows, derive metrics, then run the PDF/LLM metric
 * validation.
 *
 * @param input - Filing identity plus the URLs used for asset discovery.
 * @param options - Optional `fetchImpl` override; the global `fetch` is used otherwise.
 * @returns A snapshot whose `parse_status` is `ready` when both facts and
 * statement rows exist, `partial` when only facts exist, and `failed`
 * otherwise. When no instance asset is found, or the instance file cannot be
 * fetched, a `failed` snapshot carrying only the discovered assets is returned.
 */
export async function hydrateFilingTaxonomySnapshot(
input: TaxonomyHydrationInput,
options?: {
fetchImpl?: typeof fetch;
}
): Promise<TaxonomyHydrationResult> {
const fetchImpl = options?.fetchImpl ?? fetch;
const discovered = await discoverFilingAssets({
cik: input.cik,
accessionNumber: input.accessionNumber,
filingUrl: input.filingUrl,
primaryDocument: input.primaryDocument,
fetchImpl
});
// Template for every early-exit path: a failed snapshot that still reports the discovered assets.
const emptyResult: TaxonomyHydrationResult = {
filing_id: input.filingId,
ticker: input.ticker.trim().toUpperCase(),
filing_date: input.filingDate,
filing_type: input.filingType,
parse_status: 'failed',
parse_error: 'No XBRL instance found',
source: 'legacy_html_fallback',
periods: [],
statement_rows: createStatementRecord(() => []),
derived_metrics: null,
validation_result: {
status: 'not_run',
checks: [],
validatedAt: null
},
facts_count: 0,
concepts_count: 0,
dimensions_count: 0,
assets: discovered.assets,
concepts: [],
facts: [],
metric_validations: []
};
// Prefer the instance flagged by discovery; otherwise take the first instance asset, if any.
const selectedInstance = discovered.assets.find((asset) => asset.asset_type === 'instance' && asset.is_selected)
?? discovered.assets.find((asset) => asset.asset_type === 'instance')
?? null;
if (!selectedInstance) {
return emptyResult;
}
let parseError: string | null = null;
let source: TaxonomyHydrationResult['source'] = 'xbrl_instance';
let instanceText = '';
try {
instanceText = await fetchText(selectedInstance.url, fetchImpl);
} catch (error) {
// Without the instance file there is nothing to parse; report the fetch error on the failed snapshot.
parseError = error instanceof Error ? error.message : 'Unable to fetch instance file';
return {
...emptyResult,
parse_error: parseError
};
}
const parsedInstance = parseXbrlInstance(instanceText, selectedInstance.name);
const labelByConcept = new Map<string, string>();
const presentation: ReturnType<typeof parsePresentationLinkbase> = [];
// Only selected presentation and label linkbases are fetched, one at a time.
for (const asset of discovered.assets) {
if (!asset.is_selected) {
continue;
}
if (asset.asset_type !== 'presentation' && asset.asset_type !== 'label') {
continue;
}
try {
const content = await fetchText(asset.url, fetchImpl);
if (asset.asset_type === 'presentation') {
const parsed = parsePresentationLinkbase(content);
if (parsed.length > 0) {
source = 'xbrl_instance_with_linkbase';
}
presentation.push(...parsed);
} else if (asset.asset_type === 'label') {
const parsed = parseLabelLinkbase(content);
// The first label seen for a concept wins across multiple label files.
for (const [conceptKey, label] of parsed.entries()) {
if (!labelByConcept.has(conceptKey)) {
labelByConcept.set(conceptKey, label);
}
}
}
} catch (error) {
// A linkbase failure is not fatal: keep the first error message and carry on.
parseError = parseError ?? (error instanceof Error ? error.message : 'Failed to parse taxonomy linkbase');
}
}
const materialized = materializeTaxonomyStatements({
filingId: input.filingId,
accessionNumber: input.accessionNumber,
filingDate: input.filingDate,
filingType: input.filingType,
facts: parsedInstance.facts,
presentation,
labelByConcept
});
const derivedMetrics = deriveTaxonomyMetrics(parsedInstance.facts);
const llmValidation = await validateMetricsWithPdfLlm({
metrics: derivedMetrics,
assets: discovered.assets,
fetchImpl
});
const hasRows = (Object.values(materialized.statement_rows).reduce((total, rows) => total + rows.length, 0)) > 0;
const hasFacts = materialized.facts.length > 0;
// ready: facts and rows; partial: facts without rows; failed: no facts.
const parseStatus: TaxonomyHydrationResult['parse_status'] = hasRows && hasFacts
? 'ready'
: hasFacts
? 'partial'
: 'failed';
return {
filing_id: input.filingId,
ticker: input.ticker.trim().toUpperCase(),
filing_date: input.filingDate,
filing_type: input.filingType,
parse_status: parseStatus,
// A recorded linkbase error is reported even when the overall status is ready or partial.
parse_error: parseStatus === 'failed' ? (parseError ?? 'No XBRL facts extracted') : parseError,
source,
periods: materialized.periods,
statement_rows: materialized.statement_rows,
derived_metrics: derivedMetrics,
validation_result: llmValidation.validation_result,
facts_count: materialized.facts.length,
concepts_count: materialized.concepts.length,
dimensions_count: materialized.dimensionsCount,
assets: discovered.assets,
concepts: materialized.concepts,
facts: materialized.facts,
metric_validations: llmValidation.metric_validations
};
}

View File

@@ -0,0 +1,63 @@
import { describe, expect, it } from 'bun:test';
import {
classifyStatementRole,
parseLabelLinkbase,
parsePresentationLinkbase
} from '@/lib/server/taxonomy/linkbase-parser';
// Fixture: a label linkbase with one locator (us-gaap_Revenues) wired by two arcs
// to a terse label ("Rev.") and a standard label ("Revenues").
const SAMPLE_LABEL_LINKBASE = `
<link:linkbase xmlns:link="http://www.xbrl.org/2003/linkbase"
xmlns:xlink="http://www.w3.org/1999/xlink"
xmlns:us-gaap="http://fasb.org/us-gaap/2024">
<link:labelLink xlink:type="extended">
<link:loc xlink:type="locator" xlink:label="loc_rev" xlink:href="test.xsd#us-gaap_Revenues" />
<link:label xlink:type="resource" xlink:label="lab_terse" xlink:role="http://www.xbrl.org/2003/role/terseLabel">Rev.</link:label>
<link:label xlink:type="resource" xlink:label="lab_label" xlink:role="http://www.xbrl.org/2003/role/label">Revenues</link:label>
<link:labelArc xlink:type="arc" xlink:from="loc_rev" xlink:to="lab_terse" />
<link:labelArc xlink:type="arc" xlink:from="loc_rev" xlink:to="lab_label" />
</link:labelLink>
</link:linkbase>
`;
// Fixture: a single presentation link (StatementOfOperations role) whose root
// concept has two ordered children, Revenues (order 1) and CostOfGoodsSold (order 2).
const SAMPLE_PRESENTATION_LINKBASE = `
<link:linkbase xmlns:link="http://www.xbrl.org/2003/linkbase"
xmlns:xlink="http://www.w3.org/1999/xlink"
xmlns:us-gaap="http://fasb.org/us-gaap/2024">
<link:presentationLink xlink:type="extended" xlink:role="http://www.xbrl.org/2003/role/StatementOfOperations">
<link:loc xlink:type="locator" xlink:label="root" xlink:href="test.xsd#us-gaap_StatementLineItems" />
<link:loc xlink:type="locator" xlink:label="rev" xlink:href="test.xsd#us-gaap_Revenues" />
<link:loc xlink:type="locator" xlink:label="cogs" xlink:href="test.xsd#us-gaap_CostOfGoodsSold" />
<link:presentationArc xlink:type="arc" xlink:from="root" xlink:to="rev" order="1" />
<link:presentationArc xlink:type="arc" xlink:from="root" xlink:to="cogs" order="2" />
</link:presentationLink>
</link:linkbase>
`;
// Unit tests for the linkbase parser, driven by the inline XML fixtures above.
describe('linkbase parser', () => {
it('builds preferred labels from label linkbase', () => {
const labels = parseLabelLinkbase(SAMPLE_LABEL_LINKBASE);
// The standard label is expected to win over the terse label for the same concept.
expect(labels.get('http://fasb.org/us-gaap/2024#Revenues')).toBe('Revenues');
});
it('builds role trees with depth/order/parent metadata', () => {
const rows = parsePresentationLinkbase(SAMPLE_PRESENTATION_LINKBASE);
// One row for the root plus one per child.
expect(rows.length).toBe(3);
const root = rows.find((row) => row.qname === 'us-gaap:StatementLineItems');
const revenue = rows.find((row) => row.qname === 'us-gaap:Revenues');
const cogs = rows.find((row) => row.qname === 'us-gaap:CostOfGoodsSold');
expect(root?.depth).toBe(0);
expect(root?.parentConceptKey).toBeNull();
expect(revenue?.depth).toBe(1);
expect(cogs?.depth).toBe(1);
expect(revenue?.parentConceptKey).toBe(root?.conceptKey ?? null);
// Sibling order follows the arcs' `order` attribute.
expect(revenue?.order).toBeLessThan(cogs?.order ?? Number.POSITIVE_INFINITY);
});
it('classifies statement roles into canonical statement kinds', () => {
expect(classifyStatementRole('http://www.xbrl.org/2003/role/StatementOfOperations')).toBe('income');
expect(classifyStatementRole('http://www.xbrl.org/2003/role/StatementOfFinancialPosition')).toBe('balance');
expect(classifyStatementRole('http://www.xbrl.org/2003/role/StatementOfCashFlows')).toBe('cash_flow');
});
});

View File

@@ -0,0 +1,310 @@
import type { FinancialStatementKind } from '@/lib/types';
import type { TaxonomyNamespaceMap, TaxonomyPresentationConcept } from '@/lib/server/taxonomy/types';
/**
 * Decodes the XML entities this parser cares about and trims the result.
 *
 * Handles `&lt;`, `&gt;`, `&quot;`, `&#39;` / `&apos;`, `&#160;` / `&nbsp;`
 * (mapped to a regular space) and `&amp;`. Other numeric or named entities
 * are left untouched.
 */
function decodeXmlEntities(value: string) {
  return value
    .replace(/&lt;/gi, '<')
    .replace(/&gt;/gi, '>')
    .replace(/&quot;/gi, '"')
    .replace(/&#39;|&apos;/gi, "'")
    .replace(/&#160;|&nbsp;/gi, ' ')
    // `&amp;` must be decoded last: decoding it first would turn an escaped
    // entity such as `&amp;lt;` into `&lt;` and then wrongly into `<`.
    .replace(/&amp;/gi, '&')
    .trim();
}
/**
 * Collects `xmlns:prefix="uri"` declarations into a prefix -> URI map.
 *
 * Declarations are read from the first tag whose text contains `linkbase`;
 * when no such tag exists, the first 1200 characters of the document are
 * scanned instead. Blank prefixes or URIs are ignored, and a repeated prefix
 * keeps its last declaration.
 */
function parseNamespaceMap(raw: string): TaxonomyNamespaceMap {
  const declarationSource = raw.match(/<[^>]*linkbase[^>]*>/i)?.[0] ?? raw.slice(0, 1200);
  const namespaces: TaxonomyNamespaceMap = {};

  const declarations = declarationSource.matchAll(/xmlns:([a-zA-Z0-9_\-]+)=["']([^"']+)["']/g);
  for (const [, rawPrefix = '', rawUri = ''] of declarations) {
    const prefix = rawPrefix.trim();
    const uri = rawUri.trim();
    if (prefix && uri) {
      namespaces[prefix] = uri;
    }
  }

  return namespaces;
}
/**
 * Derives a `prefix:localName` QName from a locator href.
 *
 * Uses the text after the first `#` (or the whole href when there is none),
 * trims it and strips a leading `loc_` marker. A fragment that already
 * contains `:` is returned unchanged; otherwise the first `_` is taken as the
 * prefix/local-name separator.
 *
 * @returns The QName, or `null` when the fragment is empty or has neither
 * `:` nor `_`.
 */
function qnameFromHref(href: string) {
  const hashAt = href.indexOf('#');
  const fragment = hashAt >= 0 ? href.slice(hashAt + 1) : href;
  if (fragment.length === 0) {
    return null;
  }

  const cleaned = fragment.trim().replace(/^loc_+/i, '');
  if (cleaned.length === 0) {
    return null;
  }

  if (cleaned.includes(':')) {
    return cleaned;
  }

  const separatorAt = cleaned.indexOf('_');
  return separatorAt >= 0
    ? `${cleaned.slice(0, separatorAt)}:${cleaned.slice(separatorAt + 1)}`
    : null;
}
/**
 * Expands a `prefix:localName` QName into its concept identity.
 *
 * The namespace URI is looked up by prefix; an unknown prefix falls back to
 * `urn:unknown:<prefix>`. The concept key is `<namespaceUri>#<localName>`.
 * Everything after the first `:` counts as the local name.
 *
 * @returns The concept parts, or `null` when the prefix or local name is empty.
 */
function conceptFromQName(qname: string, namespaces: TaxonomyNamespaceMap) {
  const colonAt = qname.indexOf(':');
  if (colonAt < 0) {
    return null;
  }

  const prefix = qname.slice(0, colonAt);
  const localName = qname.slice(colonAt + 1);
  if (!prefix || !localName) {
    return null;
  }

  const namespaceUri = namespaces[prefix] ?? `urn:unknown:${prefix}`;
  const conceptKey = `${namespaceUri}#${localName}`;

  return { qname, namespaceUri, localName, conceptKey };
}
/**
 * Ranks a label role so the most suitable label can be chosen for a concept.
 *
 * Higher is better: standard `/label` (4), `/terseLabel` (3),
 * `/verboseLabel` (2), any other non-empty role (1), no role (0).
 * Matching is case-insensitive and based on the role URI suffix.
 */
function labelPriority(role: string | null) {
  const normalized = (role ?? '').toLowerCase();
  if (normalized === '') {
    return 0;
  }

  const rankedSuffixes: Array<[suffix: string, priority: number]> = [
    ['/label', 4],
    ['/terselabel', 3],
    ['/verboselabel', 2]
  ];

  const hit = rankedSuffixes.find(([suffix]) => normalized.endsWith(suffix));
  return hit ? hit[1] : 1;
}
/**
 * Maps a presentation role URI to a canonical statement kind using keyword
 * heuristics on the lower-cased URI.
 *
 * Rules are evaluated in a fixed order and the first match wins: cash flow,
 * equity, comprehensive income, balance sheet, income statement.
 *
 * @returns The statement kind, or `null` when no rule matches.
 */
export function classifyStatementRole(roleUri: string): FinancialStatementKind | null {
  const normalized = roleUri.toLowerCase();

  // Order matters: earlier rules take precedence over later ones.
  const orderedRules: Array<[pattern: RegExp, kind: FinancialStatementKind]> = [
    [/cash\s*flow|statementsof?cashflows|netcash/, 'cash_flow'],
    [/shareholders?|stockholders?|equity|retainedearnings/, 'equity'],
    [/comprehensive\s*income/, 'comprehensive_income'],
    [/balance\s*sheet|financial\s*position|assets?andliabilities/, 'balance'],
    [/operations|income\s*statement|statementsofincome|profit/, 'income']
  ];

  for (const [pattern, kind] of orderedRules) {
    if (pattern.test(normalized)) {
      return kind;
    }
  }

  return null;
}
/**
 * Extracts one preferred human-readable label per concept from a label
 * linkbase document.
 *
 * For every `labelLink` block the locators (`loc`), label resources (`label`)
 * and arcs (`labelArc`) are read; each arc connects a locator label to a
 * resource label. Among all labels reachable for a concept, the one whose
 * role ranks highest in `labelPriority` wins; on ties the first one seen is
 * kept.
 *
 * Several label resources may share the same `xlink:label` (one arc then
 * points at all of them), so resources are grouped per label rather than
 * stored one-per-label.
 *
 * @param raw - The linkbase XML text.
 * @returns Map from concept key (`<namespaceUri>#<localName>`) to label text.
 */
export function parseLabelLinkbase(raw: string): Map<string, string> {
  const namespaces = parseNamespaceMap(raw);
  const preferredLabelByConcept = new Map<string, { text: string; priority: number }>();
  const linkPattern = /<(?:[a-z0-9_\-]+:)?labelLink\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_\-]+:)?labelLink>/gi;

  for (const linkMatch of raw.matchAll(linkPattern)) {
    const block = linkMatch[1] ?? '';
    const locByLabel = new Map<string, string>();
    const resourcesByLabel = new Map<string, Array<{ text: string; role: string | null }>>();

    for (const locMatch of block.matchAll(/<(?:[a-z0-9_\-]+:)?loc\b([^>]*)\/?>/gi)) {
      const attrs = locMatch[1] ?? '';
      const label = attrs.match(/\bxlink:label=["']([^"']+)["']/i)?.[1]?.trim() ?? '';
      const href = attrs.match(/\bxlink:href=["']([^"']+)["']/i)?.[1]?.trim() ?? '';
      if (!label || !href) {
        continue;
      }

      const qname = qnameFromHref(href);
      if (!qname) {
        continue;
      }

      const concept = conceptFromQName(qname, namespaces);
      if (!concept) {
        continue;
      }

      locByLabel.set(label, concept.conceptKey);
    }

    for (const resourceMatch of block.matchAll(/<(?:[a-z0-9_\-]+:)?label\b([^>]*)>([\s\S]*?)<\/(?:[a-z0-9_\-]+:)?label>/gi)) {
      const attrs = resourceMatch[1] ?? '';
      const body = decodeXmlEntities(resourceMatch[2] ?? '').replace(/\s+/g, ' ').trim();
      if (!body) {
        continue;
      }

      const resourceLabel = attrs.match(/\bxlink:label=["']([^"']+)["']/i)?.[1]?.trim() ?? '';
      const role = attrs.match(/\bxlink:role=["']([^"']+)["']/i)?.[1]?.trim() ?? null;
      if (!resourceLabel) {
        continue;
      }

      // Keep every resource that shares this xlink:label so none is overwritten.
      const group = resourcesByLabel.get(resourceLabel) ?? [];
      group.push({ text: body, role });
      resourcesByLabel.set(resourceLabel, group);
    }

    for (const arcMatch of block.matchAll(/<(?:[a-z0-9_\-]+:)?labelArc\b([^>]*)\/?>/gi)) {
      const attrs = arcMatch[1] ?? '';
      const from = attrs.match(/\bxlink:from=["']([^"']+)["']/i)?.[1]?.trim() ?? '';
      const to = attrs.match(/\bxlink:to=["']([^"']+)["']/i)?.[1]?.trim() ?? '';
      if (!from || !to) {
        continue;
      }

      const conceptKey = locByLabel.get(from);
      const resources = resourcesByLabel.get(to);
      if (!conceptKey || !resources) {
        continue;
      }

      // An arc targets every resource carrying the `to` label; rank them all.
      for (const resource of resources) {
        const priority = labelPriority(resource.role);
        const current = preferredLabelByConcept.get(conceptKey);
        if (!current || priority > current.priority) {
          preferredLabelByConcept.set(conceptKey, {
            text: resource.text,
            priority
          });
        }
      }
    }
  }

  return new Map(
    [...preferredLabelByConcept.entries()].map(([conceptKey, value]) => [conceptKey, value.text])
  );
}
/**
 * Flattens the presentation trees of a presentation linkbase into rows.
 *
 * For every `presentationLink` with a role URI, locators are resolved to
 * concepts, arcs define the parent -> child edges, and each root (a referenced
 * locator with no incoming arc) is walked depth-first. Every visited node
 * yields one row carrying its role, depth, parent concept and a sortable
 * `order`: roots get 1, 2, 3, ... and each child adds `(position) / 1000` to
 * its parent's order, with siblings positioned by their arc `order` attribute.
 *
 * A malformed linkbase containing a directed cycle is tolerated: a node is
 * never re-entered while it is already on the current path, so traversal
 * always terminates.
 *
 * @param raw - The linkbase XML text.
 * @returns Rows for all roles, in traversal order.
 */
export function parsePresentationLinkbase(raw: string): TaxonomyPresentationConcept[] {
  const namespaces = parseNamespaceMap(raw);
  const rows: TaxonomyPresentationConcept[] = [];
  const linkPattern = /<(?:[a-z0-9_\-]+:)?presentationLink\b([^>]*)>([\s\S]*?)<\/(?:[a-z0-9_\-]+:)?presentationLink>/gi;

  for (const linkMatch of raw.matchAll(linkPattern)) {
    const linkAttrs = linkMatch[1] ?? '';
    const block = linkMatch[2] ?? '';
    const roleUri = linkAttrs.match(/\bxlink:role=["']([^"']+)["']/i)?.[1]?.trim() ?? '';
    if (!roleUri) {
      continue;
    }

    const locByLabel = new Map<string, { conceptKey: string; qname: string; isAbstract: boolean }>();
    for (const locMatch of block.matchAll(/<(?:[a-z0-9_\-]+:)?loc\b([^>]*)\/?>/gi)) {
      const attrs = locMatch[1] ?? '';
      const label = attrs.match(/\bxlink:label=["']([^"']+)["']/i)?.[1]?.trim() ?? '';
      const href = attrs.match(/\bxlink:href=["']([^"']+)["']/i)?.[1]?.trim() ?? '';
      if (!label || !href) {
        continue;
      }

      const qname = qnameFromHref(href);
      if (!qname) {
        continue;
      }

      const concept = conceptFromQName(qname, namespaces);
      if (!concept) {
        continue;
      }

      locByLabel.set(label, {
        conceptKey: concept.conceptKey,
        qname: concept.qname,
        // Heuristic: a concept is treated as abstract when its local name says so.
        isAbstract: /abstract/i.test(concept.localName)
      });
    }

    const childrenByLabel = new Map<string, Array<{ label: string; order: number }>>();
    const incoming = new Set<string>();
    const allReferenced = new Set<string>();

    for (const arcMatch of block.matchAll(/<(?:[a-z0-9_\-]+:)?presentationArc\b([^>]*)\/?>/gi)) {
      const attrs = arcMatch[1] ?? '';
      const from = attrs.match(/\bxlink:from=["']([^"']+)["']/i)?.[1]?.trim() ?? '';
      const to = attrs.match(/\bxlink:to=["']([^"']+)["']/i)?.[1]?.trim() ?? '';
      const orderRaw = attrs.match(/\border=["']([^"']+)["']/i)?.[1]?.trim() ?? '';
      const order = Number.parseFloat(orderRaw);

      // Arcs that reference an unresolved locator are dropped.
      if (!from || !to || !locByLabel.has(from) || !locByLabel.has(to)) {
        continue;
      }

      const group = childrenByLabel.get(from) ?? [];
      // Without a usable `order` attribute, fall back to arrival position.
      group.push({ label: to, order: Number.isFinite(order) ? order : group.length + 1 });
      childrenByLabel.set(from, group);

      incoming.add(to);
      allReferenced.add(from);
      allReferenced.add(to);
    }

    const roots = [...allReferenced].filter((label) => !incoming.has(label));
    const visited = new Set<string>();
    // Labels on the current root-to-node path; guards against directed cycles,
    // which the depth-qualified `visited` key alone cannot detect.
    const activePath = new Set<string>();

    function dfs(label: string, depth: number, parentLabel: string | null, baseOrder: number) {
      const node = locByLabel.get(label);
      if (!node) {
        return;
      }

      if (activePath.has(label)) {
        return;
      }

      // Skips a repeated (parent, child, depth) edge, e.g. duplicate arcs.
      const pathKey = `${parentLabel ?? 'root'}::${label}::${depth}`;
      if (visited.has(pathKey)) {
        return;
      }
      visited.add(pathKey);

      const parentConceptKey = parentLabel ? (locByLabel.get(parentLabel)?.conceptKey ?? null) : null;
      rows.push({
        conceptKey: node.conceptKey,
        qname: node.qname,
        roleUri,
        order: baseOrder,
        depth,
        parentConceptKey,
        isAbstract: node.isAbstract
      });

      activePath.add(label);
      const children = [...(childrenByLabel.get(label) ?? [])].sort((left, right) => left.order - right.order);
      for (let i = 0; i < children.length; i += 1) {
        const child = children[i];
        if (!child) {
          continue;
        }

        dfs(child.label, depth + 1, label, baseOrder + (i + 1) / 1000);
      }
      activePath.delete(label);
    }

    for (let i = 0; i < roots.length; i += 1) {
      const root = roots[i];
      if (!root) {
        continue;
      }

      dfs(root, 0, null, i + 1);
    }
  }

  return rows;
}

View File

@@ -0,0 +1,374 @@
import type { Filing, FinancialStatementKind, TaxonomyStatementRow } from '@/lib/types';
import type { TaxonomyConcept, TaxonomyFact, TaxonomyPresentationConcept } from '@/lib/server/taxonomy/types';
import type { FilingTaxonomyPeriod } from '@/lib/server/repos/filing-taxonomy';
import { classifyStatementRole } from '@/lib/server/taxonomy/linkbase-parser';
import { conceptStatementFallback } from '@/lib/server/taxonomy/xbrl-parser';
/** Removes every hyphen from an accession number (e.g. `0000123456-26-000001` -> `000012345626000001`). */
function compactAccessionNumber(value: string) {
  return value.split('-').join('');
}
/**
 * Reports whether a namespace URI looks like a US-GAAP taxonomy namespace
 * (case-insensitive match on `fasb.org/us-gaap` or just `us-gaap`).
 */
function isUsGaapNamespace(namespaceUri: string) {
  const usGaapPatterns = [/fasb\.org\/us-gaap/i, /us-gaap/i];
  return usGaapPatterns.some((pattern) => pattern.test(namespaceUri));
}
/**
 * Splits a concept key of the form `<namespaceUri>#<localName>` at its last `#`.
 * A key without `#` is treated as a bare local name in the `urn:unknown` namespace.
 */
function splitConceptKey(conceptKey: string) {
  const hashAt = conceptKey.lastIndexOf('#');

  return hashAt < 0
    ? { namespaceUri: 'urn:unknown', localName: conceptKey }
    : {
        namespaceUri: conceptKey.slice(0, hashAt),
        localName: conceptKey.slice(hashAt + 1)
      };
}
/**
 * Derives a human-readable label from a concept local name by inserting
 * spaces at camel-case boundaries and replacing underscores with spaces.
 */
function localNameToLabel(localName: string) {
  // "netIncome" -> "net Income"
  const lowerToUpperSplit = localName.replace(/([a-z0-9])([A-Z])/g, '$1 $2');
  // "EBITDAMargin" -> "EBITDA Margin"
  const acronymSplit = lowerToUpperSplit.replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2');
  const withoutUnderscores = acronymSplit.replace(/_/g, ' ');
  return withoutUnderscores.trim();
}
/**
 * Builds a record with one entry per financial statement kind, calling
 * `factory` once per kind (in declaration order) so every entry gets its
 * own fresh value.
 */
function createStatementRecord<T>(factory: () => T): Record<FinancialStatementKind, T> {
  const income = factory();
  const balance = factory();
  const cashFlow = factory();
  const equity = factory();
  const comprehensiveIncome = factory();
  return {
    income,
    balance,
    cash_flow: cashFlow,
    equity,
    comprehensive_income: comprehensiveIncome
  };
}
/**
 * Produces a string key identifying a fact's reporting period.
 * Missing start / end / instant values are rendered as empty strings.
 */
function periodSignature(fact: TaxonomyFact) {
  const parts = [
    `start:${fact.periodStart ?? ''}`,
    `end:${fact.periodEnd ?? ''}`,
    `instant:${fact.periodInstant ?? ''}`
  ];
  return parts.join('|');
}
/**
 * Picks the date that represents a fact's period: the period end if set,
 * otherwise the instant, otherwise the supplied fallback.
 */
function periodDate(fact: TaxonomyFact, fallbackDate: string) {
  if (fact.periodEnd != null) {
    return fact.periodEnd;
  }
  if (fact.periodInstant != null) {
    return fact.periodInstant;
  }
  return fallbackDate;
}
/**
 * Converts a date string to epoch milliseconds via `Date.parse`.
 * Returns NaN for null / empty input (and for anything `Date.parse` rejects).
 */
function parseEpoch(value: string | null) {
  return value ? Date.parse(value) : Number.NaN;
}
/**
 * Returns a sorted copy of `periods` (the input array is not mutated).
 * Periods are ordered ascending by period end (falling back to the filing
 * date); when either date cannot be parsed, or both are equal, the
 * period ids are compared instead.
 */
function sortPeriods(periods: FilingTaxonomyPeriod[]) {
  const sorted = periods.slice();
  sorted.sort((a, b) => {
    const aEpoch = parseEpoch(a.periodEnd ?? a.filingDate);
    const bEpoch = parseEpoch(b.periodEnd ?? b.filingDate);
    const bothParsed = Number.isFinite(aEpoch) && Number.isFinite(bEpoch);
    if (bothParsed && aEpoch !== bEpoch) {
      return aEpoch - bEpoch;
    }
    return a.id.localeCompare(b.id);
  });
  return sorted;
}
/**
 * Chooses the single most representative fact from a group.
 *
 * Preference order: dimensionless facts first, then the most recent period
 * (end date, else instant) when both dates parse, then the larger absolute
 * value. Returns null for an empty group.
 */
function pickPreferredFact<T extends TaxonomyFact>(facts: T[]) {
  if (facts.length === 0) {
    return null;
  }
  const dimensionRank = (fact: T) => (fact.isDimensionless ? 1 : 0);
  const ranked = facts.slice().sort((a, b) => {
    const rankGap = dimensionRank(b) - dimensionRank(a);
    if (rankGap !== 0) {
      return rankGap;
    }
    const aEpoch = parseEpoch(a.periodEnd ?? a.periodInstant);
    const bEpoch = parseEpoch(b.periodEnd ?? b.periodInstant);
    const bothParsed = Number.isFinite(aEpoch) && Number.isFinite(bEpoch);
    if (bothParsed && aEpoch !== bEpoch) {
      return bEpoch - aEpoch;
    }
    return Math.abs(b.value) - Math.abs(a.value);
  });
  const [best] = ranked;
  return best ?? null;
}
/**
 * Turns a filing's parsed facts and presentation tree into statement rows,
 * concept records, period descriptors and flat fact rows.
 *
 * Steps:
 *  1. Derive one period per distinct (start, end, instant) signature.
 *  2. Tag every fact with a statement kind: the role of its first
 *     presentation node, else a fallback based on the concept's local name.
 *  3. For each statement, order concepts by their minimum presentation order
 *     (concepts with no presentation node sort last, ties broken by concept
 *     key) and emit one row per concept that has at least one value.
 *  4. Register a concept record for every concept that produced a row, then
 *     for every remaining fact concept (with null presentation fields).
 *
 * @param input.filingId        Filing id copied onto each period.
 * @param input.accessionNumber Accession number; its hyphen-less form is embedded in period ids.
 * @param input.filingDate      Fallback date for facts that have neither period end nor instant.
 * @param input.filingType      Filing form type copied onto each period.
 * @param input.facts           Parsed facts; `sourceFactIds` are 1-based indexes into this array.
 * @param input.presentation    Presentation nodes; input order decides which node supplies role and parent.
 * @param input.labelByConcept  Optional labels by concept key; a label derived from the local name is used otherwise.
 * @returns `periods`, `statement_rows`, `concepts`, `facts` and `dimensionsCount`.
 */
export function materializeTaxonomyStatements(input: {
  filingId: number;
  accessionNumber: string;
  filingDate: string;
  filingType: '10-K' | '10-Q';
  facts: TaxonomyFact[];
  presentation: TaxonomyPresentationConcept[];
  labelByConcept: Map<string, string>;
}) {
  const periodBySignature = new Map<string, FilingTaxonomyPeriod>();
  const compactAccession = compactAccessionNumber(input.accessionNumber);

  for (const fact of input.facts) {
    const signature = periodSignature(fact);
    if (periodBySignature.has(signature)) {
      continue;
    }

    const date = periodDate(fact, input.filingDate);
    // The trailing counter keeps ids unique when two signatures share a date.
    const id = `${date}-${compactAccession}-${periodBySignature.size + 1}`;

    periodBySignature.set(signature, {
      id,
      filingId: input.filingId,
      accessionNumber: input.accessionNumber,
      filingDate: input.filingDate,
      periodStart: fact.periodStart,
      periodEnd: fact.periodEnd ?? fact.periodInstant ?? input.filingDate,
      filingType: input.filingType,
      periodLabel: fact.periodInstant && !fact.periodStart
        ? 'Instant'
        : fact.periodStart && fact.periodEnd
          ? `${fact.periodStart} to ${fact.periodEnd}`
          : 'Filing Period'
    });
  }

  const periods = sortPeriods([...periodBySignature.values()]);
  const periodIdBySignature = new Map<string, string>(
    [...periodBySignature.entries()].map(([signature, period]) => [signature, period.id])
  );

  const presentationByConcept = new Map<string, TaxonomyPresentationConcept[]>();
  for (const node of input.presentation) {
    const existing = presentationByConcept.get(node.conceptKey);
    if (existing) {
      existing.push(node);
    } else {
      presentationByConcept.set(node.conceptKey, [node]);
    }
  }

  const enrichedFacts = input.facts.map((fact, index) => {
    const nodes = presentationByConcept.get(fact.conceptKey) ?? [];
    const bestNode = nodes[0] ?? null;
    const statementKind = bestNode
      ? classifyStatementRole(bestNode.roleUri)
      : conceptStatementFallback(fact.localName);

    return {
      ...fact,
      __sourceFactId: index + 1,
      statement_kind: statementKind,
      role_uri: bestNode?.roleUri ?? null
    };
  });

  const rowsByStatement = createStatementRecord<TaxonomyStatementRow[]>(() => []);
  const conceptByKey = new Map<string, TaxonomyConcept>();
  const groupedByStatement = createStatementRecord<Map<string, typeof enrichedFacts>>(() => new Map());

  for (const fact of enrichedFacts) {
    if (!fact.statement_kind) {
      continue;
    }

    const group = groupedByStatement[fact.statement_kind].get(fact.conceptKey);
    if (group) {
      group.push(fact);
    } else {
      groupedByStatement[fact.statement_kind].set(fact.conceptKey, [fact]);
    }
  }

  // Index presentation nodes by statement and concept in a single pass so the
  // per-concept lookup below does not re-scan (and re-classify) the whole
  // presentation list for every concept.
  const presentationByStatement = createStatementRecord<Map<string, TaxonomyPresentationConcept[]>>(
    () => new Map()
  );
  for (const node of input.presentation) {
    const kind = classifyStatementRole(node.roleUri);
    if (!kind) {
      continue;
    }
    const bucket = presentationByStatement[kind].get(node.conceptKey);
    if (bucket) {
      bucket.push(node);
    } else {
      presentationByStatement[kind].set(node.conceptKey, [node]);
    }
  }

  for (const statement of Object.keys(rowsByStatement) as FinancialStatementKind[]) {
    const nodesByConcept = presentationByStatement[statement];

    // Presentation concepts first (in first-seen order), then concepts that
    // only appear through facts.
    const conceptKeys = new Set<string>([
      ...nodesByConcept.keys(),
      ...groupedByStatement[statement].keys()
    ]);

    const orderedConcepts = [...conceptKeys]
      .map((conceptKey) => {
        const presentationNodes = nodesByConcept.get(conceptKey) ?? [];
        // Infinity (rather than a large finite sentinel) marks "no presentation
        // order": it still sorts last, and the Number.isFinite check further
        // down can then substitute a positional order instead of leaking the
        // sentinel into the row.
        const presentationOrder = presentationNodes.length > 0
          ? Math.min(...presentationNodes.map((node) => node.order))
          : Number.POSITIVE_INFINITY;
        const presentationDepth = presentationNodes.length > 0
          ? Math.min(...presentationNodes.map((node) => node.depth))
          : 0;
        const roleUri = presentationNodes[0]?.roleUri ?? null;
        const parentConceptKey = presentationNodes[0]?.parentConceptKey ?? null;

        return {
          conceptKey,
          presentationOrder,
          presentationDepth,
          roleUri,
          parentConceptKey
        };
      })
      .sort((left, right) => {
        if (left.presentationOrder !== right.presentationOrder) {
          return left.presentationOrder - right.presentationOrder;
        }
        return left.conceptKey.localeCompare(right.conceptKey);
      });

    for (const orderedConcept of orderedConcepts) {
      const facts = groupedByStatement[statement].get(orderedConcept.conceptKey) ?? [];
      const { namespaceUri, localName } = splitConceptKey(orderedConcept.conceptKey);
      const qname = facts[0]?.qname ?? `unknown:${localName}`;
      const label = input.labelByConcept.get(orderedConcept.conceptKey) ?? localNameToLabel(localName);
      const values: Record<string, number | null> = {};
      const units: Record<string, string | null> = {};

      // Bucket this concept's facts by period so each period yields one value.
      const factGroups = new Map<string, typeof facts>();
      for (const fact of facts) {
        const signature = periodSignature(fact);
        const group = factGroups.get(signature);
        if (group) {
          group.push(fact);
        } else {
          factGroups.set(signature, [fact]);
        }
      }

      const sourceFactIds: number[] = [];
      let hasDimensions = false;

      for (const [signature, group] of factGroups.entries()) {
        const periodId = periodIdBySignature.get(signature);
        if (!periodId) {
          continue;
        }

        const preferred = pickPreferredFact(group);
        if (!preferred) {
          continue;
        }

        values[periodId] = preferred.value;
        units[periodId] = preferred.unit;
        sourceFactIds.push(preferred.__sourceFactId);

        if (group.some((entry) => !entry.isDimensionless)) {
          hasDimensions = true;
        }
      }

      // Concepts that only exist in the presentation tree produce no row.
      if (Object.keys(values).length === 0) {
        continue;
      }

      const row: TaxonomyStatementRow = {
        key: orderedConcept.conceptKey,
        label,
        conceptKey: orderedConcept.conceptKey,
        qname,
        namespaceUri,
        localName,
        isExtension: !isUsGaapNamespace(namespaceUri),
        statement,
        roleUri: orderedConcept.roleUri,
        // No presentation order: fall back to the row's 1-based position.
        order: Number.isFinite(orderedConcept.presentationOrder)
          ? orderedConcept.presentationOrder
          : rowsByStatement[statement].length + 1,
        depth: orderedConcept.presentationDepth,
        parentKey: orderedConcept.parentConceptKey,
        values,
        units,
        hasDimensions,
        sourceFactIds
      };

      rowsByStatement[statement].push(row);

      // The first statement that emits a row for a concept defines its record.
      if (!conceptByKey.has(orderedConcept.conceptKey)) {
        conceptByKey.set(orderedConcept.conceptKey, {
          concept_key: orderedConcept.conceptKey,
          qname,
          namespace_uri: namespaceUri,
          local_name: localName,
          label,
          is_extension: !isUsGaapNamespace(namespaceUri),
          statement_kind: statement,
          role_uri: orderedConcept.roleUri,
          presentation_order: row.order,
          presentation_depth: row.depth,
          parent_concept_key: row.parentKey,
          is_abstract: /abstract/i.test(localName)
        });
      }
    }
  }

  // Concepts whose facts never produced a statement row still get a record.
  for (const fact of enrichedFacts) {
    if (conceptByKey.has(fact.conceptKey)) {
      continue;
    }

    conceptByKey.set(fact.conceptKey, {
      concept_key: fact.conceptKey,
      qname: fact.qname,
      namespace_uri: fact.namespaceUri,
      local_name: fact.localName,
      label: input.labelByConcept.get(fact.conceptKey) ?? localNameToLabel(fact.localName),
      is_extension: !isUsGaapNamespace(fact.namespaceUri),
      statement_kind: fact.statement_kind,
      role_uri: fact.role_uri,
      presentation_order: null,
      presentation_depth: null,
      parent_concept_key: null,
      is_abstract: /abstract/i.test(fact.localName)
    });
  }

  const concepts = [...conceptByKey.values()];

  const factRows = enrichedFacts.map((fact) => ({
    concept_key: fact.conceptKey,
    qname: fact.qname,
    namespace_uri: fact.namespaceUri,
    local_name: fact.localName,
    statement_kind: fact.statement_kind,
    role_uri: fact.role_uri,
    context_id: fact.contextId,
    unit: fact.unit,
    decimals: fact.decimals,
    value_num: fact.value,
    period_start: fact.periodStart,
    period_end: fact.periodEnd,
    period_instant: fact.periodInstant,
    dimensions: fact.dimensions,
    is_dimensionless: fact.isDimensionless,
    source_file: fact.sourceFile
  }));

  const dimensionsCount = enrichedFacts.reduce((total, fact) => {
    return total + fact.dimensions.length;
  }, 0);

  return {
    periods,
    statement_rows: rowsByStatement,
    concepts,
    facts: factRows,
    dimensionsCount
  };
}

View File

@@ -0,0 +1,55 @@
import { describe, expect, it } from 'bun:test';
import type { TaxonomyFact } from '@/lib/server/taxonomy/types';
import { deriveTaxonomyMetrics } from '@/lib/server/taxonomy/metrics';
/**
 * Test fixture: builds a dimensionless US-GAAP 2024 fact for `localName`
 * covering calendar year 2025; `overrides` replaces any default field.
 */
function fact(localName: string, value: number, overrides?: Partial<TaxonomyFact>): TaxonomyFact {
  const defaults: TaxonomyFact = {
    conceptKey: `http://fasb.org/us-gaap/2024#${localName}`,
    qname: `us-gaap:${localName}`,
    namespaceUri: 'http://fasb.org/us-gaap/2024',
    localName,
    contextId: 'c1',
    unit: 'iso4217:USD',
    decimals: '-6',
    value,
    periodStart: '2025-01-01',
    periodEnd: '2025-12-31',
    periodInstant: null,
    dimensions: [],
    isDimensionless: true,
    sourceFile: 'abc_htm.xml'
  };
  return Object.assign(defaults, overrides);
}
// Tests for deriveTaxonomyMetrics, using facts produced by the `fact` fixture above.
describe('taxonomy metric derivation', () => {
// `Revenues` precedes `SalesRevenueNet` in the revenue priority list, so 450
// is expected even though the SalesRevenueNet fact (500) is listed first.
// No direct debt concept is supplied, so debt is the current + non-current sum (15 + 35).
it('applies concept priority for canonical metrics and debt component fallback', () => {
const metrics = deriveTaxonomyMetrics([
fact('SalesRevenueNet', 500),
fact('Revenues', 450),
fact('NetIncomeLoss', 40),
fact('Assets', 1000),
fact('CashAndCashEquivalentsAtCarryingValue', 80),
fact('DebtCurrent', 15),
fact('LongTermDebtNoncurrent', 35)
]);
expect(metrics).toEqual({
revenue: 450,
netIncome: 40,
totalAssets: 1000,
cash: 80,
debt: 50
});
});
// A direct debt concept (90) must win over the 15 + 35 component sum.
it('uses direct debt concept before computed debt fallback when available', () => {
const metrics = deriveTaxonomyMetrics([
fact('DebtCurrent', 15),
fact('LongTermDebtNoncurrent', 35),
fact('LongTermDebtAndCapitalLeaseObligations', 90)
]);
expect(metrics.debt).toBe(90);
});
});

View File

@@ -0,0 +1,106 @@
import type { Filing } from '@/lib/types';
import type { TaxonomyFact } from '@/lib/server/taxonomy/types';
// Candidate concept local names for each derived metric, listed in priority
// order: pickBestFact walks a list from the top and uses the first name that
// has at least one matching fact (names are compared case-insensitively).
const METRIC_LOCAL_NAME_PRIORITY = {
revenue: [
'Revenues',
'SalesRevenueNet',
'RevenueFromContractWithCustomerExcludingAssessedTax',
'TotalRevenuesAndOtherIncome'
],
netIncome: ['NetIncomeLoss', 'ProfitLoss'],
totalAssets: ['Assets'],
cash: [
'CashAndCashEquivalentsAtCarryingValue',
'CashCashEquivalentsRestrictedCashAndRestrictedCashEquivalents'
],
// Concepts that report debt directly; tried before the component sum.
debtDirect: [
'DebtAndFinanceLeaseLiabilities',
'Debt',
'LongTermDebtAndCapitalLeaseObligations'
],
// Components summed by deriveTaxonomyMetrics when no direct debt concept matches.
debtCurrent: [
'DebtCurrent',
'ShortTermBorrowings',
'LongTermDebtCurrent'
],
debtNonCurrent: [
'LongTermDebtNoncurrent',
'LongTermDebt',
'DebtNoncurrent'
]
} as const;
/**
 * Parses a date string into epoch milliseconds with `Date.parse`;
 * null and empty strings yield NaN.
 */
function normalizeDateToEpoch(value: string | null) {
  return value ? Date.parse(value) : Number.NaN;
}
/** Case-insensitive equality check for two concept local names. */
function sameLocalName(left: string, right: string) {
  const normalizedLeft = left.toLowerCase();
  const normalizedRight = right.toLowerCase();
  return normalizedLeft === normalizedRight;
}
/**
 * Picks the most representative fact from `facts`, or null when empty.
 *
 * Ranking: dimensionless facts before dimensional ones, then the latest
 * period (end date, else instant) when both dates parse, then the larger
 * absolute value.
 */
function pickPreferredFact(facts: TaxonomyFact[]) {
  const dimensionRank = (fact: TaxonomyFact) => (fact.isDimensionless ? 1 : 0);
  const ranked = facts.slice().sort((a, b) => {
    const rankGap = dimensionRank(b) - dimensionRank(a);
    if (rankGap !== 0) {
      return rankGap;
    }
    const aEpoch = normalizeDateToEpoch(a.periodEnd ?? a.periodInstant);
    const bEpoch = normalizeDateToEpoch(b.periodEnd ?? b.periodInstant);
    const bothParsed = Number.isFinite(aEpoch) && Number.isFinite(bEpoch);
    if (bothParsed && aEpoch !== bEpoch) {
      return bEpoch - aEpoch;
    }
    return Math.abs(b.value) - Math.abs(a.value);
  });
  const [best] = ranked;
  return best ?? null;
}
/**
 * Walks `localNames` in priority order and returns the preferred fact for
 * the first name that has any matching fact; null when no name matches.
 */
function pickBestFact(facts: TaxonomyFact[], localNames: readonly string[]) {
  for (const candidateName of localNames) {
    const hits = facts.filter((fact) => sameLocalName(fact.localName, candidateName));
    if (hits.length > 0) {
      return pickPreferredFact(hits);
    }
  }
  return null;
}
/** Adds two values, but only when both are present; otherwise returns null. */
function sumIfBoth(left: number | null, right: number | null) {
  return left !== null && right !== null ? left + right : null;
}
/**
 * Derives the headline filing metrics from parsed taxonomy facts.
 *
 * Each metric takes the value of the best fact for its prioritized concept
 * names, or null when none is present. Debt prefers a directly reported
 * concept and otherwise falls back to current + non-current debt, which
 * requires both components.
 */
export function deriveTaxonomyMetrics(facts: TaxonomyFact[]): NonNullable<Filing['metrics']> {
  const valueFor = (localNames: readonly string[]) => pickBestFact(facts, localNames)?.value ?? null;

  const revenue = valueFor(METRIC_LOCAL_NAME_PRIORITY.revenue);
  const netIncome = valueFor(METRIC_LOCAL_NAME_PRIORITY.netIncome);
  const totalAssets = valueFor(METRIC_LOCAL_NAME_PRIORITY.totalAssets);
  const cash = valueFor(METRIC_LOCAL_NAME_PRIORITY.cash);

  let debt = valueFor(METRIC_LOCAL_NAME_PRIORITY.debtDirect);
  if (debt === null) {
    debt = sumIfBoth(
      valueFor(METRIC_LOCAL_NAME_PRIORITY.debtCurrent),
      valueFor(METRIC_LOCAL_NAME_PRIORITY.debtNonCurrent)
    );
  }

  return { revenue, netIncome, totalAssets, cash, debt };
}

View File

@@ -0,0 +1,49 @@
import { describe, expect, it } from 'bun:test';
import { __pdfValidationInternals } from '@/lib/server/taxonomy/pdf-validation';
// Tests for the helpers exposed through __pdfValidationInternals.
describe('pdf metric validation internals', () => {
// A fenced ```json block is unwrapped and parsed as-is (string pages stay
// strings at this stage); input without any JSON object yields null.
it('parses fenced json payloads and rejects invalid payloads', () => {
const parsed = __pdfValidationInternals.parseValidationPayload([
'```json',
'{"revenue":{"value":1000,"pages":[3]},"cash":{"value":200,"pages":["4"]}}',
'```'
].join('\n'));
expect(parsed).not.toBeNull();
expect(parsed?.revenue?.value).toBe(1000);
expect(parsed?.cash?.pages).toEqual(['4']);
expect(__pdfValidationInternals.parseValidationPayload('not-json')).toBeNull();
});
// Tolerance is max(1, 0.5% of |taxonomy value|): 4 is within 5, 7 is not,
// and small values always get an absolute tolerance of 1.
it('compares taxonomy vs llm values with fixed tolerance rules', () => {
expect(__pdfValidationInternals.diffStatus(1000, 1004)).toEqual({
status: 'matched',
absoluteDiff: 4,
relativeDiff: 0.004
});
expect(__pdfValidationInternals.diffStatus(1000, 1007)).toEqual({
status: 'mismatch',
absoluteDiff: 7,
relativeDiff: 0.007
});
expect(__pdfValidationInternals.diffStatus(0.5, 1.2)).toEqual({
status: 'matched',
absoluteDiff: 0.7,
relativeDiff: 0.7
});
// Exactly one side missing counts as a mismatch; both missing means not run.
expect(__pdfValidationInternals.diffStatus(null, 1)).toEqual({
status: 'mismatch',
absoluteDiff: null,
relativeDiff: null
});
expect(__pdfValidationInternals.diffStatus(null, null)).toEqual({
status: 'not_run',
absoluteDiff: null,
relativeDiff: null
});
});
});

View File

@@ -0,0 +1,336 @@
import { execFile } from 'node:child_process';
import { mkdtemp, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { promisify } from 'node:util';
import type { Filing, MetricValidationResult } from '@/lib/types';
import { runAiAnalysis } from '@/lib/server/ai';
import type { TaxonomyAsset, TaxonomyMetricValidationCheck } from '@/lib/server/taxonomy/types';
// Promise-returning wrapper around child_process.execFile (used to run `pdftotext`).
const execFileAsync = promisify(execFile);
// Metrics that are validated against the PDF, in the order checks are produced.
const METRIC_KEYS: Array<keyof NonNullable<Filing['metrics']>> = [
'revenue',
'netIncome',
'totalAssets',
'cash',
'debt'
];
/**
 * Locates the JSON portion of an LLM response.
 *
 * Prefers the body of the first fenced code block (with or without a `json`
 * tag); otherwise takes the text from the first `{` to the last `}`.
 * Returns null when neither form is present.
 */
function extractJsonCandidate(raw: string) {
  const fencedJson = raw.match(/```(?:json)?\s*([\s\S]*?)```/i)?.[1];
  if (fencedJson !== undefined) {
    return fencedJson;
  }

  const start = raw.indexOf('{');
  const end = raw.lastIndexOf('}');
  return start >= 0 && end > start ? raw.slice(start, end + 1) : null;
}

/**
 * Parses the metric payload out of a raw LLM response.
 *
 * @returns The parsed JSON object keyed by metric name, or null when the
 *   response has no JSON candidate, the candidate is not valid JSON, or the
 *   JSON is not a plain object. Arrays and scalars are rejected so that a
 *   malformed answer is reported as an invalid payload instead of being
 *   treated as "no metric found" for every key.
 */
function parseValidationPayload(raw: string) {
  const candidate = extractJsonCandidate(raw);
  if (!candidate) {
    return null;
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(candidate);
  } catch {
    return null;
  }

  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
    return null;
  }

  // Entries are still untrusted here; callers normalize `value` and `pages`
  // through asNumber / asPageNumbers before using them.
  return parsed as Record<string, {
    value?: number | string | null;
    pages?: Array<number | string>;
  }>;
}
/**
 * Coerces an untrusted value (typically from LLM JSON) to a finite number.
 *
 * Numbers pass through when finite. Strings have commas and whitespace
 * removed before conversion. Everything else, including empty or
 * whitespace-only strings, yields null.
 */
function asNumber(value: unknown) {
  if (typeof value === 'number') {
    return Number.isFinite(value) ? value : null;
  }

  if (typeof value === 'string') {
    const normalized = value.replace(/[,\s]/g, '');
    // Number('') is 0, which would turn a blank answer into a reported zero.
    if (normalized.length === 0) {
      return null;
    }
    const parsed = Number(normalized);
    return Number.isFinite(parsed) ? parsed : null;
  }

  return null;
}
/**
 * Normalizes an untrusted `pages` value into positive integer page numbers.
 *
 * Non-array input gives an empty list. Numeric entries and numeric strings
 * are truncated toward zero; anything non-numeric, non-finite or not
 * strictly positive after truncation is dropped.
 */
function asPageNumbers(raw: unknown): number[] {
  if (!Array.isArray(raw)) {
    return [];
  }

  const pages: number[] = [];
  for (const entry of raw) {
    let page = Number.NaN;
    if (typeof entry === 'number') {
      if (Number.isFinite(entry)) {
        page = Math.trunc(entry);
      }
    } else if (typeof entry === 'string') {
      const numeric = Number(entry);
      if (Number.isFinite(numeric)) {
        page = Math.trunc(numeric);
      }
    }

    if (Number.isFinite(page) && page > 0) {
      pages.push(page);
    }
  }
  return pages;
}
/**
 * Compares a taxonomy-derived value with the LLM-extracted one.
 *
 * - both null: `not_run`
 * - exactly one null: `mismatch` (diffs are null)
 * - otherwise: `matched` when the absolute difference is within
 *   max(1, 0.5% of |taxonomyValue|), else `mismatch`.
 *
 * `relativeDiff` divides by max(|taxonomyValue|, 1) so values near zero do
 * not blow up the ratio.
 */
function diffStatus(taxonomyValue: number | null, llmValue: number | null) {
  const taxonomyMissing = taxonomyValue === null;
  const llmMissing = llmValue === null;

  if (taxonomyMissing || llmMissing) {
    const bothMissing = taxonomyMissing && llmMissing;
    if (bothMissing) {
      return { status: 'not_run' as const, absoluteDiff: null, relativeDiff: null };
    }
    return { status: 'mismatch' as const, absoluteDiff: null, relativeDiff: null };
  }

  const magnitude = Math.abs(taxonomyValue);
  const absoluteDiff = Math.abs(taxonomyValue - llmValue);
  const relativeDiff = absoluteDiff / Math.max(magnitude, 1);
  const tolerance = Math.max(1, magnitude * 0.005);

  if (absoluteDiff <= tolerance) {
    return { status: 'matched' as const, absoluteDiff, relativeDiff };
  }
  return { status: 'mismatch' as const, absoluteDiff, relativeDiff };
}
/**
 * Downloads a PDF and extracts its text with the `pdftotext` CLI.
 *
 * The PDF is written to a fresh temporary directory that is always removed
 * afterwards, whether extraction succeeds or fails.
 *
 * @param url       Location of the PDF asset.
 * @param fetchImpl Fetch implementation used for the download.
 * @returns The trimmed extracted text, or null when the PDF yields no text.
 * @throws Error when the request is not OK, or when neither the response
 *   content type nor the URL suffix indicates a PDF. Failures from the file
 *   system or from `pdftotext` propagate unchanged.
 */
async function extractPdfText(url: string, fetchImpl: typeof fetch) {
  const requestInit = {
    headers: {
      Accept: 'application/pdf, */*;q=0.8'
    },
    cache: 'no-store' as const
  };
  const response = await fetchImpl(url, requestInit);
  if (!response.ok) {
    throw new Error(`PDF request failed (${response.status})`);
  }

  const contentType = response.headers.get('content-type') ?? '';
  const looksLikePdf = /pdf/i.test(contentType) || /\.pdf$/i.test(url);
  if (!looksLikePdf) {
    throw new Error(`Asset is not a PDF (${contentType || 'unknown content-type'})`);
  }

  const bytes = new Uint8Array(await response.arrayBuffer());
  const tempRoot = await mkdtemp(join(tmpdir(), 'fiscal-pdf-'));
  try {
    const pdfPath = join(tempRoot, 'source.pdf');
    await writeFile(pdfPath, bytes);

    // A trailing '-' makes pdftotext write to stdout instead of a file.
    const extraction = await execFileAsync('pdftotext', ['-layout', '-enc', 'UTF-8', pdfPath, '-'], {
      maxBuffer: 16 * 1024 * 1024
    });
    const text = extraction.stdout.trim();
    return text.length > 0 ? text : null;
  } finally {
    await rm(tempRoot, { recursive: true, force: true });
  }
}
/**
 * Builds the LLM prompt for metric extraction.
 *
 * The prompt embeds the taxonomy metrics as a JSON baseline (`{}` when
 * absent) and at most the first 80,000 characters of the PDF text, with
 * sections separated by blank lines.
 */
function validationPrompt(metrics: Filing['metrics'], pdfText: string) {
  const baseline = JSON.stringify(metrics ?? {});
  const sections: string[] = [];
  sections.push('Extract numeric financial metrics from the provided financial statement PDF text.');
  sections.push(`Taxonomy baseline metrics: ${baseline}`);
  sections.push('Return ONLY JSON with keys revenue, netIncome, totalAssets, cash, debt.');
  sections.push('Each key must map to: {"value": number|null, "pages": [number]}.');
  sections.push('Use null when a metric is not found.');
  sections.push('PDF text follows:');
  sections.push(pdfText.slice(0, 80_000));
  return sections.join('\n\n');
}
/** Trims a provider/model name; missing or blank values become null. */
function providerModelOrNull(value: string | undefined | null) {
  if (value == null) {
    return null;
  }
  const trimmed = value.trim();
  return trimmed.length > 0 ? trimmed : null;
}
/** Converts persistence-shaped (snake_case) checks into the camelCase entries used in the validation result. */
function toMetricValidationResultChecks(checks: TaxonomyMetricValidationCheck[]) {
  return checks.map((check) => ({
    metricKey: check.metric_key,
    taxonomyValue: check.taxonomy_value,
    llmValue: check.llm_value,
    absoluteDiff: check.absolute_diff,
    relativeDiff: check.relative_diff,
    status: check.status,
    evidencePages: check.evidence_pages,
    pdfUrl: check.pdf_url,
    provider: check.provider,
    model: check.model,
    error: check.error
  }));
}

/** Builds a check for a metric that has no LLM value to compare (extraction or model failure). */
function buildValuelessMetricCheck(input: {
  metricKey: TaxonomyMetricValidationCheck['metric_key'];
  taxonomyValue: number | null;
  status: TaxonomyMetricValidationCheck['status'];
  pdfUrl: string;
  provider: string | null;
  model: string | null;
  error: string | null;
}): TaxonomyMetricValidationCheck {
  return {
    metric_key: input.metricKey,
    taxonomy_value: input.taxonomyValue,
    llm_value: null,
    absolute_diff: null,
    relative_diff: null,
    status: input.status,
    evidence_pages: [],
    pdf_url: input.pdfUrl,
    provider: input.provider,
    model: input.model,
    error: input.error
  };
}

/**
 * Cross-checks taxonomy-derived metrics against values an LLM extracts from
 * the filing's selected PDF asset.
 *
 * Outcomes:
 *  - no selected PDF asset: `not_run`, no checks, `validatedAt` null;
 *  - PDF download/extraction throws: `error`, one error check per metric;
 *  - PDF yields no text: `not_run`, no checks;
 *  - LLM call fails or returns no usable JSON: `error`, one error check per metric;
 *  - otherwise each metric is compared with `diffStatus`, and the overall
 *    status is `error` > `mismatch` > `matched`, or `not_run` when no metric
 *    could be compared on either side.
 *
 * This function does not throw for PDF or LLM failures; they are reported
 * through the returned checks.
 *
 * @param input.metrics   Taxonomy-derived metrics (all-null defaults are used when absent).
 * @param input.assets    Filing assets; the first selected asset of type `pdf` is used.
 * @param input.fetchImpl Optional fetch override (defaults to the global `fetch`).
 * @returns The camelCase `validation_result` plus the snake_case `metric_validations` rows.
 */
export async function validateMetricsWithPdfLlm(input: {
  metrics: Filing['metrics'];
  assets: TaxonomyAsset[];
  fetchImpl?: typeof fetch;
}): Promise<{
  validation_result: MetricValidationResult | null;
  metric_validations: TaxonomyMetricValidationCheck[];
}> {
  const taxonomyMetrics = input.metrics ?? {
    revenue: null,
    netIncome: null,
    totalAssets: null,
    cash: null,
    debt: null
  };

  const selectedPdf = input.assets.find((asset) => asset.asset_type === 'pdf' && asset.is_selected);
  if (!selectedPdf) {
    return {
      validation_result: {
        status: 'not_run',
        checks: [],
        validatedAt: null
      },
      metric_validations: []
    };
  }

  const pdfUrl = selectedPdf.url;
  const fetchImpl = input.fetchImpl ?? fetch;

  let pdfText: string | null = null;
  try {
    pdfText = await extractPdfText(pdfUrl, fetchImpl);
  } catch (error) {
    const message = error instanceof Error ? error.message : 'PDF extraction failed';
    const checks = METRIC_KEYS.map((metricKey) => buildValuelessMetricCheck({
      metricKey,
      taxonomyValue: taxonomyMetrics[metricKey] ?? null,
      status: 'error',
      pdfUrl,
      provider: null,
      model: null,
      error: message
    }));

    return {
      validation_result: {
        status: 'error',
        checks: toMetricValidationResultChecks(checks),
        validatedAt: new Date().toISOString()
      },
      metric_validations: checks
    };
  }

  if (!pdfText) {
    return {
      validation_result: {
        status: 'not_run',
        checks: [],
        validatedAt: new Date().toISOString()
      },
      metric_validations: []
    };
  }

  let parsed: Record<string, { value?: number | string | null; pages?: Array<number | string> }> | null = null;
  let provider: string | null = null;
  let model: string | null = null;
  let modelError: string | null = null;

  try {
    const aiResult = await runAiAnalysis(validationPrompt(taxonomyMetrics, pdfText), undefined, {
      workload: 'extraction'
    });
    provider = providerModelOrNull(aiResult.provider);
    model = providerModelOrNull(aiResult.model);
    parsed = parseValidationPayload(aiResult.text);
    if (!parsed) {
      modelError = 'LLM response did not contain valid JSON payload';
    }
  } catch (error) {
    modelError = error instanceof Error ? error.message : 'LLM validation failed';
  }

  const payload = parsed;
  const validations: TaxonomyMetricValidationCheck[] = METRIC_KEYS.map((metricKey) => {
    const taxonomyValue = taxonomyMetrics[metricKey] ?? null;

    if (!payload) {
      return buildValuelessMetricCheck({
        metricKey,
        taxonomyValue,
        status: modelError ? 'error' : 'not_run',
        pdfUrl,
        provider,
        model,
        error: modelError
      });
    }

    const entry = payload[metricKey as string] ?? {};
    const llmValue = asNumber(entry.value);
    const pages = asPageNumbers(entry.pages);
    const diff = diffStatus(taxonomyValue, llmValue);

    return {
      metric_key: metricKey,
      taxonomy_value: taxonomyValue,
      llm_value: llmValue,
      absolute_diff: diff.absoluteDiff,
      relative_diff: diff.relativeDiff,
      status: diff.status,
      evidence_pages: pages,
      pdf_url: pdfUrl,
      provider,
      model,
      error: null
    };
  });

  const hasError = validations.some((entry) => entry.status === 'error');
  const hasMismatch = validations.some((entry) => entry.status === 'mismatch');
  const hasMatch = validations.some((entry) => entry.status === 'matched');
  // Only claim 'matched' when at least one metric was actually compared; if
  // every check is 'not_run' (nothing on either side), the run verified nothing.
  const overallStatus = hasError
    ? 'error'
    : hasMismatch
      ? 'mismatch'
      : hasMatch
        ? 'matched'
        : 'not_run';

  return {
    validation_result: {
      status: overallStatus,
      checks: toMetricValidationResultChecks(validations),
      validatedAt: new Date().toISOString()
    },
    metric_validations: validations
  };
}
// Exposes otherwise module-private helpers; the unit tests for this module
// import them through this object.
export const __pdfValidationInternals = {
parseValidationPayload,
diffStatus
};

View File

@@ -0,0 +1,136 @@
import type { Filing, FinancialStatementKind, MetricValidationResult, TaxonomyStatementRow } from '@/lib/types';
import type {
FilingTaxonomyAssetType,
FilingTaxonomyParseStatus,
FilingTaxonomyPeriod,
FilingTaxonomySource
} from '@/lib/server/repos/filing-taxonomy';
// A downloadable asset associated with a filing.
// validateMetricsWithPdfLlm uses the first asset whose `asset_type` is 'pdf'
// and whose `is_selected` flag is set, and fetches it from `url`.
// NOTE(review): how `score` is computed is not visible here — confirm with the producer.
export type TaxonomyAsset = {
asset_type: FilingTaxonomyAssetType;
name: string;
url: string;
size_bytes: number | null;
score: number | null;
is_selected: boolean;
};
// Maps an XML namespace prefix (e.g. `us-gaap`) to its namespace URI.
export type TaxonomyNamespaceMap = Record<string, string>;
// An XBRL context: its id, its period (start/end and/or instant, each null
// when the element is absent) and any explicit dimension members.
export type TaxonomyContext = {
id: string;
periodStart: string | null;
periodEnd: string | null;
periodInstant: string | null;
dimensions: Array<{ axis: string; member: string }>;
};
// An XBRL unit. `measure` is null when the unit declares no measure; the
// unit parser joins multiple measures with '/'.
export type TaxonomyUnit = {
id: string;
measure: string | null;
};
// A single numeric fact from an XBRL instance, with its period and
// dimensions carried alongside the value.
export type TaxonomyFact = {
// `${namespaceUri}#${localName}`
conceptKey: string;
qname: string;
namespaceUri: string;
localName: string;
contextId: string;
unit: string | null;
decimals: string | null;
value: number;
periodStart: string | null;
periodEnd: string | null;
periodInstant: string | null;
dimensions: Array<{ axis: string; member: string }>;
// presumably true exactly when `dimensions` is empty — confirm in the instance parser
isDimensionless: boolean;
sourceFile: string | null;
};
// One concept's position in a presentation role: its ordering value, depth
// in the tree (roots are 0) and parent concept (null for roots).
export type TaxonomyPresentationConcept = {
conceptKey: string;
qname: string;
roleUri: string;
// May be fractional: the presentation traversal adds (childIndex + 1) / 1000 per level.
order: number;
depth: number;
parentConceptKey: string | null;
isAbstract: boolean;
};
// Persistence-shaped (snake_case) record describing a concept seen in a filing.
// The presentation_* and parent fields are null for concepts that never
// produced a statement row.
export type TaxonomyConcept = {
concept_key: string;
qname: string;
namespace_uri: string;
local_name: string;
label: string | null;
is_extension: boolean;
statement_kind: FinancialStatementKind | null;
role_uri: string | null;
presentation_order: number | null;
presentation_depth: number | null;
parent_concept_key: string | null;
is_abstract: boolean;
};
// Result of comparing one taxonomy-derived metric with the value an LLM
// extracted from the filing PDF (snake_case, persistence-shaped).
export type TaxonomyMetricValidationCheck = {
metric_key: keyof NonNullable<Filing['metrics']>;
taxonomy_value: number | null;
llm_value: number | null;
// Both diffs are null unless both values were available for comparison.
absolute_diff: number | null;
relative_diff: number | null;
status: 'not_run' | 'matched' | 'mismatch' | 'error';
evidence_pages: number[];
pdf_url: string | null;
provider: string | null;
model: string | null;
error: string | null;
};
// Identifies the filing to hydrate taxonomy data for.
// NOTE(review): the consumer of this type is outside this view — confirm how
// `filingUrl` and `primaryDocument` are used when they are null.
export type TaxonomyHydrationInput = {
filingId: number;
ticker: string;
cik: string;
accessionNumber: string;
filingDate: string;
filingType: '10-K' | '10-Q';
filingUrl: string | null;
primaryDocument: string | null;
};
// Full taxonomy hydration output for one filing: parse status, periods,
// statement rows, derived metrics, PDF validation outcome, and the flat
// concept / fact / asset rows.
// NOTE(review): the producer is outside this view — presumably the *_count
// fields mirror the lengths of the corresponding arrays; confirm.
export type TaxonomyHydrationResult = {
filing_id: number;
ticker: string;
filing_date: string;
filing_type: '10-K' | '10-Q';
parse_status: FilingTaxonomyParseStatus;
parse_error: string | null;
source: FilingTaxonomySource;
periods: FilingTaxonomyPeriod[];
statement_rows: Record<FinancialStatementKind, TaxonomyStatementRow[]>;
derived_metrics: Filing['metrics'];
validation_result: MetricValidationResult | null;
facts_count: number;
concepts_count: number;
dimensions_count: number;
assets: TaxonomyAsset[];
concepts: TaxonomyConcept[];
// Same field set as the fact rows returned by materializeTaxonomyStatements.
facts: Array<{
concept_key: string;
qname: string;
namespace_uri: string;
local_name: string;
statement_kind: FinancialStatementKind | null;
role_uri: string | null;
context_id: string;
unit: string | null;
decimals: string | null;
value_num: number;
period_start: string | null;
period_end: string | null;
period_instant: string | null;
dimensions: Array<{ axis: string; member: string }>;
is_dimensionless: boolean;
source_file: string | null;
}>;
metric_validations: TaxonomyMetricValidationCheck[];
};

View File

@@ -0,0 +1,60 @@
import { describe, expect, it } from 'bun:test';
import { parseXbrlInstance } from '@/lib/server/taxonomy/xbrl-parser';
// Minimal XBRL instance fixture: a duration context without dimensions (c1),
// an instant context with one explicit member (c2), a USD unit, two numeric
// us-gaap facts with thousands separators, and one non-numeric dei fact.
const SAMPLE_XBRL = `
<xbrli:xbrl xmlns:xbrli="http://www.xbrl.org/2003/instance"
xmlns:xbrldi="http://xbrl.org/2006/xbrldi"
xmlns:us-gaap="http://fasb.org/us-gaap/2024"
xmlns:dei="http://xbrl.sec.gov/dei/2024">
<xbrli:context id="c1">
<xbrli:period>
<xbrli:startDate>2025-01-01</xbrli:startDate>
<xbrli:endDate>2025-12-31</xbrli:endDate>
</xbrli:period>
</xbrli:context>
<xbrli:context id="c2">
<xbrli:entity>
<xbrli:segment>
<xbrldi:explicitMember dimension="us-gaap:StatementBusinessSegmentsAxis">us-gaap:ConsolidatedGroupMember</xbrldi:explicitMember>
</xbrli:segment>
</xbrli:entity>
<xbrli:period>
<xbrli:instant>2025-12-31</xbrli:instant>
</xbrli:period>
</xbrli:context>
<xbrli:unit id="u1">
<xbrli:measure>iso4217:USD</xbrli:measure>
</xbrli:unit>
<us-gaap:Revenues contextRef="c1" unitRef="u1" decimals="-6">1,234</us-gaap:Revenues>
<us-gaap:Assets contextRef="c2" unitRef="u1" decimals="-6">5,678</us-gaap:Assets>
<dei:EntityRegistrantName contextRef="c1">Acme Corp</dei:EntityRegistrantName>
</xbrli:xbrl>
`;
// End-to-end test of parseXbrlInstance against the SAMPLE_XBRL fixture.
describe('xbrl instance parser', () => {
it('parses contexts, units, numeric facts, dimensions, and concept keys', () => {
const parsed = parseXbrlInstance(SAMPLE_XBRL, 'abc_htm.xml');
expect(parsed.contexts.c1?.periodStart).toBe('2025-01-01');
expect(parsed.contexts.c1?.periodEnd).toBe('2025-12-31');
expect(parsed.contexts.c2?.periodInstant).toBe('2025-12-31');
expect(parsed.contexts.c2?.dimensions.length).toBe(1);
expect(parsed.units.u1?.measure).toBe('iso4217:USD');
// Only the two numeric us-gaap facts count; the text-valued dei fact is excluded.
expect(parsed.facts.length).toBe(2);
const revenueFact = parsed.facts.find((fact) => fact.localName === 'Revenues');
const assetsFact = parsed.facts.find((fact) => fact.localName === 'Assets');
expect(revenueFact?.conceptKey).toBe('http://fasb.org/us-gaap/2024#Revenues');
expect(revenueFact?.isDimensionless).toBe(true);
// "1,234" in the fixture: thousands separators are stripped.
expect(revenueFact?.value).toBe(1234);
expect(revenueFact?.sourceFile).toBe('abc_htm.xml');
expect(assetsFact?.conceptKey).toBe('http://fasb.org/us-gaap/2024#Assets');
// Context c2 carries an explicit member, so the fact is dimensional.
expect(assetsFact?.isDimensionless).toBe(false);
expect(assetsFact?.dimensions[0]).toEqual({
axis: 'us-gaap:StatementBusinessSegmentsAxis',
member: 'us-gaap:ConsolidatedGroupMember'
});
});
});

View File

@@ -0,0 +1,264 @@
import type { FinancialStatementKind } from '@/lib/types';
import type { TaxonomyContext, TaxonomyFact, TaxonomyNamespaceMap, TaxonomyUnit } from '@/lib/server/taxonomy/types';
/** Named entities recognised by decodeXmlEntities (matched case-insensitively). */
const XML_NAMED_ENTITY_TEXT: Record<string, string> = {
  amp: '&',
  lt: '<',
  gt: '>',
  quot: '"',
  apos: "'",
  // Non-breaking spaces are normalized to a plain space.
  nbsp: ' '
};

/** Matches hex (`&#x41;`), decimal (`&#65;`) and the supported named entities. */
const XML_ENTITY_PATTERN = /&(?:#x([0-9a-f]+)|#([0-9]+)|(amp|lt|gt|quot|apos|nbsp));/gi;

/**
 * Decodes XML character entities in `value`.
 *
 * Decoding happens in a single pass, so text produced by one entity is never
 * re-interpreted as another (`&amp;lt;` becomes `&lt;`, not `<`). Supported
 * forms: `&amp; &lt; &gt; &quot; &apos; &nbsp;` plus decimal and hexadecimal
 * numeric references. `&nbsp;` and `&#160;` become a regular space, and
 * numeric references outside the valid code point range also become a space.
 * Unrecognised entities are left untouched.
 */
function decodeXmlEntities(value: string) {
  const fromCodePoint = (codePoint: number) => {
    if (!Number.isFinite(codePoint)) {
      return ' ';
    }
    try {
      return String.fromCodePoint(codePoint);
    } catch {
      // String.fromCodePoint throws RangeError for invalid code points.
      return ' ';
    }
  };

  return value.replace(
    XML_ENTITY_PATTERN,
    (match: string, hex?: string, decimal?: string, name?: string) => {
      if (hex !== undefined) {
        return fromCodePoint(Number.parseInt(hex, 16));
      }
      if (decimal !== undefined) {
        return decimal === '160' ? ' ' : fromCodePoint(Number.parseInt(decimal, 10));
      }
      if (name !== undefined) {
        return XML_NAMED_ENTITY_TEXT[name.toLowerCase()] ?? match;
      }
      return match;
    }
  );
}
/**
 * Parses the text content of a numeric fact.
 *
 * Handles accounting-style negatives in parentheses, thousands separators,
 * dollar signs, embedded markup and the Unicode minus sign. Returns null for
 * blank input, dash-only placeholders (`--`) and anything that does not
 * start with a finite number once cleaned.
 */
function parseNumber(value: string) {
  const text = value.trim();
  if (text.length === 0 || /^--+$/.test(text)) {
    return null;
  }

  const wrappedInParens = text.startsWith('(') && text.endsWith(')');

  let cleaned = text.replace(/<[^>]+>/g, ' ');
  cleaned = cleaned.replace(/[,$\s]/g, '');
  cleaned = cleaned.replace(/[()]/g, '');
  cleaned = cleaned.replace(/\u2212/g, '-');
  if (cleaned.length === 0) {
    return null;
  }

  const numeric = Number.parseFloat(cleaned);
  if (!Number.isFinite(numeric)) {
    return null;
  }

  // "(500)" means -500; Math.abs keeps "(-500)" negative too.
  if (wrappedInParens) {
    return -Math.abs(numeric);
  }
  return numeric;
}
/**
 * Collects `xmlns:prefix="uri"` declarations into a prefix-to-URI map.
 *
 * Only the first tag whose text contains "xbrl" is scanned; when no such tag
 * exists, the first 1200 characters of the document are scanned instead.
 * A later declaration of the same prefix overwrites an earlier one.
 */
function parseNamespaceMapFromDocument(raw: string): TaxonomyNamespaceMap {
  const rootTag = /<[^>]*xbrl[^>]*>/i.exec(raw);
  const searchArea = rootTag?.[0] ?? raw.slice(0, 1200);

  const namespaces: TaxonomyNamespaceMap = {};
  const declarations = searchArea.matchAll(/xmlns:([a-zA-Z0-9_\-]+)=["']([^"']+)["']/g);
  for (const [, rawPrefix = '', rawUri = ''] of declarations) {
    const prefix = rawPrefix.trim();
    const uri = rawUri.trim();
    if (prefix && uri) {
      namespaces[prefix] = uri;
    }
  }
  return namespaces;
}
/**
 * Extracts every `<context>` element (with any namespace prefix) from a raw
 * XBRL document, keyed by context id.
 *
 * For each context the first start date, end date and instant are captured
 * (null when absent), along with every explicit dimension member whose axis
 * and member are both non-empty. A later context with the same id replaces
 * an earlier one.
 */
function parseContexts(raw: string): Record<string, TaxonomyContext> {
  const contextPattern = /<(?:[a-z0-9_\-]+:)?context\b[^>]*\bid=["']([^"']+)["'][^>]*>([\s\S]*?)<\/(?:[a-z0-9_\-]+:)?context>/gi;
  const dimPattern = /<(?:[a-z0-9_\-]+:)?explicitMember\b[^>]*\bdimension=["']([^"']+)["'][^>]*>([^<]+)<\/(?:[a-z0-9_\-]+:)?explicitMember>/gi;
  const firstCapture = (found: RegExpMatchArray | null) => found?.[1]?.trim() ?? null;

  const byId: Record<string, TaxonomyContext> = {};

  for (const [, rawId = '', block = ''] of raw.matchAll(contextPattern)) {
    const id = rawId.trim();
    if (id) {
      const dimensions: Array<{ axis: string; member: string }> = [];
      for (const [, rawAxis = '', rawMember = ''] of block.matchAll(dimPattern)) {
        const axis = decodeXmlEntities(rawAxis.trim());
        const member = decodeXmlEntities(rawMember.trim());
        if (axis && member) {
          dimensions.push({ axis, member });
        }
      }

      byId[id] = {
        id,
        periodStart: firstCapture(block.match(/<(?:[a-z0-9_\-]+:)?startDate>([^<]+)<\/(?:[a-z0-9_\-]+:)?startDate>/i)),
        periodEnd: firstCapture(block.match(/<(?:[a-z0-9_\-]+:)?endDate>([^<]+)<\/(?:[a-z0-9_\-]+:)?endDate>/i)),
        periodInstant: firstCapture(block.match(/<(?:[a-z0-9_\-]+:)?instant>([^<]+)<\/(?:[a-z0-9_\-]+:)?instant>/i)),
        dimensions
      };
    }
  }

  return byId;
}
/**
 * Extracts every `<unit id="...">` element (with or without a namespace
 * prefix) and returns the units keyed by id.
 *
 * All non-empty `<measure>` values inside a unit are joined with `/`; a unit
 * with no measures gets `measure: null`.
 */
function parseUnits(raw: string): Record<string, TaxonomyUnit> {
  const unitPattern = /<(?:[a-z0-9_\-]+:)?unit\b[^>]*\bid=["']([^"']+)["'][^>]*>([\s\S]*?)<\/(?:[a-z0-9_\-]+:)?unit>/gi;
  const measurePattern = /<(?:[a-z0-9_\-]+:)?measure>([^<]+)<\/(?:[a-z0-9_\-]+:)?measure>/gi;

  const result: Record<string, TaxonomyUnit> = {};
  for (const [, rawId = '', body = ''] of raw.matchAll(unitPattern)) {
    const id = rawId.trim();
    if (!id) {
      continue;
    }

    const names: string[] = [];
    for (const hit of body.matchAll(measurePattern)) {
      const name = decodeXmlEntities((hit[1] ?? '').trim());
      if (name) {
        names.push(name);
      }
    }

    // A single measure joins to itself, so one expression covers 1..n measures.
    result[id] = { id, measure: names.length === 0 ? null : names.join('/') };
  }
  return result;
}
/**
 * Guesses which financial statement a concept belongs to from keywords in its
 * local name (case-insensitive). Rules are evaluated in order and the first
 * hit wins; `null` is returned when no keyword matches.
 */
function classifyStatementKind(localName: string): FinancialStatementKind | null {
  const rules: Array<[RegExp, FinancialStatementKind]> = [
    [/cash|operatingactivities|investingactivities|financingactivities/, 'cash_flow'],
    [/equity|retainedearnings|additionalpaidincapital/, 'equity'],
    [/comprehensiveincome/, 'comprehensive_income'],
    [/asset|liabilit|debt/, 'balance'],
    [/revenue|income|profit|expense|costof/, 'income']
  ];

  const lowered = localName.toLowerCase();
  for (const [pattern, kind] of rules) {
    if (pattern.test(lowered)) {
      return kind;
    }
  }
  return null;
}
/**
 * Reports whether a namespace prefix is one of the structural prefixes
 * (`xbrli`, `xlink`, `link`, `xbrldi`, `xbrldt`), compared case-insensitively.
 */
function isXbrlInfrastructurePrefix(prefix: string) {
  switch (prefix.toLowerCase()) {
    case 'xbrli':
    case 'xlink':
    case 'link':
    case 'xbrldi':
    case 'xbrldt':
      return true;
    default:
      return false;
  }
}
/** Builds a concept key of the form `<namespaceUri>#<localName>`. */
function localNameToKey(namespaceUri: string, localName: string) {
  return namespaceUri + '#' + localName;
}
/**
 * Parses an XBRL instance document into its namespaces, contexts, units and
 * numeric facts using regular expressions (no XML parser involved).
 *
 * A fact is any prefixed element carrying a `contextRef` attribute whose
 * prefix is not an XBRL infrastructure prefix and whose body parses as a
 * number; everything else is skipped. Period and dimension data are copied
 * from the referenced context when it exists.
 *
 * @param raw Full text of the instance document.
 * @param sourceFile Value copied verbatim onto every fact's `sourceFile`.
 * @returns The namespace map, contexts by id, units by id, and extracted facts.
 */
export function parseXbrlInstance(
  raw: string,
  sourceFile: string | null
): {
  namespaces: TaxonomyNamespaceMap;
  contexts: Record<string, TaxonomyContext>;
  units: Record<string, TaxonomyUnit>;
  facts: TaxonomyFact[];
} {
  const namespaces = parseNamespaceMapFromDocument(raw);
  const contexts = parseContexts(raw);
  const units = parseUnits(raw);
  const facts: TaxonomyFact[] = [];

  // The attribute section must not end in `/`: a self-closing element (for
  // example an xsi:nil fact) has no closing tag of its own, so treating it as
  // an opening tag would let the lazy body run to the next `</prefix:name>`
  // and swallow every fact in between.
  const factPattern = /<([a-zA-Z0-9_\-]+):([a-zA-Z0-9_\-.]+)\b([^>]*\bcontextRef=["'][^"']+["'](?:[^>]*[^>\/])?)>([\s\S]*?)<\/\1:\2>/g;

  for (const match of raw.matchAll(factPattern)) {
    const prefix = (match[1] ?? '').trim();
    const localName = (match[2] ?? '').trim();
    const attrs = match[3] ?? '';
    const body = decodeXmlEntities((match[4] ?? '').trim());

    if (!prefix || !localName || isXbrlInfrastructurePrefix(prefix)) {
      continue;
    }

    const contextId = attrs.match(/\bcontextRef=["']([^"']+)["']/i)?.[1]?.trim() ?? '';
    if (!contextId) {
      continue;
    }

    // Non-numeric facts (text blocks, dates, flags) are intentionally dropped.
    const value = parseNumber(body);
    if (value === null) {
      continue;
    }

    const unitRef = attrs.match(/\bunitRef=["']([^"']+)["']/i)?.[1]?.trim() ?? null;
    const decimals = attrs.match(/\bdecimals=["']([^"']+)["']/i)?.[1]?.trim() ?? null;
    // Unknown prefixes still get a stable, clearly synthetic namespace.
    const namespaceUri = namespaces[prefix] ?? `urn:unknown:${prefix}`;
    const context = contexts[contextId];
    // Prefer the resolved measure; fall back to the raw unitRef when the unit
    // is unknown or has no measure.
    const unitMeasure = unitRef ? units[unitRef]?.measure : null;

    facts.push({
      conceptKey: localNameToKey(namespaceUri, localName),
      qname: `${prefix}:${localName}`,
      namespaceUri,
      localName,
      contextId,
      unit: unitMeasure || unitRef,
      decimals,
      value,
      periodStart: context?.periodStart ?? null,
      periodEnd: context?.periodEnd ?? null,
      periodInstant: context?.periodInstant ?? null,
      dimensions: context?.dimensions ?? [],
      isDimensionless: (context?.dimensions.length ?? 0) === 0,
      sourceFile,
    });
  }

  return {
    namespaces,
    contexts,
    units,
    facts
  };
}
/**
 * Exported entry point for the keyword-based statement classification:
 * returns whatever `classifyStatementKind` yields for the given local name.
 */
export function conceptStatementFallback(localName: string): FinancialStatementKind | null {
  const kind = classifyStatementKind(localName);
  return kind;
}

View File

@@ -98,6 +98,12 @@ export type TaskStage =
| 'completed'
| 'failed'
| 'sync.fetch_filings'
| 'sync.discover_assets'
| 'sync.extract_taxonomy'
| 'sync.normalize_taxonomy'
| 'sync.derive_metrics'
| 'sync.validate_pdf_metrics'
| 'sync.persist_taxonomy'
| 'sync.fetch_metrics'
| 'sync.persist_filings'
| 'sync.hydrate_statements'
@@ -169,7 +175,6 @@ export type CompanyFinancialPoint = {
debt: number | null;
};
export type FinancialStatementMode = 'standardized' | 'filing_faithful';
/** The closed set of statement identifiers used to tag statement rows and facts. */
export type FinancialStatementKind = 'income' | 'balance' | 'cash_flow' | 'equity' | 'comprehensive_income';
/** History window selector: either `'10y'` or `'all'`. */
export type FinancialHistoryWindow = '10y' | 'all';
@@ -178,11 +183,79 @@ export type FinancialStatementPeriod = {
filingId: number;
accessionNumber: string;
filingDate: string;
periodStart: string | null;
periodEnd: string | null;
filingType: Extract<Filing['filing_type'], '10-K' | '10-Q'>;
periodLabel: string;
};
/** One axis/member pair; the element type of `TaxonomyFactRow.dimensions`. */
export type TaxonomyDimensionMember = {
axis: string;
member: string;
};
/**
 * One row of a taxonomy-based statement; the element type of
 * `CompanyFinancialStatementsResponse.rows`.
 */
export type TaxonomyStatementRow = {
key: string;
label: string;
conceptKey: string;
qname: string;
namespaceUri: string;
localName: string;
isExtension: boolean;
statement: FinancialStatementKind;
roleUri: string | null;
order: number;
depth: number;
// Key of the parent row, or null when there is none.
parentKey: string | null;
// NOTE(review): the meaning of the string keys of `values` and `units` is not
// visible here — presumably a per-period identifier; confirm against the producer.
values: Record<string, number | null>;
units: Record<string, string | null>;
hasDimensions: boolean;
// Numeric ids — presumably `TaxonomyFactRow.id` values; TODO confirm.
sourceFactIds: number[];
};
/**
 * One stored numeric fact; the element type of
 * `CompanyFinancialStatementsResponse.facts.rows`.
 */
export type TaxonomyFactRow = {
id: number;
snapshotId: number;
filingId: number;
filingDate: string;
// Nullable: a fact is not required to be assigned to a statement.
statement: FinancialStatementKind | null;
roleUri: string | null;
conceptKey: string;
qname: string;
namespaceUri: string;
localName: string;
value: number;
contextId: string;
unit: string | null;
decimals: string | null;
// Period fields are all nullable strings; their format is not specified here.
periodStart: string | null;
periodEnd: string | null;
periodInstant: string | null;
dimensions: TaxonomyDimensionMember[];
isDimensionless: boolean;
sourceFile: string | null;
};
/**
 * Result of checking a single metric; the element type of
 * `MetricValidationResult.checks`.
 */
export type MetricValidationCheck = {
// Restricted to the property names of a non-null `Filing['metrics']` object.
metricKey: keyof NonNullable<Filing['metrics']>;
taxonomyValue: number | null;
llmValue: number | null;
// NOTE(review): presumably the differences between taxonomyValue and llmValue —
// the computation is not visible here; confirm against the producer.
absoluteDiff: number | null;
relativeDiff: number | null;
status: 'not_run' | 'matched' | 'mismatch' | 'error';
evidencePages: number[];
pdfUrl: string | null;
provider: string | null;
model: string | null;
error: string | null;
};
/**
 * Overall validation outcome plus the per-metric checks; exposed (nullable) as
 * `CompanyFinancialStatementsResponse.metrics.validation`.
 */
export type MetricValidationResult = {
status: 'not_run' | 'matched' | 'mismatch' | 'error';
checks: MetricValidationCheck[];
// Nullable string; its format is not specified in this type.
validatedAt: string | null;
};
export type StandardizedStatementRow = {
key: string;
label: string;
@@ -220,16 +293,20 @@ export type CompanyFinancialStatementsResponse = {
companyName: string;
cik: string | null;
};
mode: FinancialStatementMode;
statement: FinancialStatementKind;
window: FinancialHistoryWindow;
periods: FinancialStatementPeriod[];
rows: StandardizedStatementRow[] | FilingFaithfulStatementRow[];
rows: TaxonomyStatementRow[];
nextCursor: string | null;
facts: {
rows: TaxonomyFactRow[];
nextCursor: string | null;
} | null;
coverage: {
filings: number;
rows: number;
dimensions: number;
facts: number;
};
dataSourceStatus: {
enabled: boolean;
@@ -239,6 +316,10 @@ export type CompanyFinancialStatementsResponse = {
pendingFilings: number;
queuedSync: boolean;
};
metrics: {
taxonomy: Filing['metrics'];
validation: MetricValidationResult | null;
};
dimensionBreakdown: Record<string, DimensionBreakdownRow[]> | null;
};