Run playwright UI tests
This commit is contained in:
13
lib/api.ts
13
lib/api.ts
@@ -8,7 +8,6 @@ import type {
|
||||
Holding,
|
||||
FinancialHistoryWindow,
|
||||
FinancialStatementKind,
|
||||
FinancialStatementMode,
|
||||
PortfolioInsight,
|
||||
PortfolioSummary,
|
||||
Task,
|
||||
@@ -199,24 +198,32 @@ export async function getCompanyAnalysis(ticker: string) {
|
||||
|
||||
export async function getCompanyFinancialStatements(input: {
|
||||
ticker: string;
|
||||
mode: FinancialStatementMode;
|
||||
statement: FinancialStatementKind;
|
||||
window: FinancialHistoryWindow;
|
||||
includeDimensions?: boolean;
|
||||
includeFacts?: boolean;
|
||||
factsCursor?: string | null;
|
||||
factsLimit?: number;
|
||||
cursor?: string | null;
|
||||
limit?: number;
|
||||
}) {
|
||||
const query = {
|
||||
ticker: input.ticker.trim().toUpperCase(),
|
||||
mode: input.mode,
|
||||
statement: input.statement,
|
||||
window: input.window,
|
||||
includeDimensions: input.includeDimensions ? 'true' : 'false',
|
||||
includeFacts: input.includeFacts ? 'true' : 'false',
|
||||
...(typeof input.cursor === 'string' && input.cursor.trim().length > 0
|
||||
? { cursor: input.cursor.trim() }
|
||||
: {}),
|
||||
...(typeof input.limit === 'number' && Number.isFinite(input.limit)
|
||||
? { limit: input.limit }
|
||||
: {}),
|
||||
...(typeof input.factsCursor === 'string' && input.factsCursor.trim().length > 0
|
||||
? { factsCursor: input.factsCursor.trim() }
|
||||
: {}),
|
||||
...(typeof input.factsLimit === 'number' && Number.isFinite(input.factsLimit)
|
||||
? { factsLimit: input.factsLimit }
|
||||
: {})
|
||||
};
|
||||
|
||||
|
||||
@@ -2,13 +2,15 @@ export const queryKeys = {
|
||||
companyAnalysis: (ticker: string) => ['analysis', ticker] as const,
|
||||
companyFinancialStatements: (
|
||||
ticker: string,
|
||||
mode: string,
|
||||
statement: string,
|
||||
window: string,
|
||||
includeDimensions: boolean,
|
||||
includeFacts: boolean,
|
||||
factsCursor: string | null,
|
||||
factsLimit: number,
|
||||
cursor: string | null,
|
||||
limit: number
|
||||
) => ['financials-v2', ticker, mode, statement, window, includeDimensions ? 'dims' : 'no-dims', cursor ?? '', limit] as const,
|
||||
) => ['financials-v3', ticker, statement, window, includeDimensions ? 'dims' : 'no-dims', includeFacts ? 'facts' : 'rows', factsCursor ?? '', factsLimit, cursor ?? '', limit] as const,
|
||||
filings: (ticker: string | null, limit: number) => ['filings', ticker ?? '', limit] as const,
|
||||
report: (accessionNumber: string) => ['report', accessionNumber] as const,
|
||||
watchlist: () => ['watchlist'] as const,
|
||||
|
||||
@@ -15,8 +15,7 @@ import {
|
||||
import { queryKeys } from '@/lib/query/keys';
|
||||
import type {
|
||||
FinancialHistoryWindow,
|
||||
FinancialStatementKind,
|
||||
FinancialStatementMode
|
||||
FinancialStatementKind
|
||||
} from '@/lib/types';
|
||||
|
||||
export function companyAnalysisQueryOptions(ticker: string) {
|
||||
@@ -31,34 +30,43 @@ export function companyAnalysisQueryOptions(ticker: string) {
|
||||
|
||||
export function companyFinancialStatementsQueryOptions(input: {
|
||||
ticker: string;
|
||||
mode: FinancialStatementMode;
|
||||
statement: FinancialStatementKind;
|
||||
window: FinancialHistoryWindow;
|
||||
includeDimensions?: boolean;
|
||||
includeFacts?: boolean;
|
||||
factsCursor?: string | null;
|
||||
factsLimit?: number;
|
||||
cursor?: string | null;
|
||||
limit?: number;
|
||||
}) {
|
||||
const normalizedTicker = input.ticker.trim().toUpperCase();
|
||||
const includeDimensions = input.includeDimensions ?? false;
|
||||
const includeFacts = input.includeFacts ?? false;
|
||||
const factsCursor = input.factsCursor ?? null;
|
||||
const factsLimit = input.factsLimit ?? 500;
|
||||
const cursor = input.cursor ?? null;
|
||||
const limit = input.limit ?? 40;
|
||||
|
||||
return queryOptions({
|
||||
queryKey: queryKeys.companyFinancialStatements(
|
||||
normalizedTicker,
|
||||
input.mode,
|
||||
input.statement,
|
||||
input.window,
|
||||
includeDimensions,
|
||||
includeFacts,
|
||||
factsCursor,
|
||||
factsLimit,
|
||||
cursor,
|
||||
limit
|
||||
),
|
||||
queryFn: () => getCompanyFinancialStatements({
|
||||
ticker: normalizedTicker,
|
||||
mode: input.mode,
|
||||
statement: input.statement,
|
||||
window: input.window,
|
||||
includeDimensions,
|
||||
includeFacts,
|
||||
factsCursor,
|
||||
factsLimit,
|
||||
cursor,
|
||||
limit
|
||||
}),
|
||||
|
||||
@@ -4,7 +4,6 @@ import type {
|
||||
Filing,
|
||||
FinancialHistoryWindow,
|
||||
FinancialStatementKind,
|
||||
FinancialStatementMode,
|
||||
TaskStatus
|
||||
} from '@/lib/types';
|
||||
import { auth } from '@/lib/auth';
|
||||
@@ -13,8 +12,8 @@ import { asErrorMessage, jsonError } from '@/lib/server/http';
|
||||
import { buildPortfolioSummary } from '@/lib/server/portfolio';
|
||||
import {
|
||||
defaultFinancialSyncLimit,
|
||||
getCompanyFinancialStatements
|
||||
} from '@/lib/server/financial-statements';
|
||||
getCompanyFinancialTaxonomy
|
||||
} from '@/lib/server/financial-taxonomy';
|
||||
import { redactInternalFilingAnalysisFields } from '@/lib/server/api/filing-redaction';
|
||||
import { getFilingByAccession, listFilingsRecords } from '@/lib/server/repos/filings';
|
||||
import {
|
||||
@@ -44,8 +43,7 @@ import {
|
||||
const ALLOWED_STATUSES: TaskStatus[] = ['queued', 'running', 'completed', 'failed'];
|
||||
const FINANCIAL_FORMS: ReadonlySet<Filing['filing_type']> = new Set(['10-K', '10-Q']);
|
||||
const AUTO_FILING_SYNC_LIMIT = 20;
|
||||
const FINANCIALS_V2_ENABLED = process.env.FINANCIALS_V2?.trim().toLowerCase() !== 'false';
|
||||
const FINANCIAL_STATEMENT_MODES: FinancialStatementMode[] = ['standardized', 'filing_faithful'];
|
||||
const FINANCIALS_V3_ENABLED = process.env.FINANCIALS_V3?.trim().toLowerCase() !== 'false';
|
||||
const FINANCIAL_STATEMENT_KINDS: FinancialStatementKind[] = [
|
||||
'income',
|
||||
'balance',
|
||||
@@ -120,12 +118,6 @@ function asTags(value: unknown) {
|
||||
return [...unique];
|
||||
}
|
||||
|
||||
/**
 * Coerces an arbitrary query value into a supported statement mode.
 *
 * @param value - Raw, untrusted value (e.g. a query-string parameter).
 * @returns The matching entry of `FINANCIAL_STATEMENT_MODES`, or
 *   `'standardized'` when `value` is not one of the supported modes.
 */
function asStatementMode(value: unknown): FinancialStatementMode {
  // `find` yields a value already typed as FinancialStatementMode, so no
  // `as` assertion on the untrusted input is needed.
  return FINANCIAL_STATEMENT_MODES.find((mode) => mode === value) ?? 'standardized';
}
|
||||
|
||||
function asStatementKind(value: unknown): FinancialStatementKind {
|
||||
return FINANCIAL_STATEMENT_KINDS.includes(value as FinancialStatementKind)
|
||||
? value as FinancialStatementKind
|
||||
@@ -613,8 +605,8 @@ export const app = new Elysia({ prefix: '/api' })
|
||||
return response;
|
||||
}
|
||||
|
||||
if (!FINANCIALS_V2_ENABLED) {
|
||||
return jsonError('Financial statements v2 is disabled', 404);
|
||||
if (!FINANCIALS_V3_ENABLED) {
|
||||
return jsonError('Financial statements v3 is disabled', 404);
|
||||
}
|
||||
|
||||
const ticker = typeof query.ticker === 'string'
|
||||
@@ -624,26 +616,34 @@ export const app = new Elysia({ prefix: '/api' })
|
||||
return jsonError('ticker is required');
|
||||
}
|
||||
|
||||
const mode = asStatementMode(query.mode);
|
||||
const statement = asStatementKind(query.statement);
|
||||
const window = asHistoryWindow(query.window);
|
||||
const includeDimensions = asBoolean(query.includeDimensions, false);
|
||||
const includeFacts = asBoolean(query.includeFacts, false);
|
||||
const cursor = typeof query.cursor === 'string' && query.cursor.trim().length > 0
|
||||
? query.cursor.trim()
|
||||
: null;
|
||||
const limit = Number.isFinite(Number(query.limit))
|
||||
? Number(query.limit)
|
||||
: undefined;
|
||||
const factsCursor = typeof query.factsCursor === 'string' && query.factsCursor.trim().length > 0
|
||||
? query.factsCursor.trim()
|
||||
: null;
|
||||
const factsLimit = Number.isFinite(Number(query.factsLimit))
|
||||
? Number(query.factsLimit)
|
||||
: undefined;
|
||||
|
||||
let payload = await getCompanyFinancialStatements({
|
||||
let payload = await getCompanyFinancialTaxonomy({
|
||||
ticker,
|
||||
mode,
|
||||
statement,
|
||||
window,
|
||||
includeDimensions,
|
||||
includeFacts,
|
||||
factsCursor,
|
||||
factsLimit,
|
||||
cursor,
|
||||
limit,
|
||||
v2Enabled: FINANCIALS_V2_ENABLED,
|
||||
v3Enabled: FINANCIALS_V3_ENABLED,
|
||||
queuedSync: false
|
||||
});
|
||||
|
||||
@@ -671,7 +671,7 @@ export const app = new Elysia({ prefix: '/api' })
|
||||
});
|
||||
queuedSync = true;
|
||||
} catch (error) {
|
||||
console.error(`[financials-v2-sync] failed for ${ticker}:`, error);
|
||||
console.error(`[financials-v3-sync] failed for ${ticker}:`, error);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -689,7 +689,6 @@ export const app = new Elysia({ prefix: '/api' })
|
||||
}, {
|
||||
query: t.Object({
|
||||
ticker: t.String({ minLength: 1 }),
|
||||
mode: t.Optional(t.Union([t.Literal('standardized'), t.Literal('filing_faithful')])),
|
||||
statement: t.Optional(t.Union([
|
||||
t.Literal('income'),
|
||||
t.Literal('balance'),
|
||||
@@ -699,8 +698,11 @@ export const app = new Elysia({ prefix: '/api' })
|
||||
])),
|
||||
window: t.Optional(t.Union([t.Literal('10y'), t.Literal('all')])),
|
||||
includeDimensions: t.Optional(t.Union([t.String(), t.Boolean()])),
|
||||
includeFacts: t.Optional(t.Union([t.String(), t.Boolean()])),
|
||||
cursor: t.Optional(t.String()),
|
||||
limit: t.Optional(t.Numeric())
|
||||
limit: t.Optional(t.Numeric()),
|
||||
factsCursor: t.Optional(t.String()),
|
||||
factsLimit: t.Optional(t.Numeric())
|
||||
})
|
||||
})
|
||||
.get('/analysis/reports/:accessionNumber', async ({ params }) => {
|
||||
|
||||
@@ -86,7 +86,8 @@ function applySqlMigrations(client: { exec: (query: string) => void }) {
|
||||
'0001_glossy_statement_snapshots.sql',
|
||||
'0002_workflow_task_projection_metadata.sql',
|
||||
'0003_task_stage_event_timeline.sql',
|
||||
'0004_watchlist_company_taxonomy.sql'
|
||||
'0004_watchlist_company_taxonomy.sql',
|
||||
'0005_financial_taxonomy_v3.sql'
|
||||
];
|
||||
|
||||
for (const file of migrationFiles) {
|
||||
|
||||
@@ -15,6 +15,19 @@ type FilingMetrics = {
|
||||
debt: number | null;
|
||||
};
|
||||
|
||||
/** Allowed values for the `asset_type` column of `filing_taxonomy_asset`. */
type TaxonomyAssetType =
  | 'instance'
  | 'schema'
  | 'presentation'
  | 'label'
  | 'calculation'
  | 'definition'
  | 'pdf'
  | 'other';
|
||||
|
||||
type TaxonomyParseStatus = 'ready' | 'partial' | 'failed';
|
||||
type TaxonomyMetricValidationStatus = 'not_run' | 'matched' | 'mismatch' | 'error';
|
||||
|
||||
type FilingAnalysis = {
|
||||
provider?: string;
|
||||
model?: string;
|
||||
@@ -47,6 +60,7 @@ type FilingStatementPeriod = {
|
||||
filingId: number;
|
||||
accessionNumber: string;
|
||||
filingDate: string;
|
||||
periodStart: string | null;
|
||||
periodEnd: string | null;
|
||||
filingType: '10-K' | '10-Q';
|
||||
periodLabel: string;
|
||||
@@ -97,6 +111,55 @@ type DimensionStatementBundle = {
|
||||
statements: Record<FinancialStatementKind, DimensionStatementSnapshotRow[]>;
|
||||
};
|
||||
|
||||
/** One axis/member pair; stored as a JSON array in `filing_taxonomy_fact.dimensions`. */
type TaxonomyDimensionMember = {
  axis: string;
  member: string;
};
|
||||
|
||||
/**
 * A single statement row as persisted inside a taxonomy snapshot's
 * `statement_rows` JSON (see `TaxonomyStatementBundle`).
 */
type TaxonomyStatementSnapshotRow = {
  key: string;
  label: string;
  conceptKey: string;
  qname: string;
  namespaceUri: string;
  localName: string;
  isExtension: boolean;
  statement: FinancialStatementKind;
  roleUri: string | null;
  order: number;
  depth: number;
  parentKey: string | null;
  // NOTE(review): both maps are presumably keyed by period id — confirm against the writer.
  values: Record<string, number | null>;
  units: Record<string, string | null>;
  hasDimensions: boolean;
  sourceFactIds: number[];
};
|
||||
|
||||
/** Periods plus the per-statement-kind rows for one taxonomy snapshot. */
type TaxonomyStatementBundle = {
  periods: FilingStatementPeriod[];
  statements: Record<FinancialStatementKind, TaxonomyStatementSnapshotRow[]>;
};
|
||||
|
||||
/**
 * Outcome of comparing one `FilingMetrics` field between its taxonomy value and
 * its LLM value. The fields mirror the columns of `filing_taxonomy_metric_validation`.
 */
type TaxonomyMetricValidationCheck = {
  metricKey: keyof FilingMetrics;
  taxonomyValue: number | null;
  llmValue: number | null;
  absoluteDiff: number | null;
  relativeDiff: number | null;
  status: TaxonomyMetricValidationStatus;
  evidencePages: number[];
  pdfUrl: string | null;
  provider: string | null;
  model: string | null;
  error: string | null;
};
|
||||
|
||||
/** Overall validation status plus the per-metric checks; the JSON shape of `validation_result`. */
type TaxonomyMetricValidationResult = {
  status: TaxonomyMetricValidationStatus;
  checks: TaxonomyMetricValidationCheck[];
  // NOTE(review): presumably an ISO-8601 timestamp, null when never validated — confirm.
  validatedAt: string | null;
};
|
||||
|
||||
// Shared column options object with `mode: 'timestamp_ms'`.
// NOTE(review): presumably passed to the integer date columns of the auth tables — confirm at the call sites.
const authDateColumn = {
  mode: 'timestamp_ms'
} as const;
|
||||
@@ -273,6 +336,121 @@ export const filingStatementSnapshot = sqliteTable('filing_statement_snapshot',
|
||||
filingStatementStatusIndex: index('filing_stmt_status_idx').on(table.parse_status)
|
||||
}));
|
||||
|
||||
/**
 * Drizzle definition of the `filing_taxonomy_snapshot` table.
 *
 * At most one snapshot exists per filing (unique index on `filing_id`), and the
 * row is removed when the referenced `filing` row is deleted (`onDelete: 'cascade'`).
 */
export const filingTaxonomySnapshot = sqliteTable('filing_taxonomy_snapshot', {
  id: integer('id').primaryKey({ autoIncrement: true }),
  filing_id: integer('filing_id').notNull().references(() => filing.id, { onDelete: 'cascade' }),
  ticker: text('ticker').notNull(),
  filing_date: text('filing_date').notNull(),
  filing_type: text('filing_type').$type<'10-K' | '10-Q'>().notNull(),
  parse_status: text('parse_status').$type<TaxonomyParseStatus>().notNull(),
  parse_error: text('parse_error'),
  source: text('source').$type<'xbrl_instance' | 'xbrl_instance_with_linkbase' | 'legacy_html_fallback'>().notNull(),
  // JSON columns: drizzle (de)serializes these through the text column.
  periods: text('periods', { mode: 'json' }).$type<FilingStatementPeriod[]>(),
  statement_rows: text('statement_rows', { mode: 'json' }).$type<TaxonomyStatementBundle['statements'] | null>(),
  derived_metrics: text('derived_metrics', { mode: 'json' }).$type<FilingMetrics | null>(),
  validation_result: text('validation_result', { mode: 'json' }).$type<TaxonomyMetricValidationResult | null>(),
  facts_count: integer('facts_count').notNull().default(0),
  concepts_count: integer('concepts_count').notNull().default(0),
  dimensions_count: integer('dimensions_count').notNull().default(0),
  created_at: text('created_at').notNull(),
  updated_at: text('updated_at').notNull()
}, (table) => ({
  filingTaxonomySnapshotFilingUnique: uniqueIndex('filing_taxonomy_snapshot_filing_uidx').on(table.filing_id),
  filingTaxonomySnapshotTickerDateIndex: index('filing_taxonomy_snapshot_ticker_date_idx').on(table.ticker, table.filing_date),
  filingTaxonomySnapshotStatusIndex: index('filing_taxonomy_snapshot_status_idx').on(table.parse_status)
}));
|
||||
|
||||
/**
 * Drizzle definition of the `filing_taxonomy_asset` table.
 *
 * Each row belongs to one taxonomy snapshot and is removed with it
 * (`onDelete: 'cascade'`).
 */
export const filingTaxonomyAsset = sqliteTable('filing_taxonomy_asset', {
  id: integer('id').primaryKey({ autoIncrement: true }),
  snapshot_id: integer('snapshot_id').notNull().references(() => filingTaxonomySnapshot.id, { onDelete: 'cascade' }),
  asset_type: text('asset_type').$type<TaxonomyAssetType>().notNull(),
  name: text('name').notNull(),
  url: text('url').notNull(),
  size_bytes: integer('size_bytes'),
  // NOTE(review): drizzle's SQLite `numeric` columns are typically read back as strings — confirm callers convert.
  score: numeric('score'),
  is_selected: integer('is_selected', { mode: 'boolean' }).notNull().default(false),
  created_at: text('created_at').notNull()
}, (table) => ({
  filingTaxonomyAssetSnapshotIndex: index('filing_taxonomy_asset_snapshot_idx').on(table.snapshot_id),
  filingTaxonomyAssetTypeIndex: index('filing_taxonomy_asset_type_idx').on(table.snapshot_id, table.asset_type)
}));
|
||||
|
||||
/**
 * Drizzle definition of the `filing_taxonomy_concept` table.
 *
 * Each row belongs to one taxonomy snapshot and is removed with it
 * (`onDelete: 'cascade'`).
 */
export const filingTaxonomyConcept = sqliteTable('filing_taxonomy_concept', {
  id: integer('id').primaryKey({ autoIncrement: true }),
  snapshot_id: integer('snapshot_id').notNull().references(() => filingTaxonomySnapshot.id, { onDelete: 'cascade' }),
  concept_key: text('concept_key').notNull(),
  qname: text('qname').notNull(),
  namespace_uri: text('namespace_uri').notNull(),
  local_name: text('local_name').notNull(),
  label: text('label'),
  is_extension: integer('is_extension', { mode: 'boolean' }).notNull().default(false),
  statement_kind: text('statement_kind').$type<FinancialStatementKind>(),
  role_uri: text('role_uri'),
  presentation_order: numeric('presentation_order'),
  presentation_depth: integer('presentation_depth'),
  parent_concept_key: text('parent_concept_key'),
  is_abstract: integer('is_abstract', { mode: 'boolean' }).notNull().default(false),
  created_at: text('created_at').notNull()
}, (table) => ({
  filingTaxonomyConceptSnapshotIndex: index('filing_taxonomy_concept_snapshot_idx').on(table.snapshot_id),
  filingTaxonomyConceptStatementIndex: index('filing_taxonomy_concept_statement_idx').on(table.snapshot_id, table.statement_kind),
  // NOTE(review): `role_uri` and `presentation_order` are nullable, and SQLite treats
  // NULLs as distinct in a UNIQUE index, so rows with a NULL in either column are not
  // deduplicated by this index — confirm that is intended.
  filingTaxonomyConceptUnique: uniqueIndex('filing_taxonomy_concept_uidx').on(
    table.snapshot_id,
    table.concept_key,
    table.role_uri,
    table.presentation_order
  )
}));
|
||||
|
||||
/**
 * Drizzle definition of the `filing_taxonomy_fact` table.
 *
 * Each row belongs to one taxonomy snapshot and is removed with it
 * (`onDelete: 'cascade'`).
 */
export const filingTaxonomyFact = sqliteTable('filing_taxonomy_fact', {
  id: integer('id').primaryKey({ autoIncrement: true }),
  snapshot_id: integer('snapshot_id').notNull().references(() => filingTaxonomySnapshot.id, { onDelete: 'cascade' }),
  concept_key: text('concept_key').notNull(),
  qname: text('qname').notNull(),
  namespace_uri: text('namespace_uri').notNull(),
  local_name: text('local_name').notNull(),
  statement_kind: text('statement_kind').$type<FinancialStatementKind>(),
  role_uri: text('role_uri'),
  context_id: text('context_id').notNull(),
  unit: text('unit'),
  decimals: text('decimals'),
  // NOTE(review): drizzle's SQLite `numeric` columns are typically read back as strings — confirm callers convert.
  value_num: numeric('value_num').notNull(),
  period_start: text('period_start'),
  period_end: text('period_end'),
  period_instant: text('period_instant'),
  dimensions: text('dimensions', { mode: 'json' }).$type<TaxonomyDimensionMember[]>().notNull(),
  is_dimensionless: integer('is_dimensionless', { mode: 'boolean' }).notNull().default(true),
  source_file: text('source_file'),
  created_at: text('created_at').notNull()
}, (table) => ({
  filingTaxonomyFactSnapshotIndex: index('filing_taxonomy_fact_snapshot_idx').on(table.snapshot_id),
  filingTaxonomyFactConceptIndex: index('filing_taxonomy_fact_concept_idx').on(table.snapshot_id, table.concept_key),
  filingTaxonomyFactPeriodIndex: index('filing_taxonomy_fact_period_idx').on(table.snapshot_id, table.period_end, table.period_instant),
  filingTaxonomyFactStatementIndex: index('filing_taxonomy_fact_statement_idx').on(table.snapshot_id, table.statement_kind)
}));
|
||||
|
||||
/**
 * Drizzle definition of the `filing_taxonomy_metric_validation` table.
 *
 * At most one row exists per (`snapshot_id`, `metric_key`) pair (unique index),
 * and rows are removed with their snapshot (`onDelete: 'cascade'`).
 */
export const filingTaxonomyMetricValidation = sqliteTable('filing_taxonomy_metric_validation', {
  id: integer('id').primaryKey({ autoIncrement: true }),
  snapshot_id: integer('snapshot_id').notNull().references(() => filingTaxonomySnapshot.id, { onDelete: 'cascade' }),
  metric_key: text('metric_key').$type<keyof FilingMetrics>().notNull(),
  taxonomy_value: numeric('taxonomy_value'),
  llm_value: numeric('llm_value'),
  absolute_diff: numeric('absolute_diff'),
  relative_diff: numeric('relative_diff'),
  status: text('status').$type<TaxonomyMetricValidationStatus>().notNull(),
  evidence_pages: text('evidence_pages', { mode: 'json' }).$type<number[]>().notNull(),
  pdf_url: text('pdf_url'),
  provider: text('provider'),
  model: text('model'),
  error: text('error'),
  created_at: text('created_at').notNull(),
  updated_at: text('updated_at').notNull()
}, (table) => ({
  filingTaxonomyMetricValidationSnapshotIndex: index('filing_taxonomy_metric_validation_snapshot_idx').on(table.snapshot_id),
  filingTaxonomyMetricValidationStatusIndex: index('filing_taxonomy_metric_validation_status_idx').on(table.snapshot_id, table.status),
  filingTaxonomyMetricValidationUnique: uniqueIndex('filing_taxonomy_metric_validation_uidx').on(table.snapshot_id, table.metric_key)
}));
|
||||
|
||||
export const filingLink = sqliteTable('filing_link', {
|
||||
id: integer('id').primaryKey({ autoIncrement: true }),
|
||||
filing_id: integer('filing_id').notNull().references(() => filing.id, { onDelete: 'cascade' }),
|
||||
@@ -357,6 +535,11 @@ export const appSchema = {
|
||||
holding,
|
||||
filing,
|
||||
filingStatementSnapshot,
|
||||
filingTaxonomySnapshot,
|
||||
filingTaxonomyAsset,
|
||||
filingTaxonomyConcept,
|
||||
filingTaxonomyFact,
|
||||
filingTaxonomyMetricValidation,
|
||||
filingLink,
|
||||
taskRun,
|
||||
taskStageEvent,
|
||||
|
||||
@@ -1,137 +1,7 @@
|
||||
import { describe, expect, it } from 'bun:test';
|
||||
import { __financialStatementsInternals } from './financial-statements';
|
||||
import type { FilingStatementSnapshotRecord } from '@/lib/server/repos/filing-statements';
|
||||
|
||||
/**
 * Builds a fresh MSFT 10-K snapshot fixture with one period, one income row in
 * both the filing-faithful and standardized bundles, and one dimension entry.
 *
 * Every call returns newly allocated objects, so a test may mutate the result
 * without affecting other tests.
 */
function sampleSnapshot(): FilingStatementSnapshotRecord {
  const periodId = '2025-12-31-0001';

  // Factory rather than a shared constant: each bundle gets its own period object.
  const makePeriod = () => ({
    id: periodId,
    filingId: 44,
    accessionNumber: '0001',
    filingDate: '2025-12-31',
    periodEnd: '2025-12-31',
    filingType: '10-K' as const,
    periodLabel: 'Fiscal Year End'
  });

  return {
    id: 10,
    filing_id: 44,
    ticker: 'MSFT',
    filing_date: '2025-12-31',
    filing_type: '10-K',
    period_end: '2025-12-31',
    statement_bundle: {
      periods: [makePeriod()],
      statements: {
        income: [
          {
            key: 'revenue-line',
            label: 'Revenue',
            concept: 'us-gaap:Revenues',
            order: 1,
            depth: 0,
            isSubtotal: false,
            values: { [periodId]: 120_000 }
          }
        ],
        balance: [],
        cash_flow: [],
        equity: [],
        comprehensive_income: []
      }
    },
    standardized_bundle: {
      periods: [makePeriod()],
      statements: {
        income: [
          {
            key: 'revenue',
            label: 'Revenue',
            concept: 'us-gaap:Revenues',
            category: 'core',
            sourceConcepts: ['us-gaap:Revenues'],
            values: { [periodId]: 120_000 }
          }
        ],
        balance: [],
        cash_flow: [],
        equity: [],
        comprehensive_income: []
      }
    },
    dimension_bundle: {
      statements: {
        income: [
          {
            rowKey: 'revenue-line',
            concept: 'us-gaap:Revenues',
            periodId,
            axis: 'srt:StatementBusinessSegmentsAxis',
            member: 'acme:CloudMember',
            value: 55_000,
            unit: 'USD'
          }
        ],
        balance: [],
        cash_flow: [],
        equity: [],
        comprehensive_income: []
      }
    },
    parse_status: 'ready',
    parse_error: null,
    source: 'sec_filing_summary',
    created_at: '2026-01-01T00:00:00.000Z',
    updated_at: '2026-01-01T00:00:00.000Z'
  };
}
|
||||
|
||||
describe('financial statements service internals', () => {
|
||||
it('builds sorted periods for selected mode/statement', () => {
|
||||
const snapshot = sampleSnapshot();
|
||||
|
||||
const periods = __financialStatementsInternals.buildPeriods(
|
||||
[snapshot],
|
||||
'standardized',
|
||||
'income'
|
||||
);
|
||||
|
||||
expect(periods.length).toBe(1);
|
||||
expect(periods[0]?.id).toBe('2025-12-31-0001');
|
||||
});
|
||||
|
||||
it('builds standardized rows and includes dimensions when requested', () => {
|
||||
const snapshot = sampleSnapshot();
|
||||
const periods = __financialStatementsInternals.buildPeriods(
|
||||
[snapshot],
|
||||
'standardized',
|
||||
'income'
|
||||
);
|
||||
|
||||
const result = __financialStatementsInternals.buildRows(
|
||||
[snapshot],
|
||||
periods,
|
||||
'standardized',
|
||||
'income',
|
||||
true
|
||||
);
|
||||
|
||||
expect(result.rows.length).toBe(1);
|
||||
expect(result.rows[0]?.hasDimensions).toBe(true);
|
||||
expect(result.dimensions).not.toBeNull();
|
||||
expect(result.dimensions?.['revenue-line']?.length).toBe(1);
|
||||
});
|
||||
|
||||
it('returns default sync limits by window', () => {
|
||||
expect(__financialStatementsInternals.defaultFinancialSyncLimit('10y')).toBe(60);
|
||||
expect(__financialStatementsInternals.defaultFinancialSyncLimit('all')).toBe(120);
|
||||
|
||||
@@ -1,315 +1,48 @@
|
||||
import type {
|
||||
CompanyFinancialStatementsResponse,
|
||||
DimensionBreakdownRow,
|
||||
FilingFaithfulStatementRow,
|
||||
FinancialHistoryWindow,
|
||||
FinancialStatementKind,
|
||||
FinancialStatementMode,
|
||||
FinancialStatementPeriod,
|
||||
StandardizedStatementRow
|
||||
FinancialStatementKind
|
||||
} from '@/lib/types';
|
||||
import { listFilingsRecords } from '@/lib/server/repos/filings';
|
||||
import {
|
||||
countFilingStatementSnapshotStatuses,
|
||||
type DimensionStatementSnapshotRow,
|
||||
type FilingFaithfulStatementSnapshotRow,
|
||||
type FilingStatementSnapshotRecord,
|
||||
listFilingStatementSnapshotsByTicker,
|
||||
type StandardizedStatementSnapshotRow
|
||||
} from '@/lib/server/repos/filing-statements';
|
||||
defaultFinancialSyncLimit,
|
||||
getCompanyFinancialTaxonomy
|
||||
} from '@/lib/server/financial-taxonomy';
|
||||
|
||||
type GetCompanyFinancialStatementsInput = {
|
||||
ticker: string;
|
||||
mode: FinancialStatementMode;
|
||||
statement: FinancialStatementKind;
|
||||
window: FinancialHistoryWindow;
|
||||
includeDimensions: boolean;
|
||||
includeFacts?: boolean;
|
||||
factsCursor?: string | null;
|
||||
factsLimit?: number;
|
||||
cursor?: string | null;
|
||||
limit?: number;
|
||||
v2Enabled: boolean;
|
||||
v2Enabled?: boolean;
|
||||
v3Enabled?: boolean;
|
||||
queuedSync: boolean;
|
||||
};
|
||||
|
||||
type FinancialStatementRowByMode = StandardizedStatementRow | FilingFaithfulStatementRow;
|
||||
|
||||
/** Normalizes a ticker symbol by trimming surrounding whitespace and upper-casing it. */
function safeTicker(input: string): string {
  return input.trim().toUpperCase();
}
|
||||
|
||||
/** Type guard: true only for the exact form types `'10-K'` and `'10-Q'` (case-sensitive). */
function isFinancialForm(type: string): type is '10-K' | '10-Q' {
  return type === '10-K' || type === '10-Q';
}
|
||||
|
||||
/**
 * Decides whether a dimension entry belongs to a statement row.
 *
 * @param row - Statement row, identified by `key` and an optional `concept`.
 * @param item - Dimension entry to test.
 * @returns `true` when `item.rowKey` equals `row.key`, or when both sides have a
 *   non-empty concept and the concepts are equal ignoring case.
 */
function rowDimensionMatcher(
  row: { key: string; concept: string | null },
  item: DimensionStatementSnapshotRow
): boolean {
  // Cheap exact-key check first; concepts are only normalized when needed.
  if (item.rowKey === row.key) {
    return true;
  }

  const rowConcept = row.concept?.toLowerCase() ?? '';
  const itemConcept = item.concept?.toLowerCase() ?? '';

  // Two missing/empty concepts must not count as a match.
  return rowConcept !== '' && rowConcept === itemConcept;
}
|
||||
|
||||
/**
 * Comparator that orders periods by ascending `filingDate`, then by `id`.
 *
 * When either date cannot be parsed (the difference is `NaN`), or the dates are
 * equal, ordering falls back to `id.localeCompare`.
 */
function periodSorter(left: FinancialStatementPeriod, right: FinancialStatementPeriod): number {
  const byDate = Date.parse(left.filingDate) - Date.parse(right.filingDate);
  if (Number.isFinite(byDate) && byDate !== 0) {
    return byDate;
  }

  return left.id.localeCompare(right.id);
}
|
||||
|
||||
/**
 * Maps the period reference stored on a dimension entry to a known period id.
 *
 * @param rawPeriodId - Value stored on the dimension entry.
 * @param periods - Known periods to resolve against.
 * @returns The id of the period whose `id` equals `rawPeriodId`; otherwise the id
 *   of the first period whose `filingDate` or `periodEnd` equals it; otherwise `null`.
 */
function resolveDimensionPeriodId(rawPeriodId: string, periods: FinancialStatementPeriod[]): string | null {
  // An exact id match anywhere in the list wins over any date-based match.
  const exact = periods.find((period) => period.id === rawPeriodId);
  if (exact) {
    return exact.id;
  }

  const byDate = periods.find(
    (period) => period.filingDate === rawPeriodId || period.periodEnd === rawPeriodId
  );
  return byDate?.id ?? null;
}
|
||||
|
||||
/**
 * Returns the rows of one statement kind from the bundle that matches `mode`.
 *
 * `'standardized'` reads `snapshot.standardized_bundle`; any other mode reads
 * `snapshot.statement_bundle`. A missing bundle, `statements` map, or statement
 * entry yields an empty array.
 */
function getRowsForSnapshot(
  snapshot: FilingStatementSnapshotRecord,
  mode: FinancialStatementMode,
  statement: FinancialStatementKind
) {
  if (mode === 'standardized') {
    return snapshot.standardized_bundle?.statements?.[statement] ?? [];
  }

  return snapshot.statement_bundle?.statements?.[statement] ?? [];
}
|
||||
|
||||
/**
 * Collects the distinct periods of every snapshot that has at least one row for
 * the requested mode and statement.
 *
 * @param snapshots - Snapshots to scan, in priority order: the first occurrence
 *   of a period id wins.
 * @param mode - Selects which bundle's `periods` list is read.
 * @param statement - Statement kind that must have rows for a snapshot to count.
 * @returns De-duplicated periods sorted with `periodSorter`.
 */
function buildPeriods(
  snapshots: FilingStatementSnapshotRecord[],
  mode: FinancialStatementMode,
  statement: FinancialStatementKind
): FinancialStatementPeriod[] {
  const periodsById = new Map<string, FinancialStatementPeriod>();

  for (const snapshot of snapshots) {
    // Snapshots without rows for this statement contribute no periods.
    if (getRowsForSnapshot(snapshot, mode, statement).length === 0) {
      continue;
    }

    const sourcePeriods = mode === 'standardized'
      ? snapshot.standardized_bundle?.periods
      : snapshot.statement_bundle?.periods;

    for (const period of sourcePeriods ?? []) {
      if (periodsById.has(period.id)) {
        continue;
      }

      // Copy only the known fields so later mutation cannot reach the snapshot.
      periodsById.set(period.id, {
        id: period.id,
        filingId: period.filingId,
        accessionNumber: period.accessionNumber,
        filingDate: period.filingDate,
        periodEnd: period.periodEnd,
        filingType: period.filingType,
        periodLabel: period.periodLabel
      });
    }
  }

  return [...periodsById.values()].sort(periodSorter);
}
|
||||
|
||||
/**
 * Merges the rows of one statement kind across snapshots into a single row list
 * and, optionally, a per-row dimension breakdown.
 *
 * Merging rules (the first snapshot that supplies a value for a period wins):
 * - standardized rows are keyed by `key`; `sourceConcepts` are unioned.
 * - filing-faithful rows are keyed by `concept-<lowercased concept>` when a
 *   concept exists, otherwise `label-<key>`; `order`/`depth` take the minimum
 *   seen and `isSubtotal` is OR-ed.
 *
 * Ordering:
 * - standardized: by label, with `category === 'core'` rows listed first.
 * - filing-faithful: by numeric `order`, then by label.
 *
 * @param snapshots - Snapshots to merge, in priority order.
 * @param periods - Known periods, used to resolve dimension period references.
 * @param mode - Which bundle to read and which row shape to produce.
 * @param statement - Statement kind to extract.
 * @param includeDimensions - When false, `dimensions` in the result is `null`.
 * @returns `{ rows, dimensions }`, where `dimensions` maps a dimension entry's
 *   `rowKey` to its breakdown entries (entries whose period cannot be resolved
 *   are dropped).
 */
function buildRows(
  snapshots: FilingStatementSnapshotRecord[],
  periods: FinancialStatementPeriod[],
  mode: FinancialStatementMode,
  statement: FinancialStatementKind,
  includeDimensions: boolean
) {
  const rowMap = new Map<string, FinancialStatementRowByMode>();
  const dimensionMap = includeDimensions
    ? new Map<string, DimensionBreakdownRow[]>()
    : null;

  for (const snapshot of snapshots) {
    const rows = getRowsForSnapshot(snapshot, mode, statement);
    const dimensions = snapshot.dimension_bundle?.statements?.[statement] ?? [];

    if (mode === 'standardized') {
      for (const sourceRow of rows as StandardizedStatementSnapshotRow[]) {
        const existing = rowMap.get(sourceRow.key) as StandardizedStatementRow | undefined;
        const hasDimensions = dimensions.some((item) => rowDimensionMatcher(sourceRow, item));

        if (!existing) {
          // Copy arrays/maps so merging never mutates the snapshot itself.
          rowMap.set(sourceRow.key, {
            key: sourceRow.key,
            label: sourceRow.label,
            concept: sourceRow.concept,
            category: sourceRow.category,
            sourceConcepts: [...sourceRow.sourceConcepts],
            values: { ...sourceRow.values },
            hasDimensions
          });
          continue;
        }

        existing.hasDimensions = existing.hasDimensions || hasDimensions;
        for (const concept of sourceRow.sourceConcepts) {
          if (!existing.sourceConcepts.includes(concept)) {
            existing.sourceConcepts.push(concept);
          }
        }

        for (const [periodId, value] of Object.entries(sourceRow.values)) {
          if (!(periodId in existing.values)) {
            existing.values[periodId] = value;
          }
        }
      }
    } else {
      for (const sourceRow of rows as FilingFaithfulStatementSnapshotRow[]) {
        const rowKey = sourceRow.concept
          ? `concept-${sourceRow.concept.toLowerCase()}`
          : `label-${sourceRow.key}`;
        const existing = rowMap.get(rowKey) as FilingFaithfulStatementRow | undefined;
        const hasDimensions = dimensions.some((item) => rowDimensionMatcher(sourceRow, item));

        if (!existing) {
          rowMap.set(rowKey, {
            key: rowKey,
            label: sourceRow.label,
            concept: sourceRow.concept,
            order: sourceRow.order,
            depth: sourceRow.depth,
            isSubtotal: sourceRow.isSubtotal,
            values: { ...sourceRow.values },
            hasDimensions
          });
          continue;
        }

        existing.hasDimensions = existing.hasDimensions || hasDimensions;
        existing.order = Math.min(existing.order, sourceRow.order);
        existing.depth = Math.min(existing.depth, sourceRow.depth);
        existing.isSubtotal = existing.isSubtotal || sourceRow.isSubtotal;
        for (const [periodId, value] of Object.entries(sourceRow.values)) {
          if (!(periodId in existing.values)) {
            existing.values[periodId] = value;
          }
        }
      }
    }

    if (dimensionMap) {
      for (const item of dimensions) {
        const periodId = resolveDimensionPeriodId(item.periodId, periods);
        if (!periodId) {
          continue;
        }

        const entry: DimensionBreakdownRow = {
          rowKey: item.rowKey,
          concept: item.concept,
          periodId,
          axis: item.axis,
          member: item.member,
          value: item.value,
          unit: item.unit
        };

        const group = dimensionMap.get(item.rowKey);
        if (group) {
          group.push(entry);
        } else {
          dimensionMap.set(item.rowKey, [entry]);
        }
      }
    }
  }

  const dimensions = dimensionMap ? Object.fromEntries(dimensionMap.entries()) : null;

  if (mode === 'standardized') {
    const standardized = ([...rowMap.values()] as StandardizedStatementRow[])
      .sort((a, b) => a.label.localeCompare(b.label));

    // Stable partition: core rows first, label order preserved within each group.
    return {
      rows: [
        ...standardized.filter((row) => row.category === 'core'),
        ...standardized.filter((row) => row.category !== 'core')
      ],
      dimensions
    };
  }

  // Compare `order` numerically. A zero-padded string comparison mis-orders
  // values of 100000 or more, negatives, and fractions.
  const faithful = ([...rowMap.values()] as FilingFaithfulStatementRow[])
    .sort((a, b) => (a.order - b.order) || a.label.localeCompare(b.label));

  return {
    rows: faithful,
    dimensions
  };
}
|
||||
|
||||
/**
 * Picks the default sync limit for a history window.
 *
 * @param window - History window requested by the caller.
 * @returns 120 for the `'all'` window and 60 for every other window.
 */
export function defaultFinancialSyncLimit(window: FinancialHistoryWindow) {
  if (window === 'all') {
    return 120;
  }

  return 60;
}
|
||||
|
||||
export async function getCompanyFinancialStatements(input: GetCompanyFinancialStatementsInput): Promise<CompanyFinancialStatementsResponse> {
|
||||
const ticker = safeTicker(input.ticker);
|
||||
const snapshotResult = await listFilingStatementSnapshotsByTicker({
|
||||
ticker,
|
||||
window: input.window,
|
||||
limit: input.limit,
|
||||
cursor: input.cursor
|
||||
});
|
||||
|
||||
const statuses = await countFilingStatementSnapshotStatuses(ticker);
|
||||
const filings = await listFilingsRecords({
|
||||
ticker,
|
||||
limit: input.window === 'all' ? 250 : 120
|
||||
});
|
||||
|
||||
const financialFilings = filings.filter((filing) => isFinancialForm(filing.filing_type));
|
||||
const periods = buildPeriods(snapshotResult.snapshots, input.mode, input.statement);
|
||||
const rowResult = buildRows(
|
||||
snapshotResult.snapshots,
|
||||
periods,
|
||||
input.mode,
|
||||
input.statement,
|
||||
input.includeDimensions
|
||||
);
|
||||
|
||||
const latestFiling = filings[0] ?? null;
|
||||
|
||||
return {
|
||||
company: {
|
||||
ticker,
|
||||
companyName: latestFiling?.company_name ?? ticker,
|
||||
cik: latestFiling?.cik ?? null
|
||||
},
|
||||
mode: input.mode,
|
||||
export async function getCompanyFinancialStatements(
|
||||
input: GetCompanyFinancialStatementsInput
|
||||
): Promise<CompanyFinancialStatementsResponse> {
|
||||
return await getCompanyFinancialTaxonomy({
|
||||
ticker: input.ticker,
|
||||
statement: input.statement,
|
||||
window: input.window,
|
||||
periods,
|
||||
rows: rowResult.rows,
|
||||
nextCursor: snapshotResult.nextCursor,
|
||||
coverage: {
|
||||
filings: periods.length,
|
||||
rows: rowResult.rows.length,
|
||||
dimensions: rowResult.dimensions
|
||||
? Object.values(rowResult.dimensions).reduce((total, rows) => total + rows.length, 0)
|
||||
: 0
|
||||
},
|
||||
dataSourceStatus: {
|
||||
enabled: input.v2Enabled,
|
||||
hydratedFilings: statuses.ready,
|
||||
partialFilings: statuses.partial,
|
||||
failedFilings: statuses.failed,
|
||||
pendingFilings: Math.max(0, financialFilings.length - statuses.ready - statuses.partial - statuses.failed),
|
||||
queuedSync: input.queuedSync
|
||||
},
|
||||
dimensionBreakdown: rowResult.dimensions
|
||||
};
|
||||
includeDimensions: input.includeDimensions,
|
||||
includeFacts: input.includeFacts ?? false,
|
||||
factsCursor: input.factsCursor,
|
||||
factsLimit: input.factsLimit,
|
||||
cursor: input.cursor,
|
||||
limit: input.limit,
|
||||
v3Enabled: input.v3Enabled ?? input.v2Enabled ?? true,
|
||||
queuedSync: input.queuedSync
|
||||
});
|
||||
}
|
||||
|
||||
export { defaultFinancialSyncLimit };
|
||||
|
||||
// Bundles otherwise module-private helpers under one exported object
// (presumably so tests can reach them — confirm against the test suite).
export const __financialStatementsInternals = {
|
||||
buildPeriods,
|
||||
buildRows,
|
||||
defaultFinancialSyncLimit
|
||||
};
|
||||
|
||||
142
lib/server/financial-taxonomy.test.ts
Normal file
142
lib/server/financial-taxonomy.test.ts
Normal file
@@ -0,0 +1,142 @@
|
||||
import { describe, expect, it } from 'bun:test';
|
||||
import { __financialTaxonomyInternals } from './financial-taxonomy';
|
||||
import type { FilingTaxonomySnapshotRecord } from './repos/filing-taxonomy';
|
||||
import type { FinancialStatementKind, TaxonomyStatementRow } from '@/lib/types';
|
||||
|
||||
/**
 * Builds a revenue statement-row fixture carrying one value per period id.
 *
 * Values start at 100 and increase by position, every unit is `iso4217:USD`,
 * and source fact ids are the 1-based positions of the period ids.
 */
function createRow(periodIds: string[]): TaxonomyStatementRow {
  const concept = 'us-gaap:RevenueFromContractWithCustomerExcludingAssessedTax';
  const valueEntries = periodIds.map((periodId, position) => [periodId, 100 + position]);
  const unitEntries = periodIds.map((periodId) => [periodId, 'iso4217:USD']);
  const factIds = periodIds.map((_, position) => position + 1);

  return {
    key: concept,
    label: 'Revenue From Contract With Customer Excluding Assessed Tax',
    conceptKey: concept,
    qname: concept,
    namespaceUri: 'http://fasb.org/us-gaap/2021-01-31',
    localName: 'RevenueFromContractWithCustomerExcludingAssessedTax',
    isExtension: false,
    statement: 'income',
    roleUri: 'income',
    order: 1,
    depth: 0,
    parentKey: null,
    values: Object.fromEntries(valueEntries),
    units: Object.fromEntries(unitEntries),
    hasDimensions: false,
    sourceFactIds: factIds
  };
}
|
||||
|
||||
/**
 * Builds a `ready` taxonomy snapshot fixture for ticker MSFT.
 *
 * The snapshot holds a single revenue row (see `createRow`) that is placed
 * under `income` or `balance` depending on `input.statement`; every other
 * statement list is empty. Each supplied period is stamped with the filing's
 * id, date, type and a synthetic accession number.
 */
function createSnapshot(input: {
  filingId: number;
  filingType: '10-K' | '10-Q';
  filingDate: string;
  periods: Array<{
    id: string;
    periodStart: string | null;
    periodEnd: string;
    periodLabel: string;
  }>;
  statement: FinancialStatementKind;
}) {
  const { filingId, filingType, filingDate, statement } = input;
  const row = createRow(input.periods.map(({ id }) => id));

  return {
    id: filingId,
    filing_id: filingId,
    ticker: 'MSFT',
    filing_date: filingDate,
    filing_type: filingType,
    parse_status: 'ready',
    parse_error: null,
    source: 'xbrl_instance',
    periods: input.periods.map(({ id, periodStart, periodEnd, periodLabel }) => ({
      id,
      filingId,
      accessionNumber: `0000-${filingId}`,
      filingDate,
      periodStart,
      periodEnd,
      filingType,
      periodLabel
    })),
    statement_rows: {
      income: statement === 'income' ? [row] : [],
      balance: statement === 'balance' ? [{ ...row, statement: 'balance' }] : [],
      cash_flow: [],
      equity: [],
      comprehensive_income: []
    },
    derived_metrics: null,
    validation_result: null,
    facts_count: 0,
    concepts_count: 0,
    dimensions_count: 0,
    created_at: filingDate,
    updated_at: filingDate
  } satisfies FilingTaxonomySnapshotRecord;
}
|
||||
|
||||
// Exercises primary-period selection: one reporting period per filing,
// chosen according to the statement kind and the filing type.
describe('financial taxonomy internals', () => {
  const { selectPrimaryPeriods, buildPeriods } = __financialTaxonomyInternals;

  it('selects the primary quarter duration for 10-Q income statements', () => {
    // Quarter and year-to-date share an end date; the ~90 day span should win.
    const snapshot = createSnapshot({
      filingId: 1,
      filingType: '10-Q',
      filingDate: '2026-01-28',
      statement: 'income',
      periods: [
        { id: 'instant', periodStart: null, periodEnd: '2025-12-31', periodLabel: 'Instant' },
        { id: 'quarter', periodStart: '2025-10-01', periodEnd: '2025-12-31', periodLabel: '2025-10-01 to 2025-12-31' },
        { id: 'ytd', periodStart: '2025-07-01', periodEnd: '2025-12-31', periodLabel: '2025-07-01 to 2025-12-31' }
      ]
    });

    const { periods } = selectPrimaryPeriods([snapshot], 'income');
    const [primary] = periods;

    expect(periods).toHaveLength(1);
    expect(primary?.id).toBe('quarter');
  });

  it('selects the latest instant for balance sheets', () => {
    const snapshot = createSnapshot({
      filingId: 2,
      filingType: '10-K',
      filingDate: '2025-07-30',
      statement: 'balance',
      periods: [
        { id: 'prior', periodStart: null, periodEnd: '2024-06-30', periodLabel: 'Instant' },
        { id: 'current', periodStart: null, periodEnd: '2025-06-30', periodLabel: 'Instant' }
      ]
    });

    const { periods } = selectPrimaryPeriods([snapshot], 'balance');
    const [primary] = periods;

    expect(periods).toHaveLength(1);
    expect(primary?.id).toBe('current');
  });

  it('builds one reporting period per filing for the selected statement', () => {
    const annual = createSnapshot({
      filingId: 10,
      filingType: '10-K',
      filingDate: '2025-07-30',
      statement: 'income',
      periods: [
        { id: 'annual', periodStart: '2024-07-01', periodEnd: '2025-06-30', periodLabel: '2024-07-01 to 2025-06-30' },
        { id: 'quarter', periodStart: '2025-04-01', periodEnd: '2025-06-30', periodLabel: '2025-04-01 to 2025-06-30' }
      ]
    });
    const quarterly = createSnapshot({
      filingId: 11,
      filingType: '10-Q',
      filingDate: '2025-10-29',
      statement: 'income',
      periods: [
        { id: 'instant', periodStart: null, periodEnd: '2025-09-30', periodLabel: 'Instant' },
        { id: 'quarter', periodStart: '2025-07-01', periodEnd: '2025-09-30', periodLabel: '2025-07-01 to 2025-09-30' },
        { id: 'ytd', periodStart: '2025-01-01', periodEnd: '2025-09-30', periodLabel: '2025-01-01 to 2025-09-30' }
      ]
    });

    const selectedIds = buildPeriods([annual, quarterly], 'income').map(({ id }) => id);

    expect(selectedIds).toEqual(['annual', 'quarter']);
  });
});
|
||||
384
lib/server/financial-taxonomy.ts
Normal file
384
lib/server/financial-taxonomy.ts
Normal file
@@ -0,0 +1,384 @@
|
||||
import type {
|
||||
CompanyFinancialStatementsResponse,
|
||||
DimensionBreakdownRow,
|
||||
FinancialHistoryWindow,
|
||||
FinancialStatementKind,
|
||||
FinancialStatementPeriod,
|
||||
TaxonomyStatementRow
|
||||
} from '@/lib/types';
|
||||
import { listFilingsRecords } from '@/lib/server/repos/filings';
|
||||
import {
|
||||
countFilingTaxonomySnapshotStatuses,
|
||||
listFilingTaxonomySnapshotsByTicker,
|
||||
listTaxonomyFactsByTicker,
|
||||
type FilingTaxonomySnapshotRecord
|
||||
} from '@/lib/server/repos/filing-taxonomy';
|
||||
|
||||
/**
 * Arguments accepted by `getCompanyFinancialTaxonomy`.
 *
 * `window`, `limit` and `cursor` are forwarded to the snapshot listing.
 * `factsCursor` and `factsLimit` are forwarded to the fact listing, which is
 * only queried when `includeFacts` is true. `v3Enabled` and `queuedSync` are
 * echoed back in the response's `dataSourceStatus`.
 */
type GetCompanyFinancialTaxonomyInput = {
|
||||
ticker: string;
|
||||
statement: FinancialStatementKind;
|
||||
window: FinancialHistoryWindow;
|
||||
includeDimensions: boolean;
|
||||
includeFacts: boolean;
|
||||
factsCursor?: string | null;
|
||||
factsLimit?: number;
|
||||
cursor?: string | null;
|
||||
limit?: number;
|
||||
v3Enabled: boolean;
|
||||
queuedSync: boolean;
|
||||
};
|
||||
|
||||
/** Normalizes a ticker: surrounding whitespace removed, then upper-cased. */
function safeTicker(input: string) {
  const trimmed = input.trim();

  return trimmed.toUpperCase();
}
|
||||
|
||||
/** Type guard: true only for the exact form types `10-K` and `10-Q`. */
function isFinancialForm(type: string): type is '10-K' | '10-Q' {
  switch (type) {
    case '10-K':
    case '10-Q':
      return true;
    default:
      return false;
  }
}
|
||||
|
||||
/**
 * Parses a date string into epoch milliseconds.
 *
 * @returns `NaN` for `null`, an empty string, or a string `Date.parse` rejects.
 */
function parseEpoch(value: string | null) {
  return value ? Date.parse(value) : Number.NaN;
}
|
||||
|
||||
/**
 * Ascending comparator for reporting periods.
 *
 * Periods are ordered by `periodEnd` (falling back to `filingDate` when the
 * end date is missing). When either date cannot be parsed, or both dates are
 * equal, ordering falls back to comparing the period ids.
 */
function periodSorter(left: FinancialStatementPeriod, right: FinancialStatementPeriod) {
  const epochOf = (period: FinancialStatementPeriod) => parseEpoch(period.periodEnd ?? period.filingDate);

  // NaN when either side failed to parse, 0 when both dates coincide.
  const delta = epochOf(left) - epochOf(right);
  if (Number.isFinite(delta) && delta !== 0) {
    return delta;
  }

  return left.id.localeCompare(right.id);
}
|
||||
|
||||
/** A period counts as an instant when its `periodStart` is exactly `null`. */
function isInstantPeriod(period: FinancialStatementPeriod) {
  const { periodStart } = period;

  return periodStart === null;
}
|
||||
|
||||
/**
 * Length of a duration period in days, counting both the start and end day.
 *
 * @returns `null` when either bound is missing or unparseable, or when the
 * end precedes the start.
 */
function periodDurationDays(period: FinancialStatementPeriod) {
  const { periodStart, periodEnd } = period;
  if (!periodStart || !periodEnd) {
    return null;
  }

  const startMs = Date.parse(periodStart);
  const endMs = Date.parse(periodEnd);
  const isOrderedRange = Number.isFinite(startMs) && Number.isFinite(endMs) && endMs >= startMs;
  if (!isOrderedRange) {
    return null;
  }

  const millisecondsPerDay = 86_400_000;

  // +1 makes the range inclusive of its final day.
  return Math.round((endMs - startMs) / millisecondsPerDay) + 1;
}
|
||||
|
||||
/** Target duration used to rank candidate periods: 365 days for a 10-K, otherwise 90. */
function preferredDurationDays(filingType: FinancialStatementPeriod['filingType']) {
  if (filingType === '10-K') {
    return 365;
  }

  return 90;
}
|
||||
|
||||
/**
 * Chooses one primary reporting period per filing for the given statement.
 *
 * Only periods that at least one of the statement's rows has a value for are
 * considered. Snapshots with no rows, or no such periods, contribute nothing.
 *
 * - `balance`: the most recent instant period, or the most recent period of
 *   any kind when the filing has no instant candidates.
 * - other statements: among duration periods, the latest end date wins; when
 *   end dates tie or cannot be compared, the period whose length is closest
 *   to the filing type's preferred duration wins, then the smaller id. If
 *   there are no duration periods, the most recent candidate is used.
 *
 * @returns The selected periods in ascending `periodSorter` order, the set of
 * their ids, and a lookup from filing id to its selected period.
 */
function selectPrimaryPeriods(
  snapshots: FilingTaxonomySnapshotRecord[],
  statement: FinancialStatementKind
) {
  type SnapshotPeriod = FilingTaxonomySnapshotRecord['periods'][number];

  const newestFirst = (a: SnapshotPeriod, b: SnapshotPeriod) => periodSorter(b, a);

  const pickPrimary = (snapshot: FilingTaxonomySnapshotRecord, candidates: SnapshotPeriod[]) => {
    if (statement === 'balance') {
      const instants = candidates.filter(isInstantPeriod);
      const pool = instants.length > 0 ? instants : candidates;
      return pool.sort(newestFirst)[0] ?? null;
    }

    const durations = candidates.filter((period) => !isInstantPeriod(period));
    if (durations.length === 0) {
      return candidates.sort(newestFirst)[0] ?? null;
    }

    const targetDays = preferredDurationDays(snapshot.filing_type);
    // A period whose length is unknown is treated as a perfect match.
    const distanceFromTarget = (period: SnapshotPeriod) =>
      Math.abs((periodDurationDays(period) ?? targetDays) - targetDays);

    const ranked = durations.sort((a, b) => {
      const aEnd = parseEpoch(a.periodEnd ?? a.filingDate);
      const bEnd = parseEpoch(b.periodEnd ?? b.filingDate);
      const datesComparable = Number.isFinite(aEnd) && Number.isFinite(bEnd);
      if (datesComparable && aEnd !== bEnd) {
        return bEnd - aEnd;
      }

      const gap = distanceFromTarget(a) - distanceFromTarget(b);
      return gap !== 0 ? gap : a.id.localeCompare(b.id);
    });

    return ranked[0] ?? null;
  };

  const selectedByFilingId = new Map<number, FinancialStatementPeriod>();

  for (const snapshot of snapshots) {
    const rows = snapshot.statement_rows?.[statement] ?? [];
    if (rows.length === 0) {
      continue;
    }

    // Period ids that at least one row of this statement carries a value for.
    const usedPeriodIds = new Set(rows.flatMap((row) => Object.keys(row.values)));
    const candidates = (snapshot.periods ?? []).filter((period) => usedPeriodIds.has(period.id));
    if (candidates.length === 0) {
      continue;
    }

    const selected = pickPrimary(snapshot, candidates);
    if (selected) {
      selectedByFilingId.set(selected.filingId, selected);
    }
  }

  const periods = [...selectedByFilingId.values()].sort(periodSorter);

  return {
    periods,
    selectedPeriodIds: new Set(periods.map((period) => period.id)),
    periodByFilingId: new Map(periods.map((period) => [period.filingId, period]))
  };
}
|
||||
|
||||
/** Returns just the ordered period list produced by `selectPrimaryPeriods`. */
function buildPeriods(
  snapshots: FilingTaxonomySnapshotRecord[],
  statement: FinancialStatementKind
) {
  const { periods } = selectPrimaryPeriods(snapshots, statement);

  return periods;
}
|
||||
|
||||
/**
 * Merges the statement rows of every snapshot into one row per `row.key`,
 * keeping only values and units for the selected period ids.
 *
 * The first snapshot that yields at least one selected value for a key seeds
 * the merged row. Later occurrences of the key only fill in periods the row
 * does not have yet, lower `order`/`depth` to the minimum seen, OR together
 * `hasDimensions`, adopt a `parentKey` when none is set, and append unseen
 * source fact ids.
 *
 * NOTE(review): periods are matched by id alone, so this assumes period ids
 * are unique across filings — confirm against the snapshot writer.
 *
 * @returns Merged rows sorted by `order`, then by `label`.
 */
function buildRows(
  snapshots: FilingTaxonomySnapshotRecord[],
  statement: FinancialStatementKind,
  selectedPeriodIds: Set<string>
) {
  const keepSelected = <T>(record: Record<string, T>) =>
    Object.fromEntries(Object.entries(record).filter(([periodId]) => selectedPeriodIds.has(periodId)));

  // Copies selected periods that `into` does not already hold; existing entries win.
  const fillMissing = <T>(into: Record<string, T>, from: Record<string, T>) => {
    for (const [periodId, entry] of Object.entries(from)) {
      if (selectedPeriodIds.has(periodId) && !(periodId in into)) {
        into[periodId] = entry;
      }
    }
  };

  const merged = new Map<string, TaxonomyStatementRow>();

  for (const snapshot of snapshots) {
    for (const row of snapshot.statement_rows?.[statement] ?? []) {
      const target = merged.get(row.key);

      if (!target) {
        const values = keepSelected(row.values);
        // A row with no value in any selected period is not tracked at all.
        if (Object.keys(values).length > 0) {
          merged.set(row.key, {
            ...row,
            values,
            units: keepSelected(row.units),
            sourceFactIds: [...row.sourceFactIds]
          });
        }
        continue;
      }

      target.hasDimensions = target.hasDimensions || row.hasDimensions;
      target.order = Math.min(target.order, row.order);
      target.depth = Math.min(target.depth, row.depth);
      if (!target.parentKey && row.parentKey) {
        target.parentKey = row.parentKey;
      }

      fillMissing(target.values, row.values);
      fillMissing(target.units, row.units);

      for (const factId of row.sourceFactIds) {
        if (!target.sourceFactIds.includes(factId)) {
          target.sourceFactIds.push(factId);
        }
      }
    }
  }

  return [...merged.values()].sort((a, b) =>
    a.order !== b.order ? a.order - b.order : a.label.localeCompare(b.label)
  );
}
|
||||
|
||||
/**
 * Groups dimensional facts by concept key for the selected periods.
 *
 * A fact contributes only when it has at least one dimension, its filing has
 * a period in `periods`, and its dates match that period: start and end for
 * a duration period, or instant (falling back to end) for a period without a
 * start. One breakdown row is produced per dimension on the fact.
 *
 * @returns A record keyed by concept key, or `null` when nothing matched.
 */
function buildDimensionBreakdown(
  facts: Awaited<ReturnType<typeof listTaxonomyFactsByTicker>>['facts'],
  periods: FinancialStatementPeriod[]
) {
  // When a filing appears more than once, the last period listed wins.
  const periodForFiling = new Map<number, FinancialStatementPeriod>();
  periods.forEach((period) => periodForFiling.set(period.filingId, period));

  const grouped = new Map<string, DimensionBreakdownRow[]>();

  for (const fact of facts) {
    const period = periodForFiling.get(fact.filingId);
    if (fact.dimensions.length === 0 || !period) {
      continue;
    }

    const coversPeriod = period.periodStart
      ? fact.periodStart === period.periodStart && fact.periodEnd === period.periodEnd
      : (fact.periodInstant ?? fact.periodEnd) === period.periodEnd;
    if (!coversPeriod) {
      continue;
    }

    let bucket = grouped.get(fact.conceptKey);
    if (!bucket) {
      bucket = [];
      grouped.set(fact.conceptKey, bucket);
    }

    for (const { axis, member } of fact.dimensions) {
      bucket.push({
        rowKey: fact.conceptKey,
        concept: fact.qname,
        periodId: period.id,
        axis,
        member,
        value: fact.value,
        unit: fact.unit
      });
    }
  }

  return grouped.size === 0 ? null : Object.fromEntries(grouped.entries());
}
|
||||
|
||||
/**
 * Returns the derived metrics and validation result of the first snapshot, in
 * the order given, that has derived metrics. Both fields are `null` when no
 * snapshot has any.
 */
function latestMetrics(snapshots: FilingTaxonomySnapshotRecord[]) {
  for (const { derived_metrics: taxonomy, validation_result: validation } of snapshots) {
    if (!taxonomy) {
      continue;
    }

    return { taxonomy, validation };
  }

  return {
    taxonomy: null,
    validation: null
  };
}
|
||||
|
||||
/**
 * Default sync limit for a history window.
 *
 * @param window - History window requested by the caller.
 * @returns 120 when the whole history (`'all'`) is requested, otherwise 60.
 */
export function defaultFinancialSyncLimit(window: FinancialHistoryWindow) {
  const wantsFullHistory = window === 'all';
  if (wantsFullHistory) {
    return 120;
  }

  return 60;
}
|
||||
|
||||
/**
 * Assembles the financial-statement response for one company from its stored
 * taxonomy snapshots.
 *
 * The snapshot page, the per-status snapshot counts, the filing list and the
 * optional fact listings do not depend on one another, so they are requested
 * concurrently rather than one after another.
 *
 * @param input - Ticker, statement kind, window, paging cursors and flags.
 * `includeFacts` adds a page of raw facts; `includeDimensions` adds the
 * dimension breakdown, built from up to 1200 facts.
 * @returns Periods, merged rows, optional facts and dimension breakdown,
 * coverage counts, data-source status and the latest derived metrics.
 */
export async function getCompanyFinancialTaxonomy(input: GetCompanyFinancialTaxonomyInput): Promise<CompanyFinancialStatementsResponse> {
  const ticker = safeTicker(input.ticker);

  const [snapshotResult, statuses, filings, factsResult, dimensionFacts] = await Promise.all([
    listFilingTaxonomySnapshotsByTicker({
      ticker,
      window: input.window,
      limit: input.limit,
      cursor: input.cursor
    }),
    countFilingTaxonomySnapshotStatuses(ticker),
    listFilingsRecords({
      ticker,
      limit: input.window === 'all' ? 250 : 120
    }),
    input.includeFacts
      ? listTaxonomyFactsByTicker({
          ticker,
          window: input.window,
          statement: input.statement,
          cursor: input.factsCursor,
          limit: input.factsLimit
        })
      : { facts: [], nextCursor: null },
    input.includeDimensions
      ? listTaxonomyFactsByTicker({
          ticker,
          window: input.window,
          statement: input.statement,
          limit: 1200
        })
      : { facts: [], nextCursor: null }
  ]);

  const financialFilings = filings.filter((filing) => isFinancialForm(filing.filing_type));
  const selection = selectPrimaryPeriods(snapshotResult.snapshots, input.statement);
  const periods = selection.periods;
  const rows = buildRows(snapshotResult.snapshots, input.statement, selection.selectedPeriodIds);

  // Company name and CIK are read from the first filing returned.
  const latestFiling = filings[0] ?? null;
  const metrics = latestMetrics(snapshotResult.snapshots);
  const dimensionBreakdown = input.includeDimensions
    ? buildDimensionBreakdown(dimensionFacts.facts, periods)
    : null;

  const dimensionsCount = dimensionBreakdown
    ? Object.values(dimensionBreakdown).reduce((total, entries) => total + entries.length, 0)
    : 0;

  // With facts included, coverage is the size of the returned page; otherwise
  // it is the sum of the fact counts stored on the listed snapshots.
  const factsCoverage = input.includeFacts
    ? factsResult.facts.length
    : snapshotResult.snapshots.reduce((total, snapshot) => total + snapshot.facts_count, 0);

  return {
    company: {
      ticker,
      companyName: latestFiling?.company_name ?? ticker,
      cik: latestFiling?.cik ?? null
    },
    statement: input.statement,
    window: input.window,
    periods,
    rows,
    nextCursor: snapshotResult.nextCursor,
    facts: input.includeFacts
      ? {
          rows: factsResult.facts,
          nextCursor: factsResult.nextCursor
        }
      : null,
    coverage: {
      filings: periods.length,
      rows: rows.length,
      dimensions: dimensionsCount,
      facts: factsCoverage
    },
    dataSourceStatus: {
      enabled: input.v3Enabled,
      hydratedFilings: statuses.ready,
      partialFilings: statuses.partial,
      failedFilings: statuses.failed,
      // Financial filings not yet accounted for by any snapshot status.
      pendingFilings: Math.max(0, financialFilings.length - statuses.ready - statuses.partial - statuses.failed),
      queuedSync: input.queuedSync
    },
    metrics,
    dimensionBreakdown
  };
}
|
||||
|
||||
// Module-private helpers exposed under one object; the accompanying test file
// calls `selectPrimaryPeriods` and `buildPeriods` through it.
export const __financialTaxonomyInternals = {
|
||||
buildPeriods,
|
||||
isInstantPeriod,
|
||||
periodDurationDays,
|
||||
selectPrimaryPeriods
|
||||
};
|
||||
@@ -16,6 +16,7 @@ export type FilingStatementSnapshotPeriod = {
|
||||
filingId: number;
|
||||
accessionNumber: string;
|
||||
filingDate: string;
|
||||
periodStart: string | null;
|
||||
periodEnd: string | null;
|
||||
filingType: '10-K' | '10-Q';
|
||||
periodLabel: string;
|
||||
|
||||
676
lib/server/repos/filing-taxonomy.ts
Normal file
676
lib/server/repos/filing-taxonomy.ts
Normal file
@@ -0,0 +1,676 @@
|
||||
import { and, desc, eq, gte, inArray, lt, sql } from 'drizzle-orm';
|
||||
import type { Filing, FinancialStatementKind, MetricValidationResult, TaxonomyDimensionMember, TaxonomyFactRow, TaxonomyStatementRow } from '@/lib/types';
|
||||
import { db } from '@/lib/server/db';
|
||||
import {
|
||||
filingTaxonomyAsset,
|
||||
filingTaxonomyConcept,
|
||||
filingTaxonomyFact,
|
||||
filingTaxonomyMetricValidation,
|
||||
filingTaxonomySnapshot
|
||||
} from '@/lib/server/db/schema';
|
||||
|
||||
/** Allowed values of `parse_status` on taxonomy snapshot records and upsert inputs. */
export type FilingTaxonomyParseStatus = 'ready' | 'partial' | 'failed';
|
||||
/** Allowed values of `source` on taxonomy snapshot records and upsert inputs. */
export type FilingTaxonomySource = 'xbrl_instance' | 'xbrl_instance_with_linkbase' | 'legacy_html_fallback';
|
||||
/** Allowed values of `asset_type` on taxonomy asset records and upsert inputs. */
export type FilingTaxonomyAssetType =
|
||||
| 'instance'
|
||||
| 'schema'
|
||||
| 'presentation'
|
||||
| 'label'
|
||||
| 'calculation'
|
||||
| 'definition'
|
||||
| 'pdf'
|
||||
| 'other';
|
||||
|
||||
/**
 * One reporting period stored on a taxonomy snapshot, tagged with the filing
 * it came from. `periodStart` and `periodEnd` may each be `null`.
 */
export type FilingTaxonomyPeriod = {
|
||||
id: string;
|
||||
filingId: number;
|
||||
accessionNumber: string;
|
||||
filingDate: string;
|
||||
periodStart: string | null;
|
||||
periodEnd: string | null;
|
||||
filingType: '10-K' | '10-Q';
|
||||
periodLabel: string;
|
||||
};
|
||||
|
||||
/**
 * Application-level shape of a taxonomy snapshot, as produced by
 * `toSnapshotRecord`. That mapper substitutes defaults for missing columns:
 * `periods` becomes `[]`, `statement_rows` becomes an empty list per statement
 * kind, and `derived_metrics` / `validation_result` become `null`.
 */
export type FilingTaxonomySnapshotRecord = {
|
||||
id: number;
|
||||
filing_id: number;
|
||||
ticker: string;
|
||||
filing_date: string;
|
||||
filing_type: '10-K' | '10-Q';
|
||||
parse_status: FilingTaxonomyParseStatus;
|
||||
parse_error: string | null;
|
||||
source: FilingTaxonomySource;
|
||||
periods: FilingTaxonomyPeriod[];
|
||||
statement_rows: Record<FinancialStatementKind, TaxonomyStatementRow[]>;
|
||||
derived_metrics: Filing['metrics'];
|
||||
validation_result: MetricValidationResult | null;
|
||||
facts_count: number;
|
||||
concepts_count: number;
|
||||
dimensions_count: number;
|
||||
created_at: string;
|
||||
updated_at: string;
|
||||
};
|
||||
|
||||
/**
 * Application-level shape of a taxonomy asset row, as produced by
 * `toAssetRecord`. `score` is passed through `asNumber`, so it is either a
 * finite number or `null`.
 */
export type FilingTaxonomyAssetRecord = {
|
||||
id: number;
|
||||
snapshot_id: number;
|
||||
asset_type: FilingTaxonomyAssetType;
|
||||
name: string;
|
||||
url: string;
|
||||
size_bytes: number | null;
|
||||
score: number | null;
|
||||
is_selected: boolean;
|
||||
created_at: string;
|
||||
};
|
||||
|
||||
/**
 * Application-level shape of a taxonomy concept row, as produced by
 * `toConceptRecord`. `presentation_order` is passed through `asNumber`, and a
 * missing `statement_kind` is normalized to `null`.
 */
export type FilingTaxonomyConceptRecord = {
|
||||
id: number;
|
||||
snapshot_id: number;
|
||||
concept_key: string;
|
||||
qname: string;
|
||||
namespace_uri: string;
|
||||
local_name: string;
|
||||
label: string | null;
|
||||
is_extension: boolean;
|
||||
statement_kind: FinancialStatementKind | null;
|
||||
role_uri: string | null;
|
||||
presentation_order: number | null;
|
||||
presentation_depth: number | null;
|
||||
parent_concept_key: string | null;
|
||||
is_abstract: boolean;
|
||||
created_at: string;
|
||||
};
|
||||
|
||||
/**
 * Application-level shape of a taxonomy fact row, as produced by
 * `toFactRecord`. `value_num` is always a finite number: the mapper throws
 * when the stored value cannot be converted. A missing `statement_kind` is
 * normalized to `null`.
 */
export type FilingTaxonomyFactRecord = {
|
||||
id: number;
|
||||
snapshot_id: number;
|
||||
concept_key: string;
|
||||
qname: string;
|
||||
namespace_uri: string;
|
||||
local_name: string;
|
||||
statement_kind: FinancialStatementKind | null;
|
||||
role_uri: string | null;
|
||||
context_id: string;
|
||||
unit: string | null;
|
||||
decimals: string | null;
|
||||
value_num: number;
|
||||
period_start: string | null;
|
||||
period_end: string | null;
|
||||
period_instant: string | null;
|
||||
dimensions: TaxonomyDimensionMember[];
|
||||
is_dimensionless: boolean;
|
||||
source_file: string | null;
|
||||
created_at: string;
|
||||
};
|
||||
|
||||
/**
 * Application-level shape of a metric-validation row, as produced by
 * `toMetricValidationRecord`. `metric_key` is restricted to the keys of
 * `Filing['metrics']`; `taxonomy_value` and `llm_value` are passed through
 * `asNumber`, so each is a finite number or `null`.
 */
export type FilingTaxonomyMetricValidationRecord = {
|
||||
id: number;
|
||||
snapshot_id: number;
|
||||
metric_key: keyof NonNullable<Filing['metrics']>;
|
||||
taxonomy_value: number | null;
|
||||
llm_value: number | null;
|
||||
absolute_diff: number | null;
|
||||
relative_diff: number | null;
|
||||
status: 'not_run' | 'matched' | 'mismatch' | 'error';
|
||||
evidence_pages: number[];
|
||||
pdf_url: string | null;
|
||||
provider: string | null;
|
||||
model: string | null;
|
||||
error: string | null;
|
||||
created_at: string;
|
||||
updated_at: string;
|
||||
};
|
||||
|
||||
/**
 * Payload for writing a taxonomy snapshot together with its child rows.
 *
 * Each part is derived from the matching record type with its generated
 * columns removed, so the input cannot drift from the record shapes:
 * - snapshot fields: `FilingTaxonomySnapshotRecord` without `id`,
 *   `created_at` and `updated_at`;
 * - `assets`, `concepts`, `facts`: the corresponding record without `id`,
 *   `snapshot_id` and `created_at`;
 * - `metric_validations`: the record without `id`, `snapshot_id`,
 *   `created_at` and `updated_at`.
 */
export type UpsertFilingTaxonomySnapshotInput = Omit<
  FilingTaxonomySnapshotRecord,
  'id' | 'created_at' | 'updated_at'
> & {
  assets: Array<Omit<FilingTaxonomyAssetRecord, 'id' | 'snapshot_id' | 'created_at'>>;
  concepts: Array<Omit<FilingTaxonomyConceptRecord, 'id' | 'snapshot_id' | 'created_at'>>;
  facts: Array<Omit<FilingTaxonomyFactRecord, 'id' | 'snapshot_id' | 'created_at'>>;
  metric_validations: Array<
    Omit<FilingTaxonomyMetricValidationRecord, 'id' | 'snapshot_id' | 'created_at' | 'updated_at'>
  >;
};
|
||||
|
||||
/** Returns the UTC calendar date ten years before now, formatted `YYYY-MM-DD`. */
function tenYearsAgoIso() {
  const cutoff = new Date();
  const cutoffYear = cutoff.getUTCFullYear() - 10;
  cutoff.setUTCFullYear(cutoffYear);

  // The first ten characters of the ISO timestamp are the date portion.
  return cutoff.toISOString().substring(0, 10);
}
|
||||
|
||||
/**
 * Coerces a value to a finite number.
 *
 * @param value - A number or a numeric string; anything else yields `null`.
 * @returns The finite number, or `null` for non-finite numbers, blank or
 * non-numeric strings, and every other input type.
 */
function asNumber(value: unknown) {
  if (typeof value === 'number') {
    return Number.isFinite(value) ? value : null;
  }

  if (typeof value === 'string') {
    // Number('') and Number('   ') are 0, which would present a blank value
    // as a real zero; treat blank strings as "no number" instead.
    if (value.trim().length === 0) {
      return null;
    }

    const parsed = Number(value);
    return Number.isFinite(parsed) ? parsed : null;
  }

  return null;
}
|
||||
|
||||
/**
 * Renders a number as text.
 *
 * @returns `null` for `null` and for non-finite numbers, otherwise `String(value)`.
 */
function asNumericText(value: number | null) {
  const isUsable = value !== null && Number.isFinite(value);

  return isUsable ? String(value) : null;
}
|
||||
|
||||
/** Creates a fresh statement-rows record with an empty list for every statement kind. */
function emptyStatementRows(): Record<FinancialStatementKind, TaxonomyStatementRow[]> {
  const rowsByStatement: Record<FinancialStatementKind, TaxonomyStatementRow[]> = {
    income: [],
    balance: [],
    cash_flow: [],
    equity: [],
    comprehensive_income: []
  };

  return rowsByStatement;
}
|
||||
|
||||
/**
 * Maps a stored snapshot row to a `FilingTaxonomySnapshotRecord`.
 *
 * Missing `periods` become `[]`, missing `statement_rows` become an empty
 * list per statement kind, and missing `derived_metrics` / `validation_result`
 * become `null`. All other fields are copied unchanged.
 */
function toSnapshotRecord(row: typeof filingTaxonomySnapshot.$inferSelect): FilingTaxonomySnapshotRecord {
  const { periods, statement_rows, derived_metrics, validation_result } = row;

  const record: FilingTaxonomySnapshotRecord = {
    id: row.id,
    filing_id: row.filing_id,
    ticker: row.ticker,
    filing_date: row.filing_date,
    filing_type: row.filing_type,
    parse_status: row.parse_status,
    parse_error: row.parse_error,
    source: row.source,
    periods: periods ?? [],
    statement_rows: statement_rows ?? emptyStatementRows(),
    derived_metrics: derived_metrics ?? null,
    validation_result: validation_result ?? null,
    facts_count: row.facts_count,
    concepts_count: row.concepts_count,
    dimensions_count: row.dimensions_count,
    created_at: row.created_at,
    updated_at: row.updated_at
  };

  return record;
}
|
||||
|
||||
/**
 * Maps a stored asset row to a `FilingTaxonomyAssetRecord`. `score` goes
 * through `asNumber`; every other field is copied unchanged.
 */
function toAssetRecord(row: typeof filingTaxonomyAsset.$inferSelect): FilingTaxonomyAssetRecord {
  const { id, snapshot_id, asset_type, name, url, size_bytes, is_selected, created_at } = row;
  const score = asNumber(row.score);

  return { id, snapshot_id, asset_type, name, url, size_bytes, score, is_selected, created_at };
}
|
||||
|
||||
/**
 * Maps a `filingTaxonomyConcept` row to a `FilingTaxonomyConceptRecord`.
 *
 * `statement_kind` is defaulted to `null` and `presentation_order` is converted
 * through `asNumber`; every other column is copied as-is.
 */
function toConceptRecord(row: typeof filingTaxonomyConcept.$inferSelect): FilingTaxonomyConceptRecord {
  const statementKind = row.statement_kind ?? null;
  const presentationOrder = asNumber(row.presentation_order);

  const {
    id,
    snapshot_id,
    concept_key,
    qname,
    namespace_uri,
    local_name,
    label,
    is_extension,
    role_uri,
    presentation_depth,
    parent_concept_key,
    is_abstract,
    created_at
  } = row;

  return {
    id,
    snapshot_id,
    concept_key,
    qname,
    namespace_uri,
    local_name,
    label,
    is_extension,
    statement_kind: statementKind,
    role_uri,
    presentation_order: presentationOrder,
    presentation_depth,
    parent_concept_key,
    is_abstract,
    created_at
  };
}
|
||||
|
||||
/**
 * Maps a `filingTaxonomyFact` row to a `FilingTaxonomyFactRecord`.
 *
 * @throws Error when the row's `value_num` cannot be converted to a finite number.
 */
function toFactRecord(row: typeof filingTaxonomyFact.$inferSelect): FilingTaxonomyFactRecord {
  const numericValue = asNumber(row.value_num);
  if (numericValue === null) {
    throw new Error(`Invalid value_num for taxonomy fact row ${row.id}`);
  }

  const statementKind = row.statement_kind ?? null;
  const {
    id,
    snapshot_id,
    concept_key,
    qname,
    namespace_uri,
    local_name,
    role_uri,
    context_id,
    unit,
    decimals,
    period_start,
    period_end,
    period_instant,
    dimensions,
    is_dimensionless,
    source_file,
    created_at
  } = row;

  return {
    id,
    snapshot_id,
    concept_key,
    qname,
    namespace_uri,
    local_name,
    statement_kind: statementKind,
    role_uri,
    context_id,
    unit,
    decimals,
    value_num: numericValue,
    period_start,
    period_end,
    period_instant,
    dimensions,
    is_dimensionless,
    source_file,
    created_at
  };
}
|
||||
|
||||
/**
 * Maps a `filingTaxonomyMetricValidation` row to a `FilingTaxonomyMetricValidationRecord`.
 *
 * The four value/diff columns are converted through `asNumber`, and a nullish
 * `evidence_pages` becomes an empty list.
 */
function toMetricValidationRecord(row: typeof filingTaxonomyMetricValidation.$inferSelect): FilingTaxonomyMetricValidationRecord {
  const taxonomyValue = asNumber(row.taxonomy_value);
  const llmValue = asNumber(row.llm_value);
  const absoluteDiff = asNumber(row.absolute_diff);
  const relativeDiff = asNumber(row.relative_diff);
  const evidencePages = row.evidence_pages ?? [];

  const { id, snapshot_id, metric_key, status, pdf_url, provider, model, error, created_at, updated_at } = row;

  return {
    id,
    snapshot_id,
    metric_key,
    taxonomy_value: taxonomyValue,
    llm_value: llmValue,
    absolute_diff: absoluteDiff,
    relative_diff: relativeDiff,
    status,
    evidence_pages: evidencePages,
    pdf_url,
    provider,
    model,
    error,
    created_at,
    updated_at
  };
}
|
||||
|
||||
/**
 * Loads the taxonomy snapshot stored for a filing.
 *
 * @param filingId - Value matched against `filingTaxonomySnapshot.filing_id`.
 * @returns The mapped snapshot record, or `null` when no row matches.
 */
export async function getFilingTaxonomySnapshotByFilingId(filingId: number) {
  const matches = await db
    .select()
    .from(filingTaxonomySnapshot)
    .where(eq(filingTaxonomySnapshot.filing_id, filingId))
    .limit(1);

  const first = matches[0];
  if (!first) {
    return null;
  }

  return toSnapshotRecord(first);
}
|
||||
|
||||
/**
 * Lists the asset rows of one snapshot, highest `id` first.
 *
 * @param snapshotId - Value matched against `filingTaxonomyAsset.snapshot_id`.
 */
export async function listFilingTaxonomyAssets(snapshotId: number) {
  const belongsToSnapshot = eq(filingTaxonomyAsset.snapshot_id, snapshotId);
  const byIdDescending = desc(filingTaxonomyAsset.id);

  const assetRows = await db
    .select()
    .from(filingTaxonomyAsset)
    .where(belongsToSnapshot)
    .orderBy(byIdDescending);

  return assetRows.map((assetRow) => toAssetRecord(assetRow));
}
|
||||
|
||||
/**
 * Lists the concept rows of one snapshot, highest `id` first.
 *
 * @param snapshotId - Value matched against `filingTaxonomyConcept.snapshot_id`.
 */
export async function listFilingTaxonomyConcepts(snapshotId: number) {
  const belongsToSnapshot = eq(filingTaxonomyConcept.snapshot_id, snapshotId);
  const byIdDescending = desc(filingTaxonomyConcept.id);

  const conceptRows = await db
    .select()
    .from(filingTaxonomyConcept)
    .where(belongsToSnapshot)
    .orderBy(byIdDescending);

  return conceptRows.map((conceptRow) => toConceptRecord(conceptRow));
}
|
||||
|
||||
/**
 * Lists the fact rows of one snapshot, highest `id` first.
 *
 * @param snapshotId - Value matched against `filingTaxonomyFact.snapshot_id`.
 * @throws Error (from `toFactRecord`) when a row's `value_num` is not a finite number.
 */
export async function listFilingTaxonomyFacts(snapshotId: number) {
  const belongsToSnapshot = eq(filingTaxonomyFact.snapshot_id, snapshotId);
  const byIdDescending = desc(filingTaxonomyFact.id);

  const factRows = await db
    .select()
    .from(filingTaxonomyFact)
    .where(belongsToSnapshot)
    .orderBy(byIdDescending);

  return factRows.map((factRow) => toFactRecord(factRow));
}
|
||||
|
||||
/**
 * Lists the metric-validation rows of one snapshot, highest `id` first.
 *
 * @param snapshotId - Value matched against `filingTaxonomyMetricValidation.snapshot_id`.
 */
export async function listFilingTaxonomyMetricValidations(snapshotId: number) {
  const belongsToSnapshot = eq(filingTaxonomyMetricValidation.snapshot_id, snapshotId);
  const byIdDescending = desc(filingTaxonomyMetricValidation.id);

  const validationRows = await db
    .select()
    .from(filingTaxonomyMetricValidation)
    .where(belongsToSnapshot)
    .orderBy(byIdDescending);

  return validationRows.map((validationRow) => toMetricValidationRecord(validationRow));
}
|
||||
|
||||
/**
 * Inserts or updates the taxonomy snapshot for a filing and replaces all of its child rows.
 *
 * The snapshot row is upserted on `filing_id`. The snapshot's existing asset, concept,
 * fact and metric-validation rows are then deleted and re-inserted from `input`.
 *
 * NOTE(review): every statement below is awaited separately on `db`, and no transaction
 * is visible in this function, so a failure part-way through can leave the child tables
 * partially replaced. Confirm whether callers rely on atomicity.
 *
 * @param input - Snapshot columns plus the `assets`, `concepts`, `facts` and
 *   `metric_validations` collections to persist.
 * @returns The saved snapshot row mapped to its record shape.
 */
export async function upsertFilingTaxonomySnapshot(input: UpsertFilingTaxonomySnapshotInput) {
  const timestamp = new Date().toISOString();

  // Columns written identically on the insert path and on the conflict-update path.
  const snapshotColumns = {
    ticker: input.ticker,
    filing_date: input.filing_date,
    filing_type: input.filing_type,
    parse_status: input.parse_status,
    parse_error: input.parse_error,
    source: input.source,
    periods: input.periods,
    statement_rows: input.statement_rows,
    derived_metrics: input.derived_metrics,
    validation_result: input.validation_result,
    facts_count: input.facts_count,
    concepts_count: input.concepts_count,
    dimensions_count: input.dimensions_count,
    updated_at: timestamp
  };

  const [saved] = await db
    .insert(filingTaxonomySnapshot)
    .values({
      filing_id: input.filing_id,
      ...snapshotColumns,
      created_at: timestamp
    })
    .onConflictDoUpdate({
      target: filingTaxonomySnapshot.filing_id,
      set: snapshotColumns
    })
    .returning();

  const snapshotId = saved.id;

  // Clear the previous child rows before writing the new ones.
  await db.delete(filingTaxonomyAsset).where(eq(filingTaxonomyAsset.snapshot_id, snapshotId));
  await db.delete(filingTaxonomyConcept).where(eq(filingTaxonomyConcept.snapshot_id, snapshotId));
  await db.delete(filingTaxonomyFact).where(eq(filingTaxonomyFact.snapshot_id, snapshotId));
  await db.delete(filingTaxonomyMetricValidation).where(eq(filingTaxonomyMetricValidation.snapshot_id, snapshotId));

  const { assets, concepts, facts, metric_validations: metricValidations } = input;

  if (assets.length > 0) {
    await db.insert(filingTaxonomyAsset).values(
      assets.map((asset) => ({
        snapshot_id: snapshotId,
        asset_type: asset.asset_type,
        name: asset.name,
        url: asset.url,
        size_bytes: asset.size_bytes,
        score: asNumericText(asset.score),
        is_selected: asset.is_selected,
        created_at: timestamp
      }))
    );
  }

  if (concepts.length > 0) {
    await db.insert(filingTaxonomyConcept).values(
      concepts.map((concept) => ({
        snapshot_id: snapshotId,
        concept_key: concept.concept_key,
        qname: concept.qname,
        namespace_uri: concept.namespace_uri,
        local_name: concept.local_name,
        label: concept.label,
        is_extension: concept.is_extension,
        statement_kind: concept.statement_kind,
        role_uri: concept.role_uri,
        presentation_order: asNumericText(concept.presentation_order),
        presentation_depth: concept.presentation_depth,
        parent_concept_key: concept.parent_concept_key,
        is_abstract: concept.is_abstract,
        created_at: timestamp
      }))
    );
  }

  if (facts.length > 0) {
    await db.insert(filingTaxonomyFact).values(
      facts.map((fact) => ({
        snapshot_id: snapshotId,
        concept_key: fact.concept_key,
        qname: fact.qname,
        namespace_uri: fact.namespace_uri,
        local_name: fact.local_name,
        statement_kind: fact.statement_kind,
        role_uri: fact.role_uri,
        context_id: fact.context_id,
        unit: fact.unit,
        decimals: fact.decimals,
        value_num: String(fact.value_num),
        period_start: fact.period_start,
        period_end: fact.period_end,
        period_instant: fact.period_instant,
        dimensions: fact.dimensions,
        is_dimensionless: fact.is_dimensionless,
        source_file: fact.source_file,
        created_at: timestamp
      }))
    );
  }

  if (metricValidations.length > 0) {
    await db.insert(filingTaxonomyMetricValidation).values(
      metricValidations.map((check) => ({
        snapshot_id: snapshotId,
        metric_key: check.metric_key,
        taxonomy_value: asNumericText(check.taxonomy_value),
        llm_value: asNumericText(check.llm_value),
        absolute_diff: asNumericText(check.absolute_diff),
        relative_diff: asNumericText(check.relative_diff),
        status: check.status,
        evidence_pages: check.evidence_pages,
        pdf_url: check.pdf_url,
        provider: check.provider,
        model: check.model,
        error: check.error,
        created_at: timestamp,
        updated_at: timestamp
      }))
    );
  }

  return toSnapshotRecord(saved);
}
|
||||
|
||||
/**
 * Lists taxonomy snapshots for a ticker, ordered by `filing_date` then `id`, both
 * descending, one page at a time.
 *
 * The cursor is an opaque string. Cursors produced by this function have the form
 * `<id>@<filing_date>` and resume strictly after that row in the sort order.
 * A bare numeric cursor (the previous format) is still accepted and filters on
 * `id < cursor` only, as before. Filtering on `id` alone while sorting by
 * `filing_date` first can skip or repeat rows when the two orders disagree, which
 * is why new cursors carry both values.
 *
 * @param input.ticker - Ticker symbol; trimmed and upper-cased before matching.
 * @param input.window - `'10y'` restricts to `filing_date >= tenYearsAgoIso()`; `'all'` does not.
 * @param input.limit - Page size, truncated and clamped to 1..120; defaults to 40
 *   when omitted or not a finite number.
 * @param input.cursor - Cursor returned by a previous call, or nullish for the first page.
 * @returns The page of snapshot records and `nextCursor` (`null` when there are no more rows).
 */
export async function listFilingTaxonomySnapshotsByTicker(input: {
  ticker: string;
  window: '10y' | 'all';
  limit?: number;
  cursor?: string | null;
}) {
  // A NaN/Infinity limit would otherwise propagate through the clamp into `.limit()`.
  const requestedLimit = typeof input.limit === 'number' && Number.isFinite(input.limit)
    ? input.limit
    : 40;
  const safeLimit = Math.min(Math.max(Math.trunc(requestedLimit), 1), 120);
  const constraints = [eq(filingTaxonomySnapshot.ticker, input.ticker.trim().toUpperCase())];

  if (input.window === '10y') {
    constraints.push(gte(filingTaxonomySnapshot.filing_date, tenYearsAgoIso()));
  }

  const rawCursor = input.cursor ? input.cursor.trim() : '';
  if (rawCursor) {
    const separatorIndex = rawCursor.indexOf('@');
    const idPart = separatorIndex === -1 ? rawCursor : rawCursor.slice(0, separatorIndex);
    const cursorDate = separatorIndex === -1 ? '' : rawCursor.slice(separatorIndex + 1);
    const cursorId = Number.parseInt(idPart, 10);

    if (Number.isFinite(cursorId) && cursorId > 0) {
      if (cursorDate) {
        // Keyset predicate matching ORDER BY filing_date DESC, id DESC.
        constraints.push(
          sql`(${filingTaxonomySnapshot.filing_date} < ${cursorDate} or (${filingTaxonomySnapshot.filing_date} = ${cursorDate} and ${filingTaxonomySnapshot.id} < ${cursorId}))`
        );
      } else {
        // Legacy id-only cursor.
        constraints.push(lt(filingTaxonomySnapshot.id, cursorId));
      }
    }
  }

  // Fetch one extra row to learn whether another page exists.
  const rows = await db
    .select()
    .from(filingTaxonomySnapshot)
    .where(and(...constraints))
    .orderBy(desc(filingTaxonomySnapshot.filing_date), desc(filingTaxonomySnapshot.id))
    .limit(safeLimit + 1);

  const hasMore = rows.length > safeLimit;
  const usedRows = hasMore ? rows.slice(0, safeLimit) : rows;
  const lastRow = usedRows[usedRows.length - 1];

  let nextCursor: string | null = null;
  if (hasMore) {
    nextCursor = lastRow ? `${lastRow.id}@${lastRow.filing_date}` : '';
  }

  return {
    snapshots: usedRows.map(toSnapshotRecord),
    nextCursor
  };
}
|
||||
|
||||
/**
 * Counts a ticker's taxonomy snapshots per parse status.
 *
 * @param ticker - Ticker symbol; trimmed and upper-cased before matching.
 * @returns Counts keyed by status; `ready`, `partial` and `failed` start at 0 and are
 *   overwritten by the grouped query results.
 */
export async function countFilingTaxonomySnapshotStatuses(ticker: string) {
  const normalizedTicker = ticker.trim().toUpperCase();

  const grouped = await db
    .select({
      status: filingTaxonomySnapshot.parse_status,
      count: sql<string>`count(*)`
    })
    .from(filingTaxonomySnapshot)
    .where(eq(filingTaxonomySnapshot.ticker, normalizedTicker))
    .groupBy(filingTaxonomySnapshot.parse_status);

  const totals: Record<FilingTaxonomyParseStatus, number> = {
    ready: 0,
    partial: 0,
    failed: 0
  };

  for (const { status, count } of grouped) {
    totals[status] = Number(count);
  }

  return totals;
}
|
||||
|
||||
/**
 * Lists taxonomy facts for a ticker across its snapshots, highest fact `id` first,
 * one page at a time.
 *
 * @param input.ticker - Ticker symbol; trimmed and upper-cased before matching the snapshot.
 * @param input.window - `'10y'` restricts to snapshots with `filing_date >= tenYearsAgoIso()`.
 * @param input.statement - When set, only facts with this `statement_kind` are returned.
 * @param input.cursor - Fact id (as a string) from a previous `nextCursor`; only facts with
 *   a lower id are returned.
 * @param input.limit - Page size, truncated and clamped to 1..2000; defaults to 500.
 * @returns The page of facts and `nextCursor` (`null` when there are no more rows).
 * @throws Error when a returned row's `value_num` is not a finite number.
 */
export async function listTaxonomyFactsByTicker(input: {
  ticker: string;
  window: '10y' | 'all';
  statement?: FinancialStatementKind;
  cursor?: string | null;
  limit?: number;
}) {
  const safeLimit = Math.min(Math.max(Math.trunc(input.limit ?? 500), 1), 2000);
  const cursorId = input.cursor ? Number.parseInt(input.cursor, 10) : null;

  const conditions = [eq(filingTaxonomySnapshot.ticker, input.ticker.trim().toUpperCase())];
  if (input.window === '10y') {
    conditions.push(gte(filingTaxonomySnapshot.filing_date, tenYearsAgoIso()));
  }
  if (input.statement) {
    conditions.push(eq(filingTaxonomyFact.statement_kind, input.statement));
  }
  if (cursorId && Number.isFinite(cursorId) && cursorId > 0) {
    conditions.push(lt(filingTaxonomyFact.id, cursorId));
  }

  // Fact columns plus the owning snapshot's filing id and date.
  const projection = {
    id: filingTaxonomyFact.id,
    snapshot_id: filingTaxonomyFact.snapshot_id,
    filing_id: filingTaxonomySnapshot.filing_id,
    filing_date: filingTaxonomySnapshot.filing_date,
    statement_kind: filingTaxonomyFact.statement_kind,
    role_uri: filingTaxonomyFact.role_uri,
    concept_key: filingTaxonomyFact.concept_key,
    qname: filingTaxonomyFact.qname,
    namespace_uri: filingTaxonomyFact.namespace_uri,
    local_name: filingTaxonomyFact.local_name,
    value_num: filingTaxonomyFact.value_num,
    context_id: filingTaxonomyFact.context_id,
    unit: filingTaxonomyFact.unit,
    decimals: filingTaxonomyFact.decimals,
    period_start: filingTaxonomyFact.period_start,
    period_end: filingTaxonomyFact.period_end,
    period_instant: filingTaxonomyFact.period_instant,
    dimensions: filingTaxonomyFact.dimensions,
    is_dimensionless: filingTaxonomyFact.is_dimensionless,
    source_file: filingTaxonomyFact.source_file
  };

  // Fetch one extra row to learn whether another page exists.
  const rows = await db
    .select(projection)
    .from(filingTaxonomyFact)
    .innerJoin(filingTaxonomySnapshot, eq(filingTaxonomyFact.snapshot_id, filingTaxonomySnapshot.id))
    .where(and(...conditions))
    .orderBy(desc(filingTaxonomyFact.id))
    .limit(safeLimit + 1);

  const hasMore = rows.length > safeLimit;
  const pageRows = hasMore ? rows.slice(0, safeLimit) : rows;
  const nextCursor = hasMore ? String(pageRows[pageRows.length - 1]?.id ?? '') : null;

  const facts: TaxonomyFactRow[] = [];
  for (const row of pageRows) {
    const value = asNumber(row.value_num);
    if (value === null) {
      throw new Error(`Invalid value_num in taxonomy fact ${row.id}`);
    }

    facts.push({
      id: row.id,
      snapshotId: row.snapshot_id,
      filingId: row.filing_id,
      filingDate: row.filing_date,
      statement: row.statement_kind,
      roleUri: row.role_uri,
      conceptKey: row.concept_key,
      qname: row.qname,
      namespaceUri: row.namespace_uri,
      localName: row.local_name,
      value,
      contextId: row.context_id,
      unit: row.unit,
      decimals: row.decimals,
      periodStart: row.period_start,
      periodEnd: row.period_end,
      periodInstant: row.period_instant,
      dimensions: row.dimensions,
      isDimensionless: row.is_dimensionless,
      sourceFile: row.source_file
    });
  }

  return { facts, nextCursor };
}
|
||||
|
||||
/**
 * Lists asset rows belonging to any of the given snapshots, highest `id` first.
 *
 * @param snapshotIds - Snapshot ids to match; an empty list returns `[]` without querying.
 */
export async function listTaxonomyAssetsBySnapshotIds(snapshotIds: number[]) {
  const hasIds = snapshotIds.length !== 0;
  if (!hasIds) {
    return [];
  }

  const assetRows = await db
    .select()
    .from(filingTaxonomyAsset)
    .where(inArray(filingTaxonomyAsset.snapshot_id, snapshotIds))
    .orderBy(desc(filingTaxonomyAsset.id));

  return assetRows.map((assetRow) => toAssetRecord(assetRow));
}
|
||||
@@ -170,3 +170,19 @@ export async function saveFilingAnalysis(
|
||||
|
||||
return updated ? toFiling(updated) : null;
|
||||
}
|
||||
|
||||
/**
 * Overwrites a filing's `metrics` column and refreshes its `updated_at` timestamp.
 *
 * @param filingId - Value matched against `filing.id`.
 * @param metrics - New metrics value to store.
 * @returns The updated filing mapped through `toFiling`, or `null` when no row was updated.
 */
export async function updateFilingMetricsById(
  filingId: number,
  metrics: Filing['metrics']
) {
  const changes = {
    metrics,
    updated_at: new Date().toISOString()
  };

  const updatedRows = await db
    .update(filing)
    .set(changes)
    .where(eq(filing.id, filingId))
    .returning();

  const updated = updatedRows[0];
  if (!updated) {
    return null;
  }

  return toFiling(updated);
}
|
||||
|
||||
@@ -1378,6 +1378,7 @@ export async function hydrateFilingStatementSnapshot(
|
||||
filingId: input.filingId,
|
||||
accessionNumber: input.accessionNumber,
|
||||
filingDate: input.filingDate,
|
||||
periodStart: null,
|
||||
periodEnd: input.filingDate,
|
||||
filingType: input.filingType,
|
||||
periodLabel: input.filingType === '10-Q' ? 'Quarter End' : 'Fiscal Year End'
|
||||
|
||||
@@ -13,12 +13,13 @@ import {
|
||||
getFilingByAccession,
|
||||
listFilingsRecords,
|
||||
saveFilingAnalysis,
|
||||
updateFilingMetricsById,
|
||||
upsertFilingsRecords
|
||||
} from '@/lib/server/repos/filings';
|
||||
import {
|
||||
getFilingStatementSnapshotByFilingId,
|
||||
upsertFilingStatementSnapshot
|
||||
} from '@/lib/server/repos/filing-statements';
|
||||
getFilingTaxonomySnapshotByFilingId,
|
||||
upsertFilingTaxonomySnapshot
|
||||
} from '@/lib/server/repos/filing-taxonomy';
|
||||
import {
|
||||
applyRefreshedPrices,
|
||||
listHoldingsForPriceRefresh,
|
||||
@@ -27,11 +28,10 @@ import {
|
||||
import { createPortfolioInsight } from '@/lib/server/repos/insights';
|
||||
import { updateTaskStage } from '@/lib/server/repos/tasks';
|
||||
import {
|
||||
fetchFilingMetricsForFilings,
|
||||
fetchPrimaryFilingText,
|
||||
fetchRecentFilings,
|
||||
hydrateFilingStatementSnapshot
|
||||
fetchRecentFilings
|
||||
} from '@/lib/server/sec';
|
||||
import { hydrateFilingTaxonomySnapshot } from '@/lib/server/taxonomy/engine';
|
||||
|
||||
const EXTRACTION_REQUIRED_KEYS = [
|
||||
'summary',
|
||||
@@ -88,6 +88,10 @@ const COMPANY_SPECIFIC_PATTERNS = [
|
||||
|
||||
/** Union of the property names of a non-nullish `Filing['metrics']` value. */
type FilingMetricKey = keyof NonNullable<Filing['metrics']>;
|
||||
|
||||
/**
 * Type guard that is true only for the `'10-K'` and `'10-Q'` filing types.
 *
 * @param filingType - Filing type string to test (exact, case-sensitive match).
 */
function isFinancialMetricsForm(filingType: string): filingType is '10-K' | '10-Q' {
  switch (filingType) {
    case '10-K':
    case '10-Q':
      return true;
    default:
      return false;
  }
}
|
||||
|
||||
const METRIC_CHECK_PATTERNS: Array<{
|
||||
key: FilingMetricKey;
|
||||
label: string;
|
||||
@@ -120,10 +124,6 @@ const METRIC_CHECK_PATTERNS: Array<{
|
||||
}
|
||||
];
|
||||
|
||||
function isFinancialMetricsForm(form: Filing['filing_type']) {
|
||||
return form === '10-K' || form === '10-Q';
|
||||
}
|
||||
|
||||
function toTaskResult(value: unknown): Record<string, unknown> {
|
||||
if (!value || typeof value !== 'object' || Array.isArray(value)) {
|
||||
return { value };
|
||||
@@ -565,40 +565,6 @@ async function processSyncFilings(task: Task) {
|
||||
`Fetching up to ${limit} filings for ${ticker}${scopeLabel ? ` (${scopeLabel})` : ''}`
|
||||
);
|
||||
const filings = await fetchRecentFilings(ticker, limit);
|
||||
const metricsByAccession = new Map<string, Filing['metrics']>();
|
||||
const filingsByCik = new Map<string, typeof filings>();
|
||||
|
||||
for (const filing of filings) {
|
||||
const group = filingsByCik.get(filing.cik);
|
||||
if (group) {
|
||||
group.push(filing);
|
||||
continue;
|
||||
}
|
||||
|
||||
filingsByCik.set(filing.cik, [filing]);
|
||||
}
|
||||
|
||||
await setProjectionStage(task, 'sync.fetch_metrics', `Computing financial metrics for ${filings.length} filings`);
|
||||
for (const [cik, filingsForCik] of filingsByCik) {
|
||||
const filingsForFinancialMetrics = filingsForCik.filter((filing) => isFinancialMetricsForm(filing.filingType));
|
||||
if (filingsForFinancialMetrics.length === 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const metricsMap = await fetchFilingMetricsForFilings(
|
||||
cik,
|
||||
filingsForCik[0]?.ticker ?? ticker,
|
||||
filingsForFinancialMetrics.map((filing) => ({
|
||||
accessionNumber: filing.accessionNumber,
|
||||
filingDate: filing.filingDate,
|
||||
filingType: filing.filingType
|
||||
}))
|
||||
);
|
||||
|
||||
for (const [accessionNumber, metrics] of metricsMap.entries()) {
|
||||
metricsByAccession.set(accessionNumber, metrics);
|
||||
}
|
||||
}
|
||||
|
||||
await setProjectionStage(task, 'sync.persist_filings', 'Persisting filings and links');
|
||||
const saveResult = await upsertFilingsRecords(
|
||||
@@ -612,24 +578,24 @@ async function processSyncFilings(task: Task) {
|
||||
filing_url: filing.filingUrl,
|
||||
submission_url: filing.submissionUrl,
|
||||
primary_document: filing.primaryDocument,
|
||||
metrics: metricsByAccession.get(filing.accessionNumber) ?? null,
|
||||
metrics: null,
|
||||
links: filingLinks(filing)
|
||||
}))
|
||||
);
|
||||
|
||||
let statementSnapshotsHydrated = 0;
|
||||
let statementSnapshotsFailed = 0;
|
||||
let taxonomySnapshotsHydrated = 0;
|
||||
let taxonomySnapshotsFailed = 0;
|
||||
const hydrateCandidates = (await listFilingsRecords({
|
||||
ticker,
|
||||
limit: Math.min(Math.max(limit * 3, 40), STATEMENT_HYDRATION_MAX_FILINGS)
|
||||
}))
|
||||
.filter((filing): filing is Filing & { filing_type: '10-K' | '10-Q' } => {
|
||||
return filing.filing_type === '10-K' || filing.filing_type === '10-Q';
|
||||
return isFinancialMetricsForm(filing.filing_type);
|
||||
});
|
||||
|
||||
await setProjectionStage(task, 'sync.hydrate_statements', `Hydrating statement snapshots for ${hydrateCandidates.length} candidate filings`);
|
||||
await setProjectionStage(task, 'sync.discover_assets', `Discovering taxonomy assets for ${hydrateCandidates.length} candidate filings`);
|
||||
for (const filing of hydrateCandidates) {
|
||||
const existingSnapshot = await getFilingStatementSnapshotByFilingId(filing.id);
|
||||
const existingSnapshot = await getFilingTaxonomySnapshotByFilingId(filing.id);
|
||||
const shouldRefresh = !existingSnapshot
|
||||
|| Date.parse(existingSnapshot.updated_at) < Date.parse(filing.updated_at);
|
||||
|
||||
@@ -638,7 +604,8 @@ async function processSyncFilings(task: Task) {
|
||||
}
|
||||
|
||||
try {
|
||||
const snapshot = await hydrateFilingStatementSnapshot({
|
||||
await setProjectionStage(task, 'sync.extract_taxonomy', `Extracting XBRL taxonomy for ${filing.accession_number}`);
|
||||
const snapshot = await hydrateFilingTaxonomySnapshot({
|
||||
filingId: filing.id,
|
||||
ticker: filing.ticker,
|
||||
cik: filing.cik,
|
||||
@@ -646,27 +613,50 @@ async function processSyncFilings(task: Task) {
|
||||
filingDate: filing.filing_date,
|
||||
filingType: filing.filing_type,
|
||||
filingUrl: filing.filing_url,
|
||||
primaryDocument: filing.primary_document ?? null,
|
||||
metrics: filing.metrics
|
||||
primaryDocument: filing.primary_document ?? null
|
||||
});
|
||||
|
||||
await upsertFilingStatementSnapshot(snapshot);
|
||||
statementSnapshotsHydrated += 1;
|
||||
await setProjectionStage(task, 'sync.normalize_taxonomy', `Materializing statements for ${filing.accession_number}`);
|
||||
await setProjectionStage(task, 'sync.derive_metrics', `Deriving taxonomy metrics for ${filing.accession_number}`);
|
||||
await setProjectionStage(task, 'sync.validate_pdf_metrics', `Validating metrics via PDF + LLM for ${filing.accession_number}`);
|
||||
await setProjectionStage(task, 'sync.persist_taxonomy', `Persisting taxonomy snapshot for ${filing.accession_number}`);
|
||||
|
||||
await upsertFilingTaxonomySnapshot(snapshot);
|
||||
await updateFilingMetricsById(filing.id, snapshot.derived_metrics);
|
||||
taxonomySnapshotsHydrated += 1;
|
||||
} catch (error) {
|
||||
await upsertFilingStatementSnapshot({
|
||||
const now = new Date().toISOString();
|
||||
await upsertFilingTaxonomySnapshot({
|
||||
filing_id: filing.id,
|
||||
ticker: filing.ticker,
|
||||
filing_date: filing.filing_date,
|
||||
filing_type: filing.filing_type,
|
||||
period_end: filing.filing_date,
|
||||
statement_bundle: null,
|
||||
standardized_bundle: null,
|
||||
dimension_bundle: null,
|
||||
parse_status: 'failed',
|
||||
parse_error: error instanceof Error ? error.message : 'Statement hydration failed',
|
||||
source: 'companyfacts_fallback'
|
||||
parse_error: error instanceof Error ? error.message : 'Taxonomy hydration failed',
|
||||
source: 'legacy_html_fallback',
|
||||
periods: [],
|
||||
statement_rows: {
|
||||
income: [],
|
||||
balance: [],
|
||||
cash_flow: [],
|
||||
equity: [],
|
||||
comprehensive_income: []
|
||||
},
|
||||
derived_metrics: filing.metrics ?? null,
|
||||
validation_result: {
|
||||
status: 'error',
|
||||
checks: [],
|
||||
validatedAt: now
|
||||
},
|
||||
facts_count: 0,
|
||||
concepts_count: 0,
|
||||
dimensions_count: 0,
|
||||
assets: [],
|
||||
concepts: [],
|
||||
facts: [],
|
||||
metric_validations: []
|
||||
});
|
||||
statementSnapshotsFailed += 1;
|
||||
taxonomySnapshotsFailed += 1;
|
||||
}
|
||||
|
||||
await Bun.sleep(STATEMENT_HYDRATION_DELAY_MS);
|
||||
@@ -679,8 +669,8 @@ async function processSyncFilings(task: Task) {
|
||||
fetched: filings.length,
|
||||
inserted: saveResult.inserted,
|
||||
updated: saveResult.updated,
|
||||
statementSnapshotsHydrated,
|
||||
statementSnapshotsFailed
|
||||
taxonomySnapshotsHydrated,
|
||||
taxonomySnapshotsFailed
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
73
lib/server/taxonomy/asset-discovery.test.ts
Normal file
73
lib/server/taxonomy/asset-discovery.test.ts
Normal file
@@ -0,0 +1,73 @@
|
||||
import { describe, expect, it } from 'bun:test';
|
||||
import { discoverFilingAssets } from '@/lib/server/taxonomy/asset-discovery';
|
||||
|
||||
describe('taxonomy asset discovery', () => {
  // Filing identity and directory shared by both scenarios.
  const filingDirectory = 'https://www.sec.gov/Archives/edgar/data/123456/000012345626000001/';
  const cik = '0000123456';
  const accessionNumber = '0000123456-26-000001';

  it('classifies assets and selects ranked instance/pdf candidates', async () => {
    const directoryListing = {
      directory: {
        item: [
          { name: 'abc_htm.xml', size: '900000' },
          { name: 'abc_pre.xml', size: '250000' },
          { name: 'abc_lab.xml', size: '120000' },
          { name: '10k_financial_statements.pdf', size: '400000' },
          { name: 'annual_report.pdf', size: '300000' },
          { name: 'quarter_statement.pdf', size: '200000' },
          { name: 'exhibit99.pdf', size: '500000' }
        ]
      }
    };

    // Stub that answers every request with the directory listing as JSON.
    const fetchImpl = (async () => {
      return new Response(JSON.stringify(directoryListing), {
        status: 200,
        headers: {
          'content-type': 'application/json'
        }
      });
    }) as unknown as typeof fetch;

    const result = await discoverFilingAssets({
      cik,
      accessionNumber,
      filingUrl: `${filingDirectory}abc.htm`,
      primaryDocument: 'abc.htm',
      fetchImpl
    });

    expect(result.directoryUrl).toBe(filingDirectory);

    const selectedInstance = result.assets.find((asset) => asset.asset_type === 'instance' && asset.is_selected);
    expect(selectedInstance?.name).toBe('abc_htm.xml');

    const selectedPdfs = result.assets
      .filter((asset) => asset.asset_type === 'pdf' && asset.is_selected)
      .map((asset) => asset.name);
    expect(selectedPdfs.length).toBe(3);
    for (const expectedName of ['10k_financial_statements.pdf', 'annual_report.pdf', 'quarter_statement.pdf']) {
      expect(selectedPdfs).toContain(expectedName);
    }
    expect(selectedPdfs).not.toContain('exhibit99.pdf');
  });

  it('falls back to filing url when SEC directory assets are unavailable', async () => {
    // Stub that answers every request with a 404.
    const fetchImpl = (async () => {
      return new Response('not found', { status: 404 });
    }) as unknown as typeof fetch;

    const filingUrl = `${filingDirectory}abc.xml`;
    const result = await discoverFilingAssets({
      cik,
      accessionNumber,
      filingUrl,
      primaryDocument: 'abc.xml',
      fetchImpl
    });

    expect(result.assets.length).toBe(1);
    expect(result.assets[0]).toEqual({
      asset_type: 'instance',
      name: 'abc.xml',
      url: filingUrl,
      size_bytes: null,
      score: 6,
      is_selected: true
    });
  });
});
|
||||
283
lib/server/taxonomy/asset-discovery.ts
Normal file
283
lib/server/taxonomy/asset-discovery.ts
Normal file
@@ -0,0 +1,283 @@
|
||||
import type { TaxonomyAsset } from '@/lib/server/taxonomy/types';
|
||||
|
||||
/** Input accepted by `discoverFilingAssets`. */
type FilingAssetDiscoveryInput = {
  /** Company CIK; its digits are used to build the directory URL when `filingUrl` is unusable. */
  cik: string;
  /** Accession number; its dash-free form is used to build the fallback directory URL. */
  accessionNumber: string;
  /** Filing document URL; its directory portion is preferred as the filing directory. */
  filingUrl: string | null;
  /** Primary document file name; used when scoring instance candidates and naming the fallback asset. */
  primaryDocument: string | null;
  /** Fetch implementation to use; the global `fetch` is used when omitted. */
  fetchImpl?: typeof fetch;
};
|
||||
|
||||
/** One entry of a filing directory listing; every field is optional. */
type FilingDirectoryItem = {
  name?: string;
  type?: string;
  /** Entry size, given either as a number or as a string. */
  size?: string | number;
};

/** Shape of the `index.json` payload read for a filing directory. */
type FilingDirectoryJson = {
  directory?: {
    item?: FilingDirectoryItem[];
  };
};
|
||||
|
||||
/**
 * Resolves the User-Agent string.
 *
 * @returns `process.env.SEC_USER_AGENT` when it is set to a non-empty value,
 *   otherwise the built-in default.
 */
function envUserAgent() {
  const configured = process.env.SEC_USER_AGENT;
  if (configured) {
    return configured;
  }

  return 'Fiscal Clone <support@fiscal.local>';
}
|
||||
|
||||
/**
 * Removes every `-` from an accession number.
 *
 * @param value - Accession number, e.g. `0000123456-26-000001`.
 * @returns The same string with all dashes removed.
 */
function compactAccessionNumber(value: string) {
  return value.split('-').join('');
}
|
||||
|
||||
/**
 * Normalizes a CIK for use as a URL path segment: keeps only its digits and drops
 * leading zeros.
 *
 * The zeros are stripped textually rather than via `Number()`, so long digit runs are
 * neither rounded (past 2^53) nor rendered in exponent notation (e.g. `1e+21`).
 *
 * @param value - Raw CIK, possibly zero-padded or containing non-digit characters.
 * @returns The digit string without leading zeros (`'0'` when every digit is zero),
 *   or `null` when `value` contains no digits.
 */
function normalizeCikForPath(value: string) {
  const digits = value.replace(/\D/g, '');
  if (!digits) {
    return null;
  }

  const withoutLeadingZeros = digits.replace(/^0+/, '');
  return withoutLeadingZeros.length > 0 ? withoutLeadingZeros : '0';
}
|
||||
|
||||
/**
 * Determines the directory URL (with trailing slash) that holds a filing's documents.
 *
 * The directory portion of `filingUrl` is used when the URL's last `/` lies beyond the
 * length of `'https://'`. Otherwise the URL is built from the normalized CIK and the
 * dash-free accession number.
 *
 * @returns The directory URL, or `null` when neither source yields one.
 */
function resolveFilingDirectoryUrl(input: {
  filingUrl: string | null;
  cik: string;
  accessionNumber: string;
}) {
  const trimmedUrl = input.filingUrl?.trim();
  const slashIndex = trimmedUrl ? trimmedUrl.lastIndexOf('/') : -1;
  if (trimmedUrl && slashIndex > 'https://'.length) {
    return trimmedUrl.slice(0, slashIndex + 1);
  }

  const cikSegment = normalizeCikForPath(input.cik);
  const accessionSegment = compactAccessionNumber(input.accessionNumber);
  if (cikSegment && accessionSegment) {
    return `https://www.sec.gov/Archives/edgar/data/${cikSegment}/${accessionSegment}/`;
  }

  return null;
}
|
||||
|
||||
/**
 * Classifies a filing asset by its file name (case-insensitive).
 *
 * `.pdf` maps to `'pdf'` and `.xsd` to `'schema'`. An `.xml` file is matched against
 * the presentation, label, calculation and definition patterns in that order, and is
 * an `'instance'` when none match. Anything else is `'other'`.
 */
function classifyAssetType(name: string): TaxonomyAsset['asset_type'] {
  const lower = name.toLowerCase();

  if (lower.endsWith('.pdf')) {
    return 'pdf';
  }
  if (lower.endsWith('.xsd')) {
    return 'schema';
  }
  if (!lower.endsWith('.xml')) {
    return 'other';
  }

  // Each linkbase kind matches either a file-name suffix or a keyword anywhere in the name.
  const linkbaseRules: Array<[TaxonomyAsset['asset_type'], RegExp, RegExp]> = [
    ['presentation', /(_|-)pre\.xml$/, /presentation/],
    ['label', /(_|-)lab\.xml$/, /label/],
    ['calculation', /(_|-)cal\.xml$/, /calculation/],
    ['definition', /(_|-)def\.xml$/, /definition/]
  ];

  for (const [assetType, suffixPattern, keywordPattern] of linkbaseRules) {
    if (suffixPattern.test(lower) || keywordPattern.test(lower)) {
      return assetType;
    }
  }

  return 'instance';
}
|
||||
|
||||
/**
 * Scores a PDF by file name and size; higher is better.
 *
 * +8 when the name mentions financial/statement/annual/quarter/10k/10q, -2 when it looks
 * like an exhibit, and +1 when the size is known and above 100,000 bytes.
 *
 * @param name - PDF file name (matched case-insensitively).
 * @param sizeBytes - File size in bytes, or `null` when unknown.
 */
function scorePdf(name: string, sizeBytes: number | null) {
  const lower = name.toLowerCase();

  const keywordBonus = /financial|statement|annual|quarter|10k|10q/.test(lower) ? 8 : 0;
  const exhibitPenalty = /exhibit|ex-\d+/.test(lower) ? -2 : 0;
  const sizeBonus = sizeBytes !== null && sizeBytes > 100_000 ? 1 : 0;

  return keywordBonus + exhibitPenalty + sizeBonus;
}
|
||||
|
||||
/**
 * Scores an instance-document candidate by file name; higher is better.
 *
 * Starts at 1. Adds 4 for an `_htm.xml` suffix, 4 for an `_ins.xml` suffix, and 5 when
 * the name contains the primary document's base name (extension removed). Subtracts 3
 * when the name contains `cal`, `def`, `lab` or `pre` anywhere.
 *
 * @param name - Candidate file name (matched case-insensitively).
 * @param primaryDocument - Primary document file name, or `null` when unknown.
 */
function scoreInstance(name: string, primaryDocument: string | null) {
  const lower = name.toLowerCase();
  const primaryBase = (primaryDocument ?? '').replace(/\.[a-z0-9]+$/i, '').toLowerCase();

  const adjustments = [
    /_htm\.xml$/.test(lower) ? 4 : 0,
    /_ins\.xml$/.test(lower) ? 4 : 0,
    primaryBase && lower.includes(primaryBase) ? 5 : 0,
    /cal|def|lab|pre/.test(lower) ? -3 : 0
  ];

  return adjustments.reduce((total, delta) => total + delta, 1);
}
|
||||
|
||||
/**
 * Parses a directory-listing `size` value into a number.
 *
 * @param raw - Size as supplied by the listing (number or string); anything else yields `null`.
 * @returns The finite numeric size, or `null` when the value is missing, blank,
 *   non-numeric or not finite.
 */
function parseSize(raw: unknown) {
  if (typeof raw === 'number') {
    return Number.isFinite(raw) ? raw : null;
  }

  if (typeof raw === 'string') {
    // `Number('')` is 0; a blank size means "unknown", not a zero-byte file.
    if (raw.trim().length === 0) {
      return null;
    }

    const parsed = Number(raw);
    return Number.isFinite(parsed) ? parsed : null;
  }

  return null;
}
|
||||
|
||||
/**
 * Requests a URL and returns its JSON body.
 *
 * The request sends the configured User-Agent, asks for `application/json`, and uses
 * `cache: 'no-store'`.
 *
 * @param url - URL to request.
 * @param fetchImpl - Fetch implementation used to perform the request.
 * @returns The parsed body, asserted (not validated) to be `T`.
 * @throws Error carrying the HTTP status when the response is not OK.
 */
async function fetchJson<T>(url: string, fetchImpl: typeof fetch): Promise<T> {
  const requestHeaders = {
    'User-Agent': envUserAgent(),
    Accept: 'application/json'
  };

  const response = await fetchImpl(url, { headers: requestHeaders, cache: 'no-store' });
  if (response.ok) {
    const body = await response.json();
    return body as T;
  }

  throw new Error(`SEC request failed (${response.status})`);
}
|
||||
|
||||
/**
 * Lists the files of a filing directory and marks which ones should be used.
 *
 * Steps:
 * 1. Resolve the directory URL; without one, return no assets.
 * 2. Read `index.json` from the directory (failures are treated as an empty listing).
 * 3. When nothing was listed but a filing URL is known, fall back to a single
 *    asset pointing at that URL.
 * 4. Score instance and PDF assets, select the best instance and up to three
 *    PDFs, and select every presentation/label/calculation/definition/schema asset.
 *
 * @param input - Filing identifiers, optional primary document and fetch override.
 * @returns The resolved directory URL (or null) and the annotated asset list.
 */
export async function discoverFilingAssets(input: FilingAssetDiscoveryInput): Promise<{
  directoryUrl: string | null;
  assets: TaxonomyAsset[];
}> {
  const fetchImpl = input.fetchImpl ?? fetch;
  const directoryUrl = resolveFilingDirectoryUrl({
    filingUrl: input.filingUrl,
    cik: input.cik,
    accessionNumber: input.accessionNumber
  });

  if (!directoryUrl) {
    return { directoryUrl: null, assets: [] };
  }

  // Best-effort listing: any request or parse failure yields an empty listing.
  let listing: FilingDirectoryJson | null;
  try {
    listing = await fetchJson<FilingDirectoryJson>(`${directoryUrl}index.json`, fetchImpl);
  } catch {
    listing = null;
  }

  const discovered: TaxonomyAsset[] = [];
  for (const entry of listing?.directory?.item ?? []) {
    const name = (entry.name ?? '').trim();
    if (name) {
      discovered.push({
        asset_type: classifyAssetType(name),
        name,
        url: `${directoryUrl}${name.replace(/^\/+/, '')}`,
        size_bytes: parseSize(entry.size),
        score: null,
        is_selected: false
      });
    }
  }

  if (discovered.length === 0 && input.filingUrl) {
    // Nothing listed: expose the filing URL itself as the only asset.
    const fallbackName = input.primaryDocument ?? input.filingUrl.split('/').pop() ?? 'primary_document';
    const isXml = fallbackName.toLowerCase().endsWith('.xml');
    discovered.push({
      asset_type: isXml ? 'instance' : 'other',
      name: fallbackName,
      url: input.filingUrl,
      size_bytes: null,
      score: null,
      is_selected: true
    });
  }

  // Best instance: highest score, the earliest listed candidate winning ties.
  let selectedInstanceUrl: string | null = null;
  let topInstanceScore = Number.NEGATIVE_INFINITY;
  for (const candidate of discovered) {
    if (candidate.asset_type !== 'instance') {
      continue;
    }

    const candidateScore = scoreInstance(candidate.name, input.primaryDocument);
    if (candidateScore > topInstanceScore) {
      topInstanceScore = candidateScore;
      selectedInstanceUrl = candidate.url;
    }
  }

  // Up to three PDFs, best score first.
  const rankedPdfs = discovered
    .filter((asset) => asset.asset_type === 'pdf')
    .map((asset) => ({ asset, score: scorePdf(asset.name, asset.size_bytes) }));
  rankedPdfs.sort((left, right) => right.score - left.score);
  const selectedPdfUrls = rankedPdfs.slice(0, 3).map((entry) => entry.asset.url);

  const assets = discovered.map((asset) => {
    switch (asset.asset_type) {
      case 'instance':
        return {
          ...asset,
          score: scoreInstance(asset.name, input.primaryDocument),
          is_selected: asset.url === selectedInstanceUrl
        };
      case 'pdf':
        return {
          ...asset,
          score: scorePdf(asset.name, asset.size_bytes),
          is_selected: selectedPdfUrls.includes(asset.url)
        };
      default:
        return {
          ...asset,
          score: null,
          is_selected: asset.asset_type === 'presentation'
            || asset.asset_type === 'label'
            || asset.asset_type === 'calculation'
            || asset.asset_type === 'definition'
            || asset.asset_type === 'schema'
        };
    }
  });

  return { directoryUrl, assets };
}
|
||||
185
lib/server/taxonomy/engine.ts
Normal file
185
lib/server/taxonomy/engine.ts
Normal file
@@ -0,0 +1,185 @@
|
||||
import type { FinancialStatementKind } from '@/lib/types';
|
||||
import { discoverFilingAssets } from '@/lib/server/taxonomy/asset-discovery';
|
||||
import { parseLabelLinkbase, parsePresentationLinkbase } from '@/lib/server/taxonomy/linkbase-parser';
|
||||
import { deriveTaxonomyMetrics } from '@/lib/server/taxonomy/metrics';
|
||||
import { materializeTaxonomyStatements } from '@/lib/server/taxonomy/materialize';
|
||||
import { validateMetricsWithPdfLlm } from '@/lib/server/taxonomy/pdf-validation';
|
||||
import type { TaxonomyHydrationInput, TaxonomyHydrationResult } from '@/lib/server/taxonomy/types';
|
||||
import { parseXbrlInstance } from '@/lib/server/taxonomy/xbrl-parser';
|
||||
|
||||
/**
 * Builds a record with one entry per financial statement kind.
 *
 * The factory is invoked once per kind, so every entry receives its own value.
 *
 * @param factory - Produces the value stored under each statement kind.
 * @returns A record keyed by statement kind.
 */
function createStatementRecord<T>(factory: () => T): Record<FinancialStatementKind, T> {
  const income = factory();
  const balance = factory();
  const cash_flow = factory();
  const equity = factory();
  const comprehensive_income = factory();

  return { income, balance, cash_flow, equity, comprehensive_income };
}
|
||||
|
||||
/**
 * Returns the User-Agent header value for outgoing requests.
 *
 * @returns `SEC_USER_AGENT` from the environment when it is set to a non-empty
 *   value, otherwise the built-in default.
 */
function envUserAgent() {
  const configured = process.env.SEC_USER_AGENT;
  if (configured) {
    return configured;
  }

  return 'Fiscal Clone <support@fiscal.local>';
}
|
||||
|
||||
/**
 * Performs a GET request and returns the response body as text.
 *
 * The request carries the configured User-Agent, an Accept header that prefers
 * XML and plain text, and is sent with `cache: 'no-store'`.
 *
 * @param url - Absolute URL to request.
 * @param fetchImpl - Fetch implementation to use for the request.
 * @returns The response body.
 * @throws Error when the response status is not OK.
 */
async function fetchText(url: string, fetchImpl: typeof fetch) {
  const headers = {
    'User-Agent': envUserAgent(),
    Accept: 'text/xml, text/plain, text/html;q=0.8, */*;q=0.5'
  };

  const response = await fetchImpl(url, { headers, cache: 'no-store' });
  if (response.ok) {
    return await response.text();
  }

  throw new Error(`SEC request failed (${response.status})`);
}
|
||||
|
||||
/**
 * Builds a taxonomy snapshot for one filing.
 *
 * Flow:
 * 1. Discover the filing's assets.
 * 2. Pick the instance asset (the selected one, else the first instance) and
 *    download it; without an instance, or when the download fails, a `failed`
 *    result is returned.
 * 3. Parse the instance, then download and parse every selected presentation and
 *    label linkbase. Linkbase failures are recorded (first message wins) but do
 *    not abort the run.
 * 4. Materialize statements, derive metrics and run the PDF validation.
 *
 * `parse_status` is `ready` when both statement rows and facts exist, `partial`
 * when only facts exist, and `failed` otherwise.
 *
 * @param input - Filing identifiers and metadata.
 * @param options - Optional fetch override.
 * @returns The hydration result for the filing.
 */
export async function hydrateFilingTaxonomySnapshot(
  input: TaxonomyHydrationInput,
  options?: {
    fetchImpl?: typeof fetch;
  }
): Promise<TaxonomyHydrationResult> {
  const fetchImpl = options?.fetchImpl ?? fetch;

  const { assets } = await discoverFilingAssets({
    cik: input.cik,
    accessionNumber: input.accessionNumber,
    filingUrl: input.filingUrl,
    primaryDocument: input.primaryDocument,
    fetchImpl
  });

  // Fields shared by every result shape.
  const identity = {
    filing_id: input.filingId,
    ticker: input.ticker.trim().toUpperCase(),
    filing_date: input.filingDate,
    filing_type: input.filingType
  };

  const failedResult = (message: string): TaxonomyHydrationResult => ({
    ...identity,
    parse_status: 'failed',
    parse_error: message,
    source: 'legacy_html_fallback',
    periods: [],
    statement_rows: createStatementRecord(() => []),
    derived_metrics: null,
    validation_result: {
      status: 'not_run',
      checks: [],
      validatedAt: null
    },
    facts_count: 0,
    concepts_count: 0,
    dimensions_count: 0,
    assets,
    concepts: [],
    facts: [],
    metric_validations: []
  });

  const instanceAssets = assets.filter((asset) => asset.asset_type === 'instance');
  const selectedInstance = instanceAssets.find((asset) => asset.is_selected) ?? instanceAssets[0];
  if (!selectedInstance) {
    return failedResult('No XBRL instance found');
  }

  let instanceText: string;
  try {
    instanceText = await fetchText(selectedInstance.url, fetchImpl);
  } catch (error) {
    return failedResult(error instanceof Error ? error.message : 'Unable to fetch instance file');
  }

  const parsedInstance = parseXbrlInstance(instanceText, selectedInstance.name);

  let parseError: string | null = null;
  let source: TaxonomyHydrationResult['source'] = 'xbrl_instance';
  const labelByConcept = new Map<string, string>();
  const presentation: ReturnType<typeof parsePresentationLinkbase> = [];

  const linkbaseAssets = assets.filter(
    (asset) => asset.is_selected && (asset.asset_type === 'presentation' || asset.asset_type === 'label')
  );

  // Linkbases are processed one at a time, in listing order: for labels the
  // first file to define a concept wins.
  for (const linkbase of linkbaseAssets) {
    try {
      const content = await fetchText(linkbase.url, fetchImpl);

      if (linkbase.asset_type === 'presentation') {
        const nodes = parsePresentationLinkbase(content);
        if (nodes.length > 0) {
          source = 'xbrl_instance_with_linkbase';
        }

        presentation.push(...nodes);
      } else {
        parseLabelLinkbase(content).forEach((label, conceptKey) => {
          if (!labelByConcept.has(conceptKey)) {
            labelByConcept.set(conceptKey, label);
          }
        });
      }
    } catch (error) {
      if (parseError === null) {
        parseError = error instanceof Error ? error.message : 'Failed to parse taxonomy linkbase';
      }
    }
  }

  const materialized = materializeTaxonomyStatements({
    filingId: input.filingId,
    accessionNumber: input.accessionNumber,
    filingDate: input.filingDate,
    filingType: input.filingType,
    facts: parsedInstance.facts,
    presentation,
    labelByConcept
  });

  const derivedMetrics = deriveTaxonomyMetrics(parsedInstance.facts);
  const llmValidation = await validateMetricsWithPdfLlm({
    metrics: derivedMetrics,
    assets,
    fetchImpl
  });

  const hasRows = Object.values(materialized.statement_rows).some((rows) => rows.length > 0);
  const hasFacts = materialized.facts.length > 0;

  let parseStatus: TaxonomyHydrationResult['parse_status'];
  if (!hasFacts) {
    parseStatus = 'failed';
  } else if (hasRows) {
    parseStatus = 'ready';
  } else {
    parseStatus = 'partial';
  }

  return {
    ...identity,
    parse_status: parseStatus,
    parse_error: parseStatus === 'failed' ? (parseError ?? 'No XBRL facts extracted') : parseError,
    source,
    periods: materialized.periods,
    statement_rows: materialized.statement_rows,
    derived_metrics: derivedMetrics,
    validation_result: llmValidation.validation_result,
    facts_count: materialized.facts.length,
    concepts_count: materialized.concepts.length,
    dimensions_count: materialized.dimensionsCount,
    assets,
    concepts: materialized.concepts,
    facts: materialized.facts,
    metric_validations: llmValidation.metric_validations
  };
}
|
||||
63
lib/server/taxonomy/linkbase-parser.test.ts
Normal file
63
lib/server/taxonomy/linkbase-parser.test.ts
Normal file
@@ -0,0 +1,63 @@
|
||||
import { describe, expect, it } from 'bun:test';
|
||||
import {
|
||||
classifyStatementRole,
|
||||
parseLabelLinkbase,
|
||||
parsePresentationLinkbase
|
||||
} from '@/lib/server/taxonomy/linkbase-parser';
|
||||
|
||||
// Label linkbase fixture: one locator for us-gaap_Revenues connected by labelArcs
// to a terse label ("Rev.") and a standard label ("Revenues").
const SAMPLE_LABEL_LINKBASE = `
|
||||
<link:linkbase xmlns:link="http://www.xbrl.org/2003/linkbase"
|
||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
||||
xmlns:us-gaap="http://fasb.org/us-gaap/2024">
|
||||
<link:labelLink xlink:type="extended">
|
||||
<link:loc xlink:type="locator" xlink:label="loc_rev" xlink:href="test.xsd#us-gaap_Revenues" />
|
||||
<link:label xlink:type="resource" xlink:label="lab_terse" xlink:role="http://www.xbrl.org/2003/role/terseLabel">Rev.</link:label>
|
||||
<link:label xlink:type="resource" xlink:label="lab_label" xlink:role="http://www.xbrl.org/2003/role/label">Revenues</link:label>
|
||||
<link:labelArc xlink:type="arc" xlink:from="loc_rev" xlink:to="lab_terse" />
|
||||
<link:labelArc xlink:type="arc" xlink:from="loc_rev" xlink:to="lab_label" />
|
||||
</link:labelLink>
|
||||
</link:linkbase>
|
||||
`;
|
||||
|
||||
// Presentation linkbase fixture: a single presentationLink with the
// StatementOfOperations role, whose root locator (us-gaap_StatementLineItems)
// has two children: Revenues (order 1) and CostOfGoodsSold (order 2).
const SAMPLE_PRESENTATION_LINKBASE = `
|
||||
<link:linkbase xmlns:link="http://www.xbrl.org/2003/linkbase"
|
||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
||||
xmlns:us-gaap="http://fasb.org/us-gaap/2024">
|
||||
<link:presentationLink xlink:type="extended" xlink:role="http://www.xbrl.org/2003/role/StatementOfOperations">
|
||||
<link:loc xlink:type="locator" xlink:label="root" xlink:href="test.xsd#us-gaap_StatementLineItems" />
|
||||
<link:loc xlink:type="locator" xlink:label="rev" xlink:href="test.xsd#us-gaap_Revenues" />
|
||||
<link:loc xlink:type="locator" xlink:label="cogs" xlink:href="test.xsd#us-gaap_CostOfGoodsSold" />
|
||||
<link:presentationArc xlink:type="arc" xlink:from="root" xlink:to="rev" order="1" />
|
||||
<link:presentationArc xlink:type="arc" xlink:from="root" xlink:to="cogs" order="2" />
|
||||
</link:presentationLink>
|
||||
</link:linkbase>
|
||||
`;
|
||||
|
||||
// Unit tests for the label/presentation linkbase parsers and role classification.
describe('linkbase parser', () => {
  it('builds preferred labels from label linkbase', () => {
    const labelByConcept = parseLabelLinkbase(SAMPLE_LABEL_LINKBASE);
    const revenuesLabel = labelByConcept.get('http://fasb.org/us-gaap/2024#Revenues');

    // The fixture declares both a terse and a standard label for Revenues.
    expect(revenuesLabel).toBe('Revenues');
  });

  it('builds role trees with depth/order/parent metadata', () => {
    const rows = parsePresentationLinkbase(SAMPLE_PRESENTATION_LINKBASE);
    const findByQName = (qname: string) => rows.find((row) => row.qname === qname);

    expect(rows.length).toBe(3);

    const root = findByQName('us-gaap:StatementLineItems');
    const revenue = findByQName('us-gaap:Revenues');
    const cogs = findByQName('us-gaap:CostOfGoodsSold');

    // Root sits at depth 0 without a parent.
    expect(root?.depth).toBe(0);
    expect(root?.parentConceptKey).toBeNull();

    // Both children hang one level below the root, in arc order.
    expect(revenue?.depth).toBe(1);
    expect(cogs?.depth).toBe(1);
    expect(revenue?.parentConceptKey).toBe(root?.conceptKey ?? null);
    expect(revenue?.order).toBeLessThan(cogs?.order ?? Number.POSITIVE_INFINITY);
  });

  it('classifies statement roles into canonical statement kinds', () => {
    const expectations = [
      ['http://www.xbrl.org/2003/role/StatementOfOperations', 'income'],
      ['http://www.xbrl.org/2003/role/StatementOfFinancialPosition', 'balance'],
      ['http://www.xbrl.org/2003/role/StatementOfCashFlows', 'cash_flow']
    ] as const;

    for (const [roleUri, expectedKind] of expectations) {
      expect(classifyStatementRole(roleUri)).toBe(expectedKind);
    }
  });
});
|
||||
310
lib/server/taxonomy/linkbase-parser.ts
Normal file
310
lib/server/taxonomy/linkbase-parser.ts
Normal file
@@ -0,0 +1,310 @@
|
||||
import type { FinancialStatementKind } from '@/lib/types';
|
||||
import type { TaxonomyNamespaceMap, TaxonomyPresentationConcept } from '@/lib/server/taxonomy/types';
|
||||
|
||||
/** Named entity references recognised by `decodeXmlEntities` (lower-case keys). */
const XML_NAMED_ENTITIES: ReadonlyMap<string, string> = new Map([
  ['amp', '&'],
  ['lt', '<'],
  ['gt', '>'],
  ['quot', '"'],
  ['apos', "'"],
  ['nbsp', ' ']
]);

/**
 * Decodes XML entity references in a text fragment and trims the result.
 *
 * Handles the named references `&amp;`, `&lt;`, `&gt;`, `&quot;`, `&apos;` and
 * `&nbsp;` (case-insensitively) plus decimal and hexadecimal character
 * references. A non-breaking space (named or numeric) becomes a regular space.
 * Unknown or invalid references are left untouched.
 *
 * Decoding happens in a single pass, so already-decoded output is never decoded
 * again: `&amp;lt;` yields `&lt;`, not `<`.
 *
 * @param value - Raw text that may contain entity references.
 * @returns The decoded text with surrounding whitespace removed.
 */
function decodeXmlEntities(value: string) {
  return value
    .replace(/&(#x[0-9a-f]+|#[0-9]+|[a-z]+);/gi, (reference: string, body: string) => {
      const key = body.toLowerCase();

      if (!key.startsWith('#')) {
        return XML_NAMED_ENTITIES.get(key) ?? reference;
      }

      const codePoint = key.startsWith('#x')
        ? Number.parseInt(key.slice(2), 16)
        : Number.parseInt(key.slice(1), 10);

      if (codePoint === 0xa0) {
        return ' ';
      }

      // Reject values String.fromCodePoint would throw on, NUL and lone surrogates.
      const isValid = Number.isInteger(codePoint)
        && codePoint > 0
        && codePoint <= 0x10ffff
        && (codePoint < 0xd800 || codePoint > 0xdfff);

      return isValid ? String.fromCodePoint(codePoint) : reference;
    })
    .trim();
}
|
||||
|
||||
/**
 * Collects `xmlns:prefix="uri"` declarations from a linkbase document.
 *
 * Only the first tag whose text contains "linkbase" is scanned; when no such
 * tag exists, the first 1200 characters of the document are scanned instead.
 *
 * @param raw - Linkbase XML text.
 * @returns A map from namespace prefix to namespace URI.
 */
function parseNamespaceMap(raw: string): TaxonomyNamespaceMap {
  const rootTag = /<[^>]*linkbase[^>]*>/i.exec(raw);
  const scanned = rootTag ? rootTag[0] : raw.slice(0, 1200);

  const namespaces: TaxonomyNamespaceMap = {};
  const declaration = /xmlns:([a-zA-Z0-9_\-]+)=["']([^"']+)["']/g;

  let found: RegExpExecArray | null = declaration.exec(scanned);
  while (found !== null) {
    const prefix = (found[1] ?? '').trim();
    const uri = (found[2] ?? '').trim();
    if (prefix && uri) {
      namespaces[prefix] = uri;
    }

    found = declaration.exec(scanned);
  }

  return namespaces;
}
|
||||
|
||||
/**
 * Derives a `prefix:LocalName` QName from a locator href.
 *
 * The part after the first `#` (or the whole href when there is none) is
 * trimmed and stripped of a leading `loc_` marker. A value that already contains
 * `:` is returned as is; otherwise the first `_` is treated as the separator
 * between prefix and local name.
 *
 * @param href - Locator href, e.g. `test.xsd#us-gaap_Revenues`.
 * @returns The QName, or null when none can be derived.
 */
function qnameFromHref(href: string) {
  const hashAt = href.indexOf('#');
  const fragment = hashAt >= 0 ? href.slice(hashAt + 1) : href;
  if (fragment === '') {
    return null;
  }

  const cleaned = fragment.trim().replace(/^loc_+/i, '');
  if (cleaned === '') {
    return null;
  }

  if (cleaned.includes(':')) {
    return cleaned;
  }

  const separatorAt = cleaned.indexOf('_');
  return separatorAt >= 0
    ? `${cleaned.slice(0, separatorAt)}:${cleaned.slice(separatorAt + 1)}`
    : null;
}
|
||||
|
||||
/**
 * Resolves a QName into its concept identity.
 *
 * The text before the first `:` is the prefix and everything after it is the
 * local name. Prefixes missing from `namespaces` resolve to `urn:unknown:<prefix>`.
 *
 * @param qname - QName in `prefix:LocalName` form.
 * @param namespaces - Prefix-to-URI map used to resolve the prefix.
 * @returns The concept parts and its `namespaceUri#localName` key, or null when
 *   the prefix or the local name is empty.
 */
function conceptFromQName(qname: string, namespaces: TaxonomyNamespaceMap) {
  const colonAt = qname.indexOf(':');
  const prefix = colonAt < 0 ? qname : qname.slice(0, colonAt);
  const localName = colonAt < 0 ? '' : qname.slice(colonAt + 1);

  if (prefix === '' || localName === '') {
    return null;
  }

  const namespaceUri = namespaces[prefix] ?? `urn:unknown:${prefix}`;
  const conceptKey = `${namespaceUri}#${localName}`;

  return { qname, namespaceUri, localName, conceptKey };
}
|
||||
|
||||
/**
 * Ranks a label role; higher values are preferred.
 *
 * Roles are compared case-insensitively by suffix: `/label` is 4,
 * `/terseLabel` is 3, `/verboseLabel` is 2, any other role is 1 and a missing
 * or empty role is 0.
 *
 * @param role - Label role URI, or null when absent.
 * @returns The priority of the role.
 */
function labelPriority(role: string | null) {
  const normalized = (role ?? '').toLowerCase();
  if (normalized === '') {
    return 0;
  }

  const rankedSuffixes: ReadonlyArray<readonly [string, number]> = [
    ['/label', 4],
    ['/terselabel', 3],
    ['/verboselabel', 2]
  ];

  for (const [suffix, priority] of rankedSuffixes) {
    if (normalized.endsWith(suffix)) {
      return priority;
    }
  }

  return 1;
}
|
||||
|
||||
/**
 * Role patterns in evaluation order; the first pattern that matches the
 * lower-cased role URI decides the statement kind.
 */
const STATEMENT_ROLE_PATTERNS: ReadonlyArray<readonly [RegExp, FinancialStatementKind]> = [
  [/cash\s*flow|statementsof?cashflows|netcash/, 'cash_flow'],
  [/shareholders?|stockholders?|equity|retainedearnings/, 'equity'],
  [/comprehensive\s*income/, 'comprehensive_income'],
  [/balance\s*sheet|financial\s*position|assets?andliabilities/, 'balance'],
  [/operations|income\s*statement|statementsofincome|profit/, 'income']
];

/**
 * Maps a presentation role URI to a financial statement kind.
 *
 * @param roleUri - Role URI of a presentation link.
 * @returns The matching statement kind, or null when no pattern matches.
 */
export function classifyStatementRole(roleUri: string): FinancialStatementKind | null {
  const normalized = roleUri.toLowerCase();

  for (const [pattern, kind] of STATEMENT_ROLE_PATTERNS) {
    if (pattern.test(normalized)) {
      return kind;
    }
  }

  return null;
}
|
||||
|
||||
/**
 * Extracts one preferred label per concept from a label linkbase.
 *
 * Within every `labelLink` block, locators map an xlink label to a concept key,
 * label resources map an xlink label to text plus role, and `labelArc` elements
 * connect the two. When several labels reach the same concept, the one with the
 * highest `labelPriority` is kept (the earliest wins ties).
 *
 * @param raw - Label linkbase XML text.
 * @returns A map from concept key to the preferred label text.
 */
export function parseLabelLinkbase(raw: string): Map<string, string> {
  const xlinkLabelAttr = /\bxlink:label=["']([^"']+)["']/i;
  const xlinkHrefAttr = /\bxlink:href=["']([^"']+)["']/i;
  const xlinkRoleAttr = /\bxlink:role=["']([^"']+)["']/i;
  const xlinkFromAttr = /\bxlink:from=["']([^"']+)["']/i;
  const xlinkToAttr = /\bxlink:to=["']([^"']+)["']/i;

  // Trimmed attribute value, or undefined when the attribute is absent.
  const readAttr = (attrs: string, pattern: RegExp): string | undefined =>
    attrs.match(pattern)?.[1]?.trim();

  const namespaces = parseNamespaceMap(raw);
  const bestByConcept = new Map<string, { text: string; priority: number }>();

  const linkPattern = /<(?:[a-z0-9_\-]+:)?labelLink\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_\-]+:)?labelLink>/gi;
  for (const [, block = ''] of raw.matchAll(linkPattern)) {
    // Pass 1: locator label -> concept key.
    const conceptKeyByLocator = new Map<string, string>();
    for (const locMatch of block.matchAll(/<(?:[a-z0-9_\-]+:)?loc\b([^>]*)\/?>/gi)) {
      const attrs = locMatch[1] ?? '';
      const locatorLabel = readAttr(attrs, xlinkLabelAttr) ?? '';
      const href = readAttr(attrs, xlinkHrefAttr) ?? '';
      if (!locatorLabel || !href) {
        continue;
      }

      const qname = qnameFromHref(href);
      const concept = qname ? conceptFromQName(qname, namespaces) : null;
      if (concept) {
        conceptKeyByLocator.set(locatorLabel, concept.conceptKey);
      }
    }

    // Pass 2: resource label -> label text and role.
    const resourceByLabel = new Map<string, { text: string; role: string | null }>();
    const resourcePattern = /<(?:[a-z0-9_\-]+:)?label\b([^>]*)>([\s\S]*?)<\/(?:[a-z0-9_\-]+:)?label>/gi;
    for (const resourceMatch of block.matchAll(resourcePattern)) {
      const text = decodeXmlEntities(resourceMatch[2] ?? '').replace(/\s+/g, ' ').trim();
      if (!text) {
        continue;
      }

      const attrs = resourceMatch[1] ?? '';
      const resourceLabel = readAttr(attrs, xlinkLabelAttr) ?? '';
      const role = readAttr(attrs, xlinkRoleAttr) ?? null;
      if (resourceLabel) {
        resourceByLabel.set(resourceLabel, { text, role });
      }
    }

    // Pass 3: arcs connect locators to resources; keep the best label per concept.
    for (const arcMatch of block.matchAll(/<(?:[a-z0-9_\-]+:)?labelArc\b([^>]*)\/?>/gi)) {
      const attrs = arcMatch[1] ?? '';
      const from = readAttr(attrs, xlinkFromAttr) ?? '';
      const to = readAttr(attrs, xlinkToAttr) ?? '';
      if (!from || !to) {
        continue;
      }

      const conceptKey = conceptKeyByLocator.get(from);
      const resource = resourceByLabel.get(to);
      if (!conceptKey || !resource) {
        continue;
      }

      const priority = labelPriority(resource.role);
      const incumbent = bestByConcept.get(conceptKey);
      if (!incumbent || priority > incumbent.priority) {
        bestByConcept.set(conceptKey, { text: resource.text, priority });
      }
    }
  }

  const labels = new Map<string, string>();
  for (const [conceptKey, { text }] of bestByConcept) {
    labels.set(conceptKey, text);
  }

  return labels;
}
|
||||
|
||||
/**
 * Flattens the presentation trees of a presentation linkbase into rows.
 *
 * For every `presentationLink` with an `xlink:role`:
 * - locators map an xlink label to a concept (a concept is flagged abstract when
 *   its local name contains "abstract");
 * - `presentationArc` elements define parent -> child edges with an `order`
 *   (an arc without a numeric order is given its position among its siblings);
 * - labels that never appear as a child are roots, and each tree is walked
 *   depth-first with children visited in ascending order.
 *
 * Row `order` is `index + 1` for roots, and the parent's order plus
 * `(siblingIndex + 1) / 1000` for children. Links without a role are skipped,
 * as are arcs that reference unknown locators.
 *
 * A label that is already on the current root-to-node path is not descended
 * into again, so a directed cycle in the arcs cannot cause unbounded recursion.
 *
 * @param raw - Presentation linkbase XML text.
 * @returns One row per visited node, across all roles, in traversal order.
 */
export function parsePresentationLinkbase(raw: string): TaxonomyPresentationConcept[] {
  const namespaces = parseNamespaceMap(raw);
  const rows: TaxonomyPresentationConcept[] = [];

  const linkPattern = /<(?:[a-z0-9_\-]+:)?presentationLink\b([^>]*)>([\s\S]*?)<\/(?:[a-z0-9_\-]+:)?presentationLink>/gi;
  for (const linkMatch of raw.matchAll(linkPattern)) {
    const linkAttrs = linkMatch[1] ?? '';
    const block = linkMatch[2] ?? '';
    const roleUri = linkAttrs.match(/\bxlink:role=["']([^"']+)["']/i)?.[1]?.trim() ?? '';
    if (!roleUri) {
      continue;
    }

    const locByLabel = new Map<string, { conceptKey: string; qname: string; isAbstract: boolean }>();
    for (const locMatch of block.matchAll(/<(?:[a-z0-9_\-]+:)?loc\b([^>]*)\/?>/gi)) {
      const attrs = locMatch[1] ?? '';
      const label = attrs.match(/\bxlink:label=["']([^"']+)["']/i)?.[1]?.trim() ?? '';
      const href = attrs.match(/\bxlink:href=["']([^"']+)["']/i)?.[1]?.trim() ?? '';
      if (!label || !href) {
        continue;
      }

      const qname = qnameFromHref(href);
      if (!qname) {
        continue;
      }

      const concept = conceptFromQName(qname, namespaces);
      if (!concept) {
        continue;
      }

      locByLabel.set(label, {
        conceptKey: concept.conceptKey,
        qname: concept.qname,
        isAbstract: /abstract/i.test(concept.localName)
      });
    }

    const childrenByLabel = new Map<string, Array<{ label: string; order: number }>>();
    const incoming = new Set<string>();
    const allReferenced = new Set<string>();

    for (const arcMatch of block.matchAll(/<(?:[a-z0-9_\-]+:)?presentationArc\b([^>]*)\/?>/gi)) {
      const attrs = arcMatch[1] ?? '';
      const from = attrs.match(/\bxlink:from=["']([^"']+)["']/i)?.[1]?.trim() ?? '';
      const to = attrs.match(/\bxlink:to=["']([^"']+)["']/i)?.[1]?.trim() ?? '';
      const orderRaw = attrs.match(/\border=["']([^"']+)["']/i)?.[1]?.trim() ?? '';
      const order = Number.parseFloat(orderRaw);

      if (!from || !to || !locByLabel.has(from) || !locByLabel.has(to)) {
        continue;
      }

      const group = childrenByLabel.get(from) ?? [];
      group.push({ label: to, order: Number.isFinite(order) ? order : group.length + 1 });
      childrenByLabel.set(from, group);

      incoming.add(to);
      allReferenced.add(from);
      allReferenced.add(to);
    }

    const roots = [...allReferenced].filter((label) => !incoming.has(label));
    const visited = new Set<string>();
    // Labels on the path from the current root to the node being visited.
    const activePath = new Set<string>();

    const visit = (label: string, depth: number, parentLabel: string | null, baseOrder: number): void => {
      const node = locByLabel.get(label);
      if (!node) {
        return;
      }

      // The visited key below includes the depth, so it cannot stop a cycle
      // (every revisit happens one level deeper). Stop as soon as a label
      // reappears on its own ancestor path.
      if (activePath.has(label)) {
        return;
      }

      const pathKey = `${parentLabel ?? 'root'}::${label}::${depth}`;
      if (visited.has(pathKey)) {
        return;
      }
      visited.add(pathKey);

      const parentConceptKey = parentLabel ? (locByLabel.get(parentLabel)?.conceptKey ?? null) : null;
      rows.push({
        conceptKey: node.conceptKey,
        qname: node.qname,
        roleUri,
        order: baseOrder,
        depth,
        parentConceptKey,
        isAbstract: node.isAbstract
      });

      const children = [...(childrenByLabel.get(label) ?? [])].sort((left, right) => left.order - right.order);

      activePath.add(label);
      children.forEach((child, index) => {
        visit(child.label, depth + 1, label, baseOrder + (index + 1) / 1000);
      });
      activePath.delete(label);
    };

    roots.forEach((root, index) => {
      visit(root, 0, null, index + 1);
    });
  }

  return rows;
}
|
||||
374
lib/server/taxonomy/materialize.ts
Normal file
374
lib/server/taxonomy/materialize.ts
Normal file
@@ -0,0 +1,374 @@
|
||||
import type { Filing, FinancialStatementKind, TaxonomyStatementRow } from '@/lib/types';
|
||||
import type { TaxonomyConcept, TaxonomyFact, TaxonomyPresentationConcept } from '@/lib/server/taxonomy/types';
|
||||
import type { FilingTaxonomyPeriod } from '@/lib/server/repos/filing-taxonomy';
|
||||
import { classifyStatementRole } from '@/lib/server/taxonomy/linkbase-parser';
|
||||
import { conceptStatementFallback } from '@/lib/server/taxonomy/xbrl-parser';
|
||||
|
||||
/**
 * Removes every hyphen from an accession number.
 *
 * @param value - Accession number, possibly hyphenated.
 * @returns The same characters with all `-` removed.
 */
function compactAccessionNumber(value: string) {
  const segments = value.split('-');
  return segments.join('');
}
|
||||
|
||||
/**
 * Tells whether a namespace URI refers to us-gaap.
 *
 * @param namespaceUri - Namespace URI of a concept.
 * @returns True when the URI contains "us-gaap", ignoring case.
 */
function isUsGaapNamespace(namespaceUri: string) {
  // Any URI containing "fasb.org/us-gaap" also contains "us-gaap", so a single
  // case-insensitive test covers both forms.
  const usGaapMarker = /us-gaap/i;
  return usGaapMarker.test(namespaceUri);
}
|
||||
|
||||
/**
 * Splits a concept key at its last `#` into namespace URI and local name.
 *
 * @param conceptKey - Key in `namespaceUri#localName` form.
 * @returns The two parts; a key without `#` is returned as the local name
 *   under the `urn:unknown` namespace.
 */
function splitConceptKey(conceptKey: string) {
  const hashAt = conceptKey.lastIndexOf('#');
  const hasNamespace = hashAt >= 0;

  const namespaceUri = hasNamespace ? conceptKey.slice(0, hashAt) : 'urn:unknown';
  const localName = hasNamespace ? conceptKey.slice(hashAt + 1) : conceptKey;

  return { namespaceUri, localName };
}
|
||||
|
||||
/**
 * Turns a concept local name into a human-readable label.
 *
 * Spaces are inserted between a lower-case letter or digit and a following
 * capital, and between a run of capitals and a following capitalised word;
 * underscores become spaces and the result is trimmed.
 *
 * @param localName - Concept local name, e.g. `NetIncomeLoss`.
 * @returns The spaced label, e.g. `Net Income Loss`.
 */
function localNameToLabel(localName: string) {
  const lowerThenUpper = /([a-z0-9])([A-Z])/g;
  const acronymThenWord = /([A-Z]+)([A-Z][a-z])/g;

  let label = localName.replace(lowerThenUpper, '$1 $2');
  label = label.replace(acronymThenWord, '$1 $2');
  label = label.replace(/_/g, ' ');

  return label.trim();
}
|
||||
|
||||
/**
 * Builds a record with one entry per financial statement kind.
 *
 * The factory runs once for each kind, so entries never share a value.
 *
 * @param factory - Produces the value stored under each statement kind.
 * @returns A record keyed by statement kind.
 */
function createStatementRecord<T>(factory: () => T): Record<FinancialStatementKind, T> {
  const income = factory();
  const balance = factory();
  const cash_flow = factory();
  const equity = factory();
  const comprehensive_income = factory();

  return { income, balance, cash_flow, equity, comprehensive_income };
}
|
||||
|
||||
/**
 * Builds a string key identifying the reporting period of a fact.
 *
 * @param fact - Fact whose period start, end and instant are read.
 * @returns `start:<start>|end:<end>|instant:<instant>`, with missing parts
 *   rendered as empty strings.
 */
function periodSignature(fact: TaxonomyFact) {
  const segments = [
    `start:${fact.periodStart ?? ''}`,
    `end:${fact.periodEnd ?? ''}`,
    `instant:${fact.periodInstant ?? ''}`
  ];

  return segments.join('|');
}
|
||||
|
||||
/**
 * Picks the date that represents a fact's period.
 *
 * @param fact - Fact whose period end and instant are read.
 * @param fallbackDate - Date used when the fact has neither.
 * @returns The period end if present, else the instant, else the fallback.
 */
function periodDate(fact: TaxonomyFact, fallbackDate: string) {
  const { periodEnd, periodInstant } = fact;

  if (periodEnd !== null && periodEnd !== undefined) {
    return periodEnd;
  }

  if (periodInstant !== null && periodInstant !== undefined) {
    return periodInstant;
  }

  return fallbackDate;
}
|
||||
|
||||
/**
 * Converts a date string to epoch milliseconds.
 *
 * @param value - Date string, or null.
 * @returns The `Date.parse` result, or NaN for a null or empty value.
 */
function parseEpoch(value: string | null) {
  return value ? Date.parse(value) : Number.NaN;
}
|
||||
|
||||
/**
 * Returns a sorted copy of the periods, oldest first.
 *
 * Periods are compared by period end (falling back to the filing date). When
 * either date cannot be parsed, or both are equal, the period ids are compared
 * with `localeCompare` instead. The input array is not modified.
 *
 * @param periods - Periods to order.
 * @returns A new array in ascending order.
 */
function sortPeriods(periods: FilingTaxonomyPeriod[]) {
  const effectiveEpoch = (period: FilingTaxonomyPeriod) => parseEpoch(period.periodEnd ?? period.filingDate);

  const byDateThenId = (left: FilingTaxonomyPeriod, right: FilingTaxonomyPeriod) => {
    const leftEpoch = effectiveEpoch(left);
    const rightEpoch = effectiveEpoch(right);
    const comparable = Number.isFinite(leftEpoch) && Number.isFinite(rightEpoch);

    if (comparable && leftEpoch !== rightEpoch) {
      return leftEpoch - rightEpoch;
    }

    return left.id.localeCompare(right.id);
  };

  return periods.slice().sort(byDateThenId);
}
|
||||
|
||||
/**
 * Chooses the fact that should represent a group of competing facts.
 *
 * Preference order:
 * 1. dimensionless facts before dimensioned ones;
 * 2. the later period end (or instant), when both dates can be parsed and differ;
 * 3. the larger absolute value.
 *
 * @param facts - Candidate facts; the array is not modified.
 * @returns The preferred fact, or null for an empty list.
 */
function pickPreferredFact<T extends TaxonomyFact>(facts: T[]) {
  if (facts.length === 0) {
    return null;
  }

  const byPreference = (left: T, right: T) => {
    const leftDimensionless = left.isDimensionless ? 1 : 0;
    const rightDimensionless = right.isDimensionless ? 1 : 0;
    if (leftDimensionless !== rightDimensionless) {
      return rightDimensionless - leftDimensionless;
    }

    const leftEpoch = parseEpoch(left.periodEnd ?? left.periodInstant);
    const rightEpoch = parseEpoch(right.periodEnd ?? right.periodInstant);
    const comparable = Number.isFinite(leftEpoch) && Number.isFinite(rightEpoch);
    if (comparable && leftEpoch !== rightEpoch) {
      return rightEpoch - leftEpoch;
    }

    return Math.abs(right.value) - Math.abs(left.value);
  };

  // Sort a copy and take the head rather than scanning for a maximum: the
  // comparator skips the date rule for unparseable dates, so keeping the same
  // sort keeps the same winner.
  const ranked = facts.slice().sort(byPreference);
  return ranked[0] ?? null;
}
|
||||
|
||||
export function materializeTaxonomyStatements(input: {
|
||||
filingId: number;
|
||||
accessionNumber: string;
|
||||
filingDate: string;
|
||||
filingType: '10-K' | '10-Q';
|
||||
facts: TaxonomyFact[];
|
||||
presentation: TaxonomyPresentationConcept[];
|
||||
labelByConcept: Map<string, string>;
|
||||
}) {
|
||||
const periodBySignature = new Map<string, FilingTaxonomyPeriod>();
|
||||
const compactAccession = compactAccessionNumber(input.accessionNumber);
|
||||
|
||||
for (const fact of input.facts) {
|
||||
const signature = periodSignature(fact);
|
||||
if (periodBySignature.has(signature)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const date = periodDate(fact, input.filingDate);
|
||||
const id = `${date}-${compactAccession}-${periodBySignature.size + 1}`;
|
||||
|
||||
periodBySignature.set(signature, {
|
||||
id,
|
||||
filingId: input.filingId,
|
||||
accessionNumber: input.accessionNumber,
|
||||
filingDate: input.filingDate,
|
||||
periodStart: fact.periodStart,
|
||||
periodEnd: fact.periodEnd ?? fact.periodInstant ?? input.filingDate,
|
||||
filingType: input.filingType,
|
||||
periodLabel: fact.periodInstant && !fact.periodStart
|
||||
? 'Instant'
|
||||
: fact.periodStart && fact.periodEnd
|
||||
? `${fact.periodStart} to ${fact.periodEnd}`
|
||||
: 'Filing Period'
|
||||
});
|
||||
}
|
||||
|
||||
const periods = sortPeriods([...periodBySignature.values()]);
|
||||
const periodIdBySignature = new Map<string, string>(
|
||||
[...periodBySignature.entries()].map(([signature, period]) => [signature, period.id])
|
||||
);
|
||||
|
||||
const presentationByConcept = new Map<string, TaxonomyPresentationConcept[]>();
|
||||
for (const node of input.presentation) {
|
||||
const existing = presentationByConcept.get(node.conceptKey);
|
||||
if (existing) {
|
||||
existing.push(node);
|
||||
} else {
|
||||
presentationByConcept.set(node.conceptKey, [node]);
|
||||
}
|
||||
}
|
||||
|
||||
const enrichedFacts = input.facts.map((fact, index) => {
|
||||
const nodes = presentationByConcept.get(fact.conceptKey) ?? [];
|
||||
const bestNode = nodes[0] ?? null;
|
||||
const statementKind = bestNode
|
||||
? classifyStatementRole(bestNode.roleUri)
|
||||
: conceptStatementFallback(fact.localName);
|
||||
|
||||
return {
|
||||
...fact,
|
||||
__sourceFactId: index + 1,
|
||||
statement_kind: statementKind,
|
||||
role_uri: bestNode?.roleUri ?? null
|
||||
};
|
||||
});
|
||||
|
||||
const rowsByStatement = createStatementRecord<TaxonomyStatementRow[]>(() => []);
|
||||
const conceptByKey = new Map<string, TaxonomyConcept>();
|
||||
const groupedByStatement = createStatementRecord<Map<string, typeof enrichedFacts>>(() => new Map());
|
||||
|
||||
for (const fact of enrichedFacts) {
|
||||
if (!fact.statement_kind) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const group = groupedByStatement[fact.statement_kind].get(fact.conceptKey);
|
||||
if (group) {
|
||||
group.push(fact);
|
||||
} else {
|
||||
groupedByStatement[fact.statement_kind].set(fact.conceptKey, [fact]);
|
||||
}
|
||||
}
|
||||
|
||||
for (const statement of Object.keys(rowsByStatement) as FinancialStatementKind[]) {
|
||||
const conceptKeys = new Set<string>();
|
||||
|
||||
for (const node of input.presentation) {
|
||||
if (classifyStatementRole(node.roleUri) !== statement) {
|
||||
continue;
|
||||
}
|
||||
|
||||
conceptKeys.add(node.conceptKey);
|
||||
}
|
||||
|
||||
for (const conceptKey of groupedByStatement[statement].keys()) {
|
||||
conceptKeys.add(conceptKey);
|
||||
}
|
||||
|
||||
const orderedConcepts = [...conceptKeys]
|
||||
.map((conceptKey) => {
|
||||
const presentationNodes = input.presentation.filter(
|
||||
(node) => node.conceptKey === conceptKey && classifyStatementRole(node.roleUri) === statement
|
||||
);
|
||||
const presentationOrder = presentationNodes.length > 0
|
||||
? Math.min(...presentationNodes.map((node) => node.order))
|
||||
: Number.MAX_SAFE_INTEGER;
|
||||
const presentationDepth = presentationNodes.length > 0
|
||||
? Math.min(...presentationNodes.map((node) => node.depth))
|
||||
: 0;
|
||||
const roleUri = presentationNodes[0]?.roleUri ?? null;
|
||||
const parentConceptKey = presentationNodes[0]?.parentConceptKey ?? null;
|
||||
return {
|
||||
conceptKey,
|
||||
presentationOrder,
|
||||
presentationDepth,
|
||||
roleUri,
|
||||
parentConceptKey
|
||||
};
|
||||
})
|
||||
.sort((left, right) => {
|
||||
if (left.presentationOrder !== right.presentationOrder) {
|
||||
return left.presentationOrder - right.presentationOrder;
|
||||
}
|
||||
|
||||
return left.conceptKey.localeCompare(right.conceptKey);
|
||||
});
|
||||
|
||||
for (const orderedConcept of orderedConcepts) {
|
||||
const facts = groupedByStatement[statement].get(orderedConcept.conceptKey) ?? [];
|
||||
const { namespaceUri, localName } = splitConceptKey(orderedConcept.conceptKey);
|
||||
const qname = facts[0]?.qname ?? `unknown:${localName}`;
|
||||
const label = input.labelByConcept.get(orderedConcept.conceptKey) ?? localNameToLabel(localName);
|
||||
const values: Record<string, number | null> = {};
|
||||
const units: Record<string, string | null> = {};
|
||||
|
||||
const factGroups = new Map<string, typeof facts>();
|
||||
for (const fact of facts) {
|
||||
const signature = periodSignature(fact);
|
||||
const group = factGroups.get(signature);
|
||||
if (group) {
|
||||
group.push(fact);
|
||||
} else {
|
||||
factGroups.set(signature, [fact]);
|
||||
}
|
||||
}
|
||||
|
||||
const sourceFactIds: number[] = [];
|
||||
let hasDimensions = false;
|
||||
for (const [signature, group] of factGroups.entries()) {
|
||||
const periodId = periodIdBySignature.get(signature);
|
||||
if (!periodId) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const preferred = pickPreferredFact(group);
|
||||
if (!preferred) {
|
||||
continue;
|
||||
}
|
||||
|
||||
values[periodId] = preferred.value;
|
||||
units[periodId] = preferred.unit;
|
||||
const sourceFactId = (preferred as { __sourceFactId?: number }).__sourceFactId;
|
||||
if (typeof sourceFactId === 'number') {
|
||||
sourceFactIds.push(sourceFactId);
|
||||
}
|
||||
|
||||
if (group.some((entry) => !entry.isDimensionless)) {
|
||||
hasDimensions = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (Object.keys(values).length === 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const row: TaxonomyStatementRow = {
|
||||
key: orderedConcept.conceptKey,
|
||||
label,
|
||||
conceptKey: orderedConcept.conceptKey,
|
||||
qname,
|
||||
namespaceUri,
|
||||
localName,
|
||||
isExtension: !isUsGaapNamespace(namespaceUri),
|
||||
statement,
|
||||
roleUri: orderedConcept.roleUri,
|
||||
order: Number.isFinite(orderedConcept.presentationOrder)
|
||||
? orderedConcept.presentationOrder
|
||||
: rowsByStatement[statement].length + 1,
|
||||
depth: orderedConcept.presentationDepth,
|
||||
parentKey: orderedConcept.parentConceptKey,
|
||||
values,
|
||||
units,
|
||||
hasDimensions,
|
||||
sourceFactIds
|
||||
};
|
||||
|
||||
rowsByStatement[statement].push(row);
|
||||
|
||||
if (!conceptByKey.has(orderedConcept.conceptKey)) {
|
||||
conceptByKey.set(orderedConcept.conceptKey, {
|
||||
concept_key: orderedConcept.conceptKey,
|
||||
qname,
|
||||
namespace_uri: namespaceUri,
|
||||
local_name: localName,
|
||||
label,
|
||||
is_extension: !isUsGaapNamespace(namespaceUri),
|
||||
statement_kind: statement,
|
||||
role_uri: orderedConcept.roleUri,
|
||||
presentation_order: row.order,
|
||||
presentation_depth: row.depth,
|
||||
parent_concept_key: row.parentKey,
|
||||
is_abstract: /abstract/i.test(localName)
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (const fact of enrichedFacts) {
|
||||
if (conceptByKey.has(fact.conceptKey)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
conceptByKey.set(fact.conceptKey, {
|
||||
concept_key: fact.conceptKey,
|
||||
qname: fact.qname,
|
||||
namespace_uri: fact.namespaceUri,
|
||||
local_name: fact.localName,
|
||||
label: input.labelByConcept.get(fact.conceptKey) ?? localNameToLabel(fact.localName),
|
||||
is_extension: !isUsGaapNamespace(fact.namespaceUri),
|
||||
statement_kind: fact.statement_kind,
|
||||
role_uri: fact.role_uri,
|
||||
presentation_order: null,
|
||||
presentation_depth: null,
|
||||
parent_concept_key: null,
|
||||
is_abstract: /abstract/i.test(fact.localName)
|
||||
});
|
||||
}
|
||||
|
||||
const concepts = [...conceptByKey.values()];
|
||||
const factRows = enrichedFacts.map((fact) => ({
|
||||
concept_key: fact.conceptKey,
|
||||
qname: fact.qname,
|
||||
namespace_uri: fact.namespaceUri,
|
||||
local_name: fact.localName,
|
||||
statement_kind: fact.statement_kind,
|
||||
role_uri: fact.role_uri,
|
||||
context_id: fact.contextId,
|
||||
unit: fact.unit,
|
||||
decimals: fact.decimals,
|
||||
value_num: fact.value,
|
||||
period_start: fact.periodStart,
|
||||
period_end: fact.periodEnd,
|
||||
period_instant: fact.periodInstant,
|
||||
dimensions: fact.dimensions,
|
||||
is_dimensionless: fact.isDimensionless,
|
||||
source_file: fact.sourceFile,
|
||||
}));
|
||||
|
||||
const dimensionsCount = enrichedFacts.reduce((total, fact) => {
|
||||
return total + fact.dimensions.length;
|
||||
}, 0);
|
||||
|
||||
return {
|
||||
periods,
|
||||
statement_rows: rowsByStatement,
|
||||
concepts,
|
||||
facts: factRows,
|
||||
dimensionsCount
|
||||
};
|
||||
}
|
||||
55
lib/server/taxonomy/metrics.test.ts
Normal file
55
lib/server/taxonomy/metrics.test.ts
Normal file
@@ -0,0 +1,55 @@
|
||||
import { describe, expect, it } from 'bun:test';
|
||||
import type { TaxonomyFact } from '@/lib/server/taxonomy/types';
|
||||
import { deriveTaxonomyMetrics } from '@/lib/server/taxonomy/metrics';
|
||||
|
||||
/**
 * Test fixture builder: returns a dimensionless us-gaap 2024 fact in USD for
 * the 2025 calendar-year duration, with `overrides` applied on top of the defaults.
 */
function fact(localName: string, value: number, overrides?: Partial<TaxonomyFact>): TaxonomyFact {
  const namespaceUri = 'http://fasb.org/us-gaap/2024';

  const defaults: TaxonomyFact = {
    conceptKey: `${namespaceUri}#${localName}`,
    qname: `us-gaap:${localName}`,
    namespaceUri,
    localName,
    contextId: 'c1',
    unit: 'iso4217:USD',
    decimals: '-6',
    value,
    periodStart: '2025-01-01',
    periodEnd: '2025-12-31',
    periodInstant: null,
    dimensions: [],
    isDimensionless: true,
    sourceFile: 'abc_htm.xml'
  };

  // Spread order keeps caller-supplied overrides authoritative.
  return { ...defaults, ...overrides };
}
|
||||
|
||||
describe('taxonomy metric derivation', () => {
  it('applies concept priority for canonical metrics and debt component fallback', () => {
    // Both revenue concepts are present; no direct debt concept is present.
    const facts = [
      fact('SalesRevenueNet', 500),
      fact('Revenues', 450),
      fact('NetIncomeLoss', 40),
      fact('Assets', 1000),
      fact('CashAndCashEquivalentsAtCarryingValue', 80),
      fact('DebtCurrent', 15),
      fact('LongTermDebtNoncurrent', 35)
    ];

    const expected = {
      revenue: 450,
      netIncome: 40,
      totalAssets: 1000,
      cash: 80,
      debt: 50
    };

    expect(deriveTaxonomyMetrics(facts)).toEqual(expected);
  });

  it('uses direct debt concept before computed debt fallback when available', () => {
    const facts = [
      fact('DebtCurrent', 15),
      fact('LongTermDebtNoncurrent', 35),
      fact('LongTermDebtAndCapitalLeaseObligations', 90)
    ];

    const metrics = deriveTaxonomyMetrics(facts);

    expect(metrics.debt).toBe(90);
  });
});
|
||||
106
lib/server/taxonomy/metrics.ts
Normal file
106
lib/server/taxonomy/metrics.ts
Normal file
@@ -0,0 +1,106 @@
|
||||
import type { Filing } from '@/lib/types';
|
||||
import type { TaxonomyFact } from '@/lib/server/taxonomy/types';
|
||||
|
||||
/**
 * Candidate concept local names per metric, most-preferred first.
 * `pickBestFact` walks each list in order and stops at the first name that has facts.
 */
const METRIC_LOCAL_NAME_PRIORITY = {
  revenue: ['Revenues', 'SalesRevenueNet', 'RevenueFromContractWithCustomerExcludingAssessedTax', 'TotalRevenuesAndOtherIncome'],
  netIncome: ['NetIncomeLoss', 'ProfitLoss'],
  totalAssets: ['Assets'],
  cash: ['CashAndCashEquivalentsAtCarryingValue', 'CashCashEquivalentsRestrictedCashAndRestrictedCashEquivalents'],
  // A single concept used directly as the debt figure when present.
  debtDirect: ['DebtAndFinanceLeaseLiabilities', 'Debt', 'LongTermDebtAndCapitalLeaseObligations'],
  // Components summed (current + non-current) only when no direct concept is found.
  debtCurrent: ['DebtCurrent', 'ShortTermBorrowings', 'LongTermDebtCurrent'],
  debtNonCurrent: ['LongTermDebtNoncurrent', 'LongTermDebt', 'DebtNoncurrent']
} as const;
|
||||
|
||||
/**
 * Converts a date string to epoch milliseconds via `Date.parse`.
 * Returns `NaN` for `null`, an empty string, or text `Date.parse` cannot read.
 */
function normalizeDateToEpoch(value: string | null) {
  return value ? Date.parse(value) : Number.NaN;
}
|
||||
|
||||
/** Case-insensitive equality check for two concept local names. */
function sameLocalName(left: string, right: string) {
  const normalizedLeft = left.toLowerCase();
  const normalizedRight = right.toLowerCase();
  return normalizedLeft === normalizedRight;
}
|
||||
|
||||
/**
 * Chooses one fact from a set of candidates, or `null` when the set is empty.
 *
 * Ranking, in order:
 *  1. dimensionless facts before dimensioned ones;
 *  2. the later period end (or instant) when both dates parse and differ;
 *  3. the larger absolute value.
 *
 * The input array is not mutated.
 */
function pickPreferredFact(facts: TaxonomyFact[]) {
  const periodEpoch = (fact: TaxonomyFact) => normalizeDateToEpoch(fact.periodEnd ?? fact.periodInstant);

  const [best] = [...facts].sort((a, b) => {
    const dimensionDelta = Number(b.isDimensionless) - Number(a.isDimensionless);
    if (dimensionDelta !== 0) {
      return dimensionDelta;
    }

    const aEpoch = periodEpoch(a);
    const bEpoch = periodEpoch(b);
    const datesComparable = Number.isFinite(aEpoch) && Number.isFinite(bEpoch);
    if (datesComparable && aEpoch !== bEpoch) {
      return bEpoch - aEpoch;
    }

    return Math.abs(b.value) - Math.abs(a.value);
  });

  return best ?? null;
}
|
||||
|
||||
/**
 * Walks `localNames` in priority order and returns the preferred fact for the
 * first name that has at least one matching fact (case-insensitive match).
 * Returns `null` when none of the names match any fact.
 */
function pickBestFact(facts: TaxonomyFact[], localNames: readonly string[]) {
  for (const candidateName of localNames) {
    const candidates = facts.filter((entry) => sameLocalName(entry.localName, candidateName));

    if (candidates.length > 0) {
      return pickPreferredFact(candidates);
    }
  }

  return null;
}
|
||||
|
||||
/** Adds two values, yielding `null` unless both operands are present. */
function sumIfBoth(left: number | null, right: number | null) {
  return left !== null && right !== null ? left + right : null;
}
|
||||
|
||||
/**
 * Derives the five headline metrics (revenue, net income, total assets, cash, debt)
 * from parsed facts using the concept priority lists in `METRIC_LOCAL_NAME_PRIORITY`.
 *
 * Debt uses a direct concept when one is present; otherwise it is the sum of the
 * best current and non-current debt facts, and `null` if either component is missing.
 * Any metric with no matching fact is `null`.
 */
export function deriveTaxonomyMetrics(facts: TaxonomyFact[]): NonNullable<Filing['metrics']> {
  const valueFor = (localNames: readonly string[]) => pickBestFact(facts, localNames)?.value ?? null;

  const directDebt = valueFor(METRIC_LOCAL_NAME_PRIORITY.debtDirect);

  return {
    revenue: valueFor(METRIC_LOCAL_NAME_PRIORITY.revenue),
    netIncome: valueFor(METRIC_LOCAL_NAME_PRIORITY.netIncome),
    totalAssets: valueFor(METRIC_LOCAL_NAME_PRIORITY.totalAssets),
    cash: valueFor(METRIC_LOCAL_NAME_PRIORITY.cash),
    debt: directDebt !== null
      ? directDebt
      : sumIfBoth(
          valueFor(METRIC_LOCAL_NAME_PRIORITY.debtCurrent),
          valueFor(METRIC_LOCAL_NAME_PRIORITY.debtNonCurrent)
        )
  };
}
|
||||
49
lib/server/taxonomy/pdf-validation.test.ts
Normal file
49
lib/server/taxonomy/pdf-validation.test.ts
Normal file
@@ -0,0 +1,49 @@
|
||||
import { describe, expect, it } from 'bun:test';
|
||||
import { __pdfValidationInternals } from '@/lib/server/taxonomy/pdf-validation';
|
||||
|
||||
describe('pdf metric validation internals', () => {
  const { parseValidationPayload, diffStatus } = __pdfValidationInternals;

  it('parses fenced json payloads and rejects invalid payloads', () => {
    const fencedResponse = [
      '```json',
      '{"revenue":{"value":1000,"pages":[3]},"cash":{"value":200,"pages":["4"]}}',
      '```'
    ].join('\n');

    const parsed = parseValidationPayload(fencedResponse);

    expect(parsed).not.toBeNull();
    expect(parsed?.revenue?.value).toBe(1000);
    expect(parsed?.cash?.pages).toEqual(['4']);
    expect(parseValidationPayload('not-json')).toBeNull();
  });

  it('compares taxonomy vs llm values with fixed tolerance rules', () => {
    // Within tolerance.
    expect(diffStatus(1000, 1004)).toEqual({
      status: 'matched',
      absoluteDiff: 4,
      relativeDiff: 0.004
    });

    // Outside tolerance.
    expect(diffStatus(1000, 1007)).toEqual({
      status: 'mismatch',
      absoluteDiff: 7,
      relativeDiff: 0.007
    });

    // Small magnitudes still get an absolute tolerance of 1.
    expect(diffStatus(0.5, 1.2)).toEqual({
      status: 'matched',
      absoluteDiff: 0.7,
      relativeDiff: 0.7
    });

    // Exactly one side missing.
    expect(diffStatus(null, 1)).toEqual({
      status: 'mismatch',
      absoluteDiff: null,
      relativeDiff: null
    });

    // Both sides missing.
    expect(diffStatus(null, null)).toEqual({
      status: 'not_run',
      absoluteDiff: null,
      relativeDiff: null
    });
  });
});
|
||||
336
lib/server/taxonomy/pdf-validation.ts
Normal file
336
lib/server/taxonomy/pdf-validation.ts
Normal file
@@ -0,0 +1,336 @@
|
||||
import { execFile } from 'node:child_process';
|
||||
import { mkdtemp, rm, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { promisify } from 'node:util';
|
||||
import type { Filing, MetricValidationResult } from '@/lib/types';
|
||||
import { runAiAnalysis } from '@/lib/server/ai';
|
||||
import type { TaxonomyAsset, TaxonomyMetricValidationCheck } from '@/lib/server/taxonomy/types';
|
||||
|
||||
const execFileAsync = promisify(execFile);
|
||||
|
||||
/** Metric keys validated against the PDF, in the order checks are emitted. */
const METRIC_KEYS: Array<keyof NonNullable<Filing['metrics']>> = ['revenue', 'netIncome', 'totalAssets', 'cash', 'debt'];
|
||||
|
||||
/**
 * Pulls the text most likely to be a JSON document out of a raw LLM response.
 *
 * A fenced code block (optionally tagged `json`) wins when present; otherwise the
 * span from the first `{` to the last `}` is used. Returns `null` when neither exists.
 */
function extractJsonCandidate(raw: string) {
  const fencedJson = raw.match(/```(?:json)?\s*([\s\S]*?)```/i)?.[1];
  if (fencedJson !== undefined) {
    return fencedJson;
  }

  const start = raw.indexOf('{');
  const end = raw.lastIndexOf('}');
  return start >= 0 && end > start ? raw.slice(start, end + 1) : null;
}

/**
 * Parses the per-metric validation payload from a raw LLM response.
 *
 * @param raw Response text, possibly wrapped in a fenced code block or surrounded by prose.
 * @returns The decoded object keyed by metric name, or `null` when no candidate is found,
 *   the candidate is not valid JSON, or the JSON is not a plain object (array, primitive
 *   or `null`). Individual entries are not validated here; callers coerce them.
 */
function parseValidationPayload(raw: string) {
  const candidate = extractJsonCandidate(raw);
  if (!candidate) {
    return null;
  }

  let decoded: unknown;
  try {
    decoded = JSON.parse(candidate);
  } catch {
    return null;
  }

  // Only a plain object can be indexed by metric key; anything else is an invalid payload.
  if (typeof decoded !== 'object' || decoded === null || Array.isArray(decoded)) {
    return null;
  }

  return decoded as Record<string, {
    value?: number | string | null;
    pages?: Array<number | string>;
  }>;
}
|
||||
|
||||
/**
 * Coerces a loosely-typed value to a finite number.
 *
 * Numbers pass through when finite. Strings have commas and whitespace removed and
 * are then converted with `Number()`. An empty or whitespace-only string yields
 * `null` rather than 0, because `Number('')` is 0 and would otherwise turn a
 * missing value into a real-looking zero.
 *
 * @returns The finite number, or `null` for anything that is not one.
 */
function asNumber(value: unknown) {
  if (typeof value === 'number') {
    return Number.isFinite(value) ? value : null;
  }

  if (typeof value === 'string') {
    const compact = value.replace(/[,\s]/g, '');
    if (compact.length === 0) {
      return null;
    }

    const parsed = Number(compact);
    return Number.isFinite(parsed) ? parsed : null;
  }

  return null;
}
|
||||
|
||||
/**
 * Normalises a loosely-typed list of page references to positive integers.
 *
 * Non-array input yields an empty list. Numeric entries and numeric strings are
 * truncated toward zero; anything non-numeric, non-finite, or not greater than
 * zero after truncation is dropped.
 */
function asPageNumbers(raw: unknown): number[] {
  if (!Array.isArray(raw)) {
    return [];
  }

  const pages: number[] = [];

  for (const entry of raw) {
    let page = Number.NaN;

    if (typeof entry === 'number' && Number.isFinite(entry)) {
      page = Math.trunc(entry);
    } else if (typeof entry === 'string') {
      const parsed = Number(entry);
      if (Number.isFinite(parsed)) {
        page = Math.trunc(parsed);
      }
    }

    if (Number.isFinite(page) && page > 0) {
      pages.push(page);
    }
  }

  return pages;
}
|
||||
|
||||
/**
 * Compares a taxonomy-derived value with the LLM-extracted value.
 *
 * - both missing: `not_run`
 * - exactly one missing: `mismatch`
 * - otherwise `matched` when the absolute difference is within
 *   `max(1, 0.5% of |taxonomyValue|)`, else `mismatch`.
 *
 * `relativeDiff` divides by `max(|taxonomyValue|, 1)`, so it is never a division by zero.
 */
function diffStatus(taxonomyValue: number | null, llmValue: number | null) {
  if (taxonomyValue === null || llmValue === null) {
    const bothMissing = taxonomyValue === null && llmValue === null;

    return {
      status: bothMissing ? 'not_run' as const : 'mismatch' as const,
      absoluteDiff: null,
      relativeDiff: null
    };
  }

  const magnitude = Math.abs(taxonomyValue);
  const absoluteDiff = Math.abs(taxonomyValue - llmValue);
  const relativeDiff = absoluteDiff / Math.max(magnitude, 1);
  const tolerance = Math.max(1, magnitude * 0.005);
  const withinTolerance = absoluteDiff <= tolerance;

  return {
    status: withinTolerance ? 'matched' as const : 'mismatch' as const,
    absoluteDiff,
    relativeDiff
  };
}
|
||||
|
||||
/**
 * Downloads a PDF and returns its text as produced by the `pdftotext` CLI.
 *
 * The bytes are written to a fresh temp directory, converted with
 * `pdftotext -layout -enc UTF-8`, and the directory is removed afterwards whether or
 * not conversion succeeded.
 *
 * @param url Location of the PDF asset.
 * @param fetchImpl Fetch implementation used for the download.
 * @returns Trimmed text, or `null` when the converter produced no text.
 * @throws Error when the HTTP response is not OK, or when neither the content-type
 *   nor the URL suffix indicates a PDF. Failures from the file system or from
 *   `pdftotext` propagate unchanged.
 */
async function extractPdfText(url: string, fetchImpl: typeof fetch) {
  const response = await fetchImpl(url, {
    headers: { Accept: 'application/pdf, */*;q=0.8' },
    cache: 'no-store'
  });

  if (!response.ok) {
    throw new Error(`PDF request failed (${response.status})`);
  }

  const contentType = response.headers.get('content-type') ?? '';
  const looksLikePdf = /pdf/i.test(contentType) || /\.pdf$/i.test(url);
  if (!looksLikePdf) {
    throw new Error(`Asset is not a PDF (${contentType || 'unknown content-type'})`);
  }

  const pdfBytes = new Uint8Array(await response.arrayBuffer());
  const workDir = await mkdtemp(join(tmpdir(), 'fiscal-pdf-'));
  const pdfPath = join(workDir, 'source.pdf');

  try {
    await writeFile(pdfPath, pdfBytes);

    // '-' as the output argument makes pdftotext write to stdout.
    const converterArgs = ['-layout', '-enc', 'UTF-8', pdfPath, '-'];
    const { stdout } = await execFileAsync('pdftotext', converterArgs, {
      maxBuffer: 16 * 1024 * 1024
    });

    const text = stdout.trim();
    return text ? text : null;
  } finally {
    await rm(workDir, { recursive: true, force: true });
  }
}
|
||||
|
||||
/**
 * Builds the extraction prompt sent to the LLM.
 *
 * The prompt embeds the taxonomy metrics as JSON (an empty object when `metrics`
 * is nullish), the required response format, and at most the first 80,000
 * characters of the PDF text. Sections are separated by blank lines.
 */
function validationPrompt(metrics: Filing['metrics'], pdfText: string) {
  const baselineJson = JSON.stringify(metrics ?? {});
  const truncatedText = pdfText.slice(0, 80_000);

  const sections: string[] = [];
  sections.push('Extract numeric financial metrics from the provided financial statement PDF text.');
  sections.push(`Taxonomy baseline metrics: ${baselineJson}`);
  sections.push('Return ONLY JSON with keys revenue, netIncome, totalAssets, cash, debt.');
  sections.push('Each key must map to: {"value": number|null, "pages": [number]}.');
  sections.push('Use null when a metric is not found.');
  sections.push('PDF text follows:');
  sections.push(truncatedText);

  return sections.join('\n\n');
}
|
||||
|
||||
/** Trims a provider/model label and returns `null` when nothing is left. */
function providerModelOrNull(value: string | undefined | null) {
  const trimmed = value?.trim();

  if (!trimmed) {
    return null;
  }

  return trimmed;
}
|
||||
|
||||
/** Re-keys snake_case checks into the camelCase shape used by `validation_result.checks`. */
function toValidationResultChecks(checks: TaxonomyMetricValidationCheck[]) {
  return checks.map((check) => ({
    metricKey: check.metric_key,
    taxonomyValue: check.taxonomy_value,
    llmValue: check.llm_value,
    absoluteDiff: check.absolute_diff,
    relativeDiff: check.relative_diff,
    status: check.status,
    evidencePages: check.evidence_pages,
    pdfUrl: check.pdf_url,
    provider: check.provider,
    model: check.model,
    error: check.error
  }));
}

/**
 * Cross-checks taxonomy-derived metrics against values an LLM extracts from the
 * filing's selected PDF asset.
 *
 * Outcomes:
 * - no selected PDF asset: `not_run`, no checks, `validatedAt` null;
 * - PDF download/extraction throws: `error`, one `error` check per metric carrying the message;
 * - PDF yields no text: `not_run`, no checks;
 * - LLM call throws or returns no usable JSON: one `error` check per metric;
 * - otherwise one check per metric from `diffStatus`. The overall status is
 *   `error` if any check errored, else `mismatch` if any mismatched, else `matched`
 *   if at least one matched, else `not_run` (every metric was absent on both sides).
 *
 * @param input.metrics Taxonomy metrics; nullish is treated as all-null.
 * @param input.assets Filing assets; the first selected asset with `asset_type === 'pdf'` is used.
 * @param input.fetchImpl Optional fetch override (defaults to the global `fetch`).
 * @returns The aggregated `validation_result` plus the per-metric `metric_validations`.
 *   This function does not throw for extraction or LLM failures; they are reported as checks.
 */
export async function validateMetricsWithPdfLlm(input: {
  metrics: Filing['metrics'];
  assets: TaxonomyAsset[];
  fetchImpl?: typeof fetch;
}): Promise<{
  validation_result: MetricValidationResult | null;
  metric_validations: TaxonomyMetricValidationCheck[];
}> {
  const taxonomyMetrics = input.metrics ?? {
    revenue: null,
    netIncome: null,
    totalAssets: null,
    cash: null,
    debt: null
  };

  const selectedPdf = input.assets.find((asset) => asset.asset_type === 'pdf' && asset.is_selected);
  if (!selectedPdf) {
    return {
      validation_result: {
        status: 'not_run',
        checks: [],
        validatedAt: null
      },
      metric_validations: []
    };
  }

  const fetchImpl = input.fetchImpl ?? fetch;
  let pdfText: string | null = null;

  try {
    pdfText = await extractPdfText(selectedPdf.url, fetchImpl);
  } catch (error) {
    const message = error instanceof Error ? error.message : 'PDF extraction failed';

    const checks: TaxonomyMetricValidationCheck[] = METRIC_KEYS.map((metricKey) => ({
      metric_key: metricKey,
      // Same null-normalisation as the main path, so a missing key never leaks `undefined`.
      taxonomy_value: taxonomyMetrics[metricKey] ?? null,
      llm_value: null,
      absolute_diff: null,
      relative_diff: null,
      status: 'error',
      evidence_pages: [],
      pdf_url: selectedPdf.url,
      provider: null,
      model: null,
      error: message
    }));

    return {
      validation_result: {
        status: 'error',
        checks: toValidationResultChecks(checks),
        validatedAt: new Date().toISOString()
      },
      metric_validations: checks
    };
  }

  if (!pdfText) {
    return {
      validation_result: {
        status: 'not_run',
        checks: [],
        validatedAt: new Date().toISOString()
      },
      metric_validations: []
    };
  }

  let parsed: Record<string, { value?: number | string | null; pages?: Array<number | string> }> | null = null;
  let provider: string | null = null;
  let model: string | null = null;
  let modelError: string | null = null;

  try {
    const aiResult = await runAiAnalysis(validationPrompt(taxonomyMetrics, pdfText), undefined, {
      workload: 'extraction'
    });

    provider = providerModelOrNull(aiResult.provider);
    model = providerModelOrNull(aiResult.model);
    parsed = parseValidationPayload(aiResult.text);
    if (!parsed) {
      modelError = 'LLM response did not contain valid JSON payload';
    }
  } catch (error) {
    modelError = error instanceof Error ? error.message : 'LLM validation failed';
  }

  const validations: TaxonomyMetricValidationCheck[] = METRIC_KEYS.map((metricKey) => {
    const taxonomyValue = taxonomyMetrics[metricKey] ?? null;

    if (!parsed) {
      return {
        metric_key: metricKey,
        taxonomy_value: taxonomyValue,
        llm_value: null,
        absolute_diff: null,
        relative_diff: null,
        status: modelError ? 'error' : 'not_run',
        evidence_pages: [],
        pdf_url: selectedPdf.url,
        provider,
        model,
        error: modelError
      };
    }

    const entry = parsed[metricKey as string] ?? {};
    const llmValue = asNumber(entry.value);
    const pages = asPageNumbers(entry.pages);
    const diff = diffStatus(taxonomyValue, llmValue);

    return {
      metric_key: metricKey,
      taxonomy_value: taxonomyValue,
      llm_value: llmValue,
      absolute_diff: diff.absoluteDiff,
      relative_diff: diff.relativeDiff,
      status: diff.status,
      evidence_pages: pages,
      pdf_url: selectedPdf.url,
      provider,
      model,
      error: null
    };
  });

  const hasError = validations.some((entry) => entry.status === 'error');
  const hasMismatch = validations.some((entry) => entry.status === 'mismatch');
  const hasMatch = validations.some((entry) => entry.status === 'matched');

  // With nothing compared at all (every check `not_run`), reporting `matched` would
  // claim agreement that was never established.
  const overallStatus = hasError ? 'error' : hasMismatch ? 'mismatch' : hasMatch ? 'matched' : 'not_run';

  return {
    validation_result: {
      status: overallStatus,
      checks: toValidationResultChecks(validations),
      validatedAt: new Date().toISOString()
    },
    metric_validations: validations
  };
}
|
||||
|
||||
/**
 * Bundles the module-private `parseValidationPayload` and `diffStatus` helpers so
 * that pdf-validation.test.ts can call them directly.
 */
export const __pdfValidationInternals = {
|
||||
parseValidationPayload,
|
||||
diffStatus
|
||||
};
|
||||
136
lib/server/taxonomy/types.ts
Normal file
136
lib/server/taxonomy/types.ts
Normal file
@@ -0,0 +1,136 @@
|
||||
import type { Filing, FinancialStatementKind, MetricValidationResult, TaxonomyStatementRow } from '@/lib/types';
|
||||
import type {
|
||||
FilingTaxonomyAssetType,
|
||||
FilingTaxonomyParseStatus,
|
||||
FilingTaxonomyPeriod,
|
||||
FilingTaxonomySource
|
||||
} from '@/lib/server/repos/filing-taxonomy';
|
||||
|
||||
/**
 * One asset attached to a filing.
 * `is_selected` flags the asset chosen for use; PDF validation picks the selected
 * asset whose `asset_type` is `'pdf'`.
 */
export type TaxonomyAsset = {
  asset_type: FilingTaxonomyAssetType;
  name: string;
  url: string;
  /** `null` when the size is not known. */
  size_bytes: number | null;
  /** `null` when no score was assigned. */
  score: number | null;
  is_selected: boolean;
};
|
||||
|
||||
/** Namespace prefix to namespace URI, as collected from `xmlns:prefix="uri"` declarations. */
export type TaxonomyNamespaceMap = Record<string, string>;
|
||||
|
||||
/**
 * Period and dimension data read from one XBRL `context` element.
 * Each of the three period fields is `null` when the matching element is absent.
 */
export type TaxonomyContext = {
  id: string;
  periodStart: string | null;
  periodEnd: string | null;
  periodInstant: string | null;
  /** Explicit dimension members declared on the context (axis / member pairs). */
  dimensions: { axis: string; member: string }[];
};
|
||||
|
||||
/** One XBRL `unit` element: its id and its measure text (`null` when absent). */
export type TaxonomyUnit = {
  id: string;
  measure: string | null;
};
|
||||
|
||||
/**
 * One numeric fact from an XBRL instance, with the period and dimension data of
 * its context copied alongside.
 */
export type TaxonomyFact = {
  /** `<namespaceUri>#<localName>`. */
  conceptKey: string;
  /** Prefixed name, e.g. `us-gaap:Revenues`. */
  qname: string;
  namespaceUri: string;
  localName: string;
  contextId: string;
  unit: string | null;
  decimals: string | null;
  value: number;
  periodStart: string | null;
  periodEnd: string | null;
  periodInstant: string | null;
  dimensions: { axis: string; member: string }[];
  /** `true` when the fact's context carries no dimensions. */
  isDimensionless: boolean;
  sourceFile: string | null;
};
|
||||
|
||||
/**
 * One concept's position within a presentation role: the role it appears under,
 * its order and depth, and its parent concept (`null` when it has none).
 */
export type TaxonomyPresentationConcept = {
  conceptKey: string;
  qname: string;
  roleUri: string;
  order: number;
  depth: number;
  parentConceptKey: string | null;
  isAbstract: boolean;
};
|
||||
|
||||
/**
 * Concept record in snake_case form: identity, label, statement assignment and
 * presentation placement. The presentation fields are `null` when the concept was
 * registered from a fact alone rather than from a statement row.
 */
export type TaxonomyConcept = {
  concept_key: string;
  qname: string;
  namespace_uri: string;
  local_name: string;
  label: string | null;
  is_extension: boolean;
  statement_kind: FinancialStatementKind | null;
  role_uri: string | null;
  presentation_order: number | null;
  presentation_depth: number | null;
  parent_concept_key: string | null;
  is_abstract: boolean;
};
|
||||
|
||||
/**
 * Result of comparing one metric's taxonomy value with the value an LLM extracted
 * from the filing PDF.
 */
export type TaxonomyMetricValidationCheck = {
  metric_key: keyof NonNullable<Filing['metrics']>;
  taxonomy_value: number | null;
  llm_value: number | null;
  /** `null` when either side is missing, so no difference could be computed. */
  absolute_diff: number | null;
  relative_diff: number | null;
  status: 'not_run' | 'matched' | 'mismatch' | 'error';
  /** Positive page numbers the LLM cited for the value. */
  evidence_pages: number[];
  pdf_url: string | null;
  provider: string | null;
  model: string | null;
  /** Failure message when `status` is `'error'`, otherwise `null`. */
  error: string | null;
};
|
||||
|
||||
/** Identifies the filing (10-K or 10-Q) whose taxonomy data is to be hydrated. */
export type TaxonomyHydrationInput = {
  filingId: number;
  ticker: string;
  cik: string;
  accessionNumber: string;
  filingDate: string;
  filingType: '10-K' | '10-Q';
  filingUrl: string | null;
  primaryDocument: string | null;
};
|
||||
|
||||
/**
 * Full output of hydrating one filing: parse status, periods, statement rows,
 * derived metrics, PDF validation outcome, counts, and the concept / fact /
 * asset records in snake_case form.
 */
export type TaxonomyHydrationResult = {
  filing_id: number;
  ticker: string;
  filing_date: string;
  filing_type: '10-K' | '10-Q';
  parse_status: FilingTaxonomyParseStatus;
  parse_error: string | null;
  source: FilingTaxonomySource;
  periods: FilingTaxonomyPeriod[];
  statement_rows: Record<FinancialStatementKind, TaxonomyStatementRow[]>;
  derived_metrics: Filing['metrics'];
  validation_result: MetricValidationResult | null;
  facts_count: number;
  concepts_count: number;
  dimensions_count: number;
  assets: TaxonomyAsset[];
  concepts: TaxonomyConcept[];
  /** Fact rows: a parsed fact's fields plus its statement kind and role URI. */
  facts: {
    concept_key: string;
    qname: string;
    namespace_uri: string;
    local_name: string;
    statement_kind: FinancialStatementKind | null;
    role_uri: string | null;
    context_id: string;
    unit: string | null;
    decimals: string | null;
    value_num: number;
    period_start: string | null;
    period_end: string | null;
    period_instant: string | null;
    dimensions: { axis: string; member: string }[];
    is_dimensionless: boolean;
    source_file: string | null;
  }[];
  metric_validations: TaxonomyMetricValidationCheck[];
};
|
||||
60
lib/server/taxonomy/xbrl-parser.test.ts
Normal file
60
lib/server/taxonomy/xbrl-parser.test.ts
Normal file
@@ -0,0 +1,60 @@
|
||||
import { describe, expect, it } from 'bun:test';
|
||||
import { parseXbrlInstance } from '@/lib/server/taxonomy/xbrl-parser';
|
||||
|
||||
// Minimal XBRL instance fixture: a duration context (c1), an instant context with
// one explicit dimension member (c2), one USD unit, two numeric us-gaap facts with
// comma-formatted values, and one non-numeric dei fact.
const SAMPLE_XBRL = `
|
||||
<xbrli:xbrl xmlns:xbrli="http://www.xbrl.org/2003/instance"
|
||||
xmlns:xbrldi="http://xbrl.org/2006/xbrldi"
|
||||
xmlns:us-gaap="http://fasb.org/us-gaap/2024"
|
||||
xmlns:dei="http://xbrl.sec.gov/dei/2024">
|
||||
<xbrli:context id="c1">
|
||||
<xbrli:period>
|
||||
<xbrli:startDate>2025-01-01</xbrli:startDate>
|
||||
<xbrli:endDate>2025-12-31</xbrli:endDate>
|
||||
</xbrli:period>
|
||||
</xbrli:context>
|
||||
<xbrli:context id="c2">
|
||||
<xbrli:entity>
|
||||
<xbrli:segment>
|
||||
<xbrldi:explicitMember dimension="us-gaap:StatementBusinessSegmentsAxis">us-gaap:ConsolidatedGroupMember</xbrldi:explicitMember>
|
||||
</xbrli:segment>
|
||||
</xbrli:entity>
|
||||
<xbrli:period>
|
||||
<xbrli:instant>2025-12-31</xbrli:instant>
|
||||
</xbrli:period>
|
||||
</xbrli:context>
|
||||
<xbrli:unit id="u1">
|
||||
<xbrli:measure>iso4217:USD</xbrli:measure>
|
||||
</xbrli:unit>
|
||||
<us-gaap:Revenues contextRef="c1" unitRef="u1" decimals="-6">1,234</us-gaap:Revenues>
|
||||
<us-gaap:Assets contextRef="c2" unitRef="u1" decimals="-6">5,678</us-gaap:Assets>
|
||||
<dei:EntityRegistrantName contextRef="c1">Acme Corp</dei:EntityRegistrantName>
|
||||
</xbrli:xbrl>
|
||||
`;
|
||||
|
||||
describe('xbrl instance parser', () => {
  it('parses contexts, units, numeric facts, dimensions, and concept keys', () => {
    const { contexts, units, facts } = parseXbrlInstance(SAMPLE_XBRL, 'abc_htm.xml');

    // Contexts and units.
    expect(contexts.c1?.periodStart).toBe('2025-01-01');
    expect(contexts.c1?.periodEnd).toBe('2025-12-31');
    expect(contexts.c2?.periodInstant).toBe('2025-12-31');
    expect(contexts.c2?.dimensions.length).toBe(1);
    expect(units.u1?.measure).toBe('iso4217:USD');

    // Only the two numeric facts are kept.
    expect(facts.length).toBe(2);
    const revenueFact = facts.find((entry) => entry.localName === 'Revenues');
    const assetsFact = facts.find((entry) => entry.localName === 'Assets');

    expect(revenueFact?.conceptKey).toBe('http://fasb.org/us-gaap/2024#Revenues');
    expect(revenueFact?.isDimensionless).toBe(true);
    expect(revenueFact?.value).toBe(1234);
    expect(revenueFact?.sourceFile).toBe('abc_htm.xml');

    expect(assetsFact?.conceptKey).toBe('http://fasb.org/us-gaap/2024#Assets');
    expect(assetsFact?.isDimensionless).toBe(false);
    expect(assetsFact?.dimensions[0]).toEqual({
      axis: 'us-gaap:StatementBusinessSegmentsAxis',
      member: 'us-gaap:ConsolidatedGroupMember'
    });
  });
});
|
||||
264
lib/server/taxonomy/xbrl-parser.ts
Normal file
264
lib/server/taxonomy/xbrl-parser.ts
Normal file
@@ -0,0 +1,264 @@
|
||||
import type { FinancialStatementKind } from '@/lib/types';
|
||||
import type { TaxonomyContext, TaxonomyFact, TaxonomyNamespaceMap, TaxonomyUnit } from '@/lib/server/taxonomy/types';
|
||||
|
||||
/**
 * Decodes XML character and entity references in a single pass.
 *
 * Handles the five predefined XML entities (`&amp;`, `&lt;`, `&gt;`, `&quot;`,
 * `&apos;`), `&nbsp;`, and decimal / hexadecimal character references. Decoding in
 * one pass means already-decoded output is never rescanned, so `&amp;lt;` becomes
 * the literal text `&lt;` rather than `<`.
 *
 * Non-breaking spaces (`&nbsp;`, `&#160;`, `&#xA0;`, or a literal U+00A0) become a
 * plain space. Character references that are not valid code points become a single
 * space. Unrecognised named entities are left untouched.
 */
function decodeXmlEntities(value: string) {
  const namedEntities = new Map<string, string>([
    ['amp', '&'],
    ['lt', '<'],
    ['gt', '>'],
    ['quot', '"'],
    ['apos', "'"],
    ['nbsp', ' ']
  ]);

  return value
    .replace(
      /&(?:#x([0-9a-f]+)|#([0-9]+)|([a-z]+));/gi,
      (match: string, hex: string | undefined, decimal: string | undefined, name: string | undefined) => {
        if (name !== undefined) {
          return namedEntities.get(name.toLowerCase()) ?? match;
        }

        const codePoint = hex !== undefined
          ? Number.parseInt(hex, 16)
          : Number.parseInt(decimal ?? '', 10);

        if (!Number.isFinite(codePoint)) {
          return ' ';
        }

        if (codePoint === 0xa0) {
          return ' ';
        }

        try {
          return String.fromCodePoint(codePoint);
        } catch {
          // fromCodePoint throws RangeError for values above U+10FFFF.
          return ' ';
        }
      }
    )
    .replace(/\u00a0/g, ' ');
}
|
||||
|
||||
/**
 * Parses the text content of a fact as a number.
 *
 * Accepts thousands separators, a leading `$`, surrounding whitespace, embedded
 * markup, the Unicode minus sign (U+2212), exponent notation, and accounting-style
 * parentheses for negatives. The whole cleaned text must be a well-formed number:
 * text that only starts with digits (`2025-12-31`, `12abc`) is rejected instead of
 * being read as its numeric prefix.
 *
 * @returns The parsed number, or `null` for empty text, dash placeholders (`--`),
 *   or anything that is not entirely numeric.
 */
function parseNumber(value: string) {
  // Strip markup first so the placeholder and parenthesis checks see the text, not tags.
  const text = value.replace(/<[^>]+>/g, ' ').trim();
  if (!text) {
    return null;
  }

  if (/^--+$/.test(text)) {
    return null;
  }

  const negative = text.startsWith('(') && text.endsWith(')');
  const normalized = text
    .replace(/[,$\s]/g, '')
    .replace(/[()]/g, '')
    .replace(/\u2212/g, '-');

  // Require the entire string to be numeric; parseFloat would accept a numeric prefix.
  if (!/^[+-]?(?:\d+\.?\d*|\.\d+)(?:e[+-]?\d+)?$/i.test(normalized)) {
    return null;
  }

  const parsed = Number(normalized);
  if (!Number.isFinite(parsed)) {
    return null;
  }

  return negative ? -Math.abs(parsed) : parsed;
}
|
||||
|
||||
/**
 * Collects `xmlns:prefix="uri"` declarations into a prefix-to-URI map.
 *
 * Only the first tag whose text contains "xbrl" is scanned; when no such tag
 * exists, the first 1200 characters of the document are scanned instead.
 * A prefix declared more than once keeps its last URI.
 */
function parseNamespaceMapFromDocument(raw: string): TaxonomyNamespaceMap {
  const rootTag = raw.match(/<[^>]*xbrl[^>]*>/i);
  const searchArea = rootTag ? rootTag[0] : raw.slice(0, 1200);

  const namespaces: TaxonomyNamespaceMap = {};

  for (const [, rawPrefix, rawUri] of searchArea.matchAll(/xmlns:([a-zA-Z0-9_\-]+)=["']([^"']+)["']/g)) {
    const prefix = (rawPrefix ?? '').trim();
    const uri = (rawUri ?? '').trim();

    if (prefix && uri) {
      namespaces[prefix] = uri;
    }
  }

  return namespaces;
}
|
||||
|
||||
/**
 * Extracts every `<context id="...">` element from the instance document.
 *
 * For each context the period (`startDate` / `endDate` / `instant`) and the
 * explicit dimension members (`explicitMember` elements with a `dimension`
 * attribute) are collected. Element names may carry any namespace prefix.
 * When the same id occurs more than once, the last occurrence wins.
 *
 * @param raw Full text of the instance document.
 * @returns Contexts keyed by their id.
 */
function parseContexts(raw: string): Record<string, TaxonomyContext> {
  const byId: Record<string, TaxonomyContext> = {};

  // Trimmed first capture group of `pattern` inside `fragment`, or null.
  const firstCapture = (fragment: string, pattern: RegExp): string | null =>
    fragment.match(pattern)?.[1]?.trim() ?? null;

  const contextMatches = raw.matchAll(
    /<(?:[a-z0-9_\-]+:)?context\b[^>]*\bid=["']([^"']+)["'][^>]*>([\s\S]*?)<\/(?:[a-z0-9_\-]+:)?context>/gi
  );

  for (const [, rawId = '', block = ''] of contextMatches) {
    const id = rawId.trim();
    if (id === '') {
      continue;
    }

    const memberMatches = block.matchAll(
      /<(?:[a-z0-9_\-]+:)?explicitMember\b[^>]*\bdimension=["']([^"']+)["'][^>]*>([^<]+)<\/(?:[a-z0-9_\-]+:)?explicitMember>/gi
    );

    // Pairs with an empty axis or member are dropped.
    const dimensions: Array<{ axis: string; member: string }> = Array.from(
      memberMatches,
      ([, axisText = '', memberText = '']) => ({
        axis: decodeXmlEntities(axisText.trim()),
        member: decodeXmlEntities(memberText.trim())
      })
    ).filter(({ axis, member }) => axis !== '' && member !== '');

    byId[id] = {
      id,
      periodStart: firstCapture(block, /<(?:[a-z0-9_\-]+:)?startDate>([^<]+)<\/(?:[a-z0-9_\-]+:)?startDate>/i),
      periodEnd: firstCapture(block, /<(?:[a-z0-9_\-]+:)?endDate>([^<]+)<\/(?:[a-z0-9_\-]+:)?endDate>/i),
      periodInstant: firstCapture(block, /<(?:[a-z0-9_\-]+:)?instant>([^<]+)<\/(?:[a-z0-9_\-]+:)?instant>/i),
      dimensions
    };
  }

  return byId;
}
|
||||
|
||||
/**
 * Extracts every `<unit id="...">` element from the instance document and
 * renders its measure(s) as a single string.
 *
 * - One `<measure>`: that measure, e.g. `iso4217:USD`.
 * - A `<divide>` with `<unitNumerator>` and `<unitDenominator>`:
 *   `numerator/denominator`, e.g. `iso4217:USD/xbrli:shares`. Several measures
 *   on one side are joined with `*`.
 * - Several `<measure>` children without a numerator/denominator pair: a
 *   product, joined with `*` (previously these were joined with `/`, which
 *   misrepresented a product as a ratio).
 * - No measures: `null`.
 *
 * When the same id occurs more than once, the last occurrence wins.
 *
 * @param raw Full text of the instance document.
 * @returns Units keyed by their id.
 */
function parseUnits(raw: string): Record<string, TaxonomyUnit> {
  const units: Record<string, TaxonomyUnit> = {};
  const unitPattern = /<(?:[a-z0-9_\-]+:)?unit\b[^>]*\bid=["']([^"']+)["'][^>]*>([\s\S]*?)<\/(?:[a-z0-9_\-]+:)?unit>/gi;
  const measurePattern = /<(?:[a-z0-9_\-]+:)?measure>([^<]+)<\/(?:[a-z0-9_\-]+:)?measure>/gi;
  const numeratorPattern = /<(?:[a-z0-9_\-]+:)?unitNumerator\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_\-]+:)?unitNumerator>/i;
  const denominatorPattern = /<(?:[a-z0-9_\-]+:)?unitDenominator\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_\-]+:)?unitDenominator>/i;

  // All non-empty, entity-decoded measure texts inside an XML fragment.
  const collectMeasures = (fragment: string): string[] =>
    [...fragment.matchAll(measurePattern)]
      .map((entry) => decodeXmlEntities((entry[1] ?? '').trim()))
      .filter(Boolean);

  for (const match of raw.matchAll(unitPattern)) {
    const id = (match[1] ?? '').trim();
    const block = match[2] ?? '';
    if (!id) {
      continue;
    }

    const numerator = collectMeasures(block.match(numeratorPattern)?.[1] ?? '');
    const denominator = collectMeasures(block.match(denominatorPattern)?.[1] ?? '');

    let measure: string | null = null;
    if (numerator.length > 0 && denominator.length > 0) {
      measure = `${numerator.join('*')}/${denominator.join('*')}`;
    } else {
      const measures = collectMeasures(block);
      if (measures.length > 0) {
        measure = measures.join('*');
      }
    }

    units[id] = {
      id,
      measure
    };
  }

  return units;
}
|
||||
|
||||
/**
 * Guesses which financial statement a concept belongs to from keywords in its
 * local name (case-insensitive).
 *
 * Rules are evaluated top to bottom and the first hit wins, so the order is
 * significant: e.g. a name containing both "cash" and "income" is classified
 * as `cash_flow`.
 *
 * @param localName Concept local name, e.g. `NetCashProvidedByOperatingActivities`.
 * @returns The matching statement kind, or `null` when no keyword matches.
 */
function classifyStatementKind(localName: string): FinancialStatementKind | null {
  const rules: ReadonlyArray<readonly [RegExp, FinancialStatementKind]> = [
    [/cash|operatingactivities|investingactivities|financingactivities/, 'cash_flow'],
    [/equity|retainedearnings|additionalpaidincapital/, 'equity'],
    [/comprehensiveincome/, 'comprehensive_income'],
    [/asset|liabilit|debt/, 'balance'],
    [/revenue|income|profit|expense|costof/, 'income']
  ];

  const lowered = localName.toLowerCase();
  const hit = rules.find(([pattern]) => pattern.test(lowered));

  return hit ? hit[1] : null;
}
|
||||
|
||||
/**
 * Tells whether a namespace prefix is one of the XBRL plumbing prefixes
 * (`xbrli`, `xlink`, `link`, `xbrldi`, `xbrldt`) rather than a prefix that
 * carries reportable concepts. The comparison ignores case.
 *
 * @param prefix Namespace prefix taken from an element name.
 * @returns `true` for an infrastructure prefix, otherwise `false`.
 */
function isXbrlInfrastructurePrefix(prefix: string) {
  switch (prefix.toLowerCase()) {
    case 'xbrli':
    case 'xlink':
    case 'link':
    case 'xbrldi':
    case 'xbrldt':
      return true;
    default:
      return false;
  }
}
|
||||
|
||||
/**
 * Builds the concept key for a concept: the namespace URI and the local name
 * joined by `#`.
 *
 * @param namespaceUri Namespace URI of the concept.
 * @param localName Local (unprefixed) concept name.
 * @returns `<namespaceUri>#<localName>`.
 */
function localNameToKey(namespaceUri: string, localName: string) {
  const separator = '#';
  return namespaceUri + separator + localName;
}
|
||||
|
||||
/**
 * Parses an XBRL instance document into its namespaces, contexts, units and
 * numeric facts using regular expressions (no XML parser is involved).
 *
 * A fact is any prefixed element `<prefix:name ... contextRef="...">body</prefix:name>`
 * whose prefix is not an XBRL infrastructure prefix and whose entity-decoded
 * body parses as a number. Facts whose body is not numeric are skipped.
 *
 * @param raw Full text of the instance document.
 * @param sourceFile Value copied onto every fact's `sourceFile` field.
 * @returns The namespace map, contexts by id, units by id, and the facts in
 *   document order.
 */
export function parseXbrlInstance(
  raw: string,
  sourceFile: string | null
): {
  namespaces: TaxonomyNamespaceMap;
  contexts: Record<string, TaxonomyContext>;
  units: Record<string, TaxonomyUnit>;
  facts: TaxonomyFact[];
} {
  const namespaces = parseNamespaceMapFromDocument(raw);
  const contexts = parseContexts(raw);
  const units = parseUnits(raw);

  // Trimmed first capture group of `pattern` within the attribute text, if any.
  const readAttribute = (attributeText: string, pattern: RegExp): string | undefined =>
    attributeText.match(pattern)?.[1]?.trim();

  const facts: TaxonomyFact[] = [];
  const factMatches = raw.matchAll(
    /<([a-zA-Z0-9_\-]+):([a-zA-Z0-9_\-.]+)\b([^>]*\bcontextRef=["'][^"']+["'][^>]*)>([\s\S]*?)<\/\1:\2>/g
  );

  for (const [, rawPrefix = '', rawLocalName = '', attrs = '', rawBody = ''] of factMatches) {
    const prefix = rawPrefix.trim();
    const localName = rawLocalName.trim();
    if (prefix === '' || localName === '' || isXbrlInfrastructurePrefix(prefix)) {
      continue;
    }

    const contextId = readAttribute(attrs, /\bcontextRef=["']([^"']+)["']/i) ?? '';
    if (contextId === '') {
      continue;
    }

    const value = parseNumber(decodeXmlEntities(rawBody.trim()));
    if (value === null) {
      continue;
    }

    const unitRef = readAttribute(attrs, /\bunitRef=["']([^"']+)["']/i) ?? null;
    const decimals = readAttribute(attrs, /\bdecimals=["']([^"']+)["']/i) ?? null;

    // Prefer the resolved measure of the referenced unit; fall back to the raw
    // unitRef when the unit is unknown or has no measure.
    const unit = unitRef ? (units[unitRef]?.measure || unitRef) : unitRef;

    // Unknown prefixes get a synthetic namespace so the concept key stays unique.
    const namespaceUri = namespaces[prefix] ?? `urn:unknown:${prefix}`;
    const context = contexts[contextId];
    const dimensions = context?.dimensions ?? [];

    facts.push({
      conceptKey: localNameToKey(namespaceUri, localName),
      qname: `${prefix}:${localName}`,
      namespaceUri,
      localName,
      contextId,
      unit,
      decimals,
      value,
      periodStart: context?.periodStart ?? null,
      periodEnd: context?.periodEnd ?? null,
      periodInstant: context?.periodInstant ?? null,
      dimensions,
      isDimensionless: dimensions.length === 0,
      sourceFile
    });
  }

  return { namespaces, contexts, units, facts };
}
|
||||
|
||||
/**
 * Public entry point for the name-based statement classification: returns
 * whatever `classifyStatementKind` yields for the given concept local name.
 *
 * @param localName Concept local name.
 * @returns The guessed statement kind, or `null` when nothing matches.
 */
export function conceptStatementFallback(localName: string) {
  const statementKind = classifyStatementKind(localName);
  return statementKind;
}
|
||||
87
lib/types.ts
87
lib/types.ts
@@ -98,6 +98,12 @@ export type TaskStage =
|
||||
| 'completed'
|
||||
| 'failed'
|
||||
| 'sync.fetch_filings'
|
||||
| 'sync.discover_assets'
|
||||
| 'sync.extract_taxonomy'
|
||||
| 'sync.normalize_taxonomy'
|
||||
| 'sync.derive_metrics'
|
||||
| 'sync.validate_pdf_metrics'
|
||||
| 'sync.persist_taxonomy'
|
||||
| 'sync.fetch_metrics'
|
||||
| 'sync.persist_filings'
|
||||
| 'sync.hydrate_statements'
|
||||
@@ -169,7 +175,6 @@ export type CompanyFinancialPoint = {
|
||||
debt: number | null;
|
||||
};
|
||||
|
||||
/** Statement presentation mode: either `'standardized'` or `'filing_faithful'`. */
export type FinancialStatementMode = 'standardized' | 'filing_faithful';
|
||||
/** Identifier of a financial statement: income, balance, cash flow, equity or comprehensive income. */
export type FinancialStatementKind = 'income' | 'balance' | 'cash_flow' | 'equity' | 'comprehensive_income';
|
||||
/** History window selector: `'10y'` or `'all'` (presumably the last ten years vs. the full history — confirm with the API handler). */
export type FinancialHistoryWindow = '10y' | 'all';
|
||||
|
||||
@@ -178,11 +183,79 @@ export type FinancialStatementPeriod = {
|
||||
filingId: number;
|
||||
accessionNumber: string;
|
||||
filingDate: string;
|
||||
periodStart: string | null;
|
||||
periodEnd: string | null;
|
||||
filingType: Extract<Filing['filing_type'], '10-K' | '10-Q'>;
|
||||
periodLabel: string;
|
||||
};
|
||||
|
||||
/**
 * One dimensional qualifier: an axis together with the member selected on it.
 * Used as the element type of `TaxonomyFactRow.dimensions`.
 */
export type TaxonomyDimensionMember = {
|
||||
axis: string;
|
||||
member: string;
|
||||
};
|
||||
|
||||
/**
 * One row of a taxonomy-based financial statement.
 *
 * Carries the concept identity (`conceptKey`, `qname`, `namespaceUri`,
 * `localName`), the statement it is placed on, its position in the row
 * hierarchy (`order`, `depth`, `parentKey`), and its values and units as
 * string-keyed maps.
 *
 * NOTE(review): the keys of `values` / `units` are presumably period
 * identifiers, and `sourceFactIds` presumably refers to `TaxonomyFactRow.id` —
 * confirm against the code that builds these rows.
 */
export type TaxonomyStatementRow = {
|
||||
key: string;
|
||||
label: string;
|
||||
conceptKey: string;
|
||||
qname: string;
|
||||
namespaceUri: string;
|
||||
localName: string;
|
||||
isExtension: boolean;
|
||||
statement: FinancialStatementKind;
|
||||
roleUri: string | null;
|
||||
order: number;
|
||||
depth: number;
|
||||
parentKey: string | null;
|
||||
values: Record<string, number | null>;
|
||||
units: Record<string, string | null>;
|
||||
hasDimensions: boolean;
|
||||
sourceFactIds: number[];
|
||||
};
|
||||
|
||||
/**
 * A single numeric taxonomy fact as exposed to API consumers.
 *
 * Combines record identifiers (`id`, `snapshotId`, `filingId`), the concept
 * identity (`conceptKey`, `qname`, `namespaceUri`, `localName`), the numeric
 * `value` with its `unit` and `decimals`, the reporting period
 * (`periodStart` / `periodEnd` / `periodInstant`), and the dimensional
 * qualifiers that apply to the fact.
 *
 * NOTE(review): `statement` is nullable, presumably for facts that could not
 * be assigned to a statement; `isDimensionless` presumably mirrors
 * `dimensions.length === 0` — confirm against the producer.
 */
export type TaxonomyFactRow = {
|
||||
id: number;
|
||||
snapshotId: number;
|
||||
filingId: number;
|
||||
filingDate: string;
|
||||
statement: FinancialStatementKind | null;
|
||||
roleUri: string | null;
|
||||
conceptKey: string;
|
||||
qname: string;
|
||||
namespaceUri: string;
|
||||
localName: string;
|
||||
value: number;
|
||||
contextId: string;
|
||||
unit: string | null;
|
||||
decimals: string | null;
|
||||
periodStart: string | null;
|
||||
periodEnd: string | null;
|
||||
periodInstant: string | null;
|
||||
dimensions: TaxonomyDimensionMember[];
|
||||
isDimensionless: boolean;
|
||||
sourceFile: string | null;
|
||||
};
|
||||
|
||||
/**
 * Outcome of validating one filing metric.
 *
 * Holds the metric's key (a key of `Filing['metrics']`), the two values being
 * compared (`taxonomyValue`, `llmValue`), their absolute and relative
 * difference, the resulting `status`, and supporting details (`evidencePages`,
 * `pdfUrl`, `provider`, `model`, `error`).
 *
 * NOTE(review): how `relativeDiff` is normalised and which tolerance separates
 * `'matched'` from `'mismatch'` is decided by the validator, not by this type.
 */
export type MetricValidationCheck = {
|
||||
metricKey: keyof NonNullable<Filing['metrics']>;
|
||||
taxonomyValue: number | null;
|
||||
llmValue: number | null;
|
||||
absoluteDiff: number | null;
|
||||
relativeDiff: number | null;
|
||||
status: 'not_run' | 'matched' | 'mismatch' | 'error';
|
||||
evidencePages: number[];
|
||||
pdfUrl: string | null;
|
||||
provider: string | null;
|
||||
model: string | null;
|
||||
error: string | null;
|
||||
};
|
||||
|
||||
/**
 * Result of a metric validation run: an overall `status`, the individual
 * per-metric `checks`, and a nullable `validatedAt` string.
 *
 * NOTE(review): `validatedAt` is presumably a timestamp that is null when
 * validation has not run — confirm format and semantics with the producer.
 */
export type MetricValidationResult = {
|
||||
status: 'not_run' | 'matched' | 'mismatch' | 'error';
|
||||
checks: MetricValidationCheck[];
|
||||
validatedAt: string | null;
|
||||
};
|
||||
|
||||
export type StandardizedStatementRow = {
|
||||
key: string;
|
||||
label: string;
|
||||
@@ -220,16 +293,20 @@ export type CompanyFinancialStatementsResponse = {
|
||||
companyName: string;
|
||||
cik: string | null;
|
||||
};
|
||||
mode: FinancialStatementMode;
|
||||
statement: FinancialStatementKind;
|
||||
window: FinancialHistoryWindow;
|
||||
periods: FinancialStatementPeriod[];
|
||||
rows: StandardizedStatementRow[] | FilingFaithfulStatementRow[];
|
||||
rows: TaxonomyStatementRow[];
|
||||
nextCursor: string | null;
|
||||
facts: {
|
||||
rows: TaxonomyFactRow[];
|
||||
nextCursor: string | null;
|
||||
} | null;
|
||||
coverage: {
|
||||
filings: number;
|
||||
rows: number;
|
||||
dimensions: number;
|
||||
facts: number;
|
||||
};
|
||||
dataSourceStatus: {
|
||||
enabled: boolean;
|
||||
@@ -239,6 +316,10 @@ export type CompanyFinancialStatementsResponse = {
|
||||
pendingFilings: number;
|
||||
queuedSync: boolean;
|
||||
};
|
||||
metrics: {
|
||||
taxonomy: Filing['metrics'];
|
||||
validation: MetricValidationResult | null;
|
||||
};
|
||||
dimensionBreakdown: Record<string, DimensionBreakdownRow[]> | null;
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user