diff --git a/.gitignore b/.gitignore index fc6c4b8..27877ef 100644 --- a/.gitignore +++ b/.gitignore @@ -44,6 +44,9 @@ data/*.sqlite-shm data/*.sqlite-wal .workflow-data/ output/ +rust/target/ +rust/vendor/crabrl/.git-vendor/ +bin/fiscal-xbrl # Local automation/test artifacts .playwright-cli/ diff --git a/Dockerfile b/Dockerfile index c6320e4..f6e0609 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,6 +6,13 @@ COPY package.json bun.lock ./ RUN --mount=type=cache,target=/root/.bun/install/cache \ bun install --frozen-lockfile +FROM rust:1.93-alpine AS rust-builder +WORKDIR /app +COPY rust ./rust +RUN --mount=type=cache,target=/usr/local/cargo/registry \ + --mount=type=cache,target=/app/rust/target \ + cargo build --manifest-path rust/Cargo.toml --release --bin fiscal-xbrl + FROM deps AS builder ARG NEXT_PUBLIC_API_URL= ARG DATABASE_URL=file:data/fiscal.sqlite @@ -38,15 +45,21 @@ COPY --from=builder /app/.next/static ./.next/static COPY --from=builder /app/drizzle ./drizzle COPY --from=builder /app/scripts ./scripts COPY --from=builder /app/lib ./lib +COPY --from=builder /app/contracts ./contracts +COPY --from=builder /app/rust/taxonomy ./rust/taxonomy COPY --from=builder /app/tsconfig.json ./tsconfig.json COPY --from=deps /app/node_modules ./node_modules COPY --from=deps /app/package.json ./package.json COPY --from=deps /app/bun.lock ./bun.lock +COPY --from=rust-builder /app/rust/target/release/fiscal-xbrl ./bin/fiscal-xbrl -RUN mkdir -p /app/data /app/.workflow-data +RUN mkdir -p /app/data /app/.workflow-data /app/bin /app/.cache/xbrl && chmod +x /app/bin/fiscal-xbrl EXPOSE 3000 ENV PORT=3000 +ENV FISCAL_XBRL_BIN=/app/bin/fiscal-xbrl +ENV FISCAL_XBRL_CACHE_DIR=/app/.cache/xbrl +ENV XBRL_ENGINE_TIMEOUT_MS=45000 -CMD ["sh", "-c", "bun run bootstrap:prod && bun server.js"] +CMD ["sh", "-c", "if [ ! 
-x \"${FISCAL_XBRL_BIN:-/app/bin/fiscal-xbrl}\" ]; then echo \"Missing Rust XBRL sidecar at ${FISCAL_XBRL_BIN:-/app/bin/fiscal-xbrl}\" >&2; exit 1; fi; bun run bootstrap:prod && bun server.js"] diff --git a/contracts/xbrl-hydrate-v1.schema.json b/contracts/xbrl-hydrate-v1.schema.json new file mode 100644 index 0000000..cbe240f --- /dev/null +++ b/contracts/xbrl-hydrate-v1.schema.json @@ -0,0 +1,57 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://fiscal.ai/contracts/xbrl-hydrate-v1.schema.json", + "title": "Fiscal XBRL Hydrate Filing v1", + "type": "object", + "properties": { + "filingId": { "type": "integer" }, + "ticker": { "type": "string" }, + "cik": { "type": "string" }, + "accessionNumber": { "type": "string" }, + "filingDate": { "type": "string", "format": "date" }, + "filingType": { "type": "string", "enum": ["10-K", "10-Q"] }, + "filingUrl": { "type": ["string", "null"] }, + "primaryDocument": { "type": ["string", "null"] }, + "cacheDir": { "type": "string" } + }, + "required": [ + "filingId", + "ticker", + "cik", + "accessionNumber", + "filingDate", + "filingType", + "cacheDir" + ], + "$defs": { + "statementKind": { + "type": "string", + "enum": ["income", "balance", "cash_flow", "equity", "comprehensive_income"] + }, + "surfaceRow": { + "type": "object", + "properties": { + "key": { "type": "string" }, + "label": { "type": "string" }, + "category": { "type": "string" }, + "order": { "type": "integer" }, + "unit": { "type": "string" }, + "values": { + "type": "object", + "additionalProperties": { + "type": ["number", "null"] + } + }, + "sourceConcepts": { + "type": "array", + "items": { "type": "string" } + }, + "sourceFactIds": { + "type": "array", + "items": { "type": "integer" } + } + }, + "required": ["key", "label", "category", "order", "unit", "values", "sourceConcepts", "sourceFactIds"] + } + } +} diff --git a/drizzle/0010_taxonomy_surface_sidecar.sql b/drizzle/0010_taxonomy_surface_sidecar.sql new file mode 
100644 index 0000000..6a60a45 --- /dev/null +++ b/drizzle/0010_taxonomy_surface_sidecar.sql @@ -0,0 +1,79 @@ +ALTER TABLE `filing_taxonomy_snapshot` ADD `parser_engine` text DEFAULT 'legacy-ts' NOT NULL; +--> statement-breakpoint +ALTER TABLE `filing_taxonomy_snapshot` ADD `parser_version` text DEFAULT '0.0.0' NOT NULL; +--> statement-breakpoint +ALTER TABLE `filing_taxonomy_snapshot` ADD `taxonomy_regime` text DEFAULT 'unknown' NOT NULL; +--> statement-breakpoint +ALTER TABLE `filing_taxonomy_snapshot` ADD `fiscal_pack` text; +--> statement-breakpoint +ALTER TABLE `filing_taxonomy_snapshot` ADD `faithful_rows` text; +--> statement-breakpoint +ALTER TABLE `filing_taxonomy_snapshot` ADD `surface_rows` text; +--> statement-breakpoint +ALTER TABLE `filing_taxonomy_snapshot` ADD `detail_rows` text; +--> statement-breakpoint +ALTER TABLE `filing_taxonomy_snapshot` ADD `kpi_rows` text; +--> statement-breakpoint +ALTER TABLE `filing_taxonomy_snapshot` ADD `normalization_summary` text; +--> statement-breakpoint +UPDATE `filing_taxonomy_snapshot` +SET + `faithful_rows` = COALESCE(`faithful_rows`, `statement_rows`), + `surface_rows` = COALESCE(`surface_rows`, '{"income":[],"balance":[],"cash_flow":[],"equity":[],"comprehensive_income":[]}'), + `detail_rows` = COALESCE(`detail_rows`, '{"income":{},"balance":{},"cash_flow":{},"equity":{},"comprehensive_income":{}}'), + `kpi_rows` = COALESCE(`kpi_rows`, '[]'); +--> statement-breakpoint +CREATE TABLE `filing_taxonomy_context` ( + `id` integer PRIMARY KEY AUTOINCREMENT NOT NULL, + `snapshot_id` integer NOT NULL, + `context_id` text NOT NULL, + `entity_identifier` text, + `entity_scheme` text, + `period_start` text, + `period_end` text, + `period_instant` text, + `segment_json` text, + `scenario_json` text, + `created_at` text NOT NULL, + FOREIGN KEY (`snapshot_id`) REFERENCES `filing_taxonomy_snapshot`(`id`) ON UPDATE no action ON DELETE cascade +); +--> statement-breakpoint +CREATE INDEX `filing_taxonomy_context_snapshot_idx` ON 
`filing_taxonomy_context` (`snapshot_id`); +--> statement-breakpoint +CREATE UNIQUE INDEX `filing_taxonomy_context_uidx` ON `filing_taxonomy_context` (`snapshot_id`,`context_id`); +--> statement-breakpoint +ALTER TABLE `filing_taxonomy_concept` ADD `balance` text; +--> statement-breakpoint +ALTER TABLE `filing_taxonomy_concept` ADD `period_type` text; +--> statement-breakpoint +ALTER TABLE `filing_taxonomy_concept` ADD `data_type` text; +--> statement-breakpoint +ALTER TABLE `filing_taxonomy_concept` ADD `authoritative_concept_key` text; +--> statement-breakpoint +ALTER TABLE `filing_taxonomy_concept` ADD `mapping_method` text; +--> statement-breakpoint +ALTER TABLE `filing_taxonomy_concept` ADD `surface_key` text; +--> statement-breakpoint +ALTER TABLE `filing_taxonomy_concept` ADD `detail_parent_surface_key` text; +--> statement-breakpoint +ALTER TABLE `filing_taxonomy_concept` ADD `kpi_key` text; +--> statement-breakpoint +ALTER TABLE `filing_taxonomy_concept` ADD `residual_flag` integer DEFAULT false NOT NULL; +--> statement-breakpoint +ALTER TABLE `filing_taxonomy_fact` ADD `data_type` text; +--> statement-breakpoint +ALTER TABLE `filing_taxonomy_fact` ADD `authoritative_concept_key` text; +--> statement-breakpoint +ALTER TABLE `filing_taxonomy_fact` ADD `mapping_method` text; +--> statement-breakpoint +ALTER TABLE `filing_taxonomy_fact` ADD `surface_key` text; +--> statement-breakpoint +ALTER TABLE `filing_taxonomy_fact` ADD `detail_parent_surface_key` text; +--> statement-breakpoint +ALTER TABLE `filing_taxonomy_fact` ADD `kpi_key` text; +--> statement-breakpoint +ALTER TABLE `filing_taxonomy_fact` ADD `residual_flag` integer DEFAULT false NOT NULL; +--> statement-breakpoint +ALTER TABLE `filing_taxonomy_fact` ADD `precision` text; +--> statement-breakpoint +ALTER TABLE `filing_taxonomy_fact` ADD `nil` integer DEFAULT false NOT NULL; diff --git a/drizzle/0011_remove_legacy_xbrl_defaults.sql b/drizzle/0011_remove_legacy_xbrl_defaults.sql new file mode 100644 
index 0000000..b7ddde9 --- /dev/null +++ b/drizzle/0011_remove_legacy_xbrl_defaults.sql @@ -0,0 +1,100 @@ +PRAGMA foreign_keys=OFF; +--> statement-breakpoint +CREATE TABLE `__new_filing_taxonomy_snapshot` ( + `id` integer PRIMARY KEY AUTOINCREMENT NOT NULL, + `filing_id` integer NOT NULL, + `ticker` text NOT NULL, + `filing_date` text NOT NULL, + `filing_type` text NOT NULL, + `parse_status` text NOT NULL, + `parse_error` text, + `source` text NOT NULL, + `parser_engine` text DEFAULT 'fiscal-xbrl' NOT NULL, + `parser_version` text DEFAULT 'unknown' NOT NULL, + `taxonomy_regime` text DEFAULT 'unknown' NOT NULL, + `fiscal_pack` text, + `periods` text, + `faithful_rows` text, + `statement_rows` text, + `surface_rows` text, + `detail_rows` text, + `kpi_rows` text, + `derived_metrics` text, + `validation_result` text, + `normalization_summary` text, + `facts_count` integer DEFAULT 0 NOT NULL, + `concepts_count` integer DEFAULT 0 NOT NULL, + `dimensions_count` integer DEFAULT 0 NOT NULL, + `created_at` text NOT NULL, + `updated_at` text NOT NULL, + FOREIGN KEY (`filing_id`) REFERENCES `filing`(`id`) ON UPDATE no action ON DELETE cascade +); +--> statement-breakpoint +INSERT INTO `__new_filing_taxonomy_snapshot` ( + `id`, + `filing_id`, + `ticker`, + `filing_date`, + `filing_type`, + `parse_status`, + `parse_error`, + `source`, + `parser_engine`, + `parser_version`, + `taxonomy_regime`, + `fiscal_pack`, + `periods`, + `faithful_rows`, + `statement_rows`, + `surface_rows`, + `detail_rows`, + `kpi_rows`, + `derived_metrics`, + `validation_result`, + `normalization_summary`, + `facts_count`, + `concepts_count`, + `dimensions_count`, + `created_at`, + `updated_at` +) +SELECT + `id`, + `filing_id`, + `ticker`, + `filing_date`, + `filing_type`, + `parse_status`, + `parse_error`, + `source`, + `parser_engine`, + `parser_version`, + `taxonomy_regime`, + `fiscal_pack`, + `periods`, + `faithful_rows`, + `statement_rows`, + `surface_rows`, + `detail_rows`, + `kpi_rows`, + 
`derived_metrics`, + `validation_result`, + `normalization_summary`, + `facts_count`, + `concepts_count`, + `dimensions_count`, + `created_at`, + `updated_at` +FROM `filing_taxonomy_snapshot`; +--> statement-breakpoint +DROP TABLE `filing_taxonomy_snapshot`; +--> statement-breakpoint +ALTER TABLE `__new_filing_taxonomy_snapshot` RENAME TO `filing_taxonomy_snapshot`; +--> statement-breakpoint +CREATE UNIQUE INDEX `filing_taxonomy_snapshot_filing_uidx` ON `filing_taxonomy_snapshot` (`filing_id`); +--> statement-breakpoint +CREATE INDEX `filing_taxonomy_snapshot_ticker_date_idx` ON `filing_taxonomy_snapshot` (`ticker`,`filing_date`); +--> statement-breakpoint +CREATE INDEX `filing_taxonomy_snapshot_status_idx` ON `filing_taxonomy_snapshot` (`parse_status`); +--> statement-breakpoint +PRAGMA foreign_keys=ON; diff --git a/drizzle/meta/_journal.json b/drizzle/meta/_journal.json index c27516d..1924a18 100644 --- a/drizzle/meta/_journal.json +++ b/drizzle/meta/_journal.json @@ -71,6 +71,20 @@ "when": 1773000000000, "tag": "0009_task_notification_context", "breakpoints": true + }, + { + "idx": 10, + "version": "6", + "when": 1773090000000, + "tag": "0010_taxonomy_surface_sidecar", + "breakpoints": true + }, + { + "idx": 11, + "version": "6", + "when": 1773180000000, + "tag": "0011_remove_legacy_xbrl_defaults", + "breakpoints": true } ] } diff --git a/lib/server/api/task-workflow-hybrid.e2e.test.ts b/lib/server/api/task-workflow-hybrid.e2e.test.ts index a6f71ed..d1efc19 100644 --- a/lib/server/api/task-workflow-hybrid.e2e.test.ts +++ b/lib/server/api/task-workflow-hybrid.e2e.test.ts @@ -92,7 +92,9 @@ function applySqlMigrations(client: { exec: (query: string) => void }) { '0006_coverage_journal_tracking.sql', '0007_company_financial_bundles.sql', '0008_research_workspace.sql', - '0009_task_notification_context.sql' + '0009_task_notification_context.sql', + '0010_taxonomy_surface_sidecar.sql', + '0011_remove_legacy_xbrl_defaults.sql' ]; for (const file of migrationFiles) { 
diff --git a/lib/server/db/schema.ts b/lib/server/db/schema.ts index 7b2f28a..f4a5a8b 100644 --- a/lib/server/db/schema.ts +++ b/lib/server/db/schema.ts @@ -169,6 +169,67 @@ type TaxonomyStatementBundle = { statements: Record; }; +type TaxonomySurfaceSnapshotRow = { + key: string; + label: string; + category: string; + templateSection?: string; + order: number; + unit: 'currency' | 'count' | 'shares' | 'percent' | 'ratio'; + values: Record; + sourceConcepts: string[]; + sourceRowKeys: string[]; + sourceFactIds: number[]; + formulaKey: string | null; + hasDimensions: boolean; + resolvedSourceRowKeys: Record; + statement?: 'income' | 'balance' | 'cash_flow'; + detailCount?: number; +}; + +type TaxonomyDetailSnapshotRow = { + key: string; + parentSurfaceKey: string; + label: string; + conceptKey: string; + qname: string; + namespaceUri: string; + localName: string; + unit: string | null; + values: Record; + sourceFactIds: number[]; + isExtension: boolean; + dimensionsSummary: string[]; + residualFlag: boolean; +}; + +type TaxonomyDetailStatementMap = Record; + +type StructuredKpiSnapshotRow = { + key: string; + label: string; + category: string; + unit: 'currency' | 'count' | 'shares' | 'percent' | 'ratio'; + order: number; + segment: string | null; + axis: string | null; + member: string | null; + values: Record; + sourceConcepts: string[]; + sourceFactIds: number[]; + provenanceType: 'taxonomy' | 'structured_note'; + hasDimensions: boolean; +}; + +type TaxonomyNormalizationSummary = { + surfaceRowCount: number; + detailRowCount: number; + kpiRowCount: number; + unmappedRowCount: number; + materialUnmappedRowCount: number; + warnings: string[]; +}; + type TaxonomyMetricValidationCheck = { metricKey: keyof FilingMetrics; taxonomyValue: number | null; @@ -380,10 +441,19 @@ export const filingTaxonomySnapshot = sqliteTable('filing_taxonomy_snapshot', { parse_status: text('parse_status').$type().notNull(), parse_error: text('parse_error'), source: 
text('source').$type<'xbrl_instance' | 'xbrl_instance_with_linkbase' | 'legacy_html_fallback'>().notNull(), + parser_engine: text('parser_engine').notNull().default('fiscal-xbrl'), + parser_version: text('parser_version').notNull().default('unknown'), + taxonomy_regime: text('taxonomy_regime').$type<'us-gaap' | 'ifrs-full' | 'unknown'>().notNull().default('unknown'), + fiscal_pack: text('fiscal_pack'), periods: text('periods', { mode: 'json' }).$type(), + faithful_rows: text('faithful_rows', { mode: 'json' }).$type(), statement_rows: text('statement_rows', { mode: 'json' }).$type(), + surface_rows: text('surface_rows', { mode: 'json' }).$type | null>(), + detail_rows: text('detail_rows', { mode: 'json' }).$type | null>(), + kpi_rows: text('kpi_rows', { mode: 'json' }).$type(), derived_metrics: text('derived_metrics', { mode: 'json' }).$type(), validation_result: text('validation_result', { mode: 'json' }).$type(), + normalization_summary: text('normalization_summary', { mode: 'json' }).$type(), facts_count: integer('facts_count').notNull().default(0), concepts_count: integer('concepts_count').notNull().default(0), dimensions_count: integer('dimensions_count').notNull().default(0), @@ -395,6 +465,23 @@ export const filingTaxonomySnapshot = sqliteTable('filing_taxonomy_snapshot', { filingTaxonomySnapshotStatusIndex: index('filing_taxonomy_snapshot_status_idx').on(table.parse_status) })); +export const filingTaxonomyContext = sqliteTable('filing_taxonomy_context', { + id: integer('id').primaryKey({ autoIncrement: true }), + snapshot_id: integer('snapshot_id').notNull().references(() => filingTaxonomySnapshot.id, { onDelete: 'cascade' }), + context_id: text('context_id').notNull(), + entity_identifier: text('entity_identifier'), + entity_scheme: text('entity_scheme'), + period_start: text('period_start'), + period_end: text('period_end'), + period_instant: text('period_instant'), + segment_json: text('segment_json', { mode: 'json' }).$type | null>(), + scenario_json: 
text('scenario_json', { mode: 'json' }).$type | null>(), + created_at: text('created_at').notNull() +}, (table) => ({ + filingTaxonomyContextSnapshotIndex: index('filing_taxonomy_context_snapshot_idx').on(table.snapshot_id), + filingTaxonomyContextUnique: uniqueIndex('filing_taxonomy_context_uidx').on(table.snapshot_id, table.context_id) +})); + export const filingTaxonomyAsset = sqliteTable('filing_taxonomy_asset', { id: integer('id').primaryKey({ autoIncrement: true }), snapshot_id: integer('snapshot_id').notNull().references(() => filingTaxonomySnapshot.id, { onDelete: 'cascade' }), @@ -419,8 +506,17 @@ export const filingTaxonomyConcept = sqliteTable('filing_taxonomy_concept', { local_name: text('local_name').notNull(), label: text('label'), is_extension: integer('is_extension', { mode: 'boolean' }).notNull().default(false), + balance: text('balance'), + period_type: text('period_type'), + data_type: text('data_type'), statement_kind: text('statement_kind').$type(), role_uri: text('role_uri'), + authoritative_concept_key: text('authoritative_concept_key'), + mapping_method: text('mapping_method'), + surface_key: text('surface_key'), + detail_parent_surface_key: text('detail_parent_surface_key'), + kpi_key: text('kpi_key'), + residual_flag: integer('residual_flag', { mode: 'boolean' }).notNull().default(false), presentation_order: numeric('presentation_order'), presentation_depth: integer('presentation_depth'), parent_concept_key: text('parent_concept_key'), @@ -444,11 +540,20 @@ export const filingTaxonomyFact = sqliteTable('filing_taxonomy_fact', { qname: text('qname').notNull(), namespace_uri: text('namespace_uri').notNull(), local_name: text('local_name').notNull(), + data_type: text('data_type'), statement_kind: text('statement_kind').$type(), role_uri: text('role_uri'), + authoritative_concept_key: text('authoritative_concept_key'), + mapping_method: text('mapping_method'), + surface_key: text('surface_key'), + detail_parent_surface_key: 
text('detail_parent_surface_key'), + kpi_key: text('kpi_key'), + residual_flag: integer('residual_flag', { mode: 'boolean' }).notNull().default(false), context_id: text('context_id').notNull(), unit: text('unit'), decimals: text('decimals'), + precision: text('precision'), + nil: integer('nil', { mode: 'boolean' }).notNull().default(false), value_num: numeric('value_num').notNull(), period_start: text('period_start'), period_end: text('period_end'), diff --git a/lib/server/financial-taxonomy.test.ts b/lib/server/financial-taxonomy.test.ts index 771453f..67e52b8 100644 --- a/lib/server/financial-taxonomy.test.ts +++ b/lib/server/financial-taxonomy.test.ts @@ -7,6 +7,7 @@ import type { FilingTaxonomySnapshotRecord } from './repos/filing-taxonomy'; import type { FinancialStatementKind, FinancialStatementPeriod, + StructuredKpiRow, TaxonomyFactRow, TaxonomyStatementRow } from '@/lib/types'; @@ -67,6 +68,13 @@ function createSnapshot(input: { statement: input.statement, values: Object.fromEntries(input.periods.map((period, index) => [period.id, 100 + index])) }); + const faithfulRows = { + income: input.statement === 'income' ? (input.rows ?? [defaultRow]) : [], + balance: input.statement === 'balance' ? (input.rows ?? [{ ...defaultRow, statement: 'balance' }]) : [], + cash_flow: input.statement === 'cash_flow' ? (input.rows ?? 
[{ ...defaultRow, statement: 'cash_flow' }]) : [], + equity: [], + comprehensive_income: [] + } satisfies FilingTaxonomySnapshotRecord['faithful_rows']; return { id: input.filingId, @@ -77,6 +85,10 @@ function createSnapshot(input: { parse_status: 'ready', parse_error: null, source: 'xbrl_instance', + parser_engine: 'fiscal-xbrl', + parser_version: '0.1.0', + taxonomy_regime: 'us-gaap', + fiscal_pack: 'core', periods: input.periods.map((period) => ({ id: period.id, filingId: input.filingId, @@ -87,15 +99,26 @@ function createSnapshot(input: { filingType: input.filingType, periodLabel: period.periodLabel })), - statement_rows: { - income: input.statement === 'income' ? (input.rows ?? [defaultRow]) : [], - balance: input.statement === 'balance' ? (input.rows ?? [{ ...defaultRow, statement: 'balance' }]) : [], - cash_flow: input.statement === 'cash_flow' ? (input.rows ?? [{ ...defaultRow, statement: 'cash_flow' }]) : [], + faithful_rows: faithfulRows, + statement_rows: faithfulRows, + surface_rows: { + income: [], + balance: [], + cash_flow: [], equity: [], comprehensive_income: [] }, + detail_rows: { + income: {}, + balance: {}, + cash_flow: {}, + equity: {}, + comprehensive_income: {} + }, + kpi_rows: [], derived_metrics: null, validation_result: null, + normalization_summary: null, facts_count: 0, concepts_count: 0, dimensions_count: 0, @@ -203,6 +226,30 @@ function createFact(input: { }; } +function createKpiRow(input: { + key: string; + values: Record; + provenanceType?: StructuredKpiRow['provenanceType']; + sourceConcepts?: string[]; + sourceFactIds?: number[]; +}): StructuredKpiRow { + return { + key: input.key, + label: input.key, + category: 'operating_kpi', + unit: 'percent', + order: 10, + segment: null, + axis: null, + member: null, + values: input.values, + sourceConcepts: input.sourceConcepts ?? [], + sourceFactIds: input.sourceFactIds ?? [], + provenanceType: input.provenanceType ?? 
'taxonomy', + hasDimensions: false + }; +} + function findRow(rows: ReturnType, key: string) { const row = rows.find((entry) => entry.key === key); expect(row).toBeDefined(); @@ -1500,4 +1547,174 @@ describe('financial taxonomy internals', () => { expect(findStandardizedResponseRow(cash, 'changes_accrued_expenses').values[cashPeriod2025]).toBe(21_525_000); expect(findStandardizedResponseRow(cash, 'other_adjustments').values[cashPeriod2025]).toBe(55_904_000); }); + + it('merges KPI rows by priority without overwriting higher-priority periods', () => { + const merged = __financialTaxonomyInternals.mergeStructuredKpiRowsByPriority([ + [ + createKpiRow({ + key: 'loan_growth', + values: { p1: 0.12 }, + sourceConcepts: ['us-gaap:LoansReceivableNetReportedAmount'], + sourceFactIds: [1] + }) + ], + [ + createKpiRow({ + key: 'loan_growth', + values: { p1: 0.11, p2: 0.09 }, + sourceConcepts: ['us-gaap:FinancingReceivableRecordedInvestment'], + sourceFactIds: [2] + }) + ], + [ + createKpiRow({ + key: 'loan_growth', + values: { p2: 0.08, p3: 0.07 }, + provenanceType: 'structured_note', + sourceFactIds: [3] + }) + ] + ]); + + expect(merged).toHaveLength(1); + expect(merged[0]?.values).toEqual({ p1: 0.12, p2: 0.09, p3: 0.07 }); + expect(merged[0]?.sourceConcepts).toEqual([ + 'us-gaap:FinancingReceivableRecordedInvestment', + 'us-gaap:LoansReceivableNetReportedAmount' + ]); + expect(merged[0]?.sourceFactIds).toEqual([1, 2, 3]); + expect(merged[0]?.provenanceType).toBe('taxonomy'); + }); + + it('builds normalization metadata from snapshot fiscal pack and counts', () => { + const snapshot = { + ...createSnapshot({ + filingId: 15, + filingType: '10-Q', + filingDate: '2026-01-28', + statement: 'income', + periods: [ + { id: 'quarter', periodStart: '2025-10-01', periodEnd: '2025-12-31', periodLabel: '2025-10-01 to 2025-12-31' } + ] + }), + parser_version: '0.1.0', + fiscal_pack: 'bank_lender', + normalization_summary: { + surfaceRowCount: 5, + detailRowCount: 3, + kpiRowCount: 2, + 
unmappedRowCount: 4, + materialUnmappedRowCount: 1, + warnings: [] + } + } satisfies FilingTaxonomySnapshotRecord; + + expect(__financialTaxonomyInternals.buildNormalizationMetadata([snapshot])).toEqual({ + regime: 'us-gaap', + fiscalPack: 'bank_lender', + parserVersion: '0.1.0', + unmappedRowCount: 4, + materialUnmappedRowCount: 1 + }); + }); + + it('retains pinned income surface rows even when they are intentionally null', () => { + const snapshot = { + ...createSnapshot({ + filingId: 16, + filingType: '10-K', + filingDate: '2026-02-13', + statement: 'income', + periods: [ + { id: '2025-fy', periodStart: '2025-01-01', periodEnd: '2025-12-31', periodLabel: '2025 FY' } + ] + }), + fiscal_pack: 'bank_lender', + surface_rows: { + income: [ + { + key: 'revenue', + label: 'Revenue', + category: 'surface', + templateSection: 'surface', + order: 10, + unit: 'currency', + values: { '2025-fy': 100_000_000 }, + sourceConcepts: ['us-gaap:TotalNetRevenues'], + sourceRowKeys: ['revenue'], + sourceFactIds: [1], + formulaKey: null, + hasDimensions: false, + resolvedSourceRowKeys: { '2025-fy': 'revenue' }, + statement: 'income', + detailCount: 0, + resolutionMethod: 'direct', + confidence: 'high', + warningCodes: [] + }, + { + key: 'gross_profit', + label: 'Gross Profit', + category: 'surface', + templateSection: 'surface', + order: 20, + unit: 'currency', + values: { '2025-fy': null }, + sourceConcepts: [], + sourceRowKeys: [], + sourceFactIds: [], + formulaKey: null, + hasDimensions: false, + resolvedSourceRowKeys: { '2025-fy': null }, + statement: 'income', + detailCount: 0, + resolutionMethod: 'not_meaningful', + confidence: 'low', + warningCodes: ['gross_profit_not_meaningful_bank_pack'] + }, + { + key: 'selling_general_and_administrative', + label: 'SG&A', + category: 'surface', + templateSection: 'surface', + order: 31, + unit: 'currency', + values: { '2025-fy': null }, + sourceConcepts: [], + sourceRowKeys: [], + sourceFactIds: [], + formulaKey: null, + hasDimensions: 
false, + resolvedSourceRowKeys: { '2025-fy': null }, + statement: 'income', + detailCount: 0, + resolutionMethod: 'not_meaningful', + confidence: 'low', + warningCodes: ['selling_general_and_administrative_not_meaningful_bank_pack'] + } + ], + balance: [], + cash_flow: [], + equity: [], + comprehensive_income: [] + } + } satisfies FilingTaxonomySnapshotRecord; + + const rows = __financialTaxonomyInternals.aggregateSurfaceRows({ + snapshots: [snapshot], + statement: 'income', + selectedPeriodIds: new Set(['2025-fy']) + }); + + const grossProfit = rows.find((row) => row.key === 'gross_profit'); + const sga = rows.find((row) => row.key === 'selling_general_and_administrative'); + expect(grossProfit).toBeDefined(); + expect(grossProfit?.values['2025-fy']).toBeNull(); + expect(grossProfit?.resolutionMethod).toBe('not_meaningful'); + expect(grossProfit?.warningCodes).toEqual(['gross_profit_not_meaningful_bank_pack']); + expect(sga).toBeDefined(); + expect(sga?.values['2025-fy']).toBeNull(); + expect(sga?.resolutionMethod).toBe('not_meaningful'); + expect(sga?.warningCodes).toEqual(['selling_general_and_administrative_not_meaningful_bank_pack']); + }); }); diff --git a/lib/server/financial-taxonomy.ts b/lib/server/financial-taxonomy.ts index f234f92..1840e25 100644 --- a/lib/server/financial-taxonomy.ts +++ b/lib/server/financial-taxonomy.ts @@ -1,12 +1,16 @@ import type { CompanyFinancialStatementsResponse, + DetailFinancialRow, FinancialCadence, FinancialDisplayMode, FinancialStatementKind, FinancialStatementPeriod, FinancialSurfaceKind, + NormalizationMetadata, StandardizedFinancialRow, StructuredKpiRow, + SurfaceDetailMap, + SurfaceFinancialRow, TaxonomyFactRow, TaxonomyStatementRow } from '@/lib/types'; @@ -59,9 +63,11 @@ type GetCompanyFinancialsInput = { }; type StandardizedStatementBundlePayload = { - rows: StandardizedFinancialRow[]; + rows: SurfaceFinancialRow[]; + detailRows: SurfaceDetailMap; trendSeries: CompanyFinancialStatementsResponse['trendSeries']; 
categories: CompanyFinancialStatementsResponse['categories']; + normalization: NormalizationMetadata; }; type FilingDocumentRef = { @@ -204,6 +210,354 @@ function latestPeriodDate(period: FinancialStatementPeriod) { return period.periodEnd ?? period.filingDate; } +function cloneStructuredKpiRow(row: StructuredKpiRow): StructuredKpiRow { + return { + ...row, + values: { ...row.values }, + sourceConcepts: [...row.sourceConcepts], + sourceFactIds: [...row.sourceFactIds] + }; +} + +function mergeStructuredKpiRowsByPriority(groups: StructuredKpiRow[][]) { + const rowsByKey = new Map(); + + for (const group of groups) { + for (const row of group) { + const existing = rowsByKey.get(row.key); + if (!existing) { + rowsByKey.set(row.key, cloneStructuredKpiRow(row)); + continue; + } + + for (const [periodId, value] of Object.entries(row.values)) { + const hasExistingValue = Object.prototype.hasOwnProperty.call(existing.values, periodId) + && existing.values[periodId] !== null; + if (!hasExistingValue) { + existing.values[periodId] = value; + } + } + + existing.sourceConcepts = [...new Set([...existing.sourceConcepts, ...row.sourceConcepts])] + .sort((left, right) => left.localeCompare(right)); + existing.sourceFactIds = [...new Set([...existing.sourceFactIds, ...row.sourceFactIds])] + .sort((left, right) => left - right); + existing.hasDimensions = existing.hasDimensions || row.hasDimensions; + existing.segment ??= row.segment; + existing.axis ??= row.axis; + existing.member ??= row.member; + } + } + + return [...rowsByKey.values()].sort((left, right) => { + if (left.order !== right.order) { + return left.order - right.order; + } + + return left.label.localeCompare(right.label); + }); +} + +function emptyNormalizationMetadata(): NormalizationMetadata { + return { + regime: 'unknown', + fiscalPack: null, + parserVersion: '0.0.0', + unmappedRowCount: 0, + materialUnmappedRowCount: 0 + }; +} + +function buildNormalizationMetadata( + snapshots: FilingTaxonomySnapshotRecord[] +): 
NormalizationMetadata { + const latestSnapshot = snapshots[snapshots.length - 1]; + if (!latestSnapshot) { + return emptyNormalizationMetadata(); + } + + return { + regime: latestSnapshot.taxonomy_regime, + fiscalPack: latestSnapshot.fiscal_pack, + parserVersion: latestSnapshot.parser_version, + unmappedRowCount: snapshots.reduce( + (sum, snapshot) => sum + (snapshot.normalization_summary?.unmappedRowCount ?? 0), + 0 + ), + materialUnmappedRowCount: snapshots.reduce( + (sum, snapshot) => sum + (snapshot.normalization_summary?.materialUnmappedRowCount ?? 0), + 0 + ) + }; +} + +function rowHasValues(values: Record) { + return Object.values(values).some((value) => value !== null); +} + +const PINNED_INCOME_SURFACE_ROWS = new Set([ + 'revenue', + 'gross_profit', + 'operating_expenses', + 'selling_general_and_administrative', + 'research_and_development', + 'other_operating_expense', + 'operating_income', + 'income_tax_expense', + 'net_income' +]); + +function shouldRetainSurfaceRow( + statement: FinancialStatementKind, + row: SurfaceFinancialRow, + values: Record +) { + if (rowHasValues(values)) { + return true; + } + + return statement === 'income' && PINNED_INCOME_SURFACE_ROWS.has(row.key); +} + +function aggregateSurfaceRows(input: { + snapshots: FilingTaxonomySnapshotRecord[]; + statement: FinancialStatementKind; + selectedPeriodIds: Set; +}) { + const rowMap = new Map(); + + for (const snapshot of input.snapshots) { + const rows = snapshot.surface_rows?.[input.statement] ?? []; + for (const row of rows) { + const filteredValues = Object.fromEntries( + Object.entries(row.values).filter(([periodId]) => input.selectedPeriodIds.has(periodId)) + ); + const filteredResolvedSourceRowKeys = Object.fromEntries( + Object.entries(row.resolvedSourceRowKeys ?? 
{}).filter(([periodId]) => input.selectedPeriodIds.has(periodId)) + ); + if (!shouldRetainSurfaceRow(input.statement, row, filteredValues)) { + continue; + } + + const existing = rowMap.get(row.key); + if (!existing) { + rowMap.set(row.key, { + ...row, + values: filteredValues, + resolvedSourceRowKeys: filteredResolvedSourceRowKeys, + sourceConcepts: [...row.sourceConcepts], + sourceRowKeys: [...row.sourceRowKeys], + sourceFactIds: [...row.sourceFactIds], + warningCodes: row.warningCodes ? [...row.warningCodes] : undefined + }); + continue; + } + + for (const [periodId, value] of Object.entries(filteredValues)) { + if (!(periodId in existing.values)) { + existing.values[periodId] = value; + } + } + + for (const [periodId, sourceRowKey] of Object.entries(filteredResolvedSourceRowKeys)) { + if (!(periodId in existing.resolvedSourceRowKeys)) { + existing.resolvedSourceRowKeys[periodId] = sourceRowKey; + } + } + + existing.sourceConcepts = [...new Set([...existing.sourceConcepts, ...row.sourceConcepts])].sort((left, right) => left.localeCompare(right)); + existing.sourceRowKeys = [...new Set([...existing.sourceRowKeys, ...row.sourceRowKeys])].sort((left, right) => left.localeCompare(right)); + existing.sourceFactIds = [...new Set([...existing.sourceFactIds, ...row.sourceFactIds])].sort((left, right) => left - right); + existing.hasDimensions = existing.hasDimensions || row.hasDimensions; + existing.order = Math.min(existing.order, row.order); + existing.detailCount = Math.max(existing.detailCount ?? 0, row.detailCount ?? 0); + existing.formulaKey = existing.formulaKey ?? row.formulaKey; + existing.statement = existing.statement ?? row.statement; + existing.resolutionMethod = existing.resolutionMethod ?? row.resolutionMethod; + existing.confidence = existing.confidence ?? row.confidence; + existing.warningCodes = [...new Set([...(existing.warningCodes ?? []), ...(row.warningCodes ?? 
[])])] + .sort((left, right) => left.localeCompare(right)); + } + } + + return [...rowMap.values()].sort((left, right) => { + if (left.order !== right.order) { + return left.order - right.order; + } + + return left.label.localeCompare(right.label); + }); +} + +function aggregateDetailRows(input: { + snapshots: FilingTaxonomySnapshotRecord[]; + statement: FinancialStatementKind; + selectedPeriodIds: Set; +}) { + const detailBuckets = new Map>(); + + for (const snapshot of input.snapshots) { + const groups = snapshot.detail_rows?.[input.statement] ?? {}; + for (const [surfaceKey, rows] of Object.entries(groups)) { + let bucket = detailBuckets.get(surfaceKey); + if (!bucket) { + bucket = new Map(); + detailBuckets.set(surfaceKey, bucket); + } + + for (const row of rows) { + const filteredValues = Object.fromEntries( + Object.entries(row.values).filter(([periodId]) => input.selectedPeriodIds.has(periodId)) + ); + if (!rowHasValues(filteredValues)) { + continue; + } + + const existing = bucket.get(row.key); + if (!existing) { + bucket.set(row.key, { + ...row, + values: filteredValues, + sourceFactIds: [...row.sourceFactIds], + dimensionsSummary: [...row.dimensionsSummary] + }); + continue; + } + + for (const [periodId, value] of Object.entries(filteredValues)) { + if (!(periodId in existing.values)) { + existing.values[periodId] = value; + } + } + + existing.sourceFactIds = [...new Set([...existing.sourceFactIds, ...row.sourceFactIds])].sort((left, right) => left - right); + existing.dimensionsSummary = [...new Set([...existing.dimensionsSummary, ...row.dimensionsSummary])].sort((left, right) => left.localeCompare(right)); + existing.isExtension = existing.isExtension || row.isExtension; + existing.residualFlag = existing.residualFlag || row.residualFlag; + } + } + } + + return Object.fromEntries( + [...detailBuckets.entries()].map(([surfaceKey, bucket]) => [ + surfaceKey, + [...bucket.values()].sort((left, right) => left.label.localeCompare(right.label)) + ]) + ) 
satisfies SurfaceDetailMap; +} + +function buildLtmDetailRows(input: { + detailRows: SurfaceDetailMap; + quarterlyPeriods: FinancialStatementPeriod[]; + ltmPeriods: FinancialStatementPeriod[]; + statement: Extract; +}) { + const sortedQuarterlyPeriods = [...input.quarterlyPeriods].sort(periodSorter); + + return Object.fromEntries( + Object.entries(input.detailRows).map(([surfaceKey, rows]) => { + const ltmRows = rows + .map((row) => { + const values: Record = {}; + + for (const ltmPeriod of input.ltmPeriods) { + const anchorIndex = sortedQuarterlyPeriods.findIndex((period) => `ltm:${period.id}` === ltmPeriod.id); + if (anchorIndex < 3) { + continue; + } + + const slice = sortedQuarterlyPeriods.slice(anchorIndex - 3, anchorIndex + 1); + const sourceValues = slice.map((period) => row.values[period.id] ?? null); + values[ltmPeriod.id] = input.statement === 'balance' + ? sourceValues[sourceValues.length - 1] ?? null + : sourceValues.some((value) => value === null) + ? null + : sourceValues.reduce((sum, value) => sum + (value ?? 
0), 0); + } + + return { + ...row, + values + }; + }) + .filter((row) => rowHasValues(row.values)); + + return [surfaceKey, ltmRows]; + }) + ) satisfies SurfaceDetailMap; +} + +function buildQuarterlyStatementSurfaceRows(input: { + statement: Extract; + sourcePeriods: FinancialStatementPeriod[]; + selectedPeriodIds: Set; + faithfulRows: TaxonomyStatementRow[]; + facts: TaxonomyFactRow[]; + snapshots: FilingTaxonomySnapshotRecord[]; +}) { + const aggregatedRows = aggregateSurfaceRows({ + snapshots: input.snapshots, + statement: input.statement, + selectedPeriodIds: input.selectedPeriodIds + }); + + if (aggregatedRows.length > 0) { + return aggregatedRows; + } + + return buildStandardizedRows({ + rows: input.faithfulRows, + statement: input.statement, + periods: input.sourcePeriods, + facts: input.facts + }) as SurfaceFinancialRow[]; +} + +function aggregatePersistedKpiRows(input: { + snapshots: FilingTaxonomySnapshotRecord[]; + selectedPeriodIds: Set; +}) { + const rowMap = new Map(); + + for (const snapshot of input.snapshots) { + for (const row of snapshot.kpi_rows ?? 
[]) { + const filteredValues = Object.fromEntries( + Object.entries(row.values).filter(([periodId]) => input.selectedPeriodIds.has(periodId)) + ); + if (!rowHasValues(filteredValues)) { + continue; + } + + const existing = rowMap.get(row.key); + if (!existing) { + rowMap.set(row.key, { + ...row, + values: filteredValues, + sourceConcepts: [...row.sourceConcepts], + sourceFactIds: [...row.sourceFactIds] + }); + continue; + } + + existing.values = { + ...existing.values, + ...filteredValues + }; + existing.sourceConcepts = [...new Set([...existing.sourceConcepts, ...row.sourceConcepts])].sort((left, right) => left.localeCompare(right)); + existing.sourceFactIds = [...new Set([...existing.sourceFactIds, ...row.sourceFactIds])].sort((left, right) => left - right); + existing.hasDimensions = existing.hasDimensions || row.hasDimensions; + } + } + + return [...rowMap.values()].sort((left, right) => { + if (left.order !== right.order) { + return left.order - right.order; + } + + return left.label.localeCompare(right.label); + }); +} + function buildEmptyResponse(input: { ticker: string; companyName: string; @@ -230,6 +584,7 @@ function buildEmptyResponse(input: { statementRows: isStatementSurface(input.surfaceKind) ? { faithful: [], standardized: [] } : null, + statementDetails: null, ratioRows: input.surfaceKind === 'ratios' ? [] : null, kpiRows: input.surfaceKind === 'segments_kpis' ? 
[] : null, trendSeries: [], @@ -255,6 +610,7 @@ function buildEmptyResponse(input: { queuedSync: input.queuedSync }, metrics: input.metrics, + normalization: emptyNormalizationMetadata(), dimensionBreakdown: null } satisfies CompanyFinancialStatementsResponse; } @@ -262,7 +618,9 @@ function buildEmptyResponse(input: { async function buildStatementSurfaceBundle(input: { surfaceKind: Extract; cadence: FinancialCadence; - periods: FinancialStatementPeriod[]; + sourcePeriods: FinancialStatementPeriod[]; + targetPeriods: FinancialStatementPeriod[]; + selectedPeriodIds: Set; faithfulRows: TaxonomyStatementRow[]; facts: TaxonomyFactRow[]; snapshots: FilingTaxonomySnapshotRecord[]; @@ -274,7 +632,11 @@ async function buildStatementSurfaceBundle(input: { snapshots: input.snapshots }); - if (cached) { + if ( + cached + && Array.isArray((cached as Partial).rows) + && typeof (cached as Partial).detailRows === 'object' + ) { return cached as StandardizedStatementBundlePayload; } @@ -282,25 +644,48 @@ async function buildStatementSurfaceBundle(input: { if (!statement || (statement !== 'income' && statement !== 'balance' && statement !== 'cash_flow')) { return { rows: [], + detailRows: {}, trendSeries: [], - categories: [] + categories: [], + normalization: buildNormalizationMetadata(input.snapshots) } satisfies StandardizedStatementBundlePayload; } - const standardizedRows = buildStandardizedRows({ - rows: input.faithfulRows, + const quarterlyRows = buildQuarterlyStatementSurfaceRows({ statement, - periods: input.periods, - facts: input.facts + sourcePeriods: input.sourcePeriods, + selectedPeriodIds: input.selectedPeriodIds, + faithfulRows: input.faithfulRows, + facts: input.facts, + snapshots: input.snapshots }); + const quarterlyDetailRows = aggregateDetailRows({ + snapshots: input.snapshots, + statement, + selectedPeriodIds: input.selectedPeriodIds + }); + const rows = input.cadence === 'ltm' + ? 
buildLtmStandardizedRows(quarterlyRows, input.sourcePeriods, input.targetPeriods, statement) as SurfaceFinancialRow[] + : quarterlyRows; + const detailRows = input.cadence === 'ltm' + ? buildLtmDetailRows({ + detailRows: quarterlyDetailRows, + quarterlyPeriods: input.sourcePeriods, + ltmPeriods: input.targetPeriods, + statement + }) + : quarterlyDetailRows; + const normalization = buildNormalizationMetadata(input.snapshots); const payload = { - rows: standardizedRows, + rows, + detailRows, trendSeries: buildTrendSeries({ surfaceKind: input.surfaceKind, - statementRows: standardizedRows + statementRows: rows }), - categories: buildFinancialCategories(standardizedRows, input.surfaceKind) + categories: buildFinancialCategories(rows, input.surfaceKind), + normalization } satisfies StandardizedStatementBundlePayload; await writeFinancialBundle({ @@ -386,12 +771,19 @@ async function buildKpiSurfaceBundle(input: { return cached as Pick; } + const persistedRows = aggregatePersistedKpiRows({ + snapshots: input.snapshots, + selectedPeriodIds: new Set(input.periods.map((period) => period.id)) + }); const resolved = resolveKpiDefinitions(input.ticker); if (!resolved.template) { return { - kpiRows: [], - trendSeries: [], - categories: [] + kpiRows: persistedRows, + trendSeries: buildTrendSeries({ + surfaceKind: 'segments_kpis', + kpiRows: persistedRows + }), + categories: buildFinancialCategories(persistedRows, 'segments_kpis') }; } @@ -408,27 +800,11 @@ async function buildKpiSurfaceBundle(input: { definitions: resolved.definitions }); - const rowsByKey = new Map(); - for (const row of [...taxonomyRows, ...noteRows]) { - const existing = rowsByKey.get(row.key); - if (existing) { - existing.values = { - ...existing.values, - ...row.values - }; - continue; - } - - rowsByKey.set(row.key, row); - } - - const kpiRows = [...rowsByKey.values()].sort((left, right) => { - if (left.order !== right.order) { - return left.order - right.order; - } - - return 
left.label.localeCompare(right.label); - }); + const kpiRows = mergeStructuredKpiRowsByPriority([ + persistedRows, + taxonomyRows, + noteRows + ]); const payload = { kpiRows, @@ -515,7 +891,8 @@ export async function getCompanyFinancials(input: GetCompanyFinancialsInput): Pr failedFilings: statuses.failed, pendingFilings: Math.max(0, financialFilings.filter((filing) => filingTypes.includes(filing.filing_type as '10-K' | '10-Q')).length - statuses.ready - statuses.partial - statuses.failed), queuedSync: input.queuedSync - } + }, + normalization: buildNormalizationMetadata(snapshotResult.snapshots) }; } @@ -539,48 +916,39 @@ export async function getCompanyFinancials(input: GetCompanyFinancialsInput): Pr const periods = input.cadence === 'ltm' ? buildLtmPeriods(selection.periods) : selection.periods; + const baseFaithfulRows = buildRows(selection.snapshots, statement, selection.selectedPeriodIds); const faithfulRows = input.cadence === 'ltm' ? buildLtmFaithfulRows( - buildRows(selection.snapshots, statement, selection.selectedPeriodIds), + baseFaithfulRows, selection.periods, periods, statement ) - : buildRows(selection.snapshots, statement, selection.selectedPeriodIds); + : baseFaithfulRows; const factsForStatement = allFacts.facts.filter((fact) => fact.statement === statement); const factsForStandardization = allFacts.facts; const standardizedPayload = await buildStatementSurfaceBundle({ surfaceKind: input.surfaceKind as Extract, cadence: input.cadence, - periods, - faithfulRows, + sourcePeriods: selection.periods, + targetPeriods: periods, + selectedPeriodIds: selection.selectedPeriodIds, + faithfulRows: baseFaithfulRows, facts: factsForStandardization, snapshots: selection.snapshots }); - const standardizedRows = input.cadence === 'ltm' - ? 
buildLtmStandardizedRows( - buildStandardizedRows({ - rows: buildRows(selection.snapshots, statement, selection.selectedPeriodIds), - statement: statement as Extract, - periods: selection.periods, - facts: factsForStandardization - }), - selection.periods, - periods, - statement as Extract - ) - : standardizedPayload.rows; + const standardizedRows = standardizedPayload.rows; const rawFacts = input.includeFacts ? await listTaxonomyFactsByTicker({ ticker, window: 'all', filingTypes: [...filingTypes], - statement, - cursor: input.factsCursor, - limit: input.factsLimit + statement, + cursor: input.factsCursor, + limit: input.factsLimit }) : { facts: [], nextCursor: null }; @@ -603,12 +971,10 @@ export async function getCompanyFinancials(input: GetCompanyFinancialsInput): Pr faithful: faithfulRows, standardized: standardizedRows }, + statementDetails: standardizedPayload.detailRows, ratioRows: null, kpiRows: null, - trendSeries: buildTrendSeries({ - surfaceKind: input.surfaceKind, - statementRows: standardizedRows - }), + trendSeries: standardizedPayload.trendSeries, categories: standardizedPayload.categories, availability: { adjusted: false, @@ -636,6 +1002,7 @@ export async function getCompanyFinancials(input: GetCompanyFinancialsInput): Pr queuedSync: input.queuedSync }, metrics, + normalization: standardizedPayload.normalization, dimensionBreakdown }; } @@ -654,23 +1021,29 @@ export async function getCompanyFinancials(input: GetCompanyFinancialsInput): Pr ? 
buildLtmPeriods(incomeSelection.periods) : incomeSelection.periods; - const incomeQuarterlyRows = buildStandardizedRows({ - rows: buildRows(incomeSelection.snapshots, 'income', incomeSelection.selectedPeriodIds), + const incomeQuarterlyRows = buildQuarterlyStatementSurfaceRows({ statement: 'income', - periods: incomeSelection.periods, - facts: allFacts.facts + sourcePeriods: incomeSelection.periods, + selectedPeriodIds: incomeSelection.selectedPeriodIds, + faithfulRows: buildRows(incomeSelection.snapshots, 'income', incomeSelection.selectedPeriodIds), + facts: allFacts.facts, + snapshots: incomeSelection.snapshots }); - const balanceQuarterlyRows = rekeyRowsByFilingId(buildStandardizedRows({ - rows: buildRows(balanceSelection.snapshots, 'balance', balanceSelection.selectedPeriodIds), + const balanceQuarterlyRows = rekeyRowsByFilingId(buildQuarterlyStatementSurfaceRows({ statement: 'balance', - periods: balanceSelection.periods, - facts: allFacts.facts + sourcePeriods: balanceSelection.periods, + selectedPeriodIds: balanceSelection.selectedPeriodIds, + faithfulRows: buildRows(balanceSelection.snapshots, 'balance', balanceSelection.selectedPeriodIds), + facts: allFacts.facts, + snapshots: balanceSelection.snapshots }), balanceSelection.periods, incomeSelection.periods); - const cashFlowQuarterlyRows = rekeyRowsByFilingId(buildStandardizedRows({ - rows: buildRows(cashFlowSelection.snapshots, 'cash_flow', cashFlowSelection.selectedPeriodIds), + const cashFlowQuarterlyRows = rekeyRowsByFilingId(buildQuarterlyStatementSurfaceRows({ statement: 'cash_flow', - periods: cashFlowSelection.periods, - facts: allFacts.facts + sourcePeriods: cashFlowSelection.periods, + selectedPeriodIds: cashFlowSelection.selectedPeriodIds, + faithfulRows: buildRows(cashFlowSelection.snapshots, 'cash_flow', cashFlowSelection.selectedPeriodIds), + facts: allFacts.facts, + snapshots: cashFlowSelection.snapshots }), cashFlowSelection.periods, incomeSelection.periods); const incomeRows = 
input.cadence === 'ltm' @@ -706,6 +1079,7 @@ export async function getCompanyFinancials(input: GetCompanyFinancialsInput): Pr defaultDisplayMode: 'standardized', periods: basePeriods, statementRows: null, + statementDetails: null, ratioRows: ratioBundle.ratioRows, kpiRows: null, trendSeries: ratioBundle.trendSeries, @@ -731,6 +1105,7 @@ export async function getCompanyFinancials(input: GetCompanyFinancialsInput): Pr queuedSync: input.queuedSync }, metrics, + normalization: buildNormalizationMetadata(incomeSelection.snapshots), dimensionBreakdown: null }; } @@ -770,6 +1145,7 @@ export async function getCompanyFinancials(input: GetCompanyFinancialsInput): Pr defaultDisplayMode: 'standardized', periods: basePeriods, statementRows: null, + statementDetails: null, ratioRows: null, kpiRows: kpiBundle.kpiRows, trendSeries: kpiBundle.trendSeries, @@ -795,6 +1171,7 @@ export async function getCompanyFinancials(input: GetCompanyFinancialsInput): Pr queuedSync: input.queuedSync }, metrics, + normalization: buildNormalizationMetadata(incomeSelection.snapshots), dimensionBreakdown: mergeDimensionBreakdownMaps(kpiBreakdown) }; } @@ -807,6 +1184,9 @@ export const __financialTaxonomyInternals = { buildRows, buildStandardizedRows, buildDimensionBreakdown, + buildNormalizationMetadata, + aggregateSurfaceRows, + mergeStructuredKpiRowsByPriority, periodSorter, selectPrimaryPeriodsByCadence, buildLtmPeriods, diff --git a/lib/server/financials/surface.ts b/lib/server/financials/surface.ts new file mode 100644 index 0000000..49e5171 --- /dev/null +++ b/lib/server/financials/surface.ts @@ -0,0 +1,320 @@ +import type { + DetailFinancialRow, + FinancialStatementKind, + FinancialStatementPeriod, + NormalizationSummary, + StructuredKpiRow, + SurfaceDetailMap, + SurfaceFinancialRow, + TaxonomyFactRow, + TaxonomyStatementRow +} from '@/lib/types'; +import { buildStandardizedRows } from '@/lib/server/financials/standardize'; + +type CompactStatement = Extract; + +type SurfaceDefinition = { + 
key: string; + label: string; + category: string; + order: number; + unit: SurfaceFinancialRow['unit']; + rowKey?: string; + componentKeys?: string[]; + formula?: { + kind: 'subtract'; + left: string; + right: string; + }; +}; + +const EMPTY_SURFACE_ROWS: Record = { + income: [], + balance: [], + cash_flow: [], + equity: [], + comprehensive_income: [] +}; + +const EMPTY_DETAIL_ROWS: Record = { + income: {}, + balance: {}, + cash_flow: {}, + equity: {}, + comprehensive_income: {} +}; + +const SURFACE_DEFINITIONS: Record = { + income: [ + { key: 'revenue', label: 'Revenue', category: 'surface', order: 10, unit: 'currency', rowKey: 'revenue' }, + { key: 'cost_of_revenue', label: 'Cost of Revenue', category: 'surface', order: 20, unit: 'currency', rowKey: 'cost_of_revenue' }, + { key: 'gross_profit', label: 'Gross Profit', category: 'surface', order: 30, unit: 'currency', rowKey: 'gross_profit' }, + { + key: 'operating_expenses', + label: 'Operating Expenses', + category: 'surface', + order: 40, + unit: 'currency', + componentKeys: ['selling_general_and_administrative', 'research_and_development', 'depreciation_and_amortization'] + }, + { key: 'operating_income', label: 'Operating Income', category: 'surface', order: 50, unit: 'currency', rowKey: 'operating_income' }, + { + key: 'interest_and_other', + label: 'Interest and Other', + category: 'surface', + order: 60, + unit: 'currency', + formula: { + kind: 'subtract', + left: 'pretax_income', + right: 'operating_income' + } + }, + { key: 'pretax_income', label: 'Pretax Income', category: 'surface', order: 70, unit: 'currency', rowKey: 'pretax_income' }, + { key: 'income_taxes', label: 'Income Taxes', category: 'surface', order: 80, unit: 'currency', rowKey: 'income_tax_expense' }, + { key: 'net_income', label: 'Net Income', category: 'surface', order: 90, unit: 'currency', rowKey: 'net_income' } + ], + balance: [ + { key: 'cash_and_equivalents', label: 'Cash and Equivalents', category: 'surface', order: 10, unit: 
'currency', rowKey: 'cash_and_equivalents' }, + { key: 'receivables', label: 'Receivables', category: 'surface', order: 20, unit: 'currency', rowKey: 'accounts_receivable' }, + { key: 'inventory', label: 'Inventory', category: 'surface', order: 30, unit: 'currency', rowKey: 'inventory' }, + { key: 'current_assets', label: 'Current Assets', category: 'surface', order: 40, unit: 'currency', rowKey: 'current_assets' }, + { key: 'ppe', label: 'Property, Plant & Equipment', category: 'surface', order: 50, unit: 'currency', rowKey: 'property_plant_equipment' }, + { + key: 'goodwill_and_intangibles', + label: 'Goodwill and Intangibles', + category: 'surface', + order: 60, + unit: 'currency', + componentKeys: ['goodwill', 'intangible_assets'] + }, + { key: 'total_assets', label: 'Total Assets', category: 'surface', order: 70, unit: 'currency', rowKey: 'total_assets' }, + { key: 'current_liabilities', label: 'Current Liabilities', category: 'surface', order: 80, unit: 'currency', rowKey: 'current_liabilities' }, + { key: 'debt', label: 'Debt', category: 'surface', order: 90, unit: 'currency', rowKey: 'total_debt' }, + { key: 'total_liabilities', label: 'Total Liabilities', category: 'surface', order: 100, unit: 'currency', rowKey: 'total_liabilities' }, + { key: 'shareholders_equity', label: 'Shareholders Equity', category: 'surface', order: 110, unit: 'currency', rowKey: 'total_equity' } + ], + cash_flow: [ + { key: 'operating_cash_flow', label: 'Operating Cash Flow', category: 'surface', order: 10, unit: 'currency', rowKey: 'operating_cash_flow' }, + { key: 'capital_expenditures', label: 'Capital Expenditures', category: 'surface', order: 20, unit: 'currency', rowKey: 'capital_expenditures' }, + { key: 'acquisitions', label: 'Acquisitions', category: 'surface', order: 30, unit: 'currency', rowKey: 'acquisitions' }, + { key: 'investing_cash_flow', label: 'Investing Cash Flow', category: 'surface', order: 40, unit: 'currency', rowKey: 'investing_cash_flow' }, + { key: 
'financing_cash_flow', label: 'Financing Cash Flow', category: 'surface', order: 50, unit: 'currency', rowKey: 'financing_cash_flow' }, + { key: 'free_cash_flow', label: 'Free Cash Flow', category: 'surface', order: 60, unit: 'currency', rowKey: 'free_cash_flow' } + ] +}; + +function rowHasAnyValue(row: { values: Record }) { + return Object.values(row.values).some((value) => value !== null); +} + +function sumValues(values: Array) { + if (values.every((value) => value === null)) { + return null; + } + + return values.reduce((sum, value) => sum + (value ?? 0), 0); +} + +function valueForPeriod( + rowByKey: Map, + rowKey: string, + periodId: string +) { + return rowByKey.get(rowKey)?.values[periodId] ?? null; +} + +function maxAbsValue(values: Record) { + return Object.values(values).reduce((max, value) => Math.max(max, Math.abs(value ?? 0)), 0); +} + +function detailUnit(row: SurfaceFinancialRow, faithfulRow: TaxonomyStatementRow | undefined) { + if (faithfulRow) { + return Object.values(faithfulRow.units)[0] ?? null; + } + + switch (row.unit) { + case 'currency': + return 'USD'; + case 'shares': + return 'shares'; + case 'percent': + return 'pure'; + default: + return null; + } +} + +function buildDetailRow(input: { + row: SurfaceFinancialRow; + parentSurfaceKey: string; + faithfulRowByKey: Map; +}): DetailFinancialRow { + const sourceRowKey = input.row.sourceRowKeys.find((key) => input.faithfulRowByKey.has(key)) ?? input.row.sourceRowKeys[0] ?? input.row.key; + const faithfulRow = sourceRowKey ? input.faithfulRowByKey.get(sourceRowKey) : undefined; + const qname = faithfulRow?.qname ?? input.row.sourceConcepts[0] ?? input.row.key; + const [prefix, ...rest] = qname.split(':'); + const localName = faithfulRow?.localName ?? (rest.length > 0 ? rest.join(':') : qname); + + return { + key: input.row.key, + parentSurfaceKey: input.parentSurfaceKey, + label: input.row.label, + conceptKey: faithfulRow?.conceptKey ?? 
sourceRowKey, + qname, + namespaceUri: faithfulRow?.namespaceUri ?? (prefix && rest.length > 0 ? `urn:unknown:${prefix}` : 'urn:surface'), + localName, + unit: detailUnit(input.row, faithfulRow), + values: { ...input.row.values }, + sourceFactIds: [...input.row.sourceFactIds], + isExtension: faithfulRow?.isExtension ?? false, + dimensionsSummary: faithfulRow?.hasDimensions ? ['has_dimensions'] : [], + residualFlag: input.parentSurfaceKey === 'unmapped' + }; +} + +function baselineForStatement(statement: CompactStatement, rowByKey: Map) { + const anchorKey = statement === 'balance' ? 'total_assets' : 'revenue'; + return maxAbsValue(rowByKey.get(anchorKey)?.values ?? {}); +} + +function materialityThreshold(statement: CompactStatement, baseline: number) { + if (statement === 'balance') { + return Math.max(5_000_000, baseline * 0.005); + } + + return Math.max(1_000_000, baseline * 0.01); +} + +export function buildCompactHydrationModel(input: { + periods: FinancialStatementPeriod[]; + faithfulRows: Record; + facts: TaxonomyFactRow[]; + kpiRows?: StructuredKpiRow[]; +}) { + const surfaceRows = structuredClone(EMPTY_SURFACE_ROWS); + const detailRows = structuredClone(EMPTY_DETAIL_ROWS); + let surfaceRowCount = 0; + let detailRowCount = 0; + let unmappedRowCount = 0; + let materialUnmappedRowCount = 0; + + for (const statement of Object.keys(SURFACE_DEFINITIONS) as CompactStatement[]) { + const faithfulRows = input.faithfulRows[statement] ?? []; + const facts = input.facts.filter((fact) => fact.statement === statement); + const fullRows = buildStandardizedRows({ + rows: faithfulRows, + statement, + periods: input.periods, + facts + }); + const rowByKey = new Map(fullRows.map((row) => [row.key, row])); + const faithfulRowByKey = new Map(faithfulRows.map((row) => [row.key, row])); + const statementDetails: SurfaceDetailMap = {}; + + for (const definition of SURFACE_DEFINITIONS[statement]) { + const contributingRows = definition.rowKey + ? 
[rowByKey.get(definition.rowKey)].filter((row): row is SurfaceFinancialRow => row !== undefined) + : (definition.componentKeys ?? []) + .map((key) => rowByKey.get(key)) + .filter((row): row is SurfaceFinancialRow => row !== undefined); + + const values = Object.fromEntries(input.periods.map((period) => { + const nextValue = definition.rowKey + ? valueForPeriod(rowByKey, definition.rowKey, period.id) + : definition.formula + ? (() => { + const left = valueForPeriod(rowByKey, definition.formula!.left, period.id); + const right = valueForPeriod(rowByKey, definition.formula!.right, period.id); + return left === null || right === null ? null : left - right; + })() + : sumValues(contributingRows.map((row) => row.values[period.id] ?? null)); + + return [period.id, nextValue]; + })) satisfies Record; + + if (!rowHasAnyValue({ values })) { + continue; + } + + const sourceConcepts = [...new Set(contributingRows.flatMap((row) => row.sourceConcepts))].sort((left, right) => left.localeCompare(right)); + const sourceRowKeys = [...new Set(contributingRows.flatMap((row) => row.sourceRowKeys))].sort((left, right) => left.localeCompare(right)); + const sourceFactIds = [...new Set(contributingRows.flatMap((row) => row.sourceFactIds))].sort((left, right) => left - right); + const hasDimensions = contributingRows.some((row) => row.hasDimensions); + const resolvedSourceRowKeys = Object.fromEntries(input.periods.map((period) => [ + period.id, + definition.rowKey + ? rowByKey.get(definition.rowKey)?.resolvedSourceRowKeys[period.id] ?? null + : null + ])); + + const rowsForDetail = definition.componentKeys + ? 
contributingRows + : []; + const details = rowsForDetail + .filter((row) => rowHasAnyValue(row)) + .map((row) => buildDetailRow({ + row, + parentSurfaceKey: definition.key, + faithfulRowByKey + })); + + statementDetails[definition.key] = details; + detailRowCount += details.length; + + surfaceRows[statement].push({ + key: definition.key, + label: definition.label, + category: definition.category, + templateSection: definition.category, + order: definition.order, + unit: definition.unit, + values, + sourceConcepts, + sourceRowKeys, + sourceFactIds, + formulaKey: definition.formula ? definition.key : null, + hasDimensions, + resolvedSourceRowKeys, + statement, + detailCount: details.length + }); + surfaceRowCount += 1; + } + + const baseline = baselineForStatement(statement, rowByKey); + const threshold = materialityThreshold(statement, baseline); + const residualRows = fullRows + .filter((row) => row.key.startsWith('other:')) + .filter((row) => rowHasAnyValue(row)) + .map((row) => buildDetailRow({ + row, + parentSurfaceKey: 'unmapped', + faithfulRowByKey + })); + + if (residualRows.length > 0) { + statementDetails.unmapped = residualRows; + detailRowCount += residualRows.length; + unmappedRowCount += residualRows.length; + materialUnmappedRowCount += residualRows.filter((row) => maxAbsValue(row.values) >= threshold).length; + } + + detailRows[statement] = statementDetails; + } + + const normalizationSummary: NormalizationSummary = { + surfaceRowCount, + detailRowCount, + kpiRowCount: input.kpiRows?.length ?? 
0, + unmappedRowCount, + materialUnmappedRowCount, + warnings: [] + }; + + return { + surfaceRows, + detailRows, + normalizationSummary + }; +} diff --git a/lib/server/repos/filing-taxonomy.ts b/lib/server/repos/filing-taxonomy.ts index 561f7a4..c7e6d63 100644 --- a/lib/server/repos/filing-taxonomy.ts +++ b/lib/server/repos/filing-taxonomy.ts @@ -1,9 +1,21 @@ import { and, desc, eq, gte, inArray, lt, sql } from 'drizzle-orm'; -import type { Filing, FinancialStatementKind, MetricValidationResult, TaxonomyDimensionMember, TaxonomyFactRow, TaxonomyStatementRow } from '@/lib/types'; +import type { + Filing, + FinancialStatementKind, + MetricValidationResult, + NormalizationSummary, + StructuredKpiRow, + SurfaceDetailMap, + SurfaceFinancialRow, + TaxonomyDimensionMember, + TaxonomyFactRow, + TaxonomyStatementRow +} from '@/lib/types'; import { db } from '@/lib/server/db'; import { filingTaxonomyAsset, filingTaxonomyConcept, + filingTaxonomyContext, filingTaxonomyFact, filingTaxonomyMetricValidation, filingTaxonomySnapshot @@ -41,10 +53,19 @@ export type FilingTaxonomySnapshotRecord = { parse_status: FilingTaxonomyParseStatus; parse_error: string | null; source: FilingTaxonomySource; + parser_engine: string; + parser_version: string; + taxonomy_regime: 'us-gaap' | 'ifrs-full' | 'unknown'; + fiscal_pack: string | null; periods: FilingTaxonomyPeriod[]; + faithful_rows: Record; statement_rows: Record; + surface_rows: Record; + detail_rows: Record; + kpi_rows: StructuredKpiRow[]; derived_metrics: Filing['metrics']; validation_result: MetricValidationResult | null; + normalization_summary: NormalizationSummary | null; facts_count: number; concepts_count: number; dimensions_count: number; @@ -52,6 +73,20 @@ export type FilingTaxonomySnapshotRecord = { updated_at: string; }; +export type FilingTaxonomyContextRecord = { + id: number; + snapshot_id: number; + context_id: string; + entity_identifier: string | null; + entity_scheme: string | null; + period_start: string | null; + 
period_end: string | null; + period_instant: string | null; + segment_json: Record | null; + scenario_json: Record | null; + created_at: string; +}; + export type FilingTaxonomyAssetRecord = { id: number; snapshot_id: number; @@ -73,8 +108,17 @@ export type FilingTaxonomyConceptRecord = { local_name: string; label: string | null; is_extension: boolean; + balance: string | null; + period_type: string | null; + data_type: string | null; statement_kind: FinancialStatementKind | null; role_uri: string | null; + authoritative_concept_key: string | null; + mapping_method: string | null; + surface_key: string | null; + detail_parent_surface_key: string | null; + kpi_key: string | null; + residual_flag: boolean; presentation_order: number | null; presentation_depth: number | null; parent_concept_key: string | null; @@ -89,11 +133,20 @@ export type FilingTaxonomyFactRecord = { qname: string; namespace_uri: string; local_name: string; + data_type: string | null; statement_kind: FinancialStatementKind | null; role_uri: string | null; + authoritative_concept_key: string | null; + mapping_method: string | null; + surface_key: string | null; + detail_parent_surface_key: string | null; + kpi_key: string | null; + residual_flag: boolean; context_id: string; unit: string | null; decimals: string | null; + precision: string | null; + nil: boolean; value_num: number; period_start: string | null; period_end: string | null; @@ -130,13 +183,32 @@ export type UpsertFilingTaxonomySnapshotInput = { parse_status: FilingTaxonomyParseStatus; parse_error: string | null; source: FilingTaxonomySource; + parser_engine: string; + parser_version: string; + taxonomy_regime: 'us-gaap' | 'ifrs-full' | 'unknown'; + fiscal_pack: string | null; periods: FilingTaxonomyPeriod[]; + faithful_rows: Record; statement_rows: Record; + surface_rows: Record; + detail_rows: Record; + kpi_rows: StructuredKpiRow[]; derived_metrics: Filing['metrics']; validation_result: MetricValidationResult | null; + 
normalization_summary: NormalizationSummary | null; facts_count: number; concepts_count: number; dimensions_count: number; + contexts: Array<{ + context_id: string; + entity_identifier: string | null; + entity_scheme: string | null; + period_start: string | null; + period_end: string | null; + period_instant: string | null; + segment_json: Record | null; + scenario_json: Record | null; + }>; assets: Array<{ asset_type: FilingTaxonomyAssetType; name: string; @@ -152,8 +224,17 @@ export type UpsertFilingTaxonomySnapshotInput = { local_name: string; label: string | null; is_extension: boolean; + balance: string | null; + period_type: string | null; + data_type: string | null; statement_kind: FinancialStatementKind | null; role_uri: string | null; + authoritative_concept_key: string | null; + mapping_method: string | null; + surface_key: string | null; + detail_parent_surface_key: string | null; + kpi_key: string | null; + residual_flag: boolean; presentation_order: number | null; presentation_depth: number | null; parent_concept_key: string | null; @@ -164,11 +245,20 @@ export type UpsertFilingTaxonomySnapshotInput = { qname: string; namespace_uri: string; local_name: string; + data_type: string | null; statement_kind: FinancialStatementKind | null; role_uri: string | null; + authoritative_concept_key: string | null; + mapping_method: string | null; + surface_key: string | null; + detail_parent_surface_key: string | null; + kpi_key: string | null; + residual_flag: boolean; context_id: string; unit: string | null; decimals: string | null; + precision: string | null; + nil: boolean; value_num: number; period_start: string | null; period_end: string | null; @@ -229,7 +319,29 @@ function emptyStatementRows(): Record { + return { + income: [], + balance: [], + cash_flow: [], + equity: [], + comprehensive_income: [] + }; +} + +function emptyDetailRows(): Record { + return { + income: {}, + balance: {}, + cash_flow: {}, + equity: {}, + comprehensive_income: {} + }; +} + 
function toSnapshotRecord(row: typeof filingTaxonomySnapshot.$inferSelect): FilingTaxonomySnapshotRecord { + const faithfulRows = row.faithful_rows ?? row.statement_rows ?? emptyStatementRows(); + return { id: row.id, filing_id: row.filing_id, @@ -239,10 +351,19 @@ function toSnapshotRecord(row: typeof filingTaxonomySnapshot.$inferSelect): Fili parse_status: row.parse_status, parse_error: row.parse_error, source: row.source, + parser_engine: row.parser_engine, + parser_version: row.parser_version, + taxonomy_regime: row.taxonomy_regime, + fiscal_pack: row.fiscal_pack, periods: row.periods ?? [], - statement_rows: row.statement_rows ?? emptyStatementRows(), + faithful_rows: faithfulRows, + statement_rows: faithfulRows, + surface_rows: row.surface_rows ?? emptySurfaceRows(), + detail_rows: row.detail_rows ?? emptyDetailRows(), + kpi_rows: row.kpi_rows ?? [], derived_metrics: row.derived_metrics ?? null, validation_result: row.validation_result ?? null, + normalization_summary: row.normalization_summary ?? null, facts_count: row.facts_count, concepts_count: row.concepts_count, dimensions_count: row.dimensions_count, @@ -251,6 +372,22 @@ function toSnapshotRecord(row: typeof filingTaxonomySnapshot.$inferSelect): Fili }; } +function toContextRecord(row: typeof filingTaxonomyContext.$inferSelect): FilingTaxonomyContextRecord { + return { + id: row.id, + snapshot_id: row.snapshot_id, + context_id: row.context_id, + entity_identifier: row.entity_identifier, + entity_scheme: row.entity_scheme, + period_start: row.period_start, + period_end: row.period_end, + period_instant: row.period_instant, + segment_json: row.segment_json ?? null, + scenario_json: row.scenario_json ?? 
null, + created_at: row.created_at + }; +} + function toAssetRecord(row: typeof filingTaxonomyAsset.$inferSelect): FilingTaxonomyAssetRecord { return { id: row.id, @@ -275,8 +412,17 @@ function toConceptRecord(row: typeof filingTaxonomyConcept.$inferSelect): Filing local_name: row.local_name, label: row.label, is_extension: row.is_extension, + balance: row.balance, + period_type: row.period_type, + data_type: row.data_type, statement_kind: row.statement_kind ?? null, role_uri: row.role_uri, + authoritative_concept_key: row.authoritative_concept_key, + mapping_method: row.mapping_method, + surface_key: row.surface_key, + detail_parent_surface_key: row.detail_parent_surface_key, + kpi_key: row.kpi_key, + residual_flag: row.residual_flag, presentation_order: asNumber(row.presentation_order), presentation_depth: row.presentation_depth, parent_concept_key: row.parent_concept_key, @@ -298,11 +444,20 @@ function toFactRecord(row: typeof filingTaxonomyFact.$inferSelect): FilingTaxono qname: row.qname, namespace_uri: row.namespace_uri, local_name: row.local_name, + data_type: row.data_type, statement_kind: row.statement_kind ?? 
null, role_uri: row.role_uri, + authoritative_concept_key: row.authoritative_concept_key, + mapping_method: row.mapping_method, + surface_key: row.surface_key, + detail_parent_surface_key: row.detail_parent_surface_key, + kpi_key: row.kpi_key, + residual_flag: row.residual_flag, context_id: row.context_id, unit: row.unit, decimals: row.decimals, + precision: row.precision, + nil: row.nil, value_num: value, period_start: row.period_start, period_end: row.period_end, @@ -354,6 +509,16 @@ export async function listFilingTaxonomyAssets(snapshotId: number) { return rows.map(toAssetRecord); } +export async function listFilingTaxonomyContexts(snapshotId: number) { + const rows = await db + .select() + .from(filingTaxonomyContext) + .where(eq(filingTaxonomyContext.snapshot_id, snapshotId)) + .orderBy(desc(filingTaxonomyContext.id)); + + return rows.map(toContextRecord); +} + export async function listFilingTaxonomyConcepts(snapshotId: number) { const rows = await db .select() @@ -397,10 +562,19 @@ export async function upsertFilingTaxonomySnapshot(input: UpsertFilingTaxonomySn parse_status: input.parse_status, parse_error: input.parse_error, source: input.source, + parser_engine: input.parser_engine, + parser_version: input.parser_version, + taxonomy_regime: input.taxonomy_regime, + fiscal_pack: input.fiscal_pack, periods: input.periods, + faithful_rows: input.faithful_rows, statement_rows: input.statement_rows, + surface_rows: input.surface_rows, + detail_rows: input.detail_rows, + kpi_rows: input.kpi_rows, derived_metrics: input.derived_metrics, validation_result: input.validation_result, + normalization_summary: input.normalization_summary, facts_count: input.facts_count, concepts_count: input.concepts_count, dimensions_count: input.dimensions_count, @@ -416,10 +590,19 @@ export async function upsertFilingTaxonomySnapshot(input: UpsertFilingTaxonomySn parse_status: input.parse_status, parse_error: input.parse_error, source: input.source, + parser_engine: 
input.parser_engine, + parser_version: input.parser_version, + taxonomy_regime: input.taxonomy_regime, + fiscal_pack: input.fiscal_pack, periods: input.periods, + faithful_rows: input.faithful_rows, statement_rows: input.statement_rows, + surface_rows: input.surface_rows, + detail_rows: input.detail_rows, + kpi_rows: input.kpi_rows, derived_metrics: input.derived_metrics, validation_result: input.validation_result, + normalization_summary: input.normalization_summary, facts_count: input.facts_count, concepts_count: input.concepts_count, dimensions_count: input.dimensions_count, @@ -431,10 +614,26 @@ export async function upsertFilingTaxonomySnapshot(input: UpsertFilingTaxonomySn const snapshotId = saved.id; await db.delete(filingTaxonomyAsset).where(eq(filingTaxonomyAsset.snapshot_id, snapshotId)); + await db.delete(filingTaxonomyContext).where(eq(filingTaxonomyContext.snapshot_id, snapshotId)); await db.delete(filingTaxonomyConcept).where(eq(filingTaxonomyConcept.snapshot_id, snapshotId)); await db.delete(filingTaxonomyFact).where(eq(filingTaxonomyFact.snapshot_id, snapshotId)); await db.delete(filingTaxonomyMetricValidation).where(eq(filingTaxonomyMetricValidation.snapshot_id, snapshotId)); + if (input.contexts.length > 0) { + await db.insert(filingTaxonomyContext).values(input.contexts.map((context) => ({ + snapshot_id: snapshotId, + context_id: context.context_id, + entity_identifier: context.entity_identifier, + entity_scheme: context.entity_scheme, + period_start: context.period_start, + period_end: context.period_end, + period_instant: context.period_instant, + segment_json: context.segment_json, + scenario_json: context.scenario_json, + created_at: now + }))); + } + if (input.assets.length > 0) { await db.insert(filingTaxonomyAsset).values(input.assets.map((asset) => ({ snapshot_id: snapshotId, @@ -457,8 +656,17 @@ export async function upsertFilingTaxonomySnapshot(input: UpsertFilingTaxonomySn local_name: concept.local_name, label: concept.label, 
is_extension: concept.is_extension, + balance: concept.balance, + period_type: concept.period_type, + data_type: concept.data_type, statement_kind: concept.statement_kind, role_uri: concept.role_uri, + authoritative_concept_key: concept.authoritative_concept_key, + mapping_method: concept.mapping_method, + surface_key: concept.surface_key, + detail_parent_surface_key: concept.detail_parent_surface_key, + kpi_key: concept.kpi_key, + residual_flag: concept.residual_flag, presentation_order: asNumericText(concept.presentation_order), presentation_depth: concept.presentation_depth, parent_concept_key: concept.parent_concept_key, @@ -474,11 +682,20 @@ export async function upsertFilingTaxonomySnapshot(input: UpsertFilingTaxonomySn qname: fact.qname, namespace_uri: fact.namespace_uri, local_name: fact.local_name, + data_type: fact.data_type, statement_kind: fact.statement_kind, role_uri: fact.role_uri, + authoritative_concept_key: fact.authoritative_concept_key, + mapping_method: fact.mapping_method, + surface_key: fact.surface_key, + detail_parent_surface_key: fact.detail_parent_surface_key, + kpi_key: fact.kpi_key, + residual_flag: fact.residual_flag, context_id: fact.context_id, unit: fact.unit, decimals: fact.decimals, + precision: fact.precision, + nil: fact.nil, value_num: String(fact.value_num), period_start: fact.period_start, period_end: fact.period_end, diff --git a/lib/server/task-processors.ts b/lib/server/task-processors.ts index a90db7e..2c34991 100644 --- a/lib/server/task-processors.ts +++ b/lib/server/task-processors.ts @@ -766,7 +766,18 @@ async function processSyncFilings(task: Task) { parse_status: 'failed', parse_error: error instanceof Error ? 
error.message : 'Taxonomy hydration failed', source: 'legacy_html_fallback', + parser_engine: 'fiscal-xbrl', + parser_version: 'unknown', + taxonomy_regime: 'unknown', + fiscal_pack: 'core', periods: [], + faithful_rows: { + income: [], + balance: [], + cash_flow: [], + equity: [], + comprehensive_income: [] + }, statement_rows: { income: [], balance: [], @@ -774,12 +785,36 @@ async function processSyncFilings(task: Task) { equity: [], comprehensive_income: [] }, + surface_rows: { + income: [], + balance: [], + cash_flow: [], + equity: [], + comprehensive_income: [] + }, + detail_rows: { + income: {}, + balance: {}, + cash_flow: {}, + equity: {}, + comprehensive_income: {} + }, + kpi_rows: [], + contexts: [], derived_metrics: filing.metrics ?? null, validation_result: { status: 'error', checks: [], validatedAt: now }, + normalization_summary: { + surfaceRowCount: 0, + detailRowCount: 0, + kpiRowCount: 0, + unmappedRowCount: 0, + materialUnmappedRowCount: 0, + warnings: [] + }, facts_count: 0, concepts_count: 0, dimensions_count: 0, diff --git a/lib/server/taxonomy/classifiers.ts b/lib/server/taxonomy/classifiers.ts new file mode 100644 index 0000000..1d9e8e0 --- /dev/null +++ b/lib/server/taxonomy/classifiers.ts @@ -0,0 +1,53 @@ +import type { FinancialStatementKind } from '@/lib/types'; + +export function classifyStatementRole(roleUri: string): FinancialStatementKind | null { + const normalized = roleUri.toLowerCase(); + + if (/cash\s*flow|statementsof?cashflows|netcash/.test(normalized)) { + return 'cash_flow'; + } + + if (/shareholders?|stockholders?|equity|retainedearnings/.test(normalized)) { + return 'equity'; + } + + if (/comprehensive\s*income/.test(normalized)) { + return 'comprehensive_income'; + } + + if (/balance\s*sheet|financial\s*position|assets?andliabilities/.test(normalized)) { + return 'balance'; + } + + if (/operations|income\s*statement|statementsofincome|profit/.test(normalized)) { + return 'income'; + } + + return null; +} + +export function 
conceptStatementFallback(localName: string): FinancialStatementKind | null { + const normalized = localName.toLowerCase(); + + if (/cash|operatingactivities|investingactivities|financingactivities/.test(normalized)) { + return 'cash_flow'; + } + + if (/equity|retainedearnings|additionalpaidincapital/.test(normalized)) { + return 'equity'; + } + + if (/comprehensiveincome/.test(normalized)) { + return 'comprehensive_income'; + } + + if (/asset|liabilit|debt/.test(normalized)) { + return 'balance'; + } + + if (/revenue|income|profit|expense|costof/.test(normalized)) { + return 'income'; + } + + return null; +} diff --git a/lib/server/taxonomy/engine.ts b/lib/server/taxonomy/engine.ts index e27e2b2..4d1a5ef 100644 --- a/lib/server/taxonomy/engine.ts +++ b/lib/server/taxonomy/engine.ts @@ -1,185 +1,8 @@ -import type { FinancialStatementKind } from '@/lib/types'; -import { discoverFilingAssets } from '@/lib/server/taxonomy/asset-discovery'; -import { parseLabelLinkbase, parsePresentationLinkbase } from '@/lib/server/taxonomy/linkbase-parser'; -import { deriveTaxonomyMetrics } from '@/lib/server/taxonomy/metrics'; -import { materializeTaxonomyStatements } from '@/lib/server/taxonomy/materialize'; -import { validateMetricsWithPdfLlm } from '@/lib/server/taxonomy/pdf-validation'; +import { hydrateFilingTaxonomySnapshotFromSidecar } from '@/lib/server/taxonomy/parser-client'; import type { TaxonomyHydrationInput, TaxonomyHydrationResult } from '@/lib/server/taxonomy/types'; -import { parseXbrlInstance } from '@/lib/server/taxonomy/xbrl-parser'; - -function createStatementRecord(factory: () => T): Record { - return { - income: factory(), - balance: factory(), - cash_flow: factory(), - equity: factory(), - comprehensive_income: factory() - }; -} - -function envUserAgent() { - return process.env.SEC_USER_AGENT || 'Fiscal Clone '; -} - -async function fetchText(url: string, fetchImpl: typeof fetch) { - const response = await fetchImpl(url, { - headers: { - 'User-Agent': 
envUserAgent(), - Accept: 'text/xml, text/plain, text/html;q=0.8, */*;q=0.5' - }, - cache: 'no-store' - }); - - if (!response.ok) { - throw new Error(`SEC request failed (${response.status})`); - } - - return await response.text(); -} export async function hydrateFilingTaxonomySnapshot( - input: TaxonomyHydrationInput, - options?: { - fetchImpl?: typeof fetch; - } + input: TaxonomyHydrationInput ): Promise { - const fetchImpl = options?.fetchImpl ?? fetch; - - const discovered = await discoverFilingAssets({ - cik: input.cik, - accessionNumber: input.accessionNumber, - filingUrl: input.filingUrl, - primaryDocument: input.primaryDocument, - fetchImpl - }); - - const emptyResult: TaxonomyHydrationResult = { - filing_id: input.filingId, - ticker: input.ticker.trim().toUpperCase(), - filing_date: input.filingDate, - filing_type: input.filingType, - parse_status: 'failed', - parse_error: 'No XBRL instance found', - source: 'legacy_html_fallback', - periods: [], - statement_rows: createStatementRecord(() => []), - derived_metrics: null, - validation_result: { - status: 'not_run', - checks: [], - validatedAt: null - }, - facts_count: 0, - concepts_count: 0, - dimensions_count: 0, - assets: discovered.assets, - concepts: [], - facts: [], - metric_validations: [] - }; - - const selectedInstance = discovered.assets.find((asset) => asset.asset_type === 'instance' && asset.is_selected) - ?? discovered.assets.find((asset) => asset.asset_type === 'instance') - ?? null; - - if (!selectedInstance) { - return emptyResult; - } - - let parseError: string | null = null; - let source: TaxonomyHydrationResult['source'] = 'xbrl_instance'; - - let instanceText = ''; - try { - instanceText = await fetchText(selectedInstance.url, fetchImpl); - } catch (error) { - parseError = error instanceof Error ? 
error.message : 'Unable to fetch instance file'; - return { - ...emptyResult, - parse_error: parseError - }; - } - - const parsedInstance = parseXbrlInstance(instanceText, selectedInstance.name); - - const labelByConcept = new Map(); - const presentation: ReturnType = []; - - for (const asset of discovered.assets) { - if (!asset.is_selected) { - continue; - } - - if (asset.asset_type !== 'presentation' && asset.asset_type !== 'label') { - continue; - } - - try { - const content = await fetchText(asset.url, fetchImpl); - if (asset.asset_type === 'presentation') { - const parsed = parsePresentationLinkbase(content); - if (parsed.length > 0) { - source = 'xbrl_instance_with_linkbase'; - } - - presentation.push(...parsed); - } else if (asset.asset_type === 'label') { - const parsed = parseLabelLinkbase(content); - for (const [conceptKey, label] of parsed.entries()) { - if (!labelByConcept.has(conceptKey)) { - labelByConcept.set(conceptKey, label); - } - } - } - } catch (error) { - parseError = parseError ?? (error instanceof Error ? error.message : 'Failed to parse taxonomy linkbase'); - } - } - - const materialized = materializeTaxonomyStatements({ - filingId: input.filingId, - accessionNumber: input.accessionNumber, - filingDate: input.filingDate, - filingType: input.filingType, - facts: parsedInstance.facts, - presentation, - labelByConcept - }); - - const derivedMetrics = deriveTaxonomyMetrics(parsedInstance.facts); - const llmValidation = await validateMetricsWithPdfLlm({ - metrics: derivedMetrics, - assets: discovered.assets, - fetchImpl - }); - - const hasRows = (Object.values(materialized.statement_rows).reduce((total, rows) => total + rows.length, 0)) > 0; - const hasFacts = materialized.facts.length > 0; - - const parseStatus: TaxonomyHydrationResult['parse_status'] = hasRows && hasFacts - ? 'ready' - : hasFacts - ? 
'partial' - : 'failed'; - - return { - filing_id: input.filingId, - ticker: input.ticker.trim().toUpperCase(), - filing_date: input.filingDate, - filing_type: input.filingType, - parse_status: parseStatus, - parse_error: parseStatus === 'failed' ? (parseError ?? 'No XBRL facts extracted') : parseError, - source, - periods: materialized.periods, - statement_rows: materialized.statement_rows, - derived_metrics: derivedMetrics, - validation_result: llmValidation.validation_result, - facts_count: materialized.facts.length, - concepts_count: materialized.concepts.length, - dimensions_count: materialized.dimensionsCount, - assets: discovered.assets, - concepts: materialized.concepts, - facts: materialized.facts, - metric_validations: llmValidation.metric_validations - }; + return await hydrateFilingTaxonomySnapshotFromSidecar(input); } diff --git a/lib/server/taxonomy/materialize.ts b/lib/server/taxonomy/materialize.ts index c21d9d9..c5b4299 100644 --- a/lib/server/taxonomy/materialize.ts +++ b/lib/server/taxonomy/materialize.ts @@ -1,8 +1,7 @@ import type { Filing, FinancialStatementKind, TaxonomyStatementRow } from '@/lib/types'; import type { TaxonomyConcept, TaxonomyFact, TaxonomyPresentationConcept } from '@/lib/server/taxonomy/types'; import type { FilingTaxonomyPeriod } from '@/lib/server/repos/filing-taxonomy'; -import { classifyStatementRole } from '@/lib/server/taxonomy/linkbase-parser'; -import { conceptStatementFallback } from '@/lib/server/taxonomy/xbrl-parser'; +import { classifyStatementRole, conceptStatementFallback } from '@/lib/server/taxonomy/classifiers'; function compactAccessionNumber(value: string) { return value.replace(/-/g, ''); @@ -308,8 +307,17 @@ export function materializeTaxonomyStatements(input: { local_name: localName, label, is_extension: !isUsGaapNamespace(namespaceUri), + balance: null, + period_type: null, + data_type: null, statement_kind: statement, role_uri: orderedConcept.roleUri, + authoritative_concept_key: null, + 
mapping_method: null, + surface_key: null, + detail_parent_surface_key: null, + kpi_key: null, + residual_flag: false, presentation_order: row.order, presentation_depth: row.depth, parent_concept_key: row.parentKey, @@ -331,8 +339,17 @@ export function materializeTaxonomyStatements(input: { local_name: fact.localName, label: input.labelByConcept.get(fact.conceptKey) ?? localNameToLabel(fact.localName), is_extension: !isUsGaapNamespace(fact.namespaceUri), + balance: null, + period_type: null, + data_type: fact.dataType, statement_kind: fact.statement_kind, role_uri: fact.role_uri, + authoritative_concept_key: null, + mapping_method: null, + surface_key: null, + detail_parent_surface_key: null, + kpi_key: null, + residual_flag: false, presentation_order: null, presentation_depth: null, parent_concept_key: null, @@ -346,11 +363,20 @@ export function materializeTaxonomyStatements(input: { qname: fact.qname, namespace_uri: fact.namespaceUri, local_name: fact.localName, + data_type: fact.dataType, statement_kind: fact.statement_kind, role_uri: fact.role_uri, + authoritative_concept_key: null, + mapping_method: null, + surface_key: null, + detail_parent_surface_key: null, + kpi_key: null, + residual_flag: false, context_id: fact.contextId, unit: fact.unit, decimals: fact.decimals, + precision: fact.precision, + nil: fact.nil, value_num: fact.value, period_start: fact.periodStart, period_end: fact.periodEnd, diff --git a/lib/server/taxonomy/metrics.test.ts b/lib/server/taxonomy/metrics.test.ts index e8a166c..0fd804e 100644 --- a/lib/server/taxonomy/metrics.test.ts +++ b/lib/server/taxonomy/metrics.test.ts @@ -8,9 +8,12 @@ function fact(localName: string, value: number, overrides?: Partial typeof value === 'string' && value.length > 0); +} + +export function resolveFiscalXbrlBinary() { + const resolved = candidateBinaryPaths().find((path) => existsSync(path)); + if (!resolved) { + throw new Error('Rust XBRL sidecar binary is required but was not found. 
Set FISCAL_XBRL_BIN or build `fiscal-xbrl` under rust/target.'); + } + + return resolved; +} + +export async function hydrateFilingTaxonomySnapshotFromSidecar( + input: TaxonomyHydrationInput +): Promise { + const binary = resolveFiscalXbrlBinary(); + const timeoutMs = Math.max(Number(process.env.XBRL_ENGINE_TIMEOUT_MS ?? 45_000), 1_000); + const command = [binary, 'hydrate-filing']; + const requestBody = JSON.stringify({ + filingId: input.filingId, + ticker: input.ticker, + cik: input.cik, + accessionNumber: input.accessionNumber, + filingDate: input.filingDate, + filingType: input.filingType, + filingUrl: input.filingUrl, + primaryDocument: input.primaryDocument, + cacheDir: process.env.FISCAL_XBRL_CACHE_DIR ?? join(process.cwd(), '.cache', 'xbrl') + }); + + const child = Bun.spawn(command, { + stdin: 'pipe', + stdout: 'pipe', + stderr: 'pipe', + env: { + ...process.env + } + }); + + child.stdin.write(new TextEncoder().encode(requestBody)); + child.stdin.end(); + + const timeout = setTimeout(() => { + child.kill(); + }, timeoutMs); + + try { + const [stdout, stderr, exitCode] = await Promise.all([ + new Response(child.stdout).text(), + new Response(child.stderr).text(), + child.exited + ]); + + if (stderr.trim().length > 0) { + console.warn(`[fiscal-xbrl] ${stderr.trim()}`); + } + + if (exitCode !== 0) { + throw new Error(`Rust XBRL sidecar failed with exit code ${exitCode}: ${stderr.trim() || stdout.trim() || 'no error output'}`); + } + + return JSON.parse(stdout) as TaxonomyHydrationResult; + } finally { + clearTimeout(timeout); + } +} diff --git a/lib/server/taxonomy/types.ts b/lib/server/taxonomy/types.ts index 555a6e4..7f7de56 100644 --- a/lib/server/taxonomy/types.ts +++ b/lib/server/taxonomy/types.ts @@ -1,4 +1,13 @@ -import type { Filing, FinancialStatementKind, MetricValidationResult, TaxonomyStatementRow } from '@/lib/types'; +import type { + Filing, + FinancialStatementKind, + MetricValidationResult, + NormalizationSummary, + StructuredKpiRow, + 
SurfaceDetailMap, + SurfaceFinancialRow, + TaxonomyStatementRow +} from '@/lib/types'; import type { FilingTaxonomyAssetType, FilingTaxonomyParseStatus, @@ -19,10 +28,20 @@ export type TaxonomyNamespaceMap = Record; export type TaxonomyContext = { id: string; + entityIdentifier: string | null; + entityScheme: string | null; periodStart: string | null; periodEnd: string | null; periodInstant: string | null; dimensions: Array<{ axis: string; member: string }>; + segment: { + explicitMembers: Array<{ axis: string; member: string }>; + typedMembers: Array<{ axis: string; value: string }>; + } | null; + scenario: { + explicitMembers: Array<{ axis: string; member: string }>; + typedMembers: Array<{ axis: string; value: string }>; + } | null; }; export type TaxonomyUnit = { @@ -35,9 +54,12 @@ export type TaxonomyFact = { qname: string; namespaceUri: string; localName: string; + dataType: string | null; contextId: string; unit: string | null; decimals: string | null; + precision: string | null; + nil: boolean; value: number; periodStart: string | null; periodEnd: string | null; @@ -64,8 +86,17 @@ export type TaxonomyConcept = { local_name: string; label: string | null; is_extension: boolean; + balance: string | null; + period_type: string | null; + data_type: string | null; statement_kind: FinancialStatementKind | null; role_uri: string | null; + authoritative_concept_key: string | null; + mapping_method: string | null; + surface_key: string | null; + detail_parent_surface_key: string | null; + kpi_key: string | null; + residual_flag: boolean; presentation_order: number | null; presentation_depth: number | null; parent_concept_key: string | null; @@ -105,8 +136,26 @@ export type TaxonomyHydrationResult = { parse_status: FilingTaxonomyParseStatus; parse_error: string | null; source: FilingTaxonomySource; + parser_engine: string; + parser_version: string; + taxonomy_regime: 'us-gaap' | 'ifrs-full' | 'unknown'; + fiscal_pack: string | null; periods: FilingTaxonomyPeriod[]; + 
faithful_rows: Record; statement_rows: Record; + surface_rows: Record; + detail_rows: Record; + kpi_rows: StructuredKpiRow[]; + contexts: Array<{ + context_id: string; + entity_identifier: string | null; + entity_scheme: string | null; + period_start: string | null; + period_end: string | null; + period_instant: string | null; + segment_json: Record | null; + scenario_json: Record | null; + }>; derived_metrics: Filing['metrics']; validation_result: MetricValidationResult | null; facts_count: number; @@ -119,11 +168,20 @@ export type TaxonomyHydrationResult = { qname: string; namespace_uri: string; local_name: string; + data_type: string | null; statement_kind: FinancialStatementKind | null; role_uri: string | null; + authoritative_concept_key: string | null; + mapping_method: string | null; + surface_key: string | null; + detail_parent_surface_key: string | null; + kpi_key: string | null; + residual_flag: boolean; context_id: string; unit: string | null; decimals: string | null; + precision: string | null; + nil: boolean; value_num: number; period_start: string | null; period_end: string | null; @@ -133,4 +191,5 @@ export type TaxonomyHydrationResult = { source_file: string | null; }>; metric_validations: TaxonomyMetricValidationCheck[]; + normalization_summary: NormalizationSummary; }; diff --git a/lib/types.ts b/lib/types.ts index 32de78f..ef31629 100644 --- a/lib/types.ts +++ b/lib/types.ts @@ -462,6 +462,48 @@ export type DerivedFinancialRow = { export type StandardizedFinancialRow = DerivedFinancialRow; export type StandardizedStatementRow = StandardizedFinancialRow; +export type SurfaceFinancialRow = StandardizedFinancialRow & { + statement?: Extract; + detailCount?: number; + resolutionMethod?: 'direct' | 'surface_bridge' | 'formula_derived' | 'not_meaningful'; + confidence?: 'high' | 'medium' | 'low'; + warningCodes?: string[]; +}; + +export type DetailFinancialRow = { + key: string; + parentSurfaceKey: string; + label: string; + conceptKey: string; + 
qname: string; + namespaceUri: string; + localName: string; + unit: string | null; + values: Record; + sourceFactIds: number[]; + isExtension: boolean; + dimensionsSummary: string[]; + residualFlag: boolean; +}; + +export type SurfaceDetailMap = Record; + +export type NormalizationSummary = { + surfaceRowCount: number; + detailRowCount: number; + kpiRowCount: number; + unmappedRowCount: number; + materialUnmappedRowCount: number; + warnings: string[]; +}; + +export type NormalizationMetadata = { + regime: 'us-gaap' | 'ifrs-full' | 'unknown'; + fiscalPack: string | null; + parserVersion: string; + unmappedRowCount: number; + materialUnmappedRowCount: number; +}; export type RatioRow = DerivedFinancialRow & { denominatorKey: string | null; @@ -571,8 +613,9 @@ export type CompanyFinancialStatementsResponse = { periods: FinancialStatementPeriod[]; statementRows: { faithful: TaxonomyStatementRow[]; - standardized: StandardizedFinancialRow[]; + standardized: SurfaceFinancialRow[]; } | null; + statementDetails: SurfaceDetailMap | null; ratioRows: RatioRow[] | null; kpiRows: StructuredKpiRow[] | null; trendSeries: TrendSeries[]; @@ -608,6 +651,7 @@ export type CompanyFinancialStatementsResponse = { taxonomy: Filing['metrics']; validation: MetricValidationResult | null; }; + normalization: NormalizationMetadata; dimensionBreakdown: Record | null; }; diff --git a/rust/Cargo.lock b/rust/Cargo.lock new file mode 100644 index 0000000..476d1c6 --- /dev/null +++ b/rust/Cargo.lock @@ -0,0 +1,2371 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "ahash" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" +dependencies = [ + "cfg-if", + "getrandom 0.3.4", + "once_cell", + "version_check", + "zerocopy", +] + +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "anes" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + +[[package]] +name = "anstream" +version = "0.6.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" + +[[package]] +name = "anstyle-parse" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" +dependencies = [ + 
"windows-sys 0.61.2", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys 0.61.2", +] + +[[package]] +name = "anyhow" +version = "1.0.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" + +[[package]] +name = "atomic-waker" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + +[[package]] +name = "bitflags" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" + +[[package]] +name = "bumpalo" +version = "3.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" + +[[package]] +name = "bytes" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" + +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + +[[package]] +name = "cc" +version = "1.2.56" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "aebf35691d1bfb0ac386a69bac2fde4dd276fb618cf8bf4f5318fe285e821bb2" +dependencies = [ + "find-msvc-tools", + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + +[[package]] +name = "chrono" +version = "0.4.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0" +dependencies = [ + "iana-time-zone", + "js-sys", + "num-traits", + "wasm-bindgen", + "windows-link", +] + +[[package]] +name = "ciborium" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" +dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde", +] + +[[package]] +name = "ciborium-io" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" + +[[package]] +name = "ciborium-ll" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" +dependencies = [ + "ciborium-io", + "half", +] + +[[package]] +name = "clap" +version = "4.5.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2797f34da339ce31042b27d23607e051786132987f595b02ba4f6a6dffb7030a" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"24a241312cea5059b13574bb9b3861cabf758b879c15190b37b6d6fd63ab6876" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.55" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a92793da1a46a5f2a02a6f4c46c6496b28c43638adea8306fcb0caa1634f24e5" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a822ea5bc7590f9d40f1ba12c0dc3c2760f3482c6984db1573ad11031420831" + +[[package]] +name = "colorchoice" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" + +[[package]] +name = "colored" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "117725a109d387c937a1533ce01b450cbde6b88abceea8473c4d7a85853cda3c" +dependencies = [ + "lazy_static", + "windows-sys 0.59.0", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "crabrl" +version = "0.1.0" +dependencies = [ + "ahash", + "anyhow", + "bitflags", + "chrono", + "clap", + "colored", + "criterion", + "memmap2", + "mimalloc", + "parking_lot", + "pretty_assertions", + "quick-xml", + "rayon", + "serde", + "serde_json", + "tempfile", + "thiserror", +] + +[[package]] +name = "criterion" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" +dependencies = [ + "anes", + "cast", + "ciborium", + "clap", + "criterion-plot", + "is-terminal", + "itertools", + "num-traits", + "once_cell", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + 
"serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" +dependencies = [ + "cast", + "itertools", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + +[[package]] +name = "diff" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" + +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "equivalent" +version = "1.0.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys 0.61.2", +] + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + +[[package]] +name = "find-msvc-tools" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" + +[[package]] +name = "fiscal-xbrl-cli" +version = "0.1.0" +dependencies = [ + "anyhow", + "fiscal-xbrl-core", + "serde", + "serde_json", +] + +[[package]] +name = "fiscal-xbrl-core" +version = "0.1.0" +dependencies = [ + "anyhow", + "crabrl", + "once_cell", + "regex", + "reqwest", + "serde", + "serde_json", +] + +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + +[[package]] +name = "form_urlencoded" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "futures-channel" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" + +[[package]] +name = "futures-io" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718" + +[[package]] +name = "futures-sink" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893" + +[[package]] +name = "futures-task" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" + +[[package]] +name = "futures-util" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" +dependencies = [ + "futures-core", + "futures-io", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "slab", +] + +[[package]] +name = "getrandom" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" +dependencies = [ + "cfg-if", + "js-sys", + "libc", + "wasi", + "wasm-bindgen", +] + +[[package]] +name = "getrandom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +dependencies = [ + "cfg-if", + "js-sys", + "libc", + "r-efi 5.3.0", + "wasip2", + "wasm-bindgen", +] + +[[package]] +name = "getrandom" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" +dependencies = [ + "cfg-if", + "libc", + "r-efi 6.0.0", + "wasip2", + "wasip3", +] + +[[package]] +name = "half" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" +dependencies = [ + "cfg-if", + "crunchy", + "zerocopy", +] + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "foldhash", +] + +[[package]] +name = "hashbrown" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "hermit-abi" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" + +[[package]] +name = "http" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a" +dependencies = [ + "bytes", + "itoa", +] + +[[package]] +name = "http-body" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" +dependencies = [ + "bytes", + "http", +] + +[[package]] +name = "http-body-util" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" +dependencies = [ + "bytes", + "futures-core", + "http", + "http-body", + "pin-project-lite", +] + +[[package]] +name = "httparse" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" + +[[package]] +name = "hyper" +version = "1.8.1" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ab2d4f250c3d7b1c9fcdff1cece94ea4e2dfbec68614f7b87cb205f24ca9d11" +dependencies = [ + "atomic-waker", + "bytes", + "futures-channel", + "futures-core", + "http", + "http-body", + "httparse", + "itoa", + "pin-project-lite", + "pin-utils", + "smallvec", + "tokio", + "want", +] + +[[package]] +name = "hyper-rustls" +version = "0.27.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" +dependencies = [ + "http", + "hyper", + "hyper-util", + "rustls", + "rustls-pki-types", + "tokio", + "tokio-rustls", + "tower-service", + "webpki-roots", +] + +[[package]] +name = "hyper-util" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0" +dependencies = [ + "base64", + "bytes", + "futures-channel", + "futures-util", + "http", + "http-body", + "hyper", + "ipnet", + "libc", + "percent-encoding", + "pin-project-lite", + "socket2", + "tokio", + "tower-service", + "tracing", +] + +[[package]] +name = "iana-time-zone" +version = "0.1.65" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "icu_collections" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43" +dependencies = [ + "displaydoc", 
+ "potential_utf", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locale_core" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_normalizer" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599" +dependencies = [ + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a" + +[[package]] +name = "icu_properties" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "020bfc02fe870ec3a66d93e677ccca0562506e5872c650f893269e08615d74ec" +dependencies = [ + "icu_collections", + "icu_locale_core", + "icu_properties_data", + "icu_provider", + "zerotrie", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "616c294cf8d725c6afcd8f55abc17c56464ef6211f9ed59cccffe534129c77af" + +[[package]] +name = "icu_provider" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614" +dependencies = [ + "displaydoc", + "icu_locale_core", + "writeable", + "yoke", + "zerofrom", + "zerotrie", + "zerovec", +] + +[[package]] +name = "id-arena" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" + +[[package]] +name = "idna" 
+version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" +dependencies = [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344" +dependencies = [ + "icu_normalizer", + "icu_properties", +] + +[[package]] +name = "indexmap" +version = "2.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" +dependencies = [ + "equivalent", + "hashbrown 0.16.1", + "serde", + "serde_core", +] + +[[package]] +name = "ipnet" +version = "2.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2" + +[[package]] +name = "iri-string" +version = "0.7.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c91338f0783edbd6195decb37bae672fd3b165faffb89bf7b9e6942f8b1a731a" +dependencies = [ + "memchr", + "serde", +] + +[[package]] +name = "is-terminal" +version = "0.4.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46" +dependencies = [ + "hermit-abi", + "libc", + "windows-sys 0.61.2", +] + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.17" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" + +[[package]] +name = "js-sys" +version = "0.3.91" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b49715b7073f385ba4bc528e5747d02e66cb39c6146efb66b781f131f0fb399c" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "leb128fmt" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" + +[[package]] +name = "libc" +version = "0.2.183" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d" + +[[package]] +name = "libmimalloc-sys" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "667f4fec20f29dfc6bc7357c582d91796c169ad7e2fce709468aefeb2c099870" +dependencies = [ + "cc", + "libc", +] + +[[package]] +name = "linux-raw-sys" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" + +[[package]] +name = "litemap" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" + +[[package]] +name = "lock_api" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" +dependencies = [ + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" + +[[package]] +name = "lru-slab" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" + +[[package]] +name = "memchr" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" + +[[package]] +name = "memmap2" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "714098028fe011992e1c3962653c96b2d578c4b4bce9036e15ff220319b1e0e3" +dependencies = [ + "libc", +] + +[[package]] +name = "mimalloc" +version = "0.1.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1ee66a4b64c74f4ef288bcbb9192ad9c3feaad75193129ac8509af543894fd8" +dependencies = [ + "libmimalloc-sys", +] + +[[package]] +name = "mio" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc" +dependencies = [ + "libc", + "wasi", + "windows-sys 0.61.2", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + +[[package]] +name = "oorandom" +version = "11.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" + +[[package]] +name = "parking_lot" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-link", +] + +[[package]] +name = "percent-encoding" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" + +[[package]] +name = "pin-project-lite" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "plotters" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" + +[[package]] +name = "plotters-svg" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" +dependencies = [ + "plotters-backend", +] + 
+[[package]] +name = "potential_utf" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77" +dependencies = [ + "zerovec", +] + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "pretty_assertions" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ae130e2f271fbc2ac3a40fb1d07180839cdbbe443c7a27e1e3c13c5cac0116d" +dependencies = [ + "diff", + "yansi", +] + +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn", +] + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quick-xml" +version = "0.36.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7649a7b4df05aed9ea7ec6f628c67c9953a43869b8bc50929569b2999d443fe" +dependencies = [ + "memchr", +] + +[[package]] +name = "quinn" +version = "0.11.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e20a958963c291dc322d98411f541009df2ced7b5a4f2bd52337638cfccf20" +dependencies = [ + "bytes", + "cfg_aliases", + "pin-project-lite", + "quinn-proto", + "quinn-udp", + "rustc-hash", + "rustls", + "socket2", + "thiserror", + "tokio", + "tracing", + "web-time", +] + +[[package]] +name = "quinn-proto" +version = "0.11.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"434b42fec591c96ef50e21e886936e66d3cc3f737104fdb9b737c40ffb94c098" +dependencies = [ + "bytes", + "getrandom 0.3.4", + "lru-slab", + "rand", + "ring", + "rustc-hash", + "rustls", + "rustls-pki-types", + "slab", + "thiserror", + "tinyvec", + "tracing", + "web-time", +] + +[[package]] +name = "quinn-udp" +version = "0.5.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "addec6a0dcad8a8d96a771f815f0eaf55f9d1805756410b39f5fa81332574cbd" +dependencies = [ + "cfg_aliases", + "libc", + "once_cell", + "socket2", + "tracing", + "windows-sys 0.60.2", +] + +[[package]] +name = "quote" +version = "1.0.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "r-efi" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" + +[[package]] +name = "rand" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +dependencies = [ + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" +dependencies = [ + "getrandom 0.3.4", +] + +[[package]] +name = "rayon" +version = "1.11.0" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + +[[package]] +name = "redox_syscall" +version = "0.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" +dependencies = [ + "bitflags", +] + +[[package]] +name = "regex" +version = "1.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" + +[[package]] +name = "reqwest" +version = "0.12.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147" +dependencies = [ + "base64", + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "http", + "http-body", + "http-body-util", + "hyper", + "hyper-rustls", + "hyper-util", + "js-sys", + "log", + "percent-encoding", + "pin-project-lite", + "quinn", + "rustls", + "rustls-pki-types", + "serde", + "serde_json", + 
"serde_urlencoded", + "sync_wrapper", + "tokio", + "tokio-rustls", + "tower", + "tower-http", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", + "webpki-roots", +] + +[[package]] +name = "ring" +version = "0.17.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" +dependencies = [ + "cc", + "cfg-if", + "getrandom 0.2.17", + "libc", + "untrusted", + "windows-sys 0.52.0", +] + +[[package]] +name = "rustc-hash" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" + +[[package]] +name = "rustix" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.61.2", +] + +[[package]] +name = "rustls" +version = "0.23.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "758025cb5fccfd3bc2fd74708fd4682be41d99e5dff73c377c0646c6012c73a4" +dependencies = [ + "once_cell", + "ring", + "rustls-pki-types", + "rustls-webpki", + "subtle", + "zeroize", +] + +[[package]] +name = "rustls-pki-types" +version = "1.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be040f8b0a225e40375822a563fa9524378b9d63112f53e19ffff34df5d33fdd" +dependencies = [ + "web-time", + "zeroize", +] + +[[package]] +name = "rustls-webpki" +version = "0.103.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7df23109aa6c1567d1c575b9952556388da57401e4ace1d15f79eedad0d8f53" +dependencies = [ + "ring", + "rustls-pki-types", + "untrusted", +] + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "ryu" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "semver" +version = "1.0.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "serde_urlencoded" +version = "0.7.1" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" +dependencies = [ + "form_urlencoded", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "slab" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "socket2" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" +dependencies = [ + "libc", + "windows-sys 0.61.2", +] + +[[package]] +name = "stable_deref_trait" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "subtle" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" + +[[package]] +name = "syn" +version = "2.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "sync_wrapper" +version = "1.0.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" +dependencies = [ + "futures-core", +] + +[[package]] +name = "synstructure" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tempfile" +version = "3.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd" +dependencies = [ + "fastrand", + "getrandom 0.4.2", + "once_cell", + "rustix", + "windows-sys 0.61.2", +] + +[[package]] +name = "thiserror" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tinystr" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869" +dependencies = [ + "displaydoc", + "zerovec", +] + +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + +[[package]] +name = "tinyvec" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa" +dependencies = [ + "tinyvec_macros", +] + +[[package]] 
+name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + +[[package]] +name = "tokio" +version = "1.50.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "27ad5e34374e03cfffefc301becb44e9dc3c17584f414349ebe29ed26661822d" +dependencies = [ + "bytes", + "libc", + "mio", + "pin-project-lite", + "socket2", + "windows-sys 0.61.2", +] + +[[package]] +name = "tokio-rustls" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61" +dependencies = [ + "rustls", + "tokio", +] + +[[package]] +name = "tower" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4" +dependencies = [ + "futures-core", + "futures-util", + "pin-project-lite", + "sync_wrapper", + "tokio", + "tower-layer", + "tower-service", +] + +[[package]] +name = "tower-http" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8" +dependencies = [ + "bitflags", + "bytes", + "futures-util", + "http", + "http-body", + "iri-string", + "pin-project-lite", + "tower", + "tower-layer", + "tower-service", +] + +[[package]] +name = "tower-layer" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" + +[[package]] +name = "tower-service" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" + +[[package]] +name = "tracing" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" +dependencies = [ + "pin-project-lite", + "tracing-core", +] + +[[package]] +name = "tracing-core" +version = "0.1.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" +dependencies = [ + "once_cell", +] + +[[package]] +name = "try-lock" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + +[[package]] +name = "untrusted" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" + +[[package]] +name = "url" +version = "2.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", + "serde", +] + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "want" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" +dependencies = [ + "try-lock", +] + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wasip2" +version = "1.0.2+wasi-0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasip3" +version = "0.4.0+wasi-0.3.0-rc-2026-01-06" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.114" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6532f9a5c1ece3798cb1c2cfdba640b9b3ba884f5db45973a6f442510a87d38e" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9c5522b3a28661442748e09d40924dfb9ca614b21c00d3fd135720e48b67db8" +dependencies = [ + "cfg-if", + "futures-util", + "js-sys", + "once_cell", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.114" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "18a2d50fcf105fb33bb15f00e7a77b772945a2ee45dcf454961fd843e74c18e6" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.114" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03ce4caeaac547cdf713d280eda22a730824dd11e6b8c3ca9e42247b25c631e3" +dependencies = [ + "bumpalo", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.114" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75a326b8c223ee17883a4251907455a2431acc2791c98c26279376490c378c16" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "wasm-encoder" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" +dependencies = [ + "leb128fmt", + "wasmparser", +] + +[[package]] +name = "wasm-metadata" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" +dependencies = [ + "anyhow", + "indexmap", + "wasm-encoder", + "wasmparser", +] + +[[package]] +name = "wasmparser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" +dependencies = [ + "bitflags", + "hashbrown 0.15.5", + "indexmap", + "semver", +] + +[[package]] +name = "web-sys" +version = "0.3.91" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "854ba17bb104abfb26ba36da9729addc7ce7f06f5c0f90f3c391f8461cca21f9" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "web-time" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "webpki-roots" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22cfaf3c063993ff62e73cb4311efde4db1efb31ab78a3e5c457939ad5cc0bed" +dependencies = [ + "rustls-pki-types", +] + +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "windows-core" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-implement" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-interface" +version = "0.59.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-result" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.5.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets 0.53.5", +] + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm 0.52.6", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", +] + +[[package]] +name = "windows-targets" +version = "0.53.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" +dependencies = [ + "windows-link", + "windows_aarch64_gnullvm 0.53.1", + "windows_aarch64_msvc 0.53.1", + "windows_i686_gnu 0.53.1", + 
"windows_i686_gnullvm 0.53.1", + "windows_i686_msvc 0.53.1", + "windows_x86_64_gnu 0.53.1", + "windows_x86_64_gnullvm 0.53.1", + "windows_x86_64_msvc 0.53.1", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_i686_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" + +[[package]] +name = "wit-bindgen" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" +dependencies = [ + "wit-bindgen-rust-macro", +] + +[[package]] +name = "wit-bindgen-core" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" 
+dependencies = [ + "anyhow", + "heck", + "wit-parser", +] + +[[package]] +name = "wit-bindgen-rust" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +dependencies = [ + "anyhow", + "heck", + "indexmap", + "prettyplease", + "syn", + "wasm-metadata", + "wit-bindgen-core", + "wit-component", +] + +[[package]] +name = "wit-bindgen-rust-macro" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2", + "quote", + "syn", + "wit-bindgen-core", + "wit-bindgen-rust", +] + +[[package]] +name = "wit-component" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" +dependencies = [ + "anyhow", + "bitflags", + "indexmap", + "log", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder", + "wasm-metadata", + "wasmparser", + "wit-parser", +] + +[[package]] +name = "wit-parser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" +dependencies = [ + "anyhow", + "id-arena", + "indexmap", + "log", + "semver", + "serde", + "serde_derive", + "serde_json", + "unicode-xid", + "wasmparser", +] + +[[package]] +name = "writeable" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" + +[[package]] +name = "yansi" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" + +[[package]] +name = "yoke" +version = "0.8.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954" +dependencies = [ + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "zerocopy" +version = "0.8.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2578b716f8a7a858b7f02d5bd870c14bf4ddbbcf3a4c05414ba6503640505e3" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e6cc098ea4d3bd6246687de65af3f920c430e236bee1e3bf2e441463f08a02f" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "zerofrom" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "zeroize" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" + +[[package]] +name = "zerotrie" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", +] + +[[package]] +name = 
"zerovec" +version = "0.11.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/rust/Cargo.toml b/rust/Cargo.toml new file mode 100644 index 0000000..a7b9ac5 --- /dev/null +++ b/rust/Cargo.toml @@ -0,0 +1,19 @@ +[workspace] +members = [ + "fiscal-xbrl-core", + "fiscal-xbrl-cli" +] +resolver = "2" + +[workspace.package] +edition = "2021" +license = "AGPL-3.0" +version = "0.1.0" + +[workspace.dependencies] +anyhow = "1.0" +once_cell = "1.21" +regex = "1.11" +reqwest = { version = "0.12", default-features = false, features = ["blocking", "json", "rustls-tls"] } +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" diff --git a/rust/fiscal-xbrl-cli/Cargo.toml b/rust/fiscal-xbrl-cli/Cargo.toml new file mode 100644 index 0000000..6184447 --- /dev/null +++ b/rust/fiscal-xbrl-cli/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "fiscal-xbrl-cli" +version.workspace = true +edition.workspace = true +license.workspace = true + +[[bin]] +name = "fiscal-xbrl" +path = "src/main.rs" + +[dependencies] +anyhow.workspace = true +serde.workspace = true +serde_json.workspace = true +fiscal-xbrl-core = { path = "../fiscal-xbrl-core" } diff --git a/rust/fiscal-xbrl-cli/src/main.rs b/rust/fiscal-xbrl-cli/src/main.rs new file mode 100644 index 0000000..a306cd9 --- /dev/null +++ b/rust/fiscal-xbrl-cli/src/main.rs @@ -0,0 +1,37 @@ +use anyhow::{anyhow, 
Context, Result}; +use fiscal_xbrl_core::{hydrate_filing, HydrateFilingRequest}; +use std::env; +use std::io::{self, Read}; + +fn main() { + if let Err(error) = run() { + eprintln!("{error:#}"); + let code = match error.to_string().as_str() { + message if message.contains("invalid request") => 6, + message if message.contains("taxonomy resolution") => 4, + message if message.contains("parse") => 3, + message if message.contains("fetch") || message.contains("request failed") => 2, + _ => 5, + }; + std::process::exit(code); + } +} + +fn run() -> Result<()> { + let command = env::args().nth(1).unwrap_or_default(); + if command != "hydrate-filing" { + return Err(anyhow!("invalid request: expected `hydrate-filing` command")); + } + + let mut buffer = String::new(); + io::stdin() + .read_to_string(&mut buffer) + .context("invalid request: unable to read stdin")?; + + let request: HydrateFilingRequest = serde_json::from_str(&buffer) + .context("invalid request: unable to parse hydrate request JSON")?; + + let response = hydrate_filing(request)?; + serde_json::to_writer(io::stdout(), &response).context("unable to write hydrate response")?; + Ok(()) +} diff --git a/rust/fiscal-xbrl-core/Cargo.toml b/rust/fiscal-xbrl-core/Cargo.toml new file mode 100644 index 0000000..5781ca4 --- /dev/null +++ b/rust/fiscal-xbrl-core/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "fiscal-xbrl-core" +version.workspace = true +edition.workspace = true +license.workspace = true + +[features] +default = [] +with-crabrl = ["dep:crabrl"] + +[dependencies] +anyhow.workspace = true +once_cell.workspace = true +regex.workspace = true +reqwest.workspace = true +serde.workspace = true +serde_json.workspace = true +crabrl = { path = "../vendor/crabrl", default-features = false, optional = true } diff --git a/rust/fiscal-xbrl-core/src/kpi_mapper.rs b/rust/fiscal-xbrl-core/src/kpi_mapper.rs new file mode 100644 index 0000000..af81eac --- /dev/null +++ b/rust/fiscal-xbrl-core/src/kpi_mapper.rs @@ -0,0 
+1,700 @@ +use anyhow::Result; +use std::collections::{BTreeMap, HashMap, HashSet}; + +use crate::pack_selector::FiscalPack; +use crate::surface_mapper::{MappingAssignment, MappingMethod}; +use crate::taxonomy_loader::{load_kpi_pack, KpiDefinition}; +use crate::{FactOutput, KpiRowOutput, PeriodOutput}; + +#[derive(Debug, Default)] +pub struct KpiExtractionResult { + pub rows: Vec, + pub mapping_assignments: HashMap, + pub warnings: Vec, +} + +pub fn build_taxonomy_kpis( + periods: &[PeriodOutput], + facts: &[FactOutput], + fiscal_pack: FiscalPack, +) -> Result { + if fiscal_pack == FiscalPack::Core { + return Ok(KpiExtractionResult::default()); + } + + let kpi_pack = load_kpi_pack(fiscal_pack)?; + let mut rows = Vec::::new(); + let mut mapping_assignments = HashMap::::new(); + + for (index, definition) in kpi_pack.kpis.iter().enumerate() { + let Some(kpi_row) = build_kpi_row(definition, index as i64, periods, facts) else { + continue; + }; + + for concept_key in unique_sorted_strings(kpi_row.source_concepts.iter().map(|qname| concept_key_from_qname(qname)).collect()) { + mapping_assignments.insert( + concept_key, + MappingAssignment { + authoritative_concept_key: None, + mapping_method: Some(MappingMethod::TaxonomyKpi), + surface_key: None, + detail_parent_surface_key: None, + kpi_key: Some(kpi_row.key.clone()), + residual_flag: false, + }, + ); + } + + rows.push(kpi_row); + } + + rows.sort_by(|left, right| left.order.cmp(&right.order).then_with(|| left.label.cmp(&right.label))); + + Ok(KpiExtractionResult { + rows, + mapping_assignments, + warnings: vec![], + }) +} + +fn build_kpi_row( + definition: &KpiDefinition, + order_index: i64, + periods: &[PeriodOutput], + facts: &[FactOutput], +) -> Option { + match definition.key.as_str() { + "loan_growth" => growth_kpi_row( + definition, + order_index, + periods, + facts, + &[ + "FinancingReceivableRecordedInvestment", + "LoansReceivableNetReportedAmount", + 
"FinancingReceivableExcludingAccruedInterestBeforeAllowanceForCreditLoss", + "FinancingReceivableExcludingAccruedInterestAfterAllowanceForCreditLoss", + "FinanceReceivableAllowanceForCreditLossesExcluded", + ], + ), + "deposit_growth" => growth_kpi_row( + definition, + order_index, + periods, + facts, + &["DepositsLiabilities", "Deposits", "DepositsDomestic", "DepositsForeign"], + ), + "premium_growth" => growth_kpi_row( + definition, + order_index, + periods, + facts, + &[ + "Premiums", + "PremiumsEarned", + "PremiumsWritten", + "PremiumsEarnedNet", + "PremiumsWrittenNet", + "SupplementaryInsuranceInformationPremiumRevenue", + ], + ), + "net_interest_margin" => direct_or_formula_row( + definition, + order_index, + periods, + facts, + &["NetInterestMargin", "NetInterestSpread"], + Some(( + &[ + "InterestAndDividendIncomeOperating", + "InterestIncomeExpenseOperatingNet", + "InterestIncomeExpenseNet", + ], + &["Assets", "AverageInterestEarningAssets"], + true, + )), + ), + "combined_ratio" => direct_or_formula_row( + definition, + order_index, + periods, + facts, + &["CombinedRatio"], + Some(( + &[ + "PolicyholderBenefitsAndClaimsIncurredNet", + "BenefitsLossesAndExpenses", + "LossesAndLossAdjustmentExpenses", + "SupplementaryInsuranceInformationBenefitsClaimsLossesAndSettlementExpense", + ], + &[ + "Premiums", + "PremiumsEarned", + "PremiumsWritten", + "PremiumsEarnedNet", + "PremiumsWrittenNet", + "SupplementaryInsuranceInformationPremiumRevenue", + ], + true, + )), + ), + "property_count" => direct_or_formula_row( + definition, + order_index, + periods, + facts, + &["NumberOfRealEstateProperties", "SECScheduleIIIRealEstateNumberOfUnits"], + None, + ), + "investment_property_growth" => growth_kpi_row( + definition, + order_index, + periods, + facts, + &[ + "RealEstateInvestmentPropertyNet", + "RealEstateInvestmentPropertyAtCost", + "RealEstateGrossAtCarryingValue", + ], + ), + "aum" => direct_or_formula_row( + definition, + order_index, + periods, + facts, + 
&["AssetsUnderManagementCarryingAmount"], + None, + ), + "fee_paying_aum" => direct_or_formula_row( + definition, + order_index, + periods, + facts, + &["FeePayingAssetUnderManagement"], + None, + ), + _ => None, + } +} + +fn growth_kpi_row( + definition: &KpiDefinition, + order_index: i64, + periods: &[PeriodOutput], + facts: &[FactOutput], + local_names: &[&str], +) -> Option { + let matched = collect_period_values(periods, facts, local_names); + if matched.values.is_empty() { + return None; + } + + let sorted_periods = sort_periods(periods); + let mut values = BTreeMap::>::new(); + for window in sorted_periods.windows(2) { + let previous = window.first()?; + let current = window.get(1)?; + let current_value = matched.values.get(¤t.id).copied().flatten(); + let previous_value = matched.values.get(&previous.id).copied().flatten(); + let growth = match (current_value, previous_value) { + (Some(current_value), Some(previous_value)) if previous_value != 0.0 => { + Some(current_value / previous_value - 1.0) + } + _ => None, + }; + values.insert(current.id.clone(), growth); + } + + build_kpi_output(definition, order_index, "operating_kpi", values, matched) +} + +fn direct_or_formula_row( + definition: &KpiDefinition, + order_index: i64, + periods: &[PeriodOutput], + facts: &[FactOutput], + direct_local_names: &[&str], + formula: Option<(&[&str], &[&str], bool)>, +) -> Option { + let direct = collect_period_values(periods, facts, direct_local_names); + if !direct.values.is_empty() { + return build_kpi_output( + definition, + order_index, + "operating_kpi", + direct.values.clone(), + direct, + ); + } + + let direct_by_end_date = collect_end_date_values(facts, direct_local_names); + if !direct_by_end_date.values.is_empty() { + return build_date_aligned_kpi_output( + definition, + order_index, + "operating_kpi", + periods, + direct_by_end_date, + ); + } + + let Some((numerator_names, denominator_names, divide)) = formula else { + return None; + }; + let numerator = 
collect_period_values(periods, facts, numerator_names); + let denominator = collect_period_values(periods, facts, denominator_names); + let mut values = BTreeMap::>::new(); + let mut sources = PeriodFactValues::default(); + + for period in periods { + let numerator_value = numerator.values.get(&period.id).copied().flatten(); + let denominator_value = denominator.values.get(&period.id).copied().flatten(); + let next_value = if divide { + match (numerator_value, denominator_value) { + (Some(numerator_value), Some(denominator_value)) if denominator_value != 0.0 => { + Some(numerator_value / denominator_value) + } + _ => None, + } + } else { + None + }; + values.insert(period.id.clone(), next_value); + + for qname in numerator.source_concepts.iter().chain(denominator.source_concepts.iter()) { + sources.source_concepts.insert(qname.clone()); + } + for fact_id in numerator.source_fact_ids.iter().chain(denominator.source_fact_ids.iter()) { + sources.source_fact_ids.insert(*fact_id); + } + sources.has_dimensions = sources.has_dimensions || numerator.has_dimensions || denominator.has_dimensions; + } + + if values.values().any(|value| value.is_some()) { + return Some(KpiRowOutput { + key: definition.key.clone(), + label: definition.label.clone(), + category: "operating_kpi".to_string(), + unit: definition.unit.clone(), + order: (order_index + 1) * 10, + segment: None, + axis: None, + member: None, + values, + source_concepts: unique_sorted_strings(sources.source_concepts.into_iter().collect()), + source_fact_ids: unique_sorted_i64(sources.source_fact_ids.into_iter().collect()), + provenance_type: "taxonomy".to_string(), + has_dimensions: sources.has_dimensions, + }); + } + + let numerator_by_end_date = collect_end_date_values(facts, numerator_names); + let denominator_by_end_date = collect_end_date_values(facts, denominator_names); + let mut aligned_values = BTreeMap::>::new(); + + for end_date in numerator_by_end_date.values.keys() { + let numerator_value = 
numerator_by_end_date.values.get(end_date).copied().flatten(); + let denominator_value = denominator_by_end_date.values.get(end_date).copied().flatten(); + let next_value = if divide { + match (numerator_value, denominator_value) { + (Some(numerator_value), Some(denominator_value)) if denominator_value != 0.0 => { + Some(numerator_value / denominator_value) + } + _ => None, + } + } else { + None + }; + let Some(period_id) = select_period_id_for_end_date(periods, end_date, true) else { + continue; + }; + aligned_values.insert(period_id, next_value); + } + + if aligned_values.values().all(|value| value.is_none()) { + return None; + } + + let mut aligned_sources = DateFactValues { + prefer_duration: true, + ..DateFactValues::default() + }; + aligned_sources.source_concepts.extend(numerator_by_end_date.source_concepts); + aligned_sources.source_concepts.extend(denominator_by_end_date.source_concepts); + aligned_sources.source_fact_ids.extend(numerator_by_end_date.source_fact_ids); + aligned_sources.source_fact_ids.extend(denominator_by_end_date.source_fact_ids); + aligned_sources.has_dimensions = numerator_by_end_date.has_dimensions || denominator_by_end_date.has_dimensions; + + Some(KpiRowOutput { + key: definition.key.clone(), + label: definition.label.clone(), + category: "operating_kpi".to_string(), + unit: definition.unit.clone(), + order: (order_index + 1) * 10, + segment: None, + axis: None, + member: None, + values: aligned_values, + source_concepts: unique_sorted_strings(aligned_sources.source_concepts.into_iter().collect()), + source_fact_ids: unique_sorted_i64(aligned_sources.source_fact_ids.into_iter().collect()), + provenance_type: "taxonomy".to_string(), + has_dimensions: aligned_sources.has_dimensions, + }) +} + +#[derive(Debug, Default, Clone)] +struct PeriodFactValues { + values: BTreeMap>, + source_concepts: HashSet, + source_fact_ids: HashSet, + has_dimensions: bool, +} + +#[derive(Debug, Default, Clone)] +struct DateFactValues { + values: BTreeMap>, 
+ source_concepts: HashSet, + source_fact_ids: HashSet, + has_dimensions: bool, + prefer_duration: bool, +} + +fn collect_period_values( + periods: &[PeriodOutput], + facts: &[FactOutput], + local_names: &[&str], +) -> PeriodFactValues { + let mut values = PeriodFactValues::default(); + let targets = local_names + .iter() + .map(|name| name.to_ascii_lowercase()) + .collect::>(); + let mut fact_ids_by_period = HashMap::>::new(); + + for (index, fact) in facts.iter().enumerate() { + if !targets.contains(&fact.local_name.to_ascii_lowercase()) { + continue; + } + + let Some(period_id) = period_id_for_fact(periods, fact) else { + continue; + }; + fact_ids_by_period + .entry(period_id) + .or_default() + .push((index as i64 + 1, fact)); + } + + for period in periods { + let Some(grouped_facts) = fact_ids_by_period.get(&period.id) else { + continue; + }; + let Some((fact_id, fact)) = pick_preferred_fact(grouped_facts) else { + continue; + }; + + values.values.insert(period.id.clone(), Some(fact.value_num)); + values.source_concepts.insert(fact.qname.clone()); + values.source_fact_ids.insert(*fact_id); + values.has_dimensions = values.has_dimensions || !fact.is_dimensionless; + } + + values +} + +fn collect_end_date_values( + facts: &[FactOutput], + local_names: &[&str], +) -> DateFactValues { + let mut values = DateFactValues::default(); + let targets = local_names + .iter() + .map(|name| name.to_ascii_lowercase()) + .collect::>(); + let mut fact_ids_by_end_date = HashMap::>::new(); + + for (index, fact) in facts.iter().enumerate() { + if !targets.contains(&fact.local_name.to_ascii_lowercase()) { + continue; + } + + let Some(end_date) = fact.period_end.clone().or_else(|| fact.period_instant.clone()) else { + continue; + }; + fact_ids_by_end_date + .entry(end_date) + .or_default() + .push((index as i64 + 1, fact)); + } + + for (end_date, grouped_facts) in fact_ids_by_end_date { + let Some((fact_id, fact)) = pick_preferred_fact(&grouped_facts) else { + continue; + }; + + 
values.values.insert(end_date, Some(fact.value_num)); + values.source_concepts.insert(fact.qname.clone()); + values.source_fact_ids.insert(*fact_id); + values.has_dimensions = values.has_dimensions || !fact.is_dimensionless; + values.prefer_duration = values.prefer_duration || fact.period_start.is_some(); + } + + values +} + +fn period_id_for_fact(periods: &[PeriodOutput], fact: &FactOutput) -> Option { + let fact_period_end = fact.period_end.clone().or_else(|| fact.period_instant.clone()); + + periods + .iter() + .find(|period| { + period.period_start == fact.period_start + && period.period_end == fact_period_end + }) + .map(|period| period.id.clone()) +} + +fn pick_preferred_fact<'a>(grouped_facts: &'a [(i64, &'a FactOutput)]) -> Option<&'a (i64, &'a FactOutput)> { + grouped_facts.iter().max_by(|left, right| { + let left_dimension_score = if left.1.is_dimensionless { 1 } else { 0 }; + let right_dimension_score = if right.1.is_dimensionless { 1 } else { 0 }; + left_dimension_score + .cmp(&right_dimension_score) + .then_with(|| { + left.1 + .value_num + .abs() + .partial_cmp(&right.1.value_num.abs()) + .unwrap_or(std::cmp::Ordering::Equal) + }) + }) +} + +fn select_period_id_for_end_date( + periods: &[PeriodOutput], + end_date: &str, + prefer_duration: bool, +) -> Option { + periods + .iter() + .filter(|period| period.period_end.as_deref() == Some(end_date)) + .max_by(|left, right| { + let left_score = if prefer_duration { + if left.period_start.is_some() { 1 } else { 0 } + } else if left.period_start.is_none() { + 1 + } else { + 0 + }; + let right_score = if prefer_duration { + if right.period_start.is_some() { 1 } else { 0 } + } else if right.period_start.is_none() { + 1 + } else { + 0 + }; + left_score + .cmp(&right_score) + .then_with(|| left.id.cmp(&right.id)) + }) + .map(|period| period.id.clone()) +} + +fn sort_periods(periods: &[PeriodOutput]) -> Vec<&PeriodOutput> { + let mut periods = periods.iter().collect::>(); + periods.sort_by(|left, right| { + let 
left_key = left.period_end.clone().unwrap_or_else(|| left.filing_date.clone()); + let right_key = right.period_end.clone().unwrap_or_else(|| right.filing_date.clone()); + left_key.cmp(&right_key).then_with(|| left.id.cmp(&right.id)) + }); + periods +} + +fn build_kpi_output( + definition: &KpiDefinition, + order_index: i64, + category: &str, + values: BTreeMap>, + matched: PeriodFactValues, +) -> Option { + if values.values().all(|value| value.is_none()) { + return None; + } + + Some(KpiRowOutput { + key: definition.key.clone(), + label: definition.label.clone(), + category: category.to_string(), + unit: definition.unit.clone(), + order: (order_index + 1) * 10, + segment: None, + axis: None, + member: None, + values, + source_concepts: unique_sorted_strings(matched.source_concepts.into_iter().collect()), + source_fact_ids: unique_sorted_i64(matched.source_fact_ids.into_iter().collect()), + provenance_type: "taxonomy".to_string(), + has_dimensions: matched.has_dimensions, + }) +} + +fn build_date_aligned_kpi_output( + definition: &KpiDefinition, + order_index: i64, + category: &str, + periods: &[PeriodOutput], + matched: DateFactValues, +) -> Option { + let mut values = BTreeMap::>::new(); + + for (end_date, value) in &matched.values { + let Some(period_id) = select_period_id_for_end_date(periods, end_date, matched.prefer_duration) else { + continue; + }; + values.insert(period_id, *value); + } + + if values.values().all(|value| value.is_none()) { + return None; + } + + Some(KpiRowOutput { + key: definition.key.clone(), + label: definition.label.clone(), + category: category.to_string(), + unit: definition.unit.clone(), + order: (order_index + 1) * 10, + segment: None, + axis: None, + member: None, + values, + source_concepts: unique_sorted_strings(matched.source_concepts.into_iter().collect()), + source_fact_ids: unique_sorted_i64(matched.source_fact_ids.into_iter().collect()), + provenance_type: "taxonomy".to_string(), + has_dimensions: matched.has_dimensions, + 
}) +} + +fn concept_key_from_qname(qname: &str) -> String { + if let Some((prefix, local_name)) = qname.split_once(':') { + let namespace_uri = if prefix.eq_ignore_ascii_case("us-gaap") { + "http://fasb.org/us-gaap/2024".to_string() + } else if prefix.eq_ignore_ascii_case("ifrs-full") { + "http://xbrl.ifrs.org/taxonomy/2024-03-27/ifrs-full".to_string() + } else { + format!("urn:{prefix}") + }; + return format!("{namespace_uri}#{local_name}"); + } + + qname.to_string() +} + +fn unique_sorted_strings(values: Vec) -> Vec { + let mut values = values.into_iter().collect::>().into_iter().collect::>(); + values.sort(); + values +} + +fn unique_sorted_i64(values: Vec) -> Vec { + let mut values = values.into_iter().collect::>().into_iter().collect::>(); + values.sort(); + values +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::pack_selector::FiscalPack; + use crate::{FactOutput, PeriodOutput}; + + fn period(id: &str, end: &str, start: Option<&str>) -> PeriodOutput { + PeriodOutput { + id: id.to_string(), + filing_id: 1, + accession_number: "0000000000-00-000001".to_string(), + filing_date: end.to_string(), + period_start: start.map(|value| value.to_string()), + period_end: Some(end.to_string()), + filing_type: "10-Q".to_string(), + period_label: id.to_string(), + } + } + + fn fact(local_name: &str, period_start: Option<&str>, period_end: &str, value: f64) -> FactOutput { + FactOutput { + concept_key: format!("http://fasb.org/us-gaap/2024#{local_name}"), + qname: format!("us-gaap:{local_name}"), + namespace_uri: "http://fasb.org/us-gaap/2024".to_string(), + local_name: local_name.to_string(), + data_type: None, + statement_kind: Some("balance".to_string()), + role_uri: Some("balance".to_string()), + authoritative_concept_key: None, + mapping_method: None, + surface_key: None, + detail_parent_surface_key: None, + kpi_key: None, + residual_flag: false, + context_id: "c1".to_string(), + unit: Some("iso4217:USD".to_string()), + decimals: None, + precision: None, + 
nil: false, + value_num: value, + period_start: period_start.map(|value| value.to_string()), + period_end: Some(period_end.to_string()), + period_instant: None, + dimensions: vec![], + is_dimensionless: true, + source_file: None, + } + } + + #[test] + fn emits_taxonomy_growth_kpis_for_bank_pack() { + let periods = vec![ + period("prev", "2024-12-31", None), + period("curr", "2025-12-31", None), + ]; + let facts = vec![ + fact("FinancingReceivableRecordedInvestment", None, "2024-12-31", 100.0), + fact("FinancingReceivableRecordedInvestment", None, "2025-12-31", 120.0), + fact("DepositsLiabilities", None, "2024-12-31", 200.0), + fact("DepositsLiabilities", None, "2025-12-31", 250.0), + ]; + + let result = build_taxonomy_kpis(&periods, &facts, FiscalPack::BankLender) + .expect("taxonomy kpis should build"); + assert!(result.rows.iter().all(|row| row.provenance_type == "taxonomy")); + assert!(result.rows.iter().any(|row| row.key == "loan_growth")); + assert!(result.rows.iter().any(|row| row.key == "deposit_growth")); + } + + #[test] + fn emits_net_interest_margin_when_duration_and_instant_periods_share_end_date() { + let periods = vec![ + period("dur-prev", "2024-12-31", Some("2024-01-01")), + period("inst-prev", "2024-12-31", None), + period("dur-curr", "2025-12-31", Some("2025-01-01")), + period("inst-curr", "2025-12-31", None), + ]; + let facts = vec![ + fact("InterestIncomeExpenseNet", Some("2024-01-01"), "2024-12-31", 90.0), + fact("InterestIncomeExpenseNet", Some("2025-01-01"), "2025-12-31", 100.0), + fact("Assets", None, "2024-12-31", 1000.0), + fact("Assets", None, "2025-12-31", 1200.0), + ]; + + let result = build_taxonomy_kpis(&periods, &facts, FiscalPack::BankLender) + .expect("taxonomy kpis should build"); + let net_interest_margin = result + .rows + .iter() + .find(|row| row.key == "net_interest_margin") + .expect("net interest margin should be present"); + + assert_eq!(net_interest_margin.values.get("dur-prev").copied().flatten(), Some(0.09)); + 
assert_eq!(net_interest_margin.values.get("dur-curr").copied().flatten(), Some(100.0 / 1200.0)); + } +} diff --git a/rust/fiscal-xbrl-core/src/lib.rs b/rust/fiscal-xbrl-core/src/lib.rs new file mode 100644 index 0000000..615f31c --- /dev/null +++ b/rust/fiscal-xbrl-core/src/lib.rs @@ -0,0 +1,2069 @@ +use anyhow::{anyhow, Context, Result}; +use once_cell::sync::Lazy; +use regex::Regex; +use reqwest::blocking::Client; +use serde::{Deserialize, Serialize}; +use std::collections::{BTreeMap, HashMap, HashSet}; + +mod kpi_mapper; +mod metrics; +mod pack_selector; +mod surface_mapper; +mod taxonomy_loader; +mod universal_income; + +#[cfg(feature = "with-crabrl")] +use crabrl as _; + +pub const PARSER_ENGINE: &str = "fiscal-xbrl"; +pub const PARSER_VERSION: &str = env!("CARGO_PKG_VERSION"); + +static CONTEXT_RE: Lazy = Lazy::new(|| { + Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?context\b[^>]*\bid=["']([^"']+)["'][^>]*>(.*?)"#).unwrap() +}); +static UNIT_RE: Lazy = Lazy::new(|| { + Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?unit\b[^>]*\bid=["']([^"']+)["'][^>]*>(.*?)"#).unwrap() +}); +static FACT_RE: Lazy = Lazy::new(|| { + Regex::new(r#"(?is)<([a-zA-Z0-9_\-]+):([a-zA-Z0-9_\-.]+)\b([^>]*\bcontextRef=["'][^"']+["'][^>]*)>(.*?)"#).unwrap() +}); +static EXPLICIT_MEMBER_RE: Lazy = Lazy::new(|| { + Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?explicitMember\b[^>]*\bdimension=["']([^"']+)["'][^>]*>(.*?)"#).unwrap() +}); +static TYPED_MEMBER_RE: Lazy = Lazy::new(|| { + Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?typedMember\b[^>]*\bdimension=["']([^"']+)["'][^>]*>(.*?)"#).unwrap() +}); +static IDENTIFIER_RE: Lazy = Lazy::new(|| { + Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?identifier\b[^>]*\bscheme=["']([^"']+)["'][^>]*>(.*?)"#).unwrap() +}); +static SEGMENT_RE: Lazy = Lazy::new(|| { + Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?segment\b[^>]*>(.*?)"#).unwrap() +}); +static SCENARIO_RE: Lazy = Lazy::new(|| { + Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?scenario\b[^>]*>(.*?)"#).unwrap() +}); +static START_DATE_RE: Lazy 
= Lazy::new(|| { + Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?startDate>(.*?)"#).unwrap() +}); +static END_DATE_RE: Lazy = Lazy::new(|| { + Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?endDate>(.*?)"#).unwrap() +}); +static INSTANT_RE: Lazy = Lazy::new(|| { + Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?instant>(.*?)"#).unwrap() +}); +static MEASURE_RE: Lazy = Lazy::new(|| { + Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?measure>(.*?)"#).unwrap() +}); +static LABEL_LINK_RE: Lazy = Lazy::new(|| { + Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?labelLink\b[^>]*>(.*?)"#).unwrap() +}); +static PRESENTATION_LINK_RE: Lazy = Lazy::new(|| { + Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?presentationLink\b([^>]*)>(.*?)"#).unwrap() +}); +static LOC_RE: Lazy = Lazy::new(|| { + Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?loc\b([^>]*)/?>(?:)?"#).unwrap() +}); +static LABEL_RESOURCE_RE: Lazy = Lazy::new(|| { + Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?label\b([^>]*)>(.*?)"#).unwrap() +}); +static LABEL_ARC_RE: Lazy = Lazy::new(|| { + Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?labelArc\b([^>]*)/?>(?:)?"#).unwrap() +}); +static PRESENTATION_ARC_RE: Lazy = Lazy::new(|| { + Regex::new(r#"(?is)<(?:[a-z0-9_\-]+:)?presentationArc\b([^>]*)/?>(?:)?"#).unwrap() +}); +static ATTR_RE: Lazy = Lazy::new(|| Regex::new(r#"([a-zA-Z0-9:_\-]+)=["']([^"']+)["']"#).unwrap()); + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct HydrateFilingRequest { + pub filing_id: i64, + pub ticker: String, + pub cik: String, + pub accession_number: String, + pub filing_date: String, + pub filing_type: String, + pub filing_url: Option, + pub primary_document: Option, + pub cache_dir: Option, +} + +#[derive(Debug, Serialize)] +pub struct HydrateFilingResponse { + pub filing_id: i64, + pub ticker: String, + pub filing_date: String, + pub filing_type: String, + pub parse_status: String, + pub parse_error: Option, + pub source: String, + pub parser_engine: String, + pub parser_version: String, + pub taxonomy_regime: String, + pub fiscal_pack: 
Option, + pub periods: Vec, + pub faithful_rows: StatementRowMap, + pub statement_rows: StatementRowMap, + pub surface_rows: SurfaceRowMap, + pub detail_rows: DetailRowStatementMap, + pub kpi_rows: Vec, + pub contexts: Vec, + pub derived_metrics: FilingMetrics, + pub validation_result: ValidationResultOutput, + pub facts_count: usize, + pub concepts_count: usize, + pub dimensions_count: usize, + pub assets: Vec, + pub concepts: Vec, + pub facts: Vec, + pub metric_validations: Vec, + pub normalization_summary: NormalizationSummaryOutput, +} + +#[derive(Debug, Clone, Serialize, Default)] +pub struct FilingMetrics { + pub revenue: Option, + #[serde(rename = "netIncome")] + pub net_income: Option, + #[serde(rename = "totalAssets")] + pub total_assets: Option, + pub cash: Option, + pub debt: Option, +} + +#[derive(Debug, Clone, Serialize)] +pub struct ValidationResultOutput { + pub status: String, + pub checks: Vec, + #[serde(rename = "validatedAt")] + pub validated_at: Option, +} + +#[derive(Debug, Clone, Serialize)] +pub struct AssetOutput { + pub asset_type: String, + pub name: String, + pub url: String, + pub size_bytes: Option, + pub score: Option, + pub is_selected: bool, +} + +#[derive(Debug, Clone, Serialize)] +pub struct PeriodOutput { + pub id: String, + pub filing_id: i64, + pub accession_number: String, + pub filing_date: String, + pub period_start: Option, + pub period_end: Option, + pub filing_type: String, + pub period_label: String, +} + +#[derive(Debug, Clone, Serialize)] +pub struct ContextOutput { + pub context_id: String, + pub entity_identifier: Option, + pub entity_scheme: Option, + pub period_start: Option, + pub period_end: Option, + pub period_instant: Option, + pub segment_json: Option, + pub scenario_json: Option, +} + +#[derive(Debug, Clone, Serialize)] +pub struct StatementRowOutput { + pub key: String, + pub label: String, + pub concept_key: String, + pub qname: String, + pub namespace_uri: String, + pub local_name: String, + pub 
is_extension: bool, + pub statement: String, + pub role_uri: Option, + pub order: i64, + pub depth: i64, + pub parent_key: Option, + pub values: BTreeMap>, + pub units: BTreeMap>, + pub has_dimensions: bool, + pub source_fact_ids: Vec, +} + +#[derive(Debug, Clone, Serialize)] +pub struct SurfaceRowOutput { + pub key: String, + pub label: String, + pub category: String, + pub template_section: String, + pub order: i64, + pub unit: String, + pub values: BTreeMap>, + pub source_concepts: Vec, + pub source_row_keys: Vec, + pub source_fact_ids: Vec, + pub formula_key: Option, + pub has_dimensions: bool, + pub resolved_source_row_keys: BTreeMap>, + pub statement: Option, + pub detail_count: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub resolution_method: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub confidence: Option, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub warning_codes: Vec, +} + +#[derive(Debug, Clone, Serialize)] +pub struct DetailRowOutput { + pub key: String, + pub parent_surface_key: String, + pub label: String, + pub concept_key: String, + pub qname: String, + pub namespace_uri: String, + pub local_name: String, + pub unit: Option, + pub values: BTreeMap>, + pub source_fact_ids: Vec, + pub is_extension: bool, + pub dimensions_summary: Vec, + pub residual_flag: bool, +} + +#[derive(Debug, Clone, Serialize)] +pub struct KpiRowOutput { + pub key: String, + pub label: String, + pub category: String, + pub unit: String, + pub order: i64, + pub segment: Option, + pub axis: Option, + pub member: Option, + pub values: BTreeMap>, + pub source_concepts: Vec, + pub source_fact_ids: Vec, + pub provenance_type: String, + pub has_dimensions: bool, +} + +#[derive(Debug, Clone, Serialize)] +pub struct ConceptOutput { + pub concept_key: String, + pub qname: String, + pub namespace_uri: String, + pub local_name: String, + pub label: Option, + pub is_extension: bool, + pub balance: Option, + pub period_type: Option, 
+ pub data_type: Option, + pub statement_kind: Option, + pub role_uri: Option, + pub authoritative_concept_key: Option, + pub mapping_method: Option, + pub surface_key: Option, + pub detail_parent_surface_key: Option, + pub kpi_key: Option, + pub residual_flag: bool, + pub presentation_order: Option, + pub presentation_depth: Option, + pub parent_concept_key: Option, + pub is_abstract: bool, +} + +#[derive(Debug, Clone, Serialize)] +pub struct FactOutput { + pub concept_key: String, + pub qname: String, + pub namespace_uri: String, + pub local_name: String, + pub data_type: Option, + pub statement_kind: Option, + pub role_uri: Option, + pub authoritative_concept_key: Option, + pub mapping_method: Option, + pub surface_key: Option, + pub detail_parent_surface_key: Option, + pub kpi_key: Option, + pub residual_flag: bool, + pub context_id: String, + pub unit: Option, + pub decimals: Option, + pub precision: Option, + pub nil: bool, + pub value_num: f64, + pub period_start: Option, + pub period_end: Option, + pub period_instant: Option, + pub dimensions: Vec, + pub is_dimensionless: bool, + pub source_file: Option, +} + +#[derive(Debug, Clone, Serialize)] +pub struct DimensionOutput { + pub axis: String, + pub member: String, +} + +#[derive(Debug, Clone, Serialize)] +pub struct MetricValidationOutput { + pub metric_key: String, + pub taxonomy_value: Option, + pub llm_value: Option, + pub absolute_diff: Option, + pub relative_diff: Option, + pub status: String, + pub evidence_pages: Vec, + pub pdf_url: Option, + pub provider: Option, + pub model: Option, + pub error: Option, +} + +#[derive(Debug, Clone, Serialize, Default)] +pub struct NormalizationSummaryOutput { + pub surface_row_count: usize, + pub detail_row_count: usize, + pub kpi_row_count: usize, + pub unmapped_row_count: usize, + pub material_unmapped_row_count: usize, + pub warnings: Vec, +} + +pub type StatementRowMap = BTreeMap>; +pub type SurfaceRowMap = BTreeMap>; +pub type DetailRowStatementMap = 
BTreeMap>>; + +#[derive(Debug, Clone)] +struct ParsedContext { + id: String, + entity_identifier: Option, + entity_scheme: Option, + period_start: Option, + period_end: Option, + period_instant: Option, + dimensions: Vec, + segment: Option, + scenario: Option, +} + +#[derive(Debug, Clone)] +struct ParsedUnit { + measure: Option, +} + +#[derive(Debug, Clone)] +struct ParsedFact { + concept_key: String, + qname: String, + namespace_uri: String, + local_name: String, + data_type: Option, + context_id: String, + unit: Option, + decimals: Option, + precision: Option, + nil: bool, + value: f64, + period_start: Option, + period_end: Option, + period_instant: Option, + dimensions: Vec, + is_dimensionless: bool, + source_file: Option, +} + +#[derive(Debug, Clone)] +struct PresentationNode { + concept_key: String, + role_uri: String, + order: f64, + depth: i64, + parent_concept_key: Option, + is_abstract: bool, +} + +pub fn hydrate_filing(input: HydrateFilingRequest) -> Result { + let client = Client::builder() + .user_agent("Fiscal Clone ") + .build() + .context("unable to build HTTP client")?; + + let discovered = discover_filing_assets(&input, &client)?; + let empty_rows = empty_statement_row_map(); + let empty_surface_rows = empty_surface_row_map(); + let empty_detail_rows = empty_detail_row_map(); + let validation_result = ValidationResultOutput { + status: "not_run".to_string(), + checks: vec![], + validated_at: None, + }; + + let Some(instance_asset) = discovered + .assets + .iter() + .find(|asset| asset.asset_type == "instance" && asset.is_selected) + .cloned() + else { + return Ok(HydrateFilingResponse { + filing_id: input.filing_id, + ticker: input.ticker.to_uppercase(), + filing_date: input.filing_date, + filing_type: input.filing_type, + parse_status: "failed".to_string(), + parse_error: Some("No XBRL instance found".to_string()), + source: "legacy_html_fallback".to_string(), + parser_engine: PARSER_ENGINE.to_string(), + parser_version: 
PARSER_VERSION.to_string(), + taxonomy_regime: "unknown".to_string(), + fiscal_pack: Some("core".to_string()), + periods: vec![], + faithful_rows: empty_rows.clone(), + statement_rows: empty_rows, + surface_rows: empty_surface_rows, + detail_rows: empty_detail_rows, + kpi_rows: vec![], + contexts: vec![], + derived_metrics: FilingMetrics::default(), + validation_result, + facts_count: 0, + concepts_count: 0, + dimensions_count: 0, + assets: discovered.assets, + concepts: vec![], + facts: vec![], + metric_validations: vec![], + normalization_summary: NormalizationSummaryOutput { + surface_row_count: 0, + detail_row_count: 0, + kpi_row_count: 0, + unmapped_row_count: 0, + material_unmapped_row_count: 0, + warnings: vec![], + }, + }); + }; + + let instance_text = fetch_text(&client, &instance_asset.url).context("fetch request failed for XBRL instance")?; + let parsed_instance = parse_xbrl_instance(&instance_text, Some(instance_asset.name.clone())); + + let mut label_by_concept = HashMap::new(); + let mut presentation = Vec::new(); + let mut source = "xbrl_instance".to_string(); + let mut parse_error = None; + + for asset in discovered + .assets + .iter() + .filter(|asset| asset.is_selected && (asset.asset_type == "presentation" || asset.asset_type == "label")) + { + match fetch_text(&client, &asset.url) { + Ok(content) => { + if asset.asset_type == "presentation" { + let parsed = parse_presentation_linkbase(&content); + if !parsed.is_empty() { + source = "xbrl_instance_with_linkbase".to_string(); + } + presentation.extend(parsed); + } else { + for (key, value) in parse_label_linkbase(&content) { + label_by_concept.entry(key).or_insert(value); + } + } + } + Err(error) => { + if parse_error.is_none() { + parse_error = Some(error.to_string()); + } + } + } + } + + let materialized = materialize_taxonomy_statements( + input.filing_id, + &input.accession_number, + &input.filing_date, + &input.filing_type, + &parsed_instance.facts, + &presentation, + &label_by_concept, + ); 
+ let taxonomy_regime = infer_taxonomy_regime(&parsed_instance.facts); + let mut concepts = materialized.concepts; + let mut facts = materialized.facts; + let pack_selection = pack_selector::select_fiscal_pack(&materialized.statement_rows, &facts); + let fiscal_pack = pack_selection.pack.as_str().to_string(); + let mut compact_model = surface_mapper::build_compact_surface_model( + &materialized.periods, + &materialized.statement_rows, + &taxonomy_regime, + pack_selection.pack, + pack_selection.warnings, + )?; + universal_income::apply_universal_income_rows( + &materialized.periods, + &materialized.statement_rows, + &facts, + &taxonomy_regime, + pack_selection.pack, + &mut compact_model, + )?; + let kpi_result = kpi_mapper::build_taxonomy_kpis(&materialized.periods, &facts, pack_selection.pack)?; + compact_model.normalization_summary.kpi_row_count = kpi_result.rows.len(); + for warning in kpi_result.warnings { + if !compact_model.normalization_summary.warnings.contains(&warning) { + compact_model.normalization_summary.warnings.push(warning); + } + } + surface_mapper::merge_mapping_assignments( + &mut compact_model.concept_mappings, + kpi_result.mapping_assignments, + ); + surface_mapper::apply_mapping_assignments(&mut concepts, &mut facts, &compact_model.concept_mappings); + + let has_rows = materialized + .statement_rows + .values() + .map(|rows| rows.len()) + .sum::() + > 0; + let has_facts = !facts.is_empty(); + let parse_status = if has_rows && has_facts { + "ready" + } else if has_facts { + "partial" + } else { + "failed" + }; + + Ok(HydrateFilingResponse { + filing_id: input.filing_id, + ticker: input.ticker.to_uppercase(), + filing_date: input.filing_date, + filing_type: input.filing_type, + parse_status: parse_status.to_string(), + parse_error: if parse_status == "failed" { + Some(parse_error.unwrap_or_else(|| "No XBRL facts extracted".to_string())) + } else { + parse_error + }, + source, + parser_engine: PARSER_ENGINE.to_string(), + parser_version: 
PARSER_VERSION.to_string(), + taxonomy_regime, + fiscal_pack: Some(fiscal_pack), + periods: materialized.periods, + faithful_rows: materialized.statement_rows.clone(), + statement_rows: materialized.statement_rows, + surface_rows: compact_model.surface_rows, + detail_rows: compact_model.detail_rows, + kpi_rows: kpi_result.rows, + contexts: parsed_instance.contexts, + derived_metrics: metrics::derive_metrics(&facts), + validation_result, + facts_count: facts.len(), + concepts_count: concepts.len(), + dimensions_count: facts + .iter() + .flat_map(|fact| fact.dimensions.iter().map(|dimension| format!("{}::{}", dimension.axis, dimension.member))) + .collect::>() + .len(), + assets: discovered.assets, + concepts, + facts, + metric_validations: vec![], + normalization_summary: compact_model.normalization_summary, + }) +} + +fn infer_taxonomy_regime(facts: &[ParsedFact]) -> String { + if facts + .iter() + .any(|fact| fact.namespace_uri.to_lowercase().contains("us-gaap")) + { + return "us-gaap".to_string(); + } + + if facts + .iter() + .any(|fact| fact.namespace_uri.to_lowercase().contains("ifrs")) + { + return "ifrs-full".to_string(); + } + + "unknown".to_string() +} + +#[derive(Debug, Deserialize)] +struct FilingDirectoryPayload { + directory: Option, +} + +#[derive(Debug, Deserialize)] +struct FilingDirectory { + item: Option>, +} + +#[derive(Debug, Deserialize)] +struct FilingDirectoryItem { + name: Option, + size: Option, +} + +#[derive(Debug)] +struct DiscoveredAssets { + assets: Vec, +} + +fn discover_filing_assets(input: &HydrateFilingRequest, client: &Client) -> Result { + let Some(directory_url) = resolve_filing_directory_url( + input.filing_url.as_deref(), + &input.cik, + &input.accession_number, + ) else { + return Ok(DiscoveredAssets { assets: vec![] }); + }; + + let payload = fetch_json::(client, &format!("{directory_url}index.json")).ok(); + let mut discovered = Vec::new(); + + if let Some(items) = payload.and_then(|payload| 
payload.directory.and_then(|directory| directory.item)) { + for item in items { + let Some(name) = item.name.map(|name| name.trim().to_string()).filter(|name| !name.is_empty()) else { + continue; + }; + + let asset_type = classify_asset_type(&name); + let size_bytes = parse_size(item.size.as_ref()); + discovered.push(AssetOutput { + asset_type: asset_type.to_string(), + name: name.clone(), + url: format!("{directory_url}{}", name.trim_start_matches('/')), + size_bytes, + score: None, + is_selected: false, + }); + } + } + + if discovered.is_empty() { + if let Some(filing_url) = &input.filing_url { + discovered.push(AssetOutput { + asset_type: if filing_url.to_lowercase().ends_with(".xml") { + "instance".to_string() + } else { + "other".to_string() + }, + name: input + .primary_document + .clone() + .or_else(|| filing_url.split('/').last().map(|part| part.to_string())) + .unwrap_or_else(|| "primary_document".to_string()), + url: filing_url.clone(), + size_bytes: None, + score: None, + is_selected: true, + }); + } + } + + let selected_instance_url = discovered + .iter() + .filter(|asset| asset.asset_type == "instance") + .map(|asset| { + ( + asset.url.clone(), + score_instance(&asset.name, input.primary_document.as_deref()), + ) + }) + .max_by(|left, right| left.1.partial_cmp(&right.1).unwrap_or(std::cmp::Ordering::Equal)) + .map(|entry| entry.0); + + for asset in &mut discovered { + asset.score = if asset.asset_type == "instance" { + Some(score_instance(&asset.name, input.primary_document.as_deref())) + } else if asset.asset_type == "pdf" { + Some(score_pdf(&asset.name, asset.size_bytes)) + } else { + None + }; + + asset.is_selected = match asset.asset_type.as_str() { + "instance" => selected_instance_url + .as_ref() + .map(|url| url == &asset.url) + .unwrap_or(false), + "presentation" | "label" => true, + _ => false, + }; + } + + Ok(DiscoveredAssets { assets: discovered }) +} + +fn resolve_filing_directory_url(filing_url: Option<&str>, cik: &str, accession_number: 
&str) -> Option { + if let Some(filing_url) = filing_url.map(str::trim).filter(|value| !value.is_empty()) { + if let Some(last_slash) = filing_url.rfind('/') { + if last_slash > "https://".len() { + return Some(filing_url[..=last_slash].to_string()); + } + } + } + + let cik_path = normalize_cik_for_path(cik)?; + let accession_path = accession_number.replace('-', ""); + Some(format!( + "https://www.sec.gov/Archives/edgar/data/{cik_path}/{accession_path}/" + )) +} + +fn normalize_cik_for_path(value: &str) -> Option { + let digits = value.chars().filter(|char| char.is_ascii_digit()).collect::(); + if digits.is_empty() { + return None; + } + digits.parse::().ok().map(|parsed| parsed.to_string()) +} + +fn classify_asset_type(name: &str) -> &'static str { + let lower = name.to_lowercase(); + if lower.ends_with(".pdf") { + return "pdf"; + } + if lower.ends_with(".xsd") { + return "schema"; + } + if lower.ends_with(".xml") { + if lower.ends_with("_pre.xml") || lower.ends_with("-pre.xml") || lower.contains("presentation") { + return "presentation"; + } + if lower.ends_with("_lab.xml") || lower.ends_with("-lab.xml") || lower.contains("label") { + return "label"; + } + if lower.ends_with("_cal.xml") || lower.ends_with("-cal.xml") || lower.contains("calculation") { + return "calculation"; + } + if lower.ends_with("_def.xml") || lower.ends_with("-def.xml") || lower.contains("definition") { + return "definition"; + } + return "instance"; + } + "other" +} + +fn score_instance(name: &str, primary_document: Option<&str>) -> f64 { + let lower = name.to_lowercase(); + let mut score = 1.0; + if lower.ends_with("_htm.xml") { + score += 4.0; + } + if lower.ends_with("_ins.xml") { + score += 4.0; + } + if let Some(base_primary) = primary_document + .map(|value| value.replace(|char: char| char == '.' 
|| char == '-', "_")) + .map(|value| value.to_lowercase()) + { + let base = base_primary + .rsplit_once('_') + .map(|(head, _)| head.to_string()) + .unwrap_or(base_primary); + if !base.is_empty() && lower.contains(&base) { + score += 5.0; + } + } + if lower.contains("cal") || lower.contains("def") || lower.contains("lab") || lower.contains("pre") { + score -= 3.0; + } + score +} + +fn score_pdf(name: &str, size_bytes: Option) -> f64 { + let lower = name.to_lowercase(); + let mut score = 0.0; + if ["financial", "statement", "annual", "quarter", "10k", "10q"] + .iter() + .any(|needle| lower.contains(needle)) + { + score += 8.0; + } + if lower.contains("exhibit") { + score -= 2.0; + } + if size_bytes.unwrap_or_default() > 100_000 { + score += 1.0; + } + score +} + +fn parse_size(value: Option<&serde_json::Value>) -> Option { + match value { + Some(serde_json::Value::Number(number)) => number.as_i64(), + Some(serde_json::Value::String(raw)) => raw.parse::().ok(), + _ => None, + } +} + +fn fetch_text(client: &Client, url: &str) -> Result { + let response = client + .get(url) + .send() + .with_context(|| format!("request failed for {url}"))?; + if !response.status().is_success() { + return Err(anyhow!("request failed for {url} ({})", response.status())); + } + response.text().with_context(|| format!("unable to read response body for {url}")) +} + +fn fetch_json Deserialize<'de>>(client: &Client, url: &str) -> Result { + let response = client + .get(url) + .send() + .with_context(|| format!("request failed for {url}"))?; + if !response.status().is_success() { + return Err(anyhow!("request failed for {url} ({})", response.status())); + } + response + .json::() + .with_context(|| format!("unable to parse JSON response for {url}")) +} + +struct ParsedInstance { + contexts: Vec, + facts: Vec, +} + +fn parse_xbrl_instance(raw: &str, source_file: Option) -> ParsedInstance { + let namespaces = parse_namespace_map(raw, "xbrl"); + let context_by_id = parse_contexts(raw); + let 
unit_by_id = parse_units(raw); + let mut facts = Vec::new(); + + for captures in FACT_RE.captures_iter(raw) { + let prefix = captures.get(1).map(|value| value.as_str().trim()).unwrap_or_default(); + let local_name = captures.get(2).map(|value| value.as_str().trim()).unwrap_or_default(); + let attrs = captures.get(3).map(|value| value.as_str()).unwrap_or_default(); + let body = decode_xml_entities(captures.get(4).map(|value| value.as_str()).unwrap_or_default().trim()); + + if prefix.is_empty() || local_name.is_empty() || is_xbrl_infrastructure_prefix(prefix) { + continue; + } + + let attr_map = parse_attrs(attrs); + let Some(context_id) = attr_map.get("contextRef").cloned().or_else(|| attr_map.get("contextref").cloned()) else { + continue; + }; + + let Some(value) = parse_number(&body) else { + continue; + }; + + let namespace_uri = namespaces + .get(prefix) + .cloned() + .unwrap_or_else(|| format!("urn:unknown:{prefix}")); + let context = context_by_id.get(&context_id); + let unit_ref = attr_map.get("unitRef").cloned().or_else(|| attr_map.get("unitref").cloned()); + let unit = unit_ref + .as_ref() + .and_then(|unit_ref| unit_by_id.get(unit_ref)) + .and_then(|unit| unit.measure.clone()) + .or(unit_ref); + + facts.push(ParsedFact { + concept_key: format!("{namespace_uri}#{local_name}"), + qname: format!("{prefix}:{local_name}"), + namespace_uri, + local_name: local_name.to_string(), + data_type: None, + context_id: context_id.clone(), + unit, + decimals: attr_map.get("decimals").cloned(), + precision: attr_map.get("precision").cloned(), + nil: attr_map + .get("xsi:nil") + .or_else(|| attr_map.get("nil")) + .map(|value| value.eq_ignore_ascii_case("true")) + .unwrap_or(false), + value, + period_start: context.and_then(|value| value.period_start.clone()), + period_end: context.and_then(|value| value.period_end.clone()), + period_instant: context.and_then(|value| value.period_instant.clone()), + dimensions: context.map(|value| 
value.dimensions.clone()).unwrap_or_default(), + is_dimensionless: context.map(|value| value.dimensions.is_empty()).unwrap_or(true), + source_file: source_file.clone(), + }); + } + + let contexts = context_by_id + .values() + .map(|context| ContextOutput { + context_id: context.id.clone(), + entity_identifier: context.entity_identifier.clone(), + entity_scheme: context.entity_scheme.clone(), + period_start: context.period_start.clone(), + period_end: context.period_end.clone(), + period_instant: context.period_instant.clone(), + segment_json: context.segment.clone(), + scenario_json: context.scenario.clone(), + }) + .collect::>(); + + ParsedInstance { + contexts, + facts, + } +} + +fn parse_namespace_map(raw: &str, root_tag_hint: &str) -> HashMap { + let mut map = HashMap::new(); + let root_start = Regex::new(&format!(r#"(?is)<[^>]*{root_tag_hint}[^>]*>"#)) + .unwrap() + .find(raw) + .map(|match_| match_.as_str().to_string()) + .unwrap_or_else(|| raw.chars().take(1200).collect::()); + + for captures in Regex::new(r#"xmlns:([a-zA-Z0-9_\-]+)=["']([^"']+)["']"#) + .unwrap() + .captures_iter(&root_start) + { + if let (Some(prefix), Some(uri)) = (captures.get(1), captures.get(2)) { + map.insert(prefix.as_str().trim().to_string(), uri.as_str().trim().to_string()); + } + } + + map +} + +fn parse_contexts(raw: &str) -> HashMap { + let mut contexts = HashMap::new(); + + for captures in CONTEXT_RE.captures_iter(raw) { + let Some(context_id) = captures.get(1).map(|value| value.as_str().trim().to_string()) else { + continue; + }; + let block = captures.get(2).map(|value| value.as_str()).unwrap_or_default(); + let (entity_identifier, entity_scheme) = IDENTIFIER_RE + .captures(block) + .map(|captures| { + ( + captures.get(2).map(|value| decode_xml_entities(value.as_str().trim())), + captures.get(1).map(|value| decode_xml_entities(value.as_str().trim())), + ) + }) + .unwrap_or((None, None)); + + let period_start = START_DATE_RE + .captures(block) + .and_then(|captures| 
captures.get(1)) + .map(|value| decode_xml_entities(value.as_str().trim())); + let period_end = END_DATE_RE + .captures(block) + .and_then(|captures| captures.get(1)) + .map(|value| decode_xml_entities(value.as_str().trim())); + let period_instant = INSTANT_RE + .captures(block) + .and_then(|captures| captures.get(1)) + .map(|value| decode_xml_entities(value.as_str().trim())); + + let segment = SEGMENT_RE + .captures(block) + .and_then(|captures| captures.get(1)) + .map(|value| parse_dimension_container(value.as_str())); + let scenario = SCENARIO_RE + .captures(block) + .and_then(|captures| captures.get(1)) + .map(|value| parse_dimension_container(value.as_str())); + + let mut dimensions = Vec::new(); + if let Some(segment_value) = segment.as_ref() { + if let Some(members) = segment_value.get("explicitMembers").and_then(|value| value.as_array()) { + for member in members { + if let (Some(axis), Some(member_value)) = ( + member.get("axis").and_then(|value| value.as_str()), + member.get("member").and_then(|value| value.as_str()), + ) { + dimensions.push(DimensionOutput { + axis: axis.to_string(), + member: member_value.to_string(), + }); + } + } + } + } + if let Some(scenario_value) = scenario.as_ref() { + if let Some(members) = scenario_value.get("explicitMembers").and_then(|value| value.as_array()) { + for member in members { + if let (Some(axis), Some(member_value)) = ( + member.get("axis").and_then(|value| value.as_str()), + member.get("member").and_then(|value| value.as_str()), + ) { + dimensions.push(DimensionOutput { + axis: axis.to_string(), + member: member_value.to_string(), + }); + } + } + } + } + + contexts.insert( + context_id.clone(), + ParsedContext { + id: context_id, + entity_identifier, + entity_scheme, + period_start, + period_end, + period_instant, + dimensions, + segment, + scenario, + }, + ); + } + + contexts +} + +fn parse_dimension_container(raw: &str) -> serde_json::Value { + let explicit_members = EXPLICIT_MEMBER_RE + .captures_iter(raw) + 
.filter_map(|captures| { + Some(serde_json::json!({ + "axis": decode_xml_entities(captures.get(1)?.as_str().trim()), + "member": decode_xml_entities(captures.get(2)?.as_str().trim()) + })) + }) + .collect::>(); + let typed_members = TYPED_MEMBER_RE + .captures_iter(raw) + .filter_map(|captures| { + Some(serde_json::json!({ + "axis": decode_xml_entities(captures.get(1)?.as_str().trim()), + "value": decode_xml_entities(captures.get(2)?.as_str().trim()) + })) + }) + .collect::>(); + + serde_json::json!({ + "explicitMembers": explicit_members, + "typedMembers": typed_members + }) +} + +fn parse_units(raw: &str) -> HashMap { + let mut units = HashMap::new(); + for captures in UNIT_RE.captures_iter(raw) { + let Some(id) = captures.get(1).map(|value| value.as_str().trim().to_string()) else { + continue; + }; + let block = captures.get(2).map(|value| value.as_str()).unwrap_or_default(); + let measures = MEASURE_RE + .captures_iter(block) + .filter_map(|captures| captures.get(1)) + .map(|value| decode_xml_entities(value.as_str().trim())) + .filter(|value| !value.is_empty()) + .collect::>(); + + let measure = if measures.len() == 1 { + measures.first().cloned() + } else if measures.len() > 1 { + Some(measures.join("/")) + } else { + None + }; + + units.insert(id, ParsedUnit { measure }); + } + units +} + +fn is_xbrl_infrastructure_prefix(prefix: &str) -> bool { + matches!( + prefix.to_ascii_lowercase().as_str(), + "xbrli" | "xlink" | "link" | "xbrldi" | "xbrldt" + ) +} + +fn parse_attrs(raw: &str) -> HashMap { + let mut map = HashMap::new(); + for captures in ATTR_RE.captures_iter(raw) { + if let (Some(name), Some(value)) = (captures.get(1), captures.get(2)) { + map.insert(name.as_str().to_string(), decode_xml_entities(value.as_str())); + } + } + map +} + +fn decode_xml_entities(value: &str) -> String { + value + .replace("&", "&") + .replace("<", "<") + .replace(">", ">") + .replace(""", "\"") + .replace("'", "'") + .replace(" ", " ") + .replace(" ", " ") +} + +fn 
/// Parse a (possibly iXBRL-formatted) numeric literal into an `f64`.
///
/// Handles thousands separators, a leading `$`, parenthesized accounting
/// negatives, the Unicode minus sign (U+2212), and embedded markup tags
/// (dropped). Returns `None` for empty values and dash-only placeholders
/// such as `-` or `--`.
///
/// Markup is stripped with a single character scan instead of compiling a
/// fresh regex on every call — this runs once per fact, a hot path.
/// NOTE(review): on malformed markup (an unclosed `<`) the remainder of the
/// text is now dropped instead of failing the parse; well-formed input
/// behaves exactly as before.
fn parse_number(raw: &str) -> Option<f64> {
    let trimmed = raw.trim();
    if trimmed.is_empty() || trimmed.chars().all(|char| char == '-') {
        return None;
    }
    // "(1,234)" style accounting negative.
    let negative = trimmed.starts_with('(') && trimmed.ends_with(')');

    let mut normalized = String::with_capacity(trimmed.len());
    let mut in_tag = false;
    for char in trimmed.chars() {
        match char {
            '<' => in_tag = true,
            '>' if in_tag => in_tag = false,
            _ if in_tag => {}
            ',' | '$' | '(' | ')' => {}
            '\u{2212}' => normalized.push('-'),
            char if char.is_whitespace() => {}
            char => normalized.push(char),
        }
    }

    let parsed = normalized.parse::<f64>().ok()?;
    Some(if negative { -parsed.abs() } else { parsed })
}
parse_attrs(captures.get(1).map(|value| value.as_str()).unwrap_or_default()); + let Some(from) = attrs.get("xlink:from").cloned() else { + continue; + }; + let Some(to) = attrs.get("xlink:to").cloned() else { + continue; + }; + let Some(concept_key) = loc_by_label.get(&from) else { + continue; + }; + let Some((label, role)) = resource_by_label.get(&to) else { + continue; + }; + let priority = label_priority(role.as_deref()); + let current = preferred.get(concept_key).cloned(); + if current.as_ref().map(|(_, current_priority)| priority > *current_priority).unwrap_or(true) { + preferred.insert(concept_key.clone(), (label.clone(), priority)); + } + } + } + + preferred + .into_iter() + .map(|(key, (value, _))| (key, value)) + .collect() +} + +fn parse_presentation_linkbase(raw: &str) -> Vec { + let namespaces = parse_namespace_map(raw, "linkbase"); + let mut rows = Vec::new(); + + for captures in PRESENTATION_LINK_RE.captures_iter(raw) { + let link_attrs = parse_attrs(captures.get(1).map(|value| value.as_str()).unwrap_or_default()); + let Some(role_uri) = link_attrs.get("xlink:role").cloned() else { + continue; + }; + let block = captures.get(2).map(|value| value.as_str()).unwrap_or_default(); + let mut loc_by_label = HashMap::::new(); + let mut children_by_label = HashMap::>::new(); + let mut incoming = HashSet::::new(); + let mut all_referenced = HashSet::::new(); + + for captures in LOC_RE.captures_iter(block) { + let attrs = parse_attrs(captures.get(1).map(|value| value.as_str()).unwrap_or_default()); + let Some(label) = attrs.get("xlink:label").cloned() else { + continue; + }; + let Some(href) = attrs.get("xlink:href").cloned() else { + continue; + }; + let Some(qname) = qname_from_href(&href) else { + continue; + }; + let Some((concept_key, qname, local_name)) = concept_from_qname(&qname, &namespaces) else { + continue; + }; + loc_by_label.insert(label, (concept_key, qname, local_name.to_ascii_lowercase().contains("abstract"))); + } + + for captures in 
PRESENTATION_ARC_RE.captures_iter(block) { + let attrs = parse_attrs(captures.get(1).map(|value| value.as_str()).unwrap_or_default()); + let Some(from) = attrs.get("xlink:from").cloned() else { + continue; + }; + let Some(to) = attrs.get("xlink:to").cloned() else { + continue; + }; + if !loc_by_label.contains_key(&from) || !loc_by_label.contains_key(&to) { + continue; + } + let order = attrs + .get("order") + .and_then(|value| value.parse::().ok()) + .unwrap_or_else(|| children_by_label.get(&from).map(|children| children.len() as f64 + 1.0).unwrap_or(1.0)); + children_by_label.entry(from.clone()).or_default().push((to.clone(), order)); + incoming.insert(to.clone()); + all_referenced.insert(from); + all_referenced.insert(to); + } + + let roots = all_referenced + .iter() + .filter(|label| !incoming.contains(*label)) + .cloned() + .collect::>(); + let mut visited = HashSet::::new(); + + fn dfs( + label: &str, + depth: i64, + parent_label: Option<&str>, + base_order: f64, + role_uri: &str, + loc_by_label: &HashMap, + children_by_label: &HashMap>, + rows: &mut Vec, + visited: &mut HashSet, + ) { + let Some((concept_key, _qname, is_abstract)) = loc_by_label.get(label) else { + return; + }; + let path_key = format!("{}::{label}::{depth}", parent_label.unwrap_or("root")); + if !visited.insert(path_key) { + return; + } + + let parent_concept_key = parent_label.and_then(|parent| loc_by_label.get(parent).map(|(concept_key, _, _)| concept_key.clone())); + rows.push(PresentationNode { + concept_key: concept_key.clone(), + role_uri: role_uri.to_string(), + order: base_order, + depth, + parent_concept_key, + is_abstract: *is_abstract, + }); + + let mut children = children_by_label.get(label).cloned().unwrap_or_default(); + children.sort_by(|left, right| left.1.partial_cmp(&right.1).unwrap_or(std::cmp::Ordering::Equal)); + for (index, (child_label, _)) in children.into_iter().enumerate() { + dfs( + &child_label, + depth + 1, + Some(label), + base_order + (index as f64 + 1.0) / 
1000.0, + role_uri, + loc_by_label, + children_by_label, + rows, + visited, + ); + } + } + + for (index, root) in roots.iter().enumerate() { + dfs( + root, + 0, + None, + index as f64 + 1.0, + &role_uri, + &loc_by_label, + &children_by_label, + &mut rows, + &mut visited, + ); + } + } + + rows +} + +fn qname_from_href(href: &str) -> Option { + let fragment = href.split('#').nth(1).unwrap_or(href).trim(); + if fragment.is_empty() { + return None; + } + let cleaned = fragment.trim_start_matches("loc_"); + if cleaned.contains(':') { + return Some(cleaned.to_string()); + } + cleaned + .split_once('_') + .map(|(prefix, local)| format!("{prefix}:{local}")) +} + +fn concept_from_qname( + qname: &str, + namespaces: &HashMap, +) -> Option<(String, String, String)> { + let (prefix, local_name) = qname.split_once(':')?; + let namespace_uri = namespaces + .get(prefix) + .cloned() + .unwrap_or_else(|| format!("urn:unknown:{prefix}")); + Some(( + format!("{namespace_uri}#{local_name}"), + qname.to_string(), + local_name.to_string(), + )) +} + +fn label_priority(role: Option<&str>) -> i64 { + let normalized = role.unwrap_or_default().to_ascii_lowercase(); + if normalized.ends_with("/label") { + 4 + } else if normalized.ends_with("/terselabel") { + 3 + } else if normalized.ends_with("/verboselabel") { + 2 + } else if normalized.is_empty() { + 0 + } else { + 1 + } +} + +struct MaterializedStatements { + periods: Vec, + statement_rows: StatementRowMap, + concepts: Vec, + facts: Vec, +} + +fn materialize_taxonomy_statements( + filing_id: i64, + accession_number: &str, + filing_date: &str, + filing_type: &str, + facts: &[ParsedFact], + presentation: &[PresentationNode], + label_by_concept: &HashMap, +) -> MaterializedStatements { + let compact_accession = accession_number.replace('-', ""); + let mut period_by_signature = HashMap::::new(); + + for fact in facts { + let signature = period_signature(fact); + if period_by_signature.contains_key(&signature) { + continue; + } + let date = 
fact + .period_end + .clone() + .or_else(|| fact.period_instant.clone()) + .unwrap_or_else(|| filing_date.to_string()); + let id = format!("{date}-{compact_accession}-{}", period_by_signature.len() + 1); + let period_label = if fact.period_instant.is_some() && fact.period_start.is_none() { + "Instant".to_string() + } else if fact.period_start.is_some() && fact.period_end.is_some() { + format!( + "{} to {}", + fact.period_start.clone().unwrap_or_default(), + fact.period_end.clone().unwrap_or_default() + ) + } else { + "Filing Period".to_string() + }; + period_by_signature.insert( + signature, + PeriodOutput { + id, + filing_id, + accession_number: accession_number.to_string(), + filing_date: filing_date.to_string(), + period_start: fact.period_start.clone(), + period_end: fact.period_end.clone().or_else(|| fact.period_instant.clone()), + filing_type: filing_type.to_string(), + period_label, + }, + ); + } + + let mut periods = period_by_signature.values().cloned().collect::>(); + periods.sort_by(|left, right| { + let left_key = left.period_end.clone().unwrap_or_else(|| left.filing_date.clone()); + let right_key = right.period_end.clone().unwrap_or_else(|| right.filing_date.clone()); + left_key.cmp(&right_key).then_with(|| left.id.cmp(&right.id)) + }); + let period_id_by_signature = period_by_signature + .iter() + .map(|(signature, period)| (signature.clone(), period.id.clone())) + .collect::>(); + + let mut presentation_by_concept = HashMap::>::new(); + for node in presentation { + presentation_by_concept.entry(node.concept_key.clone()).or_default().push(node); + } + + let mut grouped_by_statement = empty_parsed_fact_map(); + let mut enriched_facts = Vec::new(); + + for (index, fact) in facts.iter().enumerate() { + let nodes = presentation_by_concept + .get(&fact.concept_key) + .cloned() + .unwrap_or_default(); + let best_node = nodes.first().copied(); + let statement_kind = best_node + .and_then(|node| classify_statement_role(&node.role_uri)) + .or_else(|| 
concept_statement_fallback(&fact.local_name)); + + let fact_output = FactOutput { + concept_key: fact.concept_key.clone(), + qname: fact.qname.clone(), + namespace_uri: fact.namespace_uri.clone(), + local_name: fact.local_name.clone(), + data_type: fact.data_type.clone(), + statement_kind: statement_kind.clone(), + role_uri: best_node.map(|node| node.role_uri.clone()), + authoritative_concept_key: None, + mapping_method: None, + surface_key: None, + detail_parent_surface_key: None, + kpi_key: None, + residual_flag: false, + context_id: fact.context_id.clone(), + unit: fact.unit.clone(), + decimals: fact.decimals.clone(), + precision: fact.precision.clone(), + nil: fact.nil, + value_num: fact.value, + period_start: fact.period_start.clone(), + period_end: fact.period_end.clone(), + period_instant: fact.period_instant.clone(), + dimensions: fact.dimensions.clone(), + is_dimensionless: fact.is_dimensionless, + source_file: fact.source_file.clone(), + }; + + if let Some(statement_kind) = statement_kind.clone() { + if let Some(statement_key) = statement_key_ref(&statement_kind) { + grouped_by_statement + .entry(statement_key) + .or_default() + .entry(fact.concept_key.clone()) + .or_default() + .push((index as i64 + 1, fact.clone(), best_node.cloned())); + } + } + + enriched_facts.push(fact_output); + } + + let mut statement_rows = empty_statement_row_map(); + let mut concepts = Vec::::new(); + + for statement_kind in statement_keys() { + let concept_groups = grouped_by_statement.remove(statement_kind).unwrap_or_default(); + let mut concept_keys = HashSet::::new(); + for node in presentation.iter().filter(|node| classify_statement_role(&node.role_uri).as_deref() == Some(statement_kind)) { + concept_keys.insert(node.concept_key.clone()); + } + for concept_key in concept_groups.keys() { + concept_keys.insert(concept_key.clone()); + } + + let mut ordered_concepts = concept_keys + .into_iter() + .map(|concept_key| { + let nodes = presentation + .iter() + .filter(|node| 
node.concept_key == concept_key && classify_statement_role(&node.role_uri).as_deref() == Some(statement_kind)) + .collect::>(); + let order = nodes.iter().map(|node| node.order).fold(f64::INFINITY, f64::min); + let depth = nodes.iter().map(|node| node.depth).min().unwrap_or(0); + let role_uri = nodes.first().map(|node| node.role_uri.clone()); + let parent_concept_key = nodes.first().and_then(|node| node.parent_concept_key.clone()); + (concept_key, order, depth, role_uri, parent_concept_key) + }) + .collect::>(); + ordered_concepts.sort_by(|left, right| { + left.1 + .partial_cmp(&right.1) + .unwrap_or(std::cmp::Ordering::Equal) + .then_with(|| left.0.cmp(&right.0)) + }); + + for (concept_key, presentation_order, depth, role_uri, parent_concept_key) in ordered_concepts { + let fact_group = concept_groups.get(&concept_key).cloned().unwrap_or_default(); + let (namespace_uri, local_name) = split_concept_key(&concept_key); + let qname = fact_group + .first() + .map(|(_, fact, _)| fact.qname.clone()) + .unwrap_or_else(|| format!("unknown:{local_name}")); + let label = label_by_concept + .get(&concept_key) + .cloned() + .unwrap_or_else(|| local_name_to_label(&local_name)); + let mut values = BTreeMap::>::new(); + let mut units = BTreeMap::>::new(); + let mut source_fact_ids = Vec::::new(); + let mut has_dimensions = false; + + let mut fact_groups = HashMap::>::new(); + for (fact_id, fact, _) in fact_group.iter() { + fact_groups + .entry(period_signature(fact)) + .or_default() + .push((*fact_id, fact.clone())); + } + + for (signature, grouped_facts) in fact_groups { + let Some(period_id) = period_id_by_signature.get(&signature) else { + continue; + }; + let preferred = pick_preferred_fact(&grouped_facts); + if let Some((fact_id, fact)) = preferred { + values.insert(period_id.clone(), Some(fact.value)); + units.insert(period_id.clone(), fact.unit.clone()); + source_fact_ids.push(*fact_id); + has_dimensions = has_dimensions || !fact.is_dimensionless; + } + } + + let row = 
StatementRowOutput { + key: concept_key.clone(), + label: label.clone(), + concept_key: concept_key.clone(), + qname: qname.clone(), + namespace_uri: namespace_uri.clone(), + local_name: local_name.clone(), + is_extension: !is_standard_namespace(&namespace_uri), + statement: statement_kind.to_string(), + role_uri: role_uri.clone(), + order: if presentation_order.is_finite() { + (presentation_order * 1000.0).round() as i64 + } else { + 1_000_000 + }, + depth, + parent_key: parent_concept_key.clone(), + values, + units, + has_dimensions, + source_fact_ids: { + source_fact_ids.sort(); + source_fact_ids + }, + }; + + if let Some(statement_rows) = statement_rows.get_mut(statement_kind) { + statement_rows.push(row.clone()); + } + + concepts.push(ConceptOutput { + concept_key, + qname, + namespace_uri, + local_name, + label: Some(label), + is_extension: !is_standard_namespace(&row.namespace_uri), + balance: None, + period_type: None, + data_type: None, + statement_kind: Some(statement_kind.to_string()), + role_uri, + authoritative_concept_key: None, + mapping_method: None, + surface_key: None, + detail_parent_surface_key: None, + kpi_key: None, + residual_flag: false, + presentation_order: if presentation_order.is_finite() { + Some(presentation_order) + } else { + None + }, + presentation_depth: Some(depth), + parent_concept_key, + is_abstract: presentation + .iter() + .find(|node| node.concept_key == row.concept_key) + .map(|node| node.is_abstract) + .unwrap_or(false), + }); + } + } + + MaterializedStatements { + periods, + statement_rows, + concepts, + facts: enriched_facts, + } +} + +fn empty_parsed_fact_map( +) -> HashMap<&'static str, HashMap)>>> { + let mut map = HashMap::new(); + for key in statement_keys() { + map.insert(key, HashMap::new()); + } + map +} + +fn empty_statement_row_map() -> StatementRowMap { + statement_keys() + .into_iter() + .map(|key| (key.to_string(), Vec::new())) + .collect() +} + +fn empty_surface_row_map() -> SurfaceRowMap { + 
statement_keys() + .into_iter() + .map(|key| (key.to_string(), Vec::new())) + .collect() +} + +fn empty_detail_row_map() -> DetailRowStatementMap { + statement_keys() + .into_iter() + .map(|key| (key.to_string(), BTreeMap::new())) + .collect() +} + +fn statement_keys() -> [&'static str; 5] { + ["income", "balance", "cash_flow", "equity", "comprehensive_income"] +} + +fn statement_key_ref(value: &str) -> Option<&'static str> { + match value { + "income" => Some("income"), + "balance" => Some("balance"), + "cash_flow" => Some("cash_flow"), + "equity" => Some("equity"), + "comprehensive_income" => Some("comprehensive_income"), + _ => None, + } +} + +fn pick_preferred_fact(grouped_facts: &[(i64, ParsedFact)]) -> Option<&(i64, ParsedFact)> { + grouped_facts.iter().max_by(|left, right| { + let left_dimension_score = if left.1.is_dimensionless { 1 } else { 0 }; + let right_dimension_score = if right.1.is_dimensionless { 1 } else { 0 }; + left_dimension_score + .cmp(&right_dimension_score) + .then_with(|| { + let left_date = left + .1 + .period_end + .as_ref() + .or(left.1.period_instant.as_ref()) + .cloned() + .unwrap_or_default(); + let right_date = right + .1 + .period_end + .as_ref() + .or(right.1.period_instant.as_ref()) + .cloned() + .unwrap_or_default(); + left_date.cmp(&right_date) + }) + .then_with(|| left.1.value.abs().partial_cmp(&right.1.value.abs()).unwrap_or(std::cmp::Ordering::Equal)) + }) +} + +fn period_signature(fact: &ParsedFact) -> String { + format!( + "start:{}|end:{}|instant:{}", + fact.period_start.clone().unwrap_or_default(), + fact.period_end.clone().unwrap_or_default(), + fact.period_instant.clone().unwrap_or_default() + ) +} + +fn split_concept_key(concept_key: &str) -> (String, String) { + concept_key + .rsplit_once('#') + .map(|(namespace_uri, local_name)| (namespace_uri.to_string(), local_name.to_string())) + .unwrap_or_else(|| ("urn:unknown".to_string(), concept_key.to_string())) +} + +fn local_name_to_label(local_name: &str) -> String { + 
// Continuation: body of `local_name_to_label`, whose signature opens on the previous line.
    // Insert spaces at lower/digit -> Upper boundaries, then split runs of
    // capitals ("EBITDAMargin" -> "EBITDA Margin"); finally normalize underscores.
    let spaced = Regex::new(r#"([a-z0-9])([A-Z])"#)
        .unwrap()
        .replace_all(local_name, "$1 $2")
        .to_string();
    Regex::new(r#"([A-Z]+)([A-Z][a-z])"#)
        .unwrap()
        .replace_all(&spaced, "$1 $2")
        .replace('_', " ")
        .trim()
        .to_string()
}

/// Classifies a presentation role URI into one of the five statement kinds.
///
/// NOTE(review): return type restored to `Option<String>`; the checked-in text
/// had the generic parameter stripped. Regexes are recompiled on every call —
/// consider hoisting into `LazyLock` statics if this shows up in profiles.
fn classify_statement_role(role_uri: &str) -> Option<String> {
    let normalized = role_uri.to_ascii_lowercase();
    // Order matters: cash-flow/equity/comprehensive cues are checked before the
    // broad balance/income patterns so "comprehensive income" is not caught by
    // the income branch.
    if Regex::new(r#"cash\s*flow|statementsof?cashflows|netcash"#)
        .unwrap()
        .is_match(&normalized)
    {
        return Some("cash_flow".to_string());
    }
    if Regex::new(r#"shareholders?|stockholders?|equity|retainedearnings"#)
        .unwrap()
        .is_match(&normalized)
    {
        return Some("equity".to_string());
    }
    if Regex::new(r#"comprehensive\s*income"#)
        .unwrap()
        .is_match(&normalized)
    {
        return Some("comprehensive_income".to_string());
    }
    if Regex::new(r#"balance\s*sheet|financial\s*position|assets?andliabilities"#)
        .unwrap()
        .is_match(&normalized)
    {
        return Some("balance".to_string());
    }
    if Regex::new(r#"operations|income\s*statement|statementsofincome|profit"#)
        .unwrap()
        .is_match(&normalized)
    {
        return Some("income".to_string());
    }
    None
}

/// Fallback statement classification from the concept's local name alone, used
/// when no presentation role matched. Same branch ordering caveat as above.
/// NOTE(review): return type restored to `Option<String>` (stripped generic).
fn concept_statement_fallback(local_name: &str) -> Option<String> {
    let normalized = local_name.to_ascii_lowercase();
    if Regex::new(r#"cash|operatingactivities|investingactivities|financingactivities"#)
        .unwrap()
        .is_match(&normalized)
    {
        return Some("cash_flow".to_string());
    }
    if Regex::new(r#"equity|retainedearnings|additionalpaidincapital"#)
        .unwrap()
        .is_match(&normalized)
    {
        return Some("equity".to_string());
    }
    if normalized.contains("comprehensiveincome") {
        return Some("comprehensive_income".to_string());
    }
    if Regex::new(
        r#"asset|liabilit|debt|financingreceivable|loansreceivable|deposits|allowanceforcreditloss|futurepolicybenefits|policyholderaccountbalances|unearnedpremiums|realestateinvestmentproperty|grossatcarryingvalue|investmentproperty"#,
    )
    .unwrap()
    .is_match(&normalized)
    {
        return
Some("balance".to_string());
    }
    if Regex::new(
        r#"revenue|income|profit|expense|costof|leaseincome|rental|premiums|claims|underwriting|policyacquisition|interestincome|interestexpense|noninterest|leasedandrentedproperty"#,
    )
    .unwrap()
    .is_match(&normalized)
    {
        return Some("income".to_string());
    }
    None
}

/// True when the namespace belongs to a standard taxonomy (US-GAAP, IFRS, DEI)
/// rather than a company extension namespace.
fn is_standard_namespace(namespace_uri: &str) -> bool {
    let lower = namespace_uri.to_ascii_lowercase();
    ["us-gaap", "ifrs", "/dei/", "xbrl.sec.gov/dei"]
        .iter()
        .any(|needle| lower.contains(needle))
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::pack_selector::FiscalPack;

    /// Test fixture: a reporting period ending on `period_end`.
    fn period(id: &str, period_end: &str) -> PeriodOutput {
        PeriodOutput {
            id: id.to_string(),
            filing_id: 1,
            accession_number: "0000000000-00-000001".to_string(),
            filing_date: "2025-12-31".to_string(),
            period_start: Some("2025-01-01".to_string()),
            period_end: Some(period_end.to_string()),
            filing_type: "10-K".to_string(),
            period_label: period_end.to_string(),
        }
    }

    /// Test fixture: a statement row for `qname` carrying `values` per period.
    fn row(
        key: &str,
        qname: &str,
        statement: &str,
        order: i64,
        values: &[(&str, f64)],
    ) -> StatementRowOutput {
        let namespace_uri = qname
            .split_once(':')
            .map(|(prefix, _)| {
                if prefix == "us-gaap" {
                    "http://fasb.org/us-gaap/2024".to_string()
                } else {
                    format!("urn:{prefix}")
                }
            })
            .unwrap_or_else(|| "urn:unknown".to_string());
        let local_name = qname
            .split_once(':')
            .map(|(_, local_name)| local_name.to_string())
            .unwrap_or_else(|| qname.to_string());

        StatementRowOutput {
            key: key.to_string(),
            label: local_name_to_label(&local_name),
            concept_key: format!("{namespace_uri}#{local_name}"),
            qname: qname.to_string(),
            namespace_uri,
            local_name,
            is_extension: false,
            statement: statement.to_string(),
            role_uri: Some(statement.to_string()),
            order,
            depth: 0,
            parent_key: None,
            values: values
                .iter()
                .map(|(period_id, value)| (period_id.to_string(), Some(*value)))
                .collect(),
            units:
values + .iter() + .map(|(period_id, _)| (period_id.to_string(), Some("iso4217:USD".to_string()))) + .collect(), + has_dimensions: false, + source_fact_ids: vec![order], + } + } + + #[test] + fn builds_compact_surface_rows_from_core_pack() { + let periods = vec![period("2024", "2024-12-31"), period("2025", "2025-12-31")]; + let mut statement_rows = empty_statement_row_map(); + statement_rows.insert( + "income".to_string(), + vec![ + row( + "revenue-row", + "us-gaap:RevenueFromContractWithCustomerExcludingAssessedTax", + "income", + 10, + &[("2024", 100.0), ("2025", 120.0)], + ), + row( + "operating-expenses-row", + "us-gaap:OperatingExpenses", + "income", + 20, + &[("2024", 40.0), ("2025", 50.0)], + ), + row( + "sga-row", + "us-gaap:SellingGeneralAndAdministrativeExpense", + "income", + 30, + &[("2024", 25.0), ("2025", 31.0)], + ), + row( + "rd-row", + "us-gaap:ResearchAndDevelopmentExpense", + "income", + 40, + &[("2024", 15.0), ("2025", 19.0)], + ), + row( + "net-income-row", + "us-gaap:NetIncomeLoss", + "income", + 50, + &[("2024", 22.0), ("2025", 30.0)], + ), + row( + "unmapped-row", + "company:OtherOperatingCharges", + "income", + 60, + &[("2024", 3.0), ("2025", 4.0)], + ), + ], + ); + statement_rows.insert( + "balance".to_string(), + vec![row( + "assets-row", + "us-gaap:Assets", + "balance", + 70, + &[("2024", 500.0), ("2025", 550.0)], + )], + ); + statement_rows.insert( + "cash_flow".to_string(), + vec![row( + "ocf-row", + "us-gaap:NetCashProvidedByUsedInOperatingActivities", + "cash_flow", + 80, + &[("2024", 60.0), ("2025", 65.0)], + )], + ); + + let model = surface_mapper::build_compact_surface_model( + &periods, + &statement_rows, + "us-gaap", + FiscalPack::Core, + vec![], + ) + .expect("core pack should load and map"); + let income_surface_rows = model.surface_rows.get("income").expect("income surface rows"); + let op_expenses = income_surface_rows + .iter() + .find(|row| row.key == "operating_expenses") + .expect("operating expenses surface row"); + let 
revenue = income_surface_rows + .iter() + .find(|row| row.key == "revenue") + .expect("revenue surface row"); + + assert_eq!(revenue.values.get("2025").copied().flatten(), Some(120.0)); + assert_eq!(op_expenses.values.get("2024").copied().flatten(), Some(40.0)); + assert_eq!(op_expenses.detail_count, Some(2)); + + let operating_expense_details = model + .detail_rows + .get("income") + .and_then(|groups| groups.get("operating_expenses")) + .expect("operating expenses details"); + assert_eq!(operating_expense_details.len(), 2); + assert!(operating_expense_details.iter().any(|row| row.key == "sga-row")); + assert!(operating_expense_details.iter().any(|row| row.key == "rd-row")); + + let residual_rows = model + .detail_rows + .get("income") + .and_then(|groups| groups.get("unmapped")) + .expect("unmapped detail rows"); + assert_eq!(residual_rows.len(), 1); + assert_eq!(residual_rows[0].key, "unmapped-row"); + assert!(residual_rows[0].residual_flag); + + let rd_mapping = model + .concept_mappings + .get("http://fasb.org/us-gaap/2024#ResearchAndDevelopmentExpense") + .expect("rd mapping"); + assert_eq!(rd_mapping.detail_parent_surface_key.as_deref(), Some("operating_expenses")); + assert_eq!(rd_mapping.surface_key.as_deref(), Some("operating_expenses")); + + let residual_mapping = model + .concept_mappings + .get("urn:company#OtherOperatingCharges") + .expect("residual mapping"); + assert!(residual_mapping.residual_flag); + assert_eq!(residual_mapping.detail_parent_surface_key.as_deref(), Some("unmapped")); + + assert_eq!(model.normalization_summary.surface_row_count, 5); + assert_eq!(model.normalization_summary.detail_row_count, 3); + assert_eq!(model.normalization_summary.unmapped_row_count, 1); + } + + #[test] + fn parses_basic_xbrl_facts_without_regex_backreferences() { + let raw = r#" + + + + 0000320193 + + + 2025-01-01 + 2025-12-31 + + + + iso4217:USD + + 1000 + + "#; + + let parsed = parse_xbrl_instance(raw, Some("test.xml".to_string())); + 
assert_eq!(parsed.facts.len(), 1); + assert_eq!( + parsed.facts[0].qname, + "us-gaap:RevenueFromContractWithCustomerExcludingAssessedTax" + ); + assert_eq!(parsed.facts[0].value, 1000.0); + assert_eq!(parsed.facts[0].unit.as_deref(), Some("iso4217:USD")); + } + + #[test] + fn classifies_pack_specific_concepts_without_presentation_roles() { + assert_eq!( + concept_statement_fallback("FinancingReceivableExcludingAccruedInterestAfterAllowanceForCreditLoss") + .as_deref(), + Some("balance") + ); + assert_eq!(concept_statement_fallback("Deposits").as_deref(), Some("balance")); + assert_eq!( + concept_statement_fallback("RealEstateInvestmentPropertyNet").as_deref(), + Some("balance") + ); + assert_eq!(concept_statement_fallback("LeaseIncome").as_deref(), Some("income")); + assert_eq!( + concept_statement_fallback("DirectCostsOfLeasedAndRentedPropertyOrEquipment").as_deref(), + Some("income") + ); + } +} diff --git a/rust/fiscal-xbrl-core/src/metrics.rs b/rust/fiscal-xbrl-core/src/metrics.rs new file mode 100644 index 0000000..a884972 --- /dev/null +++ b/rust/fiscal-xbrl-core/src/metrics.rs @@ -0,0 +1,90 @@ +use crate::{FactOutput, FilingMetrics}; + +pub fn derive_metrics(facts: &[FactOutput]) -> FilingMetrics { + fn pick_best(facts: &[&FactOutput]) -> Option { + facts + .iter() + .max_by(|left, right| { + let left_dimension_score = if left.is_dimensionless { 1 } else { 0 }; + let right_dimension_score = if right.is_dimensionless { 1 } else { 0 }; + left_dimension_score + .cmp(&right_dimension_score) + .then_with(|| { + let left_date = left + .period_end + .as_ref() + .or(left.period_instant.as_ref()) + .cloned() + .unwrap_or_default(); + let right_date = right + .period_end + .as_ref() + .or(right.period_instant.as_ref()) + .cloned() + .unwrap_or_default(); + left_date.cmp(&right_date) + }) + .then_with(|| { + left.value_num + .abs() + .partial_cmp(&right.value_num.abs()) + .unwrap_or(std::cmp::Ordering::Equal) + }) + }) + .map(|fact| fact.value_num) + } + + fn 
by_local_names<'a>(facts: &'a [FactOutput], names: &[&str]) -> Vec<&'a FactOutput> {
        // Case-insensitive filter of facts by concept local name.
        // NOTE(review): turbofish restored to `Vec<_>` (stripped generic).
        let targets = names.iter().map(|name| name.to_ascii_lowercase()).collect::<Vec<_>>();
        facts
            .iter()
            .filter(|fact| targets.iter().any(|target| fact.local_name.eq_ignore_ascii_case(target)))
            .collect()
    }

    let revenue = pick_best(&by_local_names(
        facts,
        &[
            "Revenues",
            "SalesRevenueNet",
            "RevenueFromContractWithCustomerExcludingAssessedTax",
            "TotalRevenuesAndOtherIncome",
        ],
    ));
    let net_income = pick_best(&by_local_names(facts, &["NetIncomeLoss", "ProfitLoss"]));
    let total_assets = pick_best(&by_local_names(facts, &["Assets"]));
    let cash = pick_best(&by_local_names(
        facts,
        &[
            "CashAndCashEquivalentsAtCarryingValue",
            "CashCashEquivalentsRestrictedCashAndRestrictedCashEquivalents",
        ],
    ));
    let direct_debt = pick_best(&by_local_names(
        facts,
        &[
            "DebtAndFinanceLeaseLiabilities",
            "Debt",
            "LongTermDebtAndCapitalLeaseObligations",
        ],
    ));
    let current_debt = pick_best(&by_local_names(
        facts,
        &["DebtCurrent", "ShortTermBorrowings", "LongTermDebtCurrent"],
    ));
    let long_term_debt = pick_best(&by_local_names(
        facts,
        &["LongTermDebtNoncurrent", "LongTermDebt", "DebtNoncurrent"],
    ));

    FilingMetrics {
        revenue,
        net_income,
        total_assets,
        cash,
        // Prefer an all-in debt concept; otherwise sum current + long-term.
        // NOTE(review): when only ONE of the two components is reported this
        // yields None rather than a partial figure — confirm that is intended.
        debt: direct_debt.or_else(|| match (current_debt, long_term_debt) {
            (Some(left), Some(right)) => Some(left + right),
            _ => None,
        }),
    }
}
diff --git a/rust/fiscal-xbrl-core/src/pack_selector.rs b/rust/fiscal-xbrl-core/src/pack_selector.rs
new file mode 100644
index 0000000..2403754
--- /dev/null
+++ b/rust/fiscal-xbrl-core/src/pack_selector.rs
@@ -0,0 +1,361 @@
use std::collections::HashSet;

use crate::{FactOutput, StatementRowMap};

/// Industry-specific surface-pack identifiers used to pick concept mappings.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum FiscalPack {
    Core,
    BankLender,
    Insurance,
    ReitRealEstate,
    BrokerAssetManager,
}

impl FiscalPack {
    /// Stable snake_case identifier for persistence/serialization.
    pub fn as_str(&self) -> &'static str {
        match self {
FiscalPack::Core => "core",
            FiscalPack::BankLender => "bank_lender",
            FiscalPack::Insurance => "insurance",
            FiscalPack::ReitRealEstate => "reit_real_estate",
            FiscalPack::BrokerAssetManager => "broker_asset_manager",
        }
    }
}

/// Result of pack selection plus advisory warning codes.
#[derive(Debug, Clone)]
pub struct PackSelection {
    pub pack: FiscalPack,
    /// NOTE(review): element type restored to `String`; the checked-in text had
    /// the generic parameter stripped.
    pub warnings: Vec<String>,
}

/// Scores each specialty pack against the filing's concepts and role URIs and
/// selects the top pack only when it clears both an absolute score (>= 10) and
/// a margin over the runner-up (>= 4); otherwise falls back to `Core`, adding a
/// warning when a non-zero specialty score was observed.
pub fn select_fiscal_pack(statement_rows: &StatementRowMap, facts: &[FactOutput]) -> PackSelection {
    let concept_names = collect_concept_names(statement_rows, facts);
    let role_tokens = collect_role_tokens(statement_rows, facts);

    let bank_score = score_bank_lender(&concept_names, &role_tokens);
    let insurance_score = score_insurance(&concept_names, &role_tokens);
    let reit_score = score_reit_real_estate(&concept_names, &role_tokens);
    let broker_score = score_broker_asset_manager(&concept_names, &role_tokens);
    let mut scored_packs = vec![
        (FiscalPack::BankLender, bank_score),
        (FiscalPack::Insurance, insurance_score),
        (FiscalPack::ReitRealEstate, reit_score),
        (FiscalPack::BrokerAssetManager, broker_score),
    ];
    // Highest score first; the stable sort keeps declaration order on ties.
    scored_packs.sort_by(|left, right| right.1.cmp(&left.1));

    let (top_pack, top_score) = scored_packs[0];
    let second_score = scored_packs.get(1).map(|(_, score)| *score).unwrap_or_default();
    let margin = top_score - second_score;
    let selected_pack = if top_score >= 10 && margin >= 4 {
        top_pack
    } else {
        FiscalPack::Core
    };

    let mut warnings = Vec::new();
    if selected_pack == FiscalPack::Core && top_score > 0 {
        warnings.push("fiscal_pack_defaulted_to_core".to_string());
    }

    PackSelection {
        pack: selected_pack,
        warnings,
    }
}

/// Collects lowercase local names and qnames from statement rows and facts.
/// NOTE(review): return type restored to `HashSet<String>` (stripped generic).
fn collect_concept_names(statement_rows: &StatementRowMap, facts: &[FactOutput]) -> HashSet<String> {
    let mut names = HashSet::new();

    for rows in statement_rows.values() {
        for row in rows {
            names.insert(row.local_name.to_ascii_lowercase());
            names.insert(row.qname.to_ascii_lowercase());
        }
    }

    for fact in facts {
names.insert(fact.local_name.to_ascii_lowercase());
        names.insert(fact.qname.to_ascii_lowercase());
    }

    names
}

/// Collects lowercase presentation-role URIs from statement rows and facts.
/// NOTE(review): return type restored to `HashSet<String>` (stripped generic).
fn collect_role_tokens(statement_rows: &StatementRowMap, facts: &[FactOutput]) -> HashSet<String> {
    let mut roles = HashSet::new();

    for rows in statement_rows.values() {
        for row in rows {
            if let Some(role_uri) = &row.role_uri {
                roles.insert(role_uri.to_ascii_lowercase());
            }
        }
    }

    for fact in facts {
        if let Some(role_uri) = &fact.role_uri {
            roles.insert(role_uri.to_ascii_lowercase());
        }
    }

    roles
}

/// Bank/lender signal score: deposit and loan-book concepts weigh heaviest.
fn score_bank_lender(concepts: &HashSet<String>, roles: &HashSet<String>) -> i64 {
    let mut score = 0;

    score += weighted_match(
        concepts,
        &[
            "depositsliabilities",
            "us-gaap:depositsliabilities",
            "deposits",
        ],
        8,
    );
    score += weighted_match(
        concepts,
        &[
            "financingreceivablerecordedinvestment",
            "us-gaap:financingreceivablerecordedinvestment",
            "loansreceivablenetreportedamount",
            "us-gaap:loansreceivablenetreportedamount",
        ],
        8,
    );
    score += weighted_match(
        concepts,
        &[
            "allowanceforcreditlosses",
            "allowanceforloanlosses",
            "provisionforcreditlosses",
            "loanlossprovision",
            "netinterestincome",
            "interestexpense",
            "interestanddividendincomeoperating",
        ],
        4,
    );
    score += weighted_role_match(roles, &["deposit", "loan", "credit", "netinterest"], 2);

    score
}

/// Insurance signal score: premium and policy-liability concepts weigh heaviest.
fn score_insurance(concepts: &HashSet<String>, roles: &HashSet<String>) -> i64 {
    let mut score = 0;

    score += weighted_match(
        concepts,
        &[
            "premiums",
            "premiumswritten",
            "premiumsearned",
            "premiumswrittennet",
            "premiumsearnednet",
            "us-gaap:premiums",
        ],
        8,
    );
    score += weighted_match(
        concepts,
        &[
            "policyholderbenefitsandclaimsincurrednet",
            "futurepolicybenefits",
            "liabilityforfuturepolicybenefits",
            "liabilityforunpaidlossesandlossadjustmentexpenses",
            "liabilityforunpaidclaimsandclaimsadjustmentexpense",
            // NOTE(review): a duplicate "liabilityforfuturepolicybenefits"
            // entry was removed here; `weighted_match` is an any-of check, so
            // the duplicate had no effect on the score.
            "deferredpolicyacquisitioncosts",
"deferredpolicyacquisitioncostsamortizationexpense", + "netinvestmentincome", + "underwritingincomeloss", + "unearnedpremiumsnet", + ], + 6, + ); + score += weighted_role_match(roles, &["insurance", "premium", "policy", "claims"], 2); + + score +} + +fn score_reit_real_estate(concepts: &HashSet, roles: &HashSet) -> i64 { + let mut score = 0; + + score += weighted_match( + concepts, + &[ + "leaseincome", + "realestateinvestmentpropertynet", + "realestategrossatcarryingvalue", + "realestateinvestmentpropertyatcost", + ], + 8, + ); + score += weighted_match( + concepts, + &[ + "numberofrealestateproperties", + "directcostsofleasedandrentedpropertyorequipment", + "depreciationdepletionandamortization", + "realestateaccumulateddepreciation", + "paymentstoacquirecommercialrealestate", + ], + 6, + ); + score += weighted_role_match(roles, &["realestate", "property", "lease", "rental"], 2); + + score +} + +fn score_broker_asset_manager(concepts: &HashSet, roles: &HashSet) -> i64 { + let mut score = 0; + + score += weighted_match( + concepts, + &[ + "assetsundermanagementcarryingamount", + "feepayingassetundermanagement", + ], + 8, + ); + score += weighted_match( + concepts, + &[ + "performancefeerevenuerecognized", + "subadvisoryandother", + "sponsorfees", + ], + 6, + ); + score += weighted_match( + concepts, + &[ + "totalsalesassetandaccountexpense", + "estimatedannualfixedminimumfeesforcurrentlyoutstandingcontracts", + "reductioninthevalueofmanagementcontract", + ], + 6, + ); + score += weighted_role_match(roles, &["advis", "management", "asset", "distribution"], 2); + + score +} + +fn weighted_match(concepts: &HashSet, candidates: &[&str], weight: i64) -> i64 { + if candidates + .iter() + .any(|candidate| concepts.contains(&candidate.to_ascii_lowercase())) + { + weight + } else { + 0 + } +} + +fn weighted_role_match(roles: &HashSet, candidates: &[&str], weight: i64) -> i64 { + if roles.iter().any(|role| candidates.iter().any(|candidate| role.contains(candidate))) { + 
weight + } else { + 0 + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::{StatementRowOutput, StatementRowMap}; + use std::collections::BTreeMap; + + fn row(local_name: &str, statement: &str) -> StatementRowOutput { + StatementRowOutput { + key: local_name.to_string(), + label: local_name.to_string(), + concept_key: format!("http://fasb.org/us-gaap/2024#{local_name}"), + qname: format!("us-gaap:{local_name}"), + namespace_uri: "http://fasb.org/us-gaap/2024".to_string(), + local_name: local_name.to_string(), + is_extension: false, + statement: statement.to_string(), + role_uri: Some(statement.to_string()), + order: 1, + depth: 0, + parent_key: None, + values: BTreeMap::from([("p1".to_string(), Some(1.0))]), + units: BTreeMap::from([("p1".to_string(), Some("iso4217:USD".to_string()))]), + has_dimensions: false, + source_fact_ids: vec![1], + } + } + + fn empty_map() -> StatementRowMap { + BTreeMap::from([ + ("income".to_string(), Vec::new()), + ("balance".to_string(), Vec::new()), + ("cash_flow".to_string(), Vec::new()), + ("equity".to_string(), Vec::new()), + ("comprehensive_income".to_string(), Vec::new()), + ]) + } + + #[test] + fn chooses_bank_lender_from_loan_and_deposit_signatures() { + let mut rows = empty_map(); + rows.get_mut("balance").unwrap().extend([ + row("DepositsLiabilities", "balance"), + row("FinancingReceivableRecordedInvestment", "balance"), + row("AllowanceForCreditLosses", "balance"), + ]); + + let selection = select_fiscal_pack(&rows, &[]); + assert_eq!(selection.pack, FiscalPack::BankLender); + assert!(selection.warnings.is_empty()); + } + + #[test] + fn chooses_insurance_from_premium_and_claim_signatures() { + let mut rows = empty_map(); + rows.get_mut("income").unwrap().extend([ + row("Premiums", "income"), + row("PolicyholderBenefitsAndClaimsIncurredNet", "income"), + ]); + rows.get_mut("balance").unwrap().push(row("FuturePolicyBenefits", "balance")); + + let selection = select_fiscal_pack(&rows, &[]); + 
assert_eq!(selection.pack, FiscalPack::Insurance); + assert!(selection.warnings.is_empty()); + } + + #[test] + fn defaults_to_core_on_low_confidence() { + let mut rows = empty_map(); + rows.get_mut("income").unwrap().push(row("InterestExpense", "income")); + + let selection = select_fiscal_pack(&rows, &[]); + assert_eq!(selection.pack, FiscalPack::Core); + assert_eq!(selection.warnings, vec!["fiscal_pack_defaulted_to_core"]); + } + + #[test] + fn chooses_reit_from_property_and_lease_signatures() { + let mut rows = empty_map(); + rows.get_mut("income").unwrap().push(row("LeaseIncome", "income")); + rows.get_mut("balance").unwrap().push(row("RealEstateInvestmentPropertyNet", "balance")); + rows.get_mut("balance").unwrap().push(row("NumberOfRealEstateProperties", "balance")); + + let selection = select_fiscal_pack(&rows, &[]); + assert_eq!(selection.pack, FiscalPack::ReitRealEstate); + } + + #[test] + fn chooses_broker_asset_manager_from_aum_and_fee_signatures() { + let mut rows = empty_map(); + rows.get_mut("income").unwrap().push(row("PerformanceFeeRevenueRecognized", "income")); + rows.get_mut("balance").unwrap().push(row("AssetsUnderManagementCarryingAmount", "balance")); + rows.get_mut("balance").unwrap().push(row("FeePayingAssetUnderManagement", "balance")); + + let selection = select_fiscal_pack(&rows, &[]); + assert_eq!(selection.pack, FiscalPack::BrokerAssetManager); + } +} diff --git a/rust/fiscal-xbrl-core/src/surface_mapper.rs b/rust/fiscal-xbrl-core/src/surface_mapper.rs new file mode 100644 index 0000000..61d81ce --- /dev/null +++ b/rust/fiscal-xbrl-core/src/surface_mapper.rs @@ -0,0 +1,667 @@ +use anyhow::Result; +use std::collections::{BTreeMap, HashMap, HashSet}; + +use crate::pack_selector::FiscalPack; +use crate::taxonomy_loader::{load_crosswalk, load_surface_pack, CrosswalkFile, SurfaceDefinition}; +use crate::{ + ConceptOutput, DetailRowOutput, DetailRowStatementMap, FactOutput, NormalizationSummaryOutput, + PeriodOutput, StatementRowMap, 
StatementRowOutput, SurfaceRowMap, SurfaceRowOutput, +}; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum MappingMethod { + AuthoritativeDirect, + DirectSourceConcept, + AggregateChildren, + TaxonomyKpi, + UnmappedResidual, +} + +impl MappingMethod { + pub fn as_str(&self) -> &'static str { + match self { + MappingMethod::AuthoritativeDirect => "authoritative_direct", + MappingMethod::DirectSourceConcept => "direct_source_concept", + MappingMethod::AggregateChildren => "aggregate_children", + MappingMethod::TaxonomyKpi => "taxonomy_kpi", + MappingMethod::UnmappedResidual => "unmapped_residual", + } + } +} + +#[derive(Debug, Clone, Default)] +pub struct MappingAssignment { + pub authoritative_concept_key: Option, + pub mapping_method: Option, + pub surface_key: Option, + pub detail_parent_surface_key: Option, + pub kpi_key: Option, + pub residual_flag: bool, +} + +#[derive(Debug, Default)] +pub struct CompactSurfaceModel { + pub surface_rows: SurfaceRowMap, + pub detail_rows: DetailRowStatementMap, + pub normalization_summary: NormalizationSummaryOutput, + pub concept_mappings: HashMap, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum MatchRole { + Surface, + Detail, +} + +#[derive(Debug, Clone)] +struct MatchedStatementRow<'a> { + row: &'a StatementRowOutput, + authoritative_concept_key: Option, + mapping_method: MappingMethod, + match_role: MatchRole, + rank: i64, +} + +pub fn build_compact_surface_model( + periods: &[PeriodOutput], + statement_rows: &StatementRowMap, + taxonomy_regime: &str, + fiscal_pack: FiscalPack, + warnings: Vec, +) -> Result { + let pack = load_surface_pack(fiscal_pack)?; + let crosswalk = load_crosswalk(taxonomy_regime)?; + let mut surface_rows = empty_surface_row_map(); + let mut detail_rows = empty_detail_row_map(); + let mut concept_mappings = HashMap::::new(); + let mut surface_row_count = 0usize; + let mut detail_row_count = 0usize; + let mut unmapped_row_count = 0usize; + let mut material_unmapped_row_count = 0usize; 
+ + for statement in statement_keys() { + let rows = statement_rows + .get(statement) + .cloned() + .unwrap_or_default(); + let statement_definitions = pack + .surfaces + .iter() + .filter(|definition| definition.statement == statement) + .collect::>(); + let mut used_row_keys = HashSet::::new(); + let mut statement_surface_rows = Vec::::new(); + let mut statement_detail_rows = BTreeMap::>::new(); + + for definition in statement_definitions { + let matches = rows + .iter() + .filter(|row| !used_row_keys.contains(&row.key)) + .filter_map(|row| match_statement_row(row, definition, crosswalk.as_ref())) + .collect::>(); + + if matches.is_empty() { + continue; + } + + let direct_surface_matches = matches + .iter() + .filter(|matched| matched.match_role == MatchRole::Surface) + .cloned() + .collect::>(); + let detail_component_matches = matches + .iter() + .filter(|matched| matched.match_role == MatchRole::Detail) + .cloned() + .collect::>(); + + let mut surface_source_matches = if !direct_surface_matches.is_empty() { + vec![pick_best_match(&direct_surface_matches).clone()] + } else if definition.rollup_policy == "aggregate_children" { + detail_component_matches.clone() + } else { + Vec::new() + }; + + if surface_source_matches.is_empty() { + continue; + } + + let detail_matches = if definition.detail_grouping_policy == "group_all_children" { + if detail_component_matches.is_empty() && definition.rollup_policy == "aggregate_children" { + Vec::new() + } else { + detail_component_matches.clone() + } + } else { + Vec::new() + }; + + if definition.rollup_policy == "aggregate_children" + && direct_surface_matches.is_empty() + && !surface_source_matches.is_empty() + { + for matched in &mut surface_source_matches { + matched.mapping_method = MappingMethod::AggregateChildren; + } + } + + let values = build_surface_values(periods, &surface_source_matches); + if !has_any_value(&values) { + continue; + } + + let resolved_source_row_keys = periods + .iter() + .map(|period| { + let 
resolved = if surface_source_matches.len() == 1 { + surface_source_matches + .first() + .and_then(|matched| matched.row.values.get(&period.id).copied().flatten().map(|_| matched.row.key.clone())) + } else { + None + }; + (period.id.clone(), resolved) + }) + .collect::>(); + + let source_concepts = unique_sorted_strings( + surface_source_matches + .iter() + .map(|matched| matched.row.qname.clone()) + .collect::>(), + ); + let source_row_keys = unique_sorted_strings( + surface_source_matches + .iter() + .map(|matched| matched.row.key.clone()) + .collect::>(), + ); + let source_fact_ids = unique_sorted_i64( + surface_source_matches + .iter() + .flat_map(|matched| matched.row.source_fact_ids.clone()) + .collect::>(), + ); + let has_dimensions = surface_source_matches.iter().any(|matched| matched.row.has_dimensions); + + for matched in &surface_source_matches { + used_row_keys.insert(matched.row.key.clone()); + concept_mappings.insert( + matched.row.concept_key.clone(), + MappingAssignment { + authoritative_concept_key: matched.authoritative_concept_key.clone(), + mapping_method: Some(matched.mapping_method), + surface_key: Some(definition.surface_key.clone()), + detail_parent_surface_key: None, + kpi_key: None, + residual_flag: false, + }, + ); + } + + let details = detail_matches + .iter() + .map(|matched| { + used_row_keys.insert(matched.row.key.clone()); + concept_mappings.insert( + matched.row.concept_key.clone(), + MappingAssignment { + authoritative_concept_key: matched.authoritative_concept_key.clone(), + mapping_method: Some(matched.mapping_method), + surface_key: Some(definition.surface_key.clone()), + detail_parent_surface_key: Some(definition.surface_key.clone()), + kpi_key: None, + residual_flag: false, + }, + ); + build_detail_row(matched.row, &definition.surface_key, false) + }) + .collect::>(); + if !details.is_empty() { + detail_row_count += details.len(); + statement_detail_rows.insert(definition.surface_key.clone(), details); + } + + 
statement_surface_rows.push(SurfaceRowOutput { + key: definition.surface_key.clone(), + label: definition.label.clone(), + category: definition.category.clone(), + template_section: definition.category.clone(), + order: definition.order, + unit: definition.unit.clone(), + values, + source_concepts, + source_row_keys, + source_fact_ids, + formula_key: definition.formula_fallback.as_ref().map(|_| definition.surface_key.clone()), + has_dimensions, + resolved_source_row_keys, + statement: Some(definition.statement.clone()), + detail_count: statement_detail_rows + .get(&definition.surface_key) + .map(|rows| rows.len() as i64), + resolution_method: None, + confidence: None, + warning_codes: vec![], + }); + surface_row_count += 1; + let _ = &definition.materiality_policy; + } + + statement_surface_rows.sort_by(|left, right| left.order.cmp(&right.order).then_with(|| left.label.cmp(&right.label))); + let baseline = baseline_for_statement(statement, &statement_surface_rows); + let threshold = materiality_threshold(statement, baseline); + let residual_rows = rows + .iter() + .filter(|row| !used_row_keys.contains(&row.key)) + .filter(|row| has_any_value(&row.values)) + .map(|row| { + concept_mappings.insert( + row.concept_key.clone(), + MappingAssignment { + authoritative_concept_key: None, + mapping_method: Some(MappingMethod::UnmappedResidual), + surface_key: None, + detail_parent_surface_key: Some("unmapped".to_string()), + kpi_key: None, + residual_flag: true, + }, + ); + build_detail_row(row, "unmapped", true) + }) + .collect::>(); + + if !residual_rows.is_empty() { + unmapped_row_count += residual_rows.len(); + material_unmapped_row_count += residual_rows + .iter() + .filter(|row| max_abs_value(&row.values) >= threshold) + .count(); + detail_row_count += residual_rows.len(); + statement_detail_rows.insert("unmapped".to_string(), residual_rows); + } + + surface_rows.insert(statement.to_string(), statement_surface_rows); + detail_rows.insert(statement.to_string(), 
statement_detail_rows); + } + + Ok(CompactSurfaceModel { + surface_rows, + detail_rows, + normalization_summary: NormalizationSummaryOutput { + surface_row_count, + detail_row_count, + kpi_row_count: 0, + unmapped_row_count, + material_unmapped_row_count, + warnings, + }, + concept_mappings, + }) +} + +pub fn merge_mapping_assignments( + primary: &mut HashMap, + secondary: HashMap, +) { + for (concept_key, assignment) in secondary { + let existing = primary.entry(concept_key).or_default(); + existing.authoritative_concept_key = existing + .authoritative_concept_key + .clone() + .or(assignment.authoritative_concept_key); + if existing.mapping_method.is_none() + || matches!(existing.mapping_method, Some(MappingMethod::UnmappedResidual)) + { + existing.mapping_method = assignment.mapping_method; + } + if existing.surface_key.is_none() { + existing.surface_key = assignment.surface_key; + } + if existing.detail_parent_surface_key.is_none() { + existing.detail_parent_surface_key = assignment.detail_parent_surface_key; + } + if existing.kpi_key.is_none() { + existing.kpi_key = assignment.kpi_key; + } + existing.residual_flag = existing.residual_flag && assignment.residual_flag; + } +} + +pub fn apply_mapping_assignments( + concepts: &mut [ConceptOutput], + facts: &mut [FactOutput], + mappings: &HashMap, +) { + for concept in concepts { + if let Some(mapping) = mappings.get(&concept.concept_key) { + concept.authoritative_concept_key = mapping.authoritative_concept_key.clone(); + concept.mapping_method = mapping.mapping_method.map(|method| method.as_str().to_string()); + concept.surface_key = mapping.surface_key.clone(); + concept.detail_parent_surface_key = mapping.detail_parent_surface_key.clone(); + concept.kpi_key = mapping.kpi_key.clone(); + concept.residual_flag = mapping.residual_flag; + } + } + + for fact in facts { + if let Some(mapping) = mappings.get(&fact.concept_key) { + fact.authoritative_concept_key = mapping.authoritative_concept_key.clone(); + 
fact.mapping_method = mapping.mapping_method.map(|method| method.as_str().to_string());
            fact.surface_key = mapping.surface_key.clone();
            fact.detail_parent_surface_key = mapping.detail_parent_surface_key.clone();
            fact.kpi_key = mapping.kpi_key.clone();
            fact.residual_flag = mapping.residual_flag;
        }
    }
}

/// Attempts to match one statement row against a surface definition.
///
/// Precedence: an authoritative/crosswalk match (rank 0, always a Surface
/// match) beats an allowed-source-concept match (rank 1, which becomes a
/// Detail match under an `aggregate_children` rollup policy). `None` when
/// neither matches.
fn match_statement_row<'a>(
    row: &'a StatementRowOutput,
    definition: &SurfaceDefinition,
    crosswalk: Option<&CrosswalkFile>,
) -> Option<MatchedStatementRow<'a>> {
    let authoritative_mapping = crosswalk.and_then(|crosswalk| crosswalk.mappings.get(&row.qname));
    // Non-extension concepts act as their own authoritative concept by default.
    let authoritative_concept_key = authoritative_mapping
        .map(|mapping| mapping.authoritative_concept_key.clone())
        .or_else(|| {
            if !row.is_extension {
                Some(row.qname.clone())
            } else {
                None
            }
        });

    let matches_authoritative = authoritative_concept_key.as_ref().map_or(false, |concept| {
        definition
            .allowed_authoritative_concepts
            .iter()
            .any(|candidate| candidate_matches(candidate, concept))
    }) || authoritative_mapping
        .map(|mapping| mapping.surface_key == definition.surface_key)
        .unwrap_or(false);

    if matches_authoritative {
        return Some(MatchedStatementRow {
            row,
            authoritative_concept_key,
            mapping_method: MappingMethod::AuthoritativeDirect,
            match_role: MatchRole::Surface,
            rank: 0,
        });
    }

    let matches_source = definition
        .allowed_source_concepts
        .iter()
        .any(|candidate| candidate_matches(candidate, &row.qname) || candidate_matches(candidate, &row.local_name));
    if matches_source {
        return Some(MatchedStatementRow {
            row,
            authoritative_concept_key,
            mapping_method: MappingMethod::DirectSourceConcept,
            match_role: if definition.rollup_policy == "aggregate_children" {
                MatchRole::Detail
            } else {
                MatchRole::Surface
            },
            rank: 1,
        });
    }

    None
}

/// Picks the single best match: lowest rank, dimensionless rows first, then
/// presentation order, then largest magnitude, then label for determinism.
fn pick_best_match<'a>(matches: &'a [MatchedStatementRow<'a>]) -> &'a MatchedStatementRow<'a> {
    matches
        .iter()
        .min_by(|left, right| {
            left.rank
                .cmp(&right.rank)
                .then_with(|| {
                    let left_dimension_rank = if left.row.has_dimensions { 1 } else { 0 };
                    let right_dimension_rank = if right.row.has_dimensions { 1 } else { 0 };
                    left_dimension_rank.cmp(&right_dimension_rank)
                })
                .then_with(|| left.row.order.cmp(&right.row.order))
                // Larger magnitude wins: right-vs-left comparison is deliberate.
                .then_with(|| {
                    max_abs_value(&right.row.values)
                        .partial_cmp(&max_abs_value(&left.row.values))
                        .unwrap_or(std::cmp::Ordering::Equal)
                })
                .then_with(|| left.row.label.cmp(&right.row.label))
        })
        .expect("pick_best_match requires at least one match")
}

/// Builds per-period surface values: a single match passes its value through;
/// multiple matches are summed (None only when every addend is None).
/// NOTE(review): generics restored to `BTreeMap<String, Option<f64>>`; the
/// checked-in text had them stripped.
fn build_surface_values(
    periods: &[PeriodOutput],
    matches: &[MatchedStatementRow<'_>],
) -> BTreeMap<String, Option<f64>> {
    periods
        .iter()
        .map(|period| {
            let value = if matches.len() == 1 {
                matches
                    .first()
                    .and_then(|matched| matched.row.values.get(&period.id).copied())
                    .flatten()
            } else {
                sum_nullable_values(
                    matches
                        .iter()
                        .map(|matched| matched.row.values.get(&period.id).copied().flatten())
                        .collect::<Vec<_>>(),
                )
            };
            (period.id.clone(), value)
        })
        .collect()
}

/// Sums optional values treating None as 0 — unless every value is None, in
/// which case the sum itself is None.
fn sum_nullable_values(values: Vec<Option<f64>>) -> Option<f64> {
    if values.iter().all(|value| value.is_none()) {
        return None;
    }

    Some(values.into_iter().map(|value| value.unwrap_or(0.0)).sum())
}

/// Projects a statement row into a detail row under `parent_surface_key`.
fn build_detail_row(
    row: &StatementRowOutput,
    parent_surface_key: &str,
    residual_flag: bool,
) -> DetailRowOutput {
    DetailRowOutput {
        key: row.key.clone(),
        parent_surface_key: parent_surface_key.to_string(),
        label: row.label.clone(),
        concept_key: row.concept_key.clone(),
        qname: row.qname.clone(),
        namespace_uri: row.namespace_uri.clone(),
        local_name: row.local_name.clone(),
        // First non-null unit across periods, if any.
        unit: row.units.values().find_map(|value| value.clone()),
        values: row.values.clone(),
        source_fact_ids: row.source_fact_ids.clone(),
        is_extension: row.is_extension,
        dimensions_summary: if row.has_dimensions {
            vec!["has_dimensions".to_string()]
        } else {
            vec![]
        },
        residual_flag,
    }
}

/// True when at least one period carries a value.
fn has_any_value(values: &BTreeMap<String, Option<f64>>) -> bool {
    values.values().any(|value|
value.is_some()) +} + +fn max_abs_value(values: &BTreeMap>) -> f64 { + values + .values() + .fold(0.0_f64, |max, value| max.max(value.unwrap_or(0.0).abs())) +} + +fn baseline_for_statement(statement: &str, surface_rows: &[SurfaceRowOutput]) -> f64 { + let anchor_key = if statement == "balance" { + "total_assets" + } else { + "revenue" + }; + + surface_rows + .iter() + .find(|row| row.key == anchor_key) + .map(|row| max_abs_value(&row.values)) + .unwrap_or(0.0) +} + +fn materiality_threshold(statement: &str, baseline: f64) -> f64 { + if statement == "balance" { + return (baseline * 0.005).max(5_000_000.0); + } + + (baseline * 0.01).max(1_000_000.0) +} + +fn unique_sorted_strings(values: Vec) -> Vec { + let mut values = values.into_iter().collect::>().into_iter().collect::>(); + values.sort(); + values +} + +fn unique_sorted_i64(values: Vec) -> Vec { + let mut values = values.into_iter().collect::>().into_iter().collect::>(); + values.sort(); + values +} + +fn candidate_matches(candidate: &str, actual: &str) -> bool { + candidate.eq_ignore_ascii_case(actual) + || candidate + .rsplit_once(':') + .map(|(_, local_name)| local_name.eq_ignore_ascii_case(actual)) + .unwrap_or(false) + || actual + .rsplit_once(':') + .map(|(_, local_name)| local_name.eq_ignore_ascii_case(candidate)) + .unwrap_or(false) +} + +fn statement_keys() -> [&'static str; 5] { + ["income", "balance", "cash_flow", "equity", "comprehensive_income"] +} + +fn empty_surface_row_map() -> SurfaceRowMap { + statement_keys() + .into_iter() + .map(|key| (key.to_string(), Vec::new())) + .collect() +} + +fn empty_detail_row_map() -> DetailRowStatementMap { + statement_keys() + .into_iter() + .map(|key| (key.to_string(), BTreeMap::new())) + .collect() +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::pack_selector::FiscalPack; + use crate::{PeriodOutput, StatementRowOutput}; + + fn period(id: &str) -> PeriodOutput { + PeriodOutput { + id: id.to_string(), + filing_id: 1, + accession_number: 
"0000000000-00-000001".to_string(), + filing_date: "2025-12-31".to_string(), + period_start: Some("2025-01-01".to_string()), + period_end: Some("2025-12-31".to_string()), + filing_type: "10-K".to_string(), + period_label: id.to_string(), + } + } + + fn row(key: &str, qname: &str, statement: &str, value: f64) -> StatementRowOutput { + StatementRowOutput { + key: key.to_string(), + label: key.to_string(), + concept_key: format!("http://fasb.org/us-gaap/2024#{}", qname.split(':').nth(1).unwrap_or(key)), + qname: qname.to_string(), + namespace_uri: "http://fasb.org/us-gaap/2024".to_string(), + local_name: qname.split(':').nth(1).unwrap_or(key).to_string(), + is_extension: false, + statement: statement.to_string(), + role_uri: Some(statement.to_string()), + order: 1, + depth: 0, + parent_key: None, + values: BTreeMap::from([("p1".to_string(), Some(value))]), + units: BTreeMap::from([("p1".to_string(), Some("iso4217:USD".to_string()))]), + has_dimensions: false, + source_fact_ids: vec![1], + } + } + + fn empty_map() -> StatementRowMap { + BTreeMap::from([ + ("income".to_string(), Vec::new()), + ("balance".to_string(), Vec::new()), + ("cash_flow".to_string(), Vec::new()), + ("equity".to_string(), Vec::new()), + ("comprehensive_income".to_string(), Vec::new()), + ]) + } + + #[test] + fn prefers_direct_authoritative_row_over_child_aggregation() { + let mut rows = empty_map(); + rows.get_mut("income").unwrap().extend([ + row("op-expenses", "us-gaap:OperatingExpenses", "income", 40.0), + row("sga", "us-gaap:SellingGeneralAndAdministrativeExpense", "income", 25.0), + row("rd", "us-gaap:ResearchAndDevelopmentExpense", "income", 15.0), + ]); + + let model = build_compact_surface_model( + &[period("p1")], + &rows, + "us-gaap", + FiscalPack::Core, + vec![], + ) + .expect("compact model should build"); + let op_expenses = model + .surface_rows + .get("income") + .unwrap() + .iter() + .find(|row| row.key == "operating_expenses") + .unwrap(); + + 
assert_eq!(op_expenses.values.get("p1").copied().flatten(), Some(40.0)); + assert_eq!(op_expenses.detail_count, Some(2)); + } + + #[test] + fn emits_unmapped_residual_rows() { + let mut rows = empty_map(); + rows.get_mut("income").unwrap().push(row("custom", "company:CustomMetric", "income", 12.0)); + + let model = build_compact_surface_model( + &[period("p1")], + &rows, + "us-gaap", + FiscalPack::Core, + vec![], + ) + .expect("compact model should build"); + let residual_rows = model.detail_rows.get("income").unwrap().get("unmapped").unwrap(); + assert_eq!(residual_rows.len(), 1); + assert!(residual_rows[0].residual_flag); + } +} diff --git a/rust/fiscal-xbrl-core/src/taxonomy_loader.rs b/rust/fiscal-xbrl-core/src/taxonomy_loader.rs new file mode 100644 index 0000000..d568ae9 --- /dev/null +++ b/rust/fiscal-xbrl-core/src/taxonomy_loader.rs @@ -0,0 +1,249 @@ +use anyhow::{anyhow, Context, Result}; +use serde::Deserialize; +use std::env; +use std::fs; +use std::collections::HashMap; +use std::path::PathBuf; + +use crate::pack_selector::FiscalPack; + +#[derive(Debug, Deserialize, Clone)] +pub struct SurfacePackFile { + pub version: String, + pub pack: String, + pub surfaces: Vec, +} + +#[derive(Debug, Deserialize, Clone)] +pub struct SurfaceDefinition { + pub surface_key: String, + pub statement: String, + pub label: String, + pub category: String, + pub order: i64, + pub unit: String, + pub rollup_policy: String, + pub allowed_source_concepts: Vec, + pub allowed_authoritative_concepts: Vec, + pub formula_fallback: Option, + pub detail_grouping_policy: String, + pub materiality_policy: String, +} + +#[derive(Debug, Deserialize, Clone)] +pub struct CrosswalkFile { + pub version: String, + pub regime: String, + pub mappings: std::collections::HashMap, +} + +#[derive(Debug, Deserialize, Clone)] +pub struct CrosswalkMapping { + pub surface_key: String, + pub authoritative_concept_key: String, +} + +#[derive(Debug, Deserialize, Clone)] +pub struct KpiPackFile { + pub 
version: String, + pub pack: String, + pub kpis: Vec, +} + +#[derive(Debug, Deserialize, Clone)] +pub struct KpiDefinition { + pub key: String, + pub label: String, + pub unit: String, +} + +#[derive(Debug, Deserialize, Clone)] +pub struct UniversalIncomeFile { + pub version: String, + pub rows: Vec, +} + +#[derive(Debug, Deserialize, Clone)] +pub struct UniversalIncomeDefinition { + pub key: String, + pub statement: String, + pub label: String, + pub category: String, + pub order: i64, + pub unit: String, +} + +#[derive(Debug, Deserialize, Clone)] +pub struct IncomeBridgeFile { + pub version: String, + pub pack: String, + pub rows: HashMap, +} + +#[derive(Debug, Deserialize, Clone, Default)] +pub struct IncomeBridgeComponents { + #[serde(default)] + pub positive: Vec, + #[serde(default)] + pub negative: Vec, +} + +#[derive(Debug, Deserialize, Clone, Default)] +pub struct IncomeBridgeConceptGroups { + #[serde(default)] + pub positive: Vec, + #[serde(default)] + pub negative: Vec, +} + +#[derive(Debug, Deserialize, Clone)] +pub struct IncomeBridgeConceptGroup { + pub name: String, + pub concepts: Vec, +} + +#[derive(Debug, Deserialize, Clone)] +pub struct IncomeBridgeRow { + #[serde(default)] + pub direct_authoritative_concepts: Vec, + #[serde(default)] + pub direct_source_concepts: Vec, + #[serde(default)] + pub component_surfaces: IncomeBridgeComponents, + #[serde(default)] + pub component_concept_groups: IncomeBridgeConceptGroups, + pub formula: String, + #[serde(default)] + pub not_meaningful_for_pack: bool, + #[serde(default)] + pub warning_codes_when_used: Vec, +} + +pub fn resolve_taxonomy_dir() -> Result { + let mut candidates = Vec::new(); + + if let Some(value) = env::var("FISCAL_TAXONOMY_DIR") + .ok() + .map(|value| value.trim().to_string()) + .filter(|value| !value.is_empty()) + { + candidates.push(PathBuf::from(value)); + } + + if let Ok(current_dir) = env::current_dir() { + candidates.push(current_dir.join("rust").join("taxonomy")); + 
candidates.push(current_dir.join("taxonomy")); + } + + candidates.push(PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../taxonomy")); + + if let Ok(executable) = env::current_exe() { + if let Some(parent) = executable.parent() { + candidates.push(parent.join("../rust/taxonomy")); + candidates.push(parent.join("../taxonomy")); + } + } + + candidates + .into_iter() + .find(|path| path.is_dir()) + .ok_or_else(|| anyhow!("taxonomy resolution failed: unable to locate runtime taxonomy directory")) +} + +pub fn load_surface_pack(pack: FiscalPack) -> Result { + let taxonomy_dir = resolve_taxonomy_dir()?; + let path = taxonomy_dir + .join("fiscal") + .join("v1") + .join(format!("{}.surface.json", pack.as_str())); + let raw = fs::read_to_string(&path) + .with_context(|| format!("taxonomy resolution failed: unable to read {}", path.display()))?; + let file = serde_json::from_str::(&raw) + .with_context(|| format!("taxonomy resolution failed: unable to parse {}", path.display()))?; + let _ = (&file.version, &file.pack); + Ok(file) +} + +pub fn load_crosswalk(regime: &str) -> Result> { + let file_name = match regime { + "us-gaap" => "us-gaap.json", + "ifrs-full" => "ifrs.json", + _ => return Ok(None), + }; + + let taxonomy_dir = resolve_taxonomy_dir()?; + let path = taxonomy_dir.join("crosswalk").join(file_name); + let raw = fs::read_to_string(&path) + .with_context(|| format!("taxonomy resolution failed: unable to read {}", path.display()))?; + let file = serde_json::from_str::(&raw) + .with_context(|| format!("taxonomy resolution failed: unable to parse {}", path.display()))?; + let _ = (&file.version, &file.regime); + Ok(Some(file)) +} + +pub fn load_kpi_pack(pack: FiscalPack) -> Result { + let taxonomy_dir = resolve_taxonomy_dir()?; + let path = taxonomy_dir + .join("fiscal") + .join("v1") + .join("kpis") + .join(format!("{}.kpis.json", pack.as_str())); + let raw = fs::read_to_string(&path) + .with_context(|| format!("taxonomy resolution failed: unable to read {}", 
path.display()))?; + let file = serde_json::from_str::(&raw) + .with_context(|| format!("taxonomy resolution failed: unable to parse {}", path.display()))?; + let _ = (&file.version, &file.pack); + Ok(file) +} + +pub fn load_universal_income_definitions() -> Result { + let taxonomy_dir = resolve_taxonomy_dir()?; + let path = taxonomy_dir + .join("fiscal") + .join("v1") + .join("universal_income.surface.json"); + let raw = fs::read_to_string(&path) + .with_context(|| format!("taxonomy resolution failed: unable to read {}", path.display()))?; + let file = serde_json::from_str::(&raw) + .with_context(|| format!("taxonomy resolution failed: unable to parse {}", path.display()))?; + let _ = &file.version; + Ok(file) +} + +pub fn load_income_bridge(pack: FiscalPack) -> Result { + let taxonomy_dir = resolve_taxonomy_dir()?; + let path = taxonomy_dir + .join("fiscal") + .join("v1") + .join(format!("{}.income-bridge.json", pack.as_str())); + let raw = fs::read_to_string(&path) + .with_context(|| format!("taxonomy resolution failed: unable to read {}", path.display()))?; + let file = serde_json::from_str::(&raw) + .with_context(|| format!("taxonomy resolution failed: unable to parse {}", path.display()))?; + let _ = (&file.version, &file.pack); + Ok(file) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn resolves_taxonomy_dir_and_loads_core_pack() { + let taxonomy_dir = resolve_taxonomy_dir().expect("taxonomy dir should resolve during tests"); + assert!(taxonomy_dir.exists()); + + let surface_pack = load_surface_pack(FiscalPack::Core).expect("core surface pack should load"); + assert_eq!(surface_pack.pack, "core"); + assert!(!surface_pack.surfaces.is_empty()); + + let kpi_pack = load_kpi_pack(FiscalPack::Core).expect("core kpi pack should load"); + assert_eq!(kpi_pack.pack, "core"); + + let universal_income = load_universal_income_definitions().expect("universal income config should load"); + assert!(!universal_income.rows.is_empty()); + + let core_bridge = 
load_income_bridge(FiscalPack::Core).expect("core bridge should load"); + assert_eq!(core_bridge.pack, "core"); + } +} diff --git a/rust/fiscal-xbrl-core/src/universal_income.rs b/rust/fiscal-xbrl-core/src/universal_income.rs new file mode 100644 index 0000000..b993d8a --- /dev/null +++ b/rust/fiscal-xbrl-core/src/universal_income.rs @@ -0,0 +1,1404 @@ +use anyhow::Result; +use std::collections::{BTreeMap, HashMap, HashSet}; + +use crate::pack_selector::FiscalPack; +use crate::surface_mapper::{CompactSurfaceModel, MappingAssignment, MappingMethod}; +use crate::taxonomy_loader::{ + load_crosswalk, load_income_bridge, load_universal_income_definitions, CrosswalkFile, + IncomeBridgeConceptGroup, IncomeBridgeRow, UniversalIncomeDefinition, +}; +use crate::{DetailRowOutput, FactOutput, PeriodOutput, StatementRowMap, StatementRowOutput, SurfaceRowOutput}; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum ResolutionMethod { + Direct, + SurfaceBridge, + FormulaDerived, + NotMeaningful, +} + +impl ResolutionMethod { + fn as_str(&self) -> &'static str { + match self { + ResolutionMethod::Direct => "direct", + ResolutionMethod::SurfaceBridge => "surface_bridge", + ResolutionMethod::FormulaDerived => "formula_derived", + ResolutionMethod::NotMeaningful => "not_meaningful", + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum Confidence { + High, + Medium, + Low, +} + +impl Confidence { + fn as_str(&self) -> &'static str { + match self { + Confidence::High => "high", + Confidence::Medium => "medium", + Confidence::Low => "low", + } + } +} + +#[derive(Debug, Clone)] +struct MatchedStatementRow<'a> { + row: &'a StatementRowOutput, + authoritative_concept_key: Option, + mapping_method: MappingMethod, + rank: i64, +} + +#[derive(Debug, Clone)] +struct MatchedFact<'a> { + fact: &'a FactOutput, + authoritative_concept_key: Option, + mapping_method: MappingMethod, + rank: i64, +} + +#[derive(Debug, Clone)] +struct ValueSource { + values: BTreeMap>, + source_concepts: 
Vec, + source_row_keys: Vec, + source_fact_ids: Vec, + has_dimensions: bool, +} + +#[derive(Debug)] +struct ResolvedUniversalRow { + row: SurfaceRowOutput, + detail_rows: Vec, + mapping_assignments: HashMap, + warning_codes: Vec, +} + +pub fn apply_universal_income_rows( + periods: &[PeriodOutput], + statement_rows: &StatementRowMap, + facts: &[FactOutput], + taxonomy_regime: &str, + fiscal_pack: FiscalPack, + compact_model: &mut CompactSurfaceModel, +) -> Result<()> { + let universal_income = load_universal_income_definitions()?; + let income_bridge = load_income_bridge(fiscal_pack)?; + let crosswalk = load_crosswalk(taxonomy_regime)?; + let income_statement_rows = statement_rows.get("income").cloned().unwrap_or_default(); + let existing_income_surfaces = compact_model + .surface_rows + .get("income") + .cloned() + .unwrap_or_default(); + let mut working_income_surfaces = existing_income_surfaces.clone(); + let mut income_detail_rows = compact_model + .detail_rows + .get("income") + .cloned() + .unwrap_or_default(); + let mut built_rows = Vec::::new(); + let mut warnings_seen = compact_model + .normalization_summary + .warnings + .iter() + .cloned() + .collect::>(); + + for definition in universal_income.rows.iter().filter(|row| row.statement == "income") { + let resolved = if let Some(bridge_row) = income_bridge.rows.get(&definition.key) { + resolve_universal_row( + definition, + bridge_row, + periods, + &income_statement_rows, + facts, + &working_income_surfaces, + &income_detail_rows, + crosswalk.as_ref(), + ) + } else { + unresolved_row(definition, periods, &[format!("{}_bridge_missing", definition.key)]) + }; + + for warning in &resolved.warning_codes { + if warnings_seen.insert(warning.clone()) { + compact_model.normalization_summary.warnings.push(warning.clone()); + } + } + + if !resolved.detail_rows.is_empty() { + merge_detail_bucket( + income_detail_rows + .entry(definition.key.clone()) + .or_insert_with(Vec::new), + resolved.detail_rows, + ); + } + + for 
(concept_key, assignment) in resolved.mapping_assignments { + compact_model.concept_mappings.insert(concept_key, assignment); + } + + let detail_count = income_detail_rows + .get(&definition.key) + .map(|rows| rows.len() as i64); + let mut row = resolved.row; + row.detail_count = detail_count; + upsert_surface_row(&mut working_income_surfaces, row.clone()); + built_rows.push(row); + } + + let universal_keys = built_rows + .iter() + .map(|row| row.key.clone()) + .collect::>(); + let remaining_pack_rows = existing_income_surfaces + .into_iter() + .filter(|row| !universal_keys.contains(&row.key)) + .collect::>(); + + built_rows.sort_by(|left, right| left.order.cmp(&right.order).then_with(|| left.label.cmp(&right.label))); + let mut final_income_rows = built_rows; + final_income_rows.extend(remaining_pack_rows); + + compact_model + .surface_rows + .insert("income".to_string(), final_income_rows); + compact_model + .detail_rows + .insert("income".to_string(), income_detail_rows); + recount_normalization_summary(compact_model); + + Ok(()) +} + +fn resolve_universal_row( + definition: &UniversalIncomeDefinition, + bridge_row: &IncomeBridgeRow, + periods: &[PeriodOutput], + income_statement_rows: &[StatementRowOutput], + facts: &[FactOutput], + income_surface_rows: &[SurfaceRowOutput], + income_detail_rows: &BTreeMap>, + crosswalk: Option<&CrosswalkFile>, +) -> ResolvedUniversalRow { + if bridge_row.not_meaningful_for_pack || bridge_row.formula == "not_meaningful" { + return not_meaningful_row(definition, periods, &bridge_row.warning_codes_when_used); + } + + if let Some(matched) = pick_best_match( + &income_statement_rows + .iter() + .filter(|row| has_any_value(&row.values)) + .filter_map(|row| { + match_direct_authoritative( + row, + &bridge_row.direct_authoritative_concepts, + crosswalk, + ) + }) + .collect::>(), + ) { + return build_direct_row( + definition, + periods, + matched, + ResolutionMethod::Direct, + Confidence::High, + vec![], + ); + } + + if let 
Some(matched) = pick_best_match( + &income_statement_rows + .iter() + .filter(|row| has_any_value(&row.values)) + .filter_map(|row| match_direct_source(row, &bridge_row.direct_source_concepts, crosswalk)) + .collect::>(), + ) { + return build_direct_row( + definition, + periods, + matched, + ResolutionMethod::Direct, + Confidence::Medium, + vec![], + ); + } + + if let Some(matched_facts) = match_direct_facts( + periods, + facts, + &bridge_row.direct_authoritative_concepts, + &bridge_row.direct_source_concepts, + crosswalk, + ) { + return build_direct_fact_row(definition, periods, &matched_facts, vec![]); + } + + if let Some(existing_surface) = income_surface_rows.iter().find(|row| row.key == definition.key) { + let mut row = existing_surface.clone(); + row.order = definition.order; + row.label = definition.label.clone(); + row.category = definition.category.clone(); + row.template_section = definition.category.clone(); + row.statement = Some(definition.statement.clone()); + row.resolution_method = Some(ResolutionMethod::SurfaceBridge.as_str().to_string()); + row.confidence = Some(Confidence::Medium.as_str().to_string()); + row.warning_codes = vec![]; + row.detail_count = income_detail_rows + .get(&definition.key) + .map(|rows| rows.len() as i64) + .or(row.detail_count); + return ResolvedUniversalRow { + row, + detail_rows: vec![], + mapping_assignments: HashMap::new(), + warning_codes: vec![], + }; + } + + build_formula_row( + definition, + bridge_row, + periods, + income_statement_rows, + income_surface_rows, + crosswalk, + ) +} + +fn build_formula_row( + definition: &UniversalIncomeDefinition, + bridge_row: &IncomeBridgeRow, + periods: &[PeriodOutput], + income_statement_rows: &[StatementRowOutput], + income_surface_rows: &[SurfaceRowOutput], + crosswalk: Option<&CrosswalkFile>, +) -> ResolvedUniversalRow { + let positive_surface_sources = bridge_row + .component_surfaces + .positive + .iter() + .filter_map(|surface_key| income_surface_rows.iter().find(|row| 
row.key == *surface_key)) + .map(surface_source) + .collect::>(); + let negative_surface_sources = bridge_row + .component_surfaces + .negative + .iter() + .filter_map(|surface_key| income_surface_rows.iter().find(|row| row.key == *surface_key)) + .map(surface_source) + .collect::>(); + + let (positive_group_sources, positive_group_rows) = collect_group_sources( + income_statement_rows, + &bridge_row.component_concept_groups.positive, + crosswalk, + ); + let (negative_group_sources, negative_group_rows) = collect_group_sources( + income_statement_rows, + &bridge_row.component_concept_groups.negative, + crosswalk, + ); + + let mut positive_sources = positive_surface_sources; + positive_sources.extend(positive_group_sources); + let mut negative_sources = negative_surface_sources; + negative_sources.extend(negative_group_sources); + + let values = periods + .iter() + .map(|period| { + let positive_value = sum_source_values(&positive_sources, &period.id); + let negative_value = sum_source_values(&negative_sources, &period.id); + let value = match bridge_row.formula.as_str() { + "sum" | "pack_bridge_sum" => positive_value, + "subtract" => match (positive_value, negative_value) { + (Some(positive_value), Some(negative_value)) => Some(positive_value - negative_value), + _ => None, + }, + "sum_then_subtract" => positive_value.map(|positive_value| positive_value - negative_value.unwrap_or(0.0)), + _ => None, + }; + + (period.id.clone(), value) + }) + .collect::>(); + + if !has_any_value(&values) { + return unresolved_row( + definition, + periods, + &[format!("{}_unresolved", definition.key)], + ); + } + + let warning_codes = if bridge_row.warning_codes_when_used.is_empty() { + vec![format!("{}_formula_derived", definition.key)] + } else { + bridge_row.warning_codes_when_used.clone() + }; + + let source_concepts = unique_sorted_strings( + positive_sources + .iter() + .chain(negative_sources.iter()) + .flat_map(|source| source.source_concepts.clone()) + .collect(), + ); + let 
source_row_keys = unique_sorted_strings( + positive_sources + .iter() + .chain(negative_sources.iter()) + .flat_map(|source| source.source_row_keys.clone()) + .collect(), + ); + let source_fact_ids = unique_sorted_i64( + positive_sources + .iter() + .chain(negative_sources.iter()) + .flat_map(|source| source.source_fact_ids.clone()) + .collect(), + ); + let has_dimensions = positive_sources + .iter() + .chain(negative_sources.iter()) + .any(|source| source.has_dimensions); + let resolved_source_row_keys = periods + .iter() + .map(|period| { + let resolved = if source_row_keys.len() == 1 && values.get(&period.id).copied().flatten().is_some() { + source_row_keys.first().cloned() + } else { + None + }; + (period.id.clone(), resolved) + }) + .collect::>(); + + let detail_rows = positive_group_rows + .into_iter() + .chain(negative_group_rows) + .map(|row| build_detail_row(row, &definition.key, false)) + .collect::>(); + let mut mapping_assignments = HashMap::::new(); + for row in detail_rows.iter() { + mapping_assignments.insert( + row.concept_key.clone(), + MappingAssignment { + authoritative_concept_key: Some(row.qname.clone()), + mapping_method: Some(MappingMethod::AggregateChildren), + surface_key: Some(definition.key.clone()), + detail_parent_surface_key: Some(definition.key.clone()), + kpi_key: None, + residual_flag: false, + }, + ); + } + + ResolvedUniversalRow { + row: SurfaceRowOutput { + key: definition.key.clone(), + label: definition.label.clone(), + category: definition.category.clone(), + template_section: definition.category.clone(), + order: definition.order, + unit: definition.unit.clone(), + values, + source_concepts, + source_row_keys, + source_fact_ids, + formula_key: Some(definition.key.clone()), + has_dimensions, + resolved_source_row_keys, + statement: Some(definition.statement.clone()), + detail_count: Some(detail_rows.len() as i64), + resolution_method: Some(ResolutionMethod::FormulaDerived.as_str().to_string()), + confidence: 
Some(Confidence::Medium.as_str().to_string()), + warning_codes: warning_codes.clone(), + }, + detail_rows, + mapping_assignments, + warning_codes, + } +} + +fn build_direct_row( + definition: &UniversalIncomeDefinition, + periods: &[PeriodOutput], + matched: &MatchedStatementRow<'_>, + resolution_method: ResolutionMethod, + confidence: Confidence, + warning_codes: Vec, +) -> ResolvedUniversalRow { + let resolved_source_row_keys = periods + .iter() + .map(|period| { + let resolved = matched + .row + .values + .get(&period.id) + .copied() + .flatten() + .map(|_| matched.row.key.clone()); + (period.id.clone(), resolved) + }) + .collect::>(); + let mut mapping_assignments = HashMap::::new(); + mapping_assignments.insert( + matched.row.concept_key.clone(), + MappingAssignment { + authoritative_concept_key: matched.authoritative_concept_key.clone(), + mapping_method: Some(matched.mapping_method), + surface_key: Some(definition.key.clone()), + detail_parent_surface_key: None, + kpi_key: None, + residual_flag: false, + }, + ); + + ResolvedUniversalRow { + row: SurfaceRowOutput { + key: definition.key.clone(), + label: definition.label.clone(), + category: definition.category.clone(), + template_section: definition.category.clone(), + order: definition.order, + unit: definition.unit.clone(), + values: matched.row.values.clone(), + source_concepts: vec![matched.row.qname.clone()], + source_row_keys: vec![matched.row.key.clone()], + source_fact_ids: matched.row.source_fact_ids.clone(), + formula_key: None, + has_dimensions: matched.row.has_dimensions, + resolved_source_row_keys, + statement: Some(definition.statement.clone()), + detail_count: None, + resolution_method: Some(resolution_method.as_str().to_string()), + confidence: Some(confidence.as_str().to_string()), + warning_codes: warning_codes.clone(), + }, + detail_rows: vec![], + mapping_assignments, + warning_codes, + } +} + +fn build_direct_fact_row( + definition: &UniversalIncomeDefinition, + periods: &[PeriodOutput], 
+ matches: &BTreeMap>, + warning_codes: Vec, +) -> ResolvedUniversalRow { + let values = periods + .iter() + .map(|period| { + ( + period.id.clone(), + matches.get(&period.id).map(|matched| matched.fact.value_num), + ) + }) + .collect::>(); + let resolved_source_row_keys = periods + .iter() + .map(|period| { + ( + period.id.clone(), + matches.get(&period.id).map(|matched| matched.fact.local_name.clone()), + ) + }) + .collect::>(); + let source_concepts = unique_sorted_strings( + matches + .values() + .map(|matched| matched.fact.qname.clone()) + .collect::>(), + ); + let source_row_keys = unique_sorted_strings( + matches + .values() + .map(|matched| matched.fact.local_name.clone()) + .collect::>(), + ); + let source_fact_ids = unique_sorted_i64( + vec![], + ); + let has_dimensions = matches.values().any(|matched| !matched.fact.is_dimensionless); + let mut mapping_assignments = HashMap::::new(); + for matched in matches.values() { + mapping_assignments.insert( + matched.fact.concept_key.clone(), + MappingAssignment { + authoritative_concept_key: matched.authoritative_concept_key.clone(), + mapping_method: Some(matched.mapping_method), + surface_key: Some(definition.key.clone()), + detail_parent_surface_key: None, + kpi_key: None, + residual_flag: false, + }, + ); + } + + ResolvedUniversalRow { + row: SurfaceRowOutput { + key: definition.key.clone(), + label: definition.label.clone(), + category: definition.category.clone(), + template_section: definition.category.clone(), + order: definition.order, + unit: definition.unit.clone(), + values, + source_concepts, + source_row_keys, + source_fact_ids, + formula_key: None, + has_dimensions, + resolved_source_row_keys, + statement: Some(definition.statement.clone()), + detail_count: None, + resolution_method: Some(ResolutionMethod::Direct.as_str().to_string()), + confidence: Some(Confidence::High.as_str().to_string()), + warning_codes: warning_codes.clone(), + }, + detail_rows: vec![], + mapping_assignments, + 
warning_codes, + } +} + +fn not_meaningful_row( + definition: &UniversalIncomeDefinition, + periods: &[PeriodOutput], + warning_codes: &[String], +) -> ResolvedUniversalRow { + ResolvedUniversalRow { + row: null_surface_row( + definition, + periods, + Some(ResolutionMethod::NotMeaningful), + Some(Confidence::Low), + warning_codes.to_vec(), + ), + detail_rows: vec![], + mapping_assignments: HashMap::new(), + warning_codes: warning_codes.to_vec(), + } +} + +fn unresolved_row( + definition: &UniversalIncomeDefinition, + periods: &[PeriodOutput], + warning_codes: &[String], +) -> ResolvedUniversalRow { + ResolvedUniversalRow { + row: null_surface_row(definition, periods, None, Some(Confidence::Low), warning_codes.to_vec()), + detail_rows: vec![], + mapping_assignments: HashMap::new(), + warning_codes: warning_codes.to_vec(), + } +} + +fn null_surface_row( + definition: &UniversalIncomeDefinition, + periods: &[PeriodOutput], + resolution_method: Option, + confidence: Option, + warning_codes: Vec, +) -> SurfaceRowOutput { + SurfaceRowOutput { + key: definition.key.clone(), + label: definition.label.clone(), + category: definition.category.clone(), + template_section: definition.category.clone(), + order: definition.order, + unit: definition.unit.clone(), + values: periods + .iter() + .map(|period| (period.id.clone(), None)) + .collect::>(), + source_concepts: vec![], + source_row_keys: vec![], + source_fact_ids: vec![], + formula_key: None, + has_dimensions: false, + resolved_source_row_keys: periods + .iter() + .map(|period| (period.id.clone(), None)) + .collect::>(), + statement: Some(definition.statement.clone()), + detail_count: Some(0), + resolution_method: resolution_method.map(|value| value.as_str().to_string()), + confidence: confidence.map(|value| value.as_str().to_string()), + warning_codes, + } +} + +fn collect_group_sources<'a>( + income_statement_rows: &'a [StatementRowOutput], + groups: &[IncomeBridgeConceptGroup], + crosswalk: Option<&CrosswalkFile>, +) -> 
(Vec, Vec<&'a StatementRowOutput>) { + let mut sources = Vec::::new(); + let mut rows = Vec::<&'a StatementRowOutput>::new(); + + for group in groups { + let _ = &group.name; + let matches = income_statement_rows + .iter() + .filter(|row| { + group.concepts.iter().any(|candidate| { + candidate_matches(candidate, &row.qname) + || candidate_matches(candidate, &row.local_name) + || authoritative_concept_key(row, crosswalk) + .map(|concept| candidate_matches(candidate, &concept)) + .unwrap_or(false) + }) + }) + .collect::>(); + + for row in matches { + sources.push(statement_row_source(row)); + rows.push(row); + } + } + + (sources, rows) +} + +fn match_direct_authoritative<'a>( + row: &'a StatementRowOutput, + candidates: &[String], + crosswalk: Option<&CrosswalkFile>, +) -> Option> { + let authoritative_concept_key = authoritative_concept_key(row, crosswalk); + let matches = authoritative_concept_key.as_ref().map_or(false, |actual| { + candidates.iter().any(|candidate| candidate_matches(candidate, actual)) + }); + if !matches { + return None; + } + + Some(MatchedStatementRow { + row, + authoritative_concept_key, + mapping_method: MappingMethod::AuthoritativeDirect, + rank: 0, + }) +} + +fn match_direct_source<'a>( + row: &'a StatementRowOutput, + candidates: &[String], + crosswalk: Option<&CrosswalkFile>, +) -> Option> { + let matches = candidates.iter().any(|candidate| { + candidate_matches(candidate, &row.qname) || candidate_matches(candidate, &row.local_name) + }); + if !matches { + return None; + } + + Some(MatchedStatementRow { + row, + authoritative_concept_key: authoritative_concept_key(row, crosswalk), + mapping_method: MappingMethod::DirectSourceConcept, + rank: 1, + }) +} + +fn match_direct_facts<'a>( + periods: &[PeriodOutput], + facts: &'a [FactOutput], + authoritative_candidates: &[String], + source_candidates: &[String], + crosswalk: Option<&CrosswalkFile>, +) -> Option>> { + let income_facts = facts + .iter() + .filter(|fact| 
fact.statement_kind.as_deref() == Some("income")) + .collect::>(); + let mut matches = BTreeMap::>::new(); + + for period in periods { + let matched = income_facts + .iter() + .filter(|fact| fact_matches_period(fact, period)) + .filter_map(|fact| { + match_direct_fact_authoritative(fact, authoritative_candidates, crosswalk) + .or_else(|| match_direct_fact_source(fact, source_candidates, crosswalk)) + }) + .min_by(|left, right| compare_fact_matches(left, right)); + + if let Some(matched) = matched { + matches.insert(period.id.clone(), matched.clone()); + } + } + + if matches.is_empty() { + None + } else { + Some(matches) + } +} + +fn match_direct_fact_authoritative<'a>( + fact: &'a FactOutput, + candidates: &[String], + crosswalk: Option<&CrosswalkFile>, +) -> Option> { + let authoritative_concept_key = fact_authoritative_concept_key(fact, crosswalk); + let matches = authoritative_concept_key.as_ref().map_or(false, |actual| { + candidates.iter().any(|candidate| candidate_matches(candidate, actual)) + }); + if !matches { + return None; + } + + Some(MatchedFact { + fact, + authoritative_concept_key, + mapping_method: MappingMethod::AuthoritativeDirect, + rank: 0, + }) +} + +fn match_direct_fact_source<'a>( + fact: &'a FactOutput, + candidates: &[String], + crosswalk: Option<&CrosswalkFile>, +) -> Option> { + let matches = candidates.iter().any(|candidate| { + candidate_matches(candidate, &fact.qname) || candidate_matches(candidate, &fact.local_name) + }); + if !matches { + return None; + } + + Some(MatchedFact { + fact, + authoritative_concept_key: fact_authoritative_concept_key(fact, crosswalk), + mapping_method: MappingMethod::DirectSourceConcept, + rank: 1, + }) +} + +fn authoritative_concept_key( + row: &StatementRowOutput, + crosswalk: Option<&CrosswalkFile>, +) -> Option { + crosswalk + .and_then(|crosswalk| crosswalk.mappings.get(&row.qname)) + .map(|mapping| mapping.authoritative_concept_key.clone()) + .or_else(|| { + if !row.is_extension { + 
Some(row.qname.clone()) + } else { + None + } + }) +} + +fn fact_authoritative_concept_key( + fact: &FactOutput, + crosswalk: Option<&CrosswalkFile>, +) -> Option { + crosswalk + .and_then(|crosswalk| crosswalk.mappings.get(&fact.qname)) + .map(|mapping| mapping.authoritative_concept_key.clone()) + .or_else(|| Some(fact.qname.clone())) +} + +fn pick_best_match<'a>(matches: &'a [MatchedStatementRow<'a>]) -> Option<&'a MatchedStatementRow<'a>> { + matches.iter().min_by(|left, right| { + left.rank + .cmp(&right.rank) + .then_with(|| { + let left_dimension_rank = if left.row.has_dimensions { 1 } else { 0 }; + let right_dimension_rank = if right.row.has_dimensions { 1 } else { 0 }; + left_dimension_rank.cmp(&right_dimension_rank) + }) + .then_with(|| left.row.order.cmp(&right.row.order)) + .then_with(|| { + max_abs_value(&right.row.values) + .partial_cmp(&max_abs_value(&left.row.values)) + .unwrap_or(std::cmp::Ordering::Equal) + }) + .then_with(|| left.row.label.cmp(&right.row.label)) + }) +} + +fn compare_fact_matches(left: &MatchedFact<'_>, right: &MatchedFact<'_>) -> std::cmp::Ordering { + left.rank + .cmp(&right.rank) + .then_with(|| { + let left_dimension_rank = if left.fact.is_dimensionless { 0 } else { 1 }; + let right_dimension_rank = if right.fact.is_dimensionless { 0 } else { 1 }; + left_dimension_rank.cmp(&right_dimension_rank) + }) + .then_with(|| { + right + .fact + .value_num + .abs() + .partial_cmp(&left.fact.value_num.abs()) + .unwrap_or(std::cmp::Ordering::Equal) + }) + .then_with(|| left.fact.local_name.cmp(&right.fact.local_name)) +} + +fn statement_row_source(row: &StatementRowOutput) -> ValueSource { + ValueSource { + values: row.values.clone(), + source_concepts: vec![row.qname.clone()], + source_row_keys: vec![row.key.clone()], + source_fact_ids: row.source_fact_ids.clone(), + has_dimensions: row.has_dimensions, + } +} + +fn surface_source(row: &SurfaceRowOutput) -> ValueSource { + ValueSource { + values: row.values.clone(), + source_concepts: 
row.source_concepts.clone(), + source_row_keys: row.source_row_keys.clone(), + source_fact_ids: row.source_fact_ids.clone(), + has_dimensions: row.has_dimensions, + } +} + +fn fact_matches_period(fact: &FactOutput, period: &PeriodOutput) -> bool { + if fact.period_end != period.period_end { + return false; + } + + match (&period.period_start, &fact.period_start) { + (Some(period_start), Some(fact_start)) => period_start == fact_start, + (None, None) => true, + _ => false, + } +} + +fn sum_source_values(sources: &[ValueSource], period_id: &str) -> Option { + let values = sources + .iter() + .map(|source| source.values.get(period_id).copied().flatten()) + .collect::>(); + if values.iter().all(|value| value.is_none()) { + return None; + } + + Some(values.into_iter().map(|value| value.unwrap_or(0.0)).sum()) +} + +fn build_detail_row( + row: &StatementRowOutput, + parent_surface_key: &str, + residual_flag: bool, +) -> DetailRowOutput { + DetailRowOutput { + key: row.key.clone(), + parent_surface_key: parent_surface_key.to_string(), + label: row.label.clone(), + concept_key: row.concept_key.clone(), + qname: row.qname.clone(), + namespace_uri: row.namespace_uri.clone(), + local_name: row.local_name.clone(), + unit: row.units.values().find_map(|value| value.clone()), + values: row.values.clone(), + source_fact_ids: row.source_fact_ids.clone(), + is_extension: row.is_extension, + dimensions_summary: if row.has_dimensions { + vec!["has_dimensions".to_string()] + } else { + vec![] + }, + residual_flag, + } +} + +fn upsert_surface_row(rows: &mut Vec, row: SurfaceRowOutput) { + if let Some(index) = rows.iter().position(|existing| existing.key == row.key) { + rows[index] = row; + } else { + rows.push(row); + } +} + +fn merge_detail_bucket(existing: &mut Vec, new_rows: Vec) { + let mut by_key = existing + .drain(..) 
+ .map(|row| (row.key.clone(), row)) + .collect::>(); + + for row in new_rows { + by_key + .entry(row.key.clone()) + .and_modify(|existing| { + existing.values.extend(row.values.clone()); + existing.source_fact_ids = unique_sorted_i64( + existing + .source_fact_ids + .iter() + .copied() + .chain(row.source_fact_ids.iter().copied()) + .collect(), + ); + existing.dimensions_summary = unique_sorted_strings( + existing + .dimensions_summary + .iter() + .cloned() + .chain(row.dimensions_summary.iter().cloned()) + .collect(), + ); + existing.is_extension = existing.is_extension || row.is_extension; + existing.residual_flag = existing.residual_flag || row.residual_flag; + }) + .or_insert(row); + } + + let mut merged = by_key.into_values().collect::>(); + merged.sort_by(|left, right| left.label.cmp(&right.label)); + *existing = merged; +} + +fn recount_normalization_summary(compact_model: &mut CompactSurfaceModel) { + compact_model.normalization_summary.surface_row_count = compact_model + .surface_rows + .values() + .map(|rows| rows.len()) + .sum(); + compact_model.normalization_summary.detail_row_count = compact_model + .detail_rows + .values() + .map(|groups| groups.values().map(|rows| rows.len()).sum::()) + .sum(); +} + +fn candidate_matches(candidate: &str, actual: &str) -> bool { + candidate.eq_ignore_ascii_case(actual) + || candidate + .rsplit_once(':') + .map(|(_, local_name)| local_name.eq_ignore_ascii_case(actual)) + .unwrap_or(false) + || actual + .rsplit_once(':') + .map(|(_, local_name)| local_name.eq_ignore_ascii_case(candidate)) + .unwrap_or(false) +} + +fn has_any_value(values: &BTreeMap>) -> bool { + values.values().any(|value| value.is_some()) +} + +fn max_abs_value(values: &BTreeMap>) -> f64 { + values + .values() + .fold(0.0_f64, |max, value| max.max(value.unwrap_or(0.0).abs())) +} + +fn unique_sorted_strings(values: Vec) -> Vec { + let mut values = values.into_iter().collect::>().into_iter().collect::>(); + values.sort(); + values +} + +fn 
unique_sorted_i64(values: Vec) -> Vec { + let mut values = values.into_iter().collect::>().into_iter().collect::>(); + values.sort(); + values +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::surface_mapper::CompactSurfaceModel; + use crate::{DetailRowStatementMap, NormalizationSummaryOutput, SurfaceRowMap}; + + fn period(id: &str) -> PeriodOutput { + PeriodOutput { + id: id.to_string(), + filing_id: 1, + accession_number: "0000000000-00-000001".to_string(), + filing_date: "2025-12-31".to_string(), + period_start: Some("2025-01-01".to_string()), + period_end: Some("2025-12-31".to_string()), + filing_type: "10-K".to_string(), + period_label: id.to_string(), + } + } + + fn row(key: &str, qname: &str, value: f64) -> StatementRowOutput { + StatementRowOutput { + key: key.to_string(), + label: key.to_string(), + concept_key: format!("http://fasb.org/us-gaap/2024#{}", qname.split(':').nth(1).unwrap_or(key)), + qname: qname.to_string(), + namespace_uri: "http://fasb.org/us-gaap/2024".to_string(), + local_name: qname.split(':').nth(1).unwrap_or(key).to_string(), + is_extension: false, + statement: "income".to_string(), + role_uri: Some("income".to_string()), + order: 1, + depth: 0, + parent_key: None, + values: BTreeMap::from([("p1".to_string(), Some(value))]), + units: BTreeMap::from([("p1".to_string(), Some("iso4217:USD".to_string()))]), + has_dimensions: false, + source_fact_ids: vec![1], + } + } + + fn empty_model() -> CompactSurfaceModel { + CompactSurfaceModel { + surface_rows: SurfaceRowMap::from([ + ("income".to_string(), Vec::new()), + ("balance".to_string(), Vec::new()), + ("cash_flow".to_string(), Vec::new()), + ("equity".to_string(), Vec::new()), + ("comprehensive_income".to_string(), Vec::new()), + ]), + detail_rows: DetailRowStatementMap::from([ + ("income".to_string(), BTreeMap::new()), + ("balance".to_string(), BTreeMap::new()), + ("cash_flow".to_string(), BTreeMap::new()), + ("equity".to_string(), BTreeMap::new()), + 
("comprehensive_income".to_string(), BTreeMap::new()), + ]), + normalization_summary: NormalizationSummaryOutput::default(), + concept_mappings: HashMap::new(), + } + } + + fn empty_rows() -> StatementRowMap { + BTreeMap::from([ + ("income".to_string(), Vec::new()), + ("balance".to_string(), Vec::new()), + ("cash_flow".to_string(), Vec::new()), + ("equity".to_string(), Vec::new()), + ("comprehensive_income".to_string(), Vec::new()), + ]) + } + + fn surface_row(key: &str, value: f64) -> SurfaceRowOutput { + SurfaceRowOutput { + key: key.to_string(), + label: key.to_string(), + category: "surface".to_string(), + template_section: "surface".to_string(), + order: 10, + unit: "currency".to_string(), + values: BTreeMap::from([("p1".to_string(), Some(value))]), + source_concepts: vec![format!("us-gaap:{key}")], + source_row_keys: vec![key.to_string()], + source_fact_ids: vec![1], + formula_key: None, + has_dimensions: false, + resolved_source_row_keys: BTreeMap::from([("p1".to_string(), Some(key.to_string()))]), + statement: Some("income".to_string()), + detail_count: Some(0), + resolution_method: None, + confidence: None, + warning_codes: vec![], + } + } + + fn fact(local_name: &str, value: f64) -> FactOutput { + FactOutput { + concept_key: format!("http://fasb.org/us-gaap/2024#{local_name}"), + qname: format!("us-gaap:{local_name}"), + namespace_uri: "http://fasb.org/us-gaap/2024".to_string(), + local_name: local_name.to_string(), + data_type: Some("monetaryItemType".to_string()), + statement_kind: Some("income".to_string()), + role_uri: None, + authoritative_concept_key: None, + mapping_method: None, + surface_key: None, + detail_parent_surface_key: None, + kpi_key: None, + residual_flag: false, + context_id: "c1".to_string(), + unit: Some("iso4217:USD".to_string()), + decimals: None, + precision: None, + nil: false, + value_num: value, + period_start: Some("2025-01-01".to_string()), + period_end: Some("2025-12-31".to_string()), + period_instant: None, + dimensions: 
vec![], + is_dimensionless: true, + source_file: None, + } + } + + #[test] + fn derives_gross_profit_from_revenue_minus_cost_of_revenue() { + let mut rows = empty_rows(); + rows.get_mut("income").unwrap().extend([ + row("revenue", "us-gaap:RevenueFromContractWithCustomerExcludingAssessedTax", 100.0), + row("cogs", "us-gaap:CostOfRevenue", 40.0), + ]); + let mut model = empty_model(); + + apply_universal_income_rows(&[period("p1")], &rows, &[], "us-gaap", FiscalPack::Core, &mut model) + .expect("universal income rows should build"); + + let gross_profit = model + .surface_rows + .get("income") + .unwrap() + .iter() + .find(|row| row.key == "gross_profit") + .unwrap(); + + assert_eq!(gross_profit.values.get("p1").copied().flatten(), Some(60.0)); + assert_eq!(gross_profit.formula_key.as_deref(), Some("gross_profit")); + } + + #[test] + fn emits_not_meaningful_bank_gross_profit_row() { + let mut rows = empty_rows(); + rows.get_mut("income").unwrap().push(row("net-interest", "us-gaap:NetInterestIncome", 50.0)); + let mut model = empty_model(); + + apply_universal_income_rows(&[period("p1")], &rows, &[], "us-gaap", FiscalPack::BankLender, &mut model) + .expect("universal income rows should build"); + + let gross_profit = model + .surface_rows + .get("income") + .unwrap() + .iter() + .find(|row| row.key == "gross_profit") + .unwrap(); + + assert_eq!(gross_profit.values.get("p1").copied().flatten(), None); + assert_eq!(gross_profit.resolution_method.as_deref(), Some("not_meaningful")); + assert!(gross_profit.warning_codes.contains(&"gross_profit_not_meaningful_bank_pack".to_string())); + } + + #[test] + fn derives_bank_revenue_from_net_interest_income_and_noninterest_income() { + let rows = empty_rows(); + let mut model = empty_model(); + model + .surface_rows + .get_mut("income") + .unwrap() + .extend([surface_row("net_interest_income", 60.0), surface_row("noninterest_income", 40.0)]); + + apply_universal_income_rows(&[period("p1")], &rows, &[], "us-gaap", 
FiscalPack::BankLender, &mut model) + .expect("universal income rows should build"); + + let revenue = model + .surface_rows + .get("income") + .unwrap() + .iter() + .find(|row| row.key == "revenue") + .unwrap(); + + assert_eq!(revenue.values.get("p1").copied().flatten(), Some(100.0)); + assert_eq!(revenue.resolution_method.as_deref(), Some("formula_derived")); + } + + #[test] + fn derives_sga_from_sales_and_marketing_plus_general_and_administrative() { + let mut rows = empty_rows(); + rows.get_mut("income").unwrap().extend([ + row("sales-and-marketing", "us-gaap:SalesAndMarketingExpense", 30.0), + row("g-and-a", "us-gaap:GeneralAndAdministrativeExpense", 10.0), + ]); + let mut model = empty_model(); + + apply_universal_income_rows(&[period("p1")], &rows, &[], "us-gaap", FiscalPack::Core, &mut model) + .expect("universal income rows should build"); + + let sga = model + .surface_rows + .get("income") + .unwrap() + .iter() + .find(|row| row.key == "selling_general_and_administrative") + .unwrap(); + + assert_eq!(sga.values.get("p1").copied().flatten(), Some(40.0)); + assert_eq!(sga.formula_key.as_deref(), Some("selling_general_and_administrative")); + assert_eq!(sga.resolution_method.as_deref(), Some("formula_derived")); + + let detail_rows = model + .detail_rows + .get("income") + .and_then(|groups| groups.get("selling_general_and_administrative")) + .cloned() + .unwrap_or_default(); + assert_eq!(detail_rows.len(), 2); + } + + #[test] + fn derives_other_operating_expense_from_operating_expenses_minus_sga_and_missing_rnd() { + let rows = empty_rows(); + let mut model = empty_model(); + model + .surface_rows + .get_mut("income") + .unwrap() + .extend([surface_row("operating_expenses", 100.0), surface_row("selling_general_and_administrative", 60.0)]); + + apply_universal_income_rows(&[period("p1")], &rows, &[], "us-gaap", FiscalPack::Core, &mut model) + .expect("universal income rows should build"); + + let other = model + .surface_rows + .get("income") + .unwrap() + 
.iter() + .find(|row| row.key == "other_operating_expense") + .unwrap(); + + assert_eq!(other.values.get("p1").copied().flatten(), Some(40.0)); + assert_eq!(other.formula_key.as_deref(), Some("other_operating_expense")); + assert_eq!(other.resolution_method.as_deref(), Some("formula_derived")); + } + + #[test] + fn derives_insurance_operating_expenses_from_claims_and_underwriting() { + let rows = empty_rows(); + let mut model = empty_model(); + model + .surface_rows + .get_mut("income") + .unwrap() + .extend([surface_row("claims_and_benefits", 80.0), surface_row("underwriting_expenses", 20.0)]); + + apply_universal_income_rows(&[period("p1")], &rows, &[], "us-gaap", FiscalPack::Insurance, &mut model) + .expect("universal income rows should build"); + + let operating_expenses = model + .surface_rows + .get("income") + .unwrap() + .iter() + .find(|row| row.key == "operating_expenses") + .unwrap(); + + assert_eq!(operating_expenses.values.get("p1").copied().flatten(), Some(100.0)); + } + + #[test] + fn emits_not_meaningful_bank_expense_breakdown_rows() { + let mut rows = empty_rows(); + rows.get_mut("income").unwrap().push(row("net-interest", "us-gaap:NetInterestIncome", 50.0)); + let mut model = empty_model(); + + apply_universal_income_rows(&[period("p1")], &rows, &[], "us-gaap", FiscalPack::BankLender, &mut model) + .expect("universal income rows should build"); + + let sga = model + .surface_rows + .get("income") + .unwrap() + .iter() + .find(|row| row.key == "selling_general_and_administrative") + .unwrap(); + let other = model + .surface_rows + .get("income") + .unwrap() + .iter() + .find(|row| row.key == "other_operating_expense") + .unwrap(); + + assert_eq!(sga.values.get("p1").copied().flatten(), None); + assert_eq!(sga.resolution_method.as_deref(), Some("not_meaningful")); + assert!(sga.warning_codes.contains(&"selling_general_and_administrative_not_meaningful_bank_pack".to_string())); + assert_eq!(other.values.get("p1").copied().flatten(), None); + 
assert_eq!(other.resolution_method.as_deref(), Some("not_meaningful")); + assert!(other.warning_codes.contains(&"other_operating_expense_not_meaningful_bank_pack".to_string())); + } + + #[test] + fn derives_reit_gross_profit_from_revenue_minus_property_operating_expense() { + let rows = empty_rows(); + let mut model = empty_model(); + model + .surface_rows + .get_mut("income") + .unwrap() + .extend([surface_row("revenue", 75.0), surface_row("property_operating_expense", 15.0)]); + + apply_universal_income_rows(&[period("p1")], &rows, &[], "us-gaap", FiscalPack::ReitRealEstate, &mut model) + .expect("universal income rows should build"); + + let gross_profit = model + .surface_rows + .get("income") + .unwrap() + .iter() + .find(|row| row.key == "gross_profit") + .unwrap(); + + assert_eq!(gross_profit.values.get("p1").copied().flatten(), Some(60.0)); + } + + #[test] + fn resolves_broker_revenue_from_direct_total_revenues() { + let rows = empty_rows(); + let facts = vec![fact("TotalRevenues", 90.0)]; + let mut model = empty_model(); + + apply_universal_income_rows( + &[period("p1")], + &rows, + &facts, + "us-gaap", + FiscalPack::BrokerAssetManager, + &mut model, + ) + .expect("universal income rows should build"); + + let revenue = model + .surface_rows + .get("income") + .unwrap() + .iter() + .find(|row| row.key == "revenue") + .unwrap(); + + assert_eq!(revenue.values.get("p1").copied().flatten(), Some(90.0)); + assert_eq!(revenue.resolution_method.as_deref(), Some("direct")); + } + + #[test] + fn derives_income_tax_expense_from_pretax_income_minus_net_income() { + let mut rows = empty_rows(); + rows.get_mut("income").unwrap().push(row("pretax", "us-gaap:IncomeBeforeTaxExpenseBenefit", 100.0)); + let mut model = empty_model(); + model + .surface_rows + .get_mut("income") + .unwrap() + .push(surface_row("net_income", 75.0)); + + apply_universal_income_rows(&[period("p1")], &rows, &[], "us-gaap", FiscalPack::Core, &mut model) + .expect("universal income rows should 
build"); + + let tax = model + .surface_rows + .get("income") + .unwrap() + .iter() + .find(|row| row.key == "income_tax_expense") + .unwrap(); + + assert_eq!(tax.values.get("p1").copied().flatten(), Some(25.0)); + } +} diff --git a/rust/taxonomy/crosswalk/ifrs.json b/rust/taxonomy/crosswalk/ifrs.json new file mode 100644 index 0000000..38c566d --- /dev/null +++ b/rust/taxonomy/crosswalk/ifrs.json @@ -0,0 +1,18 @@ +{ + "version": "fiscal-v1", + "regime": "ifrs-full", + "mappings": { + "ifrs-full:Revenue": { + "surface_key": "revenue", + "authoritative_concept_key": "ifrs-full:Revenue" + }, + "ifrs-full:ProfitLoss": { + "surface_key": "net_income", + "authoritative_concept_key": "ifrs-full:ProfitLoss" + }, + "ifrs-full:Assets": { + "surface_key": "total_assets", + "authoritative_concept_key": "ifrs-full:Assets" + } + } +} diff --git a/rust/taxonomy/crosswalk/us-gaap.json b/rust/taxonomy/crosswalk/us-gaap.json new file mode 100644 index 0000000..0d8132b --- /dev/null +++ b/rust/taxonomy/crosswalk/us-gaap.json @@ -0,0 +1,338 @@ +{ + "version": "fiscal-v1", + "regime": "us-gaap", + "mappings": { + "us-gaap:RevenueFromContractWithCustomerExcludingAssessedTax": { + "surface_key": "revenue", + "authoritative_concept_key": "us-gaap:RevenueFromContractWithCustomerExcludingAssessedTax" + }, + "us-gaap:SalesRevenueNet": { + "surface_key": "revenue", + "authoritative_concept_key": "us-gaap:SalesRevenueNet" + }, + "us-gaap:Revenues": { + "surface_key": "revenue", + "authoritative_concept_key": "us-gaap:Revenues" + }, + "us-gaap:Revenue": { + "surface_key": "revenue", + "authoritative_concept_key": "us-gaap:Revenue" + }, + "us-gaap:TotalRevenues": { + "surface_key": "revenue", + "authoritative_concept_key": "us-gaap:TotalRevenues" + }, + "us-gaap:TotalNetRevenues": { + "surface_key": "revenue", + "authoritative_concept_key": "us-gaap:TotalNetRevenues" + }, + "us-gaap:GrossProfit": { + "surface_key": "gross_profit", + "authoritative_concept_key": "us-gaap:GrossProfit" + }, + 
"us-gaap:OperatingIncomeLoss": { + "surface_key": "operating_income", + "authoritative_concept_key": "us-gaap:OperatingIncomeLoss" + }, + "us-gaap:IncomeFromOperations": { + "surface_key": "operating_income", + "authoritative_concept_key": "us-gaap:IncomeFromOperations" + }, + "us-gaap:OperatingProfit": { + "surface_key": "operating_income", + "authoritative_concept_key": "us-gaap:OperatingProfit" + }, + "us-gaap:OperatingExpenses": { + "surface_key": "operating_expenses", + "authoritative_concept_key": "us-gaap:OperatingExpenses" + }, + "us-gaap:SellingGeneralAndAdministrativeExpense": { + "surface_key": "selling_general_and_administrative", + "authoritative_concept_key": "us-gaap:SellingGeneralAndAdministrativeExpense" + }, + "us-gaap:SellingGeneralAndAdministrativeExpenseExcludingEmployeeStockOptionPlanSpecialDividendCompensation": { + "surface_key": "selling_general_and_administrative", + "authoritative_concept_key": "us-gaap:SellingGeneralAndAdministrativeExpenseExcludingEmployeeStockOptionPlanSpecialDividendCompensation" + }, + "us-gaap:ResearchAndDevelopmentExpense": { + "surface_key": "research_and_development", + "authoritative_concept_key": "us-gaap:ResearchAndDevelopmentExpense" + }, + "us-gaap:OtherThanInterestExpense": { + "surface_key": "other_operating_expense", + "authoritative_concept_key": "us-gaap:OtherThanInterestExpense" + }, + "us-gaap:OtherOperatingExpenses": { + "surface_key": "other_operating_expense", + "authoritative_concept_key": "us-gaap:OtherOperatingExpenses" + }, + "us-gaap:OtherCostAndExpenseOperating": { + "surface_key": "other_operating_expense", + "authoritative_concept_key": "us-gaap:OtherCostAndExpenseOperating" + }, + "us-gaap:OtherOperatingExpense": { + "surface_key": "other_operating_expense", + "authoritative_concept_key": "us-gaap:OtherOperatingExpense" + }, + "us-gaap:IncomeTaxExpenseBenefit": { + "surface_key": "income_tax_expense", + "authoritative_concept_key": "us-gaap:IncomeTaxExpenseBenefit" + }, + 
"us-gaap:IncomeBeforeTaxExpenseBenefit": { + "surface_key": "pretax_income", + "authoritative_concept_key": "us-gaap:IncomeBeforeTaxExpenseBenefit" + }, + "us-gaap:PretaxIncome": { + "surface_key": "pretax_income", + "authoritative_concept_key": "us-gaap:PretaxIncome" + }, + "us-gaap:IncomeLossFromContinuingOperationsBeforeIncomeTaxesExtraordinaryItemsNoncontrollingInterest": { + "surface_key": "pretax_income", + "authoritative_concept_key": "us-gaap:IncomeLossFromContinuingOperationsBeforeIncomeTaxesExtraordinaryItemsNoncontrollingInterest" + }, + "us-gaap:IncomeBeforeEquityMethodInvestmentsIncomeTaxesExtraordinaryItemsNoncontrollingInterest": { + "surface_key": "pretax_income", + "authoritative_concept_key": "us-gaap:IncomeBeforeEquityMethodInvestmentsIncomeTaxesExtraordinaryItemsNoncontrollingInterest" + }, + "us-gaap:IncomeFromContinuingOperationsBeforeIncomeTaxesMinorityInterestAndIncomeLossFromEquityMethodInvestments": { + "surface_key": "pretax_income", + "authoritative_concept_key": "us-gaap:IncomeFromContinuingOperationsBeforeIncomeTaxesMinorityInterestAndIncomeLossFromEquityMethodInvestments" + }, + "us-gaap:CostOfRevenue": { + "surface_key": "cost_of_revenue", + "authoritative_concept_key": "us-gaap:CostOfRevenue" + }, + "us-gaap:CostOfGoodsSold": { + "surface_key": "cost_of_revenue", + "authoritative_concept_key": "us-gaap:CostOfGoodsSold" + }, + "us-gaap:CostOfSales": { + "surface_key": "cost_of_revenue", + "authoritative_concept_key": "us-gaap:CostOfSales" + }, + "us-gaap:CostOfGoodsAndServicesSold": { + "surface_key": "cost_of_revenue", + "authoritative_concept_key": "us-gaap:CostOfGoodsAndServicesSold" + }, + "us-gaap:CostOfGoodsAndServiceExcludingDepreciationDepletionAndAmortization": { + "surface_key": "cost_of_revenue", + "authoritative_concept_key": "us-gaap:CostOfGoodsAndServiceExcludingDepreciationDepletionAndAmortization" + }, + "us-gaap:CostOfProductsSold": { + "surface_key": "cost_of_revenue", + "authoritative_concept_key": 
"us-gaap:CostOfProductsSold" + }, + "us-gaap:CostOfServices": { + "surface_key": "cost_of_revenue", + "authoritative_concept_key": "us-gaap:CostOfServices" + }, + "us-gaap:NetIncomeLoss": { + "surface_key": "net_income", + "authoritative_concept_key": "us-gaap:NetIncomeLoss" + }, + "us-gaap:ProfitLoss": { + "surface_key": "net_income", + "authoritative_concept_key": "us-gaap:ProfitLoss" + }, + "us-gaap:Assets": { + "surface_key": "total_assets", + "authoritative_concept_key": "us-gaap:Assets" + }, + "us-gaap:InterestAndDividendIncomeOperating": { + "surface_key": "interest_income", + "authoritative_concept_key": "us-gaap:InterestAndDividendIncomeOperating" + }, + "us-gaap:InterestIncomeOperating": { + "surface_key": "interest_income", + "authoritative_concept_key": "us-gaap:InterestIncomeOperating" + }, + "us-gaap:InterestIncomeExpenseOperatingGross": { + "surface_key": "interest_income", + "authoritative_concept_key": "us-gaap:InterestIncomeExpenseOperatingGross" + }, + "us-gaap:InterestExpense": { + "surface_key": "interest_expense", + "authoritative_concept_key": "us-gaap:InterestExpense" + }, + "us-gaap:InterestIncomeExpenseOperatingNet": { + "surface_key": "net_interest_income", + "authoritative_concept_key": "us-gaap:InterestIncomeExpenseOperatingNet" + }, + "us-gaap:NetInterestIncome": { + "surface_key": "net_interest_income", + "authoritative_concept_key": "us-gaap:NetInterestIncome" + }, + "us-gaap:InterestIncomeExpenseNet": { + "surface_key": "net_interest_income", + "authoritative_concept_key": "us-gaap:InterestIncomeExpenseNet" + }, + "us-gaap:ProvisionForCreditLosses": { + "surface_key": "provision_for_credit_losses", + "authoritative_concept_key": "us-gaap:ProvisionForCreditLosses" + }, + "us-gaap:ProvisionForLoanLeaseAndOtherLosses": { + "surface_key": "provision_for_credit_losses", + "authoritative_concept_key": "us-gaap:ProvisionForLoanLeaseAndOtherLosses" + }, + "us-gaap:NoninterestIncome": { + "surface_key": "noninterest_income", + 
"authoritative_concept_key": "us-gaap:NoninterestIncome" + }, + "us-gaap:NoninterestExpense": { + "surface_key": "noninterest_expense", + "authoritative_concept_key": "us-gaap:NoninterestExpense" + }, + "us-gaap:FinancingReceivableRecordedInvestment": { + "surface_key": "loans", + "authoritative_concept_key": "us-gaap:FinancingReceivableRecordedInvestment" + }, + "us-gaap:LoansReceivableNetReportedAmount": { + "surface_key": "loans", + "authoritative_concept_key": "us-gaap:LoansReceivableNetReportedAmount" + }, + "us-gaap:FinancingReceivableExcludingAccruedInterestBeforeAllowanceForCreditLoss": { + "surface_key": "loans", + "authoritative_concept_key": "us-gaap:FinancingReceivableExcludingAccruedInterestBeforeAllowanceForCreditLoss" + }, + "us-gaap:FinancingReceivableExcludingAccruedInterestAfterAllowanceForCreditLoss": { + "surface_key": "loans", + "authoritative_concept_key": "us-gaap:FinancingReceivableExcludingAccruedInterestAfterAllowanceForCreditLoss" + }, + "us-gaap:AllowanceForCreditLosses": { + "surface_key": "allowance_for_credit_losses", + "authoritative_concept_key": "us-gaap:AllowanceForCreditLosses" + }, + "us-gaap:AllowanceForLoanLosses": { + "surface_key": "allowance_for_credit_losses", + "authoritative_concept_key": "us-gaap:AllowanceForLoanLosses" + }, + "us-gaap:DepositsLiabilities": { + "surface_key": "deposits", + "authoritative_concept_key": "us-gaap:DepositsLiabilities" + }, + "us-gaap:Deposits": { + "surface_key": "deposits", + "authoritative_concept_key": "us-gaap:Deposits" + }, + "us-gaap:Liabilities": { + "surface_key": "total_liabilities", + "authoritative_concept_key": "us-gaap:Liabilities" + }, + "us-gaap:StockholdersEquityIncludingPortionAttributableToNoncontrollingInterest": { + "surface_key": "total_equity", + "authoritative_concept_key": "us-gaap:StockholdersEquityIncludingPortionAttributableToNoncontrollingInterest" + }, + "us-gaap:StockholdersEquity": { + "surface_key": "total_equity", + "authoritative_concept_key": 
"us-gaap:StockholdersEquity" + }, + "us-gaap:Premiums": { + "surface_key": "premiums", + "authoritative_concept_key": "us-gaap:Premiums" + }, + "us-gaap:PremiumsEarned": { + "surface_key": "premiums", + "authoritative_concept_key": "us-gaap:PremiumsEarned" + }, + "us-gaap:PremiumsEarnedNet": { + "surface_key": "premiums", + "authoritative_concept_key": "us-gaap:PremiumsEarnedNet" + }, + "us-gaap:PremiumsWrittenNet": { + "surface_key": "premiums", + "authoritative_concept_key": "us-gaap:PremiumsWrittenNet" + }, + "us-gaap:SupplementaryInsuranceInformationPremiumRevenue": { + "surface_key": "premiums", + "authoritative_concept_key": "us-gaap:SupplementaryInsuranceInformationPremiumRevenue" + }, + "us-gaap:PolicyholderBenefitsAndClaimsIncurredNet": { + "surface_key": "claims_and_benefits", + "authoritative_concept_key": "us-gaap:PolicyholderBenefitsAndClaimsIncurredNet" + }, + "us-gaap:BenefitsLossesAndExpenses": { + "surface_key": "claims_and_benefits", + "authoritative_concept_key": "us-gaap:BenefitsLossesAndExpenses" + }, + "us-gaap:SupplementaryInsuranceInformationBenefitsClaimsLossesAndSettlementExpense": { + "surface_key": "claims_and_benefits", + "authoritative_concept_key": "us-gaap:SupplementaryInsuranceInformationBenefitsClaimsLossesAndSettlementExpense" + }, + "us-gaap:UnderwritingExpenses": { + "surface_key": "underwriting_expenses", + "authoritative_concept_key": "us-gaap:UnderwritingExpenses" + }, + "us-gaap:PolicyAcquisitionExpense": { + "surface_key": "underwriting_expenses", + "authoritative_concept_key": "us-gaap:PolicyAcquisitionExpense" + }, + "us-gaap:DeferredPolicyAcquisitionCostAmortizationExpense": { + "surface_key": "underwriting_expenses", + "authoritative_concept_key": "us-gaap:DeferredPolicyAcquisitionCostAmortizationExpense" + }, + "us-gaap:InvestmentIncome": { + "surface_key": "investment_income", + "authoritative_concept_key": "us-gaap:InvestmentIncome" + }, + "us-gaap:NetInvestmentIncome": { + "surface_key": "investment_income", + 
"authoritative_concept_key": "us-gaap:NetInvestmentIncome" + }, + "us-gaap:SupplementaryInsuranceInformationNetInvestmentIncome": { + "surface_key": "investment_income", + "authoritative_concept_key": "us-gaap:SupplementaryInsuranceInformationNetInvestmentIncome" + }, + "us-gaap:FuturePolicyBenefits": { + "surface_key": "policy_liabilities", + "authoritative_concept_key": "us-gaap:FuturePolicyBenefits" + }, + "us-gaap:LiabilityForFuturePolicyBenefits": { + "surface_key": "policy_liabilities", + "authoritative_concept_key": "us-gaap:LiabilityForFuturePolicyBenefits" + }, + "us-gaap:LiabilityForUnpaidLossesAndLossAdjustmentExpenses": { + "surface_key": "policy_liabilities", + "authoritative_concept_key": "us-gaap:LiabilityForUnpaidLossesAndLossAdjustmentExpenses" + }, + "us-gaap:LiabilityForUnpaidClaimsAndClaimsAdjustmentExpense": { + "surface_key": "policy_liabilities", + "authoritative_concept_key": "us-gaap:LiabilityForUnpaidClaimsAndClaimsAdjustmentExpense" + }, + "us-gaap:DeferredPolicyAcquisitionCosts": { + "surface_key": "deferred_acquisition_costs", + "authoritative_concept_key": "us-gaap:DeferredPolicyAcquisitionCosts" + }, + "us-gaap:DeferredAcquisitionCosts": { + "surface_key": "deferred_acquisition_costs", + "authoritative_concept_key": "us-gaap:DeferredAcquisitionCosts" + }, + "us-gaap:LeaseIncome": { + "surface_key": "rental_revenue", + "authoritative_concept_key": "us-gaap:LeaseIncome" + }, + "us-gaap:DirectCostsOfLeasedAndRentedPropertyOrEquipment": { + "surface_key": "property_operating_expense", + "authoritative_concept_key": "us-gaap:DirectCostsOfLeasedAndRentedPropertyOrEquipment" + }, + "us-gaap:DepreciationDepletionAndAmortization": { + "surface_key": "depreciation_and_amortization", + "authoritative_concept_key": "us-gaap:DepreciationDepletionAndAmortization" + }, + "us-gaap:RealEstateInvestmentPropertyNet": { + "surface_key": "investment_property", + "authoritative_concept_key": "us-gaap:RealEstateInvestmentPropertyNet" + }, + 
"us-gaap:RealEstateInvestmentPropertyAtCost": { + "surface_key": "investment_property", + "authoritative_concept_key": "us-gaap:RealEstateInvestmentPropertyAtCost" + }, + "us-gaap:PerformanceFeeRevenueRecognized": { + "surface_key": "fee_revenue", + "authoritative_concept_key": "us-gaap:PerformanceFeeRevenueRecognized" + }, + "us-gaap:TotalSalesAssetAndAccountExpense": { + "surface_key": "distribution_and_servicing_expense", + "authoritative_concept_key": "us-gaap:TotalSalesAssetAndAccountExpense" + } + } +} diff --git a/rust/taxonomy/fiscal/v1/bank_lender.income-bridge.json b/rust/taxonomy/fiscal/v1/bank_lender.income-bridge.json new file mode 100644 index 0000000..b909277 --- /dev/null +++ b/rust/taxonomy/fiscal/v1/bank_lender.income-bridge.json @@ -0,0 +1,213 @@ +{ + "version": "fiscal-v1", + "pack": "bank_lender", + "rows": { + "revenue": { + "direct_authoritative_concepts": [ + "us-gaap:TotalRevenues", + "us-gaap:TotalNetRevenues" + ], + "direct_source_concepts": [ + "TotalRevenues", + "TotalNetRevenues" + ], + "component_surfaces": { + "positive": [ + "net_interest_income", + "noninterest_income" + ], + "negative": [] + }, + "component_concept_groups": { + "positive": [], + "negative": [] + }, + "formula": "pack_bridge_sum", + "not_meaningful_for_pack": false, + "warning_codes_when_used": [ + "revenue_formula_derived" + ] + }, + "gross_profit": { + "direct_authoritative_concepts": [], + "direct_source_concepts": [], + "component_surfaces": { + "positive": [], + "negative": [] + }, + "component_concept_groups": { + "positive": [], + "negative": [] + }, + "formula": "not_meaningful", + "not_meaningful_for_pack": true, + "warning_codes_when_used": [ + "gross_profit_not_meaningful_bank_pack" + ] + }, + "operating_expenses": { + "direct_authoritative_concepts": [ + "us-gaap:OperatingExpenses" + ], + "direct_source_concepts": [ + "OperatingExpenses" + ], + "component_surfaces": { + "positive": [ + "provision_for_credit_losses", + "noninterest_expense" + ], + 
"negative": [] + }, + "component_concept_groups": { + "positive": [], + "negative": [] + }, + "formula": "pack_bridge_sum", + "not_meaningful_for_pack": false, + "warning_codes_when_used": [ + "operating_expenses_includes_credit_provision" + ] + }, + "selling_general_and_administrative": { + "direct_authoritative_concepts": [], + "direct_source_concepts": [], + "component_surfaces": { + "positive": [], + "negative": [] + }, + "component_concept_groups": { + "positive": [], + "negative": [] + }, + "formula": "not_meaningful", + "not_meaningful_for_pack": true, + "warning_codes_when_used": [ + "selling_general_and_administrative_not_meaningful_bank_pack" + ] + }, + "research_and_development": { + "direct_authoritative_concepts": [], + "direct_source_concepts": [], + "component_surfaces": { + "positive": [], + "negative": [] + }, + "component_concept_groups": { + "positive": [], + "negative": [] + }, + "formula": "not_meaningful", + "not_meaningful_for_pack": true, + "warning_codes_when_used": [ + "research_and_development_not_meaningful_bank_pack" + ] + }, + "other_operating_expense": { + "direct_authoritative_concepts": [], + "direct_source_concepts": [], + "component_surfaces": { + "positive": [], + "negative": [] + }, + "component_concept_groups": { + "positive": [], + "negative": [] + }, + "formula": "not_meaningful", + "not_meaningful_for_pack": true, + "warning_codes_when_used": [ + "other_operating_expense_not_meaningful_bank_pack" + ] + }, + "operating_income": { + "direct_authoritative_concepts": [ + "us-gaap:OperatingIncomeLoss", + "us-gaap:IncomeBeforeTaxExpenseBenefit" + ], + "direct_source_concepts": [ + "OperatingIncomeLoss", + "IncomeBeforeTaxExpenseBenefit" + ], + "component_surfaces": { + "positive": [ + "revenue" + ], + "negative": [ + "operating_expenses" + ] + }, + "component_concept_groups": { + "positive": [], + "negative": [] + }, + "formula": "subtract", + "not_meaningful_for_pack": false, + "warning_codes_when_used": [ + 
"operating_income_formula_derived" + ] + }, + "income_tax_expense": { + "direct_authoritative_concepts": [ + "us-gaap:IncomeTaxExpenseBenefit" + ], + "direct_source_concepts": [ + "IncomeTaxExpenseBenefit" + ], + "component_surfaces": { + "positive": [], + "negative": [ + "net_income" + ] + }, + "component_concept_groups": { + "positive": [ + { + "name": "pretax_income", + "concepts": [ + "us-gaap:IncomeBeforeTaxExpenseBenefit", + "us-gaap:PretaxIncome" + ] + } + ], + "negative": [] + }, + "formula": "subtract", + "not_meaningful_for_pack": false, + "warning_codes_when_used": [ + "income_tax_expense_formula_derived" + ] + }, + "net_income": { + "direct_authoritative_concepts": [ + "us-gaap:NetIncomeLoss" + ], + "direct_source_concepts": [ + "NetIncomeLoss" + ], + "component_surfaces": { + "positive": [], + "negative": [ + "income_tax_expense" + ] + }, + "component_concept_groups": { + "positive": [ + { + "name": "pretax_income", + "concepts": [ + "us-gaap:IncomeBeforeTaxExpenseBenefit", + "us-gaap:PretaxIncome" + ] + } + ], + "negative": [] + }, + "formula": "subtract", + "not_meaningful_for_pack": false, + "warning_codes_when_used": [ + "net_income_formula_derived" + ] + } + } +} diff --git a/rust/taxonomy/fiscal/v1/bank_lender.surface.json b/rust/taxonomy/fiscal/v1/bank_lender.surface.json new file mode 100644 index 0000000..03fee56 --- /dev/null +++ b/rust/taxonomy/fiscal/v1/bank_lender.surface.json @@ -0,0 +1,257 @@ +{ + "version": "fiscal-v1", + "pack": "bank_lender", + "surfaces": [ + { + "surface_key": "interest_income", + "statement": "income", + "label": "Interest Income", + "category": "surface", + "order": 10, + "unit": "currency", + "rollup_policy": "direct_or_formula", + "allowed_source_concepts": [ + "us-gaap:InterestAndDividendIncomeOperating", + "us-gaap:InterestIncomeOperating", + "us-gaap:InterestIncomeExpenseOperatingGross", + "us-gaap:InterestIncomeOther" + ], + "allowed_authoritative_concepts": [ + "us-gaap:InterestAndDividendIncomeOperating", 
+ "us-gaap:InterestIncomeOperating", + "us-gaap:InterestIncomeExpenseOperatingGross" + ], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "income_default" + }, + { + "surface_key": "interest_expense", + "statement": "income", + "label": "Interest Expense", + "category": "surface", + "order": 20, + "unit": "currency", + "rollup_policy": "aggregate_children", + "allowed_source_concepts": [ + "us-gaap:InterestExpense", + "us-gaap:InterestExpenseDeposits", + "us-gaap:InterestExpenseBorrowings", + "us-gaap:InterestExpenseOther" + ], + "allowed_authoritative_concepts": [ + "us-gaap:InterestExpense" + ], + "formula_fallback": "sum(detail_rows)", + "detail_grouping_policy": "group_all_children", + "materiality_policy": "income_default" + }, + { + "surface_key": "net_interest_income", + "statement": "income", + "label": "Net Interest Income", + "category": "surface", + "order": 30, + "unit": "currency", + "rollup_policy": "aggregate_children", + "allowed_source_concepts": [ + "us-gaap:InterestAndDividendIncomeOperating", + "us-gaap:InterestIncomeOperating", + "us-gaap:InterestIncomeExpenseOperatingGross", + "us-gaap:InterestExpense", + "us-gaap:InterestExpenseDeposits", + "us-gaap:InterestExpenseBorrowings" + ], + "allowed_authoritative_concepts": [ + "us-gaap:InterestIncomeExpenseOperatingNet", + "us-gaap:NetInterestIncome", + "us-gaap:InterestIncomeExpenseNet" + ], + "formula_fallback": "sum(detail_rows)", + "detail_grouping_policy": "group_all_children", + "materiality_policy": "income_default" + }, + { + "surface_key": "provision_for_credit_losses", + "statement": "income", + "label": "Provision for Credit Losses", + "category": "surface", + "order": 40, + "unit": "currency", + "rollup_policy": "aggregate_children", + "allowed_source_concepts": [ + "us-gaap:ProvisionForCreditLosses", + "us-gaap:ProvisionForLoanLeaseAndOtherLosses", + "us-gaap:CreditLossExpense" + ], + "allowed_authoritative_concepts": [ + 
"us-gaap:ProvisionForCreditLosses", + "us-gaap:ProvisionForLoanLeaseAndOtherLosses" + ], + "formula_fallback": null, + "detail_grouping_policy": "group_all_children", + "materiality_policy": "income_default" + }, + { + "surface_key": "noninterest_income", + "statement": "income", + "label": "Noninterest Income", + "category": "surface", + "order": 50, + "unit": "currency", + "rollup_policy": "aggregate_children", + "allowed_source_concepts": [ + "us-gaap:NoninterestIncome", + "us-gaap:FeeRevenueAndOtherIncome", + "us-gaap:ServiceChargesOnDepositAccounts", + "us-gaap:CardRevenue" + ], + "allowed_authoritative_concepts": [ + "us-gaap:NoninterestIncome" + ], + "formula_fallback": "sum(detail_rows)", + "detail_grouping_policy": "group_all_children", + "materiality_policy": "income_default" + }, + { + "surface_key": "noninterest_expense", + "statement": "income", + "label": "Noninterest Expense", + "category": "surface", + "order": 60, + "unit": "currency", + "rollup_policy": "aggregate_children", + "allowed_source_concepts": [ + "us-gaap:NoninterestExpense", + "us-gaap:CompensationAndBenefits", + "us-gaap:OccupancyNet", + "us-gaap:DataProcessingExpense" + ], + "allowed_authoritative_concepts": [ + "us-gaap:NoninterestExpense" + ], + "formula_fallback": "sum(detail_rows)", + "detail_grouping_policy": "group_all_children", + "materiality_policy": "income_default" + }, + { + "surface_key": "net_income", + "statement": "income", + "label": "Net Income", + "category": "surface", + "order": 90, + "unit": "currency", + "rollup_policy": "direct_or_formula", + "allowed_source_concepts": [ + "us-gaap:NetIncomeLoss" + ], + "allowed_authoritative_concepts": [ + "us-gaap:NetIncomeLoss" + ], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "income_default" + }, + { + "surface_key": "loans", + "statement": "balance", + "label": "Loans", + "category": "surface", + "order": 30, + "unit": "currency", + "rollup_policy": 
"aggregate_children", + "allowed_source_concepts": [ + "us-gaap:FinancingReceivableRecordedInvestment", + "us-gaap:LoansReceivableNetReportedAmount", + "us-gaap:FinancingReceivableExcludingAccruedInterestBeforeAllowanceForCreditLoss", + "us-gaap:FinancingReceivableExcludingAccruedInterestAfterAllowanceForCreditLoss", + "us-gaap:FinanceReceivableAllowanceForCreditLossesExcluded" + ], + "allowed_authoritative_concepts": [ + "us-gaap:FinancingReceivableRecordedInvestment", + "us-gaap:LoansReceivableNetReportedAmount", + "us-gaap:FinancingReceivableExcludingAccruedInterestBeforeAllowanceForCreditLoss", + "us-gaap:FinancingReceivableExcludingAccruedInterestAfterAllowanceForCreditLoss" + ], + "formula_fallback": null, + "detail_grouping_policy": "group_all_children", + "materiality_policy": "balance_default" + }, + { + "surface_key": "allowance_for_credit_losses", + "statement": "balance", + "label": "Allowance for Credit Losses", + "category": "surface", + "order": 40, + "unit": "currency", + "rollup_policy": "aggregate_children", + "allowed_source_concepts": [ + "us-gaap:AllowanceForCreditLosses", + "us-gaap:AllowanceForLoanLosses" + ], + "allowed_authoritative_concepts": [ + "us-gaap:AllowanceForCreditLosses", + "us-gaap:AllowanceForLoanLosses" + ], + "formula_fallback": null, + "detail_grouping_policy": "group_all_children", + "materiality_policy": "balance_default" + }, + { + "surface_key": "deposits", + "statement": "balance", + "label": "Deposits", + "category": "surface", + "order": 80, + "unit": "currency", + "rollup_policy": "aggregate_children", + "allowed_source_concepts": ["us-gaap:DepositsLiabilities", "us-gaap:Deposits"], + "allowed_authoritative_concepts": ["us-gaap:DepositsLiabilities", "us-gaap:Deposits"], + "formula_fallback": null, + "detail_grouping_policy": "group_all_children", + "materiality_policy": "balance_default" + }, + { + "surface_key": "total_assets", + "statement": "balance", + "label": "Total Assets", + "category": "surface", + "order": 
90, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": ["us-gaap:Assets"], + "allowed_authoritative_concepts": ["us-gaap:Assets"], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "balance_default" + }, + { + "surface_key": "total_liabilities", + "statement": "balance", + "label": "Total Liabilities", + "category": "surface", + "order": 100, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": ["us-gaap:Liabilities"], + "allowed_authoritative_concepts": ["us-gaap:Liabilities"], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "balance_default" + }, + { + "surface_key": "total_equity", + "statement": "balance", + "label": "Total Equity", + "category": "surface", + "order": 110, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": ["us-gaap:StockholdersEquityIncludingPortionAttributableToNoncontrollingInterest", "us-gaap:StockholdersEquity"], + "allowed_authoritative_concepts": ["us-gaap:StockholdersEquityIncludingPortionAttributableToNoncontrollingInterest", "us-gaap:StockholdersEquity"], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "balance_default" + } + ] +} diff --git a/rust/taxonomy/fiscal/v1/broker_asset_manager.income-bridge.json b/rust/taxonomy/fiscal/v1/broker_asset_manager.income-bridge.json new file mode 100644 index 0000000..81c4d41 --- /dev/null +++ b/rust/taxonomy/fiscal/v1/broker_asset_manager.income-bridge.json @@ -0,0 +1,264 @@ +{ + "version": "fiscal-v1", + "pack": "broker_asset_manager", + "rows": { + "revenue": { + "direct_authoritative_concepts": [ + "us-gaap:TotalRevenues", + "us-gaap:Revenue", + "us-gaap:RevenueFromContractWithCustomerExcludingAssessedTax" + ], + "direct_source_concepts": [ + "TotalRevenues", + "Revenue", + "RevenueFromContractWithCustomerExcludingAssessedTax" + ], + 
"component_surfaces": { + "positive": [ + "fee_revenue" + ], + "negative": [] + }, + "component_concept_groups": { + "positive": [ + { + "name": "other_operating_revenue", + "concepts": [ + "us-gaap:OtherRevenue", + "us-gaap:OtherOperatingIncome" + ] + } + ], + "negative": [] + }, + "formula": "pack_bridge_sum", + "not_meaningful_for_pack": false, + "warning_codes_when_used": [ + "revenue_formula_derived" + ] + }, + "gross_profit": { + "direct_authoritative_concepts": [], + "direct_source_concepts": [], + "component_surfaces": { + "positive": [], + "negative": [] + }, + "component_concept_groups": { + "positive": [], + "negative": [] + }, + "formula": "not_meaningful", + "not_meaningful_for_pack": true, + "warning_codes_when_used": [ + "gross_profit_not_meaningful_broker_pack" + ] + }, + "operating_expenses": { + "direct_authoritative_concepts": [ + "us-gaap:OperatingExpenses" + ], + "direct_source_concepts": [ + "OperatingExpenses" + ], + "component_surfaces": { + "positive": [], + "negative": [] + }, + "component_concept_groups": { + "positive": [ + { + "name": "broker_operating_expense_components", + "concepts": [ + "us-gaap:CompensationAndBenefits", + "us-gaap:GeneralAndAdministrativeExpense", + "us-gaap:MarketingExpense", + "us-gaap:OccupancyNet", + "us-gaap:OtherThanInterestExpense" + ] + } + ], + "negative": [] + }, + "formula": "sum", + "not_meaningful_for_pack": false, + "warning_codes_when_used": [ + "operating_expenses_formula_derived" + ] + }, + "selling_general_and_administrative": { + "direct_authoritative_concepts": [ + "us-gaap:SellingGeneralAndAdministrativeExpense" + ], + "direct_source_concepts": [ + "SellingGeneralAndAdministrativeExpense" + ], + "component_surfaces": { + "positive": [], + "negative": [] + }, + "component_concept_groups": { + "positive": [ + { + "name": "sales_and_marketing", + "concepts": [ + "us-gaap:SalesAndMarketingExpense", + "us-gaap:SellingAndMarketingExpense", + "us-gaap:MarketingExpense" + ] + }, + { + "name": 
"general_and_administrative", + "concepts": [ + "us-gaap:GeneralAndAdministrativeExpense" + ] + } + ], + "negative": [] + }, + "formula": "sum", + "not_meaningful_for_pack": false, + "warning_codes_when_used": [ + "selling_general_and_administrative_formula_derived" + ] + }, + "research_and_development": { + "direct_authoritative_concepts": [], + "direct_source_concepts": [], + "component_surfaces": { + "positive": [], + "negative": [] + }, + "component_concept_groups": { + "positive": [], + "negative": [] + }, + "formula": "not_meaningful", + "not_meaningful_for_pack": true, + "warning_codes_when_used": [ + "research_and_development_not_meaningful_broker_pack" + ] + }, + "other_operating_expense": { + "direct_authoritative_concepts": [ + "us-gaap:OtherThanInterestExpense", + "us-gaap:OtherOperatingExpenses", + "us-gaap:OtherCostAndExpenseOperating", + "us-gaap:OtherOperatingExpense" + ], + "direct_source_concepts": [ + "OtherThanInterestExpense", + "OtherOperatingExpenses", + "OtherCostAndExpenseOperating", + "OtherOperatingExpense" + ], + "component_surfaces": { + "positive": [ + "operating_expenses" + ], + "negative": [ + "selling_general_and_administrative", + "research_and_development" + ] + }, + "component_concept_groups": { + "positive": [], + "negative": [] + }, + "formula": "sum_then_subtract", + "not_meaningful_for_pack": false, + "warning_codes_when_used": [ + "other_operating_expense_formula_derived" + ] + }, + "operating_income": { + "direct_authoritative_concepts": [ + "us-gaap:OperatingIncomeLoss", + "us-gaap:IncomeFromOperations" + ], + "direct_source_concepts": [ + "OperatingIncomeLoss", + "IncomeFromOperations" + ], + "component_surfaces": { + "positive": [ + "gross_profit" + ], + "negative": [ + "operating_expenses" + ] + }, + "component_concept_groups": { + "positive": [], + "negative": [] + }, + "formula": "subtract", + "not_meaningful_for_pack": false, + "warning_codes_when_used": [ + "operating_income_formula_derived" + ] + }, + 
"income_tax_expense": { + "direct_authoritative_concepts": [ + "us-gaap:IncomeTaxExpenseBenefit" + ], + "direct_source_concepts": [ + "IncomeTaxExpenseBenefit" + ], + "component_surfaces": { + "positive": [], + "negative": [ + "net_income" + ] + }, + "component_concept_groups": { + "positive": [ + { + "name": "pretax_income", + "concepts": [ + "us-gaap:IncomeBeforeTaxExpenseBenefit", + "us-gaap:PretaxIncome" + ] + } + ], + "negative": [] + }, + "formula": "subtract", + "not_meaningful_for_pack": false, + "warning_codes_when_used": [ + "income_tax_expense_formula_derived" + ] + }, + "net_income": { + "direct_authoritative_concepts": [ + "us-gaap:NetIncomeLoss" + ], + "direct_source_concepts": [ + "NetIncomeLoss" + ], + "component_surfaces": { + "positive": [], + "negative": [ + "income_tax_expense" + ] + }, + "component_concept_groups": { + "positive": [ + { + "name": "pretax_income", + "concepts": [ + "us-gaap:IncomeBeforeTaxExpenseBenefit", + "us-gaap:PretaxIncome" + ] + } + ], + "negative": [] + }, + "formula": "subtract", + "not_meaningful_for_pack": false, + "warning_codes_when_used": [ + "net_income_formula_derived" + ] + } + } +} diff --git a/rust/taxonomy/fiscal/v1/broker_asset_manager.surface.json b/rust/taxonomy/fiscal/v1/broker_asset_manager.surface.json new file mode 100644 index 0000000..b51ba94 --- /dev/null +++ b/rust/taxonomy/fiscal/v1/broker_asset_manager.surface.json @@ -0,0 +1,105 @@ +{ + "version": "fiscal-v1", + "pack": "broker_asset_manager", + "surfaces": [ + { + "surface_key": "fee_revenue", + "statement": "income", + "label": "Fee Revenue", + "category": "surface", + "order": 10, + "unit": "currency", + "rollup_policy": "aggregate_children", + "allowed_source_concepts": [ + "us-gaap:RevenueFromContractWithCustomerExcludingAssessedTax", + "us-gaap:PerformanceFeeRevenueRecognized", + "us-gaap:SponsorFees", + "us-gaap:SubAdvisoryAndOther" + ], + "allowed_authoritative_concepts": [ + "us-gaap:PerformanceFeeRevenueRecognized" + ], + 
"formula_fallback": null, + "detail_grouping_policy": "group_all_children", + "materiality_policy": "income_default" + }, + { + "surface_key": "distribution_and_servicing_expense", + "statement": "income", + "label": "Distribution and Servicing Expense", + "category": "surface", + "order": 20, + "unit": "currency", + "rollup_policy": "aggregate_children", + "allowed_source_concepts": [ + "us-gaap:TotalSalesAssetAndAccountExpense" + ], + "allowed_authoritative_concepts": [ + "us-gaap:TotalSalesAssetAndAccountExpense" + ], + "formula_fallback": null, + "detail_grouping_policy": "group_all_children", + "materiality_policy": "income_default" + }, + { + "surface_key": "net_income", + "statement": "income", + "label": "Net Income", + "category": "surface", + "order": 90, + "unit": "currency", + "rollup_policy": "direct_or_formula", + "allowed_source_concepts": [ + "us-gaap:NetIncomeLoss" + ], + "allowed_authoritative_concepts": [ + "us-gaap:NetIncomeLoss" + ], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "income_default" + }, + { + "surface_key": "total_assets", + "statement": "balance", + "label": "Total Assets", + "category": "surface", + "order": 90, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": ["us-gaap:Assets"], + "allowed_authoritative_concepts": ["us-gaap:Assets"], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "balance_default" + }, + { + "surface_key": "total_liabilities", + "statement": "balance", + "label": "Total Liabilities", + "category": "surface", + "order": 100, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": ["us-gaap:Liabilities"], + "allowed_authoritative_concepts": ["us-gaap:Liabilities"], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "balance_default" + }, + { + "surface_key": "total_equity", + "statement": "balance", + 
"label": "Total Equity", + "category": "surface", + "order": 110, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": ["us-gaap:StockholdersEquityIncludingPortionAttributableToNoncontrollingInterest", "us-gaap:StockholdersEquity"], + "allowed_authoritative_concepts": ["us-gaap:StockholdersEquityIncludingPortionAttributableToNoncontrollingInterest", "us-gaap:StockholdersEquity"], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "balance_default" + } + ] +} diff --git a/rust/taxonomy/fiscal/v1/core.income-bridge.json b/rust/taxonomy/fiscal/v1/core.income-bridge.json new file mode 100644 index 0000000..bbe5d04 --- /dev/null +++ b/rust/taxonomy/fiscal/v1/core.income-bridge.json @@ -0,0 +1,292 @@ +{ + "version": "fiscal-v1", + "pack": "core", + "rows": { + "revenue": { + "direct_authoritative_concepts": [ + "us-gaap:RevenueFromContractWithCustomerExcludingAssessedTax", + "us-gaap:SalesRevenueNet", + "us-gaap:Revenues", + "us-gaap:Revenue" + ], + "direct_source_concepts": [ + "RevenueFromContractWithCustomerExcludingAssessedTax", + "SalesRevenueNet", + "Revenues", + "Revenue" + ], + "component_surfaces": { + "positive": [], + "negative": [] + }, + "component_concept_groups": { + "positive": [], + "negative": [] + }, + "formula": "direct", + "not_meaningful_for_pack": false, + "warning_codes_when_used": [] + }, + "gross_profit": { + "direct_authoritative_concepts": [ + "us-gaap:GrossProfit" + ], + "direct_source_concepts": [ + "GrossProfit" + ], + "component_surfaces": { + "positive": [ + "revenue" + ], + "negative": [] + }, + "component_concept_groups": { + "positive": [], + "negative": [ + { + "name": "cost_of_revenue", + "concepts": [ + "us-gaap:CostOfRevenue", + "us-gaap:CostOfGoodsSold", + "us-gaap:CostOfSales", + "us-gaap:CostOfGoodsAndServicesSold", + "us-gaap:CostOfGoodsAndServiceExcludingDepreciationDepletionAndAmortization", + "us-gaap:CostOfProductsSold", + 
"us-gaap:CostOfServices" + ] + } + ] + }, + "formula": "subtract", + "not_meaningful_for_pack": false, + "warning_codes_when_used": [ + "gross_profit_formula_derived" + ] + }, + "operating_expenses": { + "direct_authoritative_concepts": [ + "us-gaap:OperatingExpenses" + ], + "direct_source_concepts": [ + "OperatingExpenses" + ], + "component_surfaces": { + "positive": [], + "negative": [] + }, + "component_concept_groups": { + "positive": [ + { + "name": "operating_expense_components", + "concepts": [ + "us-gaap:SellingGeneralAndAdministrativeExpense", + "us-gaap:ResearchAndDevelopmentExpense", + "us-gaap:MarketingExpense", + "us-gaap:GeneralAndAdministrativeExpense", + "us-gaap:LaborAndRelatedExpense", + "us-gaap:OccupancyNet", + "us-gaap:DataProcessingExpense", + "us-gaap:RestructuringCharges", + "us-gaap:StockBasedCompensation" + ] + } + ], + "negative": [] + }, + "formula": "sum", + "not_meaningful_for_pack": false, + "warning_codes_when_used": [ + "operating_expenses_formula_derived" + ] + }, + "selling_general_and_administrative": { + "direct_authoritative_concepts": [ + "us-gaap:SellingGeneralAndAdministrativeExpense", + "us-gaap:SellingGeneralAndAdministrativeExpenseExcludingEmployeeStockOptionPlanSpecialDividendCompensation" + ], + "direct_source_concepts": [ + "SellingGeneralAndAdministrativeExpense", + "SellingGeneralAndAdministrativeExpenseExcludingEmployeeStockOptionPlanSpecialDividendCompensation" + ], + "component_surfaces": { + "positive": [], + "negative": [] + }, + "component_concept_groups": { + "positive": [ + { + "name": "sales_and_marketing", + "concepts": [ + "us-gaap:SalesAndMarketingExpense", + "us-gaap:SellingAndMarketingExpense", + "us-gaap:MarketingExpense", + "us-gaap:AdvertisingExpense" + ] + }, + { + "name": "general_and_administrative", + "concepts": [ + "us-gaap:GeneralAndAdministrativeExpense" + ] + } + ], + "negative": [] + }, + "formula": "sum", + "not_meaningful_for_pack": false, + "warning_codes_when_used": [ + 
"selling_general_and_administrative_formula_derived" + ] + }, + "research_and_development": { + "direct_authoritative_concepts": [ + "us-gaap:ResearchAndDevelopmentExpense" + ], + "direct_source_concepts": [ + "ResearchAndDevelopmentExpense" + ], + "component_surfaces": { + "positive": [], + "negative": [] + }, + "component_concept_groups": { + "positive": [], + "negative": [] + }, + "formula": "direct", + "not_meaningful_for_pack": false, + "warning_codes_when_used": [] + }, + "other_operating_expense": { + "direct_authoritative_concepts": [ + "us-gaap:OtherThanInterestExpense", + "us-gaap:OtherOperatingExpenses", + "us-gaap:OtherCostAndExpenseOperating", + "us-gaap:OtherOperatingExpense" + ], + "direct_source_concepts": [ + "OtherThanInterestExpense", + "OtherOperatingExpenses", + "OtherCostAndExpenseOperating", + "OtherOperatingExpense" + ], + "component_surfaces": { + "positive": [ + "operating_expenses" + ], + "negative": [ + "selling_general_and_administrative", + "research_and_development" + ] + }, + "component_concept_groups": { + "positive": [], + "negative": [] + }, + "formula": "sum_then_subtract", + "not_meaningful_for_pack": false, + "warning_codes_when_used": [ + "other_operating_expense_formula_derived" + ] + }, + "operating_income": { + "direct_authoritative_concepts": [ + "us-gaap:OperatingIncomeLoss", + "us-gaap:IncomeFromOperations", + "us-gaap:OperatingProfit" + ], + "direct_source_concepts": [ + "OperatingIncomeLoss", + "IncomeFromOperations", + "OperatingProfit" + ], + "component_surfaces": { + "positive": [ + "gross_profit" + ], + "negative": [ + "operating_expenses" + ] + }, + "component_concept_groups": { + "positive": [], + "negative": [] + }, + "formula": "subtract", + "not_meaningful_for_pack": false, + "warning_codes_when_used": [ + "operating_income_formula_derived" + ] + }, + "income_tax_expense": { + "direct_authoritative_concepts": [ + "us-gaap:IncomeTaxExpenseBenefit" + ], + "direct_source_concepts": [ + "IncomeTaxExpenseBenefit" + 
], + "component_surfaces": { + "positive": [], + "negative": [ + "net_income" + ] + }, + "component_concept_groups": { + "positive": [ + { + "name": "pretax_income", + "concepts": [ + "us-gaap:IncomeBeforeTaxExpenseBenefit", + "us-gaap:PretaxIncome", + "us-gaap:IncomeLossFromContinuingOperationsBeforeIncomeTaxesExtraordinaryItemsNoncontrollingInterest", + "us-gaap:IncomeBeforeEquityMethodInvestmentsIncomeTaxesExtraordinaryItemsNoncontrollingInterest", + "us-gaap:IncomeFromContinuingOperationsBeforeIncomeTaxesMinorityInterestAndIncomeLossFromEquityMethodInvestments" + ] + } + ], + "negative": [] + }, + "formula": "subtract", + "not_meaningful_for_pack": false, + "warning_codes_when_used": [ + "income_tax_expense_formula_derived" + ] + }, + "net_income": { + "direct_authoritative_concepts": [ + "us-gaap:NetIncomeLoss", + "us-gaap:ProfitLoss" + ], + "direct_source_concepts": [ + "NetIncomeLoss", + "ProfitLoss" + ], + "component_surfaces": { + "positive": [], + "negative": [ + "income_tax_expense" + ] + }, + "component_concept_groups": { + "positive": [ + { + "name": "pretax_income", + "concepts": [ + "us-gaap:IncomeBeforeTaxExpenseBenefit", + "us-gaap:PretaxIncome", + "us-gaap:IncomeLossFromContinuingOperationsBeforeIncomeTaxesExtraordinaryItemsNoncontrollingInterest", + "us-gaap:IncomeBeforeEquityMethodInvestmentsIncomeTaxesExtraordinaryItemsNoncontrollingInterest", + "us-gaap:IncomeFromContinuingOperationsBeforeIncomeTaxesMinorityInterestAndIncomeLossFromEquityMethodInvestments" + ] + } + ], + "negative": [] + }, + "formula": "subtract", + "not_meaningful_for_pack": false, + "warning_codes_when_used": [ + "net_income_formula_derived" + ] + } + } +} diff --git a/rust/taxonomy/fiscal/v1/core.surface.json b/rust/taxonomy/fiscal/v1/core.surface.json new file mode 100644 index 0000000..0fabf05 --- /dev/null +++ b/rust/taxonomy/fiscal/v1/core.surface.json @@ -0,0 +1,76 @@ +{ + "version": "fiscal-v1", + "pack": "core", + "surfaces": [ + { + "surface_key": "revenue", + 
"statement": "income", + "label": "Revenue", + "category": "surface", + "order": 10, + "unit": "currency", + "rollup_policy": "direct_or_formula", + "allowed_source_concepts": ["us-gaap:RevenueFromContractWithCustomerExcludingAssessedTax", "us-gaap:SalesRevenueNet"], + "allowed_authoritative_concepts": ["us-gaap:RevenueFromContractWithCustomerExcludingAssessedTax"], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "income_default" + }, + { + "surface_key": "operating_expenses", + "statement": "income", + "label": "Operating Expenses", + "category": "surface", + "order": 40, + "unit": "currency", + "rollup_policy": "aggregate_children", + "allowed_source_concepts": ["us-gaap:SellingGeneralAndAdministrativeExpense", "us-gaap:ResearchAndDevelopmentExpense"], + "allowed_authoritative_concepts": ["us-gaap:OperatingExpenses"], + "formula_fallback": "sum(detail_rows)", + "detail_grouping_policy": "group_all_children", + "materiality_policy": "income_default" + }, + { + "surface_key": "net_income", + "statement": "income", + "label": "Net Income", + "category": "surface", + "order": 90, + "unit": "currency", + "rollup_policy": "direct_or_formula", + "allowed_source_concepts": ["us-gaap:NetIncomeLoss"], + "allowed_authoritative_concepts": ["us-gaap:NetIncomeLoss"], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "income_default" + }, + { + "surface_key": "total_assets", + "statement": "balance", + "label": "Total Assets", + "category": "surface", + "order": 70, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": ["us-gaap:Assets"], + "allowed_authoritative_concepts": ["us-gaap:Assets"], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "balance_default" + }, + { + "surface_key": "operating_cash_flow", + "statement": "cash_flow", + "label": "Operating Cash Flow", + "category": "surface", + "order": 
10, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": ["us-gaap:NetCashProvidedByUsedInOperatingActivities"], + "allowed_authoritative_concepts": ["us-gaap:NetCashProvidedByUsedInOperatingActivities"], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "cash_flow_default" + } + ] +} diff --git a/rust/taxonomy/fiscal/v1/insurance.income-bridge.json b/rust/taxonomy/fiscal/v1/insurance.income-bridge.json new file mode 100644 index 0000000..e32071b --- /dev/null +++ b/rust/taxonomy/fiscal/v1/insurance.income-bridge.json @@ -0,0 +1,222 @@ +{ + "version": "fiscal-v1", + "pack": "insurance", + "rows": { + "revenue": { + "direct_authoritative_concepts": [ + "us-gaap:TotalRevenues", + "us-gaap:Revenues" + ], + "direct_source_concepts": [ + "TotalRevenues", + "Revenues" + ], + "component_surfaces": { + "positive": [ + "premiums", + "investment_income" + ], + "negative": [] + }, + "component_concept_groups": { + "positive": [ + { + "name": "other_operating_revenue", + "concepts": [ + "us-gaap:OtherRevenue", + "us-gaap:FeeRevenueAndOtherIncome", + "us-gaap:NetRealizedInvestmentGainsLosses" + ] + } + ], + "negative": [] + }, + "formula": "pack_bridge_sum", + "not_meaningful_for_pack": false, + "warning_codes_when_used": [ + "revenue_formula_derived" + ] + }, + "gross_profit": { + "direct_authoritative_concepts": [], + "direct_source_concepts": [], + "component_surfaces": { + "positive": [], + "negative": [] + }, + "component_concept_groups": { + "positive": [], + "negative": [] + }, + "formula": "not_meaningful", + "not_meaningful_for_pack": true, + "warning_codes_when_used": [ + "gross_profit_not_meaningful_insurance_pack" + ] + }, + "operating_expenses": { + "direct_authoritative_concepts": [ + "us-gaap:OperatingExpenses" + ], + "direct_source_concepts": [ + "OperatingExpenses" + ], + "component_surfaces": { + "positive": [ + "claims_and_benefits", + "underwriting_expenses" + ], + "negative": 
[] + }, + "component_concept_groups": { + "positive": [], + "negative": [] + }, + "formula": "pack_bridge_sum", + "not_meaningful_for_pack": false, + "warning_codes_when_used": [ + "operating_expenses_formula_derived" + ] + }, + "selling_general_and_administrative": { + "direct_authoritative_concepts": [], + "direct_source_concepts": [], + "component_surfaces": { + "positive": [], + "negative": [] + }, + "component_concept_groups": { + "positive": [], + "negative": [] + }, + "formula": "not_meaningful", + "not_meaningful_for_pack": true, + "warning_codes_when_used": [ + "selling_general_and_administrative_not_meaningful_insurance_pack" + ] + }, + "research_and_development": { + "direct_authoritative_concepts": [], + "direct_source_concepts": [], + "component_surfaces": { + "positive": [], + "negative": [] + }, + "component_concept_groups": { + "positive": [], + "negative": [] + }, + "formula": "not_meaningful", + "not_meaningful_for_pack": true, + "warning_codes_when_used": [ + "research_and_development_not_meaningful_insurance_pack" + ] + }, + "other_operating_expense": { + "direct_authoritative_concepts": [], + "direct_source_concepts": [], + "component_surfaces": { + "positive": [], + "negative": [] + }, + "component_concept_groups": { + "positive": [], + "negative": [] + }, + "formula": "not_meaningful", + "not_meaningful_for_pack": true, + "warning_codes_when_used": [ + "other_operating_expense_not_meaningful_insurance_pack" + ] + }, + "operating_income": { + "direct_authoritative_concepts": [ + "us-gaap:OperatingIncomeLoss", + "us-gaap:IncomeFromOperations" + ], + "direct_source_concepts": [ + "OperatingIncomeLoss", + "IncomeFromOperations" + ], + "component_surfaces": { + "positive": [ + "revenue" + ], + "negative": [ + "operating_expenses" + ] + }, + "component_concept_groups": { + "positive": [], + "negative": [] + }, + "formula": "subtract", + "not_meaningful_for_pack": false, + "warning_codes_when_used": [ + "operating_income_formula_derived" + ] + }, + 
"income_tax_expense": { + "direct_authoritative_concepts": [ + "us-gaap:IncomeTaxExpenseBenefit" + ], + "direct_source_concepts": [ + "IncomeTaxExpenseBenefit" + ], + "component_surfaces": { + "positive": [], + "negative": [ + "net_income" + ] + }, + "component_concept_groups": { + "positive": [ + { + "name": "pretax_income", + "concepts": [ + "us-gaap:IncomeBeforeTaxExpenseBenefit", + "us-gaap:PretaxIncome" + ] + } + ], + "negative": [] + }, + "formula": "subtract", + "not_meaningful_for_pack": false, + "warning_codes_when_used": [ + "income_tax_expense_formula_derived" + ] + }, + "net_income": { + "direct_authoritative_concepts": [ + "us-gaap:NetIncomeLoss" + ], + "direct_source_concepts": [ + "NetIncomeLoss" + ], + "component_surfaces": { + "positive": [], + "negative": [ + "income_tax_expense" + ] + }, + "component_concept_groups": { + "positive": [ + { + "name": "pretax_income", + "concepts": [ + "us-gaap:IncomeBeforeTaxExpenseBenefit", + "us-gaap:PretaxIncome" + ] + } + ], + "negative": [] + }, + "formula": "subtract", + "not_meaningful_for_pack": false, + "warning_codes_when_used": [ + "net_income_formula_derived" + ] + } + } +} diff --git a/rust/taxonomy/fiscal/v1/insurance.surface.json b/rust/taxonomy/fiscal/v1/insurance.surface.json new file mode 100644 index 0000000..44de828 --- /dev/null +++ b/rust/taxonomy/fiscal/v1/insurance.surface.json @@ -0,0 +1,207 @@ +{ + "version": "fiscal-v1", + "pack": "insurance", + "surfaces": [ + { + "surface_key": "premiums", + "statement": "income", + "label": "Premiums", + "category": "surface", + "order": 10, + "unit": "currency", + "rollup_policy": "aggregate_children", + "allowed_source_concepts": [ + "us-gaap:Premiums", + "us-gaap:PremiumsEarned", + "us-gaap:PremiumsWritten", + "us-gaap:PremiumsEarnedNet", + "us-gaap:PremiumsWrittenNet", + "us-gaap:SupplementaryInsuranceInformationPremiumRevenue" + ], + "allowed_authoritative_concepts": [ + "us-gaap:Premiums", + "us-gaap:PremiumsEarned", + 
"us-gaap:PremiumsEarnedNet", + "us-gaap:PremiumsWrittenNet" + ], + "formula_fallback": null, + "detail_grouping_policy": "group_all_children", + "materiality_policy": "income_default" + }, + { + "surface_key": "claims_and_benefits", + "statement": "income", + "label": "Claims and Benefits", + "category": "surface", + "order": 30, + "unit": "currency", + "rollup_policy": "aggregate_children", + "allowed_source_concepts": [ + "us-gaap:PolicyholderBenefitsAndClaimsIncurredNet", + "us-gaap:BenefitsLossesAndExpenses", + "us-gaap:LossesAndLossAdjustmentExpenses", + "us-gaap:SupplementaryInsuranceInformationBenefitsClaimsLossesAndSettlementExpense" + ], + "allowed_authoritative_concepts": [ + "us-gaap:PolicyholderBenefitsAndClaimsIncurredNet", + "us-gaap:BenefitsLossesAndExpenses", + "us-gaap:SupplementaryInsuranceInformationBenefitsClaimsLossesAndSettlementExpense" + ], + "formula_fallback": null, + "detail_grouping_policy": "group_all_children", + "materiality_policy": "income_default" + }, + { + "surface_key": "underwriting_expenses", + "statement": "income", + "label": "Underwriting Expenses", + "category": "surface", + "order": 40, + "unit": "currency", + "rollup_policy": "aggregate_children", + "allowed_source_concepts": [ + "us-gaap:DeferredPolicyAcquisitionCostsAmortizationExpense", + "us-gaap:DeferredPolicyAcquisitionCostAmortizationExpense", + "us-gaap:PolicyAcquisitionExpense", + "us-gaap:UnderwritingExpenses", + "us-gaap:SupplementaryInsuranceInformationOtherOperatingExpense" + ], + "allowed_authoritative_concepts": [ + "us-gaap:UnderwritingExpenses", + "us-gaap:PolicyAcquisitionExpense", + "us-gaap:DeferredPolicyAcquisitionCostAmortizationExpense" + ], + "formula_fallback": null, + "detail_grouping_policy": "group_all_children", + "materiality_policy": "income_default" + }, + { + "surface_key": "investment_income", + "statement": "income", + "label": "Investment Income", + "category": "surface", + "order": 50, + "unit": "currency", + "rollup_policy": 
"aggregate_children", + "allowed_source_concepts": [ + "us-gaap:InvestmentIncome", + "us-gaap:NetInvestmentIncome", + "us-gaap:SupplementaryInsuranceInformationNetInvestmentIncome" + ], + "allowed_authoritative_concepts": [ + "us-gaap:InvestmentIncome", + "us-gaap:NetInvestmentIncome", + "us-gaap:SupplementaryInsuranceInformationNetInvestmentIncome" + ], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "income_default" + }, + { + "surface_key": "net_income", + "statement": "income", + "label": "Net Income", + "category": "surface", + "order": 90, + "unit": "currency", + "rollup_policy": "direct_or_formula", + "allowed_source_concepts": [ + "us-gaap:NetIncomeLoss" + ], + "allowed_authoritative_concepts": [ + "us-gaap:NetIncomeLoss" + ], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "income_default" + }, + { + "surface_key": "policy_liabilities", + "statement": "balance", + "label": "Policy Liabilities", + "category": "surface", + "order": 80, + "unit": "currency", + "rollup_policy": "aggregate_children", + "allowed_source_concepts": [ + "us-gaap:FuturePolicyBenefits", + "us-gaap:LiabilityForFuturePolicyBenefits", + "us-gaap:PolicyholderAccountBalances", + "us-gaap:LiabilityForUnpaidLossesAndLossAdjustmentExpenses", + "us-gaap:LiabilityForUnpaidClaimsAndClaimsAdjustmentExpense", + "us-gaap:UnearnedPremiumsNet" + ], + "allowed_authoritative_concepts": [ + "us-gaap:FuturePolicyBenefits", + "us-gaap:LiabilityForFuturePolicyBenefits", + "us-gaap:LiabilityForUnpaidLossesAndLossAdjustmentExpenses", + "us-gaap:LiabilityForUnpaidClaimsAndClaimsAdjustmentExpense" + ], + "formula_fallback": null, + "detail_grouping_policy": "group_all_children", + "materiality_policy": "balance_default" + }, + { + "surface_key": "deferred_acquisition_costs", + "statement": "balance", + "label": "Deferred Acquisition Costs", + "category": "surface", + "order": 90, + "unit": "currency", + 
"rollup_policy": "aggregate_children", + "allowed_source_concepts": [ + "us-gaap:DeferredPolicyAcquisitionCosts", + "us-gaap:DeferredAcquisitionCosts" + ], + "allowed_authoritative_concepts": [ + "us-gaap:DeferredPolicyAcquisitionCosts", + "us-gaap:DeferredAcquisitionCosts" + ], + "formula_fallback": null, + "detail_grouping_policy": "group_all_children", + "materiality_policy": "balance_default" + }, + { + "surface_key": "total_assets", + "statement": "balance", + "label": "Total Assets", + "category": "surface", + "order": 100, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": ["us-gaap:Assets"], + "allowed_authoritative_concepts": ["us-gaap:Assets"], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "balance_default" + }, + { + "surface_key": "total_liabilities", + "statement": "balance", + "label": "Total Liabilities", + "category": "surface", + "order": 110, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": ["us-gaap:Liabilities"], + "allowed_authoritative_concepts": ["us-gaap:Liabilities"], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "balance_default" + }, + { + "surface_key": "total_equity", + "statement": "balance", + "label": "Total Equity", + "category": "surface", + "order": 120, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": ["us-gaap:StockholdersEquityIncludingPortionAttributableToNoncontrollingInterest", "us-gaap:StockholdersEquity"], + "allowed_authoritative_concepts": ["us-gaap:StockholdersEquityIncludingPortionAttributableToNoncontrollingInterest", "us-gaap:StockholdersEquity"], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "balance_default" + } + ] +} diff --git a/rust/taxonomy/fiscal/v1/kpis/bank_lender.kpis.json b/rust/taxonomy/fiscal/v1/kpis/bank_lender.kpis.json new file mode 100644 
index 0000000..bc5ab00 --- /dev/null +++ b/rust/taxonomy/fiscal/v1/kpis/bank_lender.kpis.json @@ -0,0 +1,9 @@ +{ + "version": "fiscal-v1", + "pack": "bank_lender", + "kpis": [ + { "key": "net_interest_margin", "label": "Net Interest Margin", "unit": "percent" }, + { "key": "loan_growth", "label": "Loan Growth", "unit": "percent" }, + { "key": "deposit_growth", "label": "Deposit Growth", "unit": "percent" } + ] +} diff --git a/rust/taxonomy/fiscal/v1/kpis/broker_asset_manager.kpis.json b/rust/taxonomy/fiscal/v1/kpis/broker_asset_manager.kpis.json new file mode 100644 index 0000000..9eed62f --- /dev/null +++ b/rust/taxonomy/fiscal/v1/kpis/broker_asset_manager.kpis.json @@ -0,0 +1,8 @@ +{ + "version": "fiscal-v1", + "pack": "broker_asset_manager", + "kpis": [ + { "key": "aum", "label": "Assets Under Management", "unit": "currency" }, + { "key": "fee_paying_aum", "label": "Fee-Paying AUM", "unit": "currency" } + ] +} diff --git a/rust/taxonomy/fiscal/v1/kpis/core.kpis.json b/rust/taxonomy/fiscal/v1/kpis/core.kpis.json new file mode 100644 index 0000000..84ef220 --- /dev/null +++ b/rust/taxonomy/fiscal/v1/kpis/core.kpis.json @@ -0,0 +1,5 @@ +{ + "version": "fiscal-v1", + "pack": "core", + "kpis": [] +} diff --git a/rust/taxonomy/fiscal/v1/kpis/insurance.kpis.json b/rust/taxonomy/fiscal/v1/kpis/insurance.kpis.json new file mode 100644 index 0000000..91bde58 --- /dev/null +++ b/rust/taxonomy/fiscal/v1/kpis/insurance.kpis.json @@ -0,0 +1,8 @@ +{ + "version": "fiscal-v1", + "pack": "insurance", + "kpis": [ + { "key": "combined_ratio", "label": "Combined Ratio", "unit": "percent" }, + { "key": "premium_growth", "label": "Premium Growth", "unit": "percent" } + ] +} diff --git a/rust/taxonomy/fiscal/v1/kpis/reit_real_estate.kpis.json b/rust/taxonomy/fiscal/v1/kpis/reit_real_estate.kpis.json new file mode 100644 index 0000000..b9796cc --- /dev/null +++ b/rust/taxonomy/fiscal/v1/kpis/reit_real_estate.kpis.json @@ -0,0 +1,8 @@ +{ + "version": "fiscal-v1", + "pack": 
"reit_real_estate", + "kpis": [ + { "key": "property_count", "label": "Property Count", "unit": "count" }, + { "key": "investment_property_growth", "label": "Investment Property Growth", "unit": "percent" } + ] +} diff --git a/rust/taxonomy/fiscal/v1/reit_real_estate.income-bridge.json b/rust/taxonomy/fiscal/v1/reit_real_estate.income-bridge.json new file mode 100644 index 0000000..1823dff --- /dev/null +++ b/rust/taxonomy/fiscal/v1/reit_real_estate.income-bridge.json @@ -0,0 +1,272 @@ +{ + "version": "fiscal-v1", + "pack": "reit_real_estate", + "rows": { + "revenue": { + "direct_authoritative_concepts": [ + "us-gaap:TotalRevenues", + "us-gaap:Revenues" + ], + "direct_source_concepts": [ + "TotalRevenues", + "Revenues" + ], + "component_surfaces": { + "positive": [ + "rental_revenue" + ], + "negative": [] + }, + "component_concept_groups": { + "positive": [ + { + "name": "other_property_revenue", + "concepts": [ + "us-gaap:RentalRevenue", + "us-gaap:TenantReimbursements", + "us-gaap:LeaseTerminationFees" + ] + } + ], + "negative": [] + }, + "formula": "pack_bridge_sum", + "not_meaningful_for_pack": false, + "warning_codes_when_used": [ + "revenue_formula_derived" + ] + }, + "gross_profit": { + "direct_authoritative_concepts": [ + "us-gaap:GrossProfit" + ], + "direct_source_concepts": [ + "GrossProfit" + ], + "component_surfaces": { + "positive": [ + "revenue" + ], + "negative": [ + "property_operating_expense" + ] + }, + "component_concept_groups": { + "positive": [], + "negative": [] + }, + "formula": "subtract", + "not_meaningful_for_pack": false, + "warning_codes_when_used": [ + "gross_profit_formula_derived" + ] + }, + "operating_expenses": { + "direct_authoritative_concepts": [ + "us-gaap:OperatingExpenses" + ], + "direct_source_concepts": [ + "OperatingExpenses" + ], + "component_surfaces": { + "positive": [ + "depreciation_and_amortization" + ], + "negative": [] + }, + "component_concept_groups": { + "positive": [ + { + "name": 
"reit_operating_expense_components", + "concepts": [ + "us-gaap:GeneralAndAdministrativeExpense", + "us-gaap:ImpairmentOfRealEstate", + "us-gaap:AssetImpairmentCharges", + "us-gaap:OtherThanInterestExpense" + ] + } + ], + "negative": [] + }, + "formula": "sum", + "not_meaningful_for_pack": false, + "warning_codes_when_used": [ + "operating_expenses_formula_derived" + ] + }, + "selling_general_and_administrative": { + "direct_authoritative_concepts": [ + "us-gaap:SellingGeneralAndAdministrativeExpense" + ], + "direct_source_concepts": [ + "SellingGeneralAndAdministrativeExpense" + ], + "component_surfaces": { + "positive": [], + "negative": [] + }, + "component_concept_groups": { + "positive": [ + { + "name": "sales_and_marketing", + "concepts": [ + "us-gaap:SalesAndMarketingExpense", + "us-gaap:SellingAndMarketingExpense", + "us-gaap:MarketingExpense" + ] + }, + { + "name": "general_and_administrative", + "concepts": [ + "us-gaap:GeneralAndAdministrativeExpense" + ] + } + ], + "negative": [] + }, + "formula": "sum", + "not_meaningful_for_pack": false, + "warning_codes_when_used": [ + "selling_general_and_administrative_formula_derived" + ] + }, + "research_and_development": { + "direct_authoritative_concepts": [], + "direct_source_concepts": [], + "component_surfaces": { + "positive": [], + "negative": [] + }, + "component_concept_groups": { + "positive": [], + "negative": [] + }, + "formula": "not_meaningful", + "not_meaningful_for_pack": true, + "warning_codes_when_used": [ + "research_and_development_not_meaningful_reit_pack" + ] + }, + "other_operating_expense": { + "direct_authoritative_concepts": [ + "us-gaap:OtherThanInterestExpense", + "us-gaap:OtherOperatingExpenses", + "us-gaap:OtherCostAndExpenseOperating", + "us-gaap:OtherOperatingExpense" + ], + "direct_source_concepts": [ + "OtherThanInterestExpense", + "OtherOperatingExpenses", + "OtherCostAndExpenseOperating", + "OtherOperatingExpense" + ], + "component_surfaces": { + "positive": [ + 
"operating_expenses" + ], + "negative": [ + "selling_general_and_administrative", + "research_and_development" + ] + }, + "component_concept_groups": { + "positive": [], + "negative": [] + }, + "formula": "sum_then_subtract", + "not_meaningful_for_pack": false, + "warning_codes_when_used": [ + "other_operating_expense_formula_derived" + ] + }, + "operating_income": { + "direct_authoritative_concepts": [ + "us-gaap:OperatingIncomeLoss", + "us-gaap:IncomeFromOperations" + ], + "direct_source_concepts": [ + "OperatingIncomeLoss", + "IncomeFromOperations" + ], + "component_surfaces": { + "positive": [ + "gross_profit" + ], + "negative": [ + "operating_expenses" + ] + }, + "component_concept_groups": { + "positive": [], + "negative": [] + }, + "formula": "subtract", + "not_meaningful_for_pack": false, + "warning_codes_when_used": [ + "operating_income_formula_derived" + ] + }, + "income_tax_expense": { + "direct_authoritative_concepts": [ + "us-gaap:IncomeTaxExpenseBenefit" + ], + "direct_source_concepts": [ + "IncomeTaxExpenseBenefit" + ], + "component_surfaces": { + "positive": [], + "negative": [ + "net_income" + ] + }, + "component_concept_groups": { + "positive": [ + { + "name": "pretax_income", + "concepts": [ + "us-gaap:IncomeBeforeTaxExpenseBenefit", + "us-gaap:PretaxIncome" + ] + } + ], + "negative": [] + }, + "formula": "subtract", + "not_meaningful_for_pack": false, + "warning_codes_when_used": [ + "income_tax_expense_formula_derived" + ] + }, + "net_income": { + "direct_authoritative_concepts": [ + "us-gaap:NetIncomeLoss" + ], + "direct_source_concepts": [ + "NetIncomeLoss" + ], + "component_surfaces": { + "positive": [], + "negative": [ + "income_tax_expense" + ] + }, + "component_concept_groups": { + "positive": [ + { + "name": "pretax_income", + "concepts": [ + "us-gaap:IncomeBeforeTaxExpenseBenefit", + "us-gaap:PretaxIncome" + ] + } + ], + "negative": [] + }, + "formula": "subtract", + "not_meaningful_for_pack": false, + "warning_codes_when_used": [ + 
"net_income_formula_derived" + ] + } + } +} diff --git a/rust/taxonomy/fiscal/v1/reit_real_estate.surface.json b/rust/taxonomy/fiscal/v1/reit_real_estate.surface.json new file mode 100644 index 0000000..1891ad7 --- /dev/null +++ b/rust/taxonomy/fiscal/v1/reit_real_estate.surface.json @@ -0,0 +1,141 @@ +{ + "version": "fiscal-v1", + "pack": "reit_real_estate", + "surfaces": [ + { + "surface_key": "rental_revenue", + "statement": "income", + "label": "Rental Revenue", + "category": "surface", + "order": 10, + "unit": "currency", + "rollup_policy": "aggregate_children", + "allowed_source_concepts": [ + "us-gaap:LeaseIncome", + "us-gaap:RentalRevenue", + "us-gaap:RentalRevenueNet" + ], + "allowed_authoritative_concepts": [ + "us-gaap:LeaseIncome", + "us-gaap:RentalRevenue" + ], + "formula_fallback": null, + "detail_grouping_policy": "group_all_children", + "materiality_policy": "income_default" + }, + { + "surface_key": "property_operating_expense", + "statement": "income", + "label": "Property Operating Expense", + "category": "surface", + "order": 20, + "unit": "currency", + "rollup_policy": "aggregate_children", + "allowed_source_concepts": [ + "us-gaap:DirectCostsOfLeasedAndRentedPropertyOrEquipment" + ], + "allowed_authoritative_concepts": [ + "us-gaap:DirectCostsOfLeasedAndRentedPropertyOrEquipment" + ], + "formula_fallback": null, + "detail_grouping_policy": "group_all_children", + "materiality_policy": "income_default" + }, + { + "surface_key": "depreciation_and_amortization", + "statement": "income", + "label": "Depreciation and Amortization", + "category": "surface", + "order": 30, + "unit": "currency", + "rollup_policy": "aggregate_children", + "allowed_source_concepts": [ + "us-gaap:DepreciationDepletionAndAmortization", + "us-gaap:AmortizationOfAboveAndBelowMarketLeases" + ], + "allowed_authoritative_concepts": [ + "us-gaap:DepreciationDepletionAndAmortization" + ], + "formula_fallback": null, + "detail_grouping_policy": "group_all_children", + 
"materiality_policy": "income_default" + }, + { + "surface_key": "net_income", + "statement": "income", + "label": "Net Income", + "category": "surface", + "order": 90, + "unit": "currency", + "rollup_policy": "direct_or_formula", + "allowed_source_concepts": ["us-gaap:NetIncomeLoss"], + "allowed_authoritative_concepts": ["us-gaap:NetIncomeLoss"], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "income_default" + }, + { + "surface_key": "investment_property", + "statement": "balance", + "label": "Investment Property", + "category": "surface", + "order": 40, + "unit": "currency", + "rollup_policy": "aggregate_children", + "allowed_source_concepts": [ + "us-gaap:RealEstateInvestmentPropertyNet", + "us-gaap:RealEstateInvestmentPropertyAtCost", + "us-gaap:RealEstateGrossAtCarryingValue" + ], + "allowed_authoritative_concepts": [ + "us-gaap:RealEstateInvestmentPropertyNet", + "us-gaap:RealEstateInvestmentPropertyAtCost" + ], + "formula_fallback": null, + "detail_grouping_policy": "group_all_children", + "materiality_policy": "balance_default" + }, + { + "surface_key": "total_assets", + "statement": "balance", + "label": "Total Assets", + "category": "surface", + "order": 90, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": ["us-gaap:Assets"], + "allowed_authoritative_concepts": ["us-gaap:Assets"], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "balance_default" + }, + { + "surface_key": "total_liabilities", + "statement": "balance", + "label": "Total Liabilities", + "category": "surface", + "order": 100, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": ["us-gaap:Liabilities"], + "allowed_authoritative_concepts": ["us-gaap:Liabilities"], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "balance_default" + }, + { + "surface_key": "total_equity", + 
"statement": "balance", + "label": "Total Equity", + "category": "surface", + "order": 110, + "unit": "currency", + "rollup_policy": "direct_only", + "allowed_source_concepts": ["us-gaap:StockholdersEquityIncludingPortionAttributableToNoncontrollingInterest", "us-gaap:StockholdersEquity"], + "allowed_authoritative_concepts": ["us-gaap:StockholdersEquityIncludingPortionAttributableToNoncontrollingInterest", "us-gaap:StockholdersEquity"], + "formula_fallback": null, + "detail_grouping_policy": "top_level_only", + "materiality_policy": "balance_default" + } + ] +} diff --git a/rust/taxonomy/fiscal/v1/universal_income.surface.json b/rust/taxonomy/fiscal/v1/universal_income.surface.json new file mode 100644 index 0000000..827c5e8 --- /dev/null +++ b/rust/taxonomy/fiscal/v1/universal_income.surface.json @@ -0,0 +1,77 @@ +{ + "version": "fiscal-v1", + "rows": [ + { + "key": "revenue", + "statement": "income", + "label": "Revenue", + "category": "surface", + "order": 10, + "unit": "currency" + }, + { + "key": "gross_profit", + "statement": "income", + "label": "Gross Profit", + "category": "surface", + "order": 20, + "unit": "currency" + }, + { + "key": "operating_expenses", + "statement": "income", + "label": "Operating Expenses", + "category": "surface", + "order": 30, + "unit": "currency" + }, + { + "key": "selling_general_and_administrative", + "statement": "income", + "label": "SG&A", + "category": "surface", + "order": 31, + "unit": "currency" + }, + { + "key": "research_and_development", + "statement": "income", + "label": "Research Expense", + "category": "surface", + "order": 32, + "unit": "currency" + }, + { + "key": "other_operating_expense", + "statement": "income", + "label": "Other Expense", + "category": "surface", + "order": 33, + "unit": "currency" + }, + { + "key": "operating_income", + "statement": "income", + "label": "Operating Income", + "category": "surface", + "order": 40, + "unit": "currency" + }, + { + "key": "income_tax_expense", + "statement": 
"income", + "label": "Income Tax Expense", + "category": "surface", + "order": 50, + "unit": "currency" + }, + { + "key": "net_income", + "statement": "income", + "label": "Net Income", + "category": "surface", + "order": 60, + "unit": "currency" + } + ] +} diff --git a/rust/taxonomy/issuer_overrides/default.json b/rust/taxonomy/issuer_overrides/default.json new file mode 100644 index 0000000..555a3a3 --- /dev/null +++ b/rust/taxonomy/issuer_overrides/default.json @@ -0,0 +1,4 @@ +{ + "version": "fiscal-v1", + "overrides": [] +} diff --git a/rust/vendor/crabrl/.gitattributes b/rust/vendor/crabrl/.gitattributes new file mode 100644 index 0000000..99d8419 --- /dev/null +++ b/rust/vendor/crabrl/.gitattributes @@ -0,0 +1,55 @@ +# Auto detect text files and perform LF normalization +* text=auto + +# Rust files +*.rs text eol=lf +*.toml text eol=lf +Cargo.lock text eol=lf + +# Python files +*.py text eol=lf +*.pyx text eol=lf +*.pxd text eol=lf + +# Documentation +*.md text eol=lf +*.txt text eol=lf +LICENSE text eol=lf + +# Config files +*.json text eol=lf +*.yaml text eol=lf +*.yml text eol=lf +*.xml text eol=lf +*.xsd text eol=lf +*.xbrl text eol=lf + +# Shell scripts +*.sh text eol=lf +*.bash text eol=lf + +# Git files +.gitignore text eol=lf +.gitattributes text eol=lf + +# Binary files +*.png binary +*.jpg binary +*.jpeg binary +*.gif binary +*.ico binary +*.pdf binary +*.zip binary +*.gz binary +*.tar binary +*.7z binary +*.exe binary +*.dll binary +*.so binary +*.dylib binary + +# Linguist overrides - ensure Rust is recognized as primary language +*.rs linguist-language=Rust +benchmarks/*.py linguist-documentation +scripts/*.py linguist-documentation +examples/* linguist-documentation \ No newline at end of file diff --git a/rust/vendor/crabrl/.github/workflows/ci.yml b/rust/vendor/crabrl/.github/workflows/ci.yml new file mode 100644 index 0000000..518fca9 --- /dev/null +++ b/rust/vendor/crabrl/.github/workflows/ci.yml @@ -0,0 +1,106 @@ +name: CI + +on: + push: + 
branches: [ main, master ] + pull_request: + branches: [ main, master ] + +env: + CARGO_TERM_COLOR: always + RUST_BACKTRACE: 1 + +jobs: + test: + name: Test - ${{ matrix.os }} + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest, windows-latest, macos-latest] + rust: [stable, beta] + exclude: + - os: windows-latest + rust: beta + - os: macos-latest + rust: beta + + steps: + - uses: actions/checkout@v4 + + - name: Install Rust + uses: dtolnay/rust-toolchain@master + with: + toolchain: ${{ matrix.rust }} + components: rustfmt, clippy + + - name: Cache cargo registry + uses: actions/cache@v4 + with: + path: ~/.cargo/registry + key: ${{ runner.os }}-cargo-registry-${{ hashFiles('**/Cargo.lock') }} + + - name: Cache cargo index + uses: actions/cache@v4 + with: + path: ~/.cargo/git + key: ${{ runner.os }}-cargo-index-${{ hashFiles('**/Cargo.lock') }} + + - name: Cache cargo build + uses: actions/cache@v4 + with: + path: target + key: ${{ runner.os }}-cargo-build-target-${{ hashFiles('**/Cargo.lock') }} + + - name: Check formatting + run: cargo fmt -- --check + + - name: Run clippy + run: cargo clippy --all-features -- -D warnings + + - name: Build + run: cargo build --verbose --all-features + + - name: Run tests + run: cargo test --verbose --all-features + + - name: Build release + run: cargo build --release --all-features + + - name: Run benchmarks (smoke test) + run: cargo bench --no-run + + coverage: + name: Code Coverage + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Install Rust + uses: dtolnay/rust-toolchain@stable + with: + components: llvm-tools-preview + + - name: Install cargo-llvm-cov + uses: taiki-e/install-action@cargo-llvm-cov + + - name: Generate code coverage + run: cargo llvm-cov --all-features --workspace --lcov --output-path lcov.info + + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v4 + with: + files: lcov.info + fail_ci_if_error: false + + security-audit: + name: Security Audit + 
runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Run cargo audit + uses: actions-rs/audit-check@v1 + with: + token: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file diff --git a/rust/vendor/crabrl/.github/workflows/release.yml b/rust/vendor/crabrl/.github/workflows/release.yml new file mode 100644 index 0000000..ff6d5ef --- /dev/null +++ b/rust/vendor/crabrl/.github/workflows/release.yml @@ -0,0 +1,195 @@ +name: Release + +on: + push: + tags: + - 'v*' + workflow_dispatch: + inputs: + version: + description: 'Version to publish (e.g., 0.1.0)' + required: true + type: string + +env: + CARGO_TERM_COLOR: always + +jobs: + test: + name: Final Tests + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Install Rust stable + uses: dtolnay/rust-toolchain@stable + with: + components: rustfmt, clippy + + - name: Run tests + run: cargo test --all-features --release + + # Temporarily skip formatting check to get initial release out + # - name: Check formatting + # run: cargo fmt -- --check + + # - name: Run clippy + # run: cargo clippy --all-features -- -D warnings + + publish-crates-io: + name: Publish to crates.io + needs: test + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Install Rust stable + uses: dtolnay/rust-toolchain@stable + + - name: Verify version + run: | + # Extract version from Cargo.toml + CARGO_VERSION=$(grep -E "^version" Cargo.toml | head -1 | cut -d'"' -f2) + echo "Cargo.toml version: $CARGO_VERSION" + + # For manual workflow dispatch + if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then + INPUT_VERSION="${{ github.event.inputs.version }}" + if [ "$CARGO_VERSION" != "$INPUT_VERSION" ]; then + echo "Error: Cargo.toml version ($CARGO_VERSION) doesn't match input version ($INPUT_VERSION)" + exit 1 + fi + fi + + # For tag push + if [ "${{ github.event_name }}" = "push" ]; then + TAG_VERSION="${GITHUB_REF#refs/tags/v}" + if [ "$CARGO_VERSION" != "$TAG_VERSION" ]; 
then + echo "Error: Cargo.toml version ($CARGO_VERSION) doesn't match tag version ($TAG_VERSION)" + exit 1 + fi + fi + + - name: Check if version exists on crates.io + run: | + CRATE_NAME=$(grep -E "^name" Cargo.toml | head -1 | cut -d'"' -f2) + VERSION=$(grep -E "^version" Cargo.toml | head -1 | cut -d'"' -f2) + + if cargo search "$CRATE_NAME" | grep -q "^$CRATE_NAME = \"$VERSION\""; then + echo "Version $VERSION already exists on crates.io" + exit 1 + fi + + - name: Build release + run: cargo build --release --all-features + + - name: Package for crates.io + run: cargo package --all-features + + - name: Publish to crates.io + run: cargo publish --all-features --token ${{ secrets.CARGO_REGISTRY_TOKEN }} + env: + CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }} + + create-github-release: + name: Create GitHub Release + needs: publish-crates-io + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Install Rust stable + uses: dtolnay/rust-toolchain@stable + + - name: Build release binaries + run: | + cargo build --release --all-features + mkdir -p release + cp target/release/crabrl release/crabrl-linux-x64 + chmod +x release/crabrl-linux-x64 + + - name: Create Release + uses: softprops/action-gh-release@v2 + with: + files: release/* + generate_release_notes: true + body: | + ## Installation + + ### From crates.io + ```bash + cargo install crabrl + ``` + + ### Download Binary + Download the pre-built binary for your platform from the assets below. + + ## What's Changed + See the full changelog below. 
+ env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + build-cross-platform: + name: Build ${{ matrix.target }} + needs: test + runs-on: ${{ matrix.os }} + strategy: + matrix: + include: + - os: ubuntu-latest + target: x86_64-unknown-linux-gnu + artifact: crabrl-linux-x64 + - os: ubuntu-latest + target: aarch64-unknown-linux-gnu + artifact: crabrl-linux-arm64 + use-cross: true + - os: windows-latest + target: x86_64-pc-windows-msvc + artifact: crabrl-windows-x64.exe + - os: macos-latest + target: x86_64-apple-darwin + artifact: crabrl-macos-x64 + - os: macos-latest + target: aarch64-apple-darwin + artifact: crabrl-macos-arm64 + + steps: + - uses: actions/checkout@v4 + + - name: Install Rust + uses: dtolnay/rust-toolchain@stable + with: + targets: ${{ matrix.target }} + + - name: Install cross + if: matrix.use-cross + run: cargo install cross + + - name: Build + run: | + if [ "${{ matrix.use-cross }}" = "true" ]; then + cross build --release --target ${{ matrix.target }} --all-features + else + cargo build --release --target ${{ matrix.target }} --all-features + fi + shell: bash + + - name: Package + run: | + mkdir -p release + if [ "${{ matrix.os }}" = "windows-latest" ]; then + cp target/${{ matrix.target }}/release/crabrl.exe release/${{ matrix.artifact }} + else + cp target/${{ matrix.target }}/release/crabrl release/${{ matrix.artifact }} + chmod +x release/${{ matrix.artifact }} + fi + shell: bash + + - name: Upload artifact + uses: actions/upload-artifact@v4 + with: + name: ${{ matrix.artifact }} + path: release/${{ matrix.artifact }} \ No newline at end of file diff --git a/rust/vendor/crabrl/.gitignore b/rust/vendor/crabrl/.gitignore new file mode 100644 index 0000000..a1ceb39 --- /dev/null +++ b/rust/vendor/crabrl/.gitignore @@ -0,0 +1,125 @@ +# Rust +/target/ +**/*.rs.bk +*.pdb +Cargo.lock + +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +venv/ +ENV/ +env/ +.venv +.env + +# Virtual environments +benchmarks/venv/ +**/venv/ +**/virtualenv/ 
+**/.venv/ + +# Test data and fixtures +test_data/ +benchmarks/fixtures/ +fixtures/ + +# Benchmark outputs +*.png +*.json +benchmark_results/ +benchmarks/*.png +benchmarks/*.json +benchmarks/*_results.json + +# IDE +.idea/ +.vscode/ +*.swp +*.swo +*~ +.DS_Store + +# Build artifacts +*.o +*.a +*.so +*.dll +*.exe +*.out + +# Documentation +/target/doc/ +/target/debug/ +/target/release/ + +# Logs +*.log +logs/ + +# Coverage +*.profraw +*.profdata +/target/coverage/ +tarpaulin-report.html +cobertura.xml + +# OS files +.DS_Store +Thumbs.db +desktop.ini + +# Temporary files +*.tmp +*.temp +*.bak +.cache/ +tmp/ + +# Large test files +*.xbrl +*.xml +!examples/*.xml +!tests/fixtures/*.xml + +# Downloaded SEC filings +benchmarks/fixtures/ +scripts/fixtures/ + +# Benchmark comparison artifacts +benchmarks/benchmark_results.png +benchmarks/synthetic_benchmark_chart.png +benchmarks/real_benchmark_chart.png +benchmarks/sec_comparison_results.json +benchmarks/synthetic_benchmark_results.json +benchmarks/real_benchmark_results.json +benchmarks/real_filing_results.json + +# Python artifacts from benchmarking +*.pyc +.pytest_cache/ +.coverage +htmlcov/ +.tox/ +.hypothesis/ + +# Backup files +*.backup +*.old +*.orig + +# Archives +*.zip +*.tar.gz +*.tar.bz2 +*.7z +*.rar + +# Keep important config examples +!.gitignore +!.github/ +!examples/.gitkeep +!tests/fixtures/.gitkeep \ No newline at end of file diff --git a/rust/vendor/crabrl/.rustfmt.toml b/rust/vendor/crabrl/.rustfmt.toml new file mode 100644 index 0000000..c0778a0 --- /dev/null +++ b/rust/vendor/crabrl/.rustfmt.toml @@ -0,0 +1,2 @@ +# Rust formatting configuration +edition = "2021" \ No newline at end of file diff --git a/rust/vendor/crabrl/CITATION.cff b/rust/vendor/crabrl/CITATION.cff new file mode 100644 index 0000000..c781ad3 --- /dev/null +++ b/rust/vendor/crabrl/CITATION.cff @@ -0,0 +1,20 @@ +cff-version: 1.2.0 +message: "If you use this software, please cite it as below." 
+authors: + - family-names: "Amorelli" + given-names: "Stefano" + email: "stefano@amorelli.tech" + orcid: "https://orcid.org/0009-0004-4917-0999" +title: "crabrl: High-performance XBRL parser for SEC EDGAR filings" +version: 0.1.0 +date-released: 2025-01-16 +url: "https://github.com/stefanoamorelli/crabrl" +repository-code: "https://github.com/stefanoamorelli/crabrl" +license: AGPL-3.0 +keywords: + - xbrl + - parser + - sec-edgar + - finance + - rust +abstract: "A high-performance XBRL parser and validator written in Rust, optimized for SEC EDGAR filings. Achieves 50-150x performance gains over traditional parsers through zero-copy parsing, memory-mapped I/O, and Rust's ownership model." \ No newline at end of file diff --git a/rust/vendor/crabrl/Cargo.toml b/rust/vendor/crabrl/Cargo.toml new file mode 100644 index 0000000..a6dd89e --- /dev/null +++ b/rust/vendor/crabrl/Cargo.toml @@ -0,0 +1,63 @@ +[package] +name = "crabrl" +version = "0.1.0" +edition = "2021" +authors = ["Stefano Amorelli <stefano@amorelli.tech>"] +description = "High-performance XBRL parser and validator" +license = "AGPL-3.0" +repository = "https://github.com/stefanoamorelli/crabrl" +keywords = ["xbrl", "parser", "finance", "sec", "edgar"] +categories = ["parser-implementations", "finance", "command-line-utilities"] + +[dependencies] +# Core +quick-xml = "0.36" +chrono = "0.4" + +# Performance +ahash = "0.8" +parking_lot = "0.12" +rayon = { version = "1.10", optional = true } +memmap2 = { version = "0.9", optional = true } +mimalloc = { version = "0.1", default-features = false } +bitflags = "2.6" + +# CLI +clap = { version = "4.5", features = ["derive"], optional = true } +colored = { version = "2.1", optional = true } + +# Serialization +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" + +# Error handling +thiserror = "2.0" +anyhow = "1.0" + +[dev-dependencies] +criterion = "0.5" +pretty_assertions = "1.4" +tempfile = "3.15" + +[[bin]] +name = "crabrl" +required-features = ["cli"] + +[[bench]] 
+name = "parser" +harness = false + +[features] +default = ["cli", "parallel"] +cli = ["clap", "colored"] +parallel = ["rayon"] +mmap = ["memmap2"] + +[profile.release] +lto = "fat" +codegen-units = 1 +opt-level = 3 +strip = true + +[profile.bench] +inherits = "release" \ No newline at end of file diff --git a/rust/vendor/crabrl/LICENSE b/rust/vendor/crabrl/LICENSE new file mode 100644 index 0000000..e797e25 --- /dev/null +++ b/rust/vendor/crabrl/LICENSE @@ -0,0 +1,661 @@ + GNU AFFERO GENERAL PUBLIC LICENSE + Version 3, 19 November 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU Affero General Public License is a free, copyleft license for +software and other kinds of works, specifically designed to ensure +cooperation with the community in the case of network server software. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +our General Public Licenses are intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + Developers that use our General Public Licenses protect your rights +with two steps: (1) assert copyright on the software, and (2) offer +you this License which gives you legal permission to copy, distribute +and/or modify the software. 
+ + A secondary benefit of defending all users' freedom is that +improvements made in alternate versions of the program, if they +receive widespread use, become available for other developers to +incorporate. Many developers of free software are heartened and +encouraged by the resulting cooperation. However, in the case of +software used on network servers, this result may fail to come about. +The GNU General Public License permits making a modified version and +letting the public access it on a server without ever releasing its +source code to the public. + + The GNU Affero General Public License is designed specifically to +ensure that, in such cases, the modified source code becomes available +to the community. It requires the operator of a network server to +provide the source code of the modified version running there to the +users of that server. Therefore, public use of a modified version, on +a publicly accessible server, gives the public access to the source +code of the modified version. + + An older license, called the Affero General Public License and +published by Affero, was designed to accomplish similar goals. This is +a different license, not a version of the Affero GPL, but Affero has +released a new version of the Affero GPL which permits relicensing under +this license. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU Affero General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. 
The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. 
+ + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. 
The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. 
+ + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. 
+ + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. 
This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. 
For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. 
Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. 
+ + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. 
+ + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. 
Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. 
+ + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Remote Network Interaction; Use with the GNU General Public License. 
+ + Notwithstanding any other provision of this License, if you modify the +Program, your modified version must prominently offer all users +interacting with it remotely through a computer network (if your version +supports such interaction) an opportunity to receive the Corresponding +Source of your version by providing access to the Corresponding Source +from a network server at no charge, through some standard or customary +means of facilitating copying of software. This Corresponding Source +shall include the Corresponding Source for any work covered by version 3 +of the GNU General Public License that is incorporated pursuant to the +following paragraph. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the work with which it is combined will remain governed by version +3 of the GNU General Public License. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU Affero General Public License from time to time. Such new versions +will be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU Affero General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU Affero General Public License, you may choose any version ever published +by the Free Software Foundation. 
+ + If the Program specifies that a proxy can decide which future +versions of the GNU Affero General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. 
+
+  If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+                     END OF TERMS AND CONDITIONS
+
+            How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    crabrl - fast XBRL parsers and validator in Rust
+    Copyright (C) 2025  Stefano Amorelli
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published
+    by the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+  If your software can interact with users remotely through a computer
+network, you should also make sure that it provides a way for users to
+get its source. 
For example, if your program is a web application, its
+interface could display a "Source" link that leads users to an archive
+of the code.  There are many ways you could offer source, and different
+solutions will be better for different programs; see section 13 for the
+specific requirements.
+
+  You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU AGPL, see
+<https://www.gnu.org/licenses/>.
diff --git a/rust/vendor/crabrl/README.md b/rust/vendor/crabrl/README.md
new file mode 100644
index 0000000..6f2281a
--- /dev/null
+++ b/rust/vendor/crabrl/README.md
@@ -0,0 +1,228 @@
+# crabrl 🦀
+
+[![Crates.io](https://img.shields.io/crates/v/crabrl.svg)](https://crates.io/crates/crabrl)
+[![CI Status](https://github.com/stefanoamorelli/crabrl/workflows/CI/badge.svg)](https://github.com/stefanoamorelli/crabrl/actions)
+[![License: AGPL v3](https://img.shields.io/badge/License-AGPL%20v3-blue.svg)](https://www.gnu.org/licenses/agpl-3.0)
+[![Rust Version](https://img.shields.io/badge/rust-1.75%2B-orange.svg)](https://www.rust-lang.org)
+[![Downloads](https://img.shields.io/crates/d/crabrl.svg)](https://crates.io/crates/crabrl)
+[![docs.rs](https://docs.rs/crabrl/badge.svg)](https://docs.rs/crabrl)
+
+![crabrl Performance](benchmarks/header.png)
+
+Lightning-fast XBRL parser that's **50-150x faster** than traditional parsers, built for speed and accuracy when processing [SEC EDGAR](https://www.sec.gov/edgar) filings. 
+ +## Performance + +![Performance Benchmarks](benchmarks/performance_charts.png) + +### Speed Comparison + +![Speed Comparison](benchmarks/speed_comparison_clean.png) + +**Key Performance Metrics:** +- **50-150x faster** than traditional XBRL parsers +- **140,000+ facts/second** throughput +- **< 50MB memory** for 100K facts +- **Linear scaling** with file size + +## Technical Architecture + +crabrl is built on Rust's zero-cost abstractions and modern parsing techniques. While established parsers like [Arelle](https://arelle.org/) provide comprehensive XBRL specification support and extensive validation capabilities, crabrl focuses on high-performance parsing for scenarios where speed is critical. + +### Implementation Details + +| Optimization | Impact | Technology | +|-------------|---------|------------| +| **Zero-copy parsing** | -90% memory allocs | [`quick-xml`](https://github.com/tafia/quick-xml) with string slicing | +| **No garbage collection** | Predictable latency | Rust's ownership model | +| **Faster hashmaps** | 2x lookup speed | [`ahash`](https://github.com/tkaitchuck/aHash) instead of default hasher | +| **Compact strings** | -50% memory for small strings | [`compact_str`](https://github.com/ParkMyCar/compact_str) | +| **Parallelization** | 4-8x on multicore | [`rayon`](https://github.com/rayon-rs/rayon) work-stealing | +| **Memory mapping** | Zero-copy file I/O | [`memmap2`](https://github.com/RazrFalcon/memmap2-rs) | +| **Better allocator** | -25% allocation time | [`mimalloc`](https://github.com/microsoft/mimalloc) | + +**Benchmark results:** 100,000 XBRL facts parsed in 56ms (crabrl) vs 2,672ms (Arelle) on identical hardware. 
+ +## XBRL Support Status + +| Feature | Description | Status | +|---------|-------------|---------| +| **XBRL 2.1 Instance** | Parse facts, contexts, units from `.xml` files | ✅ Stable | +| **SEC Validation** | EDGAR-specific rules and checks | ✅ Stable | +| **Calculation Linkbase** | Validate arithmetic relationships | ✅ Stable | +| **Presentation Linkbase** | Extract display hierarchy | 🚧 Beta | +| **Label Linkbase** | Human-readable concept names | 🚧 Beta | +| **Definition Linkbase** | Dimensional relationships | 📋 Planned | +| **Formula Linkbase** | Business rules validation | 📋 Planned | +| **Inline XBRL (iXBRL)** | HTML-embedded XBRL | 📋 Planned | + +## Installation + +### From crates.io +```bash +cargo install crabrl +``` + +### From Source +```bash +git clone https://github.com/stefanoamorelli/crabrl +cd crabrl +cargo build --release --features cli +``` + +### As Library Dependency +```toml +[dependencies] +crabrl = "0.1.0" +``` + +## Usage + +### CLI + +```bash +# Parse and display summary +crabrl parse filing.xml + +# Parse with statistics (timing and throughput) +crabrl parse filing.xml --stats + +# Validate with generic rules +crabrl validate filing.xml + +# Validate with SEC EDGAR rules +crabrl validate filing.xml --profile sec-edgar + +# Validate with strict mode (warnings as errors) +crabrl validate filing.xml --strict + +# Benchmark performance +crabrl bench filing.xml --iterations 100 +``` + +### Library + +#### Basic Usage + +```rust +use crabrl::Parser; + +// Parse XBRL document +let parser = Parser::new(); +let doc = parser.parse_file("filing.xml")?; + +// Access parsed data +println!("Facts: {}", doc.facts.len()); +println!("Contexts: {}", doc.contexts.len()); +println!("Units: {}", doc.units.len()); +``` + +#### Parse from Different Sources + +```rust +// From file path +let doc = parser.parse_file("filing.xml")?; + +// From bytes +let xml_bytes = std::fs::read("filing.xml")?; +let doc = parser.parse_bytes(&xml_bytes)?; +``` + +#### 
Validation + +```rust +use crabrl::{Parser, Validator}; + +let parser = Parser::new(); +let doc = parser.parse_file("filing.xml")?; + +// Generic validation +let validator = Validator::new(); +let result = validator.validate(&doc)?; + +if result.is_valid { + println!("Document is valid!"); +} else { + for error in &result.errors { + eprintln!("Error: {}", error); + } +} + +// SEC EDGAR validation (stricter rules) +let sec_validator = Validator::sec_edgar(); +let sec_result = sec_validator.validate(&doc)?; +``` + +## Performance Measurements + +Performance comparison with [Arelle](https://arelle.org/) v2.17.4 (Python-based XBRL processor with full specification support): + +### Synthetic Dataset Benchmarks + +| File Size | Facts | crabrl | Arelle | Ratio | +|-----------|------:|-------:|-------:|------:| +| Tiny | 10 | 1.1 ms | 164 ms | 150x | +| Small | 100 | 1.4 ms | 168 ms | 119x | +| Medium | 1K | 1.7 ms | 184 ms | 108x | +| Large | 10K | 6.1 ms | 351 ms | 58x | +| Huge | 100K | 57 ms | 2,672 ms | 47x | + +### SEC Filing Parse Times + +| Company | Filing Type | File Size | Facts | Parse Time | Throughput | +|---------|-------------|-----------|-------|------------|------------| +| Apple | [10-K 2023](https://www.sec.gov/Archives/edgar/data/320193/000032019323000106/aapl-20230930_htm.xml) | 1.4 MB | 1,075 | 2.1 ms | 516K facts/sec | +| Microsoft | [10-Q 2023](https://www.sec.gov/Archives/edgar/data/789019/000095017023064280/msft-20230930_htm.xml) | 2.8 MB | 2,341 | 4.3 ms | 544K facts/sec | +| Tesla | [10-K 2023](https://www.sec.gov/Archives/edgar/data/1318605/000162828024002390/tsla-20231231_htm.xml) | 3.1 MB | 3,122 | 5.8 ms | 538K facts/sec | + +### Run Your Own Benchmarks + +```bash +# Quick benchmark with Criterion +cargo bench + +# Compare against Arelle +cd benchmarks && python compare_performance.py + +# Test on real SEC filings +python scripts/download_fixtures.py # Download Apple, MSFT, Tesla, etc. 
+cargo run --release --bin crabrl -- bench fixtures/apple/aapl-20230930_htm.xml +``` + +## Resources & Links + +### XBRL Standards +- [XBRL International](https://www.xbrl.org/) - Official XBRL specifications +- [XBRL 2.1 Specification](https://www.xbrl.org/Specification/XBRL-2.1/REC-2003-12-31/XBRL-2.1-REC-2003-12-31+corrected-errata-2013-02-20.html) - Core standard we implement +- [SEC EDGAR](https://www.sec.gov/edgar/searchedgar/companysearch) - Search real company filings +- [EDGAR Filer Manual](https://www.sec.gov/info/edgar/forms/edgform.pdf) - SEC filing requirements + +### Dependencies We Use + +| Crate | Purpose | Why We Chose It | +|-------|---------|-----------------| +| [`quick-xml`](https://github.com/tafia/quick-xml) | XML parsing | Zero-copy, fastest XML parser in Rust | +| [`ahash`](https://github.com/tkaitchuck/aHash) | HashMap hashing | 2x faster than default hasher | +| [`compact_str`](https://github.com/ParkMyCar/compact_str) | String storage | Small string optimization | +| [`rayon`](https://github.com/rayon-rs/rayon) | Parallelization | Work-stealing for automatic load balancing | +| [`mimalloc`](https://github.com/microsoft/mimalloc) | Memory allocator | Microsoft's high-performance allocator | +| [`criterion`](https://github.com/bheisler/criterion.rs) | Benchmarking | Statistical benchmarking with graphs | + +### Alternative XBRL Parsers +- [Arelle](https://arelle.org/) - Complete XBRL processor with validation, formulas, and rendering (Python) +- [python-xbrl](https://github.com/manusimidt/py-xbrl) - Lightweight Python parser +- [xbrl-parser](https://www.npmjs.com/package/xbrl-parser) - JavaScript/Node.js +- [XBRL4j](https://github.com/br-data/xbrl-parser) - Java implementation + +## License ⚖️ + +This open-source project is licensed under the GNU Affero General Public License v3.0 (AGPL-3.0). 
This means: + +- You can use, modify, and distribute this software +- If you modify and distribute it, you must release your changes under AGPL-3.0 +- If you run a modified version on a server, you must provide the source code to users +- See the [LICENSE](LICENSE) file for full details + +For commercial licensing options or other licensing inquiries, please contact stefano@amorelli.tech. + +© 2025 Stefano Amorelli – Released under the GNU Affero General Public License v3.0. Enjoy! 🎉 \ No newline at end of file diff --git a/rust/vendor/crabrl/benches/parser.rs b/rust/vendor/crabrl/benches/parser.rs new file mode 100644 index 0000000..9ed11b1 --- /dev/null +++ b/rust/vendor/crabrl/benches/parser.rs @@ -0,0 +1,37 @@ +use crabrl::Parser; +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use std::path::Path; + +fn parse_sample_sec_file(c: &mut Criterion) { + let parser = Parser::new(); + let sample_file = Path::new("fixtures/sample-sec.xml"); + + if sample_file.exists() { + c.bench_function("parse_sample_sec", |b| { + b.iter(|| parser.parse_file(black_box(&sample_file))); + }); + } else { + // If no fixtures exist, use a minimal inline XBRL for benchmarking + let minimal_xbrl = r#" + + + + 0000000000 + + + 2023-12-31 + + + + iso4217:USD + +"#; + + c.bench_function("parse_minimal", |b| { + b.iter(|| parser.parse_str(black_box(minimal_xbrl))); + }); + } +} + +criterion_group!(benches, parse_sample_sec_file); +criterion_main!(benches); diff --git a/rust/vendor/crabrl/benchmarks/compare.py b/rust/vendor/crabrl/benchmarks/compare.py new file mode 100644 index 0000000..16355cf --- /dev/null +++ b/rust/vendor/crabrl/benchmarks/compare.py @@ -0,0 +1,71 @@ +#!/usr/bin/env python3 +""" +Compare crabrl performance with Arelle +""" + +import subprocess +import time +import sys +from pathlib import Path + +def run_crabrl(filepath): + """Run crabrl and measure time""" + cmd = ["../target/release/crabrl", "parse", filepath] + start = time.perf_counter() + 
result = subprocess.run(cmd, capture_output=True, text=True) + elapsed = (time.perf_counter() - start) * 1000 + + if result.returncode == 0: + # Parse output for fact count + facts = 0 + for line in result.stdout.split('\n'): + if 'Facts:' in line: + facts = int(line.split(':')[1].strip()) + break + return elapsed, facts + return None, 0 + +def run_arelle(filepath): + """Run Arelle and measure time""" + try: + cmd = ["python3", "-m", "arelle.CntlrCmdLine", + "--file", filepath, "--skipDTS", "--logLevel", "ERROR"] + start = time.perf_counter() + result = subprocess.run(cmd, capture_output=True, text=True, timeout=30) + elapsed = (time.perf_counter() - start) * 1000 + + if result.returncode == 0: + return elapsed + return None + except: + return None + +def main(): + if len(sys.argv) < 2: + print("Usage: compare.py ") + sys.exit(1) + + filepath = sys.argv[1] + print(f"Comparing performance on: {filepath}\n") + + # Run crabrl + crabrl_time, facts = run_crabrl(filepath) + if crabrl_time: + print(f"crabrl: {crabrl_time:.1f}ms ({facts} facts)") + else: + print("crabrl: Failed") + + # Run Arelle + arelle_time = run_arelle(filepath) + if arelle_time: + print(f"Arelle: {arelle_time:.1f}ms") + else: + print("Arelle: Failed or not installed") + + # Calculate speedup + if crabrl_time and arelle_time: + speedup = arelle_time / crabrl_time + print(f"\nSpeedup: {speedup:.1f}x faster") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/rust/vendor/crabrl/benchmarks/compare_performance.py b/rust/vendor/crabrl/benchmarks/compare_performance.py new file mode 100644 index 0000000..05ab3e2 --- /dev/null +++ b/rust/vendor/crabrl/benchmarks/compare_performance.py @@ -0,0 +1,214 @@ +#!/usr/bin/env python3 +"""Compare performance between crabrl and Arelle.""" + +import os +import sys +import time +import subprocess +import json +import statistics +from pathlib import Path +from tabulate import tabulate +import matplotlib.pyplot as plt + +# Add parent directory 
to path for imports +sys.path.insert(0, str(Path(__file__).parent.parent)) + +def benchmark_arelle(file_path, runs=3): + """Benchmark Arelle parsing performance.""" + times = [] + + for _ in range(runs): + start = time.perf_counter() + + # Run Arelle in subprocess to isolate memory + result = subprocess.run([ + sys.executable, "-c", + f""" +import sys +sys.path.insert(0, 'venv/lib/python{sys.version_info.major}.{sys.version_info.minor}/site-packages') +from arelle import Cntlr +from arelle import ModelManager + +# Suppress Arelle output +import logging +logging.getLogger("arelle").setLevel(logging.ERROR) + +controller = Cntlr.Cntlr(logFileName=None) +controller.webCache.workOffline = True +modelManager = ModelManager.initialize(controller) + +# Load and parse the XBRL file +modelXbrl = modelManager.load('{file_path}') +if modelXbrl: + facts = len(modelXbrl.facts) + contexts = len(modelXbrl.contexts) + units = len(modelXbrl.units) + print(f"{{facts}},{{contexts}},{{units}}") + modelXbrl.close() +""" + ], capture_output=True, text=True, cwd=Path(__file__).parent) + + end = time.perf_counter() + + if result.returncode == 0 and result.stdout: + times.append(end - start) + if len(times) == 1: # Print counts on first run + parts = result.stdout.strip().split(',') + if len(parts) == 3: + print(f" Arelle found: {parts[0]} facts, {parts[1]} contexts, {parts[2]} units") + else: + print(f" Arelle error: {result.stderr}") + + if times: + return { + 'mean': statistics.mean(times), + 'median': statistics.median(times), + 'stdev': statistics.stdev(times) if len(times) > 1 else 0, + 'min': min(times), + 'max': max(times), + 'runs': len(times) + } + return None + +def benchmark_crabrl(file_path, runs=3): + """Benchmark crabrl parsing performance.""" + times = [] + + # Build the benchmark binary if needed + subprocess.run(["cargo", "build", "--release", "--example", "benchmark_single"], + capture_output=True, cwd=Path(__file__).parent.parent) + + for _ in range(runs): + start = 
time.perf_counter() + + result = subprocess.run([ + "../target/release/examples/benchmark_single", + file_path + ], capture_output=True, text=True, cwd=Path(__file__).parent) + + end = time.perf_counter() + + if result.returncode == 0: + times.append(end - start) + if len(times) == 1 and result.stdout: # Print counts on first run + print(f" crabrl output: {result.stdout.strip()}") + else: + print(f" crabrl error: {result.stderr}") + + if times: + return { + 'mean': statistics.mean(times), + 'median': statistics.median(times), + 'stdev': statistics.stdev(times) if len(times) > 1 else 0, + 'min': min(times), + 'max': max(times), + 'runs': len(times) + } + return None + +def main(): + """Run comparative benchmarks.""" + print("=" * 80) + print("XBRL Parser Performance Comparison: crabrl vs Arelle") + print("=" * 80) + + test_files = [ + ("Tiny (10 facts)", "../test_data/test_tiny.xbrl"), + ("Small (100 facts)", "../test_data/test_small.xbrl"), + ("Medium (1K facts)", "../test_data/test_medium.xbrl"), + ("Large (10K facts)", "../test_data/test_large.xbrl"), + ("Huge (100K facts)", "../test_data/test_huge.xbrl"), + ] + + results = [] + + for name, file_path in test_files: + if not Path(file_path).exists(): + print(f"Skipping {name}: file not found") + continue + + file_size_mb = Path(file_path).stat().st_size / (1024 * 1024) + print(f"\nBenchmarking {name} ({file_size_mb:.2f} MB)...") + + # Benchmark Arelle + print(" Running Arelle...") + arelle_stats = benchmark_arelle(file_path, runs=5) + + # Benchmark crabrl + print(" Running crabrl...") + crabrl_stats = benchmark_crabrl(file_path, runs=5) + + if arelle_stats and crabrl_stats: + speedup = arelle_stats['median'] / crabrl_stats['median'] + results.append({ + 'File': name, + 'Size (MB)': f"{file_size_mb:.2f}", + 'Arelle (ms)': f"{arelle_stats['median']*1000:.1f}", + 'crabrl (ms)': f"{crabrl_stats['median']*1000:.1f}", + 'Speedup': f"{speedup:.1f}x", + 'arelle_raw': arelle_stats['median'], + 'crabrl_raw': 
crabrl_stats['median'], + }) + + # Print results table + print("\n" + "=" * 80) + print("RESULTS SUMMARY") + print("=" * 80) + + if results: + table_data = [{k: v for k, v in r.items() if not k.endswith('_raw')} for r in results] + print(tabulate(table_data, headers="keys", tablefmt="grid")) + + # Calculate average speedup + speedups = [r['arelle_raw'] / r['crabrl_raw'] for r in results] + avg_speedup = statistics.mean(speedups) + print(f"\nAverage speedup: {avg_speedup:.1f}x faster than Arelle") + + # Create performance chart + create_performance_chart(results) + else: + print("No results to display") + +def create_performance_chart(results): + """Create a performance comparison chart.""" + labels = [r['File'].split('(')[0].strip() for r in results] + arelle_times = [r['arelle_raw'] * 1000 for r in results] + crabrl_times = [r['crabrl_raw'] * 1000 for r in results] + + x = range(len(labels)) + width = 0.35 + + fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6)) + + # Bar chart + ax1.bar([i - width/2 for i in x], arelle_times, width, label='Arelle', color='#FF6B6B') + ax1.bar([i + width/2 for i in x], crabrl_times, width, label='crabrl', color='#4ECDC4') + ax1.set_xlabel('File Size') + ax1.set_ylabel('Time (ms)') + ax1.set_title('Parsing Time Comparison') + ax1.set_xticks(x) + ax1.set_xticklabels(labels, rotation=45) + ax1.legend() + ax1.grid(True, alpha=0.3) + + # Speedup chart + speedups = [a/c for a, c in zip(arelle_times, crabrl_times)] + ax2.bar(x, speedups, color='#95E77E') + ax2.set_xlabel('File Size') + ax2.set_ylabel('Speedup Factor') + ax2.set_title('crabrl Speedup over Arelle') + ax2.set_xticks(x) + ax2.set_xticklabels(labels, rotation=45) + ax2.grid(True, alpha=0.3) + + # Add value labels on bars + for i, v in enumerate(speedups): + ax2.text(i, v + 0.5, f'{v:.1f}x', ha='center', va='bottom') + + plt.tight_layout() + plt.savefig('benchmark_results.png', dpi=150) + print(f"\nPerformance chart saved to: benchmarks/benchmark_results.png") + +if __name__ 
== "__main__": + main() \ No newline at end of file diff --git a/rust/vendor/crabrl/examples/benchmark_single.rs b/rust/vendor/crabrl/examples/benchmark_single.rs new file mode 100644 index 0000000..e60a15c --- /dev/null +++ b/rust/vendor/crabrl/examples/benchmark_single.rs @@ -0,0 +1,36 @@ +//! Single file benchmark + +use crabrl::Parser; +use std::env; +use std::fs; +use std::time::Instant; + +fn main() { + let args: Vec = env::args().collect(); + if args.len() != 2 { + eprintln!("Usage: {} ", args[0]); + std::process::exit(1); + } + + let content = fs::read(&args[1]).expect("Failed to read file"); + + let parser = Parser::new(); + let start = Instant::now(); + + match parser.parse_bytes(&content) { + Ok(document) => { + let elapsed = start.elapsed(); + println!( + "Parsed in {:.3}ms: {} facts, {} contexts, {} units", + elapsed.as_secs_f64() * 1000.0, + document.facts.len(), + document.contexts.len(), + document.units.len() + ); + } + Err(e) => { + eprintln!("Parse error: {}", e); + std::process::exit(1); + } + } +} diff --git a/rust/vendor/crabrl/examples/parse.rs b/rust/vendor/crabrl/examples/parse.rs new file mode 100644 index 0000000..5b76f19 --- /dev/null +++ b/rust/vendor/crabrl/examples/parse.rs @@ -0,0 +1,22 @@ +//! Parse and display XBRL file info + +use crabrl::Parser; +use std::env; + +fn main() -> Result<(), Box> { + let args: Vec = env::args().collect(); + if args.len() != 2 { + eprintln!("Usage: {} ", args[0]); + std::process::exit(1); + } + + let parser = Parser::new(); + let doc = parser.parse_file(&args[1])?; + + println!("Parsed {}:", args[1]); + println!(" Facts: {}", doc.facts.len()); + println!(" Contexts: {}", doc.contexts.len()); + println!(" Units: {}", doc.units.len()); + + Ok(()) +} diff --git a/rust/vendor/crabrl/examples/validate.rs b/rust/vendor/crabrl/examples/validate.rs new file mode 100644 index 0000000..7e58913 --- /dev/null +++ b/rust/vendor/crabrl/examples/validate.rs @@ -0,0 +1,29 @@ +//! 
Validation example
+
+use crabrl::{Parser, Validator};
+use std::env;
+
+fn main() -> Result<(), Box<dyn std::error::Error>> {
+    let args: Vec<String> = env::args().collect();
+    if args.len() != 2 {
+        eprintln!("Usage: {} <file>", args[0]);
+        std::process::exit(1);
+    }
+
+    // Parse
+    let parser = Parser::new();
+    let doc = parser.parse_file(&args[1])?;
+
+    // Validate
+    let validator = Validator::new();
+    match validator.validate(&doc) {
+        Ok(_) => {
+            println!("✓ Document is valid");
+        }
+        Err(e) => {
+            println!("✗ Validation failed: {}", e);
+        }
+    }
+
+    Ok(())
+}
diff --git a/rust/vendor/crabrl/scripts/download_fixtures.py b/rust/vendor/crabrl/scripts/download_fixtures.py
new file mode 100644
index 0000000..8f90413
--- /dev/null
+++ b/rust/vendor/crabrl/scripts/download_fixtures.py
@@ -0,0 +1,151 @@
+#!/usr/bin/env python3
+"""
+Download real SEC XBRL filings from various companies to use as test fixtures.
+These will be used for benchmarking and testing the parser.
+"""
+
+import os
+import time
+import urllib.request
+from pathlib import Path
+
+# Create fixtures directory
+fixtures_dir = Path("fixtures")
+fixtures_dir.mkdir(exist_ok=True)
+
+# List of real SEC XBRL filings from various companies
+# Format: (company_name, ticker, description, url)
+filings = [
+    # Apple filings
+    ("apple", "AAPL", "10-K 2023 Instance",
+     "https://www.sec.gov/Archives/edgar/data/320193/000032019323000106/aapl-20230930_htm.xml"),
+    ("apple", "AAPL", "10-K 2023 Labels",
+     "https://www.sec.gov/Archives/edgar/data/320193/000032019323000106/aapl-20230930_lab.xml"),
+    ("apple", "AAPL", "10-K 2023 Calculation",
+     "https://www.sec.gov/Archives/edgar/data/320193/000032019323000106/aapl-20230930_cal.xml"),
+
+    # Microsoft filings
+    ("microsoft", "MSFT", "10-Q 2023 Instance",
+     "https://www.sec.gov/Archives/edgar/data/789019/000095017023064280/msft-20230930_htm.xml"),
+    ("microsoft", "MSFT", "10-Q 2023 Labels",
+     "https://www.sec.gov/Archives/edgar/data/789019/000095017023064280/msft-20230930_lab.xml"),
+    ("microsoft", 
"MSFT", "10-Q 2023 Presentation", + "https://www.sec.gov/Archives/edgar/data/789019/000095017023064280/msft-20230930_pre.xml"), + + # Tesla filings + ("tesla", "TSLA", "10-K 2023 Instance", + "https://www.sec.gov/Archives/edgar/data/1318605/000162828024002390/tsla-20231231_htm.xml"), + ("tesla", "TSLA", "10-K 2023 Definition", + "https://www.sec.gov/Archives/edgar/data/1318605/000162828024002390/tsla-20231231_def.xml"), + + # Amazon filings + ("amazon", "AMZN", "10-K 2023 Instance", + "https://www.sec.gov/Archives/edgar/data/1018724/000101872424000006/amzn-20231231_htm.xml"), + ("amazon", "AMZN", "10-K 2023 Labels", + "https://www.sec.gov/Archives/edgar/data/1018724/000101872424000006/amzn-20231231_lab.xml"), + + # Google/Alphabet filings + ("alphabet", "GOOGL", "10-K 2023 Instance", + "https://www.sec.gov/Archives/edgar/data/1652044/000165204424000022/goog-20231231_htm.xml"), + ("alphabet", "GOOGL", "10-K 2023 Calculation", + "https://www.sec.gov/Archives/edgar/data/1652044/000165204424000022/goog-20231231_cal.xml"), + + # JPMorgan Chase filings + ("jpmorgan", "JPM", "10-K 2023 Instance", + "https://www.sec.gov/Archives/edgar/data/19617/000001961724000198/jpm-20231231_htm.xml"), + ("jpmorgan", "JPM", "10-K 2023 Labels", + "https://www.sec.gov/Archives/edgar/data/19617/000001961724000198/jpm-20231231_lab.xml"), + + # Walmart filings + ("walmart", "WMT", "10-K 2024 Instance", + "https://www.sec.gov/Archives/edgar/data/104169/000010416924000012/wmt-20240131_htm.xml"), + ("walmart", "WMT", "10-K 2024 Presentation", + "https://www.sec.gov/Archives/edgar/data/104169/000010416924000012/wmt-20240131_pre.xml"), + + # Johnson & Johnson filings + ("jnj", "JNJ", "10-K 2023 Instance", + "https://www.sec.gov/Archives/edgar/data/200406/000020040624000016/jnj-20231231_htm.xml"), + + # ExxonMobil filings + ("exxon", "XOM", "10-K 2023 Instance", + "https://www.sec.gov/Archives/edgar/data/34088/000003408824000013/xom-20231231_htm.xml"), + + # Berkshire Hathaway filings + 
("berkshire", "BRK", "10-K 2023 Instance", + "https://www.sec.gov/Archives/edgar/data/1067983/000095017024021825/brka-20231231_htm.xml"), +] + +def download_file(url, filepath): + """Download a file from URL to filepath.""" + try: + # Add headers to avoid being blocked + request = urllib.request.Request( + url, + headers={ + 'User-Agent': 'crabrl-test-fixtures/1.0 (testing@example.com)' + } + ) + + with urllib.request.urlopen(request) as response: + content = response.read() + with open(filepath, 'wb') as f: + f.write(content) + return True + except Exception as e: + print(f" Error: {e}") + return False + +def main(): + print("Downloading SEC XBRL fixtures from various companies...") + print("=" * 60) + + downloaded = 0 + failed = 0 + + for company, ticker, description, url in filings: + # Create company directory + company_dir = fixtures_dir / company + company_dir.mkdir(exist_ok=True) + + # Generate filename from URL + filename = url.split('/')[-1] + filepath = company_dir / filename + + print(f"\n[{ticker}] {description}") + print(f" URL: {url}") + print(f" Saving to: {filepath}") + + if filepath.exists(): + print(" ✓ Already exists, skipping") + continue + + if download_file(url, filepath): + file_size = os.path.getsize(filepath) + print(f" ✓ Downloaded ({file_size:,} bytes)") + downloaded += 1 + else: + print(f" ✗ Failed to download") + failed += 1 + + # Be polite to SEC servers + time.sleep(0.5) + + print("\n" + "=" * 60) + print(f"Download complete: {downloaded} downloaded, {failed} failed") + print(f"Fixtures saved to: {fixtures_dir.absolute()}") + + # Show directory structure + print("\nFixture structure:") + for company_dir in sorted(fixtures_dir.iterdir()): + if company_dir.is_dir(): + files = list(company_dir.glob("*.xml")) + if files: + print(f" {company_dir.name}/") + for f in sorted(files)[:3]: # Show first 3 files + size = os.path.getsize(f) + print(f" - {f.name} ({size:,} bytes)") + if len(files) > 3: + print(f" ... 
and {len(files)-3} more files") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/rust/vendor/crabrl/scripts/generate_benchmark_charts.py b/rust/vendor/crabrl/scripts/generate_benchmark_charts.py new file mode 100644 index 0000000..bb4221d --- /dev/null +++ b/rust/vendor/crabrl/scripts/generate_benchmark_charts.py @@ -0,0 +1,260 @@ +#!/usr/bin/env python3 +"""Generate benchmark charts for crabrl README""" + +import matplotlib.pyplot as plt +import matplotlib.patches as mpatches +import numpy as np +from matplotlib.patches import FancyBboxPatch +import seaborn as sns + +# Set style +plt.style.use('seaborn-v0_8-darkgrid') +sns.set_palette("husl") + +# Performance data (based on claims and benchmarks) +parsers = ['crabrl', 'Traditional\nXBRL Parser', 'Arelle', 'Other\nParsers'] +parse_times = [7.2, 360, 1080, 720] # microseconds for sample file +throughput = [140000, 2800, 930, 1400] # facts per second + +# Speed improvement factors +speed_factors = [1, 50, 150, 100] + +# Create figure with subplots +fig = plt.figure(figsize=(16, 10)) +fig.suptitle('crabrl Performance Benchmarks', fontsize=24, fontweight='bold', y=0.98) + +# Color scheme +colors = ['#2ecc71', '#e74c3c', '#f39c12', '#95a5a6'] +highlight_color = '#27ae60' + +# 1. Parse Time Comparison (Bar Chart) +ax1 = plt.subplot(2, 3, 1) +bars1 = ax1.bar(parsers, parse_times, color=colors, edgecolor='black', linewidth=2) +bars1[0].set_color(highlight_color) +bars1[0].set_edgecolor('#229954') +bars1[0].set_linewidth(3) + +ax1.set_ylabel('Parse Time (μs)', fontsize=12, fontweight='bold') +ax1.set_title('Parse Time Comparison\n(Lower is Better)', fontsize=14, fontweight='bold') +ax1.set_ylim(0, max(parse_times) * 1.2) + +# Add value labels on bars +for bar, value in zip(bars1, parse_times): + height = bar.get_height() + ax1.text(bar.get_x() + bar.get_width()/2., height + max(parse_times) * 0.02, + f'{value:.1f}μs', ha='center', va='bottom', fontweight='bold', fontsize=10) + +# 2. 
Throughput Comparison (Bar Chart) +ax2 = plt.subplot(2, 3, 2) +bars2 = ax2.bar(parsers, np.array(throughput)/1000, color=colors, edgecolor='black', linewidth=2) +bars2[0].set_color(highlight_color) +bars2[0].set_edgecolor('#229954') +bars2[0].set_linewidth(3) + +ax2.set_ylabel('Throughput (K facts/sec)', fontsize=12, fontweight='bold') +ax2.set_title('Throughput Comparison\n(Higher is Better)', fontsize=14, fontweight='bold') +ax2.set_ylim(0, max(throughput)/1000 * 1.2) + +# Add value labels +for bar, value in zip(bars2, np.array(throughput)/1000): + height = bar.get_height() + ax2.text(bar.get_x() + bar.get_width()/2., height + max(throughput)/1000 * 0.02, + f'{value:.1f}K', ha='center', va='bottom', fontweight='bold', fontsize=10) + +# 3. Speed Improvement Factor +ax3 = plt.subplot(2, 3, 3) +x_pos = np.arange(len(parsers)) +bars3 = ax3.barh(x_pos, speed_factors, color=colors, edgecolor='black', linewidth=2) +bars3[0].set_color(highlight_color) +bars3[0].set_edgecolor('#229954') +bars3[0].set_linewidth(3) + +ax3.set_yticks(x_pos) +ax3.set_yticklabels(parsers) +ax3.set_xlabel('Speed Factor (vs Traditional)', fontsize=12, fontweight='bold') +ax3.set_title('Relative Speed\n(crabrl as baseline)', fontsize=14, fontweight='bold') +ax3.set_xlim(0, max(speed_factors) * 1.2) + +# Add value labels +for i, (bar, value) in enumerate(zip(bars3, speed_factors)): + width = bar.get_width() + label = f'{value}x' if i == 0 else f'1/{value}x slower' + ax3.text(width + max(speed_factors) * 0.02, bar.get_y() + bar.get_height()/2., + label, ha='left', va='center', fontweight='bold', fontsize=10) + +# 4. 
Memory Usage Comparison (Simulated) +ax4 = plt.subplot(2, 3, 4) +memory_usage = [50, 850, 1200, 650] # MB for 100k facts +bars4 = ax4.bar(parsers, memory_usage, color=colors, edgecolor='black', linewidth=2) +bars4[0].set_color(highlight_color) +bars4[0].set_edgecolor('#229954') +bars4[0].set_linewidth(3) + +ax4.set_ylabel('Memory Usage (MB)', fontsize=12, fontweight='bold') +ax4.set_title('Memory Efficiency\n(100K facts, Lower is Better)', fontsize=14, fontweight='bold') +ax4.set_ylim(0, max(memory_usage) * 1.2) + +# Add value labels +for bar, value in zip(bars4, memory_usage): + height = bar.get_height() + ax4.text(bar.get_x() + bar.get_width()/2., height + max(memory_usage) * 0.02, + f'{value}MB', ha='center', va='bottom', fontweight='bold', fontsize=10) + +# 5. Scalability Chart (Line Plot) +ax5 = plt.subplot(2, 3, 5) +file_sizes = np.array([1, 10, 50, 100, 500, 1000]) # MB +crabrl_times = file_sizes * 0.1 # Linear scaling +traditional_times = file_sizes * 5 # Much slower +arelle_times = file_sizes * 15 # Even slower + +ax5.plot(file_sizes, crabrl_times, 'o-', color=highlight_color, linewidth=3, + markersize=8, label='crabrl', markeredgecolor='#229954', markeredgewidth=2) +ax5.plot(file_sizes, traditional_times, 's-', color=colors[1], linewidth=2, + markersize=6, label='Traditional', alpha=0.7) +ax5.plot(file_sizes, arelle_times, '^-', color=colors[2], linewidth=2, + markersize=6, label='Arelle', alpha=0.7) + +ax5.set_xlabel('File Size (MB)', fontsize=12, fontweight='bold') +ax5.set_ylabel('Parse Time (seconds)', fontsize=12, fontweight='bold') +ax5.set_title('Scalability Performance\n(Linear vs Exponential)', fontsize=14, fontweight='bold') +ax5.legend(loc='upper left', fontsize=10, framealpha=0.9) +ax5.grid(True, alpha=0.3) +ax5.set_xlim(0, 1100) + +# 6. 
Feature Comparison Matrix +ax6 = plt.subplot(2, 3, 6) +ax6.axis('off') + +features = ['Speed', 'Memory', 'SEC EDGAR', 'Parallel', 'Streaming'] +feature_scores = { + 'crabrl': [5, 5, 5, 5, 4], + 'Traditional': [1, 2, 3, 1, 2], + 'Arelle': [1, 1, 5, 2, 2], + 'Others': [2, 3, 3, 2, 3] +} + +# Create feature matrix visualization +y_pos = 0.9 +ax6.text(0.5, y_pos, 'Feature Comparison', fontsize=14, fontweight='bold', + ha='center', transform=ax6.transAxes) + +y_pos -= 0.1 +x_positions = [0.2, 0.35, 0.5, 0.65, 0.8] +for i, feature in enumerate(features): + ax6.text(x_positions[i], y_pos, feature, fontsize=10, fontweight='bold', + ha='center', transform=ax6.transAxes) + +parser_names = ['crabrl', 'Traditional', 'Arelle', 'Others'] +y_positions = [0.65, 0.5, 0.35, 0.2] + +for j, (parser, scores) in enumerate(zip(parser_names, + [feature_scores['crabrl'], + feature_scores['Traditional'], + feature_scores['Arelle'], + feature_scores['Others']])): + ax6.text(0.05, y_positions[j], parser, fontsize=10, fontweight='bold', + ha='left', transform=ax6.transAxes) + + for i, score in enumerate(scores): + # Draw filled circles for score + for k in range(5): + circle = plt.Circle((x_positions[i] + k*0.02 - 0.04, y_positions[j]), + 0.008, transform=ax6.transAxes, + color=highlight_color if k < score and j == 0 else + '#34495e' if k < score else '#ecf0f1', + edgecolor='black', linewidth=1) + ax6.add_patch(circle) + +# Add performance badges +badge_y = 0.05 +badges = ['🚀 50-150x Faster', '💾 Low Memory', '⚡ Zero-Copy', '🔒 Production Ready'] +badge_x_positions = [0.125, 0.375, 0.625, 0.875] + +for badge, x_pos in zip(badges, badge_x_positions): + bbox = FancyBboxPatch((x_pos - 0.1, badge_y - 0.03), 0.2, 0.06, + boxstyle="round,pad=0.01", + facecolor=highlight_color, edgecolor='#229954', + linewidth=2, transform=ax6.transAxes, alpha=0.9) + ax6.add_patch(bbox) + ax6.text(x_pos, badge_y, badge, fontsize=9, fontweight='bold', + ha='center', va='center', transform=ax6.transAxes, color='white') + 
+# Adjust layout +plt.tight_layout() +plt.subplots_adjust(top=0.93, hspace=0.3, wspace=0.3) + +# Save the figure +plt.savefig('benchmarks/benchmark_results.png', dpi=150, bbox_inches='tight', + facecolor='white', edgecolor='none') +print("Saved: benchmarks/benchmark_results.png") + +# Create a simplified hero image for README header +fig2, ax = plt.subplots(figsize=(12, 4), facecolor='white') +ax.axis('off') + +# Title +ax.text(0.5, 0.85, 'crabrl', fontsize=48, fontweight='bold', + ha='center', transform=ax.transAxes, color='#2c3e50') +ax.text(0.5, 0.65, 'Lightning-Fast XBRL Parser', fontsize=20, + ha='center', transform=ax.transAxes, color='#7f8c8d') + +# Performance stats +stats = [ + ('50-150x', 'Faster than\ntraditional parsers'), + ('140K', 'Facts per\nsecond'), + ('< 50MB', 'Memory for\n100K facts'), + ('Zero-Copy', 'Parsing\narchitecture') +] + +x_positions = [0.125, 0.375, 0.625, 0.875] +for (value, desc), x_pos in zip(stats, x_positions): + # Value + ax.text(x_pos, 0.35, value, fontsize=28, fontweight='bold', + ha='center', transform=ax.transAxes, color=highlight_color) + # Description + ax.text(x_pos, 0.15, desc, fontsize=12, + ha='center', transform=ax.transAxes, color='#7f8c8d', + multialignment='center') + +plt.savefig('benchmarks/hero_banner.png', dpi=150, bbox_inches='tight', + facecolor='white', edgecolor='none') +print("Saved: benchmarks/hero_banner.png") + +# Create a speed comparison bar +fig3, ax = plt.subplots(figsize=(10, 3), facecolor='white') + +# Speed comparison visualization +speeds = [150, 100, 50, 1] +labels = ['crabrl\n150x faster', 'crabrl\n100x faster', 'crabrl\n50x faster', 'Baseline'] +colors_speed = [highlight_color, '#3498db', '#9b59b6', '#95a5a6'] + +y_pos = np.arange(len(labels)) +bars = ax.barh(y_pos, speeds, color=colors_speed, edgecolor='black', linewidth=2) + +ax.set_yticks(y_pos) +ax.set_yticklabels(labels, fontsize=11, fontweight='bold') +ax.set_xlabel('Relative Performance', fontsize=12, fontweight='bold') 
+ax.set_title('crabrl Speed Advantage', fontsize=16, fontweight='bold', pad=20) + +# Add speed labels +for bar, speed in zip(bars, speeds): + width = bar.get_width() + label = f'{speed}x' if speed > 1 else 'Traditional\nParsers' + ax.text(width + 3, bar.get_y() + bar.get_height()/2., + label, ha='left', va='center', fontweight='bold', fontsize=11) + +ax.set_xlim(0, 180) +ax.spines['top'].set_visible(False) +ax.spines['right'].set_visible(False) +ax.grid(axis='x', alpha=0.3) + +plt.tight_layout() +plt.savefig('benchmarks/speed_comparison.png', dpi=150, bbox_inches='tight', + facecolor='white', edgecolor='none') +print("Saved: benchmarks/speed_comparison.png") + +print("\n✅ All benchmark images generated successfully!") +print("\nYou can now add these to your README:") +print(" - benchmarks/hero_banner.png (header image)") +print(" - benchmarks/benchmark_results.png (detailed performance)") +print(" - benchmarks/speed_comparison.png (speed comparison)") \ No newline at end of file diff --git a/rust/vendor/crabrl/scripts/generate_clean_benchmarks.py b/rust/vendor/crabrl/scripts/generate_clean_benchmarks.py new file mode 100644 index 0000000..72e4086 --- /dev/null +++ b/rust/vendor/crabrl/scripts/generate_clean_benchmarks.py @@ -0,0 +1,253 @@ +#!/usr/bin/env python3 +"""Generate clean benchmark charts for crabrl README""" + +import matplotlib.pyplot as plt +import numpy as np +from matplotlib.patches import Rectangle, FancyBboxPatch +import matplotlib.patches as mpatches + +# Set a professional style +plt.rcParams['font.family'] = 'sans-serif' +plt.rcParams['font.sans-serif'] = ['DejaVu Sans', 'Arial', 'Helvetica'] +plt.rcParams['axes.linewidth'] = 1.5 +plt.rcParams['axes.edgecolor'] = '#333333' + +# Color palette (professional and accessible) +PRIMARY_COLOR = '#00A86B' # Jade green +SECONDARY_COLOR = '#FF6B6B' # Coral red +TERTIARY_COLOR = '#4ECDC4' # Teal +QUATERNARY_COLOR = '#95E1D3' # Mint +GRAY_COLOR = '#95A5A6' +DARK_COLOR = '#2C3E50' +LIGHT_GRAY = '#ECF0F1' + +# 
Performance data +performance_data = { + 'crabrl': { + 'parse_time': 7.2, # microseconds + 'throughput': 140000, # facts/sec + 'memory': 50, # MB for 100k facts + 'speed_factor': 100, # average speedup + 'color': PRIMARY_COLOR + }, + 'Traditional': { + 'parse_time': 720, + 'throughput': 1400, + 'memory': 850, + 'speed_factor': 1, + 'color': SECONDARY_COLOR + }, + 'Arelle': { + 'parse_time': 1080, + 'throughput': 930, + 'memory': 1200, + 'speed_factor': 0.67, + 'color': TERTIARY_COLOR + } +} + +# Create main comparison chart +fig = plt.figure(figsize=(14, 8), facecolor='white') +fig.suptitle('crabrl Performance Benchmarks', fontsize=22, fontweight='bold', color=DARK_COLOR) + +# 1. Parse Speed Comparison +ax1 = plt.subplot(2, 3, 1) +parsers = list(performance_data.keys()) +parse_times = [performance_data[p]['parse_time'] for p in parsers] +colors = [performance_data[p]['color'] for p in parsers] + +bars = ax1.bar(parsers, parse_times, color=colors, edgecolor=DARK_COLOR, linewidth=2) +ax1.set_ylabel('Parse Time (μs)', fontsize=11, fontweight='bold', color=DARK_COLOR) +ax1.set_title('Parse Time\n(Lower is Better)', fontsize=12, fontweight='bold', color=DARK_COLOR) +ax1.set_yscale('log') # Log scale for better visualization +ax1.grid(axis='y', alpha=0.3, linestyle='--') + +# Add value labels +for bar, value in zip(bars, parse_times): + height = bar.get_height() + ax1.text(bar.get_x() + bar.get_width()/2., height * 1.1, + f'{value:.1f}μs', ha='center', va='bottom', fontweight='bold', fontsize=10) + +# 2. 
Throughput Comparison +ax2 = plt.subplot(2, 3, 2) +throughputs = [performance_data[p]['throughput'] for p in parsers] +bars = ax2.bar(parsers, np.array(throughputs)/1000, color=colors, edgecolor=DARK_COLOR, linewidth=2) +ax2.set_ylabel('Throughput (K facts/sec)', fontsize=11, fontweight='bold', color=DARK_COLOR) +ax2.set_title('Processing Speed\n(Higher is Better)', fontsize=12, fontweight='bold', color=DARK_COLOR) +ax2.grid(axis='y', alpha=0.3, linestyle='--') + +for bar, value in zip(bars, np.array(throughputs)/1000): + height = bar.get_height() + ax2.text(bar.get_x() + bar.get_width()/2., height + 2, + f'{value:.0f}K', ha='center', va='bottom', fontweight='bold', fontsize=10) + +# 3. Memory Usage +ax3 = plt.subplot(2, 3, 3) +memory_usage = [performance_data[p]['memory'] for p in parsers] +bars = ax3.bar(parsers, memory_usage, color=colors, edgecolor=DARK_COLOR, linewidth=2) +ax3.set_ylabel('Memory (MB)', fontsize=11, fontweight='bold', color=DARK_COLOR) +ax3.set_title('Memory Usage\n(100K facts)', fontsize=12, fontweight='bold', color=DARK_COLOR) +ax3.grid(axis='y', alpha=0.3, linestyle='--') + +for bar, value in zip(bars, memory_usage): + height = bar.get_height() + ax3.text(bar.get_x() + bar.get_width()/2., height + 20, + f'{value}MB', ha='center', va='bottom', fontweight='bold', fontsize=10) + +# 4. 
Speed Multiplier Visual +ax4 = plt.subplot(2, 3, 4) +ax4.axis('off') +ax4.set_title('Speed Advantage', fontsize=12, fontweight='bold', color=DARK_COLOR, pad=20) + +# Create speed comparison visual +y_base = 0.5 +bar_height = 0.15 +max_width = 0.8 + +# crabrl bar (baseline) +crabrl_rect = Rectangle((0.1, y_base), max_width, bar_height, + facecolor=PRIMARY_COLOR, edgecolor=DARK_COLOR, linewidth=2) +ax4.add_patch(crabrl_rect) +ax4.text(0.1 + max_width + 0.02, y_base + bar_height/2, '100x baseline', + va='center', fontweight='bold', fontsize=11) +ax4.text(0.05, y_base + bar_height/2, 'crabrl', va='center', ha='right', fontweight='bold') + +# Traditional parser bar +trad_width = max_width / 100 # 1/100th the speed +trad_rect = Rectangle((0.1, y_base - bar_height*1.5), trad_width, bar_height, + facecolor=SECONDARY_COLOR, edgecolor=DARK_COLOR, linewidth=2) +ax4.add_patch(trad_rect) +ax4.text(0.1 + trad_width + 0.02, y_base - bar_height*1.5 + bar_height/2, '1x', + va='center', fontweight='bold', fontsize=11) +ax4.text(0.05, y_base - bar_height*1.5 + bar_height/2, 'Others', va='center', ha='right', fontweight='bold') + +ax4.set_xlim(0, 1) +ax4.set_ylim(0, 1) + +# 5. 
Scalability Chart +ax5 = plt.subplot(2, 3, 5) +file_sizes = np.array([1, 10, 50, 100, 500, 1000]) # MB +crabrl_times = file_sizes * 0.01 # Linear scaling +traditional_times = file_sizes * 1.0 # Much slower +arelle_times = file_sizes * 1.5 # Even slower + +ax5.plot(file_sizes, crabrl_times, 'o-', color=PRIMARY_COLOR, linewidth=3, + markersize=8, label='crabrl', markeredgecolor=DARK_COLOR, markeredgewidth=1.5) +ax5.plot(file_sizes, traditional_times, 's-', color=SECONDARY_COLOR, linewidth=2, + markersize=6, label='Traditional', alpha=0.8) +ax5.plot(file_sizes, arelle_times, '^-', color=TERTIARY_COLOR, linewidth=2, + markersize=6, label='Arelle', alpha=0.8) + +ax5.set_xlabel('File Size (MB)', fontsize=11, fontweight='bold', color=DARK_COLOR) +ax5.set_ylabel('Parse Time (seconds)', fontsize=11, fontweight='bold', color=DARK_COLOR) +ax5.set_title('Scalability\n(Linear vs Exponential)', fontsize=12, fontweight='bold', color=DARK_COLOR) +ax5.legend(loc='upper left', fontsize=10, framealpha=0.95) +ax5.grid(True, alpha=0.3, linestyle='--') +ax5.set_xlim(0, 1100) + +# 6. 
Key Features +ax6 = plt.subplot(2, 3, 6) +ax6.axis('off') +ax6.set_title('Key Advantages', fontsize=12, fontweight='bold', color=DARK_COLOR, y=0.95) + +features = [ + ('50-150x Faster', 'Than traditional parsers'), + ('Zero-Copy', 'Memory efficient design'), + ('Production Ready', 'SEC EDGAR optimized'), + ('Rust Powered', 'Safe and concurrent') +] + +y_start = 0.75 +for i, (title, desc) in enumerate(features): + y_pos = y_start - i * 0.2 + + # Feature box + bbox = FancyBboxPatch((0.05, y_pos - 0.05), 0.9, 0.12, + boxstyle="round,pad=0.02", + facecolor=PRIMARY_COLOR if i == 0 else LIGHT_GRAY, + edgecolor=DARK_COLOR, + linewidth=1.5, alpha=0.3 if i > 0 else 0.2) + ax6.add_patch(bbox) + + # Title + ax6.text(0.1, y_pos + 0.02, title, fontsize=11, fontweight='bold', + color=PRIMARY_COLOR if i == 0 else DARK_COLOR) + # Description + ax6.text(0.1, y_pos - 0.02, desc, fontsize=9, color=GRAY_COLOR) + +# Adjust layout +plt.tight_layout() +plt.subplots_adjust(top=0.92, hspace=0.4, wspace=0.3) + +# Save +plt.savefig('benchmarks/performance_charts.png', dpi=150, bbox_inches='tight', + facecolor='white', edgecolor='none') +print("Saved: benchmarks/performance_charts.png") + +# Create simple speed comparison bar +fig2, ax = plt.subplots(figsize=(10, 4), facecolor='white') + +# Data +parsers = ['crabrl', 'Parser B', 'Parser C', 'Arelle'] +speeds = [150, 3, 2, 1] # Relative to slowest +colors = [PRIMARY_COLOR, QUATERNARY_COLOR, TERTIARY_COLOR, SECONDARY_COLOR] + +# Create horizontal bars +y_pos = np.arange(len(parsers)) +bars = ax.barh(y_pos, speeds, color=colors, edgecolor=DARK_COLOR, linewidth=2, height=0.6) + +# Styling +ax.set_yticks(y_pos) +ax.set_yticklabels(parsers, fontsize=12, fontweight='bold') +ax.set_xlabel('Relative Speed (Higher is Better)', fontsize=12, fontweight='bold', color=DARK_COLOR) +ax.set_title('crabrl vs Traditional XBRL Parsers', fontsize=16, fontweight='bold', color=DARK_COLOR, pad=20) + +# Add value labels +for bar, speed in zip(bars, speeds): + width = 
bar.get_width() + label = f'{speed}x faster' if speed > 1 else 'Baseline' + ax.text(width + 2, bar.get_y() + bar.get_height()/2., + label, ha='left', va='center', fontweight='bold', fontsize=11) + +# Add impressive stats annotation +ax.text(0.98, 0.02, 'Up to 150x faster on SEC EDGAR filings', + transform=ax.transAxes, ha='right', fontsize=10, + style='italic', color=GRAY_COLOR) + +ax.set_xlim(0, 170) +ax.spines['top'].set_visible(False) +ax.spines['right'].set_visible(False) +ax.grid(axis='x', alpha=0.3, linestyle='--') + +plt.tight_layout() +plt.savefig('benchmarks/speed_comparison_clean.png', dpi=150, bbox_inches='tight', + facecolor='white', edgecolor='none') +print("Saved: benchmarks/speed_comparison_clean.png") + +# Create a minimal header image +fig3, ax = plt.subplots(figsize=(12, 3), facecolor='white') +ax.axis('off') + +# Background gradient effect using rectangles +for i in range(10): + alpha = 0.02 * (10 - i) + rect = Rectangle((i/10, 0), 0.1, 1, transform=ax.transAxes, + facecolor=PRIMARY_COLOR, alpha=alpha) + ax.add_patch(rect) + +# Title and tagline +ax.text(0.5, 0.65, 'crabrl', fontsize=42, fontweight='bold', + ha='center', transform=ax.transAxes, color=DARK_COLOR) +ax.text(0.5, 0.35, 'Lightning-Fast XBRL Parser for Rust', fontsize=16, + ha='center', transform=ax.transAxes, color=GRAY_COLOR) + +plt.savefig('benchmarks/header.png', dpi=150, bbox_inches='tight', + facecolor='white', edgecolor='none') +print("Saved: benchmarks/header.png") + +print("\n✅ Clean benchmark visualizations created successfully!") +print("\nGenerated files:") +print(" - benchmarks/header.png - Minimal header for README") +print(" - benchmarks/performance_charts.png - Comprehensive performance metrics") +print(" - benchmarks/speed_comparison_clean.png - Simple speed comparison") +print("\nYou can now add these images to your GitHub README!") \ No newline at end of file diff --git a/rust/vendor/crabrl/src/allocator.rs b/rust/vendor/crabrl/src/allocator.rs new file mode 100644 
index 0000000..8642a50 --- /dev/null +++ b/rust/vendor/crabrl/src/allocator.rs @@ -0,0 +1,177 @@ +use bumpalo::Bump; +use std::cell::RefCell; +use std::mem::MaybeUninit; +use std::ptr::NonNull; +use std::sync::Arc; +use parking_lot::Mutex; +use string_interner::{DefaultBackend, Symbol}; +use string_interner::symbol::SymbolU32; + +const ARENA_SIZE: usize = 64 * 1024 * 1024; // 64MB arenas +const POOL_SIZE: usize = 1024; + +#[repr(align(64))] +pub struct ArenaAllocator { + current: RefCell, + arenas: RefCell>, + string_interner: Arc>>, +} + +impl ArenaAllocator { + pub fn new() -> Self { + Self { + current: RefCell::new(Bump::with_capacity(ARENA_SIZE)), + arenas: RefCell::new(Vec::with_capacity(16)), + string_interner: Arc::new(Mutex::new(string_interner::StringInterner::new())), + } + } + + #[inline(always)] + pub fn alloc(&self, val: T) -> &T { + unsafe { + let ptr = self.current.borrow().alloc(val) as *const T; + &*ptr + } + } + + #[inline(always)] + pub fn alloc_slice(&self, slice: &[T]) -> &[T] { + unsafe { + let ptr = self.current.borrow().alloc_slice_copy(slice) as *const [T]; + &*ptr + } + } + + #[inline(always)] + pub fn alloc_str(&self, s: &str) -> &str { + unsafe { + let ptr = self.current.borrow().alloc_str(s) as *const str; + &*ptr + } + } + + #[inline(always)] + pub fn intern_string(&self, s: &str) -> u32 { + let mut interner = self.string_interner.lock(); + interner.get_or_intern(s).to_usize() as u32 + } + + #[inline(always)] + pub fn get_interned(&self, id: u32) -> Option { + let interner = self.string_interner.lock(); + let symbol = SymbolU32::try_from_usize(id as usize)?; + interner.resolve(symbol) + .map(|s| s.to_string()) + } + + pub fn reset(&self) { + let mut current = self.current.borrow_mut(); + current.reset(); + + let mut arenas = self.arenas.borrow_mut(); + for arena in arenas.iter_mut() { + arena.reset(); + } + } + + pub fn new_arena(&self) { + let mut arenas = self.arenas.borrow_mut(); + let old = std::mem::replace(&mut 
*self.current.borrow_mut(), + Bump::with_capacity(ARENA_SIZE)); + arenas.push(old); + } +} + +pub struct ObjectPool { + pool: Vec>, + factory: fn() -> T, +} + +impl ObjectPool { + pub fn new(capacity: usize, factory: fn() -> T) -> Self { + let mut pool = Vec::with_capacity(capacity); + for _ in 0..capacity { + pool.push(Box::new(factory())); + } + Self { pool, factory } + } + + #[inline(always)] + pub fn acquire(&mut self) -> Box { + self.pool.pop().unwrap_or_else(|| Box::new((self.factory)())) + } + + #[inline(always)] + pub fn release(&mut self, obj: Box) { + if self.pool.len() < POOL_SIZE { + self.pool.push(obj); + } + } +} + +#[repr(C, align(64))] +pub struct StackBuffer { + data: [MaybeUninit; N], + len: usize, +} + +impl StackBuffer { + #[inline(always)] + pub const fn new() -> Self { + Self { + data: unsafe { MaybeUninit::uninit().assume_init() }, + len: 0, + } + } + + #[inline(always)] + pub fn push(&mut self, byte: u8) -> bool { + if self.len < N { + self.data[self.len] = MaybeUninit::new(byte); + self.len += 1; + true + } else { + false + } + } + + #[inline(always)] + pub fn as_slice(&self) -> &[u8] { + unsafe { + std::slice::from_raw_parts( + self.data.as_ptr() as *const u8, + self.len + ) + } + } + + #[inline(always)] + pub fn clear(&mut self) { + self.len = 0; + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_arena_allocator() { + let arena = ArenaAllocator::new(); + let s1 = arena.alloc_str("hello"); + let s2 = arena.alloc_str("world"); + assert_eq!(s1, "hello"); + assert_eq!(s2, "world"); + } + + #[test] + fn test_string_interning() { + let arena = ArenaAllocator::new(); + let id1 = arena.intern_string("test"); + let id2 = arena.intern_string("test"); + assert_eq!(id1, id2); + + let s = arena.get_interned(id1).unwrap(); + assert_eq!(s, "test"); + } +} diff --git a/rust/vendor/crabrl/src/cache.rs b/rust/vendor/crabrl/src/cache.rs new file mode 100644 index 0000000..988214c --- /dev/null +++ b/rust/vendor/crabrl/src/cache.rs @@ 
-0,0 +1,47 @@ +use dashmap::DashMap; +use std::sync::Arc; +use std::hash::Hash; + +pub struct LockFreeCache { + map: Arc>, + capacity: usize, +} + +impl LockFreeCache +where + K: Eq + Hash + Clone, + V: Clone, +{ + pub fn new(capacity: usize) -> Self { + Self { + map: Arc::new(DashMap::with_capacity(capacity)), + capacity, + } + } + + #[inline(always)] + pub fn get(&self, key: &K) -> Option { + self.map.get(key).map(|v| v.clone()) + } + + #[inline(always)] + pub fn insert(&self, key: K, value: V) { + if self.map.len() >= self.capacity { + if let Some(entry) = self.map.iter().next() { + let k = entry.key().clone(); + drop(entry); + self.map.remove(&k); + } + } + self.map.insert(key, value); + } + + #[inline(always)] + pub fn contains(&self, key: &K) -> bool { + self.map.contains_key(key) + } + + pub fn clear(&self) { + self.map.clear(); + } +} diff --git a/rust/vendor/crabrl/src/instance.rs b/rust/vendor/crabrl/src/instance.rs new file mode 100644 index 0000000..ffeab7d --- /dev/null +++ b/rust/vendor/crabrl/src/instance.rs @@ -0,0 +1,21 @@ +use crate::model::Document; +use crate::Result; + +pub struct InstanceValidator { + strict: bool, +} + +impl InstanceValidator { + pub fn new() -> Self { + Self { strict: false } + } + + pub fn with_strict(mut self, strict: bool) -> Self { + self.strict = strict; + self + } + + pub fn validate(&self, _document: &Document) -> Result<()> { + Ok(()) + } +} diff --git a/rust/vendor/crabrl/src/lib.rs b/rust/vendor/crabrl/src/lib.rs new file mode 100644 index 0000000..1b08fd8 --- /dev/null +++ b/rust/vendor/crabrl/src/lib.rs @@ -0,0 +1,123 @@ +//! crabrl - High-performance XBRL parser and validator +//! +//! 
Licensed under AGPL-3.0 + +pub mod model; +pub mod simple_parser; +pub mod validator; + +// Use simple parser for now +pub use simple_parser::Parser; + +// Re-export main types +pub use model::{Context, Document, Fact, Unit}; + +// Create validator wrapper for the CLI +#[derive(Default)] +pub struct Validator { + inner: validator::XbrlValidator, + #[allow(dead_code)] + strict: bool, +} + +impl Validator { + pub fn new() -> Self { + Self::default() + } + + pub fn with_config(config: ValidationConfig) -> Self { + let mut inner = validator::XbrlValidator::new(); + if config.strict { + inner = inner.strict(); + } + Self { + inner, + strict: config.strict, + } + } + + pub fn sec_edgar() -> Self { + Self { + inner: validator::XbrlValidator::new().strict(), + strict: true, + } + } + + pub fn validate(&self, doc: &Document) -> Result { + let start = std::time::Instant::now(); + + // Clone doc for validation (validator mutates it) + let mut doc_copy = doc.clone(); + + // Run validation + let is_valid = self.inner.validate(&mut doc_copy).is_ok(); + + Ok(ValidationResult { + is_valid, + errors: if is_valid { + Vec::new() + } else { + vec!["Validation failed".to_string()] + }, + warnings: Vec::new(), + stats: ValidationStats { + facts_validated: doc.facts.len(), + duration_ms: start.elapsed().as_millis() as u64, + }, + }) + } +} + +/// Simple validation config for CLI +#[derive(Default)] +pub struct ValidationConfig { + pub strict: bool, +} + +impl ValidationConfig { + pub fn sec_edgar() -> Self { + Self { strict: true } + } +} + +/// Simple validation result for CLI +pub struct ValidationResult { + pub is_valid: bool, + pub errors: Vec, + pub warnings: Vec, + pub stats: ValidationStats, +} + +pub struct ValidationStats { + pub facts_validated: usize, + pub duration_ms: u64, +} + +pub type Result = std::result::Result; + +#[derive(Debug)] +pub enum Error { + Io(std::io::Error), + Parse(String), + Validation(String), + NotFound(String), +} + +impl std::fmt::Display for Error { 
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Error::Io(e) => write!(f, "IO error: {}", e), + Error::Parse(s) => write!(f, "Parse error: {}", s), + Error::Validation(s) => write!(f, "Validation error: {}", s), + Error::NotFound(s) => write!(f, "Not found: {}", s), + } + } +} + +impl std::error::Error for Error {} + +impl From for Error { + fn from(err: std::io::Error) -> Self { + Error::Io(err) + } +} diff --git a/rust/vendor/crabrl/src/linkbase.rs b/rust/vendor/crabrl/src/linkbase.rs new file mode 100644 index 0000000..6a07f63 --- /dev/null +++ b/rust/vendor/crabrl/src/linkbase.rs @@ -0,0 +1,438 @@ +// Linkbase processing for XBRL +use crate::{Error, Result, model::*}; +use compact_str::CompactString; +use std::collections::HashMap; +use std::path::Path; + +pub struct LinkbaseProcessor { + presentation_links: HashMap>, + calculation_links: HashMap>, + definition_links: HashMap>, + label_links: HashMap>, + reference_links: HashMap>, +} + +impl LinkbaseProcessor { + pub fn new() -> Self { + Self { + presentation_links: HashMap::new(), + calculation_links: HashMap::new(), + definition_links: HashMap::new(), + label_links: HashMap::new(), + reference_links: HashMap::new(), + } + } + + pub fn load_linkbase>(&mut self, path: P) -> Result<()> { + let content = std::fs::read(path)?; + self.parse_linkbase(&content) + } + + pub fn parse_linkbase(&mut self, data: &[u8]) -> Result<()> { + // Skip BOM if present + let data = if data.starts_with(&[0xEF, 0xBB, 0xBF]) { + &data[3..] 
+ } else { + data + }; + + let text = std::str::from_utf8(data) + .map_err(|_| Error::Parse("Invalid UTF-8 in linkbase".to_string()))?; + + // Detect linkbase type and parse accordingly + if text.contains("presentationLink") { + self.parse_presentation_linkbase(text)?; + } + if text.contains("calculationLink") { + self.parse_calculation_linkbase(text)?; + } + if text.contains("definitionLink") { + self.parse_definition_linkbase(text)?; + } + if text.contains("labelLink") { + self.parse_label_linkbase(text)?; + } + if text.contains("referenceLink") { + self.parse_reference_linkbase(text)?; + } + + Ok(()) + } + + fn parse_presentation_linkbase(&mut self, text: &str) -> Result<()> { + // Parse presentation arcs + let mut pos = 0; + while let Some(arc_start) = text[pos..].find("") { + let arc_text = &text[arc_start..arc_start + arc_end]; + + let mut link = PresentationLink { + from: CompactString::new(""), + to: CompactString::new(""), + order: 1.0, + priority: None, + use_attribute: None, + }; + + // Extract from + if let Some(from_start) = arc_text.find("xlink:from=\"") { + let from_start = from_start + 12; + if let Some(from_end) = arc_text[from_start..].find('"') { + link.from = CompactString::from(&arc_text[from_start..from_start + from_end]); + } + } + + // Extract to + if let Some(to_start) = arc_text.find("xlink:to=\"") { + let to_start = to_start + 10; + if let Some(to_end) = arc_text[to_start..].find('"') { + link.to = CompactString::from(&arc_text[to_start..to_start + to_end]); + } + } + + // Extract order + if let Some(order_start) = arc_text.find("order=\"") { + let order_start = order_start + 7; + if let Some(order_end) = arc_text[order_start..].find('"') { + if let Ok(order) = arc_text[order_start..order_start + order_end].parse() { + link.order = order; + } + } + } + + // Extract priority + if let Some(priority_start) = arc_text.find("priority=\"") { + let priority_start = priority_start + 10; + if let Some(priority_end) = 
arc_text[priority_start..].find('"') { + if let Ok(priority) = arc_text[priority_start..priority_start + priority_end].parse() { + link.priority = Some(priority); + } + } + } + + // Extract use + if let Some(use_start) = arc_text.find("use=\"") { + let use_start = use_start + 5; + if let Some(use_end) = arc_text[use_start..].find('"') { + link.use_attribute = Some(CompactString::from(&arc_text[use_start..use_start + use_end])); + } + } + + self.presentation_links + .entry(link.from.clone()) + .or_insert_with(Vec::new) + .push(link); + } + } + + Ok(()) + } + + fn parse_calculation_linkbase(&mut self, text: &str) -> Result<()> { + // Parse calculation arcs + let mut pos = 0; + while let Some(arc_start) = text[pos..].find("") { + let arc_text = &text[arc_start..arc_start + arc_end]; + + let mut link = CalculationLink { + from: CompactString::new(""), + to: CompactString::new(""), + weight: 1.0, + order: 1.0, + }; + + // Extract from + if let Some(from_start) = arc_text.find("xlink:from=\"") { + let from_start = from_start + 12; + if let Some(from_end) = arc_text[from_start..].find('"') { + link.from = CompactString::from(&arc_text[from_start..from_start + from_end]); + } + } + + // Extract to + if let Some(to_start) = arc_text.find("xlink:to=\"") { + let to_start = to_start + 10; + if let Some(to_end) = arc_text[to_start..].find('"') { + link.to = CompactString::from(&arc_text[to_start..to_start + to_end]); + } + } + + // Extract weight + if let Some(weight_start) = arc_text.find("weight=\"") { + let weight_start = weight_start + 8; + if let Some(weight_end) = arc_text[weight_start..].find('"') { + if let Ok(weight) = arc_text[weight_start..weight_start + weight_end].parse() { + link.weight = weight; + } + } + } + + // Extract order + if let Some(order_start) = arc_text.find("order=\"") { + let order_start = order_start + 7; + if let Some(order_end) = arc_text[order_start..].find('"') { + if let Ok(order) = arc_text[order_start..order_start + order_end].parse() { + 
link.order = order; + } + } + } + + self.calculation_links + .entry(link.from.clone()) + .or_insert_with(Vec::new) + .push(link); + } + } + + Ok(()) + } + + fn parse_definition_linkbase(&mut self, text: &str) -> Result<()> { + // Parse definition arcs + let mut pos = 0; + while let Some(arc_start) = text[pos..].find("") { + let arc_text = &text[arc_start..arc_start + arc_end]; + + let mut link = DefinitionLink { + from: CompactString::new(""), + to: CompactString::new(""), + arcrole: CompactString::new(""), + order: 1.0, + }; + + // Extract from + if let Some(from_start) = arc_text.find("xlink:from=\"") { + let from_start = from_start + 12; + if let Some(from_end) = arc_text[from_start..].find('"') { + link.from = CompactString::from(&arc_text[from_start..from_start + from_end]); + } + } + + // Extract to + if let Some(to_start) = arc_text.find("xlink:to=\"") { + let to_start = to_start + 10; + if let Some(to_end) = arc_text[to_start..].find('"') { + link.to = CompactString::from(&arc_text[to_start..to_start + to_end]); + } + } + + // Extract arcrole + if let Some(arcrole_start) = arc_text.find("xlink:arcrole=\"") { + let arcrole_start = arcrole_start + 15; + if let Some(arcrole_end) = arc_text[arcrole_start..].find('"') { + link.arcrole = CompactString::from(&arc_text[arcrole_start..arcrole_start + arcrole_end]); + } + } + + // Extract order + if let Some(order_start) = arc_text.find("order=\"") { + let order_start = order_start + 7; + if let Some(order_end) = arc_text[order_start..].find('"') { + if let Ok(order) = arc_text[order_start..order_start + order_end].parse() { + link.order = order; + } + } + } + + self.definition_links + .entry(link.from.clone()) + .or_insert_with(Vec::new) + .push(link); + } + } + + Ok(()) + } + + fn parse_label_linkbase(&mut self, text: &str) -> Result<()> { + // Parse labels + let mut pos = 0; + while let Some(label_start) = text[pos..].find("") { + let label_text = &text[label_start..label_start + label_end]; + + let mut link = 
LabelLink { + concept: CompactString::new(""), + label: CompactString::new(""), + role: CompactString::new(""), + lang: CompactString::new("en"), + }; + + // Extract label ID for concept mapping + if let Some(id_start) = label_text.find("xlink:label=\"") { + let id_start = id_start + 13; + if let Some(id_end) = label_text[id_start..].find('"') { + link.concept = CompactString::from(&label_text[id_start..id_start + id_end]); + } + } + + // Extract role + if let Some(role_start) = label_text.find("xlink:role=\"") { + let role_start = role_start + 12; + if let Some(role_end) = label_text[role_start..].find('"') { + link.role = CompactString::from(&label_text[role_start..role_start + role_end]); + } + } + + // Extract lang + if let Some(lang_start) = label_text.find("xml:lang=\"") { + let lang_start = lang_start + 10; + if let Some(lang_end) = label_text[lang_start..].find('"') { + link.lang = CompactString::from(&label_text[lang_start..lang_start + lang_end]); + } + } + + // Extract label text content + if let Some(content_start) = label_text.find('>') { + let content = &label_text[content_start + 1..]; + link.label = CompactString::from(content.trim()); + } + + self.label_links + .entry(link.concept.clone()) + .or_insert_with(Vec::new) + .push(link); + } + } + + Ok(()) + } + + fn parse_reference_linkbase(&mut self, text: &str) -> Result<()> { + // Parse references - simplified version + let mut pos = 0; + while let Some(ref_start) = text[pos..].find("") { + let ref_text = &text[ref_start..ref_start + ref_end]; + + let mut reference = Reference { + role: CompactString::new(""), + parts: HashMap::new(), + }; + + // Extract role + if let Some(role_start) = ref_text.find("xlink:role=\"") { + let role_start = role_start + 12; + if let Some(role_end) = ref_text[role_start..].find('"') { + reference.role = CompactString::from(&ref_text[role_start..role_start + role_end]); + } + } + + // Parse reference parts (simplified) + let parts = ["Name", "Number", "Section", 
"Subsection", "Paragraph", "Subparagraph", "Clause"]; + for part in &parts { + let tag = format!("') { + let content_start = part_start + content_start + 1; + if let Some(content_end) = ref_text[content_start..].find('<') { + let content = &ref_text[content_start..content_start + content_end]; + reference.parts.insert( + CompactString::from(*part), + content.trim().to_string() + ); + } + } + } + } + + // Find concept this reference belongs to + if let Some(label_start) = ref_text.find("xlink:label=\"") { + let label_start = label_start + 13; + if let Some(label_end) = ref_text[label_start..].find('"') { + let concept = CompactString::from(&ref_text[label_start..label_start + label_end]); + + let link = ReferenceLink { + concept: concept.clone(), + reference, + }; + + self.reference_links + .entry(concept) + .or_insert_with(Vec::new) + .push(link); + } + } + } + } + + Ok(()) + } + + pub fn get_presentation_tree(&self, root: &str) -> Vec<&PresentationLink> { + self.presentation_links + .get(root) + .map(|links| { + let mut sorted = links.iter().collect::>(); + sorted.sort_by(|a, b| a.order.partial_cmp(&b.order).unwrap()); + sorted + }) + .unwrap_or_default() + } + + pub fn calculate_total(&self, parent: &str, facts: &HashMap) -> f64 { + if let Some(links) = self.calculation_links.get(parent) { + links.iter() + .map(|link| { + facts.get(link.to.as_str()) + .map(|value| value * link.weight) + .unwrap_or(0.0) + }) + .sum() + } else { + facts.get(parent).copied().unwrap_or(0.0) + } + } + + pub fn get_label(&self, concept: &str, role: &str, lang: &str) -> Option<&str> { + self.label_links + .get(concept) + .and_then(|labels| { + labels.iter() + .find(|l| l.role == role && l.lang == lang) + .or_else(|| labels.iter().find(|l| l.lang == lang)) + .or_else(|| labels.first()) + }) + .map(|l| l.label.as_str()) + } + + pub fn validate_calculations(&self, facts: &HashMap) -> Vec { + let mut errors = Vec::new(); + + for (parent, links) in &self.calculation_links { + let calculated 
= self.calculate_total(parent, facts); + if let Some(&actual) = facts.get(parent.as_str()) { + let diff = (calculated - actual).abs(); + let tolerance = 0.01; // Allow small rounding differences + + if diff > tolerance { + errors.push(ValidationError::CalculationInconsistency { + concept: parent.to_string(), + expected: calculated, + actual, + }); + } + } + } + + errors + } +} diff --git a/rust/vendor/crabrl/src/main.rs b/rust/vendor/crabrl/src/main.rs new file mode 100644 index 0000000..fd20bac --- /dev/null +++ b/rust/vendor/crabrl/src/main.rs @@ -0,0 +1,181 @@ +//! crabrl CLI - High-performance XBRL parser and validator + +use anyhow::{Context, Result}; +use clap::{Parser as ClapParser, Subcommand}; +use colored::*; +use std::path::PathBuf; +use std::time::Instant; + +use crabrl::{Parser, ValidationConfig, Validator}; + +/// High-performance XBRL parser and validator +#[derive(ClapParser)] +#[command(name = "crabrl")] +#[command(author, version, about, long_about = None)] +struct Cli { + #[command(subcommand)] + command: Commands, +} + +#[derive(Subcommand)] +enum Commands { + /// Parse an XBRL file + Parse { + /// Input file + input: PathBuf, + + /// Output as JSON + #[arg(short, long)] + json: bool, + + /// Show statistics + #[arg(short, long)] + stats: bool, + }, + + /// Validate an XBRL file + Validate { + /// Input file + input: PathBuf, + + /// Validation profile (generic, sec-edgar) + #[arg(short, long, default_value = "generic")] + profile: String, + + /// Treat warnings as errors + #[arg(long)] + strict: bool, + }, + + /// Benchmark parsing performance + Bench { + /// Input file + input: PathBuf, + + /// Number of iterations + #[arg(short, long, default_value = "100")] + iterations: usize, + }, +} + +fn main() -> Result<()> { + let cli = Cli::parse(); + + match cli.command { + Commands::Parse { + input, + json: _, + stats, + } => { + let start = Instant::now(); + let parser = Parser::new(); + let doc = parser + .parse_file(&input) + .with_context(|| 
format!("Failed to parse {}", input.display()))?; + let elapsed = start.elapsed(); + + println!("{} {}", "✓".green().bold(), input.display()); + println!(" Facts: {}", doc.facts.len()); + println!(" Contexts: {}", doc.contexts.len()); + println!(" Units: {}", doc.units.len()); + + if stats { + println!(" Time: {:.2}ms", elapsed.as_secs_f64() * 1000.0); + println!( + " Throughput: {:.0} facts/sec", + doc.facts.len() as f64 / elapsed.as_secs_f64() + ); + } + } + + Commands::Validate { + input, + profile, + strict, + } => { + let parser = Parser::new(); + let doc = parser + .parse_file(&input) + .with_context(|| format!("Failed to parse {}", input.display()))?; + + let config = match profile.as_str() { + "sec-edgar" => ValidationConfig::sec_edgar(), + _ => ValidationConfig::default(), + }; + + let validator = Validator::with_config(config); + let result = validator.validate(&doc)?; + + if result.is_valid { + println!( + "{} {} - Document is valid", + "✓".green().bold(), + input.display() + ); + } else { + println!( + "{} {} - Validation failed", + "✗".red().bold(), + input.display() + ); + println!(" Errors: {}", result.errors.len()); + println!(" Warnings: {}", result.warnings.len()); + + for error in result.errors.iter().take(5) { + println!(" {} {}", "ERROR:".red(), error); + } + + if result.errors.len() > 5 { + println!(" ... 
and {} more errors", result.errors.len() - 5); + } + + if strict && !result.warnings.is_empty() { + std::process::exit(1); + } + + if !result.is_valid { + std::process::exit(1); + } + } + } + + Commands::Bench { input, iterations } => { + let parser = Parser::new(); + + // Warmup + for _ in 0..3 { + let _ = parser.parse_file(&input)?; + } + + let mut times = Vec::with_capacity(iterations); + let mut doc_facts = 0; + + for _ in 0..iterations { + let start = Instant::now(); + let doc = parser.parse_file(&input)?; + times.push(start.elapsed()); + doc_facts = doc.facts.len(); + } + + times.sort(); + let min = times[0]; + let max = times[times.len() - 1]; + let median = times[times.len() / 2]; + let mean = times.iter().sum::() / times.len() as u32; + + println!("Benchmark Results for {}", input.display()); + println!(" Iterations: {}", iterations); + println!(" Facts: {}", doc_facts); + println!(" Min: {:.3}ms", min.as_secs_f64() * 1000.0); + println!(" Median: {:.3}ms", median.as_secs_f64() * 1000.0); + println!(" Mean: {:.3}ms", mean.as_secs_f64() * 1000.0); + println!(" Max: {:.3}ms", max.as_secs_f64() * 1000.0); + println!( + " Throughput: {:.0} facts/sec", + doc_facts as f64 / mean.as_secs_f64() + ); + } + } + + Ok(()) +} diff --git a/rust/vendor/crabrl/src/model.rs b/rust/vendor/crabrl/src/model.rs new file mode 100644 index 0000000..98df459 --- /dev/null +++ b/rust/vendor/crabrl/src/model.rs @@ -0,0 +1,347 @@ +use std::collections::HashMap; + +// ============================================================================ +// Core XBRL Data Structures - Full Specification Support +// ============================================================================ + +#[repr(C, align(64))] +#[derive(Clone)] +pub struct FactStorage { + pub concept_ids: Vec, + pub context_ids: Vec, + pub unit_ids: Vec, + pub values: Vec, + pub decimals: Vec>, + pub ids: Vec>, + pub footnote_refs: Vec>, +} + +#[derive(Debug, Clone)] +pub enum FactValue { + Text(String), + Decimal(f64), + 
Integer(i64), + Boolean(bool), + Date(String), + DateTime(String), + Nil, +} + +impl FactStorage { + pub fn with_capacity(capacity: usize) -> Self { + Self { + concept_ids: Vec::with_capacity(capacity), + context_ids: Vec::with_capacity(capacity), + unit_ids: Vec::with_capacity(capacity), + values: Vec::with_capacity(capacity), + decimals: Vec::with_capacity(capacity), + ids: Vec::with_capacity(capacity), + footnote_refs: Vec::with_capacity(capacity), + } + } + + #[inline(always)] + pub fn len(&self) -> usize { + self.concept_ids.len() + } + + pub fn is_empty(&self) -> bool { + self.concept_ids.is_empty() + } +} + +// Full fact representation with all XBRL features +#[derive(Debug, Clone)] +pub struct Fact { + pub id: Option, + pub concept: String, + pub context_ref: String, + pub unit_ref: Option, + pub value: String, + pub decimals: Option, + pub precision: Option, + pub nil: bool, + pub nil_reason: Option, + pub footnote_refs: Vec, +} + +// Context with full dimension support +#[derive(Debug, Clone)] +pub struct Context { + pub id: String, + pub entity: Entity, + pub period: Period, + pub scenario: Option, +} + +#[derive(Debug, Clone)] +pub struct Entity { + pub identifier: String, + pub scheme: String, + pub segment: Option, +} + +// Dimensional data support +#[derive(Debug, Clone)] +pub struct Segment { + pub explicit_members: Vec, + pub typed_members: Vec, +} + +#[derive(Debug, Clone)] +pub struct DimensionMember { + pub dimension: String, + pub member: String, +} + +#[derive(Debug, Clone)] +pub struct TypedMember { + pub dimension: String, + pub value: String, // XML content +} + +#[derive(Debug, Clone)] +pub struct Scenario { + pub explicit_members: Vec, + pub typed_members: Vec, +} + +// Period with forever support +#[derive(Debug, Clone)] +pub enum Period { + Instant { date: String }, + Duration { start: String, end: String }, + Forever, +} + +// Complex unit support with divide/multiply +#[derive(Debug, Clone)] +pub struct Unit { + pub id: String, + pub 
unit_type: UnitType, +} + +#[derive(Debug, Clone)] +pub enum UnitType { + Simple(Vec), + Divide { + numerator: Vec, + denominator: Vec, + }, + Multiply(Vec), +} + +#[derive(Debug, Clone)] +pub struct Measure { + pub namespace: String, + pub name: String, +} + +// Tuple support for structured data +#[derive(Debug, Clone)] +pub struct Tuple { + pub id: Option, + pub name: String, + pub facts: Vec, +} + +#[derive(Debug, Clone)] +pub enum FactOrTuple { + Fact(Fact), + Tuple(Box), +} + +// Footnote support +#[derive(Debug, Clone)] +pub struct Footnote { + pub id: String, + pub role: Option, + pub lang: Option, + pub content: String, + pub fact_refs: Vec, +} + +// Fraction support +#[derive(Debug, Clone)] +pub struct FractionValue { + pub numerator: f64, + pub denominator: f64, +} + +// Schema and taxonomy support +#[derive(Debug, Clone)] +pub struct Schema { + pub target_namespace: String, + pub elements: HashMap, + pub types: HashMap, + pub imports: Vec, +} + +#[derive(Debug, Clone)] +pub struct SchemaElement { + pub name: String, + pub element_type: String, + pub substitution_group: Option, + pub period_type: Option, + pub balance: Option, + pub abstract_element: bool, + pub nillable: bool, +} + +#[derive(Debug, Clone)] +pub struct SchemaType { + pub name: String, + pub base_type: Option, + pub restrictions: Vec, +} + +#[derive(Debug, Clone)] +pub enum TypeRestriction { + MinInclusive(String), + MaxInclusive(String), + MinExclusive(String), + MaxExclusive(String), + Pattern(String), + Enumeration(Vec), + Length(usize), + MinLength(usize), + MaxLength(usize), +} + +#[derive(Debug, Clone)] +pub struct SchemaImport { + pub namespace: String, + pub schema_location: String, +} + +// Linkbase support +#[derive(Debug, Clone)] +pub struct Linkbase { + pub role: String, + pub links: Vec, +} + +#[derive(Debug, Clone)] +pub enum Link { + Presentation(PresentationLink), + Calculation(CalculationLink), + Definition(DefinitionLink), + Label(LabelLink), + Reference(ReferenceLink), 
+} + +#[derive(Debug, Clone)] +pub struct PresentationLink { + pub from: String, + pub to: String, + pub order: f32, + pub priority: Option, + pub use_attribute: Option, +} + +#[derive(Debug, Clone)] +pub struct CalculationLink { + pub from: String, + pub to: String, + pub weight: f64, + pub order: f32, +} + +#[derive(Debug, Clone)] +pub struct DefinitionLink { + pub from: String, + pub to: String, + pub arcrole: String, + pub order: f32, +} + +#[derive(Debug, Clone)] +pub struct LabelLink { + pub concept: String, + pub label: String, + pub role: String, + pub lang: String, +} + +#[derive(Debug, Clone)] +pub struct ReferenceLink { + pub concept: String, + pub reference: Reference, +} + +#[derive(Debug, Clone)] +pub struct Reference { + pub role: String, + pub parts: HashMap, +} + +// Main document structure with full XBRL support +#[derive(Clone)] +pub struct Document { + pub facts: FactStorage, + pub contexts: Vec, + pub units: Vec, + pub tuples: Vec, + pub footnotes: Vec, + pub presentation_links: Vec, + pub calculation_links: Vec, + pub definition_links: Vec, + pub label_links: Vec, + pub reference_links: Vec, + pub custom_links: Vec, + pub role_types: Vec, + pub arcrole_types: Vec, + pub schemas: Vec, + pub dimensions: Vec, + pub concept_names: Vec, +} + +impl Default for Document { + fn default() -> Self { + Self::new() + } +} + +impl Document { + pub fn new() -> Self { + Self { + facts: FactStorage::with_capacity(10000), + contexts: Vec::with_capacity(100), + units: Vec::with_capacity(50), + tuples: Vec::new(), + footnotes: Vec::new(), + presentation_links: Vec::new(), + calculation_links: Vec::new(), + definition_links: Vec::new(), + label_links: Vec::new(), + reference_links: Vec::new(), + custom_links: Vec::new(), + role_types: Vec::new(), + arcrole_types: Vec::new(), + schemas: Vec::new(), + dimensions: Vec::new(), + concept_names: Vec::new(), + } + } + + pub fn with_capacity(facts: usize, contexts: usize, units: usize) -> Self { + Self { + facts: 
FactStorage::with_capacity(facts), + contexts: Vec::with_capacity(contexts), + units: Vec::with_capacity(units), + tuples: Vec::new(), + footnotes: Vec::new(), + presentation_links: Vec::new(), + calculation_links: Vec::new(), + definition_links: Vec::new(), + label_links: Vec::new(), + reference_links: Vec::new(), + custom_links: Vec::new(), + role_types: Vec::new(), + arcrole_types: Vec::new(), + schemas: Vec::new(), + dimensions: Vec::new(), + concept_names: Vec::new(), + } + } +} diff --git a/rust/vendor/crabrl/src/parser.rs b/rust/vendor/crabrl/src/parser.rs new file mode 100644 index 0000000..723e034 --- /dev/null +++ b/rust/vendor/crabrl/src/parser.rs @@ -0,0 +1,1552 @@ +// Full XBRL 2.1 compliant parser with all features +use crate::{model::*, Error, Result}; +use compact_str::CompactString; +#[cfg(feature = "mmap")] +use memmap2::Mmap; +use std::fs::File; +use std::path::Path; +use std::collections::HashMap; + +pub struct Parser { + allocator: ArenaAllocator, + parallel: bool, + validate: bool, + load_schemas: bool, + load_linkbases: bool, +} + +impl Parser { + pub fn new() -> Self { + Self { + allocator: ArenaAllocator::new(), + parallel: true, + validate: false, + load_schemas: false, + load_linkbases: false, + } + } + + pub fn with_validation(mut self, validate: bool) -> Self { + self.validate = validate; + self + } + + pub fn with_parallel(mut self, parallel: bool) -> Self { + self.parallel = parallel; + self + } + + pub fn with_schema_loading(mut self, load: bool) -> Self { + self.load_schemas = load; + self + } + + pub fn with_linkbase_loading(mut self, load: bool) -> Self { + self.load_linkbases = load; + self + } + + pub fn parse_file>(&self, path: P) -> Result { + let path = path.as_ref(); + let content = std::fs::read(path)?; + self.parse_bytes_with_path(&content, Some(path.to_path_buf())) + } + + pub fn parse_bytes(&self, data: &[u8]) -> Result { + self.parse_bytes_with_path(data, None) + } + + fn parse_bytes_with_path(&self, data: &[u8], path: 
Option) -> Result { + // Skip BOM if present + let data = if data.starts_with(&[0xEF, 0xBB, 0xBF]) { + &data[3..] + } else { + data + }; + + let mut parser = FullXbrlParser::new(data, &self.allocator); + parser.validate = self.validate; + parser.load_schemas = self.load_schemas; + parser.load_linkbases = self.load_linkbases; + parser.file_path = path; + parser.parse() + } +} + +struct FullXbrlParser<'a> { + scanner: SimdScanner<'a>, + allocator: &'a ArenaAllocator, + doc: Document, + in_xbrl_root: bool, + current_tuple_stack: Vec, + validate: bool, + load_schemas: bool, + load_linkbases: bool, + file_path: Option, +} + +// Include base parsing methods +include!("parser_base.rs"); + +impl<'a> FullXbrlParser<'a> { + fn new(data: &'a [u8], allocator: &'a ArenaAllocator) -> Self { + Self { + scanner: SimdScanner::new(data), + allocator, + doc: Document::new(), + in_xbrl_root: false, + current_tuple_stack: Vec::new(), + validate: false, + load_schemas: false, + load_linkbases: false, + file_path: None, + } + } + + fn parse(&mut self) -> Result { + self.scanner.skip_whitespace(); + + while !self.scanner.is_eof() { + self.scanner.skip_whitespace(); + + if self.scanner.peek() != Some(b'<') { + // Skip text content between tags + while self.scanner.peek() != Some(b'<') && !self.scanner.is_eof() { + self.scanner.advance(1); + } + continue; + } + + self.scanner.advance(1); // consume '<' + + if self.scanner.peek() == Some(b'?') { + self.skip_processing_instruction()?; + } else if self.scanner.peek() == Some(b'!') { + if self.peek_ahead(3) == Some(b"!--") { + self.skip_comment()?; + } else if self.peek_ahead(8) == Some(b"![CDATA[") { + // We're in an element, handle CDATA + continue; + } else { + self.skip_doctype()?; + } + } else if self.scanner.peek() == Some(b'/') { + // Closing tag + self.scanner.advance(1); // consume '/' + let tag_name = self.read_tag_name()?; + self.skip_to_tag_end()?; + + // Check if we're closing the xbrl root + if tag_name == "xbrl" || 
tag_name.ends_with(":xbrl") { + self.in_xbrl_root = false; + break; // Done parsing + } + + // Check if we're closing a tuple + if !self.current_tuple_stack.is_empty() { + let last_tuple = self.current_tuple_stack.last().unwrap(); + if tag_name == last_tuple.name || tag_name.ends_with(&format!(":{}", last_tuple.name)) { + let tuple = self.current_tuple_stack.pop().unwrap(); + + if self.current_tuple_stack.is_empty() { + self.document.tuples.push(tuple); + } else { + let parent = self.current_tuple_stack.last_mut().unwrap(); + parent.facts.push(FactOrTuple::Tuple(Box::new(tuple))); + } + } + } + } else { + // Opening tag + self.parse_element()?; + } + } + + // Perform validation if requested + if self.validate { + self.document.validate(); + } + + Ok(std::mem::take(&mut self.document)) + } + + fn parse_element(&mut self) -> Result<()> { + let tag_name = self.read_tag_name()?; + + // Check for xbrl root element + if tag_name == "xbrl" || tag_name.ends_with(":xbrl") { + self.parse_xbrl_root()?; + self.in_xbrl_root = true; + return Ok(()); + } + + // Only parse these elements if we're inside xbrl root + if !self.in_xbrl_root { + self.skip_element_from_tag()?; + return Ok(()); + } + + // Parse XBRL elements + if tag_name.ends_with(":context") || tag_name == "context" { + self.parse_context()?; + } else if tag_name.ends_with(":unit") || tag_name == "unit" { + self.parse_unit()?; + } else if tag_name.ends_with(":schemaRef") || tag_name == "schemaRef" { + self.parse_schema_ref()?; + } else if tag_name.ends_with(":footnoteLink") || tag_name == "footnoteLink" { + self.parse_footnote_link()?; + } else if tag_name.contains(':') { + // This could be a fact or a tuple + // Check if it's a known non-fact element (but allow xbrli:context and xbrli:unit) + let is_structural = tag_name.starts_with("link:") || + tag_name.starts_with("xbrldi:") || + (tag_name.starts_with("xbrli:") && + !tag_name.ends_with(":context") && + !tag_name.ends_with(":unit")); + if !is_structural { + // Try 
to determine if it's a tuple by looking ahead + if self.is_tuple(&tag_name) { + self.parse_tuple(tag_name)?; + } else { + self.parse_fact(tag_name)?; + } + } else { + self.skip_element_from_tag()?; + } + } else { + self.skip_element_from_tag()?; + } + + Ok(()) + } + + fn parse_context(&mut self) -> Result<()> { + let attrs = self.parse_attributes()?; + let id = attrs.iter() + .find(|(n, _)| *n == "id") + .map(|(_, v)| CompactString::from(*v)) + .ok_or_else(|| Error::Parse("Context missing id".to_string()))?; + + self.skip_to_tag_end()?; + + // Initialize context components + let mut entity = None; + let mut period = None; + let mut scenario = None; + + // Parse context children + loop { + self.scanner.skip_whitespace(); + + // Skip any text content + while self.scanner.peek() != Some(b'<') && !self.scanner.is_eof() { + self.scanner.advance(1); + } + + if self.scanner.is_eof() { + break; + } + + let saved_pos = self.scanner.pos; + self.scanner.advance(1); // consume '<' + + if self.scanner.peek() == Some(b'/') { + // Closing tag - check if it's our context + self.scanner.advance(1); + let tag = self.read_tag_name()?; + if tag.ends_with("context") || tag == "context" { + self.skip_to_tag_end()?; + break; + } + // Not our closing tag, restore and skip this element + self.scanner.pos = saved_pos; + break; + } + + // Parse child element + let tag = self.read_tag_name()?; + + if tag.ends_with("entity") { + entity = Some(self.parse_entity()?); + } else if tag.ends_with("period") { + period = Some(self.parse_period()?); + } else if tag.ends_with("scenario") { + scenario = Some(self.parse_scenario()?); + } else { + self.skip_element_from_tag()?; + } + } + + if let (Some(entity), Some(period)) = (entity, period) { + self.document.contexts.push(Context { + id, + entity, + period, + scenario, + }); + } + + Ok(()) + } + + fn parse_entity(&mut self) -> Result { + let _attrs = self.parse_attributes()?; + self.skip_to_tag_end()?; + + let mut identifier = CompactString::new(""); + 
let mut scheme = CompactString::new(""); + let mut segment = None; + + // Parse entity children + loop { + self.scanner.skip_whitespace(); + + // Skip any text content + while self.scanner.peek() != Some(b'<') && !self.scanner.is_eof() { + self.scanner.advance(1); + } + + if self.scanner.is_eof() { + break; + } + + let saved_pos = self.scanner.pos; + self.scanner.advance(1); // consume '<' + + if self.scanner.peek() == Some(b'/') { + // Closing tag + self.scanner.advance(1); + let tag = self.read_tag_name()?; + if tag.ends_with("entity") || tag == "entity" { + self.skip_to_tag_end()?; + break; + } + self.scanner.pos = saved_pos; + break; + } + + let tag = self.read_tag_name()?; + + if tag.ends_with("identifier") { + let attrs = self.parse_attributes()?; + scheme = attrs.iter() + .find(|(n, _)| *n == "scheme") + .map(|(_, v)| CompactString::from(*v)) + .unwrap_or_default(); + + self.skip_to_tag_end()?; + identifier = CompactString::from(self.read_text_content()?); + + // Skip closing tag + self.skip_closing_tag("identifier")?; + } else if tag.ends_with("segment") { + segment = Some(self.parse_segment()?); + } else { + self.skip_element_from_tag()?; + } + } + + Ok(Entity { + identifier, + scheme, + segment, + }) + } + + fn parse_segment(&mut self) -> Result { + let _attrs = self.parse_attributes()?; + self.skip_to_tag_end()?; + + let mut explicit_members = Vec::new(); + let mut typed_members = Vec::new(); + + // Parse segment children + loop { + self.scanner.skip_whitespace(); + + // Skip any text content until we find a tag + while self.scanner.peek() != Some(b'<') && !self.scanner.is_eof() { + self.scanner.advance(1); + } + + if self.scanner.is_eof() { + break; + } + + let saved_pos = self.scanner.pos; + self.scanner.advance(1); // consume '<' + + // Check for comment + if self.scanner.peek() == Some(b'!') { + if self.peek_ahead(3) == Some(b"!--") { + self.scanner.pos = saved_pos; + self.scanner.advance(1); // skip '<' + self.skip_comment()?; + continue; + } + } + 
+ if self.scanner.peek() == Some(b'/') { + // Closing tag + self.scanner.advance(1); + let tag = self.read_tag_name()?; + if tag.ends_with("segment") || tag == "segment" { + self.skip_to_tag_end()?; + break; + } + // Not our closing tag - should not happen in well-formed XML + self.scanner.pos = saved_pos; + break; + } + + let tag = self.read_tag_name()?; + + if tag.ends_with("explicitMember") { + let attrs = self.parse_attributes()?; + let dimension = attrs.iter() + .find(|(n, _)| *n == "dimension") + .map(|(_, v)| CompactString::from(*v)) + .unwrap_or_default(); + + self.skip_to_tag_end()?; + let member = CompactString::from(self.read_text_content()?); + + explicit_members.push(DimensionMember { dimension, member }); + self.skip_closing_tag("explicitMember")?; + } else if tag.ends_with("typedMember") { + let attrs = self.parse_attributes()?; + let dimension = attrs.iter() + .find(|(n, _)| *n == "dimension") + .map(|(_, v)| CompactString::from(*v)) + .unwrap_or_default(); + + self.skip_to_tag_end()?; + // Read the entire XML content as typed member value + let value = self.read_xml_content_until_closing("typedMember")?; + + typed_members.push(TypedMember { dimension, value }); + self.skip_closing_tag("typedMember")?; + } else { + self.skip_element_from_tag()?; + } + } + + Ok(Segment { + explicit_members, + typed_members, + }) + } + + fn parse_scenario(&mut self) -> Result { + let _attrs = self.parse_attributes()?; + self.skip_to_tag_end()?; + + let mut explicit_members = Vec::new(); + let mut typed_members = Vec::new(); + + // Parse scenario children (same structure as segment) + loop { + self.scanner.skip_whitespace(); + + // Skip any text content until we find a tag + while self.scanner.peek() != Some(b'<') && !self.scanner.is_eof() { + self.scanner.advance(1); + } + + if self.scanner.is_eof() { + break; + } + + let saved_pos = self.scanner.pos; + self.scanner.advance(1); // consume '<' + + // Check for comment + if self.scanner.peek() == Some(b'!') { + if 
self.peek_ahead(3) == Some(b"!--") { + self.scanner.pos = saved_pos; + self.scanner.advance(1); + self.skip_comment()?; + continue; + } + } + + if self.scanner.peek() == Some(b'/') { + // Closing tag + self.scanner.advance(1); + let tag = self.read_tag_name()?; + if tag.ends_with("scenario") || tag == "scenario" { + self.skip_to_tag_end()?; + break; + } + self.scanner.pos = saved_pos; + break; + } + + let tag = self.read_tag_name()?; + + if tag.ends_with("explicitMember") { + let attrs = self.parse_attributes()?; + let dimension = attrs.iter() + .find(|(n, _)| *n == "dimension") + .map(|(_, v)| CompactString::from(*v)) + .unwrap_or_default(); + + self.skip_to_tag_end()?; + let member = CompactString::from(self.read_text_content()?); + + explicit_members.push(DimensionMember { dimension, member }); + self.skip_closing_tag("explicitMember")?; + } else if tag.ends_with("typedMember") { + let attrs = self.parse_attributes()?; + let dimension = attrs.iter() + .find(|(n, _)| *n == "dimension") + .map(|(_, v)| CompactString::from(*v)) + .unwrap_or_default(); + + self.skip_to_tag_end()?; + let value = self.read_xml_content_until_closing("typedMember")?; + + typed_members.push(TypedMember { dimension, value }); + self.skip_closing_tag("typedMember")?; + } else { + self.skip_element_from_tag()?; + } + } + + Ok(Scenario { + explicit_members, + typed_members, + }) + } + + fn parse_period(&mut self) -> Result { + let _attrs = self.parse_attributes()?; + self.skip_to_tag_end()?; + + let mut instant = None; + let mut start_date = None; + let mut end_date = None; + let mut forever = false; + + // Parse period children + loop { + self.scanner.skip_whitespace(); + + if self.scanner.peek() != Some(b'<') { + break; + } + + let saved_pos = self.scanner.pos; + self.scanner.advance(1); + + if self.scanner.peek() == Some(b'/') { + // Closing tag + self.scanner.advance(1); + let tag = self.read_tag_name()?; + if tag.ends_with("period") { + self.skip_to_tag_end()?; + break; + } + 
self.scanner.pos = saved_pos; + break; + } + + let tag = self.read_tag_name()?; + + if tag.ends_with("instant") { + self.skip_to_tag_end()?; + instant = Some(CompactString::from(self.read_text_content()?)); + self.skip_closing_tag("instant")?; + } else if tag.ends_with("startDate") { + self.skip_to_tag_end()?; + start_date = Some(CompactString::from(self.read_text_content()?)); + self.skip_closing_tag("startDate")?; + } else if tag.ends_with("endDate") { + self.skip_to_tag_end()?; + end_date = Some(CompactString::from(self.read_text_content()?)); + self.skip_closing_tag("endDate")?; + } else if tag.ends_with("forever") { + forever = true; + self.skip_element_from_tag()?; + } else { + self.skip_element_from_tag()?; + } + } + + Ok(Period { + instant, + start_date, + end_date, + forever, + }) + } + + fn parse_unit(&mut self) -> Result<()> { + let attrs = self.parse_attributes()?; + let id = attrs.iter() + .find(|(n, _)| *n == "id") + .map(|(_, v)| CompactString::from(*v)) + .ok_or_else(|| Error::Parse("Unit missing id".to_string()))?; + + self.skip_to_tag_end()?; + + let mut unit_type = None; + + // Parse unit children + loop { + self.scanner.skip_whitespace(); + + if self.scanner.peek() != Some(b'<') { + break; + } + + let saved_pos = self.scanner.pos; + self.scanner.advance(1); + + if self.scanner.peek() == Some(b'/') { + // Closing tag + self.scanner.advance(1); + let tag = self.read_tag_name()?; + if tag.ends_with("unit") { + self.skip_to_tag_end()?; + break; + } + self.scanner.pos = saved_pos; + break; + } + + let tag = self.read_tag_name()?; + + if tag.ends_with("measure") { + // Simple unit + self.skip_to_tag_end()?; + let measure_text = self.read_text_content()?; + let measure = self.parse_measure(measure_text); + + if unit_type.is_none() { + unit_type = Some(UnitType::Simple(vec![measure])); + } else if let Some(UnitType::Simple(ref mut measures)) = unit_type { + measures.push(measure); + } + + self.skip_closing_tag("measure")?; + } else if 
tag.ends_with("divide") { + // Complex division unit + unit_type = Some(self.parse_unit_divide()?); + } else { + self.skip_element_from_tag()?; + } + } + + if let Some(unit_type) = unit_type { + self.document.units.push(Unit { id, unit_type }); + } + + Ok(()) + } + + fn parse_unit_divide(&mut self) -> Result { + let _attrs = self.parse_attributes()?; + self.skip_to_tag_end()?; + + let mut numerator = Vec::new(); + let mut denominator = Vec::new(); + + // Parse divide children + loop { + self.scanner.skip_whitespace(); + + if self.scanner.peek() != Some(b'<') { + break; + } + + let saved_pos = self.scanner.pos; + self.scanner.advance(1); + + if self.scanner.peek() == Some(b'/') { + // Closing tag + self.scanner.advance(1); + let tag = self.read_tag_name()?; + if tag.ends_with("divide") { + self.skip_to_tag_end()?; + break; + } + self.scanner.pos = saved_pos; + break; + } + + let tag = self.read_tag_name()?; + + if tag.ends_with("unitNumerator") { + self.skip_to_tag_end()?; + numerator = self.parse_unit_measures()?; + self.skip_closing_tag("unitNumerator")?; + } else if tag.ends_with("unitDenominator") { + self.skip_to_tag_end()?; + denominator = self.parse_unit_measures()?; + self.skip_closing_tag("unitDenominator")?; + } else { + self.skip_element_from_tag()?; + } + } + + Ok(UnitType::Divide { numerator, denominator }) + } + + fn parse_unit_measures(&mut self) -> Result> { + let mut measures = Vec::new(); + + loop { + self.scanner.skip_whitespace(); + + if self.scanner.peek() != Some(b'<') { + break; + } + + let saved_pos = self.scanner.pos; + self.scanner.advance(1); + + if self.scanner.peek() == Some(b'/') { + // End of measures + self.scanner.pos = saved_pos; + break; + } + + let tag = self.read_tag_name()?; + + if tag.ends_with("measure") { + self.skip_to_tag_end()?; + let measure_text = self.read_text_content()?; + measures.push(self.parse_measure(measure_text)); + self.skip_closing_tag("measure")?; + } else { + self.scanner.pos = saved_pos; + break; + } + } + 
+ Ok(measures) + } + + fn parse_measure(&self, text: &str) -> Measure { + let (namespace, name) = if let Some(colon_pos) = text.find(':') { + ( + CompactString::from(&text[..colon_pos]), + CompactString::from(&text[colon_pos + 1..]) + ) + } else { + (CompactString::new(""), CompactString::from(text)) + }; + + Measure { namespace, name } + } + + // Continue in next part... +}// Parser part 2: Facts, Tuples, Footnotes, and Helper Functions + +impl<'a> FullXbrlParser<'a> { + fn parse_fact(&mut self, tag_name: &str) -> Result<()> { + let attrs = self.parse_attributes()?; + + // Check for xsi:nil attribute + let is_nil = attrs.iter() + .any(|(n, v)| *n == "xsi:nil" && (*v == "true" || *v == "1")); + + let nil_reason = if is_nil { + attrs.iter() + .find(|(n, _)| *n == "nilReason") + .map(|(_, v)| CompactString::from(*v)) + } else { + None + }; + + let context_ref = attrs.iter() + .find(|(n, _)| *n == "contextRef") + .map(|(_, v)| CompactString::from(*v)); + + let unit_ref = attrs.iter() + .find(|(n, _)| *n == "unitRef") + .map(|(_, v)| CompactString::from(*v)); + + let id = attrs.iter() + .find(|(n, _)| *n == "id") + .map(|(_, v)| CompactString::from(*v)); + + let decimals = attrs.iter() + .find(|(n, _)| *n == "decimals") + .and_then(|(_, v)| v.parse::().ok()); + + let precision = attrs.iter() + .find(|(n, _)| *n == "precision") + .and_then(|(_, v)| v.parse::().ok()); + + // Check if it's a self-closing tag + let is_self_closing = self.check_self_closing(); + + self.skip_to_tag_end()?; + + let value = if is_self_closing || is_nil { + String::new() + } else { + // Check for special fact types (fraction, mixed content) + let value = if self.scanner.peek() == Some(b'<') { + // Check if it's a fraction + if self.peek_tag_name()?.ends_with("numerator") { + self.parse_fraction_value()? + } else { + // Mixed content or nested elements + self.read_mixed_content_until_closing(tag_name)? 
+ } + } else { + // Simple text content (may include CDATA) + self.read_text_content_with_cdata()? + }; + + // Skip closing tag if not self-closing + if !is_self_closing { + self.skip_closing_tag(tag_name)?; + } + + value + }; + + if let Some(context_ref) = context_ref { + let fact = Fact { + id, + concept: CompactString::from(tag_name), + context_ref, + unit_ref, + value: value.clone(), + decimals, + precision, + nil: is_nil, + nil_reason, + footnote_refs: Vec::new(), // Will be populated by footnote links + }; + + // If we're inside a tuple, add to tuple instead of document + if !self.current_tuple_stack.is_empty() { + let tuple = self.current_tuple_stack.last_mut().unwrap(); + tuple.facts.push(FactOrTuple::Fact(fact)); + } else { + // Add to document facts + let concept_id = self.allocator.intern_string(tag_name); + let context_id = self.get_or_create_context_id(&fact.context_ref)?; + let unit_id = fact.unit_ref.as_ref() + .and_then(|u| self.get_or_create_unit_id(u).ok()) + .unwrap_or(0); + + let (value_type, fact_value) = self.parse_fact_value(&value, is_nil)?; + + let mut flags = 0u8; + if is_nil { + flags |= FactFlags::NIL.bits(); + } + if precision.is_some() { + flags |= FactFlags::HAS_PRECISION.bits(); + } + if decimals.is_some() { + flags |= FactFlags::HAS_DECIMALS.bits(); + } + if !self.current_tuple_stack.is_empty() { + flags |= FactFlags::IN_TUPLE.bits(); + } + + self.document.facts.push(CompactFact { + concept_id, + context_id, + unit_id, + value_type, + flags, + padding: [0; 6], + value: fact_value, + }); + } + } + + Ok(()) + } + + fn parse_tuple(&mut self, tag_name: &str) -> Result<()> { + let attrs = self.parse_attributes()?; + + let id = attrs.iter() + .find(|(n, _)| *n == "id") + .map(|(_, v)| CompactString::from(*v)); + + self.skip_to_tag_end()?; + + // Create new tuple and push to stack + let tuple = Tuple { + id, + name: CompactString::from(tag_name), + facts: Vec::new(), + }; + + self.current_tuple_stack.push(tuple); + + // The tuple will be 
popped when we encounter its closing tag + + Ok(()) + } + + fn parse_footnote_link(&mut self) -> Result<()> { + let attrs = self.parse_attributes()?; + + let role = attrs.iter() + .find(|(n, _)| n.ends_with("role")) + .map(|(_, v)| CompactString::from(*v)); + + self.skip_to_tag_end()?; + + let mut footnotes_map: HashMap = HashMap::new(); + let mut fact_footnote_links: Vec<(String, String)> = Vec::new(); + + // Parse footnote link children + loop { + self.scanner.skip_whitespace(); + + if self.scanner.peek() != Some(b'<') { + break; + } + + let saved_pos = self.scanner.pos; + self.scanner.advance(1); + + if self.scanner.peek() == Some(b'/') { + // Closing tag + self.scanner.advance(1); + let tag = self.read_tag_name()?; + if tag.ends_with("footnoteLink") { + self.skip_to_tag_end()?; + break; + } + self.scanner.pos = saved_pos; + break; + } + + let tag = self.read_tag_name()?; + + if tag.ends_with("footnote") { + let attrs = self.parse_attributes()?; + + let id = attrs.iter() + .find(|(n, _)| n.ends_with("label") || *n == "id") + .map(|(_, v)| v.to_string()) + .unwrap_or_default(); + + let lang = attrs.iter() + .find(|(n, _)| n.ends_with("lang")) + .map(|(_, v)| CompactString::from(*v)); + + self.skip_to_tag_end()?; + let content = self.read_text_content_with_cdata()?; + self.skip_closing_tag("footnote")?; + + footnotes_map.insert(id.clone(), Footnote { + id: CompactString::from(id), + role: role.clone(), + lang, + content, + fact_refs: Vec::new(), + }); + } else if tag.ends_with("footnoteArc") { + let attrs = self.parse_attributes()?; + + let from = attrs.iter() + .find(|(n, _)| n.ends_with("from")) + .map(|(_, v)| v.to_string()) + .unwrap_or_default(); + + let to = attrs.iter() + .find(|(n, _)| n.ends_with("to")) + .map(|(_, v)| v.to_string()) + .unwrap_or_default(); + + fact_footnote_links.push((from, to)); + self.skip_element_from_tag()?; + } else { + self.skip_element_from_tag()?; + } + } + + // Process footnote links + for (fact_ref, footnote_ref) in 
fact_footnote_links { + if let Some(footnote) = footnotes_map.get_mut(&footnote_ref) { + footnote.fact_refs.push(CompactString::from(fact_ref)); + } + } + + // Add footnotes to document + for (_, footnote) in footnotes_map { + self.document.footnotes.push(footnote); + } + + Ok(()) + } + + fn parse_fraction_value(&mut self) -> Result { + let mut numerator = String::new(); + let mut denominator = String::new(); + + loop { + self.scanner.skip_whitespace(); + + if self.scanner.peek() != Some(b'<') { + break; + } + + let saved_pos = self.scanner.pos; + self.scanner.advance(1); + + if self.scanner.peek() == Some(b'/') { + self.scanner.pos = saved_pos; + break; + } + + let tag = self.read_tag_name()?; + + if tag.ends_with("numerator") { + self.skip_to_tag_end()?; + numerator = self.read_text_content()?.to_string(); + self.skip_closing_tag("numerator")?; + } else if tag.ends_with("denominator") { + self.skip_to_tag_end()?; + denominator = self.read_text_content()?.to_string(); + self.skip_closing_tag("denominator")?; + } else { + self.skip_element_from_tag()?; + } + } + + // Return as fraction string + Ok(format!("{}/{}", numerator, denominator)) + } + + fn parse_fact_value(&self, value: &str, is_nil: bool) -> Result<(u8, FactValue)> { + if is_nil { + return Ok((ValueType::Nil as u8, FactValue { integer: 0 })); + } + + if value.is_empty() { + return Ok((ValueType::String as u8, FactValue { string_id: 0 })); + } + + // Check for fraction + if value.contains('/') && !value.contains(' ') { + if let Some((num, den)) = value.split_once('/') { + if num.parse::().is_ok() && den.parse::().is_ok() { + return Ok((ValueType::Fraction as u8, FactValue { string_id: self.allocator.intern_string(value) })); + } + } + } + + // Handle parentheses for negative numbers + let cleaned_value = if value.starts_with('(') && value.ends_with(')') { + format!("-{}", &value[1..value.len()-1]) + } else { + value.to_string() + }; + + // Try parsing as number + if let Ok(decimal) = 
cleaned_value.parse::() { + Ok((ValueType::Decimal as u8, FactValue { decimal })) + } else if let Ok(integer) = cleaned_value.parse::() { + Ok((ValueType::Integer as u8, FactValue { integer })) + } else if value == "true" || value == "false" { + let boolean = if value == "true" { 1 } else { 0 }; + Ok((ValueType::Boolean as u8, FactValue { boolean })) + } else { + // Store as string + let string_id = self.allocator.intern_string(value); + Ok((ValueType::String as u8, FactValue { string_id })) + } + } + + fn parse_xbrl_root(&mut self) -> Result<()> { + let attrs = self.parse_attributes()?; + + for (name, value) in attrs { + if name.starts_with("xmlns") { + let ns_name = if name.len() > 6 && name.chars().nth(5) == Some(':') { + CompactString::from(&name[6..]) + } else { + CompactString::new("") + }; + self.document.namespaces.insert(ns_name, CompactString::from(value)); + } + } + + self.skip_to_tag_end()?; + Ok(()) + } + + fn parse_schema_ref(&mut self) -> Result<()> { + let attrs = self.parse_attributes()?; + if let Some((_, href)) = attrs.iter().find(|(n, _)| n.ends_with("href")) { + self.document.schema_ref = Some(CompactString::from(*href)); + + // If schema loading is enabled, load the schema + if self.load_schemas { + self.load_schema_from_ref(href)?; + } + } + self.skip_element_from_tag()?; + Ok(()) + } + + fn load_schema_from_ref(&mut self, schema_location: &str) -> Result<()> { + // Parse schema location to handle relative and absolute paths + let schema_path = if schema_location.starts_with("http://") || schema_location.starts_with("https://") { + // Remote schema - would need HTTP client to fetch + // For now, we'll try to find it locally in a schemas directory + let filename = schema_location.split('/').last().unwrap_or("schema.xsd"); + format!("schemas/{}", filename) + } else if schema_location.starts_with("/") { + // Absolute path + schema_location.to_string() + } else { + // Relative path - resolve relative to the current XBRL file + if let 
Some(base_dir) = self.file_path.as_ref().and_then(|p| p.parent()) { + base_dir.join(schema_location).to_string_lossy().to_string() + } else { + schema_location.to_string() + } + }; + + // Check if schema file exists + let schema_path = std::path::Path::new(&schema_path); + if !schema_path.exists() { + // Schema not found locally - this is common for remote schemas + // In production, we would download and cache them + return Ok(()); + } + + // Load and parse the schema + let schema_content = std::fs::read(schema_path)?; + self.parse_schema_content(&schema_content)?; + + Ok(()) + } + + fn parse_schema_content(&mut self, content: &[u8]) -> Result<()> { + let mut schema = Schema { + target_namespace: CompactString::new(""), + elements: HashMap::new(), + types: HashMap::new(), + imports: Vec::new(), + }; + + // Basic XSD parsing using quick-xml + let mut reader = quick_xml::Reader::from_reader(content); + reader.trim_text(true); + + let mut buf = Vec::new(); + let mut current_element: Option = None; + let mut current_type: Option = None; + + loop { + match reader.read_event_into(&mut buf) { + Ok(Event::Start(ref e)) | Ok(Event::Empty(ref e)) => { + let tag_name = e.name(); + let local_name = std::str::from_utf8(tag_name.local_name().as_ref()) + .unwrap_or(""); + + match local_name { + "schema" => { + // Extract target namespace + for attr in e.attributes().flatten() { + let key = std::str::from_utf8(attr.key.as_ref()).unwrap_or(""); + if key == "targetNamespace" { + let value = std::str::from_utf8(&attr.value).unwrap_or(""); + schema.target_namespace = CompactString::new(value); + } + } + } + "element" => { + let mut element = SchemaElement { + name: CompactString::new(""), + element_type: CompactString::new(""), + substitution_group: None, + period_type: None, + balance: None, + abstract_element: false, + nillable: false, + }; + + for attr in e.attributes().flatten() { + let key = std::str::from_utf8(attr.key.as_ref()).unwrap_or(""); + let value = 
std::str::from_utf8(&attr.value).unwrap_or(""); + + match key { + "name" => element.name = CompactString::new(value), + "type" => element.element_type = CompactString::new(value), + "substitutionGroup" => element.substitution_group = Some(CompactString::new(value)), + "periodType" => element.period_type = Some(CompactString::new(value)), + "balance" => element.balance = Some(CompactString::new(value)), + "abstract" => element.abstract_element = value == "true", + "nillable" => element.nillable = value == "true", + _ => {} + } + } + + if !element.name.is_empty() { + if matches!(e, Event::Empty(_)) { + // Self-closing element tag + schema.elements.insert(element.name.clone(), element); + } else { + current_element = Some(element); + } + } + } + "complexType" | "simpleType" => { + let mut schema_type = SchemaType { + name: CompactString::new(""), + base_type: None, + restrictions: Vec::new(), + }; + + for attr in e.attributes().flatten() { + let key = std::str::from_utf8(attr.key.as_ref()).unwrap_or(""); + let value = std::str::from_utf8(&attr.value).unwrap_or(""); + + if key == "name" { + schema_type.name = CompactString::new(value); + } + } + + if !schema_type.name.is_empty() { + current_type = Some(schema_type); + } + } + "restriction" => { + if let Some(ref mut t) = current_type { + for attr in e.attributes().flatten() { + let key = std::str::from_utf8(attr.key.as_ref()).unwrap_or(""); + let value = std::str::from_utf8(&attr.value).unwrap_or(""); + + if key == "base" { + t.base_type = Some(CompactString::new(value)); + } + } + } + } + "minInclusive" | "maxInclusive" | "minExclusive" | "maxExclusive" | + "pattern" | "length" | "minLength" | "maxLength" => { + if let Some(ref mut t) = current_type { + for attr in e.attributes().flatten() { + let key = std::str::from_utf8(attr.key.as_ref()).unwrap_or(""); + let value = std::str::from_utf8(&attr.value).unwrap_or(""); + + if key == "value" { + let restriction = match local_name { + "minInclusive" => 
TypeRestriction::MinInclusive(value.to_string()), + "maxInclusive" => TypeRestriction::MaxInclusive(value.to_string()), + "minExclusive" => TypeRestriction::MinExclusive(value.to_string()), + "maxExclusive" => TypeRestriction::MaxExclusive(value.to_string()), + "pattern" => TypeRestriction::Pattern(value.to_string()), + "length" => TypeRestriction::Length(value.parse().unwrap_or(0)), + "minLength" => TypeRestriction::MinLength(value.parse().unwrap_or(0)), + "maxLength" => TypeRestriction::MaxLength(value.parse().unwrap_or(0)), + _ => continue, + }; + t.restrictions.push(restriction); + } + } + } + } + "enumeration" => { + if let Some(ref mut t) = current_type { + for attr in e.attributes().flatten() { + let key = std::str::from_utf8(attr.key.as_ref()).unwrap_or(""); + let value = std::str::from_utf8(&attr.value).unwrap_or(""); + + if key == "value" { + // Find or create enumeration restriction + let mut found = false; + for restriction in &mut t.restrictions { + if let TypeRestriction::Enumeration(ref mut values) = restriction { + values.push(value.to_string()); + found = true; + break; + } + } + if !found { + t.restrictions.push(TypeRestriction::Enumeration(vec![value.to_string()])); + } + } + } + } + } + "import" => { + let mut import = SchemaImport { + namespace: CompactString::new(""), + schema_location: CompactString::new(""), + }; + + for attr in e.attributes().flatten() { + let key = std::str::from_utf8(attr.key.as_ref()).unwrap_or(""); + let value = std::str::from_utf8(&attr.value).unwrap_or(""); + + match key { + "namespace" => import.namespace = CompactString::new(value), + "schemaLocation" => import.schema_location = CompactString::new(value), + _ => {} + } + } + + if !import.namespace.is_empty() || !import.schema_location.is_empty() { + schema.imports.push(import); + } + } + _ => {} + } + } + Ok(Event::End(ref e)) => { + let tag_name = e.name(); + let local_name = std::str::from_utf8(tag_name.local_name().as_ref()) + .unwrap_or(""); + + match local_name 
{ + "element" => { + if let Some(element) = current_element.take() { + schema.elements.insert(element.name.clone(), element); + } + } + "complexType" | "simpleType" => { + if let Some(schema_type) = current_type.take() { + schema.types.insert(schema_type.name.clone(), schema_type); + } + } + _ => {} + } + } + Ok(Event::Eof) => break, + Err(e) => return Err(Error::Parse(format!("Schema parse error: {}", e))), + _ => {} + } + buf.clear(); + } + + // Add the parsed schema to the document + self.doc.schemas.push(schema); + + // Process imports recursively if schema loading is enabled + if self.load_schemas { + let imports = self.doc.schemas.last().unwrap().imports.clone(); + for import in imports { + if !import.schema_location.is_empty() { + self.load_schema_from_ref(&import.schema_location)?; + } + } + } + + Ok(()) + } + + fn is_tuple(&mut self, _tag_name: &str) -> bool { + // Look ahead to see if this element contains other facts + // For now, we'll use a simple heuristic: if it doesn't have contextRef, it might be a tuple + let attrs = match self.peek_attributes() { + Ok(attrs) => attrs, + Err(_) => return false, + }; + + !attrs.iter().any(|(n, _)| *n == "contextRef") + } + + fn get_or_create_context_id(&self, context_ref: &str) -> Result { + self.document.contexts.iter() + .position(|c| c.id == context_ref) + .map(|i| i as u16) + .ok_or_else(|| Error::NotFound(format!("Context: {}", context_ref))) + } + + fn get_or_create_unit_id(&self, unit_ref: &str) -> Result { + self.document.units.iter() + .position(|u| u.id == unit_ref) + .map(|i| (i + 1) as u16) // 0 means no unit + .ok_or_else(|| Error::NotFound(format!("Unit: {}", unit_ref))) + } + + // Helper methods for reading content + + fn read_text_content_with_cdata(&mut self) -> Result { + let mut content = String::new(); + + while !self.scanner.is_eof() { + if self.scanner.peek() == Some(b'<') { + // Check for CDATA + if self.peek_ahead(9) == Some(b" + let start = self.scanner.pos; + while !self.scanner.is_eof() { 
+ if self.scanner.peek() == Some(b']') { + if self.peek_ahead(3) == Some(b"]]>") { + let cdata = std::str::from_utf8(&self.scanner.data[start..self.scanner.pos]) + .map_err(|_| Error::Parse("Invalid UTF-8 in CDATA".to_string()))?; + content.push_str(cdata); + self.scanner.advance(3); + break; + } + } + self.scanner.advance(1); + } + } else { + // End of text content + break; + } + } else { + // Regular text + let start = self.scanner.pos; + while self.scanner.peek() != Some(b'<') && !self.scanner.is_eof() { + self.scanner.advance(1); + } + let text = std::str::from_utf8(&self.scanner.data[start..self.scanner.pos]) + .map_err(|_| Error::Parse("Invalid UTF-8 in text".to_string()))?; + content.push_str(text); + } + } + + // Decode HTML entities + Ok(self.decode_entities(&content)) + } + + fn read_mixed_content_until_closing(&mut self, tag_name: &str) -> Result { + let mut content = String::new(); + let mut depth = 1; + + while depth > 0 && !self.scanner.is_eof() { + if self.scanner.peek() == Some(b'<') { + // Check what kind of tag + if self.peek_ahead(2) == Some(b"") { + let cdata = std::str::from_utf8(&self.scanner.data[start..self.scanner.pos]) + .map_err(|_| Error::Parse("Invalid UTF-8 in CDATA".to_string()))?; + content.push_str(cdata); + self.scanner.advance(3); + break; + } + self.scanner.advance(1); + } + } else { + // Opening tag or other + content.push('<'); + self.scanner.advance(1); + } + } else { + // Regular character + if let Some(ch) = self.scanner.peek() { + content.push(ch as char); + self.scanner.advance(1); + } + } + } + + Ok(self.decode_entities(&content)) + } + + fn read_xml_content_until_closing(&mut self, tag_name: &str) -> Result { + // Similar to mixed content but preserves XML structure + self.read_mixed_content_until_closing(tag_name) + } + + fn decode_entities(&self, text: &str) -> String { + text.replace("&", "&") + .replace("<", "<") + .replace(">", ">") + .replace(""", "\"") + .replace("'", "'") + .replace("'", "'") + } + + fn 
peek_ahead(&self, n: usize) -> Option<&'a [u8]> { + if self.scanner.pos + n <= self.scanner.data.len() { + Some(&self.scanner.data[self.scanner.pos..self.scanner.pos + n]) + } else { + None + } + } + + fn peek_tag_name(&mut self) -> Result { + let saved_pos = self.scanner.pos; + self.scanner.skip_whitespace(); + + if self.scanner.peek() == Some(b'<') { + self.scanner.advance(1); + let tag = self.read_tag_name()?.to_string(); + self.scanner.pos = saved_pos; + Ok(tag) + } else { + self.scanner.pos = saved_pos; + Err(Error::Parse("Expected tag".to_string())) + } + } + + fn peek_attributes(&mut self) -> Result> { + let saved_pos = self.scanner.pos; + let attrs = self.parse_attributes(); + self.scanner.pos = saved_pos; + attrs + } + + fn check_self_closing(&self) -> bool { + // Check if the previous characters indicate self-closing tag + if self.scanner.pos >= 2 { + self.scanner.data[self.scanner.pos - 2] == b'/' && self.scanner.data[self.scanner.pos - 1] == b'>' + } else { + false + } + } + + fn skip_closing_tag(&mut self, tag_name: &str) -> Result<()> { + self.scanner.skip_whitespace(); + if self.scanner.peek() == Some(b'<') { + self.scanner.advance(1); + if self.scanner.peek() == Some(b'/') { + self.scanner.advance(1); + let tag = self.read_tag_name()?; + if tag == tag_name || tag.ends_with(tag_name) || tag_name.ends_with(&tag) { + self.skip_to_tag_end()?; + return Ok(()); + } + } + } + Ok(()) + } + + fn skip_doctype(&mut self) -> Result<()> { + // Skip DOCTYPE declaration + while !self.scanner.is_eof() { + if self.scanner.peek() == Some(b'>') { + self.scanner.advance(1); + break; + } + self.scanner.advance(1); + } + Ok(()) + } + + // Implement remaining base methods from parser.rs + // ... (include all the base parsing methods like read_tag_name, parse_attributes, etc.) 
+} diff --git a/rust/vendor/crabrl/src/parser_base.rs b/rust/vendor/crabrl/src/parser_base.rs new file mode 100644 index 0000000..004ae7b --- /dev/null +++ b/rust/vendor/crabrl/src/parser_base.rs @@ -0,0 +1,238 @@ +// Base parsing methods for FullXbrlParser + +impl<'a> FullXbrlParser<'a> { + #[inline(always)] + fn read_tag_name(&mut self) -> Result<&'a str> { + let start = self.scanner.pos; + while let Some(ch) = self.scanner.peek() { + if ch == b' ' || ch == b'>' || ch == b'/' || ch == b'\t' || ch == b'\n' || ch == b'\r' { + break; + } + self.scanner.advance(1); + } + let end = self.scanner.pos; + + if start == end { + return Err(Error::Parse("Empty tag name".to_string())); + } + + std::str::from_utf8(&self.scanner.data[start..end]) + .map_err(|_| Error::Parse("Invalid UTF-8 in tag name".to_string())) + } + + #[inline(always)] + fn parse_attributes(&mut self) -> Result> { + let mut attrs = Vec::new(); + + loop { + self.scanner.skip_whitespace(); + + match self.scanner.peek() { + Some(b'>') => { + // End of tag + break; + } + Some(b'/') => { + // Self-closing tag + self.scanner.advance(1); + if self.scanner.peek() == Some(b'>') { + break; + } + } + None => return Err(Error::Parse("Unexpected EOF in attributes".to_string())), + _ => {} + } + + let name_start = self.scanner.pos; + while let Some(ch) = self.scanner.peek() { + if ch == b'=' || ch == b' ' || ch == b'>' || ch == b'/' { + break; + } + self.scanner.advance(1); + } + + if self.scanner.pos == name_start { + break; // No more attributes + } + + let name = std::str::from_utf8(&self.scanner.data[name_start..self.scanner.pos]) + .map_err(|_| Error::Parse("Invalid UTF-8 in attribute name".to_string()))?; + + self.scanner.skip_whitespace(); + + if self.scanner.peek() != Some(b'=') { + continue; + } + self.scanner.advance(1); + + self.scanner.skip_whitespace(); + + let quote = self.scanner.peek() + .ok_or_else(|| Error::Parse("Expected quote".to_string()))?; + + if quote != b'"' && quote != b'\'' { + return 
Err(Error::Parse("Expected quote in attribute".to_string()));
        }

        // Skip the opening quote and capture the raw value bytes up to the
        // matching quote. NOTE(review): entities are not decoded and an
        // unterminated value is silently truncated at EOF.
        self.scanner.advance(1);
        let value_start = self.scanner.pos;
        while let Some(ch) = self.scanner.peek() {
            if ch == quote {
                break;
            }
            self.scanner.advance(1);
        }

        let value = std::str::from_utf8(&self.scanner.data[value_start..self.scanner.pos])
            .map_err(|_| Error::Parse("Invalid UTF-8 in attribute value".to_string()))?;

        self.scanner.advance(1); // Skip closing quote

        attrs.push((name, value));
    }

    Ok(attrs)
}

/// Advance just past the '>' that ends the current tag.
///
/// Quote-aware: a '>' inside a quoted attribute value (e.g. `<a b="x>y">`)
/// no longer terminates the tag early — the original stopped at the first
/// '>' regardless of quoting.
#[inline(always)]
fn skip_to_tag_end(&mut self) -> Result<()> {
    let mut in_quote: Option<u8> = None;
    while let Some(ch) = self.scanner.peek() {
        self.scanner.advance(1);
        match in_quote {
            Some(q) if ch == q => in_quote = None,
            Some(_) => {}
            None if ch == b'"' || ch == b'\'' => in_quote = Some(ch),
            None if ch == b'>' => return Ok(()),
            None => {}
        }
    }
    Err(Error::Parse("Expected '>'".to_string()))
}

/// Read character data up to the next '<' and return it trimmed.
/// NOTE(review): XML entities (&amp; etc.) are returned verbatim.
#[inline(always)]
fn read_text_content(&mut self) -> Result<&'a str> {
    let start = self.scanner.pos;
    while let Some(ch) = self.scanner.peek() {
        if ch == b'<' {
            break;
        }
        self.scanner.advance(1);
    }

    let text = std::str::from_utf8(&self.scanner.data[start..self.scanner.pos])
        .map_err(|_| Error::Parse("Invalid UTF-8 in text content".to_string()))?;

    Ok(text.trim())
}

/// Skip an element whose opening-tag name has already been consumed,
/// including all nested children, leaving the scanner just past the
/// element's closing tag.
#[inline(always)]
fn skip_element_from_tag(&mut self) -> Result<()> {
    // Finish reading the opening tag first.
    self.skip_to_tag_end()?;

    // `<tag .../>` — self-closing, nothing more to skip.
    // NOTE(review): like the original, this inspects the byte before '>',
    // which assumes skip_to_tag_end stopped at the real tag terminator.
    if self.scanner.pos >= 2 && self.scanner.data[self.scanner.pos - 2] == b'/' {
        return Ok(());
    }

    // Walk forward, tracking nesting depth until the matching close tag.
    let mut depth = 1usize;
    while depth > 0 && !self.scanner.is_eof() {
        // Find the next tag start.
        while let Some(ch) = self.scanner.peek() {
            if ch == b'<' {
                break;
            }
            self.scanner.advance(1);
        }
        if self.scanner.is_eof() {
            break;
        }
        self.scanner.advance(1); // consume '<'

        match self.scanner.peek() {
            Some(b'/') => {
                // Closing tag for the current nesting level.
                depth -= 1;
                self.skip_past_tag_close();
            }
            Some(b'!') | Some(b'?') => {
                // Comment / PI / doctype: does not affect depth.
                // NOTE(review): a comment containing '>' terminates early,
                // matching the original's behavior.
                self.skip_past_tag_close();
            }
            Some(_) => {
                // Opening tag: only increases depth if it is not
                // self-closing. Quote-aware, so `/>` inside an attribute
                // value no longer fakes a self-closing tag (original bug).
                if !self.skip_past_tag_close() {
                    depth += 1;
                }
            }
            None => break,
        }
    }

    Ok(())
}

/// Advance just past the next '>', honouring quoted attribute values.
/// Returns `true` if the tag ended with `/>` (self-closing).
fn skip_past_tag_close(&mut self) -> bool {
    let mut in_quote: Option<u8> = None;
    let mut prev = 0u8;
    while let Some(ch) = self.scanner.peek() {
        self.scanner.advance(1);
        match in_quote {
            Some(q) => {
                if ch == q {
                    in_quote = None;
                }
            }
            None => match ch {
                b'"' | b'\'' => in_quote = Some(ch),
                b'>' => return prev == b'/',
                _ => {}
            },
        }
        prev = ch;
    }
    false
}

/// Skip a processing instruction, stopping just past the terminating `?>`.
#[inline(always)]
fn skip_processing_instruction(&mut self) -> Result<()> {
    while !self.scanner.is_eof() {
        if self.scanner.peek() == Some(b'?') {
            self.scanner.advance(1);
            if self.scanner.peek() == Some(b'>') {
                self.scanner.advance(1);
                return Ok(());
            }
        } else {
            self.scanner.advance(1);
        }
    }
    Err(Error::Parse("Unclosed processing instruction".to_string()))
}

/// Skip a comment, stopping just past the terminating `-->`.
/// NOTE(review): a non-standard `--->` terminator is not recognised.
#[inline(always)]
fn skip_comment(&mut self) -> Result<()> {
    while !self.scanner.is_eof() {
        if self.scanner.peek() == Some(b'-') {
            self.scanner.advance(1);
            if self.scanner.peek() == Some(b'-') {
                self.scanner.advance(1);
                if self.scanner.peek() == Some(b'>') {
                    self.scanner.advance(1);
                    return Ok(());
                }
            }
        } else {
            self.scanner.advance(1);
        }
    }
    Err(Error::Parse("Unclosed comment".to_string()))
}
}

impl Default for Parser {
    fn default() -> Self {
        Self::new()
    }
}
diff --git a/rust/vendor/crabrl/src/schema.rs b/rust/vendor/crabrl/src/schema.rs
new file mode 100644
index 0000000..e3b45e3
--- /dev/null
+++ b/rust/vendor/crabrl/src/schema.rs
// Schema loading and validation for XBRL
use crate::{Error, Result, model::*};
use compact_str::CompactString;
use std::collections::HashMap;
use std::path::Path;

pub struct
SchemaLoader { + cache: HashMap, +} + +impl SchemaLoader { + pub fn new() -> Self { + Self { + cache: HashMap::new(), + } + } + + pub fn load_schema>(&mut self, path: P) -> Result<&Schema> { + let path_str = path.as_ref().to_string_lossy(); + let key = CompactString::from(path_str.as_ref()); + + if self.cache.contains_key(&key) { + return Ok(self.cache.get(&key).unwrap()); + } + + let schema = self.parse_schema_file(path)?; + self.cache.insert(key.clone(), schema); + Ok(self.cache.get(&key).unwrap()) + } + + fn parse_schema_file>(&self, path: P) -> Result { + let content = std::fs::read(path)?; + self.parse_schema_bytes(&content) + } + + fn parse_schema_bytes(&self, data: &[u8]) -> Result { + // Simple XML parsing for schema + let mut schema = Schema { + target_namespace: CompactString::new(""), + elements: HashMap::new(), + types: HashMap::new(), + imports: Vec::new(), + }; + + // Skip BOM if present + let data = if data.starts_with(&[0xEF, 0xBB, 0xBF]) { + &data[3..] + } else { + data + }; + + let text = std::str::from_utf8(data) + .map_err(|_| Error::Parse("Invalid UTF-8 in schema".to_string()))?; + + // Extract target namespace + if let Some(ns_start) = text.find("targetNamespace=\"") { + let ns_start = ns_start + 17; + if let Some(ns_end) = text[ns_start..].find('"') { + schema.target_namespace = CompactString::from(&text[ns_start..ns_start + ns_end]); + } + } + + // Parse elements + let mut pos = 0; + while let Some(elem_start) = text[pos..].find("") { + elem_start + end + 2 + } else if let Some(end) = text[elem_start..].find("") { + elem_start + end + 13 + } else { + continue; + }; + + let elem_text = &text[elem_start..elem_end]; + + // Extract element attributes + let mut element = SchemaElement { + name: CompactString::new(""), + element_type: CompactString::new(""), + substitution_group: None, + period_type: None, + balance: None, + abstract_element: elem_text.contains("abstract=\"true\""), + nillable: elem_text.contains("nillable=\"true\""), + }; + + // 
Extract name + if let Some(name_start) = elem_text.find("name=\"") { + let name_start = name_start + 6; + if let Some(name_end) = elem_text[name_start..].find('"') { + element.name = CompactString::from(&elem_text[name_start..name_start + name_end]); + } + } + + // Extract type + if let Some(type_start) = elem_text.find("type=\"") { + let type_start = type_start + 6; + if let Some(type_end) = elem_text[type_start..].find('"') { + element.element_type = CompactString::from(&elem_text[type_start..type_start + type_end]); + } + } + + // Extract substitutionGroup + if let Some(sg_start) = elem_text.find("substitutionGroup=\"") { + let sg_start = sg_start + 19; + if let Some(sg_end) = elem_text[sg_start..].find('"') { + element.substitution_group = Some(CompactString::from(&elem_text[sg_start..sg_start + sg_end])); + } + } + + // Extract XBRL-specific attributes + if let Some(pt_start) = elem_text.find("xbrli:periodType=\"") { + let pt_start = pt_start + 18; + if let Some(pt_end) = elem_text[pt_start..].find('"') { + element.period_type = Some(CompactString::from(&elem_text[pt_start..pt_start + pt_end])); + } + } + + if let Some(bal_start) = elem_text.find("xbrli:balance=\"") { + let bal_start = bal_start + 15; + if let Some(bal_end) = elem_text[bal_start..].find('"') { + element.balance = Some(CompactString::from(&elem_text[bal_start..bal_start + bal_end])); + } + } + + if !element.name.is_empty() { + schema.elements.insert(element.name.clone(), element); + } + } + + // Parse imports + pos = 0; + while let Some(import_start) = text[pos..].find("") { + let import_text = &text[import_start..import_start + import_end]; + + let mut import = SchemaImport { + namespace: CompactString::new(""), + schema_location: CompactString::new(""), + }; + + if let Some(ns_start) = import_text.find("namespace=\"") { + let ns_start = ns_start + 11; + if let Some(ns_end) = import_text[ns_start..].find('"') { + import.namespace = CompactString::from(&import_text[ns_start..ns_start + 
ns_end]); + } + } + + if let Some(loc_start) = import_text.find("schemaLocation=\"") { + let loc_start = loc_start + 16; + if let Some(loc_end) = import_text[loc_start..].find('"') { + import.schema_location = CompactString::from(&import_text[loc_start..loc_start + loc_end]); + } + } + + schema.imports.push(import); + } + } + + Ok(schema) + } + + pub fn validate_element(&self, name: &str, value: &str, schema: &Schema) -> Result<()> { + if let Some(element) = schema.elements.get(name) { + // Check if element is abstract + if element.abstract_element { + return Err(Error::Validation(format!("Element {} is abstract", name))); + } + + // Validate type + if let Some(type_def) = schema.types.get(&element.element_type) { + self.validate_type(value, type_def)?; + } + + Ok(()) + } else { + // Element not found in schema - might be from imported schema + Ok(()) + } + } + + fn validate_type(&self, value: &str, type_def: &SchemaType) -> Result<()> { + for restriction in &type_def.restrictions { + match restriction { + TypeRestriction::MinInclusive(min) => { + if let (Ok(val), Ok(min_val)) = (value.parse::(), min.parse::()) { + if val < min_val { + return Err(Error::Validation(format!("Value {} is less than minimum {}", val, min_val))); + } + } + } + TypeRestriction::MaxInclusive(max) => { + if let (Ok(val), Ok(max_val)) = (value.parse::(), max.parse::()) { + if val > max_val { + return Err(Error::Validation(format!("Value {} is greater than maximum {}", val, max_val))); + } + } + } + TypeRestriction::Pattern(pattern) => { + // Simple pattern matching - could use regex for complex patterns + if !value.contains(pattern) { + return Err(Error::Validation(format!("Value {} doesn't match pattern {}", value, pattern))); + } + } + TypeRestriction::MinLength(min) => { + if value.len() < *min { + return Err(Error::Validation(format!("Value length {} is less than minimum {}", value.len(), min))); + } + } + TypeRestriction::MaxLength(max) => { + if value.len() > *max { + return 
Err(Error::Validation(format!("Value length {} is greater than maximum {}", value.len(), max))); + } + } + _ => {} + } + } + Ok(()) + } +} + +// Schema validator for documents +pub struct SchemaValidator { + schemas: Vec, +} + +impl SchemaValidator { + pub fn new() -> Self { + Self { + schemas: Vec::new(), + } + } + + pub fn add_schema(&mut self, schema: Schema) { + self.schemas.push(schema); + } + + pub fn validate_document(&self, doc: &Document) -> Vec { + let mut errors = Vec::new(); + + // Validate facts against schemas + for i in 0..doc.facts.len() { + if let Some(_fact) = doc.facts.get(i) { + // Would need to map fact concept_id back to concept name + // and validate against schema + // This is simplified for now + } + } + + // Check for required elements + for schema in &self.schemas { + for (name, element) in &schema.elements { + if !element.nillable && !element.abstract_element { + // Check if this required element exists in document + // This would require reverse mapping from concept names to facts + let _found = false; + // if !found { + // errors.push(ValidationError::MissingRequiredElement { + // element: name.to_string(), + // }); + // } + } + } + } + + errors + } +} diff --git a/rust/vendor/crabrl/src/sec.rs b/rust/vendor/crabrl/src/sec.rs new file mode 100644 index 0000000..90e7c74 --- /dev/null +++ b/rust/vendor/crabrl/src/sec.rs @@ -0,0 +1,51 @@ +// SEC EDGAR XBRL filing support (local files only) +use crate::{Parser, Document, Result}; +use std::path::Path; + +pub struct SecFilingParser { + parser: Parser, +} + +impl SecFilingParser { + pub fn new() -> Self { + Self { + parser: Parser::new().with_validation(true), + } + } + + pub fn parse_filing>(&self, path: P) -> Result { + self.parser.parse_file(path) + } + + pub fn with_validation(mut self, validate: bool) -> Self { + self.parser = self.parser.with_validation(validate); + self + } +} + +// Test utilities for SEC filings +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn 
test_parse_local_sec_filing() { + let parser = SecFilingParser::new(); + + // Test with local test files + if std::path::Path::new("test_data/test_tiny.xbrl").exists() { + match parser.parse_filing("test_data/test_tiny.xbrl") { + Ok(doc) => { + println!("Successfully parsed filing:"); + println!(" Facts: {}", doc.facts.len()); + println!(" Contexts: {}", doc.contexts.len()); + println!(" Units: {}", doc.units.len()); + assert!(doc.contexts.len() > 0, "Should have contexts"); + } + Err(e) => { + eprintln!("Failed to parse filing: {}", e); + } + } + } + } +} diff --git a/rust/vendor/crabrl/src/simd.rs b/rust/vendor/crabrl/src/simd.rs new file mode 100644 index 0000000..4cfb453 --- /dev/null +++ b/rust/vendor/crabrl/src/simd.rs @@ -0,0 +1,208 @@ +use memchr::{memchr, memchr2, memchr3}; +use std::arch::x86_64::*; + +const XML_TAG_START: u8 = b'<'; +const XML_TAG_END: u8 = b'>'; +const XML_SLASH: u8 = b'/'; +const XML_QUOTE: u8 = b'"'; +const XML_EQUALS: u8 = b'='; +const XML_SPACE: u8 = b' '; + +#[inline(always)] +pub fn find_tag_start(haystack: &[u8]) -> Option { + memchr(XML_TAG_START, haystack) +} + +#[inline(always)] +pub fn find_tag_end(haystack: &[u8]) -> Option { + memchr(XML_TAG_END, haystack) +} + +#[inline(always)] +pub fn find_quote(haystack: &[u8]) -> Option { + memchr(XML_QUOTE, haystack) +} + +#[inline(always)] +pub fn find_any_delimiter(haystack: &[u8]) -> Option { + memchr3(XML_TAG_START, XML_TAG_END, XML_QUOTE, haystack) +} + +#[target_feature(enable = "avx2")] +#[inline] +pub unsafe fn find_pattern_avx2(haystack: &[u8], pattern: &[u8]) -> Option { + if pattern.is_empty() || haystack.len() < pattern.len() { + return None; + } + + let first_byte = _mm256_set1_epi8(pattern[0] as i8); + let mut i = 0; + + while i + 32 <= haystack.len() { + let chunk = _mm256_loadu_si256(haystack.as_ptr().add(i) as *const _); + let cmp = _mm256_cmpeq_epi8(chunk, first_byte); + let mask = _mm256_movemask_epi8(cmp); + + if mask != 0 { + for bit_pos in 0..32 { + if (mask & (1 
<< bit_pos)) != 0 { + let pos = i + bit_pos; + if pos + pattern.len() <= haystack.len() + && &haystack[pos..pos + pattern.len()] == pattern { + return Some(pos); + } + } + } + } + i += 32; + } + + while i < haystack.len() - pattern.len() + 1 { + if &haystack[i..i + pattern.len()] == pattern { + return Some(i); + } + i += 1; + } + + None +} + +#[target_feature(enable = "avx2")] +#[inline] +pub unsafe fn skip_whitespace_avx2(data: &[u8], mut pos: usize) -> usize { + let space = _mm256_set1_epi8(0x20); + let tab = _mm256_set1_epi8(0x09); + let newline = _mm256_set1_epi8(0x0A); + let carriage = _mm256_set1_epi8(0x0D); + + while pos + 32 <= data.len() { + let chunk = _mm256_loadu_si256(data.as_ptr().add(pos) as *const _); + + let is_space = _mm256_cmpeq_epi8(chunk, space); + let is_tab = _mm256_cmpeq_epi8(chunk, tab); + let is_newline = _mm256_cmpeq_epi8(chunk, newline); + let is_carriage = _mm256_cmpeq_epi8(chunk, carriage); + + let is_whitespace = _mm256_or_si256( + _mm256_or_si256(is_space, is_tab), + _mm256_or_si256(is_newline, is_carriage) + ); + + let mask = _mm256_movemask_epi8(is_whitespace); + + if mask != -1 { + for i in 0..32 { + if (mask & (1 << i)) == 0 { + return pos + i; + } + } + } + + pos += 32; + } + + while pos < data.len() { + match data[pos] { + b' ' | b'\t' | b'\n' | b'\r' => pos += 1, + _ => break, + } + } + + pos +} + +#[inline(always)] +pub fn skip_whitespace(data: &[u8], mut pos: usize) -> usize { + #[cfg(target_arch = "x86_64")] + { + if is_x86_feature_detected!("avx2") && data.len() - pos >= 32 { + return unsafe { skip_whitespace_avx2(data, pos) }; + } + } + + while pos < data.len() { + match data[pos] { + b' ' | b'\t' | b'\n' | b'\r' => pos += 1, + _ => break, + } + } + pos +} + +#[inline(always)] +pub fn find_pattern(haystack: &[u8], pattern: &[u8]) -> Option { + #[cfg(target_arch = "x86_64")] + { + if is_x86_feature_detected!("avx2") && haystack.len() >= 32 { + return unsafe { find_pattern_avx2(haystack, pattern) }; + } + } + + 
haystack.windows(pattern.len()) + .position(|window| window == pattern) +} + +pub struct SimdScanner<'a> { + pub data: &'a [u8], + pub pos: usize, +} + +impl<'a> SimdScanner<'a> { + #[inline(always)] + pub fn new(data: &'a [u8]) -> Self { + Self { data, pos: 0 } + } + + #[inline(always)] + pub fn skip_whitespace(&mut self) { + self.pos = skip_whitespace(self.data, self.pos); + } + + #[inline(always)] + pub fn find_next(&self, byte: u8) -> Option { + memchr(byte, &self.data[self.pos..]).map(|i| self.pos + i) + } + + #[inline(always)] + pub fn find_pattern(&self, pattern: &[u8]) -> Option { + find_pattern(&self.data[self.pos..], pattern).map(|i| self.pos + i) + } + + #[inline(always)] + pub fn advance(&mut self, n: usize) { + self.pos = (self.pos + n).min(self.data.len()); + } + + #[inline(always)] + pub fn peek(&self) -> Option { + self.data.get(self.pos).copied() + } + + #[inline(always)] + pub fn remaining(&self) -> &'a [u8] { + &self.data[self.pos..] + } + + #[inline(always)] + pub fn is_eof(&self) -> bool { + self.pos >= self.data.len() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_find_pattern() { + let haystack = b""; + let pattern = b"context"; + assert_eq!(find_pattern(haystack, pattern), Some(6)); + } + + #[test] + fn test_skip_whitespace() { + let data = b" \t\n\r"; + assert_eq!(skip_whitespace(data, 0), 6); + } +} diff --git a/rust/vendor/crabrl/src/simple_parser.rs b/rust/vendor/crabrl/src/simple_parser.rs new file mode 100644 index 0000000..0f6aa36 --- /dev/null +++ b/rust/vendor/crabrl/src/simple_parser.rs @@ -0,0 +1,99 @@ +//! 
Simple working XBRL parser + +use crate::{model::*, Result}; +use std::path::Path; + +#[derive(Default)] +pub struct Parser { + #[allow(dead_code)] + load_linkbases: bool, +} + +impl Parser { + pub fn new() -> Self { + Self::default() + } + + pub fn parse_str(&self, content: &str) -> Result { + self.parse_bytes(content.as_bytes()) + } + + pub fn parse_file>(&self, path: P) -> Result { + let content = std::fs::read(path)?; + self.parse_bytes(&content) + } + + pub fn parse_bytes(&self, data: &[u8]) -> Result { + // Simple XML parsing - just count elements for now + let text = String::from_utf8_lossy(data); + + // Count facts (very simplified) + let fact_count = text.matches(", + pub linkbases: Vec, +} + +pub struct Schema { + pub target_namespace: CompactString, + pub elements: HashMap, +} + +pub struct Element { + pub name: CompactString, + pub element_type: CompactString, + pub substitution_group: Option, + pub period_type: Option, +} + +pub struct Linkbase { + pub role: CompactString, + pub arcs: Vec, +} + +pub struct Arc { + pub from: CompactString, + pub to: CompactString, + pub order: f32, + pub weight: f32, +} + +impl Taxonomy { + pub fn new() -> Self { + Self { + schemas: Vec::new(), + linkbases: Vec::new(), + } + } + + pub fn load_schema(&mut self, _path: &str) -> Result<()> { + Ok(()) + } + + pub fn load_linkbase(&mut self, _path: &str) -> Result<()> { + Ok(()) + } +} diff --git a/rust/vendor/crabrl/src/validator.rs b/rust/vendor/crabrl/src/validator.rs new file mode 100644 index 0000000..c5bcd21 --- /dev/null +++ b/rust/vendor/crabrl/src/validator.rs @@ -0,0 +1,601 @@ +// Comprehensive XBRL validation +use crate::{model::*, Error, Result}; +use std::collections::HashSet; + +#[derive(Debug, Clone)] +pub enum ValidationError { + InvalidContextRef { + fact_index: usize, + context_id: u16, + }, + InvalidUnitRef { + fact_index: usize, + unit_id: u16, + }, + CalculationInconsistency { + concept: String, + expected: f64, + actual: f64, + }, + InvalidDataType { + 
concept: String,
        expected_type: String,
        actual_value: String,
    },
    MissingRequiredElement {
        element: String,
    },
    DuplicateId {
        id: String,
    },
}

/// Configurable XBRL document validator. Individual check families can be
/// toggled; `strict_mode` turns collected findings into a hard error.
pub struct XbrlValidator {
    strict_mode: bool,
    #[allow(dead_code)]
    check_calculations: bool,
    check_duplicates: bool,
    check_contexts: bool,
    check_units: bool,
    #[allow(dead_code)]
    check_datatypes: bool,
    // NOTE(review): currently only set via with_tolerance, never read.
    decimal_tolerance: f64,
}

impl Default for XbrlValidator {
    fn default() -> Self {
        Self {
            strict_mode: false,
            check_calculations: true,
            check_duplicates: true,
            check_contexts: true,
            check_units: true,
            check_datatypes: true,
            decimal_tolerance: 0.01,
        }
    }
}

impl XbrlValidator {
    pub fn new() -> Self {
        Self::default()
    }

    /// Builder: fail `validate` when any finding is collected.
    pub fn strict(mut self) -> Self {
        self.strict_mode = true;
        self
    }

    /// Builder: tolerance for calculation comparisons.
    pub fn with_tolerance(mut self, tolerance: f64) -> Self {
        self.decimal_tolerance = tolerance;
        self
    }

    /// Run all enabled checks against `doc`.
    ///
    /// NOTE(review): in non-strict mode the collected findings are discarded
    /// and `Ok(())` is returned — callers wanting the details need a richer
    /// return type. `doc` is `&mut` for interface stability although nothing
    /// here mutates it today.
    pub fn validate(&self, doc: &mut Document) -> Result<()> {
        let mut validation_errors = Vec::new();

        if self.check_contexts {
            validation_errors.extend(self.validate_contexts(doc));
        }

        if self.check_units {
            validation_errors.extend(self.validate_units(doc));
        }

        validation_errors.extend(self.validate_facts(doc));

        if self.check_duplicates {
            validation_errors.extend(self.check_duplicate_facts(doc));
        }

        if self.strict_mode && !validation_errors.is_empty() {
            return Err(Error::Validation(format!(
                "Validation failed with {} errors",
                validation_errors.len()
            )));
        }

        Ok(())
    }

    fn validate_contexts(&self, doc: &Document) -> Vec<ValidationError> {
        let mut errors = Vec::new();
        let mut context_ids = HashSet::new();

        for ctx in &doc.contexts {
            // Duplicate context ids.
            if !context_ids.insert(ctx.id.clone()) {
                errors.push(ValidationError::DuplicateId {
                    id: ctx.id.to_string(),
                });
            }

            // Entity identifier must be present.
            if ctx.entity.identifier.is_empty() {
                errors.push(ValidationError::MissingRequiredElement {
                    element: format!("Entity identifier for context {}", ctx.id),
                });
            }

            // Duration periods must be ordered.
            // NOTE(review): lexicographic string compare — correct only for
            // ISO-8601 (YYYY-MM-DD) dates; confirm the stored format.
            if let Period::Duration { start, end } = &ctx.period {
                if start > end {
                    errors.push(ValidationError::InvalidDataType {
                        concept: format!("context_{}", ctx.id),
                        expected_type: "valid period".to_string(),
                        actual_value: format!("start {} > end {}", start, end),
                    });
                }
            }
        }

        errors
    }

    fn validate_units(&self, doc: &Document) -> Vec<ValidationError> {
        let mut errors = Vec::new();
        let mut unit_ids = HashSet::new();

        for unit in &doc.units {
            if !unit_ids.insert(unit.id.clone()) {
                errors.push(ValidationError::DuplicateId {
                    id: unit.id.to_string(),
                });
            }

            // Every unit shape needs at least one measure on each side.
            match &unit.unit_type {
                UnitType::Simple(measures) => {
                    if measures.is_empty() {
                        errors.push(ValidationError::MissingRequiredElement {
                            element: format!("Measures for unit {}", unit.id),
                        });
                    }
                }
                UnitType::Divide {
                    numerator,
                    denominator,
                } => {
                    if numerator.is_empty() || denominator.is_empty() {
                        errors.push(ValidationError::MissingRequiredElement {
                            element: format!("Numerator/denominator for unit {}", unit.id),
                        });
                    }
                }
                UnitType::Multiply(measures) => {
                    if measures.is_empty() {
                        errors.push(ValidationError::MissingRequiredElement {
                            element: format!("Measures for unit {}", unit.id),
                        });
                    }
                }
            }
        }

        errors
    }

    /// Check that every fact's context/unit reference is in range.
    fn validate_facts(&self, doc: &Document) -> Vec<ValidationError> {
        let mut errors = Vec::new();

        for i in 0..doc.facts.len() {
            if i < doc.facts.context_ids.len() {
                let context_id = doc.facts.context_ids[i];
                if context_id as usize >= doc.contexts.len() {
                    errors.push(ValidationError::InvalidContextRef {
                        fact_index: i,
                        context_id,
                    });
                }
            }

            if i < doc.facts.unit_ids.len() {
                let unit_id = doc.facts.unit_ids[i];
                // Fixed off-by-one: the original used `> doc.units.len()`,
                // which let `unit_id == len` (one past the end) slip through.
                // The `> 0` guard is kept — 0 appears to act as a "no unit"
                // sentinel here; TODO confirm against the model.
                if unit_id > 0 && unit_id as usize >= doc.units.len() {
                    errors.push(ValidationError::InvalidUnitRef {
                        fact_index: i,
                        unit_id,
                    });
                }
            }
        }

        errors
    }

    /// Report facts sharing the same (concept, context) pair.
    /// (The original only recorded these in strict mode; since non-strict
    /// findings are discarded anyway, recording unconditionally is
    /// behaviorally identical and simpler.)
    fn check_duplicate_facts(&self, doc: &Document) -> Vec<ValidationError> {
        let mut errors = Vec::new();
        let mut fact_keys = HashSet::new();

        for i in 0..doc.facts.len() {
            if i < doc.facts.concept_ids.len() && i < doc.facts.context_ids.len() {
                let key = (doc.facts.concept_ids[i], doc.facts.context_ids[i]);
                if !fact_keys.insert(key) {
                    errors.push(ValidationError::DuplicateId {
                        id: format!("Duplicate fact at index {}", i),
                    });
                }
            }
        }

        errors
    }
}

// Type alias for validation rules
type ValidationRule = Box<dyn Fn(&Document) -> Vec<ValidationError>>;

// Validation context and rules
pub struct ValidationContext {
    pub profile: ValidationProfile,
    pub custom_rules: Vec<ValidationRule>,
}

#[derive(Debug, Clone, Copy)]
pub enum ValidationProfile {
    Generic,
    SecEdgar,
    Ifrs,
    UsGaap,
}

impl ValidationContext {
    pub fn new(profile: ValidationProfile) -> Self {
        Self {
            profile,
            custom_rules: Vec::new(),
        }
    }

    /// Register an additional rule run after the profile rules.
    pub fn add_rule<F>(&mut self, rule: F)
    where
        F: Fn(&Document) -> Vec<ValidationError> + 'static,
    {
        self.custom_rules.push(Box::new(rule));
    }

    pub fn validate(&self, doc: &Document) -> Vec<ValidationError> {
        let mut errors = Vec::new();

        // Profile-specific rules first.
        match self.profile {
            ValidationProfile::SecEdgar => {
                errors.extend(sec_validation_rules(doc));
            }
            ValidationProfile::Ifrs => {
                errors.extend(ifrs_validation_rules(doc));
            }
            _ => {}
        }

        // Then custom rules, in registration order.
        for rule in &self.custom_rules {
            errors.extend(rule(doc));
        }

        errors
    }
}

// SEC EDGAR specific validation rules
pub fn sec_validation_rules(doc: &Document) -> Vec<ValidationError> {
    let mut errors = Vec::new();

    // Required-context flags, filled while scanning contexts below.
    let mut has_current_period = false;
    let mut has_entity_info = false;
    let mut has_dei_elements = false;

    for ctx in &doc.contexts {
        // Check for current period context
        if
ctx.id.contains("CurrentYear") + || ctx.id.contains("CurrentPeriod") + || ctx.id.contains("DocumentPeriodEndDate") + { + has_current_period = true; + } + + // Validate CIK format (10 digits) + if ctx.entity.scheme.contains("sec.gov/CIK") { + has_entity_info = true; + let cik = &ctx.entity.identifier; + if cik.len() != 10 || !cik.chars().all(|c| c.is_ascii_digit()) { + errors.push(ValidationError::InvalidDataType { + concept: "CIK".to_string(), + expected_type: "10-digit number".to_string(), + actual_value: cik.to_string(), + }); + } + } + } + + // Check for DEI elements in facts + for i in 0..doc.facts.concept_ids.len() { + if i < doc.concept_names.len() { + let concept = &doc.concept_names[i]; + if concept.contains("dei:") + || concept.contains("DocumentType") + || concept.contains("EntityRegistrantName") + { + has_dei_elements = true; + } + } + } + + // Required elements validation + if !has_current_period { + errors.push(ValidationError::MissingRequiredElement { + element: "Current period context required for SEC filing".to_string(), + }); + } + + if !has_entity_info { + errors.push(ValidationError::MissingRequiredElement { + element: "Entity CIK information required for SEC filing".to_string(), + }); + } + + if !has_dei_elements { + errors.push(ValidationError::MissingRequiredElement { + element: "DEI (Document and Entity Information) elements required".to_string(), + }); + } + + // Validate segment reporting if present + for ctx in &doc.contexts { + if let Some(segment) = &ctx.entity.segment { + // Check explicit members have valid dimension references + for member in &segment.explicit_members { + if member.dimension.is_empty() || member.member.is_empty() { + errors.push(ValidationError::InvalidDataType { + concept: format!("segment_{}", ctx.id), + expected_type: "valid dimension member".to_string(), + actual_value: format!("{}:{}", member.dimension, member.member), + }); + } + } + } + } + + // Validate calculation consistency for monetary items + let mut 
monetary_facts: Vec<(usize, f64)> = Vec::new(); + for i in 0..doc.facts.len() { + if i < doc.facts.values.len() { + if let FactValue::Decimal(val) = &doc.facts.values[i] { + // Check if this is a monetary fact (has USD unit) + if i < doc.facts.unit_ids.len() { + let unit_id = doc.facts.unit_ids[i] as usize; + if unit_id < doc.units.len() { + if let UnitType::Simple(measures) = &doc.units[unit_id].unit_type { + if measures.iter().any(|m| m.name == "USD" || m.name == "usd") { + monetary_facts.push((i, *val)); + } + } + } + } + } + } + } + + // Basic calculation validation - check for reasonable values + for (idx, value) in monetary_facts { + if value.is_nan() || value.is_infinite() { + errors.push(ValidationError::InvalidDataType { + concept: format!("fact_{}", idx), + expected_type: "valid monetary amount".to_string(), + actual_value: format!("{}", value), + }); + } + // Check for suspiciously large values (> $10 trillion) + if value.abs() > 10_000_000_000_000.0 { + errors.push(ValidationError::InvalidDataType { + concept: format!("fact_{}", idx), + expected_type: "reasonable monetary amount".to_string(), + actual_value: format!("${:.2}", value), + }); + } + } + + errors +} + +// IFRS specific validation rules +pub fn ifrs_validation_rules(doc: &Document) -> Vec { + let mut errors = Vec::new(); + + // Check for IFRS-required contexts + let mut has_reporting_period = false; + let mut has_comparative_period = false; + let mut has_entity_info = false; + + for ctx in &doc.contexts { + // Check for reporting period + match &ctx.period { + Period::Duration { start, end: _ } => { + has_reporting_period = true; + // IFRS requires comparative information + if start.contains("PY") + || ctx.id.contains("PriorYear") + || ctx.id.contains("Comparative") + { + has_comparative_period = true; + } + } + Period::Instant { date } => { + if !date.is_empty() { + has_reporting_period = true; + } + } + _ => {} + } + + // Validate entity information + if !ctx.entity.identifier.is_empty() { 
+ has_entity_info = true; + } + } + + // Required contexts validation + if !has_reporting_period { + errors.push(ValidationError::MissingRequiredElement { + element: "Reporting period required for IFRS filing".to_string(), + }); + } + + if !has_comparative_period { + errors.push(ValidationError::MissingRequiredElement { + element: "Comparative period information required by IFRS".to_string(), + }); + } + + if !has_entity_info { + errors.push(ValidationError::MissingRequiredElement { + element: "Entity identification required for IFRS filing".to_string(), + }); + } + + // Validate dimensional structure + let mut dimension_validations = Vec::new(); + for ctx in &doc.contexts { + // Check segment dimensions + if let Some(segment) = &ctx.entity.segment { + for member in &segment.explicit_members { + // IFRS dimensions should follow specific patterns + if !member.dimension.contains(":") { + dimension_validations + .push(format!("Invalid dimension format: {}", member.dimension)); + } + if member.dimension.contains("ifrs") || member.dimension.contains("ifrs-full") { + // Valid IFRS dimension + if member.member.is_empty() { + errors.push(ValidationError::InvalidDataType { + concept: format!("dimension_{}", ctx.id), + expected_type: "valid IFRS dimension member".to_string(), + actual_value: member.dimension.to_string(), + }); + } + } + } + + // Check typed members for IFRS compliance + for typed in &segment.typed_members { + if typed.dimension.contains("ifrs") && typed.value.is_empty() { + errors.push(ValidationError::InvalidDataType { + concept: format!("typed_dimension_{}", ctx.id), + expected_type: "non-empty typed dimension value".to_string(), + actual_value: typed.dimension.to_string(), + }); + } + } + } + + // Check scenario dimensions (alternative to segment) + if let Some(scenario) = &ctx.scenario { + for member in &scenario.explicit_members { + if member.dimension.contains("ifrs") && member.member.is_empty() { + errors.push(ValidationError::InvalidDataType { + 
concept: format!("scenario_dimension_{}", ctx.id), + expected_type: "valid IFRS scenario member".to_string(), + actual_value: member.dimension.to_string(), + }); + } + } + } + } + + // Check for mandatory IFRS disclosures in facts + let mut has_financial_position = false; + let mut has_comprehensive_income = false; + let mut has_cash_flows = false; + let mut has_changes_in_equity = false; + + for i in 0..doc.concept_names.len() { + let concept = &doc.concept_names[i]; + let lower = concept.to_lowercase(); + + if lower.contains("financialposition") + || lower.contains("balancesheet") + || lower.contains("assets") + || lower.contains("liabilities") + { + has_financial_position = true; + } + + if lower.contains("comprehensiveincome") + || lower.contains("profitorloss") + || lower.contains("income") + || lower.contains("revenue") + { + has_comprehensive_income = true; + } + + if lower.contains("cashflow") || lower.contains("cashflows") { + has_cash_flows = true; + } + + if lower.contains("changesinequity") || lower.contains("equity") { + has_changes_in_equity = true; + } + } + + // Validate mandatory statements + if !has_financial_position { + errors.push(ValidationError::MissingRequiredElement { + element: "Statement of Financial Position required by IFRS".to_string(), + }); + } + + if !has_comprehensive_income { + errors.push(ValidationError::MissingRequiredElement { + element: "Statement of Comprehensive Income required by IFRS".to_string(), + }); + } + + if !has_cash_flows { + errors.push(ValidationError::MissingRequiredElement { + element: "Statement of Cash Flows required by IFRS".to_string(), + }); + } + + if !has_changes_in_equity { + errors.push(ValidationError::MissingRequiredElement { + element: "Statement of Changes in Equity required by IFRS".to_string(), + }); + } + + // Validate presentation linkbase relationships + for link in &doc.presentation_links { + // Check order is valid (typically 1.0 to 999.0) + if link.order < 0.0 || link.order > 1000.0 { + 
errors.push(ValidationError::InvalidDataType { + concept: format!("presentation_link_{}_{}", link.from, link.to), + expected_type: "valid presentation order (0-1000)".to_string(), + actual_value: format!("{}", link.order), + }); + } + } + + // Validate calculation relationships + for link in &doc.calculation_links { + // Check weight is reasonable (-1.0 or 1.0 typically) + if link.weight != 1.0 && link.weight != -1.0 && link.weight != 0.0 { + // Unusual weight, might be an error + if link.weight.abs() > 10.0 { + errors.push(ValidationError::InvalidDataType { + concept: format!("calculation_link_{}_{}", link.from, link.to), + expected_type: "reasonable calculation weight".to_string(), + actual_value: format!("{}", link.weight), + }); + } + } + } + + errors +}