Integrate crabrl parser into taxonomy hydration

This commit is contained in:
2026-03-16 15:18:01 -04:00
parent cf084793ed
commit a58b07456e
23 changed files with 4696 additions and 2466 deletions

View File

@@ -1,96 +1,239 @@
import { describe, expect, it } from 'bun:test';
import { readFileSync } from 'node:fs';
import { join } from 'node:path';
import { Database } from 'bun:sqlite';
import { __dbInternals } from './index';
import { describe, expect, it } from "bun:test";
import { readFileSync } from "node:fs";
import { join } from "node:path";
import { Database } from "bun:sqlite";
import { __dbInternals } from "./index";
/**
 * Reads a SQL migration file from the project's `drizzle/` directory and
 * executes it against the given SQLite client.
 *
 * Diff-artifact fix: the span contained both the pre- and post-format
 * `const sql` lines; only one declaration is kept.
 *
 * @param client   open bun:sqlite database handle
 * @param fileName migration file name, e.g. "0000_cold_silver_centurion.sql"
 */
function applyMigration(client: Database, fileName: string) {
  const sql = readFileSync(join(process.cwd(), "drizzle", fileName), "utf8");
  client.exec(sql);
}
describe('sqlite schema compatibility bootstrap', () => {
it('adds missing watchlist columns and taxonomy tables for older local databases', () => {
const client = new Database(':memory:');
client.exec('PRAGMA foreign_keys = ON;');
describe("sqlite schema compatibility bootstrap", () => {
it("adds missing watchlist columns and taxonomy tables for older local databases", () => {
const client = new Database(":memory:");
client.exec("PRAGMA foreign_keys = ON;");
applyMigration(client, '0000_cold_silver_centurion.sql');
applyMigration(client, '0001_glossy_statement_snapshots.sql');
applyMigration(client, '0002_workflow_task_projection_metadata.sql');
applyMigration(client, '0003_task_stage_event_timeline.sql');
applyMigration(client, '0009_task_notification_context.sql');
applyMigration(client, "0000_cold_silver_centurion.sql");
applyMigration(client, "0001_glossy_statement_snapshots.sql");
applyMigration(client, "0002_workflow_task_projection_metadata.sql");
applyMigration(client, "0003_task_stage_event_timeline.sql");
applyMigration(client, "0009_task_notification_context.sql");
expect(__dbInternals.hasColumn(client, 'watchlist_item', 'category')).toBe(false);
expect(__dbInternals.hasColumn(client, 'watchlist_item', 'status')).toBe(false);
expect(__dbInternals.hasColumn(client, 'holding', 'company_name')).toBe(false);
expect(__dbInternals.hasTable(client, 'filing_taxonomy_snapshot')).toBe(false);
expect(__dbInternals.hasTable(client, 'research_journal_entry')).toBe(false);
expect(__dbInternals.hasTable(client, 'research_artifact')).toBe(false);
expect(__dbInternals.hasTable(client, 'research_memo')).toBe(false);
expect(__dbInternals.hasColumn(client, "watchlist_item", "category")).toBe(
false,
);
expect(__dbInternals.hasColumn(client, "watchlist_item", "status")).toBe(
false,
);
expect(__dbInternals.hasColumn(client, "holding", "company_name")).toBe(
false,
);
expect(__dbInternals.hasTable(client, "filing_taxonomy_snapshot")).toBe(
false,
);
expect(__dbInternals.hasTable(client, "research_journal_entry")).toBe(
false,
);
expect(__dbInternals.hasTable(client, "research_artifact")).toBe(false);
expect(__dbInternals.hasTable(client, "research_memo")).toBe(false);
__dbInternals.ensureLocalSqliteSchema(client);
expect(__dbInternals.hasColumn(client, 'watchlist_item', 'category')).toBe(true);
expect(__dbInternals.hasColumn(client, 'watchlist_item', 'tags')).toBe(true);
expect(__dbInternals.hasColumn(client, 'watchlist_item', 'status')).toBe(true);
expect(__dbInternals.hasColumn(client, 'watchlist_item', 'priority')).toBe(true);
expect(__dbInternals.hasColumn(client, 'watchlist_item', 'updated_at')).toBe(true);
expect(__dbInternals.hasColumn(client, 'watchlist_item', 'last_reviewed_at')).toBe(true);
expect(__dbInternals.hasColumn(client, 'holding', 'company_name')).toBe(true);
expect(__dbInternals.hasTable(client, 'filing_taxonomy_snapshot')).toBe(true);
expect(__dbInternals.hasColumn(client, 'filing_taxonomy_snapshot', 'parser_engine')).toBe(true);
expect(__dbInternals.hasColumn(client, 'filing_taxonomy_snapshot', 'parser_version')).toBe(true);
expect(__dbInternals.hasColumn(client, 'filing_taxonomy_snapshot', 'taxonomy_regime')).toBe(true);
expect(__dbInternals.hasColumn(client, 'filing_taxonomy_snapshot', 'faithful_rows')).toBe(true);
expect(__dbInternals.hasColumn(client, 'filing_taxonomy_snapshot', 'surface_rows')).toBe(true);
expect(__dbInternals.hasColumn(client, 'filing_taxonomy_snapshot', 'detail_rows')).toBe(true);
expect(__dbInternals.hasColumn(client, 'filing_taxonomy_snapshot', 'kpi_rows')).toBe(true);
expect(__dbInternals.hasColumn(client, 'filing_taxonomy_snapshot', 'normalization_summary')).toBe(true);
expect(__dbInternals.hasTable(client, 'filing_taxonomy_context')).toBe(true);
expect(__dbInternals.hasTable(client, 'filing_taxonomy_fact')).toBe(true);
expect(__dbInternals.hasColumn(client, 'filing_taxonomy_concept', 'balance')).toBe(true);
expect(__dbInternals.hasColumn(client, 'filing_taxonomy_concept', 'period_type')).toBe(true);
expect(__dbInternals.hasColumn(client, 'filing_taxonomy_concept', 'data_type')).toBe(true);
expect(__dbInternals.hasColumn(client, 'filing_taxonomy_concept', 'authoritative_concept_key')).toBe(true);
expect(__dbInternals.hasColumn(client, 'filing_taxonomy_concept', 'mapping_method')).toBe(true);
expect(__dbInternals.hasColumn(client, 'filing_taxonomy_concept', 'surface_key')).toBe(true);
expect(__dbInternals.hasColumn(client, 'filing_taxonomy_concept', 'detail_parent_surface_key')).toBe(true);
expect(__dbInternals.hasColumn(client, 'filing_taxonomy_concept', 'kpi_key')).toBe(true);
expect(__dbInternals.hasColumn(client, 'filing_taxonomy_concept', 'residual_flag')).toBe(true);
expect(__dbInternals.hasColumn(client, 'filing_taxonomy_fact', 'data_type')).toBe(true);
expect(__dbInternals.hasColumn(client, 'filing_taxonomy_fact', 'authoritative_concept_key')).toBe(true);
expect(__dbInternals.hasColumn(client, 'filing_taxonomy_fact', 'mapping_method')).toBe(true);
expect(__dbInternals.hasColumn(client, 'filing_taxonomy_fact', 'surface_key')).toBe(true);
expect(__dbInternals.hasColumn(client, 'filing_taxonomy_fact', 'detail_parent_surface_key')).toBe(true);
expect(__dbInternals.hasColumn(client, 'filing_taxonomy_fact', 'kpi_key')).toBe(true);
expect(__dbInternals.hasColumn(client, 'filing_taxonomy_fact', 'residual_flag')).toBe(true);
expect(__dbInternals.hasColumn(client, 'filing_taxonomy_fact', 'precision')).toBe(true);
expect(__dbInternals.hasColumn(client, 'filing_taxonomy_fact', 'nil')).toBe(true);
expect(__dbInternals.hasColumn(client, 'task_run', 'stage_context')).toBe(true);
expect(__dbInternals.hasColumn(client, 'task_stage_event', 'stage_context')).toBe(true);
expect(__dbInternals.hasTable(client, 'research_journal_entry')).toBe(true);
expect(__dbInternals.hasTable(client, 'search_document')).toBe(true);
expect(__dbInternals.hasTable(client, 'search_chunk')).toBe(true);
expect(__dbInternals.hasTable(client, 'research_artifact')).toBe(true);
expect(__dbInternals.hasTable(client, 'research_memo')).toBe(true);
expect(__dbInternals.hasTable(client, 'research_memo_evidence')).toBe(true);
expect(__dbInternals.hasTable(client, 'company_overview_cache')).toBe(true);
expect(__dbInternals.hasColumn(client, "watchlist_item", "category")).toBe(
true,
);
expect(__dbInternals.hasColumn(client, "watchlist_item", "tags")).toBe(
true,
);
expect(__dbInternals.hasColumn(client, "watchlist_item", "status")).toBe(
true,
);
expect(__dbInternals.hasColumn(client, "watchlist_item", "priority")).toBe(
true,
);
expect(
__dbInternals.hasColumn(client, "watchlist_item", "updated_at"),
).toBe(true);
expect(
__dbInternals.hasColumn(client, "watchlist_item", "last_reviewed_at"),
).toBe(true);
expect(__dbInternals.hasColumn(client, "holding", "company_name")).toBe(
true,
);
expect(__dbInternals.hasTable(client, "filing_taxonomy_snapshot")).toBe(
true,
);
expect(
__dbInternals.hasColumn(
client,
"filing_taxonomy_snapshot",
"parser_engine",
),
).toBe(true);
expect(
__dbInternals.hasColumn(
client,
"filing_taxonomy_snapshot",
"parser_version",
),
).toBe(true);
expect(
__dbInternals.hasColumn(
client,
"filing_taxonomy_snapshot",
"taxonomy_regime",
),
).toBe(true);
expect(
__dbInternals.hasColumn(
client,
"filing_taxonomy_snapshot",
"faithful_rows",
),
).toBe(true);
expect(
__dbInternals.hasColumn(
client,
"filing_taxonomy_snapshot",
"surface_rows",
),
).toBe(true);
expect(
__dbInternals.hasColumn(
client,
"filing_taxonomy_snapshot",
"detail_rows",
),
).toBe(true);
expect(
__dbInternals.hasColumn(client, "filing_taxonomy_snapshot", "kpi_rows"),
).toBe(true);
expect(
__dbInternals.hasColumn(
client,
"filing_taxonomy_snapshot",
"computed_definitions",
),
).toBe(true);
expect(
__dbInternals.hasColumn(
client,
"filing_taxonomy_snapshot",
"normalization_summary",
),
).toBe(true);
expect(__dbInternals.hasTable(client, "filing_taxonomy_context")).toBe(
true,
);
expect(__dbInternals.hasTable(client, "filing_taxonomy_fact")).toBe(true);
expect(
__dbInternals.hasColumn(client, "filing_taxonomy_concept", "balance"),
).toBe(true);
expect(
__dbInternals.hasColumn(client, "filing_taxonomy_concept", "period_type"),
).toBe(true);
expect(
__dbInternals.hasColumn(client, "filing_taxonomy_concept", "data_type"),
).toBe(true);
expect(
__dbInternals.hasColumn(
client,
"filing_taxonomy_concept",
"authoritative_concept_key",
),
).toBe(true);
expect(
__dbInternals.hasColumn(
client,
"filing_taxonomy_concept",
"mapping_method",
),
).toBe(true);
expect(
__dbInternals.hasColumn(client, "filing_taxonomy_concept", "surface_key"),
).toBe(true);
expect(
__dbInternals.hasColumn(
client,
"filing_taxonomy_concept",
"detail_parent_surface_key",
),
).toBe(true);
expect(
__dbInternals.hasColumn(client, "filing_taxonomy_concept", "kpi_key"),
).toBe(true);
expect(
__dbInternals.hasColumn(
client,
"filing_taxonomy_concept",
"residual_flag",
),
).toBe(true);
expect(
__dbInternals.hasColumn(client, "filing_taxonomy_fact", "data_type"),
).toBe(true);
expect(
__dbInternals.hasColumn(
client,
"filing_taxonomy_fact",
"authoritative_concept_key",
),
).toBe(true);
expect(
__dbInternals.hasColumn(client, "filing_taxonomy_fact", "mapping_method"),
).toBe(true);
expect(
__dbInternals.hasColumn(client, "filing_taxonomy_fact", "surface_key"),
).toBe(true);
expect(
__dbInternals.hasColumn(
client,
"filing_taxonomy_fact",
"detail_parent_surface_key",
),
).toBe(true);
expect(
__dbInternals.hasColumn(client, "filing_taxonomy_fact", "kpi_key"),
).toBe(true);
expect(
__dbInternals.hasColumn(client, "filing_taxonomy_fact", "residual_flag"),
).toBe(true);
expect(
__dbInternals.hasColumn(client, "filing_taxonomy_fact", "precision"),
).toBe(true);
expect(__dbInternals.hasColumn(client, "filing_taxonomy_fact", "nil")).toBe(
true,
);
expect(__dbInternals.hasColumn(client, "task_run", "stage_context")).toBe(
true,
);
expect(
__dbInternals.hasColumn(client, "task_stage_event", "stage_context"),
).toBe(true);
expect(__dbInternals.hasTable(client, "research_journal_entry")).toBe(true);
expect(__dbInternals.hasTable(client, "search_document")).toBe(true);
expect(__dbInternals.hasTable(client, "search_chunk")).toBe(true);
expect(__dbInternals.hasTable(client, "research_artifact")).toBe(true);
expect(__dbInternals.hasTable(client, "research_memo")).toBe(true);
expect(__dbInternals.hasTable(client, "research_memo_evidence")).toBe(true);
expect(__dbInternals.hasTable(client, "company_overview_cache")).toBe(true);
__dbInternals.loadSqliteExtensions(client);
__dbInternals.ensureSearchVirtualTables(client);
expect(__dbInternals.hasTable(client, 'search_chunk_fts')).toBe(true);
expect(__dbInternals.hasTable(client, 'search_chunk_vec')).toBe(true);
expect(__dbInternals.hasTable(client, "search_chunk_fts")).toBe(true);
expect(__dbInternals.hasTable(client, "search_chunk_vec")).toBe(true);
client.close();
});
it('backfills legacy taxonomy snapshot sidecar columns and remains idempotent', () => {
const client = new Database(':memory:');
client.exec('PRAGMA foreign_keys = ON;');
it("backfills legacy taxonomy snapshot sidecar columns and remains idempotent", () => {
const client = new Database(":memory:");
client.exec("PRAGMA foreign_keys = ON;");
applyMigration(client, '0000_cold_silver_centurion.sql');
applyMigration(client, '0005_financial_taxonomy_v3.sql');
applyMigration(client, "0000_cold_silver_centurion.sql");
applyMigration(client, "0005_financial_taxonomy_v3.sql");
client.exec(`
INSERT INTO \`filing\` (
@@ -114,7 +257,8 @@ describe('sqlite schema compatibility bootstrap', () => {
);
`);
const statementRows = '{"income":[{"label":"Revenue","value":1}],"balance":[],"cash_flow":[],"equity":[],"comprehensive_income":[]}';
const statementRows =
'{"income":[{"label":"Revenue","value":1}],"balance":[],"cash_flow":[],"equity":[],"comprehensive_income":[]}';
client.exec(`
INSERT INTO \`filing_taxonomy_snapshot\` (
@@ -143,7 +287,9 @@ describe('sqlite schema compatibility bootstrap', () => {
__dbInternals.ensureLocalSqliteSchema(client);
__dbInternals.ensureLocalSqliteSchema(client);
const row = client.query(`
const row = client
.query(
`
SELECT
\`parser_engine\`,
\`parser_version\`,
@@ -152,10 +298,13 @@ describe('sqlite schema compatibility bootstrap', () => {
\`surface_rows\`,
\`detail_rows\`,
\`kpi_rows\`,
\`computed_definitions\`,
\`normalization_summary\`
FROM \`filing_taxonomy_snapshot\`
WHERE \`filing_id\` = 1
`).get() as {
`,
)
.get() as {
parser_engine: string;
parser_version: string;
taxonomy_regime: string;
@@ -163,66 +312,116 @@ describe('sqlite schema compatibility bootstrap', () => {
surface_rows: string | null;
detail_rows: string | null;
kpi_rows: string | null;
computed_definitions: string | null;
normalization_summary: string | null;
};
expect(row.parser_engine).toBe('fiscal-xbrl');
expect(row.parser_version).toBe('unknown');
expect(row.taxonomy_regime).toBe('unknown');
expect(row.parser_engine).toBe("fiscal-xbrl");
expect(row.parser_version).toBe("unknown");
expect(row.taxonomy_regime).toBe("unknown");
expect(row.faithful_rows).toBe(statementRows);
expect(row.surface_rows).toBe('{"income":[],"balance":[],"cash_flow":[],"equity":[],"comprehensive_income":[]}');
expect(row.detail_rows).toBe('{"income":{},"balance":{},"cash_flow":{},"equity":{},"comprehensive_income":{}}');
expect(row.kpi_rows).toBe('[]');
expect(row.surface_rows).toBe(
'{"income":[],"balance":[],"cash_flow":[],"equity":[],"comprehensive_income":[]}',
);
expect(row.detail_rows).toBe(
'{"income":{},"balance":{},"cash_flow":{},"equity":{},"comprehensive_income":{}}',
);
expect(row.kpi_rows).toBe("[]");
expect(row.computed_definitions).toBe("[]");
expect(row.normalization_summary).toBeNull();
client.close();
});
it('repairs partial taxonomy sidecar drift without requiring a table rebuild', () => {
const client = new Database(':memory:');
client.exec('PRAGMA foreign_keys = ON;');
it("repairs partial taxonomy sidecar drift without requiring a table rebuild", () => {
const client = new Database(":memory:");
client.exec("PRAGMA foreign_keys = ON;");
applyMigration(client, '0000_cold_silver_centurion.sql');
applyMigration(client, '0005_financial_taxonomy_v3.sql');
client.exec("ALTER TABLE `filing_taxonomy_snapshot` ADD `parser_engine` text NOT NULL DEFAULT 'legacy-ts';");
applyMigration(client, "0000_cold_silver_centurion.sql");
applyMigration(client, "0005_financial_taxonomy_v3.sql");
client.exec(
"ALTER TABLE `filing_taxonomy_snapshot` ADD `parser_engine` text NOT NULL DEFAULT 'legacy-ts';",
);
expect(__dbInternals.hasColumn(client, 'filing_taxonomy_snapshot', 'parser_engine')).toBe(true);
expect(__dbInternals.hasColumn(client, 'filing_taxonomy_snapshot', 'normalization_summary')).toBe(false);
expect(__dbInternals.hasTable(client, 'filing_taxonomy_context')).toBe(false);
expect(
__dbInternals.hasColumn(
client,
"filing_taxonomy_snapshot",
"parser_engine",
),
).toBe(true);
expect(
__dbInternals.hasColumn(
client,
"filing_taxonomy_snapshot",
"normalization_summary",
),
).toBe(false);
expect(__dbInternals.hasTable(client, "filing_taxonomy_context")).toBe(
false,
);
__dbInternals.ensureLocalSqliteSchema(client);
expect(__dbInternals.hasColumn(client, 'filing_taxonomy_snapshot', 'parser_version')).toBe(true);
expect(__dbInternals.hasColumn(client, 'filing_taxonomy_snapshot', 'taxonomy_regime')).toBe(true);
expect(__dbInternals.hasColumn(client, 'filing_taxonomy_snapshot', 'normalization_summary')).toBe(true);
expect(__dbInternals.hasTable(client, 'filing_taxonomy_context')).toBe(true);
client.close();
});
it('throws on missing parser_engine column when verifyCriticalSchema is called', () => {
const client = new Database(':memory:');
client.exec('PRAGMA foreign_keys = ON;');
applyMigration(client, '0000_cold_silver_centurion.sql');
applyMigration(client, '0005_financial_taxonomy_v3.sql');
expect(__dbInternals.hasTable(client, 'filing_taxonomy_snapshot')).toBe(true);
expect(__dbInternals.hasColumn(client, 'filing_taxonomy_snapshot', 'parser_engine')).toBe(false);
expect(() => __dbInternals.verifyCriticalSchema(client)).toThrow(
/filing_taxonomy_snapshot is missing columns: parser_engine/
expect(
__dbInternals.hasColumn(
client,
"filing_taxonomy_snapshot",
"parser_version",
),
).toBe(true);
expect(
__dbInternals.hasColumn(
client,
"filing_taxonomy_snapshot",
"taxonomy_regime",
),
).toBe(true);
expect(
__dbInternals.hasColumn(
client,
"filing_taxonomy_snapshot",
"normalization_summary",
),
).toBe(true);
expect(__dbInternals.hasTable(client, "filing_taxonomy_context")).toBe(
true,
);
client.close();
});
it('verifyCriticalSchema passes when all required columns exist', () => {
const client = new Database(':memory:');
client.exec('PRAGMA foreign_keys = ON;');
it("throws on missing parser_engine column when verifyCriticalSchema is called", () => {
const client = new Database(":memory:");
client.exec("PRAGMA foreign_keys = ON;");
applyMigration(client, '0000_cold_silver_centurion.sql');
applyMigration(client, '0005_financial_taxonomy_v3.sql');
applyMigration(client, "0000_cold_silver_centurion.sql");
applyMigration(client, "0005_financial_taxonomy_v3.sql");
expect(__dbInternals.hasTable(client, "filing_taxonomy_snapshot")).toBe(
true,
);
expect(
__dbInternals.hasColumn(
client,
"filing_taxonomy_snapshot",
"parser_engine",
),
).toBe(false);
expect(() => __dbInternals.verifyCriticalSchema(client)).toThrow(
/filing_taxonomy_snapshot is missing columns: parser_engine/,
);
client.close();
});
it("verifyCriticalSchema passes when all required columns exist", () => {
const client = new Database(":memory:");
client.exec("PRAGMA foreign_keys = ON;");
applyMigration(client, "0000_cold_silver_centurion.sql");
applyMigration(client, "0005_financial_taxonomy_v3.sql");
__dbInternals.ensureLocalSqliteSchema(client);

File diff suppressed because it is too large Load Diff

View File

@@ -1,9 +1,11 @@
import { readFileSync } from 'node:fs';
import { join } from 'node:path';
import type { Database } from 'bun:sqlite';
import { readFileSync } from "node:fs";
import { join } from "node:path";
import type { Database } from "bun:sqlite";
// Default sidecar payloads used when backfilling legacy taxonomy snapshot
// rows: per-statement empty arrays (surface rows) and empty maps (detail
// rows), keyed by statement type. Stored as JSON text columns in SQLite.
// Diff-artifact fix: the span declared each constant twice; one kept.
const DEFAULT_SURFACE_ROWS_JSON =
  '{"income":[],"balance":[],"cash_flow":[],"equity":[],"comprehensive_income":[]}';
const DEFAULT_DETAIL_ROWS_JSON =
  '{"income":{},"balance":{},"cash_flow":{},"equity":{},"comprehensive_income":{}}';
type MissingColumnDefinition = {
name: string;
@@ -12,36 +14,49 @@ type MissingColumnDefinition = {
export function hasTable(client: Database, tableName: string) {
const row = client
.query('SELECT name FROM sqlite_master WHERE type = ? AND name = ? LIMIT 1')
.get('table', tableName) as { name: string } | null;
.query("SELECT name FROM sqlite_master WHERE type = ? AND name = ? LIMIT 1")
.get("table", tableName) as { name: string } | null;
return row !== null;
}
export function hasColumn(client: Database, tableName: string, columnName: string) {
export function hasColumn(
client: Database,
tableName: string,
columnName: string,
) {
if (!hasTable(client, tableName)) {
return false;
}
const rows = client.query(`PRAGMA table_info(${tableName})`).all() as Array<{ name: string }>;
const rows = client.query(`PRAGMA table_info(${tableName})`).all() as Array<{
name: string;
}>;
return rows.some((row) => row.name === columnName);
}
/**
 * Reads a SQL file from the project's `drizzle/` directory and executes it
 * against the given SQLite client.
 *
 * Diff-artifact fix: the span contained both the pre- and post-format
 * `const sql` lines; only one declaration is kept.
 *
 * @param client   open bun:sqlite database handle
 * @param fileName SQL file name relative to `drizzle/`
 */
export function applySqlFile(client: Database, fileName: string) {
  const sql = readFileSync(join(process.cwd(), "drizzle", fileName), "utf8");
  client.exec(sql);
}
/**
 * Applies the base schema migration idempotently: rewrites its CREATE
 * TABLE / CREATE (UNIQUE) INDEX statements to `IF NOT EXISTS` variants
 * before executing, so re-running against an existing database is safe.
 *
 * Diff-artifact fix: the span contained both the old and new readFileSync
 * chains; only one is kept.
 *
 * @param client open bun:sqlite database handle
 */
export function applyBaseSchemaCompat(client: Database) {
  const sql = readFileSync(
    join(process.cwd(), "drizzle", "0000_cold_silver_centurion.sql"),
    "utf8",
  )
    .replaceAll("CREATE TABLE `", "CREATE TABLE IF NOT EXISTS `")
    .replaceAll("CREATE UNIQUE INDEX `", "CREATE UNIQUE INDEX IF NOT EXISTS `")
    .replaceAll("CREATE INDEX `", "CREATE INDEX IF NOT EXISTS `");
  client.exec(sql);
}
function ensureColumns(client: Database, tableName: string, columns: MissingColumnDefinition[]) {
function ensureColumns(
client: Database,
tableName: string,
columns: MissingColumnDefinition[],
) {
if (!hasTable(client, tableName)) {
return;
}
@@ -54,7 +69,7 @@ function ensureColumns(client: Database, tableName: string, columns: MissingColu
}
function ensureResearchWorkspaceSchema(client: Database) {
if (!hasTable(client, 'research_artifact')) {
if (!hasTable(client, "research_artifact")) {
client.exec(`
CREATE TABLE IF NOT EXISTS \`research_artifact\` (
\`id\` integer PRIMARY KEY AUTOINCREMENT NOT NULL,
@@ -84,7 +99,7 @@ function ensureResearchWorkspaceSchema(client: Database) {
`);
}
if (!hasTable(client, 'research_memo')) {
if (!hasTable(client, "research_memo")) {
client.exec(`
CREATE TABLE IF NOT EXISTS \`research_memo\` (
\`id\` integer PRIMARY KEY AUTOINCREMENT NOT NULL,
@@ -110,7 +125,7 @@ function ensureResearchWorkspaceSchema(client: Database) {
`);
}
if (!hasTable(client, 'research_memo_evidence')) {
if (!hasTable(client, "research_memo_evidence")) {
client.exec(`
CREATE TABLE IF NOT EXISTS \`research_memo_evidence\` (
\`id\` integer PRIMARY KEY AUTOINCREMENT NOT NULL,
@@ -126,15 +141,33 @@ function ensureResearchWorkspaceSchema(client: Database) {
`);
}
client.exec('CREATE INDEX IF NOT EXISTS `research_artifact_ticker_idx` ON `research_artifact` (`user_id`, `ticker`, `updated_at`);');
client.exec('CREATE INDEX IF NOT EXISTS `research_artifact_kind_idx` ON `research_artifact` (`user_id`, `kind`, `updated_at`);');
client.exec('CREATE INDEX IF NOT EXISTS `research_artifact_accession_idx` ON `research_artifact` (`user_id`, `accession_number`);');
client.exec('CREATE INDEX IF NOT EXISTS `research_artifact_source_idx` ON `research_artifact` (`user_id`, `source`, `updated_at`);');
client.exec('CREATE UNIQUE INDEX IF NOT EXISTS `research_memo_ticker_uidx` ON `research_memo` (`user_id`, `ticker`);');
client.exec('CREATE INDEX IF NOT EXISTS `research_memo_updated_idx` ON `research_memo` (`user_id`, `updated_at`);');
client.exec('CREATE INDEX IF NOT EXISTS `research_memo_evidence_memo_idx` ON `research_memo_evidence` (`memo_id`, `section`, `sort_order`);');
client.exec('CREATE INDEX IF NOT EXISTS `research_memo_evidence_artifact_idx` ON `research_memo_evidence` (`artifact_id`);');
client.exec('CREATE UNIQUE INDEX IF NOT EXISTS `research_memo_evidence_unique_uidx` ON `research_memo_evidence` (`memo_id`, `artifact_id`, `section`);');
client.exec(
"CREATE INDEX IF NOT EXISTS `research_artifact_ticker_idx` ON `research_artifact` (`user_id`, `ticker`, `updated_at`);",
);
client.exec(
"CREATE INDEX IF NOT EXISTS `research_artifact_kind_idx` ON `research_artifact` (`user_id`, `kind`, `updated_at`);",
);
client.exec(
"CREATE INDEX IF NOT EXISTS `research_artifact_accession_idx` ON `research_artifact` (`user_id`, `accession_number`);",
);
client.exec(
"CREATE INDEX IF NOT EXISTS `research_artifact_source_idx` ON `research_artifact` (`user_id`, `source`, `updated_at`);",
);
client.exec(
"CREATE UNIQUE INDEX IF NOT EXISTS `research_memo_ticker_uidx` ON `research_memo` (`user_id`, `ticker`);",
);
client.exec(
"CREATE INDEX IF NOT EXISTS `research_memo_updated_idx` ON `research_memo` (`user_id`, `updated_at`);",
);
client.exec(
"CREATE INDEX IF NOT EXISTS `research_memo_evidence_memo_idx` ON `research_memo_evidence` (`memo_id`, `section`, `sort_order`);",
);
client.exec(
"CREATE INDEX IF NOT EXISTS `research_memo_evidence_artifact_idx` ON `research_memo_evidence` (`artifact_id`);",
);
client.exec(
"CREATE UNIQUE INDEX IF NOT EXISTS `research_memo_evidence_unique_uidx` ON `research_memo_evidence` (`memo_id`, `artifact_id`, `section`);",
);
client.exec(`
CREATE VIRTUAL TABLE IF NOT EXISTS \`research_artifact_fts\` USING fts5(
artifact_id UNINDEXED,
@@ -268,7 +301,7 @@ function ensureResearchWorkspaceSchema(client: Database) {
);
`);
client.exec('DELETE FROM `research_artifact_fts`;');
client.exec("DELETE FROM `research_artifact_fts`;");
client.exec(`
INSERT INTO \`research_artifact_fts\` (
\`artifact_id\`,
@@ -297,39 +330,71 @@ function ensureResearchWorkspaceSchema(client: Database) {
}
const TAXONOMY_SNAPSHOT_REQUIRED_COLUMNS = [
'parser_engine',
'parser_version',
'taxonomy_regime',
'fiscal_pack',
'faithful_rows',
'surface_rows',
'detail_rows',
'kpi_rows',
'normalization_summary'
"parser_engine",
"parser_version",
"taxonomy_regime",
"fiscal_pack",
"faithful_rows",
"surface_rows",
"detail_rows",
"kpi_rows",
"computed_definitions",
"normalization_summary",
] as const;
function ensureTaxonomySnapshotCompat(client: Database) {
if (!hasTable(client, 'filing_taxonomy_snapshot')) {
if (!hasTable(client, "filing_taxonomy_snapshot")) {
return;
}
ensureColumns(client, 'filing_taxonomy_snapshot', [
{ name: 'parser_engine', sql: "ALTER TABLE `filing_taxonomy_snapshot` ADD `parser_engine` text NOT NULL DEFAULT 'fiscal-xbrl';" },
{ name: 'parser_version', sql: "ALTER TABLE `filing_taxonomy_snapshot` ADD `parser_version` text NOT NULL DEFAULT 'unknown';" },
{ name: 'taxonomy_regime', sql: "ALTER TABLE `filing_taxonomy_snapshot` ADD `taxonomy_regime` text NOT NULL DEFAULT 'unknown';" },
{ name: 'fiscal_pack', sql: 'ALTER TABLE `filing_taxonomy_snapshot` ADD `fiscal_pack` text;' },
{ name: 'faithful_rows', sql: 'ALTER TABLE `filing_taxonomy_snapshot` ADD `faithful_rows` text;' },
{ name: 'surface_rows', sql: 'ALTER TABLE `filing_taxonomy_snapshot` ADD `surface_rows` text;' },
{ name: 'detail_rows', sql: 'ALTER TABLE `filing_taxonomy_snapshot` ADD `detail_rows` text;' },
{ name: 'kpi_rows', sql: 'ALTER TABLE `filing_taxonomy_snapshot` ADD `kpi_rows` text;' },
{ name: 'normalization_summary', sql: 'ALTER TABLE `filing_taxonomy_snapshot` ADD `normalization_summary` text;' }
ensureColumns(client, "filing_taxonomy_snapshot", [
{
name: "parser_engine",
sql: "ALTER TABLE `filing_taxonomy_snapshot` ADD `parser_engine` text NOT NULL DEFAULT 'fiscal-xbrl';",
},
{
name: "parser_version",
sql: "ALTER TABLE `filing_taxonomy_snapshot` ADD `parser_version` text NOT NULL DEFAULT 'unknown';",
},
{
name: "taxonomy_regime",
sql: "ALTER TABLE `filing_taxonomy_snapshot` ADD `taxonomy_regime` text NOT NULL DEFAULT 'unknown';",
},
{
name: "fiscal_pack",
sql: "ALTER TABLE `filing_taxonomy_snapshot` ADD `fiscal_pack` text;",
},
{
name: "faithful_rows",
sql: "ALTER TABLE `filing_taxonomy_snapshot` ADD `faithful_rows` text;",
},
{
name: "surface_rows",
sql: "ALTER TABLE `filing_taxonomy_snapshot` ADD `surface_rows` text;",
},
{
name: "detail_rows",
sql: "ALTER TABLE `filing_taxonomy_snapshot` ADD `detail_rows` text;",
},
{
name: "kpi_rows",
sql: "ALTER TABLE `filing_taxonomy_snapshot` ADD `kpi_rows` text;",
},
{
name: "computed_definitions",
sql: "ALTER TABLE `filing_taxonomy_snapshot` ADD `computed_definitions` text;",
},
{
name: "normalization_summary",
sql: "ALTER TABLE `filing_taxonomy_snapshot` ADD `normalization_summary` text;",
},
]);
for (const columnName of TAXONOMY_SNAPSHOT_REQUIRED_COLUMNS) {
if (!hasColumn(client, 'filing_taxonomy_snapshot', columnName)) {
if (!hasColumn(client, "filing_taxonomy_snapshot", columnName)) {
throw new Error(
`Schema compat failed: filing_taxonomy_snapshot missing required column '${columnName}'. ` +
`Delete the database file and restart to rebuild schema.`
`Delete the database file and restart to rebuild schema.`,
);
}
}
@@ -340,12 +405,13 @@ function ensureTaxonomySnapshotCompat(client: Database) {
\`faithful_rows\` = COALESCE(\`faithful_rows\`, \`statement_rows\`),
\`surface_rows\` = COALESCE(\`surface_rows\`, '${DEFAULT_SURFACE_ROWS_JSON}'),
\`detail_rows\` = COALESCE(\`detail_rows\`, '${DEFAULT_DETAIL_ROWS_JSON}'),
\`kpi_rows\` = COALESCE(\`kpi_rows\`, '[]');
\`kpi_rows\` = COALESCE(\`kpi_rows\`, '[]'),
\`computed_definitions\` = COALESCE(\`computed_definitions\`, '[]');
`);
}
function ensureTaxonomyContextCompat(client: Database) {
if (!hasTable(client, 'filing_taxonomy_context')) {
if (!hasTable(client, "filing_taxonomy_context")) {
client.exec(`
CREATE TABLE IF NOT EXISTS \`filing_taxonomy_context\` (
\`id\` integer PRIMARY KEY AUTOINCREMENT NOT NULL,
@@ -364,35 +430,93 @@ function ensureTaxonomyContextCompat(client: Database) {
`);
}
client.exec('CREATE INDEX IF NOT EXISTS `filing_taxonomy_context_snapshot_idx` ON `filing_taxonomy_context` (`snapshot_id`);');
client.exec('CREATE UNIQUE INDEX IF NOT EXISTS `filing_taxonomy_context_uidx` ON `filing_taxonomy_context` (`snapshot_id`,`context_id`);');
client.exec(
"CREATE INDEX IF NOT EXISTS `filing_taxonomy_context_snapshot_idx` ON `filing_taxonomy_context` (`snapshot_id`);",
);
client.exec(
"CREATE UNIQUE INDEX IF NOT EXISTS `filing_taxonomy_context_uidx` ON `filing_taxonomy_context` (`snapshot_id`,`context_id`);",
);
}
function ensureTaxonomyConceptCompat(client: Database) {
ensureColumns(client, 'filing_taxonomy_concept', [
{ name: 'balance', sql: 'ALTER TABLE `filing_taxonomy_concept` ADD `balance` text;' },
{ name: 'period_type', sql: 'ALTER TABLE `filing_taxonomy_concept` ADD `period_type` text;' },
{ name: 'data_type', sql: 'ALTER TABLE `filing_taxonomy_concept` ADD `data_type` text;' },
{ name: 'authoritative_concept_key', sql: 'ALTER TABLE `filing_taxonomy_concept` ADD `authoritative_concept_key` text;' },
{ name: 'mapping_method', sql: 'ALTER TABLE `filing_taxonomy_concept` ADD `mapping_method` text;' },
{ name: 'surface_key', sql: 'ALTER TABLE `filing_taxonomy_concept` ADD `surface_key` text;' },
{ name: 'detail_parent_surface_key', sql: 'ALTER TABLE `filing_taxonomy_concept` ADD `detail_parent_surface_key` text;' },
{ name: 'kpi_key', sql: 'ALTER TABLE `filing_taxonomy_concept` ADD `kpi_key` text;' },
{ name: 'residual_flag', sql: 'ALTER TABLE `filing_taxonomy_concept` ADD `residual_flag` integer NOT NULL DEFAULT false;' }
ensureColumns(client, "filing_taxonomy_concept", [
{
name: "balance",
sql: "ALTER TABLE `filing_taxonomy_concept` ADD `balance` text;",
},
{
name: "period_type",
sql: "ALTER TABLE `filing_taxonomy_concept` ADD `period_type` text;",
},
{
name: "data_type",
sql: "ALTER TABLE `filing_taxonomy_concept` ADD `data_type` text;",
},
{
name: "authoritative_concept_key",
sql: "ALTER TABLE `filing_taxonomy_concept` ADD `authoritative_concept_key` text;",
},
{
name: "mapping_method",
sql: "ALTER TABLE `filing_taxonomy_concept` ADD `mapping_method` text;",
},
{
name: "surface_key",
sql: "ALTER TABLE `filing_taxonomy_concept` ADD `surface_key` text;",
},
{
name: "detail_parent_surface_key",
sql: "ALTER TABLE `filing_taxonomy_concept` ADD `detail_parent_surface_key` text;",
},
{
name: "kpi_key",
sql: "ALTER TABLE `filing_taxonomy_concept` ADD `kpi_key` text;",
},
{
name: "residual_flag",
sql: "ALTER TABLE `filing_taxonomy_concept` ADD `residual_flag` integer NOT NULL DEFAULT false;",
},
]);
}
function ensureTaxonomyFactCompat(client: Database) {
ensureColumns(client, 'filing_taxonomy_fact', [
{ name: 'data_type', sql: 'ALTER TABLE `filing_taxonomy_fact` ADD `data_type` text;' },
{ name: 'authoritative_concept_key', sql: 'ALTER TABLE `filing_taxonomy_fact` ADD `authoritative_concept_key` text;' },
{ name: 'mapping_method', sql: 'ALTER TABLE `filing_taxonomy_fact` ADD `mapping_method` text;' },
{ name: 'surface_key', sql: 'ALTER TABLE `filing_taxonomy_fact` ADD `surface_key` text;' },
{ name: 'detail_parent_surface_key', sql: 'ALTER TABLE `filing_taxonomy_fact` ADD `detail_parent_surface_key` text;' },
{ name: 'kpi_key', sql: 'ALTER TABLE `filing_taxonomy_fact` ADD `kpi_key` text;' },
{ name: 'residual_flag', sql: 'ALTER TABLE `filing_taxonomy_fact` ADD `residual_flag` integer NOT NULL DEFAULT false;' },
{ name: 'precision', sql: 'ALTER TABLE `filing_taxonomy_fact` ADD `precision` text;' },
{ name: 'nil', sql: 'ALTER TABLE `filing_taxonomy_fact` ADD `nil` integer NOT NULL DEFAULT false;' }
ensureColumns(client, "filing_taxonomy_fact", [
{
name: "data_type",
sql: "ALTER TABLE `filing_taxonomy_fact` ADD `data_type` text;",
},
{
name: "authoritative_concept_key",
sql: "ALTER TABLE `filing_taxonomy_fact` ADD `authoritative_concept_key` text;",
},
{
name: "mapping_method",
sql: "ALTER TABLE `filing_taxonomy_fact` ADD `mapping_method` text;",
},
{
name: "surface_key",
sql: "ALTER TABLE `filing_taxonomy_fact` ADD `surface_key` text;",
},
{
name: "detail_parent_surface_key",
sql: "ALTER TABLE `filing_taxonomy_fact` ADD `detail_parent_surface_key` text;",
},
{
name: "kpi_key",
sql: "ALTER TABLE `filing_taxonomy_fact` ADD `kpi_key` text;",
},
{
name: "residual_flag",
sql: "ALTER TABLE `filing_taxonomy_fact` ADD `residual_flag` integer NOT NULL DEFAULT false;",
},
{
name: "precision",
sql: "ALTER TABLE `filing_taxonomy_fact` ADD `precision` text;",
},
{
name: "nil",
sql: "ALTER TABLE `filing_taxonomy_fact` ADD `nil` integer NOT NULL DEFAULT false;",
},
]);
}
@@ -405,18 +529,18 @@ function ensureTaxonomyCompat(client: Database) {
export function ensureLocalSqliteSchema(client: Database) {
const missingBaseSchema = [
'filing',
'watchlist_item',
'holding',
'task_run',
'portfolio_insight'
"filing",
"watchlist_item",
"holding",
"task_run",
"portfolio_insight",
].some((tableName) => !hasTable(client, tableName));
if (missingBaseSchema) {
applyBaseSchemaCompat(client);
}
if (!hasTable(client, 'user')) {
if (!hasTable(client, "user")) {
client.exec(`
CREATE TABLE IF NOT EXISTS \`user\` (
\`id\` text PRIMARY KEY NOT NULL,
@@ -432,10 +556,12 @@ export function ensureLocalSqliteSchema(client: Database) {
\`banExpires\` integer
);
`);
client.exec('CREATE UNIQUE INDEX IF NOT EXISTS `user_email_uidx` ON `user` (`email`);');
client.exec(
"CREATE UNIQUE INDEX IF NOT EXISTS `user_email_uidx` ON `user` (`email`);",
);
}
if (!hasTable(client, 'organization')) {
if (!hasTable(client, "organization")) {
client.exec(`
CREATE TABLE IF NOT EXISTS \`organization\` (
\`id\` text PRIMARY KEY NOT NULL,
@@ -446,46 +572,86 @@ export function ensureLocalSqliteSchema(client: Database) {
\`metadata\` text
);
`);
client.exec('CREATE UNIQUE INDEX IF NOT EXISTS `organization_slug_uidx` ON `organization` (`slug`);');
client.exec(
"CREATE UNIQUE INDEX IF NOT EXISTS `organization_slug_uidx` ON `organization` (`slug`);",
);
}
if (!hasTable(client, 'filing_statement_snapshot')) {
applySqlFile(client, '0001_glossy_statement_snapshots.sql');
if (!hasTable(client, "filing_statement_snapshot")) {
applySqlFile(client, "0001_glossy_statement_snapshots.sql");
}
ensureColumns(client, 'task_run', [
{ name: 'stage', sql: "ALTER TABLE `task_run` ADD `stage` text NOT NULL DEFAULT 'queued';" },
{ name: 'stage_detail', sql: 'ALTER TABLE `task_run` ADD `stage_detail` text;' },
{ name: 'stage_context', sql: 'ALTER TABLE `task_run` ADD `stage_context` text;' },
{ name: 'resource_key', sql: 'ALTER TABLE `task_run` ADD `resource_key` text;' },
{ name: 'notification_read_at', sql: 'ALTER TABLE `task_run` ADD `notification_read_at` text;' },
{ name: 'notification_silenced_at', sql: 'ALTER TABLE `task_run` ADD `notification_silenced_at` text;' }
ensureColumns(client, "task_run", [
{
name: "stage",
sql: "ALTER TABLE `task_run` ADD `stage` text NOT NULL DEFAULT 'queued';",
},
{
name: "stage_detail",
sql: "ALTER TABLE `task_run` ADD `stage_detail` text;",
},
{
name: "stage_context",
sql: "ALTER TABLE `task_run` ADD `stage_context` text;",
},
{
name: "resource_key",
sql: "ALTER TABLE `task_run` ADD `resource_key` text;",
},
{
name: "notification_read_at",
sql: "ALTER TABLE `task_run` ADD `notification_read_at` text;",
},
{
name: "notification_silenced_at",
sql: "ALTER TABLE `task_run` ADD `notification_silenced_at` text;",
},
]);
if (!hasTable(client, 'task_stage_event')) {
applySqlFile(client, '0003_task_stage_event_timeline.sql');
if (!hasTable(client, "task_stage_event")) {
applySqlFile(client, "0003_task_stage_event_timeline.sql");
}
if (hasTable(client, 'task_stage_event') && !hasColumn(client, 'task_stage_event', 'stage_context')) {
client.exec('ALTER TABLE `task_stage_event` ADD `stage_context` text;');
if (
hasTable(client, "task_stage_event") &&
!hasColumn(client, "task_stage_event", "stage_context")
) {
client.exec("ALTER TABLE `task_stage_event` ADD `stage_context` text;");
}
client.exec('CREATE INDEX IF NOT EXISTS `task_user_updated_idx` ON `task_run` (`user_id`, `updated_at`);');
client.exec(
"CREATE INDEX IF NOT EXISTS `task_user_updated_idx` ON `task_run` (`user_id`, `updated_at`);",
);
client.exec(`CREATE UNIQUE INDEX IF NOT EXISTS task_active_resource_uidx
ON task_run (user_id, task_type, resource_key)
WHERE resource_key IS NOT NULL AND status IN ('queued', 'running');`);
ensureColumns(client, 'watchlist_item', [
{ name: 'category', sql: 'ALTER TABLE `watchlist_item` ADD `category` text;' },
{ name: 'tags', sql: 'ALTER TABLE `watchlist_item` ADD `tags` text;' },
{ name: 'status', sql: "ALTER TABLE `watchlist_item` ADD `status` text NOT NULL DEFAULT 'backlog';" },
{ name: 'priority', sql: "ALTER TABLE `watchlist_item` ADD `priority` text NOT NULL DEFAULT 'medium';" },
{ name: 'updated_at', sql: "ALTER TABLE `watchlist_item` ADD `updated_at` text NOT NULL DEFAULT '';" },
{ name: 'last_reviewed_at', sql: 'ALTER TABLE `watchlist_item` ADD `last_reviewed_at` text;' }
ensureColumns(client, "watchlist_item", [
{
name: "category",
sql: "ALTER TABLE `watchlist_item` ADD `category` text;",
},
{ name: "tags", sql: "ALTER TABLE `watchlist_item` ADD `tags` text;" },
{
name: "status",
sql: "ALTER TABLE `watchlist_item` ADD `status` text NOT NULL DEFAULT 'backlog';",
},
{
name: "priority",
sql: "ALTER TABLE `watchlist_item` ADD `priority` text NOT NULL DEFAULT 'medium';",
},
{
name: "updated_at",
sql: "ALTER TABLE `watchlist_item` ADD `updated_at` text NOT NULL DEFAULT '';",
},
{
name: "last_reviewed_at",
sql: "ALTER TABLE `watchlist_item` ADD `last_reviewed_at` text;",
},
]);
if (hasTable(client, 'watchlist_item')) {
if (hasTable(client, "watchlist_item")) {
client.exec(`
UPDATE \`watchlist_item\`
SET
@@ -503,27 +669,32 @@ WHERE resource_key IS NOT NULL AND status IN ('queued', 'running');`);
END;
`);
client.exec('CREATE INDEX IF NOT EXISTS `watchlist_user_updated_idx` ON `watchlist_item` (`user_id`, `updated_at`);');
client.exec(
"CREATE INDEX IF NOT EXISTS `watchlist_user_updated_idx` ON `watchlist_item` (`user_id`, `updated_at`);",
);
}
if (hasTable(client, 'holding') && !hasColumn(client, 'holding', 'company_name')) {
client.exec('ALTER TABLE `holding` ADD `company_name` text;');
if (
hasTable(client, "holding") &&
!hasColumn(client, "holding", "company_name")
) {
client.exec("ALTER TABLE `holding` ADD `company_name` text;");
}
if (!hasTable(client, 'filing_taxonomy_snapshot')) {
applySqlFile(client, '0005_financial_taxonomy_v3.sql');
if (!hasTable(client, "filing_taxonomy_snapshot")) {
applySqlFile(client, "0005_financial_taxonomy_v3.sql");
}
ensureTaxonomyCompat(client);
if (!hasTable(client, 'company_financial_bundle')) {
applySqlFile(client, '0007_company_financial_bundles.sql');
if (!hasTable(client, "company_financial_bundle")) {
applySqlFile(client, "0007_company_financial_bundles.sql");
}
if (!hasTable(client, 'company_overview_cache')) {
applySqlFile(client, '0012_company_overview_cache.sql');
if (!hasTable(client, "company_overview_cache")) {
applySqlFile(client, "0012_company_overview_cache.sql");
}
if (!hasTable(client, 'research_journal_entry')) {
if (!hasTable(client, "research_journal_entry")) {
client.exec(`
CREATE TABLE IF NOT EXISTS \`research_journal_entry\` (
\`id\` integer PRIMARY KEY AUTOINCREMENT NOT NULL,
@@ -539,12 +710,16 @@ WHERE resource_key IS NOT NULL AND status IN ('queued', 'running');`);
FOREIGN KEY (\`user_id\`) REFERENCES \`user\`(\`id\`) ON UPDATE no action ON DELETE cascade
);
`);
client.exec('CREATE INDEX IF NOT EXISTS `research_journal_ticker_idx` ON `research_journal_entry` (`user_id`, `ticker`, `created_at`);');
client.exec('CREATE INDEX IF NOT EXISTS `research_journal_accession_idx` ON `research_journal_entry` (`user_id`, `accession_number`);');
client.exec(
"CREATE INDEX IF NOT EXISTS `research_journal_ticker_idx` ON `research_journal_entry` (`user_id`, `ticker`, `created_at`);",
);
client.exec(
"CREATE INDEX IF NOT EXISTS `research_journal_accession_idx` ON `research_journal_entry` (`user_id`, `accession_number`);",
);
}
if (!hasTable(client, 'search_document')) {
applySqlFile(client, '0008_search_rag.sql');
if (!hasTable(client, "search_document")) {
applySqlFile(client, "0008_search_rag.sql");
}
ensureResearchWorkspaceSchema(client);
@@ -555,7 +730,7 @@ export const __sqliteSchemaCompatInternals = {
applySqlFile,
hasColumn,
hasTable,
TAXONOMY_SNAPSHOT_REQUIRED_COLUMNS
TAXONOMY_SNAPSHOT_REQUIRED_COLUMNS,
};
export { TAXONOMY_SNAPSHOT_REQUIRED_COLUMNS };

View File

@@ -1,120 +1,146 @@
import { describe, expect, it } from 'bun:test';
import { __filingTaxonomyInternals } from './filing-taxonomy';
import { describe, expect, it } from "bun:test";
import { __filingTaxonomyInternals } from "./filing-taxonomy";
describe('filing taxonomy snapshot normalization', () => {
it('normalizes legacy snake_case nested snapshot payloads in toSnapshotRecord', () => {
describe("filing taxonomy snapshot normalization", () => {
it("normalizes legacy snake_case nested snapshot payloads in toSnapshotRecord", () => {
const record = __filingTaxonomyInternals.toSnapshotRecord({
id: 1,
filing_id: 10,
ticker: 'MSFT',
filing_date: '2026-01-28',
filing_type: '10-Q',
parse_status: 'ready',
ticker: "MSFT",
filing_date: "2026-01-28",
filing_type: "10-Q",
parse_status: "ready",
parse_error: null,
source: 'xbrl_instance',
parser_engine: 'fiscal-xbrl',
parser_version: '0.1.0',
taxonomy_regime: 'us-gaap',
fiscal_pack: 'core',
periods: [{
id: 'fy-2025',
filing_id: 10,
accession_number: '0001',
filing_date: '2026-01-28',
period_start: '2025-01-01',
period_end: '2025-12-31',
filing_type: '10-Q',
period_label: 'FY 2025'
}],
source: "xbrl_instance",
parser_engine: "fiscal-xbrl",
parser_version: "0.1.0",
taxonomy_regime: "us-gaap",
fiscal_pack: "core",
periods: [
{
id: "fy-2025",
filing_id: 10,
accession_number: "0001",
filing_date: "2026-01-28",
period_start: "2025-01-01",
period_end: "2025-12-31",
filing_type: "10-Q",
period_label: "FY 2025",
},
],
faithful_rows: {
income: [{
key: 'revenue',
label: 'Revenue',
concept_key: 'us-gaap:Revenue',
qname: 'us-gaap:Revenue',
namespace_uri: 'http://fasb.org/us-gaap/2025',
local_name: 'Revenue',
is_extension: false,
statement: 'income',
role_uri: 'income',
order: 10,
depth: 0,
parent_key: null,
values: { 'fy-2025': 10 },
units: { 'fy-2025': 'iso4217:USD' },
has_dimensions: false,
source_fact_ids: [1]
}],
income: [
{
key: "revenue",
label: "Revenue",
concept_key: "us-gaap:Revenue",
qname: "us-gaap:Revenue",
namespace_uri: "http://fasb.org/us-gaap/2025",
local_name: "Revenue",
is_extension: false,
statement: "income",
role_uri: "income",
order: 10,
depth: 0,
parent_key: null,
values: { "fy-2025": 10 },
units: { "fy-2025": "iso4217:USD" },
has_dimensions: false,
source_fact_ids: [1],
},
],
balance: [],
cash_flow: [],
equity: [],
comprehensive_income: []
comprehensive_income: [],
},
statement_rows: null,
surface_rows: {
income: [{
key: 'revenue',
label: 'Revenue',
category: 'revenue',
template_section: 'revenue',
order: 10,
unit: 'currency',
values: { 'fy-2025': 10 },
source_concepts: ['us-gaap:Revenue'],
source_row_keys: ['revenue'],
source_fact_ids: [1],
formula_key: null,
has_dimensions: false,
resolved_source_row_keys: { 'fy-2025': 'revenue' },
statement: 'income',
detail_count: 1,
resolution_method: 'direct',
confidence: 'high',
warning_codes: ['legacy_surface']
}],
income: [
{
key: "revenue",
label: "Revenue",
category: "revenue",
template_section: "revenue",
order: 10,
unit: "currency",
values: { "fy-2025": 10 },
source_concepts: ["us-gaap:Revenue"],
source_row_keys: ["revenue"],
source_fact_ids: [1],
formula_key: null,
has_dimensions: false,
resolved_source_row_keys: { "fy-2025": "revenue" },
statement: "income",
detail_count: 1,
resolution_method: "direct",
confidence: "high",
warning_codes: ["legacy_surface"],
},
],
balance: [],
cash_flow: [],
equity: [],
comprehensive_income: []
comprehensive_income: [],
},
detail_rows: {
income: {
revenue: [{
key: 'revenue_detail',
parent_surface_key: 'revenue',
label: 'Revenue Detail',
concept_key: 'us-gaap:RevenueDetail',
qname: 'us-gaap:RevenueDetail',
namespace_uri: 'http://fasb.org/us-gaap/2025',
local_name: 'RevenueDetail',
unit: 'iso4217:USD',
values: { 'fy-2025': 10 },
source_fact_ids: [2],
is_extension: false,
dimensions_summary: ['region:americas'],
residual_flag: false
}]
revenue: [
{
key: "revenue_detail",
parent_surface_key: "revenue",
label: "Revenue Detail",
concept_key: "us-gaap:RevenueDetail",
qname: "us-gaap:RevenueDetail",
namespace_uri: "http://fasb.org/us-gaap/2025",
local_name: "RevenueDetail",
unit: "iso4217:USD",
values: { "fy-2025": 10 },
source_fact_ids: [2],
is_extension: false,
dimensions_summary: ["region:americas"],
residual_flag: false,
},
],
},
balance: {},
cash_flow: {},
equity: {},
comprehensive_income: {}
comprehensive_income: {},
},
kpi_rows: [{
key: 'cloud_growth',
label: 'Cloud Growth',
category: 'operating_kpi',
unit: 'percent',
order: 10,
segment: null,
axis: null,
member: null,
values: { 'fy-2025': 0.25 },
source_concepts: ['msft:CloudGrowth'],
source_fact_ids: [3],
provenance_type: 'taxonomy',
has_dimensions: false
}],
kpi_rows: [
{
key: "cloud_growth",
label: "Cloud Growth",
category: "operating_kpi",
unit: "percent",
order: 10,
segment: null,
axis: null,
member: null,
values: { "fy-2025": 0.25 },
source_concepts: ["msft:CloudGrowth"],
source_fact_ids: [3],
provenance_type: "taxonomy",
has_dimensions: false,
},
],
computed_definitions: [
{
key: "gross_margin",
label: "Gross Margin",
category: "margins",
order: 10,
unit: "percent",
computation: {
type: "ratio",
numerator: "gross_profit",
denominator: "revenue",
},
supported_cadences: ["annual", "quarterly"],
requires_external_data: [],
},
],
derived_metrics: null,
validation_result: null,
normalization_summary: {
@@ -123,169 +149,228 @@ describe('filing taxonomy snapshot normalization', () => {
kpi_row_count: 1,
unmapped_row_count: 0,
material_unmapped_row_count: 0,
warnings: ['legacy_warning']
warnings: ["legacy_warning"],
},
facts_count: 3,
concepts_count: 3,
dimensions_count: 1,
created_at: '2026-01-28T00:00:00.000Z',
updated_at: '2026-01-28T00:00:00.000Z'
created_at: "2026-01-28T00:00:00.000Z",
updated_at: "2026-01-28T00:00:00.000Z",
} as never);
expect(record.periods[0]).toMatchObject({
filingId: 10,
accessionNumber: '0001',
filingDate: '2026-01-28',
periodStart: '2025-01-01',
periodEnd: '2025-12-31',
periodLabel: 'FY 2025'
accessionNumber: "0001",
filingDate: "2026-01-28",
periodStart: "2025-01-01",
periodEnd: "2025-12-31",
periodLabel: "FY 2025",
});
expect(record.faithful_rows.income[0]).toMatchObject({
conceptKey: 'us-gaap:Revenue',
namespaceUri: 'http://fasb.org/us-gaap/2025',
localName: 'Revenue',
roleUri: 'income',
conceptKey: "us-gaap:Revenue",
namespaceUri: "http://fasb.org/us-gaap/2025",
localName: "Revenue",
roleUri: "income",
parentKey: null,
hasDimensions: false,
sourceFactIds: [1]
sourceFactIds: [1],
});
expect(record.surface_rows.income[0]).toMatchObject({
templateSection: 'revenue',
sourceConcepts: ['us-gaap:Revenue'],
sourceRowKeys: ['revenue'],
templateSection: "revenue",
sourceConcepts: ["us-gaap:Revenue"],
sourceRowKeys: ["revenue"],
sourceFactIds: [1],
formulaKey: null,
hasDimensions: false,
resolvedSourceRowKeys: { 'fy-2025': 'revenue' },
resolvedSourceRowKeys: { "fy-2025": "revenue" },
detailCount: 1,
resolutionMethod: 'direct',
warningCodes: ['legacy_surface']
resolutionMethod: "direct",
warningCodes: ["legacy_surface"],
});
expect(record.detail_rows.income.revenue?.[0]).toMatchObject({
parentSurfaceKey: 'revenue',
conceptKey: 'us-gaap:RevenueDetail',
namespaceUri: 'http://fasb.org/us-gaap/2025',
parentSurfaceKey: "revenue",
conceptKey: "us-gaap:RevenueDetail",
namespaceUri: "http://fasb.org/us-gaap/2025",
sourceFactIds: [2],
dimensionsSummary: ['region:americas'],
residualFlag: false
dimensionsSummary: ["region:americas"],
residualFlag: false,
});
expect(record.kpi_rows[0]).toMatchObject({
sourceConcepts: ['msft:CloudGrowth'],
sourceConcepts: ["msft:CloudGrowth"],
sourceFactIds: [3],
provenanceType: 'taxonomy',
hasDimensions: false
provenanceType: "taxonomy",
hasDimensions: false,
});
expect(record.computed_definitions).toEqual([
{
key: "gross_margin",
label: "Gross Margin",
category: "margins",
order: 10,
unit: "percent",
computation: {
type: "ratio",
numerator: "gross_profit",
denominator: "revenue",
},
supported_cadences: ["annual", "quarterly"],
requires_external_data: [],
},
]);
expect(record.normalization_summary).toEqual({
surfaceRowCount: 1,
detailRowCount: 1,
kpiRowCount: 1,
unmappedRowCount: 0,
materialUnmappedRowCount: 0,
warnings: ['legacy_warning']
warnings: ["legacy_warning"],
});
});
it('keeps mixed camelCase and snake_case payloads compatible', () => {
const normalized = __filingTaxonomyInternals.normalizeFilingTaxonomySnapshotPayload({
periods: [{
id: 'fy-2025',
filingId: 10,
accessionNumber: '0001',
filingDate: '2026-01-28',
periodStart: '2025-01-01',
periodEnd: '2025-12-31',
filingType: '10-K',
periodLabel: 'FY 2025'
}],
faithful_rows: {
income: [{
key: 'revenue',
label: 'Revenue',
conceptKey: 'us-gaap:Revenue',
qname: 'us-gaap:Revenue',
namespaceUri: 'http://fasb.org/us-gaap/2025',
localName: 'Revenue',
isExtension: false,
statement: 'income',
roleUri: 'income',
order: 10,
depth: 0,
parentKey: null,
values: { 'fy-2025': 10 },
units: { 'fy-2025': 'iso4217:USD' },
hasDimensions: false,
sourceFactIds: [1]
}],
balance: [],
cash_flow: [],
equity: [],
comprehensive_income: []
},
statement_rows: null,
surface_rows: {
income: [{
key: 'revenue',
label: 'Revenue',
category: 'revenue',
order: 10,
unit: 'currency',
values: { 'fy-2025': 10 },
source_concepts: ['us-gaap:Revenue'],
source_row_keys: ['revenue'],
source_fact_ids: [1],
formula_key: null,
has_dimensions: false,
resolved_source_row_keys: { 'fy-2025': 'revenue' }
}],
balance: [],
cash_flow: [],
equity: [],
comprehensive_income: []
},
detail_rows: {
income: {
revenue: [{
key: 'revenue_detail',
parentSurfaceKey: 'revenue',
label: 'Revenue Detail',
conceptKey: 'us-gaap:RevenueDetail',
qname: 'us-gaap:RevenueDetail',
namespaceUri: 'http://fasb.org/us-gaap/2025',
localName: 'RevenueDetail',
unit: 'iso4217:USD',
values: { 'fy-2025': 10 },
sourceFactIds: [2],
isExtension: false,
dimensionsSummary: [],
residualFlag: false
}]
it("keeps mixed camelCase and snake_case payloads compatible", () => {
const normalized =
__filingTaxonomyInternals.normalizeFilingTaxonomySnapshotPayload({
periods: [
{
id: "fy-2025",
filingId: 10,
accessionNumber: "0001",
filingDate: "2026-01-28",
periodStart: "2025-01-01",
periodEnd: "2025-12-31",
filingType: "10-K",
periodLabel: "FY 2025",
},
],
faithful_rows: {
income: [
{
key: "revenue",
label: "Revenue",
conceptKey: "us-gaap:Revenue",
qname: "us-gaap:Revenue",
namespaceUri: "http://fasb.org/us-gaap/2025",
localName: "Revenue",
isExtension: false,
statement: "income",
roleUri: "income",
order: 10,
depth: 0,
parentKey: null,
values: { "fy-2025": 10 },
units: { "fy-2025": "iso4217:USD" },
hasDimensions: false,
sourceFactIds: [1],
},
],
balance: [],
cash_flow: [],
equity: [],
comprehensive_income: [],
},
balance: {},
cash_flow: {},
equity: {},
comprehensive_income: {}
},
kpi_rows: [],
normalization_summary: {
surfaceRowCount: 1,
detail_row_count: 1,
kpiRowCount: 0,
unmapped_row_count: 0,
materialUnmappedRowCount: 0,
warnings: []
}
});
statement_rows: null,
surface_rows: {
income: [
{
key: "revenue",
label: "Revenue",
category: "revenue",
order: 10,
unit: "currency",
values: { "fy-2025": 10 },
source_concepts: ["us-gaap:Revenue"],
source_row_keys: ["revenue"],
source_fact_ids: [1],
formula_key: null,
has_dimensions: false,
resolved_source_row_keys: { "fy-2025": "revenue" },
},
],
balance: [],
cash_flow: [],
equity: [],
comprehensive_income: [],
},
detail_rows: {
income: {
revenue: [
{
key: "revenue_detail",
parentSurfaceKey: "revenue",
label: "Revenue Detail",
conceptKey: "us-gaap:RevenueDetail",
qname: "us-gaap:RevenueDetail",
namespaceUri: "http://fasb.org/us-gaap/2025",
localName: "RevenueDetail",
unit: "iso4217:USD",
values: { "fy-2025": 10 },
sourceFactIds: [2],
isExtension: false,
dimensionsSummary: [],
residualFlag: false,
},
],
},
balance: {},
cash_flow: {},
equity: {},
comprehensive_income: {},
},
kpi_rows: [],
computed_definitions: [
{
key: "revenue_yoy",
label: "Revenue YoY",
category: "growth",
order: 20,
unit: "percent",
computation: {
type: "yoy_growth",
source: "revenue",
},
supportedCadences: ["annual"],
requiresExternalData: [],
},
],
normalization_summary: {
surfaceRowCount: 1,
detail_row_count: 1,
kpiRowCount: 0,
unmapped_row_count: 0,
materialUnmappedRowCount: 0,
warnings: [],
},
});
expect(normalized.periods[0]?.filingId).toBe(10);
expect(normalized.surface_rows.income[0]?.sourceConcepts).toEqual(['us-gaap:Revenue']);
expect(normalized.detail_rows.income.revenue?.[0]?.parentSurfaceKey).toBe('revenue');
expect(normalized.surface_rows.income[0]?.sourceConcepts).toEqual([
"us-gaap:Revenue",
]);
expect(normalized.detail_rows.income.revenue?.[0]?.parentSurfaceKey).toBe(
"revenue",
);
expect(normalized.normalization_summary).toEqual({
surfaceRowCount: 1,
detailRowCount: 1,
kpiRowCount: 0,
unmappedRowCount: 0,
materialUnmappedRowCount: 0,
warnings: []
warnings: [],
});
expect(normalized.computed_definitions).toEqual([
{
key: "revenue_yoy",
label: "Revenue YoY",
category: "growth",
order: 20,
unit: "percent",
computation: {
type: "yoy_growth",
source: "revenue",
},
supported_cadences: ["annual"],
requires_external_data: [],
},
]);
});
});

File diff suppressed because it is too large Load Diff

View File

@@ -1,11 +1,5 @@
import {
beforeEach,
describe,
expect,
it,
mock
} from 'bun:test';
import type { Filing, Holding, Task } from '@/lib/types';
import { beforeEach, describe, expect, it, mock } from "bun:test";
import type { Filing, Holding, Task } from "@/lib/types";
const stageUpdates: Array<{
taskId: string;
@@ -14,290 +8,373 @@ const stageUpdates: Array<{
context: Record<string, unknown> | null;
}> = [];
const mockRunAiAnalysis = mock(async (_prompt: string, _instruction: string, options?: { workload?: string }) => {
if (options?.workload === 'extraction') {
return {
provider: 'zhipu',
model: 'glm-extract',
text: JSON.stringify({
summary: 'Revenue growth remained resilient despite FX pressure.',
keyPoints: ['Revenue up year-over-year'],
redFlags: ['Debt service burden is rising'],
followUpQuestions: ['Is margin guidance sustainable?'],
portfolioSignals: ['Monitor leverage trend'],
segmentSpecificData: ['Services segment outgrew hardware segment.'],
geographicRevenueBreakdown: ['EMEA revenue grew faster than Americas.'],
companySpecificData: ['Same-store sales increased 4.2%.'],
secApiCrossChecks: ['Revenue from SEC API aligns with filing narrative.'],
confidence: 0.72
})
};
}
const mockRunAiAnalysis = mock(
async (
_prompt: string,
_instruction: string,
options?: { workload?: string },
) => {
if (options?.workload === "extraction") {
return {
provider: "zhipu",
model: "glm-extract",
text: JSON.stringify({
summary: "Revenue growth remained resilient despite FX pressure.",
keyPoints: ["Revenue up year-over-year"],
redFlags: ["Debt service burden is rising"],
followUpQuestions: ["Is margin guidance sustainable?"],
portfolioSignals: ["Monitor leverage trend"],
segmentSpecificData: ["Services segment outgrew hardware segment."],
geographicRevenueBreakdown: [
"EMEA revenue grew faster than Americas.",
],
companySpecificData: ["Same-store sales increased 4.2%."],
secApiCrossChecks: [
"Revenue from SEC API aligns with filing narrative.",
],
confidence: 0.72,
}),
};
}
return {
provider: 'zhipu',
model: options?.workload === 'report' ? 'glm-report' : 'glm-generic',
text: 'Structured output'
};
});
return {
provider: "zhipu",
model: options?.workload === "report" ? "glm-report" : "glm-generic",
text: "Structured output",
};
},
);
const mockBuildPortfolioSummary = mock((_holdings: Holding[]) => ({
positions: 14,
total_value: '100000',
total_gain_loss: '1000',
total_cost_basis: '99000',
avg_return_pct: '0.01'
total_value: "100000",
total_gain_loss: "1000",
total_cost_basis: "99000",
avg_return_pct: "0.01",
}));
const mockGetQuote = mock(async (ticker: string) => {
return ticker === 'MSFT' ? 410 : 205;
return ticker === "MSFT" ? 410 : 205;
});
const mockIndexSearchDocuments = mock(async (input: {
onStage?: (stage: 'collect' | 'fetch' | 'chunk' | 'embed' | 'persist', detail: string, context?: Record<string, unknown> | null) => Promise<void> | void;
}) => {
await input.onStage?.('collect', 'Collected 12 source records for search indexing', {
counters: {
const mockIndexSearchDocuments = mock(
async (input: {
onStage?: (
stage: "collect" | "fetch" | "chunk" | "embed" | "persist",
detail: string,
context?: Record<string, unknown> | null,
) => Promise<void> | void;
}) => {
await input.onStage?.(
"collect",
"Collected 12 source records for search indexing",
{
counters: {
sourcesCollected: 12,
deleted: 3,
},
},
);
await input.onStage?.(
"fetch",
"Preparing filing_brief 0000320193-26-000001",
{
progress: {
current: 1,
total: 12,
unit: "sources",
},
subject: {
ticker: "AAPL",
accessionNumber: "0000320193-26-000001",
},
},
);
await input.onStage?.(
"embed",
"Embedding 248 chunks for 0000320193-26-000001",
{
progress: {
current: 1,
total: 12,
unit: "sources",
},
counters: {
chunksEmbedded: 248,
},
},
);
return {
sourcesCollected: 12,
deleted: 3
}
});
await input.onStage?.('fetch', 'Preparing filing_brief 0000320193-26-000001', {
progress: {
current: 1,
total: 12,
unit: 'sources'
},
subject: {
ticker: 'AAPL',
accessionNumber: '0000320193-26-000001'
}
});
await input.onStage?.('embed', 'Embedding 248 chunks for 0000320193-26-000001', {
progress: {
current: 1,
total: 12,
unit: 'sources'
},
counters: {
chunksEmbedded: 248
}
});
return {
sourcesCollected: 12,
indexed: 12,
skipped: 1,
deleted: 3,
chunksEmbedded: 248
};
});
indexed: 12,
skipped: 1,
deleted: 3,
chunksEmbedded: 248,
};
},
);
const sampleFiling = (): Filing => ({
id: 1,
ticker: 'AAPL',
filing_type: '10-Q',
filing_date: '2026-01-30',
accession_number: '0000320193-26-000001',
cik: '0000320193',
company_name: 'Apple Inc.',
filing_url: 'https://www.sec.gov/Archives/edgar/data/320193/000032019326000001/a10q.htm',
submission_url: 'https://data.sec.gov/submissions/CIK0000320193.json',
primary_document: 'a10q.htm',
ticker: "AAPL",
filing_type: "10-Q",
filing_date: "2026-01-30",
accession_number: "0000320193-26-000001",
cik: "0000320193",
company_name: "Apple Inc.",
filing_url:
"https://www.sec.gov/Archives/edgar/data/320193/000032019326000001/a10q.htm",
submission_url: "https://data.sec.gov/submissions/CIK0000320193.json",
primary_document: "a10q.htm",
metrics: {
revenue: 120_000_000_000,
netIncome: 25_000_000_000,
totalAssets: 410_000_000_000,
cash: 70_000_000_000,
debt: 98_000_000_000
debt: 98_000_000_000,
},
analysis: null,
created_at: '2026-01-30T00:00:00.000Z',
updated_at: '2026-01-30T00:00:00.000Z'
created_at: "2026-01-30T00:00:00.000Z",
updated_at: "2026-01-30T00:00:00.000Z",
});
const mockGetFilingByAccession = mock(async () => sampleFiling());
const mockListFilingsRecords = mock(async () => [sampleFiling(), {
...sampleFiling(),
id: 2,
accession_number: '0000320193-26-000002',
filing_date: '2026-02-28'
}]);
const mockListFilingsRecords = mock(async () => [
sampleFiling(),
{
...sampleFiling(),
id: 2,
accession_number: "0000320193-26-000002",
filing_date: "2026-02-28",
},
]);
const mockSaveFilingAnalysis = mock(async () => {});
const mockUpdateFilingMetricsById = mock(async () => {});
const mockUpsertFilingsRecords = mock(async () => ({
inserted: 2,
updated: 0
updated: 0,
}));
const mockDeleteCompanyFinancialBundlesForTicker = mock(async () => {});
const mockGetFilingTaxonomySnapshotByFilingId = mock(async () => null);
const mockUpsertFilingTaxonomySnapshot = mock(async () => {});
const mockValidateMetricsWithPdfLlm = mock(async () => ({
validation_result: {
status: "matched" as const,
checks: [],
validatedAt: "2026-03-09T00:00:00.000Z",
},
metric_validations: [],
}));
const mockApplyRefreshedPrices = mock(async () => 24);
const mockListHoldingsForPriceRefresh = mock(async () => [
{
id: 1,
user_id: 'user-1',
ticker: 'AAPL',
company_name: 'Apple Inc.',
shares: '10',
avg_cost: '150',
current_price: '200',
market_value: '2000',
gain_loss: '500',
gain_loss_pct: '0.33',
user_id: "user-1",
ticker: "AAPL",
company_name: "Apple Inc.",
shares: "10",
avg_cost: "150",
current_price: "200",
market_value: "2000",
gain_loss: "500",
gain_loss_pct: "0.33",
last_price_at: null,
created_at: '2026-03-09T00:00:00.000Z',
updated_at: '2026-03-09T00:00:00.000Z'
created_at: "2026-03-09T00:00:00.000Z",
updated_at: "2026-03-09T00:00:00.000Z",
},
{
id: 2,
user_id: 'user-1',
ticker: 'MSFT',
company_name: 'Microsoft Corporation',
shares: '4',
avg_cost: '300',
current_price: '400',
market_value: '1600',
gain_loss: '400',
gain_loss_pct: '0.25',
user_id: "user-1",
ticker: "MSFT",
company_name: "Microsoft Corporation",
shares: "4",
avg_cost: "300",
current_price: "400",
market_value: "1600",
gain_loss: "400",
gain_loss_pct: "0.25",
last_price_at: null,
created_at: '2026-03-09T00:00:00.000Z',
updated_at: '2026-03-09T00:00:00.000Z'
}
created_at: "2026-03-09T00:00:00.000Z",
updated_at: "2026-03-09T00:00:00.000Z",
},
]);
const mockListUserHoldings = mock(async () => await mockListHoldingsForPriceRefresh());
const mockListUserHoldings = mock(
async () => await mockListHoldingsForPriceRefresh(),
);
const mockCreatePortfolioInsight = mock(async () => {});
const mockUpdateTaskStage = mock(async (taskId: string, stage: string, detail: string | null, context?: Record<string, unknown> | null) => {
stageUpdates.push({
taskId,
stage,
detail,
context: context ?? null
});
});
const mockUpdateTaskStage = mock(
async (
taskId: string,
stage: string,
detail: string | null,
context?: Record<string, unknown> | null,
) => {
stageUpdates.push({
taskId,
stage,
detail,
context: context ?? null,
});
},
);
const mockFetchPrimaryFilingText = mock(async () => ({
text: 'Revenue accelerated in services and margins improved.',
source: 'primary_document' as const
text: "Revenue accelerated in services and margins improved.",
source: "primary_document" as const,
}));
const mockFetchRecentFilings = mock(async () => ([
const mockFetchRecentFilings = mock(async () => [
{
ticker: 'AAPL',
filingType: '10-Q',
filingDate: '2026-01-30',
accessionNumber: '0000320193-26-000001',
cik: '0000320193',
companyName: 'Apple Inc.',
filingUrl: 'https://www.sec.gov/Archives/edgar/data/320193/000032019326000001/a10q.htm',
submissionUrl: 'https://data.sec.gov/submissions/CIK0000320193.json',
primaryDocument: 'a10q.htm'
ticker: "AAPL",
filingType: "10-Q",
filingDate: "2026-01-30",
accessionNumber: "0000320193-26-000001",
cik: "0000320193",
companyName: "Apple Inc.",
filingUrl:
"https://www.sec.gov/Archives/edgar/data/320193/000032019326000001/a10q.htm",
submissionUrl: "https://data.sec.gov/submissions/CIK0000320193.json",
primaryDocument: "a10q.htm",
},
{
ticker: 'AAPL',
filingType: '10-K',
filingDate: '2025-10-30',
accessionNumber: '0000320193-25-000001',
cik: '0000320193',
companyName: 'Apple Inc.',
filingUrl: 'https://www.sec.gov/Archives/edgar/data/320193/000032019325000001/a10k.htm',
submissionUrl: 'https://data.sec.gov/submissions/CIK0000320193.json',
primaryDocument: 'a10k.htm'
}
]));
ticker: "AAPL",
filingType: "10-K",
filingDate: "2025-10-30",
accessionNumber: "0000320193-25-000001",
cik: "0000320193",
companyName: "Apple Inc.",
filingUrl:
"https://www.sec.gov/Archives/edgar/data/320193/000032019325000001/a10k.htm",
submissionUrl: "https://data.sec.gov/submissions/CIK0000320193.json",
primaryDocument: "a10k.htm",
},
]);
const mockEnqueueTask = mock(async () => ({
id: 'search-task-1'
}));
const mockHydrateFilingTaxonomySnapshot = mock(async (input: { filingId: number }) => ({
filing_id: input.filingId,
ticker: 'AAPL',
filing_date: '2026-01-30',
filing_type: '10-Q',
parse_status: 'ready',
parse_error: null,
source: 'xbrl_instance',
periods: [],
statement_rows: {
income: [],
balance: [],
cash_flow: [],
equity: [],
comprehensive_income: []
},
derived_metrics: {
revenue: 120_000_000_000
},
validation_result: {
status: 'matched',
checks: [],
validatedAt: '2026-03-09T00:00:00.000Z'
},
facts_count: 1,
concepts_count: 1,
dimensions_count: 0,
assets: [],
concepts: [],
facts: [],
metric_validations: []
id: "search-task-1",
}));
const mockHydrateFilingTaxonomySnapshot = mock(
async (input: { filingId: number }) => ({
filing_id: input.filingId,
ticker: "AAPL",
filing_date: "2026-01-30",
filing_type: "10-Q",
parse_status: "ready",
parse_error: null,
source: "xbrl_instance",
periods: [],
statement_rows: {
income: [],
balance: [],
cash_flow: [],
equity: [],
comprehensive_income: [],
},
faithful_rows: {
income: [],
balance: [],
cash_flow: [],
equity: [],
comprehensive_income: [],
},
surface_rows: {
income: [],
balance: [],
cash_flow: [],
equity: [],
comprehensive_income: [],
},
detail_rows: {
income: {},
balance: {},
cash_flow: {},
equity: {},
comprehensive_income: {},
},
kpi_rows: [],
computed_definitions: [],
derived_metrics: {
revenue: 120_000_000_000,
},
validation_result: {
status: "matched",
checks: [],
validatedAt: "2026-03-09T00:00:00.000Z",
},
facts_count: 1,
concepts_count: 1,
dimensions_count: 0,
assets: [],
concepts: [],
facts: [],
metric_validations: [],
xbrl_validation: {
status: "passed",
},
}),
);
mock.module('@/lib/server/ai', () => ({
runAiAnalysis: mockRunAiAnalysis
mock.module("@/lib/server/ai", () => ({
runAiAnalysis: mockRunAiAnalysis,
}));
mock.module('@/lib/server/portfolio', () => ({
buildPortfolioSummary: mockBuildPortfolioSummary
mock.module("@/lib/server/portfolio", () => ({
buildPortfolioSummary: mockBuildPortfolioSummary,
}));
mock.module('@/lib/server/prices', () => ({
getQuote: mockGetQuote
mock.module("@/lib/server/prices", () => ({
getQuote: mockGetQuote,
}));
mock.module('@/lib/server/search', () => ({
indexSearchDocuments: mockIndexSearchDocuments
mock.module("@/lib/server/search", () => ({
indexSearchDocuments: mockIndexSearchDocuments,
}));
mock.module('@/lib/server/repos/filings', () => ({
mock.module("@/lib/server/repos/filings", () => ({
getFilingByAccession: mockGetFilingByAccession,
listFilingsRecords: mockListFilingsRecords,
saveFilingAnalysis: mockSaveFilingAnalysis,
updateFilingMetricsById: mockUpdateFilingMetricsById,
upsertFilingsRecords: mockUpsertFilingsRecords
upsertFilingsRecords: mockUpsertFilingsRecords,
}));
mock.module('@/lib/server/repos/company-financial-bundles', () => ({
deleteCompanyFinancialBundlesForTicker: mockDeleteCompanyFinancialBundlesForTicker
mock.module("@/lib/server/repos/company-financial-bundles", () => ({
deleteCompanyFinancialBundlesForTicker:
mockDeleteCompanyFinancialBundlesForTicker,
}));
mock.module('@/lib/server/repos/filing-taxonomy', () => ({
mock.module("@/lib/server/repos/filing-taxonomy", () => ({
getFilingTaxonomySnapshotByFilingId: mockGetFilingTaxonomySnapshotByFilingId,
upsertFilingTaxonomySnapshot: mockUpsertFilingTaxonomySnapshot
upsertFilingTaxonomySnapshot: mockUpsertFilingTaxonomySnapshot,
}));
mock.module('@/lib/server/repos/holdings', () => ({
mock.module("@/lib/server/repos/holdings", () => ({
applyRefreshedPrices: mockApplyRefreshedPrices,
listHoldingsForPriceRefresh: mockListHoldingsForPriceRefresh,
listUserHoldings: mockListUserHoldings
listUserHoldings: mockListUserHoldings,
}));
mock.module('@/lib/server/repos/insights', () => ({
createPortfolioInsight: mockCreatePortfolioInsight
mock.module("@/lib/server/repos/insights", () => ({
createPortfolioInsight: mockCreatePortfolioInsight,
}));
mock.module('@/lib/server/repos/tasks', () => ({
updateTaskStage: mockUpdateTaskStage
mock.module("@/lib/server/repos/tasks", () => ({
updateTaskStage: mockUpdateTaskStage,
}));
mock.module('@/lib/server/sec', () => ({
mock.module("@/lib/server/sec", () => ({
fetchPrimaryFilingText: mockFetchPrimaryFilingText,
fetchRecentFilings: mockFetchRecentFilings
fetchRecentFilings: mockFetchRecentFilings,
}));
mock.module('@/lib/server/tasks', () => ({
enqueueTask: mockEnqueueTask
mock.module("@/lib/server/tasks", () => ({
enqueueTask: mockEnqueueTask,
}));
mock.module('@/lib/server/taxonomy/engine', () => ({
hydrateFilingTaxonomySnapshot: mockHydrateFilingTaxonomySnapshot
mock.module("@/lib/server/taxonomy/engine", () => ({
hydrateFilingTaxonomySnapshot: mockHydrateFilingTaxonomySnapshot,
}));
mock.module("@/lib/server/taxonomy/pdf-validation", () => ({
validateMetricsWithPdfLlm: mockValidateMetricsWithPdfLlm,
}));
const { runTaskProcessor } = await import('./task-processors');
const { runTaskProcessor } = await import("./task-processors");
function taskFactory(overrides: Partial<Task> = {}): Task {
return {
id: 'task-1',
user_id: 'user-1',
task_type: 'sync_filings',
status: 'running',
stage: 'running',
stage_detail: 'Running',
id: "task-1",
user_id: "user-1",
task_type: "sync_filings",
status: "running",
stage: "running",
stage_detail: "Running",
stage_context: null,
resource_key: null,
notification_read_at: null,
@@ -308,24 +385,24 @@ function taskFactory(overrides: Partial<Task> = {}): Task {
error: null,
attempts: 1,
max_attempts: 3,
workflow_run_id: 'run-1',
created_at: '2026-03-09T00:00:00.000Z',
updated_at: '2026-03-09T00:00:00.000Z',
workflow_run_id: "run-1",
created_at: "2026-03-09T00:00:00.000Z",
updated_at: "2026-03-09T00:00:00.000Z",
finished_at: null,
notification: {
title: 'Task',
statusLine: 'Running',
title: "Task",
statusLine: "Running",
detailLine: null,
tone: 'info',
tone: "info",
progress: null,
stats: [],
actions: []
actions: [],
},
...overrides
...overrides,
};
}
describe('task processor outcomes', () => {
describe("task processor outcomes", () => {
beforeEach(() => {
stageUpdates.length = 0;
mockRunAiAnalysis.mockClear();
@@ -335,78 +412,108 @@ describe('task processor outcomes', () => {
mockCreatePortfolioInsight.mockClear();
mockUpdateTaskStage.mockClear();
mockEnqueueTask.mockClear();
mockValidateMetricsWithPdfLlm.mockClear();
});
it('returns sync filing completion detail and progress context', async () => {
const outcome = await runTaskProcessor(taskFactory({
task_type: 'sync_filings',
payload: {
ticker: 'AAPL',
limit: 2
}
}));
it("returns sync filing completion detail and progress context", async () => {
const outcome = await runTaskProcessor(
taskFactory({
task_type: "sync_filings",
payload: {
ticker: "AAPL",
limit: 2,
},
}),
);
expect(outcome.completionDetail).toContain('Synced 2 filings for AAPL');
expect(outcome.completionDetail).toContain("Synced 2 filings for AAPL");
expect(outcome.result.fetched).toBe(2);
expect(outcome.result.searchTaskId).toBe('search-task-1');
expect(outcome.result.searchTaskId).toBe("search-task-1");
expect(outcome.completionContext?.counters?.hydrated).toBe(2);
expect(stageUpdates.some((entry) => entry.stage === 'sync.extract_taxonomy' && entry.context?.subject)).toBe(true);
expect(
stageUpdates.some(
(entry) =>
entry.stage === "sync.extract_taxonomy" && entry.context?.subject,
),
).toBe(true);
expect(mockValidateMetricsWithPdfLlm).toHaveBeenCalled();
expect(mockUpsertFilingTaxonomySnapshot).toHaveBeenCalled();
});
it('returns refresh price completion detail with live quote progress', async () => {
const outcome = await runTaskProcessor(taskFactory({
task_type: 'refresh_prices'
}));
it("returns refresh price completion detail with live quote progress", async () => {
const outcome = await runTaskProcessor(
taskFactory({
task_type: "refresh_prices",
}),
);
expect(outcome.completionDetail).toBe('Refreshed prices for 2 tickers across 2 holdings.');
expect(outcome.completionDetail).toBe(
"Refreshed prices for 2/2 tickers across 2 holdings.",
);
expect(outcome.result.updatedCount).toBe(24);
expect(stageUpdates.filter((entry) => entry.stage === 'refresh.fetch_quotes')).toHaveLength(3);
expect(
stageUpdates.filter((entry) => entry.stage === "refresh.fetch_quotes"),
).toHaveLength(3);
expect(stageUpdates.at(-1)?.context?.counters).toBeDefined();
});
it('returns analyze filing completion detail with report metadata', async () => {
const outcome = await runTaskProcessor(taskFactory({
task_type: 'analyze_filing',
payload: {
accessionNumber: '0000320193-26-000001'
}
}));
it("returns analyze filing completion detail with report metadata", async () => {
const outcome = await runTaskProcessor(
taskFactory({
task_type: "analyze_filing",
payload: {
accessionNumber: "0000320193-26-000001",
},
}),
);
expect(outcome.completionDetail).toBe('Analysis report generated for AAPL 10-Q 0000320193-26-000001.');
expect(outcome.result.ticker).toBe('AAPL');
expect(outcome.result.filingType).toBe('10-Q');
expect(outcome.result.model).toBe('glm-report');
expect(outcome.completionDetail).toBe(
"Analysis report generated for AAPL 10-Q 0000320193-26-000001.",
);
expect(outcome.result.ticker).toBe("AAPL");
expect(outcome.result.filingType).toBe("10-Q");
expect(outcome.result.model).toBe("glm-report");
expect(mockSaveFilingAnalysis).toHaveBeenCalled();
});
it('returns index search completion detail and counters', async () => {
const outcome = await runTaskProcessor(taskFactory({
task_type: 'index_search',
payload: {
ticker: 'AAPL',
sourceKinds: ['filing_brief']
}
}));
it("returns index search completion detail and counters", async () => {
const outcome = await runTaskProcessor(
taskFactory({
task_type: "index_search",
payload: {
ticker: "AAPL",
sourceKinds: ["filing_brief"],
},
}),
);
expect(outcome.completionDetail).toBe('Indexed 12 sources, embedded 248 chunks, skipped 1, deleted 3 stale documents.');
expect(outcome.completionDetail).toBe(
"Indexed 12 sources, embedded 248 chunks, skipped 1, deleted 3 stale documents.",
);
expect(outcome.result.indexed).toBe(12);
expect(outcome.completionContext?.counters?.chunksEmbedded).toBe(248);
expect(stageUpdates.some((entry) => entry.stage === 'search.embed')).toBe(true);
expect(stageUpdates.some((entry) => entry.stage === "search.embed")).toBe(
true,
);
});
it('returns portfolio insight completion detail and summary payload', async () => {
const outcome = await runTaskProcessor(taskFactory({
task_type: 'portfolio_insights'
}));
it("returns portfolio insight completion detail and summary payload", async () => {
const outcome = await runTaskProcessor(
taskFactory({
task_type: "portfolio_insights",
}),
);
expect(outcome.completionDetail).toBe('Generated portfolio insight for 14 holdings.');
expect(outcome.result.provider).toBe('zhipu');
expect(outcome.completionDetail).toBe(
"Generated portfolio insight for 14 holdings.",
);
expect(outcome.result.provider).toBe("zhipu");
expect(outcome.result.summary).toEqual({
positions: 14,
total_value: '100000',
total_gain_loss: '1000',
total_cost_basis: '99000',
avg_return_pct: '0.01'
total_value: "100000",
total_gain_loss: "1000",
total_cost_basis: "99000",
avg_return_pct: "0.01",
});
expect(mockCreatePortfolioInsight).toHaveBeenCalled();
});

File diff suppressed because it is too large Load Diff

View File

@@ -1,37 +1,43 @@
import { beforeEach, describe, expect, it, mock } from 'bun:test';
import { beforeEach, describe, expect, it, mock } from "bun:test";
import type { FinancialStatementKind } from '@/lib/types';
import type { TaxonomyHydrationInput, TaxonomyHydrationResult } from '@/lib/server/taxonomy/types';
import type { FinancialStatementKind } from "@/lib/types";
import type {
TaxonomyHydrationInput,
TaxonomyHydrationResult,
} from "@/lib/server/taxonomy/types";
function createStatementRecord<T>(factory: () => T): Record<FinancialStatementKind, T> {
function createStatementRecord<T>(
factory: () => T,
): Record<FinancialStatementKind, T> {
return {
income: factory(),
balance: factory(),
cash_flow: factory(),
equity: factory(),
comprehensive_income: factory()
comprehensive_income: factory(),
};
}
function createHydrationResult(): TaxonomyHydrationResult {
return {
filing_id: 1,
ticker: 'TEST',
filing_date: '2025-12-31',
filing_type: '10-K',
parse_status: 'ready',
ticker: "TEST",
filing_date: "2025-12-31",
filing_type: "10-K",
parse_status: "ready",
parse_error: null,
source: 'xbrl_instance_with_linkbase',
parser_engine: 'fiscal-xbrl',
parser_version: '0.1.0',
taxonomy_regime: 'us-gaap',
fiscal_pack: 'core',
source: "xbrl_instance_with_linkbase",
parser_engine: "fiscal-xbrl",
parser_version: "0.1.0",
taxonomy_regime: "us-gaap",
fiscal_pack: "core",
periods: [],
faithful_rows: createStatementRecord(() => []),
statement_rows: createStatementRecord(() => []),
surface_rows: createStatementRecord(() => []),
detail_rows: createStatementRecord(() => ({})),
kpi_rows: [],
computed_definitions: [],
contexts: [],
derived_metrics: null,
validation_result: null,
@@ -48,42 +54,44 @@ function createHydrationResult(): TaxonomyHydrationResult {
kpi_row_count: 0,
unmapped_row_count: 0,
material_unmapped_row_count: 0,
warnings: ['rust_warning']
warnings: ["rust_warning"],
},
xbrl_validation: {
status: 'passed'
}
status: "passed",
},
};
}
const mockHydrateFromSidecar = mock(async () => createHydrationResult());
mock.module('@/lib/server/taxonomy/parser-client', () => ({
hydrateFilingTaxonomySnapshotFromSidecar: mockHydrateFromSidecar
mock.module("@/lib/server/taxonomy/parser-client", () => ({
hydrateFilingTaxonomySnapshotFromSidecar: mockHydrateFromSidecar,
}));
describe('taxonomy engine rust path', () => {
describe("taxonomy engine rust path", () => {
beforeEach(() => {
mockHydrateFromSidecar.mockClear();
});
it('returns sidecar output directly from the Rust sidecar', async () => {
const { hydrateFilingTaxonomySnapshot } = await import('@/lib/server/taxonomy/engine');
it("returns sidecar output directly from the Rust sidecar", async () => {
const { hydrateFilingTaxonomySnapshot } =
await import("@/lib/server/taxonomy/engine");
const input: TaxonomyHydrationInput = {
filingId: 1,
ticker: 'TEST',
cik: '0000000001',
accessionNumber: '0000000001-25-000001',
filingDate: '2025-12-31',
filingType: '10-K',
filingUrl: 'https://www.sec.gov/Archives/edgar/data/1/000000000125000001/',
primaryDocument: 'test-20251231.htm'
ticker: "TEST",
cik: "0000000001",
accessionNumber: "0000000001-25-000001",
filingDate: "2025-12-31",
filingType: "10-K",
filingUrl:
"https://www.sec.gov/Archives/edgar/data/1/000000000125000001/",
primaryDocument: "test-20251231.htm",
};
const result = await hydrateFilingTaxonomySnapshot(input);
expect(mockHydrateFromSidecar).toHaveBeenCalledTimes(1);
expect(result.parser_engine).toBe('fiscal-xbrl');
expect(result.normalization_summary.warnings).toEqual(['rust_warning']);
expect(result.parser_engine).toBe("fiscal-xbrl");
expect(result.normalization_summary.warnings).toEqual(["rust_warning"]);
});
});

View File

@@ -0,0 +1,286 @@
import { beforeEach, describe, expect, it, mock } from "bun:test";
import type {
TaxonomyHydrationInput,
TaxonomyHydrationResult,
} from "@/lib/server/taxonomy/types";
import { __parserClientInternals } from "@/lib/server/taxonomy/parser-client";
function streamFromText(text: string) {
const encoded = new TextEncoder().encode(text);
return new ReadableStream<Uint8Array>({
start(controller) {
controller.enqueue(encoded);
controller.close();
},
});
}
function sampleHydrationResult(): TaxonomyHydrationResult {
return {
filing_id: 1,
ticker: "AAPL",
filing_date: "2026-01-30",
filing_type: "10-Q",
parse_status: "ready",
parse_error: null,
source: "xbrl_instance",
parser_engine: "fiscal-xbrl",
parser_version: "0.1.0",
taxonomy_regime: "us-gaap",
fiscal_pack: "core",
periods: [],
faithful_rows: {
income: [],
balance: [],
cash_flow: [],
equity: [],
comprehensive_income: [],
},
statement_rows: {
income: [],
balance: [],
cash_flow: [],
equity: [],
comprehensive_income: [],
},
surface_rows: {
income: [],
balance: [],
cash_flow: [],
equity: [],
comprehensive_income: [],
},
detail_rows: {
income: {},
balance: {},
cash_flow: {},
equity: {},
comprehensive_income: {},
},
kpi_rows: [],
computed_definitions: [],
contexts: [],
derived_metrics: null,
validation_result: null,
facts_count: 0,
concepts_count: 0,
dimensions_count: 0,
assets: [],
concepts: [],
facts: [],
metric_validations: [],
normalization_summary: {
surface_row_count: 0,
detail_row_count: 0,
kpi_row_count: 0,
unmapped_row_count: 0,
material_unmapped_row_count: 0,
warnings: [],
},
xbrl_validation: {
status: "passed",
},
};
}
function sampleInput(): TaxonomyHydrationInput {
return {
filingId: 1,
ticker: "AAPL",
cik: "0000320193",
accessionNumber: "0000320193-26-000001",
filingDate: "2026-01-30",
filingType: "10-Q",
filingUrl:
"https://www.sec.gov/Archives/edgar/data/320193/000032019326000001/",
primaryDocument: "a10q.htm",
};
}
const passThroughTimeout = ((handler: TimerHandler, timeout?: number) =>
globalThis.setTimeout(
handler,
timeout,
)) as unknown as typeof globalThis.setTimeout;
const immediateTimeout = ((handler: TimerHandler) => {
if (typeof handler === "function") {
handler();
}
return 1 as unknown as ReturnType<typeof globalThis.setTimeout>;
}) as unknown as typeof globalThis.setTimeout;
describe("parser client", () => {
beforeEach(() => {
delete process.env.FISCAL_XBRL_BIN;
delete process.env.XBRL_ENGINE_TIMEOUT_MS;
});
it("throws when the sidecar binary cannot be resolved", () => {
expect(() =>
__parserClientInternals.resolveFiscalXbrlBinary({
existsSync: () => false,
}),
).toThrow(/Rust XBRL sidecar binary is required/);
});
it("returns parsed sidecar JSON on success", async () => {
const stdinWrite = mock(() => {});
const stdinEnd = mock(() => {});
const result = await __parserClientInternals.hydrateFromSidecarImpl(
sampleInput(),
{
existsSync: () => true,
spawn: mock(() => ({
stdin: {
write: stdinWrite,
end: stdinEnd,
},
stdout: streamFromText(JSON.stringify(sampleHydrationResult())),
stderr: streamFromText(""),
exited: Promise.resolve(0),
kill: mock(() => {}),
})) as never,
setTimeout: passThroughTimeout,
clearTimeout,
},
);
expect(result.parser_engine).toBe("fiscal-xbrl");
expect(stdinWrite).toHaveBeenCalledTimes(1);
expect(stdinEnd).toHaveBeenCalledTimes(1);
});
it("throws when the sidecar exits non-zero", async () => {
await expect(
__parserClientInternals.hydrateFromSidecarImpl(sampleInput(), {
existsSync: () => true,
spawn: mock(() => ({
stdin: {
write: () => {},
end: () => {},
},
stdout: streamFromText(""),
stderr: streamFromText("fatal parse error"),
exited: Promise.resolve(3),
kill: mock(() => {}),
})) as never,
setTimeout: passThroughTimeout,
clearTimeout,
}),
).rejects.toThrow(/exit code 3/);
});
it("throws on invalid JSON stdout", async () => {
await expect(
__parserClientInternals.hydrateFromSidecarImpl(sampleInput(), {
existsSync: () => true,
spawn: mock(() => ({
stdin: {
write: () => {},
end: () => {},
},
stdout: streamFromText("{not json"),
stderr: streamFromText(""),
exited: Promise.resolve(0),
kill: mock(() => {}),
})) as never,
setTimeout: passThroughTimeout,
clearTimeout,
}),
).rejects.toThrow();
});
it("kills the sidecar when the timeout fires", async () => {
const kill = mock(() => {});
await expect(
__parserClientInternals.hydrateFromSidecarImpl(sampleInput(), {
existsSync: () => true,
spawn: mock(() => ({
stdin: {
write: () => {},
end: () => {},
},
stdout: streamFromText(""),
stderr: streamFromText("killed"),
exited: Promise.resolve(137),
kill,
})) as never,
setTimeout: immediateTimeout,
clearTimeout: () => {},
}),
).rejects.toThrow(/exit code 137/);
expect(kill).toHaveBeenCalledTimes(1);
});
it("retries retryable sidecar failures but not invalid requests", async () => {
let attempts = 0;
const spawn = mock(() => {
attempts += 1;
const exitCode = attempts < 3 ? 1 : 0;
const stdout =
exitCode === 0 ? JSON.stringify(sampleHydrationResult()) : "";
const stderr = exitCode === 0 ? "" : "process killed";
return {
stdin: {
write: () => {},
end: () => {},
},
stdout: streamFromText(stdout),
stderr: streamFromText(stderr),
exited: Promise.resolve(exitCode),
kill: mock(() => {}),
};
});
const result =
await __parserClientInternals.hydrateFilingTaxonomySnapshotFromSidecarWithDeps(
sampleInput(),
{
existsSync: () => true,
spawn: spawn as never,
setTimeout: passThroughTimeout,
clearTimeout,
},
);
expect(result.parser_version).toBe("0.1.0");
expect(attempts).toBe(3);
attempts = 0;
const invalidRequestSpawn = mock(() => {
attempts += 1;
return {
stdin: {
write: () => {},
end: () => {},
},
stdout: streamFromText(""),
stderr: streamFromText("invalid request: bad command"),
exited: Promise.resolve(6),
kill: mock(() => {}),
};
});
await expect(
__parserClientInternals.hydrateFilingTaxonomySnapshotFromSidecarWithDeps(
sampleInput(),
{
existsSync: () => true,
spawn: invalidRequestSpawn as never,
setTimeout: passThroughTimeout,
clearTimeout,
},
),
).rejects.toThrow(/invalid request/);
expect(attempts).toBe(1);
});
});

View File

@@ -1,36 +1,89 @@
import { existsSync } from 'node:fs';
import { join } from 'node:path';
import type { TaxonomyHydrationInput, TaxonomyHydrationResult } from '@/lib/server/taxonomy/types';
import { withRetry } from '@/lib/server/utils/retry';
import { existsSync } from "node:fs";
import { join } from "node:path";
import type {
TaxonomyHydrationInput,
TaxonomyHydrationResult,
} from "@/lib/server/taxonomy/types";
import { withRetry } from "@/lib/server/utils/retry";
type SpawnedSidecar = {
stdin: { write: (chunk: Uint8Array) => void; end: () => void };
stdout: ReadableStream<Uint8Array>;
stderr: ReadableStream<Uint8Array>;
exited: Promise<number>;
kill: () => void;
};
type SidecarDeps = {
existsSync: typeof existsSync;
spawn: typeof Bun.spawn;
setTimeout: typeof globalThis.setTimeout;
clearTimeout: typeof globalThis.clearTimeout;
};
function candidateBinaryPaths() {
return [
process.env.FISCAL_XBRL_BIN?.trim(),
join(process.cwd(), 'bin', 'fiscal-xbrl'),
join(process.cwd(), 'rust', 'target', 'release', 'fiscal-xbrl'),
join(process.cwd(), 'rust', 'target', 'debug', 'fiscal-xbrl')
].filter((value): value is string => typeof value === 'string' && value.length > 0);
join(process.cwd(), "bin", "fiscal-xbrl"),
join(process.cwd(), "rust", "target", "release", "fiscal-xbrl"),
join(process.cwd(), "rust", "target", "debug", "fiscal-xbrl"),
].filter(
(value): value is string => typeof value === "string" && value.length > 0,
);
}
export function resolveFiscalXbrlBinary() {
const resolved = candidateBinaryPaths().find((path) => existsSync(path));
return resolveFiscalXbrlBinaryWithDeps({
existsSync,
});
}
function resolveFiscalXbrlBinaryWithDeps(
deps: Pick<SidecarDeps, "existsSync">,
) {
const resolved = candidateBinaryPaths().find((path) => deps.existsSync(path));
if (!resolved) {
throw new Error('Rust XBRL sidecar binary is required but was not found. Set FISCAL_XBRL_BIN or build `fiscal-xbrl` under rust/target.');
throw new Error(
"Rust XBRL sidecar binary is required but was not found. Set FISCAL_XBRL_BIN or build `fiscal-xbrl` under rust/target.",
);
}
return resolved;
}
export async function hydrateFilingTaxonomySnapshotFromSidecar(
input: TaxonomyHydrationInput
input: TaxonomyHydrationInput,
): Promise<TaxonomyHydrationResult> {
return withRetry(() => hydrateFromSidecarImpl(input));
return hydrateFilingTaxonomySnapshotFromSidecarWithDeps(input, {
existsSync,
spawn: Bun.spawn,
setTimeout: globalThis.setTimeout,
clearTimeout: globalThis.clearTimeout,
});
}
async function hydrateFromSidecarImpl(input: TaxonomyHydrationInput): Promise<TaxonomyHydrationResult> {
const binary = resolveFiscalXbrlBinary();
const timeoutMs = Math.max(Number(process.env.XBRL_ENGINE_TIMEOUT_MS ?? 45_000), 1_000);
const command = [binary, 'hydrate-filing'];
async function hydrateFilingTaxonomySnapshotFromSidecarWithDeps(
input: TaxonomyHydrationInput,
deps: SidecarDeps,
): Promise<TaxonomyHydrationResult> {
return withRetry(() => hydrateFromSidecarImpl(input, deps));
}
async function hydrateFromSidecarImpl(
input: TaxonomyHydrationInput,
deps: SidecarDeps = {
existsSync,
spawn: Bun.spawn,
setTimeout: globalThis.setTimeout,
clearTimeout: globalThis.clearTimeout,
},
): Promise<TaxonomyHydrationResult> {
const binary = resolveFiscalXbrlBinaryWithDeps(deps);
const timeoutMs = Math.max(
Number(process.env.XBRL_ENGINE_TIMEOUT_MS ?? 45_000),
1_000,
);
const command = [binary, "hydrate-filing"];
const requestBody = JSON.stringify({
filingId: input.filingId,
ticker: input.ticker,
@@ -40,22 +93,24 @@ async function hydrateFromSidecarImpl(input: TaxonomyHydrationInput): Promise<Ta
filingType: input.filingType,
filingUrl: input.filingUrl,
primaryDocument: input.primaryDocument,
cacheDir: process.env.FISCAL_XBRL_CACHE_DIR ?? join(process.cwd(), '.cache', 'xbrl')
cacheDir:
process.env.FISCAL_XBRL_CACHE_DIR ??
join(process.cwd(), ".cache", "xbrl"),
});
const child = Bun.spawn(command, {
stdin: 'pipe',
stdout: 'pipe',
stderr: 'pipe',
const child = deps.spawn(command, {
stdin: "pipe",
stdout: "pipe",
stderr: "pipe",
env: {
...process.env
}
});
...process.env,
},
}) as SpawnedSidecar;
child.stdin.write(new TextEncoder().encode(requestBody));
child.stdin.end();
const timeout = setTimeout(() => {
const timeout = deps.setTimeout(() => {
child.kill();
}, timeoutMs);
@@ -63,7 +118,7 @@ async function hydrateFromSidecarImpl(input: TaxonomyHydrationInput): Promise<Ta
const [stdout, stderr, exitCode] = await Promise.all([
new Response(child.stdout).text(),
new Response(child.stderr).text(),
child.exited
child.exited,
]);
if (stderr.trim().length > 0) {
@@ -71,11 +126,20 @@ async function hydrateFromSidecarImpl(input: TaxonomyHydrationInput): Promise<Ta
}
if (exitCode !== 0) {
throw new Error(`Rust XBRL sidecar failed with exit code ${exitCode}: ${stderr.trim() || stdout.trim() || 'no error output'}`);
throw new Error(
`Rust XBRL sidecar failed with exit code ${exitCode}: ${stderr.trim() || stdout.trim() || "no error output"}`,
);
}
return JSON.parse(stdout) as TaxonomyHydrationResult;
} finally {
clearTimeout(timeout);
deps.clearTimeout(timeout);
}
}
export const __parserClientInternals = {
candidateBinaryPaths,
hydrateFilingTaxonomySnapshotFromSidecarWithDeps,
hydrateFromSidecarImpl,
resolveFiscalXbrlBinary: resolveFiscalXbrlBinaryWithDeps,
};

View File

@@ -1,14 +1,15 @@
import type {
Filing,
FinancialStatementKind,
MetricValidationResult
} from '@/lib/types';
MetricValidationResult,
} from "@/lib/types";
import type { ComputedDefinition } from "@/lib/generated";
import type {
FilingTaxonomyAssetType,
FilingTaxonomyParseStatus,
FilingTaxonomyPeriod,
FilingTaxonomySource
} from '@/lib/server/repos/filing-taxonomy';
FilingTaxonomySource,
} from "@/lib/server/repos/filing-taxonomy";
export type TaxonomyAsset = {
asset_type: FilingTaxonomyAssetType;
@@ -99,12 +100,12 @@ export type TaxonomyConcept = {
};
export type TaxonomyMetricValidationCheck = {
metric_key: keyof NonNullable<Filing['metrics']>;
metric_key: keyof NonNullable<Filing["metrics"]>;
taxonomy_value: number | null;
llm_value: number | null;
absolute_diff: number | null;
relative_diff: number | null;
status: 'not_run' | 'matched' | 'mismatch' | 'error';
status: "not_run" | "matched" | "mismatch" | "error";
evidence_pages: number[];
pdf_url: string | null;
provider: string | null;
@@ -119,7 +120,7 @@ export type TaxonomyHydrationPeriod = {
filing_date: string;
period_start: string | null;
period_end: string | null;
filing_type: '10-K' | '10-Q';
filing_type: "10-K" | "10-Q";
period_label: string;
};
@@ -148,7 +149,7 @@ export type TaxonomyHydrationSurfaceRow = {
category: string;
template_section?: string;
order: number;
unit: 'currency' | 'count' | 'shares' | 'percent' | 'ratio';
unit: "currency" | "count" | "shares" | "percent" | "ratio";
values: Record<string, number | null>;
source_concepts: string[];
source_row_keys: string[];
@@ -156,10 +157,14 @@ export type TaxonomyHydrationSurfaceRow = {
formula_key: string | null;
has_dimensions: boolean;
resolved_source_row_keys: Record<string, string | null>;
statement?: 'income' | 'balance' | 'cash_flow';
statement?: "income" | "balance" | "cash_flow";
detail_count?: number;
resolution_method?: 'direct' | 'surface_bridge' | 'formula_derived' | 'not_meaningful';
confidence?: 'high' | 'medium' | 'low';
resolution_method?:
| "direct"
| "surface_bridge"
| "formula_derived"
| "not_meaningful";
confidence?: "high" | "medium" | "low";
warning_codes?: string[];
};
@@ -183,7 +188,7 @@ export type TaxonomyHydrationStructuredKpiRow = {
key: string;
label: string;
category: string;
unit: 'currency' | 'count' | 'shares' | 'percent' | 'ratio';
unit: "currency" | "count" | "shares" | "percent" | "ratio";
order: number;
segment: string | null;
axis: string | null;
@@ -191,7 +196,7 @@ export type TaxonomyHydrationStructuredKpiRow = {
values: Record<string, number | null>;
source_concepts: string[];
source_fact_ids: number[];
provenance_type: 'taxonomy' | 'structured_note';
provenance_type: "taxonomy" | "structured_note";
has_dimensions: boolean;
};
@@ -205,7 +210,7 @@ export type TaxonomyHydrationNormalizationSummary = {
};
export type XbrlValidationResult = {
status: 'passed' | 'warning' | 'error';
status: "passed" | "warning" | "error";
message?: string;
};
@@ -215,7 +220,7 @@ export type TaxonomyHydrationInput = {
cik: string;
accessionNumber: string;
filingDate: string;
filingType: '10-K' | '10-Q';
filingType: "10-K" | "10-Q";
filingUrl: string | null;
primaryDocument: string | null;
};
@@ -224,20 +229,30 @@ export type TaxonomyHydrationResult = {
filing_id: number;
ticker: string;
filing_date: string;
filing_type: '10-K' | '10-Q';
filing_type: "10-K" | "10-Q";
parse_status: FilingTaxonomyParseStatus;
parse_error: string | null;
source: FilingTaxonomySource;
parser_engine: string;
parser_version: string;
taxonomy_regime: 'us-gaap' | 'ifrs-full' | 'unknown';
taxonomy_regime: "us-gaap" | "ifrs-full" | "unknown";
fiscal_pack: string | null;
periods: TaxonomyHydrationPeriod[];
faithful_rows: Record<FinancialStatementKind, TaxonomyHydrationStatementRow[]>;
statement_rows: Record<FinancialStatementKind, TaxonomyHydrationStatementRow[]>;
faithful_rows: Record<
FinancialStatementKind,
TaxonomyHydrationStatementRow[]
>;
statement_rows: Record<
FinancialStatementKind,
TaxonomyHydrationStatementRow[]
>;
surface_rows: Record<FinancialStatementKind, TaxonomyHydrationSurfaceRow[]>;
detail_rows: Record<FinancialStatementKind, Record<string, TaxonomyHydrationDetailRow[]>>;
detail_rows: Record<
FinancialStatementKind,
Record<string, TaxonomyHydrationDetailRow[]>
>;
kpi_rows: TaxonomyHydrationStructuredKpiRow[];
computed_definitions: ComputedDefinition[];
contexts: Array<{
context_id: string;
entity_identifier: string | null;
@@ -248,7 +263,7 @@ export type TaxonomyHydrationResult = {
segment_json: Record<string, unknown> | null;
scenario_json: Record<string, unknown> | null;
}>;
derived_metrics: Filing['metrics'];
derived_metrics: Filing["metrics"];
validation_result: MetricValidationResult | null;
facts_count: number;
concepts_count: number;