Add history window controls and expand taxonomy pack support

- add 3Y/5Y/10Y financial history filtering and reorganize normalization details UI
- add new fiscal taxonomy surface/income bridge/KPI packs and update Rust taxonomy loading
- auto-detect Homebrew SQLite for native `sqlite-vec` in local dev/e2e with docs and env guidance
This commit is contained in:
2026-03-18 23:40:28 -04:00
parent f8426c4dde
commit 17de3dd72d
102 changed files with 14978 additions and 1316 deletions

View File

@@ -0,0 +1,120 @@
import { describe, expect, it } from "vitest";
import {
filterPeriodsByHistoryWindow,
financialHistoryLimit,
} from "@/lib/financials/history-window";
import type { FinancialStatementPeriod } from "@/lib/types";
/**
 * Builds a 10-K statement period fixture for the tests below.
 *
 * The period end is the zero-padded `YYYY-MM-DD` date assembled from the
 * arguments and doubles as the fixture id. The filing id is the year itself,
 * the period starts on 1 January of that year, and the filing date is
 * 1 February of the following year.
 *
 * @param year - Fiscal year of the period.
 * @param month - Period-end month (defaults to 12).
 * @param day - Period-end day of month (defaults to 31).
 * @returns An object literal checked against `FinancialStatementPeriod`.
 */
function createPeriod(year: number, month = 12, day = 31) {
  const twoDigits = (value: number) => String(value).padStart(2, "0");
  const periodEnd = [String(year), twoDigits(month), twoDigits(day)].join("-");

  return {
    id: periodEnd,
    filingId: year,
    accessionNumber: `0000-${year}`,
    filingDate: `${year + 1}-02-01`,
    periodStart: `${year}-01-01`,
    periodEnd,
    filingType: "10-K",
    periodLabel: `FY ${year}`,
  } satisfies FinancialStatementPeriod;
}
// Pins the fetch-limit arithmetic: window + 2 for annual/ltm cadences and
// window * 4 + 4 for the quarterly cadence.
describe("financialHistoryLimit", () => {
  it("scales fetch size for annual and ltm cadences", () => {
    const cases = [
      { cadence: "annual", window: 3, limit: 5 },
      { cadence: "ltm", window: 10, limit: 12 },
    ] as const;

    for (const { cadence, window, limit } of cases) {
      expect(financialHistoryLimit(cadence, window)).toBe(limit);
    }
  });

  it("fetches a buffered number of quarterly periods", () => {
    const cases = [
      { window: 3, limit: 16 },
      { window: 5, limit: 24 },
      { window: 10, limit: 44 },
    ] as const;

    for (const { window, limit } of cases) {
      expect(financialHistoryLimit("quarterly", window)).toBe(limit);
    }
  });
});
// Verifies that the filter keeps exactly the trailing periods of a window and
// hands short lists back untouched.
describe("filterPeriodsByHistoryWindow", () => {
  // Runs the filter and projects the surviving periods onto their ids.
  const visibleIds = (
    ...args: Parameters<typeof filterPeriodsByHistoryWindow>
  ) => filterPeriodsByHistoryWindow(...args).map((period) => period.id);

  // Consecutive fiscal-year-end fixtures starting at `firstYear`.
  const annualPeriods = (firstYear: number, length: number) =>
    Array.from({ length }, (_, offset) => createPeriod(firstYear + offset));

  it("keeps the latest three annual periods", () => {
    // Fiscal years 2018 through 2024.
    const periods = annualPeriods(2018, 7);

    expect(visibleIds(periods, "annual", 3)).toEqual([
      "2022-12-31",
      "2023-12-31",
      "2024-12-31",
    ]);
  });

  it("keeps the exact trailing annual count for longer windows", () => {
    // Fiscal years 2013 through 2024; the two oldest must be dropped.
    const periods = annualPeriods(2013, 12);
    const expectedIds = Array.from(
      { length: 10 },
      (_, offset) => `${2015 + offset}-12-31`,
    );

    expect(visibleIds(periods, "annual", 10)).toEqual(expectedIds);
  });

  it("keeps the exact trailing quarterly count", () => {
    const quarterEnds = [
      [3, 31],
      [6, 30],
      [9, 30],
      [12, 31],
    ] as const;
    // Thirteen quarters: all of 2022-2024 plus Q1 2025.
    const periods = [
      ...[2022, 2023, 2024].flatMap((year) =>
        quarterEnds.map(([month, day]) => createPeriod(year, month, day)),
      ),
      createPeriod(2025, 3, 31),
    ];

    expect(visibleIds(periods, "quarterly", 3)).toEqual([
      "2022-06-30",
      "2022-09-30",
      "2022-12-31",
      "2023-03-31",
      "2023-06-30",
      "2023-09-30",
      "2023-12-31",
      "2024-03-31",
      "2024-06-30",
      "2024-09-30",
      "2024-12-31",
      "2025-03-31",
    ]);
  });

  it("returns all periods when the list is shorter than the requested window", () => {
    const periods = [createPeriod(2024)];
    const filtered = filterPeriodsByHistoryWindow(periods, "annual", 5);

    expect(filtered).toEqual(periods);
  });
});

View File

@@ -0,0 +1,42 @@
import type { FinancialCadence, FinancialStatementPeriod } from "@/lib/types";
/**
 * Selectable history window sizes (3, 5 or 10).
 *
 * The helpers in this module treat the value as a number of years: it is used
 * directly as a period count for non-quarterly cadences and multiplied by
 * four for the quarterly cadence.
 */
export type FinancialHistoryWindow = 3 | 5 | 10;
/**
 * Computes how many periods to fetch for a history window.
 *
 * The result is larger than the number of periods the window displays:
 * quarterly cadence yields four periods per window year plus four extra,
 * every other cadence yields the window size plus two.
 *
 * @param cadence - Reporting cadence; only `"quarterly"` is treated specially.
 * @param window - Selected history window size.
 * @returns The period count to request.
 */
export function financialHistoryLimit(
  cadence: FinancialCadence,
  window: FinancialHistoryWindow,
) {
  const isQuarterly = cadence === "quarterly";
  const periodsPerYear = isQuarterly ? 4 : 1;
  const buffer = isQuarterly ? 4 : 2;

  return window * periodsPerYear + buffer;
}
/**
 * Number of periods a history window displays: four per window unit for the
 * quarterly cadence, one per window unit for every other cadence.
 */
function visiblePeriodCount(
  cadence: FinancialCadence,
  window: FinancialHistoryWindow,
) {
  return cadence === "quarterly" ? window * 4 : window;
}
/**
 * Trims a period list down to the entries visible in a history window.
 *
 * The function does not sort: it keeps the last entries of `periods` in their
 * existing order. When the list already fits within the window (including the
 * empty list), the input array itself is returned rather than a copy.
 *
 * @param periods - Periods to trim; the trailing entries are the ones kept.
 * @param cadence - Reporting cadence used to size the window.
 * @param window - Selected history window size.
 * @returns The input array when it fits, otherwise a new array holding its
 *   trailing entries.
 */
export function filterPeriodsByHistoryWindow(
  periods: FinancialStatementPeriod[],
  cadence: FinancialCadence,
  window: FinancialHistoryWindow,
) {
  const keep = visiblePeriodCount(cadence, window);

  // An empty list never exceeds `keep`, so it is handed back unchanged too.
  return periods.length > keep ? periods.slice(-keep) : periods;
}

View File

@@ -3,27 +3,123 @@ import type {
FinancialCategory,
FinancialSurfaceKind,
SurfaceDetailMap,
SurfaceFinancialRow
} from '@/lib/types';
SurfaceFinancialRow,
} from "@/lib/types";
const SURFACE_CHILDREN: Partial<Record<Extract<FinancialSurfaceKind, 'income_statement' | 'balance_sheet' | 'cash_flow_statement'>, Record<string, string[]>>> = {
const SURFACE_CHILDREN: Partial<
Record<
Extract<
FinancialSurfaceKind,
"income_statement" | "balance_sheet" | "cash_flow_statement"
>,
Record<string, string[]>
>
> = {
income_statement: {
operating_expenses: [
'selling_general_and_administrative',
'research_and_development',
'other_operating_expense'
]
}
"selling_general_and_administrative",
"research_and_development",
"other_operating_expense",
],
},
balance_sheet: {
total_assets: [
"current_assets",
"non_current_assets",
"assets_held_for_sale",
],
current_assets: [
"cash_and_equivalents",
"short_term_investments",
"accounts_receivable",
"inventory",
"prepaid_expenses",
"other_current_assets",
],
non_current_assets: [
"property_plant_equipment_net",
"goodwill",
"intangible_assets_net",
"long_term_investments",
"deferred_tax_assets",
"other_non_current_assets",
],
total_liabilities: [
"current_liabilities",
"non_current_liabilities",
"liabilities_held_for_sale",
],
current_liabilities: [
"accounts_payable",
"short_term_debt",
"current_portion_of_long_term_debt",
"accrued_liabilities",
"deferred_revenue",
"other_current_liabilities",
],
non_current_liabilities: [
"long_term_debt",
"deferred_tax_liabilities",
"deferred_revenue_non_current",
"other_non_current_liabilities",
],
total_equity: [
"stockholders_equity",
"minority_interest",
"total_liabilities_and_equity",
],
stockholders_equity: [
"common_stock",
"retained_earnings",
"additional_paid_in_capital",
"treasury_stock",
"accumulated_other_comprehensive_income",
"other_stockholders_equity",
],
},
cash_flow_statement: {
net_cash_from_operating: [
"operating_cash_flow_adjustments",
"changes_in_working_capital",
],
operating_cash_flow_adjustments: [
"depreciation_and_amortization",
"stock_based_compensation",
"deferred_taxes",
"other_non_cash_items",
],
changes_in_working_capital: [
"change_in_accounts_receivable",
"change_in_inventory",
"change_in_accounts_payable",
"change_in_other_working_capital",
],
net_cash_from_investing: [
"capital_expenditures",
"acquisitions",
"purchases_of_investments",
"sales_of_investments",
"other_investing_activities",
],
net_cash_from_financing: [
"debt_issuance",
"debt_repayment",
"stock_issuance",
"stock_repurchase",
"dividends_paid",
"other_financing_activities",
],
},
};
export type StatementInspectorSelection = {
kind: 'surface' | 'detail';
kind: "surface" | "detail";
key: string;
parentKey?: string;
};
export type StatementTreeDetailNode = {
kind: 'detail';
kind: "detail";
id: string;
level: number;
row: DetailFinancialRow;
@@ -32,7 +128,7 @@ export type StatementTreeDetailNode = {
};
export type StatementTreeSurfaceNode = {
kind: 'surface';
kind: "surface";
id: string;
level: number;
row: SurfaceFinancialRow;
@@ -45,7 +141,9 @@ export type StatementTreeSurfaceNode = {
matchesSearch: boolean;
};
export type StatementTreeNode = StatementTreeSurfaceNode | StatementTreeDetailNode;
export type StatementTreeNode =
| StatementTreeSurfaceNode
| StatementTreeDetailNode;
export type StatementTreeSection = {
key: string;
@@ -62,13 +160,13 @@ export type StatementTreeModel = {
export type ResolvedStatementSelection =
| {
kind: 'surface';
kind: "surface";
row: SurfaceFinancialRow;
childSurfaceRows: SurfaceFinancialRow[];
detailRows: DetailFinancialRow[];
}
| {
kind: 'detail';
kind: "detail";
row: DetailFinancialRow;
parentSurfaceRow: SurfaceFinancialRow | null;
};
@@ -79,11 +177,16 @@ type Categories = Array<{
count: number;
}>;
const UNMAPPED_DETAIL_GROUP_KEY = 'unmapped';
const UNMAPPED_SECTION_KEY = 'unmapped_residual';
const UNMAPPED_SECTION_LABEL = 'Unmapped / Residual';
const UNMAPPED_DETAIL_GROUP_KEY = "unmapped";
const UNMAPPED_SECTION_KEY = "unmapped_residual";
const UNMAPPED_SECTION_LABEL = "Unmapped / Residual";
function surfaceConfigForKind(surfaceKind: Extract<FinancialSurfaceKind, 'income_statement' | 'balance_sheet' | 'cash_flow_statement'>) {
function surfaceConfigForKind(
surfaceKind: Extract<
FinancialSurfaceKind,
"income_statement" | "balance_sheet" | "cash_flow_statement"
>,
) {
return SURFACE_CHILDREN[surfaceKind] ?? {};
}
@@ -101,9 +204,9 @@ function searchTextForSurface(row: SurfaceFinancialRow) {
row.key,
...(row.sourceConcepts ?? []),
...(row.sourceRowKeys ?? []),
...(row.warningCodes ?? [])
...(row.warningCodes ?? []),
]
.join(' ')
.join(" ")
.toLowerCase();
}
@@ -115,13 +218,16 @@ function searchTextForDetail(row: DetailFinancialRow) {
row.conceptKey,
row.qname,
row.localName,
...(row.dimensionsSummary ?? [])
...(row.dimensionsSummary ?? []),
]
.join(' ')
.join(" ")
.toLowerCase();
}
function sortSurfaceRows(left: SurfaceFinancialRow, right: SurfaceFinancialRow) {
function sortSurfaceRows(
left: SurfaceFinancialRow,
right: SurfaceFinancialRow,
) {
if (left.order !== right.order) {
return left.order - right.order;
}
@@ -141,15 +247,24 @@ function buildUnmappedDetailNodes(input: {
return [...(input.statementDetails?.[UNMAPPED_DETAIL_GROUP_KEY] ?? [])]
.sort(sortDetailRows)
.filter((detail) => normalizedSearch.length === 0 || searchTextForDetail(detail).includes(normalizedSearch))
.map((detail) => ({
kind: 'detail',
id: detailNodeId(UNMAPPED_DETAIL_GROUP_KEY, detail),
level: 0,
row: detail,
parentSurfaceKey: UNMAPPED_DETAIL_GROUP_KEY,
matchesSearch: normalizedSearch.length > 0 && searchTextForDetail(detail).includes(normalizedSearch)
}) satisfies StatementTreeDetailNode);
.filter(
(detail) =>
normalizedSearch.length === 0 ||
searchTextForDetail(detail).includes(normalizedSearch),
)
.map(
(detail) =>
({
kind: "detail",
id: detailNodeId(UNMAPPED_DETAIL_GROUP_KEY, detail),
level: 0,
row: detail,
parentSurfaceKey: UNMAPPED_DETAIL_GROUP_KEY,
matchesSearch:
normalizedSearch.length > 0 &&
searchTextForDetail(detail).includes(normalizedSearch),
}) satisfies StatementTreeDetailNode,
);
}
function countNodes(nodes: StatementTreeNode[]) {
@@ -157,7 +272,7 @@ function countNodes(nodes: StatementTreeNode[]) {
for (const node of nodes) {
count += 1;
if (node.kind === 'surface') {
if (node.kind === "surface") {
count += countNodes(node.children);
}
}
@@ -166,7 +281,10 @@ function countNodes(nodes: StatementTreeNode[]) {
}
export function buildStatementTree(input: {
surfaceKind: Extract<FinancialSurfaceKind, 'income_statement' | 'balance_sheet' | 'cash_flow_statement'>;
surfaceKind: Extract<
FinancialSurfaceKind,
"income_statement" | "balance_sheet" | "cash_flow_statement"
>;
rows: SurfaceFinancialRow[];
statementDetails: SurfaceDetailMap | null;
categories: Categories;
@@ -188,44 +306,70 @@ export function buildStatementTree(input: {
const normalizedSearch = normalize(input.searchQuery);
const autoExpandedKeys = new Set<string>();
const buildSurfaceNode = (row: SurfaceFinancialRow, level: number): StatementTreeSurfaceNode | null => {
const buildSurfaceNode = (
row: SurfaceFinancialRow,
level: number,
): StatementTreeSurfaceNode | null => {
const childSurfaceRows = (config[row.key] ?? [])
.map((key) => rowByKey.get(key))
.filter((candidate): candidate is SurfaceFinancialRow => Boolean(candidate))
.filter((candidate): candidate is SurfaceFinancialRow =>
Boolean(candidate),
)
.sort(sortSurfaceRows);
const detailRows = [...(input.statementDetails?.[row.key] ?? [])].sort(sortDetailRows);
const detailRows = [...(input.statementDetails?.[row.key] ?? [])].sort(
sortDetailRows,
);
const childSurfaceNodes = childSurfaceRows
.map((childRow) => buildSurfaceNode(childRow, level + 1))
.filter((node): node is StatementTreeSurfaceNode => Boolean(node));
const detailNodes = detailRows
.filter((detail) => normalizedSearch.length === 0 || searchTextForDetail(detail).includes(normalizedSearch))
.map((detail) => ({
kind: 'detail',
id: detailNodeId(row.key, detail),
level: level + 1,
row: detail,
parentSurfaceKey: row.key,
matchesSearch: normalizedSearch.length > 0 && searchTextForDetail(detail).includes(normalizedSearch)
}) satisfies StatementTreeDetailNode);
.filter(
(detail) =>
normalizedSearch.length === 0 ||
searchTextForDetail(detail).includes(normalizedSearch),
)
.map(
(detail) =>
({
kind: "detail",
id: detailNodeId(row.key, detail),
level: level + 1,
row: detail,
parentSurfaceKey: row.key,
matchesSearch:
normalizedSearch.length > 0 &&
searchTextForDetail(detail).includes(normalizedSearch),
}) satisfies StatementTreeDetailNode,
);
const children = [...childSurfaceNodes, ...detailNodes];
const matchesSearch = normalizedSearch.length > 0 && searchTextForSurface(row).includes(normalizedSearch);
const hasMatchingDescendant = normalizedSearch.length > 0 && children.length > 0;
const matchesSearch =
normalizedSearch.length > 0 &&
searchTextForSurface(row).includes(normalizedSearch);
const hasMatchingDescendant =
normalizedSearch.length > 0 && children.length > 0;
if (normalizedSearch.length > 0 && !matchesSearch && !hasMatchingDescendant) {
if (
normalizedSearch.length > 0 &&
!matchesSearch &&
!hasMatchingDescendant
) {
return null;
}
const childSurfaceKeys = childSurfaceRows.map((candidate) => candidate.key);
const directDetailCount = detailRows.length;
const autoExpanded = normalizedSearch.length > 0 && !matchesSearch && children.length > 0;
const expanded = children.length > 0 && (input.expandedRowKeys.has(row.key) || autoExpanded);
const autoExpanded =
normalizedSearch.length > 0 && !matchesSearch && children.length > 0;
const expanded =
children.length > 0 &&
(input.expandedRowKeys.has(row.key) || autoExpanded);
if (autoExpanded) {
autoExpandedKeys.add(row.key);
}
return {
kind: 'surface',
kind: "surface",
id: row.key,
level,
row,
@@ -235,7 +379,7 @@ export function buildStatementTree(input: {
expandable: children.length > 0,
expanded,
autoExpanded,
matchesSearch
matchesSearch,
};
};
@@ -246,75 +390,101 @@ export function buildStatementTree(input: {
.filter((node): node is StatementTreeSurfaceNode => Boolean(node));
if (input.categories.length === 0) {
const sections: StatementTreeSection[] = rootNodes.length > 0
? [{ key: 'ungrouped', label: null, nodes: rootNodes }]
: [];
const sections: StatementTreeSection[] =
rootNodes.length > 0
? [{ key: "ungrouped", label: null, nodes: rootNodes }]
: [];
const unmappedNodes = buildUnmappedDetailNodes({
statementDetails: input.statementDetails,
searchQuery: input.searchQuery
searchQuery: input.searchQuery,
});
if (unmappedNodes.length > 0) {
sections.push({
key: UNMAPPED_SECTION_KEY,
label: UNMAPPED_SECTION_LABEL,
nodes: unmappedNodes
nodes: unmappedNodes,
});
}
return {
sections,
autoExpandedKeys,
visibleNodeCount: sections.reduce((sum, section) => sum + countNodes(section.nodes), 0),
totalNodeCount: input.rows.length + Object.values(input.statementDetails ?? {}).reduce((sum, rows) => sum + rows.length, 0)
visibleNodeCount: sections.reduce(
(sum, section) => sum + countNodes(section.nodes),
0,
),
totalNodeCount:
input.rows.length +
Object.values(input.statementDetails ?? {}).reduce(
(sum, rows) => sum + rows.length,
0,
),
};
}
const sections: StatementTreeSection[] = [];
const categoriesByKey = new Map(input.categories.map((category) => [category.key, category.label]));
const categoriesByKey = new Map(
input.categories.map((category) => [category.key, category.label]),
);
for (const category of input.categories) {
const nodes = rootNodes.filter((node) => node.row.category === category.key);
const nodes = rootNodes.filter(
(node) => node.row.category === category.key,
);
if (nodes.length > 0) {
sections.push({
key: category.key,
label: category.label,
nodes
nodes,
});
}
}
const uncategorized = rootNodes.filter((node) => !categoriesByKey.has(node.row.category));
const uncategorized = rootNodes.filter(
(node) => !categoriesByKey.has(node.row.category),
);
if (uncategorized.length > 0) {
sections.push({
key: 'uncategorized',
key: "uncategorized",
label: null,
nodes: uncategorized
nodes: uncategorized,
});
}
const unmappedNodes = buildUnmappedDetailNodes({
statementDetails: input.statementDetails,
searchQuery: input.searchQuery
searchQuery: input.searchQuery,
});
if (unmappedNodes.length > 0) {
sections.push({
key: UNMAPPED_SECTION_KEY,
label: UNMAPPED_SECTION_LABEL,
nodes: unmappedNodes
nodes: unmappedNodes,
});
}
return {
sections,
autoExpandedKeys,
visibleNodeCount: sections.reduce((sum, section) => sum + countNodes(section.nodes), 0),
totalNodeCount: input.rows.length + Object.values(input.statementDetails ?? {}).reduce((sum, rows) => sum + rows.length, 0)
visibleNodeCount: sections.reduce(
(sum, section) => sum + countNodes(section.nodes),
0,
),
totalNodeCount:
input.rows.length +
Object.values(input.statementDetails ?? {}).reduce(
(sum, rows) => sum + rows.length,
0,
),
};
}
export function resolveStatementSelection(input: {
surfaceKind: Extract<FinancialSurfaceKind, 'income_statement' | 'balance_sheet' | 'cash_flow_statement'>;
surfaceKind: Extract<
FinancialSurfaceKind,
"income_statement" | "balance_sheet" | "cash_flow_statement"
>;
rows: SurfaceFinancialRow[];
statementDetails: SurfaceDetailMap | null;
selection: StatementInspectorSelection | null;
@@ -328,7 +498,7 @@ export function resolveStatementSelection(input: {
const rowByKey = new Map(input.rows.map((row) => [row.key, row]));
const config = surfaceConfigForKind(input.surfaceKind);
if (selection.kind === 'surface') {
if (selection.kind === "surface") {
const row = rowByKey.get(selection.key);
if (!row) {
return null;
@@ -336,32 +506,38 @@ export function resolveStatementSelection(input: {
const childSurfaceRows = (config[row.key] ?? [])
.map((key) => rowByKey.get(key))
.filter((candidate): candidate is SurfaceFinancialRow => Boolean(candidate))
.filter((candidate): candidate is SurfaceFinancialRow =>
Boolean(candidate),
)
.sort(sortSurfaceRows);
return {
kind: 'surface',
kind: "surface",
row,
childSurfaceRows,
detailRows: [...(input.statementDetails?.[row.key] ?? [])].sort(sortDetailRows)
detailRows: [...(input.statementDetails?.[row.key] ?? [])].sort(
sortDetailRows,
),
};
}
const parentSurfaceKey = selection.parentKey ?? null;
const detailRows = parentSurfaceKey === UNMAPPED_DETAIL_GROUP_KEY
? input.statementDetails?.[UNMAPPED_DETAIL_GROUP_KEY] ?? []
: parentSurfaceKey
? input.statementDetails?.[parentSurfaceKey] ?? []
: Object.values(input.statementDetails ?? {}).flat();
const row = detailRows.find((candidate) => candidate.key === selection.key) ?? null;
const detailRows =
parentSurfaceKey === UNMAPPED_DETAIL_GROUP_KEY
? (input.statementDetails?.[UNMAPPED_DETAIL_GROUP_KEY] ?? [])
: parentSurfaceKey
? (input.statementDetails?.[parentSurfaceKey] ?? [])
: Object.values(input.statementDetails ?? {}).flat();
const row =
detailRows.find((candidate) => candidate.key === selection.key) ?? null;
if (!row) {
return null;
}
return {
kind: 'detail',
kind: "detail",
row,
parentSurfaceRow: rowByKey.get(row.parentSurfaceKey) ?? null
parentSurfaceRow: rowByKey.get(row.parentSurfaceKey) ?? null,
};
}

View File

@@ -1,19 +1,19 @@
import { mkdirSync } from 'node:fs';
import { dirname } from 'node:path';
import { Database } from 'bun:sqlite';
import { drizzle } from 'drizzle-orm/bun-sqlite';
import { load as loadSqliteVec } from 'sqlite-vec';
import { mkdirSync } from "node:fs";
import { dirname } from "node:path";
import { Database } from "bun:sqlite";
import { drizzle } from "drizzle-orm/bun-sqlite";
import { load as loadSqliteVec } from "sqlite-vec";
import {
ensureFinancialIngestionSchemaHealthy,
resolveFinancialSchemaRepairMode
} from './financial-ingestion-schema';
import { schema } from './schema';
resolveFinancialSchemaRepairMode,
} from "./financial-ingestion-schema";
import { schema } from "./schema";
import {
ensureLocalSqliteSchema,
hasColumn,
hasTable,
TAXONOMY_SNAPSHOT_REQUIRED_COLUMNS
} from './sqlite-schema-compat';
TAXONOMY_SNAPSHOT_REQUIRED_COLUMNS,
} from "./sqlite-schema-compat";
type AppDrizzleDb = ReturnType<typeof createDb>;
@@ -25,15 +25,15 @@ declare global {
}
function getDatabasePath() {
const raw = process.env.DATABASE_URL?.trim() || 'file:data/fiscal.sqlite';
let databasePath = raw.startsWith('file:') ? raw.slice(5) : raw;
const raw = process.env.DATABASE_URL?.trim() || "file:data/fiscal.sqlite";
let databasePath = raw.startsWith("file:") ? raw.slice(5) : raw;
if (databasePath.startsWith('///')) {
if (databasePath.startsWith("///")) {
databasePath = databasePath.slice(2);
}
if (!databasePath) {
throw new Error('DATABASE_URL must point to a SQLite file path.');
throw new Error("DATABASE_URL must point to a SQLite file path.");
}
return databasePath;
@@ -48,7 +48,7 @@ function configureCustomSqliteRuntime() {
}
const customSqlitePath = process.env.SQLITE_CUSTOM_LIB_PATH?.trim();
if (process.platform === 'darwin' && customSqlitePath) {
if (process.platform === "darwin" && customSqlitePath) {
Database.setCustomSQLite(customSqlitePath);
}
@@ -56,9 +56,11 @@ function configureCustomSqliteRuntime() {
}
function loadSqliteExtensions(client: Database) {
try {
const customVectorExtensionPath = process.env.SQLITE_VEC_EXTENSION_PATH?.trim();
const customVectorExtensionPath =
process.env.SQLITE_VEC_EXTENSION_PATH?.trim();
const customSqlitePath = process.env.SQLITE_CUSTOM_LIB_PATH?.trim();
try {
if (customVectorExtensionPath) {
client.loadExtension(customVectorExtensionPath);
} else {
@@ -69,8 +71,18 @@ function loadSqliteExtensions(client: Database) {
} catch (error) {
vectorExtensionStatus.set(client, false);
const reason = error instanceof Error ? error.message : 'Unknown sqlite extension error';
console.warn(`[sqlite] sqlite-vec unavailable, falling back to table-backed vector storage: ${reason}`);
const reason =
error instanceof Error ? error.message : "Unknown sqlite extension error";
if (customSqlitePath || customVectorExtensionPath) {
console.warn(
`[sqlite] sqlite-vec native extension load failed (SQLITE_CUSTOM_LIB_PATH=${customSqlitePath ?? "unset"}, SQLITE_VEC_EXTENSION_PATH=${customVectorExtensionPath ?? "package default"}). Falling back to table-backed vector storage: ${reason}`,
);
return;
}
console.warn(
`[sqlite] sqlite-vec unavailable, falling back to table-backed vector storage: ${reason}`,
);
}
}
@@ -98,18 +110,18 @@ function ensureSearchVirtualTables(client: Database) {
if (isVectorExtensionLoaded(client)) {
client.exec(`
CREATE VIRTUAL TABLE IF NOT EXISTS \`search_chunk_vec\` USING vec0(
\`chunk_id\` integer PRIMARY KEY,
\`embedding\` float[256],
\`scope\` text,
\`user_id\` text,
\`source_kind\` text,
\`ticker\` text,
\`accession_number\` text,
\`filing_date\` text,
+\`document_id\` integer,
+\`chunk_index\` integer,
+\`citation_label\` text
CREATE VIRTUAL TABLE IF NOT EXISTS search_chunk_vec USING vec0(
chunk_id integer PRIMARY KEY,
embedding float[256],
scope text,
user_id text,
source_kind text,
ticker text,
accession_number text,
filing_date text,
+document_id integer,
+chunk_index integer,
+citation_label text
);
`);
return;
@@ -130,17 +142,19 @@ function ensureSearchVirtualTables(client: Database) {
\`citation_label\` text NOT NULL
);
`);
client.exec('CREATE INDEX IF NOT EXISTS `search_chunk_vec_lookup_idx` ON `search_chunk_vec` (`scope`, `user_id`, `source_kind`, `ticker`);');
client.exec(
"CREATE INDEX IF NOT EXISTS `search_chunk_vec_lookup_idx` ON `search_chunk_vec` (`scope`, `user_id`, `source_kind`, `ticker`);",
);
}
function verifyCriticalSchema(client: Database) {
if (!hasTable(client, 'filing_taxonomy_snapshot')) {
if (!hasTable(client, "filing_taxonomy_snapshot")) {
return;
}
const missingColumns: string[] = [];
for (const columnName of TAXONOMY_SNAPSHOT_REQUIRED_COLUMNS) {
if (!hasColumn(client, 'filing_taxonomy_snapshot', columnName)) {
if (!hasColumn(client, "filing_taxonomy_snapshot", columnName)) {
missingColumns.push(columnName);
}
}
@@ -148,8 +162,8 @@ function verifyCriticalSchema(client: Database) {
if (missingColumns.length > 0) {
throw new Error(
`[db] CRITICAL: Database schema is incompatible. ` +
`filing_taxonomy_snapshot is missing columns: ${missingColumns.join(', ')}. ` +
`Delete the database file and restart to rebuild schema.`
`filing_taxonomy_snapshot is missing columns: ${missingColumns.join(", ")}. ` +
`Delete the database file and restart to rebuild schema.`,
);
}
}
@@ -159,19 +173,21 @@ export function getSqliteClient() {
configureCustomSqliteRuntime();
const databasePath = getDatabasePath();
if (databasePath !== ':memory:') {
if (databasePath !== ":memory:") {
mkdirSync(dirname(databasePath), { recursive: true });
}
const client = new Database(databasePath, { create: true });
client.exec('PRAGMA foreign_keys = ON;');
client.exec('PRAGMA journal_mode = WAL;');
client.exec('PRAGMA busy_timeout = 5000;');
client.exec("PRAGMA foreign_keys = ON;");
client.exec("PRAGMA journal_mode = WAL;");
client.exec("PRAGMA busy_timeout = 5000;");
loadSqliteExtensions(client);
ensureLocalSqliteSchema(client);
verifyCriticalSchema(client);
ensureFinancialIngestionSchemaHealthy(client, {
mode: resolveFinancialSchemaRepairMode(process.env.FINANCIAL_SCHEMA_REPAIR_MODE)
mode: resolveFinancialSchemaRepairMode(
process.env.FINANCIAL_SCHEMA_REPAIR_MODE,
),
});
ensureSearchVirtualTables(client);
@@ -200,5 +216,5 @@ export const __dbInternals = {
hasTable,
isVectorExtensionLoaded,
loadSqliteExtensions,
verifyCriticalSchema
verifyCriticalSchema,
};