Add history window controls and expand taxonomy pack support
- add 3Y/5Y/10Y financial history filtering and reorganize normalization details UI
- add new fiscal taxonomy surface/income bridge/KPI packs and update Rust taxonomy loading
- auto-detect Homebrew SQLite for native `sqlite-vec` in local dev/e2e with docs and env guidance
This commit is contained in:
169
scripts/dev.ts
169
scripts/dev.ts
@@ -1,14 +1,15 @@
|
||||
import { spawn } from 'node:child_process';
|
||||
import { mkdirSync, readFileSync } from 'node:fs';
|
||||
import { Database } from 'bun:sqlite';
|
||||
import { createServer } from 'node:net';
|
||||
import { dirname, join } from 'node:path';
|
||||
import { spawn } from "node:child_process";
|
||||
import { mkdirSync, readFileSync } from "node:fs";
|
||||
import { Database } from "bun:sqlite";
|
||||
import { createServer } from "node:net";
|
||||
import { dirname, join } from "node:path";
|
||||
import {
|
||||
ensureFinancialIngestionSchemaHealthy,
|
||||
resolveFinancialSchemaRepairMode
|
||||
} from '../lib/server/db/financial-ingestion-schema';
|
||||
import { ensureLocalSqliteSchema } from '../lib/server/db/sqlite-schema-compat';
|
||||
import { buildLocalDevConfig, resolveSqlitePath } from './dev-env';
|
||||
resolveFinancialSchemaRepairMode,
|
||||
} from "../lib/server/db/financial-ingestion-schema";
|
||||
import { ensureLocalSqliteSchema } from "../lib/server/db/sqlite-schema-compat";
|
||||
import { buildLocalDevConfig, resolveSqlitePath } from "./dev-env";
|
||||
import { applyLocalSqliteVectorEnv } from "./sqlite-vector-env";
|
||||
|
||||
type DrizzleJournal = {
|
||||
entries: Array<{ tag: string }>;
|
||||
@@ -28,7 +29,7 @@ async function isPortAvailable(port: number, host: string) {
|
||||
return await new Promise<boolean>((resolve) => {
|
||||
const server = createServer();
|
||||
|
||||
server.once('error', () => resolve(false));
|
||||
server.once("error", () => resolve(false));
|
||||
server.listen(port, host, () => {
|
||||
server.close(() => resolve(true));
|
||||
});
|
||||
@@ -44,28 +45,39 @@ async function pickLocalPort(host: string) {
|
||||
}
|
||||
}
|
||||
|
||||
throw new Error(`Unable to find an open local dev port from: ${candidatePorts.join(', ')}`);
|
||||
throw new Error(
|
||||
`Unable to find an open local dev port from: ${candidatePorts.join(", ")}`,
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Reports whether a table named `tableName` exists in the connected SQLite
 * database by looking it up in `sqlite_master`.
 *
 * The lookup is best-effort: if the query throws, the failure is logged and
 * the table is reported as absent instead of aborting the caller.
 *
 * @param database - Open SQLite connection to inspect.
 * @param tableName - Exact table name to look for.
 * @returns `true` when a matching table row is found, otherwise `false`.
 */
function hasTable(database: Database, tableName: string) {
  try {
    const row = database
      .query("SELECT name FROM sqlite_master WHERE type='table' AND name = ?")
      .get(tableName) as { name: string } | null | undefined;

    // Loose comparison so both `null` and `undefined` count as "no row".
    return row != null;
  } catch (error: unknown) {
    // Keep the best-effort contract, but make the failure visible: a broken
    // lookup is otherwise indistinguishable from a missing table.
    const reason = error instanceof Error ? error.message : String(error);
    console.warn(`[dev] unable to check for table "${tableName}": ${reason}`);
    return false;
  }
}
|
||||
|
||||
/**
 * Lists the drizzle migration SQL files in the order recorded by the journal.
 *
 * Reads `drizzle/meta/_journal.json` relative to the current working
 * directory and maps every journal entry tag to `drizzle/<tag>.sql`.
 *
 * @returns One migration file path per journal entry, in journal order.
 */
function readMigrationFiles() {
  const drizzleDir = join(process.cwd(), "drizzle");
  const journalSource = readFileSync(
    join(drizzleDir, "meta", "_journal.json"),
    "utf8",
  );
  const parsedJournal = JSON.parse(journalSource) as DrizzleJournal;

  return parsedJournal.entries.map(({ tag }) => join(drizzleDir, `${tag}.sql`));
}
|
||||
|
||||
function bootstrapFreshDatabase(databaseUrl: string) {
|
||||
const databasePath = resolveSqlitePath(databaseUrl);
|
||||
if (!databasePath || databasePath === ':memory:') {
|
||||
if (!databasePath || databasePath === ":memory:") {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -74,16 +86,16 @@ function bootstrapFreshDatabase(databaseUrl: string) {
|
||||
const database = new Database(databasePath, { create: true });
|
||||
|
||||
try {
|
||||
database.exec('PRAGMA foreign_keys = ON;');
|
||||
database.exec("PRAGMA foreign_keys = ON;");
|
||||
|
||||
const existingCoreTables = [
|
||||
'user',
|
||||
'filing',
|
||||
'watchlist_item',
|
||||
'filing_statement_snapshot',
|
||||
'filing_taxonomy_snapshot',
|
||||
'task_run',
|
||||
'company_financial_bundle'
|
||||
"user",
|
||||
"filing",
|
||||
"watchlist_item",
|
||||
"filing_statement_snapshot",
|
||||
"filing_taxonomy_snapshot",
|
||||
"task_run",
|
||||
"company_financial_bundle",
|
||||
];
|
||||
|
||||
if (existingCoreTables.some((tableName) => hasTable(database, tableName))) {
|
||||
@@ -91,7 +103,7 @@ function bootstrapFreshDatabase(databaseUrl: string) {
|
||||
}
|
||||
|
||||
for (const migrationFile of readMigrationFiles()) {
|
||||
database.exec(readFileSync(migrationFile, 'utf8'));
|
||||
database.exec(readFileSync(migrationFile, "utf8"));
|
||||
}
|
||||
|
||||
return true;
|
||||
@@ -102,7 +114,9 @@ function bootstrapFreshDatabase(databaseUrl: string) {
|
||||
|
||||
function exitFromResult(result: ExitResult) {
|
||||
if (result.signal) {
|
||||
process.exit(result.signal === 'SIGINT' || result.signal === 'SIGTERM' ? 0 : 1);
|
||||
process.exit(
|
||||
result.signal === "SIGINT" || result.signal === "SIGTERM" ? 0 : 1,
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -110,49 +124,52 @@ function exitFromResult(result: ExitResult) {
|
||||
}
|
||||
|
||||
const explicitPort = trim(process.env.PORT) || trim(process.env.APP_PORT);
|
||||
const bindHost = trim(process.env.HOSTNAME) || trim(process.env.HOST) || '127.0.0.1';
|
||||
const resolvedPort = explicitPort || await pickLocalPort(bindHost);
|
||||
const bindHost =
|
||||
trim(process.env.HOSTNAME) || trim(process.env.HOST) || "127.0.0.1";
|
||||
const resolvedPort = explicitPort || (await pickLocalPort(bindHost));
|
||||
const config = buildLocalDevConfig({
|
||||
...process.env,
|
||||
HOSTNAME: bindHost,
|
||||
PORT: resolvedPort
|
||||
PORT: resolvedPort,
|
||||
});
|
||||
const env = {
|
||||
...config.env
|
||||
const initialEnv = {
|
||||
...config.env,
|
||||
} as NodeJS.ProcessEnv;
|
||||
const { config: sqliteVectorConfig, env } =
|
||||
applyLocalSqliteVectorEnv(initialEnv);
|
||||
|
||||
delete env.NO_COLOR;
|
||||
|
||||
const databasePath = resolveSqlitePath(env.DATABASE_URL ?? '');
|
||||
if (databasePath && databasePath !== ':memory:') {
|
||||
const databasePath = resolveSqlitePath(env.DATABASE_URL ?? "");
|
||||
if (databasePath && databasePath !== ":memory:") {
|
||||
mkdirSync(dirname(databasePath), { recursive: true });
|
||||
}
|
||||
|
||||
mkdirSync(env.WORKFLOW_LOCAL_DATA_DIR ?? '.workflow-data', { recursive: true });
|
||||
mkdirSync(env.WORKFLOW_LOCAL_DATA_DIR ?? ".workflow-data", { recursive: true });
|
||||
|
||||
const initializedDatabase = bootstrapFreshDatabase(env.DATABASE_URL ?? '');
|
||||
const initializedDatabase = bootstrapFreshDatabase(env.DATABASE_URL ?? "");
|
||||
|
||||
if (!initializedDatabase && databasePath && databasePath !== ':memory:') {
|
||||
if (!initializedDatabase && databasePath && databasePath !== ":memory:") {
|
||||
const client = new Database(databasePath, { create: true });
|
||||
|
||||
try {
|
||||
client.exec('PRAGMA foreign_keys = ON;');
|
||||
client.exec("PRAGMA foreign_keys = ON;");
|
||||
ensureLocalSqliteSchema(client);
|
||||
const repairResult = ensureFinancialIngestionSchemaHealthy(client, {
|
||||
mode: resolveFinancialSchemaRepairMode(env.FINANCIAL_SCHEMA_REPAIR_MODE)
|
||||
mode: resolveFinancialSchemaRepairMode(env.FINANCIAL_SCHEMA_REPAIR_MODE),
|
||||
});
|
||||
|
||||
if (repairResult.mode === 'repaired') {
|
||||
if (repairResult.mode === "repaired") {
|
||||
console.info(
|
||||
`[dev] repaired financial ingestion schema (missing indexes: ${repairResult.repair?.missingIndexesBefore.join(', ') || 'none'}; duplicate groups resolved: ${repairResult.repair?.duplicateGroupsResolved ?? 0}; bundle cache cleared: ${repairResult.repair?.bundleCacheCleared ? 'yes' : 'no'})`
|
||||
`[dev] repaired financial ingestion schema (missing indexes: ${repairResult.repair?.missingIndexesBefore.join(", ") || "none"}; duplicate groups resolved: ${repairResult.repair?.duplicateGroupsResolved ?? 0}; bundle cache cleared: ${repairResult.repair?.bundleCacheCleared ? "yes" : "no"})`,
|
||||
);
|
||||
} else if (repairResult.mode === 'drifted') {
|
||||
} else if (repairResult.mode === "drifted") {
|
||||
console.warn(
|
||||
`[dev] financial ingestion schema drift detected (missing indexes: ${repairResult.missingIndexes.join(', ') || 'none'}; duplicate groups: ${repairResult.duplicateGroups})`
|
||||
`[dev] financial ingestion schema drift detected (missing indexes: ${repairResult.missingIndexes.join(", ") || "none"}; duplicate groups: ${repairResult.duplicateGroups})`,
|
||||
);
|
||||
} else if (repairResult.mode === 'failed') {
|
||||
} else if (repairResult.mode === "failed") {
|
||||
console.warn(
|
||||
`[dev] financial ingestion schema repair failed: ${repairResult.error ?? 'unknown error'}`
|
||||
`[dev] financial ingestion schema repair failed: ${repairResult.error ?? "unknown error"}`,
|
||||
);
|
||||
}
|
||||
} finally {
|
||||
@@ -162,41 +179,69 @@ if (!initializedDatabase && databasePath && databasePath !== ':memory:') {
|
||||
|
||||
console.info(`[dev] local origin ${config.publicOrigin}`);
|
||||
console.info(`[dev] sqlite ${env.DATABASE_URL}`);
|
||||
console.info(`[dev] workflow ${env.WORKFLOW_TARGET_WORLD} (${env.WORKFLOW_LOCAL_DATA_DIR})`);
|
||||
if (!explicitPort && resolvedPort !== '3000') {
|
||||
console.info(`[dev] port 3000 is busy, using http://localhost:${resolvedPort} instead`);
|
||||
console.info(
|
||||
`[dev] workflow ${env.WORKFLOW_TARGET_WORLD} (${env.WORKFLOW_LOCAL_DATA_DIR})`,
|
||||
);
|
||||
if (sqliteVectorConfig.mode === "native") {
|
||||
console.info(
|
||||
`[dev] sqlite-vec native extension enabled (${sqliteVectorConfig.sqliteLibPath})`,
|
||||
);
|
||||
}
|
||||
if (!explicitPort && resolvedPort !== "3000") {
|
||||
console.info(
|
||||
`[dev] port 3000 is busy, using http://localhost:${resolvedPort} instead`,
|
||||
);
|
||||
}
|
||||
if (initializedDatabase) {
|
||||
console.info('[dev] initialized the local SQLite schema from drizzle SQL files');
|
||||
console.info(
|
||||
"[dev] initialized the local SQLite schema from drizzle SQL files",
|
||||
);
|
||||
}
|
||||
|
||||
if (config.overrides.authOriginChanged) {
|
||||
console.info('[dev] forcing Better Auth origin/trusted origins to the local origin');
|
||||
console.info(
|
||||
"[dev] forcing Better Auth origin/trusted origins to the local origin",
|
||||
);
|
||||
}
|
||||
|
||||
if (config.overrides.apiBaseChanged) {
|
||||
console.info('[dev] forcing NEXT_PUBLIC_API_URL to same-origin for local dev');
|
||||
console.info(
|
||||
"[dev] forcing NEXT_PUBLIC_API_URL to same-origin for local dev",
|
||||
);
|
||||
}
|
||||
|
||||
if (config.overrides.databaseChanged) {
|
||||
console.info('[dev] using a local SQLite database instead of the deployment path');
|
||||
console.info(
|
||||
"[dev] using a local SQLite database instead of the deployment path",
|
||||
);
|
||||
}
|
||||
|
||||
if (config.overrides.workflowChanged) {
|
||||
console.info('[dev] forcing Workflow to the local runtime for local dev');
|
||||
console.info("[dev] forcing Workflow to the local runtime for local dev");
|
||||
}
|
||||
|
||||
if (config.overrides.secretFallbackUsed) {
|
||||
console.info('[dev] using the built-in local Better Auth secret because BETTER_AUTH_SECRET is unset or still a placeholder');
|
||||
console.info(
|
||||
"[dev] using the built-in local Better Auth secret because BETTER_AUTH_SECRET is unset or still a placeholder",
|
||||
);
|
||||
}
|
||||
|
||||
const child = spawn(
|
||||
'bun',
|
||||
['--bun', 'next', 'dev', '--turbopack', '--hostname', config.bindHost, '--port', config.port],
|
||||
"bun",
|
||||
[
|
||||
"--bun",
|
||||
"next",
|
||||
"dev",
|
||||
"--turbopack",
|
||||
"--hostname",
|
||||
config.bindHost,
|
||||
"--port",
|
||||
config.port,
|
||||
],
|
||||
{
|
||||
env,
|
||||
stdio: 'inherit'
|
||||
}
|
||||
stdio: "inherit",
|
||||
},
|
||||
);
|
||||
|
||||
function forwardSignal(signal: NodeJS.Signals) {
|
||||
@@ -205,9 +250,9 @@ function forwardSignal(signal: NodeJS.Signals) {
|
||||
}
|
||||
}
|
||||
|
||||
process.on('SIGINT', () => forwardSignal('SIGINT'));
|
||||
process.on('SIGTERM', () => forwardSignal('SIGTERM'));
|
||||
process.on("SIGINT", () => forwardSignal("SIGINT"));
|
||||
process.on("SIGTERM", () => forwardSignal("SIGTERM"));
|
||||
|
||||
child.on('exit', (code, signal) => {
|
||||
child.on("exit", (code, signal) => {
|
||||
exitFromResult({ code, signal });
|
||||
});
|
||||
|
||||
@@ -1,11 +1,16 @@
|
||||
import { mkdirSync, rmSync } from 'node:fs';
|
||||
import { dirname, join } from 'node:path';
|
||||
import { Database } from 'bun:sqlite';
|
||||
import { ensureFinancialIngestionSchemaHealthy } from '../lib/server/db/financial-ingestion-schema';
|
||||
import { ensureLocalSqliteSchema } from '../lib/server/db/sqlite-schema-compat';
|
||||
import { mkdirSync, rmSync } from "node:fs";
|
||||
import { dirname, join } from "node:path";
|
||||
import { Database } from "bun:sqlite";
|
||||
import { ensureFinancialIngestionSchemaHealthy } from "../lib/server/db/financial-ingestion-schema";
|
||||
import { ensureLocalSqliteSchema } from "../lib/server/db/sqlite-schema-compat";
|
||||
import { applyLocalSqliteVectorEnv } from "./sqlite-vector-env";
|
||||
|
||||
export const E2E_DATABASE_PATH = join(process.cwd(), 'data', 'e2e.sqlite');
|
||||
export const E2E_WORKFLOW_DATA_DIR = join(process.cwd(), '.workflow-data', 'e2e');
|
||||
export const E2E_DATABASE_PATH = join(process.cwd(), "data", "e2e.sqlite");
|
||||
export const E2E_WORKFLOW_DATA_DIR = join(
|
||||
process.cwd(),
|
||||
".workflow-data",
|
||||
"e2e",
|
||||
);
|
||||
|
||||
type PrepareE2eDatabaseOptions = {
|
||||
databasePath?: string;
|
||||
@@ -16,6 +21,11 @@ function removeFileIfPresent(path: string) {
|
||||
rmSync(path, { force: true });
|
||||
}
|
||||
|
||||
const appliedVectorEnv = applyLocalSqliteVectorEnv(process.env);
|
||||
if (appliedVectorEnv.env !== process.env) {
|
||||
Object.assign(process.env, appliedVectorEnv.env);
|
||||
}
|
||||
|
||||
export function prepareE2eDatabase(options: PrepareE2eDatabaseOptions = {}) {
|
||||
const databasePath = options.databasePath ?? E2E_DATABASE_PATH;
|
||||
const workflowDataDir = options.workflowDataDir ?? E2E_WORKFLOW_DATA_DIR;
|
||||
@@ -30,9 +40,9 @@ export function prepareE2eDatabase(options: PrepareE2eDatabaseOptions = {}) {
|
||||
const database = new Database(databasePath, { create: true });
|
||||
|
||||
try {
|
||||
database.exec('PRAGMA foreign_keys = ON;');
|
||||
database.exec("PRAGMA foreign_keys = ON;");
|
||||
ensureLocalSqliteSchema(database);
|
||||
ensureFinancialIngestionSchemaHealthy(database, { mode: 'auto' });
|
||||
ensureFinancialIngestionSchemaHealthy(database, { mode: "auto" });
|
||||
} finally {
|
||||
database.close();
|
||||
}
|
||||
|
||||
@@ -1,37 +1,49 @@
|
||||
import { spawn } from 'node:child_process';
|
||||
import { mkdirSync } from 'node:fs';
|
||||
import { prepareE2eDatabase, E2E_DATABASE_PATH, E2E_WORKFLOW_DATA_DIR } from './e2e-prepare';
|
||||
import { spawn } from "node:child_process";
|
||||
import { mkdirSync } from "node:fs";
|
||||
import {
|
||||
prepareE2eDatabase,
|
||||
E2E_DATABASE_PATH,
|
||||
E2E_WORKFLOW_DATA_DIR,
|
||||
} from "./e2e-prepare";
|
||||
import { applyLocalSqliteVectorEnv } from "./sqlite-vector-env";
|
||||
|
||||
const host = process.env.PLAYWRIGHT_HOST ?? '127.0.0.1';
|
||||
const port = process.env.PLAYWRIGHT_PORT ?? '3400';
|
||||
const host = process.env.PLAYWRIGHT_HOST ?? "127.0.0.1";
|
||||
const port = process.env.PLAYWRIGHT_PORT ?? "3400";
|
||||
const baseURL = process.env.PLAYWRIGHT_BASE_URL ?? `http://${host}:${port}`;
|
||||
const env: NodeJS.ProcessEnv = {
|
||||
const initialEnv: NodeJS.ProcessEnv = {
|
||||
...process.env,
|
||||
BETTER_AUTH_BASE_URL: baseURL,
|
||||
BETTER_AUTH_SECRET: 'playwright-e2e-secret-playwright-e2e-secret',
|
||||
BETTER_AUTH_SECRET: "playwright-e2e-secret-playwright-e2e-secret",
|
||||
BETTER_AUTH_TRUSTED_ORIGINS: baseURL,
|
||||
DATABASE_URL: `file:${E2E_DATABASE_PATH}`,
|
||||
HOSTNAME: host,
|
||||
NEXT_PUBLIC_API_URL: '',
|
||||
NEXT_PUBLIC_API_URL: "",
|
||||
PORT: port,
|
||||
SEC_USER_AGENT: 'Fiscal Clone Playwright <support@fiscal.local>',
|
||||
SEC_USER_AGENT: "Fiscal Clone Playwright <support@fiscal.local>",
|
||||
WORKFLOW_LOCAL_DATA_DIR: E2E_WORKFLOW_DATA_DIR,
|
||||
WORKFLOW_LOCAL_QUEUE_CONCURRENCY: '1',
|
||||
WORKFLOW_TARGET_WORLD: 'local'
|
||||
WORKFLOW_LOCAL_QUEUE_CONCURRENCY: "1",
|
||||
WORKFLOW_TARGET_WORLD: "local",
|
||||
};
|
||||
const { config: sqliteVectorConfig, env } =
|
||||
applyLocalSqliteVectorEnv(initialEnv);
|
||||
|
||||
delete env.NO_COLOR;
|
||||
|
||||
prepareE2eDatabase();
|
||||
mkdirSync(E2E_WORKFLOW_DATA_DIR, { recursive: true });
|
||||
if (sqliteVectorConfig.mode === "native") {
|
||||
console.info(
|
||||
`[e2e] sqlite-vec native extension enabled (${sqliteVectorConfig.sqliteLibPath})`,
|
||||
);
|
||||
}
|
||||
|
||||
const child = spawn(
|
||||
'bun',
|
||||
['--bun', 'next', 'dev', '--turbopack', '--hostname', host, '--port', port],
|
||||
"bun",
|
||||
["--bun", "next", "dev", "--turbopack", "--hostname", host, "--port", port],
|
||||
{
|
||||
stdio: 'inherit',
|
||||
env
|
||||
}
|
||||
stdio: "inherit",
|
||||
env,
|
||||
},
|
||||
);
|
||||
|
||||
function forwardSignal(signal: NodeJS.Signals) {
|
||||
@@ -40,12 +52,12 @@ function forwardSignal(signal: NodeJS.Signals) {
|
||||
}
|
||||
}
|
||||
|
||||
process.on('SIGINT', () => forwardSignal('SIGINT'));
|
||||
process.on('SIGTERM', () => forwardSignal('SIGTERM'));
|
||||
process.on("SIGINT", () => forwardSignal("SIGINT"));
|
||||
process.on("SIGTERM", () => forwardSignal("SIGTERM"));
|
||||
|
||||
child.on('exit', (code, signal) => {
|
||||
child.on("exit", (code, signal) => {
|
||||
if (signal) {
|
||||
process.exit(signal === 'SIGINT' || signal === 'SIGTERM' ? 0 : 1);
|
||||
process.exit(signal === "SIGINT" || signal === "SIGTERM" ? 0 : 1);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -93,6 +93,29 @@ const PACK_ORDER = [
|
||||
"insurance",
|
||||
"reit_real_estate",
|
||||
"broker_asset_manager",
|
||||
"agriculture",
|
||||
"contractors_construction",
|
||||
"contractors_federal_government",
|
||||
"development_stage",
|
||||
"entertainment_broadcasters",
|
||||
"entertainment_cable_television",
|
||||
"entertainment_casinos",
|
||||
"entertainment_films",
|
||||
"entertainment_music",
|
||||
"extractive_mining",
|
||||
"mortgage_banking",
|
||||
"title_plant",
|
||||
"franchisors",
|
||||
"not_for_profit",
|
||||
"plan_defined_benefit",
|
||||
"plan_defined_contribution",
|
||||
"plan_health_welfare",
|
||||
"real_estate_general",
|
||||
"real_estate_common_interest",
|
||||
"real_estate_retail_land",
|
||||
"real_estate_time_sharing",
|
||||
"software",
|
||||
"steamship",
|
||||
] as const;
|
||||
type PackName = (typeof PACK_ORDER)[number];
|
||||
|
||||
@@ -452,6 +475,34 @@ export { ${packs.map((p) => `${p.pack.toUpperCase().replace(/-/g, "_")}_KPIS`).j
|
||||
`;
|
||||
}
|
||||
|
||||
/**
 * Merges the surface definitions of every loaded pack into one list per
 * statement.
 *
 * Packs are visited in `PACK_ORDER`; packs absent from `surfacePacks` are
 * ignored. A surface is identified by its `statement` / `surface_key` pair and
 * only the first occurrence of each pair is kept, so packs earlier in
 * `PACK_ORDER` take precedence over later ones.
 *
 * @param surfacePacks - Loaded surface pack files keyed by pack name.
 * @returns Surface definitions grouped by statement, in first-seen order.
 */
function buildUnionSurfaceDefinitions(
  surfacePacks: Map<PackName, SurfacePackFile>,
): Map<string, SurfaceDefinition[]> {
  const grouped = new Map<string, SurfaceDefinition[]>();
  const claimedKeys = new Set<string>();

  for (const packName of PACK_ORDER) {
    const packFile = surfacePacks.get(packName);
    if (packFile) {
      for (const surface of packFile.surfaces) {
        const identity = `${surface.statement}:${surface.surface_key}`;
        if (!claimedKeys.has(identity)) {
          claimedKeys.add(identity);

          // Create the statement bucket on first use, then append in place.
          let bucket = grouped.get(surface.statement);
          if (!bucket) {
            bucket = [];
            grouped.set(surface.statement, bucket);
          }
          bucket.push(surface);
        }
      }
    }
  }

  return grouped;
}
|
||||
|
||||
function generateMainIndex(): string {
|
||||
return `// Auto-generated by scripts/generate-taxonomy.ts
|
||||
// DO NOT EDIT MANUALLY - changes will be overwritten
|
||||
@@ -533,17 +584,9 @@ async function main() {
|
||||
writeFileSync(join(OUTPUT_DIR, "types.ts"), generateTypesFile());
|
||||
|
||||
log("Generating surfaces...");
|
||||
const coreSurfaces = surfacePacks.get("core");
|
||||
if (coreSurfaces) {
|
||||
const surfacesByStatement = new Map<string, SurfaceDefinition[]>();
|
||||
|
||||
for (const surface of coreSurfaces.surfaces) {
|
||||
const existing = surfacesByStatement.get(surface.statement) || [];
|
||||
existing.push(surface);
|
||||
surfacesByStatement.set(surface.statement, existing);
|
||||
}
|
||||
|
||||
for (const [statement, surfaces] of surfacesByStatement) {
|
||||
const unionSurfaceDefinitions = buildUnionSurfaceDefinitions(surfacePacks);
|
||||
if (unionSurfaceDefinitions.size > 0) {
|
||||
for (const [statement, surfaces] of unionSurfaceDefinitions) {
|
||||
writeFileSync(
|
||||
join(OUTPUT_DIR, "surfaces", `${statement}.ts`),
|
||||
generateSurfaceFile(statement, surfaces),
|
||||
@@ -552,7 +595,7 @@ async function main() {
|
||||
|
||||
writeFileSync(
|
||||
join(OUTPUT_DIR, "surfaces", "index.ts"),
|
||||
generateSurfacesIndex(surfacesByStatement),
|
||||
generateSurfacesIndex(unionSurfaceDefinitions),
|
||||
);
|
||||
}
|
||||
|
||||
@@ -592,7 +635,10 @@ async function main() {
|
||||
log("Generating main index...");
|
||||
writeFileSync(join(OUTPUT_DIR, "index.ts"), generateMainIndex());
|
||||
|
||||
const surfaceCount = coreSurfaces?.surfaces.length || 0;
|
||||
const surfaceCount = [...unionSurfaceDefinitions.values()].reduce(
|
||||
(sum, surfaces) => sum + surfaces.length,
|
||||
0,
|
||||
);
|
||||
const computedCount = computedFiles.reduce(
|
||||
(sum, f) => sum + f.definitions.length,
|
||||
0,
|
||||
|
||||
146
scripts/sqlite-vector-env.test.ts
Normal file
146
scripts/sqlite-vector-env.test.ts
Normal file
@@ -0,0 +1,146 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import {
|
||||
applyLocalSqliteVectorEnv,
|
||||
resolveLocalSqliteVectorConfig,
|
||||
} from "./sqlite-vector-env";
|
||||
|
||||
/**
 * Builds a fake file-existence check for tests.
 *
 * @param paths - Paths that should be reported as existing; the list is
 *   copied into a set when the check is created.
 * @returns A predicate that is `true` only for the listed paths.
 */
function createFileExists(paths: string[]) {
  const knownPaths = new Set<string>(paths);

  return function fileExists(path: string): boolean {
    return knownPaths.has(path);
  };
}
|
||||
|
||||
describe("resolveLocalSqliteVectorConfig", () => {
  // Paths shared by the scenarios below.
  const packageExtensionPath = "/package/vec0.dylib";
  const appleSiliconLibPath = "/opt/homebrew/opt/sqlite/lib/libsqlite3.dylib";
  const intelLibPath = "/usr/local/opt/sqlite/lib/libsqlite3.dylib";

  // Resolver stub that always reports the packaged extension location.
  const resolvePackagedExtension = () => packageExtensionPath;

  it("prefers explicit env paths when both are configured", () => {
    const explicitLibPath = "/custom/libsqlite3.dylib";
    const explicitExtensionPath = "/custom/vec0.dylib";

    const resolved = resolveLocalSqliteVectorConfig({
      env: {
        SQLITE_CUSTOM_LIB_PATH: explicitLibPath,
        SQLITE_VEC_EXTENSION_PATH: explicitExtensionPath,
      },
      fileExists: createFileExists([explicitLibPath, explicitExtensionPath]),
      platform: "darwin",
      resolveVectorExtensionPath: resolvePackagedExtension,
    });

    expect(resolved).toEqual({
      mode: "native",
      source: "explicit-env",
      sqliteLibPath: explicitLibPath,
      vectorExtensionPath: explicitExtensionPath,
    });
  });

  it("auto-detects the Apple Silicon Homebrew SQLite path", () => {
    const resolved = resolveLocalSqliteVectorConfig({
      env: {},
      fileExists: createFileExists([appleSiliconLibPath, packageExtensionPath]),
      platform: "darwin",
      resolveVectorExtensionPath: resolvePackagedExtension,
    });

    expect(resolved).toEqual({
      mode: "native",
      source: "autodetect-homebrew",
      sqliteLibPath: appleSiliconLibPath,
      vectorExtensionPath: packageExtensionPath,
    });
  });

  it("auto-detects the Intel Homebrew SQLite path", () => {
    const resolved = resolveLocalSqliteVectorConfig({
      env: {},
      fileExists: createFileExists([intelLibPath, packageExtensionPath]),
      platform: "darwin",
      resolveVectorExtensionPath: resolvePackagedExtension,
    });

    expect(resolved).toEqual({
      mode: "native",
      source: "autodetect-homebrew",
      sqliteLibPath: intelLibPath,
      vectorExtensionPath: packageExtensionPath,
    });
  });

  it("falls back when no SQLite library is available", () => {
    const resolved = resolveLocalSqliteVectorConfig({
      env: {},
      fileExists: createFileExists([packageExtensionPath]),
      platform: "darwin",
      resolveVectorExtensionPath: resolvePackagedExtension,
    });

    expect(resolved).toEqual({
      mode: "fallback",
      reason: "sqlite-lib-missing",
    });
  });

  it("falls back when the vector extension path cannot be resolved", () => {
    const resolved = resolveLocalSqliteVectorConfig({
      env: {},
      fileExists: createFileExists([appleSiliconLibPath]),
      platform: "darwin",
      resolveVectorExtensionPath: () => {
        throw new Error("missing extension");
      },
    });

    expect(resolved).toEqual({
      mode: "fallback",
      reason: "vector-extension-missing",
    });
  });

  it("falls back outside macOS", () => {
    const resolved = resolveLocalSqliteVectorConfig({
      env: {},
      fileExists: createFileExists([appleSiliconLibPath, packageExtensionPath]),
      platform: "linux",
      resolveVectorExtensionPath: resolvePackagedExtension,
    });

    expect(resolved).toEqual({
      mode: "fallback",
      reason: "non-macos",
    });
  });
});
|
||||
|
||||
describe("applyLocalSqliteVectorEnv", () => {
  it("injects resolved native paths without overwriting explicit values", () => {
    const initialEnv = {
      DATABASE_URL: "file:data/fiscal.sqlite",
      SQLITE_CUSTOM_LIB_PATH: "/custom/libsqlite3.dylib",
      SQLITE_VEC_EXTENSION_PATH: "/custom/vec0.dylib",
    };

    const result = applyLocalSqliteVectorEnv(initialEnv, {
      fileExists: createFileExists([
        "/custom/libsqlite3.dylib",
        "/custom/vec0.dylib",
      ]),
      platform: "darwin",
      resolveVectorExtensionPath: () => "/package/vec0.dylib",
    });

    expect(result.config).toEqual({
      mode: "native",
      source: "explicit-env",
      sqliteLibPath: "/custom/libsqlite3.dylib",
      vectorExtensionPath: "/custom/vec0.dylib",
    });
    expect(result.env).toEqual({
      ...initialEnv,
      SQLITE_CUSTOM_LIB_PATH: "/custom/libsqlite3.dylib",
      SQLITE_VEC_EXTENSION_PATH: "/custom/vec0.dylib",
    });
  });

  it("injects autodetected native paths when none are configured", () => {
    const initialEnv = {
      DATABASE_URL: "file:data/fiscal.sqlite",
    };

    const result = applyLocalSqliteVectorEnv(initialEnv, {
      fileExists: createFileExists([
        "/opt/homebrew/opt/sqlite/lib/libsqlite3.dylib",
        "/package/vec0.dylib",
      ]),
      platform: "darwin",
      resolveVectorExtensionPath: () => "/package/vec0.dylib",
    });

    expect(result.config.mode).toBe("native");
    expect(result.env).toEqual({
      DATABASE_URL: "file:data/fiscal.sqlite",
      SQLITE_CUSTOM_LIB_PATH: "/opt/homebrew/opt/sqlite/lib/libsqlite3.dylib",
      SQLITE_VEC_EXTENSION_PATH: "/package/vec0.dylib",
    });
    // The native branch builds a new env object; the input stays untouched.
    expect(initialEnv).toEqual({ DATABASE_URL: "file:data/fiscal.sqlite" });
  });

  it("returns the original env object in fallback mode", () => {
    const initialEnv = {
      DATABASE_URL: "file:data/fiscal.sqlite",
    };

    const result = applyLocalSqliteVectorEnv(initialEnv, {
      fileExists: createFileExists([
        "/opt/homebrew/opt/sqlite/lib/libsqlite3.dylib",
        "/package/vec0.dylib",
      ]),
      platform: "linux",
      resolveVectorExtensionPath: () => "/package/vec0.dylib",
    });

    expect(result.config).toEqual({
      mode: "fallback",
      reason: "non-macos",
    });
    // Callers compare by identity to decide whether anything was injected.
    expect(result.env).toBe(initialEnv);
  });
});
|
||||
144
scripts/sqlite-vector-env.ts
Normal file
144
scripts/sqlite-vector-env.ts
Normal file
@@ -0,0 +1,144 @@
|
||||
import { existsSync } from "node:fs";
|
||||
import { getLoadablePath } from "sqlite-vec";
|
||||
|
||||
const HOMEBREW_SQLITE_LIBRARY_PATHS = [
|
||||
"/opt/homebrew/opt/sqlite/lib/libsqlite3.dylib",
|
||||
"/usr/local/opt/sqlite/lib/libsqlite3.dylib",
|
||||
] as const;
|
||||
|
||||
export type LocalSqliteVectorConfig =
|
||||
| {
|
||||
mode: "native";
|
||||
source: "explicit-env" | "autodetect-homebrew";
|
||||
sqliteLibPath: string;
|
||||
vectorExtensionPath: string;
|
||||
}
|
||||
| {
|
||||
mode: "fallback";
|
||||
reason: "non-macos" | "sqlite-lib-missing" | "vector-extension-missing";
|
||||
};
|
||||
|
||||
type ResolveLocalSqliteVectorConfigOptions = {
|
||||
env?: Record<string, string | undefined>;
|
||||
fileExists?: (path: string) => boolean;
|
||||
platform?: NodeJS.Platform;
|
||||
resolveVectorExtensionPath?: () => string;
|
||||
};
|
||||
|
||||
/**
 * Normalizes an optional string setting.
 *
 * @param value - Raw value, possibly missing or padded with whitespace.
 * @returns The value with surrounding whitespace removed, or `undefined` when
 *   the value is missing or blank.
 */
function trim(value: string | undefined) {
  if (value == null) {
    return undefined;
  }

  const stripped = value.trim();
  if (stripped) {
    return stripped;
  }

  return undefined;
}
|
||||
|
||||
/**
 * Default resolver for the vector extension path: delegates to
 * `getLoadablePath` from the `sqlite-vec` package.
 *
 * @returns Whatever path `getLoadablePath` reports.
 */
function defaultResolveVectorExtensionPath() {
  const loadablePath = getLoadablePath();
  return loadablePath;
}
|
||||
|
||||
/**
 * Picks the vector extension file to load.
 *
 * A non-blank `SQLITE_VEC_EXTENSION_PATH` in `env` is authoritative: it is
 * returned when the file exists and otherwise yields `null`, without
 * consulting `resolvePath`. With no explicit path, `resolvePath` supplies
 * the candidate; an exception during that lookup also yields `null`.
 *
 * @param env - Environment variables to read the explicit path from.
 * @param resolvePath - Supplies the packaged extension path; may throw.
 * @param fileExists - Existence check applied to the chosen candidate.
 * @returns The extension path when it exists, otherwise `null`.
 */
function resolveVectorExtensionPath(
  env: Record<string, string | undefined>,
  resolvePath: () => string,
  fileExists: (path: string) => boolean,
) {
  const existingOrNull = (candidate: string) =>
    fileExists(candidate) ? candidate : null;

  const configuredPath = trim(env.SQLITE_VEC_EXTENSION_PATH);
  if (configuredPath) {
    return existingOrNull(configuredPath);
  }

  try {
    return existingOrNull(resolvePath());
  } catch {
    // Any failure while locating the packaged extension means "not available".
    return null;
  }
}
|
||||
|
||||
/**
 * Decides whether local tooling can use the native `sqlite-vec` extension.
 *
 * Checks run in this order, and the first failure produces a `fallback`
 * result carrying the matching reason:
 * 1. the platform must be `darwin` (`non-macos`);
 * 2. a vector extension file must be found (`vector-extension-missing`);
 * 3. a SQLite library must exist, either a non-blank
 *    `SQLITE_CUSTOM_LIB_PATH` or the first existing Homebrew location
 *    (`sqlite-lib-missing`). An explicit path that does not exist is not
 *    replaced by a Homebrew one.
 *
 * @param options - Optional overrides for the environment, platform,
 *   file-existence check, and extension path resolver. Each defaults to the
 *   real process / filesystem / package lookup.
 * @returns A `native` config with both resolved paths and their source, or a
 *   `fallback` config with the reason.
 */
export function resolveLocalSqliteVectorConfig(
  options: ResolveLocalSqliteVectorConfigOptions = {},
): LocalSqliteVectorConfig {
  const env = options.env ?? process.env;
  const platform = options.platform ?? process.platform;
  const fileExists = options.fileExists ?? existsSync;
  const resolvePath =
    options.resolveVectorExtensionPath ?? defaultResolveVectorExtensionPath;

  const fallback = (
    reason: "non-macos" | "sqlite-lib-missing" | "vector-extension-missing",
  ): LocalSqliteVectorConfig => ({ mode: "fallback", reason });

  if (platform !== "darwin") {
    return fallback("non-macos");
  }

  const vectorExtensionPath = resolveVectorExtensionPath(
    env,
    resolvePath,
    fileExists,
  );
  if (!vectorExtensionPath) {
    return fallback("vector-extension-missing");
  }

  const explicitSqliteLibPath = trim(env.SQLITE_CUSTOM_LIB_PATH);
  const source = explicitSqliteLibPath ? "explicit-env" : "autodetect-homebrew";

  let sqliteLibPath: string | undefined;
  if (explicitSqliteLibPath) {
    // An explicit setting is never silently swapped for a Homebrew path.
    sqliteLibPath = fileExists(explicitSqliteLibPath)
      ? explicitSqliteLibPath
      : undefined;
  } else {
    sqliteLibPath = HOMEBREW_SQLITE_LIBRARY_PATHS.find((candidate) =>
      fileExists(candidate),
    );
  }

  if (!sqliteLibPath) {
    return fallback("sqlite-lib-missing");
  }

  return {
    mode: "native",
    source,
    sqliteLibPath,
    vectorExtensionPath,
  };
}
|
||||
|
||||
/**
 * Resolves the local sqlite-vec configuration for `env` and, when native mode
 * is available, returns a copy of `env` with `SQLITE_CUSTOM_LIB_PATH` and
 * `SQLITE_VEC_EXTENSION_PATH` filled in.
 *
 * Non-blank values already present in `env` are kept (trimmed); only missing
 * or blank ones are taken from the resolved config. In fallback mode the very
 * same `env` object is returned, and the input is never mutated.
 *
 * @param env - Environment to inspect; it overrides any `options.env`.
 * @param options - Optional overrides forwarded to
 *   `resolveLocalSqliteVectorConfig`.
 * @returns The resolved `config` together with the environment to use.
 */
export function applyLocalSqliteVectorEnv<
  T extends Record<string, string | undefined>,
>(env: T, options: ResolveLocalSqliteVectorConfigOptions = {}) {
  const config = resolveLocalSqliteVectorConfig({ ...options, env });

  if (config.mode === "native") {
    const sqliteLibPath =
      env.SQLITE_CUSTOM_LIB_PATH?.trim() || config.sqliteLibPath;
    const vectorExtensionPath =
      env.SQLITE_VEC_EXTENSION_PATH?.trim() || config.vectorExtensionPath;

    return {
      config,
      env: {
        ...env,
        SQLITE_CUSTOM_LIB_PATH: sqliteLibPath,
        SQLITE_VEC_EXTENSION_PATH: vectorExtensionPath,
      } as T & {
        SQLITE_CUSTOM_LIB_PATH: string;
        SQLITE_VEC_EXTENSION_PATH: string;
      },
    };
  }

  return {
    config,
    env,
  };
}
|
||||
@@ -1,6 +1,6 @@
|
||||
import type { FinancialStatementKind } from '@/lib/types';
|
||||
import { hydrateFilingTaxonomySnapshot } from '@/lib/server/taxonomy/engine';
|
||||
import type { TaxonomyHydrationInput } from '@/lib/server/taxonomy/types';
|
||||
import type { FinancialStatementKind } from "@/lib/types";
|
||||
import { hydrateFilingTaxonomySnapshot } from "@/lib/server/taxonomy/engine";
|
||||
import type { TaxonomyHydrationInput } from "@/lib/server/taxonomy/types";
|
||||
|
||||
type ValidationCase = {
|
||||
name: string;
|
||||
@@ -16,135 +16,262 @@ type ValidationFailure = {
|
||||
};
|
||||
|
||||
const UNIVERSAL_INCOME_KEYS = [
|
||||
'revenue',
|
||||
'gross_profit',
|
||||
'operating_expenses',
|
||||
'operating_income',
|
||||
'income_tax_expense',
|
||||
'net_income'
|
||||
"revenue",
|
||||
"gross_profit",
|
||||
"operating_expenses",
|
||||
"operating_income",
|
||||
"income_tax_expense",
|
||||
"net_income",
|
||||
] as const;
|
||||
|
||||
const EXPENSE_BREAKDOWN_KEYS = [
|
||||
'selling_general_and_administrative',
|
||||
'research_and_development',
|
||||
'other_operating_expense'
|
||||
"selling_general_and_administrative",
|
||||
"research_and_development",
|
||||
"other_operating_expense",
|
||||
] as const;
|
||||
|
||||
const CORPUS: ValidationCase[] = [
|
||||
{
|
||||
name: 'core-msft-2026-01-28',
|
||||
expectedPack: 'core',
|
||||
name: "core-msft-2026-01-28",
|
||||
expectedPack: "core",
|
||||
input: {
|
||||
filingId: 1,
|
||||
ticker: 'MSFT',
|
||||
cik: '0000789019',
|
||||
accessionNumber: '0001193125-26-027207',
|
||||
filingDate: '2026-01-28',
|
||||
filingType: '10-Q',
|
||||
filingUrl: 'https://www.sec.gov/Archives/edgar/data/789019/000119312526027207/',
|
||||
primaryDocument: 'msft-20251231.htm'
|
||||
ticker: "MSFT",
|
||||
cik: "0000789019",
|
||||
accessionNumber: "0001193125-26-027207",
|
||||
filingDate: "2026-01-28",
|
||||
filingType: "10-Q",
|
||||
filingUrl:
|
||||
"https://www.sec.gov/Archives/edgar/data/789019/000119312526027207/",
|
||||
primaryDocument: "msft-20251231.htm",
|
||||
},
|
||||
requiredSurfaceKeys: {
|
||||
income: [...UNIVERSAL_INCOME_KEYS, ...EXPENSE_BREAKDOWN_KEYS],
|
||||
balance: ['total_assets']
|
||||
}
|
||||
balance: ["total_assets"],
|
||||
},
|
||||
},
|
||||
{
|
||||
name: 'bank-jpm-2026-02-13',
|
||||
expectedPack: 'bank_lender',
|
||||
name: "bank-jpm-2026-02-13",
|
||||
expectedPack: "bank_lender",
|
||||
input: {
|
||||
filingId: 2,
|
||||
ticker: 'JPM',
|
||||
cik: '0000019617',
|
||||
accessionNumber: '0001628280-26-008131',
|
||||
filingDate: '2026-02-13',
|
||||
filingType: '10-K',
|
||||
filingUrl: 'https://www.sec.gov/Archives/edgar/data/19617/000162828026008131/',
|
||||
primaryDocument: 'jpm-20251231.htm'
|
||||
ticker: "JPM",
|
||||
cik: "0000019617",
|
||||
accessionNumber: "0001628280-26-008131",
|
||||
filingDate: "2026-02-13",
|
||||
filingType: "10-K",
|
||||
filingUrl:
|
||||
"https://www.sec.gov/Archives/edgar/data/19617/000162828026008131/",
|
||||
primaryDocument: "jpm-20251231.htm",
|
||||
},
|
||||
requiredSurfaceKeys: {
|
||||
income: [...UNIVERSAL_INCOME_KEYS, ...EXPENSE_BREAKDOWN_KEYS, 'net_interest_income', 'noninterest_income'],
|
||||
balance: ['loans', 'deposits']
|
||||
income: [
|
||||
...UNIVERSAL_INCOME_KEYS,
|
||||
...EXPENSE_BREAKDOWN_KEYS,
|
||||
"net_interest_income",
|
||||
"noninterest_income",
|
||||
],
|
||||
balance: ["loans", "deposits"],
|
||||
},
|
||||
requiredKpiKeys: ['net_interest_margin']
|
||||
requiredKpiKeys: ["net_interest_margin"],
|
||||
},
|
||||
{
|
||||
name: 'insurance-aig-2026-02-12',
|
||||
expectedPack: 'insurance',
|
||||
name: "insurance-aig-2026-02-12",
|
||||
expectedPack: "insurance",
|
||||
input: {
|
||||
filingId: 3,
|
||||
ticker: 'AIG',
|
||||
cik: '0000005272',
|
||||
accessionNumber: '0000005272-26-000023',
|
||||
filingDate: '2026-02-12',
|
||||
filingType: '10-K',
|
||||
filingUrl: 'https://www.sec.gov/Archives/edgar/data/5272/000000527226000023/',
|
||||
primaryDocument: 'aig-20251231.htm'
|
||||
ticker: "AIG",
|
||||
cik: "0000005272",
|
||||
accessionNumber: "0000005272-26-000023",
|
||||
filingDate: "2026-02-12",
|
||||
filingType: "10-K",
|
||||
filingUrl:
|
||||
"https://www.sec.gov/Archives/edgar/data/5272/000000527226000023/",
|
||||
primaryDocument: "aig-20251231.htm",
|
||||
},
|
||||
requiredSurfaceKeys: {
|
||||
income: [...UNIVERSAL_INCOME_KEYS, ...EXPENSE_BREAKDOWN_KEYS, 'premiums', 'claims_and_benefits'],
|
||||
balance: ['policy_liabilities']
|
||||
income: [
|
||||
...UNIVERSAL_INCOME_KEYS,
|
||||
...EXPENSE_BREAKDOWN_KEYS,
|
||||
"premiums",
|
||||
"claims_and_benefits",
|
||||
],
|
||||
balance: ["policy_liabilities"],
|
||||
},
|
||||
requiredKpiKeys: ['combined_ratio']
|
||||
requiredKpiKeys: ["combined_ratio"],
|
||||
},
|
||||
{
|
||||
name: 'reit-o-2026-02-25',
|
||||
expectedPack: 'reit_real_estate',
|
||||
name: "reit-o-2026-02-25",
|
||||
expectedPack: "reit_real_estate",
|
||||
input: {
|
||||
filingId: 4,
|
||||
ticker: 'O',
|
||||
cik: '0000726728',
|
||||
accessionNumber: '0000726728-26-000011',
|
||||
filingDate: '2026-02-25',
|
||||
filingType: '10-K',
|
||||
filingUrl: 'https://www.sec.gov/Archives/edgar/data/726728/000072672826000011/',
|
||||
primaryDocument: 'o-20251231.htm'
|
||||
ticker: "O",
|
||||
cik: "0000726728",
|
||||
accessionNumber: "0000726728-26-000011",
|
||||
filingDate: "2026-02-25",
|
||||
filingType: "10-K",
|
||||
filingUrl:
|
||||
"https://www.sec.gov/Archives/edgar/data/726728/000072672826000011/",
|
||||
primaryDocument: "o-20251231.htm",
|
||||
},
|
||||
requiredSurfaceKeys: {
|
||||
income: [...UNIVERSAL_INCOME_KEYS, ...EXPENSE_BREAKDOWN_KEYS, 'rental_revenue'],
|
||||
balance: ['investment_property', 'total_assets']
|
||||
income: [
|
||||
...UNIVERSAL_INCOME_KEYS,
|
||||
...EXPENSE_BREAKDOWN_KEYS,
|
||||
"rental_revenue",
|
||||
],
|
||||
balance: ["investment_property", "total_assets"],
|
||||
},
|
||||
requiredKpiKeys: ['property_count']
|
||||
requiredKpiKeys: ["property_count"],
|
||||
},
|
||||
{
|
||||
name: 'broker-blk-2026-02-25',
|
||||
expectedPack: 'broker_asset_manager',
|
||||
name: "broker-blk-2026-02-25",
|
||||
expectedPack: "broker_asset_manager",
|
||||
input: {
|
||||
filingId: 5,
|
||||
ticker: 'BLK',
|
||||
cik: '0002012383',
|
||||
accessionNumber: '0001193125-26-071966',
|
||||
filingDate: '2026-02-25',
|
||||
filingType: '10-K',
|
||||
filingUrl: 'https://www.sec.gov/Archives/edgar/data/2012383/000119312526071966/',
|
||||
primaryDocument: 'blk-20251231.htm'
|
||||
ticker: "BLK",
|
||||
cik: "0002012383",
|
||||
accessionNumber: "0001193125-26-071966",
|
||||
filingDate: "2026-02-25",
|
||||
filingType: "10-K",
|
||||
filingUrl:
|
||||
"https://www.sec.gov/Archives/edgar/data/2012383/000119312526071966/",
|
||||
primaryDocument: "blk-20251231.htm",
|
||||
},
|
||||
requiredSurfaceKeys: {
|
||||
income: [...UNIVERSAL_INCOME_KEYS, ...EXPENSE_BREAKDOWN_KEYS, 'fee_revenue'],
|
||||
balance: ['total_assets', 'total_liabilities']
|
||||
income: [
|
||||
...UNIVERSAL_INCOME_KEYS,
|
||||
...EXPENSE_BREAKDOWN_KEYS,
|
||||
"fee_revenue",
|
||||
],
|
||||
balance: ["total_assets", "total_liabilities"],
|
||||
},
|
||||
requiredKpiKeys: ['aum', 'fee_paying_aum']
|
||||
}
|
||||
requiredKpiKeys: ["aum", "fee_paying_aum"],
|
||||
},
|
||||
{
|
||||
name: "software-orcl-2025-06-18",
|
||||
expectedPack: "software",
|
||||
input: {
|
||||
filingId: 6,
|
||||
ticker: "ORCL",
|
||||
cik: "0001341439",
|
||||
accessionNumber: "0000950170-25-087926",
|
||||
filingDate: "2025-06-18",
|
||||
filingType: "10-K",
|
||||
filingUrl:
|
||||
"https://www.sec.gov/Archives/edgar/data/1341439/000095017025087926/",
|
||||
primaryDocument: "orcl-20250531.htm",
|
||||
},
|
||||
requiredSurfaceKeys: {
|
||||
income: [
|
||||
...UNIVERSAL_INCOME_KEYS,
|
||||
...EXPENSE_BREAKDOWN_KEYS,
|
||||
"software_license_revenue",
|
||||
"maintenance_and_support_revenue",
|
||||
"cost_of_software_revenue",
|
||||
],
|
||||
balance: ["capitalized_software_costs", "deferred_revenue"],
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "mining-fcx-2025-02-14",
|
||||
expectedPack: "extractive_mining",
|
||||
input: {
|
||||
filingId: 7,
|
||||
ticker: "FCX",
|
||||
cik: "0000831259",
|
||||
accessionNumber: "0000831259-25-000006",
|
||||
filingDate: "2025-02-14",
|
||||
filingType: "10-K",
|
||||
filingUrl:
|
||||
"https://www.sec.gov/Archives/edgar/data/831259/000083125925000006/",
|
||||
primaryDocument: "fcx-20241231.htm",
|
||||
},
|
||||
requiredSurfaceKeys: {
|
||||
income: [
|
||||
...UNIVERSAL_INCOME_KEYS,
|
||||
...EXPENSE_BREAKDOWN_KEYS,
|
||||
"mining_revenue",
|
||||
"production_costs",
|
||||
"exploration_expense",
|
||||
],
|
||||
balance: ["mining_properties", "rehabilitation_liabilities"],
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "mortgage-rkt-2026-02-27",
|
||||
expectedPack: "mortgage_banking",
|
||||
input: {
|
||||
filingId: 8,
|
||||
ticker: "RKT",
|
||||
cik: "0001805284",
|
||||
accessionNumber: "0001628280-26-013283",
|
||||
filingDate: "2026-02-27",
|
||||
filingType: "10-K",
|
||||
filingUrl:
|
||||
"https://www.sec.gov/Archives/edgar/data/1805284/000162828026013283/",
|
||||
primaryDocument: "rkt-20251231.htm",
|
||||
},
|
||||
requiredSurfaceKeys: {
|
||||
income: [
|
||||
...UNIVERSAL_INCOME_KEYS,
|
||||
...EXPENSE_BREAKDOWN_KEYS,
|
||||
"mortgage_banking_revenue",
|
||||
"servicing_fees",
|
||||
"interest_income",
|
||||
],
|
||||
balance: ["loans_held_for_sale", "mortgage_servicing_rights"],
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "casino-mgm-2026-02-11",
|
||||
expectedPack: "entertainment_casinos",
|
||||
input: {
|
||||
filingId: 9,
|
||||
ticker: "MGM",
|
||||
cik: "0000789570",
|
||||
accessionNumber: "0000789570-26-000018",
|
||||
filingDate: "2026-02-11",
|
||||
filingType: "10-K",
|
||||
filingUrl:
|
||||
"https://www.sec.gov/Archives/edgar/data/789570/000078957026000018/",
|
||||
primaryDocument: "mgm-20251231.htm",
|
||||
},
|
||||
requiredSurfaceKeys: {
|
||||
income: [
|
||||
...UNIVERSAL_INCOME_KEYS,
|
||||
...EXPENSE_BREAKDOWN_KEYS,
|
||||
"gaming_revenue",
|
||||
"hotel_and_resort_revenue",
|
||||
"food_and_beverage_revenue",
|
||||
],
|
||||
balance: ["casino_properties", "gaming_tax_liability"],
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
const FALLBACK_WARNINGS = new Set([
|
||||
'surface_rows_deferred_to_typescript',
|
||||
'ts_compact_surface_fallback_used'
|
||||
"surface_rows_deferred_to_typescript",
|
||||
"ts_compact_surface_fallback_used",
|
||||
]);
|
||||
|
||||
function parseCaseFilter(argv: string[]) {
|
||||
for (const arg of argv) {
|
||||
if (arg === '--help' || arg === '-h') {
|
||||
console.log('Validate live SEC representative filings for each active taxonomy pack.');
|
||||
console.log('');
|
||||
console.log('Usage:');
|
||||
console.log(' bun run scripts/validate-taxonomy-packs.ts');
|
||||
console.log(' bun run scripts/validate-taxonomy-packs.ts --case=bank-jpm-2026-02-13');
|
||||
if (arg === "--help" || arg === "-h") {
|
||||
console.log(
|
||||
"Validate live SEC representative filings for each active taxonomy pack.",
|
||||
);
|
||||
console.log("");
|
||||
console.log("Usage:");
|
||||
console.log(" bun run scripts/validate-taxonomy-packs.ts");
|
||||
console.log(
|
||||
" bun run scripts/validate-taxonomy-packs.ts --case=bank-jpm-2026-02-13",
|
||||
);
|
||||
process.exit(0);
|
||||
}
|
||||
|
||||
if (arg.startsWith('--case=')) {
|
||||
const value = arg.slice('--case='.length).trim();
|
||||
if (arg.startsWith("--case=")) {
|
||||
const value = arg.slice("--case=".length).trim();
|
||||
return value.length > 0 ? value : null;
|
||||
}
|
||||
}
|
||||
@@ -154,38 +281,50 @@ function parseCaseFilter(argv: string[]) {
|
||||
|
||||
function keysForStatement(
|
||||
result: Awaited<ReturnType<typeof hydrateFilingTaxonomySnapshot>>,
|
||||
statement: FinancialStatementKind
|
||||
statement: FinancialStatementKind,
|
||||
) {
|
||||
return (result.surface_rows[statement] ?? []).map((row) => row.key);
|
||||
}
|
||||
|
||||
async function validateCase(testCase: ValidationCase): Promise<ValidationFailure | null> {
|
||||
async function validateCase(
|
||||
testCase: ValidationCase,
|
||||
): Promise<ValidationFailure | null> {
|
||||
const startedAt = Date.now();
|
||||
const result = await hydrateFilingTaxonomySnapshot(testCase.input);
|
||||
const issues: string[] = [];
|
||||
const warnings = result.normalization_summary.warnings ?? [];
|
||||
const kpiKeys = result.kpi_rows.map((row) => row.key);
|
||||
|
||||
if (result.parse_status !== 'ready') {
|
||||
issues.push(`parse_status=${result.parse_status}${result.parse_error ? ` parse_error=${result.parse_error}` : ''}`);
|
||||
if (result.parse_status !== "ready") {
|
||||
issues.push(
|
||||
`parse_status=${result.parse_status}${result.parse_error ? ` parse_error=${result.parse_error}` : ""}`,
|
||||
);
|
||||
}
|
||||
|
||||
if (result.fiscal_pack !== testCase.expectedPack) {
|
||||
issues.push(`fiscal_pack=${result.fiscal_pack ?? 'null'} expected=${testCase.expectedPack}`);
|
||||
issues.push(
|
||||
`fiscal_pack=${result.fiscal_pack ?? "null"} expected=${testCase.expectedPack}`,
|
||||
);
|
||||
}
|
||||
|
||||
if ((Object.values(result.surface_rows) as Array<Array<{ key: string }>>).every((rows) => rows.length === 0)) {
|
||||
issues.push('surface_rows are empty');
|
||||
if (
|
||||
(Object.values(result.surface_rows) as Array<Array<{ key: string }>>).every(
|
||||
(rows) => rows.length === 0,
|
||||
)
|
||||
) {
|
||||
issues.push("surface_rows are empty");
|
||||
}
|
||||
|
||||
const fallbackWarning = warnings.find((warning) => FALLBACK_WARNINGS.has(warning));
|
||||
const fallbackWarning = warnings.find((warning) =>
|
||||
FALLBACK_WARNINGS.has(warning),
|
||||
);
|
||||
if (fallbackWarning) {
|
||||
issues.push(`unexpected fallback warning=${fallbackWarning}`);
|
||||
}
|
||||
|
||||
for (const [statement, requiredKeys] of Object.entries(testCase.requiredSurfaceKeys) as Array<
|
||||
[FinancialStatementKind, string[]]
|
||||
>) {
|
||||
for (const [statement, requiredKeys] of Object.entries(
|
||||
testCase.requiredSurfaceKeys,
|
||||
) as Array<[FinancialStatementKind, string[]]>) {
|
||||
const actualKeys = new Set(keysForStatement(result, statement));
|
||||
for (const requiredKey of requiredKeys) {
|
||||
if (!actualKeys.has(requiredKey)) {
|
||||
@@ -201,20 +340,20 @@ async function validateCase(testCase: ValidationCase): Promise<ValidationFailure
|
||||
}
|
||||
|
||||
const durationMs = Date.now() - startedAt;
|
||||
const incomeKeys = keysForStatement(result, 'income');
|
||||
const balanceKeys = keysForStatement(result, 'balance');
|
||||
const incomeKeys = keysForStatement(result, "income");
|
||||
const balanceKeys = keysForStatement(result, "balance");
|
||||
console.log(
|
||||
[
|
||||
`[validate-taxonomy-packs] ${testCase.name}`,
|
||||
`status=${issues.length === 0 ? 'pass' : 'fail'}`,
|
||||
`status=${issues.length === 0 ? "pass" : "fail"}`,
|
||||
`parse=${result.parse_status}`,
|
||||
`pack=${result.fiscal_pack ?? 'null'}`,
|
||||
`income=${incomeKeys.join(',') || '-'}`,
|
||||
`balance=${balanceKeys.join(',') || '-'}`,
|
||||
`kpis=${kpiKeys.join(',') || '-'}`,
|
||||
`warnings=${warnings.join(',') || '-'}`,
|
||||
`durationMs=${durationMs}`
|
||||
].join(' ')
|
||||
`pack=${result.fiscal_pack ?? "null"}`,
|
||||
`income=${incomeKeys.join(",") || "-"}`,
|
||||
`balance=${balanceKeys.join(",") || "-"}`,
|
||||
`kpis=${kpiKeys.join(",") || "-"}`,
|
||||
`warnings=${warnings.join(",") || "-"}`,
|
||||
`durationMs=${durationMs}`,
|
||||
].join(" "),
|
||||
);
|
||||
|
||||
if (issues.length === 0) {
|
||||
@@ -223,12 +362,13 @@ async function validateCase(testCase: ValidationCase): Promise<ValidationFailure
|
||||
|
||||
return {
|
||||
name: testCase.name,
|
||||
issues
|
||||
issues,
|
||||
};
|
||||
}
|
||||
|
||||
async function main() {
|
||||
process.env.XBRL_ENGINE_TIMEOUT_MS = process.env.XBRL_ENGINE_TIMEOUT_MS ?? '180000';
|
||||
process.env.XBRL_ENGINE_TIMEOUT_MS =
|
||||
process.env.XBRL_ENGINE_TIMEOUT_MS ?? "180000";
|
||||
|
||||
const requestedCase = parseCaseFilter(process.argv.slice(2));
|
||||
const selectedCases = requestedCase
|
||||
@@ -253,7 +393,7 @@ async function main() {
|
||||
} catch (error) {
|
||||
failures.push({
|
||||
name: testCase.name,
|
||||
issues: [error instanceof Error ? error.message : String(error)]
|
||||
issues: [error instanceof Error ? error.message : String(error)],
|
||||
});
|
||||
}
|
||||
|
||||
@@ -264,7 +404,7 @@ async function main() {
|
||||
`[validate-taxonomy-packs] completed cases=${selectedCases.length} failures=${failures.length} durationSec=${(
|
||||
(Date.now() - startedAt) /
|
||||
1000
|
||||
).toFixed(1)}`
|
||||
).toFixed(1)}`,
|
||||
);
|
||||
|
||||
if (failures.length === 0) {
|
||||
|
||||
Reference in New Issue
Block a user