Integrate crabrl parser into taxonomy hydration
This commit is contained in:
@@ -1,36 +1,89 @@
|
||||
import { existsSync } from 'node:fs';
|
||||
import { join } from 'node:path';
|
||||
import type { TaxonomyHydrationInput, TaxonomyHydrationResult } from '@/lib/server/taxonomy/types';
|
||||
import { withRetry } from '@/lib/server/utils/retry';
|
||||
import { existsSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import type {
|
||||
TaxonomyHydrationInput,
|
||||
TaxonomyHydrationResult,
|
||||
} from "@/lib/server/taxonomy/types";
|
||||
import { withRetry } from "@/lib/server/utils/retry";
|
||||
|
||||
/**
 * Minimal view of a spawned sidecar process that this module relies on.
 * The handle returned by the injected `spawn` is cast to this shape.
 */
type SpawnedSidecar = {
  /** Writable side of the child's stdin: receives the encoded request, then is closed. */
  stdin: {
    write: (chunk: Uint8Array) => void;
    end: () => void;
  };
  /** Child stdout; read in full as text and parsed as the JSON result. */
  stdout: ReadableStream<Uint8Array>;
  /** Child stderr; read in full as text and used in failure messages. */
  stderr: ReadableStream<Uint8Array>;
  /** Resolves with the process exit code. */
  exited: Promise<number>;
  /** Terminates the child; invoked when the timeout fires. */
  kill: () => void;
};
|
||||
|
||||
/**
 * Runtime dependencies of the sidecar client, gathered in one place so that
 * callers (e.g. tests) can substitute them instead of touching the real
 * filesystem, process spawner, or timers.
 */
type SidecarDeps = {
  /** Filesystem probe used to locate the sidecar binary. */
  existsSync: typeof existsSync;
  /** Process spawner used to launch the sidecar. */
  spawn: typeof Bun.spawn;
  /** Timer used to schedule the kill-on-timeout callback. */
  setTimeout: typeof globalThis.setTimeout;
  /** Cancels the timer created through `setTimeout`. */
  clearTimeout: typeof globalThis.clearTimeout;
};
|
||||
|
||||
/**
 * Lists the locations where the `fiscal-xbrl` sidecar binary may live, in
 * priority order: the trimmed `FISCAL_XBRL_BIN` override (only when it is
 * non-empty), then `bin/`, `rust/target/release/` and `rust/target/debug/`
 * under the current working directory.
 */
function candidateBinaryPaths() {
  const workingDir = process.cwd();
  const override = process.env.FISCAL_XBRL_BIN?.trim();

  const defaultLocations = [
    ["bin"],
    ["rust", "target", "release"],
    ["rust", "target", "debug"],
  ].map((segments) => join(workingDir, ...segments, "fiscal-xbrl"));

  // An unset or whitespace-only override contributes no candidate.
  return override ? [override, ...defaultLocations] : defaultLocations;
}
|
||||
|
||||
/**
 * Resolves the path of the `fiscal-xbrl` sidecar binary using the real
 * filesystem (`existsSync` from `node:fs`).
 *
 * @returns The first candidate path that exists.
 * @throws Error when none of the candidate paths exists.
 */
export function resolveFiscalXbrlBinary() {
  const filesystemDeps = { existsSync };
  return resolveFiscalXbrlBinaryWithDeps(filesystemDeps);
}
|
||||
|
||||
/**
 * Walks the candidate binary paths in priority order and returns the first
 * one the injected `existsSync` reports as present.
 *
 * @param deps Provides the filesystem probe used to test each candidate.
 * @returns The first existing candidate path.
 * @throws Error when no candidate exists.
 */
function resolveFiscalXbrlBinaryWithDeps(
  deps: Pick<SidecarDeps, "existsSync">,
) {
  for (const candidate of candidateBinaryPaths()) {
    if (deps.existsSync(candidate)) {
      return candidate;
    }
  }

  throw new Error(
    "Rust XBRL sidecar binary is required but was not found. Set FISCAL_XBRL_BIN or build `fiscal-xbrl` under rust/target.",
  );
}
|
||||
|
||||
/**
 * Public entry point: hydrates a filing's taxonomy snapshot through the
 * sidecar, wired to the real runtime dependencies (`node:fs` `existsSync`,
 * `Bun.spawn`, and the global timers).
 *
 * @param input Filing description forwarded to the sidecar.
 * @returns The hydration result produced by the sidecar.
 */
export async function hydrateFilingTaxonomySnapshotFromSidecar(
  input: TaxonomyHydrationInput,
): Promise<TaxonomyHydrationResult> {
  const runtimeDeps: SidecarDeps = {
    existsSync,
    spawn: Bun.spawn,
    setTimeout: globalThis.setTimeout,
    clearTimeout: globalThis.clearTimeout,
  };

  return hydrateFilingTaxonomySnapshotFromSidecarWithDeps(input, runtimeDeps);
}
|
||||
|
||||
async function hydrateFromSidecarImpl(input: TaxonomyHydrationInput): Promise<TaxonomyHydrationResult> {
|
||||
const binary = resolveFiscalXbrlBinary();
|
||||
const timeoutMs = Math.max(Number(process.env.XBRL_ENGINE_TIMEOUT_MS ?? 45_000), 1_000);
|
||||
const command = [binary, 'hydrate-filing'];
|
||||
/**
 * Dependency-injected variant of the public hydration entry point. Each
 * attempt runs `hydrateFromSidecarImpl` with the supplied dependencies,
 * wrapped in `withRetry` (the retry policy lives in that helper and is not
 * visible here).
 *
 * @param input Filing description forwarded to the sidecar.
 * @param deps Filesystem, spawn and timer implementations to use.
 */
async function hydrateFilingTaxonomySnapshotFromSidecarWithDeps(
  input: TaxonomyHydrationInput,
  deps: SidecarDeps,
): Promise<TaxonomyHydrationResult> {
  const attempt = () => hydrateFromSidecarImpl(input, deps);
  return withRetry(attempt);
}
|
||||
|
||||
async function hydrateFromSidecarImpl(
|
||||
input: TaxonomyHydrationInput,
|
||||
deps: SidecarDeps = {
|
||||
existsSync,
|
||||
spawn: Bun.spawn,
|
||||
setTimeout: globalThis.setTimeout,
|
||||
clearTimeout: globalThis.clearTimeout,
|
||||
},
|
||||
): Promise<TaxonomyHydrationResult> {
|
||||
const binary = resolveFiscalXbrlBinaryWithDeps(deps);
|
||||
const timeoutMs = Math.max(
|
||||
Number(process.env.XBRL_ENGINE_TIMEOUT_MS ?? 45_000),
|
||||
1_000,
|
||||
);
|
||||
const command = [binary, "hydrate-filing"];
|
||||
const requestBody = JSON.stringify({
|
||||
filingId: input.filingId,
|
||||
ticker: input.ticker,
|
||||
@@ -40,22 +93,24 @@ async function hydrateFromSidecarImpl(input: TaxonomyHydrationInput): Promise<Ta
|
||||
filingType: input.filingType,
|
||||
filingUrl: input.filingUrl,
|
||||
primaryDocument: input.primaryDocument,
|
||||
cacheDir: process.env.FISCAL_XBRL_CACHE_DIR ?? join(process.cwd(), '.cache', 'xbrl')
|
||||
cacheDir:
|
||||
process.env.FISCAL_XBRL_CACHE_DIR ??
|
||||
join(process.cwd(), ".cache", "xbrl"),
|
||||
});
|
||||
|
||||
const child = Bun.spawn(command, {
|
||||
stdin: 'pipe',
|
||||
stdout: 'pipe',
|
||||
stderr: 'pipe',
|
||||
const child = deps.spawn(command, {
|
||||
stdin: "pipe",
|
||||
stdout: "pipe",
|
||||
stderr: "pipe",
|
||||
env: {
|
||||
...process.env
|
||||
}
|
||||
});
|
||||
...process.env,
|
||||
},
|
||||
}) as SpawnedSidecar;
|
||||
|
||||
child.stdin.write(new TextEncoder().encode(requestBody));
|
||||
child.stdin.end();
|
||||
|
||||
const timeout = setTimeout(() => {
|
||||
const timeout = deps.setTimeout(() => {
|
||||
child.kill();
|
||||
}, timeoutMs);
|
||||
|
||||
@@ -63,7 +118,7 @@ async function hydrateFromSidecarImpl(input: TaxonomyHydrationInput): Promise<Ta
|
||||
const [stdout, stderr, exitCode] = await Promise.all([
|
||||
new Response(child.stdout).text(),
|
||||
new Response(child.stderr).text(),
|
||||
child.exited
|
||||
child.exited,
|
||||
]);
|
||||
|
||||
if (stderr.trim().length > 0) {
|
||||
@@ -71,11 +126,20 @@ async function hydrateFromSidecarImpl(input: TaxonomyHydrationInput): Promise<Ta
|
||||
}
|
||||
|
||||
if (exitCode !== 0) {
|
||||
throw new Error(`Rust XBRL sidecar failed with exit code ${exitCode}: ${stderr.trim() || stdout.trim() || 'no error output'}`);
|
||||
throw new Error(
|
||||
`Rust XBRL sidecar failed with exit code ${exitCode}: ${stderr.trim() || stdout.trim() || "no error output"}`,
|
||||
);
|
||||
}
|
||||
|
||||
return JSON.parse(stdout) as TaxonomyHydrationResult;
|
||||
} finally {
|
||||
clearTimeout(timeout);
|
||||
deps.clearTimeout(timeout);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Module internals exported under a single, clearly-marked handle,
 * presumably for tests. Note that `resolveFiscalXbrlBinary` here is the
 * dependency-injected variant, not the public wrapper of the same name.
 */
export const __parserClientInternals = {
  candidateBinaryPaths,
  hydrateFilingTaxonomySnapshotFromSidecarWithDeps,
  hydrateFromSidecarImpl,
  // Exposed under the public name but bound to the injectable implementation.
  resolveFiscalXbrlBinary: resolveFiscalXbrlBinaryWithDeps,
};
|
||||
|
||||
Reference in New Issue
Block a user