mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2026-05-10 23:10:08 +02:00
# Description of Changes
Flesh out the RAG system and connect it to the PDF Question Agent so it
can respond to questions about PDFs of an extremely large size.
I'd expect lots more work will need to be done to finish off the RAG
system to really be what we need, but this should be a reasonable start
which will let us connect it to tools and have the ingestion mostly
handled automatically. I'm leaving file deletion and proper file ID
management to be done in a future PR. We also need to consider whether
all tools should retrieve content exclusively via RAG, or whether it's
beneficial to have tools sometimes fetch the direct content and other
times fetch it from RAG.
A diagram of the expected interaction is as follows:
```mermaid
sequenceDiagram
autonumber
actor U as User
participant FE as Frontend<br/>(ChatPanel)
participant J as Java<br/>(AiWorkflowService)
participant O as Engine:<br/>OrchestratorAgent
participant QA as Engine:<br/>PdfQuestionAgent
participant RAG as Engine:<br/>RagService + SqliteVecStore
participant V as VoyageAI<br/>(embeddings)
participant L as LLM<br/>(Claude / etc.)
U->>FE: types "Summarise this PDF"<br/>(PDF already uploaded)
FE->>J: POST /api/v1/ai/orchestrate/stream<br/>multipart: fileInputs[], userMessage
Note over J: ByteHashFileIdStrategy<br/>id = sha256(bytes)[:16]
J->>O: POST /api/v1/orchestrator<br/>{ files:[{id,name}], userMessage }
O->>L: route via fast model
L-->>O: delegate_pdf_question
O->>QA: PdfQuestionRequest
loop for each file
QA->>RAG: has_collection(file.id)
RAG-->>QA: false
end
QA-->>O: NeedIngestResponse(files_to_ingest)
O-->>J: { outcome:"need_ingest", filesToIngest:[...] }
Note over J: onNeedIngest
loop per file
J->>J: PDFBox: extract page text
J->>O: POST /api/v1/rag/documents<br/>(long-running timeout)
O->>RAG: chunk + stage documents
O->>V: embed_documents (batches of 256)
V-->>O: embeddings
O->>RAG: add_documents
O-->>J: { chunks_indexed: N }
end
Note over J: retry with resumeWith=pdf_question
J->>O: POST /api/v1/orchestrator
Note over O: fast-path to PdfQuestionAgent
O->>QA: PdfQuestionRequest
Note over QA: build RagCapability<br/>pinned to file IDs
QA->>L: run(prompt) with search_knowledge tool
loop up to max_searches
L->>QA: search_knowledge(query)
QA->>V: embed_query
V-->>QA: query vector
QA->>RAG: search(vector, collections=[file.id])
RAG-->>QA: top-k chunks
QA-->>L: formatted chunks
end
Note over QA: once budget spent,<br/>prepare() hides the tool
L-->>QA: PdfQuestionAnswerResponse
QA-->>O: answer
O-->>J: { outcome:"answer", answer, evidence }
J-->>FE: SSE "result"
FE->>U: assistant bubble
```
103 lines
3.7 KiB
Python
"""Tests for ``PdfQuestionAgent.orchestrate`` — classifier-driven first-turn
|
|
routing and prompt pinning. The legacy text-grounded ``handle`` path is
|
|
covered separately in ``tests/test_pdf_question_agent.py``.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass
|
|
from unittest.mock import AsyncMock, patch
|
|
|
|
import pytest
|
|
|
|
from stirling.agents.pdf_questions import _MATH_SYNTH_SYSTEM_PROMPT, PdfQuestionAgent
|
|
from stirling.contracts import (
|
|
AiFile,
|
|
EditPlanResponse,
|
|
MathAuditorToolReportArtifact,
|
|
OrchestratorRequest,
|
|
PdfQuestionAnswerResponse,
|
|
SupportedCapability,
|
|
)
|
|
from stirling.contracts.ledger import Discrepancy, DiscrepancyKind, Severity, Verdict
|
|
from stirling.models import FileId
|
|
from stirling.models.agent_tool_models import AgentToolId
|
|
from stirling.services.runtime import AppRuntime
|
|
|
|
|
|
@dataclass
class _StubResult:
    """Minimal stand-in for the result object returned by the math-synth
    agent's ``run`` — the code under test only reads ``output``."""

    # The synthesised answer text handed back to the caller.
    output: str
|
|
|
|
|
|
def _make_verdict() -> Verdict:
    """Build a minimal non-clean ``Verdict`` carrying a single tally
    discrepancy, mirroring what the math-auditor tool reports for a bad total."""
    mismatch = Discrepancy(
        page=0,
        kind=DiscrepancyKind.TALLY,
        severity=Severity.ERROR,
        description="Total mismatch.",
        stated="$215,000",
        expected="$215,500",
        context="Revenue row",
    )
    return Verdict(
        session_id="s1",
        discrepancies=[mismatch],
        pages_examined=[0],
        rounds_taken=1,
        summary="One discrepancy.",
        clean=False,
    )
|
|
|
|
|
|
@pytest.mark.anyio
async def test_orchestrate_classifier_true_returns_math_audit_plan(runtime: AppRuntime) -> None:
    """First turn with a math question: the intent classifier says "math", so
    ``orchestrate`` must return a one-step ``EditPlanResponse`` that invokes
    the math-auditor tool and names ``PDF_QUESTION`` as the resume capability.
    The caller is expected to run the plan and re-invoke the orchestrator with
    the resulting verdict in artifacts."""
    agent = PdfQuestionAgent(runtime)
    files = [AiFile(id=FileId("report-id"), name="report.pdf")]
    request = OrchestratorRequest(user_message="ist die mathematik korrekt?", files=files)

    # Force the classifier's answer; the real LLM call is never made.
    classify_stub = AsyncMock(return_value=True)
    with patch.object(agent._math_intent_classifier, "classify", classify_stub):
        response = await agent.orchestrate(request)

    assert isinstance(response, EditPlanResponse)
    assert response.resume_with == SupportedCapability.PDF_QUESTION
    assert len(response.steps) == 1
    assert response.steps[0].tool == AgentToolId.MATH_AUDITOR_AGENT
|
|
|
|
|
|
@pytest.mark.anyio
async def test_orchestrate_resume_synthesises_answer_without_calling_classifier(
    runtime: AppRuntime,
) -> None:
    """Resume turn: a ``Verdict`` artifact is already attached, so the agent
    should synthesise its answer via the (mocked) math-synth LLM and skip the
    intent classifier entirely — there is no point asking "is this math?" when
    a verdict is already in hand."""
    agent = PdfQuestionAgent(runtime)
    canned_answer = "Die Summe stimmt nicht: angegeben $215,000, erwartet $215,500."
    classifier_mock = AsyncMock(return_value=False)
    request = OrchestratorRequest(
        user_message="ist die mathematik korrekt?",
        files=[AiFile(id=FileId("report-id"), name="report.pdf")],
        artifacts=[MathAuditorToolReportArtifact(report=_make_verdict())],
    )

    with patch.object(agent._math_synth_agent, "run", return_value=_StubResult(output=canned_answer)), patch.object(agent._math_intent_classifier, "classify", classifier_mock):
        response = await agent.orchestrate(request)

    assert isinstance(response, PdfQuestionAnswerResponse)
    assert response.answer == canned_answer
    classifier_mock.assert_not_called()
|
|
|
|
|
|
def test_math_synth_prompt_requires_verbatim_quoting() -> None:
    """Guard against prompt drift: the synth system prompt must retain its
    verbatim-quoting rule, otherwise the LLM may paraphrase numeric values
    taken from the Verdict."""
    prompt_text = _MATH_SYNTH_SYSTEM_PROMPT.lower()
    assert "verbatim" in prompt_text
|