mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2026-05-10 23:10:08 +02:00
# Description of Changes
Flesh out the RAG system and connect it to the PDF Question Agent so that
the agent can answer questions about extremely large PDFs.
I expect a lot more work will be needed before the RAG system is really
what we need, but this should be a reasonable start: it lets us connect
RAG to tools and have ingestion handled mostly automatically. I'm leaving
file deletion and proper file ID management for a future PR. We also need
to consider whether all tools should retrieve content exclusively via RAG,
or whether it's beneficial for tools to sometimes fetch the content
directly and other times fetch it from RAG.
A diagram of the expected interaction is as follows:
```mermaid
sequenceDiagram
autonumber
actor U as User
participant FE as Frontend<br/>(ChatPanel)
participant J as Java<br/>(AiWorkflowService)
participant O as Engine:<br/>OrchestratorAgent
participant QA as Engine:<br/>PdfQuestionAgent
participant RAG as Engine:<br/>RagService + SqliteVecStore
participant V as VoyageAI<br/>(embeddings)
participant L as LLM<br/>(Claude / etc.)
U->>FE: types "Summarise this PDF"<br/>(PDF already uploaded)
FE->>J: POST /api/v1/ai/orchestrate/stream<br/>multipart: fileInputs[], userMessage
Note over J: ByteHashFileIdStrategy<br/>id = sha256(bytes)[:16]
J->>O: POST /api/v1/orchestrator<br/>{ files:[{id,name}], userMessage }
O->>L: route via fast model
L-->>O: delegate_pdf_question
O->>QA: PdfQuestionRequest
loop for each file
QA->>RAG: has_collection(file.id)
RAG-->>QA: false
end
QA-->>O: NeedIngestResponse(files_to_ingest)
O-->>J: { outcome:"need_ingest", filesToIngest:[...] }
Note over J: onNeedIngest
loop per file
J->>J: PDFBox: extract page text
J->>O: POST /api/v1/rag/documents<br/>(long-running timeout)
O->>RAG: chunk + stage documents
O->>V: embed_documents (batches of 256)
V-->>O: embeddings
O->>RAG: add_documents
O-->>J: { chunks_indexed: N }
end
Note over J: retry with resumeWith=pdf_question
J->>O: POST /api/v1/orchestrator
Note over O: fast-path to PdfQuestionAgent
O->>QA: PdfQuestionRequest
Note over QA: build RagCapability<br/>pinned to file IDs
QA->>L: run(prompt) with search_knowledge tool
loop up to max_searches
L->>QA: search_knowledge(query)
QA->>V: embed_query
V-->>QA: query vector
QA->>RAG: search(vector, collections=[file.id])
RAG-->>QA: top-k chunks
QA-->>L: formatted chunks
end
Note over QA: once budget spent,<br/>prepare() hides the tool
L-->>QA: PdfQuestionAnswerResponse
QA-->>O: answer
O-->>J: { outcome:"answer", answer, evidence }
J-->>FE: SSE "result"
FE->>U: assistant bubble
```
104 lines
3.1 KiB
Python
104 lines
3.1 KiB
Python
from stirling.config import AppSettings
|
|
from stirling.contracts import (
|
|
AgentExecutionRequest,
|
|
AgentSpec,
|
|
AgentSpecStep,
|
|
AiFile,
|
|
EditPlanResponse,
|
|
ExecutionContext,
|
|
ExtractedFileText,
|
|
ExtractedTextArtifact,
|
|
OrchestratorRequest,
|
|
PdfQuestionAnswerResponse,
|
|
PdfTextSelection,
|
|
ToolOperationStep,
|
|
)
|
|
from stirling.models import FileId
|
|
from stirling.models.tool_models import Angle, RotatePdfParams, ToolEndpoint
|
|
|
|
|
|
def test_orchestrator_request_accepts_user_message() -> None:
    """Build an OrchestratorRequest and check the message and artifacts are kept.

    The request carries one file and one extracted-text artifact containing a
    single page of text.
    """
    pdf_file = AiFile(id=FileId("test-id"), name="test.pdf")
    first_page = PdfTextSelection(page_number=1, text="Hello")
    extracted_text = ExtractedFileText(file_name="test.pdf", pages=[first_page])
    text_artifact = ExtractedTextArtifact(files=[extracted_text])

    request = OrchestratorRequest(
        user_message="Rotate the PDF",
        files=[pdf_file],
        artifacts=[text_artifact],
    )

    assert request.user_message == "Rotate the PDF"
    assert len(request.artifacts) == 1
|
|
|
|
|
|
def test_agent_execution_request_uses_typed_agent_spec() -> None:
    """Check that a step passed through AgentExecutionRequest reports kind "tool".

    A single rotate-PDF ToolOperationStep is wrapped in an AgentSpec, which is
    then embedded in an AgentExecutionRequest.
    """
    rotate_step = ToolOperationStep(
        tool=ToolEndpoint.ROTATE_PDF,
        parameters=RotatePdfParams(angle=Angle(90)),
    )
    steps: list[AgentSpecStep] = [rotate_step]

    spec = AgentSpec(
        name="Invoice cleanup",
        description="Normalise inbound invoices",
        objective="Prepare uploads for accounting review",
        steps=steps,
    )
    context = ExecutionContext(input_files=["invoice.pdf"])
    request = AgentExecutionRequest(
        agent_spec=spec,
        current_step_index=0,
        execution_context=context,
    )

    first_step = request.agent_spec.steps[0]
    assert first_step.kind == "tool"
|
|
|
|
|
|
def test_edit_plan_response_has_typed_steps() -> None:
    """Check that EditPlanResponse exposes the tool of the step it was given."""
    rotate_step = ToolOperationStep(
        tool=ToolEndpoint.ROTATE_PDF,
        parameters=RotatePdfParams(angle=Angle(90)),
    )

    response = EditPlanResponse(
        summary="Rotate the input PDF by 90 degrees.",
        steps=[rotate_step],
    )

    first_step = response.steps[0]
    assert first_step.tool == ToolEndpoint.ROTATE_PDF
|
|
|
|
|
|
def test_pdf_question_answer_defaults_evidence_list() -> None:
    """Check that `evidence` is an empty list when only `answer` is supplied."""
    answer_only = PdfQuestionAnswerResponse(
        answer="The invoice total is 120.00",
    )

    # Compare against [] rather than using truthiness so that None would fail.
    assert answer_only.evidence == []
|
|
|
|
|
|
def test_app_settings_accepts_model_configuration() -> None:
    """Construct AppSettings with every option given explicitly and spot-check it.

    Verifies that the smart model name is non-empty and that the fast model
    token limit matches the value passed in.
    """
    from pathlib import Path

    from stirling.config import RagBackend

    configured = AppSettings(
        # Model selection and token limits.
        smart_model_name="claude-sonnet-4-5-20250929",
        fast_model_name="claude-haiku-4-5-20251001",
        smart_model_max_tokens=8192,
        fast_model_max_tokens=2048,
        # RAG options.
        rag_backend=RagBackend.SQLITE,
        rag_embedding_model="voyageai:voyage-4",
        rag_store_path=Path(":memory:"),
        rag_pgvector_dsn="",
        rag_chunk_size=512,
        rag_chunk_overlap=64,
        rag_default_top_k=5,
        rag_max_searches=5,
        # Page and character limits.
        max_pages=200,
        max_characters=200_000,
        # PostHog options.
        posthog_enabled=False,
        posthog_api_key="",
        posthog_host="https://eu.i.posthog.com",
    )

    assert configured.smart_model_name
    assert configured.fast_model_max_tokens == 2048
|