Files
Stirling-PDF/engine/tests/test_pdf_question_agent.py
James Brunton 3e94157137 Add document context for edit agent (#6152)
# Description of Changes
Adds the ability for the Edit agent to request the content of the
document before it decides which parameters it needs. This makes it able
to process requests like `Split the document after the page containing
the "My Section" section`, allowing for document context-based requests
for all[^1] tools.

I had to make a few changes elsewhere to make this work, including:
- Moved the content-requesting logic out of the Question Agent and into a
common location
- Added specific API docs for the Split param because the generic ones
were not specific enough for the AI to be able to reliably perform the
correct operation
- Fixed an issue in the tool models generator which caused the Redact
params to only be half-generated (causing Pydantic to crash when the AI
tried to run Redact)
- Added missing logging to a bunch of tools and hooked it up properly so
it'll print to stderr
- Made the limits for the max pages/chars to extract from PDFs
configurable via env var

[^1]: Many of the tools can't actually do anything useful with the
context at this stage. Once the tool API is extended with new features
such as page-specific operations, they will automatically be able to
perform smart operations without any changes to the Edit agent itself.
2026-04-23 13:19:27 +00:00

87 lines
2.6 KiB
Python

from __future__ import annotations
import pytest
from stirling.agents import PdfQuestionAgent
from stirling.contracts import (
ExtractedFileText,
NeedContentResponse,
PdfQuestionAnswerResponse,
PdfQuestionNotFoundResponse,
PdfQuestionRequest,
PdfTextSelection,
)
from stirling.services.runtime import AppRuntime
class StubPdfQuestionAgent(PdfQuestionAgent):
    """Test double for ``PdfQuestionAgent`` with a canned answer step.

    ``_run_answer_agent`` is overridden to hand back the response given at
    construction time, so the tests in this module can drive ``handle``
    with a predetermined outcome.
    """

    def __init__(
        self,
        runtime: AppRuntime,
        response: PdfQuestionAnswerResponse | PdfQuestionNotFoundResponse,
    ) -> None:
        """Initialise the parent agent, then remember the canned response."""
        super().__init__(runtime)
        self.response = response

    async def _run_answer_agent(
        self,
        request: PdfQuestionRequest,
    ) -> PdfQuestionAnswerResponse | PdfQuestionNotFoundResponse:
        """Return the canned response; ``request`` is deliberately ignored."""
        return self.response
def invoice_page() -> ExtractedFileText:
    """Build the extracted text of a one-page ``invoice.pdf`` fixture."""
    total_line = PdfTextSelection(page_number=1, text="Invoice total: 120.00")
    return ExtractedFileText(file_name="invoice.pdf", pages=[total_line])
@pytest.mark.anyio
async def test_pdf_question_agent_requires_extracted_text(runtime: AppRuntime) -> None:
    """A request with empty ``page_text`` is expected to yield ``NeedContentResponse``."""
    agent = PdfQuestionAgent(runtime)
    request_without_text = PdfQuestionRequest(
        question="What is the total?",
        page_text=[],
        file_names=["test.pdf"],
    )

    result = await agent.handle(request_without_text)

    assert isinstance(result, NeedContentResponse)
@pytest.mark.anyio
async def test_pdf_question_agent_returns_grounded_answer(runtime: AppRuntime) -> None:
    """When the answer step yields an answer, ``handle`` is expected to return it."""
    expected_answer = "The invoice total is 120.00."
    canned_response = PdfQuestionAnswerResponse(
        answer=expected_answer,
        evidence=[invoice_page()],
    )
    agent = StubPdfQuestionAgent(runtime, canned_response)
    request = PdfQuestionRequest(
        question="What is the total?",
        page_text=[invoice_page()],
        file_names=["invoice.pdf"],
    )

    result = await agent.handle(request)

    assert isinstance(result, PdfQuestionAnswerResponse)
    assert result.answer == expected_answer
@pytest.mark.anyio
async def test_pdf_question_agent_returns_not_found_when_text_is_insufficient(runtime: AppRuntime) -> None:
    """When the answer step yields not-found, ``handle`` is expected to return that type."""
    canned_response = PdfQuestionNotFoundResponse(reason="The answer is not present in the text.")
    agent = StubPdfQuestionAgent(runtime, canned_response)

    # Page text that does not contain the figure the question asks about.
    address_only_page = PdfTextSelection(
        page_number=1,
        text="This page contains only a shipping address.",
    )
    extracted = ExtractedFileText(file_name="invoice.pdf", pages=[address_only_page])
    request = PdfQuestionRequest(
        question="What is the total?",
        page_text=[extracted],
        file_names=["invoice.pdf"],
    )

    result = await agent.handle(request)

    assert isinstance(result, PdfQuestionNotFoundResponse)