diff --git a/.gitignore b/.gitignore index 48d38e3e56..880715ff97 100644 --- a/.gitignore +++ b/.gitignore @@ -255,3 +255,6 @@ docs/type3/signatures/ # Type3 sample PDFs (development only) **/type3/samples/ + +# Claude +.claude/ diff --git a/engine/config/.env.example b/engine/config/.env.example index 64fb0e67ae..55e2cfc309 100644 --- a/engine/config/.env.example +++ b/engine/config/.env.example @@ -7,3 +7,8 @@ STIRLING_FAST_MODEL=anthropic:claude-haiku-4-5 # Default output token limits applied by the engine for each model tier. STIRLING_SMART_MODEL_MAX_TOKENS=8192 STIRLING_FAST_MODEL_MAX_TOKENS=2048 + +# PostHog analytics. Set STIRLING_POSTHOG_ENABLED=true and provide an API key to enable. +STIRLING_POSTHOG_ENABLED=false +STIRLING_POSTHOG_API_KEY=phc_VOdeYnlevc2T63m3myFGjeBlRcIusRgmhfx6XL5a1iz +STIRLING_POSTHOG_HOST=https://eu.i.posthog.com diff --git a/engine/pyproject.toml b/engine/pyproject.toml index d8c665f373..17da20b1b0 100644 --- a/engine/pyproject.toml +++ b/engine/pyproject.toml @@ -10,6 +10,8 @@ dependencies = [ "pydantic-settings>=2.0.0", "python-dotenv>=1.2.1", "uvicorn>=0.35.0", + "opentelemetry-sdk>=1.39.0", + "posthog>=3.0.0", ] [dependency-groups] diff --git a/engine/src/stirling/api/app.py b/engine/src/stirling/api/app.py index d415f13d6f..76a915001a 100644 --- a/engine/src/stirling/api/app.py +++ b/engine/src/stirling/api/app.py @@ -4,8 +4,11 @@ from contextlib import asynccontextmanager from typing import Annotated from fastapi import Depends, FastAPI +from pydantic_ai import Agent +from pydantic_ai.models.instrumented import InstrumentationSettings from stirling.agents import ExecutionPlanningAgent, OrchestratorAgent, PdfEditAgent, PdfQuestionAgent, UserSpecAgent +from stirling.api.middleware import UserIdMiddleware from stirling.api.routes import ( agent_draft_router, execution_router, @@ -15,7 +18,7 @@ from stirling.api.routes import ( ) from stirling.config import AppSettings, load_settings from stirling.contracts import HealthResponse 
class UserIdMiddleware(BaseHTTPMiddleware):
    """Bind the ``X-User-Id`` request header to ``current_user_id`` while a request is handled.

    When the header is missing or empty the request is passed through untouched,
    leaving ``current_user_id`` at whatever value it already had.
    """

    async def dispatch(self, request: Request, call_next: RequestResponseEndpoint) -> Response:
        """Run the downstream handler with ``current_user_id`` set from the request header.

        Args:
            request: Incoming request whose headers are inspected.
            call_next: Callable that invokes the rest of the middleware/route chain.

        Returns:
            The response produced by ``call_next``.
        """
        header_value = request.headers.get(_USER_ID_HEADER)
        if not header_value:
            # Nothing to bind: forward the request unchanged.
            return await call_next(request)

        reset_token = current_user_id.set(header_value)
        try:
            response = await call_next(request)
        finally:
            # Always restore the previous value, even if the handler raised.
            current_user_id.reset(reset_token)
        return response
a/engine/src/stirling/config/settings.py +++ b/engine/src/stirling/config/settings.py @@ -19,6 +19,10 @@ class AppSettings(BaseSettings): smart_model_max_tokens: int = Field(validation_alias="STIRLING_SMART_MODEL_MAX_TOKENS") fast_model_max_tokens: int = Field(validation_alias="STIRLING_FAST_MODEL_MAX_TOKENS") + posthog_enabled: bool = Field(validation_alias="STIRLING_POSTHOG_ENABLED") + posthog_api_key: str = Field(validation_alias="STIRLING_POSTHOG_API_KEY") + posthog_host: str = Field(validation_alias="STIRLING_POSTHOG_HOST") + @lru_cache(maxsize=1) def load_settings() -> AppSettings: diff --git a/engine/src/stirling/services/__init__.py b/engine/src/stirling/services/__init__.py index d4c79f4910..8894f41149 100644 --- a/engine/src/stirling/services/__init__.py +++ b/engine/src/stirling/services/__init__.py @@ -1,9 +1,11 @@ """Shared services used by the Stirling AI runtime.""" from .runtime import AppRuntime, build_model_settings, build_runtime +from .tracking import setup_posthog_tracking __all__ = [ "AppRuntime", "build_model_settings", "build_runtime", + "setup_posthog_tracking", ] diff --git a/engine/src/stirling/services/tracking.py b/engine/src/stirling/services/tracking.py new file mode 100644 index 0000000000..f8c93524a1 --- /dev/null +++ b/engine/src/stirling/services/tracking.py @@ -0,0 +1,233 @@ +from __future__ import annotations + +import json +from collections import OrderedDict +from collections.abc import Mapping +from contextvars import ContextVar +from typing import Any + +from opentelemetry.context import Context +from opentelemetry.sdk.trace import ReadableSpan, SpanProcessor, TracerProvider +from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import ( # No public import for these constants yet + GEN_AI_INPUT_MESSAGES, + GEN_AI_OPERATION_NAME, + GEN_AI_OUTPUT_MESSAGES, + GEN_AI_REQUEST_MAX_TOKENS, + GEN_AI_REQUEST_MODEL, + GEN_AI_REQUEST_TEMPERATURE, + GEN_AI_RESPONSE_MODEL, + GEN_AI_SYSTEM, + GEN_AI_TOOL_DEFINITIONS, + 
class LRUSet:
    """Least Recently Used set: a bounded set of strings that evicts the stalest entry first.

    An entry's recency is refreshed every time it is passed to :meth:`add`, so a
    key that keeps being re-added outlives keys that were added once and never
    touched again. Membership tests do not affect recency.
    """

    def __init__(self, max_size: int) -> None:
        """Create an empty set that holds at most ``max_size`` entries."""
        self._max_size = max_size
        # OrderedDict keeps keys ordered from stalest (front) to freshest (back).
        self._data: OrderedDict[str, None] = OrderedDict()

    def __contains__(self, key: str) -> bool:
        """Return True if ``key`` is currently held in the set."""
        return key in self._data

    def add(self, key: str) -> None:
        """Insert ``key`` (or refresh it if already present), evicting the stalest entry on overflow."""
        self._data[key] = None
        # Assigning to an existing key keeps its old position, so move it to the
        # back explicitly; otherwise a re-added key would still be evicted first.
        self._data.move_to_end(key)
        if len(self._data) > self._max_size:
            self._data.popitem(last=False)
def _extract_user_message(attrs: Mapping[str, Any]) -> str:
    """Extract the last user message text from the input messages span attribute.

    Messages are scanned from newest to oldest. The first ``text`` part found in
    a ``role == "user"`` message is returned; user messages without a text part
    are skipped in favour of earlier ones.

    Args:
        attrs: Span attributes; the input messages attribute is expected to hold
            a JSON-encoded list of message objects.

    Returns:
        The text content as a string, or ``""`` when the attribute is missing,
        is not a JSON list, or contains no user text part.
    """
    messages = _parse_json_attr(attrs, GEN_AI_INPUT_MESSAGES)
    if not isinstance(messages, list):
        return ""
    for msg in reversed(messages):
        if not isinstance(msg, dict) or msg.get("role") != "user":
            continue
        parts = msg.get("parts")
        # "parts" may be present but null or otherwise malformed; iterating it
        # blindly would raise TypeError from inside the span processor.
        if not isinstance(parts, list):
            continue
        for part in parts:
            if isinstance(part, dict) and part.get("type") == "text":
                return str(part.get("content", ""))
    return ""
def _maybe_emit_trace_event(
    self, span: ReadableSpan, attrs: Mapping[str, Any], properties: dict[str, object]
) -> None:
    """Send one ``$ai_trace`` event per trace ID, triggered by the first span seen for it.

    Args:
        span: The finished span; its start/end times provide the latency.
        attrs: The span's attributes, used for the trace name and provider.
        properties: Already-built generation properties; only ``$ai_trace_id`` is read.
    """
    trace_id = str(properties.get("$ai_trace_id", ""))
    if not trace_id:
        return
    if trace_id in self._seen_traces:
        # A trace event was already sent for this trace.
        return
    self._seen_traces.add(trace_id)

    trace_event: dict[str, object] = {
        "$ai_trace_id": trace_id,
        "$ai_trace_name": _extract_user_message(attrs),
        "$ai_provider": attrs.get(GEN_AI_SYSTEM, ""),
    }
    started, ended = span.start_time, span.end_time
    if started and ended:
        # Span timestamps are divided by 1e9 before being reported as latency.
        trace_event["$ai_latency"] = (ended - started) / 1e9

    self._client.capture(
        distinct_id=current_user_id.get(),
        event="$ai_trace",
        properties=trace_event,
    )
@staticmethod
def _add_base_url(properties: dict[str, object], attrs: Mapping[str, Any]) -> None:
    """Set ``$ai_base_url`` to ``host`` or ``host:port`` from the span's server attributes.

    The property is only written when a server address is present. A port on
    its own is ignored, because a bare port number (e.g. ``"443"``) is not a
    meaningful base URL.

    Args:
        properties: Event properties dict, mutated in place.
        attrs: Span attributes that may carry the server address and port.
    """
    host = attrs.get(SERVER_ADDRESS)
    if not host:
        return
    port = attrs.get(SERVER_PORT)
    properties["$ai_base_url"] = f"{host}:{port}" if port else str(host)
def build_app_settings() -> AppSettings:
    """Return an ``AppSettings`` built from fixed test values, with PostHog tracking switched off."""
    test_settings = AppSettings(
        smart_model_name="test",
        fast_model_name="test",
        smart_model_max_tokens=8192,
        fast_model_max_tokens=2048,
        # Tracking stays disabled and keyless in the test settings.
        posthog_enabled=False,
        posthog_api_key="",
        posthog_host="https://eu.i.posthog.com",
    )
    return test_settings
RotateParams -from stirling.services import build_runtime - - -def build_test_settings() -> AppSettings: - return AppSettings( - smart_model_name="test", - fast_model_name="test", - smart_model_max_tokens=8192, - fast_model_max_tokens=2048, - ) +from stirling.services.runtime import AppRuntime @dataclass(frozen=True) @@ -61,10 +51,11 @@ class RecordingParameterSelector: class StubPdfEditAgent(PdfEditAgent): def __init__( self, + runtime: AppRuntime, selection: PdfEditPlanSelection | EditClarificationRequest | EditCannotDoResponse, parameter_selector: RecordingParameterSelector | PdfEditParameterSelector | None = None, ) -> None: - super().__init__(build_runtime(build_test_settings())) + super().__init__(runtime) self.selection = selection if parameter_selector is not None: self.parameter_selector = parameter_selector @@ -77,9 +68,10 @@ class StubPdfEditAgent(PdfEditAgent): @pytest.mark.anyio -async def test_pdf_edit_agent_builds_multi_step_plan() -> None: +async def test_pdf_edit_agent_builds_multi_step_plan(runtime: AppRuntime) -> None: parameter_selector = RecordingParameterSelector() agent = StubPdfEditAgent( + runtime, PdfEditPlanSelection( operations=[OperationId.ROTATE, OperationId.COMPRESS], summary="Rotate the PDF, then compress it.", @@ -104,9 +96,10 @@ async def test_pdf_edit_agent_builds_multi_step_plan() -> None: @pytest.mark.anyio -async def test_pdf_edit_agent_passes_previous_steps_to_parameter_selector() -> None: +async def test_pdf_edit_agent_passes_previous_steps_to_parameter_selector(runtime: AppRuntime) -> None: parameter_selector = RecordingParameterSelector() agent = StubPdfEditAgent( + runtime, PdfEditPlanSelection( operations=[OperationId.ROTATE, OperationId.COMPRESS], summary="Rotate the PDF, then compress it.", @@ -134,12 +127,13 @@ async def test_pdf_edit_agent_passes_previous_steps_to_parameter_selector() -> N @pytest.mark.anyio -async def test_pdf_edit_agent_returns_clarification_without_partial_plan() -> None: +async def 
test_pdf_edit_agent_returns_clarification_without_partial_plan(runtime: AppRuntime) -> None: agent = StubPdfEditAgent( + runtime, EditClarificationRequest( question="Which pages should be rotated?", reason="The request does not say which pages to change.", - ) + ), ) response = await agent.handle(PdfEditRequest(user_message="Rotate some pages.")) @@ -148,11 +142,12 @@ async def test_pdf_edit_agent_returns_clarification_without_partial_plan() -> No @pytest.mark.anyio -async def test_pdf_edit_agent_returns_cannot_do_without_partial_plan() -> None: +async def test_pdf_edit_agent_returns_cannot_do_without_partial_plan(runtime: AppRuntime) -> None: agent = StubPdfEditAgent( + runtime, EditCannotDoResponse( reason="This request requires OCR, which is not part of PDF edit planning.", - ) + ), ) response = await agent.handle(PdfEditRequest(user_message="Read this scan and summarize it.")) diff --git a/engine/tests/test_pdf_question_agent.py b/engine/tests/test_pdf_question_agent.py index a4a1ce7bfd..b284870b76 100644 --- a/engine/tests/test_pdf_question_agent.py +++ b/engine/tests/test_pdf_question_agent.py @@ -3,7 +3,6 @@ from __future__ import annotations import pytest from stirling.agents import PdfQuestionAgent -from stirling.config import AppSettings from stirling.contracts import ( ExtractedFileText, PdfQuestionAnswerResponse, @@ -12,12 +11,12 @@ from stirling.contracts import ( PdfQuestionRequest, PdfTextSelection, ) -from stirling.services import build_runtime +from stirling.services.runtime import AppRuntime class StubPdfQuestionAgent(PdfQuestionAgent): - def __init__(self, response: PdfQuestionAnswerResponse | PdfQuestionNotFoundResponse) -> None: - super().__init__(build_runtime(build_test_settings())) + def __init__(self, runtime: AppRuntime, response: PdfQuestionAnswerResponse | PdfQuestionNotFoundResponse) -> None: + super().__init__(runtime) self.response = response async def _run_answer_agent( @@ -27,15 +26,6 @@ class StubPdfQuestionAgent(PdfQuestionAgent): 
return self.response -def build_test_settings() -> AppSettings: - return AppSettings( - smart_model_name="test", - fast_model_name="test", - smart_model_max_tokens=8192, - fast_model_max_tokens=2048, - ) - - def invoice_page() -> ExtractedFileText: return ExtractedFileText( file_name="invoice.pdf", @@ -44,8 +34,8 @@ def invoice_page() -> ExtractedFileText: @pytest.mark.anyio -async def test_pdf_question_agent_requires_extracted_text() -> None: - agent = PdfQuestionAgent(build_runtime(build_test_settings())) +async def test_pdf_question_agent_requires_extracted_text(runtime: AppRuntime) -> None: + agent = PdfQuestionAgent(runtime) response = await agent.handle( PdfQuestionRequest(question="What is the total?", page_text=[], file_names=["test.pdf"]) @@ -55,12 +45,13 @@ async def test_pdf_question_agent_requires_extracted_text() -> None: @pytest.mark.anyio -async def test_pdf_question_agent_returns_grounded_answer() -> None: +async def test_pdf_question_agent_returns_grounded_answer(runtime: AppRuntime) -> None: agent = StubPdfQuestionAgent( + runtime, PdfQuestionAnswerResponse( answer="The invoice total is 120.00.", evidence=[invoice_page()], - ) + ), ) response = await agent.handle( @@ -76,8 +67,8 @@ async def test_pdf_question_agent_returns_grounded_answer() -> None: @pytest.mark.anyio -async def test_pdf_question_agent_returns_not_found_when_text_is_insufficient() -> None: - agent = StubPdfQuestionAgent(PdfQuestionNotFoundResponse(reason="The answer is not present in the text.")) +async def test_pdf_question_agent_returns_not_found_when_text_is_insufficient(runtime: AppRuntime) -> None: + agent = StubPdfQuestionAgent(runtime, PdfQuestionNotFoundResponse(reason="The answer is not present in the text.")) response = await agent.handle( PdfQuestionRequest( diff --git a/engine/tests/test_stirling_api.py b/engine/tests/test_stirling_api.py index 774fb28d9d..b002f389b0 100644 --- a/engine/tests/test_stirling_api.py +++ b/engine/tests/test_stirling_api.py @@ -1,3 +1,4 @@ 
+from conftest import build_app_settings from fastapi.testclient import TestClient from stirling.api import app @@ -8,7 +9,7 @@ from stirling.api.dependencies import ( get_pdf_question_agent, get_user_spec_agent, ) -from stirling.config import AppSettings, load_settings +from stirling.config import load_settings from stirling.contracts import ( AgentDraft, AgentDraftRequest, @@ -27,16 +28,6 @@ from stirling.contracts import ( from stirling.models.tool_models import RotateParams -class StubSettingsProvider: - def __call__(self) -> AppSettings: - return AppSettings( - smart_model_name="test", - fast_model_name="test", - smart_model_max_tokens=8192, - fast_model_max_tokens=2048, - ) - - class StubOrchestratorAgent: async def handle(self, request: OrchestratorRequest) -> PdfQuestionNeedContentResponse: return PdfQuestionNeedContentResponse(reason=request.user_message, files=[], max_pages=1, max_characters=1000) @@ -72,41 +63,16 @@ class StubExecutionPlanningAgent: return CannotContinueExecutionAction(reason=str(request.current_step_index)) +app.dependency_overrides[load_settings] = build_app_settings +app.dependency_overrides[get_orchestrator_agent] = lambda: StubOrchestratorAgent() +app.dependency_overrides[get_pdf_edit_agent] = lambda: StubPdfEditAgent() +app.dependency_overrides[get_pdf_question_agent] = lambda: StubPdfQuestionAgent() +app.dependency_overrides[get_user_spec_agent] = lambda: StubUserSpecAgent() +app.dependency_overrides[get_execution_planning_agent] = lambda: StubExecutionPlanningAgent() + client: TestClient = TestClient(app) -def override_settings() -> AppSettings: - return StubSettingsProvider()() - - -def override_orchestrator_agent() -> StubOrchestratorAgent: - return StubOrchestratorAgent() - - -def override_pdf_edit_agent() -> StubPdfEditAgent: - return StubPdfEditAgent() - - -def override_pdf_question_agent() -> StubPdfQuestionAgent: - return StubPdfQuestionAgent() - - -def override_user_spec_agent() -> StubUserSpecAgent: - return 
StubUserSpecAgent() - - -def override_execution_agent() -> StubExecutionPlanningAgent: - return StubExecutionPlanningAgent() - - -app.dependency_overrides[load_settings] = override_settings -app.dependency_overrides[get_orchestrator_agent] = override_orchestrator_agent -app.dependency_overrides[get_pdf_edit_agent] = override_pdf_edit_agent -app.dependency_overrides[get_pdf_question_agent] = override_pdf_question_agent -app.dependency_overrides[get_user_spec_agent] = override_user_spec_agent -app.dependency_overrides[get_execution_planning_agent] = override_execution_agent - - def test_health_route() -> None: response = client.get("/health") diff --git a/engine/tests/test_stirling_contracts.py b/engine/tests/test_stirling_contracts.py index 63e283a641..b56e420764 100644 --- a/engine/tests/test_stirling_contracts.py +++ b/engine/tests/test_stirling_contracts.py @@ -1,8 +1,4 @@ -from collections.abc import Iterator - -import pytest - -from stirling.config import AppSettings, load_settings +from stirling.config import AppSettings from stirling.contracts import ( AgentExecutionRequest, AgentSpec, @@ -76,19 +72,15 @@ def test_pdf_question_answer_defaults_evidence_list() -> None: assert response.evidence == [] -@pytest.fixture(autouse=True) -def clear_settings_cache() -> Iterator[None]: - load_settings.cache_clear() - yield - load_settings.cache_clear() - - def test_app_settings_accepts_model_configuration() -> None: settings = AppSettings( smart_model_name="claude-sonnet-4-5-20250929", fast_model_name="claude-haiku-4-5-20251001", smart_model_max_tokens=8192, fast_model_max_tokens=2048, + posthog_enabled=False, + posthog_api_key="", + posthog_host="https://eu.i.posthog.com", ) assert settings.smart_model_name diff --git a/engine/tests/test_user_spec_agent.py b/engine/tests/test_user_spec_agent.py index fee8595b6f..bc492b179e 100644 --- a/engine/tests/test_user_spec_agent.py +++ b/engine/tests/test_user_spec_agent.py @@ -4,7 +4,6 @@ import pytest from pydantic import 
ValidationError from stirling.agents import UserSpecAgent -from stirling.config import AppSettings from stirling.contracts import ( AgentDraft, AgentDraftRequest, @@ -15,21 +14,12 @@ from stirling.contracts import ( ToolOperationStep, ) from stirling.models.tool_models import CompressParams, OperationId, RotateParams -from stirling.services import build_runtime - - -def build_test_settings() -> AppSettings: - return AppSettings( - smart_model_name="test", - fast_model_name="test", - smart_model_max_tokens=8192, - fast_model_max_tokens=2048, - ) +from stirling.services.runtime import AppRuntime class StubUserSpecAgent(UserSpecAgent): - def __init__(self, draft_result: AgentDraft, revision_result: AgentDraft) -> None: - super().__init__(build_runtime(build_test_settings())) + def __init__(self, runtime: AppRuntime, draft_result: AgentDraft, revision_result: AgentDraft) -> None: + super().__init__(runtime) self.draft_result = draft_result self.revision_result = revision_result self.edit_plan = EditPlanResponse( @@ -53,8 +43,8 @@ class StubUserSpecAgent(UserSpecAgent): class ClarifyingUserSpecAgent(UserSpecAgent): - def __init__(self) -> None: - super().__init__(build_runtime(build_test_settings())) + def __init__(self, runtime: AppRuntime) -> None: + super().__init__(runtime) async def _build_edit_plan(self, user_message: str) -> EditClarificationRequest: return EditClarificationRequest( @@ -64,8 +54,9 @@ class ClarifyingUserSpecAgent(UserSpecAgent): @pytest.mark.anyio -async def test_user_spec_agent_drafts_agent_spec() -> None: +async def test_user_spec_agent_drafts_agent_spec(runtime: AppRuntime) -> None: agent = StubUserSpecAgent( + runtime, AgentDraft( name="Invoice Cleanup", description="Prepare invoices for review.", @@ -100,7 +91,7 @@ async def test_user_spec_agent_drafts_agent_spec() -> None: @pytest.mark.anyio -async def test_user_spec_agent_revises_existing_draft() -> None: +async def test_user_spec_agent_revises_existing_draft(runtime: AppRuntime) -> None: 
current_draft = AgentDraft( name="Invoice Cleanup", description="Prepare invoices for review.", @@ -113,6 +104,7 @@ async def test_user_spec_agent_revises_existing_draft() -> None: ], ) agent = StubUserSpecAgent( + runtime, draft_result=current_draft, revision_result=AgentDraft( name="Invoice Cleanup", @@ -152,8 +144,8 @@ def test_tool_operation_step_rejects_mismatched_parameters() -> None: @pytest.mark.anyio -async def test_user_spec_agent_propagates_edit_clarification() -> None: - agent = ClarifyingUserSpecAgent() +async def test_user_spec_agent_propagates_edit_clarification(runtime: AppRuntime) -> None: + agent = ClarifyingUserSpecAgent(runtime) response = await agent.draft(AgentDraftRequest(user_message="Build an agent to rotate some pages.")) diff --git a/engine/uv.lock b/engine/uv.lock index 3f29481bac..c54957bf92 100644 --- a/engine/uv.lock +++ b/engine/uv.lock @@ -194,6 +194,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/53/23/b65f568ed0c22f1efacb744d2db1a33c8068f384b8c9b482b52ebdbc3ef6/authlib-1.6.9-py2.py3-none-any.whl", hash = "sha256:f08b4c14e08f0861dc18a32357b33fbcfd2ea86cfe3fe149484b4d764c4a0ac3", size = 244197, upload-time = "2026-03-02T07:44:00.307Z" }, ] +[[package]] +name = "backoff" +version = "2.2.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/47/d7/5bbeb12c44d7c4f2fb5b56abce497eb5ed9f34d85701de869acedd602619/backoff-2.2.1.tar.gz", hash = "sha256:03f829f5bb1923180821643f8753b0502c3b682293992485b0eef2807afa5cba", size = 17001, upload-time = "2022-10-05T19:19:32.061Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/df/73/b6e24bd22e6720ca8ee9a85a0c4a2971af8497d8f3193fa05390cbd46e09/backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8", size = 15148, upload-time = "2022-10-05T19:19:30.546Z" }, +] + [[package]] name = "beartype" version = "0.22.9" @@ -531,6 +540,8 @@ version = "0.1.0" source 
= { editable = "." } dependencies = [ { name = "fastapi" }, + { name = "opentelemetry-sdk" }, + { name = "posthog" }, { name = "pydantic" }, { name = "pydantic-ai" }, { name = "pydantic-settings" }, @@ -548,6 +559,8 @@ dev = [ [package.metadata] requires-dist = [ { name = "fastapi", specifier = ">=0.116.0" }, + { name = "opentelemetry-sdk", specifier = ">=1.39.0" }, + { name = "posthog", specifier = ">=3.0.0" }, { name = "pydantic", specifier = ">=2.0.0" }, { name = "pydantic-ai", specifier = ">=1.67.0" }, { name = "pydantic-settings", specifier = ">=2.0.0" }, @@ -1581,6 +1594,23 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, ] +[[package]] +name = "posthog" +version = "7.9.12" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "backoff" }, + { name = "distro" }, + { name = "python-dateutil" }, + { name = "requests" }, + { name = "six" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/1c/a7/2865487853061fbd62383492237b546d2d8f7c1846272350d2b9e14138cd/posthog-7.9.12.tar.gz", hash = "sha256:ebabf2eb2e1c1fbf22b0759df4644623fa43cc6c9dcbe9fd429b7937d14251ec", size = 176828, upload-time = "2026-03-12T09:01:15.184Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/65/a9/7a803aed5a5649cf78ea7b31e90d0080181ba21f739243e1741a1e607f1f/posthog-7.9.12-py3-none-any.whl", hash = "sha256:7175bd1698a566bfea98a016c64e3456399f8046aeeca8f1d04ae5bf6c5a38d0", size = 202469, upload-time = "2026-03-12T09:01:13.38Z" }, +] + [[package]] name = "prompt-toolkit" version = "3.0.52"