Add SaaS AI engine (#5907)

This commit is contained in:
James Brunton
2026-03-16 11:01:50 +00:00
committed by GitHub
parent cddc8e6df0
commit c58a6092ec
182 changed files with 29961 additions and 3 deletions

View File

@@ -0,0 +1,3 @@
# Package entry point: expose only the Flask route registration helper.
from .routes import register_edit_routes

# Explicit public API of this package.
__all__ = ["register_edit_routes"]

View File

@@ -0,0 +1,96 @@
"""
Confirmation intent classification during AWAITING_CONFIRM state.
CRITICAL: Prevents misexecution when user changes mind during confirmation.
"""
from config import FAST_MODEL
from llm_utils import run_ai
from models import ChatMessage, ConfirmationAnswer, ConfirmationIntent
from models.tool_models import OperationId
from prompts import confirmation_intent_system_prompt, confirmation_question_system_prompt
def classify_confirmation_intent(
    message: str,
    pending_plan_summary: str,
    history: list[ChatMessage],
    *,
    session_id: str | None = None,
) -> ConfirmationIntent | None:
    """Classify what the user means while a plan awaits confirmation.

    CRITICAL: an accurate classification here prevents executing a plan
    the user has changed their mind about.

    Possible intents:
        confirm     -- user agrees; execute the pending plan
        cancel      -- user backs out; clear the plan
        modify      -- user wants the plan changed (clear + replan)
        new_request -- user wants something different (clear + route fresh)
        question    -- user asks about the plan (answer without executing)

    Examples:
        "yes" -> confirm; "cancel" -> cancel; "actually delete page 7" ->
        modify; "never mind, compress it" -> new_request;
        "what will this do?" -> question

    For "modify" the minimal safe behavior is clear + replan: no plan
    patching is attempted, the old plan simply never executes.
    """
    prompt = confirmation_intent_system_prompt(pending_plan_summary)
    recent_context = history[-3:]  # a few recent messages are enough context
    convo = [
        ChatMessage(role="system", content=prompt),
        *recent_context,
        ChatMessage(role="user", content=message),
    ]
    return run_ai(
        FAST_MODEL,
        convo,
        ConfirmationIntent,
        tag="edit_confirmation_intent",
        log_label="edit-confirmation-intent",
        log_exchange=True,
        session_id=session_id,
    )
def answer_confirmation_question(
    question: str,
    plan_summary: str,
    operations: list[OperationId],
    history: list[ChatMessage],
    *,
    session_id: str | None = None,
) -> str:
    """Answer the user's question about the pending plan without executing it.

    Args:
        question: The user's question.
        plan_summary: Summary of the pending plan.
        operations: Operation ids providing plan details.
        history: Conversation history (only the last three entries are used).
        session_id: Session id for logging.

    Returns:
        The assistant's answer text, stripped of surrounding whitespace.

    Raises:
        RuntimeError: If the model produced no usable message.
    """
    prompt = confirmation_question_system_prompt(plan_summary, operations)
    convo = [
        ChatMessage(role="system", content=prompt),
        *history[-3:],  # keep recent context only
        ChatMessage(role="user", content=question),
    ]
    reply = run_ai(
        FAST_MODEL,
        convo,
        ConfirmationAnswer,
        tag="edit_confirmation_question",
        log_label="edit-confirmation-question",
        log_exchange=True,
        session_id=session_id,
    )
    if not reply or not reply.message:
        raise RuntimeError("AI confirmation question response failed.")
    return reply.message.strip()

View File

@@ -0,0 +1,199 @@
from models import PdfPreflight
from models.tool_models import OperationId
# Parameters that must be supplied by the user for a given operation;
# these are never filled in with defaults.
REQUIRED_CLARIFICATIONS = {
    "removePassword": ["password"],
    "deletePages": ["pageNumbers"],
}
# Warning text for operations that irreversibly discard content,
# security, or structure.
DESTRUCTIVE_OPERATIONS = {
    "removePassword": "This will remove all security from your PDF.",
    "sanitize": "This will remove all metadata and hidden content.",
    "flatten": "This will convert all form fields to static content (irreversible).",
    "deletePages": "This will permanently delete the specified pages.",
}
# Per-operation parameter defaults applied when the user asks for
# default behavior instead of specifying options.
DEFAULT_OPERATION_OVERRIDES = {
    "addPageNumbers": {
        "fontType": "times",
        "position": 8,
        "pageNumbers": "all",
        "pagesToNumber": "all",
        "customMargin": "medium",
        "customText": "{n}",
    },
    "processPdfWithOCR": {
        "languages": ["eng"],
        "ocrType": "skip-text",
        "ocrRenderType": "hocr",
    },
    "optimizePdf": {
        "optimizeLevel": 6,
        "grayscale": False,
        "linearize": False,
        "normalize": False,
    },
    "removeBlankPages": {
        "threshold": 10,
        "whitePercent": 95,
    },
}
# Risk policy table: Single source of truth for operation risk assessment.
# Each entry may carry:
#   op             -- operation id this policy applies to
#   risk           -- "low" | "medium" | "high"
#   always_confirm -- confirm with the user unconditionally
#   confirm_if     -- optional callable(preflight) -> bool for conditional
#                     confirmation (e.g. only for very large files)
#   reason/warning -- human-readable explanations surfaced to the user
OPERATION_RISK_POLICY = [
    # High risk - destructive content removal (always confirm)
    {
        "op": "deletePages",
        "risk": "high",
        "always_confirm": True,
        "reason": "destructive content removal",
        "warning": "This will permanently delete the specified pages.",
    },
    {
        "op": "removePassword",
        "risk": "high",
        "always_confirm": True,
        "reason": "removes all security",
        "warning": "This will remove all security from your PDF.",
    },
    {
        "op": "sanitize",
        "risk": "high",
        "always_confirm": True,
        "reason": "removes metadata and hidden content",
        "warning": "This will remove all metadata and hidden content.",
    },
    {
        "op": "flatten",
        "risk": "high",
        "always_confirm": True,
        "reason": "irreversible form field conversion",
        "warning": "This will convert all form fields to static content (irreversible).",
    },
    # Medium risk - lossy transformations
    {
        "op": "optimizePdf",
        "risk": "medium",
        "always_confirm": False,
        "reason": "lossy compression",
        "confirm_if": lambda preflight: (preflight.file_size_mb or 0) > 50,  # > 50MB
    },
    {
        "op": "processPdfWithOCR",
        "risk": "medium",
        "always_confirm": False,
        "reason": "may alter text layer",
    },
    {
        "op": "extractImages",
        "risk": "medium",
        "always_confirm": False,
        "reason": "creates derivative content",
    },
    # Low risk - non-destructive transformations
    {
        "op": "rotatePDF",
        "risk": "low",
        "always_confirm": False,
    },
    {
        "op": "splitPdf",
        "risk": "low",
        "always_confirm": False,
    },
    {
        "op": "mergePdfs",
        "risk": "low",
        "always_confirm": False,
    },
    {
        "op": "addPageNumbers",
        "risk": "low",
        "always_confirm": False,
    },
    {
        "op": "addWatermark",
        "risk": "low",
        "always_confirm": False,
    },
]
def get_operation_risk(operation_id: OperationId, preflight: PdfPreflight | None = None) -> dict:
    """Look up the risk assessment for a single operation.

    Returns a dict with keys:
        level          -- "low" | "medium" | "high"
        reason         -- short explanation ("" when none recorded)
        should_confirm -- whether the user must confirm first
        warning        -- user-facing warning text (None unless high risk)

    Operations absent from OPERATION_RISK_POLICY are treated as low risk.
    """
    preflight = preflight or PdfPreflight()
    entry = next((p for p in OPERATION_RISK_POLICY if p["op"] == operation_id), None)
    if entry is None:
        # Unknown operation: default to low risk, no confirmation.
        return {
            "level": "low",
            "reason": "",
            "should_confirm": False,
            "warning": None,
        }
    needs_confirm = entry.get("always_confirm", False)
    if not needs_confirm:
        # A policy may confirm conditionally based on file metadata.
        condition = entry.get("confirm_if")
        if callable(condition):
            needs_confirm = condition(preflight)
    return {
        "level": entry["risk"],
        "reason": entry.get("reason", ""),
        "should_confirm": needs_confirm,
        "warning": entry.get("warning"),
    }
def assess_plan_risk(operation_ids: list[OperationId], preflight: PdfPreflight | None = None) -> dict:
    """Assess the combined risk of a multi-operation plan.

    Args:
        operation_ids: Operation ids in the plan, in execution order.
        preflight: File metadata used by conditional confirmation rules.

    Returns:
        Dict with "level" (highest individual level), "reasons" (non-empty
        per-operation reasons), and "should_confirm" (True when any
        operation demands confirmation, or when a multi-op plan is high risk).
    """
    assessments = [get_operation_risk(op_id, preflight) for op_id in operation_ids]
    levels = {item["level"] for item in assessments}
    # The most severe individual level wins.
    if "high" in levels:
        overall = "high"
    elif "medium" in levels:
        overall = "medium"
    else:
        overall = "low"
    multi_op_high = len(operation_ids) > 1 and overall == "high"
    needs_confirm = multi_op_high or any(item["should_confirm"] for item in assessments)
    return {
        "level": overall,
        "reasons": [item["reason"] for item in assessments if item["reason"]],
        "should_confirm": needs_confirm,
    }

View File

@@ -0,0 +1,130 @@
import textwrap
from dataclasses import asdict
from config import FAST_MODEL
from file_processing_agent import ToolCatalogService
from llm_utils import run_ai
from models import AskUserMessage, ChatMessage, DefaultsDecision, IntentDecision
from prompts import (
edit_defaults_decision_system_prompt,
edit_info_system_prompt,
edit_intent_classification_system_prompt,
)
def wants_defaults(message: str, session_id: str | None = None) -> bool:
    """Ask the fast model whether the user wants default parameters applied."""
    conversation = [
        ChatMessage(role="system", content=edit_defaults_decision_system_prompt()),
        ChatMessage(role="user", content=message),
    ]
    verdict = run_ai(
        FAST_MODEL,
        conversation,
        DefaultsDecision,
        tag="edit_defaults_decision",
        log_label="edit-defaults-decision",
        log_exchange=True,
        session_id=session_id,
    )
    return verdict.use_defaults
def classify_edit_intent(
    message: str,
    history: list[ChatMessage],
    *,
    session_id: str | None = None,
) -> IntentDecision | None:
    """Classify the user's message into an edit-intent mode via the fast model."""
    conversation = [
        ChatMessage(role="system", content=edit_intent_classification_system_prompt()),
        *history,
        ChatMessage(role="user", content=message),
    ]
    return run_ai(
        FAST_MODEL,
        conversation,
        IntentDecision,
        tag="edit_intent_decision",
        log_label="edit-intent-decision",
        log_exchange=True,
        session_id=session_id,
    )
def answer_conversational_info(
    message: str,
    history: list[ChatMessage],
    tool_catalog: ToolCatalogService,
    *,
    session_id: str | None = None,
) -> str:
    """Handle conversational queries without files (greetings, help requests, capability questions)."""
    # Compact index of available tools so the model can cite real capabilities.
    selection_index = tool_catalog.build_selection_index()
    system_instructions = textwrap.dedent("""\
        Answer the user's question about capabilities.
        Be friendly, clear, and helpful.
        This system can:
        1. Edit PDF files - compress, merge, split, rotate, watermark, OCR, convert, add security, and many more operations
        2. Create new PDF documents - generate professional documents from descriptions (business proposals, reports, resumes, etc.)
        3. Create smart folders - set up automated PDF processing workflows that run on uploaded files
        If the user is greeting you (hello, hi, hey), respond warmly and briefly explain what you can help with.
        If asking about capabilities (what can you do, help), provide a clear overview of all three main features.
        For PDF editing questions, reference the available tools from the tool_catalog below.
        Use bullets when listing multiple options.
        Keep responses concise but informative.
        Encourage them to upload a PDF to edit it, or ask to create a new document.
        Do not mention session IDs, technical details, or backend concepts.
    """).strip()
    # System content is structured: instructions plus the serialized catalog.
    system_payload = {
        "instructions": system_instructions,
        "tool_catalog": [asdict(entry) for entry in selection_index],
    }
    messages = [
        ChatMessage(role="system", content=[system_payload]),
        *history,
        ChatMessage(role="user", content=message),
    ]
    response = run_ai(
        FAST_MODEL,
        messages,
        AskUserMessage,
        tag="conversational_info_response",
        log_label="conversational-info-response",
        log_exchange=True,
        session_id=session_id,
    )
    return response.message
def answer_edit_info(
    message: str,
    history: list[ChatMessage],
    file_name: str,
    file_type: str | None,
    tool_catalog: ToolCatalogService,
    *,
    session_id: str | None = None,
) -> str:
    """Answer an informational question about editing the uploaded file.

    Args:
        message: The user's question.
        history: Conversation history.
        file_name: Name of the uploaded file.
        file_type: MIME type of the uploaded file, if known.
        tool_catalog: Catalog used to describe available operations.
        session_id: Session id for logging.

    Returns:
        The assistant's answer, stripped of surrounding whitespace.

    Raises:
        RuntimeError: If the model produced no usable message.
    """
    catalog_text = tool_catalog.build_catalog_prompt()
    system_prompt = edit_info_system_prompt(file_name, file_type, catalog_text)
    messages = [ChatMessage(role="system", content=system_prompt)]
    messages.extend(history)
    messages.append(ChatMessage(role="user", content=message))
    response = run_ai(
        FAST_MODEL,
        messages,
        AskUserMessage,
        tag="edit_info_response",
        log_label="edit-info-response",
        log_exchange=True,
        session_id=session_id,
    )
    # BUG FIX: guard against a None message before calling .strip(); the
    # previous check raised AttributeError instead of the intended
    # RuntimeError when the model returned a response with no message
    # (mirrors the guard used in answer_confirmation_question).
    if response and response.message and response.message.strip():
        return response.message.strip()
    raise RuntimeError("AI edit info response failed.")

View File

@@ -0,0 +1,11 @@
"""Custom exceptions for the editing module."""
class InsufficientCreditsError(Exception):
"""Raised when an operation is blocked due to insufficient credits."""
def __init__(self, status_code: int = 429, error_body: str = "", error_json: dict | None = None):
self.status_code = status_code
self.error_body = error_body
self.error_json = error_json or {}
super().__init__(f"Insufficient credits (HTTP {status_code})")

View File

@@ -0,0 +1,676 @@
import json
import logging
import mimetypes
import os
import uuid
from pathlib import Path
from flask import jsonify
from flask.typing import ResponseReturnValue
from werkzeug.datastructures import FileStorage
from werkzeug.security import safe_join
import analytics
import models
from config import OUTPUT_DIR
from file_processing_agent import ToolCatalogService
from .constants import assess_plan_risk, get_operation_risk
from .decisions import (
answer_edit_info,
classify_edit_intent,
)
from .operations import (
answer_pdf_question,
apply_smart_defaults,
build_pdf_text_context,
build_plan_summary,
create_session_file,
format_disambiguation_question,
get_pdf_preflight,
sanitize_filename,
validate_operation_chain,
)
from .session_store import EditSession, EditSessionFile, EditSessionStore, PendingOperation, PendingPlan
from .state_router import route_message
logger = logging.getLogger(__name__)
class EditService:
def __init__(self, session_store: EditSessionStore, tool_catalog: ToolCatalogService) -> None:
    """Bind the service to its session store and tool catalog."""
    self.sessions = session_store
    self.tool_catalog = tool_catalog
    # Attachments uploaded mid-session are stored under this folder.
    self.edit_upload_dir = os.path.join(OUTPUT_DIR, "uploads")
def _strip_file_context_history(self, messages: list[models.ChatMessage]) -> list[models.ChatMessage]:
    """Return the history with ``file_context`` payload items removed.

    Messages whose content becomes empty after stripping are dropped
    entirely; messages left untouched are passed through unchanged.
    """
    cleaned: list[models.ChatMessage] = []
    for original in messages:
        body = original.content
        if not isinstance(body, list):
            cleaned.append(original)
            continue
        kept = [
            part
            for part in body
            if not (isinstance(part, dict) and part.get("type") == "file_context")
        ]
        if not kept:
            continue  # nothing left worth keeping
        if kept == body:
            cleaned.append(original)
        else:
            cleaned.append(models.ChatMessage(role=original.role, content=kept))
    return cleaned
def _build_status_response(self, session: EditSession) -> ResponseReturnValue:
    """Build a status reply describing the session's current result file.

    When the session has a produced file, the response carries its URL
    under /output/; otherwise an empty assistant message is returned.
    """
    if session.file_path:
        # BUG FIX: the computed relative path was previously ignored and
        # the URL hard-coded a placeholder; serve the real file location.
        filename = os.path.relpath(session.file_path, OUTPUT_DIR)
        file_url = f"/output/{filename}"
        response = models.EditMessageResponse(
            assistant_message="",
            result_file_url=file_url,
            result_file_name=session.file_name,
            result_files=[models.EditResultFile(url=file_url, name=session.file_name)],
        )
        return jsonify(response.model_dump(by_alias=True, exclude_none=True))
    response = models.EditMessageResponse(assistant_message="")
    return jsonify(response.model_dump(by_alias=True, exclude_none=True))
def _primary_file(self, session: EditSession) -> EditSessionFile | None:
    """Return the session's main file, or None when the session has none.

    Prefers the first entry of ``session.files``; otherwise synthesizes
    a record from the session's flat file fields.
    """
    if session.files:
        return session.files[0]
    if not session.file_path:
        return None
    return EditSessionFile(
        file_id="primary",
        file_path=session.file_path,
        file_name=session.file_name,
        file_type=session.file_type,
        preflight=session.preflight,
    )
def _ensure_file_context(self, session: EditSession) -> None:
    """Attach extracted PDF text context to the history, once per file.

    The context message is inserted just before the most recent user
    message so the model sees it ahead of the latest request; when no
    user message exists yet, it is appended at the end.
    """
    if not session.file_path:
        return
    if session.file_context and session.file_context_path == session.file_path:
        return  # context already built for this exact file
    context = build_pdf_text_context(session.file_path)
    session.file_context = context
    session.file_context_path = session.file_path
    context_message = models.ChatMessage(role="assistant", content=[context])
    for position in reversed(range(len(session.messages))):
        if session.messages[position].role == "user":
            session.messages.insert(position, context_message)
            return
    session.messages.append(context_message)
def create_session(self, files: list[FileStorage]) -> ResponseReturnValue:
    """Create a new edit session from one or more uploaded PDFs.

    Saves each upload under OUTPUT_DIR/<session_id>/, runs preflight
    analysis per file, records an analytics event, and returns the
    session descriptor. Only .pdf uploads are accepted.
    """
    if not files:
        return jsonify({"error": "Missing file upload"}), 400
    session_id = str(uuid.uuid4())
    # ROBUSTNESS FIX: validate every upload before touching the
    # filesystem, so a non-PDF later in the list no longer leaves
    # earlier files and a partial session directory behind.
    original_names: list[str] = []
    for index, file in enumerate(files):
        original_name = sanitize_filename(file.filename or f"upload-{index + 1}.pdf")
        extension = Path(original_name).suffix or ".pdf"
        if extension.lower() != ".pdf":
            return jsonify({"error": "Only PDF files are supported right now."}), 400
        original_names.append(original_name)
    # The directory depends only on the session id: create it once.
    session_dir = os.path.join(OUTPUT_DIR, session_id)
    os.makedirs(session_dir, exist_ok=True)
    session_files: list[EditSessionFile] = []
    for original_name, file in zip(original_names, files):
        file_path = os.path.join(session_dir, original_name)
        file.save(file_path)
        # Create session file with proper type detection and preflight
        session_file = create_session_file(
            file_path=file_path,
            file_name=original_name,
            content_type=file.mimetype,
            content_disposition=None,
        )
        session_files.append(session_file)
    primary = session_files[0]
    session = EditSession(
        session_id=session_id,
        file_path=primary.file_path,
        file_name=primary.file_name,
        file_type=primary.file_type,
        preflight=primary.preflight,
        files=session_files,
    )
    self.sessions.set(session)
    # Only count metadata values that preflight actually produced.
    page_counts = [
        page_count for item in session_files if isinstance((page_count := item.preflight.page_count), int)
    ]
    size_values = [
        file_size_mb
        for item in session_files
        if isinstance((file_size_mb := item.preflight.file_size_mb), (int, float))
    ]
    analytics.track_event(
        user_id=session_id,
        event_name="edit_session_created",
        properties={
            "session_id": session_id,
            "file_count": len(session_files),
            "total_pages": sum(page_counts),
            "total_size_mb": round(sum(size_values), 2),
            "has_text_layer": any(item.preflight.has_text_layer for item in session_files),
            "has_encrypted": any(item.preflight.is_encrypted for item in session_files),
        },
    )
    response = models.EditSessionResponse(
        session_id=session_id,
        file_name=primary.file_name,
        file_type=primary.file_type,
    )
    return jsonify(response.model_dump(by_alias=True, exclude_none=True))
def add_attachment(self, session_id: str, name: str | None, file: FileStorage | None) -> ResponseReturnValue:
    """Store a named attachment for a session and register it in memory.

    Returns 404 for an unknown session, 400 when the upload or its name
    is missing or the storage path cannot be built safely.
    """
    session = self.sessions.get(session_id)
    if not session:
        return jsonify({"error": "Edit session not found"}), 404
    if not (file and name):
        return jsonify({"error": "Missing attachment or name"}), 400
    original_name = sanitize_filename(file.filename or "attachment")
    guessed_type = file.mimetype or mimetypes.guess_type(original_name)[0]
    suffix = Path(original_name).suffix or ""
    attachment_id = uuid.uuid4().hex
    stored_name = f"{session_id}-attachment-{attachment_id}{suffix}"
    os.makedirs(self.edit_upload_dir, exist_ok=True)
    # safe_join defends against path traversal in the stored file name.
    target_path = safe_join(self.edit_upload_dir, stored_name)
    if target_path is None:
        return jsonify({"error": "Invalid file path"}), 400
    file.save(target_path)
    session.attachments[name] = EditSessionFile(
        file_id=attachment_id,
        file_path=target_path,
        file_name=original_name,
        file_type=guessed_type,
    )
    return jsonify({"name": name, "file_name": original_name})
def handle_message(self, session_id: str, payload: models.EditMessageRequest) -> ResponseReturnValue:
    """Main chat entry point: route one user message through the edit flow.

    Handling order:
      1. validation and "status" / stale confirm-cancel shortcuts
      2. pending-plan state machine (confirm / cancel / question / re-route)
      3. "do that again" repeat requests replaying the last operation
      4. intent classification (document question / info / ambiguous)
      5. tool selection and plan handling for actionable requests
    """
    session = self.sessions.get(session_id)
    if not session:
        return jsonify({"error": "Edit session not found"}), 404
    user_message = payload.message.strip()
    if not user_message:
        return jsonify({"error": "Message is required"}), 400
    if payload.action == "status":
        return self._build_status_response(session)
    # confirm/cancel with no pending plan is a harmless no-op reply.
    if payload.action in {"confirm", "cancel"} and not session.pending_plan:
        response = models.EditMessageResponse(assistant_message="")
        return jsonify(response.model_dump(by_alias=True, exclude_none=True))
    # Add user message to history
    session.messages.append(models.ChatMessage(role="user", content=user_message))
    # NEW: Use state router for pending plan handling
    if session.pending_plan:
        routing_result = route_message(
            session,
            user_message,
            self._strip_file_context_history(session.messages),
        )
        if routing_result.action == "execute":
            if routing_result.plan is None:
                assistant_message = "The pending plan could not be found. Please try again."
                session.messages.append(models.ChatMessage(role="assistant", content=assistant_message))
                response = models.EditMessageResponse(assistant_message=assistant_message)
                return jsonify(response.model_dump(by_alias=True, exclude_none=True)), 500
            # Consume the plan immediately to avoid duplicate confirm requests
            session.pending_plan = None
            # Execute the pending plan
            return self._execute_pending_plan(session, routing_result.plan)
        elif routing_result.action == "cancelled":
            assistant_message = routing_result.message or "Cancelled. Let me know if you want to do something else."
            # Consume the plan immediately to avoid duplicate cancel requests
            session.pending_plan = None
            session.messages.append(models.ChatMessage(role="assistant", content=assistant_message))
            response = models.EditMessageResponse(assistant_message=assistant_message)
            return jsonify(response.model_dump(by_alias=True, exclude_none=True))
        elif routing_result.action == "answer_question":
            # Plan stays pending: user only asked about it.
            assistant_message = routing_result.message or "Please confirm to proceed or cancel to stop."
            session.messages.append(models.ChatMessage(role="assistant", content=assistant_message))
            response = models.EditMessageResponse(assistant_message=assistant_message)
            return jsonify(response.model_dump(by_alias=True, exclude_none=True))
        elif routing_result.action == "already_executed":
            assistant_message = routing_result.message or "This plan has already been executed."
            session.messages.append(models.ChatMessage(role="assistant", content=assistant_message))
            response = models.EditMessageResponse(assistant_message=assistant_message)
            return jsonify(response.model_dump(by_alias=True, exclude_none=True))
        elif routing_result.action == "route_fresh":
            # Clear pending and continue to fresh request handling below
            session.pending_plan = None
        elif routing_result.action == "error":
            assistant_message = routing_result.error or "Something went wrong. Please try again."
            session.messages.append(models.ChatMessage(role="assistant", content=assistant_message))
            response = models.EditMessageResponse(assistant_message=assistant_message)
            return jsonify(response.model_dump(by_alias=True, exclude_none=True)), 500
    # Repeat request handling - use atomic execution for consistency
    if session.last_operation_id and self._is_repeat_request(user_message):
        operation_id = session.last_operation_id
        param_model = self.tool_catalog.get_operation(operation_id)
        if not param_model:
            # Stale memory of an operation the catalog no longer knows:
            # forget it and fall through to normal routing.
            session.last_operation_id = None
            session.last_parameters = None
        else:
            parameters = apply_smart_defaults(
                user_message,
                session.last_parameters or param_model.model_validate({}),
            )
            # Create plan and execute atomically (same as new requests)
            plan = PendingPlan(
                state="AWAITING_CONFIRM",
                ops=[PendingOperation(operation_id=operation_id, parameters=parameters)],
                risk_level="low",
                risk_reasons=[],
                source_message=user_message,
            )
            return self._execute_pending_plan(session, plan)
    # Classify intent unless the caller already supplied one.
    intent = payload.edit_intent
    if not intent:
        intent = classify_edit_intent(
            user_message,
            self._strip_file_context_history(session.messages),
            session_id=session.session_id,
        )
    if intent and intent.mode == "document_question":
        primary = self._primary_file(session)
        if not primary:
            assistant_message = "I couldn't find a file in this session. Please upload a PDF first."
        elif primary.preflight.has_text_layer is False:
            assistant_message = "I couldn't read text in this PDF. Want me to run OCR first?"
        else:
            self._ensure_file_context(session)
            assistant_message = answer_pdf_question(primary.file_path, user_message)
        session.messages.append(models.ChatMessage(role="assistant", content=assistant_message))
        response = models.EditMessageResponse(assistant_message=assistant_message)
        return jsonify(response.model_dump(by_alias=True, exclude_none=True))
    if intent and intent.mode in {"info", "ambiguous"}:
        if intent.mode == "info":
            if intent.requires_file_context:
                self._ensure_file_context(session)
                primary = self._primary_file(session)
                if primary:
                    assistant_message = answer_pdf_question(primary.file_path, user_message)
                else:
                    assistant_message = "I couldn't find a file in this session. Please upload a PDF first."
            else:
                assistant_message = answer_edit_info(
                    user_message,
                    self._strip_file_context_history(session.messages),
                    session.file_name,
                    session.file_type,
                    self.tool_catalog,
                    session_id=session.session_id,
                )
        else:
            assistant_message = "Do you want me to run a tool on this file, or just explain the options?"
        session.messages.append(models.ChatMessage(role="assistant", content=assistant_message))
        response = models.EditMessageResponse(assistant_message=assistant_message, needs_more_info=True)
        return jsonify(response.model_dump(by_alias=True, exclude_none=True))
    if intent and intent.requires_file_context:
        self._ensure_file_context(session)
    # Tool selection sees file context only when the intent needs it.
    selection_history = (
        session.messages
        if intent and intent.requires_file_context
        else self._strip_file_context_history(session.messages)
    )
    selection = self.tool_catalog.select_edit_tool(
        history=selection_history,
        uploaded_files=[
            models.UploadedFileInfo(name=item.file_name, type=item.file_type) for item in session.files
        ],
        preflight=session.preflight,
        session_id=session.session_id,
    )
    logger.info(
        "[EDIT] selection action=%s operation_ids=%s",
        selection.action,
        selection.operation_ids,
    )
    selected_ops = self._selection_operations(session, selection, user_message, selection_history)
    analytics.track_event(
        user_id=session.session_id,
        event_name="edit_tool_selected",
        properties={
            "session_id": session.session_id,
            "selection_action": selection.action,
            "operation_ids": [op_id for op_id, _ in selected_ops],
            "operation_count": len(selected_ops),
            "intent_mode": intent.mode if intent else None,
            "has_file_context": bool(intent and intent.requires_file_context),
        },
    )
    if selection.action == "call_tool" and not selected_ops:
        logger.warning(
            "[EDIT] selection has no operations session_id=%s message=%s payload=%s",
            session.session_id,
            user_message,
            json.dumps(selection.model_dump(), ensure_ascii=True),
        )
        assistant_message = format_disambiguation_question()
        session.messages.append(models.ChatMessage(role="assistant", content=assistant_message))
        response = models.EditMessageResponse(
            assistant_message=assistant_message,
            needs_more_info=True,
        )
        return jsonify(response.model_dump(by_alias=True, exclude_none=True))
    logger.info(
        "[EDIT] selected_ops session_id=%s count=%s ops=%s",
        session.session_id,
        len(selected_ops),
        [op_id for op_id, _ in selected_ops],
    )
    if selection.action == "ask_user":
        if not selected_ops:
            assistant_message = selection.response_message or "I could not find a matching tool for that request."
            session.messages.append(models.ChatMessage(role="assistant", content=assistant_message))
            response = models.EditMessageResponse(assistant_message=assistant_message, needs_more_info=True)
            return jsonify(response.model_dump(by_alias=True, exclude_none=True))
        # Use _handle_selected_ops for consistency - it handles missing params and PendingPlan creation
        return self._handle_selected_ops(
            selected_ops,
            user_message=user_message,
            session=session,
            response_message=selection.response_message,
        )
    if selection.action != "call_tool" or not selected_ops:
        assistant_message = selection.response_message or "I could not find a matching tool for that request."
        logger.info(
            "[EDIT] no_tool/no_ops action=%s message=%s",
            selection.action,
            assistant_message[:100] if assistant_message else None,
        )
        session.messages.append(models.ChatMessage(role="assistant", content=assistant_message))
        response = models.EditMessageResponse(assistant_message=assistant_message)
        return jsonify(response.model_dump(by_alias=True, exclude_none=True))
    return self._handle_selected_ops(
        selected_ops,
        user_message=user_message,
        session=session,
        response_message=selection.response_message,
    )
def _execute_pending_plan(self, session: EditSession, plan: PendingPlan) -> ResponseReturnValue:
    """
    Convert pending plan into frontend-executable tool calls.
    Marks plan as executed for idempotency.
    """
    # Check idempotency
    if plan.plan_id in session.executed_plan_ids:
        assistant_message = "This operation has already been executed."
        session.messages.append(models.ChatMessage(role="assistant", content=assistant_message))
        response = models.EditMessageResponse(assistant_message=assistant_message)
        return jsonify(response.model_dump(by_alias=True, exclude_none=True))
    tool_calls: list[models.EditToolCall] = []
    for pending_op in plan.ops:
        # Silently skip operations the catalog no longer knows about.
        param_model = self.tool_catalog.get_operation(pending_op.operation_id)
        if not param_model:
            continue
        tool_calls.append(
            models.EditToolCall(
                operation_id=pending_op.operation_id,
                parameters=pending_op.parameters,
            )
        )
    if not tool_calls:
        assistant_message = "I could not build a runnable tool plan. Please try rephrasing the request."
        session.messages.append(models.ChatMessage(role="assistant", content=assistant_message))
        response = models.EditMessageResponse(assistant_message=assistant_message)
        return jsonify(response.model_dump(by_alias=True, exclude_none=True)), 500
    # Mark executed and clear the pending slot before emitting the plan.
    session.executed_plan_ids.add(plan.plan_id)
    session.pending_plan = None
    if plan.ops:
        # Remember the last operation so "do that again" can replay it.
        session.last_operation_id = plan.ops[-1].operation_id
        session.last_parameters = plan.ops[-1].parameters
    execution_mode = "single" if len(tool_calls) == 1 else "pipeline"
    pipeline_name = "AI Generated Pipeline" if execution_mode == "pipeline" else None
    analytics.track_event(
        user_id=session.session_id,
        event_name="edit_plan_emitted_for_frontend_execution",
        properties={
            "session_id": session.session_id,
            "operation_ids": [op.operation_id for op in plan.ops],
            "operation_count": len(plan.ops),
            "risk_level": plan.risk_level,
            "risk_reasons_count": len(plan.risk_reasons),
            "execution_mode": execution_mode,
        },
    )
    session.messages.append(models.ChatMessage(role="assistant", content="Prepared tool plan for frontend"))
    # The actual execution happens client-side: ship the structured plan.
    response = models.EditMessageResponse(
        assistant_message="",
        tool_calls=tool_calls,
        execute_on_frontend=True,
        frontend_plan=models.FrontendExecutionPlan(
            mode=execution_mode,
            steps=[
                models.FrontendExecutionStep(
                    operation_id=call.operation_id,
                    parameters=call.parameters,
                )
                for call in tool_calls
            ],
            pipeline_name=pipeline_name,
        ),
    )
    return jsonify(response.model_dump(by_alias=True, exclude_none=True))
def _selection_operations(
    self,
    session: EditSession,
    selection: models.EditToolSelection,
    user_message: str,
    history: list[models.ChatMessage],
) -> list[tuple[models.tool_models.OperationId, models.tool_models.ParamToolModel | None]]:
    """Resolve each selected operation id into (id, extracted parameters).

    Unknown operation ids are skipped. Already-resolved operations are
    fed back as context when extracting parameters for later ones.
    """
    resolved: list[tuple[models.tool_models.OperationId, models.tool_models.ParamToolModel | None]] = []
    for op_id in selection.operation_ids:
        if not self.tool_catalog.get_operation(op_id):
            continue
        params = self.tool_catalog.extract_operation_parameters(
            operation_id=op_id,
            previous_operations=resolved,
            user_message=user_message,
            history=history,
            preflight=session.preflight,
            session_id=session.session_id,
        )
        resolved.append((op_id, params))
    return resolved
def _is_repeat_request(self, message: str) -> bool:
    """Return True only when the message is purely a "repeat" request.

    Messages containing any concrete action keyword (e.g. "compress and
    rotate again") are parsed as new requests rather than repeats.
    """
    normalized = message.strip().lower()
    action_words = (
        "compress",
        "optimize",
        "smaller",
        "larger",
        "bigger",
        "rotate",
        "split",
        "merge",
        "delete",
        "extract",
        "add",
        "remove",
        "convert",
        "repair",
        "unlock",
        "watermark",
        "sign",
        "flatten",
        "ocr",
        "searchable",
        "linearize",
        "grayscale",
    )
    # Any action keyword means the user wants something new, not a rerun.
    if any(word in normalized for word in action_words):
        return False
    repeat_phrases = (
        "do that again",
        "do it again",
        "repeat that",
        "repeat it",
        "redo that",
        "redo it",
        "same again",
        "run again",
        "try again",
    )
    return any(phrase in normalized for phrase in repeat_phrases)
    def _handle_selected_ops(
        self,
        selected_ops: list[tuple[models.tool_models.OperationId, models.tool_models.ParamToolModel | None]],
        user_message: str,
        session: EditSession,
        *,
        response_message: str | None = None,
    ) -> ResponseReturnValue:
        """
        Handle selected operations using execution and parameter completion.
        Creates PendingPlan and routes through state machine.

        Flow: refresh preflight → apply smart defaults per operation →
        validate chain compatibility → assess risk → either ask the user to
        confirm (risky plan) or execute the plan immediately (low risk).

        NOTE(review): response_message is not referenced in this body —
        confirm whether callers still rely on it.
        """
        # Refresh preflight for up-to-date metadata (only for PDFs)
        if session.files and session.files[0].file_path:
            if session.files[0].file_type == "application/pdf":
                session.preflight = get_pdf_preflight(session.files[0].file_path)
            else:
                # Non-PDF input: empty preflight so downstream risk checks
                # see no PDF-specific metadata.
                session.preflight = models.PdfPreflight()
        # Process each operation (first pass without forcing defaults)
        pending_ops: list[PendingOperation] = []
        operations: list[models.tool_models.OperationId] = []
        for operation_id, raw_parameters in selected_ops:
            param_model = self.tool_catalog.get_operation(operation_id)
            if not param_model:
                # Unknown tool: abort the whole plan rather than run a subset.
                assistant_message = "I could not find that tool. Please try another request."
                session.messages.append(models.ChatMessage(role="assistant", content=assistant_message))
                response = models.EditMessageResponse(assistant_message=assistant_message)
                return jsonify(response.model_dump(by_alias=True, exclude_none=True))
            operations.append(operation_id)
            # Apply defaults; fall back to an empty validated model when the
            # extractor produced no parameters at all.
            parameters = apply_smart_defaults(
                user_message,
                raw_parameters or param_model.model_validate({}),
            )
            pending_ops.append(PendingOperation(operation_id=operation_id, parameters=parameters))
        # Validate operation chain compatibility (terminal ops must come last)
        validation = validate_operation_chain(operations)
        if not validation.is_valid:
            error_msg = validation.error_message or "Incompatible operation chain"
            session.messages.append(models.ChatMessage(role="assistant", content=error_msg))
            # Return structured data for frontend to format with translated names
            response = models.EditMessageResponse(
                assistant_message="", # Frontend will format from validation_error
                result_json=(
                    {"validation_error": validation.error_data.model_dump(by_alias=True, mode="json")}
                    if validation.error_data
                    else None
                ),
            )
            return jsonify(response.model_dump(by_alias=True, exclude_none=True)), 400
        # No missing params - assess risk
        risk_assessment = assess_plan_risk(operations, session.preflight)
        # Create pending plan or execute immediately
        if risk_assessment.get("should_confirm"):
            # Need confirmation - create AWAITING_CONFIRM plan
            plan = PendingPlan(
                state="AWAITING_CONFIRM",
                ops=pending_ops,
                risk_level=risk_assessment["level"],
                risk_reasons=risk_assessment.get("reasons", []),
                source_message=user_message,
            )
            session.pending_plan = plan
            plan_summary = build_plan_summary(operations)
            plan_summary += "\n\nConfirm to proceed or cancel to stop."
            session.messages.append(models.ChatMessage(role="assistant", content=plan_summary))
            # Build tool calls for preview
            tool_calls = [
                models.EditToolCall(
                    operation_id=op.operation_id,
                    parameters=op.parameters,
                )
                for op in pending_ops
            ]
            # Get warning if high risk (only surfaced for single-op plans)
            warning = None
            if len(operations) == 1:
                op_risk = get_operation_risk(operations[0], session.preflight)
                warning = op_risk.get("warning")
            response = models.EditMessageResponse(
                assistant_message=plan_summary,
                confirmation_required=True,
                warning=warning,
                tool_calls=tool_calls,
            )
            return jsonify(response.model_dump(by_alias=True, exclude_none=True))
        # Low risk - execute immediately using atomic execution
        plan = PendingPlan(
            state="AWAITING_CONFIRM",  # Use confirm state but execute immediately
            ops=pending_ops,
            risk_level=risk_assessment["level"],
            risk_reasons=risk_assessment.get("reasons", []),
            source_message=user_message,
        )
        return self._execute_pending_plan(session, plan)

View File

@@ -0,0 +1,309 @@
import logging
import os
import re
import uuid
from dataclasses import dataclass
from typing import Any
from pypdf import PdfReader
from config import SMART_MODEL
from llm_utils import run_ai
from models import ChatMessage, IncompatibleChainError, OperationRef, PdfAnswer, PdfPreflight, tool_models
from pdf_text_editor import convert_pdf_to_text_editor_document
from prompts import pdf_qa_system_prompt
from .session_store import EditSessionFile
logger = logging.getLogger(__name__)
def sanitize_filename(filename: str) -> str:
    """Collapse unsafe characters to underscores and guarantee a usable name.

    Runs of characters outside [a-zA-Z0-9._-] become a single "_"; leading
    and trailing dots/underscores are trimmed. Falls back to "upload.pdf"
    when the input is empty or nothing survives cleaning.
    """
    safe = re.sub(r"[^a-zA-Z0-9._-]+", "_", filename or "").strip("._")
    if safe:
        return safe
    return "upload.pdf"
def infer_smart_defaults(
    user_message: str,
    parameters: tool_models.ParamToolModel,
) -> tool_models.ParamToolModel:
    """Fill in parameter defaults from simple English keyword heuristics.

    Works on a copy of *parameters*; the caller's model is never mutated.
    # TODO: Get rid of this function. It only works in English and shouldn't be necessary
    """
    params = parameters.model_copy()
    lowered = user_message.lower()
    if isinstance(params, tool_models.RotateParams):
        # Map direction words to an angle; clockwise wins when both appear.
        fallback_angle = 90
        if any(word in lowered for word in ("right", "clockwise")):
            fallback_angle = 90
        elif any(word in lowered for word in ("left", "counter", "anticlockwise", "anti-clockwise")):
            fallback_angle = 270
        elif any(word in lowered for word in ("upside", "180")):
            fallback_angle = 180
        # Only overwrite an angle that is not already a valid rotation.
        if params.angle not in {90, 180, 270}:
            params.angle = fallback_angle
        return params
    if isinstance(params, tool_models.OcrParams):
        if any(word in lowered for word in ("searchable", "text layer", "text overlaid")):
            params.ocr_render_type = "sandwich"
        elif any(word in lowered for word in ("hocr", "layout", "bounding boxes")):
            params.ocr_render_type = "hocr"
        if any(word in lowered for word in ("spanish", "español", "espanol")):
            params.languages = ["spa"]
        return params
    if isinstance(params, tool_models.WatermarkParams):
        if params.watermark_type is None:
            # Prefer an image watermark only when an image is given and no
            # text is; otherwise default to text.
            use_image = params.watermark_image is not None and not params.watermark_text
            params.watermark_type = "image" if use_image else "text"
        return params
    return params
def format_disambiguation_question() -> str:
    """Return the canned prompt shown when the user's request is ambiguous."""
    question = (
        "I can help with rotate, OCR (make searchable), compress, split, merge, extract, and more. "
        "Which change do you want?"
    )
    return question
# Operations that must be last in a chain — either because they produce non-PDF output,
# or because their output (e.g. an encrypted PDF) cannot be processed by subsequent operations.
# frozenset: this is a module-level constant consulted only with `in`, so
# make it immutable to rule out accidental runtime mutation.
TERMINAL_OPERATIONS = frozenset({
    # Conversion operations (produce various file formats)
    "pdfToCsv",  # Produces CSV
    "pdfToExcel",  # Produces Excel
    "pdfToHtml",  # Produces HTML
    "pdfToXml",  # Produces XML
    "pdfToText",  # Produces plain text
    "processPdfToRTForTXT",  # Produces RTF/TXT
    "convertPdfToCbr",  # Produces CBR
    "convertPdfToCbz",  # Produces CBZ
    # Analysis operations (produce JSON/Boolean responses)
    "containsImage",  # Returns Boolean
    "containsText",  # Returns Boolean
    "getPdfInfo",  # Returns JSON
    "getBasicInfo",  # Returns JSON
    "getDocumentProperties",  # Returns JSON
    "getAnnotationInfo",  # Returns JSON
    "getFontInfo",  # Returns JSON
    "getFormFields",  # Returns JSON
    "getPageCount",  # Returns JSON
    "getPageDimensions",  # Returns JSON
    "getSecurityInfo",  # Returns JSON
    "pageCount",  # Returns JSON
    "pageRotation",  # Returns JSON
    "pageSize",  # Returns JSON
    "fileSize",  # Returns JSON
    "validateSignature",  # Returns JSON
    # Security — produces encrypted PDF that cannot be processed by subsequent operations
    "addPassword",
})
@dataclass(frozen=True)
class ValidationResult:
    """Result of operation chain validation."""
    # True when the chain is safe to execute in order.
    is_valid: bool
    # Human-readable fallback message (English), used if the frontend does
    # not render the structured error itself.
    error_message: str | None = None
    # Structured payload so the frontend can format the error with
    # translated tool names; None when the chain is valid.
    error_data: IncompatibleChainError | None = None
def validate_operation_chain(operations: list[tool_models.OperationId]) -> ValidationResult:
    """
    Validate that operation chain is compatible (output of N can be input to N+1).

    An operation listed in TERMINAL_OPERATIONS may only appear last: its
    output (non-PDF data, or an encrypted PDF) cannot feed a following step.

    Returns:
        ValidationResult with is_valid, error_message, and error_data.
        error_data contains structured info for frontend formatting with translated names.
    """
    if len(operations) <= 1:
        return ValidationResult(is_valid=True)
    # Walk consecutive pairs; only the final operation may be terminal.
    for current_id, next_id in zip(operations, operations[1:]):
        if current_id not in TERMINAL_OPERATIONS:
            continue
        # Structured data for the frontend; it resolves translated tool
        # names from the operation ids via getToolFromToolCall().
        error_data = IncompatibleChainError(
            type="incompatible_chain",
            current_operation=OperationRef(
                operation_id=current_id,
            ),
            next_operation=OperationRef(
                operation_id=next_id,
            ),
        )
        # Plain-English fallback in case the frontend ignores error_data.
        error_message = (
            f"Cannot chain '{current_id}' with '{next_id}'. "
            f"'{current_id}' must be the last operation in a chain. "
            f"Please run '{current_id}' as the final operation, or remove it from the chain."
        )
        return ValidationResult(
            is_valid=False,
            error_message=error_message,
            error_data=error_data,
        )
    return ValidationResult(is_valid=True)
def build_plan_summary(ops: list[tool_models.OperationId]) -> str:
    """Render a one-line, human-readable summary of the planned operations."""
    if not ops:
        return "I will run the requested tools."
    if len(ops) == 1:
        return f"I will run {ops[0]}."
    joined = ", then ".join(ops)
    return f"I will run {joined}."
def get_pdf_preflight(file_path: str) -> PdfPreflight:
    """Collect lightweight metadata about a PDF for planning decisions.

    Probes only the first two pages for a text layer — enough to tell a
    scanned document from a born-digital one without reading the whole file.
    """
    size_bytes = os.path.getsize(file_path)
    reader = PdfReader(file_path)
    encrypted = bool(reader.is_encrypted)
    if encrypted:
        # Try the empty password so pages are readable for the checks below.
        reader.decrypt("")
    # "Has text" = any of the first two pages yields more than 20
    # non-whitespace-trimmed characters.
    has_text = any(
        len(page.extract_text().strip()) > 20 for page in reader.pages[:2]
    )
    return PdfPreflight(
        file_size_mb=round(size_bytes / (1024 * 1024), 2),
        is_encrypted=encrypted,
        page_count=len(reader.pages),
        has_text_layer=has_text,
    )
def create_session_file(
    file_path: str,
    file_name: str,
    content_type: str | None,
    content_disposition: str | None = None,
) -> EditSessionFile:
    """
    Create an EditSessionFile with proper type detection and preflight handling.

    Only runs PDF preflight for actual PDF files. For non-PDF files, uses an
    empty preflight.

    Args:
        file_path: Path to the file on disk
        file_name: Default filename to use if not in content_disposition
        content_type: MIME type from response (None defaults to application/octet-stream)
        content_disposition: Content-Disposition header for filename extraction

    Returns:
        EditSessionFile with proper file_type and preflight data
    """
    # Normalize content type (avoid defaulting to PDF); drop any parameters
    # such as "; charset=...".
    normalized_content_type = content_type or "application/octet-stream"
    file_type = normalized_content_type.split(";")[0].strip()
    # Extract filename from content_disposition if available.
    # Bug fix: the previous split("filename=")[-1] kept any trailing header
    # parameters (e.g. 'filename="a.pdf"; size=123' yielded 'a.pdf; size=123');
    # stop at the closing quote or the next ';'.
    derived_name = file_name
    if content_disposition:
        match = re.search(r'filename="([^"]*)"|filename=([^;]+)', content_disposition)
        if match:
            candidate = match.group(1) if match.group(1) is not None else match.group(2)
            candidate = candidate.strip()
            if candidate:
                derived_name = candidate
    # Only get PDF preflight for actual PDF files
    preflight = get_pdf_preflight(file_path) if file_type == "application/pdf" else PdfPreflight()
    return EditSessionFile(
        file_id=uuid.uuid4().hex,
        file_path=file_path,
        file_name=derived_name,
        file_type=file_type,
        preflight=preflight,
    )
def build_pdf_text_context(
    file_path: str,
    *,
    max_pages: int = 12,
    max_chars_per_page: int = 600,
    max_total_chars: int = 4000,
) -> dict[str, Any]:
    """Build a size-capped text snapshot of the PDF for use as LLM context.

    Reads at most *max_pages* pages, truncates each page's whitespace-collapsed
    text to *max_chars_per_page*, and stops once the running total would exceed
    *max_total_chars*. Pages with no extractable text are omitted.
    """
    doc = convert_pdf_to_text_editor_document(file_path)
    pages = doc.document.pages if doc else []
    context_pages: list[dict[str, Any]] = []
    running_total = 0
    for page_index, page in enumerate(pages[:max_pages]):
        # Gather the page's text and collapse whitespace runs to single spaces.
        pieces = [str(elem.text) for elem in page.text_elements if elem.text]
        flattened = " ".join(" ".join(pieces).split())
        if not flattened:
            continue
        snippet = flattened[:max_chars_per_page]
        running_total += len(snippet)
        if running_total > max_total_chars:
            # Budget exhausted; drop this snippet and stop reading pages.
            break
        context_pages.append({"page": page_index + 1, "text": snippet})
    return {
        "type": "file_context",
        "page_count": len(pages),
        "pages": context_pages,
    }
def answer_pdf_question(file_path: str, question: str) -> str:
    """Answer a question about the PDF's extracted text via the smart model.

    Raises:
        RuntimeError: if the PDF has no extractable text, or if the model's
            answer is a verbatim echo of the source text.
    """
    doc = convert_pdf_to_text_editor_document(file_path)
    pages = doc.document.pages if doc else []
    snippets = [
        str(elem.text)
        for page in pages
        for elem in page.text_elements
        if elem.text
    ]
    if not snippets:
        raise RuntimeError("No readable text found in PDF.")
    # Collapse whitespace and cap how much context the model sees.
    context = " ".join(" ".join(snippets).split())
    max_context = 10000
    context = context[:max_context]
    system_prompt = pdf_qa_system_prompt() + "\nReturn JSON matching the provided schema."
    user_prompt = f"Question: {question}\n\nPDF text:\n{context}"
    messages = [
        ChatMessage(role="system", content=system_prompt),
        ChatMessage(role="user", content=user_prompt),
    ]
    response = run_ai(
        SMART_MODEL,
        messages,
        PdfAnswer,
        tag="edit_pdf_answer",
        max_tokens=500,
    )
    answer = response.answer.strip()
    # Reject answers that merely copy the source text back verbatim.
    normalized_answer = re.sub(r"\s+", " ", answer).strip().lower()
    normalized_context = re.sub(r"\s+", " ", context).strip().lower()
    if normalized_answer and normalized_answer in normalized_context:
        raise RuntimeError("AI answer echoed the source text.")
    return answer
def apply_smart_defaults(
    message: str,
    parameters: tool_models.ParamToolModel,
) -> tool_models.ParamToolModel:
    """Thin alias over infer_smart_defaults, kept for call-site readability."""
    updated = infer_smart_defaults(message, parameters)
    return updated

View File

@@ -0,0 +1,43 @@
from __future__ import annotations
from models import tool_models
def dump_params(params: tool_models.ParamToolModel | None) -> dict[str, object]:
    """Serialize a parameter model to an alias-keyed dict; None yields {}."""
    return {} if params is None else params.model_dump(by_alias=True, exclude_none=True)
def normalize_param_keys(
    param_model: tool_models.ParamToolModelType | None,
    data: dict[str, object],
) -> dict[str, object]:
    """Rewrite keys in *data* to the model's canonical field aliases.

    Matching is case-insensitive against both the field name and its alias;
    keys the model does not know about pass through unchanged. When no model
    is given or *data* is empty, the input dict is returned as-is.
    """
    if param_model is None or not data:
        return data
    # Case-insensitive lookup: lowercased name/alias -> canonical alias.
    alias_by_lower: dict[str, str] = {}
    for field_name, field_info in param_model.model_fields.items():
        canonical = field_info.alias or field_name
        alias_by_lower[field_name.lower()] = canonical
        alias_by_lower[canonical.lower()] = canonical
    return {
        (alias_by_lower.get(key.lower()) or key): value
        for key, value in data.items()
    }
def merge_param_updates(
    param_model: tool_models.ParamToolModelType | None,
    base: tool_models.ParamToolModel | None,
    updates: dict[str, object],
) -> tool_models.ParamToolModel | None:
    """Overlay *updates* onto *base* and validate against *param_model*.

    Returns None when no model is known, or when the merged payload is empty.
    """
    if param_model is None:
        return None
    merged = dump_params(base)
    merged.update(updates)
    if not merged:
        return None
    return param_model.model_validate(normalize_param_keys(param_model, merged))

View File

@@ -0,0 +1,104 @@
import logging
import os
import subprocess
import uuid
from flask import Blueprint, jsonify, request
from werkzeug.security import safe_join
from config import OUTPUT_DIR
from file_processing_agent import ToolCatalogService
from llm_utils import AIProviderOverloadedError
from models import EditMessageRequest, PdfEditorUploadResponse
from pdf_text_editor import convert_pdf_to_text_editor_document
from .service import EditService
from .session_store import EditSessionStore
logger = logging.getLogger(__name__)
edit_blueprint = Blueprint("edit", __name__)
_edit_service = EditService(EditSessionStore(), ToolCatalogService())
def register_edit_routes(app) -> None:
    """Attach the edit blueprint (and its module-level service) to *app*."""
    app.register_blueprint(edit_blueprint)
def _json_body(model):
    """Validate the current request's JSON body (or {} if absent) as *model*."""
    payload = request.get_json(silent=True) or {}
    return model.model_validate(payload)
@edit_blueprint.route("/api/edit/sessions", methods=["POST"])
def create_edit_session():
    """Create a new edit session from the uploaded multipart file(s)."""
    files = request.files.getlist("file")
    return _edit_service.create_session(files)
@edit_blueprint.route("/api/edit/sessions/<session_id>/messages", methods=["POST"])
def edit_session_message(session_id: str):
    """Handle one chat message for an edit session.

    AI-provider overload is translated into a 503 with a retry hint rather
    than surfacing as an unhandled 500.
    """
    payload = _json_body(EditMessageRequest)
    try:
        return _edit_service.handle_message(session_id, payload)
    except AIProviderOverloadedError as exc:
        logger.warning("[EDIT] AI provider overloaded session_id=%s (exc=%s)", session_id, exc)
        # Shape mirrors EditMessageResponse's camelCase wire format.
        response = {
            "assistantMessage": "The AI service is temporarily unavailable. Please try again later.",
            "needsMoreInfo": True,
        }
        return jsonify(response), 503
@edit_blueprint.route("/api/edit/sessions/<session_id>/attachments", methods=["POST"])
def edit_session_attachment(session_id: str):
    """Attach an extra named file to an existing edit session."""
    name = request.form.get("name")
    file = request.files.get("file")
    return _edit_service.add_attachment(session_id, name, file)
@edit_blueprint.route("/api/pdf-editor/document", methods=["GET"])
def pdf_editor_document():
    """Expose a JSON snapshot of the PDF for rich text editing.

    Query args:
        pdfUrl: URL/path of a previously generated PDF. Only its basename is
            used, and it must resolve inside OUTPUT_DIR.

    Returns 400 for a missing/invalid name, 404 when the file does not
    exist, 500 when conversion fails.
    """
    pdf_url = request.args.get("pdfUrl")
    if not pdf_url:
        return jsonify({"error": "Missing pdfUrl"}), 400
    # Use only the basename (query string stripped) so clients cannot
    # reference arbitrary paths.
    filename = os.path.basename(pdf_url.split("?")[0])
    if not filename:
        return jsonify({"error": "Invalid pdf file"}), 400
    if not filename.lower().endswith(".pdf"):
        return jsonify({"error": "Invalid pdf file"}), 400
    # safe_join returns None on path traversal outside OUTPUT_DIR.
    pdf_path = safe_join(OUTPUT_DIR, filename)
    if pdf_path is None or not os.path.exists(pdf_path):
        return jsonify({"error": "PDF not found"}), 404
    try:
        document = convert_pdf_to_text_editor_document(pdf_path)
        return jsonify(document.model_dump(by_alias=True, exclude_none=True))
    except FileNotFoundError:
        return jsonify({"error": "Conversion failed"}), 500
    except subprocess.CalledProcessError as exc:
        logger.error("[PDF-EDITOR] Conversion failed: %s", exc)
        return jsonify({"error": "Conversion failed"}), 500
    except Exception as exc:
        # Broad catch at the route boundary: always return a JSON error
        # instead of a bare 500 page.
        logger.error("[PDF-EDITOR] Unexpected conversion failure: %s", exc)
        return jsonify({"error": "Conversion failed"}), 500
@edit_blueprint.route("/api/pdf-editor/upload", methods=["POST"])
def pdf_editor_upload():
    """Accept an edited PDF and save it so the preview can refresh.

    Saves under a server-generated name (never the client-supplied filename)
    and returns the URL of the stored file.
    """
    file = request.files.get("file")
    if not file:
        return jsonify({"error": "Missing file"}), 400
    job_id = str(uuid.uuid4())
    filename = f"{job_id}-edited.pdf"
    output_path = os.path.join(OUTPUT_DIR, filename)
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    file.save(output_path)
    logger.info("[PDF-EDITOR] uploaded edited PDF job_id=%s -> %s", job_id, filename)
    # Bug fix: the returned URL previously contained a literal placeholder
    # instead of the saved file's name, so the preview could never load it.
    response = PdfEditorUploadResponse(pdf_url=f"/output/{filename}")
    return jsonify(response.model_dump(by_alias=True, exclude_none=True))

View File

@@ -0,0 +1,3 @@
from .handlers import EditService
__all__ = ["EditService"]

View File

@@ -0,0 +1,81 @@
import time
import uuid
from dataclasses import dataclass, field
from typing import Any, Literal
from models import ChatMessage, PdfPreflight, tool_models
@dataclass
class EditSessionFile:
    """A single file tracked by an edit session."""
    # Opaque hex id used to reference the file within the session.
    file_id: str
    # Path to the file on disk.
    file_path: str
    # Display name (from the upload or the Content-Disposition header).
    file_name: str
    # MIME type, e.g. "application/pdf"; None when unknown.
    file_type: str | None
    # PDF metadata snapshot; empty default for non-PDF files.
    preflight: PdfPreflight = field(default_factory=PdfPreflight)
@dataclass
class PendingOperation:
    """Single operation in a pending plan."""
    # Catalog id of the tool to run.
    operation_id: tool_models.OperationId
    # Fully-resolved parameters for the operation.
    parameters: tool_models.ParamToolModel
@dataclass
class PendingPlan:
    """
    Unified pending plan for both awaiting params and awaiting confirmation.
    This replaces the old separate pending_operations/pending_operation_id/pending_requirements.
    """
    # Unique id, used for idempotency via EditSession.executed_plan_ids.
    plan_id: str = field(default_factory=lambda: str(uuid.uuid4()))
    # Only one state today; kept as a Literal for future extension.
    state: Literal["AWAITING_CONFIRM"] = "AWAITING_CONFIRM"
    # Ordered operations to execute.
    ops: list[PendingOperation] = field(default_factory=list)
    # Risk level from plan risk assessment (default "low").
    risk_level: str = "low"
    # Human-readable reasons supporting the risk level.
    risk_reasons: list[str] = field(default_factory=list)
    # Unix timestamp of plan creation.
    created_at: float = field(default_factory=time.time)
    # The user message that produced this plan, kept for replanning context.
    source_message: str | None = None
@dataclass
class EditSession:
    """All server-side state for one interactive edit conversation."""
    session_id: str
    # NOTE(review): file_path/file_name/file_type appear to duplicate the
    # primary entry of `files` below — confirm which one callers rely on.
    file_path: str
    file_name: str
    file_type: str | None
    # Full chat transcript (system/user/assistant messages).
    messages: list[ChatMessage] = field(default_factory=list)
    # Unified pending plan (None when nothing awaits confirmation)
    pending_plan: PendingPlan | None = None
    # Last executed operation (for repeat requests)
    last_operation_id: tool_models.OperationId | None = None
    last_parameters: tool_models.ParamToolModel | None = None
    # File metadata
    preflight: PdfPreflight = field(default_factory=PdfPreflight)
    files: list[EditSessionFile] = field(default_factory=list)
    # Extra uploaded files, keyed by attachment name.
    attachments: dict[str, EditSessionFile] = field(default_factory=dict)
    # Document context (for Q&A)
    file_context: dict[str, Any] | None = None
    file_context_path: str | None = None
    # Idempotency tracking: plan ids that have already been executed.
    executed_plan_ids: set[str] = field(default_factory=set)
class EditSessionStore:
    """In-memory (process-local) registry of edit sessions, keyed by id."""

    def __init__(self) -> None:
        self._by_id: dict[str, EditSession] = {}

    def get(self, session_id: str) -> EditSession | None:
        """Return the session for *session_id*, or None when unknown."""
        return self._by_id.get(session_id)

    def set(self, session: EditSession) -> None:
        """Insert or replace the session under its own session_id."""
        self._by_id[session.session_id] = session

    def delete(self, session_id: str) -> None:
        """Remove the session if present; unknown ids are ignored."""
        self._by_id.pop(session_id, None)

View File

@@ -0,0 +1,158 @@
"""
Explicit state machine router for edit flow.
Routes messages based on pending_plan.state: AWAITING_CONFIRM | None (fresh request).
"""
import logging
from dataclasses import dataclass
from typing import Any, Literal
from models import ChatMessage
from .confirmation import answer_confirmation_question, classify_confirmation_intent
from .operations import build_plan_summary
from .session_store import EditSession, PendingPlan
logger = logging.getLogger(__name__)
# Every action the state router can instruct the message handler to take.
RoutingAction = Literal[
    "execute",
    "answer_question",
    "route_fresh",
    "error",
    "already_executed",
    "cancelled",
]
@dataclass
class StateRoutingResult:
    """Result of state routing - tells handler what to do next."""
    # What the message handler should do (see RoutingAction).
    action: RoutingAction
    # Plan to execute when action == "execute".
    plan: PendingPlan | None = None
    # User-facing assistant message, when one should be emitted.
    message: str | None = None
    # Internal error description when action == "error".
    error: str | None = None
    # NOTE(review): not set anywhere in this module — confirm external users.
    followup_intent: Any | None = None
    # True when the pending plan must be kept for a later confirmation.
    keep_pending: bool | None = None
    # Plan id for idempotency reporting (action == "already_executed").
    plan_id: str | None = None
def route_message(
    session: EditSession,
    user_message: str,
    history: list[ChatMessage],
) -> StateRoutingResult:
    """
    Route message based on pending_plan state.

    State machine:
        No pending_plan → route as fresh request
        AWAITING_CONFIRM → handle confirmation (confirm/cancel/modify/new_request/question)

    Returns:
        StateRoutingResult with action and context
    """
    if not session.pending_plan:
        return StateRoutingResult(action="route_fresh")
    if session.pending_plan.state == "AWAITING_CONFIRM":
        return _handle_awaiting_confirm(session, user_message, history)
    # Defensive: state is currently a single-value Literal, but guard against
    # a new state being added without a handler. Lazy %-formatting so the
    # message is only built when the record is actually emitted.
    logger.error("[STATE_ROUTER] Unknown state: %s", session.pending_plan.state)
    return StateRoutingResult(action="error", error="Invalid pending plan state")
def _handle_awaiting_confirm(
    session: EditSession,
    user_message: str,
    history: list[ChatMessage],
) -> StateRoutingResult:
    """
    Handle message during AWAITING_CONFIRM state.
    CRITICAL: Never ignore messages. Always classify intent.
    Actions:
        - confirm → execute plan
        - cancel → clear plan
        - modify/new_request → clear plan + route as fresh
        - question → answer without executing
    """
    plan = session.pending_plan
    # route_message only dispatches here when a plan exists.
    assert plan is not None
    # Build plan summary for context
    operations = [op.operation_id for op in plan.ops]
    plan_summary = build_plan_summary(operations)
    # Classify confirmation intent
    intent = classify_confirmation_intent(
        user_message,
        plan_summary,
        history,
        session_id=session.session_id,
    )
    if not intent:
        # Fallback: treat as question (safe default — never execute on an
        # unclassifiable message)
        logger.warning("[STATE_ROUTER] No confirmation intent, defaulting to question")
        intent_action = "question"
    else:
        intent_action = intent.action
    logger.info(
        f"[STATE_ROUTER] confirm_state session_id={session.session_id} intent={intent_action} plan_id={plan.plan_id}"
    )
    if intent_action == "confirm":
        # Check idempotency: never run the same plan twice.
        if plan.plan_id in session.executed_plan_ids:
            return StateRoutingResult(
                action="already_executed", plan_id=plan.plan_id, message="This plan has already been executed."
            )
        # NOTE(review): pending_plan is left in place here — presumably the
        # executor clears/marks it after running; confirm.
        return StateRoutingResult(
            action="execute",
            plan=plan,
        )
    elif intent_action == "cancel":
        # Clear pending plan so it can never execute later.
        session.pending_plan = None
        return StateRoutingResult(
            action="cancelled", message="Cancelled. Let me know if you want to do something else."
        )
    elif intent_action in ("modify", "new_request"):
        # Minimal safe behavior: clear + replan
        # Don't try to patch - just treat as fresh request
        logger.info(f"[STATE_ROUTER] {intent_action} detected, clearing plan and routing fresh")
        session.pending_plan = None
        return StateRoutingResult(
            action="route_fresh",
            message=None,  # Don't add extra message, just route
        )
    elif intent_action == "question":
        # Answer question without executing
        answer = answer_confirmation_question(
            user_message,
            plan_summary,
            operations,
            history,
            session_id=session.session_id,
        )
        return StateRoutingResult(
            action="answer_question",
            message=answer,
            keep_pending=True,  # Keep plan for later confirmation
        )
    else:
        # Unknown classification: keep the plan and ask the user to be explicit.
        logger.warning(f"[STATE_ROUTER] Unknown confirmation intent: {intent_action}")
        return StateRoutingResult(
            action="answer_question",
            message="I didn't understand that. Please confirm to proceed or cancel to stop.",
            keep_pending=True,
        )