diff --git a/.editorconfig b/.editorconfig index 872fe6c2ca..665a74a09a 100644 --- a/.editorconfig +++ b/.editorconfig @@ -15,6 +15,7 @@ max_line_length = 100 [*.py] indent_size = 4 +max_line_length = 120 [*.gradle] indent_size = 4 diff --git a/.github/workflows/ai-engine.yml b/.github/workflows/ai-engine.yml index c0cd927841..a90246f887 100644 --- a/.github/workflows/ai-engine.yml +++ b/.github/workflows/ai-engine.yml @@ -20,54 +20,79 @@ jobs: with: enable-cache: true + - name: Set up JDK 25 + uses: actions/setup-java@be666c2fcd27ec809703dec50e508c2fdc7f6654 # v5.2.0 + with: + java-version: "25" + distribution: "temurin" + + - name: Setup Gradle + uses: gradle/actions/setup-gradle@f29f5a9d7b09a7c6b29859002d29d24e1674c884 # v5.0.1 + with: + gradle-version: 9.3.1 + - name: Install Task uses: go-task/setup-task@3be4020d41929789a01026e0e427a4321ce0ad44 # v2.0.0 - - name: Run fixers - # Ignore errors here because we're going to add comments for them in the following steps before actually failing - run: task engine:fix || true + - name: Regenerate tool models + run: task engine:tool-models - - name: Check for fixer changes - id: fixer_changes + - name: Verify tool models are up to date run: | - if git diff --quiet; then - echo "changed=false" >> "$GITHUB_OUTPUT" - else - echo "changed=true" >> "$GITHUB_OUTPUT" + if ! git diff --exit-code engine/src/stirling/models/tool_models.py; then + echo "tool_models.py is out of date." + echo "Run 'task engine:tool-models' locally and commit the updated file." 
+ exit 1 fi - - name: Post fixer suggestions - if: steps.fixer_changes.outputs.changed == 'true' && github.event_name == 'pull_request' - uses: reviewdog/action-suggester@v1 - continue-on-error: true - with: - tool_name: engine-make-fix - github_token: ${{ secrets.GITHUB_TOKEN }} - filter_mode: file - fail_level: any - level: info + - name: Run fixers + run: task engine:fix - - name: Comment on fixer suggestions - if: steps.fixer_changes.outputs.changed == 'true' && github.event_name == 'pull_request' + - name: Verify fixes are committed + id: fixer_changes + run: | + if ! git diff --quiet; then + git --no-pager diff --stat + echo "::error::There are issues with your Python code that will need to be fixed before they can be merged in. Run 'task engine:fix' to auto-fix what can be fixed automatically, then run 'task engine:check' to see what still needs fixing manually." + exit 1 + fi + + - name: Comment on fixer failures + if: steps.fixer_changes.outcome == 'failure' && github.event_name == 'pull_request' + continue-on-error: true uses: actions/github-script@v7 with: script: | - await github.rest.issues.createComment({ + const marker = ''; + const body = [ + marker, + '### Engine Check Failed', + '', + 'There are issues with your Python code that will need to be fixed before they can be merged in.', + '', + 'Run `task engine:fix` to auto-fix what can be fixed automatically, then run `task engine:check` to see what still needs fixing manually.', + ].join('\n'); + const { data: comments } = await github.rest.issues.listComments({ owner: context.repo.owner, repo: context.repo.repo, issue_number: context.issue.number, - body: "The Python code in your PR has formatting/linting issues. 
Consider running `task engine:fix` locally or setting up your editor's Ruff integration to auto-format and lint your files as you go, or commit the suggested changes on this PR.", }); - - - name: Verify fixer changes are committed - if: steps.fixer_changes.outputs.changed == 'true' - run: | - if ! git diff --exit-code; then - echo "Fixes are out of date." - echo "Apply the reviewdog suggestions or run 'task engine:fix' from the repo root and commit the updated files." - git --no-pager diff --stat - exit 1 - fi + const existing = comments.find(c => c.body.includes(marker)); + if (existing) { + await github.rest.issues.updateComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: existing.id, + body, + }); + } else { + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + body, + }); + } - name: Run linting run: task engine:lint diff --git a/.taskfiles/backend.yml b/.taskfiles/backend.yml index 3308e7d8b4..7e487d092c 100644 --- a/.taskfiles/backend.yml +++ b/.taskfiles/backend.yml @@ -66,10 +66,16 @@ tasks: swagger: desc: "Generate OpenAPI docs" cmds: - - cmd: cmd /c gradlew.bat :stirling-pdf:generateOpenApiDocs + - cmd: cmd /c gradlew.bat :stirling-pdf:copySwaggerDoc platforms: [windows] - - cmd: ./gradlew :stirling-pdf:generateOpenApiDocs + - cmd: ./gradlew :stirling-pdf:copySwaggerDoc platforms: [linux, darwin] + sources: + - app/core/src/main/java/**/*.java + - app/proprietary/src/main/java/**/*.java + - app/common/src/main/java/**/*.java + generates: + - SwaggerDoc.json check: desc: "Backend quality gate" diff --git a/.taskfiles/engine.yml b/.taskfiles/engine.yml index c23ed8d7cf..cb357a9dc5 100644 --- a/.taskfiles/engine.yml +++ b/.taskfiles/engine.yml @@ -13,7 +13,7 @@ tasks: status: - test -d .venv - prep: + prepare: desc: "Set up engine .env from template" deps: [install] cmds: @@ -26,7 +26,7 @@ tasks: run: desc: "Run engine server" - deps: [prep] + 
deps: [prepare] ignore_error: true dir: src env: @@ -36,7 +36,7 @@ tasks: dev: desc: "Start engine dev server with hot reload" - deps: [prep] + deps: [prepare] ignore_error: true dir: src env: @@ -76,7 +76,7 @@ tasks: test: desc: "Run tests" - deps: [prep] + deps: [prepare] cmds: - uv run pytest tests @@ -95,11 +95,15 @@ tasks: - task: test tool-models: - desc: "Generate tool_models.py from frontend TypeScript defs" - deps: [install] + desc: "Generate tool_models.py from Java OpenAPI spec (SwaggerDoc.json)" + deps: [install, ":backend:swagger"] cmds: - - uv run python scripts/generate_tool_models.py --output src/stirling/models/tool_models.py - - task: fix + - uv run python scripts/generate_tool_models.py --spec ../SwaggerDoc.json --output src/stirling/models/tool_models.py + sources: + - ../SwaggerDoc.json + - scripts/generate_tool_models.py + generates: + - src/stirling/models/tool_models.py clean: desc: "Clean build artifacts" diff --git a/AGENTS.md b/AGENTS.md index b97f114dc1..6fe3cacc99 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -29,6 +29,8 @@ This project uses [Task](https://taskfile.dev/) as a unified command runner. All - **Code formatting**: `task format` (or `task backend:format` for Java only) - **Full quality gate**: `task check` (runs lint + typecheck + test across all components) +After modifying any files in the project, you must run the relevant `task check` command that covers that area of the code. For example, when editing frontend files run `task frontend:check`; for Python engine files run `task engine:check`; for Java backend files run `task backend:check`. + ### Docker Development - **Build standard**: `task docker:build` (or `docker build -t stirling-pdf -f docker/embedded/Dockerfile .`) - **Build fat version**: `task docker:build:fat` @@ -68,8 +70,8 @@ Development for the AI engine happens in the `engine/` folder. 
The frontend call - `frontend/config/.env.saas.example` — SaaS-only vars - `frontend/config/.env.desktop.example` — desktop (Tauri)-only vars - Never use `|| 'hardcoded-fallback'` inline — put defaults in the example files -- `task frontend:prep` / `prep:saas` / `prep:desktop` auto-create the env files from examples on first run, and error if any required keys are missing -- Prep runs automatically as a dependency of all `dev*`, `build*`, and `desktop*` tasks +- `task frontend:prepare` / `prepare:saas` / `prepare:desktop` auto-create the env files from examples on first run, and error if any required keys are missing +- Prepare runs automatically as a dependency of all `dev*`, `build*`, and `desktop*` tasks - See `frontend/README.md#environment-variables` for full documentation #### Import Paths - CRITICAL diff --git a/Taskfile.yml b/Taskfile.yml index d6138a7ad4..4840a82697 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -44,7 +44,7 @@ tasks: desc: "Start backend + frontend + engine concurrently" deps: - backend:dev - - frontend:dev + - frontend:dev:prototypes - engine:dev # ============================================================ diff --git a/app/common/src/main/java/stirling/software/common/service/InternalApiClient.java b/app/common/src/main/java/stirling/software/common/service/InternalApiClient.java new file mode 100644 index 0000000000..ba53e7fdef --- /dev/null +++ b/app/common/src/main/java/stirling/software/common/service/InternalApiClient.java @@ -0,0 +1,184 @@ +package stirling.software.common.service; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.net.URLDecoder; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.util.regex.Pattern; + +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.core.env.Environment; +import org.springframework.core.io.FileSystemResource; +import org.springframework.core.io.Resource; +import org.springframework.http.*; 
+import org.springframework.stereotype.Service; +import org.springframework.util.MultiValueMap; +import org.springframework.web.client.RequestCallback; +import org.springframework.web.client.RestTemplate; + +import jakarta.servlet.ServletContext; + +import lombok.extern.slf4j.Slf4j; + +import stirling.software.common.model.enumeration.Role; +import stirling.software.common.util.TempFile; +import stirling.software.common.util.TempFileManager; + +/** + * Dispatches HTTP POST requests to internal Stirling API endpoints via loopback. Used by + * PipelineProcessor and AiWorkflowService to execute tool operations programmatically without + * leaving the JVM network stack. + */ +@Service +@Slf4j +public class InternalApiClient { + + // Allowlist for internal dispatch. Matches a fixed namespace prefix, + // but rejects traversal (..), URL-encoding (%), query/fragment, backslashes, and any other + // character that could alter the resolved endpoint on the local Spring server. + private static final Pattern ALLOWED_ENDPOINT_PATH = + Pattern.compile("^/api/v1/(general|misc|security|convert|filter)(/[A-Za-z0-9_-]+)+$"); + + private final ServletContext servletContext; + private final UserServiceInterface userService; + private final TempFileManager tempFileManager; + private final Environment environment; + + public InternalApiClient( + ServletContext servletContext, + @Autowired(required = false) UserServiceInterface userService, + TempFileManager tempFileManager, + Environment environment) { + this.servletContext = servletContext; + this.userService = userService; + this.tempFileManager = tempFileManager; + this.environment = environment; + } + + /** + * POST to an internal API endpoint. The endpointPath must start with one of the allowed + * prefixes (e.g. {@code /api/v1/misc/compress-pdf}). + * + * @param endpointPath API path (e.g. 
{@code /api/v1/general/rotate-pdf}) + * @param body multipart form body (fileInput + parameters) + * @return response with the result file as a {@link TempFileResource} body + */ + public ResponseEntity post(String endpointPath, MultiValueMap body) { + validateUrl(endpointPath); + String url = getBaseUrl() + endpointPath; + + RestTemplate restTemplate = new RestTemplate(); + HttpHeaders headers = new HttpHeaders(); + String apiKey = getApiKeyForUser(); + if (apiKey != null && !apiKey.isEmpty()) { + headers.add("X-API-KEY", apiKey); + } + + HttpEntity> entity = new HttpEntity<>(body, headers); + RequestCallback requestCallback = restTemplate.httpEntityCallback(entity, Resource.class); + + return restTemplate.execute( + url, + HttpMethod.POST, + requestCallback, + response -> { + try { + TempFile tempFile = tempFileManager.createManagedTempFile("internal-api"); + Files.copy( + response.getBody(), + tempFile.getPath(), + java.nio.file.StandardCopyOption.REPLACE_EXISTING); + String filename = extractFilename(response.getHeaders()); + TempFileResource resource = new TempFileResource(tempFile, filename); + return ResponseEntity.status(response.getStatusCode()) + .headers(response.getHeaders()) + .body(resource); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + }); + } + + /** + * Extract the filename from a response's {@code Content-Disposition} header. Returns {@code + * null} if the header is missing or has no filename. 
+ */ + private static String extractFilename(HttpHeaders headers) { + String contentDisposition = headers.getFirst(HttpHeaders.CONTENT_DISPOSITION); + if (contentDisposition == null || contentDisposition.isBlank()) { + return null; + } + for (String part : contentDisposition.split(";")) { + String trimmed = part.trim(); + if (trimmed.startsWith("filename")) { + String[] kv = trimmed.split("=", 2); + if (kv.length != 2) { + continue; + } + String value = kv[1].trim().replace("\"", ""); + return URLDecoder.decode(value, StandardCharsets.UTF_8); + } + } + return null; + } + + private String getBaseUrl() { + // Resolve the port lazily so desktop mode (server.port=0, OS-assigned) dispatches to the + // actual bound port. Spring publishes local.server.port once the web server is up; fall + // back to the configured server.port for early calls (tests, non-web contexts). + String port = environment.getProperty("local.server.port"); + if (port == null) { + port = environment.getProperty("server.port", "8080"); + } + return "http://localhost:" + port + servletContext.getContextPath(); + } + + private String getApiKeyForUser() { + if (userService == null) return ""; + String username = userService.getCurrentUsername(); + if (username != null && !username.equals("anonymousUser")) { + return userService.getApiKeyForUser(username); + } + return userService.getApiKeyForUser(Role.INTERNAL_API_USER.getRoleId()); + } + + private void validateUrl(String endpointPath) { + if (endpointPath == null || !ALLOWED_ENDPOINT_PATH.matcher(endpointPath).matches()) { + log.warn("Blocked internal API request to disallowed path: {}", endpointPath); + throw new SecurityException( + "Internal API dispatch not permitted for endpoint: " + endpointPath); + } + } + + /** + * A {@link FileSystemResource} that holds a reference to its backing {@link TempFile}. + * + *

If a display filename is supplied (typically parsed from the upstream response's {@code + * Content-Disposition} header), it is returned from {@link #getFilename()} instead of the + * underlying temp file's path-based name. + */ + public static class TempFileResource extends FileSystemResource { + private final TempFile tempFile; + private final String displayFilename; + + public TempFileResource(TempFile tempFile) { + this(tempFile, null); + } + + public TempFileResource(TempFile tempFile, String displayFilename) { + super(tempFile.getFile()); + this.tempFile = tempFile; + this.displayFilename = displayFilename; + } + + public TempFile getTempFile() { + return tempFile; + } + + @Override + public String getFilename() { + return displayFilename != null ? displayFilename : super.getFilename(); + } + } +} diff --git a/app/common/src/main/java/stirling/software/common/service/ToolMetadataService.java b/app/common/src/main/java/stirling/software/common/service/ToolMetadataService.java new file mode 100644 index 0000000000..662878b741 --- /dev/null +++ b/app/common/src/main/java/stirling/software/common/service/ToolMetadataService.java @@ -0,0 +1,18 @@ +package stirling.software.common.service; + +/** Provides metadata about tool endpoints for internal dispatch. */ +public interface ToolMetadataService { + + /** Returns true if the given operation path accepts multiple input files. */ + boolean isMultiInput(String operationPath); + + /** + * Returns true when the endpoint's ZIP response is a transport for multiple typed results and + * should be unpacked: multi-output endpoints (Type:SIMO / Type:MIMO) and wrapper declarations + * such as {@code Output:ZIP-PDF} or {@code Output:IMAGE/ZIP}. + * + *

Returns false for a bare {@code Output:ZIP} (e.g. {@code get-attachments}), where the + * archive itself is the deliverable and should be kept packed. + */ + boolean shouldUnpackZipResponse(String operationPath); +} diff --git a/app/common/src/main/java/stirling/software/common/util/RegexPatternUtils.java b/app/common/src/main/java/stirling/software/common/util/RegexPatternUtils.java index de59f09c64..48e419a0ae 100644 --- a/app/common/src/main/java/stirling/software/common/util/RegexPatternUtils.java +++ b/app/common/src/main/java/stirling/software/common/util/RegexPatternUtils.java @@ -538,9 +538,9 @@ public final class RegexPatternUtils { getPattern("[^a-zA-Z0-9 ]"); // Input sanitization getPattern("[^a-zA-Z0-9]"); // Filename sanitization // API doc patterns - getPattern("Output:(\\w+)"); // precompiled single-escaped for runtime regex \w - getPattern("Input:(\\w+)"); - getPattern("Type:(\\w+)"); + getPattern("Output:\\s*(\\w+)"); + getPattern("Input:\\s*(\\w+)"); + getPattern("Type:\\s*(\\w+)"); log.debug("Pre-compiled {} common regex patterns", patternCache.size()); } @@ -552,19 +552,19 @@ public final class RegexPatternUtils { /* Pattern for matching Output: in API descriptions */ public Pattern getApiDocOutputTypePattern() { - return getPattern("Output:(\\w+)"); + return getPattern("Output:\\s*(\\w+)"); } /* Pattern for matching Input: in API descriptions */ public Pattern getApiDocInputTypePattern() { - return getPattern("Input:(\\w+)"); + return getPattern("Input:\\s*(\\w+)"); } /** * Pattern for matching Type: in API descriptions */ public Pattern getApiDocTypePattern() { - return getPattern("Type:(\\w+)"); + return getPattern("Type:\\s*(\\w+)"); } /* Pattern for validating file extensions (2-4 alphanumeric, case-insensitive) */ diff --git a/app/common/src/main/java/stirling/software/common/util/ZipExtractionUtils.java b/app/common/src/main/java/stirling/software/common/util/ZipExtractionUtils.java new file mode 100644 index 0000000000..b49d1d2106 --- 
/dev/null +++ b/app/common/src/main/java/stirling/software/common/util/ZipExtractionUtils.java @@ -0,0 +1,142 @@ +package stirling.software.common.util; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.file.Files; +import java.util.ArrayList; +import java.util.List; +import java.util.function.Consumer; +import java.util.zip.ZipEntry; +import java.util.zip.ZipInputStream; + +import org.springframework.core.io.FileSystemResource; +import org.springframework.core.io.Resource; + +import io.github.pixee.security.ZipSecurity; + +import lombok.experimental.UtilityClass; +import lombok.extern.slf4j.Slf4j; + +/** + * Helpers for detecting and extracting ZIP-formatted responses returned from Stirling API + * endpoints. Shared between {@code PipelineProcessor} and {@code AiWorkflowService} so both callers + * unpack ZIPs consistently (hardened against zip-slip, depth-limited, backed by managed temp + * files). + */ +@Slf4j +@UtilityClass +public class ZipExtractionUtils { + + private static final int MAX_UNZIP_DEPTH = 10; + private static final byte[] ZIP_MAGIC = {0x50, 0x4B, 0x03, 0x04}; + + /** + * Returns true if the resource starts with the standard ZIP magic bytes. CBZ files are + * explicitly treated as non-ZIP. + */ + public static boolean isZip(Resource data) throws IOException { + return isZip(data, null); + } + + /** + * Returns true if the resource starts with the standard ZIP magic bytes. Files named with the + * {@code .cbz} extension are excluded (handled separately by the comic viewer). 
+ */ + public static boolean isZip(Resource data, String filename) throws IOException { + if (data == null || data.contentLength() < ZIP_MAGIC.length) { + return false; + } + if (filename != null && filename.toLowerCase().endsWith(".cbz")) { + return false; + } + try (InputStream is = data.getInputStream()) { + byte[] header = new byte[ZIP_MAGIC.length]; + if (is.read(header) < ZIP_MAGIC.length) { + return false; + } + for (int i = 0; i < ZIP_MAGIC.length; i++) { + if (header[i] != ZIP_MAGIC[i]) { + return false; + } + } + return true; + } + } + + /** + * Extract a ZIP resource into a flat list of resources, one per file entry. Nested ZIPs are + * recursively extracted up to {@link #MAX_UNZIP_DEPTH}. Each entry is materialized as a + * hardened-extracted managed temp file so downstream consumers can stream the bytes. + */ + public static List extractZip(Resource zip, TempFileManager tempFileManager) + throws IOException { + return extractZip(zip, tempFileManager, null); + } + + /** + * Extract a ZIP resource into a flat list of resources. Each created {@link TempFile} is also + * passed to {@code tempFileConsumer} when non-null, giving callers the option to register the + * temp files with an auxiliary lifecycle (e.g. {@code PipelineResult}). 
+ */ + public static List extractZip( + Resource zip, TempFileManager tempFileManager, Consumer tempFileConsumer) + throws IOException { + return extractZipInternal(zip, tempFileManager, tempFileConsumer, 0); + } + + private static List extractZipInternal( + Resource zip, + TempFileManager tempFileManager, + Consumer tempFileConsumer, + int depth) + throws IOException { + if (depth > MAX_UNZIP_DEPTH) { + log.warn( + "ZIP nesting depth {} exceeds limit {}, treating as file", + depth, + MAX_UNZIP_DEPTH); + return List.of(zip); + } + log.debug("Unzipping data of length: {}", zip.contentLength()); + List extracted = new ArrayList<>(); + try (InputStream bais = zip.getInputStream(); + ZipInputStream zis = ZipSecurity.createHardenedInputStream(bais)) { + ZipEntry entry; + while ((entry = zis.getNextEntry()) != null) { + if (entry.isDirectory()) { + continue; + } + TempFile tempFile = tempFileManager.createManagedTempFile("unzip"); + if (tempFileConsumer != null) { + tempFileConsumer.accept(tempFile); + } + try (OutputStream os = Files.newOutputStream(tempFile.getPath())) { + byte[] buffer = new byte[4096]; + int count; + while ((count = zis.read(buffer)) != -1) { + os.write(buffer, 0, count); + } + } + final String filename = entry.getName(); + Resource fileResource = + new FileSystemResource(tempFile.getFile()) { + @Override + public String getFilename() { + return filename; + } + }; + if (isZip(fileResource, filename)) { + log.debug("Nested ZIP entry {} — recursing", filename); + extracted.addAll( + extractZipInternal( + fileResource, tempFileManager, tempFileConsumer, depth + 1)); + } else { + extracted.add(fileResource); + } + } + } + log.debug("Unzipping completed. 
{} files extracted.", extracted.size()); + return extracted; + } +} diff --git a/app/common/src/test/java/stirling/software/common/service/InternalApiClientTest.java b/app/common/src/test/java/stirling/software/common/service/InternalApiClientTest.java new file mode 100644 index 0000000000..f815e92e71 --- /dev/null +++ b/app/common/src/test/java/stirling/software/common/service/InternalApiClientTest.java @@ -0,0 +1,159 @@ +package stirling.software.common.service; + +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.ArgumentMatchers.*; +import static org.mockito.Mockito.*; + +import java.io.ByteArrayInputStream; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.springframework.core.io.ByteArrayResource; +import org.springframework.core.io.Resource; +import org.springframework.http.*; +import org.springframework.http.client.ClientHttpResponse; +import org.springframework.mock.env.MockEnvironment; +import org.springframework.util.LinkedMultiValueMap; +import org.springframework.util.MultiValueMap; +import org.springframework.web.client.RequestCallback; +import org.springframework.web.client.ResponseExtractor; +import org.springframework.web.client.RestTemplate; + +import jakarta.servlet.ServletContext; + +import stirling.software.common.util.TempFile; +import stirling.software.common.util.TempFileManager; + +@ExtendWith(MockitoExtension.class) +class InternalApiClientTest { + + @Mock ServletContext servletContext; + @Mock UserServiceInterface userService; + @Mock TempFileManager tempFileManager; + + InternalApiClient client; + + @BeforeEach + void setUp() { + lenient().when(servletContext.getContextPath()).thenReturn(""); + MockEnvironment environment = new 
MockEnvironment().withProperty("server.port", "8080"); + client = new InternalApiClient(servletContext, userService, tempFileManager, environment); + } + + @Test + void postDoesNotForceContentType() throws Exception { + MultiValueMap body = new LinkedMultiValueMap<>(); + body.add("fileInput", namedResource("input.pdf", "data")); + + Path tempPath = Files.createTempFile("internal-api-test", ".tmp"); + TempFile tempFile = mock(TempFile.class); + when(tempFile.getPath()).thenReturn(tempPath); + when(tempFile.getFile()).thenReturn(tempPath.toFile()); + when(tempFileManager.createManagedTempFile("internal-api")).thenReturn(tempFile); + + HttpHeaders[] captured = {null}; + + try (var ignored = + mockConstruction( + RestTemplate.class, + (rt, ctx) -> { + when(rt.httpEntityCallback(any(), eq(Resource.class))) + .thenAnswer( + inv -> { + HttpEntity entity = inv.getArgument(0); + captured[0] = entity.getHeaders(); + return (RequestCallback) req -> {}; + }); + + when(rt.execute(anyString(), eq(HttpMethod.POST), any(), any())) + .thenAnswer(inv -> fakeOkResponse(inv.getArgument(3))); + })) { + + ResponseEntity response = client.post("/api/v1/general/merge-pdfs", body); + + assertNotNull(response); + assertEquals(HttpStatus.OK, response.getStatusCode()); + assertNotNull(response.getBody()); + assertNull(captured[0].getContentType(), "Content-Type should not be forced"); + } finally { + Files.deleteIfExists(tempPath); + } + } + + @Test + void postRejectsDisallowedPath() { + MultiValueMap body = new LinkedMultiValueMap<>(); + assertThrows(SecurityException.class, () -> client.post("/api/v1/admin/settings", body)); + } + + @Test + void postRejectsPathTraversal() { + MultiValueMap body = new LinkedMultiValueMap<>(); + assertThrows( + SecurityException.class, + () -> client.post("/api/v1/misc/../../actuator/env", body)); + } + + @Test + void postRejectsUrlEncodedCharacters() { + MultiValueMap body = new LinkedMultiValueMap<>(); + assertThrows( + SecurityException.class, () -> 
client.post("/api/v1/misc/%2e%2e/actuator", body)); + } + + @Test + void postRejectsQueryString() { + MultiValueMap body = new LinkedMultiValueMap<>(); + assertThrows( + SecurityException.class, + () -> client.post("/api/v1/misc/compress-pdf?redirect=evil", body)); + } + + @Test + void postRejectsEmptySegment() { + MultiValueMap body = new LinkedMultiValueMap<>(); + assertThrows(SecurityException.class, () -> client.post("/api/v1/misc//foo", body)); + } + + @Test + void postRejectsTrailingSlash() { + MultiValueMap body = new LinkedMultiValueMap<>(); + assertThrows(SecurityException.class, () -> client.post("/api/v1/misc/foo/", body)); + } + + @Test + void postRejectsNullPath() { + MultiValueMap body = new LinkedMultiValueMap<>(); + assertThrows(SecurityException.class, () -> client.post(null, body)); + } + + /** Create a ByteArrayResource with a filename (required for multipart). */ + private static Resource namedResource(String filename, String content) { + return new ByteArrayResource(content.getBytes(StandardCharsets.UTF_8)) { + @Override + public String getFilename() { + return filename; + } + }; + } + + /** Simulate a successful HTTP response through a RestTemplate ResponseExtractor. 
*/ + @SuppressWarnings("unchecked") + private static ResponseEntity fakeOkResponse(Object extractorArg) throws Exception { + var extractor = (ResponseExtractor>) extractorArg; + ClientHttpResponse response = mock(ClientHttpResponse.class); + when(response.getBody()) + .thenReturn(new ByteArrayInputStream("ok".getBytes(StandardCharsets.UTF_8))); + HttpHeaders headers = new HttpHeaders(); + headers.add(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=\"out.pdf\""); + when(response.getHeaders()).thenReturn(headers); + lenient().when(response.getStatusCode()).thenReturn(HttpStatus.OK); + return extractor.extractData(response); + } +} diff --git a/app/core/src/main/java/stirling/software/SPDF/controller/api/SplitPdfByChaptersController.java b/app/core/src/main/java/stirling/software/SPDF/controller/api/SplitPdfByChaptersController.java index 784754b44c..e623cb6bdf 100644 --- a/app/core/src/main/java/stirling/software/SPDF/controller/api/SplitPdfByChaptersController.java +++ b/app/core/src/main/java/stirling/software/SPDF/controller/api/SplitPdfByChaptersController.java @@ -124,7 +124,9 @@ public class SplitPdfByChaptersController { @MultiFileResponse @Operation( summary = "Split PDFs by Chapters", - description = "Splits a PDF into chapters and returns a ZIP file.") + description = + "Splits a PDF into chapters and returns a ZIP file. 
Input:PDF Output:ZIP-PDF" + + " Type:SISO") public ResponseEntity splitPdf( @ModelAttribute SplitPdfByChaptersRequest request) throws Exception { MultipartFile file = request.getFileInput(); diff --git a/app/core/src/main/java/stirling/software/SPDF/controller/api/pipeline/PipelineProcessor.java b/app/core/src/main/java/stirling/software/SPDF/controller/api/pipeline/PipelineProcessor.java index 0f4401ee73..fde2cfa900 100644 --- a/app/core/src/main/java/stirling/software/SPDF/controller/api/pipeline/PipelineProcessor.java +++ b/app/core/src/main/java/stirling/software/SPDF/controller/api/pipeline/PipelineProcessor.java @@ -11,36 +11,26 @@ import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Map.Entry; -import java.util.zip.ZipEntry; -import java.util.zip.ZipInputStream; -import org.springframework.beans.factory.annotation.Autowired; import org.springframework.core.io.FileSystemResource; import org.springframework.core.io.Resource; import org.springframework.http.*; import org.springframework.stereotype.Service; import org.springframework.util.LinkedMultiValueMap; import org.springframework.util.MultiValueMap; -import org.springframework.web.client.RequestCallback; -import org.springframework.web.client.RestTemplate; import org.springframework.web.multipart.MultipartFile; import io.github.pixee.security.Filenames; -import io.github.pixee.security.ZipSecurity; - -import jakarta.servlet.ServletContext; import lombok.extern.slf4j.Slf4j; -import stirling.software.SPDF.SPDFApplication; import stirling.software.SPDF.model.PipelineConfig; import stirling.software.SPDF.model.PipelineOperation; import stirling.software.SPDF.model.PipelineResult; import stirling.software.SPDF.service.ApiDocService; -import stirling.software.common.model.enumeration.Role; -import stirling.software.common.service.UserServiceInterface; -import stirling.software.common.util.TempFile; +import stirling.software.common.service.InternalApiClient; import 
stirling.software.common.util.TempFileManager; +import stirling.software.common.util.ZipExtractionUtils; @Service @Slf4j @@ -48,20 +38,16 @@ public class PipelineProcessor { private final ApiDocService apiDocService; - private final UserServiceInterface userService; - - private final ServletContext servletContext; + private final InternalApiClient internalApiClient; private final TempFileManager tempFileManager; public PipelineProcessor( ApiDocService apiDocService, - @Autowired(required = false) UserServiceInterface userService, - ServletContext servletContext, + InternalApiClient internalApiClient, TempFileManager tempFileManager) { this.apiDocService = apiDocService; - this.userService = userService; - this.servletContext = servletContext; + this.internalApiClient = internalApiClient; this.tempFileManager = tempFileManager; } @@ -84,48 +70,6 @@ public class PipelineProcessor { return name.substring(0, underscoreIndex) + extension; } - // Allowlist of URL path prefixes permitted through the pipeline. - private static final List ALLOWED_PIPELINE_PATH_PREFIXES = - List.of( - "/api/v1/general/", - "/api/v1/misc/", - "/api/v1/security/", - "/api/v1/convert/", - "/api/v1/filter/"); - - private void validatePipelineUrl(String url) { - // Strip scheme+host to get the path portion for comparison - String path = url; - int schemeEnd = url.indexOf("://"); - if (schemeEnd != -1) { - int pathStart = url.indexOf('/', schemeEnd + 3); - path = pathStart != -1 ? 
url.substring(pathStart) : "/"; - } - final String pathToCheck = path; - boolean allowed = ALLOWED_PIPELINE_PATH_PREFIXES.stream().anyMatch(pathToCheck::contains); - if (!allowed) { - log.warn("Blocked pipeline request to disallowed URL: {}", url); - throw new SecurityException( - "Pipeline operation not permitted for endpoint: " + pathToCheck); - } - } - - private String getApiKeyForUser() { - if (userService == null) return ""; - String username = userService.getCurrentUsername(); - if (username != null && !username.equals("anonymousUser")) { - return userService.getApiKeyForUser(username); - } - // Scheduled/internal context — no user in security context - return userService.getApiKeyForUser(Role.INTERNAL_API_USER.getRoleId()); - } - - private String getBaseUrl() { - String contextPath = servletContext.getContextPath(); - String port = SPDFApplication.getStaticPort(); - return "http://localhost:" + port + contextPath + "/"; - } - PipelineResult runPipelineAgainstFiles(List outputFiles, PipelineConfig config) throws Exception { PipelineResult result = new PipelineResult(); @@ -153,7 +97,6 @@ public class PipelineProcessor { "Invalid operation: " + operation + " with parameters: " + parameters); } - String url = getBaseUrl() + operation; List newOutputFiles = new ArrayList<>(); if (!isMultiInputOperation) { for (Resource file : outputFiles) { @@ -175,12 +118,15 @@ public class PipelineProcessor { body.add(entry.getKey(), entry.getValue()); } } - ResponseEntity response = sendWebRequest(url, body); + ResponseEntity response = + internalApiClient.post(operation, body); // If the operation is filter and the response body is null or empty, // skip // this // file - if (response.getBody() instanceof TempFileResource tempFileResource) { + if (response.getBody() + instanceof + InternalApiClient.TempFileResource tempFileResource) { result.addTempFile(tempFileResource.getTempFile()); } @@ -257,8 +203,9 @@ public class PipelineProcessor { body.add(entry.getKey(), 
entry.getValue()); } } - ResponseEntity response = sendWebRequest(url, body); - if (response.getBody() instanceof TempFileResource tempFileResource) { + ResponseEntity response = internalApiClient.post(operation, body); + if (response.getBody() + instanceof InternalApiClient.TempFileResource tempFileResource) { result.addTempFile(tempFileResource.getTempFile()); } // Handle the response @@ -312,43 +259,6 @@ public class PipelineProcessor { return result; } - /* package */ ResponseEntity sendWebRequest( - String url, MultiValueMap body) { - validatePipelineUrl(url); - RestTemplate restTemplate = new RestTemplate(); - // Set up headers, including API key - HttpHeaders headers = new HttpHeaders(); - String apiKey = getApiKeyForUser(); - if (apiKey != null && !apiKey.isEmpty()) { - headers.add("X-API-KEY", apiKey); - } - - // Let the message converter set the multipart boundary/content type - HttpEntity> entity = new HttpEntity<>(body, headers); - - RequestCallback requestCallback = - restTemplate.httpEntityCallback(entity, Resource.class /* response type hint */); - return restTemplate.execute( - url, - HttpMethod.POST, - requestCallback, - response -> { - try { - TempFile tempFile = tempFileManager.createManagedTempFile("pipeline"); - Files.copy( - response.getBody(), - tempFile.getPath(), - java.nio.file.StandardCopyOption.REPLACE_EXISTING); - TempFileResource resource = new TempFileResource(tempFile); - return ResponseEntity.status(response.getStatusCode()) - .headers(response.getHeaders()) - .body(resource); - } catch (IOException e) { - throw new UncheckedIOException(e); - } - }); - } - private List processOutputFiles( String operation, ResponseEntity response, @@ -367,13 +277,15 @@ public class PipelineProcessor { newFilename = removeTrailingNaming(extractFilename(response)); } // Check if the response body is a zip file - if (isZip(response.getBody(), newFilename)) { + if (ZipExtractionUtils.isZip(response.getBody(), newFilename)) { // Unzip the file and add 
all the files to the new output files - newOutputFiles.addAll(unzip(response.getBody(), result)); + newOutputFiles.addAll( + ZipExtractionUtils.extractZip( + response.getBody(), tempFileManager, result::addTempFile)); } else { final Resource tempResource = response.getBody(); - if (tempResource instanceof TempFileResource) { - result.addTempFile(((TempFileResource) tempResource).getTempFile()); + if (tempResource instanceof InternalApiClient.TempFileResource tfr) { + result.addTempFile(tfr.getTempFile()); } Resource outputResource = new FileSystemResource(tempResource.getFile()) { @@ -456,97 +368,4 @@ public class PipelineProcessor { log.info("Files successfully loaded. Starting processing..."); return outputFiles; } - - private boolean isZip(Resource data, String filename) throws IOException { - if (data == null || data.contentLength() < 4) { - return false; - } - if (filename != null) { - String lower = filename.toLowerCase(); - if (lower.endsWith(".cbz")) { - // Treat CBZ as non-zip for our unzipping purposes - return false; - } - } - // Check the first four bytes of the data against the standard zip magic number - try (InputStream is = data.getInputStream()) { - byte[] header = new byte[4]; - if (is.read(header) < 4) { - return false; - } - return header[0] == 0x50 && header[1] == 0x4B && header[2] == 0x03 && header[3] == 0x04; - } - } - - private boolean isZip(Resource data) throws IOException { - return isZip(data, null); - } - - private static final int MAX_UNZIP_DEPTH = 10; - - private List unzip(Resource data, PipelineResult result) throws IOException { - return unzip(data, result, 0); - } - - private List unzip(Resource data, PipelineResult result, int depth) - throws IOException { - if (depth > MAX_UNZIP_DEPTH) { - log.warn( - "ZIP nesting depth {} exceeds limit {}, treating as file", - depth, - MAX_UNZIP_DEPTH); - return List.of(data); - } - log.info("Unzipping data of length: {}", data.contentLength()); - List unzippedFiles = new ArrayList<>(); - try 
(InputStream bais = data.getInputStream(); - ZipInputStream zis = ZipSecurity.createHardenedInputStream(bais)) { - ZipEntry entry; - while ((entry = zis.getNextEntry()) != null) { - if (entry.isDirectory()) { - continue; - } - TempFile tempFile = tempFileManager.createManagedTempFile("unzip"); - result.addTempFile(tempFile); - try (OutputStream os = Files.newOutputStream(tempFile.getPath())) { - byte[] buffer = new byte[4096]; - int count; - while ((count = zis.read(buffer)) != -1) { - os.write(buffer, 0, count); - } - } - final String filename = entry.getName(); - Resource fileResource = - new FileSystemResource(tempFile.getFile()) { - - @Override - public String getFilename() { - return filename; - } - }; - // If the unzipped file is a zip file, unzip it - if (isZip(fileResource, filename)) { - log.info("File {} is a zip file. Unzipping...", filename); - unzippedFiles.addAll(unzip(fileResource, result, depth + 1)); - } else { - unzippedFiles.add(fileResource); - } - } - } - log.info("Unzipping completed. {} files were unzipped.", unzippedFiles.size()); - return unzippedFiles; - } - - private static class TempFileResource extends FileSystemResource { - private final TempFile tempFile; - - public TempFileResource(TempFile tempFile) { - super(tempFile.getFile()); - this.tempFile = tempFile; - } - - public TempFile getTempFile() { - return tempFile; - } - } } diff --git a/app/core/src/main/java/stirling/software/SPDF/model/api/general/RotatePDFRequest.java b/app/core/src/main/java/stirling/software/SPDF/model/api/general/RotatePDFRequest.java index 030a0df42b..43695de3d4 100644 --- a/app/core/src/main/java/stirling/software/SPDF/model/api/general/RotatePDFRequest.java +++ b/app/core/src/main/java/stirling/software/SPDF/model/api/general/RotatePDFRequest.java @@ -13,7 +13,8 @@ public class RotatePDFRequest extends PDFFile { @Schema( description = - "The angle by which to rotate the PDF file. 
This should be a multiple of 90.", + "The clockwise angle by which to rotate the PDF file. Must be a multiple of" + + " 90.", type = "integer", requiredMode = Schema.RequiredMode.REQUIRED, allowableValues = {"0", "90", "180", "270"}) diff --git a/app/core/src/main/java/stirling/software/SPDF/service/ApiDocService.java b/app/core/src/main/java/stirling/software/SPDF/service/ApiDocService.java index e60c74e223..b0d2e5a4a0 100644 --- a/app/core/src/main/java/stirling/software/SPDF/service/ApiDocService.java +++ b/app/core/src/main/java/stirling/software/SPDF/service/ApiDocService.java @@ -6,6 +6,7 @@ import java.util.List; import java.util.Locale; import java.util.Map; import java.util.regex.Matcher; +import java.util.regex.Pattern; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.http.HttpEntity; @@ -30,7 +31,14 @@ import tools.jackson.databind.ObjectMapper; @Service @Slf4j -public class ApiDocService { +public class ApiDocService implements stirling.software.common.service.ToolMetadataService { + + // Matches a bare "Output:ZIP" declaration (i.e. ZIP is not followed by "-" or "/"). + // Bare ZIP means the archive itself is the deliverable (e.g. get-attachments), so it + // should not be auto-unpacked. Wrapper forms like Output:ZIP-PDF or Output:IMAGE/ZIP + // use ZIP as transport for multiple typed results and are safe to unpack. 
+ private static final Pattern BARE_ZIP_OUTPUT = + Pattern.compile("Output\\s*:\\s*ZIP(?![-/])", Pattern.CASE_INSENSITIVE); private final Map apiDocumentation = new HashMap<>(); @@ -149,6 +157,7 @@ public class ApiDocService { return endpoint.areParametersValid(parameters); } + @Override public boolean isMultiInput(String operationName) { if (apiDocsJsonRootNode == null || apiDocumentation.isEmpty()) { loadApiDocumentation(); @@ -166,5 +175,36 @@ public class ApiDocService { } return false; } + + @Override + public boolean shouldUnpackZipResponse(String operationName) { + if (apiDocsJsonRootNode == null || apiDocumentation.isEmpty()) { + loadApiDocumentation(); + } + if (!apiDocumentation.containsKey(operationName)) { + return false; + } + ApiEndpoint endpoint = apiDocumentation.get(operationName); + String description = endpoint.getDescription(); + Matcher typeMatcher = + RegexPatternUtils.getInstance().getApiDocTypePattern().matcher(description); + if (typeMatcher.find()) { + String type = typeMatcher.group(1); + // Multi-output endpoints (SIMO/MIMO) return a ZIP of their outputs. + if (type.endsWith("MO")) { + return true; + } + } + Matcher outputMatcher = + RegexPatternUtils.getInstance().getApiDocOutputTypePattern().matcher(description); + if (outputMatcher.find()) { + String output = outputMatcher.group(1).toUpperCase(Locale.ROOT); + if (output.startsWith("ZIP")) { + // Bare "Output:ZIP" is a single-archive deliverable, not a transport. 
+ return !BARE_ZIP_OUTPUT.matcher(description).find(); + } + } + return false; + } } // Model class for API Endpoint diff --git a/app/core/src/test/java/stirling/software/SPDF/controller/api/pipeline/PipelineProcessorTest.java b/app/core/src/test/java/stirling/software/SPDF/controller/api/pipeline/PipelineProcessorTest.java index e413a52dec..e87d08a863 100644 --- a/app/core/src/test/java/stirling/software/SPDF/controller/api/pipeline/PipelineProcessorTest.java +++ b/app/core/src/test/java/stirling/software/SPDF/controller/api/pipeline/PipelineProcessorTest.java @@ -4,8 +4,6 @@ import static org.junit.jupiter.api.Assertions.*; import static org.mockito.ArgumentMatchers.*; import static org.mockito.Mockito.*; -import java.io.ByteArrayInputStream; -import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; import java.util.List; @@ -15,24 +13,18 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; -import org.mockito.MockedConstruction; import org.mockito.junit.jupiter.MockitoExtension; import org.springframework.core.io.ByteArrayResource; import org.springframework.core.io.FileSystemResource; import org.springframework.core.io.Resource; import org.springframework.http.HttpStatus; import org.springframework.http.ResponseEntity; -import org.springframework.http.client.ClientHttpResponse; -import org.springframework.util.LinkedMultiValueMap; -import org.springframework.util.MultiValueMap; - -import jakarta.servlet.ServletContext; import stirling.software.SPDF.model.PipelineConfig; import stirling.software.SPDF.model.PipelineOperation; import stirling.software.SPDF.model.PipelineResult; import stirling.software.SPDF.service.ApiDocService; -import stirling.software.common.service.UserServiceInterface; +import stirling.software.common.service.InternalApiClient; import stirling.software.common.util.TempFileManager; 
@ExtendWith(MockitoExtension.class) @@ -40,9 +32,7 @@ class PipelineProcessorTest { @Mock ApiDocService apiDocService; - @Mock UserServiceInterface userService; - - @Mock ServletContext servletContext; + @Mock InternalApiClient internalApiClient; @Mock TempFileManager tempFileManager; @@ -51,9 +41,7 @@ class PipelineProcessorTest { @BeforeEach void setUp() throws Exception { pipelineProcessor = - spy( - new PipelineProcessor( - apiDocService, userService, servletContext, tempFileManager)); + new PipelineProcessor(apiDocService, internalApiClient, tempFileManager); } @Test @@ -65,7 +53,6 @@ class PipelineProcessorTest { config.setOperations(List.of(op)); Resource file = new MyFileByteArrayResource(); - List files = List.of(file); when(apiDocService.isMultiInput("/api/v1/filter/filter-page-count")).thenReturn(false); @@ -74,13 +61,11 @@ class PipelineProcessorTest { when(apiDocService.isValidOperation(eq("/api/v1/filter/filter-page-count"), anyMap())) .thenReturn(true); - // Use a FileSystemResource backed by a temp file to avoid FileNotFoundException Path emptyTemp = Files.createTempFile("empty", ".tmp"); Resource emptyResource = new FileSystemResource(emptyTemp.toFile()); - doReturn(new ResponseEntity<>(emptyResource, HttpStatus.OK)) - .when(pipelineProcessor) - .sendWebRequest(anyString(), any()); + when(internalApiClient.post(anyString(), any())) + .thenReturn(new ResponseEntity<>(emptyResource, HttpStatus.OK)); PipelineResult result = pipelineProcessor.runPipelineAgainstFiles(files, config); @@ -118,105 +103,18 @@ class PipelineProcessorTest { when(apiDocService.getExtensionTypes(anyBoolean(), anyString())).thenReturn(List.of("pdf")); when(apiDocService.isValidOperation(anyString(), anyMap())).thenReturn(true); - doReturn(new ResponseEntity<>(outputResource, HttpStatus.OK)) - .when(pipelineProcessor) - .sendWebRequest(anyString(), any()); + when(internalApiClient.post(anyString(), any())) + .thenReturn(new ResponseEntity<>(outputResource, HttpStatus.OK)); 
PipelineResult result = pipelineProcessor.runPipelineAgainstFiles(files, config); - verify(pipelineProcessor).sendWebRequest(anyString(), any()); + verify(internalApiClient).post(anyString(), any()); assertFalse(result.isHasErrors()); - // Clean up Files.deleteIfExists(tempPath); } - @Test - void sendWebRequestDoesNotForceContentType() throws Exception { - MultiValueMap body = new LinkedMultiValueMap<>(); - body.add( - "fileInput", - new ByteArrayResource("data".getBytes(StandardCharsets.UTF_8)) { - @Override - public String getFilename() { - return "input.pdf"; - } - }); - - Path tempPath = Files.createTempFile("pipeline-test", ".tmp"); - var tempFile = mock(stirling.software.common.util.TempFile.class); - when(tempFile.getPath()).thenReturn(tempPath); - when(tempFile.getFile()).thenReturn(tempPath.toFile()); - when(tempFileManager.createManagedTempFile("pipeline")).thenReturn(tempFile); - - var capturedHeaders = new org.springframework.http.HttpHeaders[1]; - - try (MockedConstruction ignored = - mockConstruction( - org.springframework.web.client.RestTemplate.class, - (mock, context) -> { - when(mock.httpEntityCallback(any(), eq(Resource.class))) - .thenAnswer( - invocation -> { - var entity = invocation.getArgument(0); - capturedHeaders[0] = - ((org.springframework.http.HttpEntity) - entity) - .getHeaders(); - return (org.springframework.web.client - .RequestCallback) - request -> {}; - }); - - when(mock.execute( - anyString(), - eq(org.springframework.http.HttpMethod.POST), - any(), - any())) - .thenAnswer( - invocation -> { - @SuppressWarnings("unchecked") - var extractor = - (org.springframework.web.client - .ResponseExtractor< - ResponseEntity>) - invocation.getArgument(3); - ClientHttpResponse response = - mock(ClientHttpResponse.class); - when(response.getBody()) - .thenReturn( - new ByteArrayInputStream( - "ok" - .getBytes( - StandardCharsets - .UTF_8))); - var headers = - new org.springframework.http.HttpHeaders(); - headers.add( - 
org.springframework.http.HttpHeaders - .CONTENT_DISPOSITION, - "attachment; filename=\"out.pdf\""); - when(response.getHeaders()).thenReturn(headers); - lenient() - .when(response.getStatusCode()) - .thenReturn(HttpStatus.OK); - return extractor.extractData(response); - }); - })) { - ResponseEntity response = - pipelineProcessor.sendWebRequest( - "http://localhost/api/v1/general/merge-pdfs", body); - - assertNotNull(response); - assertEquals(HttpStatus.OK, response.getStatusCode()); - assertNotNull(response.getBody()); - assertNull(capturedHeaders[0].getContentType()); - } finally { - Files.deleteIfExists(tempPath); - } - } - private static class MyFileByteArrayResource extends ByteArrayResource { public MyFileByteArrayResource() { super("data".getBytes()); diff --git a/app/core/src/test/java/stirling/software/SPDF/service/ApiDocServiceTest.java b/app/core/src/test/java/stirling/software/SPDF/service/ApiDocServiceTest.java index 302cb8fd81..42b56a9cfa 100644 --- a/app/core/src/test/java/stirling/software/SPDF/service/ApiDocServiceTest.java +++ b/app/core/src/test/java/stirling/software/SPDF/service/ApiDocServiceTest.java @@ -9,6 +9,8 @@ import java.util.Map; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.CsvSource; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; @@ -348,6 +350,123 @@ class ApiDocServiceTest { assertTrue(apiDocService.isMultiInput("/miso")); } + @Test + void shouldUnpackZipResponseDetectsMultiOutputType() throws Exception { + String json = "{\"description\": \"Output:PDF Type:SIMO\"}"; + JsonNode postNode = mapper.readTree(json); + ApiEndpoint endpoint = new ApiEndpoint("/split", postNode); + setApiDocumentation(Map.of("/split", endpoint)); + setApiDocsJsonRootNode(); + assertTrue(apiDocService.shouldUnpackZipResponse("/split")); + } + + @Test + void 
shouldUnpackZipResponseDetectsMimoType() throws Exception { + String json = "{\"description\": \"Output:PDF Type:MIMO\"}"; + JsonNode postNode = mapper.readTree(json); + ApiEndpoint endpoint = new ApiEndpoint("/overlay", postNode); + setApiDocumentation(Map.of("/overlay", endpoint)); + setApiDocsJsonRootNode(); + assertTrue(apiDocService.shouldUnpackZipResponse("/overlay")); + } + + @Test + void shouldUnpackZipResponseDetectsZipOutputDeclaration() throws Exception { + String json = "{\"description\": \"Output:ZIP-PDF Type:SISO\"}"; + JsonNode postNode = mapper.readTree(json); + ApiEndpoint endpoint = new ApiEndpoint("/split-by-sections", postNode); + setApiDocumentation(Map.of("/split-by-sections", endpoint)); + setApiDocsJsonRootNode(); + assertTrue(apiDocService.shouldUnpackZipResponse("/split-by-sections")); + } + + @Test + void shouldUnpackZipResponseReturnsFalseForSisoPdf() throws Exception { + String json = "{\"description\": \"Input:PDF Output:PDF Type:SISO\"}"; + JsonNode postNode = mapper.readTree(json); + ApiEndpoint endpoint = new ApiEndpoint("/rotate", postNode); + setApiDocumentation(Map.of("/rotate", endpoint)); + setApiDocsJsonRootNode(); + assertFalse(apiDocService.shouldUnpackZipResponse("/rotate")); + } + + @Test + void shouldUnpackZipResponseReturnsFalseForUnknownOperation() throws Exception { + setApiDocumentation(Map.of()); + assertFalse(apiDocService.shouldUnpackZipResponse("/unknown")); + } + + /** + * Coverage test: every Stirling endpoint whose ZIP response is a transport for multiple typed + * results (SIMO/MIMO or Output:ZIP-PDF / Output:IMAGE/ZIP etc.) must be classified as {@code + * shouldUnpackZipResponse = true}. 
Descriptions below are the real + * {@code @Operation(description=...)} strings from each controller, so if a controller is + * renamed, tweaked or introduced without a {@code Type:} / {@code Output:ZIP-*} tag, this test + * breaks, surfacing the bug before {@code AiWorkflowService} silently registers a multi-result + * ZIP as a single file. + * + *

Add a new row here whenever a new unpack-eligible endpoint is introduced. Descriptions can + * be trimmed to the part containing the relevant tags. + */ + @ParameterizedTest(name = "{0} → shouldUnpackZipResponse") + @CsvSource( + textBlock = + """ + /api/v1/general/split-pages, 'Split pages. Input:PDF Output:PDF Type:SIMO' + /api/v1/general/split-pdf-by-sections, 'Split. Input:PDF Output:ZIP-PDF Type:SISO' + /api/v1/general/split-by-size-or-count, 'Split by size. Input:PDF Output:ZIP-PDF Type:SISO' + /api/v1/general/split-pdf-by-chapters, 'Split by chapters. Input:PDF Output:ZIP-PDF Type:SISO' + /api/v1/general/split-for-poster-print, 'Poster split. Input: PDF Output: ZIP-PDF Type: SISO' + /api/v1/general/overlay-pdfs, 'Overlay PDFs. Input:PDF Output:PDF Type:MIMO' + /api/v1/misc/auto-split-pdf, 'Auto split. Input:PDF Output:ZIP-PDF Type:SISO' + /api/v1/misc/extract-images, 'Extract images. Output:IMAGE/ZIP Type:SIMO' + /api/v1/misc/extract-image-scans, 'Extract image scans. Input:PDF Output:IMAGE/ZIP Type:SIMO' + """) + void shouldUnpackZipResponseClassifiesKnownUnpackableEndpoints( + String endpoint, String description) throws Exception { + String json = mapper.writeValueAsString(Map.of("description", description)); + JsonNode postNode = mapper.readTree(json); + setApiDocumentation(Map.of(endpoint, new ApiEndpoint(endpoint, postNode))); + setApiDocsJsonRootNode(); + assertTrue( + apiDocService.shouldUnpackZipResponse(endpoint), + () -> + "Expected shouldUnpackZipResponse=true for " + + endpoint + + " with description: " + + description); + } + + /** + * Inverse coverage: endpoints whose ZIP response is the deliverable itself (or that return + * single non-ZIP files) must not be flagged for unpacking. Catches regressions where a change + * to the classifier accidentally widens the positive match. + */ + @ParameterizedTest(name = "{0} → !shouldUnpackZipResponse") + @CsvSource( + textBlock = + """ + /api/v1/general/rotate-pdf, 'Rotate. 
Input:PDF Output:PDF Type:SISO' + /api/v1/general/merge-pdfs, 'Merge. Input:PDF Output:PDF Type:MISO' + /api/v1/misc/compress-pdf, 'Compress. Input:PDF Output:PDF Type:SISO' + /api/v1/misc/flatten, 'Flatten forms. Input:PDF Output:PDF Type:SISO' + /api/v1/security/get-attachments, 'Extract attachments. Input:PDF Output:ZIP Type:SISO' + """) + void shouldUnpackZipResponseRejectsNonUnpackableEndpoints(String endpoint, String description) + throws Exception { + String json = mapper.writeValueAsString(Map.of("description", description)); + JsonNode postNode = mapper.readTree(json); + setApiDocumentation(Map.of(endpoint, new ApiEndpoint(endpoint, postNode))); + setApiDocsJsonRootNode(); + assertFalse( + apiDocService.shouldUnpackZipResponse(endpoint), + () -> + "Expected shouldUnpackZipResponse=false for " + + endpoint + + " with description: " + + description); + } + @Test void constructorAcceptsNullUserService() { ApiDocService service = new ApiDocService(mapper, servletContext, null); diff --git a/app/proprietary/src/main/java/stirling/software/proprietary/config/AsyncConfig.java b/app/proprietary/src/main/java/stirling/software/proprietary/config/AsyncConfig.java index 5c0bf509ae..a45714ba65 100644 --- a/app/proprietary/src/main/java/stirling/software/proprietary/config/AsyncConfig.java +++ b/app/proprietary/src/main/java/stirling/software/proprietary/config/AsyncConfig.java @@ -10,6 +10,7 @@ import org.springframework.context.annotation.Configuration; import org.springframework.core.task.TaskDecorator; import org.springframework.core.task.support.TaskExecutorAdapter; import org.springframework.scheduling.annotation.EnableAsync; +import org.springframework.security.concurrent.DelegatingSecurityContextExecutor; @Configuration @EnableAsync @@ -49,11 +50,18 @@ public class AsyncConfig { return adapter; } + /** + * AI orchestration runs on a background executor, so the incoming request's {@code + * SecurityContext} must be propagated for downstream calls to see the 
authenticated user. + * Without this, {@code JobOwnershipService} scopes job keys without a user prefix and + * authenticated downloads fail with 403; {@code InternalApiClient} also falls back to the + * internal-API-user key instead of the caller's. + */ @Bean(name = "aiStreamExecutor") public Executor aiStreamExecutor() { TaskExecutorAdapter adapter = new TaskExecutorAdapter(Executors.newVirtualThreadPerTaskExecutor()); adapter.setTaskDecorator(new MDCContextTaskDecorator()); - return adapter; + return new DelegatingSecurityContextExecutor(adapter); } } diff --git a/app/proprietary/src/main/java/stirling/software/proprietary/controller/api/AiEngineController.java b/app/proprietary/src/main/java/stirling/software/proprietary/controller/api/AiEngineController.java index 3a93730876..279f408815 100644 --- a/app/proprietary/src/main/java/stirling/software/proprietary/controller/api/AiEngineController.java +++ b/app/proprietary/src/main/java/stirling/software/proprietary/controller/api/AiEngineController.java @@ -1,10 +1,12 @@ package stirling.software.proprietary.controller.api; import java.io.IOException; +import java.util.List; import java.util.Map; import java.util.concurrent.Executor; import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.beans.factory.annotation.Value; import org.springframework.http.HttpStatus; import org.springframework.http.MediaType; import org.springframework.http.ResponseEntity; @@ -25,8 +27,12 @@ import jakarta.validation.Valid; import lombok.extern.slf4j.Slf4j; +import stirling.software.common.model.job.ResultFile; +import stirling.software.common.service.JobOwnershipService; +import stirling.software.common.service.TaskManager; import stirling.software.proprietary.model.api.ai.AiWorkflowRequest; import stirling.software.proprietary.model.api.ai.AiWorkflowResponse; +import stirling.software.proprietary.model.api.ai.AiWorkflowResultFile; import stirling.software.proprietary.service.AiEngineClient; import 
stirling.software.proprietary.service.AiWorkflowService; @@ -45,16 +51,30 @@ public class AiEngineController { private final AiWorkflowService aiWorkflowService; private final ObjectMapper objectMapper; private final Executor aiStreamExecutor; + private final TaskManager taskManager; + private final JobOwnershipService jobOwnershipService; + + /** + * SSE emitter timeout. Long enough to accommodate multi-gigabyte PDF workflows (OCR on a + * 1000-page scan, splitting a huge PDF, etc.) without the emitter completing out from under the + * executor. Configurable via {@code stirling.ai.streamTimeoutMs}. + */ + @Value("${stirling.ai.streamTimeoutMs:1800000}") + private long streamTimeoutMs; public AiEngineController( AiEngineClient aiEngineClient, AiWorkflowService aiWorkflowService, ObjectMapper objectMapper, - @Qualifier("aiStreamExecutor") Executor aiStreamExecutor) { + @Qualifier("aiStreamExecutor") Executor aiStreamExecutor, + TaskManager taskManager, + JobOwnershipService jobOwnershipService) { this.aiEngineClient = aiEngineClient; this.aiWorkflowService = aiWorkflowService; this.objectMapper = objectMapper; this.aiStreamExecutor = aiStreamExecutor; + this.taskManager = taskManager; + this.jobOwnershipService = jobOwnershipService; } @GetMapping("/health") @@ -70,10 +90,14 @@ public class AiEngineController { @Operation( summary = "Run an AI workflow against a PDF", description = - "Accepts a PDF upload and a user message and returns an AI workflow result") - public ResponseEntity orchestrate( - @Valid @ModelAttribute AiWorkflowRequest request) throws IOException { - return ResponseEntity.ok(aiWorkflowService.orchestrate(request)); + "Accepts PDF uploads and a user message and returns an AI workflow result." 
+ + " When the workflow produces files, they are registered with the job" + + " system and downloadable via GET /api/v1/general/files/{fileId}.") + public AiWorkflowResponse orchestrate(@Valid @ModelAttribute AiWorkflowRequest request) + throws IOException { + AiWorkflowResponse result = aiWorkflowService.orchestrate(request); + registerFileResultAsJob(result); + return result; } @PostMapping(value = "/orchestrate/stream", consumes = MediaType.MULTIPART_FORM_DATA_VALUE) @@ -83,11 +107,23 @@ public class AiEngineController { "Accepts a PDF upload and a user message, returns SSE events with progress" + " updates followed by the final AI workflow result") public SseEmitter orchestrateStream(@Valid @ModelAttribute AiWorkflowRequest request) { - SseEmitter emitter = new SseEmitter(180_000L); + SseEmitter emitter = new SseEmitter(streamTimeoutMs); emitter.onTimeout( () -> { - log.warn("SSE emitter timed out for AI orchestration stream"); + // Emit an explicit error frame so the frontend reports a timeout rather than + // silently seeing the stream end without a result. + log.warn( + "SSE emitter timed out for AI orchestration stream after {} ms", + streamTimeoutMs); + sendEvent( + emitter, + "error", + Map.of( + "message", + "AI workflow timed out after " + + (streamTimeoutMs / 1000) + + " seconds")); emitter.complete(); }); emitter.onError(e -> log.warn("SSE emitter error for AI orchestration stream", e)); @@ -102,15 +138,50 @@ public class AiEngineController { AiWorkflowResponse result = aiWorkflowService.orchestrate( request, progress -> sendEvent(emitter, "progress", progress)); + registerFileResultAsJob(result); sendEvent(emitter, "result", result); emitter.complete(); } catch (Exception e) { log.error("AI orchestration stream failed", e); + // Emit an error frame for the frontend and then complete normally. Using + // completeWithError here as well would double-complete the emitter - the error + // frame already conveys the failure to the client. 
sendEvent(emitter, "error", Map.of("message", e.getMessage())); - emitter.completeWithError(e); + emitter.complete(); } } + /** + * Register any file results produced by the workflow with {@link TaskManager} so they are + * downloadable via {@code GET /api/v1/general/files/{fileId}}. Uses {@code + * setMultipleFileResults} so the fileIds we registered earlier are not mangled by TaskManager's + * ZIP auto-extract path. + */ + private void registerFileResultAsJob(AiWorkflowResponse result) { + List files = result.getResultFiles(); + if (files == null || files.isEmpty()) { + return; + } + // Scope the job key to the current user so the download endpoint's ownership check + // passes when security is enabled. NoOpJobOwnershipService returns the UUID unchanged + // when security is off. + String jobKey = + jobOwnershipService.createScopedJobKey(java.util.UUID.randomUUID().toString()); + taskManager.createTask(jobKey); + List jobFiles = + files.stream() + .map( + f -> + ResultFile.builder() + .fileId(f.getFileId()) + .fileName(f.getFileName()) + .contentType(f.getContentType()) + .build()) + .toList(); + taskManager.setMultipleFileResults(jobKey, jobFiles); + taskManager.setComplete(jobKey); + } + private void sendEvent(SseEmitter emitter, String name, Object data) { try { emitter.send(SseEmitter.event().name(name).data(data, MediaType.APPLICATION_JSON)); diff --git a/app/proprietary/src/main/java/stirling/software/proprietary/model/api/ai/AiWorkflowOutcome.java b/app/proprietary/src/main/java/stirling/software/proprietary/model/api/ai/AiWorkflowOutcome.java index 78ce09b7fb..577f24e5fd 100644 --- a/app/proprietary/src/main/java/stirling/software/proprietary/model/api/ai/AiWorkflowOutcome.java +++ b/app/proprietary/src/main/java/stirling/software/proprietary/model/api/ai/AiWorkflowOutcome.java @@ -15,6 +15,7 @@ public enum AiWorkflowOutcome { PLAN("plan"), NEED_CLARIFICATION("need_clarification"), CANNOT_DO("cannot_do"), + DRAFT("draft"), TOOL_CALL("tool_call"), 
COMPLETED("completed"), UNSUPPORTED_CAPABILITY("unsupported_capability"), diff --git a/app/proprietary/src/main/java/stirling/software/proprietary/model/api/ai/AiWorkflowPhase.java b/app/proprietary/src/main/java/stirling/software/proprietary/model/api/ai/AiWorkflowPhase.java index bb1759fb3c..b1ab9fff23 100644 --- a/app/proprietary/src/main/java/stirling/software/proprietary/model/api/ai/AiWorkflowPhase.java +++ b/app/proprietary/src/main/java/stirling/software/proprietary/model/api/ai/AiWorkflowPhase.java @@ -8,6 +8,7 @@ public enum AiWorkflowPhase { ANALYZING("analyzing"), CALLING_ENGINE("calling_engine"), EXTRACTING_CONTENT("extracting_content"), + EXECUTING_TOOL("executing_tool"), PROCESSING("processing"); private final String value; diff --git a/app/proprietary/src/main/java/stirling/software/proprietary/model/api/ai/AiWorkflowProgressEvent.java b/app/proprietary/src/main/java/stirling/software/proprietary/model/api/ai/AiWorkflowProgressEvent.java index c063e14f6e..92c15a0004 100644 --- a/app/proprietary/src/main/java/stirling/software/proprietary/model/api/ai/AiWorkflowProgressEvent.java +++ b/app/proprietary/src/main/java/stirling/software/proprietary/model/api/ai/AiWorkflowProgressEvent.java @@ -1,15 +1,38 @@ package stirling.software.proprietary.model.api.ai; +import com.fasterxml.jackson.annotation.JsonInclude; + import lombok.AllArgsConstructor; import lombok.Data; @Data @AllArgsConstructor +@JsonInclude(JsonInclude.Include.NON_NULL) public class AiWorkflowProgressEvent { private AiWorkflowPhase phase; private long timestamp; + /** The tool endpoint path being executed, for {@link AiWorkflowPhase#EXECUTING_TOOL} events. */ + private String tool; + + /** + * 1-based index of the current plan step, for {@link AiWorkflowPhase#EXECUTING_TOOL} events. + */ + private Integer stepIndex; + + /** Total number of plan steps, for {@link AiWorkflowPhase#EXECUTING_TOOL} events. 
*/ + private Integer stepCount; + public static AiWorkflowProgressEvent of(AiWorkflowPhase phase) { - return new AiWorkflowProgressEvent(phase, System.currentTimeMillis()); + return new AiWorkflowProgressEvent(phase, System.currentTimeMillis(), null, null, null); + } + + public static AiWorkflowProgressEvent executingTool(String tool, int stepIndex, int stepCount) { + return new AiWorkflowProgressEvent( + AiWorkflowPhase.EXECUTING_TOOL, + System.currentTimeMillis(), + tool, + stepIndex, + stepCount); } } diff --git a/app/proprietary/src/main/java/stirling/software/proprietary/model/api/ai/AiWorkflowResponse.java b/app/proprietary/src/main/java/stirling/software/proprietary/model/api/ai/AiWorkflowResponse.java index 1e04bece8d..ff28b2e9eb 100644 --- a/app/proprietary/src/main/java/stirling/software/proprietary/model/api/ai/AiWorkflowResponse.java +++ b/app/proprietary/src/main/java/stirling/software/proprietary/model/api/ai/AiWorkflowResponse.java @@ -44,6 +44,30 @@ public class AiWorkflowResponse { @Schema(description = "Structured tool steps when the workflow returns a plan") private List> steps = new ArrayList<>(); + @Schema( + description = + "Tool endpoint path for tool_call outcomes (e.g. /api/v1/misc/compress-pdf)") + private String tool; + + @Schema(description = "Tool parameters for tool_call outcomes") + private Map parameters; + + @Schema(description = "Result file ID after tool execution completes (single-file result)") + private String fileId; + + @Schema(description = "Result filename after tool execution completes (single-file result)") + private String fileName; + + @Schema(description = "Result MIME type after tool execution completes (single-file result)") + private String contentType; + + @Schema( + description = + "Result files produced by the workflow. 
Always populated on completed outcomes" + + " with at least one entry; for single-file results this mirrors" + + " fileId/fileName/contentType.") + private List resultFiles = new ArrayList<>(); + @Schema(description = "Per-file text extraction requests from the AI engine") private List files = new ArrayList<>(); diff --git a/app/proprietary/src/main/java/stirling/software/proprietary/model/api/ai/AiWorkflowResultFile.java b/app/proprietary/src/main/java/stirling/software/proprietary/model/api/ai/AiWorkflowResultFile.java new file mode 100644 index 0000000000..57207d2bc9 --- /dev/null +++ b/app/proprietary/src/main/java/stirling/software/proprietary/model/api/ai/AiWorkflowResultFile.java @@ -0,0 +1,24 @@ +package stirling.software.proprietary.model.api.ai; + +import io.swagger.v3.oas.annotations.media.Schema; + +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** A single file produced by a completed AI workflow. */ +@Data +@NoArgsConstructor +@AllArgsConstructor +@Schema(description = "Descriptor for a file produced by an AI workflow") +public class AiWorkflowResultFile { + + @Schema(description = "Stirling file ID — download with GET /api/v1/general/files/{fileId}") + private String fileId; + + @Schema(description = "Original filename for the file") + private String fileName; + + @Schema(description = "MIME type of the file", example = "application/pdf") + private String contentType; +} diff --git a/app/proprietary/src/main/java/stirling/software/proprietary/service/AiWorkflowService.java b/app/proprietary/src/main/java/stirling/software/proprietary/service/AiWorkflowService.java index 4a966f6390..0d02a44381 100644 --- a/app/proprietary/src/main/java/stirling/software/proprietary/service/AiWorkflowService.java +++ b/app/proprietary/src/main/java/stirling/software/proprietary/service/AiWorkflowService.java @@ -7,16 +7,33 @@ import java.util.List; import java.util.Map; import java.util.stream.Collectors; +import 
org.apache.commons.io.FilenameUtils; import org.apache.pdfbox.pdmodel.PDDocument; +import org.springframework.core.io.FileSystemResource; +import org.springframework.core.io.Resource; +import org.springframework.http.HttpStatus; +import org.springframework.http.MediaType; +import org.springframework.http.MediaTypeFactory; +import org.springframework.http.ResponseEntity; import org.springframework.stereotype.Service; +import org.springframework.util.LinkedMultiValueMap; +import org.springframework.util.MultiValueMap; import org.springframework.web.multipart.MultipartFile; +import io.github.pixee.security.Filenames; + import lombok.Data; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import stirling.software.common.service.CustomPDFDocumentFactory; +import stirling.software.common.service.FileStorage; +import stirling.software.common.service.InternalApiClient; +import stirling.software.common.service.ToolMetadataService; import stirling.software.common.util.ExceptionUtils; +import stirling.software.common.util.TempFile; +import stirling.software.common.util.TempFileManager; +import stirling.software.common.util.ZipExtractionUtils; import stirling.software.proprietary.model.api.ai.AiWorkflowFileInput; import stirling.software.proprietary.model.api.ai.AiWorkflowFileRequest; import stirling.software.proprietary.model.api.ai.AiWorkflowOutcome; @@ -24,6 +41,7 @@ import stirling.software.proprietary.model.api.ai.AiWorkflowPhase; import stirling.software.proprietary.model.api.ai.AiWorkflowProgressEvent; import stirling.software.proprietary.model.api.ai.AiWorkflowRequest; import stirling.software.proprietary.model.api.ai.AiWorkflowResponse; +import stirling.software.proprietary.model.api.ai.AiWorkflowResultFile; import stirling.software.proprietary.service.PdfContentExtractor.LoadedFile; import stirling.software.proprietary.service.PdfContentExtractor.PdfContentResult; import 
stirling.software.proprietary.service.PdfContentExtractor.WorkflowArtifact; @@ -39,6 +57,10 @@ public class AiWorkflowService { private final AiEngineClient aiEngineClient; private final PdfContentExtractor pdfContentExtractor; private final ObjectMapper objectMapper; + private final InternalApiClient internalApiClient; + private final FileStorage fileStorage; + private final ToolMetadataService toolMetadataService; + private final TempFileManager tempFileManager; @FunctionalInterface public interface ProgressListener { @@ -89,12 +111,13 @@ public class AiWorkflowService { AiWorkflowResponse response = invokeOrchestrator(request); return switch (response.getOutcome()) { case NEED_CONTENT -> onNeedContent(response, filesByName, request, listener); + case TOOL_CALL -> onToolCall(response, filesByName, listener); + case PLAN -> onPlan(response, filesByName, listener); case ANSWER, NOT_FOUND, - PLAN, NEED_CLARIFICATION, CANNOT_DO, - TOOL_CALL, + DRAFT, COMPLETED, UNSUPPORTED_CAPABILITY, CANNOT_CONTINUE -> @@ -174,6 +197,199 @@ public class AiWorkflowService { } } + @SuppressWarnings("unchecked") + private WorkflowState onToolCall( + AiWorkflowResponse response, + Map filesByName, + ProgressListener listener) { + String endpointPath = response.getTool(); + Map parameters = response.getParameters(); + if (endpointPath == null || endpointPath.isBlank()) { + return new WorkflowState.Terminal( + cannotContinue("AI engine returned tool_call without a tool endpoint.")); + } + if (parameters == null) { + parameters = Map.of(); + } + + try { + List inputFiles = toResources(filesByName); + listener.onProgress(AiWorkflowProgressEvent.executingTool(endpointPath, 1, 1)); + List results = executeStep(endpointPath, parameters, inputFiles); + return new WorkflowState.Terminal( + buildCompletedResponse( + response.getRationale(), + results, + new ArrayList<>(filesByName.keySet()))); + } catch (Exception e) { + log.error("Failed to execute tool {}: {}", endpointPath, e.getMessage(), e); 
+ return new WorkflowState.Terminal( + cannotContinue("Tool execution failed: " + e.getMessage())); + } + } + + @SuppressWarnings("unchecked") + private WorkflowState onPlan( + AiWorkflowResponse response, + Map filesByName, + ProgressListener listener) { + List> steps = response.getSteps(); + if (steps == null || steps.isEmpty()) { + return new WorkflowState.Terminal( + cannotContinue("AI engine returned a plan with no steps.")); + } + + try { + List currentFiles = toResources(filesByName); + + for (int i = 0; i < steps.size(); i++) { + Map step = steps.get(i); + String endpointPath = (String) step.get("tool"); + Map parameters = + step.containsKey("parameters") + ? (Map) step.get("parameters") + : Map.of(); + + if (endpointPath == null || endpointPath.isBlank()) { + return new WorkflowState.Terminal( + cannotContinue("Plan step " + (i + 1) + " has no tool endpoint.")); + } + + listener.onProgress( + AiWorkflowProgressEvent.executingTool(endpointPath, i + 1, steps.size())); + currentFiles = executeStep(endpointPath, parameters, currentFiles); + } + + return new WorkflowState.Terminal( + buildCompletedResponse( + response.getSummary(), + currentFiles, + new ArrayList<>(filesByName.keySet()))); + } catch (Exception e) { + log.error("Failed to execute plan: {}", e.getMessage(), e); + return new WorkflowState.Terminal( + cannotContinue("Plan execution failed: " + e.getMessage())); + } + } + + /** + * Execute a single tool step. If the endpoint accepts multiple files, all files are sent in one + * call. Otherwise, the endpoint is called once per file. ZIP responses are unpacked so each + * inner file is treated as its own result (e.g. split outputs a ZIP of pages). 
+ */ + private List executeStep( + String endpointPath, Map parameters, List inputFiles) + throws IOException { + List results = new ArrayList<>(); + if (toolMetadataService.isMultiInput(endpointPath)) { + results.addAll(callEndpoint(endpointPath, parameters, inputFiles)); + } else { + for (Resource file : inputFiles) { + results.addAll(callEndpoint(endpointPath, parameters, List.of(file))); + } + } + return results; + } + + /** + * Call an endpoint and return the response body. Endpoints that are declared as ZIP-returning + * in the API spec (multi-output, or {@code Output:ZIP-*}) are unpacked into their individual + * entries so callers always see a flat list of result files. + */ + private List callEndpoint( + String endpointPath, Map parameters, List files) + throws IOException { + MultiValueMap body = new LinkedMultiValueMap<>(); + for (Resource file : files) { + body.add("fileInput", file); + } + for (Map.Entry entry : parameters.entrySet()) { + if (entry.getValue() instanceof List list) { + for (Object item : list) { + body.add(entry.getKey(), item); + } + } else { + body.add(entry.getKey(), entry.getValue()); + } + } + ResponseEntity response = internalApiClient.post(endpointPath, body); + if (!HttpStatus.OK.equals(response.getStatusCode()) || response.getBody() == null) { + throw new IOException( + "Tool returned HTTP " + response.getStatusCode() + " for " + endpointPath); + } + Resource resource = response.getBody(); + if (toolMetadataService.shouldUnpackZipResponse(endpointPath)) { + return ZipExtractionUtils.extractZip(resource, tempFileManager); + } + return List.of(resource); + } + + private List toResources(Map filesByName) throws IOException { + List resources = new ArrayList<>(); + for (MultipartFile file : filesByName.values()) { + TempFile tempFile = tempFileManager.createManagedTempFile("ai-workflow"); + file.transferTo(tempFile.getPath()); + final String originalName = Filenames.toSimpleFileName(file.getOriginalFilename()); + resources.add( + 
new FileSystemResource(tempFile.getFile()) { + @Override + public String getFilename() { + return originalName; + } + }); + } + return resources; + } + + private AiWorkflowResponse buildCompletedResponse( + String summary, List resultFiles, List inputFileNames) + throws IOException { + // Store every output file individually so each gets its own Stirling file ID and the + // frontend can add them as independent variants without going through a zip. + boolean preserveInputNames = inputFileNames.size() == resultFiles.size(); + List descriptors = new ArrayList<>(); + for (int i = 0; i < resultFiles.size(); i++) { + Resource resource = resultFiles.get(i); + String responseName = resource.getFilename(); + String inputName = preserveInputNames ? inputFileNames.get(i) : null; + // Prefer the input name only for 1:1 operations where the output keeps the same + // extension (rotate, compress, etc.). For converters and other extension-changing + // tools, the response filename from Content-Disposition is authoritative. + String name; + if (inputName != null + && FilenameUtils.getExtension(inputName) + .equalsIgnoreCase(FilenameUtils.getExtension(responseName))) { + name = inputName; + } else if (responseName != null) { + name = responseName; + } else { + name = "result-" + (i + 1); + } + String contentType = + MediaTypeFactory.getMediaType(name) + .orElse(MediaType.APPLICATION_OCTET_STREAM) + .toString(); + String fileId; + try (java.io.InputStream is = resource.getInputStream()) { + fileId = fileStorage.storeInputStream(is, name).fileId(); + } + descriptors.add(new AiWorkflowResultFile(fileId, name, contentType)); + } + + AiWorkflowResponse completed = new AiWorkflowResponse(); + completed.setOutcome(AiWorkflowOutcome.COMPLETED); + completed.setSummary(summary); + completed.setResultFiles(descriptors); + // Mirror the first file into the legacy single-file fields so existing clients still work. 
+ if (!descriptors.isEmpty()) { + AiWorkflowResultFile first = descriptors.getFirst(); + completed.setFileId(first.getFileId()); + completed.setFileName(first.getFileName()); + completed.setContentType(first.getContentType()); + } + return completed; + } + private void validateRequest(AiWorkflowRequest request) { for (AiWorkflowFileInput fileInput : request.getFileInputs()) { if (fileInput.getFileInput().isEmpty()) { diff --git a/app/proprietary/src/test/java/stirling/software/proprietary/service/AiWorkflowServiceTest.java b/app/proprietary/src/test/java/stirling/software/proprietary/service/AiWorkflowServiceTest.java new file mode 100644 index 0000000000..2872e0db91 --- /dev/null +++ b/app/proprietary/src/test/java/stirling/software/proprietary/service/AiWorkflowServiceTest.java @@ -0,0 +1,335 @@ +package stirling.software.proprietary.service; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.zip.ZipEntry; +import java.util.zip.ZipOutputStream; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.api.io.TempDir; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.springframework.core.io.ByteArrayResource; +import org.springframework.core.io.Resource; 
+import org.springframework.http.ResponseEntity; +import org.springframework.mock.web.MockMultipartFile; +import org.springframework.util.MultiValueMap; + +import stirling.software.common.model.ApplicationProperties; +import stirling.software.common.service.CustomPDFDocumentFactory; +import stirling.software.common.service.FileStorage; +import stirling.software.common.service.FileStorage.StoredFile; +import stirling.software.common.service.InternalApiClient; +import stirling.software.common.service.ToolMetadataService; +import stirling.software.common.util.TempFileManager; +import stirling.software.common.util.TempFileRegistry; +import stirling.software.proprietary.model.api.ai.AiWorkflowFileInput; +import stirling.software.proprietary.model.api.ai.AiWorkflowOutcome; +import stirling.software.proprietary.model.api.ai.AiWorkflowRequest; +import stirling.software.proprietary.model.api.ai.AiWorkflowResponse; + +import tools.jackson.databind.ObjectMapper; +import tools.jackson.databind.json.JsonMapper; + +/** + * Smoke tests for {@link AiWorkflowService}. Covers the TOOL_CALL and PLAN execution paths, + * ZIP-response unpacking (split endpoints), multi-input dispatch (merge endpoints), and the 1:1 + * input-to-output filename preservation rule. + * + *
<p>
External collaborators (engine client, internal API client, tool metadata, file storage) are + * mocked. {@link TempFileManager} is constructed with a real in-test registry so the service's + * temp-file handling exercises real code. + */ +@ExtendWith(MockitoExtension.class) +class AiWorkflowServiceTest { + + private static final String ROTATE_ENDPOINT = "/api/v1/general/rotate-pdf"; + private static final String SPLIT_ENDPOINT = "/api/v1/general/split-pages"; + private static final String MERGE_ENDPOINT = "/api/v1/general/merge-pdfs"; + private static final String COMPRESS_ENDPOINT = "/api/v1/misc/compress-pdf"; + + @Mock private CustomPDFDocumentFactory pdfDocumentFactory; + @Mock private AiEngineClient aiEngineClient; + @Mock private PdfContentExtractor pdfContentExtractor; + @Mock private InternalApiClient internalApiClient; + @Mock private FileStorage fileStorage; + @Mock private ToolMetadataService toolMetadataService; + + @TempDir Path tempDir; + + private TempFileManager tempFileManager; + private ObjectMapper objectMapper; + private AiWorkflowService service; + + @BeforeEach + void setUp() { + ApplicationProperties props = new ApplicationProperties(); + props.getSystem().getTempFileManagement().setBaseTmpDir(tempDir.toString()); + props.getSystem().getTempFileManagement().setPrefix("ai-test-"); + tempFileManager = new TempFileManager(new TempFileRegistry(), props); + objectMapper = JsonMapper.builder().build(); + + service = + new AiWorkflowService( + pdfDocumentFactory, + aiEngineClient, + pdfContentExtractor, + objectMapper, + internalApiClient, + fileStorage, + toolMetadataService, + tempFileManager); + } + + @Test + void toolCallSingleFilePreservesInputFilename() throws IOException { + MockMultipartFile input = pdf("input.pdf", "original-pdf-bytes"); + stubOrchestrator( + """ + {"outcome":"tool_call","tool":"%s","parameters":{"angle":90},"rationale":"Rotating"} + """ + .formatted(ROTATE_ENDPOINT)); + 
when(toolMetadataService.isMultiInput(ROTATE_ENDPOINT)).thenReturn(false); + when(toolMetadataService.shouldUnpackZipResponse(ROTATE_ENDPOINT)).thenReturn(false); + stubEndpoint(ROTATE_ENDPOINT, pdfResource("rotated-bytes", "rotated.pdf")); + AtomicInteger ids = stubFileStorage(); + + AiWorkflowResponse result = service.orchestrate(requestFor(input, "rotate 90")); + + assertEquals(AiWorkflowOutcome.COMPLETED, result.getOutcome()); + assertEquals(1, result.getResultFiles().size()); + // 1:1 mapping — the single output should inherit the single input's filename. + assertEquals("input.pdf", result.getResultFiles().get(0).getFileName()); + assertEquals("file-1", result.getResultFiles().get(0).getFileId()); + assertEquals(1, ids.get()); + verify(internalApiClient, times(1)).post(eq(ROTATE_ENDPOINT), any()); + } + + @Test + void toolCallZipResponseUnpacksIntoMultipleResults() throws IOException { + MockMultipartFile input = pdf("doc.pdf", "original"); + stubOrchestrator( + """ + {"outcome":"tool_call","tool":"%s","parameters":{},"rationale":"Splitting"} + """ + .formatted(SPLIT_ENDPOINT)); + when(toolMetadataService.isMultiInput(SPLIT_ENDPOINT)).thenReturn(false); + when(toolMetadataService.shouldUnpackZipResponse(SPLIT_ENDPOINT)).thenReturn(true); + stubEndpoint( + SPLIT_ENDPOINT, + zipResource( + "doc.zip", + List.of( + new ZipEntryBytes("page-1.pdf", "page-one"), + new ZipEntryBytes("page-2.pdf", "page-two"), + new ZipEntryBytes("page-3.pdf", "page-three")))); + stubFileStorage(); + + AiWorkflowResponse result = service.orchestrate(requestFor(input, "split")); + + assertEquals(AiWorkflowOutcome.COMPLETED, result.getOutcome()); + assertEquals(3, result.getResultFiles().size()); + // Input count (1) != output count (3) so the per-entry filename is kept. 
+ assertEquals("page-1.pdf", result.getResultFiles().get(0).getFileName()); + assertEquals("page-2.pdf", result.getResultFiles().get(1).getFileName()); + assertEquals("page-3.pdf", result.getResultFiles().get(2).getFileName()); + } + + @Test + void multiInputEndpointIsCalledOnceWithAllFiles() throws IOException { + MockMultipartFile a = pdf("a.pdf", "a-bytes"); + MockMultipartFile b = pdf("b.pdf", "b-bytes"); + stubOrchestrator( + """ + {"outcome":"tool_call","tool":"%s","parameters":{},"rationale":"Merging"} + """ + .formatted(MERGE_ENDPOINT)); + when(toolMetadataService.isMultiInput(MERGE_ENDPOINT)).thenReturn(true); + when(toolMetadataService.shouldUnpackZipResponse(MERGE_ENDPOINT)).thenReturn(false); + stubEndpoint(MERGE_ENDPOINT, pdfResource("merged-bytes", "merged.pdf")); + stubFileStorage(); + + AiWorkflowResponse result = + service.orchestrate(requestFor(new MockMultipartFile[] {a, b}, "merge these")); + + assertEquals(AiWorkflowOutcome.COMPLETED, result.getOutcome()); + assertEquals(1, result.getResultFiles().size()); + // Two inputs but only one output → filename is not preserved from either input. 
+ assertEquals("merged.pdf", result.getResultFiles().get(0).getFileName()); + verify(internalApiClient, times(1)).post(eq(MERGE_ENDPOINT), any()); + } + + @Test + void singleInputEndpointIsCalledOncePerFile() throws IOException { + MockMultipartFile a = pdf("a.pdf", "a-bytes"); + MockMultipartFile b = pdf("b.pdf", "b-bytes"); + stubOrchestrator( + """ + {"outcome":"tool_call","tool":"%s","parameters":{"angle":90},"rationale":"Rotating"} + """ + .formatted(ROTATE_ENDPOINT)); + when(toolMetadataService.isMultiInput(ROTATE_ENDPOINT)).thenReturn(false); + when(toolMetadataService.shouldUnpackZipResponse(ROTATE_ENDPOINT)).thenReturn(false); + stubEndpoint(ROTATE_ENDPOINT, pdfResource("rotated", "rotated.pdf")); + stubFileStorage(); + + AiWorkflowResponse result = + service.orchestrate(requestFor(new MockMultipartFile[] {a, b}, "rotate both")); + + assertEquals(AiWorkflowOutcome.COMPLETED, result.getOutcome()); + assertEquals(2, result.getResultFiles().size()); + // Per-file loop dispatches one call per input file. + verify(internalApiClient, times(2)).post(eq(ROTATE_ENDPOINT), any()); + // 1:1 mapping preserves each input's filename. 
+ assertEquals("a.pdf", result.getResultFiles().get(0).getFileName()); + assertEquals("b.pdf", result.getResultFiles().get(1).getFileName()); + } + + @Test + void planExecutesStepsSequentially() throws IOException { + MockMultipartFile input = pdf("input.pdf", "bytes"); + stubOrchestrator( + """ + { + "outcome":"plan", + "summary":"Rotate then compress", + "steps":[ + {"tool":"%s","parameters":{"angle":90}}, + {"tool":"%s","parameters":{}} + ] + } + """ + .formatted(ROTATE_ENDPOINT, COMPRESS_ENDPOINT)); + when(toolMetadataService.isMultiInput(anyString())).thenReturn(false); + when(toolMetadataService.shouldUnpackZipResponse(anyString())).thenReturn(false); + stubEndpoint(ROTATE_ENDPOINT, pdfResource("rotated", "rotated.pdf")); + stubEndpoint(COMPRESS_ENDPOINT, pdfResource("compressed", "compressed.pdf")); + stubFileStorage(); + + AiWorkflowResponse result = service.orchestrate(requestFor(input, "rotate and compress")); + + assertEquals(AiWorkflowOutcome.COMPLETED, result.getOutcome()); + assertEquals(1, result.getResultFiles().size()); + // 1:1 input → output mapping at the plan level preserves the input's filename. 
+ assertEquals("input.pdf", result.getResultFiles().get(0).getFileName()); + verify(internalApiClient, times(1)).post(eq(ROTATE_ENDPOINT), any()); + verify(internalApiClient, times(1)).post(eq(COMPRESS_ENDPOINT), any()); + } + + @Test + void toolCallWithoutEndpointFallsBackToCannotContinue() throws IOException { + MockMultipartFile input = pdf("input.pdf", "bytes"); + stubOrchestrator("{\"outcome\":\"tool_call\",\"parameters\":{}}"); + + AiWorkflowResponse result = service.orchestrate(requestFor(input, "do something")); + + assertEquals(AiWorkflowOutcome.CANNOT_CONTINUE, result.getOutcome()); + assertNotNull(result.getReason()); + verify(internalApiClient, never()).post(anyString(), any()); + } + + // --- helpers --- + + private void stubOrchestrator(String responseJson) throws IOException { + when(aiEngineClient.post(eq("/api/v1/orchestrator"), anyString())).thenReturn(responseJson); + } + + private void stubEndpoint(String endpoint, Resource body) { + when(internalApiClient.post(eq(endpoint), any(MultiValueMap.class))) + .thenReturn(ResponseEntity.ok(body)); + } + + /** + * Stub {@link FileStorage#storeInputStream} with sequential file IDs and an accurate byte + * count. Returns the counter so tests can assert how many stores happened. 
+ */ + private AtomicInteger stubFileStorage() throws IOException { + AtomicInteger counter = new AtomicInteger(); + when(fileStorage.storeInputStream(any(InputStream.class), anyString())) + .thenAnswer( + inv -> { + InputStream is = inv.getArgument(0); + long size = is.readAllBytes().length; + return new StoredFile("file-" + counter.incrementAndGet(), size); + }); + return counter; + } + + private static MockMultipartFile pdf(String filename, String content) { + return new MockMultipartFile("fileInput", filename, "application/pdf", content.getBytes()); + } + + private static AiWorkflowRequest requestFor(MockMultipartFile file, String message) { + return requestFor(new MockMultipartFile[] {file}, message); + } + + private static AiWorkflowRequest requestFor(MockMultipartFile[] files, String message) { + AiWorkflowRequest request = new AiWorkflowRequest(); + List inputs = new ArrayList<>(); + for (MockMultipartFile file : files) { + AiWorkflowFileInput fileInput = new AiWorkflowFileInput(); + fileInput.setFileInput(file); + inputs.add(fileInput); + } + request.setFileInputs(inputs); + request.setUserMessage(message); + return request; + } + + private static ByteArrayResource pdfResource(String content, String filename) { + return new ByteArrayResource(content.getBytes()) { + @Override + public String getFilename() { + return filename; + } + }; + } + + private static ByteArrayResource zipResource(String filename, List entries) + throws IOException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + try (ZipOutputStream zos = new ZipOutputStream(baos)) { + for (ZipEntryBytes entry : entries) { + zos.putNextEntry(new ZipEntry(entry.name())); + zos.write(entry.bytes()); + zos.closeEntry(); + } + } + byte[] zipBytes = baos.toByteArray(); + return new ByteArrayResource(zipBytes) { + @Override + public String getFilename() { + return filename; + } + + @Override + public InputStream getInputStream() { + return new ByteArrayInputStream(zipBytes); + } + }; + } + + 
private record ZipEntryBytes(String name, byte[] bytes) { + ZipEntryBytes(String name, String content) { + this(name, content.getBytes()); + } + } +} diff --git a/engine/AGENTS.md b/engine/AGENTS.md index 6fcef4c249..8e45662740 100644 --- a/engine/AGENTS.md +++ b/engine/AGENTS.md @@ -16,7 +16,7 @@ All engine commands can be run from the repository root using Task: - `task engine:lint` — run ruff linting - `task engine:typecheck` — run pyright - `task engine:format` — format code with ruff -- `task engine:tool-models` — generate tool_models.py from frontend TypeScript defs +- `task engine:tool-models` — generate tool_models.py from Java OpenAPI spec ## Code Style diff --git a/engine/pyproject.toml b/engine/pyproject.toml index b410cc6d2a..ba10d05141 100644 --- a/engine/pyproject.toml +++ b/engine/pyproject.toml @@ -16,8 +16,10 @@ dependencies = [ [dependency-groups] dev = [ + "datamodel-code-generator[ruff]>=0.26.0", "pytest>=8.0.0", "pyright>=1.1.408", + "referencing>=0.35.0", "ruff>=0.14.10", ] diff --git a/engine/scripts/generate_tool_models.py b/engine/scripts/generate_tool_models.py index bc339acb0a..9c67fcae49 100644 --- a/engine/scripts/generate_tool_models.py +++ b/engine/scripts/generate_tool_models.py @@ -1,509 +1,222 @@ #!/usr/bin/env python3 +"""Generate Python tool models from the Java backend's OpenAPI spec (SwaggerDoc.json). + +Uses datamodel-code-generator to convert OpenAPI request schemas to Pydantic models. +Run via: + task engine:tool-models +""" + from __future__ import annotations import argparse import json -import keyword -import re -import subprocess -from collections.abc import Callable from dataclasses import dataclass from pathlib import Path from typing import Any -TOOL_MODELS_HEADER = """# AUTO-GENERATED FILE. DO NOT EDIT. -# Generated by scripts/generate_tool_models.py from frontend TypeScript sources. 
-# ruff: noqa: N815 -""" +from datamodel_code_generator import InputFileType, PythonVersion, generate +from datamodel_code_generator.enums import DataModelType +from datamodel_code_generator.format import Formatter +from referencing import Registry, Resource +from referencing.jsonschema import DRAFT202012 +# Fields inherited from PDFFile base class — not tool parameters. +BASE_CLASS_FIELDS = frozenset({"fileInput", "fileId"}) -OPERATION_TYPE_RE = re.compile(r"operationType\s*:\s*['\"]([A-Za-z0-9_]+)['\"]") -DEFAULT_REF_RE = re.compile(r"defaultParameters\s*:\s*([A-Za-z0-9_]+)") -DEFAULT_SHORTHAND_RE = re.compile(r"\bdefaultParameters\b") -IMPORT_RE = re.compile(r"import\s*\{([^}]+)\}\s*from\s*['\"]([^'\"]+)['\"]") -VAR_OBJ_RE_TEMPLATE = r"(?:export\s+)?const\s+{name}\b[^=]*=\s*\{{" +_ENGINE_ROOT = Path(__file__).resolve().parents[1] + +_FILE_HEADER = ( + "# AUTO-GENERATED FILE. DO NOT EDIT.\n" + "# Generated by scripts/generate_tool_models.py from Java OpenAPI spec (SwaggerDoc.json).\n" + "# ruff: noqa: E501" +) @dataclass -class ToolModelSpec: - tool_id: str - params: dict[str, Any] - param_types: dict[str, Any] +class ToolSpec: + path: str + enum_name: str + class_name: str -class ParseError(Exception): - pass +@dataclass +class DiscoveryResult: + tools: list[ToolSpec] + combined_schema: dict[str, Any] -def _find_matching(text: str, start: int, open_char: str, close_char: str) -> int: - depth = 0 - i = start - in_str: str | None = None - while i < len(text): - ch = text[i] - if in_str: - if ch == "\\": - i += 2 - continue - if ch == in_str: - in_str = None - i += 1 - continue - if ch in {"'", '"'}: - in_str = ch - elif ch == open_char: - depth += 1 - elif ch == close_char: - depth -= 1 - if depth == 0: - return i - i += 1 - raise ParseError(f"Unmatched {open_char}{close_char} block") +class ToolDiscovery: + """Discovers tool endpoints from an OpenAPI spec and builds a combined JSON Schema.""" - -def _extract_block(text: str, pattern: str) -> str | None: - match = 
re.search(pattern, text) - if not match: - return None - brace_start = text.find("{", match.end() - 1) - if brace_start == -1: - return None - brace_end = _find_matching(text, brace_start, "{", "}") - return text[brace_start : brace_end + 1] - - -def _split_top_level_items(obj_body: str) -> list[str]: - items: list[str] = [] - depth_obj = depth_arr = 0 - in_str: str | None = None - token_start = 0 - i = 0 - while i < len(obj_body): - ch = obj_body[i] - if in_str: - if ch == "\\": - i += 2 - continue - if ch == in_str: - in_str = None - i += 1 - continue - if ch in {"'", '"'}: - in_str = ch - elif ch == "{": - depth_obj += 1 - elif ch == "}": - depth_obj -= 1 - elif ch == "[": - depth_arr += 1 - elif ch == "]": - depth_arr -= 1 - elif ch == "," and depth_obj == 0 and depth_arr == 0: - piece = obj_body[token_start:i].strip() - if piece: - items.append(piece) - token_start = i + 1 - i += 1 - tail = obj_body[token_start:].strip() - if tail: - items.append(tail) - return items - - -def _resolve_import_path(repo_root: Path, current_file: Path, module_path: str) -> Path | None: - candidates: list[Path] = [] - if module_path.startswith("@app/"): - rel = module_path[len("@app/") :] - candidates.extend( - [ - repo_root / "frontend/src/core" / f"{rel}.ts", - repo_root / "frontend/src/core" / f"{rel}.tsx", - repo_root / "frontend/src/saas" / f"{rel}.ts", - repo_root / "frontend/src/saas" / f"{rel}.tsx", - repo_root / "frontend/src" / f"{rel}.ts", - repo_root / "frontend/src" / f"{rel}.tsx", - ] - ) - elif module_path.startswith("."): - base = (current_file.parent / module_path).resolve() - candidates.extend([Path(f"{base}.ts"), Path(f"{base}.tsx")]) - for candidate in candidates: - if candidate.exists(): - return candidate - return None - - -def _parse_literal_value(value: str, resolver: Callable[[str], dict[str, Any] | None]) -> Any: - value = value.strip() - if not value: - return None - if value.startswith("{") and value.endswith("}"): - return _parse_object_literal(value, 
resolver) - if value.startswith("[") and value.endswith("]"): - inner = value[1:-1].strip() - if not inner: - return [] - return [_parse_literal_value(item, resolver) for item in _split_top_level_items(inner)] - if value.startswith(("'", '"')) and value.endswith(("'", '"')): - return value[1:-1] - if value in {"true", "false"}: - return value == "true" - if value == "null": - return None - if re.fullmatch(r"-?\d+", value): - return int(value) - if re.fullmatch(r"-?\d+\.\d+", value): - return float(value) - resolved = resolver(value) - if resolved is not None: - return resolved - return None - - -def _parse_object_literal(obj_text: str, resolver: Callable[[str], dict[str, Any] | None]) -> dict[str, Any]: - body = obj_text.strip()[1:-1] - result: dict[str, Any] = {} - for item in _split_top_level_items(body): - if item.startswith("..."): - spread_name = item[3:].strip() - spread = resolver(spread_name) - if isinstance(spread, dict): - result.update(spread) - continue - if ":" not in item: - continue - key, raw_value = item.split(":", 1) - key = key.strip().strip("'\"") - result[key] = _parse_literal_value(raw_value.strip(), resolver) - return result - - -def _extract_imports(source: str) -> dict[str, str]: - imports: dict[str, str] = {} - for names, module_path in IMPORT_RE.findall(source): - for part in names.split(","): - segment = part.strip() - if not segment: - continue - if " as " in segment: - original, alias = [x.strip() for x in segment.split(" as ", 1)] - imports[alias] = module_path - imports[original] = module_path - else: - imports[segment] = module_path - return imports - - -def _resolve_object_identifier(repo_root: Path, file_path: Path, source: str, identifier: str) -> dict[str, Any] | None: - var_pattern = VAR_OBJ_RE_TEMPLATE.format(name=re.escape(identifier)) - block = _extract_block(source, var_pattern) - imports = _extract_imports(source) - - def resolver(name: str) -> dict[str, Any] | None: - local_block = _extract_block(source, 
VAR_OBJ_RE_TEMPLATE.format(name=re.escape(name))) - if local_block: - return _parse_object_literal(local_block, resolver) - import_path = imports.get(name) - if not import_path: - return None - resolved_file = _resolve_import_path(repo_root, file_path, import_path) - if not resolved_file: - return None - imported_source = resolved_file.read_text(encoding="utf-8") - return _resolve_object_identifier(repo_root, resolved_file, imported_source, name) - - if block: - return _parse_object_literal(block, resolver) - import_path = imports.get(identifier) - if not import_path: - return None - resolved_file = _resolve_import_path(repo_root, file_path, import_path) - if not resolved_file: - return None - imported_source = resolved_file.read_text(encoding="utf-8") - return _resolve_object_identifier(repo_root, resolved_file, imported_source, identifier) - - -def _infer_py_type(value: Any) -> str: - if isinstance(value, bool): - return "bool" - if isinstance(value, int): - return "int" - if isinstance(value, float): - return "float" - if isinstance(value, str): - return "str" - if isinstance(value, list): - return "list[Any]" - if isinstance(value, dict): - return "dict[str, Any]" - return "Any" - - -def _spec_is_none(spec: dict[str, Any]) -> bool: - return spec.get("kind") == "null" - - -def _py_type_from_spec(spec: dict[str, Any]) -> str: - kind = spec.get("kind") - if kind == "string": - return "str" - if kind == "number": - return "float" - if kind == "boolean": - return "bool" - if kind == "date": - return "str" - if kind == "enum": - values = spec.get("values") - if isinstance(values, list) and values: - literal_values = ", ".join(_py_repr(v) for v in values) - return f"Literal[{literal_values}]" - if kind == "ref": - ref_name = spec.get("name") - if isinstance(ref_name, str) and ref_name.endswith("Parameters"): - return f"{ref_name[:-10]}Params" - if kind == "array": - element = spec.get("element") - inner = _py_type_from_spec(element) if isinstance(element, dict) else 
"Any" - return f"list[{inner}]" - if kind == "object": - dict_value = spec.get("dictValue") - if isinstance(dict_value, dict): - inner = _py_type_from_spec(dict_value) - return f"dict[str, {inner}]" - properties = spec.get("properties") - if isinstance(properties, dict) and properties: - property_types = {_py_type_from_spec(p) for p in properties.values() if isinstance(p, dict)} - if len(property_types) == 1: - inner = next(iter(property_types)) - return f"dict[str, {inner}]" - return "dict[str, Any]" - if kind in {"null"}: - return "Any" - return "Any" - - -def _to_class_name(tool_id: str) -> str: - cleaned = re.sub(r"([a-z0-9])([A-Z])", r"\1 \2", tool_id) - cleaned = re.sub(r"[^A-Za-z0-9]+", " ", cleaned) - parts = [part.capitalize() for part in cleaned.split() if part] - return "".join(parts) + "Params" - - -def _to_snake_case(name: str) -> str: - snake = re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", name) - snake = re.sub(r"[^A-Za-z0-9]+", "_", snake).strip("_").lower() - if not snake: - snake = "param" - if snake[0].isdigit(): - snake = f"param_{snake}" - if keyword.iskeyword(snake): - snake = f"{snake}_" - return snake - - -def _build_field_name_map(params: dict[str, Any]) -> dict[str, str]: - field_map: dict[str, str] = {} - used: set[str] = set() - for original_key in sorted(params): - base_name = _to_snake_case(original_key) - candidate = base_name - suffix = 2 - while candidate in used: - candidate = f"{base_name}_{suffix}" - suffix += 1 - used.add(candidate) - field_map[original_key] = candidate - return field_map - - -def _to_enum_member_name(tool_id: str) -> str: - return _to_snake_case(tool_id).upper() - - -def _build_enum_member_map(specs: list[ToolModelSpec]) -> dict[str, str]: - member_map: dict[str, str] = {} - used: set[str] = set() - for spec in specs: - base_name = _to_enum_member_name(spec.tool_id) - candidate = base_name - suffix = 2 - while candidate in used: - candidate = f"{base_name}_{suffix}" - suffix += 1 - used.add(candidate) - 
member_map[spec.tool_id] = candidate - return member_map - - -def _py_repr(value: Any) -> str: - return ( - json.dumps(value, ensure_ascii=True).replace("true", "True").replace("false", "False").replace("null", "None") + # Namespaces exposed to the LLM as callable tools. Largely matches ``InternalApiClient.java``. + # Note: ``/api/v1/filter/`` is intentionally excluded because those APIs are for pipeline processing, + # not tool execution. + ALLOWED_PATH_PREFIXES = ( + "/api/v1/general/", + "/api/v1/misc/", + "/api/v1/security/", + "/api/v1/convert/", ) + def __init__(self, spec: dict[str, Any]): + resource = Resource.from_contents(spec, default_specification=DRAFT202012) + self.resolver = Registry().with_resource("", resource).resolver() + self.spec = spec -def discover_tool_specs(repo_root: Path) -> list[ToolModelSpec]: - frontend_dir = repo_root / "frontend" - extractor = frontend_dir / "scripts/export-tool-specs.ts" - command = ["node", "--import", "tsx", str(extractor)] - result = subprocess.run( - command, - check=True, - capture_output=True, - text=True, - cwd=str(frontend_dir), + def discover(self) -> DiscoveryResult: + tools: list[ToolSpec] = [] + defs: dict[str, Any] = {} + used_enum: set[str] = set() + used_class: set[str] = set() + + for path, path_item in sorted(self.spec.get("paths", {}).items()): + if "{" in path or not any(path.startswith(p) for p in self.ALLOWED_PATH_PREFIXES): + continue + properties = self._get_request_properties(path_item) + if not properties: + continue + clean_props = self._filter_properties(properties) + if not clean_props: + continue + + enum_name = _deduplicate(_path_to_enum_name(path), used_enum) + class_name = _deduplicate(_path_to_class_name(path), used_class) + + defs[class_name] = {"type": "object", "properties": clean_props} + tools.append(ToolSpec(path, enum_name, class_name)) + + combined_schema: dict[str, Any] = { + "$defs": defs, + "anyOf": [{"$ref": f"#/$defs/{t.class_name}"} for t in tools], + } + return 
DiscoveryResult(tools=tools, combined_schema=combined_schema) + + def _resolve_ref(self, schema: dict[str, Any]) -> dict[str, Any]: + if "$ref" in schema: + return self.resolver.lookup(schema["$ref"]).contents + return schema + + def _get_request_properties(self, path_item: dict[str, Any]) -> dict[str, Any] | None: + post = path_item.get("post") + if not post: + return None + content = post.get("requestBody", {}).get("content", {}) + for media_type in ("multipart/form-data", "application/json"): + if media_type in content: + schema = content[media_type].get("schema") + if schema: + return self._resolve_ref(schema).get("properties") + return None + + def _filter_properties(self, properties: dict[str, Any]) -> dict[str, Any]: + """Remove base-class fields and binary upload fields, resolving any $refs.""" + clean: dict[str, Any] = {} + for name, prop in properties.items(): + if name in BASE_CLASS_FIELDS: + continue + prop = self._resolve_ref(prop) + if prop.get("type") == "string" and prop.get("format") == "binary": + continue + clean[name] = prop + return clean + + +def _tool_name_segments(path: str) -> str: + """Extract a descriptive name from the endpoint path. + + Converters use two segments (e.g. /api/v1/convert/cbr/pdf → cbr-to-pdf). + Other tools use the last segment (e.g. /api/v1/misc/compress-pdf → compress-pdf). + """ + parts = path.rstrip("/").split("/") + if "/api/v1/convert/" in path and len(parts) >= 6: + return f"{parts[-2]}-to-{parts[-1]}" + return parts[-1] + + +def _path_to_enum_name(path: str) -> str: + return _tool_name_segments(path).replace("-", "_").upper() + + +def _path_to_class_name(path: str) -> str: + return "".join(p.capitalize() for p in _tool_name_segments(path).split("-")) + "Params" + + +def _deduplicate(name: str, used: set[str]) -> str: + """Return name, appending 2, 3, ... if already in used. 
Adds result to used.""" + candidate = name + n = 2 + while candidate in used: + candidate = f"{name}{n}" + n += 1 + used.add(candidate) + return candidate + + +def generate_models_code(combined_schema: dict[str, Any]) -> str: + """Run datamodel-code-generator once on the combined schema.""" + code = generate( + input_=json.dumps(combined_schema, sort_keys=True), + input_file_type=InputFileType.JsonSchema, + output_model_type=DataModelType.PydanticV2BaseModel, + target_python_version=PythonVersion.PY_313, + snake_case_field=True, + base_class="stirling.models.base.ApiModel", + field_constraints=True, + no_alias=True, + set_default_enum_member=True, + additional_imports=["enum.StrEnum"], + enable_version_header=False, + custom_file_header=_FILE_HEADER, + formatters=[Formatter.RUFF_FORMAT, Formatter.RUFF_CHECK], + settings_path=_ENGINE_ROOT / "pyproject.toml", ) - raw = json.loads(result.stdout) - specs: list[ToolModelSpec] = [] - for item in raw: - tool_id = item.get("tool_id") - if not isinstance(tool_id, str) or not tool_id: - continue - params = item.get("params") - param_types = item.get("param_types") - specs.append( - ToolModelSpec( - tool_id=tool_id, - params=params if isinstance(params, dict) else {}, - param_types=param_types if isinstance(param_types, dict) else {}, - ) - ) - return sorted(specs, key=lambda spec: spec.tool_id) + return str(code or "") -def write_models_module(out_path: Path, specs: list[ToolModelSpec]) -> None: - lines: list[str] = [ - TOOL_MODELS_HEADER, - "from __future__ import annotations\n\n", - "from enum import StrEnum\n", - "from typing import Any, Literal\n\n", - "from stirling.models.base import ApiModel\n", +def write_output(out_path: Path, tools: list[ToolSpec], models_code: str) -> None: + union_lines = ["type ParamToolModel = ("] + for i, tool in enumerate(tools): + prefix = " | " if i > 0 else " " + union_lines.append(f"{prefix}{tool.class_name}") + union_lines.append(")") + union_lines.append("type ParamToolModelType = 
type[ParamToolModel]") + + enum_lines = [ + "class ToolEndpoint(StrEnum):", + *(f' {t.enum_name} = "{t.path}"' for t in tools), ] - class_names: dict[str, str] = {spec.tool_id: _to_class_name(spec.tool_id) for spec in specs} - class_name_to_tool_id = {name: tool_id for tool_id, name in class_names.items()} + ops_lines = [ + "OPERATIONS: dict[ToolEndpoint, ParamToolModelType] = {", + *(f" ToolEndpoint.{t.enum_name}: {t.class_name}," for t in tools), + "}", + ] - def extract_class_dependencies(spec: ToolModelSpec) -> set[str]: - deps: set[str] = set() - if not isinstance(spec.param_types, dict): - return deps - for entry in spec.param_types.values(): - if not isinstance(entry, dict): - continue - type_spec = entry - if "type" in entry and isinstance(entry.get("type"), dict): - type_spec = entry["type"] - if not isinstance(type_spec, dict): - continue - if type_spec.get("kind") != "ref": - continue - ref_name = type_spec.get("name") - if isinstance(ref_name, str) and ref_name.endswith("Parameters"): - ref_class = f"{ref_name[:-10]}Params" - if ref_class in class_name_to_tool_id: - deps.add(ref_class) - return deps - - dependencies_by_class: dict[str, set[str]] = {} - for spec in specs: - class_name = class_names[spec.tool_id] - dependencies_by_class[class_name] = extract_class_dependencies(spec) - - remaining = set(class_names.values()) - ordered_class_names: list[str] = [] - while remaining: - progress = False - for class_name in sorted(remaining): - deps = dependencies_by_class.get(class_name, set()) - if deps.issubset(set(ordered_class_names)): - ordered_class_names.append(class_name) - remaining.remove(class_name) - progress = True - break - if not progress: - ordered_class_names.extend(sorted(remaining)) - break - - ordered_specs = [next(spec for spec in specs if class_names[spec.tool_id] == name) for name in ordered_class_names] - - for spec in ordered_specs: - class_name = class_names[spec.tool_id] - lines.append(f"class {class_name}(ApiModel):\n") - 
all_param_keys = set(spec.params) - if isinstance(spec.param_types, dict): - all_param_keys.update(spec.param_types.keys()) - - if not all_param_keys: - lines.append(" pass\n\n\n") - continue - - field_name_map = _build_field_name_map({key: True for key in all_param_keys}) - for key in sorted(all_param_keys): - field_name = field_name_map[key] - value = spec.params.get(key) - type_spec = spec.param_types.get(key) if isinstance(spec.param_types, dict) else None - if isinstance(type_spec, dict): - py_type = _py_type_from_spec(type_spec) - else: - py_type = _infer_py_type(value) - - if value is None and (isinstance(type_spec, dict) and _spec_is_none(type_spec)): - if py_type != "Any" and "| None" not in py_type: - py_type = f"{py_type} | None" - lines.append(f" {field_name}: {py_type} = None\n") - elif value is None: - lines.append(f" {field_name}: {py_type} | None = None\n") - else: - if isinstance(type_spec, dict) and type_spec.get("kind") == "ref" and isinstance(value, dict): - lines.append(f" {field_name}: {py_type} = {py_type}.model_validate({_py_repr(value)})\n") - continue - lines.append(f" {field_name}: {py_type} = {_py_repr(value)}\n") - lines.append("\n\n") - - if class_names: - union_members = " | ".join(class_names[tool_id] for tool_id in sorted(class_names)) - lines.append(f"type ParamToolModel = {union_members}\n") - lines.append("type ParamToolModelType = type[ParamToolModel]\n\n") - else: - lines.append("type ParamToolModel = ApiModel\n") - lines.append("type ParamToolModelType = type[ParamToolModel]\n\n") - - enum_member_map = _build_enum_member_map(specs) - - lines.append("class OperationId(StrEnum):\n") - - for spec in specs: - lines.append(f" {enum_member_map[spec.tool_id]} = {spec.tool_id!r}\n") - - lines.extend( - [ - "\n\n", - "OPERATIONS: dict[OperationId, ParamToolModelType] = {\n", - ] - ) - - for spec in specs: - model_name = _to_class_name(spec.tool_id) - lines.append(f" OperationId.{enum_member_map[spec.tool_id]}: {model_name},\n") - 
lines.append("}\n") - out_path.write_text("".join(lines), encoding="utf-8") + parts = [models_code, "\n", *union_lines, "\n", *enum_lines, "\n", *ops_lines, ""] + out_path.write_text("\n".join(parts), encoding="utf-8") def main() -> None: - parser = argparse.ArgumentParser(description="Generate tool models from frontend TypeScript tool definitions") - parser.add_argument("--spec", help="Deprecated (ignored)", default="") - parser.add_argument("--output", default="", help="Path to tool_models.py") - parser.add_argument("--ai-output", default="", help="Deprecated (ignored)") + parser = argparse.ArgumentParser(description="Generate Python tool models from Java OpenAPI spec") + parser.add_argument("--spec", required=True, help="Path to SwaggerDoc.json") + parser.add_argument("--output", required=True, help="Path to output tool_models.py") args = parser.parse_args() - repo_root = Path(__file__).resolve().parents[3] - specs = discover_tool_specs(repo_root) + spec_path = Path(args.spec) + if not spec_path.exists(): + raise SystemExit(f"OpenAPI spec not found at {spec_path}\nRun 'task engine:tool-models' to generate it.") + output_path = Path(args.output) - output_path = Path(args.output) if args.output else (repo_root / "src/stirling/models/tool_models.py") + with open(spec_path) as f: + spec = json.load(f) - write_models_module(output_path, specs) - print(f"Wrote {len(specs)} tool model specs") + result = ToolDiscovery(spec).discover() + models_code = generate_models_code(result.combined_schema) + write_output(output_path, result.tools, models_code) + + print(f"Generated {len(result.tools)} tool models from {spec_path.name}") + for tool in result.tools: + print(f" {tool.enum_name}: {tool.path} → {tool.class_name}") if __name__ == "__main__": diff --git a/engine/src/stirling/agents/orchestrator.py b/engine/src/stirling/agents/orchestrator.py index 3ddefcf432..e4c17d0059 100644 --- a/engine/src/stirling/agents/orchestrator.py +++ 
b/engine/src/stirling/agents/orchestrator.py @@ -74,7 +74,7 @@ class OrchestratorAgent: system_prompt=( "You are the top-level orchestrator. " "Choose exactly one output function that best handles the request. " - "Use delegate_pdf_edit for requested PDF modifications. " + "Use delegate_pdf_edit for requested modifications of single or multiple PDFs. " "Use delegate_pdf_question for questions about PDF contents. " "Use delegate_user_spec for requests to create or define an agent spec. " "Use math_auditor_agent for requests to check arithmetic, validate " @@ -116,7 +116,9 @@ class OrchestratorAgent: return await self._run_pdf_edit(ctx.deps.request) async def _run_pdf_edit(self, request: OrchestratorRequest) -> PdfEditResponse: - return await PdfEditAgent(self.runtime).handle(PdfEditRequest(user_message=request.user_message)) + return await PdfEditAgent(self.runtime).handle( + PdfEditRequest(user_message=request.user_message, file_names=request.file_names) + ) async def delegate_pdf_question(self, ctx: RunContext[OrchestratorDeps]) -> PdfQuestionResponse: return await self._run_pdf_question(ctx.deps.request) diff --git a/engine/src/stirling/agents/pdf_edit.py b/engine/src/stirling/agents/pdf_edit.py index 1ba014dec4..b5ad8c431d 100644 --- a/engine/src/stirling/agents/pdf_edit.py +++ b/engine/src/stirling/agents/pdf_edit.py @@ -14,13 +14,13 @@ from stirling.contracts import ( PdfEditResponse, ToolOperationStep, ) -from stirling.models import OPERATIONS, ApiModel, OperationId, ParamToolModel +from stirling.models import OPERATIONS, ApiModel, ParamToolModel, ToolEndpoint from stirling.services import AppRuntime class PdfEditPlanSelection(ApiModel): outcome: Literal["plan"] = "plan" - operations: list[OperationId] = Field(min_length=1) + operations: list[ToolEndpoint] = Field(min_length=1) summary: str rationale: str | None = None @@ -41,7 +41,7 @@ class PdfEditParameterSelector: async def select( self, request: PdfEditRequest, - operation_plan: list[OperationId], + 
operation_plan: list[ToolEndpoint], operation_index: int, generated_steps: list[ToolOperationStep], ) -> ParamToolModel: @@ -51,7 +51,7 @@ class PdfEditParameterSelector: self._build_parameter_prompt(request, operation_plan, operation_index, generated_steps), output_type=NativeOutput(parameter_model), instructions=( - f"Generate only the parameters for the PDF operation `{operation_id.value}`. " + f"Generate only the parameters for the PDF operation `{operation_id.name}`. " "Do not include fields from any other operation." ), ) @@ -60,12 +60,12 @@ class PdfEditParameterSelector: def _build_parameter_prompt( self, request: PdfEditRequest, - operation_plan: list[OperationId], + operation_plan: list[ToolEndpoint], operation_index: int, generated_steps: list[ToolOperationStep], ) -> str: operation_id = operation_plan[operation_index] - operation_list = ", ".join(operation.value for operation in operation_plan) + operation_list = ", ".join(operation.name for operation in operation_plan) file_names = ", ".join(request.file_names) if request.file_names else "No file names were provided." generated_steps_text = ( "\n".join( @@ -79,7 +79,7 @@ class PdfEditParameterSelector: f"Files: {file_names}\n" f"Operation plan: {operation_list}\n" f"Selected operation index: {operation_index + 1} of {len(operation_plan)}\n" - f"Selected operation: {operation_id.value}\n" + f"Selected operation: {operation_id.name}\n" f"Already generated steps:\n{generated_steps_text}\n" "Return only the parameter object for the selected operation." 
) @@ -153,4 +153,4 @@ class PdfEditAgent: ) def _supported_operations_prompt(self) -> str: - return ", ".join(operation_id.value for operation_id in self.supported_operations) + return ", ".join(f"{op.name} ({op.value})" for op in self.supported_operations) diff --git a/engine/src/stirling/contracts/agent_specs.py b/engine/src/stirling/contracts/agent_specs.py index 684872ea8d..933d2e6856 100644 --- a/engine/src/stirling/contracts/agent_specs.py +++ b/engine/src/stirling/contracts/agent_specs.py @@ -4,7 +4,7 @@ from typing import Annotated, Literal from pydantic import Field -from stirling.models import ApiModel, OperationId +from stirling.models import ApiModel, ToolEndpoint from .common import StepKind, ToolOperationStep @@ -13,7 +13,7 @@ class AiToolAgentStep(ApiModel): kind: Literal[StepKind.AI_TOOL] = StepKind.AI_TOOL title: str description: str - tool: OperationId + tool: ToolEndpoint instruction: str diff --git a/engine/src/stirling/contracts/common.py b/engine/src/stirling/contracts/common.py index 20c4670020..9ee8856ddb 100644 --- a/engine/src/stirling/contracts/common.py +++ b/engine/src/stirling/contracts/common.py @@ -5,7 +5,7 @@ from typing import Literal, assert_never from pydantic import Field, model_validator -from stirling.models import OPERATIONS, ApiModel, OperationId +from stirling.models import OPERATIONS, ApiModel, ToolEndpoint from stirling.models.agent_tool_models import AGENT_OPERATIONS, AgentToolId, AnyParamModel, AnyToolId @@ -110,7 +110,7 @@ class ToolOperationStep(ApiModel): def validate_tool_parameter_pairing(self) -> ToolOperationStep: if isinstance(self.tool, AgentToolId): expected_type = AGENT_OPERATIONS[self.tool] - elif isinstance(self.tool, OperationId): + elif isinstance(self.tool, ToolEndpoint): expected_type = OPERATIONS[self.tool] else: assert_never(self.tool) diff --git a/engine/src/stirling/contracts/execution.py b/engine/src/stirling/contracts/execution.py index 6e1f9ad348..a5a227c70e 100644 --- 
a/engine/src/stirling/contracts/execution.py +++ b/engine/src/stirling/contracts/execution.py @@ -4,7 +4,7 @@ from typing import Annotated, Any, Literal from pydantic import Field -from stirling.models import ApiModel, OperationId, ParamToolModel +from stirling.models import ApiModel, ParamToolModel, ToolEndpoint from .agent_specs import AgentSpec from .common import WorkflowOutcome @@ -12,7 +12,7 @@ from .common import WorkflowOutcome class ExecutionStepResult(ApiModel): step_index: int - tool: OperationId | None = None + tool: ToolEndpoint | None = None success: bool output_summary: str | None = None output_data: dict[str, Any] = Field(default_factory=dict) @@ -33,7 +33,7 @@ class AgentExecutionRequest(ApiModel): class ToolCallExecutionAction(ApiModel): outcome: Literal[WorkflowOutcome.TOOL_CALL] = WorkflowOutcome.TOOL_CALL - tool: OperationId + tool: ToolEndpoint parameters: ParamToolModel rationale: str | None = None diff --git a/engine/src/stirling/models/__init__.py b/engine/src/stirling/models/__init__.py index 7877141aaf..0d4c774d0e 100644 --- a/engine/src/stirling/models/__init__.py +++ b/engine/src/stirling/models/__init__.py @@ -1,11 +1,11 @@ from . import tool_models from .base import ApiModel -from .tool_models import OPERATIONS, OperationId, ParamToolModel +from .tool_models import OPERATIONS, ParamToolModel, ToolEndpoint __all__ = [ "ApiModel", "OPERATIONS", - "OperationId", "ParamToolModel", + "ToolEndpoint", "tool_models", ] diff --git a/engine/src/stirling/models/agent_tool_models.py b/engine/src/stirling/models/agent_tool_models.py index fcef18ad4a..ecbcde78d7 100644 --- a/engine/src/stirling/models/agent_tool_models.py +++ b/engine/src/stirling/models/agent_tool_models.py @@ -1,6 +1,6 @@ """Agent tool IDs, parameter models, and registry. -tool_models.py is auto-generated from the frontend. This file is its +tool_models.py is auto-generated from the Java OpenAPI spec. 
This file is its manually-maintained counterpart for tools backed by AI agent pipelines. """ @@ -9,7 +9,7 @@ from __future__ import annotations from enum import StrEnum from stirling.models.base import ApiModel -from stirling.models.tool_models import OperationId, ParamToolModel +from stirling.models.tool_models import ParamToolModel, ToolEndpoint class AgentToolId(StrEnum): @@ -22,7 +22,7 @@ class MathAuditorAgentParams(ApiModel): type AgentParamModel = MathAuditorAgentParams -type AnyToolId = OperationId | AgentToolId +type AnyToolId = ToolEndpoint | AgentToolId type AnyParamModel = ParamToolModel | AgentParamModel AGENT_OPERATIONS: dict[AgentToolId, type[AgentParamModel]] = { diff --git a/engine/src/stirling/models/tool_models.py b/engine/src/stirling/models/tool_models.py index 334d88442d..d671441141 100644 --- a/engine/src/stirling/models/tool_models.py +++ b/engine/src/stirling/models/tool_models.py @@ -1,439 +1,1380 @@ # AUTO-GENERATED FILE. DO NOT EDIT. -# Generated by scripts/generate_tool_models.py from frontend TypeScript sources. +# Generated by scripts/generate_tool_models.py from Java OpenAPI spec (SwaggerDoc.json). 
+# ruff: noqa: E501 + from __future__ import annotations -from enum import StrEnum -from typing import Any, Literal +from enum import Enum, IntEnum, StrEnum +from typing import Any + +from pydantic import Field, RootModel, SecretStr from stirling.models.base import ApiModel class AddAttachmentsParams(ApiModel): - attachments: list[dict[str, Any]] = [] - - -class AdjustContrastParams(ApiModel): - blue: float = 100 - brightness: float = 100 - contrast: float = 100 - green: float = 100 - red: float = 100 - saturation: float = 100 - - -class AutoRenameParams(ApiModel): - use_first_text_as_fallback: bool = False - - -class AutomateParams(ApiModel): - pass - - -class BookletImpositionParams(ApiModel): - add_border: bool = False - add_gutter: bool = False - double_sided: bool = True - duplex_pass: Literal["BOTH", "FIRST", "SECOND"] = "BOTH" - flip_on_short_edge: bool = False - gutter_size: float = 12 - pages_per_sheet: float = 2 - spine_location: Literal["LEFT", "RIGHT"] = "LEFT" - - -class CertSignParams(ApiModel): - cert_file: dict[str, Any] | None = None - cert_type: Literal["", "PEM", "PKCS12", "PFX", "JKS"] = "" - jks_file: dict[str, Any] | None = None - location: str = "" - name: str = "" - p12_file: dict[str, Any] | None = None - page_number: float = 1 - password: str = "" - private_key_file: dict[str, Any] | None = None - reason: str = "" - show_logo: bool = True - show_signature: bool = False - sign_mode: Literal["MANUAL", "AUTO"] = "MANUAL" - - -class ChangeMetadataParams(ApiModel): - author: str = "" - creation_date: str | None = None - creator: str = "" - custom_metadata: list[dict[str, str]] = [] - delete_all: bool = False - keywords: str = "" - modification_date: str | None = None - producer: str = "" - subject: str = "" - title: str = "" - trapped: Literal["True", "False", "Unknown"] | None = None - - -class ChangePermissionsParams(ApiModel): - prevent_assembly: bool = False - prevent_extract_content: bool = False - prevent_extract_for_accessibility: bool = 
False - prevent_fill_in_form: bool = False - prevent_modify: bool = False - prevent_modify_annotations: bool = False - prevent_printing: bool = False - prevent_printing_faithful: bool = False - - -class AddPasswordParams(ApiModel): - key_length: float = 128 - owner_password: str = "" - password: str = "" - permissions: ChangePermissionsParams = ChangePermissionsParams.model_validate( - { - "preventAssembly": False, - "preventExtractContent": False, - "preventExtractForAccessibility": False, - "preventFillInForm": False, - "preventModify": False, - "preventModifyAnnotations": False, - "preventPrinting": False, - "preventPrintingFaithful": False, - } + attachments: list[bytes] | None = Field(None, description="The image file to be overlaid onto the PDF.") + convert_to_pdf_a3b: bool | None = Field( + False, description="Convert the resulting PDF to PDF/A-3b format after adding attachments" ) -class CompressParams(ApiModel): - compression_level: float = 5 - compression_method: Literal["quality", "filesize"] = "quality" - expected_size: str = "" - file_size_unit: Literal["KB", "MB"] = "MB" - file_size_value: str = "" - grayscale: bool = False +class AddImageParams(ApiModel): + every_page: bool | None = Field(False, description="Whether to overlay the image onto every page of the PDF.") + x: float | None = Field(0, description="The x-coordinate at which to place the top-left corner of the image.") + y: float | None = Field(0, description="The y-coordinate at which to place the top-left corner of the image.") -class ConvertParams(ApiModel): - cbz_options: dict[str, bool] = {"optimizeForEbook": False} - cbz_output_options: dict[str, float] = {"dpi": 150} - email_options: dict[str, Any] = { - "includeAttachments": True, - "maxAttachmentSizeMB": 10, - "downloadHtml": False, - "includeAllRecipients": False, - } - from_extension: str = "" - html_options: dict[str, float] = {"zoomLevel": 1} - image_options: dict[str, Any] = { - "colorType": "color", - "dpi": 300, - 
"singleOrMultiple": "multiple", - "fitOption": "maintainAspectRatio", - "autoRotate": True, - "combineImages": True, - } - is_smart_detection: bool = False - pdfa_options: dict[str, str] = {"outputFormat": "pdfa-1"} - smart_detection_type: Literal["mixed", "images", "web", "none"] = "none" - to_extension: str = "" +class CustomMargin(StrEnum): + small = "small" + medium = "medium" + large = "large" + x_large = "x-large" + + +class FontType(StrEnum): + helvetica = "helvetica" + courier = "courier" + times = "times" + + +class Position(IntEnum): + integer_1 = 1 + integer_2 = 2 + integer_3 = 3 + integer_4 = 4 + integer_5 = 5 + integer_6 = 6 + integer_7 = 7 + integer_8 = 8 + integer_9 = 9 + + +class AddPageNumbersParams(ApiModel): + custom_margin: CustomMargin | None = Field( + CustomMargin.medium, description="Custom margin: small/medium/large/x-large" + ) + custom_text: str | None = Field( + "{n}", + description="Custom text pattern. Available variables: {n}=current page number, {total}=total pages, {filename}=original filename", + examples=["Page {n} of {total}"], + ) + font_color: str | None = Field( + "#000000", description="Hex colour for page numbers (e.g. #FF0000)", examples=["#000000"] + ) + font_size: float | None = Field(12, description="Font size for page numbers", ge=1.0) + font_type: FontType | None = Field(None, description="Font type for page numbers") + page_numbers: str | None = Field( + "all", + description="The pages to select, Supports ranges (e.g., '1,3,5-9'), or 'all' or functions in the format 'an+b' where 'a' is the multiplier of the page number 'n', and 'b' is a constant (e.g., '2n+1', '3n', '6n-5')", + ) + pages_to_number: str | None = Field("all", description="Which pages to number (e.g. 
'1,3-5,7' or 'all')") + position: Position | None = Field( + None, + description="Position: 1-9 representing positions on the page (1=top-left, 2=top-center, 3=top-right, 4=middle-left, 5=middle-center, 6=middle-right, 7=bottom-left, 8=bottom-center, 9=bottom-right)", + ) + starting_number: int | None = Field(1, description="Starting number for page numbering", ge=1) + zero_pad: int | None = Field( + 0, description="Zero-padding width for page numbers (Bates Stamping). Set to 0 to disable padding", ge=0 + ) + + +class KeyLength(IntEnum): + integer_40 = 40 + integer_128 = 128 + integer_256 = 256 + + +class AddPasswordParams(ApiModel): + key_length: KeyLength | None = Field(None, description="The length of the encryption key") + owner_password: SecretStr | None = Field( + None, + description="The owner password to be added to the PDF file (Restricts what can be done with the document once it is opened)", + ) + password: SecretStr | None = Field( + None, description="The password to be added to the PDF file (Restricts the opening of the document itself.)" + ) + prevent_assembly: bool | None = Field(False, description="Whether document assembly is prevented") + prevent_extract_content: bool | None = Field(False, description="Whether content extraction is prevented") + prevent_extract_for_accessibility: bool | None = Field( + False, description="Whether content extraction for accessibility is prevented" + ) + prevent_fill_in_form: bool | None = Field(False, description="Whether form filling is prevented") + prevent_modify: bool | None = Field(False, description="Whether document modification is prevented") + prevent_modify_annotations: bool | None = Field( + False, description="Whether modification of annotations is prevented" + ) + prevent_printing: bool | None = Field(False, description="Whether printing of the document is prevented") + prevent_printing_faithful: bool | None = Field(False, description="Whether faithful printing is prevented") + + +class 
Alphabet(StrEnum): + roman = "roman" + arabic = "arabic" + japanese = "japanese" + korean = "korean" + chinese = "chinese" + thai = "thai" + + +class StampType(StrEnum): + text = "text" + image = "image" + + +class AddStampParams(ApiModel): + alphabet: Alphabet | None = Field(Alphabet.roman, description="The selected alphabet of the stamp text") + custom_color: str | None = Field("#d3d3d3", description="The color of the stamp text") + custom_margin: CustomMargin | None = Field( + CustomMargin.medium, description="Specifies the margin size for the stamp." + ) + font_size: float | None = Field(40, description="The font size of the stamp text and image in points.") + opacity: float | None = Field(0.5, description="The opacity of the stamp (0.0 - 1.0)") + override_x: float | None = Field( + -1, + description="Override X coordinate for stamp placement. If set, it will override the position-based calculation. Negative value means no override.", + ) + override_y: float | None = Field( + -1, + description="Override Y coordinate for stamp placement. If set, it will override the position-based calculation. 
Negative value means no override.", + ) + page_numbers: str | None = Field( + "all", + description="The pages to select, Supports ranges (e.g., '1,3,5-9'), or 'all' or functions in the format 'an+b' where 'a' is the multiplier of the page number 'n', and 'b' is a constant (e.g., '2n+1', '3n', '6n-5')", + ) + position: Position | None = Field( + Position.integer_8, + description="Position for stamp placement based on a 1-9 grid (1: bottom-left, 2: bottom-center, 3: bottom-right, 4: middle-left, 5: middle-center, 6: middle-right, 7: top-left, 8: top-center, 9: top-right)", + ) + rotation: float | None = Field(0, description="The rotation of the stamp in degrees") + stamp_text: str | None = Field("Stirling Software", description="The stamp text") + stamp_type: StampType | None = Field(None, description="The stamp type (text or image)") + + +class WatermarkType(StrEnum): + text = "text" + image = "image" + + +class AddWatermarkParams(ApiModel): + alphabet: Alphabet | None = Field(Alphabet.roman, description="The selected alphabet") + convert_pdf_to_image: bool | None = Field(False, description="Convert the redacted PDF to an image") + custom_color: str | None = Field("#d3d3d3", description="The color for watermark") + font_size: float | None = Field(30, description="The font size of the watermark text", ge=1.0) + height_spacer: int | None = Field(50, description="The height spacer between watermark elements", ge=0) + opacity: float | None = Field(0.5, description="The opacity of the watermark (0.0 - 1.0)") + rotation: float | None = Field(0, description="The rotation of the watermark in degrees") + watermark_text: str | None = Field("Stirling Software", description="The watermark text") + watermark_type: WatermarkType | None = Field(None, description="The watermark type (text or image)") + width_spacer: int | None = Field(50, description="The width spacer between watermark elements", ge=0) + + +class AutoRedactParams(ApiModel): + convert_pdf_to_image: bool | None = 
Field(False, description="Convert the redacted PDF to an image") + custom_padding: float | None = Field(None, description="Custom padding for redaction") + list_of_text: str | None = Field("text,text2", description="List of text to redact from the PDF") + redact_color: str | None = Field("#000000", description="The color for redaction") + use_regex: bool | None = Field(False, description="Whether to use regex for the listOfText") + whole_word_search: bool | None = Field(False, description="Whether to use whole word search") + + +class AutoRenameParams(ApiModel): + use_first_text_as_fallback: bool | None = Field( + False, + description="Flag indicating whether to use the first text as a fallback if no suitable title is found. Defaults to false.", + ) + + +class AutoSplitPdfParams(ApiModel): + duplex_mode: bool | None = Field( + False, + description="Flag indicating if the duplex mode is active, where the page after the divider also gets removed.", + ) + + +class DuplexPass(StrEnum): + both = "BOTH" + first = "FIRST" + second = "SECOND" + + +class PagesPerSheet(Enum): + number_2 = 2 + + +class SpineLocation(StrEnum): + left = "LEFT" + right = "RIGHT" + + +class BookletImpositionParams(ApiModel): + add_border: bool | None = Field(None, description="Boolean for if you wish to add border around the pages") + add_gutter: bool | None = Field(None, description="Add gutter margin (inner margin for binding)") + double_sided: bool | None = Field(None, description="Generate both front and back sides (double-sided printing)") + duplex_pass: DuplexPass | None = Field(DuplexPass.both, description="For manual duplex: which pass to generate") + flip_on_short_edge: bool | None = Field( + None, description="Flip back sides for short-edge duplex printing (default is long-edge)" + ) + gutter_size: float | None = Field(12, description="Gutter margin size in points (used when addGutter is true)") + pages_per_sheet: PagesPerSheet | None = Field( + PagesPerSheet.number_2, + 
description="The number of pages per side for booklet printing (always 2 for proper booklet).", + ) + spine_location: SpineLocation | None = Field(SpineLocation.left, description="The spine location for the booklet.") + + +class CbrToPdfParams(ApiModel): + optimize_for_ebook: bool | None = Field( + False, description="Optimize the output PDF for ebook reading using Ghostscript" + ) + + +class CbzToPdfParams(ApiModel): + optimize_for_ebook: bool | None = Field( + False, description="Optimize the output PDF for ebook reading using Ghostscript" + ) + + +class CertType(StrEnum): + pem = "PEM" + pkcs12 = "PKCS12" + pfx = "PFX" + jks = "JKS" + server = "SERVER" + + +class CertSignParams(ApiModel): + cert_type: CertType | None = Field(None, description="The type of the digital certificate") + location: str | None = Field("SPDF", description="The location where the PDF is signed") + name: str | None = Field("SPDF", description="The name of the signer") + page_number: int | None = Field( + 1, + description="The page number where the signature should be visible. This is required if showSignature is set to true", + ) + password: SecretStr | None = Field(None, description="The password for the keystore or the private key") + reason: str | None = Field("Signed by SPDF", description="The reason for signing the PDF") + show_logo: bool | None = Field( + True, description="Whether to visually show a signature logo along with the signature" + ) + show_signature: bool | None = Field(False, description="Whether to visually show the signature in the PDF file") + + +class LineArtEdgeLevel(IntEnum): + integer_1 = 1 + integer_2 = 2 + integer_3 = 3 + + +class OptimizeLevel(IntEnum): + integer_1 = 1 + integer_2 = 2 + integer_3 = 3 + integer_4 = 4 + integer_5 = 5 + integer_6 = 6 + integer_7 = 7 + integer_8 = 8 + integer_9 = 9 + + +class CompressPdfParams(ApiModel): + expected_output_size: str | None = Field("25KB", description="The expected output size, e.g. 
'100MB', '25KB', etc.") + grayscale: bool | None = Field(False, description="Whether to convert the PDF to grayscale. Default is false.") + line_art: bool | None = Field( + False, description="Whether to convert images to high-contrast line art using ImageMagick. Default is false." + ) + line_art_edge_level: LineArtEdgeLevel | None = Field( + LineArtEdgeLevel.integer_1, + description="Edge detection strength to use for line art conversion (1-3). This maps to ImageMagick's -edge radius.", + ) + line_art_threshold: float | None = Field(55, description="Threshold to use for line art conversion (0-100).") + linearize: bool | None = Field( + False, description="Whether to linearize the PDF for faster web viewing. Default is false." + ) + normalize: bool | None = Field( + False, description="Whether to normalize the PDF content for better compatibility. Default is false." + ) + optimize_level: OptimizeLevel | None = Field( + None, + description="The level of optimization to apply to the PDF file. 
Higher values indicate greater compression but may reduce quality.", + ) class CropParams(ApiModel): - crop_area: dict[str, float] = {"x": 0, "y": 0, "width": 595, "height": 842} + auto_crop: bool | None = Field(None, description="Enable auto-crop to detect and remove white space") + height: float | None = Field(None, description="The height of the crop area") + remove_data_outside_crop: bool | None = Field( + None, description="Whether to remove text outside the crop area (keeps images)" + ) + width: float | None = Field(None, description="The width of the crop area") + x: float | None = Field(None, description="The x-coordinate of the top-left corner of the crop area") + y: float | None = Field(None, description="The y-coordinate of the top-left corner of the crop area") + + +class DeleteAttachmentParams(ApiModel): + attachment_name: str | None = Field(None, description="The name of the attachment to delete") + + +class EmbedAllFonts(Enum): + boolean_true = True + boolean_false = False + + +class IncludePageNumbers(Enum): + boolean_true = True + boolean_false = False + + +class IncludeTableOfContents(Enum): + boolean_true = True + boolean_false = False + + +class OptimizeForEbook(Enum): + boolean_true = True + boolean_false = False + + +class EbookToPdfParams(ApiModel): + embed_all_fonts: EmbedAllFonts | None = Field( + EmbedAllFonts.boolean_false, description="Embed all fonts from the eBook into the generated PDF" + ) + include_page_numbers: IncludePageNumbers | None = Field( + IncludePageNumbers.boolean_false, description="Add page numbers to the generated PDF" + ) + include_table_of_contents: IncludeTableOfContents | None = Field( + IncludeTableOfContents.boolean_false, description="Add a generated table of contents to the resulting PDF" + ) + optimize_for_ebook: OptimizeForEbook | None = Field( + OptimizeForEbook.boolean_false, + description="Optimize the PDF for eBook reading (smaller file size, better rendering on eInk devices)", + ) class 
EditTableOfContentsParams(ApiModel): - bookmarks: list[dict[str, Any]] = [] - replace_existing: bool = True + bookmark_data: str | None = Field( + None, + description="Bookmark structure in JSON format", + examples=[ + '[{\\"title\\":\\"Chapter 1\\",\\"pageNumber\\":1,\\"children\\":[{\\"title\\":\\"Section 1.1\\",\\"pageNumber\\":2}]}]' + ], + ) + replace_existing: bool | None = Field( + None, description="Whether to replace existing bookmarks or append to them", examples=[True] + ) + + +class EmlToPdfParams(ApiModel): + download_html: bool | None = Field( + None, description="Download HTML intermediate file instead of PDF", examples=[False] + ) + include_all_recipients: bool | None = Field( + None, description="Include CC and BCC recipients in header (if available)", examples=[True] + ) + include_attachments: bool | None = Field( + None, description="Include email attachments in the PDF output", examples=[False] + ) + max_attachment_size_mb: int | None = Field( + None, + description="Maximum attachment size in MB to include (default 10MB, range: 1-100)", + examples=[10], + ge=1, + le=100, + ) + + +class ExtractImageScansParams(ApiModel): + angle_threshold: int | None = Field(5, description="The angle threshold for the image scan extraction") + border_size: int | None = Field(1, description="The border size for the image scan extraction") + min_area: int | None = Field(8000, description="The minimum area for the image scan extraction") + min_contour_area: int | None = Field(500, description="The minimum contour area for the image scan extraction") + tolerance: int | None = Field(20, description="The tolerance for the image scan extraction") + + +class Format(StrEnum): + png = "png" + jpeg = "jpeg" + gif = "gif" class ExtractImagesParams(ApiModel): - allow_duplicates: bool = False - format: Literal["png", "jpg", "gif"] = "png" - - -class ExtractPagesParams(ApiModel): - page_numbers: str = "" + format: Format | None = Field(Format.png, description="The output image 
format e.g., 'png', 'jpeg', or 'gif'") class FlattenParams(ApiModel): - flatten_only_forms: bool = False + flatten_only_forms: bool | None = Field( + False, description="True to flatten only the forms, false to flatten full PDF (Convert page to image)" + ) + render_dpi: int | None = Field( + None, description="Optional DPI for page rendering when flattening the full document.", ge=72 + ) -class MergeParams(ApiModel): - generate_table_of_contents: bool = False - remove_digital_signature: bool = False +class HtmlToPdfParams(ApiModel): + zoom: float | None = Field(1, description="Zoom level for displaying the website. Default is '1'.") -class OcrParams(ApiModel): - additional_options: list[str] = [] - languages: list[str] = [] - ocr_render_type: str = "hocr" - ocr_type: str = "skip-text" +class ColorType(StrEnum): + color = "color" + greyscale = "greyscale" + blackwhite = "blackwhite" + + +class FitOption(StrEnum): + fill_page = "fillPage" + fit_document_to_image = "fitDocumentToImage" + maintain_aspect_ratio = "maintainAspectRatio" + + +class ImgToPdfParams(ApiModel): + auto_rotate: bool | None = Field( + False, description="Whether to automatically rotate the images to better fit the PDF page" + ) + color_type: ColorType | None = Field(ColorType.color, description="The color type of the output image(s)") + fit_option: FitOption | None = Field( + FitOption.fill_page, description="Option to determine how the image will fit onto the page" + ) + + +class SortType(StrEnum): + order_provided = "orderProvided" + by_file_name = "byFileName" + by_date_modified = "byDateModified" + by_date_created = "byDateCreated" + by_pdf_title = "byPDFTitle" + + +class MergePdfsParams(ApiModel): + client_file_ids: str | None = Field( + None, description="JSON array of client-provided IDs for each uploaded file (same order as fileInput)" + ) + generate_toc: bool | None = Field( + False, + description="Flag indicating whether to generate a table of contents for the merged PDF. 
If true, a table of contents will be created using the input filenames as chapter names.", + ) + remove_cert_sign: bool | None = Field( + True, + description="Flag indicating whether to remove certification signatures from the merged PDF. If true, all certification signatures will be removed from the final merged document.", + ) + sort_type: SortType | None = Field( + SortType.order_provided, description="The type of sorting to be applied on the input files before merging." + ) + + +class Arrangement(StrEnum): + by_rows = "BY_ROWS" + by_columns = "BY_COLUMNS" + + +class Mode(StrEnum): + default = "DEFAULT" + custom = "CUSTOM" + + +class Orientation(StrEnum): + portrait = "PORTRAIT" + landscape = "LANDSCAPE" + + +class PagesPerSheet1(IntEnum): + integer_2 = 2 + integer_4 = 4 + integer_9 = 9 + integer_16 = 16 + + +class ReadingDirection(StrEnum): + ltr = "LTR" + rtl = "RTL" + + +class MultiPageLayoutParams(ApiModel): + add_border: bool | None = Field(None, description="Boolean for if you wish to add border around the pages") + arrangement: Arrangement | None = Field( + Arrangement.by_rows, + description="The arrangement of pages on the sheet: BY_ROWS fills pages row by row, while BY_COLUMNS fills pages column by column.", + ) + border_width: float | None = Field( + 1, description="Border width (in points) to apply around each page when merging", examples=[2], ge=0.0 + ) + bottom_margin: float | None = Field( + 0, description="Bottom margin (in points) to apply to the output pages when merging", examples=[200], ge=0.0 + ) + cols: float | None = Field(2, description="Number of columns", examples=[2], ge=1.0, le=300.0) + inner_margin: float | None = Field( + 0, description="Inner margin (in points) to apply around each page when merging", examples=[200], ge=0.0 + ) + left_margin: float | None = Field( + 0, description="Left margin (in points) to apply to the output pages when merging", examples=[200], ge=0.0 + ) + mode: Mode | None = Field( + Mode.default, 
description="Input mode: DEFAULT uses pagesPerSheet; CUSTOM uses explicit cols x rows." + ) + orientation: Orientation | None = Field(Orientation.portrait, description="The orientation of the output PDF pages") + pages_per_sheet: PagesPerSheet1 | None = Field( + None, description="The number of pages to fit onto a single sheet in the output PDF." + ) + reading_direction: ReadingDirection | None = Field( + ReadingDirection.ltr, + description="The direction in which pages are arranged on the sheet: LTR (left-to-right) or RTL (right-to-left).", + ) + right_margin: float | None = Field( + 0, description="Right margin (in points) to apply to the output pages when merging", examples=[200], ge=0.0 + ) + rows: float | None = Field(1, description="Number of rows", examples=[3], ge=1.0, le=300.0) + top_margin: float | None = Field( + 0, description="Top margin (in points) to apply to the output pages when merging", examples=[200], ge=0.0 + ) + + +class OcrRenderType(StrEnum): + hocr = "hocr" + sandwich = "sandwich" + + +class OcrType(StrEnum): + skip_text = "skip-text" + force_ocr = "force-ocr" + normal = "Normal" + + +class OcrPdfParams(ApiModel): + clean: bool | None = Field(None, description="Clean the input file if set to true") + clean_final: bool | None = Field(None, description="Clean the final output if set to true") + deskew: bool | None = Field(None, description="Deskew the input file if set to true") + languages: list[str] | None = Field( + ["eng"], description="List of languages to use in OCR processing, e.g., 'eng', 'deu'" + ) + ocr_render_type: OcrRenderType | None = Field( + OcrRenderType.hocr, description="Specify the OCR render type, either 'hocr' or 'sandwich'" + ) + ocr_type: OcrType | None = Field( + None, description="Specify the OCR type, e.g., 'skip-text', 'force-ocr', or 'Normal'" + ) + remove_images_after: bool | None = Field(None, description="Remove images from the output PDF if set to true") + sidecar: bool | None = Field(None, 
description="Include OCR text in a sidecar text file if set to true") + + +class OverlayMode(StrEnum): + sequential_overlay = "SequentialOverlay" + interleaved_overlay = "InterleavedOverlay" + fixed_repeat_overlay = "FixedRepeatOverlay" + + +class OverlayPosition(Enum): + number_0 = 0 + number_1 = 1 class OverlayPdfsParams(ApiModel): - counts: list[float] = [] - overlay_files: list[dict[str, Any]] = [] - overlay_mode: Literal["SequentialOverlay", "InterleavedOverlay", "FixedRepeatOverlay"] = "SequentialOverlay" - overlay_position: Literal[0, 1] = 0 + counts: list[int] | None = Field( + None, + description="An array of integers specifying the number of times each corresponding overlay file should be applied in the 'FixedRepeatOverlay' mode. This should match the length of the overlayFiles array.", + ) + overlay_files: list[bytes] | None = Field( + None, + description="An array of PDF files to be used as overlays on the base PDF. The order in these files is applied based on the selected mode.", + ) + overlay_mode: OverlayMode | None = Field( + None, + description="The mode of overlaying: 'SequentialOverlay' for sequential application, 'InterleavedOverlay' for round-robin application, 'FixedRepeatOverlay' for fixed repetition based on provided counts", + ) + overlay_position: OverlayPosition | None = Field( + None, description="Overlay position 0 is Foregound, 1 is Background" + ) -class PageLayoutParams(ApiModel): - add_border: bool = False - pages_per_sheet: float = 4 +class PdfToCbrParams(ApiModel): + dpi: int | None = Field( + None, description="The DPI (Dots Per Inch) for rendering PDF pages as images", examples=[150] + ) -class PdfToSinglePageParams(ApiModel): - pass +class PdfToCbzParams(ApiModel): + dpi: int | None = Field( + None, description="The DPI (Dots Per Inch) for rendering PDF pages as images", examples=[150] + ) -class RedactParams(ApiModel): - convert_pdfto_image: bool = True - custom_padding: float = 0.1 - mode: Literal["automatic", "manual"] = 
"automatic" - redact_color: str = "#000000" - use_regex: bool = False - whole_word_search: bool = False - words_to_redact: list[str] = [] +class PdfToCsvParams(ApiModel): + page_numbers: str | None = Field( + "all", + description="The pages to select, Supports ranges (e.g., '1,3,5-9'), or 'all' or functions in the format 'an+b' where 'a' is the multiplier of the page number 'n', and 'b' is a constant (e.g., '2n+1', '3n', '6n-5')", + ) -class RemoveAnnotationsParams(ApiModel): - pass +class DetectChapters(Enum): + boolean_true = True + boolean_false = False + + +class OutputFormat(StrEnum): + epub = "EPUB" + azw3 = "AZW3" + epub_1 = "EPUB" + azw3_1 = "AZW3" + + +class TargetDevice(StrEnum): + tablet_phone_images = "TABLET_PHONE_IMAGES" + kindle_eink_text = "KINDLE_EINK_TEXT" + tablet_phone_images_1 = "TABLET_PHONE_IMAGES" + kindle_eink_text_1 = "KINDLE_EINK_TEXT" + + +class PdfToEpubParams(ApiModel): + detect_chapters: DetectChapters | None = Field( + DetectChapters.boolean_true, description="Detect headings that look like chapters and insert EPUB page breaks." + ) + output_format: OutputFormat | None = Field(OutputFormat.epub, description="Choose the output format for the ebook.") + target_device: TargetDevice | None = Field( + TargetDevice.tablet_phone_images, description="Choose an output profile optimized for the reader device." 
+ ) + + +class ImageFormat(StrEnum): + png = "png" + jpeg = "jpeg" + jpg = "jpg" + gif = "gif" + webp = "webp" + + +class SingleOrMultiple(StrEnum): + single = "single" + multiple = "multiple" + + +class PdfToImgParams(ApiModel): + color_type: ColorType | None = Field(ColorType.color, description="The color type of the output image(s)") + dpi: int | None = Field(300, description="The DPI (dots per inch) for the output image(s)") + image_format: ImageFormat | None = Field(ImageFormat.png, description="The output image format") + include_annotations: bool | None = Field( + False, description="Include annotations such as comments in the output image(s)" + ) + page_numbers: str | None = Field( + "all", + description="The pages to select, Supports ranges (e.g., '1,3,5-9'), or 'all' or functions in the format 'an+b' where 'a' is the multiplier of the page number 'n', and 'b' is a constant (e.g., '2n+1', '3n', '6n-5')", + ) + single_or_multiple: SingleOrMultiple | None = Field( + SingleOrMultiple.multiple, + description="Choose between a single image containing all pages or separate images for each page", + ) + + +class OutputFormat1(StrEnum): + pdfa = "pdfa" + pdfa_1 = "pdfa-1" + pdfa_2 = "pdfa-2" + pdfa_2b = "pdfa-2b" + pdfa_3 = "pdfa-3" + pdfa_3b = "pdfa-3b" + pdfx = "pdfx" + + +class PdfToPdfaParams(ApiModel): + output_format: OutputFormat1 | None = Field(None, description="The output format type (PDF/A or PDF/X)") + strict: bool | None = Field( + None, description="If true, the conversion will fail if the output is not perfectly compliant" + ) + + +class OutputFormat2(StrEnum): + ppt = "ppt" + pptx = "pptx" + odp = "odp" + + +class PdfToPresentationParams(ApiModel): + output_format: OutputFormat2 | None = Field(None, description="The output Presentation format") + + +class OutputFormat3(StrEnum): + rtf = "rtf" + txt = "txt" + + +class PdfToTextParams(ApiModel): + output_format: OutputFormat3 | None = Field(None, description="The output Text or RTF format") + + +class 
OutputFormat4(StrEnum): + eps = "eps" + ps = "ps" + pcl = "pcl" + xps = "xps" + + +class Prepress(Enum): + boolean_true = True + boolean_false = False + + +class PdfToVectorParams(ApiModel): + output_format: OutputFormat4 | None = Field(OutputFormat4.eps, description="Target vector format extension") + prepress: Prepress | None = Field(Prepress.boolean_false, description="Apply Ghostscript prepress settings") + + +class OutputFormat5(StrEnum): + doc = "doc" + docx = "docx" + odt = "odt" + + +class PdfToWordParams(ApiModel): + output_format: OutputFormat5 | None = Field(None, description="The output Word document format") + + +class PdfToXlsxParams(ApiModel): + page_numbers: str | None = Field( + "all", + description="The pages to select, Supports ranges (e.g., '1,3,5-9'), or 'all' or functions in the format 'an+b' where 'a' is the multiplier of the page number 'n', and 'b' is a constant (e.g., '2n+1', '3n', '6n-5')", + ) + + +class CustomMode(StrEnum): + custom = "CUSTOM" + reverse_order = "REVERSE_ORDER" + duplex_sort = "DUPLEX_SORT" + booklet_sort = "BOOKLET_SORT" + side_stitch_booklet_sort = "SIDE_STITCH_BOOKLET_SORT" + odd_even_split = "ODD_EVEN_SPLIT" + remove_first = "REMOVE_FIRST" + remove_last = "REMOVE_LAST" + remove_first_and_last = "REMOVE_FIRST_AND_LAST" + duplicate = "DUPLICATE" + + +class RearrangePagesParams(ApiModel): + custom_mode: CustomMode | None = Field( + None, + description="The custom mode for page rearrangement. Valid values are:\nCUSTOM: Uses order defined in PageNums DUPLICATE: Duplicate pages n times (if Page order defined as 4, then duplicates each page 4 times)REVERSE_ORDER: Reverses the order of all pages.\nDUPLEX_SORT: Sorts pages as if all fronts were scanned then all backs in reverse (1, n, 2, n-1, ...). 
BOOKLET_SORT: Arranges pages for booklet printing (last, first, second, second last, ...).\nODD_EVEN_SPLIT: Splits and arranges pages into odd and even numbered pages.\nREMOVE_FIRST: Removes the first page.\nREMOVE_LAST: Removes the last page.\nREMOVE_FIRST_AND_LAST: Removes both the first and the last pages.\n", + ) + page_numbers: str | None = Field( + "all", + description="The pages to select, Supports ranges (e.g., '1,3,5-9'), or 'all' or functions in the format 'an+b' where 'a' is the multiplier of the page number 'n', and 'b' is a constant (e.g., '2n+1', '3n', '6n-5')", + ) class RemoveBlanksParams(ApiModel): - include_blank_pages: bool = False - threshold: float = 10 - white_percent: float = 99.9 - - -class RemoveCertSignParams(ApiModel): - pass - - -class RemoveImageParams(ApiModel): - pass + threshold: int | None = Field(10, description="The threshold value to determine blank pages", ge=0, le=255) + white_percent: float | None = Field( + 99.9, description="The percentage of white color on a page to consider it as blank", ge=0.1, le=100.0 + ) class RemovePagesParams(ApiModel): - page_numbers: str = "" + page_numbers: str | None = Field( + "all", + description="The pages to select, Supports ranges (e.g., '1,3,5-9'), or 'all' or functions in the format 'an+b' where 'a' is the multiplier of the page number 'n', and 'b' is a constant (e.g., '2n+1', '3n', '6n-5')", + ) class RemovePasswordParams(ApiModel): - password: str = "" + password: SecretStr | None = Field(None, description="The password of the PDF file") -class ReorganizePagesParams(ApiModel): - custom_mode: str | None = None - page_numbers: str | None = None +class RenameAttachmentParams(ApiModel): + attachment_name: str | None = Field(None, description="The current name of the attachment to rename") + new_name: str | None = Field(None, description="The new name for the attachment") -class RepairParams(ApiModel): - pass +class HighContrastColorCombination(StrEnum): + white_text_on_black = 
"WHITE_TEXT_ON_BLACK" + black_text_on_white = "BLACK_TEXT_ON_WHITE" + yellow_text_on_black = "YELLOW_TEXT_ON_BLACK" + green_text_on_black = "GREEN_TEXT_ON_BLACK" + white_text_on_black_1 = "WHITE_TEXT_ON_BLACK" + black_text_on_white_1 = "BLACK_TEXT_ON_WHITE" + yellow_text_on_black_1 = "YELLOW_TEXT_ON_BLACK" + green_text_on_black_1 = "GREEN_TEXT_ON_BLACK" -class ReplaceColorParams(ApiModel): - back_ground_color: str = "#ffffff" - high_contrast_color_combination: Literal[ - "WHITE_TEXT_ON_BLACK", "BLACK_TEXT_ON_WHITE", "YELLOW_TEXT_ON_BLACK", "GREEN_TEXT_ON_BLACK" - ] = "WHITE_TEXT_ON_BLACK" - replace_and_invert_option: Literal[ - "HIGH_CONTRAST_COLOR", "CUSTOM_COLOR", "FULL_INVERSION", "COLOR_SPACE_CONVERSION" - ] = "HIGH_CONTRAST_COLOR" - text_color: str = "#000000" +class ReplaceAndInvertOption(StrEnum): + high_contrast_color = "HIGH_CONTRAST_COLOR" + custom_color = "CUSTOM_COLOR" + full_inversion = "FULL_INVERSION" + color_space_conversion = "COLOR_SPACE_CONVERSION" + high_contrast_color_1 = "HIGH_CONTRAST_COLOR" + custom_color_1 = "CUSTOM_COLOR" + full_inversion_1 = "FULL_INVERSION" + color_space_conversion_1 = "COLOR_SPACE_CONVERSION" -class RotateParams(ApiModel): - angle: float = 0 +class ReplaceInvertPdfParams(ApiModel): + back_ground_color: str | None = Field( + None, + description="If CUSTOM_COLOR option selected, then pick the custom color for background. Expected color value should be 24bit decimal value of a color", + ) + high_contrast_color_combination: HighContrastColorCombination | None = Field( + HighContrastColorCombination.white_text_on_black, + description="If HIGH_CONTRAST_COLOR option selected, then pick the default color option for text and background.", + ) + replace_and_invert_option: ReplaceAndInvertOption | None = Field( + ReplaceAndInvertOption.high_contrast_color, description="Replace and Invert color options of a pdf." 
+ ) + text_color: str | None = Field( + None, + description="If CUSTOM_COLOR option selected, then pick the custom color for text. Expected color value should be 24bit decimal value of a color", + ) -class SanitizeParams(ApiModel): - remove_embedded_files: bool = True - remove_fonts: bool = False - remove_java_script: bool = True - remove_links: bool = False - remove_metadata: bool = False - remove_xmpmetadata: bool = False +class Angle(IntEnum): + integer_0 = 0 + integer_90 = 90 + integer_180 = 180 + integer_270 = 270 + + +class RotatePdfParams(ApiModel): + angle: Angle | None = Field( + None, description="The clockwise angle by which to rotate the PDF file. Must be a multiple of 90." + ) + + +class SanitizePdfParams(ApiModel): + remove_embedded_files: bool | None = Field(True, description="Remove embedded files from the PDF") + remove_fonts: bool | None = Field(False, description="Remove fonts from the PDF") + remove_java_script: bool | None = Field(True, description="Remove JavaScript actions from the PDF") + remove_links: bool | None = Field(False, description="Remove links from the PDF") + remove_metadata: bool | None = Field(False, description="Remove document info metadata from the PDF") + remove_xmp_metadata: bool | None = Field(False, description="Remove XMP metadata from the PDF") + + +class PageSize(StrEnum): + a0 = "A0" + a1 = "A1" + a2 = "A2" + a3 = "A3" + a4 = "A4" + a5 = "A5" + a6 = "A6" + letter = "LETTER" + legal = "LEGAL" + keep = "KEEP" class ScalePagesParams(ApiModel): - page_size: Literal["KEEP", "A0", "A1", "A2", "A3", "A4", "A5", "A6", "LETTER", "LEGAL"] | None = None - scale_factor: float = 1 + page_size: PageSize | None = Field( + None, description="The scale of pages in the output PDF. Acceptable values are A0-A6, LETTER, LEGAL, KEEP." + ) + scale_factor: float | None = Field( + 1, description="The scale of the content on the pages of the output PDF. 
Acceptable values are floats.", ge=0.0 + ) -class ScannerImageSplitParams(ApiModel): - angle_threshold: float = 10 - border_size: float = 1 - min_area: float = 10000 - min_contour_area: float = 500 - tolerance: float = 30 +class Colorspace(StrEnum): + grayscale = "grayscale" + color = "color" -class SignParams(ApiModel): - font_family: str = "Helvetica" - font_size: float = 16 - location: str = "Digital" - reason: str = "Document signing" - signature_data: str | None = None - signature_position: dict[str, float] | None = None - signature_type: Literal["text", "image", "canvas"] = "canvas" - signer_name: str = "" - text_color: str = "#000000" +class Quality(StrEnum): + low = "low" + medium = "medium" + high = "high" -class SplitParams(ApiModel): - allow_duplicates: bool = False - bookmark_level: str = "1" - duplex_mode: bool = False - h_div: str = "2" - include_metadata: bool = False - merge: bool = False - method: Literal[ - "", "byPages", "bySections", "bySize", "byPageCount", "byDocCount", "byChapters", "byPageDivider" - ] = "" - pages: str = "" - split_value: str = "" - v_div: str = "2" +class Rotation(StrEnum): + none = "none" + slight = "slight" + moderate = "moderate" + severe = "severe" -class UnlockPdfformsParams(ApiModel): - pass +class ScannerEffectParams(ApiModel): + advanced_enabled: bool | None = Field(None, description="Whether advanced settings are enabled", examples=[False]) + blur: float | None = Field(None, description="Blur amount (0 = none, higher = more blur)", examples=[1.0]) + border: int | None = Field(None, description="Border thickness in pixels", examples=[20]) + brightness: float | None = Field(None, description="Brightness multiplier (1.0 = no change)", examples=[1.0]) + colorspace: Colorspace | None = Field(None, description="Colorspace for output image", examples=["grayscale"]) + contrast: float | None = Field(None, description="Contrast multiplier (1.0 = no change)", examples=[1.0]) + noise: float | None = Field(None, 
description="Noise amount (0 = none, higher = more noise)", examples=[8.0]) + quality: Quality | None = Field(None, description="Scan quality preset", examples=["high"]) + resolution: int | None = Field(None, description="Rendering resolution in DPI", examples=[300]) + rotate: int | None = Field(None, description="Base rotation in degrees", examples=[0]) + rotate_variance: int | None = Field(None, description="Random rotation variance in degrees", examples=[2]) + rotation: Rotation | None = Field(None, description="Rotation preset", examples=["none"]) + rotation_value: int | None = None + yellowish: bool | None = Field(None, description="Simulate yellowed paper", examples=[False]) -class WatermarkParams(ApiModel): - alphabet: str = "roman" - convert_pdfto_image: bool = False - custom_color: str = "#d3d3d3" - font_size: float = 12 - height_spacer: float = 50 - opacity: float = 50 - rotation: float = 0 - watermark_image: dict[str, Any] | None = None - watermark_text: str = "" - watermark_type: Literal["text", "image"] | None = None - width_spacer: float = 50 +class WorkflowType(StrEnum): + signing = "SIGNING" + review = "REVIEW" + approval = "APPROVAL" + + +class Request(ApiModel): + document_name: str | None = None + due_date: str | None = None + message: str | None = None + owner_email: str | None = None + participant_emails: list[str] | None = None + participant_user_ids: list[int] | None = None + workflow_metadata: str | None = None + workflow_type: WorkflowType | None = None + + +class SessionsParams(ApiModel): + request: Request | None = None + + +class SplitBySizeOrCountParams(ApiModel): + split_type: int | None = Field( + 0, description="Determines the type of split: 0 for size, 1 for page count, 2 for document count" + ) + split_value: str | None = Field( + "10MB", description="Value for split: size in MB (e.g., '10MB') or number of pages (e.g., '5')" + ) + + +class PageSize1(StrEnum): + a4 = "A4" + letter = "Letter" + a3 = "A3" + a5 = "A5" + legal = "Legal" 
+ tabloid = "Tabloid" + + +class SplitForPosterPrintParams(ApiModel): + page_size: PageSize1 | None = Field( + None, description="Target page size for output chunks (e.g., 'A4', 'Letter', 'A3')" + ) + right_to_left: bool | None = Field(False, description="Split right-to-left instead of left-to-right") + xfactor: int | None = None + yfactor: int | None = None + + +class SplitPagesParams(ApiModel): + page_numbers: str | None = Field( + "all", + description="The pages to select, Supports ranges (e.g., '1,3,5-9'), or 'all' or functions in the format 'an+b' where 'a' is the multiplier of the page number 'n', and 'b' is a constant (e.g., '2n+1', '3n', '6n-5')", + ) + + +class SplitPdfByChaptersParams(ApiModel): + allow_duplicates: bool | None = Field(False, description="Whether to allow duplicates or not") + bookmark_level: int | None = Field(0, description="Maximum bookmark level required", ge=0) + include_metadata: bool | None = Field(False, description="Whether to include Metadata or not") + + +class SplitMode(StrEnum): + custom = "CUSTOM" + split_all_except_first_and_last = "SPLIT_ALL_EXCEPT_FIRST_AND_LAST" + split_all_except_first = "SPLIT_ALL_EXCEPT_FIRST" + split_all_except_last = "SPLIT_ALL_EXCEPT_LAST" + split_all = "SPLIT_ALL" + + +class SplitPdfBySectionsParams(ApiModel): + horizontal_divisions: int | None = Field( + 0, description="Number of horizontal divisions for each PDF page", ge=0, le=50 + ) + merge: bool | None = Field(False, description="Merge the split documents into a single PDF") + page_numbers: str | None = Field("SPLIT_ALL", description="Pages to be split by section") + split_mode: SplitMode | None = Field( + None, + description="Modes for page split. 
Valid values are:\nSPLIT_ALL_EXCEPT_FIRST_AND_LAST: Splits all except the first and the last pages.\nSPLIT_ALL_EXCEPT_FIRST: Splits all except the first page.\nSPLIT_ALL_EXCEPT_LAST: Splits all except the last page.\nSPLIT_ALL: Splits all pages.\nCUSTOM: Custom split.\n", + ) + vertical_divisions: int | None = Field(1, description="Number of vertical divisions for each PDF page", ge=0, le=50) + + +class SvgToPdfParams(ApiModel): + combine_into_single_pdf: bool | None = Field( + False, + description="Whether to combine all SVG files into a single PDF (each SVG as a separate page) or create separate PDF files for each SVG.", + ) + + +class TimestampPdfParams(ApiModel): + tsa_url: str | None = Field( + "http://timestamp.digicert.com", + description="URL of the RFC 3161 Time Stamp Authority (TSA) server. Must be one of the built-in presets (DigiCert, Sectigo, SSL.com, FreeTSA, MeSign) or an admin-configured URL in settings.yml (security.timestamp.customTsaUrls). If omitted, the server default is used.", + ) + + +class Trapped(StrEnum): + true = "True" + false = "False" + unknown = "Unknown" + + +class UpdateMetadataParams(ApiModel): + all_request_params: dict[str, str] | None = Field( + None, + description="Map list of key and value of custom parameters. 
Note these must start with customKey and customValue if they are non-standard", + ) + author: str | None = Field("author", description="The author of the document") + creation_date: str | None = Field( + "2023/10/01 12:00:00", + description="The creation date of the document (format: yyyy/MM/dd HH:mm:ss)", + pattern="yyyy/MM/dd HH:mm:ss", + ) + creator: str | None = Field("creator", description="The creator of the document") + delete_all: bool | None = Field(False, description="Delete all metadata if set to true") + keywords: str | None = Field("keywords", description="The keywords for the document") + modification_date: str | None = Field( + "2023/10/01 12:00:00", + description="The modification date of the document (format: yyyy/MM/dd HH:mm:ss)", + pattern="yyyy/MM/dd HH:mm:ss", + ) + producer: str | None = Field("producer", description="The producer of the document") + subject: str | None = Field("subject", description="The subject of the document") + title: str | None = Field("title", description="The title of the document") + trapped: Trapped | None = Field(Trapped.false, description="The trapped status of the document") + + +class UrlToPdfParams(ApiModel): + url_input: str | None = Field(None, description="The input URL to be converted to a PDF file") + + +class OutputFormat6(StrEnum): + eps = "eps" + ps = "ps" + pcl = "pcl" + xps = "xps" + + +class VectorToPdfParams(ApiModel): + output_format: OutputFormat6 | None = Field(OutputFormat6.eps, description="Target vector format extension") + prepress: Prepress | None = Field(Prepress.boolean_false, description="Apply Ghostscript prepress settings") + + +class RedactionArea(RootModel[Any]): + root: Any + + +class RedactParams(ApiModel): + convert_pdf_to_image: bool | None = Field(False, description="Convert the redacted PDF to an image") + page_numbers: str | None = Field( + "all", + description="The pages to select, Supports ranges (e.g., '1,3,5-9'), or 'all' or functions in the format 'an+b' where 'a' is the 
multiplier of the page number 'n', and 'b' is a constant (e.g., '2n+1', '3n', '6n-5')", + ) + page_redaction_color: str | None = Field("#000000", description="The color used to fully redact certain pages") + redactions: list[RedactionArea] | None = Field(None, description="A list of areas that should be redacted") + + +class Model( + RootModel[ + CbrToPdfParams + | CbzToPdfParams + | EbookToPdfParams + | EmlToPdfParams + | HtmlToPdfParams + | ImgToPdfParams + | PdfToCbrParams + | PdfToCbzParams + | PdfToCsvParams + | PdfToEpubParams + | PdfToImgParams + | PdfToPdfaParams + | PdfToPresentationParams + | PdfToTextParams + | PdfToVectorParams + | PdfToWordParams + | PdfToXlsxParams + | SvgToPdfParams + | UrlToPdfParams + | VectorToPdfParams + | BookletImpositionParams + | CropParams + | EditTableOfContentsParams + | MergePdfsParams + | MultiPageLayoutParams + | OverlayPdfsParams + | RearrangePagesParams + | RemovePagesParams + | RotatePdfParams + | ScalePagesParams + | SplitBySizeOrCountParams + | SplitForPosterPrintParams + | SplitPagesParams + | SplitPdfByChaptersParams + | SplitPdfBySectionsParams + | AddAttachmentsParams + | AddImageParams + | AddPageNumbersParams + | AddStampParams + | AutoRenameParams + | AutoSplitPdfParams + | CompressPdfParams + | DeleteAttachmentParams + | ExtractImageScansParams + | ExtractImagesParams + | FlattenParams + | OcrPdfParams + | RemoveBlanksParams + | RenameAttachmentParams + | ReplaceInvertPdfParams + | ScannerEffectParams + | UpdateMetadataParams + | AddPasswordParams + | AddWatermarkParams + | AutoRedactParams + | CertSignParams + | SessionsParams + | RedactParams + | RemovePasswordParams + | SanitizePdfParams + | TimestampPdfParams + ] +): + root: ( + CbrToPdfParams + | CbzToPdfParams + | EbookToPdfParams + | EmlToPdfParams + | HtmlToPdfParams + | ImgToPdfParams + | PdfToCbrParams + | PdfToCbzParams + | PdfToCsvParams + | PdfToEpubParams + | PdfToImgParams + | PdfToPdfaParams + | PdfToPresentationParams + | PdfToTextParams + 
| PdfToVectorParams + | PdfToWordParams + | PdfToXlsxParams + | SvgToPdfParams + | UrlToPdfParams + | VectorToPdfParams + | BookletImpositionParams + | CropParams + | EditTableOfContentsParams + | MergePdfsParams + | MultiPageLayoutParams + | OverlayPdfsParams + | RearrangePagesParams + | RemovePagesParams + | RotatePdfParams + | ScalePagesParams + | SplitBySizeOrCountParams + | SplitForPosterPrintParams + | SplitPagesParams + | SplitPdfByChaptersParams + | SplitPdfBySectionsParams + | AddAttachmentsParams + | AddImageParams + | AddPageNumbersParams + | AddStampParams + | AutoRenameParams + | AutoSplitPdfParams + | CompressPdfParams + | DeleteAttachmentParams + | ExtractImageScansParams + | ExtractImagesParams + | FlattenParams + | OcrPdfParams + | RemoveBlanksParams + | RenameAttachmentParams + | ReplaceInvertPdfParams + | ScannerEffectParams + | UpdateMetadataParams + | AddPasswordParams + | AddWatermarkParams + | AutoRedactParams + | CertSignParams + | SessionsParams + | RedactParams + | RemovePasswordParams + | SanitizePdfParams + | TimestampPdfParams + ) type ParamToolModel = ( - AddAttachmentsParams - | AddPasswordParams - | AdjustContrastParams - | AutoRenameParams - | AutomateParams + CbrToPdfParams + | CbzToPdfParams + | EbookToPdfParams + | EmlToPdfParams + | HtmlToPdfParams + | ImgToPdfParams + | PdfToCbrParams + | PdfToCbzParams + | PdfToCsvParams + | PdfToEpubParams + | PdfToImgParams + | PdfToPdfaParams + | PdfToPresentationParams + | PdfToTextParams + | PdfToVectorParams + | PdfToWordParams + | PdfToXlsxParams + | SvgToPdfParams + | UrlToPdfParams + | VectorToPdfParams | BookletImpositionParams - | CertSignParams - | ChangeMetadataParams - | ChangePermissionsParams - | CompressParams - | ConvertParams | CropParams | EditTableOfContentsParams - | ExtractImagesParams - | ExtractPagesParams - | FlattenParams - | MergeParams - | OcrParams + | MergePdfsParams + | MultiPageLayoutParams | OverlayPdfsParams - | PageLayoutParams - | PdfToSinglePageParams - | 
RedactParams - | RemoveAnnotationsParams - | RemoveBlanksParams - | RemoveCertSignParams - | RemoveImageParams + | RearrangePagesParams | RemovePagesParams - | RemovePasswordParams - | ReorganizePagesParams - | RepairParams - | ReplaceColorParams - | RotateParams - | SanitizeParams + | RotatePdfParams | ScalePagesParams - | ScannerImageSplitParams - | SignParams - | SplitParams - | UnlockPdfformsParams - | WatermarkParams + | SplitBySizeOrCountParams + | SplitForPosterPrintParams + | SplitPagesParams + | SplitPdfByChaptersParams + | SplitPdfBySectionsParams + | AddAttachmentsParams + | AddImageParams + | AddPageNumbersParams + | AddStampParams + | AutoRenameParams + | AutoSplitPdfParams + | CompressPdfParams + | DeleteAttachmentParams + | ExtractImageScansParams + | ExtractImagesParams + | FlattenParams + | OcrPdfParams + | RemoveBlanksParams + | RenameAttachmentParams + | ReplaceInvertPdfParams + | ScannerEffectParams + | UpdateMetadataParams + | AddPasswordParams + | AddWatermarkParams + | AutoRedactParams + | CertSignParams + | SessionsParams + | RedactParams + | RemovePasswordParams + | SanitizePdfParams + | TimestampPdfParams ) type ParamToolModelType = type[ParamToolModel] -class OperationId(StrEnum): - ADD_ATTACHMENTS = "addAttachments" - ADD_PASSWORD = "addPassword" - ADJUST_CONTRAST = "adjustContrast" - AUTO_RENAME = "autoRename" - AUTOMATE = "automate" - BOOKLET_IMPOSITION = "bookletImposition" - CERT_SIGN = "certSign" - CHANGE_METADATA = "changeMetadata" - CHANGE_PERMISSIONS = "changePermissions" - COMPRESS = "compress" - CONVERT = "convert" - CROP = "crop" - EDIT_TABLE_OF_CONTENTS = "editTableOfContents" - EXTRACT_IMAGES = "extractImages" - EXTRACT_PAGES = "extractPages" - FLATTEN = "flatten" - MERGE = "merge" - OCR = "ocr" - OVERLAY_PDFS = "overlayPdfs" - PAGE_LAYOUT = "pageLayout" - PDF_TO_SINGLE_PAGE = "pdfToSinglePage" - REDACT = "redact" - REMOVE_ANNOTATIONS = "removeAnnotations" - REMOVE_BLANKS = "removeBlanks" - REMOVE_CERT_SIGN = 
"removeCertSign" - REMOVE_IMAGE = "removeImage" - REMOVE_PAGES = "removePages" - REMOVE_PASSWORD = "removePassword" - REORGANIZE_PAGES = "reorganizePages" - REPAIR = "repair" - REPLACE_COLOR = "replaceColor" - ROTATE = "rotate" - SANITIZE = "sanitize" - SCALE_PAGES = "scalePages" - SCANNER_IMAGE_SPLIT = "scannerImageSplit" - SIGN = "sign" - SPLIT = "split" - UNLOCK_PDFFORMS = "unlockPDFForms" - WATERMARK = "watermark" +class ToolEndpoint(StrEnum): + CBR_TO_PDF = "/api/v1/convert/cbr/pdf" + CBZ_TO_PDF = "/api/v1/convert/cbz/pdf" + EBOOK_TO_PDF = "/api/v1/convert/ebook/pdf" + EML_TO_PDF = "/api/v1/convert/eml/pdf" + HTML_TO_PDF = "/api/v1/convert/html/pdf" + IMG_TO_PDF = "/api/v1/convert/img/pdf" + PDF_TO_CBR = "/api/v1/convert/pdf/cbr" + PDF_TO_CBZ = "/api/v1/convert/pdf/cbz" + PDF_TO_CSV = "/api/v1/convert/pdf/csv" + PDF_TO_EPUB = "/api/v1/convert/pdf/epub" + PDF_TO_IMG = "/api/v1/convert/pdf/img" + PDF_TO_PDFA = "/api/v1/convert/pdf/pdfa" + PDF_TO_PRESENTATION = "/api/v1/convert/pdf/presentation" + PDF_TO_TEXT = "/api/v1/convert/pdf/text" + PDF_TO_VECTOR = "/api/v1/convert/pdf/vector" + PDF_TO_WORD = "/api/v1/convert/pdf/word" + PDF_TO_XLSX = "/api/v1/convert/pdf/xlsx" + SVG_TO_PDF = "/api/v1/convert/svg/pdf" + URL_TO_PDF = "/api/v1/convert/url/pdf" + VECTOR_TO_PDF = "/api/v1/convert/vector/pdf" + BOOKLET_IMPOSITION = "/api/v1/general/booklet-imposition" + CROP = "/api/v1/general/crop" + EDIT_TABLE_OF_CONTENTS = "/api/v1/general/edit-table-of-contents" + MERGE_PDFS = "/api/v1/general/merge-pdfs" + MULTI_PAGE_LAYOUT = "/api/v1/general/multi-page-layout" + OVERLAY_PDFS = "/api/v1/general/overlay-pdfs" + REARRANGE_PAGES = "/api/v1/general/rearrange-pages" + REMOVE_PAGES = "/api/v1/general/remove-pages" + ROTATE_PDF = "/api/v1/general/rotate-pdf" + SCALE_PAGES = "/api/v1/general/scale-pages" + SPLIT_BY_SIZE_OR_COUNT = "/api/v1/general/split-by-size-or-count" + SPLIT_FOR_POSTER_PRINT = "/api/v1/general/split-for-poster-print" + SPLIT_PAGES = 
"/api/v1/general/split-pages" + SPLIT_PDF_BY_CHAPTERS = "/api/v1/general/split-pdf-by-chapters" + SPLIT_PDF_BY_SECTIONS = "/api/v1/general/split-pdf-by-sections" + ADD_ATTACHMENTS = "/api/v1/misc/add-attachments" + ADD_IMAGE = "/api/v1/misc/add-image" + ADD_PAGE_NUMBERS = "/api/v1/misc/add-page-numbers" + ADD_STAMP = "/api/v1/misc/add-stamp" + AUTO_RENAME = "/api/v1/misc/auto-rename" + AUTO_SPLIT_PDF = "/api/v1/misc/auto-split-pdf" + COMPRESS_PDF = "/api/v1/misc/compress-pdf" + DELETE_ATTACHMENT = "/api/v1/misc/delete-attachment" + EXTRACT_IMAGE_SCANS = "/api/v1/misc/extract-image-scans" + EXTRACT_IMAGES = "/api/v1/misc/extract-images" + FLATTEN = "/api/v1/misc/flatten" + OCR_PDF = "/api/v1/misc/ocr-pdf" + REMOVE_BLANKS = "/api/v1/misc/remove-blanks" + RENAME_ATTACHMENT = "/api/v1/misc/rename-attachment" + REPLACE_INVERT_PDF = "/api/v1/misc/replace-invert-pdf" + SCANNER_EFFECT = "/api/v1/misc/scanner-effect" + UPDATE_METADATA = "/api/v1/misc/update-metadata" + ADD_PASSWORD = "/api/v1/security/add-password" + ADD_WATERMARK = "/api/v1/security/add-watermark" + AUTO_REDACT = "/api/v1/security/auto-redact" + CERT_SIGN = "/api/v1/security/cert-sign" + SESSIONS = "/api/v1/security/cert-sign/sessions" + REDACT = "/api/v1/security/redact" + REMOVE_PASSWORD = "/api/v1/security/remove-password" + SANITIZE_PDF = "/api/v1/security/sanitize-pdf" + TIMESTAMP_PDF = "/api/v1/security/timestamp-pdf" -OPERATIONS: dict[OperationId, ParamToolModelType] = { - OperationId.ADD_ATTACHMENTS: AddAttachmentsParams, - OperationId.ADD_PASSWORD: AddPasswordParams, - OperationId.ADJUST_CONTRAST: AdjustContrastParams, - OperationId.AUTO_RENAME: AutoRenameParams, - OperationId.AUTOMATE: AutomateParams, - OperationId.BOOKLET_IMPOSITION: BookletImpositionParams, - OperationId.CERT_SIGN: CertSignParams, - OperationId.CHANGE_METADATA: ChangeMetadataParams, - OperationId.CHANGE_PERMISSIONS: ChangePermissionsParams, - OperationId.COMPRESS: CompressParams, - OperationId.CONVERT: ConvertParams, - 
OperationId.CROP: CropParams, - OperationId.EDIT_TABLE_OF_CONTENTS: EditTableOfContentsParams, - OperationId.EXTRACT_IMAGES: ExtractImagesParams, - OperationId.EXTRACT_PAGES: ExtractPagesParams, - OperationId.FLATTEN: FlattenParams, - OperationId.MERGE: MergeParams, - OperationId.OCR: OcrParams, - OperationId.OVERLAY_PDFS: OverlayPdfsParams, - OperationId.PAGE_LAYOUT: PageLayoutParams, - OperationId.PDF_TO_SINGLE_PAGE: PdfToSinglePageParams, - OperationId.REDACT: RedactParams, - OperationId.REMOVE_ANNOTATIONS: RemoveAnnotationsParams, - OperationId.REMOVE_BLANKS: RemoveBlanksParams, - OperationId.REMOVE_CERT_SIGN: RemoveCertSignParams, - OperationId.REMOVE_IMAGE: RemoveImageParams, - OperationId.REMOVE_PAGES: RemovePagesParams, - OperationId.REMOVE_PASSWORD: RemovePasswordParams, - OperationId.REORGANIZE_PAGES: ReorganizePagesParams, - OperationId.REPAIR: RepairParams, - OperationId.REPLACE_COLOR: ReplaceColorParams, - OperationId.ROTATE: RotateParams, - OperationId.SANITIZE: SanitizeParams, - OperationId.SCALE_PAGES: ScalePagesParams, - OperationId.SCANNER_IMAGE_SPLIT: ScannerImageSplitParams, - OperationId.SIGN: SignParams, - OperationId.SPLIT: SplitParams, - OperationId.UNLOCK_PDFFORMS: UnlockPdfformsParams, - OperationId.WATERMARK: WatermarkParams, +OPERATIONS: dict[ToolEndpoint, ParamToolModelType] = { + ToolEndpoint.CBR_TO_PDF: CbrToPdfParams, + ToolEndpoint.CBZ_TO_PDF: CbzToPdfParams, + ToolEndpoint.EBOOK_TO_PDF: EbookToPdfParams, + ToolEndpoint.EML_TO_PDF: EmlToPdfParams, + ToolEndpoint.HTML_TO_PDF: HtmlToPdfParams, + ToolEndpoint.IMG_TO_PDF: ImgToPdfParams, + ToolEndpoint.PDF_TO_CBR: PdfToCbrParams, + ToolEndpoint.PDF_TO_CBZ: PdfToCbzParams, + ToolEndpoint.PDF_TO_CSV: PdfToCsvParams, + ToolEndpoint.PDF_TO_EPUB: PdfToEpubParams, + ToolEndpoint.PDF_TO_IMG: PdfToImgParams, + ToolEndpoint.PDF_TO_PDFA: PdfToPdfaParams, + ToolEndpoint.PDF_TO_PRESENTATION: PdfToPresentationParams, + ToolEndpoint.PDF_TO_TEXT: PdfToTextParams, + ToolEndpoint.PDF_TO_VECTOR: 
PdfToVectorParams, + ToolEndpoint.PDF_TO_WORD: PdfToWordParams, + ToolEndpoint.PDF_TO_XLSX: PdfToXlsxParams, + ToolEndpoint.SVG_TO_PDF: SvgToPdfParams, + ToolEndpoint.URL_TO_PDF: UrlToPdfParams, + ToolEndpoint.VECTOR_TO_PDF: VectorToPdfParams, + ToolEndpoint.BOOKLET_IMPOSITION: BookletImpositionParams, + ToolEndpoint.CROP: CropParams, + ToolEndpoint.EDIT_TABLE_OF_CONTENTS: EditTableOfContentsParams, + ToolEndpoint.MERGE_PDFS: MergePdfsParams, + ToolEndpoint.MULTI_PAGE_LAYOUT: MultiPageLayoutParams, + ToolEndpoint.OVERLAY_PDFS: OverlayPdfsParams, + ToolEndpoint.REARRANGE_PAGES: RearrangePagesParams, + ToolEndpoint.REMOVE_PAGES: RemovePagesParams, + ToolEndpoint.ROTATE_PDF: RotatePdfParams, + ToolEndpoint.SCALE_PAGES: ScalePagesParams, + ToolEndpoint.SPLIT_BY_SIZE_OR_COUNT: SplitBySizeOrCountParams, + ToolEndpoint.SPLIT_FOR_POSTER_PRINT: SplitForPosterPrintParams, + ToolEndpoint.SPLIT_PAGES: SplitPagesParams, + ToolEndpoint.SPLIT_PDF_BY_CHAPTERS: SplitPdfByChaptersParams, + ToolEndpoint.SPLIT_PDF_BY_SECTIONS: SplitPdfBySectionsParams, + ToolEndpoint.ADD_ATTACHMENTS: AddAttachmentsParams, + ToolEndpoint.ADD_IMAGE: AddImageParams, + ToolEndpoint.ADD_PAGE_NUMBERS: AddPageNumbersParams, + ToolEndpoint.ADD_STAMP: AddStampParams, + ToolEndpoint.AUTO_RENAME: AutoRenameParams, + ToolEndpoint.AUTO_SPLIT_PDF: AutoSplitPdfParams, + ToolEndpoint.COMPRESS_PDF: CompressPdfParams, + ToolEndpoint.DELETE_ATTACHMENT: DeleteAttachmentParams, + ToolEndpoint.EXTRACT_IMAGE_SCANS: ExtractImageScansParams, + ToolEndpoint.EXTRACT_IMAGES: ExtractImagesParams, + ToolEndpoint.FLATTEN: FlattenParams, + ToolEndpoint.OCR_PDF: OcrPdfParams, + ToolEndpoint.REMOVE_BLANKS: RemoveBlanksParams, + ToolEndpoint.RENAME_ATTACHMENT: RenameAttachmentParams, + ToolEndpoint.REPLACE_INVERT_PDF: ReplaceInvertPdfParams, + ToolEndpoint.SCANNER_EFFECT: ScannerEffectParams, + ToolEndpoint.UPDATE_METADATA: UpdateMetadataParams, + ToolEndpoint.ADD_PASSWORD: AddPasswordParams, + ToolEndpoint.ADD_WATERMARK: 
AddWatermarkParams, + ToolEndpoint.AUTO_REDACT: AutoRedactParams, + ToolEndpoint.CERT_SIGN: CertSignParams, + ToolEndpoint.SESSIONS: SessionsParams, + ToolEndpoint.REDACT: RedactParams, + ToolEndpoint.REMOVE_PASSWORD: RemovePasswordParams, + ToolEndpoint.SANITIZE_PDF: SanitizePdfParams, + ToolEndpoint.TIMESTAMP_PDF: TimestampPdfParams, } diff --git a/engine/tests/test_pdf_edit_agent.py b/engine/tests/test_pdf_edit_agent.py index aa35341b01..fdbeb722ee 100644 --- a/engine/tests/test_pdf_edit_agent.py +++ b/engine/tests/test_pdf_edit_agent.py @@ -12,14 +12,14 @@ from stirling.contracts import ( PdfEditRequest, ToolOperationStep, ) -from stirling.models.tool_models import CompressParams, OperationId, RotateParams +from stirling.models.tool_models import Angle, FlattenParams, RotatePdfParams, ToolEndpoint from stirling.services.runtime import AppRuntime @dataclass(frozen=True) class ParameterSelectorCall: request: PdfEditRequest - operation_plan: list[OperationId] + operation_plan: list[ToolEndpoint] operation_index: int generated_steps: list[ToolOperationStep] @@ -31,10 +31,10 @@ class RecordingParameterSelector: async def select( self, request: PdfEditRequest, - operation_plan: list[OperationId], + operation_plan: list[ToolEndpoint], operation_index: int, generated_steps: list[ToolOperationStep], - ) -> RotateParams | CompressParams: + ) -> RotatePdfParams | FlattenParams: self.calls.append( ParameterSelectorCall( request=request, @@ -44,8 +44,8 @@ class RecordingParameterSelector: ) ) if operation_index == 0: - return RotateParams(angle=90) - return CompressParams(compression_level=5) + return RotatePdfParams(angle=Angle(90)) + return FlattenParams(flatten_only_forms=False, render_dpi=None) class StubPdfEditAgent(PdfEditAgent): @@ -73,7 +73,7 @@ async def test_pdf_edit_agent_builds_multi_step_plan(runtime: AppRuntime) -> Non agent = StubPdfEditAgent( runtime, PdfEditPlanSelection( - operations=[OperationId.ROTATE, OperationId.COMPRESS], + 
operations=[ToolEndpoint.ROTATE_PDF, ToolEndpoint.FLATTEN], summary="Rotate the PDF, then compress it.", rationale="The pages need reorientation before reducing file size.", ), @@ -90,9 +90,9 @@ async def test_pdf_edit_agent_builds_multi_step_plan(runtime: AppRuntime) -> Non assert isinstance(response, EditPlanResponse) assert response.summary == "Rotate the PDF, then compress it." assert response.rationale == "The pages need reorientation before reducing file size." - assert [step.tool for step in response.steps] == [OperationId.ROTATE, OperationId.COMPRESS] - assert isinstance(response.steps[0].parameters, RotateParams) - assert isinstance(response.steps[1].parameters, CompressParams) + assert [step.tool for step in response.steps] == [ToolEndpoint.ROTATE_PDF, ToolEndpoint.FLATTEN] + assert isinstance(response.steps[0].parameters, RotatePdfParams) + assert isinstance(response.steps[1].parameters, FlattenParams) @pytest.mark.anyio @@ -101,7 +101,7 @@ async def test_pdf_edit_agent_passes_previous_steps_to_parameter_selector(runtim agent = StubPdfEditAgent( runtime, PdfEditPlanSelection( - operations=[OperationId.ROTATE, OperationId.COMPRESS], + operations=[ToolEndpoint.ROTATE_PDF, ToolEndpoint.FLATTEN], summary="Rotate the PDF, then compress it.", ), parameter_selector=parameter_selector, @@ -120,8 +120,8 @@ async def test_pdf_edit_agent_passes_previous_steps_to_parameter_selector(runtim assert parameter_selector.calls[1].operation_index == 1 assert parameter_selector.calls[1].generated_steps == [ ToolOperationStep( - tool=OperationId.ROTATE, - parameters=RotateParams(angle=90), + tool=ToolEndpoint.ROTATE_PDF, + parameters=RotatePdfParams(angle=Angle(90)), ) ] diff --git a/engine/tests/test_stirling_api.py b/engine/tests/test_stirling_api.py index b002f389b0..2de187af08 100644 --- a/engine/tests/test_stirling_api.py +++ b/engine/tests/test_stirling_api.py @@ -25,7 +25,7 @@ from stirling.contracts import ( PdfQuestionNotFoundResponse, PdfQuestionRequest, ) -from 
stirling.models.tool_models import RotateParams +from stirling.models.tool_models import Angle, RotatePdfParams class StubOrchestratorAgent: @@ -127,8 +127,8 @@ def test_agent_revise_route() -> None: "steps": [ { "kind": "tool", - "tool": "rotate", - "parameters": RotateParams(angle=90).model_dump(by_alias=True), + "tool": "/api/v1/general/rotate-pdf", + "parameters": RotatePdfParams(angle=Angle(90)).model_dump(by_alias=True), } ], }, @@ -150,8 +150,8 @@ def test_next_action_route() -> None: "steps": [ { "kind": "tool", - "tool": "rotate", - "parameters": RotateParams(angle=90).model_dump(by_alias=True), + "tool": "/api/v1/general/rotate-pdf", + "parameters": RotatePdfParams(angle=Angle(90)).model_dump(by_alias=True), } ], }, diff --git a/engine/tests/test_stirling_contracts.py b/engine/tests/test_stirling_contracts.py index b56e420764..3e4138a928 100644 --- a/engine/tests/test_stirling_contracts.py +++ b/engine/tests/test_stirling_contracts.py @@ -12,7 +12,7 @@ from stirling.contracts import ( PdfTextSelection, ToolOperationStep, ) -from stirling.models.tool_models import OperationId, RotateParams +from stirling.models.tool_models import Angle, RotatePdfParams, ToolEndpoint def test_orchestrator_request_accepts_user_message() -> None: @@ -38,8 +38,8 @@ def test_orchestrator_request_accepts_user_message() -> None: def test_agent_execution_request_uses_typed_agent_spec() -> None: steps: list[AgentSpecStep] = [ ToolOperationStep( - tool=OperationId.ROTATE, - parameters=RotateParams(angle=90), + tool=ToolEndpoint.ROTATE_PDF, + parameters=RotatePdfParams(angle=Angle(90)), ) ] request = AgentExecutionRequest( @@ -57,13 +57,13 @@ def test_agent_execution_request_uses_typed_agent_spec() -> None: def test_edit_plan_response_has_typed_steps() -> None: - steps = [ToolOperationStep(tool=OperationId.ROTATE, parameters=RotateParams(angle=90))] + steps = [ToolOperationStep(tool=ToolEndpoint.ROTATE_PDF, parameters=RotatePdfParams(angle=Angle(90)))] response = EditPlanResponse( 
summary="Rotate the input PDF by 90 degrees.", steps=steps, ) - assert response.steps[0].tool == OperationId.ROTATE + assert response.steps[0].tool == ToolEndpoint.ROTATE_PDF def test_pdf_question_answer_defaults_evidence_list() -> None: diff --git a/engine/tests/test_user_spec_agent.py b/engine/tests/test_user_spec_agent.py index bc492b179e..1f9de664c3 100644 --- a/engine/tests/test_user_spec_agent.py +++ b/engine/tests/test_user_spec_agent.py @@ -13,7 +13,7 @@ from stirling.contracts import ( EditPlanResponse, ToolOperationStep, ) -from stirling.models.tool_models import CompressParams, OperationId, RotateParams +from stirling.models.tool_models import Angle, FlattenParams, RotatePdfParams, ToolEndpoint from stirling.services.runtime import AppRuntime @@ -26,8 +26,8 @@ class StubUserSpecAgent(UserSpecAgent): summary="Rotate the document.", steps=[ ToolOperationStep( - tool=OperationId.ROTATE, - parameters=RotateParams(angle=90), + tool=ToolEndpoint.ROTATE_PDF, + parameters=RotatePdfParams(angle=Angle(90)), ) ], ) @@ -63,8 +63,8 @@ async def test_user_spec_agent_drafts_agent_spec(runtime: AppRuntime) -> None: objective="Normalize invoices before accounting review.", steps=[ ToolOperationStep( - tool=OperationId.ROTATE, - parameters=RotateParams(angle=90), + tool=ToolEndpoint.ROTATE_PDF, + parameters=RotatePdfParams(angle=Angle(90)), ) ], ), @@ -98,8 +98,8 @@ async def test_user_spec_agent_revises_existing_draft(runtime: AppRuntime) -> No objective="Normalize invoices before accounting review.", steps=[ ToolOperationStep( - tool=OperationId.ROTATE, - parameters=RotateParams(angle=90), + tool=ToolEndpoint.ROTATE_PDF, + parameters=RotatePdfParams(angle=Angle(90)), ) ], ) @@ -112,12 +112,12 @@ async def test_user_spec_agent_revises_existing_draft(runtime: AppRuntime) -> No objective="Normalize invoices before accounting review.", steps=[ ToolOperationStep( - tool=OperationId.ROTATE, - parameters=RotateParams(angle=90), + tool=ToolEndpoint.ROTATE_PDF, + 
parameters=RotatePdfParams(angle=Angle(90)), ), ToolOperationStep( - tool=OperationId.COMPRESS, - parameters=CompressParams(compression_level=5), + tool=ToolEndpoint.FLATTEN, + parameters=FlattenParams(flatten_only_forms=False, render_dpi=None), ), ], ), @@ -138,8 +138,8 @@ async def test_user_spec_agent_revises_existing_draft(runtime: AppRuntime) -> No def test_tool_operation_step_rejects_mismatched_parameters() -> None: with pytest.raises(ValidationError): ToolOperationStep( - tool=OperationId.ROTATE, - parameters=CompressParams(compression_level=5), + tool=ToolEndpoint.ROTATE_PDF, + parameters=FlattenParams(flatten_only_forms=False, render_dpi=None), ) diff --git a/engine/uv.lock b/engine/uv.lock index c54957bf92..7d91203d39 100644 --- a/engine/uv.lock +++ b/engine/uv.lock @@ -1,6 +1,10 @@ version = 1 revision = 3 requires-python = ">=3.13" +resolution-markers = [ + "python_full_version >= '3.14'", + "python_full_version < '3.14'", +] [[package]] name = "ag-ui-protocol" @@ -212,6 +216,33 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/71/cc/18245721fa7747065ab478316c7fea7c74777d07f37ae60db2e84f8172e8/beartype-0.22.9-py3-none-any.whl", hash = "sha256:d16c9bbc61ea14637596c5f6fbff2ee99cbe3573e46a716401734ef50c3060c2", size = 1333658, upload-time = "2025-12-13T06:50:28.266Z" }, ] +[[package]] +name = "black" +version = "26.3.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "mypy-extensions" }, + { name = "packaging" }, + { name = "pathspec" }, + { name = "platformdirs" }, + { name = "pytokens" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e1/c5/61175d618685d42b005847464b8fb4743a67b1b8fdb75e50e5a96c31a27a/black-26.3.1.tar.gz", hash = "sha256:2c50f5063a9641c7eed7795014ba37b0f5fa227f3d408b968936e24bc0566b07", size = 666155, upload-time = "2026-03-12T03:36:03.593Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/f5/77/5728052a3c0450c53d9bb3945c4c46b91baa62b2cafab6801411b6271e45/black-26.3.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:855822d90f884905362f602880ed8b5df1b7e3ee7d0db2502d4388a954cc8c54", size = 1895034, upload-time = "2026-03-12T03:40:21.813Z" }, + { url = "https://files.pythonhosted.org/packages/52/73/7cae55fdfdfbe9d19e9a8d25d145018965fe2079fa908101c3733b0c55a0/black-26.3.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:8a33d657f3276328ce00e4d37fe70361e1ec7614da5d7b6e78de5426cb56332f", size = 1718503, upload-time = "2026-03-12T03:40:23.666Z" }, + { url = "https://files.pythonhosted.org/packages/e1/87/af89ad449e8254fdbc74654e6467e3c9381b61472cc532ee350d28cfdafb/black-26.3.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f1cd08e99d2f9317292a311dfe578fd2a24b15dbce97792f9c4d752275c1fa56", size = 1793557, upload-time = "2026-03-12T03:40:25.497Z" }, + { url = "https://files.pythonhosted.org/packages/43/10/d6c06a791d8124b843bf325ab4ac7d2f5b98731dff84d6064eafd687ded1/black-26.3.1-cp313-cp313-win_amd64.whl", hash = "sha256:c7e72339f841b5a237ff14f7d3880ddd0fc7f98a1199e8c4327f9a4f478c1839", size = 1422766, upload-time = "2026-03-12T03:40:27.14Z" }, + { url = "https://files.pythonhosted.org/packages/59/4f/40a582c015f2d841ac24fed6390bd68f0fc896069ff3a886317959c9daf8/black-26.3.1-cp313-cp313-win_arm64.whl", hash = "sha256:afc622538b430aa4c8c853f7f63bc582b3b8030fd8c80b70fb5fa5b834e575c2", size = 1232140, upload-time = "2026-03-12T03:40:28.882Z" }, + { url = "https://files.pythonhosted.org/packages/d5/da/e36e27c9cebc1311b7579210df6f1c86e50f2d7143ae4fcf8a5017dc8809/black-26.3.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:2d6bfaf7fd0993b420bed691f20f9492d53ce9a2bcccea4b797d34e947318a78", size = 1889234, upload-time = "2026-03-12T03:40:30.964Z" }, + { url = 
"https://files.pythonhosted.org/packages/0e/7b/9871acf393f64a5fa33668c19350ca87177b181f44bb3d0c33b2d534f22c/black-26.3.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:f89f2ab047c76a9c03f78d0d66ca519e389519902fa27e7a91117ef7611c0568", size = 1720522, upload-time = "2026-03-12T03:40:32.346Z" }, + { url = "https://files.pythonhosted.org/packages/03/87/e766c7f2e90c07fb7586cc787c9ae6462b1eedab390191f2b7fc7f6170a9/black-26.3.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b07fc0dab849d24a80a29cfab8d8a19187d1c4685d8a5e6385a5ce323c1f015f", size = 1787824, upload-time = "2026-03-12T03:40:33.636Z" }, + { url = "https://files.pythonhosted.org/packages/ac/94/2424338fb2d1875e9e83eed4c8e9c67f6905ec25afd826a911aea2b02535/black-26.3.1-cp314-cp314-win_amd64.whl", hash = "sha256:0126ae5b7c09957da2bdbd91a9ba1207453feada9e9fe51992848658c6c8e01c", size = 1445855, upload-time = "2026-03-12T03:40:35.442Z" }, + { url = "https://files.pythonhosted.org/packages/86/43/0c3338bd928afb8ee7471f1a4eec3bdbe2245ccb4a646092a222e8669840/black-26.3.1-cp314-cp314-win_arm64.whl", hash = "sha256:92c0ec1f2cc149551a2b7b47efc32c866406b6891b0ee4625e95967c8f4acfb1", size = 1258109, upload-time = "2026-03-12T03:40:36.832Z" }, + { url = "https://files.pythonhosted.org/packages/8e/0d/52d98722666d6fc6c3dd4c76df339501d6efd40e0ff95e6186a7b7f0befd/black-26.3.1-py3-none-any.whl", hash = "sha256:2bd5aa94fc267d38bb21a70d7410a89f1a1d318841855f698746f8e7f51acd1b", size = 207542, upload-time = "2026-03-12T03:36:01.668Z" }, +] + [[package]] name = "boto3" version = "1.42.74" @@ -485,6 +516,30 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8a/0b/2261922126b2e50c601fe22d7ff5194e0a4d50e654836260c0665e24d862/cyclopts-4.10.1-py3-none-any.whl", hash = "sha256:35f37257139380a386d9fe4475e1e7c87ca7795765ef4f31abba579fcfcb6ecd", size = 204331, upload-time = "2026-03-23T14:43:02.625Z" }, ] +[[package]] +name = "datamodel-code-generator" +version = "0.56.0" 
+source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "argcomplete" }, + { name = "black" }, + { name = "genson" }, + { name = "inflect" }, + { name = "isort" }, + { name = "jinja2" }, + { name = "pydantic" }, + { name = "pyyaml" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/03/7d/7fc2bb3d8946ca45851da3f23497a2c6e252e92558ccbd89d609cf1e13d4/datamodel_code_generator-0.56.0.tar.gz", hash = "sha256:e7c003fb5421b890aabe12f66ae65b57198b04cfe1da7c40810798020835b3a8", size = 837708, upload-time = "2026-04-04T09:46:19.636Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ed/3a/7f169ffc7a2d69a4f9158b1ac083f685b7f4a1a8a1db5d1e4abbb4e741b7/datamodel_code_generator-0.56.0-py3-none-any.whl", hash = "sha256:a0559683fbe90cdf2ce9b6637e3adae3e3a8056a8d0516df581d486e2834ead2", size = 256545, upload-time = "2026-04-04T09:46:17.582Z" }, +] + +[package.optional-dependencies] +ruff = [ + { name = "ruff" }, +] + [[package]] name = "distro" version = "1.9.0" @@ -551,8 +606,10 @@ dependencies = [ [package.dev-dependencies] dev = [ + { name = "datamodel-code-generator", extra = ["ruff"] }, { name = "pyright" }, { name = "pytest" }, + { name = "referencing" }, { name = "ruff" }, ] @@ -570,8 +627,10 @@ requires-dist = [ [package.metadata.requires-dev] dev = [ + { name = "datamodel-code-generator", extras = ["ruff"], specifier = ">=0.26.0" }, { name = "pyright", specifier = ">=1.1.408" }, { name = "pytest", specifier = ">=8.0.0" }, + { name = "referencing", specifier = ">=0.35.0" }, { name = "ruff", specifier = ">=0.14.10" }, ] @@ -783,6 +842,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a3/f6/8ef7e4c286deb2709d11ca96a5237caae3ef4876ab3c48095856cfd2df30/genai_prices-0.0.56-py3-none-any.whl", hash = "sha256:dbe86be8f3f556bed1b72209ed36851fec8b01793b3b220f42921a4e7da945f6", size = 68966, upload-time = "2026-03-20T20:33:02.555Z" }, ] +[[package]] +name = "genson" +version = "1.3.0" +source = { registry = 
"https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c5/cf/2303c8ad276dcf5ee2ad6cf69c4338fd86ef0f471a5207b069adf7a393cf/genson-1.3.0.tar.gz", hash = "sha256:e02db9ac2e3fd29e65b5286f7135762e2cd8a986537c075b06fc5f1517308e37", size = 34919, upload-time = "2024-05-15T22:08:49.123Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f8/5c/e226de133afd8bb267ec27eead9ae3d784b95b39a287ed404caab39a5f50/genson-1.3.0-py3-none-any.whl", hash = "sha256:468feccd00274cc7e4c09e84b08704270ba8d95232aa280f65b986139cec67f7", size = 21470, upload-time = "2024-05-15T22:08:47.056Z" }, +] + [[package]] name = "google-auth" version = "2.49.1" @@ -1010,6 +1078,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fa/5e/f8e9a1d23b9c20a551a8a02ea3637b4642e22c2626e3a13a9a29cdea99eb/importlib_metadata-8.7.1-py3-none-any.whl", hash = "sha256:5a1f80bf1daa489495071efbb095d75a634cf28a8bc299581244063b53176151", size = 27865, upload-time = "2025-12-21T10:00:18.329Z" }, ] +[[package]] +name = "inflect" +version = "7.5.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "more-itertools" }, + { name = "typeguard" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/78/c6/943357d44a21fd995723d07ccaddd78023eace03c1846049a2645d4324a3/inflect-7.5.0.tar.gz", hash = "sha256:faf19801c3742ed5a05a8ce388e0d8fe1a07f8d095c82201eb904f5d27ad571f", size = 73751, upload-time = "2024-12-28T17:11:18.897Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8a/eb/427ed2b20a38a4ee29f24dbe4ae2dafab198674fe9a85e3d6adf9e5f5f41/inflect-7.5.0-py3-none-any.whl", hash = "sha256:2aea70e5e70c35d8350b8097396ec155ffd68def678c7ff97f51aa69c1d92344", size = 35197, upload-time = "2024-12-28T17:11:15.931Z" }, +] + [[package]] name = "iniconfig" version = "2.3.0" @@ -1019,6 +1100,15 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" }, ] +[[package]] +name = "isort" +version = "8.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ef/7c/ec4ab396d31b3b395e2e999c8f46dec78c5e29209fac49d1f4dace04041d/isort-8.0.1.tar.gz", hash = "sha256:171ac4ff559cdc060bcfff550bc8404a486fee0caab245679c2abe7cb253c78d", size = 769592, upload-time = "2026-02-28T10:08:20.685Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3e/95/c7c34aa53c16353c56d0b802fba48d5f5caa2cdee7958acbcb795c830416/isort-8.0.1-py3-none-any.whl", hash = "sha256:28b89bc70f751b559aeca209e6120393d43fbe2490de0559662be7a9787e3d75", size = 89733, upload-time = "2026-02-28T10:08:19.466Z" }, +] + [[package]] name = "jaraco-classes" version = "3.4.0" @@ -1061,6 +1151,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b2/a3/e137168c9c44d18eff0376253da9f1e9234d0239e0ee230d2fee6cea8e55/jeepney-0.9.0-py3-none-any.whl", hash = "sha256:97e5714520c16fc0a45695e5365a2e11b81ea79bba796e26f9f1d178cb182683", size = 49010, upload-time = "2025-02-27T18:51:00.104Z" }, ] +[[package]] +name = "jinja2" +version = "3.1.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markupsafe" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/df/bf/f7da0350254c0ed7c72f3e33cef02e048281fec7ecec5f032d4aac52226b/jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d", size = 245115, upload-time = "2025-03-05T20:05:02.478Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = 
"sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899, upload-time = "2025-03-05T20:05:00.369Z" }, +] + [[package]] name = "jiter" version = "0.13.0" @@ -1241,6 +1343,58 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/94/54/e7d793b573f298e1c9013b8c4dade17d481164aa517d1d7148619c2cedbf/markdown_it_py-4.0.0-py3-none-any.whl", hash = "sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147", size = 87321, upload-time = "2025-08-11T12:57:51.923Z" }, ] +[[package]] +name = "markupsafe" +version = "3.0.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7e/99/7690b6d4034fffd95959cbe0c02de8deb3098cc577c67bb6a24fe5d7caa7/markupsafe-3.0.3.tar.gz", hash = "sha256:722695808f4b6457b320fdc131280796bdceb04ab50fe1795cd540799ebe1698", size = 80313, upload-time = "2025-09-27T18:37:40.426Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/38/2f/907b9c7bbba283e68f20259574b13d005c121a0fa4c175f9bed27c4597ff/markupsafe-3.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e1cf1972137e83c5d4c136c43ced9ac51d0e124706ee1c8aa8532c1287fa8795", size = 11622, upload-time = "2025-09-27T18:36:41.777Z" }, + { url = "https://files.pythonhosted.org/packages/9c/d9/5f7756922cdd676869eca1c4e3c0cd0df60ed30199ffd775e319089cb3ed/markupsafe-3.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:116bb52f642a37c115f517494ea5feb03889e04df47eeff5b130b1808ce7c219", size = 12029, upload-time = "2025-09-27T18:36:43.257Z" }, + { url = "https://files.pythonhosted.org/packages/00/07/575a68c754943058c78f30db02ee03a64b3c638586fba6a6dd56830b30a3/markupsafe-3.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:133a43e73a802c5562be9bbcd03d090aa5a1fe899db609c29e8c8d815c5f6de6", size = 24374, upload-time = "2025-09-27T18:36:44.508Z" }, + { url = 
"https://files.pythonhosted.org/packages/a9/21/9b05698b46f218fc0e118e1f8168395c65c8a2c750ae2bab54fc4bd4e0e8/markupsafe-3.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ccfcd093f13f0f0b7fdd0f198b90053bf7b2f02a3927a30e63f3ccc9df56b676", size = 22980, upload-time = "2025-09-27T18:36:45.385Z" }, + { url = "https://files.pythonhosted.org/packages/7f/71/544260864f893f18b6827315b988c146b559391e6e7e8f7252839b1b846a/markupsafe-3.0.3-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:509fa21c6deb7a7a273d629cf5ec029bc209d1a51178615ddf718f5918992ab9", size = 21990, upload-time = "2025-09-27T18:36:46.916Z" }, + { url = "https://files.pythonhosted.org/packages/c2/28/b50fc2f74d1ad761af2f5dcce7492648b983d00a65b8c0e0cb457c82ebbe/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a4afe79fb3de0b7097d81da19090f4df4f8d3a2b3adaa8764138aac2e44f3af1", size = 23784, upload-time = "2025-09-27T18:36:47.884Z" }, + { url = "https://files.pythonhosted.org/packages/ed/76/104b2aa106a208da8b17a2fb72e033a5a9d7073c68f7e508b94916ed47a9/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:795e7751525cae078558e679d646ae45574b47ed6e7771863fcc079a6171a0fc", size = 21588, upload-time = "2025-09-27T18:36:48.82Z" }, + { url = "https://files.pythonhosted.org/packages/b5/99/16a5eb2d140087ebd97180d95249b00a03aa87e29cc224056274f2e45fd6/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8485f406a96febb5140bfeca44a73e3ce5116b2501ac54fe953e488fb1d03b12", size = 23041, upload-time = "2025-09-27T18:36:49.797Z" }, + { url = "https://files.pythonhosted.org/packages/19/bc/e7140ed90c5d61d77cea142eed9f9c303f4c4806f60a1044c13e3f1471d0/markupsafe-3.0.3-cp313-cp313-win32.whl", hash = "sha256:bdd37121970bfd8be76c5fb069c7751683bdf373db1ed6c010162b2a130248ed", size = 14543, upload-time = "2025-09-27T18:36:51.584Z" }, + { url = 
"https://files.pythonhosted.org/packages/05/73/c4abe620b841b6b791f2edc248f556900667a5a1cf023a6646967ae98335/markupsafe-3.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:9a1abfdc021a164803f4d485104931fb8f8c1efd55bc6b748d2f5774e78b62c5", size = 15113, upload-time = "2025-09-27T18:36:52.537Z" }, + { url = "https://files.pythonhosted.org/packages/f0/3a/fa34a0f7cfef23cf9500d68cb7c32dd64ffd58a12b09225fb03dd37d5b80/markupsafe-3.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:7e68f88e5b8799aa49c85cd116c932a1ac15caaa3f5db09087854d218359e485", size = 13911, upload-time = "2025-09-27T18:36:53.513Z" }, + { url = "https://files.pythonhosted.org/packages/e4/d7/e05cd7efe43a88a17a37b3ae96e79a19e846f3f456fe79c57ca61356ef01/markupsafe-3.0.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:218551f6df4868a8d527e3062d0fb968682fe92054e89978594c28e642c43a73", size = 11658, upload-time = "2025-09-27T18:36:54.819Z" }, + { url = "https://files.pythonhosted.org/packages/99/9e/e412117548182ce2148bdeacdda3bb494260c0b0184360fe0d56389b523b/markupsafe-3.0.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:3524b778fe5cfb3452a09d31e7b5adefeea8c5be1d43c4f810ba09f2ceb29d37", size = 12066, upload-time = "2025-09-27T18:36:55.714Z" }, + { url = "https://files.pythonhosted.org/packages/bc/e6/fa0ffcda717ef64a5108eaa7b4f5ed28d56122c9a6d70ab8b72f9f715c80/markupsafe-3.0.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4e885a3d1efa2eadc93c894a21770e4bc67899e3543680313b09f139e149ab19", size = 25639, upload-time = "2025-09-27T18:36:56.908Z" }, + { url = "https://files.pythonhosted.org/packages/96/ec/2102e881fe9d25fc16cb4b25d5f5cde50970967ffa5dddafdb771237062d/markupsafe-3.0.3-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8709b08f4a89aa7586de0aadc8da56180242ee0ada3999749b183aa23df95025", size = 23569, upload-time = "2025-09-27T18:36:57.913Z" }, + { url = 
"https://files.pythonhosted.org/packages/4b/30/6f2fce1f1f205fc9323255b216ca8a235b15860c34b6798f810f05828e32/markupsafe-3.0.3-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:b8512a91625c9b3da6f127803b166b629725e68af71f8184ae7e7d54686a56d6", size = 23284, upload-time = "2025-09-27T18:36:58.833Z" }, + { url = "https://files.pythonhosted.org/packages/58/47/4a0ccea4ab9f5dcb6f79c0236d954acb382202721e704223a8aafa38b5c8/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:9b79b7a16f7fedff2495d684f2b59b0457c3b493778c9eed31111be64d58279f", size = 24801, upload-time = "2025-09-27T18:36:59.739Z" }, + { url = "https://files.pythonhosted.org/packages/6a/70/3780e9b72180b6fecb83a4814d84c3bf4b4ae4bf0b19c27196104149734c/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:12c63dfb4a98206f045aa9563db46507995f7ef6d83b2f68eda65c307c6829eb", size = 22769, upload-time = "2025-09-27T18:37:00.719Z" }, + { url = "https://files.pythonhosted.org/packages/98/c5/c03c7f4125180fc215220c035beac6b9cb684bc7a067c84fc69414d315f5/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8f71bc33915be5186016f675cd83a1e08523649b0e33efdb898db577ef5bb009", size = 23642, upload-time = "2025-09-27T18:37:01.673Z" }, + { url = "https://files.pythonhosted.org/packages/80/d6/2d1b89f6ca4bff1036499b1e29a1d02d282259f3681540e16563f27ebc23/markupsafe-3.0.3-cp313-cp313t-win32.whl", hash = "sha256:69c0b73548bc525c8cb9a251cddf1931d1db4d2258e9599c28c07ef3580ef354", size = 14612, upload-time = "2025-09-27T18:37:02.639Z" }, + { url = "https://files.pythonhosted.org/packages/2b/98/e48a4bfba0a0ffcf9925fe2d69240bfaa19c6f7507b8cd09c70684a53c1e/markupsafe-3.0.3-cp313-cp313t-win_amd64.whl", hash = "sha256:1b4b79e8ebf6b55351f0d91fe80f893b4743f104bff22e90697db1590e47a218", size = 15200, upload-time = "2025-09-27T18:37:03.582Z" }, + { url = 
"https://files.pythonhosted.org/packages/0e/72/e3cc540f351f316e9ed0f092757459afbc595824ca724cbc5a5d4263713f/markupsafe-3.0.3-cp313-cp313t-win_arm64.whl", hash = "sha256:ad2cf8aa28b8c020ab2fc8287b0f823d0a7d8630784c31e9ee5edea20f406287", size = 13973, upload-time = "2025-09-27T18:37:04.929Z" }, + { url = "https://files.pythonhosted.org/packages/33/8a/8e42d4838cd89b7dde187011e97fe6c3af66d8c044997d2183fbd6d31352/markupsafe-3.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:eaa9599de571d72e2daf60164784109f19978b327a3910d3e9de8c97b5b70cfe", size = 11619, upload-time = "2025-09-27T18:37:06.342Z" }, + { url = "https://files.pythonhosted.org/packages/b5/64/7660f8a4a8e53c924d0fa05dc3a55c9cee10bbd82b11c5afb27d44b096ce/markupsafe-3.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c47a551199eb8eb2121d4f0f15ae0f923d31350ab9280078d1e5f12b249e0026", size = 12029, upload-time = "2025-09-27T18:37:07.213Z" }, + { url = "https://files.pythonhosted.org/packages/da/ef/e648bfd021127bef5fa12e1720ffed0c6cbb8310c8d9bea7266337ff06de/markupsafe-3.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f34c41761022dd093b4b6896d4810782ffbabe30f2d443ff5f083e0cbbb8c737", size = 24408, upload-time = "2025-09-27T18:37:09.572Z" }, + { url = "https://files.pythonhosted.org/packages/41/3c/a36c2450754618e62008bf7435ccb0f88053e07592e6028a34776213d877/markupsafe-3.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:457a69a9577064c05a97c41f4e65148652db078a3a509039e64d3467b9e7ef97", size = 23005, upload-time = "2025-09-27T18:37:10.58Z" }, + { url = "https://files.pythonhosted.org/packages/bc/20/b7fdf89a8456b099837cd1dc21974632a02a999ec9bf7ca3e490aacd98e7/markupsafe-3.0.3-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e8afc3f2ccfa24215f8cb28dcf43f0113ac3c37c2f0f0806d8c70e4228c5cf4d", size = 22048, upload-time = "2025-09-27T18:37:11.547Z" }, + { url = 
"https://files.pythonhosted.org/packages/9a/a7/591f592afdc734f47db08a75793a55d7fbcc6902a723ae4cfbab61010cc5/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ec15a59cf5af7be74194f7ab02d0f59a62bdcf1a537677ce67a2537c9b87fcda", size = 23821, upload-time = "2025-09-27T18:37:12.48Z" }, + { url = "https://files.pythonhosted.org/packages/7d/33/45b24e4f44195b26521bc6f1a82197118f74df348556594bd2262bda1038/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:0eb9ff8191e8498cca014656ae6b8d61f39da5f95b488805da4bb029cccbfbaf", size = 21606, upload-time = "2025-09-27T18:37:13.485Z" }, + { url = "https://files.pythonhosted.org/packages/ff/0e/53dfaca23a69fbfbbf17a4b64072090e70717344c52eaaaa9c5ddff1e5f0/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2713baf880df847f2bece4230d4d094280f4e67b1e813eec43b4c0e144a34ffe", size = 23043, upload-time = "2025-09-27T18:37:14.408Z" }, + { url = "https://files.pythonhosted.org/packages/46/11/f333a06fc16236d5238bfe74daccbca41459dcd8d1fa952e8fbd5dccfb70/markupsafe-3.0.3-cp314-cp314-win32.whl", hash = "sha256:729586769a26dbceff69f7a7dbbf59ab6572b99d94576a5592625d5b411576b9", size = 14747, upload-time = "2025-09-27T18:37:15.36Z" }, + { url = "https://files.pythonhosted.org/packages/28/52/182836104b33b444e400b14f797212f720cbc9ed6ba34c800639d154e821/markupsafe-3.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:bdc919ead48f234740ad807933cdf545180bfbe9342c2bb451556db2ed958581", size = 15341, upload-time = "2025-09-27T18:37:16.496Z" }, + { url = "https://files.pythonhosted.org/packages/6f/18/acf23e91bd94fd7b3031558b1f013adfa21a8e407a3fdb32745538730382/markupsafe-3.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:5a7d5dc5140555cf21a6fefbdbf8723f06fcd2f63ef108f2854de715e4422cb4", size = 14073, upload-time = "2025-09-27T18:37:17.476Z" }, + { url = 
"https://files.pythonhosted.org/packages/3c/f0/57689aa4076e1b43b15fdfa646b04653969d50cf30c32a102762be2485da/markupsafe-3.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:1353ef0c1b138e1907ae78e2f6c63ff67501122006b0f9abad68fda5f4ffc6ab", size = 11661, upload-time = "2025-09-27T18:37:18.453Z" }, + { url = "https://files.pythonhosted.org/packages/89/c3/2e67a7ca217c6912985ec766c6393b636fb0c2344443ff9d91404dc4c79f/markupsafe-3.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1085e7fbddd3be5f89cc898938f42c0b3c711fdcb37d75221de2666af647c175", size = 12069, upload-time = "2025-09-27T18:37:19.332Z" }, + { url = "https://files.pythonhosted.org/packages/f0/00/be561dce4e6ca66b15276e184ce4b8aec61fe83662cce2f7d72bd3249d28/markupsafe-3.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1b52b4fb9df4eb9ae465f8d0c228a00624de2334f216f178a995ccdcf82c4634", size = 25670, upload-time = "2025-09-27T18:37:20.245Z" }, + { url = "https://files.pythonhosted.org/packages/50/09/c419f6f5a92e5fadde27efd190eca90f05e1261b10dbd8cbcb39cd8ea1dc/markupsafe-3.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fed51ac40f757d41b7c48425901843666a6677e3e8eb0abcff09e4ba6e664f50", size = 23598, upload-time = "2025-09-27T18:37:21.177Z" }, + { url = "https://files.pythonhosted.org/packages/22/44/a0681611106e0b2921b3033fc19bc53323e0b50bc70cffdd19f7d679bb66/markupsafe-3.0.3-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f190daf01f13c72eac4efd5c430a8de82489d9cff23c364c3ea822545032993e", size = 23261, upload-time = "2025-09-27T18:37:22.167Z" }, + { url = "https://files.pythonhosted.org/packages/5f/57/1b0b3f100259dc9fffe780cfb60d4be71375510e435efec3d116b6436d43/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e56b7d45a839a697b5eb268c82a71bd8c7f6c94d6fd50c3d577fa39a9f1409f5", size = 24835, upload-time = "2025-09-27T18:37:23.296Z" }, + { 
url = "https://files.pythonhosted.org/packages/26/6a/4bf6d0c97c4920f1597cc14dd720705eca0bf7c787aebc6bb4d1bead5388/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:f3e98bb3798ead92273dc0e5fd0f31ade220f59a266ffd8a4f6065e0a3ce0523", size = 22733, upload-time = "2025-09-27T18:37:24.237Z" }, + { url = "https://files.pythonhosted.org/packages/14/c7/ca723101509b518797fedc2fdf79ba57f886b4aca8a7d31857ba3ee8281f/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5678211cb9333a6468fb8d8be0305520aa073f50d17f089b5b4b477ea6e67fdc", size = 23672, upload-time = "2025-09-27T18:37:25.271Z" }, + { url = "https://files.pythonhosted.org/packages/fb/df/5bd7a48c256faecd1d36edc13133e51397e41b73bb77e1a69deab746ebac/markupsafe-3.0.3-cp314-cp314t-win32.whl", hash = "sha256:915c04ba3851909ce68ccc2b8e2cd691618c4dc4c4232fb7982bca3f41fd8c3d", size = 14819, upload-time = "2025-09-27T18:37:26.285Z" }, + { url = "https://files.pythonhosted.org/packages/1a/8a/0402ba61a2f16038b48b39bccca271134be00c5c9f0f623208399333c448/markupsafe-3.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4faffd047e07c38848ce017e8725090413cd80cbc23d86e55c587bf979e579c9", size = 15426, upload-time = "2025-09-27T18:37:27.316Z" }, + { url = "https://files.pythonhosted.org/packages/70/bc/6f1c2f612465f5fa89b95bead1f44dcb607670fd42891d8fdcd5d039f4f4/markupsafe-3.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:32001d6a8fc98c8cb5c947787c5d08b0a50663d139f1305bac5885d98d9b40fa", size = 14146, upload-time = "2025-09-27T18:37:28.327Z" }, +] + [[package]] name = "mcp" version = "1.26.0" @@ -1384,6 +1538,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/81/08/7036c080d7117f28a4af526d794aab6a84463126db031b007717c1a6676e/multidict-6.7.1-py3-none-any.whl", hash = "sha256:55d97cc6dae627efa6a6e548885712d4864b81110ac76fa4e534c03819fa4a56", size = 12319, upload-time = "2026-01-26T02:46:44.004Z" }, ] +[[package]] +name = "mypy-extensions" +version = "1.1.0" +source = { registry = 
"https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/6e/371856a3fb9d31ca8dac321cda606860fa4548858c0cc45d9d1d4ca2628b/mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558", size = 6343, upload-time = "2025-04-22T14:54:24.164Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" }, +] + [[package]] name = "nexus-rpc" version = "1.2.0" @@ -1576,6 +1739,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/52/96/5a770e5c461462575474468e5af931cff9de036e7c2b4fea23c1c58d2cbe/pathable-0.5.0-py3-none-any.whl", hash = "sha256:646e3d09491a6351a0c82632a09c02cdf70a252e73196b36d8a15ba0a114f0a6", size = 16867, upload-time = "2026-02-20T08:46:59.536Z" }, ] +[[package]] +name = "pathspec" +version = "1.0.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fa/36/e27608899f9b8d4dff0617b2d9ab17ca5608956ca44461ac14ac48b44015/pathspec-1.0.4.tar.gz", hash = "sha256:0210e2ae8a21a9137c0d470578cb0e595af87edaa6ebf12ff176f14a02e0e645", size = 131200, upload-time = "2026-01-27T03:59:46.938Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ef/3c/2c197d226f9ea224a9ab8d197933f9da0ae0aac5b6e0f884e2b8d9c8e9f7/pathspec-1.0.4-py3-none-any.whl", hash = "sha256:fb6ae2fd4e7c921a165808a552060e722767cfa526f99ca5156ed2ce45a5c723", size = 55206, upload-time = "2026-01-27T03:59:45.137Z" }, +] + [[package]] name = "platformdirs" version = "4.9.4" @@ -2067,6 +2239,30 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1b/d0/397f9626e711ff749a95d96b7af99b9c566a9bb5129b8e4c10fc4d100304/python_multipart-0.0.22-py3-none-any.whl", hash = 
"sha256:2b2cd894c83d21bf49d702499531c7bafd057d730c201782048f7945d82de155", size = 24579, upload-time = "2026-01-25T10:15:54.811Z" }, ] +[[package]] +name = "pytokens" +version = "0.4.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b6/34/b4e015b99031667a7b960f888889c5bd34ef585c85e1cb56a594b92836ac/pytokens-0.4.1.tar.gz", hash = "sha256:292052fe80923aae2260c073f822ceba21f3872ced9a68bb7953b348e561179a", size = 23015, upload-time = "2026-01-30T01:03:45.924Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cb/dc/08b1a080372afda3cceb4f3c0a7ba2bde9d6a5241f1edb02a22a019ee147/pytokens-0.4.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:8bdb9d0ce90cbf99c525e75a2fa415144fd570a1ba987380190e8b786bc6ef9b", size = 160720, upload-time = "2026-01-30T01:03:13.843Z" }, + { url = "https://files.pythonhosted.org/packages/64/0c/41ea22205da480837a700e395507e6a24425151dfb7ead73343d6e2d7ffe/pytokens-0.4.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5502408cab1cb18e128570f8d598981c68a50d0cbd7c61312a90507cd3a1276f", size = 254204, upload-time = "2026-01-30T01:03:14.886Z" }, + { url = "https://files.pythonhosted.org/packages/e0/d2/afe5c7f8607018beb99971489dbb846508f1b8f351fcefc225fcf4b2adc0/pytokens-0.4.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:29d1d8fb1030af4d231789959f21821ab6325e463f0503a61d204343c9b355d1", size = 268423, upload-time = "2026-01-30T01:03:15.936Z" }, + { url = "https://files.pythonhosted.org/packages/68/d4/00ffdbd370410c04e9591da9220a68dc1693ef7499173eb3e30d06e05ed1/pytokens-0.4.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:970b08dd6b86058b6dc07efe9e98414f5102974716232d10f32ff39701e841c4", size = 266859, upload-time = "2026-01-30T01:03:17.458Z" }, + { url = 
"https://files.pythonhosted.org/packages/a7/c9/c3161313b4ca0c601eeefabd3d3b576edaa9afdefd32da97210700e47652/pytokens-0.4.1-cp313-cp313-win_amd64.whl", hash = "sha256:9bd7d7f544d362576be74f9d5901a22f317efc20046efe2034dced238cbbfe78", size = 103520, upload-time = "2026-01-30T01:03:18.652Z" }, + { url = "https://files.pythonhosted.org/packages/8f/a7/b470f672e6fc5fee0a01d9e75005a0e617e162381974213a945fcd274843/pytokens-0.4.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:4a14d5f5fc78ce85e426aa159489e2d5961acf0e47575e08f35584009178e321", size = 160821, upload-time = "2026-01-30T01:03:19.684Z" }, + { url = "https://files.pythonhosted.org/packages/80/98/e83a36fe8d170c911f864bfded690d2542bfcfacb9c649d11a9e6eb9dc41/pytokens-0.4.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:97f50fd18543be72da51dd505e2ed20d2228c74e0464e4262e4899797803d7fa", size = 254263, upload-time = "2026-01-30T01:03:20.834Z" }, + { url = "https://files.pythonhosted.org/packages/0f/95/70d7041273890f9f97a24234c00b746e8da86df462620194cef1d411ddeb/pytokens-0.4.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dc74c035f9bfca0255c1af77ddd2d6ae8419012805453e4b0e7513e17904545d", size = 268071, upload-time = "2026-01-30T01:03:21.888Z" }, + { url = "https://files.pythonhosted.org/packages/da/79/76e6d09ae19c99404656d7db9c35dfd20f2086f3eb6ecb496b5b31163bad/pytokens-0.4.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:f66a6bbe741bd431f6d741e617e0f39ec7257ca1f89089593479347cc4d13324", size = 271716, upload-time = "2026-01-30T01:03:23.633Z" }, + { url = "https://files.pythonhosted.org/packages/79/37/482e55fa1602e0a7ff012661d8c946bafdc05e480ea5a32f4f7e336d4aa9/pytokens-0.4.1-cp314-cp314-win_amd64.whl", hash = "sha256:b35d7e5ad269804f6697727702da3c517bb8a5228afa450ab0fa787732055fc9", size = 104539, upload-time = "2026-01-30T01:03:24.788Z" }, + { url = 
"https://files.pythonhosted.org/packages/30/e8/20e7db907c23f3d63b0be3b8a4fd1927f6da2395f5bcc7f72242bb963dfe/pytokens-0.4.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:8fcb9ba3709ff77e77f1c7022ff11d13553f3c30299a9fe246a166903e9091eb", size = 168474, upload-time = "2026-01-30T01:03:26.428Z" }, + { url = "https://files.pythonhosted.org/packages/d6/81/88a95ee9fafdd8f5f3452107748fd04c24930d500b9aba9738f3ade642cc/pytokens-0.4.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:79fc6b8699564e1f9b521582c35435f1bd32dd06822322ec44afdeba666d8cb3", size = 290473, upload-time = "2026-01-30T01:03:27.415Z" }, + { url = "https://files.pythonhosted.org/packages/cf/35/3aa899645e29b6375b4aed9f8d21df219e7c958c4c186b465e42ee0a06bf/pytokens-0.4.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d31b97b3de0f61571a124a00ffe9a81fb9939146c122c11060725bd5aea79975", size = 303485, upload-time = "2026-01-30T01:03:28.558Z" }, + { url = "https://files.pythonhosted.org/packages/52/a0/07907b6ff512674d9b201859f7d212298c44933633c946703a20c25e9d81/pytokens-0.4.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:967cf6e3fd4adf7de8fc73cd3043754ae79c36475c1c11d514fc72cf5490094a", size = 306698, upload-time = "2026-01-30T01:03:29.653Z" }, + { url = "https://files.pythonhosted.org/packages/39/2a/cbbf9250020a4a8dd53ba83a46c097b69e5eb49dd14e708f496f548c6612/pytokens-0.4.1-cp314-cp314t-win_amd64.whl", hash = "sha256:584c80c24b078eec1e227079d56dc22ff755e0ba8654d8383b2c549107528918", size = 116287, upload-time = "2026-01-30T01:03:30.912Z" }, + { url = "https://files.pythonhosted.org/packages/c6/78/397db326746f0a342855b81216ae1f0a32965deccfd7c830a2dbc66d2483/pytokens-0.4.1-py3-none-any.whl", hash = "sha256:26cef14744a8385f35d0e095dc8b3a7583f6c953c2e3d269c7f82484bf5ad2de", size = 13729, upload-time = "2026-01-30T01:03:45.029Z" }, +] + [[package]] name = "pywin32" version = "311" @@ -2525,6 +2721,18 
@@ wheels = [ { url = "https://files.pythonhosted.org/packages/16/e1/3079a9ff9b8e11b846c6ac5c8b5bfb7ff225eee721825310c91b3b50304f/tqdm-4.67.3-py3-none-any.whl", hash = "sha256:ee1e4c0e59148062281c49d80b25b67771a127c85fc9676d3be5f243206826bf", size = 78374, upload-time = "2026-02-03T17:35:50.982Z" }, ] +[[package]] +name = "typeguard" +version = "4.5.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/2b/e8/66e25efcc18542d58706ce4e50415710593721aae26e794ab1dec34fb66f/typeguard-4.5.1.tar.gz", hash = "sha256:f6f8ecbbc819c9bc749983cc67c02391e16a9b43b8b27f15dc70ed7c4a007274", size = 80121, upload-time = "2026-02-19T16:09:03.392Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/91/88/b55b3117287a8540b76dbdd87733808d4d01c8067a3b339408c250bb3600/typeguard-4.5.1-py3-none-any.whl", hash = "sha256:44d2bf329d49a244110a090b55f5f91aa82d9a9834ebfd30bcc73651e4a8cc40", size = 36745, upload-time = "2026-02-19T16:09:01.6Z" }, +] + [[package]] name = "typer" version = "0.24.1" diff --git a/frontend/public/locales/en-GB/translation.toml b/frontend/public/locales/en-GB/translation.toml index aef0edf518..cf746a705d 100644 --- a/frontend/public/locales/en-GB/translation.toml +++ b/frontend/public/locales/en-GB/translation.toml @@ -2084,8 +2084,18 @@ thinking = "Thinking..." analyzing = "Analysing your request..." calling_engine = "AI is thinking..." extracting_content = "Extracting content from your documents..." +executing_tool_single = "Running {{tool}}..." +executing_tool_step = "Running {{tool}} (step {{step}} of {{total}})..." +executing_tool_generic = "Running tool..." +executing_tool_generic_step = "Running tool (step {{step}} of {{total}})..." processing = "Processing extracted content..." 
+[chat.toolsUsed] +summary_one = "Ran 1 tool" +summary_other = "Ran {{count}} tools" +summary = "Ran {{count}} tools" +unknownTool = "Unknown tool" + [certSign] allSigned = "All participants have signed. Ready to finalize." awaitingSignatures = "Awaiting signatures" diff --git a/frontend/src/prototypes/components/chat/ChatContext.tsx b/frontend/src/prototypes/components/chat/ChatContext.tsx index 3f006c8cf3..e6d42e57d7 100644 --- a/frontend/src/prototypes/components/chat/ChatContext.tsx +++ b/frontend/src/prototypes/components/chat/ChatContext.tsx @@ -6,23 +6,49 @@ import { useRef, type ReactNode, } from "react"; -import { useAllFiles } from "@app/contexts/FileContext"; +import { useAllFiles, useFileActions } from "@app/contexts/FileContext"; +import apiClient from "@app/services/apiClient"; import { getAuthHeaders } from "@app/services/apiClientSetup"; +import { createChildStub } from "@app/contexts/file/fileActions"; +import { + createNewStirlingFileStub, + createStirlingFile, + type StirlingFileStub, +} from "@app/types/fileContext"; +import type { ToolOperation } from "@app/types/file"; export interface ChatMessage { id: string; role: "user" | "assistant"; content: string; timestamp: number; + /** + * Tool endpoint paths executed during this assistant turn (e.g. + * {@code /api/v1/general/rotate-pdf}). Populated for assistant messages when the workflow + * ran one or more tools, in execution order. Undefined for user messages and for assistant + * turns that answered without running any tool. + */ + toolsUsed?: string[]; } export enum AiWorkflowPhase { ANALYZING = "analyzing", CALLING_ENGINE = "calling_engine", EXTRACTING_CONTENT = "extracting_content", + EXECUTING_TOOL = "executing_tool", PROCESSING = "processing", } +export interface AiWorkflowProgress { + phase: AiWorkflowPhase; + /** Tool endpoint path currently executing, for EXECUTING_TOOL events. */ + tool?: string; + /** 1-based step index, for EXECUTING_TOOL events. 
*/ + stepIndex?: number; + /** Total number of plan steps, for EXECUTING_TOOL events. */ + stepCount?: number; +} + type AiWorkflowOutcome = | "answer" | "not_found" @@ -35,6 +61,13 @@ type AiWorkflowOutcome = | "unsupported_capability" | "cannot_continue"; +interface AiWorkflowResultFile { + /** Stirling file ID — download with /api/v1/general/files/{fileId}. */ + fileId: string; + fileName: string; + contentType: string; +} + interface AiWorkflowResponse { outcome: AiWorkflowOutcome; answer?: string; @@ -46,19 +79,25 @@ interface AiWorkflowResponse { message?: string; evidence?: Array<{ pageNumber: number; text: string }>; steps?: Array>; + /** Every file produced by the workflow (empty if the outcome has no files). */ + resultFiles?: AiWorkflowResultFile[]; + // Legacy single-file fields — mirror the first entry of resultFiles. + fileId?: string; + fileName?: string; + contentType?: string; } interface ChatState { messages: ChatMessage[]; isOpen: boolean; isLoading: boolean; - progressPhase: AiWorkflowPhase | null; + progress: AiWorkflowProgress | null; } type ChatAction = | { type: "ADD_MESSAGE"; message: ChatMessage } | { type: "SET_LOADING"; loading: boolean } - | { type: "SET_PROGRESS"; phase: AiWorkflowPhase | null } + | { type: "SET_PROGRESS"; progress: AiWorkflowProgress | null } | { type: "TOGGLE_OPEN" } | { type: "SET_OPEN"; open: boolean }; @@ -69,7 +108,7 @@ function chatReducer(state: ChatState, action: ChatAction): ChatState { case "SET_LOADING": return { ...state, isLoading: action.loading }; case "SET_PROGRESS": - return { ...state, progressPhase: action.phase }; + return { ...state, progress: action.progress }; case "TOGGLE_OPEN": return { ...state, isOpen: !state.isOpen }; case "SET_OPEN": @@ -114,10 +153,18 @@ function formatWorkflowResponse(data: AiWorkflowResponse): string { /** * Parses an SSE text stream and invokes callbacks for each named event. 
*/ +interface ProgressEvent { + phase: string; + timestamp: number; + tool?: string; + stepIndex?: number; + stepCount?: number; +} + async function consumeSSEStream( response: Response, handlers: { - onProgress: (data: { phase: string; timestamp: number }) => void; + onProgress: (data: ProgressEvent) => void; onResult: (data: AiWorkflowResponse) => void; onError: (data: { message: string }) => void; }, @@ -171,7 +218,7 @@ interface ChatContextValue { messages: ChatMessage[]; isOpen: boolean; isLoading: boolean; - progressPhase: AiWorkflowPhase | null; + progress: AiWorkflowProgress | null; toggleOpen: () => void; setOpen: (open: boolean) => void; sendMessage: (content: string) => Promise; @@ -183,14 +230,86 @@ const initialState: ChatState = { messages: [], isOpen: false, isLoading: false, - progressPhase: null, + progress: null, }; export function ChatProvider({ children }: { children: ReactNode }) { const [state, dispatch] = useReducer(chatReducer, initialState); - const { files: activeFiles } = useAllFiles(); + const { files: activeFiles, fileStubs: activeFileStubs } = useAllFiles(); + const { actions: fileActions } = useFileActions(); const abortRef = useRef(null); + // Download a File from the Stirling files endpoint. + const downloadFile = useCallback( + async (descriptor: AiWorkflowResultFile): Promise => { + const response = await apiClient.get( + `/api/v1/general/files/${descriptor.fileId}`, + { responseType: "blob" }, + ); + return new File([response.data], descriptor.fileName, { + type: descriptor.contentType ?? response.data.type, + }); + }, + [], + ); + + // Import the files produced by an AI workflow result into FileContext. + // + // If the workflow produced the same number of outputs as inputs, map each output to its + // corresponding input as a new version in the same chain. Otherwise (merge, split, etc.) + // add the outputs as new root files. 
+ const importResultFile = useCallback( + async ( + result: AiWorkflowResponse, + sourceStubs: StirlingFileStub[], + ): Promise => { + const descriptors = result.resultFiles?.length + ? result.resultFiles + : result.fileId && result.fileName && result.contentType + ? [ + { + fileId: result.fileId, + fileName: result.fileName, + contentType: result.contentType, + } satisfies AiWorkflowResultFile, + ] + : []; + if (descriptors.length === 0) return; + + const files = await Promise.all(descriptors.map(downloadFile)); + + const operation: ToolOperation = { + toolId: "ai-workflow", + timestamp: Date.now(), + }; + const isVersionMapping = + sourceStubs.length > 0 && files.length === sourceStubs.length; + const stubs = files.map((file, i) => + isVersionMapping + ? createChildStub(sourceStubs[i], operation, file) + : createNewStirlingFileStub(file), + ); + const stirlingFiles = files.map((file, i) => + createStirlingFile(file, stubs[i].id), + ); + + if (sourceStubs.length > 0) { + // Always consume the inputs so merge/split inputs are removed from the workbench. + // For 1:1 operations (rotate, compress) the outputs carry the version chain; for + // merge/split they're fresh roots. + await fileActions.consumeFiles( + sourceStubs.map((s) => s.id), + stirlingFiles, + stubs, + ); + } else { + // No inputs were provided (unlikely for completed workflows, but handle it safely). 
+ await fileActions.addFiles(files, { selectFiles: true }); + } + }, + [fileActions, downloadFile], + ); + const toggleOpen = useCallback(() => dispatch({ type: "TOGGLE_OPEN" }), []); const setOpen = useCallback( (open: boolean) => dispatch({ type: "SET_OPEN", open }), @@ -212,7 +331,7 @@ export function ChatProvider({ children }: { children: ReactNode }) { }; dispatch({ type: "ADD_MESSAGE", message: userMessage }); dispatch({ type: "SET_LOADING", loading: true }); - dispatch({ type: "SET_PROGRESS", phase: null }); + dispatch({ type: "SET_PROGRESS", progress: null }); try { const formData = new FormData(); @@ -234,17 +353,29 @@ export function ChatProvider({ children }: { children: ReactNode }) { } let receivedResult = false; + const toolsUsed: string[] = []; await consumeSSEStream(response, { onProgress: (data) => { + if ( + data.phase === AiWorkflowPhase.EXECUTING_TOOL && + typeof data.tool === "string" + ) { + toolsUsed.push(data.tool); + } dispatch({ type: "SET_PROGRESS", - phase: data.phase as AiWorkflowPhase, + progress: { + phase: data.phase as AiWorkflowPhase, + tool: data.tool, + stepIndex: data.stepIndex, + stepCount: data.stepCount, + }, }); }, onResult: (data) => { receivedResult = true; - dispatch({ type: "SET_PROGRESS", phase: null }); + dispatch({ type: "SET_PROGRESS", progress: null }); const replyContent = formatWorkflowResponse(data); dispatch({ type: "ADD_MESSAGE", @@ -253,12 +384,28 @@ export function ChatProvider({ children }: { children: ReactNode }) { role: "assistant", content: replyContent, timestamp: Date.now(), + toolsUsed: toolsUsed.length > 0 ? 
toolsUsed : undefined, }, }); + if (data.fileId || data.resultFiles?.length) { + importResultFile(data, activeFileStubs).catch((err) => { + console.error("Failed to import AI result file", err); + dispatch({ + type: "ADD_MESSAGE", + message: { + id: crypto.randomUUID(), + role: "assistant", + content: + "The file was processed but I couldn't download it.", + timestamp: Date.now(), + }, + }); + }); + } }, onError: (data) => { receivedResult = true; - dispatch({ type: "SET_PROGRESS", phase: null }); + dispatch({ type: "SET_PROGRESS", progress: null }); dispatch({ type: "ADD_MESSAGE", message: { @@ -276,7 +423,7 @@ export function ChatProvider({ children }: { children: ReactNode }) { } } catch (e) { if ((e as Error).name === "AbortError") return; - dispatch({ type: "SET_PROGRESS", phase: null }); + dispatch({ type: "SET_PROGRESS", progress: null }); dispatch({ type: "ADD_MESSAGE", message: { @@ -294,7 +441,7 @@ export function ChatProvider({ children }: { children: ReactNode }) { } } }, - [activeFiles], + [activeFiles, activeFileStubs, importResultFile], ); return ( @@ -303,7 +450,7 @@ export function ChatProvider({ children }: { children: ReactNode }) { messages: state.messages, isOpen: state.isOpen, isLoading: state.isLoading, - progressPhase: state.progressPhase, + progress: state.progress, toggleOpen, setOpen, sendMessage, diff --git a/frontend/src/prototypes/components/chat/ChatPanel.tsx b/frontend/src/prototypes/components/chat/ChatPanel.tsx index 3c28cd1031..4428c824d6 100644 --- a/frontend/src/prototypes/components/chat/ChatPanel.tsx +++ b/frontend/src/prototypes/components/chat/ChatPanel.tsx @@ -1,4 +1,10 @@ -import { useRef, useEffect, useState, type KeyboardEvent } from "react"; +import { + useMemo, + useRef, + useEffect, + useState, + type KeyboardEvent, +} from "react"; import { useTranslation } from "react-i18next"; import { ActionIcon, @@ -11,19 +17,142 @@ import { Transition, Loader, Group, + Collapse, + UnstyledButton, + List, } from "@mantine/core"; 
import SendIcon from "@mui/icons-material/Send"; import ChatBubbleOutlineIcon from "@mui/icons-material/ChatBubbleOutline"; import CloseIcon from "@mui/icons-material/Close"; -import { useChat } from "@app/components/chat/ChatContext"; +import ExpandMoreIcon from "@mui/icons-material/ExpandMore"; +import ExpandLessIcon from "@mui/icons-material/ExpandLess"; +import { + useChat, + AiWorkflowPhase, + type AiWorkflowProgress, +} from "@app/components/chat/ChatContext"; +import { useTranslatedToolCatalog } from "@app/data/useTranslatedToolRegistry"; import "@app/components/chat/ChatPanel.css"; +type TranslateFn = (key: string, options?: Record) => string; + +/** Resolver mapping a tool endpoint path to its translated display name. */ +type ToolNameResolver = (endpoint: string) => string | null; + +/** + * Look up a tool's translated name from the tool catalog. The catalog's {@code operationConfig} + * exposes the full API endpoint path for each tool, so we key the lookup on the exact path that + * arrives in SSE progress events — no string parsing. + */ +function useToolNameResolver(): ToolNameResolver { + const { allTools } = useTranslatedToolCatalog(); + return useMemo(() => { + const nameByEndpoint = new Map(); + Object.values(allTools).forEach((tool) => { + const endpoint = tool.operationConfig?.endpoint; + // Only register tools with a static endpoint. Tools whose endpoint is a function + // (dynamic routing, e.g. Convert / Split) need runtime params to resolve, so they fall + // through to the generic progress message rather than mis-matching. + if (typeof endpoint === "string") { + nameByEndpoint.set(endpoint, tool.name); + } + }); + return (endpoint: string) => nameByEndpoint.get(endpoint) ?? 
null; + }, [allTools]); +} + +function formatProgress( + progress: AiWorkflowProgress, + t: TranslateFn, + resolveToolName: ToolNameResolver, +): string { + if (progress.phase === AiWorkflowPhase.EXECUTING_TOOL && progress.tool) { + const tool = resolveToolName(progress.tool); + const hasSteps = + progress.stepIndex != null && + progress.stepCount != null && + progress.stepCount > 1; + if (tool) { + return hasSteps + ? t("chat.progress.executing_tool_step", { + tool, + step: progress.stepIndex, + total: progress.stepCount, + }) + : t("chat.progress.executing_tool_single", { tool }); + } + // Unknown tool — fall back to a generic translated message rather than + // prettifying the endpoint path by hand. + return hasSteps + ? t("chat.progress.executing_tool_generic_step", { + step: progress.stepIndex, + total: progress.stepCount, + }) + : t("chat.progress.executing_tool_generic"); + } + return t(`chat.progress.${progress.phase}`); +} + +function ToolsUsedBlock({ + tools, + resolveToolName, + t, +}: { + tools: string[]; + resolveToolName: ToolNameResolver; + t: TranslateFn; +}) { + const [expanded, setExpanded] = useState(false); + const names = tools.map( + (endpoint) => resolveToolName(endpoint) ?? t("chat.toolsUsed.unknownTool"), + ); + const label = t("chat.toolsUsed.summary", { count: tools.length }); + return ( + + setExpanded((v) => !v)} + aria-expanded={expanded} + > + + {expanded ? ( + + ) : ( + + )} + + {label} + + + + + + {names.map((name, i) => ( + {name} + ))} + + + + ); +} + function ChatMessageBubble({ role, content, + toolsUsed, + resolveToolName, + t, }: { role: "user" | "assistant"; content: string; + toolsUsed?: string[]; + resolveToolName: ToolNameResolver; + t: TranslateFn; }) { return (

@@ -31,6 +160,13 @@ function ChatMessageBubble({ {content} + {toolsUsed && toolsUsed.length > 0 && ( + + )}
); @@ -38,14 +174,9 @@ function ChatMessageBubble({ export function ChatPanel() { const { t } = useTranslation(); - const { - messages, - isOpen, - isLoading, - progressPhase, - toggleOpen, - sendMessage, - } = useChat(); + const { messages, isOpen, isLoading, progress, toggleOpen, sendMessage } = + useChat(); + const resolveToolName = useToolNameResolver(); const [input, setInput] = useState(""); const scrollRef = useRef(null); const inputRef = useRef(null); @@ -129,6 +260,9 @@ export function ChatPanel() { key={msg.id} role={msg.role} content={msg.content} + toolsUsed={msg.toolsUsed} + resolveToolName={resolveToolName} + t={t} /> ))} {isLoading && ( @@ -141,8 +275,8 @@ export function ChatPanel() { - {progressPhase - ? t(`chat.progress.${progressPhase}`) + {progress + ? formatProgress(progress, t, resolveToolName) : t("chat.progress.thinking")}