diff --git a/stirling-pdf/src/main/java/stirling/software/SPDF/SPDFApplication.java b/stirling-pdf/src/main/java/stirling/software/SPDF/SPDFApplication.java
index 2131b4239..9cdb6c6ae 100644
--- a/stirling-pdf/src/main/java/stirling/software/SPDF/SPDFApplication.java
+++ b/stirling-pdf/src/main/java/stirling/software/SPDF/SPDFApplication.java
@@ -15,6 +15,7 @@ import org.springframework.boot.SpringApplication;
 import org.springframework.boot.autoconfigure.SpringBootApplication;
 import org.springframework.core.env.Environment;
 import org.springframework.scheduling.annotation.EnableScheduling;
+import org.springframework.context.annotation.PropertySource;
 
 import io.github.pixee.security.SystemCommand;
 
@@ -38,6 +39,7 @@ import stirling.software.common.util.UrlUtils;
             "stirling.software.common",
             "stirling.software.proprietary"
         })
+@PropertySource("classpath:gemini-agent.properties")
 public class SPDFApplication {
 
     private static String serverPortStatic;
diff --git a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/GeminiAgentController.java b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/GeminiAgentController.java
new file mode 100644
index 000000000..f76991009
--- /dev/null
+++ b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/GeminiAgentController.java
@@ -0,0 +1,62 @@
+package stirling.software.SPDF.controller.api;
+
+import io.swagger.v3.oas.annotations.Operation;
+import io.swagger.v3.oas.annotations.tags.Tag;
+import lombok.RequiredArgsConstructor;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.http.MediaType;
+import org.springframework.http.ResponseEntity;
+import org.springframework.web.bind.annotation.PostMapping;
+import org.springframework.web.bind.annotation.RequestMapping;
+import org.springframework.web.bind.annotation.RequestPart;
+import org.springframework.web.bind.annotation.RestController;
+import org.springframework.web.multipart.MultipartFile;
+import stirling.software.SPDF.model.api.agent.AgentRequest;
+import stirling.software.SPDF.model.api.agent.AgentResponse;
+import stirling.software.SPDF.service.agent.GeminiAgentService;
+
+import java.util.List;
+
+/**
+ * REST controller exposing the Gemini agent under {@code /api/v1/agent}.
+ *
+ * <p>Requests are delegated unchanged to {@link GeminiAgentService}.
+ */
+@RestController
+@RequestMapping("/api/v1/agent")
+@Tag(name = "Agent", description = "Gemini Agent APIs")
+@RequiredArgsConstructor
+public class GeminiAgentController {
+
+    // Must be final: @RequiredArgsConstructor only generates constructor parameters for
+    // final (or @NonNull) fields, which is what lets Spring inject it via the constructor.
+    private final GeminiAgentService geminiAgentService;
+
+    /**
+     * Runs a user prompt, plus any uploaded files, through the Gemini agent.
+     *
+     * @param agentRequestDetails the {@code request} multipart part carrying the prompt and
+     *     additional parameters
+     * @param files the optional {@code files} multipart part; may be {@code null} when omitted
+     * @return HTTP 200 wrapping the service's {@link AgentResponse}, including responses whose
+     *     {@code success} flag is {@code false}
+     */
+    @PostMapping(value = "/execute", consumes = {MediaType.MULTIPART_FORM_DATA_VALUE})
+    @Operation(
+            summary = "Process a user request through the Gemini agent",
+            description = "This endpoint takes a user prompt and optional files, processes them using the Gemini agent, and returns the result.")
+    public ResponseEntity<AgentResponse> executeTask(
+            @RequestPart(name = "request", required = true) AgentRequest agentRequestDetails,
+            @RequestPart(name = "files", required = false) List<MultipartFile> files) {
+
+        // Files travel as their own multipart part rather than inside AgentRequest, so they
+        // are forwarded to the service alongside the prompt.
+        AgentResponse response =
+                geminiAgentService.processRequest(
+                        agentRequestDetails.getUserPrompt(),
+                        files,
+                        agentRequestDetails.getAdditionalParams());
+
+        return ResponseEntity.ok(response);
+    }
+}
diff --git a/stirling-pdf/src/main/java/stirling/software/SPDF/model/api/agent/AgentRequest.java b/stirling-pdf/src/main/java/stirling/software/SPDF/model/api/agent/AgentRequest.java
new file mode 100644
index 000000000..9c9f7b468
--- /dev/null
+++ b/stirling-pdf/src/main/java/stirling/software/SPDF/model/api/agent/AgentRequest.java
@@ -0,0 +1,22 @@
+package stirling.software.SPDF.model.api.agent;
+
+import lombok.Data;
+import lombok.NoArgsConstructor;
+import org.springframework.web.multipart.MultipartFile;
+
+import java.util.List;
+import java.util.Map;
+
+/** Request payload for the Gemini agent endpoint: the user's prompt plus optional parameters. */
+@Data
+@NoArgsConstructor
+public class AgentRequest {
+    /** Natural-language instruction supplied by the user. */
+    private String userPrompt;
+
+    // Files are handled as a separate @RequestPart in the controller and passed to the
+    // service as a List<MultipartFile>, so this DTO does not carry file information itself.
+
+    /** Optional extra parameters forwarded to the agent service. */
+    private Map<String, Object> additionalParams;
+}
diff --git a/stirling-pdf/src/main/java/stirling/software/SPDF/model/api/agent/AgentResponse.java b/stirling-pdf/src/main/java/stirling/software/SPDF/model/api/agent/AgentResponse.java
new file mode 100644
index 000000000..b47f6e9ac
--- /dev/null
+++ b/stirling-pdf/src/main/java/stirling/software/SPDF/model/api/agent/AgentResponse.java
@@ -0,0 +1,18 @@
+package stirling.software.SPDF.model.api.agent;
+
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+/** Result returned by the Gemini agent endpoint. */
+@Data
+@NoArgsConstructor
+public class AgentResponse {
+    /** Human-readable outcome or error description. */
+    private String message;
+
+    /** Operation result; {@code null} when the request failed. */
+    private Object data;
+
+    /** {@code true} when the request was processed without error. */
+    private boolean success;
+}
diff --git a/stirling-pdf/src/main/java/stirling/software/SPDF/service/agent/GeminiAgentService.java b/stirling-pdf/src/main/java/stirling/software/SPDF/service/agent/GeminiAgentService.java
new file mode 100644
index 000000000..291674ddb
--- /dev/null
+++ b/stirling-pdf/src/main/java/stirling/software/SPDF/service/agent/GeminiAgentService.java
@@ -0,0 +1,237 @@
+package stirling.software.SPDF.service.agent;
+
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.stereotype.Service;
+import org.springframework.web.multipart.MultipartFile;
+import stirling.software.SPDF.model.api.agent.AgentRequest;
+import stirling.software.SPDF.model.api.agent.AgentResponse;
+
+import java.util.List;
+import java.util.Map;
+// TODO: Add necessary imports for a Gemini client library
+
+/**
+ * Turns a natural-language request into a Stirling-PDF operation with the help of Gemini.
+ *
+ * <p>The Gemini call, response parsing and operation dispatch are still placeholders; only input
+ * validation and prompt construction are implemented.
+ */
+@Slf4j
+@Service
+public class GeminiAgentService {
+
+    @Value("${gemini.api.key}")
+    private String geminiApiKey;
+
+    // TODO: Inject a Gemini client if using a library
+
+    public GeminiAgentService() {
+        // Initialize Gemini client here if needed
+    }
+
+    /**
+     * Validates the request, builds the Gemini prompt and runs the selected operation.
+     *
+     * @param userPrompt the user's instruction; a {@code null} or blank value is rejected
+     * @param files uploaded files; may be {@code null} or empty
+     * @param additionalParams optional extra parameters; may be {@code null} or empty
+     * @return the outcome, with {@code success} set to {@code false} when the prompt is empty or
+     *     processing throws
+     */
+    public AgentResponse processRequest(
+            String userPrompt, List<MultipartFile> files, Map<String, Object> additionalParams) {
+        AgentResponse agentResponse = new AgentResponse();
+
+        // 1. Validate inputs (userPrompt, files, etc.)
+        if (userPrompt == null || userPrompt.trim().isEmpty()) {
+            agentResponse.setSuccess(false);
+            agentResponse.setMessage("User prompt cannot be empty.");
+            return agentResponse;
+        }
+
+        // 2. Prepare request for Gemini API
+        //    - This will involve constructing the prompt, including a summary of the
+        //      available Stirling-PDF operations.
+        //    - If files are provided, decide how to represent them to Gemini
+        //      (e.g., extract text, OCR, pass file references if supported).
+        String geminiPrompt = buildPromptForGemini(userPrompt, files, additionalParams);
+
+        // 3. Call Gemini API
+        try {
+            // Placeholder for actual Gemini API call
+            // GeminiResponse geminiApiResponse = geminiClient.generateContent(geminiPrompt);
+            String geminiOutput = "Placeholder Gemini response: Would perform action X on PDF."; // Replace with actual API call
+
+            // 4. Parse Gemini's response: the action to take and the parameters it needs.
+            String actionToPerform = parseActionFromGeminiResponse(geminiOutput);
+            Map<String, Object> actionParams = parseParamsFromGeminiResponse(geminiOutput);
+
+            // 5. Orchestrate Stirling-PDF operations (placeholder for now).
+            Object resultData = executeStirlingPdfOperation(actionToPerform, actionParams, files);
+
+            agentResponse.setSuccess(true);
+            agentResponse.setMessage("Gemini agent processed the request successfully.");
+            agentResponse.setData(resultData);
+
+        } catch (Exception e) {
+            // Keep the stack trace in the server log; the response only carries the message.
+            log.error("Error processing request with Gemini agent", e);
+            agentResponse.setSuccess(false);
+            agentResponse.setMessage("Error processing request with Gemini agent: " + e.getMessage());
+            agentResponse.setData(null);
+        }
+
+        return agentResponse;
+    }
+
+    /**
+     * Assembles the full prompt: role, operation catalogue, request context and response format.
+     *
+     * <p>NOTE(review): the user prompt and file names are embedded verbatim, so they can contain
+     * text that reads as instructions to the model; treat the model's answer as untrusted.
+     */
+    private String buildPromptForGemini(
+            String userPrompt, List<MultipartFile> files, Map<String, Object> additionalParams) {
+        StringBuilder prompt = new StringBuilder();
+        prompt.append("You are an AI assistant for Stirling-PDF, a powerful PDF manipulation tool.\n");
+        prompt.append("Your primary goal is to understand the user's request and determine the single most appropriate Stirling-PDF operation to perform and the necessary parameters for that operation.\n\n");
+
+        appendOperationCatalogue(prompt);
+        appendUserContext(prompt, userPrompt, files, additionalParams);
+        appendResponseFormat(prompt);
+
+        return prompt.toString();
+    }
+
+    /** Appends the list of operations Gemini may choose from, with their parameter shapes. */
+    private void appendOperationCatalogue(StringBuilder prompt) {
+        prompt.append("## Available Stirling-PDF Operations:\n");
+        prompt.append("Here is a list of operations you can request. For each operation, specify the 'operation' name and a 'parameters' JSON object.\n\n");
+
+        // General Operations
+        prompt.append("- operation: \"merge-pdfs\"\n");
+        prompt.append(" description: \"Merges multiple PDF files into one single PDF.\"\n");
+        prompt.append(" parameters: {\"sortType\": \"orderProvided|byFileName|byDateModified|...\", \"generateToc\": \"true|false\", \"removeCertSign\": \"true|false\"}\n\n");
+
+        prompt.append("- operation: \"split-pdf\"\n");
+        prompt.append(" description: \"Splits a PDF into multiple files based on page ranges or extracting all pages.\"\n");
+        prompt.append(" parameters: {\"splitType\": \"ranges|all\", \"ranges\": \"e.g., 1-3,5,7-end\"}\n\n");
+
+        prompt.append("- operation: \"rotate-pdf\"\n");
+        prompt.append(" description: \"Rotates pages in a PDF file.\"\n");
+        prompt.append(" parameters: {\"angle\": \"90|180|270\", \"pageFilter\": \"all|even|odd|custom\", \"pageNumbers\": \"e.g., 1,3-5\"}\n\n");
+
+        // Security Operations
+        prompt.append("- operation: \"add-watermark\"\n");
+        prompt.append(" description: \"Adds a text or image watermark to a PDF.\"\n");
+        prompt.append(" parameters: {\"watermarkType\": \"text|image\", \"watermarkText\": \"text_for_watermark (if type is text)\", \"watermarkImage\": \"reference_to_image_file (if type is image)\", \"fontSize\": float, \"opacity\": float (0.0-1.0), \"rotation\": float, ...}\n\n");
+
+        prompt.append("- operation: \"add-password\"\n");
+        prompt.append(" description: \"Adds a password to protect a PDF.\"\n");
+        prompt.append(" parameters: {\"ownerPassword\": \"password_string\", \"userPassword\": \"password_string\"}\n\n");
+
+        // Misc Operations
+        prompt.append("- operation: \"ocr-pdf\"\n");
+        prompt.append(" description: \"Performs OCR (Optical Character Recognition) on a PDF to make its text selectable/searchable.\"\n");
+        prompt.append(" parameters: {\"languages\": [\"eng\", \"spa\", ...], \"ocrType\": \"skip-text|force-ocr\", \"deskew\": \"true|false\"}\n\n");
+
+        prompt.append("- operation: \"compress-pdf\"\n");
+        prompt.append(" description: \"Reduces the file size of a PDF.\"\n");
+        prompt.append(" parameters: {\"compressionLevel\": \"low|medium|high|custom_0-100\"}\n\n");
+
+        // Conversion Operations
+        prompt.append("- operation: \"convert-to-pdfa\"\n");
+        prompt.append(" description: \"Converts a PDF to PDF/A format for long-term archiving.\"\n");
+        prompt.append(" parameters: {\"pdfStandard\": \"PDF/A-1B|PDF/A-2B|PDF/A-3B\"}\n\n");
+
+        prompt.append("- operation: \"pdf-to-word\"\n");
+        prompt.append(" description: \"Converts a PDF file to a Word document (docx).\"\n");
+        prompt.append(" parameters: {}\n\n"); // Assuming simple conversion, might need more params
+
+        prompt.append("- operation: \"image-to-pdf\"\n");
+        prompt.append(" description: \"Converts one or more image files to a PDF document.\"\n");
+        prompt.append(" parameters: {\"pageSize\": \"A4|LETTER|AUTO\", \"orientation\": \"portrait|landscape\"}\n\n");
+    }
+
+    /** Appends the user's prompt, the uploaded file names and any additional parameters. */
+    private void appendUserContext(
+            StringBuilder prompt,
+            String userPrompt,
+            List<MultipartFile> files,
+            Map<String, Object> additionalParams) {
+        prompt.append("## User Request Context:\n");
+        prompt.append("User's request: \"").append(userPrompt).append("\"\n");
+
+        if (files != null && !files.isEmpty()) {
+            prompt.append("The user has provided the following file(s) for the operation (you will receive them separately):\n");
+            for (int i = 0; i < files.size(); i++) {
+                prompt.append("- File ").append(i + 1).append(": ").append(files.get(i).getOriginalFilename()).append("\n");
+            }
+            if (files.size() == 1) {
+                prompt.append("Assume this single file is the primary input unless the user specifies otherwise.\n");
+            } else {
+                prompt.append("Determine from the user's prompt how these files should be used (e.g., all for merge, first as input second as watermark image).\n");
+            }
+        }
+        if (additionalParams != null && !additionalParams.isEmpty()) {
+            prompt.append("Additional parameters provided: ").append(additionalParams).append("\n");
+        }
+    }
+
+    /** Appends the instructions describing the JSON object Gemini must answer with. */
+    private void appendResponseFormat(StringBuilder prompt) {
+        prompt.append("\n## Your Response Format:\n");
+        prompt.append("Based on the user's request and the available operations, please identify the single most relevant operation and its parameters.\n");
+        prompt.append("Respond with a JSON object containing two keys: 'operation' (a string matching one of the available operation names) and 'parameters' (a JSON object of the parameters for that operation).\n");
+        prompt.append("If the user's request is ambiguous or requires an operation not listed, respond with {\"operation\": \"clarification_needed\", \"parameters\": {\"message\": \"Your clarification message here\"}}.\n");
+        prompt.append("If multiple operations seem applicable, choose the one that seems most central to the user's request or ask for clarification.\n");
+        prompt.append("Example response: {\"operation\": \"add-watermark\", \"parameters\": {\"watermarkType\": \"text\", \"watermarkText\": \"CONFIDENTIAL DRAFT\", \"opacity\": 0.3, \"fontSize\": 50.0}}\n");
+    }
+
+    /**
+     * Extracts the action name from Gemini's output.
+     *
+     * <p>Placeholder: recognises only the literal text "perform action X".
+     */
+    private String parseActionFromGeminiResponse(String geminiOutput) {
+        // TODO: Implement logic to parse the action from Gemini's response.
+        // This might involve JSON parsing if Gemini returns structured data,
+        // or regex/string matching for less structured output.
+        if (geminiOutput.contains("perform action X")) {
+            return "actionX";
+        }
+        return "unknownAction";
+    }
+
+    /** Extracts the action parameters from Gemini's output. Placeholder: always empty. */
+    private Map<String, Object> parseParamsFromGeminiResponse(String geminiOutput) {
+        // TODO: Implement logic to parse parameters from Gemini's response.
+        return Map.of();
+    }
+
+    /** Runs the chosen Stirling-PDF operation. Placeholder: returns a description of the call. */
+    private Object executeStirlingPdfOperation(
+            String action, Map<String, Object> params, List<MultipartFile> files) {
+        // TODO: Implement the orchestration logic.
+        // This will involve a switch or if-else structure to call the appropriate
+        // Stirling-PDF service methods or make internal HTTP requests.
+        // Example (action names must match the operation names listed in the prompt):
+        // if ("merge-pdfs".equals(action)) {
+        //     // Call MergeService or make HTTP request to /api/v1/general/merge-pdfs
+        // } else if ("add-watermark".equals(action)) {
+        //     // Call WatermarkService or make HTTP request to /api/v1/security/add-watermark
+        // }
+        int fileCount = files != null ? files.size() : 0;
+        return "Placeholder: Executed " + action + " with params " + params + " on " + fileCount + " files.";
+    }
+
+    /**
+     * Returns the configured Gemini API key, e.g. for client initialisation.
+     *
+     * <p>The value is a secret: do not log it or include it in responses.
+     */
+    public String getGeminiApiKey() {
+        return geminiApiKey;
+    }
+}
diff --git a/stirling-pdf/src/main/resources/gemini-agent.properties b/stirling-pdf/src/main/resources/gemini-agent.properties
new file mode 100644
index 000000000..14ad3ce26
--- /dev/null
+++ b/stirling-pdf/src/main/resources/gemini-agent.properties
@@ -0,0 +1,4 @@
+# Gemini AI Agent Configuration
+# The key is read from the GEMINI_API_KEY environment variable; never commit a real key to this file.
+# Example (shell): export GEMINI_API_KEY="your_actual_api_key"
+gemini.api.key=${GEMINI_API_KEY:YOUR_GEMINI_API_KEY_HERE}