mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2026-02-01 20:10:35 +01:00
wip
This commit is contained in:
parent
6dcf20b9c9
commit
98d4949930
@ -631,6 +631,7 @@ public class ApplicationProperties {
|
|||||||
|
|
||||||
@Data
|
@Data
|
||||||
public static class Models {
|
public static class Models {
|
||||||
|
private String provider = "openai";
|
||||||
private String primary = "gpt-5-nano";
|
private String primary = "gpt-5-nano";
|
||||||
private String fallback = "gpt-5-mini";
|
private String fallback = "gpt-5-mini";
|
||||||
private String embedding = "text-embedding-3-small";
|
private String embedding = "text-embedding-3-small";
|
||||||
|
|||||||
@ -53,6 +53,7 @@ dependencies {
|
|||||||
api 'com.github.ben-manes.caffeine:caffeine'
|
api 'com.github.ben-manes.caffeine:caffeine'
|
||||||
api 'io.swagger.core.v3:swagger-core-jakarta:2.2.38'
|
api 'io.swagger.core.v3:swagger-core-jakarta:2.2.38'
|
||||||
implementation 'org.springframework.ai:spring-ai-openai'
|
implementation 'org.springframework.ai:spring-ai-openai'
|
||||||
|
implementation 'org.springframework.ai:spring-ai-ollama'
|
||||||
implementation 'com.bucket4j:bucket4j_jdk17-core:8.15.0'
|
implementation 'com.bucket4j:bucket4j_jdk17-core:8.15.0'
|
||||||
|
|
||||||
// https://mvnrepository.com/artifact/com.bucket4j/bucket4j_jdk17
|
// https://mvnrepository.com/artifact/com.bucket4j/bucket4j_jdk17
|
||||||
|
|||||||
@ -54,9 +54,11 @@ public class ChatbotController {
|
|||||||
.documentId(session.getDocumentId())
|
.documentId(session.getDocumentId())
|
||||||
.alphaWarning(settings.alphaWarning())
|
.alphaWarning(settings.alphaWarning())
|
||||||
.ocrRequested(session.isOcrRequested())
|
.ocrRequested(session.isOcrRequested())
|
||||||
|
.imageContentDetected(session.isImageContentDetected())
|
||||||
|
.textCharacters(session.getTextCharacters())
|
||||||
.maxCachedCharacters(cacheService.getMaxDocumentCharacters())
|
.maxCachedCharacters(cacheService.getMaxDocumentCharacters())
|
||||||
.createdAt(session.getCreatedAt())
|
.createdAt(session.getCreatedAt())
|
||||||
.warnings(defaultWarnings(settings))
|
.warnings(sessionWarnings(settings, session))
|
||||||
.metadata(session.getMetadata())
|
.metadata(session.getMetadata())
|
||||||
.build();
|
.build();
|
||||||
return ResponseEntity.status(HttpStatus.CREATED).body(response);
|
return ResponseEntity.status(HttpStatus.CREATED).body(response);
|
||||||
@ -81,9 +83,11 @@ public class ChatbotController {
|
|||||||
.documentId(session.getDocumentId())
|
.documentId(session.getDocumentId())
|
||||||
.alphaWarning(settings.alphaWarning())
|
.alphaWarning(settings.alphaWarning())
|
||||||
.ocrRequested(session.isOcrRequested())
|
.ocrRequested(session.isOcrRequested())
|
||||||
|
.imageContentDetected(session.isImageContentDetected())
|
||||||
|
.textCharacters(session.getTextCharacters())
|
||||||
.maxCachedCharacters(cacheService.getMaxDocumentCharacters())
|
.maxCachedCharacters(cacheService.getMaxDocumentCharacters())
|
||||||
.createdAt(session.getCreatedAt())
|
.createdAt(session.getCreatedAt())
|
||||||
.warnings(defaultWarnings(settings))
|
.warnings(sessionWarnings(settings, session))
|
||||||
.metadata(session.getMetadata())
|
.metadata(session.getMetadata())
|
||||||
.build();
|
.build();
|
||||||
return ResponseEntity.ok(response);
|
return ResponseEntity.ok(response);
|
||||||
@ -95,13 +99,19 @@ public class ChatbotController {
|
|||||||
return ResponseEntity.noContent().build();
|
return ResponseEntity.noContent().build();
|
||||||
}
|
}
|
||||||
|
|
||||||
private List<String> defaultWarnings(ChatbotSettings settings) {
|
private List<String> sessionWarnings(ChatbotSettings settings, ChatbotSession session) {
|
||||||
List<String> warnings = new ArrayList<>();
|
List<String> warnings = new ArrayList<>();
|
||||||
if (settings.alphaWarning()) {
|
if (settings.alphaWarning()) {
|
||||||
warnings.add("Chatbot feature is in alpha and may change.");
|
warnings.add("Chatbot feature is in alpha and may change.");
|
||||||
}
|
}
|
||||||
warnings.add("Image-based content is not supported yet.");
|
warnings.add("Image-based content is not supported yet.");
|
||||||
|
if (session != null && session.isImageContentDetected()) {
|
||||||
|
warnings.add("Detected images will be ignored until image support ships.");
|
||||||
|
}
|
||||||
warnings.add("Only extracted text is sent for analysis.");
|
warnings.add("Only extracted text is sent for analysis.");
|
||||||
|
if (session != null && session.isOcrRequested()) {
|
||||||
|
warnings.add("OCR was requested – extra processing charges may apply.");
|
||||||
|
}
|
||||||
return warnings;
|
return warnings;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -23,6 +23,8 @@ public class ChatbotDocumentCacheEntry {
|
|||||||
private String text;
|
private String text;
|
||||||
private List<ChatbotTextChunk> chunks;
|
private List<ChatbotTextChunk> chunks;
|
||||||
private boolean ocrApplied;
|
private boolean ocrApplied;
|
||||||
|
private boolean imageContentDetected;
|
||||||
|
private long textCharacters;
|
||||||
private String vectorStoreId;
|
private String vectorStoreId;
|
||||||
private Instant storedAt;
|
private Instant storedAt;
|
||||||
|
|
||||||
|
|||||||
@ -19,6 +19,8 @@ public class ChatbotSession {
|
|||||||
private boolean ocrRequested;
|
private boolean ocrRequested;
|
||||||
private boolean warningsAccepted;
|
private boolean warningsAccepted;
|
||||||
private boolean alphaWarningRequired;
|
private boolean alphaWarningRequired;
|
||||||
|
private boolean imageContentDetected;
|
||||||
|
private long textCharacters;
|
||||||
private String cacheKey;
|
private String cacheKey;
|
||||||
private String vectorStoreId;
|
private String vectorStoreId;
|
||||||
private Instant createdAt;
|
private Instant createdAt;
|
||||||
|
|||||||
@ -20,4 +20,5 @@ public class ChatbotSessionCreateRequest {
|
|||||||
private Map<String, String> metadata;
|
private Map<String, String> metadata;
|
||||||
private boolean ocrRequested;
|
private boolean ocrRequested;
|
||||||
private boolean warningsAccepted;
|
private boolean warningsAccepted;
|
||||||
|
private boolean imagesDetected;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -20,7 +20,9 @@ public class ChatbotSessionResponse {
|
|||||||
private String documentId;
|
private String documentId;
|
||||||
private boolean alphaWarning;
|
private boolean alphaWarning;
|
||||||
private boolean ocrRequested;
|
private boolean ocrRequested;
|
||||||
|
private boolean imageContentDetected;
|
||||||
private long maxCachedCharacters;
|
private long maxCachedCharacters;
|
||||||
|
private long textCharacters;
|
||||||
private Instant createdAt;
|
private Instant createdAt;
|
||||||
private List<String> warnings;
|
private List<String> warnings;
|
||||||
private Map<String, String> metadata;
|
private Map<String, String> metadata;
|
||||||
|
|||||||
@ -64,7 +64,9 @@ public class ChatbotCacheService {
|
|||||||
String documentId,
|
String documentId,
|
||||||
String rawText,
|
String rawText,
|
||||||
Map<String, String> metadata,
|
Map<String, String> metadata,
|
||||||
boolean ocrApplied) {
|
boolean ocrApplied,
|
||||||
|
boolean imageContentDetected,
|
||||||
|
long textCharacters) {
|
||||||
Objects.requireNonNull(sessionId, "sessionId must not be null");
|
Objects.requireNonNull(sessionId, "sessionId must not be null");
|
||||||
Objects.requireNonNull(documentId, "documentId must not be null");
|
Objects.requireNonNull(documentId, "documentId must not be null");
|
||||||
Objects.requireNonNull(rawText, "rawText must not be null");
|
Objects.requireNonNull(rawText, "rawText must not be null");
|
||||||
@ -82,6 +84,8 @@ public class ChatbotCacheService {
|
|||||||
.metadata(metadata)
|
.metadata(metadata)
|
||||||
.text(rawText)
|
.text(rawText)
|
||||||
.ocrApplied(ocrApplied)
|
.ocrApplied(ocrApplied)
|
||||||
|
.imageContentDetected(imageContentDetected)
|
||||||
|
.textCharacters(textCharacters)
|
||||||
.storedAt(Instant.now())
|
.storedAt(Instant.now())
|
||||||
.build();
|
.build();
|
||||||
documentCache.put(cacheKey, entry);
|
documentCache.put(cacheKey, entry);
|
||||||
|
|||||||
@ -14,6 +14,8 @@ import org.springframework.ai.chat.messages.UserMessage;
|
|||||||
import org.springframework.ai.chat.model.ChatModel;
|
import org.springframework.ai.chat.model.ChatModel;
|
||||||
import org.springframework.ai.chat.model.ChatResponse;
|
import org.springframework.ai.chat.model.ChatResponse;
|
||||||
import org.springframework.ai.chat.prompt.Prompt;
|
import org.springframework.ai.chat.prompt.Prompt;
|
||||||
|
import org.springframework.ai.ollama.OllamaChatModel;
|
||||||
|
import org.springframework.ai.ollama.api.OllamaOptions;
|
||||||
import org.springframework.ai.openai.OpenAiChatModel;
|
import org.springframework.ai.openai.OpenAiChatModel;
|
||||||
import org.springframework.ai.openai.OpenAiChatOptions;
|
import org.springframework.ai.openai.OpenAiChatOptions;
|
||||||
import org.springframework.boot.autoconfigure.condition.ConditionalOnBean;
|
import org.springframework.boot.autoconfigure.condition.ConditionalOnBean;
|
||||||
@ -33,6 +35,7 @@ import stirling.software.proprietary.model.chatbot.ChatbotResponse;
|
|||||||
import stirling.software.proprietary.model.chatbot.ChatbotSession;
|
import stirling.software.proprietary.model.chatbot.ChatbotSession;
|
||||||
import stirling.software.proprietary.model.chatbot.ChatbotTextChunk;
|
import stirling.software.proprietary.model.chatbot.ChatbotTextChunk;
|
||||||
import stirling.software.proprietary.service.chatbot.ChatbotFeatureProperties.ChatbotSettings;
|
import stirling.software.proprietary.service.chatbot.ChatbotFeatureProperties.ChatbotSettings;
|
||||||
|
import stirling.software.proprietary.service.chatbot.ChatbotFeatureProperties.ChatbotSettings.ModelProvider;
|
||||||
import stirling.software.proprietary.service.chatbot.exception.ChatbotException;
|
import stirling.software.proprietary.service.chatbot.exception.ChatbotException;
|
||||||
|
|
||||||
@Service
|
@Service
|
||||||
@ -66,7 +69,7 @@ public class ChatbotConversationService {
|
|||||||
.findById(request.getSessionId())
|
.findById(request.getSessionId())
|
||||||
.orElseThrow(() -> new ChatbotException("Unknown chatbot session"));
|
.orElseThrow(() -> new ChatbotException("Unknown chatbot session"));
|
||||||
|
|
||||||
ensureModelSwitchCapability();
|
ensureModelSwitchCapability(settings);
|
||||||
|
|
||||||
ChatbotDocumentCacheEntry cacheEntry =
|
ChatbotDocumentCacheEntry cacheEntry =
|
||||||
cacheService
|
cacheService
|
||||||
@ -81,6 +84,7 @@ public class ChatbotConversationService {
|
|||||||
|
|
||||||
ModelReply nanoReply =
|
ModelReply nanoReply =
|
||||||
invokeModel(
|
invokeModel(
|
||||||
|
settings,
|
||||||
settings.models().primary(),
|
settings.models().primary(),
|
||||||
request.getPrompt(),
|
request.getPrompt(),
|
||||||
session,
|
session,
|
||||||
@ -100,6 +104,7 @@ public class ChatbotConversationService {
|
|||||||
List<ChatbotTextChunk> expandedContext = ensureMinimumContext(context, cacheEntry);
|
List<ChatbotTextChunk> expandedContext = ensureMinimumContext(context, cacheEntry);
|
||||||
finalReply =
|
finalReply =
|
||||||
invokeModel(
|
invokeModel(
|
||||||
|
settings,
|
||||||
settings.models().fallback(),
|
settings.models().fallback(),
|
||||||
request.getPrompt(),
|
request.getPrompt(),
|
||||||
session,
|
session,
|
||||||
@ -118,7 +123,7 @@ public class ChatbotConversationService {
|
|||||||
.cacheHit(true)
|
.cacheHit(true)
|
||||||
.respondedAt(Instant.now())
|
.respondedAt(Instant.now())
|
||||||
.warnings(warnings)
|
.warnings(warnings)
|
||||||
.metadata(buildMetadata(finalReply, context.size(), escalated))
|
.metadata(buildMetadata(settings, session, finalReply, context.size(), escalated))
|
||||||
.build();
|
.build();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -126,6 +131,10 @@ public class ChatbotConversationService {
|
|||||||
List<String> warnings = new ArrayList<>();
|
List<String> warnings = new ArrayList<>();
|
||||||
warnings.add("Chatbot is in alpha – behaviour may change.");
|
warnings.add("Chatbot is in alpha – behaviour may change.");
|
||||||
warnings.add("Image content is not yet supported in answers.");
|
warnings.add("Image content is not yet supported in answers.");
|
||||||
|
if (session.isImageContentDetected()) {
|
||||||
|
warnings.add(
|
||||||
|
"Detected document images will be ignored until image support is available.");
|
||||||
|
}
|
||||||
if (session.isOcrRequested()) {
|
if (session.isOcrRequested()) {
|
||||||
warnings.add("OCR costs may apply for this session.");
|
warnings.add("OCR costs may apply for this session.");
|
||||||
}
|
}
|
||||||
@ -133,30 +142,44 @@ public class ChatbotConversationService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private Map<String, Object> buildMetadata(
|
private Map<String, Object> buildMetadata(
|
||||||
ModelReply reply, int contextSize, boolean escalated) {
|
ChatbotSettings settings,
|
||||||
|
ChatbotSession session,
|
||||||
|
ModelReply reply,
|
||||||
|
int contextSize,
|
||||||
|
boolean escalated) {
|
||||||
Map<String, Object> metadata = new HashMap<>();
|
Map<String, Object> metadata = new HashMap<>();
|
||||||
metadata.put("contextSize", contextSize);
|
metadata.put("contextSize", contextSize);
|
||||||
metadata.put("requiresEscalation", reply.requiresEscalation());
|
metadata.put("requiresEscalation", reply.requiresEscalation());
|
||||||
metadata.put("escalated", escalated);
|
metadata.put("escalated", escalated);
|
||||||
metadata.put("rationale", reply.rationale());
|
metadata.put("rationale", reply.rationale());
|
||||||
|
metadata.put("modelProvider", settings.models().provider().name());
|
||||||
|
metadata.put("imageContentDetected", session.isImageContentDetected());
|
||||||
|
metadata.put("charactersCached", session.getTextCharacters());
|
||||||
return metadata;
|
return metadata;
|
||||||
}
|
}
|
||||||
|
|
||||||
private void ensureModelSwitchCapability() {
|
private void ensureModelSwitchCapability(ChatbotSettings settings) {
|
||||||
if (!(chatModel instanceof OpenAiChatModel)) {
|
ModelProvider provider = settings.models().provider();
|
||||||
throw new ChatbotException(
|
switch (provider) {
|
||||||
"Chatbot requires OpenAI chat model to support runtime model switching");
|
case OPENAI -> {
|
||||||
|
if (!(chatModel instanceof OpenAiChatModel)) {
|
||||||
|
throw new ChatbotException(
|
||||||
|
"Chatbot requires an OpenAI chat model to support runtime model switching.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
case OLLAMA -> {
|
||||||
|
if (!(chatModel instanceof OllamaChatModel)) {
|
||||||
|
throw new ChatbotException(
|
||||||
|
"Chatbot is configured for Ollama but no Ollama chat model bean is available.");
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (modelSwitchVerified.compareAndSet(false, true)) {
|
if (modelSwitchVerified.compareAndSet(false, true)) {
|
||||||
ChatbotSettings settings = featureProperties.current();
|
|
||||||
OpenAiChatOptions primary =
|
|
||||||
OpenAiChatOptions.builder().model(settings.models().primary()).build();
|
|
||||||
OpenAiChatOptions fallback =
|
|
||||||
OpenAiChatOptions.builder().model(settings.models().fallback()).build();
|
|
||||||
log.info(
|
log.info(
|
||||||
"Verified runtime model override support ({} -> {})",
|
"Verified runtime model override support for provider {} ({} -> {})",
|
||||||
primary.getModel(),
|
provider,
|
||||||
fallback.getModel());
|
settings.models().primary(),
|
||||||
|
settings.models().fallback());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -178,12 +201,13 @@ public class ChatbotConversationService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private ModelReply invokeModel(
|
private ModelReply invokeModel(
|
||||||
|
ChatbotSettings settings,
|
||||||
String model,
|
String model,
|
||||||
String prompt,
|
String prompt,
|
||||||
ChatbotSession session,
|
ChatbotSession session,
|
||||||
List<ChatbotTextChunk> context,
|
List<ChatbotTextChunk> context,
|
||||||
Map<String, String> metadata) {
|
Map<String, String> metadata) {
|
||||||
Prompt requestPrompt = buildPrompt(model, prompt, session, context, metadata);
|
Prompt requestPrompt = buildPrompt(settings, model, prompt, session, context, metadata);
|
||||||
ChatResponse response = chatModel.call(requestPrompt);
|
ChatResponse response = chatModel.call(requestPrompt);
|
||||||
String content =
|
String content =
|
||||||
Optional.ofNullable(response)
|
Optional.ofNullable(response)
|
||||||
@ -195,6 +219,7 @@ public class ChatbotConversationService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private Prompt buildPrompt(
|
private Prompt buildPrompt(
|
||||||
|
ChatbotSettings settings,
|
||||||
String model,
|
String model,
|
||||||
String question,
|
String question,
|
||||||
ChatbotSession session,
|
ChatbotSession session,
|
||||||
@ -215,28 +240,46 @@ public class ChatbotConversationService {
|
|||||||
.reduce((left, right) -> left + ", " + right)
|
.reduce((left, right) -> left + ", " + right)
|
||||||
.orElse("none");
|
.orElse("none");
|
||||||
|
|
||||||
|
String imageDirective =
|
||||||
|
session.isImageContentDetected()
|
||||||
|
? "Images were detected in this PDF. You must explain that image analysis is not available."
|
||||||
|
: "No images detected in this PDF.";
|
||||||
|
|
||||||
String systemPrompt =
|
String systemPrompt =
|
||||||
"You are Stirling PDF Bot. Use provided context strictly. "
|
"You are Stirling PDF Bot. Use provided context strictly. "
|
||||||
+ "Respond in compact JSON with fields answer (string), confidence (0..1), requiresEscalation (boolean), rationale (string). "
|
+ "Respond in compact JSON with fields answer (string), confidence (0..1), requiresEscalation (boolean), rationale (string). "
|
||||||
+ "Explain limitations when context insufficient.";
|
+ "Explain limitations when context insufficient. Always note that image analysis is not supported yet.";
|
||||||
|
|
||||||
String userPrompt =
|
String userPrompt =
|
||||||
"Document metadata: "
|
"Document metadata: "
|
||||||
+ metadataSummary
|
+ metadataSummary
|
||||||
+ "\nOCR applied: "
|
+ "\nOCR applied: "
|
||||||
+ session.isOcrRequested()
|
+ session.isOcrRequested()
|
||||||
|
+ "\n"
|
||||||
|
+ imageDirective
|
||||||
+ "\nContext:\n"
|
+ "\nContext:\n"
|
||||||
+ contextBuilder
|
+ contextBuilder
|
||||||
+ "Question: "
|
+ "Question: "
|
||||||
+ question;
|
+ question;
|
||||||
|
|
||||||
OpenAiChatOptions options =
|
Object options = buildChatOptions(settings, model);
|
||||||
OpenAiChatOptions.builder().model(model).temperature(0.2).build();
|
|
||||||
|
|
||||||
return new Prompt(
|
return new Prompt(
|
||||||
List.of(new SystemMessage(systemPrompt), new UserMessage(userPrompt)), options);
|
List.of(new SystemMessage(systemPrompt), new UserMessage(userPrompt)), options);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private Object buildChatOptions(ChatbotSettings settings, String model) {
|
||||||
|
return switch (settings.models().provider()) {
|
||||||
|
case OPENAI ->
|
||||||
|
OpenAiChatOptions.builder()
|
||||||
|
.model(model)
|
||||||
|
.temperature(0.2)
|
||||||
|
.responseFormat("json_object")
|
||||||
|
.build();
|
||||||
|
case OLLAMA -> OllamaOptions.builder().model(model).temperature(0.2).build();
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
private ModelReply parseModelResponse(String raw) {
|
private ModelReply parseModelResponse(String raw) {
|
||||||
if (!StringUtils.hasText(raw)) {
|
if (!StringUtils.hasText(raw)) {
|
||||||
throw new ChatbotException("Model returned empty response");
|
throw new ChatbotException("Model returned empty response");
|
||||||
|
|||||||
@ -3,6 +3,7 @@ package stirling.software.proprietary.service.chatbot;
|
|||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
|
|
||||||
import org.springframework.stereotype.Component;
|
import org.springframework.stereotype.Component;
|
||||||
|
import org.springframework.util.StringUtils;
|
||||||
|
|
||||||
import stirling.software.common.model.ApplicationProperties;
|
import stirling.software.common.model.ApplicationProperties;
|
||||||
import stirling.software.common.model.ApplicationProperties.Premium;
|
import stirling.software.common.model.ApplicationProperties.Premium;
|
||||||
@ -20,15 +21,18 @@ public class ChatbotFeatureProperties {
|
|||||||
|
|
||||||
public ChatbotSettings current() {
|
public ChatbotSettings current() {
|
||||||
Chatbot chatbot = resolveChatbot();
|
Chatbot chatbot = resolveChatbot();
|
||||||
|
ChatbotSettings.ModelSettings modelSettings =
|
||||||
|
new ChatbotSettings.ModelSettings(
|
||||||
|
resolveProvider(chatbot.getModels().getProvider()),
|
||||||
|
chatbot.getModels().getPrimary(),
|
||||||
|
chatbot.getModels().getFallback(),
|
||||||
|
chatbot.getModels().getEmbedding());
|
||||||
return new ChatbotSettings(
|
return new ChatbotSettings(
|
||||||
chatbot.isEnabled(),
|
chatbot.isEnabled(),
|
||||||
chatbot.isAlphaWarning(),
|
chatbot.isAlphaWarning(),
|
||||||
chatbot.getMaxPromptCharacters(),
|
chatbot.getMaxPromptCharacters(),
|
||||||
chatbot.getMinConfidenceNano(),
|
chatbot.getMinConfidenceNano(),
|
||||||
new ChatbotSettings.ModelSettings(
|
modelSettings,
|
||||||
chatbot.getModels().getPrimary(),
|
|
||||||
chatbot.getModels().getFallback(),
|
|
||||||
chatbot.getModels().getEmbedding()),
|
|
||||||
new ChatbotSettings.RagSettings(
|
new ChatbotSettings.RagSettings(
|
||||||
chatbot.getRag().getChunkSizeTokens(),
|
chatbot.getRag().getChunkSizeTokens(),
|
||||||
chatbot.getRag().getChunkOverlapTokens(),
|
chatbot.getRag().getChunkOverlapTokens(),
|
||||||
@ -53,6 +57,17 @@ public class ChatbotFeatureProperties {
|
|||||||
.orElseGet(Chatbot::new);
|
.orElseGet(Chatbot::new);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private ChatbotSettings.ModelProvider resolveProvider(String configuredProvider) {
|
||||||
|
if (!StringUtils.hasText(configuredProvider)) {
|
||||||
|
return ChatbotSettings.ModelProvider.OPENAI;
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
return ChatbotSettings.ModelProvider.valueOf(configuredProvider.trim().toUpperCase());
|
||||||
|
} catch (IllegalArgumentException ignored) {
|
||||||
|
return ChatbotSettings.ModelProvider.OPENAI;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public record ChatbotSettings(
|
public record ChatbotSettings(
|
||||||
boolean enabled,
|
boolean enabled,
|
||||||
boolean alphaWarning,
|
boolean alphaWarning,
|
||||||
@ -64,7 +79,8 @@ public class ChatbotFeatureProperties {
|
|||||||
OcrSettings ocr,
|
OcrSettings ocr,
|
||||||
AuditSettings audit) {
|
AuditSettings audit) {
|
||||||
|
|
||||||
public record ModelSettings(String primary, String fallback, String embedding) {}
|
public record ModelSettings(
|
||||||
|
ModelProvider provider, String primary, String fallback, String embedding) {}
|
||||||
|
|
||||||
public record RagSettings(int chunkSizeTokens, int chunkOverlapTokens, int topK) {}
|
public record RagSettings(int chunkSizeTokens, int chunkOverlapTokens, int topK) {}
|
||||||
|
|
||||||
@ -73,5 +89,10 @@ public class ChatbotFeatureProperties {
|
|||||||
public record OcrSettings(boolean enabledByDefault) {}
|
public record OcrSettings(boolean enabledByDefault) {}
|
||||||
|
|
||||||
public record AuditSettings(boolean enabled) {}
|
public record AuditSettings(boolean enabled) {}
|
||||||
|
|
||||||
|
/** Chat model backends the chatbot can be configured to use. */
public enum ModelProvider {
    /** OpenAI-backed chat model. */
    OPENAI,
    /** Ollama-backed chat model. */
    OLLAMA;
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -2,6 +2,7 @@ package stirling.software.proprietary.service.chatbot;
|
|||||||
|
|
||||||
import java.time.Instant;
|
import java.time.Instant;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.UUID;
|
import java.util.UUID;
|
||||||
@ -43,25 +44,38 @@ public class ChatbotIngestionService {
|
|||||||
if (!request.isWarningsAccepted() && settings.alphaWarning()) {
|
if (!request.isWarningsAccepted() && settings.alphaWarning()) {
|
||||||
throw new ChatbotException("Alpha warning must be accepted before use");
|
throw new ChatbotException("Alpha warning must be accepted before use");
|
||||||
}
|
}
|
||||||
if (!StringUtils.hasText(request.getText())) {
|
boolean hasText = StringUtils.hasText(request.getText());
|
||||||
throw new NoTextDetectedException("No text detected in document payload");
|
if (!hasText) {
|
||||||
|
throw new NoTextDetectedException(
|
||||||
|
"No text detected in document payload. Images are currently unsupported – enable OCR to continue.");
|
||||||
}
|
}
|
||||||
|
|
||||||
String sessionId =
|
String sessionId =
|
||||||
StringUtils.hasText(request.getSessionId())
|
StringUtils.hasText(request.getSessionId())
|
||||||
? request.getSessionId()
|
? request.getSessionId()
|
||||||
: ChatbotSession.randomSessionId();
|
: ChatbotSession.randomSessionId();
|
||||||
Map<String, String> metadata =
|
boolean imagesDetected = request.isImagesDetected();
|
||||||
request.getMetadata() == null ? Map.of() : Map.copyOf(request.getMetadata());
|
long textCharacters = request.getText().length();
|
||||||
boolean ocrApplied = request.isOcrRequested();
|
boolean ocrApplied = request.isOcrRequested();
|
||||||
|
Map<String, String> metadata = new HashMap<>();
|
||||||
|
if (request.getMetadata() != null) {
|
||||||
|
metadata.putAll(request.getMetadata());
|
||||||
|
}
|
||||||
|
metadata.put("content.imagesDetected", Boolean.toString(imagesDetected));
|
||||||
|
metadata.put("content.characterCount", String.valueOf(textCharacters));
|
||||||
|
metadata.put(
|
||||||
|
"content.extractionSource", ocrApplied ? "ocr-text-layer" : "embedded-text-layer");
|
||||||
|
Map<String, String> immutableMetadata = Map.copyOf(metadata);
|
||||||
|
|
||||||
String cacheKey =
|
String cacheKey =
|
||||||
cacheService.register(
|
cacheService.register(
|
||||||
sessionId,
|
sessionId,
|
||||||
request.getDocumentId(),
|
request.getDocumentId(),
|
||||||
request.getText(),
|
request.getText(),
|
||||||
metadata,
|
immutableMetadata,
|
||||||
ocrApplied);
|
ocrApplied,
|
||||||
|
imagesDetected,
|
||||||
|
textCharacters);
|
||||||
|
|
||||||
List<String> chunkTexts =
|
List<String> chunkTexts =
|
||||||
chunkText(
|
chunkText(
|
||||||
@ -76,8 +90,10 @@ public class ChatbotIngestionService {
|
|||||||
.sessionId(sessionId)
|
.sessionId(sessionId)
|
||||||
.documentId(request.getDocumentId())
|
.documentId(request.getDocumentId())
|
||||||
.userId(request.getUserId())
|
.userId(request.getUserId())
|
||||||
.metadata(metadata)
|
.metadata(immutableMetadata)
|
||||||
.ocrRequested(ocrApplied)
|
.ocrRequested(ocrApplied)
|
||||||
|
.imageContentDetected(imagesDetected)
|
||||||
|
.textCharacters(textCharacters)
|
||||||
.warningsAccepted(request.isWarningsAccepted())
|
.warningsAccepted(request.isWarningsAccepted())
|
||||||
.alphaWarningRequired(settings.alphaWarning())
|
.alphaWarningRequired(settings.alphaWarning())
|
||||||
.cacheKey(cacheKey)
|
.cacheKey(cacheKey)
|
||||||
|
|||||||
@ -39,7 +39,9 @@ public class ChatbotService {
|
|||||||
session.getSessionId(),
|
session.getSessionId(),
|
||||||
Map.of(
|
Map.of(
|
||||||
"documentId", session.getDocumentId(),
|
"documentId", session.getDocumentId(),
|
||||||
"ocrRequested", session.isOcrRequested()));
|
"ocrRequested", session.isOcrRequested(),
|
||||||
|
"imagesDetected", session.isImageContentDetected(),
|
||||||
|
"textCharacters", session.getTextCharacters()));
|
||||||
return session;
|
return session;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -40,7 +40,15 @@ class ChatbotCacheServiceTest {
|
|||||||
String longText = "a".repeat(51);
|
String longText = "a".repeat(51);
|
||||||
assertThrows(
|
assertThrows(
|
||||||
ChatbotException.class,
|
ChatbotException.class,
|
||||||
() -> cacheService.register("session", "doc", longText, Map.of(), false));
|
() ->
|
||||||
|
cacheService.register(
|
||||||
|
"session",
|
||||||
|
"doc",
|
||||||
|
longText,
|
||||||
|
Map.of(),
|
||||||
|
false,
|
||||||
|
false,
|
||||||
|
longText.length()));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@ -48,10 +56,18 @@ class ChatbotCacheServiceTest {
|
|||||||
ChatbotCacheService cacheService = new ChatbotCacheService(properties);
|
ChatbotCacheService cacheService = new ChatbotCacheService(properties);
|
||||||
String cacheKey =
|
String cacheKey =
|
||||||
cacheService.register(
|
cacheService.register(
|
||||||
"session1", "doc1", "hello world", Map.of("title", "Sample"), false);
|
"session1",
|
||||||
|
"doc1",
|
||||||
|
"hello world",
|
||||||
|
Map.of("title", "Sample"),
|
||||||
|
false,
|
||||||
|
false,
|
||||||
|
"hello world".length());
|
||||||
assertTrue(cacheService.resolveBySessionId("session1").isPresent());
|
assertTrue(cacheService.resolveBySessionId("session1").isPresent());
|
||||||
ChatbotDocumentCacheEntry entry = cacheService.resolveByCacheKey(cacheKey).orElseThrow();
|
ChatbotDocumentCacheEntry entry = cacheService.resolveByCacheKey(cacheKey).orElseThrow();
|
||||||
assertEquals("doc1", entry.getDocumentId());
|
assertEquals("doc1", entry.getDocumentId());
|
||||||
assertEquals("Sample", entry.getMetadata().get("title"));
|
assertEquals("Sample", entry.getMetadata().get("title"));
|
||||||
|
assertEquals("hello world".length(), entry.getTextCharacters());
|
||||||
|
assertTrue(!entry.isImageContentDetected());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user