mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2025-11-16 01:21:16 +01:00
improving context
This commit is contained in:
parent
c37707d9ad
commit
9ffbede49a
@ -28,10 +28,12 @@ public class ChatbotVectorStoreConfig {
|
||||
public VectorStore chatbotVectorStore(
|
||||
ObjectProvider<JedisPooled> jedisProvider, EmbeddingModel embeddingModel) {
|
||||
JedisPooled jedis = jedisProvider.getIfAvailable();
|
||||
|
||||
if (jedis != null) {
|
||||
try {
|
||||
jedis.ping();
|
||||
log.info("Initialising Redis vector store for chatbot usage");
|
||||
|
||||
return RedisVectorStore.builder(jedis, embeddingModel)
|
||||
.indexName(DEFAULT_INDEX)
|
||||
.prefix(DEFAULT_PREFIX)
|
||||
@ -45,6 +47,7 @@ public class ChatbotVectorStoreConfig {
|
||||
} else {
|
||||
log.info("No Redis connection detected; using SimpleVectorStore for chatbot.");
|
||||
}
|
||||
|
||||
return SimpleVectorStore.builder(embeddingModel).build();
|
||||
}
|
||||
|
||||
|
||||
@ -107,7 +107,7 @@ public class ChatbotController {
|
||||
warnings.add("Images detected - Images are not currently supported.");
|
||||
}
|
||||
|
||||
warnings.add("Only extracted text is sent for analysis.");
|
||||
warnings.add("Images are not yet supported. Only extracted text is sent for analysis.");
|
||||
if (session != null && session.isOcrRequested()) {
|
||||
warnings.add("OCR requested – uses credits .");
|
||||
}
|
||||
|
||||
@ -0,0 +1,49 @@
|
||||
package stirling.software.proprietary.service.chatbot;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import org.springframework.ai.document.Document;
|
||||
import org.springframework.stereotype.Component;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
|
||||
@Component
|
||||
public class ChatbotContextCompressor {
|
||||
|
||||
private static final int DEFAULT_SUMMARY_LIMIT = 3000;
|
||||
private static final int MIN_CHUNK_SNIPPET = 160;
|
||||
|
||||
public String summarize(List<Document> documents, int requestedLimit) {
|
||||
if (CollectionUtils.isEmpty(documents)) {
|
||||
return "No contextual snippets available for this session.";
|
||||
}
|
||||
int maxChars =
|
||||
requestedLimit > 0
|
||||
? Math.min(requestedLimit, DEFAULT_SUMMARY_LIMIT)
|
||||
: DEFAULT_SUMMARY_LIMIT;
|
||||
StringBuilder builder = new StringBuilder();
|
||||
int perChunkLimit = Math.max(MIN_CHUNK_SNIPPET, maxChars / Math.max(documents.size(), 1));
|
||||
for (Document doc : documents) {
|
||||
if (builder.length() >= maxChars) {
|
||||
break;
|
||||
}
|
||||
String chunkOrder = doc.getMetadata().getOrDefault("chunkOrder", "?").toString();
|
||||
String text = trimContent(doc.getText(), perChunkLimit);
|
||||
builder.append("Chunk ").append(chunkOrder).append(": ").append(text).append('\n');
|
||||
}
|
||||
if (builder.length() == 0) {
|
||||
return "Unable to summarise context; original content unavailable.";
|
||||
}
|
||||
return builder.substring(0, Math.min(builder.length(), maxChars)).trim();
|
||||
}
|
||||
|
||||
private String trimContent(String content, int perChunkLimit) {
|
||||
if (content == null || content.isBlank()) {
|
||||
return "(empty chunk)";
|
||||
}
|
||||
String normalized = content.replaceAll("\\s+", " ").trim();
|
||||
if (normalized.length() <= perChunkLimit) {
|
||||
return normalized;
|
||||
}
|
||||
return normalized.substring(0, Math.max(0, perChunkLimit - 3)) + "...";
|
||||
}
|
||||
}
|
||||
@ -47,6 +47,8 @@ public class ChatbotConversationService {
|
||||
private final ChatbotCacheService cacheService;
|
||||
private final ChatbotFeatureProperties featureProperties;
|
||||
private final ChatbotRetrievalService retrievalService;
|
||||
private final ChatbotContextCompressor contextCompressor;
|
||||
private final ChatbotMemoryService memoryService;
|
||||
private final ChatbotUsageService usageService;
|
||||
private final ObjectMapper objectMapper;
|
||||
private final AtomicBoolean modelSwitchVerified = new AtomicBoolean(false);
|
||||
@ -79,6 +81,9 @@ public class ChatbotConversationService {
|
||||
List<Document> context =
|
||||
retrievalService.retrieveTopK(
|
||||
request.getSessionId(), request.getPrompt(), settings);
|
||||
String contextSummary =
|
||||
contextCompressor.summarize(
|
||||
context, (int) Math.max(settings.maxPromptCharacters() / 2, 1000));
|
||||
|
||||
ModelReply nanoReply =
|
||||
invokeModel(
|
||||
@ -87,6 +92,7 @@ public class ChatbotConversationService {
|
||||
request.getPrompt(),
|
||||
session,
|
||||
context,
|
||||
contextSummary,
|
||||
cacheEntry.getMetadata());
|
||||
|
||||
boolean shouldEscalate =
|
||||
@ -106,6 +112,7 @@ public class ChatbotConversationService {
|
||||
request.getPrompt(),
|
||||
session,
|
||||
context,
|
||||
contextSummary,
|
||||
cacheEntry.getMetadata());
|
||||
}
|
||||
|
||||
@ -116,6 +123,8 @@ public class ChatbotConversationService {
|
||||
finalReply.completionTokens());
|
||||
session.setUsageSummary(usageSummary);
|
||||
|
||||
memoryService.recordTurn(session, request.getPrompt(), finalReply.answer());
|
||||
|
||||
return ChatbotResponse.builder()
|
||||
.sessionId(request.getSessionId())
|
||||
.modelUsed(
|
||||
@ -200,8 +209,10 @@ public class ChatbotConversationService {
|
||||
String prompt,
|
||||
ChatbotSession session,
|
||||
List<Document> context,
|
||||
String contextSummary,
|
||||
Map<String, String> metadata) {
|
||||
Prompt requestPrompt = buildPrompt(settings, model, prompt, session, context, metadata);
|
||||
Prompt requestPrompt =
|
||||
buildPrompt(settings, model, prompt, session, context, contextSummary, metadata);
|
||||
ChatResponse response;
|
||||
try {
|
||||
response = chatModel.call(requestPrompt);
|
||||
@ -244,16 +255,9 @@ public class ChatbotConversationService {
|
||||
String question,
|
||||
ChatbotSession session,
|
||||
List<Document> context,
|
||||
String contextSummary,
|
||||
Map<String, String> metadata) {
|
||||
StringBuilder contextBuilder = new StringBuilder();
|
||||
for (Document chunk : context) {
|
||||
contextBuilder
|
||||
.append("[Chunk ")
|
||||
.append(chunk.getMetadata().getOrDefault("chunkOrder", "?"))
|
||||
.append("]\n")
|
||||
.append(chunk.getText())
|
||||
.append("\n\n");
|
||||
}
|
||||
String chunkOutline = buildChunkOutline(context);
|
||||
String metadataSummary =
|
||||
metadata.entrySet().stream()
|
||||
.map(entry -> entry.getKey() + ": " + entry.getValue())
|
||||
@ -277,8 +281,10 @@ public class ChatbotConversationService {
|
||||
+ session.isOcrRequested()
|
||||
+ "\n"
|
||||
+ imageDirective
|
||||
+ "\nContext:\n"
|
||||
+ contextBuilder
|
||||
+ "\nContext summary:\n"
|
||||
+ contextSummary
|
||||
+ "\nContext outline:\n"
|
||||
+ chunkOutline
|
||||
+ "Question: "
|
||||
+ question;
|
||||
|
||||
@ -298,6 +304,27 @@ public class ChatbotConversationService {
|
||||
return builder.build();
|
||||
}
|
||||
|
||||
private String buildChunkOutline(List<Document> context) {
|
||||
if (context == null || context.isEmpty()) {
|
||||
return "No chunks retrieved for this question.";
|
||||
}
|
||||
StringBuilder outline = new StringBuilder();
|
||||
for (Document chunk : context) {
|
||||
String order = chunk.getMetadata().getOrDefault("chunkOrder", "?").toString();
|
||||
String snippet = chunk.getText();
|
||||
if (snippet != null) {
|
||||
snippet = snippet.replaceAll("\\s+", " ").trim();
|
||||
if (snippet.length() > 240) {
|
||||
snippet = snippet.substring(0, 237) + "...";
|
||||
}
|
||||
} else {
|
||||
snippet = "(empty)";
|
||||
}
|
||||
outline.append("- Chunk ").append(order).append(": ").append(snippet).append("\n");
|
||||
}
|
||||
return outline.toString();
|
||||
}
|
||||
|
||||
private ModelReply parseModelResponse(
|
||||
String raw, long promptTokens, long completionTokens, long totalTokens) {
|
||||
if (!StringUtils.hasText(raw)) {
|
||||
|
||||
@ -0,0 +1,52 @@
|
||||
package stirling.software.proprietary.service.chatbot;
|
||||
|
||||
import java.time.Instant;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.springframework.ai.document.Document;
|
||||
import org.springframework.ai.vectorstore.VectorStore;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.util.StringUtils;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import stirling.software.proprietary.model.chatbot.ChatbotSession;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class ChatbotMemoryService {
|
||||
|
||||
private final VectorStore vectorStore;
|
||||
|
||||
public void recordTurn(ChatbotSession session, String prompt, String answer) {
|
||||
if (session == null) {
|
||||
return;
|
||||
}
|
||||
if (!StringUtils.hasText(prompt) && !StringUtils.hasText(answer)) {
|
||||
return;
|
||||
}
|
||||
Map<String, Object> metadata = new HashMap<>();
|
||||
metadata.put("sessionId", session.getSessionId());
|
||||
metadata.put("documentId", session.getDocumentId());
|
||||
metadata.put("turnType", "conversation");
|
||||
metadata.put("turnTimestamp", Instant.now().toString());
|
||||
metadata.put("userId", session.getUserId());
|
||||
|
||||
StringBuilder contentBuilder = new StringBuilder();
|
||||
if (StringUtils.hasText(prompt)) {
|
||||
contentBuilder.append("User: ").append(prompt.trim()).append("\n");
|
||||
}
|
||||
if (StringUtils.hasText(answer)) {
|
||||
contentBuilder.append("Assistant: ").append(answer.trim());
|
||||
}
|
||||
try {
|
||||
vectorStore.add(List.of(new Document(contentBuilder.toString(), metadata)));
|
||||
} catch (RuntimeException ex) {
|
||||
log.warn("Failed to persist chatbot conversation turn: {}", ex.getMessage());
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -5145,8 +5145,7 @@
|
||||
"title": "Stirling PDF Bot",
|
||||
"alphaBadge": "Alpha",
|
||||
"alphaTitle": "Experimental feature",
|
||||
"alphaDescription": "This Chatbot feature is in currently in alpha and is subject to change. Image-based content is not supported yet. Responses may be imperfect, so double-check important answers.",
|
||||
"acceptAlphaLabel": "I understand this feature is experimental and image content is not supported yet.",
|
||||
"alphaDescription": "Chatbot is currently in alpha and is subject to change. Responses may be imperfect; please check responses.",
|
||||
"fileLabel": "Document to query",
|
||||
"filePlaceholder": "Select an uploaded PDF",
|
||||
"noFiles": "Upload a PDF from File Manager to start chatting.",
|
||||
@ -5193,6 +5192,14 @@
|
||||
"noTextTitle": "No text detected in this PDF",
|
||||
"noTextBody": "We could not find selectable text in this document. Would you like to run OCR to convert scanned pages into text?",
|
||||
"noTextDismiss": "Maybe later",
|
||||
"noTextRunOcr": "Run OCR and retry"
|
||||
"noTextRunOcr": "Run OCR and retry",
|
||||
"usage": {
|
||||
"limitReachedTitle": "Chatbot limit reached",
|
||||
"limitReachedBody": "You have exceeded the current monthly allocation for the chatbot. Further responses may be throttled.",
|
||||
"nearingLimitTitle": "Approaching usage limit",
|
||||
"nearingLimitBody": "You are nearing your monthly chatbot allocation. Consider limiting very large requests."
|
||||
},
|
||||
"autoSyncInfo": "Selected documents are synced automatically when the chatbot opens.",
|
||||
"autoSyncPrompt": "Acknowledge the alpha notice to start syncing automatically."
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,6 +1,5 @@
|
||||
import { useEffect, useLayoutEffect, useMemo, useRef, useState, type KeyboardEvent } from 'react';
|
||||
import {
|
||||
ActionIcon,
|
||||
Badge,
|
||||
Box,
|
||||
Button,
|
||||
@ -13,14 +12,12 @@ import {
|
||||
Switch,
|
||||
Text,
|
||||
Textarea,
|
||||
Tooltip,
|
||||
} from '@mantine/core';
|
||||
import { useMediaQuery, useViewportSize } from '@mantine/hooks';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
import SmartToyRoundedIcon from '@mui/icons-material/SmartToyRounded';
|
||||
import WarningAmberRoundedIcon from '@mui/icons-material/WarningAmberRounded';
|
||||
import SendRoundedIcon from '@mui/icons-material/SendRounded';
|
||||
import RefreshRoundedIcon from '@mui/icons-material/RefreshRounded';
|
||||
import CloseRoundedIcon from '@mui/icons-material/CloseRounded';
|
||||
|
||||
import { useChatbot } from '@app/contexts/ChatbotContext';
|
||||
@ -47,8 +44,6 @@ interface ChatMessage {
|
||||
createdAt: Date;
|
||||
}
|
||||
|
||||
const ALPHA_ACK_KEY = 'stirling.chatbot.alphaAck';
|
||||
|
||||
function createMessageId() {
|
||||
if (typeof crypto !== 'undefined' && crypto.randomUUID) {
|
||||
return crypto.randomUUID();
|
||||
@ -68,7 +63,6 @@ const ChatbotDrawer = () => {
|
||||
const { show } = useToast();
|
||||
const files = selectors.getFiles();
|
||||
const [selectedFileId, setSelectedFileId] = useState<string | undefined>();
|
||||
const [alphaAccepted, setAlphaAccepted] = useState(false);
|
||||
const [runOcr, setRunOcr] = useState(false);
|
||||
const [isStartingSession, setIsStartingSession] = useState(false);
|
||||
const [isSendingMessage, setIsSendingMessage] = useState(false);
|
||||
@ -94,11 +88,6 @@ const ChatbotDrawer = () => {
|
||||
return;
|
||||
}
|
||||
|
||||
const storedAck = typeof window !== 'undefined'
|
||||
? window.localStorage.getItem(ALPHA_ACK_KEY) === 'true'
|
||||
: false;
|
||||
setAlphaAccepted(storedAck);
|
||||
|
||||
if (preferredFileId) {
|
||||
setSelectedFileId(preferredFileId);
|
||||
setPreferredFileId(undefined);
|
||||
@ -161,6 +150,8 @@ const ChatbotDrawer = () => {
|
||||
setContextStats(null);
|
||||
setMessages([]);
|
||||
setWarnings([]);
|
||||
setPendingOcrRetry(false);
|
||||
setNoTextModalOpen(false);
|
||||
}
|
||||
}, [sessionInfo, selectedFileId]);
|
||||
|
||||
@ -192,17 +183,6 @@ const ChatbotDrawer = () => {
|
||||
};
|
||||
}, [isMobile, isOpen, sidebarRefs.toolPanelRef]);
|
||||
|
||||
const handleAlphaAccept = (checked: boolean) => {
|
||||
setAlphaAccepted(checked);
|
||||
if (typeof window !== 'undefined') {
|
||||
if (checked) {
|
||||
window.localStorage.setItem(ALPHA_ACK_KEY, 'true');
|
||||
} else {
|
||||
window.localStorage.removeItem(ALPHA_ACK_KEY);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
const withStatus = async <T,>(label: string, fn: () => Promise<T>): Promise<T> => {
|
||||
setStatusMessage(label);
|
||||
try {
|
||||
@ -228,14 +208,6 @@ const ChatbotDrawer = () => {
|
||||
if (!ensureFileSelected() || !selectedFile) {
|
||||
return;
|
||||
}
|
||||
if (!alphaAccepted) {
|
||||
show({
|
||||
alertType: 'neutral',
|
||||
title: t('chatbot.toasts.ackTitle', 'Accept alpha notice'),
|
||||
body: t('chatbot.toasts.ackBody', 'Please acknowledge the alpha warning before starting.'),
|
||||
});
|
||||
return;
|
||||
}
|
||||
setIsStartingSession(true);
|
||||
try {
|
||||
let workingFile: File = selectedFile;
|
||||
@ -271,7 +243,7 @@ const ChatbotDrawer = () => {
|
||||
text: extractionResult.text,
|
||||
metadata,
|
||||
ocrRequested: shouldRunOcr,
|
||||
warningsAccepted: alphaAccepted,
|
||||
warningsAccepted: true,
|
||||
};
|
||||
|
||||
const response = await withStatus(
|
||||
@ -302,6 +274,36 @@ const ChatbotDrawer = () => {
|
||||
}
|
||||
};
|
||||
|
||||
useEffect(() => {
|
||||
if (
|
||||
!isOpen ||
|
||||
!selectedFile ||
|
||||
sessionInfo ||
|
||||
isStartingSession ||
|
||||
pendingOcrRetry ||
|
||||
noTextModalOpen
|
||||
) {
|
||||
return;
|
||||
}
|
||||
let cancelled = false;
|
||||
handleSessionStart().catch((error) => {
|
||||
if (!cancelled) {
|
||||
console.error('[Chatbot] Auto-sync failed', error);
|
||||
}
|
||||
});
|
||||
return () => {
|
||||
cancelled = true;
|
||||
};
|
||||
}, [isOpen, selectedFile, sessionInfo, isStartingSession, pendingOcrRetry, noTextModalOpen, runOcr]);
|
||||
|
||||
useEffect(() => {
|
||||
if (!sessionInfo) {
|
||||
return;
|
||||
}
|
||||
setSessionInfo(null);
|
||||
setContextStats(null);
|
||||
}, [runOcr]);
|
||||
|
||||
const handleSendMessage = async () => {
|
||||
if (!sessionInfo) {
|
||||
show({
|
||||
@ -495,25 +497,6 @@ const ChatbotDrawer = () => {
|
||||
transitionProps={{ transition: 'slide-left', duration: 200 }}
|
||||
>
|
||||
<Stack gap="sm" h="100%" style={{ minHeight: 0 }}>
|
||||
<Box
|
||||
p="sm"
|
||||
style={{
|
||||
border: '1px solid var(--border-subtle)',
|
||||
borderRadius: 8,
|
||||
backgroundColor: 'var(--bg-subtle)',
|
||||
display: 'flex',
|
||||
gap: '0.5rem',
|
||||
alignItems: 'flex-start',
|
||||
}}
|
||||
>
|
||||
<WarningAmberRoundedIcon fontSize="small" style={{ color: 'var(--text-warning)' }} />
|
||||
<Box>
|
||||
<Text fw={600}>{t('chatbot.alphaTitle', 'Experimental feature')}</Text>
|
||||
<Text size="sm">
|
||||
{t('chatbot.alphaDescription', 'This chatbot is in alpha. It currently ignores images and may produce inaccurate answers. Your PDF text stays local until you confirm you want to chat.')}
|
||||
</Text>
|
||||
</Box>
|
||||
</Box>
|
||||
|
||||
<Group align="flex-end" justify="space-between" gap="md" wrap="wrap">
|
||||
<Select
|
||||
@ -526,11 +509,6 @@ const ChatbotDrawer = () => {
|
||||
style={{ flex: '1 1 200px' }}
|
||||
/>
|
||||
<Stack gap={4} style={{ minWidth: 160 }}>
|
||||
<Switch
|
||||
checked={alphaAccepted}
|
||||
label={t('chatbot.acceptAlphaLabel', 'Alpha notice acknowledged')}
|
||||
onChange={(event) => handleAlphaAccept(event.currentTarget.checked)}
|
||||
/>
|
||||
<Switch
|
||||
checked={runOcr}
|
||||
onChange={(event) => setRunOcr(event.currentTarget.checked)}
|
||||
@ -539,19 +517,6 @@ const ChatbotDrawer = () => {
|
||||
</Stack>
|
||||
</Group>
|
||||
|
||||
<Button
|
||||
fullWidth
|
||||
variant="filled"
|
||||
leftSection={<RefreshRoundedIcon fontSize="small" />}
|
||||
loading={isStartingSession}
|
||||
onClick={() => handleSessionStart()}
|
||||
disabled={!selectedFile || !alphaAccepted}
|
||||
>
|
||||
{sessionInfo
|
||||
? t('chatbot.refreshButton', 'Re-sync document')
|
||||
: t('chatbot.startButton', 'Send document to chat')}
|
||||
</Button>
|
||||
|
||||
{statusMessage && (
|
||||
<Box
|
||||
p="sm"
|
||||
@ -596,6 +561,28 @@ const ChatbotDrawer = () => {
|
||||
</Group>
|
||||
</Box>
|
||||
))}
|
||||
{isOpen && (
|
||||
<Box
|
||||
p="sm"
|
||||
bg="var(--bg-muted)"
|
||||
style={{ borderRadius: 12, border: '1px solid var(--border-subtle)' }}
|
||||
>
|
||||
<Group gap="xs" align="flex-start">
|
||||
<WarningAmberRoundedIcon fontSize="small" style={{ color: 'var(--text-warning)' }} />
|
||||
<Box>
|
||||
<Text size="sm" fw={600}>
|
||||
{t('chatbot.alphaTitle', 'Experimental feature')}
|
||||
</Text>
|
||||
<Text size="sm">
|
||||
{t(
|
||||
'chatbot.alphaDescription',
|
||||
'This chatbot is in alpha. It currently ignores images and may produce inaccurate answers.'
|
||||
)}
|
||||
</Text>
|
||||
</Box>
|
||||
</Group>
|
||||
</Box>
|
||||
)}
|
||||
{messages.length === 0 && (
|
||||
<Text size="sm" c="dimmed">
|
||||
{t('chatbot.emptyState', 'Ask a question about your PDF to start the conversation.')}
|
||||
|
||||
@ -7,14 +7,10 @@ import LocalIcon from '@app/components/shared/LocalIcon';
|
||||
import { Tooltip } from '@app/components/shared/Tooltip';
|
||||
import { SearchInterface } from '@app/components/viewer/SearchInterface';
|
||||
import ViewerAnnotationControls from '@app/components/shared/rightRail/ViewerAnnotationControls';
|
||||
import { useFileState } from '@app/contexts/FileContext';
|
||||
|
||||
export function useViewerRightRailButtons() {
|
||||
const { t } = useTranslation();
|
||||
const viewer = useViewer();
|
||||
const { selectors } = useFileState();
|
||||
const filesSignature = selectors.getFilesSignature();
|
||||
const files = useMemo(() => selectors.getFiles(), [selectors, filesSignature]);
|
||||
const [isPanning, setIsPanning] = useState<boolean>(() => viewer.getPanState()?.isPanning ?? false);
|
||||
|
||||
// Lift i18n labels out of memo for clarity
|
||||
|
||||
Loading…
Reference in New Issue
Block a user