mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2025-11-16 01:21:16 +01:00
pre-process docs on upload
This commit is contained in:
parent
9ffbede49a
commit
e5212f7f63
@ -45,21 +45,7 @@ public class ChatbotController {
|
|||||||
@RequestBody ChatbotSessionCreateRequest request) {
|
@RequestBody ChatbotSessionCreateRequest request) {
|
||||||
ChatbotSession session = chatbotService.createSession(request);
|
ChatbotSession session = chatbotService.createSession(request);
|
||||||
ChatbotSettings settings = featureProperties.current();
|
ChatbotSettings settings = featureProperties.current();
|
||||||
ChatbotSessionResponse response =
|
ChatbotSessionResponse response = toResponse(session, settings);
|
||||||
ChatbotSessionResponse.builder()
|
|
||||||
.sessionId(session.getSessionId())
|
|
||||||
.documentId(session.getDocumentId())
|
|
||||||
.alphaWarning(settings.alphaWarning())
|
|
||||||
.ocrRequested(session.isOcrRequested())
|
|
||||||
.imageContentDetected(session.isImageContentDetected())
|
|
||||||
.textCharacters(session.getTextCharacters())
|
|
||||||
.estimatedTokens(session.getEstimatedTokens())
|
|
||||||
.maxCachedCharacters(cacheService.getMaxDocumentCharacters())
|
|
||||||
.createdAt(session.getCreatedAt())
|
|
||||||
.warnings(sessionWarnings(settings, session))
|
|
||||||
.metadata(session.getMetadata())
|
|
||||||
.usageSummary(session.getUsageSummary())
|
|
||||||
.build();
|
|
||||||
return ResponseEntity.status(HttpStatus.CREATED).body(response);
|
return ResponseEntity.status(HttpStatus.CREATED).body(response);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -76,24 +62,21 @@ public class ChatbotController {
|
|||||||
sessionRegistry
|
sessionRegistry
|
||||||
.findById(sessionId)
|
.findById(sessionId)
|
||||||
.orElseThrow(() -> new ChatbotException("Session not found"));
|
.orElseThrow(() -> new ChatbotException("Session not found"));
|
||||||
ChatbotSessionResponse response =
|
ChatbotSessionResponse response = toResponse(session, settings);
|
||||||
ChatbotSessionResponse.builder()
|
|
||||||
.sessionId(session.getSessionId())
|
|
||||||
.documentId(session.getDocumentId())
|
|
||||||
.alphaWarning(settings.alphaWarning())
|
|
||||||
.ocrRequested(session.isOcrRequested())
|
|
||||||
.imageContentDetected(session.isImageContentDetected())
|
|
||||||
.textCharacters(session.getTextCharacters())
|
|
||||||
.estimatedTokens(session.getEstimatedTokens())
|
|
||||||
.maxCachedCharacters(cacheService.getMaxDocumentCharacters())
|
|
||||||
.createdAt(session.getCreatedAt())
|
|
||||||
.warnings(sessionWarnings(settings, session))
|
|
||||||
.metadata(session.getMetadata())
|
|
||||||
.usageSummary(session.getUsageSummary())
|
|
||||||
.build();
|
|
||||||
return ResponseEntity.ok(response);
|
return ResponseEntity.ok(response);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@GetMapping("/document/{documentId}")
|
||||||
|
public ResponseEntity<ChatbotSessionResponse> getSessionByDocument(
|
||||||
|
@PathVariable String documentId) {
|
||||||
|
ChatbotSettings settings = featureProperties.current();
|
||||||
|
ChatbotSession session =
|
||||||
|
sessionRegistry
|
||||||
|
.findByDocumentId(documentId)
|
||||||
|
.orElseThrow(() -> new ChatbotException("Session not found"));
|
||||||
|
return ResponseEntity.ok(toResponse(session, settings));
|
||||||
|
}
|
||||||
|
|
||||||
@DeleteMapping("/session/{sessionId}")
|
@DeleteMapping("/session/{sessionId}")
|
||||||
public ResponseEntity<Void> closeSession(@PathVariable String sessionId) {
|
public ResponseEntity<Void> closeSession(@PathVariable String sessionId) {
|
||||||
chatbotService.close(sessionId);
|
chatbotService.close(sessionId);
|
||||||
@ -123,4 +106,21 @@ public class ChatbotController {
|
|||||||
|
|
||||||
return warnings;
|
return warnings;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private ChatbotSessionResponse toResponse(ChatbotSession session, ChatbotSettings settings) {
|
||||||
|
return ChatbotSessionResponse.builder()
|
||||||
|
.sessionId(session.getSessionId())
|
||||||
|
.documentId(session.getDocumentId())
|
||||||
|
.alphaWarning(settings.alphaWarning())
|
||||||
|
.ocrRequested(session.isOcrRequested())
|
||||||
|
.imageContentDetected(session.isImageContentDetected())
|
||||||
|
.textCharacters(session.getTextCharacters())
|
||||||
|
.estimatedTokens(session.getEstimatedTokens())
|
||||||
|
.maxCachedCharacters(cacheService.getMaxDocumentCharacters())
|
||||||
|
.createdAt(session.getCreatedAt())
|
||||||
|
.warnings(sessionWarnings(settings, session))
|
||||||
|
.metadata(session.getMetadata())
|
||||||
|
.usageSummary(session.getUsageSummary())
|
||||||
|
.build();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -12,9 +12,13 @@ import stirling.software.proprietary.model.chatbot.ChatbotSession;
|
|||||||
public class ChatbotSessionRegistry {
|
public class ChatbotSessionRegistry {
|
||||||
|
|
||||||
private final Map<String, ChatbotSession> sessionStore = new ConcurrentHashMap<>();
|
private final Map<String, ChatbotSession> sessionStore = new ConcurrentHashMap<>();
|
||||||
|
private final Map<String, String> documentToSession = new ConcurrentHashMap<>();
|
||||||
|
|
||||||
public void register(ChatbotSession session) {
|
public void register(ChatbotSession session) {
|
||||||
sessionStore.put(session.getSessionId(), session);
|
sessionStore.put(session.getSessionId(), session);
|
||||||
|
if (session.getDocumentId() != null) {
|
||||||
|
documentToSession.put(session.getDocumentId(), session.getSessionId());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public Optional<ChatbotSession> findById(String sessionId) {
|
public Optional<ChatbotSession> findById(String sessionId) {
|
||||||
@ -22,6 +26,16 @@ public class ChatbotSessionRegistry {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public void remove(String sessionId) {
|
public void remove(String sessionId) {
|
||||||
sessionStore.remove(sessionId);
|
Optional.ofNullable(sessionStore.remove(sessionId))
|
||||||
|
.map(ChatbotSession::getDocumentId)
|
||||||
|
.ifPresent(documentToSession::remove);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Optional<ChatbotSession> findByDocumentId(String documentId) {
|
||||||
|
return Optional.ofNullable(documentToSession.get(documentId)).flatMap(this::findById);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void removeByDocumentId(String documentId) {
|
||||||
|
Optional.ofNullable(documentToSession.remove(documentId)).ifPresent(sessionStore::remove);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -5145,14 +5145,15 @@
|
|||||||
"title": "Stirling PDF Bot",
|
"title": "Stirling PDF Bot",
|
||||||
"alphaBadge": "Alpha",
|
"alphaBadge": "Alpha",
|
||||||
"alphaTitle": "Experimental feature",
|
"alphaTitle": "Experimental feature",
|
||||||
"alphaDescription": "Chatbot is in currently in alpha and is subject to change. Responses may be imperfect, please check responses.",
|
"alphaDescription": "This chatbot is in alpha. It currently ignores images and may produce inaccurate answers.",
|
||||||
|
"acceptAlphaLabel": "I understand this feature is experimental and image content is not supported yet.",
|
||||||
"fileLabel": "Document to query",
|
"fileLabel": "Document to query",
|
||||||
"filePlaceholder": "Select an uploaded PDF",
|
"filePlaceholder": "Select an uploaded PDF",
|
||||||
"noFiles": "Upload a PDF from File Manager to start chatting.",
|
"noFiles": "Upload a PDF from File Manager to start chatting.",
|
||||||
"ocrToggle": "Run OCR before extracting text (uses more resources)",
|
"ocrToggle": "Run OCR before extracting text (uses more resources)",
|
||||||
"ocrHint": "Enable when your PDF is a scan or contains images.",
|
"ocrHint": "Enable when your PDF is a scan or contains images.",
|
||||||
"refreshButton": "Re-sync document",
|
"refreshButton": "Reprocess document",
|
||||||
"startButton": "Send document to chat",
|
"startButton": "Prepare document for chat",
|
||||||
"sessionSummary": "Context summary",
|
"sessionSummary": "Context summary",
|
||||||
"contextDetails": "{{pages}} pages · {{chars}} characters synced",
|
"contextDetails": "{{pages}} pages · {{chars}} characters synced",
|
||||||
"conversationTitle": "Conversation",
|
"conversationTitle": "Conversation",
|
||||||
@ -5172,6 +5173,10 @@
|
|||||||
"toolHint": "The chat window slides in from the left. If it is already open, this button simply focuses it and passes along the currently selected PDF.",
|
"toolHint": "The chat window slides in from the left. If it is already open, this button simply focuses it and passes along the currently selected PDF.",
|
||||||
"toolTitleMenu": "Chatbot (Alpha)",
|
"toolTitleMenu": "Chatbot (Alpha)",
|
||||||
"toolMenuDescription": "Chat with Stirling Bot about the contents of your PDF.",
|
"toolMenuDescription": "Chat with Stirling Bot about the contents of your PDF.",
|
||||||
|
"errors": {
|
||||||
|
"preprocessing": "Unable to prepare this document.",
|
||||||
|
"unsupported": "Unsupported document type."
|
||||||
|
},
|
||||||
"status": {
|
"status": {
|
||||||
"runningOcr": "Running OCR and extracting text…",
|
"runningOcr": "Running OCR and extracting text…",
|
||||||
"extracting": "Extracting text from PDF…",
|
"extracting": "Extracting text from PDF…",
|
||||||
|
|||||||
@ -18,17 +18,14 @@ import { useTranslation } from 'react-i18next';
|
|||||||
import SmartToyRoundedIcon from '@mui/icons-material/SmartToyRounded';
|
import SmartToyRoundedIcon from '@mui/icons-material/SmartToyRounded';
|
||||||
import WarningAmberRoundedIcon from '@mui/icons-material/WarningAmberRounded';
|
import WarningAmberRoundedIcon from '@mui/icons-material/WarningAmberRounded';
|
||||||
import SendRoundedIcon from '@mui/icons-material/SendRounded';
|
import SendRoundedIcon from '@mui/icons-material/SendRounded';
|
||||||
import CloseRoundedIcon from '@mui/icons-material/CloseRounded';
|
import RefreshRoundedIcon from '@mui/icons-material/RefreshRounded';
|
||||||
|
|
||||||
import { useChatbot } from '@app/contexts/ChatbotContext';
|
import { useChatbot } from '@app/contexts/ChatbotContext';
|
||||||
import { useFileState } from '@app/contexts/FileContext';
|
import { useFileState } from '@app/contexts/FileContext';
|
||||||
import { extractTextFromPdf } from '@app/services/pdfTextExtractor';
|
|
||||||
import { runOcrForChat } from '@app/services/chatbotOcrService';
|
|
||||||
import {
|
import {
|
||||||
ChatbotMessageResponse,
|
ChatbotMessageResponse,
|
||||||
ChatbotSessionInfo,
|
ChatbotSessionInfo,
|
||||||
ChatbotUsageSummary,
|
ChatbotUsageSummary,
|
||||||
createChatbotSession,
|
|
||||||
sendChatbotPrompt,
|
sendChatbotPrompt,
|
||||||
} from '@app/services/chatbotService';
|
} from '@app/services/chatbotService';
|
||||||
import { useToast } from '@app/components/toast';
|
import { useToast } from '@app/components/toast';
|
||||||
@ -52,28 +49,32 @@ function createMessageId() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const MAX_PROMPT_CHARS = 4000;
|
const MAX_PROMPT_CHARS = 4000;
|
||||||
|
const ALPHA_ACK_KEY = 'stirling.chatbot.alphaAck';
|
||||||
|
|
||||||
const ChatbotDrawer = () => {
|
const ChatbotDrawer = () => {
|
||||||
const { t } = useTranslation();
|
const { t } = useTranslation();
|
||||||
const isMobile = useMediaQuery('(max-width: 768px)');
|
const isMobile = useMediaQuery('(max-width: 768px)');
|
||||||
const { width: viewportWidth, height: viewportHeight } = useViewportSize();
|
const { width: viewportWidth, height: viewportHeight } = useViewportSize();
|
||||||
const { isOpen, closeChat, preferredFileId, setPreferredFileId } = useChatbot();
|
const {
|
||||||
|
isOpen,
|
||||||
|
closeChat,
|
||||||
|
preferredFileId,
|
||||||
|
setPreferredFileId,
|
||||||
|
sessions: preparedSessions,
|
||||||
|
requestPreprocessing,
|
||||||
|
} = useChatbot();
|
||||||
const { selectors } = useFileState();
|
const { selectors } = useFileState();
|
||||||
const { sidebarRefs } = useSidebarContext();
|
const { sidebarRefs } = useSidebarContext();
|
||||||
const { show } = useToast();
|
const { show } = useToast();
|
||||||
const files = selectors.getFiles();
|
const files = selectors.getFiles();
|
||||||
const [selectedFileId, setSelectedFileId] = useState<string | undefined>();
|
const [selectedFileId, setSelectedFileId] = useState<string | undefined>();
|
||||||
|
const [alphaAccepted, setAlphaAccepted] = useState(false);
|
||||||
const [runOcr, setRunOcr] = useState(false);
|
const [runOcr, setRunOcr] = useState(false);
|
||||||
const [isStartingSession, setIsStartingSession] = useState(false);
|
const [isStartingSession, setIsStartingSession] = useState(false);
|
||||||
const [isSendingMessage, setIsSendingMessage] = useState(false);
|
const [isSendingMessage, setIsSendingMessage] = useState(false);
|
||||||
const [statusMessage, setStatusMessage] = useState<string>('');
|
|
||||||
const [sessionInfo, setSessionInfo] = useState<ChatbotSessionInfo | null>(null);
|
|
||||||
const [contextStats, setContextStats] = useState<{ pageCount: number; characterCount: number } | null>(null);
|
|
||||||
const [messages, setMessages] = useState<ChatMessage[]>([]);
|
const [messages, setMessages] = useState<ChatMessage[]>([]);
|
||||||
const [prompt, setPrompt] = useState('');
|
const [prompt, setPrompt] = useState('');
|
||||||
const [warnings, setWarnings] = useState<string[]>([]);
|
const [warnings, setWarnings] = useState<string[]>([]);
|
||||||
const [noTextModalOpen, setNoTextModalOpen] = useState(false);
|
|
||||||
const [pendingOcrRetry, setPendingOcrRetry] = useState(false);
|
|
||||||
const scrollViewportRef = useRef<HTMLDivElement>(null);
|
const scrollViewportRef = useRef<HTMLDivElement>(null);
|
||||||
const [panelAnchor, setPanelAnchor] = useState<{ right: number; top: number } | null>(null);
|
const [panelAnchor, setPanelAnchor] = useState<{ right: number; top: number } | null>(null);
|
||||||
const usageAlertState = useRef<'none' | 'warned' | 'limit'>('none');
|
const usageAlertState = useRef<'none' | 'warned' | 'limit'>('none');
|
||||||
@ -82,6 +83,50 @@ const ChatbotDrawer = () => {
|
|||||||
() => files.find((file) => file.fileId === selectedFileId),
|
() => files.find((file) => file.fileId === selectedFileId),
|
||||||
[files, selectedFileId]
|
[files, selectedFileId]
|
||||||
);
|
);
|
||||||
|
const selectedSessionEntry = selectedFileId
|
||||||
|
? preparedSessions[selectedFileId]
|
||||||
|
: undefined;
|
||||||
|
const sessionStatus = selectedSessionEntry?.status ?? 'idle';
|
||||||
|
const sessionError = selectedSessionEntry?.error;
|
||||||
|
const sessionInfo: ChatbotSessionInfo | null = selectedSessionEntry?.session ?? null;
|
||||||
|
const contextStats =
|
||||||
|
selectedSessionEntry?.status === 'ready' && selectedSessionEntry?.characterCount !== undefined
|
||||||
|
? {
|
||||||
|
pageCount: selectedSessionEntry.pageCount ?? 0,
|
||||||
|
characterCount: selectedSessionEntry.characterCount ?? 0,
|
||||||
|
}
|
||||||
|
: null;
|
||||||
|
const preparationWarnings = selectedSessionEntry?.warnings ?? [];
|
||||||
|
const derivedStatusMessage = useMemo(() => {
|
||||||
|
if (!alphaAccepted) {
|
||||||
|
return t('chatbot.autoSyncPrompt', 'Acknowledge the alpha notice to start syncing automatically.');
|
||||||
|
}
|
||||||
|
if (sessionStatus === 'processing' || isStartingSession) {
|
||||||
|
return t('chatbot.status.syncing', 'Preparing document for chat…');
|
||||||
|
}
|
||||||
|
if (sessionStatus === 'error') {
|
||||||
|
return sessionError || t('chatbot.errors.preprocessing', 'Unable to prepare this document.');
|
||||||
|
}
|
||||||
|
if (sessionStatus === 'unsupported') {
|
||||||
|
return sessionError || t('chatbot.errors.unsupported', 'Unsupported document type.');
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}, [alphaAccepted, sessionStatus, sessionError, isStartingSession, t]);
|
||||||
|
const assistantWarnings = useMemo(
|
||||||
|
() => [...preparationWarnings, ...warnings.filter(Boolean)],
|
||||||
|
[preparationWarnings, warnings]
|
||||||
|
);
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
if (!isOpen) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const storedAck =
|
||||||
|
typeof window !== 'undefined'
|
||||||
|
? window.localStorage.getItem(ALPHA_ACK_KEY) === 'true'
|
||||||
|
: false;
|
||||||
|
setAlphaAccepted(storedAck);
|
||||||
|
}, [isOpen]);
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
if (!isOpen) {
|
if (!isOpen) {
|
||||||
@ -113,8 +158,16 @@ const ChatbotDrawer = () => {
|
|||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
usageAlertState.current = 'none';
|
usageAlertState.current = 'none';
|
||||||
|
if (sessionInfo) {
|
||||||
|
maybeShowUsageWarning(sessionInfo.usageSummary);
|
||||||
|
}
|
||||||
}, [sessionInfo?.sessionId]);
|
}, [sessionInfo?.sessionId]);
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
setMessages([]);
|
||||||
|
setWarnings([]);
|
||||||
|
}, [selectedFileId]);
|
||||||
|
|
||||||
const maybeShowUsageWarning = (usage?: ChatbotUsageSummary | null) => {
|
const maybeShowUsageWarning = (usage?: ChatbotUsageSummary | null) => {
|
||||||
if (!usage) {
|
if (!usage) {
|
||||||
return;
|
return;
|
||||||
@ -144,17 +197,6 @@ const ChatbotDrawer = () => {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
useEffect(() => {
|
|
||||||
if (sessionInfo && sessionInfo.documentId !== selectedFileId) {
|
|
||||||
setSessionInfo(null);
|
|
||||||
setContextStats(null);
|
|
||||||
setMessages([]);
|
|
||||||
setWarnings([]);
|
|
||||||
setPendingOcrRetry(false);
|
|
||||||
setNoTextModalOpen(false);
|
|
||||||
}
|
|
||||||
}, [sessionInfo, selectedFileId]);
|
|
||||||
|
|
||||||
useLayoutEffect(() => {
|
useLayoutEffect(() => {
|
||||||
if (isMobile || !isOpen) {
|
if (isMobile || !isOpen) {
|
||||||
setPanelAnchor(null);
|
setPanelAnchor(null);
|
||||||
@ -183,15 +225,6 @@ const ChatbotDrawer = () => {
|
|||||||
};
|
};
|
||||||
}, [isMobile, isOpen, sidebarRefs.toolPanelRef]);
|
}, [isMobile, isOpen, sidebarRefs.toolPanelRef]);
|
||||||
|
|
||||||
const withStatus = async <T,>(label: string, fn: () => Promise<T>): Promise<T> => {
|
|
||||||
setStatusMessage(label);
|
|
||||||
try {
|
|
||||||
return await fn();
|
|
||||||
} finally {
|
|
||||||
setStatusMessage('');
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
const ensureFileSelected = () => {
|
const ensureFileSelected = () => {
|
||||||
if (!selectedFile) {
|
if (!selectedFile) {
|
||||||
show({
|
show({
|
||||||
@ -204,65 +237,27 @@ const ChatbotDrawer = () => {
|
|||||||
return true;
|
return true;
|
||||||
};
|
};
|
||||||
|
|
||||||
const handleSessionStart = async (forceOcr?: boolean) => {
|
const handleAlphaAccept = (checked: boolean) => {
|
||||||
if (!ensureFileSelected() || !selectedFile) {
|
setAlphaAccepted(checked);
|
||||||
|
if (typeof window !== 'undefined') {
|
||||||
|
if (checked) {
|
||||||
|
window.localStorage.setItem(ALPHA_ACK_KEY, 'true');
|
||||||
|
} else {
|
||||||
|
window.localStorage.removeItem(ALPHA_ACK_KEY);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
const handleManualPrepare = async (forceOcr?: boolean) => {
|
||||||
|
if (!ensureFileSelected() || !selectedFileId) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
setIsStartingSession(true);
|
setIsStartingSession(true);
|
||||||
try {
|
try {
|
||||||
let workingFile: File = selectedFile;
|
await requestPreprocessing(selectedFileId, { force: true, forceOcr: forceOcr ?? runOcr });
|
||||||
const shouldRunOcr = forceOcr ?? runOcr;
|
usageAlertState.current = 'none';
|
||||||
|
|
||||||
const extractionResult = await withStatus(
|
|
||||||
shouldRunOcr
|
|
||||||
? t('chatbot.status.runningOcr', 'Running OCR and extracting text…')
|
|
||||||
: t('chatbot.status.extracting', 'Extracting text from PDF…'),
|
|
||||||
async () => {
|
|
||||||
if (shouldRunOcr) {
|
|
||||||
workingFile = await runOcrForChat(selectedFile);
|
|
||||||
}
|
|
||||||
return extractTextFromPdf(workingFile);
|
|
||||||
}
|
|
||||||
);
|
|
||||||
|
|
||||||
if (!extractionResult.text || extractionResult.text.trim().length === 0) {
|
|
||||||
setPendingOcrRetry(true);
|
|
||||||
setNoTextModalOpen(true);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
const metadata = {
|
|
||||||
name: workingFile.name,
|
|
||||||
size: String(workingFile.size),
|
|
||||||
pageCount: String(extractionResult.pageCount),
|
|
||||||
};
|
|
||||||
|
|
||||||
const sessionPayload = {
|
|
||||||
sessionId: sessionInfo?.sessionId,
|
|
||||||
documentId: selectedFile.fileId,
|
|
||||||
text: extractionResult.text,
|
|
||||||
metadata,
|
|
||||||
ocrRequested: shouldRunOcr,
|
|
||||||
warningsAccepted: true,
|
|
||||||
};
|
|
||||||
|
|
||||||
const response = await withStatus(
|
|
||||||
t('chatbot.status.syncing', 'Syncing document with Stirling Bot…'),
|
|
||||||
() => createChatbotSession(sessionPayload)
|
|
||||||
);
|
|
||||||
|
|
||||||
setSessionInfo(response);
|
|
||||||
maybeShowUsageWarning(response.usageSummary);
|
|
||||||
setContextStats({
|
|
||||||
pageCount: extractionResult.pageCount,
|
|
||||||
characterCount: extractionResult.characterCount,
|
|
||||||
});
|
|
||||||
setMessages([]);
|
|
||||||
setWarnings(response.warnings ?? []);
|
|
||||||
setPendingOcrRetry(false);
|
|
||||||
setNoTextModalOpen(false);
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('[Chatbot] Failed to start session', error);
|
console.error('[Chatbot] Failed to prepare document', error);
|
||||||
show({
|
show({
|
||||||
alertType: 'error',
|
alertType: 'error',
|
||||||
title: t('chatbot.toasts.failedSessionTitle', 'Could not prepare document'),
|
title: t('chatbot.toasts.failedSessionTitle', 'Could not prepare document'),
|
||||||
@ -270,42 +265,11 @@ const ChatbotDrawer = () => {
|
|||||||
});
|
});
|
||||||
} finally {
|
} finally {
|
||||||
setIsStartingSession(false);
|
setIsStartingSession(false);
|
||||||
setStatusMessage('');
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
useEffect(() => {
|
|
||||||
if (
|
|
||||||
!isOpen ||
|
|
||||||
!selectedFile ||
|
|
||||||
sessionInfo ||
|
|
||||||
isStartingSession ||
|
|
||||||
pendingOcrRetry ||
|
|
||||||
noTextModalOpen
|
|
||||||
) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
let cancelled = false;
|
|
||||||
handleSessionStart().catch((error) => {
|
|
||||||
if (!cancelled) {
|
|
||||||
console.error('[Chatbot] Auto-sync failed', error);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
return () => {
|
|
||||||
cancelled = true;
|
|
||||||
};
|
|
||||||
}, [isOpen, selectedFile, sessionInfo, isStartingSession, pendingOcrRetry, noTextModalOpen, runOcr]);
|
|
||||||
|
|
||||||
useEffect(() => {
|
|
||||||
if (!sessionInfo) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
setSessionInfo(null);
|
|
||||||
setContextStats(null);
|
|
||||||
}, [runOcr]);
|
|
||||||
|
|
||||||
const handleSendMessage = async () => {
|
const handleSendMessage = async () => {
|
||||||
if (!sessionInfo) {
|
if (!sessionInfo || sessionStatus !== 'ready') {
|
||||||
show({
|
show({
|
||||||
alertType: 'neutral',
|
alertType: 'neutral',
|
||||||
title: t('chatbot.toasts.noSessionTitle', 'Sync your document first'),
|
title: t('chatbot.toasts.noSessionTitle', 'Sync your document first'),
|
||||||
@ -369,7 +333,8 @@ const ChatbotDrawer = () => {
|
|||||||
[files]
|
[files]
|
||||||
);
|
);
|
||||||
|
|
||||||
const disablePromptInput = !sessionInfo || isStartingSession || isSendingMessage;
|
const disablePromptInput =
|
||||||
|
!sessionInfo || sessionStatus !== 'ready' || isStartingSession || isSendingMessage;
|
||||||
const canSend = !disablePromptInput && prompt.trim().length > 0;
|
const canSend = !disablePromptInput && prompt.trim().length > 0;
|
||||||
|
|
||||||
const handlePromptKeyDown = (event: KeyboardEvent<HTMLTextAreaElement>) => {
|
const handlePromptKeyDown = (event: KeyboardEvent<HTMLTextAreaElement>) => {
|
||||||
@ -395,7 +360,6 @@ const ChatbotDrawer = () => {
|
|||||||
</Group>
|
</Group>
|
||||||
);
|
);
|
||||||
|
|
||||||
const assistantWarnings = warnings.filter(Boolean);
|
|
||||||
|
|
||||||
const safeViewportWidth =
|
const safeViewportWidth =
|
||||||
viewportWidth || (typeof window !== 'undefined' ? window.innerWidth : 1280);
|
viewportWidth || (typeof window !== 'undefined' ? window.innerWidth : 1280);
|
||||||
@ -497,6 +461,28 @@ const ChatbotDrawer = () => {
|
|||||||
transitionProps={{ transition: 'slide-left', duration: 200 }}
|
transitionProps={{ transition: 'slide-left', duration: 200 }}
|
||||||
>
|
>
|
||||||
<Stack gap="sm" h="100%" style={{ minHeight: 0 }}>
|
<Stack gap="sm" h="100%" style={{ minHeight: 0 }}>
|
||||||
|
<Box
|
||||||
|
p="sm"
|
||||||
|
style={{
|
||||||
|
border: '1px solid var(--border-subtle)',
|
||||||
|
borderRadius: 8,
|
||||||
|
backgroundColor: 'var(--bg-subtle)',
|
||||||
|
display: 'flex',
|
||||||
|
gap: '0.5rem',
|
||||||
|
alignItems: 'flex-start',
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
<WarningAmberRoundedIcon fontSize="small" style={{ color: 'var(--text-warning)' }} />
|
||||||
|
<Box>
|
||||||
|
<Text fw={600}>{t('chatbot.alphaTitle', 'Experimental feature')}</Text>
|
||||||
|
<Text size="sm">
|
||||||
|
{t(
|
||||||
|
'chatbot.alphaDescription',
|
||||||
|
'This chatbot is in alpha. It currently ignores images and may produce inaccurate answers.'
|
||||||
|
)}
|
||||||
|
</Text>
|
||||||
|
</Box>
|
||||||
|
</Box>
|
||||||
|
|
||||||
<Group align="flex-end" justify="space-between" gap="md" wrap="wrap">
|
<Group align="flex-end" justify="space-between" gap="md" wrap="wrap">
|
||||||
<Select
|
<Select
|
||||||
@ -508,7 +494,12 @@ const ChatbotDrawer = () => {
|
|||||||
nothingFoundMessage={t('chatbot.noFiles', 'Upload a PDF from File Manager to start chatting.')}
|
nothingFoundMessage={t('chatbot.noFiles', 'Upload a PDF from File Manager to start chatting.')}
|
||||||
style={{ flex: '1 1 200px' }}
|
style={{ flex: '1 1 200px' }}
|
||||||
/>
|
/>
|
||||||
<Stack gap={4} style={{ minWidth: 160 }}>
|
<Stack gap={4} style={{ minWidth: 180 }}>
|
||||||
|
<Switch
|
||||||
|
checked={alphaAccepted}
|
||||||
|
onChange={(event) => handleAlphaAccept(event.currentTarget.checked)}
|
||||||
|
label={t('chatbot.acceptAlphaLabel', 'I acknowledge this experimental feature')}
|
||||||
|
/>
|
||||||
<Switch
|
<Switch
|
||||||
checked={runOcr}
|
checked={runOcr}
|
||||||
onChange={(event) => setRunOcr(event.currentTarget.checked)}
|
onChange={(event) => setRunOcr(event.currentTarget.checked)}
|
||||||
@ -517,7 +508,20 @@ const ChatbotDrawer = () => {
|
|||||||
</Stack>
|
</Stack>
|
||||||
</Group>
|
</Group>
|
||||||
|
|
||||||
{statusMessage && (
|
<Button
|
||||||
|
fullWidth
|
||||||
|
variant="filled"
|
||||||
|
leftSection={<RefreshRoundedIcon fontSize="small" />}
|
||||||
|
loading={isStartingSession || sessionStatus === 'processing'}
|
||||||
|
onClick={() => handleManualPrepare()}
|
||||||
|
disabled={!selectedFile || !alphaAccepted || sessionStatus === 'processing'}
|
||||||
|
>
|
||||||
|
{sessionStatus === 'ready'
|
||||||
|
? t('chatbot.refreshButton', 'Reprocess document')
|
||||||
|
: t('chatbot.startButton', 'Prepare document for chat')}
|
||||||
|
</Button>
|
||||||
|
|
||||||
|
{derivedStatusMessage && (
|
||||||
<Box
|
<Box
|
||||||
p="sm"
|
p="sm"
|
||||||
style={{
|
style={{
|
||||||
@ -526,7 +530,16 @@ const ChatbotDrawer = () => {
|
|||||||
backgroundColor: 'var(--bg-muted)',
|
backgroundColor: 'var(--bg-muted)',
|
||||||
}}
|
}}
|
||||||
>
|
>
|
||||||
<Text size="sm" c="blue">{statusMessage}</Text>
|
<Text
|
||||||
|
size="sm"
|
||||||
|
c={
|
||||||
|
sessionStatus === 'error' || sessionStatus === 'unsupported'
|
||||||
|
? 'var(--text-warning)'
|
||||||
|
: 'blue'
|
||||||
|
}
|
||||||
|
>
|
||||||
|
{derivedStatusMessage}
|
||||||
|
</Text>
|
||||||
</Box>
|
</Box>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
@ -561,28 +574,6 @@ const ChatbotDrawer = () => {
|
|||||||
</Group>
|
</Group>
|
||||||
</Box>
|
</Box>
|
||||||
))}
|
))}
|
||||||
{isOpen && (
|
|
||||||
<Box
|
|
||||||
p="sm"
|
|
||||||
bg="var(--bg-muted)"
|
|
||||||
style={{ borderRadius: 12, border: '1px solid var(--border-subtle)' }}
|
|
||||||
>
|
|
||||||
<Group gap="xs" align="flex-start">
|
|
||||||
<WarningAmberRoundedIcon fontSize="small" style={{ color: 'var(--text-warning)' }} />
|
|
||||||
<Box>
|
|
||||||
<Text size="sm" fw={600}>
|
|
||||||
{t('chatbot.alphaTitle', 'Experimental feature')}
|
|
||||||
</Text>
|
|
||||||
<Text size="sm">
|
|
||||||
{t(
|
|
||||||
'chatbot.alphaDescription',
|
|
||||||
'This chatbot is in alpha. It currently ignores images and may produce inaccurate answers.'
|
|
||||||
)}
|
|
||||||
</Text>
|
|
||||||
</Box>
|
|
||||||
</Group>
|
|
||||||
</Box>
|
|
||||||
)}
|
|
||||||
{messages.length === 0 && (
|
{messages.length === 0 && (
|
||||||
<Text size="sm" c="dimmed">
|
<Text size="sm" c="dimmed">
|
||||||
{t('chatbot.emptyState', 'Ask a question about your PDF to start the conversation.')}
|
{t('chatbot.emptyState', 'Ask a question about your PDF to start the conversation.')}
|
||||||
@ -634,35 +625,6 @@ const ChatbotDrawer = () => {
|
|||||||
</Stack>
|
</Stack>
|
||||||
</Modal>
|
</Modal>
|
||||||
|
|
||||||
<Modal
|
|
||||||
opened={noTextModalOpen}
|
|
||||||
onClose={() => setNoTextModalOpen(false)}
|
|
||||||
title={t('chatbot.noTextTitle', 'No text detected in this PDF')}
|
|
||||||
centered
|
|
||||||
>
|
|
||||||
<Stack gap="sm">
|
|
||||||
<Text size="sm">
|
|
||||||
{t('chatbot.noTextBody', 'We could not find selectable text in this document. Would you like to run OCR to convert scanned pages into text?')}
|
|
||||||
</Text>
|
|
||||||
<Group justify="flex-end">
|
|
||||||
<Button variant="default" leftSection={<CloseRoundedIcon fontSize="small" />} onClick={() => setNoTextModalOpen(false)}>
|
|
||||||
{t('chatbot.noTextDismiss', 'Maybe later')}
|
|
||||||
</Button>
|
|
||||||
<Button
|
|
||||||
leftSection={<SmartToyRoundedIcon fontSize="small" />}
|
|
||||||
onClick={() => {
|
|
||||||
setNoTextModalOpen(false);
|
|
||||||
setRunOcr(true);
|
|
||||||
if (pendingOcrRetry) {
|
|
||||||
handleSessionStart(true);
|
|
||||||
}
|
|
||||||
}}
|
|
||||||
>
|
|
||||||
{t('chatbot.noTextRunOcr', 'Run OCR and retry')}
|
|
||||||
</Button>
|
|
||||||
</Group>
|
|
||||||
</Stack>
|
|
||||||
</Modal>
|
|
||||||
</>
|
</>
|
||||||
);
|
);
|
||||||
};
|
};
|
||||||
|
|||||||
@ -1,4 +1,23 @@
|
|||||||
import { createContext, useCallback, useContext, useMemo, useState, type ReactNode } from 'react';
|
import {
|
||||||
|
createContext,
|
||||||
|
useCallback,
|
||||||
|
useContext,
|
||||||
|
useEffect,
|
||||||
|
useMemo,
|
||||||
|
useRef,
|
||||||
|
useState,
|
||||||
|
type ReactNode,
|
||||||
|
} from 'react';
|
||||||
|
|
||||||
|
import { useFileState } from '@app/contexts/FileContext';
|
||||||
|
import type { StirlingFile } from '@app/types/fileContext';
|
||||||
|
import { extractTextFromPdf } from '@app/services/pdfTextExtractor';
|
||||||
|
import { extractTextFromDocx } from '@app/services/docxTextExtractor';
|
||||||
|
import {
|
||||||
|
ChatbotSessionInfo,
|
||||||
|
createChatbotSession,
|
||||||
|
} from '@app/services/chatbotService';
|
||||||
|
import { runOcrForChat } from '@app/services/chatbotOcrService';
|
||||||
|
|
||||||
type ChatbotSource = 'viewer' | 'tool';
|
type ChatbotSource = 'viewer' | 'tool';
|
||||||
|
|
||||||
@ -7,6 +26,25 @@ interface OpenChatOptions {
|
|||||||
fileId?: string;
|
fileId?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type PreparationStatus = 'idle' | 'processing' | 'ready' | 'error' | 'unsupported';
|
||||||
|
|
||||||
|
interface PreparedChatbotDocument {
|
||||||
|
documentId: string;
|
||||||
|
fileId: string;
|
||||||
|
fileName: string;
|
||||||
|
status: PreparationStatus;
|
||||||
|
session?: ChatbotSessionInfo;
|
||||||
|
characterCount?: number;
|
||||||
|
pageCount?: number;
|
||||||
|
warnings?: string[];
|
||||||
|
error?: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
interface PreprocessOptions {
|
||||||
|
force?: boolean;
|
||||||
|
forceOcr?: boolean;
|
||||||
|
}
|
||||||
|
|
||||||
interface ChatbotContextValue {
|
interface ChatbotContextValue {
|
||||||
isOpen: boolean;
|
isOpen: boolean;
|
||||||
source: ChatbotSource;
|
source: ChatbotSource;
|
||||||
@ -14,6 +52,8 @@ interface ChatbotContextValue {
|
|||||||
openChat: (options?: OpenChatOptions) => void;
|
openChat: (options?: OpenChatOptions) => void;
|
||||||
closeChat: () => void;
|
closeChat: () => void;
|
||||||
setPreferredFileId: (fileId?: string) => void;
|
setPreferredFileId: (fileId?: string) => void;
|
||||||
|
sessions: Record<string, PreparedChatbotDocument>;
|
||||||
|
requestPreprocessing: (fileId: string, options?: PreprocessOptions) => Promise<void>;
|
||||||
}
|
}
|
||||||
|
|
||||||
const ChatbotContext = createContext<ChatbotContextValue | undefined>(undefined);
|
const ChatbotContext = createContext<ChatbotContextValue | undefined>(undefined);
|
||||||
@ -23,6 +63,184 @@ export function ChatbotProvider({ children }: { children: ReactNode }) {
|
|||||||
const [source, setSource] = useState<ChatbotSource>('viewer');
|
const [source, setSource] = useState<ChatbotSource>('viewer');
|
||||||
const [preferredFileId, setPreferredFileId] = useState<string | undefined>();
|
const [preferredFileId, setPreferredFileId] = useState<string | undefined>();
|
||||||
|
|
||||||
|
const { selectors } = useFileState();
|
||||||
|
const [preparedSessions, setPreparedSessions] = useState<
|
||||||
|
Record<string, PreparedChatbotDocument>
|
||||||
|
>({});
|
||||||
|
const sessionsRef = useRef(preparedSessions);
|
||||||
|
sessionsRef.current = preparedSessions;
|
||||||
|
const inFlightRef = useRef<Map<string, Promise<void>>>(new Map());
|
||||||
|
|
||||||
|
// Lower-case file extensions the chatbot recognises. Memoised with no
// dependencies so the same Set instance is reused on every render.
const supportedExtensions = useMemo(() => {
  const recognised = ['pdf', 'doc', 'docx'];
  return new Set(recognised);
}, []);
|
||||||
|
|
||||||
|
/**
 * Returns the lower-cased text after the last dot of the file name, or an
 * empty string when the name contains no dot (or ends with one).
 */
const getExtension = useCallback((file: StirlingFile) => {
  const lastDot = file.name.lastIndexOf('.');
  if (lastDot === -1) {
    return '';
  }
  return file.name.slice(lastDot + 1).toLowerCase();
}, []);
|
||||||
|
|
||||||
|
/**
 * Merges `partial` into the stored record for `file`, creating the record if
 * it does not exist yet.
 *
 * The identity fields (documentId, fileId, fileName) are always refreshed from
 * `file`. Note that `status` falls back to 'idle' whenever `partial` does not
 * carry a status, even if the existing record had a different one.
 */
const updateSessionEntry = useCallback(
  (file: StirlingFile, partial: Partial<PreparedChatbotDocument>) => {
    setPreparedSessions((previous) => {
      const existing = previous[file.fileId];
      const merged: PreparedChatbotDocument = {
        ...existing,
        documentId: file.fileId,
        fileId: file.fileId,
        fileName: file.name,
        status: 'idle',
        ...partial,
      };
      return { ...previous, [file.fileId]: merged };
    });
  },
  []
);
|
||||||
|
|
||||||
|
/**
 * Prepares a single file for chat and records progress in the session map.
 *
 * Steps: reject unsupported extensions (including legacy `.doc`), mark the
 * entry as 'processing', optionally run OCR (PDF only, when `forceOcr` is set),
 * extract text, create a chatbot session from the extracted text, and finally
 * mark the entry 'ready'.
 *
 * Any failure is written to the entry as status 'error' and then re-thrown so
 * the caller can react to it as well.
 */
const preprocessFile = useCallback(
  async (file: StirlingFile, options?: PreprocessOptions) => {
    const extension = getExtension(file);
    const rejectAsUnsupported = (reason: string) => {
      updateSessionEntry(file, {
        status: 'unsupported',
        error: reason,
      });
    };

    if (!supportedExtensions.has(extension)) {
      rejectAsUnsupported('Only PDF and Word documents are indexed for chat.');
      return;
    }
    if (extension === 'doc') {
      rejectAsUnsupported('Legacy Word (.doc) files are not supported yet.');
      return;
    }

    // Clear everything left over from an earlier attempt before starting.
    updateSessionEntry(file, {
      status: 'processing',
      error: undefined,
      session: undefined,
      warnings: undefined,
      characterCount: undefined,
      pageCount: undefined,
    });

    const isPdf = extension === 'pdf';
    const shouldRunOcr = isPdf && Boolean(options?.forceOcr);

    try {
      // OCR yields a replacement file; otherwise the original is used directly.
      const workingFile: File = shouldRunOcr ? await runOcrForChat(file) : file;

      let text: string;
      let pageCount: number | undefined;
      let characterCount: number;
      if (isPdf) {
        const pdfResult = await extractTextFromPdf(workingFile);
        text = pdfResult.text;
        pageCount = pdfResult.pageCount;
        characterCount = pdfResult.characterCount;
      } else {
        const docxResult = await extractTextFromDocx(workingFile);
        text = docxResult.text;
        // The docx extractor reports no page count; 0 is recorded instead.
        pageCount = 0;
        characterCount = docxResult.characterCount;
      }

      if (!text || text.trim() === '') {
        throw new Error('No text detected. Try running OCR from the chat window.');
      }

      const metadata: Record<string, string> = {
        fileName: workingFile.name,
        fileSize: String(workingFile.size),
        fileType: workingFile.type || extension,
        characterCount: String(characterCount),
        ocrApplied: shouldRunOcr ? 'true' : 'false',
        ...(typeof pageCount === 'number' ? { pageCount: String(pageCount) } : {}),
      };

      // The file id doubles as both session id and document id.
      const session = await createChatbotSession({
        sessionId: file.fileId,
        documentId: file.fileId,
        text,
        metadata,
        ocrRequested: shouldRunOcr,
        warningsAccepted: true,
      });

      updateSessionEntry(file, {
        status: 'ready',
        session,
        characterCount,
        pageCount,
        warnings: session.warnings ?? [],
        error: undefined,
      });
    } catch (error) {
      let message = 'Failed to prepare document for chatbot.';
      if (error instanceof Error) {
        message = error.message;
      }
      updateSessionEntry(file, {
        status: 'error',
        error: message,
      });
      throw error;
    }
  },
  [getExtension, supportedExtensions, updateSessionEntry]
);
|
||||||
|
|
||||||
|
/**
 * Starts (or joins) preprocessing for the file with the given id.
 *
 * - Resolves without doing anything when the file is not known to the selectors.
 * - Returns the already-running promise for that file unless `options.force`
 *   is set, so concurrent callers share one run.
 * - With `force`, a new run is started and becomes the registered in-flight
 *   promise for the file.
 *
 * The returned promise rejects if preprocessing fails.
 */
const requestPreprocessing = useCallback(
  async (fileId: string, options?: PreprocessOptions) => {
    const file = selectors.getFile(fileId as Parameters<typeof selectors.getFile>[0]);
    if (!file) {
      return;
    }

    const inFlight = inFlightRef.current;
    if (!options?.force) {
      const pending = inFlight.get(fileId);
      if (pending) {
        return pending;
      }
    }

    const promise: Promise<void> = preprocessFile(file, options).finally(() => {
      // Only clear our own registration: a forced run may have replaced this
      // promise in the map, and its entry must survive until it settles.
      if (inFlight.get(fileId) === promise) {
        inFlight.delete(fileId);
      }
    });
    inFlight.set(fileId, promise);
    return promise;
  },
  [selectors, preprocessFile]
);
|
||||||
|
|
||||||
|
// `filesSignature` is not read inside the memo factory; it is listed as a
// dependency so the file list is re-read whenever the signature changes.
const filesSignature = selectors.getFilesSignature();
const availableFiles = useMemo(
  () => selectors.getFiles(),
  [filesSignature, selectors]
);

// Keeps the prepared-session map in step with the current file list:
// 1. kicks off preprocessing for supported files that have no entry yet;
// 2. drops entries whose file is no longer present.
useEffect(() => {
  const currentIds = new Set<string>();

  for (const file of availableFiles) {
    currentIds.add(file.fileId);
    if (!supportedExtensions.has(getExtension(file))) {
      continue;
    }
    if (!sessionsRef.current[file.fileId]) {
      // Failures are already recorded on the session entry (status 'error')
      // by preprocessFile, so the rejection is intentionally ignored here.
      void requestPreprocessing(file.fileId).catch(() => {});
    }
  }

  setPreparedSessions((prev) => {
    const entries = Object.entries(prev);
    const kept = entries.filter(([fileId]) => currentIds.has(fileId));
    // Returning the same object lets React skip the state update (and the
    // re-render of every context consumer) when nothing was removed.
    return kept.length === entries.length ? prev : Object.fromEntries(kept);
  });
}, [availableFiles, getExtension, requestPreprocessing, supportedExtensions]);
|
||||||
|
|
||||||
const openChat = useCallback((options: OpenChatOptions = {}) => {
|
const openChat = useCallback((options: OpenChatOptions = {}) => {
|
||||||
if (options.source) {
|
if (options.source) {
|
||||||
setSource(options.source);
|
setSource(options.source);
|
||||||
@ -45,8 +263,10 @@ export function ChatbotProvider({ children }: { children: ReactNode }) {
|
|||||||
openChat,
|
openChat,
|
||||||
closeChat,
|
closeChat,
|
||||||
setPreferredFileId,
|
setPreferredFileId,
|
||||||
|
sessions: preparedSessions,
|
||||||
|
requestPreprocessing,
|
||||||
}),
|
}),
|
||||||
[isOpen, source, preferredFileId, openChat, closeChat]
|
[isOpen, source, preferredFileId, openChat, closeChat, preparedSessions, requestPreprocessing]
|
||||||
);
|
);
|
||||||
|
|
||||||
return <ChatbotContext.Provider value={value}>{children}</ChatbotContext.Provider>;
|
return <ChatbotContext.Provider value={value}>{children}</ChatbotContext.Provider>;
|
||||||
|
|||||||
34
frontend/src/core/services/docxTextExtractor.ts
Normal file
34
frontend/src/core/services/docxTextExtractor.ts
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
import JSZip from 'jszip';
|
||||||
|
|
||||||
|
/** Result of extracting plain text from a .docx file. */
export interface ExtractedDocxText {
  /** Length of `text` in UTF-16 code units. */
  characterCount: number;
  /** Extracted text, one paragraph per line. */
  text: string;
}
|
||||||
|
|
||||||
|
/**
 * Extracts plain text from a .docx file.
 *
 * The file is opened as a zip archive, the main document part is parsed as
 * XML, and the text content of every paragraph element is collected. Runs of
 * whitespace inside a paragraph are collapsed to a single space, empty
 * paragraphs are dropped, and the remaining paragraphs are joined with
 * newlines.
 *
 * @param file The .docx file to read.
 * @returns The extracted text and its length.
 * @throws Error when neither `word/document.xml` nor `word/document2.xml`
 *   can be read from the archive.
 */
export async function extractTextFromDocx(file: File): Promise<ExtractedDocxText> {
  const zip = await JSZip.loadAsync(file);
  const documentXml =
    (await zip.file('word/document.xml')?.async('string')) ??
    (await zip.file('word/document2.xml')?.async('string'));

  if (!documentXml) {
    throw new Error('Docx document.xml missing');
  }

  // NOTE(review): a malformed part yields a <parsererror> document rather than
  // an exception; that case is not detected here — confirm whether it should be.
  const xml = new DOMParser().parseFromString(documentXml, 'application/xml');

  // Both queries match the same `w:p` elements (local name "p" in any
  // namespace vs. qualified name "w:p"). Collecting them in a Set keeps each
  // paragraph once, in first-seen order, so the text is not emitted twice.
  const paragraphNodes = new Set<Element>([
    ...Array.from(xml.getElementsByTagNameNS('*', 'p')),
    ...Array.from(xml.getElementsByTagName('w:p')),
  ]);

  const text = Array.from(paragraphNodes, (p) =>
    (p.textContent ?? '').replace(/\s+/g, ' ').trim()
  )
    .filter(Boolean)
    .join('\n')
    .trim();

  return {
    text,
    characterCount: text.length,
  };
}
|
||||||
Loading…
Reference in New Issue
Block a user