feat(conversion): add PDF to Excel (XLSX) conversion (#5778)

This commit is contained in:
Balázs Szücs
2026-02-23 21:47:24 +01:00
committed by GitHub
parent 549f796e47
commit 91b4a3484c
9 changed files with 154 additions and 3 deletions

View File

@@ -28,6 +28,7 @@ export const CONVERSION_ENDPOINTS = {
'pdf-office-presentation': '/api/v1/convert/pdf/presentation',
'pdf-office-text': '/api/v1/convert/pdf/text',
'pdf-csv': '/api/v1/convert/pdf/csv',
'pdf-xlsx': '/api/v1/convert/pdf/xlsx',
'pdf-markdown': '/api/v1/convert/pdf/markdown',
'pdf-html': '/api/v1/convert/pdf/html',
'pdf-xml': '/api/v1/convert/pdf/xml',
@@ -54,6 +55,7 @@ export const ENDPOINT_NAMES = {
'pdf-office-presentation': 'pdf-to-presentation',
'pdf-office-text': 'pdf-to-text',
'pdf-csv': 'pdf-to-csv',
'pdf-xlsx': 'pdf-to-xlsx',
'pdf-markdown': 'pdf-to-markdown',
'pdf-html': 'pdf-to-html',
'pdf-xml': 'pdf-to-xml',
@@ -116,6 +118,7 @@ export const TO_FORMAT_OPTIONS = [
{ value: 'cbz', label: 'CBZ', group: 'Archive' },
{ value: 'cbr', label: 'CBR', group: 'Archive' },
{ value: 'csv', label: 'CSV', group: 'Spreadsheet' },
{ value: 'xlsx', label: 'XLSX', group: 'Spreadsheet' },
{ value: 'pptx', label: 'PPTX', group: 'Presentation' },
{ value: 'odp', label: 'ODP', group: 'Presentation' },
{ value: 'txt', label: 'TXT', group: 'Text' },
@@ -137,7 +140,7 @@ export const TO_FORMAT_OPTIONS = [
export const CONVERSION_MATRIX: Record<string, string[]> = {
'any': ['pdf'], // Mixed files always convert to PDF
'image': ['pdf'], // Multiple images always convert to PDF
'pdf': ['png', 'jpg', 'gif', 'tiff', 'bmp', 'webp', 'docx', 'odt', 'pptx', 'odp', 'csv', 'txt', 'rtf', 'md', 'html', 'xml', 'pdfa', 'pdfx', 'cbz', 'cbr', 'epub', 'azw3'],
'pdf': ['png', 'jpg', 'gif', 'tiff', 'bmp', 'webp', 'docx', 'odt', 'pptx', 'odp', 'csv', 'xlsx', 'txt', 'rtf', 'md', 'html', 'xml', 'pdfa', 'pdfx', 'cbz', 'cbr', 'epub', 'azw3'],
'cbz': ['pdf'],
'docx': ['pdf'], 'doc': ['pdf'], 'odt': ['pdf'],
'xlsx': ['pdf'], 'xls': ['pdf'], 'ods': ['pdf'],
@@ -162,6 +165,7 @@ export const EXTENSION_TO_ENDPOINT: Record<string, Record<string, string>> = {
'docx': 'pdf-to-word', 'odt': 'pdf-to-word',
'pptx': 'pdf-to-presentation', 'odp': 'pdf-to-presentation',
'csv': 'pdf-to-csv',
'xlsx': 'pdf-to-xlsx',
'txt': 'pdf-to-text', 'rtf': 'pdf-to-text', 'md': 'pdf-to-markdown',
'html': 'pdf-to-html', 'xml': 'pdf-to-xml',
'pdfa': 'pdf-to-pdfa',

View File

@@ -916,6 +916,7 @@ export function useTranslatedToolCatalog(): TranslatedToolCatalog {
"markdown-to-pdf",
"file-to-pdf",
"pdf-to-csv",
"pdf-to-xlsx",
"pdf-to-markdown",
"pdf-to-pdfa",
"eml-to-pdf",

View File

@@ -21,8 +21,8 @@ export const shouldProcessFilesSeparately = (
(parameters.fromExtension === 'pdf' && isImageFormat(parameters.toExtension)) ||
// PDF to PDF/A and PDF/X conversions (each PDF should be processed separately)
(parameters.fromExtension === 'pdf' && (parameters.toExtension === 'pdfa' || parameters.toExtension === 'pdfx')) ||
// PDF to text-like formats should be one output per input
(parameters.fromExtension === 'pdf' && ['txt', 'rtf', 'csv'].includes(parameters.toExtension)) ||
// PDF to text-like/spreadsheet formats should be one output per input
(parameters.fromExtension === 'pdf' && ['txt', 'rtf', 'csv', 'xlsx'].includes(parameters.toExtension)) ||
// PDF to CBR conversions (each PDF should generate its own archive)
(parameters.fromExtension === 'pdf' && parameters.toExtension === 'cbr') ||
// PDF to EPUB/AZW3 conversions (each PDF should generate its own ebook)
@@ -85,6 +85,8 @@ export const buildConvertFormData = (parameters: ConvertParameters, selectedFile
formData.append("outputFormat", pdfxOptions?.outputFormat || 'pdfx');
} else if (fromExtension === 'pdf' && toExtension === 'csv') {
formData.append("pageNumbers", "all");
} else if (fromExtension === 'pdf' && toExtension === 'xlsx') {
formData.append("pageNumbers", "all");
} else if (fromExtension === 'cbr' && toExtension === 'pdf') {
formData.append("optimizeForEbook", cbrOptions.optimizeForEbook.toString());
} else if (fromExtension === 'pdf' && toExtension === 'cbr') {

View File

@@ -107,6 +107,13 @@ const ALL_CONVERSION_ENDPOINTS: ConversionEndpoint[] = [
description: 'Extract CSV data from PDF',
apiPath: '/api/v1/convert/pdf/csv'
},
{
endpoint: 'pdf-to-xlsx',
fromFormat: 'pdf',
toFormat: 'xlsx',
description: 'Extract Excel spreadsheet from PDF',
apiPath: '/api/v1/convert/pdf/xlsx'
},
{
endpoint: 'pdf-to-markdown',
fromFormat: 'pdf',

View File

@@ -23,6 +23,7 @@ export const URL_TO_TOOL_MAP: Record<string, ToolId> = {
'/html-to-pdf': 'convert',
'/markdown-to-pdf': 'convert',
'/pdf-to-csv': 'convert',
'/pdf-to-xlsx': 'convert',
'/pdf-to-img': 'convert',
'/pdf-to-markdown': 'convert',
'/pdf-to-pdfa': 'convert',

View File

@@ -143,6 +143,7 @@ export default function AdminEndpointsSection() {
'ocr-pdf',
'overlay-pdf',
'pdf-to-csv',
'pdf-to-xlsx',
'pdf-to-epub',
'pdf-to-html',
'pdf-to-img',