release notes and ghostscript removal

This commit is contained in:
Anthony Stirling 2024-11-26 19:48:42 +00:00
parent 73e64e5898
commit 6ec2c34c2b
20 changed files with 918 additions and 501 deletions

View File

@ -30,6 +30,7 @@ RUN echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /et
tini \
bash \
curl \
qpdf \
shadow \
su-exec \
openssl \
@ -40,7 +41,6 @@ RUN echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /et
# pdftohtml
poppler-utils \
# OCR MY PDF (unpaper for descew and other advanced features)
ocrmypdf \
tesseract-ocr-data-eng \
# CV
py3-opencv \

View File

@ -24,7 +24,7 @@ ext {
}
group = "stirling.software"
version = "0.33.2"
version = "0.33.1"
java {
// 17 is lowest but we support and recommend 21

View File

@ -188,7 +188,7 @@ public class EndpointConfiguration {
addEndpointToGroup("OpenCV", "extract-image-scans");
// LibreOffice
addEndpointToGroup("LibreOffice", "repair");
addEndpointToGroup("qpdf", "repair");
addEndpointToGroup("LibreOffice", "file-to-pdf");
addEndpointToGroup("LibreOffice", "pdf-to-word");
addEndpointToGroup("LibreOffice", "pdf-to-presentation");
@ -199,10 +199,10 @@ public class EndpointConfiguration {
// Unoconv
addEndpointToGroup("Unoconv", "file-to-pdf");
// OCRmyPDF
addEndpointToGroup("OCRmyPDF", "compress-pdf");
addEndpointToGroup("OCRmyPDF", "pdf-to-pdfa");
addEndpointToGroup("OCRmyPDF", "ocr-pdf");
// qpdf
addEndpointToGroup("qpdf", "compress-pdf");
addEndpointToGroup("qpdf", "pdf-to-pdfa");
addEndpointToGroup("qpdf", "ocr-pdf");
// Java
addEndpointToGroup("Java", "merge-pdfs");
@ -249,9 +249,9 @@ public class EndpointConfiguration {
addEndpointToGroup("Javascript", "adjust-contrast");
// Ghostscript dependent endpoints
addEndpointToGroup("Ghostscript", "compress-pdf");
addEndpointToGroup("Ghostscript", "pdf-to-pdfa");
addEndpointToGroup("Ghostscript", "repair");
addEndpointToGroup("qpdf", "compress-pdf");
addEndpointToGroup("qpdf", "pdf-to-pdfa");
addEndpointToGroup("qpdf", "repair");
// Weasyprint dependent endpoints
addEndpointToGroup("Weasyprint", "html-to-pdf");

View File

@ -37,12 +37,11 @@ public class ExternalAppDepConfig {
private final Map<String, List<String>> commandToGroupMapping =
new HashMap<>() {
{
put("gs", List.of("Ghostscript"));
put("soffice", List.of("LibreOffice"));
put("ocrmypdf", List.of("OCRmyPDF"));
put("weasyprint", List.of("Weasyprint"));
put("pdftohtml", List.of("Pdftohtml"));
put("unoconv", List.of("Unoconv"));
put("qpdf", List.of("qpdf"));
}
};
@ -97,9 +96,8 @@ public class ExternalAppDepConfig {
public void checkDependencies() {
// Check core dependencies
checkDependencyAndDisableGroup("gs");
checkDependencyAndDisableGroup("soffice");
checkDependencyAndDisableGroup("ocrmypdf");
checkDependencyAndDisableGroup("qpdf");
checkDependencyAndDisableGroup("weasyprint");
checkDependencyAndDisableGroup("pdftohtml");
checkDependencyAndDisableGroup("unoconv");

View File

@ -1,12 +1,13 @@
package stirling.software.SPDF.controller.api.converters;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.commons.io.FileUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.http.MediaType;
@ -37,59 +38,90 @@ public class ConvertPDFToPDFA {
@Operation(
summary = "Convert a PDF to a PDF/A",
description =
"This endpoint converts a PDF file to a PDF/A file. PDF/A is a format designed for long-term archiving of digital documents. Input:PDF Output:PDF Type:SISO")
"This endpoint converts a PDF file to a PDF/A file using LibreOffice. PDF/A is a format designed for long-term archiving of digital documents. Input:PDF Output:PDF Type:SISO")
public ResponseEntity<byte[]> pdfToPdfA(@ModelAttribute PdfToPdfARequest request)
throws Exception {
MultipartFile inputFile = request.getFileInput();
String outputFormat = request.getOutputFormat();
// Convert MultipartFile to byte[]
byte[] pdfBytes = inputFile.getBytes();
// Save the uploaded file to a temporary location
Path tempInputFile = Files.createTempFile("input_", ".pdf");
try (OutputStream outputStream = new FileOutputStream(tempInputFile.toFile())) {
outputStream.write(pdfBytes);
// Validate input file type
if (!"application/pdf".equals(inputFile.getContentType())) {
logger.error("Invalid input file type: {}", inputFile.getContentType());
throw new IllegalArgumentException("Input file must be a PDF");
}
// Prepare the output file path
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
// Prepare the ghostscript command
List<String> command = new ArrayList<>();
command.add("gs");
command.add("-dPDFA=" + ("pdfa".equals(outputFormat) ? "2" : "1"));
command.add("-dNOPAUSE");
command.add("-dBATCH");
command.add("-sColorConversionStrategy=sRGB");
command.add("-sDEVICE=pdfwrite");
command.add("-dPDFACompatibilityPolicy=2");
command.add("-o");
command.add(tempOutputFile.toString());
command.add(tempInputFile.toString());
ProcessExecutorResult returnCode =
ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT)
.runCommandWithOutputHandling(command);
if (returnCode.getRc() != 0) {
logger.info(
outputFormat + " conversion failed with return code: " + returnCode.getRc());
// Get the original filename without extension
String originalFileName = Filenames.toSimpleFileName(inputFile.getOriginalFilename());
if (originalFileName == null || originalFileName.trim().isEmpty()) {
originalFileName = "output.pdf";
}
String baseFileName =
originalFileName.contains(".")
? originalFileName.substring(0, originalFileName.lastIndexOf('.'))
: originalFileName;
Path tempInputFile = null;
Path tempOutputDir = null;
byte[] fileBytes;
try {
byte[] pdfBytesOutput = Files.readAllBytes(tempOutputFile);
// Return the optimized PDF as a response
String outputFilename =
Filenames.toSimpleFileName(inputFile.getOriginalFilename())
.replaceFirst("[.][^.]+$", "")
+ "_PDFA.pdf";
// Save uploaded file to temp location
tempInputFile = Files.createTempFile("input_", ".pdf");
inputFile.transferTo(tempInputFile);
// Create temp output directory
tempOutputDir = Files.createTempDirectory("output_");
// Determine PDF/A filter based on requested format
String pdfFilter =
"pdfa".equals(outputFormat)
? "writer_pdf_Export:{'SelectPdfVersion':{'Value':'2'}}:writer_pdf_Export"
: "writer_pdf_Export:{'SelectPdfVersion':{'Value':'1'}}:writer_pdf_Export";
// Prepare LibreOffice command
List<String> command =
new ArrayList<>(
Arrays.asList(
"soffice",
"--headless",
"--nologo",
"--convert-to",
"pdf:" + pdfFilter,
"--outdir",
tempOutputDir.toString(),
tempInputFile.toString()));
ProcessExecutorResult returnCode =
ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE)
.runCommandWithOutputHandling(command);
if (returnCode.getRc() != 0) {
logger.error("PDF/A conversion failed with return code: {}", returnCode.getRc());
throw new RuntimeException("PDF/A conversion failed");
}
// Get the output file
File[] outputFiles = tempOutputDir.toFile().listFiles();
if (outputFiles == null || outputFiles.length != 1) {
throw new RuntimeException(
"Expected exactly one output file but found "
+ (outputFiles == null ? "none" : outputFiles.length));
}
fileBytes = FileUtils.readFileToByteArray(outputFiles[0]);
String outputFilename = baseFileName + "_PDFA.pdf";
return WebResponseUtils.bytesToWebResponse(
pdfBytesOutput, outputFilename, MediaType.APPLICATION_PDF);
fileBytes, outputFilename, MediaType.APPLICATION_PDF);
} finally {
// Clean up the temporary files
Files.deleteIfExists(tempInputFile);
Files.deleteIfExists(tempOutputFile);
// Clean up temporary files
if (tempInputFile != null) {
Files.deleteIfExists(tempInputFile);
}
if (tempOutputDir != null) {
FileUtils.deleteDirectory(tempOutputDir.toFile());
}
}
}
}

View File

@ -20,7 +20,7 @@ import org.springframework.web.bind.annotation.RestController;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.controller.api.CropController;
import stirling.software.SPDF.model.api.extract.PDFFilePage;
import stirling.software.SPDF.pdf.FlexibleCSVWriter;
import technology.tabula.ObjectExtractor;
@ -37,11 +37,15 @@ public class ExtractCSVController {
private static final Logger logger = LoggerFactory.getLogger(ExtractCSVController.class);
@PostMapping(value = "/pdf/csv", consumes = "multipart/form-data")
@Operation(summary = "Extracts a CSV document from a PDF", description = "This operation takes an input PDF file and returns CSV file of whole page. Input:PDF Output:CSV Type:SISO")
@Operation(
summary = "Extracts a CSV document from a PDF",
description =
"This operation takes an input PDF file and returns CSV file of whole page. Input:PDF Output:CSV Type:SISO")
public ResponseEntity<String> PdfToCsv(@ModelAttribute PDFFilePage form) throws Exception {
StringWriter writer = new StringWriter();
try (PDDocument document = Loader.loadPDF(form.getFileInput().getBytes())) {
CSVFormat format = CSVFormat.EXCEL.builder().setEscape('"').setQuoteMode(QuoteMode.ALL).build();
CSVFormat format =
CSVFormat.EXCEL.builder().setEscape('"').setQuoteMode(QuoteMode.ALL).build();
Writer csvWriter = new FlexibleCSVWriter(format);
SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm();
try (ObjectExtractor extractor = new ObjectExtractor(document)) {
@ -56,8 +60,8 @@ public class ExtractCSVController {
ContentDisposition.builder("attachment")
.filename(
form.getFileInput()
.getOriginalFilename()
.replaceFirst("[.][^.]+$", "")
.getOriginalFilename()
.replaceFirst("[.][^.]+$", "")
+ "_extracted.csv")
.build());
headers.setContentType(MediaType.parseMediaType("text/csv"));

View File

@ -10,7 +10,6 @@ import java.util.List;
import javax.imageio.ImageIO;
import org.apache.commons.io.FileUtils;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.PDDocument;
@ -53,6 +52,54 @@ public class CompressController {
this.pdfDocumentFactory = pdfDocumentFactory;
}
private void compressImagesInPDF(Path pdfFile, double initialScaleFactor) throws Exception {
byte[] fileBytes = Files.readAllBytes(pdfFile);
try (PDDocument doc = Loader.loadPDF(fileBytes)) {
double scaleFactor = initialScaleFactor;
for (PDPage page : doc.getPages()) {
PDResources res = page.getResources();
if (res != null && res.getXObjectNames() != null) {
for (COSName name : res.getXObjectNames()) {
PDXObject xobj = res.getXObject(name);
if (xobj instanceof PDImageXObject) {
PDImageXObject image = (PDImageXObject) xobj;
BufferedImage bufferedImage = image.getImage();
int newWidth = (int) (bufferedImage.getWidth() * scaleFactor);
int newHeight = (int) (bufferedImage.getHeight() * scaleFactor);
if (newWidth == 0 || newHeight == 0) {
continue;
}
Image scaledImage =
bufferedImage.getScaledInstance(
newWidth, newHeight, Image.SCALE_SMOOTH);
BufferedImage scaledBufferedImage =
new BufferedImage(
newWidth, newHeight, BufferedImage.TYPE_INT_RGB);
scaledBufferedImage.getGraphics().drawImage(scaledImage, 0, 0, null);
ByteArrayOutputStream compressedImageStream =
new ByteArrayOutputStream();
ImageIO.write(scaledBufferedImage, "jpeg", compressedImageStream);
byte[] imageBytes = compressedImageStream.toByteArray();
compressedImageStream.close();
PDImageXObject compressedImage =
PDImageXObject.createFromByteArray(
doc, imageBytes, image.getCOSObject().toString());
res.put(name, compressedImage);
}
}
}
}
doc.save(pdfFile.toString());
}
}
@PostMapping(consumes = "multipart/form-data", value = "/compress-pdf")
@Operation(
summary = "Optimize PDF file",
@ -75,209 +122,92 @@ public class CompressController {
autoMode = true;
}
// Save the uploaded file to a temporary location
Path tempInputFile = Files.createTempFile("input_", ".pdf");
inputFile.transferTo(tempInputFile.toFile());
long inputFileSize = Files.size(tempInputFile);
// Prepare the output file path
Path tempOutputFile = null;
byte[] pdfBytes;
try {
tempOutputFile = Files.createTempFile("output_", ".pdf");
// Determine initial optimization level based on expected size reduction, only if in
// autoMode
if (autoMode) {
double sizeReductionRatio = expectedOutputSize / (double) inputFileSize;
if (sizeReductionRatio > 0.7) {
optimizeLevel = 1;
} else if (sizeReductionRatio > 0.5) {
optimizeLevel = 2;
} else if (sizeReductionRatio > 0.35) {
optimizeLevel = 3;
} else {
optimizeLevel = 3;
}
optimizeLevel = determineOptimizeLevel(sizeReductionRatio);
}
boolean sizeMet = false;
while (!sizeMet && optimizeLevel <= 4) {
// Prepare the Ghostscript command
List<String> command = new ArrayList<>();
command.add("gs");
command.add("-sDEVICE=pdfwrite");
command.add("-dCompatibilityLevel=1.5");
while (!sizeMet && optimizeLevel <= 9) {
switch (optimizeLevel) {
case 1:
command.add("-dPDFSETTINGS=/prepress");
break;
case 2:
command.add("-dPDFSETTINGS=/printer");
break;
case 3:
command.add("-dPDFSETTINGS=/ebook");
break;
case 4:
command.add("-dPDFSETTINGS=/screen");
break;
default:
command.add("-dPDFSETTINGS=/default");
// Apply additional image compression for levels 6-9
if (optimizeLevel >= 6) {
// Calculate scale factor based on optimization level
double scaleFactor =
switch (optimizeLevel) {
case 6 -> 0.9; // 90% of original size
case 7 -> 0.8; // 80% of original size
case 8 -> 0.65; // 70% of original size
case 9 -> 0.5; // 60% of original size
default -> 1.0;
};
compressImagesInPDF(tempInputFile, scaleFactor);
}
command.add("-dNOPAUSE");
command.add("-dQUIET");
command.add("-dBATCH");
command.add("-sOutputFile=" + tempOutputFile.toString());
// Run QPDF optimization
List<String> command = new ArrayList<>();
command.add("qpdf");
if (request.getNormalize()) {
command.add("--normalize-content=y");
}
if (request.getLinearize()) {
command.add("--linearize");
}
command.add("--optimize-images");
command.add("--recompress-flate");
command.add("--compression-level=" + optimizeLevel);
command.add("--compress-streams=y");
command.add("--object-streams=generate");
command.add(tempInputFile.toString());
command.add(tempOutputFile.toString());
ProcessExecutorResult returnCode =
ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT)
.runCommandWithOutputHandling(command);
ProcessExecutorResult returnCode = null;
try {
returnCode =
ProcessExecutor.getInstance(ProcessExecutor.Processes.QPDF)
.runCommandWithOutputHandling(command);
} catch (Exception e) {
if (returnCode != null && returnCode.getRc() != 3) {
throw e;
}
}
// Check if file size is within expected size or not auto mode so instantly finish
// Check if file size is within expected size or not auto mode
long outputFileSize = Files.size(tempOutputFile);
if (outputFileSize <= expectedOutputSize || !autoMode) {
sizeMet = true;
} else {
// Increase optimization level for next iteration
optimizeLevel++;
if (autoMode && optimizeLevel > 4) {
logger.info("Skipping level 5 due to bad results in auto mode");
optimizeLevel =
incrementOptimizeLevel(
optimizeLevel, outputFileSize, expectedOutputSize);
if (autoMode && optimizeLevel > 9) {
logger.info("Maximum compression level reached in auto mode");
sizeMet = true;
} else {
logger.info(
"Increasing ghostscript optimisation level to " + optimizeLevel);
}
}
}
if (expectedOutputSize != null && autoMode) {
long outputFileSize = Files.size(tempOutputFile);
byte[] fileBytes = Files.readAllBytes(tempOutputFile);
if (outputFileSize > expectedOutputSize) {
try (PDDocument doc = Loader.loadPDF(fileBytes)) {
long previousFileSize = 0;
double scaleFactorConst = 0.9f;
double scaleFactor = 0.9f;
while (true) {
for (PDPage page : doc.getPages()) {
PDResources res = page.getResources();
if (res != null && res.getXObjectNames() != null) {
for (COSName name : res.getXObjectNames()) {
PDXObject xobj = res.getXObject(name);
if (xobj != null && xobj instanceof PDImageXObject) {
PDImageXObject image = (PDImageXObject) xobj;
// Get the image in BufferedImage format
BufferedImage bufferedImage = image.getImage();
// Calculate the new dimensions
int newWidth =
(int)
(bufferedImage.getWidth()
* scaleFactorConst);
int newHeight =
(int)
(bufferedImage.getHeight()
* scaleFactorConst);
// If the new dimensions are zero, skip this iteration
if (newWidth == 0 || newHeight == 0) {
continue;
}
// Otherwise, proceed with the scaling
Image scaledImage =
bufferedImage.getScaledInstance(
newWidth,
newHeight,
Image.SCALE_SMOOTH);
// Convert the scaled image back to a BufferedImage
BufferedImage scaledBufferedImage =
new BufferedImage(
newWidth,
newHeight,
BufferedImage.TYPE_INT_RGB);
scaledBufferedImage
.getGraphics()
.drawImage(scaledImage, 0, 0, null);
// Compress the scaled image
ByteArrayOutputStream compressedImageStream =
new ByteArrayOutputStream();
ImageIO.write(
scaledBufferedImage,
"jpeg",
compressedImageStream);
byte[] imageBytes = compressedImageStream.toByteArray();
compressedImageStream.close();
PDImageXObject compressedImage =
PDImageXObject.createFromByteArray(
doc,
imageBytes,
image.getCOSObject().toString());
// Replace the image in the resources with the
// compressed
// version
res.put(name, compressedImage);
}
}
}
}
// save the document to tempOutputFile again
doc.save(tempOutputFile.toString());
long currentSize = Files.size(tempOutputFile);
// Check if the overall PDF size is still larger than expectedOutputSize
if (currentSize > expectedOutputSize) {
// Log the current file size and scaleFactor
logger.info(
"Current file size: "
+ FileUtils.byteCountToDisplaySize(currentSize));
logger.info("Current scale factor: " + scaleFactor);
// The file is still too large, reduce scaleFactor and try again
scaleFactor *= 0.9f; // reduce scaleFactor by 10%
// Avoid scaleFactor being too small, causing the image to shrink to
// 0
if (scaleFactor < 0.2f || previousFileSize == currentSize) {
throw new RuntimeException(
"Could not reach the desired size without excessively degrading image quality, lowest size recommended is "
+ FileUtils.byteCountToDisplaySize(currentSize)
+ ", "
+ currentSize
+ " bytes");
}
previousFileSize = currentSize;
} else {
// The file is small enough, break the loop
break;
}
}
}
}
}
// Read the optimized PDF file
pdfBytes = Files.readAllBytes(tempOutputFile);
Path finalFile = tempOutputFile;
// Check if optimized file is larger than the original
if (pdfBytes.length > inputFileSize) {
// Log the occurrence
logger.warn(
"Optimized file is larger than the original. Returning the original file instead.");
// Read the original file again
finalFile = tempInputFile;
}
// Return the optimized PDF as a response
String outputFilename =
Filenames.toSimpleFileName(inputFile.getOriginalFilename())
.replaceFirst("[.][^.]+$", "")
@ -286,10 +216,31 @@ public class CompressController {
pdfDocumentFactory.load(finalFile.toFile()), outputFilename);
} finally {
// Clean up the temporary files
// deleted by multipart file handler deu to transferTo?
// Files.deleteIfExists(tempInputFile);
Files.deleteIfExists(tempOutputFile);
}
}
private int determineOptimizeLevel(double sizeReductionRatio) {
if (sizeReductionRatio > 0.9) return 1;
if (sizeReductionRatio > 0.8) return 2;
if (sizeReductionRatio > 0.7) return 3;
if (sizeReductionRatio > 0.6) return 4;
if (sizeReductionRatio > 0.5) return 5;
if (sizeReductionRatio > 0.4) return 6;
if (sizeReductionRatio > 0.3) return 7;
if (sizeReductionRatio > 0.2) return 8;
return 9;
}
private int incrementOptimizeLevel(int currentLevel, long currentSize, long targetSize) {
double currentRatio = currentSize / (double) targetSize;
logger.info("Current compression ratio: {}", String.format("%.2f", currentRatio));
if (currentRatio > 2.0) {
return Math.min(9, currentLevel + 3);
} else if (currentRatio > 1.5) {
return Math.min(9, currentLevel + 2);
}
return Math.min(9, currentLevel + 1);
}
}

View File

@ -1,19 +1,29 @@
package stirling.software.SPDF.controller.api.misc;
import java.io.ByteArrayInputStream;
import java.awt.image.BufferedImage;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.stream.Collectors;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
import javax.imageio.ImageIO;
import org.apache.pdfbox.multipdf.PDFMergerUtility;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.apache.pdfbox.text.PDFTextStripper;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
@ -23,24 +33,29 @@ import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import io.github.pixee.security.Filenames;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import lombok.extern.slf4j.Slf4j;
import stirling.software.SPDF.model.ApplicationProperties;
import stirling.software.SPDF.model.api.misc.ProcessPdfWithOcrRequest;
import stirling.software.SPDF.service.CustomPDDocumentFactory;
import stirling.software.SPDF.utils.ProcessExecutor;
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
import stirling.software.SPDF.utils.WebResponseUtils;
@RestController
@RequestMapping("/api/v1/misc")
@Tag(name = "Misc", description = "Miscellaneous APIs")
@Slf4j
public class OCRController {
@Autowired ApplicationProperties applicationProperties;
@Autowired private ApplicationProperties applicationProperties;
private final CustomPDDocumentFactory pdfDocumentFactory;
@Autowired
public OCRController(CustomPDDocumentFactory pdfDocumentFactory) {
this.pdfDocumentFactory = pdfDocumentFactory;
}
/** Gets the list of available Tesseract languages from the tessdata directory */
public List<String> getAvailableTesseractLanguages() {
String tessdataDir = applicationProperties.getSystem().getTessdataDir();
File[] files = new File(tessdataDir).listFiles();
@ -54,196 +69,161 @@ public class OCRController {
.collect(Collectors.toList());
}
private final CustomPDDocumentFactory pdfDocumentFactory;
@Autowired
public OCRController(CustomPDDocumentFactory pdfDocumentFactory) {
this.pdfDocumentFactory = pdfDocumentFactory;
}
@PostMapping(consumes = "multipart/form-data", value = "/ocr-pdf")
@Operation(
summary = "Process a PDF file with OCR",
description =
"This endpoint processes a PDF file using OCR (Optical Character Recognition). Users can specify languages, sidecar, deskew, clean, cleanFinal, ocrType, ocrRenderType, and removeImagesAfter options. Input:PDF Output:PDF Type:SI-Conditional")
public ResponseEntity<byte[]> processPdfWithOCR(
@ModelAttribute ProcessPdfWithOcrRequest request)
throws IOException, InterruptedException {
MultipartFile inputFile = request.getFileInput();
List<String> selectedLanguages = request.getLanguages();
Boolean sidecar = request.isSidecar();
Boolean deskew = request.isDeskew();
Boolean clean = request.isClean();
Boolean cleanFinal = request.isCleanFinal();
List<String> languages = request.getLanguages();
String ocrType = request.getOcrType();
String ocrRenderType = request.getOcrRenderType();
Boolean removeImagesAfter = request.isRemoveImagesAfter();
// --output-type pdfa
if (selectedLanguages == null || selectedLanguages.isEmpty()) {
throw new IOException("Please select at least one language.");
}
if (!"hocr".equals(ocrRenderType) && !"sandwich".equals(ocrRenderType)) {
throw new IOException("ocrRenderType wrong");
}
Path tempDir = Files.createTempDirectory("ocr_process");
Path tempInputFile = tempDir.resolve("input.pdf");
Path tempOutputDir = tempDir.resolve("output");
Path tempImagesDir = tempDir.resolve("images");
Path finalOutputFile = tempDir.resolve("final_output.pdf");
// Get available Tesseract languages
List<String> availableLanguages = getAvailableTesseractLanguages();
// Validate selected languages
selectedLanguages =
selectedLanguages.stream().filter(availableLanguages::contains).toList();
if (selectedLanguages.isEmpty()) {
throw new IOException("None of the selected languages are valid.");
}
// Save the uploaded file to a temporary location
Path tempInputFile = Files.createTempFile("input_", ".pdf");
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
Path sidecarTextPath = null;
Files.createDirectories(tempOutputDir);
Files.createDirectories(tempImagesDir);
try {
// Save input file
inputFile.transferTo(tempInputFile.toFile());
PDFMergerUtility merger = new PDFMergerUtility();
merger.setDestinationFileName(finalOutputFile.toString());
// Run OCR Command
String languageOption = String.join("+", selectedLanguages);
try (PDDocument document = pdfDocumentFactory.load(tempInputFile.toFile())) {
PDFRenderer pdfRenderer = new PDFRenderer(document);
int pageCount = document.getNumberOfPages();
List<String> command =
new ArrayList<>(
Arrays.asList(
"ocrmypdf",
"--verbose",
"2",
"--output-type",
"pdf",
"--pdf-renderer",
ocrRenderType));
for (int pageNum = 0; pageNum < pageCount; pageNum++) {
PDPage page = document.getPage(pageNum);
boolean hasText = false;
if (sidecar != null && sidecar) {
sidecarTextPath = Files.createTempFile("sidecar", ".txt");
command.add("--sidecar");
command.add(sidecarTextPath.toString());
}
// Check for existing text
try (PDDocument tempDoc = new PDDocument()) {
tempDoc.addPage(page);
PDFTextStripper stripper = new PDFTextStripper();
hasText = !stripper.getText(tempDoc).trim().isEmpty();
}
if (deskew != null && deskew) {
command.add("--deskew");
}
if (clean != null && clean) {
command.add("--clean");
}
if (cleanFinal != null && cleanFinal) {
command.add("--clean-final");
}
if (ocrType != null && !"".equals(ocrType)) {
if ("skip-text".equals(ocrType)) {
command.add("--skip-text");
} else if ("force-ocr".equals(ocrType)) {
command.add("--force-ocr");
} else if ("Normal".equals(ocrType)) {
boolean shouldOcr =
switch (ocrType) {
case "skip-text" -> !hasText;
case "force-ocr" -> true;
default -> true;
};
}
}
Path pageOutputPath =
tempOutputDir.resolve(String.format("page_%d.pdf", pageNum));
command.addAll(
Arrays.asList(
"--language",
languageOption,
tempInputFile.toString(),
tempOutputFile.toString()));
if (shouldOcr) {
// Convert page to image
BufferedImage image = pdfRenderer.renderImageWithDPI(pageNum, 300);
Path imagePath =
tempImagesDir.resolve(String.format("page_%d.png", pageNum));
ImageIO.write(image, "png", imagePath.toFile());
// Run CLI command
ProcessExecutorResult result =
ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF)
.runCommandWithOutputHandling(command);
if (result.getRc() != 0
&& result.getMessages().contains("multiprocessing/synchronize.py")
&& result.getMessages()
.contains("OSError: [Errno 38] Function not implemented")) {
command.add("--jobs");
command.add("1");
result =
ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF)
.runCommandWithOutputHandling(command);
}
// Build OCR command
List<String> command = new ArrayList<>();
command.add("tesseract");
command.add(imagePath.toString());
command.add(
tempOutputDir
.resolve(String.format("page_%d", pageNum))
.toString());
command.add("-l");
command.add(String.join("+", languages));
command.add("pdf"); // Always output PDF
// Remove images from the OCR processed PDF if the flag is set to true
if (removeImagesAfter != null && removeImagesAfter) {
Path tempPdfWithoutImages = Files.createTempFile("output_", "_no_images.pdf");
ProcessBuilder pb = new ProcessBuilder(command);
Process process = pb.start();
List<String> gsCommand =
Arrays.asList(
"gs",
"-sDEVICE=pdfwrite",
"-dFILTERIMAGE",
"-o",
tempPdfWithoutImages.toString(),
tempOutputFile.toString());
// Capture any error output
try (BufferedReader reader =
new BufferedReader(
new InputStreamReader(process.getErrorStream()))) {
String line;
while ((line = reader.readLine()) != null) {
log.debug("Tesseract: {}", line);
}
}
ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT)
.runCommandWithOutputHandling(gsCommand);
tempOutputFile = tempPdfWithoutImages;
}
// Read the OCR processed PDF file
byte[] pdfBytes = pdfDocumentFactory.loadToBytes(tempOutputFile.toFile());
int exitCode = process.waitFor();
if (exitCode != 0) {
throw new RuntimeException(
"Tesseract failed with exit code: " + exitCode);
}
// Return the OCR processed PDF as a response
String outputFilename =
Filenames.toSimpleFileName(inputFile.getOriginalFilename())
.replaceFirst("[.][^.]+$", "")
+ "_OCR.pdf";
if (sidecar != null && sidecar) {
// Create a zip file containing both the PDF and the text file
String outputZipFilename =
Filenames.toSimpleFileName(inputFile.getOriginalFilename())
.replaceFirst("[.][^.]+$", "")
+ "_OCR.zip";
Path tempZipFile = Files.createTempFile("output_", ".zip");
try (ZipOutputStream zipOut =
new ZipOutputStream(new FileOutputStream(tempZipFile.toFile()))) {
// Add PDF file to the zip
ZipEntry pdfEntry = new ZipEntry(outputFilename);
zipOut.putNextEntry(pdfEntry);
try (ByteArrayInputStream pdfInputStream = new ByteArrayInputStream(pdfBytes)) {
byte[] buffer = new byte[1024];
int length;
while ((length = pdfInputStream.read(buffer)) != -1) {
zipOut.write(buffer, 0, length);
// Add OCR'd PDF to merger
merger.addSource(pageOutputPath.toFile());
} else {
// Save original page without OCR
try (PDDocument pageDoc = new PDDocument()) {
pageDoc.addPage(page);
pageDoc.save(pageOutputPath.toFile());
merger.addSource(pageOutputPath.toFile());
}
}
zipOut.closeEntry();
// Add text file to the zip
ZipEntry txtEntry = new ZipEntry(outputFilename.replace(".pdf", ".txt"));
zipOut.putNextEntry(txtEntry);
Files.copy(sidecarTextPath, zipOut);
zipOut.closeEntry();
}
byte[] zipBytes = Files.readAllBytes(tempZipFile);
// Clean up the temporary zip file
Files.deleteIfExists(tempZipFile);
Files.deleteIfExists(tempOutputFile);
Files.deleteIfExists(sidecarTextPath);
// Return the zip file containing both the PDF and the text file
return WebResponseUtils.bytesToWebResponse(
zipBytes, outputZipFilename, MediaType.APPLICATION_OCTET_STREAM);
} else {
// Return the OCR processed PDF as a response
Files.deleteIfExists(tempOutputFile);
return WebResponseUtils.bytesToWebResponse(pdfBytes, outputFilename);
}
// Merge all pages into final PDF
merger.mergeDocuments(null);
// Read the final PDF file
byte[] pdfContent = Files.readAllBytes(finalOutputFile);
String outputFilename =
inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_OCR.pdf";
return ResponseEntity.ok()
.header(
"Content-Disposition",
"attachment; filename=\"" + outputFilename + "\"")
.contentType(MediaType.APPLICATION_PDF)
.body(pdfContent);
} finally {
// Clean up the temporary files
Files.deleteIfExists(tempOutputFile);
// Comment out as transferTo makes multipart handle cleanup
// Files.deleteIfExists(tempInputFile);
if (sidecarTextPath != null) {
Files.deleteIfExists(sidecarTextPath);
// Clean up temporary files
deleteDirectory(tempDir);
}
}
private void addFileToZip(File file, String filename, ZipOutputStream zipOut)
throws IOException {
if (!file.exists()) {
log.warn("File {} does not exist, skipping", file);
return;
}
try (FileInputStream fis = new FileInputStream(file)) {
ZipEntry zipEntry = new ZipEntry(filename);
zipOut.putNextEntry(zipEntry);
byte[] buffer = new byte[1024];
int length;
while ((length = fis.read(buffer)) >= 0) {
zipOut.write(buffer, 0, length);
}
zipOut.closeEntry();
}
}
private void deleteDirectory(Path directory) {
try {
Files.walk(directory)
.sorted(Comparator.reverseOrder())
.forEach(
path -> {
try {
Files.delete(path);
} catch (IOException e) {
log.error("Error deleting {}: {}", path, e.getMessage());
}
});
} catch (IOException e) {
log.error("Error walking directory {}: {}", directory, e.getMessage());
}
}
}

View File

@ -56,14 +56,15 @@ public class RepairController {
try {
List<String> command = new ArrayList<>();
command.add("gs");
command.add("-o");
command.add(tempOutputFile.toString());
command.add("-sDEVICE=pdfwrite");
command.add("qpdf");
command.add("--replace-input"); // Automatically fixes problems it can
command.add("--qdf"); // Linearizes and normalizes PDF structure
command.add("--object-streams=disable"); // Can help with some corruptions
command.add(tempInputFile.toString());
command.add(tempOutputFile.toString());
ProcessExecutorResult returnCode =
ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT)
ProcessExecutor.getInstance(ProcessExecutor.Processes.QPDF)
.runCommandWithOutputHandling(command);
// Read the optimized PDF file

View File

@ -98,10 +98,10 @@ public class CertSignController {
public CreateSignature(KeyStore keystore, char[] pin)
throws KeyStoreException,
UnrecoverableKeyException,
NoSuchAlgorithmException,
IOException,
CertificateException {
UnrecoverableKeyException,
NoSuchAlgorithmException,
IOException,
CertificateException {
super(keystore, pin);
ClassPathResource resource = new ClassPathResource("static/images/signature.png");
try (InputStream is = resource.getInputStream()) {
@ -160,7 +160,8 @@ public class CertSignController {
extState.setNonStrokingAlphaConstant(0.5f);
cs.setGraphicsStateParameters(extState);
cs.transform(Matrix.getScaleInstance(0.08f, 0.08f));
PDImageXObject img = PDImageXObject.createFromFileByExtension(logoFile, doc);
PDImageXObject img =
PDImageXObject.createFromFileByExtension(logoFile, doc);
cs.drawImage(img, 100, 0);
cs.restoreGraphicsState();
}
@ -208,7 +209,10 @@ public class CertSignController {
}
@PostMapping(consumes = "multipart/form-data", value = "/cert-sign")
@Operation(summary = "Sign PDF with a Digital Certificate", description = "This endpoint accepts a PDF file, a digital certificate and related information to sign the PDF. It then returns the digitally signed PDF file. Input:PDF Output:PDF Type:SISO")
@Operation(
summary = "Sign PDF with a Digital Certificate",
description =
"This endpoint accepts a PDF file, a digital certificate and related information to sign the PDF. It then returns the digitally signed PDF file. Input:PDF Output:PDF Type:SISO")
public ResponseEntity<byte[]> signPDFWithCert(@ModelAttribute SignPDFWithCertRequest request)
throws Exception {
MultipartFile pdf = request.getFileInput();
@ -238,7 +242,7 @@ public class CertSignController {
PrivateKey privateKey = getPrivateKeyFromPEM(privateKeyFile.getBytes(), password);
Certificate cert = (Certificate) getCertificateFromPEM(certFile.getBytes());
ks.setKeyEntry(
"alias", privateKey, password.toCharArray(), new Certificate[] { cert });
"alias", privateKey, password.toCharArray(), new Certificate[] {cert});
break;
case "PKCS12":
ks = KeyStore.getInstance("PKCS12");
@ -310,19 +314,22 @@ public class CertSignController {
private PrivateKey getPrivateKeyFromPEM(byte[] pemBytes, String password)
throws IOException, OperatorCreationException, PKCSException {
try (PEMParser pemParser = new PEMParser(new InputStreamReader(new ByteArrayInputStream(pemBytes)))) {
try (PEMParser pemParser =
new PEMParser(new InputStreamReader(new ByteArrayInputStream(pemBytes)))) {
Object pemObject = pemParser.readObject();
JcaPEMKeyConverter converter = new JcaPEMKeyConverter().setProvider("BC");
PrivateKeyInfo pkInfo;
if (pemObject instanceof PKCS8EncryptedPrivateKeyInfo) {
InputDecryptorProvider decProv = new JceOpenSSLPKCS8DecryptorProviderBuilder()
.build(password.toCharArray());
InputDecryptorProvider decProv =
new JceOpenSSLPKCS8DecryptorProviderBuilder().build(password.toCharArray());
pkInfo = ((PKCS8EncryptedPrivateKeyInfo) pemObject).decryptPrivateKeyInfo(decProv);
} else if (pemObject instanceof PEMEncryptedKeyPair) {
PEMDecryptorProvider decProv = new JcePEMDecryptorProviderBuilder().build(password.toCharArray());
pkInfo = ((PEMEncryptedKeyPair) pemObject)
.decryptKeyPair(decProv)
.getPrivateKeyInfo();
PEMDecryptorProvider decProv =
new JcePEMDecryptorProviderBuilder().build(password.toCharArray());
pkInfo =
((PEMEncryptedKeyPair) pemObject)
.decryptKeyPair(decProv)
.getPrivateKeyInfo();
} else {
pkInfo = ((PEMKeyPair) pemObject).getPrivateKeyInfo();
}

View File

@ -55,6 +55,11 @@ public class HomeWebController {
return "licenses";
}
@GetMapping("/releases")
public String getReleaseNotes(Model model) {
return "releases";
}
@GetMapping("/")
public String home(Model model) {
model.addAttribute("currentPage", "home");

View File

@ -18,4 +18,15 @@ public class OptimizePdfRequest extends PDFFile {
@Schema(description = "The expected output size, e.g. '100MB', '25KB', etc.")
private String expectedOutputSize;
@Schema(
description = "Whether to linearize the PDF for faster web viewing. Default is false.",
defaultValue = "false")
private Boolean linearize = false;
@Schema(
description =
"Whether to normalize the PDF content for better compatibility. Default is true.",
defaultValue = "true")
private Boolean normalize = true;
}

View File

@ -15,18 +15,6 @@ public class ProcessPdfWithOcrRequest extends PDFFile {
@Schema(description = "List of languages to use in OCR processing")
private List<String> languages;
@Schema(description = "Include OCR text in a sidecar text file if set to true")
private boolean sidecar;
@Schema(description = "Deskew the input file if set to true")
private boolean deskew;
@Schema(description = "Clean the input file if set to true")
private boolean clean;
@Schema(description = "Clean the final output if set to true")
private boolean cleanFinal;
@Schema(
description = "Specify the OCR type, e.g., 'skip-text', 'force-ocr', or 'Normal'",
allowableValues = {"skip-text", "force-ocr", "Normal"})
@ -37,7 +25,4 @@ public class ProcessPdfWithOcrRequest extends PDFFile {
allowableValues = {"hocr", "sandwich"},
defaultValue = "hocr")
private String ocrRenderType = "hocr";
@Schema(description = "Remove images from the output PDF if set to true")
private boolean removeImagesAfter;
}

View File

@ -34,17 +34,15 @@ public class MetricsAggregatorService {
counter -> {
String method = counter.getId().getTag("method");
String uri = counter.getId().getTag("uri");
// Skip if either method or uri is null
if (method == null || uri == null) {
return;
}
String key = String.format(
"http_requests_%s_%s",
method,
uri.replace("/", "_")
);
String key =
String.format(
"http_requests_%s_%s", method, uri.replace("/", "_"));
double currentCount = counter.count();
double lastCount = lastSentMetrics.getOrDefault(key, 0.0);

View File

@ -31,7 +31,7 @@ public class PostHogService {
private final ApplicationProperties applicationProperties;
private final UserServiceInterface userService;
private final Environment env;
@Autowired
public PostHogService(
PostHog postHog,
@ -71,16 +71,16 @@ public class PostHogService {
Map<String, Object> metrics = new HashMap<>();
try {
//Application version
metrics.put("app_version", appVersion);
String deploymentType = "JAR"; // default
if ("true".equalsIgnoreCase(env.getProperty("BROWSER_OPEN"))) {
deploymentType = "EXE";
} else if (isRunningInDocker()) {
deploymentType = "DOCKER";
}
metrics.put("deployment_type", deploymentType);
// Application version
metrics.put("app_version", appVersion);
String deploymentType = "JAR"; // default
if ("true".equalsIgnoreCase(env.getProperty("BROWSER_OPEN"))) {
deploymentType = "EXE";
} else if (isRunningInDocker()) {
deploymentType = "DOCKER";
}
metrics.put("deployment_type", deploymentType);
// System info
metrics.put("os_name", System.getProperty("os.name"));
metrics.put("os_version", System.getProperty("os.version"));

View File

@ -29,12 +29,13 @@ public class ProcessExecutor {
public enum Processes {
LIBRE_OFFICE,
PDFTOHTML,
OCR_MY_PDF,
PYTHON_OPENCV,
GHOSTSCRIPT,
WEASYPRINT,
INSTALL_APP,
CALIBRE
CALIBRE,
IMAGEMAGICK,
TESSERACT,
QPDF
}
private static final Map<Processes, ProcessExecutor> instances = new ConcurrentHashMap<>();
@ -59,21 +60,11 @@ public class ProcessExecutor {
.getProcessExecutor()
.getSessionLimit()
.getPdfToHtmlSessionLimit();
case OCR_MY_PDF ->
applicationProperties
.getProcessExecutor()
.getSessionLimit()
.getOcrMyPdfSessionLimit();
case PYTHON_OPENCV ->
applicationProperties
.getProcessExecutor()
.getSessionLimit()
.getPythonOpenCvSessionLimit();
case GHOSTSCRIPT ->
applicationProperties
.getProcessExecutor()
.getSessionLimit()
.getGhostScriptSessionLimit();
case WEASYPRINT ->
applicationProperties
.getProcessExecutor()
@ -84,7 +75,7 @@ public class ProcessExecutor {
.getProcessExecutor()
.getSessionLimit()
.getInstallAppSessionLimit();
case CALIBRE ->
case CALIBRE, IMAGEMAGICK, TESSERACT, QPDF ->
applicationProperties
.getProcessExecutor()
.getSessionLimit()
@ -103,21 +94,11 @@ public class ProcessExecutor {
.getProcessExecutor()
.getTimeoutMinutes()
.getPdfToHtmlTimeoutMinutes();
case OCR_MY_PDF ->
applicationProperties
.getProcessExecutor()
.getTimeoutMinutes()
.getOcrMyPdfTimeoutMinutes();
case PYTHON_OPENCV ->
applicationProperties
.getProcessExecutor()
.getTimeoutMinutes()
.getPythonOpenCvTimeoutMinutes();
case GHOSTSCRIPT ->
applicationProperties
.getProcessExecutor()
.getTimeoutMinutes()
.getGhostScriptTimeoutMinutes();
case WEASYPRINT ->
applicationProperties
.getProcessExecutor()
@ -128,7 +109,7 @@ public class ProcessExecutor {
.getProcessExecutor()
.getTimeoutMinutes()
.getInstallAppTimeoutMinutes();
case CALIBRE ->
case CALIBRE, IMAGEMAGICK, TESSERACT, QPDF ->
applicationProperties
.getProcessExecutor()
.getTimeoutMinutes()

View File

@ -1260,3 +1260,10 @@ splitByChapters.desc.2=Bookmark Level: Choose the level of bookmarks to use for
splitByChapters.desc.3=Include Metadata: If checked, the original PDF's metadata will be included in each split PDF.
splitByChapters.desc.4=Allow Duplicates: If checked, allows multiple bookmarks on the same page to create separate PDFs.
splitByChapters.submit=Split PDF
#release notes
releases.title=Release Notes
releases.header=Release Notes
releases.current.version=Current Release
releases.note=All release notes are only available in english

View File

@ -29,9 +29,14 @@
<label for="optimizeLevel" th:text="#{compress.selectText.2}"></label>
<select name="optimizeLevel" id="optimizeLevel" class="form-control">
<option value="1">1</option>
<option value="2" selected>2</option>
<option value="2">2</option>
<option value="3">3</option>
<option value="4" th:text="#{compress.selectText.3}"></option>
<option value="4">4</option>
<option value="5" selected>5</option>
<option value="6">6</option>
<option value="7">7</option>
<option value="8">8</option>
<option value="9">9</option>
</select>
</div>
</div>

View File

@ -62,26 +62,6 @@
</div>
<br>
<label for="languages" class="form-label" th:text="#{ocr.selectText.9}"></label>
<div class="form-check ms-3">
<input type="checkbox" name="sidecar" id="sidecar" />
<label for="sidecar" th:text="#{ocr.selectText.2}"></label>
</div>
<div class="form-check ms-3">
<input type="checkbox" name="deskew" id="deskew" />
<label for="deskew" th:text="#{ocr.selectText.3}"></label>
</div>
<div class="form-check ms-3">
<input type="checkbox" name="clean" id="clean" />
<label for="clean" th:text="#{ocr.selectText.4}"></label>
</div>
<div class="form-check ms-3">
<input type="checkbox" name="clean-final" id="clean-final" />
<label for="clean-final" th:text="#{ocr.selectText.5}"></label>
</div>
<div class="form-check ms-3">
<input type="checkbox" name="removeImagesAfter" id="removeImagesAfter" />
<label for="removeImagesAfter" th:text="#{ocr.selectText.11}"></label>
</div>
<div class="mb-3">
<label th:text="#{ocr.selectText.12}"></label>
<select class="form-control" name="ocrRenderType">
@ -239,7 +219,6 @@
});
});
</script>
<p th:text="#{ocr.credit}"></p>
<p th:text="#{ocr.help}"></p>
<a href="https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToUseOCR.md">https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToUseOCR.md</a>
</div>

View File

@ -0,0 +1,473 @@
<!DOCTYPE html>
<html th:lang="${#locale.language}" th:dir="#{language.direction}" th:data-language="${#locale.toString()}" xmlns:th="https://www.thymeleaf.org">
<head>
<th:block th:insert="~{fragments/common :: head(title=#{releases.title}, header=#{releases.title})}"></th:block>
</head>
<body>
<div id="page-container">
<div id="content-wrap">
<th:block th:insert="~{fragments/navbar.html :: navbar}"></th:block>
<br><br>
<div class="container">
<div class="row justify-content-center">
<div class="col-md-8 bg-card">
<div class="tool-header">
<span class="material-symbols-rounded tool-header-icon history">update</span>
<span class="tool-header-text" th:text="#{releases.header}">Release Notes</span>
</div>
<div class="alert alert-info" role="alert">
<strong th:text="#{releases.current.version}">Current Installed Version</strong>:
<span id="currentVersion" th:text="${@appVersion}"></span>
</div>
<div class="alert alert-warning" role="alert">
<span th:text="#{releases.note}">All release notes are only available in English</span>
</div>
<div id="loading" class="text-center my-4">
<div class="spinner-border text-primary" role="status">
<span class="visually-hidden">Loading...</span>
</div>
</div>
<div id="error-message" class="alert alert-danger d-none" role="alert">
Failed to load release notes. Please try again later.
</div>
<!-- Release Notes Container -->
<div id="release-notes-container" class="release-notes-container">
<!-- Release notes will be dynamically inserted here -->
</div>
</div>
</div>
</div>
</div>
<th:block th:insert="~{fragments/footer.html :: footer}"></th:block>
</div>
<style>
.release-notes-container {
margin-top: 2rem;
}
.release-card {
border: 1px solid #dee2e6;
border-radius: 0.25rem;
margin-bottom: 1.5rem;
padding: 1rem;
}
.release-card.current-version {
border-color: #28a745;
background-color: rgba(40, 167, 69, 0.05);
}
.release-header {
display: flex;
justify-content: space-between;
align-items: center;
margin-bottom: 1rem;
}
.release-header h3 {
margin: 0;
display: flex;
gap: 1rem;
align-items: center;
}
.version {
font-weight: bold;
}
.release-date {
color: #6c757d;
font-size: 0.9em;
}
.release-body {
font-size: 0.9rem;
white-space: pre-wrap;
word-break: break-word;
}
.release-link {
color: #0d6efd;
text-decoration: none;
}
.release-link:hover {
text-decoration: underline;
}
</style>
<script th:inline="javascript">
/*<![CDATA[*/
// Get the current version from the appVersion bean
const appVersion = [[${@appVersion}]];
// GitHub API configuration
const REPO_OWNER = 'Stirling-Tools';
const REPO_NAME = 'Stirling-PDF';
const GITHUB_API = 'https://api.github.com/repos/' + REPO_OWNER + '/' + REPO_NAME;
const GITHUB_URL = 'https://github.com/' + REPO_OWNER + '/' + REPO_NAME;
const MAX_RELEASES = 8;
// Secure element creation helper
function createElement(tag, attributes = {}, children = []) {
const element = document.createElement(tag);
Object.entries(attributes).forEach(([key, value]) => {
if (typeof value === 'string') {
element.setAttribute(key, value);
}
});
children.forEach(child => {
if (typeof child === 'string') {
element.appendChild(document.createTextNode(child));
} else if (child instanceof Node) {
element.appendChild(child);
}
});
return element;
}
const ALLOWED_TAGS = {
'a': ['href', 'target', 'rel', 'class'],
'img': ['src', 'alt', 'width', 'height', 'style'],
'br': [],
'p': [],
'div': [],
'span': []
};
// Function to safely create HTML elements from string
function createSafeElement(htmlString) {
try {
const parser = new DOMParser();
const doc = parser.parseFromString(htmlString, 'text/html');
function sanitizeNode(node) {
// Safety check for null/undefined
if (!node) return null;
// Handle text nodes
if (node.nodeType === Node.TEXT_NODE) {
return node.cloneNode(true);
}
// Handle element nodes
if (node.nodeType === Node.ELEMENT_NODE) {
const tagName = node.tagName.toLowerCase();
// Check if tag is allowed
if (!ALLOWED_TAGS[tagName]) {
return document.createTextNode(node.textContent);
}
// Create new element
const cleanElement = document.createElement(tagName);
// Copy allowed attributes
const allowedAttributes = ALLOWED_TAGS[tagName];
Array.from(node.attributes).forEach(attr => {
if (allowedAttributes.includes(attr.name)) {
let value = attr.value;
if (attr.name === 'href' || attr.name === 'src') {
try {
value = encodeURI(value);
} catch {
return;
}
}
cleanElement.setAttribute(attr.name, value);
}
});
// Add security attributes for links
if (tagName === 'a') {
cleanElement.setAttribute('rel', 'noopener noreferrer');
}
// Process children
Array.from(node.childNodes).forEach(child => {
const cleanChild = sanitizeNode(child);
if (cleanChild) {
cleanElement.appendChild(cleanChild);
}
});
return cleanElement;
}
// If not text or element node, return null
return null;
}
// Get the actual content from the body
const content = doc.body.children;
if (!content || content.length === 0) {
return null;
}
// If it's a single element, process it directly
if (content.length === 1) {
return sanitizeNode(content[0]);
}
// If multiple elements, wrap them in a div
const wrapper = document.createElement('div');
Array.from(content).forEach(node => {
const cleanNode = sanitizeNode(node);
if (cleanNode) {
wrapper.appendChild(cleanNode);
}
});
return wrapper;
} catch (error) {
console.error('Error parsing HTML:', error);
return null;
}
}
function processGitHubReferences(text) {
if (!text || typeof text !== 'string') {
return text;
}
let currentText = text;
let match;
let lastIndex = 0;
const result = document.createElement('span');
const urlRegex = new RegExp('https://github\\.com/' + REPO_OWNER + '/' + REPO_NAME + '/(?:issues|pull)/(\\d+)', 'g');
while ((match = urlRegex.exec(text)) !== null) {
// Add text before the match
if (match.index > lastIndex) {
result.appendChild(document.createTextNode(text.substring(lastIndex, match.index)));
}
// Create link element
const link = document.createElement('a');
link.href = encodeURI(match[0]);
link.textContent = `#${match[1]}`; // Use issue/PR number
link.className = 'release-link';
link.target = '_blank';
link.rel = 'noopener noreferrer';
result.appendChild(link);
lastIndex = match.index + match[0].length;
}
// Add remaining text after last match
if (lastIndex < text.length) {
result.appendChild(document.createTextNode(text.substring(lastIndex)));
}
return result;
}
// Update formatText function to handle processGitHubReferences properly
function formatText(text) {
const container = document.createElement('div');
// Split the text into lines
const lines = text.split('\n');
let currentList = null;
lines.forEach(line => {
const trimmedLine = line.trim();
// Skip empty lines but add spacing
if (!trimmedLine) {
if (currentList) {
container.appendChild(currentList);
currentList = null;
}
container.appendChild(document.createElement('br'));
return;
}
// Check if the line is HTML
if (trimmedLine.startsWith('<') && trimmedLine.endsWith('>')) {
if (currentList) {
container.appendChild(currentList);
currentList = null;
}
const safeElement = createSafeElement(trimmedLine);
if (safeElement) {
container.appendChild(safeElement);
} else {
// If HTML parsing fails, treat as plain text
container.appendChild(document.createTextNode(trimmedLine));
}
return;
}
// Check for headers
const headerMatch = trimmedLine.match(/^(#{1,3})\s+(.+)$/);
if (headerMatch) {
if (currentList) {
container.appendChild(currentList);
currentList = null;
}
const headerLevel = headerMatch[1].length;
const headerContent = headerMatch[2];
// Process GitHub references in headers
const processedContent = processGitHubReferences(headerContent);
const header = createElement(`h${headerLevel}`);
header.appendChild(processedContent);
container.appendChild(header);
return;
}
// Check for bullet points
const bulletMatch = trimmedLine.match(/^[-*]\s+(.+)$/);
if (bulletMatch) {
if (!currentList) {
currentList = document.createElement('ul');
}
const listContent = bulletMatch[1];
const listItem = document.createElement('li');
// Process GitHub references in list items
listItem.appendChild(processGitHubReferences(listContent));
currentList.appendChild(listItem);
return;
}
// If we reach here and have a list, append it
if (currentList) {
container.appendChild(currentList);
currentList = null;
}
// Handle regular paragraph
const paragraph = document.createElement('p');
paragraph.appendChild(processGitHubReferences(trimmedLine));
container.appendChild(paragraph);
});
// Append any remaining list
if (currentList) {
container.appendChild(currentList);
}
return container;
}
const MAX_PREVIOUS_RELEASES = 5;
function compareVersions(v1, v2) {
const normalize = v => v.replace(/^v/, '');
const v1Parts = normalize(v1).split('.').map(Number);
const v2Parts = normalize(v2).split('.').map(Number);
for (let i = 0; i < Math.max(v1Parts.length, v2Parts.length); i++) {
const v1Part = v1Parts[i] || 0;
const v2Part = v2Parts[i] || 0;
if (v1Part > v2Part) return 1;
if (v1Part < v2Part) return -1;
}
return 0;
}
async function loadReleases() {
const container = document.getElementById('release-notes-container');
const loading = document.getElementById('loading');
const errorMessage = document.getElementById('error-message');
try {
loading.classList.remove('d-none');
errorMessage.classList.add('d-none');
// Clear container safely
while (container.firstChild) {
container.removeChild(container.firstChild);
}
const response = await fetch(GITHUB_API + '/releases');
if (!response.ok) throw new Error('Failed to fetch releases');
const releases = await response.json();
// Sort releases by version number (descending)
releases.sort((a, b) => compareVersions(b.tag_name, a.tag_name));
// Find index of current version
const currentVersionIndex = releases.findIndex(release =>
compareVersions(release.tag_name, 'v' + appVersion) === 0 ||
compareVersions(release.tag_name, appVersion) === 0
);
if (currentVersionIndex === -1) {
container.appendChild(createElement('div', {
class: 'alert alert-warning'
}, ['Current version not found in releases.']));
return;
}
// Get current version and 8 previous releases
const endIndex = Math.min(currentVersionIndex + MAX_PREVIOUS_RELEASES + 1, releases.length);
const relevantReleases = releases.slice(currentVersionIndex, endIndex);
if (relevantReleases.length === 0) {
container.appendChild(createElement('div', {
class: 'alert alert-warning'
}, ['No releases found.']));
return;
}
relevantReleases.forEach((release, index) => {
const isCurrentVersion = index === 0; // First release in the array is current version
const releaseCard = createElement('div', {
class: `release-card ${isCurrentVersion ? 'current-version' : ''}`
});
const header = createElement('div', { class: 'release-header' });
const h3 = createElement('h3', {}, [
createElement('span', { class: 'version' }, [release.tag_name]),
createElement('span', { class: 'release-date' }, [
new Date(release.created_at).toLocaleDateString()
])
]);
header.appendChild(h3);
if (isCurrentVersion) {
header.appendChild(createElement('span', {
class: 'badge bg-success'
}, ['Installed']));
}
releaseCard.appendChild(header);
const body = createElement('div', { class: 'release-body' });
body.appendChild(formatText(release.body || 'No release notes available.'));
releaseCard.appendChild(body);
container.appendChild(releaseCard);
});
} catch (error) {
console.error('Error loading releases:', error);
errorMessage.classList.remove('d-none');
} finally {
loading.classList.add('d-none');
}
}
// Load releases when the page loads
document.addEventListener('DOMContentLoaded', loadReleases);
/*]]>*/
</script>
</body>
</html>