This commit is contained in:
Anthony Stirling 2024-12-09 20:40:59 +00:00
parent a400fe6015
commit 0652299bec
5 changed files with 304 additions and 230 deletions

View File

@ -52,11 +52,18 @@ public class SplitPDFController {
"This endpoint splits a given PDF file into separate documents based on the specified page numbers or ranges. Users can specify pages using individual numbers, ranges, or 'all' for every page. Input:PDF Output:PDF Type:SIMO") "This endpoint splits a given PDF file into separate documents based on the specified page numbers or ranges. Users can specify pages using individual numbers, ranges, or 'all' for every page. Input:PDF Output:PDF Type:SIMO")
public ResponseEntity<byte[]> splitPdf(@ModelAttribute PDFWithPageNums request) public ResponseEntity<byte[]> splitPdf(@ModelAttribute PDFWithPageNums request)
throws IOException { throws IOException {
PDDocument document = null;
Path zipFile = null;
List<ByteArrayOutputStream> splitDocumentsBoas = new ArrayList<>();
try {
MultipartFile file = request.getFileInput(); MultipartFile file = request.getFileInput();
String pages = request.getPageNumbers(); String pages = request.getPageNumbers();
// open the pdf document // open the pdf document
PDDocument document = Loader.loadPDF(file.getBytes()); document = Loader.loadPDF(file.getBytes());
// PdfMetadata metadata = PdfMetadataService.extractMetadataFromPdf(document); // PdfMetadata metadata = PdfMetadataService.extractMetadataFromPdf(document);
int totalPages = document.getNumberOfPages(); int totalPages = document.getNumberOfPages();
List<Integer> pageNumbers = request.getPageNumbersList(document, false); List<Integer> pageNumbers = request.getPageNumbersList(document, false);
@ -71,7 +78,7 @@ public class SplitPDFController {
pageNumbers.stream().map(String::valueOf).collect(Collectors.joining(","))); pageNumbers.stream().map(String::valueOf).collect(Collectors.joining(",")));
// split the document // split the document
List<ByteArrayOutputStream> splitDocumentsBoas = new ArrayList<>(); splitDocumentsBoas = new ArrayList<>();
int previousPageNumber = 0; int previousPageNumber = 0;
for (int splitPoint : pageNumbers) { for (int splitPoint : pageNumbers) {
try (PDDocument splitDocument = try (PDDocument splitDocument =
@ -99,7 +106,7 @@ public class SplitPDFController {
// closing the original document // closing the original document
document.close(); document.close();
Path zipFile = Files.createTempFile("split_documents", ".zip"); zipFile = Files.createTempFile("split_documents", ".zip");
String filename = String filename =
Filenames.toSimpleFileName(file.getOriginalFilename()) Filenames.toSimpleFileName(file.getOriginalFilename())
@ -124,12 +131,36 @@ public class SplitPDFController {
throw e; throw e;
} }
logger.info("Successfully created zip file with split documents: {}", zipFile.toString()); logger.info(
"Successfully created zip file with split documents: {}", zipFile.toString());
byte[] data = Files.readAllBytes(zipFile); byte[] data = Files.readAllBytes(zipFile);
Files.deleteIfExists(zipFile); Files.deleteIfExists(zipFile);
// return the Resource in the response // return the Resource in the response
return WebResponseUtils.bytesToWebResponse( return WebResponseUtils.bytesToWebResponse(
data, filename + ".zip", MediaType.APPLICATION_OCTET_STREAM); data, filename + ".zip", MediaType.APPLICATION_OCTET_STREAM);
} finally {
try {
// Close the main document
if (document != null) {
document.close();
}
// Close all ByteArrayOutputStreams
for (ByteArrayOutputStream baos : splitDocumentsBoas) {
if (baos != null) {
baos.close();
}
}
// Delete temporary zip file
if (zipFile != null) {
Files.deleteIfExists(zipFile);
}
} catch (Exception e) {
logger.error("Error while cleaning up resources", e);
}
}
} }
} }

View File

@ -59,13 +59,17 @@ public class SplitPdfByChaptersController {
public ResponseEntity<byte[]> splitPdf(@ModelAttribute SplitPdfByChaptersRequest request) public ResponseEntity<byte[]> splitPdf(@ModelAttribute SplitPdfByChaptersRequest request)
throws Exception { throws Exception {
MultipartFile file = request.getFileInput(); MultipartFile file = request.getFileInput();
PDDocument sourceDocument = null;
Path zipFile = null;
try {
boolean includeMetadata = request.getIncludeMetadata(); boolean includeMetadata = request.getIncludeMetadata();
Integer bookmarkLevel = Integer bookmarkLevel =
request.getBookmarkLevel(); // levels start from 0 (top most bookmarks) request.getBookmarkLevel(); // levels start from 0 (top most bookmarks)
if (bookmarkLevel < 0) { if (bookmarkLevel < 0) {
return ResponseEntity.badRequest().body("Invalid bookmark level".getBytes()); return ResponseEntity.badRequest().body("Invalid bookmark level".getBytes());
} }
PDDocument sourceDocument = Loader.loadPDF(file.getBytes()); sourceDocument = Loader.loadPDF(file.getBytes());
PDDocumentOutline outline = sourceDocument.getDocumentCatalog().getDocumentOutline(); PDDocumentOutline outline = sourceDocument.getDocumentCatalog().getDocumentOutline();
@ -112,7 +116,7 @@ public class SplitPdfByChaptersController {
List<ByteArrayOutputStream> splitDocumentsBoas = List<ByteArrayOutputStream> splitDocumentsBoas =
getSplitDocumentsBoas(sourceDocument, bookmarks, includeMetadata); getSplitDocumentsBoas(sourceDocument, bookmarks, includeMetadata);
Path zipFile = createZipFile(bookmarks, splitDocumentsBoas); zipFile = createZipFile(bookmarks, splitDocumentsBoas);
byte[] data = Files.readAllBytes(zipFile); byte[] data = Files.readAllBytes(zipFile);
Files.deleteIfExists(zipFile); Files.deleteIfExists(zipFile);
@ -123,6 +127,18 @@ public class SplitPdfByChaptersController {
sourceDocument.close(); sourceDocument.close();
return WebResponseUtils.bytesToWebResponse( return WebResponseUtils.bytesToWebResponse(
data, filename + ".zip", MediaType.APPLICATION_OCTET_STREAM); data, filename + ".zip", MediaType.APPLICATION_OCTET_STREAM);
} finally {
try {
if (sourceDocument != null) {
sourceDocument.close();
}
if (zipFile != null) {
Files.deleteIfExists(zipFile);
}
} catch (Exception e) {
logger.error("Error while cleaning up resources", e);
}
}
} }
private List<Bookmark> mergeBookmarksThatCorrespondToSamePage(List<Bookmark> bookmarks) { private List<Bookmark> mergeBookmarksThatCorrespondToSamePage(List<Bookmark> bookmarks) {

View File

@ -105,15 +105,13 @@ public class SplitPdfBySectionsController {
if (sectionNum == horiz * verti) pageNum++; if (sectionNum == horiz * verti) pageNum++;
} }
} catch (Exception e) {
logger.error("exception", e);
} finally {
data = Files.readAllBytes(zipFile); data = Files.readAllBytes(zipFile);
Files.deleteIfExists(zipFile);
}
return WebResponseUtils.bytesToWebResponse( return WebResponseUtils.bytesToWebResponse(
data, filename + "_split.zip", MediaType.APPLICATION_OCTET_STREAM); data, filename + "_split.zip", MediaType.APPLICATION_OCTET_STREAM);
} finally {
Files.deleteIfExists(zipFile);
}
} }
public List<PDDocument> splitPdfPages( public List<PDDocument> splitPdfPages(

View File

@ -65,6 +65,12 @@ public class ConvertImgPDFController {
String colorType = request.getColorType(); String colorType = request.getColorType();
String dpi = request.getDpi(); String dpi = request.getDpi();
Path tempFile = null;
Path tempOutputDir = null;
Path tempPdfPath = null;
byte[] result = null;
try {
byte[] pdfBytes = file.getBytes(); byte[] pdfBytes = file.getBytes();
ImageType colorTypeResult = ImageType.RGB; ImageType colorTypeResult = ImageType.RGB;
if ("greyscale".equals(colorType)) { if ("greyscale".equals(colorType)) {
@ -74,7 +80,6 @@ public class ConvertImgPDFController {
} }
// returns bytes for image // returns bytes for image
boolean singleImage = "single".equals(singleOrMultiple); boolean singleImage = "single".equals(singleOrMultiple);
byte[] result = null;
String filename = String filename =
Filenames.toSimpleFileName(file.getOriginalFilename()) Filenames.toSimpleFileName(file.getOriginalFilename())
.replaceFirst("[.][^.]+$", ""); .replaceFirst("[.][^.]+$", "");
@ -82,7 +87,9 @@ public class ConvertImgPDFController {
result = result =
PdfUtils.convertFromPdf( PdfUtils.convertFromPdf(
pdfBytes, pdfBytes,
"webp".equalsIgnoreCase(imageFormat) ? "png" : imageFormat.toUpperCase(), "webp".equalsIgnoreCase(imageFormat)
? "png"
: imageFormat.toUpperCase(),
colorTypeResult, colorTypeResult,
singleImage, singleImage,
Integer.valueOf(dpi), Integer.valueOf(dpi),
@ -95,7 +102,7 @@ public class ConvertImgPDFController {
} else if ("webp".equalsIgnoreCase(imageFormat) } else if ("webp".equalsIgnoreCase(imageFormat)
&& CheckProgramInstall.isPythonAvailable()) { && CheckProgramInstall.isPythonAvailable()) {
// Write the output stream to a temp file // Write the output stream to a temp file
Path tempFile = Files.createTempFile("temp_png", ".png"); tempFile = Files.createTempFile("temp_png", ".png");
try (FileOutputStream fos = new FileOutputStream(tempFile.toFile())) { try (FileOutputStream fos = new FileOutputStream(tempFile.toFile())) {
fos.write(result); fos.write(result);
fos.flush(); fos.flush();
@ -108,7 +115,7 @@ public class ConvertImgPDFController {
command.add("./scripts/png_to_webp.py"); // Python script to handle the conversion command.add("./scripts/png_to_webp.py"); // Python script to handle the conversion
// Create a temporary directory for the output WebP files // Create a temporary directory for the output WebP files
Path tempOutputDir = Files.createTempDirectory("webp_output"); tempOutputDir = Files.createTempDirectory("webp_output");
if (singleImage) { if (singleImage) {
// Run the Python script to convert PNG to WebP // Run the Python script to convert PNG to WebP
command.add(tempFile.toString()); command.add(tempFile.toString());
@ -116,7 +123,7 @@ public class ConvertImgPDFController {
command.add("--single"); command.add("--single");
} else { } else {
// Save the uploaded PDF to a temporary file // Save the uploaded PDF to a temporary file
Path tempPdfPath = Files.createTempFile("temp_pdf", ".pdf"); tempPdfPath = Files.createTempFile("temp_pdf", ".pdf");
file.transferTo(tempPdfPath.toFile()); file.transferTo(tempPdfPath.toFile());
// Run the Python script to convert PDF to WebP // Run the Python script to convert PDF to WebP
command.add(tempPdfPath.toString()); command.add(tempPdfPath.toString());
@ -136,7 +143,8 @@ public class ConvertImgPDFController {
if (webpFiles.isEmpty()) { if (webpFiles.isEmpty()) {
logger.error("No WebP files were created in: {}", tempOutputDir.toString()); logger.error("No WebP files were created in: {}", tempOutputDir.toString());
throw new IOException("No WebP files were created. " + resultProcess.getMessages()); throw new IOException(
"No WebP files were created. " + resultProcess.getMessages());
} }
byte[] bodyBytes = new byte[0]; byte[] bodyBytes = new byte[0];
@ -172,6 +180,23 @@ public class ConvertImgPDFController {
return WebResponseUtils.bytesToWebResponse( return WebResponseUtils.bytesToWebResponse(
result, zipFilename, MediaType.APPLICATION_OCTET_STREAM); result, zipFilename, MediaType.APPLICATION_OCTET_STREAM);
} }
} finally {
try {
// Clean up temporary files
if (tempFile != null) {
Files.deleteIfExists(tempFile);
}
if (tempPdfPath != null) {
Files.deleteIfExists(tempPdfPath);
}
if (tempOutputDir != null) {
FileUtils.deleteDirectory(tempOutputDir.toFile());
}
} catch (Exception e) {
logger.error("Error cleaning up temporary files", e);
}
}
} }
@PostMapping(consumes = "multipart/form-data", value = "/img/pdf") @PostMapping(consumes = "multipart/form-data", value = "/img/pdf")

View File

@ -87,7 +87,7 @@ public class OCRController {
Files.createDirectories(tempOutputDir); Files.createDirectories(tempOutputDir);
Files.createDirectories(tempImagesDir); Files.createDirectories(tempImagesDir);
Process process = null;
try { try {
// Save input file // Save input file
inputFile.transferTo(tempInputFile.toFile()); inputFile.transferTo(tempInputFile.toFile());
@ -139,7 +139,7 @@ public class OCRController {
command.add("pdf"); // Always output PDF command.add("pdf"); // Always output PDF
ProcessBuilder pb = new ProcessBuilder(command); ProcessBuilder pb = new ProcessBuilder(command);
Process process = pb.start(); process = pb.start();
// Capture any error output // Capture any error output
try (BufferedReader reader = try (BufferedReader reader =
@ -188,6 +188,10 @@ public class OCRController {
.body(pdfContent); .body(pdfContent);
} finally { } finally {
if (process != null) {
process.destroy();
}
// Clean up temporary files // Clean up temporary files
deleteDirectory(tempDir); deleteDirectory(tempDir);
} }