This commit is contained in:
Anthony Stirling 2024-12-09 20:40:59 +00:00
parent a400fe6015
commit 0652299bec
5 changed files with 304 additions and 230 deletions

View File

@ -52,11 +52,18 @@ public class SplitPDFController {
"This endpoint splits a given PDF file into separate documents based on the specified page numbers or ranges. Users can specify pages using individual numbers, ranges, or 'all' for every page. Input:PDF Output:PDF Type:SIMO")
public ResponseEntity<byte[]> splitPdf(@ModelAttribute PDFWithPageNums request)
throws IOException {
PDDocument document = null;
Path zipFile = null;
List<ByteArrayOutputStream> splitDocumentsBoas = new ArrayList<>();
try {
MultipartFile file = request.getFileInput();
String pages = request.getPageNumbers();
// open the pdf document
PDDocument document = Loader.loadPDF(file.getBytes());
document = Loader.loadPDF(file.getBytes());
// PdfMetadata metadata = PdfMetadataService.extractMetadataFromPdf(document);
int totalPages = document.getNumberOfPages();
List<Integer> pageNumbers = request.getPageNumbersList(document, false);
@ -71,7 +78,7 @@ public class SplitPDFController {
pageNumbers.stream().map(String::valueOf).collect(Collectors.joining(",")));
// split the document
List<ByteArrayOutputStream> splitDocumentsBoas = new ArrayList<>();
splitDocumentsBoas = new ArrayList<>();
int previousPageNumber = 0;
for (int splitPoint : pageNumbers) {
try (PDDocument splitDocument =
@ -99,7 +106,7 @@ public class SplitPDFController {
// closing the original document
document.close();
Path zipFile = Files.createTempFile("split_documents", ".zip");
zipFile = Files.createTempFile("split_documents", ".zip");
String filename =
Filenames.toSimpleFileName(file.getOriginalFilename())
@ -124,12 +131,36 @@ public class SplitPDFController {
throw e;
}
logger.info("Successfully created zip file with split documents: {}", zipFile.toString());
logger.info(
"Successfully created zip file with split documents: {}", zipFile.toString());
byte[] data = Files.readAllBytes(zipFile);
Files.deleteIfExists(zipFile);
// return the Resource in the response
return WebResponseUtils.bytesToWebResponse(
data, filename + ".zip", MediaType.APPLICATION_OCTET_STREAM);
} finally {
try {
// Close the main document
if (document != null) {
document.close();
}
// Close all ByteArrayOutputStreams
for (ByteArrayOutputStream baos : splitDocumentsBoas) {
if (baos != null) {
baos.close();
}
}
// Delete temporary zip file
if (zipFile != null) {
Files.deleteIfExists(zipFile);
}
} catch (Exception e) {
logger.error("Error while cleaning up resources", e);
}
}
}
}

View File

@ -59,13 +59,17 @@ public class SplitPdfByChaptersController {
public ResponseEntity<byte[]> splitPdf(@ModelAttribute SplitPdfByChaptersRequest request)
throws Exception {
MultipartFile file = request.getFileInput();
PDDocument sourceDocument = null;
Path zipFile = null;
try {
boolean includeMetadata = request.getIncludeMetadata();
Integer bookmarkLevel =
request.getBookmarkLevel(); // levels start from 0 (top most bookmarks)
if (bookmarkLevel < 0) {
return ResponseEntity.badRequest().body("Invalid bookmark level".getBytes());
}
PDDocument sourceDocument = Loader.loadPDF(file.getBytes());
sourceDocument = Loader.loadPDF(file.getBytes());
PDDocumentOutline outline = sourceDocument.getDocumentCatalog().getDocumentOutline();
@ -112,7 +116,7 @@ public class SplitPdfByChaptersController {
List<ByteArrayOutputStream> splitDocumentsBoas =
getSplitDocumentsBoas(sourceDocument, bookmarks, includeMetadata);
Path zipFile = createZipFile(bookmarks, splitDocumentsBoas);
zipFile = createZipFile(bookmarks, splitDocumentsBoas);
byte[] data = Files.readAllBytes(zipFile);
Files.deleteIfExists(zipFile);
@ -123,6 +127,18 @@ public class SplitPdfByChaptersController {
sourceDocument.close();
return WebResponseUtils.bytesToWebResponse(
data, filename + ".zip", MediaType.APPLICATION_OCTET_STREAM);
} finally {
try {
if (sourceDocument != null) {
sourceDocument.close();
}
if (zipFile != null) {
Files.deleteIfExists(zipFile);
}
} catch (Exception e) {
logger.error("Error while cleaning up resources", e);
}
}
}
private List<Bookmark> mergeBookmarksThatCorrespondToSamePage(List<Bookmark> bookmarks) {

View File

@ -105,15 +105,13 @@ public class SplitPdfBySectionsController {
if (sectionNum == horiz * verti) pageNum++;
}
} catch (Exception e) {
logger.error("exception", e);
} finally {
data = Files.readAllBytes(zipFile);
Files.deleteIfExists(zipFile);
}
return WebResponseUtils.bytesToWebResponse(
data, filename + "_split.zip", MediaType.APPLICATION_OCTET_STREAM);
} finally {
Files.deleteIfExists(zipFile);
}
}
public List<PDDocument> splitPdfPages(

View File

@ -65,6 +65,12 @@ public class ConvertImgPDFController {
String colorType = request.getColorType();
String dpi = request.getDpi();
Path tempFile = null;
Path tempOutputDir = null;
Path tempPdfPath = null;
byte[] result = null;
try {
byte[] pdfBytes = file.getBytes();
ImageType colorTypeResult = ImageType.RGB;
if ("greyscale".equals(colorType)) {
@ -74,7 +80,6 @@ public class ConvertImgPDFController {
}
// returns bytes for image
boolean singleImage = "single".equals(singleOrMultiple);
byte[] result = null;
String filename =
Filenames.toSimpleFileName(file.getOriginalFilename())
.replaceFirst("[.][^.]+$", "");
@ -82,7 +87,9 @@ public class ConvertImgPDFController {
result =
PdfUtils.convertFromPdf(
pdfBytes,
"webp".equalsIgnoreCase(imageFormat) ? "png" : imageFormat.toUpperCase(),
"webp".equalsIgnoreCase(imageFormat)
? "png"
: imageFormat.toUpperCase(),
colorTypeResult,
singleImage,
Integer.valueOf(dpi),
@ -95,7 +102,7 @@ public class ConvertImgPDFController {
} else if ("webp".equalsIgnoreCase(imageFormat)
&& CheckProgramInstall.isPythonAvailable()) {
// Write the output stream to a temp file
Path tempFile = Files.createTempFile("temp_png", ".png");
tempFile = Files.createTempFile("temp_png", ".png");
try (FileOutputStream fos = new FileOutputStream(tempFile.toFile())) {
fos.write(result);
fos.flush();
@ -108,7 +115,7 @@ public class ConvertImgPDFController {
command.add("./scripts/png_to_webp.py"); // Python script to handle the conversion
// Create a temporary directory for the output WebP files
Path tempOutputDir = Files.createTempDirectory("webp_output");
tempOutputDir = Files.createTempDirectory("webp_output");
if (singleImage) {
// Run the Python script to convert PNG to WebP
command.add(tempFile.toString());
@ -116,7 +123,7 @@ public class ConvertImgPDFController {
command.add("--single");
} else {
// Save the uploaded PDF to a temporary file
Path tempPdfPath = Files.createTempFile("temp_pdf", ".pdf");
tempPdfPath = Files.createTempFile("temp_pdf", ".pdf");
file.transferTo(tempPdfPath.toFile());
// Run the Python script to convert PDF to WebP
command.add(tempPdfPath.toString());
@ -136,7 +143,8 @@ public class ConvertImgPDFController {
if (webpFiles.isEmpty()) {
logger.error("No WebP files were created in: {}", tempOutputDir.toString());
throw new IOException("No WebP files were created. " + resultProcess.getMessages());
throw new IOException(
"No WebP files were created. " + resultProcess.getMessages());
}
byte[] bodyBytes = new byte[0];
@ -172,6 +180,23 @@ public class ConvertImgPDFController {
return WebResponseUtils.bytesToWebResponse(
result, zipFilename, MediaType.APPLICATION_OCTET_STREAM);
}
} finally {
try {
// Clean up temporary files
if (tempFile != null) {
Files.deleteIfExists(tempFile);
}
if (tempPdfPath != null) {
Files.deleteIfExists(tempPdfPath);
}
if (tempOutputDir != null) {
FileUtils.deleteDirectory(tempOutputDir.toFile());
}
} catch (Exception e) {
logger.error("Error cleaning up temporary files", e);
}
}
}
@PostMapping(consumes = "multipart/form-data", value = "/img/pdf")

View File

@ -87,7 +87,7 @@ public class OCRController {
Files.createDirectories(tempOutputDir);
Files.createDirectories(tempImagesDir);
Process process = null;
try {
// Save input file
inputFile.transferTo(tempInputFile.toFile());
@ -139,7 +139,7 @@ public class OCRController {
command.add("pdf"); // Always output PDF
ProcessBuilder pb = new ProcessBuilder(command);
Process process = pb.start();
process = pb.start();
// Capture any error output
try (BufferedReader reader =
@ -188,6 +188,10 @@ public class OCRController {
.body(pdfContent);
} finally {
if (process != null) {
process.destroy();
}
// Clean up temporary files
deleteDirectory(tempDir);
}