Removal of all getBytes() loads (#3153)

# Description of Changes

Please provide a summary of the changes, including:

- What was changed
- Why the change was made
- Any challenges encountered

Closes #(issue_number)

---

## Checklist

### General

- [ ] I have read the [Contribution
Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md)
- [ ] I have read the [Stirling-PDF Developer
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md)
(if applicable)
- [ ] I have read the [How to add new languages to
Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md)
(if applicable)
- [ ] I have performed a self-review of my own code
- [ ] My changes generate no new warnings

### Documentation

- [ ] I have updated relevant docs on [Stirling-PDF's doc
repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/)
(if functionality has heavily changed)
- [ ] I have read the section [Add New Translation
Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md#add-new-translation-tags)
(for new translation tags only)

### UI Changes (if applicable)

- [ ] Screenshots or videos demonstrating the UI changes are attached
(e.g., as comments or direct attachments in the PR)

### Testing (if applicable)

- [ ] I have tested my changes locally. Refer to the [Testing
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md#6-testing)
for more details.

---------

Co-authored-by: a <a>
This commit is contained in:
Anthony Stirling
2025-03-10 20:17:45 +00:00
committed by GitHub
parent d0a5416570
commit a61749d500
34 changed files with 504 additions and 214 deletions

View File

@@ -37,7 +37,7 @@ public class AnalysisController {
summary = "Get PDF page count",
description = "Returns total number of pages in PDF. Input:PDF Output:JSON Type:SISO")
public Map<String, Integer> getPageCount(@ModelAttribute PDFFile file) throws IOException {
try (PDDocument document = pdfDocumentFactory.load(file.getFileInput().getBytes())) {
try (PDDocument document = pdfDocumentFactory.load(file.getFileInput())) {
return Map.of("pageCount", document.getNumberOfPages());
}
}
@@ -47,7 +47,7 @@ public class AnalysisController {
summary = "Get basic PDF information",
description = "Returns page count, version, file size. Input:PDF Output:JSON Type:SISO")
public Map<String, Object> getBasicInfo(@ModelAttribute PDFFile file) throws IOException {
try (PDDocument document = pdfDocumentFactory.load(file.getFileInput().getBytes())) {
try (PDDocument document = pdfDocumentFactory.load(file.getFileInput())) {
Map<String, Object> info = new HashMap<>();
info.put("pageCount", document.getNumberOfPages());
info.put("pdfVersion", document.getVersion());
@@ -62,7 +62,7 @@ public class AnalysisController {
description = "Returns title, author, subject, etc. Input:PDF Output:JSON Type:SISO")
public Map<String, String> getDocumentProperties(@ModelAttribute PDFFile file)
throws IOException {
try (PDDocument document = pdfDocumentFactory.load(file.getFileInput().getBytes())) {
try (PDDocument document = pdfDocumentFactory.load(file.getFileInput())) {
PDDocumentInformation info = document.getDocumentInformation();
Map<String, String> properties = new HashMap<>();
properties.put("title", info.getTitle());
@@ -83,7 +83,7 @@ public class AnalysisController {
description = "Returns width and height of each page. Input:PDF Output:JSON Type:SISO")
public List<Map<String, Float>> getPageDimensions(@ModelAttribute PDFFile file)
throws IOException {
try (PDDocument document = pdfDocumentFactory.load(file.getFileInput().getBytes())) {
try (PDDocument document = pdfDocumentFactory.load(file.getFileInput())) {
List<Map<String, Float>> dimensions = new ArrayList<>();
PDPageTree pages = document.getPages();
@@ -103,7 +103,7 @@ public class AnalysisController {
description =
"Returns count and details of form fields. Input:PDF Output:JSON Type:SISO")
public Map<String, Object> getFormFields(@ModelAttribute PDFFile file) throws IOException {
try (PDDocument document = pdfDocumentFactory.load(file.getFileInput().getBytes())) {
try (PDDocument document = pdfDocumentFactory.load(file.getFileInput())) {
Map<String, Object> formInfo = new HashMap<>();
PDAcroForm form = document.getDocumentCatalog().getAcroForm();
@@ -125,7 +125,7 @@ public class AnalysisController {
summary = "Get annotation information",
description = "Returns count and types of annotations. Input:PDF Output:JSON Type:SISO")
public Map<String, Object> getAnnotationInfo(@ModelAttribute PDFFile file) throws IOException {
try (PDDocument document = pdfDocumentFactory.load(file.getFileInput().getBytes())) {
try (PDDocument document = pdfDocumentFactory.load(file.getFileInput())) {
Map<String, Object> annotInfo = new HashMap<>();
int totalAnnotations = 0;
Map<String, Integer> annotationTypes = new HashMap<>();
@@ -150,7 +150,7 @@ public class AnalysisController {
description =
"Returns list of fonts used in the document. Input:PDF Output:JSON Type:SISO")
public Map<String, Object> getFontInfo(@ModelAttribute PDFFile file) throws IOException {
try (PDDocument document = pdfDocumentFactory.load(file.getFileInput().getBytes())) {
try (PDDocument document = pdfDocumentFactory.load(file.getFileInput())) {
Map<String, Object> fontInfo = new HashMap<>();
Set<String> fontNames = new HashSet<>();
@@ -172,7 +172,7 @@ public class AnalysisController {
description =
"Returns encryption and permission details. Input:PDF Output:JSON Type:SISO")
public Map<String, Object> getSecurityInfo(@ModelAttribute PDFFile file) throws IOException {
try (PDDocument document = pdfDocumentFactory.load(file.getFileInput().getBytes())) {
try (PDDocument document = pdfDocumentFactory.load(file.getFileInput())) {
Map<String, Object> securityInfo = new HashMap<>();
PDEncryption encryption = document.getEncryption();

View File

@@ -42,7 +42,7 @@ public class CropController {
description =
"This operation takes an input PDF file and crops it according to the given coordinates. Input:PDF Output:PDF Type:SISO")
public ResponseEntity<byte[]> cropPdf(@ModelAttribute CropPdfForm form) throws IOException {
PDDocument sourceDocument = pdfDocumentFactory.load(form.getFileInput().getBytes());
PDDocument sourceDocument = pdfDocumentFactory.load(form);
PDDocument newDocument =
pdfDocumentFactory.createNewDocumentBasedOnOldDocument(sourceDocument);

View File

@@ -100,8 +100,8 @@ public class MergeController {
};
case "byPDFTitle":
return (file1, file2) -> {
try (PDDocument doc1 = pdfDocumentFactory.load(file1.getBytes());
PDDocument doc2 = pdfDocumentFactory.load(file2.getBytes())) {
try (PDDocument doc1 = pdfDocumentFactory.load(file1);
PDDocument doc2 = pdfDocumentFactory.load(file2)) {
String title1 = doc1.getDocumentInformation().getTitle();
String title2 = doc2.getDocumentInformation().getTitle();
return title1.compareTo(title2);

View File

@@ -63,7 +63,7 @@ public class MultiPageLayoutController {
: (int) Math.sqrt(pagesPerSheet);
int rows = pagesPerSheet == 2 || pagesPerSheet == 3 ? 1 : (int) Math.sqrt(pagesPerSheet);
PDDocument sourceDocument = pdfDocumentFactory.load(file.getBytes());
PDDocument sourceDocument = pdfDocumentFactory.load(file);
PDDocument newDocument =
pdfDocumentFactory.createNewDocumentBasedOnOldDocument(sourceDocument);
PDPage newPage = new PDPage(PDRectangle.A4);

View File

@@ -250,7 +250,7 @@ public class RearrangePagesPDFController {
String sortType = request.getCustomMode();
try {
// Load the input PDF
PDDocument document = pdfDocumentFactory.load(pdfFile.getBytes());
PDDocument document = pdfDocumentFactory.load(pdfFile);
// Split the page order string into an array of page numbers or range of numbers
String[] pageOrderArr = pageOrder != null ? pageOrder.split(",") : new String[0];

View File

@@ -51,7 +51,7 @@ public class ScalePagesController {
String targetPDRectangle = request.getPageSize();
float scaleFactor = request.getScaleFactor();
PDDocument sourceDocument = pdfDocumentFactory.load(file.getBytes());
PDDocument sourceDocument = pdfDocumentFactory.load(file);
PDDocument outputDocument =
pdfDocumentFactory.createNewDocumentBasedOnOldDocument(sourceDocument);

View File

@@ -62,7 +62,7 @@ public class SplitPDFController {
String pages = request.getPageNumbers();
// open the pdf document
document = pdfDocumentFactory.load(file.getBytes());
document = pdfDocumentFactory.load(file);
// PdfMetadata metadata = PdfMetadataService.extractMetadataFromPdf(document);
int totalPages = document.getNumberOfPages();
List<Integer> pageNumbers = request.getPageNumbersList(document, false);

View File

@@ -139,7 +139,7 @@ public class SplitPdfByChaptersController {
if (bookmarkLevel < 0) {
return ResponseEntity.badRequest().body("Invalid bookmark level".getBytes());
}
sourceDocument = pdfDocumentFactory.load(file.getBytes());
sourceDocument = pdfDocumentFactory.load(file);
PDDocumentOutline outline = sourceDocument.getDocumentCatalog().getDocumentOutline();

View File

@@ -56,7 +56,7 @@ public class SplitPdfBySectionsController {
List<ByteArrayOutputStream> splitDocumentsBoas = new ArrayList<>();
MultipartFile file = request.getFileInput();
PDDocument sourceDocument = pdfDocumentFactory.load(file.getBytes());
PDDocument sourceDocument = pdfDocumentFactory.load(file);
// Process the PDF based on split parameters
int horiz = request.getHorizontalDivisions() + 1;

View File

@@ -45,7 +45,7 @@ public class ToSinglePageController {
throws IOException {
// Load the source document
PDDocument sourceDocument = pdfDocumentFactory.load(request.getFileInput().getBytes());
PDDocument sourceDocument = pdfDocumentFactory.load(request);
// Calculate total height and max width
float totalHeight = 0;

View File

@@ -74,7 +74,7 @@ public class ConvertImgPDFController {
;
try {
// Load the input PDF
byte[] newPdfBytes = rearrangePdfPages(file.getBytes(), pageOrderArr);
byte[] newPdfBytes = rearrangePdfPages(file, pageOrderArr);
ImageType colorTypeResult = ImageType.RGB;
if ("greyscale".equals(colorType)) {
@@ -243,9 +243,10 @@ public class ConvertImgPDFController {
* @return A byte array of the rearranged PDF.
* @throws IOException If an error occurs while processing the PDF.
*/
private byte[] rearrangePdfPages(byte[] pdfBytes, String[] pageOrderArr) throws IOException {
private byte[] rearrangePdfPages(MultipartFile pdfFile, String[] pageOrderArr)
throws IOException {
// Load the input PDF
PDDocument document = pdfDocumentFactory.load(pdfBytes);
PDDocument document = pdfDocumentFactory.load(pdfFile);
int totalPages = document.getNumberOfPages();
List<Integer> newPageOrder = GeneralUtils.parsePageList(pageOrderArr, totalPages, false);

View File

@@ -62,7 +62,7 @@ public class ConvertPDFToOffice {
MultipartFile inputFile = request.getFileInput();
String outputFormat = request.getOutputFormat();
if ("txt".equals(request.getOutputFormat())) {
try (PDDocument document = pdfDocumentFactory.load(inputFile.getBytes())) {
try (PDDocument document = pdfDocumentFactory.load(inputFile)) {
PDFTextStripper stripper = new PDFTextStripper();
String text = stripper.getText(document);
return WebResponseUtils.bytesToWebResponse(

View File

@@ -59,7 +59,7 @@ public class ExtractCSVController {
String baseName = getBaseName(form.getFileInput().getOriginalFilename());
List<CsvEntry> csvEntries = new ArrayList<>();
try (PDDocument document = pdfDocumentFactory.load(form.getFileInput().getBytes())) {
try (PDDocument document = pdfDocumentFactory.load(form)) {
List<Integer> pages = form.getPageNumbersList(document, true);
SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm();
CSVFormat format =

View File

@@ -49,7 +49,7 @@ public class FilterController {
String text = request.getText();
String pageNumber = request.getPageNumbers();
PDDocument pdfDocument = pdfDocumentFactory.load(inputFile.getBytes());
PDDocument pdfDocument = pdfDocumentFactory.load(inputFile);
if (PdfUtils.hasText(pdfDocument, pageNumber, text))
return WebResponseUtils.pdfDocToWebResponse(
pdfDocument, Filenames.toSimpleFileName(inputFile.getOriginalFilename()));
@@ -66,7 +66,7 @@ public class FilterController {
MultipartFile inputFile = request.getFileInput();
String pageNumber = request.getPageNumbers();
PDDocument pdfDocument = pdfDocumentFactory.load(inputFile.getBytes());
PDDocument pdfDocument = pdfDocumentFactory.load(inputFile);
if (PdfUtils.hasImages(pdfDocument, pageNumber))
return WebResponseUtils.pdfDocToWebResponse(
pdfDocument, Filenames.toSimpleFileName(inputFile.getOriginalFilename()));
@@ -83,7 +83,7 @@ public class FilterController {
String pageCount = request.getPageCount();
String comparator = request.getComparator();
// Load the PDF
PDDocument document = pdfDocumentFactory.load(inputFile.getBytes());
PDDocument document = pdfDocumentFactory.load(inputFile);
int actualPageCount = document.getNumberOfPages();
boolean valid = false;
@@ -117,7 +117,7 @@ public class FilterController {
String comparator = request.getComparator();
// Load the PDF
PDDocument document = pdfDocumentFactory.load(inputFile.getBytes());
PDDocument document = pdfDocumentFactory.load(inputFile);
PDPage firstPage = document.getPage(0);
PDRectangle actualPageSize = firstPage.getMediaBox();
@@ -193,7 +193,7 @@ public class FilterController {
String comparator = request.getComparator();
// Load the PDF
PDDocument document = pdfDocumentFactory.load(inputFile.getBytes());
PDDocument document = pdfDocumentFactory.load(inputFile);
// Get the rotation of the first page
PDPage firstPage = document.getPage(0);

View File

@@ -52,7 +52,7 @@ public class AutoRenameController {
MultipartFile file = request.getFileInput();
Boolean useFirstTextAsFallback = request.isUseFirstTextAsFallback();
PDDocument document = pdfDocumentFactory.load(file.getBytes());
PDDocument document = pdfDocumentFactory.load(file);
PDFTextStripper reader =
new PDFTextStripper() {
List<LineInfo> lineInfos = new ArrayList<>();

View File

@@ -84,7 +84,7 @@ public class BlankPageController {
int threshold = request.getThreshold();
float whitePercent = request.getWhitePercent();
try (PDDocument document = pdfDocumentFactory.load(inputFile.getBytes())) {
try (PDDocument document = pdfDocumentFactory.load(inputFile)) {
PDPageTree pages = document.getDocumentCatalog().getPages();
PDFTextStripper textStripper = new PDFTextStripper();

View File

@@ -50,7 +50,7 @@ public class DecompressPdfController {
MultipartFile file = request.getFileInput();
try (PDDocument document = pdfDocumentFactory.load(file.getBytes())) {
try (PDDocument document = pdfDocumentFactory.load(file)) {
// Process all objects in document
processAllObjects(document);

View File

@@ -95,8 +95,7 @@ public class ExtractImageScansController {
// Check if input file is a PDF
if ("pdf".equalsIgnoreCase(extension)) {
// Load PDF document
try (PDDocument document =
pdfDocumentFactory.load(form.getFileInput().getBytes())) {
try (PDDocument document = pdfDocumentFactory.load(form.getFileInput())) {
PDFRenderer pdfRenderer = new PDFRenderer(document);
pdfRenderer.setSubsamplingAllowed(true);
int pageCount = document.getNumberOfPages();

View File

@@ -67,7 +67,7 @@ public class ExtractImagesController {
MultipartFile file = request.getFileInput();
String format = request.getFormat();
boolean allowDuplicates = request.isAllowDuplicates();
PDDocument document = pdfDocumentFactory.load(file.getBytes());
PDDocument document = pdfDocumentFactory.load(file);
// Determine if multithreading should be used based on PDF size or number of pages
boolean useMultithreading = shouldUseMultithreading(file, document);

View File

@@ -50,7 +50,7 @@ public class FlattenController {
public ResponseEntity<byte[]> flatten(@ModelAttribute FlattenRequest request) throws Exception {
MultipartFile file = request.getFileInput();
PDDocument document = pdfDocumentFactory.load(file.getBytes());
PDDocument document = pdfDocumentFactory.load(file);
Boolean flattenOnlyForms = request.getFlattenOnlyForms();
if (Boolean.TRUE.equals(flattenOnlyForms)) {

View File

@@ -84,7 +84,7 @@ public class MetadataController {
allRequestParams = new java.util.HashMap<String, String>();
}
// Load the PDF file into a PDDocument
PDDocument document = pdfDocumentFactory.load(pdfFile.getBytes());
PDDocument document = pdfDocumentFactory.load(pdfFile);
// Get the document information from the PDF
PDDocumentInformation info = document.getDocumentInformation();

View File

@@ -55,8 +55,7 @@ public class PageNumbersController {
String pagesToNumber = request.getPagesToNumber();
String customText = request.getCustomText();
int pageNumber = startingNumber;
byte[] fileBytes = file.getBytes();
PDDocument document = pdfDocumentFactory.load(fileBytes);
PDDocument document = pdfDocumentFactory.load(file);
float font_size = request.getFontSize();
String font_type = request.getFontType();
float marginFactor;

View File

@@ -43,7 +43,7 @@ public class ShowJavascript {
MultipartFile inputFile = request.getFileInput();
String script = "";
try (PDDocument document = pdfDocumentFactory.load(inputFile.getBytes())) {
try (PDDocument document = pdfDocumentFactory.load(inputFile)) {
if (document.getDocumentCatalog() != null
&& document.getDocumentCatalog().getNames() != null) {

View File

@@ -90,7 +90,7 @@ public class CertSignController {
private static void sign(
CustomPDDocumentFactory pdfDocumentFactory,
byte[] input,
MultipartFile input,
OutputStream output,
CreateSignature instance,
Boolean showSignature,
@@ -179,7 +179,7 @@ public class CertSignController {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
sign(
pdfDocumentFactory,
pdf.getBytes(),
pdf,
baos,
createSignature,
showSignature,

View File

@@ -126,7 +126,7 @@ public class GetInfoOnPDF {
@Operation(summary = "Summary here", description = "desc. Input:PDF Output:JSON Type:SISO")
public ResponseEntity<byte[]> getPdfInfo(@ModelAttribute PDFFile request) throws IOException {
MultipartFile inputFile = request.getFileInput();
try (PDDocument pdfBoxDoc = pdfDocumentFactory.load(inputFile.getBytes()); ) {
try (PDDocument pdfBoxDoc = pdfDocumentFactory.load(inputFile); ) {
ObjectMapper objectMapper = new ObjectMapper();
ObjectNode jsonOutput = objectMapper.createObjectNode();

View File

@@ -1,9 +1,7 @@
package stirling.software.SPDF.model.api;
import java.io.IOException;
import java.util.List;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
import io.swagger.v3.oas.annotations.Hidden;
@@ -32,18 +30,6 @@ public class PDFWithPageNums extends PDFFile {
requiredMode = RequiredMode.NOT_REQUIRED)
private String pageNumbers;
@Hidden
public List<Integer> getPageNumbersList(boolean zeroCount) {
int pageCount = 0;
try {
pageCount = Loader.loadPDF(getFileInput().getBytes()).getNumberOfPages();
} catch (IOException e) {
// TODO Auto-generated catch block
log.error("exception", e);
}
return GeneralUtils.parsePageList(pageNumbers, pageCount, zeroCount);
}
@Hidden
public List<Integer> getPageNumbersList(PDDocument doc, boolean oneBased) {
int pageCount = 0;

View File

@@ -10,9 +10,9 @@ import java.nio.file.StandardCopyOption;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.examples.util.DeletingRandomAccessFile;
import org.apache.pdfbox.io.IOUtils;
import org.apache.pdfbox.io.MemoryUsageSetting;
import org.apache.pdfbox.io.RandomAccessReadBufferedFile;
import org.apache.pdfbox.io.RandomAccessStreamCache.StreamCacheCreateFunction;
import org.apache.pdfbox.io.ScratchFile;
import org.apache.pdfbox.pdmodel.PDDocument;
@@ -102,16 +102,29 @@ public class CustomPDDocumentFactory {
// Since we don't know the size upfront, buffer to a temp file
Path tempFile = createTempFile("pdf-stream-");
try {
Files.copy(input, tempFile, StandardCopyOption.REPLACE_EXISTING);
return loadAdaptively(tempFile.toFile(), Files.size(tempFile));
} catch (IOException e) {
cleanupFile(tempFile);
throw e;
}
Files.copy(input, tempFile, StandardCopyOption.REPLACE_EXISTING);
return loadAdaptively(tempFile.toFile(), Files.size(tempFile));
}
private PDDocument loadAdaptively(Object source, long contentSize) throws IOException {
/**
 * Load a password-protected PDF from an InputStream.
 *
 * <p>The size of the stream is not known up front, so the content is first buffered to a temp
 * file, and that file is then passed to {@code loadAdaptivelyWithPassword} together with its
 * size.
 *
 * @param input stream supplying the PDF content; must not be null
 * @param password password passed through to the password-aware loader
 * @return the loaded document
 * @throws IllegalArgumentException if {@code input} is null
 * @throws IOException if buffering the stream or loading the document fails
 */
public PDDocument load(InputStream input, String password) throws IOException {
    if (input == null) {
        throw new IllegalArgumentException("InputStream cannot be null");
    }

    // Since we don't know the size upfront, buffer to a temp file
    Path tempFile = createTempFile("pdf-stream-");
    try {
        Files.copy(input, tempFile, StandardCopyOption.REPLACE_EXISTING);
        return loadAdaptivelyWithPassword(tempFile.toFile(), Files.size(tempFile), password);
    } catch (IOException | RuntimeException e) {
        // On failure no document is returned to the caller, so nothing else will ever
        // remove the temp file; delete it here (best effort) before propagating.
        try {
            Files.deleteIfExists(tempFile);
        } catch (IOException cleanupFailure) {
            e.addSuppressed(cleanupFailure);
        }
        throw e;
    }
}
/**
* Determine the appropriate caching strategy based on file size and available memory. This
* common method is used by both password and non-password loading paths.
*/
private StreamCacheCreateFunction getStreamCacheFunction(long contentSize) {
long maxMemory = Runtime.getRuntime().maxMemory();
long freeMemory = Runtime.getRuntime().freeMemory();
long totalMemory = Runtime.getRuntime().totalMemory();
@@ -129,32 +142,38 @@ public class CustomPDDocumentFactory {
usedMemory / (1024 * 1024),
maxMemory / (1024 * 1024));
// Determine caching strategy based on both file size and available memory
StreamCacheCreateFunction cacheFunction;
// If free memory is critically low, always use file-based caching
// Otherwise pick the cache by document size: memory-only, mixed, or file-based
if (freeMemoryPercent < MIN_FREE_MEMORY_PERCENTAGE
|| actualFreeMemory < MIN_FREE_MEMORY_BYTES) {
log.info(
"Low memory detected ({}%), forcing file-based cache",
String.format("%.2f", freeMemoryPercent));
cacheFunction = createScratchFileCacheFunction(MemoryUsageSetting.setupTempFileOnly());
return createScratchFileCacheFunction(MemoryUsageSetting.setupTempFileOnly());
} else if (contentSize < SMALL_FILE_THRESHOLD) {
log.info("Using memory-only cache for small document ({}KB)", contentSize / 1024);
cacheFunction = IOUtils.createMemoryOnlyStreamCache();
return IOUtils.createMemoryOnlyStreamCache();
} else if (contentSize < LARGE_FILE_THRESHOLD) {
// For medium files (10-50MB), use a mixed approach
log.info(
"Using mixed memory/file cache for medium document ({}MB)",
contentSize / (1024 * 1024));
cacheFunction =
createScratchFileCacheFunction(MemoryUsageSetting.setupMixed(LARGE_FILE_USAGE));
return createScratchFileCacheFunction(MemoryUsageSetting.setupMixed(LARGE_FILE_USAGE));
} else {
log.info("Using file-based cache for large document");
cacheFunction = createScratchFileCacheFunction(MemoryUsageSetting.setupTempFileOnly());
return createScratchFileCacheFunction(MemoryUsageSetting.setupTempFileOnly());
}
}
/** Load a PDF using the caching strategy returned by getStreamCacheFunction */
private PDDocument loadAdaptively(Object source, long contentSize) throws IOException {
// Get the appropriate caching strategy
StreamCacheCreateFunction cacheFunction = getStreamCacheFunction(contentSize);
// If the file is small, read it into memory as bytes and delete the original file
if (contentSize <= SMALL_FILE_THRESHOLD && source instanceof File file) {
source = Files.readAllBytes(file.toPath());
file.delete();
}
PDDocument document;
if (source instanceof File file) {
document = loadFromFile(file, contentSize, cacheFunction);
@@ -168,6 +187,50 @@ public class CustomPDDocumentFactory {
return document;
}
/**
 * Password-aware adaptive load: selects a stream cache for the given content size, then loads
 * the source with the supplied password and post-processes the result.
 *
 * @param source a {@link File} or a {@code byte[]} holding the PDF
 * @param contentSize size of the PDF content, used to choose the cache and the load path
 * @param password password handed to the underlying load call
 * @return the loaded, post-processed document
 * @throws IOException if reading the source or loading the document fails
 * @throws IllegalArgumentException if {@code source} is neither a File nor a byte array
 */
private PDDocument loadAdaptivelyWithPassword(Object source, long contentSize, String password)
        throws IOException {
    final StreamCacheCreateFunction streamCache = getStreamCacheFunction(contentSize);

    // Small files are read fully into memory and the on-disk copy is removed right away
    Object pdfSource = source;
    if (source instanceof File smallFile && contentSize <= SMALL_FILE_THRESHOLD) {
        pdfSource = Files.readAllBytes(smallFile.toPath());
        smallFile.delete();
    }

    final PDDocument loaded;
    if (pdfSource instanceof File pdfFile) {
        loaded = loadFromFileWithPassword(pdfFile, contentSize, streamCache, password);
    } else if (pdfSource instanceof byte[] content) {
        loaded = loadFromBytesWithPassword(content, contentSize, streamCache, password);
    } else {
        throw new IllegalArgumentException("Unsupported source type: " + pdfSource.getClass());
    }

    postProcessDocument(loaded);
    return loaded;
}
/**
 * Load a password-protected PDF from a file.
 *
 * <p>The file is wrapped in a {@link DeletingRandomAccessFile} and handed to the loader as the
 * document source. NOTE(review): per its name the wrapper presumably deletes the file when it
 * is closed - confirm against the PDFBox examples class.
 *
 * <p>When loading fails, no document is returned that could later close the source, so the
 * source is closed here before the exception is propagated.
 *
 * @param file file containing the PDF
 * @param size content size; not used by this method
 * @param cache stream cache factory passed to the loader
 * @param password password passed to the loader
 * @return the loaded document
 * @throws IOException if the file cannot be opened or the document cannot be loaded
 */
private PDDocument loadFromFileWithPassword(
        File file, long size, StreamCacheCreateFunction cache, String password)
        throws IOException {
    DeletingRandomAccessFile source = new DeletingRandomAccessFile(file);
    try {
        return Loader.loadPDF(source, password, null, null, cache);
    } catch (IOException | RuntimeException e) {
        // Release the handle ourselves; keep the original failure as the primary exception
        try {
            source.close();
        } catch (IOException closeFailure) {
            e.addSuppressed(closeFailure);
        }
        throw e;
    }
}
/**
 * Load a password-protected PDF from a byte array.
 *
 * <p>When {@code size} is at or above {@code SMALL_FILE_THRESHOLD} the bytes are written to a
 * temp file and loaded through {@code loadFromFileWithPassword}; otherwise they are loaded
 * directly from memory.
 *
 * @param bytes PDF content
 * @param size content size used to choose between the temp-file and in-memory paths
 * @param cache stream cache factory passed to the loader
 * @param password password passed to the loader
 * @return the loaded document
 * @throws IOException if writing the temp file or loading the document fails
 */
private PDDocument loadFromBytesWithPassword(
        byte[] bytes, long size, StreamCacheCreateFunction cache, String password)
        throws IOException {
    if (size >= SMALL_FILE_THRESHOLD) {
        log.info("Writing large byte array to temp file for password-protected PDF");
        Path tempFile = createTempFile("pdf-bytes-");
        try {
            Files.write(tempFile, bytes);
        } catch (IOException | RuntimeException e) {
            // The temp file has no owner yet; remove it (best effort) before propagating
            try {
                Files.deleteIfExists(tempFile);
            } catch (IOException cleanupFailure) {
                e.addSuppressed(cleanupFailure);
            }
            throw e;
        }
        // Go through the file loader, as loadFromBytes does via loadFromFile, instead of
        // loading the temp file as a plain File that nothing would ever remove.
        return loadFromFileWithPassword(tempFile.toFile(), size, cache, password);
    }
    return Loader.loadPDF(bytes, password, null, null, cache);
}
private StreamCacheCreateFunction createScratchFileCacheFunction(MemoryUsageSetting settings) {
return () -> {
try {
@@ -185,11 +248,7 @@ public class CustomPDDocumentFactory {
private PDDocument loadFromFile(File file, long size, StreamCacheCreateFunction cache)
throws IOException {
if (size >= EXTREMELY_LARGE_THRESHOLD) {
log.info("Loading extremely large file via buffered access");
return Loader.loadPDF(new RandomAccessReadBufferedFile(file), "", null, null, cache);
}
return Loader.loadPDF(file, "", null, null, cache);
return Loader.loadPDF(new DeletingRandomAccessFile(file), "", null, null, cache);
}
private PDDocument loadFromBytes(byte[] bytes, long size, StreamCacheCreateFunction cache)
@@ -197,12 +256,9 @@ public class CustomPDDocumentFactory {
if (size >= SMALL_FILE_THRESHOLD) {
log.info("Writing large byte array to temp file");
Path tempFile = createTempFile("pdf-bytes-");
try {
Files.write(tempFile, bytes);
return Loader.loadPDF(tempFile.toFile(), "", null, null, cache);
} finally {
cleanupFile(tempFile);
}
Files.write(tempFile, bytes);
return loadFromFile(tempFile.toFile(), size, cache);
}
return Loader.loadPDF(bytes, "", null, null, cache);
}
@@ -225,12 +281,9 @@ public class CustomPDDocumentFactory {
}
} else {
Path tempFile = createTempFile("pdf-save-");
try {
document.save(tempFile.toFile());
return Files.readAllBytes(tempFile);
} finally {
cleanupFile(tempFile);
}
document.save(tempFile.toFile());
return Files.readAllBytes(tempFile);
}
}
@@ -258,17 +311,6 @@ public class CustomPDDocumentFactory {
return Files.createTempDirectory(prefix + tempCounter.incrementAndGet() + "-");
}
/** Clean up a temporary file */
private void cleanupFile(Path file) {
try {
if (Files.deleteIfExists(file)) {
log.info("Deleted temp file: {}", file);
}
} catch (IOException e) {
log.info("Error deleting temp file {}", file, e);
}
}
/** Create new document bytes based on an existing document */
public byte[] createNewBytesBasedOnOldDocument(byte[] oldDocument) throws IOException {
try (PDDocument document = load(oldDocument)) {
@@ -339,20 +381,11 @@ public class CustomPDDocumentFactory {
/** Load from a MultipartFile */
public PDDocument load(MultipartFile pdfFile) throws IOException {
return load(pdfFile.getBytes());
return load(pdfFile.getInputStream());
}
/** Load with password from MultipartFile */
public PDDocument load(MultipartFile fileInput, String password) throws IOException {
return load(fileInput.getBytes(), password);
}
/** Load with password from byte array */
private PDDocument load(byte[] bytes, String password) throws IOException {
// Since we don't have direct password support in the adaptive loader,
// we'll need to use PDFBox's Loader directly
PDDocument document = Loader.loadPDF(bytes, password);
pdfMetadataService.setDefaultMetadata(document);
return document;
return load(fileInput.getInputStream(), password);
}
}

View File

@@ -32,8 +32,15 @@ public class GeneralUtils {
public static File convertMultipartFileToFile(MultipartFile multipartFile) throws IOException {
File tempFile = Files.createTempFile("temp", null).toFile();
try (FileOutputStream os = new FileOutputStream(tempFile)) {
os.write(multipartFile.getBytes());
try (InputStream inputStream = multipartFile.getInputStream();
FileOutputStream outputStream = new FileOutputStream(tempFile)) {
byte[] buffer = new byte[8192];
int bytesRead;
while ((bytesRead = inputStream.read(buffer)) != -1) {
outputStream.write(buffer, 0, bytesRead);
}
}
return tempFile;
}