This commit is contained in:
Anthony Stirling 2023-05-19 20:43:30 +01:00
parent f8c855eab1
commit 87cd6dfb54
3 changed files with 166 additions and 67 deletions

View File

@ -76,6 +76,22 @@ jobs:
labels: ${{ steps.meta.outputs.labels }} labels: ${{ steps.meta.outputs.labels }}
platforms: linux/amd64,linux/arm64/v8 platforms: linux/amd64,linux/arm64/v8
- name: Generate tags
id: meta2
uses: docker/metadata-action@v4.4.0
with:
images: |
${{ secrets.DOCKER_HUB_USERNAME }}/s-pdf
ghcr.io/${{ github.repository_owner }}/s-pdf
tags: |
${{ steps.versionNumber.outputs.versionNumber }}-ultra-light
type=raw,value=latest,enable=${{ github.ref == 'refs/heads/master' }}
type=raw,value=alpha,enable=${{ github.ref == 'refs/heads/main' }}
- name: Convert repository owner to lowercase
id: repoowner
run: echo "::set-output name=lowercase::$(echo ${{ github.repository_owner }} | awk '{print tolower($0)}')"
- name: Build and push Dockerfile-ultralite - name: Build and push Dockerfile-ultralite
uses: docker/build-push-action@v4.0.0 uses: docker/build-push-action@v4.0.0
with: with:
@ -84,12 +100,15 @@ jobs:
push: true push: true
cache-from: type=gha cache-from: type=gha
cache-to: type=gha,mode=max cache-to: type=gha,mode=max
tags: ${{ steps.meta2.outputs.tags }}
labels: ${{ steps.meta2.outputs.labels }}
tags: | tags: |
${{ secrets.DOCKER_HUB_USERNAME }}/s-pdf:ultra-light-latest ${{ secrets.DOCKER_HUB_USERNAME }}/s-pdf:ultra-light-latest
ghcr.io/${{ github.repository_owner }}/s-pdf:ultra-light-latest ghcr.io/${{ steps.repoowner.outputs.lowercase }}/s-pdf:ultra-light-latest
labels: | labels: |
${{ steps.meta.outputs.labels }} ${{ steps.meta2.outputs.labels }}
type=raw,value=ultra-light-latest,enable=${{ github.ref == 'refs/heads/master' }} type=raw,value=ultra-light-latest,enable=${{ github.ref == 'refs/heads/master' }}
type=raw,value=ultra-light-alpha,enable=${{ github.ref == 'refs/heads/main' }} type=raw,value=ultra-light-alpha,enable=${{ github.ref == 'refs/heads/main' }}
platforms: linux/amd64,linux/arm64/v8 platforms: linux/amd64,linux/arm64/v8

View File

@ -1,11 +1,27 @@
package stirling.software.SPDF.controller.api.other; package stirling.software.SPDF.controller.api.other;
import java.awt.Image;
import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException; import java.io.IOException;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.graphics.PDXObject;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import java.nio.file.Files; import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import javax.imageio.ImageIO;
import javax.imageio.stream.MemoryCacheImageOutputStream;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.springframework.http.ResponseEntity; import org.springframework.http.ResponseEntity;
@ -20,30 +36,19 @@ import io.swagger.v3.oas.annotations.Parameter;
import stirling.software.SPDF.utils.PdfUtils; import stirling.software.SPDF.utils.PdfUtils;
import stirling.software.SPDF.utils.ProcessExecutor; import stirling.software.SPDF.utils.ProcessExecutor;
import io.swagger.v3.oas.annotations.media.Schema; import io.swagger.v3.oas.annotations.media.Schema;
@RestController @RestController
public class CompressController { public class CompressController {
private static final Logger logger = LoggerFactory.getLogger(CompressController.class); private static final Logger logger = LoggerFactory.getLogger(CompressController.class);
@PostMapping(consumes = "multipart/form-data", value = "/compress-pdf") @PostMapping(consumes = "multipart/form-data", value = "/compress-pdf")
@Operation( @Operation(summary = "Optimize PDF file", description = "This endpoint accepts a PDF file and optimizes it based on the provided parameters.")
summary = "Optimize PDF file",
description = "This endpoint accepts a PDF file and optimizes it based on the provided parameters."
)
public ResponseEntity<byte[]> optimizePdf( public ResponseEntity<byte[]> optimizePdf(
@RequestPart(required = true, value = "fileInput") @RequestPart(required = true, value = "fileInput") @Parameter(description = "The input PDF file to be optimized.", required = true) MultipartFile inputFile,
@Parameter(description = "The input PDF file to be optimized.", required = true) @RequestParam("optimizeLevel") @Parameter(description = "The level of optimization to apply to the PDF file. Higher values indicate greater compression but may reduce quality.", schema = @Schema(allowableValues = {
MultipartFile inputFile, "0", "1", "2", "3" }), example = "1") int optimizeLevel,
@RequestParam("optimizeLevel") @RequestParam("expectedOutputSize") @Parameter(description = "The expected output size in bytes.", required = false) Long expectedOutputSize)
@Parameter(description = "The level of optimization to apply to the PDF file. Higher values indicate greater compression but may reduce quality.",
schema = @Schema(allowableValues = {"0", "1", "2", "3"}), example = "1")
int optimizeLevel,
@RequestParam(name = "fastWebView", required = false)
@Parameter(description = "If true, optimize the PDF for fast web view. This increases the file size by about 25%.", example = "false")
Boolean fastWebView,
@RequestParam(name = "jbig2Lossy", required = false)
@Parameter(description = "If true, apply lossy JB2 compression to the PDF file.", example = "false")
Boolean jbig2Lossy)
throws IOException, InterruptedException { throws IOException, InterruptedException {
// Save the uploaded file to a temporary location // Save the uploaded file to a temporary location
@ -53,31 +58,109 @@ public class CompressController {
// Prepare the output file path // Prepare the output file path
Path tempOutputFile = Files.createTempFile("output_", ".pdf"); Path tempOutputFile = Files.createTempFile("output_", ".pdf");
// Prepare the OCRmyPDF command // Prepare the Ghostscript command
List<String> command = new ArrayList<>(); List<String> command = new ArrayList<>();
command.add("ocrmypdf"); command.add("gs");
command.add("--skip-text"); command.add("-sDEVICE=pdfwrite");
command.add("--tesseract-timeout=0"); command.add("-dCompatibilityLevel=1.4");
command.add("--optimize");
command.add(String.valueOf(optimizeLevel));
command.add("--output-type");
command.add("pdf");
if (fastWebView != null && fastWebView) { switch (optimizeLevel) {
long fileSize = inputFile.getSize(); case 0:
long fastWebViewSize = (long) (fileSize * 1.25); // 25% higher than file size command.add("-dPDFSETTINGS=/default");
command.add("--fast-web-view"); break;
command.add(String.valueOf(fastWebViewSize)); case 1:
} command.add("-dPDFSETTINGS=/ebook");
break;
if (jbig2Lossy != null && jbig2Lossy) { case 2:
command.add("--jbig2-lossy"); command.add("-dPDFSETTINGS=/printer");
break;
case 3:
command.add("-dPDFSETTINGS=/prepress");
break;
default:
command.add("-dPDFSETTINGS=/default");
} }
command.add("-dNOPAUSE");
command.add("-dQUIET");
command.add("-dBATCH");
command.add("-sOutputFile=" + tempOutputFile.toString());
command.add(tempInputFile.toString()); command.add(tempInputFile.toString());
command.add(tempOutputFile.toString());
int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF).runCommandWithOutputHandling(command); int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT).runCommandWithOutputHandling(command);
if (expectedOutputSize != null) {
long outputFileSize = Files.size(tempOutputFile);
if (outputFileSize > expectedOutputSize) {
try (PDDocument doc = PDDocument.load(new File(tempOutputFile.toString()))) {
double scaleFactor = 1.0;
while (true) {
for (PDPage page : doc.getPages()) {
PDResources res = page.getResources();
for (COSName name : res.getXObjectNames()) {
PDXObject xobj = res.getXObject(name);
if (xobj instanceof PDImageXObject) {
PDImageXObject image = (PDImageXObject) xobj;
// Get the image in BufferedImage format
BufferedImage bufferedImage = image.getImage();
// Calculate the new dimensions
int newWidth = (int)(bufferedImage.getWidth() * scaleFactor);
int newHeight = (int)(bufferedImage.getHeight() * scaleFactor);
// If the new dimensions are zero, skip this iteration
if (newWidth == 0 || newHeight == 0) {
continue;
}
// Otherwise, proceed with the scaling
Image scaledImage = bufferedImage.getScaledInstance(newWidth, newHeight, Image.SCALE_SMOOTH);
// Convert the scaled image back to a BufferedImage
BufferedImage scaledBufferedImage = new BufferedImage(newWidth, newHeight, BufferedImage.TYPE_INT_RGB);
scaledBufferedImage.getGraphics().drawImage(scaledImage, 0, 0, null);
// Compress the scaled image
ByteArrayOutputStream compressedImageStream = new ByteArrayOutputStream();
ImageIO.write(scaledBufferedImage, "jpeg", compressedImageStream);
byte[] imageBytes = compressedImageStream.toByteArray();
compressedImageStream.close();
// Convert compressed image back to PDImageXObject
ByteArrayInputStream bais = new ByteArrayInputStream(imageBytes);
PDImageXObject compressedImage = PDImageXObject.createFromByteArray(doc, imageBytes, image.getCOSObject().toString());
// Replace the image in the resources with the compressed version
res.put(name, compressedImage);
}
}
}
// save the document to tempOutputFile again
doc.save(tempOutputFile.toString());
// Check if the overall PDF size is still larger than expectedOutputSize
if (Files.size(tempOutputFile) > expectedOutputSize) {
// The file is still too large, reduce scaleFactor and try again
scaleFactor *= 0.9; // reduce scaleFactor by 10%
// Avoid scaleFactor being too small, causing the image to shrink to 0
if(scaleFactor < 0.1){
throw new RuntimeException("Could not reach the desired size without excessively degrading image quality");
}
} else {
// The file is small enough, break the loop
break;
}
}
}
}
}
// Read the optimized PDF file // Read the optimized PDF file
byte[] pdfBytes = Files.readAllBytes(tempOutputFile); byte[] pdfBytes = Files.readAllBytes(tempOutputFile);

View File

@ -11,7 +11,7 @@
<div id="content-wrap"> <div id="content-wrap">
<div th:insert="~{fragments/navbar.html :: navbar}"></div> <div th:insert="~{fragments/navbar.html :: navbar}"></div>
<br> <br> <br> <br>
<div class="container"> <div class="container">R
<div class="row justify-content-center"> <div class="row justify-content-center">
<div class="col-md-6"> <div class="col-md-6">
<h2 th:text="#{compress.header}"></h2> <h2 th:text="#{compress.header}"></h2>
@ -27,15 +27,12 @@
</select> </select>
</div> </div>
<div> <div>
<input type="checkbox" name="fastWebView" id="fastWebView"> <label for="expectedOutputSize" th:text="#{compress.selectText.8}"></label>
<label for="fastWebView" th:text="#{compress.selectText.6}"></label> <input type="number" name="expectedOutputSize" id="expectedOutputSize" min="1">
</div>
<div>
<input type="checkbox" name="jbig2Lossy" id="jbig2Lossy">
<label for="jbig2Lossy" th:text="#{compress.selectText.7}"></label>
</div> </div>
<button type="submit" id="submitBtn" class="btn btn-primary" th:text="#{compress.submit}"></button> <button type="submit" id="submitBtn" class="btn btn-primary" th:text="#{compress.submit}"></button>
</form> </form>
<p class="mt-3" th:text="#{compress.credit}"></p> <p class="mt-3" th:text="#{compress.credit}"></p>
</div> </div>