This commit is contained in:
Anthony Stirling 2023-04-01 16:03:40 +01:00
parent 7bac488ca4
commit 80820e75c9
6 changed files with 96 additions and 3 deletions

View File

@ -53,9 +53,10 @@ public class CompressController {
command.add("--skip-text");
command.add("--tesseract-timeout=0");
command.add("--optimize");
command.add(String.valueOf(optimizeLevel));
command.add("--output-type");
command.add("pdf");
command.add(String.valueOf(optimizeLevel));
if (fastWebView != null && fastWebView) {
long fileSize = inputFile.getSize();

View File

@ -1,5 +1,6 @@
package stirling.software.SPDF.controller.converters;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
@ -7,6 +8,8 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.springframework.http.HttpHeaders;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Controller;
import org.springframework.ui.Model;
@ -15,10 +18,58 @@ import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.multipart.MultipartFile;
import com.itextpdf.xmp.XMPException;
import stirling.software.SPDF.utils.PdfUtils;
import stirling.software.SPDF.utils.ProcessExecutor;
@Controller
public class ConvertPDFToPDFA {
@GetMapping("/pdf-to-pdfa")
public String pdfToPdfAForm(Model model) {
model.addAttribute("currentPage", "pdf-to-pdfa");
return "pdf-to-pdfa";
}
@PostMapping("/pdf-to-pdfa")
public ResponseEntity<byte[]> pdfToPdfA(
@RequestParam("fileInput") MultipartFile inputFile) throws IOException, InterruptedException {
// Save the uploaded file to a temporary location
Path tempInputFile = Files.createTempFile("input_", ".pdf");
inputFile.transferTo(tempInputFile.toFile());
// Prepare the output file path
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
// Prepare the OCRmyPDF command
List<String> command = new ArrayList<>();
command.add("ocrmypdf");
command.add("--skip-text");
command.add("--tesseract-timeout=0");
command.add("--output-type");
command.add("pdfa");
command.add(tempInputFile.toString());
command.add(tempOutputFile.toString());
int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF).runCommandWithOutputHandling(command);
// Read the optimized PDF file
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
// Clean up the temporary files
Files.delete(tempInputFile);
Files.delete(tempOutputFile);
// Return the optimized PDF as a response
String outputFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_PDFA.pdf";
HttpHeaders headers = new HttpHeaders();
headers.setContentType(MediaType.APPLICATION_PDF);
headers.setContentDispositionFormData("attachment", outputFilename);
return ResponseEntity.ok().headers(headers).body(pdfBytes);
}
}

View File

@ -82,6 +82,9 @@ home.ocr.desc=Scans and detects text from images within a PDF and re-adds it as
home.extractImages.title=Extract Images
home.extractImages.desc=Extracts all images from a PDF and saves them to zip
home.pdfToPDFA.title=Convert PDF to PDF/A
home.pdfToPDFA.desc=Convert PDF to PDF/A for long-term storage
navbar.settings=Settings
settings.title=Settings
@ -113,6 +116,12 @@ ocr.help=Please read this documentation on how to use this for other languages a
ocr.credit=This service uses OCRmyPDF and Tesseract for OCR.
ocr.submit=Process PDF with OCR
pdfToPDFA.title=PDF To PDF/A
pdfToPDFA.header=PDF To PDF/A
pdfToPDFA.credit=This service uses OCRmyPDF for PDF/A conversion
pdfToPDFA.submit=Convert
extractImages.title=Extract Images
extractImages.header=Extract Images
extractImages.selectText=Select image format to convert extracted images to

View File

@ -0,0 +1,30 @@
<!DOCTYPE html>
<html th:lang="${#locale.language}" th:lang-direction="#{language.direction}" xmlns:th="http://www.thymeleaf.org">
<th:block th:insert="~{fragments/common :: head(title=#{pdfToPDFA.title})}"></th:block>
<body>
<div id="page-container">
<div id="content-wrap">
<div th:insert="~{fragments/navbar.html :: navbar}"></div>
<br> <br>
<div class="container">
<div class="row justify-content-center">
<div class="col-md-6">
<h2 th:text="#{pdfToPDFA.header}"></h2>
<form method="post" enctype="multipart/form-data" th:action="@{file-to-pdf}">
<div th:replace="~{fragments/common :: fileSelector(name='fileInput', multiple=false, accept='application/pdf')}"></div>
<br>
<button type="submit" class="btn btn-primary" th:text="#{pdfToPDFA.submit}"></button>
</form>
<p class="mt-3" th:text="#{pdfToPDFA.credit}"></p>
</div>
</div>
</div>
</div>
<div th:insert="~{fragments/footer.html :: footer}"></div>
</div>
</body>
</html>

View File

@ -127,12 +127,13 @@ function compareVersions(version1, version2) {
<li class="nav-item"><a class="nav-link" href="#" th:href="@{rotate-pdf}" th:classappend="${currentPage}=='rotate-pdf' ? 'active' : ''" th:text="#{home.rotate.title}"></a></li>
<li class="nav-item dropdown" th:classappend="${currentPage}=='pdf-to-img' OR ${currentPage}=='img-to-pdf' OR ${currentPage}=='file-to-pdf' OR ${currentPage}=='xlsx-to-pdf' ? 'active' : ''"><a class="nav-link dropdown-toggle" href="#" id="navbarDropdown" role="button" data-toggle="dropdown"
<li class="nav-item dropdown" th:classappend="${currentPage}=='pdf-to-img' OR ${currentPage}=='img-to-pdf' OR ${currentPage}=='pdf-to-pdfa' OR ${currentPage}=='file-to-pdf' OR ${currentPage}=='xlsx-to-pdf' ? 'active' : ''"><a class="nav-link dropdown-toggle" href="#" id="navbarDropdown" role="button" data-toggle="dropdown"
aria-haspopup="true" aria-expanded="false" th:text="#{navbar.convert}"></a>
<div class="dropdown-menu" aria-labelledby="navbarDropdown">
<a class="dropdown-item" href="#" th:href="@{pdf-to-img}" th:classappend="${currentPage}=='pdf-to-img' ? 'active' : ''" th:text="#{home.pdfToImage.title}"></a>
<a class="dropdown-item" href="#" th:href="@{img-to-pdf}" th:classappend="${currentPage}=='img-to-pdf' ? 'active' : ''" th:text="#{home.imageToPdf.title}"></a>
<a class="dropdown-item" href="#" th:href="@{file-to-pdf}" th:classappend="${currentPage}=='file-to-pdf' ? 'active' : ''" th:text="#{home.fileToPDF.title}"></a>
<a class="dropdown-item" href="#" th:href="@{pdf-to-pdfa}" th:classappend="${currentPage}=='pdf-to-pdfa' ? 'active' : ''" th:text="#{home.pdfToPDFA.title}"></a>
</div>
</li>

View File

@ -64,6 +64,7 @@
<div th:replace="~{fragments/card :: card(cardTitle=#{home.ocr.title}, cardText=#{home.ocr.desc}, cardLink='ocr-pdf')}"></div>
<div th:replace="~{fragments/card :: card(cardTitle=#{home.extractImages.title}, cardText=#{home.extractImages.desc}, cardLink='extract-images')}"></div>
<div th:replace="~{fragments/card :: card(cardTitle=#{home.pdfToPDFA.title}, cardText=#{home.pdfToPDFA.desc}, cardLink='pdf-to-pdfa')}"></div>