auto rename

This commit is contained in:
Anthony Stirling 2023-07-04 23:25:21 +01:00
parent f92482d89e
commit 4e28bf03bd
7 changed files with 212 additions and 2 deletions

View File

@ -0,0 +1,164 @@
package stirling.software.SPDF.controller.api.other;
import java.io.IOException;
import java.util.Comparator;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.http.HttpStatus;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RequestPart;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.Parameter;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.utils.GeneralUtils;
import stirling.software.SPDF.utils.PdfUtils;
import stirling.software.SPDF.utils.WebResponseUtils;
import org.apache.pdfbox.pdmodel.*;
import org.apache.pdfbox.pdmodel.common.*;
import org.apache.pdfbox.pdmodel.PDPageContentStream.*;
import org.springframework.web.bind.annotation.*;
import org.springframework.http.*;
import org.springframework.web.multipart.MultipartFile;
import io.swagger.v3.oas.annotations.*;
import io.swagger.v3.oas.annotations.media.*;
import io.swagger.v3.oas.annotations.parameters.*;
import org.apache.pdfbox.pdmodel.font.PDType1Font;
import org.apache.pdfbox.text.TextPosition;
import org.apache.tomcat.util.http.ResponseUtil;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.net.URLEncoder;
import java.util.List;
import java.util.ArrayList;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.multipart.MultipartFile;
import com.itextpdf.io.font.constants.StandardFonts;
import com.itextpdf.kernel.font.PdfFont;
import com.itextpdf.kernel.font.PdfFontFactory;
import com.itextpdf.kernel.geom.Rectangle;
import com.itextpdf.kernel.pdf.PdfReader;
import com.itextpdf.kernel.pdf.PdfWriter;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfPage;
import com.itextpdf.kernel.pdf.canvas.PdfCanvas;
import com.itextpdf.layout.Canvas;
import com.itextpdf.layout.element.Paragraph;
import com.itextpdf.layout.properties.TextAlignment;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.media.Schema;
import java.io.*;
import org.apache.pdfbox.pdmodel.*;
import org.apache.pdfbox.text.*;
import org.springframework.web.bind.annotation.*;
import org.springframework.web.multipart.MultipartFile;
import io.swagger.v3.oas.annotations.*;
import io.swagger.v3.oas.annotations.media.Schema;
import org.springframework.http.ResponseEntity;
@RestController
@Tag(name = "Other", description = "Other APIs")
public class AutoRenameController {
private static final Logger logger = LoggerFactory.getLogger(AutoRenameController.class);
private static final float TITLE_FONT_SIZE_THRESHOLD = 20.0f;
private static final int LINE_LIMIT = 7;
@PostMapping(consumes = "multipart/form-data", value = "/auto-rename")
@Operation(summary = "Extract header from PDF file", description = "This endpoint accepts a PDF file and attempts to extract its title or header based on heuristics. Input:PDF Output:PDF Type:SISO")
public ResponseEntity<byte[]> extractHeader(
@RequestPart(value = "fileInput") @Parameter(description = "The input PDF file from which the header is to be extracted.", required = true) MultipartFile file,
@RequestParam(required = false, defaultValue = "false") @Parameter(description = "Flag indicating whether to use the first text as a fallback if no suitable title is found. Defaults to false.", required = false) Boolean useFirstTextAsFallback)
throws Exception {
PDDocument document = PDDocument.load(file.getInputStream());
PDFTextStripper reader = new PDFTextStripper() {
class LineInfo {
String text;
float fontSize;
LineInfo(String text, float fontSize) {
this.text = text;
this.fontSize = fontSize;
}
}
List<LineInfo> lineInfos = new ArrayList<>();
StringBuilder lineBuilder = new StringBuilder();
float lastY = -1;
float maxFontSizeInLine = 0.0f;
int lineCount = 0;
@Override
protected void processTextPosition(TextPosition text) {
if (lastY != text.getY() && lineCount < LINE_LIMIT) {
processLine();
lineBuilder = new StringBuilder(text.getUnicode());
maxFontSizeInLine = text.getFontSizeInPt();
lastY = text.getY();
lineCount++;
} else if (lineCount < LINE_LIMIT) {
lineBuilder.append(text.getUnicode());
if (text.getFontSizeInPt() > maxFontSizeInLine) {
maxFontSizeInLine = text.getFontSizeInPt();
}
}
}
private void processLine() {
if (lineBuilder.length() > 0 && lineCount < LINE_LIMIT) {
lineInfos.add(new LineInfo(lineBuilder.toString(), maxFontSizeInLine));
}
}
@Override
public String getText(PDDocument doc) throws IOException {
this.lineInfos.clear();
this.lineBuilder = new StringBuilder();
this.lastY = -1;
this.maxFontSizeInLine = 0.0f;
this.lineCount = 0;
super.getText(doc);
processLine(); // Process the last line
// Sort lines by font size in descending order and get the first one
lineInfos.sort(Comparator.comparing((LineInfo li) -> li.fontSize).reversed());
String title = lineInfos.isEmpty() ? null : lineInfos.get(0).text;
return title != null ? title : (useFirstTextAsFallback ? (lineInfos.isEmpty() ? null : lineInfos.get(lineInfos.size() - 1).text) : null);
}
};
String header = reader.getText(document);
// Sanitize the header string by removing characters not allowed in a filename.
if (header != null && header.length() < 255) {
header = header.replaceAll("[/\\\\?%*:|\"<>]", "");
return WebResponseUtils.pdfDocToWebResponse(document, header + ".pdf");
} else {
logger.info("File has no good title to be found");
return WebResponseUtils.pdfDocToWebResponse(document, file.getOriginalFilename());
}
}
}

View File

@ -140,4 +140,11 @@ public class OtherWebController {
return "other/auto-crop";
}
@GetMapping("/auto-rename")
@Hidden
public String autoRenameForm(Model model) {
model.addAttribute("currentPage", "auto-rename");
return "other/auto-rename";
}
}

View File

@ -138,6 +138,10 @@ home.pipeline.desc=Run multiple actions on PDFs by defining pipeline scripts
home.add-page-numbers.title=Add Page Numbers
home.add-page-numbers.desc=Add Page numbers throughout a document in a set location
home.auto-rename.title=Auto Rename PDF File
home.auto-rename.desc=Auto renames a PDF file based on its detected header
error.pdfPassword=The PDF Document is passworded and either the password was not provided or was incorrect
downloadPdf=Download PDF

View File

@ -0,0 +1,3 @@
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-fonts" viewBox="0 0 16 16">
<path d="M12.258 3h-8.51l-.083 2.46h.479c.26-1.544.758-1.783 2.693-1.845l.424-.013v7.827c0 .663-.144.82-1.3.923v.52h4.082v-.52c-1.162-.103-1.306-.26-1.306-.923V3.602l.431.013c1.934.062 2.434.301 2.693 1.846h.479L12.258 3z"/>
</svg>

After

Width:  |  Height:  |  Size: 357 B

View File

@ -116,7 +116,8 @@
<div th:replace="~{fragments/navbarEntry :: navbarEntry ('repair', 'images/wrench.svg', 'home.repair.title', 'home.repair.desc')}"></div>
<div th:replace="~{fragments/navbarEntry :: navbarEntry ('remove-blanks', 'images/blank-file.svg', 'home.removeBlanks.title', 'home.removeBlanks.desc')}"></div>
<div th:replace="~{fragments/navbarEntry :: navbarEntry ('compare', 'images/scales.svg', 'home.compare.title', 'home.compare.desc')}"></div>
<div th:replace="~{fragments/navbarEntry :: navbarEntry ('compare', 'images/add-page-numbers.svg', 'home.add-page-numbers.title', 'home.add-page-numbers.desc')}"></div>
<div th:replace="~{fragments/navbarEntry :: navbarEntry ('add-page-numbers', 'images/add-page-numbers.svg', 'home.add-page-numbers.title', 'home.add-page-numbers.desc')}"></div>
<div th:replace="~{fragments/navbarEntry :: navbarEntry ('auto-rename', 'images/fonts.svg', 'home.auto-rename.title', 'home.auto-rename.desc')}"></div>
</div>
</li>

View File

@ -69,7 +69,7 @@
<div th:replace="~{fragments/card :: card(id='scale-pages', cardTitle=#{home.scalePages.title}, cardText=#{home.scalePages.desc}, cardLink='scale-pages', svgPath='images/scale-pages.svg')}"></div>
<div th:replace="~{fragments/card :: card(id='add-page-numbers', cardTitle=#{home.add-page-numbers.title}, cardText=#{home.add-page-numbers.desc}, cardLink='add-page-numbers', svgPath='images/add-page-numbers.svg')}"></div>
<div th:replace="~{fragments/card :: card(id='auto-rename', cardTitle=#{home.auto-rename.title}, cardText=#{home.auto-rename.desc}, cardLink='auto-rename', svgPath='images/fonts.svg')}"></div>

View File

@ -0,0 +1,31 @@
<!DOCTYPE html>
<html th:lang="${#locale.toString()}" th:lang-direction="#{language.direction}" xmlns:th="http://www.thymeleaf.org">
<th:block th:insert="~{fragments/common :: head(title=#{auto-rename.title})}"></th:block>
<body>
<th:block th:insert="~{fragments/common :: game}"></th:block>
<div id="page-container">
<div id="content-wrap">
<div th:insert="~{fragments/navbar.html :: navbar}"></div>
<br> <br>
<div class="container">
<div class="row justify-content-center">
<div class="col-md-6">
<h2 th:text="#{auto-rename.header}"></h2>
<form method="post" enctype="multipart/form-data" th:action="@{auto-rename}">
<div th:replace="~{fragments/common :: fileSelector(name='fileInput', multiple=false, accept='application/pdf')}"></div>
<br>
<button type="submit" id="submitBtn" class="btn btn-primary" th:text="#{auto-rename.submit}"></button>
</form>
<p class="mt-3" th:text="#{auto-rename.credit}"></p>
</div>
</div>
</div>
</div>
<div th:insert="~{fragments/footer.html :: footer}"></div>
</div>
</body>
</html>