This commit is contained in:
Anthony Stirling
2025-10-23 11:20:16 +01:00
parent 5780b3a119
commit 4d9cf45009
20 changed files with 2628 additions and 176 deletions

View File

@@ -31,7 +31,8 @@ public class ConvertPdfJsonController {
description =
"Extracts PDF text, fonts, and metadata into an editable JSON structure that can be"
+ " transformed back into a PDF. Input:PDF Output:JSON Type:SISO")
public ResponseEntity<byte[]> convertPdfToJson(@ModelAttribute PDFFile request) throws Exception {
public ResponseEntity<byte[]> convertPdfToJson(@ModelAttribute PDFFile request)
throws Exception {
MultipartFile inputFile = request.getFileInput();
if (inputFile == null) {
throw ExceptionUtils.createNullArgumentException("fileInput");
@@ -44,8 +45,7 @@ public class ConvertPdfJsonController {
? Filenames.toSimpleFileName(originalName).replaceFirst("[.][^.]+$", "")
: "document";
String docName = baseName + ".json";
return WebResponseUtils.bytesToWebResponse(
jsonBytes, docName, MediaType.APPLICATION_JSON);
return WebResponseUtils.bytesToWebResponse(jsonBytes, docName, MediaType.APPLICATION_JSON);
}
@AutoJobPostMapping(consumes = "multipart/form-data", value = "/json/pdf")
@@ -55,7 +55,8 @@ public class ConvertPdfJsonController {
description =
"Rebuilds a PDF from the editable JSON structure generated by the PDF to JSON"
+ " endpoint. Input:JSON Output:PDF Type:SISO")
public ResponseEntity<byte[]> convertJsonToPdf(@ModelAttribute GeneralFile request) throws Exception {
public ResponseEntity<byte[]> convertJsonToPdf(@ModelAttribute GeneralFile request)
throws Exception {
MultipartFile jsonFile = request.getFileInput();
if (jsonFile == null) {
throw ExceptionUtils.createNullArgumentException("fileInput");

View File

@@ -0,0 +1,49 @@
package stirling.software.SPDF.model.json;
import java.util.List;
import java.util.Map;
import com.fasterxml.jackson.annotation.JsonInclude;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * JSON-serializable node describing one low-level (COS) PDF value.
 *
 * <p>A node carries a {@link Type} plus up to four payload slots ({@code value}, {@code items},
 * {@code entries}, {@code stream}); the field comments state which slot belongs to which kind of
 * value. Because of {@code JsonInclude.Include.NON_NULL}, slots left {@code null} are omitted
 * from the serialized JSON.
 *
 * <p>Accessors, {@code equals}/{@code hashCode}/{@code toString}, the builder and both
 * constructors are generated by Lombok. The all-args constructor follows field declaration
 * order, so reordering fields changes its signature.
 */
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
@JsonInclude(JsonInclude.Include.NON_NULL)
public class PdfJsonCosValue {
/**
 * Kind of value represented by a node: primitives (boolean, integer, float, name, string),
 * containers (array, dictionary), streams, and an explicit null.
 */
public enum Type {
NULL,
BOOLEAN,
INTEGER,
FLOAT,
NAME,
STRING,
ARRAY,
DICTIONARY,
STREAM
}
/** Kind of value held by this node; presumably selects which payload field is populated. */
private Type type;
/**
* Holds the decoded value for primitives (boolean, integer, float, name, string). For name
* values the stored value is the PDF name literal. For string values the content is Base64
* encoded so that arbitrary binary data can be transported safely.
*/
private Object value;
/** Nested values when this node is an array. */
private List<PdfJsonCosValue> items;
/** Nested values, keyed by entry name, when this node is a dictionary. */
private Map<String, PdfJsonCosValue> entries;
/** Stream payload when {@code type == STREAM}. */
private PdfJsonStream stream;
}

View File

@@ -19,6 +19,9 @@ public class PdfJsonDocument {
private PdfJsonMetadata metadata;
/** Optional XMP metadata packet stored as Base64. */
private String xmpMetadata;
@Builder.Default private List<PdfJsonFont> fonts = new ArrayList<>();
@Builder.Default private List<PdfJsonPage> pages = new ArrayList<>();

View File

@@ -14,12 +14,42 @@ import lombok.NoArgsConstructor;
@JsonInclude(JsonInclude.Include.NON_NULL)
public class PdfJsonFont {
/** PDF resource name (e.g. F1) used as the primary identifier. */
private String id;
private String name;
/** Logical page number that owns this font resource. */
private Integer pageNumber;
/** Stable UID combining page number and resource for diagnostics. */
private String uid;
/** Reported PostScript/Base font name. */
private String baseName;
/** Declared subtype in the COS dictionary. */
private String subtype;
/** Encoding dictionary or name. */
private String encoding;
/** CID system info for Type0 fonts. */
private PdfJsonFontCidSystemInfo cidSystemInfo;
/** True when the original PDF embedded the font program. */
private Boolean embedded;
/** Font program bytes (TTF/OTF/CFF/PFB) encoded as Base64. */
private String program;
/** Hint describing the font program type (ttf, otf, cff, pfb, etc.). */
private String programFormat;
/** ToUnicode stream encoded as Base64 when present. */
private String toUnicode;
/** Mapped Standard 14 font name when available. */
private String standard14Name;
/** Font descriptor flags copied from the source document. */
private Integer fontDescriptorFlags;
private String base64Data;
}

View File

@@ -0,0 +1,20 @@
package stirling.software.SPDF.model.json;
import com.fasterxml.jackson.annotation.JsonInclude;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * JSON-serializable holder for a font's CID system info (registry, ordering, supplement).
 *
 * <p>Properties that are {@code null} are omitted from the serialized JSON because of {@code
 * JsonInclude.Include.NON_NULL}. Accessors, the builder and both constructors are generated by
 * Lombok; the all-args constructor follows field declaration order.
 *
 * <p>NOTE(review): the field names suggest a mirror of the Registry/Ordering/Supplement entries
 * of a PDF CIDSystemInfo dictionary; confirm against the code that populates this class.
 */
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
@JsonInclude(JsonInclude.Include.NON_NULL)
public class PdfJsonFontCidSystemInfo {
/** Registry identifier; presumably the issuer of the character collection (e.g. "Adobe"). */
private String registry;
/** Ordering identifier; presumably the character collection name within the registry. */
private String ordering;
/** Supplement number; nullable, and omitted from JSON when absent. */
private Integer supplement;
}

View File

@@ -23,4 +23,10 @@ public class PdfJsonPage {
private Integer rotation;
@Builder.Default private List<PdfJsonTextElement> textElements = new ArrayList<>();
/** Serialized representation of the page resources dictionary. */
private PdfJsonCosValue resources;
/** Raw content streams associated with the page, preserved for lossless round-tripping. */
@Builder.Default private List<PdfJsonStream> contentStreams = new ArrayList<>();
}

View File

@@ -0,0 +1,27 @@
package stirling.software.SPDF.model.json;
import java.util.Map;
import com.fasterxml.jackson.annotation.JsonInclude;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * JSON-serializable representation of a PDF stream: its describing dictionary plus the raw
 * payload bytes as Base64 text.
 *
 * <p>Properties that are {@code null} are omitted from the serialized JSON because of {@code
 * JsonInclude.Include.NON_NULL}. Accessors, the builder and both constructors are generated by
 * Lombok; the all-args constructor takes {@code dictionary} first, then {@code rawData}.
 */
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
@JsonInclude(JsonInclude.Include.NON_NULL)
public class PdfJsonStream {
/**
* A dictionary of entries that describe the stream metadata (Filter, DecodeParms, etc.). Each
* entry is represented using {@link PdfJsonCosValue} so nested structures are supported.
*/
private Map<String, PdfJsonCosValue> dictionary;
/**
* Raw stream bytes in Base64 form. Data is stored exactly as it appeared in the source PDF;
* presumably this means any filters named in the dictionary are still applied (TODO confirm).
*/
private String rawData;
}

View File

@@ -20,6 +20,8 @@ public class PdfJsonTextElement {
private String text;
private String fontId;
private Float fontSize;
private Float fontMatrixSize;
private Float fontSizeInPt;
private Float x;
private Float y;
private Float width;

View File

@@ -7,6 +7,7 @@ logging.level.org.eclipse.jetty=WARN
#logging.level.org.opensaml=DEBUG
#logging.level.stirling.software.proprietary.security=DEBUG
logging.level.com.zaxxer.hikari=WARN
# NOTE(review): TRACE is the most verbose log level and takes effect wherever this properties
# file is loaded. Confirm it is intended beyond local debugging of the PDF<->JSON conversion.
logging.level.stirling.software.SPDF.service.PdfJsonConversionService=TRACE
spring.jpa.open-in-view=false
server.forward-headers-strategy=NATIVE
server.error.path=/error