diff --git a/.github/workflows/PR-Demo-Comment.yml b/.github/workflows/PR-Demo-Comment.yml index c248d6572..188eac2a1 100644 --- a/.github/workflows/PR-Demo-Comment.yml +++ b/.github/workflows/PR-Demo-Comment.yml @@ -103,6 +103,7 @@ jobs: run: ./gradlew clean build env: DOCKER_ENABLE_SECURITY: false + STIRLING_PDF_DESKTOP_UI: false - name: Set up Docker Buildx uses: docker/setup-buildx-action@f7ce87c1d6bead3e36075b2ce75da1f6cc28aaca # v3.9.0 diff --git a/.github/workflows/push-docker.yml b/.github/workflows/push-docker.yml index a336fcd05..4b603e967 100644 --- a/.github/workflows/push-docker.yml +++ b/.github/workflows/push-docker.yml @@ -38,6 +38,7 @@ jobs: run: ./gradlew clean build env: DOCKER_ENABLE_SECURITY: false + STIRLING_PDF_DESKTOP_UI: false - name: Install cosign if: github.ref == 'refs/heads/master' diff --git a/Dockerfile b/Dockerfile index 45e28df3e..ccb8408a9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -35,47 +35,56 @@ ENV DOCKER_ENABLE_SECURITY=false \ HOME=/home/stirlingpdfuser \ PUID=1000 \ PGID=1000 \ - UMASK=022 + UMASK=022 \ + PYTHONPATH=/usr/lib/libreoffice/program:/opt/venv/lib/python3.12/site-packages \ + UNO_PATH=/usr/lib/libreoffice/program \ + URE_BOOTSTRAP=file:///usr/lib/libreoffice/program/fundamentalrc # JDK for app -RUN echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /etc/apk/repositories && \ - echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/community" | tee -a /etc/apk/repositories && \ +RUN echo "@main https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /etc/apk/repositories && \ + echo "@community https://dl-cdn.alpinelinux.org/alpine/edge/community" | tee -a /etc/apk/repositories && \ echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/testing" | tee -a /etc/apk/repositories && \ apk upgrade --no-cache -a && \ apk add --no-cache \ - ca-certificates \ - tzdata \ - tini \ - bash \ - curl \ - qpdf \ - shadow \ - su-exec \ - openssl \ - openssl-dev \ - openjdk21-jre \ -# Doc conversion - gcompat \ - libc6-compat \ - libreoffice \ -# pdftohtml - poppler-utils \ -# OCR MY PDF (unpaper for descew and other advanced features) - tesseract-ocr-data-eng \ -# CV - py3-opencv \ -# python3/pip - python3 \ - py3-pip && \ -# uno unoconv and HTML - pip install --break-system-packages --no-cache-dir --upgrade unoconv WeasyPrint pdf2image pillow && \ + ca-certificates \ + tzdata \ + tini \ + bash \ + curl \ + qpdf \ + shadow \ + su-exec \ + openssl \ + openssl-dev \ + openjdk21-jre \ + # Doc conversion + gcompat \ + libc6-compat \ + libreoffice \ + # pdftohtml + poppler-utils \ + # OCR MY PDF (unpaper for descew and other advanced features) + tesseract-ocr-data-eng \ + # CV + py3-opencv \ + python3 \ + py3-pip \ + py3-pillow@testing \ + py3-pdf2image@testing && \ + python3 -m venv /opt/venv && \ + export PATH="/opt/venv/bin:$PATH" && \ + pip install --upgrade pip && \ + pip install --no-cache-dir --upgrade unoserver weasyprint && \ + ln -s /usr/lib/libreoffice/program/uno.py /opt/venv/lib/python3.12/site-packages/ && \ + ln -s /usr/lib/libreoffice/program/unohelper.py /opt/venv/lib/python3.12/site-packages/ && \ + ln -s /usr/lib/libreoffice/program /opt/venv/lib/python3.12/site-packages/LibreOffice && \ mv /usr/share/tessdata /usr/share/tessdata-original && \ mkdir -p $HOME /configs /logs /customFiles /pipeline/watchedFolders /pipeline/finishedFolders && \ fc-cache -f -v && \ chmod +x /scripts/* && \ chmod +x /scripts/init.sh && \ -# User permissions + # User permissions addgroup -S stirlingpdfgroup && adduser -S stirlingpdfuser -G stirlingpdfgroup && \ chown -R stirlingpdfuser:stirlingpdfgroup $HOME /scripts /usr/share/fonts/opentype/noto /configs /customFiles /pipeline && \ chown stirlingpdfuser:stirlingpdfgroup /app.jar @@ -84,4 +93,4 @@ EXPOSE 8080/tcp # Set user and run command ENTRYPOINT ["tini", "--", "/scripts/init.sh"] -CMD ["java", "-Dfile.encoding=UTF-8", "-jar", "/app.jar"] +CMD ["sh", "-c", "java -Dfile.encoding=UTF-8 -jar /app.jar & /opt/venv/bin/unoserver --port 2003 --interface 0.0.0.0"] \ No newline at end of file diff --git a/Dockerfile.fat b/Dockerfile.fat index 5ff49aabd..c7b3e203d 100644 --- a/Dockerfile.fat +++ b/Dockerfile.fat @@ -9,6 +9,7 @@ COPY . . # Build the application with DOCKER_ENABLE_SECURITY=false RUN DOCKER_ENABLE_SECURITY=true \ +STIRLING_PDF_DESKTOP_UI=false \ ./gradlew clean build # Main stage @@ -37,12 +38,15 @@ ENV DOCKER_ENABLE_SECURITY=false \ PGID=1000 \ UMASK=022 \ FAT_DOCKER=true \ - INSTALL_BOOK_AND_ADVANCED_HTML_OPS=false + INSTALL_BOOK_AND_ADVANCED_HTML_OPS=false \ + PYTHONPATH=/usr/lib/libreoffice/program:/opt/venv/lib/python3.12/site-packages \ + UNO_PATH=/usr/lib/libreoffice/program \ + URE_BOOTSTRAP=file:///usr/lib/libreoffice/program/fundamentalrc # JDK for app -RUN echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /etc/apk/repositories && \ - echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/community" | tee -a /etc/apk/repositories && \ +RUN echo "@main https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /etc/apk/repositories && \ + echo "@community https://dl-cdn.alpinelinux.org/alpine/edge/community" | tee -a /etc/apk/repositories && \ echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/testing" | tee -a /etc/apk/repositories && \ apk upgrade --no-cache -a && \ apk add --no-cache \ @@ -65,14 +69,21 @@ RUN echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /et # OCR MY PDF (unpaper for descew and other advanced featues) qpdf \ tesseract-ocr-data-eng \ + font-terminus font-dejavu font-noto font-noto-cjk font-awesome font-noto-extra \ # CV py3-opencv \ -# python3/pip - python3 \ - py3-pip && \ -# uno unoconv and HTML - pip install --break-system-packages --no-cache-dir --upgrade unoconv WeasyPrint pdf2image pillow && \ + python3 \ + py3-pip \ + py3-pillow@testing \ + py3-pdf2image@testing && \ + python3 -m venv /opt/venv && \ + export PATH="/opt/venv/bin:$PATH" && \ + pip install --upgrade pip && \ + pip install --no-cache-dir --upgrade unoserver weasyprint && \ + ln -s /usr/lib/libreoffice/program/uno.py /opt/venv/lib/python3.12/site-packages/ && \ + ln -s /usr/lib/libreoffice/program/unohelper.py /opt/venv/lib/python3.12/site-packages/ && \ + ln -s /usr/lib/libreoffice/program /opt/venv/lib/python3.12/site-packages/LibreOffice && \ mv /usr/share/tessdata /usr/share/tessdata-original && \ mkdir -p $HOME /configs /logs /customFiles /pipeline/watchedFolders /pipeline/finishedFolders && \ fc-cache -f -v && \ @@ -84,7 +95,6 @@ RUN echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /et chown stirlingpdfuser:stirlingpdfgroup /app.jar EXPOSE 8080/tcp - # Set user and run command ENTRYPOINT ["tini", "--", "/scripts/init.sh"] -CMD ["java", "-Dfile.encoding=UTF-8", "-jar", "/app.jar"] +CMD ["sh", "-c", "java -Dfile.encoding=UTF-8 -jar /app.jar & /opt/venv/bin/unoserver --port 2003 --interface 0.0.0.0"] \ No newline at end of file diff --git a/build.gradle b/build.gradle index 4f0d2d57a..182e91f4b 100644 --- a/build.gradle +++ b/build.gradle @@ -15,9 +15,8 @@ plugins { import com.github.jk1.license.render.* ext { - springBootVersion = "3.4.1" + springBootVersion = "3.4.2" pdfboxVersion = "3.0.4" - logbackVersion = "1.5.7" imageioVersion = "3.12.0" lombokVersion = "1.18.36" bouncycastleVersion = "1.80" @@ -26,7 +25,7 @@ ext { } group = "stirling.software" -version = "0.41.0" +version = "0.42.0" java { // 17 is lowest but we support and recommend 21 @@ -294,14 +293,27 @@ configurations.all { } dependencies { + //tmp for security bumps + implementation 'ch.qos.logback:logback-core:1.5.15' + implementation 'ch.qos.logback:logback-classic:1.5.15' + + + // Exclude vulnerable BouncyCastle version used in tableau + configurations.all { + exclude group: 'org.bouncycastle', module: 'bcpkix-jdk15on' + exclude group: 'org.bouncycastle', module: 'bcutil-jdk15on' + exclude group: 'org.bouncycastle', module: 'bcmail-jdk15on' + } + if (System.getenv("STIRLING_PDF_DESKTOP_UI") != "false") { + implementation 'org.apache.commons:commons-compress:1.26.0' implementation "me.friwi:jcefmaven:127.3.1" implementation "org.openjfx:javafx-controls:21" implementation "org.openjfx:javafx-swing:21" } //security updates - implementation "org.springframework:spring-webmvc:6.2.2" + implementation "org.springframework:spring-webmvc:6.2.3" implementation("io.github.pixee:java-security-toolkit:1.2.1") @@ -320,8 +332,8 @@ dependencies { implementation "org.springframework.boot:spring-boot-starter-data-jpa:$springBootVersion" implementation "org.springframework.boot:spring-boot-starter-oauth2-client:$springBootVersion" - implementation "org.springframework.session:spring-session-core:$springBootVersion" - implementation "org.springframework:spring-jdbc:6.2.2" + implementation "org.springframework.session:spring-session-core:3.4.1" + implementation "org.springframework:spring-jdbc:6.2.3" implementation 'com.unboundid.product.scim2:scim2-sdk-client:2.3.5' // Don't upgrade h2database diff --git a/src/main/java/stirling/software/SPDF/UI/impl/DesktopBrowser.java b/src/main/java/stirling/software/SPDF/UI/impl/DesktopBrowser.java index ae5f76fd7..b221d0190 100644 --- a/src/main/java/stirling/software/SPDF/UI/impl/DesktopBrowser.java +++ b/src/main/java/stirling/software/SPDF/UI/impl/DesktopBrowser.java @@ -41,6 +41,7 @@ import me.friwi.jcefmaven.MavenCefAppHandlerAdapter; import me.friwi.jcefmaven.impl.progress.ConsoleProgressHandler; import stirling.software.SPDF.UI.WebBrowser; import stirling.software.SPDF.config.InstallationPathConfig; +import stirling.software.SPDF.utils.UIScaling; @Component @Slf4j @@ -215,7 +216,7 @@ public class DesktopBrowser implements WebBrowser { } }); - frame.setSize(1280, 768); + frame.setSize(UIScaling.scaleWidth(1280), UIScaling.scaleHeight(800)); frame.setLocationRelativeTo(null); loadIcon(); @@ -264,7 +265,9 @@ public class DesktopBrowser implements WebBrowser { frame.setOpacity(1.0f); frame.setUndecorated(false); frame.pack(); - frame.setSize(1280, 800); + frame.setSize( + UIScaling.scaleWidth(1280), + UIScaling.scaleHeight(800)); frame.setLocationRelativeTo(null); log.debug("Frame reconfigured"); diff --git a/src/main/java/stirling/software/SPDF/UI/impl/LoadingWindow.java b/src/main/java/stirling/software/SPDF/UI/impl/LoadingWindow.java index d6c0d27a0..b4479be39 100644 --- a/src/main/java/stirling/software/SPDF/UI/impl/LoadingWindow.java +++ b/src/main/java/stirling/software/SPDF/UI/impl/LoadingWindow.java @@ -1,12 +1,20 @@ package stirling.software.SPDF.UI.impl; import java.awt.*; +import java.io.BufferedReader; import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.HashSet; +import java.util.Set; +import java.util.concurrent.TimeUnit; import javax.imageio.ImageIO; import javax.swing.*; +import io.github.pixee.security.BoundedLineReader; + import lombok.extern.slf4j.Slf4j; +import stirling.software.SPDF.utils.UIScaling; @Slf4j public class LoadingWindow extends JDialog { @@ -16,6 +24,13 @@ public class LoadingWindow extends JDialog { private final JLabel brandLabel; private long startTime; + private Timer stuckTimer; + private long stuckThreshold = 4000; + private long timeAt90Percent = -1; + private volatile Process explorerProcess; + private static final boolean IS_WINDOWS = + System.getProperty("os.name").toLowerCase().contains("win"); + public LoadingWindow(Frame parent, String initialUrl) { super(parent, "Initializing Stirling-PDF", true); startTime = System.currentTimeMillis(); @@ -41,12 +56,12 @@ public class LoadingWindow extends JDialog { if (is != null) { Image img = ImageIO.read(is); if (img != null) { - Image scaledImg = img.getScaledInstance(48, 48, Image.SCALE_SMOOTH); + Image scaledImg = UIScaling.scaleIcon(img, 48, 48); JLabel iconLabel = new JLabel(new ImageIcon(scaledImg)); iconLabel.setHorizontalAlignment(SwingConstants.CENTER); gbc.gridy = 0; mainPanel.add(iconLabel, gbc); - log.debug("Icon loaded and scaled successfully"); + log.info("Icon loaded and scaled successfully"); } } } @@ -83,7 +98,8 @@ public class LoadingWindow extends JDialog { setUndecorated(false); // Set size and position - setSize(400, 200); + setSize(UIScaling.scaleWidth(400), UIScaling.scaleHeight(200)); + setLocationRelativeTo(parent); setAlwaysOnTop(true); setProgress(0); @@ -94,6 +110,163 @@ public class LoadingWindow extends JDialog { System.currentTimeMillis() - startTime); } + private void checkAndRefreshExplorer() { + if (!IS_WINDOWS) { + return; + } + if (timeAt90Percent == -1) { + timeAt90Percent = System.currentTimeMillis(); + stuckTimer = + new Timer( + 1000, + e -> { + long currentTime = System.currentTimeMillis(); + if (currentTime - timeAt90Percent > stuckThreshold) { + try { + log.debug( + "Attempting Windows explorer refresh due to 90% stuck state"); + String currentDir = System.getProperty("user.dir"); + + // Store current explorer PIDs before we start new one + Set existingPids = new HashSet<>(); + ProcessBuilder listExplorer = + new ProcessBuilder( + "cmd", + "/c", + "wmic", + "process", + "where", + "name='explorer.exe'", + "get", + "ProcessId", + "/format:csv"); + Process process = listExplorer.start(); + BufferedReader reader = + new BufferedReader( + new InputStreamReader( + process.getInputStream())); + String line; + while ((line = + BoundedLineReader.readLine( + reader, 5_000_000)) + != null) { + if (line.matches(".*\\d+.*")) { // Contains numbers + String[] parts = line.trim().split(","); + if (parts.length >= 2) { + existingPids.add( + parts[parts.length - 1].trim()); + } + } + } + process.waitFor(2, TimeUnit.SECONDS); + + // Start new explorer + ProcessBuilder pb = + new ProcessBuilder( + "cmd", + "/c", + "start", + "/min", + "/b", + "explorer.exe", + currentDir); + pb.redirectErrorStream(true); + explorerProcess = pb.start(); + + // Schedule cleanup + Timer cleanupTimer = + new Timer( + 2000, + cleanup -> { + try { + // Find new explorer processes + ProcessBuilder findNewExplorer = + new ProcessBuilder( + "cmd", + "/c", + "wmic", + "process", + "where", + "name='explorer.exe'", + "get", + "ProcessId", + "/format:csv"); + Process newProcess = + findNewExplorer.start(); + BufferedReader newReader = + new BufferedReader( + new InputStreamReader( + newProcess + .getInputStream())); + String newLine; + while ((newLine = + BoundedLineReader + .readLine( + newReader, + 5_000_000)) + != null) { + if (newLine.matches( + ".*\\d+.*")) { + String[] parts = + newLine.trim() + .split(","); + if (parts.length >= 2) { + String pid = + parts[ + parts.length + - 1] + .trim(); + if (!existingPids + .contains( + pid)) { + log.debug( + "Found new explorer.exe with PID: " + + pid); + ProcessBuilder + killProcess = + new ProcessBuilder( + "taskkill", + "/PID", + pid, + "/F"); + killProcess + .redirectErrorStream( + true); + Process killResult = + killProcess + .start(); + killResult.waitFor( + 2, + TimeUnit + .SECONDS); + log.debug( + "Explorer process terminated: " + + pid); + } + } + } + } + newProcess.waitFor( + 2, TimeUnit.SECONDS); + } catch (Exception ex) { + log.error( + "Error cleaning up Windows explorer process", + ex); + } + }); + cleanupTimer.setRepeats(false); + cleanupTimer.start(); + stuckTimer.stop(); + } catch (Exception ex) { + log.error("Error refreshing Windows explorer", ex); + } + } + }); + stuckTimer.setRepeats(true); + stuckTimer.start(); + } + } + public void setProgress(final int progress) { SwingUtilities.invokeLater( () -> { @@ -115,11 +288,23 @@ public class LoadingWindow extends JDialog { // Add thread state logging Thread currentThread = Thread.currentThread(); - log.debug( + log.info( "Current thread state - Name: {}, State: {}, Priority: {}", currentThread.getName(), currentThread.getState(), currentThread.getPriority()); + + if (validProgress >= 90 && validProgress < 95) { + checkAndRefreshExplorer(); + } else { + // Reset the timer if we move past 95% + if (validProgress >= 95) { + if (stuckTimer != null) { + stuckTimer.stop(); + } + timeAt90Percent = -1; + } + } } progressBar.setValue(validProgress); @@ -145,7 +330,7 @@ public class LoadingWindow extends JDialog { statusLabel.setText(validStatus); // Log UI state when status changes - log.debug( + log.info( "UI State - Window visible: {}, Progress: {}%, Status: {}", isVisible(), progressBar.getValue(), validStatus); diff --git a/src/main/java/stirling/software/SPDF/config/ExternalAppDepConfig.java b/src/main/java/stirling/software/SPDF/config/ExternalAppDepConfig.java index 9d1ac1fcc..886244ff4 100644 --- a/src/main/java/stirling/software/SPDF/config/ExternalAppDepConfig.java +++ b/src/main/java/stirling/software/SPDF/config/ExternalAppDepConfig.java @@ -21,9 +21,9 @@ public class ExternalAppDepConfig { { put("soffice", List.of("LibreOffice")); - put("weasyprint", List.of("Weasyprint")); + put("/opt/venv/bin/weasyprint", List.of("Weasyprint")); put("pdftohtml", List.of("Pdftohtml")); - put("unoconv", List.of("Unoconv")); + put("/opt/venv/bin/unoconvert", List.of("Unoconv")); put("qpdf", List.of("qpdf")); put("tesseract", List.of("tesseract")); } @@ -101,9 +101,9 @@ public class ExternalAppDepConfig { checkDependencyAndDisableGroup("tesseract"); checkDependencyAndDisableGroup("soffice"); checkDependencyAndDisableGroup("qpdf"); - checkDependencyAndDisableGroup("weasyprint"); + checkDependencyAndDisableGroup("/opt/venv/bin/weasyprint"); checkDependencyAndDisableGroup("pdftohtml"); - checkDependencyAndDisableGroup("unoconv"); + checkDependencyAndDisableGroup("/opt/venv/bin/unoconvert"); // Special handling for Python/OpenCV dependencies boolean pythonAvailable = isCommandAvailable("python3") || isCommandAvailable("python"); if (!pythonAvailable) { diff --git a/src/main/java/stirling/software/SPDF/controller/api/RearrangePagesPDFController.java b/src/main/java/stirling/software/SPDF/controller/api/RearrangePagesPDFController.java index 77601b5d0..35f6cf07f 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/RearrangePagesPDFController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/RearrangePagesPDFController.java @@ -174,7 +174,38 @@ public class RearrangePagesPDFController { return newPageOrderZeroBased; } - private List processSortTypes(String sortTypes, int totalPages) { + private List duplicate(int totalPages, String pageOrder) { + List newPageOrder = new ArrayList<>(); + int duplicateCount; + + try { + // Parse the duplicate count from pageOrder + duplicateCount = + pageOrder != null && !pageOrder.isEmpty() + ? Integer.parseInt(pageOrder.trim()) + : 2; // Default to 2 if not specified + } catch (NumberFormatException e) { + log.error("Invalid duplicate count specified", e); + duplicateCount = 2; // Default to 2 if invalid input + } + + // Validate duplicate count + if (duplicateCount < 1) { + duplicateCount = 2; // Default to 2 if invalid input + } + + // For each page in the document + for (int pageNum = 0; pageNum < totalPages; pageNum++) { + // Add the current page index duplicateCount times + for (int dupCount = 0; dupCount < duplicateCount; dupCount++) { + newPageOrder.add(pageNum); + } + } + + return newPageOrder; + } + + private List processSortTypes(String sortTypes, int totalPages, String pageOrder) { try { SortTypes mode = SortTypes.valueOf(sortTypes.toUpperCase()); switch (mode) { @@ -196,6 +227,8 @@ public class RearrangePagesPDFController { return removeLast(totalPages); case REMOVE_FIRST_AND_LAST: return removeFirstAndLast(totalPages); + case DUPLICATE: + return duplicate(totalPages, pageOrder); default: throw new IllegalArgumentException("Unsupported custom mode"); } @@ -223,8 +256,10 @@ public class RearrangePagesPDFController { String[] pageOrderArr = pageOrder != null ? pageOrder.split(",") : new String[0]; int totalPages = document.getNumberOfPages(); List newPageOrder; - if (sortType != null && sortType.length() > 0) { - newPageOrder = processSortTypes(sortType, totalPages); + if (sortType != null + && sortType.length() > 0 + && !"custom".equals(sortType.toLowerCase())) { + newPageOrder = processSortTypes(sortType, totalPages, pageOrder); } else { newPageOrder = GeneralUtils.parsePageList(pageOrderArr, totalPages, false); } diff --git a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertOfficeController.java b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertOfficeController.java index 50a251e4e..b7bb699c5 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertOfficeController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertOfficeController.java @@ -61,13 +61,13 @@ public class ConvertOfficeController { List command = new ArrayList<>( Arrays.asList( - "unoconv", - "-vvv", - "-f", + "/opt/venv/bin/unoconvert", + "--port", + "2003", + "--convert-to", "pdf", - "-o", - tempOutputFile.toString(), - tempInputFile.toString())); + tempInputFile.toString(), + tempOutputFile.toString())); ProcessExecutorResult returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE) .runCommandWithOutputHandling(command); diff --git a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertWebsiteToPDF.java b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertWebsiteToPDF.java index d6ae1a478..dccc4dffe 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertWebsiteToPDF.java +++ b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertWebsiteToPDF.java @@ -65,7 +65,7 @@ public class ConvertWebsiteToPDF { // Prepare the WeasyPrint command List command = new ArrayList<>(); - command.add("weasyprint"); + command.add("/opt/venv/bin/weasyprint"); command.add(URL); command.add(tempOutputFile.toString()); diff --git a/src/main/java/stirling/software/SPDF/controller/api/converters/ExtractCSVController.java b/src/main/java/stirling/software/SPDF/controller/api/converters/ExtractCSVController.java index f0a4c267f..7f8e70878 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/converters/ExtractCSVController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/converters/ExtractCSVController.java @@ -1,7 +1,14 @@ package stirling.software.SPDF.controller.api.converters; +import java.io.ByteArrayOutputStream; +import java.io.IOException; import java.io.StringWriter; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Collections; import java.util.List; +import java.util.zip.ZipEntry; +import java.util.zip.ZipOutputStream; import org.apache.commons.csv.CSVFormat; import org.apache.commons.csv.QuoteMode; @@ -18,18 +25,18 @@ import org.springframework.web.bind.annotation.RestController; import io.swagger.v3.oas.annotations.Operation; import io.swagger.v3.oas.annotations.tags.Tag; - -import stirling.software.SPDF.model.api.extract.PDFFilePage; +import lombok.extern.slf4j.Slf4j; +import stirling.software.SPDF.model.api.PDFWithPageNums; import stirling.software.SPDF.pdf.FlexibleCSVWriter; import technology.tabula.ObjectExtractor; import technology.tabula.Page; import technology.tabula.Table; import technology.tabula.extractors.SpreadsheetExtractionAlgorithm; -import technology.tabula.writers.Writer; @RestController @RequestMapping("/api/v1/convert") @Tag(name = "Convert", description = "Convert APIs") +@Slf4j public class ExtractCSVController { @PostMapping(value = "/pdf/csv", consumes = "multipart/form-data") @@ -37,31 +44,80 @@ public class ExtractCSVController { summary = "Extracts a CSV document from a PDF", description = "This operation takes an input PDF file and returns CSV file of whole page. Input:PDF Output:CSV Type:SISO") - public ResponseEntity PdfToCsv(@ModelAttribute PDFFilePage form) throws Exception { - StringWriter writer = new StringWriter(); + public ResponseEntity pdfToCsv(@ModelAttribute PDFWithPageNums form) throws Exception { + String baseName = getBaseName(form.getFileInput().getOriginalFilename()); + List csvEntries = new ArrayList<>(); + try (PDDocument document = Loader.loadPDF(form.getFileInput().getBytes())) { - CSVFormat format = - CSVFormat.EXCEL.builder().setEscape('"').setQuoteMode(QuoteMode.ALL).build(); - Writer csvWriter = new FlexibleCSVWriter(format); + List pages = form.getPageNumbersList(document, true); SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm(); - try (ObjectExtractor extractor = new ObjectExtractor(document)) { - Page page = extractor.extract(form.getPageId()); - List tables = sea.extract(page); - csvWriter.write(writer, tables); + CSVFormat format = CSVFormat.EXCEL.builder() + .setEscape('"') + .setQuoteMode(QuoteMode.ALL) + .build(); + + for (int pageNum : pages) { + try (ObjectExtractor extractor = new ObjectExtractor(document)) { + log.info("{}",pageNum); + Page page = extractor.extract(pageNum); + List
tables = sea.extract(page); + + for (int i = 0; i < tables.size(); i++) { + StringWriter sw = new StringWriter(); + FlexibleCSVWriter csvWriter = new FlexibleCSVWriter(format); + csvWriter.write(sw, Collections.singletonList(tables.get(i))); + + String entryName = generateEntryName(baseName, pageNum, i + 1); + csvEntries.add(new CsvEntry(entryName, sw.toString())); + } + } + } + + if (csvEntries.isEmpty()) { + return ResponseEntity.noContent().build(); + } else if (csvEntries.size() == 1) { + return createCsvResponse(csvEntries.get(0), baseName); + } else { + return createZipResponse(csvEntries, baseName); } } - - HttpHeaders headers = new HttpHeaders(); - headers.setContentDisposition( - ContentDisposition.builder("attachment") - .filename( - form.getFileInput() - .getOriginalFilename() - .replaceFirst("[.][^.]+$", "") - + "_extracted.csv") - .build()); - headers.setContentType(MediaType.parseMediaType("text/csv")); - - return ResponseEntity.ok().headers(headers).body(writer.toString()); } + + private ResponseEntity createZipResponse(List entries, String baseName) throws IOException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + try (ZipOutputStream zipOut = new ZipOutputStream(baos)) { + for (CsvEntry entry : entries) { + ZipEntry zipEntry = new ZipEntry(entry.filename()); + zipOut.putNextEntry(zipEntry); + zipOut.write(entry.content().getBytes(StandardCharsets.UTF_8)); + zipOut.closeEntry(); + } + } + + HttpHeaders headers = new HttpHeaders(); + headers.setContentDisposition(ContentDisposition.builder("attachment") + .filename(baseName + "_extracted.zip").build()); + headers.setContentType(MediaType.parseMediaType("application/zip")); + + return ResponseEntity.ok().headers(headers).body(baos.toByteArray()); + } + + private ResponseEntity createCsvResponse(CsvEntry entry, String baseName) { + HttpHeaders headers = new HttpHeaders(); + headers.setContentDisposition(ContentDisposition.builder("attachment") + .filename(baseName + "_extracted.csv").build()); + headers.setContentType(MediaType.parseMediaType("text/csv")); + + return ResponseEntity.ok().headers(headers).body(entry.content()); + } + + private String generateEntryName(String baseName, int pageNum, int tableIndex) { + return String.format("%s_p%d_t%d.csv", baseName, pageNum, tableIndex); + } + + private String getBaseName(String filename) { + return filename.replaceFirst("[.][^.]+$", ""); + } + + private record CsvEntry(String filename, String content) {} } diff --git a/src/main/java/stirling/software/SPDF/model/SortTypes.java b/src/main/java/stirling/software/SPDF/model/SortTypes.java index a7a699b87..14d12b5a9 100644 --- a/src/main/java/stirling/software/SPDF/model/SortTypes.java +++ b/src/main/java/stirling/software/SPDF/model/SortTypes.java @@ -1,6 +1,7 @@ package stirling.software.SPDF.model; public enum SortTypes { + CUSTOM, REVERSE_ORDER, DUPLEX_SORT, BOOKLET_SORT, @@ -10,4 +11,5 @@ public enum SortTypes { REMOVE_FIRST, REMOVE_LAST, REMOVE_FIRST_AND_LAST, + DUPLICATE } diff --git a/src/main/java/stirling/software/SPDF/model/api/PDFWithPageNums.java b/src/main/java/stirling/software/SPDF/model/api/PDFWithPageNums.java index 4eaabe87f..1e7ae22af 100644 --- a/src/main/java/stirling/software/SPDF/model/api/PDFWithPageNums.java +++ b/src/main/java/stirling/software/SPDF/model/api/PDFWithPageNums.java @@ -8,6 +8,7 @@ import org.apache.pdfbox.pdmodel.PDDocument; import io.swagger.v3.oas.annotations.Hidden; import io.swagger.v3.oas.annotations.media.Schema; +import io.swagger.v3.oas.annotations.media.Schema.RequiredMode; import lombok.Data; import lombok.EqualsAndHashCode; @@ -25,7 +26,9 @@ public class PDFWithPageNums extends PDFFile { description = "The pages to select, Supports ranges (e.g., '1,3,5-9'), or 'all' or functions in the" + " format 'an+b' where 'a' is the multiplier of the page number 'n', and 'b' is a" - + " constant (e.g., '2n+1', '3n', '6n-5')\"") + + " constant (e.g., '2n+1', '3n', '6n-5')\"", + defaultValue = "all", + requiredMode = RequiredMode.NOT_REQUIRED) private String pageNumbers; @Hidden @@ -41,9 +44,9 @@ public class PDFWithPageNums extends PDFFile { } @Hidden - public List getPageNumbersList(PDDocument doc, boolean zeroCount) { + public List getPageNumbersList(PDDocument doc, boolean oneBased) { int pageCount = 0; pageCount = doc.getNumberOfPages(); - return GeneralUtils.parsePageList(pageNumbers, pageCount, zeroCount); + return GeneralUtils.parsePageList(pageNumbers, pageCount, oneBased); } } diff --git a/src/main/java/stirling/software/SPDF/model/api/extract/PDFFilePage.java b/src/main/java/stirling/software/SPDF/model/api/extract/PDFFilePage.java deleted file mode 100644 index faf955c64..000000000 --- a/src/main/java/stirling/software/SPDF/model/api/extract/PDFFilePage.java +++ /dev/null @@ -1,15 +0,0 @@ -package stirling.software.SPDF.model.api.extract; - -import io.swagger.v3.oas.annotations.media.Schema; - -import lombok.Data; -import lombok.EqualsAndHashCode; -import stirling.software.SPDF.model.api.PDFFile; - -@Data -@EqualsAndHashCode(callSuper = true) -public class PDFFilePage extends PDFFile { - - @Schema(description = "Number of chosen page", type = "number") - private int pageId; -} diff --git a/src/main/java/stirling/software/SPDF/model/api/general/RearrangePagesRequest.java b/src/main/java/stirling/software/SPDF/model/api/general/RearrangePagesRequest.java index 7ba2d84ce..7cda530e1 100644 --- a/src/main/java/stirling/software/SPDF/model/api/general/RearrangePagesRequest.java +++ b/src/main/java/stirling/software/SPDF/model/api/general/RearrangePagesRequest.java @@ -15,6 +15,8 @@ public class RearrangePagesRequest extends PDFWithPageNums { implementation = SortTypes.class, description = "The custom mode for page rearrangement. Valid values are:\n" + + "CUSTOM: Uses order defined in PageNums " + + "DUPLICATE: Duplicate pages n times (if Page order defined as 4, then duplicates each page 4 times)" + "REVERSE_ORDER: Reverses the order of all pages.\n" + "DUPLEX_SORT: Sorts pages as if all fronts were scanned then all backs in reverse (1, n, 2, n-1, ...). " + "BOOKLET_SORT: Arranges pages for booklet printing (last, first, second, second last, ...).\n" diff --git a/src/main/java/stirling/software/SPDF/model/api/misc/AddPageNumbersRequest.java b/src/main/java/stirling/software/SPDF/model/api/misc/AddPageNumbersRequest.java index 9f3b5266f..4776ddc08 100644 --- a/src/main/java/stirling/software/SPDF/model/api/misc/AddPageNumbersRequest.java +++ b/src/main/java/stirling/software/SPDF/model/api/misc/AddPageNumbersRequest.java @@ -1,6 +1,7 @@ package stirling.software.SPDF.model.api.misc; import io.swagger.v3.oas.annotations.media.Schema; +import io.swagger.v3.oas.annotations.media.Schema.RequiredMode; import lombok.Data; import lombok.EqualsAndHashCode; @@ -11,24 +12,49 @@ import stirling.software.SPDF.model.api.PDFWithPageNums; public class AddPageNumbersRequest extends PDFWithPageNums { @Schema( - description = "Custom margin: small/medium/large", - allowableValues = {"small", "medium", "large"}) + description = "Custom margin: small/medium/large/x-large", + allowableValues = {"small", "medium", "large", "x-large"}, + defaultValue = "medium", + requiredMode = RequiredMode.NOT_REQUIRED) private String customMargin; + @Schema( + description = "Font size for page numbers", + minimum = "1", + requiredMode = RequiredMode.REQUIRED) private float fontSize; + + @Schema( + description = "Font type for page numbers", + allowableValues = {"helvetica", "courier", "times"}, + requiredMode = RequiredMode.REQUIRED) private String fontType; - @Schema(description = "Position: 1 of 9 positions", minimum = "1", maximum = "9") + @Schema( + description = + "Position: 1-9 representing positions on the page (1=top-left, 5=center, 9=bottom-right)", + minimum = "1", + maximum = "9", + requiredMode = RequiredMode.REQUIRED) private int position; - @Schema(description = "Starting number", minimum = "1") + @Schema( + description = "Starting number for page numbering", + minimum = "1", + requiredMode = RequiredMode.REQUIRED) private int startingNumber; - @Schema(description = "Which pages to number, default all") + @Schema( + description = "Which pages to number (e.g. '1,3-5,7' or 'all')", + defaultValue = "all", + requiredMode = RequiredMode.NOT_REQUIRED) private String pagesToNumber; @Schema( description = - "Custom text: defaults to just number but can have things like \"Page {n} of {p}\"") + "Custom text pattern. Available variables: {n}=current page number, {total}=total pages, {filename}=original filename", + example = "Page {n} of {total}", + defaultValue = "{n}", + requiredMode = RequiredMode.NOT_REQUIRED) private String customText; } diff --git a/src/main/java/stirling/software/SPDF/utils/FileToPdf.java b/src/main/java/stirling/software/SPDF/utils/FileToPdf.java index e46bcb16a..c28cda6e7 100644 --- a/src/main/java/stirling/software/SPDF/utils/FileToPdf.java +++ b/src/main/java/stirling/software/SPDF/utils/FileToPdf.java @@ -50,7 +50,7 @@ public class FileToPdf { List command = new ArrayList<>(); if (!htmlFormatsInstalled) { - command.add("weasyprint"); + command.add("/opt/venv/bin/weasyprint"); command.add("-e"); command.add("utf-8"); command.add("-v"); diff --git a/src/main/java/stirling/software/SPDF/utils/GeneralUtils.java b/src/main/java/stirling/software/SPDF/utils/GeneralUtils.java index 15d866e29..ea4b235b3 100644 --- a/src/main/java/stirling/software/SPDF/utils/GeneralUtils.java +++ b/src/main/java/stirling/software/SPDF/utils/GeneralUtils.java @@ -210,8 +210,7 @@ public class GeneralUtils { result.addAll(handlePart(page, totalPages, offset)); } } - return new ArrayList<>( - new java.util.LinkedHashSet<>(result)); // Remove duplicates and maintain order + return result; } public static List evaluateNFunc(String expression, int maxValue) { diff --git a/src/main/java/stirling/software/SPDF/utils/UIScaling.java b/src/main/java/stirling/software/SPDF/utils/UIScaling.java new file mode 100644 index 000000000..fe1364c9b --- /dev/null +++ b/src/main/java/stirling/software/SPDF/utils/UIScaling.java @@ -0,0 +1,67 @@ +package stirling.software.SPDF.utils; + +import java.awt.*; + +import javax.swing.*; + +public class UIScaling { + private static final double BASE_RESOLUTION_WIDTH = 1920.0; + private static final double BASE_RESOLUTION_HEIGHT = 1080.0; + + public static double getWidthScaleFactor() { + Dimension screenSize = Toolkit.getDefaultToolkit().getScreenSize(); + return screenSize.getWidth() / BASE_RESOLUTION_WIDTH; + } + + public static double getHeightScaleFactor() { + Dimension screenSize = Toolkit.getDefaultToolkit().getScreenSize(); + return screenSize.getHeight() / BASE_RESOLUTION_HEIGHT; + } + + public static int scaleWidth(int value) { + return (int) Math.round(value * getWidthScaleFactor()); + } + + public static int scaleHeight(int value) { + return (int) Math.round(value * getHeightScaleFactor()); + } + + public static Dimension scale(Dimension dim) { + return new Dimension(scaleWidth(dim.width), scaleHeight(dim.height)); + } + + public static Insets scale(Insets insets) { + return new Insets( + scaleHeight(insets.top), + scaleWidth(insets.left), + scaleHeight(insets.bottom), + scaleWidth(insets.right)); + } + + public static Font scaleFont(Font font) { + // For fonts, we'll use the smaller scale factor to ensure readability + double scaleFactor = Math.min(getWidthScaleFactor(), getHeightScaleFactor()); + return font.deriveFont((float) (font.getSize() * scaleFactor)); + } + + // Utility method for aspect ratio aware icon scaling + public static Image scaleIcon(Image icon, int targetWidth, int targetHeight) { + if (icon == null) return null; + + double widthScale = getWidthScaleFactor(); + double heightScale = getHeightScaleFactor(); + + int scaledWidth = (int) Math.round(targetWidth * widthScale); + int scaledHeight = (int) Math.round(targetHeight * heightScale); + + // Maintain aspect ratio for icons + double aspectRatio = (double) icon.getWidth(null) / icon.getHeight(null); + if (scaledWidth / scaledHeight > aspectRatio) { + scaledWidth = (int) (scaledHeight * aspectRatio); + } else { + scaledHeight = (int) (scaledWidth / aspectRatio); + } + + return icon.getScaledInstance(scaledWidth, scaledHeight, Image.SCALE_SMOOTH); + } +} diff --git a/src/main/resources/messages_en_GB.properties b/src/main/resources/messages_en_GB.properties index 6e6b234b2..3f7320f8a 100644 --- a/src/main/resources/messages_en_GB.properties +++ b/src/main/resources/messages_en_GB.properties @@ -992,6 +992,7 @@ pdfOrganiser.mode.7=Remove First pdfOrganiser.mode.8=Remove Last pdfOrganiser.mode.9=Remove First and Last pdfOrganiser.mode.10=Odd-Even Merge +pdfOrganiser.mode.11=Duplicate all pages pdfOrganiser.placeholder=(e.g. 1,3,2 or 4-8,2,10-12 or 2n-1) diff --git a/src/main/resources/static/js/pages/pdf-to-csv.js b/src/main/resources/static/js/pages/pdf-to-csv.js index 6be3c2ed6..9a06aac5b 100644 --- a/src/main/resources/static/js/pages/pdf-to-csv.js +++ b/src/main/resources/static/js/pages/pdf-to-csv.js @@ -17,7 +17,7 @@ let fileInput = document.getElementById('fileInput-input'); let file; let pdfDoc = null; -let pageId = document.getElementById('pageId'); +let pageNumbers = document.getElementById('pageNumbers'); let currentPage = 1; let totalPages = 0; @@ -31,7 +31,7 @@ let timeId = null; // timeout id for resizing canvases event btn1Object.addEventListener('click', function (e) { if (currentPage !== 1) { currentPage = currentPage - 1; - pageId.value = currentPage; + pageNumbers.value = currentPage; if (file.type === 'application/pdf') { let reader = new FileReader(); @@ -52,7 +52,7 @@ btn1Object.addEventListener('click', function (e) { btn2Object.addEventListener('click', function (e) { if (currentPage !== totalPages) { currentPage = currentPage + 1; - pageId.value = currentPage; + pageNumbers.value = currentPage; if (file.type === 'application/pdf') { let reader = new FileReader(); @@ -81,7 +81,7 @@ function renderPageFromFile(file) { totalPages = pdf.numPages; renderPage(currentPage); }); - pageId.value = currentPage; + pageNumbers.value = currentPage; }; reader.readAsArrayBuffer(file); document.getElementById('pagination-button-container').style.display = 'flex'; diff --git a/src/main/resources/templates/convert/pdf-to-csv.html b/src/main/resources/templates/convert/pdf-to-csv.html index b9e27b72c..3272766a1 100644 --- a/src/main/resources/templates/convert/pdf-to-csv.html +++ b/src/main/resources/templates/convert/pdf-to-csv.html @@ -17,7 +17,7 @@
- +
diff --git a/src/main/resources/templates/pdf-organizer.html b/src/main/resources/templates/pdf-organizer.html index 8d698869d..746a6bafd 100644 --- a/src/main/resources/templates/pdf-organizer.html +++ b/src/main/resources/templates/pdf-organizer.html @@ -36,6 +36,7 @@ + @@ -51,7 +52,7 @@