Merge branch 'main' into main

This commit is contained in:
Calvin Li 2025-07-14 15:03:06 -04:00 committed by GitHub
commit 357db930fb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
29 changed files with 2621 additions and 742 deletions

View File

@ -119,7 +119,9 @@
"EditorConfig.EditorConfig", // EditorConfig support for maintaining consistent coding styles
"ms-azuretools.vscode-docker", // Docker extension for Visual Studio Code
"charliermarsh.ruff", // Ruff extension for Ruff language support
"github.vscode-github-actions" // GitHub Actions extension for Visual Studio Code
"github.vscode-github-actions", // GitHub Actions extension for Visual Studio Code
"stylelint.vscode-stylelint", // Stylelint extension for CSS and SCSS linting
"redhat.vscode-yaml" // YAML extension for Visual Studio Code
]
}
},

View File

@ -2,37 +2,46 @@ version: 1
labels:
- label: "Bugfix"
title: '^fix:.*'
title: '^fix(\([^)]*\))?:|^fix:.*'
- label: "enhancement"
title: '^feat:.*'
title: '^feat(\([^)]*\))?:|^feat:.*'
- label: "build"
title: '^build:.*'
title: '^build(\([^)]*\))?:|^build:.*'
- label: "chore"
title: '^chore:.*'
title: '^chore(\([^)]*\))?:|^chore:.*'
- label: "ci"
title: '^ci:.*'
title: '^ci(\([^)]*\))?:|^ci:.*'
- label: "ci"
title: '^.*\(ci\):.*'
- label: "perf"
title: '^perf:.*'
title: '^perf(\([^)]*\))?:|^perf:.*'
- label: "refactor"
title: '^refactor:.*'
title: '^refactor(\([^)]*\))?:|^refactor:.*'
- label: "revert"
title: '^revert:.*'
title: '^revert(\([^)]*\))?:|^revert:.*'
- label: "style"
title: '^style:.*'
title: '^style(\([^)]*\))?:|^style:.*'
- label: "Documentation"
title: '^docs:.*'
title: '^docs(\([^)]*\))?:|^docs:.*'
- label: "dependencies"
title: '^deps(\([^)]*\))?:|^deps:.*'
- label: "dependencies"
title: '^.*\(deps\):.*'
- label: 'API'
title: '.*openapi.*'
title: '.*openapi.*|.*swagger.*|.*api.*'
- label: 'Translation'
files:
@ -81,6 +90,7 @@ labels:
- 'stirling-pdf/src/main/java/stirling/software/SPDF/controller/web/MetricsController.java'
- 'stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/.*'
- 'stirling-pdf/src/main/java/stirling/software/SPDF/model/api/.*'
- 'stirling-pdf/src/main/java/stirling/software/SPDF/service/ApiDocService.java'
- 'proprietary/src/main/java/stirling/software/proprietary/security/controller/api/.*'
- 'scripts/png_to_webp.py'
- 'split_photos.py'
@ -116,6 +126,7 @@ labels:
- '.pre-commit-config'
- '.github/workflows/pre_commit.yml'
- 'devGuide/.*'
- 'devTools/.*'
- label: 'Test'
files:

3
.github/labels.yml vendored
View File

@ -175,3 +175,6 @@
description: "This PR changes 1000+ lines ignoring generated files."
- name: "to research"
color: "FBCA04"
- name: "pr-deployed"
color: "00FF00"
description: "Pull request has been deployed to a test environment"

View File

@ -6,20 +6,18 @@ on:
permissions:
contents: read
issues: write # Required for adding reactions to comments
pull-requests: read # Required for reading PR information
pull-requests: read
jobs:
check-comment:
runs-on: ubuntu-latest
permissions:
issues: write
pull-requests: read
if: |
github.event.issue.pull_request &&
(
contains(github.event.comment.body, 'prdeploy') ||
contains(github.event.comment.body, 'deploypr')
contains(github.event.comment.body, 'prdeploy') ||
contains(github.event.comment.body, 'deploypr')
)
&&
(
@ -47,10 +45,14 @@ jobs:
with:
egress-policy: audit
# Generate GitHub App token
- name: Generate GitHub App Token
id: generate-token
uses: actions/create-github-app-token@df432ceedc7162793a195dd1713ff69aefc7379e # v2.0.6
- name: Checkout PR
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Setup GitHub App Bot
if: github.actor != 'dependabot[bot]'
id: setup-bot
uses: ./.github/actions/setup-bot
continue-on-error: true
with:
app-id: ${{ secrets.GH_APP_ID }}
private-key: ${{ secrets.GH_APP_PRIVATE_KEY }}
@ -123,7 +125,7 @@ jobs:
id: add-eyes-reaction
uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
with:
github-token: ${{ steps.generate-token.outputs.token }}
github-token: ${{ steps.setup-bot.outputs.token }}
script: |
console.log(`Adding eyes reaction to comment ID: ${context.payload.comment.id}`);
try {
@ -145,8 +147,8 @@ jobs:
needs: check-comment
runs-on: ubuntu-latest
permissions:
contents: read
issues: write
pull-requests: write
steps:
- name: Harden Runner
@ -154,9 +156,14 @@ jobs:
with:
egress-policy: audit
- name: Generate GitHub App Token
id: generate-token
uses: actions/create-github-app-token@df432ceedc7162793a195dd1713ff69aefc7379e # v2.0.6
- name: Checkout PR
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Setup GitHub App Bot
if: github.actor != 'dependabot[bot]'
id: setup-bot
uses: ./.github/actions/setup-bot
continue-on-error: true
with:
app-id: ${{ secrets.GH_APP_ID }}
private-key: ${{ secrets.GH_APP_PRIVATE_KEY }}
@ -166,7 +173,7 @@ jobs:
with:
repository: ${{ needs.check-comment.outputs.pr_repository }}
ref: ${{ needs.check-comment.outputs.pr_ref }}
token: ${{ secrets.GITHUB_TOKEN }}
token: ${{ steps.setup-bot.outputs.token }}
- name: Set up JDK
uses: actions/setup-java@c5195efecf7bdfc987ee8bae7a71cb8b11521c00 # v4.7.1
@ -188,12 +195,6 @@ jobs:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3.11.1
- name: Get version number
id: versionNumber
run: |
VERSION=$(grep "^version =" build.gradle | awk -F'"' '{print $2}')
echo "versionNumber=$VERSION" >> $GITHUB_OUTPUT
- name: Login to Docker Hub
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
with:
@ -297,7 +298,7 @@ jobs:
if: success()
uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
with:
github-token: ${{ steps.generate-token.outputs.token }}
github-token: ${{ steps.setup-bot.outputs.token }}
script: |
console.log(`Adding rocket reaction to comment ID: ${{ needs.check-comment.outputs.comment_id }}`);
try {
@ -313,11 +314,26 @@ jobs:
console.error(error);
}
// add label to PR
const prNumber = ${{ needs.check-comment.outputs.pr_number }};
try {
await github.rest.issues.addLabels({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: prNumber,
labels: ['pr-deployed']
});
console.log(`Added 'pr-deployed' label to PR #${prNumber}`);
} catch (error) {
console.error(`Failed to add label to PR: ${error.message}`);
console.error(error);
}
- name: Add failure reaction to comment
if: failure()
uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
with:
github-token: ${{ steps.generate-token.outputs.token }}
github-token: ${{ steps.setup-bot.outputs.token }}
script: |
console.log(`Adding -1 reaction to comment ID: ${{ needs.check-comment.outputs.comment_id }}`);
try {
@ -337,7 +353,7 @@ jobs:
if: success()
uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
with:
github-token: ${{ steps.generate-token.outputs.token }}
github-token: ${{ steps.setup-bot.outputs.token }}
script: |
const { GITHUB_REPOSITORY } = process.env;
const [repoOwner, repoName] = GITHUB_REPOSITORY.split('/');
@ -357,3 +373,11 @@ jobs:
issue_number: prNumber,
body: commentBody
});
- name: Cleanup temporary files
if: always()
run: |
echo "Cleaning up temporary files..."
rm -f ../private.key docker-compose.yml
echo "Cleanup complete."
continue-on-error: true

View File

@ -1,7 +1,7 @@
name: PR Deployment cleanup
on:
pull_request:
pull_request_target:
types: [opened, synchronize, reopened, closed]
permissions:
@ -13,11 +13,11 @@ env:
jobs:
cleanup:
if: github.event.action == 'closed'
runs-on: ubuntu-latest
permissions:
contents: write
pull-requests: write
if: github.event.action == 'closed'
issues: write
steps:
- name: Harden Runner
@ -25,13 +25,84 @@ jobs:
with:
egress-policy: audit
- name: Checkout PR
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Setup GitHub App Bot
if: github.actor != 'dependabot[bot]'
id: setup-bot
uses: ./.github/actions/setup-bot
continue-on-error: true
with:
app-id: ${{ secrets.GH_APP_ID }}
private-key: ${{ secrets.GH_APP_PRIVATE_KEY }}
# Removes the 'pr-deployed' label and any bot-authored "PR Test Deployment"
# comments from the closed PR, then publishes the step output `present`
# ('true' / 'false') so the following SSH cleanup steps only run when there
# was evidence of a deployment for this PR.
- name: Remove 'pr-deployed' label if present
  id: remove-label-comment
  uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
  with:
    github-token: ${{ steps.setup-bot.outputs.token }}
    script: |
      const prNumber = ${{ github.event.pull_request.number }};
      const owner = context.repo.owner;
      const repo = context.repo.repo;

      // Fetch all labels currently attached to the PR
      const { data: labels } = await github.rest.issues.listLabelsOnIssue({
        owner,
        repo,
        issue_number: prNumber
      });
      const hasLabel = labels.some(label => label.name === 'pr-deployed');

      if (hasLabel) {
        console.log("Label 'pr-deployed' found. Removing...");
        await github.rest.issues.removeLabel({
          owner,
          repo,
          issue_number: prNumber,
          name: 'pr-deployed'
        });
      } else {
        console.log("Label 'pr-deployed' not found. Nothing to do.");
      }

      // Find existing deployment comments. Paginate so that a deployment
      // comment is still found on PRs with more comments than one API page.
      const comments = await github.paginate(github.rest.issues.listComments, {
        owner,
        repo,
        issue_number: prNumber,
        per_page: 100
      });
      const deploymentComments = comments.filter(c =>
        c.body?.includes("## 🚀 PR Test Deployment") &&
        c.user?.type === "Bot"
      );
      const hasDeploymentComment = deploymentComments.length > 0;

      if (hasDeploymentComment) {
        for (const comment of deploymentComments) {
          await github.rest.issues.deleteComment({
            owner,
            repo,
            comment_id: comment.id
          });
          console.log(`Deleted deployment comment (ID: ${comment.id})`);
        }
      } else {
        console.log("No matching deployment comments found.");
      }

      // 'present' gates the SSH cleanup steps: true when either the label or
      // at least one deployment comment existed before this step removed them.
      // (Previously this referenced an undefined `deploymentComment`, which
      // threw a ReferenceError whenever the label was absent.)
      core.setOutput('present', hasLabel || hasDeploymentComment ? 'true' : 'false');
- name: Set up SSH
if: steps.remove-label-comment.outputs.present == 'true'
run: |
mkdir -p ~/.ssh/
echo "${{ secrets.VPS_SSH_KEY }}" > ../private.key
sudo chmod 600 ../private.key
- name: Cleanup PR deployment
if: steps.remove-label-comment.outputs.present == 'true'
id: cleanup
run: |
ssh -i ../private.key -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -T ${{ secrets.VPS_USERNAME }}@${{ secrets.VPS_HOST }} << 'ENDSSH'
@ -57,3 +128,11 @@ jobs:
echo "NO_CLEANUP_NEEDED"
fi
ENDSSH
- name: Cleanup temporary files
if: always()
run: |
echo "Cleaning up temporary files..."
rm -f ../private.key
echo "Cleanup complete."
continue-on-error: true

View File

@ -29,7 +29,7 @@ jobs:
- uses: gradle/actions/setup-gradle@ac638b010cf58a27ee6c972d7336334ccaf61c96 # v4.4.1
- name: Generate Swagger documentation
run: ./gradlew generateOpenApiDocs
run: ./gradlew :stirling-pdf:generateOpenApiDocs
- name: Upload Swagger Documentation to SwaggerHub
run: ./gradlew swaggerhubUpload

View File

@ -17,5 +17,7 @@
"GitHub.vscode-pull-request-github", // GitHub Pull Requests extension for Visual Studio Code
"charliermarsh.ruff", // Ruff code formatter for Python to follow the Ruff Style Guide
"yzhang.markdown-all-in-one", // Markdown All-in-One extension for enhanced Markdown editing
"stylelint.vscode-stylelint", // Stylelint extension for CSS and SCSS linting
"redhat.vscode-yaml", // YAML extension for Visual Studio Code
]
}

View File

@ -9,6 +9,9 @@
"[jsonc]": {
"editor.defaultFormatter": "vscode.json-language-features"
},
"[css]": {
"editor.defaultFormatter": "stylelint.vscode-stylelint"
},
"[json]": {
"editor.defaultFormatter": "vscode.json-language-features"
},
@ -27,6 +30,9 @@
"[gradle]": {
"editor.defaultFormatter": "vscjava.vscode-gradle"
},
"[yaml]": {
"editor.defaultFormatter": "redhat.vscode-yaml"
},
"java.compile.nullAnalysis.mode": "automatic",
"java.configuration.updateBuildConfiguration": "interactive",
"java.format.enabled": true,
@ -119,6 +125,7 @@
"html.format.indentHandlebars": true,
"html.format.preserveNewLines": true,
"html.format.maxPreserveNewLines": 2,
"stylelint.configFile": "devTools/.stylelintrc.json",
"java.project.sourcePaths": [
"stirling-pdf/src/main/java",
"common/src/main/java",

View File

@ -135,7 +135,7 @@ Stirling-PDF currently supports 40 languages!
| Indonesian (Bahasa Indonesia) (id_ID) | ![63%](https://geps.dev/progress/63) |
| Irish (Gaeilge) (ga_IE) | ![70%](https://geps.dev/progress/70) |
| Italian (Italiano) (it_IT) | ![98%](https://geps.dev/progress/98) |
| Japanese (日本語) (ja_JP) | ![70%](https://geps.dev/progress/70) |
| Japanese (日本語) (ja_JP) | ![95%](https://geps.dev/progress/95) |
| Korean (한국어) (ko_KR) | ![69%](https://geps.dev/progress/69) |
| Norwegian (Norsk) (no_NB) | ![67%](https://geps.dev/progress/67) |
| Persian (فارسی) (fa_IR) | ![66%](https://geps.dev/progress/66) |
@ -145,7 +145,7 @@ Stirling-PDF currently supports 40 languages!
| Romanian (Română) (ro_RO) | ![59%](https://geps.dev/progress/59) |
| Russian (Русский) (ru_RU) | ![70%](https://geps.dev/progress/70) |
| Serbian Latin alphabet (Srpski) (sr_LATN_RS) | ![97%](https://geps.dev/progress/97) |
| Simplified Chinese (简体中文) (zh_CN) | ![90%](https://geps.dev/progress/90) |
| Simplified Chinese (简体中文) (zh_CN) | ![95%](https://geps.dev/progress/95) |
| Slovakian (Slovensky) (sk_SK) | ![53%](https://geps.dev/progress/53) |
| Slovenian (Slovenščina) (sl_SI) | ![73%](https://geps.dev/progress/73) |
| Spanish (Español) (es_ES) | ![75%](https://geps.dev/progress/75) |

View File

@ -6,7 +6,7 @@ plugins {
id "org.springdoc.openapi-gradle-plugin" version "1.9.0"
id "io.swagger.swaggerhub" version "1.3.2"
id "edu.sc.seis.launch4j" version "3.0.6"
id "com.diffplug.spotless" version "7.0.4"
id "com.diffplug.spotless" version "7.1.0"
id "com.github.jk1.dependency-license-report" version "2.9"
//id "nebula.lint" version "19.0.3"
id "org.panteleyev.jpackageplugin" version "1.7.3"
@ -161,6 +161,44 @@ subprojects {
tasks.named("processResources") {
dependsOn(rootProject.tasks.writeVersion)
}
// OpenAPI document generation is configured only for the 'stirling-pdf' subproject.
if (name == 'stirling-pdf') {
apply plugin: 'org.springdoc.openapi-gradle-plugin'
// API docs URL, output location (this subproject's directory) and file name for the plugin.
openApi {
apiDocsUrl = "http://localhost:8080/v1/api-docs"
outputDir = file("$projectDir")
outputFileName = "SwaggerDoc.json"
waitTimeInSeconds = 60 // Increase the wait time to 60 seconds
}
// The forked application run needs the sibling module jars built first.
tasks.named("forkedSpringBootRun") {
dependsOn(":common:jar")
dependsOn(":proprietary:jar")
}
// Copies the generated SwaggerDoc.json from this subproject's directory to the root project directory.
tasks.register("copySwaggerDoc", Copy) {
doNotTrackState("Writes SwaggerDoc.json to project root")
from(layout.projectDirectory.file("SwaggerDoc.json"))
into(rootProject.projectDir)
dependsOn("generateOpenApiDocs")
}
// Deletes the subproject-level SwaggerDoc.json once it has been copied.
// NOTE(review): the reason string says "build directory", but the path deleted is
// layout.projectDirectory (the subproject directory) - confirm the intended wording.
tasks.register("cleanSwaggerInBuild", Delete) {
doNotTrackState("Cleans up SwaggerDoc.json in build directory")
delete(layout.projectDirectory.file("SwaggerDoc.json"))
dependsOn("copySwaggerDoc")
}
// Chain: generateOpenApiDocs -> copySwaggerDoc -> cleanSwaggerInBuild.
tasks.named("copySwaggerDoc") {
finalizedBy("cleanSwaggerInBuild")
}
tasks.named("generateOpenApiDocs") {
finalizedBy("copySwaggerDoc")
doNotTrackState("OpenAPI plugin writes outside build directory")
}
}
}
tasks.withType(JavaCompile).configureEach {
@ -169,6 +207,7 @@ tasks.withType(JavaCompile).configureEach {
}
licenseReport {
projects = [project]
renderers = [new JsonReportRenderer()]
allowedLicensesFile = new File("$projectDir/allowed-licenses.json")
}
@ -204,13 +243,6 @@ sourceSets {
}
}
openApi {
apiDocsUrl = "http://localhost:8080/v1/api-docs"
outputDir = file("$projectDir")
outputFileName = "SwaggerDoc.json"
waitTimeInSeconds = 60 // Increase the wait time to 60 seconds
}
// Configure the forked spring boot run task to properly delegate to the stirling-pdf module
tasks.named('forkedSpringBootRun') {
dependsOn ':stirling-pdf:bootRun'
@ -565,9 +597,6 @@ tasks.register('printMacVersion') {
}
}
tasks.named('generateOpenApiDocs') {
doNotTrackState("Tracking state is not supported for this task")
}
tasks.named('bootRun') {
group = 'application'
description = 'Delegates to :stirling-pdf:bootRun'

View File

@ -21,7 +21,7 @@ dependencies {
api 'com.googlecode.owasp-java-html-sanitizer:owasp-java-html-sanitizer:20240325.1'
api 'com.fathzer:javaluator:3.0.6'
api 'com.posthog.java:posthog:1.2.0'
api 'org.apache.commons:commons-lang3:3.17.0'
api 'org.apache.commons:commons-lang3:3.18.0'
api 'com.drewnoakes:metadata-extractor:2.19.0' // Image metadata extractor
api 'com.vladsch.flexmark:flexmark-html2md-converter:0.64.8'
api "org.apache.pdfbox:pdfbox:$pdfboxVersion"

View File

@ -43,6 +43,7 @@ public class AutoJobAspect {
// This aspect will run before any audit aspects due to @Order(0)
// Extract parameters from the request and annotation
boolean async = Boolean.parseBoolean(request.getParameter("async"));
log.debug("AutoJobAspect: Processing {} {} with async={}", request.getMethod(), request.getRequestURI(), async);
long timeout = autoJobPostMapping.timeout();
int retryCount = autoJobPostMapping.retryCount();
boolean trackProgress = autoJobPostMapping.trackProgress();
@ -54,19 +55,8 @@ public class AutoJobAspect {
retryCount,
trackProgress);
// Copy and process arguments
// In a test environment, we might need to update the original objects for verification
boolean isTestEnvironment = false;
try {
isTestEnvironment = Class.forName("org.junit.jupiter.api.Test") != null;
} catch (ClassNotFoundException e) {
// Not in a test environment
}
Object[] args =
isTestEnvironment
? processArgsInPlace(joinPoint.getArgs(), async)
: copyAndProcessArgs(joinPoint.getArgs(), async);
// Process arguments in-place to avoid type mismatch issues
Object[] args = processArgsInPlace(joinPoint.getArgs(), async);
// Extract queueable and resourceWeight parameters and validate
boolean queueable = autoJobPostMapping.queueable();
@ -229,79 +219,10 @@ public class AutoJobAspect {
resourceWeight);
}
/**
* Creates deep copies of arguments when needed to avoid mutating the original objects
* Particularly important for PDFFile objects that might be reused by Spring
*
* @param originalArgs The original arguments
* @param async Whether this is an async operation
* @return A new array with safely processed arguments
*/
private Object[] copyAndProcessArgs(Object[] originalArgs, boolean async) {
if (originalArgs == null || originalArgs.length == 0) {
return originalArgs;
}
Object[] processedArgs = new Object[originalArgs.length];
// Copy all arguments
for (int i = 0; i < originalArgs.length; i++) {
Object arg = originalArgs[i];
if (arg instanceof PDFFile pdfFile) {
// Create a copy of PDFFile to avoid mutating the original
// Using direct property access instead of reflection for better performance
PDFFile pdfFileCopy = new PDFFile();
pdfFileCopy.setFileId(pdfFile.getFileId());
pdfFileCopy.setFileInput(pdfFile.getFileInput());
// Case 1: fileId is provided but no fileInput
if (pdfFileCopy.getFileInput() == null && pdfFileCopy.getFileId() != null) {
try {
log.debug("Using fileId {} to get file content", pdfFileCopy.getFileId());
MultipartFile file = fileStorage.retrieveFile(pdfFileCopy.getFileId());
pdfFileCopy.setFileInput(file);
} catch (Exception e) {
throw new RuntimeException(
"Failed to resolve file by ID: " + pdfFileCopy.getFileId(), e);
}
}
// Case 2: For async requests, we need to make a copy of the MultipartFile
else if (async && pdfFileCopy.getFileInput() != null) {
try {
log.debug("Making persistent copy of uploaded file for async processing");
MultipartFile originalFile = pdfFileCopy.getFileInput();
String fileId = fileStorage.storeFile(originalFile);
// Store the fileId for later reference
pdfFileCopy.setFileId(fileId);
// Replace the original MultipartFile with our persistent copy
MultipartFile persistentFile = fileStorage.retrieveFile(fileId);
pdfFileCopy.setFileInput(persistentFile);
log.debug("Created persistent file copy with fileId: {}", fileId);
} catch (IOException e) {
throw new RuntimeException(
"Failed to create persistent copy of uploaded file", e);
}
}
processedArgs[i] = pdfFileCopy;
} else {
// For non-PDFFile objects, just pass the original reference
// If other classes need copy-on-write, add them here
processedArgs[i] = arg;
}
}
return processedArgs;
}
/**
* Processes arguments in-place for testing purposes This is similar to our original
* implementation before introducing copy-on-write It's only used in test environments to
* maintain test compatibility
* Processes arguments in-place to handle file resolution and async file persistence.
* This approach avoids type mismatch issues by modifying the original objects directly.
*
* @param originalArgs The original arguments
* @param async Whether this is an async operation

View File

@ -6,6 +6,8 @@ import java.util.Collections;
import java.util.List;
import java.util.concurrent.CopyOnWriteArrayList;
import com.fasterxml.jackson.annotation.JsonIgnore;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
@ -28,6 +30,7 @@ public class JobResult {
private String error;
/** List of result files for jobs that produce files */
@JsonIgnore
private List<ResultFile> resultFiles;
/** Time when the job was created */

View File

@ -1,6 +1,5 @@
package stirling.software.common.service;
import io.github.pixee.security.ZipSecurity;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
@ -21,6 +20,8 @@ import org.springframework.http.MediaType;
import org.springframework.stereotype.Service;
import org.springframework.web.multipart.MultipartFile;
import io.github.pixee.security.ZipSecurity;
import jakarta.annotation.PreDestroy;
import lombok.extern.slf4j.Slf4j;
@ -361,7 +362,8 @@ public class TaskManager {
MultipartFile zipFile = fileStorage.retrieveFile(zipFileId);
try (ZipInputStream zipIn =
ZipSecurity.createHardenedInputStream(new ByteArrayInputStream(zipFile.getBytes()))) {
ZipSecurity.createHardenedInputStream(
new ByteArrayInputStream(zipFile.getBytes()))) {
ZipEntry entry;
while ((entry = zipIn.getNextEntry()) != null) {
if (!entry.isDirectory()) {

View File

@ -95,10 +95,10 @@ class TaskManagerTest {
assertTrue(result.isComplete());
assertTrue(result.hasFiles());
assertFalse(result.hasMultipleFiles());
var resultFiles = result.getAllResultFiles();
assertEquals(1, resultFiles.size());
ResultFile resultFile = resultFiles.get(0);
assertEquals(fileId, resultFile.getFileId());
assertEquals(originalFileName, resultFile.getFileName());
@ -180,7 +180,7 @@ class TaskManagerTest {
// Arrange
// Mock fileStorage.getFileSize for file operations
when(fileStorage.getFileSize("file-id")).thenReturn(1024L);
// 1. Create active job
String activeJobId = "active-job";
taskManager.createTask(activeJobId);
@ -232,7 +232,7 @@ class TaskManagerTest {
LocalDateTime oldTime = LocalDateTime.now().minusHours(1);
ReflectionTestUtils.setField(oldJob, "completedAt", oldTime);
ReflectionTestUtils.setField(oldJob, "complete", true);
// Create a ResultFile and set it using the new approach
ResultFile resultFile = ResultFile.builder()
.fileId("file-id")

47
devGuide/STYLELINT.md Normal file
View File

@ -0,0 +1,47 @@
# STYLELINT.md
## Usage
Apply Stylelint to your project's CSS with the following steps:
1. **NPM Script**
- Go to directory: `devTools/`
- Install Stylelint, stylelint-config-standard, and @stylistic/stylelint-plugin
```bash
npm install --save-dev stylelint stylelint-config-standard
npm install --save-dev @stylistic/stylelint-plugin
```
- Add a script entry to your `package.json`:
```jsonc
{
"scripts": {
"lint:css:check": "stylelint \"../stirling-pdf/src/main/**/*.css\" \"../proprietary/src/main/resources/static/css/*.css\" --config .stylelintrc.json",
"lint:css:fix": "stylelint \"../stirling-pdf/src/main/**/*.css\" \"../proprietary/src/main/resources/static/css/*.css\" --config .stylelintrc.json --fix"
}
}
```
- Run the linter:
```bash
npm run lint:css:check
npm run lint:css:fix
```
2. **CLI Usage**
- Lint all CSS files:
```bash
npx stylelint "../stirling-pdf/src/main/**/*.css" "../proprietary/src/main/resources/static/css/*.css"
```
- Lint a single file:
```bash
npx stylelint ../proprietary/src/main/resources/static/css/audit-dashboard.css
```
- Apply automatic fixes:
```bash
npx stylelint "../stirling-pdf/src/main/**/*.css" "../proprietary/src/main/resources/static/css/*.css" --fix
```
For full configuration options and rule customization, refer to the official documentation: [https://stylelint.io](https://stylelint.io)

View File

@ -0,0 +1,69 @@
{
"extends": [
"stylelint-config-standard"
],
"plugins": [
"@stylistic/stylelint-plugin"
],
"ignoreFiles": [
"stirling-pdf/src/main/resources/static/css/bootstrap*.css",
"stirling-pdf/src/main/resources/static/css/cookieconsent.css",
"stirling-pdf/src/main/resources/static/css/cookieconsentCustomisation.css",
"stirling-pdf/src/main/resources/static/css/prism.css",
"stirling-pdf/src/main/resources/static/pdfjs-legacy/**/*.css"
],
"rules": {
"property-no-vendor-prefix": null,
"value-no-vendor-prefix": null,
"selector-no-vendor-prefix": null,
"media-feature-name-no-vendor-prefix": null,
"value-keyword-case": null,
"color-function-notation": null,
"alpha-value-notation": null,
"color-function-alias-notation": null,
"selector-class-pattern": null,
"selector-id-pattern": null,
"declaration-block-no-redundant-longhand-properties": null,
"media-feature-range-notation": "prefix",
"selector-attribute-quotes": null,
"at-rule-no-vendor-prefix": null,
"selector-not-notation": null,
"no-duplicate-selectors": [
true,
{
"disableFix": true
}
],
"comment-word-disallowed-list": null,
"custom-property-pattern": null,
"no-descending-specificity": null,
"keyframes-name-pattern": null,
"comment-empty-line-before": [
"always",
{
"ignore": [
"stylelint-commands"
]
}
],
"block-no-empty": true,
"@stylistic/declaration-bang-space-after": "never",
"@stylistic/declaration-bang-space-before": "always",
"@stylistic/declaration-block-trailing-semicolon": "always",
"@stylistic/function-comma-space-after": [
"always-single-line",
{
"disableFix": false
}
],
"@stylistic/function-comma-space-before": "never",
"@stylistic/color-hex-case": "lower",
"@stylistic/declaration-block-semicolon-newline-after": "always",
"@stylistic/indentation": [
2,
{
"baseIndentLevel": 2
}
]
}
}

1598
devTools/package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

13
devTools/package.json Normal file
View File

@ -0,0 +1,13 @@
{
"name": "stirling-pdf",
"version": "1.0.0",
"scripts": {
"lint:css:check": "stylelint \"../stirling-pdf/src/main/**/*.css\" \"../proprietary/src/main/resources/static/css/*.css\" --config .stylelintrc.json",
"lint:css:fix": "stylelint \"../stirling-pdf/src/main/**/*.css\" \"../proprietary/src/main/resources/static/css/*.css\" --config .stylelintrc.json --fix"
},
"devDependencies": {
"@stylistic/stylelint-plugin": "^3.1.3",
"stylelint": "^16.21.1",
"stylelint-config-standard": "^38.0.0"
}
}

View File

@ -37,7 +37,7 @@ dependencies {
implementation 'org.thymeleaf.extras:thymeleaf-extras-springsecurity5:3.1.3.RELEASE'
api 'io.micrometer:micrometer-registry-prometheus'
implementation 'com.unboundid.product.scim2:scim2-sdk-client:2.3.5'
implementation 'com.unboundid.product.scim2:scim2-sdk-client:4.0.0'
runtimeOnly 'com.h2database:h2:2.3.232' // Don't upgrade h2database
runtimeOnly 'org.postgresql:postgresql:42.7.7'
constraints {

View File

@ -529,7 +529,6 @@ ignore = [
[ja_JP]
ignore = [
'lang.jav',
'language.direction',
]

View File

@ -29,7 +29,8 @@ public class CleanUrlInterceptor implements HandlerInterceptor {
"type",
"principal",
"startDate",
"endDate");
"endDate",
"async");
@Override
public boolean preHandle(

View File

@ -165,12 +165,6 @@
"moduleLicense": "Apache-2.0",
"moduleLicenseUrl": "https://www.apache.org/licenses/LICENSE-2.0.txt"
},
{
"moduleName": "com.google.errorprone:error_prone_annotations",
"moduleVersion": "2.11.0",
"moduleLicense": "Apache 2.0",
"moduleLicenseUrl": "http://www.apache.org/licenses/LICENSE-2.0.txt"
},
{
"moduleName": "com.google.errorprone:error_prone_annotations",
"moduleUrl": "https://errorprone.info/error_prone_annotations",
@ -639,13 +633,6 @@
"moduleLicense": "Apache License, Version 2.0",
"moduleLicenseUrl": "https://www.apache.org/licenses/LICENSE-2.0.txt"
},
{
"moduleName": "io.swagger.core.v3:swagger-annotations-jakarta",
"moduleUrl": "https://github.com/swagger-api/swagger-core/modules/swagger-annotations",
"moduleVersion": "2.2.30",
"moduleLicense": "Apache License, Version 2.0",
"moduleLicenseUrl": "https://www.apache.org/licenses/LICENSE-2.0"
},
{
"moduleName": "io.swagger.core.v3:swagger-annotations-jakarta",
"moduleUrl": "https://github.com/swagger-api/swagger-core/modules/swagger-annotations",
@ -653,13 +640,6 @@
"moduleLicense": "Apache License, Version 2.0",
"moduleLicenseUrl": "https://www.apache.org/licenses/LICENSE-2.0"
},
{
"moduleName": "io.swagger.core.v3:swagger-core-jakarta",
"moduleUrl": "https://github.com/swagger-api/swagger-core/modules/swagger-core",
"moduleVersion": "2.2.30",
"moduleLicense": "Apache License, Version 2.0",
"moduleLicenseUrl": "https://www.apache.org/licenses/LICENSE-2.0"
},
{
"moduleName": "io.swagger.core.v3:swagger-core-jakarta",
"moduleUrl": "https://github.com/swagger-api/swagger-core/modules/swagger-core",
@ -667,13 +647,6 @@
"moduleLicense": "Apache License, Version 2.0",
"moduleLicenseUrl": "https://www.apache.org/licenses/LICENSE-2.0"
},
{
"moduleName": "io.swagger.core.v3:swagger-models-jakarta",
"moduleUrl": "https://github.com/swagger-api/swagger-core/modules/swagger-models",
"moduleVersion": "2.2.30",
"moduleLicense": "Apache License, Version 2.0",
"moduleLicenseUrl": "https://www.apache.org/licenses/LICENSE-2.0"
},
{
"moduleName": "io.swagger.core.v3:swagger-models-jakarta",
"moduleUrl": "https://github.com/swagger-api/swagger-core/modules/swagger-models",
@ -744,13 +717,6 @@
"moduleLicense": "GPL2 w/ CPE",
"moduleLicenseUrl": "https://www.gnu.org/software/classpath/license.html"
},
{
"moduleName": "jakarta.servlet:jakarta.servlet-api",
"moduleUrl": "https://www.eclipse.org",
"moduleVersion": "6.1.0",
"moduleLicense": "GPL2 w/ CPE",
"moduleLicenseUrl": "https://www.gnu.org/software/classpath/license.html"
},
{
"moduleName": "jakarta.transaction:jakarta.transaction-api",
"moduleUrl": "https://projects.eclipse.org/projects/ee4j.jta",
@ -889,13 +855,6 @@
"moduleLicense": "Apache-2.0",
"moduleLicenseUrl": "https://www.apache.org/licenses/LICENSE-2.0.txt"
},
{
"moduleName": "org.apache.commons:commons-text",
"moduleUrl": "https://commons.apache.org/proper/commons-text",
"moduleVersion": "1.10.0",
"moduleLicense": "Apache License, Version 2.0",
"moduleLicenseUrl": "https://www.apache.org/licenses/LICENSE-2.0.txt"
},
{
"moduleName": "org.apache.commons:commons-text",
"moduleUrl": "https://commons.apache.org/proper/commons-text",
@ -1018,13 +977,6 @@
"moduleLicense": "The Apache Software License, Version 2.0",
"moduleLicenseUrl": "https://www.apache.org/licenses/LICENSE-2.0.txt"
},
{
"moduleName": "org.bouncycastle:bcpkix-jdk18on",
"moduleUrl": "https://www.bouncycastle.org/java.html",
"moduleVersion": "1.72",
"moduleLicense": "Bouncy Castle Licence",
"moduleLicenseUrl": "https://www.bouncycastle.org/licence.html"
},
{
"moduleName": "org.bouncycastle:bcpkix-jdk18on",
"moduleUrl": "https://www.bouncycastle.org/download/bouncy-castle-java/",
@ -1039,13 +991,6 @@
"moduleLicense": "Bouncy Castle Licence",
"moduleLicenseUrl": "https://www.bouncycastle.org/licence.html"
},
{
"moduleName": "org.bouncycastle:bcutil-jdk18on",
"moduleUrl": "https://www.bouncycastle.org/java.html",
"moduleVersion": "1.72",
"moduleLicense": "Bouncy Castle Licence",
"moduleLicenseUrl": "https://www.bouncycastle.org/licence.html"
},
{
"moduleName": "org.bouncycastle:bcutil-jdk18on",
"moduleUrl": "https://www.bouncycastle.org/download/bouncy-castle-java/",
@ -1562,13 +1507,6 @@
"moduleLicense": "Apache License, Version 2.0",
"moduleLicenseUrl": "https://www.apache.org/licenses/LICENSE-2.0"
},
{
"moduleName": "org.springframework.boot:spring-boot-devtools",
"moduleUrl": "https://spring.io/projects/spring-boot",
"moduleVersion": "3.5.3",
"moduleLicense": "Apache License, Version 2.0",
"moduleLicenseUrl": "https://www.apache.org/licenses/LICENSE-2.0"
},
{
"moduleName": "org.springframework.boot:spring-boot-starter",
"moduleUrl": "https://spring.io/projects/spring-boot",

View File

@ -65,17 +65,23 @@ premium:
key: 00000000-0000-0000-0000-000000000000
enabled: false # Enable license key checks for pro/enterprise features
proFeatures:
database: true # Enable database features
SSOAutoLogin: false
CustomMetadata:
autoUpdateMetadata: false # set to 'true' to automatically update metadata with below values
author: username # supports text such as 'John Doe' or types such as username to autopopulate with user's username
creator: Stirling-PDF # supports text such as 'Company-PDF'
producer: Stirling-PDF # supports text such as 'Company-PDF'
autoUpdateMetadata: false
author: username
creator: Stirling-PDF
producer: Stirling-PDF
googleDrive:
enabled: false
clientId: ''
apiKey: ''
appId: ''
enterpriseFeatures:
audit:
enabled: true # Enable audit logging
level: 2 # Audit logging level: 0=OFF, 1=BASIC, 2=STANDARD, 3=VERBOSE
retentionDays: 90 # Number of days to retain audit logs
mail:
enabled: false # set to 'true' to enable sending emails
@ -86,7 +92,7 @@ mail:
from: '' # sender email address
legal:
termsAndConditions: https://www.stirlingpdf.com/terms # URL to the terms and conditions of your application (e.g. https://example.com/terms). Empty string to disable or filename to load from local file in static folder
termsAndConditions: https://www.stirlingpdf.com/terms-and-conditions # URL to the terms and conditions of your application (e.g. https://example.com/terms). Empty string to disable or filename to load from local file in static folder
privacyPolicy: https://www.stirlingpdf.com/privacy-policy # URL to the privacy policy of your application (e.g. https://example.com/privacy). Empty string to disable or filename to load from local file in static folder
accessibilityStatement: '' # URL to the accessibility statement of your application (e.g. https://example.com/accessibility). Empty string to disable or filename to load from local file in static folder
cookiePolicy: '' # URL to the cookie policy of your application (e.g. https://example.com/cookie). Empty string to disable or filename to load from local file in static folder
@ -120,6 +126,15 @@ system:
weasyprint: '' # Defaults to /opt/venv/bin/weasyprint
unoconvert: '' # Defaults to /opt/venv/bin/unoconvert
fileUploadLimit: '' # Defaults to "". No limit when string is empty. To set a limit, use a number between 0 and 999 followed by one of the units "KB", "MB" or "GB".
tempFileManagement:
baseTmpDir: '' # Defaults to java.io.tmpdir/stirling-pdf
libreofficeDir: '' # Defaults to tempFileManagement.baseTmpDir/libreoffice
systemTempDir: '' # Only used if cleanupSystemTemp is true
prefix: stirling-pdf- # Prefix for temp file names
maxAgeHours: 24 # Maximum age in hours before temp files are cleaned up
cleanupIntervalMinutes: 30 # How often to run cleanup (in minutes)
startupCleanup: true # Clean up old temp files on startup
cleanupSystemTemp: false # Whether to clean broader system temp directory
ui:
appName: '' # application's visible name
@ -150,6 +165,8 @@ processExecutor:
weasyPrintSessionLimit: 16
installAppSessionLimit: 1
calibreSessionLimit: 1
ghostscriptSessionLimit: 8
ocrMyPdfSessionLimit: 2
timeoutMinutes: # Process executor timeout in minutes
libreOfficetimeoutMinutes: 30
pdfToHtmltimeoutMinutes: 20
@ -158,3 +175,6 @@ processExecutor:
installApptimeoutMinutes: 60
calibretimeoutMinutes: 30
tesseractTimeoutMinutes: 30
qpdfTimeoutMinutes: 30
ghostscriptTimeoutMinutes: 30
ocrMyPdfTimeoutMinutes: 30

View File

@ -1,21 +1,25 @@
import os
# Resets the request/response attributes kept on the shared `context` object:
# no endpoint, no request data, no response, and an empty mapping of files.
# NOTE(review): name and signature match behave's `before_all` environment
# hook - confirm that behave is the runner that loads this module.
def before_all(context):
context.endpoint = None
context.request_data = None
# after_scenario closes every value stored in this mapping.
context.files = {}
context.response = None
# Cleanup hook (signature matches behave's `after_scenario`; the `scenario`
# argument is not used). It:
#   1. closes every object stored in `context.files`, when that attribute exists;
#   2. deletes "response_file" from the current directory if it exists;
#   3. deletes the file named by `context.file_name`, when the attribute is set
#      and the path exists;
#   4. deletes every entry of the current directory whose name starts with
#      "genericNonCustomisableName" or "temp_image_".
# NOTE(review): several statements below appear twice, once with single and
# once with double quotes; the two variants differ only in quoting and line
# wrapping, not in what they do.
def after_scenario(context, scenario):
if hasattr(context, 'files'):
if hasattr(context, "files"):
for file in context.files.values():
file.close()
if os.path.exists('response_file'):
os.remove('response_file')
if hasattr(context, 'file_name') and os.path.exists(context.file_name):
if os.path.exists("response_file"):
os.remove("response_file")
if hasattr(context, "file_name") and os.path.exists(context.file_name):
os.remove(context.file_name)
# Remove any temporary files
for temp_file in os.listdir('.'):
if temp_file.startswith('genericNonCustomisableName') or temp_file.startswith('temp_image_'):
for temp_file in os.listdir("."):
if temp_file.startswith("genericNonCustomisableName") or temp_file.startswith(
"temp_image_"
):
os.remove(temp_file)

View File

@ -1,132 +1,132 @@
@example @general
Feature: API Validation
@positive @password
Scenario: Remove password
Given I generate a PDF file as "fileInput"
And the pdf contains 3 pages
And the pdf is encrypted with password "password123"
And the request data includes
| parameter | value |
| password | password123 |
When I send the API request to the endpoint "/api/v1/security/remove-password"
Then the response content type should be "application/pdf"
And the response file should have size greater than 0
And the response PDF is not passworded
And the response status code should be 200
@positive @password
Scenario: Remove password
Given I generate a PDF file as "fileInput"
And the pdf contains 3 pages
And the pdf is encrypted with password "password123"
And the request data includes
| parameter | value |
| password | password123 |
When I send the API request to the endpoint "/api/v1/security/remove-password"
Then the response content type should be "application/pdf"
And the response file should have size greater than 0
And the response PDF is not passworded
And the response status code should be 200
@negative @password
Scenario: Remove password wrong password
Given I generate a PDF file as "fileInput"
And the pdf contains 3 pages
And the pdf is encrypted with password "password123"
And the request data includes
| parameter | value |
| password | wrongPassword |
When I send the API request to the endpoint "/api/v1/security/remove-password"
Then the response status code should be 500
And the response should contain error message "Internal Server Error"
@negative @password
Scenario: Remove password wrong password
Given I generate a PDF file as "fileInput"
And the pdf contains 3 pages
And the pdf is encrypted with password "password123"
And the request data includes
| parameter | value |
| password | wrongPassword |
When I send the API request to the endpoint "/api/v1/security/remove-password"
Then the response status code should be 500
And the response should contain error message "Internal Server Error"
@positive @info
Scenario: Get info
Given I generate a PDF file as "fileInput"
When I send the API request to the endpoint "/api/v1/security/get-info-on-pdf"
Then the response content type should be "application/json"
And the response file should have size greater than 100
And the response status code should be 200
@positive @info
Scenario: Get info
Given I generate a PDF file as "fileInput"
When I send the API request to the endpoint "/api/v1/security/get-info-on-pdf"
Then the response content type should be "application/json"
And the response file should have size greater than 100
And the response status code should be 200
@positive @password
Scenario: Add password
Given I generate a PDF file as "fileInput"
And the pdf contains 3 pages
And the request data includes
| parameter | value |
| password | password123 |
When I send the API request to the endpoint "/api/v1/security/add-password"
Then the response content type should be "application/pdf"
And the response file should have size greater than 100
And the response PDF is passworded
And the response status code should be 200
@positive @password
Scenario: Add password
Given I generate a PDF file as "fileInput"
And the pdf contains 3 pages
And the request data includes
| parameter | value |
| password | password123 |
When I send the API request to the endpoint "/api/v1/security/add-password"
Then the response content type should be "application/pdf"
And the response file should have size greater than 100
And the response PDF is passworded
And the response status code should be 200
@positive @password
Scenario: Add password with other params
Given I generate a PDF file as "fileInput"
And the pdf contains 3 pages
And the request data includes
| parameter | value |
| ownerPassword | ownerPass |
| password | password123 |
| keyLength | 256 |
| canPrint | true |
| canModify | false |
When I send the API request to the endpoint "/api/v1/security/add-password"
Then the response content type should be "application/pdf"
And the response file should have size greater than 100
And the response PDF is passworded
And the response status code should be 200
@positive @password
Scenario: Add password with other params
Given I generate a PDF file as "fileInput"
And the pdf contains 3 pages
And the request data includes
| parameter | value |
| ownerPassword | ownerPass |
| password | password123 |
| keyLength | 256 |
| canPrint | true |
| canModify | false |
When I send the API request to the endpoint "/api/v1/security/add-password"
Then the response content type should be "application/pdf"
And the response file should have size greater than 100
And the response PDF is passworded
And the response status code should be 200
@positive @watermark
Scenario: Add watermark
Given I generate a PDF file as "fileInput"
And the pdf contains 3 pages
And the request data includes
| parameter | value |
| watermarkType | text |
| watermarkText | Sample Watermark |
| fontSize | 30 |
| rotation | 45 |
| opacity | 0.5 |
| widthSpacer | 50 |
| heightSpacer | 50 |
| alphabet | roman |
| customColor | #d3d3d3 |
When I send the API request to the endpoint "/api/v1/security/add-watermark"
Then the response content type should be "application/pdf"
And the response file should have size greater than 100
And the response status code should be 200
@positive @watermark
Scenario: Add watermark
Given I generate a PDF file as "fileInput"
And the pdf contains 3 pages
And the request data includes
| parameter | value |
| watermarkType | text |
| watermarkText | Sample Watermark |
| fontSize | 30 |
| rotation | 45 |
| opacity | 0.5 |
| widthSpacer | 50 |
| heightSpacer | 50 |
| alphabet | roman |
| customColor | #d3d3d3 |
When I send the API request to the endpoint "/api/v1/security/add-watermark"
Then the response content type should be "application/pdf"
And the response file should have size greater than 100
And the response status code should be 200
@positive
Scenario: Remove blank pages
Given I generate a PDF file as "fileInput"
And the pdf contains 3 blank pages
And the request data includes
| parameter | value |
| threshold | 90 |
| whitePercent | 99.9 |
When I send the API request to the endpoint "/api/v1/misc/remove-blanks"
Then the response content type should be "application/octet-stream"
And the response file should have extension ".zip"
And the response ZIP should contain 1 files
And the response file should have size greater than 0
@positive
Scenario: Remove blank pages
Given I generate a PDF file as "fileInput"
And the pdf contains 3 blank pages
And the request data includes
| parameter | value |
| threshold | 90 |
| whitePercent | 99.9 |
When I send the API request to the endpoint "/api/v1/misc/remove-blanks"
Then the response content type should be "application/octet-stream"
And the response file should have extension ".zip"
And the response ZIP should contain 1 files
And the response file should have size greater than 0
@positive @flatten
Scenario: Flatten PDF
Given I generate a PDF file as "fileInput"
And the request data includes
| parameter | value |
| flattenOnlyForms | false |
When I send the API request to the endpoint "/api/v1/misc/flatten"
Then the response content type should be "application/pdf"
And the response file should have size greater than 0
And the response status code should be 200
@positive @flatten
Scenario: Flatten PDF
Given I generate a PDF file as "fileInput"
And the request data includes
| parameter | value |
| flattenOnlyForms | false |
When I send the API request to the endpoint "/api/v1/misc/flatten"
Then the response content type should be "application/pdf"
And the response file should have size greater than 0
And the response status code should be 200
@positive @metadata
Scenario: Update metadata
Given I generate a PDF file as "fileInput"
And the request data includes
| parameter | value |
| author | John Doe |
| title | Sample Title |
| subject | Sample Subject |
| keywords | sample, test |
| producer | Test Producer |
When I send the API request to the endpoint "/api/v1/misc/update-metadata"
Then the response content type should be "application/pdf"
And the response file should have size greater than 0
And the response PDF metadata should include "Author" as "John Doe"
And the response PDF metadata should include "Keywords" as "sample, test"
And the response PDF metadata should include "Subject" as "Sample Subject"
And the response PDF metadata should include "Title" as "Sample Title"
And the response status code should be 200
@positive @metadata
Scenario: Update metadata
Given I generate a PDF file as "fileInput"
And the request data includes
| parameter | value |
| author | John Doe |
| title | Sample Title |
| subject | Sample Subject |
| keywords | sample, test |
| producer | Test Producer |
When I send the API request to the endpoint "/api/v1/misc/update-metadata"
Then the response content type should be "application/pdf"
And the response file should have size greater than 0
And the response PDF metadata should include "Author" as "John Doe"
And the response PDF metadata should include "Keywords" as "sample, test"
And the response PDF metadata should include "Subject" as "Sample Subject"
And the response PDF metadata should include "Title" as "Sample Title"
And the response status code should be 200

View File

@ -1,230 +1,250 @@
Feature: API Validation
@libre @positive
Scenario: Repair PDF
Given I generate a PDF file as "fileInput"
When I send the API request to the endpoint "/api/v1/misc/repair"
Then the response content type should be "application/pdf"
And the response file should have size greater than 0
And the response status code should be 200
@ocr @positive
Scenario: Process PDF with OCR
Given I generate a PDF file as "fileInput"
And the request data includes
| parameter | value |
| languages | eng |
| sidecar | false |
| deskew | true |
| clean | true |
| cleanFinal | true |
| ocrType | Normal |
| ocrRenderType | hocr |
| removeImagesAfter| false |
When I send the API request to the endpoint "/api/v1/misc/ocr-pdf"
Then the response content type should be "application/pdf"
And the response file should have size greater than 0
And the response status code should be 200
@libre @positive
Scenario: Repair PDF
Given I generate a PDF file as "fileInput"
When I send the API request to the endpoint "/api/v1/misc/repair"
Then the response content type should be "application/pdf"
And the response file should have size greater than 0
And the response status code should be 200
@ocr @positive
Scenario: Extract Image Scans
Given I generate a PDF file as "fileInput"
And the pdf contains 3 images of size 300x300 on 2 pages
And the request data includes
| parameter | value |
| angleThreshold | 5 |
| tolerance | 20 |
| minArea | 8000 |
| minContourArea | 500 |
| borderSize | 1 |
When I send the API request to the endpoint "/api/v1/misc/extract-image-scans"
Then the response content type should be "application/octet-stream"
And the response file should have extension ".zip"
And the response ZIP should contain 2 files
And the response file should have size greater than 0
And the response status code should be 200
@ocr @positive
Scenario: Process PDF with OCR
Given I generate a PDF file as "fileInput"
And the request data includes
| parameter | value |
| languages | eng |
| sidecar | false |
| deskew | true |
| clean | true |
| cleanFinal | true |
| ocrType | Force |
| ocrRenderType | hocr |
| removeImagesAfter| false |
When I send the API request to the endpoint "/api/v1/misc/ocr-pdf"
Then the response content type should be "application/pdf"
And the response file should have size greater than 0
And the response status code should be 200
@ocr @positive
Scenario: Process PDF with OCR
Given I generate a PDF file as "fileInput"
And the request data includes
| parameter | value |
| languages | eng |
| sidecar | false |
| deskew | true |
| clean | true |
| cleanFinal | true |
| ocrType | Normal |
| ocrRenderType | hocr |
| removeImagesAfter | false |
When I send the API request to the endpoint "/api/v1/misc/ocr-pdf"
Then the response content type should be "application/pdf"
And the response file should have size greater than 0
And the response status code should be 200
@libre @positive
Scenario Outline: Convert PDF to various word formats
Given I generate a PDF file as "fileInput"
And the pdf contains 3 pages with random text
And the request data includes
| parameter | value |
| outputFormat | <format> |
When I send the API request to the endpoint "/api/v1/convert/pdf/word"
Then the response status code should be 200
And the response file should have size greater than 100
And the response file should have extension "<extension>"
@ocr @positive
Scenario: Extract Image Scans
Given I generate a PDF file as "fileInput"
And the pdf contains 3 images of size 300x300 on 2 pages
And the request data includes
| parameter | value |
| angleThreshold | 5 |
| tolerance | 20 |
| minArea | 8000 |
| minContourArea | 500 |
| borderSize | 1 |
When I send the API request to the endpoint "/api/v1/misc/extract-image-scans"
Then the response content type should be "application/octet-stream"
And the response file should have extension ".zip"
And the response ZIP should contain 2 files
And the response file should have size greater than 0
And the response status code should be 200
Examples:
| format | extension |
| docx | .docx |
| odt | .odt |
| doc | .doc |
@ocr @pdfa1
Scenario: PDFA
Given I use an example file at "exampleFiles/pdfa2.pdf" as parameter "fileInput"
And the request data includes
| parameter | value |
| outputFormat | pdfa |
When I send the API request to the endpoint "/api/v1/convert/pdf/pdfa"
Then the response status code should be 200
And the response file should have extension ".pdf"
And the response file should have size greater than 100
@ocr @pdfa2
Scenario: PDFA1
Given I use an example file at "exampleFiles/pdfa1.pdf" as parameter "fileInput"
And the request data includes
| parameter | value |
| outputFormat | pdfa-1 |
When I send the API request to the endpoint "/api/v1/convert/pdf/pdfa"
Then the response status code should be 200
And the response file should have extension ".pdf"
And the response file should have size greater than 100
@compress @qpdf @positive
Scenario: Compress
Given I use an example file at "exampleFiles/ghost3.pdf" as parameter "fileInput"
And the request data includes
| parameter | value |
| optimizeLevel | 4 |
When I send the API request to the endpoint "/api/v1/misc/compress-pdf"
Then the response status code should be 200
And the response file should have extension ".pdf"
And the response file should have size greater than 100
@compress @qpdf @positive
Scenario: Compress
Given I use an example file at "exampleFiles/ghost2.pdf" as parameter "fileInput"
And the request data includes
| parameter | value |
| optimizeLevel | 1 |
| expectedOutputSize | 5KB |
When I send the API request to the endpoint "/api/v1/misc/compress-pdf"
Then the response status code should be 200
And the response file should have extension ".pdf"
And the response file should have size greater than 100
@compress @qpdf @positive
Scenario: Compress
Given I use an example file at "exampleFiles/ghost1.pdf" as parameter "fileInput"
And the request data includes
| parameter | value |
| optimizeLevel | 1 |
| expectedOutputSize | 5KB |
When I send the API request to the endpoint "/api/v1/misc/compress-pdf"
Then the response status code should be 200
And the response file should have extension ".pdf"
And the response file should have size greater than 100
@libre @positive
Scenario Outline: Convert PDF to various types
Given I generate a PDF file as "fileInput"
And the pdf contains 3 pages with random text
And the request data includes
| parameter | value |
| outputFormat | <format> |
When I send the API request to the endpoint "/api/v1/convert/pdf/<type>"
Then the response status code should be 200
And the response file should have size greater than 100
And the response file should have extension "<extension>"
@ocr @positive
Scenario: Process PDF with OCR
Given I generate a PDF file as "fileInput"
And the request data includes
| parameter | value |
| languages | eng |
| sidecar | false |
| deskew | true |
| clean | true |
| cleanFinal | true |
| ocrType | Force |
| ocrRenderType | hocr |
| removeImagesAfter | false |
When I send the API request to the endpoint "/api/v1/misc/ocr-pdf"
Then the response content type should be "application/pdf"
And the response file should have size greater than 0
And the response status code should be 200
Examples:
| type | format | extension |
| text | rtf | .rtf |
| text | txt | .txt |
| presentation | ppt | .ppt |
| presentation | pptx | .pptx |
| presentation | odp | .odp |
| html | html | .zip |
@libre @positive @topdf
Scenario Outline: Convert PDF to various types
Given I use an example file at "exampleFiles/example<extension>" as parameter "fileInput"
When I send the API request to the endpoint "/api/v1/convert/file/pdf"
Then the response status code should be 200
And the response file should have size greater than 100
And the response file should have extension ".pdf"
@libre @positive
Scenario Outline: Convert PDF to various word formats
Given I generate a PDF file as "fileInput"
And the pdf contains 3 pages with random text
And the request data includes
| parameter | value |
| outputFormat | <format> |
When I send the API request to the endpoint "/api/v1/convert/pdf/word"
Then the response status code should be 200
And the response file should have size greater than 100
And the response file should have extension "<extension>"
Examples:
| extension |
| .docx |
| .odp |
| .odt |
| .pptx |
| .rtf |
@calibre @positive @htmltopdf
Scenario: Convert HTML to PDF
Given I use an example file at "exampleFiles/example.html" as parameter "fileInput"
When I send the API request to the endpoint "/api/v1/convert/html/pdf"
Then the response status code should be 200
And the response file should have size greater than 100
And the response file should have extension ".pdf"
@calibre @positive @zippedhtmltopdf
Scenario: Convert zipped HTML to PDF
Given I use an example file at "exampleFiles/example_html.zip" as parameter "fileInput"
When I send the API request to the endpoint "/api/v1/convert/html/pdf"
Then the response status code should be 200
And the response file should have size greater than 100
And the response file should have extension ".pdf"
@calibre @positive @markdowntopdf
Scenario: Convert Markdown to PDF
Given I use an example file at "exampleFiles/example.md" as parameter "fileInput"
When I send the API request to the endpoint "/api/v1/convert/markdown/pdf"
Then the response status code should be 200
And the response file should have size greater than 100
And the response file should have extension ".pdf"
@markdown @positive
Scenario: Convert PDF to Markdown format
Given I generate a PDF file as "fileInput"
And the pdf contains 3 pages with random text
When I send the API request to the endpoint "/api/v1/convert/pdf/markdown"
Then the response status code should be 200
And the response file should have size greater than 100
And the response file should have extension ".md"
@positive @pdftocsv
Scenario: Convert PDF with tables to CSV format
Given I use an example file at "exampleFiles/tables.pdf" as parameter "fileInput"
And the request data includes
| parameter | value |
| outputFormat | csv |
| pageNumbers | all |
When I send the API request to the endpoint "/api/v1/convert/pdf/csv"
Then the response status code should be 200
And the response file should have size greater than 200
And the response file should have extension ".zip"
And the response ZIP should contain 3 files
Examples:
| format | extension |
| docx | .docx |
| odt | .odt |
| doc | .doc |
@ocr @pdfa1
Scenario: PDFA
Given I use an example file at "exampleFiles/pdfa2.pdf" as parameter "fileInput"
And the request data includes
| parameter | value |
| outputFormat | pdfa |
When I send the API request to the endpoint "/api/v1/convert/pdf/pdfa"
Then the response status code should be 200
And the response file should have extension ".pdf"
And the response file should have size greater than 100
@ocr @pdfa2
Scenario: PDFA1
Given I use an example file at "exampleFiles/pdfa1.pdf" as parameter "fileInput"
And the request data includes
| parameter | value |
| outputFormat | pdfa-1 |
When I send the API request to the endpoint "/api/v1/convert/pdf/pdfa"
Then the response status code should be 200
And the response file should have extension ".pdf"
And the response file should have size greater than 100
@compress @qpdf @positive
Scenario: Compress
Given I use an example file at "exampleFiles/ghost3.pdf" as parameter "fileInput"
And the request data includes
| parameter | value |
| optimizeLevel | 4 |
When I send the API request to the endpoint "/api/v1/misc/compress-pdf"
Then the response status code should be 200
And the response file should have extension ".pdf"
And the response file should have size greater than 100
@compress @qpdf @positive
Scenario: Compress
Given I use an example file at "exampleFiles/ghost2.pdf" as parameter "fileInput"
And the request data includes
| parameter | value |
| optimizeLevel | 1 |
| expectedOutputSize | 5KB |
When I send the API request to the endpoint "/api/v1/misc/compress-pdf"
Then the response status code should be 200
And the response file should have extension ".pdf"
And the response file should have size greater than 100
@compress @qpdf @positive
Scenario: Compress
Given I use an example file at "exampleFiles/ghost1.pdf" as parameter "fileInput"
And the request data includes
| parameter | value |
| optimizeLevel | 1 |
| expectedOutputSize | 5KB |
When I send the API request to the endpoint "/api/v1/misc/compress-pdf"
Then the response status code should be 200
And the response file should have extension ".pdf"
And the response file should have size greater than 100
@libre @positive
Scenario Outline: Convert PDF to various types
Given I generate a PDF file as "fileInput"
And the pdf contains 3 pages with random text
And the request data includes
| parameter | value |
| outputFormat | <format> |
When I send the API request to the endpoint "/api/v1/convert/pdf/<type>"
Then the response status code should be 200
And the response file should have size greater than 100
And the response file should have extension "<extension>"
Examples:
| type | format | extension |
| text | rtf | .rtf |
| text | txt | .txt |
| presentation | ppt | .ppt |
| presentation | pptx | .pptx |
| presentation | odp | .odp |
| html | html | .zip |
@image @positive
Scenario Outline: Convert PDF to image
Given I generate a PDF file as "fileInput"
And the pdf contains 3 pages with random text
And the pdf contains 3 images of size 300x300 on 3 pages
And the request data includes
| parameter | value |
| dpi | 300 |
| imageFormat | <format> |
When I send the API request to the endpoint "/api/v1/convert/pdf/img"
Then the response status code should be 200
And the response file should have size greater than 100
And the response file should have extension ".zip"
Examples:
| format |
| webp |
| png |
| jpeg |
| jpg |
| gif |
# Uploads each example document (one run per row of the Examples table) to
# /api/v1/convert/file/pdf and checks for a 200 response whose file has a
# ".pdf" extension and is larger than 100 bytes.
@libre @positive @topdf
Scenario Outline: Convert various file types to PDF
Given I use an example file at "exampleFiles/example<extension>" as parameter "fileInput"
When I send the API request to the endpoint "/api/v1/convert/file/pdf"
Then the response status code should be 200
And the response file should have size greater than 100
And the response file should have extension ".pdf"
Examples:
| extension |
| .docx |
| .odp |
| .odt |
| .pptx |
| .rtf |
@calibre @positive @htmltopdf
Scenario: Convert HTML to PDF
Given I use an example file at "exampleFiles/example.html" as parameter "fileInput"
When I send the API request to the endpoint "/api/v1/convert/html/pdf"
Then the response status code should be 200
And the response file should have size greater than 100
And the response file should have extension ".pdf"
@calibre @positive @zippedhtmltopdf
Scenario: Convert zipped HTML to PDF
Given I use an example file at "exampleFiles/example_html.zip" as parameter "fileInput"
When I send the API request to the endpoint "/api/v1/convert/html/pdf"
Then the response status code should be 200
And the response file should have size greater than 100
And the response file should have extension ".pdf"
@calibre @positive @markdowntopdf
Scenario: Convert Markdown to PDF
Given I use an example file at "exampleFiles/example.md" as parameter "fileInput"
When I send the API request to the endpoint "/api/v1/convert/markdown/pdf"
Then the response status code should be 200
And the response file should have size greater than 100
And the response file should have extension ".pdf"
@markdown @positive
Scenario: Convert PDF to Markdown format
Given I generate a PDF file as "fileInput"
And the pdf contains 3 pages with random text
When I send the API request to the endpoint "/api/v1/convert/pdf/markdown"
Then the response status code should be 200
And the response file should have size greater than 100
And the response file should have extension ".md"
@positive @pdftocsv
Scenario: Convert PDF with tables to CSV format
Given I use an example file at "exampleFiles/tables.pdf" as parameter "fileInput"
And the request data includes
| parameter | value |
| outputFormat | csv |
| pageNumbers | all |
When I send the API request to the endpoint "/api/v1/convert/pdf/csv"
Then the response status code should be 200
And the response file should have size greater than 200
And the response file should have extension ".zip"
And the response ZIP should contain 3 files

View File

@ -2,113 +2,89 @@
Feature: API Validation
# Splits every page of a generated 2-page PDF into a grid of sections and
# checks the page count of the merged result for each division setting.
@split-pdf-by-sections @positive
Scenario Outline: split-pdf-by-sections with different parameters
  Given I generate a PDF file as "fileInput"
  And the pdf contains 2 pages
  And the request data includes
    | parameter           | value                 |
    | horizontalDivisions | <horizontalDivisions> |
    | verticalDivisions   | <verticalDivisions>   |
    | merge               | true                  |
  When I send the API request to the endpoint "/api/v1/general/split-pdf-by-sections"
  Then the response content type should be "application/pdf"
  And the response file should have size greater than 200
  And the response status code should be 200
  And the response PDF should contain <page_count> pages

  Examples:
    | horizontalDivisions | verticalDivisions | page_count |
    | 0                   | 1                 | 4          |
    | 1                   | 1                 | 8          |
    | 1                   | 2                 | 12         |
    | 2                   | 2                 | 18         |

# Splits a generated 20-page PDF at the requested page numbers and checks
# how many files the returned ZIP contains.
@split-pdf-by-pages @positive
Scenario Outline: split-pdf-by-pages with different parameters
  Given I generate a PDF file as "fileInput"
  And the pdf contains 20 pages
  And the request data includes
    | parameter   | value         |
    | fileInput   | fileInput     |
    | pageNumbers | <pageNumbers> |
  When I send the API request to the endpoint "/api/v1/general/split-pages"
  Then the response content type should be "application/octet-stream"
  And the response status code should be 200
  And the response file should have size greater than 200
  And the response ZIP should contain <file_count> files

  Examples:
    | pageNumbers | file_count |
    | 1,3,5-9     | 8          |
    | all         | 20         |
    | 2n+1        | 10         |
    | 3n          | 7          |

# Splits a generated 20-page PDF by the given split type/value and checks
# both the number of documents in the ZIP and the pages in each document.
@split-pdf-by-size-or-count @positive
Scenario Outline: split-pdf-by-size-or-count with different parameters
  Given I generate a PDF file as "fileInput"
  And the pdf contains 20 pages
  And the request data includes
    | parameter  | value        |
    | fileInput  | fileInput    |
    | splitType  | <splitType>  |
    | splitValue | <splitValue> |
  When I send the API request to the endpoint "/api/v1/general/split-by-size-or-count"
  Then the response content type should be "application/octet-stream"
  And the response status code should be 200
  And the response file should have size greater than 200
  And the response ZIP file should contain <doc_count> documents each having <pages_per_doc> pages

  Examples:
    | splitType | splitValue | doc_count | pages_per_doc |
    | 1         | 5          | 4         | 5             |
    | 2         | 2          | 2         | 10            |
    | 2         | 4          | 4         | 5             |
    | 1         | 10         | 2         | 10            |

# Extracts images from exampleFiles/images.pdf in each output format and
# expects a ZIP response containing 2 files.
@extract-images
Scenario Outline: Extract Image Scans duplicates
  Given I use an example file at "exampleFiles/images.pdf" as parameter "fileInput"
  And the request data includes
    | parameter | value    |
    | format    | <format> |
  When I send the API request to the endpoint "/api/v1/misc/extract-images"
  Then the response content type should be "application/octet-stream"
  And the response file should have extension ".zip"
  And the response ZIP should contain 2 files
  And the response file should have size greater than 0
  And the response status code should be 200

  Examples:
    | format |
    | png    |
    | gif    |
    | jpeg   |

View File

@ -10,67 +10,67 @@ from reportlab.lib.pagesizes import letter
from reportlab.lib.utils import ImageReader
from reportlab.pdfgen import canvas
import mimetypes
import requests
import zipfile
import shutil
import re
from PIL import Image, ImageDraw
# API-key header attached to the HTTP requests issued by the step functions.
API_HEADERS = {"X-API-KEY": "123456789"}
#########
# GIVEN #
#########
@given('I generate a PDF file as "{fileInput}"')
def step_generate_pdf(context, fileInput):
    """Write a one-page blank PDF to disk and register it as an upload.

    Args:
        context: shared step context; ``param_name``, ``file_name`` and an
            entry in ``files`` are set on it.
        fileInput: key under which the opened file is stored in
            ``context.files``.
    """
    context.param_name = fileInput
    context.file_name = "genericNonCustomisableName.pdf"

    writer = PdfWriter()
    writer.add_blank_page(width=72, height=72)  # Single blank page
    with open(context.file_name, "wb") as f:
        writer.write(f)

    if not hasattr(context, "files"):
        context.files = {}
    # The handle stays open so later steps can read or replace it.
    context.files[context.param_name] = open(context.file_name, "rb")
@given('I use an example file at "{filePath}" as parameter "{fileInput}"')
def step_use_example_file(context, filePath, fileInput):
    """Register an existing file on disk as an upload.

    Args:
        context: shared step context; ``param_name``, ``file_name`` and an
            entry in ``files`` are set on it.
        filePath: path of the example file; its last ``/``-separated
            component becomes ``context.file_name``.
        fileInput: key under which the opened file is stored in
            ``context.files``.

    Raises:
        FileNotFoundError: if ``filePath`` cannot be opened because it does
            not exist.
    """
    context.param_name = fileInput
    context.file_name = filePath.split("/")[-1]
    if not hasattr(context, "files"):
        context.files = {}

    # Ensure the file exists before opening
    try:
        example_file = open(filePath, "rb")
    except FileNotFoundError as exc:
        # Keep the original error chained for easier debugging.
        raise FileNotFoundError(
            f"The example file '{filePath}' does not exist."
        ) from exc
    context.files[context.param_name] = example_file
@given("the pdf contains {page_count:d} pages")
def step_pdf_contains_pages(context, page_count):
    """Overwrite the current PDF with ``page_count`` blank pages.

    Expects ``context.file_name``, ``context.param_name`` and
    ``context.files[context.param_name]`` to have been set by an earlier step.
    """
    writer = PdfWriter()
    for _ in range(page_count):
        writer.add_blank_page(width=72, height=72)
    with open(context.file_name, "wb") as f:
        writer.write(f)

    # Swap the stale handle for one that sees the rewritten file.
    context.files[context.param_name].close()
    context.files[context.param_name] = open(context.file_name, "rb")
# Duplicate for now...
@given("the pdf contains {page_count:d} blank pages")
def step_pdf_contains_blank_pages(context, page_count):
    """Overwrite the current PDF with ``page_count`` blank pages.

    Same behaviour as the "contains N pages" step, kept under a second
    phrasing. Expects ``context.file_name``, ``context.param_name`` and
    ``context.files[context.param_name]`` to have been set already.
    """
    writer = PdfWriter()
    for _ in range(page_count):
        writer.add_blank_page(width=72, height=72)
    with open(context.file_name, "wb") as f:
        writer.write(f)

    # Swap the stale handle for one that sees the rewritten file.
    context.files[context.param_name].close()
    context.files[context.param_name] = open(context.file_name, "rb")
def create_black_box_image(file_name, size):
can = canvas.Canvas(file_name, pagesize=size)
@ -80,14 +80,20 @@ def create_black_box_image(file_name, size):
can.showPage()
can.save()
@given(
    "the pdf contains {image_count:d} images of size {width:d}x{height:d} on {page_count:d} pages"
)
def step_impl(context, image_count, width, height, page_count):
    """Generate a PDF with images of a given size and register it as an upload.

    The file is always stored under the ``"fileInput"`` key and written to
    ``genericNonCustomisableName.pdf``.

    Args:
        context: shared step context.
        image_count: total number of images requested.
        width: image width passed to the generator.
        height: image height passed to the generator.
        page_count: number of pages requested.
    """
    context.param_name = "fileInput"
    context.file_name = "genericNonCustomisableName.pdf"
    create_pdf_with_images_and_boxes(
        context.file_name, image_count, page_count, width, height
    )
    if not hasattr(context, "files"):
        context.files = {}
    context.files[context.param_name] = open(context.file_name, "rb")
def add_black_boxes_to_image(image):
if isinstance(image, str):
@ -97,9 +103,14 @@ def add_black_boxes_to_image(image):
draw.rectangle([(0, 0), image.size], fill=(0, 0, 0)) # Fill image with black
return image
def create_pdf_with_images_and_boxes(file_name, image_count, page_count, image_width, image_height):
def create_pdf_with_images_and_boxes(
file_name, image_count, page_count, image_width, image_height
):
page_width, page_height = max(letter[0], image_width), max(letter[1], image_height)
boxes_per_page = image_count // page_count + (1 if image_count % page_count != 0 else 0)
boxes_per_page = image_count // page_count + (
1 if image_count % page_count != 0 else 0
)
writer = PdfWriter()
box_counter = 0
@ -114,12 +125,14 @@ def create_pdf_with_images_and_boxes(file_name, image_count, page_count, image_w
# Simulating a dynamic image creation (replace this with your actual image creation logic)
# For demonstration, we'll create a simple black image
dummy_image = Image.new('RGB', (image_width, image_height), color='white') # Create a white image
dummy_image = Image.new(
"RGB", (image_width, image_height), color="white"
) # Create a white image
dummy_image = add_black_boxes_to_image(dummy_image) # Add black boxes
# Convert the PIL Image to bytes to pass to drawImage
image_bytes = io.BytesIO()
dummy_image.save(image_bytes, format='PNG')
dummy_image.save(image_bytes, format="PNG")
image_bytes.seek(0)
# Check if the image fits in the current page dimensions
@ -130,7 +143,9 @@ def create_pdf_with_images_and_boxes(file_name, image_count, page_count, image_w
break
# Add the image to the PDF
can.drawImage(ImageReader(image_bytes), x, y, width=image_width, height=image_height)
can.drawImage(
ImageReader(image_bytes), x, y, width=image_width, height=image_height
)
box_counter += 1
can.showPage()
@ -140,7 +155,7 @@ def create_pdf_with_images_and_boxes(file_name, image_count, page_count, image_w
writer.add_page(new_pdf.pages[0])
# Write the PDF to file
with open(file_name, 'wb') as f:
with open(file_name, "wb") as f:
writer.write(f)
# Clean up temporary image files
@ -149,36 +164,81 @@ def create_pdf_with_images_and_boxes(file_name, image_count, page_count, image_w
if os.path.exists(temp_image_path):
os.remove(temp_image_path)
@given("the pdf contains {image_count:d} images on {page_count:d} pages")
def step_pdf_contains_images(context, image_count, page_count):
    """Generate a PDF of black-box images and register it as an upload.

    Falls back to the key ``"default"`` when no earlier step set
    ``context.param_name``. Any handle already stored under that key is
    closed before it is replaced.
    """
    if not hasattr(context, "param_name"):
        context.param_name = "default"
    context.file_name = "genericNonCustomisableName.pdf"
    create_pdf_with_black_boxes(context.file_name, image_count, page_count)

    if not hasattr(context, "files"):
        context.files = {}
    if context.param_name in context.files:
        context.files[context.param_name].close()
    context.files[context.param_name] = open(context.file_name, "rb")
def create_pdf_with_black_boxes(file_name, image_count, page_count):
    """Write a letter-size PDF containing 100x100 black PNG images.

    Images are spread over ``page_count`` pages, at most
    ceil(image_count / page_count) per page, and drawing stops once
    ``image_count`` images have been placed.

    Args:
        file_name: path of the PDF to write.
        image_count: total number of images to draw.
        page_count: number of pages to create.
    """
    page_width, page_height = letter
    # Ceiling division, computed once. Guarded so page_count == 0 still
    # yields an empty document, not a ZeroDivisionError.
    if page_count:
        boxes_per_page = image_count // page_count + (
            1 if image_count % page_count != 0 else 0
        )
    else:
        boxes_per_page = 0

    writer = PdfWriter()
    box_counter = 0

    for _ in range(page_count):
        packet = io.BytesIO()
        can = canvas.Canvas(packet, pagesize=(page_width, page_height))

        for i in range(boxes_per_page):
            if box_counter >= image_count:
                break

            # Create a black box image
            dummy_image = Image.new("RGB", (100, 100), color="black")
            image_bytes = io.BytesIO()
            dummy_image.save(image_bytes, format="PNG")
            image_bytes.seek(0)

            # Position derived from the slot index on a 100-unit grid.
            x = (i % (page_width // 100)) * 100
            y = page_height - (((i % (page_height // 100)) + 1) * 100)

            # Stop filling this page if the image would fall outside it.
            if x + 100 > page_width or y < 0:
                break

            can.drawImage(ImageReader(image_bytes), x, y, width=100, height=100)
            box_counter += 1

        can.showPage()
        can.save()
        packet.seek(0)
        new_pdf = PdfReader(packet)
        writer.add_page(new_pdf.pages[0])

    with open(file_name, "wb") as f:
        writer.write(f)
@given("the pdf contains {page_count:d} pages with random text")
def step_pdf_contains_pages_with_random_text(context, page_count):
    """Overwrite the current PDF with pages that each carry one random line.

    Every page gets a 100-character string of ASCII letters and digits drawn
    100 units from the left edge and 100 units below the top of a
    letter-size page. Expects ``context.file_name``, ``context.param_name``
    and ``context.files[context.param_name]`` to have been set already.
    """
    buffer = io.BytesIO()
    c = canvas.Canvas(buffer, pagesize=letter)
    _, height = letter

    for _ in range(page_count):
        text = "".join(random.choices(string.ascii_letters + string.digits, k=100))
        c.drawString(100, height - 100, text)
        c.showPage()
    c.save()

    with open(context.file_name, "wb") as f:
        f.write(buffer.getvalue())

    # Swap the stale handle for one that sees the rewritten file.
    context.files[context.param_name].close()
    context.files[context.param_name] = open(context.file_name, "rb")
@given('the pdf pages all contain the text "{text}"')
def step_pdf_pages_contain_text(context, text):
@ -192,11 +252,12 @@ def step_pdf_pages_contain_text(context, text):
c.save()
with open(context.file_name, 'wb') as f:
with open(context.file_name, "wb") as f:
f.write(buffer.getvalue())
context.files[context.param_name].close()
context.files[context.param_name] = open(context.file_name, 'rb')
context.files[context.param_name] = open(context.file_name, "rb")
@given('the pdf is encrypted with password "{password}"')
def step_encrypt_pdf(context, password):
@ -205,29 +266,34 @@ def step_encrypt_pdf(context, password):
for i in range(len(reader.pages)):
writer.add_page(reader.pages[i])
writer.encrypt(password)
with open(context.file_name, 'wb') as f:
with open(context.file_name, "wb") as f:
writer.write(f)
context.files[context.param_name].close()
context.files[context.param_name] = open(context.file_name, 'rb')
context.files[context.param_name] = open(context.file_name, "rb")
@given("the request data is")
def step_request_data(context):
    """Set ``context.request_data`` from the step's attached text block."""
    # NOTE(review): eval() executes arbitrary Python from the feature file.
    # Acceptable only while feature files are trusted; consider
    # ast.literal_eval if the text is always a plain literal.
    context.request_data = eval(context.text)
@given("the request data includes")
def step_request_data_table(context):
    """Build ``context.request_data`` from the step's table.

    Each row contributes one entry: its ``parameter`` cell is the key and
    its ``value`` cell is the value.
    """
    context.request_data = {row["parameter"]: row["value"] for row in context.table}
@given('save the generated PDF file as "{filename}" for debugging')
def save_generated_pdf(context, filename):
    """Copy the current upload file to ``filename`` for inspection.

    The shared handle is rewound before and after the copy, so the whole
    file is saved and a later step that reads the same handle still sees
    its full content.
    """
    upload = context.files[context.param_name]
    upload.seek(0)
    with open(filename, "wb") as f:
        f.write(upload.read())
    # Reading moved the position to EOF; reset it for subsequent readers.
    upload.seek(0)
    print(f"Saved generated PDF content to {filename}")
########
# WHEN #
########
@when('I send a GET request to "{endpoint}"')
def step_send_get_request(context, endpoint):
base_url = "http://localhost:8080"
@ -235,20 +301,22 @@ def step_send_get_request(context, endpoint):
response = requests.get(full_url, headers=API_HEADERS)
context.response = response
@when('I send a GET request to "{endpoint}" with parameters')
def step_send_get_request_with_params(context, endpoint):
    """Send a GET request with query parameters taken from the step table.

    Each table row supplies one query parameter (``parameter`` -> ``value``).
    The response is stored on ``context.response``.
    """
    base_url = "http://localhost:8080"
    params = {row["parameter"]: row["value"] for row in context.table}
    full_url = f"{base_url}{endpoint}"
    response = requests.get(full_url, params=params, headers=API_HEADERS)
    context.response = response
@when('I send the API request to the endpoint "{endpoint}"')
def step_send_api_request(context, endpoint):
url = f"http://localhost:8080{endpoint}"
files = context.files if hasattr(context, 'files') else {}
files = context.files if hasattr(context, "files") else {}
if not hasattr(context, 'request_data') or context.request_data is None:
if not hasattr(context, "request_data") or context.request_data is None:
context.request_data = {}
form_data = []
@ -257,130 +325,173 @@ def step_send_api_request(context, endpoint):
for key, file in files.items():
mime_type, _ = mimetypes.guess_type(file.name)
mime_type = mime_type or 'application/octet-stream'
mime_type = mime_type or "application/octet-stream"
print(f"form_data {file.name} with {mime_type}")
form_data.append((key, (file.name, file, mime_type)))
response = requests.post(url, files=form_data, headers=API_HEADERS)
context.response = response
########
# THEN #
########
@then('the response content type should be "{content_type}"')
def step_check_response_content_type(context, content_type):
    """Assert the response ``Content-Type`` header starts with ``content_type``.

    A prefix match is used, so trailing parts of the header value are
    ignored. A missing header is treated as an empty string.
    """
    actual_content_type = context.response.headers.get("Content-Type", "")
    assert actual_content_type.startswith(
        content_type
    ), f"Expected {content_type} but got {actual_content_type}. Response content: {context.response.content}"
@then("the response file should have size greater than {size:d}")
def step_check_response_file_size(context, size):
    """Assert the response body is strictly larger than ``size`` bytes."""
    actual_size = len(context.response.content)
    assert (
        actual_size > size
    ), f"Expected response size greater than {size} bytes but got {actual_size} bytes"
@then("the response PDF is not passworded")
def step_check_response_pdf_not_passworded(context):
    """Assert the PDF in the response body is not encrypted."""
    response_file = io.BytesIO(context.response.content)
    reader = PdfReader(response_file)
    assert (
        not reader.is_encrypted
    ), "Expected an unencrypted PDF but the response PDF is encrypted"
@then("the response PDF is passworded")
def step_check_response_pdf_passworded(context):
    """Assert the PDF in the response body is encrypted.

    Raises:
        AssertionError: if the body cannot be read as a PDF, or if the PDF
            is not encrypted.
    """
    response_file = io.BytesIO(context.response.content)
    try:
        reader = PdfReader(response_file)
        encrypted = reader.is_encrypted
    except PdfReadError as e:
        raise AssertionError(
            f"Failed to read PDF: {str(e)}. Response content: {context.response.content}"
        ) from e
    except Exception as e:
        raise AssertionError(
            f"An error occurred: {str(e)}. Response content: {context.response.content}"
        ) from e

    # Checked outside the try block so a failed expectation is reported
    # with its own message and is not re-wrapped by the generic handler.
    assert encrypted, "Expected an encrypted PDF but the response PDF is not encrypted"
@then("the response status code should be {status_code:d}")
def step_check_response_status_code(context, status_code):
    """Assert the HTTP status code of the response equals ``status_code``."""
    assert (
        context.response.status_code == status_code
    ), f"Expected status code {status_code} but got {context.response.status_code}"
@then('the response should contain error message "{message}"')
def step_check_response_error_message(context, message):
    """Assert the ``error`` field of the JSON response equals ``message``."""
    response_json = context.response.json()
    assert (
        response_json.get("error") == message
    ), f"Expected error message '{message}' but got '{response_json.get('error')}'"
# The "response PDF should contain N pages" step is defined once, further
# down in this module; this earlier identical definition was redundant.
@then('the response PDF metadata should include "{metadata_key}" as "{metadata_value}"')
def step_check_response_pdf_metadata(context, metadata_key, metadata_value):
    """Assert a metadata entry of the response PDF has the expected value.

    Args:
        context: shared step context holding ``response``.
        metadata_key: metadata name without the leading ``/``; the slash is
            added before the lookup.
        metadata_value: expected value of that entry.
    """
    response_file = io.BytesIO(context.response.content)
    reader = PdfReader(response_file)
    metadata = reader.metadata
    # Look the value up once with the "/"-prefixed key so the failure
    # message reports the same value that was compared.
    actual_value = metadata.get("/" + metadata_key)
    assert (
        actual_value == metadata_value
    ), f"Expected {metadata_key} to be '{metadata_value}' but got '{actual_value}'"
@then('the response file should have extension "{extension}"')
def step_check_response_file_extension(context, extension):
    """Assert the filename in ``Content-Disposition`` ends with ``extension``.

    The first ``;``-separated part of the header that starts with
    ``filename`` is used. When the header is missing or has no such part,
    the filename is treated as empty.
    """
    content_disposition = context.response.headers.get("Content-Disposition", "")
    filename = ""
    if content_disposition:
        for part in content_disposition.split(";"):
            if part.strip().startswith("filename"):
                # Split on the first "=" only so a filename that itself
                # contains "=" is not truncated.
                filename = part.split("=", 1)[1].strip().strip('"')
                break
    assert filename.endswith(
        extension
    ), f"Expected file extension {extension} but got {filename}. Response content: {context.response.content}"
@then('save the response file as "{filename}" for debugging')
def step_save_response_file(context, filename):
    """Write the raw response body to ``filename`` for inspection."""
    with open(filename, "wb") as f:
        f.write(context.response.content)
    print(f"Saved response content to {filename}")
@then("the response PDF should contain {page_count:d} pages")
def step_check_response_pdf_page_count(context, page_count):
    """Assert the PDF in the response body has exactly ``page_count`` pages."""
    reader = PdfReader(io.BytesIO(context.response.content))
    actual_page_count = len(reader.pages)
    assert (
        actual_page_count == page_count
    ), f"Expected {page_count} pages but got {actual_page_count} pages"
@then("the response ZIP should contain {file_count:d} files")
def step_check_response_zip_file_count(context, file_count):
    """Assert the ZIP in the response body lists exactly ``file_count`` entries."""
    with zipfile.ZipFile(io.BytesIO(context.response.content)) as zip_file:
        actual_file_count = len(zip_file.namelist())
    assert (
        actual_file_count == file_count
    ), f"Expected {file_count} files but got {actual_file_count} files"
@then(
    "the response ZIP file should contain {doc_count:d} documents each having {pages_per_doc:d} pages"
)
def step_check_response_zip_doc_page_count(context, doc_count, pages_per_doc):
    """Assert the ZIP holds ``doc_count`` PDFs of ``pages_per_doc`` pages each.

    The entry count is checked first; then every entry is opened as a PDF
    and its page count compared.
    """
    with zipfile.ZipFile(io.BytesIO(context.response.content)) as zip_file:
        entry_names = zip_file.namelist()
        actual_doc_count = len(entry_names)
        assert (
            actual_doc_count == doc_count
        ), f"Expected {doc_count} documents but got {actual_doc_count} documents"

        for file_name in entry_names:
            with zip_file.open(file_name) as pdf_file:
                reader = PdfReader(pdf_file)
                actual_pages_per_doc = len(reader.pages)
                assert (
                    actual_pages_per_doc == pages_per_doc
                ), f"Expected {pages_per_doc} pages per document but got {actual_pages_per_doc} pages in document {file_name}"
@then('the JSON value of "{key}" should be "{expected_value}"')
def step_check_json_value(context, key, expected_value):
    """Assert the top-level JSON field ``key`` equals ``expected_value``.

    The comparison is against the string captured from the step text.
    """
    actual_value = context.response.json().get(key)
    assert (
        actual_value == expected_value
    ), f"Expected JSON value for '{key}' to be '{expected_value}' but got '{actual_value}'"
@then(
    'JSON list entry containing "{identifier_key}" as "{identifier_value}" should have "{target_key}" as "{target_value}"'
)
def step_check_json_list_entry(
    context, identifier_key, identifier_value, target_key, target_value
):
    """Find one entry in a JSON list response and check one of its fields.

    The first entry whose ``identifier_key`` equals ``identifier_value`` is
    selected and its ``target_key`` must equal ``target_value``.

    The third parameter is named ``identifier_value`` to match both the
    placeholder in the step pattern and the name used in the body; the
    previous name ``identifier_self`` matched neither.

    Raises:
        AssertionError: if the selected entry has a different value, or if
            no entry matches the identifier.
    """
    json_response = context.response.json()
    for entry in json_response:
        if entry.get(identifier_key) == identifier_value:
            assert (
                entry.get(target_key) == target_value
            ), f"Expected {target_key} to be {target_value} in entry where {identifier_key} is {identifier_value}, but found {entry.get(target_key)}"
            break
    else:
        # Loop finished without a break: nothing matched the identifier.
        raise AssertionError(
            f"No entry with {identifier_key} as {identifier_value} found"
        )
@then('the response should match the regex "{pattern}"')
def step_response_matches_regex(context, pattern):
    """Assert the response text matches ``pattern`` from its beginning.

    ``re.match`` is used, so the pattern must match at the start of the
    text but need not consume all of it.
    """
    response_text = context.response.text
    assert re.match(
        pattern, response_text
    ), f"Response '{response_text}' does not match the expected pattern '{pattern}'"