add image conversion scenario

This commit is contained in:
Ludy87 2025-07-12 14:48:54 +02:00
parent bbf5d5f6d4
commit 4b851bf18a
No known key found for this signature in database
GPG Key ID: 92696155E0220F94
5 changed files with 639 additions and 528 deletions

View File

@ -1,21 +1,25 @@
import os
def before_all(context):
context.endpoint = None
context.request_data = None
context.files = {}
context.response = None
def after_scenario(context, scenario):
if hasattr(context, 'files'):
if hasattr(context, "files"):
for file in context.files.values():
file.close()
if os.path.exists('response_file'):
os.remove('response_file')
if hasattr(context, 'file_name') and os.path.exists(context.file_name):
if os.path.exists("response_file"):
os.remove("response_file")
if hasattr(context, "file_name") and os.path.exists(context.file_name):
os.remove(context.file_name)
# Remove any temporary files
for temp_file in os.listdir('.'):
if temp_file.startswith('genericNonCustomisableName') or temp_file.startswith('temp_image_'):
for temp_file in os.listdir("."):
if temp_file.startswith("genericNonCustomisableName") or temp_file.startswith(
"temp_image_"
):
os.remove(temp_file)

View File

@ -1,132 +1,132 @@
@example @general
Feature: API Validation
@positive @password
Scenario: Remove password
Given I generate a PDF file as "fileInput"
And the pdf contains 3 pages
And the pdf is encrypted with password "password123"
And the request data includes
| parameter | value |
| password | password123 |
When I send the API request to the endpoint "/api/v1/security/remove-password"
Then the response content type should be "application/pdf"
And the response file should have size greater than 0
And the response PDF is not passworded
And the response status code should be 200
@positive @password
Scenario: Remove password
Given I generate a PDF file as "fileInput"
And the pdf contains 3 pages
And the pdf is encrypted with password "password123"
And the request data includes
| parameter | value |
| password | password123 |
When I send the API request to the endpoint "/api/v1/security/remove-password"
Then the response content type should be "application/pdf"
And the response file should have size greater than 0
And the response PDF is not passworded
And the response status code should be 200
@negative @password
Scenario: Remove password wrong password
Given I generate a PDF file as "fileInput"
And the pdf contains 3 pages
And the pdf is encrypted with password "password123"
And the request data includes
| parameter | value |
| password | wrongPassword |
When I send the API request to the endpoint "/api/v1/security/remove-password"
Then the response status code should be 500
And the response should contain error message "Internal Server Error"
@negative @password
Scenario: Remove password wrong password
Given I generate a PDF file as "fileInput"
And the pdf contains 3 pages
And the pdf is encrypted with password "password123"
And the request data includes
| parameter | value |
| password | wrongPassword |
When I send the API request to the endpoint "/api/v1/security/remove-password"
Then the response status code should be 500
And the response should contain error message "Internal Server Error"
@positive @info
Scenario: Get info
Given I generate a PDF file as "fileInput"
When I send the API request to the endpoint "/api/v1/security/get-info-on-pdf"
Then the response content type should be "application/json"
And the response file should have size greater than 100
And the response status code should be 200
@positive @info
Scenario: Get info
Given I generate a PDF file as "fileInput"
When I send the API request to the endpoint "/api/v1/security/get-info-on-pdf"
Then the response content type should be "application/json"
And the response file should have size greater than 100
And the response status code should be 200
@positive @password
Scenario: Add password
Given I generate a PDF file as "fileInput"
And the pdf contains 3 pages
And the request data includes
| parameter | value |
| password | password123 |
When I send the API request to the endpoint "/api/v1/security/add-password"
Then the response content type should be "application/pdf"
And the response file should have size greater than 100
And the response PDF is passworded
And the response status code should be 200
@positive @password
Scenario: Add password
Given I generate a PDF file as "fileInput"
And the pdf contains 3 pages
And the request data includes
| parameter | value |
| password | password123 |
When I send the API request to the endpoint "/api/v1/security/add-password"
Then the response content type should be "application/pdf"
And the response file should have size greater than 100
And the response PDF is passworded
And the response status code should be 200
@positive @password
Scenario: Add password with other params
Given I generate a PDF file as "fileInput"
And the pdf contains 3 pages
And the request data includes
| parameter | value |
| ownerPassword | ownerPass |
| password | password123 |
| keyLength | 256 |
| canPrint | true |
| canModify | false |
When I send the API request to the endpoint "/api/v1/security/add-password"
Then the response content type should be "application/pdf"
And the response file should have size greater than 100
And the response PDF is passworded
And the response status code should be 200
@positive @password
Scenario: Add password with other params
Given I generate a PDF file as "fileInput"
And the pdf contains 3 pages
And the request data includes
| parameter | value |
| ownerPassword | ownerPass |
| password | password123 |
| keyLength | 256 |
| canPrint | true |
| canModify | false |
When I send the API request to the endpoint "/api/v1/security/add-password"
Then the response content type should be "application/pdf"
And the response file should have size greater than 100
And the response PDF is passworded
And the response status code should be 200
@positive @watermark
Scenario: Add watermark
Given I generate a PDF file as "fileInput"
And the pdf contains 3 pages
And the request data includes
| parameter | value |
| watermarkType | text |
| watermarkText | Sample Watermark |
| fontSize | 30 |
| rotation | 45 |
| opacity | 0.5 |
| widthSpacer | 50 |
| heightSpacer | 50 |
| alphabet | roman |
| customColor | #d3d3d3 |
When I send the API request to the endpoint "/api/v1/security/add-watermark"
Then the response content type should be "application/pdf"
And the response file should have size greater than 100
And the response status code should be 200
@positive @watermark
Scenario: Add watermark
Given I generate a PDF file as "fileInput"
And the pdf contains 3 pages
And the request data includes
| parameter | value |
| watermarkType | text |
| watermarkText | Sample Watermark |
| fontSize | 30 |
| rotation | 45 |
| opacity | 0.5 |
| widthSpacer | 50 |
| heightSpacer | 50 |
| alphabet | roman |
| customColor | #d3d3d3 |
When I send the API request to the endpoint "/api/v1/security/add-watermark"
Then the response content type should be "application/pdf"
And the response file should have size greater than 100
And the response status code should be 200
@positive
Scenario: Remove blank pages
Given I generate a PDF file as "fileInput"
And the pdf contains 3 blank pages
And the request data includes
| parameter | value |
| threshold | 90 |
| whitePercent | 99.9 |
When I send the API request to the endpoint "/api/v1/misc/remove-blanks"
Then the response content type should be "application/octet-stream"
And the response file should have extension ".zip"
And the response ZIP should contain 1 files
And the response file should have size greater than 0
@positive
Scenario: Remove blank pages
Given I generate a PDF file as "fileInput"
And the pdf contains 3 blank pages
And the request data includes
| parameter | value |
| threshold | 90 |
| whitePercent | 99.9 |
When I send the API request to the endpoint "/api/v1/misc/remove-blanks"
Then the response content type should be "application/octet-stream"
And the response file should have extension ".zip"
And the response ZIP should contain 1 files
And the response file should have size greater than 0
@positive @flatten
Scenario: Flatten PDF
Given I generate a PDF file as "fileInput"
And the request data includes
| parameter | value |
| flattenOnlyForms | false |
When I send the API request to the endpoint "/api/v1/misc/flatten"
Then the response content type should be "application/pdf"
And the response file should have size greater than 0
And the response status code should be 200
@positive @flatten
Scenario: Flatten PDF
Given I generate a PDF file as "fileInput"
And the request data includes
| parameter | value |
| flattenOnlyForms | false |
When I send the API request to the endpoint "/api/v1/misc/flatten"
Then the response content type should be "application/pdf"
And the response file should have size greater than 0
And the response status code should be 200
@positive @metadata
Scenario: Update metadata
Given I generate a PDF file as "fileInput"
And the request data includes
| parameter | value |
| author | John Doe |
| title | Sample Title |
| subject | Sample Subject |
| keywords | sample, test |
| producer | Test Producer |
When I send the API request to the endpoint "/api/v1/misc/update-metadata"
Then the response content type should be "application/pdf"
And the response file should have size greater than 0
And the response PDF metadata should include "Author" as "John Doe"
And the response PDF metadata should include "Keywords" as "sample, test"
And the response PDF metadata should include "Subject" as "Sample Subject"
And the response PDF metadata should include "Title" as "Sample Title"
And the response status code should be 200
@positive @metadata
Scenario: Update metadata
Given I generate a PDF file as "fileInput"
And the request data includes
| parameter | value |
| author | John Doe |
| title | Sample Title |
| subject | Sample Subject |
| keywords | sample, test |
| producer | Test Producer |
When I send the API request to the endpoint "/api/v1/misc/update-metadata"
Then the response content type should be "application/pdf"
And the response file should have size greater than 0
And the response PDF metadata should include "Author" as "John Doe"
And the response PDF metadata should include "Keywords" as "sample, test"
And the response PDF metadata should include "Subject" as "Sample Subject"
And the response PDF metadata should include "Title" as "Sample Title"
And the response status code should be 200

View File

@ -1,230 +1,250 @@
Feature: API Validation
@libre @positive
Scenario: Repair PDF
Given I generate a PDF file as "fileInput"
When I send the API request to the endpoint "/api/v1/misc/repair"
Then the response content type should be "application/pdf"
And the response file should have size greater than 0
And the response status code should be 200
@ocr @positive
Scenario: Process PDF with OCR
Given I generate a PDF file as "fileInput"
And the request data includes
| parameter | value |
| languages | eng |
| sidecar | false |
| deskew | true |
| clean | true |
| cleanFinal | true |
| ocrType | Normal |
| ocrRenderType | hocr |
| removeImagesAfter| false |
When I send the API request to the endpoint "/api/v1/misc/ocr-pdf"
Then the response content type should be "application/pdf"
And the response file should have size greater than 0
And the response status code should be 200
@libre @positive
Scenario: Repair PDF
Given I generate a PDF file as "fileInput"
When I send the API request to the endpoint "/api/v1/misc/repair"
Then the response content type should be "application/pdf"
And the response file should have size greater than 0
And the response status code should be 200
@ocr @positive
Scenario: Extract Image Scans
Given I generate a PDF file as "fileInput"
And the pdf contains 3 images of size 300x300 on 2 pages
And the request data includes
| parameter | value |
| angleThreshold | 5 |
| tolerance | 20 |
| minArea | 8000 |
| minContourArea | 500 |
| borderSize | 1 |
When I send the API request to the endpoint "/api/v1/misc/extract-image-scans"
Then the response content type should be "application/octet-stream"
And the response file should have extension ".zip"
And the response ZIP should contain 2 files
And the response file should have size greater than 0
And the response status code should be 200
@ocr @positive
Scenario: Process PDF with OCR
Given I generate a PDF file as "fileInput"
And the request data includes
| parameter | value |
| languages | eng |
| sidecar | false |
| deskew | true |
| clean | true |
| cleanFinal | true |
| ocrType | Force |
| ocrRenderType | hocr |
| removeImagesAfter| false |
When I send the API request to the endpoint "/api/v1/misc/ocr-pdf"
Then the response content type should be "application/pdf"
And the response file should have size greater than 0
And the response status code should be 200
@ocr @positive
Scenario: Process PDF with OCR
Given I generate a PDF file as "fileInput"
And the request data includes
| parameter | value |
| languages | eng |
| sidecar | false |
| deskew | true |
| clean | true |
| cleanFinal | true |
| ocrType | Normal |
| ocrRenderType | hocr |
| removeImagesAfter | false |
When I send the API request to the endpoint "/api/v1/misc/ocr-pdf"
Then the response content type should be "application/pdf"
And the response file should have size greater than 0
And the response status code should be 200
@libre @positive
Scenario Outline: Convert PDF to various word formats
Given I generate a PDF file as "fileInput"
And the pdf contains 3 pages with random text
And the request data includes
| parameter | value |
| outputFormat | <format> |
When I send the API request to the endpoint "/api/v1/convert/pdf/word"
Then the response status code should be 200
And the response file should have size greater than 100
And the response file should have extension "<extension>"
@ocr @positive
Scenario: Extract Image Scans
Given I generate a PDF file as "fileInput"
And the pdf contains 3 images of size 300x300 on 2 pages
And the request data includes
| parameter | value |
| angleThreshold | 5 |
| tolerance | 20 |
| minArea | 8000 |
| minContourArea | 500 |
| borderSize | 1 |
When I send the API request to the endpoint "/api/v1/misc/extract-image-scans"
Then the response content type should be "application/octet-stream"
And the response file should have extension ".zip"
And the response ZIP should contain 2 files
And the response file should have size greater than 0
And the response status code should be 200
Examples:
| format | extension |
| docx | .docx |
| odt | .odt |
| doc | .doc |
@ocr @pdfa1
Scenario: PDFA
Given I use an example file at "exampleFiles/pdfa2.pdf" as parameter "fileInput"
And the request data includes
| parameter | value |
| outputFormat | pdfa |
When I send the API request to the endpoint "/api/v1/convert/pdf/pdfa"
Then the response status code should be 200
And the response file should have extension ".pdf"
And the response file should have size greater than 100
@ocr @pdfa2
Scenario: PDFA1
Given I use an example file at "exampleFiles/pdfa1.pdf" as parameter "fileInput"
And the request data includes
| parameter | value |
| outputFormat | pdfa-1 |
When I send the API request to the endpoint "/api/v1/convert/pdf/pdfa"
Then the response status code should be 200
And the response file should have extension ".pdf"
And the response file should have size greater than 100
@compress @qpdf @positive
Scenario: Compress
Given I use an example file at "exampleFiles/ghost3.pdf" as parameter "fileInput"
And the request data includes
| parameter | value |
| optimizeLevel | 4 |
When I send the API request to the endpoint "/api/v1/misc/compress-pdf"
Then the response status code should be 200
And the response file should have extension ".pdf"
And the response file should have size greater than 100
@compress @qpdf @positive
Scenario: Compress
Given I use an example file at "exampleFiles/ghost2.pdf" as parameter "fileInput"
And the request data includes
| parameter | value |
| optimizeLevel | 1 |
| expectedOutputSize | 5KB |
When I send the API request to the endpoint "/api/v1/misc/compress-pdf"
Then the response status code should be 200
And the response file should have extension ".pdf"
And the response file should have size greater than 100
@compress @qpdf @positive
Scenario: Compress
Given I use an example file at "exampleFiles/ghost1.pdf" as parameter "fileInput"
And the request data includes
| parameter | value |
| optimizeLevel | 1 |
| expectedOutputSize | 5KB |
When I send the API request to the endpoint "/api/v1/misc/compress-pdf"
Then the response status code should be 200
And the response file should have extension ".pdf"
And the response file should have size greater than 100
@libre @positive
Scenario Outline: Convert PDF to various types
Given I generate a PDF file as "fileInput"
And the pdf contains 3 pages with random text
And the request data includes
| parameter | value |
| outputFormat | <format> |
When I send the API request to the endpoint "/api/v1/convert/pdf/<type>"
Then the response status code should be 200
And the response file should have size greater than 100
And the response file should have extension "<extension>"
@ocr @positive
Scenario: Process PDF with OCR
Given I generate a PDF file as "fileInput"
And the request data includes
| parameter | value |
| languages | eng |
| sidecar | false |
| deskew | true |
| clean | true |
| cleanFinal | true |
| ocrType | Force |
| ocrRenderType | hocr |
| removeImagesAfter | false |
When I send the API request to the endpoint "/api/v1/misc/ocr-pdf"
Then the response content type should be "application/pdf"
And the response file should have size greater than 0
And the response status code should be 200
Examples:
| type | format | extension |
| text | rtf | .rtf |
| text | txt | .txt |
| presentation | ppt | .ppt |
| presentation | pptx | .pptx |
| presentation | odp | .odp |
| html | html | .zip |
@libre @positive @topdf
Scenario Outline: Convert PDF to various types
Given I use an example file at "exampleFiles/example<extension>" as parameter "fileInput"
When I send the API request to the endpoint "/api/v1/convert/file/pdf"
Then the response status code should be 200
And the response file should have size greater than 100
And the response file should have extension ".pdf"
@libre @positive
Scenario Outline: Convert PDF to various word formats
Given I generate a PDF file as "fileInput"
And the pdf contains 3 pages with random text
And the request data includes
| parameter | value |
| outputFormat | <format> |
When I send the API request to the endpoint "/api/v1/convert/pdf/word"
Then the response status code should be 200
And the response file should have size greater than 100
And the response file should have extension "<extension>"
Examples:
| extension |
| .docx |
| .odp |
| .odt |
| .pptx |
| .rtf |
@calibre @positive @htmltopdf
Scenario: Convert HTML to PDF
Given I use an example file at "exampleFiles/example.html" as parameter "fileInput"
When I send the API request to the endpoint "/api/v1/convert/html/pdf"
Then the response status code should be 200
And the response file should have size greater than 100
And the response file should have extension ".pdf"
@calibre @positive @zippedhtmltopdf
Scenario: Convert zipped HTML to PDF
Given I use an example file at "exampleFiles/example_html.zip" as parameter "fileInput"
When I send the API request to the endpoint "/api/v1/convert/html/pdf"
Then the response status code should be 200
And the response file should have size greater than 100
And the response file should have extension ".pdf"
@calibre @positive @markdowntopdf
Scenario: Convert Markdown to PDF
Given I use an example file at "exampleFiles/example.md" as parameter "fileInput"
When I send the API request to the endpoint "/api/v1/convert/markdown/pdf"
Then the response status code should be 200
And the response file should have size greater than 100
And the response file should have extension ".pdf"
@markdown @positive
Scenario: Convert PDF to Markdown format
Given I generate a PDF file as "fileInput"
And the pdf contains 3 pages with random text
When I send the API request to the endpoint "/api/v1/convert/pdf/markdown"
Then the response status code should be 200
And the response file should have size greater than 100
And the response file should have extension ".md"
@positive @pdftocsv
Scenario: Convert PDF with tables to CSV format
Given I use an example file at "exampleFiles/tables.pdf" as parameter "fileInput"
And the request data includes
| parameter | value |
| outputFormat | csv |
| pageNumbers | all |
When I send the API request to the endpoint "/api/v1/convert/pdf/csv"
Then the response status code should be 200
And the response file should have size greater than 200
And the response file should have extension ".zip"
And the response ZIP should contain 3 files
Examples:
| format | extension |
| docx | .docx |
| odt | .odt |
| doc | .doc |
@ocr @pdfa1
Scenario: PDFA
Given I use an example file at "exampleFiles/pdfa2.pdf" as parameter "fileInput"
And the request data includes
| parameter | value |
| outputFormat | pdfa |
When I send the API request to the endpoint "/api/v1/convert/pdf/pdfa"
Then the response status code should be 200
And the response file should have extension ".pdf"
And the response file should have size greater than 100
@ocr @pdfa2
Scenario: PDFA1
Given I use an example file at "exampleFiles/pdfa1.pdf" as parameter "fileInput"
And the request data includes
| parameter | value |
| outputFormat | pdfa-1 |
When I send the API request to the endpoint "/api/v1/convert/pdf/pdfa"
Then the response status code should be 200
And the response file should have extension ".pdf"
And the response file should have size greater than 100
@compress @qpdf @positive
Scenario: Compress
Given I use an example file at "exampleFiles/ghost3.pdf" as parameter "fileInput"
And the request data includes
| parameter | value |
| optimizeLevel | 4 |
When I send the API request to the endpoint "/api/v1/misc/compress-pdf"
Then the response status code should be 200
And the response file should have extension ".pdf"
And the response file should have size greater than 100
@compress @qpdf @positive
Scenario: Compress
Given I use an example file at "exampleFiles/ghost2.pdf" as parameter "fileInput"
And the request data includes
| parameter | value |
| optimizeLevel | 1 |
| expectedOutputSize | 5KB |
When I send the API request to the endpoint "/api/v1/misc/compress-pdf"
Then the response status code should be 200
And the response file should have extension ".pdf"
And the response file should have size greater than 100
@compress @qpdf @positive
Scenario: Compress
Given I use an example file at "exampleFiles/ghost1.pdf" as parameter "fileInput"
And the request data includes
| parameter | value |
| optimizeLevel | 1 |
| expectedOutputSize | 5KB |
When I send the API request to the endpoint "/api/v1/misc/compress-pdf"
Then the response status code should be 200
And the response file should have extension ".pdf"
And the response file should have size greater than 100
@libre @positive
Scenario Outline: Convert PDF to various types
Given I generate a PDF file as "fileInput"
And the pdf contains 3 pages with random text
And the request data includes
| parameter | value |
| outputFormat | <format> |
When I send the API request to the endpoint "/api/v1/convert/pdf/<type>"
Then the response status code should be 200
And the response file should have size greater than 100
And the response file should have extension "<extension>"
Examples:
| type | format | extension |
| text | rtf | .rtf |
| text | txt | .txt |
| presentation | ppt | .ppt |
| presentation | pptx | .pptx |
| presentation | odp | .odp |
| html | html | .zip |
@image @positive
Scenario Outline: Convert PDF to image
Given I generate a PDF file as "fileInput"
And the pdf contains 3 pages with random text
And the pdf contains 3 images of size 300x300 on 3 pages
And the request data includes
| parameter | value |
| dpi | 300 |
| imageFormat | <format> |
When I send the API request to the endpoint "/api/v1/convert/pdf/img"
Then the response status code should be 200
And the response file should have size greater than 100
And the response file should have extension ".zip"
Examples:
| format |
| webp |
| png |
| jpeg |
| jpg |
| gif |
@libre @positive @topdf
Scenario Outline: Convert PDF to various types
Given I use an example file at "exampleFiles/example<extension>" as parameter "fileInput"
When I send the API request to the endpoint "/api/v1/convert/file/pdf"
Then the response status code should be 200
And the response file should have size greater than 100
And the response file should have extension ".pdf"
Examples:
| extension |
| .docx |
| .odp |
| .odt |
| .pptx |
| .rtf |
@calibre @positive @htmltopdf
Scenario: Convert HTML to PDF
Given I use an example file at "exampleFiles/example.html" as parameter "fileInput"
When I send the API request to the endpoint "/api/v1/convert/html/pdf"
Then the response status code should be 200
And the response file should have size greater than 100
And the response file should have extension ".pdf"
@calibre @positive @zippedhtmltopdf
Scenario: Convert zipped HTML to PDF
Given I use an example file at "exampleFiles/example_html.zip" as parameter "fileInput"
When I send the API request to the endpoint "/api/v1/convert/html/pdf"
Then the response status code should be 200
And the response file should have size greater than 100
And the response file should have extension ".pdf"
@calibre @positive @markdowntopdf
Scenario: Convert Markdown to PDF
Given I use an example file at "exampleFiles/example.md" as parameter "fileInput"
When I send the API request to the endpoint "/api/v1/convert/markdown/pdf"
Then the response status code should be 200
And the response file should have size greater than 100
And the response file should have extension ".pdf"
@markdown @positive
Scenario: Convert PDF to Markdown format
Given I generate a PDF file as "fileInput"
And the pdf contains 3 pages with random text
When I send the API request to the endpoint "/api/v1/convert/pdf/markdown"
Then the response status code should be 200
And the response file should have size greater than 100
And the response file should have extension ".md"
@positive @pdftocsv
Scenario: Convert PDF with tables to CSV format
Given I use an example file at "exampleFiles/tables.pdf" as parameter "fileInput"
And the request data includes
| parameter | value |
| outputFormat | csv |
| pageNumbers | all |
When I send the API request to the endpoint "/api/v1/convert/pdf/csv"
Then the response status code should be 200
And the response file should have size greater than 200
And the response file should have extension ".zip"
And the response ZIP should contain 3 files

View File

@ -2,113 +2,89 @@
Feature: API Validation
@split-pdf-by-sections @positive
Scenario Outline: split-pdf-by-sections with different parameters
Given I generate a PDF file as "fileInput"
And the pdf contains 2 pages
And the request data includes
| parameter | value |
| horizontalDivisions | <horizontalDivisions> |
| verticalDivisions | <verticalDivisions> |
| merge | true |
When I send the API request to the endpoint "/api/v1/general/split-pdf-by-sections"
Then the response content type should be "application/pdf"
And the response file should have size greater than 200
And the response status code should be 200
And the response PDF should contain <page_count> pages
@split-pdf-by-sections @positive
Scenario Outline: split-pdf-by-sections with different parameters
Given I generate a PDF file as "fileInput"
And the pdf contains 2 pages
And the request data includes
| parameter | value |
| horizontalDivisions | <horizontalDivisions> |
| verticalDivisions | <verticalDivisions> |
| merge | true |
When I send the API request to the endpoint "/api/v1/general/split-pdf-by-sections"
Then the response content type should be "application/pdf"
And the response file should have size greater than 200
And the response status code should be 200
And the response PDF should contain <page_count> pages
Examples:
| horizontalDivisions | verticalDivisions | page_count |
| 0 | 1 | 4 |
| 1 | 1 | 8 |
| 1 | 2 | 12 |
| 2 | 2 | 18 |
@split-pdf-by-sections @positive
Scenario Outline: split-pdf-by-sections with different parameters
Given I generate a PDF file as "fileInput"
And the pdf contains 2 pages
And the request data includes
| parameter | value |
| horizontalDivisions | <horizontalDivisions> |
| verticalDivisions | <verticalDivisions> |
| merge | true |
When I send the API request to the endpoint "/api/v1/general/split-pdf-by-sections"
Then the response content type should be "application/pdf"
And the response file should have size greater than 200
And the response status code should be 200
And the response PDF should contain <page_count> pages
Examples:
| horizontalDivisions | verticalDivisions | page_count |
| 0 | 1 | 4 |
| 1 | 1 | 8 |
| 1 | 2 | 12 |
| 2 | 2 | 18 |
Examples:
| horizontalDivisions | verticalDivisions | page_count |
| 0 | 1 | 4 |
| 1 | 1 | 8 |
| 1 | 2 | 12 |
| 2 | 2 | 18 |
@split-pdf-by-pages @positive
Scenario Outline: split-pdf-by-pages with different parameters
Given I generate a PDF file as "fileInput"
And the pdf contains 20 pages
And the request data includes
| parameter | value |
| fileInput | fileInput |
| pageNumbers | <pageNumbers> |
When I send the API request to the endpoint "/api/v1/general/split-pages"
Then the response content type should be "application/octet-stream"
And the response status code should be 200
And the response file should have size greater than 200
And the response ZIP should contain <file_count> files
@split-pdf-by-pages @positive
Scenario Outline: split-pdf-by-pages with different parameters
Given I generate a PDF file as "fileInput"
And the pdf contains 20 pages
And the request data includes
| parameter | value |
| fileInput | fileInput |
| pageNumbers | <pageNumbers> |
When I send the API request to the endpoint "/api/v1/general/split-pages"
Then the response content type should be "application/octet-stream"
And the response status code should be 200
And the response file should have size greater than 200
And the response ZIP should contain <file_count> files
Examples:
| pageNumbers | file_count |
| 1,3,5-9 | 8 |
| all | 20 |
| 2n+1 | 10 |
| 3n | 7 |
Examples:
| pageNumbers | file_count |
| 1,3,5-9 | 8 |
| all | 20 |
| 2n+1 | 10 |
| 3n | 7 |
@split-pdf-by-size-or-count @positive
Scenario Outline: split-pdf-by-size-or-count with different parameters
Given I generate a PDF file as "fileInput"
And the pdf contains 20 pages
And the request data includes
| parameter | value |
| fileInput | fileInput |
| splitType | <splitType> |
| splitValue | <splitValue> |
When I send the API request to the endpoint "/api/v1/general/split-by-size-or-count"
Then the response content type should be "application/octet-stream"
And the response status code should be 200
And the response file should have size greater than 200
And the response ZIP file should contain <doc_count> documents each having <pages_per_doc> pages
@split-pdf-by-size-or-count @positive
Scenario Outline: split-pdf-by-size-or-count with different parameters
Given I generate a PDF file as "fileInput"
And the pdf contains 20 pages
And the request data includes
| parameter | value |
| fileInput | fileInput |
| splitType | <splitType> |
| splitValue | <splitValue> |
When I send the API request to the endpoint "/api/v1/general/split-by-size-or-count"
Then the response content type should be "application/octet-stream"
And the response status code should be 200
And the response file should have size greater than 200
And the response ZIP file should contain <doc_count> documents each having <pages_per_doc> pages
Examples:
| splitType | splitValue | doc_count | pages_per_doc |
| 1 | 5 | 4 | 5 |
| 2 | 2 | 2 | 10 |
| 2 | 4 | 4 | 5 |
| 1 | 10 | 2 | 10 |
Examples:
| splitType | splitValue | doc_count | pages_per_doc |
| 1 | 5 | 4 | 5 |
| 2 | 2 | 2 | 10 |
| 2 | 4 | 4 | 5 |
| 1 | 10 | 2 | 10 |
@extract-images
Scenario Outline: Extract Image Scans duplicates
Given I use an example file at "exampleFiles/images.pdf" as parameter "fileInput"
And the request data includes
| parameter | value |
| format | <format> |
When I send the API request to the endpoint "/api/v1/misc/extract-images"
Then the response content type should be "application/octet-stream"
And the response file should have extension ".zip"
And the response ZIP should contain 2 files
And the response file should have size greater than 0
And the response status code should be 200
@extract-images
Scenario Outline: Extract Image Scans duplicates
Given I use an example file at "exampleFiles/images.pdf" as parameter "fileInput"
And the request data includes
| parameter | value |
| format | <format> |
When I send the API request to the endpoint "/api/v1/misc/extract-images"
Then the response content type should be "application/octet-stream"
And the response file should have extension ".zip"
And the response ZIP should contain 2 files
And the response file should have size greater than 0
And the response status code should be 200
Examples:
| format |
| png |
| gif |
| jpeg |
Examples:
| format |
| png |
| gif |
| jpeg |

View File

@ -10,67 +10,67 @@ from reportlab.lib.pagesizes import letter
from reportlab.lib.utils import ImageReader
from reportlab.pdfgen import canvas
import mimetypes
import requests
import zipfile
import shutil
import re
from PIL import Image, ImageDraw
API_HEADERS = {
'X-API-KEY': '123456789'
}
API_HEADERS = {"X-API-KEY": "123456789"}
#########
# GIVEN #
#########
@given('I generate a PDF file as "{fileInput}"')
def step_generate_pdf(context, fileInput):
context.param_name = fileInput
context.file_name = "genericNonCustomisableName.pdf"
writer = PdfWriter()
writer.add_blank_page(width=72, height=72) # Single blank page
with open(context.file_name, 'wb') as f:
with open(context.file_name, "wb") as f:
writer.write(f)
if not hasattr(context, 'files'):
if not hasattr(context, "files"):
context.files = {}
context.files[context.param_name] = open(context.file_name, 'rb')
context.files[context.param_name] = open(context.file_name, "rb")
@given('I use an example file at "{filePath}" as parameter "{fileInput}"')
def step_use_example_file(context, filePath, fileInput):
context.param_name = fileInput
context.file_name = filePath.split('/')[-1]
if not hasattr(context, 'files'):
context.file_name = filePath.split("/")[-1]
if not hasattr(context, "files"):
context.files = {}
# Ensure the file exists before opening
try:
example_file = open(filePath, 'rb')
example_file = open(filePath, "rb")
context.files[context.param_name] = example_file
except FileNotFoundError:
raise FileNotFoundError(f"The example file '{filePath}' does not exist.")
@given('the pdf contains {page_count:d} pages')
@given("the pdf contains {page_count:d} pages")
def step_pdf_contains_pages(context, page_count):
writer = PdfWriter()
for i in range(page_count):
writer.add_blank_page(width=72, height=72)
with open(context.file_name, 'wb') as f:
with open(context.file_name, "wb") as f:
writer.write(f)
context.files[context.param_name].close()
context.files[context.param_name] = open(context.file_name, 'rb')
context.files[context.param_name] = open(context.file_name, "rb")
# Duplicate for now...
@given('the pdf contains {page_count:d} blank pages')
@given("the pdf contains {page_count:d} blank pages")
def step_pdf_contains_blank_pages(context, page_count):
writer = PdfWriter()
for i in range(page_count):
writer.add_blank_page(width=72, height=72)
with open(context.file_name, 'wb') as f:
with open(context.file_name, "wb") as f:
writer.write(f)
context.files[context.param_name].close()
context.files[context.param_name] = open(context.file_name, 'rb')
context.files[context.param_name] = open(context.file_name, "rb")
def create_black_box_image(file_name, size):
can = canvas.Canvas(file_name, pagesize=size)
@ -80,14 +80,20 @@ def create_black_box_image(file_name, size):
can.showPage()
can.save()
@given(u'the pdf contains {image_count:d} images of size {width:d}x{height:d} on {page_count:d} pages')
@given(
"the pdf contains {image_count:d} images of size {width:d}x{height:d} on {page_count:d} pages"
)
def step_impl(context, image_count, width, height, page_count):
context.param_name = "fileInput"
context.file_name = "genericNonCustomisableName.pdf"
create_pdf_with_images_and_boxes(context.file_name, image_count, page_count, width, height)
if not hasattr(context, 'files'):
create_pdf_with_images_and_boxes(
context.file_name, image_count, page_count, width, height
)
if not hasattr(context, "files"):
context.files = {}
context.files[context.param_name] = open(context.file_name, 'rb')
context.files[context.param_name] = open(context.file_name, "rb")
def add_black_boxes_to_image(image):
if isinstance(image, str):
@ -97,9 +103,14 @@ def add_black_boxes_to_image(image):
draw.rectangle([(0, 0), image.size], fill=(0, 0, 0)) # Fill image with black
return image
def create_pdf_with_images_and_boxes(file_name, image_count, page_count, image_width, image_height):
def create_pdf_with_images_and_boxes(
file_name, image_count, page_count, image_width, image_height
):
page_width, page_height = max(letter[0], image_width), max(letter[1], image_height)
boxes_per_page = image_count // page_count + (1 if image_count % page_count != 0 else 0)
boxes_per_page = image_count // page_count + (
1 if image_count % page_count != 0 else 0
)
writer = PdfWriter()
box_counter = 0
@ -114,12 +125,14 @@ def create_pdf_with_images_and_boxes(file_name, image_count, page_count, image_w
# Simulating a dynamic image creation (replace this with your actual image creation logic)
# For demonstration, we'll create a simple black image
dummy_image = Image.new('RGB', (image_width, image_height), color='white') # Create a white image
dummy_image = Image.new(
"RGB", (image_width, image_height), color="white"
) # Create a white image
dummy_image = add_black_boxes_to_image(dummy_image) # Add black boxes
# Convert the PIL Image to bytes to pass to drawImage
image_bytes = io.BytesIO()
dummy_image.save(image_bytes, format='PNG')
dummy_image.save(image_bytes, format="PNG")
image_bytes.seek(0)
# Check if the image fits in the current page dimensions
@ -130,7 +143,9 @@ def create_pdf_with_images_and_boxes(file_name, image_count, page_count, image_w
break
# Add the image to the PDF
can.drawImage(ImageReader(image_bytes), x, y, width=image_width, height=image_height)
can.drawImage(
ImageReader(image_bytes), x, y, width=image_width, height=image_height
)
box_counter += 1
can.showPage()
@ -140,7 +155,7 @@ def create_pdf_with_images_and_boxes(file_name, image_count, page_count, image_w
writer.add_page(new_pdf.pages[0])
# Write the PDF to file
with open(file_name, 'wb') as f:
with open(file_name, "wb") as f:
writer.write(f)
# Clean up temporary image files
@ -149,36 +164,81 @@ def create_pdf_with_images_and_boxes(file_name, image_count, page_count, image_w
if os.path.exists(temp_image_path):
os.remove(temp_image_path)
@given('the pdf contains {image_count:d} images on {page_count:d} pages')
@given("the pdf contains {image_count:d} images on {page_count:d} pages")
def step_pdf_contains_images(context, image_count, page_count):
if not hasattr(context, 'param_name'):
if not hasattr(context, "param_name"):
context.param_name = "default"
context.file_name = "genericNonCustomisableName.pdf"
create_pdf_with_black_boxes(context.file_name, image_count, page_count)
if not hasattr(context, 'files'):
if not hasattr(context, "files"):
context.files = {}
if context.param_name in context.files:
context.files[context.param_name].close()
context.files[context.param_name] = open(context.file_name, 'rb')
context.files[context.param_name] = open(context.file_name, "rb")
@given('the pdf contains {page_count:d} pages with random text')
def create_pdf_with_black_boxes(file_name, image_count, page_count):
page_width, page_height = letter
writer = PdfWriter()
box_counter = 0
for page in range(page_count):
packet = io.BytesIO()
can = canvas.Canvas(packet, pagesize=(page_width, page_height))
boxes_per_page = image_count // page_count + (
1 if image_count % page_count != 0 else 0
)
for i in range(boxes_per_page):
if box_counter >= image_count:
break
# Create a black box image
dummy_image = Image.new("RGB", (100, 100), color="black")
image_bytes = io.BytesIO()
dummy_image.save(image_bytes, format="PNG")
image_bytes.seek(0)
x = (i % (page_width // 100)) * 100
y = page_height - (((i % (page_height // 100)) + 1) * 100)
if x + 100 > page_width or y < 0:
break
can.drawImage(ImageReader(image_bytes), x, y, width=100, height=100)
box_counter += 1
can.showPage()
can.save()
packet.seek(0)
new_pdf = PdfReader(packet)
writer.add_page(new_pdf.pages[0])
with open(file_name, "wb") as f:
writer.write(f)
@given("the pdf contains {page_count:d} pages with random text")
def step_pdf_contains_pages_with_random_text(context, page_count):
buffer = io.BytesIO()
c = canvas.Canvas(buffer, pagesize=letter)
width, height = letter
for _ in range(page_count):
text = ''.join(random.choices(string.ascii_letters + string.digits, k=100))
text = "".join(random.choices(string.ascii_letters + string.digits, k=100))
c.drawString(100, height - 100, text)
c.showPage()
c.save()
with open(context.file_name, 'wb') as f:
with open(context.file_name, "wb") as f:
f.write(buffer.getvalue())
context.files[context.param_name].close()
context.files[context.param_name] = open(context.file_name, 'rb')
context.files[context.param_name] = open(context.file_name, "rb")
@given('the pdf pages all contain the text "{text}"')
def step_pdf_pages_contain_text(context, text):
@ -192,11 +252,12 @@ def step_pdf_pages_contain_text(context, text):
c.save()
with open(context.file_name, 'wb') as f:
with open(context.file_name, "wb") as f:
f.write(buffer.getvalue())
context.files[context.param_name].close()
context.files[context.param_name] = open(context.file_name, 'rb')
context.files[context.param_name] = open(context.file_name, "rb")
@given('the pdf is encrypted with password "{password}"')
def step_encrypt_pdf(context, password):
@ -205,29 +266,34 @@ def step_encrypt_pdf(context, password):
for i in range(len(reader.pages)):
writer.add_page(reader.pages[i])
writer.encrypt(password)
with open(context.file_name, 'wb') as f:
with open(context.file_name, "wb") as f:
writer.write(f)
context.files[context.param_name].close()
context.files[context.param_name] = open(context.file_name, 'rb')
context.files[context.param_name] = open(context.file_name, "rb")
@given('the request data is')
@given("the request data is")
def step_request_data(context):
context.request_data = eval(context.text)
@given('the request data includes')
@given("the request data includes")
def step_request_data_table(context):
context.request_data = {row['parameter']: row['value'] for row in context.table}
context.request_data = {row["parameter"]: row["value"] for row in context.table}
@given('save the generated PDF file as "{filename}" for debugging')
def save_generated_pdf(context, filename):
with open(filename, 'wb') as f:
with open(filename, "wb") as f:
f.write(context.files[context.param_name].read())
print(f"Saved generated PDF content to {filename}")
########
# WHEN #
########
@when('I send a GET request to "{endpoint}"')
def step_send_get_request(context, endpoint):
base_url = "http://localhost:8080"
@ -235,20 +301,22 @@ def step_send_get_request(context, endpoint):
response = requests.get(full_url, headers=API_HEADERS)
context.response = response
@when('I send a GET request to "{endpoint}" with parameters')
def step_send_get_request_with_params(context, endpoint):
base_url = "http://localhost:8080"
params = {row['parameter']: row['value'] for row in context.table}
params = {row["parameter"]: row["value"] for row in context.table}
full_url = f"{base_url}{endpoint}"
response = requests.get(full_url, params=params, headers=API_HEADERS)
context.response = response
@when('I send the API request to the endpoint "{endpoint}"')
def step_send_api_request(context, endpoint):
url = f"http://localhost:8080{endpoint}"
files = context.files if hasattr(context, 'files') else {}
files = context.files if hasattr(context, "files") else {}
if not hasattr(context, 'request_data') or context.request_data is None:
if not hasattr(context, "request_data") or context.request_data is None:
context.request_data = {}
form_data = []
@ -257,130 +325,173 @@ def step_send_api_request(context, endpoint):
for key, file in files.items():
mime_type, _ = mimetypes.guess_type(file.name)
mime_type = mime_type or 'application/octet-stream'
mime_type = mime_type or "application/octet-stream"
print(f"form_data {file.name} with {mime_type}")
form_data.append((key, (file.name, file, mime_type)))
response = requests.post(url, files=form_data, headers=API_HEADERS)
context.response = response
########
# THEN #
########
@then('the response content type should be "{content_type}"')
def step_check_response_content_type(context, content_type):
actual_content_type = context.response.headers.get('Content-Type', '')
assert actual_content_type.startswith(content_type), f"Expected {content_type} but got {actual_content_type}. Response content: {context.response.content}"
actual_content_type = context.response.headers.get("Content-Type", "")
assert actual_content_type.startswith(
content_type
), f"Expected {content_type} but got {actual_content_type}. Response content: {context.response.content}"
@then('the response file should have size greater than {size:d}')
@then("the response file should have size greater than {size:d}")
def step_check_response_file_size(context, size):
response_file = io.BytesIO(context.response.content)
assert len(response_file.getvalue()) > size
@then('the response PDF is not passworded')
@then("the response PDF is not passworded")
def step_check_response_pdf_not_passworded(context):
response_file = io.BytesIO(context.response.content)
reader = PdfReader(response_file)
assert not reader.is_encrypted
@then('the response PDF is passworded')
@then("the response PDF is passworded")
def step_check_response_pdf_passworded(context):
response_file = io.BytesIO(context.response.content)
try:
reader = PdfReader(response_file)
assert reader.is_encrypted
except PdfReadError as e:
raise AssertionError(f"Failed to read PDF: {str(e)}. Response content: {context.response.content}")
raise AssertionError(
f"Failed to read PDF: {str(e)}. Response content: {context.response.content}"
)
except Exception as e:
raise AssertionError(f"An error occurred: {str(e)}. Response content: {context.response.content}")
raise AssertionError(
f"An error occurred: {str(e)}. Response content: {context.response.content}"
)
@then('the response status code should be {status_code:d}')
@then("the response status code should be {status_code:d}")
def step_check_response_status_code(context, status_code):
assert context.response.status_code == status_code, f"Expected status code {status_code} but got {context.response.status_code}"
assert (
context.response.status_code == status_code
), f"Expected status code {status_code} but got {context.response.status_code}"
@then('the response should contain error message "{message}"')
def step_check_response_error_message(context, message):
response_json = context.response.json()
assert response_json.get('error') == message, f"Expected error message '{message}' but got '{response_json.get('error')}'"
assert (
response_json.get("error") == message
), f"Expected error message '{message}' but got '{response_json.get('error')}'"
@then('the response PDF should contain {page_count:d} pages')
def step_check_response_pdf_page_count(context, page_count):
response_file = io.BytesIO(context.response.content)
reader = PdfReader(response_file)
assert len(reader.pages) == page_count, f"Expected {page_count} pages but got {len(reader.pages)} pages"
@then('the response PDF metadata should include "{metadata_key}" as "{metadata_value}"')
def step_check_response_pdf_metadata(context, metadata_key, metadata_value):
response_file = io.BytesIO(context.response.content)
reader = PdfReader(response_file)
metadata = reader.metadata
assert metadata.get("/" + metadata_key) == metadata_value, f"Expected {metadata_key} to be '{metadata_value}' but got '{metadata.get(metadata_key)}'"
assert (
metadata.get("/" + metadata_key) == metadata_value
), f"Expected {metadata_key} to be '{metadata_value}' but got '{metadata.get(metadata_key)}'"
@then('the response file should have extension "{extension}"')
def step_check_response_file_extension(context, extension):
content_disposition = context.response.headers.get('Content-Disposition', '')
content_disposition = context.response.headers.get("Content-Disposition", "")
filename = ""
if content_disposition:
parts = content_disposition.split(';')
parts = content_disposition.split(";")
for part in parts:
if part.strip().startswith('filename'):
filename = part.split('=')[1].strip().strip('"')
if part.strip().startswith("filename"):
filename = part.split("=")[1].strip().strip('"')
break
assert filename.endswith(extension), f"Expected file extension {extension} but got {filename}. Response content: {context.response.content}"
assert filename.endswith(
extension
), f"Expected file extension {extension} but got {filename}. Response content: {context.response.content}"
@then('save the response file as "{filename}" for debugging')
def step_save_response_file(context, filename):
with open(filename, 'wb') as f:
with open(filename, "wb") as f:
f.write(context.response.content)
print(f"Saved response content to {filename}")
@then('the response PDF should contain {page_count:d} pages')
@then("the response PDF should contain {page_count:d} pages")
def step_check_response_pdf_page_count(context, page_count):
response_file = io.BytesIO(context.response.content)
reader = PdfReader(io.BytesIO(response_file.getvalue()))
actual_page_count = len(reader.pages)
assert actual_page_count == page_count, f"Expected {page_count} pages but got {actual_page_count} pages"
assert (
actual_page_count == page_count
), f"Expected {page_count} pages but got {actual_page_count} pages"
@then('the response ZIP should contain {file_count:d} files')
@then("the response ZIP should contain {file_count:d} files")
def step_check_response_zip_file_count(context, file_count):
response_file = io.BytesIO(context.response.content)
with zipfile.ZipFile(io.BytesIO(response_file.getvalue())) as zip_file:
actual_file_count = len(zip_file.namelist())
assert actual_file_count == file_count, f"Expected {file_count} files but got {actual_file_count} files"
assert (
actual_file_count == file_count
), f"Expected {file_count} files but got {actual_file_count} files"
@then('the response ZIP file should contain {doc_count:d} documents each having {pages_per_doc:d} pages')
@then(
"the response ZIP file should contain {doc_count:d} documents each having {pages_per_doc:d} pages"
)
def step_check_response_zip_doc_page_count(context, doc_count, pages_per_doc):
response_file = io.BytesIO(context.response.content)
with zipfile.ZipFile(io.BytesIO(response_file.getvalue())) as zip_file:
actual_doc_count = len(zip_file.namelist())
assert actual_doc_count == doc_count, f"Expected {doc_count} documents but got {actual_doc_count} documents"
assert (
actual_doc_count == doc_count
), f"Expected {doc_count} documents but got {actual_doc_count} documents"
for file_name in zip_file.namelist():
with zip_file.open(file_name) as pdf_file:
reader = PdfReader(pdf_file)
actual_pages_per_doc = len(reader.pages)
assert actual_pages_per_doc == pages_per_doc, f"Expected {pages_per_doc} pages per document but got {actual_pages_per_doc} pages in document {file_name}"
assert (
actual_pages_per_doc == pages_per_doc
), f"Expected {pages_per_doc} pages per document but got {actual_pages_per_doc} pages in document {file_name}"
@then('the JSON value of "{key}" should be "{expected_value}"')
def step_check_json_value(context, key, expected_value):
actual_value = context.response.json().get(key)
assert actual_value == expected_value, \
f"Expected JSON value for '{key}' to be '{expected_value}' but got '{actual_value}'"
assert (
actual_value == expected_value
), f"Expected JSON value for '{key}' to be '{expected_value}' but got '{actual_value}'"
@then('JSON list entry containing "{identifier_key}" as "{identifier_value}" should have "{target_key}" as "{target_value}"')
def step_check_json_list_entry(context, identifier_key, identifier_self, target_key, target_value):
@then(
'JSON list entry containing "{identifier_key}" as "{identifier_value}" should have "{target_key}" as "{target_value}"'
)
def step_check_json_list_entry(
context, identifier_key, identifier_self, target_key, target_value
):
json_response = context.response.json()
for entry in json_response:
if entry.get(identifier_key) == identifier_value:
assert entry.get(target_key) == target_value, \
f"Expected {target_key} to be {target_value} in entry where {identifier_key} is {identifier_value}, but found {entry.get(target_key)}"
assert (
entry.get(target_key) == target_value
), f"Expected {target_key} to be {target_value} in entry where {identifier_key} is {identifier_value}, but found {entry.get(target_key)}"
break
else:
raise AssertionError(f"No entry with {identifier_key} as {identifier_value} found")
raise AssertionError(
f"No entry with {identifier_key} as {identifier_value} found"
)
@then('the response should match the regex "{pattern}"')
def step_response_matches_regex(context, pattern):
response_text = context.response.text
assert re.match(pattern, response_text), \
f"Response '{response_text}' does not match the expected pattern '{pattern}'"
assert re.match(
pattern, response_text
), f"Response '{response_text}' does not match the expected pattern '{pattern}'"