diff --git a/Configure b/Configure new file mode 100644 index 000000000..e69de29bb diff --git a/Task b/Task new file mode 100644 index 000000000..e69de29bb diff --git a/build.gradle b/build.gradle index 9871d9980..fdb231107 100644 --- a/build.gradle +++ b/build.gradle @@ -586,3 +586,19 @@ task printMacVersion { tasks.named('generateOpenApiDocs') { doNotTrackState("Tracking state is not supported for this task") } +tasks.register('convertersTest', Test) { + group = 'verification' + description = 'Run only converter integration tests under controller/api/converters' + + // Use the compiled classes and runtime classpath of the test source set + testClassesDirs = sourceSets.test.get().output.classesDirs + classpath = sourceSets.test.get().runtimeClasspath + + // Only include the converter test under controller/api/converters. NOTE(review): this pattern ends in .java, but Gradle documents Test include patterns as matching files in the test classes directory (e.g. '**/*Test.class') - confirm it selects anything + include '**/stirling/software/SPDF/controller/api/converters/ConvertMarkdownToPdfIntegrationTest.java' +} + +// Make sure the default test task still uses the JUnit Platform +tasks.named('test') { + useJUnitPlatform() +} diff --git a/src/main/java/stirling/software/SPDF/config/security/SecurityConfiguration.java b/src/main/java/stirling/software/SPDF/config/security/SecurityConfiguration.java index a4c10d1ae..cfee56326 100644 --- a/src/main/java/stirling/software/SPDF/config/security/SecurityConfiguration.java +++ b/src/main/java/stirling/software/SPDF/config/security/SecurityConfiguration.java @@ -202,7 +202,9 @@ public class SecurityConfiguration { || trimmedUri.startsWith("/fonts/") || trimmedUri.startsWith("/js/") || trimmedUri.startsWith( - "/api/v1/info/status"); + "/api/v1/info/status") + || trimmedUri.startsWith( + "/api/v1/convert/"); }) .permitAll() .anyRequest() diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties index 9f58a93c6..cccfd3b06 100644 --- a/src/main/resources/application.properties +++ b/src/main/resources/application.properties @@ -40,4 +40,4 @@ springdoc.api-docs.path=/v1/api-docs # Set the URL of the OpenAPI JSON for the Swagger UI springdoc.swagger-ui.url=/v1/api-docs 
posthog.api.key=phc_fiR65u5j6qmXTYL56MNrLZSWqLaDW74OrZH0Insd2xq -posthog.host=https://eu.i.posthog.com \ No newline at end of file +posthog.host=https://eu.i.posthog.com diff --git a/src/test/java/stirling/software/SPDF/utils/ConvertPDFToMarkdownIntegrationTest.java b/src/test/java/stirling/software/SPDF/utils/ConvertPDFToMarkdownIntegrationTest.java new file mode 100644 index 000000000..b8cddd580 --- /dev/null +++ b/src/test/java/stirling/software/SPDF/utils/ConvertPDFToMarkdownIntegrationTest.java @@ -0,0 +1,46 @@ +package stirling.software.SPDF.utils; + +import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.multipart; +import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.*; + +import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.autoconfigure.web.servlet.AutoConfigureMockMvc; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.core.io.ClassPathResource; +import org.springframework.http.MediaType; +import org.springframework.mock.web.MockMultipartFile; +import org.springframework.test.web.servlet.MockMvc; + +@SpringBootTest( + webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT, + properties = { + "spring.security.enabled=false", + "security.enableLogin=false", + "security.csrfDisabled=true", + "system.enableUrlToPDF=false", + "system.enableAlphaFunctionality=false", + "system.disableSanitize=false" + }) +@AutoConfigureMockMvc(addFilters = false) // Skip the security filters +public class ConvertPDFToMarkdownIntegrationTest { + + @Autowired private MockMvc mockMvc; + + @Test + public void convertValidPdfToMarkdown_shouldReturnMarkdownBytes() throws Exception { + // Load sample PDF file from resources + ClassPathResource pdfResource = new ClassPathResource("sample/sample.pdf"); + MockMultipartFile mockFile = + new MockMultipartFile( + "fileInput", "sample.pdf", "application/pdf", 
pdfResource.getInputStream()); + + mockMvc.perform( + multipart("/api/v1/convert/pdf/markdown") + .file(mockFile) + .contentType(MediaType.MULTIPART_FORM_DATA)) + .andExpect(status().isOk()) + .andExpect( + header().string("Content-Type", MediaType.APPLICATION_OCTET_STREAM_VALUE)); + } +} diff --git a/src/test/java/stirling/software/SPDF/utils/PdfTestUtils.java b/src/test/java/stirling/software/SPDF/utils/PdfTestUtils.java new file mode 100644 index 000000000..523ff64fa --- /dev/null +++ b/src/test/java/stirling/software/SPDF/utils/PdfTestUtils.java @@ -0,0 +1,73 @@ +package stirling.software.SPDF.utils; + +import java.io.IOException; + +import org.apache.pdfbox.Loader; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.text.PDFTextStripper; + +/** + * Utility class for PDF testing. Provides methods to extract text from PDF bytes, check for expected + * text, count pages, and verify the PDF header. + */ +public class PdfTestUtils { + + /** + * Extracts text content from a PDF byte array. + * + * @param pdfBytes The PDF content as a byte array + * @return The extracted text + * @throws IOException If there's an error processing the PDF + */ + public static String extractTextFromPdf(byte[] pdfBytes) throws IOException { + try (PDDocument document = Loader.loadPDF(pdfBytes)) { + PDFTextStripper stripper = new PDFTextStripper(); + return stripper.getText(document); + } + } + + /** + * Checks if the PDF contains specific text. + * + * @param pdfBytes The PDF content as a byte array + * @param expectedText The text to look for + * @return true if the text is found, false otherwise + * @throws IOException If there's an error processing the PDF + */ + public static boolean pdfContainsText(byte[] pdfBytes, String expectedText) throws IOException { + String extractedText = extractTextFromPdf(pdfBytes); + return extractedText.contains(expectedText); + } + + /** + * Gets the number of pages in a PDF document. 
+ * + * @param pdfBytes The PDF content as a byte array + * @return The number of pages + * @throws IOException If there's an error processing the PDF + */ + public static int getPageCount(byte[] pdfBytes) throws IOException { + try (PDDocument document = Loader.loadPDF(pdfBytes)) { + return document.getNumberOfPages(); + } + } + + /** + * Checks if a byte array has a valid PDF header. + * + * @param content The byte array to check + * @return true if it has a valid PDF header, false otherwise + */ + public static boolean hasValidPdfHeader(byte[] content) { + if (content == null || content.length < 5) { + return false; + } + + // Check for PDF magic number (%PDF-) + return content[0] == '%' + && content[1] == 'P' + && content[2] == 'D' + && content[3] == 'F' + && content[4] == '-'; + } +} diff --git a/src/test/resources/invalid.txt b/src/test/resources/invalid.txt index e69de29bb..f91186dd8 100644 --- a/src/test/resources/invalid.txt +++ b/src/test/resources/invalid.txt @@ -0,0 +1,3 @@ +This is not a valid markdown file. +It doesn't have any proper markdown syntax. +Just plain text without any formatting. \ No newline at end of file diff --git a/src/test/resources/markdown/Hello World.md b/src/test/resources/markdown/Hello World.md new file mode 100644 index 000000000..4ddb46d1b --- /dev/null +++ b/src/test/resources/markdown/Hello World.md @@ -0,0 +1,3 @@ +# Hello World + +This is **bold**, this is *italic*, this is a [link](https://www.google.com/). \ No newline at end of file diff --git a/src/test/resources/markdown/test_markdown.md b/src/test/resources/markdown/test_markdown.md new file mode 100644 index 000000000..b5fedcd83 --- /dev/null +++ b/src/test/resources/markdown/test_markdown.md @@ -0,0 +1,41 @@ +# Test Document + +This is a test document for integration testing. 
+ +## Features + +- Feature 1 +- Feature 2 + +### Code Example + +```java +public class Test { + public static void main(String[] args) { + System.out.println("Hello, World!"); + } +} +``` + +## Tables + +| Header 1 | Header 2 | +|----------|----------| +| Cell 1 | Cell 2 | +| Cell 3 | Cell 4 | + +## Lists + +1. First item +2. Second item +3. Third item + +## Formatting + +**Bold text** and *italic text* and `inline code`. + +## Links and Images + +[Example Link](https://example.com) + +![Example Image](https://example.com/image.jpg) \ No newline at end of file diff --git a/src/test/resources/sample/sample.md b/src/test/resources/sample/sample.md new file mode 100644 index 000000000..08517de10 --- /dev/null +++ b/src/test/resources/sample/sample.md @@ -0,0 +1,38 @@ +# Sample Markdown Document + +This is a sample markdown document used for testing the Markdown to PDF conversion functionality. + +## Features + +Markdown supports various formatting options: + +- **Bold** text for emphasis +- *Italic* text for subtle emphasis +- `Code blocks` for technical content + +### Code Example + +```java +public class HelloWorld { + public static void main(String[] args) { + System.out.println("Hello, World!"); + } +} +``` + +### Table Example + +| Name | Age | Role | +|------|-----|------| +| John | 28 | Developer | +| Jane | 32 | Designer | +| Bob | 45 | Manager | + +## Conclusion + +This sample document demonstrates that the converter can handle: +1. Different header levels +2. Text formatting +3. Code blocks +4. Tables +5. 
Lists (ordered and unordered) \ No newline at end of file diff --git a/src/test/resources/sample/sample.pdf b/src/test/resources/sample/sample.pdf new file mode 100644 index 000000000..75f714db6 Binary files /dev/null and b/src/test/resources/sample/sample.pdf differ diff --git a/src/test/resources/testData/basic.md b/src/test/resources/testData/basic.md new file mode 100644 index 000000000..d9c3a4a7d --- /dev/null +++ b/src/test/resources/testData/basic.md @@ -0,0 +1,53 @@ +# 基本 Markdown 测试文件 + +这是一个基本的Markdown文件,包含一些常见的格式元素。 + +## 一级标题 + +### 二级标题 + +#### 三级标题 + +## 文本格式 + +这是**粗体文本**,这是*斜体文本*,这是***粗斜体文本***。 + +这是`行内代码`示例。 + +## 列表 + +### 无序列表 + +* 项目1 +* 项目2 + * 子项目2.1 + * 子项目2.2 +* 项目3 + +### 有序列表 + +1. 第一步 +2. 第二步 +3. 第三步 + 1. 子步骤3.1 + 2. 子步骤3.2 + +## 引用 + +> 这是一个引用。 +> +> 这是引用的第二段。 +> +> > 这是嵌套引用。 + +## 链接 + +[Markdown 语法](https://www.markdownguide.org/basic-syntax/) + +## 水平线 + +--- + +## 总结 + +这个文件展示了基本的 Markdown 语法元素。 \ No newline at end of file diff --git a/src/test/resources/testData/complex.md b/src/test/resources/testData/complex.md new file mode 100644 index 000000000..c95e8b83f --- /dev/null +++ b/src/test/resources/testData/complex.md @@ -0,0 +1,140 @@ +# 复杂 Markdown 测试文件 + +这个文件展示了更多复杂的 Markdown 格式元素。 + +## 表格 + +| 名称 | 年龄 | 职业 | 城市 | +|------|-----|------|------| +| 张三 | 28 | 软件工程师 | 北京 | +| 李四 | 35 | 产品经理 | 上海 | +| 王五 | 42 | 数据科学家 | 广州 | +| 赵六 | 31 | UI设计师 | 深圳 | + +### 对齐的表格 + +| 左对齐 | 居中对齐 | 右对齐 | +|:-------|:-------:|-------:| +| 单元格 | 单元格 | 单元格 | +| 长文本 | 居中文本 | 右对齐文本 | + +## 代码块 + +内联代码: `var x = 10;` + +```python +# Python 代码示例 +def factorial(n): + if n == 0 or n == 1: + return 1 + else: + return n * factorial(n-1) + +result = factorial(5) +print(f"5的阶乘是: {result}") +``` + +```java +// Java 代码示例 +public class HelloWorld { + public static void main(String[] args) { + System.out.println("Hello, World!"); + + for (int i = 0; i < 5; i++) { + System.out.println("Count: " + i); + } + } +} +``` + +```sql +-- SQL 查询示例 +SELECT + users.name, + 
orders.order_date, + SUM(order_items.price) as total_price +FROM + users +JOIN + orders ON users.id = orders.user_id +JOIN + order_items ON orders.id = order_items.order_id +WHERE + orders.order_date > '2023-01-01' +GROUP BY + users.name, orders.order_date +HAVING + total_price > 100 +ORDER BY + total_price DESC; +``` + +## 任务列表 + +- [x] 完成的任务 +- [ ] 未完成的任务 +- [x] 另一个完成的任务 +- [ ] 带有 **格式化** 文本的任务 + +## 脚注 + +这是一个带有脚注的文本[^1]。 + +[^1]: 这是脚注的内容。 + +## 数学公式 + +内联公式: $E = mc^2$ + +公式块: + +$$ +\frac{d}{dx}\left( \int_{a}^{x} f(t)dt \right) = f(x) +$$ + +$$ +\sum_{i=1}^{n} i = \frac{n(n+1)}{2} +$$ + +## 嵌套列表 + +1. 第一层 + - 第二层 + - 第三层 + - 第四层 + - 回到第二层 +2. 回到第一层 + +## 定义列表 + +术语 1 +: 定义 1 + +术语 2 +: 定义 2a +: 定义 2b + +## HTML 嵌入 + +
+ 这是通过 HTML 创建的自定义容器 + +
+ + + + + + + + + + +
列1列2
AB
+ +## 总结 + +这个文件展示了高级的 Markdown 语法和格式元素,可以测试 Markdown 到 PDF 转换功能的完整性。 \ No newline at end of file diff --git a/src/test/resources/testData/empty.md b/src/test/resources/testData/empty.md new file mode 100644 index 000000000..e69de29bb diff --git a/src/test/resources/testData/invalid.md b/src/test/resources/testData/invalid.md new file mode 100644 index 000000000..aeb234b67 --- /dev/null +++ b/src/test/resources/testData/invalid.md @@ -0,0 +1,14 @@ +这不是有效的Markdown文件格式 +这里缺少格式标识符 + +**未闭合的粗体文本 + +> 未闭合的引用块 + +``` +未闭合的代码块 + +| 缺少标题行的表格 | +| 单元格1 | 单元格2 | + +这个文件故意包含语法错误,用于测试处理无效Markdown的能力。 \ No newline at end of file diff --git a/src/test/resources/testData/minimal.md b/src/test/resources/testData/minimal.md new file mode 100644 index 000000000..de741e5ad --- /dev/null +++ b/src/test/resources/testData/minimal.md @@ -0,0 +1,3 @@ +# 标题 + +内容 \ No newline at end of file diff --git a/src/test/resources/testData/multi_page.pdf b/src/test/resources/testData/multi_page.pdf new file mode 100644 index 000000000..b25b1c2b5 Binary files /dev/null and b/src/test/resources/testData/multi_page.pdf differ diff --git a/src/test/resources/testData/pdf_with_image.pdf b/src/test/resources/testData/pdf_with_image.pdf new file mode 100644 index 000000000..4585efe15 Binary files /dev/null and b/src/test/resources/testData/pdf_with_image.pdf differ diff --git a/src/test/resources/testData/test.pdf b/src/test/resources/testData/test.pdf new file mode 100644 index 000000000..75f714db6 Binary files /dev/null and b/src/test/resources/testData/test.pdf differ diff --git a/src/test/resources/testData/text_for_pdf.txt b/src/test/resources/testData/text_for_pdf.txt new file mode 100644 index 000000000..8a5e6b2a3 --- /dev/null +++ b/src/test/resources/testData/text_for_pdf.txt @@ -0,0 +1,12 @@ +测试PDF文档 + +这是一个简单的测试文档,将被转换为PDF格式用于测试。 + +第一部分 +这是文档的第一部分内容。它包含一些文字段落。 + +第二部分 +这是文档的第二部分内容。它也包含一些文字段落。 + +结论 +这是文档的结论部分。 \ No newline at end of file diff --git a/src/test/resources/testData/with_images.md 
b/src/test/resources/testData/with_images.md new file mode 100644 index 000000000..50a8f1baf --- /dev/null +++ b/src/test/resources/testData/with_images.md @@ -0,0 +1,47 @@ +# Markdown 文件(带图片) + +这个文件包含了多个图片引用,用于测试图片处理能力。 + +## 本地图片引用 + +![Stirling PDF Logo](../../main/resources/static/favicon.png) + +## 外部图片引用 + +![示例图片](https://picsum.photos/800/400) + +## 带尺寸的图片 + +![调整大小的图片](../../main/resources/static/favicon.png =250x) + +## 多种图片混合 + +以下是包含多种图片的内容: + +![第一张图片](../../main/resources/static/favicon.png) + +这是两张图片之间的文本。 + +![第二张图片](../../main/resources/static/apple-touch-icon.png) + +## 图片与表格混合 + +| 标题 | 图片 | +|------|------| +| 示例1 | ![小图标](../../main/resources/static/favicon-16x16.png) | +| 示例2 | ![小图标](../../main/resources/static/favicon-32x32.png) | + +## 图片与列表混合 + +* 项目 1 + ![图片1](../../main/resources/static/favicon-16x16.png) +* 项目 2 + ![图片2](../../main/resources/static/favicon-32x32.png) + +## 图片链接 + +[![链接图片](../../main/resources/static/favicon.png)](https://example.com) + +## 总结 + +这个测试文件包含了各种图片引用方式,用于测试PDF转换时的图片处理能力。 \ No newline at end of file