diff --git a/Configure b/Configure new file mode 100644 index 000000000..e69de29bb diff --git a/Task b/Task new file mode 100644 index 000000000..e69de29bb diff --git a/build.gradle b/build.gradle index 9871d9980..fdb231107 100644 --- a/build.gradle +++ b/build.gradle @@ -586,3 +586,19 @@ task printMacVersion { tasks.named('generateOpenApiDocs') { doNotTrackState("Tracking state is not supported for this task") } +tasks.register('convertersTest', Test) { + group = 'verification' + description = 'Run only converter integration tests under controller/api/converters' + + // Use the compiled classes and runtime classpath of the test source set (NOTE(review): in the Groovy DSL sourceSets.test is presumably already a SourceSet - confirm that .get() resolves) + testClassesDirs = sourceSets.test.get().output.classesDirs + classpath = sourceSets.test.get().runtimeClasspath + + // Only include the converter test under controller/api/converters; include patterns are matched against compiled .class files, not .java sources + include '**/stirling/software/SPDF/controller/api/converters/ConvertMarkdownToPdfIntegrationTest.class' +} + +// Make sure the default test task still uses JUnit Platform (NOTE(review): convertersTest above does not call useJUnitPlatform() in this hunk - confirm its JUnit 5 tests are discovered) +tasks.named('test') { + useJUnitPlatform() +} diff --git a/src/main/java/stirling/software/SPDF/config/security/SecurityConfiguration.java b/src/main/java/stirling/software/SPDF/config/security/SecurityConfiguration.java index a4c10d1ae..cfee56326 100644 --- a/src/main/java/stirling/software/SPDF/config/security/SecurityConfiguration.java +++ b/src/main/java/stirling/software/SPDF/config/security/SecurityConfiguration.java @@ -202,7 +202,9 @@ public class SecurityConfiguration { || trimmedUri.startsWith("/fonts/") || trimmedUri.startsWith("/js/") || trimmedUri.startsWith( - "/api/v1/info/status"); + "/api/v1/info/status") + || trimmedUri.startsWith( + "/api/v1/convert/"); }) .permitAll() .anyRequest() diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties index 9f58a93c6..cccfd3b06 100644 --- a/src/main/resources/application.properties +++ b/src/main/resources/application.properties @@ -40,4 +40,4 @@ springdoc.api-docs.path=/v1/api-docs # Set the URL of the OpenAPI JSON for the Swagger UI springdoc.swagger-ui.url=/v1/api-docs 
posthog.api.key=phc_fiR65u5j6qmXTYL56MNrLZSWqLaDW74OrZH0Insd2xq -posthog.host=https://eu.i.posthog.com \ No newline at end of file +posthog.host=https://eu.i.posthog.com diff --git a/src/test/java/stirling/software/SPDF/utils/ConvertPDFToMarkdownIntegrationTest.java b/src/test/java/stirling/software/SPDF/utils/ConvertPDFToMarkdownIntegrationTest.java new file mode 100644 index 000000000..b8cddd580 --- /dev/null +++ b/src/test/java/stirling/software/SPDF/utils/ConvertPDFToMarkdownIntegrationTest.java @@ -0,0 +1,46 @@ +package stirling.software.SPDF.utils; + +import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.multipart; +import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.*; + +import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.autoconfigure.web.servlet.AutoConfigureMockMvc; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.core.io.ClassPathResource; +import org.springframework.http.MediaType; +import org.springframework.mock.web.MockMultipartFile; +import org.springframework.test.web.servlet.MockMvc; + +@SpringBootTest( + webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT, + properties = { + "spring.security.enabled=false", + "security.enableLogin=false", + "security.csrfDisabled=true", + "system.enableUrlToPDF=false", + "system.enableAlphaFunctionality=false", + "system.disableSanitize=false" + }) +@AutoConfigureMockMvc(addFilters = false) // skip the security filters +public class ConvertPDFToMarkdownIntegrationTest { + + @Autowired private MockMvc mockMvc; + + @Test + public void convertValidPdfToMarkdown_shouldReturnMarkdownBytes() throws Exception { + // Load sample PDF file from resources + ClassPathResource pdfResource = new ClassPathResource("sample/sample.pdf"); + MockMultipartFile mockFile = + new MockMultipartFile( + "fileInput", "sample.pdf", "application/pdf", 
pdfResource.getInputStream()); + + mockMvc.perform( + multipart("/api/v1/convert/pdf/markdown") + .file(mockFile) + .contentType(MediaType.MULTIPART_FORM_DATA)) + .andExpect(status().isOk()) + .andExpect( + header().string("Content-Type", MediaType.APPLICATION_OCTET_STREAM_VALUE)); + } +} diff --git a/src/test/java/stirling/software/SPDF/utils/PdfTestUtils.java b/src/test/java/stirling/software/SPDF/utils/PdfTestUtils.java new file mode 100644 index 000000000..523ff64fa --- /dev/null +++ b/src/test/java/stirling/software/SPDF/utils/PdfTestUtils.java @@ -0,0 +1,73 @@ +package stirling.software.SPDF.utils; + +import java.io.IOException; + +import org.apache.pdfbox.Loader; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.text.PDFTextStripper; + +/** + * Utility class for PDF testing. Provides methods to extract text from PDF byte arrays, check + * for expected text, count pages, and validate the PDF header. + */ +public class PdfTestUtils { + + /** + * Extracts text content from a PDF byte array. + * + * @param pdfBytes The PDF content as a byte array + * @return The extracted text + * @throws IOException If there's an error processing the PDF + */ + public static String extractTextFromPdf(byte[] pdfBytes) throws IOException { + try (PDDocument document = Loader.loadPDF(pdfBytes)) { + PDFTextStripper stripper = new PDFTextStripper(); + return stripper.getText(document); + } + } + + /** + * Checks if the PDF contains specific text. + * + * @param pdfBytes The PDF content as a byte array + * @param expectedText The text to look for + * @return true if the text is found, false otherwise + * @throws IOException If there's an error processing the PDF + */ + public static boolean pdfContainsText(byte[] pdfBytes, String expectedText) throws IOException { + String extractedText = extractTextFromPdf(pdfBytes); + return extractedText.contains(expectedText); + } + + /** + * Gets the number of pages in a PDF document. 
+ * + * @param pdfBytes The PDF content as a byte array + * @return The number of pages + * @throws IOException If there's an error processing the PDF + */ + public static int getPageCount(byte[] pdfBytes) throws IOException { + try (PDDocument document = Loader.loadPDF(pdfBytes)) { + return document.getNumberOfPages(); + } + } + + /** + * Checks if a byte array has a valid PDF header. + * + * @param content The byte array to check + * @return true if it has a valid PDF header, false otherwise + */ + public static boolean hasValidPdfHeader(byte[] content) { + if (content == null || content.length < 5) { + return false; + } + + // Check for PDF magic number (%PDF-) + return content[0] == '%' + && content[1] == 'P' + && content[2] == 'D' + && content[3] == 'F' + && content[4] == '-'; + } +} diff --git a/src/test/resources/invalid.txt b/src/test/resources/invalid.txt index e69de29bb..f91186dd8 100644 --- a/src/test/resources/invalid.txt +++ b/src/test/resources/invalid.txt @@ -0,0 +1,3 @@ +This is not a valid markdown file. +It doesn't have any proper markdown syntax. +Just plain text without any formatting. \ No newline at end of file diff --git a/src/test/resources/markdown/Hello World.md b/src/test/resources/markdown/Hello World.md new file mode 100644 index 000000000..4ddb46d1b --- /dev/null +++ b/src/test/resources/markdown/Hello World.md @@ -0,0 +1,3 @@ +# Hello World + +This is **bold**, this is *italic*, this is a [link](https://www.google.com/). \ No newline at end of file diff --git a/src/test/resources/markdown/test_markdown.md b/src/test/resources/markdown/test_markdown.md new file mode 100644 index 000000000..b5fedcd83 --- /dev/null +++ b/src/test/resources/markdown/test_markdown.md @@ -0,0 +1,41 @@ +# Test Document + +This is a test document for integration testing. 
+ +## Features + +- Feature 1 +- Feature 2 + +### Code Example + +```java +public class Test { + public static void main(String[] args) { + System.out.println("Hello, World!"); + } +} +``` + +## Tables + +| Header 1 | Header 2 | +|----------|----------| +| Cell 1 | Cell 2 | +| Cell 3 | Cell 4 | + +## Lists + +1. First item +2. Second item +3. Third item + +## Formatting + +**Bold text** and *italic text* and `inline code`. + +## Links and Images + +[Example Link](https://example.com) + + \ No newline at end of file diff --git a/src/test/resources/sample/sample.md b/src/test/resources/sample/sample.md new file mode 100644 index 000000000..08517de10 --- /dev/null +++ b/src/test/resources/sample/sample.md @@ -0,0 +1,38 @@ +# Sample Markdown Document + +This is a sample markdown document used for testing the Markdown to PDF conversion functionality. + +## Features + +Markdown supports various formatting options: + +- **Bold** text for emphasis +- *Italic* text for subtle emphasis +- `Code blocks` for technical content + +### Code Example + +```java +public class HelloWorld { + public static void main(String[] args) { + System.out.println("Hello, World!"); + } +} +``` + +### Table Example + +| Name | Age | Role | +|------|-----|------| +| John | 28 | Developer | +| Jane | 32 | Designer | +| Bob | 45 | Manager | + +## Conclusion + +This sample document demonstrates that the converter can handle: +1. Different header levels +2. Text formatting +3. Code blocks +4. Tables +5. 
Lists (ordered and unordered) \ No newline at end of file diff --git a/src/test/resources/sample/sample.pdf b/src/test/resources/sample/sample.pdf new file mode 100644 index 000000000..75f714db6 Binary files /dev/null and b/src/test/resources/sample/sample.pdf differ diff --git a/src/test/resources/testData/basic.md b/src/test/resources/testData/basic.md new file mode 100644 index 000000000..d9c3a4a7d --- /dev/null +++ b/src/test/resources/testData/basic.md @@ -0,0 +1,53 @@ +# 基本 Markdown 测试文件 + +这是一个基本的Markdown文件,包含一些常见的格式元素。 + +## 一级标题 + +### 二级标题 + +#### 三级标题 + +## 文本格式 + +这是**粗体文本**,这是*斜体文本*,这是***粗斜体文本***。 + +这是`行内代码`示例。 + +## 列表 + +### 无序列表 + +* 项目1 +* 项目2 + * 子项目2.1 + * 子项目2.2 +* 项目3 + +### 有序列表 + +1. 第一步 +2. 第二步 +3. 第三步 + 1. 子步骤3.1 + 2. 子步骤3.2 + +## 引用 + +> 这是一个引用。 +> +> 这是引用的第二段。 +> +> > 这是嵌套引用。 + +## 链接 + +[Markdown 语法](https://www.markdownguide.org/basic-syntax/) + +## 水平线 + +--- + +## 总结 + +这个文件展示了基本的 Markdown 语法元素。 \ No newline at end of file diff --git a/src/test/resources/testData/complex.md b/src/test/resources/testData/complex.md new file mode 100644 index 000000000..c95e8b83f --- /dev/null +++ b/src/test/resources/testData/complex.md @@ -0,0 +1,140 @@ +# 复杂 Markdown 测试文件 + +这个文件展示了更多复杂的 Markdown 格式元素。 + +## 表格 + +| 名称 | 年龄 | 职业 | 城市 | +|------|-----|------|------| +| 张三 | 28 | 软件工程师 | 北京 | +| 李四 | 35 | 产品经理 | 上海 | +| 王五 | 42 | 数据科学家 | 广州 | +| 赵六 | 31 | UI设计师 | 深圳 | + +### 对齐的表格 + +| 左对齐 | 居中对齐 | 右对齐 | +|:-------|:-------:|-------:| +| 单元格 | 单元格 | 单元格 | +| 长文本 | 居中文本 | 右对齐文本 | + +## 代码块 + +内联代码: `var x = 10;` + +```python +# Python 代码示例 +def factorial(n): + if n == 0 or n == 1: + return 1 + else: + return n * factorial(n-1) + +result = factorial(5) +print(f"5的阶乘是: {result}") +``` + +```java +// Java 代码示例 +public class HelloWorld { + public static void main(String[] args) { + System.out.println("Hello, World!"); + + for (int i = 0; i < 5; i++) { + System.out.println("Count: " + i); + } + } +} +``` + +```sql +-- SQL 查询示例 +SELECT + users.name, + 
orders.order_date, + SUM(order_items.price) as total_price +FROM + users +JOIN + orders ON users.id = orders.user_id +JOIN + order_items ON orders.id = order_items.order_id +WHERE + orders.order_date > '2023-01-01' +GROUP BY + users.name, orders.order_date +HAVING + total_price > 100 +ORDER BY + total_price DESC; +``` + +## 任务列表 + +- [x] 完成的任务 +- [ ] 未完成的任务 +- [x] 另一个完成的任务 +- [ ] 带有 **格式化** 文本的任务 + +## 脚注 + +这是一个带有脚注的文本[^1]。 + +[^1]: 这是脚注的内容。 + +## 数学公式 + +内联公式: $E = mc^2$ + +公式块: + +$$ +\frac{d}{dx}\left( \int_{a}^{x} f(t)dt \right) = f(x) +$$ + +$$ +\sum_{i=1}^{n} i = \frac{n(n+1)}{2} +$$ + +## 嵌套列表 + +1. 第一层 + - 第二层 + - 第三层 + - 第四层 + - 回到第二层 +2. 回到第一层 + +## 定义列表 + +术语 1 +: 定义 1 + +术语 2 +: 定义 2a +: 定义 2b + +## HTML 嵌入 + +
列1 | +列2 | +
---|---|
A | +B | +