完成了一个整体测试

2025-09-12 17:52:13 +02:00 · 2025-04-19 14:27:24 -04:00 · 2025-04-19 14:27:24 -04:00 · 6a40a8be39
commit 6a40a8be39
parent 782ae778a7
22 changed files with 493 additions and 2 deletions
--- a/0
+++ b/0
--- a/0
+++ b/0
--- a/build.gradle
+++ b/build.gradle
@ -586,3 +586,19 @@ task printMacVersion {
 tasks.named('generateOpenApiDocs') {
    doNotTrackState("Tracking state is not supported for this task")
 }
 tasks.register('convertersTest', Test) {
    group       = 'verification'
    description = 'Run only converter integration tests under controller/api/converters'
    // Reuse the compiled classes and runtime classpath of the standard `test` source set.
    // In the Groovy DSL `sourceSets.test` is already the SourceSet, so no `.get()` is needed.
    testClassesDirs = sourceSets.test.output.classesDirs
    classpath       = sourceSets.test.runtimeClasspath
    // The tests use JUnit Jupiter annotations, so this task has to select the JUnit Platform
    // itself; configuring the default `test` task does not apply to other Test tasks.
    useJUnitPlatform()
    // Include patterns are matched against the compiled class files in testClassesDirs,
    // so the pattern must end in `.class` (a `.java` pattern selects nothing).
    include '**/stirling/software/SPDF/controller/api/converters/ConvertMarkdownToPdfIntegrationTest.class'
 }
 // Make sure the default `test` task still runs on the JUnit Platform
 tasks.named('test') {
    useJUnitPlatform()
 }
--- a/src/main/java/stirling/software/SPDF/config/security/SecurityConfiguration.java
+++ b/src/main/java/stirling/software/SPDF/config/security/SecurityConfiguration.java
@ -202,7 +202,9 @@ public class SecurityConfiguration {
                                                        || trimmedUri.startsWith("/fonts/")
                                                        || trimmedUri.startsWith("/js/")
                                                        || trimmedUri.startsWith(
-                                                                "/api/v1/info/status");
+                                                                "/api/v1/info/status")
                                                        || trimmedUri.startsWith(
                                                                "/api/v1/convert/");
                                            })
                                    .permitAll()
                                    .anyRequest()
--- a/src/main/resources/application.properties
+++ b/src/main/resources/application.properties
@ -40,4 +40,4 @@ springdoc.api-docs.path=/v1/api-docs
 # Set the URL of the OpenAPI JSON for the Swagger UI
 springdoc.swagger-ui.url=/v1/api-docs
 posthog.api.key=phc_fiR65u5j6qmXTYL56MNrLZSWqLaDW74OrZH0Insd2xq
-posthog.host=https://eu.i.posthog.com
+posthog.host=https://eu.i.posthog.com
--- a/src/test/java/stirling/software/SPDF/utils/ConvertPDFToMarkdownIntegrationTest.java
+++ b/src/test/java/stirling/software/SPDF/utils/ConvertPDFToMarkdownIntegrationTest.java
@ -0,0 +1,46 @@
 package stirling.software.SPDF.utils;
 import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.multipart;
 import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.*;
 import org.junit.jupiter.api.Test;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.boot.test.autoconfigure.web.servlet.AutoConfigureMockMvc;
 import org.springframework.boot.test.context.SpringBootTest;
 import org.springframework.core.io.ClassPathResource;
 import org.springframework.http.MediaType;
 import org.springframework.mock.web.MockMultipartFile;
 import org.springframework.test.web.servlet.MockMvc;
/**
 * Integration test that uploads a sample PDF to the PDF-to-Markdown conversion endpoint through
 * MockMvc and verifies the HTTP status and the Content-Type header of the response.
 */
@SpringBootTest(
        webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT,
        properties = {
            "spring.security.enabled=false",
            "security.enableLogin=false",
            "security.csrfDisabled=true",
            "system.enableUrlToPDF=false",
            "system.enableAlphaFunctionality=false",
            "system.disableSanitize=false"
        })
@AutoConfigureMockMvc(addFilters = false) // skip the security filters
 public class ConvertPDFToMarkdownIntegrationTest {
    @Autowired private MockMvc mockMvc;
    /**
     * Posts the sample PDF as a multipart upload and expects a 200 response whose Content-Type
     * header is exactly {@code application/octet-stream}.
     */
    @Test
    public void convertValidPdfToMarkdown_shouldReturnMarkdownBytes() throws Exception {
        MockMultipartFile upload = samplePdfUpload();
        mockMvc.perform(
                multipart("/api/v1/convert/pdf/markdown")
                    .file(upload)
                    .contentType(MediaType.MULTIPART_FORM_DATA))
            .andExpect(status().isOk())
            .andExpect(
                header().string("Content-Type", MediaType.APPLICATION_OCTET_STREAM_VALUE));
    }
    /**
     * Builds the multipart part named {@code fileInput} from the classpath resource
     * {@code sample/sample.pdf}.
     *
     * @return the upload part carrying the sample PDF bytes
     * @throws Exception if the classpath resource cannot be opened or read
     */
    private static MockMultipartFile samplePdfUpload() throws Exception {
        ClassPathResource samplePdf = new ClassPathResource("sample/sample.pdf");
        return new MockMultipartFile(
            "fileInput", "sample.pdf", "application/pdf", samplePdf.getInputStream());
    }
 }
--- a/src/test/java/stirling/software/SPDF/utils/PdfTestUtils.java
+++ b/src/test/java/stirling/software/SPDF/utils/PdfTestUtils.java
@ -0,0 +1,73 @@
 package stirling.software.SPDF.utils;
 import java.io.IOException;
 import org.apache.pdfbox.Loader;
 import org.apache.pdfbox.pdmodel.PDDocument;
 import org.apache.pdfbox.text.PDFTextStripper;
 /**
 * Utility class for PDF testing. Provides static helpers to extract text from PDF files, inspect
 * their page count and check their header bytes.
 *
 * <p>The class only holds static methods, so it is final and cannot be instantiated.
 */
 public final class PdfTestUtils {
    /** PDF magic number: the ASCII characters {@code %PDF-}. */
    private static final byte[] PDF_MAGIC = {'%', 'P', 'D', 'F', '-'};
    private PdfTestUtils() {
        // Static helpers only; no instances.
    }
    /**
     * Extracts text content from a PDF byte array.
     *
     * @param pdfBytes The PDF content as a byte array
     * @return The extracted text
     * @throws IOException If there's an error processing the PDF
     */
    public static String extractTextFromPdf(byte[] pdfBytes) throws IOException {
        // try-with-resources closes the document once the text has been extracted.
        try (PDDocument document = Loader.loadPDF(pdfBytes)) {
            PDFTextStripper stripper = new PDFTextStripper();
            return stripper.getText(document);
        }
    }
    /**
     * Checks if the PDF contains specific text.
     *
     * @param pdfBytes The PDF content as a byte array
     * @param expectedText The text to look for; must not be null
     * @return true if the text is found, false otherwise
     * @throws IOException If there's an error processing the PDF
     */
    public static boolean pdfContainsText(byte[] pdfBytes, String expectedText) throws IOException {
        String extractedText = extractTextFromPdf(pdfBytes);
        return extractedText.contains(expectedText);
    }
    /**
     * Gets the number of pages in a PDF document.
     *
     * @param pdfBytes The PDF content as a byte array
     * @return The number of pages
     * @throws IOException If there's an error processing the PDF
     */
    public static int getPageCount(byte[] pdfBytes) throws IOException {
        try (PDDocument document = Loader.loadPDF(pdfBytes)) {
            return document.getNumberOfPages();
        }
    }
    /**
     * Checks if a byte array has a valid PDF header.
     *
     * @param content The byte array to check; may be null
     * @return true if the array starts with the PDF magic number, false otherwise (including for
     *     null input or input shorter than the magic number)
     */
    public static boolean hasValidPdfHeader(byte[] content) {
        if (content == null || content.length < PDF_MAGIC.length) {
            return false;
        }
        // Compare the leading bytes against the magic number one by one.
        for (int i = 0; i < PDF_MAGIC.length; i++) {
            if (content[i] != PDF_MAGIC[i]) {
                return false;
            }
        }
        return true;
    }
 }
--- a/src/test/resources/invalid.txt
+++ b/src/test/resources/invalid.txt
@ -0,0 +1,3 @@
 This is not a valid markdown file.
 It doesn't have any proper markdown syntax.
 Just plain text without any formatting.
--- a/src/test/resources/markdown/Hello
+++ b/src/test/resources/markdown/Hello
@ -0,0 +1,3 @@
 # Hello World
 This is **bold**, this is *italic*, this is a [link](https://www.google.com/).
--- a/src/test/resources/markdown/test_markdown.md
+++ b/src/test/resources/markdown/test_markdown.md
@ -0,0 +1,41 @@
 # Test Document
 This is a test document for integration testing.
 ## Features
 - Feature 1
 - Feature 2
 ### Code Example
 ```java
 public class Test {
    public static void main(String[] args) {
        System.out.println("Hello, World!");
    }
 }
 ```
 ## Tables
 | Header 1 | Header 2 |
 |----------|----------|
 | Cell 1   | Cell 2   |
 | Cell 3   | Cell 4   |
 ## Lists
 1. First item
 2. Second item
 3. Third item
 ## Formatting
 **Bold text** and *italic text* and `inline code`.
 ## Links and Images
 [Example Link](https://example.com)
 ![Example Image](https://example.com/image.jpg)
--- a/src/test/resources/sample/sample.md
+++ b/src/test/resources/sample/sample.md
@ -0,0 +1,38 @@
 # Sample Markdown Document
 This is a sample markdown document used for testing the Markdown to PDF conversion functionality.
 ## Features
 Markdown supports various formatting options:
 - **Bold** text for emphasis
 - *Italic* text for subtle emphasis
 - `Code blocks` for technical content
 ### Code Example
 ```java
 public class HelloWorld {
    public static void main(String[] args) {
        System.out.println("Hello, World!");
    }
 }
 ```
 ### Table Example
 | Name | Age | Role |
 |------|-----|------|
 | John | 28  | Developer |
 | Jane | 32  | Designer |
 | Bob  | 45  | Manager |
 ## Conclusion
 This sample document demonstrates that the converter can handle:
 1. Different header levels
 2. Text formatting
 3. Code blocks
 4. Tables
 5. Lists (ordered and unordered)
--- a/src/test/resources/sample/sample.pdf
+++ b/src/test/resources/sample/sample.pdf
--- a/src/test/resources/testData/basic.md
+++ b/src/test/resources/testData/basic.md
@ -0,0 +1,53 @@
 # 基本 Markdown 测试文件
 这是一个基本的Markdown文件，包含一些常见的格式元素。
 ## 一级标题
 ### 二级标题
 #### 三级标题
 ## 文本格式
 这是**粗体文本**，这是*斜体文本*，这是***粗斜体文本***。
 这是`行内代码`示例。
 ## 列表
 ### 无序列表
 * 项目1
 * 项目2
  * 子项目2.1
  * 子项目2.2
 * 项目3
 ### 有序列表
 1. 第一步
 2. 第二步
 3. 第三步
   1. 子步骤3.1
   2. 子步骤3.2
 ## 引用
 > 这是一个引用。
 > 
 > 这是引用的第二段。
 >
 > > 这是嵌套引用。
 ## 链接
 [Markdown 语法](https://www.markdownguide.org/basic-syntax/)
 ## 水平线
 ---
 ## 总结
 这个文件展示了基本的 Markdown 语法元素。
--- a/src/test/resources/testData/complex.md
+++ b/src/test/resources/testData/complex.md
@ -0,0 +1,140 @@
 # 复杂 Markdown 测试文件
 这个文件展示了更多复杂的 Markdown 格式元素。
 ## 表格
 | 名称 | 年龄 | 职业 | 城市 |
 |------|-----|------|------|
 | 张三 | 28  | 软件工程师 | 北京 |
 | 李四 | 35  | 产品经理 | 上海 |
 | 王五 | 42  | 数据科学家 | 广州 |
 | 赵六 | 31  | UI设计师 | 深圳 |
 ### 对齐的表格
 | 左对齐 | 居中对齐 | 右对齐 |
 |:-------|:-------:|-------:|
 | 单元格 | 单元格 | 单元格 |
 | 长文本 | 居中文本 | 右对齐文本 |
 ## 代码块
 内联代码: `var x = 10;`
 ```python
 # Python 代码示例
 def factorial(n):
    if n == 0 or n == 1:
        return 1
    else:
        return n * factorial(n-1)
 result = factorial(5)
 print(f"5的阶乘是: {result}")
 ```
 ```java
 // Java 代码示例
 public class HelloWorld {
    public static void main(String[] args) {
        System.out.println("Hello, World!");
        for (int i = 0; i < 5; i++) {
            System.out.println("Count: " + i);
        }
    }
 }
 ```
 ```sql
 -- SQL 查询示例
 SELECT 
    users.name, 
    orders.order_date,
    SUM(order_items.price) as total_price
 FROM 
    users
 JOIN 
    orders ON users.id = orders.user_id
 JOIN 
    order_items ON orders.id = order_items.order_id
 WHERE 
    orders.order_date > '2023-01-01'
 GROUP BY 
    users.name, orders.order_date
 HAVING 
    total_price > 100
 ORDER BY 
    total_price DESC;
 ```
 ## 任务列表
 - [x] 完成的任务
 - [ ] 未完成的任务
 - [x] 另一个完成的任务
 - [ ] 带有 **格式化** 文本的任务
 ## 脚注
 这是一个带有脚注的文本[^1]。
 [^1]: 这是脚注的内容。
 ## 数学公式
 内联公式: $E = mc^2$
 公式块:
 $$
 \frac{d}{dx}\left( \int_{a}^{x} f(t)dt \right) = f(x)
 $$
 $$
 \sum_{i=1}^{n} i = \frac{n(n+1)}{2}
 $$
 ## 嵌套列表
 1. 第一层
   - 第二层
     - 第三层
       - 第四层
   - 回到第二层
 2. 回到第一层
 ## 定义列表
 术语 1
 : 定义 1
 术语 2
 : 定义 2a
 : 定义 2b
 ## HTML 嵌入
 <div style="padding: 10px; border: 1px solid gray; background-color: #f0f0f0;">
  这是通过 HTML 创建的自定义容器
  <ul>
    <li>可以包含任何 HTML 元素</li>
    <li>比如这个列表</li>
  </ul>
 </div>
 <table>
  <tr>
    <th>列1</th>
    <th>列2</th>
  </tr>
  <tr>
    <td>A</td>
    <td>B</td>
  </tr>
 </table>
 ## 总结
 这个文件展示了高级的 Markdown 语法和格式元素，可以测试 Markdown 到 PDF 转换功能的完整性。
--- a/src/test/resources/testData/empty.md
+++ b/src/test/resources/testData/empty.md
--- a/src/test/resources/testData/invalid.md
+++ b/src/test/resources/testData/invalid.md
@ -0,0 +1,14 @@
 这不是有效的Markdown文件格式
 这里缺少格式标识符
 **未闭合的粗体文本
 > 未闭合的引用块
 ```
 未闭合的代码块
 | 缺少标题行的表格 |
 | 单元格1 | 单元格2 |
 这个文件故意包含语法错误,用于测试处理无效Markdown的能力。
--- a/src/test/resources/testData/minimal.md
+++ b/src/test/resources/testData/minimal.md
@ -0,0 +1,3 @@
 # 标题
 内容
--- a/src/test/resources/testData/multi_page.pdf
+++ b/src/test/resources/testData/multi_page.pdf
--- a/src/test/resources/testData/pdf_with_image.pdf
+++ b/src/test/resources/testData/pdf_with_image.pdf
--- a/src/test/resources/testData/test.pdf
+++ b/src/test/resources/testData/test.pdf
--- a/src/test/resources/testData/text_for_pdf.txt
+++ b/src/test/resources/testData/text_for_pdf.txt
@ -0,0 +1,12 @@
 测试PDF文档
 这是一个简单的测试文档，将被转换为PDF格式用于测试。
 第一部分
 这是文档的第一部分内容。它包含一些文字段落。
 第二部分
 这是文档的第二部分内容。它也包含一些文字段落。
 结论
 这是文档的结论部分。
--- a/src/test/resources/testData/with_images.md
+++ b/src/test/resources/testData/with_images.md
@ -0,0 +1,47 @@
 # Markdown 文件（带图片）
 这个文件包含了多个图片引用，用于测试图片处理能力。
 ## 本地图片引用
 ![Stirling PDF Logo](../../main/resources/static/favicon.png)
 ## 外部图片引用
 ![示例图片](https://picsum.photos/800/400)
 ## 带尺寸的图片
 ![调整大小的图片](../../main/resources/static/favicon.png =250x)
 ## 多种图片混合
 以下是包含多种图片的内容：
 ![第一张图片](../../main/resources/static/favicon.png)
 这是两张图片之间的文本。
 ![第二张图片](../../main/resources/static/apple-touch-icon.png)
 ## 图片与表格混合
 | 标题 | 图片 |
 |------|------|
 | 示例1 | ![小图标](../../main/resources/static/favicon-16x16.png) |
 | 示例2 | ![小图标](../../main/resources/static/favicon-32x32.png) |
 ## 图片与列表混合
 * 项目 1
  ![图片1](../../main/resources/static/favicon-16x16.png) 
 * 项目 2
  ![图片2](../../main/resources/static/favicon-32x32.png)
 ## 图片链接
 [![链接图片](../../main/resources/static/favicon.png)](https://example.com)
 ## 总结
 这个测试文件包含了各种图片引用方式，用于测试PDF转换时的图片处理能力。
		`@ -0,0 +1,3 @@`
							`# Hello World`

							`This is bold, this is italic, this is a [link](https://www.google.com/).`