mirror of
				https://github.com/Frooodle/Stirling-PDF.git
				synced 2025-10-25 11:17:28 +02:00 
			
		
		
		
	html to pdf
This commit is contained in:
		
							parent
							
								
									2fd8c643af
								
							
						
					
					
						commit
						116d103119
					
				| @ -1,5 +1,5 @@ | ||||
| # Build jbig2enc in a separate stage | ||||
| FROM frooodle/stirling-pdf-base:latest | ||||
| FROM frooodle/stirling-pdf-base:beta4 | ||||
| 
 | ||||
| # Create scripts folder and copy local scripts | ||||
| RUN mkdir /scripts | ||||
|  | ||||
| @ -83,7 +83,9 @@ public class EndpointConfiguration { | ||||
|         addEndpointToGroup("Convert", "pdf-to-text"); | ||||
|         addEndpointToGroup("Convert", "pdf-to-html"); | ||||
|         addEndpointToGroup("Convert", "pdf-to-xml"); | ||||
| 
 | ||||
|         addEndpointToGroup("Convert", "html-to-pdf"); | ||||
|         addEndpointToGroup("Convert", "url-to-pdf"); | ||||
|          | ||||
|         // Adding endpoints to "Security" group | ||||
|         addEndpointToGroup("Security", "add-password"); | ||||
|         addEndpointToGroup("Security", "remove-password"); | ||||
| @ -125,12 +127,15 @@ public class EndpointConfiguration { | ||||
|         addEndpointToGroup("CLI", "pdf-to-html"); | ||||
|         addEndpointToGroup("CLI", "pdf-to-xml"); | ||||
|         addEndpointToGroup("CLI", "ocr-pdf"); | ||||
|         addEndpointToGroup("CLI", "html-to-pdf"); | ||||
|         addEndpointToGroup("CLI", "url-to-pdf"); | ||||
|          | ||||
|          | ||||
|         //python | ||||
|         addEndpointToGroup("Python", "extract-image-scans"); | ||||
|         addEndpointToGroup("Python", "remove-blanks"); | ||||
|          | ||||
|    | ||||
|         addEndpointToGroup("Python", "html-to-pdf"); | ||||
|         addEndpointToGroup("Python", "url-to-pdf"); | ||||
|          | ||||
|         //openCV | ||||
|         addEndpointToGroup("OpenCV", "extract-image-scans"); | ||||
|  | ||||
| @ -4,9 +4,13 @@ import java.io.ByteArrayInputStream; | ||||
| import java.io.IOException; | ||||
| import java.nio.file.Files; | ||||
| import java.nio.file.Path; | ||||
| import java.util.ArrayList; | ||||
| import java.util.List; | ||||
| import java.util.stream.Collectors; | ||||
| import java.util.stream.Stream; | ||||
| import java.util.zip.ZipEntry; | ||||
| import java.util.zip.ZipInputStream; | ||||
| import java.util.*; | ||||
| 
 | ||||
| import org.springframework.http.ResponseEntity; | ||||
| import org.springframework.web.bind.annotation.PostMapping; | ||||
| import org.springframework.web.bind.annotation.RequestPart; | ||||
| @ -40,61 +44,83 @@ public class ConvertHtmlToPDF { | ||||
| 	        if (originalFilename == null || (!originalFilename.endsWith(".html") && !originalFilename.endsWith(".zip"))) { | ||||
| 	            throw new IllegalArgumentException("File must be either .html or .zip format."); | ||||
| 	        } | ||||
| 
 | ||||
| 	        Path tempOutputFile = Files.createTempFile("output_", ".pdf"); | ||||
| 	        Path tempInputFile; | ||||
| 
 | ||||
| 	        if (originalFilename.endsWith(".html")) { | ||||
| 	            tempInputFile = Files.createTempFile("input_", ".html"); | ||||
| 	            Files.write(tempInputFile, fileInput.getBytes()); | ||||
| 	        } else { | ||||
| 	            tempInputFile = unzipAndGetMainHtml(fileInput); | ||||
| 	        Path tempInputFile = null; | ||||
| 	        byte[] pdfBytes; | ||||
| 	        try { | ||||
| 		        if (originalFilename.endsWith(".html")) { | ||||
| 		            tempInputFile = Files.createTempFile("input_", ".html"); | ||||
| 		            Files.write(tempInputFile, fileInput.getBytes()); | ||||
| 		        } else { | ||||
| 		            tempInputFile = unzipAndGetMainHtml(fileInput); | ||||
| 		        } | ||||
| 	 | ||||
| 		        List<String> command = new ArrayList<>(); | ||||
| 		        command.add("weasyprint"); | ||||
| 		        command.add(tempInputFile.toString());  | ||||
| 		        command.add(tempOutputFile.toString()); | ||||
| 		        int returnCode = 0; | ||||
| 		        if (originalFilename.endsWith(".zip")) {	        	 | ||||
| 		        	returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT) | ||||
| 	                .runCommandWithOutputHandling(command, tempInputFile.getParent().toFile()); | ||||
| 		        } else { | ||||
| 	 | ||||
| 		        	returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT) | ||||
| 		                                        .runCommandWithOutputHandling(command); | ||||
| 		        } | ||||
| 	 | ||||
| 		        pdfBytes = Files.readAllBytes(tempOutputFile); | ||||
| 	        } finally { | ||||
| 		        // Clean up temporary files | ||||
| 		        Files.delete(tempOutputFile); | ||||
| 		        Files.delete(tempInputFile); | ||||
| 		         | ||||
| 		        if (originalFilename.endsWith(".zip")) { | ||||
| 		        	GeneralUtils.deleteDirectory(tempInputFile.getParent()); | ||||
| 		        } | ||||
| 	        } | ||||
| 
 | ||||
| 	        List<String> command = new ArrayList<>(); | ||||
| 	        command.add("weasyprint"); | ||||
| 	        command.add(tempInputFile.toString());  | ||||
| 	        command.add(tempOutputFile.toString()); | ||||
| 	        int returnCode = 0; | ||||
| 	        if (originalFilename.endsWith(".zip")) {	        	 | ||||
| 	        	returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT) | ||||
|                 .runCommandWithOutputHandling(command, tempInputFile.getParent().toFile()); | ||||
| 	        } else { | ||||
| 
 | ||||
| 	        	returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT) | ||||
| 	                                        .runCommandWithOutputHandling(command); | ||||
| 	        } | ||||
| 
 | ||||
| 	        byte[] pdfBytes = Files.readAllBytes(tempOutputFile); | ||||
| 
 | ||||
| 	        // Clean up temporary files | ||||
| 	        Files.delete(tempOutputFile); | ||||
| 	        Files.delete(tempInputFile); | ||||
| 	        if (originalFilename.endsWith(".zip")) { | ||||
| 	        	GeneralUtils.deleteDirectory(tempInputFile.getParent()); | ||||
| 	        } | ||||
| 
 | ||||
| 	        String outputFilename = originalFilename.replaceFirst("[.][^.]+$", "") + ".pdf";  // Remove file extension and append .pdf | ||||
| 	        return WebResponseUtils.bytesToWebResponse(pdfBytes, outputFilename); | ||||
| 	    } | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
|     private Path unzipAndGetMainHtml(MultipartFile zipFile) throws IOException { | ||||
|         Path tempDirectory = Files.createTempDirectory("unzipped_"); | ||||
|         try (ZipInputStream zipIn = new ZipInputStream(new ByteArrayInputStream(zipFile.getBytes()))) { | ||||
|             ZipEntry entry = zipIn.getNextEntry(); | ||||
|             while (entry != null) { | ||||
|                 Path filePath = tempDirectory.resolve(entry.getName()); | ||||
|                 if (!entry.isDirectory()) { | ||||
|                     Files.copy(zipIn, filePath); | ||||
|                 } | ||||
|                 zipIn.closeEntry(); | ||||
|                 entry = zipIn.getNextEntry(); | ||||
|             } | ||||
|         } | ||||
|         return tempDirectory.resolve("index.html"); | ||||
|     } | ||||
| 	    /** | ||||
| 	     * Extracts the uploaded zip archive into a new temporary directory and picks the HTML | ||||
| 	     * file that should be rendered. | ||||
| 	     * | ||||
| 	     * <p>Every entry path is normalized and must stay inside the extraction directory, so an | ||||
| 	     * entry such as {@code ../../evil} is rejected instead of being written elsewhere on disk. | ||||
| 	     * | ||||
| 	     * @param zipFile the uploaded zip archive | ||||
| 	     * @return the path of a file named {@code index.html} if one was extracted, otherwise the | ||||
| 	     *         first {@code .html} file found under the extraction directory | ||||
| 	     * @throws IOException if the archive cannot be read or extracted, an entry resolves outside | ||||
| 	     *         the extraction directory, or no HTML file is found | ||||
| 	     */ | ||||
| 	    private Path unzipAndGetMainHtml(MultipartFile zipFile) throws IOException { | ||||
| 	        // Absolute + normalized so the startsWith() containment check below is reliable | ||||
| 	        Path tempDirectory = Files.createTempDirectory("unzipped_").toAbsolutePath().normalize(); | ||||
| 	        try (ZipInputStream zipIn = new ZipInputStream(new ByteArrayInputStream(zipFile.getBytes()))) { | ||||
| 	            ZipEntry entry = zipIn.getNextEntry(); | ||||
| 	            while (entry != null) { | ||||
| 	                Path filePath = tempDirectory.resolve(entry.getName()).normalize(); | ||||
| 	                // Zip Slip guard: entry names come from the uploaded archive and are untrusted | ||||
| 	                if (!filePath.startsWith(tempDirectory)) { | ||||
| 	                    throw new IOException("Zip entry is outside of the target directory: " + entry.getName()); | ||||
| 	                } | ||||
| 	                if (entry.isDirectory()) { | ||||
| 	                    Files.createDirectories(filePath);  // Explicitly create the directory structure | ||||
| 	                } else { | ||||
| 	                    Files.createDirectories(filePath.getParent()); // Create parent directories if they don't exist | ||||
| 	                    Files.copy(zipIn, filePath); | ||||
| 	                } | ||||
| 	                zipIn.closeEntry(); | ||||
| 	                entry = zipIn.getNextEntry(); | ||||
| 	            } | ||||
| 	        } | ||||
|  | ||||
| 	        // Search for the main HTML file; only regular files qualify, not directories named *.html | ||||
| 	        try (Stream<Path> walk = Files.walk(tempDirectory)) { | ||||
| 	            List<Path> htmlFiles = walk.filter(file -> Files.isRegularFile(file) && file.toString().endsWith(".html")) | ||||
| 	                                       .collect(Collectors.toList()); | ||||
|  | ||||
| 	            if (htmlFiles.isEmpty()) { | ||||
| 	                throw new IOException("No HTML files found in the unzipped directory."); | ||||
| 	            } | ||||
|  | ||||
| 	            // Prioritize 'index.html' if it exists, otherwise use the first .html file | ||||
| 	            for (Path htmlFile : htmlFiles) { | ||||
| 	                if (htmlFile.getFileName().toString().equals("index.html")) { | ||||
| 	                    return htmlFile; | ||||
| 	                } | ||||
| 	            } | ||||
|  | ||||
| 	            return htmlFiles.get(0); | ||||
| 	        } | ||||
| 	    } | ||||
| 
 | ||||
|      | ||||
|     | ||||
|  | ||||
| @ -34,27 +34,30 @@ public class ConvertWebsiteToPDF { | ||||
| 	        String URL) throws IOException, InterruptedException { | ||||
| 
 | ||||
| 	    // Validate the URL format | ||||
| 	    if(!URL.matches("^https?://.*") && GeneralUtils.isValidURL(URL)) { | ||||
| 	    if(!URL.matches("^https?://.*") || !GeneralUtils.isValidURL(URL)) { | ||||
| 	        throw new IllegalArgumentException("Invalid URL format provided."); | ||||
| 	    } | ||||
| 
 | ||||
| 	    // Prepare the output file path | ||||
| 	    Path tempOutputFile = Files.createTempFile("output_", ".pdf"); | ||||
| 
 | ||||
| 	    // Prepare the OCRmyPDF command | ||||
| 	    List<String> command = new ArrayList<>(); | ||||
| 	    command.add("weasyprint"); | ||||
| 	    command.add(URL); | ||||
| 	    command.add(tempOutputFile.toString()); | ||||
| 
 | ||||
| 	    int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT).runCommandWithOutputHandling(command); | ||||
| 
 | ||||
| 	    // Read the optimized PDF file | ||||
| 	    byte[] pdfBytes = Files.readAllBytes(tempOutputFile); | ||||
| 
 | ||||
| 	    // Clean up the temporary files | ||||
| 	    Files.delete(tempOutputFile); | ||||
| 
 | ||||
| 	    Path tempOutputFile = null; | ||||
| 	    byte[] pdfBytes; | ||||
| 	    try { | ||||
| 		    // Prepare the output file path | ||||
| 		    tempOutputFile = Files.createTempFile("output_", ".pdf"); | ||||
| 	 | ||||
| 		    // Prepare the WeasyPrint command | ||||
| 		    List<String> command = new ArrayList<>(); | ||||
| 		    command.add("weasyprint"); | ||||
| 		    command.add(URL); | ||||
| 		    command.add(tempOutputFile.toString()); | ||||
| 	 | ||||
| 		    int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT).runCommandWithOutputHandling(command); | ||||
| 	 | ||||
| 		    // Read the generated PDF file | ||||
| 		    pdfBytes = Files.readAllBytes(tempOutputFile); | ||||
| 	    } | ||||
| 	    finally { | ||||
| 		    // Clean up the temporary files | ||||
| 		    Files.delete(tempOutputFile); | ||||
| 	    } | ||||
| 	    // Convert URL to a safe filename | ||||
| 	    String outputFilename = convertURLToFileName(URL); | ||||
| 	     | ||||
|  | ||||
| @ -183,6 +183,12 @@ home.autoSplitPDF.desc=Auto Split Scanned PDF with physical scanned page splitte | ||||
| home.sanitizePdf.title=Sanitize | ||||
| home.sanitizePdf.desc=Remove scripts and other elements from PDF files | ||||
| 
 | ||||
| home.URLToPDF.title=URL/Website To PDF | ||||
| home.URLToPDF.desc=Converts any http(s) URL to PDF | ||||
| 
 | ||||
| home.HTMLToPDF.title=HTML to PDF | ||||
| home.HTMLToPDF.desc=Converts any HTML file or zip to PDF | ||||
| 
 | ||||
| 
 | ||||
| ########################### | ||||
| #                         # | ||||
|  | ||||
							
								
								
									
										3
									
								
								src/main/resources/static/images/html.svg
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										3
									
								
								src/main/resources/static/images/html.svg
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,3 @@ | ||||
| <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-filetype-html" viewBox="0 0 16 16"> | ||||
|   <path fill-rule="evenodd" d="M14 4.5V11h-1V4.5h-2A1.5 1.5 0 0 1 9.5 3V1H4a1 1 0 0 0-1 1v9H2V2a2 2 0 0 1 2-2h5.5L14 4.5Zm-9.736 7.35v3.999h-.791v-1.714H1.79v1.714H1V11.85h.791v1.626h1.682V11.85h.79Zm2.251.662v3.337h-.794v-3.337H4.588v-.662h3.064v.662H6.515Zm2.176 3.337v-2.66h.038l.952 2.159h.516l.946-2.16h.038v2.661h.715V11.85h-.8l-1.14 2.596H9.93L8.79 11.85h-.805v3.999h.706Zm4.71-.674h1.696v.674H12.61V11.85h.79v3.325Z"/> | ||||
| </svg> | ||||
| After Width: | Height: | Size: 565 B | 
							
								
								
									
										4
									
								
								src/main/resources/static/images/url.svg
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										4
									
								
								src/main/resources/static/images/url.svg
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,4 @@ | ||||
| <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-link" viewBox="0 0 16 16"> | ||||
|   <path d="M6.354 5.5H4a3 3 0 0 0 0 6h3a3 3 0 0 0 2.83-4H9c-.086 0-.17.01-.25.031A2 2 0 0 1 7 10.5H4a2 2 0 1 1 0-4h1.535c.218-.376.495-.714.82-1z"/> | ||||
|   <path d="M9 5.5a3 3 0 0 0-2.83 4h1.098A2 2 0 0 1 9 6.5h3a2 2 0 1 1 0 4h-1.535a4.02 4.02 0 0 1-.82 1H12a3 3 0 1 0 0-6H9z"/> | ||||
| </svg> | ||||
| After Width: | Height: | Size: 403 B | 
| @ -71,6 +71,9 @@ | ||||
|                             <!-- Existing menu items --> | ||||
|                             <div th:replace="~{fragments/navbarEntry :: navbarEntry ('img-to-pdf', 'images/image.svg', 'home.imageToPdf.title', 'home.imageToPdf.desc')}"></div> | ||||
|                             <div th:replace="~{fragments/navbarEntry :: navbarEntry ('file-to-pdf', 'images/file.svg', 'home.fileToPDF.title', 'home.fileToPDF.desc')}"></div> | ||||
|                             <div th:replace="~{fragments/navbarEntry :: navbarEntry ('html-to-pdf', 'images/html.svg', 'home.HTMLToPDF.title', 'home.HTMLToPDF.desc')}"></div> | ||||
|                             <div th:replace="~{fragments/navbarEntry :: navbarEntry ('url-to-pdf', 'images/url.svg', 'home.URLToPDF.title', 'home.URLToPDF.desc')}"></div> | ||||
| 				 | ||||
|                             <hr class="dropdown-divider"> | ||||
|                             <div th:replace="~{fragments/navbarEntry :: navbarEntry ('pdf-to-img', 'images/image.svg', 'home.pdfToImage.title', 'home.pdfToImage.desc')}"></div> | ||||
|                             <div th:replace="~{fragments/navbarEntry :: navbarEntry ('pdf-to-word', 'images/file-earmark-word.svg', 'home.PDFToWord.title', 'home.PDFToWord.desc')}"></div> | ||||
|  | ||||
| @ -34,6 +34,10 @@ | ||||
|                 <div th:replace="~{fragments/card :: card(id='split-pdfs', cardTitle=#{home.split.title}, cardText=#{home.split.desc}, cardLink='split-pdfs', svgPath='images/layout-split.svg')}"></div> | ||||
|                  | ||||
|                 <div th:replace="~{fragments/card :: card(id='rotate-pdf', cardTitle=#{home.rotate.title}, cardText=#{home.rotate.desc}, cardLink='rotate-pdf', svgPath='images/arrow-clockwise.svg')}"></div> | ||||
|                 <div th:replace="~{fragments/card :: card(id='crop', cardTitle=#{home.crop.title}, cardText=#{home.crop.desc}, cardLink='crop', svgPath='images/crop.svg')}"></div> | ||||
|                 <div th:replace="~{fragments/card :: card(id='add-page-numbers', cardTitle=#{home.add-page-numbers.title}, cardText=#{home.add-page-numbers.desc}, cardLink='add-page-numbers', svgPath='images/add-page-numbers.svg')}"></div> | ||||
|                  | ||||
|                 <div th:replace="~{fragments/card :: card(id='adjust-contrast', cardTitle=#{home.adjust-contrast.title}, cardText=#{home.adjust-contrast.desc}, cardLink='adjust-contrast', svgPath='images/adjust-contrast.svg')}"></div> | ||||
|                 <div th:replace="~{fragments/card :: card(id='img-to-pdf', cardTitle=#{home.imageToPdf.title}, cardText=#{home.imageToPdf.desc}, cardLink='img-to-pdf', svgPath='images/image.svg')}"></div> | ||||
|                 <div th:replace="~{fragments/card :: card(id='pdf-to-img', cardTitle=#{home.pdfToImage.title}, cardText=#{home.pdfToImage.desc}, cardLink='pdf-to-img', svgPath='images/image.svg')}"></div> | ||||
|                  | ||||
| @ -73,12 +77,13 @@ | ||||
|                 <div th:replace="~{fragments/card :: card(id='multi-page-layout', cardTitle=#{home.pageLayout.title}, cardText=#{home.pageLayout.desc}, cardLink='multi-page-layout', svgPath='images/page-layout.svg')}"></div> | ||||
|                 <div th:replace="~{fragments/card :: card(id='scale-pages', cardTitle=#{home.scalePages.title}, cardText=#{home.scalePages.desc}, cardLink='scale-pages', svgPath='images/scale-pages.svg')}"></div> | ||||
|                  | ||||
|                 <div th:replace="~{fragments/card :: card(id='add-page-numbers', cardTitle=#{home.add-page-numbers.title}, cardText=#{home.add-page-numbers.desc}, cardLink='add-page-numbers', svgPath='images/add-page-numbers.svg')}"></div> | ||||
|                  | ||||
|                 <div th:replace="~{fragments/card :: card(id='auto-rename', cardTitle=#{home.auto-rename.title}, cardText=#{home.auto-rename.desc}, cardLink='auto-rename', svgPath='images/fonts.svg')}"></div> | ||||
|                 <div th:replace="~{fragments/card :: card(id='adjust-contrast', cardTitle=#{home.adjust-contrast.title}, cardText=#{home.adjust-contrast.desc}, cardLink='adjust-contrast', svgPath='images/adjust-contrast.svg')}"></div> | ||||
|  				<div th:replace="~{fragments/card :: card(id='crop', cardTitle=#{home.crop.title}, cardText=#{home.crop.desc}, cardLink='crop', svgPath='images/crop.svg')}"></div> | ||||
|                 <div th:replace="~{fragments/card :: card(id='auto-split-pdf', cardTitle=#{home.autoSplitPDF.title}, cardText=#{home.autoSplitPDF.desc}, cardLink='auto-split-pdf', svgPath='images/layout-split.svg')}"></div> | ||||
| 				<div th:replace="~{fragments/card :: card(id='sanitize-pdf', cardTitle=#{home.sanitizePdf.title}, cardText=#{home.sanitizePdf.desc}, cardLink='sanitize-pdf', svgPath='images/sanitize.svg')}"></div> | ||||
| 				 | ||||
| 				<div th:replace="~{fragments/card :: card(id='url-to-pdf', cardTitle=#{home.URLToPDF.title}, cardText=#{home.URLToPDF.desc}, cardLink='url-to-pdf', svgPath='images/url.svg')}"></div> | ||||
| 				<div th:replace="~{fragments/card :: card(id='html-to-pdf', cardTitle=#{home.HTMLToPDF.title}, cardText=#{home.HTMLToPDF.desc}, cardLink='html-to-pdf', svgPath='images/html.svg')}"></div> | ||||
|                  | ||||
|             </div> | ||||
|         </div> </div> | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user