mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2026-02-17 13:52:14 +01:00
feat: Add RegexPatternUtils for centralized regex management, file naming funcs, UtilityClass annotation (#4218)
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: Anthony Stirling <77850077+Frooodle@users.noreply.github.com>
This commit is contained in:
@@ -22,6 +22,7 @@ import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import stirling.software.common.model.job.JobResponse;
|
||||
import stirling.software.common.util.ExecutorFactory;
|
||||
import stirling.software.common.util.RegexPatternUtils;
|
||||
|
||||
/** Service for executing jobs asynchronously or synchronously */
|
||||
@Service
|
||||
@@ -426,8 +427,16 @@ public class JobExecutorService {
|
||||
}
|
||||
|
||||
try {
|
||||
String value = timeout.replaceAll("[^\\d.]", "");
|
||||
String unit = timeout.replaceAll("[\\d.]", "");
|
||||
String value =
|
||||
RegexPatternUtils.getInstance()
|
||||
.getNonDigitDotPattern()
|
||||
.matcher(timeout)
|
||||
.replaceAll("");
|
||||
String unit =
|
||||
RegexPatternUtils.getInstance()
|
||||
.getDigitDotPattern()
|
||||
.matcher(timeout)
|
||||
.replaceAll("");
|
||||
|
||||
double numericValue = Double.parseDouble(value);
|
||||
|
||||
|
||||
@@ -13,6 +13,7 @@ import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import stirling.software.common.model.ApplicationProperties;
|
||||
import stirling.software.common.util.RegexPatternUtils;
|
||||
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
@@ -22,8 +23,9 @@ public class SsrfProtectionService {
|
||||
private final ApplicationProperties applicationProperties;
|
||||
|
||||
private static final Pattern DATA_URL_PATTERN =
|
||||
Pattern.compile("^data:.*", Pattern.CASE_INSENSITIVE);
|
||||
private static final Pattern FRAGMENT_PATTERN = Pattern.compile("^#.*");
|
||||
RegexPatternUtils.getInstance().getPattern("^data:.*", Pattern.CASE_INSENSITIVE);
|
||||
private static final Pattern FRAGMENT_PATTERN =
|
||||
RegexPatternUtils.getInstance().getPattern("^#.*");
|
||||
|
||||
public enum SsrfProtectionLevel {
|
||||
OFF, // No SSRF protection - allows all URLs
|
||||
|
||||
@@ -23,6 +23,7 @@ import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import stirling.software.common.model.ApplicationProperties;
|
||||
import stirling.software.common.util.GeneralUtils;
|
||||
import stirling.software.common.util.RegexPatternUtils;
|
||||
import stirling.software.common.util.TempFileManager;
|
||||
import stirling.software.common.util.TempFileRegistry;
|
||||
|
||||
@@ -61,8 +62,14 @@ public class TempFileCleanupService {
|
||||
// File patterns that identify common system temp files
|
||||
private static final Predicate<String> IS_SYSTEM_TEMP_FILE =
|
||||
fileName ->
|
||||
fileName.matches("lu\\d+[a-z0-9]*\\.tmp")
|
||||
|| fileName.matches("ocr_process\\d+")
|
||||
RegexPatternUtils.getInstance()
|
||||
.getSystemTempFile1Pattern()
|
||||
.matcher(fileName)
|
||||
.matches()
|
||||
|| RegexPatternUtils.getInstance()
|
||||
.getSystemTempFile2Pattern()
|
||||
.matcher(fileName)
|
||||
.matches()
|
||||
|| (fileName.startsWith("tmp") && !fileName.contains("jetty"))
|
||||
|| fileName.startsWith("OSL_PIPE_")
|
||||
|| (fileName.endsWith(".tmp") && !fileName.contains("jetty"));
|
||||
|
||||
@@ -29,7 +29,7 @@ public class EmlParser {
|
||||
private static volatile boolean mimeUtilityChecked = false;
|
||||
|
||||
private static final Pattern MIME_ENCODED_PATTERN =
|
||||
Pattern.compile("=\\?([^?]+)\\?([BbQq])\\?([^?]*)\\?=");
|
||||
RegexPatternUtils.getInstance().getMimeEncodedWordPattern();
|
||||
|
||||
private static final String DISPOSITION_ATTACHMENT = "attachment";
|
||||
private static final String TEXT_PLAIN = MediaType.TEXT_PLAIN_VALUE;
|
||||
@@ -357,7 +357,11 @@ public class EmlParser {
|
||||
for (String contentIdHeader : contentIdHeaders) {
|
||||
if (contentIdHeader != null && !contentIdHeader.trim().isEmpty()) {
|
||||
attachment.setEmbedded(true);
|
||||
String contentId = contentIdHeader.trim().replaceAll("[<>]", "");
|
||||
String contentId =
|
||||
RegexPatternUtils.getInstance()
|
||||
.getAngleBracketsPattern()
|
||||
.matcher(contentIdHeader.trim())
|
||||
.replaceAll("");
|
||||
attachment.setContentId(contentId);
|
||||
break;
|
||||
}
|
||||
@@ -414,7 +418,8 @@ public class EmlParser {
|
||||
|
||||
private static String extractBasicHeader(String emlContent, String headerName) {
|
||||
try {
|
||||
String[] lines = emlContent.split("\r?\n");
|
||||
String[] lines =
|
||||
RegexPatternUtils.getInstance().getNewlineSplitPattern().split(emlContent);
|
||||
for (int i = 0; i < lines.length; i++) {
|
||||
String line = lines[i];
|
||||
if (line.toLowerCase().startsWith(headerName.toLowerCase())) {
|
||||
@@ -485,7 +490,10 @@ public class EmlParser {
|
||||
}
|
||||
|
||||
private static int findPartEnd(String content, int start) {
|
||||
String[] lines = content.substring(start).split("\r?\n");
|
||||
String[] lines =
|
||||
RegexPatternUtils.getInstance()
|
||||
.getNewlineSplitPattern()
|
||||
.split(content.substring(start));
|
||||
StringBuilder result = new StringBuilder();
|
||||
|
||||
for (String line : lines) {
|
||||
@@ -499,7 +507,8 @@ public class EmlParser {
|
||||
private static List<EmailAttachment> extractAttachmentsBasic(String emlContent) {
|
||||
List<EmailAttachment> attachments = new ArrayList<>();
|
||||
try {
|
||||
String[] lines = emlContent.split("\r?\n");
|
||||
String[] lines =
|
||||
RegexPatternUtils.getInstance().getNewlineSplitPattern().split(emlContent);
|
||||
boolean inHeaders = true;
|
||||
String currentContentType = "";
|
||||
String currentDisposition = "";
|
||||
@@ -562,7 +571,11 @@ public class EmlParser {
|
||||
if (filenameStarEnd == -1) filenameStarEnd = disposition.length();
|
||||
String extendedFilename =
|
||||
disposition.substring(filenameStarStart, filenameStarEnd).trim();
|
||||
extendedFilename = extendedFilename.replaceAll("^\"|\"$", "");
|
||||
extendedFilename =
|
||||
RegexPatternUtils.getInstance()
|
||||
.getQuotesRemovalPattern()
|
||||
.matcher(extendedFilename)
|
||||
.replaceAll("");
|
||||
|
||||
if (extendedFilename.contains("'")) {
|
||||
String[] parts = extendedFilename.split("'", 3);
|
||||
@@ -577,7 +590,11 @@ public class EmlParser {
|
||||
int filenameEnd = disposition.indexOf(";", filenameStart);
|
||||
if (filenameEnd == -1) filenameEnd = disposition.length();
|
||||
String filename = disposition.substring(filenameStart, filenameEnd).trim();
|
||||
filename = filename.replaceAll("^\"|\"$", "");
|
||||
filename =
|
||||
RegexPatternUtils.getInstance()
|
||||
.getQuotesRemovalPattern()
|
||||
.matcher(filename)
|
||||
.replaceAll("");
|
||||
return safeMimeDecode(filename);
|
||||
}
|
||||
|
||||
@@ -630,11 +647,23 @@ public class EmlParser {
|
||||
private List<EmailAttachment> attachments = new ArrayList<>();
|
||||
|
||||
public void setHtmlBody(String htmlBody) {
|
||||
this.htmlBody = htmlBody != null ? htmlBody.replaceAll("\r", "") : null;
|
||||
this.htmlBody =
|
||||
htmlBody != null
|
||||
? RegexPatternUtils.getInstance()
|
||||
.getCarriageReturnPattern()
|
||||
.matcher(htmlBody)
|
||||
.replaceAll("")
|
||||
: null;
|
||||
}
|
||||
|
||||
public void setTextBody(String textBody) {
|
||||
this.textBody = textBody != null ? textBody.replaceAll("\r", "") : null;
|
||||
this.textBody =
|
||||
textBody != null
|
||||
? RegexPatternUtils.getInstance()
|
||||
.getCarriageReturnPattern()
|
||||
.matcher(textBody)
|
||||
.replaceAll("")
|
||||
: null;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -199,8 +199,16 @@ public class EmlProcessingUtils {
|
||||
String processed =
|
||||
customHtmlSanitizer != null ? customHtmlSanitizer.sanitize(htmlBody) : htmlBody;
|
||||
|
||||
processed = processed.replaceAll("(?i)\\s*position\\s*:\\s*fixed[^;]*;?", "");
|
||||
processed = processed.replaceAll("(?i)\\s*position\\s*:\\s*absolute[^;]*;?", "");
|
||||
processed =
|
||||
RegexPatternUtils.getInstance()
|
||||
.getFixedPositionCssPattern()
|
||||
.matcher(processed)
|
||||
.replaceAll("");
|
||||
processed =
|
||||
RegexPatternUtils.getInstance()
|
||||
.getAbsolutePositionCssPattern()
|
||||
.matcher(processed)
|
||||
.replaceAll("");
|
||||
|
||||
if (emailContent != null && !emailContent.getAttachments().isEmpty()) {
|
||||
processed = PdfAttachmentHandler.processInlineImages(processed, emailContent);
|
||||
@@ -222,14 +230,18 @@ public class EmlProcessingUtils {
|
||||
html = html.replace("\n", "<br>\n");
|
||||
|
||||
html =
|
||||
html.replaceAll(
|
||||
"(https?://[\\w\\-._~:/?#\\[\\]@!$&'()*+,;=%]+)",
|
||||
"<a href=\"$1\" style=\"color: #1a73e8; text-decoration: underline;\">$1</a>");
|
||||
RegexPatternUtils.getInstance()
|
||||
.getUrlLinkPattern()
|
||||
.matcher(html)
|
||||
.replaceAll(
|
||||
"<a href=\"$1\" style=\"color: #1a73e8; text-decoration: underline;\">$1</a>");
|
||||
|
||||
html =
|
||||
html.replaceAll(
|
||||
"([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,63})",
|
||||
"<a href=\"mailto:$1\" style=\"color: #1a73e8; text-decoration: underline;\">$1</a>");
|
||||
RegexPatternUtils.getInstance()
|
||||
.getEmailLinkPattern()
|
||||
.matcher(html)
|
||||
.replaceAll(
|
||||
"<a href=\"mailto:$1\" style=\"color: #1a73e8; text-decoration: underline;\">$1</a>");
|
||||
|
||||
return html;
|
||||
}
|
||||
@@ -490,9 +502,13 @@ public class EmlProcessingUtils {
|
||||
Matcher concatenatedMatcher = concatenatedPattern.matcher(encodedText);
|
||||
String processedText =
|
||||
concatenatedMatcher.replaceAll(
|
||||
match -> match.group().replaceAll("\\s+(?==\\?)", ""));
|
||||
match ->
|
||||
RegexPatternUtils.getInstance()
|
||||
.getMimeHeaderWhitespacePattern()
|
||||
.matcher(match.group())
|
||||
.replaceAll(""));
|
||||
|
||||
Pattern mimePattern = Pattern.compile("=\\?([^?]+)\\?([BbQq])\\?([^?]*)\\?=");
|
||||
Pattern mimePattern = RegexPatternUtils.getInstance().getMimeEncodedWordPattern();
|
||||
Matcher matcher = mimePattern.matcher(processedText);
|
||||
int lastEnd = 0;
|
||||
|
||||
@@ -507,7 +523,11 @@ public class EmlProcessingUtils {
|
||||
String decodedValue =
|
||||
switch (encoding) {
|
||||
case "B" -> {
|
||||
String cleanBase64 = encodedValue.replaceAll("\\s", "");
|
||||
String cleanBase64 =
|
||||
RegexPatternUtils.getInstance()
|
||||
.getWhitespacePattern()
|
||||
.matcher(encodedValue)
|
||||
.replaceAll("");
|
||||
byte[] decodedBytes = Base64.getDecoder().decode(cleanBase64);
|
||||
Charset targetCharset;
|
||||
try {
|
||||
@@ -596,8 +616,16 @@ public class EmlProcessingUtils {
|
||||
}
|
||||
|
||||
public static String simplifyHtmlContent(String htmlContent) {
|
||||
String simplified = htmlContent.replaceAll("(?i)<script[^>]*>.*?</script>", "");
|
||||
simplified = simplified.replaceAll("(?i)<style[^>]*>.*?</style>", "");
|
||||
String simplified =
|
||||
RegexPatternUtils.getInstance()
|
||||
.getScriptTagPattern()
|
||||
.matcher(htmlContent)
|
||||
.replaceAll("");
|
||||
simplified =
|
||||
RegexPatternUtils.getInstance()
|
||||
.getStyleTagPattern()
|
||||
.matcher(simplified)
|
||||
.replaceAll("");
|
||||
return simplified;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
package stirling.software.common.util;
|
||||
|
||||
import java.io.*;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.UncheckedIOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.FileVisitResult;
|
||||
import java.nio.file.Files;
|
||||
@@ -205,15 +208,27 @@ public class FileToPdf {
|
||||
return "";
|
||||
}
|
||||
// Remove any drive letters (e.g., "C:\") and leading forward/backslashes
|
||||
entryName = entryName.replaceAll("^[a-zA-Z]:[\\\\/]+", "");
|
||||
entryName = entryName.replaceAll("^[\\\\/]+", "");
|
||||
entryName =
|
||||
RegexPatternUtils.getInstance()
|
||||
.getDriveLetterPattern()
|
||||
.matcher(entryName)
|
||||
.replaceAll("");
|
||||
entryName =
|
||||
RegexPatternUtils.getInstance()
|
||||
.getLeadingSlashesPattern()
|
||||
.matcher(entryName)
|
||||
.replaceAll("");
|
||||
|
||||
// Recursively remove path traversal sequences
|
||||
while (entryName.contains("../") || entryName.contains("..\\")) {
|
||||
entryName = entryName.replace("../", "").replace("..\\", "");
|
||||
}
|
||||
// Normalize all backslashes to forward slashes
|
||||
entryName = entryName.replaceAll("\\\\", "/");
|
||||
entryName =
|
||||
RegexPatternUtils.getInstance()
|
||||
.getBackslashPattern()
|
||||
.matcher(entryName)
|
||||
.replaceAll("/");
|
||||
return entryName;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -9,13 +9,9 @@ import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.*;
|
||||
import java.nio.file.attribute.BasicFileAttributes;
|
||||
import java.security.MessageDigest;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Enumeration;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Set;
|
||||
import java.util.UUID;
|
||||
import java.util.*;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
import org.springframework.core.io.Resource;
|
||||
@@ -28,25 +24,37 @@ import com.fathzer.soft.javaluator.DoubleEvaluator;
|
||||
import io.github.pixee.security.HostValidator;
|
||||
import io.github.pixee.security.Urls;
|
||||
|
||||
import lombok.experimental.UtilityClass;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import stirling.software.common.configuration.InstallationPathConfig;
|
||||
|
||||
@Slf4j
|
||||
@UtilityClass
|
||||
public class GeneralUtils {
|
||||
|
||||
private static final Set<String> DEFAULT_VALID_SCRIPTS =
|
||||
Set.of("png_to_webp.py", "split_photos.py");
|
||||
private static final Set<String> DEFAULT_VALID_PIPELINE =
|
||||
private final Set<String> DEFAULT_VALID_SCRIPTS = Set.of("png_to_webp.py", "split_photos.py");
|
||||
private final Set<String> DEFAULT_VALID_PIPELINE =
|
||||
Set.of(
|
||||
"OCR images.json",
|
||||
"Prepare-pdfs-for-email.json",
|
||||
"split-rotate-auto-rename.json");
|
||||
|
||||
private static final String DEFAULT_WEBUI_CONFIGS_DIR = "defaultWebUIConfigs";
|
||||
private static final String PYTHON_SCRIPTS_DIR = "python";
|
||||
private final String DEFAULT_WEBUI_CONFIGS_DIR = "defaultWebUIConfigs";
|
||||
private final String PYTHON_SCRIPTS_DIR = "python";
|
||||
private final RegexPatternUtils patternCache = RegexPatternUtils.getInstance();
|
||||
// Valid size units used for convertSizeToBytes validation and parsing
|
||||
private final Set<String> VALID_SIZE_UNITS = Set.of("B", "KB", "MB", "GB", "TB");
|
||||
|
||||
public static File convertMultipartFileToFile(MultipartFile multipartFile) throws IOException {
|
||||
/**
|
||||
* Converts a MultipartFile to a regular File with improved performance and security.
|
||||
*
|
||||
* @param multipartFile the multipart file to convert
|
||||
* @return temporary File containing the multipart file data
|
||||
* @throws IOException if I/O error occurs during conversion
|
||||
* @throws IllegalArgumentException if file exceeds maximum allowed size
|
||||
*/
|
||||
public File convertMultipartFileToFile(MultipartFile multipartFile) throws IOException {
|
||||
String customTempDir = System.getenv("STIRLING_TEMPFILES_DIRECTORY");
|
||||
if (customTempDir == null || customTempDir.isEmpty()) {
|
||||
customTempDir = System.getProperty("stirling.tempfiles.directory");
|
||||
@@ -81,10 +89,137 @@ public class GeneralUtils {
|
||||
return tempFile;
|
||||
}
|
||||
|
||||
public static void deleteDirectory(Path path) throws IOException {
|
||||
/**
|
||||
* Gets the configured temporary directory, creating it if necessary.
|
||||
*
|
||||
* @return Path to the temporary directory
|
||||
* @throws IOException if directory creation fails
|
||||
*/
|
||||
private Path getTempDirectory() throws IOException {
|
||||
String customTempDir = System.getenv("STIRLING_TEMPFILES_DIRECTORY");
|
||||
if (customTempDir == null || customTempDir.isEmpty()) {
|
||||
customTempDir = System.getProperty("stirling.tempfiles.directory");
|
||||
}
|
||||
|
||||
Path tempDir;
|
||||
if (customTempDir != null && !customTempDir.isEmpty()) {
|
||||
tempDir = Path.of(customTempDir);
|
||||
} else {
|
||||
tempDir = Path.of(System.getProperty("java.io.tmpdir"), "stirling-pdf");
|
||||
}
|
||||
|
||||
if (!Files.exists(tempDir)) {
|
||||
Files.createDirectories(tempDir);
|
||||
}
|
||||
|
||||
return tempDir;
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove file extension
|
||||
*
|
||||
* <p>Uses fast string operations for common cases (valid extensions) and falls back to
|
||||
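* optimized regex for edge cases (no extension, hidden files, etc.). NOTE(review): the
|
||||
* dotIndex checks appear to cover every case, so the regex fallback looks unreachable.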
|
||||
*
|
||||
* <ul>
|
||||
* <li>String operations avoid regex engine overhead for common cases
|
||||
* <li>Cached pattern compilation eliminates recompilation costs
|
||||
* <li>Fresh Matcher instances ensure thread safety
|
||||
* </ul>
|
||||
*
|
||||
* @param filename the filename to process, may be null
|
||||
* @return filename without extension, or "default" if input is null
|
||||
*/
|
||||
public String removeExtension(String filename) {
|
||||
if (filename == null) {
|
||||
return "default";
|
||||
}
|
||||
|
||||
if (filename.isEmpty()) {
|
||||
return filename;
|
||||
}
|
||||
|
||||
int dotIndex = filename.lastIndexOf('.');
|
||||
if (dotIndex > 0 && dotIndex < filename.length() - 1) {
|
||||
return filename.substring(0, dotIndex);
|
||||
}
|
||||
|
||||
if (dotIndex == 0 || dotIndex == filename.length() - 1 || dotIndex == -1) {
|
||||
return filename;
|
||||
}
|
||||
|
||||
Pattern pattern = patternCache.getPattern(RegexPatternUtils.getExtensionRegex());
|
||||
Matcher matcher = pattern.matcher(filename);
|
||||
return matcher.find() ? matcher.replaceFirst("") : filename;
|
||||
}
|
||||
|
||||
/**
|
||||
* Append suffix to base name with null safety.
|
||||
*
|
||||
* @param baseName the base filename, null becomes "default"
|
||||
* @param suffix the suffix to append, null becomes empty string
|
||||
* @return concatenated string with null safety
|
||||
*/
|
||||
public String appendSuffix(String baseName, String suffix) {
|
||||
return (baseName == null ? "default" : baseName) + (suffix != null ? suffix : "");
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate a PDF filename by removing extension from first file and adding suffix.
|
||||
*
|
||||
* <p>High-level utility method for common PDF naming scenarios. Handles null safety and uses
|
||||
* extension removal.
|
||||
*
|
||||
* @param firstFilename the filename of the first file being processed, may be null
|
||||
* @param suffix the suffix to append (e.g., "_merged.pdf")
|
||||
* @return filename with suffix, or default name if input is null
|
||||
*/
|
||||
public String generateFilename(String firstFilename, String suffix) {
|
||||
String baseName = removeExtension(firstFilename);
|
||||
return appendSuffix(baseName, suffix);
|
||||
}
|
||||
|
||||
/**
|
||||
* Process a list of filenames by removing extensions and adding suffix.
|
||||
*
|
||||
* <p>Efficiently processes multiple filenames using streaming operations and bulk operations
|
||||
* where possible. Handles null safety for both input list and individual filenames.
|
||||
*
|
||||
* @param filenames the list of filenames to process, may be null
|
||||
* @param suffix the suffix to append to each processed filename
|
||||
* @param processor consumer to handle each processed filename, may be null
|
||||
*/
|
||||
public void processFilenames(
|
||||
List<String> filenames, String suffix, java.util.function.Consumer<String> processor) {
|
||||
if (filenames == null || processor == null) {
|
||||
return;
|
||||
}
|
||||
|
||||
filenames.stream()
|
||||
.map(filename -> appendSuffix(removeExtension(filename), suffix))
|
||||
.forEach(processor);
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract title from filename by removing extension, with fallback handling.
|
||||
*
|
||||
* <p>Returns "Untitled" for null or empty filenames, otherwise removes the extension using the
|
||||
* optimized removeExtension method.
|
||||
*
|
||||
* @param filename the filename to extract title from, may be null
|
||||
* @return the title without extension, or "Untitled" if input is null/empty
|
||||
*/
|
||||
public String getTitleFromFilename(String filename) {
|
||||
if (filename == null || filename.isEmpty()) {
|
||||
return "Untitled";
|
||||
}
|
||||
return removeExtension(filename);
|
||||
}
|
||||
|
||||
public void deleteDirectory(Path path) throws IOException {
|
||||
Files.walkFileTree(
|
||||
path,
|
||||
new SimpleFileVisitor<Path>() {
|
||||
new SimpleFileVisitor<>() {
|
||||
@Override
|
||||
public FileVisitResult visitFile(Path file, BasicFileAttributes attrs)
|
||||
throws IOException {
|
||||
@@ -101,8 +236,18 @@ public class GeneralUtils {
|
||||
});
|
||||
}
|
||||
|
||||
public static String convertToFileName(String name) {
|
||||
String safeName = name.replaceAll("[^a-zA-Z0-9]", "_");
|
||||
public String convertToFileName(String name) {
|
||||
if (name == null) return "_";
|
||||
StringBuilder safeNameBuilder = new StringBuilder(name.length());
|
||||
for (int i = 0; i < name.length(); i++) {
|
||||
char c = name.charAt(i);
|
||||
if (Character.isLetterOrDigit(c)) {
|
||||
safeNameBuilder.append(c);
|
||||
} else {
|
||||
safeNameBuilder.append('_');
|
||||
}
|
||||
}
|
||||
String safeName = safeNameBuilder.toString();
|
||||
if (safeName.length() > 50) {
|
||||
safeName = safeName.substring(0, 50);
|
||||
}
|
||||
@@ -110,19 +255,20 @@ public class GeneralUtils {
|
||||
}
|
||||
|
||||
// Get resources from a location pattern
|
||||
public static Resource[] getResourcesFromLocationPattern(
|
||||
public Resource[] getResourcesFromLocationPattern(
|
||||
String locationPattern, ResourceLoader resourceLoader) throws Exception {
|
||||
// Normalize the path for file resources
|
||||
if (locationPattern.startsWith("file:")) {
|
||||
String rawPath = locationPattern.substring(5).replace("\\*", "").replace("/*", "");
|
||||
String pattern = locationPattern;
|
||||
if (pattern.startsWith("file:")) {
|
||||
String rawPath = pattern.substring(5).replace("\\*", "").replace("/*", "");
|
||||
Path normalizePath = Paths.get(rawPath).normalize();
|
||||
locationPattern = "file:" + normalizePath.toString().replace("\\", "/") + "/*";
|
||||
pattern = "file:" + normalizePath.toString().replace("\\", "/") + "/*";
|
||||
}
|
||||
return ResourcePatternUtils.getResourcePatternResolver(resourceLoader)
|
||||
.getResources(locationPattern);
|
||||
.getResources(pattern);
|
||||
}
|
||||
|
||||
public static boolean isValidURL(String urlStr) {
|
||||
public boolean isValidURL(String urlStr) {
|
||||
try {
|
||||
Urls.create(
|
||||
urlStr, Urls.HTTP_PROTOCOLS, HostValidator.DENY_COMMON_INFRASTRUCTURE_TARGETS);
|
||||
@@ -132,7 +278,25 @@ public class GeneralUtils {
|
||||
}
|
||||
}
|
||||
|
||||
public static boolean isURLReachable(String urlStr) {
|
||||
/**
|
||||
* Checks if a URL is reachable with proper timeout configuration and error handling.
|
||||
*
|
||||
* @param urlStr the URL string to check
|
||||
* @return true if URL is reachable, false otherwise
|
||||
*/
|
||||
public boolean isURLReachable(String urlStr) {
|
||||
return isURLReachable(urlStr, 5000, 5000);
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if a URL is reachable with configurable timeouts.
|
||||
*
|
||||
* @param urlStr the URL string to check
|
||||
* @param connectTimeout connection timeout in milliseconds
|
||||
* @param readTimeout read timeout in milliseconds
|
||||
* @return true if URL is reachable, false otherwise
|
||||
*/
|
||||
public boolean isURLReachable(String urlStr, int connectTimeout, int readTimeout) {
|
||||
try {
|
||||
// Parse the URL
|
||||
URL url = URI.create(urlStr).toURL();
|
||||
@@ -152,16 +316,19 @@ public class GeneralUtils {
|
||||
// Check if the URL is reachable
|
||||
HttpURLConnection connection = (HttpURLConnection) url.openConnection();
|
||||
connection.setRequestMethod("HEAD");
|
||||
// connection.setConnectTimeout(5000); // Set connection timeout
|
||||
// connection.setReadTimeout(5000); // Set read timeout
|
||||
connection.setConnectTimeout(connectTimeout);
|
||||
connection.setReadTimeout(readTimeout);
|
||||
connection.setInstanceFollowRedirects(false); // Security: prevent redirect loops
|
||||
|
||||
int responseCode = connection.getResponseCode();
|
||||
return (200 <= responseCode && responseCode <= 399);
|
||||
} catch (Exception e) {
|
||||
log.debug("URL {} is not reachable: {}", urlStr, e.getMessage());
|
||||
return false; // Return false in case of any exception
|
||||
}
|
||||
}
|
||||
|
||||
private static boolean isLocalAddress(String host) {
|
||||
private boolean isLocalAddress(String host) {
|
||||
try {
|
||||
// Resolve DNS to IP address
|
||||
InetAddress address = InetAddress.getByName(host);
|
||||
@@ -181,7 +348,14 @@ public class GeneralUtils {
|
||||
}
|
||||
}
|
||||
|
||||
public static File multipartToFile(MultipartFile multipart) throws IOException {
|
||||
/**
|
||||
* Improved multipart file conversion using the shared helper method.
|
||||
*
|
||||
* @param multipart the multipart file to convert
|
||||
* @return temporary File containing the multipart file data
|
||||
* @throws IOException if I/O error occurs during conversion
|
||||
*/
|
||||
public File multipartToFile(MultipartFile multipart) throws IOException {
|
||||
Path tempFile = Files.createTempFile("overlay-", ".pdf");
|
||||
try (InputStream in = multipart.getInputStream();
|
||||
FileOutputStream out = new FileOutputStream(tempFile.toFile())) {
|
||||
@@ -194,54 +368,105 @@ public class GeneralUtils {
|
||||
return tempFile.toFile();
|
||||
}
|
||||
|
||||
public static Long convertSizeToBytes(String sizeStr) {
|
||||
/**
|
||||
* Converts a size string to bytes. Supports B/KB/MB/GB/TB units and logs a warning on parse failure.
|
||||
*
|
||||
* @param sizeStr the size string to convert (e.g., "100MB", "1.5GB")
|
||||
* @param defaultUnit the default unit to assume if none specified ("MB", "GB", etc.)
|
||||
* @return size in bytes, or null if parsing fails
|
||||
* @throws IllegalArgumentException if defaultUnit is invalid
|
||||
*/
|
||||
public Long convertSizeToBytes(String sizeStr, String defaultUnit) {
|
||||
if (sizeStr == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (defaultUnit != null && !isValidSizeUnit(defaultUnit)) {
|
||||
throw new IllegalArgumentException("Invalid default unit: " + defaultUnit);
|
||||
}
|
||||
|
||||
sizeStr = sizeStr.trim().toUpperCase();
|
||||
sizeStr = sizeStr.replace(",", ".").replace(" ", "");
|
||||
|
||||
try {
|
||||
if (sizeStr.endsWith("KB")) {
|
||||
return (long)
|
||||
(Double.parseDouble(sizeStr.substring(0, sizeStr.length() - 2)) * 1024);
|
||||
} else if (sizeStr.endsWith("MB")) {
|
||||
if (sizeStr.endsWith("TB")) {
|
||||
return (long)
|
||||
(Double.parseDouble(sizeStr.substring(0, sizeStr.length() - 2))
|
||||
* 1024
|
||||
* 1024);
|
||||
* 1024L
|
||||
* 1024L
|
||||
* 1024L
|
||||
* 1024L);
|
||||
} else if (sizeStr.endsWith("GB")) {
|
||||
return (long)
|
||||
(Double.parseDouble(sizeStr.substring(0, sizeStr.length() - 2))
|
||||
* 1024
|
||||
* 1024
|
||||
* 1024);
|
||||
} else if (sizeStr.endsWith("B")) {
|
||||
* 1024L
|
||||
* 1024L
|
||||
* 1024L);
|
||||
} else if (sizeStr.endsWith("MB")) {
|
||||
return (long)
|
||||
(Double.parseDouble(sizeStr.substring(0, sizeStr.length() - 2))
|
||||
* 1024L
|
||||
* 1024L);
|
||||
} else if (sizeStr.endsWith("KB")) {
|
||||
return (long)
|
||||
(Double.parseDouble(sizeStr.substring(0, sizeStr.length() - 2)) * 1024L);
|
||||
} else if (!sizeStr.isEmpty() && sizeStr.charAt(sizeStr.length() - 1) == 'B') {
|
||||
return Long.parseLong(sizeStr.substring(0, sizeStr.length() - 1));
|
||||
} else {
|
||||
// Assume MB if no unit is specified
|
||||
return (long) (Double.parseDouble(sizeStr) * 1024 * 1024);
|
||||
// Use provided default unit or fall back to MB
|
||||
String unit = defaultUnit != null ? defaultUnit.toUpperCase() : "MB";
|
||||
double value = Double.parseDouble(sizeStr);
|
||||
return switch (unit) {
|
||||
case "TB" -> (long) (value * 1024L * 1024L * 1024L * 1024L);
|
||||
case "GB" -> (long) (value * 1024L * 1024L * 1024L);
|
||||
case "MB" -> (long) (value * 1024L * 1024L);
|
||||
case "KB" -> (long) (value * 1024L);
|
||||
case "B" -> (long) value;
|
||||
default -> (long) (value * 1024L * 1024L); // Default to MB
|
||||
};
|
||||
}
|
||||
} catch (NumberFormatException e) {
|
||||
// The numeric part of the input string cannot be parsed, handle this case
|
||||
log.warn("Failed to parse size string '{}': {}", sizeStr, e.getMessage());
|
||||
return null;
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
public static String formatBytes(long bytes) {
|
||||
/**
|
||||
* Converts size string to bytes using MB as default unit.
|
||||
*
|
||||
* @param sizeStr the size string to convert
|
||||
* @return size in bytes, or null if parsing fails
|
||||
*/
|
||||
public Long convertSizeToBytes(String sizeStr) {
|
||||
return convertSizeToBytes(sizeStr, "MB");
|
||||
}
|
||||
|
||||
/** Checks whether a string is a supported size unit (B, KB, MB, GB, TB), ignoring case. */
|
||||
private boolean isValidSizeUnit(String unit) {
|
||||
// Use a precomputed Set for O(1) lookup, normalize using a locale-safe toUpperCase
|
||||
return unit != null && VALID_SIZE_UNITS.contains(unit.toUpperCase(Locale.ROOT));
|
||||
}
|
||||
|
||||
/** Formats a byte count as B, KB, MB, GB or TB (two decimals above B); negatives give "Invalid size". */
|
||||
public String formatBytes(long bytes) {
|
||||
if (bytes < 0) {
|
||||
return "Invalid size";
|
||||
}
|
||||
|
||||
if (bytes < 1024) {
|
||||
return bytes + " B";
|
||||
} else if (bytes < 1024 * 1024) {
|
||||
} else if (bytes < 1024L * 1024L) {
|
||||
return String.format(Locale.US, "%.2f KB", bytes / 1024.0);
|
||||
} else if (bytes < 1024 * 1024 * 1024) {
|
||||
} else if (bytes < 1024L * 1024L * 1024L) {
|
||||
return String.format(Locale.US, "%.2f MB", bytes / (1024.0 * 1024.0));
|
||||
} else {
|
||||
} else if (bytes < 1024L * 1024L * 1024L * 1024L) {
|
||||
return String.format(Locale.US, "%.2f GB", bytes / (1024.0 * 1024.0 * 1024.0));
|
||||
} else {
|
||||
return String.format(Locale.US, "%.2f TB", bytes / (1024.0 * 1024.0 * 1024.0 * 1024.0));
|
||||
}
|
||||
}
|
||||
|
||||
public static List<Integer> parsePageList(String pages, int totalPages, boolean oneBased) {
|
||||
public List<Integer> parsePageList(String pages, int totalPages, boolean oneBased) {
|
||||
if (pages == null) {
|
||||
return List.of(1); // Default to first page if input is null
|
||||
}
|
||||
@@ -252,11 +477,11 @@ public class GeneralUtils {
|
||||
}
|
||||
}
|
||||
|
||||
public static List<Integer> parsePageList(String[] pages, int totalPages) {
|
||||
public List<Integer> parsePageList(String[] pages, int totalPages) {
|
||||
return parsePageList(pages, totalPages, false);
|
||||
}
|
||||
|
||||
public static List<Integer> parsePageList(String[] pages, int totalPages, boolean oneBased) {
|
||||
public List<Integer> parsePageList(String[] pages, int totalPages, boolean oneBased) {
|
||||
List<Integer> result = new ArrayList<>();
|
||||
int offset = oneBased ? 1 : 0;
|
||||
for (String page : pages) {
|
||||
@@ -278,33 +503,72 @@ public class GeneralUtils {
|
||||
return result;
|
||||
}
|
||||
|
||||
public static List<Integer> evaluateNFunc(String expression, int maxValue) {
|
||||
/**
|
||||
* Enhanced mathematical expression evaluation with bounds checking and timeout protection.
|
||||
*
|
||||
* @param expression the mathematical expression containing 'n'
|
||||
* @param maxValue the maximum value for 'n' and result bounds
|
||||
* @return list of valid page numbers
|
||||
* @throws IllegalArgumentException if expression is invalid or unsafe
|
||||
*/
|
||||
public List<Integer> evaluateNFunc(String expression, int maxValue) {
|
||||
if (expression == null || expression.trim().isEmpty()) {
|
||||
throw new IllegalArgumentException("Expression cannot be null or empty");
|
||||
}
|
||||
|
||||
if (maxValue <= 0 || maxValue > 10000) {
|
||||
throw new IllegalArgumentException("maxValue must be between 1 and 10000 for safety");
|
||||
}
|
||||
|
||||
List<Integer> results = new ArrayList<>();
|
||||
DoubleEvaluator evaluator = new DoubleEvaluator();
|
||||
|
||||
// Validate the expression
|
||||
if (!expression.matches("[0-9n+\\-*/() ]+")) {
|
||||
throw new IllegalArgumentException("Invalid expression");
|
||||
// Validate the expression format
|
||||
if (!RegexPatternUtils.getInstance()
|
||||
.getMathExpressionPattern()
|
||||
.matcher(expression.trim())
|
||||
.matches()) {
|
||||
throw new IllegalArgumentException("Invalid expression format: " + expression);
|
||||
}
|
||||
|
||||
for (int n = 1; n <= maxValue; n++) {
|
||||
// Replace 'n' with the current value of n, correctly handling numbers before
|
||||
// 'n'
|
||||
String sanitizedExpression = sanitizeNFunction(expression, n);
|
||||
Double result = evaluator.evaluate(sanitizedExpression);
|
||||
try {
|
||||
// Replace 'n' with the current value of n, correctly handling numbers before 'n'
|
||||
String sanitizedExpression = sanitizeNFunction(expression.trim(), n);
|
||||
Double result = evaluator.evaluate(sanitizedExpression);
|
||||
|
||||
// Check if the result is null or not within bounds
|
||||
if (result == null) break;
|
||||
// Check if the result is null or not within bounds
|
||||
if (result == null || !Double.isFinite(result)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (result.intValue() > 0 && result.intValue() <= maxValue)
|
||||
results.add(result.intValue());
|
||||
int intResult = result.intValue();
|
||||
if (intResult > 0 && intResult <= maxValue) {
|
||||
results.add(intResult);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.debug(
|
||||
"Failed to evaluate expression '{}' for n={}: {}",
|
||||
expression,
|
||||
n,
|
||||
e.getMessage());
|
||||
// Continue with next value instead of breaking
|
||||
}
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
private static String sanitizeNFunction(String expression, int nValue) {
|
||||
String sanitizedExpression = expression.replace(" ", "");
|
||||
private String sanitizeNFunction(String expression, int nValue) {
|
||||
// Remove all spaces using a specialized character removal
|
||||
StringBuilder sb = new StringBuilder(expression.length());
|
||||
for (int i = 0; i < expression.length(); i++) {
|
||||
char c = expression.charAt(i);
|
||||
if (c != ' ') {
|
||||
sb.append(c);
|
||||
}
|
||||
}
|
||||
String sanitizedExpression = sb.toString();
|
||||
String multiplyByOpeningRoundBracketPattern =
|
||||
"([0-9n)])\\("; // example: n(n-1), 9(n-1), (n-1)(n-2)
|
||||
sanitizedExpression =
|
||||
@@ -319,23 +583,45 @@ public class GeneralUtils {
|
||||
return sanitizedExpression;
|
||||
}
|
||||
|
||||
private static String insertMultiplicationBeforeN(String expression, int nValue) {
|
||||
// Insert multiplication between a number and 'n' (e.g., "4n" becomes "4*n")
|
||||
String withMultiplication = expression.replaceAll("(\\d)n", "$1*n");
|
||||
private String insertMultiplicationBeforeN(String expression, int nValue) {
|
||||
// Insert multiplication between a number and 'n' (e.g., "4n" becomes "4*n") using a loop
|
||||
StringBuilder sb = new StringBuilder(expression.length() + 4); // +4 for possible extra '*'
|
||||
for (int i = 0; i < expression.length(); i++) {
|
||||
char c = expression.charAt(i);
|
||||
sb.append(c);
|
||||
if (Character.isDigit(c)
|
||||
&& i + 1 < expression.length()
|
||||
&& expression.charAt(i + 1) == 'n') {
|
||||
sb.append('*');
|
||||
}
|
||||
}
|
||||
String withMultiplication = sb.toString();
|
||||
withMultiplication = formatConsecutiveNsForNFunction(withMultiplication);
|
||||
// Now replace 'n' with its current value
|
||||
return withMultiplication.replace("n", String.valueOf(nValue));
|
||||
}
|
||||
|
||||
private static String formatConsecutiveNsForNFunction(String expression) {
|
||||
private String formatConsecutiveNsForNFunction(String expression) {
|
||||
String text = expression;
|
||||
while (text.matches(".*n{2,}.*")) {
|
||||
text = text.replaceAll("(?<!n)n{2}", "n*n");
|
||||
// Replace all consecutive 'nn' with 'n*n' until no more 'nn' is found
|
||||
while (text.contains("nn")) {
|
||||
StringBuilder sb = new StringBuilder(text.length() + 2); // +2 for possible extra '*'
|
||||
int i = 0;
|
||||
while (i < text.length()) {
|
||||
if (i < text.length() - 1 && text.charAt(i) == 'n' && text.charAt(i + 1) == 'n') {
|
||||
sb.append("n*n");
|
||||
i += 2;
|
||||
} else {
|
||||
sb.append(text.charAt(i));
|
||||
i++;
|
||||
}
|
||||
}
|
||||
text = sb.toString();
|
||||
}
|
||||
return text;
|
||||
}
|
||||
|
||||
private static List<Integer> handlePart(String part, int totalPages, int offset) {
|
||||
private List<Integer> handlePart(String part, int totalPages, int offset) {
|
||||
List<Integer> partResult = new ArrayList<>();
|
||||
|
||||
// First check for n-syntax because it should not be processed as a range
|
||||
@@ -361,7 +647,7 @@ public class GeneralUtils {
|
||||
}
|
||||
}
|
||||
} catch (NumberFormatException e) {
|
||||
// Range is invalid, ignore this part
|
||||
log.debug("Invalid range: {}", part);
|
||||
}
|
||||
} else {
|
||||
// This is a single page number
|
||||
@@ -370,14 +656,14 @@ public class GeneralUtils {
|
||||
if (pageNum >= 1 && pageNum <= totalPages) {
|
||||
partResult.add(pageNum - 1 + offset);
|
||||
}
|
||||
} catch (NumberFormatException ignored) {
|
||||
// Ignore invalid numbers
|
||||
} catch (NumberFormatException e) {
|
||||
log.debug("Invalid page number: {}", part);
|
||||
}
|
||||
}
|
||||
return partResult;
|
||||
}
|
||||
|
||||
public static boolean createDir(String path) {
|
||||
public boolean createDir(String path) {
|
||||
Path folder = Paths.get(path);
|
||||
if (!Files.exists(folder)) {
|
||||
try {
|
||||
@@ -390,7 +676,7 @@ public class GeneralUtils {
|
||||
return true;
|
||||
}
|
||||
|
||||
public static boolean isValidUUID(String uuid) {
|
||||
public boolean isValidUUID(String uuid) {
|
||||
if (uuid == null) {
|
||||
return false;
|
||||
}
|
||||
@@ -406,7 +692,7 @@ public class GeneralUtils {
|
||||
* Internal Implementation Details *
|
||||
*------------------------------------------------------------------------*/
|
||||
|
||||
public static void saveKeyToSettings(String key, Object newValue) throws IOException {
|
||||
public void saveKeyToSettings(String key, Object newValue) throws IOException {
|
||||
String[] keyArray = key.split("\\.");
|
||||
Path settingsPath = Paths.get(InstallationPathConfig.getSettingsPath());
|
||||
YamlHelper settingsYaml = new YamlHelper(settingsPath);
|
||||
@@ -414,48 +700,67 @@ public class GeneralUtils {
|
||||
settingsYaml.saveOverride(settingsPath);
|
||||
}
|
||||
|
||||
public static String generateMachineFingerprint() {
|
||||
/*
|
||||
* Machine fingerprint generation with better error logging and fallbacks.
|
||||
*
|
||||
* @return unique machine fingerprint or "GenericID" if generation fails
|
||||
*/
|
||||
public String generateMachineFingerprint() {
|
||||
try {
|
||||
// Get the MAC address
|
||||
StringBuilder sb = new StringBuilder();
|
||||
|
||||
// Try to get MAC address from primary network interface
|
||||
InetAddress ip = InetAddress.getLocalHost();
|
||||
NetworkInterface network = NetworkInterface.getByInetAddress(ip);
|
||||
|
||||
if (network == null) {
|
||||
if (network == null || network.getHardwareAddress() == null) {
|
||||
// Fallback: iterate through all network interfaces
|
||||
Enumeration<NetworkInterface> networks = NetworkInterface.getNetworkInterfaces();
|
||||
while (networks.hasMoreElements()) {
|
||||
NetworkInterface net = networks.nextElement();
|
||||
byte[] mac = net.getHardwareAddress();
|
||||
if (mac != null) {
|
||||
for (int i = 0; i < mac.length; i++) {
|
||||
sb.append(String.format("%02X", mac[i]));
|
||||
if (net.isUp() && !net.isLoopback() && !net.isVirtual()) {
|
||||
byte[] mac = net.getHardwareAddress();
|
||||
if (mac != null && mac.length > 0) {
|
||||
for (byte b : mac) {
|
||||
sb.append(String.format("%02X", b));
|
||||
}
|
||||
break; // Use the first valid network interface
|
||||
}
|
||||
break; // Use the first network interface with a MAC address
|
||||
}
|
||||
}
|
||||
} else {
|
||||
byte[] mac = network.getHardwareAddress();
|
||||
if (mac != null) {
|
||||
for (int i = 0; i < mac.length; i++) {
|
||||
sb.append(String.format("%02X", mac[i]));
|
||||
for (byte b : mac) {
|
||||
sb.append(String.format("%02X", b));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Hash the MAC address for privacy and consistency
|
||||
// If no MAC address found, use hostname as fallback
|
||||
if (sb.length() == 0) {
|
||||
String hostname = InetAddress.getLocalHost().getHostName();
|
||||
sb.append(hostname != null ? hostname : "unknown-host");
|
||||
log.warn("No MAC address found, using hostname for fingerprint generation");
|
||||
}
|
||||
|
||||
// Hash the collected data for privacy and consistency
|
||||
MessageDigest md = MessageDigest.getInstance("SHA-256");
|
||||
byte[] hash = md.digest(sb.toString().getBytes(StandardCharsets.UTF_8));
|
||||
StringBuilder fingerprint = new StringBuilder();
|
||||
for (byte b : hash) {
|
||||
fingerprint.append(String.format("%02x", b));
|
||||
}
|
||||
|
||||
log.debug("Successfully generated machine fingerprint");
|
||||
return fingerprint.toString();
|
||||
} catch (Exception e) {
|
||||
log.warn("Failed to generate machine fingerprint: {}", e.getMessage());
|
||||
return "GenericID";
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
/*
|
||||
* Extracts the default pipeline configurations from the classpath to the installation path.
|
||||
* Creates directories if needed and copies default JSON files.
|
||||
*
|
||||
@@ -464,7 +769,7 @@ public class GeneralUtils {
|
||||
*
|
||||
* @throws IOException if an I/O error occurs during file operations
|
||||
*/
|
||||
public static void extractPipeline() throws IOException {
|
||||
public void extractPipeline() throws IOException {
|
||||
Path pipelineDir =
|
||||
Paths.get(InstallationPathConfig.getPipelinePath(), DEFAULT_WEBUI_CONFIGS_DIR);
|
||||
Files.createDirectories(pipelineDir);
|
||||
@@ -486,7 +791,7 @@ public class GeneralUtils {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
/*
|
||||
* Extracts the specified Python script from the classpath to the installation path. Validates
|
||||
* name and copies file atomically when possible, overwriting existing.
|
||||
*
|
||||
@@ -497,7 +802,7 @@ public class GeneralUtils {
|
||||
* @throws IllegalArgumentException if the script name is invalid or not allowed
|
||||
* @throws IOException if an I/O error occurs
|
||||
*/
|
||||
public static Path extractScript(String scriptName) throws IOException {
|
||||
public Path extractScript(String scriptName) throws IOException {
|
||||
// Validate input
|
||||
if (scriptName == null || scriptName.trim().isEmpty()) {
|
||||
throw new IllegalArgumentException("scriptName must not be null or empty");
|
||||
@@ -530,15 +835,14 @@ public class GeneralUtils {
|
||||
return target;
|
||||
}
|
||||
|
||||
/**
|
||||
/*
|
||||
* Copies a resource from the classpath to a specified target file.
|
||||
*
|
||||
* @param resource the ClassPathResource to copy
|
||||
* @param target the target Path where the resource will be copied
|
||||
* @throws IOException if an I/O error occurs during the copy operation
|
||||
*/
|
||||
private static void copyResourceToFile(ClassPathResource resource, Path target)
|
||||
throws IOException {
|
||||
private void copyResourceToFile(ClassPathResource resource, Path target) throws IOException {
|
||||
Path dir = target.getParent();
|
||||
Path tmp = Files.createTempFile(dir, target.getFileName().toString(), ".tmp");
|
||||
try (InputStream in = resource.getInputStream()) {
|
||||
@@ -573,7 +877,7 @@ public class GeneralUtils {
|
||||
}
|
||||
}
|
||||
|
||||
public static boolean isVersionHigher(String currentVersion, String compareVersion) {
|
||||
public boolean isVersionHigher(String currentVersion, String compareVersion) {
|
||||
if (currentVersion == null || compareVersion == null) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -258,10 +258,7 @@ public class PdfAttachmentHandler {
|
||||
|
||||
if (contentIdMap.isEmpty()) return htmlContent;
|
||||
|
||||
Pattern cidPattern =
|
||||
Pattern.compile(
|
||||
"(?i)<img[^>]*\\ssrc\\s*=\\s*['\"]cid:([^'\"]+)['\"][^>]*>",
|
||||
Pattern.CASE_INSENSITIVE);
|
||||
Pattern cidPattern = RegexPatternUtils.getInstance().getInlineCidImagePattern();
|
||||
Matcher matcher = cidPattern.matcher(htmlContent);
|
||||
|
||||
StringBuilder result = new StringBuilder();
|
||||
@@ -319,121 +316,20 @@ public class PdfAttachmentHandler {
|
||||
}
|
||||
}
|
||||
|
||||
public static class AttachmentMarkerPositionFinder extends PDFTextStripper {
|
||||
@Getter private final List<MarkerPosition> positions = new ArrayList<>();
|
||||
private int currentPageIndex;
|
||||
protected boolean sortByPosition;
|
||||
private boolean isInAttachmentSection;
|
||||
private boolean attachmentSectionFound;
|
||||
private final StringBuilder currentText = new StringBuilder();
|
||||
|
||||
private static final Pattern ATTACHMENT_SECTION_PATTERN =
|
||||
Pattern.compile("attachments\\s*\\(\\d+\\)", Pattern.CASE_INSENSITIVE);
|
||||
|
||||
private static final Pattern FILENAME_PATTERN =
|
||||
Pattern.compile("@\\s*([^\\s\\(]+(?:\\.[a-zA-Z0-9]+)?)");
|
||||
|
||||
public AttachmentMarkerPositionFinder() {
|
||||
super();
|
||||
this.currentPageIndex = 0;
|
||||
this.sortByPosition = false; // Disable sorting to preserve document order
|
||||
this.isInAttachmentSection = false;
|
||||
this.attachmentSectionFound = false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getText(PDDocument document) throws IOException {
|
||||
super.getText(document);
|
||||
|
||||
if (sortByPosition) {
|
||||
positions.sort(
|
||||
(a, b) -> {
|
||||
int pageCompare = Integer.compare(a.getPageIndex(), b.getPageIndex());
|
||||
if (pageCompare != 0) return pageCompare;
|
||||
return Float.compare(
|
||||
b.getY(), a.getY()); // Descending Y per PDF coordinate system
|
||||
});
|
||||
}
|
||||
|
||||
return ""; // Return empty string as we only need positions
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void startPage(PDPage page) throws IOException {
|
||||
super.startPage(page);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void endPage(PDPage page) throws IOException {
|
||||
currentPageIndex++;
|
||||
super.endPage(page);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void writeString(String string, List<TextPosition> textPositions)
|
||||
throws IOException {
|
||||
String lowerString = string.toLowerCase();
|
||||
|
||||
if (ATTACHMENT_SECTION_PATTERN.matcher(lowerString).find()) {
|
||||
isInAttachmentSection = true;
|
||||
attachmentSectionFound = true;
|
||||
}
|
||||
|
||||
if (isInAttachmentSection
|
||||
&& (lowerString.contains("</body>")
|
||||
|| lowerString.contains("</html>")
|
||||
|| (attachmentSectionFound
|
||||
&& lowerString.trim().isEmpty()
|
||||
&& string.length() > 50))) {
|
||||
isInAttachmentSection = false;
|
||||
}
|
||||
|
||||
if (isInAttachmentSection) {
|
||||
currentText.append(string);
|
||||
|
||||
for (int i = 0; (i = string.indexOf(ATTACHMENT_MARKER, i)) != -1; i++) {
|
||||
if (i < textPositions.size()) {
|
||||
TextPosition textPosition = textPositions.get(i);
|
||||
|
||||
String filename = extractFilenameAfterMarker(string, i);
|
||||
|
||||
MarkerPosition position =
|
||||
new MarkerPosition(
|
||||
currentPageIndex,
|
||||
textPosition.getXDirAdj(),
|
||||
textPosition.getYDirAdj(),
|
||||
ATTACHMENT_MARKER,
|
||||
filename);
|
||||
positions.add(position);
|
||||
}
|
||||
}
|
||||
}
|
||||
super.writeString(string, textPositions);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setSortByPosition(boolean sortByPosition) {
|
||||
this.sortByPosition = sortByPosition;
|
||||
}
|
||||
|
||||
private String extractFilenameAfterMarker(String text, int markerIndex) {
|
||||
String afterMarker = text.substring(markerIndex + 1);
|
||||
|
||||
Matcher matcher = FILENAME_PATTERN.matcher("@" + afterMarker);
|
||||
if (matcher.find()) {
|
||||
return matcher.group(1);
|
||||
}
|
||||
|
||||
String[] parts = afterMarker.split("[\\s\\(\\)]+");
|
||||
for (String part : parts) {
|
||||
part = part.trim();
|
||||
if (part.length() > 3 && part.contains(".")) {
|
||||
return part;
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
private static String normalizeFilename(String filename) {
|
||||
if (filename == null) return "";
|
||||
String normalized = filename.toLowerCase().trim();
|
||||
normalized =
|
||||
RegexPatternUtils.getInstance()
|
||||
.getWhitespacePattern()
|
||||
.matcher(normalized)
|
||||
.replaceAll(" ");
|
||||
normalized =
|
||||
RegexPatternUtils.getInstance()
|
||||
.getPattern("[^a-zA-Z0-9._-]")
|
||||
.matcher(normalized)
|
||||
.replaceAll("");
|
||||
return normalized;
|
||||
}
|
||||
|
||||
private static Map<Integer, String> addAttachmentsToDocumentWithMapping(
|
||||
@@ -613,12 +509,122 @@ public class PdfAttachmentHandler {
|
||||
return null;
|
||||
}
|
||||
|
||||
private static String normalizeFilename(String filename) {
|
||||
if (filename == null) return "";
|
||||
return filename.toLowerCase()
|
||||
.trim()
|
||||
.replaceAll("\\s+", " ")
|
||||
.replaceAll("[^a-zA-Z0-9._-]", "");
|
||||
public static class AttachmentMarkerPositionFinder extends PDFTextStripper {
|
||||
private static final Pattern ATTACHMENT_SECTION_PATTERN =
|
||||
RegexPatternUtils.getInstance().getAttachmentSectionPattern();
|
||||
private static final Pattern FILENAME_PATTERN =
|
||||
RegexPatternUtils.getInstance().getAttachmentFilenamePattern();
|
||||
@Getter private final List<MarkerPosition> positions = new ArrayList<>();
|
||||
private final StringBuilder currentText = new StringBuilder();
|
||||
protected boolean sortByPosition;
|
||||
private int currentPageIndex;
|
||||
private boolean isInAttachmentSection;
|
||||
private boolean attachmentSectionFound;
|
||||
|
||||
public AttachmentMarkerPositionFinder() {
|
||||
super();
|
||||
this.currentPageIndex = 0;
|
||||
this.sortByPosition = false; // Disable sorting to preserve document order
|
||||
this.isInAttachmentSection = false;
|
||||
this.attachmentSectionFound = false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getText(PDDocument document) throws IOException {
|
||||
super.getText(document);
|
||||
|
||||
if (sortByPosition) {
|
||||
positions.sort(
|
||||
(a, b) -> {
|
||||
int pageCompare = Integer.compare(a.getPageIndex(), b.getPageIndex());
|
||||
if (pageCompare != 0) return pageCompare;
|
||||
return Float.compare(
|
||||
b.getY(), a.getY()); // Descending Y per PDF coordinate system
|
||||
});
|
||||
}
|
||||
|
||||
return ""; // Return empty string as we only need positions
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void startPage(PDPage page) throws IOException {
|
||||
super.startPage(page);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void endPage(PDPage page) throws IOException {
|
||||
currentPageIndex++;
|
||||
super.endPage(page);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void writeString(String string, List<TextPosition> textPositions)
|
||||
throws IOException {
|
||||
String lowerString = string.toLowerCase();
|
||||
|
||||
if (ATTACHMENT_SECTION_PATTERN.matcher(lowerString).find()) {
|
||||
isInAttachmentSection = true;
|
||||
attachmentSectionFound = true;
|
||||
}
|
||||
|
||||
if (isInAttachmentSection
|
||||
&& (lowerString.contains("</body>")
|
||||
|| lowerString.contains("</html>")
|
||||
|| (attachmentSectionFound
|
||||
&& lowerString.trim().isEmpty()
|
||||
&& string.length() > 50))) {
|
||||
isInAttachmentSection = false;
|
||||
}
|
||||
|
||||
if (isInAttachmentSection) {
|
||||
currentText.append(string);
|
||||
|
||||
for (int i = 0; (i = string.indexOf(ATTACHMENT_MARKER, i)) != -1; i++) {
|
||||
if (i < textPositions.size()) {
|
||||
TextPosition textPosition = textPositions.get(i);
|
||||
|
||||
String filename = extractFilenameAfterMarker(string, i);
|
||||
|
||||
MarkerPosition position =
|
||||
new MarkerPosition(
|
||||
currentPageIndex,
|
||||
textPosition.getXDirAdj(),
|
||||
textPosition.getYDirAdj(),
|
||||
ATTACHMENT_MARKER,
|
||||
filename);
|
||||
positions.add(position);
|
||||
}
|
||||
}
|
||||
}
|
||||
super.writeString(string, textPositions);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setSortByPosition(boolean sortByPosition) {
|
||||
this.sortByPosition = sortByPosition;
|
||||
}
|
||||
|
||||
private String extractFilenameAfterMarker(String text, int markerIndex) {
|
||||
String afterMarker = text.substring(markerIndex + 1);
|
||||
|
||||
Matcher matcher = FILENAME_PATTERN.matcher("@" + afterMarker);
|
||||
if (matcher.find()) {
|
||||
return matcher.group(1);
|
||||
}
|
||||
|
||||
String[] parts =
|
||||
RegexPatternUtils.getInstance()
|
||||
.getWhitespaceParenthesesSplitPattern()
|
||||
.split(afterMarker);
|
||||
for (String part : parts) {
|
||||
part = part.trim();
|
||||
if (part.length() > 3 && part.contains(".")) {
|
||||
return part;
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private static void addAttachmentAnnotationToPageWithMapping(
|
||||
|
||||
@@ -8,6 +8,8 @@ import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.zip.ZipEntry;
|
||||
import java.util.zip.ZipOutputStream;
|
||||
|
||||
@@ -34,15 +36,20 @@ import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import io.github.pixee.security.Filenames;
|
||||
|
||||
import lombok.experimental.UtilityClass;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import stirling.software.common.model.ApplicationProperties;
|
||||
import stirling.software.common.service.CustomPDFDocumentFactory;
|
||||
|
||||
@Slf4j
|
||||
@UtilityClass
|
||||
public class PdfUtils {
|
||||
|
||||
public static PDRectangle textToPageSize(String size) {
|
||||
private final RegexPatternUtils patternCache = RegexPatternUtils.getInstance();
|
||||
|
||||
public PDRectangle textToPageSize(String size) {
|
||||
|
||||
switch (size.toUpperCase()) {
|
||||
case "A0" -> {
|
||||
return PDRectangle.A0;
|
||||
@@ -75,7 +82,7 @@ public class PdfUtils {
|
||||
}
|
||||
}
|
||||
|
||||
public static List<RenderedImage> getAllImages(PDResources resources) throws IOException {
|
||||
public List<RenderedImage> getAllImages(PDResources resources) throws IOException {
|
||||
List<RenderedImage> images = new ArrayList<>();
|
||||
|
||||
for (COSName name : resources.getXObjectNames()) {
|
||||
@@ -92,7 +99,7 @@ public class PdfUtils {
|
||||
return images;
|
||||
}
|
||||
|
||||
public static boolean hasImages(PDDocument document, String pagesToCheck) throws IOException {
|
||||
public boolean hasImages(PDDocument document, String pagesToCheck) throws IOException {
|
||||
String[] pageOrderArr = pagesToCheck.split(",");
|
||||
List<Integer> pageList =
|
||||
GeneralUtils.parsePageList(pageOrderArr, document.getNumberOfPages());
|
||||
@@ -107,7 +114,7 @@ public class PdfUtils {
|
||||
return false;
|
||||
}
|
||||
|
||||
public static boolean hasText(PDDocument document, String pageNumbersToCheck, String phrase)
|
||||
public boolean hasText(PDDocument document, String pageNumbersToCheck, String phrase)
|
||||
throws IOException {
|
||||
String[] pageOrderArr = pageNumbersToCheck.split(",");
|
||||
List<Integer> pageList =
|
||||
@@ -123,11 +130,11 @@ public class PdfUtils {
|
||||
return false;
|
||||
}
|
||||
|
||||
public static boolean hasImagesOnPage(PDPage page) throws IOException {
|
||||
public boolean hasImagesOnPage(PDPage page) throws IOException {
|
||||
return getAllImages(page.getResources()).size() > 0;
|
||||
}
|
||||
|
||||
public static boolean hasTextOnPage(PDPage page, String phrase) throws IOException {
|
||||
public boolean hasTextOnPage(PDPage page, String phrase) throws IOException {
|
||||
PDFTextStripper textStripper = new PDFTextStripper();
|
||||
PDDocument tempDoc = new PDDocument();
|
||||
tempDoc.addPage(page);
|
||||
@@ -136,7 +143,7 @@ public class PdfUtils {
|
||||
return pageText.contains(phrase);
|
||||
}
|
||||
|
||||
public static byte[] convertFromPdf(
|
||||
public byte[] convertFromPdf(
|
||||
CustomPDFDocumentFactory pdfDocumentFactory,
|
||||
byte[] inputStream,
|
||||
String imageType,
|
||||
@@ -379,7 +386,7 @@ public class PdfUtils {
|
||||
* @return converted document to PDF-Image
|
||||
* @throws IOException if conversion fails
|
||||
*/
|
||||
public static PDDocument convertPdfToPdfImage(PDDocument document) throws IOException {
|
||||
public PDDocument convertPdfToPdfImage(PDDocument document) throws IOException {
|
||||
PDDocument imageDocument = new PDDocument();
|
||||
PDFRenderer pdfRenderer = new PDFRenderer(document);
|
||||
pdfRenderer.setSubsamplingAllowed(true);
|
||||
@@ -428,8 +435,7 @@ public class PdfUtils {
|
||||
return imageDocument;
|
||||
}
|
||||
|
||||
private static BufferedImage prepareImageForPdfToImage(
|
||||
int maxWidth, int height, String imageType) {
|
||||
private BufferedImage prepareImageForPdfToImage(int maxWidth, int height, String imageType) {
|
||||
BufferedImage combined;
|
||||
if ("png".equalsIgnoreCase(imageType)) {
|
||||
combined = new BufferedImage(maxWidth, height, BufferedImage.TYPE_INT_ARGB);
|
||||
@@ -445,7 +451,7 @@ public class PdfUtils {
|
||||
return combined;
|
||||
}
|
||||
|
||||
public static byte[] imageToPdf(
|
||||
public byte[] imageToPdf(
|
||||
MultipartFile[] files,
|
||||
String fitOption,
|
||||
boolean autoRotate,
|
||||
@@ -489,7 +495,7 @@ public class PdfUtils {
|
||||
}
|
||||
}
|
||||
|
||||
public static void addImageToDocument(
|
||||
public void addImageToDocument(
|
||||
PDDocument doc, PDImageXObject image, String fitOption, boolean autoRotate)
|
||||
throws IOException {
|
||||
boolean imageIsLandscape = image.getWidth() > image.getHeight();
|
||||
@@ -539,7 +545,7 @@ public class PdfUtils {
|
||||
}
|
||||
}
|
||||
|
||||
public static byte[] overlayImage(
|
||||
public byte[] overlayImage(
|
||||
CustomPDFDocumentFactory pdfDocumentFactory,
|
||||
byte[] pdfBytes,
|
||||
byte[] imageBytes,
|
||||
@@ -586,8 +592,11 @@ public class PdfUtils {
|
||||
if (pagesToCheck == null || "all".equals(pagesToCheck)) {
|
||||
pdfText = new StringBuilder(textStripper.getText(pdfDocument));
|
||||
} else {
|
||||
// remove whitespaces
|
||||
pagesToCheck = pagesToCheck.replaceAll("\\s+", "");
|
||||
// remove whitespaces using cached pattern
|
||||
Pattern whitespacePattern =
|
||||
patternCache.getPattern(RegexPatternUtils.getWhitespaceRegex());
|
||||
Matcher whitespaceMatcher = whitespacePattern.matcher(pagesToCheck);
|
||||
pagesToCheck = whitespaceMatcher.replaceAll("");
|
||||
|
||||
String[] splitPoints = pagesToCheck.split(",");
|
||||
for (String splitPoint : splitPoints) {
|
||||
|
||||
@@ -0,0 +1,506 @@
|
||||
package stirling.software.common.util;
|
||||
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.regex.PatternSyntaxException;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
public final class RegexPatternUtils {
|
||||
|
||||
private static final RegexPatternUtils INSTANCE = new RegexPatternUtils();
|
||||
private final ConcurrentHashMap<PatternKey, Pattern> patternCache = new ConcurrentHashMap<>();
|
||||
|
||||
private static final String WHITESPACE_REGEX = "\\s++";
|
||||
private static final String EXTENSION_REGEX = "\\.(?:[^.]*+)?$";
|
||||
|
||||
/** Private constructor for the singleton; fills the cache via precompileCommonPatterns(). */
private RegexPatternUtils() {
    // Register commonly used patterns up front so they are available immediately
    precompileCommonPatterns();
}
|
||||
|
||||
/**
|
||||
* Get the singleton instance of the pattern cache.
|
||||
*
|
||||
* @return the singleton RegexPatternUtils instance
|
||||
*/
|
||||
public static RegexPatternUtils getInstance() {
|
||||
return INSTANCE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a compiled pattern from cache, compiling and caching if not present.
|
||||
*
|
||||
* <p>This method is thread-safe and uses lazy initialization. Multiple threads calling with the
|
||||
* same regex will result in only one compilation, with all threads receiving the same cached
|
||||
* Pattern instance.
|
||||
*
|
||||
* <p>Performance: first call compiles and caches (expensive), subsequent calls return cached
|
||||
* pattern (fast O(1) lookup).
|
||||
*
|
||||
* @param regex the regular expression string to compile
|
||||
* @return compiled Pattern object, never null
|
||||
* @throws PatternSyntaxException if the regex syntax is invalid
|
||||
* @throws IllegalArgumentException if regex is null
|
||||
*/
|
||||
public Pattern getPattern(String regex) {
|
||||
if (regex == null) {
|
||||
throw new IllegalArgumentException("Regex pattern cannot be null");
|
||||
}
|
||||
|
||||
return patternCache.computeIfAbsent(new PatternKey(regex, 0), this::compilePattern);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a compiled pattern with flags.
|
||||
*
|
||||
* <p>Patterns with different flags are cached separately using a composite key. Common flags
|
||||
* include:
|
||||
*
|
||||
* <ul>
|
||||
* <li>{@link Pattern#CASE_INSENSITIVE} - ignore case differences
|
||||
* <li>{@link Pattern#MULTILINE} - ^ and $ match line boundaries
|
||||
* <li>{@link Pattern#DOTALL} - . matches any character including newlines
|
||||
* </ul>
|
||||
*
|
||||
* @param regex the regular expression string
|
||||
* @param flags pattern flags (e.g., Pattern.CASE_INSENSITIVE)
|
||||
* @return compiled Pattern object with specified flags
|
||||
* @throws PatternSyntaxException if the regex syntax is invalid
|
||||
* @throws IllegalArgumentException if regex is null
|
||||
*/
|
||||
public Pattern getPattern(String regex, int flags) {
|
||||
if (regex == null) {
|
||||
throw new IllegalArgumentException("Regex pattern cannot be null");
|
||||
}
|
||||
|
||||
return patternCache.computeIfAbsent(new PatternKey(regex, flags), this::compilePattern);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if a pattern is already cached.
|
||||
*
|
||||
* @param regex the regular expression string
|
||||
* @return true if pattern is cached, false otherwise
|
||||
*/
|
||||
public boolean isCached(String regex) {
|
||||
return isCached(regex, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if a pattern with flags is already cached.
|
||||
*
|
||||
* @param regex the regular expression string
|
||||
* @param flags pattern flags
|
||||
* @return true if pattern is cached, false otherwise
|
||||
*/
|
||||
public boolean isCached(String regex, int flags) {
|
||||
return regex != null && patternCache.containsKey(new PatternKey(regex, flags));
|
||||
}
|
||||
|
||||
/**
|
||||
* Get current cache size (number of cached patterns). Useful for monitoring and debugging.
|
||||
*
|
||||
* @return number of patterns currently cached
|
||||
*/
|
||||
public int getCacheSize() {
|
||||
return patternCache.size();
|
||||
}
|
||||
|
||||
/**
|
||||
* Clear all cached patterns. Use sparingly as it forces recompilation of all patterns. Mainly
|
||||
* useful for testing or memory cleanup in long-running applications.
|
||||
*/
|
||||
public void clearCache() {
|
||||
patternCache.clear();
|
||||
log.debug("Regex pattern cache cleared");
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove a specific pattern from cache.
|
||||
*
|
||||
* @param regex the regular expression string to remove
|
||||
* @return true if pattern was cached and removed, false otherwise
|
||||
*/
|
||||
public boolean removeFromCache(String regex) {
|
||||
return removeFromCache(regex, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove a specific pattern with flags from cache.
|
||||
*
|
||||
* @param regex the regular expression string to remove
|
||||
* @param flags pattern flags
|
||||
* @return true if pattern was cached and removed, false otherwise
|
||||
*/
|
||||
public boolean removeFromCache(String regex, int flags) {
|
||||
if (regex == null) {
|
||||
return false;
|
||||
}
|
||||
PatternKey key = new PatternKey(regex, flags);
|
||||
boolean removed = patternCache.remove(key) != null;
|
||||
if (removed) {
|
||||
log.debug("Removed regex pattern from cache: {} (flags: {})", regex, flags);
|
||||
}
|
||||
return removed;
|
||||
}
|
||||
|
||||
/**
|
||||
* Internal method to compile a pattern and handle errors consistently.
|
||||
*
|
||||
* @return compiled Pattern
|
||||
* @throws PatternSyntaxException if regex is invalid
|
||||
*/
|
||||
private Pattern compilePattern(PatternKey key) {
|
||||
String regex = key.regex;
|
||||
int flags = key.flags;
|
||||
|
||||
try {
|
||||
Pattern pattern = Pattern.compile(regex, flags);
|
||||
log.trace("Compiled and cached regex pattern with flags {}: {}", flags, regex);
|
||||
return pattern;
|
||||
} catch (PatternSyntaxException e) {
|
||||
log.error(
|
||||
"Invalid regex pattern: '{}' with flags {} - {}", regex, flags, e.getMessage());
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
public static String getWhitespaceRegex() {
|
||||
return WHITESPACE_REGEX;
|
||||
}
|
||||
|
||||
/** Creates a case-insensitive pattern for text searching */
|
||||
public Pattern createSearchPattern(String regex, boolean caseInsensitive) {
|
||||
int flags = caseInsensitive ? (Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE) : 0;
|
||||
return getPattern(regex, flags);
|
||||
}
|
||||
|
||||
/** Pattern for matching trailing slashes (e.g., "/path/to/dir///") */
|
||||
public Pattern getTrailingSlashesPattern() {
|
||||
return getPattern("/+$");
|
||||
}
|
||||
|
||||
/** Pattern for removing drive letters from paths */
|
||||
public Pattern getDriveLetterPattern() {
|
||||
return getPattern("^[a-zA-Z]:[\\\\/]+");
|
||||
}
|
||||
|
||||
/** Pattern for removing leading slashes from paths */
|
||||
public Pattern getLeadingSlashesPattern() {
|
||||
return getPattern("^[\\\\/]+");
|
||||
}
|
||||
|
||||
/** Pattern for matching backslashes */
|
||||
public Pattern getBackslashPattern() {
|
||||
return getPattern("\\\\");
|
||||
}
|
||||
|
||||
/** Pattern for sanitizing filenames by removing problematic characters */
|
||||
public Pattern getSafeFilenamePattern() {
|
||||
return getPattern("[/\\\\?%*:|\"<>]");
|
||||
}
|
||||
|
||||
/** Pattern for sanitizing filenames (keeps only alphanumeric) */
|
||||
public Pattern getFilenameSafePattern() {
|
||||
return getPattern("[^a-zA-Z0-9]");
|
||||
}
|
||||
|
||||
/**
|
||||
* Pattern for replacing non-alphanumeric characters with underscore (explicit underscore
|
||||
* variant)
|
||||
*/
|
||||
public Pattern getNonAlnumUnderscorePattern() {
|
||||
return getPattern("[^A-Za-z0-9_]");
|
||||
}
|
||||
|
||||
/** Pattern for collapsing multiple underscores */
|
||||
public Pattern getMultipleUnderscoresPattern() {
|
||||
return getPattern("_+");
|
||||
}
|
||||
|
||||
/** Pattern for trimming leading underscores */
|
||||
public Pattern getLeadingUnderscoresPattern() {
|
||||
return getPattern("^_+");
|
||||
}
|
||||
|
||||
/** Pattern for trimming trailing underscores */
|
||||
public Pattern getTrailingUnderscoresPattern() {
|
||||
return getPattern("_+$");
|
||||
}
|
||||
|
||||
/** Pattern for matching upload/download paths (case insensitive) */
|
||||
public Pattern getUploadDownloadPathPattern() {
|
||||
return getPattern("(?i).*/(upload|download)/.*");
|
||||
}
|
||||
|
||||
/** Pattern for matching one or more whitespace characters */
|
||||
public Pattern getWhitespacePattern() {
|
||||
return getPattern("\\s+");
|
||||
}
|
||||
|
||||
/** Pattern for matching newlines (Windows and Unix style) */
|
||||
public Pattern getNewlinesPattern() {
|
||||
return getPattern("\\r?\\n");
|
||||
}
|
||||
|
||||
/** Pattern for splitting on newlines (Windows and Unix style) */
|
||||
public Pattern getNewlineSplitPattern() {
|
||||
return getPattern("\\r?\\n");
|
||||
}
|
||||
|
||||
/** Pattern for splitting text into words */
|
||||
public Pattern getWordSplitPattern() {
|
||||
return getPattern("\\s+");
|
||||
}
|
||||
|
||||
/** Pattern for removing carriage returns */
|
||||
public Pattern getCarriageReturnPattern() {
|
||||
return getPattern("\\r");
|
||||
}
|
||||
|
||||
/** Pattern for matching newline characters */
|
||||
public Pattern getNewlineCharsPattern() {
|
||||
return getPattern("[\n\r]");
|
||||
}
|
||||
|
||||
/** Pattern for multi-format newline splitting (Windows, Mac, Unix) */
|
||||
public Pattern getMultiFormatNewlinePattern() {
|
||||
return getPattern("\r\n|\r|\n");
|
||||
}
|
||||
|
||||
/** Pattern for encoded payload newline removal */
|
||||
public Pattern getEncodedPayloadNewlinePattern() {
|
||||
return getPattern("\\r?\\n");
|
||||
}
|
||||
|
||||
/** Pattern for escaped newlines in watermark text */
|
||||
public Pattern getEscapedNewlinePattern() {
|
||||
return getPattern("\\\\n");
|
||||
}
|
||||
|
||||
/** Pattern for input sanitization (allows only alphanumeric and spaces) */
|
||||
public Pattern getInputSanitizePattern() {
|
||||
return getPattern("[^a-zA-Z0-9 ]");
|
||||
}
|
||||
|
||||
/** Pattern for removing angle brackets */
|
||||
public Pattern getAngleBracketsPattern() {
|
||||
return getPattern("[<>]");
|
||||
}
|
||||
|
||||
/** Pattern for removing leading and trailing quotes */
|
||||
public Pattern getQuotesRemovalPattern() {
|
||||
return getPattern("^\"|\"$");
|
||||
}
|
||||
|
||||
/** Pattern for plus signs (URL encoding replacement) */
|
||||
public Pattern getPlusSignPattern() {
|
||||
return getPattern("\\+");
|
||||
}
|
||||
|
||||
/** Pattern for username validation */
|
||||
public Pattern getUsernameValidationPattern() {
|
||||
return getPattern("^[a-zA-Z0-9](?!.*[-@._+]{2,})[a-zA-Z0-9@._+-]{1,48}[a-zA-Z0-9]$");
|
||||
}
|
||||
|
||||
public static String getExtensionRegex() {
|
||||
return EXTENSION_REGEX;
|
||||
}
|
||||
|
||||
/** Pattern for extracting non-numeric characters */
|
||||
public Pattern getNumericExtractionPattern() {
|
||||
return getPattern("\\D");
|
||||
}
|
||||
|
||||
/** Pattern for removing non-digit/dot characters (for timeout parsing) */
|
||||
public Pattern getNonDigitDotPattern() {
|
||||
return getPattern("[^\\d.]");
|
||||
}
|
||||
|
||||
/** Pattern for matching digit/dot characters (for timeout parsing) */
|
||||
public Pattern getDigitDotPattern() {
|
||||
return getPattern("[\\d.]");
|
||||
}
|
||||
|
||||
/** Pattern for detecting strings containing digits */
|
||||
public Pattern getContainsDigitsPattern() {
|
||||
return getPattern(".*\\d+.*");
|
||||
}
|
||||
|
||||
/** Pattern for matching 1-3 digit numbers */
|
||||
public Pattern getNumberRangePattern() {
|
||||
return getPattern("[1-9][0-9]{0,2}");
|
||||
}
|
||||
|
||||
/** Pattern for validating mathematical expressions */
|
||||
public Pattern getMathExpressionPattern() {
|
||||
return getPattern("[0-9n+\\-*/() ]+");
|
||||
}
|
||||
|
||||
/** Pattern for adding multiplication between numbers and 'n' */
|
||||
public Pattern getNumberBeforeNPattern() {
|
||||
return getPattern("(\\d)n");
|
||||
}
|
||||
|
||||
/** Pattern for detecting consecutive 'n' characters */
|
||||
public Pattern getConsecutiveNPattern() {
|
||||
return getPattern(".*n{2,}.*");
|
||||
}
|
||||
|
||||
/** Pattern for replacing consecutive 'n' characters */
|
||||
public Pattern getConsecutiveNReplacementPattern() {
|
||||
return getPattern("(?<!n)n{2}");
|
||||
}
|
||||
|
||||
/** Pattern for validating HTTP/HTTPS URLs */
|
||||
public Pattern getHttpUrlPattern() {
|
||||
return getPattern("^https?://.*");
|
||||
}
|
||||
|
||||
/** Pattern for matching URLs in text for link creation */
|
||||
public Pattern getUrlLinkPattern() {
|
||||
return getPattern("(https?://[\\w\\-._~:/?#\\[\\]@!$&'()*+,;=%]+)");
|
||||
}
|
||||
|
||||
/** Pattern for matching email addresses in text for link creation */
|
||||
public Pattern getEmailLinkPattern() {
|
||||
return getPattern("([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,63})");
|
||||
}
|
||||
|
||||
/** Pattern for removing script tags from HTML */
|
||||
public Pattern getScriptTagPattern() {
|
||||
return getPattern("(?i)<script[^>]*>.*?</script>");
|
||||
}
|
||||
|
||||
/** Pattern for removing style tags from HTML */
|
||||
public Pattern getStyleTagPattern() {
|
||||
return getPattern("(?i)<style[^>]*>.*?</style>");
|
||||
}
|
||||
|
||||
/** Pattern for removing fixed position CSS */
|
||||
public Pattern getFixedPositionCssPattern() {
|
||||
return getPattern("(?i)\\s*position\\s*:\\s*fixed[^;]*;?");
|
||||
}
|
||||
|
||||
/** Pattern for removing absolute position CSS */
|
||||
public Pattern getAbsolutePositionCssPattern() {
|
||||
return getPattern("(?i)\\s*position\\s*:\\s*absolute[^;]*;?");
|
||||
}
|
||||
|
||||
/** Pattern for matching size unit suffixes (KB, MB, GB, etc.) */
|
||||
public Pattern getSizeUnitPattern() {
|
||||
return getPattern("[KMGkmg][Bb]");
|
||||
}
|
||||
|
||||
/** Pattern for system temp file type 1 */
|
||||
public Pattern getSystemTempFile1Pattern() {
|
||||
return getPattern("lu\\d+[a-z0-9]*\\.tmp");
|
||||
}
|
||||
|
||||
/** Pattern for system temp file type 2 (OCR processes) */
|
||||
public Pattern getSystemTempFile2Pattern() {
|
||||
return getPattern("ocr_process\\d+");
|
||||
}
|
||||
|
||||
/** Pattern for splitting on whitespace and parentheses */
|
||||
public Pattern getWhitespaceParenthesesSplitPattern() {
|
||||
return getPattern("[\\s\\(\\)]+");
|
||||
}
|
||||
|
||||
/** Pattern for MIME header whitespace cleanup before encoded sequences */
|
||||
public Pattern getMimeHeaderWhitespacePattern() {
|
||||
return getPattern("\\s+(?==\\?)");
|
||||
}
|
||||
|
||||
/** Pattern for font name validation (6 uppercase letters + plus + rest) */
|
||||
public Pattern getFontNamePattern() {
|
||||
return getPattern("^[A-Z]{6}\\+.*");
|
||||
}
|
||||
|
||||
/** Pattern for matching access="readOnly" attribute in XFA XML (with optional whitespace) */
|
||||
public Pattern getAccessReadOnlyPattern() {
|
||||
return getPattern("access\\s*=\\s*\"readOnly\"");
|
||||
}
|
||||
|
||||
/** Pattern for matching MIME encoded-word headers (RFC 2047) Example: =?charset?B?encoded?= */
|
||||
public Pattern getMimeEncodedWordPattern() {
|
||||
return getPattern("=\\?([^?]+)\\?([BbQq])\\?([^?]*)\\?=");
|
||||
}
|
||||
|
||||
/** Pattern for matching inline CID images in HTML (case-insensitive) */
|
||||
public Pattern getInlineCidImagePattern() {
|
||||
return getPattern(
|
||||
"(?i)<img[^>]*\\ssrc\\s*=\\s*['\"]cid:([^'\"]+)['\"][^>]*>",
|
||||
Pattern.CASE_INSENSITIVE);
|
||||
}
|
||||
|
||||
/** Pattern for matching attachment section headers (case-insensitive) */
|
||||
public Pattern getAttachmentSectionPattern() {
|
||||
return getPattern("attachments\\s*\\(\\d+\\)", Pattern.CASE_INSENSITIVE);
|
||||
}
|
||||
|
||||
/** Pattern for matching filenames in attachment markers */
|
||||
public Pattern getAttachmentFilenamePattern() {
|
||||
return getPattern("@\\s*([^\\s\\(]+(?:\\.[a-zA-Z0-9]+)?)");
|
||||
}
|
||||
|
||||
// API doc parsing patterns
|
||||
|
||||
/**
|
||||
* Pre-compile commonly used patterns for immediate availability. This eliminates first-call
|
||||
* compilation overhead for frequent patterns.
|
||||
*/
|
||||
private void precompileCommonPatterns() {
|
||||
getPattern("\\.(?:[^.]*+)?$"); // Extension removal - possessive, optional, anchored
|
||||
getPattern("\\.[^.]+$"); // Simple extension match - anchored
|
||||
|
||||
getPattern("\\s+"); // One or more whitespace
|
||||
getPattern("\\s*"); // Zero or more whitespace
|
||||
|
||||
getPattern("/+$"); // Trailing slashes
|
||||
getPattern("\\D"); // Non-numeric characters
|
||||
getPattern("[/\\\\?%*:|\"<>]"); // Unsafe filename characters
|
||||
getPattern("[^a-zA-Z0-9 ]"); // Input sanitization
|
||||
getPattern("[^a-zA-Z0-9]"); // Filename sanitization
|
||||
// API doc patterns
|
||||
getPattern("Output:(\\w+)"); // precompiled single-escaped for runtime regex \w
|
||||
getPattern("Input:(\\w+)");
|
||||
getPattern("Type:(\\w+)");
|
||||
log.debug("Pre-compiled {} common regex patterns", patternCache.size());
|
||||
}
|
||||
|
||||
/** Pattern for email validation */
|
||||
public Pattern getEmailValidationPattern() {
|
||||
return getPattern(
|
||||
"^(?=.{1,320}$)(?=.{1,64}@)[A-Za-z0-9](?:[A-Za-z0-9_.+-]*[A-Za-z0-9])?@[^-][A-Za-z0-9-]+(?:\\.[A-Za-z0-9-]+)*(?:\\.[A-Za-z]{2,})$");
|
||||
}
|
||||
|
||||
/* Pattern for matching Output:<TYPE> in API descriptions */
|
||||
public Pattern getApiDocOutputTypePattern() {
|
||||
return getPattern("Output:(\\w+)");
|
||||
}
|
||||
|
||||
/* Pattern for matching Input:<TYPE> in API descriptions */
|
||||
public Pattern getApiDocInputTypePattern() {
|
||||
return getPattern("Input:(\\w+)");
|
||||
}
|
||||
|
||||
/**
|
||||
* Pattern for matching Type:<CODE> in API descriptions
|
||||
*/
|
||||
public Pattern getApiDocTypePattern() {
|
||||
return getPattern("Type:(\\w+)");
|
||||
}
|
||||
|
||||
/* Pattern for validating file extensions (2-4 alphanumeric, case-insensitive) */
|
||||
public Pattern getFileExtensionValidationPattern() {
|
||||
return getPattern("^[a-zA-Z0-9]{2,4}$", Pattern.CASE_INSENSITIVE);
|
||||
}
|
||||
|
||||
private record PatternKey(String regex, int flags) {
|
||||
// Record automatically provides equals, hashCode, and toString
|
||||
}
|
||||
}
|
||||
@@ -50,7 +50,10 @@ public class WebResponseUtils {
|
||||
headers.setContentType(mediaType);
|
||||
headers.setContentLength(bytes.length);
|
||||
String encodedDocName =
|
||||
URLEncoder.encode(docName, StandardCharsets.UTF_8).replaceAll("\\+", "%20");
|
||||
RegexPatternUtils.getInstance()
|
||||
.getPlusSignPattern()
|
||||
.matcher(URLEncoder.encode(docName, StandardCharsets.UTF_8))
|
||||
.replaceAll("%20");
|
||||
headers.setContentDispositionFormData("attachment", encodedDocName);
|
||||
return new ResponseEntity<>(bytes, headers, HttpStatus.OK);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user