feat: Add RegexPatternUtils for centralized regex management, file naming funcs, UtilityClass annotation (#4218)

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: Anthony Stirling <77850077+Frooodle@users.noreply.github.com>
This commit is contained in:
Balázs Szücs
2025-09-28 17:56:35 +02:00
committed by GitHub
parent 133e6d3de6
commit 045f4cc591
78 changed files with 1947 additions and 617 deletions

View File

@@ -22,6 +22,7 @@ import lombok.extern.slf4j.Slf4j;
import stirling.software.common.model.job.JobResponse;
import stirling.software.common.util.ExecutorFactory;
import stirling.software.common.util.RegexPatternUtils;
/** Service for executing jobs asynchronously or synchronously */
@Service
@@ -426,8 +427,16 @@ public class JobExecutorService {
}
try {
String value = timeout.replaceAll("[^\\d.]", "");
String unit = timeout.replaceAll("[\\d.]", "");
String value =
RegexPatternUtils.getInstance()
.getNonDigitDotPattern()
.matcher(timeout)
.replaceAll("");
String unit =
RegexPatternUtils.getInstance()
.getDigitDotPattern()
.matcher(timeout)
.replaceAll("");
double numericValue = Double.parseDouble(value);

View File

@@ -13,6 +13,7 @@ import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import stirling.software.common.model.ApplicationProperties;
import stirling.software.common.util.RegexPatternUtils;
@Service
@RequiredArgsConstructor
@@ -22,8 +23,9 @@ public class SsrfProtectionService {
private final ApplicationProperties applicationProperties;
private static final Pattern DATA_URL_PATTERN =
Pattern.compile("^data:.*", Pattern.CASE_INSENSITIVE);
private static final Pattern FRAGMENT_PATTERN = Pattern.compile("^#.*");
RegexPatternUtils.getInstance().getPattern("^data:.*", Pattern.CASE_INSENSITIVE);
private static final Pattern FRAGMENT_PATTERN =
RegexPatternUtils.getInstance().getPattern("^#.*");
public enum SsrfProtectionLevel {
OFF, // No SSRF protection - allows all URLs

View File

@@ -23,6 +23,7 @@ import lombok.extern.slf4j.Slf4j;
import stirling.software.common.model.ApplicationProperties;
import stirling.software.common.util.GeneralUtils;
import stirling.software.common.util.RegexPatternUtils;
import stirling.software.common.util.TempFileManager;
import stirling.software.common.util.TempFileRegistry;
@@ -61,8 +62,14 @@ public class TempFileCleanupService {
// File patterns that identify common system temp files
private static final Predicate<String> IS_SYSTEM_TEMP_FILE =
fileName ->
fileName.matches("lu\\d+[a-z0-9]*\\.tmp")
|| fileName.matches("ocr_process\\d+")
RegexPatternUtils.getInstance()
.getSystemTempFile1Pattern()
.matcher(fileName)
.matches()
|| RegexPatternUtils.getInstance()
.getSystemTempFile2Pattern()
.matcher(fileName)
.matches()
|| (fileName.startsWith("tmp") && !fileName.contains("jetty"))
|| fileName.startsWith("OSL_PIPE_")
|| (fileName.endsWith(".tmp") && !fileName.contains("jetty"));

View File

@@ -29,7 +29,7 @@ public class EmlParser {
private static volatile boolean mimeUtilityChecked = false;
private static final Pattern MIME_ENCODED_PATTERN =
Pattern.compile("=\\?([^?]+)\\?([BbQq])\\?([^?]*)\\?=");
RegexPatternUtils.getInstance().getMimeEncodedWordPattern();
private static final String DISPOSITION_ATTACHMENT = "attachment";
private static final String TEXT_PLAIN = MediaType.TEXT_PLAIN_VALUE;
@@ -357,7 +357,11 @@ public class EmlParser {
for (String contentIdHeader : contentIdHeaders) {
if (contentIdHeader != null && !contentIdHeader.trim().isEmpty()) {
attachment.setEmbedded(true);
String contentId = contentIdHeader.trim().replaceAll("[<>]", "");
String contentId =
RegexPatternUtils.getInstance()
.getAngleBracketsPattern()
.matcher(contentIdHeader.trim())
.replaceAll("");
attachment.setContentId(contentId);
break;
}
@@ -414,7 +418,8 @@ public class EmlParser {
private static String extractBasicHeader(String emlContent, String headerName) {
try {
String[] lines = emlContent.split("\r?\n");
String[] lines =
RegexPatternUtils.getInstance().getNewlineSplitPattern().split(emlContent);
for (int i = 0; i < lines.length; i++) {
String line = lines[i];
if (line.toLowerCase().startsWith(headerName.toLowerCase())) {
@@ -485,7 +490,10 @@ public class EmlParser {
}
private static int findPartEnd(String content, int start) {
String[] lines = content.substring(start).split("\r?\n");
String[] lines =
RegexPatternUtils.getInstance()
.getNewlineSplitPattern()
.split(content.substring(start));
StringBuilder result = new StringBuilder();
for (String line : lines) {
@@ -499,7 +507,8 @@ public class EmlParser {
private static List<EmailAttachment> extractAttachmentsBasic(String emlContent) {
List<EmailAttachment> attachments = new ArrayList<>();
try {
String[] lines = emlContent.split("\r?\n");
String[] lines =
RegexPatternUtils.getInstance().getNewlineSplitPattern().split(emlContent);
boolean inHeaders = true;
String currentContentType = "";
String currentDisposition = "";
@@ -562,7 +571,11 @@ public class EmlParser {
if (filenameStarEnd == -1) filenameStarEnd = disposition.length();
String extendedFilename =
disposition.substring(filenameStarStart, filenameStarEnd).trim();
extendedFilename = extendedFilename.replaceAll("^\"|\"$", "");
extendedFilename =
RegexPatternUtils.getInstance()
.getQuotesRemovalPattern()
.matcher(extendedFilename)
.replaceAll("");
if (extendedFilename.contains("'")) {
String[] parts = extendedFilename.split("'", 3);
@@ -577,7 +590,11 @@ public class EmlParser {
int filenameEnd = disposition.indexOf(";", filenameStart);
if (filenameEnd == -1) filenameEnd = disposition.length();
String filename = disposition.substring(filenameStart, filenameEnd).trim();
filename = filename.replaceAll("^\"|\"$", "");
filename =
RegexPatternUtils.getInstance()
.getQuotesRemovalPattern()
.matcher(filename)
.replaceAll("");
return safeMimeDecode(filename);
}
@@ -630,11 +647,23 @@ public class EmlParser {
private List<EmailAttachment> attachments = new ArrayList<>();
public void setHtmlBody(String htmlBody) {
this.htmlBody = htmlBody != null ? htmlBody.replaceAll("\r", "") : null;
this.htmlBody =
htmlBody != null
? RegexPatternUtils.getInstance()
.getCarriageReturnPattern()
.matcher(htmlBody)
.replaceAll("")
: null;
}
public void setTextBody(String textBody) {
this.textBody = textBody != null ? textBody.replaceAll("\r", "") : null;
this.textBody =
textBody != null
? RegexPatternUtils.getInstance()
.getCarriageReturnPattern()
.matcher(textBody)
.replaceAll("")
: null;
}
}

View File

@@ -199,8 +199,16 @@ public class EmlProcessingUtils {
String processed =
customHtmlSanitizer != null ? customHtmlSanitizer.sanitize(htmlBody) : htmlBody;
processed = processed.replaceAll("(?i)\\s*position\\s*:\\s*fixed[^;]*;?", "");
processed = processed.replaceAll("(?i)\\s*position\\s*:\\s*absolute[^;]*;?", "");
processed =
RegexPatternUtils.getInstance()
.getFixedPositionCssPattern()
.matcher(processed)
.replaceAll("");
processed =
RegexPatternUtils.getInstance()
.getAbsolutePositionCssPattern()
.matcher(processed)
.replaceAll("");
if (emailContent != null && !emailContent.getAttachments().isEmpty()) {
processed = PdfAttachmentHandler.processInlineImages(processed, emailContent);
@@ -222,14 +230,18 @@ public class EmlProcessingUtils {
html = html.replace("\n", "<br>\n");
html =
html.replaceAll(
"(https?://[\\w\\-._~:/?#\\[\\]@!$&'()*+,;=%]+)",
"<a href=\"$1\" style=\"color: #1a73e8; text-decoration: underline;\">$1</a>");
RegexPatternUtils.getInstance()
.getUrlLinkPattern()
.matcher(html)
.replaceAll(
"<a href=\"$1\" style=\"color: #1a73e8; text-decoration: underline;\">$1</a>");
html =
html.replaceAll(
"([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,63})",
"<a href=\"mailto:$1\" style=\"color: #1a73e8; text-decoration: underline;\">$1</a>");
RegexPatternUtils.getInstance()
.getEmailLinkPattern()
.matcher(html)
.replaceAll(
"<a href=\"mailto:$1\" style=\"color: #1a73e8; text-decoration: underline;\">$1</a>");
return html;
}
@@ -490,9 +502,13 @@ public class EmlProcessingUtils {
Matcher concatenatedMatcher = concatenatedPattern.matcher(encodedText);
String processedText =
concatenatedMatcher.replaceAll(
match -> match.group().replaceAll("\\s+(?==\\?)", ""));
match ->
RegexPatternUtils.getInstance()
.getMimeHeaderWhitespacePattern()
.matcher(match.group())
.replaceAll(""));
Pattern mimePattern = Pattern.compile("=\\?([^?]+)\\?([BbQq])\\?([^?]*)\\?=");
Pattern mimePattern = RegexPatternUtils.getInstance().getMimeEncodedWordPattern();
Matcher matcher = mimePattern.matcher(processedText);
int lastEnd = 0;
@@ -507,7 +523,11 @@ public class EmlProcessingUtils {
String decodedValue =
switch (encoding) {
case "B" -> {
String cleanBase64 = encodedValue.replaceAll("\\s", "");
String cleanBase64 =
RegexPatternUtils.getInstance()
.getWhitespacePattern()
.matcher(encodedValue)
.replaceAll("");
byte[] decodedBytes = Base64.getDecoder().decode(cleanBase64);
Charset targetCharset;
try {
@@ -596,8 +616,16 @@ public class EmlProcessingUtils {
}
public static String simplifyHtmlContent(String htmlContent) {
String simplified = htmlContent.replaceAll("(?i)<script[^>]*>.*?</script>", "");
simplified = simplified.replaceAll("(?i)<style[^>]*>.*?</style>", "");
String simplified =
RegexPatternUtils.getInstance()
.getScriptTagPattern()
.matcher(htmlContent)
.replaceAll("");
simplified =
RegexPatternUtils.getInstance()
.getStyleTagPattern()
.matcher(simplified)
.replaceAll("");
return simplified;
}
}

View File

@@ -1,6 +1,9 @@
package stirling.software.common.util;
import java.io.*;
import java.io.ByteArrayInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.FileVisitResult;
import java.nio.file.Files;
@@ -205,15 +208,27 @@ public class FileToPdf {
return "";
}
// Remove any drive letters (e.g., "C:\") and leading forward/backslashes
entryName = entryName.replaceAll("^[a-zA-Z]:[\\\\/]+", "");
entryName = entryName.replaceAll("^[\\\\/]+", "");
entryName =
RegexPatternUtils.getInstance()
.getDriveLetterPattern()
.matcher(entryName)
.replaceAll("");
entryName =
RegexPatternUtils.getInstance()
.getLeadingSlashesPattern()
.matcher(entryName)
.replaceAll("");
// Recursively remove path traversal sequences
while (entryName.contains("../") || entryName.contains("..\\")) {
entryName = entryName.replace("../", "").replace("..\\", "");
}
// Normalize all backslashes to forward slashes
entryName = entryName.replaceAll("\\\\", "/");
entryName =
RegexPatternUtils.getInstance()
.getBackslashPattern()
.matcher(entryName)
.replaceAll("/");
return entryName;
}
}

View File

@@ -9,13 +9,9 @@ import java.nio.charset.StandardCharsets;
import java.nio.file.*;
import java.nio.file.attribute.BasicFileAttributes;
import java.security.MessageDigest;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Enumeration;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.UUID;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.springframework.core.io.ClassPathResource;
import org.springframework.core.io.Resource;
@@ -28,25 +24,37 @@ import com.fathzer.soft.javaluator.DoubleEvaluator;
import io.github.pixee.security.HostValidator;
import io.github.pixee.security.Urls;
import lombok.experimental.UtilityClass;
import lombok.extern.slf4j.Slf4j;
import stirling.software.common.configuration.InstallationPathConfig;
@Slf4j
@UtilityClass
public class GeneralUtils {
private static final Set<String> DEFAULT_VALID_SCRIPTS =
Set.of("png_to_webp.py", "split_photos.py");
private static final Set<String> DEFAULT_VALID_PIPELINE =
private final Set<String> DEFAULT_VALID_SCRIPTS = Set.of("png_to_webp.py", "split_photos.py");
private final Set<String> DEFAULT_VALID_PIPELINE =
Set.of(
"OCR images.json",
"Prepare-pdfs-for-email.json",
"split-rotate-auto-rename.json");
private static final String DEFAULT_WEBUI_CONFIGS_DIR = "defaultWebUIConfigs";
private static final String PYTHON_SCRIPTS_DIR = "python";
private final String DEFAULT_WEBUI_CONFIGS_DIR = "defaultWebUIConfigs";
private final String PYTHON_SCRIPTS_DIR = "python";
private final RegexPatternUtils patternCache = RegexPatternUtils.getInstance();
// Valid size units used for convertSizeToBytes validation and parsing
private final Set<String> VALID_SIZE_UNITS = Set.of("B", "KB", "MB", "GB", "TB");
public static File convertMultipartFileToFile(MultipartFile multipartFile) throws IOException {
/*
* Converts a MultipartFile to a regular File with improved performance and security.
*
* @param multipartFile the multipart file to convert
* @return temporary File containing the multipart file data
* @throws IOException if I/O error occurs during conversion
* @throws IllegalArgumentException if file exceeds maximum allowed size
*/
public File convertMultipartFileToFile(MultipartFile multipartFile) throws IOException {
String customTempDir = System.getenv("STIRLING_TEMPFILES_DIRECTORY");
if (customTempDir == null || customTempDir.isEmpty()) {
customTempDir = System.getProperty("stirling.tempfiles.directory");
@@ -81,10 +89,137 @@ public class GeneralUtils {
return tempFile;
}
public static void deleteDirectory(Path path) throws IOException {
/*
 * Resolves the directory used for temporary files and makes sure it exists.
 *
 * <p>Lookup order: the STIRLING_TEMPFILES_DIRECTORY environment variable, then the
 * stirling.tempfiles.directory system property, and finally a "stirling-pdf" folder
 * underneath java.io.tmpdir.
 *
 * @return Path to the temporary directory
 * @throws IOException if directory creation fails
 */
private Path getTempDirectory() throws IOException {
    String configuredDir = System.getenv("STIRLING_TEMPFILES_DIRECTORY");
    if (configuredDir == null || configuredDir.isEmpty()) {
        // Environment variable is unset or blank: consult the system property instead
        configuredDir = System.getProperty("stirling.tempfiles.directory");
    }

    boolean hasOverride = configuredDir != null && !configuredDir.isEmpty();
    Path resolvedDir =
            hasOverride
                    ? Path.of(configuredDir)
                    : Path.of(System.getProperty("java.io.tmpdir"), "stirling-pdf");

    if (!Files.exists(resolvedDir)) {
        Files.createDirectories(resolvedDir);
    }
    return resolvedDir;
}
/*
 * Remove file extension
 *
 * <p>The extension is everything from the last '.' onwards. It is only stripped when that
 * dot is neither the first character (hidden files such as ".gitignore") nor the last
 * character (names such as "file."). Names without a dot are returned unchanged.
 *
 * <p>Implemented with plain string operations only; every possible position of the last dot
 * is handled explicitly, so no regex fallback is needed.
 *
 * @param filename the filename to process, may be null
 * @return filename without extension, "default" if input is null, or the input unchanged
 *     when it is empty or has no removable extension
 */
public String removeExtension(String filename) {
    if (filename == null) {
        return "default";
    }
    if (filename.isEmpty()) {
        return filename;
    }

    int dotIndex = filename.lastIndexOf('.');
    // A removable extension needs at least one character before and after the dot
    boolean hasRemovableExtension = dotIndex > 0 && dotIndex < filename.length() - 1;
    if (hasRemovableExtension) {
        return filename.substring(0, dotIndex);
    }

    // No dot, leading dot only, or trailing dot: nothing to strip
    return filename;
}
/*
 * Concatenates a base name and a suffix, substituting safe values for nulls.
 *
 * @param baseName the base filename, null becomes "default"
 * @param suffix the suffix to append, null becomes empty string
 * @return the base name (or "default") followed by the suffix (or nothing)
 */
public String appendSuffix(String baseName, String suffix) {
    String safeBase = baseName;
    if (safeBase == null) {
        safeBase = "default";
    }
    String safeSuffix = suffix;
    if (safeSuffix == null) {
        safeSuffix = "";
    }
    return safeBase + safeSuffix;
}
/*
 * Builds an output filename from the first input file's name.
 *
 * <p>Strips the extension from the given filename via removeExtension and then appends the
 * suffix via appendSuffix; null handling is delegated to those two methods.
 *
 * @param firstFilename the filename of the first file being processed, may be null
 * @param suffix the suffix to append (e.g., "_merged.pdf")
 * @return the extension-less filename followed by the suffix
 */
public String generateFilename(String firstFilename, String suffix) {
    return appendSuffix(removeExtension(firstFilename), suffix);
}
/*
 * Strips the extension from each filename, appends the suffix, and hands every resulting
 * name to the supplied consumer in list order.
 *
 * <p>Does nothing when either the list or the consumer is null. Individual entries are
 * passed through removeExtension and appendSuffix as-is.
 *
 * @param filenames the list of filenames to process, may be null
 * @param suffix the suffix to append to each processed filename
 * @param processor consumer to handle each processed filename, may be null
 */
public void processFilenames(
        List<String> filenames, String suffix, java.util.function.Consumer<String> processor) {
    if (filenames == null || processor == null) {
        return;
    }

    for (String original : filenames) {
        String renamed = appendSuffix(removeExtension(original), suffix);
        processor.accept(renamed);
    }
}
/*
 * Derives a title from a filename.
 *
 * <p>Null or empty input yields the placeholder "Untitled"; any other input is returned
 * with its extension stripped by removeExtension.
 *
 * @param filename the filename to extract title from, may be null
 * @return the title without extension, or "Untitled" if input is null/empty
 */
public String getTitleFromFilename(String filename) {
    boolean missingName = filename == null || filename.isEmpty();
    return missingName ? "Untitled" : removeExtension(filename);
}
public void deleteDirectory(Path path) throws IOException {
Files.walkFileTree(
path,
new SimpleFileVisitor<Path>() {
new SimpleFileVisitor<>() {
@Override
public FileVisitResult visitFile(Path file, BasicFileAttributes attrs)
throws IOException {
@@ -101,8 +236,18 @@ public class GeneralUtils {
});
}
public static String convertToFileName(String name) {
String safeName = name.replaceAll("[^a-zA-Z0-9]", "_");
public String convertToFileName(String name) {
if (name == null) return "_";
StringBuilder safeNameBuilder = new StringBuilder(name.length());
for (int i = 0; i < name.length(); i++) {
char c = name.charAt(i);
if (Character.isLetterOrDigit(c)) {
safeNameBuilder.append(c);
} else {
safeNameBuilder.append('_');
}
}
String safeName = safeNameBuilder.toString();
if (safeName.length() > 50) {
safeName = safeName.substring(0, 50);
}
@@ -110,19 +255,20 @@ public class GeneralUtils {
}
// Get resources from a location pattern
public static Resource[] getResourcesFromLocationPattern(
public Resource[] getResourcesFromLocationPattern(
String locationPattern, ResourceLoader resourceLoader) throws Exception {
// Normalize the path for file resources
if (locationPattern.startsWith("file:")) {
String rawPath = locationPattern.substring(5).replace("\\*", "").replace("/*", "");
String pattern = locationPattern;
if (pattern.startsWith("file:")) {
String rawPath = pattern.substring(5).replace("\\*", "").replace("/*", "");
Path normalizePath = Paths.get(rawPath).normalize();
locationPattern = "file:" + normalizePath.toString().replace("\\", "/") + "/*";
pattern = "file:" + normalizePath.toString().replace("\\", "/") + "/*";
}
return ResourcePatternUtils.getResourcePatternResolver(resourceLoader)
.getResources(locationPattern);
.getResources(pattern);
}
public static boolean isValidURL(String urlStr) {
public boolean isValidURL(String urlStr) {
try {
Urls.create(
urlStr, Urls.HTTP_PROTOCOLS, HostValidator.DENY_COMMON_INFRASTRUCTURE_TARGETS);
@@ -132,7 +278,25 @@ public class GeneralUtils {
}
}
public static boolean isURLReachable(String urlStr) {
/*
 * Checks if a URL is reachable, using a 5000 ms connect timeout and a 5000 ms read timeout.
 *
 * <p>Convenience overload that delegates to isURLReachable(String, int, int).
 *
 * @param urlStr the URL string to check
 * @return true if URL is reachable, false otherwise
 */
public boolean isURLReachable(String urlStr) {
    final int defaultTimeoutMillis = 5000;
    return isURLReachable(urlStr, defaultTimeoutMillis, defaultTimeoutMillis);
}
/*
* Checks if a URL is reachable with configurable timeouts.
*
* @param urlStr the URL string to check
* @param connectTimeout connection timeout in milliseconds
* @param readTimeout read timeout in milliseconds
* @return true if URL is reachable, false otherwise
*/
public boolean isURLReachable(String urlStr, int connectTimeout, int readTimeout) {
try {
// Parse the URL
URL url = URI.create(urlStr).toURL();
@@ -152,16 +316,19 @@ public class GeneralUtils {
// Check if the URL is reachable
HttpURLConnection connection = (HttpURLConnection) url.openConnection();
connection.setRequestMethod("HEAD");
// connection.setConnectTimeout(5000); // Set connection timeout
// connection.setReadTimeout(5000); // Set read timeout
connection.setConnectTimeout(connectTimeout);
connection.setReadTimeout(readTimeout);
connection.setInstanceFollowRedirects(false); // Security: prevent redirect loops
int responseCode = connection.getResponseCode();
return (200 <= responseCode && responseCode <= 399);
} catch (Exception e) {
log.debug("URL {} is not reachable: {}", urlStr, e.getMessage());
return false; // Return false in case of any exception
}
}
private static boolean isLocalAddress(String host) {
private boolean isLocalAddress(String host) {
try {
// Resolve DNS to IP address
InetAddress address = InetAddress.getByName(host);
@@ -181,7 +348,14 @@ public class GeneralUtils {
}
}
public static File multipartToFile(MultipartFile multipart) throws IOException {
/*
* Improved multipart file conversion using the shared helper method.
*
* @param multipart the multipart file to convert
* @return temporary File containing the multipart file data
* @throws IOException if I/O error occurs during conversion
*/
public File multipartToFile(MultipartFile multipart) throws IOException {
Path tempFile = Files.createTempFile("overlay-", ".pdf");
try (InputStream in = multipart.getInputStream();
FileOutputStream out = new FileOutputStream(tempFile.toFile())) {
@@ -194,54 +368,105 @@ public class GeneralUtils {
return tempFile.toFile();
}
public static Long convertSizeToBytes(String sizeStr) {
/*
* Converts a size string to bytes. Supports B, KB, MB, GB and TB units; unparseable input is logged and yields null.
*
* @param sizeStr the size string to convert (e.g., "100MB", "1.5GB")
* @param defaultUnit the default unit to assume if none specified ("MB", "GB", etc.)
* @return size in bytes, or null if parsing fails
* @throws IllegalArgumentException if defaultUnit is invalid
*/
public Long convertSizeToBytes(String sizeStr, String defaultUnit) {
if (sizeStr == null) {
return null;
}
if (defaultUnit != null && !isValidSizeUnit(defaultUnit)) {
throw new IllegalArgumentException("Invalid default unit: " + defaultUnit);
}
sizeStr = sizeStr.trim().toUpperCase();
sizeStr = sizeStr.replace(",", ".").replace(" ", "");
try {
if (sizeStr.endsWith("KB")) {
return (long)
(Double.parseDouble(sizeStr.substring(0, sizeStr.length() - 2)) * 1024);
} else if (sizeStr.endsWith("MB")) {
if (sizeStr.endsWith("TB")) {
return (long)
(Double.parseDouble(sizeStr.substring(0, sizeStr.length() - 2))
* 1024
* 1024);
* 1024L
* 1024L
* 1024L
* 1024L);
} else if (sizeStr.endsWith("GB")) {
return (long)
(Double.parseDouble(sizeStr.substring(0, sizeStr.length() - 2))
* 1024
* 1024
* 1024);
} else if (sizeStr.endsWith("B")) {
* 1024L
* 1024L
* 1024L);
} else if (sizeStr.endsWith("MB")) {
return (long)
(Double.parseDouble(sizeStr.substring(0, sizeStr.length() - 2))
* 1024L
* 1024L);
} else if (sizeStr.endsWith("KB")) {
return (long)
(Double.parseDouble(sizeStr.substring(0, sizeStr.length() - 2)) * 1024L);
} else if (!sizeStr.isEmpty() && sizeStr.charAt(sizeStr.length() - 1) == 'B') {
return Long.parseLong(sizeStr.substring(0, sizeStr.length() - 1));
} else {
// Assume MB if no unit is specified
return (long) (Double.parseDouble(sizeStr) * 1024 * 1024);
// Use provided default unit or fall back to MB
String unit = defaultUnit != null ? defaultUnit.toUpperCase() : "MB";
double value = Double.parseDouble(sizeStr);
return switch (unit) {
case "TB" -> (long) (value * 1024L * 1024L * 1024L * 1024L);
case "GB" -> (long) (value * 1024L * 1024L * 1024L);
case "MB" -> (long) (value * 1024L * 1024L);
case "KB" -> (long) (value * 1024L);
case "B" -> (long) value;
default -> (long) (value * 1024L * 1024L); // Default to MB
};
}
} catch (NumberFormatException e) {
// The numeric part of the input string cannot be parsed, handle this case
log.warn("Failed to parse size string '{}': {}", sizeStr, e.getMessage());
return null;
}
return null;
}
public static String formatBytes(long bytes) {
/*
 * Converts a size string to bytes, treating a value without a unit as megabytes.
 *
 * <p>Convenience overload that delegates to convertSizeToBytes(String, String) with "MB"
 * as the default unit.
 *
 * @param sizeStr the size string to convert
 * @return size in bytes, or null if parsing fails
 */
public Long convertSizeToBytes(String sizeStr) {
    final String fallbackUnit = "MB";
    return convertSizeToBytes(sizeStr, fallbackUnit);
}
/* Returns true when the given unit, compared case-insensitively, is one of VALID_SIZE_UNITS. */
private boolean isValidSizeUnit(String unit) {
    if (unit == null) {
        return false;
    }
    // Locale.ROOT keeps the upper-casing independent of the JVM's default locale
    String normalizedUnit = unit.toUpperCase(Locale.ROOT);
    return VALID_SIZE_UNITS.contains(normalizedUnit);
}
/* Formats a byte count as B, KB, MB, GB or TB (two decimals above B); negative input yields "Invalid size". */
public String formatBytes(long bytes) {
if (bytes < 0) {
return "Invalid size";
}
if (bytes < 1024) {
return bytes + " B";
} else if (bytes < 1024 * 1024) {
} else if (bytes < 1024L * 1024L) {
return String.format(Locale.US, "%.2f KB", bytes / 1024.0);
} else if (bytes < 1024 * 1024 * 1024) {
} else if (bytes < 1024L * 1024L * 1024L) {
return String.format(Locale.US, "%.2f MB", bytes / (1024.0 * 1024.0));
} else {
} else if (bytes < 1024L * 1024L * 1024L * 1024L) {
return String.format(Locale.US, "%.2f GB", bytes / (1024.0 * 1024.0 * 1024.0));
} else {
return String.format(Locale.US, "%.2f TB", bytes / (1024.0 * 1024.0 * 1024.0 * 1024.0));
}
}
public static List<Integer> parsePageList(String pages, int totalPages, boolean oneBased) {
public List<Integer> parsePageList(String pages, int totalPages, boolean oneBased) {
if (pages == null) {
return List.of(1); // Default to first page if input is null
}
@@ -252,11 +477,11 @@ public class GeneralUtils {
}
}
public static List<Integer> parsePageList(String[] pages, int totalPages) {
public List<Integer> parsePageList(String[] pages, int totalPages) {
return parsePageList(pages, totalPages, false);
}
public static List<Integer> parsePageList(String[] pages, int totalPages, boolean oneBased) {
public List<Integer> parsePageList(String[] pages, int totalPages, boolean oneBased) {
List<Integer> result = new ArrayList<>();
int offset = oneBased ? 1 : 0;
for (String page : pages) {
@@ -278,33 +503,72 @@ public class GeneralUtils {
return result;
}
public static List<Integer> evaluateNFunc(String expression, int maxValue) {
/*
* Evaluates a mathematical expression in 'n' for each n from 1 to maxValue, with input validation and bounds checking.
*
* @param expression the mathematical expression containing 'n'
* @param maxValue the maximum value for 'n' and result bounds
* @return list of valid page numbers
* @throws IllegalArgumentException if expression is invalid or unsafe
*/
public List<Integer> evaluateNFunc(String expression, int maxValue) {
if (expression == null || expression.trim().isEmpty()) {
throw new IllegalArgumentException("Expression cannot be null or empty");
}
if (maxValue <= 0 || maxValue > 10000) {
throw new IllegalArgumentException("maxValue must be between 1 and 10000 for safety");
}
List<Integer> results = new ArrayList<>();
DoubleEvaluator evaluator = new DoubleEvaluator();
// Validate the expression
if (!expression.matches("[0-9n+\\-*/() ]+")) {
throw new IllegalArgumentException("Invalid expression");
// Validate the expression format
if (!RegexPatternUtils.getInstance()
.getMathExpressionPattern()
.matcher(expression.trim())
.matches()) {
throw new IllegalArgumentException("Invalid expression format: " + expression);
}
for (int n = 1; n <= maxValue; n++) {
// Replace 'n' with the current value of n, correctly handling numbers before
// 'n'
String sanitizedExpression = sanitizeNFunction(expression, n);
Double result = evaluator.evaluate(sanitizedExpression);
try {
// Replace 'n' with the current value of n, correctly handling numbers before 'n'
String sanitizedExpression = sanitizeNFunction(expression.trim(), n);
Double result = evaluator.evaluate(sanitizedExpression);
// Check if the result is null or not within bounds
if (result == null) break;
// Check if the result is null or not within bounds
if (result == null || !Double.isFinite(result)) {
continue;
}
if (result.intValue() > 0 && result.intValue() <= maxValue)
results.add(result.intValue());
int intResult = result.intValue();
if (intResult > 0 && intResult <= maxValue) {
results.add(intResult);
}
} catch (Exception e) {
log.debug(
"Failed to evaluate expression '{}' for n={}: {}",
expression,
n,
e.getMessage());
// Continue with next value instead of breaking
}
}
return results;
}
private static String sanitizeNFunction(String expression, int nValue) {
String sanitizedExpression = expression.replace(" ", "");
private String sanitizeNFunction(String expression, int nValue) {
// Remove all spaces using a specialized character removal
StringBuilder sb = new StringBuilder(expression.length());
for (int i = 0; i < expression.length(); i++) {
char c = expression.charAt(i);
if (c != ' ') {
sb.append(c);
}
}
String sanitizedExpression = sb.toString();
String multiplyByOpeningRoundBracketPattern =
"([0-9n)])\\("; // example: n(n-1), 9(n-1), (n-1)(n-2)
sanitizedExpression =
@@ -319,23 +583,45 @@ public class GeneralUtils {
return sanitizedExpression;
}
private static String insertMultiplicationBeforeN(String expression, int nValue) {
// Insert multiplication between a number and 'n' (e.g., "4n" becomes "4*n")
String withMultiplication = expression.replaceAll("(\\d)n", "$1*n");
private String insertMultiplicationBeforeN(String expression, int nValue) {
// Insert multiplication between a number and 'n' (e.g., "4n" becomes "4*n") using a loop
StringBuilder sb = new StringBuilder(expression.length() + 4); // +4 for possible extra '*'
for (int i = 0; i < expression.length(); i++) {
char c = expression.charAt(i);
sb.append(c);
if (Character.isDigit(c)
&& i + 1 < expression.length()
&& expression.charAt(i + 1) == 'n') {
sb.append('*');
}
}
String withMultiplication = sb.toString();
withMultiplication = formatConsecutiveNsForNFunction(withMultiplication);
// Now replace 'n' with its current value
return withMultiplication.replace("n", String.valueOf(nValue));
}
private static String formatConsecutiveNsForNFunction(String expression) {
private String formatConsecutiveNsForNFunction(String expression) {
String text = expression;
while (text.matches(".*n{2,}.*")) {
text = text.replaceAll("(?<!n)n{2}", "n*n");
// Replace all consecutive 'nn' with 'n*n' until no more 'nn' is found
while (text.contains("nn")) {
StringBuilder sb = new StringBuilder(text.length() + 2); // +2 for possible extra '*'
int i = 0;
while (i < text.length()) {
if (i < text.length() - 1 && text.charAt(i) == 'n' && text.charAt(i + 1) == 'n') {
sb.append("n*n");
i += 2;
} else {
sb.append(text.charAt(i));
i++;
}
}
text = sb.toString();
}
return text;
}
private static List<Integer> handlePart(String part, int totalPages, int offset) {
private List<Integer> handlePart(String part, int totalPages, int offset) {
List<Integer> partResult = new ArrayList<>();
// First check for n-syntax because it should not be processed as a range
@@ -361,7 +647,7 @@ public class GeneralUtils {
}
}
} catch (NumberFormatException e) {
// Range is invalid, ignore this part
log.debug("Invalid range: {}", part);
}
} else {
// This is a single page number
@@ -370,14 +656,14 @@ public class GeneralUtils {
if (pageNum >= 1 && pageNum <= totalPages) {
partResult.add(pageNum - 1 + offset);
}
} catch (NumberFormatException ignored) {
// Ignore invalid numbers
} catch (NumberFormatException e) {
log.debug("Invalid page number: {}", part);
}
}
return partResult;
}
public static boolean createDir(String path) {
public boolean createDir(String path) {
Path folder = Paths.get(path);
if (!Files.exists(folder)) {
try {
@@ -390,7 +676,7 @@ public class GeneralUtils {
return true;
}
public static boolean isValidUUID(String uuid) {
public boolean isValidUUID(String uuid) {
if (uuid == null) {
return false;
}
@@ -406,7 +692,7 @@ public class GeneralUtils {
* Internal Implementation Details *
*------------------------------------------------------------------------*/
public static void saveKeyToSettings(String key, Object newValue) throws IOException {
public void saveKeyToSettings(String key, Object newValue) throws IOException {
String[] keyArray = key.split("\\.");
Path settingsPath = Paths.get(InstallationPathConfig.getSettingsPath());
YamlHelper settingsYaml = new YamlHelper(settingsPath);
@@ -414,48 +700,67 @@ public class GeneralUtils {
settingsYaml.saveOverride(settingsPath);
}
public static String generateMachineFingerprint() {
/*
* Machine fingerprint generation with better error logging and fallbacks.
*
* @return unique machine fingerprint or "GenericID" if generation fails
*/
public String generateMachineFingerprint() {
try {
// Get the MAC address
StringBuilder sb = new StringBuilder();
// Try to get MAC address from primary network interface
InetAddress ip = InetAddress.getLocalHost();
NetworkInterface network = NetworkInterface.getByInetAddress(ip);
if (network == null) {
if (network == null || network.getHardwareAddress() == null) {
// Fallback: iterate through all network interfaces
Enumeration<NetworkInterface> networks = NetworkInterface.getNetworkInterfaces();
while (networks.hasMoreElements()) {
NetworkInterface net = networks.nextElement();
byte[] mac = net.getHardwareAddress();
if (mac != null) {
for (int i = 0; i < mac.length; i++) {
sb.append(String.format("%02X", mac[i]));
if (net.isUp() && !net.isLoopback() && !net.isVirtual()) {
byte[] mac = net.getHardwareAddress();
if (mac != null && mac.length > 0) {
for (byte b : mac) {
sb.append(String.format("%02X", b));
}
break; // Use the first valid network interface
}
break; // Use the first network interface with a MAC address
}
}
} else {
byte[] mac = network.getHardwareAddress();
if (mac != null) {
for (int i = 0; i < mac.length; i++) {
sb.append(String.format("%02X", mac[i]));
for (byte b : mac) {
sb.append(String.format("%02X", b));
}
}
}
// Hash the MAC address for privacy and consistency
// If no MAC address found, use hostname as fallback
if (sb.length() == 0) {
String hostname = InetAddress.getLocalHost().getHostName();
sb.append(hostname != null ? hostname : "unknown-host");
log.warn("No MAC address found, using hostname for fingerprint generation");
}
// Hash the collected data for privacy and consistency
MessageDigest md = MessageDigest.getInstance("SHA-256");
byte[] hash = md.digest(sb.toString().getBytes(StandardCharsets.UTF_8));
StringBuilder fingerprint = new StringBuilder();
for (byte b : hash) {
fingerprint.append(String.format("%02x", b));
}
log.debug("Successfully generated machine fingerprint");
return fingerprint.toString();
} catch (Exception e) {
log.warn("Failed to generate machine fingerprint: {}", e.getMessage());
return "GenericID";
}
}
/**
/*
* Extracts the default pipeline configurations from the classpath to the installation path.
* Creates directories if needed and copies default JSON files.
*
@@ -464,7 +769,7 @@ public class GeneralUtils {
*
* @throws IOException if an I/O error occurs during file operations
*/
public static void extractPipeline() throws IOException {
public void extractPipeline() throws IOException {
Path pipelineDir =
Paths.get(InstallationPathConfig.getPipelinePath(), DEFAULT_WEBUI_CONFIGS_DIR);
Files.createDirectories(pipelineDir);
@@ -486,7 +791,7 @@ public class GeneralUtils {
}
}
/**
/*
* Extracts the specified Python script from the classpath to the installation path. Validates
* name and copies file atomically when possible, overwriting existing.
*
@@ -497,7 +802,7 @@ public class GeneralUtils {
* @throws IllegalArgumentException if the script name is invalid or not allowed
* @throws IOException if an I/O error occurs
*/
public static Path extractScript(String scriptName) throws IOException {
public Path extractScript(String scriptName) throws IOException {
// Validate input
if (scriptName == null || scriptName.trim().isEmpty()) {
throw new IllegalArgumentException("scriptName must not be null or empty");
@@ -530,15 +835,14 @@ public class GeneralUtils {
return target;
}
/**
/*
* Copies a resource from the classpath to a specified target file.
*
* @param resource the ClassPathResource to copy
* @param target the target Path where the resource will be copied
* @throws IOException if an I/O error occurs during the copy operation
*/
private static void copyResourceToFile(ClassPathResource resource, Path target)
throws IOException {
private void copyResourceToFile(ClassPathResource resource, Path target) throws IOException {
Path dir = target.getParent();
Path tmp = Files.createTempFile(dir, target.getFileName().toString(), ".tmp");
try (InputStream in = resource.getInputStream()) {
@@ -573,7 +877,7 @@ public class GeneralUtils {
}
}
public static boolean isVersionHigher(String currentVersion, String compareVersion) {
public boolean isVersionHigher(String currentVersion, String compareVersion) {
if (currentVersion == null || compareVersion == null) {
return false;
}

View File

@@ -258,10 +258,7 @@ public class PdfAttachmentHandler {
if (contentIdMap.isEmpty()) return htmlContent;
Pattern cidPattern =
Pattern.compile(
"(?i)<img[^>]*\\ssrc\\s*=\\s*['\"]cid:([^'\"]+)['\"][^>]*>",
Pattern.CASE_INSENSITIVE);
Pattern cidPattern = RegexPatternUtils.getInstance().getInlineCidImagePattern();
Matcher matcher = cidPattern.matcher(htmlContent);
StringBuilder result = new StringBuilder();
@@ -319,121 +316,20 @@ public class PdfAttachmentHandler {
}
}
public static class AttachmentMarkerPositionFinder extends PDFTextStripper {
@Getter private final List<MarkerPosition> positions = new ArrayList<>();
private int currentPageIndex;
protected boolean sortByPosition;
private boolean isInAttachmentSection;
private boolean attachmentSectionFound;
private final StringBuilder currentText = new StringBuilder();
private static final Pattern ATTACHMENT_SECTION_PATTERN =
Pattern.compile("attachments\\s*\\(\\d+\\)", Pattern.CASE_INSENSITIVE);
private static final Pattern FILENAME_PATTERN =
Pattern.compile("@\\s*([^\\s\\(]+(?:\\.[a-zA-Z0-9]+)?)");
public AttachmentMarkerPositionFinder() {
super();
this.currentPageIndex = 0;
this.sortByPosition = false; // Disable sorting to preserve document order
this.isInAttachmentSection = false;
this.attachmentSectionFound = false;
}
@Override
public String getText(PDDocument document) throws IOException {
super.getText(document);
if (sortByPosition) {
positions.sort(
(a, b) -> {
int pageCompare = Integer.compare(a.getPageIndex(), b.getPageIndex());
if (pageCompare != 0) return pageCompare;
return Float.compare(
b.getY(), a.getY()); // Descending Y per PDF coordinate system
});
}
return ""; // Return empty string as we only need positions
}
@Override
protected void startPage(PDPage page) throws IOException {
super.startPage(page);
}
@Override
protected void endPage(PDPage page) throws IOException {
currentPageIndex++;
super.endPage(page);
}
@Override
protected void writeString(String string, List<TextPosition> textPositions)
throws IOException {
String lowerString = string.toLowerCase();
if (ATTACHMENT_SECTION_PATTERN.matcher(lowerString).find()) {
isInAttachmentSection = true;
attachmentSectionFound = true;
}
if (isInAttachmentSection
&& (lowerString.contains("</body>")
|| lowerString.contains("</html>")
|| (attachmentSectionFound
&& lowerString.trim().isEmpty()
&& string.length() > 50))) {
isInAttachmentSection = false;
}
if (isInAttachmentSection) {
currentText.append(string);
for (int i = 0; (i = string.indexOf(ATTACHMENT_MARKER, i)) != -1; i++) {
if (i < textPositions.size()) {
TextPosition textPosition = textPositions.get(i);
String filename = extractFilenameAfterMarker(string, i);
MarkerPosition position =
new MarkerPosition(
currentPageIndex,
textPosition.getXDirAdj(),
textPosition.getYDirAdj(),
ATTACHMENT_MARKER,
filename);
positions.add(position);
}
}
}
super.writeString(string, textPositions);
}
@Override
public void setSortByPosition(boolean sortByPosition) {
this.sortByPosition = sortByPosition;
}
private String extractFilenameAfterMarker(String text, int markerIndex) {
String afterMarker = text.substring(markerIndex + 1);
Matcher matcher = FILENAME_PATTERN.matcher("@" + afterMarker);
if (matcher.find()) {
return matcher.group(1);
}
String[] parts = afterMarker.split("[\\s\\(\\)]+");
for (String part : parts) {
part = part.trim();
if (part.length() > 3 && part.contains(".")) {
return part;
}
}
return null;
}
/**
 * Normalizes a filename for comparison: lowercases it, trims it, collapses whitespace runs and
 * then removes every character that is not an ASCII letter, digit, dot, underscore or hyphen.
 *
 * @param filename the filename to normalize, may be {@code null}
 * @return the normalized filename, or an empty string when {@code filename} is {@code null}
 */
private static String normalizeFilename(String filename) {
    if (filename == null) {
        return "";
    }
    // Locale.ROOT keeps the case folding independent of the JVM default locale. With a Turkish
    // default locale, 'I' would otherwise become a dotless 'ı', which the character filter
    // below would then drop from the name.
    String normalized = filename.toLowerCase(java.util.Locale.ROOT).trim();
    RegexPatternUtils patterns = RegexPatternUtils.getInstance();
    normalized = patterns.getWhitespacePattern().matcher(normalized).replaceAll(" ");
    normalized = patterns.getPattern("[^a-zA-Z0-9._-]").matcher(normalized).replaceAll("");
    return normalized;
}
private static Map<Integer, String> addAttachmentsToDocumentWithMapping(
@@ -613,12 +509,122 @@ public class PdfAttachmentHandler {
return null;
}
private static String normalizeFilename(String filename) {
if (filename == null) return "";
return filename.toLowerCase()
.trim()
.replaceAll("\\s+", " ")
.replaceAll("[^a-zA-Z0-9._-]", "");
/**
 * Text stripper that records where attachment markers occur inside the attachment section of a
 * document. The section starts at text matching the attachment section pattern and ends at a
 * closing body/html tag or at a long whitespace-only string.
 */
public static class AttachmentMarkerPositionFinder extends PDFTextStripper {
    private static final Pattern ATTACHMENT_SECTION_PATTERN =
            RegexPatternUtils.getInstance().getAttachmentSectionPattern();
    private static final Pattern FILENAME_PATTERN =
            RegexPatternUtils.getInstance().getAttachmentFilenamePattern();

    @Getter private final List<MarkerPosition> positions = new ArrayList<>();
    private final StringBuilder currentText = new StringBuilder();
    protected boolean sortByPosition;
    private int currentPageIndex;
    private boolean isInAttachmentSection;
    private boolean attachmentSectionFound;

    /** Creates a finder that starts on page index 0, outside any section, with sorting off. */
    public AttachmentMarkerPositionFinder() {
        super();
        this.attachmentSectionFound = false;
        this.isInAttachmentSection = false;
        this.sortByPosition = false; // Disable sorting to preserve document order
        this.currentPageIndex = 0;
    }

    /**
     * Runs the extraction to collect marker positions and optionally sorts them by page index
     * and then by descending Y.
     *
     * @return always an empty string; callers read the collected positions instead
     */
    @Override
    public String getText(PDDocument document) throws IOException {
        super.getText(document);
        if (!sortByPosition) {
            return ""; // Return empty string as we only need positions
        }
        positions.sort(
                (left, right) -> {
                    int byPage = Integer.compare(left.getPageIndex(), right.getPageIndex());
                    // Descending Y per PDF coordinate system
                    return byPage != 0 ? byPage : Float.compare(right.getY(), left.getY());
                });
        return "";
    }

    @Override
    protected void startPage(PDPage page) throws IOException {
        super.startPage(page);
    }

    @Override
    protected void endPage(PDPage page) throws IOException {
        currentPageIndex++;
        super.endPage(page);
    }

    /**
     * Tracks entry into and exit from the attachment section and, while inside it, records one
     * position per marker occurrence whose index has a matching text position.
     */
    @Override
    protected void writeString(String string, List<TextPosition> textPositions)
            throws IOException {
        String lowered = string.toLowerCase();

        if (ATTACHMENT_SECTION_PATTERN.matcher(lowered).find()) {
            isInAttachmentSection = true;
            attachmentSectionFound = true;
        }

        if (isInAttachmentSection && endsAttachmentSection(string, lowered)) {
            isInAttachmentSection = false;
        }

        if (isInAttachmentSection) {
            currentText.append(string);

            int markerAt = string.indexOf(ATTACHMENT_MARKER);
            while (markerAt != -1) {
                if (markerAt < textPositions.size()) {
                    TextPosition where = textPositions.get(markerAt);
                    String filename = extractFilenameAfterMarker(string, markerAt);
                    positions.add(
                            new MarkerPosition(
                                    currentPageIndex,
                                    where.getXDirAdj(),
                                    where.getYDirAdj(),
                                    ATTACHMENT_MARKER,
                                    filename));
                }
                markerAt = string.indexOf(ATTACHMENT_MARKER, markerAt + 1);
            }
        }

        super.writeString(string, textPositions);
    }

    /** Stores the flag locally only; it controls the sort applied in {@link #getText}. */
    @Override
    public void setSortByPosition(boolean sortByPosition) {
        this.sortByPosition = sortByPosition;
    }

    /** Tells whether the given string closes the attachment section. */
    private boolean endsAttachmentSection(String raw, String lowered) {
        if (lowered.contains("</body>") || lowered.contains("</html>")) {
            return true;
        }
        return attachmentSectionFound && lowered.trim().isEmpty() && raw.length() > 50;
    }

    /**
     * Extracts the filename that follows the marker at {@code markerIndex}. The filename pattern
     * is tried first; otherwise the first token longer than three characters that contains a
     * dot is returned.
     *
     * @return the filename, or {@code null} when none can be found
     */
    private String extractFilenameAfterMarker(String text, int markerIndex) {
        String tail = text.substring(markerIndex + 1);

        Matcher byPattern = FILENAME_PATTERN.matcher("@" + tail);
        if (byPattern.find()) {
            return byPattern.group(1);
        }

        String[] tokens =
                RegexPatternUtils.getInstance()
                        .getWhitespaceParenthesesSplitPattern()
                        .split(tail);
        for (String token : tokens) {
            String candidate = token.trim();
            if (candidate.length() > 3 && candidate.contains(".")) {
                return candidate;
            }
        }
        return null;
    }
}
private static void addAttachmentAnnotationToPageWithMapping(

View File

@@ -8,6 +8,8 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
@@ -34,15 +36,20 @@ import org.springframework.web.multipart.MultipartFile;
import io.github.pixee.security.Filenames;
import lombok.experimental.UtilityClass;
import lombok.extern.slf4j.Slf4j;
import stirling.software.common.model.ApplicationProperties;
import stirling.software.common.service.CustomPDFDocumentFactory;
@Slf4j
@UtilityClass
public class PdfUtils {
public static PDRectangle textToPageSize(String size) {
private final RegexPatternUtils patternCache = RegexPatternUtils.getInstance();
public PDRectangle textToPageSize(String size) {
switch (size.toUpperCase()) {
case "A0" -> {
return PDRectangle.A0;
@@ -75,7 +82,7 @@ public class PdfUtils {
}
}
public static List<RenderedImage> getAllImages(PDResources resources) throws IOException {
public List<RenderedImage> getAllImages(PDResources resources) throws IOException {
List<RenderedImage> images = new ArrayList<>();
for (COSName name : resources.getXObjectNames()) {
@@ -92,7 +99,7 @@ public class PdfUtils {
return images;
}
public static boolean hasImages(PDDocument document, String pagesToCheck) throws IOException {
public boolean hasImages(PDDocument document, String pagesToCheck) throws IOException {
String[] pageOrderArr = pagesToCheck.split(",");
List<Integer> pageList =
GeneralUtils.parsePageList(pageOrderArr, document.getNumberOfPages());
@@ -107,7 +114,7 @@ public class PdfUtils {
return false;
}
public static boolean hasText(PDDocument document, String pageNumbersToCheck, String phrase)
public boolean hasText(PDDocument document, String pageNumbersToCheck, String phrase)
throws IOException {
String[] pageOrderArr = pageNumbersToCheck.split(",");
List<Integer> pageList =
@@ -123,11 +130,11 @@ public class PdfUtils {
return false;
}
public static boolean hasImagesOnPage(PDPage page) throws IOException {
public boolean hasImagesOnPage(PDPage page) throws IOException {
return getAllImages(page.getResources()).size() > 0;
}
public static boolean hasTextOnPage(PDPage page, String phrase) throws IOException {
public boolean hasTextOnPage(PDPage page, String phrase) throws IOException {
PDFTextStripper textStripper = new PDFTextStripper();
PDDocument tempDoc = new PDDocument();
tempDoc.addPage(page);
@@ -136,7 +143,7 @@ public class PdfUtils {
return pageText.contains(phrase);
}
public static byte[] convertFromPdf(
public byte[] convertFromPdf(
CustomPDFDocumentFactory pdfDocumentFactory,
byte[] inputStream,
String imageType,
@@ -379,7 +386,7 @@ public class PdfUtils {
* @return converted document to PDF-Image
* @throws IOException if conversion fails
*/
public static PDDocument convertPdfToPdfImage(PDDocument document) throws IOException {
public PDDocument convertPdfToPdfImage(PDDocument document) throws IOException {
PDDocument imageDocument = new PDDocument();
PDFRenderer pdfRenderer = new PDFRenderer(document);
pdfRenderer.setSubsamplingAllowed(true);
@@ -428,8 +435,7 @@ public class PdfUtils {
return imageDocument;
}
private static BufferedImage prepareImageForPdfToImage(
int maxWidth, int height, String imageType) {
private BufferedImage prepareImageForPdfToImage(int maxWidth, int height, String imageType) {
BufferedImage combined;
if ("png".equalsIgnoreCase(imageType)) {
combined = new BufferedImage(maxWidth, height, BufferedImage.TYPE_INT_ARGB);
@@ -445,7 +451,7 @@ public class PdfUtils {
return combined;
}
public static byte[] imageToPdf(
public byte[] imageToPdf(
MultipartFile[] files,
String fitOption,
boolean autoRotate,
@@ -489,7 +495,7 @@ public class PdfUtils {
}
}
public static void addImageToDocument(
public void addImageToDocument(
PDDocument doc, PDImageXObject image, String fitOption, boolean autoRotate)
throws IOException {
boolean imageIsLandscape = image.getWidth() > image.getHeight();
@@ -539,7 +545,7 @@ public class PdfUtils {
}
}
public static byte[] overlayImage(
public byte[] overlayImage(
CustomPDFDocumentFactory pdfDocumentFactory,
byte[] pdfBytes,
byte[] imageBytes,
@@ -586,8 +592,11 @@ public class PdfUtils {
if (pagesToCheck == null || "all".equals(pagesToCheck)) {
pdfText = new StringBuilder(textStripper.getText(pdfDocument));
} else {
// remove whitespaces
pagesToCheck = pagesToCheck.replaceAll("\\s+", "");
// remove whitespaces using cached pattern
Pattern whitespacePattern =
patternCache.getPattern(RegexPatternUtils.getWhitespaceRegex());
Matcher whitespaceMatcher = whitespacePattern.matcher(pagesToCheck);
pagesToCheck = whitespaceMatcher.replaceAll("");
String[] splitPoints = pagesToCheck.split(",");
for (String splitPoint : splitPoints) {

View File

@@ -0,0 +1,506 @@
package stirling.software.common.util;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import lombok.extern.slf4j.Slf4j;
/**
 * Process-wide cache of compiled regular expressions.
 *
 * <p>Patterns are keyed by their regex string and flags and are compiled at most once. Besides
 * the generic {@link #getPattern(String)} / {@link #getPattern(String, int)} lookups, the class
 * offers named getters for the expressions used across the application so that each regex is
 * written down in exactly one place.
 */
@Slf4j
public final class RegexPatternUtils {

    private static final RegexPatternUtils INSTANCE = new RegexPatternUtils();

    private static final String WHITESPACE_REGEX = "\\s++";
    private static final String EXTENSION_REGEX = "\\.(?:[^.]*+)?$";

    private final ConcurrentHashMap<PatternKey, Pattern> patternCache = new ConcurrentHashMap<>();

    private RegexPatternUtils() {
        super();
        // Initialize with commonly used patterns for immediate availability
        precompileCommonPatterns();
    }

    /**
     * Get the singleton instance of the pattern cache.
     *
     * @return the singleton RegexPatternUtils instance
     */
    public static RegexPatternUtils getInstance() {
        return INSTANCE;
    }

    /**
     * Get a compiled pattern (no flags) from the cache, compiling and caching it if not present.
     *
     * <p>The first call for a regex compiles and caches it; later calls return the cached
     * instance.
     *
     * @param regex the regular expression string to compile
     * @return compiled Pattern object, never null
     * @throws PatternSyntaxException if the regex syntax is invalid
     * @throws IllegalArgumentException if regex is null
     */
    public Pattern getPattern(String regex) {
        return getPattern(regex, 0);
    }

    /**
     * Get a compiled pattern with flags.
     *
     * <p>Patterns with different flags are cached separately using a composite key. Common flags
     * include:
     *
     * <ul>
     *   <li>{@link Pattern#CASE_INSENSITIVE} - ignore case differences
     *   <li>{@link Pattern#MULTILINE} - ^ and $ match line boundaries
     *   <li>{@link Pattern#DOTALL} - . matches any character including newlines
     * </ul>
     *
     * @param regex the regular expression string
     * @param flags pattern flags (e.g., Pattern.CASE_INSENSITIVE)
     * @return compiled Pattern object with specified flags
     * @throws PatternSyntaxException if the regex syntax is invalid
     * @throws IllegalArgumentException if regex is null
     */
    public Pattern getPattern(String regex, int flags) {
        if (regex == null) {
            throw new IllegalArgumentException("Regex pattern cannot be null");
        }
        return patternCache.computeIfAbsent(new PatternKey(regex, flags), this::compilePattern);
    }

    /**
     * Check if a pattern (no flags) is already cached.
     *
     * @param regex the regular expression string
     * @return true if pattern is cached, false otherwise
     */
    public boolean isCached(String regex) {
        return isCached(regex, 0);
    }

    /**
     * Check if a pattern with flags is already cached.
     *
     * @param regex the regular expression string
     * @param flags pattern flags
     * @return true if pattern is cached, false otherwise (including for a null regex)
     */
    public boolean isCached(String regex, int flags) {
        return regex != null && patternCache.containsKey(new PatternKey(regex, flags));
    }

    /**
     * Get current cache size (number of cached patterns). Useful for monitoring and debugging.
     *
     * @return number of patterns currently cached
     */
    public int getCacheSize() {
        return patternCache.size();
    }

    /**
     * Clear all cached patterns. Use sparingly as it forces recompilation of all patterns. Mainly
     * useful for testing or memory cleanup in long-running applications.
     */
    public void clearCache() {
        patternCache.clear();
        log.debug("Regex pattern cache cleared");
    }

    /**
     * Remove a specific pattern (no flags) from cache.
     *
     * @param regex the regular expression string to remove
     * @return true if pattern was cached and removed, false otherwise
     */
    public boolean removeFromCache(String regex) {
        return removeFromCache(regex, 0);
    }

    /**
     * Remove a specific pattern with flags from cache.
     *
     * @param regex the regular expression string to remove
     * @param flags pattern flags
     * @return true if pattern was cached and removed, false otherwise
     */
    public boolean removeFromCache(String regex, int flags) {
        if (regex == null) {
            return false;
        }
        boolean removed = patternCache.remove(new PatternKey(regex, flags)) != null;
        if (removed) {
            log.debug("Removed regex pattern from cache: {} (flags: {})", regex, flags);
        }
        return removed;
    }

    /**
     * Internal method to compile a pattern and handle errors consistently.
     *
     * @param key regex and flags to compile
     * @return compiled Pattern
     * @throws PatternSyntaxException if regex is invalid (logged, then rethrown)
     */
    private Pattern compilePattern(PatternKey key) {
        String regex = key.regex();
        int flags = key.flags();
        try {
            Pattern pattern = Pattern.compile(regex, flags);
            log.trace("Compiled and cached regex pattern with flags {}: {}", flags, regex);
            return pattern;
        } catch (PatternSyntaxException e) {
            log.error(
                    "Invalid regex pattern: '{}' with flags {} - {}", regex, flags, e.getMessage());
            throw e;
        }
    }

    /**
     * Pre-compile commonly used patterns for immediate availability. This eliminates first-call
     * compilation overhead for frequent patterns.
     */
    private void precompileCommonPatterns() {
        getPattern(EXTENSION_REGEX); // Extension removal - possessive, optional, anchored
        getPattern("\\.[^.]+$"); // Simple extension match - anchored
        getWhitespacePattern(); // One or more whitespace
        getPattern("\\s*"); // Zero or more whitespace
        getTrailingSlashesPattern();
        getNumericExtractionPattern(); // Non-numeric characters
        getSafeFilenamePattern(); // Unsafe filename characters
        getInputSanitizePattern();
        getFilenameSafePattern();

        // API doc patterns
        getApiDocOutputTypePattern();
        getApiDocInputTypePattern();
        getApiDocTypePattern();

        log.debug("Pre-compiled {} common regex patterns", patternCache.size());
    }

    /** Regex string for one or more whitespace characters (possessive quantifier). */
    public static String getWhitespaceRegex() {
        return WHITESPACE_REGEX;
    }

    /** Regex string matching a trailing file extension, including the dot. */
    public static String getExtensionRegex() {
        return EXTENSION_REGEX;
    }

    /**
     * Creates a pattern for text searching.
     *
     * @param regex the regular expression string
     * @param caseInsensitive when true, the pattern is compiled with {@link
     *     Pattern#CASE_INSENSITIVE} and {@link Pattern#UNICODE_CASE}; otherwise without flags
     * @return the cached or newly compiled pattern
     */
    public Pattern createSearchPattern(String regex, boolean caseInsensitive) {
        int flags = caseInsensitive ? (Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE) : 0;
        return getPattern(regex, flags);
    }

    /** Pattern for matching trailing slashes (e.g., "/path/to/dir///") */
    public Pattern getTrailingSlashesPattern() {
        return getPattern("/+$");
    }

    /** Pattern for removing drive letters from paths */
    public Pattern getDriveLetterPattern() {
        return getPattern("^[a-zA-Z]:[\\\\/]+");
    }

    /** Pattern for removing leading slashes from paths */
    public Pattern getLeadingSlashesPattern() {
        return getPattern("^[\\\\/]+");
    }

    /** Pattern for matching backslashes */
    public Pattern getBackslashPattern() {
        return getPattern("\\\\");
    }

    /** Pattern for sanitizing filenames by removing problematic characters */
    public Pattern getSafeFilenamePattern() {
        return getPattern("[/\\\\?%*:|\"<>]");
    }

    /** Pattern for sanitizing filenames (keeps only alphanumeric) */
    public Pattern getFilenameSafePattern() {
        return getPattern("[^a-zA-Z0-9]");
    }

    /**
     * Pattern for replacing non-alphanumeric characters with underscore (explicit underscore
     * variant)
     */
    public Pattern getNonAlnumUnderscorePattern() {
        return getPattern("[^A-Za-z0-9_]");
    }

    /** Pattern for collapsing multiple underscores */
    public Pattern getMultipleUnderscoresPattern() {
        return getPattern("_+");
    }

    /** Pattern for trimming leading underscores */
    public Pattern getLeadingUnderscoresPattern() {
        return getPattern("^_+");
    }

    /** Pattern for trimming trailing underscores */
    public Pattern getTrailingUnderscoresPattern() {
        return getPattern("_+$");
    }

    /** Pattern for matching upload/download paths (case insensitive) */
    public Pattern getUploadDownloadPathPattern() {
        return getPattern("(?i).*/(upload|download)/.*");
    }

    /** Pattern for matching one or more whitespace characters */
    public Pattern getWhitespacePattern() {
        return getPattern("\\s+");
    }

    /** Pattern for matching newlines (Windows and Unix style) */
    public Pattern getNewlinesPattern() {
        return getPattern("\\r?\\n");
    }

    /** Pattern for splitting on newlines (Windows and Unix style) */
    public Pattern getNewlineSplitPattern() {
        return getNewlinesPattern();
    }

    /** Pattern for splitting text into words */
    public Pattern getWordSplitPattern() {
        return getWhitespacePattern();
    }

    /** Pattern for removing carriage returns */
    public Pattern getCarriageReturnPattern() {
        return getPattern("\\r");
    }

    /** Pattern for matching newline characters */
    public Pattern getNewlineCharsPattern() {
        return getPattern("[\n\r]");
    }

    /** Pattern for multi-format newline splitting (Windows, Mac, Unix) */
    public Pattern getMultiFormatNewlinePattern() {
        return getPattern("\r\n|\r|\n");
    }

    /** Pattern for encoded payload newline removal */
    public Pattern getEncodedPayloadNewlinePattern() {
        return getNewlinesPattern();
    }

    /** Pattern for escaped newlines in watermark text */
    public Pattern getEscapedNewlinePattern() {
        return getPattern("\\\\n");
    }

    /** Pattern for input sanitization (allows only alphanumeric and spaces) */
    public Pattern getInputSanitizePattern() {
        return getPattern("[^a-zA-Z0-9 ]");
    }

    /** Pattern for removing angle brackets */
    public Pattern getAngleBracketsPattern() {
        return getPattern("[<>]");
    }

    /** Pattern for removing leading and trailing quotes */
    public Pattern getQuotesRemovalPattern() {
        return getPattern("^\"|\"$");
    }

    /** Pattern for plus signs (URL encoding replacement) */
    public Pattern getPlusSignPattern() {
        return getPattern("\\+");
    }

    /** Pattern for username validation */
    public Pattern getUsernameValidationPattern() {
        return getPattern("^[a-zA-Z0-9](?!.*[-@._+]{2,})[a-zA-Z0-9@._+-]{1,48}[a-zA-Z0-9]$");
    }

    /**
     * Pattern for email validation.
     *
     * <p>The first domain label must not start with a hyphen. This is expressed as a negative
     * lookahead so that the label's first character is still restricted to letters and digits
     * and one-character labels (for example {@code a@b.co}) are accepted.
     */
    public Pattern getEmailValidationPattern() {
        return getPattern(
                "^(?=.{1,320}$)(?=.{1,64}@)[A-Za-z0-9](?:[A-Za-z0-9_.+-]*[A-Za-z0-9])?@(?!-)[A-Za-z0-9-]+(?:\\.[A-Za-z0-9-]+)*(?:\\.[A-Za-z]{2,})$");
    }

    /** Pattern for extracting non-numeric characters */
    public Pattern getNumericExtractionPattern() {
        return getPattern("\\D");
    }

    /** Pattern for removing non-digit/dot characters (for timeout parsing) */
    public Pattern getNonDigitDotPattern() {
        return getPattern("[^\\d.]");
    }

    /** Pattern for matching digit/dot characters (for timeout parsing) */
    public Pattern getDigitDotPattern() {
        return getPattern("[\\d.]");
    }

    /** Pattern for detecting strings containing digits */
    public Pattern getContainsDigitsPattern() {
        return getPattern(".*\\d+.*");
    }

    /** Pattern for matching 1-3 digit numbers without a leading zero */
    public Pattern getNumberRangePattern() {
        return getPattern("[1-9][0-9]{0,2}");
    }

    /** Pattern for validating mathematical expressions */
    public Pattern getMathExpressionPattern() {
        return getPattern("[0-9n+\\-*/() ]+");
    }

    /** Pattern for adding multiplication between numbers and 'n' */
    public Pattern getNumberBeforeNPattern() {
        return getPattern("(\\d)n");
    }

    /** Pattern for detecting consecutive 'n' characters */
    public Pattern getConsecutiveNPattern() {
        return getPattern(".*n{2,}.*");
    }

    /** Pattern for replacing consecutive 'n' characters */
    public Pattern getConsecutiveNReplacementPattern() {
        return getPattern("(?<!n)n{2}");
    }

    /** Pattern for validating HTTP/HTTPS URLs */
    public Pattern getHttpUrlPattern() {
        return getPattern("^https?://.*");
    }

    /** Pattern for matching URLs in text for link creation */
    public Pattern getUrlLinkPattern() {
        return getPattern("(https?://[\\w\\-._~:/?#\\[\\]@!$&'()*+,;=%]+)");
    }

    /** Pattern for matching email addresses in text for link creation */
    public Pattern getEmailLinkPattern() {
        return getPattern("([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,63})");
    }

    /**
     * Pattern for removing script tags from HTML.
     *
     * <p>NOTE(review): no DOTALL flag is set, so '.' does not match line terminators and a
     * script element spanning several lines is not matched - confirm whether that is intended.
     */
    public Pattern getScriptTagPattern() {
        return getPattern("(?i)<script[^>]*>.*?</script>");
    }

    /**
     * Pattern for removing style tags from HTML.
     *
     * <p>NOTE(review): no DOTALL flag is set, so a style element spanning several lines is not
     * matched - confirm whether that is intended.
     */
    public Pattern getStyleTagPattern() {
        return getPattern("(?i)<style[^>]*>.*?</style>");
    }

    /** Pattern for removing fixed position CSS */
    public Pattern getFixedPositionCssPattern() {
        return getPattern("(?i)\\s*position\\s*:\\s*fixed[^;]*;?");
    }

    /** Pattern for removing absolute position CSS */
    public Pattern getAbsolutePositionCssPattern() {
        return getPattern("(?i)\\s*position\\s*:\\s*absolute[^;]*;?");
    }

    /** Pattern for matching size unit suffixes (KB, MB, GB, etc.) */
    public Pattern getSizeUnitPattern() {
        return getPattern("[KMGkmg][Bb]");
    }

    /** Pattern for system temp file type 1 */
    public Pattern getSystemTempFile1Pattern() {
        return getPattern("lu\\d+[a-z0-9]*\\.tmp");
    }

    /** Pattern for system temp file type 2 (OCR processes) */
    public Pattern getSystemTempFile2Pattern() {
        return getPattern("ocr_process\\d+");
    }

    /** Pattern for splitting on whitespace and parentheses */
    public Pattern getWhitespaceParenthesesSplitPattern() {
        return getPattern("[\\s\\(\\)]+");
    }

    /** Pattern for MIME header whitespace cleanup before encoded sequences */
    public Pattern getMimeHeaderWhitespacePattern() {
        return getPattern("\\s+(?==\\?)");
    }

    /** Pattern for font name validation (6 uppercase letters + plus + rest) */
    public Pattern getFontNamePattern() {
        return getPattern("^[A-Z]{6}\\+.*");
    }

    /** Pattern for matching access="readOnly" attribute in XFA XML (with optional whitespace) */
    public Pattern getAccessReadOnlyPattern() {
        return getPattern("access\\s*=\\s*\"readOnly\"");
    }

    /** Pattern for matching MIME encoded-word headers (RFC 2047) Example: =?charset?B?encoded?= */
    public Pattern getMimeEncodedWordPattern() {
        return getPattern("=\\?([^?]+)\\?([BbQq])\\?([^?]*)\\?=");
    }

    /** Pattern for matching inline CID images in HTML (case-insensitive) */
    public Pattern getInlineCidImagePattern() {
        return getPattern(
                "(?i)<img[^>]*\\ssrc\\s*=\\s*['\"]cid:([^'\"]+)['\"][^>]*>",
                Pattern.CASE_INSENSITIVE);
    }

    /** Pattern for matching attachment section headers (case-insensitive) */
    public Pattern getAttachmentSectionPattern() {
        return getPattern("attachments\\s*\\(\\d+\\)", Pattern.CASE_INSENSITIVE);
    }

    /** Pattern for matching filenames in attachment markers */
    public Pattern getAttachmentFilenamePattern() {
        return getPattern("@\\s*([^\\s\\(]+(?:\\.[a-zA-Z0-9]+)?)");
    }

    // API doc parsing patterns

    /** Pattern for matching Output:&lt;TYPE&gt; in API descriptions */
    public Pattern getApiDocOutputTypePattern() {
        return getPattern("Output:(\\w+)");
    }

    /** Pattern for matching Input:&lt;TYPE&gt; in API descriptions */
    public Pattern getApiDocInputTypePattern() {
        return getPattern("Input:(\\w+)");
    }

    /** Pattern for matching Type:&lt;CODE&gt; in API descriptions */
    public Pattern getApiDocTypePattern() {
        return getPattern("Type:(\\w+)");
    }

    /** Pattern for validating file extensions (2-4 alphanumeric, case-insensitive) */
    public Pattern getFileExtensionValidationPattern() {
        return getPattern("^[a-zA-Z0-9]{2,4}$", Pattern.CASE_INSENSITIVE);
    }

    /** Cache key: a regex string together with its compile flags. */
    private record PatternKey(String regex, int flags) {
        // Record automatically provides equals, hashCode, and toString
    }
}

View File

@@ -50,7 +50,10 @@ public class WebResponseUtils {
headers.setContentType(mediaType);
headers.setContentLength(bytes.length);
String encodedDocName =
URLEncoder.encode(docName, StandardCharsets.UTF_8).replaceAll("\\+", "%20");
RegexPatternUtils.getInstance()
.getPlusSignPattern()
.matcher(URLEncoder.encode(docName, StandardCharsets.UTF_8))
.replaceAll("%20");
headers.setContentDispositionFormData("attachment", encodedDocName);
return new ResponseEntity<>(bytes, headers, HttpStatus.OK);
}