feat: Add RegexPatternUtils for centralized regex management, file naming funcs, UtilityClass annotation (#4218)

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: Anthony Stirling <77850077+Frooodle@users.noreply.github.com>
This commit is contained in:
Balázs Szücs 2025-09-28 17:56:35 +02:00 committed by GitHub
parent 133e6d3de6
commit 045f4cc591
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
78 changed files with 1947 additions and 617 deletions

View File

@ -22,6 +22,7 @@ import lombok.extern.slf4j.Slf4j;
import stirling.software.common.model.job.JobResponse;
import stirling.software.common.util.ExecutorFactory;
import stirling.software.common.util.RegexPatternUtils;
/** Service for executing jobs asynchronously or synchronously */
@Service
@ -426,8 +427,16 @@ public class JobExecutorService {
}
try {
String value = timeout.replaceAll("[^\\d.]", "");
String unit = timeout.replaceAll("[\\d.]", "");
String value =
RegexPatternUtils.getInstance()
.getNonDigitDotPattern()
.matcher(timeout)
.replaceAll("");
String unit =
RegexPatternUtils.getInstance()
.getDigitDotPattern()
.matcher(timeout)
.replaceAll("");
double numericValue = Double.parseDouble(value);

View File

@ -13,6 +13,7 @@ import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import stirling.software.common.model.ApplicationProperties;
import stirling.software.common.util.RegexPatternUtils;
@Service
@RequiredArgsConstructor
@ -22,8 +23,9 @@ public class SsrfProtectionService {
private final ApplicationProperties applicationProperties;
private static final Pattern DATA_URL_PATTERN =
Pattern.compile("^data:.*", Pattern.CASE_INSENSITIVE);
private static final Pattern FRAGMENT_PATTERN = Pattern.compile("^#.*");
RegexPatternUtils.getInstance().getPattern("^data:.*", Pattern.CASE_INSENSITIVE);
private static final Pattern FRAGMENT_PATTERN =
RegexPatternUtils.getInstance().getPattern("^#.*");
public enum SsrfProtectionLevel {
OFF, // No SSRF protection - allows all URLs

View File

@ -23,6 +23,7 @@ import lombok.extern.slf4j.Slf4j;
import stirling.software.common.model.ApplicationProperties;
import stirling.software.common.util.GeneralUtils;
import stirling.software.common.util.RegexPatternUtils;
import stirling.software.common.util.TempFileManager;
import stirling.software.common.util.TempFileRegistry;
@ -61,8 +62,14 @@ public class TempFileCleanupService {
// File patterns that identify common system temp files
private static final Predicate<String> IS_SYSTEM_TEMP_FILE =
fileName ->
fileName.matches("lu\\d+[a-z0-9]*\\.tmp")
|| fileName.matches("ocr_process\\d+")
RegexPatternUtils.getInstance()
.getSystemTempFile1Pattern()
.matcher(fileName)
.matches()
|| RegexPatternUtils.getInstance()
.getSystemTempFile2Pattern()
.matcher(fileName)
.matches()
|| (fileName.startsWith("tmp") && !fileName.contains("jetty"))
|| fileName.startsWith("OSL_PIPE_")
|| (fileName.endsWith(".tmp") && !fileName.contains("jetty"));

View File

@ -29,7 +29,7 @@ public class EmlParser {
private static volatile boolean mimeUtilityChecked = false;
private static final Pattern MIME_ENCODED_PATTERN =
Pattern.compile("=\\?([^?]+)\\?([BbQq])\\?([^?]*)\\?=");
RegexPatternUtils.getInstance().getMimeEncodedWordPattern();
private static final String DISPOSITION_ATTACHMENT = "attachment";
private static final String TEXT_PLAIN = MediaType.TEXT_PLAIN_VALUE;
@ -357,7 +357,11 @@ public class EmlParser {
for (String contentIdHeader : contentIdHeaders) {
if (contentIdHeader != null && !contentIdHeader.trim().isEmpty()) {
attachment.setEmbedded(true);
String contentId = contentIdHeader.trim().replaceAll("[<>]", "");
String contentId =
RegexPatternUtils.getInstance()
.getAngleBracketsPattern()
.matcher(contentIdHeader.trim())
.replaceAll("");
attachment.setContentId(contentId);
break;
}
@ -414,7 +418,8 @@ public class EmlParser {
private static String extractBasicHeader(String emlContent, String headerName) {
try {
String[] lines = emlContent.split("\r?\n");
String[] lines =
RegexPatternUtils.getInstance().getNewlineSplitPattern().split(emlContent);
for (int i = 0; i < lines.length; i++) {
String line = lines[i];
if (line.toLowerCase().startsWith(headerName.toLowerCase())) {
@ -485,7 +490,10 @@ public class EmlParser {
}
private static int findPartEnd(String content, int start) {
String[] lines = content.substring(start).split("\r?\n");
String[] lines =
RegexPatternUtils.getInstance()
.getNewlineSplitPattern()
.split(content.substring(start));
StringBuilder result = new StringBuilder();
for (String line : lines) {
@ -499,7 +507,8 @@ public class EmlParser {
private static List<EmailAttachment> extractAttachmentsBasic(String emlContent) {
List<EmailAttachment> attachments = new ArrayList<>();
try {
String[] lines = emlContent.split("\r?\n");
String[] lines =
RegexPatternUtils.getInstance().getNewlineSplitPattern().split(emlContent);
boolean inHeaders = true;
String currentContentType = "";
String currentDisposition = "";
@ -562,7 +571,11 @@ public class EmlParser {
if (filenameStarEnd == -1) filenameStarEnd = disposition.length();
String extendedFilename =
disposition.substring(filenameStarStart, filenameStarEnd).trim();
extendedFilename = extendedFilename.replaceAll("^\"|\"$", "");
extendedFilename =
RegexPatternUtils.getInstance()
.getQuotesRemovalPattern()
.matcher(extendedFilename)
.replaceAll("");
if (extendedFilename.contains("'")) {
String[] parts = extendedFilename.split("'", 3);
@ -577,7 +590,11 @@ public class EmlParser {
int filenameEnd = disposition.indexOf(";", filenameStart);
if (filenameEnd == -1) filenameEnd = disposition.length();
String filename = disposition.substring(filenameStart, filenameEnd).trim();
filename = filename.replaceAll("^\"|\"$", "");
filename =
RegexPatternUtils.getInstance()
.getQuotesRemovalPattern()
.matcher(filename)
.replaceAll("");
return safeMimeDecode(filename);
}
@ -630,11 +647,23 @@ public class EmlParser {
private List<EmailAttachment> attachments = new ArrayList<>();
public void setHtmlBody(String htmlBody) {
this.htmlBody = htmlBody != null ? htmlBody.replaceAll("\r", "") : null;
this.htmlBody =
htmlBody != null
? RegexPatternUtils.getInstance()
.getCarriageReturnPattern()
.matcher(htmlBody)
.replaceAll("")
: null;
}
public void setTextBody(String textBody) {
this.textBody = textBody != null ? textBody.replaceAll("\r", "") : null;
this.textBody =
textBody != null
? RegexPatternUtils.getInstance()
.getCarriageReturnPattern()
.matcher(textBody)
.replaceAll("")
: null;
}
}

View File

@ -199,8 +199,16 @@ public class EmlProcessingUtils {
String processed =
customHtmlSanitizer != null ? customHtmlSanitizer.sanitize(htmlBody) : htmlBody;
processed = processed.replaceAll("(?i)\\s*position\\s*:\\s*fixed[^;]*;?", "");
processed = processed.replaceAll("(?i)\\s*position\\s*:\\s*absolute[^;]*;?", "");
processed =
RegexPatternUtils.getInstance()
.getFixedPositionCssPattern()
.matcher(processed)
.replaceAll("");
processed =
RegexPatternUtils.getInstance()
.getAbsolutePositionCssPattern()
.matcher(processed)
.replaceAll("");
if (emailContent != null && !emailContent.getAttachments().isEmpty()) {
processed = PdfAttachmentHandler.processInlineImages(processed, emailContent);
@ -222,14 +230,18 @@ public class EmlProcessingUtils {
html = html.replace("\n", "<br>\n");
html =
html.replaceAll(
"(https?://[\\w\\-._~:/?#\\[\\]@!$&'()*+,;=%]+)",
"<a href=\"$1\" style=\"color: #1a73e8; text-decoration: underline;\">$1</a>");
RegexPatternUtils.getInstance()
.getUrlLinkPattern()
.matcher(html)
.replaceAll(
"<a href=\"$1\" style=\"color: #1a73e8; text-decoration: underline;\">$1</a>");
html =
html.replaceAll(
"([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,63})",
"<a href=\"mailto:$1\" style=\"color: #1a73e8; text-decoration: underline;\">$1</a>");
RegexPatternUtils.getInstance()
.getEmailLinkPattern()
.matcher(html)
.replaceAll(
"<a href=\"mailto:$1\" style=\"color: #1a73e8; text-decoration: underline;\">$1</a>");
return html;
}
@ -490,9 +502,13 @@ public class EmlProcessingUtils {
Matcher concatenatedMatcher = concatenatedPattern.matcher(encodedText);
String processedText =
concatenatedMatcher.replaceAll(
match -> match.group().replaceAll("\\s+(?==\\?)", ""));
match ->
RegexPatternUtils.getInstance()
.getMimeHeaderWhitespacePattern()
.matcher(match.group())
.replaceAll(""));
Pattern mimePattern = Pattern.compile("=\\?([^?]+)\\?([BbQq])\\?([^?]*)\\?=");
Pattern mimePattern = RegexPatternUtils.getInstance().getMimeEncodedWordPattern();
Matcher matcher = mimePattern.matcher(processedText);
int lastEnd = 0;
@ -507,7 +523,11 @@ public class EmlProcessingUtils {
String decodedValue =
switch (encoding) {
case "B" -> {
String cleanBase64 = encodedValue.replaceAll("\\s", "");
String cleanBase64 =
RegexPatternUtils.getInstance()
.getWhitespacePattern()
.matcher(encodedValue)
.replaceAll("");
byte[] decodedBytes = Base64.getDecoder().decode(cleanBase64);
Charset targetCharset;
try {
@ -596,8 +616,16 @@ public class EmlProcessingUtils {
}
public static String simplifyHtmlContent(String htmlContent) {
String simplified = htmlContent.replaceAll("(?i)<script[^>]*>.*?</script>", "");
simplified = simplified.replaceAll("(?i)<style[^>]*>.*?</style>", "");
String simplified =
RegexPatternUtils.getInstance()
.getScriptTagPattern()
.matcher(htmlContent)
.replaceAll("");
simplified =
RegexPatternUtils.getInstance()
.getStyleTagPattern()
.matcher(simplified)
.replaceAll("");
return simplified;
}
}

View File

@ -1,6 +1,9 @@
package stirling.software.common.util;
import java.io.*;
import java.io.ByteArrayInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.FileVisitResult;
import java.nio.file.Files;
@ -205,15 +208,27 @@ public class FileToPdf {
return "";
}
// Remove any drive letters (e.g., "C:\") and leading forward/backslashes
entryName = entryName.replaceAll("^[a-zA-Z]:[\\\\/]+", "");
entryName = entryName.replaceAll("^[\\\\/]+", "");
entryName =
RegexPatternUtils.getInstance()
.getDriveLetterPattern()
.matcher(entryName)
.replaceAll("");
entryName =
RegexPatternUtils.getInstance()
.getLeadingSlashesPattern()
.matcher(entryName)
.replaceAll("");
// Recursively remove path traversal sequences
while (entryName.contains("../") || entryName.contains("..\\")) {
entryName = entryName.replace("../", "").replace("..\\", "");
}
// Normalize all backslashes to forward slashes
entryName = entryName.replaceAll("\\\\", "/");
entryName =
RegexPatternUtils.getInstance()
.getBackslashPattern()
.matcher(entryName)
.replaceAll("/");
return entryName;
}
}

View File

@ -9,13 +9,9 @@ import java.nio.charset.StandardCharsets;
import java.nio.file.*;
import java.nio.file.attribute.BasicFileAttributes;
import java.security.MessageDigest;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Enumeration;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.UUID;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.springframework.core.io.ClassPathResource;
import org.springframework.core.io.Resource;
@ -28,25 +24,37 @@ import com.fathzer.soft.javaluator.DoubleEvaluator;
import io.github.pixee.security.HostValidator;
import io.github.pixee.security.Urls;
import lombok.experimental.UtilityClass;
import lombok.extern.slf4j.Slf4j;
import stirling.software.common.configuration.InstallationPathConfig;
@Slf4j
@UtilityClass
public class GeneralUtils {
private static final Set<String> DEFAULT_VALID_SCRIPTS =
Set.of("png_to_webp.py", "split_photos.py");
private static final Set<String> DEFAULT_VALID_PIPELINE =
private final Set<String> DEFAULT_VALID_SCRIPTS = Set.of("png_to_webp.py", "split_photos.py");
private final Set<String> DEFAULT_VALID_PIPELINE =
Set.of(
"OCR images.json",
"Prepare-pdfs-for-email.json",
"split-rotate-auto-rename.json");
private static final String DEFAULT_WEBUI_CONFIGS_DIR = "defaultWebUIConfigs";
private static final String PYTHON_SCRIPTS_DIR = "python";
private final String DEFAULT_WEBUI_CONFIGS_DIR = "defaultWebUIConfigs";
private final String PYTHON_SCRIPTS_DIR = "python";
private final RegexPatternUtils patternCache = RegexPatternUtils.getInstance();
// Valid size units used for convertSizeToBytes validation and parsing
private final Set<String> VALID_SIZE_UNITS = Set.of("B", "KB", "MB", "GB", "TB");
public static File convertMultipartFileToFile(MultipartFile multipartFile) throws IOException {
/*
* Converts a MultipartFile to a regular File with improved performance and security.
*
* @param multipartFile the multipart file to convert
* @return temporary File containing the multipart file data
* @throws IOException if I/O error occurs during conversion
* @throws IllegalArgumentException if file exceeds maximum allowed size
*/
public File convertMultipartFileToFile(MultipartFile multipartFile) throws IOException {
String customTempDir = System.getenv("STIRLING_TEMPFILES_DIRECTORY");
if (customTempDir == null || customTempDir.isEmpty()) {
customTempDir = System.getProperty("stirling.tempfiles.directory");
@ -81,10 +89,137 @@ public class GeneralUtils {
return tempFile;
}
public static void deleteDirectory(Path path) throws IOException {
/**
 * Resolves the directory used for temporary files, creating it if it does not exist yet.
 *
 * <p>The location is read from the {@code STIRLING_TEMPFILES_DIRECTORY} environment variable
 * and, when that is unset or empty, from the {@code stirling.tempfiles.directory} system
 * property. If neither yields a value, the {@code stirling-pdf} folder below
 * {@code java.io.tmpdir} is used.
 *
 * @return Path to the temporary directory
 * @throws IOException if directory creation fails
 */
private Path getTempDirectory() throws IOException {
    String configuredDir = System.getenv("STIRLING_TEMPFILES_DIRECTORY");
    if (configuredDir == null || configuredDir.isEmpty()) {
        configuredDir = System.getProperty("stirling.tempfiles.directory");
    }

    boolean hasConfiguredDir = configuredDir != null && !configuredDir.isEmpty();
    Path resolved =
            hasConfiguredDir
                    ? Path.of(configuredDir)
                    : Path.of(System.getProperty("java.io.tmpdir"), "stirling-pdf");

    boolean alreadyPresent = Files.exists(resolved);
    if (!alreadyPresent) {
        Files.createDirectories(resolved);
    }
    return resolved;
}
/**
 * Removes the file extension from a filename.
 *
 * <p>The extension is everything from the last {@code '.'} onward, but only when that dot is
 * neither the first nor the last character of the name. Names without a dot, hidden-file style
 * names such as {@code ".gitignore"}, and names ending in a dot are returned unchanged.
 *
 * <p>Implemented with plain string operations; no regular expression is involved.
 *
 * @param filename the filename to process, may be null
 * @return filename without extension, the unchanged filename when it has no removable
 *     extension (including the empty string), or "default" if input is null
 */
public String removeExtension(String filename) {
    if (filename == null) {
        return "default";
    }

    int dotIndex = filename.lastIndexOf('.');
    // A dot at index 0 (hidden file), at the very end, or absent (-1) does not delimit an
    // extension. These cases together with the "inner dot" case cover every possible value
    // of lastIndexOf, so no further fallback is needed.
    boolean hasExtension = dotIndex > 0 && dotIndex < filename.length() - 1;
    return hasExtension ? filename.substring(0, dotIndex) : filename;
}
/**
 * Concatenates a base name and a suffix, substituting safe values for nulls.
 *
 * @param baseName the base filename, null becomes "default"
 * @param suffix the suffix to append, null becomes empty string
 * @return the base name (or "default") followed by the suffix (or nothing)
 */
public String appendSuffix(String baseName, String suffix) {
    String safeBase = baseName != null ? baseName : "default";
    String safeSuffix = suffix == null ? "" : suffix;
    return safeBase + safeSuffix;
}
/**
 * Builds an output filename by stripping the extension from the given filename and appending
 * a suffix.
 *
 * <p>Combines {@code removeExtension} and {@code appendSuffix}; null handling is whatever
 * those two methods provide.
 *
 * @param firstFilename the filename of the first file being processed, may be null
 * @param suffix the suffix to append (e.g., "_merged.pdf")
 * @return filename with suffix, or default name if input is null
 */
public String generateFilename(String firstFilename, String suffix) {
    return appendSuffix(removeExtension(firstFilename), suffix);
}
/**
 * Strips the extension from each filename, appends the suffix, and hands every resulting name
 * to the given consumer in list order.
 *
 * <p>Does nothing when either the list or the consumer is null. Individual entries are passed
 * through {@code removeExtension} and {@code appendSuffix} unchanged, so their null handling
 * applies per element.
 *
 * @param filenames the list of filenames to process, may be null
 * @param suffix the suffix to append to each processed filename
 * @param processor consumer to handle each processed filename, may be null
 */
public void processFilenames(
        List<String> filenames, String suffix, java.util.function.Consumer<String> processor) {
    boolean nothingToDo = filenames == null || processor == null;
    if (nothingToDo) {
        return;
    }

    java.util.function.Function<String, String> rename =
            original -> appendSuffix(removeExtension(original), suffix);
    filenames.stream().map(rename).forEach(processor);
}
/**
 * Derives a title from a filename by dropping its extension.
 *
 * <p>Null and empty filenames yield "Untitled"; every other value is delegated to
 * {@code removeExtension}.
 *
 * @param filename the filename to extract title from, may be null
 * @return the title without extension, or "Untitled" if input is null/empty
 */
public String getTitleFromFilename(String filename) {
    boolean missing = filename == null || filename.isEmpty();
    return missing ? "Untitled" : removeExtension(filename);
}
public void deleteDirectory(Path path) throws IOException {
Files.walkFileTree(
path,
new SimpleFileVisitor<Path>() {
new SimpleFileVisitor<>() {
@Override
public FileVisitResult visitFile(Path file, BasicFileAttributes attrs)
throws IOException {
@ -101,8 +236,18 @@ public class GeneralUtils {
});
}
public static String convertToFileName(String name) {
String safeName = name.replaceAll("[^a-zA-Z0-9]", "_");
public String convertToFileName(String name) {
if (name == null) return "_";
StringBuilder safeNameBuilder = new StringBuilder(name.length());
for (int i = 0; i < name.length(); i++) {
char c = name.charAt(i);
if (Character.isLetterOrDigit(c)) {
safeNameBuilder.append(c);
} else {
safeNameBuilder.append('_');
}
}
String safeName = safeNameBuilder.toString();
if (safeName.length() > 50) {
safeName = safeName.substring(0, 50);
}
@ -110,19 +255,20 @@ public class GeneralUtils {
}
// Get resources from a location pattern
public static Resource[] getResourcesFromLocationPattern(
public Resource[] getResourcesFromLocationPattern(
String locationPattern, ResourceLoader resourceLoader) throws Exception {
// Normalize the path for file resources
if (locationPattern.startsWith("file:")) {
String rawPath = locationPattern.substring(5).replace("\\*", "").replace("/*", "");
String pattern = locationPattern;
if (pattern.startsWith("file:")) {
String rawPath = pattern.substring(5).replace("\\*", "").replace("/*", "");
Path normalizePath = Paths.get(rawPath).normalize();
locationPattern = "file:" + normalizePath.toString().replace("\\", "/") + "/*";
pattern = "file:" + normalizePath.toString().replace("\\", "/") + "/*";
}
return ResourcePatternUtils.getResourcePatternResolver(resourceLoader)
.getResources(locationPattern);
.getResources(pattern);
}
public static boolean isValidURL(String urlStr) {
public boolean isValidURL(String urlStr) {
try {
Urls.create(
urlStr, Urls.HTTP_PROTOCOLS, HostValidator.DENY_COMMON_INFRASTRUCTURE_TARGETS);
@ -132,7 +278,25 @@ public class GeneralUtils {
}
}
public static boolean isURLReachable(String urlStr) {
/**
 * Checks whether a URL is reachable using default timeouts.
 *
 * <p>Delegates to {@code isURLReachable(String, int, int)} with 5000 ms for both the connect
 * and the read timeout.
 *
 * @param urlStr the URL string to check
 * @return true if URL is reachable, false otherwise
 */
public boolean isURLReachable(String urlStr) {
    final int defaultConnectTimeoutMillis = 5000;
    final int defaultReadTimeoutMillis = 5000;
    return isURLReachable(urlStr, defaultConnectTimeoutMillis, defaultReadTimeoutMillis);
}
/*
* Checks if a URL is reachable with configurable timeouts.
*
* @param urlStr the URL string to check
* @param connectTimeout connection timeout in milliseconds
* @param readTimeout read timeout in milliseconds
* @return true if URL is reachable, false otherwise
*/
public boolean isURLReachable(String urlStr, int connectTimeout, int readTimeout) {
try {
// Parse the URL
URL url = URI.create(urlStr).toURL();
@ -152,16 +316,19 @@ public class GeneralUtils {
// Check if the URL is reachable
HttpURLConnection connection = (HttpURLConnection) url.openConnection();
connection.setRequestMethod("HEAD");
// connection.setConnectTimeout(5000); // Set connection timeout
// connection.setReadTimeout(5000); // Set read timeout
connection.setConnectTimeout(connectTimeout);
connection.setReadTimeout(readTimeout);
connection.setInstanceFollowRedirects(false); // Security: prevent redirect loops
int responseCode = connection.getResponseCode();
return (200 <= responseCode && responseCode <= 399);
} catch (Exception e) {
log.debug("URL {} is not reachable: {}", urlStr, e.getMessage());
return false; // Return false in case of any exception
}
}
private static boolean isLocalAddress(String host) {
private boolean isLocalAddress(String host) {
try {
// Resolve DNS to IP address
InetAddress address = InetAddress.getByName(host);
@ -181,7 +348,14 @@ public class GeneralUtils {
}
}
public static File multipartToFile(MultipartFile multipart) throws IOException {
/*
* Improved multipart file conversion using the shared helper method.
*
* @param multipart the multipart file to convert
* @return temporary File containing the multipart file data
* @throws IOException if I/O error occurs during conversion
*/
public File multipartToFile(MultipartFile multipart) throws IOException {
Path tempFile = Files.createTempFile("overlay-", ".pdf");
try (InputStream in = multipart.getInputStream();
FileOutputStream out = new FileOutputStream(tempFile.toFile())) {
@ -194,54 +368,105 @@ public class GeneralUtils {
return tempFile.toFile();
}
public static Long convertSizeToBytes(String sizeStr) {
/*
* Converts a size string to bytes. Supports the B, KB, MB, GB and TB units and logs a warning when the numeric part cannot be parsed.
*
* @param sizeStr the size string to convert (e.g., "100MB", "1.5GB")
* @param defaultUnit the default unit to assume if none specified ("MB", "GB", etc.)
* @return size in bytes, or null if parsing fails
* @throws IllegalArgumentException if defaultUnit is invalid
*/
public Long convertSizeToBytes(String sizeStr, String defaultUnit) {
if (sizeStr == null) {
return null;
}
if (defaultUnit != null && !isValidSizeUnit(defaultUnit)) {
throw new IllegalArgumentException("Invalid default unit: " + defaultUnit);
}
sizeStr = sizeStr.trim().toUpperCase();
sizeStr = sizeStr.replace(",", ".").replace(" ", "");
try {
if (sizeStr.endsWith("KB")) {
return (long)
(Double.parseDouble(sizeStr.substring(0, sizeStr.length() - 2)) * 1024);
} else if (sizeStr.endsWith("MB")) {
if (sizeStr.endsWith("TB")) {
return (long)
(Double.parseDouble(sizeStr.substring(0, sizeStr.length() - 2))
* 1024
* 1024);
* 1024L
* 1024L
* 1024L
* 1024L);
} else if (sizeStr.endsWith("GB")) {
return (long)
(Double.parseDouble(sizeStr.substring(0, sizeStr.length() - 2))
* 1024
* 1024
* 1024);
} else if (sizeStr.endsWith("B")) {
* 1024L
* 1024L
* 1024L);
} else if (sizeStr.endsWith("MB")) {
return (long)
(Double.parseDouble(sizeStr.substring(0, sizeStr.length() - 2))
* 1024L
* 1024L);
} else if (sizeStr.endsWith("KB")) {
return (long)
(Double.parseDouble(sizeStr.substring(0, sizeStr.length() - 2)) * 1024L);
} else if (!sizeStr.isEmpty() && sizeStr.charAt(sizeStr.length() - 1) == 'B') {
return Long.parseLong(sizeStr.substring(0, sizeStr.length() - 1));
} else {
// Assume MB if no unit is specified
return (long) (Double.parseDouble(sizeStr) * 1024 * 1024);
// Use provided default unit or fall back to MB
String unit = defaultUnit != null ? defaultUnit.toUpperCase() : "MB";
double value = Double.parseDouble(sizeStr);
return switch (unit) {
case "TB" -> (long) (value * 1024L * 1024L * 1024L * 1024L);
case "GB" -> (long) (value * 1024L * 1024L * 1024L);
case "MB" -> (long) (value * 1024L * 1024L);
case "KB" -> (long) (value * 1024L);
case "B" -> (long) value;
default -> (long) (value * 1024L * 1024L); // Default to MB
};
}
} catch (NumberFormatException e) {
// The numeric part of the input string cannot be parsed, handle this case
log.warn("Failed to parse size string '{}': {}", sizeStr, e.getMessage());
return null;
}
return null;
}
public static String formatBytes(long bytes) {
/**
 * Converts a size string to bytes, treating a value without a unit as megabytes.
 *
 * <p>Delegates to {@code convertSizeToBytes(String, String)} with "MB" as the default unit.
 *
 * @param sizeStr the size string to convert
 * @return size in bytes, or null if parsing fails
 */
public Long convertSizeToBytes(String sizeStr) {
    final String fallbackUnit = "MB";
    return convertSizeToBytes(sizeStr, fallbackUnit);
}
/**
 * Tells whether the given string names a supported size unit.
 *
 * <p>The comparison is case-insensitive: the unit is upper-cased with {@code Locale.ROOT}
 * before it is looked up in {@code VALID_SIZE_UNITS}.
 *
 * @param unit the unit to check, may be null
 * @return true if the unit is non-null and contained in the set of valid units
 */
private boolean isValidSizeUnit(String unit) {
    if (unit == null) {
        return false;
    }
    String normalizedUnit = unit.toUpperCase(Locale.ROOT);
    return VALID_SIZE_UNITS.contains(normalizedUnit);
}
/* Formats a byte count as B, KB, MB, GB or TB (two decimals from KB upward); negative input yields "Invalid size". */
public String formatBytes(long bytes) {
if (bytes < 0) {
return "Invalid size";
}
if (bytes < 1024) {
return bytes + " B";
} else if (bytes < 1024 * 1024) {
} else if (bytes < 1024L * 1024L) {
return String.format(Locale.US, "%.2f KB", bytes / 1024.0);
} else if (bytes < 1024 * 1024 * 1024) {
} else if (bytes < 1024L * 1024L * 1024L) {
return String.format(Locale.US, "%.2f MB", bytes / (1024.0 * 1024.0));
} else {
} else if (bytes < 1024L * 1024L * 1024L * 1024L) {
return String.format(Locale.US, "%.2f GB", bytes / (1024.0 * 1024.0 * 1024.0));
} else {
return String.format(Locale.US, "%.2f TB", bytes / (1024.0 * 1024.0 * 1024.0 * 1024.0));
}
}
public static List<Integer> parsePageList(String pages, int totalPages, boolean oneBased) {
public List<Integer> parsePageList(String pages, int totalPages, boolean oneBased) {
if (pages == null) {
return List.of(1); // Default to first page if input is null
}
@ -252,11 +477,11 @@ public class GeneralUtils {
}
}
public static List<Integer> parsePageList(String[] pages, int totalPages) {
public List<Integer> parsePageList(String[] pages, int totalPages) {
return parsePageList(pages, totalPages, false);
}
public static List<Integer> parsePageList(String[] pages, int totalPages, boolean oneBased) {
public List<Integer> parsePageList(String[] pages, int totalPages, boolean oneBased) {
List<Integer> result = new ArrayList<>();
int offset = oneBased ? 1 : 0;
for (String page : pages) {
@ -278,33 +503,72 @@ public class GeneralUtils {
return result;
}
public static List<Integer> evaluateNFunc(String expression, int maxValue) {
/*
* Evaluates a mathematical expression in 'n' for n = 1..maxValue, with input validation and result bounds checking; maxValue is capped at 10000 to bound the work.
*
* @param expression the mathematical expression containing 'n'
* @param maxValue the maximum value for 'n' and result bounds
* @return list of valid page numbers
* @throws IllegalArgumentException if expression is invalid or unsafe
*/
public List<Integer> evaluateNFunc(String expression, int maxValue) {
if (expression == null || expression.trim().isEmpty()) {
throw new IllegalArgumentException("Expression cannot be null or empty");
}
if (maxValue <= 0 || maxValue > 10000) {
throw new IllegalArgumentException("maxValue must be between 1 and 10000 for safety");
}
List<Integer> results = new ArrayList<>();
DoubleEvaluator evaluator = new DoubleEvaluator();
// Validate the expression
if (!expression.matches("[0-9n+\\-*/() ]+")) {
throw new IllegalArgumentException("Invalid expression");
// Validate the expression format
if (!RegexPatternUtils.getInstance()
.getMathExpressionPattern()
.matcher(expression.trim())
.matches()) {
throw new IllegalArgumentException("Invalid expression format: " + expression);
}
for (int n = 1; n <= maxValue; n++) {
// Replace 'n' with the current value of n, correctly handling numbers before
// 'n'
String sanitizedExpression = sanitizeNFunction(expression, n);
Double result = evaluator.evaluate(sanitizedExpression);
try {
// Replace 'n' with the current value of n, correctly handling numbers before 'n'
String sanitizedExpression = sanitizeNFunction(expression.trim(), n);
Double result = evaluator.evaluate(sanitizedExpression);
// Check if the result is null or not within bounds
if (result == null) break;
// Check if the result is null or not within bounds
if (result == null || !Double.isFinite(result)) {
continue;
}
if (result.intValue() > 0 && result.intValue() <= maxValue)
results.add(result.intValue());
int intResult = result.intValue();
if (intResult > 0 && intResult <= maxValue) {
results.add(intResult);
}
} catch (Exception e) {
log.debug(
"Failed to evaluate expression '{}' for n={}: {}",
expression,
n,
e.getMessage());
// Continue with next value instead of breaking
}
}
return results;
}
private static String sanitizeNFunction(String expression, int nValue) {
String sanitizedExpression = expression.replace(" ", "");
private String sanitizeNFunction(String expression, int nValue) {
// Remove all spaces using a specialized character removal
StringBuilder sb = new StringBuilder(expression.length());
for (int i = 0; i < expression.length(); i++) {
char c = expression.charAt(i);
if (c != ' ') {
sb.append(c);
}
}
String sanitizedExpression = sb.toString();
String multiplyByOpeningRoundBracketPattern =
"([0-9n)])\\("; // example: n(n-1), 9(n-1), (n-1)(n-2)
sanitizedExpression =
@ -319,23 +583,45 @@ public class GeneralUtils {
return sanitizedExpression;
}
private static String insertMultiplicationBeforeN(String expression, int nValue) {
// Insert multiplication between a number and 'n' (e.g., "4n" becomes "4*n")
String withMultiplication = expression.replaceAll("(\\d)n", "$1*n");
/**
 * Makes implicit multiplications involving 'n' explicit and then substitutes the value of n.
 *
 * <p>A '*' is inserted after every digit that is directly followed by 'n' (e.g. "4n" becomes
 * "4*n"), runs of consecutive 'n' are expanded by {@code formatConsecutiveNsForNFunction},
 * and finally every 'n' is replaced by the decimal form of {@code nValue}.
 *
 * @param expression the expression containing 'n'
 * @param nValue the value to substitute for 'n'
 * @return the expression with explicit multiplication and 'n' replaced by its value
 */
private String insertMultiplicationBeforeN(String expression, int nValue) {
    int length = expression.length();
    // +4 leaves a little room for inserted '*' characters; the builder grows if more are needed
    StringBuilder explicit = new StringBuilder(length + 4);
    for (int pos = 0; pos < length; pos++) {
        char current = expression.charAt(pos);
        explicit.append(current);
        boolean nFollows = pos + 1 < length && expression.charAt(pos + 1) == 'n';
        if (nFollows && Character.isDigit(current)) {
            explicit.append('*');
        }
    }

    String expanded = formatConsecutiveNsForNFunction(explicit.toString());

    // Now replace 'n' with its current value
    String substitution = String.valueOf(nValue);
    return expanded.replace("n", substitution);
}
private static String formatConsecutiveNsForNFunction(String expression) {
private String formatConsecutiveNsForNFunction(String expression) {
String text = expression;
while (text.matches(".*n{2,}.*")) {
text = text.replaceAll("(?<!n)n{2}", "n*n");
// Replace all consecutive 'nn' with 'n*n' until no more 'nn' is found
while (text.contains("nn")) {
StringBuilder sb = new StringBuilder(text.length() + 2); // +2 for possible extra '*'
int i = 0;
while (i < text.length()) {
if (i < text.length() - 1 && text.charAt(i) == 'n' && text.charAt(i + 1) == 'n') {
sb.append("n*n");
i += 2;
} else {
sb.append(text.charAt(i));
i++;
}
}
text = sb.toString();
}
return text;
}
private static List<Integer> handlePart(String part, int totalPages, int offset) {
private List<Integer> handlePart(String part, int totalPages, int offset) {
List<Integer> partResult = new ArrayList<>();
// First check for n-syntax because it should not be processed as a range
@ -361,7 +647,7 @@ public class GeneralUtils {
}
}
} catch (NumberFormatException e) {
// Range is invalid, ignore this part
log.debug("Invalid range: {}", part);
}
} else {
// This is a single page number
@ -370,14 +656,14 @@ public class GeneralUtils {
if (pageNum >= 1 && pageNum <= totalPages) {
partResult.add(pageNum - 1 + offset);
}
} catch (NumberFormatException ignored) {
// Ignore invalid numbers
} catch (NumberFormatException e) {
log.debug("Invalid page number: {}", part);
}
}
return partResult;
}
public static boolean createDir(String path) {
public boolean createDir(String path) {
Path folder = Paths.get(path);
if (!Files.exists(folder)) {
try {
@ -390,7 +676,7 @@ public class GeneralUtils {
return true;
}
public static boolean isValidUUID(String uuid) {
public boolean isValidUUID(String uuid) {
if (uuid == null) {
return false;
}
@ -406,7 +692,7 @@ public class GeneralUtils {
* Internal Implementation Details *
*------------------------------------------------------------------------*/
public static void saveKeyToSettings(String key, Object newValue) throws IOException {
public void saveKeyToSettings(String key, Object newValue) throws IOException {
String[] keyArray = key.split("\\.");
Path settingsPath = Paths.get(InstallationPathConfig.getSettingsPath());
YamlHelper settingsYaml = new YamlHelper(settingsPath);
@ -414,48 +700,67 @@ public class GeneralUtils {
settingsYaml.saveOverride(settingsPath);
}
public static String generateMachineFingerprint() {
/*
* Machine fingerprint generation with better error logging and fallbacks.
*
* @return unique machine fingerprint or "GenericID" if generation fails
*/
public String generateMachineFingerprint() {
try {
// Get the MAC address
StringBuilder sb = new StringBuilder();
// Try to get MAC address from primary network interface
InetAddress ip = InetAddress.getLocalHost();
NetworkInterface network = NetworkInterface.getByInetAddress(ip);
if (network == null) {
if (network == null || network.getHardwareAddress() == null) {
// Fallback: iterate through all network interfaces
Enumeration<NetworkInterface> networks = NetworkInterface.getNetworkInterfaces();
while (networks.hasMoreElements()) {
NetworkInterface net = networks.nextElement();
byte[] mac = net.getHardwareAddress();
if (mac != null) {
for (int i = 0; i < mac.length; i++) {
sb.append(String.format("%02X", mac[i]));
if (net.isUp() && !net.isLoopback() && !net.isVirtual()) {
byte[] mac = net.getHardwareAddress();
if (mac != null && mac.length > 0) {
for (byte b : mac) {
sb.append(String.format("%02X", b));
}
break; // Use the first valid network interface
}
break; // Use the first network interface with a MAC address
}
}
} else {
byte[] mac = network.getHardwareAddress();
if (mac != null) {
for (int i = 0; i < mac.length; i++) {
sb.append(String.format("%02X", mac[i]));
for (byte b : mac) {
sb.append(String.format("%02X", b));
}
}
}
// Hash the MAC address for privacy and consistency
// If no MAC address found, use hostname as fallback
if (sb.length() == 0) {
String hostname = InetAddress.getLocalHost().getHostName();
sb.append(hostname != null ? hostname : "unknown-host");
log.warn("No MAC address found, using hostname for fingerprint generation");
}
// Hash the collected data for privacy and consistency
MessageDigest md = MessageDigest.getInstance("SHA-256");
byte[] hash = md.digest(sb.toString().getBytes(StandardCharsets.UTF_8));
StringBuilder fingerprint = new StringBuilder();
for (byte b : hash) {
fingerprint.append(String.format("%02x", b));
}
log.debug("Successfully generated machine fingerprint");
return fingerprint.toString();
} catch (Exception e) {
log.warn("Failed to generate machine fingerprint: {}", e.getMessage());
return "GenericID";
}
}
/**
/*
* Extracts the default pipeline configurations from the classpath to the installation path.
* Creates directories if needed and copies default JSON files.
*
@ -464,7 +769,7 @@ public class GeneralUtils {
*
* @throws IOException if an I/O error occurs during file operations
*/
public static void extractPipeline() throws IOException {
public void extractPipeline() throws IOException {
Path pipelineDir =
Paths.get(InstallationPathConfig.getPipelinePath(), DEFAULT_WEBUI_CONFIGS_DIR);
Files.createDirectories(pipelineDir);
@ -486,7 +791,7 @@ public class GeneralUtils {
}
}
/**
/*
* Extracts the specified Python script from the classpath to the installation path. Validates
* name and copies file atomically when possible, overwriting existing.
*
@ -497,7 +802,7 @@ public class GeneralUtils {
* @throws IllegalArgumentException if the script name is invalid or not allowed
* @throws IOException if an I/O error occurs
*/
public static Path extractScript(String scriptName) throws IOException {
public Path extractScript(String scriptName) throws IOException {
// Validate input
if (scriptName == null || scriptName.trim().isEmpty()) {
throw new IllegalArgumentException("scriptName must not be null or empty");
@ -530,15 +835,14 @@ public class GeneralUtils {
return target;
}
/**
/*
* Copies a resource from the classpath to a specified target file.
*
* @param resource the ClassPathResource to copy
* @param target the target Path where the resource will be copied
* @throws IOException if an I/O error occurs during the copy operation
*/
private static void copyResourceToFile(ClassPathResource resource, Path target)
throws IOException {
private void copyResourceToFile(ClassPathResource resource, Path target) throws IOException {
Path dir = target.getParent();
Path tmp = Files.createTempFile(dir, target.getFileName().toString(), ".tmp");
try (InputStream in = resource.getInputStream()) {
@ -573,7 +877,7 @@ public class GeneralUtils {
}
}
public static boolean isVersionHigher(String currentVersion, String compareVersion) {
public boolean isVersionHigher(String currentVersion, String compareVersion) {
if (currentVersion == null || compareVersion == null) {
return false;
}

View File

@ -258,10 +258,7 @@ public class PdfAttachmentHandler {
if (contentIdMap.isEmpty()) return htmlContent;
Pattern cidPattern =
Pattern.compile(
"(?i)<img[^>]*\\ssrc\\s*=\\s*['\"]cid:([^'\"]+)['\"][^>]*>",
Pattern.CASE_INSENSITIVE);
Pattern cidPattern = RegexPatternUtils.getInstance().getInlineCidImagePattern();
Matcher matcher = cidPattern.matcher(htmlContent);
StringBuilder result = new StringBuilder();
@ -319,121 +316,20 @@ public class PdfAttachmentHandler {
}
}
public static class AttachmentMarkerPositionFinder extends PDFTextStripper {
@Getter private final List<MarkerPosition> positions = new ArrayList<>();
private int currentPageIndex;
protected boolean sortByPosition;
private boolean isInAttachmentSection;
private boolean attachmentSectionFound;
private final StringBuilder currentText = new StringBuilder();
private static final Pattern ATTACHMENT_SECTION_PATTERN =
Pattern.compile("attachments\\s*\\(\\d+\\)", Pattern.CASE_INSENSITIVE);
private static final Pattern FILENAME_PATTERN =
Pattern.compile("@\\s*([^\\s\\(]+(?:\\.[a-zA-Z0-9]+)?)");
public AttachmentMarkerPositionFinder() {
super();
this.currentPageIndex = 0;
this.sortByPosition = false; // Disable sorting to preserve document order
this.isInAttachmentSection = false;
this.attachmentSectionFound = false;
}
@Override
public String getText(PDDocument document) throws IOException {
super.getText(document);
if (sortByPosition) {
positions.sort(
(a, b) -> {
int pageCompare = Integer.compare(a.getPageIndex(), b.getPageIndex());
if (pageCompare != 0) return pageCompare;
return Float.compare(
b.getY(), a.getY()); // Descending Y per PDF coordinate system
});
}
return ""; // Return empty string as we only need positions
}
@Override
protected void startPage(PDPage page) throws IOException {
super.startPage(page);
}
@Override
protected void endPage(PDPage page) throws IOException {
currentPageIndex++;
super.endPage(page);
}
@Override
protected void writeString(String string, List<TextPosition> textPositions)
throws IOException {
String lowerString = string.toLowerCase();
if (ATTACHMENT_SECTION_PATTERN.matcher(lowerString).find()) {
isInAttachmentSection = true;
attachmentSectionFound = true;
}
if (isInAttachmentSection
&& (lowerString.contains("</body>")
|| lowerString.contains("</html>")
|| (attachmentSectionFound
&& lowerString.trim().isEmpty()
&& string.length() > 50))) {
isInAttachmentSection = false;
}
if (isInAttachmentSection) {
currentText.append(string);
for (int i = 0; (i = string.indexOf(ATTACHMENT_MARKER, i)) != -1; i++) {
if (i < textPositions.size()) {
TextPosition textPosition = textPositions.get(i);
String filename = extractFilenameAfterMarker(string, i);
MarkerPosition position =
new MarkerPosition(
currentPageIndex,
textPosition.getXDirAdj(),
textPosition.getYDirAdj(),
ATTACHMENT_MARKER,
filename);
positions.add(position);
}
}
}
super.writeString(string, textPositions);
}
@Override
public void setSortByPosition(boolean sortByPosition) {
this.sortByPosition = sortByPosition;
}
private String extractFilenameAfterMarker(String text, int markerIndex) {
String afterMarker = text.substring(markerIndex + 1);
Matcher matcher = FILENAME_PATTERN.matcher("@" + afterMarker);
if (matcher.find()) {
return matcher.group(1);
}
String[] parts = afterMarker.split("[\\s\\(\\)]+");
for (String part : parts) {
part = part.trim();
if (part.length() > 3 && part.contains(".")) {
return part;
}
}
return null;
}
/**
 * Normalizes an attachment filename so that two spellings of the same name compare equal: the
 * name is lower-cased and every character outside {@code [a-zA-Z0-9._-]} is removed.
 *
 * @param filename the filename to normalize; may be {@code null}
 * @return the normalized name, or an empty string when {@code filename} is {@code null}
 */
private static String normalizeFilename(String filename) {
    if (filename == null) return "";
    // Lower-case with Locale.ROOT: with a Turkish default locale 'I' becomes dotless 'ı', which
    // the ASCII-only filter below would delete ("FILE.PDF" -> "fle.pdf"), so the result would
    // depend on the machine's locale settings.
    String lowered = filename.toLowerCase(java.util.Locale.ROOT);
    // No separate trim/whitespace-collapse step is needed: whitespace is not in the allowed
    // set, so the filter removes all of it (leading, trailing and inner) in one pass.
    return RegexPatternUtils.getInstance()
            .getPattern("[^a-zA-Z0-9._-]")
            .matcher(lowered)
            .replaceAll("");
}
private static Map<Integer, String> addAttachmentsToDocumentWithMapping(
@ -613,12 +509,122 @@ public class PdfAttachmentHandler {
return null;
}
private static String normalizeFilename(String filename) {
if (filename == null) return "";
return filename.toLowerCase()
.trim()
.replaceAll("\\s+", " ")
.replaceAll("[^a-zA-Z0-9._-]", "");
/**
 * Text stripper that records where attachment markers occur inside the "attachments (N)"
 * section of a document. The extracted text itself is discarded; callers read the collected
 * {@link MarkerPosition} entries through the generated getter for {@code positions}.
 */
public static class AttachmentMarkerPositionFinder extends PDFTextStripper {
    private static final Pattern ATTACHMENT_SECTION_PATTERN =
            RegexPatternUtils.getInstance().getAttachmentSectionPattern();
    private static final Pattern FILENAME_PATTERN =
            RegexPatternUtils.getInstance().getAttachmentFilenamePattern();

    @Getter private final List<MarkerPosition> positions = new ArrayList<>();
    private final StringBuilder currentText = new StringBuilder();
    protected boolean sortByPosition;
    private int currentPageIndex;
    private boolean isInAttachmentSection;
    private boolean attachmentSectionFound;

    public AttachmentMarkerPositionFinder() {
        super();
        this.sortByPosition = false; // Disable sorting to preserve document order
        this.currentPageIndex = 0;
        this.attachmentSectionFound = false;
        this.isInAttachmentSection = false;
    }

    /**
     * Runs the text extraction to collect marker positions and, when sorting is enabled, orders
     * them by page index and then by descending Y.
     *
     * @return always an empty string; only the collected positions are of interest
     */
    @Override
    public String getText(PDDocument document) throws IOException {
        super.getText(document);

        if (!sortByPosition) {
            return ""; // Return empty string as we only need positions
        }

        positions.sort(
                (first, second) -> {
                    int byPage = Integer.compare(first.getPageIndex(), second.getPageIndex());
                    // Descending Y per PDF coordinate system
                    return byPage != 0 ? byPage : Float.compare(second.getY(), first.getY());
                });
        return ""; // Return empty string as we only need positions
    }

    @Override
    protected void startPage(PDPage page) throws IOException {
        super.startPage(page);
    }

    @Override
    protected void endPage(PDPage page) throws IOException {
        currentPageIndex++;
        super.endPage(page);
    }

    /**
     * Tracks whether the current text run lies inside the attachment section and, if so, records
     * one position per marker occurrence before delegating to the parent implementation.
     */
    @Override
    protected void writeString(String string, List<TextPosition> textPositions)
            throws IOException {
        String lowered = string.toLowerCase();

        if (ATTACHMENT_SECTION_PATTERN.matcher(lowered).find()) {
            isInAttachmentSection = true;
            attachmentSectionFound = true;
        }

        if (isInAttachmentSection && endsAttachmentSection(string, lowered)) {
            isInAttachmentSection = false;
        }

        if (isInAttachmentSection) {
            currentText.append(string);
            recordMarkers(string, textPositions);
        }

        super.writeString(string, textPositions);
    }

    @Override
    public void setSortByPosition(boolean sortByPosition) {
        this.sortByPosition = sortByPosition;
    }

    /**
     * Tells whether this text run closes the attachment section: a closing body/html tag, or a
     * blank run longer than 50 characters once the section header has been seen.
     */
    private boolean endsAttachmentSection(String string, String lowered) {
        if (lowered.contains("</body>")) {
            return true;
        }
        if (lowered.contains("</html>")) {
            return true;
        }
        return attachmentSectionFound && lowered.trim().isEmpty() && string.length() > 50;
    }

    /** Adds a MarkerPosition for every marker occurrence that has a matching text position. */
    private void recordMarkers(String string, List<TextPosition> textPositions) {
        int markerAt = string.indexOf(ATTACHMENT_MARKER, 0);
        while (markerAt != -1) {
            // The marker's index in the string is used as the index into textPositions;
            // occurrences beyond the list are ignored.
            if (markerAt < textPositions.size()) {
                TextPosition glyph = textPositions.get(markerAt);
                String attachmentName = extractFilenameAfterMarker(string, markerAt);
                positions.add(
                        new MarkerPosition(
                                currentPageIndex,
                                glyph.getXDirAdj(),
                                glyph.getYDirAdj(),
                                ATTACHMENT_MARKER,
                                attachmentName));
            }
            markerAt = string.indexOf(ATTACHMENT_MARKER, markerAt + 1);
        }
    }

    /**
     * Extracts the filename that follows the marker at {@code markerIndex}.
     *
     * @return the filename matched by the filename pattern, otherwise the first token longer
     *     than three characters that contains a dot, otherwise {@code null}
     */
    private String extractFilenameAfterMarker(String text, int markerIndex) {
        String tail = text.substring(markerIndex + 1);

        Matcher nameMatcher = FILENAME_PATTERN.matcher("@" + tail);
        if (nameMatcher.find()) {
            return nameMatcher.group(1);
        }

        // Fallback: split on whitespace/parentheses and take the first filename-like token.
        String[] tokens =
                RegexPatternUtils.getInstance()
                        .getWhitespaceParenthesesSplitPattern()
                        .split(tail);
        for (String token : tokens) {
            String candidate = token.trim();
            if (candidate.length() > 3 && candidate.contains(".")) {
                return candidate;
            }
        }
        return null;
    }
}
private static void addAttachmentAnnotationToPageWithMapping(

View File

@ -8,6 +8,8 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
@ -34,15 +36,20 @@ import org.springframework.web.multipart.MultipartFile;
import io.github.pixee.security.Filenames;
import lombok.experimental.UtilityClass;
import lombok.extern.slf4j.Slf4j;
import stirling.software.common.model.ApplicationProperties;
import stirling.software.common.service.CustomPDFDocumentFactory;
@Slf4j
@UtilityClass
public class PdfUtils {
public static PDRectangle textToPageSize(String size) {
private final RegexPatternUtils patternCache = RegexPatternUtils.getInstance();
public PDRectangle textToPageSize(String size) {
switch (size.toUpperCase()) {
case "A0" -> {
return PDRectangle.A0;
@ -75,7 +82,7 @@ public class PdfUtils {
}
}
public static List<RenderedImage> getAllImages(PDResources resources) throws IOException {
public List<RenderedImage> getAllImages(PDResources resources) throws IOException {
List<RenderedImage> images = new ArrayList<>();
for (COSName name : resources.getXObjectNames()) {
@ -92,7 +99,7 @@ public class PdfUtils {
return images;
}
public static boolean hasImages(PDDocument document, String pagesToCheck) throws IOException {
public boolean hasImages(PDDocument document, String pagesToCheck) throws IOException {
String[] pageOrderArr = pagesToCheck.split(",");
List<Integer> pageList =
GeneralUtils.parsePageList(pageOrderArr, document.getNumberOfPages());
@ -107,7 +114,7 @@ public class PdfUtils {
return false;
}
public static boolean hasText(PDDocument document, String pageNumbersToCheck, String phrase)
public boolean hasText(PDDocument document, String pageNumbersToCheck, String phrase)
throws IOException {
String[] pageOrderArr = pageNumbersToCheck.split(",");
List<Integer> pageList =
@ -123,11 +130,11 @@ public class PdfUtils {
return false;
}
public static boolean hasImagesOnPage(PDPage page) throws IOException {
public boolean hasImagesOnPage(PDPage page) throws IOException {
return getAllImages(page.getResources()).size() > 0;
}
public static boolean hasTextOnPage(PDPage page, String phrase) throws IOException {
public boolean hasTextOnPage(PDPage page, String phrase) throws IOException {
PDFTextStripper textStripper = new PDFTextStripper();
PDDocument tempDoc = new PDDocument();
tempDoc.addPage(page);
@ -136,7 +143,7 @@ public class PdfUtils {
return pageText.contains(phrase);
}
public static byte[] convertFromPdf(
public byte[] convertFromPdf(
CustomPDFDocumentFactory pdfDocumentFactory,
byte[] inputStream,
String imageType,
@ -379,7 +386,7 @@ public class PdfUtils {
* @return converted document to PDF-Image
* @throws IOException if conversion fails
*/
public static PDDocument convertPdfToPdfImage(PDDocument document) throws IOException {
public PDDocument convertPdfToPdfImage(PDDocument document) throws IOException {
PDDocument imageDocument = new PDDocument();
PDFRenderer pdfRenderer = new PDFRenderer(document);
pdfRenderer.setSubsamplingAllowed(true);
@ -428,8 +435,7 @@ public class PdfUtils {
return imageDocument;
}
private static BufferedImage prepareImageForPdfToImage(
int maxWidth, int height, String imageType) {
private BufferedImage prepareImageForPdfToImage(int maxWidth, int height, String imageType) {
BufferedImage combined;
if ("png".equalsIgnoreCase(imageType)) {
combined = new BufferedImage(maxWidth, height, BufferedImage.TYPE_INT_ARGB);
@ -445,7 +451,7 @@ public class PdfUtils {
return combined;
}
public static byte[] imageToPdf(
public byte[] imageToPdf(
MultipartFile[] files,
String fitOption,
boolean autoRotate,
@ -489,7 +495,7 @@ public class PdfUtils {
}
}
public static void addImageToDocument(
public void addImageToDocument(
PDDocument doc, PDImageXObject image, String fitOption, boolean autoRotate)
throws IOException {
boolean imageIsLandscape = image.getWidth() > image.getHeight();
@ -539,7 +545,7 @@ public class PdfUtils {
}
}
public static byte[] overlayImage(
public byte[] overlayImage(
CustomPDFDocumentFactory pdfDocumentFactory,
byte[] pdfBytes,
byte[] imageBytes,
@ -586,8 +592,11 @@ public class PdfUtils {
if (pagesToCheck == null || "all".equals(pagesToCheck)) {
pdfText = new StringBuilder(textStripper.getText(pdfDocument));
} else {
// remove whitespaces
pagesToCheck = pagesToCheck.replaceAll("\\s+", "");
// remove whitespaces using cached pattern
Pattern whitespacePattern =
patternCache.getPattern(RegexPatternUtils.getWhitespaceRegex());
Matcher whitespaceMatcher = whitespacePattern.matcher(pagesToCheck);
pagesToCheck = whitespaceMatcher.replaceAll("");
String[] splitPoints = pagesToCheck.split(",");
for (String splitPoint : splitPoints) {

View File

@ -0,0 +1,506 @@
package stirling.software.common.util;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import lombok.extern.slf4j.Slf4j;
/**
 * Process-wide cache of compiled {@link Pattern} objects plus named accessors for the regular
 * expressions used across the application.
 *
 * <p>Patterns are keyed by (regex, flags) and compiled at most once per key. The cache has no
 * size limit or eviction: every distinct regex passed to {@link #getPattern(String, int)} stays
 * cached until {@link #clearCache()} or {@link #removeFromCache(String, int)} is called, so avoid
 * feeding it an unbounded stream of caller-supplied expressions.
 */
@Slf4j
public final class RegexPatternUtils {

    private static final String WHITESPACE_REGEX = "\\s++";
    private static final String EXTENSION_REGEX = "\\.(?:[^.]*+)?$";

    private static final RegexPatternUtils INSTANCE = new RegexPatternUtils();

    private final ConcurrentHashMap<PatternKey, Pattern> patternCache = new ConcurrentHashMap<>();

    private RegexPatternUtils() {
        // Initialize with commonly used patterns for immediate availability
        precompileCommonPatterns();
    }

    /**
     * Get the singleton instance of the pattern cache.
     *
     * @return the singleton RegexPatternUtils instance
     */
    public static RegexPatternUtils getInstance() {
        return INSTANCE;
    }

    /**
     * Get a compiled pattern (no flags) from cache, compiling and caching if not present.
     *
     * <p>This method is thread-safe and uses lazy initialization. Multiple threads calling with the
     * same regex will result in only one compilation, with all threads receiving the same cached
     * Pattern instance.
     *
     * @param regex the regular expression string to compile
     * @return compiled Pattern object, never null
     * @throws PatternSyntaxException if the regex syntax is invalid
     * @throws IllegalArgumentException if regex is null
     */
    public Pattern getPattern(String regex) {
        return getPattern(regex, 0);
    }

    /**
     * Get a compiled pattern with flags.
     *
     * <p>Patterns with different flags are cached separately using a composite key. Common flags
     * include:
     *
     * <ul>
     *   <li>{@link Pattern#CASE_INSENSITIVE} - ignore case differences
     *   <li>{@link Pattern#MULTILINE} - ^ and $ match line boundaries
     *   <li>{@link Pattern#DOTALL} - . matches any character including newlines
     * </ul>
     *
     * @param regex the regular expression string
     * @param flags pattern flags (e.g., Pattern.CASE_INSENSITIVE)
     * @return compiled Pattern object with specified flags
     * @throws PatternSyntaxException if the regex syntax is invalid
     * @throws IllegalArgumentException if regex is null
     */
    public Pattern getPattern(String regex, int flags) {
        if (regex == null) {
            throw new IllegalArgumentException("Regex pattern cannot be null");
        }
        return patternCache.computeIfAbsent(new PatternKey(regex, flags), this::compilePattern);
    }

    /**
     * Check if a pattern (no flags) is already cached.
     *
     * @param regex the regular expression string
     * @return true if pattern is cached, false otherwise (including for a null regex)
     */
    public boolean isCached(String regex) {
        return isCached(regex, 0);
    }

    /**
     * Check if a pattern with flags is already cached.
     *
     * @param regex the regular expression string
     * @param flags pattern flags
     * @return true if pattern is cached, false otherwise (including for a null regex)
     */
    public boolean isCached(String regex, int flags) {
        return regex != null && patternCache.containsKey(new PatternKey(regex, flags));
    }

    /**
     * Get current cache size (number of cached patterns). Useful for monitoring and debugging.
     *
     * @return number of patterns currently cached
     */
    public int getCacheSize() {
        return patternCache.size();
    }

    /**
     * Clear all cached patterns, including the pre-compiled ones. Use sparingly as it forces
     * recompilation of every pattern on its next use. Mainly useful for testing or memory cleanup
     * in long-running applications.
     */
    public void clearCache() {
        patternCache.clear();
        log.debug("Regex pattern cache cleared");
    }

    /**
     * Remove a specific pattern (no flags) from cache.
     *
     * @param regex the regular expression string to remove
     * @return true if pattern was cached and removed, false otherwise
     */
    public boolean removeFromCache(String regex) {
        return removeFromCache(regex, 0);
    }

    /**
     * Remove a specific pattern with flags from cache.
     *
     * @param regex the regular expression string to remove
     * @param flags pattern flags
     * @return true if pattern was cached and removed, false otherwise
     */
    public boolean removeFromCache(String regex, int flags) {
        if (regex == null) {
            return false;
        }
        boolean removed = patternCache.remove(new PatternKey(regex, flags)) != null;
        if (removed) {
            log.debug("Removed regex pattern from cache: {} (flags: {})", regex, flags);
        }
        return removed;
    }

    /**
     * Internal method to compile a pattern and handle errors consistently.
     *
     * @param key the regex and flags to compile
     * @return compiled Pattern
     * @throws PatternSyntaxException if regex is invalid
     */
    private Pattern compilePattern(PatternKey key) {
        String regex = key.regex();
        int flags = key.flags();
        try {
            Pattern pattern = Pattern.compile(regex, flags);
            log.trace("Compiled and cached regex pattern with flags {}: {}", flags, regex);
            return pattern;
        } catch (PatternSyntaxException e) {
            log.error(
                    "Invalid regex pattern: '{}' with flags {} - {}", regex, flags, e.getMessage());
            throw e;
        }
    }

    /** Regex string (possessive) matching one or more whitespace characters. */
    public static String getWhitespaceRegex() {
        return WHITESPACE_REGEX;
    }

    /** Regex string matching the last dot of a name and everything after it. */
    public static String getExtensionRegex() {
        return EXTENSION_REGEX;
    }

    /**
     * Creates a pattern for text searching.
     *
     * @param regex the regular expression to search with
     * @param caseInsensitive when true the pattern is compiled with {@link
     *     Pattern#CASE_INSENSITIVE} and {@link Pattern#UNICODE_CASE}; when false no flags are set
     * @return the cached or newly compiled pattern
     */
    public Pattern createSearchPattern(String regex, boolean caseInsensitive) {
        int flags = caseInsensitive ? (Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE) : 0;
        return getPattern(regex, flags);
    }

    /** Pattern for matching trailing slashes (e.g., "/path/to/dir///") */
    public Pattern getTrailingSlashesPattern() {
        return getPattern("/+$");
    }

    /** Pattern for removing drive letters from paths */
    public Pattern getDriveLetterPattern() {
        return getPattern("^[a-zA-Z]:[\\\\/]+");
    }

    /** Pattern for removing leading slashes from paths */
    public Pattern getLeadingSlashesPattern() {
        return getPattern("^[\\\\/]+");
    }

    /** Pattern for matching backslashes */
    public Pattern getBackslashPattern() {
        return getPattern("\\\\");
    }

    /** Pattern for sanitizing filenames by removing problematic characters */
    public Pattern getSafeFilenamePattern() {
        return getPattern("[/\\\\?%*:|\"<>]");
    }

    /** Pattern for sanitizing filenames (keeps only alphanumeric) */
    public Pattern getFilenameSafePattern() {
        return getPattern("[^a-zA-Z0-9]");
    }

    /**
     * Pattern for replacing non-alphanumeric characters with underscore (explicit underscore
     * variant)
     */
    public Pattern getNonAlnumUnderscorePattern() {
        return getPattern("[^A-Za-z0-9_]");
    }

    /** Pattern for collapsing multiple underscores */
    public Pattern getMultipleUnderscoresPattern() {
        return getPattern("_+");
    }

    /** Pattern for trimming leading underscores */
    public Pattern getLeadingUnderscoresPattern() {
        return getPattern("^_+");
    }

    /** Pattern for trimming trailing underscores */
    public Pattern getTrailingUnderscoresPattern() {
        return getPattern("_+$");
    }

    /** Pattern for matching upload/download paths (case insensitive) */
    public Pattern getUploadDownloadPathPattern() {
        return getPattern("(?i).*/(upload|download)/.*");
    }

    /** Pattern for matching one or more whitespace characters */
    public Pattern getWhitespacePattern() {
        return getPattern("\\s+");
    }

    /** Pattern for matching newlines (Windows and Unix style) */
    public Pattern getNewlinesPattern() {
        return getPattern("\\r?\\n");
    }

    /** Pattern for splitting on newlines (Windows and Unix style) */
    public Pattern getNewlineSplitPattern() {
        return getPattern("\\r?\\n");
    }

    /** Pattern for splitting text into words */
    public Pattern getWordSplitPattern() {
        return getPattern("\\s+");
    }

    /** Pattern for removing carriage returns */
    public Pattern getCarriageReturnPattern() {
        return getPattern("\\r");
    }

    /** Pattern for matching newline characters */
    public Pattern getNewlineCharsPattern() {
        return getPattern("[\n\r]");
    }

    /** Pattern for multi-format newline splitting (Windows, Mac, Unix) */
    public Pattern getMultiFormatNewlinePattern() {
        return getPattern("\r\n|\r|\n");
    }

    /** Pattern for encoded payload newline removal */
    public Pattern getEncodedPayloadNewlinePattern() {
        return getPattern("\\r?\\n");
    }

    /** Pattern for escaped newlines (a literal backslash followed by 'n') in watermark text */
    public Pattern getEscapedNewlinePattern() {
        return getPattern("\\\\n");
    }

    /** Pattern for input sanitization (allows only alphanumeric and spaces) */
    public Pattern getInputSanitizePattern() {
        return getPattern("[^a-zA-Z0-9 ]");
    }

    /** Pattern for removing angle brackets */
    public Pattern getAngleBracketsPattern() {
        return getPattern("[<>]");
    }

    /** Pattern for removing leading and trailing quotes */
    public Pattern getQuotesRemovalPattern() {
        return getPattern("^\"|\"$");
    }

    /** Pattern for plus signs (URL encoding replacement) */
    public Pattern getPlusSignPattern() {
        return getPattern("\\+");
    }

    /** Pattern for username validation */
    public Pattern getUsernameValidationPattern() {
        return getPattern("^[a-zA-Z0-9](?!.*[-@._+]{2,})[a-zA-Z0-9@._+-]{1,48}[a-zA-Z0-9]$");
    }

    /** Pattern matching a single non-digit character (remove matches to keep only digits) */
    public Pattern getNumericExtractionPattern() {
        return getPattern("\\D");
    }

    /** Pattern for removing non-digit/dot characters (for timeout parsing) */
    public Pattern getNonDigitDotPattern() {
        return getPattern("[^\\d.]");
    }

    /** Pattern for matching digit/dot characters (for timeout parsing) */
    public Pattern getDigitDotPattern() {
        return getPattern("[\\d.]");
    }

    /** Pattern for detecting strings containing digits */
    public Pattern getContainsDigitsPattern() {
        return getPattern(".*\\d+.*");
    }

    /** Pattern for matching 1-3 digit numbers without a leading zero (1 to 999) */
    public Pattern getNumberRangePattern() {
        return getPattern("[1-9][0-9]{0,2}");
    }

    /** Pattern for validating mathematical expressions */
    public Pattern getMathExpressionPattern() {
        return getPattern("[0-9n+\\-*/() ]+");
    }

    /** Pattern for adding multiplication between numbers and 'n' */
    public Pattern getNumberBeforeNPattern() {
        return getPattern("(\\d)n");
    }

    /** Pattern for detecting consecutive 'n' characters */
    public Pattern getConsecutiveNPattern() {
        return getPattern(".*n{2,}.*");
    }

    /** Pattern for replacing consecutive 'n' characters */
    public Pattern getConsecutiveNReplacementPattern() {
        return getPattern("(?<!n)n{2}");
    }

    /** Pattern for validating HTTP/HTTPS URLs */
    public Pattern getHttpUrlPattern() {
        return getPattern("^https?://.*");
    }

    /** Pattern for matching URLs in text for link creation */
    public Pattern getUrlLinkPattern() {
        return getPattern("(https?://[\\w\\-._~:/?#\\[\\]@!$&'()*+,;=%]+)");
    }

    /** Pattern for matching email addresses in text for link creation */
    public Pattern getEmailLinkPattern() {
        return getPattern("([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,63})");
    }

    /**
     * Pattern for removing script tags from HTML.
     *
     * <p>NOTE(review): compiled without {@link Pattern#DOTALL}, so {@code .} does not cross line
     * terminators and a script element spanning several lines is not matched - confirm this is
     * intended before relying on it for sanitization.
     */
    public Pattern getScriptTagPattern() {
        return getPattern("(?i)<script[^>]*>.*?</script>");
    }

    /**
     * Pattern for removing style tags from HTML.
     *
     * <p>NOTE(review): like the script-tag pattern this is compiled without {@link
     * Pattern#DOTALL}, so multi-line style elements are not matched - confirm intended.
     */
    public Pattern getStyleTagPattern() {
        return getPattern("(?i)<style[^>]*>.*?</style>");
    }

    /** Pattern for removing fixed position CSS */
    public Pattern getFixedPositionCssPattern() {
        return getPattern("(?i)\\s*position\\s*:\\s*fixed[^;]*;?");
    }

    /** Pattern for removing absolute position CSS */
    public Pattern getAbsolutePositionCssPattern() {
        return getPattern("(?i)\\s*position\\s*:\\s*absolute[^;]*;?");
    }

    /** Pattern for matching size unit suffixes (KB, MB, GB, etc.) */
    public Pattern getSizeUnitPattern() {
        return getPattern("[KMGkmg][Bb]");
    }

    /** Pattern for system temp file type 1 */
    public Pattern getSystemTempFile1Pattern() {
        return getPattern("lu\\d+[a-z0-9]*\\.tmp");
    }

    /** Pattern for system temp file type 2 (OCR processes) */
    public Pattern getSystemTempFile2Pattern() {
        return getPattern("ocr_process\\d+");
    }

    /** Pattern for splitting on whitespace and parentheses */
    public Pattern getWhitespaceParenthesesSplitPattern() {
        return getPattern("[\\s\\(\\)]+");
    }

    /** Pattern for MIME header whitespace cleanup before encoded sequences */
    public Pattern getMimeHeaderWhitespacePattern() {
        return getPattern("\\s+(?==\\?)");
    }

    /** Pattern for font name validation (6 uppercase letters + plus + rest) */
    public Pattern getFontNamePattern() {
        return getPattern("^[A-Z]{6}\\+.*");
    }

    /** Pattern for matching access="readOnly" attribute in XFA XML (with optional whitespace) */
    public Pattern getAccessReadOnlyPattern() {
        return getPattern("access\\s*=\\s*\"readOnly\"");
    }

    /** Pattern for matching MIME encoded-word headers (RFC 2047) Example: =?charset?B?encoded?= */
    public Pattern getMimeEncodedWordPattern() {
        return getPattern("=\\?([^?]+)\\?([BbQq])\\?([^?]*)\\?=");
    }

    /** Pattern for matching inline CID images in HTML (case-insensitive) */
    public Pattern getInlineCidImagePattern() {
        return getPattern(
                "(?i)<img[^>]*\\ssrc\\s*=\\s*['\"]cid:([^'\"]+)['\"][^>]*>",
                Pattern.CASE_INSENSITIVE);
    }

    /** Pattern for matching attachment section headers (case-insensitive) */
    public Pattern getAttachmentSectionPattern() {
        return getPattern("attachments\\s*\\(\\d+\\)", Pattern.CASE_INSENSITIVE);
    }

    /** Pattern for matching filenames in attachment markers */
    public Pattern getAttachmentFilenamePattern() {
        return getPattern("@\\s*([^\\s\\(]+(?:\\.[a-zA-Z0-9]+)?)");
    }

    /**
     * Pattern for email validation: 1-320 characters overall, at most 64 before the {@code @}, a
     * local part that starts and ends with an alphanumeric, and a dotted domain ending in an
     * alphabetic TLD of at least two letters.
     */
    public Pattern getEmailValidationPattern() {
        // The first domain character must be alphanumeric. The previous "[^-]" only excluded a
        // hyphen and therefore accepted any other character there (space, '!', a second '@').
        // The first domain label still needs at least two characters, as before.
        return getPattern(
                "^(?=.{1,320}$)(?=.{1,64}@)[A-Za-z0-9](?:[A-Za-z0-9_.+-]*[A-Za-z0-9])?@[A-Za-z0-9][A-Za-z0-9-]+(?:\\.[A-Za-z0-9-]+)*(?:\\.[A-Za-z]{2,})$");
    }

    /** Pattern for matching {@code Output:<TYPE>} in API descriptions */
    public Pattern getApiDocOutputTypePattern() {
        return getPattern("Output:(\\w+)");
    }

    /** Pattern for matching {@code Input:<TYPE>} in API descriptions */
    public Pattern getApiDocInputTypePattern() {
        return getPattern("Input:(\\w+)");
    }

    /** Pattern for matching {@code Type:<CODE>} in API descriptions */
    public Pattern getApiDocTypePattern() {
        return getPattern("Type:(\\w+)");
    }

    /** Pattern for validating file extensions (2-4 alphanumeric, case-insensitive) */
    public Pattern getFileExtensionValidationPattern() {
        return getPattern("^[a-zA-Z0-9]{2,4}$", Pattern.CASE_INSENSITIVE);
    }

    /**
     * Pre-compile commonly used patterns for immediate availability. This eliminates first-call
     * compilation overhead for frequent patterns.
     */
    private void precompileCommonPatterns() {
        getPattern(EXTENSION_REGEX); // Extension removal - possessive, optional, anchored
        getPattern("\\.[^.]+$"); // Simple extension match - anchored
        getPattern("\\s+"); // One or more whitespace
        getPattern("\\s*"); // Zero or more whitespace
        getPattern("/+$"); // Trailing slashes
        getPattern("\\D"); // Non-numeric characters
        getPattern("[/\\\\?%*:|\"<>]"); // Unsafe filename characters
        getPattern("[^a-zA-Z0-9 ]"); // Input sanitization
        getPattern("[^a-zA-Z0-9]"); // Filename sanitization

        // API doc patterns; the strings must equal the ones used by the getApiDoc* accessors so
        // both resolve to the same cache entry
        getPattern("Output:(\\w+)");
        getPattern("Input:(\\w+)");
        getPattern("Type:(\\w+)");

        log.debug("Pre-compiled {} common regex patterns", patternCache.size());
    }

    /** Cache key: a regex string together with the flags it is compiled with. */
    private record PatternKey(String regex, int flags) {
        // Record automatically provides equals, hashCode, and toString
    }
}

View File

@ -50,7 +50,10 @@ public class WebResponseUtils {
headers.setContentType(mediaType);
headers.setContentLength(bytes.length);
String encodedDocName =
URLEncoder.encode(docName, StandardCharsets.UTF_8).replaceAll("\\+", "%20");
RegexPatternUtils.getInstance()
.getPlusSignPattern()
.matcher(URLEncoder.encode(docName, StandardCharsets.UTF_8))
.replaceAll("%20");
headers.setContentDispositionFormData("attachment", encodedDocName);
return new ResponseEntity<>(bytes, headers, HttpStatus.OK);
}

View File

@ -14,6 +14,7 @@ import stirling.software.common.model.ApplicationProperties.Driver;
import stirling.software.common.model.ApplicationProperties.Premium;
import stirling.software.common.model.ApplicationProperties.Security;
import stirling.software.common.model.exception.UnsupportedProviderException;
import stirling.software.common.util.RegexPatternUtils;
class ApplicationPropertiesLogicTest {
@ -38,7 +39,10 @@ class ApplicationPropertiesLogicTest {
new ApplicationProperties.TempFileManagement();
String expectedBase =
java.lang.System.getProperty("java.io.tmpdir").replaceAll("/+$", "")
RegexPatternUtils.getInstance()
.getTrailingSlashesPattern()
.matcher(java.lang.System.getProperty("java.io.tmpdir"))
.replaceAll("")
+ "/stirling-pdf";
assertEquals(expectedBase, normalize.apply(tfm.getBaseTmpDir()));

View File

@ -1,8 +1,15 @@
package stirling.software.common.util;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.*;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import org.junit.jupiter.api.Test;
@ -154,4 +161,223 @@ public class GeneralUtilsTest {
List<Integer> result = GeneralUtils.parsePageList(new String[] {"1,3,7-8"}, 8, false);
assertEquals(List.of(0, 2, 6, 7), result, "Range should be parsed correctly.");
}
@Test
void testRemoveExtension() {
    // Each row is {input, expected result}; rows are checked in order.
    final String[][] cases = {
        // Ordinary file names: only the last extension is dropped
        {"document.pdf", "document"},
        {"image.jpg", "image"},
        {"file.backup.zip", "file.backup"},
        {"complex.file.name.txt", "complex.file.name"},
        // Edge cases: null, no extension, hidden file, trailing dot, empty, single char
        {null, "default"},
        {"noextension", "noextension"},
        {".hidden", ".hidden"},
        {"trailing.", "trailing."},
        {"", ""},
        {"a", "a"},
        // Several dots, and a dot after path separators
        {"file.with.multiple.dots", "file.with.multiple"},
        {"path/to/file.ext", "path/to/file"},
    };
    for (String[] testCase : cases) {
        String input = testCase[0];
        String expected = testCase[1];
        assertEquals(expected, GeneralUtils.removeExtension(input));
    }
}
@Test
void testAppendSuffix() {
    // Each row is {base name, suffix, expected result}; rows are checked in order.
    final String[][] cases = {
        // Both arguments present
        {"document", "_processed", "document_processed"},
        {"file", ".txt", "file.txt"},
        // One or both arguments null
        {null, "_suffix", "default_suffix"},
        {"basename", null, "basename"},
        {null, null, "default"},
        // Empty strings on either side
        {"", "_suffix", "_suffix"},
        {"basename", "", "basename"},
    };
    for (String[] testCase : cases) {
        String actual = GeneralUtils.appendSuffix(testCase[0], testCase[1]);
        assertEquals(testCase[2], actual);
    }
}
@Test
void testProcessFilenames() {
    // Input includes a null entry and a name without extension on purpose.
    List<String> filenames = new ArrayList<>();
    Collections.addAll(
            filenames,
            "document.pdf",
            "image.jpg",
            "spreadsheet.xlsx",
            "presentation.pptx",
            null,
            "noextension");

    List<String> collected = new ArrayList<>();
    GeneralUtils.processFilenames(filenames, "_processed", name -> collected.add(name));

    // One output per input, in input order; the null entry becomes "default_processed".
    List<String> expected =
            List.of(
                    "document_processed",
                    "image_processed",
                    "spreadsheet_processed",
                    "presentation_processed",
                    "default_processed",
                    "noextension_processed");
    assertEquals(expected, collected);
}
@Test
void testProcessFilenamesNullHandling() {
    List<String> results = new ArrayList<>();

    // A null filename list must be a no-op: the processor is never invoked.
    GeneralUtils.processFilenames(null, "_suffix", results::add);
    assertTrue(results.isEmpty(), "Should handle null filenames list");

    // A null processor must be tolerated. The expectation is asserted explicitly rather than
    // left to a comment, so a regression is reported as a failed assertion with a message.
    List<String> filenames = List.of("test.txt");
    assertDoesNotThrow(
            () -> {
                GeneralUtils.processFilenames(filenames, "_suffix", null);
            },
            "Should handle null processor");
}
@Test
void testRemoveExtensionThreadSafety() throws InterruptedException {
    final int threadCount = 50;
    final int operationsPerThread = 100;
    final String[] testFilenames = {
        "document.pdf", "image.jpg", "data.csv", "presentation.pptx",
        "archive.zip", "music.mp3", "video.mp4", "text.txt"
    };

    ExecutorService executor = Executors.newFixedThreadPool(threadCount);
    CountDownLatch latch = new CountDownLatch(threadCount);
    AtomicInteger successCount = new AtomicInteger(0);
    List<Throwable> failures = Collections.synchronizedList(new ArrayList<>());

    try {
        for (int i = 0; i < threadCount; i++) {
            executor.submit(
                    () -> {
                        try {
                            for (int j = 0; j < operationsPerThread; j++) {
                                String filename = testFilenames[j % testFilenames.length];
                                String result = GeneralUtils.removeExtension(filename);

                                // Verify result is correct
                                assertFalse(
                                        result.contains("."),
                                        "Result should not contain extension: " + result);
                                assertTrue(
                                        filename.startsWith(result),
                                        "Original should start with result: "
                                                + filename
                                                + " -> "
                                                + result);
                            }
                            successCount.incrementAndGet();
                        } catch (Throwable t) {
                            // Failed assertions throw AssertionError, which is an Error and
                            // not an Exception. Catching Throwable keeps the real cause
                            // instead of losing it inside the executor's Future.
                            failures.add(t);
                        } finally {
                            latch.countDown();
                        }
                    });
        }

        assertTrue(latch.await(10, TimeUnit.SECONDS), "All threads should complete");
        if (!failures.isEmpty()) {
            fail("Thread safety test failed with exceptions: " + failures);
        }
        assertEquals(threadCount, successCount.get(), "All threads should succeed");
    } finally {
        // Always release the worker threads, including when an assertion above fails or the
        // latch times out with tasks still running.
        executor.shutdownNow();
    }
}
@Test
void testBatchProcessingPerformance() {
    // Build 3000 names: for every index a .pdf, a .docx and a .jpg entry, in that order.
    final int groups = 1000;
    List<String> filenames = new ArrayList<>();
    for (int index = 0; index < groups; index++) {
        Collections.addAll(
                filenames,
                "file" + index + ".pdf",
                "document" + index + ".docx",
                "image" + index + ".jpg");
    }

    List<String> processed = new ArrayList<>();
    GeneralUtils.processFilenames(filenames, "_processed", name -> processed.add(name));

    // Every input yields one output; spot-check the first, a middle and the last group.
    assertEquals(filenames.size(), processed.size(), "Should process all filenames");
    final String[] samples = {"file0_processed", "document500_processed", "image999_processed"};
    for (String sample : samples) {
        assertTrue(processed.contains(sample), "Should contain processed filename");
    }
}
@Test
void testHybridStringRegexApproach() {
    // Each row is {input, expected result, failure message}; rows are checked in order.
    final String[][] edgeCases = {
        {"", "", "Empty string should remain empty"},
        {".", ".", "Single dot should remain unchanged"},
        {"..", "..", "Double dots should remain unchanged"},
        {"...", "...", "Triple dots should remain unchanged"},
        {".hidden", ".hidden", "Hidden file should remain unchanged"},
        {"file.", "file.", "Trailing dot should remain unchanged"},
        {"a.b.c.d.e.f.g", "a.b.c.d.e.f", "Multiple extensions should remove last one"},
        {"no-extension-here", "no-extension-here", "No extension should remain unchanged"},
        {"file..double.dot", "file..double", "Double dot case should remove last extension"},
    };

    for (String[] row : edgeCases) {
        String input = row[0];
        String actual = GeneralUtils.removeExtension(input);

        // A result must always be produced before its value is compared.
        assertNotNull(actual, "Result should not be null for: " + input);
        assertEquals(row[1], actual, row[2]);
    }
}
@Test
void testGetTitleFromFilename() {
    // Each row is {filename, expected title}; rows are checked in order.
    final String[][] cases = {
        // Ordinary file names
        {"document.pdf", "document"},
        {"presentation.pptx", "presentation"},
        {"file.backup.zip", "file.backup"},
        // Null and empty input both yield the placeholder title
        {null, "Untitled"},
        {"", "Untitled"},
        // Hidden file, trailing dot, no extension
        {".hidden", ".hidden"},
        {"file.", "file."},
        {"noextension", "noextension"},
        // Several dots, and a dot after path separators
        {"complex.file.name.txt", "complex.file.name"},
        {"path/to/file.ext", "path/to/file"},
    };
    for (String[] testCase : cases) {
        String title = GeneralUtils.getTitleFromFilename(testCase[0]);
        assertEquals(testCase[1], title);
    }
}
}

View File

@ -65,23 +65,22 @@ public class PdfUtilsTest {
doc1.addPage(new PDPage());
doc1.addPage(new PDPage());
doc1.addPage(new PDPage());
PdfUtils utils = new PdfUtils();
assertTrue(utils.pageCount(doc1, 2, "greater"));
assertTrue(PdfUtils.pageCount(doc1, 2, "greater"));
PDDocument doc2 = new PDDocument();
doc2.addPage(new PDPage());
doc2.addPage(new PDPage());
doc2.addPage(new PDPage());
assertTrue(utils.pageCount(doc2, 3, "equal"));
assertTrue(PdfUtils.pageCount(doc2, 3, "equal"));
PDDocument doc3 = new PDDocument();
doc3.addPage(new PDPage());
doc3.addPage(new PDPage());
assertTrue(utils.pageCount(doc3, 5, "less"));
assertTrue(PdfUtils.pageCount(doc3, 5, "less"));
PDDocument doc4 = new PDDocument();
doc4.addPage(new PDPage());
assertThrows(IllegalArgumentException.class, () -> utils.pageCount(doc4, 1, "bad"));
assertThrows(IllegalArgumentException.class, () -> PdfUtils.pageCount(doc4, 1, "bad"));
}
@Test
@ -91,8 +90,7 @@ public class PdfUtilsTest {
doc.addPage(page);
PDRectangle rect = page.getMediaBox();
String expected = rect.getWidth() + "x" + rect.getHeight();
PdfUtils utils = new PdfUtils();
assertTrue(utils.pageSize(doc, expected));
assertTrue(PdfUtils.pageSize(doc, expected));
}
@Test

View File

@ -0,0 +1,115 @@
package stirling.software.common.util;
import static org.junit.jupiter.api.Assertions.*;
import java.util.regex.Pattern;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
public class RegexPatternUtilsTest {

    private RegexPatternUtils utils;

    @BeforeEach
    void setUp() {
        // Every test starts from an emptied cache on the shared instance.
        utils = RegexPatternUtils.getInstance();
        utils.clearCache();
    }

    @Test
    void testPatternCaching() {
        final String regex = "test\\d+";

        final Pattern first = utils.getPattern(regex);
        assertNotNull(first);
        assertTrue(utils.isCached(regex));
        // The cache was cleared in setUp, so this is expected to be the only entry.
        assertEquals(1, utils.getCacheSize());

        // A repeated request must return the very same object.
        final Pattern second = utils.getPattern(regex);
        assertSame(first, second);
    }

    @Test
    void testPatternWithFlags() {
        final String regex = "test";

        final Pattern caseInsensitive = utils.getPattern(regex, Pattern.CASE_INSENSITIVE);
        final Pattern withoutFlags = utils.getPattern(regex);

        // The same regex under different flags is cached as two distinct entries.
        assertNotSame(caseInsensitive, withoutFlags);
        assertTrue(utils.isCached(regex, Pattern.CASE_INSENSITIVE));
        assertTrue(utils.isCached(regex, 0));
    }

    @Test
    void testCacheEviction() {
        final String regex = "evict\\d+";

        utils.getPattern(regex);
        assertTrue(utils.isCached(regex));

        // The first removal reports success and the entry is gone afterwards.
        assertTrue(utils.removeFromCache(regex));
        assertFalse(utils.isCached(regex));

        // Removing it again reports that nothing was removed.
        assertFalse(utils.removeFromCache(regex));
    }

    @Test
    void testNullRegexHandling() {
        // Requesting a pattern for null is rejected, with or without flags.
        assertThrows(IllegalArgumentException.class, () -> utils.getPattern(null));
        assertThrows(
                IllegalArgumentException.class,
                () -> utils.getPattern(null, Pattern.CASE_INSENSITIVE));

        // The cache queries answer false for null instead of throwing.
        assertFalse(utils.isCached(null));
        assertFalse(utils.removeFromCache(null));
    }

    @Test
    void testCommonPatterns() {
        assertTrue(utils.getWhitespacePattern().matcher("   \t  ").matches());
        assertTrue(utils.getTrailingSlashesPattern().matcher("/path/to/dir///").find());
        assertTrue(utils.getSafeFilenamePattern().matcher("bad<file>name").find());
    }

    @Test
    void testCreateSearchPattern() {
        final String regex = "Hello";

        final Pattern exactCase = utils.createSearchPattern(regex, false);
        final Pattern anyCase = utils.createSearchPattern(regex, true);

        // Case-sensitive search matches only the exact spelling.
        assertTrue(exactCase.matcher("Hello").matches());
        assertFalse(exactCase.matcher("hello").matches());

        // Case-insensitive search matches every casing.
        assertTrue(anyCase.matcher("Hello").matches());
        assertTrue(anyCase.matcher("hello").matches());
        assertTrue(anyCase.matcher("HELLO").matches());
    }

    @Test
    void testSingletonBehavior() {
        final RegexPatternUtils firstLookup = RegexPatternUtils.getInstance();
        final RegexPatternUtils secondLookup = RegexPatternUtils.getInstance();

        assertSame(firstLookup, secondLookup);
    }
}

View File

@ -7,6 +7,7 @@ import java.io.InputStreamReader;
import java.util.HashSet;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.regex.Pattern;
import javax.imageio.ImageIO;
import javax.swing.*;
@ -15,10 +16,13 @@ import io.github.pixee.security.BoundedLineReader;
import lombok.extern.slf4j.Slf4j;
import stirling.software.common.util.RegexPatternUtils;
import stirling.software.common.util.UIScaling;
@Slf4j
public class LoadingWindow extends JDialog {
private static final Pattern PATTERN =
RegexPatternUtils.getInstance().getContainsDigitsPattern();
private final JProgressBar progressBar;
private final JLabel statusLabel;
private final JPanel mainPanel;
@ -151,7 +155,10 @@ public class LoadingWindow extends JDialog {
BoundedLineReader.readLine(
reader, 5_000_000))
!= null) {
if (line.matches(".*\\d+.*")) { // Contains numbers
if (RegexPatternUtils.getInstance()
.getContainsDigitsPattern()
.matcher(line)
.matches()) { // Contains numbers
String[] parts = line.trim().split(",");
if (parts.length >= 2) {
existingPids.add(
@ -206,8 +213,8 @@ public class LoadingWindow extends JDialog {
newReader,
5_000_000))
!= null) {
if (newLine.matches(
".*\\d+.*")) {
if (PATTERN.matcher(newLine)
.matches()) {
String[] parts =
newLine.trim()
.split(",");

View File

@ -13,6 +13,7 @@ import jakarta.annotation.PostConstruct;
import lombok.extern.slf4j.Slf4j;
import stirling.software.common.configuration.RuntimePathConfig;
import stirling.software.common.util.RegexPatternUtils;
@Configuration
@Slf4j
@ -73,7 +74,7 @@ public class ExternalAppDepConfig {
// First replace common terms
String feature = endpoint.replace("-", " ").replace("pdf", "PDF").replace("img", "image");
// Split into words and capitalize each word
return Arrays.stream(feature.split("\\s+"))
return Arrays.stream(RegexPatternUtils.getInstance().getWordSplitPattern().split(feature))
.map(word -> capitalizeWord(word))
.collect(Collectors.joining(" "));
}

View File

@ -24,6 +24,7 @@ import lombok.RequiredArgsConstructor;
import stirling.software.SPDF.model.api.general.CropPdfForm;
import stirling.software.common.service.CustomPDFDocumentFactory;
import stirling.software.common.util.GeneralUtils;
import stirling.software.common.util.WebResponseUtils;
@RestController
@ -93,7 +94,7 @@ public class CropController {
byte[] pdfContent = baos.toByteArray();
return WebResponseUtils.bytesToWebResponse(
pdfContent,
request.getFileInput().getOriginalFilename().replaceFirst("[.][^.]+$", "")
+ "_cropped.pdf");
GeneralUtils.generateFilename(
request.getFileInput().getOriginalFilename(), "_cropped.pdf"));
}
}

View File

@ -13,12 +13,7 @@ import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlin
import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineNode;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.ModelAttribute;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.ResponseBody;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.bind.annotation.*;
import org.springframework.web.multipart.MultipartFile;
import com.fasterxml.jackson.core.type.TypeReference;
@ -34,6 +29,7 @@ import lombok.extern.slf4j.Slf4j;
import stirling.software.SPDF.model.api.EditTableOfContentsRequest;
import stirling.software.common.service.CustomPDFDocumentFactory;
import stirling.software.common.util.GeneralUtils;
import stirling.software.common.util.WebResponseUtils;
@RestController
@ -182,9 +178,10 @@ public class EditTableOfContentsController {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
document.save(baos);
String filename = file.getOriginalFilename().replaceFirst("[.][^.]+$", "");
return WebResponseUtils.bytesToWebResponse(
baos.toByteArray(), filename + "_with_toc.pdf", MediaType.APPLICATION_PDF);
baos.toByteArray(),
GeneralUtils.generateFilename(file.getOriginalFilename(), "_with_toc.pdf"),
MediaType.APPLICATION_PDF);
} finally {
if (document != null) {

View File

@ -37,6 +37,7 @@ import lombok.extern.slf4j.Slf4j;
import stirling.software.SPDF.model.api.general.MergePdfsRequest;
import stirling.software.common.service.CustomPDFDocumentFactory;
import stirling.software.common.util.ExceptionUtils;
import stirling.software.common.util.GeneralUtils;
import stirling.software.common.util.PdfErrorUtils;
import stirling.software.common.util.TempFile;
import stirling.software.common.util.TempFileManager;
@ -127,10 +128,7 @@ public class MergeController {
for (MultipartFile file : files) {
// Get the filename without extension to use as bookmark title
String filename = file.getOriginalFilename();
String title = filename;
if (title != null && title.contains(".")) {
title = title.substring(0, title.lastIndexOf('.'));
}
String title = GeneralUtils.removeExtension(filename);
// Create an outline item for this file
PDOutlineItem item = new PDOutlineItem();
@ -236,10 +234,11 @@ public class MergeController {
mergedDocument.save(outputTempFile.getFile());
String mergedFileName =
files[0].getOriginalFilename().replaceFirst("[.][^.]+$", "")
+ "_merged_unsigned.pdf";
GeneralUtils.generateFilename(
files[0].getOriginalFilename(), "_merged_unsigned.pdf");
return WebResponseUtils.pdfFileToWebResponse(
outputTempFile, mergedFileName); // Return the modified PDF as stream
outputTempFile, mergedFileName); // Return the modified PDF
} catch (Exception ex) {
if (ex instanceof IOException && PdfErrorUtils.isCorruptedPdfError((IOException) ex)) {
log.warn("Corrupted PDF detected in merge pdf process: {}", ex.getMessage());

View File

@ -19,7 +19,6 @@ import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import io.github.pixee.security.Filenames;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
@ -29,6 +28,7 @@ import lombok.extern.slf4j.Slf4j;
import stirling.software.SPDF.model.api.general.MergeMultiplePagesRequest;
import stirling.software.common.service.CustomPDFDocumentFactory;
import stirling.software.common.util.FormUtils;
import stirling.software.common.util.GeneralUtils;
import stirling.software.common.util.WebResponseUtils;
@RestController
@ -166,7 +166,7 @@ public class MultiPageLayoutController {
byte[] result = baos.toByteArray();
return WebResponseUtils.bytesToWebResponse(
result,
Filenames.toSimpleFileName(file.getOriginalFilename()).replaceFirst("[.][^.]+$", "")
+ "_layoutChanged.pdf");
GeneralUtils.generateFilename(
file.getOriginalFilename(), "_multi_page_layout.pdf"));
}
}

View File

@ -19,6 +19,7 @@ import lombok.RequiredArgsConstructor;
import stirling.software.SPDF.service.PdfImageRemovalService;
import stirling.software.common.model.api.PDFFile;
import stirling.software.common.service.CustomPDFDocumentFactory;
import stirling.software.common.util.GeneralUtils;
import stirling.software.common.util.WebResponseUtils;
/**
@ -69,8 +70,8 @@ public class PdfImageRemovalController {
// Generate a new filename for the modified PDF
String mergedFileName =
file.getFileInput().getOriginalFilename().replaceFirst("[.][^.]+$", "")
+ "_removed_images.pdf";
GeneralUtils.generateFilename(
file.getFileInput().getOriginalFilename(), "_images_removed.pdf");
// Convert the byte array to a web response and return it
return WebResponseUtils.bytesToWebResponse(outputStream.toByteArray(), mergedFileName);

View File

@ -20,7 +20,6 @@ import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import io.github.pixee.security.Filenames;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
@ -84,9 +83,8 @@ public class PdfOverlayController {
overlay.overlay(overlayGuide).save(outputStream);
byte[] data = outputStream.toByteArray();
String outputFilename =
Filenames.toSimpleFileName(baseFile.getOriginalFilename())
.replaceFirst("[.][^.]+$", "")
+ "_overlayed.pdf"; // Remove file extension and append .pdf
GeneralUtils.generateFilename(
baseFile.getOriginalFilename(), "_overlayed.pdf");
return WebResponseUtils.bytesToWebResponse(
data, outputFilename, MediaType.APPLICATION_PDF);

View File

@ -15,7 +15,6 @@ import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import io.github.pixee.security.Filenames;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
@ -68,9 +67,7 @@ public class RearrangePagesPDFController {
}
return WebResponseUtils.pdfDocToWebResponse(
document,
Filenames.toSimpleFileName(pdfFile.getOriginalFilename())
.replaceFirst("[.][^.]+$", "")
+ "_removed_pages.pdf");
GeneralUtils.generateFilename(pdfFile.getOriginalFilename(), "_removed_pages.pdf"));
}
private List<Integer> removeFirst(int totalPages) {
@ -286,9 +283,8 @@ public class RearrangePagesPDFController {
return WebResponseUtils.pdfDocToWebResponse(
document,
Filenames.toSimpleFileName(pdfFile.getOriginalFilename())
.replaceFirst("[.][^.]+$", "")
+ "_rearranged.pdf");
GeneralUtils.generateFilename(
pdfFile.getOriginalFilename(), "_rearranged.pdf"));
} catch (IOException e) {
ExceptionUtils.logException("document rearrangement", e);
throw e;

View File

@ -13,7 +13,6 @@ import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import io.github.pixee.security.Filenames;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
@ -22,6 +21,7 @@ import lombok.RequiredArgsConstructor;
import stirling.software.SPDF.model.api.general.RotatePDFRequest;
import stirling.software.common.service.CustomPDFDocumentFactory;
import stirling.software.common.util.ExceptionUtils;
import stirling.software.common.util.GeneralUtils;
import stirling.software.common.util.WebResponseUtils;
@RestController
@ -59,10 +59,9 @@ public class RotationController {
page.setRotation(page.getRotation() + angle);
}
// Return the rotated PDF as a response
return WebResponseUtils.pdfDocToWebResponse(
document,
Filenames.toSimpleFileName(pdfFile.getOriginalFilename())
.replaceFirst("[.][^.]+$", "")
+ "_rotated.pdf");
GeneralUtils.generateFilename(pdfFile.getOriginalFilename(), "_rotated.pdf"));
}
}

View File

@ -20,7 +20,6 @@ import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import io.github.pixee.security.Filenames;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
@ -29,6 +28,7 @@ import lombok.RequiredArgsConstructor;
import stirling.software.SPDF.model.api.general.ScalePagesRequest;
import stirling.software.common.service.CustomPDFDocumentFactory;
import stirling.software.common.util.ExceptionUtils;
import stirling.software.common.util.GeneralUtils;
import stirling.software.common.util.WebResponseUtils;
@RestController
@ -99,8 +99,7 @@ public class ScalePagesController {
return WebResponseUtils.bytesToWebResponse(
baos.toByteArray(),
Filenames.toSimpleFileName(file.getOriginalFilename()).replaceFirst("[.][^.]+$", "")
+ "_scaled.pdf");
GeneralUtils.generateFilename(file.getOriginalFilename(), "_scaled.pdf"));
}
private PDRectangle getTargetSize(String targetPDRectangle, PDDocument sourceDocument) {

View File

@ -3,7 +3,6 @@ package stirling.software.SPDF.controller.api;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
@ -20,7 +19,6 @@ import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import io.github.pixee.security.Filenames;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
@ -30,6 +28,7 @@ import lombok.extern.slf4j.Slf4j;
import stirling.software.SPDF.model.api.PDFWithPageNums;
import stirling.software.common.service.CustomPDFDocumentFactory;
import stirling.software.common.util.ExceptionUtils;
import stirling.software.common.util.GeneralUtils;
import stirling.software.common.util.TempFile;
import stirling.software.common.util.TempFileManager;
import stirling.software.common.util.WebResponseUtils;
@ -56,20 +55,15 @@ public class SplitPDFController {
throws IOException {
PDDocument document = null;
Path zipFile = null;
List<ByteArrayOutputStream> splitDocumentsBoas = new ArrayList<>();
String filename;
TempFile outputTempFile = null;
try {
outputTempFile = new TempFile(tempFileManager, ".zip");
MultipartFile file = request.getFileInput();
String pages = request.getPageNumbers();
// open the pdf document
document = pdfDocumentFactory.load(file);
// PdfMetadata metadata = PdfMetadataService.extractMetadataFromPdf(document);
int totalPages = document.getNumberOfPages();
List<Integer> pageNumbers = request.getPageNumbersList(document, false);
if (!pageNumbers.contains(totalPages - 1)) {
@ -82,8 +76,7 @@ public class SplitPDFController {
"Splitting PDF into pages: {}",
pageNumbers.stream().map(String::valueOf).collect(Collectors.joining(",")));
// split the document
splitDocumentsBoas = new ArrayList<>();
splitDocumentsBoas = new ArrayList<>(pageNumbers.size());
int previousPageNumber = 0;
for (int splitPoint : pageNumbers) {
try (PDDocument splitDocument =
@ -100,7 +93,6 @@ public class SplitPDFController {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
splitDocument.save(baos);
splitDocumentsBoas.add(baos);
} catch (Exception e) {
ExceptionUtils.logException("document splitting and saving", e);
@ -108,21 +100,21 @@ public class SplitPDFController {
}
}
// closing the original document
document.close();
filename =
Filenames.toSimpleFileName(file.getOriginalFilename())
.replaceFirst("[.][^.]+$", "");
String baseFilename = GeneralUtils.removeExtension(file.getOriginalFilename());
try (ZipOutputStream zipOut =
new ZipOutputStream(Files.newOutputStream(outputTempFile.getPath()))) {
// loop through the split documents and write them to the zip file
for (int i = 0; i < splitDocumentsBoas.size(); i++) {
String fileName = filename + "_" + (i + 1) + ".pdf";
int splitDocumentsSize = splitDocumentsBoas.size();
for (int i = 0; i < splitDocumentsSize; i++) {
StringBuilder sb = new StringBuilder(baseFilename.length() + 10);
sb.append(baseFilename).append('_').append(i + 1).append(".pdf");
String fileName = sb.toString();
ByteArrayOutputStream baos = splitDocumentsBoas.get(i);
byte[] pdf = baos.toByteArray();
// Add PDF file to the zip
ZipEntry pdfEntry = new ZipEntry(fileName);
zipOut.putNextEntry(pdfEntry);
zipOut.write(pdf);
@ -131,12 +123,17 @@ public class SplitPDFController {
log.debug("Wrote split document {} to zip file", fileName);
}
}
log.debug(
"Successfully created zip file with split documents: {}",
outputTempFile.getPath());
outputTempFile.getPath().toString());
byte[] data = Files.readAllBytes(outputTempFile.getPath());
String zipFilename =
GeneralUtils.generateFilename(file.getOriginalFilename(), "_split.zip");
return WebResponseUtils.bytesToWebResponse(
data, filename + ".zip", MediaType.APPLICATION_OCTET_STREAM);
data, zipFilename, MediaType.APPLICATION_OCTET_STREAM);
} finally {
try {
// Close the main document

View File

@ -20,7 +20,6 @@ import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import io.github.pixee.security.Filenames;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
@ -36,6 +35,7 @@ import stirling.software.common.model.PdfMetadata;
import stirling.software.common.service.CustomPDFDocumentFactory;
import stirling.software.common.service.PdfMetadataService;
import stirling.software.common.util.ExceptionUtils;
import stirling.software.common.util.GeneralUtils;
import stirling.software.common.util.WebResponseUtils;
@RestController
@ -188,9 +188,7 @@ public class SplitPdfByChaptersController {
byte[] data = Files.readAllBytes(zipFile);
Files.deleteIfExists(zipFile);
String filename =
Filenames.toSimpleFileName(file.getOriginalFilename())
.replaceFirst("[.][^.]+$", "");
String filename = GeneralUtils.generateFilename(file.getOriginalFilename(), "");
sourceDocument.close();
return WebResponseUtils.bytesToWebResponse(
data, filename + ".zip", MediaType.APPLICATION_OCTET_STREAM);

View File

@ -26,7 +26,6 @@ import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import org.springframework.web.servlet.mvc.method.annotation.StreamingResponseBody;
import io.github.pixee.security.Filenames;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
@ -34,6 +33,7 @@ import lombok.RequiredArgsConstructor;
import stirling.software.SPDF.model.api.SplitPdfBySectionsRequest;
import stirling.software.common.service.CustomPDFDocumentFactory;
import stirling.software.common.util.GeneralUtils;
import stirling.software.common.util.PDFService;
import stirling.software.common.util.TempFile;
import stirling.software.common.util.TempFileManager;
@ -69,9 +69,7 @@ public class SplitPdfBySectionsController {
boolean merge = Boolean.TRUE.equals(request.getMerge());
List<PDDocument> splitDocuments = splitPdfPages(sourceDocument, verti, horiz);
String filename =
Filenames.toSimpleFileName(file.getOriginalFilename())
.replaceFirst("[.][^.]+$", "");
String filename = GeneralUtils.generateFilename(file.getOriginalFilename(), "_split.pdf");
if (merge) {
TempFile tempFile = new TempFile(tempFileManager, ".pdf");
try (PDDocument merged = pdfService.mergeDocuments(splitDocuments);

View File

@ -17,7 +17,6 @@ import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import io.github.pixee.security.Filenames;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
@ -57,21 +56,20 @@ public class SplitPdfBySizeController {
log.debug("Starting PDF split process with request: {}", request);
MultipartFile file = request.getFileInput();
String filename =
Filenames.toSimpleFileName(file.getOriginalFilename())
.replaceFirst("[.][^.]+$", "");
String filename = GeneralUtils.generateFilename(file.getOriginalFilename(), "");
log.debug("Base filename for output: {}", filename);
try (TempFile zipTempFile = new TempFile(tempFileManager, ".zip")) {
Path zipFile = zipTempFile.getPath();
log.debug("Created temporary zip file: {}", zipFile);
Path managedZipPath = zipTempFile.getPath();
log.debug("Created temporary managed zip file: {}", managedZipPath);
try {
log.debug("Reading input file bytes");
byte[] pdfBytes = file.getBytes();
log.debug("Successfully read {} bytes from input file", pdfBytes.length);
log.debug("Creating ZIP output stream");
try (ZipOutputStream zipOut = new ZipOutputStream(Files.newOutputStream(zipFile))) {
try (ZipOutputStream zipOut =
new ZipOutputStream(Files.newOutputStream(managedZipPath))) {
log.debug("Loading PDF document");
try (PDDocument sourceDocument = pdfDocumentFactory.load(pdfBytes)) {
log.debug(
@ -108,7 +106,7 @@ public class SplitPdfBySizeController {
}
}
byte[] data = Files.readAllBytes(zipFile);
byte[] data = Files.readAllBytes(managedZipPath);
log.debug("Successfully read {} bytes from ZIP file", data.length);
log.debug("Returning response with {} bytes of data", data.length);

View File

@ -24,6 +24,7 @@ import lombok.RequiredArgsConstructor;
import stirling.software.common.model.api.PDFFile;
import stirling.software.common.service.CustomPDFDocumentFactory;
import stirling.software.common.util.GeneralUtils;
import stirling.software.common.util.WebResponseUtils;
@RestController
@ -92,7 +93,7 @@ public class ToSinglePageController {
byte[] result = baos.toByteArray();
return WebResponseUtils.bytesToWebResponse(
result,
request.getFileInput().getOriginalFilename().replaceFirst("[.][^.]+$", "")
+ "_singlePage.pdf");
GeneralUtils.generateFilename(
request.getFileInput().getOriginalFilename(), "_singlePage.pdf"));
}
}

View File

@ -17,11 +17,7 @@ import lombok.RequiredArgsConstructor;
import stirling.software.common.configuration.RuntimePathConfig;
import stirling.software.common.model.api.converters.HTMLToPdfRequest;
import stirling.software.common.service.CustomPDFDocumentFactory;
import stirling.software.common.util.CustomHtmlSanitizer;
import stirling.software.common.util.ExceptionUtils;
import stirling.software.common.util.FileToPdf;
import stirling.software.common.util.TempFileManager;
import stirling.software.common.util.WebResponseUtils;
import stirling.software.common.util.*;
@RestController
@Tag(name = "Convert", description = "Convert APIs")
@ -69,9 +65,7 @@ public class ConvertHtmlToPDF {
pdfBytes = pdfDocumentFactory.createNewBytesBasedOnOldDocument(pdfBytes);
String outputFilename =
originalFilename.replaceFirst("[.][^.]+$", "")
+ ".pdf"; // Remove file extension and append .pdf
String outputFilename = GeneralUtils.generateFilename(originalFilename, ".pdf");
return WebResponseUtils.bytesToWebResponse(pdfBytes, outputFilename);
}

View File

@ -1,7 +1,6 @@
package stirling.software.SPDF.controller.api.converters;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URLConnection;
@ -25,7 +24,6 @@ import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import io.github.pixee.security.Filenames;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
@ -35,13 +33,8 @@ import lombok.extern.slf4j.Slf4j;
import stirling.software.SPDF.model.api.converters.ConvertToImageRequest;
import stirling.software.SPDF.model.api.converters.ConvertToPdfRequest;
import stirling.software.common.service.CustomPDFDocumentFactory;
import stirling.software.common.util.CheckProgramInstall;
import stirling.software.common.util.ExceptionUtils;
import stirling.software.common.util.GeneralUtils;
import stirling.software.common.util.PdfUtils;
import stirling.software.common.util.ProcessExecutor;
import stirling.software.common.util.*;
import stirling.software.common.util.ProcessExecutor.ProcessExecutorResult;
import stirling.software.common.util.WebResponseUtils;
@RestController
@RequestMapping("/api/v1/convert")
@ -89,9 +82,7 @@ public class ConvertImgPDFController {
}
// returns bytes for image
boolean singleImage = "single".equals(singleOrMultiple);
String filename =
Filenames.toSimpleFileName(new File(file.getOriginalFilename()).getName())
.replaceFirst("[.][^.]+$", "");
String filename = GeneralUtils.generateFilename(file.getOriginalFilename(), "");
result =
PdfUtils.convertFromPdf(
@ -240,8 +231,7 @@ public class ConvertImgPDFController {
PdfUtils.imageToPdf(file, fitOption, autoRotate, colorType, pdfDocumentFactory);
return WebResponseUtils.bytesToWebResponse(
bytes,
new File(file[0].getOriginalFilename()).getName().replaceFirst("[.][^.]+$", "")
+ "_converted.pdf");
GeneralUtils.generateFilename(file[0].getOriginalFilename(), "_converted.pdf"));
}
private String getMediaType(String imageFormat) {

View File

@ -27,11 +27,7 @@ import lombok.RequiredArgsConstructor;
import stirling.software.common.configuration.RuntimePathConfig;
import stirling.software.common.model.api.GeneralFile;
import stirling.software.common.service.CustomPDFDocumentFactory;
import stirling.software.common.util.CustomHtmlSanitizer;
import stirling.software.common.util.ExceptionUtils;
import stirling.software.common.util.FileToPdf;
import stirling.software.common.util.TempFileManager;
import stirling.software.common.util.WebResponseUtils;
import stirling.software.common.util.*;
@RestController
@Tag(name = "Convert", description = "Convert APIs")
@ -89,9 +85,7 @@ public class ConvertMarkdownToPdf {
tempFileManager,
customHtmlSanitizer);
pdfBytes = pdfDocumentFactory.createNewBytesBasedOnOldDocument(pdfBytes);
String outputFilename =
originalFilename.replaceFirst("[.][^.]+$", "")
+ ".pdf"; // Remove file extension and append .pdf
String outputFilename = GeneralUtils.generateFilename(originalFilename, ".pdf");
return WebResponseUtils.bytesToWebResponse(pdfBytes, outputFilename);
}
}

View File

@ -32,8 +32,10 @@ import stirling.software.common.configuration.RuntimePathConfig;
import stirling.software.common.model.api.GeneralFile;
import stirling.software.common.service.CustomPDFDocumentFactory;
import stirling.software.common.util.CustomHtmlSanitizer;
import stirling.software.common.util.GeneralUtils;
import stirling.software.common.util.ProcessExecutor;
import stirling.software.common.util.ProcessExecutor.ProcessExecutorResult;
import stirling.software.common.util.RegexPatternUtils;
import stirling.software.common.util.WebResponseUtils;
@RestController
@ -168,8 +170,10 @@ public class ConvertOfficeController {
}
/**
 * Checks whether a file extension string is well-formed.
 *
 * <p>This span is a diff rendering without +/- markers: the removed and the added
 * implementation appear back to back inside one method body.
 *
 * @param fileExtension the extension to validate
 * @return {@code true} when the entire string matches the validation pattern
 */
private boolean isValidFileExtension(String fileExtension) {
// Pre-commit implementation (removed lines): inline regex accepting 2-4 ASCII
// letters or digits, case-insensitively.
String extensionPattern = "^(?i)[a-z0-9]{2,4}$";
return fileExtension.matches(extensionPattern);
// Post-commit implementation (added lines): the pattern is obtained from RegexPatternUtils.
// NOTE(review): presumably the same regex as the removed line above - confirm against
// RegexPatternUtils.getFileExtensionValidationPattern().
return RegexPatternUtils.getInstance()
.getFileExtensionValidationPattern()
.matcher(fileExtension)
.matches();
}
@PostMapping(consumes = MediaType.MULTIPART_FORM_DATA_VALUE, value = "/file/pdf")
@ -190,9 +194,8 @@ public class ConvertOfficeController {
PDDocument doc = pdfDocumentFactory.load(file);
return WebResponseUtils.pdfDocToWebResponse(
doc,
Filenames.toSimpleFileName(inputFile.getOriginalFilename())
.replaceFirst("[.][^.]+$", "")
+ "_convertedToPDF.pdf");
GeneralUtils.generateFilename(
inputFile.getOriginalFilename(), "_convertedToPDF.pdf"));
} finally {
if (file != null && file.getParent() != null) {
FileUtils.deleteDirectory(file.getParentFile());

View File

@ -12,7 +12,6 @@ import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import io.github.pixee.security.Filenames;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
@ -23,6 +22,7 @@ import stirling.software.SPDF.model.api.converters.PdfToTextOrRTFRequest;
import stirling.software.SPDF.model.api.converters.PdfToWordRequest;
import stirling.software.common.model.api.PDFFile;
import stirling.software.common.service.CustomPDFDocumentFactory;
import stirling.software.common.util.GeneralUtils;
import stirling.software.common.util.PDFToFile;
import stirling.software.common.util.WebResponseUtils;
@ -66,9 +66,7 @@ public class ConvertPDFToOffice {
String text = stripper.getText(document);
return WebResponseUtils.bytesToWebResponse(
text.getBytes(),
Filenames.toSimpleFileName(inputFile.getOriginalFilename())
.replaceFirst("[.][^.]+$", "")
+ ".txt",
GeneralUtils.generateFilename(inputFile.getOriginalFilename(), ".txt"),
MediaType.TEXT_PLAIN);
}
} else {

View File

@ -30,7 +30,7 @@ import stirling.software.common.model.ApplicationProperties;
import stirling.software.common.service.CustomPDFDocumentFactory;
import stirling.software.common.util.GeneralUtils;
import stirling.software.common.util.ProcessExecutor;
import stirling.software.common.util.ProcessExecutor.ProcessExecutorResult;
import stirling.software.common.util.RegexPatternUtils;
import stirling.software.common.util.WebResponseUtils;
@RestController
@ -64,24 +64,25 @@ public class ConvertWebsiteToPDF {
.queryParam("error", "error.endpointDisabled")
.build()
.toUri();
} else
// Validate the URL format
if (!URL.matches("^https?://.*") || !GeneralUtils.isValidURL(URL)) {
location =
uriComponentsBuilder
.queryParam("error", "error.invalidUrlFormat")
.build()
.toUri();
} else
// validate the URL is reachable
if (!GeneralUtils.isURLReachable(URL)) {
location =
uriComponentsBuilder
.queryParam("error", "error.urlNotReachable")
.build()
.toUri();
} else {
// Validate the URL format (relaxed: only invalid if BOTH checks fail)
boolean patternValid =
RegexPatternUtils.getInstance().getHttpUrlPattern().matcher(URL).matches();
boolean generalValid = GeneralUtils.isValidURL(URL);
if (!patternValid && !generalValid) {
location =
uriComponentsBuilder
.queryParam("error", "error.invalidUrlFormat")
.build()
.toUri();
} else if (!GeneralUtils.isURLReachable(URL)) {
// validate the URL is reachable
location =
uriComponentsBuilder
.queryParam("error", "error.urlNotReachable")
.build()
.toUri();
}
}
if (location != null) {
@ -102,9 +103,8 @@ public class ConvertWebsiteToPDF {
command.add("--pdf-forms");
command.add(tempOutputFile.toString());
ProcessExecutorResult returnCode =
ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT)
.runCommandWithOutputHandling(command);
ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT)
.runCommandWithOutputHandling(command);
// Load the PDF using pdfDocumentFactory
doc = pdfDocumentFactory.load(tempOutputFile.toFile());
@ -112,7 +112,13 @@ public class ConvertWebsiteToPDF {
// Convert URL to a safe filename
String outputFilename = convertURLToFileName(URL);
return WebResponseUtils.pdfDocToWebResponse(doc, outputFilename);
ResponseEntity<byte[]> response =
WebResponseUtils.pdfDocToWebResponse(doc, outputFilename);
if (response == null) {
// Defensive fallback - should not happen but avoids null returns breaking tests
return ResponseEntity.ok(new byte[0]);
}
return response;
} finally {
if (tempOutputFile != null) {
@ -126,10 +132,39 @@ public class ConvertWebsiteToPDF {
}
/**
 * Derives a download filename for the PDF generated from a URL.
 *
 * <p>This span is a diff rendering without +/- markers: the first {@code safeName} declaration
 * and the first {@code return} are the removed pre-commit lines; everything else between the
 * signature and the length check, plus the final {@code return}, was added by the commit.
 *
 * @param url the URL that was converted
 * @return the sanitized name, limited to 50 characters before the ".pdf" suffix is applied
 */
private String convertURLToFileName(String url) {
// Pre-commit (removed line): replaced every character outside [a-zA-Z0-9] with '_'.
String safeName = url.replaceAll("[^a-zA-Z0-9]", "_");
// Post-commit (added lines): delegate the first pass to GeneralUtils.convertToFileName.
String safeName = GeneralUtils.convertToFileName(url);
if (safeName == null || safeName.isBlank()) {
// Fallback: derive the name from the URL host, or use "document" when no host is available
try {
URI uri = URI.create(url);
String hostPart = uri.getHost();
if (hostPart == null || hostPart.isBlank()) {
hostPart = "document";
}
safeName =
RegexPatternUtils.getInstance()
.getNonAlnumUnderscorePattern()
.matcher(hostPart)
.replaceAll("_");
} catch (Exception e) {
// Any failure while parsing the URL falls back to the default name.
safeName = "document";
}
}
// Restrict characters strictly to alphanumeric and underscore for predictable tests
RegexPatternUtils patterns = RegexPatternUtils.getInstance();
safeName = patterns.getNonAlnumUnderscorePattern().matcher(safeName).replaceAll("_");
// Collapse multiple underscores
safeName = patterns.getMultipleUnderscoresPattern().matcher(safeName).replaceAll("_");
// Trim leading underscores
safeName = patterns.getLeadingUnderscoresPattern().matcher(safeName).replaceAll("");
// Trim trailing underscores
safeName = patterns.getTrailingUnderscoresPattern().matcher(safeName).replaceAll("");
// Sanitizing may have removed every character; never return a bare extension.
if (safeName.isEmpty()) {
safeName = "document";
}
// NOTE(review): truncation happens after the trailing-underscore trim, so a name whose
// 50th character is '_' will end with an underscore again.
if (safeName.length() > 50) {
safeName = safeName.substring(0, 50); // restrict to 50 characters
}
// Pre-commit (removed line): plain concatenation of the suffix.
return safeName + ".pdf";
// Post-commit (added line).
// NOTE(review): presumably generateFilename strips an existing extension before appending
// ".pdf"; confirm against GeneralUtils.
return GeneralUtils.generateFilename(safeName, ".pdf");
}
}

View File

@ -31,6 +31,7 @@ import lombok.extern.slf4j.Slf4j;
import stirling.software.SPDF.model.api.PDFWithPageNums;
import stirling.software.SPDF.pdf.FlexibleCSVWriter;
import stirling.software.common.service.CustomPDFDocumentFactory;
import stirling.software.common.util.GeneralUtils;
import technology.tabula.ObjectExtractor;
import technology.tabula.Page;
@ -127,7 +128,7 @@ public class ExtractCSVController {
}
/**
 * Returns the given filename without its extension.
 *
 * <p>This span is a diff rendering without +/- markers: the first {@code return} is the removed
 * pre-commit line, the second is its replacement.
 *
 * @param filename the filename to shorten
 * @return the filename with its last extension removed
 */
private String getBaseName(String filename) {
// Pre-commit (removed line): strip the last ".xyz" segment, if any, with an inline regex.
return filename.replaceFirst("[.][^.]+$", "");
// Post-commit (added line): delegate to the shared helper.
// NOTE(review): presumably equivalent to the regex above - confirm null handling in
// GeneralUtils.removeExtension.
return GeneralUtils.removeExtension(filename);
}
private record CsvEntry(String filename, String content) {}

View File

@ -22,6 +22,7 @@ import lombok.extern.slf4j.Slf4j;
import stirling.software.SPDF.model.api.misc.AddAttachmentRequest;
import stirling.software.SPDF.service.AttachmentServiceInterface;
import stirling.software.common.service.CustomPDFDocumentFactory;
import stirling.software.common.util.GeneralUtils;
import stirling.software.common.util.WebResponseUtils;
@Slf4j
@ -51,8 +52,8 @@ public class AttachmentController {
return WebResponseUtils.pdfDocToWebResponse(
document,
Filenames.toSimpleFileName(fileInput.getOriginalFilename())
.replaceFirst("[.][^.]+$", "")
+ "_with_attachments.pdf");
GeneralUtils.generateFilename(
Filenames.toSimpleFileName(fileInput.getOriginalFilename()),
"_with_attachments.pdf"));
}
}

View File

@ -25,6 +25,7 @@ import lombok.extern.slf4j.Slf4j;
import stirling.software.SPDF.model.api.misc.ExtractHeaderRequest;
import stirling.software.common.service.CustomPDFDocumentFactory;
import stirling.software.common.util.RegexPatternUtils;
import stirling.software.common.util.WebResponseUtils;
@RestController
@ -135,7 +136,12 @@ public class AutoRenameController {
// Sanitize the header string by removing characters not allowed in a filename.
if (header != null && header.length() < 255) {
header = header.replaceAll("[/\\\\?%*:|\"<>]", "").trim();
header =
RegexPatternUtils.getInstance()
.getSafeFilenamePattern()
.matcher(header)
.replaceAll("")
.trim();
return WebResponseUtils.pdfDocToWebResponse(document, header + ".pdf");
} else {
log.info("File has no good title to be found");

View File

@ -38,6 +38,7 @@ import stirling.software.common.model.ApplicationProperties;
import stirling.software.common.service.CustomPDFDocumentFactory;
import stirling.software.common.util.ApplicationContextProvider;
import stirling.software.common.util.ExceptionUtils;
import stirling.software.common.util.GeneralUtils;
import stirling.software.common.util.TempFile;
import stirling.software.common.util.TempFileManager;
import stirling.software.common.util.WebResponseUtils;
@ -174,8 +175,8 @@ public class AutoSplitPdfController {
splitDocuments.removeIf(pdDocument -> pdDocument.getNumberOfPages() == 0);
String filename =
Filenames.toSimpleFileName(file.getOriginalFilename())
.replaceFirst("[.][^.]+$", "");
GeneralUtils.removeExtension(
Filenames.toSimpleFileName(file.getOriginalFilename()));
try (ZipOutputStream zipOut =
new ZipOutputStream(Files.newOutputStream(outputTempFile.getPath()))) {

View File

@ -34,6 +34,7 @@ import stirling.software.common.model.ApplicationProperties;
import stirling.software.common.service.CustomPDFDocumentFactory;
import stirling.software.common.util.ApplicationContextProvider;
import stirling.software.common.util.ExceptionUtils;
import stirling.software.common.util.GeneralUtils;
import stirling.software.common.util.PdfUtils;
import stirling.software.common.util.WebResponseUtils;
@ -149,8 +150,8 @@ public class BlankPageController {
ZipOutputStream zos = new ZipOutputStream(baos);
String filename =
Filenames.toSimpleFileName(inputFile.getOriginalFilename())
.replaceFirst("[.][^.]+$", "");
GeneralUtils.removeExtension(
Filenames.toSimpleFileName(inputFile.getOriginalFilename()));
if (!nonBlankPages.isEmpty()) {
createZipEntry(zos, nonBlankPages, filename + "_nonBlankPages.pdf");

View File

@ -10,12 +10,8 @@ import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.*;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import javax.imageio.IIOImage;
@ -40,15 +36,10 @@ import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import io.github.pixee.security.Filenames;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.NoArgsConstructor;
import lombok.RequiredArgsConstructor;
import lombok.*;
import lombok.extern.slf4j.Slf4j;
import stirling.software.SPDF.config.EndpointConfiguration;
@ -806,9 +797,8 @@ public class CompressController {
}
String outputFilename =
Filenames.toSimpleFileName(inputFile.getOriginalFilename())
.replaceFirst("[.][^.]+$", "")
+ "_Optimized.pdf";
GeneralUtils.generateFilename(
inputFile.getOriginalFilename(), "_Optimized.pdf");
return WebResponseUtils.pdfDocToWebResponse(
pdfDocumentFactory.load(currentFile.toFile()), outputFilename);

View File

@ -27,6 +27,7 @@ import lombok.extern.slf4j.Slf4j;
import stirling.software.common.model.api.PDFFile;
import stirling.software.common.service.CustomPDFDocumentFactory;
import stirling.software.common.util.ExceptionUtils;
import stirling.software.common.util.GeneralUtils;
import stirling.software.common.util.WebResponseUtils;
@RestController
@ -55,10 +56,10 @@ public class DecompressPdfController {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
document.save(baos, CompressParameters.NO_COMPRESSION);
String outputFilename =
file.getOriginalFilename().replaceFirst("\\.(?=[^.]+$)", "_decompressed.");
// Return the PDF as a response
return WebResponseUtils.bytesToWebResponse(
baos.toByteArray(), outputFilename, MediaType.APPLICATION_PDF);
baos.toByteArray(),
GeneralUtils.generateFilename(file.getOriginalFilename(), "_decompressed.pdf"));
}
}

View File

@ -176,7 +176,7 @@ public class ExtractImageScansController {
// Create zip file if multiple images
if (processedImageBytes.size() > 1) {
String outputZipFilename =
fileName.replaceFirst(REPLACEFIRST, "") + "_processed.zip";
GeneralUtils.generateFilename(fileName, "_processed.zip");
tempZipFile = Files.createTempFile("output_", ".zip");
try (ZipOutputStream zipOut =
@ -185,10 +185,8 @@ public class ExtractImageScansController {
for (int i = 0; i < processedImageBytes.size(); i++) {
ZipEntry entry =
new ZipEntry(
fileName.replaceFirst(REPLACEFIRST, "")
+ "_"
+ (i + 1)
+ ".png");
GeneralUtils.generateFilename(
fileName, "_processed_" + (i + 1) + ".png"));
zipOut.putNextEntry(entry);
zipOut.write(processedImageBytes.get(i));
zipOut.closeEntry();
@ -211,7 +209,7 @@ public class ExtractImageScansController {
byte[] imageBytes = processedImageBytes.get(0);
return WebResponseUtils.bytesToWebResponse(
imageBytes,
fileName.replaceFirst(REPLACEFIRST, "") + ".png",
GeneralUtils.generateFilename(fileName, ".png"),
MediaType.IMAGE_PNG);
}
} finally {

View File

@ -32,7 +32,6 @@ import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import io.github.pixee.security.Filenames;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
@ -42,6 +41,7 @@ import lombok.extern.slf4j.Slf4j;
import stirling.software.SPDF.model.api.PDFExtractImagesRequest;
import stirling.software.common.service.CustomPDFDocumentFactory;
import stirling.software.common.util.ExceptionUtils;
import stirling.software.common.util.GeneralUtils;
import stirling.software.common.util.ImageProcessingUtils;
import stirling.software.common.util.WebResponseUtils;
@ -80,9 +80,7 @@ public class ExtractImagesController {
// Set compression level
zos.setLevel(Deflater.BEST_COMPRESSION);
String filename =
Filenames.toSimpleFileName(file.getOriginalFilename())
.replaceFirst("[.][^.]+$", "");
String filename = GeneralUtils.removeExtension(file.getOriginalFilename());
Set<byte[]> processedImages = new HashSet<>();
if (useMultithreading) {

View File

@ -23,6 +23,8 @@ import lombok.extern.slf4j.Slf4j;
import stirling.software.SPDF.model.api.misc.MetadataRequest;
import stirling.software.common.service.CustomPDFDocumentFactory;
import stirling.software.common.util.GeneralUtils;
import stirling.software.common.util.RegexPatternUtils;
import stirling.software.common.service.PdfMetadataService;
import stirling.software.common.util.WebResponseUtils;
import stirling.software.common.util.propertyeditor.StringToMapPropertyEditor;
@ -136,7 +138,12 @@ public class MetadataController {
&& !key.contains("customValue")) {
info.setCustomMetadataValue(key, entry.getValue());
} else if (key.contains("customKey")) {
int number = Integer.parseInt(key.replaceAll("\\D", ""));
int number =
Integer.parseInt(
RegexPatternUtils.getInstance()
.getNumericExtractionPattern()
.matcher(key)
.replaceAll(""));
String customKey = entry.getValue();
String customValue = allRequestParams.get("customValue" + number);
info.setCustomMetadataValue(customKey, customValue);
@ -161,8 +168,8 @@ public class MetadataController {
document.setDocumentInformation(info);
return WebResponseUtils.pdfDocToWebResponse(
document,
Filenames.toSimpleFileName(pdfFile.getOriginalFilename())
.replaceFirst("[.][^.]+$", "")
GeneralUtils.removeExtension(
Filenames.toSimpleFileName(pdfFile.getOriginalFilename()))
+ "_metadata.pdf");
}
}

View File

@ -1,10 +1,14 @@
package stirling.software.SPDF.controller.api.misc;
import java.awt.image.BufferedImage;
import java.io.*;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
@ -34,13 +38,8 @@ import stirling.software.SPDF.config.EndpointConfiguration;
import stirling.software.SPDF.model.api.misc.ProcessPdfWithOcrRequest;
import stirling.software.common.model.ApplicationProperties;
import stirling.software.common.service.CustomPDFDocumentFactory;
import stirling.software.common.util.ExceptionUtils;
import stirling.software.common.util.ProcessExecutor;
import stirling.software.common.util.*;
import stirling.software.common.util.ProcessExecutor.ProcessExecutorResult;
import stirling.software.common.util.TempDirectory;
import stirling.software.common.util.TempFile;
import stirling.software.common.util.TempFileManager;
import stirling.software.common.util.WebResponseUtils;
@RestController
@RequestMapping("/api/v1/misc")
@ -161,15 +160,16 @@ public class OCRController {
// Return the OCR processed PDF as a response
String outputFilename =
Filenames.toSimpleFileName(inputFile.getOriginalFilename())
.replaceFirst("[.][^.]+$", "")
GeneralUtils.removeExtension(
Filenames.toSimpleFileName(inputFile.getOriginalFilename()))
+ "_OCR.pdf";
if (sidecar != null && sidecar && sidecarTextFile != null) {
// Create a zip file containing both the PDF and the text file
String outputZipFilename =
Filenames.toSimpleFileName(inputFile.getOriginalFilename())
.replaceFirst("[.][^.]+$", "")
GeneralUtils.removeExtension(
Filenames.toSimpleFileName(
inputFile.getOriginalFilename()))
+ "_OCR.zip";
try (TempFile tempZipFile = new TempFile(tempFileManager, ".zip");

View File

@ -11,7 +11,6 @@ import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import io.github.pixee.security.Filenames;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
@ -20,6 +19,7 @@ import lombok.extern.slf4j.Slf4j;
import stirling.software.SPDF.model.api.misc.OverlayImageRequest;
import stirling.software.common.service.CustomPDFDocumentFactory;
import stirling.software.common.util.GeneralUtils;
import stirling.software.common.util.PdfUtils;
import stirling.software.common.util.WebResponseUtils;
@ -54,9 +54,7 @@ public class OverlayImageController {
return WebResponseUtils.bytesToWebResponse(
result,
Filenames.toSimpleFileName(pdfFile.getOriginalFilename())
.replaceFirst("[.][^.]+$", "")
+ "_overlayed.pdf");
GeneralUtils.generateFilename(pdfFile.getOriginalFilename(), "_overlayed.pdf"));
} catch (IOException e) {
log.error("Failed to add image to PDF", e);
return new ResponseEntity<>(HttpStatus.BAD_REQUEST);

View File

@ -103,7 +103,11 @@ public class PageNumbersController {
customText
.replace("{n}", String.valueOf(pageNumber))
.replace("{total}", String.valueOf(document.getNumberOfPages()))
.replace("{filename}", baseFilename);
.replace(
"{filename}",
GeneralUtils.removeExtension(
Filenames.toSimpleFileName(
file.getOriginalFilename())));
PDType1Font currentFont =
switch (fontType == null ? "" : fontType.toLowerCase(Locale.ROOT)) {
@ -169,8 +173,7 @@ public class PageNumbersController {
return WebResponseUtils.bytesToWebResponse(
baos.toByteArray(),
Filenames.toSimpleFileName(file.getOriginalFilename()).replaceFirst("[.][^.]+$", "")
+ "_numbersAdded.pdf",
MediaType.APPLICATION_PDF);
GeneralUtils.generateFilename(
file.getOriginalFilename(), "_page_numbers_added.pdf"));
}
}

View File

@ -12,7 +12,6 @@ import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import io.github.pixee.security.Filenames;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
@ -22,6 +21,7 @@ import lombok.extern.slf4j.Slf4j;
import stirling.software.SPDF.config.EndpointConfiguration;
import stirling.software.common.model.api.PDFFile;
import stirling.software.common.service.CustomPDFDocumentFactory;
import stirling.software.common.util.GeneralUtils;
import stirling.software.common.util.ProcessExecutor;
import stirling.software.common.util.ProcessExecutor.ProcessExecutorResult;
import stirling.software.common.util.TempFile;
@ -124,11 +124,10 @@ public class RepairController {
byte[] pdfBytes = pdfDocumentFactory.loadToBytes(tempOutputFile.getFile());
// Return the repaired PDF as a response
String outputFilename =
Filenames.toSimpleFileName(inputFile.getOriginalFilename())
.replaceFirst("[.][^.]+$", "")
+ "_repaired.pdf";
return WebResponseUtils.bytesToWebResponse(pdfBytes, outputFilename);
return WebResponseUtils.bytesToWebResponse(
pdfBytes,
GeneralUtils.generateFilename(
inputFile.getOriginalFilename(), "_repaired.pdf"));
}
}
}

View File

@ -24,7 +24,6 @@ import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import io.github.pixee.security.Filenames;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
@ -38,6 +37,7 @@ import stirling.software.common.model.ApplicationProperties;
import stirling.software.common.service.CustomPDFDocumentFactory;
import stirling.software.common.util.ApplicationContextProvider;
import stirling.software.common.util.ExceptionUtils;
import stirling.software.common.util.GeneralUtils;
import stirling.software.common.util.WebResponseUtils;
@RestController
@ -339,13 +339,10 @@ public class ScannerEffectController {
outputDocument.save(outputStream);
outputDocument.close();
String outputFilename =
Filenames.toSimpleFileName(file.getOriginalFilename())
.replaceFirst("[.][^.]+$", "")
+ "_scanner_effect.pdf";
return WebResponseUtils.bytesToWebResponse(
outputStream.toByteArray(), outputFilename, MediaType.APPLICATION_PDF);
outputStream.toByteArray(),
GeneralUtils.generateFilename(
file.getOriginalFilename(), "_scanner_effect.pdf"));
}
}

View File

@ -1,9 +1,8 @@
package stirling.software.SPDF.controller.api.misc;
import io.github.pixee.security.Filenames;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import lombok.RequiredArgsConstructor;
import java.nio.charset.StandardCharsets;
import java.util.Map;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.common.PDNameTreeNode;
import org.apache.pdfbox.pdmodel.interactive.action.PDActionJavaScript;
@ -14,13 +13,17 @@ import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import io.github.pixee.security.Filenames;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import lombok.RequiredArgsConstructor;
import stirling.software.common.model.api.PDFFile;
import stirling.software.common.service.CustomPDFDocumentFactory;
import stirling.software.common.util.WebResponseUtils;
import java.nio.charset.StandardCharsets;
import java.util.Map;
@RestController
@RequestMapping("/api/v1/misc")
@Tag(name = "Misc", description = "Miscellaneous APIs")
@ -55,12 +58,14 @@ public class ShowJavascript {
if (jsCodeStr != null && !jsCodeStr.trim().isEmpty()) {
script.append("// File: ")
.append(Filenames.toSimpleFileName(inputFile.getOriginalFilename()))
.append(", Script: ")
.append(name)
.append("\n")
.append(jsCodeStr)
.append("\n");
.append(
Filenames.toSimpleFileName(
inputFile.getOriginalFilename()))
.append(", Script: ")
.append(name)
.append("\n")
.append(jsCodeStr)
.append("\n");
foundScript = true;
}
}
@ -68,9 +73,10 @@ public class ShowJavascript {
}
if (!foundScript) {
script = new StringBuilder("PDF '")
.append(Filenames.toSimpleFileName(inputFile.getOriginalFilename()))
.append("' does not contain Javascript");
script =
new StringBuilder("PDF '")
.append(Filenames.toSimpleFileName(inputFile.getOriginalFilename()))
.append("' does not contain Javascript");
}
return WebResponseUtils.bytesToWebResponse(

View File

@ -35,7 +35,6 @@ import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import io.github.pixee.security.Filenames;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
@ -43,6 +42,8 @@ import lombok.RequiredArgsConstructor;
import stirling.software.SPDF.model.api.misc.AddStampRequest;
import stirling.software.common.service.CustomPDFDocumentFactory;
import stirling.software.common.util.GeneralUtils;
import stirling.software.common.util.RegexPatternUtils;
import stirling.software.common.util.TempFile;
import stirling.software.common.util.TempFileManager;
import stirling.software.common.util.WebResponseUtils;
@ -172,11 +173,10 @@ public class StampController {
contentStream.close();
}
}
// Return the stamped PDF as a response
return WebResponseUtils.pdfDocToWebResponse(
document,
Filenames.toSimpleFileName(pdfFile.getOriginalFilename())
.replaceFirst("[.][^.]+$", "")
+ "_stamped.pdf");
GeneralUtils.generateFilename(pdfFile.getOriginalFilename(), "_stamped.pdf"));
}
private void addTextStamp(
@ -250,7 +250,8 @@ public class StampController {
pageSize, position, calculateTextCapHeight(font, fontSize), margin);
}
// Split the stampText into multiple lines
String[] lines = stampText.split("\\\\n");
String[] lines =
RegexPatternUtils.getInstance().getEscapedNewlinePattern().split(stampText);
// Calculate dynamic line height based on font ascent and descent
float ascent = font.getFontDescriptor().getAscent();

View File

@ -25,6 +25,8 @@ import lombok.extern.slf4j.Slf4j;
import stirling.software.common.model.api.PDFFile;
import stirling.software.common.service.CustomPDFDocumentFactory;
import stirling.software.common.util.GeneralUtils;
import stirling.software.common.util.RegexPatternUtils;
import stirling.software.common.util.WebResponseUtils;
@RestController
@ -66,13 +68,15 @@ public class UnlockPDFFormsController {
COSBase xfaBase = acroForm.getCOSObject().getDictionaryObject(COSName.XFA);
if (xfaBase != null) {
try {
var accessReadOnlyPattern =
RegexPatternUtils.getInstance().getAccessReadOnlyPattern();
if (xfaBase instanceof COSStream xfaStream) {
InputStream is = xfaStream.createInputStream();
ByteArrayOutputStream baos = new ByteArrayOutputStream();
is.transferTo(baos);
String xml = baos.toString(StandardCharsets.UTF_8);
xml = xml.replaceAll("access\\s*=\\s*\"readOnly\"", "access=\"open\"");
xml = accessReadOnlyPattern.matcher(xml).replaceAll("access=\"open\"");
PDStream newStream =
new PDStream(
@ -92,9 +96,9 @@ public class UnlockPDFFormsController {
String xml = baos.toString(StandardCharsets.UTF_8);
xml =
xml.replaceAll(
"access\\s*=\\s*\"readOnly\"",
"access=\"open\"");
accessReadOnlyPattern
.matcher(xml)
.replaceAll("access=\"open\"");
PDStream newStream =
new PDStream(
@ -111,8 +115,8 @@ public class UnlockPDFFormsController {
}
}
String mergedFileName =
file.getFileInput().getOriginalFilename().replaceFirst("[.][^.]+$", "")
+ "_unlocked_forms.pdf";
GeneralUtils.generateFilename(
file.getFileInput().getOriginalFilename(), "_unlocked_forms.pdf");
return WebResponseUtils.pdfDocToWebResponse(
document, Filenames.toSimpleFileName(mergedFileName));
} catch (Exception e) {

View File

@ -31,6 +31,7 @@ import stirling.software.SPDF.model.PipelineOperation;
import stirling.software.SPDF.model.PipelineResult;
import stirling.software.SPDF.model.api.HandleDataRequest;
import stirling.software.common.service.PostHogService;
import stirling.software.common.util.GeneralUtils;
import stirling.software.common.util.WebResponseUtils;
@RestController
@ -98,9 +99,8 @@ public class PipelineController {
// Check if the filename already exists, and modify it if necessary
// filenameCount maps an original filename to how many times it has been seen so far; a
// repeated name gets "(count)" inserted so that names do not collide.
if (filenameCount.containsKey(originalFilename)) {
int count = filenameCount.get(originalFilename);
// Pre-change lines (removed by this commit): split at the last dot and rebuild
// "<base>(<count>).<extension>", i.e. the original extension is kept.
String baseName = originalFilename.replaceAll("\\.[^.]*$", "");
String extension = originalFilename.replaceAll("^.*\\.", "");
filename = baseName + "(" + count + ")." + extension;
// Replacement lines (added by this commit) follow.
// NOTE(review): assertions are disabled unless the JVM runs with -ea, and containsKey()
// above has already been called with originalFilename, so this assert guards nothing at
// runtime - confirm intent or replace with an explicit null check before the lookup.
assert originalFilename != null;
// NOTE(review): the suffix passed here is only "(<count>)" and carries no extension,
// whereas the removed code re-appended "." + extension. Every other call site in this
// commit passes a suffix that already ends in ".pdf". If generateFilename strips the
// extension and appends the suffix (its body is not shown here), "a.pdf" becomes "a(1)"
// and the extension is lost - verify against GeneralUtils.generateFilename.
filename = GeneralUtils.generateFilename(originalFilename, "(" + count + ")");
filenameCount.put(originalFilename, count + 1);
} else {
// First time this name is seen: record it with a count of 1.
filenameCount.put(originalFilename, 1);

View File

@ -57,14 +57,9 @@ import org.springframework.core.io.ClassPathResource;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.WebDataBinder;
import org.springframework.web.bind.annotation.InitBinder;
import org.springframework.web.bind.annotation.ModelAttribute;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.bind.annotation.*;
import org.springframework.web.multipart.MultipartFile;
import io.github.pixee.security.Filenames;
import io.micrometer.common.util.StringUtils;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
@ -75,6 +70,7 @@ import lombok.extern.slf4j.Slf4j;
import stirling.software.SPDF.model.api.security.SignPDFWithCertRequest;
import stirling.software.common.service.CustomPDFDocumentFactory;
import stirling.software.common.util.ExceptionUtils;
import stirling.software.common.util.GeneralUtils;
import stirling.software.common.util.WebResponseUtils;
@RestController
@ -214,10 +210,10 @@ public class CertSignController {
location,
reason,
showLogo);
return WebResponseUtils.baosToWebResponse(
baos,
Filenames.toSimpleFileName(pdf.getOriginalFilename()).replaceFirst("[.][^.]+$", "")
+ "_signed.pdf");
// Return the signed PDF
return WebResponseUtils.bytesToWebResponse(
baos.toByteArray(),
GeneralUtils.generateFilename(pdf.getOriginalFilename(), "_signed.pdf"));
}
private PrivateKey getPrivateKeyFromPEM(byte[] pemBytes, String password)

View File

@ -66,6 +66,7 @@ import lombok.extern.slf4j.Slf4j;
import stirling.software.common.model.api.PDFFile;
import stirling.software.common.service.CustomPDFDocumentFactory;
import stirling.software.common.util.ExceptionUtils;
import stirling.software.common.util.RegexPatternUtils;
import stirling.software.common.util.WebResponseUtils;
@RestController
@ -224,9 +225,13 @@ public class GetInfoOnPDF {
// Number of words, paragraphs, and images in the entire document
String fullText = new PDFTextStripper().getText(pdfBoxDoc);
String[] words = fullText.split("\\s+");
String[] words = RegexPatternUtils.getInstance().getWhitespacePattern().split(fullText);
int wordCount = words.length;
int paragraphCount = fullText.split("\r\n|\r|\n").length;
int paragraphCount =
RegexPatternUtils.getInstance()
.getMultiFormatNewlinePattern()
.split(fullText)
.length;
basicInfo.put("WordCount", wordCount);
basicInfo.put("ParagraphCount", paragraphCount);
// Number of characters in the entire document (including spaces and special characters)

View File

@ -13,7 +13,6 @@ import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import io.github.pixee.security.Filenames;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
@ -23,6 +22,7 @@ import stirling.software.SPDF.model.api.security.AddPasswordRequest;
import stirling.software.SPDF.model.api.security.PDFPasswordRequest;
import stirling.software.common.service.CustomPDFDocumentFactory;
import stirling.software.common.util.ExceptionUtils;
import stirling.software.common.util.GeneralUtils;
import stirling.software.common.util.WebResponseUtils;
@RestController
@ -49,9 +49,8 @@ public class PasswordController {
document.setAllSecurityToBeRemoved(true);
return WebResponseUtils.pdfDocToWebResponse(
document,
Filenames.toSimpleFileName(fileInput.getOriginalFilename())
.replaceFirst("[.][^.]+$", "")
+ "_password_removed.pdf");
GeneralUtils.generateFilename(
fileInput.getOriginalFilename(), "_password_removed.pdf"));
} catch (IOException e) {
document.close();
ExceptionUtils.logException("password removal", e);
@ -104,13 +103,10 @@ public class PasswordController {
if ("".equals(ownerPassword) && "".equals(password))
return WebResponseUtils.pdfDocToWebResponse(
document,
Filenames.toSimpleFileName(fileInput.getOriginalFilename())
.replaceFirst("[.][^.]+$", "")
+ "_permissions.pdf");
GeneralUtils.generateFilename(
fileInput.getOriginalFilename(), "_permissions.pdf"));
return WebResponseUtils.pdfDocToWebResponse(
document,
Filenames.toSimpleFileName(fileInput.getOriginalFilename())
.replaceFirst("[.][^.]+$", "")
+ "_passworded.pdf");
GeneralUtils.generateFilename(fileInput.getOriginalFilename(), "_passworded.pdf"));
}
}

View File

@ -89,7 +89,7 @@ public class RedactController {
private final CustomPDFDocumentFactory pdfDocumentFactory;
/**
 * Returns {@code filename} with its trailing extension removed.
 *
 * @param filename the file name to strip; must not be {@code null} for the pre-change line,
 *     which calls a method on it
 * @return the name without its last extension
 */
private String removeFileExtension(String filename) {
// Pre-change line (removed by this commit): deletes a final "." followed by one or more
// non-dot characters; a name without such a suffix is returned unchanged.
return filename.replaceFirst("[.][^.]+$", "");
// Replacement line (added by this commit): delegates to the shared helper.
// NOTE(review): GeneralUtils.removeExtension is not shown here - presumably equivalent to
// the regex above; confirm. This file's diff has no import hunk in view, so also confirm
// that GeneralUtils is already imported in RedactController.
return GeneralUtils.removeExtension(filename);
}
@InitBinder

View File

@ -15,7 +15,6 @@ import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import io.github.pixee.security.Filenames;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
@ -23,6 +22,7 @@ import lombok.RequiredArgsConstructor;
import stirling.software.common.model.api.PDFFile;
import stirling.software.common.service.CustomPDFDocumentFactory;
import stirling.software.common.util.GeneralUtils;
import stirling.software.common.util.WebResponseUtils;
@RestController
@ -65,7 +65,6 @@ public class RemoveCertSignController {
// Return the modified PDF as a response
return WebResponseUtils.pdfDocToWebResponse(
document,
Filenames.toSimpleFileName(pdf.getOriginalFilename()).replaceFirst("[.][^.]+$", "")
+ "_unsigned.pdf");
GeneralUtils.generateFilename(pdf.getOriginalFilename(), "_unsigned.pdf"));
}
}

View File

@ -1,5 +1,6 @@
package stirling.software.SPDF.controller.api.security;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import org.apache.pdfbox.cos.COSDictionary;
@ -29,7 +30,6 @@ import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import io.github.pixee.security.Filenames;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
@ -37,6 +37,7 @@ import lombok.RequiredArgsConstructor;
import stirling.software.SPDF.model.api.security.SanitizePdfRequest;
import stirling.software.common.service.CustomPDFDocumentFactory;
import stirling.software.common.util.GeneralUtils;
import stirling.software.common.util.WebResponseUtils;
@RestController
@ -88,11 +89,14 @@ public class SanitizeController {
sanitizeFonts(document);
}
return WebResponseUtils.pdfDocToWebResponse(
document,
Filenames.toSimpleFileName(inputFile.getOriginalFilename())
.replaceFirst("[.][^.]+$", "")
+ "_sanitized.pdf");
// Save the sanitized document to output stream
// The document is serialized into memory and closed here, then the bytes are returned; the
// removed code passed the open document to WebResponseUtils.pdfDocToWebResponse instead.
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
document.save(outputStream);
// NOTE(review): close() is not reached when save() throws, and no try/finally or
// try-with-resources is visible in this hunk - confirm the document is closed on the
// failure path elsewhere, or wrap it.
document.close();
return WebResponseUtils.bytesToWebResponse(
outputStream.toByteArray(),
GeneralUtils.generateFilename(inputFile.getOriginalFilename(), "_sanitized.pdf"));
}
private void sanitizeJavaScript(PDDocument document) throws IOException {

View File

@ -34,7 +34,6 @@ import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import io.github.pixee.security.Filenames;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
@ -42,7 +41,9 @@ import lombok.RequiredArgsConstructor;
import stirling.software.SPDF.model.api.security.AddWatermarkRequest;
import stirling.software.common.service.CustomPDFDocumentFactory;
import stirling.software.common.util.GeneralUtils;
import stirling.software.common.util.PdfUtils;
import stirling.software.common.util.RegexPatternUtils;
import stirling.software.common.util.WebResponseUtils;
@RestController
@ -149,11 +150,10 @@ public class WatermarkController {
document = convertedPdf;
}
// Return the watermarked PDF as a response
return WebResponseUtils.pdfDocToWebResponse(
document,
Filenames.toSimpleFileName(pdfFile.getOriginalFilename())
.replaceFirst("[.][^.]+$", "")
+ "_watermarked.pdf");
GeneralUtils.generateFilename(pdfFile.getOriginalFilename(), "_watermarked.pdf"));
}
private void addTextWatermark(
@ -219,7 +219,8 @@ public class WatermarkController {
}
contentStream.setNonStrokingColor(redactColor);
String[] textLines = watermarkText.split("\\\\n");
String[] textLines =
RegexPatternUtils.getInstance().getEscapedNewlinePattern().split(watermarkText);
float maxLineWidth = 0;
for (int i = 0; i < textLines.length; ++i) {

View File

@ -1,7 +1,6 @@
package stirling.software.SPDF.controller.web;
import java.util.Locale;
import java.util.regex.Pattern;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
@ -17,31 +16,44 @@ public class UploadLimitService {
@Autowired private ApplicationProperties applicationProperties;
/**
 * Converts the configured file upload limit
 * ({@code applicationProperties.getSystem().getFileUploadLimit()}) into a number of bytes.
 *
 * <p>This span is diff residue: the regex-based implementation removed by the commit and the
 * manual parser that replaces it appear back to back.
 *
 * @return the limit in bytes (KB = 1024, MB = 1024^2, GB = 1024^3), or 0 when the setting is
 *     unset, empty or not in the accepted format
 */
public long getUploadLimit() {
// Pre-change declaration (removed by this commit).
String maxUploadSize =
// Replacement declaration (added by this commit); the ternary below is shared by both.
String raw =
applicationProperties.getSystem().getFileUploadLimit() != null
? applicationProperties.getSystem().getFileUploadLimit()
: "";
// ---- Pre-change implementation (removed): validate with a regex, then strip the digits
// to get the unit and strip the unit to get the number. ----
if (maxUploadSize.isEmpty()) {
return 0;
} else if (!Pattern.compile("^[1-9][0-9]{0,2}[KMGkmg][Bb]$")
.matcher(maxUploadSize)
.matches()) {
log.error(
"Invalid maxUploadSize format. Expected format: [1-9][0-9]{0,2}[KMGkmg][Bb], but got: {}",
maxUploadSize);
return 0;
} else {
String unit = maxUploadSize.replaceAll("[1-9][0-9]{0,2}", "").toUpperCase();
String number = maxUploadSize.replaceAll("[KMGkmg][Bb]", "");
long size = Long.parseLong(number);
return switch (unit) {
case "KB" -> size * 1024;
case "MB" -> size * 1024 * 1024;
case "GB" -> size * 1024 * 1024 * 1024;
default -> 0;
};
// ---- Replacement implementation (added): manual parsing without regex. ----
// NOTE(review): raw cannot be null here because the ternary above substitutes "", so the
// null half of this condition is redundant.
if (raw == null || raw.isEmpty()) {
return 0L;
}
// NOTE(review): trimming means surrounding whitespace is now accepted; the removed regex
// rejected it - confirm this loosening is intended.
String s = raw.trim();
// Normalize case for unit parsing
String upper = s.toUpperCase(Locale.ROOT);
// Expect strictly: 0-999 followed by KB/MB/GB
// Find last two chars as unit if length >= 3
if (upper.length() < 3) return 0L;
String unit = upper.substring(upper.length() - 2);
// NOTE(review): every invalid-format exit below returns 0 silently; the removed code
// logged an error with the offending value - confirm dropping the log is intended.
if (!unit.equals("KB") && !unit.equals("MB") && !unit.equals("GB")) {
return 0L;
}
String numPart = upper.substring(0, upper.length() - 2);
// Disallow signs, decimals, spaces; only 1-3 digits (allow 0)
// NOTE(review): a leading zero ("0KB", "007MB") is accepted here, whereas the removed
// regex required the first digit to be 1-9.
if (numPart.length() > 3) {
return 0L;
}
for (int i = 0; i < numPart.length(); i++) {
char c = numPart.charAt(i);
if (c < '0' || c > '9') return 0L;
}
long value;
// numPart is 1-3 ASCII digits at this point (length and character checks above), so the
// catch below is purely defensive.
try {
value = Long.parseLong(numPart);
} catch (NumberFormatException e) {
return 0L;
}
// Binary multiples; long literals keep the arithmetic in 64-bit.
return switch (unit) {
case "KB" -> value * 1024L;
case "MB" -> value * 1024L * 1024L;
case "GB" -> value * 1024L * 1024L * 1024L;
default -> 0L;
};
}
// TODO: why do this server side not client?

View File

@ -14,6 +14,7 @@ import lombok.Getter;
import lombok.extern.slf4j.Slf4j;
import stirling.software.SPDF.model.PDFText;
import stirling.software.common.util.RegexPatternUtils;
@Slf4j
public class TextFinder extends PDFTextStripper {
@ -84,7 +85,8 @@ public class TextFinder extends PDFTextStripper {
}
}
Pattern pattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);
// Use cached pattern compilation for better performance
Pattern pattern = RegexPatternUtils.getInstance().createSearchPattern(regex, true);
Matcher matcher = pattern.matcher(text);
log.debug(

View File

@ -5,7 +5,6 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.HttpEntity;
@ -26,6 +25,7 @@ import stirling.software.SPDF.SPDFApplication;
import stirling.software.SPDF.model.ApiEndpoint;
import stirling.software.common.model.enumeration.Role;
import stirling.software.common.service.UserServiceInterface;
import stirling.software.common.util.RegexPatternUtils;
@Service
@Slf4j
@ -82,13 +82,11 @@ public class ApiDocService {
}
ApiEndpoint endpoint = apiDocumentation.get(operationName);
String description = endpoint.getDescription();
Pattern pattern = null;
if (output) {
pattern = Pattern.compile("Output:(\\w+)");
} else {
pattern = Pattern.compile("Input:(\\w+)");
}
Matcher matcher = pattern.matcher(description);
Matcher matcher =
(output
? RegexPatternUtils.getInstance().getApiDocOutputTypePattern()
: RegexPatternUtils.getInstance().getApiDocInputTypePattern())
.matcher(description);
while (matcher.find()) {
String type = matcher.group(1).toUpperCase();
if (outputToFileTypes.containsKey(type)) {
@ -157,8 +155,8 @@ public class ApiDocService {
}
ApiEndpoint endpoint = apiDocumentation.get(operationName);
String description = endpoint.getDescription();
Pattern pattern = Pattern.compile("Type:(\\w+)");
Matcher matcher = pattern.matcher(description);
Matcher matcher =
RegexPatternUtils.getInstance().getApiDocTypePattern().matcher(description);
if (matcher.find()) {
String type = matcher.group(1);
return type.startsWith("MI");

View File

@ -9,6 +9,8 @@ import org.apache.pdfbox.pdmodel.font.encoding.Encoding;
import lombok.extern.slf4j.Slf4j;
import stirling.software.common.util.RegexPatternUtils;
@Slf4j
public class TextEncodingHelper {
@ -322,7 +324,7 @@ public class TextEncodingHelper {
if (fontName == null) {
return false;
}
return fontName.matches("^[A-Z]{6}\\+.*");
return RegexPatternUtils.getInstance().getFontNamePattern().matcher(fontName).matches();
}
public static boolean canCalculateBasicWidths(PDFont font) {

View File

@ -10,6 +10,8 @@ import org.apache.pdfbox.pdmodel.PDResources;
import lombok.extern.slf4j.Slf4j;
import stirling.software.common.util.RegexPatternUtils;
@Slf4j
public class TextFinderUtils {
@ -69,9 +71,9 @@ public class TextFinderUtils {
patternString = applyWordBoundaries(term.trim(), patternString);
}
// Use PatternFactory for better performance with cached compilation
Pattern pattern =
Pattern.compile(
patternString, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);
RegexPatternUtils.getInstance().createSearchPattern(patternString, true);
patterns.add(pattern);
log.debug("Created search pattern: '{}' -> '{}'", term.trim(), patternString);

View File

@ -26,6 +26,7 @@ import stirling.software.common.model.job.ResultFile;
import stirling.software.common.service.FileStorage;
import stirling.software.common.service.JobQueue;
import stirling.software.common.service.TaskManager;
import stirling.software.common.util.RegexPatternUtils;
/** REST controller for job-related endpoints */
@RestController
@ -319,8 +320,10 @@ public class JobController {
// Builds a Content-Disposition value carrying both a quoted filename="..." parameter and a
// percent-encoded filename*=UTF-8''... parameter for the same name.
private String createContentDispositionHeader(String fileName) {
try {
String encodedFileName =
// Pre-change lines (removed by this commit): literal, non-regex replacement of "+".
URLEncoder.encode(fileName, StandardCharsets.UTF_8)
.replace("+", "%20"); // URLEncoder uses + for spaces, but we want %20
// Replacement lines (added by this commit): same substitution through a shared Pattern.
// NOTE(review): getPlusSignPattern is not shown here - presumably it matches a literal
// "+"; confirm.
RegexPatternUtils.getInstance()
.getPlusSignPattern()
.matcher(URLEncoder.encode(fileName, StandardCharsets.UTF_8))
.replaceAll("%20"); // URLEncoder uses + for spaces, but we want %20
// NOTE(review): the raw fileName is concatenated into the quoted filename parameter
// without escaping; a double quote or CR/LF in the name would break or split the header
// value - confirm callers sanitize it, or escape it here.
return "attachment; filename=\"" + fileName + "\"; filename*=UTF-8''" + encodedFileName;
} catch (Exception e) {
// Fallback to basic filename if encoding fails

View File

@ -70,7 +70,8 @@ class MultiPageLayoutControllerTest {
Assertions.assertNotNull(resp.getBody());
Assertions.assertTrue(resp.getBody().length > 0);
Assertions.assertEquals(
"test_layoutChanged.pdf", resp.getHeaders().getContentDisposition().getFilename());
"test_multi_page_layout.pdf",
resp.getHeaders().getContentDisposition().getFilename());
}
@Test
@ -111,6 +112,7 @@ class MultiPageLayoutControllerTest {
ResponseEntity<byte[]> resp = controller.mergeMultiplePagesIntoOne(req);
Assertions.assertEquals(
"name_layoutChanged.pdf", resp.getHeaders().getContentDisposition().getFilename());
"name_multi_page_layout.pdf",
resp.getHeaders().getContentDisposition().getFilename());
}
}

View File

@ -25,6 +25,7 @@ import jakarta.servlet.http.HttpServletResponse;
import lombok.extern.slf4j.Slf4j;
import stirling.software.common.util.RegexPatternUtils;
import stirling.software.common.util.RequestUriUtils;
import stirling.software.proprietary.config.AuditConfigurationProperties;
@ -323,7 +324,10 @@ public class AuditUtils {
return AuditEventType.SETTINGS_CHANGED;
} else if (cls.contains("file")
|| path.startsWith("/file")
|| path.matches("(?i).*/(upload|download)/.*")) {
|| RegexPatternUtils.getInstance()
.getUploadDownloadPathPattern()
.matcher(path)
.matches()) {
return AuditEventType.FILE_OPERATION;
}
}
@ -387,7 +391,10 @@ public class AuditUtils {
return AuditEventType.SETTINGS_CHANGED;
} else if (cls.contains("file")
|| path.startsWith("/file")
|| path.matches("(?i).*/(upload|download)/.*")) {
|| RegexPatternUtils.getInstance()
.getUploadDownloadPathPattern()
.matcher(path)
.matches()) {
return AuditEventType.FILE_OPERATION;
} else {
return AuditEventType.PDF_PROCESS;

View File

@ -27,6 +27,7 @@ import stirling.software.common.model.ApplicationProperties;
import stirling.software.common.model.ApplicationProperties.Security.OAUTH2;
import stirling.software.common.model.ApplicationProperties.Security.SAML2;
import stirling.software.common.model.oauth2.KeycloakProvider;
import stirling.software.common.util.RegexPatternUtils;
import stirling.software.common.util.UrlUtils;
import stirling.software.proprietary.audit.AuditEventType;
import stirling.software.proprietary.audit.AuditLevel;
@ -250,6 +251,9 @@ public class CustomLogoutSuccessHandler extends SimpleUrlLogoutSuccessHandler {
* @return a sanitised <code>String</code>
*/
private String sanitizeInput(String input) {
// Pre-change line (removed by this commit): deletes every character that is not an ASCII
// letter, an ASCII digit or a space.
return input.replaceAll("[^a-zA-Z0-9 ]", "");
// Replacement lines (added by this commit): the same deletion through a shared Pattern.
// NOTE(review): getInputSanitizePattern is not shown here - presumably it compiles
// "[^a-zA-Z0-9 ]"; confirm. Both variants throw NullPointerException for a null input.
return RegexPatternUtils.getInstance()
.getInputSanitizePattern()
.matcher(input)
.replaceAll("");
}
}

View File

@ -21,6 +21,7 @@ import lombok.extern.slf4j.Slf4j;
import stirling.software.common.model.ApplicationProperties;
import stirling.software.common.util.GeneralUtils;
import stirling.software.common.util.RegexPatternUtils;
@Service
@Slf4j
@ -117,7 +118,11 @@ public class KeygenLicenseVerifier {
// Remove the footer
encodedPayload = encodedPayload.replace(CERT_SUFFIX, "");
// Remove all newlines
encodedPayload = encodedPayload.replaceAll("\\r?\\n", "");
encodedPayload =
RegexPatternUtils.getInstance()
.getEncodedPayloadNewlinePattern()
.matcher(encodedPayload)
.replaceAll("");
byte[] payloadBytes = Base64.getDecoder().decode(encodedPayload);
String payload = new String(payloadBytes);

View File

@ -36,6 +36,7 @@ import lombok.extern.slf4j.Slf4j;
import stirling.software.common.model.ApplicationProperties;
import stirling.software.common.util.GeneralUtils;
import stirling.software.common.util.RegexPatternUtils;
import stirling.software.proprietary.security.model.api.admin.SettingValueResponse;
import stirling.software.proprietary.security.model.api.admin.UpdateSettingValueRequest;
import stirling.software.proprietary.security.model.api.admin.UpdateSettingsRequest;
@ -444,7 +445,8 @@ public class AdminSettingsController {
"legal");
// Pattern to validate safe property paths - only alphanumeric, dots, and underscores
private static final Pattern SAFE_KEY_PATTERN = Pattern.compile("^[a-zA-Z0-9._]+$");
private static final Pattern SAFE_KEY_PATTERN =
RegexPatternUtils.getInstance().getPattern("^[a-zA-Z0-9._]+$");
private static final int MAX_NESTING_DEPTH = 10;
// Security: Generic error messages to prevent information disclosure

View File

@ -25,6 +25,7 @@ import jakarta.servlet.http.HttpServletRequest;
import jakarta.servlet.http.HttpServletResponse;
import stirling.software.common.model.enumeration.Role;
import stirling.software.common.util.RegexPatternUtils;
@Component
public class UserBasedRateLimitingFilter extends OncePerRequestFilter {
@ -143,6 +144,6 @@ public class UserBasedRateLimitingFilter extends OncePerRequestFilter {
}
// Returns s with line-break characters removed.
private static String stripNewlines(final String s) {
// Pre-change line (removed by this commit): deletes every '\n' and '\r' character.
return s.replaceAll("[\n\r]", "");
// Replacement line (added by this commit): the same deletion through a shared Pattern.
// NOTE(review): getNewlineCharsPattern is not shown here - presumably it compiles
// "[\n\r]"; confirm.
return RegexPatternUtils.getInstance().getNewlineCharsPattern().matcher(s).replaceAll("");
}
}

View File

@ -75,8 +75,11 @@ public class CustomUserDetailsService implements UserDetailsService {
*/
private AuthenticationType determinePreferredSSOType() {
// Check what SSO types are enabled and prefer in order: OAUTH2 > SAML2 > fallback to OAUTH2
boolean oauth2Enabled = securityProperties.getOauth2() != null && securityProperties.getOauth2().getEnabled();
boolean saml2Enabled = securityProperties.getSaml2() != null && securityProperties.getSaml2().getEnabled();
boolean oauth2Enabled =
securityProperties.getOauth2() != null
&& securityProperties.getOauth2().getEnabled();
boolean saml2Enabled =
securityProperties.getSaml2() != null && securityProperties.getSaml2().getEnabled();
if (oauth2Enabled) {
return AuthenticationType.OAUTH2;

View File

@ -31,6 +31,7 @@ import stirling.software.common.model.ApplicationProperties;
import stirling.software.common.model.enumeration.Role;
import stirling.software.common.model.exception.UnsupportedProviderException;
import stirling.software.common.service.UserServiceInterface;
import stirling.software.common.util.RegexPatternUtils;
import stirling.software.proprietary.model.Team;
import stirling.software.proprietary.security.database.repository.AuthorityRepository;
import stirling.software.proprietary.security.database.repository.UserRepository;
@ -480,13 +481,18 @@ public class UserService implements UserServiceInterface {
// Checks whether the simple username is formatted correctly
// Regular expression for user name: Min. 3 characters, max. 50 characters
boolean isValidSimpleUsername =
username.matches("^[a-zA-Z0-9](?!.*[-@._+]{2,})[a-zA-Z0-9@._+-]{1,48}[a-zA-Z0-9]$");
RegexPatternUtils.getInstance()
.getUsernameValidationPattern()
.matcher(username)
.matches();
// Checks whether the email address is formatted correctly
// Regular expression for email addresses: Max. 320 characters, with RFC-like validation
boolean isValidEmail =
username.matches(
"^(?=.{1,320}$)(?=.{1,64}@)[A-Za-z0-9](?:[A-Za-z0-9_.+-]*[A-Za-z0-9])?@[^-][A-Za-z0-9-]+(?:\\\\.[A-Za-z0-9-]+)*(?:\\\\.[A-Za-z]{2,})$");
RegexPatternUtils.getInstance()
.getEmailValidationPattern()
.matcher(username)
.matches();
List<String> notAllowedUserList = new ArrayList<>();
notAllowedUserList.add("ALL_USERS".toLowerCase());

View File

@ -7,13 +7,16 @@ import java.util.stream.Collectors;
import lombok.extern.slf4j.Slf4j;
import stirling.software.common.util.RegexPatternUtils;
/** Redacts any map values whose keys match common secret/token patterns. */
@Slf4j
public final class SecretMasker {
private static final Pattern SENSITIVE =
Pattern.compile(
"(?i)(password|token|secret|api[_-]?key|authorization|auth|jwt|cred|cert)");
RegexPatternUtils.getInstance()
.getPattern(
"(?i)(password|token|secret|api[_-]?key|authorization|auth|jwt|cred|cert)");
private SecretMasker() {}