mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2025-09-12 17:52:13 +02:00
Default langs, and working on reducing metrics
This commit is contained in:
parent
5e6e0b773e
commit
b4da18659d
@ -66,6 +66,10 @@ RUN echo "@main https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /etc/a
|
||||
poppler-utils \
|
||||
# OCR MY PDF (unpaper for deskew and other advanced features)
|
||||
tesseract-ocr-data-eng \
|
||||
tesseract-ocr-data-chi_sim \
|
||||
tesseract-ocr-data-deu \
|
||||
tesseract-ocr-data-fra \
|
||||
tesseract-ocr-data-por \
|
||||
# CV
|
||||
py3-opencv \
|
||||
python3 \
|
||||
|
@ -75,7 +75,10 @@ RUN echo "@main https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /etc/a
|
||||
# OCR MY PDF (unpaper for deskew and other advanced features)
|
||||
qpdf \
|
||||
tesseract-ocr-data-eng \
|
||||
|
||||
tesseract-ocr-data-chi_sim \
|
||||
tesseract-ocr-data-deu \
|
||||
tesseract-ocr-data-fra \
|
||||
tesseract-ocr-data-por \
|
||||
font-terminus font-dejavu font-noto font-noto-cjk font-awesome font-noto-extra font-liberation font-linux-libertine \
|
||||
# CV
|
||||
py3-opencv \
|
||||
|
@ -176,21 +176,17 @@ public class EndpointConfiguration {
|
||||
addEndpointToGroup("OpenCV", "extract-image-scans");
|
||||
|
||||
// LibreOffice
|
||||
addEndpointToGroup("qpdf", "repair");
|
||||
addEndpointToGroup("LibreOffice", "file-to-pdf");
|
||||
addEndpointToGroup("LibreOffice", "pdf-to-word");
|
||||
addEndpointToGroup("LibreOffice", "pdf-to-presentation");
|
||||
addEndpointToGroup("LibreOffice", "pdf-to-rtf");
|
||||
addEndpointToGroup("LibreOffice", "pdf-to-html");
|
||||
addEndpointToGroup("LibreOffice", "pdf-to-xml");
|
||||
addEndpointToGroup("LibreOffice", "pdf-to-pdfa");
|
||||
|
||||
// Unoconvert
|
||||
addEndpointToGroup("Unoconvert", "file-to-pdf");
|
||||
|
||||
// qpdf
|
||||
addEndpointToGroup("qpdf", "compress-pdf");
|
||||
addEndpointToGroup("qpdf", "pdf-to-pdfa");
|
||||
|
||||
addEndpointToGroup("tesseract", "ocr-pdf");
|
||||
|
||||
// Java
|
||||
@ -240,8 +236,6 @@ public class EndpointConfiguration {
|
||||
addEndpointToGroup("Javascript", "adjust-contrast");
|
||||
|
||||
// qpdf dependent endpoints
|
||||
addEndpointToGroup("qpdf", "compress-pdf");
|
||||
addEndpointToGroup("qpdf", "pdf-to-pdfa");
|
||||
addEndpointToGroup("qpdf", "repair");
|
||||
|
||||
// Weasyprint dependent endpoints
|
||||
|
@ -0,0 +1,291 @@
|
||||
package stirling.software.SPDF.config;
|
||||
|
||||
import java.lang.reflect.Method;
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.context.ApplicationContext;
|
||||
import org.springframework.context.ApplicationListener;
|
||||
import org.springframework.context.event.ContextRefreshedEvent;
|
||||
import org.springframework.stereotype.Component;
|
||||
import org.springframework.web.bind.annotation.RequestMethod;
|
||||
import org.springframework.web.method.HandlerMethod;
|
||||
import org.springframework.web.servlet.mvc.method.RequestMappingInfo;
|
||||
import org.springframework.web.servlet.mvc.method.annotation.RequestMappingHandlerMapping;
|
||||
|
||||
@Component
|
||||
public class EndpointInspector implements ApplicationListener<ContextRefreshedEvent> {
|
||||
private static final Logger logger = LoggerFactory.getLogger(EndpointInspector.class);
|
||||
|
||||
private final ApplicationContext applicationContext;
|
||||
private final Set<String> validGetEndpoints = new HashSet<>();
|
||||
private boolean endpointsDiscovered = false;
|
||||
|
||||
@Autowired
|
||||
public EndpointInspector(ApplicationContext applicationContext) {
|
||||
this.applicationContext = applicationContext;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onApplicationEvent(ContextRefreshedEvent event) {
|
||||
if (!endpointsDiscovered) {
|
||||
discoverEndpoints();
|
||||
endpointsDiscovered = true;
|
||||
}
|
||||
}
|
||||
|
||||
private void discoverEndpoints() {
|
||||
try {
|
||||
// Get all request mapping beans from the application context
|
||||
Map<String, RequestMappingHandlerMapping> mappings =
|
||||
applicationContext.getBeansOfType(RequestMappingHandlerMapping.class);
|
||||
|
||||
// Process each mapping bean
|
||||
for (Map.Entry<String, RequestMappingHandlerMapping> entry : mappings.entrySet()) {
|
||||
String beanName = entry.getKey();
|
||||
RequestMappingHandlerMapping mapping = entry.getValue();
|
||||
|
||||
// Get all handler methods registered in this mapping
|
||||
Map<RequestMappingInfo, HandlerMethod> handlerMethods = mapping.getHandlerMethods();
|
||||
int methodsWithEmptyMethodsCondition = 0;
|
||||
int methodsWithGetMethod = 0;
|
||||
int methodsWithGetOrEmpty = 0;
|
||||
|
||||
// Process each handler method
|
||||
for (Map.Entry<RequestMappingInfo, HandlerMethod> handlerEntry :
|
||||
handlerMethods.entrySet()) {
|
||||
RequestMappingInfo mappingInfo = handlerEntry.getKey();
|
||||
HandlerMethod handlerMethod = handlerEntry.getValue();
|
||||
|
||||
// Debug info
|
||||
logger.debug(
|
||||
"Examining handler: {} -> {}",
|
||||
mappingInfo,
|
||||
handlerMethod.getMethod().getName());
|
||||
|
||||
boolean hasEmptyMethodsCondition = false;
|
||||
boolean hasGetMethod = false;
|
||||
|
||||
// Get methods through reflection if standard approach fails
|
||||
Set<RequestMethod> methods = Collections.emptySet();
|
||||
|
||||
try {
|
||||
methods = mappingInfo.getMethodsCondition().getMethods();
|
||||
|
||||
// Standard approach
|
||||
hasEmptyMethodsCondition = methods.isEmpty();
|
||||
hasGetMethod = methods.contains(RequestMethod.GET);
|
||||
|
||||
logger.debug(
|
||||
"Standard method detection: methods={}, isEmpty={}, hasGET={}",
|
||||
methods,
|
||||
hasEmptyMethodsCondition,
|
||||
hasGetMethod);
|
||||
} catch (Exception e) {
|
||||
logger.warn(
|
||||
"Error accessing methods through standard API: {}", e.getMessage());
|
||||
}
|
||||
|
||||
if (hasEmptyMethodsCondition) {
|
||||
methodsWithEmptyMethodsCondition++;
|
||||
}
|
||||
|
||||
if (hasGetMethod) {
|
||||
methodsWithGetMethod++;
|
||||
}
|
||||
|
||||
// Count any method that could potentially handle GET requests
|
||||
if (hasEmptyMethodsCondition || hasGetMethod) {
|
||||
methodsWithGetOrEmpty++;
|
||||
|
||||
// Try to get patterns using reflection if direct approach fails
|
||||
Set<String> patterns = extractPatternsUsingReflection(mappingInfo);
|
||||
|
||||
if (patterns.isEmpty()) {
|
||||
// Fall back to toString parsing
|
||||
String infoString = mappingInfo.toString();
|
||||
// Extract patterns from toString if possible
|
||||
if (infoString.contains("{")) {
|
||||
String patternsSection =
|
||||
infoString.substring(
|
||||
infoString.indexOf("{") + 1,
|
||||
infoString.indexOf("}"));
|
||||
|
||||
for (String pattern : patternsSection.split(",")) {
|
||||
pattern = pattern.trim();
|
||||
if (!pattern.isEmpty()) {
|
||||
patterns.add(pattern);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Add all patterns
|
||||
validGetEndpoints.addAll(patterns);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (validGetEndpoints.isEmpty()) {
|
||||
// If we still couldn't find any endpoints, add some common ones as a fallback
|
||||
logger.warn("No endpoints discovered. Adding common endpoints as fallback.");
|
||||
validGetEndpoints.add("/");
|
||||
validGetEndpoints.add("/api/**");
|
||||
validGetEndpoints.add("/**");
|
||||
}
|
||||
} catch (Exception e) {
|
||||
logger.error("Error discovering endpoints", e);
|
||||
}
|
||||
}
|
||||
|
||||
private Set<String> extractPatternsUsingReflection(RequestMappingInfo mappingInfo) {
|
||||
Set<String> patterns = new HashSet<>();
|
||||
|
||||
try {
|
||||
// First try standard API
|
||||
if (mappingInfo.getPatternsCondition() != null) {
|
||||
patterns.addAll(mappingInfo.getPatternsCondition().getPatterns());
|
||||
}
|
||||
} catch (Exception e) {
|
||||
logger.debug("Standard pattern access failed: {}", e.getMessage());
|
||||
}
|
||||
|
||||
// If standard approach failed, try reflection
|
||||
if (patterns.isEmpty()) {
|
||||
try {
|
||||
// Try to access patterns through reflection on different Spring versions
|
||||
Method[] methods = mappingInfo.getClass().getMethods();
|
||||
|
||||
// Look for methods that might return patterns
|
||||
for (Method method : methods) {
|
||||
String methodName = method.getName();
|
||||
if ((methodName.contains("pattern") || methodName.contains("Path"))
|
||||
&& method.getParameterCount() == 0) {
|
||||
|
||||
logger.debug("Trying reflection method: {}", methodName);
|
||||
try {
|
||||
Object result = method.invoke(mappingInfo);
|
||||
if (result instanceof Set) {
|
||||
@SuppressWarnings("unchecked")
|
||||
Set<String> resultSet = (Set<String>) result;
|
||||
patterns.addAll(resultSet);
|
||||
logger.debug(
|
||||
"Found {} patterns using method {}",
|
||||
resultSet.size(),
|
||||
methodName);
|
||||
} else if (result != null) {
|
||||
logger.debug(
|
||||
"Method {} returned non-Set result: {}",
|
||||
methodName,
|
||||
result);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
logger.debug(
|
||||
"Method {} invocation failed: {}", methodName, e.getMessage());
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
logger.warn("Reflection-based pattern extraction failed: {}", e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
return patterns;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if a URI corresponds to a valid GET endpoint - Fixed to handle path variables safely
|
||||
*/
|
||||
public boolean isValidGetEndpoint(String uri) {
|
||||
// Ensure endpoints are discovered
|
||||
if (!endpointsDiscovered) {
|
||||
discoverEndpoints();
|
||||
endpointsDiscovered = true;
|
||||
}
|
||||
|
||||
// If no endpoints were discovered, assume all endpoints are valid
|
||||
if (validGetEndpoints.isEmpty()) {
|
||||
logger.warn(
|
||||
"No valid endpoints were discovered. Assuming all GET endpoints are valid.");
|
||||
return true;
|
||||
}
|
||||
|
||||
// Direct match
|
||||
if (validGetEndpoints.contains(uri)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Try simple prefix matching first (safer than regex)
|
||||
for (String pattern : validGetEndpoints) {
|
||||
// Handle wildcards and path variables with simple prefix matching
|
||||
if (pattern.contains("*") || pattern.contains("{")) {
|
||||
int wildcardIndex = pattern.indexOf('*');
|
||||
int variableIndex = pattern.indexOf('{');
|
||||
|
||||
// Find the earliest special character
|
||||
int cutoffIndex;
|
||||
if (wildcardIndex < 0) {
|
||||
cutoffIndex = variableIndex;
|
||||
} else if (variableIndex < 0) {
|
||||
cutoffIndex = wildcardIndex;
|
||||
} else {
|
||||
cutoffIndex = Math.min(wildcardIndex, variableIndex);
|
||||
}
|
||||
|
||||
// Get the static part of the pattern
|
||||
String staticPrefix = pattern.substring(0, cutoffIndex);
|
||||
|
||||
// If the URI starts with this prefix, consider it a match
|
||||
if (uri.startsWith(staticPrefix)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// For patterns without wildcards or variables, try path-segment-by-segment matching
|
||||
for (String pattern : validGetEndpoints) {
|
||||
if (!pattern.contains("*") && !pattern.contains("{")) {
|
||||
// Split the pattern and URI into path segments
|
||||
String[] patternSegments = pattern.split("/");
|
||||
String[] uriSegments = uri.split("/");
|
||||
|
||||
// If URI has fewer segments than the pattern, it can't match
|
||||
if (uriSegments.length < patternSegments.length) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check each segment
|
||||
boolean match = true;
|
||||
for (int i = 0; i < patternSegments.length; i++) {
|
||||
if (!patternSegments[i].equals(uriSegments[i])) {
|
||||
match = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (match) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If no match was found, the URI is not valid
|
||||
return false;
|
||||
}
|
||||
|
||||
/** Get all discovered valid GET endpoints */
|
||||
public Set<String> getValidGetEndpoints() {
|
||||
// Ensure endpoints are discovered
|
||||
if (!endpointsDiscovered) {
|
||||
discoverEndpoints();
|
||||
endpointsDiscovered = true;
|
||||
}
|
||||
return new HashSet<>(validGetEndpoints);
|
||||
}
|
||||
}
|
@ -4,6 +4,8 @@ import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.scheduling.annotation.Scheduled;
|
||||
import org.springframework.stereotype.Service;
|
||||
@ -11,22 +13,41 @@ import org.springframework.stereotype.Service;
|
||||
import io.micrometer.core.instrument.MeterRegistry;
|
||||
import io.micrometer.core.instrument.search.Search;
|
||||
|
||||
import stirling.software.SPDF.config.EndpointInspector;
|
||||
|
||||
@Service
|
||||
public class MetricsAggregatorService {
|
||||
private static final Logger logger = LoggerFactory.getLogger(MetricsAggregatorService.class);
|
||||
|
||||
private final MeterRegistry meterRegistry;
|
||||
private final PostHogService postHogService;
|
||||
private final EndpointInspector endpointInspector;
|
||||
private final Map<String, Double> lastSentMetrics = new ConcurrentHashMap<>();
|
||||
|
||||
// Flag to decide behavior if no endpoints are discovered
|
||||
private boolean allowAllGetEndpointsIfNoneDiscovered = true;
|
||||
|
||||
@Autowired
|
||||
public MetricsAggregatorService(MeterRegistry meterRegistry, PostHogService postHogService) {
|
||||
public MetricsAggregatorService(
|
||||
MeterRegistry meterRegistry,
|
||||
PostHogService postHogService,
|
||||
EndpointInspector endpointInspector) {
|
||||
this.meterRegistry = meterRegistry;
|
||||
this.postHogService = postHogService;
|
||||
this.endpointInspector = endpointInspector;
|
||||
}
|
||||
|
||||
@Scheduled(fixedRate = 7200000) // Run every 2 hours
|
||||
@Scheduled(fixedRate = 72000) // Runs every 72 seconds (72000 ms); NOTE(review): a 2-hour rate would be 7200000
|
||||
public void aggregateAndSendMetrics() {
|
||||
Map<String, Object> metrics = new HashMap<>();
|
||||
|
||||
int endpointCount = endpointInspector.getValidGetEndpoints().size();
|
||||
|
||||
boolean validateGetEndpoints = true;
|
||||
if (endpointCount == 0 && allowAllGetEndpointsIfNoneDiscovered) {
|
||||
validateGetEndpoints = false;
|
||||
}
|
||||
final boolean validateGetEndpointsFinal = validateGetEndpoints;
|
||||
Search.in(meterRegistry)
|
||||
.name("http.requests")
|
||||
.counters()
|
||||
@ -34,35 +55,53 @@ public class MetricsAggregatorService {
|
||||
counter -> {
|
||||
String method = counter.getId().getTag("method");
|
||||
String uri = counter.getId().getTag("uri");
|
||||
|
||||
// Skip if either method or uri is null
|
||||
if (method == null || uri == null) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Skip URIs that are 2 characters or shorter
|
||||
if (uri.length() <= 2) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Skip non-GET and non-POST requests
|
||||
if (!"GET".equals(method) && !"POST".equals(method)) {
|
||||
return;
|
||||
}
|
||||
// Skip URIs that are 2 characters or shorter
|
||||
if (uri.length() <= 2) {
|
||||
|
||||
// For POST requests, only include if the URI contains api/v1
|
||||
if ("POST".equals(method) && !uri.contains("api/v1")) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
if(uri.contains(".txt")) {
|
||||
return;
|
||||
}
|
||||
// For GET requests, validate if we have a list of valid endpoints
|
||||
if ("GET".equals(method)
|
||||
&& validateGetEndpointsFinal
|
||||
&& !endpointInspector.isValidGetEndpoint(uri)) {
|
||||
logger.debug("Skipping invalid GET endpoint: {}", uri);
|
||||
return;
|
||||
}
|
||||
|
||||
String key =
|
||||
String.format(
|
||||
"http_requests_%s_%s", method, uri.replace("/", "_"));
|
||||
|
||||
double currentCount = counter.count();
|
||||
double lastCount = lastSentMetrics.getOrDefault(key, 0.0);
|
||||
double difference = currentCount - lastCount;
|
||||
|
||||
if (difference > 0) {
|
||||
logger.info("{}, {}", key, difference);
|
||||
metrics.put(key, difference);
|
||||
lastSentMetrics.put(key, currentCount);
|
||||
}
|
||||
});
|
||||
|
||||
// Send aggregated metrics to PostHog
|
||||
if (!metrics.isEmpty()) {
|
||||
|
||||
postHogService.captureEvent("aggregated_metrics", metrics);
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user