Default langs, and working on reducing metrics

This commit is contained in:
Anthony Stirling 2025-03-18 23:04:09 +00:00
parent 5e6e0b773e
commit b4da18659d
5 changed files with 347 additions and 16 deletions

View File

@ -66,6 +66,10 @@ RUN echo "@main https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /etc/a
poppler-utils \ poppler-utils \
# OCR MY PDF (unpaper for deskew and other advanced features) # OCR MY PDF (unpaper for deskew and other advanced features)
tesseract-ocr-data-eng \ tesseract-ocr-data-eng \
tesseract-ocr-data-chi_sim \
tesseract-ocr-data-deu \
tesseract-ocr-data-fra \
tesseract-ocr-data-por \
# CV # CV
py3-opencv \ py3-opencv \
python3 \ python3 \

View File

@ -75,7 +75,10 @@ RUN echo "@main https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /etc/a
# OCR MY PDF (unpaper for deskew and other advanced features) # OCR MY PDF (unpaper for deskew and other advanced features)
qpdf \ qpdf \
tesseract-ocr-data-eng \ tesseract-ocr-data-eng \
tesseract-ocr-data-chi_sim \
tesseract-ocr-data-deu \
tesseract-ocr-data-fra \
tesseract-ocr-data-por \
font-terminus font-dejavu font-noto font-noto-cjk font-awesome font-noto-extra font-liberation font-linux-libertine \ font-terminus font-dejavu font-noto font-noto-cjk font-awesome font-noto-extra font-liberation font-linux-libertine \
# CV # CV
py3-opencv \ py3-opencv \

View File

@ -176,21 +176,17 @@ public class EndpointConfiguration {
addEndpointToGroup("OpenCV", "extract-image-scans"); addEndpointToGroup("OpenCV", "extract-image-scans");
// LibreOffice // LibreOffice
addEndpointToGroup("qpdf", "repair");
addEndpointToGroup("LibreOffice", "file-to-pdf"); addEndpointToGroup("LibreOffice", "file-to-pdf");
addEndpointToGroup("LibreOffice", "pdf-to-word"); addEndpointToGroup("LibreOffice", "pdf-to-word");
addEndpointToGroup("LibreOffice", "pdf-to-presentation"); addEndpointToGroup("LibreOffice", "pdf-to-presentation");
addEndpointToGroup("LibreOffice", "pdf-to-rtf"); addEndpointToGroup("LibreOffice", "pdf-to-rtf");
addEndpointToGroup("LibreOffice", "pdf-to-html"); addEndpointToGroup("LibreOffice", "pdf-to-html");
addEndpointToGroup("LibreOffice", "pdf-to-xml"); addEndpointToGroup("LibreOffice", "pdf-to-xml");
addEndpointToGroup("LibreOffice", "pdf-to-pdfa");
// Unoconvert // Unoconvert
addEndpointToGroup("Unoconvert", "file-to-pdf"); addEndpointToGroup("Unoconvert", "file-to-pdf");
// qpdf
addEndpointToGroup("qpdf", "compress-pdf");
addEndpointToGroup("qpdf", "pdf-to-pdfa");
addEndpointToGroup("tesseract", "ocr-pdf"); addEndpointToGroup("tesseract", "ocr-pdf");
// Java // Java
@ -240,8 +236,6 @@ public class EndpointConfiguration {
addEndpointToGroup("Javascript", "adjust-contrast"); addEndpointToGroup("Javascript", "adjust-contrast");
// qpdf dependent endpoints // qpdf dependent endpoints
addEndpointToGroup("qpdf", "compress-pdf");
addEndpointToGroup("qpdf", "pdf-to-pdfa");
addEndpointToGroup("qpdf", "repair"); addEndpointToGroup("qpdf", "repair");
// Weasyprint dependent endpoints // Weasyprint dependent endpoints

View File

@ -0,0 +1,291 @@
package stirling.software.SPDF.config;
import java.lang.reflect.Method;
import java.util.Collections;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.ApplicationContext;
import org.springframework.context.ApplicationListener;
import org.springframework.context.event.ContextRefreshedEvent;
import org.springframework.stereotype.Component;
import org.springframework.web.bind.annotation.RequestMethod;
import org.springframework.web.method.HandlerMethod;
import org.springframework.web.servlet.mvc.method.RequestMappingInfo;
import org.springframework.web.servlet.mvc.method.annotation.RequestMappingHandlerMapping;
/**
 * Collects the URL patterns of every Spring MVC handler that can serve a GET request and answers
 * whether a given URI corresponds to one of them.
 *
 * <p>Discovery runs once, either when the application context is refreshed or lazily on the first
 * query, whichever happens first. The discovered patterns are published as an immutable snapshot
 * through a volatile field so that queries made from other threads (for example a scheduled task)
 * always observe a fully built set.
 */
@Component
public class EndpointInspector implements ApplicationListener<ContextRefreshedEvent> {
    private static final Logger logger = LoggerFactory.getLogger(EndpointInspector.class);

    private final ApplicationContext applicationContext;

    /** Guards the one-time discovery so concurrent callers do not scan the context twice. */
    private final Object discoveryLock = new Object();

    /** Immutable snapshot of the discovered patterns; replaced as a whole, never mutated. */
    private volatile Set<String> validGetEndpoints = Collections.emptySet();

    private volatile boolean endpointsDiscovered = false;

    @Autowired
    public EndpointInspector(ApplicationContext applicationContext) {
        this.applicationContext = applicationContext;
    }

    /** Triggers endpoint discovery the first time the application context is refreshed. */
    @Override
    public void onApplicationEvent(ContextRefreshedEvent event) {
        ensureEndpointsDiscovered();
    }

    /** Runs discovery exactly once, no matter how many threads ask for it. */
    private void ensureEndpointsDiscovered() {
        if (endpointsDiscovered) {
            return;
        }
        synchronized (discoveryLock) {
            if (!endpointsDiscovered) {
                discoverEndpoints();
                endpointsDiscovered = true;
            }
        }
    }

    /**
     * Scans every {@link RequestMappingHandlerMapping} bean and records the patterns of the
     * handlers that accept GET (explicitly, or implicitly by declaring no HTTP method at all).
     *
     * <p>Failures are logged rather than propagated; whatever was collected before the failure is
     * still published.
     */
    private void discoverEndpoints() {
        Set<String> discovered = new HashSet<>();
        try {
            Map<String, RequestMappingHandlerMapping> mappings =
                    applicationContext.getBeansOfType(RequestMappingHandlerMapping.class);

            for (RequestMappingHandlerMapping mapping : mappings.values()) {
                for (Map.Entry<RequestMappingInfo, HandlerMethod> handlerEntry :
                        mapping.getHandlerMethods().entrySet()) {
                    RequestMappingInfo mappingInfo = handlerEntry.getKey();
                    logger.debug(
                            "Examining handler: {} -> {}",
                            mappingInfo,
                            handlerEntry.getValue().getMethod().getName());

                    if (canHandleGet(mappingInfo)) {
                        // getPatternValues() reports the active patterns whichever of the two
                        // pattern conditions (parsed PathPattern or String based) is in use, so
                        // no null check, reflection or toString() parsing is needed.
                        discovered.addAll(mappingInfo.getPatternValues());
                    }
                }
            }

            if (discovered.isEmpty()) {
                // If we still couldn't find any endpoints, add some common ones as a fallback
                logger.warn("No endpoints discovered. Adding common endpoints as fallback.");
                discovered.add("/");
                discovered.add("/api/**");
                discovered.add("/**");
            }
            logger.debug("Discovered {} GET endpoint patterns", discovered.size());
        } catch (Exception e) {
            logger.error("Error discovering endpoints", e);
        }
        validGetEndpoints = Collections.unmodifiableSet(discovered);
    }

    /**
     * Tells whether a mapping may serve GET requests: either GET is listed explicitly or the
     * mapping declares no HTTP method, in which case it is not restricted to any method.
     */
    private static boolean canHandleGet(RequestMappingInfo mappingInfo) {
        Set<RequestMethod> methods = mappingInfo.getMethodsCondition().getMethods();
        return methods.isEmpty() || methods.contains(RequestMethod.GET);
    }

    /**
     * Checks whether a URI corresponds to a discovered GET endpoint.
     *
     * <p>Matching is deliberately permissive and avoids regular expressions: an exact match wins;
     * a pattern containing {@code *} or <code>{</code> matches any URI that starts with the
     * pattern's literal prefix; a literal pattern matches any URI whose leading path segments
     * equal the pattern's segments.
     *
     * @param uri the request URI to check; {@code null} is never valid
     * @return {@code true} if the URI matches a discovered pattern, or if no patterns are known
     */
    public boolean isValidGetEndpoint(String uri) {
        if (uri == null) {
            return false;
        }
        ensureEndpointsDiscovered();

        Set<String> endpoints = validGetEndpoints;

        // If no endpoints were discovered, assume all endpoints are valid
        if (endpoints.isEmpty()) {
            logger.warn(
                    "No valid endpoints were discovered. Assuming all GET endpoints are valid.");
            return true;
        }

        // Direct match
        if (endpoints.contains(uri)) {
            return true;
        }

        for (String pattern : endpoints) {
            int cutoffIndex = indexOfFirstSpecial(pattern);
            if (cutoffIndex >= 0) {
                // Wildcard or path variable: compare against the static part of the pattern
                if (uri.startsWith(pattern.substring(0, cutoffIndex))) {
                    return true;
                }
            } else if (matchesLeadingSegments(pattern, uri)) {
                return true;
            }
        }

        // If no match was found, the URI is not valid
        return false;
    }

    /**
     * Returns the index of the first {@code *} or <code>{</code> in the pattern, or {@code -1}
     * when the pattern is purely literal.
     */
    private static int indexOfFirstSpecial(String pattern) {
        int wildcardIndex = pattern.indexOf('*');
        int variableIndex = pattern.indexOf('{');
        if (wildcardIndex < 0) {
            return variableIndex;
        }
        if (variableIndex < 0) {
            return wildcardIndex;
        }
        return Math.min(wildcardIndex, variableIndex);
    }

    /**
     * Compares a literal pattern with a URI segment by segment; the URI may have additional
     * trailing segments.
     *
     * <p>Patterns without any non-empty segment (such as {@code "/"} or {@code ""}) are rejected
     * here: {@code "/".split("/")} yields no segments at all, which would otherwise make the root
     * mapping match every URI. Such patterns can only match exactly.
     */
    private static boolean matchesLeadingSegments(String pattern, String uri) {
        String[] patternSegments = pattern.split("/");
        boolean hasRealSegment = false;
        for (String segment : patternSegments) {
            if (!segment.isEmpty()) {
                hasRealSegment = true;
                break;
            }
        }
        if (!hasRealSegment) {
            return false;
        }

        String[] uriSegments = uri.split("/");
        // If URI has fewer segments than the pattern, it can't match
        if (uriSegments.length < patternSegments.length) {
            return false;
        }
        for (int i = 0; i < patternSegments.length; i++) {
            if (!patternSegments[i].equals(uriSegments[i])) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns all discovered valid GET endpoint patterns.
     *
     * @return a fresh mutable copy; changes to it do not affect this inspector
     */
    public Set<String> getValidGetEndpoints() {
        ensureEndpointsDiscovered();
        return new HashSet<>(validGetEndpoints);
    }
}

View File

@ -4,6 +4,8 @@ import java.util.HashMap;
import java.util.Map; import java.util.Map;
import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentHashMap;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.scheduling.annotation.Scheduled; import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
@ -11,22 +13,41 @@ import org.springframework.stereotype.Service;
import io.micrometer.core.instrument.MeterRegistry; import io.micrometer.core.instrument.MeterRegistry;
import io.micrometer.core.instrument.search.Search; import io.micrometer.core.instrument.search.Search;
import stirling.software.SPDF.config.EndpointInspector;
@Service @Service
public class MetricsAggregatorService { public class MetricsAggregatorService {
private static final Logger logger = LoggerFactory.getLogger(MetricsAggregatorService.class);
private final MeterRegistry meterRegistry; private final MeterRegistry meterRegistry;
private final PostHogService postHogService; private final PostHogService postHogService;
private final EndpointInspector endpointInspector;
private final Map<String, Double> lastSentMetrics = new ConcurrentHashMap<>(); private final Map<String, Double> lastSentMetrics = new ConcurrentHashMap<>();
// Flag to decide behavior if no endpoints are discovered
private boolean allowAllGetEndpointsIfNoneDiscovered = true;
@Autowired @Autowired
public MetricsAggregatorService(MeterRegistry meterRegistry, PostHogService postHogService) { public MetricsAggregatorService(
MeterRegistry meterRegistry,
PostHogService postHogService,
EndpointInspector endpointInspector) {
this.meterRegistry = meterRegistry; this.meterRegistry = meterRegistry;
this.postHogService = postHogService; this.postHogService = postHogService;
this.endpointInspector = endpointInspector;
} }
@Scheduled(fixedRate = 7200000) // Run every 2 hours @Scheduled(fixedRate = 72000) // Run every 2 hours
public void aggregateAndSendMetrics() { public void aggregateAndSendMetrics() {
Map<String, Object> metrics = new HashMap<>(); Map<String, Object> metrics = new HashMap<>();
int endpointCount = endpointInspector.getValidGetEndpoints().size();
boolean validateGetEndpoints = true;
if (endpointCount == 0 && allowAllGetEndpointsIfNoneDiscovered) {
validateGetEndpoints = false;
}
final boolean validateGetEndpointsFinal = validateGetEndpoints;
Search.in(meterRegistry) Search.in(meterRegistry)
.name("http.requests") .name("http.requests")
.counters() .counters()
@ -34,35 +55,53 @@ public class MetricsAggregatorService {
counter -> { counter -> {
String method = counter.getId().getTag("method"); String method = counter.getId().getTag("method");
String uri = counter.getId().getTag("uri"); String uri = counter.getId().getTag("uri");
// Skip if either method or uri is null // Skip if either method or uri is null
if (method == null || uri == null) { if (method == null || uri == null) {
return; return;
} }
// Skip URIs that are 2 characters or shorter
if (uri.length() <= 2) {
return;
}
// Skip non-GET and non-POST requests
if (!"GET".equals(method) && !"POST".equals(method)) { if (!"GET".equals(method) && !"POST".equals(method)) {
return; return;
} }
// Skip URIs that are 2 characters or shorter
if (uri.length() <= 2) { // For POST requests, only include if they start with /api/v1
if ("POST".equals(method) && !uri.contains("api/v1")) {
return;
}
if(uri.contains(".txt")) {
return;
}
// For GET requests, validate if we have a list of valid endpoints
if ("GET".equals(method)
&& validateGetEndpointsFinal
&& !endpointInspector.isValidGetEndpoint(uri)) {
logger.debug("Skipping invalid GET endpoint: {}", uri);
return; return;
} }
String key = String key =
String.format( String.format(
"http_requests_%s_%s", method, uri.replace("/", "_")); "http_requests_%s_%s", method, uri.replace("/", "_"));
double currentCount = counter.count(); double currentCount = counter.count();
double lastCount = lastSentMetrics.getOrDefault(key, 0.0); double lastCount = lastSentMetrics.getOrDefault(key, 0.0);
double difference = currentCount - lastCount; double difference = currentCount - lastCount;
if (difference > 0) { if (difference > 0) {
logger.info("{}, {}", key, difference);
metrics.put(key, difference); metrics.put(key, difference);
lastSentMetrics.put(key, currentCount); lastSentMetrics.put(key, currentCount);
} }
}); });
// Send aggregated metrics to PostHog // Send aggregated metrics to PostHog
if (!metrics.isEmpty()) { if (!metrics.isEmpty()) {
postHogService.captureEvent("aggregated_metrics", metrics); postHogService.captureEvent("aggregated_metrics", metrics);
} }
} }