From d89b96088bc3a3b7411a3d72b2dcadb2a4625712 Mon Sep 17 00:00:00 2001 From: Anthony Stirling <77850077+Frooodle@users.noreply.github.com> Date: Wed, 4 Feb 2026 23:20:38 +0000 Subject: [PATCH] diagnostic script (#5646) # Description of Changes --- ## Checklist ### General - [ ] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md) - [ ] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md) (if applicable) - [ ] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md) (if applicable) - [ ] I have performed a self-review of my own code - [ ] My changes generate no new warnings ### Documentation - [ ] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed) - [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only) ### Translations (if applicable) - [ ] I ran [`scripts/counter_translation.py`](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/docs/counter_translation.md) ### UI Changes (if applicable) - [ ] Screenshots or videos demonstrating the UI changes are attached (e.g., as comments or direct attachments in the PR) ### Testing (if applicable) - [ ] I have tested my changes locally. Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md#6-testing) for more details. --- docker/embedded/Dockerfile.ultra-lite | 1 + scripts/init-without-ocr.sh | 13 + scripts/stirling-diagnostics.sh | 450 ++++++++++++++++++++++++++ 3 files changed, 464 insertions(+) create mode 100644 scripts/stirling-diagnostics.sh diff --git a/docker/embedded/Dockerfile.ultra-lite b/docker/embedded/Dockerfile.ultra-lite index 145803d6b..50b65b6cc 100644 --- a/docker/embedded/Dockerfile.ultra-lite +++ b/docker/embedded/Dockerfile.ultra-lite @@ -58,6 +58,7 @@ LABEL org.opencontainers.image.keywords="PDF, manipulation, ultra-lite, API, Spr # Copy scripts COPY scripts/init-without-ocr.sh /scripts/init-without-ocr.sh COPY scripts/installFonts.sh /scripts/installFonts.sh +COPY scripts/stirling-diagnostics.sh /scripts/stirling-diagnostics.sh # Copy built JAR from build stage COPY --from=build /app/app/core/build/libs/*.jar /app.jar diff --git a/scripts/init-without-ocr.sh b/scripts/init-without-ocr.sh index d8ac8f2e2..1afec3b9b 100644 --- a/scripts/init-without-ocr.sh +++ b/scripts/init-without-ocr.sh @@ -5,6 +5,19 @@ set -euo pipefail log() { printf '%s\n' "$*" >&2; } command_exists() { command -v "$1" >/dev/null 2>&1; } +if [ -d /scripts ] && [[ ":${PATH}:" != *":/scripts:"* ]]; then + export PATH="/scripts:${PATH}" +fi + +if [ -x /scripts/stirling-diagnostics.sh ]; then + mkdir -p /usr/local/bin + ln -sf /scripts/stirling-diagnostics.sh /usr/local/bin/diagnostics + ln -sf /scripts/stirling-diagnostics.sh /usr/local/bin/stirling-diagnostics + ln -sf /scripts/stirling-diagnostics.sh /usr/local/bin/diag + ln -sf /scripts/stirling-diagnostics.sh /usr/local/bin/debug + ln -sf /scripts/stirling-diagnostics.sh /usr/local/bin/diagnostic +fi + run_with_timeout() { local secs=$1; shift if command_exists timeout; then diff --git a/scripts/stirling-diagnostics.sh b/scripts/stirling-diagnostics.sh new file mode 100644 index 000000000..05d78b472 --- /dev/null +++ b/scripts/stirling-diagnostics.sh @@ -0,0 +1,450 @@ +#!/bin/bash +set -euo pipefail + +SCRIPT_NAME="stirling-diagnostics" +DEFAULT_DAYS=1 +DEFAULT_OUT_DIR="/configs" +DEFAULT_OUTPUT_DIR="$DEFAULT_OUT_DIR" +DEFAULT_STAGE_DIR="/tmp/${SCRIPT_NAME}-$(date +%Y%m%d-%H%M%S)" +CONFIG_FILE="${CONFIG_FILE:-/configs/settings.yml}" + +log() { printf '%s\n' "$*" >&2; } +command_exists() { command -v "$1" >/dev/null 2>&1; } + +prompt_yes_no() { + local prompt="$1" default="$2" reply + if [ "$default" = "y" ]; then + read -r -p "${prompt} [Y/n] " reply + else + read -r -p "${prompt} [y/N] " reply + fi + reply="${reply:-$default}" + case "$reply" in + y|Y|yes|YES) return 0 ;; + *) return 1 ;; + esac +} + +prompt_value() { + local prompt="$1" default="$2" reply + read -r -p "${prompt} [${default}] " reply + printf '%s' "${reply:-$default}" +} + +is_text_file() { + local file="$1" + case "$file" in + *.log|*.txt|*.yml|*.yaml|*.json|*.properties|*.cfg|*.conf|*.ini|*.csv|*.md) return 0 ;; + esac + if command_exists file; then + file -b --mime-type "$file" 2>/dev/null | grep -q '^text/' && return 0 + fi + return 1 +} + +redact_file() { + local file="$1" + local redact_secrets="$2" + local redact_urls="$3" + local redact_emails="$4" + local redact_hosts="$5" + local tmp="${file}.redact.tmp" + local sed_args=() + if [ "$redact_secrets" = true ]; then + sed_args+=( + -e 's/(Authorization:[[:space:]]*Bearer[[:space:]]+)[^[:space:]]+/\1[REDACTED]/g' + -e 's/([A-Za-z0-9_.-]*(token|secret|password|passwd|pwd|api[_-]?key|access[_-]?key|authorization|bearer)[A-Za-z0-9_.-]*[[:space:]]*[:=][[:space:]]*)[^[:space:]]+/\1[REDACTED]/g' + ) + fi + if [ "$redact_urls" = true ]; then + sed_args+=( + -e 's#(https?://)[^/@: ]+#\1[REDACTED_HOST]#g' + ) + fi + if [ "$redact_emails" = true ]; then + sed_args+=( + -e 's/[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}/[REDACTED_EMAIL]/g' + ) + fi + if [ "$redact_hosts" = true ]; then + sed_args+=( + -e 's/([Hh]ost(name)?|[Dd]omain|[Ss]erver)[[:space:]]*[:=][[:space:]]*[^[:space:]]+/\1: [REDACTED_HOST]/g' + ) + fi + if [ ${#sed_args[@]} -eq 0 ]; then + return 0 + fi + sed -E "${sed_args[@]}" "$file" > "$tmp" && mv "$tmp" "$file" +} + +copy_file() { + local src="$1" dest_root="$2" + local rel="${src#/}" + local dest="${dest_root}/${rel}" + mkdir -p "$(dirname "$dest")" + cp -p "$src" "$dest" +} + +copy_dir_filtered() { + local src="$1" dest_root="$2" days="$3" + [ -d "$src" ] || return 0 + local find_args=(-type f) + if [ "$days" -gt 0 ]; then + find_args+=(-mtime "-${days}") + fi + while IFS= read -r -d '' file; do + case "$file" in + *.tar|*.tar.gz|*.tgz|*.zip|*.rar|*.7z|*.gz|*.bz2|*.xz|*.lz4|*.zst) continue ;; + *.pdf|*.PDF) continue ;; + *.jpg|*.jpeg|*.png|*.gif|*.bmp|*.tif|*.tiff|*.webp|*.heic) continue ;; + esac + copy_file "$file" "$dest_root" + done < <(find "$src" "${find_args[@]}" -print0 2>/dev/null || true) +} + +copy_dir_filtered_no_pdfs() { + local src="$1" dest_root="$2" + [ -d "$src" ] || return 0 + while IFS= read -r -d '' file; do + case "$file" in + *.tar|*.tar.gz|*.tgz|*.zip|*.rar|*.7z|*.gz|*.bz2|*.xz|*.lz4|*.zst) continue ;; + *.pdf|*.PDF) continue ;; + *.jpg|*.jpeg|*.png|*.gif|*.bmp|*.tif|*.tiff|*.webp|*.heic) continue ;; + esac + copy_file "$file" "$dest_root" + done < <(find "$src" -type f -print0 2>/dev/null || true) +} + +write_tree() { + local src="$1" out="$2" + [ -d "$src" ] || return 0 + if command_exists tree; then + tree -a "$src" > "$out" 2>/dev/null || true + else + find "$src" -print > "$out" 2>/dev/null || true + fi +} + +write_kv() { + local key="$1" value="$2" file="$3" + printf '%s: %s\n' "$key" "$value" >> "$file" +} + +fetch_url() { + local url="$1" out="$2" + if command_exists curl; then + curl -fsS --max-time 5 "$url" -o "$out" 2>/dev/null && return 0 + elif command_exists wget; then + wget -q -T 5 -O "$out" "$url" 2>/dev/null && return 0 + fi + return 1 +} + +record_success() { + local label="$1" value="$2" + if [ "$label" = "metrics" ]; then + METRICS_SUCCESS+=("$value") + elif [ "$label" = "ui" ]; then + UI_SUCCESS+=("$value") + fi +} + +usage() { + cat < "$SUMMARY" +log "Writing summary..." +write_kv "created_at" "$(date -u +"%Y-%m-%dT%H:%M:%SZ")" "$SUMMARY" +write_kv "version_tag" "${VERSION_TAG:-}" "$SUMMARY" +if [ -f /etc/stirling_version ]; then + write_kv "stirling_version_file" "$(tr -d '\r\n' < /etc/stirling_version)" "$SUMMARY" +fi +write_kv "config_file" "$CONFIG_FILE" "$SUMMARY" +write_kv "output_dir" "$OUTPUT_DIR" "$SUMMARY" +write_kv "include_logs" "$INCLUDE_LOGS" "$SUMMARY" +write_kv "include_configs" "$INCLUDE_CONFIGS" "$SUMMARY" +write_kv "include_customFiles" "$INCLUDE_CUSTOM" "$SUMMARY" +write_kv "include_pipeline" "$INCLUDE_PIPELINE" "$SUMMARY" +write_kv "include_temp" "$INCLUDE_TEMP" "$SUMMARY" +write_kv "include_system" "$INCLUDE_SYSTEM" "$SUMMARY" +write_kv "include_env" "$INCLUDE_ENV" "$SUMMARY" +write_kv "include_metrics" "$INCLUDE_METRICS" "$SUMMARY" +write_kv "include_ui_data" "$INCLUDE_UI_DATA" "$SUMMARY" +write_kv "redaction" "$REDACT" "$SUMMARY" +write_kv "redact_secrets" "$REDACT_SECRETS" "$SUMMARY" +write_kv "redact_urls" "$REDACT_URLS" "$SUMMARY" +write_kv "redact_emails" "$REDACT_EMAILS" "$SUMMARY" +write_kv "redact_host_fields" "$REDACT_HOST_FIELDS" "$SUMMARY" +write_kv "log_days" "$LOG_DAYS" "$SUMMARY" + +if [ "$INCLUDE_SYSTEM" = true ]; then + log "Collecting system info..." + SYS_DIR="${DATA_DIR}/system" + mkdir -p "$SYS_DIR" + uname -a > "${SYS_DIR}/uname.txt" 2>/dev/null || true + if [ -f /etc/os-release ]; then cp -p /etc/os-release "${SYS_DIR}/os-release"; fi + id > "${SYS_DIR}/id.txt" 2>/dev/null || true + whoami > "${SYS_DIR}/whoami.txt" 2>/dev/null || true + date -u > "${SYS_DIR}/date_utc.txt" 2>/dev/null || true + df -h > "${SYS_DIR}/df.txt" 2>/dev/null || true + if [ -f /proc/meminfo ]; then cp -p /proc/meminfo "${SYS_DIR}/meminfo.txt"; fi + if [ -f /proc/cpuinfo ]; then cp -p /proc/cpuinfo "${SYS_DIR}/cpuinfo.txt"; fi + if [ -f /proc/mounts ]; then cp -p /proc/mounts "${SYS_DIR}/mounts.txt"; fi + if [ -f /proc/self/mountinfo ]; then cp -p /proc/self/mountinfo "${SYS_DIR}/mountinfo.txt"; fi + if command_exists lscpu; then lscpu > "${SYS_DIR}/lscpu.txt" 2>/dev/null || true; fi + if command_exists free; then free -m > "${SYS_DIR}/free.txt" 2>/dev/null || true; fi + if command_exists ps; then ps -eo pid,ppid,user,etime,cmd --sort=pid > "${SYS_DIR}/ps.txt" 2>/dev/null || true; fi + if command_exists java; then java -version > "${SYS_DIR}/java-version.txt" 2>&1 || true; fi + if command_exists python; then python --version > "${SYS_DIR}/python-version.txt" 2>&1 || true; fi +fi + +if [ "$INCLUDE_ENV" = true ]; then + log "Collecting environment variables..." + ENV_DIR="${DATA_DIR}/env" + mkdir -p "$ENV_DIR" + env | sort > "${ENV_DIR}/environment.txt" +fi + +if [ "$INCLUDE_METRICS" = true ] && [ -n "$BASE_URL" ]; then + log "Fetching metrics from ${BASE_URL}..." + MET_DIR="${DATA_DIR}/metrics" + mkdir -p "$MET_DIR" + for endpoint in \ + /api/v1/info/status \ + /api/v1/info/uptime \ + /api/v1/info/wau \ + /api/v1/info/requests \ + /api/v1/info/requests/unique \ + /api/v1/info/requests/all \ + /api/v1/info/requests/all/unique \ + /api/v1/info/load \ + /api/v1/info/load/unique \ + /api/v1/info/load/all \ + /api/v1/info/load/all/unique; do + if fetch_url "${BASE_URL}${endpoint}" "${MET_DIR}${endpoint}.json"; then + record_success "metrics" "$endpoint" + fi + done + if fetch_url "${BASE_URL}/api/v1/info/health" "${MET_DIR}/api/v1/info/health.json"; then + record_success "metrics" "/api/v1/info/health" + fi + if fetch_url "${BASE_URL}/actuator/health" "${MET_DIR}/actuator/health.json"; then + record_success "metrics" "/actuator/health" + fi + if fetch_url "${BASE_URL}/actuator/prometheus" "${MET_DIR}/actuator/prometheus.txt"; then + record_success "metrics" "/actuator/prometheus" + fi + if [ "$INCLUDE_UI_DATA" = true ]; then + for endpoint in \ + /api/v1/ui-data/sign \ + /api/v1/ui-data/pipeline \ + /api/v1/ui-data/ocr-pdf; do + if fetch_url "${BASE_URL}${endpoint}" "${MET_DIR}${endpoint}.json"; then + record_success "ui" "$endpoint" + fi + done + fi +fi + +if [ "$INCLUDE_METRICS" = true ] && [ -z "$BASE_URL" ]; then + log "Auto-detecting metrics endpoint..." + MET_DIR="${DATA_DIR}/metrics" + mkdir -p "$MET_DIR" + for port in ${SERVER_PORT:-} ${PORT:-} 8080 80 8081 8082 8888 5000; do + [ -n "$port" ] || continue + BASE_URL="http://127.0.0.1:${port}" + if fetch_url "${BASE_URL}/api/v1/info/status" "${MET_DIR}/api/v1/info/status.json"; then + record_success "metrics" "/api/v1/info/status" + break + fi + if fetch_url "${BASE_URL}/actuator/health" "${MET_DIR}/actuator/health.json"; then + record_success "metrics" "/actuator/health" + break + fi + BASE_URL="" + done + if [ -n "$BASE_URL" ]; then + for endpoint in \ + /api/v1/info/status \ + /api/v1/info/uptime \ + /api/v1/info/wau \ + /api/v1/info/requests \ + /api/v1/info/requests/unique \ + /api/v1/info/requests/all \ + /api/v1/info/requests/all/unique \ + /api/v1/info/load \ + /api/v1/info/load/unique \ + /api/v1/info/load/all \ + /api/v1/info/load/all/unique; do + if fetch_url "${BASE_URL}${endpoint}" "${MET_DIR}${endpoint}.json"; then + record_success "metrics" "$endpoint" + fi + done + if fetch_url "${BASE_URL}/api/v1/info/health" "${MET_DIR}/api/v1/info/health.json"; then + record_success "metrics" "/api/v1/info/health" + fi + if fetch_url "${BASE_URL}/actuator/prometheus" "${MET_DIR}/actuator/prometheus.txt"; then + record_success "metrics" "/actuator/prometheus" + fi + if [ "$INCLUDE_UI_DATA" = true ]; then + for endpoint in \ + /api/v1/ui-data/sign \ + /api/v1/ui-data/pipeline \ + /api/v1/ui-data/ocr-pdf; do + if fetch_url "${BASE_URL}${endpoint}" "${MET_DIR}${endpoint}.json"; then + record_success "ui" "$endpoint" + fi + done + fi + fi +fi + +if [ "$INCLUDE_METRICS" = true ]; then + write_kv "metrics_base_url" "${BASE_URL:-}" "$SUMMARY" + if [ ${#METRICS_SUCCESS[@]} -gt 0 ]; then + write_kv "metrics_endpoints" "$(IFS=,; printf '%s' "${METRICS_SUCCESS[*]}")" "$SUMMARY" + else + write_kv "metrics_endpoints" "" "$SUMMARY" + fi +fi + +if [ "$INCLUDE_LOGS" = true ]; then + log "Collecting logs (last ${LOG_DAYS} days)..." + copy_dir_filtered "/logs" "$DATA_DIR" "$LOG_DAYS" +fi +if [ "$INCLUDE_CONFIGS" = true ]; then + log "Collecting configs..." + copy_dir_filtered "/configs" "$DATA_DIR" 0 +fi +if [ "$INCLUDE_CUSTOM" = true ]; then + log "Collecting /customFiles (excluding PDFs)..." + copy_dir_filtered_no_pdfs "/customFiles" "$DATA_DIR" +fi +if [ "$INCLUDE_PIPELINE" = true ]; then + log "Collecting /pipeline (excluding PDFs)..." + copy_dir_filtered_no_pdfs "/pipeline" "$DATA_DIR" +fi +if [ "$INCLUDE_TEMP" = true ]; then + log "Collecting /tmp/stirling-pdf..." + copy_dir_filtered "/tmp/stirling-pdf" "$DATA_DIR" 0 +fi + +log "Writing directory trees..." +TREE_DIR="${DATA_DIR}/tree" +mkdir -p "$TREE_DIR" +write_tree "/logs" "${TREE_DIR}/logs.txt" +write_tree "/configs" "${TREE_DIR}/configs.txt" +write_tree "/customFiles" "${TREE_DIR}/customFiles.txt" +write_tree "/pipeline" "${TREE_DIR}/pipeline.txt" +write_tree "/tmp/stirling-pdf" "${TREE_DIR}/tmp-stirling-pdf.txt" +write_tree "/usr/share/tesseract-ocr/5/tessdata" "${TREE_DIR}/tessdata.txt" +write_tree "/usr/share/tessdata" "${TREE_DIR}/tessdata-mount.txt" + +if [ "$REDACT" = true ]; then + log "Applying redaction..." + while IFS= read -r -d '' file; do + if is_text_file "$file"; then + redact_file "$file" "$REDACT_SECRETS" "$REDACT_URLS" "$REDACT_EMAILS" "$REDACT_HOST_FIELDS" + fi + done < <(find "$DATA_DIR" -type f -print0 2>/dev/null || true) + if [ "$INCLUDE_ENV" = true ]; then + redact_file "${DATA_DIR}/env/environment.txt" "$REDACT_SECRETS" "$REDACT_URLS" "$REDACT_EMAILS" "$REDACT_HOST_FIELDS" || true + fi +fi + +log "Packaging tar.gz..." +TAR_BASENAME="${SCRIPT_NAME}-$(date +%Y%m%d-%H%M%S).tar.gz" +TAR_PATH="${OUTPUT_DIR}/${TAR_BASENAME}" +mkdir -p "$OUTPUT_DIR" +(cd "$OUT_DIR" && tar -czf "$TAR_PATH" "bundle" "summary.txt") +log "Created tar.gz: $TAR_PATH" + +rm -rf "$OUT_DIR" + +log "Diagnostics complete."