removal of all getByte loads (#3153)

# Description of Changes Please provide a summary of the changes, including: - What was changed - Why the change was made - Any challenges encountered Closes #(issue_number) --- ## Checklist ### General - [ ] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md) - [ ] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md) (if applicable) - [ ] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md) (if applicable) - [ ] I have performed a self-review of my own code - [ ] My changes generate no new warnings ### Documentation - [ ] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed) - [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only) ### UI Changes (if applicable) - [ ] Screenshots or videos demonstrating the UI changes are attached (e.g., as comments or direct attachments in the PR) ### Testing (if applicable) - [ ] I have tested my changes locally. Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md#6-testing) for more details. --------- Co-authored-by: a <a>
2026-04-22 23:08:53 +02:00 · 2025-03-10 20:17:45 +00:00
parent d0a5416570
commit a61749d500
34 changed files with 504 additions and 214 deletions
--- a/testing/test.sh
+++ b/testing/test.sh
@@ -39,6 +39,136 @@ check_health() {
    return 0
 }

+# Function to capture file list from a Docker container
+#
+# Writes a sorted snapshot with one line per regular file in the form
+# "<path> <size in bytes> <mtime as epoch seconds>". If that yields nothing,
+# a paths-only listing is tried; if that is empty too, a single placeholder
+# line is written so a later diff still has content to read.
+# Arguments:
+#   $1 - name of the container to inspect
+#   $2 - snapshot file to (over)write
+# Outputs: progress and warning messages on stdout
+capture_file_list() {
+    local container_name=$1
+    local output_file=$2
+
+    # One definition of the file selection, shared by both attempts below.
+    # Skip proc, sys, dev, /config, /logs and the LibreOffice config directory.
+    # Also skip PDFBox and LibreOffice temporary files.
+    local find_cmd="find / -type f \
+        -not -path '*/proc/*' \
+        -not -path '*/sys/*' \
+        -not -path '*/dev/*' \
+        -not -path '/config/*' \
+        -not -path '/logs/*' \
+        -not -path '*/home/stirlingpdfuser/.config/libreoffice/*' \
+        -not -path '*/tmp/PDFBox*' \
+        -not -path '*/tmp/hsperfdata_stirlingpdfuser/*' \
+        -not -path '*/tmp/lu*' \
+        -not -path '*/tmp/tmp*'"
+
+    echo "Capturing file list from $container_name..."
+    # Get all files in one command, output directly from Docker to avoid path issues.
+    # find passes the paths to stat as arguments (in batches), so file names are
+    # never pasted into shell source and no extra shell is started per file.
+    # Errors from find and stat (e.g. files vanishing mid-scan) are discarded.
+    docker exec "$container_name" sh -c \
+        "$find_cmd -exec stat -c '%n %s %Y' {} + 2>/dev/null | sort" > "$output_file"
+
+    # Check if the output file has content
+    if [ ! -s "$output_file" ]; then
+        echo "WARNING: Failed to capture file list or container returned empty list"
+        echo "Trying alternative approach..."
+
+        # Alternative simpler approach - just get paths as a fallback
+        docker exec "$container_name" sh -c "$find_cmd 2>/dev/null | sort" > "$output_file"
+
+        if [ ! -s "$output_file" ]; then
+            echo "ERROR: All attempts to capture file list failed"
+            # Create a dummy entry to prevent diff errors
+            echo "NO_FILES_FOUND 0 0" > "$output_file"
+        fi
+    fi
+
+    echo "File list captured to $output_file"
+}
+
+# Function to compare before and after file lists
+#
+# Arguments:
+#   $1 - snapshot taken before the test run
+#   $2 - snapshot taken after the test run
+#   $3 - file that receives the diff; "<$3>.tmp", "<$3>.added" and
+#        "<$3>.removed" are written next to it as working files
+#   $4 - container whose logs are printed when temp files are detected
+# Outputs: a report of the detected changes on stdout
+# Returns: 1 when a line containing "tmp" or "temp" (case-insensitive) is found
+#          among the added entries (or, if the before snapshot is empty, anywhere
+#          in the after snapshot); 0 otherwise
+compare_file_lists() {
+    local before_file=$1
+    local after_file=$2
+    local diff_file=$3
+    local container_name=$4
+
+    echo "Comparing file lists..."
+
+    # Check if files exist and have content
+    if [ ! -s "$before_file" ] || [ ! -s "$after_file" ]; then
+        echo "WARNING: One or both file lists are empty."
+
+        if [ ! -s "$before_file" ]; then
+            echo "Before file is empty: $before_file"
+        fi
+
+        if [ ! -s "$after_file" ]; then
+            echo "After file is empty: $after_file"
+        fi
+
+        # Create empty diff file
+        : > "$diff_file"
+
+        # Check if we at least have the after file to look for temp files
+        if [ -s "$after_file" ]; then
+            echo "Checking for temp files in the after snapshot..."
+            # grep exits 1 when nothing matches, which is the good case here
+            grep -i "tmp\|temp" "$after_file" > "${diff_file}.tmp" || true
+            if [ -s "${diff_file}.tmp" ]; then
+                echo "WARNING: Temporary files found:"
+                cat "${diff_file}.tmp"
+                echo "Printing docker logs due to temporary file detection:"
+                docker logs "$container_name"  # Print logs when temp files are found
+                return 1
+            else
+                echo "No temporary files found in the after snapshot."
+            fi
+        fi
+
+        return 0
+    fi
+
+    # Both files exist and have content, proceed with diff.
+    # diff exits 1 when its inputs differ; that is an expected outcome, not an error.
+    diff "$before_file" "$after_file" > "$diff_file" || true
+
+    if [ -s "$diff_file" ]; then
+        echo "Detected changes in files:"
+        cat "$diff_file"
+
+        # Extract only added entries (lines starting with ">"). A file whose size or
+        # mtime changed shows up here as well, because those fields are part of the line.
+        grep "^>" "$diff_file" > "${diff_file}.added" || true
+        if [ -s "${diff_file}.added" ]; then
+            echo "New files created during test:"
+            sed 's/^> //' "${diff_file}.added"
+
+            # Check for tmp files
+            grep -i "tmp\|temp" "${diff_file}.added" > "${diff_file}.tmp" || true
+            if [ -s "${diff_file}.tmp" ]; then
+                echo "WARNING: Temporary files detected:"
+                cat "${diff_file}.tmp"
+                echo "Printing docker logs due to temporary file detection:"
+                docker logs "$container_name"  # Print logs when temp files are found
+                return 1
+            fi
+        fi
+
+        # Extract only removed entries (lines starting with "<")
+        grep "^<" "$diff_file" > "${diff_file}.removed" || true
+        if [ -s "${diff_file}.removed" ]; then
+            echo "Files removed during test:"
+            sed 's/^< //' "${diff_file}.removed"
+        fi
+    else
+        echo "No file changes detected during test."
+    fi
+
+    return 0
+}
+
 # Function to test a Docker Compose configuration
 test_compose() {
    local compose_file=$1
@@ -91,7 +221,7 @@ main() {

    # Building Docker images
    # docker build --no-cache --pull --build-arg VERSION_TAG=alpha -t stirlingtools/stirling-pdf:latest -f ./Dockerfile .
-    docker build --no-cache --pull --build-arg VERSION_TAG=alpha -t stirlingtools/stirling-pdf:latest-ultra-lite -f ./Dockerfile.ultra-lite .
+    docker build --build-arg VERSION_TAG=alpha -t docker.stirlingpdf.com/stirlingtools/stirling-pdf:latest-ultra-lite -f ./Dockerfile.ultra-lite .

    # Test each configuration
    run_tests "Stirling-PDF-Ultra-Lite" "./exampleYmlFiles/docker-compose-latest-ultra-lite.yml"
@@ -147,16 +277,55 @@ main() {
    run_tests "Stirling-PDF-Security-Fat-with-login" "./exampleYmlFiles/test_cicd.yml"

    if [ $? -eq 0 ]; then
+        # Create directory for file snapshots if it doesn't exist
+        SNAPSHOT_DIR="$PROJECT_ROOT/testing/file_snapshots"
+        mkdir -p "$SNAPSHOT_DIR"
+        
+        # Capture file list before running behave tests
+        BEFORE_FILE="$SNAPSHOT_DIR/files_before_behave.txt"
+        AFTER_FILE="$SNAPSHOT_DIR/files_after_behave.txt"
+        DIFF_FILE="$SNAPSHOT_DIR/files_diff.txt"
+        
+        # Define container name variable for consistency
+        CONTAINER_NAME="Stirling-PDF-Security-Fat-with-login"
+        
+        capture_file_list "$CONTAINER_NAME" "$BEFORE_FILE"
+        
        cd "testing/cucumber"
        if python -m behave; then
+            # Wait 5 seconds before capturing the file list after tests
+            echo "Waiting 5 seconds for any file operations to complete..."
+            sleep 5
+            
+            # Capture file list after running behave tests
+            cd "$PROJECT_ROOT"
+            capture_file_list "$CONTAINER_NAME" "$AFTER_FILE"
+            
+            # Compare file lists. The behave result itself is recorded once, right
+            # after this check; leftover temp files are tracked as a separate
+            # failed entry so the regression is not listed twice as passed.
+            if compare_file_lists "$BEFORE_FILE" "$AFTER_FILE" "$DIFF_FILE" "$CONTAINER_NAME"; then
+                echo "No unexpected temporary files found."
+            else
+                echo "WARNING: Unexpected temporary files detected after behave tests!"
+                failed_tests+=("Stirling-PDF-Regression-Temp-Files")
+            fi
+            
            passed_tests+=("Stirling-PDF-Regression")
        else
            failed_tests+=("Stirling-PDF-Regression")
            echo "Printing docker logs of failed regression"
-            docker logs "Stirling-PDF-Security-Fat-with-login"
+            docker logs "$CONTAINER_NAME"
            echo "Printed docker logs of failed regression"
+            
+            # Still capture file list after failure for analysis
+            # Wait 10 seconds before capturing the file list
+            echo "Waiting 10 seconds before capturing file list..."
+            sleep 10
+            
+            cd "$PROJECT_ROOT"
+            capture_file_list "$CONTAINER_NAME" "$AFTER_FILE"
+            compare_file_lists "$BEFORE_FILE" "$AFTER_FILE" "$DIFF_FILE" "$CONTAINER_NAME"
        fi
-        cd "$PROJECT_ROOT"
    fi

    docker-compose -f "./exampleYmlFiles/test_cicd.yml" down
--- a/testing/test_webpages.sh
+++ b/testing/test_webpages.sh
@@ -2,122 +2,173 @@

 # Function to check a single webpage
+#
+# Fetches base URL + path with curl and appends a one-line verdict
+# ("OK - ..." or "FAILED - ...") to the result file.
+# Arguments:
+#   $1 - URL path to test (carriage returns are stripped)
+#   $2 - base URL the path is appended to (carriage returns are stripped)
+#   $3 - file the verdict line is appended to (default: /dev/stdout)
+# Globals: response, HTTP_STATUS and BODY are assigned without `local`
+# Returns: 0 when the status is 200 and the body looks like HTML;
+#          1 on curl failure, any other status, or a non-HTML body
 check_webpage() {
-    local url=$(echo "$1" | tr -d '\r')  # Remove carriage returns
-    local base_url=$(echo "$2" | tr -d '\r')
-    local full_url="${base_url}${url}"
-    local timeout=10
-    echo -n "Testing $full_url ... "
-    
-    # Use curl to fetch the page with timeout
-    response=$(curl -s -w "\n%{http_code}" --max-time $timeout "$full_url")
-    if [ $? -ne 0 ]; then
-        echo "FAILED - Connection error or timeout $full_url "
-        return 1
-    fi
+  local url=$(echo "$1" | tr -d '\r') # Remove carriage returns
+  local base_url=$(echo "$2" | tr -d '\r')
+  local full_url="${base_url}${url}"
+  local timeout=10
+  # Fall back to stdout so a two-argument call still reports its verdict
+  # instead of failing on a redirect to an empty path
+  local result_file="${3:-/dev/stdout}"

-    # Split response into body and status code
-    HTTP_STATUS=$(echo "$response" | tail -n1)
-    BODY=$(echo "$response" | sed '$d')
+  # Use curl to fetch the page with timeout; -w appends the status code
+  # on a line of its own after the body
+  response=$(curl -s -w "\n%{http_code}" --max-time "$timeout" "$full_url")
+  if [ $? -ne 0 ]; then
+    echo "FAILED - Connection error or timeout $full_url" >> "$result_file"
+    return 1
+  fi

-    # Check HTTP status
-    if [ "$HTTP_STATUS" != "200" ]; then
-        echo "FAILED - HTTP Status: $HTTP_STATUS"
-        return 1
-    fi
+  # Split response into body and status code
+  HTTP_STATUS=$(echo "$response" | tail -n1)
+  BODY=$(echo "$response" | sed '$d')

-    # Check if response contains HTML
-    if ! printf '%s' "$BODY" | grep -q "<!DOCTYPE html>\|<html"; then
-        echo "FAILED - Response is not HTML"
-        return 1
-    fi
+  # Check HTTP status
+  if [ "$HTTP_STATUS" != "200" ]; then
+    echo "FAILED - HTTP Status: $HTTP_STATUS - $full_url" >> "$result_file"
+    return 1
+  fi

-    echo "OK"
-    return 0
+  # Check if response contains HTML
+  if ! printf '%s' "$BODY" | grep -q "<!DOCTYPE html>\|<html"; then
+    echo "FAILED - Response is not HTML - $full_url" >> "$result_file"
+    return 1
+  fi
+
+  echo "OK - $full_url" >> "$result_file"
+  return 0
 }

-# Main function to test all URLs from the list
+# Check one URL and record the outcome as files in a scratch directory
+#
+# Arguments:
+#   $1 - URL path to test
+#   $2 - base URL
+#   $3 - scratch directory for the output files
+#   $4 - index used to name this URL's output files
+# Writes: <dir>/result_<index>.txt (verdict line, via check_webpage) and
+#         <dir>/failed_<index> containing "1" on failure, "0" on success
+test_url() {
+  local url="$1" base_url="$2" tmp_dir="$3" url_index="$4"
+  local result_file="${tmp_dir}/result_${url_index}.txt"
+  local status_file="${tmp_dir}/failed_${url_index}"
+  local failed=0
+
+  check_webpage "$url" "$base_url" "$result_file" || failed=1
+  echo "$failed" > "$status_file"
+}
+
+# Main function to test all URLs from the list in parallel
+#
+# Arguments:
+#   $1 - file with one URL path per line; empty lines and lines starting
+#        with '#' are skipped
+#   $2 - base URL (default: http://localhost:8080)
+#   $3 - maximum number of concurrent checks (default: 10)
+# Outputs: per-URL verdicts in input order, then a summary, on stdout
+# Returns: the number of failed URLs, capped at 255;
+#          1 if the scratch directory cannot be created
 test_all_urls() {
-    local url_file=$1
-    local base_url=${2:-"http://localhost:8080"}
-    local failed_count=0
-    local total_count=0
-    local start_time=$(date +%s)
+  local url_file="$1"
+  local base_url="${2:-"http://localhost:8080"}"
+  local max_parallel="${3:-10}"  # Default to 10 parallel processes
+  local failed_count=0
+  local total_count=0
+  local start_time=$(date +%s)
+  local tmp_dir
+  local active_jobs=0
+  local url_index=0
+  local url i
+
+  # A non-numeric limit would make the numeric comparison in the loop error
+  # out on every iteration, which silently disables the throttling
+  if ! [[ "$max_parallel" =~ ^[1-9][0-9]*$ ]]; then
+    echo "Invalid max parallel value '$max_parallel', using 10" >&2
+    max_parallel=10
+  fi
+
+  # Assigned separately from `local` so a mktemp failure is not masked
+  tmp_dir=$(mktemp -d) || return 1

-    echo "Starting webpage tests..."
-    echo "Base URL: $base_url"
-	echo "Number of lines: $(wc -l < "$url_file")"
-    echo "----------------------------------------"
-	
-    while IFS= read -r url || [ -n "$url" ]; do
-        # Skip empty lines and comments
-        [[ -z "$url" || "$url" =~ ^#.*$ ]] && continue
-        
-        ((total_count++))
-        if ! check_webpage "$url" "$base_url"; then
-            ((failed_count++))
-        fi
-    done < "$url_file"
+  echo "Starting webpage tests..."
+  echo "Base URL: $base_url"
+  echo "Number of lines: $(wc -l < "$url_file")"
+  echo "Max parallel jobs: $max_parallel"
+  echo "----------------------------------------"

-    local end_time=$(date +%s)
-    local duration=$((end_time - start_time))
+  # Process each URL; the `|| [ -n "$url" ]` keeps a last line that has no
+  # trailing newline
+  while IFS= read -r url || [ -n "$url" ]; do
+    # Skip empty lines and comments
+    [[ -z "$url" || "$url" =~ ^#.*$ ]] && continue
+
+    total_count=$((total_count + 1))
+    url_index=$((url_index + 1))
    
-    echo "----------------------------------------"
-    echo "Test Summary:"
-    echo "Total tests: $total_count"
-    echo "Failed tests: $failed_count"
-    echo "Passed tests: $((total_count - failed_count))"
-    echo "Duration: ${duration} seconds"
+    # Run the check in background
+    test_url "$url" "$base_url" "$tmp_dir" "$url_index" &
+
+    # Track the job
+    active_jobs=$((active_jobs + 1))
+
+    # If we've reached max_parallel, wait for a job to finish
+    if [ "$active_jobs" -ge "$max_parallel" ]; then
+      wait -n  # Wait for any child process to exit (needs bash 4.3+)
+      active_jobs=$((active_jobs - 1))
+    fi
+  done < "$url_file"

-    return $failed_count
+  # Wait for remaining jobs to finish
+  wait
+
+  # Print results in order and count failures
+  for ((i = 1; i <= url_index; i++)); do
+    if [ -f "${tmp_dir}/result_${i}.txt" ]; then
+      cat "${tmp_dir}/result_${i}.txt"
+    fi
+
+    if [ -f "${tmp_dir}/failed_${i}" ]; then
+      failed_count=$((failed_count + $(<"${tmp_dir}/failed_${i}")))
+    else
+      # A job that never wrote its status did not complete the check,
+      # so it must not be counted as a pass
+      echo "FAILED - No result recorded for URL #${i}"
+      failed_count=$((failed_count + 1))
+    fi
+  done
+
+  # Clean up
+  rm -rf -- "${tmp_dir:?}"
+
+  local end_time=$(date +%s)
+  local duration=$((end_time - start_time))
+
+  echo "----------------------------------------"
+  echo "Test Summary:"
+  echo "Total tests: $total_count"
+  echo "Failed tests: $failed_count"
+  echo "Passed tests: $((total_count - failed_count))"
+  echo "Duration: ${duration} seconds"
+
+  # Return statuses wrap modulo 256, so e.g. 256 failures would read as success
+  if [ "$failed_count" -gt 255 ]; then
+    return 255
+  fi
+  return "$failed_count"
 }

 # Print usage information
 usage() {
-    echo "Usage: $0 [-f url_file] [-b base_url]"
-    echo "Options:"
-    echo "  -f url_file   Path to file containing URLs to test (required)"
-    echo "  -b base_url   Base URL to prepend to test URLs (default: http://localhost:8080)"
-    exit 1
+  # Write the help text to stdout in one call (one argument per line),
+  # then terminate the script with status 1
+  printf '%s\n' \
+    "Usage: $0 [-f url_file] [-b base_url] [-p max_parallel]" \
+    "Options:" \
+    "  -f url_file    Path to file containing URLs to test (required)" \
+    "  -b base_url    Base URL to prepend to test URLs (default: http://localhost:8080)" \
+    "  -p max_parallel Maximum number of parallel requests (default: 10)"
+  exit 1
 }

 # Main execution
+#
+# Parses -f/-b/-p/-h, validates the inputs, runs test_all_urls and exits
+# with 0 when every URL passed and 1 otherwise.
 main() {
-    local url_file=""
-    local base_url="http://localhost:8080"
+  local url_file=""
+  local base_url="http://localhost:8080"
+  local max_parallel=10

-    # Parse command line options
-    while getopts ":f:b:h" opt; do
-        case $opt in
-            f) url_file="$OPTARG" ;;
-            b) base_url="$OPTARG" ;;
-            h) usage ;;
-            \?) echo "Invalid option -$OPTARG" >&2; usage ;;
-        esac
-    done
+  # Parse command line options. The leading ':' selects silent error
+  # reporting: a missing option argument arrives as ':' and an unknown
+  # option as '?', each with the option letter in OPTARG.
+  while getopts ":f:b:p:h" opt; do
+    case $opt in
+      f) url_file="$OPTARG" ;;
+      b) base_url="$OPTARG" ;;
+      p) max_parallel="$OPTARG" ;;
+      h) usage ;;
+      :) echo "Option -$OPTARG requires an argument" >&2; usage ;;
+      \?) echo "Invalid option -$OPTARG" >&2; usage ;;
+    esac
+  done

-    # Check if URL file is provided
-    if [ -z "$url_file" ]; then
-        echo "Error: URL file is required"
-        usage
-    fi
+  # Check if URL file is provided
+  if [ -z "$url_file" ]; then
+    echo "Error: URL file is required"
+    usage
+  fi

-    # Check if URL file exists
-    if [ ! -f "$url_file" ]; then
-        echo "Error: URL list file not found: $url_file"
-        exit 1
-    fi
-    
-    # Run tests using the URL list
-    if test_all_urls "$url_file" "$base_url"; then
-        echo "All webpage tests passed!"
-        exit 0
-    else
-        echo "Some webpage tests failed!"
-        exit 1
-    fi
+  # Check if URL file exists
+  if [ ! -f "$url_file" ]; then
+    echo "Error: URL list file not found: $url_file"
+    exit 1
+  fi
+
+  # Check that the parallelism limit is a positive integer before it is
+  # used in numeric comparisons
+  if ! [[ "$max_parallel" =~ ^[1-9][0-9]*$ ]]; then
+    echo "Error: max_parallel must be a positive integer: $max_parallel"
+    usage
+  fi
+
+  # Run tests using the URL list
+  if test_all_urls "$url_file" "$base_url" "$max_parallel"; then
+    echo "All webpage tests passed!"
+    exit 0
+  else
+    echo "Some webpage tests failed!"
+    exit 1
+  fi
 }

 # Run main if script is executed directly
+# (when the file is sourced, BASH_SOURCE[0] and $0 differ, so main is not run)
 if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
-    main "$@"
+  main "$@"
 fi