mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2025-09-08 17:51:20 +02:00
feat(common): add ChecksumUtils
for MD5/SHA*/CRC32/Adler32 with Base64 and multi-algorithm support (#4261)
This commit is contained in:
parent
74870615df
commit
cb7471024b
@ -0,0 +1,301 @@
|
||||
package stirling.software.common.util;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.security.MessageDigest;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
import java.util.Base64;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.zip.Adler32;
|
||||
import java.util.zip.CRC32;
|
||||
import java.util.zip.Checksum;
|
||||
|
||||
import lombok.experimental.UtilityClass;
|
||||
|
||||
@UtilityClass
|
||||
public class ChecksumUtils {
|
||||
|
||||
/** Shared buffer size for streaming I/O. */
|
||||
private static final int BUFFER_SIZE = 8192;
|
||||
|
||||
/** Mask to extract the lower 32 bits of a long value (unsigned int). */
|
||||
private static final long UNSIGNED_32_BIT_MASK = 0xFFFFFFFFL;
|
||||
|
||||
/**
|
||||
* Computes a checksum for the given file using the chosen algorithm and returns a lowercase hex
|
||||
* string.
|
||||
*
|
||||
* <p>For digest algorithms (e.g., SHA-256, SHA-1, MD5), this returns the digest as hex. For
|
||||
* 32-bit {@link Checksum} algorithms ("CRC32", "ADLER32"), this returns an 8-character
|
||||
* lowercase hex string of the unsigned 32-bit value.
|
||||
*
|
||||
* @param path file to read
|
||||
* @param algorithm algorithm name (case-insensitive). Special values: "CRC32", "ADLER32".
|
||||
* @return hex string of the checksum
|
||||
* @throws IOException if the file cannot be read
|
||||
*/
|
||||
public static String checksum(Path path, String algorithm) throws IOException {
|
||||
try (InputStream is = Files.newInputStream(path)) {
|
||||
return checksum(is, algorithm);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes a checksum for the given stream using the chosen algorithm and returns a lowercase
|
||||
* hex string.
|
||||
*
|
||||
* <p><strong>Note:</strong> This method does <em>not</em> close the provided stream.
|
||||
*
|
||||
* @param is input stream (not closed by this method)
|
||||
* @param algorithm algorithm name (case-insensitive). Special values: "CRC32", "ADLER32".
|
||||
* @return hex string of the checksum
|
||||
* @throws IOException if reading from the stream fails
|
||||
*/
|
||||
public static String checksum(InputStream is, String algorithm) throws IOException {
|
||||
switch (algorithm.toUpperCase(Locale.ROOT)) {
|
||||
case "CRC32":
|
||||
return checksumChecksum(is, new CRC32());
|
||||
case "ADLER32":
|
||||
return checksumChecksum(is, new Adler32());
|
||||
default:
|
||||
return toHex(checksumBytes(is, algorithm));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes a checksum for the given file using the chosen algorithm and returns a Base64
|
||||
* encoded string.
|
||||
*
|
||||
* <p>For digest algorithms this is the Base64 of the raw digest bytes. For 32-bit checksum
|
||||
* algorithms ("CRC32", "ADLER32"), this is the Base64 of the 4-byte big-endian unsigned value.
|
||||
*
|
||||
* @param path file to read
|
||||
* @param algorithm algorithm name (case-insensitive). Special values: "CRC32", "ADLER32".
|
||||
* @return Base64-encoded checksum bytes
|
||||
* @throws IOException if the file cannot be read
|
||||
*/
|
||||
public static String checksumBase64(Path path, String algorithm) throws IOException {
|
||||
try (InputStream is = Files.newInputStream(path)) {
|
||||
return checksumBase64(is, algorithm);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes a checksum for the given stream using the chosen algorithm and returns a Base64
|
||||
* encoded string.
|
||||
*
|
||||
* <p><strong>Note:</strong> This method does <em>not</em> close the provided stream.
|
||||
*
|
||||
* @param is input stream (not closed by this method)
|
||||
* @param algorithm algorithm name (case-insensitive). Special values: "CRC32", "ADLER32".
|
||||
* @return Base64-encoded checksum bytes
|
||||
* @throws IOException if reading from the stream fails
|
||||
*/
|
||||
public static String checksumBase64(InputStream is, String algorithm) throws IOException {
|
||||
switch (algorithm.toUpperCase(Locale.ROOT)) {
|
||||
case "CRC32":
|
||||
return Base64.getEncoder().encodeToString(checksumChecksumBytes(is, new CRC32()));
|
||||
case "ADLER32":
|
||||
return Base64.getEncoder().encodeToString(checksumChecksumBytes(is, new Adler32()));
|
||||
default:
|
||||
return Base64.getEncoder().encodeToString(checksumBytes(is, algorithm));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes multiple checksums for the given file in a single pass over the data.
|
||||
*
|
||||
* <p>Returns a map from algorithm name to lowercase hex string. Order of results follows the
|
||||
* order of the provided {@code algorithms}.
|
||||
*
|
||||
* @param path file to read
|
||||
* @param algorithms algorithm names (case-insensitive). Special: "CRC32", "ADLER32".
|
||||
* @return map of algorithm → hex string
|
||||
* @throws IOException if the file cannot be read
|
||||
*/
|
||||
public static Map<String, String> checksums(Path path, String... algorithms)
|
||||
throws IOException {
|
||||
try (InputStream is = Files.newInputStream(path)) {
|
||||
return checksums(is, algorithms);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes multiple checksums for the given stream in a single pass over the data.
|
||||
*
|
||||
* <p><strong>Note:</strong> This method does <em>not</em> close the provided stream.
|
||||
*
|
||||
* @param is input stream (not closed by this method)
|
||||
* @param algorithms algorithm names (case-insensitive). Special: "CRC32", "ADLER32".
|
||||
* @return map of algorithm → hex string
|
||||
* @throws IOException if reading from the stream fails
|
||||
*/
|
||||
public static Map<String, String> checksums(InputStream is, String... algorithms)
|
||||
throws IOException {
|
||||
// Use LinkedHashMap to preserve the order of requested algorithms in the result.
|
||||
Map<String, MessageDigest> digests = new LinkedHashMap<>();
|
||||
Map<String, Checksum> checksums = new LinkedHashMap<>();
|
||||
|
||||
for (String algorithm : algorithms) {
|
||||
String key = algorithm; // keep original key for output
|
||||
switch (algorithm.toUpperCase(Locale.ROOT)) {
|
||||
case "CRC32":
|
||||
checksums.put(key, new CRC32());
|
||||
break;
|
||||
case "ADLER32":
|
||||
checksums.put(key, new Adler32());
|
||||
break;
|
||||
default:
|
||||
try {
|
||||
// For MessageDigest, pass the original name (case-insensitive per JCA)
|
||||
digests.put(key, MessageDigest.getInstance(algorithm));
|
||||
} catch (NoSuchAlgorithmException e) {
|
||||
throw new IllegalStateException("Unsupported algorithm: " + algorithm, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
byte[] buffer = new byte[BUFFER_SIZE];
|
||||
int read;
|
||||
while ((read = is.read(buffer)) != -1) {
|
||||
for (MessageDigest digest : digests.values()) {
|
||||
digest.update(buffer, 0, read);
|
||||
}
|
||||
for (Checksum cs : checksums.values()) {
|
||||
cs.update(buffer, 0, read);
|
||||
}
|
||||
}
|
||||
|
||||
Map<String, String> results = new LinkedHashMap<>();
|
||||
for (Map.Entry<String, MessageDigest> entry : digests.entrySet()) {
|
||||
results.put(entry.getKey(), toHex(entry.getValue().digest()));
|
||||
}
|
||||
for (Map.Entry<String, Checksum> entry : checksums.entrySet()) {
|
||||
// Keep value as long and mask to ensure unsigned hex formatting.
|
||||
long unsigned32 = entry.getValue().getValue() & UNSIGNED_32_BIT_MASK;
|
||||
results.put(entry.getKey(), String.format("%08x", unsigned32));
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compares the checksum of a file with an expected hex string (case-insensitive).
|
||||
*
|
||||
* @param path file to read
|
||||
* @param algorithm algorithm name (case-insensitive). Special: "CRC32", "ADLER32".
|
||||
* @param expected expected hex string (case-insensitive)
|
||||
* @return {@code true} if they match, otherwise {@code false}
|
||||
* @throws IOException if the file cannot be read
|
||||
*/
|
||||
public static boolean matches(Path path, String algorithm, String expected) throws IOException {
|
||||
try (InputStream is = Files.newInputStream(path)) {
|
||||
return matches(is, algorithm, expected);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Compares the checksum of a stream with an expected hex string (case-insensitive).
|
||||
*
|
||||
* <p><strong>Note:</strong> This method does <em>not</em> close the provided stream.
|
||||
*
|
||||
* @param is input stream (not closed by this method)
|
||||
* @param algorithm algorithm name (case-insensitive). Special: "CRC32", "ADLER32".
|
||||
* @param expected expected hex string (case-insensitive)
|
||||
* @return {@code true} if they match, otherwise {@code false}
|
||||
* @throws IOException if reading from the stream fails
|
||||
*/
|
||||
public static boolean matches(InputStream is, String algorithm, String expected)
|
||||
throws IOException {
|
||||
return checksum(is, algorithm).equalsIgnoreCase(expected);
|
||||
}
|
||||
|
||||
// ---------- Internal helpers ----------
|
||||
|
||||
/**
|
||||
* Computes a MessageDigest over a stream and returns the raw digest bytes.
|
||||
*
|
||||
* @param is input stream (not closed)
|
||||
* @param algorithm JCA MessageDigest algorithm (e.g., "SHA-256")
|
||||
* @return raw digest bytes
|
||||
* @throws IOException if reading fails
|
||||
* @throws IllegalStateException if the algorithm is unsupported
|
||||
*/
|
||||
private static byte[] checksumBytes(InputStream is, String algorithm) throws IOException {
|
||||
try {
|
||||
MessageDigest digest = MessageDigest.getInstance(algorithm);
|
||||
byte[] buffer = new byte[BUFFER_SIZE];
|
||||
int read;
|
||||
while ((read = is.read(buffer)) != -1) {
|
||||
digest.update(buffer, 0, read);
|
||||
}
|
||||
return digest.digest();
|
||||
} catch (NoSuchAlgorithmException e) {
|
||||
// Keep the message explicit to aid debugging
|
||||
throw new IllegalStateException("Unsupported algorithm: " + algorithm, e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes a 32-bit {@link Checksum} over a stream and returns the lowercase 8-char hex of the
|
||||
* unsigned 32-bit value.
|
||||
*
|
||||
* @param is input stream (not closed)
|
||||
* @param checksum checksum implementation (CRC32, Adler32, etc.)
|
||||
* @return 8-character lowercase hex (big-endian representation)
|
||||
* @throws IOException if reading fails
|
||||
*/
|
||||
private static String checksumChecksum(InputStream is, Checksum checksum) throws IOException {
|
||||
byte[] buffer = new byte[BUFFER_SIZE];
|
||||
int read;
|
||||
while ((read = is.read(buffer)) != -1) {
|
||||
checksum.update(buffer, 0, read);
|
||||
}
|
||||
// Keep as long and mask to ensure correct unsigned representation.
|
||||
long unsigned32 = checksum.getValue() & UNSIGNED_32_BIT_MASK;
|
||||
return String.format("%08x", unsigned32);
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes a 32-bit {@link Checksum} over a stream and returns the raw 4-byte big-endian
|
||||
* representation of the unsigned 32-bit value.
|
||||
*
|
||||
* <p>Cast to int already truncates to the lower 32 bits; the sign is irrelevant because we
|
||||
* serialize the bit pattern directly into 4 bytes.
|
||||
*
|
||||
* @param is input stream (not closed)
|
||||
* @param checksum checksum implementation (CRC32, Adler32, etc.)
|
||||
* @return 4 bytes (big-endian)
|
||||
* @throws IOException if reading fails
|
||||
*/
|
||||
private static byte[] checksumChecksumBytes(InputStream is, Checksum checksum)
|
||||
throws IOException {
|
||||
byte[] buffer = new byte[BUFFER_SIZE];
|
||||
int read;
|
||||
while ((read = is.read(buffer)) != -1) {
|
||||
checksum.update(buffer, 0, read);
|
||||
}
|
||||
// Cast keeps only the lower 32 bits; mask is unnecessary here.
|
||||
int v = (int) checksum.getValue();
|
||||
return ByteBuffer.allocate(4).order(ByteOrder.BIG_ENDIAN).putInt(v).array();
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts bytes to a lowercase hex string.
|
||||
*
|
||||
* @param hash the byte array to convert
|
||||
* @return the lowercase hex string
|
||||
*/
|
||||
private static String toHex(byte[] hash) {
|
||||
StringBuilder sb = new StringBuilder(hash.length * 2);
|
||||
for (byte b : hash) {
|
||||
sb.append(String.format("%02x", b));
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
}
|
@ -0,0 +1,66 @@
|
||||
package stirling.software.common.util;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.InputStream;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.Map;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
public class ChecksumUtilsTest {
|
||||
|
||||
@Test
|
||||
void computeChecksums_basic() throws Exception {
|
||||
byte[] data = "hello".getBytes(StandardCharsets.UTF_8);
|
||||
|
||||
// MD5 (hex)
|
||||
try (InputStream is = new ByteArrayInputStream(data)) {
|
||||
assertEquals("5d41402abc4b2a76b9719d911017c592", ChecksumUtils.checksum(is, "MD5"));
|
||||
}
|
||||
|
||||
// MD5 (Base64)
|
||||
try (InputStream is = new ByteArrayInputStream(data)) {
|
||||
assertEquals("XUFAKrxLKna5cZ2REBfFkg==", ChecksumUtils.checksumBase64(is, "MD5"));
|
||||
}
|
||||
|
||||
// MD5 + CRC32 (hex map)
|
||||
try (InputStream is = new ByteArrayInputStream(data)) {
|
||||
Map<String, String> map = ChecksumUtils.checksums(is, "MD5", "CRC32");
|
||||
assertEquals("5d41402abc4b2a76b9719d911017c592", map.get("MD5"));
|
||||
assertEquals("3610a686", map.get("CRC32"));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void crc32_base64_bigEndianBytes_forHello() throws Exception {
|
||||
// CRC32("hello") = 0x3610A686 → bytes: 36 10 A6 86 → Base64: "NhCmhg=="
|
||||
byte[] data = "hello".getBytes(StandardCharsets.UTF_8);
|
||||
try (InputStream is = new ByteArrayInputStream(data)) {
|
||||
assertEquals("NhCmhg==", ChecksumUtils.checksumBase64(is, "CRC32"));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void crc32_unsignedFormatting_highBitSet() throws Exception {
|
||||
// CRC32 of single zero byte (0x00) is 0xD202EF8D (>= 0x8000_0000)
|
||||
byte[] data = new byte[] {0x00};
|
||||
|
||||
// Hex (unsigned, 8 chars, lowercase)
|
||||
try (InputStream is = new ByteArrayInputStream(data)) {
|
||||
assertEquals("d202ef8d", ChecksumUtils.checksum(is, "CRC32"));
|
||||
}
|
||||
|
||||
// Base64 of the 4-byte big-endian representation
|
||||
try (InputStream is = new ByteArrayInputStream(data)) {
|
||||
assertEquals("0gLvjQ==", ChecksumUtils.checksumBase64(is, "CRC32"));
|
||||
}
|
||||
|
||||
// matches(..) must be case-insensitive for hex
|
||||
try (InputStream is = new ByteArrayInputStream("hello".getBytes(StandardCharsets.UTF_8))) {
|
||||
assertTrue(ChecksumUtils.matches(is, "CRC32", "3610A686")); // uppercase expected
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user