feat(common): add ChecksumUtils for MD5/SHA*/CRC32/Adler32 with Base64 and multi-algorithm support (#4261)

This commit is contained in:
Ludy 2025-09-04 16:38:28 +02:00 committed by GitHub
parent 74870615df
commit cb7471024b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 367 additions and 0 deletions

View File

@ -0,0 +1,301 @@
package stirling.software.common.util;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.file.Files;
import java.nio.file.Path;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Base64;
import java.util.LinkedHashMap;
import java.util.Locale;
import java.util.Map;
import java.util.zip.Adler32;
import java.util.zip.CRC32;
import java.util.zip.Checksum;
import lombok.experimental.UtilityClass;
@UtilityClass
public class ChecksumUtils {
/** Shared buffer size for streaming I/O. */
private static final int BUFFER_SIZE = 8192;
/** Mask to extract the lower 32 bits of a long value (unsigned int). */
private static final long UNSIGNED_32_BIT_MASK = 0xFFFFFFFFL;
/**
* Computes a checksum for the given file using the chosen algorithm and returns a lowercase hex
* string.
*
* <p>For digest algorithms (e.g., SHA-256, SHA-1, MD5), this returns the digest as hex. For
* 32-bit {@link Checksum} algorithms ("CRC32", "ADLER32"), this returns an 8-character
* lowercase hex string of the unsigned 32-bit value.
*
* @param path file to read
* @param algorithm algorithm name (case-insensitive). Special values: "CRC32", "ADLER32".
* @return hex string of the checksum
* @throws IOException if the file cannot be read
*/
public static String checksum(Path path, String algorithm) throws IOException {
try (InputStream is = Files.newInputStream(path)) {
return checksum(is, algorithm);
}
}
/**
* Computes a checksum for the given stream using the chosen algorithm and returns a lowercase
* hex string.
*
* <p><strong>Note:</strong> This method does <em>not</em> close the provided stream.
*
* @param is input stream (not closed by this method)
* @param algorithm algorithm name (case-insensitive). Special values: "CRC32", "ADLER32".
* @return hex string of the checksum
* @throws IOException if reading from the stream fails
*/
public static String checksum(InputStream is, String algorithm) throws IOException {
switch (algorithm.toUpperCase(Locale.ROOT)) {
case "CRC32":
return checksumChecksum(is, new CRC32());
case "ADLER32":
return checksumChecksum(is, new Adler32());
default:
return toHex(checksumBytes(is, algorithm));
}
}
/**
* Computes a checksum for the given file using the chosen algorithm and returns a Base64
* encoded string.
*
* <p>For digest algorithms this is the Base64 of the raw digest bytes. For 32-bit checksum
* algorithms ("CRC32", "ADLER32"), this is the Base64 of the 4-byte big-endian unsigned value.
*
* @param path file to read
* @param algorithm algorithm name (case-insensitive). Special values: "CRC32", "ADLER32".
* @return Base64-encoded checksum bytes
* @throws IOException if the file cannot be read
*/
public static String checksumBase64(Path path, String algorithm) throws IOException {
try (InputStream is = Files.newInputStream(path)) {
return checksumBase64(is, algorithm);
}
}
/**
* Computes a checksum for the given stream using the chosen algorithm and returns a Base64
* encoded string.
*
* <p><strong>Note:</strong> This method does <em>not</em> close the provided stream.
*
* @param is input stream (not closed by this method)
* @param algorithm algorithm name (case-insensitive). Special values: "CRC32", "ADLER32".
* @return Base64-encoded checksum bytes
* @throws IOException if reading from the stream fails
*/
public static String checksumBase64(InputStream is, String algorithm) throws IOException {
switch (algorithm.toUpperCase(Locale.ROOT)) {
case "CRC32":
return Base64.getEncoder().encodeToString(checksumChecksumBytes(is, new CRC32()));
case "ADLER32":
return Base64.getEncoder().encodeToString(checksumChecksumBytes(is, new Adler32()));
default:
return Base64.getEncoder().encodeToString(checksumBytes(is, algorithm));
}
}
/**
* Computes multiple checksums for the given file in a single pass over the data.
*
* <p>Returns a map from algorithm name to lowercase hex string. Order of results follows the
* order of the provided {@code algorithms}.
*
* @param path file to read
* @param algorithms algorithm names (case-insensitive). Special: "CRC32", "ADLER32".
* @return map of algorithm hex string
* @throws IOException if the file cannot be read
*/
public static Map<String, String> checksums(Path path, String... algorithms)
throws IOException {
try (InputStream is = Files.newInputStream(path)) {
return checksums(is, algorithms);
}
}
/**
* Computes multiple checksums for the given stream in a single pass over the data.
*
* <p><strong>Note:</strong> This method does <em>not</em> close the provided stream.
*
* @param is input stream (not closed by this method)
* @param algorithms algorithm names (case-insensitive). Special: "CRC32", "ADLER32".
* @return map of algorithm hex string
* @throws IOException if reading from the stream fails
*/
public static Map<String, String> checksums(InputStream is, String... algorithms)
throws IOException {
// Use LinkedHashMap to preserve the order of requested algorithms in the result.
Map<String, MessageDigest> digests = new LinkedHashMap<>();
Map<String, Checksum> checksums = new LinkedHashMap<>();
for (String algorithm : algorithms) {
String key = algorithm; // keep original key for output
switch (algorithm.toUpperCase(Locale.ROOT)) {
case "CRC32":
checksums.put(key, new CRC32());
break;
case "ADLER32":
checksums.put(key, new Adler32());
break;
default:
try {
// For MessageDigest, pass the original name (case-insensitive per JCA)
digests.put(key, MessageDigest.getInstance(algorithm));
} catch (NoSuchAlgorithmException e) {
throw new IllegalStateException("Unsupported algorithm: " + algorithm, e);
}
}
}
byte[] buffer = new byte[BUFFER_SIZE];
int read;
while ((read = is.read(buffer)) != -1) {
for (MessageDigest digest : digests.values()) {
digest.update(buffer, 0, read);
}
for (Checksum cs : checksums.values()) {
cs.update(buffer, 0, read);
}
}
Map<String, String> results = new LinkedHashMap<>();
for (Map.Entry<String, MessageDigest> entry : digests.entrySet()) {
results.put(entry.getKey(), toHex(entry.getValue().digest()));
}
for (Map.Entry<String, Checksum> entry : checksums.entrySet()) {
// Keep value as long and mask to ensure unsigned hex formatting.
long unsigned32 = entry.getValue().getValue() & UNSIGNED_32_BIT_MASK;
results.put(entry.getKey(), String.format("%08x", unsigned32));
}
return results;
}
/**
* Compares the checksum of a file with an expected hex string (case-insensitive).
*
* @param path file to read
* @param algorithm algorithm name (case-insensitive). Special: "CRC32", "ADLER32".
* @param expected expected hex string (case-insensitive)
* @return {@code true} if they match, otherwise {@code false}
* @throws IOException if the file cannot be read
*/
public static boolean matches(Path path, String algorithm, String expected) throws IOException {
try (InputStream is = Files.newInputStream(path)) {
return matches(is, algorithm, expected);
}
}
/**
* Compares the checksum of a stream with an expected hex string (case-insensitive).
*
* <p><strong>Note:</strong> This method does <em>not</em> close the provided stream.
*
* @param is input stream (not closed by this method)
* @param algorithm algorithm name (case-insensitive). Special: "CRC32", "ADLER32".
* @param expected expected hex string (case-insensitive)
* @return {@code true} if they match, otherwise {@code false}
* @throws IOException if reading from the stream fails
*/
public static boolean matches(InputStream is, String algorithm, String expected)
throws IOException {
return checksum(is, algorithm).equalsIgnoreCase(expected);
}
// ---------- Internal helpers ----------
/**
* Computes a MessageDigest over a stream and returns the raw digest bytes.
*
* @param is input stream (not closed)
* @param algorithm JCA MessageDigest algorithm (e.g., "SHA-256")
* @return raw digest bytes
* @throws IOException if reading fails
* @throws IllegalStateException if the algorithm is unsupported
*/
private static byte[] checksumBytes(InputStream is, String algorithm) throws IOException {
try {
MessageDigest digest = MessageDigest.getInstance(algorithm);
byte[] buffer = new byte[BUFFER_SIZE];
int read;
while ((read = is.read(buffer)) != -1) {
digest.update(buffer, 0, read);
}
return digest.digest();
} catch (NoSuchAlgorithmException e) {
// Keep the message explicit to aid debugging
throw new IllegalStateException("Unsupported algorithm: " + algorithm, e);
}
}
/**
* Computes a 32-bit {@link Checksum} over a stream and returns the lowercase 8-char hex of the
* unsigned 32-bit value.
*
* @param is input stream (not closed)
* @param checksum checksum implementation (CRC32, Adler32, etc.)
* @return 8-character lowercase hex (big-endian representation)
* @throws IOException if reading fails
*/
private static String checksumChecksum(InputStream is, Checksum checksum) throws IOException {
byte[] buffer = new byte[BUFFER_SIZE];
int read;
while ((read = is.read(buffer)) != -1) {
checksum.update(buffer, 0, read);
}
// Keep as long and mask to ensure correct unsigned representation.
long unsigned32 = checksum.getValue() & UNSIGNED_32_BIT_MASK;
return String.format("%08x", unsigned32);
}
/**
* Computes a 32-bit {@link Checksum} over a stream and returns the raw 4-byte big-endian
* representation of the unsigned 32-bit value.
*
* <p>Cast to int already truncates to the lower 32 bits; the sign is irrelevant because we
* serialize the bit pattern directly into 4 bytes.
*
* @param is input stream (not closed)
* @param checksum checksum implementation (CRC32, Adler32, etc.)
* @return 4 bytes (big-endian)
* @throws IOException if reading fails
*/
private static byte[] checksumChecksumBytes(InputStream is, Checksum checksum)
throws IOException {
byte[] buffer = new byte[BUFFER_SIZE];
int read;
while ((read = is.read(buffer)) != -1) {
checksum.update(buffer, 0, read);
}
// Cast keeps only the lower 32 bits; mask is unnecessary here.
int v = (int) checksum.getValue();
return ByteBuffer.allocate(4).order(ByteOrder.BIG_ENDIAN).putInt(v).array();
}
/**
* Converts bytes to a lowercase hex string.
*
* @param hash the byte array to convert
* @return the lowercase hex string
*/
private static String toHex(byte[] hash) {
StringBuilder sb = new StringBuilder(hash.length * 2);
for (byte b : hash) {
sb.append(String.format("%02x", b));
}
return sb.toString();
}
}

View File

@ -0,0 +1,66 @@
package stirling.software.common.util;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.Map;
import org.junit.jupiter.api.Test;
public class ChecksumUtilsTest {
@Test
void computeChecksums_basic() throws Exception {
byte[] data = "hello".getBytes(StandardCharsets.UTF_8);
// MD5 (hex)
try (InputStream is = new ByteArrayInputStream(data)) {
assertEquals("5d41402abc4b2a76b9719d911017c592", ChecksumUtils.checksum(is, "MD5"));
}
// MD5 (Base64)
try (InputStream is = new ByteArrayInputStream(data)) {
assertEquals("XUFAKrxLKna5cZ2REBfFkg==", ChecksumUtils.checksumBase64(is, "MD5"));
}
// MD5 + CRC32 (hex map)
try (InputStream is = new ByteArrayInputStream(data)) {
Map<String, String> map = ChecksumUtils.checksums(is, "MD5", "CRC32");
assertEquals("5d41402abc4b2a76b9719d911017c592", map.get("MD5"));
assertEquals("3610a686", map.get("CRC32"));
}
}
@Test
void crc32_base64_bigEndianBytes_forHello() throws Exception {
// CRC32("hello") = 0x3610A686 bytes: 36 10 A6 86 Base64: "NhCmhg=="
byte[] data = "hello".getBytes(StandardCharsets.UTF_8);
try (InputStream is = new ByteArrayInputStream(data)) {
assertEquals("NhCmhg==", ChecksumUtils.checksumBase64(is, "CRC32"));
}
}
@Test
void crc32_unsignedFormatting_highBitSet() throws Exception {
// CRC32 of single zero byte (0x00) is 0xD202EF8D (>= 0x8000_0000)
byte[] data = new byte[] {0x00};
// Hex (unsigned, 8 chars, lowercase)
try (InputStream is = new ByteArrayInputStream(data)) {
assertEquals("d202ef8d", ChecksumUtils.checksum(is, "CRC32"));
}
// Base64 of the 4-byte big-endian representation
try (InputStream is = new ByteArrayInputStream(data)) {
assertEquals("0gLvjQ==", ChecksumUtils.checksumBase64(is, "CRC32"));
}
// matches(..) must be case-insensitive for hex
try (InputStream is = new ByteArrayInputStream("hello".getBytes(StandardCharsets.UTF_8))) {
assertTrue(ChecksumUtils.matches(is, "CRC32", "3610A686")); // uppercase expected
}
}
}