Security Fixes: - CVE-2024-45339: Add O_EXCL flag to temp file creation in storage_write_entries() Prevents symlink attacks on predictable .tmp file paths - CVE-2025-47290: Use openat_nofollow() in storage_open() Closes TOCTOU race condition via path_sanitizer infrastructure - CVE-2025-0838: Add MAX_BATCH_SIZE=10000 to add_tasks() Prevents integer overflow in batch operations Research Trustworthiness (dataset_hash): - Deterministic file ordering: std::sort after collect_files() - Recursive directory traversal: depth-limited with cycle detection - Documented exclusions: hidden files and special files noted in API Bug Fixes: - R1: storage_init path validation for non-existent directories - R2: safe_strncpy return value check before strcat - R3: parallel_hash 256-file cap replaced with std::vector - R4: wire qi_compact_index/qi_rebuild_index stubs - R5: CompletionLatch race condition fix (hold mutex during decrement) - R6: ARMv8 SHA256 transform fix (save abcd_pre before vsha256hq_u32) - R7: fuzz_index_storage header format fix - R8: enforce null termination in add_tasks/update_tasks - R9: use 64 bytes (not 65) in combined hash to exclude null terminator - R10: status field persistence in save() New Tests: - test_recursive_dataset.cpp: Verify deterministic recursive hashing - test_storage_symlink_resistance.cpp: Verify CVE-2024-45339 fix - test_queue_index_batch_limit.cpp: Verify CVE-2025-0838 fix - test_sha256_arm_kat.cpp: ARMv8 known-answer tests - test_storage_init_new_dir.cpp: F1 verification - test_parallel_hash_large_dir.cpp: F3 verification - test_queue_index_compact.cpp: F4 verification All 8 native tests passing. Library ready for research lab deployment.
114 lines
3.7 KiB
C++
114 lines
3.7 KiB
C++
#include "sha256_base.h"
|
|
|
|
// ARMv8-A Cryptographic Extensions implementation
|
|
#if defined(__aarch64__) || defined(_M_ARM64)
|
|
#include <arm_neon.h>
|
|
|
|
static void transform_armv8(uint32_t* state, const uint8_t* data) {
|
|
// Load message and reverse bytes within each 32-bit word (big-endian -> native)
|
|
uint32x4_t w0 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(data)));
|
|
uint32x4_t w1 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(data + 16)));
|
|
uint32x4_t w2 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(data + 32)));
|
|
uint32x4_t w3 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(data + 48)));
|
|
|
|
// Load current hash state (native endianness)
|
|
uint32x4_t abcd = vld1q_u32(state);
|
|
uint32x4_t efgh = vld1q_u32(state + 4);
|
|
uint32x4_t abcd_orig = abcd;
|
|
uint32x4_t efgh_orig = efgh;
|
|
uint32x4_t abcd_pre;
|
|
|
|
// Rounds 0-15 with pre-expanded message
|
|
uint32x4_t k0 = vld1q_u32(&K[0]);
|
|
uint32x4_t k1 = vld1q_u32(&K[4]);
|
|
uint32x4_t k2 = vld1q_u32(&K[8]);
|
|
uint32x4_t k3 = vld1q_u32(&K[12]);
|
|
|
|
uint32x4_t tmp = vaddq_u32(w0, k0);
|
|
abcd_pre = abcd; // Save pre-round state
|
|
uint32x4_t abcd_new = vsha256hq_u32(abcd, efgh, tmp);
|
|
efgh = vsha256h2q_u32(efgh, abcd_pre, tmp);
|
|
abcd = abcd_new;
|
|
|
|
tmp = vaddq_u32(w1, k1);
|
|
abcd_pre = abcd;
|
|
abcd_new = vsha256hq_u32(abcd, efgh, tmp);
|
|
efgh = vsha256h2q_u32(efgh, abcd_pre, tmp);
|
|
abcd = abcd_new;
|
|
|
|
tmp = vaddq_u32(w2, k2);
|
|
abcd_pre = abcd;
|
|
abcd_new = vsha256hq_u32(abcd, efgh, tmp);
|
|
efgh = vsha256h2q_u32(efgh, abcd_pre, tmp);
|
|
abcd = abcd_new;
|
|
|
|
tmp = vaddq_u32(w3, k3);
|
|
abcd_pre = abcd;
|
|
abcd_new = vsha256hq_u32(abcd, efgh, tmp);
|
|
efgh = vsha256h2q_u32(efgh, abcd_pre, tmp);
|
|
abcd = abcd_new;
|
|
|
|
// Rounds 16-63: Message schedule expansion + rounds
|
|
for (int i = 16; i < 64; i += 16) {
|
|
// Schedule expansion for rounds i..i+3
|
|
uint32x4_t w4 = vsha256su0q_u32(w0, w1);
|
|
w4 = vsha256su1q_u32(w4, w2, w3);
|
|
k0 = vld1q_u32(&K[i]);
|
|
tmp = vaddq_u32(w4, k0);
|
|
abcd_pre = abcd;
|
|
abcd_new = vsha256hq_u32(abcd, efgh, tmp);
|
|
efgh = vsha256h2q_u32(efgh, abcd_pre, tmp);
|
|
abcd = abcd_new;
|
|
|
|
// Schedule expansion for rounds i+4..i+7
|
|
uint32x4_t w5 = vsha256su0q_u32(w1, w2);
|
|
w5 = vsha256su1q_u32(w5, w3, w4);
|
|
k1 = vld1q_u32(&K[i + 4]);
|
|
tmp = vaddq_u32(w5, k1);
|
|
abcd_pre = abcd;
|
|
abcd_new = vsha256hq_u32(abcd, efgh, tmp);
|
|
efgh = vsha256h2q_u32(efgh, abcd_pre, tmp);
|
|
abcd = abcd_new;
|
|
|
|
// Schedule expansion for rounds i+8..i+11
|
|
uint32x4_t w6 = vsha256su0q_u32(w2, w3);
|
|
w6 = vsha256su1q_u32(w6, w4, w5);
|
|
k2 = vld1q_u32(&K[i + 8]);
|
|
tmp = vaddq_u32(w6, k2);
|
|
abcd_pre = abcd;
|
|
abcd_new = vsha256hq_u32(abcd, efgh, tmp);
|
|
efgh = vsha256h2q_u32(efgh, abcd_pre, tmp);
|
|
abcd = abcd_new;
|
|
|
|
// Schedule expansion for rounds i+12..i+15
|
|
uint32x4_t w7 = vsha256su0q_u32(w3, w4);
|
|
w7 = vsha256su1q_u32(w7, w5, w6);
|
|
k3 = vld1q_u32(&K[i + 12]);
|
|
tmp = vaddq_u32(w7, k3);
|
|
abcd_pre = abcd;
|
|
abcd_new = vsha256hq_u32(abcd, efgh, tmp);
|
|
efgh = vsha256h2q_u32(efgh, abcd_pre, tmp);
|
|
abcd = abcd_new;
|
|
|
|
// Rotate working variables
|
|
w0 = w4; w1 = w5; w2 = w6; w3 = w7;
|
|
}
|
|
|
|
// Add original state back
|
|
abcd = vaddq_u32(abcd, abcd_orig);
|
|
efgh = vaddq_u32(efgh, efgh_orig);
|
|
|
|
// Store result
|
|
vst1q_u32(state, abcd);
|
|
vst1q_u32(state + 4, efgh);
|
|
}
|
|
|
|
TransformFunc detect_armv8_transform(void) {
|
|
return transform_armv8;
|
|
}
|
|
|
|
#else // No ARMv8 support
|
|
|
|
TransformFunc detect_armv8_transform(void) { return nullptr; }
|
|
|
|
#endif
|