- Add arena allocator for zero-allocation hot paths
- Add thread pool for parallel operations
- Add mmap utilities for memory-mapped I/O
- Implement queue_index with heap-based priority queue
- Implement dataset_hash with SIMD support (SHA-NI, ARMv8)
- Add runtime SIMD detection for cross-platform correctness
- Add comprehensive tests and benchmarks
54 lines
2.3 KiB
C++
54 lines
2.3 KiB
C++
#include "sha256_base.h"
|
|
|
|
// SHA-256 round constants, one 32-bit word per compression round.
// Entry i is added into the working state during round i of the 64-round
// main loop. Values are the K constants listed in FIPS 180-4, section 4.2.2.
const uint32_t K[64] = {
    /*  0 */ 0x428a2f98u, 0x71374491u, 0xb5c0fbcfu, 0xe9b5dba5u,
    /*  4 */ 0x3956c25bu, 0x59f111f1u, 0x923f82a4u, 0xab1c5ed5u,
    /*  8 */ 0xd807aa98u, 0x12835b01u, 0x243185beu, 0x550c7dc3u,
    /* 12 */ 0x72be5d74u, 0x80deb1feu, 0x9bdc06a7u, 0xc19bf174u,
    /* 16 */ 0xe49b69c1u, 0xefbe4786u, 0x0fc19dc6u, 0x240ca1ccu,
    /* 20 */ 0x2de92c6fu, 0x4a7484aau, 0x5cb0a9dcu, 0x76f988dau,
    /* 24 */ 0x983e5152u, 0xa831c66du, 0xb00327c8u, 0xbf597fc7u,
    /* 28 */ 0xc6e00bf3u, 0xd5a79147u, 0x06ca6351u, 0x14292967u,
    /* 32 */ 0x27b70a85u, 0x2e1b2138u, 0x4d2c6dfcu, 0x53380d13u,
    /* 36 */ 0x650a7354u, 0x766a0abbu, 0x81c2c92eu, 0x92722c85u,
    /* 40 */ 0xa2bfe8a1u, 0xa81a664bu, 0xc24b8b70u, 0xc76c51a3u,
    /* 44 */ 0xd192e819u, 0xd6990624u, 0xf40e3585u, 0x106aa070u,
    /* 48 */ 0x19a4c116u, 0x1e376c08u, 0x2748774cu, 0x34b0bcb5u,
    /* 52 */ 0x391c0cb3u, 0x4ed8aa4au, 0x5b9cca4fu, 0x682e6ff3u,
    /* 56 */ 0x748f82eeu, 0x78a5636fu, 0x84c87814u, 0x8cc70208u,
    /* 60 */ 0x90befffau, 0xa4506cebu, 0xbef9a3f7u, 0xc67178f2u
};
|
|
|
|
void transform_generic(uint32_t* state, const uint8_t* data) {
|
|
uint32_t W[64];
|
|
uint32_t a, b, c, d, e, f, g, h;
|
|
|
|
// Prepare message schedule
|
|
for (int i = 0; i < 16; ++i) {
|
|
W[i] = (data[i * 4] << 24) | (data[i * 4 + 1] << 16) |
|
|
(data[i * 4 + 2] << 8) | data[i * 4 + 3];
|
|
}
|
|
|
|
for (int i = 16; i < 64; ++i) {
|
|
uint32_t s0 = (W[i-15] >> 7 | W[i-15] << 25) ^
|
|
(W[i-15] >> 18 | W[i-15] << 14) ^
|
|
(W[i-15] >> 3);
|
|
uint32_t s1 = (W[i-2] >> 17 | W[i-2] << 15) ^
|
|
(W[i-2] >> 19 | W[i-2] << 13) ^
|
|
(W[i-2] >> 10);
|
|
W[i] = W[i-16] + s0 + W[i-7] + s1;
|
|
}
|
|
|
|
// Initialize working variables
|
|
a = state[0]; b = state[1]; c = state[2]; d = state[3];
|
|
e = state[4]; f = state[5]; g = state[6]; h = state[7];
|
|
|
|
// Main loop
|
|
for (int i = 0; i < 64; ++i) {
|
|
uint32_t S1 = (e >> 6 | e << 26) ^ (e >> 11 | e << 21) ^ (e >> 25 | e << 7);
|
|
uint32_t ch = (e & f) ^ ((~e) & g);
|
|
uint32_t temp1 = h + S1 + ch + K[i] + W[i];
|
|
uint32_t S0 = (a >> 2 | a << 30) ^ (a >> 13 | a << 19) ^ (a >> 22 | a << 10);
|
|
uint32_t maj = (a & b) ^ (a & c) ^ (b & c);
|
|
uint32_t temp2 = S0 + maj;
|
|
|
|
h = g; g = f; f = e; e = d + temp1;
|
|
d = c; c = b; b = a; a = temp1 + temp2;
|
|
}
|
|
|
|
// Update state
|
|
state[0] += a; state[1] += b; state[2] += c; state[3] += d;
|
|
state[4] += e; state[5] += f; state[6] += g; state[7] += h;
|
|
}
|