fetch_ml/native/dataset_hash/io/file_hash.cpp
Jeremie Fraeys 37aad7ae87
feat: add manifest signing and native hashing support
- Integrate RunManifest.Validate with existing Validator
- Add manifest Sign() and Verify() methods
- Add native C++ hashing libraries (dataset_hash, queue_index)
- Add native bridge for Go/C++ integration
- Add deduplication support in queue
2026-02-19 15:34:39 -05:00

106 lines
2.6 KiB
C++

#include "file_hash.h"
#include <errno.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
int hash_file(const char* path, size_t buffer_size, char* out_hash) {
if (!path || !out_hash) return -1;
int fd = open(path, O_RDONLY);
if (fd < 0) {
return -1;
}
struct stat st;
if (fstat(fd, &st) < 0) {
close(fd);
return -1;
}
Sha256State hasher;
sha256_init(&hasher);
if (st.st_size == 0) {
// Empty file
uint8_t result[32];
sha256_finalize(&hasher, result);
close(fd);
// Convert to hex
static const char hex[] = "0123456789abcdef";
for (int i = 0; i < 32; i++) {
out_hash[i*2] = hex[(result[i] >> 4) & 0xf];
out_hash[i*2+1] = hex[result[i] & 0xf];
}
out_hash[64] = '\0';
return 0;
}
// Try memory map first
void* mapped = mmap(nullptr, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
if (mapped != MAP_FAILED) {
sha256_update(&hasher, (const uint8_t*)mapped, st.st_size);
munmap(mapped, st.st_size);
} else {
// Fallback to buffered read
uint8_t* buffer = (uint8_t*)malloc(buffer_size);
if (!buffer) {
close(fd);
return -1;
}
ssize_t n;
while ((n = read(fd, buffer, buffer_size)) > 0) {
sha256_update(&hasher, buffer, n);
}
free(buffer);
}
close(fd);
uint8_t result[32];
sha256_finalize(&hasher, result);
// Convert to hex
static const char hex[] = "0123456789abcdef";
for (int i = 0; i < 32; i++) {
out_hash[i*2] = hex[(result[i] >> 4) & 0xf];
out_hash[i*2+1] = hex[result[i] & 0xf];
}
out_hash[64] = '\0';
return 0;
}
// Convenience wrapper around hash_file() that heap-allocates the output.
// Returns a malloc'd, NUL-terminated 64-character hex digest that the caller
// releases with free(), or nullptr if the allocation or the hashing fails.
char* hash_file_alloc(const char* path, size_t buffer_size) {
    const size_t hex_len = 64;  // hex characters, excluding the terminator
    char* digest_hex = static_cast<char*>(malloc(hex_len + 1));
    if (digest_hex != nullptr && hash_file(path, buffer_size, digest_hex) != 0) {
        // Hashing failed: hand back nothing rather than an unfilled buffer.
        free(digest_hex);
        digest_hex = nullptr;
    }
    return digest_hex;
}
// Hash `count` files, storing one result per input path.
//
// For each index i, out_hashes[i] receives the pointer returned by
// hash_file_alloc(paths[i], buffer_size): a malloc'd hex digest on success or
// nullptr on failure. Every entry is attempted even after an earlier failure.
//
// Returns -1 if `paths` or `out_hashes` is null or if any file failed to
// hash; returns 0 when every file was hashed.
int hash_files_batch(
    const char* const* paths,
    uint32_t count,
    char** out_hashes,
    size_t buffer_size) {
    if (paths == nullptr || out_hashes == nullptr) {
        return -1;
    }

    int status = 0;
    for (uint32_t idx = 0; idx != count; ++idx) {
        char* digest = hash_file_alloc(paths[idx], buffer_size);
        out_hashes[idx] = digest;
        if (!digest) {
            status = -1;
        }
    }
    return status;
}