fetch_ml/native/tests/test_dataset_hash.cpp
Jeremie Fraeys 37aad7ae87
feat: add manifest signing and native hashing support
- Integrate RunManifest.Validate with existing Validator
- Add manifest Sign() and Verify() methods
- Add native C++ hashing libraries (dataset_hash, queue_index)
- Add native bridge for Go/C++ integration
- Add deduplication support in queue
2026-02-19 15:34:39 -05:00

286 lines
7.5 KiB
C++

// Simple test suite for dataset_hash library (no external dependencies)
#include <cassert>
#include <chrono>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <string>
#include <system_error>
#include <vector>

#include "dataset_hash/dataset_hash.h"
namespace fs = std::filesystem;
using namespace std::chrono;
// Minimal assertion macros (no test framework required).
//
// When the checked condition is false, the failed expression and the line
// number are written to stderr and the *enclosing function* returns 1, so
// these macros may only be used inside functions returning int.
#define TEST_ASSERT(cond)                                                   \
    do {                                                                    \
        if (cond) {                                                         \
            /* condition holds: nothing to report */                        \
        } else {                                                            \
            std::fprintf(stderr, "ASSERTION FAILED: %s at line %d\n",       \
                         #cond, __LINE__);                                  \
            return 1;                                                       \
        }                                                                   \
    } while (false)

// Convenience wrappers; each forwards a single expression to TEST_ASSERT.
#define TEST_ASSERT_EQ(a, b)     TEST_ASSERT((a) == (b))
#define TEST_ASSERT_NE(a, b)     TEST_ASSERT((a) != (b))
#define TEST_ASSERT_STR_EQ(a, b) TEST_ASSERT(strcmp((a), (b)) == 0)
// Helper functions
// Creates a fresh, uniquely named, empty directory under the system temp
// directory and returns its path.
//
// The previous implementation used the literal name
// "dataset_hash_test_XXXXXX". std::filesystem never expands the mkstemp-style
// "XXXXXX" template, so every test (and every concurrent run) shared one
// directory, and leftovers from an aborted test leaked into later directory
// hashes.
//
// The name now combines a clock reading with a per-process sequence number.
// fs::create_directory returns false when the path already exists, so a
// colliding name is simply skipped and the next candidate is tried.
//
// Throws fs::filesystem_error if the directory cannot be created.
// Not thread-safe (the sequence counter is a plain static); the test suite is
// single-threaded.
fs::path create_temp_dir() {
    static unsigned long long sequence = 0;

    const fs::path base = fs::temp_directory_path();
    for (;;) {
        const auto ticks =
            std::chrono::system_clock::now().time_since_epoch().count();
        const fs::path candidate =
            base / ("dataset_hash_test_" + std::to_string(ticks) + "_" +
                    std::to_string(sequence++));

        // Only accept a directory that this call actually created.
        if (fs::create_directory(candidate)) {
            return candidate;
        }
    }
}
// Recursively removes a temporary test directory (best effort).
//
// Uses the non-throwing fs::remove_all overload: a cleanup problem (e.g. a
// file still held open, or a permission error) should not abort the whole
// test runner with an uncaught fs::filesystem_error. Failures are reported on
// stderr instead. Removing a path that does not exist is not an error.
void cleanup_temp_dir(const fs::path& dir) {
    std::error_code ec;
    fs::remove_all(dir, ec);
    if (ec) {
        fprintf(stderr, "WARNING: could not remove %s: %s\n",
                dir.string().c_str(), ec.message().c_str());
    }
}
// Writes `content` to the file `dir / name`, replacing any existing file.
//
// The file is opened in binary mode so the bytes on disk are exactly the
// bytes of `content` on every platform; text mode may translate line endings,
// which would silently change the hash under test. `name` may contain a
// relative sub-path (e.g. "subdir/file.txt"); the parent directory must
// already exist.
//
// Open/write/close failures are reported on stderr rather than being
// silently ignored, so a later hash mismatch can be traced back to the
// missing fixture.
void create_test_file(const fs::path& dir, const std::string& name, const std::string& content) {
    const fs::path target = dir / name;

    std::ofstream file(target, std::ios::binary | std::ios::trunc);
    file.write(content.data(), static_cast<std::streamsize>(content.size()));
    file.close();

    // failbit is set if the open, the write, or the close failed.
    if (file.fail()) {
        fprintf(stderr, "WARNING: failed to write test file %s\n",
                target.string().c_str());
    }
}
// Test 1: Context creation
// Checks that fh_init returns a non-null context for each requested thread
// count and that the context can be released again with fh_cleanup.
// Returns 0 on success, 1 on the first failed assertion.
int test_context_creation() {
    printf("Testing context creation...\n");

    // 0 = auto-detect threads, 4 = specific thread count.
    const int thread_counts[] = {0, 4};
    for (const int threads : thread_counts) {
        fh_context_t* ctx = fh_init(threads);
        TEST_ASSERT_NE(ctx, nullptr);
        fh_cleanup(ctx);
    }

    printf(" PASSED\n");
    return 0;
}
// Test 2: SIMD detection
// Queries the library for SIMD SHA-256 availability and the implementation
// name, prints both, and requires the name to be a non-empty string.
// Returns 0 on success, 1 on the first failed assertion.
int test_simd_detection() {
    printf("Testing SIMD detection...\n");

    const int has_simd = fh_has_simd_sha256();
    const char* impl_name = fh_get_simd_impl_name();

    // Check for null BEFORE printing: passing a null pointer to a "%s"
    // conversion is undefined behavior, so the original print-then-assert
    // order could crash instead of reporting a clean test failure.
    TEST_ASSERT_NE(impl_name, nullptr);

    printf(" SIMD available: %s\n", has_simd ? "yes" : "no");
    printf(" Implementation: %s\n", impl_name);

    TEST_ASSERT(strlen(impl_name) > 0);

    printf(" PASSED\n");
    return 0;
}
// Test 3: Hash single file
// Hashes one small file twice and checks that the result is a 64-character
// string and identical on both calls.
// Returns 0 on success, 1 on the first failed assertion.
//
// TEST_ASSERT returns from the enclosing function on failure. The checks
// therefore run inside a lambda so that the context, the hash strings and the
// temp directory are released on every path instead of leaking when an
// assertion fails.
int test_hash_single_file() {
    printf("Testing single file hash...\n");

    const fs::path temp = create_temp_dir();
    fh_context_t* ctx = fh_init(1);
    char* hash = nullptr;
    char* hash2 = nullptr;

    const auto run_checks = [&]() -> int {
        TEST_ASSERT_NE(ctx, nullptr);

        // Create test file
        create_test_file(temp, "test.txt", "Hello, World!");
        const std::string file_path = (temp / "test.txt").string();

        // Hash it
        hash = fh_hash_file(ctx, file_path.c_str());
        TEST_ASSERT_NE(hash, nullptr);

        // Verify hash format (64 hex characters + null)
        TEST_ASSERT_EQ(strlen(hash), 64);

        // Hash should be deterministic
        hash2 = fh_hash_file(ctx, file_path.c_str());
        TEST_ASSERT_NE(hash2, nullptr);
        TEST_ASSERT_STR_EQ(hash, hash2);
        return 0;
    };
    const int status = run_checks();

    // Unconditional cleanup; null checks because a failed step leaves the
    // corresponding pointer unset.
    if (hash != nullptr) {
        fh_free_string(hash);
    }
    if (hash2 != nullptr) {
        fh_free_string(hash2);
    }
    if (ctx != nullptr) {
        fh_cleanup(ctx);
    }
    cleanup_temp_dir(temp);

    if (status == 0) {
        printf(" PASSED\n");
    }
    return status;
}
// Test 4: Hash empty file (known hash)
// Hashes a zero-length file and compares the result with the well-known
// SHA-256 digest of the empty input.
// Returns 0 on success, 1 on the first failed assertion.
//
// TEST_ASSERT returns from the enclosing function on failure. The checks
// therefore run inside a lambda so that the context, the hash string and the
// temp directory are released on every path instead of leaking when an
// assertion fails.
int test_hash_empty_file() {
    printf("Testing empty file hash...\n");

    const fs::path temp = create_temp_dir();
    fh_context_t* ctx = fh_init(1);
    char* hash = nullptr;

    const auto run_checks = [&]() -> int {
        TEST_ASSERT_NE(ctx, nullptr);

        // Create empty file
        create_test_file(temp, "empty.txt", "");

        hash = fh_hash_file(ctx, (temp / "empty.txt").string().c_str());
        TEST_ASSERT_NE(hash, nullptr);
        TEST_ASSERT_EQ(strlen(hash), 64);

        // Debug: print actual hash
        printf(" Empty file hash: %s\n", hash);

        // Known SHA-256 of empty string
        const char* expected = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855";
        printf(" Expected hash: %s\n", expected);
        TEST_ASSERT_STR_EQ(hash, expected);
        return 0;
    };
    const int status = run_checks();

    // Unconditional cleanup; null checks because a failed step leaves the
    // corresponding pointer unset.
    if (hash != nullptr) {
        fh_free_string(hash);
    }
    if (ctx != nullptr) {
        fh_cleanup(ctx);
    }
    cleanup_temp_dir(temp);

    if (status == 0) {
        printf(" PASSED\n");
    }
    return status;
}
// Test 5: Hash directory
// Builds a small directory tree (one file at the root, two in a
// subdirectory), hashes the directory twice and checks that the result is a
// 64-character string and identical on both calls.
// Returns 0 on success, 1 on the first failed assertion.
//
// TEST_ASSERT returns from the enclosing function on failure. The checks
// therefore run inside a lambda so that the context, the hash strings and the
// temp directory are released on every path instead of leaking when an
// assertion fails.
int test_hash_directory() {
    printf("Testing directory hash...\n");

    const fs::path temp = create_temp_dir();
    fh_context_t* ctx = fh_init(4);
    char* hash = nullptr;
    char* hash2 = nullptr;

    const auto run_checks = [&]() -> int {
        TEST_ASSERT_NE(ctx, nullptr);

        // Create directory structure
        create_test_file(temp, "root.txt", "root");
        fs::create_directories(temp / "subdir");
        create_test_file(temp, "subdir/file1.txt", "file1");
        create_test_file(temp, "subdir/file2.txt", "file2");

        const std::string dir_path = temp.string();

        // Hash directory
        hash = fh_hash_directory(ctx, dir_path.c_str());
        TEST_ASSERT_NE(hash, nullptr);
        TEST_ASSERT_EQ(strlen(hash), 64);

        // Hash should be deterministic
        hash2 = fh_hash_directory(ctx, dir_path.c_str());
        TEST_ASSERT_NE(hash2, nullptr);
        TEST_ASSERT_STR_EQ(hash, hash2);
        return 0;
    };
    const int status = run_checks();

    // Unconditional cleanup; null checks because a failed step leaves the
    // corresponding pointer unset.
    if (hash != nullptr) {
        fh_free_string(hash);
    }
    if (hash2 != nullptr) {
        fh_free_string(hash2);
    }
    if (ctx != nullptr) {
        fh_cleanup(ctx);
    }
    cleanup_temp_dir(temp);

    if (status == 0) {
        printf(" PASSED\n");
    }
    return status;
}
// Test 6: Batch hash
// Creates ten small files, hashes them with a single fh_hash_batch call and
// checks that the call returns 0 and that every result is a non-null
// 64-character string.
// Returns 0 on success, 1 on the first failed assertion.
//
// Fixes relative to the earlier version:
//  * The C-string pointers are collected only after `paths` has stopped
//    growing. Previously c_str() was taken while the vector was still being
//    appended to; a reallocation moves the std::string objects, which can
//    leave the stored pointers dangling (guaranteed for short,
//    small-buffer-optimized strings).
//  * TEST_ASSERT returns from the enclosing function on failure, so the
//    checks run inside a lambda and the results, the context and the temp
//    directory are released on every path.
int test_batch_hash() {
    printf("Testing batch hash...\n");

    const fs::path temp = create_temp_dir();
    fh_context_t* ctx = fh_init(4);

    const int num_files = 10;
    std::vector<char*> results(num_files, nullptr);
    bool batch_succeeded = false;

    const auto run_checks = [&]() -> int {
        TEST_ASSERT_NE(ctx, nullptr);

        // Create test files
        std::vector<std::string> paths;
        paths.reserve(num_files);
        for (int i = 0; i < num_files; i++) {
            const std::string name = "file_" + std::to_string(i) + ".txt";
            create_test_file(temp, name, "Content " + std::to_string(i));
            paths.push_back((temp / name).string());
        }

        // `paths` is complete now, so these pointers stay valid for as long
        // as `paths` is alive and unmodified.
        std::vector<const char*> c_paths;
        c_paths.reserve(paths.size());
        for (const std::string& path : paths) {
            c_paths.push_back(path.c_str());
        }

        // Hash batch
        int ret = fh_hash_batch(ctx, c_paths.data(), num_files, results.data());
        TEST_ASSERT_EQ(ret, 0);
        batch_succeeded = true;

        // Verify all hashes
        for (int i = 0; i < num_files; i++) {
            TEST_ASSERT_NE(results[i], nullptr);
            TEST_ASSERT_EQ(strlen(results[i]), 64);
        }
        return 0;
    };
    const int status = run_checks();

    // Free every hash the library handed back, including those after the
    // first failing entry. Results are only released when the batch call
    // itself reported success.
    // NOTE(review): ownership of partially filled results after a failed
    // batch call is not visible from this file; confirm against the library.
    if (batch_succeeded) {
        for (char* result : results) {
            if (result != nullptr) {
                fh_free_string(result);
            }
        }
    }
    if (ctx != nullptr) {
        fh_cleanup(ctx);
    }
    cleanup_temp_dir(temp);

    if (status == 0) {
        printf(" PASSED\n");
    }
    return status;
}
// Test 7: Performance test
// Creates 1000 small files, hashes the containing directory once, and prints
// the time spent creating and hashing plus the resulting throughput. The only
// hard requirement is that the directory hash is non-null.
// Returns 0 on success, 1 on the first failed assertion.
//
// Fixes relative to the earlier version:
//  * duration::count() returns an implementation-defined integer type; it is
//    now cast to long long so that it matches the "%lld" conversion.
//  * Throughput was computed by dividing by a millisecond count that is 0 for
//    a sub-millisecond run. It is now derived from a floating-point duration
//    and guarded against a zero interval.
//  * Intervals are measured with steady_clock, which is monotonic.
//  * TEST_ASSERT returns from the enclosing function on failure, so the
//    checks run inside a lambda and the hash string, the context and the temp
//    directory (with its 1000 files) are released on every path.
int test_performance() {
    printf("Testing performance...\n");

    const fs::path temp = create_temp_dir();
    fh_context_t* ctx = fh_init(4);
    char* hash = nullptr;

    const auto run_checks = [&]() -> int {
        TEST_ASSERT_NE(ctx, nullptr);

        using Clock = std::chrono::steady_clock;
        using Millis = std::chrono::milliseconds;

        // Create 1000 small files
        const int num_files = 1000;
        const auto start = Clock::now();
        for (int i = 0; i < num_files; i++) {
            create_test_file(temp, "perf_" + std::to_string(i) + ".txt", "content");
        }
        const auto create_end = Clock::now();

        // Hash all files; take the timestamp before asserting so that the
        // measurement covers only the hashing call.
        hash = fh_hash_directory(ctx, temp.string().c_str());
        const auto hash_end = Clock::now();
        TEST_ASSERT_NE(hash, nullptr);

        const long long create_ms = static_cast<long long>(
            std::chrono::duration_cast<Millis>(create_end - start).count());
        const long long hash_ms = static_cast<long long>(
            std::chrono::duration_cast<Millis>(hash_end - create_end).count());
        const double hash_seconds =
            std::chrono::duration<double>(hash_end - create_end).count();

        printf(" Created %d files in %lld ms\n", num_files, create_ms);
        printf(" Hashed %d files in %lld ms\n", num_files, hash_ms);
        if (hash_seconds > 0.0) {
            printf(" Throughput: %.1f files/sec\n", num_files / hash_seconds);
        } else {
            printf(" Throughput: n/a (elapsed time below clock resolution)\n");
        }
        return 0;
    };
    const int status = run_checks();

    // Unconditional cleanup; null checks because a failed step leaves the
    // corresponding pointer unset.
    if (hash != nullptr) {
        fh_free_string(hash);
    }
    if (ctx != nullptr) {
        fh_cleanup(ctx);
    }
    cleanup_temp_dir(temp);

    if (status == 0) {
        printf(" PASSED\n");
    }
    return status;
}
// Main test runner
// Executes every test in a fixed order, sums their return values (each test
// returns 0 on success and 1 on failure) and prints a summary.
// Exit status: 0 when all tests passed, 1 otherwise.
int main() {
    printf("\n=== Dataset Hash Library Test Suite ===\n\n");

    using TestFn = int (*)();
    const TestFn suite[] = {
        test_context_creation,
        test_simd_detection,
        test_hash_single_file,
        test_hash_empty_file,
        test_hash_directory,
        test_batch_hash,
        test_performance,
    };

    int failed = 0;
    for (const TestFn run_test : suite) {
        failed += run_test();
    }

    printf("\n=== Test Results ===\n");
    if (failed != 0) {
        printf("%d test(s) FAILED\n", failed);
        return 1;
    }

    printf("All tests PASSED!\n");
    return 0;
}