// Simple test suite for dataset_hash library (no external dependencies) #include #include #include #include #include #include #include #include #include #include #include "dataset_hash/dataset_hash.h" namespace fs = std::filesystem; using namespace std::chrono; // Simple test macros #define TEST_ASSERT(cond) \ do { \ if (!(cond)) { \ fprintf(stderr, "ASSERTION FAILED: %s at line %d\n", #cond, __LINE__); \ return 1; \ } \ } while(0) #define TEST_ASSERT_EQ(a, b) TEST_ASSERT((a) == (b)) #define TEST_ASSERT_NE(a, b) TEST_ASSERT((a) != (b)) #define TEST_ASSERT_STR_EQ(a, b) TEST_ASSERT(strcmp((a), (b)) == 0) // Helper functions fs::path create_temp_dir() { fs::path temp = fs::temp_directory_path() / "dataset_hash_test_XXXXXX"; fs::create_directories(temp); return temp; } void cleanup_temp_dir(const fs::path& dir) { fs::remove_all(dir); } void create_test_file(const fs::path& dir, const std::string& name, const std::string& content) { std::ofstream file(dir / name); file << content; file.close(); } // Test 1: Context creation int test_context_creation() { printf("Testing context creation...\n"); // Auto-detect threads fh_context_t* ctx = fh_init(0); TEST_ASSERT_NE(ctx, nullptr); fh_cleanup(ctx); // Specific thread count ctx = fh_init(4); TEST_ASSERT_NE(ctx, nullptr); fh_cleanup(ctx); printf(" PASSED\n"); return 0; } // Test 2: SIMD detection int test_simd_detection() { printf("Testing SIMD detection...\n"); int has_simd = fh_has_simd_sha256(); const char* impl_name = fh_get_simd_impl_name(); printf(" SIMD available: %s\n", has_simd ? "yes" : "no"); printf(" Implementation: %s\n", impl_name); TEST_ASSERT_NE(impl_name, nullptr); TEST_ASSERT(strlen(impl_name) > 0); printf(" PASSED\n"); return 0; } // Test 3: Hash single file int test_hash_single_file() { printf("Testing single file hash...\n"); fs::path temp = create_temp_dir(); fh_context_t* ctx = fh_init(1); TEST_ASSERT_NE(ctx, nullptr); // Create test file create_test_file(temp, "test.txt", "Hello, World!"); // Hash it char* hash = fh_hash_file(ctx, (temp / "test.txt").string().c_str()); TEST_ASSERT_NE(hash, nullptr); // Verify hash format (64 hex characters + null) TEST_ASSERT_EQ(strlen(hash), 64); // Hash should be deterministic char* hash2 = fh_hash_file(ctx, (temp / "test.txt").string().c_str()); TEST_ASSERT_NE(hash2, nullptr); TEST_ASSERT_STR_EQ(hash, hash2); fh_free_string(hash); fh_free_string(hash2); fh_cleanup(ctx); cleanup_temp_dir(temp); printf(" PASSED\n"); return 0; } // Test 4: Hash empty file (known hash) int test_hash_empty_file() { printf("Testing empty file hash...\n"); fs::path temp = create_temp_dir(); fh_context_t* ctx = fh_init(1); TEST_ASSERT_NE(ctx, nullptr); // Create empty file create_test_file(temp, "empty.txt", ""); char* hash = fh_hash_file(ctx, (temp / "empty.txt").string().c_str()); TEST_ASSERT_NE(hash, nullptr); TEST_ASSERT_EQ(strlen(hash), 64); // Debug: print actual hash printf(" Empty file hash: %s\n", hash); // Known SHA-256 of empty string const char* expected = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"; printf(" Expected hash: %s\n", expected); TEST_ASSERT_STR_EQ(hash, expected); fh_free_string(hash); fh_cleanup(ctx); cleanup_temp_dir(temp); printf(" PASSED\n"); return 0; } // Test 5: Hash directory int test_hash_directory() { printf("Testing directory hash...\n"); fs::path temp = create_temp_dir(); fh_context_t* ctx = fh_init(4); TEST_ASSERT_NE(ctx, nullptr); // Create directory structure create_test_file(temp, "root.txt", "root"); fs::create_directories(temp / "subdir"); create_test_file(temp, "subdir/file1.txt", "file1"); create_test_file(temp, "subdir/file2.txt", "file2"); // Hash directory char* hash = fh_hash_directory(ctx, temp.string().c_str()); TEST_ASSERT_NE(hash, nullptr); TEST_ASSERT_EQ(strlen(hash), 64); // Hash should be deterministic char* hash2 = fh_hash_directory(ctx, temp.string().c_str()); TEST_ASSERT_NE(hash2, nullptr); TEST_ASSERT_STR_EQ(hash, hash2); fh_free_string(hash); fh_free_string(hash2); fh_cleanup(ctx); cleanup_temp_dir(temp); printf(" PASSED\n"); return 0; } // Test 6: Batch hash int test_batch_hash() { printf("Testing batch hash...\n"); fs::path temp = create_temp_dir(); fh_context_t* ctx = fh_init(4); TEST_ASSERT_NE(ctx, nullptr); // Create test files const int num_files = 10; std::vector paths; std::vector c_paths; for (int i = 0; i < num_files; i++) { std::string name = "file_" + std::to_string(i) + ".txt"; create_test_file(temp, name, "Content " + std::to_string(i)); paths.push_back((temp / name).string()); c_paths.push_back(paths.back().c_str()); } // Hash batch std::vector results(num_files, nullptr); int ret = fh_hash_batch(ctx, c_paths.data(), num_files, results.data()); TEST_ASSERT_EQ(ret, 0); // Verify all hashes for (int i = 0; i < num_files; i++) { TEST_ASSERT_NE(results[i], nullptr); TEST_ASSERT_EQ(strlen(results[i]), 64); fh_free_string(results[i]); } fh_cleanup(ctx); cleanup_temp_dir(temp); printf(" PASSED\n"); return 0; } // Test 7: Performance test int test_performance() { printf("Testing performance...\n"); fs::path temp = create_temp_dir(); fh_context_t* ctx = fh_init(4); TEST_ASSERT_NE(ctx, nullptr); // Create 1000 small files const int num_files = 1000; auto start = high_resolution_clock::now(); for (int i = 0; i < num_files; i++) { create_test_file(temp, "perf_" + std::to_string(i) + ".txt", "content"); } auto create_end = high_resolution_clock::now(); // Hash all files char* hash = fh_hash_directory(ctx, temp.string().c_str()); TEST_ASSERT_NE(hash, nullptr); auto hash_end = high_resolution_clock::now(); auto create_time = duration_cast(create_end - start); auto hash_time = duration_cast(hash_end - create_end); printf(" Created %d files in %ld ms\n", num_files, create_time.count()); printf(" Hashed %d files in %ld ms\n", num_files, hash_time.count()); printf(" Throughput: %.1f files/sec\n", num_files * 1000.0 / hash_time.count()); fh_free_string(hash); fh_cleanup(ctx); cleanup_temp_dir(temp); printf(" PASSED\n"); return 0; } // Main test runner int main() { printf("\n=== Dataset Hash Library Test Suite ===\n\n"); int failed = 0; failed += test_context_creation(); failed += test_simd_detection(); failed += test_hash_single_file(); failed += test_hash_empty_file(); failed += test_hash_directory(); failed += test_batch_hash(); failed += test_performance(); printf("\n=== Test Results ===\n"); if (failed == 0) { printf("All tests PASSED!\n"); return 0; } else { printf("%d test(s) FAILED\n", failed); return 1; } }