Implements two production-ready Rust native libraries: ## dataset_hash (BLAKE3-based hashing) - FFI exports: ds_hash_file, ds_hash_directory_batch, ds_hash_directory_combined - BLAKE3 hashing for files and directory trees - Hidden file filtering (respects .hidden and _prefix files) - Prometheus-compatible metrics export - Comprehensive integration tests (12 tests) - Benchmarks: hash_file_1kb (~14µs), hash_file_1mb (~610µs), dir_100files (~1.6ms) ## queue_index (priority queue) - FFI exports: 25+ functions matching C++ API - Lifecycle: qi_open, qi_close - Task ops: add_tasks, update_tasks, remove_tasks, get_task_by_id - Queue ops: get_next_batch, peek_next, mark_completed - Priority: get_next_priority_task, peek_priority_task - Query: get_all_tasks, get_tasks_by_status, get_task_count - Retry/DLQ: retry_task, move_to_dlq - Lease: renew_lease, release_lease - Maintenance: rebuild_index, compact_index - BinaryHeap-based priority queue with correct Ord (max-heap) - Memory-mapped storage with safe Rust wrappers - Panic-safe FFI boundaries using catch_unwind - Comprehensive integration tests (7 tests, 1 ignored for persistence) - Benchmarks: add_100 (~60µs), get_10 (~24ns), priority (~5µs) ## Architecture - Cargo workspace with shared common crate - Criterion benchmarks for both crates - Rust 1.85.0 toolchain pinned - Zero compiler warnings - All 19 tests passing Compare: make compare-benchmarks (Rust/Go/C++ comparison)
76 lines
2.1 KiB
Rust
76 lines
2.1 KiB
Rust
//! Benchmarks for dataset_hash
|
|
//!
|
|
//! Compares BLAKE3 hashing performance
|
|
|
|
use criterion::{black_box, criterion_group, criterion_main, Criterion};
|
|
use dataset_hash::{hash_file, hash_directory_batch, hash_directory_combined};
|
|
use std::io::Write;
|
|
use tempfile::TempDir;
|
|
|
|
fn create_test_file(dir: &TempDir, name: &str, size: usize) -> std::path::PathBuf {
|
|
let path = dir.path().join(name);
|
|
let mut file = std::fs::File::create(&path).unwrap();
|
|
let data = vec![0u8; size];
|
|
file.write_all(&data).unwrap();
|
|
path
|
|
}
|
|
|
|
fn bench_hash_file_small(c: &mut Criterion) {
|
|
let temp = TempDir::new().unwrap();
|
|
let path = create_test_file(&temp, "small.bin", 1024); // 1KB
|
|
|
|
c.bench_function("rust_hash_file_1kb", |b| {
|
|
b.iter(|| {
|
|
black_box(hash_file(&path).unwrap());
|
|
});
|
|
});
|
|
}
|
|
|
|
fn bench_hash_file_medium(c: &mut Criterion) {
|
|
let temp = TempDir::new().unwrap();
|
|
let path = create_test_file(&temp, "medium.bin", 1024 * 1024); // 1MB
|
|
|
|
c.bench_function("rust_hash_file_1mb", |b| {
|
|
b.iter(|| {
|
|
black_box(hash_file(&path).unwrap());
|
|
});
|
|
});
|
|
}
|
|
|
|
fn bench_hash_directory_batch(c: &mut Criterion) {
|
|
let temp = TempDir::new().unwrap();
|
|
|
|
// Create 100 small files
|
|
for i in 0..100 {
|
|
create_test_file(&temp, &format!("file_{}.bin", i), 1024);
|
|
}
|
|
|
|
c.bench_function("rust_hash_dir_100files", |b| {
|
|
b.iter(|| {
|
|
black_box(hash_directory_batch(temp.path()).unwrap());
|
|
});
|
|
});
|
|
}
|
|
|
|
fn bench_hash_directory_combined(c: &mut Criterion) {
|
|
let temp = TempDir::new().unwrap();
|
|
|
|
// Create 100 small files
|
|
for i in 0..100 {
|
|
create_test_file(&temp, &format!("file_{}.bin", i), 1024);
|
|
}
|
|
|
|
c.bench_function("rust_hash_dir_combined", |b| {
|
|
b.iter(|| {
|
|
black_box(hash_directory_combined(temp.path()).unwrap());
|
|
});
|
|
});
|
|
}
|
|
|
|
criterion_group!(benches,
|
|
bench_hash_file_small,
|
|
bench_hash_file_medium,
|
|
bench_hash_directory_batch,
|
|
bench_hash_directory_combined
|
|
);
|
|
criterion_main!(benches);
|