#include "artifact_scanner.h"

// NOTE(review): the system header list and the explicit template arguments in
// this file were reconstructed from how the code uses them. Confirm against
// artifact_scanner.h, in particular the unit/type of
// as_result_t::discovery_time_ms.
#include <fnmatch.h>

#include <algorithm>
#include <chrono>
#include <cstdint>
#include <cstring>
#include <exception>
#include <filesystem>
#include <memory>
#include <string>
#include <vector>

namespace fs = std::filesystem;

// Scanner state behind the opaque as_scanner_t handle.
struct as_scanner {
    std::vector<std::string> exclude_patterns;  // fnmatch() globs, matched against run-relative paths
    std::string last_error;                     // what() of the last exception caught by a scan
    uint64_t scan_count = 0;                    // directory entries visited across all scans
};

// Check if a run-relative path matches any exclude pattern.
//
// Each pattern is matched with fnmatch(FNM_PATHNAME) against the full path and
// against every leading directory of the path. Under FNM_PATHNAME a `*` never
// matches '/', so without the leading-directory pass a pattern such as
// "code/*" would exclude "code/a" but still report "code/a/b".
// NOTE(review): the leading-directory pass assumes directory-style patterns
// are meant to exclude the whole subtree -- confirm with the API owner.
static bool should_exclude(as_scanner_t* scanner, const char* path) {
    const std::string full(path);

    // Candidates: the path itself, then "a", "a/b", ... for "a/b/c".
    std::vector<std::string> candidates;
    candidates.push_back(full);
    for (std::string::size_type slash = full.find('/');
         slash != std::string::npos;
         slash = full.find('/', slash + 1)) {
        if (slash > 0) {
            candidates.push_back(full.substr(0, slash));
        }
    }

    for (const auto& pattern : scanner->exclude_patterns) {
        for (const auto& candidate : candidates) {
            if (fnmatch(pattern.c_str(), candidate.c_str(), FNM_PATHNAME) == 0) {
                return true;
            }
        }
    }
    return false;
}

// Platform-optimized directory traversal
// Uses simple but efficient approach: batch readdir + minimal stat calls
#ifdef __linux__
// On Linux, we could use getdents64 for even better performance
// But standard readdir is fine for now and more portable
#endif

// Create a scanner.
//
// exclude_patterns: array of pattern_count C strings (null entries are
//                   skipped); may itself be null, in which case only the
//                   default excludes are installed.
// Returns a handle to release with as_destroy(), or nullptr if construction
// fails (e.g. out of memory) so that no exception leaves this function.
as_scanner_t* as_create(const char** exclude_patterns, size_t pattern_count) {
    try {
        auto scanner = std::make_unique<as_scanner_t>();

        if (exclude_patterns) {
            for (size_t i = 0; i < pattern_count; ++i) {
                if (exclude_patterns[i]) {
                    scanner->exclude_patterns.emplace_back(exclude_patterns[i]);
                }
            }
        }

        // Default excludes
        scanner->exclude_patterns.emplace_back("run_manifest.json");
        scanner->exclude_patterns.emplace_back("output.log");
        scanner->exclude_patterns.emplace_back("code/*");
        scanner->exclude_patterns.emplace_back("snapshot/*");

        return scanner.release();
    } catch (...) {
        return nullptr;
    }
}

// Destroy a scanner created by as_create(). Passing nullptr is a no-op.
void as_destroy(as_scanner_t* scanner) {
    delete scanner;
}

// Append one exclude pattern. Does nothing if either argument is null.
void as_add_exclude(as_scanner_t* scanner, const char* pattern) {
    if (scanner && pattern) {
        scanner->exclude_patterns.push_back(pattern);
    }
}

// Recursively scan run_dir and report every regular file that is not excluded.
//
// Returns a heap-allocated result to release with as_free_result(): artifacts
// are sorted by relative path, count/total_size summarize them, and
// discovery_time_ms is the wall time of the scan. Returns nullptr when an
// argument is null or when any step throws; in the latter case the message is
// stored for as_last_error(). A successful scan does not clear a previously
// stored error.
as_result_t* as_scan_directory(as_scanner_t* scanner, const char* run_dir) {
    if (!scanner || !run_dir) return nullptr;

    const auto start_time = std::chrono::steady_clock::now();

    // Everything that can throw (allocation included) stays inside the try so
    // that nothing leaks and no exception propagates to the caller.
    try {
        auto result = std::make_unique<as_result_t>();
        result->artifacts = nullptr;
        result->count = 0;
        result->total_size = 0;
        result->discovery_time_ms = 0;

        std::vector<as_artifact_t> artifacts;
        artifacts.reserve(128);  // Pre-allocate to avoid early reallocations

        const fs::path root(run_dir);

        // file_time_type's clock need not share system_clock's epoch, so
        // mtimes are translated through one pair of "now" readings taken up
        // front (approximate, but consistent for every file in this scan).
        const auto file_now = fs::file_time_type::clock::now();
        const auto sys_now = std::chrono::system_clock::now();

        // skip_permission_denied prevents exceptions on permission errors
        const auto options = fs::directory_options::skip_permission_denied;

        for (const auto& entry : fs::recursive_directory_iterator(root, options)) {
            scanner->scan_count++;

            if (!entry.is_regular_file()) {
                continue;
            }

            // Path relative to the run directory
            const std::string rel_str = fs::relative(entry.path(), root).string();

            if (should_exclude(scanner, rel_str.c_str())) {
                continue;
            }

            as_artifact_t artifact{};
            // Truncates silently if the relative path does not fit.
            std::strncpy(artifact.path, rel_str.c_str(), sizeof(artifact.path) - 1);
            artifact.path[sizeof(artifact.path) - 1] = '\0';

            const auto status = entry.status();
            artifact.size_bytes = entry.file_size();

            const auto mtime = entry.last_write_time();
            const auto sctp =
                std::chrono::time_point_cast<std::chrono::system_clock::duration>(
                    mtime - file_now + sys_now);
            artifact.mtime = std::chrono::system_clock::to_time_t(sctp);
            artifact.mode = static_cast<decltype(artifact.mode)>(status.permissions());

            artifacts.push_back(artifact);
            result->total_size += artifact.size_bytes;
        }

        // Sort artifacts by path for deterministic order
        std::sort(artifacts.begin(), artifacts.end(),
                  [](const as_artifact_t& a, const as_artifact_t& b) {
                      return std::strcmp(a.path, b.path) < 0;
                  });

        // Copy to result. The array allocation is the last operation that can
        // throw, so `result` never ends up owning a half-built array.
        result->count = artifacts.size();
        if (result->count > 0) {
            result->artifacts = new as_artifact_t[result->count];
            std::copy(artifacts.begin(), artifacts.end(), result->artifacts);
        }

        const auto end_time = std::chrono::steady_clock::now();
        result->discovery_time_ms =
            std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time)
                .count();

        return result.release();
    } catch (const std::exception& e) {
        scanner->last_error = e.what();
        return nullptr;
    } catch (...) {
        scanner->last_error = "unknown error";
        return nullptr;
    }
}

// Release a result returned by as_scan_directory(). Passing nullptr is a no-op.
void as_free_result(as_result_t* result) {
    if (result) {
        delete[] result->artifacts;
        delete result;
    }
}

// Return the message stored by the last failed scan, or nullptr if the scanner
// is null or no error has been recorded. The pointer refers to storage owned
// by the scanner: it is invalidated when another error is recorded or the
// scanner is destroyed.
const char* as_last_error(as_scanner_t* scanner) {
    if (!scanner || scanner->last_error.empty()) return nullptr;
    return scanner->last_error.c_str();
}

// Return the number of directory entries visited by all scans on this scanner
// (files, directories and other entry types alike), or 0 for a null scanner.
uint64_t as_get_scan_count(as_scanner_t* scanner) {
    return scanner ? scanner->scan_count : 0;
}