diff options
| author | Gangzheng Tong <tonggangzheng@gmail.com> | 2025-09-22 15:46:42 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-09-22 22:46:42 +0000 |
| commit | ba8132345cbae5b749b4a01deda732ad6f8251a0 (patch) | |
| tree | f00ad0dd2d26f49112e430615106c9f6d22de032 /tools/render-test/slang-test-device-cache.cpp | |
| parent | bd24cc271c5d151dbaa7e4da674cbc219aef8153 (diff) | |
Add RHI Device Caching and Test Prefix Exclusion (#8448)
# Add RHI Device Caching and Test Prefix Exclusion
## Summary
This PR introduces two key improvements to the Slang test
infrastructure:
1. **RHI Device Caching**: Implements device caching to significantly
speed up test execution by reusing graphics devices across tests. **RHI
device caching reduces slang-test execution time from ~15 minutes to ~5
minutes in Windows release builds.**
2. **Test Prefix Exclusion**: Adds `-exclude-prefix` option to skip
tests matching specified path prefixes
## Changes
### RHI Device Caching
- **New `DeviceCache` class** (`slang-test-device-cache.h/cpp`):
Thread-safe device cache that evicts the oldest-created device when full
(max 10 devices)
- **Cache control option**: `-cache-rhi-device` flag in both
`slang-test` and `render-test`
- Default: **enabled** in slang-test, **disabled** in render-test when
run standalone
- Automatically skips caching for CUDA devices (due to driver issues)
- **Performance benefit**: Eliminates expensive device
creation/destruction cycles, especially beneficial for Vulkan on Tegra
platforms
### Test Prefix Exclusion
- **New `-exclude-prefix <prefix>` option** in slang-test
- Allows excluding entire test directories or patterns from execution
- Complements existing `-category` and individual test filtering options
### Usage Examples
```bash
# Enable device caching (default)
slang-test
# Disable device caching
slang-test -cache-rhi-device false
# Exclude tests from specific directories
slang-test -exclude-prefix tests/problematic/
slang-test -exclude-prefix tests/slow/ -exclude-prefix tests/experimental/
```
This change should significantly improve test execution performance,
particularly in CI environments with frequent device operations. It is
needed for running the GPU tests on aarch64, where repeated device
creation/destruction causes driver issues.
Needed by: https://github.com/shader-slang/slang/issues/8346
---------
Co-authored-by: slangbot <ellieh+slangbot@nvidia.com>
Co-authored-by: slangbot <186143334+slangbot@users.noreply.github.com>
Diffstat (limited to 'tools/render-test/slang-test-device-cache.cpp')
| -rw-r--r-- | tools/render-test/slang-test-device-cache.cpp | 160 |
1 files changed, 160 insertions, 0 deletions
diff --git a/tools/render-test/slang-test-device-cache.cpp b/tools/render-test/slang-test-device-cache.cpp new file mode 100644 index 000000000..a486ee3f3 --- /dev/null +++ b/tools/render-test/slang-test-device-cache.cpp @@ -0,0 +1,160 @@ +#include "slang-test-device-cache.h" + +#include <algorithm> + +// Static member accessor functions (Meyer's singleton pattern) +// This ensures proper destruction order - function-local statics are destroyed +// in reverse order of first access, avoiding the static destruction order fiasco +std::mutex& DeviceCache::getMutex() +{ + static std::mutex instance; + return instance; +} + +std::unordered_map< + DeviceCache::DeviceCacheKey, + DeviceCache::CachedDevice, + DeviceCache::DeviceCacheKeyHash>& +DeviceCache::getDeviceCache() +{ + static std::unordered_map<DeviceCacheKey, CachedDevice, DeviceCacheKeyHash> instance; + return instance; +} + +uint64_t& DeviceCache::getNextCreationOrder() +{ + static uint64_t instance = 0; + return instance; +} + +bool DeviceCache::DeviceCacheKey::operator==(const DeviceCacheKey& other) const +{ + return deviceType == other.deviceType && enableValidation == other.enableValidation && + enableRayTracingValidation == other.enableRayTracingValidation && + profileName == other.profileName && requiredFeatures == other.requiredFeatures; +} + +std::size_t DeviceCache::DeviceCacheKeyHash::operator()(const DeviceCacheKey& key) const +{ + std::size_t h1 = std::hash<int>{}(static_cast<int>(key.deviceType)); + std::size_t h2 = std::hash<bool>{}(key.enableValidation); + std::size_t h3 = std::hash<bool>{}(key.enableRayTracingValidation); + std::size_t h4 = std::hash<std::string>{}(key.profileName); + + std::size_t h5 = 0; + for (const auto& feature : key.requiredFeatures) + { + h5 ^= std::hash<std::string>{}(feature) + 0x9e3779b9 + (h5 << 6) + (h5 >> 2); + } + + return h1 ^ (h2 << 1) ^ (h3 << 2) ^ (h4 << 3) ^ (h5 << 4); +} + +DeviceCache::CachedDevice::CachedDevice() + : creationOrder(0) +{ +} + +void 
DeviceCache::evictOldestDeviceIfNeeded() +{ + auto& deviceCache = getDeviceCache(); + if (deviceCache.size() < MAX_CACHED_DEVICES) + return; + + // Find the oldest device to evict + auto oldestIt = deviceCache.end(); + uint64_t oldestCreationOrder = UINT64_MAX; + + for (auto it = deviceCache.begin(); it != deviceCache.end(); ++it) + { + if (it->second.creationOrder < oldestCreationOrder) + { + oldestCreationOrder = it->second.creationOrder; + oldestIt = it; + } + } + + // Remove the oldest device - ComPtr will handle the actual device release + if (oldestIt != deviceCache.end()) + { + deviceCache.erase(oldestIt); + } +} + +SlangResult DeviceCache::acquireDevice(const rhi::DeviceDesc& desc, rhi::IDevice** outDevice) +{ + if (!outDevice) + return SLANG_E_INVALID_ARG; + + *outDevice = nullptr; + + // Skip caching for CUDA devices due to crashes + if (desc.deviceType == rhi::DeviceType::CUDA) + { + return rhi::getRHI()->createDevice(desc, outDevice); + } + + std::lock_guard<std::mutex> lock(getMutex()); + auto& deviceCache = getDeviceCache(); + auto& nextCreationOrder = getNextCreationOrder(); + + // Create cache key + DeviceCacheKey key; + key.deviceType = desc.deviceType; + key.enableValidation = desc.enableValidation; + key.enableRayTracingValidation = desc.enableRayTracingValidation; + key.profileName = desc.slang.targetProfile ? 
desc.slang.targetProfile : "Unknown"; + + // Add required features to key + for (int i = 0; i < desc.requiredFeatureCount; ++i) + { + key.requiredFeatures.push_back(desc.requiredFeatures[i]); + } + std::sort(key.requiredFeatures.begin(), key.requiredFeatures.end()); + + // Evict oldest device if we've reached the limit + evictOldestDeviceIfNeeded(); + + // Check if we have a cached device + auto it = deviceCache.find(key); + if (it != deviceCache.end()) + { + // Return the cached device - COM reference counting handles the references + *outDevice = it->second.device.get(); + if (*outDevice) + { + (*outDevice)->addRef(); + return SLANG_OK; + } + } + + // Create new device + Slang::ComPtr<rhi::IDevice> device; + auto result = rhi::getRHI()->createDevice(desc, device.writeRef()); + if (SLANG_FAILED(result)) + { + return result; + } + + // Cache the device + CachedDevice& cached = deviceCache[key]; + cached.device = device; + cached.creationOrder = nextCreationOrder++; + + // Return the device with proper reference counting + *outDevice = device.get(); + if (*outDevice) + { + (*outDevice)->addRef(); + } + + return SLANG_OK; +} + + +void DeviceCache::cleanCache() +{ + std::lock_guard<std::mutex> lock(getMutex()); + auto& deviceCache = getDeviceCache(); + deviceCache.clear(); +} |
