diff options
| author | jsmall-nvidia <jsmall@nvidia.com> | 2023-04-04 18:00:16 -0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-04-04 18:00:16 -0400 |
| commit | 68c7d5cda2d6f2eb7bfb3a7e15860eb3ded25424 (patch) | |
| tree | ac4e8384108e70109b084782b414296d015f92b8 | |
| parent | 7bb2de1bc40e535fae93940113db97b5ea44a6f2 (diff) | |
Preliminary support for realtime clock (#2772)
* #include with an absolute path didn't work, because paths were always assumed to be relative.
* Initial support for realtime clock.
* Add realtime-clock render feature where seems appropriate.
* Fixes to make NVAPI compile properly.
Change realtime-clock.slang check to use maths that can't overflow.
| -rw-r--r-- | docs/target-compatibility.md | 23 | ||||
| -rw-r--r-- | source/slang/hlsl.meta.slang | 32 | ||||
| -rw-r--r-- | tests/slang-extension/realtime-clock.slang | 31 | ||||
| -rw-r--r-- | tests/slang-extension/realtime-clock.slang.expected.txt | 5 | ||||
| -rw-r--r-- | tools/gfx/cuda/cuda-device.cpp | 5 | ||||
| -rw-r--r-- | tools/gfx/d3d11/d3d11-device.cpp | 5 | ||||
| -rw-r--r-- | tools/gfx/d3d11/d3d11-scopeNVAPI.cpp | 2 | ||||
| -rw-r--r-- | tools/gfx/d3d12/d3d12-device.cpp | 9 | ||||
| -rw-r--r-- | tools/gfx/d3d12/d3d12-pipeline-state.cpp | 2 | ||||
| -rw-r--r-- | tools/gfx/vulkan/vk-api.h | 5 | ||||
| -rw-r--r-- | tools/gfx/vulkan/vk-device.cpp | 14 |
11 files changed, 129 insertions, 4 deletions
diff --git a/docs/target-compatibility.md b/docs/target-compatibility.md index 96d1353a9..0fdeeca8d 100644 --- a/docs/target-compatibility.md +++ b/docs/target-compatibility.md @@ -44,6 +44,7 @@ Items with ^ means there is some discussion about support later in the document | [RWByteAddressBuffer Atomic](#byte-address-atomic) | No | Yes ^ | Yes ^ | Yes | No + | [Shader Execution Reordering](#ser) | No | Yes ^ | Yes ^ | No | No | [debugBreak](#debug-break) | No | No | Yes | Yes | Yes +| [realtime clock](#realtime-clock) | No | Yes ^ | Yes | Yes | No <a id="half"></a> ## Half Type @@ -275,4 +276,24 @@ On C++ targets debugBreak is implemented using SLANG_BREAKPOINT defined in "slan Some additional details: * If [slang-llvm](cpu-target.md#slang-llvm) is being used as the downstream compiler (as is typical with `host-callable`), it will crash into the debugger, but may not produce a usable stack trace. -* For "normal" C++ downstream compilers such as Clang/Gcc/Visual Studio, to break into readable source code, debug information is typically necessary. Disabling optimizations may be useful to break on the appropriate specific line, and have variables inspectable.
\ No newline at end of file +* For "normal" C++ downstream compilers such as Clang/Gcc/Visual Studio, to break into readable source code, debug information is typically necessary. Disabling optimizations may be useful to break on the appropriate specific line, and have variables inspectable. + +<a id="realtime-clock"></a> +## Realtime Clock + +Realtime clock support is available via the API + +``` +// Get low 32 bits of realtime clock +uint getRealtimeClockLow(); +// Get 64 bit realtime clock, with low bits in .x and high bits in .y +uint2 getRealtimeClock(); +``` + +On D3D this is supported through NVAPI via `NvGetSpecial`. + +On Vulkan this is supported via [VK_KHR_shader_clock extension](https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_KHR_shader_clock.html) + +On CUDA this is supported via [clock](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#time-function). + +Currently this is not supported on CPU, although this will potentially be added in the future.
\ No newline at end of file diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index 7774c0d01..8ad99d71b 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -6835,6 +6835,38 @@ __specialized_for_target(glsl) [[vk::spirv_instruction(1, "NonSemantic.DebugBreak")]] void debugBreak(); +// +// Realtime Clock support +// + +// https://github.com/KhronosGroup/GLSL/blob/master/extensions/ext/GL_EXT_shader_realtime_clock.txt + +[__requiresNVAPI] +__target_intrinsic(hlsl, "NvGetSpecial( NV_SPECIALOP_GLOBAL_TIMER_LO)") +__glsl_extension(GL_EXT_shader_realtime_clock) +__target_intrinsic(glsl, "clockRealtime2x32EXT().x") +__target_intrinsic(cuda, "clock") +uint getRealtimeClockLow(); + +__target_intrinsic(cuda, "clock64") +int64_t __cudaGetRealtimeClock(); + +[__requiresNVAPI] +__target_intrinsic(hlsl, "uint2(NvGetSpecial( NV_SPECIALOP_GLOBAL_TIMER_LO), NvGetSpecial( NV_SPECIALOP_GLOBAL_TIMER_HI)) ") +__glsl_extension(GL_EXT_shader_realtime_clock) +__target_intrinsic(glsl, "clockRealtime2x32EXT()") +uint2 getRealtimeClock(); + +__specialized_for_target(cuda) +uint2 getRealtimeClock() +{ + int64_t ticks = __cudaGetRealtimeClock(); + return uint2(uint(ticks), uint(uint64_t(ticks) >> 32)); +} + +// +// CUDA specific +// __target_intrinsic(cuda, "(threadIdx)") [__readNone] diff --git a/tests/slang-extension/realtime-clock.slang b/tests/slang-extension/realtime-clock.slang new file mode 100644 index 000000000..67d6e2ab0 --- /dev/null +++ b/tests/slang-extension/realtime-clock.slang @@ -0,0 +1,31 @@ +// realtime-clock.slang + +// TODO(JS): +// Disable FXC DX11/DX12 because currently FXC can't compile nvHLSLExtns.h + +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute -output-using-type -shaderobj +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-dx11 -slang -compute -output-using-type -nvapi-slot u0 -shaderobj +//TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute -render-feature realtime-clock -output-using-type -shaderobj 
+//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-d3d12 -compute -output-using-type -nvapi-slot u0 -shaderobj +//TEST(compute):COMPARE_COMPUTE_EX:-d3d12 -compute -use-dxil -output-using-type -nvapi-slot u0 -shaderobj +//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -output-using-type -shaderobj + +// The test doesn't directly use this, but having this defined makes the 0 slot available if NVAPI is going to be used +// Only strictly necessary on the D3D11/D3D12 paths +//TEST_INPUT:ubuffer(data=[0 0 0 0 ], stride=4):name=nvapiBuffer +RWStructuredBuffer<int> nvapiBuffer; + +//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name=outputBuffer +RWStructuredBuffer<int> outputBuffer; + +[numthreads(8, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + uint idx = dispatchThreadID.x; + + uint ticksLow = getRealtimeClockLow(); + + uint2 ticks = getRealtimeClock(); + + outputBuffer[idx] = int(idx + ((ticksLow ^ ticks.x) ^ (ticks.x ^ ticksLow))); +}
\ No newline at end of file diff --git a/tests/slang-extension/realtime-clock.slang.expected.txt b/tests/slang-extension/realtime-clock.slang.expected.txt new file mode 100644 index 000000000..9d029e9f2 --- /dev/null +++ b/tests/slang-extension/realtime-clock.slang.expected.txt @@ -0,0 +1,5 @@ +type: int32_t +0 +1 +2 +3 diff --git a/tools/gfx/cuda/cuda-device.cpp b/tools/gfx/cuda/cuda-device.cpp index f81bcfe99..4aeecb606 100644 --- a/tools/gfx/cuda/cuda-device.cpp +++ b/tools/gfx/cuda/cuda-device.cpp @@ -179,9 +179,12 @@ SLANG_NO_THROW SlangResult SLANG_MCALL DeviceImpl::initialize(const Desc& desc) SLANG_CUDA_RETURN_WITH_REPORT_ON_FAIL( cuCtxCreate(&m_context->m_context, 0, m_device), reportType); - // Not clear how to detect half support on CUDA. For now we'll assume we have it { + // Not clear how to detect half support on CUDA. For now we'll assume we have it m_features.add("half"); + + // CUDA has support for realtime clock + m_features.add("realtime-clock"); } cudaDeviceProp deviceProps; diff --git a/tools/gfx/d3d11/d3d11-device.cpp b/tools/gfx/d3d11/d3d11-device.cpp index e32bdf7ed..cc2eda089 100644 --- a/tools/gfx/d3d11/d3d11-device.cpp +++ b/tools/gfx/d3d11/d3d11-device.cpp @@ -196,6 +196,11 @@ SlangResult DeviceImpl::initialize(const Desc& desc) m_features.add("atomic-float"); } + // If we have NVAPI well assume we have realtime clock + { + m_features.add("realtime-clock"); + } + m_nvapi = true; #endif } diff --git a/tools/gfx/d3d11/d3d11-scopeNVAPI.cpp b/tools/gfx/d3d11/d3d11-scopeNVAPI.cpp index b230623fe..1a662e999 100644 --- a/tools/gfx/d3d11/d3d11-scopeNVAPI.cpp +++ b/tools/gfx/d3d11/d3d11-scopeNVAPI.cpp @@ -20,7 +20,7 @@ SlangResult ScopeNVAPI::init(DeviceImpl* device, Index regIndex) } #ifdef GFX_NVAPI - NvAPI_Status nvapiStatus = NvAPI_D3D11_SetNvShaderExtnSlot(renderer->m_device, NvU32(regIndex)); + NvAPI_Status nvapiStatus = NvAPI_D3D11_SetNvShaderExtnSlot(device->m_device, NvU32(regIndex)); if (nvapiStatus != NVAPI_OK) { return SLANG_FAIL; 
diff --git a/tools/gfx/d3d12/d3d12-device.cpp b/tools/gfx/d3d12/d3d12-device.cpp index 8f1a3e366..312c81d75 100644 --- a/tools/gfx/d3d12/d3d12-device.cpp +++ b/tools/gfx/d3d12/d3d12-device.cpp @@ -24,6 +24,10 @@ # define ENABLE_DEBUG_LAYER 0 #endif +#ifdef GFX_NVAPI +# include "../nvapi/nvapi-include.h" +#endif + namespace gfx { namespace d3d12 @@ -597,6 +601,11 @@ Result DeviceImpl::initialize(const Desc& desc) m_features.add("atomic-float"); } + // If we have NVAPI well assume we have realtime clock + { + m_features.add("realtime-clock"); + } + m_nvapi = true; #endif } diff --git a/tools/gfx/d3d12/d3d12-pipeline-state.cpp b/tools/gfx/d3d12/d3d12-pipeline-state.cpp index adfdcd518..35313f676 100644 --- a/tools/gfx/d3d12/d3d12-pipeline-state.cpp +++ b/tools/gfx/d3d12/d3d12-pipeline-state.cpp @@ -240,7 +240,7 @@ Result PipelineStateImpl::ensureAPIPipelineStateCreated() SIZE_T(programImpl->m_shaders[0].code.getCount()) }; #ifdef GFX_NVAPI - if (m_nvapi) + if (m_device->m_nvapi) { // Also fill the extension structure. // Use the same UAV slot index and register space that are declared in the shader. 
diff --git a/tools/gfx/vulkan/vk-api.h b/tools/gfx/vulkan/vk-api.h index af2234f55..c34372f45 100644 --- a/tools/gfx/vulkan/vk-api.h +++ b/tools/gfx/vulkan/vk-api.h @@ -270,6 +270,11 @@ struct VulkanExtendedFeatureProperties // Robustness2 features VkPhysicalDeviceRobustness2FeaturesEXT robustness2Features = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT}; + + // Clock features + VkPhysicalDeviceShaderClockFeaturesKHR clockFeatures = { + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CLOCK_FEATURES_KHR + }; }; struct VulkanApi diff --git a/tools/gfx/vulkan/vk-device.cpp b/tools/gfx/vulkan/vk-device.cpp index b5eec0b72..488f39be6 100644 --- a/tools/gfx/vulkan/vk-device.cpp +++ b/tools/gfx/vulkan/vk-device.cpp @@ -424,6 +424,10 @@ Result DeviceImpl::initVulkanInstanceAndDevice( extendedFeatures.robustness2Features.pNext = deviceFeatures2.pNext; deviceFeatures2.pNext = &extendedFeatures.robustness2Features; + // clock features + extendedFeatures.clockFeatures.pNext = deviceFeatures2.pNext; + deviceFeatures2.pNext = &extendedFeatures.clockFeatures; + // Atomic Float // To detect atomic float we need // https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/VkPhysicalDeviceShaderAtomicFloatFeaturesEXT.html @@ -575,6 +579,16 @@ Result DeviceImpl::initVulkanInstanceAndDevice( m_features.add("robustness2"); } + if (extendedFeatures.clockFeatures.shaderDeviceClock) + { + deviceExtensions.add(VK_KHR_SHADER_CLOCK_EXTENSION_NAME); + + extendedFeatures.clockFeatures.pNext = (void*)deviceCreateInfo.pNext; + deviceCreateInfo.pNext = &extendedFeatures.clockFeatures; + + m_features.add("realtime-clock"); + } + VkPhysicalDeviceProperties2 extendedProps = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2 }; VkPhysicalDeviceRayTracingPipelinePropertiesKHR rtProps = { |
