summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorjsmall-nvidia <jsmall@nvidia.com>2023-04-04 18:00:16 -0400
committerGitHub <noreply@github.com>2023-04-04 18:00:16 -0400
commit68c7d5cda2d6f2eb7bfb3a7e15860eb3ded25424 (patch)
treeac4e8384108e70109b084782b414296d015f92b8
parent7bb2de1bc40e535fae93940113db97b5ea44a6f2 (diff)
Preliminary support for realtime clock (#2772)
* #include an absolute path didn't work - because paths were taken to always be relative. * Initial support for realtime clock. * Add realtime-clock render feature where seems appropriate. * Fixes to make NVAPI compile properly. Change realtime-clock.slang check to use maths that can't overflow.
-rw-r--r--docs/target-compatibility.md23
-rw-r--r--source/slang/hlsl.meta.slang32
-rw-r--r--tests/slang-extension/realtime-clock.slang31
-rw-r--r--tests/slang-extension/realtime-clock.slang.expected.txt5
-rw-r--r--tools/gfx/cuda/cuda-device.cpp5
-rw-r--r--tools/gfx/d3d11/d3d11-device.cpp5
-rw-r--r--tools/gfx/d3d11/d3d11-scopeNVAPI.cpp2
-rw-r--r--tools/gfx/d3d12/d3d12-device.cpp9
-rw-r--r--tools/gfx/d3d12/d3d12-pipeline-state.cpp2
-rw-r--r--tools/gfx/vulkan/vk-api.h5
-rw-r--r--tools/gfx/vulkan/vk-device.cpp14
11 files changed, 129 insertions, 4 deletions
diff --git a/docs/target-compatibility.md b/docs/target-compatibility.md
index 96d1353a9..0fdeeca8d 100644
--- a/docs/target-compatibility.md
+++ b/docs/target-compatibility.md
@@ -44,6 +44,7 @@ Items with ^ means there is some discussion about support later in the document
| [RWByteAddressBuffer Atomic](#byte-address-atomic) | No | Yes ^ | Yes ^ | Yes | No +
| [Shader Execution Reordering](#ser) | No | Yes ^ | Yes ^ | No | No
| [debugBreak](#debug-break) | No | No | Yes | Yes | Yes
+| [realtime clock](#realtime-clock) | No | Yes ^ | Yes | Yes | No
<a id="half"></a>
## Half Type
@@ -275,4 +276,24 @@ On C++ targets debugBreak is implemented using SLANG_BREAKPOINT defined in "slan
Some additional details:
* If [slang-llvm](cpu-target.md#slang-llvm) is being used as the downstream compiler (as is typical with `host-callable`), it will crash into the debugger, but may not produce a usable stack trace.
-* For "normal" C++ downstream compilers such as Clang/Gcc/Visual Studio, to break into readable source code, debug information is typically necessary. Disabling optimizations may be useful to break on the appropriate specific line, and have variables inspectable. \ No newline at end of file
+* For "normal" C++ downstream compilers such as Clang/Gcc/Visual Studio, to break into readable source code, debug information is typically necessary. Disabling optimizations may be useful to break on the appropriate specific line, and have variables inspectable.
+
+<a id="realtime-clock"></a>
+## Realtime Clock
+
+Realtime clock support is available via the following API:
+
+```
+// Get low 32 bits of realtime clock
+uint getRealtimeClockLow();
+// Get 64 bit realtime clock, with low bits in .x and high bits in .y
+uint2 getRealtimeClock();
+```
+
+On D3D this is supported through NVAPI via `NvGetSpecial`.
+
+On Vulkan this is supported via the [VK_KHR_shader_clock extension](https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_KHR_shader_clock.html).
+
+On CUDA this is supported via [clock](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#time-function).
+
+Currently this is not supported on CPU, although this will potentially be added in the future. \ No newline at end of file
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index 7774c0d01..8ad99d71b 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -6835,6 +6835,38 @@ __specialized_for_target(glsl)
[[vk::spirv_instruction(1, "NonSemantic.DebugBreak")]]
void debugBreak();
+//
+// Realtime Clock support
+//
+
+// https://github.com/KhronosGroup/GLSL/blob/master/extensions/ext/GL_EXT_shader_realtime_clock.txt
+
+[__requiresNVAPI]
+__target_intrinsic(hlsl, "NvGetSpecial( NV_SPECIALOP_GLOBAL_TIMER_LO)")
+__glsl_extension(GL_EXT_shader_realtime_clock)
+__target_intrinsic(glsl, "clockRealtime2x32EXT().x")
+__target_intrinsic(cuda, "clock")
+uint getRealtimeClockLow(); // Low 32 bits of the clock; expands to a target intrinsic on HLSL (requires NVAPI), GLSL and CUDA.
+
+__target_intrinsic(cuda, "clock64")
+int64_t __cudaGetRealtimeClock(); // CUDA-only helper that expands to `clock64`; used by the CUDA specialization of getRealtimeClock().
+
+[__requiresNVAPI]
+__target_intrinsic(hlsl, "uint2(NvGetSpecial( NV_SPECIALOP_GLOBAL_TIMER_LO), NvGetSpecial( NV_SPECIALOP_GLOBAL_TIMER_HI)) ")
+__glsl_extension(GL_EXT_shader_realtime_clock)
+__target_intrinsic(glsl, "clockRealtime2x32EXT()")
+uint2 getRealtimeClock(); // 64-bit clock as a uint2: low word in .x, high word in .y (HLSL and GLSL mappings declared here).
+
+__specialized_for_target(cuda)
+uint2 getRealtimeClock() // CUDA implementation: splits the 64-bit tick count into two 32-bit words.
+{
+ int64_t ticks = __cudaGetRealtimeClock(); // signed 64-bit value from the `clock64` intrinsic
+ return uint2(uint(ticks), uint(uint64_t(ticks) >> 32)); // .x = low 32 bits; .y = high 32 bits (cast to unsigned before shifting)
+}
+
+//
+// CUDA specific
+//
__target_intrinsic(cuda, "(threadIdx)")
[__readNone]
diff --git a/tests/slang-extension/realtime-clock.slang b/tests/slang-extension/realtime-clock.slang
new file mode 100644
index 000000000..67d6e2ab0
--- /dev/null
+++ b/tests/slang-extension/realtime-clock.slang
@@ -0,0 +1,31 @@
+// realtime-clock.slang
+
+// TODO(JS):
+// Disable FXC DX11/DX12 because currently FXC can't compile nvHLSLExtns.h
+
+//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute -output-using-type -shaderobj
+//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-dx11 -slang -compute -output-using-type -nvapi-slot u0 -shaderobj
+//TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute -render-feature realtime-clock -output-using-type -shaderobj
+//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-d3d12 -compute -output-using-type -nvapi-slot u0 -shaderobj
+//TEST(compute):COMPARE_COMPUTE_EX:-d3d12 -compute -use-dxil -output-using-type -nvapi-slot u0 -shaderobj
+//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -output-using-type -shaderobj
+
+// The test doesn't directly use this, but having this defined makes the 0 slot available if NVAPI is going to be used
+// Only strictly necessary on the D3D11/D3D12 paths
+//TEST_INPUT:ubuffer(data=[0 0 0 0 ], stride=4):name=nvapiBuffer
+RWStructuredBuffer<int> nvapiBuffer;
+
+//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name=outputBuffer
+RWStructuredBuffer<int> outputBuffer;
+
+[numthreads(8, 1, 1)] // NOTE(review): 8 threads are launched but outputBuffer's TEST_INPUT lists 4 elements - confirm writes for idx >= 4 are safe on every target
+void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
+{
+ uint idx = dispatchThreadID.x;
+
+ uint ticksLow = getRealtimeClockLow(); // exercises the 32-bit entry point
+
+ uint2 ticks = getRealtimeClock(); // exercises the 64-bit entry point, returned as two 32-bit words
+
+ outputBuffer[idx] = int(idx + ((ticksLow ^ ticks.x) ^ (ticks.x ^ ticksLow))); // the XOR term is X ^ X == 0, so the stored value is idx while both clock reads remain referenced
+} \ No newline at end of file
diff --git a/tests/slang-extension/realtime-clock.slang.expected.txt b/tests/slang-extension/realtime-clock.slang.expected.txt
new file mode 100644
index 000000000..9d029e9f2
--- /dev/null
+++ b/tests/slang-extension/realtime-clock.slang.expected.txt
@@ -0,0 +1,5 @@
+type: int32_t
+0
+1
+2
+3
diff --git a/tools/gfx/cuda/cuda-device.cpp b/tools/gfx/cuda/cuda-device.cpp
index f81bcfe99..4aeecb606 100644
--- a/tools/gfx/cuda/cuda-device.cpp
+++ b/tools/gfx/cuda/cuda-device.cpp
@@ -179,9 +179,12 @@ SLANG_NO_THROW SlangResult SLANG_MCALL DeviceImpl::initialize(const Desc& desc)
SLANG_CUDA_RETURN_WITH_REPORT_ON_FAIL(
cuCtxCreate(&m_context->m_context, 0, m_device), reportType);
- // Not clear how to detect half support on CUDA. For now we'll assume we have it
{
+ // Not clear how to detect half support on CUDA. For now we'll assume we have it
m_features.add("half");
+
+ // CUDA has support for realtime clock
+ m_features.add("realtime-clock");
}
cudaDeviceProp deviceProps;
diff --git a/tools/gfx/d3d11/d3d11-device.cpp b/tools/gfx/d3d11/d3d11-device.cpp
index e32bdf7ed..cc2eda089 100644
--- a/tools/gfx/d3d11/d3d11-device.cpp
+++ b/tools/gfx/d3d11/d3d11-device.cpp
@@ -196,6 +196,11 @@ SlangResult DeviceImpl::initialize(const Desc& desc)
m_features.add("atomic-float");
}
+ // If we have NVAPI we'll assume we have realtime clock
+ {
+ m_features.add("realtime-clock");
+ }
+
m_nvapi = true;
#endif
}
diff --git a/tools/gfx/d3d11/d3d11-scopeNVAPI.cpp b/tools/gfx/d3d11/d3d11-scopeNVAPI.cpp
index b230623fe..1a662e999 100644
--- a/tools/gfx/d3d11/d3d11-scopeNVAPI.cpp
+++ b/tools/gfx/d3d11/d3d11-scopeNVAPI.cpp
@@ -20,7 +20,7 @@ SlangResult ScopeNVAPI::init(DeviceImpl* device, Index regIndex)
}
#ifdef GFX_NVAPI
- NvAPI_Status nvapiStatus = NvAPI_D3D11_SetNvShaderExtnSlot(renderer->m_device, NvU32(regIndex));
+ NvAPI_Status nvapiStatus = NvAPI_D3D11_SetNvShaderExtnSlot(device->m_device, NvU32(regIndex));
if (nvapiStatus != NVAPI_OK)
{
return SLANG_FAIL;
diff --git a/tools/gfx/d3d12/d3d12-device.cpp b/tools/gfx/d3d12/d3d12-device.cpp
index 8f1a3e366..312c81d75 100644
--- a/tools/gfx/d3d12/d3d12-device.cpp
+++ b/tools/gfx/d3d12/d3d12-device.cpp
@@ -24,6 +24,10 @@
# define ENABLE_DEBUG_LAYER 0
#endif
+#ifdef GFX_NVAPI
+# include "../nvapi/nvapi-include.h"
+#endif
+
namespace gfx
{
namespace d3d12
@@ -597,6 +601,11 @@ Result DeviceImpl::initialize(const Desc& desc)
m_features.add("atomic-float");
}
+ // If we have NVAPI we'll assume we have realtime clock
+ {
+ m_features.add("realtime-clock");
+ }
+
m_nvapi = true;
#endif
}
diff --git a/tools/gfx/d3d12/d3d12-pipeline-state.cpp b/tools/gfx/d3d12/d3d12-pipeline-state.cpp
index adfdcd518..35313f676 100644
--- a/tools/gfx/d3d12/d3d12-pipeline-state.cpp
+++ b/tools/gfx/d3d12/d3d12-pipeline-state.cpp
@@ -240,7 +240,7 @@ Result PipelineStateImpl::ensureAPIPipelineStateCreated()
SIZE_T(programImpl->m_shaders[0].code.getCount()) };
#ifdef GFX_NVAPI
- if (m_nvapi)
+ if (m_device->m_nvapi)
{
// Also fill the extension structure.
// Use the same UAV slot index and register space that are declared in the shader.
diff --git a/tools/gfx/vulkan/vk-api.h b/tools/gfx/vulkan/vk-api.h
index af2234f55..c34372f45 100644
--- a/tools/gfx/vulkan/vk-api.h
+++ b/tools/gfx/vulkan/vk-api.h
@@ -270,6 +270,11 @@ struct VulkanExtendedFeatureProperties
// Robustness2 features
VkPhysicalDeviceRobustness2FeaturesEXT robustness2Features = {
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT};
+
+ // Clock features
+ VkPhysicalDeviceShaderClockFeaturesKHR clockFeatures = {
+ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CLOCK_FEATURES_KHR
+ };
};
struct VulkanApi
diff --git a/tools/gfx/vulkan/vk-device.cpp b/tools/gfx/vulkan/vk-device.cpp
index b5eec0b72..488f39be6 100644
--- a/tools/gfx/vulkan/vk-device.cpp
+++ b/tools/gfx/vulkan/vk-device.cpp
@@ -424,6 +424,10 @@ Result DeviceImpl::initVulkanInstanceAndDevice(
extendedFeatures.robustness2Features.pNext = deviceFeatures2.pNext;
deviceFeatures2.pNext = &extendedFeatures.robustness2Features;
+ // clock features
+ extendedFeatures.clockFeatures.pNext = deviceFeatures2.pNext;
+ deviceFeatures2.pNext = &extendedFeatures.clockFeatures;
+
// Atomic Float
// To detect atomic float we need
// https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/VkPhysicalDeviceShaderAtomicFloatFeaturesEXT.html
@@ -575,6 +579,16 @@ Result DeviceImpl::initVulkanInstanceAndDevice(
m_features.add("robustness2");
}
+ if (extendedFeatures.clockFeatures.shaderDeviceClock)
+ {
+ deviceExtensions.add(VK_KHR_SHADER_CLOCK_EXTENSION_NAME);
+
+ extendedFeatures.clockFeatures.pNext = (void*)deviceCreateInfo.pNext;
+ deviceCreateInfo.pNext = &extendedFeatures.clockFeatures;
+
+ m_features.add("realtime-clock");
+ }
+
VkPhysicalDeviceProperties2 extendedProps = {
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2 };
VkPhysicalDeviceRayTracingPipelinePropertiesKHR rtProps = {