diff options
| author | jsmall-nvidia <jsmall@nvidia.com> | 2020-03-27 16:16:27 -0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-03-27 16:16:27 -0400 |
| commit | e267ce24e37b9b7f98921f75abc150c1463b1d6d (patch) | |
| tree | 331660a83ae0b72116b79b5cc8bf7a9c06555db5 | |
| parent | 5b0b8436123aa2faa9b682ed45efe2bd7edbf01b (diff) | |
Adds WaveShuffle intrinsic (#1298)
* Support for WaveReadLaneAt with a dynamic laneId (that is still uniform across the Wave) on Vk by enabling SPIR-V 1.4.
Fixed wave-lane-at.slang test to test with laneId that is uniform across the Wave.
* Added WaveShuffle intrinsic.
Test for WaveShuffle intrinsic.
* Added some documentation on WaveShuffle
* Fix: the SPIR-V version required for the subgroupBroadcast lane index to be non-constexpr is actually 1.5
| -rw-r--r-- | docs/target-compatibility.md | 11 | ||||
| -rw-r--r-- | source/slang/hlsl.meta.slang | 31 | ||||
| -rw-r--r-- | tests/hlsl-intrinsic/wave-lane-at-vk.slang | 2 | ||||
| -rw-r--r-- | tests/hlsl-intrinsic/wave-lane-at.slang | 2 | ||||
| -rw-r--r-- | tests/hlsl-intrinsic/wave-shuffle-vk.slang | 33 | ||||
| -rw-r--r-- | tests/hlsl-intrinsic/wave-shuffle-vk.slang.expected.txt | 4 | ||||
| -rw-r--r-- | tests/hlsl-intrinsic/wave-shuffle.slang | 42 | ||||
| -rw-r--r-- | tests/hlsl-intrinsic/wave-shuffle.slang.expected.txt | 4 |
8 files changed, 123 insertions, 6 deletions
diff --git a/docs/target-compatibility.md b/docs/target-compatibility.md index e8edeeaf5..6967f7454 100644 --- a/docs/target-compatibility.md +++ b/docs/target-compatibility.md @@ -20,6 +20,7 @@ Items with ^ means there is some discussion about support later in the document | SM6.0 Wave Intrinsics | No | Yes | Partial | Yes | No | SM6.0 Quad Intrinsics | No | Yes | No + | No | No | SM6.5 Wave Intrinsics | No | Yes ^ | No + | Yes | No +| WaveShuffle | No | Limited ^ | Yes + | Yes | No | Tesselation | Yes ^ | Yes ^ | No + | No | No | Graphics Pipeline | Yes | Yes | Yes | No | No | Ray Tracing DXR 1.0 | No | Yes ^ | Yes ^ | No | No @@ -56,6 +57,16 @@ tex.GetDimensions is the GetDimensions method on 'texture' objects. This is not SM6.5 Wave Intrinsics are supported, but requires a downstream DXC compiler that supports SM6.5. As it stands the DXC shipping with windows does not. +## WaveShuffle + +WaveShuffle is an intrinsic added to the Slang standard library to expose the GLSL `subgroupShuffle` intrinsics and allow loosened requirements on laneId. + +`HLSL` uses `WaveReadLaneAt` and this requires that the `laneId` be 'dynamically uniform' across the wave. WaveShuffle has the same functionality but relaxes this restriction. + +`WaveReadLaneAt` most obviously maps to `subgroupBroadcast` in GLSL. This has the extra restriction that the index must be a compile-time constant. With SPIR-V 1.5 it is allowed to be 'dynamically uniform', but doesn't work on current glslang. + +NOTE! Using WaveShuffle to target `HLSL` will produce `WaveReadLaneAt` - that means strictly speaking the restriction *still applies*, and the correct behavior will only be seen on hardware that allows the loosened requirements on laneId. On hardware that does not, the result of `WaveShuffle` is the same as that of `WaveReadLaneAt` with a non-uniform laneId, which is undefined. + ## Tesselation Although tesselation stages should work on D3D11 and D3D12 they are not tested within our test framework, and may have problems. 
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index 739b8579d..e29e47581 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -2722,17 +2722,18 @@ matrix<T,N,M> WaveReadLaneFirst(matrix<T,N,M> expr); // NOTE! On GLSL based targets the lane index *must* be a compile time expression! // See https://github.com/KhronosGroup/GLSL/blob/master/extensions/khr/GL_KHR_shader_subgroup.txt -// It is allowed to be 'dynamically uniform within the subgroup' if it's SPIR-V 1.4. -// TODO(JS): For now we'll use 1.4, but aim for the future for the compiler to determine +// It is allowed to be 'dynamically uniform within the subgroup' if it's SPIR-V 1.5. +// TODO(JS): For now we'll use 1.5, but aim for the future for the compiler to determine // if the line the is compile constant, and reduce requirement to 1.3 __generic<T : __BuiltinType> __glsl_extension(GL_KHR_shader_subgroup_ballot) -__spirv_version(1.4) +__spirv_version(1.5) __target_intrinsic(glsl, "subgroupBroadcast($0, $1)") __target_intrinsic(cuda, "__shfl_sync(__activemask(), $0, $1)") T WaveReadLaneAt(T value, int lane); __generic<T : __BuiltinType, let N : int> -__spirv_version(1.4) +__spirv_version(1.5) +__glsl_extension(GL_KHR_shader_subgroup_ballot) __target_intrinsic(glsl, "subgroupBroadcast($0, $1)") __target_intrinsic(cuda, "_waveReadLaneAtMultiple($0, $1)") vector<T,N> WaveReadLaneAt(vector<T,N> value, int lane); @@ -2740,6 +2741,28 @@ __generic<T : __BuiltinType, let N : int, let M : int> __target_intrinsic(cuda, "_waveReadLaneAtMultiple($0, $1)") matrix<T,N,M> WaveReadLaneAt(matrix<T,N,M> value, int lane); +// NOTE! WaveShuffle is a NON STANDARD HLSL intrinsic! It will map to WaveReadLaneAt on HLSL +// which means it will only work on hardware which allows arbitrary laneIds which is not true +// in general because it breaks the HLSL standard, which requires it's 'dynamically uniform' across the Wave. 
+__generic<T : __BuiltinType> +__glsl_extension(GL_KHR_shader_subgroup_shuffle) +__spirv_version(1.3) +__target_intrinsic(glsl, "subgroupShuffle($0, $1)") +__target_intrinsic(cuda, "__shfl_sync(__activemask(), $0, $1)") +__target_intrinsic(hlsl, "WaveReadLaneAt") +T WaveShuffle(T value, int lane); +__generic<T : __BuiltinType, let N : int> +__glsl_extension(GL_KHR_shader_subgroup_shuffle) +__spirv_version(1.3) +__target_intrinsic(glsl, "subgroupShuffle($0, $1)") +__target_intrinsic(cuda, "_waveReadLaneAtMultiple($0, $1)") +__target_intrinsic(hlsl, "WaveReadLaneAt") +vector<T,N> WaveShuffle(vector<T,N> value, int lane); +__generic<T : __BuiltinType, let N : int, let M : int> +__target_intrinsic(cuda, "_waveReadLaneAtMultiple($0, $1)") +__target_intrinsic(hlsl, "WaveReadLaneAt") +matrix<T,N,M> WaveShuffle(matrix<T,N,M> value, int lane); + __glsl_extension(GL_KHR_shader_subgroup_ballot) __spirv_version(1.3) __target_intrinsic(glsl, "subgroupBallotExclusiveBitCount(subgroupBallot($0))") diff --git a/tests/hlsl-intrinsic/wave-lane-at-vk.slang b/tests/hlsl-intrinsic/wave-lane-at-vk.slang index 137632f16..0d52f781e 100644 --- a/tests/hlsl-intrinsic/wave-lane-at-vk.slang +++ b/tests/hlsl-intrinsic/wave-lane-at-vk.slang @@ -9,7 +9,7 @@ RWStructuredBuffer<int> outputBuffer; // `The input lane index must be uniform across the wave.`. -// The same restriction applies to glsl/SPIR-V 1.4 +// The same restriction applies to glsl/SPIR-V 1.5 // So we are going to use the input buffer to achieve this. //TEST_INPUT:ubuffer(data=[1 2 3 0], stride=4):name inputBuffer diff --git a/tests/hlsl-intrinsic/wave-lane-at.slang b/tests/hlsl-intrinsic/wave-lane-at.slang index 8661dbc55..c3caaa4e8 100644 --- a/tests/hlsl-intrinsic/wave-lane-at.slang +++ b/tests/hlsl-intrinsic/wave-lane-at.slang @@ -9,7 +9,7 @@ RWStructuredBuffer<int> outputBuffer; // Note from HLSL: `The input lane index must be uniform across the wave.`. 
-// The same restriction applies to glsl/SPIR-V 1.4 +// The same restriction applies to glsl/SPIR-V 1.5 // So we are going to use the input buffer to achieve this. //TEST_INPUT:ubuffer(data=[1 2 3 0], stride=4):name inputBuffer diff --git a/tests/hlsl-intrinsic/wave-shuffle-vk.slang b/tests/hlsl-intrinsic/wave-shuffle-vk.slang new file mode 100644 index 000000000..01fb59155 --- /dev/null +++ b/tests/hlsl-intrinsic/wave-shuffle-vk.slang @@ -0,0 +1,33 @@ +// Disabled because main tests is wave-shuffle.slang, this just tests VK +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile cs_6_0 + +//TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute +//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute + +//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer +RWStructuredBuffer<int> outputBuffer; + +[numthreads(4, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + int idx = int(dispatchThreadID.x); + + int value = 0; + + // Scalar + + value += WaveShuffle(idx, (idx + 1) & 3); + + // vector + + { + float2 v = float2(idx + 1, idx + 2); + float2 readValue = WaveShuffle(v, (idx - 1) & 3); + + value += int(readValue[0] + readValue[1]); + } + + outputBuffer[idx] = value; +}
\ No newline at end of file diff --git a/tests/hlsl-intrinsic/wave-shuffle-vk.slang.expected.txt b/tests/hlsl-intrinsic/wave-shuffle-vk.slang.expected.txt new file mode 100644 index 000000000..b20444fc5 --- /dev/null +++ b/tests/hlsl-intrinsic/wave-shuffle-vk.slang.expected.txt @@ -0,0 +1,4 @@ +A +5 +8 +7 diff --git a/tests/hlsl-intrinsic/wave-shuffle.slang b/tests/hlsl-intrinsic/wave-shuffle.slang new file mode 100644 index 000000000..093babcce --- /dev/null +++ b/tests/hlsl-intrinsic/wave-shuffle.slang @@ -0,0 +1,42 @@ +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute +//Disabled on D3D, because in general WaveShuffle requires hardware that doesn't have the 'uniform laneId across Wave' restriction. +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile cs_6_0 +// Disabled because vk doesn't currently support matrix types. See wave-shuffle-vk.slang +//DISABLE_TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute +//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute + +//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer +RWStructuredBuffer<int> outputBuffer; + +[numthreads(4, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + int idx = int(dispatchThreadID.x); + + int value = 0; + + // Scalar + + value += WaveShuffle(idx, (idx + 1) & 3); + + // vector + + { + float2 v = float2(idx + 1, idx + 2); + float2 readValue = WaveShuffle(v, (idx - 1) & 3); + + value += int(readValue[0] + readValue[1]); + } + + // matrix + { + matrix<int, 2, 2> v = matrix<int, 2, 2>(idx, idx - 1, idx * 3, idx - 2); + + matrix<int, 2, 2> readValue = WaveShuffle(v, (idx - 1) & 3); + + value += int(readValue[0][0] + readValue[0][1] + readValue[1][0] + readValue[1][1]); + } + + outputBuffer[idx] = value; +}
\ No newline at end of file diff --git a/tests/hlsl-intrinsic/wave-shuffle.slang.expected.txt b/tests/hlsl-intrinsic/wave-shuffle.slang.expected.txt new file mode 100644 index 000000000..a327b0804 --- /dev/null +++ b/tests/hlsl-intrinsic/wave-shuffle.slang.expected.txt @@ -0,0 +1,4 @@ +19 +2 +B +10 |
