diff options
| author | Harsh Aggarwal (NVIDIA) <haaggarwal@nvidia.com> | 2025-08-20 14:41:06 +0530 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-08-20 09:11:06 +0000 |
| commit | e0c20a076f2ec84586b6508664df4f59273c6aaf (patch) | |
| tree | ae629eb56413f1ffd1d269ffe447471c07aa8137 | |
| parent | e4a7129b84692ddc3c586f0d0dde95e80e173ed8 (diff) | |
Updated support to enable batch3 (#8219)
Enable CUDA support for batch 3 tests
- Enhanced wave operations with exclusive support
- Added proper identity values for min/max operations
- Fixed intrinsic name mapping issues
- Updated test configurations
Co-authored-by: Ellie Hermaszewska <ellieh@nvidia.com>
| -rw-r--r-- | prelude/slang-cuda-prelude.h | 295 | ||||
| -rw-r--r-- | source/slang/hlsl.meta.slang | 16 | ||||
| -rw-r--r-- | tests/autodiff-dstdlib/determinant.slang | 1 | ||||
| -rw-r--r-- | tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics.slang | 1 | ||||
| -rw-r--r-- | tests/hlsl-intrinsic/debug-break.slang | 5 | ||||
| -rw-r--r-- | tests/hlsl-intrinsic/scalar-double-vk-intrinsic.slang | 5 | ||||
| -rw-r--r-- | tests/hlsl-intrinsic/wave-active-count-bits.slang | 3 | ||||
| -rw-r--r-- | tests/hlsl-intrinsic/wave-broadcast-lane-at-vk.slang | 1 | ||||
| -rw-r--r-- | tests/hlsl-intrinsic/wave-is-first-lane.slang | 2 | ||||
| -rw-r--r-- | tests/hlsl-intrinsic/wave-multi/wave-multi-prefix-max.slang | 2 | ||||
| -rw-r--r-- | tests/hlsl-intrinsic/wave-multi/wave-multi-prefix-min.slang | 2 | ||||
| -rw-r--r-- | tests/hlsl-intrinsic/wave-multi/wave-multi-prefix-scalar-functional.slang | 2 | ||||
| -rw-r--r-- | tests/hlsl-intrinsic/wave-prefix-product.slang | 2 | ||||
| -rw-r--r-- | tests/hlsl-intrinsic/wave-prefix-sum.slang | 2 | ||||
| -rw-r--r-- | tests/hlsl-intrinsic/wave-vector.slang | 2 |
15 files changed, 321 insertions, 20 deletions
diff --git a/prelude/slang-cuda-prelude.h b/prelude/slang-cuda-prelude.h index 9df2727f6..44afd71b9 100644 --- a/prelude/slang-cuda-prelude.h +++ b/prelude/slang-cuda-prelude.h @@ -2661,17 +2661,67 @@ struct WaveOpMul template<typename T> struct WaveOpMax { - __inline__ __device__ static T getInitial(T a) { return a; } + __inline__ __device__ static T getInitial(T a, bool exclusive = false); __inline__ __device__ static T doOp(T a, T b) { return a > b ? a : b; } }; template<typename T> struct WaveOpMin { - __inline__ __device__ static T getInitial(T a) { return a; } + __inline__ __device__ static T getInitial(T a, bool exclusive = false); __inline__ __device__ static T doOp(T a, T b) { return a < b ? a : b; } }; +// Compact specializations using macro for getInitial +#define SLANG_WAVE_MIN_SPEC(T, EXCL_VAL) \ + template<> \ + __inline__ __device__ T WaveOpMin<T>::getInitial(T a, bool exclusive) \ + { \ + return exclusive ? (EXCL_VAL) : a; \ + } + +#define SLANG_WAVE_MAX_SPEC(T, EXCL_VAL) \ + template<> \ + __inline__ __device__ T WaveOpMax<T>::getInitial(T a, bool exclusive) \ + { \ + return exclusive ? (EXCL_VAL) : a; \ + } + +// Min specializations (exclusive identity = max value) +SLANG_WAVE_MIN_SPEC(float, SLANG_INFINITY) +SLANG_WAVE_MIN_SPEC(double, SLANG_INFINITY) +SLANG_WAVE_MIN_SPEC(int, 0x7FFFFFFF) +SLANG_WAVE_MIN_SPEC(uint, 0xFFFFFFFF) +SLANG_WAVE_MIN_SPEC(char, (char)0x7F) +SLANG_WAVE_MIN_SPEC(int8_t, (int8_t)0x7F) +SLANG_WAVE_MIN_SPEC(uint8_t, (uint8_t)0xFF) +SLANG_WAVE_MIN_SPEC(int16_t, (int16_t)0x7FFF) +SLANG_WAVE_MIN_SPEC(uint16_t, (uint16_t)0xFFFF) +SLANG_WAVE_MIN_SPEC(int64_t, 0x7FFFFFFFFFFFFFFFLL) +SLANG_WAVE_MIN_SPEC(uint64_t, 0xFFFFFFFFFFFFFFFFULL) +#if SLANG_CUDA_ENABLE_HALF +SLANG_WAVE_MIN_SPEC(__half, __ushort_as_half(0x7BFF)) +#endif + +// Max specializations (exclusive identity = min value) +SLANG_WAVE_MAX_SPEC(float, -SLANG_INFINITY) +SLANG_WAVE_MAX_SPEC(double, -SLANG_INFINITY) +SLANG_WAVE_MAX_SPEC(int, (int)0x80000000) +SLANG_WAVE_MAX_SPEC(uint, 0) +SLANG_WAVE_MAX_SPEC(char, (char)0x80) +SLANG_WAVE_MAX_SPEC(int8_t, (int8_t)0x80) +SLANG_WAVE_MAX_SPEC(uint8_t, 0) +SLANG_WAVE_MAX_SPEC(int16_t, (int16_t)0x8000) +SLANG_WAVE_MAX_SPEC(uint16_t, 0) +SLANG_WAVE_MAX_SPEC(int64_t, (int64_t)0x8000000000000000LL) +SLANG_WAVE_MAX_SPEC(uint64_t, 0) +#if SLANG_CUDA_ENABLE_HALF +SLANG_WAVE_MAX_SPEC(__half, __ushort_as_half(0xFBFF)) +#endif + +#undef SLANG_WAVE_MIN_SPEC +#undef SLANG_WAVE_MAX_SPEC + template<typename T> struct ElementTypeTrait; @@ -2706,6 +2756,33 @@ struct ElementTypeTrait<int64_t> { typedef int64_t Type; }; +template<> +struct ElementTypeTrait<char> +{ + typedef char Type; +}; +template<> +struct ElementTypeTrait<uchar> +{ + typedef uchar Type; +}; +template<> +struct ElementTypeTrait<short> +{ + typedef short Type; +}; +template<> +struct ElementTypeTrait<ushort> +{ + typedef ushort Type; +}; +#if SLANG_CUDA_ENABLE_HALF +template<> +struct ElementTypeTrait<__half> +{ + typedef __half Type; +}; +#endif // Vector template<> @@ -2792,6 +2869,115 @@ struct ElementTypeTrait<double4> typedef double Type; }; +// Additional vector types +template<> +struct ElementTypeTrait<char2> +{ + typedef char Type; +}; +template<> +struct ElementTypeTrait<char3> +{ + typedef char Type; +}; +template<> +struct ElementTypeTrait<char4> +{ + typedef char Type; +}; +template<> +struct ElementTypeTrait<uchar2> +{ + typedef uchar Type; +}; +template<> +struct ElementTypeTrait<uchar3> +{ + typedef uchar Type; +}; +template<> +struct ElementTypeTrait<uchar4> +{ + typedef uchar Type; +}; +template<> +struct ElementTypeTrait<short2> +{ + typedef short Type; +}; +template<> +struct ElementTypeTrait<short3> +{ + typedef short Type; +}; +template<> +struct ElementTypeTrait<short4> +{ + typedef short Type; +}; +template<> +struct ElementTypeTrait<ushort2> +{ + typedef ushort Type; +}; +template<> +struct ElementTypeTrait<ushort3> +{ + typedef ushort Type; +}; +template<> +struct ElementTypeTrait<ushort4> +{ + typedef ushort Type; +}; +template<> +struct ElementTypeTrait<longlong2> +{ + typedef int64_t Type; +}; +template<> +struct ElementTypeTrait<longlong3> +{ + typedef int64_t Type; +}; +template<> +struct ElementTypeTrait<longlong4> +{ + typedef int64_t Type; +}; +template<> +struct ElementTypeTrait<ulonglong2> +{ + typedef uint64_t Type; +}; +template<> +struct ElementTypeTrait<ulonglong3> +{ + typedef uint64_t Type; +}; +template<> +struct ElementTypeTrait<ulonglong4> +{ + typedef uint64_t Type; +}; +#if SLANG_CUDA_ENABLE_HALF +template<> +struct ElementTypeTrait<__half2> +{ + typedef __half Type; +}; +template<> +struct ElementTypeTrait<__half3> +{ + typedef __half Type; +}; +template<> +struct ElementTypeTrait<__half4> +{ + typedef __half Type; +}; +#endif + // Matrix template<typename T, int ROWS, int COLS> struct ElementTypeTrait<Matrix<T, ROWS, COLS>> @@ -3431,6 +3617,111 @@ __inline__ __device__ T _wavePrefixAndMultiple(WarpMask mask, T val) } template<typename T> +__inline__ __device__ T _wavePrefixMin(WarpMask mask, T val) +{ + return _wavePrefixScalar<WaveOpMin<T>, T>(mask, val); +} + +template<typename T> +__inline__ __device__ T _wavePrefixMax(WarpMask mask, T val) +{ + return _wavePrefixScalar<WaveOpMax<T>, T>(mask, val); +} + +template<typename T> +__inline__ __device__ T _wavePrefixMinMultiple(WarpMask mask, T val) +{ + typedef typename ElementTypeTrait<T>::Type ElemType; + _wavePrefixMultiple<WaveOpMin<ElemType>, ElemType, sizeof(T) / sizeof(ElemType)>( + mask, + (ElemType*)&val); + return val; +} + +template<typename T> +__inline__ __device__ T _wavePrefixMaxMultiple(WarpMask mask, T val) +{ + typedef typename ElementTypeTrait<T>::Type ElemType; + _wavePrefixMultiple<WaveOpMax<ElemType>, ElemType, sizeof(T) / sizeof(ElemType)>( + mask, + (ElemType*)&val); + return val; +} + +// Wrapper structures for exclusive operations that use the overloaded getInitial method +template<typename T> +struct WaveOpExclusiveMin +{ + __inline__ __device__ static T getInitial(T a) { return WaveOpMin<T>::getInitial(a, true); } + __inline__ __device__ static T doOp(T a, T b) { return WaveOpMin<T>::doOp(a, b); } +}; + +template<typename T> +struct WaveOpExclusiveMax +{ + __inline__ __device__ static T getInitial(T a) { return WaveOpMax<T>::getInitial(a, true); } + __inline__ __device__ static T doOp(T a, T b) { return WaveOpMax<T>::doOp(a, b); } +}; + +// Inclusive prefix min/max functions (for WaveMultiPrefixInclusive*) +template<typename T> +__inline__ __device__ T _wavePrefixInclusiveMin(WarpMask mask, T val) +{ + return _wavePrefixMin(mask, val); +} + +template<typename T> +__inline__ __device__ T _wavePrefixInclusiveMax(WarpMask mask, T val) +{ + return _wavePrefixMax(mask, val); +} + +template<typename T> +__inline__ __device__ T _wavePrefixInclusiveMinMultiple(WarpMask mask, T val) +{ + return _wavePrefixMinMultiple(mask, val); +} + +template<typename T> +__inline__ __device__ T _wavePrefixInclusiveMaxMultiple(WarpMask mask, T val) +{ + return _wavePrefixMaxMultiple(mask, val); +} + +// Explicit exclusive prefix min/max functions (for WaveMultiPrefixExclusive*) +template<typename T> +__inline__ __device__ T _wavePrefixExclusiveMin(WarpMask mask, T val) +{ + return _wavePrefixScalar<WaveOpExclusiveMin<T>, T>(mask, val); +} + +template<typename T> +__inline__ __device__ T _wavePrefixExclusiveMax(WarpMask mask, T val) +{ + return _wavePrefixScalar<WaveOpExclusiveMax<T>, T>(mask, val); +} + +template<typename T> +__inline__ __device__ T _wavePrefixExclusiveMinMultiple(WarpMask mask, T val) +{ + typedef typename ElementTypeTrait<T>::Type ElemType; + _wavePrefixMultiple<WaveOpExclusiveMin<ElemType>, ElemType, sizeof(T) / sizeof(ElemType)>( + mask, + (ElemType*)&val); + return val; +} + +template<typename T> +__inline__ __device__ T _wavePrefixExclusiveMaxMultiple(WarpMask mask, T val) +{ + typedef typename ElementTypeTrait<T>::Type ElemType; + _wavePrefixMultiple<WaveOpExclusiveMax<ElemType>, ElemType, sizeof(T) / sizeof(ElemType)>( + mask, + (ElemType*)&val); + return val; +} + +template<typename T> __inline__ __device__ uint4 _waveMatchScalar(WarpMask mask, T val) { int pred; diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index 0d5b8cb1f..d2e98529b 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -8578,7 +8578,7 @@ matrix<T, N, M> degrees(matrix<T, N, M> x) __generic<T : __BuiltinFloatingPointType, let N : int> [__readNone] [PreferCheckpoint] -[require(glsl_hlsl_metal_spirv_wgsl)] +[require(cuda_glsl_hlsl_metal_spirv_wgsl)] T determinant(matrix<T,N,N> m) { __target_switch @@ -8590,6 +8590,7 @@ T determinant(matrix<T,N,N> m) OpExtInst $$T result glsl450 Determinant $m }; case wgsl: __intrinsic_asm "determinant"; + case cuda: default: static_assert(N >= 1 && N <= 4, "determinant is only implemented up to 4x4 matrices"); if (N == 1) @@ -8629,13 +8630,14 @@ T determinant(matrix<T,N,N> m) /// @category math __generic<T : __BuiltinIntegerType, let N : int> [__readNone] -[require(glsl_hlsl_metal_spirv_wgsl)] +[require(cuda_glsl_hlsl_metal_spirv_wgsl)] T determinant(matrix<T,N,N> m) { __target_switch { case hlsl: __intrinsic_asm "determinant"; - // GLSL, WGSL, and SPIR-V don't support integer determinants for lowered matrices, so we need to implement it manually + // GLSL, WGSL, SPIR-V, and CUDA don't support integer determinants for lowered matrices, so we need to implement it manually + case cuda: default: static_assert(N >= 1 && N <= 4, "determinant is only implemented up to 4x4 matrices"); if (N == 1) @@ -16680,7 +16682,7 @@ for (auto opName : kWaveMultiPrefixMinMaxNames) { __generic<T : __BuiltinArithmeticType> __spirv_version(1.3) [ForceInline] -[require(glsl_spirv, subgroup_partitioned)] +[require(cuda_glsl_spirv, subgroup_partitioned)] T WaveMultiPrefix$(opName.name)(T value, uint4 mask) { __shaderSubgroupPartitionedPreamble<T>(); @@ -16688,6 +16690,8 @@ T WaveMultiPrefix$(opName.name)(T value, uint4 mask) { case glsl: __intrinsic_asm "subgroupPartitioned$(opName.glslName)NV"; + case cuda: + __intrinsic_asm "_wavePrefix$(opName.name)(($1).x, $0)"; case spirv: { if (__isFloat<T>()) @@ -16703,7 +16707,7 @@ T WaveMultiPrefix$(opName.name)(T value, uint4 mask) __generic<T : __BuiltinArithmeticType, let N : int> __spirv_version(1.3) [ForceInline] -[require(glsl_spirv, subgroup_partitioned)] +[require(cuda_glsl_spirv, subgroup_partitioned)] vector<T, N> WaveMultiPrefix$(opName.name)(vector<T, N> value, uint4 mask) { __shaderSubgroupPartitionedPreamble<T>(); @@ -16711,6 +16715,8 @@ vector<T, N> WaveMultiPrefix$(opName.name)(vector<T, N> value, uint4 mask) { case glsl: __intrinsic_asm "subgroupPartitioned$(opName.glslName)NV"; + case cuda: + __intrinsic_asm "_wavePrefix$(opName.name)Multiple(($1).x, $0)"; case spirv: { if (__isFloat<T>()) diff --git a/tests/autodiff-dstdlib/determinant.slang b/tests/autodiff-dstdlib/determinant.slang index d2e699551..f73d3cdfa 100644 --- a/tests/autodiff-dstdlib/determinant.slang +++ b/tests/autodiff-dstdlib/determinant.slang @@ -1,5 +1,6 @@ //TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj -output-using-type //TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -shaderobj -output-using-type +//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -shaderobj -output-using-type -cuda //TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name=outputBuffer RWStructuredBuffer<float> outputBuffer; diff --git a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics.slang b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics.slang index f53d38d74..d23a675b2 100644 --- a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics.slang +++ b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics.slang @@ -3,6 +3,7 @@ // Disabled because validation layer doesn't like vector atomics, although nv driver does allow it. //DISABLED_TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=CHECK): -vk -compute -profile cs_6_2 -render-features half -shaderobj -emit-spirv-directly -output-using-type +//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK): -cuda -compute -profile cs_6_2 -render-features half -shaderobj -emit-spirv-directly -output-using-type //TEST:SIMPLE(filecheck=SPIRV):-target spirv -entry computeMain -stage compute -emit-spirv-directly -skip-spirv-validation //TEST:SIMPLE(filecheck=HLSL):-target hlsl -entry computeMain -profile cs_6_3 //TEST_INPUT:set tmpBuffer = ubuffer(data=[0 0 0 0], stride=4) diff --git a/tests/hlsl-intrinsic/debug-break.slang b/tests/hlsl-intrinsic/debug-break.slang index c16139f72..8fd4567dc 100644 --- a/tests/hlsl-intrinsic/debug-break.slang +++ b/tests/hlsl-intrinsic/debug-break.slang @@ -1,7 +1,7 @@ //TEST:SIMPLE(filecheck=CHECK_GLSL):-stage compute -entry computeMain -target glsl -line-directive-mode none // We can't enable because output source includes path to prelude. //DISABLE_TEST:SIMPLE:-stage compute -entry computeMain -target cpp -line-directive-mode none -//DISABLE_TEST:SIMPLE:-stage compute -entry computeMain -target cuda -line-directive-mode none +//TEST:SIMPLE(filecheck=CHECK_CUDA):-stage compute -entry computeMain -target cuda -line-directive-mode none // Not currently supported on HLSL //DISABLE_TEST:SIMPLE:-stage compute -entry computeMain -target hlsl -line-directive-mode none // With `slang-llvm` this will crash, but the call stack isn't really usable. @@ -15,6 +15,7 @@ // CHECK_GLSL: void main // CHECK_GLSL: glslDebugBreak{{.*}}(); +// CHECK_CUDA:__brkpt() RWStructuredBuffer<int> outputBuffer; [numthreads(4, 1, 1)] @@ -25,4 +26,4 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) debugBreak(); outputBuffer[idx] = idx; -}
\ No newline at end of file +} diff --git a/tests/hlsl-intrinsic/scalar-double-vk-intrinsic.slang b/tests/hlsl-intrinsic/scalar-double-vk-intrinsic.slang index 128c1c1cd..042c0b62e 100644 --- a/tests/hlsl-intrinsic/scalar-double-vk-intrinsic.slang +++ b/tests/hlsl-intrinsic/scalar-double-vk-intrinsic.slang @@ -3,8 +3,7 @@ //TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute -output-using-type -shaderobj //TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute -output-using-type -shaderobj -render-feature double -// We don't want to run a cuda test here... -//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -shaderobj +//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -shaderobj -output-using-type //TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer RWStructuredBuffer<double> outputBuffer; @@ -54,4 +53,4 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) } outputBuffer[idx] = ft; -}
\ No newline at end of file +} diff --git a/tests/hlsl-intrinsic/wave-active-count-bits.slang b/tests/hlsl-intrinsic/wave-active-count-bits.slang index 105b95a6f..a7aa48687 100644 --- a/tests/hlsl-intrinsic/wave-active-count-bits.slang +++ b/tests/hlsl-intrinsic/wave-active-count-bits.slang @@ -2,6 +2,7 @@ //DISABLE_TEST:COMPARE_COMPUTE_EX:-cpu -compute -shaderobj //DISABLE_TEST:COMPARE_COMPUTE_EX:-slang -compute -shaderobj //TEST:COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile cs_6_0 -shaderobj -render-feature hardware-device +//TEST:COMPARE_COMPUTE_EX:-slang -compute -cuda -profile cs_6_0 -shaderobj -render-feature hardware-device //TEST(vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj -render-feature hardware-device //TEST:COMPARE_COMPUTE_EX:-cuda -compute -render-features cuda_sm_7_0 -shaderobj @@ -14,4 +15,4 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) int idx = int(dispatchThreadID.x); outputBuffer[idx] = int(WaveActiveCountBits(bool(idx & 5))); -}
\ No newline at end of file +} diff --git a/tests/hlsl-intrinsic/wave-broadcast-lane-at-vk.slang b/tests/hlsl-intrinsic/wave-broadcast-lane-at-vk.slang index b0cff08a9..4a32ab736 100644 --- a/tests/hlsl-intrinsic/wave-broadcast-lane-at-vk.slang +++ b/tests/hlsl-intrinsic/wave-broadcast-lane-at-vk.slang @@ -1,5 +1,6 @@ //TEST_CATEGORY(wave, compute) //TEST:COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile cs_6_0 -shaderobj +//TEST:COMPARE_COMPUTE_EX:-slang -compute -cuda -profile cs_6_0 -shaderobj //TEST(vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj //TEST:COMPARE_COMPUTE_EX:-wgpu -compute -shaderobj //TEST:COMPARE_COMPUTE_EX:-metal -compute -shaderobj diff --git a/tests/hlsl-intrinsic/wave-is-first-lane.slang b/tests/hlsl-intrinsic/wave-is-first-lane.slang index 220a5758b..ae986c7b3 100644 --- a/tests/hlsl-intrinsic/wave-is-first-lane.slang +++ b/tests/hlsl-intrinsic/wave-is-first-lane.slang @@ -3,7 +3,7 @@ //DISABLE_TEST:COMPARE_COMPUTE_EX:-slang -compute -shaderobj //TEST:COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile cs_6_0 -shaderobj -render-feature hardware-device //TEST(vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj -render-feature hardware-device -//TEST:COMPARE_COMPUTE_EX:-cuda -compute -render-features cuda_sm_7_0 -shaderobj +//TEST:COMPARE_COMPUTE_EX:-cuda -compute -capability cuda_sm_7_0 -shaderobj //TEST:COMPARE_COMPUTE_EX:-wgpu -compute -shaderobj //TEST:COMPARE_COMPUTE_EX:-metal -compute -shaderobj diff --git a/tests/hlsl-intrinsic/wave-multi/wave-multi-prefix-max.slang b/tests/hlsl-intrinsic/wave-multi/wave-multi-prefix-max.slang index 654fd6130..0aca11f38 100644 --- a/tests/hlsl-intrinsic/wave-multi/wave-multi-prefix-max.slang +++ b/tests/hlsl-intrinsic/wave-multi/wave-multi-prefix-max.slang @@ -1,7 +1,7 @@ //TEST_CATEGORY(wave, compute) //TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-vk -compute -shaderobj -emit-spirv-directly //TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-vk -compute -shaderobj -emit-spirv-via-glsl - +//TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-cuda -capability cuda_sm_7_0 -compute -shaderobj //TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-vk -compute -shaderobj -emit-spirv-directly -xslang -DUSE_GLSL_SYNTAX -allow-glsl //TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer diff --git a/tests/hlsl-intrinsic/wave-multi/wave-multi-prefix-min.slang b/tests/hlsl-intrinsic/wave-multi/wave-multi-prefix-min.slang index 68e1e9c05..321b99a0e 100644 --- a/tests/hlsl-intrinsic/wave-multi/wave-multi-prefix-min.slang +++ b/tests/hlsl-intrinsic/wave-multi/wave-multi-prefix-min.slang @@ -1,7 +1,7 @@ //TEST_CATEGORY(wave, compute) //TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-vk -compute -shaderobj -emit-spirv-directly //TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-vk -compute -shaderobj -emit-spirv-via-glsl - +//TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-cuda -capability cuda_sm_7_0 -compute -shaderobj //TEST:COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-vk -compute -shaderobj -emit-spirv-directly -xslang -DUSE_GLSL_SYNTAX -allow-glsl //TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer diff --git a/tests/hlsl-intrinsic/wave-multi/wave-multi-prefix-scalar-functional.slang b/tests/hlsl-intrinsic/wave-multi/wave-multi-prefix-scalar-functional.slang index 5de34b20a..67367f264 100644 --- a/tests/hlsl-intrinsic/wave-multi/wave-multi-prefix-scalar-functional.slang +++ b/tests/hlsl-intrinsic/wave-multi/wave-multi-prefix-scalar-functional.slang @@ -4,7 +4,7 @@ //TEST:COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile sm_6_5 -shaderobj //TEST:COMPARE_COMPUTE_EX:-vk -compute -shaderobj -//TEST:COMPARE_COMPUTE_EX:-cuda -compute -render-features cuda_sm_7_0 -shaderobj +//TEST:COMPARE_COMPUTE_EX:-cuda -compute -capability cuda_sm_7_0 -shaderobj //TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer RWStructuredBuffer<uint> outputBuffer; diff --git a/tests/hlsl-intrinsic/wave-prefix-product.slang b/tests/hlsl-intrinsic/wave-prefix-product.slang index 774f5996e..f8c0ed57a 100644 --- a/tests/hlsl-intrinsic/wave-prefix-product.slang +++ b/tests/hlsl-intrinsic/wave-prefix-product.slang @@ -3,7 +3,7 @@ //DISABLE_TEST:COMPARE_COMPUTE_EX:-slang -compute -shaderobj //TEST:COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile cs_6_0 -shaderobj -render-feature hardware-device //TEST(vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj -render-feature hardware-device -//TEST:COMPARE_COMPUTE_EX:-cuda -compute -render-features cuda_sm_7_0 -shaderobj +//TEST:COMPARE_COMPUTE_EX:-cuda -compute -capability cuda_sm_7_0 -shaderobj //TEST:COMPARE_COMPUTE_EX:-wgpu -compute -shaderobj //TEST:COMPARE_COMPUTE_EX:-metal -compute -shaderobj diff --git a/tests/hlsl-intrinsic/wave-prefix-sum.slang b/tests/hlsl-intrinsic/wave-prefix-sum.slang index 4f7c2912d..8a092e20d 100644 --- a/tests/hlsl-intrinsic/wave-prefix-sum.slang +++ b/tests/hlsl-intrinsic/wave-prefix-sum.slang @@ -3,7 +3,7 @@ //DISABLE_TEST:COMPARE_COMPUTE_EX:-slang -compute -shaderobj //TEST:COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile cs_6_0 -shaderobj -render-feature hardware-device //TEST(vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj -render-feature hardware-device -//TEST:COMPARE_COMPUTE_EX:-cuda -compute -render-features cuda_sm_7_0 -shaderobj +//TEST:COMPARE_COMPUTE_EX:-cuda -compute -capability cuda_sm_7_0 -shaderobj //TEST:COMPARE_COMPUTE_EX:-wgpu -compute -shaderobj //TEST:COMPARE_COMPUTE_EX:-metal -compute -shaderobj diff --git a/tests/hlsl-intrinsic/wave-vector.slang b/tests/hlsl-intrinsic/wave-vector.slang index f786794ec..4c3b7ccb6 100644 --- a/tests/hlsl-intrinsic/wave-vector.slang +++ b/tests/hlsl-intrinsic/wave-vector.slang @@ -3,7 +3,7 @@ //DISABLE_TEST:COMPARE_COMPUTE_EX:-slang -compute -shaderobj //TEST:COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile cs_6_0 -shaderobj -render-feature hardware-device //TEST(vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj -render-feature hardware-device -//TEST:COMPARE_COMPUTE_EX:-cuda -compute -render-features cuda_sm_7_0 -shaderobj +//TEST:COMPARE_COMPUTE_EX:-cuda -compute -capability cuda_sm_7_0 -shaderobj //TEST:COMPARE_COMPUTE_EX:-wgpu -compute -shaderobj //TEST:COMPARE_COMPUTE_EX:-metal -compute -shaderobj |
