diff options
Diffstat (limited to 'source')
| -rw-r--r-- | source/slang/hlsl.meta.slang | 93 | ||||
| -rw-r--r-- | source/slang/slang-capabilities.capdef | 2 |
2 files changed, 70 insertions, 25 deletions
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index 2af0dbcf7..d5f8cd2e1 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -3480,23 +3480,31 @@ ${{{{ const char* componentArg = (isShadow ? "" : componentArgString[componentId]); }}}} [ForceInline] - [require(glsl_hlsl_metal_spirv_wgsl, texture_gather)] + [require(cuda_glsl_hlsl_metal_spirv_wgsl, texture_gather)] vector<T.Element,4> Gather$(compareFunc)$(componentFunc)( $(samplerParam) vector<float, Shape.dimensions+isArray> location $(compareParam)) { - static_assert(Shape.flavor == $(SLANG_TEXTURE_2D) || Shape.flavor == $(SLANG_TEXTURE_CUBE), - "Gather is supported only for 2D and 3D textures"); - __target_switch { - case hlsl: __intrinsic_asm ".Gather$(compareFunc)$(componentFunc)"; + case hlsl: + static_assert(Shape.flavor == $(SLANG_TEXTURE_2D) || Shape.flavor == $(SLANG_TEXTURE_CUBE), + "Gather is supported only for 2D and 3D textures"); + __intrinsic_asm ".Gather$(compareFunc)$(componentFunc)"; + case cuda: + static_assert(Shape.flavor == $(SLANG_TEXTURE_2D), "CUDA Gather is supported only for 2D textures"); + static_assert(isArray == 0, "CUDA Gather does not support texture arrays"); + __intrinsic_asm "tex2Dgather<$T0>($0, ($2).x, ($2).y$(componentArg))"; case metal: case wgsl: + static_assert(Shape.flavor == $(SLANG_TEXTURE_2D) || Shape.flavor == $(SLANG_TEXTURE_CUBE), + "Gather is supported only for 2D and 3D textures"); return __texture_gather$(compareFunc)($(getTexture) $(getSampler), location $(compareArg) $(componentArg)); case glsl: case spirv: + static_assert(Shape.flavor == $(SLANG_TEXTURE_2D) || Shape.flavor == $(SLANG_TEXTURE_CUBE), + "Gather is supported only for 2D and 3D textures"); return __texture_gather$(compareFunc)(this $(samplerArg), location $(compareArg) $(componentArg)); } } @@ -3519,24 +3527,33 @@ ${{{{ } [ForceInline] - [require(glsl_hlsl_metal_spirv_wgsl, texture_gather)] + [require(cuda_glsl_hlsl_metal_spirv_wgsl, texture_gather)] vector<T.Element,4> Gather$(compareFunc)$(componentFunc)( $(samplerParam) vector<float, Shape.dimensions+isArray> location $(compareParam), constexpr vector<int, Shape.planeDimensions> offset) { - static_assert(Shape.flavor == $(SLANG_TEXTURE_2D) || Shape.flavor == $(SLANG_TEXTURE_CUBE), - "Gather is supported only for 2D and 3D textures"); - __target_switch { - case hlsl: __intrinsic_asm ".Gather$(compareFunc)$(componentFunc)"; + case hlsl: + static_assert(Shape.flavor == $(SLANG_TEXTURE_2D) || Shape.flavor == $(SLANG_TEXTURE_CUBE), + "Gather is supported only for 2D and 3D textures"); + __intrinsic_asm ".Gather$(compareFunc)$(componentFunc)"; + case cuda: + // CUDA tex2Dgather doesn't support offset - ignore offset parameter + static_assert(Shape.flavor == $(SLANG_TEXTURE_2D), "CUDA Gather is supported only for 2D textures"); + static_assert(isArray == 0, "CUDA Gather does not support texture arrays"); + __intrinsic_asm "tex2Dgather<$T0>($0, ($2).x, ($2).y$(componentArg))"; case metal: case wgsl: + static_assert(Shape.flavor == $(SLANG_TEXTURE_2D) || Shape.flavor == $(SLANG_TEXTURE_CUBE), + "Gather is supported only for 2D and 3D textures"); return __texture_gather$(compareFunc)_offset($(getTexture) $(getSampler), location $(compareArg), offset $(componentArg)); case glsl: case spirv: + static_assert(Shape.flavor == $(SLANG_TEXTURE_2D) || Shape.flavor == $(SLANG_TEXTURE_CUBE), + "Gather is supported only for 2D and 3D textures"); return __texture_gather$(compareFunc)_offset(this $(samplerArg), location $(compareArg), offset $(componentArg)); } } @@ -5704,7 +5721,7 @@ ${{{{ case hlsl: __intrinsic_asm ".InterlockedCompareStore64"; default: let buf = __getEquivalentStructuredBuffer<T>(this); - __atomic_compare_exchange(buf[byteAddress / 4], compareValue, value); + __atomic_compare_exchange(buf[byteAddress / 8], compareValue, value); return; } } @@ -14917,7 +14934,7 @@ matrix<T,N,M> WaveMaskPrefixBitXor(WaveMask mask, matrix<T,N,M> expr) __generic<T : __BuiltinType> __glsl_extension(GL_KHR_shader_subgroup_quad) __spirv_version(1.3) -[require(glsl_hlsl_metal_spirv, subgroup_quad)] +[require(cuda_glsl_hlsl_metal_spirv, subgroup_quad)] T QuadReadLaneAt(T sourceValue, uint quadLaneID) { __target_switch @@ -14934,12 +14951,14 @@ T QuadReadLaneAt(T sourceValue, uint quadLaneID) OpCapability GroupNonUniformQuad; result:$$T = OpGroupNonUniformQuadBroadcast Subgroup $sourceValue $quadLaneID; }; + case cuda: + __intrinsic_asm "_waveShuffleMultiple(0xFFFFFFFF, $0, (_getLaneId() & 0xFFFFFFFC) | ($1 & 3))"; } } __generic<T : __BuiltinType, let N : int> __glsl_extension(GL_KHR_shader_subgroup_quad) __spirv_version(1.3) -[require(glsl_hlsl_metal_spirv, subgroup_quad)] +[require(cuda_glsl_hlsl_metal_spirv, subgroup_quad)] vector<T,N> QuadReadLaneAt(vector<T,N> sourceValue, uint quadLaneID) { __target_switch @@ -14955,6 +14974,8 @@ vector<T,N> QuadReadLaneAt(vector<T,N> sourceValue, uint quadLaneID) OpCapability GroupNonUniformQuad; result:$$vector<T,N> = OpGroupNonUniformQuadBroadcast Subgroup $sourceValue $quadLaneID; }; + case cuda: + __intrinsic_asm "_waveShuffleMultiple(0xFFFFFFFF, $0, (_getLaneId() & 0xFFFFFFFC) | ($1 & 3))"; } } __generic<T : __BuiltinType, let N : int, let M : int> matrix<T,N,M> QuadReadLaneAt(matrix<T,N,M> sourceValue, uint quadLaneID); @@ -14965,7 +14986,7 @@ __glsl_extension(GL_KHR_shader_subgroup_quad) __spirv_version(1.3) __wgsl_extension(subgroups) [ForceInline] -[require(glsl_hlsl_metal_spirv_wgsl, subgroup_quad)] +[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_quad)] T QuadReadAcrossX(T localValue) { __target_switch @@ -14982,6 +15003,8 @@ T QuadReadAcrossX(T localValue) result:$$T = OpGroupNonUniformQuadSwap Subgroup $localValue $direction; }; case wgsl: __intrinsic_asm "quadSwapX"; + case cuda: + __intrinsic_asm "_waveShuffleMultiple(0xFFFFFFFF, $0, _getLaneId() ^ 1)"; } } @@ -14990,7 +15013,7 @@ __glsl_extension(GL_KHR_shader_subgroup_quad) __spirv_version(1.3) __wgsl_extension(subgroups) [ForceInline] -[require(glsl_hlsl_metal_spirv_wgsl, subgroup_quad)] +[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_quad)] vector<T,N> QuadReadAcrossX(vector<T,N> localValue) { __target_switch @@ -15007,6 +15030,8 @@ vector<T,N> QuadReadAcrossX(vector<T,N> localValue) result:$$vector<T,N> = OpGroupNonUniformQuadSwap Subgroup $localValue $direction; }; case wgsl: __intrinsic_asm "quadSwapX"; + case cuda: + __intrinsic_asm "_waveShuffleMultiple(0xFFFFFFFF, $0, _getLaneId() ^ 1)"; } } __generic<T : __BuiltinType, let N : int, let M : int> matrix<T,N,M> QuadReadAcrossX(matrix<T,N,M> localValue); @@ -15017,7 +15042,7 @@ __glsl_extension(GL_KHR_shader_subgroup_quad) __spirv_version(1.3) __wgsl_extension(subgroups) [ForceInline] -[require(glsl_hlsl_metal_spirv_wgsl, subgroup_quad)] +[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_quad)] T QuadReadAcrossY(T localValue) { __target_switch @@ -15034,6 +15059,8 @@ T QuadReadAcrossY(T localValue) result:$$T = OpGroupNonUniformQuadSwap Subgroup $localValue $direction; }; case wgsl: __intrinsic_asm "quadSwapY"; + case cuda: + __intrinsic_asm "_waveShuffleMultiple(0xFFFFFFFF, $0, _getLaneId() ^ 2)"; } } __generic<T : __BuiltinType, let N : int> @@ -15041,7 +15068,7 @@ __glsl_extension(GL_KHR_shader_subgroup_quad) __spirv_version(1.3) __wgsl_extension(subgroups) [ForceInline] -[require(glsl_hlsl_metal_spirv_wgsl, subgroup_quad)] +[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_quad)] vector<T,N> QuadReadAcrossY(vector<T,N> localValue) { __target_switch @@ -15058,6 +15085,8 @@ vector<T,N> QuadReadAcrossY(vector<T,N> localValue) result:$$vector<T,N> = OpGroupNonUniformQuadSwap Subgroup $localValue $direction; }; case wgsl: __intrinsic_asm "quadSwapY"; + case cuda: + __intrinsic_asm "_waveShuffleMultiple(0xFFFFFFFF, $0, _getLaneId() ^ 2)"; } } __generic<T : __BuiltinType, let N : int, let M : int> matrix<T,N,M> QuadReadAcrossY(matrix<T,N,M> localValue); @@ -15068,7 +15097,7 @@ __glsl_extension(GL_KHR_shader_subgroup_quad) __spirv_version(1.3) __wgsl_extension(subgroups) [ForceInline] -[require(glsl_hlsl_metal_spirv_wgsl, subgroup_quad)] +[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_quad)] T QuadReadAcrossDiagonal(T localValue) { __target_switch @@ -15085,6 +15114,8 @@ T QuadReadAcrossDiagonal(T localValue) result:$$T = OpGroupNonUniformQuadSwap Subgroup $localValue $direction; }; case wgsl: __intrinsic_asm "quadSwapDiagonal"; + case cuda: + __intrinsic_asm "_waveShuffleMultiple(0xFFFFFFFF, $0, _getLaneId() ^ 3)"; } } __generic<T : __BuiltinType, let N : int> @@ -15092,7 +15123,7 @@ __glsl_extension(GL_KHR_shader_subgroup_quad) __spirv_version(1.3) __wgsl_extension(subgroups) [ForceInline] -[require(glsl_hlsl_metal_spirv_wgsl, subgroup_quad)] +[require(cuda_glsl_hlsl_metal_spirv_wgsl, subgroup_quad)] vector<T,N> QuadReadAcrossDiagonal(vector<T,N> localValue) { __target_switch @@ -15109,6 +15140,8 @@ vector<T,N> QuadReadAcrossDiagonal(vector<T,N> localValue) result:$$vector<T,N> = OpGroupNonUniformQuadSwap Subgroup $localValue $direction; }; case wgsl: __intrinsic_asm "quadSwapDiagonal"; + case cuda: + __intrinsic_asm "_waveShuffleMultiple(0xFFFFFFFF, $0, _getLaneId() ^ 3)"; } } __generic<T : __BuiltinType, let N : int, let M : int> matrix<T,N,M> QuadReadAcrossDiagonal(matrix<T,N,M> localValue); @@ -16300,7 +16333,7 @@ void shader_subgroup_preamble() __generic<T : __BuiltinType> __glsl_extension(GL_KHR_shader_subgroup_rotate) -[require(glsl_metal_spirv, subgroup_rotate)] +[require(cuda_glsl_metal_spirv, subgroup_rotate)] T WaveRotate(T value, uint delta) { shader_subgroup_preamble<T>(); @@ -16308,6 +16341,8 @@ T WaveRotate(T value, uint delta) { case glsl: __intrinsic_asm "subgroupRotate"; + case cuda: + __intrinsic_asm "__shfl_sync(0xFFFFFFFF, $0, (_getLaneId() + $1) % 32)"; case metal: __intrinsic_asm "simd_shuffle_rotate_down"; case spirv: @@ -16322,7 +16357,7 @@ T WaveRotate(T value, uint delta) __generic<T : __BuiltinType, let N : int> __glsl_extension(GL_KHR_shader_subgroup_rotate) -[require(glsl_metal_spirv, subgroup_rotate)] +[require(cuda_glsl_metal_spirv, subgroup_rotate)] vector<T, N> WaveRotate(vector<T, N> value, uint delta) { shader_subgroup_preamble<T>(); @@ -16330,6 +16365,8 @@ vector<T, N> WaveRotate(vector<T, N> value, uint delta) { case glsl: __intrinsic_asm "subgroupRotate"; + case cuda: + __intrinsic_asm "_slang_waveRotate($0, $1)"; case metal: __intrinsic_asm "simd_shuffle_rotate_down"; case spirv: @@ -16344,7 +16381,7 @@ vector<T, N> WaveRotate(vector<T, N> value, uint delta) __generic<T : __BuiltinType> __glsl_extension(GL_KHR_shader_subgroup_rotate) -[require(glsl_spirv, subgroup_rotate)] +[require(cuda_glsl_spirv, subgroup_rotate)] T WaveClusteredRotate(T value, uint delta, constexpr uint clusterSize) { shader_subgroup_preamble<T>(); @@ -16352,6 +16389,8 @@ T WaveClusteredRotate(T value, uint delta, constexpr uint clusterSize) { case glsl: __intrinsic_asm "subgroupClusteredRotate"; + case cuda: + __intrinsic_asm "_slang_waveClusteredRotate($0, $1, $2)"; case spirv: return spirv_asm { @@ -16364,7 +16403,7 @@ T WaveClusteredRotate(T value, uint delta, constexpr uint clusterSize) __generic<T : __BuiltinType, let N : int> __glsl_extension(GL_KHR_shader_subgroup_rotate) -[require(glsl_spirv, subgroup_rotate)] +[require(cuda_glsl_spirv, subgroup_rotate)] vector<T, N> WaveClusteredRotate(vector<T, N> value, uint delta, constexpr uint clusterSize) { shader_subgroup_preamble<T>(); @@ -16372,6 +16411,8 @@ vector<T, N> WaveClusteredRotate(vector<T, N> value, uint delta, constexpr uint { case glsl: __intrinsic_asm "subgroupClusteredRotate"; + case cuda: + __intrinsic_asm "_slang_waveClusteredRotate($0, $1, $2)"; case spirv: return spirv_asm { @@ -16978,7 +17019,7 @@ __glsl_extension(GL_EXT_maximal_reconvergence) __glsl_extension(GL_EXT_shader_quad_control) __spirv_version(1.3) [ForceInline] -[require(glsl_hlsl_metal_spirv, quad_control)] +[require(cuda_glsl_hlsl_metal_spirv, quad_control)] bool QuadAny(bool expr) { __requireMaximallyReconverges(); @@ -16988,6 +17029,7 @@ bool QuadAny(bool expr) case hlsl: __intrinsic_asm "QuadAny"; case glsl: __intrinsic_asm "subgroupQuadAny"; case metal: __intrinsic_asm "quad_any"; + case cuda: __intrinsic_asm "_slang_quadAny"; case spirv: return spirv_asm { @@ -17003,7 +17045,7 @@ __glsl_extension(GL_EXT_maximal_reconvergence) __glsl_extension(GL_EXT_shader_quad_control) __spirv_version(1.3) [ForceInline] -[require(glsl_hlsl_metal_spirv, quad_control)] +[require(cuda_glsl_hlsl_metal_spirv, quad_control)] bool QuadAll(bool expr) { __requireMaximallyReconverges(); @@ -17013,6 +17055,7 @@ bool QuadAll(bool expr) case hlsl: __intrinsic_asm "QuadAll"; case glsl: __intrinsic_asm "subgroupQuadAll"; case metal: __intrinsic_asm "quad_all"; + case cuda: __intrinsic_asm "_slang_quadAll"; case spirv: return spirv_asm { diff --git a/source/slang/slang-capabilities.capdef b/source/slang/slang-capabilities.capdef index 822356312..ff9697f7d 100644 --- a/source/slang/slang-capabilities.capdef +++ b/source/slang/slang-capabilities.capdef @@ -2226,6 +2226,7 @@ alias subgroup_partitioned = _sm_6_5 /// [Compound] alias subgroup_rotate = GL_KHR_shader_subgroup_rotate | metal + | _cuda_sm_5_0 ; /// (All implemented targets) Capabilities required to use atomic operations of GLSL tier-1 float atomics @@ -2260,6 +2261,7 @@ alias helper_lane = _sm_6_0 + fragment alias quad_control = _sm_6_7 | GL_EXT_shader_quad_control + GL_EXT_maximal_reconvergence + GL_KHR_shader_subgroup_vote | metal + | _cuda_sm_5_0 ; /// Capabilities required to enable shader breakpoints |
