// Slang HLSL compatibility library
//
// NOTE(review): generic parameter lists (after `__generic`, `vector`,
// `__byteAddressBufferLoad`, `__naturalStrideOf`, ...) look like they were
// stripped from this copy of the file. Tokens are reproduced as found;
// confirm against upstream before relying on them.

typedef uint UINT;

// Intrinsic bound to kIROp_RequireGLSLExtension.
__intrinsic_op($(kIROp_RequireGLSLExtension))
void __requireGLSLExtension(String extensionName);

// Sealed interface implemented by the buffer data-layout tag types below.
[sealed]
interface IBufferDataLayout
{
}

__intrinsic_type($(kIROp_DefaultBufferLayoutType))
struct DefaultDataLayout : IBufferDataLayout {};

// Std140 / Std430 layout tags both require the spirv and glsl capabilities.
__intrinsic_type($(kIROp_Std140BufferLayoutType))
[require(spirv)]
[require(glsl)]
struct Std140DataLayout : IBufferDataLayout {};

__intrinsic_type($(kIROp_Std430BufferLayoutType))
[require(spirv)]
[require(glsl)]
struct Std430DataLayout : IBufferDataLayout {};

__intrinsic_type($(kIROp_ScalarBufferLayoutType))
struct ScalarDataLayout : IBufferDataLayout {};

__generic
__intrinsic_type($(kIROp_GLSLShaderStorageBufferType))
__magic_type(GLSLShaderStorageBufferType)
struct GLSLShaderStorageBuffer {}

// One overload of __structuredBufferGetDimensions per structured-buffer
// flavor. Callers in this file read `.x` as the element count and `.y` as
// the stride (see AppendStructuredBuffer.GetDimensions).
__generic
__intrinsic_op($(kIROp_StructuredBufferGetDimensions))
[require(cpp_cuda_glsl_hlsl_metal_spirv, appendstructuredbuffer)]
uint2 __structuredBufferGetDimensions(AppendStructuredBuffer buffer);

__generic
__intrinsic_op($(kIROp_StructuredBufferGetDimensions))
[require(cpp_cuda_glsl_hlsl_metal_spirv, consumestructuredbuffer)]
uint2 __structuredBufferGetDimensions(ConsumeStructuredBuffer buffer);

__intrinsic_op($(kIROp_StructuredBufferGetDimensions))
[require(cpp_cuda_glsl_hlsl_metal_spirv, structuredbuffer)]
uint2 __structuredBufferGetDimensions(StructuredBuffer buffer);

__intrinsic_op($(kIROp_StructuredBufferGetDimensions))
[require(cpp_cuda_glsl_hlsl_metal_spirv, structuredbuffer_rw)]
uint2 __structuredBufferGetDimensions(RWStructuredBuffer buffer);

__intrinsic_op($(kIROp_StructuredBufferGetDimensions))
[require(cpp_cuda_glsl_hlsl_metal_spirv, structuredbuffer_rw)]
uint2 __structuredBufferGetDimensions(RasterizerOrderedStructuredBuffer buffer);

__generic
__magic_type(HLSLAppendStructuredBufferType)
__intrinsic_type($(kIROp_HLSLAppendStructuredBufferType))
[require(cpp_cuda_glsl_hlsl_spirv, appendstructuredbuffer)]
struct AppendStructuredBuffer
{
    __intrinsic_op($(kIROp_StructuredBufferAppend))
    void Append(T value);

    // Splits the uint2 from __structuredBufferGetDimensions into its parts.
    [ForceInline]
    void GetDimensions( out uint numStructs, out uint stride)
    {
        let result = __structuredBufferGetDimensions(this);
        numStructs = result.x;
        stride = result.y;
    }
};

__magic_type(HLSLByteAddressBufferType)
__intrinsic_type($(kIROp_HLSLByteAddressBufferType))
[require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer)]
struct ByteAddressBuffer
{
    // cpp/cuda/hlsl forward to the target's own `.GetDimensions`; the other
    // targets take the element count of the equivalent structured buffer
    // and multiply it by 4.
    [__readNone]
    [__unsafeForceInlineEarly]
    [require(cpp_cuda_glsl_hlsl_metal_spirv, structuredbuffer)]
    void GetDimensions(out uint dim)
    {
        __target_switch
        {
        case cpp: __intrinsic_asm ".GetDimensions";
        case cuda: __intrinsic_asm ".GetDimensions";
        case hlsl: __intrinsic_asm ".GetDimensions";
        case glsl:
        case metal:
        case spirv:
            dim = __structuredBufferGetDimensions(__getEquivalentStructuredBuffer(this)).x*4;
        }
    }

    // Load / Load2 / Load3 / Load4 families.
    //   (location)            - hlsl uses the member intrinsic, other targets
    //                           call __byteAddressBufferLoad with alignment 0.
    //   (location, alignment) - same, forwarding the caller's alignment.
    //   *Aligned(location)    - same, passing __naturalStrideOf() as alignment.
    //   (location, status)    - hlsl only.
    [__readNone]
    [ForceInline]
    [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer)]
    uint Load(int location)
    {
        __target_switch
        {
        case hlsl: __intrinsic_asm ".Load";
        default: return __byteAddressBufferLoad(this, location, 0);
        }
    }

    [__readNone]
    [ForceInline]
    [require(hlsl, byteaddressbuffer)]
    uint Load(int location, out uint status)
    {
        __target_switch
        {
        case hlsl: __intrinsic_asm ".Load";
        }
    }

    [__readNone]
    [ForceInline]
    [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer)]
    uint2 Load2(int location)
    {
        __target_switch
        {
        case hlsl: __intrinsic_asm ".Load2";
        default: return __byteAddressBufferLoad(this, location, 0);
        }
    }

    [__readNone]
    [ForceInline]
    [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer)]
    uint2 Load2(int location, int alignment)
    {
        __target_switch
        {
        case hlsl: __intrinsic_asm ".Load2";
        default: return __byteAddressBufferLoad(this, location, alignment);
        }
    }

    [__readNone]
    [ForceInline]
    [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer)]
    uint2 Load2Aligned(int location)
    {
        __target_switch
        {
        case hlsl: __intrinsic_asm ".Load2";
        default: return __byteAddressBufferLoad(this, location, __naturalStrideOf());
        }
    }

    [__readNone]
    [ForceInline]
    [require(hlsl, byteaddressbuffer)]
    uint2 Load2(int location, out uint status)
    {
        __target_switch
        {
        case hlsl: __intrinsic_asm ".Load2";
        }
    }

    [__readNone]
    [ForceInline]
    [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer)]
    uint3 Load3(int location)
    {
        __target_switch
        {
        case hlsl: __intrinsic_asm ".Load3";
        default: return __byteAddressBufferLoad(this, location, 0);
        }
    }

    [__readNone]
    [ForceInline]
    [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer)]
    uint3 Load3(int location, int alignment)
    {
        __target_switch
        {
        case hlsl: __intrinsic_asm ".Load3";
        default: return __byteAddressBufferLoad(this, location, alignment);
        }
    }

    [__readNone]
    [ForceInline]
    [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer)]
    uint3 Load3Aligned(int location)
    {
        __target_switch
        {
        case hlsl: __intrinsic_asm ".Load3";
        default: return __byteAddressBufferLoad(this, location, __naturalStrideOf());
        }
    }

    [__readNone]
    [ForceInline]
    [require(hlsl, byteaddressbuffer)]
    uint3 Load3(int location, out uint status)
    {
        __target_switch
        {
        case hlsl: __intrinsic_asm ".Load3";
        }
    }

    [__readNone]
    [ForceInline]
    [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer)]
    uint4 Load4(int location)
    {
        __target_switch
        {
        case hlsl: __intrinsic_asm ".Load4";
        default: return __byteAddressBufferLoad(this, location, 0);
        }
    }

    [__readNone]
    [ForceInline]
    [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer)]
    uint4 Load4(int location, int alignment)
    {
        __target_switch
        {
        case hlsl: __intrinsic_asm ".Load4";
        default: return __byteAddressBufferLoad(this, location, alignment);
        }
    }

    [__readNone]
    [ForceInline]
    [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer)]
    uint4 Load4Aligned(int location)
    {
        __target_switch
        {
        case hlsl: __intrinsic_asm ".Load4";
        default: return __byteAddressBufferLoad(this, location, __naturalStrideOf());
        }
    }

    [__readNone]
    [ForceInline]
    [require(hlsl, byteaddressbuffer)]
    uint4 Load4(int location, out uint status)
    {
        __target_switch
        {
        case hlsl: __intrinsic_asm ".Load4";
        }
    }

    // Loads returning T: always go through __byteAddressBufferLoad.
    [__readNone]
    [ForceInline]
    T Load(int location)
    {
        return __byteAddressBufferLoad(this, location, 0);
    }

    [__readNone]
    [ForceInline]
    T Load(int location, int alignment)
    {
        return __byteAddressBufferLoad(this, location, alignment);
    }

    [__readNone]
    [ForceInline]
    T LoadAligned(int location)
    {
        return __byteAddressBufferLoad(this, location, __naturalStrideOf());
    }
};

// Texture

// Shape tag interface: each shape declares its flavor constant, its
// `dimensions`, and its `planeDimensions`.
[sealed]
[builtin]
interface __ITextureShape
{
    static const int flavor;
    static const int dimensions;
    static const int planeDimensions;
}

[sealed]
[builtin]
interface __ITextureShape1D2D3D : __ITextureShape
{
}

__magic_type(TextureShape1DType)
__intrinsic_type($(kIROp_TextureShape1DType))
struct __Shape1D : __ITextureShape1D2D3D
{
    static const int flavor = $(SLANG_TEXTURE_1D);
    static const int dimensions = 1;
    static const int planeDimensions = 1;
}

__magic_type(TextureShape2DType)
__intrinsic_type($(kIROp_TextureShape2DType))
struct __Shape2D : __ITextureShape1D2D3D
{
    static const int flavor = $(SLANG_TEXTURE_2D);
    static const int dimensions = 2;
    static const int planeDimensions = 2;
}

__magic_type(TextureShape3DType)
__intrinsic_type($(kIROp_TextureShape3DType))
struct __Shape3D : __ITextureShape1D2D3D
{
    static const int flavor = $(SLANG_TEXTURE_3D);
    static const int dimensions = 3;
    static const int planeDimensions = 3;
}

// Cube is the one shape here whose dimensions (3) and planeDimensions (2) differ.
__magic_type(TextureShapeCubeType)
__intrinsic_type($(kIROp_TextureShapeCubeType))
struct __ShapeCube : __ITextureShape
{
    static const int flavor = $(SLANG_TEXTURE_CUBE);
    static const int dimensions = 3;
    static const int planeDimensions = 2;
}

__magic_type(TextureShapeBufferType)
__intrinsic_type($(kIROp_TextureShapeBufferType))
struct __ShapeBuffer : __ITextureShape
{
    static const int flavor = $(SLANG_TEXTURE_BUFFER);
    static const int dimensions = 1;
    static const int planeDimensions = 1;
}

__intrinsic_op(vectorReshape)
vector __vectorReshape(vector vin);

__intrinsic_op(makeVector)
__generic
vector __makeVector(vector vec, T scalar);

__magic_type(TextureType)
__intrinsic_type($(kIROp_TextureType))
struct __TextureImpl
{
}

// Combined texture sampler specific functions

// GLSL-only helpers used by the SampleCmp* members. Each one forwards its
// arguments to a single GLSL builtin via __intrinsic_asm.
[require(glsl, texture_sm_4_1)]
float __glsl_texture(TSampler s, TCoord value)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "texture($0, $1)";
    }
}

__glsl_extension(GL_EXT_texture_shadow_lod)
[require(glsl, texture_shadowlod)]
float __glsl_texture_1d_shadow(TSampler s, TCoord value)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "texture($0, $1)";
    }
}

__glsl_extension(GL_EXT_texture_shadow_lod)
[require(glsl, texture_shadowlod)]
float __glsl_texture_3d_array_shadow(TSampler s, TCoord value, float compare)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "texture($0, $1, $2)";
    }
}

__glsl_extension(GL_EXT_texture_shadow_lod)
[require(glsl, texture_sm_4_1)]
float __glsl_texture_offset( TSampler s, TCoord value, constexpr TOffset offset)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "textureOffset($0, $1, $2)";
    }
}

__glsl_extension(GL_EXT_texture_shadow_lod)
[require(glsl, texture_shadowlod)]
float __glsl_texture_offset_1d_shadow(TSampler s, TCoord value, constexpr TOffset offset)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "textureOffset($0, $1, $2)";
    }
}

__glsl_extension(GL_EXT_texture_shadow_lod)
[require(glsl, texture_sm_4_1)]
float __glsl_texture_level_zero(TSampler s, TCoord value)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "textureLod($0, $1, 0)";
    }
}

__glsl_extension(GL_EXT_texture_shadow_lod)
[require(glsl, texture_shadowlod)]
float __glsl_texture_level_zero_1d_shadow(TSampler s, TCoord value)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "textureLod($0, $1, 0)";
    }
}

__glsl_extension(GL_EXT_texture_shadow_lod)
[require(glsl, texture_shadowlod)]
float __glsl_texture_offset_level_zero(TSampler s, TCoord value, constexpr TOffset offset)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "textureLodOffset($0, $1, 0, $2)";
    }
}

__glsl_extension(GL_EXT_texture_shadow_lod)
[require(glsl, texture_shadowlod)]
float __glsl_texture_offset_level_zero_1d_shadow(TSampler s, TCoord value, constexpr TOffset offset)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "textureLodOffset($0, $1, 0, $2)";
    }
}

// Separate texture + SamplerComparisonState variants of the helpers above.
// They use `$p` in place of `$0` and shift the remaining argument indices
// by one.
[require(glsl, texture_sm_4_1)]
float __glsl_texture(TTexture t, SamplerComparisonState s, TCoord value)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "texture($p, $2)";
    }
}

__glsl_extension(GL_EXT_texture_shadow_lod)
[require(glsl, texture_shadowlod)]
float __glsl_texture_1d_shadow(TTexture t, SamplerComparisonState s, TCoord value)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "texture($p, $2)";
    }
}

__glsl_extension(GL_EXT_texture_shadow_lod)
[require(glsl, texture_shadowlod)]
float __glsl_texture_3d_array_shadow(TTexture t, SamplerComparisonState s, TCoord value, float compare)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "texture($p, $2, $3)";
    }
}

[require(glsl, texture_sm_4_1)]
float __glsl_texture_offset(TTexture t,SamplerComparisonState s, TCoord value, constexpr TOffset offset)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "textureOffset($p, $2, $3)";
    }
}

__glsl_extension(GL_EXT_texture_shadow_lod)
[require(glsl, texture_shadowlod)]
float __glsl_texture_offset_1d_shadow(TTexture t,SamplerComparisonState s, TCoord value, constexpr TOffset offset)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "textureOffset($p, $2, $3)";
    }
}

[require(glsl, texture_sm_4_1)]
float __glsl_texture_level_zero(TTexture t,SamplerComparisonState s, TCoord value)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "textureLod($p, $2, 0)";
    }
}

__glsl_extension(GL_EXT_texture_shadow_lod)
[require(glsl, texture_shadowlod)]
float __glsl_texture_level_zero_1d_shadow(TTexture t,SamplerComparisonState s, TCoord value)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "textureLod($p, $2, 0)";
    }
}

__glsl_extension(GL_EXT_texture_shadow_lod)
[require(glsl, texture_shadowlod)]
float __glsl_texture_offset_level_zero(TTexture t,SamplerComparisonState s, TCoord value, constexpr TOffset offset)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "textureLodOffset($p, $2, 0, $3)";
    }
}

__glsl_extension(GL_EXT_texture_shadow_lod)
[require(glsl, texture_shadowlod)]
float __glsl_texture_offset_level_zero_1d_shadow(TTexture t,SamplerComparisonState s, TCoord value, constexpr TOffset offset)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "textureLodOffset($p, $2, 0, $3)";
    }
}

// Members for the combined texture-sampler form of __TextureImpl.
// hlsl/metal/cpp cases split `this` with __getTexture()/__getSampler() and
// forward to the separate-sampler overload; glsl and spirv operate on the
// combined object directly ($0 / $this).
// NOTE(review): the generic parameter list of this extension is missing in
// this copy; T, Shape and isArray are used below but not declared here.
__generic
extension __TextureImpl
{
    static const int access = 0;

    typealias TextureCoord = vector;

    __intrinsic_op($(kIROp_CombinedTextureSamplerGetTexture))
    __TextureImpl __getTexture();

    __intrinsic_op($(kIROp_CombinedTextureSamplerGetSampler))
    SamplerState __getSampler();

    __intrinsic_op($(kIROp_CombinedTextureSamplerGetSampler))
    SamplerComparisonState __getComparisonSampler();

    // Returns the .x component of the target's LOD query result.
    [__readNone]
    [ForceInline]
    [require(glsl_hlsl_metal_spirv, texture_querylod)]
    float CalculateLevelOfDetail(TextureCoord location)
    {
        __requireComputeDerivative();
        __target_switch
        {
        case hlsl:
            static_assert(T is float || T is vector || T is vector || T is vector
                || T is half || T is vector || T is vector || T is vector
                , "HLSL supports only float and half type textures");
            return __getTexture().CalculateLevelOfDetail(__getSampler(), location);
        case metal:
            return __getTexture().CalculateLevelOfDetail(__getSampler(), location);
        case glsl:
            __intrinsic_asm "textureQueryLod($0, $1).x";
        case spirv:
            return (spirv_asm {
                result:$$float2 = OpImageQueryLod $this $location
            }).x;
        }
    }

    // Same query as CalculateLevelOfDetail, returning the .y component.
    [__readNone]
    [ForceInline]
    [require(glsl_hlsl_metal_spirv, texture_querylod)]
    float CalculateLevelOfDetailUnclamped(TextureCoord location)
    {
        __requireComputeDerivative();
        __target_switch
        {
        case hlsl:
            static_assert(T is float || T is vector || T is vector || T is vector
                || T is half || T is vector || T is vector || T is vector
                , "HLSL supports only float and half type textures");
            return __getTexture().CalculateLevelOfDetailUnclamped(__getSampler(), location);
        case metal:
            return __getTexture().CalculateLevelOfDetailUnclamped(__getSampler(), location);
        case glsl:
            __intrinsic_asm "textureQueryLod($0, $1).y";
        case spirv:
            return (spirv_asm {
                result:$$float2 = OpImageQueryLod $this $location
            }).y;
        }
    }

    // Implicit-LOD sample. The cuda case picks the tex* builtin from
    // Shape.flavor and isArray; array variants pass the last coordinate
    // component as an int layer index.
    [__readNone]
    [ForceInline]
    [require(cpp_cuda_glsl_hlsl_metal_spirv, texture_sm_4_0_fragment)]
    T Sample(vector location)
    {
        __requireComputeDerivative();
        __target_switch
        {
        case hlsl:
            static_assert(T is float || T is vector || T is vector || T is vector
                || T is half || T is vector || T is vector || T is vector
                , "HLSL supports only float and half type textures");
            return __getTexture().Sample(__getSampler(), location);
        case cpp:
        case metal:
            return __getTexture().Sample(__getSampler(), location);
        case glsl:
            __intrinsic_asm "$ctexture($0, $1)$z";
        case cuda:
            if (isArray != 0)
            {
                switch(Shape.flavor)
                {
                case $(SLANG_TEXTURE_1D):
                    __intrinsic_asm "tex1DLayered<$T0>($0, ($1).x, int(($1).y))";
                case $(SLANG_TEXTURE_2D):
                    __intrinsic_asm "tex2DLayered<$T0>($0, ($1).x, ($1).y, int(($1).z))";
                case $(SLANG_TEXTURE_CUBE):
                    __intrinsic_asm "texCubemapLayered<$T0>($0, ($1).x, ($1).y, ($1).z, int(($1).w))";
                default:
                    __intrinsic_asm "invalid texture shape";
                }
            }
            else
            {
                switch(Shape.flavor)
                {
                case $(SLANG_TEXTURE_1D):
                    __intrinsic_asm "tex1D<$T0>($0, ($1))";
                case $(SLANG_TEXTURE_2D):
                    __intrinsic_asm "tex2D<$T0>($0, ($1).x, ($1).y)";
                case $(SLANG_TEXTURE_3D):
                    __intrinsic_asm "tex3D<$T0>($0, ($1).x, ($1).y, ($1).z)";
                case $(SLANG_TEXTURE_CUBE):
                    __intrinsic_asm "texCubemap<$T0>($0, ($1).x, ($1).y, ($1).z)";
                default:
                    __intrinsic_asm "invalid texture shape";
                }
            }
        case spirv:
            return spirv_asm
            {
                %sampled : __sampledType(T) = OpImageSampleImplicitLod $this $location None;
                __truncate $$T result __sampledType(T) %sampled;
            };
        }
    }

    // Implicit-LOD sample with a constant texel offset.
    [__readNone]
    [ForceInline]
    [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_0_fragment)]
    T Sample(vector location, constexpr vector offset)
    {
        __requireComputeDerivative();
        __target_switch
        {
        case hlsl:
            static_assert(T is float || T is vector || T is vector || T is vector
                || T is half || T is vector || T is vector || T is vector
                , "HLSL supports only float and half type textures");
            return __getTexture().Sample(__getSampler(), location, offset);
        case cpp:
        case metal:
            return __getTexture().Sample(__getSampler(), location, offset);
        case glsl:
            // Was textureOffsetClampARB($0, $1, $2): that builtin needs a
            // lodClamp argument and GL_ARB_sparse_texture_clamp, neither of
            // which this overload has. Plain textureOffset is the
            // three-argument form, as used by the
            // Sample(SamplerState, location, offset) overload.
            __intrinsic_asm "$ctextureOffset($0, $1, $2)$z";
        case spirv:
            // No MinLod operand is used here, so the MinLod capability is
            // not declared (it previously was).
            return spirv_asm
            {
                %sampled : __sampledType(T) = OpImageSampleImplicitLod $this $location None|ConstOffset $offset;
                __truncate $$T result __sampledType(T) %sampled;
            };
        }
    }

    // Implicit-LOD sample with texel offset and a minimum-LOD clamp.
    [__readNone]
    [ForceInline]
    __glsl_extension(GL_ARB_sparse_texture_clamp)
    [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_0_fragment)]
    T Sample(vector location, vector offset, float clamp)
    {
        __requireComputeDerivative();
        __target_switch
        {
        case hlsl:
            static_assert(T is float || T is vector || T is vector || T is vector
                || T is half || T is vector || T is vector || T is vector
                , "HLSL supports only float and half type textures");
            return __getTexture().Sample(__getSampler(), location, offset, clamp);
        case cpp:
        case metal:
            return __getTexture().Sample(__getSampler(), location, offset, clamp);
        case glsl:
            __intrinsic_asm "$ctextureOffsetClampARB($0, $1, $2, $3)$z";
        case spirv:
            return spirv_asm
            {
                OpCapability MinLod;
                %sampled : __sampledType(T) = OpImageSampleImplicitLod $this $location None|ConstOffset|MinLod $offset $clamp;
                __truncate $$T result __sampledType(T) %sampled;
            };
        }
    }

    // Status-returning variant: hlsl uses the native member; every other
    // target writes status = 0 and defers to the clamp overload.
    [__readNone]
    [ForceInline]
    T Sample(vector location, vector offset, float clamp, out uint status)
    {
        __target_switch
        {
        case hlsl:
            static_assert(T is float || T is vector || T is vector || T is vector
                || T is half || T is vector || T is vector || T is vector
                , "HLSL supports only float and half type textures");
            __intrinsic_asm ".Sample";
        default:
            status = 0;
            return Sample(location, offset, clamp);
        }
    }

    // Implicit-LOD sample with an LOD bias.
    [__readNone]
    [ForceInline]
    [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_0_fragment)]
    T SampleBias(vector location, float bias)
    {
        __requireComputeDerivative();
        __target_switch
        {
        case hlsl:
            static_assert(T is float || T is vector || T is vector || T is vector ||
T is half || T is vector || T is vector || T is vector
                , "HLSL supports only float and half type textures");
            return __getTexture().SampleBias(__getSampler(), location, bias);
        case cpp:
        case metal:
            return __getTexture().SampleBias(__getSampler(), location, bias);
        case glsl:
            __intrinsic_asm "$ctexture($0, $1, $2)$z";
        case spirv:
            return spirv_asm
            {
                %sampled : __sampledType(T) = OpImageSampleImplicitLod $this $location None|Bias $bias;
                __truncate $$T result __sampledType(T) %sampled;
            };
        }
    }

    // Biased sample with a constant texel offset. The glsl template passes
    // $3 (offset) before $2 (bias) to textureOffset.
    [__readNone]
    [ForceInline]
    [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_0_fragment)]
    T SampleBias(vector location, float bias, constexpr vector offset)
    {
        __requireComputeDerivative();
        __target_switch
        {
        case hlsl:
            static_assert(T is float || T is vector || T is vector || T is vector
                || T is half || T is vector || T is vector || T is vector
                , "HLSL supports only float and half type textures");
            return __getTexture().SampleBias(__getSampler(), location, bias, offset);
        case cpp:
        case metal:
            return __getTexture().SampleBias(__getSampler(), location, bias, offset);
        case glsl:
            __intrinsic_asm "$ctextureOffset($0, $1, $3, $2)$z";
        case spirv:
            return spirv_asm
            {
                %sampled : __sampledType(T) = OpImageSampleImplicitLod $this $location None|Bias|ConstOffset $bias $offset;
                __truncate $$T result __sampledType(T) %sampled;
            };
        }
    }

    // Depth-comparison sample. The glsl case selects a helper by shape:
    //   dimensions == 1, non-array : location is padded with 0.0 and then
    //                                compareValue before the call;
    //   dimensions == 3, array     : compareValue is passed as its own argument;
    //   otherwise                  : compareValue is appended to location.
    [__readNone]
    [ForceInline]
    [require(glsl_hlsl_metal_spirv, texture_shadowlod)]
    float SampleCmp(vector location, float compareValue)
    {
        __requireComputeDerivative();
        __target_switch
        {
        case glsl:
            if (Shape.dimensions == 1 && isArray == 0)
            {
                return __glsl_texture_1d_shadow(this, __makeVector(__makeVector(location, 0.0), compareValue));
            }
            else if (Shape.dimensions == 3 && isArray == 1)
            {
                return __glsl_texture_3d_array_shadow(this, location, compareValue);
            }
            else
            {
                return __glsl_texture(this, __makeVector(location, compareValue));
            }
        case hlsl:
            static_assert(T is float || T is vector || T is vector || T is vector
                || T is half || T is vector || T is vector || T is vector
                , "HLSL supports only float and half type textures");
            return __getTexture().SampleCmp(__getComparisonSampler(), location, compareValue);
        case metal:
            return __getTexture().SampleCmp(__getComparisonSampler(), location, compareValue);
        case spirv:
            return spirv_asm
            {
                result:$$float = OpImageSampleDrefImplicitLod $this $location $compareValue;
            };
        }
    }

    // Depth-comparison sample at LOD 0 (spirv passes an explicit Lod of 0.0f).
    [__readNone]
    [ForceInline]
    [require(glsl_hlsl_metal_spirv, texture_shadowlod)]
    float SampleCmpLevelZero(vector location, float compareValue)
    {
        __target_switch
        {
        case glsl:
            if (Shape.dimensions == 1 && isArray == 0)
            {
                return __glsl_texture_level_zero_1d_shadow(this, __makeVector(__makeVector(location, 0.0), compareValue));
            }
            else
            {
                return __glsl_texture_level_zero(this, __makeVector(location, compareValue));
            }
        case hlsl:
            static_assert(T is float || T is vector || T is vector || T is vector
                || T is half || T is vector || T is vector || T is vector
                , "HLSL supports only float and half type textures");
            return __getTexture().SampleCmpLevelZero(__getComparisonSampler(), location, compareValue);
        case metal:
            return __getTexture().SampleCmpLevelZero(__getComparisonSampler(), location, compareValue);
        case spirv:
            const float zeroFloat = 0.0f;
            return spirv_asm
            {
                result:$$float = OpImageSampleDrefExplicitLod $this $location $compareValue Lod $zeroFloat;
            };
        }
    }

    // Depth-comparison sample with a constant texel offset.
    [__readNone]
    [ForceInline]
    [require(glsl_hlsl_metal_spirv, texture_shadowlod)]
    float SampleCmp(vector location, float compareValue, constexpr vector offset)
    {
        __requireComputeDerivative();
        __target_switch
        {
        case glsl:
            if (Shape.dimensions == 1 && isArray == 0)
            {
                return __glsl_texture_offset_1d_shadow(this, __makeVector(__makeVector(location, 0.0), compareValue), offset);
            }
            else
            {
                return __glsl_texture_offset(this, __makeVector(location, compareValue), offset);
            }
        case hlsl:
            static_assert(T is float || T is vector || T is vector || T is vector
                || T is half || T is vector || T is vector || T is vector
                , "HLSL supports only float and half type textures");
            return __getTexture().SampleCmp(__getComparisonSampler(), location, compareValue, offset);
        case metal:
            return __getTexture().SampleCmp(__getComparisonSampler(), location, compareValue, offset);
        case spirv:
            return spirv_asm
            {
                result:$$float = OpImageSampleDrefImplicitLod $this $location $compareValue ConstOffset $offset;
            };
        }
    }

    // Depth-comparison sample at LOD 0 with a constant texel offset.
    [__readNone]
    [ForceInline]
    [require(glsl_hlsl_metal_spirv, texture_shadowlod)]
    float SampleCmpLevelZero(vector location, float compareValue, constexpr vector offset)
    {
        __target_switch
        {
        case glsl:
            if (Shape.dimensions == 1 && isArray == 0)
            {
                return __glsl_texture_offset_level_zero_1d_shadow(this, __makeVector(__makeVector(location,0.0), compareValue), offset);
            }
            else
            {
                return __glsl_texture_offset_level_zero(this, __makeVector(location, compareValue), offset);
            }
        case hlsl:
            static_assert(T is float || T is vector || T is vector || T is vector
                || T is half || T is vector || T is vector || T is vector
                , "HLSL supports only float and half type textures");
            return __getTexture().SampleCmpLevelZero(__getComparisonSampler(), location, compareValue, offset);
        case metal:
            return __getTexture().SampleCmpLevelZero(__getComparisonSampler(), location, compareValue, offset);
        case spirv:
            const float zeroFloat = 0.0f;
            return spirv_asm
            {
                result:$$float = OpImageSampleDrefExplicitLod $this $location $compareValue Lod|ConstOffset $zeroFloat $offset;
            };
        }
    }

    // Sample with explicit gradients.
    [__readNone]
    [ForceInline]
    [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_0)]
    T SampleGrad(vector location, vector gradX, vector gradY)
    {
        __target_switch
        {
        case hlsl:
            static_assert(T is float || T is vector || T is vector || T is vector
                || T is half || T is vector || T is vector || T is vector
                , "HLSL supports only float and half type textures");
            return __getTexture().SampleGrad(__getSampler(), location, gradX, gradY);
        case cpp:
        case metal:
            return __getTexture().SampleGrad(__getSampler(), location, gradX, gradY);
        case glsl:
            __intrinsic_asm "$ctextureGrad($0, $1, $2, $3)$z";
        case spirv:
            return spirv_asm
            {
                %sampled : __sampledType(T) = OpImageSampleExplicitLod $this $location None|Grad $gradX $gradY;
                __truncate $$T result __sampledType(T) %sampled;
            };
        }
    }

    // Sample with explicit gradients and a constant texel offset.
    [__readNone]
    [ForceInline]
    [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_0)]
    T SampleGrad(vector location, vector gradX, vector gradY, constexpr vector offset)
    {
        __target_switch
        {
        case hlsl:
            static_assert(T is float || T is vector || T is vector || T is vector
                || T is half || T is vector || T is vector || T is vector
                , "HLSL supports only float and half type textures");
            return __getTexture().SampleGrad(__getSampler(), location, gradX, gradY, offset);
        case cpp:
        case metal:
            return __getTexture().SampleGrad(__getSampler(), location, gradX, gradY, offset);
        case glsl:
            __intrinsic_asm "$ctextureGradOffset($0, $1, $2, $3, $4)$z";
        case spirv:
            return spirv_asm
            {
                %sampled : __sampledType(T) = OpImageSampleExplicitLod $this $location None|Grad|ConstOffset $gradX $gradY $offset;
                __truncate $$T result __sampledType(T) %sampled;
            };
        }
    }

    // Sample with explicit gradients, texel offset and a minimum-LOD clamp.
    [__readNone]
    [ForceInline]
    __glsl_extension(GL_ARB_sparse_texture_clamp)
    [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_0)]
    T SampleGrad(vector location, vector gradX, vector gradY, constexpr vector offset, float lodClamp)
    {
        __target_switch
        {
        case hlsl:
            static_assert(T is float || T is vector || T is vector || T is vector
                || T is half || T is vector || T is vector || T is vector
                , "HLSL supports only float and half type textures");
            return __getTexture().SampleGrad(__getSampler(), location, gradX, gradY, offset, lodClamp);
        case cpp:
        case metal:
            return __getTexture().SampleGrad(__getSampler(), location, gradX, gradY, offset, lodClamp);
        case glsl:
            __intrinsic_asm "$ctextureGradOffsetClampARB($0, $1, $2, $3, $4, $5)$z";
        case spirv:
            return spirv_asm
            {
                OpCapability MinLod;
                %sampled : __sampledType(T) = OpImageSampleExplicitLod $this $location None|Grad|ConstOffset|MinLod $gradX $gradY $offset $lodClamp;
                __truncate $$T result __sampledType(T) %sampled;
            };
        }
    }

    // Sample at an explicit LOD. The cuda case picks the tex*Lod builtin
    // from Shape.flavor and isArray.
    [__readNone]
    [ForceInline]
    [require(cpp_cuda_glsl_hlsl_metal_spirv, texture_sm_4_0)]
    T SampleLevel(vector location, float level)
    {
        __target_switch
        {
        case hlsl:
            static_assert(T is float || T is vector || T is vector || T is vector
                || T is half || T is vector || T is vector || T is vector
                , "HLSL supports only float and half type textures");
            return __getTexture().SampleLevel(__getSampler(), location, level);
        case cpp:
        case metal:
            return __getTexture().SampleLevel(__getSampler(), location, level);
        case glsl:
            __intrinsic_asm "$ctextureLod($0, $1, $2)$z";
        case cuda:
            if (isArray != 0)
            {
                switch(Shape.flavor)
                {
                case $(SLANG_TEXTURE_1D):
                    __intrinsic_asm "tex1DLayeredLod<$T0>($0, ($1).x, int(($1).y), ($2))";
                case $(SLANG_TEXTURE_2D):
                    __intrinsic_asm "tex2DLayeredLod<$T0>($0, ($1).x, ($1).y, int(($1).z), ($2))";
                case $(SLANG_TEXTURE_CUBE):
                    __intrinsic_asm "texCubemapLayeredLod<$T0>($0, ($1).x, ($1).y, ($1).z, int(($1).w), ($2))";
                default:
                    __intrinsic_asm "";
                }
            }
            else
            {
                switch(Shape.flavor)
                {
                case $(SLANG_TEXTURE_1D):
                    __intrinsic_asm "tex1DLod<$T0>($0, ($1), ($2))";
                case $(SLANG_TEXTURE_2D):
                    __intrinsic_asm "tex2DLod<$T0>($0, ($1).x, ($1).y, ($2))";
                case $(SLANG_TEXTURE_3D):
                    __intrinsic_asm "tex3DLod<$T0>($0, ($1).x, ($1).y, ($1).z, ($2))";
                case $(SLANG_TEXTURE_CUBE):
                    __intrinsic_asm "texCubemapLod<$T0>($0, ($1).x, ($1).y, ($1).z, ($2))";
                default:
                    __intrinsic_asm "";
                }
            }
        case spirv:
            return spirv_asm
            {
                %sampled : __sampledType(T) = OpImageSampleExplicitLod $this $location None|Lod $level;
                __truncate $$T result __sampledType(T) %sampled;
            };
        }
    }

    // Sample at an explicit LOD with a constant texel offset.
    [__readNone]
    [ForceInline]
    [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_0)]
    T SampleLevel(vector location, float level, constexpr vector offset)
    {
        __target_switch
        {
        case hlsl:
            static_assert(T is float || T is vector || T is vector || T is vector
                || T is half || T is vector || T is vector || T is vector
                , "HLSL supports only float and half type textures");
            return __getTexture().SampleLevel(__getSampler(), location, level, offset);
        case cpp:
        case metal:
            return
__getTexture().SampleLevel(__getSampler(), location, level, offset);
        case glsl:
            __intrinsic_asm "$ctextureLodOffset($0, $1, $2, $3)$z";
        case spirv:
            return spirv_asm
            {
                %sampled : __sampledType(T) = OpImageSampleExplicitLod $this $location None|Lod|ConstOffset $level $offset;
                __truncate $$T result __sampledType(T) %sampled;
            };
        }
    }
}

// Non-combined texture types specific functions

// LOD queries taking an explicit SamplerState. The spirv case first builds
// a sampled image from `this` and `s` with OpSampledImage.
__generic
extension __TextureImpl
{
    typealias TextureCoord = vector;

    // Returns the .x component of the LOD query (metal: calculate_clamped_lod).
    [__readNone]
    [ForceInline]
    [require(glsl_hlsl_metal_spirv, texture_querylod)]
    float CalculateLevelOfDetail(SamplerState s, TextureCoord location)
    {
        __requireComputeDerivative();
        __target_switch
        {
        case hlsl:
            __intrinsic_asm ".CalculateLevelOfDetail";
        case metal:
            __intrinsic_asm ".calculate_clamped_lod";
        case glsl:
            __intrinsic_asm "textureQueryLod($p, $2).x";
        case spirv:
            return (spirv_asm {
                %sampledImage : __sampledImageType(this) = OpSampledImage $this $s;
                result:$$float2 = OpImageQueryLod %sampledImage $location;
            }).x;
        }
    }

    // Returns the .y component of the LOD query (metal: calculate_unclamped_lod).
    [__readNone]
    [ForceInline]
    [require(glsl_hlsl_metal_spirv, texture_querylod)]
    float CalculateLevelOfDetailUnclamped(SamplerState s, TextureCoord location)
    {
        __requireComputeDerivative();
        __target_switch
        {
        case hlsl:
            __intrinsic_asm ".CalculateLevelOfDetailUnclamped";
        case metal:
            __intrinsic_asm ".calculate_unclamped_lod";
        case glsl:
            __intrinsic_asm "textureQueryLod($p, $2).y";
        case spirv:
            return (spirv_asm {
                %sampledImage : __sampledImageType(this) = OpSampledImage $this $s;
                result:$$float2 = OpImageQueryLod %sampledImage $location;
            }).y;
        }
    }
}

// Sampling members taking an explicit SamplerState.
// In the metal cases, array textures split the coordinate into position
// components plus a uint layer index. Shapes not listed in a metal switch
// fall through to the empty `__intrinsic_asm ""` after the TODO.
__generic
extension __TextureImpl
{
    // Implicit-LOD sample.
    [__readNone]
    [ForceInline]
    [require(cpp_cuda_glsl_hlsl_metal_spirv, texture_sm_4_0_fragment)]
    T Sample(SamplerState s, vector location)
    {
        __requireComputeDerivative();
        __target_switch
        {
        case cpp:
            __intrinsic_asm ".Sample";
        case hlsl:
            static_assert(T is float || T is vector || T is vector || T is vector
                || T is half || T is vector || T is vector || T is vector
                , "HLSL supports only float and half type textures");
            __intrinsic_asm ".Sample";
        case metal:
            if (isArray == 1)
            {
                switch (Shape.flavor)
                {
                case $(SLANG_TEXTURE_1D):
                    __intrinsic_asm "$c$0.sample($1, ($2).x, uint(($2).y))$z";
                case $(SLANG_TEXTURE_2D):
                    __intrinsic_asm "$c$0.sample($1, ($2).xy, uint(($2).z))$z";
                case $(SLANG_TEXTURE_CUBE):
                    __intrinsic_asm "$c$0.sample($1, ($2).xyz, uint(($2).w))$z";
                }
            }
            else
            {
                switch (Shape.flavor)
                {
                case $(SLANG_TEXTURE_1D):
                case $(SLANG_TEXTURE_2D):
                case $(SLANG_TEXTURE_3D):
                case $(SLANG_TEXTURE_CUBE):
                    __intrinsic_asm "$c$0.sample($1, $2)$z";
                }
            }
            // TODO: This needs to be handled by the capability system
            __intrinsic_asm "";
        case glsl:
            __intrinsic_asm "$ctexture($p, $2)$z";
        case cuda:
            if (isArray != 0)
            {
                switch(Shape.flavor)
                {
                case $(SLANG_TEXTURE_1D):
                    __intrinsic_asm "tex1DLayered<$T0>($0, ($2).x, int(($2).y))";
                case $(SLANG_TEXTURE_2D):
                    __intrinsic_asm "tex2DLayered<$T0>($0, ($2).x, ($2).y, int(($2).z))";
                case $(SLANG_TEXTURE_CUBE):
                    __intrinsic_asm "texCubemapLayered<$T0>($0, ($2).x, ($2).y, ($2).z, int(($2).w))";
                default:
                    __intrinsic_asm "";
                }
            }
            else
            {
                switch(Shape.flavor)
                {
                case $(SLANG_TEXTURE_1D):
                    __intrinsic_asm "tex1D<$T0>($0, ($2))";
                case $(SLANG_TEXTURE_2D):
                    __intrinsic_asm "tex2D<$T0>($0, ($2).x, ($2).y)";
                case $(SLANG_TEXTURE_3D):
                    __intrinsic_asm "tex3D<$T0>($0, ($2).x, ($2).y, ($2).z)";
                case $(SLANG_TEXTURE_CUBE):
                    __intrinsic_asm "texCubemap<$T0>($0, ($2).x, ($2).y, ($2).z)";
                default:
                    __intrinsic_asm "";
                }
            }
        case spirv:
            return spirv_asm
            {
                %sampledImage : __sampledImageType(this) = OpSampledImage $this $s;
                %sampled : __sampledType(T) = OpImageSampleImplicitLod %sampledImage $location None;
                __truncate $$T result __sampledType(T) %sampled;
            };
        }
    }

    // Implicit-LOD sample with a constant texel offset.
    [__readNone]
    [ForceInline]
    [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_0_fragment)]
    T Sample(SamplerState s, vector location, constexpr vector offset)
    {
        __requireComputeDerivative();
        __target_switch
        {
        case cpp:
            __intrinsic_asm ".Sample";
        case hlsl:
            static_assert(T is float || T is vector || T is vector || T is vector
                || T is half || T is vector || T is vector || T is vector
                , "HLSL supports only float and half type textures");
            __intrinsic_asm ".Sample";
        case metal:
            if (isArray == 1)
            {
                switch (Shape.flavor)
                {
                case $(SLANG_TEXTURE_2D):
                    __intrinsic_asm "$c$0.sample($1, ($2).xy, uint(($2).z), $3)$z";
                }
            }
            else
            {
                switch (Shape.flavor)
                {
                case $(SLANG_TEXTURE_2D):
                case $(SLANG_TEXTURE_3D):
                    __intrinsic_asm "$c$0.sample($1, $2, $3)$z";
                }
            }
            // TODO: This needs to be handled by the capability system
            __intrinsic_asm "";
        case glsl:
            __intrinsic_asm "$ctextureOffset($p, $2, $3)$z";
        case spirv:
            return spirv_asm
            {
                %sampledImage : __sampledImageType(this) = OpSampledImage $this $s;
                %sampled : __sampledType(T) = OpImageSampleImplicitLod %sampledImage $location None|ConstOffset $offset;
                __truncate $$T result __sampledType(T) %sampled;
            };
        }
    }

    // Implicit-LOD sample with texel offset and a minimum-LOD clamp.
    [__readNone]
    [ForceInline]
    __glsl_extension(GL_ARB_sparse_texture_clamp)
    [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_0_fragment)]
    T Sample(SamplerState s, vector location, constexpr vector offset, float clamp)
    {
        __requireComputeDerivative();
        __target_switch
        {
        case cpp:
            __intrinsic_asm ".Sample";
        case hlsl:
            static_assert(T is float || T is vector || T is vector || T is vector
                || T is half || T is vector || T is vector || T is vector
                , "HLSL supports only float and half type textures");
            __intrinsic_asm ".Sample";
        case metal:
            if (isArray == 1)
            {
                switch (Shape.flavor)
                {
                case $(SLANG_TEXTURE_2D):
                    __intrinsic_asm "$c$0.sample($1, ($2).xy, uint(($2).z), min_lod_clamp($4), $3)$z";
                }
            }
            else
            {
                switch (Shape.flavor)
                {
                case $(SLANG_TEXTURE_2D):
                case $(SLANG_TEXTURE_3D):
                    __intrinsic_asm "$c$0.sample($1, $2, min_lod_clamp($4), $3)$z";
                }
            }
            // TODO: This needs to be handled by the capability system
            __intrinsic_asm "";
        case glsl:
            __intrinsic_asm "$ctextureOffsetClampARB($p, $2, $3, $4)$z";
        case spirv:
            return spirv_asm
            {
                OpCapability MinLod;
                %sampledImage : __sampledImageType(this) = OpSampledImage $this $s;
                %sampled : __sampledType(T) = OpImageSampleImplicitLod %sampledImage $location None|ConstOffset|MinLod $offset $clamp;
                __truncate $$T result __sampledType(T) %sampled;
            };
        }
    }

    // Status-returning variant: hlsl uses the native member; every other
    // target writes status = 0 and defers to the clamp overload.
    [__readNone]
    [ForceInline]
    T Sample(SamplerState s, vector location, constexpr vector offset, float clamp, out uint status)
    {
        __target_switch
        {
        case hlsl:
            static_assert(T is float || T is vector || T is vector || T is vector
                || T is half || T is vector || T is vector || T is vector
                , "HLSL supports only float and half type textures");
            __intrinsic_asm ".Sample";
        default:
            status = 0;
            return Sample(s, location, offset, clamp);
        }
    }

    // Implicit-LOD sample with an LOD bias.
    [__readNone]
    [ForceInline]
    [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_0_fragment)]
    T SampleBias(SamplerState s, vector location, float bias)
    {
        __requireComputeDerivative();
        __target_switch
        {
        case cpp:
            __intrinsic_asm ".SampleBias";
        case hlsl:
            static_assert(T is float || T is vector || T is vector || T is vector
                || T is half || T is vector || T is vector || T is vector
                , "HLSL supports only float and half type textures");
            __intrinsic_asm ".SampleBias";
        case metal:
            if (isArray == 1)
            {
                switch (Shape.flavor)
                {
                case $(SLANG_TEXTURE_2D):
                    __intrinsic_asm "$c$0.sample($1, ($2).xy, uint(($2).z), bias($3))$z";
                case $(SLANG_TEXTURE_CUBE):
                    __intrinsic_asm "$c$0.sample($1, ($2).xyz, uint(($2).w), bias($3))$z";
                }
            }
            else
            {
                switch (Shape.flavor)
                {
                case $(SLANG_TEXTURE_2D):
                case $(SLANG_TEXTURE_3D):
                case $(SLANG_TEXTURE_CUBE):
                    __intrinsic_asm "$c$0.sample($1, $2, bias($3))$z";
                }
            }
            // TODO: This needs to be handled by the capability system
            __intrinsic_asm "";
        case glsl:
            __intrinsic_asm "$ctexture($p, $2, $3)$z";
        case spirv:
            return spirv_asm
            {
                %sampledImage : __sampledImageType(this) = OpSampledImage $this $s;
                %sampled : __sampledType(T) = OpImageSampleImplicitLod %sampledImage $location None|Bias $bias;
                __truncate $$T result __sampledType(T) %sampled;
            };
        }
    }

    [__readNone]
    [ForceInline]
    [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_0_fragment)]
    T SampleBias(SamplerState s, vector location, float bias, constexpr vector offset)
    {
        __requireComputeDerivative();
__target_switch { case cpp: __intrinsic_asm ".SampleBias"; case hlsl: static_assert(T is float || T is vector || T is vector || T is vector || T is half || T is vector || T is vector || T is vector , "HLSL supports only float and half type textures"); __intrinsic_asm ".SampleBias"; case metal: if (isArray == 1) { switch (Shape.flavor) { case $(SLANG_TEXTURE_2D): __intrinsic_asm "$c$0.sample($1, ($2).xy, uint(($2).z), bias($3), $4)$z"; } } else { switch (Shape.flavor) { case $(SLANG_TEXTURE_2D): case $(SLANG_TEXTURE_3D): __intrinsic_asm "$c$0.sample($1, $2, bias($3), $4)$z"; } } // TODO: This needs to be handled by the capability system __intrinsic_asm ""; case glsl: __intrinsic_asm "$ctextureOffset($p, $2, $4, $3)$z"; case spirv: return spirv_asm { %sampledImage : __sampledImageType(this) = OpSampledImage $this $s; %sampled : __sampledType(T) = OpImageSampleImplicitLod %sampledImage $location None|Bias|ConstOffset $bias $offset; __truncate $$T result __sampledType(T) %sampled; }; } } [__readNone] [ForceInline] [require(glsl_hlsl_metal_spirv, texture_shadowlod)] float SampleCmp(SamplerComparisonState s, vector location, float compareValue) { __requireComputeDerivative(); __target_switch { case glsl: if (Shape.dimensions == 1 && isArray == 0) { return __glsl_texture_1d_shadow(this, s, __makeVector(__makeVector(location, 0.0), compareValue)); } else if (Shape.dimensions == 3 && isArray == 1) { return __glsl_texture_3d_array_shadow(this, s, location, compareValue); } else { return __glsl_texture(this, s, __makeVector(location,compareValue)); } case hlsl: static_assert(T is float || T is vector || T is vector || T is vector || T is half || T is vector || T is vector || T is vector , "HLSL supports only float and half type textures"); __intrinsic_asm ".SampleCmp"; case metal: if (isArray == 1) { switch (Shape.flavor) { case $(SLANG_TEXTURE_2D): __intrinsic_asm "$0.sample_compare($1, ($2).xy, uint(($2).z), $3)"; case $(SLANG_TEXTURE_CUBE): __intrinsic_asm 
"$0.sample_compare($1, ($2).xyz, uint(($2).w), $3)"; } } else { switch (Shape.flavor) { case $(SLANG_TEXTURE_2D): case $(SLANG_TEXTURE_CUBE): __intrinsic_asm ".sample_compare"; } } __intrinsic_asm ""; case spirv: return spirv_asm { %sampledImage : __sampledImageType(this) = OpSampledImage $this $s; result:$$float = OpImageSampleDrefImplicitLod %sampledImage $location $compareValue; }; } } [__readNone] [ForceInline] [require(glsl_hlsl_metal_spirv, texture_shadowlod)] float SampleCmpLevelZero(SamplerComparisonState s, vector location, float compareValue) { __target_switch { case glsl: if (Shape.dimensions == 1 && isArray == 0) { return __glsl_texture_level_zero_1d_shadow(this, s, __makeVector(__makeVector(location, 0.0), compareValue)); } else { return __glsl_texture_level_zero(this, s, __makeVector(location,compareValue)); } case hlsl: static_assert(T is float || T is vector || T is vector || T is vector || T is half || T is vector || T is vector || T is vector , "HLSL supports only float and half type textures"); __intrinsic_asm ".SampleCmpLevelZero"; case metal: if (isArray == 1) { switch (Shape.flavor) { case $(SLANG_TEXTURE_2D): __intrinsic_asm "$0.sample_compare($1, ($2).xy, uint(($2).z), $3, level(0))"; case $(SLANG_TEXTURE_CUBE): __intrinsic_asm "$0.sample_compare($1, ($2).xyz, uint(($2).w), $3, level(0))"; } } else { switch (Shape.flavor) { case $(SLANG_TEXTURE_2D): case $(SLANG_TEXTURE_CUBE): __intrinsic_asm "$0.sample_compare($1, $2, $3, level(0))"; } } __intrinsic_asm ""; case spirv: const float zeroFloat = 0.0f; return spirv_asm { %sampledImage : __sampledImageType(this) = OpSampledImage $this $s; result:$$float = OpImageSampleDrefExplicitLod %sampledImage $location $compareValue Lod $zeroFloat; }; } } [__readNone] [ForceInline] [require(glsl_hlsl_metal_spirv, texture_shadowlod)] float SampleCmp(SamplerComparisonState s, vector location, float compareValue, constexpr vector offset) { __requireComputeDerivative(); __target_switch { case glsl: if 
(Shape.dimensions == 1 && isArray == 0) { return __glsl_texture_offset_1d_shadow(this, s, __makeVector(__makeVector(location, 0.0), compareValue), offset); } else { return __glsl_texture_offset(this, s, __makeVector(location,compareValue), offset); } case hlsl: static_assert(T is float || T is vector || T is vector || T is vector || T is half || T is vector || T is vector || T is vector , "HLSL supports only float and half type textures"); __intrinsic_asm ".SampleCmp"; case metal: if (isArray == 1) { switch (Shape.flavor) { case $(SLANG_TEXTURE_2D): __intrinsic_asm "$0.sample_compare($1, ($2).xy, uint(($2).z), $3, $4)"; } } else { switch (Shape.flavor) { case $(SLANG_TEXTURE_2D): __intrinsic_asm ".sample_compare"; } } __intrinsic_asm ""; case spirv: return spirv_asm { %sampledImage : __sampledImageType(this) = OpSampledImage $this $s; result:$$float = OpImageSampleDrefImplicitLod %sampledImage $location $compareValue ConstOffset $offset; }; } } [__readNone] [ForceInline] [require(glsl_hlsl_metal_spirv, texture_shadowlod)] float SampleCmpLevelZero(SamplerComparisonState s, vector location, float compareValue, constexpr vector offset) { __target_switch { case glsl: if (Shape.dimensions == 1 && isArray == 0) { return __glsl_texture_offset_level_zero_1d_shadow(this, s, __makeVector(__makeVector(location,0.0),compareValue), offset); } else { return __glsl_texture_offset_level_zero(this, s, __makeVector(location,compareValue), offset); } case hlsl: static_assert(T is float || T is vector || T is vector || T is vector || T is half || T is vector || T is vector || T is vector , "HLSL supports only float and half type textures"); __intrinsic_asm ".SampleCmpLevelZero"; case metal: if (isShadow == 1) { switch (Shape.flavor) { case $(SLANG_TEXTURE_2D): if (isArray == 1) { // T sample_compare(sampler s, float2 coord, uint array, float compare_value, lod_options options, int2 offset = int2(0)) const __intrinsic_asm "$0.sample_compare($1, ($2).xy, uint(($2).z), $3, level(0), 
$4)"; } else { // T sample_compare(sampler s, float2 coord, float compare_value, lod_options options, int2 offset = int2(0)) const __intrinsic_asm "$0.sample_compare($1, $2, $3, level(0), $4)"; } break; } } __intrinsic_asm ""; case spirv: const float zeroFloat = 0.0f; return spirv_asm { %sampledImage : __sampledImageType(this) = OpSampledImage $this $s; result:$$float = OpImageSampleDrefExplicitLod %sampledImage $location $compareValue Lod|ConstOffset $zeroFloat $offset; }; } } [__readNone] [ForceInline] [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_0)] T SampleGrad(SamplerState s, vector location, vector gradX, vector gradY) { __target_switch { case cpp: __intrinsic_asm ".SampleGrad"; case hlsl: static_assert(T is float || T is vector || T is vector || T is vector || T is half || T is vector || T is vector || T is vector , "HLSL supports only float and half type textures"); __intrinsic_asm ".SampleGrad"; case metal: if (isArray == 1) { switch (Shape.flavor) { case $(SLANG_TEXTURE_2D): __intrinsic_asm "$c$0.sample($1, ($2).xy, uint(($2).z), gradient2d($3, $4))$z"; case $(SLANG_TEXTURE_CUBE): __intrinsic_asm "$c$0.sample($1, ($2).xyz, uint(($2).w), gradientcube($3, $4))$z"; } } else { switch (Shape.flavor) { case $(SLANG_TEXTURE_2D): __intrinsic_asm "$c$0.sample($1, $2, gradient2d($3, $4))$z"; case $(SLANG_TEXTURE_3D): __intrinsic_asm "$c$0.sample($1, $2, gradient3d($3, $4))$z"; case $(SLANG_TEXTURE_CUBE): __intrinsic_asm "$c$0.sample($1, $2, gradientcube($3, $4))$z"; } } // TODO: This needs to be handled by the capability system __intrinsic_asm ""; case glsl: __intrinsic_asm "$ctextureGrad($p, $2, $3, $4)$z"; case spirv: return spirv_asm { %sampledImage : __sampledImageType(this) = OpSampledImage $this $s; %sampled : __sampledType(T) = OpImageSampleExplicitLod %sampledImage $location None|Grad $gradX $gradY; __truncate $$T result __sampledType(T) %sampled; }; } } [__readNone] [ForceInline] [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_0)] T 
SampleGrad(SamplerState s, vector location, vector gradX, vector gradY, constexpr vector offset) { __target_switch { case cpp: __intrinsic_asm ".SampleGrad"; case hlsl: static_assert(T is float || T is vector || T is vector || T is vector || T is half || T is vector || T is vector || T is vector , "HLSL supports only float and half type textures"); __intrinsic_asm ".SampleGrad"; case metal: if (isArray == 1) { switch (Shape.flavor) { case $(SLANG_TEXTURE_2D): __intrinsic_asm "$c$0.sample($1, ($2).xy, uint(($2).z), gradient2d($3, $4), $5)$z"; } } else { switch (Shape.flavor) { case $(SLANG_TEXTURE_2D): __intrinsic_asm "$c$0.sample($1, $2, gradient2d($3, $4), $5)$z"; case $(SLANG_TEXTURE_3D): __intrinsic_asm "$c$0.sample($1, $2, gradient3d($3, $4), $5)$z"; } } // TODO: This needs to be handled by the capability system __intrinsic_asm ""; case glsl: __intrinsic_asm "$ctextureGradOffset($p, $2, $3, $4, $5)$z"; case spirv: return spirv_asm { %sampledImage : __sampledImageType(this) = OpSampledImage $this $s; %sampled : __sampledType(T) = OpImageSampleExplicitLod %sampledImage $location None|Grad|ConstOffset $gradX $gradY $offset; __truncate $$T result __sampledType(T) %sampled; }; } } [__readNone] [ForceInline] __glsl_extension(GL_ARB_sparse_texture_clamp) [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_0)] T SampleGrad(SamplerState s, vector location, vector gradX, vector gradY, constexpr vector offset, float lodClamp) { __target_switch { case cpp: __intrinsic_asm ".SampleGrad"; case hlsl: static_assert(T is float || T is vector || T is vector || T is vector || T is half || T is vector || T is vector || T is vector , "HLSL supports only float and half type textures"); __intrinsic_asm ".SampleGrad"; case metal: if (isArray == 1) { switch (Shape.flavor) { case $(SLANG_TEXTURE_2D): __intrinsic_asm "$c$0.sample($1, ($2).xy, uint(($2).z), gradient2d($3, $4), min_lod_clamp($6), $5)$z"; } } else { switch (Shape.flavor) { case $(SLANG_TEXTURE_2D): __intrinsic_asm 
"$c$0.sample($1, $2, gradient2d($3, $4), min_lod_clamp($6), $5)$z"; case $(SLANG_TEXTURE_3D): __intrinsic_asm "$c$0.sample($1, $2, gradient3d($3, $4), min_lod_clamp($6), $5)$z"; } } // TODO: This needs to be handled by the capability system __intrinsic_asm ""; case glsl: __intrinsic_asm "$ctextureGradOffsetClampARB($p, $2, $3, $4, $5, $6)$z"; case spirv: return spirv_asm { OpCapability MinLod; %sampledImage : __sampledImageType(this) = OpSampledImage $this $s; %sampled : __sampledType(T) = OpImageSampleExplicitLod %sampledImage $location None|Grad|ConstOffset|MinLod $gradX $gradY $offset $lodClamp; __truncate $$T result __sampledType(T) %sampled; }; } } [__readNone] [ForceInline] [require(cpp_cuda_glsl_hlsl_metal_spirv, texture_sm_4_0)] T SampleLevel(SamplerState s, vector location, float level) { __target_switch { case cpp: __intrinsic_asm ".SampleLevel"; case hlsl: static_assert(T is float || T is vector || T is vector || T is vector || T is half || T is vector || T is vector || T is vector , "HLSL supports only float and half type textures"); __intrinsic_asm ".SampleLevel"; case metal: if (isArray == 1) { switch (Shape.flavor) { case $(SLANG_TEXTURE_1D): __intrinsic_asm "$c$0.sample($1, ($2).x, uint(($2).y))$z"; case $(SLANG_TEXTURE_2D): __intrinsic_asm "$c$0.sample($1, ($2).xy, uint(($2).z), level($3))$z"; case $(SLANG_TEXTURE_CUBE): __intrinsic_asm "$c$0.sample($1, ($2).xyz, uint(($2).w), level($3))$z"; } } else { switch (Shape.flavor) { case $(SLANG_TEXTURE_1D): __intrinsic_asm "$c$0.sample($1, $2)$z"; case $(SLANG_TEXTURE_2D): case $(SLANG_TEXTURE_3D): case $(SLANG_TEXTURE_CUBE): __intrinsic_asm "$c$0.sample($1, $2, level($3))$z"; } } // TODO: This needs to be handled by the capability system __intrinsic_asm ""; case glsl: __intrinsic_asm "$ctextureLod($p, $2, $3)$z"; case cuda: if (isArray != 0) { switch(Shape.flavor) { case $(SLANG_TEXTURE_1D): __intrinsic_asm "tex1DLayeredLod<$T0>($0, ($2).x, int(($2).y), ($3))"; case $(SLANG_TEXTURE_2D): __intrinsic_asm 
"tex2DLayeredLod<$T0>($0, ($2).x, ($2).y, int(($2).z), ($3))"; case $(SLANG_TEXTURE_CUBE): __intrinsic_asm "texCubemapLayeredLod<$T0>($0, ($2).x, ($2).y, ($2).z, int(($2).w), ($3))"; default: __intrinsic_asm ""; } } else { switch(Shape.flavor) { case $(SLANG_TEXTURE_1D): __intrinsic_asm "tex1DLod<$T0>($0, ($2), ($3))"; case $(SLANG_TEXTURE_2D): __intrinsic_asm "tex2DLod<$T0>($0, ($2).x, ($2).y, ($3))"; case $(SLANG_TEXTURE_3D): __intrinsic_asm "tex3DLod<$T0>($0, ($2).x, ($2).y, ($2).z, ($3))"; case $(SLANG_TEXTURE_CUBE): __intrinsic_asm "texCubemapLod<$T0>($0, ($2).x, ($2).y, ($2).z, ($3))"; default: __intrinsic_asm ""; } } case spirv: return spirv_asm { %sampledImage : __sampledImageType(this) = OpSampledImage $this $s; %sampled : __sampledType(T) = OpImageSampleExplicitLod %sampledImage $location None|Lod $level; __truncate $$T result __sampledType(T) %sampled; }; } } [__readNone] [ForceInline] [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_0)] T SampleLevel(SamplerState s, vector location, float level, constexpr vector offset) { __target_switch { case cpp: __intrinsic_asm ".SampleLevel"; case hlsl: static_assert(T is float || T is vector || T is vector || T is vector || T is half || T is vector || T is vector || T is vector , "HLSL supports only float and half type textures"); __intrinsic_asm ".SampleLevel"; case metal: if (isArray == 1) { switch (Shape.flavor) { case $(SLANG_TEXTURE_2D): __intrinsic_asm "$c$0.sample($1, ($2).xy, uint(($2).z), level($3), $4)$z"; case $(SLANG_TEXTURE_CUBE): __intrinsic_asm "$c$0.sample($1, ($2).xyz, uint(($2).w), level($3), $4)$z"; } } else { switch (Shape.flavor) { case $(SLANG_TEXTURE_2D): case $(SLANG_TEXTURE_3D): case $(SLANG_TEXTURE_CUBE): __intrinsic_asm "$c$0.sample($1, $2, level($3), $4)$z"; } } __intrinsic_asm ""; case glsl: __intrinsic_asm "$ctextureLodOffset($p, $2, $3, $4)$z"; case spirv: return spirv_asm { %sampledImage : __sampledImageType(this) = OpSampledImage $this $s; %sampled : __sampledType(T) = 
OpImageSampleExplicitLod %sampledImage $location None|Lod|ConstOffset $level $offset; __truncate $$T result __sampledType(T) %sampled; }; } } } // Texture.GetDimensions and Sampler.GetDimensions ${{{{ const char* kTextureShapeTypeNames[] = { "__Shape1D", "__Shape2D", "__Shape3D", "__ShapeCube"}; for (int shapeIndex = 0; shapeIndex < 4; shapeIndex++) for (int isArray = 0; isArray <= 1; isArray++) for (int isMS = 0; isMS <= 1; isMS++) { if (isMS) { if (shapeIndex != kStdlibShapeIndex2D) continue; } if (isArray) { if (shapeIndex == kStdlibShapeIndex3D) continue; } auto shapeTypeName = kTextureShapeTypeNames[shapeIndex]; TextureTypeInfo textureTypeInfo(kBaseTextureShapes[shapeIndex], isArray, isMS, 0, sb, path); }}}} __generic extension __TextureImpl { ${{{{ textureTypeInfo.writeGetDimensionFunctions(); }}}} } ${{{{ } }}}} // Texture.GetSamplePosition(int s); __generic extension __TextureImpl { [require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_1_vertex_fragment_geometry)] float2 GetSamplePosition(int s); } __intrinsic_op($(kIROp_MakeArray)) Array __makeArray(T v0, T v1, T v2, T v3); // Gather for scalar textures. 
// Internal gather helpers shared by the public Gather*/GatherCmp* methods.
//
// Each helper comes in two overloads:
//   * a "separate" overload taking a texture plus a sampler-state argument, and
//   * a "combined" overload taking a single `__TextureImpl` argument named `sampler`.
// In the `__intrinsic_asm` strings `$N` refers to the N-th argument of the helper.
// NOTE(review): `$p` is presumably the combined texture/sampler operand built from
// arguments 0 and 1, and `$TN` the type of argument N - confirm against the
// intrinsic-asm expander.

// Gathers a single component from a texture using a separate `SamplerState`.
//   texture   - texture to gather from
//   s         - sampler state used for the gather
//   location  - texture coordinate; for array textures the Metal path reads the
//               array index from the last lane of `location`
//   component - component selector forwarded to the target gather operation
// On Metal only non-shadow 2D and cube textures (array or not) are mapped; every
// other combination falls through to the empty intrinsic below.
__generic
[ForceInline]
[require(glsl_metal_spirv, texture_gather)]
vector __texture_gather(__TextureImpl texture, SamplerState s, vector location, int component)
{
    __target_switch
    {
    case glsl:
        __intrinsic_asm "textureGather($p, $2, $3)";
    case metal:
        if (isShadow == 0)
        {
            switch (Shape.flavor)
            {
            case $(SLANG_TEXTURE_2D):
                if (isArray == 1)
                {
                    // Tv gather(sampler s, float2 coord, uint array, int2 offset = int2(0), component c = component::x) const
                    __intrinsic_asm "$0.gather($1, ($2).xy, uint(($2).z), int2(0), metal::component($3))";
                }
                else
                {
                    // Tv gather(sampler s, float2 coord, int2 offset = int2(0), component c = component::x) const
                    __intrinsic_asm "$0.gather($1, $2, int2(0), metal::component($3))";
                }
                break;
            case $(SLANG_TEXTURE_CUBE):
                if (isArray == 1)
                {
                    // Tv gather(sampler s, float3 coord, uint array, component c = component::x) const
                    __intrinsic_asm "$0.gather($1, ($2).xyz, uint(($2).w), metal::component($3))";
                }
                else
                {
                    // Tv gather(sampler s, float3 coord, component c = component::x) const
                    __intrinsic_asm "$0.gather($1, $2, metal::component($3))";
                }
                break;
            }
        }
        // TODO: This needs to be handled by the capability system
        __intrinsic_asm "";
    case spirv:
        return spirv_asm
        {
            %sampledImage : __sampledImageType(texture) = OpSampledImage $texture $s;
            result:$$vector = OpImageGather %sampledImage $location $component;
        };
    }
}

// Combined-sampler overload of `__texture_gather`: `sampler` is passed directly
// to the gather operation, so no `OpSampledImage` is built for SPIR-V.
__generic
[ForceInline]
[require(glsl_spirv, texture_gather)]
vector __texture_gather(__TextureImpl sampler, vector location, int component)
{
    __target_switch
    {
    case glsl:
        __intrinsic_asm "textureGather($0, $1, $2)";
    case spirv:
        return spirv_asm
        {
            result:$$vector = OpImageGather $sampler $location $component;
        };
    }
}

// Gathers a single component with one texel offset, using a separate `SamplerState`.
//   location  - texture coordinate. It is intentionally NOT `constexpr`: none of the
//               sibling overloads constrain the coordinate, and only `offset` is
//               passed as the SPIR-V `ConstOffset` image operand.
//   offset    - compile-time texel offset
//   component - component selector forwarded to the target gather operation
// On Metal only non-shadow 2D textures (array or not) are mapped; everything else
// falls through to the empty intrinsic below.
__generic
[ForceInline]
[require(glsl_metal_spirv, texture_gather)]
vector __texture_gather_offset(__TextureImpl texture, SamplerState s, vector location, constexpr vector offset, int component)
{
    __target_switch
    {
    case glsl:
        __intrinsic_asm "textureGatherOffset($p, $2, $3, $4)";
    case metal:
        if (Shape.flavor == $(SLANG_TEXTURE_2D))
        {
            if (isShadow == 0)
            {
                if (isArray == 1)
                {
                    // Tv gather(sampler s, float2 coord, uint array, int2 offset = int2(0), component c = component::x) const
                    __intrinsic_asm "$0.gather($1, ($2).xy, uint(($2).z), $3, metal::component($4))";
                }
                else
                {
                    // Tv gather(sampler s, float2 coord, int2 offset = int2(0), component c = component::x) const
                    __intrinsic_asm "$0.gather($1, $2, $3, metal::component($4))";
                }
            }
        }
        // TODO: This needs to be handled by the capability system
        __intrinsic_asm "";
    case spirv:
        return spirv_asm
        {
            %sampledImage : __sampledImageType(texture) = OpSampledImage $texture $s;
            result:$$vector = OpImageGather %sampledImage $location $component ConstOffset $offset;
        };
    }
}

// Combined-sampler overload of `__texture_gather_offset`.
__generic
[ForceInline]
[require(glsl_spirv, texture_gather)]
vector __texture_gather_offset(__TextureImpl sampler, vector location, constexpr vector offset, int component)
{
    __target_switch
    {
    case glsl:
        __intrinsic_asm "textureGatherOffset($0, $1, $2, $3)";
    case spirv:
        return spirv_asm
        {
            result:$$vector = OpImageGather $sampler $location $component ConstOffset $offset;
        };
    }
}

// Gathers a single component with four independent compile-time texel offsets,
// using a separate `SamplerState`.
//   offset1..offset4 - per-texel offsets, packed into an array for the target op
//   component        - component selector forwarded to the target gather operation
__generic
[ForceInline]
[require(glsl_spirv, texture_gather)]
vector __texture_gather_offsets(__TextureImpl texture, SamplerState s, vector location, constexpr vector offset1, constexpr vector offset2, constexpr vector offset3, constexpr vector offset4, int component)
{
    __target_switch
    {
    case glsl:
        // The component ($7) is the last argument of textureGatherOffsets, so it
        // must sit inside the call's parentheses, after the offsets array
        // constructor - matching the combined-sampler overload below.
        __intrinsic_asm "textureGatherOffsets($p, $2, $T3[]($3, $4, $5, $6), $7)";
    case spirv:
        let offsets = __makeArray(offset1,offset2,offset3,offset4);
        return spirv_asm
        {
            OpCapability ImageGatherExtended;
            %sampledImage : __sampledImageType(texture) = OpSampledImage $texture $s;
            result:$$vector = OpImageGather %sampledImage $location $component ConstOffsets $offsets;
        };
    }
}

// Combined-sampler overload of `__texture_gather_offsets`.
__generic
[ForceInline]
[require(glsl_spirv, texture_gather)]
vector __texture_gather_offsets(__TextureImpl sampler, vector location, constexpr vector offset1, constexpr vector offset2, constexpr vector offset3, constexpr vector offset4, int component)
{
    __target_switch
    {
    case glsl:
        __intrinsic_asm "textureGatherOffsets($0, $1, $T2[]($2, $3, $4, $5), $6)";
    case spirv:
        let offsets = __makeArray(offset1,offset2,offset3,offset4);
        return spirv_asm
        {
            OpCapability ImageGatherExtended;
            result:$$vector = OpImageGather $sampler $location $component ConstOffsets $offsets;
        };
    }
}

// Comparison gather using a separate `SamplerComparisonState`.
//   location     - texture coordinate; for array textures the Metal path reads the
//                  array index from the last lane of `location`
//   compareValue - reference value forwarded to the target comparison-gather op
// On Metal only shadow (`isShadow == 1`) 2D and cube textures are mapped; every
// other combination falls through to the empty intrinsic below.
__generic
[ForceInline]
[require(glsl_metal_spirv, texture_gather)]
vector __texture_gatherCmp(__TextureImpl texture, SamplerComparisonState s, vector location, TElement compareValue)
{
    __target_switch
    {
    case glsl:
        __intrinsic_asm "textureGather($p, $2, $3)";
    case metal:
        if (isShadow == 1)
        {
            if (Shape.flavor == $(SLANG_TEXTURE_2D))
            {
                if (isArray == 1)
                {
                    // Tv gather_compare(sampler s, float2 coord, uint array, float compare_value, int2 offset = int2(0)) const
                    __intrinsic_asm "$0.gather_compare($1, ($2).xy, uint(($2).z), $3)";
                }
                else
                {
                    // Tv gather_compare(sampler s, float2 coord, float compare_value, int2 offset = int2(0)) const
                    __intrinsic_asm "$0.gather_compare($1, $2, $3)";
                }
            }
            else if (Shape.flavor == $(SLANG_TEXTURE_CUBE))
            {
                if (isArray == 1)
                {
                    // Tv gather_compare(sampler s, float3 coord, uint array, float compare_value) const
                    __intrinsic_asm "$0.gather_compare($1, ($2).xyz, uint(($2).w), $3)";
                }
                else
                {
                    // Tv gather_compare(sampler s, float3 coord, float compare_value) const
                    __intrinsic_asm "$0.gather_compare($1, $2, $3)";
                }
            }
        }
        // TODO: This needs to be handled by the capability system
        __intrinsic_asm "";
    case spirv:
        return spirv_asm
        {
            %sampledImage : __sampledImageType(texture) = OpSampledImage $texture $s;
            result:$$vector = OpImageDrefGather %sampledImage $location $compareValue;
        };
    }
}

// Combined-sampler overload of `__texture_gatherCmp`.
__generic
[ForceInline]
[require(glsl_spirv, texture_gather)]
vector __texture_gatherCmp(__TextureImpl sampler, vector location, TElement compareValue)
{
    __target_switch
    {
    case glsl:
        __intrinsic_asm "textureGather($0, $1, $2)";
    case spirv:
        return spirv_asm
        {
            result:$$vector = OpImageDrefGather $sampler $location $compareValue;
        };
    }
}

// Generic header of the declaration that continues on the following line.
__generic
[ForceInline] [require(glsl_metal_spirv, texture_gather)] vector __texture_gatherCmp_offset(__TextureImpl texture, SamplerComparisonState s, vector location, TElement compareValue, constexpr vector offset) { __target_switch { case glsl: __intrinsic_asm "textureGatherOffset($p, $2, $3, $4)"; case metal: if (isShadow == 1) { if (Shape.flavor == $(SLANG_TEXTURE_2D)) { if (isArray == 1) { // Tv gather_compare(sampler s, float2 coord, uint array, float compare_value, int2 offset = int2(0)) const __intrinsic_asm "$0.gather_compare($1, ($2).xy, uint(($2).z), $3, $4)"; } else { // Tv gather_compare(sampler s, float2 coord, float compare_value, int2 offset = int2(0)) const __intrinsic_asm "$0.gather_compare($1, $2, $3, $4)"; } } } // TODO: This needs to be handled by the capability system __intrinsic_asm ""; case spirv: return spirv_asm { %sampledImage : __sampledImageType(texture) = OpSampledImage $texture $s; result:$$vector = OpImageDrefGather %sampledImage $location $compareValue ConstOffset $offset; }; } } __generic [ForceInline] [require(glsl_spirv, texture_gather)] vector __texture_gatherCmp_offset(__TextureImpl sampler, vector location, TElement compareValue, constexpr vector offset) { __target_switch { case glsl: __intrinsic_asm "textureGatherOffset($0, $1, $2, $3)"; case spirv: return spirv_asm { result:$$vector = OpImageDrefGather $sampler $location $compareValue ConstOffset $offset; }; } } __generic [ForceInline] [require(glsl_spirv, texture_gather)] vector __texture_gatherCmp_offsets(__TextureImpl texture, SamplerComparisonState s, vector location, TElement compareValue, vector offset1, vector offset2, vector offset3, vector offset4) { __target_switch { case glsl: __intrinsic_asm "textureGatherOffsets($p, $2, $3, $T4[]($4, $5, $6, $7))"; case spirv: let offsets = __makeArray(offset1,offset2,offset3,offset4); return spirv_asm { OpCapability ImageGatherExtended; %sampledImage : __sampledImageType(texture) = OpSampledImage $texture $s; result:$$vector = 
OpImageDrefGather %sampledImage $location $compareValue ConstOffsets $offsets; }; } } __generic [ForceInline] [require(glsl_spirv, texture_gather)] vector __texture_gatherCmp_offsets(__TextureImpl sampler, vector location, TElement compareValue, vector offset1, vector offset2, vector offset3, vector offset4) { __target_switch { case glsl: __intrinsic_asm "textureGatherOffsets($0, $1, $2, $T3[]($3, $4, $5, $6))"; case spirv: let offsets = __makeArray(offset1,offset2,offset3,offset4); return spirv_asm { OpCapability ImageGatherExtended; result:$$vector = OpImageDrefGather $sampler $location $compareValue ConstOffsets $offsets; }; } } ${{{{ for (int isCombined = 0; isCombined < 2; isCombined++) for (int isScalarTexture = 0; isScalarTexture < 2; isScalarTexture++) { if (isScalarTexture == 0) { sb << "__generic\n"; sb << "extension __TextureImpl\n"; } else { sb << "__generic\n"; sb << "extension __TextureImpl,Shape,isArray,0,sampleCount,0,isShadow," << isCombined << ",format>\n"; } }}}} { // begin extension for gather ${{{{ // Gather component const char* samplerStateParam = isCombined ? "" : " s,"; const char* getTexture = isCombined ? "__getTexture()" : "this"; for (int isCmp = 0; isCmp < 2; ++isCmp) { const char* cmp = isCmp ? "Cmp" : ""; const char* cmpParam = isCmp ? ", T compareValue" : ""; const char* compareArg = isCmp ? ", compareValue" : ""; const char* samplerStateType = isCombined ? "" : (isCmp ? "SamplerComparisonState" : "SamplerState"); const char* getSampler = isCombined ? (isCmp ? " __getComparisonSampler()," : " __getSampler(),") : samplerStateParam; const char* componentNames[] = { "", "Red", "Green", "Blue", "Alpha"}; const char* glslComponentNames[] = { ", 0", ", 1", ", 2", ", 3" }; for (auto componentId = 0; componentId < 5; componentId++) { auto componentName = componentNames[componentId]; auto glslComponent = (isCmp ? "" :glslComponentNames[componentId == 0 ? 
0 : componentId - 1]); for (bool isStatus : { false, true }) { const char* statusDecl = isStatus ? ", out uint status" : ""; const char* statusInit = isStatus ? " status = 0;\n" : ""; const char* statusCapWithMetal = isStatus ? "hlsl" : "glsl_hlsl_metal_spirv"; const char* statusCapWithoutMetal = isStatus ? "hlsl" : "glsl_hlsl_spirv"; }}}} [ForceInline] [require($(statusCapWithMetal), texture_gather)] vector Gather$(cmp)$(componentName)($(samplerStateType)$(samplerStateParam) vector location $(cmpParam) $(statusDecl)) { $(statusInit) __target_switch { case hlsl: __intrinsic_asm ".Gather$(cmp)$(componentName)"; ${{{{ if (!isStatus) { }}}} case metal: return __texture_gather$(cmp)($(getTexture),$(getSampler) location $(compareArg) $(glslComponent)); case glsl: case spirv: return __texture_gather$(cmp)(this,$(samplerStateParam) location $(compareArg) $(glslComponent)); ${{{{ } // if(!isStatus) }}}} } } [ForceInline] [require($(statusCapWithMetal), texture_gather)] vector Gather$(cmp)$(componentName)($(samplerStateType)$(samplerStateParam) vector location $(cmpParam), constexpr vector offset $(statusDecl)) { $(statusInit) __target_switch { case hlsl: __intrinsic_asm ".Gather$(cmp)$(componentName)"; ${{{{ if (!isStatus) { }}}} case metal: return __texture_gather$(cmp)_offset($(getTexture),$(getSampler) location $(compareArg), offset $(glslComponent)); case glsl: case spirv: return __texture_gather$(cmp)_offset(this,$(samplerStateParam) location $(compareArg), offset $(glslComponent)); ${{{{ } // if(!isStatus) }}}} } } [ForceInline] [require($(statusCapWithoutMetal), texture_gather)] vector Gather$(cmp)$(componentName)($(samplerStateType)$(samplerStateParam) vector location $(cmpParam), constexpr vector offset1, constexpr vector offset2, constexpr vector offset3, constexpr vector offset4 $(statusDecl)) { $(statusInit) __target_switch { case hlsl: __intrinsic_asm ".Gather$(cmp)$(componentName)"; ${{{{ if (!isStatus) { }}}} case glsl: case spirv: return 
__texture_gather$(cmp)_offsets(this,$(samplerStateParam) location $(compareArg), offset1,offset2,offset3,offset4 $(glslComponent)); ${{{{ } // if(!isStatus) }}}} } } ${{{{ } // for (isStatus) } // for (componentId) } // for (isCmp) }}}} } // end extension for gather ${{{{ } // for (isScalarTexture) }}}} // Load/Subscript for readonly, no MS textures __generic extension __TextureImpl { static const int isMS = 0; static const int access = $(kStdlibResourceAccessReadOnly); __glsl_extension(GL_EXT_samplerless_texture_functions) [__readNone] [require(glsl, texture_sm_4_1_samplerless)] T __glsl_load(vector location) { __intrinsic_asm "$ctexelFetch($0, ($1), 0)$z"; } __glsl_extension(GL_EXT_samplerless_texture_functions) [__readNone] [ForceInline] [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_1_samplerless)] T Load(vector location) { __target_switch { case cpp: case hlsl: __intrinsic_asm ".Load"; case metal: switch (Shape.flavor) { case $(SLANG_TEXTURE_1D): // lod is not supported for 1D texture if (isArray == 1) // Tv read(uint coord, uint array, uint lod = 0) const __intrinsic_asm "$c$0.read(uint(($1).x), uint(($1).y))$z"; else // Tv read(uint coord, uint lod = 0) const __intrinsic_asm "$c$0.read(uint(($1).x))$z"; break; case $(SLANG_TEXTURE_2D): if (isShadow == 1) { if (isArray == 1) // T read(uint2 coord, uint array, uint lod = 0) const __intrinsic_asm "$c$0.read(vec(($1).xy), uint(($1).z), uint(($1).w))$z"; else // T read(uint2 coord, uint lod = 0) const __intrinsic_asm "$c$0.read(vec(($1).xy), uint(($1).z))$z"; } else { if (isArray == 1) // Tv read(uint2 coord, uint array, uint lod = 0) const __intrinsic_asm "$c$0.read(vec(($1).xy), uint(($1).z), uint(($1).w))$z"; else // Tv read(uint2 coord, uint lod = 0) const __intrinsic_asm "$c$0.read(vec(($1).xy), uint(($1).z))$z"; } break; case $(SLANG_TEXTURE_3D): if (isShadow == 0 && isArray == 0) // Tv read(uint3 coord, uint lod = 0) const __intrinsic_asm "$c$0.read(vec(($1).xyz), uint(($1).w))$z"; break; case 
$(SLANG_TEXTURE_CUBE): static_assert(isArray == 0, "Unsupported 'Load' of 'texture cube array' for 'metal' target"); if (isShadow == 1) { if (isArray == 1) // T read(uint2 coord, uint face, uint array, uint lod = 0) const __intrinsic_asm ""; else // T read(uint2 coord, uint face, uint lod = 0) const __intrinsic_asm "$c$0.read(vec(($1).xy), uint(($1).z), uint(($1).w))$z"; } else { if (isArray == 1) // Tv read(uint2 coord, uint face, uint array, uint lod = 0) const __intrinsic_asm ""; else // Tv read(uint2 coord, uint face, uint lod = 0) const __intrinsic_asm "$c$0.read(vec(($1).xy), uint(($1).z), uint(($1).w))$z"; } break; } static_assert(false, "Unsupported 'Load' of 'texture' for 'metal' target"); __intrinsic_asm ""; case glsl: __intrinsic_asm "$ctexelFetch($0, ($1).$w1b, ($1).$w1e)$z"; case spirv: const int lodLoc = Shape.dimensions+isArray; let coord = __vectorReshape(location); let lod = location[lodLoc]; if (isCombined != 0) { return spirv_asm { %image:__imageType(this) = OpImage $this; %sampled:__sampledType(T) = OpImageFetch %image $coord Lod $lod; __truncate $$T result __sampledType(T) %sampled; }; } else { return spirv_asm { %sampled:__sampledType(T) = OpImageFetch $this $coord Lod $lod; __truncate $$T result __sampledType(T) %sampled; }; } } } __glsl_extension(GL_EXT_samplerless_texture_functions) [__readNone] [ForceInline] [require(cpp_glsl_hlsl_spirv, texture_sm_4_1_samplerless)] T Load(vector location, constexpr vector offset) { __target_switch { case cpp: case hlsl: __intrinsic_asm ".Load"; case glsl: __intrinsic_asm "$ctexelFetchOffset($0, ($1).$w1b, ($1).$w1e, ($2))$z"; case spirv: const int lodLoc = Shape.dimensions+isArray; let coord = __vectorReshape(location); let lod = location[lodLoc]; if (isCombined != 0) { return spirv_asm { %image:__imageType(this) = OpImage $this; %sampled:__sampledType(T) = OpImageFetch %image $coord Lod|ConstOffset $lod $offset; __truncate $$T result __sampledType(T) %sampled; }; } else { return spirv_asm { 
%sampled:__sampledType(T) = OpImageFetch $this $coord Lod|ConstOffset $lod $offset; __truncate $$T result __sampledType(T) %sampled; }; } } } [__readNone] [ForceInline] [require(hlsl, texture_sm_4_1_samplerless)] T Load(vector location, constexpr vector offset, out uint status) { __target_switch { case hlsl: __intrinsic_asm ".Load"; default: status = 0; return Load(location, offset); } } __subscript(vector location) -> T { __glsl_extension(GL_EXT_samplerless_texture_functions) [__readNone] [ForceInline] [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_1_samplerless)] get { __target_switch { case cpp: case hlsl: __intrinsic_asm ".operator[]"; case metal: return Load(__makeVector(location, 0)); case glsl: return __glsl_load(location); case spirv: if (isCombined != 0) { return spirv_asm { %image:__imageType(this) = OpImage $this; %sampled:__sampledType(T) = OpImageFetch %image $location; __truncate $$T result __sampledType(T) %sampled; }; } else { return spirv_asm { %sampled:__sampledType(T) = OpImageFetch $this $location; __truncate $$T result __sampledType(T) %sampled; }; } } } } } // Texture Load/Subscript for readonly, MS textures __generic extension __TextureImpl { static const int access = $(kStdlibResourceAccessReadOnly); static const int isMS = 1; __glsl_extension(GL_EXT_samplerless_texture_functions) [__readNone] [ForceInline] [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_1_samplerless)] T Load(vector location, int sampleIndex) { __target_switch { case cpp: case hlsl: __intrinsic_asm ".Load"; case metal: switch (Shape.flavor) { case $(SLANG_TEXTURE_2D): if (isShadow == 1) { if (isArray == 1) // Document seems to have a typo. `lod` must be `sample`. // Tv read(uint2 coord, uint array, uint lod = 0) const __intrinsic_asm "$c$0.read(($1).xy, ($1).z, uint($2))$z"; else // T read(uint2 coord, uint sample) const __intrinsic_asm "$c$0.read($1, uint($2))$z"; } else { if (isArray == 1) // Document seems to have a typo. `lod` must be `sample`. 
// Tv read(uint2 coord, uint array, uint lod = 0) const __intrinsic_asm "$c$0.read(($1).xy, ($1).z, uint($2))$z"; else // Tv read(uint2 coord, uint sample) const __intrinsic_asm "$c$0.read($1, uint($2))$z"; } break; } // TODO: This needs to be handled by the capability system __intrinsic_asm ""; case glsl: __intrinsic_asm "$ctexelFetch($0, $1, ($2))$z"; case spirv: if (isCombined != 0) { return spirv_asm { %image:__imageType(this) = OpImage $this; %sampled:__sampledType(T) = OpImageFetch %image $location Sample $sampleIndex; __truncate $$T result __sampledType(T) %sampled; }; } else { return spirv_asm { %sampled:__sampledType(T) = OpImageFetch $this $location Sample $sampleIndex; __truncate $$T result __sampledType(T) %sampled; }; } } } [__readNone] [ForceInline] [require(cpp_glsl_hlsl_spirv, texture_sm_4_1_samplerless)] T Load(vector locationAndSampleIndex) { return Load(__vectorReshape(locationAndSampleIndex), locationAndSampleIndex[Shape.dimensions + isArray]); } __glsl_extension(GL_EXT_samplerless_texture_functions) [__readNone] [ForceInline] [require(cpp_glsl_hlsl_spirv, texture_sm_4_1_samplerless)] T Load(vector location, int sampleIndex, constexpr vector offset) { __target_switch { case cpp: case hlsl: __intrinsic_asm ".Load"; case glsl: __intrinsic_asm "$ctexelFetchOffset($0, $1, ($2), ($3))$z"; case spirv: if (isCombined != 0) { return spirv_asm { %image:__imageType(this) = OpImage $this; %sampled:__sampledType(T) = OpImageFetch %image $location ConstOffset|Sample $offset $sampleIndex; __truncate $$T result __sampledType(T) %sampled; }; } else { return spirv_asm { %sampled:__sampledType(T) = OpImageFetch $this $location ConstOffset|Sample $offset $sampleIndex; __truncate $$T result __sampledType(T) %sampled; }; } } } [__readNone] [ForceInline] [require(hlsl, texture_sm_4_1_samplerless)] T Load(vector location, int sampleIndex, constexpr vector offset, out uint status) { __target_switch { case hlsl: __intrinsic_asm ".Load"; default: status = 0; return 
Load(location, sampleIndex, offset);
        }
    }

    // Subscript without an explicit sample index: reads sample 0 at `location`.
    __subscript(vector location) -> T
    {
        __glsl_extension(GL_EXT_samplerless_texture_functions)
        [__readNone]
        [ForceInline]
        [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_1_samplerless)]
        get
        {
            __target_switch
            {
            case cpp:
            case hlsl:
                // `.sample` is indexed as `[sampleIndex][location]` (see the two-argument
                // subscript below). Indexing it with the location alone was wrong; use
                // sample 0 so this agrees with `Load(location, 0)` on the other targets.
                __intrinsic_asm "($0).sample[0][$1]";
            case metal:
            case glsl:
            case spirv:
                return Load(location, 0);
            }
        }
    }

    // Subscript with an explicit sample index: reads sample `sampleIndex` at `location`.
    __subscript(vector location, int sampleIndex) -> T
    {
        __glsl_extension(GL_EXT_samplerless_texture_functions)
        [__readNone]
        [ForceInline]
        [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_1_samplerless)]
        get
        {
            __target_switch
            {
            case cpp:
            case hlsl:
                __intrinsic_asm "($0).sample[$2][$1]";
            case metal:
            case glsl:
            case spirv:
                return Load(location, sampleIndex);
            }
        }
    }
}

// Load/Subscript for readwrite textures
${{{{
    for (int access = kStdlibResourceAccessReadWrite; access<=kStdlibResourceAccessRasterizerOrdered; access++) {
        // GLSL `imageLoad` expansion strings, spliced into the generated code via `$(...)`.
        const char* glslIntrinsic = "$cimageLoad($0, $1)$z";
        const char* glslIntrinsicOffset = "$cimageLoad($0, ($1)+($2))$z";
        const char* glslIntrinsicMS = "$cimageLoad($0, $1, $2)$z";
        const char* glslIntrinsicMSOffset = "$cimageLoad($0, ($1)+($3), $2)$z";
}}}}

__generic
extension __TextureImpl
{
    // Reads the texel at `location`; each target case below supplies its own expansion.
    [__readNone]
    [ForceInline]
    [require(cpp_cuda_glsl_hlsl_metal_spirv, texture_sm_4_1)]
    T Load(vector location)
    {
        __target_switch
        {
        case cpp:
        case hlsl:
            __intrinsic_asm ".Load";
        case glsl:
            __intrinsic_asm "$(glslIntrinsic)";
        case cuda:
            if (isArray != 0)
            {
                switch(Shape.flavor)
                {
                case $(SLANG_TEXTURE_1D):
                    __intrinsic_asm "surf1DLayeredread$C<$T0>($0, ($1).x * $E, ($1).y, SLANG_CUDA_BOUNDARY_MODE)";
                case $(SLANG_TEXTURE_2D):
                    __intrinsic_asm "surf2DLayeredread$C<$T0>($0, ($1).x * $E, ($1).y, ($1).z, SLANG_CUDA_BOUNDARY_MODE)";
                case $(SLANG_TEXTURE_3D):
                    __intrinsic_asm "surf3DLayeredread$C<$T0>($0, ($1).x * $E, ($1).y, ($1).z, ($1).w, SLANG_CUDA_BOUNDARY_MODE)";
                default:
                    __intrinsic_asm "";
                }
            }
            else
            {
                switch(Shape.flavor)
                {
                case $(SLANG_TEXTURE_1D):
                    __intrinsic_asm "surf1Dread$C<$T0>($0, ($1) * $E,
SLANG_CUDA_BOUNDARY_MODE)"; case $(SLANG_TEXTURE_2D): __intrinsic_asm "surf2Dread$C<$T0>($0, ($1).x * $E, ($1).y, SLANG_CUDA_BOUNDARY_MODE)"; case $(SLANG_TEXTURE_3D): __intrinsic_asm "surf3Dread$C<$T0>($0, ($1).x * $E, ($1).y, ($1).z, SLANG_CUDA_BOUNDARY_MODE)"; default: __intrinsic_asm ""; } } case spirv: return spirv_asm { %sampled:__sampledType(T) = OpImageRead $this $location; __truncate $$T result __sampledType(T) %sampled; }; case metal: switch (Shape.flavor) { case $(SLANG_TEXTURE_1D): // lod is not supported for 1D texture if (isArray == 1) // Tv read(uint coord, uint array, uint lod = 0) const __intrinsic_asm "$c$0.read(uint(($1).x), uint(($1).y))$z"; else // Tv read(uint coord, uint lod = 0) const __intrinsic_asm "$c$0.read(uint($1))$z"; break; case $(SLANG_TEXTURE_2D): if (isShadow == 1) { if (isArray == 1) // T read(uint2 coord, uint array, uint lod = 0) const __intrinsic_asm "$c$0.read(vec(($1).xy), uint(($1).z))$z"; else // T read(uint2 coord, uint lod = 0) const __intrinsic_asm "$c$0.read(vec(($1).xy))$z"; } else { if (isArray == 1) // Tv read(uint2 coord, uint array, uint lod = 0) const __intrinsic_asm "$c$0.read(vec(($1).xy), uint(($1).z))$z"; else // Tv read(uint2 coord, uint lod = 0) const __intrinsic_asm "$c$0.read(vec(($1).xy))$z"; } break; case $(SLANG_TEXTURE_3D): if (isShadow == 0 && isArray == 0) // Tv read(uint3 coord, uint lod = 0) const __intrinsic_asm "$c$0.read(vec(($1).xyz))$z"; break; case $(SLANG_TEXTURE_CUBE): static_assert(isArray == 0, "Unsupported 'Load' of 'texture cube array' for 'metal' target"); if (isShadow == 1) { if (isArray == 1) // T read(uint2 coord, uint face, uint array, uint lod = 0) const __intrinsic_asm "$0.read(vec(($1).xy), uint(($1).z)%6, uint(($1).z)/6, uint(($1).w))"; else // T read(uint2 coord, uint face, uint lod = 0) const __intrinsic_asm "$c$0.read(vec(($1).xy), uint(($1).z), uint(($1).w))$z"; } else { if (isArray == 1) // Tv read(uint2 coord, uint face, uint array, uint lod = 0) const __intrinsic_asm 
"$0.read(vec(($1).xy), uint(($1).z)%6, uint(($1).z)/6, uint(($1).w))"; else // Tv read(uint2 coord, uint face, uint lod = 0) const __intrinsic_asm "$c$0.read(vec(($1).xy), uint(($1).z), uint(($1).w))$z"; } break; } static_assert(false, "Unsupported 'Load' of 'texture' for 'metal' target"); __intrinsic_asm ""; } } [__readNone] [ForceInline] [require(cpp_glsl_hlsl_spirv, texture_sm_4_1)] T Load(vector location, vector offset) { __target_switch { case cpp: case hlsl: __intrinsic_asm ".Load"; case glsl: __intrinsic_asm "$(glslIntrinsicOffset)"; case spirv: return spirv_asm { %sampled:__sampledType(T) = OpImageRead $this $location ConstOffset $offset; __truncate $$T result __sampledType(T) %sampled; }; } } [__readNone] [ForceInline] T Load(vector location, vector offset, out uint status) { __target_switch { case hlsl: case cpp: __intrinsic_asm ".Load"; default: status = 0; return Load(location, offset); } } [require(glsl, texture_sm_4_1)] void __glslImageStore(vector location, T value) { __intrinsic_asm "imageStore($0, $1, $V2)"; } __subscript(vector location) -> T { [__readNone] [ForceInline] [require(cpp_cuda_glsl_hlsl_metal_spirv, texture_sm_4_1)] get { __target_switch { case cpp: case hlsl: __intrinsic_asm ".operator[]"; case glsl: case spirv: case cuda: case metal: return Load(location); } } [nonmutating] [ForceInline] [require(cpp_cuda_glsl_hlsl_metal_spirv, texture_sm_4_1)] set(T newValue) { __target_switch { case cpp: case hlsl: __intrinsic_asm ".operator[]"; case glsl: __glslImageStore(location, newValue); case cuda: if (isArray != 0) { switch(Shape.flavor) { case $(SLANG_TEXTURE_1D): __intrinsic_asm "surf1DLayeredwrite$C<$T0>($2, $0, ($1).x * $E, ($1).y, SLANG_CUDA_BOUNDARY_MODE)"; case $(SLANG_TEXTURE_2D): __intrinsic_asm "surf2DLayeredwrite$C<$T0>($2, $0, ($1).x * $E, ($1).y, ($1).z, SLANG_CUDA_BOUNDARY_MODE)"; case $(SLANG_TEXTURE_3D): __intrinsic_asm "surf3DLayeredwrite$C<$T0>($2, $0, ($1).x * $E, ($1).y, ($1).z, ($1).w, SLANG_CUDA_BOUNDARY_MODE)"; 
default: __intrinsic_asm ""; } } else { switch(Shape.flavor) { case $(SLANG_TEXTURE_1D): __intrinsic_asm "surf1Dwrite$C<$T0>($2, $0, ($1) * $E, SLANG_CUDA_BOUNDARY_MODE)"; case $(SLANG_TEXTURE_2D): __intrinsic_asm "surf2Dwrite$C<$T0>($2, $0, ($1).x * $E, ($1).y, SLANG_CUDA_BOUNDARY_MODE)"; case $(SLANG_TEXTURE_3D): __intrinsic_asm "surf3Dwrite$C<$T0>($2, $0, ($1).x * $E, ($1).y, ($1).z, SLANG_CUDA_BOUNDARY_MODE)"; default: __intrinsic_asm ""; } } case spirv: return spirv_asm { OpImageWrite $this $location __convertTexel(newValue); }; case metal: switch (Shape.flavor) { case $(SLANG_TEXTURE_1D): // lod is not supported for 1D texture if (isArray == 1) // void write(Tv color, uint coord, uint array, uint lod = 0) const __intrinsic_asm "$0.write($2, uint(($1).x), uint(($1).y))"; else // void write(Tv color, uint coord, uint lod = 0) const __intrinsic_asm "$0.write($2, uint($1))"; break; case $(SLANG_TEXTURE_2D): if (isShadow == 1) { if (isArray == 1) // void write(Tv color, uint2 coord, uint array, uint lod = 0) const __intrinsic_asm "$0.write($2, vec(($1).xy), uint(($1).z))"; else // void write(Tv color, uint2 coord, uint lod = 0) const __intrinsic_asm "$0.write($2, vec(($1).xy))"; } else { if (isArray == 1) // void write(Tv color, uint2 coord, uint array, uint lod = 0) const __intrinsic_asm "$0.write($2, vec(($1).xy), uint(($1).z))"; else // void write(Tv color, uint2 coord, uint lod = 0) const __intrinsic_asm "$0.write($2, vec(($1).xy))"; } break; case $(SLANG_TEXTURE_3D): if (isShadow == 0 && isArray == 0) // void write(Tv color, uint3 coord, uint lod = 0) const __intrinsic_asm "$0.write($2, vec(($1).xyz))"; break; case $(SLANG_TEXTURE_CUBE): static_assert(isArray == 0, "Unsupported 'Store' of 'texture cube array' for 'metal' target"); if (isShadow == 1) { // void write(Tv color, uint2 coord, uint face, uint lod = 0) const __intrinsic_asm "$0.write($2, vec(($1).xy), uint(($1).z), uint(($1).w))"; } else { // void write(Tv color, uint2 coord, uint face, uint lod = 
0) const __intrinsic_asm "$0.write($2, vec(($1).xy), uint(($1).z), uint(($1).w))"; } break; } } } __intrinsic_op($(kIROp_ImageSubscript)) ref; } } ${{{{ if (access == kStdlibResourceAccessReadWrite) { }}}} // RW MS textures. __generic extension __TextureImpl { [__readNone] [ForceInline] [require(cpp_glsl_hlsl_metal_spirv, texture_sm_4_1_compute_fragment)] T Load(vector location, int sampleIndex) { __target_switch { case cpp: case hlsl: __intrinsic_asm ".Load"; case metal: switch (Shape.flavor) { case $(SLANG_TEXTURE_2D): if (isShadow == 1) { if (isArray == 1) // The document seems to have a typo. `lod` must mean `sample`. // Tv read(uint2 coord, uint array, uint lod = 0) const __intrinsic_asm "$c$0.read(vec(($1).xy), uint(($1).z), $2)$z"; else // T read(uint2 coord, uint sample) const __intrinsic_asm "$c$0.read(vec(($1).xy), $2)$z"; } else { if (isArray == 1) // The document seems to have a typo. `lod` must mean `sample`. // Tv read(uint2 coord, uint array, uint lod = 0) const __intrinsic_asm "$c$0.read(vec(($1).xy), uint(($1).z), $2)$z"; else // Tv read(uint2 coord, uint sample) const __intrinsic_asm "$c$0.read(vec(($1).xy), $2)$z"; } break; } // TODO: This needs to be handled by the capability system __intrinsic_asm ""; case glsl: __intrinsic_asm "$(glslIntrinsicMS)"; case spirv: return spirv_asm { %sampled:__sampledType(T) = OpImageRead $this $location Sample $sampleIndex; __truncate $$T result __sampledType(T) %sampled; }; } } [__readNone] [ForceInline] [require(cpp_glsl_hlsl_spirv, texture_sm_4_1_compute_fragment)] T Load(vector location, int sampleIndex, vector offset) { __target_switch { case cpp: case hlsl: __intrinsic_asm ".Load"; case glsl: __intrinsic_asm "$(glslIntrinsicMSOffset)"; case spirv: return spirv_asm { %sampled:__sampledType(T) = OpImageRead $this $location ConstOffset|Sample $offset $sampleIndex; __truncate $$T result __sampledType(T) %sampled; }; } } [__readNone] [ForceInline] T Load(vector location, int sampleIndex, vector offset, out uint 
status) { __target_switch { case hlsl: case cpp: __intrinsic_asm ".Load"; default: status = 0; return Load(location, sampleIndex, offset); } } [require(glsl, texture_sm_4_1_compute_fragment)] void __glslImageStore(vector location, int sampleIndex, T value) { __intrinsic_asm "imageStore($0, $1, $2, $V3)"; } __subscript(vector location, int sampleIndex) -> T { [__readNone] [ForceInline] [require(cpp_glsl_hlsl_spirv, texture_sm_4_1_compute_fragment)] get { __target_switch { case cpp: case hlsl: __intrinsic_asm "$0.sample[$2][$1]"; case glsl: case spirv: return Load(location, sampleIndex); } } [nonmutating] [ForceInline] [require(cpp_glsl_hlsl_spirv, texture_sm_4_1_compute_fragment)] set(T newValue) { __target_switch { case cpp: case hlsl: __intrinsic_asm "$0.sample[$2][$1]"; case glsl: __glslImageStore(location, sampleIndex, newValue); case spirv: return spirv_asm { OpImageWrite $this $location __convertTexel(newValue) Sample $sampleIndex; }; } } __intrinsic_op($(kIROp_ImageSubscript)) ref; } } ${{{{ } // if (access == kStdlibResourceAccessReadWrite) // for RW MS textures. } // for (access). }}}} // Definitions to support the legacy texture .mips[][] operator. struct __TextureMip { __TextureImpl tex; int mip; __subscript(vector pos)->T { [__unsafeForceInlineEarly] get { return tex.Load(__makeVector(pos, mip)); } } } struct __TextureMips { __TextureImpl tex; __subscript(int mip)->__TextureMip { [__unsafeForceInlineEarly] get { return { tex, mip }; } } } __generic extension __TextureImpl { property __TextureMips mips { [__unsafeForceInlineEarly] get { return { this }; } } } // Definitions to support the .sample[][] operator. 
// Pairs a texture with one sample index. Subscripting it with a position forwards to
// the texture's `[pos, sample]` subscript.
struct __TextureSample
{
    __TextureImpl tex;
    int sample;

    __subscript(vector pos)->T
    {
        [__unsafeForceInlineEarly]
        get { return tex[pos, sample]; }
    }
}

// Wraps a texture so that subscripting with a sample index yields a `__TextureSample`
// bound to that index.
struct __TextureSampleMS
{
    __TextureImpl tex;

    __subscript(int sample)->__TextureSample
    {
        [__unsafeForceInlineEarly]
        get { return { tex, sample }; }
    }
}

// Adds the `sample` property, which returns a `__TextureSampleMS` wrapping `this`.
__generic
extension __TextureImpl
{
    property __TextureSampleMS sample
    {
        [__unsafeForceInlineEarly]
        get { return { this }; }
    }
}

// Texture type aliases.
// T, Shape: __ITextureShape, let isArray:int, let isMS:int, let sampleCount:int, let access:int, let isShadow:int, let isCombined:int, let format:int
${{{{
    // Name fragments for the generated aliases; each table is indexed by the loop
    // variable of the same meaning below.
    const char* shapeTypeNames[] = {"1D", "2D", "3D", "Cube"};
    const char* accessPrefix[] = {"", "RW", "RasterizerOrdered", "Feedback"};
    const char* arrayPostFix[] = {"", "Array"};
    const char* msPostFix[] = {"", "MS"};

    // Walk every (shape, isArray, isMS, isCombined, access) combination and skip the
    // ones that should not get an alias.
    for (int shape = 0; shape < 4; shape++)
    for (int isArray = 0; isArray<=1; isArray++)
    for (int isMS = 0; isMS<=1; isMS++)
    for (int isCombined = 0; isCombined<=1; isCombined++)
    for (int access = kStdlibResourceAccessReadOnly; access<=kStdlibResourceAccessFeedback; access++)
    {
        if (access != kStdlibResourceAccessReadOnly)
        {
            // No RW Cube.
            if (shape == kStdlibShapeIndexCube) continue;
        }
        if (access == kStdlibResourceAccessFeedback)
        {
            // Feedback only defined for Texture2D and Texture2DArray.
            // (Named constant instead of the bare shape index `1`.)
            if (shape != kStdlibShapeIndex2D) continue;
            if (isMS) continue;
            if (isCombined) continue;
        }
        if (isMS)
        {
            // Only Texture2DMS.
            if (shape != kStdlibShapeIndex2D) continue;
            // Only Texture2DMS or RWTexture2DMS.
            // Compare `access` against an access constant, not a shape index.
            if (access >= kStdlibResourceAccessRasterizerOrdered) continue;
        }
        // No 3D Array.
        if (shape == kStdlibShapeIndex3D && isArray == 1) continue;

        const char* textureTypeName = isCombined ? "Sampler" : "Texture";
}}}}
typealias $(accessPrefix[access])$(textureTypeName)$(shapeTypeNames[shape])$(msPostFix[isMS])$(arrayPostFix[isArray]) = __TextureImpl;
${{{{
    }
}}}}

// AtomicAdd

// Make the GLSL atomicAdd available.
// We have separate int/float implementations, as the float version requires some specific extensions
// https://www.khronos.org/registry/OpenGL/extensions/NV/NV_shader_atomic_float.txt

// float atomic add. GLSL expands to `atomicAdd`; SPIR-V emits `OpAtomicFAddEXT`
// with Device scope and `None` memory semantics.
__glsl_version(430)
__glsl_extension(GL_EXT_shader_atomic_float)
[ForceInline]
[require(glsl_spirv, atomic_glsl_float1)]
float __atomicAdd(__ref float value, float amount)
{
    __target_switch
    {
    case glsl:
        __intrinsic_asm "atomicAdd($0, $1)";
    case spirv:
        return spirv_asm
        {
            OpExtension "SPV_EXT_shader_atomic_float_add";
            OpCapability AtomicFloat32AddEXT;
            result:$$float = OpAtomicFAddEXT &value Device None $amount
        };
    }
}

// half2 atomic add.
// The result is a 2-component 16-bit float vector, which the 32-bit scalar capability
// `AtomicFloat32AddEXT` does not cover. Declare the fp16-vector extension/capability,
// the SPIR-V counterpart of the `GL_NV_shader_atomic_fp16_vector` GLSL extension
// required above.
__glsl_version(430)
__glsl_extension(GL_NV_shader_atomic_fp16_vector)
[ForceInline]
[require(glsl_spirv, atomic_glsl_halfvec)]
half2 __atomicAdd(__ref half2 value, half2 amount)
{
    __target_switch
    {
    case glsl:
        __intrinsic_asm "atomicAdd($0, $1)";
    case spirv:
        return spirv_asm
        {
            OpExtension "SPV_NV_shader_atomic_fp16_vector";
            OpCapability AtomicFloat16VectorNV;
            result:$$half2 = OpAtomicFAddEXT &value Device None $amount
        };
    }
}

// Helper for hlsl, using NVAPI
// Expands to `NvInterlockedAddUint64(buf, offset, <third argument>)`.
[__requiresNVAPI]
[require(hlsl, atomic_hlsl_nvapi)]
uint2 __atomicAdd(RWByteAddressBuffer buf, uint offset, uint2)
{
    __target_switch
    {
    case hlsl:
        __intrinsic_asm "NvInterlockedAddUint64($0, $1, $2)";
    }
}

// atomic add for hlsl using SM6.6
// Expands to `buf.InterlockedAdd64(offset, value, originalValue)`.
[require(hlsl, atomic_hlsl_sm_6_6)]
void __atomicAdd(RWByteAddressBuffer buf, uint offset, int64_t value, out int64_t originalValue)
{
    __target_switch
    {
    case hlsl:
        __intrinsic_asm "$0.InterlockedAdd64($1, $2, $3)";
    }
}

// Unsigned 64-bit overload of the SM6.6 helper above.
[require(hlsl, atomic_hlsl_sm_6_6)]
void __atomicAdd(RWByteAddressBuffer buf, uint offset, uint64_t value, out uint64_t originalValue)
{
    __target_switch
    {
    case hlsl:
        __intrinsic_asm "$0.InterlockedAdd64($1, $2, $3)";
    }
}

// Int versions require glsl 4.30
// https://www.khronos.org/registry/OpenGL-Refpages/gl4/html/atomicAdd.xhtml

// int atomic add. GLSL expands to `atomicAdd`; SPIR-V emits `OpAtomicIAdd`.
__glsl_version(430)
[ForceInline]
[require(glsl_spirv, atomic_glsl)]
int __atomicAdd(__ref int value, int amount)
{
    __target_switch
    {
    case glsl:
        __intrinsic_asm "atomicAdd($0, $1)";
    case spirv:
        return spirv_asm
        {
            result:$$int = OpAtomicIAdd &value Device None $amount;
        };
    }
}

// uint atomic add.
__glsl_version(430)
[ForceInline]
[require(glsl_spirv, atomic_glsl)]
uint __atomicAdd(__ref uint value, uint amount)
{
    __target_switch
    {
    case glsl:
        __intrinsic_asm "atomicAdd($0, $1)";
    case spirv:
        return spirv_asm
        {
            result:$$uint = OpAtomicIAdd &value Device None $amount;
        };
    }
}

// int64_t atomic add. The SPIR-V path additionally declares `Int64Atomics`.
__glsl_version(430)
__glsl_extension(GL_EXT_shader_atomic_int64)
[ForceInline]
[require(glsl_spirv, atomic_glsl_int64)]
int64_t __atomicAdd(__ref int64_t value, int64_t amount)
{
    __target_switch
    {
    case glsl:
        __intrinsic_asm "atomicAdd($0, $1)";
    case spirv:
        return spirv_asm
        {
            OpCapability Int64Atomics;
            result:$$int64_t = OpAtomicIAdd &value Device None $amount
        };
    }
}

// uint64_t atomic add. The SPIR-V path additionally declares `Int64Atomics`.
__glsl_version(430)
__glsl_extension(GL_EXT_shader_atomic_int64)
[ForceInline]
[require(glsl_spirv, atomic_glsl_int64)]
uint64_t __atomicAdd(__ref uint64_t value, uint64_t amount)
{
    __target_switch
    {
    case glsl:
        __intrinsic_asm "atomicAdd($0, $1)";
    case spirv:
        return spirv_asm
        {
            OpCapability Int64Atomics;
            result:$$uint64_t = OpAtomicIAdd &value Device None $amount
        };
    }
}

// Cas - Compare and swap

// Helper for HLSL, using NVAPI
// Expands to `NvInterlockedCompareExchangeUint64(buf, offset, compareValue, value)`.
[__requiresNVAPI]
[require(hlsl, atomic_hlsl_nvapi)]
uint2 __cas(RWByteAddressBuffer buf, uint offset, uint2 compareValue, uint2 value)
{
    __target_switch
    {
    case hlsl:
        __intrinsic_asm "NvInterlockedCompareExchangeUint64($0, $1, $2, $3)";
    }
}

// CAS using SM6.6
// Expands to `buf.InterlockedCompareExchange64(offset, compare_value, value, original_value)`.
[require(hlsl, atomic_hlsl_sm_6_6)]
void __cas(RWByteAddressBuffer buf, uint offset, in int64_t compare_value, in int64_t value, out int64_t original_value)
{
    __target_switch
    {
    case hlsl:
        __intrinsic_asm "$0.InterlockedCompareExchange64($1, $2, $3, $4)";
    }
}

// Unsigned 64-bit overload of the SM6.6 CAS helper above.
[require(hlsl, atomic_hlsl_sm_6_6)]
void __cas(RWByteAddressBuffer buf, uint offset, in uint64_t compare_value, in uint64_t value, out uint64_t original_value)
{
    __target_switch
    {
    case hlsl:
        __intrinsic_asm "$0.InterlockedCompareExchange64($1, $2, $3, $4)";
    }
}
// 64-bit signed compare-and-swap for GLSL/SPIR-V.
// GLSL expands to `atomicCompSwap(ioValue, compareValue, newValue)`. The SPIR-V
// instruction is passed `newValue` first and `compareValue` second.
__glsl_version(430)
__glsl_extension(GL_EXT_shader_atomic_int64)
[ForceInline]
[require(glsl_spirv, atomic_glsl_int64)]
int64_t __cas(__ref int64_t ioValue, int64_t compareValue, int64_t newValue)
{
    __target_switch
    {
    case glsl:
        __intrinsic_asm "atomicCompSwap($0, $1, $2)";
    case spirv:
        return spirv_asm
        {
            OpCapability Int64Atomics;
            result:$$int64_t = OpAtomicCompareExchange &ioValue Device None None $newValue $compareValue
        };
    }
}

// 64-bit unsigned compare-and-swap; same expansion as the signed overload.
__glsl_version(430)
__glsl_extension(GL_EXT_shader_atomic_int64)
[ForceInline]
[require(glsl_spirv, atomic_glsl_int64)]
uint64_t __cas(__ref uint64_t ioValue, uint64_t compareValue, uint64_t newValue)
{
    __target_switch
    {
    case glsl:
        __intrinsic_asm "atomicCompSwap($0, $1, $2)";
    case spirv:
        return spirv_asm
        {
            OpCapability Int64Atomics;
            result:$$uint64_t = OpAtomicCompareExchange &ioValue Device None None $newValue $compareValue
        };
    }
}

// Max

// HLSL-only helper: expands to the NVAPI call `NvInterlockedMaxUint64`.
[__requiresNVAPI]
[require(hlsl, atomic_hlsl_nvapi)]
uint2 __atomicMax(RWByteAddressBuffer buf, uint offset, uint2 value)
{
    __target_switch
    {
    case hlsl:
        __intrinsic_asm "NvInterlockedMaxUint64($0, $1, $2)";
    }
}

// uint64_t atomic max: GLSL `atomicMax`, SPIR-V `OpAtomicUMax`.
__glsl_version(430)
__glsl_extension(GL_EXT_shader_atomic_int64)
[ForceInline]
[require(glsl_spirv, atomic_glsl_int64)]
uint64_t __atomicMax(__ref uint64_t ioValue, uint64_t value)
{
    __target_switch
    {
    case glsl:
        __intrinsic_asm "atomicMax($0, $1)";
    case spirv:
        return spirv_asm
        {
            OpCapability Int64Atomics;
            result:$$uint64_t = OpAtomicUMax &ioValue Device None $value
        };
    }
}

// float atomic max: GLSL `atomicMax`, SPIR-V `OpAtomicFMaxEXT` with the 32-bit
// min/max capability.
__glsl_version(430)
__glsl_extension(GL_EXT_shader_atomic_float2)
[ForceInline]
[require(glsl_spirv, atomic_glsl_float2)]
float __atomicMax(__ref float ioValue, float value)
{
    __target_switch
    {
    case glsl:
        __intrinsic_asm "atomicMax($0, $1)";
    case spirv:
        return spirv_asm
        {
            OpExtension "SPV_EXT_shader_atomic_float_min_max";
            OpCapability AtomicFloat32MinMaxEXT;
            result:$$float = OpAtomicFMaxEXT &ioValue Device None $value
        };
    }
}

// half atomic max: as the float overload, but with the 16-bit min/max capability.
__glsl_version(430)
__glsl_extension(GL_EXT_shader_atomic_float2)
[ForceInline]
[require(glsl_spirv, atomic_glsl_float2)]
half __atomicMax(__ref half ioValue, half value)
{
    __target_switch
    {
    case glsl:
        __intrinsic_asm "atomicMax($0, $1)";
    case spirv:
        return spirv_asm
        {
            OpExtension "SPV_EXT_shader_atomic_float_min_max";
            OpCapability AtomicFloat16MinMaxEXT;
            result:$$half = OpAtomicFMaxEXT &ioValue Device None $value
        };
    }
}

// Min

// HLSL-only helper: expands to the NVAPI call `NvInterlockedMinUint64`.
[__requiresNVAPI]
[require(hlsl, atomic_hlsl_nvapi)]
uint2 __atomicMin(RWByteAddressBuffer buf, uint offset, uint2 value)
{
    __target_switch
    {
    case hlsl:
        __intrinsic_asm "NvInterlockedMinUint64($0, $1, $2)";
    }
}

// uint64_t atomic min: GLSL `atomicMin`, SPIR-V `OpAtomicUMin`.
__glsl_version(430)
__glsl_extension(GL_EXT_shader_atomic_int64)
[ForceInline]
[require(glsl_spirv, atomic_glsl_int64)]
uint64_t __atomicMin(__ref uint64_t ioValue, uint64_t value)
{
    __target_switch
    {
    case glsl:
        __intrinsic_asm "atomicMin($0, $1)";
    case spirv:
        return spirv_asm
        {
            OpCapability Int64Atomics;
            result:$$uint64_t = OpAtomicUMin &ioValue Device None $value
        };
    }
}

// float atomic min: GLSL `atomicMin`, SPIR-V `OpAtomicFMinEXT` with the 32-bit
// min/max capability.
__glsl_version(430)
__glsl_extension(GL_EXT_shader_atomic_float2)
[ForceInline]
[require(glsl_spirv, atomic_glsl_float2)]
float __atomicMin(__ref float ioValue, float value)
{
    __target_switch
    {
    case glsl:
        __intrinsic_asm "atomicMin($0, $1)";
    case spirv:
        return spirv_asm
        {
            OpExtension "SPV_EXT_shader_atomic_float_min_max";
            OpCapability AtomicFloat32MinMaxEXT;
            result:$$float = OpAtomicFMinEXT &ioValue Device None $value
        };
    }
}

// half atomic min: as the float overload, but with the 16-bit min/max capability.
__glsl_version(430)
__glsl_extension(GL_EXT_shader_atomic_float2)
[ForceInline]
[require(glsl_spirv, atomic_glsl_float2)]
half __atomicMin(__ref half ioValue, half value)
{
    __target_switch
    {
    case glsl:
        __intrinsic_asm "atomicMin($0, $1)";
    case spirv:
        return spirv_asm
        {
            OpExtension "SPV_EXT_shader_atomic_float_min_max";
            OpCapability AtomicFloat16MinMaxEXT;
            result:$$half = OpAtomicFMinEXT &ioValue Device None $value
        };
    }
}

// And

// HLSL-only helper: expands to the NVAPI call `NvInterlockedAndUint64`.
[__requiresNVAPI]
[require(hlsl, atomic_hlsl_nvapi)]
uint2 __atomicAnd(RWByteAddressBuffer buf, uint offset, uint2 value)
{
    __target_switch
    {
    case hlsl:
        __intrinsic_asm "NvInterlockedAndUint64($0, $1, $2)";
    }
}
__glsl_version(430) __glsl_extension(GL_EXT_shader_atomic_int64) [ForceInline] [require(glsl_spirv, atomic_glsl_int64)] uint64_t __atomicAnd(__ref uint64_t ioValue, uint64_t value) { __target_switch { case glsl: __intrinsic_asm "atomicAnd($0, $1)"; case spirv: return spirv_asm { OpCapability Int64Atomics; result:$$uint64_t = OpAtomicAnd &ioValue Device None $value }; } } // Or [__requiresNVAPI] [require(hlsl, atomic_hlsl_nvapi)] uint2 __atomicOr(RWByteAddressBuffer buf, uint offset, uint2 value) { __target_switch { case hlsl: __intrinsic_asm "NvInterlockedOrUint64($0, $1, $2)"; } } __glsl_version(430) __glsl_extension(GL_EXT_shader_atomic_int64) [ForceInline] [require(glsl_spirv, atomic_glsl_int64)] uint64_t __atomicOr(__ref uint64_t ioValue, uint64_t value) { __target_switch { case glsl: __intrinsic_asm "atomicOr($0, $1)"; case spirv: return spirv_asm { OpCapability Int64Atomics; result:$$uint64_t = OpAtomicOr &ioValue Device None $value }; } } // Xor [__requiresNVAPI] [require(hlsl, atomic_hlsl_nvapi)] uint2 __atomicXor(RWByteAddressBuffer buf, uint offset, uint2 value) { __target_switch { case hlsl: __intrinsic_asm "NvInterlockedXorUint64($0, $1, $2)"; } } __glsl_version(430) __glsl_extension(GL_EXT_shader_atomic_int64) [ForceInline] [require(glsl_spirv, atomic_glsl_int64)] uint64_t __atomicXor(__ref uint64_t ioValue, uint64_t value) { __target_switch { case glsl: __intrinsic_asm "atomicXor($0, $1)"; case spirv: return spirv_asm { OpCapability Int64Atomics; result:$$uint64_t = OpAtomicXor &ioValue Device None $value }; } } // Exchange [__requiresNVAPI] [require(hlsl, atomic_hlsl_nvapi)] uint2 __atomicExchange(RWByteAddressBuffer buf, uint offset, uint2 value) { __target_switch { case hlsl: __intrinsic_asm "NvInterlockedExchangeUint64($0, $1, $2)"; } } __glsl_version(430) __glsl_extension(GL_EXT_shader_atomic_int64) [ForceInline] [require(glsl_spirv, atomic_glsl_int64)] uint64_t __atomicExchange(__ref uint64_t ioValue, uint64_t value) { __target_switch { case 
glsl: __intrinsic_asm "atomicExchange($0, $1)"; case spirv: return spirv_asm { OpCapability Int64Atomics; result:$$uint64_t = OpAtomicExchange &ioValue Device None $value }; } } // Conversion between uint64_t and uint2 [require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] uint2 __asuint2(uint64_t i) { return uint2(uint(i), uint(uint64_t(i) >> 32)); } [require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] uint64_t __asuint64(uint2 i) { return (uint64_t(i.y) << 32) | i.x; } // __intrinsic_op($(kIROp_ByteAddressBufferLoad)) [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer)] T __byteAddressBufferLoad(ByteAddressBuffer buffer, int offset, int alignment); __intrinsic_op($(kIROp_ByteAddressBufferLoad)) [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] T __byteAddressBufferLoad(RWByteAddressBuffer buffer, int offset, int alignment); __intrinsic_op($(kIROp_ByteAddressBufferLoad)) [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] T __byteAddressBufferLoad(RasterizerOrderedByteAddressBuffer buffer, int offset, int alignment); __intrinsic_op($(kIROp_ByteAddressBufferStore)) [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] void __byteAddressBufferStore(RWByteAddressBuffer buffer, int offset, int alignment, T value); __intrinsic_op($(kIROp_ByteAddressBufferStore)) [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] void __byteAddressBufferStore(RasterizerOrderedByteAddressBuffer buffer, int offset, int alignment, T value); __generic __magic_type(HLSLStructuredBufferType) __intrinsic_type($(kIROp_HLSLStructuredBufferType)) struct StructuredBuffer { [__readNone] [__unsafeForceInlineEarly] void GetDimensions( out uint numStructs, out uint stride) { let rs = __structuredBufferGetDimensions(this); numStructs = rs.x; stride = rs.y; } __intrinsic_op($(kIROp_StructuredBufferLoad)) [__readNone] [require(cpp_cuda_glsl_hlsl_spirv, structuredbuffer)] T Load(TIndex location); __intrinsic_op($(kIROp_StructuredBufferLoadStatus)) 
[require(hlsl, structuredbuffer)] T Load(TIndex location, out uint status); __generic __subscript(TIndex index) -> T { [__readNone] __intrinsic_op($(kIROp_StructuredBufferLoad)) [require(cpp_cuda_glsl_hlsl_spirv, structuredbuffer)] get; }; }; __generic __magic_type(HLSLConsumeStructuredBufferType) __intrinsic_type($(kIROp_HLSLConsumeStructuredBufferType)) [require(cpp_cuda_glsl_hlsl_spirv, consumestructuredbuffer)] struct ConsumeStructuredBuffer { __intrinsic_op($(kIROp_StructuredBufferConsume)) T Consume(); [ForceInline] void GetDimensions( out uint numStructs, out uint stride) { let result = __structuredBufferGetDimensions(this); numStructs = result.x; stride = result.y; } }; __intrinsic_op($(kIROp_GetElement)) T __getElement(U collection, I index); __generic [require(glsl_hlsl_spirv, hull)] __magic_type(HLSLInputPatchType) __intrinsic_type($(kIROp_HLSLInputPatchType)) struct InputPatch { __generic __subscript(TIndex index)->T { [__unsafeForceInlineEarly] get { __target_switch { case hlsl: __intrinsic_asm ".operator[]"; default: return __getElement(this, index); } } } }; __generic [require(glsl_hlsl_spirv, domain_hull)] __magic_type(HLSLOutputPatchType) __intrinsic_type($(kIROp_HLSLOutputPatchType)) struct OutputPatch { __generic __subscript(TIndex index)->T { [__unsafeForceInlineEarly] get { __target_switch { case hlsl: __intrinsic_asm ".operator[]"; default: return __getElement(this, index); } } } }; ${{{{ static const struct { IROp op; char const* name; } kMutableByteAddressBufferCases[] = { { kIROp_HLSLRWByteAddressBufferType, "RWByteAddressBuffer" }, { kIROp_HLSLRasterizerOrderedByteAddressBufferType, "RasterizerOrderedByteAddressBuffer" }, }; for(auto item : kMutableByteAddressBufferCases) { }}}} __magic_type(HLSL$(item.name)Type) __intrinsic_type($(item.op)) struct $(item.name) { // Note(tfoley): supports all operations from `ByteAddressBuffer` // TODO(tfoley): can this be made a sub-type? 
[__unsafeForceInlineEarly] [require(cpp_cuda_glsl_hlsl_spirv, structuredbuffer_rw)] void GetDimensions(out uint dim) { __target_switch { case cpp: __intrinsic_asm ".GetDimensions"; case cuda: __intrinsic_asm ".GetDimensions"; case hlsl: __intrinsic_asm ".GetDimensions"; case glsl: case spirv: dim = __structuredBufferGetDimensions(__getEquivalentStructuredBuffer(this)).x*4; } } [__NoSideEffect] [ForceInline] [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] uint Load(int location) { __target_switch { case hlsl: __intrinsic_asm ".Load"; default: return __byteAddressBufferLoad(this, location, 0); } } [__NoSideEffect] [ForceInline] [require(hlsl, byteaddressbuffer_rw)] uint Load(int location, out uint status) { __target_switch { case hlsl: __intrinsic_asm ".Load"; } } [__NoSideEffect] [ForceInline] [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] uint2 Load2(int location) { __target_switch { case hlsl: __intrinsic_asm ".Load2"; default: return __byteAddressBufferLoad(this, location, 0); } } [__NoSideEffect] [ForceInline] [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] uint2 Load2(int location, int alignment) { __target_switch { case hlsl: __intrinsic_asm ".Load2"; default: return __byteAddressBufferLoad(this, location, alignment); } } [__NoSideEffect] [ForceInline] [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] uint2 Load2Aligned(int location) { __target_switch { case hlsl: __intrinsic_asm ".Load2"; default: return __byteAddressBufferLoad(this, location, __naturalStrideOf()); } } [__NoSideEffect] [ForceInline] [require(hlsl, byteaddressbuffer_rw)] uint2 Load2(int location, out uint status) { __target_switch { case hlsl: __intrinsic_asm ".Load2"; } } [__NoSideEffect] [ForceInline] [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] uint3 Load3(int location) { __target_switch { case hlsl: __intrinsic_asm ".Load3"; default: return __byteAddressBufferLoad(this, location, 0); } } [__NoSideEffect] 
[ForceInline] [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] uint3 Load3(int location, int alignment) { __target_switch { case hlsl: __intrinsic_asm ".Load3"; default: return __byteAddressBufferLoad(this, location, alignment); } } [__NoSideEffect] [ForceInline] [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] uint3 Load3Aligned(int location) { __target_switch { case hlsl: __intrinsic_asm ".Load3"; default: return __byteAddressBufferLoad(this, location, __naturalStrideOf()); } } [__NoSideEffect] [ForceInline] [require(hlsl, byteaddressbuffer_rw)] uint3 Load3(int location, out uint status) { __target_switch { case hlsl: __intrinsic_asm ".Load3"; } } [__NoSideEffect] [ForceInline] [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] uint4 Load4(int location) { __target_switch { case hlsl: __intrinsic_asm ".Load4"; default: return __byteAddressBufferLoad(this, location, 0); } } [__NoSideEffect] [ForceInline] [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] uint4 Load4(int location, int alignment) { __target_switch { case hlsl: __intrinsic_asm ".Load4"; default: return __byteAddressBufferLoad(this, location, alignment); } } [__NoSideEffect] [ForceInline] [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] uint4 Load4Aligned(int location) { __target_switch { case hlsl: __intrinsic_asm ".Load4"; default: return __byteAddressBufferLoad(this, location, __naturalStrideOf()); } } [__NoSideEffect] [ForceInline] [require(hlsl, byteaddressbuffer_rw)] uint4 Load4(int location, out uint status) { __target_switch { case hlsl: __intrinsic_asm ".Load4"; } } [__NoSideEffect] [ForceInline] [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] T Load(int location) { return __byteAddressBufferLoad(this, location, 0); } [__NoSideEffect] [ForceInline] [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] T Load(int location, int alignment) { return __byteAddressBufferLoad(this, location, alignment); } 
[__NoSideEffect] [ForceInline] [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] T LoadAligned(int location) { return __byteAddressBufferLoad(this, location, __naturalStrideOf()); } ${{{{ if (item.op == kIROp_HLSLRWByteAddressBufferType) { }}}} // float32 and int64 atomic support. This is a Slang-specific extension, it uses // GL_EXT_shader_atomic_float on Vulkan // NvAPI support on DX // NOTE! To use this feature on HLSL based targets the path to 'nvHLSLExtns.h' from the NvAPI SDK must // be set. This include will be added to the *output* that is passed to a downstream compiler. // Also note that you *can* include NVAPI headers in your Slang source, and directly use NVAPI functions // Directly using NVAPI functions does *not* add the #include on the output // Finally note you can *mix* NVAPI direct calls, and use of NVAPI intrinsics below. This doesn't cause // any clashes, as Slang will emit any NVAPI function it parsed (say via an include in Slang source) with // unique functions. 
// // https://www.khronos.org/registry/vulkan/specs/1.2-extensions/html/vkspec.html#VK_EXT_shader_atomic_float // https://htmlpreview.github.io/?https://github.com/KhronosGroup/SPIRV-Registry/blob/master/extensions/EXT/SPV_EXT_shader_atomic_float_add.html // F32 Add __cuda_sm_version(2.0) [__requiresNVAPI] [ForceInline] [require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_nvapi_cuda_metal_float1)] void InterlockedAddF32(uint byteAddress, float valueToAdd, out float originalValue) { __target_switch { case hlsl: __intrinsic_asm "($3 = NvInterlockedAddFp32($0, $1, $2))"; case cuda: __intrinsic_asm "(*$3 = atomicAdd($0._getPtrAt($1), $2))"; case metal: { let buf = __getEquivalentStructuredBuffer(this); __metalInterlocked_add(__getMetalAtomicRef(buf[byteAddress / 4]), valueToAdd, originalValue); return; } case glsl: case spirv: { let buf = __getEquivalentStructuredBuffer(this); originalValue = __atomicAdd(buf[byteAddress / 4], valueToAdd); return; } } } // FP16x2 [__requiresNVAPI] [ForceInline] [require(hlsl, atomic_hlsl_nvapi)] uint _NvInterlockedAddFp16x2(uint byteAddress, uint fp16x2Value) { __target_switch { case hlsl: __intrinsic_asm "NvInterlockedAddFp16x2($0, $1, $2)"; } } [__requiresNVAPI] [ForceInline] void InterlockedAddF16(uint byteAddress, half value, out half originalValue) { __target_switch { case hlsl: if ((byteAddress & 2) == 0) { uint packedInput = asuint16(value); originalValue = asfloat16((uint16_t)_NvInterlockedAddFp16x2(byteAddress, packedInput)); } else { byteAddress = byteAddress & ~3; uint packedInput = ((uint)asuint16(value)) << 16; originalValue = asfloat16((uint16_t)(_NvInterlockedAddFp16x2(byteAddress, packedInput) >> 16)); } return; case glsl: case spirv: { let buf = __getEquivalentStructuredBuffer(this); if ((byteAddress & 2) == 0) { originalValue = __atomicAdd(buf[byteAddress/4], half2(value, half(0.0))).x; } else { originalValue = __atomicAdd(buf[byteAddress/4], half2(half(0.0), value)).y; } return; } } } // Without returning original 
value [__requiresNVAPI] [ForceInline] __cuda_sm_version(2.0) [require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_nvapi_cuda_metal_float1)] void InterlockedAddF32(uint byteAddress, float valueToAdd) { __target_switch { case hlsl: __intrinsic_asm "(NvInterlockedAddFp32($0, $1, $2))"; case cuda: __intrinsic_asm "atomicAdd($0._getPtrAt($1), $2)"; case metal: { let buf = __getEquivalentStructuredBuffer(this); __metalInterlocked_add(__getMetalAtomicRef(buf[byteAddress / 4]), valueToAdd); return; } case glsl: case spirv: { let buf = __getEquivalentStructuredBuffer(this); __atomicAdd(buf[byteAddress / 4], valueToAdd); return; } } } // Int64 Add [ForceInline] __cuda_sm_version(6.0) [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda6_int64)] void InterlockedAddI64(uint byteAddress, int64_t valueToAdd, out int64_t originalValue) { __target_switch { case cuda: __intrinsic_asm "(*$3 = atomicAdd($0._getPtrAt($1), $2))"; case hlsl: originalValue = __asuint64(__atomicAdd(this, byteAddress, __asuint2(valueToAdd))); case glsl: case spirv: { let buf = __getEquivalentStructuredBuffer(this); originalValue = __atomicAdd(buf[byteAddress / 8], valueToAdd); } } } // Without returning original value __cuda_sm_version(6.0) [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda6_int64)] void InterlockedAddI64(uint byteAddress, int64_t valueToAdd) { __target_switch { case cuda: __intrinsic_asm "atomicAdd($0._getPtrAt($1), $2)"; case hlsl: __atomicAdd(this, byteAddress, __asuint2(valueToAdd)); case glsl: case spirv: let buf = __getEquivalentStructuredBuffer(this); __atomicAdd(buf[byteAddress / 8], valueToAdd); } } // Cas uint64_t [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda9_int64)] void InterlockedCompareExchangeU64(uint byteAddress, uint64_t compareValue, uint64_t value, out uint64_t outOriginalValue) { __target_switch { case cuda: __intrinsic_asm "(*$4 = atomicCAS($0._getPtrAt($1), $2, $3))"; case hlsl: outOriginalValue = __asuint64(__cas(this, byteAddress, 
__asuint2(compareValue), __asuint2(value))); case glsl: case spirv: let buf = __getEquivalentStructuredBuffer(this); outOriginalValue = __cas(buf[byteAddress / 8], compareValue, value); } } // Max __cuda_sm_version(5.0) [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda5_int64)] uint64_t InterlockedMaxU64(uint byteAddress, uint64_t value) { __target_switch { case cuda: __intrinsic_asm "atomicMax($0._getPtrAt($1), $2)"; case hlsl: return __asuint64(__atomicMax(this, byteAddress, __asuint2(value))); case glsl: case spirv: let buf = __getEquivalentStructuredBuffer(this); return __atomicMax(buf[byteAddress / 8], value); } } [ForceInline] [require(hlsl, atomic_hlsl_sm_6_6)] void InterlockedMax64(uint byteAddress, int64_t value) { __target_switch { case hlsl: __intrinsic_asm ".InterlockedMax64"; } } [ForceInline] [require(hlsl, atomic_hlsl_sm_6_6)] void InterlockedMax64(uint byteAddress, int64_t value, out int64_t outOriginalValue) { __target_switch { case hlsl: __intrinsic_asm ".InterlockedMax64"; } } [ForceInline] [require(hlsl, atomic_hlsl_sm_6_6)] void InterlockedMax64(uint byteAddress, uint64_t value) { __target_switch { case hlsl: __intrinsic_asm ".InterlockedMax64"; } } [ForceInline] [require(hlsl, atomic_hlsl_sm_6_6)] void InterlockedMax64(uint byteAddress, uint64_t value, out uint64_t outOriginalValue) { __target_switch { case hlsl: __intrinsic_asm ".InterlockedMax64"; } } // Min __cuda_sm_version(5.0) [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda5_int64)] uint64_t InterlockedMinU64(uint byteAddress, uint64_t value) { __target_switch { case cuda: __intrinsic_asm "atomicMin($0._getPtrAt($1), $2)"; case hlsl: return __asuint64(__atomicMin(this, byteAddress, __asuint2(value))); case glsl: case spirv: let buf = __getEquivalentStructuredBuffer(this); return __atomicMin(buf[byteAddress / 8], value); } } [ForceInline] [require(hlsl, atomic_hlsl_sm_6_6)] void InterlockedMin64(uint byteAddress, int64_t value) { __target_switch { case hlsl: 
__intrinsic_asm ".InterlockedMin64"; } } [ForceInline] [require(hlsl, atomic_hlsl_sm_6_6)] void InterlockedMin64(uint byteAddress, int64_t value, out int64_t outOriginalValue) { __target_switch { case hlsl: __intrinsic_asm ".InterlockedMin64"; } } [ForceInline] [require(hlsl, atomic_hlsl_sm_6_6)] void InterlockedMin64(uint byteAddress, uint64_t value) { __target_switch { case hlsl: __intrinsic_asm ".InterlockedMin64"; } } [ForceInline] [require(hlsl, atomic_hlsl_sm_6_6)] void InterlockedMin64(uint byteAddress, uint64_t value, out uint64_t outOriginalValue) { __target_switch { case hlsl: __intrinsic_asm ".InterlockedMin64"; } } // And __cuda_sm_version(5.0) [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda5_int64)] uint64_t InterlockedAndU64(uint byteAddress, uint64_t value) { __target_switch { case cuda: __intrinsic_asm "atomicAnd($0._getPtrAt($1), $2)"; case hlsl: return __asuint64(__atomicAnd(this, byteAddress, __asuint2(value))); case glsl: case spirv: let buf = __getEquivalentStructuredBuffer(this); return __atomicAnd(buf[byteAddress / 8], value); } } [ForceInline] [require(hlsl, atomic_hlsl_sm_6_6)] void InterlockedAnd64(uint byteAddress, uint64_t value) { __target_switch { case hlsl: __intrinsic_asm ".InterlockedAnd64"; } } [ForceInline] [require(hlsl, atomic_hlsl_sm_6_6)] void InterlockedAnd64(uint byteAddress, uint64_t value, out uint64_t outOriginalValue) { __target_switch { case hlsl: __intrinsic_asm ".InterlockedAnd64"; } } // Or __cuda_sm_version(5.0) [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda5_int64)] uint64_t InterlockedOrU64(uint byteAddress, uint64_t value) { __target_switch { case cuda: __intrinsic_asm "atomicOr($0._getPtrAt($1), $2)"; case hlsl: return __asuint64(__atomicOr(this, byteAddress, __asuint2(value))); case glsl: case spirv: let buf = __getEquivalentStructuredBuffer(this); return __atomicOr(buf[byteAddress / 8], value); } } [ForceInline] [require(hlsl, atomic_hlsl_sm_6_6)] void InterlockedOr64(uint byteAddress, 
uint64_t value) { __target_switch { case hlsl: __intrinsic_asm ".InterlockedOr64"; } } [ForceInline] [require(hlsl, atomic_hlsl_sm_6_6)] void InterlockedOr64(uint byteAddress, uint64_t value, out uint64_t outOriginalValue) { __target_switch { case hlsl: __intrinsic_asm ".InterlockedOr64"; } } // Xor __cuda_sm_version(5.0) [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda5_int64)] uint64_t InterlockedXorU64(uint byteAddress, uint64_t value) { __target_switch { case cuda: __intrinsic_asm "atomicXor($0._getPtrAt($1), $2)"; case hlsl: return __asuint64(__atomicXor(this, byteAddress, __asuint2(value))); case glsl: case spirv: let buf = __getEquivalentStructuredBuffer(this); return __atomicXor(buf[byteAddress / 8], value); } } [ForceInline] [require(hlsl, atomic_hlsl_sm_6_6)] void InterlockedXor64(uint byteAddress, uint64_t value) { __target_switch { case hlsl: __intrinsic_asm ".InterlockedXor64"; } } [ForceInline] [require(hlsl, atomic_hlsl_sm_6_6)] void InterlockedXor64(uint byteAddress, uint64_t value, out uint64_t outOriginalValue) { __target_switch { case hlsl: __intrinsic_asm ".InterlockedXor64"; } } // Exchange [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda9_int64)] uint64_t InterlockedExchangeU64(uint byteAddress, uint64_t value) { __target_switch { case cuda: __intrinsic_asm "atomicExch($0._getPtrAt($1), $2)"; case hlsl: return __asuint64(__atomicExchange(this, byteAddress, __asuint2(value))); case glsl: case spirv: let buf = __getEquivalentStructuredBuffer(this); return __atomicExchange(buf[byteAddress / 8], value); } } [ForceInline] [require(hlsl, atomic_hlsl_sm_6_6)] void InterlockedExchangeFloat(uint byteAddress, float value, out float outOriginalValue) { __target_switch { case hlsl: __intrinsic_asm ".InterlockedExchangeFloat"; } } [ForceInline] [require(hlsl, atomic_hlsl_sm_6_6)] void InterlockedExchange64(uint byteAddress, int64_t value, out int64_t outOriginalValue) { __target_switch { case hlsl: __intrinsic_asm 
".InterlockedExchange64"; } } [ForceInline] [require(hlsl, atomic_hlsl_sm_6_6)] void InterlockedExchange64(uint byteAddress, uint64_t value, out uint64_t outOriginalValue) { __target_switch { case hlsl: __intrinsic_asm ".InterlockedExchange64"; } } // SM6.6 6 64bit atomics. [ForceInline] [require(glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)] void InterlockedAdd64(uint byteAddress, int64_t valueToAdd) { __target_switch { case hlsl: __intrinsic_asm ".InterlockedAdd64"; case glsl: case spirv: let buf = __getEquivalentStructuredBuffer(this); __atomicAdd(buf[byteAddress / 8], valueToAdd); } } [ForceInline] [require(glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)] void InterlockedAdd64(uint byteAddress, int64_t valueToAdd, out int64_t outOriginalValue) { __target_switch { case hlsl: __intrinsic_asm ".InterlockedAdd64"; case glsl: case spirv: let buf = __getEquivalentStructuredBuffer(this); outOriginalValue = __atomicAdd(buf[byteAddress / 8], valueToAdd); return; } } [ForceInline] [require(glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)] void InterlockedAdd64(uint byteAddress, uint64_t valueToAdd) { __target_switch { case hlsl: __intrinsic_asm ".InterlockedAdd64"; case glsl: case spirv: let buf = __getEquivalentStructuredBuffer(this); __atomicAdd(buf[byteAddress / 8], valueToAdd); } } [ForceInline] [require(glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)] void InterlockedAdd64(uint byteAddress, uint64_t valueToAdd, out uint64_t outOriginalValue) { __target_switch { case hlsl: __intrinsic_asm ".InterlockedAdd64"; case glsl: case spirv: let buf = __getEquivalentStructuredBuffer(this); outOriginalValue = __atomicAdd(buf[byteAddress / 8], valueToAdd); return; } } [ForceInline] [require(glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)] void InterlockedCompareExchange64(uint byteAddress, int64_t compareValue, int64_t value, out int64_t outOriginalValue) { __target_switch { case hlsl: __cas(this, byteAddress, compareValue, value, outOriginalValue); return; case glsl: case spirv: 
let buf = __getEquivalentStructuredBuffer(this); outOriginalValue = __cas(buf[byteAddress / 8], compareValue, value); return; } } [ForceInline] [require(glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)] void InterlockedCompareExchange64(uint byteAddress, uint64_t compareValue, uint64_t value, out uint64_t outOriginalValue) { __target_switch { case hlsl: __cas(this, byteAddress, compareValue, value, outOriginalValue); return; case glsl: case spirv: let buf = __getEquivalentStructuredBuffer(this); outOriginalValue = __cas(buf[byteAddress / 8], compareValue, value); return; } } [ForceInline] [require(hlsl, atomic_hlsl_sm_6_6)] void InterlockedCompareStoreFloatBitwise(uint byteAddress, float compareValue, float value) { __target_switch { case hlsl: __intrinsic_asm ".InterlockedCompareStoreFloatBitwise"; } } [ForceInline] [require(hlsl, atomic_hlsl_sm_6_6)] void InterlockedCompareExchangeFloatBitwise(uint byteAddress, float compareValue, float value, out float outOriginalValue) { __target_switch { case hlsl: __intrinsic_asm ".InterlockedCompareExchangeFloatBitwise"; } } [ForceInline] [require(hlsl, atomic_hlsl_sm_6_6)] void InterlockedCompareStore64(uint byteAddress, int64_t compareValue, int64_t value) { __target_switch { case hlsl: __intrinsic_asm ".InterlockedCompareStore64"; } } [ForceInline] [require(hlsl, atomic_hlsl_sm_6_6)] void InterlockedCompareStore64(uint byteAddress, uint64_t compareValue, uint64_t value) { __target_switch { case hlsl: __intrinsic_asm ".InterlockedCompareStore64"; } } ${{{{ } // endif (type == RWByteAddressBuffer) }}}} // Added operations: [ForceInline] [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] void InterlockedAdd( UINT dest, UINT value, out UINT original_value) { __target_switch { case glsl: __intrinsic_asm "($3 = atomicAdd($0._data[$1/4], $2))"; case cuda: __intrinsic_asm "(*$3 = atomicAdd($0._getPtrAt($1), $2))"; case hlsl: __intrinsic_asm ".InterlockedAdd"; case metal: { let buf = __getEquivalentStructuredBuffer(this); 
__metalInterlocked_add(__getMetalAtomicRef(buf[dest / 4]), value, original_value);
            return;
        }
    case spirv:
        let buf = __getEquivalentStructuredBuffer(this);
        ::InterlockedAdd(buf[dest / 4], value, original_value);
    }
}

// Atomic add of `value` at byte offset `dest`, discarding the previous value.
// GLSL, Metal and SPIR-V address the buffer as 32-bit words, hence the `dest / 4` index.
[ForceInline]
[require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
void InterlockedAdd(
    UINT dest,
    UINT value)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "atomicAdd($0._data[$1/4], $2)";
    case cuda: __intrinsic_asm "atomicAdd($0._getPtrAt($1), $2)";
    case hlsl: __intrinsic_asm ".InterlockedAdd";
    case metal:
        {
            let buf = __getEquivalentStructuredBuffer(this);
            __metalInterlocked_add(__getMetalAtomicRef(buf[dest / 4]), value);
            return;
        }
    case spirv:
        let buf = __getEquivalentStructuredBuffer(this);
        ::InterlockedAdd(buf[dest / 4], value);
    }
}

// Atomic bitwise AND of `value` at byte offset `dest`; the previous value is
// written to `original_value`.
[ForceInline]
[require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
void InterlockedAnd(
    UINT dest,
    UINT value,
    out UINT original_value)
{
    __target_switch
    {
    // Parenthesized like the other value-returning atomics in this struct so the
    // emitted assignment is always a single, self-contained expression.
    case glsl: __intrinsic_asm "($3 = atomicAnd($0._data[$1/4], $2))";
    case cuda: __intrinsic_asm "(*$3 = atomicAnd($0._getPtrAt($1), $2))";
    case hlsl: __intrinsic_asm ".InterlockedAnd";
    case metal:
        {
            let buf = __getEquivalentStructuredBuffer(this);
            __metalInterlocked_and(__getMetalAtomicRef(buf[dest / 4]), value, original_value);
            return;
        }
    case spirv:
        let buf = __getEquivalentStructuredBuffer(this);
        ::InterlockedAnd(buf[dest / 4], value, original_value);
    }
}

// Atomic bitwise AND of `value` at byte offset `dest`, discarding the previous value.
[ForceInline]
[require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
void InterlockedAnd(
    UINT dest,
    UINT value)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "atomicAnd($0._data[$1/4], $2)";
    case cuda: __intrinsic_asm "atomicAnd($0._getPtrAt($1), $2)";
    case hlsl: __intrinsic_asm ".InterlockedAnd";
    case metal:
        {
            let buf = __getEquivalentStructuredBuffer(this);
            __metalInterlocked_and(__getMetalAtomicRef(buf[dest / 4]), value);
            return;
        }
    case spirv:
        let buf = __getEquivalentStructuredBuffer(this);
        ::InterlockedAnd(buf[dest / 4], value);
    }
}

[ForceInline]
[require(cuda_glsl_hlsl_metal_spirv,
byteaddressbuffer_rw)] void InterlockedCompareExchange( UINT dest, UINT compare_value, UINT value, out UINT original_value) { __target_switch { case glsl: __intrinsic_asm "($4 = atomicCompSwap($0._data[$1/4], $2, $3))"; case cuda: __intrinsic_asm "(*$4 = atomicCAS($0._getPtrAt($1), $2, $3))"; case hlsl: __intrinsic_asm ".InterlockedCompareExchange"; case metal: { let buf = __getEquivalentStructuredBuffer(this); __metalInterlocked_compare_exchange(__getMetalAtomicRef(buf[dest / 4]), compare_value, value, original_value); return; } case spirv: let buf = __getEquivalentStructuredBuffer(this); ::InterlockedCompareExchange(buf[dest / 4], compare_value, value, original_value); } } [ForceInline] [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] void InterlockedCompareStore( UINT dest, UINT compare_value, UINT value) { __target_switch { case glsl: __intrinsic_asm "atomicCompSwap($0._data[$1/4], $2, $3)"; case cuda: __intrinsic_asm "atomicCAS($0._getPtrAt($1), $2, $3)"; case hlsl: __intrinsic_asm ".InterlockedCompareStore"; case metal: { let buf = __getEquivalentStructuredBuffer(this); __metalInterlocked_compare_exchange(__getMetalAtomicRef(buf[dest / 4]), compare_value, value); return; } case spirv: let buf = __getEquivalentStructuredBuffer(this); ::InterlockedCompareStore(buf[dest / 4], compare_value, value); } } [ForceInline] [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] void InterlockedExchange( UINT dest, UINT value, out UINT original_value) { __target_switch { case glsl: __intrinsic_asm "($3 = atomicExchange($0._data[$1/4], $2))"; case cuda: __intrinsic_asm "(*$3 = atomicExch($0._getPtrAt($1), $2))"; case hlsl: __intrinsic_asm ".InterlockedExchange"; case metal: { let buf = __getEquivalentStructuredBuffer(this); __metalInterlocked_exchange(__getMetalAtomicRef(buf[dest / 4]), value, original_value); return; } case spirv: let buf = __getEquivalentStructuredBuffer(this); ::InterlockedExchange(buf[dest / 4], value, original_value); } } 
[ForceInline] [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] void InterlockedMax( UINT dest, UINT value, out UINT original_value) { __target_switch { case glsl: __intrinsic_asm "($3 = atomicMax($0._data[$1/4], $2))"; case cuda: __intrinsic_asm "(*$3 = atomicMax($0._getPtrAt($1), $2))"; case hlsl: __intrinsic_asm ".InterlockedMax"; case metal: { let buf = __getEquivalentStructuredBuffer(this); __metalInterlocked_max(__getMetalAtomicRef(buf[dest / 4]), value, original_value); return; } case spirv: let buf = __getEquivalentStructuredBuffer(this); ::InterlockedMax(buf[dest / 4], value, original_value); } } [ForceInline] [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] void InterlockedMax( UINT dest, UINT value) { __target_switch { case glsl: __intrinsic_asm "atomicMax($0._data[$1/4], $2)"; case cuda: __intrinsic_asm "atomicMax($0._getPtrAt($1), $2)"; case hlsl: __intrinsic_asm ".InterlockedMax"; case metal: { let buf = __getEquivalentStructuredBuffer(this); __metalInterlocked_max(__getMetalAtomicRef(buf[dest / 4]), value); return; } case spirv: let buf = __getEquivalentStructuredBuffer(this); ::InterlockedMax(buf[dest / 4], value); } } [ForceInline] [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] void InterlockedMin( UINT dest, UINT value, out UINT original_value) { __target_switch { case glsl: __intrinsic_asm "($3 = atomicMin($0._data[$1/4], $2))"; case cuda: __intrinsic_asm "(*$3 = atomicMin($0._getPtrAt($1), $2))"; case hlsl: __intrinsic_asm ".InterlockedMin"; case metal: { let buf = __getEquivalentStructuredBuffer(this); __metalInterlocked_min(__getMetalAtomicRef(buf[dest / 4]), value, original_value); return; } case spirv: let buf = __getEquivalentStructuredBuffer(this); ::InterlockedMin(buf[dest / 4], value, original_value); } } [ForceInline] [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] void InterlockedMin( UINT dest, UINT value) { __target_switch { case glsl: __intrinsic_asm "atomicMin($0._data[$1/4], $2)"; case 
cuda: __intrinsic_asm "atomicMin($0._getPtrAt($1), $2)"; case hlsl: __intrinsic_asm ".InterlockedMin"; case metal: { let buf = __getEquivalentStructuredBuffer(this); __metalInterlocked_min(__getMetalAtomicRef(buf[dest / 4]), value); return; } case spirv: let buf = __getEquivalentStructuredBuffer(this); ::InterlockedMin(buf[dest / 4], value); } } [ForceInline] [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] void InterlockedOr( UINT dest, UINT value, out UINT original_value) { __target_switch { case glsl: __intrinsic_asm "($3 = atomicOr($0._data[$1/4], $2))"; case cuda: __intrinsic_asm "(*$3 = atomicOr($0._getPtrAt($1), $2))"; case hlsl: __intrinsic_asm ".InterlockedOr"; case metal: { let buf = __getEquivalentStructuredBuffer(this); __metalInterlocked_or(__getMetalAtomicRef(buf[dest / 4]), value, original_value); return; } case spirv: let buf = __getEquivalentStructuredBuffer(this); ::InterlockedOr(buf[dest / 4], value, original_value); } } [ForceInline] [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] void InterlockedOr( UINT dest, UINT value) { __target_switch { case glsl: __intrinsic_asm "atomicOr($0._data[$1/4], $2)"; case cuda: __intrinsic_asm "atomicOr($0._getPtrAt($1), $2)"; case hlsl: __intrinsic_asm ".InterlockedOr"; case metal: { let buf = __getEquivalentStructuredBuffer(this); __metalInterlocked_or(__getMetalAtomicRef(buf[dest / 4]), value); return; } case spirv: let buf = __getEquivalentStructuredBuffer(this); ::InterlockedOr(buf[dest / 4], value); } } [ForceInline] [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] void InterlockedXor( UINT dest, UINT value, out UINT original_value) { __target_switch { case glsl: __intrinsic_asm "($3 = atomicXor($0._data[$1/4], $2))"; case cuda: __intrinsic_asm "(*$3 = atomicXor($0._getPtrAt($1), $2))"; case hlsl: __intrinsic_asm ".InterlockedXor"; case metal: { let buf = __getEquivalentStructuredBuffer(this); __metalInterlocked_xor(__getMetalAtomicRef(buf[dest / 4]), value, 
original_value); return; } case spirv: let buf = __getEquivalentStructuredBuffer(this); ::InterlockedXor(buf[dest / 4], value, original_value); } } [ForceInline] [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] void InterlockedXor( UINT dest, UINT value) { __target_switch { case glsl: __intrinsic_asm "atomicXor($0._data[$1/4], $2)"; case cuda: __intrinsic_asm "atomicXor($0._getPtrAt($1), $2)"; case hlsl: __intrinsic_asm ".InterlockedXor"; case metal: { let buf = __getEquivalentStructuredBuffer(this); __metalInterlocked_xor(__getMetalAtomicRef(buf[dest / 4]), value); return; } case spirv: let buf = __getEquivalentStructuredBuffer(this); ::InterlockedXor(buf[dest / 4], value); } } [ForceInline] [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] void Store(uint address, uint value) { __target_switch { case hlsl: __intrinsic_asm ".Store"; default: __byteAddressBufferStore(this, address, 0, value); } } [ForceInline] [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] void Store2(uint address, uint2 value) { __target_switch { case hlsl: __intrinsic_asm ".Store2"; default: __byteAddressBufferStore(this, address, 0, value); } } [ForceInline] [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] void Store2(uint address, uint2 value, uint alignment) { __target_switch { case hlsl: __intrinsic_asm ".Store2"; default: __byteAddressBufferStore(this, address, alignment, value); } } [ForceInline] [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] void Store2Aligned(uint address, uint2 value) { __target_switch { case hlsl: __intrinsic_asm ".Store2"; default: __byteAddressBufferStore(this, address, __naturalStrideOf(), value); } } [ForceInline] [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] void Store3(uint address, uint3 value) { __target_switch { case hlsl: __intrinsic_asm ".Store3"; default: __byteAddressBufferStore(this, address, 0, value); } } [ForceInline] [require(cpp_cuda_glsl_hlsl_metal_spirv, 
byteaddressbuffer_rw)]
void Store3(uint address, uint3 value, uint alignment)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm ".Store3";
    default: __byteAddressBufferStore(this, address, alignment, value);
    }
}

// Stores a `uint3` at byte offset `address`; on non-HLSL targets the alignment
// argument passed to `__byteAddressBufferStore` is `__naturalStrideOf()`.
// The capability set includes Metal to match the other Store3/Store4 overloads,
// which use the same `__byteAddressBufferStore` path.
[ForceInline]
[require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
void Store3Aligned(uint address, uint3 value)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm ".Store3";
    default: __byteAddressBufferStore(this, address, __naturalStrideOf(), value);
    }
}

// Stores a `uint4` at byte offset `address`; non-HLSL targets pass an alignment of 0.
// The capability set includes Metal to match `Store4(address, value, alignment)`,
// which uses the same `__byteAddressBufferStore` path.
[ForceInline]
[require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
void Store4(uint address, uint4 value)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm ".Store4";
    default: __byteAddressBufferStore(this, address, 0, value);
    }
}

// Stores a `uint4` at byte offset `address`, forwarding the caller-supplied
// `alignment` on non-HLSL targets.
[ForceInline]
[require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
void Store4(uint address, uint4 value, uint alignment)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm ".Store4";
    default: __byteAddressBufferStore(this, address, alignment, value);
    }
}

// Stores a `uint4` at byte offset `address`; on non-HLSL targets the alignment
// argument passed to `__byteAddressBufferStore` is `__naturalStrideOf()`.
[ForceInline]
[require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)]
void Store4Aligned(uint address, uint4 value)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm ".Store4";
    default: __byteAddressBufferStore(this, address, __naturalStrideOf(), value);
    }
}

// Stores a `T` at byte offset `offset`, passing an alignment of 0.
[ForceInline]
void Store(int offset, T value)
{
    __byteAddressBufferStore(this, offset, 0, value);
}

// Stores a `T` at byte offset `offset`, forwarding the caller-supplied `alignment`.
[ForceInline]
void Store(int offset, T value, uint alignment)
{
    __byteAddressBufferStore(this, offset, alignment, value);
}

// Stores a `T` at byte offset `offset`, passing `__naturalStrideOf()` as the alignment.
[ForceInline]
void StoreAligned(int offset, T value)
{
    __byteAddressBufferStore(this, offset, __naturalStrideOf(), value);
}

};

${{{{
}
}}}}

${{{{
// Table of the mutable structured-buffer types; the struct declaration that
// follows is generated once per entry, with `item.name` / `item.op` substituted.
static const struct {
    IROp op;
    char const* name;
} kMutableStructuredBufferCases[] =
{
    { kIROp_HLSLRWStructuredBufferType, "RWStructuredBuffer" },
    { kIROp_HLSLRasterizerOrderedStructuredBufferType, "RasterizerOrderedStructuredBuffer" },
};
for(auto item : kMutableStructuredBufferCases) {
}}}}

__generic
__magic_type(HLSL$(item.name)Type)
__intrinsic_type($(item.op))
[require(cpp_cuda_glsl_hlsl_metal_spirv, structuredbuffer_rw)] struct $(item.name) { uint DecrementCounter(); [__readNone] [__unsafeForceInlineEarly] [require(cpp_cuda_glsl_hlsl_metal_spirv, structuredbuffer_rw)] void GetDimensions( out uint numStructs, out uint stride) { __target_switch { case hlsl: __intrinsic_asm ".GetDimensions"; default: let rs = __structuredBufferGetDimensions(this); numStructs = rs.x; stride = rs.y; } } uint IncrementCounter(); [__NoSideEffect] __intrinsic_op($(kIROp_RWStructuredBufferLoad)) T Load(TIndex location); [__NoSideEffect] __intrinsic_op($(kIROp_RWStructuredBufferLoadStatus)) T Load(TIndex location, out uint status); __generic __subscript(TIndex index) -> T { [__NoSideEffect] __intrinsic_op($(kIROp_RWStructuredBufferGetElementPtr)) ref; } }; ${{{{ } }}}} __generic [require(glsl_hlsl_spirv, geometry)] __magic_type(HLSLPointStreamType) __intrinsic_type($(kIROp_HLSLPointStreamType)) struct PointStream { [KnownBuiltin("GeometryStreamAppend")] void Append(T value) { __target_switch { case glsl: __intrinsic_asm "EmitVertex()"; case hlsl: __intrinsic_asm ".Append"; case spirv: spirv_asm { OpEmitVertex; }; } } [KnownBuiltin("GeometryStreamRestart")] void RestartStrip() { __target_switch { case glsl: __intrinsic_asm "EndPrimitive()"; case hlsl: __intrinsic_asm ".RestartStrip"; case spirv: spirv_asm { OpEndPrimitive; }; } } }; __generic [require(glsl_hlsl_spirv, geometry)] __magic_type(HLSLLineStreamType) __intrinsic_type($(kIROp_HLSLLineStreamType)) struct LineStream { [KnownBuiltin("GeometryStreamAppend")] void Append(T value) { __target_switch { case glsl: __intrinsic_asm "EmitVertex()"; case hlsl: __intrinsic_asm ".Append"; case spirv: spirv_asm { OpEmitVertex; }; } } [KnownBuiltin("GeometryStreamRestart")] void RestartStrip() { __target_switch { case glsl: __intrinsic_asm "EndPrimitive()"; case hlsl: __intrinsic_asm ".RestartStrip"; case spirv: spirv_asm { OpEndPrimitive; }; } } }; __generic [require(glsl_hlsl_spirv, geometry)] 
__magic_type(HLSLTriangleStreamType) __intrinsic_type($(kIROp_HLSLTriangleStreamType)) struct TriangleStream { [KnownBuiltin("GeometryStreamAppend")] void Append(T value) { __target_switch { case glsl: __intrinsic_asm "EmitVertex()"; case hlsl: __intrinsic_asm ".Append"; case spirv: spirv_asm { OpEmitVertex; }; } } [KnownBuiltin("GeometryStreamRestart")] void RestartStrip() { __target_switch { case glsl: __intrinsic_asm "EndPrimitive()"; case hlsl: __intrinsic_asm ".RestartStrip"; case spirv: spirv_asm { OpEndPrimitive; }; } } }; #define VECTOR_MAP_UNARY(TYPE, COUNT, FUNC, VALUE) \ vector result; for(int i = 0; i < COUNT; ++i) { result[i] = FUNC(VALUE[i]); } return result #define MATRIX_MAP_UNARY(TYPE, ROWS, COLS, FUNC, VALUE) \ matrix result; for(int i = 0; i < ROWS; ++i) { result[i] = FUNC(VALUE[i]); } return result #define VECTOR_MAP_BINARY(TYPE, COUNT, FUNC, LEFT, RIGHT) \ vector result; for(int i = 0; i < COUNT; ++i) { result[i] = FUNC(LEFT[i], RIGHT[i]); } return result #define MATRIX_MAP_BINARY(TYPE, ROWS, COLS, FUNC, LEFT, RIGHT) \ matrix result; for(int i = 0; i < ROWS; ++i) { result[i] = FUNC(LEFT[i], RIGHT[i]); } return result #define VECTOR_MAP_TRINARY(TYPE, COUNT, FUNC, A, B, C) \ vector result; for(int i = 0; i < COUNT; ++i) { result[i] = FUNC(A[i], B[i], C[i]); } return result #define MATRIX_MAP_TRINARY(TYPE, ROWS, COLS, FUNC, A, B, C) \ matrix result; for(int i = 0; i < ROWS; ++i) { result[i] = FUNC(A[i], B[i], C[i]); } return result // Try to terminate the current draw or dispatch call (HLSL SM 4.0) void abort(); // Absolute value (HLSL SM 1.0) __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T abs(T x) { __target_switch { case hlsl: __intrinsic_asm "abs"; case glsl: __intrinsic_asm "abs"; case metal: __intrinsic_asm "abs"; case cuda: __intrinsic_asm "$P_abs($0)"; case cpp: __intrinsic_asm "$P_abs($0)"; case spirv: return spirv_asm { result:$$T = OpExtInst glsl450 SAbs $x }; //default: // Note: this simple definition 
may not be appropriate for floating-point inputs // return x < 0 ? -x : x; } } __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector abs(vector x) { __target_switch { case hlsl: __intrinsic_asm "abs"; case glsl: __intrinsic_asm "abs"; case metal: __intrinsic_asm "abs"; case spirv: return spirv_asm { result:$$vector = OpExtInst glsl450 SAbs $x; }; default: VECTOR_MAP_UNARY(T, N, abs, x); } } __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix abs(matrix x) { __target_switch { case hlsl: __intrinsic_asm "abs"; default: MATRIX_MAP_UNARY(T, N, M, abs, x); } } __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T abs(T x) { __target_switch { case hlsl: __intrinsic_asm "abs"; case metal: __intrinsic_asm "abs"; case glsl: __intrinsic_asm "abs"; case cuda: __intrinsic_asm "$P_abs($0)"; case cpp: __intrinsic_asm "$P_abs($0)"; case spirv: return spirv_asm { result:$$T = OpExtInst glsl450 FAbs $x; }; } } __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector abs(vector x) { __target_switch { case hlsl: __intrinsic_asm "abs"; case metal: __intrinsic_asm "abs"; case glsl: __intrinsic_asm "abs"; case spirv: return spirv_asm { result:$$vector = OpExtInst glsl450 FAbs $x; }; default: VECTOR_MAP_UNARY(T, N, abs, x); } } __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix abs(matrix x) { __target_switch { case hlsl: __intrinsic_asm "abs"; default: MATRIX_MAP_UNARY(T, N, M, abs, x); } } __generic [__readNone] [ForceInline] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T fabs(T x) { __target_switch { case metal: __intrinsic_asm "fabs"; default: return abs(x); } } __generic [__readNone] [ForceInline] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector fabs(vector x) { __target_switch { case metal: __intrinsic_asm "fabs"; default: return abs(x); } } // Inverse cosine (HLSL SM 1.0) __generic 
[__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T acos(T x) { __target_switch { case cpp: __intrinsic_asm "$P_acos($0)"; case cuda: __intrinsic_asm "$P_acos($0)"; case glsl: __intrinsic_asm "acos"; case hlsl: __intrinsic_asm "acos"; case metal: __intrinsic_asm "acos"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Acos $x }; } } __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector acos(vector x) { __target_switch { case glsl: __intrinsic_asm "acos"; case hlsl: __intrinsic_asm "acos"; case metal: __intrinsic_asm "acos"; case spirv: return spirv_asm { OpExtInst $$vector result glsl450 Acos $x }; default: VECTOR_MAP_UNARY(T, N, acos, x); } } __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix acos(matrix x) { __target_switch { case hlsl: __intrinsic_asm "acos"; default: MATRIX_MAP_UNARY(T, N, M, acos, x); } } // Inverse hyperbolic cosine __generic [__readNone] [ForceInline] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T acosh(T x) { __target_switch { case cpp: __intrinsic_asm "$P_acosh($0)"; case cuda: __intrinsic_asm "$P_acosh($0)"; case glsl: __intrinsic_asm "acosh"; case metal: __intrinsic_asm "acosh"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Acosh $x }; default: return log(x + sqrt( x * x - T(1))); } } __generic [__readNone] [ForceInline] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector acosh(vector x) { __target_switch { case glsl: __intrinsic_asm "acosh"; case metal: __intrinsic_asm "acosh"; case spirv: return spirv_asm { OpExtInst $$vector result glsl450 Acosh $x }; default: VECTOR_MAP_UNARY(T, N, acosh, x); } } // Test if all components are non-zero (HLSL SM 1.0) __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv)] bool all(T x) { __target_switch { default: __intrinsic_asm "bool($0)"; case hlsl: __intrinsic_asm "all"; case metal: __intrinsic_asm "all"; case spirv: let zero = __default(); if 
(__isInt())
            return spirv_asm { OpINotEqual $$bool result $x $zero };
        else if (__isFloat())
            return spirv_asm { OpFUnordNotEqual $$bool result $x $zero };
        else if (__isBool())
            return __slang_noop_cast(x);
        else
            return false;
    }
}

// Vector all: a one-element vector forwards to the scalar overload.
// On SPIR-V, non-bool inputs are first compared against zero and the
// resulting bool vector is reduced with OpAll.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv)]
bool all(vector x)
{
    if(N == 1)
        return all(x[0]);
    __target_switch
    {
    case hlsl:
        __intrinsic_asm "all";
    case metal:
        __intrinsic_asm "all";
    case glsl:
        __intrinsic_asm "all(bvec$N0($0))";
    case spirv:
        if (__isBool())
            return spirv_asm { OpAll $$bool result $x };
        else if (__isInt())
        {
            let zero = __default>();
            return spirv_asm { OpINotEqual $$vector %castResult $x $zero; OpAll $$bool result %castResult };
        }
        else
        {
            let zero = __default>();
            return spirv_asm { OpFUnordNotEqual $$vector %castResult $x $zero; OpAll $$bool result %castResult };
        }
    default:
        // Generic fallback: AND together the per-element results.
        bool result = true;
        for(int i = 0; i < N; ++i)
            result = result && all(x[i]);
        return result;
    }
}

// Matrix all: native on HLSL; otherwise ANDs `all` over x[0] .. x[N-1].
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv)]
bool all(matrix x)
{
    __target_switch
    {
    case hlsl:
        __intrinsic_asm "all";
    default:
        bool result = true;
        for(int i = 0; i < N; ++i)
            result = result && all(x[i]);
        return result;
    }
}

// Barrier for writes to all memory spaces (HLSL SM 5.0)
// The GLSL and SPIR-V forms name shared/workgroup, image and buffer/uniform
// storage with acquire-release semantics at device scope.
__glsl_extension(GL_KHR_memory_scope_semantics)
[require(cuda_glsl_hlsl_metal_spirv, memorybarrier)]
void AllMemoryBarrier()
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "AllMemoryBarrier";
    case glsl: __intrinsic_asm "memoryBarrier(gl_ScopeDevice, (gl_StorageSemanticsShared|gl_StorageSemanticsImage|gl_StorageSemanticsBuffer), gl_SemanticsAcquireRelease)";
    case cuda: __intrinsic_asm "__threadfence()";
    case metal: __intrinsic_asm "threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture | mem_flags::mem_threadgroup_imageblock)";
    case spirv: spirv_asm { OpMemoryBarrier Device AcquireRelease|UniformMemory|WorkgroupMemory|ImageMemory; };
    }
}

// Thread-group sync and barrier for writes to all memory spaces (HLSL SM 5.0)
__glsl_extension(GL_KHR_memory_scope_semantics)
[require(cuda_glsl_hlsl_metal_spirv, memorybarrier)]
void AllMemoryBarrierWithGroupSync()
{
    // Same storage classes as AllMemoryBarrier, issued through each
    // target's control-barrier form (workgroup execution scope on GLSL/SPIR-V).
    __target_switch
    {
    case hlsl: __intrinsic_asm "AllMemoryBarrierWithGroupSync";
    case glsl: __intrinsic_asm "controlBarrier(gl_ScopeWorkgroup, gl_ScopeDevice, (gl_StorageSemanticsShared|gl_StorageSemanticsImage|gl_StorageSemanticsBuffer), gl_SemanticsAcquireRelease)";
    case cuda: __intrinsic_asm "__syncthreads()";
    case metal: __intrinsic_asm "threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture | mem_flags::mem_threadgroup_imageblock)";
    case spirv: spirv_asm { OpControlBarrier Workgroup Device AcquireRelease|UniformMemory|WorkgroupMemory|ImageMemory; };
    }
}

// Returns the workgroup size of the calling entry point.
[require(compute)]
__intrinsic_op($(kIROp_GetWorkGroupSize))
int3 WorkgroupSize();

// Test if any component is non-zero (HLSL SM 1.0)
// Scalar overload: mirrors scalar `all`; the SPIR-V path compares against a
// default-constructed value and yields false for types that are not
// int, float or bool.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv)]
bool any(T x)
{
    __target_switch
    {
    default:
        __intrinsic_asm "bool($0)";
    case hlsl:
        __intrinsic_asm "any";
    case metal:
        __intrinsic_asm "any";
    case spirv:
        let zero = __default();
        if (__isInt())
            return spirv_asm { OpINotEqual $$bool result $x $zero };
        else if (__isFloat())
            return spirv_asm { OpFUnordNotEqual $$bool result $x $zero };
        else if (__isBool())
            return __slang_noop_cast(x);
        return false;
    }
}

// Vector any: a one-element vector forwards to the scalar overload.
// On SPIR-V, non-bool inputs are compared against zero and reduced with OpAny.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv)]
bool any(vector x)
{
    if(N == 1)
        return any(x[0]);
    __target_switch
    {
    case hlsl:
        __intrinsic_asm "any";
    case metal:
        __intrinsic_asm "any";
    case glsl:
        __intrinsic_asm "any(bvec$N0($0))";
    case spirv:
        if (__isBool())
            return spirv_asm { OpAny $$bool result $x };
        else if (__isInt())
        {
            let zero = __default>();
            return spirv_asm { OpINotEqual $$vector %castResult $x $zero; OpAny $$bool result %castResult };
        }
        else
        {
            let zero = __default>();
            return spirv_asm { OpFUnordNotEqual $$vector %castResult $x $zero; OpAny $$bool result %castResult
};
        }
    default:
        bool result = false;
        for(int i = 0; i < N; ++i)
            result = result || any(x[i]);
        return result;
    }
}

// Matrix any: native on HLSL; otherwise ORs `any` over x[0] .. x[N-1].
// The capability set now includes Metal, matching matrix `all` and the
// scalar/vector `any` overloads. The fallback only needs vector `any`,
// which already supports Metal.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv)]
bool any(matrix x)
{
    __target_switch
    {
    case hlsl:
        __intrinsic_asm "any";
    default:
        bool result = false;
        for(int i = 0; i < N; ++i)
            result = result || any(x[i]);
        return result;
    }
}

// Reinterpret bits as a double (HLSL SM 5.0)
// `lowbits` and `highbits` are packed, in that order, into a two-component
// uint vector that is then reinterpreted as one double.
__glsl_extension(GL_ARB_gpu_shader5)
[__readNone]
[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)]
double asdouble(uint lowbits, uint highbits)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "asdouble";
    case glsl: __intrinsic_asm "packDouble2x32(uvec2($0, $1))";
    case cpp: __intrinsic_asm "$P_asdouble($0, $1)";
    case cuda: __intrinsic_asm "$P_asdouble($0, $1)";
    case spirv: return spirv_asm {
        %v:$$uint2 = OpCompositeConstruct $lowbits $highbits;
        // 59 is the GLSL.std.450 instruction number of PackDouble2x32.
        result:$$double = OpExtInst glsl450 59 %v
    };
    }
}

// Two-component asdouble: native on HLSL, otherwise built from two scalar calls.
__glsl_extension(GL_ARB_gpu_shader5)
[__readNone]
[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)]
double2 asdouble(uint2 lowbits, uint2 highbits)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "asdouble($0, $1)";
    default:
        return double2(asdouble(lowbits.x, highbits.x), asdouble(lowbits.y, highbits.y));
    }
}

// Reinterpret bits as a float (HLSL SM 4.0)
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_4_0)]
float asfloat(int x)
{
    __target_switch
    {
    case cpp: __intrinsic_asm "$P_asfloat($0)";
    case cuda: __intrinsic_asm "$P_asfloat($0)";
    case glsl: __intrinsic_asm "intBitsToFloat";
    case hlsl: __intrinsic_asm "asfloat";
    case metal: __intrinsic_asm "as_type<$TR>($0)";
    case spirv: return spirv_asm { OpBitcast $$float result $x };
    }
}

[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_4_0)]
float asfloat(uint x)
{
    __target_switch
    {
    case cpp: __intrinsic_asm "$P_asfloat($0)";
    case cuda: __intrinsic_asm "$P_asfloat($0)";
    case glsl: __intrinsic_asm "uintBitsToFloat";
    case hlsl: __intrinsic_asm "asfloat";
    case metal: __intrinsic_asm "as_type<$TR>($0)";
    case spirv:
return spirv_asm { OpBitcast $$float result $x };
    }
}

// Vector asfloat (signed-int source): native bit cast where listed,
// otherwise the scalar overload is mapped per element.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_4_0)]
vector asfloat(vector< int, N> x)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "intBitsToFloat";
    case hlsl: __intrinsic_asm "asfloat";
    case metal: __intrinsic_asm "as_type<$TR>($0)";
    case spirv: return spirv_asm { OpBitcast $$vector result $x };
    default:
        VECTOR_MAP_UNARY(float, N, asfloat, x);
    }
}

// Vector asfloat (GLSL path uses uintBitsToFloat).
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_4_0)]
vector asfloat(vector x)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "uintBitsToFloat";
    case hlsl: __intrinsic_asm "asfloat";
    case metal: __intrinsic_asm "as_type<$TR>($0)";
    case spirv: return spirv_asm { OpBitcast $$vector result $x };
    default:
        VECTOR_MAP_UNARY(float, N, asfloat, x);
    }
}

// Matrix asfloat: native on HLSL, mapped via MATRIX_MAP_UNARY elsewhere.
// Requires only shader5_sm_4_0, like the scalar/vector asfloat overloads it
// maps over and like the matrix asint/asuint overloads.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)]
matrix asfloat(matrix< int,N,M> x)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "asfloat";
    default:
        MATRIX_MAP_UNARY(float, N, M, asfloat, x);
    }
}

// Second matrix asfloat overload; same capability relaxation as above.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)]
matrix asfloat(matrix x)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "asfloat";
    default:
        MATRIX_MAP_UNARY(float, N, M, asfloat, x);
    }
}

// No op
[__unsafeForceInlineEarly]
[__readNone]
float asfloat(float x)
{ return x; }

__generic
[__unsafeForceInlineEarly]
[__readNone]
vector asfloat(vector x)
{ return x; }

__generic
[__unsafeForceInlineEarly]
[__readNone]
matrix asfloat(matrix x)
{ return x; }

// Inverse sine (HLSL SM 1.0)
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
T asin(T x)
{
    __target_switch
    {
    case cpp: __intrinsic_asm "$P_asin($0)";
    case cuda: __intrinsic_asm "$P_asin($0)";
    case glsl: __intrinsic_asm "asin";
    case hlsl: __intrinsic_asm "asin";
    case metal: __intrinsic_asm "asin";
    case spirv: return spirv_asm { OpExtInst $$T result glsl450 Asin $x };
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
vector asin(vector x)
{
    // Vector asin: native where listed, otherwise mapped per element.
    __target_switch
    {
    case glsl: __intrinsic_asm "asin";
    case hlsl: __intrinsic_asm "asin";
    case metal: __intrinsic_asm "asin";
    case spirv: return spirv_asm { OpExtInst $$vector result glsl450 Asin $x };
    default:
        VECTOR_MAP_UNARY(T,N,asin,x);
    }
}

// Matrix asin: native on HLSL only.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
matrix asin(matrix x)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "asin";
    default:
        MATRIX_MAP_UNARY(T,N,M,asin,x);
    }
}

// Inverse hyperbolic sine
// No HLSL case is listed, so HLSL takes the default branch:
// log(x + sqrt(x*x + 1)).
__generic
[__readNone]
[ForceInline]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
T asinh(T x)
{
    __target_switch
    {
    case cpp: __intrinsic_asm "$P_asinh($0)";
    case cuda: __intrinsic_asm "$P_asinh($0)";
    case glsl: __intrinsic_asm "asinh";
    case metal: __intrinsic_asm "asinh";
    case spirv: return spirv_asm { OpExtInst $$T result glsl450 Asinh $x };
    default:
        return log(x + sqrt(x * x + T(1)));
    }
}

// Vector asinh: targets without an intrinsic map the scalar overload per element.
__generic
[__readNone]
[ForceInline]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
vector asinh(vector x)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "asinh";
    case metal: __intrinsic_asm "asinh";
    case spirv: return spirv_asm { OpExtInst $$vector result glsl450 Asinh $x };
    default:
        VECTOR_MAP_UNARY(T, N, asinh, x);
    }
}

// Reinterpret bits as an int (HLSL SM 4.0)
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_4_0)]
int asint(float x)
{
    __target_switch
    {
    case cpp: __intrinsic_asm "$P_asint($0)";
    case cuda: __intrinsic_asm "$P_asint($0)";
    case glsl: __intrinsic_asm "floatBitsToInt";
    case hlsl: __intrinsic_asm "asint";
    case metal: __intrinsic_asm "as_type<$TR>($0)";
    case spirv: return spirv_asm { OpBitcast $$int result $x };
    }
}

// uint -> int: GLSL uses a constructor-style conversion rather than a
// bit-cast builtin.
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_4_0)]
int asint(uint x)
{
    __target_switch
    {
    case cpp: __intrinsic_asm "$P_asint($0)";
    case cuda: __intrinsic_asm "$P_asint($0)";
    case glsl: __intrinsic_asm "int($0)";
    case hlsl: __intrinsic_asm "asint";
    case metal: __intrinsic_asm "as_type<$TR>($0)";
    case spirv:
return spirv_asm { OpBitcast $$int result $x };
    }
}

// Vector asint (GLSL path uses floatBitsToInt).
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_4_0)]
vector asint(vector x)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "floatBitsToInt";
    case hlsl: __intrinsic_asm "asint";
    case metal: __intrinsic_asm "as_type<$TR>($0)";
    case spirv: return spirv_asm { OpBitcast $$vector result $x };
    default:
        VECTOR_MAP_UNARY(int, N, asint, x);
    }
}

// Vector asint (GLSL path uses an ivecN constructor). One-element vectors
// are routed through the scalar overload before the target switch.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_4_0)]
vector asint(vector x)
{
    if(N == 1)
        return vector(asint(x[0]));
    __target_switch
    {
    case glsl: __intrinsic_asm "ivec$N0($0)";
    case hlsl: __intrinsic_asm "asint";
    case metal: __intrinsic_asm "as_type<$TR>($0)";
    case spirv: return spirv_asm { OpBitcast $$vector result $x };
    default:
        VECTOR_MAP_UNARY(int, N, asint, x);
    }
}

// Matrix asint overloads: native on HLSL, mapped via MATRIX_MAP_UNARY elsewhere.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)]
matrix asint(matrix x)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "asint";
    default:
        MATRIX_MAP_UNARY(int, N, M, asint, x);
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)]
matrix asint(matrix x)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "asint";
    default:
        MATRIX_MAP_UNARY(int, N, M, asint, x);
    }
}

// No op
[__unsafeForceInlineEarly]
[__readNone]
int asint(int x)
{ return x; }

__generic
[__unsafeForceInlineEarly]
[__readNone]
vector asint(vector x)
{ return x; }

__generic
[__unsafeForceInlineEarly]
[__readNone]
matrix asint(matrix x)
{ return x; }

// Reinterpret bits of double as a uint (HLSL SM 5.0)
// NOTE(review): the heading says SM 5.0 but the capability below is
// shader5_sm_4_0, whereas asdouble uses shader5_sm_5_0 -- confirm which
// is intended.
__glsl_extension(GL_ARB_gpu_shader5)
[__readNone]
[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)]
void asuint(double value, out uint lowbits, out uint highbits)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "asuint";
    case glsl: __intrinsic_asm "{ uvec2 v = unpackDouble2x32($0); $1 = v.x; $2 = v.y; }";
    case cpp:
    case cuda:
        __intrinsic_asm "$P_asuint($0, $1, $2)";
    case spirv:
        // Bit-cast the double to a uint2; .x is written to lowbits, .y to highbits.
        let uv = spirv_asm { result : $$uint2 = OpBitcast $value; };
        lowbits = uv.x;
highbits = uv.y;
        return;
    }
}

// Reinterpret bits as a uint (HLSL SM 4.0)
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_4_0)]
uint asuint(float x)
{
    __target_switch
    {
    case cpp: __intrinsic_asm "$P_asuint($0)";
    case cuda: __intrinsic_asm "$P_asuint($0)";
    case glsl: __intrinsic_asm "floatBitsToUint";
    case hlsl: __intrinsic_asm "asuint";
    case metal: __intrinsic_asm "as_type<$TR>($0)";
    case spirv: return spirv_asm { OpBitcast $$uint result $x };
    }
}

// int -> uint: GLSL uses a constructor-style conversion rather than a
// bit-cast builtin.
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_4_0)]
uint asuint(int x)
{
    __target_switch
    {
    case cpp: __intrinsic_asm "$P_asuint($0)";
    case cuda: __intrinsic_asm "$P_asuint($0)";
    case glsl: __intrinsic_asm "uint($0)";
    case hlsl: __intrinsic_asm "asuint";
    case metal: __intrinsic_asm "as_type<$TR>($0)";
    case spirv: return spirv_asm { OpBitcast $$uint result $x };
    }
}

// Vector asuint (GLSL path uses floatBitsToUint).
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_4_0)]
vector asuint(vector x)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "floatBitsToUint";
    case hlsl: __intrinsic_asm "asuint";
    case metal: __intrinsic_asm "as_type<$TR>($0)";
    case spirv: return spirv_asm { OpBitcast $$vector result $x };
    default:
        VECTOR_MAP_UNARY(uint, N, asuint, x);
    }
}

// Vector asuint (GLSL path uses a uvecN constructor). One-element vectors
// are routed through the scalar overload before the target switch.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_4_0)]
vector asuint(vector x)
{
    if(N == 1)
        return vector(asuint(x[0]));
    __target_switch
    {
    case glsl: __intrinsic_asm "uvec$N0($0)";
    case hlsl: __intrinsic_asm "asuint";
    case metal: __intrinsic_asm "as_type<$TR>($0)";
    case spirv: return spirv_asm { OpBitcast $$vector result $x };
    default:
        VECTOR_MAP_UNARY(uint, N, asuint, x);
    }
}

// Matrix asuint overloads: native on HLSL, mapped via MATRIX_MAP_UNARY elsewhere.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)]
matrix asuint(matrix x)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "asuint";
    default:
        MATRIX_MAP_UNARY(uint, N, M, asuint, x);
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_4_0)]
matrix asuint(matrix x)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "asuint";
default:
        MATRIX_MAP_UNARY(uint, N, M, asuint, x);
    }
}

// No-op overloads: the argument is returned unchanged.
[__unsafeForceInlineEarly]
[__readNone]
uint asuint(uint x)
{ return x; }

__generic
[__unsafeForceInlineEarly]
[__readNone]
vector asuint(vector x)
{ return x; }

__generic
[__unsafeForceInlineEarly]
[__readNone]
matrix asuint(matrix x)
{ return x; }

// 16-bit bitcast ops (HLSL SM 6.2)
//
// TODO: We need to map these to GLSL/SPIR-V
// operations that don't require an intermediate
// conversion to fp32.

// Identity cases:

[__unsafeForceInlineEarly][__readNone] float16_t asfloat16(float16_t value) { return value; }
[__unsafeForceInlineEarly][__readNone] vector asfloat16(vector value) { return value; }
[__unsafeForceInlineEarly][__readNone] matrix asfloat16(matrix value) { return value; }

[__unsafeForceInlineEarly][__readNone] int16_t asint16(int16_t value) { return value; }
[__unsafeForceInlineEarly][__readNone] vector asint16(vector value) { return value; }
[__unsafeForceInlineEarly][__readNone] matrix asint16(matrix value) { return value; }

[__unsafeForceInlineEarly][__readNone] uint16_t asuint16(uint16_t value) { return value; }
[__unsafeForceInlineEarly][__readNone] vector asuint16(vector value) { return value; }
[__unsafeForceInlineEarly][__readNone] matrix asuint16(matrix value) { return value; }

// Signed<->unsigned cases:
// These rely on the ordinary conversion applied by `return value;`.

[__unsafeForceInlineEarly][__readNone] int16_t asint16(uint16_t value) { return value; }
[__unsafeForceInlineEarly][__readNone] vector asint16(vector value) { return value; }
[__unsafeForceInlineEarly][__readNone] matrix asint16(matrix value) { return value; }

[__unsafeForceInlineEarly][__readNone] uint16_t asuint16(int16_t value) { return value; }
[__unsafeForceInlineEarly][__readNone] vector asuint16(vector value) { return value; }
[__unsafeForceInlineEarly][__readNone] matrix asuint16(matrix value) { return value; }

// Float->unsigned cases:

[__readNone]
[require(cuda_glsl_hlsl_spirv, shader5_sm_5_0)]
uint16_t asuint16(float16_t value)
{
    __target_switch
    {
    case cuda: __intrinsic_asm
"__half_as_ushort";
    case glsl: __intrinsic_asm "uint16_t(packHalf2x16(vec2($0, 0.0)))";
    case hlsl: __intrinsic_asm "asuint16";
    case spirv: return spirv_asm { OpBitcast $$uint16_t result $value };
    }
}

// Vector float16 -> uint16: native on HLSL and SPIR-V, mapped per element elsewhere.
[__readNone]
[require(cuda_glsl_hlsl_spirv, shader5_sm_5_0)]
vector asuint16(vector value)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "asuint16";
    case spirv: return spirv_asm { result:$$vector = OpBitcast $value };
    default:
        VECTOR_MAP_UNARY(uint16_t, N, asuint16, value);
    }
}

// Matrix float16 -> uint16: always mapped via MATRIX_MAP_UNARY.
[__readNone]
[require(cuda_glsl_hlsl_spirv, shader5_sm_5_0)]
matrix asuint16(matrix value)
{ MATRIX_MAP_UNARY(uint16_t, R, C, asuint16, value); }

// Unsigned->float cases:

[__readNone]
[require(cuda_glsl_hlsl_spirv, shader5_sm_5_0)]
float16_t asfloat16(uint16_t value)
{
    __target_switch
    {
    case cuda: __intrinsic_asm "__ushort_as_half";
    case glsl: __intrinsic_asm "float16_t(unpackHalf2x16($0).x)";
    case hlsl: __intrinsic_asm "asfloat16";
    case spirv: return spirv_asm { OpBitcast $$float16_t result $value };
    }
}

// Vector uint16 -> float16: native on HLSL and SPIR-V, mapped per element elsewhere.
[__readNone]
[require(cuda_glsl_hlsl_spirv, shader5_sm_5_0)]
vector asfloat16(vector value)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "asfloat16";
    case spirv: return spirv_asm { result:$$vector = OpBitcast $value };
    default:
        VECTOR_MAP_UNARY(float16_t, N, asfloat16, value);
    }
}

// Matrix uint16 -> float16: always mapped via MATRIX_MAP_UNARY.
// Declares the same capability set as the matrix asuint16 overload and the
// scalar/vector asfloat16 overloads it maps over; previously it carried no
// explicit requirement.
[__readNone]
[require(cuda_glsl_hlsl_spirv, shader5_sm_5_0)]
matrix asfloat16(matrix value)
{ MATRIX_MAP_UNARY(float16_t, R, C, asfloat16, value); }

// Float<->signed cases:

[__unsafeForceInlineEarly]
[__readNone]
[require(cuda_hlsl_metal_spirv, shader5_sm_5_0)]
int16_t asint16(float16_t value)
{
    __target_switch
    {
    case cuda: __intrinsic_asm "__half_as_short";
    case hlsl: __intrinsic_asm "asint16";
    case metal: __intrinsic_asm "as_type<$TR>($0)";
    case spirv: return spirv_asm { OpBitcast $$int16_t result $value };
    default: return asuint16(value);
    }
}

[__unsafeForceInlineEarly]
[__readNone]
[require(cuda_hlsl_metal_spirv, shader5_sm_5_0)]
vector asint16(vector value)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "asint16";
    case metal: __intrinsic_asm
"as_type<$TR>($0)";
    default: return asuint16(value);
    }
}

// Matrix float16 -> int16: native on HLSL; otherwise goes through asuint16.
[__unsafeForceInlineEarly]
[__readNone]
[require(cuda_hlsl_spirv, shader5_sm_5_0)]
matrix asint16(matrix value)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "asint16";
    default: return asuint16(value);
    }
}

// int16 -> float16: the default branch converts via asuint16 and then
// reuses the unsigned asfloat16 overload.
[__readNone]
[__unsafeForceInlineEarly]
[require(cuda_hlsl_metal_spirv, shader5_sm_5_0)]
float16_t asfloat16(int16_t value)
{
    __target_switch
    {
    case cuda: __intrinsic_asm "__short_as_half";
    case hlsl: __intrinsic_asm "asfloat16";
    case metal: __intrinsic_asm "as_type<$TR>($0)";
    case spirv: return spirv_asm { OpBitcast $$float16_t result $value };
    default: return asfloat16(asuint16(value));
    }
}

[__unsafeForceInlineEarly]
[__readNone]
[require(cuda_hlsl_metal_spirv, shader5_sm_5_0)]
vector asfloat16(vector value)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "asfloat16";
    case metal: __intrinsic_asm "as_type<$TR>($0)";
    case spirv: return spirv_asm { OpBitcast $$vector result $value };
    default: return asfloat16(asuint16(value));
    }
}

[__unsafeForceInlineEarly]
[__readNone]
[require(cuda_hlsl_spirv, shader5_sm_5_0)]
matrix asfloat16(matrix value)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "asfloat16";
    default: return asfloat16(asuint16(value));
    }
}

// Inverse tangent (HLSL SM 1.0)
// Scalar: every required target has a direct intrinsic.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
T atan(T x)
{
    __target_switch
    {
    case cpp: __intrinsic_asm "$P_atan($0)";
    case cuda: __intrinsic_asm "$P_atan($0)";
    case glsl: __intrinsic_asm "atan";
    case hlsl: __intrinsic_asm "atan";
    case metal: __intrinsic_asm "atan";
    case spirv: return spirv_asm { OpExtInst $$T result glsl450 Atan $x };
    }
}

// Vector atan: native where listed, otherwise mapped per element.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
vector atan(vector x)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "atan";
    case hlsl: __intrinsic_asm "atan";
    case metal: __intrinsic_asm "atan";
    case spirv: return spirv_asm { OpExtInst $$vector result glsl450 Atan $x };
    default:
        VECTOR_MAP_UNARY(T, N, atan, x);
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
matrix atan(matrix x)
{
    // Matrix atan: native on HLSL only.
    __target_switch
    {
    case hlsl: __intrinsic_asm "atan";
    default:
        MATRIX_MAP_UNARY(T, N, M, atan, x);
    }
}

// Two-argument arctangent; arguments are (y, x). GLSL spells it as the
// two-argument form of `atan`.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
T atan2(T y, T x)
{
    __target_switch
    {
    case cpp: __intrinsic_asm "$P_atan2($0, $1)";
    case cuda: __intrinsic_asm "$P_atan2($0, $1)";
    case glsl: __intrinsic_asm "atan($0,$1)";
    case hlsl: __intrinsic_asm "atan2";
    case metal: __intrinsic_asm "atan2";
    case spirv: return spirv_asm { OpExtInst $$T result glsl450 Atan2 $y $x };
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
vector atan2(vector y, vector x)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "atan($0,$1)";
    case hlsl: __intrinsic_asm "atan2";
    case metal: __intrinsic_asm "atan2";
    case spirv: return spirv_asm { OpExtInst $$vector result glsl450 Atan2 $y $x };
    default:
        VECTOR_MAP_BINARY(T, N, atan2, y, x);
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
matrix atan2(matrix y, matrix x)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "atan2";
    default:
        MATRIX_MAP_BINARY(T, N, M, atan2, y, x);
    }
}

// Hyperbolic inverse tangent
// No HLSL case is listed, so HLSL takes the default branch:
// 0.5 * log((1 + x) / (1 - x)).
__generic
[__readNone]
[ForceInline]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
T atanh(T x)
{
    __target_switch
    {
    case cpp: __intrinsic_asm "$P_atanh($0)";
    case cuda: __intrinsic_asm "$P_atanh($0)";
    case glsl: __intrinsic_asm "atanh";
    case metal: __intrinsic_asm "atanh";
    case spirv: return spirv_asm { OpExtInst $$T result glsl450 Atanh $x };
    default:
        return T(0.5) * log((T(1) + x) / (T(1) - x));
    }
}

__generic
[__readNone]
[ForceInline]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
vector atanh(vector x)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "atanh";
    case metal: __intrinsic_asm "atanh";
    case spirv: return spirv_asm { OpExtInst $$vector result glsl450 Atanh $x };
    default:
        VECTOR_MAP_UNARY(T, N, atanh, x);
    }
}

// Ceiling (HLSL SM 1.0)
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
T ceil(T x)
{
    __target_switch
    {
    case cpp: __intrinsic_asm "$P_ceil($0)";
    case cuda: __intrinsic_asm "$P_ceil($0)";
    case glsl: __intrinsic_asm "ceil";
    case hlsl: __intrinsic_asm "ceil";
    case metal: __intrinsic_asm "ceil";
    case spirv: return spirv_asm { OpExtInst $$T result glsl450 Ceil $x };
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
vector ceil(vector x)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "ceil";
    case hlsl: __intrinsic_asm "ceil";
    case metal: __intrinsic_asm "ceil";
    case spirv: return spirv_asm { OpExtInst $$vector result glsl450 Ceil $x };
    default:
        VECTOR_MAP_UNARY(T, N, ceil, x);
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
matrix ceil(matrix x)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "ceil";
    default:
        MATRIX_MAP_UNARY(T, N, M, ceil, x);
    }
}

// Copy-sign
// Each helper reinterprets both operands as unsigned integers, keeps bit 15
// of `y` and the low 15 bits of `x`, and reinterprets the sum back.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv)]
vector copysign_half(vector x, vector y)
{
    let ux = reinterpret>(x);
    let uy = reinterpret>(y);
    vector signY = (uy & (uint16_t(1) << uint16_t(15)));
    // The two masks select disjoint bits, so `+` combines them without carry.
    vector newX = (ux & ((uint16_t(1) << uint16_t(15)) - uint16_t(1))) + signY;
    return reinterpret>(newX);
}

// Same scheme with bit 31 as the selected top bit.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv)]
vector copysign_float(vector x, vector y)
{
    let ux = reinterpret>(x);
    let uy = reinterpret>(y);
    vector signY = (uy & (uint32_t(1) << uint32_t(31)));
    vector newX = (ux & ((uint32_t(1) << uint32_t(31)) - uint32_t(1))) + signY;
    return reinterpret>(newX);
}

// Same scheme with bit 63 as the selected top bit.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv)]
vector copysign_double(vector x, vector y)
{
    let ux = reinterpret>(x);
    let uy = reinterpret>(y);
    vector signY = (uy & (uint64_t(1) << uint64_t(63)));
    vector newX = (ux & ((uint64_t(1) << uint64_t(63)) - uint64_t(1))) + signY;
    return reinterpret>(newX);
}

// Vector cast bound to the kIROp_FloatCast IR op; used by copysign.
__generic
__intrinsic_op($(kIROp_FloatCast))
vector __real_cast(vector val);
// copysign, vector overload: Metal has a native `copysign`; other targets
// dispatch on the element type to one of the bit-level helpers.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv)]
vector copysign(vector x, vector y)
{
    __target_switch
    {
    case metal: __intrinsic_asm "copysign";
    default:
        {
            // sign of -0.0 needs to be respected.
            if (T is half)
                return __real_cast(copysign_half(
                    __real_cast(x),
                    __real_cast(y)));
            if (T is float)
                return __real_cast(copysign_float(
                    __real_cast(x),
                    __real_cast(y)));
            return __real_cast(copysign_double(
                __real_cast(x),
                __real_cast(y)));
        }
    }
}

// Scalar copysign: wraps both operands in vectors and takes element 0 of
// the vector overload's result.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv)]
T copysign(T x, T y)
{
    __target_switch
    {
    case metal: __intrinsic_asm "copysign";
    default: return copysign(vector(x), vector(y))[0];
    }
}

// Check access status to tiled resource
// `status` is the value produced by a resource access that reports a
// status (e.g. a `Load(..., out uint status)` overload). It is consumed
// here, so it is an input parameter. Declaring it `out` told the compiler
// the caller's value is overwritten rather than read.
[ForceInline]
[require(hlsl, sm_5_0)]
bool CheckAccessFullyMapped(uint status)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "CheckAccessFullyMapped";
    }
}

// Clamp (HLSL SM 1.0)
// Integer overloads: SPIR-V picks SClamp or UClamp from the signedness of
// the element type; targets without an intrinsic use min(max(...)).
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
T clamp(T x, T minBound, T maxBound)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "clamp";
    case glsl: __intrinsic_asm "clamp";
    case metal: __intrinsic_asm "clamp";
    case spirv:
        if (__isSignedInt())
            return spirv_asm { result:$$T = OpExtInst glsl450 SClamp $x $minBound $maxBound };
        else
            return spirv_asm { result:$$T = OpExtInst glsl450 UClamp $x $minBound $maxBound };
    default:
        return min(max(x, minBound), maxBound);
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
vector clamp(vector x, vector minBound, vector maxBound)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "clamp";
    case glsl: __intrinsic_asm "clamp";
    case metal: __intrinsic_asm "clamp";
    case spirv:
        if (__isSignedInt())
            return spirv_asm { result:$$vector = OpExtInst glsl450 SClamp $x $minBound $maxBound };
        else
            return spirv_asm { result:$$vector = OpExtInst glsl450 UClamp $x $minBound $maxBound };
    default:
        return min(max(x, minBound), maxBound);
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
matrix clamp(matrix x, matrix minBound, matrix maxBound)
{
    // Matrix clamp: native on HLSL, min(max(...)) elsewhere.
    __target_switch
    {
    case hlsl: __intrinsic_asm "clamp";
    default:
        return min(max(x, minBound), maxBound);
    }
}

// Clamp overloads whose SPIR-V path emits GLSL.std.450 `FClamp`.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
T clamp(T x, T minBound, T maxBound)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "clamp";
    case glsl: __intrinsic_asm "clamp";
    case metal: __intrinsic_asm "clamp";
    case spirv: return spirv_asm { result:$$T = OpExtInst glsl450 FClamp $x $minBound $maxBound };
    default:
        return min(max(x, minBound), maxBound);
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
vector clamp(vector x, vector minBound, vector maxBound)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "clamp";
    case glsl: __intrinsic_asm "clamp";
    case metal: __intrinsic_asm "clamp";
    case spirv: return spirv_asm { result:$$vector = OpExtInst glsl450 FClamp $x $minBound $maxBound };
    default:
        return min(max(x, minBound), maxBound);
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
matrix clamp(matrix x, matrix minBound, matrix maxBound)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "clamp";
    default:
        return min(max(x, minBound), maxBound);
    }
}

// Clip (discard) fragment conditionally
// Outside HLSL, the fragment is discarded when the value (or any component
// of it) is less than zero.
__generic
[require(cpp_cuda_glsl_hlsl_spirv, fragment)]
void clip(T x)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "clip";
    default:
        if(x < T(0)) discard;
    }
}

__generic
[require(cpp_cuda_glsl_hlsl_spirv, fragment)]
void clip(vector x)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "clip";
    default:
        if(any(x < T(0))) discard;
    }
}

__generic
[require(cpp_cuda_glsl_hlsl_spirv, fragment)]
void clip(matrix x)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "clip";
    default:
        if(any(x < T(0))) discard;
    }
}

// Cosine
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
T cos(T x)
{
    __target_switch
    {
    case cpp: __intrinsic_asm "$P_cos($0)";
    case cuda: __intrinsic_asm "$P_cos($0)";
    case glsl:
__intrinsic_asm "cos";
    case hlsl: __intrinsic_asm "cos";
    case metal: __intrinsic_asm "cos";
    case spirv: return spirv_asm { OpExtInst $$T result glsl450 Cos $x };
    }
}

// Vector cos: native where listed, otherwise mapped per element.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
vector cos(vector x)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "cos";
    case hlsl: __intrinsic_asm "cos";
    case metal: __intrinsic_asm "cos";
    case spirv: return spirv_asm { OpExtInst $$vector result glsl450 Cos $x };
    default:
        VECTOR_MAP_UNARY(T,N, cos, x);
    }
}

// Matrix cos: native on HLSL only.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
matrix cos(matrix x)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "cos";
    default:
        MATRIX_MAP_UNARY(T, N, M, cos, x);
    }
}

// Hyperbolic cosine
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
T cosh(T x)
{
    __target_switch
    {
    case cpp: __intrinsic_asm "$P_cosh($0)";
    case cuda: __intrinsic_asm "$P_cosh($0)";
    case glsl: __intrinsic_asm "cosh";
    case hlsl: __intrinsic_asm "cosh";
    case metal: __intrinsic_asm "cosh";
    case spirv: return spirv_asm { OpExtInst $$T result glsl450 Cosh $x };
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
vector cosh(vector x)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "cosh";
    case hlsl: __intrinsic_asm "cosh";
    case metal: __intrinsic_asm "cosh";
    case spirv: return spirv_asm { OpExtInst $$vector result glsl450 Cosh $x };
    default:
        VECTOR_MAP_UNARY(T,N, cosh, x);
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
matrix cosh(matrix x)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "cosh";
    default:
        MATRIX_MAP_UNARY(T, N, M, cosh, x);
    }
}

// Cosine of pi times x: cospi(x) == cos(pi * x).
// Metal has a native `cospi`; other targets multiply by T.getPi() first.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
T cospi(T x)
{
    __target_switch
    {
    case metal: __intrinsic_asm "cospi";
    default:
        return cos(T.getPi() * x);
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
vector cospi(vector x)
{
    __target_switch
{
    case metal: __intrinsic_asm "cospi";
    default:
        return cos(T.getPi() * x);
    }
}

// Population count
[__readNone]
[ForceInline]
[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)]
uint countbits(uint value)
{
    __target_switch
    {
    case hlsl:
        __intrinsic_asm "countbits";
    case glsl:
        __intrinsic_asm "bitCount";
    case metal:
        __intrinsic_asm "popcount";
    case cuda:
    case cpp:
        __intrinsic_asm "$P_countbits($0)";
    case spirv:
        return spirv_asm {OpBitCount $$uint result $value};
    }
}

// Vector countbits: native where listed, otherwise mapped per element.
__generic
[__readNone]
[ForceInline]
[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)]
vector countbits(vector value)
{
    __target_switch
    {
    case hlsl:
        __intrinsic_asm "countbits";
    case glsl:
        __intrinsic_asm "bitCount";
    case metal:
        __intrinsic_asm "popcount";
    case spirv:
        return spirv_asm {OpBitCount $$vector result $value};
    default:
        VECTOR_MAP_UNARY(uint, N, countbits, value);
    }
}

// Cross product
// Overload with native `cross` on GLSL, HLSL, Metal and SPIR-V
// (GLSL.std.450 `Cross`); other targets use the component formula.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
vector cross(vector left, vector right)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "cross";
    case hlsl: __intrinsic_asm "cross";
    case metal: __intrinsic_asm "cross";
    case spirv: return spirv_asm { OpExtInst $$vector result glsl450 Cross $left $right };
    default:
        return vector(
            left.y * right.z - left.z * right.y,
            left.z * right.x - left.x * right.z,
            left.x * right.y - left.y * right.x);
    }
}

// Second cross overload (presumably the integer-vector one -- it has no
// Metal intrinsic; confirm against the generic constraint).
// GLSL.std.450 `Cross` only accepts floating-point operands, so SPIR-V no
// longer emits it here and takes the component formula in `default`.
// NOTE(review): GLSL's `cross` builtin is likewise declared for
// floating-point vectors only; the glsl case is kept as-is pending review.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
vector cross(vector left, vector right)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "cross";
    case hlsl: __intrinsic_asm "cross";
    default:
        return vector(
            left.y * right.z - left.z * right.y,
            left.z * right.x - left.x * right.z,
            left.x * right.y - left.y * right.x);
    }
}

// Convert encoded color
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
int4 D3DCOLORtoUBYTE4(float4 color)
{
    __target_switch
    {
case hlsl: __intrinsic_asm "D3DCOLORtoUBYTE4";
    default:
        // Swizzle to z,y,x,w, scale, then convert to int4.
        let scaled = color.zyxw * 255.001999f;
        return int4(scaled);
    }
}

// Partial-difference derivatives
// The meta-code loop below stamps out every definition twice, once with
// $(xOrY) = "x" and once with "y".
${{{{
const char* diffDimensions[2] = {"x", "y"};
for (auto xOrY : diffDimensions) {
}}}}
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, fragmentprocessing)]
T dd$(xOrY)(T x)
{
    __requireComputeDerivative();
    __target_switch
    {
    case hlsl:
    case cpp:
    case cuda:
        __intrinsic_asm "dd$(xOrY)";
    case glsl:
        __intrinsic_asm "dFd$(xOrY)";
    case metal:
        __intrinsic_asm "dfd$(xOrY)";
    case spirv:
        return spirv_asm {OpDPd$(xOrY) $$T result $x};
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, fragmentprocessing)]
vector dd$(xOrY)(vector x)
{
    __requireComputeDerivative();
    __target_switch
    {
    case hlsl:
    case cpp:
    case cuda:
        __intrinsic_asm "dd$(xOrY)";
    case glsl:
        __intrinsic_asm "dFd$(xOrY)";
    case metal:
        __intrinsic_asm "dfd$(xOrY)";
    case spirv:
        return spirv_asm {OpDPd$(xOrY) $$vector result $x};
    }
}

// Matrix derivative: native on HLSL, mapped via MATRIX_MAP_UNARY elsewhere.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, fragmentprocessing)]
matrix dd$(xOrY)(matrix x)
{
    __requireComputeDerivative();
    __target_switch
    {
    case hlsl:
        __intrinsic_asm "dd$(xOrY)";
    default:
        MATRIX_MAP_UNARY(T, N, M, dd$(xOrY), x);
    }
}

// Coarse derivatives: the SPIR-V form declares the DerivativeControl
// capability alongside the instruction.
__generic
__glsl_extension(GL_ARB_derivative_control)
[__readNone]
[require(glsl_hlsl_spirv, fragmentprocessing_derivativecontrol)]
T dd$(xOrY)_coarse(T x)
{
    __requireComputeDerivative();
    __target_switch
    {
    case hlsl: __intrinsic_asm "dd$(xOrY)_coarse";
    case glsl: __intrinsic_asm "dFd$(xOrY)Coarse";
    case spirv: return spirv_asm {OpCapability DerivativeControl; result:$$T = OpDPd$(xOrY)Coarse $x};
    }
}

__generic
__glsl_extension(GL_ARB_derivative_control)
[__readNone]
[require(glsl_hlsl_spirv, fragmentprocessing_derivativecontrol)]
vector dd$(xOrY)_coarse(vector x)
{
    __requireComputeDerivative();
    __target_switch
    {
    case hlsl: __intrinsic_asm "dd$(xOrY)_coarse";
    case glsl: __intrinsic_asm "dFd$(xOrY)Coarse";
    case spirv: return spirv_asm {OpCapability
DerivativeControl; result:$$vector = OpDPd$(xOrY)Coarse $x}; } } __generic [__readNone] [require(glsl_hlsl_spirv, fragmentprocessing_derivativecontrol)] matrix dd$(xOrY)_coarse(matrix x) { __requireComputeDerivative(); __target_switch { case hlsl: __intrinsic_asm "dd$(xOrY)_coarse"; default: MATRIX_MAP_UNARY(T, N, M, dd$(xOrY)_coarse, x); } } __generic __glsl_extension(GL_ARB_derivative_control) [__readNone] [require(glsl_hlsl_spirv, fragmentprocessing_derivativecontrol)] T dd$(xOrY)_fine(T x) { __requireComputeDerivative(); __target_switch { case hlsl: __intrinsic_asm "dd$(xOrY)_fine"; case glsl: __intrinsic_asm "dFd$(xOrY)Fine"; case spirv: return spirv_asm {OpCapability DerivativeControl; result:$$T = OpDPd$(xOrY)Fine $x}; } } __generic __glsl_extension(GL_ARB_derivative_control) [__readNone] [require(glsl_hlsl_spirv, fragmentprocessing_derivativecontrol)] vector dd$(xOrY)_fine(vector x) { __requireComputeDerivative(); __target_switch { case hlsl: __intrinsic_asm "dd$(xOrY)_fine"; case glsl: __intrinsic_asm "dFd$(xOrY)Fine"; case spirv: return spirv_asm {OpCapability DerivativeControl; result:$$vector = OpDPd$(xOrY)Fine $x}; } } __generic [__readNone] [require(glsl_hlsl_spirv, fragmentprocessing_derivativecontrol)] matrix dd$(xOrY)_fine(matrix x) { __requireComputeDerivative(); __target_switch { case hlsl: __intrinsic_asm "dd$(xOrY)_fine"; default: MATRIX_MAP_UNARY(T, N, M, dd$(xOrY)_fine, x); } } ${{{{ } // for (xOrY) }}}} // Radians to degrees __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv)] T degrees(T x) { __target_switch { case glsl: __intrinsic_asm "degrees"; case hlsl: __intrinsic_asm "degrees"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Degrees $x }; default: return x * (T(180) / T.getPi()); } } __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv)] vector degrees(vector x) { __target_switch { case glsl: __intrinsic_asm "degrees"; case hlsl: __intrinsic_asm "degrees"; case spirv: return spirv_asm { OpExtInst 
$$vector result glsl450 Degrees $x }; default: VECTOR_MAP_UNARY(T, N, degrees, x); } } __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv)] matrix degrees(matrix x) { __target_switch { case hlsl: __intrinsic_asm "degrees"; default: MATRIX_MAP_UNARY(T, N, M, degrees, x); } } // Matrix determinant __generic [__readNone] [PreferCheckpoint] [require(glsl_hlsl_metal_spirv)] T determinant(matrix m) { __target_switch { case glsl: __intrinsic_asm "determinant"; case hlsl: __intrinsic_asm "determinant"; case metal: __intrinsic_asm "determinant"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Determinant $m }; } } // Barrier for device memory __glsl_extension(GL_KHR_memory_scope_semantics) [require(cuda_glsl_hlsl_metal_spirv, memorybarrier)] void DeviceMemoryBarrier() { __target_switch { case hlsl: __intrinsic_asm "DeviceMemoryBarrier"; case glsl: __intrinsic_asm "memoryBarrier(gl_ScopeDevice, (gl_StorageSemanticsImage|gl_StorageSemanticsBuffer), gl_SemanticsAcquireRelease)"; case cuda: __intrinsic_asm "__threadfence()"; case metal: __intrinsic_asm "threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture | mem_flags::mem_threadgroup_imageblock)"; case spirv: spirv_asm { OpMemoryBarrier Device AcquireRelease|UniformMemory|ImageMemory; }; } } __glsl_extension(GL_KHR_memory_scope_semantics) [require(cuda_glsl_hlsl_metal_spirv, memorybarrier)] void DeviceMemoryBarrierWithGroupSync() { __target_switch { case hlsl: __intrinsic_asm "DeviceMemoryBarrierWithGroupSync"; case glsl: __intrinsic_asm "controlBarrier(gl_ScopeWorkgroup, gl_ScopeDevice, (gl_StorageSemanticsImage|gl_StorageSemanticsBuffer), gl_SemanticsAcquireRelease)"; case cuda: __intrinsic_asm "__syncthreads()"; case metal: __intrinsic_asm "threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture | mem_flags::mem_threadgroup_imageblock)"; case spirv: spirv_asm { OpControlBarrier Workgroup Device 
AcquireRelease|UniformMemory|ImageMemory; }; } } // Vector distance __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T distance(vector x, vector y) { __target_switch { case glsl: __intrinsic_asm "distance"; case hlsl: __intrinsic_asm "distance"; case metal: __intrinsic_asm "distance"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Distance $x $y }; default: return length(x - y); } } __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T distance(T x, T y) { __target_switch { case glsl: __intrinsic_asm "distance"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Distance $x $y }; default: return length(x - y); } } // fdim __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T fdim(T x, T y) { __target_switch { case metal: __intrinsic_asm "fdim"; default: return max(T(0), x - y); } } __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector fdim(vector x, vector y) { __target_switch { case metal: __intrinsic_asm "fdim"; default: return max(T(0), x - y); } } // divide __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv)] T divide(T x, T y) { __target_switch { case metal: __intrinsic_asm "divide"; default: return x / y; } } __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv)] vector divide(vector x, vector y) { __target_switch { case metal: __intrinsic_asm "divide"; default: return x / y; } } // Vector dot product __generic [__readNone] [ForceInline] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T dot(T x, T y) { __target_switch { case glsl: __intrinsic_asm "dot"; case hlsl: __intrinsic_asm "dot"; default: return x * y; } } __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T dot(vector x, vector y) { __target_switch { case glsl: __intrinsic_asm "dot"; case hlsl: __intrinsic_asm "dot"; case metal: __intrinsic_asm "dot"; case spirv: return spirv_asm { OpDot $$T 
result $x $y }; default: T result = T(0); for(int i = 0; i < N; ++i) result += x[i] * y[i]; return result; } } __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T dot(vector x, vector y) { __target_switch { case hlsl: __intrinsic_asm "dot"; default: T result = T(0); for(int i = 0; i < N; ++i) result += x[i] * y[i]; return result; } } // Helper for computing distance terms for lighting (obsolete) __generic vector dst(vector x, vector y); // Given a RWByteAddressBuffer allow it to be interpreted as a RWStructuredBuffer __intrinsic_op($(kIROp_GetEquivalentStructuredBuffer)) RWStructuredBuffer __getEquivalentStructuredBuffer(RWByteAddressBuffer b); __intrinsic_op($(kIROp_GetEquivalentStructuredBuffer)) StructuredBuffer __getEquivalentStructuredBuffer(ByteAddressBuffer b); __intrinsic_op($(kIROp_GetEquivalentStructuredBuffer)) RasterizerOrderedStructuredBuffer __getEquivalentStructuredBuffer(RasterizerOrderedByteAddressBuffer b); // Error message // void errorf( string format, ... ); // Attribute evaluation // TODO: The matrix cases of these functions won't actuall work // when compiled to GLSL, since they only support scalar/vector // TODO: Should these be constrains to `__BuiltinFloatingPointType`? // TODO: SPIRV-direct does not support non-floating-point types. 
// Scalar overload: maps to GLSL `interpolateAtCentroid` and the
// GLSL.std.450 `InterpolateAtCentroid` extended instruction.
__generic
[__readNone]
[require(glsl_spirv, fragmentprocessing)]
T EvaluateAttributeAtCentroid(T x)
{
    __target_switch
    {
    case glsl:
        __intrinsic_asm "interpolateAtCentroid";
    case spirv:
        return spirv_asm {
            OpExtInst $$T result glsl450 InterpolateAtCentroid $x
        };
    }
}

// Vector overload of `EvaluateAttributeAtCentroid`.
__generic
[__readNone]
[require(glsl_spirv, fragmentprocessing)]
vector EvaluateAttributeAtCentroid(vector x)
{
    __target_switch
    {
    case glsl:
        __intrinsic_asm "interpolateAtCentroid";
    case spirv:
        return spirv_asm {
            OpExtInst $$vector result glsl450 InterpolateAtCentroid $x
        };
    }
}

// Matrix overload: on non-GLSL targets the matrix is processed through
// MATRIX_MAP_UNARY using the overloads above.
__generic
[__readNone]
[require(glsl_spirv, fragmentprocessing)]
matrix EvaluateAttributeAtCentroid(matrix x)
{
    __target_switch
    {
    case glsl:
        __intrinsic_asm "interpolateAtCentroid";
    default:
        MATRIX_MAP_UNARY(T, N, M, EvaluateAttributeAtCentroid, x);
    }
}

// Scalar overload: maps to GLSL `interpolateAtSample` (the sample index is
// cast to `int` in the GLSL text) and GLSL.std.450 `InterpolateAtSample`.
__generic
[__readNone]
[require(glsl_spirv, fragmentprocessing)]
T EvaluateAttributeAtSample(T x, uint sampleindex)
{
    __target_switch
    {
    case glsl:
        __intrinsic_asm "interpolateAtSample($0, int($1))";
    case spirv:
        return spirv_asm {
            OpExtInst $$T result glsl450 InterpolateAtSample $x $sampleindex
        };
    }
}

// Vector overload of `EvaluateAttributeAtSample`.
__generic
[__readNone]
[require(glsl_spirv, fragmentprocessing)]
vector EvaluateAttributeAtSample(vector x, uint sampleindex)
{
    __target_switch
    {
    case glsl:
        __intrinsic_asm "interpolateAtSample($0, int($1))";
    case spirv:
        return spirv_asm {
            OpExtInst $$vector result glsl450 InterpolateAtSample $x $sampleindex
        };
    }
}

// Matrix overload: on non-GLSL targets each of the N rows is evaluated
// with the vector overload.
__generic
[__readNone]
[require(glsl_spirv, fragmentprocessing)]
matrix EvaluateAttributeAtSample(matrix x, uint sampleindex)
{
    __target_switch
    {
    case glsl:
        __intrinsic_asm "interpolateAtSample($0, int($1))";
    default:
        matrix result;
        for(int i = 0; i < N; ++i)
        {
            result[i] = EvaluateAttributeAtSample(x[i], sampleindex);
        }
        return result;
    }
}

// Scalar overload: the integer `offset` is converted to float and divided
// by 16 before being handed to `interpolateAtOffset` /
// GLSL.std.450 `InterpolateAtOffset`.
__generic
[__readNone]
[require(glsl_spirv, fragmentprocessing)]
T EvaluateAttributeSnapped(T x, int2 offset)
{
    __target_switch
    {
    case glsl:
        __intrinsic_asm "interpolateAtOffset($0, vec2($1) / 16.0f)";
    case spirv:
        {
            // Divisor used by the OpFDiv below.
            const float2 tmp = float2(16.f, 16.f);
            return spirv_asm {
                %foffset:$$float2 = OpConvertSToF $offset;
                %offsetdiv16:$$float2 = OpFDiv %foffset $tmp;
                result:$$T = OpExtInst glsl450 InterpolateAtOffset $x %offsetdiv16
            };
        }
    }
}

// Vector overload of `EvaluateAttributeSnapped`; same offset scaling as the
// scalar overload.
__generic
[__readNone]
[require(glsl_spirv, fragmentprocessing)]
vector EvaluateAttributeSnapped(vector x, int2 offset)
{
    __target_switch
    {
    case glsl:
        __intrinsic_asm "interpolateAtOffset($0, vec2($1) / 16.0f)";
    case spirv:
        {
            // Divisor used by the OpFDiv below.
            const float2 tmp = float2(16.f, 16.f);
            return spirv_asm {
                %foffset:$$float2 = OpConvertSToF $offset;
                %offsetdiv16:$$float2 = OpFDiv %foffset $tmp;
                result:$$vector = OpExtInst glsl450 InterpolateAtOffset $x %offsetdiv16
            };
        }
    }
}

// Matrix overload: on non-GLSL targets each of the N rows is evaluated
// with the vector overload.
__generic
[__readNone]
[require(glsl_spirv, fragmentprocessing)]
matrix EvaluateAttributeSnapped(matrix x, int2 offset)
{
    __target_switch
    {
    case glsl:
        __intrinsic_asm "interpolateAtOffset($0, vec2($1) / 16.0f)";
    default:
        matrix result;
        for(int i = 0; i < N; ++i)
        {
            result[i] = EvaluateAttributeSnapped(x[i], offset);
        }
        return result;
    }
}

// Base-e exponent
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
T exp(T x)
{
    __target_switch
    {
    case cpp:
        __intrinsic_asm "$P_exp($0)";
    case cuda:
        __intrinsic_asm "$P_exp($0)";
    case glsl:
        __intrinsic_asm "exp";
    case hlsl:
        __intrinsic_asm "exp";
    case metal:
        __intrinsic_asm "exp";
    case spirv:
        return spirv_asm {
            OpExtInst $$T result glsl450 Exp $x
        };
    }
}

// Vector overload of `exp`; targets without a native vector form fall back
// to VECTOR_MAP_UNARY over the scalar overload.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
vector exp(vector x)
{
    __target_switch
    {
    case glsl:
        __intrinsic_asm "exp";
    case hlsl:
        __intrinsic_asm "exp";
    case metal:
        __intrinsic_asm "exp";
    case spirv:
        return spirv_asm {
            OpExtInst $$vector result glsl450 Exp $x
        };
    default:
        VECTOR_MAP_UNARY(T, N, exp, x);
    }
}

// Matrix overload of `exp`; only HLSL gets a direct intrinsic.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
matrix exp(matrix x)
{
    __target_switch
    {
    case hlsl:
        __intrinsic_asm "exp";
    default:
        MATRIX_MAP_UNARY(T, N, M, exp, x);
    }
}

// Base-2 exponent
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
T exp2(T x)
{
    __target_switch
    {
    case glsl:
        __intrinsic_asm "exp2($0)";
    case spirv:
        if (__isHalf())
        {
            return spirv_asm {
                OpExtInst $$T result glsl450 Exp2 $x
            };
        }
        else
        {
            // Non-half types are routed through a `float` Exp2 and the
            // result is converted back to T.
            float xf = __realCast(x);
            return T(spirv_asm {
                result:$$float = OpExtInst glsl450 Exp2 $xf
            });
        }
    case hlsl:
        __intrinsic_asm "exp2($0)";
    case metal:
        __intrinsic_asm "exp2";
    case cpp:
        __intrinsic_asm "$P_exp2($0)";
    case cuda:
        __intrinsic_asm "$P_exp2($0)";
    }
}

// Vector overload of `exp2`.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
vector exp2(vector x)
{
    __target_switch
    {
    case glsl:
        __intrinsic_asm "exp2($0)";
    case hlsl:
        __intrinsic_asm "exp2";
    case metal:
        __intrinsic_asm "exp2";
    case spirv:
        return spirv_asm {
            OpExtInst $$vector result glsl450 Exp2 $x
        };
    default:
        VECTOR_MAP_UNARY(T, N, exp2, x);
    }
}

// Matrix overload of `exp2`; only HLSL gets a direct intrinsic.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
matrix exp2(matrix x)
{
    __target_switch
    {
    case hlsl:
        __intrinsic_asm "exp2";
    default:
        MATRIX_MAP_UNARY(T, N, M, exp2, x);
    }
}

// Base-10 exponent
// Metal has a native `exp10`; every other target computes exp(x * ln(10)).
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
T exp10(T x)
{
    __target_switch
    {
    case metal:
        __intrinsic_asm "exp10";
    default:
        const T ln10 = T(2.302585092994045901); // ln(10)
        return exp(x * ln10);
    }
}

// Vector overload of `exp10`.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
vector exp10(vector x)
{
    __target_switch
    {
    case metal:
        __intrinsic_asm "exp10";
    default:
        // Same full-precision literal as the scalar overload so that both
        // overloads agree for higher-precision element types.
        const T ln10 = T(2.302585092994045901); // ln(10)
        return exp(x * ln10);
    }
}

// Convert 16-bit float stored in low bits of integer
__glsl_version(420)
__cuda_sm_version(6.0)
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)]
float f16tof32(uint value)
{
    __target_switch
    {
    case glsl:
        __intrinsic_asm "unpackHalf2x16($0).x";
    case hlsl:
        __intrinsic_asm "f16tof32($0)";
    case cuda:
        __intrinsic_asm "__half2float(__ushort_as_half($0))";
    case cpp:
        __intrinsic_asm "f16tof32($0)";
    case metal:
        __intrinsic_asm "as_type((ushort)($0))";
    case spirv:
        {
            // Narrow to 16 bits, reinterpret as half, then widen to float.
            return spirv_asm {
                %lowBits = OpUConvert $$uint16_t $value;
                %half = OpBitcast $$half %lowBits;
                result:$$float = OpFConvert %half
            };
        }
    }
}

// Vector overload of the integer-input `f16tof32`.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)]
vector f16tof32(vector value)
{
    __target_switch
    {
    case hlsl:
        __intrinsic_asm "f16tof32";
    case spirv:
        {
            return spirv_asm {
                %lowBits = OpUConvert $$vector $value;
                %half = OpBitcast $$vector %lowBits;
                result:$$vector = OpFConvert %half
            };
        }
    default:
        VECTOR_MAP_UNARY(float, N, f16tof32, value);
    }
}

// Convert to 16-bit float stored in low bits of integer
__glsl_version(420)
__cuda_sm_version(6.0)
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)]
uint f32tof16(float value)
{
    __target_switch
    {
    case glsl:
        __intrinsic_asm "packHalf2x16(vec2($0,0.0))";
    case hlsl:
        __intrinsic_asm "f32tof16($0)";
    case cuda:
        __intrinsic_asm "__half_as_ushort(__float2half($0))";
    case cpp:
        __intrinsic_asm "f32tof16($0)";
    case metal:
        __intrinsic_asm "as_type((half)($0))";
    case spirv:
        {
            // Narrow to half, reinterpret as 16-bit integer, then widen.
            return spirv_asm {
                %half = OpFConvert $$half $value;
                %lowBits = OpBitcast $$uint16_t %half;
                result:$$uint = OpUConvert %lowBits
            };
        }
    }
}

// Vector overload of the integer-output `f32tof16`.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)]
vector f32tof16(vector value)
{
    __target_switch
    {
    case hlsl:
        __intrinsic_asm "f32tof16";
    case spirv:
        {
            return spirv_asm {
                %half = OpFConvert $$vector $value;
                %lowBits = OpBitcast $$vector %half;
                result:$$vector = OpUConvert %lowBits
            };
        }
    default:
        VECTOR_MAP_UNARY(uint, N, f32tof16, value);
    }
}

// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
// The following is Slang specific and NOT part of standard HLSL
// It is not clear how HLSL treats a float16 argument here -> can the float16
// coerce to uint, for example? If so, that would give the wrong result.
// NOTE(review): the GLSL cases below reuse the integer-based
// `unpackHalf2x16` / `packHalf2x16` text with float16_t operands or results;
// confirm that this is what the GLSL backend expects.
__glsl_version(420)
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)]
float f16tof32(float16_t value)
{
    __target_switch
    {
    case glsl:
        __intrinsic_asm "unpackHalf2x16($0).x";
    case hlsl:
        __intrinsic_asm "f16tof32($0)";
    case cuda:
        __intrinsic_asm "__half2float($0)";
    case cpp:
        __intrinsic_asm "f16tof32($0)";
    case metal:
        __intrinsic_asm "float($0)";
    case spirv:
        {
            return spirv_asm {
                result:$$float = OpFConvert $value
            };
        }
    }
}

// Vector overload of the float16_t-input `f16tof32`.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)]
vector f16tof32(vector value)
{
    __target_switch
    {
    case cuda:
        __intrinsic_asm "__half2float";
    case hlsl:
        __intrinsic_asm "f16tof32";
    case metal:
        __intrinsic_asm "$TR($0)";
    case spirv:
        return spirv_asm {
            OpFConvert $$vector result $value
        };
    default:
        VECTOR_MAP_UNARY(float, N, f16tof32, value);
    }
}

// Convert to float16_t
__glsl_version(420)
[__readNone]
[require(cuda_glsl_metal_spirv, shader5_sm_5_0)]
float16_t f32tof16_(float value)
{
    __target_switch
    {
    case cuda:
        __intrinsic_asm "__float2half";
    case glsl:
        __intrinsic_asm "packHalf2x16(vec2($0,0.0))";
    case metal:
        __intrinsic_asm "half($0)";
    case spirv:
        return spirv_asm {
            OpFConvert $$float16_t result $value
        };
    }
}

// Vector overload of `f32tof16_`.
__generic
[__readNone]
[require(cuda_glsl_metal_spirv, shader5_sm_5_0)]
vector f32tof16_(vector value)
{
    __target_switch
    {
    case cuda:
        __intrinsic_asm "__float2half";
    case metal:
        __intrinsic_asm "$TR($0)";
    case spirv:
        return spirv_asm {
            OpFConvert $$vector result $value
        };
    default:
        VECTOR_MAP_UNARY(float16_t, N, f32tof16_, value);
    }
}

// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
// Flip surface normal to face forward, if needed
// The fallback returns `n` when dot(ng, i) is negative and `-n` otherwise.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
vector faceforward(vector n, vector i, vector ng)
{
    __target_switch
    {
    case glsl:
        __intrinsic_asm "faceforward";
    case hlsl:
        __intrinsic_asm "faceforward";
    case metal:
        __intrinsic_asm "faceforward";
    case spirv:
        return spirv_asm {
            OpExtInst $$vector result glsl450 FaceForward $n $i $ng
        };
    default:
        return dot(ng, i) < T(0.0f) ? n : -n;
    }
}

// Find first set bit starting at high bit and working down
//
// The result is the bit index counted from the least significant bit, as
// produced by GLSL `findMSB` and GLSL.std.450 `FindSMsb`/`FindUMsb`.
// Metal's `clz` returns the number of leading zero bits instead, so the
// Metal cases compute `31 - clz(...)`. For signed inputs a negative value
// is complemented first, so that the most significant 0 bit is located,
// matching `findMSB`. Because `clz` of zero is 32, inputs of 0 (and -1 for
// the signed overloads) yield -1.
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)]
int firstbithigh(int value)
{
    __target_switch
    {
    case cpp:
        __intrinsic_asm "$P_firstbithigh($0)";
    case cuda:
        __intrinsic_asm "$P_firstbithigh($0)";
    case glsl:
        __intrinsic_asm "findMSB";
    case hlsl:
        __intrinsic_asm "firstbithigh";
    case metal:
        __intrinsic_asm "(31 - clz(($0) < 0 ? ~($0) : ($0)))";
    case spirv:
        return spirv_asm {
            OpExtInst $$int result glsl450 FindSMsb $value
        };
    }
}

// Signed vector overload of `firstbithigh`.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)]
vector firstbithigh(vector value)
{
    __target_switch
    {
    case glsl:
        __intrinsic_asm "findMSB";
    case hlsl:
        __intrinsic_asm "firstbithigh";
    case metal:
        // `select(a, b, c)` picks `b` where `c` is true, component-wise.
        __intrinsic_asm "($TR(31) - clz(select($0, ~($0), ($0) < 0)))";
    case spirv:
        return spirv_asm {
            OpExtInst $$vector result glsl450 FindSMsb $value
        };
    default:
        VECTOR_MAP_UNARY(int, N, firstbithigh, value);
    }
}

// Unsigned scalar overload of `firstbithigh`.
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)]
uint firstbithigh(uint value)
{
    __target_switch
    {
    case cpp:
        __intrinsic_asm "$P_firstbithigh($0)";
    case cuda:
        __intrinsic_asm "$P_firstbithigh($0)";
    case glsl:
        __intrinsic_asm "findMSB";
    case hlsl:
        __intrinsic_asm "firstbithigh";
    case metal:
        // Unsigned wrap-around turns 31 - 32 into 0xFFFFFFFF for input 0.
        __intrinsic_asm "(31u - clz($0))";
    case spirv:
        return spirv_asm {
            OpExtInst $$uint result glsl450 FindUMsb $value
        };
    }
}

// Unsigned vector overload of `firstbithigh`.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)]
vector firstbithigh(vector value)
{
    __target_switch
    {
    case glsl:
        __intrinsic_asm "findMSB";
    case hlsl:
        __intrinsic_asm "firstbithigh";
    case metal:
        __intrinsic_asm "($TR(31) - clz($0))";
    case spirv:
        return spirv_asm {
            OpExtInst $$vector result glsl450 FindUMsb $value
        };
    default:
        VECTOR_MAP_UNARY(uint, N, firstbithigh, value);
    }
}

// Find first set bit starting at low bit and working up
// NOTE(review): Metal `ctz` presumably returns the bit width for a zero
// input, whereas `findLSB` returns -1 -- confirm whether the zero case
// needs special handling on Metal.
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)]
int firstbitlow(int value)
{
    __target_switch
    {
    case cpp:
        __intrinsic_asm "$P_firstbitlow($0)";
    case cuda:
        __intrinsic_asm "$P_firstbitlow($0)";
    case glsl:
        __intrinsic_asm "findLSB";
    case hlsl:
        __intrinsic_asm "firstbitlow";
    case metal:
        __intrinsic_asm "ctz";
    case spirv:
        return spirv_asm {
            OpExtInst $$int result glsl450 FindILsb $value
        };
    }
}

// Signed vector overload of `firstbitlow`.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)]
vector firstbitlow(vector value)
{
    __target_switch
    {
    case glsl:
        __intrinsic_asm "findLSB";
    case hlsl:
        __intrinsic_asm "firstbitlow";
    case metal:
        __intrinsic_asm "ctz";
    case spirv:
        return spirv_asm {
            OpExtInst $$vector result glsl450 FindILsb $value
        };
    default:
        VECTOR_MAP_UNARY(int, N, firstbitlow, value);
    }
}

// Unsigned scalar overload of `firstbitlow`.
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)]
uint firstbitlow(uint value)
{
    __target_switch
    {
    case cpp:
        __intrinsic_asm "$P_firstbitlow($0)";
    case cuda:
        __intrinsic_asm "$P_firstbitlow($0)";
    case glsl:
        __intrinsic_asm "findLSB";
    case hlsl:
        __intrinsic_asm "firstbitlow";
    case metal:
        __intrinsic_asm "ctz";
    case spirv:
        return spirv_asm {
            OpExtInst $$uint result glsl450 FindILsb $value
        };
    }
}

// Unsigned vector overload of `firstbitlow`.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)]
vector firstbitlow(vector value)
{
    __target_switch
    {
    case glsl:
        __intrinsic_asm "findLSB";
    case hlsl:
        __intrinsic_asm "firstbitlow";
    case metal:
        __intrinsic_asm "ctz";
    case spirv:
        return spirv_asm {
            OpExtInst $$vector result glsl450 FindILsb $value
        };
    default:
        VECTOR_MAP_UNARY(uint, N, firstbitlow, value);
    }
}

// Floor (HLSL SM 1.0)
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
T floor(T x)
{
    __target_switch
    {
    case cpp:
        __intrinsic_asm "$P_floor($0)";
    case cuda:
        __intrinsic_asm "$P_floor($0)";
    case glsl:
        __intrinsic_asm "floor";
    case hlsl:
        __intrinsic_asm "floor";
    case metal:
        __intrinsic_asm "floor";
    case spirv:
        return spirv_asm {
            OpExtInst $$T result glsl450 Floor $x
        };
    }
}

// Vector overload of `floor`.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
vector floor(vector x)
{
    __target_switch
    {
    case glsl:
        __intrinsic_asm "floor";
    case hlsl:
        __intrinsic_asm "floor";
    case metal:
        __intrinsic_asm "floor";
    case spirv:
        return spirv_asm {
            OpExtInst $$vector result glsl450 Floor $x
        };
    default:
        VECTOR_MAP_UNARY(T, N, floor, x);
    }
}

// Matrix overload of `floor`; only HLSL gets a direct intrinsic.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
matrix floor(matrix x)
{
    __target_switch
    {
    case hlsl:
        __intrinsic_asm "floor";
    default:
        MATRIX_MAP_UNARY(T, N, M, floor, x);
    }
}

// Fused multiply-add
// On HLSL, float and half operands are routed to `mad`; other element
// types use the `fma` intrinsic.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)]
T fma(T a, T b, T c)
{
    __target_switch
    {
    case cpp:
        __intrinsic_asm "$P_fma($0, $1, $2)";
    case cuda:
        __intrinsic_asm "$P_fma($0, $1, $2)";
    case glsl:
        __intrinsic_asm "fma";
    case hlsl:
        if (__isFloat() || __isHalf())
            return mad(a, b, c);
        else
            __intrinsic_asm "fma($0, $1, $2)";
    case metal:
        __intrinsic_asm "fma";
    case spirv:
        return spirv_asm {
            OpExtInst $$T result glsl450 Fma $a $b $c
        };
    default:
        return a*b + c;
    }
}

// Vector overload of `fma`.
// NOTE(review): unlike the scalar overload, the HLSL case here emits `fma`
// for every element type -- confirm whether float/half vectors should be
// routed to `mad` as well.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)]
vector fma(vector a, vector b, vector c)
{
    __target_switch
    {
    case glsl:
        __intrinsic_asm "fma";
    case hlsl:
        __intrinsic_asm "fma";
    case metal:
        __intrinsic_asm "fma";
    case spirv:
        return spirv_asm {
            OpExtInst $$vector result glsl450 Fma $a $b $c
        };
    default:
        VECTOR_MAP_TRINARY(T, N, fma, a, b, c);
    }
}

// Matrix overload of `fma`; only HLSL gets a direct intrinsic.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)]
matrix fma(matrix a, matrix b, matrix c)
{
    __target_switch
    {
    case hlsl:
        __intrinsic_asm "fma";
    default:
        MATRIX_MAP_TRINARY(T, N, M, fma, a, b, c);
    }
}

// Floating point remainder of x/y
__generic [__readNone] [ForceInline] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T fmod(T x, T y) { // In HLSL, `fmod` returns a remainder. // Definition of `fmod` in HLSL is, // "The floating-point remainder is calculated such that x = i * y + f, // where i is an integer, f has the same sign as x, and the absolute value // of f is less than the absolute value of y." // // In GLSL, `mod` is a Modulus function. // OpenGL document defines "Modulus" as "Returns x - y * floor(x / y)". // The use of "Floor()" makes the difference. // // In Metal, `fmod` is Modulus function. // Metal document defines it as "Returns x - y * trunc(x/y)". // Note that the function name is same to HLSL but it behaves differently. // // The tricky ones are when x or y is a negative value. // // | Remainder | Modulus // x y | x= i*y +f | x-y*floor(x/y) // ------+-----------+------------------------------ // 4 3 | 4= 1*3 +1 | 4-3*floor( 4/3) = 4-3* 1 = 1 // 3 3 | 3= 1*3 +0 | 3-3*floor( 3/3) = 3-3* 1 = 0 // 2 3 | 2= 0*3 +2 | 2-3*floor( 2/3) = 2-3* 0 = 2 // 1 3 | 1= 0*3 +1 | 1-3*floor( 1/3) = 1-3* 0 = 1 // 0 3 | 0= 0*3 +0 | 0-3*floor( 0/3) = 0-3* 0 = 0 // -1 3 |-1= 0*3 -1 |-1-3*floor(-1/3) =-1-3*-1 = 2 // -2 3 |-2= 0*3 -2 |-2-3*floor(-2/3) =-2-3*-1 = 1 // -3 3 |-3=-1*3 0 |-3-3*floor(-3/3) =-3-3*-1 = 0 // -4 3 |-4=-1*3 -1 |-4-3*floor(-4/3) =-4-3*-2 = 2 // // When y is a negative value, // // | Remainder | Modulus // x y | x= i*y +f | x-y*floor(x/y) // ------+-----------+------------------------------ // 4 -3 | 4=-1*-3+1 | 4+3*floor( 4/-3) = 4+3*-2 =-2 // 3 -3 | 3=-1*-3+0 | 3+3*floor( 3/-3) = 3+3*-1 = 0 // 2 -3 | 2= 0*-3+2 | 2+3*floor( 2/-3) = 2+3*-1 =-1 // 1 -3 | 1= 0*-3+1 | 1+3*floor( 1/-3) = 1+3*-1 =-2 // 0 -3 | 0= 0*-3+0 | 0+3*floor( 0/-3) = 0+3* 0 = 0 // -1 -3 |-1= 0*-3-1 |-1+3*floor(-1/-3) =-1+3* 0 =-1 // -2 -3 |-2= 0*-3-2 |-2+3*floor(-2/-3) =-2+3* 0 =-2 // -3 -3 |-3= 1*-3 0 |-3+3*floor(-3/-3) =-3+3* 1 = 0 // -4 -3 |-4= 1*-3-1 |-4+3*floor(-4/-3) =-4+3* 1 =-1 __target_switch { 
case cpp: __intrinsic_asm "$P_fmod($0, $1)"; case cuda: __intrinsic_asm "$P_fmod($0, $1)"; case glsl: // GLSL doesn't have a function for remainder. __intrinsic_asm "(($0 < 0.0) ? -mod(-$0,abs($1)) : mod($0,abs($1)))"; case hlsl: __intrinsic_asm "fmod"; case metal: // Metal doesn't have a function for remainder. __intrinsic_asm "(($0 < 0.0) ? -fmod(-$0,abs($1)) : fmod($0,abs($1)))"; case spirv: // OpFRem return "The floating-point remainder whose sign // matches the sign of Operand 1", where Operand 1 is "x". return spirv_asm { result:$$T = OpFRem $x $y }; } } __generic [__readNone] [ForceInline] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector fmod(vector x, vector y) { __target_switch { case hlsl: __intrinsic_asm "fmod"; case spirv: return spirv_asm { result:$$vector = OpFRem $x $y }; default: VECTOR_MAP_BINARY(T, N, fmod, x, y); } } __generic [__readNone] [ForceInline] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix fmod(matrix x, matrix y) { __target_switch { case hlsl: __intrinsic_asm "fmod"; default: MATRIX_MAP_BINARY(T, N, M, fmod, x, y); } } // Fractional part __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T frac(T x) { __target_switch { case cpp: __intrinsic_asm "$P_frac($0)"; case cuda: __intrinsic_asm "$P_frac($0)"; case glsl: __intrinsic_asm "fract"; case hlsl: __intrinsic_asm "frac"; case metal: __intrinsic_asm "fract"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Fract $x }; } } __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector frac(vector x) { __target_switch { case glsl: __intrinsic_asm "fract"; case hlsl: __intrinsic_asm "frac"; case metal: __intrinsic_asm "fract"; case spirv: return spirv_asm { OpExtInst $$vector result glsl450 Fract $x }; default: VECTOR_MAP_UNARY(T, N, frac, x); } } __generic [__readNone] matrix frac(matrix x) { MATRIX_MAP_UNARY(T, N, M, frac, x); } __generic [__readNone] [ForceInline] 
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T fract(T x) { return frac(x); } __generic [__readNone] [ForceInline] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector fract(vector x) { return frac(x); } // Split float into mantissa and exponent __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T frexp(T x, out int exp) { __target_switch { case cpp: __intrinsic_asm "$P_frexp($0, $1)"; case cuda: __intrinsic_asm "$P_frexp($0, $1)"; case glsl: __intrinsic_asm "frexp"; case hlsl: __intrinsic_asm "frexp"; case metal: __intrinsic_asm "frexp($0, *($1))"; case spirv: return spirv_asm { result:$$T = OpExtInst glsl450 Frexp $x &exp }; } } __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector frexp(vector x, out vector exp) { __target_switch { case glsl: __intrinsic_asm "frexp"; case hlsl: __intrinsic_asm "frexp"; case metal: __intrinsic_asm "frexp($0, *($1))"; case spirv: return spirv_asm { result:$$vector = OpExtInst glsl450 Frexp $x &exp }; default: VECTOR_MAP_BINARY(T, N, frexp, x, exp); } } __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix frexp(matrix x, out matrix exp) { __target_switch { case hlsl: __intrinsic_asm "frexp"; default: MATRIX_MAP_BINARY(T, N, M, frexp, x, exp); } } // Texture filter width __generic [__readNone] [require(glsl_hlsl_metal_spirv, fragmentprocessing)] T fwidth(T x) { __requireComputeDerivative(); __target_switch { case hlsl: __intrinsic_asm "fwidth($0)"; case glsl: __intrinsic_asm "fwidth($0)"; case metal: __intrinsic_asm "fwidth($0)"; case spirv: return spirv_asm { OpFwidth $$T result $x; }; } } __generic [__readNone] [require(glsl_hlsl_spirv, fragmentprocessing)] vector fwidth(vector x) { __requireComputeDerivative(); __target_switch { case hlsl: __intrinsic_asm "fwidth($0)"; case glsl: __intrinsic_asm "fwidth($0)"; case spirv: return spirv_asm { OpFwidth $$vector result $x; }; } } __generic [__readNone] 
[require(glsl_hlsl_spirv, fragmentprocessing)] matrix fwidth(matrix x) { __target_switch { case hlsl: __intrinsic_asm "fwidth($0)"; default: MATRIX_MAP_UNARY(T, N, M, fwidth, x); } } __intrinsic_op($(kIROp_GetPerVertexInputArray)) Array __GetPerVertexInputArray(T attribute); /// Get the value of a vertex attribute at a specific vertex. /// /// The `GetAttributeAtVertex()` function can be used in a fragment shader /// to get the value of the given `attribute` at the vertex of the primitive /// that corresponds to the given `vertexIndex`. /// /// Note that the `attribute` must have been a declared varying input to /// the fragment shader with the `nointerpolation` modifier. /// /// This function can be applied to scalars, vectors, and matrices of /// built-in scalar types. /// __generic [__readNone] __glsl_version(450) __glsl_extension(GL_EXT_fragment_shader_barycentric) [require(glsl_hlsl_spirv, getattributeatvertex)] [KnownBuiltin("GetAttributeAtVertex")] [__unsafeForceInlineEarly] T GetAttributeAtVertex(T attribute, uint vertexIndex) { __target_switch { case hlsl: __intrinsic_asm "GetAttributeAtVertex"; case glsl: case spirv: return __GetPerVertexInputArray(attribute)[vertexIndex]; } } /// Get the value of a vertex attribute at a specific vertex. /// /// The `GetAttributeAtVertex()` function can be used in a fragment shader /// to get the value of the given `attribute` at the vertex of the primitive /// that corresponds to the given `vertexIndex`. /// /// Note that the `attribute` must have been a declared varying input to /// the fragment shader with the `nointerpolation` modifier. /// /// This function can be applied to scalars, vectors, and matrices of /// built-in scalar types. 
/// __generic [__readNone] __glsl_version(450) __glsl_extension(GL_EXT_fragment_shader_barycentric) [require(glsl_hlsl_spirv, getattributeatvertex)] vector GetAttributeAtVertex(vector attribute, uint vertexIndex) { __target_switch { case hlsl: __intrinsic_asm "GetAttributeAtVertex"; case glsl: __intrinsic_asm "$0[$1]"; case spirv: return spirv_asm { %_ptr_Input_vectorT = OpTypePointer Input $$vector; %addr = OpAccessChain %_ptr_Input_vectorT $attribute $vertexIndex; result:$$vector = OpLoad %addr; }; } } /// Get the value of a vertex attribute at a specific vertex. /// /// The `GetAttributeAtVertex()` function can be used in a fragment shader /// to get the value of the given `attribute` at the vertex of the primitive /// that corresponds to the given `vertexIndex`. /// /// Note that the `attribute` must have been a declared varying input to /// the fragment shader with the `nointerpolation` modifier. /// /// This function can be applied to scalars, vectors, and matrices of /// built-in scalar types. 
///
/// Matrix overload. HLSL forwards to the native `GetAttributeAtVertex`,
/// GLSL indexes the per-vertex input directly (`$0[$1]`), and SPIR-V builds
/// an `OpAccessChain` into the `Input` storage class and loads the element.
__generic
[__readNone]
__glsl_version(450)
__glsl_extension(GL_EXT_fragment_shader_barycentric)
[require(glsl_hlsl_spirv, getattributeatvertex)]
matrix GetAttributeAtVertex(matrix attribute, uint vertexIndex)
{
    __target_switch
    {
    case hlsl:
        __intrinsic_asm "GetAttributeAtVertex";
    case glsl:
        __intrinsic_asm "$0[$1]";
    case spirv:
        return spirv_asm
        {
            %_ptr_Input_matrixT = OpTypePointer Input $$matrix;
            %addr = OpAccessChain %_ptr_Input_matrixT $attribute $vertexIndex;
            result:$$matrix = OpLoad %addr;
        };
    }
}

// Sample count of the current render target.
// HLSL emits `GetRenderTargetSampleCount`, Metal emits `get_num_samples`.
[__readNone]
[require(hlsl, sm_4_0)]
[require(metal)]
uint GetRenderTargetSampleCount()
{
    __target_switch
    {
    case hlsl:
        __intrinsic_asm "GetRenderTargetSampleCount";
    case metal:
        __intrinsic_asm "get_num_samples";
    }
}

// Position of the sample selected by `Index`.
// HLSL emits `GetRenderTargetSamplePosition`, Metal emits `get_sample_position`.
[__readNone]
[require(hlsl, sm_4_0)]
[require(metal)]
float2 GetRenderTargetSamplePosition(int Index)
{
    __target_switch
    {
    case hlsl:
        __intrinsic_asm "GetRenderTargetSamplePosition";
    case metal:
        __intrinsic_asm "get_sample_position";
    }
}

// Memory barrier for group-shared memory (no explicit "WithGroupSync").
// Each target gets its own spelling; GLSL and SPIR-V use workgroup scope
// with acquire/release semantics.
__glsl_extension(GL_KHR_memory_scope_semantics)
[require(cuda_glsl_hlsl_metal_spirv, memorybarrier)]
void GroupMemoryBarrier()
{
    __target_switch
    {
    case glsl:
        __intrinsic_asm "memoryBarrier(gl_ScopeWorkgroup, gl_StorageSemanticsShared, gl_SemanticsAcquireRelease)";
    case hlsl:
        __intrinsic_asm "GroupMemoryBarrier";
    case cuda:
        __intrinsic_asm "__threadfence_block";
    case metal:
        __intrinsic_asm "threadgroup_barrier(mem_flags::mem_threadgroup)";
    case spirv:
        spirv_asm
        {
            OpMemoryBarrier Workgroup AcquireRelease|WorkgroupMemory
        };
    }
}

// Internal subgroup barrier helper.
// Only GLSL (`subgroupBarrier`) and SPIR-V (`OpControlBarrier Subgroup ...`)
// name a subgroup scope; the HLSL, CUDA and Metal cases emit the same
// intrinsics as `GroupMemoryBarrierWithGroupSync`.
[require(cuda_glsl_hlsl_metal_spirv, memorybarrier)]
void __subgroupBarrier()
{
    __target_switch
    {
    case glsl:
        __intrinsic_asm "subgroupBarrier";
    case hlsl:
        __intrinsic_asm "GroupMemoryBarrierWithGroupSync";
    case cuda:
        __intrinsic_asm "__syncthreads()";
    case metal:
        __intrinsic_asm "threadgroup_barrier(mem_flags::mem_threadgroup)";
    case spirv:
        spirv_asm
        {
            OpControlBarrier Subgroup Subgroup AcquireRelease|WorkgroupMemory|ImageMemory|UniformMemory
        };
    }
}

// Group-shared memory barrier combined with a control barrier.
// GLSL and SPIR-V use workgroup scope for both execution and memory.
__glsl_extension(GL_KHR_memory_scope_semantics)
[require(cuda_glsl_hlsl_metal_spirv, memorybarrier)]
void GroupMemoryBarrierWithGroupSync()
{
    __target_switch
    {
    case glsl:
        __intrinsic_asm "controlBarrier(gl_ScopeWorkgroup, gl_ScopeWorkgroup, gl_StorageSemanticsShared, gl_SemanticsAcquireRelease)";
    case hlsl:
        __intrinsic_asm "GroupMemoryBarrierWithGroupSync";
    case cuda:
        __intrinsic_asm "__syncthreads()";
    case metal:
        __intrinsic_asm "threadgroup_barrier(mem_flags::mem_threadgroup)";
    case spirv:
        spirv_asm
        {
            OpControlBarrier Workgroup Workgroup AcquireRelease|WorkgroupMemory
        };
    }
}

// Atomics

// Metal only: reinterprets a reference as a pointer usable by the
// `__metalInterlocked_*` helpers (lowered via `kIROp_MetalAtomicCast`).
__generic
__intrinsic_op($(kIROp_MetalAtomicCast))
[require(metal)]
T* __getMetalAtomicRef(__ref T x);

// True when `x` is an ImageSubscript (texture element access).
__generic
__intrinsic_op($(kIROp_IsTextureAccess))
bool __isTextureAccess(__ref T x);

// True when `x` accesses a texture whose element type is a scalar T.
__generic
__intrinsic_op($(kIROp_IsTextureScalarAccess))
bool __isTextureScalarAccess(__ref T x);

// True when `x` accesses a texture array.
__generic
__intrinsic_op($(kIROp_IsTextureArrayAccess))
bool __isTextureArrayAccess(__ref T x);

// Accepts an ImageSubscript.
// Returns the texture the ImageSubscript refers to.
__generic
__intrinsic_op($(kIROp_ExtractTextureFromTextureAccess))
TextureAccess* __extractTextureFromTextureAccess(__ref TextureAccess x);

// Accepts an ImageSubscript
// Gets Coord from ImageSubscript.
// Swizzles out ArrayCoord if applicable
__generic
__intrinsic_op($(kIROp_ExtractCoordFromTextureAccess))
uint __extractCoordFromTextureAccess(__ref TextureAccess x);

// Accepts an ImageSubscript
// Gets ArrayCoord from ImageSubscript
__generic
__intrinsic_op($(kIROp_ExtractArrayCoordFromTextureAccess))
uint __extractArrayCoordFromTextureAccess(__ref TextureAccess x);

${{{{
// The Metal texture-atomic helpers below are emitted twice: once for plain
// textures and once for texture arrays, which take an additional
// `uint arrayCoord` parameter and therefore one more `$n` placeholder.
for (bool isArray : {false, true})
{
    StringBuilder coordBuilder;
    StringBuilder coordFetchBuilder;
    StringBuilder threeParamsASMBuilder;
    StringBuilder threeParamsOutputParamASMBuilder;
    StringBuilder fourParamsASMBuilder;

    coordBuilder << "Coord coord";
    coordFetchBuilder << "coord";
    threeParamsASMBuilder << "$1, $2";
    fourParamsASMBuilder << "$1, $2, $3";
    if(isArray)
    {
        coordBuilder << ", uint arrayCoord";
        coordFetchBuilder << ", arrayCoord";
        threeParamsASMBuilder << ", $3";
        fourParamsASMBuilder << ", $4";
        threeParamsOutputParamASMBuilder << "$4";
    }
    else
    {
        threeParamsOutputParamASMBuilder << "$3";
    }

    auto coordString = coordBuilder.toString();
    auto coordFetchString = coordFetchBuilder.toString();
    auto threeParamsASMString = threeParamsASMBuilder.toString();
    auto threeParamsOutputParamASMString = threeParamsOutputParamASMBuilder.toString();
    auto fourParamsASMString = fourParamsASMBuilder.toString();
}}}}

${{{{
for (const char* atomicOperation : {"add", "and", "max", "min", "or", "sub", "xor"})
{
}}}}

// Metal texture atomic `atomic_fetch_<op>`; yields the value returned by the
// Metal member function.
__generic
[ForceInline]
[require(metal)]
vector __metalImageInterlocked_$(atomicOperation)(TextureType tex, $(coordString), vector value)
{
    static_assert(T is int || T is uint, "__metalImageInterlocked only allows 'int'/'uint' textures");
    static_assert(Coord is uint || Coord is vector || Coord is vector || Coord is vector, "__metalImageInterlocked implementation only allows 'uint' coordinates");
    __intrinsic_asm "$0.atomic_fetch_$(atomicOperation)($(threeParamsASMString))";
}

// Same operation, writing component 0 of the returned vector to
// `original_value`.
__generic
[ForceInline]
[require(metal)]
void __metalImageInterlocked_$(atomicOperation)(TextureType tex, $(coordString), vector value, out T original_value)
{
    static_assert(T is int || T is uint, "__metalImageInterlocked only allows 'int'/'uint' textures");
    static_assert(Coord is uint || Coord is vector || Coord is vector || Coord is vector, "__metalImageInterlocked implementation only allows 'uint' coordinates");
    original_value = __metalImageInterlocked_$(atomicOperation)(tex, $(coordFetchString), value)[0];
}

${{{{
} // for (const char* atomicOperation : ...)
}}}}

// Metal texture `atomic_exchange`.
__generic
[ForceInline]
[require(metal)]
vector __metalImageInterlocked_exchange(TextureType tex, $(coordString), vector value)
{
    static_assert(T is int || T is uint, "__metalImageInterlocked only allows 'int'/'uint' textures");
    static_assert(Coord is uint || Coord is vector || Coord is vector || Coord is vector, "__metalImageInterlocked implementation only allows 'uint' coordinates");
    __intrinsic_asm "($0.atomic_exchange($(threeParamsASMString)))";
}

// Exchange, writing component 0 of the returned vector to `original_value`.
__generic
[ForceInline]
[require(metal)]
void __metalImageInterlocked_exchange(TextureType tex, $(coordString), vector value, out T original_value)
{
    static_assert(T is int || T is uint, "Metal atomic texture operations only allow 'int'/'uint' textures");
    static_assert(Coord is uint || Coord is vector || Coord is vector || Coord is vector, "__metalImageInterlocked implementation only allows 'uint' coordinates");
    original_value = __metalImageInterlocked_exchange(tex, $(coordFetchString), value)[0];
}

// Metal texture `atomic_compare_exchange_weak`; `compare_value` is passed by
// reference to the Metal call.
__generic
[ForceInline]
[require(metal)]
void __metalImageInterlocked_compare_exchange(TextureType tex, $(coordString), __ref vector compare_value, vector value)
{
    static_assert(T is int || T is uint, "__metalImageInterlocked only allows 'int'/'uint' textures");
    static_assert(Coord is uint || Coord is vector || Coord is vector || Coord is vector, "__metalImageInterlocked implementation only allows 'uint' coordinates");
    __intrinsic_asm "($0.atomic_compare_exchange_weak($(fourParamsASMString)))";
}

// Compare-exchange that reports component 0 of `compare_value`, as left by
// the call above, through `original_value`.
__generic
[ForceInline]
[require(metal)]
void __metalImageInterlocked_compare_exchange(TextureType tex, $(coordString), vector compare_value, vector value, out T original_value)
{
    static_assert(T is int || T is uint, "__metalImageInterlocked only allows 'int'/'uint' textures");
    static_assert(Coord is uint || Coord is vector || Coord is vector || Coord is vector, "__metalImageInterlocked implementation only allows 'uint' coordinates");
    __metalImageInterlocked_compare_exchange(tex, $(coordFetchString), compare_value, value);
    original_value = compare_value[0];
}

${{{{
} // for (bool isArray : {false, true})
}}}}

${{{{
// Generated functions:
// atomicAdd, InterlockedAdd, atomic_fetch_add_explicit, OpAtomicIAdd, OpAtomicFAddEXT
// __cudaInterlocked_add, __glslInterlocked_add, __hlslInterlocked_add, __metalInterlocked_add, __spirvInterlocked_add
// atomicAnd, InterlockedAnd, atomic_fetch_and_explicit, OpAtomicAnd
// __cudaInterlocked_and, __glslInterlocked_and, __hlslInterlocked_and, __metalInterlocked_and, __spirvInterlocked_and
// atomicMax, InterlockedMax, atomic_fetch_max_explicit, OpAtomicUMax, OpAtomicSMax, OpAtomicFMaxEXT
// __cudaInterlocked_max, __glslInterlocked_max, __hlslInterlocked_max, __metalInterlocked_max, __spirvInterlocked_max
// atomicMin, InterlockedMin, atomic_fetch_min_explicit, OpAtomicUMin, OpAtomicSMin, OpAtomicFMinEXT
// __cudaInterlocked_min, __glslInterlocked_min, __hlslInterlocked_min, __metalInterlocked_min, __spirvInterlocked_min
// atomicOr, InterlockedOr, atomic_fetch_or_explicit, OpAtomicOr
// __cudaInterlocked_or, __glslInterlocked_or, __hlslInterlocked_or, __metalInterlocked_or, __spirvInterlocked_or
// atomicXor, InterlockedXor, atomic_fetch_xor_explicit, OpAtomicXor
// __cudaInterlocked_xor, __glslInterlocked_xor, __hlslInterlocked_xor, __metalInterlocked_xor, __spirvInterlocked_xor
// atomicExchange, atomicExch, InterlockedExchange, atomic_exchange_explicit, OpAtomicExchange
// __cudaInterlocked_exchange, __glslInterlocked_exchange, __hlslInterlocked_exchange, __metalInterlocked_exchange, __spirvInterlocked_exchange

// One row per operation: the internal Slang suffix, the per-target spelling
// that gets spliced into each intrinsic, and the expression used in the
// generated static_assert.
struct InternalAtomicOperationInfo
{
    const char* slangSuffix;
    const char* cudaSuffix;
    const char* glslSuffix;
    const char* hlslSuffix;
    const char* metalSuffix;
    const char* spirvFloatSuffix;
    const char* spirvUIntSuffix;
    const char* spirvIntSuffix;
    const char* assertExpr;
};
InternalAtomicOperationInfo internalAtomicOperationInfo[7] =
{
    { "add", "Add", "Add", "Add", "fetch_add", "FAddEXT", "IAdd", "IAdd", "true" },
    { "and", "And", "And", "And", "fetch_and", "And", "And", "And", "!__isFloat()" },
    { "max", "Max", "Max", "Max", "fetch_max", "FMaxEXT", "UMax", "SMax", "true" },
    { "min", "Min", "Min", "Min", "fetch_min", "FMinEXT", "UMin", "SMin", "true" },
    { "or", "Or", "Or", "Or", "fetch_or", "Or", "Or", "Or", "!__isFloat()" },
    { "xor", "Xor", "Xor", "Xor", "fetch_xor", "Xor", "Xor", "Xor", "!__isFloat()" },
    { "exchange", "Exch", "Exchange", "Exchange", "exchange", "Exchange", "Exchange", "Exchange", "true" },
};
for (InternalAtomicOperationInfo atomicOp : internalAtomicOperationInfo)
{
}}}}

// Metal: `atomic_<op>_explicit` with relaxed ordering, result discarded.
__generic
[ForceInline]
[require(metal)]
void __metalInterlocked_$(atomicOp.slangSuffix)(AtomicType dest, T value)
{
    static_assert($(atomicOp.assertExpr), "Unable to use float with Atomic$(atomicOp.slangSuffix)");
    __intrinsic_asm "atomic_$(atomicOp.metalSuffix)_explicit($0, $1, memory_order_relaxed)";
}

// Metal: same call, storing its result through `original_value`.
__generic
[ForceInline]
[require(metal)]
void __metalInterlocked_$(atomicOp.slangSuffix)(AtomicType dest, T value, out T original_value)
{
    static_assert($(atomicOp.assertExpr), "Unable to use float with Atomic$(atomicOp.slangSuffix)");
    __intrinsic_asm "((*($2)) = (atomic_$(atomicOp.metalSuffix)_explicit($0, $1, memory_order_relaxed)))";
}

// CUDA: `atomic<Op>`; the destination is cast to `int*` in the emitted code.
__generic
[ForceInline]
[require(cuda)]
void __cudaInterlocked_$(atomicOp.slangSuffix)(__ref T dest, T value)
{
    static_assert($(atomicOp.assertExpr), "Unable to use float with Atomic$(atomicOp.slangSuffix)");
    __intrinsic_asm "atomic$(atomicOp.cudaSuffix)((int*)$0, $1)";
}

// CUDA: same call, storing its result through `original_value`.
__generic
[ForceInline]
[require(cuda)]
void __cudaInterlocked_$(atomicOp.slangSuffix)(__ref T dest, T value, out T original_value)
{
    static_assert($(atomicOp.assertExpr), "Unable to use float with Atomic$(atomicOp.slangSuffix)");
    __intrinsic_asm "(*$2 = atomic$(atomicOp.cudaSuffix)((int*)$0, $1))";
}

// GLSL: `atomic<Op>`, result discarded.
__generic
[ForceInline]
[require(glsl)]
void __glslInterlocked_$(atomicOp.slangSuffix)(__ref T dest, T value)
{
    static_assert($(atomicOp.assertExpr), "Unable to use float with Atomic$(atomicOp.slangSuffix)");
    __intrinsic_asm "$atomic$(atomicOp.glslSuffix)($A, $1)";
}

// GLSL: same call, assigning its result to `original_value`.
__generic
[ForceInline]
[require(glsl)]
void __glslInterlocked_$(atomicOp.slangSuffix)(__ref T dest, T value, out T original_value)
{
    static_assert($(atomicOp.assertExpr), "Unable to use float with Atomic$(atomicOp.slangSuffix)");
    __intrinsic_asm "($2 = $atomic$(atomicOp.glslSuffix)($A, $1))";
}

// HLSL: forwards to the native `Interlocked<Op>` (two-argument form).
__generic
[ForceInline]
[require(hlsl)]
void __hlslInterlocked_$(atomicOp.slangSuffix)(__ref T dest, T value)
{
    static_assert($(atomicOp.assertExpr), "Unable to use float with Atomic$(atomicOp.slangSuffix)");
    __intrinsic_asm "Interlocked$(atomicOp.hlslSuffix)";
}

// HLSL: forwards to the native `Interlocked<Op>` (three-argument form).
__generic
[ForceInline]
[require(hlsl)]
void __hlslInterlocked_$(atomicOp.slangSuffix)(__ref T dest, T value, out T original_value)
{
    static_assert($(atomicOp.assertExpr), "Unable to use float with Atomic$(atomicOp.slangSuffix)");
    __intrinsic_asm "Interlocked$(atomicOp.hlslSuffix)";
}

// SPIR-V: picks the float / unsigned / signed opcode from the table row
// according to the element type; result discarded.
__generic
[ForceInline]
[require(spirv)]
void __spirvInterlocked_$(atomicOp.slangSuffix)(__ref T dest, T value)
{
    static_assert($(atomicOp.assertExpr), "Unable to use float with Atomic$(atomicOp.slangSuffix)");
    if (__isFloat())
    {
        spirv_asm
        {
            result:$$T = OpAtomic$(atomicOp.spirvFloatSuffix) &dest Device None $value
        };
    }
    else if (__isUnsignedInt())
    {
        spirv_asm
        {
            result:$$T = OpAtomic$(atomicOp.spirvUIntSuffix) &dest Device None $value
        };
    }
    else if (__isInt())
    {
        spirv_asm
        {
            result:$$T = OpAtomic$(atomicOp.spirvIntSuffix) &dest Device None $value
        };
    }
}

// SPIR-V: same selection, storing the opcode's result to `original_value`.
__generic
[ForceInline]
[require(spirv)]
void __spirvInterlocked_$(atomicOp.slangSuffix)(__ref T dest, T value, out T original_value)
{
    static_assert($(atomicOp.assertExpr), "Unable to use float with Atomic$(atomicOp.slangSuffix)");
    if (__isFloat())
    {
        spirv_asm
        {
            %original:$$T = OpAtomic$(atomicOp.spirvFloatSuffix) &dest Device None $value;
            OpStore &original_value %original
        };
    }
    else if (__isUnsignedInt())
    {
        spirv_asm
        {
            %original:$$T = OpAtomic$(atomicOp.spirvUIntSuffix) &dest Device None $value;
            OpStore &original_value %original
        };
    }
    else if (__isInt())
    {
        spirv_asm
        {
            %original:$$T = OpAtomic$(atomicOp.spirvIntSuffix) &dest Device None $value;
            OpStore &original_value %original
        };
    }
}

${{{{
} // for (InternalAtomicOperationInfo atomicOp : internalAtomicOperationInfo)
}}}}

// Per-target compare-exchange helpers. The three-argument forms discard the
// previous value; the four-argument forms report it via `original_value`.

__generic
[ForceInline]
[require(metal)]
void __metalInterlocked_compare_exchange(AtomicType dest, __ref T compare_value, T value)
{
    __intrinsic_asm "atomic_compare_exchange_weak_explicit($0, $1, $2, memory_order_relaxed, memory_order_relaxed)";
}

// Reports `compare_value` as left by the three-argument call above.
__generic
[ForceInline]
[require(metal)]
void __metalInterlocked_compare_exchange(AtomicType dest, T compare_value, T value, out T original_value)
{
    __metalInterlocked_compare_exchange(dest, compare_value, value);
    original_value = compare_value;
}

// NOTE(review): `__glsl_version(430)` on a CUDA-only helper looks vestigial;
// confirm before removing.
__generic
__glsl_version(430)
[ForceInline]
[require(cuda)]
void __cudaInterlocked_compare_exchange(__ref T dest, __ref T compare_value, T value)
{
    __intrinsic_asm "atomicCAS($0, $1, $2)";
}

__generic
[ForceInline]
[require(cuda)]
void __cudaInterlocked_compare_exchange(__ref T dest, T compare_value, T value, out T original_value)
{
    __intrinsic_asm "*$3 = atomicCAS($0, $1, $2)";
}

__generic
[ForceInline]
[require(glsl)]
void __glslInterlocked_compare_exchange(__ref T dest, __ref T compare_value, T value)
{
    __intrinsic_asm "$atomicCompSwap($A, $1, $2)";
}

__generic
[ForceInline]
[require(glsl)]
void __glslInterlocked_compare_exchange(__ref T dest, T compare_value, T value, out T original_value)
{
    __intrinsic_asm "($3 = $atomicCompSwap($A, $1, $2))";
}

__generic
[ForceInline]
[require(hlsl)]
void __hlslInterlocked_compare_exchange(__ref T dest, __ref T compare_value, T value)
{
    __intrinsic_asm "InterlockedCompareExchange";
}

__generic
[ForceInline]
[require(hlsl)]
void __hlslInterlocked_compare_exchange(__ref T dest, T compare_value, T value, out T original_value)
{
    __intrinsic_asm "InterlockedCompareExchange";
}

// SPIR-V operand order is (pointer, scope, equal, unequal, value, comparator).
__generic
[ForceInline]
[require(spirv)]
void __spirvInterlocked_compare_exchange(__ref T dest, __ref T compare_value, T value)
{
    spirv_asm
    {
        %result:$$T = OpAtomicCompareExchange &dest Device None None $value $compare_value;
    };
}

__generic
[ForceInline]
[require(spirv)]
void __spirvInterlocked_compare_exchange(__ref T dest, T compare_value, T value, out T original_value)
{
    spirv_asm
    {
        %original:$$T = OpAtomicCompareExchange &dest Device None None $value $compare_value;
        OpStore &original_value %original
    };
}

__generic
[ForceInline]
[require(hlsl)]
void __hlslInterlocked_compare_exchange_float_bitwise(__ref T dest, T compare_value, T value)
{
    __intrinsic_asm "InterlockedCompareExchangeFloatBitwise";
}

__generic
[ForceInline]
[require(hlsl)]
void __hlslInterlocked_compare_exchange_float_bitwise(__ref T dest, T compare_value, T value, out T original_value)
{
    __intrinsic_asm "InterlockedCompareExchangeFloatBitwise";
}

${{{{
// Generates code for:
// InterlockedAdd, InterlockedAnd, InterlockedOr, InterlockedXor,
// InterlockedMax, InterlockedMin, InterlockedExchange
struct SlangAtomicOperationInfo
{
    const char* slangCallSuffix;
    const char* internalCallSuffix;
};
SlangAtomicOperationInfo slangAtomicOperationInfo[7] =
{
    { "Add", "add" },
    { "And", "and" },
    { "Or", "or" },
    { "Xor", "xor" },
    { "Max", "max" },
    { "Min", "min" },
    { "Exchange", "exchange" },
};
for (SlangAtomicOperationInfo atomicOp : slangAtomicOperationInfo)
{
for(const char* T : {"int", "uint"})
{
}}}}

// Public 32-bit atomic: dispatches to the matching per-target helper.
// On Metal a texture destination is routed to the image-atomic helpers
// (array or non-array); any other destination goes through
// `__getMetalAtomicRef`.
[ForceInline]
__glsl_version(430)
[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda_metal)]
void Interlocked$(atomicOp.slangCallSuffix)(__ref $(T) dest, $(T) value)
{
    static_assert(__isTextureScalarAccess(dest) || !__isTextureAccess(dest), "Atomic must be applied to scalar texture or non-texture");
    __target_switch
    {
    case hlsl:
        __hlslInterlocked_$(atomicOp.internalCallSuffix)(dest, value);
    case cuda:
        __cudaInterlocked_$(atomicOp.internalCallSuffix)(dest, value);
    case glsl:
        __glslInterlocked_$(atomicOp.internalCallSuffix)(dest, value);
    case spirv:
        __spirvInterlocked_$(atomicOp.internalCallSuffix)(dest, value);
    case metal:
        if (__isTextureAccess(dest))
        {
            if (__isTextureArrayAccess(dest))
            {
                __metalImageInterlocked_$(atomicOp.internalCallSuffix)(
                    __extractTextureFromTextureAccess(dest),
                    __extractCoordFromTextureAccess(dest),
                    __extractArrayCoordFromTextureAccess(dest),
                    vector<$(T), 4>(value));
            }
            else
            {
                __metalImageInterlocked_$(atomicOp.internalCallSuffix)(
                    __extractTextureFromTextureAccess(dest),
                    __extractCoordFromTextureAccess(dest),
                    vector<$(T), 4>(value));
            }
        }
        else
        {
            __metalInterlocked_$(atomicOp.internalCallSuffix)(__getMetalAtomicRef(dest), value);
        }
        return;
    }
}

// Same dispatch, additionally passing `original_value` to the helper.
[ForceInline]
__glsl_version(430)
[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda_metal)]
void Interlocked$(atomicOp.slangCallSuffix)(__ref $(T) dest, $(T) value, out $(T) original_value)
{
    static_assert(__isTextureScalarAccess(dest) || !__isTextureAccess(dest), "Atomic must be applied to a scalar texture or non-texture");
    __target_switch
    {
    case hlsl:
        __hlslInterlocked_$(atomicOp.internalCallSuffix)(dest, value, original_value);
    case cuda:
        __cudaInterlocked_$(atomicOp.internalCallSuffix)(dest, value, original_value);
    case glsl:
        __glslInterlocked_$(atomicOp.internalCallSuffix)(dest, value, original_value);
    case spirv:
        __spirvInterlocked_$(atomicOp.internalCallSuffix)(dest, value, original_value);
    case metal:
        // Fully braced so each `else` visibly pairs with its `if`.
        if (__isTextureAccess(dest))
        {
            if (__isTextureArrayAccess(dest))
            {
                __metalImageInterlocked_$(atomicOp.internalCallSuffix)(
                    __extractTextureFromTextureAccess(dest),
                    __extractCoordFromTextureAccess(dest),
                    __extractArrayCoordFromTextureAccess(dest),
                    vector<$(T), 4>(value),
                    original_value);
            }
            else
            {
                __metalImageInterlocked_$(atomicOp.internalCallSuffix)(
                    __extractTextureFromTextureAccess(dest),
                    __extractCoordFromTextureAccess(dest),
                    vector<$(T), 4>(value),
                    original_value);
            }
        }
        else
        {
            __metalInterlocked_$(atomicOp.internalCallSuffix)(__getMetalAtomicRef(dest), value, original_value);
        }
        return;
    }
}

${{{{
} // for(const char* T : {"int", "uint"})
}}}}

// Convenience overload: a signed `value` applied to an unsigned destination
// is cast to `uint` and forwarded.
[ForceInline]
void Interlocked$(atomicOp.slangCallSuffix)(__ref uint dest, int value)
{
    Interlocked$(atomicOp.slangCallSuffix)(dest, (uint)value);
}

${{{{
} // for (SlangAtomicOperationInfo atomicOp : slangAtomicOperationInfo)
}}}}

${{{{
for(const char* T : {"int64_t", "uint64_t"})
{
}}}}

// 64-bit add. GLSL additionally requires GL_EXT_shader_atomic_int64 and
// SPIR-V declares the Int64Atomics capability.
[ForceInline]
[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda_metal)]
void InterlockedAdd(__ref $(T) dest, $(T) value)
{
    __target_switch
    {
    case hlsl:
        __hlslInterlocked_add(dest, value);
    case cuda:
        __cudaInterlocked_add(dest, value);
    case glsl:
        __requireGLSLExtension("GL_EXT_shader_atomic_int64");
        __glslInterlocked_add(dest, value);
    case spirv:
        spirv_asm
        {
            OpCapability Int64Atomics;
            result:$$$(T) = OpAtomicIAdd &dest Device None $value;
        };
    }
}

// NOTE(review): unlike the overload above, this one carries no
// [require(...)] attribute; confirm whether that is intentional.
[ForceInline]
void InterlockedAdd(__ref $(T) dest, $(T) value, out $(T) original_value)
{
    __target_switch
    {
    case hlsl:
        __hlslInterlocked_add(dest, value, original_value);
    case cuda:
        __cudaInterlocked_add(dest, value, original_value);
    case glsl:
        __requireGLSLExtension("GL_EXT_shader_atomic_int64");
        __glslInterlocked_add(dest, value, original_value);
    case spirv:
        spirv_asm
        {
            OpCapability Int64Atomics;
            %origin:$$$(T) = OpAtomicIAdd &dest Device None $value;
            OpStore &original_value %origin
        };
    }
}

// The remaining 64-bit atomics only provide an HLSL case.

[ForceInline]
void InterlockedAnd(__ref $(T) dest, $(T) value)
{
    __target_switch
    {
    case hlsl: __hlslInterlocked_and(dest, value);
    }
}

[ForceInline]
void InterlockedAnd(__ref $(T) dest, $(T) value, out $(T) original_value)
{
    __target_switch
    {
    case hlsl: __hlslInterlocked_and(dest, value, original_value);
    }
}

[ForceInline]
void InterlockedCompareExchange(__ref $(T) dest, $(T) compare_value, $(T) value)
{
    __target_switch
    {
    case hlsl: __hlslInterlocked_compare_exchange(dest, compare_value, value);
    }
}

[ForceInline]
void InterlockedCompareExchange(__ref $(T) dest, $(T) compare_value, $(T) value, out $(T) original_value)
{
    __target_switch
    {
    case hlsl: __hlslInterlocked_compare_exchange(dest, compare_value, value, original_value);
    }
}

// A stray `;` used to follow this signature, detaching the body from the
// declaration; the body now belongs to the function.
[ForceInline]
void InterlockedCompareStore(__ref $(T) dest, $(T) compare_value, $(T) value)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "InterlockedCompareStore";
    }
}

[ForceInline]
void InterlockedExchange(__ref $(T) dest, $(T) value)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "InterlockedExchange";
    }
}

[ForceInline]
void InterlockedExchange(__ref $(T) dest, $(T) value, out $(T) original_value)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "InterlockedExchange";
    }
}

[ForceInline]
void InterlockedMax(__ref $(T) dest, $(T) value)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "InterlockedMax";
    }
}

[ForceInline]
void InterlockedMax(__ref $(T) dest, $(T) value, out $(T) original_value)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "InterlockedMax";
    }
}

[ForceInline]
void InterlockedMin(__ref $(T) dest, $(T) value)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "InterlockedMin";
    }
}

[ForceInline]
void InterlockedMin(__ref $(T) dest, $(T) value, out $(T) original_value)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "InterlockedMin";
    }
}

[ForceInline]
void InterlockedOr(__ref $(T) dest, $(T) value)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "InterlockedOr";
    }
}

[ForceInline]
void InterlockedOr(__ref $(T) dest, $(T) value, out $(T) original_value)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "InterlockedOr";
    }
}

[ForceInline]
void InterlockedXor(__ref $(T) dest, $(T) value)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "InterlockedXor";
    }
}

[ForceInline]
void InterlockedXor(__ref $(T) dest, $(T) value, out $(T) original_value)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "InterlockedXor";
    }
}

${{{{
} // for(const char* T : {"int64_t", "uint64_t"})
}}}}

// 32-bit signed compare-exchange that reports the previous value through
// `original_value`. On Metal a texture destination goes through the
// image-atomic helper with the compare value widened to a vector.
[ForceInline]
__glsl_version(430)
[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda_metal)]
void InterlockedCompareExchange(__ref int dest, int compare_value, int value, out int original_value)
{
    static_assert(__isTextureScalarAccess(dest) || !__isTextureAccess(dest), "Atomic must be applied to scalar texture or non-texture");
    __target_switch
    {
    case hlsl:
        __hlslInterlocked_compare_exchange(dest, compare_value, value, original_value);
    case glsl:
        __glslInterlocked_compare_exchange(dest, compare_value, value, original_value);
    case cuda:
        __cudaInterlocked_compare_exchange(dest, compare_value, value, original_value);
    case spirv:
        __spirvInterlocked_compare_exchange(dest, compare_value, value, original_value);
    case metal:
        if (__isTextureAccess(dest))
        {
            vector vec_compare_value = vector(compare_value);
            if (__isTextureArrayAccess(dest))
            {
                __metalImageInterlocked_compare_exchange(
                    __extractTextureFromTextureAccess(dest),
                    __extractCoordFromTextureAccess(dest),
                    __extractArrayCoordFromTextureAccess(dest),
                    vec_compare_value,
                    vector(value),
                    original_value);
            }
            else
            {
                __metalImageInterlocked_compare_exchange(
                    __extractTextureFromTextureAccess(dest),
                    __extractCoordFromTextureAccess(dest),
                    vec_compare_value,
                    vector(value),
                    original_value);
            }
        }
        else
        {
            __metalInterlocked_compare_exchange(__getMetalAtomicRef(dest), compare_value, value, original_value);
        }
        return;
    }
}

// 32-bit unsigned counterpart of the overload above.
[ForceInline]
__glsl_version(430)
[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda_metal)]
void InterlockedCompareExchange(__ref uint dest, uint compare_value, uint value, out uint original_value)
{
    static_assert(__isTextureScalarAccess(dest) || !__isTextureAccess(dest), "Atomic must be applied to scalar texture or non-texture");
    __target_switch
    {
    case hlsl:
        __hlslInterlocked_compare_exchange(dest, compare_value, value, original_value);
    case cuda:
        __cudaInterlocked_compare_exchange(dest, compare_value, value, original_value);
    case glsl:
        __glslInterlocked_compare_exchange(dest, compare_value, value, original_value);
    case spirv:
        __spirvInterlocked_compare_exchange(dest, compare_value, value, original_value);
    case metal:
        if (__isTextureAccess(dest))
        {
            vector vec_compare_value = vector(compare_value);
            if (__isTextureArrayAccess(dest))
            {
                __metalImageInterlocked_compare_exchange(
                    __extractTextureFromTextureAccess(dest),
                    __extractCoordFromTextureAccess(dest),
                    __extractArrayCoordFromTextureAccess(dest),
                    vec_compare_value,
                    vector(value),
                    original_value);
            }
            else
            {
                __metalImageInterlocked_compare_exchange(
                    __extractTextureFromTextureAccess(dest),
                    __extractCoordFromTextureAccess(dest),
                    vec_compare_value,
                    vector(value),
                    original_value);
            }
        }
        else
        {
            __metalInterlocked_compare_exchange(__getMetalAtomicRef(dest), compare_value, value, original_value);
        }
        return;
    }
}

// Float compare-exchange. Only HLSL and Metal cases exist, and Metal rejects
// texture destinations at compile time.
[ForceInline]
void InterlockedCompareExchangeFloatBitwise(__ref float dest, float compare_value, float value)
{
    static_assert(__isTextureScalarAccess(dest) || !__isTextureAccess(dest), "Atomic must be applied to scalar texture or non-texture");
    __target_switch
    {
    case hlsl:
        __hlslInterlocked_compare_exchange_float_bitwise(dest, compare_value, value);
    case metal:
        static_assert(!__isTextureAccess(dest), "float atomic texture operations are disallowed with Metal target's");
        __metalInterlocked_compare_exchange(__getMetalAtomicRef(dest), compare_value, value);
        return;
    }
}

// Same as above, additionally passing `original_value` to the helper.
[ForceInline]
void InterlockedCompareExchangeFloatBitwise(__ref float dest, float compare_value, float value, out float original_value)
{
    static_assert(__isTextureScalarAccess(dest) || !__isTextureAccess(dest), "Atomic must be applied to scalar texture or non-texture");
    __target_switch
    {
    case hlsl:
        __hlslInterlocked_compare_exchange_float_bitwise(dest, compare_value, value, original_value);
    case metal:
        static_assert(!__isTextureAccess(dest), "float atomic texture operations are disallowed with Metal target's");
        __metalInterlocked_compare_exchange(__getMetalAtomicRef(dest), compare_value, value, original_value);
        return;
    }
}
// 32-bit signed compare-and-store: performs the compare-exchange and
// discards the previous value. On Metal a texture destination goes through
// the image-atomic helper (array or non-array form).
[ForceInline]
__glsl_version(430)
[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda_metal)]
void InterlockedCompareStore(__ref int dest, int compare_value, int value)
{
    __target_switch
    {
    case hlsl:
        __intrinsic_asm "InterlockedCompareStore";
    case glsl:
        __intrinsic_asm "$atomicCompSwap($A, $1, $2)";
    case cuda:
        __intrinsic_asm "atomicCAS($0, $1, $2)";
    case spirv:
        {
            spirv_asm
            {
                result:$$int = OpAtomicCompareExchange &dest Device None None $value $compare_value;
            };
            return;
        }
    case metal:
        {
            if (__isTextureAccess(dest))
            {
                vector cmpVec = vector(compare_value);
                if (__isTextureArrayAccess(dest))
                {
                    __metalImageInterlocked_compare_exchange(
                        __extractTextureFromTextureAccess(dest),
                        __extractCoordFromTextureAccess(dest),
                        __extractArrayCoordFromTextureAccess(dest),
                        cmpVec,
                        vector(value));
                }
                else
                {
                    __metalImageInterlocked_compare_exchange(
                        __extractTextureFromTextureAccess(dest),
                        __extractCoordFromTextureAccess(dest),
                        cmpVec,
                        vector(value));
                }
            }
            else
            {
                __metalInterlocked_compare_exchange(__getMetalAtomicRef(dest), compare_value, value);
            }
            return;
        }
    }
}

// 32-bit unsigned counterpart. The CUDA case casts the destination to
// `int*` in the emitted `atomicCAS` call.
[ForceInline]
__glsl_version(430)
[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda_metal)]
void InterlockedCompareStore(__ref uint dest, uint compare_value, uint value)
{
    __target_switch
    {
    case hlsl:
        __intrinsic_asm "InterlockedCompareStore";
    case glsl:
        __intrinsic_asm "$atomicCompSwap($A, $1, $2)";
    case cuda:
        __intrinsic_asm "atomicCAS((int*)$0, $1, $2)";
    case spirv:
        spirv_asm
        {
            result:$$uint = OpAtomicCompareExchange &dest Device None None $value $compare_value;
        };
    case metal:
        if (__isTextureAccess(dest))
        {
            vector cmpVec = vector(compare_value);
            if (__isTextureArrayAccess(dest))
            {
                __metalImageInterlocked_compare_exchange(
                    __extractTextureFromTextureAccess(dest),
                    __extractCoordFromTextureAccess(dest),
                    __extractArrayCoordFromTextureAccess(dest),
                    cmpVec,
                    vector(value));
            }
            else
            {
                __metalImageInterlocked_compare_exchange(
                    __extractTextureFromTextureAccess(dest),
                    __extractCoordFromTextureAccess(dest),
                    cmpVec,
                    vector(value));
            }
        }
        else
        {
            __metalInterlocked_compare_exchange(__getMetalAtomicRef(dest), compare_value, value);
        }
        return;
    }
}

// Float compare-and-store; only an HLSL case is provided.
[ForceInline]
void InterlockedCompareStoreFloatBitwise(__ref float dest, float compare_value, float value)
{
    __target_switch
    {
    case hlsl:
        __intrinsic_asm "InterlockedCompareStoreFloatBitwise";
    }
}

// Float exchange. Only HLSL and Metal cases exist, and Metal rejects texture
// destinations at compile time.
[ForceInline]
void InterlockedExchange(__ref float dest, float value)
{
    static_assert(__isTextureScalarAccess(dest) || !__isTextureAccess(dest), "Atomic must be applied to scalar texture or non-texture");
    __target_switch
    {
    case hlsl:
        __hlslInterlocked_exchange(dest, value);
    case metal:
        static_assert(!__isTextureAccess(dest), "'float' atomic texture operations are disallowed with Metal target's");
        __metalInterlocked_exchange(__getMetalAtomicRef(dest), value);
        return;
    }
}

// Float exchange that additionally passes `original_value` to the helper.
[ForceInline]
void InterlockedExchange(__ref float dest, float value, out float original_value)
{
    static_assert(__isTextureScalarAccess(dest) || !__isTextureAccess(dest), "Atomic must be applied to scalar texture or non-texture");
    __target_switch
    {
    case hlsl:
        __hlslInterlocked_exchange(dest, value, original_value);
    case metal:
        static_assert(!__isTextureAccess(dest), "'float' atomic texture operations are disallowed with Metal target's");
        __metalInterlocked_exchange(__getMetalAtomicRef(dest), value, original_value);
        return;
    }
}

// Is floating-point value finite?
// Scalar form: HLSL and Metal use their native `isfinite`, C++/CUDA call the
// precision-prefixed `$P_isfinite`, and every other target computes it as
// "neither infinite nor NaN".
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
bool isfinite(T x)
{
    __target_switch
    {
    case hlsl:
    case metal:
        __intrinsic_asm "isfinite";
    case cuda:
    case cpp:
        __intrinsic_asm "$P_isfinite($0)";
    default:
        return !(isinf(x) || isnan(x));
    }
}

// Vector form: native on HLSL/Metal, composed from isinf/isnan on
// GLSL/SPIR-V, mapped per element everywhere else.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
vector isfinite(vector x)
{
    __target_switch
    {
    case hlsl:
    case metal:
        __intrinsic_asm "isfinite";
    case glsl:
    case spirv:
        return !(isinf(x) || isnan(x));
    default:
        VECTOR_MAP_UNARY(bool, N, isfinite, x);
    }
}

// Matrix form: native on HLSL, mapped over the matrix otherwise.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
matrix isfinite(matrix x)
{
    __target_switch
    {
    case hlsl:
        __intrinsic_asm "isfinite";
    default:
        MATRIX_MAP_UNARY(bool, N, M, isfinite, x);
    }
}

// Is floating-point value infinite?
// Scalar form: native `isinf` on HLSL/GLSL/Metal, `$P_isinf` on C++/CUDA,
// `OpIsInf` on SPIR-V.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
bool isinf(T x)
{
    __target_switch
    {
    case hlsl:
    case glsl:
    case metal:
        __intrinsic_asm "isinf";
    case cuda:
    case cpp:
        __intrinsic_asm "$P_isinf($0)";
    case spirv:
        return spirv_asm
        {
            result:$$bool = OpIsInf $x
        };
    }
}

// Vector form: native on HLSL/GLSL/Metal, `OpIsInf` on SPIR-V, mapped per
// element everywhere else.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
vector isinf(vector x)
{
    __target_switch
    {
    case hlsl:
    case glsl:
    case metal:
        __intrinsic_asm "isinf";
    case spirv:
        return spirv_asm
        {
            result:$$vector = OpIsInf $x
        };
    default:
        VECTOR_MAP_UNARY(bool, N, isinf, x);
    }
}

// Matrix form: native on HLSL, mapped over the matrix otherwise.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
matrix isinf(matrix x)
{
    __target_switch
    {
    case hlsl:
        __intrinsic_asm "isinf";
    default:
        MATRIX_MAP_UNARY(bool, N, M, isinf, x);
    }
}

// Is floating-point value not-a-number?
__generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] bool isnan(T x) { __target_switch { case hlsl: case glsl: case metal: __intrinsic_asm "isnan"; case cuda: case cpp: __intrinsic_asm "$P_isnan($0)"; case spirv: return spirv_asm { result:$$bool = OpIsNan $x}; } } __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector isnan(vector x) { __target_switch { case hlsl: case glsl: case metal: __intrinsic_asm "isnan"; case spirv: return spirv_asm { result:$$vector = OpIsNan $x}; default: VECTOR_MAP_UNARY(bool, N, isnan, x); } } __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix isnan(matrix x) { __target_switch { case hlsl: __intrinsic_asm "isnan"; default: MATRIX_MAP_UNARY(bool, N, M, isnan, x); } } // Construct float from mantissa and exponent __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T ldexp(T x, T exp) { __target_switch { case hlsl: __intrinsic_asm "ldexp"; default: return x * exp2(exp); } } __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector ldexp(vector x, vector exp) { __target_switch { case hlsl: __intrinsic_asm "ldexp"; default: return x * exp2(exp); } } __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix ldexp(matrix x, matrix exp) { __target_switch { case hlsl: __intrinsic_asm "ldexp"; default: MATRIX_MAP_BINARY(T, N, M, ldexp, x, exp); } } __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T ldexp(T x, E exp) { __target_switch { case glsl: __intrinsic_asm "ldexp"; case hlsl: __intrinsic_asm "ldexp"; case metal: __intrinsic_asm "ldexp"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Ldexp $x $exp }; default: return ldexp(x, __realCast(exp)); } } __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector ldexp(vector x, vector exp) { __target_switch { case glsl: __intrinsic_asm "ldexp"; 
case hlsl: __intrinsic_asm "ldexp"; case metal: __intrinsic_asm "ldexp"; case spirv: return spirv_asm { OpExtInst $$vector result glsl450 Ldexp $x $exp }; default: vector temp; [ForceUnroll] for (int i = 0; i < N; ++i) temp[i] = __realCast(exp[i]); return ldexp(x, temp); } } // Vector length __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T length(vector x) { __target_switch { case glsl: __intrinsic_asm "length"; case hlsl: __intrinsic_asm "length"; case metal: __intrinsic_asm "length"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Length $x }; default: return sqrt(dot(x, x)); } } // Scalar float length __generic [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T length(T x) { __target_switch { case glsl: __intrinsic_asm "length"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Length $x }; default: return abs(x); } } // Linear interpolation __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T lerp(T x, T y, T s) { __target_switch { case glsl: __intrinsic_asm "mix"; case metal: __intrinsic_asm "mix"; case hlsl: __intrinsic_asm "lerp"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 FMix $x $y $s }; default: return x + (y - x) * s; } } __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector lerp(vector x, vector y, vector s) { __target_switch { case glsl: __intrinsic_asm "mix"; case metal: __intrinsic_asm "mix"; case hlsl: __intrinsic_asm "lerp"; case spirv: return spirv_asm { OpExtInst $$vector result glsl450 FMix $x $y $s }; default: return x + (y - x) * s; } } __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix lerp(matrix x, matrix y, matrix s) { __target_switch { case hlsl: __intrinsic_asm "lerp"; default: MATRIX_MAP_TRINARY(T, N, M, lerp, x, y, s); } } // Legacy lighting function (obsolete) [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] float4 lit(float n_dot_l, 
float n_dot_h, float m) { __target_switch { case hlsl: __intrinsic_asm "lit"; default: let ambient = 1.0f; let diffuse = max(n_dot_l, 0.0f); let specular = step(0.0f, n_dot_l) * max(pow(n_dot_h, m), 0.0f); return float4(ambient, diffuse, specular, 1.0f); } } // Base-e logarithm __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T log(T x) { __target_switch { case cpp: __intrinsic_asm "$P_log($0)"; case cuda: __intrinsic_asm "$P_log($0)"; case glsl: __intrinsic_asm "log"; case hlsl: __intrinsic_asm "log"; case metal: __intrinsic_asm "log"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Log $x }; } } __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector log(vector x) { __target_switch { case glsl: __intrinsic_asm "log"; case hlsl: __intrinsic_asm "log"; case metal: __intrinsic_asm "log"; case spirv: return spirv_asm { OpExtInst $$vector result glsl450 Log $x }; default: VECTOR_MAP_UNARY(T, N, log, x); } } __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix log(matrix x) { __target_switch { case hlsl: __intrinsic_asm "log"; default: MATRIX_MAP_UNARY(T, N, M, log, x); } } // Base-10 logarithm __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T log10(T x) { __target_switch { case hlsl: __intrinsic_asm "log10"; case metal: __intrinsic_asm "log10"; case glsl: __intrinsic_asm "(log( $0 ) * $S0( 0.43429448190325182765112891891661) )"; case cuda: __intrinsic_asm "$P_log10($0)"; case cpp: __intrinsic_asm "$P_log10($0)"; case spirv: { const T tmp = T(0.43429448190325182765112891891661); return spirv_asm { %baseElog:$$T = OpExtInst glsl450 Log $x; result:$$T = OpFMul %baseElog $tmp }; } } } __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector log10(vector x) { __target_switch { case hlsl: __intrinsic_asm "log10"; case metal: __intrinsic_asm "log10"; case glsl: __intrinsic_asm "(log( $0 ) * 
$S0(0.43429448190325182765112891891661) )"; case spirv: { const T tmp = T(0.43429448190325182765112891891661); return spirv_asm { %baseElog:$$vector = OpExtInst glsl450 Log $x; result:$$vector = OpVectorTimesScalar %baseElog $tmp }; } default: VECTOR_MAP_UNARY(T, N, log10, x); } } __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix log10(matrix x) { __target_switch { case hlsl: __intrinsic_asm "log10"; default: MATRIX_MAP_UNARY(T, N, M, log10, x); } } // Base-2 logarithm __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T log2(T x) { __target_switch { case cpp: __intrinsic_asm "$P_log2($0)"; case cuda: __intrinsic_asm "$P_log2($0)"; case glsl: __intrinsic_asm "log2"; case hlsl: __intrinsic_asm "log2"; case metal: __intrinsic_asm "log2"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Log2 $x }; } } __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector log2(vector x) { __target_switch { case glsl: __intrinsic_asm "log2"; case hlsl: __intrinsic_asm "log2"; case metal: __intrinsic_asm "log2"; case spirv: return spirv_asm { OpExtInst $$vector result glsl450 Log2 $x }; default: VECTOR_MAP_UNARY(T, N, log2, x); } } __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix log2(matrix x) { __target_switch { case hlsl: __intrinsic_asm "log2"; default: MATRIX_MAP_UNARY(T, N, M, log2, x); } } // multiply-add __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)] T mad(T mvalue, T avalue, T bvalue) { __target_switch { case cpp: __intrinsic_asm "$P_fma($0, $1, $2)"; case cuda: __intrinsic_asm "$P_fma($0, $1, $2)"; case glsl: __intrinsic_asm "fma"; case hlsl: __intrinsic_asm "mad"; case metal: __intrinsic_asm "fma"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Fma $mvalue $avalue $bvalue }; } } __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)] vector mad(vector 
mvalue, vector avalue, vector bvalue) { __target_switch { case glsl: __intrinsic_asm "fma"; case hlsl: __intrinsic_asm "mad"; case metal: __intrinsic_asm "fma"; case spirv: return spirv_asm { OpExtInst $$vector result glsl450 Fma $mvalue $avalue $bvalue }; default: VECTOR_MAP_TRINARY(T, N, mad, mvalue, avalue, bvalue); } } __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)] matrix mad(matrix mvalue, matrix avalue, matrix bvalue) { __target_switch { case hlsl: __intrinsic_asm "mad"; default: MATRIX_MAP_TRINARY(T, N, M, mad, mvalue, avalue, bvalue); } } __generic [__readNone] [require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] T mad(T mvalue, T avalue, T bvalue) { __target_switch { case cpp: __intrinsic_asm "$P_fma($0, $1, $2)"; case cuda: __intrinsic_asm "$P_fma($0, $1, $2)"; case glsl: __intrinsic_asm "fma"; case hlsl: __intrinsic_asm "mad"; case spirv: return spirv_asm { OpExtInst $$T result glsl450 Fma $mvalue $avalue $bvalue }; } } __generic [__readNone] [require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] vector mad(vector mvalue, vector avalue, vector bvalue) { __target_switch { case glsl: __intrinsic_asm "fma"; case hlsl: __intrinsic_asm "mad"; case spirv: return spirv_asm { OpExtInst $$vector result glsl450 Fma $mvalue $avalue $bvalue }; default: VECTOR_MAP_TRINARY(T, N, mad, mvalue, avalue, bvalue); } } __generic [__readNone] [require(cpp_cuda_glsl_hlsl_spirv, shader5_sm_5_0)] matrix mad(matrix mvalue, matrix avalue, matrix bvalue) { __target_switch { case hlsl: __intrinsic_asm "mad"; default: MATRIX_MAP_TRINARY(T, N, M, mad, mvalue, avalue, bvalue); } } // maximum __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T max(T x, T y) { // Note: a stdlib implementation of `max` (or `min`) will require splitting // floating-point and integer cases apart, because the floating-point // version needs to correctly handle the case where one of the inputs // is not-a-number. 
__target_switch
{
// Body of the scalar `max` whose signature sits on the preceding line.
// SPIR-V selects SMax or UMax depending on __isSignedInt().
case hlsl: __intrinsic_asm "max";
case glsl: __intrinsic_asm "max";
case metal: __intrinsic_asm "max";
case cuda: __intrinsic_asm "$P_max($0, $1)";
case cpp: __intrinsic_asm "$P_max($0, $1)";
case spirv:
    {
        if (__isSignedInt())
        {
            return spirv_asm { result:$$T = OpExtInst glsl450 SMax $x $y };
        }
        else
        {
            return spirv_asm { result:$$T = OpExtInst glsl450 UMax $x $y };
        }
    }
}
}

// Vector `max`, SMax/UMax flavour on SPIR-V.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
vector max(vector x, vector y)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "max";
    case glsl: __intrinsic_asm "max";
    case metal: __intrinsic_asm "max";
    case spirv:
        {
            if (__isSignedInt())
            {
                return spirv_asm { result:$$vector = OpExtInst glsl450 SMax $x $y };
            }
            else
            {
                return spirv_asm { result:$$vector = OpExtInst glsl450 UMax $x $y };
            }
        }
    default:
        VECTOR_MAP_BINARY(T, N, max, x, y);
    }
}

// Matrix `max`: HLSL intrinsic, otherwise MATRIX_MAP_BINARY.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
matrix max(matrix x, matrix y)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "max";
    default:
        MATRIX_MAP_BINARY(T, N, M, max, x, y);
    }
}

// Scalar `max`, FMax flavour on SPIR-V.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
T max(T x, T y)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "max";
    case metal: __intrinsic_asm "max";
    case glsl: __intrinsic_asm "max";
    case cuda: __intrinsic_asm "$P_max($0, $1)";
    case cpp: __intrinsic_asm "$P_max($0, $1)";
    case spirv:
        return spirv_asm { result:$$T = OpExtInst glsl450 FMax $x $y };
    }
}

// Vector `max`, FMax flavour on SPIR-V.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
vector max(vector x, vector y)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "max";
    case metal: __intrinsic_asm "max";
    case glsl: __intrinsic_asm "max";
    case spirv:
        return spirv_asm { result:$$vector = OpExtInst glsl450 FMax $x $y };
    default:
        VECTOR_MAP_BINARY(T, N, max, x, y);
    }
}

// Matrix `max`: HLSL intrinsic, otherwise MATRIX_MAP_BINARY.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
matrix max(matrix x, matrix y)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "max";
    default:
        MATRIX_MAP_BINARY(T, N, M, max, x, y);
    }
}

// Maximum of three values: Metal has `max3`, other targets nest two `max` calls.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
T max3(T x, T y, T z)
{
    __target_switch
    {
    case metal: __intrinsic_asm "max3";
    default:
        return max(x, max(y, z));
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
vector max3(vector x, vector y, vector z)
{
    __target_switch
    {
    case metal: __intrinsic_asm "max3";
    default:
        return max(x, max(y, z));
    }
}

// NaN-aware maximum: when exactly one operand is NaN the other one is
// returned. Metal maps to its `fmax`; the fallback checks both operands
// itself so that `max` is never called with a NaN argument.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
T fmax(T x, T y)
{
    __target_switch
    {
    case metal: __intrinsic_asm "fmax";
    default:
        if (isnan(x))
            return y;
        // Previously only `x` was checked, so a NaN `y` reached `max`.
        if (isnan(y))
            return x;
        return max(x, y);
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
vector fmax(vector x, vector y)
{
    __target_switch
    {
    case metal: __intrinsic_asm "fmax";
    default:
        VECTOR_MAP_BINARY(T, N, fmax, x, y);
    }
}

// NaN-aware maximum of three values: NaN operands are ignored and the
// maximum of the remaining operands is returned (NaN only if all three are
// NaN). Metal maps to its `fmax3`.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
T fmax3(T x, T y, T z)
{
    __target_switch
    {
    case metal: __intrinsic_asm "fmax3";
    default:
        {
            bool isnanX = isnan(x);
            bool isnanY = isnan(y);
            bool isnanZ = isnan(z);

            if (isnanX)
            {
                if (isnanY)
                    return z;
                if (isnanZ)
                    return y;
                // Only x is NaN: both y and z must take part. The earlier
                // code returned y here and dropped z.
                return max(y, z);
            }
            else if (isnanY)
            {
                if (isnanZ)
                    return x;
                return max(x, z);
            }
            else if (isnanZ)
            {
                return max(x, y);
            }

            return max(y, max(x, z));
        }
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
vector fmax3(vector x, vector y, vector z)
{
    __target_switch
    {
    case metal: __intrinsic_asm "fmax3";
    default:
        VECTOR_MAP_TRINARY(T, N, fmax3, x, y, z);
    }
}

// minimum

// Scalar `min`, SMin/UMin flavour on SPIR-V (chosen via __isSignedInt()).
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
T min(T x, T y)
{
    __target_switch
    {
    case hlsl:
    case glsl:
    case metal:
        __intrinsic_asm "min";
    case cuda:
    case cpp:
        __intrinsic_asm "$P_min($0, $1)";
    case spirv:
        {
            if (__isSignedInt())
                return spirv_asm { result:$$T = OpExtInst glsl450 SMin $x $y };
            else
                return spirv_asm { result:$$T = OpExtInst glsl450 UMin $x $y };
        }
    }
}

// Vector `min`, SMin/UMin flavour on SPIR-V.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
vector min(vector x, vector y)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "min";
    case glsl: __intrinsic_asm "min";
    case metal: __intrinsic_asm "min";
    case spirv:
        {
            if (__isSignedInt())
                return spirv_asm { result:$$vector = OpExtInst glsl450 SMin $x $y };
            else
                return spirv_asm { result:$$vector = OpExtInst glsl450 UMin $x $y };
        }
    default:
        VECTOR_MAP_BINARY(T, N, min, x, y);
    }
}

// Matrix `min`: HLSL intrinsic, otherwise MATRIX_MAP_BINARY.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
matrix min(matrix x, matrix y)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "min";
    default:
        MATRIX_MAP_BINARY(T, N, M, min, x, y);
    }
}

// Scalar `min`, FMin flavour on SPIR-V.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
T min(T x, T y)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "min";
    case metal: __intrinsic_asm "min";
    case glsl: __intrinsic_asm "min";
    case cuda: __intrinsic_asm "$P_min($0, $1)";
    case cpp: __intrinsic_asm "$P_min($0, $1)";
    case spirv:
        return spirv_asm { result:$$T = OpExtInst glsl450 FMin $x $y };
    }
}

// Vector `min`, FMin flavour on SPIR-V.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
vector min(vector x, vector y)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "min";
    case metal: __intrinsic_asm "min";
    case glsl: __intrinsic_asm "min";
    case spirv:
        return spirv_asm { result:$$vector = OpExtInst glsl450 FMin $x $y };
    default:
        VECTOR_MAP_BINARY(T, N, min, x, y);
    }
}

// Matrix `min`: HLSL intrinsic, otherwise MATRIX_MAP_BINARY.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
matrix min(matrix x, matrix y)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "min";
    default:
        MATRIX_MAP_BINARY(T, N, M, min, x, y);
    }
}

// Minimum of three values: Metal has `min3`, other targets nest two `min` calls.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
T min3(T x, T y, T z)
{
    __target_switch
    {
    case metal: __intrinsic_asm "min3";
    default:
        return min(x, min(y, z));
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
vector min3(vector x, vector y, vector z)
{
    __target_switch
    {
    case metal: __intrinsic_asm "min3";
    default:
        return min(x, min(y, z));
    }
}

// NaN-aware minimum: when exactly one operand is NaN the other one is
// returned. Metal maps to its `fmin`; the fallback checks both operands
// itself so that `min` is never called with a NaN argument.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
T fmin(T x, T y)
{
    __target_switch
    {
    case metal: __intrinsic_asm "fmin";
    default:
        if (isnan(x))
            return y;
        // Previously only `x` was checked, so a NaN `y` reached `min`.
        if (isnan(y))
            return x;
        return min(x, y);
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
vector fmin(vector x, vector y)
{
    __target_switch
    {
    case metal: __intrinsic_asm "fmin";
    default:
        VECTOR_MAP_BINARY(T, N, fmin, x, y);
    }
}

// NaN-aware minimum of three values: NaN operands are ignored and the
// minimum of the remaining operands is returned (NaN only if all three are
// NaN). Metal maps to its `fmin3`.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
T fmin3(T x, T y, T z)
{
    __target_switch
    {
    case metal: __intrinsic_asm "fmin3";
    default:
        {
            bool isnanX = isnan(x);
            bool isnanY = isnan(y);
            bool isnanZ = isnan(z);

            // Uses the cached flag; the earlier code re-evaluated isnan(x).
            if (isnanX)
            {
                if (isnanY)
                    return z;
                if (isnanZ)
                    return y;
                // Only x is NaN: both y and z must take part. The earlier
                // code returned y here and dropped z.
                return min(y, z);
            }
            else if (isnanY)
            {
                if (isnanZ)
                    return x;
                return min(x, z);
            }
            else if (isnanZ)
            {
                return min(x, y);
            }

            return min(x, min(y, z));
        }
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
vector fmin3(vector x, vector y, vector z)
{
    __target_switch
    {
    case metal: __intrinsic_asm "fmin3";
    default:
        VECTOR_MAP_TRINARY(T, N, fmin3, x, y, z);
    }
}

// Median

// Median of three values. Metal has `median3`; the fallback builds it from
// two `min` and two `max` calls as tabulated below.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
T median3(T x, T y, T z)
{
    __target_switch
    {
    case metal: __intrinsic_asm "median3";
    default:
        {
            //           | a | b | c | m |
            // ----------+---+---+---+---+
            // x > y > z | z | y | x | y |
            // x > z > y | y | z | x | z |
            // y > x > z | z | y | x | x |
            // y > z > x | z | y | z | z |
            // z > x > y | y | z | x | x |
            // z > y > x | y | z | y | y |
            T a = min(y, z);
            T b = max(y, z);
            T c = max(x, a);
            T m = min(b, c);
            return m;
        }
    }
}

// Vector median: same min/max network as the scalar fallback.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
vector median3(vector x, vector y, vector z)
{
    __target_switch
    {
    case metal: __intrinsic_asm "median3";
    default:
        {
            vector a = min(y, z);
            vector b = max(y, z);
            vector c = max(x, a);
            vector m = min(b, c);
            return m;
        }
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
T fmedian3(T x, T y, T z)
{
    __target_switch
    {
    case metal: __intrinsic_asm "fmedian3";
    default:
        {
            bool isnanX = isnan(x);
            bool isnanY = isnan(y);
            bool isnanZ = isnan(z);
            if (isnanX)
            {
                return isnanY ?
z : y; } else if (isnanY || isnanZ) { // "the function can return either non-NaN value" return x; } return median3(x, y, z); } } } __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector fmedian3(vector x, vector y, vector z) { __target_switch { case metal: __intrinsic_asm "fmedian3"; default: VECTOR_MAP_TRINARY(T, N, fmedian3, x, y, z); } } // split into integer and fractional parts (both with same sign) __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T modf(T x, out T ip) { __target_switch { case cpp: __intrinsic_asm "$P_modf($0, $1)"; case cuda: __intrinsic_asm "$P_modf($0, $1)"; case hlsl: __intrinsic_asm "modf"; case glsl: __intrinsic_asm "modf"; case metal: __intrinsic_asm "modf($0, *($1))"; case spirv: return spirv_asm { result:$$T = OpExtInst glsl450 Modf $x &ip }; } } __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector modf(vector x, out vector ip) { __target_switch { case hlsl: __intrinsic_asm "modf"; case glsl: __intrinsic_asm "modf"; case metal: __intrinsic_asm "modf($0, *($1))"; case spirv: return spirv_asm { result:$$vector = OpExtInst glsl450 Modf $x &ip }; default: VECTOR_MAP_BINARY(T, N, modf, x, ip); } } __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix modf(matrix x, out matrix ip) { __target_switch { case hlsl: __intrinsic_asm "modf"; default: MATRIX_MAP_BINARY(T, N, M, modf, x, ip); } } // msad4 (whatever that is) [__readNone] [require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)] uint4 msad4(uint reference, uint2 source, uint4 accum) { __target_switch { case hlsl: __intrinsic_asm "msad4"; default: int4 bytesRef = (reference >> uint4(24, 16, 8, 0)) & 0xFF; int4 bytesX = (source.x >> uint4(24, 16, 8, 0)) & 0xFF; int4 bytesY = (source.y >> uint4(24, 16, 8, 0)) & 0xFF; uint4 mask = select(bytesRef == 0, 0, 0xFFFFFFFFu); uint4 result = accum; result += mask.x & abs(bytesRef - int4(bytesX.x, bytesY.y, bytesY.z, 
bytesY.w)); result += mask.y & abs(bytesRef - int4(bytesX.x, bytesX.y, bytesY.z, bytesY.w)); result += mask.z & abs(bytesRef - int4(bytesX.x, bytesX.y, bytesX.z, bytesY.w)); result += mask.w & abs(bytesRef - int4(bytesX.x, bytesX.y, bytesX.z, bytesX.w)); return result; } } // General inner products // scalar-scalar __generic __intrinsic_op($(kIROp_Mul)) [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T mul(T x, T y); // scalar-vector and vector-scalar __generic __intrinsic_op($(kIROp_Mul)) [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector mul(vector x, T y); __generic __intrinsic_op($(kIROp_Mul)) [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector mul(T x, vector y); // scalar-matrix and matrix-scalar __generic __intrinsic_op($(kIROp_Mul)) [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix mul(matrix x, T y); __generic __intrinsic_op($(kIROp_Mul)) [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix mul(T x, matrix y); // vector-vector (dot product) __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T mul(vector x, vector y) { __target_switch { case glsl: __intrinsic_asm "dot"; case metal: __intrinsic_asm "dot"; case hlsl: __intrinsic_asm "mul"; default: return dot(x, y); } } __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] T mul(vector x, vector y) { __target_switch { case hlsl: __intrinsic_asm "mul"; default: return dot(x, y); } } // vector-matrix __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector mul(vector left, matrix right) { __target_switch { case glsl: __intrinsic_asm "($1 * $0)"; case metal: __intrinsic_asm "($1 * $0)"; case hlsl: __intrinsic_asm "mul"; case spirv: return spirv_asm { OpMatrixTimesVector $$vector result $right $left }; default: vector result; for( int j = 0; j < M; ++j ) { T sum = T(0); for( int i = 0; i < N; ++i ) { 
sum += left[i] * right[i][j]; } result[j] = sum; } return result; } } __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector mul(vector left, matrix right) { __target_switch { case glsl: __intrinsic_asm "($1 * $0)"; case metal: __intrinsic_asm "($1 * $0)"; case hlsl: __intrinsic_asm "mul"; default: vector result; for( int j = 0; j < M; ++j ) { T sum = T(0); for( int i = 0; i < N; ++i ) { sum += left[i] * right[i][j]; } result[j] = sum; } return result; } } __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector mul(vector left, matrix right) { __target_switch { case glsl: __intrinsic_asm "($1 * $0)"; case metal: __intrinsic_asm "($1 * $0)"; case hlsl: __intrinsic_asm "mul"; default: vector result; for( int j = 0; j < M; ++j ) { T sum = T(0); for( int i = 0; i < N; ++i ) { sum |= left[i] & right[i][j]; } result[j] = sum; } return result; } } // matrix-vector __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector mul(matrix left, vector right) { __target_switch { case glsl: __intrinsic_asm "($1 * $0)"; case metal: __intrinsic_asm "($1 * $0)"; case hlsl: __intrinsic_asm "mul"; case spirv: return spirv_asm { OpVectorTimesMatrix $$vector result $right $left }; default: vector result; for( int i = 0; i < N; ++i ) { T sum = T(0); for( int j = 0; j < M; ++j ) { sum += left[i][j] * right[j]; } result[i] = sum; } return result; } } __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector mul(matrix left, vector right) { __target_switch { case glsl: __intrinsic_asm "($1 * $0)"; case metal: __intrinsic_asm "($1 * $0)"; case hlsl: __intrinsic_asm "mul"; default: vector result; for( int i = 0; i < N; ++i ) { T sum = T(0); for( int j = 0; j < M; ++j ) { sum += left[i][j] * right[j]; } result[i] = sum; } return result; } } __generic [__readNone] [OverloadRank(-1)] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] vector mul(matrix left, vector 
right) { __target_switch { case glsl: __intrinsic_asm "($1 * $0)"; case metal: __intrinsic_asm "($1 * $0)"; case hlsl: __intrinsic_asm "mul"; default: vector result; for( int i = 0; i < N; ++i ) { T sum = T(0); for( int j = 0; j < M; ++j ) { sum |= left[i][j] & right[j]; } result[i] = sum; } return result; } } // matrix-matrix __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix mul(matrix left, matrix right) { __target_switch { case glsl: __intrinsic_asm "($1 * $0)"; case metal: __intrinsic_asm "($1 * $0)"; case hlsl: __intrinsic_asm "mul"; case spirv: return spirv_asm { OpMatrixTimesMatrix $$matrix result $right $left }; default: matrix result; for( int r = 0; r < R; ++r) for( int c = 0; c < C; ++c) { T sum = T(0); for( int i = 0; i < N; ++i ) { sum += left[r][i] * right[i][c]; } result[r][c] = sum; } return result; } } __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix mul(matrix left, matrix right) { __target_switch { case glsl: __intrinsic_asm "($1 * $0)"; case metal: __intrinsic_asm "($1 * $0)"; case hlsl: __intrinsic_asm "mul"; default: matrix result; for( int r = 0; r < R; ++r) for( int c = 0; c < C; ++c) { T sum = T(0); for( int i = 0; i < N; ++i ) { sum += left[r][i] * right[i][c]; } result[r][c] = sum; } return result; } } __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)] matrix mul(matrix left, matrix right) { __target_switch { case glsl: __intrinsic_asm "($1 * $0)"; case metal: __intrinsic_asm "($1 * $0)"; case hlsl: __intrinsic_asm "mul"; default: matrix result; for( int r = 0; r < R; ++r) for( int c = 0; c < C; ++c) { T sum = T(0); for( int i = 0; i < N; ++i ) { sum |= left[r][i] & right[i][c]; } result[r][c] = sum; } return result; } } // next-after: next representable floating-point value // after x in the direction of y __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_4_0)] T nextafter(T x, T y) { __target_switch { case 
metal: __intrinsic_asm "nextafter"; default: if (isnan(x)) return x; if (isnan(y)) return y; if (x == y) return y; if (T is half) { T delta = __realCast(bit_cast(uint16_t(1))); return x + ((x < y) ? delta : -delta); } if (T is float) { T delta = __realCast(bit_cast(uint32_t(1))); return x + ((x < y) ? delta : -delta); } T delta = __realCast(bit_cast(uint64_t(1))); return x + ((x < y) ? delta : -delta); } } __generic [__readNone] [require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_4_0)] vector nextafter(vector x, vector y) { __target_switch { case metal: __intrinsic_asm "nextafter"; default: VECTOR_MAP_BINARY(T, N, nextafter, x, y); } } // noise (deprecated) [__readNone] [deprecated("Always returns 0")] float noise(float x) { return 0; } [__readNone] [deprecated("Always returns 0")] __generic float noise(vector x) { return 0; } /// Indicate that an index may be non-uniform at execution time. /// /// Shader Model 5.1 and 6.x introduce support for dynamic indexing /// of arrays of resources, but place the restriction that *by default* /// the implementation can assume that any value used as an index into /// such arrays will be dynamically uniform across an entire `Draw` or `Dispatch` /// (when using instancing, the value must be uniform across all instances; /// it does not seem that the restriction extends to draws within a multi-draw). /// /// In order to indicate to the implementation that it cannot make the /// uniformity assumption, a shader programmer is required to pass the index /// to the `NonUniformResourceIndex` function before using it as an index. /// The function superficially acts like an identity function. /// /// Note: a future version of Slang may take responsibility for inserting calls /// to this function as necessary in output code, rather than make this /// the user's responsibility, so that the default behavior of the language /// is more semantically "correct." 
// SPIR-V-only helper that returns `v` through an `OpCopyObject` instruction.
// NOTE(review): the `///` block directly above this helper describes
// `NonUniformResourceIndex`, not `__copyObject`.
[ForceInline]
[require(spirv)]
T __copyObject(T v)
{
    __target_switch
    {
    case spirv:
        return spirv_asm {
            result:$$T = OpCopyObject $v;
        };
    }
}

/// `NonUniformResourceIndex` function is used to indicate if the resource index is
/// divergent, and ensure scalarization happens correctly for each divergent lane.
__generic
__intrinsic_op($(kIROp_NonUniformResourceIndex))
[require(cpp_cuda_glsl_hlsl_spirv, nonuniformqualifier)]
T NonUniformResourceIndex(T index);

/// HLSL allows NonUniformResourceIndex around non int/uint types.
/// Its effect is presumably to ignore it, which the following implementation does.
/// We should also look to add a warning for this scenario.
[__unsafeForceInlineEarly]
[deprecated("NonUniformResourceIndex on a type other than uint/int is deprecated and has no effect")]
T NonUniformResourceIndex(T value)
{
    // Identity: the argument is returned unchanged.
    return value;
}

// Normalize a vector

// Vector overload: GLSL/HLSL/Metal intrinsic, GLSL.std.450 `Normalize` on
// SPIR-V, and `x / length(x)` on the remaining targets.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
vector normalize(vector x)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "normalize";
    case hlsl: __intrinsic_asm "normalize";
    case metal: __intrinsic_asm "normalize";
    case spirv:
        return spirv_asm {
            OpExtInst $$vector result glsl450 Normalize $x
        };
    default:
        return x / length(x);
    }
}

// Scalar overload: same target mapping as the vector form.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
T normalize(T x)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "normalize";
    case hlsl: __intrinsic_asm "normalize";
    case metal: __intrinsic_asm "normalize";
    case spirv:
        return spirv_asm {
            OpExtInst $$T result glsl450 Normalize $x
        };
    default:
        return x / length(x);
    }
}

// Raise to a power

// Scalar overload. There is no `default` case: every supported target is
// listed explicitly, with SPIR-V using GLSL.std.450 `Pow`.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
T pow(T x, T y)
{
    __target_switch
    {
    case cpp: __intrinsic_asm "$P_pow($0, $1)";
    case cuda: __intrinsic_asm "$P_pow($0, $1)";
    case glsl: __intrinsic_asm "pow";
    case hlsl: __intrinsic_asm "pow";
    case metal: __intrinsic_asm "pow";
    case spirv:
        return spirv_asm {
            OpExtInst $$T result glsl450 Pow $x $y
        };
    }
}
// Vector `pow`: GLSL/HLSL/Metal intrinsic, GLSL.std.450 `Pow` on SPIR-V, and
// VECTOR_MAP_BINARY on the remaining targets.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
vector pow(vector x, vector y)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "pow";
    case hlsl: __intrinsic_asm "pow";
    case metal: __intrinsic_asm "pow";
    case spirv:
        return spirv_asm {
            OpExtInst $$vector result glsl450 Pow $x $y
        };
    default:
        VECTOR_MAP_BINARY(T, N, pow, x, y);
    }
}

// Matrix `pow`: HLSL intrinsic, otherwise MATRIX_MAP_BINARY.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
matrix pow(matrix x, matrix y)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "pow";
    default:
        MATRIX_MAP_BINARY(T, N, M, pow, x, y);
    }
}

// Scalar `powr`: Metal emits its `powr`; all other targets compute
// `pow(abs(x), y)`, i.e. the sign of `x` is discarded before exponentiation.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
T powr(T x, T y)
{
    __target_switch
    {
    case metal: __intrinsic_asm "powr";
    default:
        return pow(abs(x), y);
    }
}

// Vector `powr`: same mapping as the scalar overload.
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
vector powr(vector x, vector y)
{
    __target_switch
    {
    case metal: __intrinsic_asm "powr";
    default:
        return pow(abs(x), y);
    }
}

// Output message
// TODO: add check to ensure format is const literal.
${{{{
// Generate one `printf` overload for every argument count from 0 to 11.
for (int argCount = 0; argCount < 12; argCount++)
{
    StringBuilder paramList;
    StringBuilder argList;
    StringBuilder spirvArgList;
    StringBuilder genericParamList;
    if (argCount > 0) genericParamList << "<";
    for (int i = 0; i < argCount; i++)
    {
        if (i > 0) genericParamList << ", ";
        genericParamList << "T" << i;
        paramList << ", T" << i << " v" << i;
        // `$0` is the format string, so value arguments start at `$1`.
        argList << ", $" << i+1;
        spirvArgList << " $v" << i;
    }
    if (argCount > 0) genericParamList << ">";
    auto params = paramList.toString();
    auto args = argList.toString();
    auto spirvArgs = spirvArgList.toString();
}}}}
__glsl_extension(GL_EXT_debug_printf)
[require(cpp_cuda_glsl_hlsl_spirv, printf)]
void printf$(genericParamList.toString())(NativeString format $(paramList))
{
    __target_switch
    {
    case hlsl:
    case cpp:
    case cuda:
        __intrinsic_asm "printf";
    case glsl:
        __intrinsic_asm "debugPrintfEXT($0 $(argList))";
    case spirv:
        spirv_asm {
            OpExtension "SPV_KHR_non_semantic_info";
            result:$$void = OpExtInst debugPrintf 1 $format $(spirvArgs);
        };
    }
}
${{{{
}
}}}}

// Tessellation factor fixup routines

[require(hlsl, sm_5_0)]
void Process2DQuadTessFactorsAvg(
    in float4 RawEdgeFactors,
    in float2 InsideScale,
    out float4 RoundedEdgeTessFactors,
    out float2 RoundedInsideTessFactors,
    out float2 UnroundedInsideTessFactors);

[require(hlsl, sm_5_0)]
void Process2DQuadTessFactorsMax(
    in float4 RawEdgeFactors,
    in float2 InsideScale,
    out float4 RoundedEdgeTessFactors,
    out float2 RoundedInsideTessFactors,
    out float2 UnroundedInsideTessFactors);

[require(hlsl, sm_5_0)]
void Process2DQuadTessFactorsMin(
    in float4 RawEdgeFactors,
    in float2 InsideScale,
    out float4 RoundedEdgeTessFactors,
    out float2 RoundedInsideTessFactors,
    out float2 UnroundedInsideTessFactors);

[require(hlsl, sm_5_0)]
void ProcessIsolineTessFactors(
    in float RawDetailFactor,
    in float RawDensityFactor,
    out float RoundedDetailFactor,
    out float RoundedDensityFactor);

[require(hlsl, sm_5_0)]
void ProcessQuadTessFactorsAvg(
    in float4 RawEdgeFactors,
    in float InsideScale,
    out float4 RoundedEdgeTessFactors,
    out float2 RoundedInsideTessFactors,
    out float2 UnroundedInsideTessFactors);

[require(hlsl, sm_5_0)]
void ProcessQuadTessFactorsMax(
    in float4 RawEdgeFactors,
    in float InsideScale,
    out float4 RoundedEdgeTessFactors,
    out float2 RoundedInsideTessFactors,
    out float2 UnroundedInsideTessFactors);

[require(hlsl, sm_5_0)]
void ProcessQuadTessFactorsMin(
    in float4 RawEdgeFactors,
    in float InsideScale,
    out float4 RoundedEdgeTessFactors,
    out float2 RoundedInsideTessFactors,
    out float2 UnroundedInsideTessFactors);

[require(hlsl, sm_5_0)]
void ProcessTriTessFactorsAvg(
    in float3 RawEdgeFactors,
    in float InsideScale,
    out float3 RoundedEdgeTessFactors,
    out float RoundedInsideTessFactor,
    out float UnroundedInsideTessFactor);

[require(hlsl, sm_5_0)]
void ProcessTriTessFactorsMax(
    in float3 RawEdgeFactors,
    in float InsideScale,
    out float3 RoundedEdgeTessFactors,
    out float RoundedInsideTessFactor,
    out float UnroundedInsideTessFactor);

[require(hlsl, sm_5_0)]
void ProcessTriTessFactorsMin(
    in float3 RawEdgeFactors,
    in float InsideScale,
    out float3 RoundedEdgeTessFactors,
    out float RoundedInsideTessFactors,
    out float UnroundedInsideTessFactors);

// Degrees to radians
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
T radians(T x)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "radians";
    case hlsl: __intrinsic_asm "radians";
    case spirv: return spirv_asm {
        OpExtInst $$T result glsl450 Radians $x
    };
    default:
        return x * (T.getPi() / T(180.0f));
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
vector radians(vector x)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "radians";
    case hlsl: __intrinsic_asm "radians";
    case spirv: return spirv_asm {
        OpExtInst $$vector result glsl450 Radians $x
    };
    default:
        return x * (T.getPi() / T(180.0f));
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
matrix radians(matrix x)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "radians";
    default:
        return x * (T.getPi() / T(180.0f));
    }
}

// Approximate reciprocal
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
T rcp(T x)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "rcp";
    default:
        return T(1.0) / x;
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
vector rcp(vector x)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "rcp";
    case glsl:
    case spirv:
        return T(1.0) / x;
    default:
        VECTOR_MAP_UNARY(T, N, rcp, x);
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
matrix rcp(matrix x)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "rcp";
    default:
        MATRIX_MAP_UNARY(T, N, M, rcp, x);
    }
}

// Reflect incident vector across plane with given normal
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
T reflect(T i, T n)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "reflect";
    case hlsl: __intrinsic_asm "reflect";
    case metal: __intrinsic_asm "reflect";
    case spirv: return spirv_asm {
        OpExtInst $$T result glsl450 Reflect $i $n
    };
    default:
        return i - T(2) * dot(n,i) * n;
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
vector reflect(vector i, vector n)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "reflect";
    case hlsl: __intrinsic_asm "reflect";
    case metal: __intrinsic_asm "reflect";
    case spirv: return spirv_asm {
        OpExtInst $$vector result glsl450 Reflect $i $n
    };
    default:
        return i - T(2) * dot(n,i) * n;
    }
}

// Refract incident vector given surface normal and index of refraction
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
vector refract(vector i, vector n, T eta)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "refract";
    case hlsl: __intrinsic_asm "refract";
    case metal: __intrinsic_asm "refract";
    case spirv: return spirv_asm {
        OpExtInst $$vector result glsl450 Refract $i $n $eta
    };
    default:
        let dotNI = dot(n,i);
        let k = T(1) - eta*eta*(T(1) - dotNI * dotNI);
        // A negative `k` yields the zero vector instead of taking sqrt(k).
        if(k < T(0)) return vector(T(0));
        return eta * i - (eta * dotNI + sqrt(k)) * n;
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
T refract(T i, T n, T eta)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "refract";
    case hlsl: __intrinsic_asm "refract";
    case metal: __intrinsic_asm "refract";
    case spirv: return spirv_asm {
        OpExtInst $$T result glsl450 Refract $i $n $eta
    };
    default:
        let dotNI = dot(n,i);
        let k = T(1) - eta*eta*(T(1) - dotNI * dotNI);
        // A negative `k` yields zero instead of taking sqrt(k).
        if(k < T(0)) return T(0);
        return eta * i - (eta * dotNI + sqrt(k)) * n;
    }
}

// Reverse order of bits
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)]
uint reversebits(uint value)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "reversebits";
    case glsl: __intrinsic_asm "bitfieldReverse";
    case cuda:
    case cpp:
        __intrinsic_asm "$P_reversebits($0)";
    case metal: __intrinsic_asm "reverse_bits";
    case spirv: return spirv_asm {OpBitReverse $$uint result $value};
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, shader5_sm_5_0)]
vector reversebits(vector value)
{
    __target_switch
    {
    default:
        VECTOR_MAP_UNARY(uint, N, reversebits, value);
    case glsl: __intrinsic_asm "bitfieldReverse";
    case metal: __intrinsic_asm "reverse_bits";
    case spirv: return spirv_asm {OpBitReverse $$vector result $value};
    }
}

// Round to nearest integer, with ties going to the even neighbour.
// GLSL, Metal and SPIR-V use their native round-even operation; every other
// target starts from `round(x)` and corrects exact ties by hand.
__generic
[__readNone]
[ForceInline]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
T rint(T x)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "roundEven";
    case metal: __intrinsic_asm "rint";
    case spirv: return spirv_asm {
        OpExtInst $$T result glsl450 RoundEven $x
    };
    default:
        T nearest = round(x);

        // Check if the value is exactly halfway between two integers
        if (abs(x - nearest) == T(0.5))
        {
            // If halfway, choose the even number.
            // `nearest / T(2)` is not an integer division here (assumes T is
            // a floating-point type, as the T(0.5) comparison implies), so
            // the quotient must be truncated before doubling; without that
            // the product reproduces `nearest` and odd values go undetected.
            if (trunc(nearest / T(2)) * T(2) != nearest)
            {
                // If the nearest number is odd,
                // move to the closest even number
                nearest -= ((x < nearest) ? T(1) : T(-1));
            }
        }
        return nearest;
    }
}

__generic
[__readNone]
[ForceInline]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
vector rint(vector x)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "roundEven";
    case metal: __intrinsic_asm "rint";
    case spirv: return spirv_asm {
        OpExtInst $$vector result glsl450 RoundEven $x
    };
    default:
        VECTOR_MAP_UNARY(T, N, rint, x);
    }
}

// Round-to-nearest
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
T round(T x)
{
    __target_switch
    {
    case cpp: __intrinsic_asm "$P_round($0)";
    case cuda: __intrinsic_asm "$P_round($0)";
    case glsl: __intrinsic_asm "round";
    case hlsl: __intrinsic_asm "round";
    case metal: __intrinsic_asm "round";
    case spirv: return spirv_asm {
        OpExtInst $$T result glsl450 Round $x
    };
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
vector round(vector x)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "round";
    case hlsl: __intrinsic_asm "round";
    case metal: __intrinsic_asm "round";
    case spirv: return spirv_asm {
        OpExtInst $$vector result glsl450 Round $x
    };
    default:
        VECTOR_MAP_UNARY(T, N, round, x);
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
matrix round(matrix x)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "round";
    default:
        MATRIX_MAP_UNARY(T, N, M, round, x);
    }
}

// Reciprocal of square root
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
T rsqrt(T x)
{
    __target_switch
    {
    case cpp: __intrinsic_asm "$P_rsqrt($0)";
    case cuda: __intrinsic_asm "$P_rsqrt($0)";
    case glsl: __intrinsic_asm "inversesqrt($0)";
    case hlsl: __intrinsic_asm "rsqrt";
    case metal: __intrinsic_asm "rsqrt";
    case spirv: return spirv_asm {
        OpExtInst $$T result glsl450 InverseSqrt $x
    };
    default:
        return T(1.0) / sqrt(x);
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
vector rsqrt(vector x)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "inversesqrt($0)";
    case hlsl: __intrinsic_asm "rsqrt";
    case metal: __intrinsic_asm "rsqrt";
    case spirv: return spirv_asm {
        OpExtInst $$vector result glsl450 InverseSqrt $x
    };
    default:
        VECTOR_MAP_UNARY(T, N, rsqrt, x);
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
matrix rsqrt(matrix x)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "rsqrt";
    default:
        MATRIX_MAP_UNARY(T, N, M, rsqrt, x);
    }
}

// Clamp value to [0,1] range
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
T saturate(T x)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "saturate";
    case metal: __intrinsic_asm "saturate";
    default:
        return clamp(x, T(0), T(1));
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
vector saturate(vector x)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "saturate";
    case metal: __intrinsic_asm "saturate";
    default:
        return clamp(x,
            vector(T(0)),
            vector(T(1)));
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
matrix saturate(matrix x)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "saturate";
    default:
        MATRIX_MAP_UNARY(T, N, M, saturate, x);
    }
}

// Integer cast helpers, bound directly to the IntCast IR op.
__generic
__intrinsic_op($(kIROp_IntCast))
T __int_cast(U val);

__generic
__intrinsic_op($(kIROp_IntCast))
vector __int_cast(vector val);

// Extract sign of value
__generic
[__readNone]
int sign(T x)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "sign";
    case metal: __intrinsic_asm "int(sign($0))";
    case glsl: __intrinsic_asm "int(sign($0))";
    case cuda:
    case cpp:
        __intrinsic_asm "$P_sign($0)";
    case spirv:
        // Float inputs go through FSign and are converted to int afterwards;
        // the other path uses SSign and an integer cast.
        if (__isFloat())
            return spirv_asm {
                %fsign:$$T = OpExtInst glsl450 FSign $x;
                result:$$int = OpConvertFToS %fsign
            };
        else
            return __int_cast(spirv_asm {OpExtInst $$T result glsl450 SSign $x});
    }
}

__generic
[__readNone]
vector sign(vector x)
{
    // One-element vectors are routed through the scalar overload.
    if(N == 1)
        return vector(sign(x[0]));
    __target_switch
    {
    case hlsl: __intrinsic_asm "sign";
    case glsl: __intrinsic_asm "ivec$N0(sign($0))";
    case metal: __intrinsic_asm "vec(sign($0))";
    case spirv:
        if (__isFloat())
            return spirv_asm {
                %fsign:$$vector = OpExtInst glsl450 FSign $x;
                result:$$vector = OpConvertFToS %fsign
            };
        else
            return __int_cast(spirv_asm {OpExtInst $$vector result glsl450 SSign $x});
    default:
        VECTOR_MAP_UNARY(int, N, sign, x);
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)]
matrix sign(matrix x)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "sign";
    default:
        MATRIX_MAP_UNARY(int, N, M, sign, x);
    }
}

// Sine
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
T sin(T x)
{
    __target_switch
    {
    case cpp: __intrinsic_asm "$P_sin($0)";
    case cuda: __intrinsic_asm "$P_sin($0)";
    case glsl: __intrinsic_asm "sin";
    case hlsl: __intrinsic_asm "sin";
    case metal: __intrinsic_asm "sin";
    case spirv: return spirv_asm {
        OpExtInst $$T result glsl450 Sin $x
    };
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
vector sin(vector x)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "sin";
    case hlsl: __intrinsic_asm "sin";
    case metal: __intrinsic_asm "sin";
    case spirv: return spirv_asm {
        OpExtInst $$vector result glsl450 Sin $x
    };
    default:
        VECTOR_MAP_UNARY(T, N, sin, x);
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
matrix sin(matrix x)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "sin";
    default:
        MATRIX_MAP_UNARY(T, N, M, sin, x);
    }
}

// Sine and cosine

// Metal-only helper: returns the sine and writes the cosine through `c`.
__generic
[__readNone]
[require(metal)]
T __sincos_metal(T x, out T c)
{
    __target_switch
    {
    case metal: __intrinsic_asm "sincos($0, *$1)";
    }
}

__generic
[__readNone]
[require(metal)]
vector __sincos_metal(vector x, out vector c)
{
    __target_switch
    {
    case metal: __intrinsic_asm "sincos($0, *$1)";
    }
}

__generic
[__readNone]
[ForceInline]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
void sincos(T x, out T s, out T c)
{
    __target_switch
    {
    case cuda: __intrinsic_asm "$P_sincos($0, $1, $2)";
    case hlsl: __intrinsic_asm "sincos";
    case metal:
        //__intrinsic_asm "*($1) = sincos($0, *($2))";
        s = __sincos_metal(x, c);
        return;
    default:
        s = sin(x);
        c = cos(x);
    }
}

__generic
[__readNone]
[ForceInline]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
void sincos(vector x, out vector s, out vector c)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "sincos";
    case metal:
        //__intrinsic_asm "*($1) = sincos($0, *($2))";
        s = __sincos_metal(x, c);
        return;
    default:
        s = sin(x);
        c = cos(x);
    }
}

__generic
[__readNone]
[ForceInline]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
void sincos(matrix x, out matrix s, out matrix c)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "sincos";
    default:
        s = sin(x);
        c = cos(x);
    }
}

// Hyperbolic Sine
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
T sinh(T x)
{
    __target_switch
    {
    case cpp: __intrinsic_asm "$P_sinh($0)";
    case cuda: __intrinsic_asm "$P_sinh($0)";
    case glsl: __intrinsic_asm "sinh";
    case hlsl: __intrinsic_asm "sinh";
    case metal: __intrinsic_asm "sinh";
    case spirv: return spirv_asm {
        OpExtInst $$T result glsl450 Sinh $x
    };
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
vector sinh(vector x)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "sinh";
    case hlsl: __intrinsic_asm "sinh";
    case metal: __intrinsic_asm "sinh";
    case spirv: return spirv_asm {
        OpExtInst $$vector result glsl450 Sinh $x
    };
    default:
        VECTOR_MAP_UNARY(T, N, sinh, x);
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
matrix sinh(matrix x)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "sinh";
    default:
        MATRIX_MAP_UNARY(T, N, M, sinh, x);
    }
}

// Sine of pi * x
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
T sinpi(T x)
{
    __target_switch
    {
    case metal: __intrinsic_asm "sinpi";
    default:
        return sin(T.getPi() * x);
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
vector sinpi(vector x)
{
    __target_switch
    {
    case metal: __intrinsic_asm "sinpi";
    default:
        return sin(T.getPi() * x);
    }
}

// Smooth step (Hermite interpolation)
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
T smoothstep(T min, T max, T x)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "smoothstep";
    case hlsl: __intrinsic_asm "smoothstep";
    case metal: __intrinsic_asm "smoothstep";
    case spirv: return spirv_asm {
        OpExtInst $$T result glsl450 SmoothStep $min $max $x
    };
    default:
        let t = saturate((x - min) / (max - min));
        return t * t * (T(3.0f) - (t + t));
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
vector smoothstep(vector min, vector max, vector x)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "smoothstep";
    case hlsl: __intrinsic_asm "smoothstep";
    case metal: __intrinsic_asm "smoothstep";
    case spirv: return spirv_asm {
        OpExtInst $$vector result glsl450 SmoothStep $min $max $x
    };
    default:
        VECTOR_MAP_TRINARY(T, N, smoothstep, min, max, x);
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
matrix smoothstep(matrix min, matrix max, matrix x)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "smoothstep";
    default:
        MATRIX_MAP_TRINARY(T, N, M, smoothstep, min, max, x);
    }
}

// Square root
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
T sqrt(T x)
{
    __target_switch
    {
    case cpp: __intrinsic_asm "$P_sqrt($0)";
    case cuda: __intrinsic_asm "$P_sqrt($0)";
    case glsl: __intrinsic_asm "sqrt";
    case hlsl: __intrinsic_asm "sqrt";
    case metal: __intrinsic_asm "sqrt";
    case spirv: return spirv_asm {
        OpExtInst $$T result glsl450 Sqrt $x
    };
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
vector sqrt(vector x)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "sqrt";
    case hlsl: __intrinsic_asm "sqrt";
    case metal: __intrinsic_asm "sqrt";
    case spirv: return spirv_asm {
        OpExtInst $$vector result glsl450 Sqrt $x
    };
    default:
        VECTOR_MAP_UNARY(T, N, sqrt, x);
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
matrix sqrt(matrix x)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "sqrt";
    default:
        MATRIX_MAP_UNARY(T, N, M, sqrt, x);
    }
}

// Step function
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
T step(T y, T x)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "step";
    case hlsl: __intrinsic_asm "step";
    case metal: __intrinsic_asm "step";
    case spirv: return spirv_asm {
        OpExtInst $$T result glsl450 Step $y $x
    };
    default:
        return x < y ? T(0.0f) : T(1.0f);
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
vector step(vector y, vector x)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "step";
    case hlsl: __intrinsic_asm "step";
    case metal: __intrinsic_asm "step";
    case spirv: return spirv_asm {
        OpExtInst $$vector result glsl450 Step $y $x
    };
    default:
        VECTOR_MAP_BINARY(T, N, step, y, x);
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
matrix step(matrix y, matrix x)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "step";
    default:
        MATRIX_MAP_BINARY(T, N, M, step, y, x);
    }
}

// Tangent
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
T tan(T x)
{
    __target_switch
    {
    case cpp: __intrinsic_asm "$P_tan($0)";
    case cuda: __intrinsic_asm "$P_tan($0)";
    case glsl: __intrinsic_asm "tan";
    case hlsl: __intrinsic_asm "tan";
    case metal: __intrinsic_asm "tan";
    case spirv: return spirv_asm {
        OpExtInst $$T result glsl450 Tan $x
    };
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
vector tan(vector x)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "tan";
    case hlsl: __intrinsic_asm "tan";
    case metal: __intrinsic_asm "tan";
    case spirv: return spirv_asm {
        OpExtInst $$vector result glsl450 Tan $x
    };
    default:
        VECTOR_MAP_UNARY(T, N, tan, x);
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
matrix tan(matrix x)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "tan";
    default:
        MATRIX_MAP_UNARY(T, N, M, tan, x);
    }
}

// Hyperbolic tangent
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
T tanh(T x)
{
    __target_switch
    {
    case cpp: __intrinsic_asm "$P_tanh($0)";
    case cuda: __intrinsic_asm "$P_tanh($0)";
    case glsl: __intrinsic_asm "tanh";
    case hlsl: __intrinsic_asm "tanh";
    case metal: __intrinsic_asm "tanh";
    case spirv: return spirv_asm {
        OpExtInst $$T result glsl450 Tanh $x
    };
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
vector tanh(vector x)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "tanh";
    case hlsl: __intrinsic_asm "tanh";
    case metal: __intrinsic_asm "tanh";
    case spirv: return spirv_asm {
        OpExtInst $$vector result glsl450 Tanh $x
    };
    default:
        VECTOR_MAP_UNARY(T, N, tanh, x);
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
matrix tanh(matrix x)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "tanh";
    default:
        MATRIX_MAP_UNARY(T, N, M, tanh, x);
    }
}

// Tangent of pi * x
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
T tanpi(T x)
{
    __target_switch
    {
    case metal: __intrinsic_asm "tanpi";
    default:
        return tan(T.getPi() * x);
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
vector tanpi(vector x)
{
    __target_switch
    {
    case metal: __intrinsic_asm "tanpi";
    default:
        return tan(T.getPi() * x);
    }
}

// Matrix transpose
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)]
[PreferRecompute]
matrix transpose(matrix x)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "transpose";
    case hlsl: __intrinsic_asm "transpose";
    case spirv: return spirv_asm {
        OpTranspose $$matrix result $x
    };
    default:
        matrix result;
        for(int r = 0; r < M; ++r)
            for(int c = 0; c < N; ++c)
                result[r][c] = x[c][r];
        return result;
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)]
[PreferRecompute]
matrix transpose(matrix x)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "transpose";
    case hlsl: __intrinsic_asm "transpose";
    case spirv: return spirv_asm {
        OpTranspose $$matrix result $x
    };
    default:
        matrix result;
        for (int r = 0; r < M; ++r)
            for (int c = 0; c < N; ++c)
                result[r][c] = x[c][r];
        return result;
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_spirv, sm_4_0_version)]
[PreferRecompute]
[OverloadRank(-1)]
matrix transpose(matrix x)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "transpose";
    case hlsl: __intrinsic_asm "transpose";
    case spirv: return spirv_asm {
        OpTranspose $$matrix result $x
    };
    default:
        matrix result;
        for (int r = 0; r < M; ++r)
            for (int c = 0; c < N; ++c)
                result[r][c] = x[c][r];
        return result;
    }
}

// Truncate to integer
__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
T trunc(T x)
{
    __target_switch
    {
    case cpp: __intrinsic_asm "$P_trunc($0)";
    case cuda: __intrinsic_asm "$P_trunc($0)";
    case glsl: __intrinsic_asm "trunc";
    case hlsl: __intrinsic_asm "trunc";
    case metal: __intrinsic_asm "trunc";
    case spirv: return spirv_asm {
        OpExtInst $$T result glsl450 Trunc $x
    };
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
vector trunc(vector x)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "trunc";
    case hlsl: __intrinsic_asm "trunc";
    case metal: __intrinsic_asm "trunc";
    case spirv: return spirv_asm {
        OpExtInst $$vector result glsl450 Trunc $x
    };
    default:
        VECTOR_MAP_UNARY(T, N, trunc, x);
    }
}

__generic
[__readNone]
[require(cpp_cuda_glsl_hlsl_metal_spirv, sm_4_0_version)]
matrix trunc(matrix x)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "trunc";
    default:
        MATRIX_MAP_UNARY(T, N, M, trunc, x);
    }
}

// Slang Specific 'Mask' Wave Intrinsics

typedef uint WaveMask;

__glsl_extension(GL_KHR_shader_subgroup_ballot)
__spirv_version(1.3)
[require(cuda_glsl_hlsl_spirv, subgroup_ballot)]
WaveMask WaveGetConvergedMask()
{
    __target_switch
    {
    case glsl: __intrinsic_asm "subgroupBallot(true).x";
    case hlsl: __intrinsic_asm "WaveActiveBallot(true).x";
    case cuda: __intrinsic_asm "__activemask()";
    case spirv:
        let _true = true;
        return (spirv_asm {
            OpCapability GroupNonUniformBallot;
            OpGroupNonUniformBallot $$uint4 result Subgroup $_true
        }).x;
    }
}

__intrinsic_op($(kIROp_WaveGetActiveMask))
WaveMask __WaveGetActiveMask();

__glsl_extension(GL_KHR_shader_subgroup_ballot)
__spirv_version(1.3)
[require(cuda_glsl_hlsl_spirv, subgroup_ballot_activemask)]
WaveMask WaveGetActiveMask()
{
    __target_switch
    {
    case glsl: __intrinsic_asm "subgroupBallot(true).x";
    case hlsl: __intrinsic_asm "WaveActiveBallot(true).x";
    case spirv:
        let _true = true;
        return (spirv_asm {
            OpCapability GroupNonUniformBallot;
            OpGroupNonUniformBallot $$uint4 result Subgroup $_true
        }).x;
    case cuda:
        return __WaveGetActiveMask();
    }
}

// Note: `mask` is only referenced by the CUDA expansion.
__glsl_extension(GL_KHR_shader_subgroup_basic)
__spirv_version(1.3)
[require(cuda_glsl_hlsl_spirv, subgroup_basic)]
bool WaveMaskIsFirstLane(WaveMask mask)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "subgroupElect()";
    case cuda: __intrinsic_asm "(($0 & -$0) == (WarpMask(1) << _getLaneId()))";
    case hlsl: __intrinsic_asm "WaveIsFirstLane()";
    case spirv:
        return spirv_asm {
            OpCapability GroupNonUniformBallot;
            OpGroupNonUniformElect $$bool result Subgroup
        };
    }
}

// Note: `mask` is only referenced by the CUDA expansion.
__glsl_extension(GL_KHR_shader_subgroup_vote)
__spirv_version(1.3)
[require(cuda_glsl_hlsl_spirv, subgroup_vote)]
bool WaveMaskAllTrue(WaveMask mask, bool condition)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "subgroupAll($1)";
    case cuda: __intrinsic_asm "(__all_sync($0, $1) != 0)";
    case hlsl: __intrinsic_asm "WaveActiveAllTrue($1)";
    case spirv:
        // OpGroupNonUniformAll belongs to the Vote capability, not Ballot.
        return spirv_asm {
            OpCapability GroupNonUniformVote;
            OpGroupNonUniformAll $$bool result Subgroup $condition
        };
    }
}

// Note: `mask` is only referenced by the CUDA expansion.
__glsl_extension(GL_KHR_shader_subgroup_vote)
__spirv_version(1.3)
[require(cuda_glsl_hlsl_spirv, subgroup_vote)]
bool WaveMaskAnyTrue(WaveMask mask, bool condition)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "subgroupAny($1)";
    case cuda: __intrinsic_asm "(__any_sync($0, $1) != 0)";
    case hlsl: __intrinsic_asm "WaveActiveAnyTrue($1)";
    case spirv:
        // OpGroupNonUniformAny belongs to the Vote capability, not Ballot.
        return spirv_asm {
            OpCapability GroupNonUniformVote;
            OpGroupNonUniformAny $$bool result Subgroup $condition
        };
    }
}

// Note: `mask` is only referenced by the CUDA expansion.
__glsl_extension(GL_KHR_shader_subgroup_ballot)
__spirv_version(1.3)
[require(cuda_glsl_hlsl_spirv, subgroup_ballot)]
WaveMask WaveMaskBallot(WaveMask mask, bool condition)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "subgroupBallot($1).x";
    case cuda: __intrinsic_asm "__ballot_sync($0, $1)";
    // Take `.x` explicitly, as the GLSL and SPIR-V paths do, rather than
    // relying on an implicit truncation of the ballot result.
    case hlsl: __intrinsic_asm "WaveActiveBallot($1).x";
    case spirv:
        return (spirv_asm {
            OpCapability GroupNonUniformBallot;
            OpGroupNonUniformBallot $$uint4 result Subgroup $condition
        }).x;
    }
}

// Note: `mask` is only referenced by the CUDA expansion.
[require(cuda_glsl_hlsl_spirv, subgroup_basic_ballot)]
uint WaveMaskCountBits(WaveMask mask, bool value)
{
    __target_switch
    {
    case cuda: __intrinsic_asm "__popc(__ballot_sync($0, $1))";
    case hlsl: __intrinsic_asm "WaveActiveCountBits($1)";
    default:
        return _WaveCountBits(WaveActiveBallot(value));
    }
}

// Waits until all warp lanes named in mask have executed a WaveMaskSharedSync (with the same mask)
// before resuming execution. Guarantees memory ordering in shared memory among threads participating
// in the barrier.
//
// The CUDA intrinsic says it orders *all* memory accesses, which appears to match most closely subgroupBarrier.
//
// TODO(JS):
// For HLSL it's not clear what to do. There is no explicit mechanism to 'reconverge' threads. In the docs it describes
// behavior as
// "These intrinsics are dependent on active lanes and therefore flow control. In the model of this document, implementations
// must enforce that the number of active lanes exactly corresponds to the programmer’s view of flow control."
//
// It seems this can only mean the active threads are the "threads the program flow would lead to". This implies a lockstep
// "straight SIMD" style interpretation. That being the case this op on HLSL is just a memory barrier without any Sync.
// Memory barrier combined with a wave-level sync.
// GLSL and SPIR-V call `__subgroupBarrier()`, HLSL emits `AllMemoryBarrier()`,
// and CUDA emits `__syncwarp` with `mask` as its argument. `mask` is not
// referenced on the other targets.
[require(cuda_glsl_hlsl_spirv, memorybarrier)]
void AllMemoryBarrierWithWaveMaskSync(WaveMask mask)
{
    __target_switch
    {
    case glsl:
    case spirv:
        __subgroupBarrier();
        return;
    case hlsl:
        __intrinsic_asm "AllMemoryBarrier()";
    case cuda:
        __intrinsic_asm "__syncwarp($0)";
    }
}

// On GLSL, it appears we can't use subgroupMemoryBarrierShared, because it only implies a memory ordering; it does not
// imply convergence. For subgroupBarrier we have from the docs:
// "The function subgroupBarrier() enforces that all active invocations within a subgroup must execute this function before any
// are allowed to continue their execution"
// TODO(JS):
// It's not entirely clear what to do here on HLSL.
// Reading the dxc wiki (https://github.com/Microsoft/DirectXShaderCompiler/wiki/Wave-Intrinsics), we have statements like:
// ... these intrinsics enable the elimination of barrier constructs when the scope of synchronization is within the width of the SIMD processor.
// Wave: A set of lanes executed simultaneously in the processor. No explicit barriers are required to guarantee that they execute in parallel.
// Which seems to imply at least some memory barriers like Shared might not be needed.
//
// The barrier is left here though, because not only does the barrier make writes before the barrier across the wave appear to others afterwards, it's
// also there to inform the compiler on what order reads and writes can take place. This might seem silly because the 'Active' lanes
// aspect of HLSL seems to make everything run in lock step - but that's not quite so; it only has to appear that way as far as the programmer's
// model is concerned - divergence could perhaps still happen.
// Group memory barrier combined with a wave-level sync.
// GLSL and SPIR-V call `__subgroupBarrier()`, HLSL emits
// `GroupMemoryBarrier()`, and CUDA emits `__syncwarp` with `mask` as its
// argument. `mask` is not referenced on the other targets.
[require(cuda_glsl_hlsl_spirv, memorybarrier)]
void GroupMemoryBarrierWithWaveMaskSync(WaveMask mask)
{
    __target_switch
    {
    case glsl:
    case spirv:
        __subgroupBarrier();
        return;
    case hlsl:
        __intrinsic_asm "GroupMemoryBarrier()";
    case cuda:
        __intrinsic_asm "__syncwarp($0)";
    }
}

// Mask-less counterpart of AllMemoryBarrierWithWaveMaskSync: CUDA emits
// `__syncwarp()` with no argument.
[require(cuda_glsl_hlsl_spirv, memorybarrier)]
void AllMemoryBarrierWithWaveSync()
{
    __target_switch
    {
    case glsl:
    case spirv:
        __subgroupBarrier();
        return;
    case hlsl:
        __intrinsic_asm "AllMemoryBarrier()";
    case cuda:
        __intrinsic_asm "__syncwarp()";
    }
}

// Mask-less counterpart of GroupMemoryBarrierWithWaveMaskSync: CUDA emits
// `__syncwarp()` with no argument.
[require(cuda_glsl_hlsl_spirv, memorybarrier)]
void GroupMemoryBarrierWithWaveSync()
{
    __target_switch
    {
    case glsl:
    case spirv:
        __subgroupBarrier();
        return;
    case hlsl:
        __intrinsic_asm "GroupMemoryBarrier()";
    case cuda:
        __intrinsic_asm "__syncwarp()";
    }
}

// NOTE! WaveMaskBroadcastLaneAt is *NOT* standard HLSL
// It is provided as access to subgroupBroadcast which can only take a
// constexpr laneId.
// https://github.com/KhronosGroup/GLSL/blob/master/extensions/khr/GL_KHR_shader_subgroup.txt
// SPIR-V versions greater than 1.4 loosen this restriction, and allow a 'dynamic uniform' index.
// If that's the behavior required then client code should use WaveReadLaneAt, which works this way.
// Broadcast `value` from the lane given by the constexpr `lane`.
// `mask` is only referenced by the CUDA expansion.
__generic
__glsl_extension(GL_KHR_shader_subgroup_ballot)
__spirv_version(1.3)
[require(cuda_glsl_hlsl_spirv, subgroup_ballot)]
T WaveMaskBroadcastLaneAt(WaveMask mask, T value, constexpr int lane)
{
    __target_switch
    {
    case hlsl:
        __intrinsic_asm "WaveReadLaneAt($1, $2)";
    case glsl:
        __intrinsic_asm "subgroupBroadcast($1, $2)";
    case cuda:
        __intrinsic_asm "__shfl_sync($0, $1, $2)";
    case spirv:
        // The lane index is converted to uint before being passed to the op.
        let laneIndex = uint(lane);
        return spirv_asm {
            OpCapability GroupNonUniformBallot;
            OpGroupNonUniformBroadcast $$T result Subgroup $value $laneIndex;
        };
    }
}

// Vector overload; CUDA goes through `_waveShuffleMultiple`.
__generic
__glsl_extension(GL_KHR_shader_subgroup_ballot)
__spirv_version(1.3)
[require(cuda_glsl_hlsl_spirv, subgroup_ballot)]
vector WaveMaskBroadcastLaneAt(WaveMask mask, vector value, constexpr int lane)
{
    __target_switch
    {
    case hlsl:
        __intrinsic_asm "WaveReadLaneAt($1, $2)";
    case glsl:
        __intrinsic_asm "subgroupBroadcast($1, $2)";
    case cuda:
        __intrinsic_asm "_waveShuffleMultiple($0, $1, $2)";
    case spirv:
        let laneIndex = uint(lane);
        return spirv_asm {
            OpCapability GroupNonUniformBallot;
            OpGroupNonUniformBroadcast $$vector result Subgroup $value $laneIndex;
        };
    }
}

// Matrix overload; only CUDA and HLSL are provided.
__generic
[require(cuda_hlsl, subgroup_ballot)]
matrix WaveMaskBroadcastLaneAt(WaveMask mask, matrix value, constexpr int lane)
{
    __target_switch
    {
    case hlsl:
        __intrinsic_asm "WaveReadLaneAt($1, $2)";
    case cuda:
        __intrinsic_asm "_waveShuffleMultiple($0, $1, $2)";
    }
}

// TODO(JS): If it can be determined that the `laneId` is constExpr, then subgroupBroadcast
// could be used on GLSL. For now we just use subgroupShuffle
__generic
__glsl_extension(GL_KHR_shader_subgroup_shuffle)
__spirv_version(1.3)
[require(cuda_glsl_hlsl_spirv, subgroup_shuffle)]
T WaveMaskReadLaneAt(WaveMask mask, T value, int lane)
{
    __target_switch
    {
    case hlsl:
        __intrinsic_asm "WaveReadLaneAt($1, $2)";
    case glsl:
        __intrinsic_asm "subgroupShuffle($1, $2)";
    case cuda:
        __intrinsic_asm "__shfl_sync($0, $1, $2)";
    case spirv:
        // The lane index is converted to uint before being passed to the op.
        let laneIndex = uint(lane);
        return spirv_asm {
            OpCapability GroupNonUniformShuffle;
            OpGroupNonUniformShuffle $$T result Subgroup $value $laneIndex;
        };
    }
}

// Vector overload; CUDA goes through `_waveShuffleMultiple`.
__generic
__glsl_extension(GL_KHR_shader_subgroup_shuffle)
__spirv_version(1.3)
[require(cuda_glsl_hlsl_spirv, subgroup_shuffle)]
vector WaveMaskReadLaneAt(WaveMask mask, vector value, int lane)
{
    __target_switch
    {
    case hlsl:
        __intrinsic_asm "WaveReadLaneAt($1, $2)";
    case glsl:
        __intrinsic_asm "subgroupShuffle($1, $2)";
    case cuda:
        __intrinsic_asm "_waveShuffleMultiple($0, $1, $2)";
    case spirv:
        let laneIndex = uint(lane);
        return spirv_asm {
            OpCapability GroupNonUniformShuffle;
            OpGroupNonUniformShuffle $$vector result Subgroup $value $laneIndex;
        };
    }
}

// Matrix overload; only CUDA and HLSL are provided.
__generic
[require(cuda_hlsl, subgroup_shuffle)]
matrix WaveMaskReadLaneAt(WaveMask mask, matrix value, int lane)
{
    __target_switch
    {
    case hlsl:
        __intrinsic_asm "WaveReadLaneAt($1, $2)";
    case cuda:
        __intrinsic_asm "_waveShuffleMultiple($0, $1, $2)";
    }
}

// NOTE! WaveMaskShuffle is a NON STANDARD HLSL intrinsic! It will map to WaveReadLaneAt on HLSL
// which means it will only work on hardware which allows arbitrary laneIds which is not true
// in general because it breaks the HLSL standard, which requires it to be 'dynamically uniform' across the Wave.
// WaveMask intrinsics.
// Every function in this group takes an explicit `WaveMask mask` as its first parameter.
// In the `__intrinsic_asm` strings only the CUDA expansions (and the HLSL
// `WaveMultiPrefix*` expansions) reference `$0`; the other GLSL/HLSL expansions
// reference `$1` alone.

// WaveMaskShuffle: forwards directly to WaveMaskReadLaneAt (scalar overload).
__generic
[__unsafeForceInlineEarly]
T WaveMaskShuffle(WaveMask mask, T value, int lane)
{
    return WaveMaskReadLaneAt(mask, value, lane);
}

// Vector overload.
__generic
[__unsafeForceInlineEarly]
vector WaveMaskShuffle(WaveMask mask, vector value, int lane)
{
    return WaveMaskReadLaneAt(mask, value, lane);
}

// Matrix overload.
__generic
[__unsafeForceInlineEarly]
matrix WaveMaskShuffle(WaveMask mask, matrix value, int lane)
{
    return WaveMaskReadLaneAt(mask, value, lane);
}

// WaveMaskPrefixCountBits: expands to an exclusive ballot bit count of `value` on each target.
// The SPIR-V path first ballots `value`, then bit-counts the ballot with group operation 2.
__glsl_extension(GL_KHR_shader_subgroup_ballot)
__spirv_version(1.3)
[require(cuda_glsl_hlsl_spirv, subgroup_ballot)]
uint WaveMaskPrefixCountBits(WaveMask mask, bool value)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "subgroupBallotExclusiveBitCount(subgroupBallot($1))";
    case cuda: __intrinsic_asm "__popc(__ballot_sync($0, $1) & _getLaneLtMask())";
    case hlsl: __intrinsic_asm "WavePrefixCountBits($1)";
    case spirv:
        return spirv_asm {
            OpCapability GroupNonUniformBallot;
            %mask:$$uint4 = OpGroupNonUniformBallot Subgroup $value;
            OpGroupNonUniformBallotBitCount $$uint result Subgroup 2 %mask
        };
    }
}

// Across lane ops

// WaveMaskBitAnd: bitwise-AND across lanes (scalar overload).
__generic
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__spirv_version(1.3)
[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)]
T WaveMaskBitAnd(WaveMask mask, T expr)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "subgroupAnd($1)";
    case cuda: __intrinsic_asm "_waveAnd($0, $1)";
    case hlsl: __intrinsic_asm "WaveActiveBitAnd($1)";
    case spirv:
        return spirv_asm {
            OpCapability GroupNonUniformArithmetic;
            OpGroupNonUniformBitwiseAnd $$T result Subgroup 0 $expr
        };
    }
}

// Vector overload.
__generic
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__spirv_version(1.3)
[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)]
vector WaveMaskBitAnd(WaveMask mask, vector expr)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "subgroupAnd($1)";
    case cuda: __intrinsic_asm "_waveAndMultiple($0, $1)";
    case hlsl: __intrinsic_asm "WaveActiveBitAnd($1)";
    case spirv:
        return spirv_asm {
            OpCapability GroupNonUniformArithmetic;
            OpGroupNonUniformBitwiseAnd $$vector result Subgroup 0 $expr
        };
    }
}

// Matrix overload: CUDA and HLSL only.
__generic
[require(cuda_hlsl, subgroup_arithmetic)]
matrix WaveMaskBitAnd(WaveMask mask, matrix expr)
{
    __target_switch
    {
    case cuda: __intrinsic_asm "_waveAndMultiple($0, $1)";
    case hlsl: __intrinsic_asm "WaveActiveBitAnd($1)";
    }
}

// WaveMaskBitOr: bitwise-OR across lanes (scalar overload).
__generic
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__spirv_version(1.3)
[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)]
T WaveMaskBitOr(WaveMask mask, T expr)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "subgroupOr($1)";
    case cuda: __intrinsic_asm "_waveOr($0, $1)";
    case hlsl: __intrinsic_asm "WaveActiveBitOr($1)";
    case spirv:
        return spirv_asm {
            OpCapability GroupNonUniformArithmetic;
            OpGroupNonUniformBitwiseOr $$T result Subgroup 0 $expr
        };
    }
}

// Vector overload.
__generic
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__spirv_version(1.3)
[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)]
vector WaveMaskBitOr(WaveMask mask, vector expr)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "subgroupOr($1)";
    case cuda: __intrinsic_asm "_waveOrMultiple($0, $1)";
    case hlsl: __intrinsic_asm "WaveActiveBitOr($1)";
    case spirv:
        return spirv_asm {
            OpCapability GroupNonUniformArithmetic;
            OpGroupNonUniformBitwiseOr $$vector result Subgroup 0 $expr
        };
    }
}

// Matrix overload: CUDA and HLSL only.
__generic
[require(cuda_hlsl, subgroup_arithmetic)]
matrix WaveMaskBitOr(WaveMask mask, matrix expr)
{
    __target_switch
    {
    case cuda: __intrinsic_asm "_waveOrMultiple($0, $1)";
    case hlsl: __intrinsic_asm "WaveActiveBitOr($1)";
    }
}

// WaveMaskBitXor: bitwise-XOR across lanes (scalar overload).
__generic
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__spirv_version(1.3)
[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)]
T WaveMaskBitXor(WaveMask mask, T expr)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "subgroupXor($1)";
    case cuda: __intrinsic_asm "_waveXor($0, $1)";
    case hlsl: __intrinsic_asm "WaveActiveBitXor($1)";
    case spirv:
        return spirv_asm {
            OpCapability GroupNonUniformArithmetic;
            OpGroupNonUniformBitwiseXor $$T result Subgroup 0 $expr
        };
    }
}

// Vector overload.
__generic
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__spirv_version(1.3)
[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)]
vector WaveMaskBitXor(WaveMask mask, vector expr)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "subgroupXor($1)";
    case cuda: __intrinsic_asm "_waveXorMultiple($0, $1)";
    case hlsl: __intrinsic_asm "WaveActiveBitXor($1)";
    case spirv:
        return spirv_asm {
            OpCapability GroupNonUniformArithmetic;
            OpGroupNonUniformBitwiseXor $$vector result Subgroup 0 $expr
        };
    }
}

// Matrix overload: CUDA and HLSL only.
__generic
[require(cuda_hlsl, subgroup_arithmetic)]
matrix WaveMaskBitXor(WaveMask mask, matrix expr)
{
    __target_switch
    {
    case cuda: __intrinsic_asm "_waveXorMultiple($0, $1)";
    case hlsl: __intrinsic_asm "WaveActiveBitXor($1)";
    }
}

// WaveMaskMax: maximum across lanes (scalar overload).
// The SPIR-V path picks the F/S/U opcode from the element type and returns `expr`
// unchanged when none of the three type checks matches.
__generic
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__spirv_version(1.3)
[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)]
T WaveMaskMax(WaveMask mask, T expr)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "subgroupMax($1)";
    case cuda: __intrinsic_asm "_waveMax($0, $1)";
    case hlsl: __intrinsic_asm "WaveActiveMax($1)";
    case spirv:
        if (__isFloat())
            return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformFMax $$T result Subgroup 0 $expr};
        else if (__isSignedInt())
            return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformSMax $$T result Subgroup 0 $expr};
        else if (__isUnsignedInt())
            return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformUMax $$T result Subgroup 0 $expr};
        else
            return expr;
    }
}

// Vector overload.
__generic
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__spirv_version(1.3)
[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)]
vector WaveMaskMax(WaveMask mask, vector expr)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "subgroupMax($1)";
    case cuda: __intrinsic_asm "_waveMaxMultiple($0, $1)";
    case hlsl: __intrinsic_asm "WaveActiveMax($1)";
    case spirv:
        if (__isFloat())
            return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformFMax $$vector result Subgroup 0 $expr};
        else if (__isSignedInt())
            return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformSMax $$vector result Subgroup 0 $expr};
        else if (__isUnsignedInt())
            return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformUMax $$vector result Subgroup 0 $expr};
        else
            return expr;
    }
}

// Matrix overload: CUDA and HLSL only.
__generic
[require(cuda_hlsl, subgroup_arithmetic)]
matrix WaveMaskMax(WaveMask mask, matrix expr)
{
    __target_switch
    {
    case cuda: __intrinsic_asm "_waveMaxMultiple($0, $1)";
    case hlsl: __intrinsic_asm "WaveActiveMax($1)";
    }
}

// WaveMaskMin: minimum across lanes (scalar overload); same type dispatch as WaveMaskMax.
__generic
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__spirv_version(1.3)
[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)]
T WaveMaskMin(WaveMask mask, T expr)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "subgroupMin($1)";
    case cuda: __intrinsic_asm "_waveMin($0, $1)";
    case hlsl: __intrinsic_asm "WaveActiveMin($1)";
    case spirv:
        if (__isFloat())
            return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformFMin $$T result Subgroup 0 $expr};
        else if (__isSignedInt())
            return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformSMin $$T result Subgroup 0 $expr};
        else if (__isUnsignedInt())
            return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformUMin $$T result Subgroup 0 $expr};
        else
            return expr;
    }
}

// Vector overload.
__generic
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__spirv_version(1.3)
[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)]
vector WaveMaskMin(WaveMask mask, vector expr)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "subgroupMin($1)";
    case cuda: __intrinsic_asm "_waveMinMultiple($0, $1)";
    case hlsl: __intrinsic_asm "WaveActiveMin($1)";
    case spirv:
        if (__isFloat())
            return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformFMin $$vector result Subgroup 0 $expr};
        else if (__isSignedInt())
            return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformSMin $$vector result Subgroup 0 $expr};
        else if (__isUnsignedInt())
            return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformUMin $$vector result Subgroup 0 $expr};
        else
            return expr;
    }
}

// Matrix overload: CUDA and HLSL only.
__generic
[require(cuda_hlsl, subgroup_arithmetic)]
matrix WaveMaskMin(WaveMask mask, matrix expr)
{
    __target_switch
    {
    case cuda: __intrinsic_asm "_waveMinMultiple($0, $1)";
    case hlsl: __intrinsic_asm "WaveActiveMin($1)";
    }
}

// WaveMaskProduct: product across lanes (scalar overload).
// The SPIR-V path uses FMul for float types, IMul for int types, and otherwise returns `expr`.
__generic
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__spirv_version(1.3)
[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)]
T WaveMaskProduct(WaveMask mask, T expr)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "subgroupMul($1)";
    case cuda: __intrinsic_asm "_waveProduct($0, $1)";
    case hlsl: __intrinsic_asm "WaveActiveProduct($1)";
    case spirv:
        if (__isFloat())
            return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformFMul $$T result Subgroup 0 $expr};
        else if (__isInt())
        {
            return spirv_asm {
                OpCapability GroupNonUniformArithmetic;
                OpGroupNonUniformIMul $$T result Subgroup 0 $expr;
            };
        }
        else
            return expr;
    }
}

// Vector overload.
__generic
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__spirv_version(1.3)
[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)]
vector WaveMaskProduct(WaveMask mask, vector expr)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "subgroupMul($1)";
    case cuda: __intrinsic_asm "_waveProductMultiple($0, $1)";
    case hlsl: __intrinsic_asm "WaveActiveProduct($1)";
    case spirv:
        if (__isFloat())
            return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformFMul $$vector result Subgroup 0 $expr};
        else if (__isInt())
        {
            return spirv_asm {
                OpCapability GroupNonUniformArithmetic;
                OpGroupNonUniformIMul $$vector result Subgroup 0 $expr;
            };
        }
        else
            return expr;
    }
}

// Matrix overload: CUDA and HLSL only.
__generic
[require(cuda_hlsl, subgroup_arithmetic)]
matrix WaveMaskProduct(WaveMask mask, matrix expr)
{
    __target_switch
    {
    case cuda: __intrinsic_asm "_waveProductMultiple($0, $1)";
    case hlsl: __intrinsic_asm "WaveActiveProduct($1)";
    }
}

// WaveMaskSum: sum across lanes (scalar overload).
// The GLSL path additionally requires the float16 extended-types extension when
// `__isHalf()` holds.
__generic
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__spirv_version(1.3)
[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)]
T WaveMaskSum(WaveMask mask, T expr)
{
    __target_switch
    {
    case glsl:
        if (__isHalf()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
        __intrinsic_asm "subgroupAdd($1)";
    case cuda: __intrinsic_asm "_waveSum($0, $1)";
    case hlsl: __intrinsic_asm "WaveActiveSum($1)";
    case spirv:
        if (__isFloat())
            return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformFAdd $$T result Subgroup 0 $expr};
        else if (__isInt())
        {
            return spirv_asm {
                OpCapability GroupNonUniformArithmetic;
                OpGroupNonUniformIAdd $$T result Subgroup 0 $expr;
            };
        }
        else
            return expr;
    }
}

// Vector overload.
__generic
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__spirv_version(1.3)
[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)]
vector WaveMaskSum(WaveMask mask, vector expr)
{
    __target_switch
    {
    case glsl:
        if (__isHalf()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
        __intrinsic_asm "subgroupAdd($1)";
    case cuda: __intrinsic_asm "_waveSumMultiple($0, $1)";
    case hlsl: __intrinsic_asm "WaveActiveSum($1)";
    case spirv:
        if (__isFloat())
            return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformFAdd $$vector result Subgroup 0 $expr};
        else if (__isInt())
        {
            return spirv_asm {
                OpCapability GroupNonUniformArithmetic;
                OpGroupNonUniformIAdd $$vector result Subgroup 0 $expr;
            };
        }
        else
            return expr;
    }
}

// Matrix overload: CUDA and HLSL only.
__generic
[require(cuda_hlsl, subgroup_arithmetic)]
matrix WaveMaskSum(WaveMask mask, matrix expr)
{
    __target_switch
    {
    case cuda: __intrinsic_asm "_waveSumMultiple($0, $1)";
    case hlsl: __intrinsic_asm "WaveActiveSum($1)";
    }
}

// WaveMaskAllEqual: all-equal vote on `value` (scalar overload).
__generic
__glsl_extension(GL_KHR_shader_subgroup_vote)
__spirv_version(1.3)
__cuda_sm_version(7.0)
[require(cuda_glsl_hlsl_spirv, subgroup_vote)]
bool WaveMaskAllEqual(WaveMask mask, T value)
{
    __target_switch
    {
    case glsl:
        if (__isHalf()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
        __intrinsic_asm "subgroupAllEqual($1)";
    case hlsl: __intrinsic_asm "WaveActiveAllEqual($1)";
    case cuda: __intrinsic_asm "_waveAllEqual($0, $1)";
    case spirv:
        return spirv_asm {
            OpCapability GroupNonUniformVote;
            OpGroupNonUniformAllEqual $$bool result Subgroup $value
        };
    }
}

// Vector overload.
__generic
__glsl_extension(GL_KHR_shader_subgroup_vote)
__spirv_version(1.3)
__cuda_sm_version(7.0)
[require(cuda_glsl_hlsl_spirv, subgroup_vote)]
bool WaveMaskAllEqual(WaveMask mask, vector value)
{
    __target_switch
    {
    case glsl:
        if (__isHalf()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
        __intrinsic_asm "subgroupAllEqual($1)";
    case hlsl: __intrinsic_asm "WaveActiveAllEqual($1)";
    case cuda: __intrinsic_asm "_waveAllEqualMultiple($0, $1)";
    case spirv:
        return spirv_asm {
            OpCapability GroupNonUniformVote;
            OpGroupNonUniformAllEqual $$bool result Subgroup $value
        };
    }
}

// Matrix overload: CUDA and HLSL only.
__generic
__cuda_sm_version(7.0)
[require(cuda_hlsl, subgroup_vote)]
bool WaveMaskAllEqual(WaveMask mask, matrix value)
{
    __target_switch
    {
    case cuda: __intrinsic_asm "_waveAllEqualMultiple($0, $1)";
    case hlsl: __intrinsic_asm "WaveActiveAllEqual($1)";
    }
}

// Prefix

// WaveMaskPrefixProduct: exclusive prefix product (scalar overload).
__generic
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__spirv_version(1.3)
[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)]
T WaveMaskPrefixProduct(WaveMask mask, T expr)
{
    __target_switch
    {
    case glsl:
        if (__isHalf()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
        __intrinsic_asm "subgroupExclusiveMul($1)";
    case cuda: __intrinsic_asm "_wavePrefixProduct($0, $1)";
    case hlsl: __intrinsic_asm "WavePrefixProduct($1)";
    case spirv:
        if (__isFloat())
            return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformFMul $$T result Subgroup ExclusiveScan $expr};
        else if (__isInt())
        {
            return spirv_asm {
                OpCapability GroupNonUniformArithmetic;
                OpGroupNonUniformIMul $$T result Subgroup ExclusiveScan $expr;
            };
        }
        else
            return expr;
    }
}

// Vector overload.
__generic
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__spirv_version(1.3)
[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)]
vector WaveMaskPrefixProduct(WaveMask mask, vector expr)
{
    __target_switch
    {
    case glsl:
        if (__isHalf()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
        __intrinsic_asm "subgroupExclusiveMul($1)";
    case cuda: __intrinsic_asm "_wavePrefixProductMultiple($0, $1)";
    case hlsl: __intrinsic_asm "WavePrefixProduct($1)";
    case spirv:
        if (__isFloat())
            return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformFMul $$vector result Subgroup ExclusiveScan $expr};
        else if (__isInt())
        {
            return spirv_asm {
                OpCapability GroupNonUniformArithmetic;
                OpGroupNonUniformIMul $$vector result Subgroup ExclusiveScan $expr;
            };
        }
        else
            return expr;
    }
}

// Matrix overload: CUDA and HLSL only.
__generic
[require(cuda_hlsl, subgroup_arithmetic)]
matrix WaveMaskPrefixProduct(WaveMask mask, matrix expr)
{
    __target_switch
    {
    case cuda: __intrinsic_asm "_wavePrefixProductMultiple($0, $1)";
    case hlsl: __intrinsic_asm "WavePrefixProduct($1)";
    }
}

// WaveMaskPrefixSum: exclusive prefix sum (scalar overload).
__generic
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__spirv_version(1.3)
[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)]
T WaveMaskPrefixSum(WaveMask mask, T expr)
{
    __target_switch
    {
    case glsl:
        if (__isHalf()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
        __intrinsic_asm "subgroupExclusiveAdd($1)";
    case cuda: __intrinsic_asm "_wavePrefixSum($0, $1)";
    case hlsl: __intrinsic_asm "WavePrefixSum($1)";
    case spirv:
        if (__isFloat())
            return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformFAdd $$T result Subgroup ExclusiveScan $expr};
        else if (__isInt())
        {
            return spirv_asm {
                OpCapability GroupNonUniformArithmetic;
                result:$$T = OpGroupNonUniformIAdd Subgroup ExclusiveScan $expr;
            };
        }
        else
            return expr;
    }
}

// Vector overload.
__generic
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__spirv_version(1.3)
[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)]
vector WaveMaskPrefixSum(WaveMask mask, vector expr)
{
    __target_switch
    {
    case glsl:
        if (__isHalf()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
        __intrinsic_asm "subgroupExclusiveAdd($1)";
    case cuda: __intrinsic_asm "_wavePrefixSumMultiple($0, $1)";
    case hlsl: __intrinsic_asm "WavePrefixSum($1)";
    case spirv:
        if (__isFloat())
            return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformFAdd $$vector result Subgroup ExclusiveScan $expr};
        else if (__isInt())
        {
            return spirv_asm {
                OpCapability GroupNonUniformArithmetic;
                result:$$vector = OpGroupNonUniformIAdd Subgroup ExclusiveScan $expr;
            };
        }
        else
            return expr;
    }
}

// Matrix overload: CUDA and HLSL only.
__generic
[require(cuda_hlsl, subgroup_arithmetic)]
matrix WaveMaskPrefixSum(WaveMask mask, matrix expr)
{
    __target_switch
    {
    case cuda: __intrinsic_asm "_wavePrefixSumMultiple($0, $1)";
    case hlsl: __intrinsic_asm "WavePrefixSum($1)";
    }
}

// WaveMaskReadLaneFirst: broadcast-first of `expr` (scalar overload).
__generic
__glsl_extension(GL_KHR_shader_subgroup_ballot)
__spirv_version(1.3)
[require(cuda_glsl_hlsl_spirv, subgroup_ballot)]
T WaveMaskReadLaneFirst(WaveMask mask, T expr)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "subgroupBroadcastFirst($1)";
    case cuda: __intrinsic_asm "_waveReadFirst($0, $1)";
    case hlsl: __intrinsic_asm "WaveReadLaneFirst($1)";
    case spirv:
        return spirv_asm {OpCapability GroupNonUniformBallot; OpGroupNonUniformBroadcastFirst $$T result Subgroup $expr};
    }
}

// Vector overload.
__generic
__glsl_extension(GL_KHR_shader_subgroup_ballot)
__spirv_version(1.3)
[require(cuda_glsl_hlsl_spirv, subgroup_ballot)]
vector WaveMaskReadLaneFirst(WaveMask mask, vector expr)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "subgroupBroadcastFirst($1)";
    case cuda: __intrinsic_asm "_waveReadFirstMultiple($0, $1)";
    case hlsl: __intrinsic_asm "WaveReadLaneFirst($1)";
    case spirv:
        return spirv_asm {OpCapability GroupNonUniformBallot; OpGroupNonUniformBroadcastFirst $$vector result Subgroup $expr};
    }
}

// Matrix overload: CUDA only.
__generic
[require(cuda, subgroup_ballot)]
matrix WaveMaskReadLaneFirst(WaveMask mask, matrix expr)
{
    __target_switch
    {
    case cuda: __intrinsic_asm "_waveReadFirstMultiple($0, $1)";
    }
}

// WaveMask SM6.5 like intrinsics

// TODO(JS): On HLSL it only works for 32 bits or less
// WaveMaskMatch: partitions lanes by `value` and returns the `.x` component of the
// resulting mask on GLSL/HLSL/SPIR-V (scalar overload).
// NOTE(review): the scalar/vector overloads declare __spirv_version(1.1) while the matrix
// overload declares 1.3, and the matrix CUDA expansion has no trailing `.x` unlike the
// scalar/vector CUDA expansions — confirm both are intended.
__generic
__glsl_extension(GL_NV_shader_subgroup_partitioned)
__spirv_version(1.1)
__cuda_sm_version(7.0)
[require(cuda_glsl_hlsl_spirv, subgroup_partitioned)]
WaveMask WaveMaskMatch(WaveMask mask, T value)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "subgroupPartitionNV($1).x";
    case cuda: __intrinsic_asm "_waveMatchScalar($0, $1).x";
    case hlsl: __intrinsic_asm "WaveMatch($1).x";
    case spirv:
        return (spirv_asm {
            OpCapability GroupNonUniformPartitionedNV;
            OpExtension "SPV_NV_shader_subgroup_partitioned";
            OpGroupNonUniformPartitionNV $$uint4 result $value
        }).x;
    }
}

// Vector overload.
__generic
__glsl_extension(GL_NV_shader_subgroup_partitioned)
__spirv_version(1.1)
__cuda_sm_version(7.0)
[require(cuda_glsl_hlsl_spirv, subgroup_partitioned)]
WaveMask WaveMaskMatch(WaveMask mask, vector value)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "subgroupPartitionNV($1).x";
    case cuda: __intrinsic_asm "_waveMatchMultiple($0, $1).x";
    case hlsl: __intrinsic_asm "WaveMatch($1).x";
    case spirv:
        return (spirv_asm {
            OpCapability GroupNonUniformPartitionedNV;
            OpExtension "SPV_NV_shader_subgroup_partitioned";
            OpGroupNonUniformPartitionNV $$uint4 result $value
        }).x;
    }
}

// Matrix overload: CUDA, GLSL and HLSL only.
__generic
__glsl_extension(GL_NV_shader_subgroup_partitioned)
__spirv_version(1.3)
__cuda_sm_version(7.0)
[require(cuda_glsl_hlsl, subgroup_partitioned)]
WaveMask WaveMaskMatch(WaveMask mask, matrix value)
{
    __target_switch
    {
    case cuda: __intrinsic_asm "_waveMatchMultiple($0, $1)";
    case glsl: __intrinsic_asm "subgroupPartitionNV($1).x";
    case hlsl: __intrinsic_asm "WaveMatch($1).x";
    }
}

// WaveMaskPrefixBitAnd: exclusive prefix bitwise-AND (scalar overload).
// The HLSL expansion passes the mask as the `.x` lane of a uint4.
__generic
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__spirv_version(1.3)
[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)]
T WaveMaskPrefixBitAnd(WaveMask mask, T expr)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "subgroupExclusiveAnd($1)";
    case cuda: __intrinsic_asm "_wavePrefixAnd($0, $1)";
    case hlsl: __intrinsic_asm "WaveMultiPrefixBitAnd($1, uint4($0, 0, 0, 0))";
    case spirv:
        return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformBitwiseAnd $$T result Subgroup ExclusiveScan $expr};
    }
}

// Vector overload.
__generic
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__spirv_version(1.3)
[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)]
vector WaveMaskPrefixBitAnd(WaveMask mask, vector expr)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "subgroupExclusiveAnd($1)";
    case cuda: __intrinsic_asm "_wavePrefixAndMultiple($0, $1)";
    case hlsl: __intrinsic_asm "WaveMultiPrefixBitAnd($1, uint4($0, 0, 0, 0))";
    case spirv:
        return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformBitwiseAnd $$vector result Subgroup ExclusiveScan $expr};
    }
}

// Matrix overload: CUDA and HLSL only.
// The CUDA expansion uses the same `_wavePrefixAndMultiple($0, $1)` call shape as the
// vector overload and as the Or/Xor matrix overloads.
__generic
[require(cuda_hlsl, subgroup_arithmetic)]
matrix WaveMaskPrefixBitAnd(WaveMask mask, matrix expr)
{
    __target_switch
    {
    case cuda: __intrinsic_asm "_wavePrefixAndMultiple($0, $1)";
    case hlsl: __intrinsic_asm "WaveMultiPrefixBitAnd($1, uint4($0, 0, 0, 0))";
    }
}

// WaveMaskPrefixBitOr: exclusive prefix bitwise-OR (scalar overload).
// The SPIR-V path uses OpGroupNonUniformBitwiseOr, matching the vector overload.
__generic
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__spirv_version(1.3)
[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)]
T WaveMaskPrefixBitOr(WaveMask mask, T expr)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "subgroupExclusiveOr($1)";
    case cuda: __intrinsic_asm "_wavePrefixOr($0, $1)";
    case hlsl: __intrinsic_asm "WaveMultiPrefixBitOr($1, uint4($0, 0, 0, 0))";
    case spirv:
        return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformBitwiseOr $$T result Subgroup ExclusiveScan $expr};
    }
}

// Vector overload.
__generic
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__spirv_version(1.3)
[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)]
vector WaveMaskPrefixBitOr(WaveMask mask, vector expr)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "subgroupExclusiveOr($1)";
    case cuda: __intrinsic_asm "_wavePrefixOrMultiple($0, $1)";
    case hlsl: __intrinsic_asm "WaveMultiPrefixBitOr($1, uint4($0, 0, 0, 0))";
    case spirv:
        return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformBitwiseOr $$vector result Subgroup ExclusiveScan $expr};
    }
}

// Matrix overload: CUDA and HLSL only.
__generic
[require(cuda_hlsl, subgroup_arithmetic)]
matrix WaveMaskPrefixBitOr(WaveMask mask, matrix expr)
{
    __target_switch
    {
    case cuda: __intrinsic_asm "_wavePrefixOrMultiple($0, $1)";
    case hlsl: __intrinsic_asm "WaveMultiPrefixBitOr($1, uint4($0, 0, 0, 0))";
    }
}

// WaveMaskPrefixBitXor: exclusive prefix bitwise-XOR (scalar overload).
__generic
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__spirv_version(1.3)
[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)]
T WaveMaskPrefixBitXor(WaveMask mask, T expr)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "subgroupExclusiveXor($1)";
    case cuda: __intrinsic_asm "_wavePrefixXor($0, $1)";
    case hlsl: __intrinsic_asm "WaveMultiPrefixBitXor($1, uint4($0, 0, 0, 0))";
    case spirv:
        return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformBitwiseXor $$T result Subgroup ExclusiveScan $expr};
    }
}

// Vector overload.
__generic
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__spirv_version(1.3)
[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)]
vector WaveMaskPrefixBitXor(WaveMask mask, vector expr)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "subgroupExclusiveXor($1)";
    case cuda: __intrinsic_asm "_wavePrefixXorMultiple($0, $1)";
    case hlsl: __intrinsic_asm "WaveMultiPrefixBitXor($1, uint4($0, 0, 0, 0))";
    case spirv:
        return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformBitwiseXor $$vector result Subgroup ExclusiveScan $expr};
    }
}

// Matrix overload: CUDA and HLSL only.
__generic
[require(cuda_hlsl, subgroup_arithmetic)]
matrix WaveMaskPrefixBitXor(WaveMask mask, matrix expr)
{
    __target_switch
    {
    case cuda: __intrinsic_asm "_wavePrefixXorMultiple($0, $1)";
    case hlsl: __intrinsic_asm "WaveMultiPrefixBitXor($1, uint4($0, 0, 0, 0))";
    }
}

// Shader model 6.0 stuff

// Information for GLSL wave/subgroup support
// https://github.com/KhronosGroup/GLSL/blob/master/extensions/khr/GL_KHR_shader_subgroup.txt

// QuadReadLaneAt: quad broadcast of `sourceValue` from `quadLaneID` (scalar overload).
__generic
__glsl_extension(GL_KHR_shader_subgroup_quad)
__spirv_version(1.3)
[require(glsl_hlsl_spirv, subgroup_quad)]
T QuadReadLaneAt(T sourceValue, uint quadLaneID)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "QuadReadLaneAt";
    case glsl: __intrinsic_asm "subgroupQuadBroadcast";
    case spirv:
        return spirv_asm {
            OpCapability GroupNonUniformQuad;
            result:$$T = OpGroupNonUniformQuadBroadcast Subgroup $sourceValue $quadLaneID;
        };
    }
}

// Vector overload.
__generic
__glsl_extension(GL_KHR_shader_subgroup_quad)
__spirv_version(1.3)
[require(glsl_hlsl_spirv, subgroup_quad)]
vector QuadReadLaneAt(vector sourceValue, uint quadLaneID)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "QuadReadLaneAt";
    case glsl: __intrinsic_asm "subgroupQuadBroadcast";
    case spirv:
        return spirv_asm {
            OpCapability GroupNonUniformQuad;
            result:$$vector = OpGroupNonUniformQuadBroadcast Subgroup $sourceValue $quadLaneID;
        };
    }
}

// Matrix overload: declaration only, no body is provided here.
__generic
matrix QuadReadLaneAt(matrix sourceValue, uint quadLaneID);

// QuadReadAcrossX: quad swap with direction operand 0 (GLSL: horizontal swap). Scalar overload.
__generic
__glsl_extension(GL_KHR_shader_subgroup_quad)
__spirv_version(1.3)
[require(glsl_hlsl_spirv, subgroup_quad)]
T QuadReadAcrossX(T localValue)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "QuadReadAcrossX";
    case glsl: __intrinsic_asm "subgroupQuadSwapHorizontal($0)";
    case spirv:
        uint direction = 0u;
        return spirv_asm {
            OpCapability GroupNonUniformQuad;
            result:$$T = OpGroupNonUniformQuadSwap Subgroup $localValue $direction;
        };
    }
}

// Vector overload.
__generic
__glsl_extension(GL_KHR_shader_subgroup_quad)
__spirv_version(1.3)
[require(glsl_hlsl_spirv, subgroup_quad)]
vector QuadReadAcrossX(vector localValue)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "QuadReadAcrossX";
    case glsl: __intrinsic_asm "subgroupQuadSwapHorizontal($0)";
    case spirv:
        uint direction = 0u;
        return spirv_asm {
            OpCapability GroupNonUniformQuad;
            result:$$vector = OpGroupNonUniformQuadSwap Subgroup $localValue $direction;
        };
    }
}

// Matrix overload: declaration only.
__generic
matrix QuadReadAcrossX(matrix localValue);

// QuadReadAcrossY: quad swap with direction operand 1 (GLSL: vertical swap). Scalar overload.
__generic
__glsl_extension(GL_KHR_shader_subgroup_quad)
__spirv_version(1.3)
[require(glsl_hlsl_spirv, subgroup_quad)]
T QuadReadAcrossY(T localValue)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "QuadReadAcrossY";
    case glsl: __intrinsic_asm "subgroupQuadSwapVertical($0)";
    case spirv:
        uint direction = 1u;
        return spirv_asm {
            OpCapability GroupNonUniformQuad;
            result:$$T = OpGroupNonUniformQuadSwap Subgroup $localValue $direction;
        };
    }
}

// Vector overload.
__generic
__glsl_extension(GL_KHR_shader_subgroup_quad)
__spirv_version(1.3)
[require(glsl_hlsl_spirv, subgroup_quad)]
vector QuadReadAcrossY(vector localValue)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "QuadReadAcrossY";
    case glsl: __intrinsic_asm "subgroupQuadSwapVertical($0)";
    case spirv:
        uint direction = 1u;
        return spirv_asm {
            OpCapability GroupNonUniformQuad;
            result:$$vector = OpGroupNonUniformQuadSwap Subgroup $localValue $direction;
        };
    }
}

// Matrix overload: declaration only.
__generic
matrix QuadReadAcrossY(matrix localValue);

// QuadReadAcrossDiagonal: quad swap with direction operand 2 (GLSL: diagonal swap). Scalar overload.
__generic
__glsl_extension(GL_KHR_shader_subgroup_quad)
__spirv_version(1.3)
[require(glsl_hlsl_spirv, subgroup_quad)]
T QuadReadAcrossDiagonal(T localValue)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "QuadReadAcrossDiagonal";
    case glsl: __intrinsic_asm "subgroupQuadSwapDiagonal($0)";
    case spirv:
        uint direction = 2u;
        return spirv_asm {
            OpCapability GroupNonUniformQuad;
            result:$$T = OpGroupNonUniformQuadSwap Subgroup $localValue $direction;
        };
    }
}

// Vector overload.
__generic
__glsl_extension(GL_KHR_shader_subgroup_quad)
__spirv_version(1.3)
[require(glsl_hlsl_spirv, subgroup_quad)]
vector QuadReadAcrossDiagonal(vector localValue)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "QuadReadAcrossDiagonal";
    case glsl: __intrinsic_asm "subgroupQuadSwapDiagonal($0)";
    case spirv:
        uint direction = 2u;
        return spirv_asm {
            OpCapability GroupNonUniformQuad;
            result:$$vector = OpGroupNonUniformQuadSwap Subgroup $localValue $direction;
        };
    }
}

// Matrix overload: declaration only.
__generic
matrix QuadReadAcrossDiagonal(matrix localValue);

// WaveActiveBitAnd, WaveActiveBitOr, WaveActiveBitXor
// Generated once per table entry; each entry supplies the HLSL, GLSL and SPIR-V name fragment.
${{{{
struct WaveActiveBitOpEntry { const char* hlslName; const char* glslName; const char* spirvName; };
const WaveActiveBitOpEntry kWaveActiveBitOpEntries[] = {{"BitAnd", "And", "BitwiseAnd"}, {"BitOr", "Or", "BitwiseOr"}, {"BitXor", "Xor", "BitwiseXor"}};
for (auto opName : kWaveActiveBitOpEntries) {
}}}}

// Scalar overload. Targets without a dedicated case fall back to the WaveMask variant
// called with WaveGetActiveMask().
__generic
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__spirv_version(1.3)
[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)]
T WaveActive$(opName.hlslName)(T expr)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "subgroup$(opName.glslName)($0)";
    case hlsl: __intrinsic_asm "WaveActive$(opName.hlslName)";
    case spirv:
        return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniform$(opName.spirvName) $$T result Subgroup Reduce $expr};
    default:
        return WaveMask$(opName.hlslName)(WaveGetActiveMask(), expr);
    }
}

// Vector overload.
__generic
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__spirv_version(1.3)
[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)]
vector WaveActive$(opName.hlslName)(vector expr)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "subgroup$(opName.glslName)($0)";
    case hlsl: __intrinsic_asm "WaveActive$(opName.hlslName)";
    case spirv:
        return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniform$(opName.spirvName) $$vector result Subgroup Reduce $expr};
    default:
        return WaveMask$(opName.hlslName)(WaveGetActiveMask(), expr);
    }
}

// Matrix overload: GLSL and SPIR-V apply the vector overload to each `expr[i]` for i in [0, N).
__generic
[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)]
matrix WaveActive$(opName.hlslName)(matrix expr)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "WaveActive$(opName.hlslName)";
    case glsl:
    case spirv:
        matrix result;
        [ForceUnroll]
        for (int i = 0; i < N; ++i)
            result[i] = WaveActive$(opName.hlslName)(expr[i]);
        return result;
    default:
        return WaveMask$(opName.hlslName)(WaveGetActiveMask(), expr);
    }
}

${{{{
} // WaveActiveBitAnd, WaveActiveBitOr, WaveActiveBitXor
}}}}

// WaveActiveMin/Max
${{{{
const char* kWaveActiveMinMaxNames[] = {"Min", "Max"};
for (const char* opName : kWaveActiveMinMaxNames) {
}}}}

// Scalar overload. The SPIR-V path selects the F opcode for float types, the U opcode for
// unsigned int types, and the S opcode for everything else.
__generic
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__spirv_version(1.3)
[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)]
T WaveActive$(opName)(T expr)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "subgroup$(opName)($0)";
    case hlsl: __intrinsic_asm "WaveActive$(opName)";
    case spirv:
        if (__isFloat())
            return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformF$(opName) $$T result Subgroup Reduce $expr};
        else if (__isUnsignedInt())
            return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformU$(opName) $$T result Subgroup Reduce $expr};
        else
            return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformS$(opName) $$T result Subgroup Reduce $expr};
    default:
        return WaveMask$(opName)(WaveGetActiveMask(), expr);
    }
}

// Vector overload.
__generic
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__spirv_version(1.3)
[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)]
vector WaveActive$(opName)(vector expr)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "subgroup$(opName)($0)";
    case hlsl: __intrinsic_asm "WaveActive$(opName)";
    case spirv:
        if (__isFloat())
            return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformF$(opName) $$vector result Subgroup Reduce $expr};
        else if (__isUnsignedInt())
            return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformU$(opName) $$vector result Subgroup Reduce $expr};
        else
            return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformS$(opName) $$vector result Subgroup Reduce $expr};
    default:
        return WaveMask$(opName)(WaveGetActiveMask(), expr);
    }
}

// Matrix overload: GLSL and SPIR-V apply the vector overload to each `expr[i]` for i in [0, N).
__generic
[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)]
matrix WaveActive$(opName)(matrix expr)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "WaveActive$(opName)";
    case glsl:
    case spirv:
        matrix result;
        [ForceUnroll]
        for (int i = 0; i < N; ++i)
            result[i] = WaveActive$(opName)(expr[i]);
        return result;
    default:
        return WaveMask$(opName)(WaveGetActiveMask(), expr);
    }
}

${{{{
} // WaveActiveMinMax.
}}}}

// WaveActiveProduct/Sum
// Generated once per table entry; `glslName` is also used to build the SPIR-V opcode suffix.
${{{{
struct WaveActiveProductSumEntry { const char* hlslName; const char* glslName; };
const WaveActiveProductSumEntry kWaveActivProductSumNames[] = {{"Product", "Mul"}, {"Sum", "Add"}};
for (auto opName : kWaveActivProductSumNames) {
}}}}

// Scalar overload. The SPIR-V path uses the F opcode for float types, the I opcode for int
// types, and otherwise returns `expr` unchanged. Targets without a dedicated case fall back
// to the WaveMask variant called with WaveGetActiveMask().
__generic
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__spirv_version(1.3)
[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)]
T WaveActive$(opName.hlslName)(T expr)
{
    __target_switch
    {
    case glsl:
        if (__isHalf()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
        __intrinsic_asm "subgroup$(opName.glslName)($0)";
    case hlsl: __intrinsic_asm "WaveActive$(opName.hlslName)";
    case spirv:
        if (__isFloat())
            return spirv_asm {
                OpCapability GroupNonUniformArithmetic;
                OpGroupNonUniformF$(opName.glslName) $$T result Subgroup 0 $expr
            };
        else if (__isInt())
        {
            return spirv_asm {
                OpCapability GroupNonUniformArithmetic;
                OpGroupNonUniformI$(opName.glslName) $$T result Subgroup 0 $expr;
            };
        }
        else
            return expr;
    default:
        return WaveMask$(opName.hlslName)(WaveGetActiveMask(), expr);
    }
}

// Vector overload.
__generic
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__spirv_version(1.3)
[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)]
vector WaveActive$(opName.hlslName)(vector expr)
{
    __target_switch
    {
    case glsl:
        if (__isHalf()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
        __intrinsic_asm "subgroup$(opName.glslName)($0)";
    case hlsl: __intrinsic_asm "WaveActive$(opName.hlslName)";
    case spirv:
        if (__isFloat())
            return spirv_asm {
                OpCapability GroupNonUniformArithmetic;
                OpGroupNonUniformF$(opName.glslName) $$vector result Subgroup 0 $expr
            };
        else if (__isInt())
        {
            return spirv_asm {
                OpCapability GroupNonUniformArithmetic;
                OpGroupNonUniformI$(opName.glslName) $$vector result Subgroup 0 $expr;
            };
        }
        else
            return expr;
    default:
        return WaveMask$(opName.hlslName)(WaveGetActiveMask(), expr);
    }
}

// Matrix overload: GLSL and SPIR-V apply the vector overload to each `expr[i]` for i in [0, N).
__generic
[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)]
matrix WaveActive$(opName.hlslName)(matrix expr)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "WaveActive$(opName.hlslName)";
    case glsl:
    case spirv:
        matrix result;
        [ForceUnroll]
        for (int i = 0; i < N; ++i)
            result[i] = WaveActive$(opName.hlslName)(expr[i]);
        return result;
    default:
        return WaveMask$(opName.hlslName)(WaveGetActiveMask(), expr);
    }
}

${{{{
} // WaveActiveProduct/WaveActiveSum.
}}}}

// WaveActiveAllEqual: all-equal vote on `value` (scalar overload).
__generic
__glsl_extension(GL_KHR_shader_subgroup_vote)
__spirv_version(1.3)
[require(cuda_glsl_hlsl_spirv, subgroup_vote)]
bool WaveActiveAllEqual(T value)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "subgroupAllEqual($0)";
    case hlsl: __intrinsic_asm "WaveActiveAllEqual";
    case spirv:
        return spirv_asm {
            OpCapability GroupNonUniformVote;
            OpGroupNonUniformAllEqual $$bool result Subgroup $value
        };
    default:
        return WaveMaskAllEqual(WaveGetActiveMask(), value);
    }
}

// Vector overload.
__generic
__glsl_extension(GL_KHR_shader_subgroup_vote)
__spirv_version(1.3)
[require(cuda_glsl_hlsl_spirv, subgroup_vote)]
bool WaveActiveAllEqual(vector value)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "subgroupAllEqual($0)";
    case hlsl: __intrinsic_asm "WaveActiveAllEqual";
    case spirv:
        return spirv_asm {
            OpCapability GroupNonUniformVote;
            OpGroupNonUniformAllEqual $$bool result Subgroup $value
        };
    default:
        return WaveMaskAllEqual(WaveGetActiveMask(), value);
    }
}

// Matrix overload: HLSL intrinsic, otherwise the WaveMask variant.
__generic
[require(cuda_hlsl, subgroup_vote)]
bool WaveActiveAllEqual(matrix value)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "WaveActiveAllEqual";
    default:
        return WaveMaskAllEqual(WaveGetActiveMask(), value);
    }
}

// WaveActiveAllTrue: "all" vote on `condition`.
__glsl_extension(GL_KHR_shader_subgroup_vote)
__spirv_version(1.3)
[require(cuda_glsl_hlsl_spirv, subgroup_vote)]
bool WaveActiveAllTrue(bool condition)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "subgroupAll($0)";
    case hlsl: __intrinsic_asm "WaveActiveAllTrue($0)";
    case spirv:
        return spirv_asm {
            OpCapability GroupNonUniformVote;
            OpGroupNonUniformAll $$bool result Subgroup $condition
        };
    default:
        return WaveMaskAllTrue(WaveGetActiveMask(), condition);
    }
}

// WaveActiveAnyTrue: "any" vote on `condition`.
__glsl_extension(GL_KHR_shader_subgroup_vote)
__spirv_version(1.3)
[require(cuda_glsl_hlsl_spirv, subgroup_vote)]
bool WaveActiveAnyTrue(bool condition)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "subgroupAny($0)";
    case hlsl: __intrinsic_asm "WaveActiveAnyTrue($0)";
    case spirv:
        return spirv_asm {
            OpCapability GroupNonUniformVote;
            OpGroupNonUniformAny $$bool result Subgroup $condition
        };
    default:
        return WaveMaskAnyTrue(WaveGetActiveMask(), condition);
    }
}

// WaveActiveBallot: ballot of `condition`, returned as a uint4.
__glsl_extension(GL_KHR_shader_subgroup_ballot)
__spirv_version(1.3)
[NonUniformReturn]
[require(cuda_glsl_hlsl_spirv, subgroup_ballot)]
uint4 WaveActiveBallot(bool condition)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "subgroupBallot($0)";
    case hlsl: __intrinsic_asm "WaveActiveBallot";
    case spirv:
        return spirv_asm {
            OpCapability GroupNonUniformBallot;
            OpGroupNonUniformBallot $$uint4 result Subgroup $condition
        };
    default:
        return WaveMaskBallot(WaveGetActiveMask(), condition);
    }
}

// WaveActiveCountBits: on GLSL and SPIR-V this is _WaveCountBits applied to the ballot of `value`.
[require(cuda_glsl_hlsl_spirv, subgroup_basic_ballot)]
uint WaveActiveCountBits(bool value)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "WaveActiveCountBits";
    case glsl:
    case spirv:
        return _WaveCountBits(WaveActiveBallot(value));
    default:
        return WaveMaskCountBits(WaveGetActiveMask(), value);
    }
}

// WaveGetLaneCount: reads the subgroup/warp size builtin of each target.
__glsl_extension(GL_KHR_shader_subgroup_basic)
__spirv_version(1.3)
[NonUniformReturn]
[require(cuda_glsl_hlsl_spirv, subgroup_basic)]
uint WaveGetLaneCount()
{
    __target_switch
    {
    case glsl: __intrinsic_asm "(gl_SubgroupSize)";
    case cuda: __intrinsic_asm "(warpSize)";
    case hlsl: __intrinsic_asm "WaveGetLaneCount()";
    case spirv:
        return spirv_asm {
            OpCapability GroupNonUniform;
            result:$$uint = OpLoad builtin(SubgroupSize:uint)
        };
    }
}

// WaveGetLaneIndex: reads the per-lane invocation id builtin of each target.
__glsl_extension(GL_KHR_shader_subgroup_basic)
__spirv_version(1.3)
[NonUniformReturn]
[require(cuda_glsl_hlsl_spirv, subgroup_basic)]
uint WaveGetLaneIndex()
{
    __target_switch
    {
    case glsl: __intrinsic_asm "(gl_SubgroupInvocationID)";
    case cuda: __intrinsic_asm "_getLaneId()";
    case hlsl: __intrinsic_asm "WaveGetLaneIndex()";
    case spirv:
        return spirv_asm {
            OpCapability GroupNonUniform;
            result:$$uint = OpLoad builtin(SubgroupLocalInvocationId:uint)
        };
    }
}

// WaveIsFirstLane: subgroup elect.
// The SPIR-V path declares only GroupNonUniform, the capability the SPIR-V specification
// lists for OpGroupNonUniformElect, in line with this function's `subgroup_basic` requirement.
__glsl_extension(GL_KHR_shader_subgroup_basic)
__spirv_version(1.3)
[NonUniformReturn]
[require(cuda_glsl_hlsl_spirv, subgroup_basic)]
bool WaveIsFirstLane()
{
    __target_switch
    {
    case glsl: __intrinsic_asm "subgroupElect()";
    case hlsl: __intrinsic_asm "WaveIsFirstLane()";
    case spirv:
        return spirv_asm {
            OpCapability GroupNonUniform;
            OpGroupNonUniformElect $$bool result Subgroup
        };
    default:
        return WaveMaskIsFirstLane(WaveGetActiveMask());
    }
}

// It's useful to have a wave uint4 version of countbits, because some wave functions return uint4.
// This implementation tries to limit the amount of work required by the actual lane count.
__spirv_version(1.3)
[require(cpp_cuda_glsl_hlsl_spirv, subgroup_basic_ballot)]
uint _WaveCountBits(uint4 value)
{
    __target_switch
    {
    case spirv:
        return spirv_asm {
            OpCapability GroupNonUniformBallot;
            OpGroupNonUniformBallotBitCount $$uint result Subgroup Reduce $value
        };
    default:
        // Assume since WaveGetLaneCount should be known at compile time, the branches will hopefully boil away
        const uint waveLaneCount = WaveGetLaneCount();
        // (waveLaneCount - 1) / 32 selects how many 32-bit components of `value` are summed.
        switch ((waveLaneCount - 1) / 32)
        {
        default:
        case 0:
            return countbits(value.x);
        case 1:
            return countbits(value.x) + countbits(value.y);
        case 2:
            return countbits(value.x) + countbits(value.y) + countbits(value.z);
        case 3:
            return countbits(value.x) + countbits(value.y) + countbits(value.z) + countbits(value.w);
        }
    }
}

// Prefix

__generic
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__spirv_version(1.3)
[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)]
T WavePrefixProduct(T expr)
{
    __target_switch
    {
    case glsl:
        if (__isHalf()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
        __intrinsic_asm "subgroupExclusiveMul($0)";
    case hlsl: __intrinsic_asm "WavePrefixProduct";
    case
spirv: if (__isFloat()) return spirv_asm { OpCapability GroupNonUniformArithmetic; OpGroupNonUniformFMul $$T result Subgroup ExclusiveScan $expr }; else if (__isInt()) { return spirv_asm { OpCapability GroupNonUniformArithmetic; OpGroupNonUniformIMul $$T result Subgroup ExclusiveScan $expr; }; } else return expr; default: return WaveMaskPrefixProduct(WaveGetActiveMask(), expr); } } __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) [require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)] vector WavePrefixProduct(vector expr) { __target_switch { case glsl: if (__isHalf()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16"); __intrinsic_asm "subgroupExclusiveMul($0)"; case hlsl: __intrinsic_asm "WavePrefixProduct"; case spirv: if (__isFloat()) return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformFMul $$vector result Subgroup ExclusiveScan $expr}; else if (__isInt()) { return spirv_asm { OpCapability GroupNonUniformArithmetic; OpGroupNonUniformIMul $$vector result Subgroup ExclusiveScan $expr; }; } else return expr; default: return WaveMaskPrefixProduct(WaveGetActiveMask(), expr); } } __generic [require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)] matrix WavePrefixProduct(matrix expr) { __target_switch { case hlsl: __intrinsic_asm "WavePrefixProduct"; case glsl: case spirv: matrix result; for (int i = 0; i < N; ++i) result[i] = WavePrefixProduct(expr[i]); return result; default: return WaveMaskPrefixProduct(WaveGetActiveMask(), expr); } } __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) [require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)] T WavePrefixSum(T expr) { __target_switch { case glsl: if (__isHalf()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16"); __intrinsic_asm "subgroupExclusiveAdd($0)"; case hlsl: __intrinsic_asm "WavePrefixSum"; case spirv: if (__isFloat()) return spirv_asm {OpCapability GroupNonUniformArithmetic; 
OpGroupNonUniformFAdd $$T result Subgroup ExclusiveScan $expr}; else if (__isInt()) { return spirv_asm { OpCapability GroupNonUniformArithmetic; result:$$T = OpGroupNonUniformIAdd Subgroup ExclusiveScan $expr; }; } else return expr; default: return WaveMaskPrefixSum(WaveGetActiveMask(), expr); } } __generic __glsl_extension(GL_KHR_shader_subgroup_arithmetic) __spirv_version(1.3) [require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)] vector WavePrefixSum(vector expr) { __target_switch { case glsl: if (__isHalf()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16"); __intrinsic_asm "subgroupExclusiveAdd($0)"; case hlsl: __intrinsic_asm "WavePrefixSum"; case spirv: if (__isFloat()) return spirv_asm {OpCapability GroupNonUniformArithmetic; OpGroupNonUniformFAdd $$vector result Subgroup ExclusiveScan $expr}; else if (__isInt()) { return spirv_asm { OpCapability GroupNonUniformArithmetic; result:$$vector = OpGroupNonUniformIAdd Subgroup ExclusiveScan $expr; }; } else return expr; default: return WaveMaskPrefixSum(WaveGetActiveMask(), expr); } } __generic [require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)] matrix WavePrefixSum(matrix expr) { __target_switch { case hlsl: __intrinsic_asm "WavePrefixSum"; case glsl: case spirv: matrix result; for (int i = 0; i < N; ++i) result[i] = WavePrefixSum(expr[i]); return result; default: return WaveMaskPrefixSum(WaveGetActiveMask(), expr); } } __generic __glsl_extension(GL_KHR_shader_subgroup_ballot) __spirv_version(1.3) [require(cuda_glsl_hlsl_spirv, subgroup_ballot)] T WaveReadLaneFirst(T expr) { __target_switch { case glsl: if (__isHalf()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16"); __intrinsic_asm "subgroupBroadcastFirst($0)"; case hlsl: __intrinsic_asm "WaveReadLaneFirst"; case spirv: return spirv_asm {OpCapability GroupNonUniformBallot; OpGroupNonUniformBroadcastFirst $$T result Subgroup $expr}; default: return WaveMaskReadLaneFirst(WaveGetActiveMask(), expr); } } __generic 
__glsl_extension(GL_KHR_shader_subgroup_ballot) __spirv_version(1.3) [require(cuda_glsl_hlsl_spirv, subgroup_ballot)] vector WaveReadLaneFirst(vector expr) { __target_switch { case glsl: if (__isHalf()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16"); __intrinsic_asm "subgroupBroadcastFirst($0)"; case hlsl: __intrinsic_asm "WaveReadLaneFirst"; case spirv: return spirv_asm {OpCapability GroupNonUniformBallot; OpGroupNonUniformBroadcastFirst $$vector result Subgroup $expr}; default: return WaveMaskReadLaneFirst(WaveGetActiveMask(), expr); } } __generic [require(cuda_glsl_hlsl_spirv, subgroup_ballot)] matrix WaveReadLaneFirst(matrix expr) { __target_switch { case hlsl: __intrinsic_asm "WaveReadLaneFirst"; case glsl: case spirv: matrix result; for (int i = 0; i < N; ++i) result[i] = WaveReadLaneFirst(expr[i]); return result; default: return WaveMaskReadLaneFirst(WaveGetActiveMask(), expr); } } // NOTE! WaveBroadcastLaneAt is *NOT* standard HLSL // It is provided as access to subgroupBroadcast which can only take a // constexpr laneId. // https://github.com/KhronosGroup/GLSL/blob/master/extensions/khr/GL_KHR_shader_subgroup.txt // Versions SPIR-V greater than 1.4 loosen this restriction, and allow 'dynamic uniform' index // If that's the behavior required then client code should use WaveReadLaneAt which works this way. 
// WaveBroadcastLaneAt: scalar overload. `lane` is `constexpr`; maps to
// subgroupBroadcast on GLSL, WaveReadLaneAt on HLSL and
// OpGroupNonUniformBroadcast on SPIR-V (with the lane converted to uint).
__generic
__glsl_extension(GL_KHR_shader_subgroup_ballot)
__spirv_version(1.3)
[require(cuda_glsl_hlsl_spirv, subgroup_ballot)]
T WaveBroadcastLaneAt(T value, constexpr int lane)
{
    __target_switch
    {
    case glsl:
        if (__isHalf()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
        __intrinsic_asm "subgroupBroadcast($0, $1)";
    case hlsl: __intrinsic_asm "WaveReadLaneAt";
    case spirv:
        let ulane = uint(lane);
        return spirv_asm {OpCapability GroupNonUniformBallot; OpGroupNonUniformBroadcast $$T result Subgroup $value $ulane};
    default:
        return WaveMaskBroadcastLaneAt(WaveGetActiveMask(), value, lane);
    }
}

// WaveBroadcastLaneAt: vector overload.
__generic
__glsl_extension(GL_KHR_shader_subgroup_ballot)
__spirv_version(1.3)
[require(cuda_glsl_hlsl_spirv, subgroup_ballot)]
vector WaveBroadcastLaneAt(vector value, constexpr int lane)
{
    __target_switch
    {
    case glsl:
        if (__isHalf()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
        __intrinsic_asm "subgroupBroadcast($0, $1)";
    case hlsl: __intrinsic_asm "WaveReadLaneAt";
    case spirv:
        let ulane = uint(lane);
        return spirv_asm {OpCapability GroupNonUniformBallot; OpGroupNonUniformBroadcast $$vector result Subgroup $value $ulane};
    default:
        return WaveMaskBroadcastLaneAt(WaveGetActiveMask(), value, lane);
    }
}

// WaveBroadcastLaneAt: matrix overload; CUDA uses _waveShuffleMultiple with the
// active mask, GLSL/SPIR-V broadcast row by row through the vector overload.
__generic
[require(cuda_glsl_hlsl_spirv, subgroup_ballot)]
matrix WaveBroadcastLaneAt(matrix value, constexpr int lane)
{
    __target_switch
    {
    case cuda: __intrinsic_asm "_waveShuffleMultiple(_getActiveMask(), $0, $1)";
    case hlsl: __intrinsic_asm "WaveReadLaneAt";
    case glsl:
    case spirv:
        matrix result;
        for (int i = 0; i < N; ++i)
            result[i] = WaveBroadcastLaneAt(value[i], lane);
        return result;
    default:
        return WaveMaskBroadcastLaneAt(WaveGetActiveMask(), value, lane);
    }
}

// TODO(JS): If it can be determined that the `laneId` is a constant expression, then subgroupBroadcast
// could be used on GLSL. For now we just use subgroupShuffle

// WaveReadLaneAt: scalar overload (subgroupShuffle / OpGroupNonUniformShuffle).
__generic
__glsl_extension(GL_KHR_shader_subgroup_shuffle)
__spirv_version(1.3)
[require(cuda_glsl_hlsl_spirv, subgroup_shuffle)]
T WaveReadLaneAt(T value, int lane)
{
    __target_switch
    {
    case glsl:
        if (__isHalf()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
        __intrinsic_asm "subgroupShuffle($0, $1)";
    case hlsl: __intrinsic_asm "WaveReadLaneAt";
    case spirv:
        let ulane = uint(lane);
        return spirv_asm {OpCapability GroupNonUniformShuffle; OpGroupNonUniformShuffle $$T result Subgroup $value $ulane};
    default:
        return WaveMaskReadLaneAt(WaveGetActiveMask(), value, lane);
    }
}

// WaveReadLaneAt: vector overload.
__generic
__spirv_version(1.3)
__glsl_extension(GL_KHR_shader_subgroup_shuffle)
[require(cuda_glsl_hlsl_spirv, subgroup_shuffle)]
vector WaveReadLaneAt(vector value, int lane)
{
    __target_switch
    {
    case glsl:
        if (__isHalf()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
        __intrinsic_asm "subgroupShuffle($0, $1)";
    case hlsl: __intrinsic_asm "WaveReadLaneAt";
    case spirv:
        let ulane = uint(lane);
        return spirv_asm {OpCapability GroupNonUniformShuffle; OpGroupNonUniformShuffle $$vector result Subgroup $value $ulane};
    default:
        return WaveMaskReadLaneAt(WaveGetActiveMask(), value, lane);
    }
}

// WaveReadLaneAt: matrix overload; CUDA uses _waveShuffleMultiple with the
// active mask, GLSL/SPIR-V read row by row through the vector overload.
__generic
[require(cuda_glsl_hlsl_spirv, subgroup_shuffle)]
matrix WaveReadLaneAt(matrix value, int lane)
{
    __target_switch
    {
    case cuda: __intrinsic_asm "_waveShuffleMultiple(_getActiveMask(), $0, $1)";
    case hlsl: __intrinsic_asm "WaveReadLaneAt";
    case glsl:
    case spirv:
        matrix result;
        for (int i = 0; i < N; ++i)
            result[i] = WaveReadLaneAt(value[i], lane);
        return result;
    default:
        return WaveMaskReadLaneAt(WaveGetActiveMask(), value, lane);
    }
}

// NOTE! WaveShuffle is a NON STANDARD HLSL intrinsic! It will map to WaveReadLaneAt on HLSL,
// which means it will only work on hardware which allows arbitrary laneIds. That is not true
// in general because it breaks the HLSL standard, which requires the lane index to be
// 'dynamically uniform' across the Wave.
// WaveShuffle: scalar overload. Emits the same GLSL/HLSL/SPIR-V code as
// WaveReadLaneAt; other targets go through WaveMaskShuffle with the active mask.
__generic
__glsl_extension(GL_KHR_shader_subgroup_shuffle)
__spirv_version(1.3)
[require(cuda_glsl_hlsl_spirv, subgroup_shuffle)]
T WaveShuffle(T value, int lane)
{
    __target_switch
    {
    case glsl:
        if (__isHalf()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
        __intrinsic_asm "subgroupShuffle($0, $1)";
    case hlsl: __intrinsic_asm "WaveReadLaneAt";
    case spirv:
        let ulane = uint(lane);
        return spirv_asm {OpCapability GroupNonUniformShuffle; OpGroupNonUniformShuffle $$T result Subgroup $value $ulane};
    default:
        return WaveMaskShuffle(WaveGetActiveMask(), value, lane);
    }
}

// WaveShuffle: vector overload.
__generic
__glsl_extension(GL_KHR_shader_subgroup_shuffle)
__spirv_version(1.3)
[require(cuda_glsl_hlsl_spirv, subgroup_shuffle)]
vector WaveShuffle(vector value, int lane)
{
    __target_switch
    {
    case glsl:
        if (__isHalf()) __requireGLSLExtension("GL_EXT_shader_subgroup_extended_types_float16");
        __intrinsic_asm "subgroupShuffle($0, $1)";
    case hlsl: __intrinsic_asm "WaveReadLaneAt";
    case spirv:
        let ulane = uint(lane);
        return spirv_asm {OpCapability GroupNonUniformShuffle; OpGroupNonUniformShuffle $$vector result Subgroup $value $ulane};
    default:
        return WaveMaskShuffle(WaveGetActiveMask(), value, lane);
    }
}

// WaveShuffle: matrix overload (only declared for CUDA and HLSL).
__generic
[require(cuda_hlsl, subgroup_shuffle)]
matrix WaveShuffle(matrix value, int lane)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "WaveReadLaneAt";
    default:
        return WaveMaskShuffle(WaveGetActiveMask(), value, lane);
    }
}

// On SPIR-V this ballots `value` and then bit-counts the ballot with group
// operation operand 2; GLSL uses subgroupBallotExclusiveBitCount on the ballot.
__glsl_extension(GL_KHR_shader_subgroup_ballot)
__spirv_version(1.3)
[require(cuda_glsl_hlsl_spirv, subgroup_ballot)]
uint WavePrefixCountBits(bool value)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "subgroupBallotExclusiveBitCount(subgroupBallot($0))";
    case hlsl: __intrinsic_asm "WavePrefixCountBits($0)";
    case spirv:
        return spirv_asm { OpCapability GroupNonUniformBallot; %mask:$$uint4 = OpGroupNonUniformBallot Subgroup $value; OpGroupNonUniformBallotBitCount $$uint result Subgroup 2 %mask };
    default:
        return WaveMaskPrefixCountBits(WaveGetActiveMask(), value);
    }
}

// Ballot of `true` on every target; CUDA widens __activemask() into a uint4.
__glsl_extension(GL_KHR_shader_subgroup_ballot)
__spirv_version(1.3)
[require(cuda_glsl_hlsl_spirv, subgroup_ballot)]
uint4 WaveGetConvergedMulti()
{
    __target_switch
    {
    case glsl: __intrinsic_asm "subgroupBallot(true)";
    case hlsl: __intrinsic_asm "WaveActiveBallot(true)";
    case cuda: __intrinsic_asm "make_uint4(__activemask(), 0, 0, 0)";
    case spirv:
        let _true = true;
        return spirv_asm { OpCapability GroupNonUniformBallot; OpGroupNonUniformBallot $$uint4 result Subgroup $_true };
    }
}

// Forwards to WaveGetConvergedMulti().
[ForceInline]
uint4 WaveGetActiveMulti()
{
    return WaveGetConvergedMulti();
}

// Shader model 6.5 stuff
// https://github.com/microsoft/DirectX-Specs/blob/master/d3d/HLSL_ShaderModel6_5.md

// WaveMatch: scalar overload (subgroupPartitionNV / OpGroupNonUniformPartitionNV).
__generic
[require(cuda_glsl_hlsl_spirv, subgroup_partitioned)]
uint4 WaveMatch(T value)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "WaveMatch";
    case glsl: __intrinsic_asm "subgroupPartitionNV($0)";
    case spirv:
        return spirv_asm { OpCapability GroupNonUniformPartitionedNV; OpExtension "SPV_NV_shader_subgroup_partitioned"; OpGroupNonUniformPartitionNV $$uint4 result $value };
    default:
        return WaveMaskMatch(WaveGetActiveMask(), value);
    }
}

// WaveMatch: vector overload.
__generic
[require(cuda_glsl_hlsl_spirv, subgroup_partitioned)]
uint4 WaveMatch(vector value)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "WaveMatch";
    case glsl: __intrinsic_asm "subgroupPartitionNV($0)";
    case spirv:
        return spirv_asm { OpCapability GroupNonUniformPartitionedNV; OpExtension "SPV_NV_shader_subgroup_partitioned"; OpGroupNonUniformPartitionNV $$uint4 result $value };
    default:
        return WaveMaskMatch(WaveGetActiveMask(), value);
    }
}

// WaveMatch: matrix overload; on GLSL/CUDA/SPIR-V the per-row match masks are
// ANDed together, starting from an all-ones mask.
__generic
[require(cuda_glsl_hlsl_spirv, subgroup_partitioned)]
uint4 WaveMatch(matrix value)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "WaveMatch";
    case glsl:
    case cuda:
    case spirv:
        uint4 result = uint4(0xFFFFFFFF);
        [ForceUnroll]
        for (int i = 0; i < N; i++)
            result &= WaveMatch(value[i]);
        return result;
    default:
        return WaveMaskMatch(WaveGetActiveMask(), value);
    }
}

// CUDA path uses only the `.x` word of `mask`.
[require(cuda_hlsl, waveprefix)]
uint WaveMultiPrefixCountBits(bool value, uint4 mask)
{
    __target_switch
    {
    case cuda: __intrinsic_asm "_popc(__ballot_sync(($1).x, $0) & _getLaneLtMask())";
    case hlsl: __intrinsic_asm "WaveMultiPrefixCountBits";
    }
}

// WaveMultiPrefixBitAnd: scalar overload. The GLSL string passes only `$0`
// (the `mask` argument is not forwarded).
__generic
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__spirv_version(1.3)
[require(cuda_glsl_hlsl, waveprefix)]
T WaveMultiPrefixBitAnd(T expr, uint4 mask)
{
    __target_switch
    {
    case cuda: __intrinsic_asm "_wavePrefixAnd(_getMultiPrefixMask(($1).x), $0)";
    case glsl: __intrinsic_asm "subgroupExclusiveAnd($0)";
    case hlsl: __intrinsic_asm "WaveMultiPrefixBitAnd";
    }
}

// WaveMultiPrefixBitAnd: vector overload.
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__spirv_version(1.3)
__generic
[require(cuda_glsl_hlsl, waveprefix)]
vector WaveMultiPrefixBitAnd(vector expr, uint4 mask)
{
    __target_switch
    {
    case cuda: __intrinsic_asm "_wavePrefixAndMultiple(_getMultiPrefixMask(($1).x), $0)";
    case glsl: __intrinsic_asm "subgroupExclusiveAnd($0)";
    case hlsl: __intrinsic_asm "WaveMultiPrefixBitAnd";
    }
}

// WaveMultiPrefixBitAnd: matrix overload (CUDA and HLSL only).
__generic
[require(cuda_hlsl, waveprefix)]
matrix WaveMultiPrefixBitAnd(matrix expr, uint4 mask)
{
    __target_switch
    {
    case cuda: __intrinsic_asm "_wavePrefixAndMultiple(_getMultiPrefixMask(($1).x), $0)";
    case hlsl: __intrinsic_asm "WaveMultiPrefixBitAnd";
    }
}

// WaveMultiPrefixBitOr: scalar overload.
// The CUDA call takes (mask, value), matching the And/Xor variants; the
// previous string had a stray leading comma that yielded an invalid call.
__generic
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__spirv_version(1.3)
[require(cuda_glsl_hlsl, waveprefix)]
T WaveMultiPrefixBitOr(T expr, uint4 mask)
{
    __target_switch
    {
    case cuda: __intrinsic_asm "_wavePrefixOr(_getMultiPrefixMask(($1).x), $0)";
    case glsl: __intrinsic_asm "subgroupExclusiveOr($0)";
    case hlsl: __intrinsic_asm "WaveMultiPrefixBitOr";
    }
}

// WaveMultiPrefixBitOr: vector overload.
__generic
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__spirv_version(1.3)
[require(cuda_glsl_hlsl, waveprefix)]
vector WaveMultiPrefixBitOr(vector expr, uint4 mask)
{
    __target_switch
    {
    case cuda: __intrinsic_asm "_wavePrefixOrMultiple(_getMultiPrefixMask(($1).x), $0)";
    case glsl: __intrinsic_asm "subgroupExclusiveOr($0)";
    case hlsl: __intrinsic_asm "WaveMultiPrefixBitOr";
    }
}
// WaveMultiPrefixBitOr: matrix overload (CUDA and HLSL only).
__generic
[require(cuda_hlsl, waveprefix)]
matrix WaveMultiPrefixBitOr(matrix expr, uint4 mask)
{
    __target_switch
    {
    case cuda: __intrinsic_asm "_wavePrefixOrMultiple(_getMultiPrefixMask(($1).x), $0)";
    case hlsl: __intrinsic_asm "WaveMultiPrefixBitOr";
    }
}

// WaveMultiPrefixBitXor: scalar overload. The GLSL string passes only `$0`
// (the `mask` argument is not forwarded).
__generic
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__spirv_version(1.3)
[require(cuda_glsl_hlsl, waveprefix)]
T WaveMultiPrefixBitXor(T expr, uint4 mask)
{
    __target_switch
    {
    case cuda: __intrinsic_asm "_wavePrefixXor(_getMultiPrefixMask(($1).x), $0)";
    case glsl: __intrinsic_asm "subgroupExclusiveXor($0)";
    case hlsl: __intrinsic_asm "WaveMultiPrefixBitXor";
    }
}

// WaveMultiPrefixBitXor: vector overload.
__generic
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
__spirv_version(1.3)
[require(cuda_glsl_hlsl, waveprefix)]
vector WaveMultiPrefixBitXor(vector expr, uint4 mask)
{
    __target_switch
    {
    case cuda: __intrinsic_asm "_wavePrefixXorMultiple(_getMultiPrefixMask(($1).x), $0)";
    case glsl: __intrinsic_asm "subgroupExclusiveXor($0)";
    case hlsl: __intrinsic_asm "WaveMultiPrefixBitXor";
    }
}

// WaveMultiPrefixBitXor: matrix overload (CUDA and HLSL only).
__generic
[require(cuda_hlsl, waveprefix)]
matrix WaveMultiPrefixBitXor(matrix expr, uint4 mask)
{
    __target_switch
    {
    case cuda: __intrinsic_asm "_wavePrefixXorMultiple(_getMultiPrefixMask(($1).x), $0)";
    case hlsl: __intrinsic_asm "WaveMultiPrefixBitXor";
    }
}

// WaveMultiPrefixProduct: scalar overload (CUDA and HLSL only).
__generic
[require(cuda_hlsl, waveprefix)]
T WaveMultiPrefixProduct(T value, uint4 mask)
{
    __target_switch
    {
    case cuda: __intrinsic_asm "_wavePrefixProduct(_getMultiPrefixMask(($1).x), $0)";
    case hlsl: __intrinsic_asm "WaveMultiPrefixProduct";
    }
}

// WaveMultiPrefixProduct: vector overload.
__generic
[require(cuda_hlsl, waveprefix)]
vector WaveMultiPrefixProduct(vector value, uint4 mask)
{
    __target_switch
    {
    case cuda: __intrinsic_asm "_wavePrefixProductMultiple(_getMultiPrefixMask(($1).x), $0)";
    case hlsl: __intrinsic_asm "WaveMultiPrefixProduct";
    }
}

// WaveMultiPrefixProduct: matrix overload.
__generic
[require(cuda_hlsl, waveprefix)]
matrix WaveMultiPrefixProduct(matrix value, uint4 mask)
{
    __target_switch
    {
    case cuda: __intrinsic_asm "_wavePrefixProductMultiple(_getMultiPrefixMask(($1).x), $0)";
    case hlsl: __intrinsic_asm "WaveMultiPrefixProduct";
    }
}

// WaveMultiPrefixSum: scalar overload (CUDA and HLSL only).
__generic
[require(cuda_hlsl, waveprefix)]
T WaveMultiPrefixSum(T value, uint4 mask)
{
    __target_switch
    {
    case cuda: __intrinsic_asm "_wavePrefixSum(_getMultiPrefixMask(($1).x), $0)";
    case hlsl: __intrinsic_asm "WaveMultiPrefixSum";
    }
}

// WaveMultiPrefixSum: vector overload.
__generic
[require(cuda_hlsl, waveprefix)]
vector WaveMultiPrefixSum(vector value, uint4 mask)
{
    __target_switch
    {
    case cuda: __intrinsic_asm "_wavePrefixSumMultiple(_getMultiPrefixMask(($1).x), $0 )";
    case hlsl: __intrinsic_asm "WaveMultiPrefixSum";
    }
}

// WaveMultiPrefixSum: matrix overload.
__generic
[require(cuda_hlsl, waveprefix)]
matrix WaveMultiPrefixSum(matrix value, uint4 mask)
{
    __target_switch
    {
    case cuda: __intrinsic_asm "_wavePrefixSumMultiple(_getMultiPrefixMask(($1).x), $0)";
    case hlsl: __intrinsic_asm "WaveMultiPrefixSum";
    }
}

// Per-target helper-invocation query: HLSL IsHelperLane(), GLSL
// gl_HelperInvocation, Metal simd_is_helper_thread(), SPIR-V OpIsHelperInvocationEXT.
__glsl_extension(GL_EXT_demote_to_helper_invocation)
[ForceInline]
[require(glsl_hlsl_metal_spirv, helper_lane)]
bool IsHelperLane()
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "IsHelperLane()";
    case glsl: __intrinsic_asm "gl_HelperInvocation";
    case metal: __intrinsic_asm "simd_is_helper_thread()";
    case spirv:
        return spirv_asm { OpExtension "SPV_EXT_demote_to_helper_invocation"; OpCapability DemoteToHelperInvocationEXT; result:$$bool = OpIsHelperInvocationEXT };
    }
}

// `typedef`s to help with the fact that HLSL has been sorta-kinda case insensitive at various points
typedef Texture2D texture2D;

${{{{
// Buffer types
//
// One iteration per access level: emits the `<prefix>Buffer` type alias and then
// selects the GLSL / SPIR-V function names and `[require(...)]` attributes used
// by the extension that follows. Index 0 is the read-only level.

static const struct {
    char const* name;
    SlangResourceAccess access;
} kBaseBufferAccessLevels[] = {
    { "", SLANG_RESOURCE_ACCESS_READ },
    { "RW", SLANG_RESOURCE_ACCESS_READ_WRITE },
    { "RasterizerOrdered", SLANG_RESOURCE_ACCESS_RASTER_ORDERED },
};
static const int kBaseBufferAccessLevelCount = sizeof(kBaseBufferAccessLevels) / sizeof(kBaseBufferAccessLevels[0]);

for (int aa = 0; aa < kBaseBufferAccessLevelCount; ++aa)
{
    auto access = kBaseBufferAccessLevels[aa].access;
    sb << "__generic\n";
    sb << "typealias ";
    sb << kBaseBufferAccessLevels[aa].name;
    sb << "Buffer = __TextureImpl;\n";

    bool isReadOnly = aa == 0;

    // Read-only buffers use the texture functions, writable ones the image functions.
    char const* glslTextureSizeFunc = (isReadOnly) ? "textureSize" : "imageSize";
    char const* glslLoadFuncName = (isReadOnly) ? "texelFetch" : "imageLoad";
    char const* spvLoadInstName = (isReadOnly) ? "OpImageFetch" : "OpImageRead";
    char const* requireToSetQuery = (isReadOnly) ? "[require(glsl_hlsl_metal_spirv, texture_size)]" : "[require(glsl_hlsl_metal_spirv, image_size)]";
    char const* requireToSet = (isReadOnly) ? "[require(glsl_hlsl_metal_spirv, texture_sm_4_1)]" : "[require(glsl_hlsl_metal_spirv, texture_sm_4_1_compute_fragment)]";
    char const* requireToSet_onlyHLSL = (isReadOnly) ? "[require(hlsl, texture_sm_4_1)]" : "[require(hlsl, texture_sm_4_1_compute_fragment)]";
}}}}

__generic
extension __TextureImpl
{
    // Writes the element count of the buffer to `dim`.
    [__readNone]
    $(requireToSetQuery)
    void GetDimensions(out uint dim)
    {
        __target_switch
        {
        case hlsl: __intrinsic_asm ".GetDimensions";
        case glsl: __intrinsic_asm "($1 = $(glslTextureSizeFunc)($0))";
        case metal: __intrinsic_asm "(*($1) = $0.get_width())";
        case spirv:
            dim = spirv_asm {
                OpCapability ImageQuery;
                result:$$uint = OpImageQuerySize $this;
            };
        }
    }

    // Loads the element at `location`; marked [__readNone] only for the read-only level.
    __glsl_extension(GL_EXT_samplerless_texture_functions)
    $(isReadOnly?"[__readNone] ":"")
    $(requireToSet)
    T Load(int location)
    {
        __target_switch
        {
        case hlsl: __intrinsic_asm ".Load";
        case metal: __intrinsic_asm "$c$0.read(uint($1))$z";
        case glsl: __intrinsic_asm "$(glslLoadFuncName)($0, $1)$z";
        case spirv:
            return spirv_asm {
                %sampled:__sampledType(T) = $(spvLoadInstName) $this $location;
                __truncate $$T result __sampledType(T) %sampled;
            };
        }
    }

    // HLSL-only overload that also returns a status value.
    $(isReadOnly?"[__readNone] ":"")
    $(requireToSet_onlyHLSL)
    T Load(int location, out uint status)
    {
        __target_switch
        {
        case hlsl: __intrinsic_asm ".Load";
        }
    }

    // Indexing: `get` forwards to Load; `set` and `ref` are only emitted for
    // access levels other than SLANG_RESOURCE_ACCESS_READ.
    __subscript(uint index) -> T
    {
        $(isReadOnly?"[__readNone] ":"")
        [ForceInline]
        $(requireToSet)
        get { return Load((int)index); }

${{{{
        if (access != SLANG_RESOURCE_ACCESS_READ) {
}}}}

        [nonmutating]
        $(requireToSet)
        set
        {
            __target_switch
            {
            case hlsl: __intrinsic_asm "($0)[$1] = $2";
            case glsl: __intrinsic_asm "imageStore($0, int($1), $V2)";
            case metal: __intrinsic_asm "$0.write($2, $1)";
            case spirv:
                spirv_asm {
                    OpImageWrite $this $index __convertTexel(newValue);
                };
            }
        }

        __intrinsic_op($(kIROp_ImageSubscript))
        ref;

${{{{
        } // access != SLANG_RESOURCE_ACCESS_READ
}}}}

    }
}; // end extension

${{{{
}
}}}}

// DirectX Raytracing (DXR) Support
//
// The following is based on the experimental DXR SDK v0.09.01.
//
// Numbering follows the sections in the "D3D12 Raytracing Functional Spec" v0.09 (2018-03-12)
//

// 10.1.1 - Ray Flags

typedef uint RAY_FLAG;

static const RAY_FLAG RAY_FLAG_NONE = 0x00;
static const RAY_FLAG RAY_FLAG_FORCE_OPAQUE = 0x01;
static const RAY_FLAG RAY_FLAG_FORCE_NON_OPAQUE = 0x02;
static const RAY_FLAG RAY_FLAG_ACCEPT_FIRST_HIT_AND_END_SEARCH = 0x04;
static const RAY_FLAG RAY_FLAG_SKIP_CLOSEST_HIT_SHADER = 0x08;
static const RAY_FLAG RAY_FLAG_CULL_BACK_FACING_TRIANGLES = 0x10;
static const RAY_FLAG RAY_FLAG_CULL_FRONT_FACING_TRIANGLES = 0x20;
static const RAY_FLAG RAY_FLAG_CULL_OPAQUE = 0x40;
static const RAY_FLAG RAY_FLAG_CULL_NON_OPAQUE = 0x80;
static const RAY_FLAG RAY_FLAG_SKIP_TRIANGLES = 0x100;
static const RAY_FLAG RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES = 0x200;

// 10.1.2 - Ray Description Structure

__target_intrinsic(hlsl, RayDesc)
__target_intrinsic(cuda, RayDesc)
struct RayDesc
{
    __target_intrinsic(hlsl, Origin)
    __target_intrinsic(cuda, Origin)
    float3 Origin;

    __target_intrinsic(hlsl, TMin)
    __target_intrinsic(cuda, TMin)
    float TMin;

    __target_intrinsic(hlsl, Direction)
    __target_intrinsic(cuda, Direction)
    float3 Direction;

    __target_intrinsic(hlsl, TMax)
    __target_intrinsic(cuda, TMax)
    float TMax;
};

// 10.1.3 - Ray Acceleration Structure

__builtin
__magic_type(RaytracingAccelerationStructureType)
__intrinsic_type($(kIROp_RaytracingAccelerationStructureType))
struct RaytracingAccelerationStructure {};

// 10.1.4 - Subobject Definitions

// TODO: We may decide to support these, but their reliance on C++ implicit
// constructor call syntax (`SomeType someVar(arg0, arg1);`) makes them
// annoying for the current Slang parsing strategy, and using global variables
// for this stuff comes across as a kludge rather than the best possible design.

// 10.1.5 - Intersection Attributes Structure

__target_intrinsic(hlsl, BuiltInTriangleIntersectionAttributes)
[require(cpp_cuda_glsl_hlsl_spirv, raytracing)]
struct BuiltInTriangleIntersectionAttributes
{
    __target_intrinsic(hlsl, barycentrics)
    float2 barycentrics;
};

// 10.2 Shaders

// Right now new shader stages need to be added directly to the compiler
// implementation, rather than being something that can be declared in the stdlib.

// 10.3 - Intrinsics

// 10.3.1

// `executeCallableEXT` is the GLSL intrinsic that will be used to implement
// `CallShader()` for GLSL-based targets.
//
[require(glsl, raytracing_raygen_closesthit_miss_callable)]
void __executeCallable(uint shaderIndex, int payloadLocation)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "executeCallableEXT";
    }
}

// Next is the custom intrinsic that will compute the payload location
// for a type being used in a `CallShader()` call for GLSL-based targets.
//
__generic
[__readNone]
__intrinsic_op($(kIROp_GetVulkanRayTracingPayloadLocation))
int __callablePayloadLocation(__ref Payload payload);

// Now we provide a hard-coded definition of `CallShader()` for GLSL-based
// targets, which maps the generic HLSL operation into the non-generic
// GLSL equivalent.
//
// HLSL maps straight to `CallShader`. GLSL and SPIR-V copy the payload into a
// static variable marked [__vulkanCallablePayload], execute the callable with
// it, then copy the result back into `payload`.
__generic
[require(glsl_hlsl_spirv, raytracing_raygen_closesthit_miss_callable)]
void CallShader(uint shaderIndex, inout Payload payload)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "CallShader";
    case glsl:
        {
            [__vulkanCallablePayload]
            static Payload p;

            p = payload;
            __executeCallable(shaderIndex, __callablePayloadLocation(p));
            payload = p;
        }
    case spirv:
        {
            [__vulkanCallablePayload]
            static Payload p;

            p = payload;
            spirv_asm {
                OpExecuteCallableKHR $shaderIndex &p
            };
            payload = p;
        }
    }
}

// 10.3.2

// Some functions only accept a "struct type" parameter. The
// following function addresses this issue by transforming non-struct
// parameters into a struct.
// side effect typed use locations (`inout`,`out`, etc.) are managed.
__generic
__intrinsic_op($(kIROp_ForceVarIntoStructTemporarily))
Ref __forceVarIntoStructTemporarily(inout T maybeStruct);

// HLSL-only entry point that maps directly to the `TraceRay` intrinsic.
__generic
[require(hlsl, raytracing)]
void __traceRayHLSL(
    RaytracingAccelerationStructure AccelerationStructure,
    uint RayFlags,
    uint InstanceInclusionMask,
    uint RayContributionToHitGroupIndex,
    uint MultiplierForGeometryContributionToHitGroupIndex,
    uint MissShaderIndex,
    RayDesc Ray,
    inout payload_t Payload)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "TraceRay";
    }
}

// GLSL-only entry point that maps to `traceRayEXT`; takes the ray as separate
// fields and the payload as an integer location.
[require(glsl, raytracing_raygen_closesthit_miss)]
void __traceRay(
    RaytracingAccelerationStructure AccelerationStructure,
    uint RayFlags,
    uint InstanceInclusionMask,
    uint RayContributionToHitGroupIndex,
    uint MultiplierForGeometryContributionToHitGroupIndex,
    uint MissShaderIndex,
    float3 Origin,
    float TMin,
    float3 Direction,
    float TMax,
    int PayloadLocation)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "traceRayEXT";
    }
}

// TODO: Slang's parsing logic currently puts modifiers on
// the `GenericDecl` rather than the inner decl when
// using our default syntax, which seems wrong. We need
// to fix this, but for now using the expanded `__generic`
// syntax works in a pinch.
//
__generic
[__readNone]
__intrinsic_op($(kIROp_GetVulkanRayTracingPayloadLocation))
int __rayPayloadLocation(__ref Payload payload);

// Public ray-trace entry point.
//  - hlsl:  forwards to __traceRayHLSL, passing the payload through
//           __forceVarIntoStructTemporarily.
//  - cuda:  maps to `traceOptiXRay`.
//  - glsl:  copies the payload into a static [__vulkanRayPayload] variable,
//           calls __traceRay with the unpacked RayDesc fields and the payload
//           location, then copies the payload back.
//  - spirv: same copy-in / copy-out, emitting OpTraceRayKHR directly.
[ForceInline]
__generic
[require(cuda_glsl_hlsl_spirv, raytracing_raygen_closesthit_miss)]
void TraceRay(
    RaytracingAccelerationStructure AccelerationStructure,
    uint RayFlags,
    uint InstanceInclusionMask,
    uint RayContributionToHitGroupIndex,
    uint MultiplierForGeometryContributionToHitGroupIndex,
    uint MissShaderIndex,
    RayDesc Ray,
    inout payload_t Payload)
{
    __target_switch
    {
    case hlsl:
        __traceRayHLSL(
            AccelerationStructure,
            RayFlags,
            InstanceInclusionMask,
            RayContributionToHitGroupIndex,
            MultiplierForGeometryContributionToHitGroupIndex,
            MissShaderIndex,
            Ray,
            __forceVarIntoStructTemporarily(Payload));
        return;
    case cuda: __intrinsic_asm "traceOptiXRay";
    case glsl:
        {
            [__vulkanRayPayload]
            static payload_t p;

            p = Payload;
            __traceRay(
                AccelerationStructure,
                RayFlags,
                InstanceInclusionMask,
                RayContributionToHitGroupIndex,
                MultiplierForGeometryContributionToHitGroupIndex,
                MissShaderIndex,
                Ray.Origin,
                Ray.TMin,
                Ray.Direction,
                Ray.TMax,
                __rayPayloadLocation(p));
            Payload = p;
        }
    case spirv:
        {
            [__vulkanRayPayload]
            static payload_t p;

            p = Payload;
            let origin = Ray.Origin;
            let direction = Ray.Direction;
            let tmin = Ray.TMin;
            let tmax = Ray.TMax;
            spirv_asm {
                OpTraceRayKHR
                    /**/ $AccelerationStructure
                    /**/ $RayFlags
                    /**/ $InstanceInclusionMask
                    /**/ $RayContributionToHitGroupIndex
                    /**/ $MultiplierForGeometryContributionToHitGroupIndex
                    /**/ $MissShaderIndex
                    /**/ $origin
                    /**/ $tmin
                    /**/ $direction
                    /**/ $tmax
                    /**/ &p;
            };
            Payload = p;
        }
    }
}

// NOTE!
// The name of the following functions may change when DXR supports
// a feature similar to the `GL_NV_ray_tracing_motion_blur` extension
//
// https://github.com/KhronosGroup/GLSL/blob/master/extensions/nv/GLSL_NV_ray_tracing_motion_blur.txt

// HLSL-only helper that emits the `TraceMotionRay` intrinsic directly.
__generic<payload_t>
[require(hlsl, raytracing_motionblur)]
void __traceMotionRayHLSL(
    RaytracingAccelerationStructure AccelerationStructure,
    uint                            RayFlags,
    uint                            InstanceInclusionMask,
    uint                            RayContributionToHitGroupIndex,
    uint                            MultiplierForGeometryContributionToHitGroupIndex,
    uint                            MissShaderIndex,
    RayDesc                         Ray,
    float                           CurrentTime,
    inout payload_t                 Payload)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "TraceMotionRay";
    }
}

// GLSL-only helper that emits `traceRayMotionNV`.
__glsl_extension(GL_NV_ray_tracing_motion_blur)
[require(glsl, raytracing_motionblur_raygen_closesthit_miss)]
void __traceMotionRay(
    RaytracingAccelerationStructure AccelerationStructure,
    uint                            RayFlags,
    uint                            InstanceInclusionMask,
    uint                            RayContributionToHitGroupIndex,
    uint                            MultiplierForGeometryContributionToHitGroupIndex,
    uint                            MissShaderIndex,
    float3                          Origin,
    float                           TMin,
    float3                          Direction,
    float                           TMax,
    float                           CurrentTime,
    int                             PayloadLocation)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "traceRayMotionNV";
    }
}

// Trace a ray with an explicit `CurrentTime`, reading and writing `Payload`.
//
// Uses the same per-target strategy as `TraceRay`: HLSL forwards to the
// `__traceMotionRayHLSL` helper, while GLSL and SPIR-V stage the payload in a
// static `[__vulkanRayPayload]` variable around the trace call.
[ForceInline]
[require(glsl_hlsl_spirv, raytracing_motionblur_raygen_closesthit_miss)]
__generic<payload_t>
void TraceMotionRay(
    RaytracingAccelerationStructure AccelerationStructure,
    uint                            RayFlags,
    uint                            InstanceInclusionMask,
    uint                            RayContributionToHitGroupIndex,
    uint                            MultiplierForGeometryContributionToHitGroupIndex,
    uint                            MissShaderIndex,
    RayDesc                         Ray,
    float                           CurrentTime,
    inout payload_t                 Payload)
{
    __target_switch
    {
    case hlsl:
        __traceMotionRayHLSL(
            AccelerationStructure,
            RayFlags,
            InstanceInclusionMask,
            RayContributionToHitGroupIndex,
            MultiplierForGeometryContributionToHitGroupIndex,
            MissShaderIndex,
            Ray,
            CurrentTime,
            __forceVarIntoStructTemporarily(Payload));
        return;
    case glsl:
        {
            [__vulkanRayPayload]
            static payload_t p;

            p = Payload;
            __traceMotionRay(
                AccelerationStructure,
                RayFlags,
                InstanceInclusionMask,
                RayContributionToHitGroupIndex,
                MultiplierForGeometryContributionToHitGroupIndex,
                MissShaderIndex,
                Ray.Origin,
                Ray.TMin,
                Ray.Direction,
                Ray.TMax,
                CurrentTime,
                __rayPayloadLocation(p));
            Payload = p;
        }
    case spirv:
        {
            [__vulkanRayPayload]
            static payload_t p;

            let origin = Ray.Origin;
            let direction = Ray.Direction;
            let tmin = Ray.TMin;
            let tmax = Ray.TMax;
            p = Payload;
            spirv_asm {
                OpCapability RayTracingMotionBlurNV;
                OpExtension "SPV_NV_ray_tracing_motion_blur";
                OpTraceRayMotionNV
                    /**/ $AccelerationStructure
                    /**/ $RayFlags
                    /**/ $InstanceInclusionMask
                    /**/ $RayContributionToHitGroupIndex
                    /**/ $MultiplierForGeometryContributionToHitGroupIndex
                    /**/ $MissShaderIndex
                    /**/ $origin
                    /**/ $tmin
                    /**/ $direction
                    /**/ $tmax
                    /**/ $CurrentTime
                    /**/ &p;
            };
            Payload = p;
        }
    }
}

// 10.3.3

// GLSL/SPIR-V helper: report an intersection at `tHit` with the given `hitKind`
// and return the boolean produced by the underlying intrinsic.
[require(glsl_spirv, raytracing_intersection)]
bool __reportIntersection(float tHit, uint hitKind)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "reportIntersectionEXT";
    case spirv:
        return spirv_asm {
            result:$$bool = OpReportIntersectionKHR $tHit $hitKind;
        };
    }
}

// Report a hit from an intersection shader.
//
// HLSL passes `attributes` straight to `ReportHit`. On GLSL and SPIR-V the
// attributes are first stored into a static `[__vulkanHitAttributes]` variable
// and the hit is then reported through `__reportIntersection`.
__generic<A>
[ForceInline]
[require(glsl_hlsl_spirv, raytracing_intersection)]
bool ReportHit(float tHit, uint hitKind, A attributes)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "ReportHit($0, $1, $2)";
    case glsl:
    case spirv:
        [__vulkanHitAttributes]
        static A a;

        a = attributes;
        return __reportIntersection(tHit, hitKind);
    }
}

// 10.3.4

// Ignore the current hit from an any-hit shader.
//
// On SPIR-V, `%_ = OpLabel` opens a fresh block after the instruction (the
// same pattern `DispatchMesh` documents for terminator instructions).
[require(cuda_glsl_hlsl_spirv, raytracing_anyhit)]
void IgnoreHit()
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "IgnoreHit";
    case glsl: __intrinsic_asm "ignoreIntersectionEXT;";
    case cuda: __intrinsic_asm "optixIgnoreIntersection";
    case spirv:
        spirv_asm {
            OpIgnoreIntersectionKHR;
            %_ = OpLabel
        };
    }
}

// 10.3.5

// Accept the current hit from an any-hit shader and end the search.
//
// On SPIR-V, `%_ = OpLabel` opens a fresh block after the instruction, as in
// `IgnoreHit`.
[require(cuda_glsl_hlsl_spirv, raytracing_anyhit)]
void AcceptHitAndEndSearch()
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "AcceptHitAndEndSearch";
    case glsl: __intrinsic_asm "terminateRayEXT;";
    case cuda: __intrinsic_asm "optixTerminateRay";
    case spirv:
        spirv_asm {
            OpTerminateRayKHR;
            %_ = OpLabel
        };
    }
}
// 10.4 - System Values and Special Semantics

// TODO: Many of these functions need to be restricted so that
// they can only be accessed from specific stages.

// 10.4.1 - Ray Dispatch System Values

// Launch index of the current ray (`gl_LaunchIDEXT` / `LaunchIdKHR` on Vulkan targets).
[NonUniformReturn]
[require(cuda_glsl_hlsl_spirv, raytracing_allstages)]
uint3 DispatchRaysIndex()
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "DispatchRaysIndex";
    case glsl: __intrinsic_asm "(gl_LaunchIDEXT)";
    case cuda: __intrinsic_asm "optixGetLaunchIndex";
    case spirv:
        return spirv_asm {
            result:$$uint3 = OpLoad builtin(LaunchIdKHR:uint3);
        };
    }
}

// Launch dimensions (`gl_LaunchSizeEXT` / `LaunchSizeKHR` on Vulkan targets).
[require(cuda_glsl_hlsl_spirv, raytracing_allstages)]
uint3 DispatchRaysDimensions()
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "DispatchRaysDimensions";
    case glsl: __intrinsic_asm "(gl_LaunchSizeEXT)";
    case cuda: __intrinsic_asm "optixGetLaunchDimensions";
    case spirv:
        return spirv_asm {
            result:$$uint3 = OpLoad builtin(LaunchSizeKHR:uint3);
        };
    }
}

// 10.4.2 - Ray System Values

// World-space ray origin (`gl_WorldRayOriginEXT` / `WorldRayOriginKHR`).
[NonUniformReturn]
[require(cuda_glsl_hlsl_spirv, raytracing_anyhit_closesthit_intersection_miss)]
float3 WorldRayOrigin()
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "WorldRayOrigin";
    case glsl: __intrinsic_asm "(gl_WorldRayOriginEXT)";
    case cuda: __intrinsic_asm "optixGetWorldRayOrigin";
    case spirv:
        return spirv_asm {
            result:$$float3 = OpLoad builtin(WorldRayOriginKHR:float3);
        };
    }
}

// World-space ray direction (`gl_WorldRayDirectionEXT` / `WorldRayDirectionKHR`).
[NonUniformReturn]
[require(cuda_glsl_hlsl_spirv, raytracing_anyhit_closesthit_intersection_miss)]
float3 WorldRayDirection()
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "WorldRayDirection";
    case glsl: __intrinsic_asm "(gl_WorldRayDirectionEXT)";
    case cuda: __intrinsic_asm "optixGetWorldRayDirection";
    case spirv:
        return spirv_asm {
            result:$$float3 = OpLoad builtin(WorldRayDirectionKHR:float3);
        };
    }
}

// Ray minimum `t` (`gl_RayTminEXT` / `RayTminKHR`).
[NonUniformReturn]
[require(cuda_glsl_hlsl_spirv, raytracing_anyhit_closesthit_intersection_miss)]
float RayTMin()
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "RayTMin";
    case glsl: __intrinsic_asm "(gl_RayTminEXT)";
    case cuda: __intrinsic_asm "optixGetRayTmin";
    case spirv:
        return spirv_asm {
            result:$$float = OpLoad builtin(RayTminKHR:float);
        };
    }
}

// Note: The `RayTCurrent()` intrinsic should translate to
// either `gl_HitTEXT` (for hit shaders) or `gl_RayTmaxEXT`
// (for intersection shaders). Right now we are handling this
// during code emission, for simplicity.
//
// TODO: Once the compiler supports a more refined concept
// of profiles/capabilities and overloading based on them,
// we should simply provide two overloads here, specialized
// to the appropriate Vulkan stages.
//
[NonUniformReturn]
[require(cuda_glsl_hlsl_spirv, raytracing_anyhit_closesthit_intersection_miss)]
float RayTCurrent()
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "RayTCurrent";
    case glsl: __intrinsic_asm "(gl_RayTmaxEXT)";
    case cuda: __intrinsic_asm "optixGetRayTmax";
    case spirv:
        return spirv_asm {
            result:$$float = OpLoad builtin(RayTmaxKHR:float);
        };
    }
}

// Flags of the incoming ray (`gl_IncomingRayFlagsEXT` / `IncomingRayFlagsKHR`).
[require(cuda_glsl_hlsl_spirv, raytracing_anyhit_closesthit_intersection_miss)]
uint RayFlags()
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "RayFlags";
    case glsl: __intrinsic_asm "(gl_IncomingRayFlagsEXT)";
    case cuda: __intrinsic_asm "optixGetRayFlags";
    case spirv:
        return spirv_asm {
            result:$$uint = OpLoad builtin(IncomingRayFlagsKHR:uint);
        };
    }
}

// 10.4.3 - Primitive/Object Space System Values

// Instance index; note this maps to `gl_InstanceID` / `InstanceId` on Vulkan targets.
[NonUniformReturn]
[require(cuda_glsl_hlsl_spirv, raytracing_anyhit_closesthit_intersection)]
uint InstanceIndex()
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "InstanceIndex";
    case glsl: __intrinsic_asm "(gl_InstanceID)";
    case cuda: __intrinsic_asm "optixGetInstanceIndex";
    case spirv:
        return spirv_asm {
            result:$$uint = OpLoad builtin(InstanceId:uint);
        };
    }
}

// Instance ID; note this maps to `gl_InstanceCustomIndexEXT` /
// `InstanceCustomIndexKHR` on Vulkan targets.
[NonUniformReturn]
[require(cuda_glsl_hlsl_spirv, raytracing_anyhit_closesthit_intersection)]
uint InstanceID()
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "InstanceID";
    case glsl: __intrinsic_asm "(gl_InstanceCustomIndexEXT)";
    case cuda: __intrinsic_asm "optixGetInstanceId";
    case spirv:
        return spirv_asm {
            result:$$uint = OpLoad builtin(InstanceCustomIndexKHR:uint);
        };
    }
}

// Primitive index (`gl_PrimitiveID` / `PrimitiveId`).
[NonUniformReturn]
[require(cuda_glsl_hlsl_spirv, raytracing_anyhit_closesthit_intersection)]
uint PrimitiveIndex()
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "PrimitiveIndex";
    case glsl: __intrinsic_asm "(gl_PrimitiveID)";
    case cuda: __intrinsic_asm "optixGetPrimitiveIndex";
    case spirv:
        return spirv_asm {
            result:$$uint = OpLoad builtin(PrimitiveId:uint);
        };
    }
}

// Object-space ray origin (`gl_ObjectRayOriginEXT` / `ObjectRayOriginKHR`).
[NonUniformReturn]
[require(cuda_glsl_hlsl_spirv, raytracing_anyhit_closesthit_intersection)]
float3 ObjectRayOrigin()
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "ObjectRayOrigin";
    case glsl: __intrinsic_asm "(gl_ObjectRayOriginEXT)";
    case cuda: __intrinsic_asm "optixGetObjectRayOrigin";
    case spirv:
        return spirv_asm {
            result:$$float3 = OpLoad builtin(ObjectRayOriginKHR:float3);
        };
    }
}

// Object-space ray direction (`gl_ObjectRayDirectionEXT` / `ObjectRayDirectionKHR`).
[NonUniformReturn]
[require(cuda_glsl_hlsl_spirv, raytracing_anyhit_closesthit_intersection)]
float3 ObjectRayDirection()
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "ObjectRayDirection";
    case glsl: __intrinsic_asm "(gl_ObjectRayDirectionEXT)";
    case cuda: __intrinsic_asm "optixGetObjectRayDirection";
    case spirv:
        return spirv_asm {
            result:$$float3 = OpLoad builtin(ObjectRayDirectionKHR:float3);
        };
    }
}

// TODO: optix has an optixGetObjectToWorldTransformMatrix function that returns 12
// floats by reference.
// Object-to-world transform as a 3x4 matrix.
// The Vulkan targets expose a 4x3 built-in, so GLSL and SPIR-V transpose it.
[NonUniformReturn]
[require(glsl_hlsl_spirv, raytracing_anyhit_closesthit_intersection)]
float3x4 ObjectToWorld3x4()
{
    __target_switch
    {
    case hlsl:
        __intrinsic_asm "ObjectToWorld3x4";
    case glsl:
        __intrinsic_asm "transpose(gl_ObjectToWorldEXT)";
    case spirv:
        return spirv_asm {
            %mat:$$float4x3 = OpLoad builtin(ObjectToWorldKHR:float4x3);
            result:$$float3x4 = OpTranspose %mat;
        };
    }
}

// World-to-object transform as a 3x4 matrix (transposed from the 4x3
// built-in on GLSL and SPIR-V).
[NonUniformReturn]
[require(glsl_hlsl_spirv, raytracing_anyhit_closesthit_intersection)]
float3x4 WorldToObject3x4()
{
    __target_switch
    {
    case hlsl:
        __intrinsic_asm "WorldToObject3x4";
    case glsl:
        __intrinsic_asm "transpose(gl_WorldToObjectEXT)";
    case spirv:
        return spirv_asm {
            %mat:$$float4x3 = OpLoad builtin(WorldToObjectKHR:float4x3);
            result:$$float3x4 = OpTranspose %mat;
        };
    }
}

// Object-to-world transform as a 4x3 matrix; GLSL and SPIR-V read the
// built-in directly.
[NonUniformReturn]
[require(glsl_hlsl_spirv, raytracing_anyhit_closesthit_intersection)]
float4x3 ObjectToWorld4x3()
{
    __target_switch
    {
    case hlsl:
        __intrinsic_asm "ObjectToWorld4x3";
    case glsl:
        __intrinsic_asm "(gl_ObjectToWorldEXT)";
    case spirv:
        return spirv_asm {
            result:$$float4x3 = OpLoad builtin(ObjectToWorldKHR:float4x3);
        };
    }
}

// World-to-object transform as a 4x3 matrix; GLSL and SPIR-V read the
// built-in directly.
[NonUniformReturn]
[require(glsl_hlsl_spirv, raytracing_anyhit_closesthit_intersection)]
float4x3 WorldToObject4x3()
{
    __target_switch
    {
    case hlsl:
        __intrinsic_asm "WorldToObject4x3";
    case glsl:
        __intrinsic_asm "(gl_WorldToObjectEXT)";
    case spirv:
        return spirv_asm {
            result:$$float4x3 = OpLoad builtin(WorldToObjectKHR:float4x3);
        };
    }
}

// NOTE!
// The name of the following functions may change when DXR supports
// a feature similar to the `GL_NV_ray_tracing_motion_blur` extension

// Current time of the ray being processed (`gl_CurrentRayTimeNV` /
// `CurrentRayTimeNV` on Vulkan targets).
__glsl_extension(GL_NV_ray_tracing_motion_blur)
__glsl_extension(GL_EXT_ray_tracing)
[NonUniformReturn]
[require(glsl_hlsl_spirv, raytracing_motionblur_anyhit_closesthit_intersection_miss)]
float RayCurrentTime()
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "RayCurrentTime";
    case glsl: __intrinsic_asm "(gl_CurrentRayTimeNV)";
    case spirv:
        return spirv_asm {
            result:$$float = OpLoad builtin(CurrentRayTimeNV:float);
        };
    }
}

// Note: The provisional DXR spec included these unadorned
// `ObjectToWorld()` and `WorldToObject()` functions, so
// we will forward them to the new names as a convenience
// for users who are porting their code.
//
// TODO: Should we provide a deprecation warning on these
// declarations, so that users can know they aren't coding
// against the final spec?
//
[NonUniformReturn]
float3x4 ObjectToWorld() { return ObjectToWorld3x4(); }

[NonUniformReturn]
float3x4 WorldToObject() { return WorldToObject3x4(); }

// 10.4.4 - Hit Specific System values

// Kind of the current hit (`gl_HitKindEXT` / `HitKindKHR` on Vulkan targets).
[NonUniformReturn]
[require(cuda_glsl_hlsl_spirv, raytracing_anyhit_closesthit)]
uint HitKind()
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "HitKind";
    case glsl: __intrinsic_asm "(gl_HitKindEXT)";
    case cuda: __intrinsic_asm "optixGetHitKind";
    case spirv:
        return spirv_asm {
            result:$$uint = OpLoad builtin(HitKindKHR:uint);
        };
    }
}

// Pre-defined hit kinds (not documented explicitly)
static const uint HIT_KIND_TRIANGLE_FRONT_FACE  = 254;
static const uint HIT_KIND_TRIANGLE_BACK_FACE   = 255;

//
// Shader Model 6.4
//

// Treats `left` and `right` as 4-component vectors of `UInt8` and computes `dot(left, right) + acc`
uint dot4add_u8packed(uint left, uint right, uint acc);

// Treats `left` and `right` as 4-component vectors of `Int8` and computes `dot(left, right) + acc`
int dot4add_i8packed(uint left, uint right, int acc);

// Computes `dot(left, right) + acc`.
//
// May not produce infinities or NaNs for intermediate results that overflow the range of `half`
float dot2add(float2 left, float2 right, float acc);

//
// Shader Model 6.5
//

//
// Mesh Shaders
//

// Set the number of output vertices and primitives for a mesh shader invocation.
__glsl_extension(GL_EXT_mesh_shader)
__glsl_version(450)
[require(glsl_hlsl_spirv, meshshading)]
void SetMeshOutputCounts(uint vertexCount, uint primitiveCount)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "SetMeshOutputCounts";
    case glsl: __intrinsic_asm "SetMeshOutputsEXT";
    case spirv:
        return spirv_asm {
            OpCapability MeshShadingEXT;
            OpExtension "SPV_EXT_mesh_shader";
            OpSetMeshOutputsEXT $vertexCount $primitiveCount;
        };
    }
}

// Specify the number of downstream mesh shader thread groups to invoke from an amplification shader,
// and provide the values for per-mesh payload parameters.
//
// This function doesn't return.
//
// This function cannot be inlined due to a legalization pass happening mid-way through processing
// and later more processing happening to the function which requires eventual inlining.
[KnownBuiltin("DispatchMesh")]
[require(glsl_hlsl_metal_spirv, meshshading)]
[noRefInline]
void DispatchMesh<P>(uint threadGroupCountX, uint threadGroupCountY, uint threadGroupCountZ, __ref P meshPayload)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "DispatchMesh";
    case glsl:
        // This intrinsic doesn't take into account writing meshPayload. That
        // is dealt with separately by 'legalizeDispatchMeshPayloadForGLSL'.
        __intrinsic_asm "EmitMeshTasksEXT($0, $1, $2)";
    case metal:
        __intrinsic_asm "_slang_mesh_payload = *$3; _slang_mgp.set_threadgroups_per_grid(uint3($0, $1, $2)); return;";
    case spirv:
        return spirv_asm {
            OpCapability MeshShadingEXT;
            OpExtension "SPV_EXT_mesh_shader";
            OpEmitMeshTasksEXT $threadGroupCountX $threadGroupCountY $threadGroupCountZ &meshPayload;
            // OpEmitMeshTasksExt is a terminator, so we need to start a new
            // block to hold whatever comes after this intrinsic
            %_ = OpLabel
        };
    }
}

//
// "Sampler feedback" types `FeedbackTexture2D` and `FeedbackTexture2DArray`.
//
// https://microsoft.github.io/DirectX-Specs/d3d/SamplerFeedback.html

// The docs describe these as 'types' but their syntax makes them seem enum like, and enum is a simpler way to implement them
// But slang enums are always 'enum class like', so I use an empty struct type here

[sealed]
[builtin]
interface __BuiltinSamplerFeedbackType {};

[sealed]
__magic_type(FeedbackType, $(int(FeedbackType::Kind::MinMip)))
__target_intrinsic(hlsl, SAMPLER_FEEDBACK_MIN_MIP)
struct SAMPLER_FEEDBACK_MIN_MIP : __BuiltinSamplerFeedbackType {};

[sealed]
__magic_type(FeedbackType, $(int(FeedbackType::Kind::MipRegionUsed)))
__target_intrinsic(hlsl, SAMPLER_FEEDBACK_MIP_REGION_USED)
struct SAMPLER_FEEDBACK_MIP_REGION_USED : __BuiltinSamplerFeedbackType {};

// All of these objects are write-only resources that point to a special kind of unordered access view meant for sampler feedback.
// Sampler-feedback writes against a `Texture2D` (2D `float2` locations).
// Every method forwards to the same-named member on C++ and HLSL.
//
// NOTE(review): the generic parameter list of this extension (and the
// specialization arguments of `__TextureImpl`) are not present in this text;
// confirm the header against upstream before relying on it.
__generic
extension __TextureImpl
{
    // With Clamp

    [require(cpp_hlsl)]
    void WriteSamplerFeedback(Texture2D tex, SamplerState samp, float2 location, float clamp)
    {
        __target_switch
        {
        case cpp: __intrinsic_asm "($0).WriteSamplerFeedback($1, $2, $3, $4)";
        case hlsl: __intrinsic_asm "($0).WriteSamplerFeedback($1, $2, $3, $4)";
        }
    }

    [require(cpp_hlsl)]
    void WriteSamplerFeedbackBias(Texture2D tex, SamplerState samp, float2 location, float bias, float clamp)
    {
        __target_switch
        {
        case cpp: __intrinsic_asm "($0).WriteSamplerFeedbackBias($1, $2, $3, $4, $5)";
        case hlsl: __intrinsic_asm "($0).WriteSamplerFeedbackBias($1, $2, $3, $4, $5)";
        }
    }

    [require(cpp_hlsl)]
    void WriteSamplerFeedbackGrad(Texture2D tex, SamplerState samp, float2 location, float2 ddx, float2 ddy, float clamp)
    {
        __target_switch
        {
        case cpp: __intrinsic_asm "($0).WriteSamplerFeedbackGrad($1, $2, $3, $4, $5, $6)";
        case hlsl: __intrinsic_asm "($0).WriteSamplerFeedbackGrad($1, $2, $3, $4, $5, $6)";
        }
    }

    // Level

    [require(cpp_hlsl)]
    void WriteSamplerFeedbackLevel(Texture2D tex, SamplerState samp, float2 location, float lod)
    {
        __target_switch
        {
        case cpp: __intrinsic_asm "($0).WriteSamplerFeedbackLevel($1, $2, $3, $4)";
        case hlsl: __intrinsic_asm "($0).WriteSamplerFeedbackLevel($1, $2, $3, $4)";
        }
    }

    // Without Clamp

    [require(cpp_hlsl)]
    void WriteSamplerFeedback(Texture2D tex, SamplerState samp, float2 location)
    {
        __target_switch
        {
        case cpp: __intrinsic_asm "($0).WriteSamplerFeedback($1, $2, $3)";
        case hlsl: __intrinsic_asm "($0).WriteSamplerFeedback($1, $2, $3)";
        }
    }

    [require(cpp_hlsl)]
    void WriteSamplerFeedbackBias(Texture2D tex, SamplerState samp, float2 location, float bias)
    {
        __target_switch
        {
        case cpp: __intrinsic_asm "($0).WriteSamplerFeedbackBias($1, $2, $3, $4)";
        case hlsl: __intrinsic_asm "($0).WriteSamplerFeedbackBias($1, $2, $3, $4)";
        }
    }

    [require(cpp_hlsl)]
    void WriteSamplerFeedbackGrad(Texture2D tex, SamplerState samp, float2 location, float2 ddx, float2 ddy)
    {
        __target_switch
        {
        case cpp: __intrinsic_asm "($0).WriteSamplerFeedbackGrad($1, $2, $3, $4, $5)";
        case hlsl: __intrinsic_asm "($0).WriteSamplerFeedbackGrad($1, $2, $3, $4, $5)";
        }
    }
};

// Sampler-feedback writes against a `Texture2DArray` (`float3` locations).
// Every method forwards to the same-named member on C++ and HLSL.
//
// NOTE(review): as with the extension above, the generic parameter list is
// not present in this text; confirm the header against upstream.
__generic
extension __TextureImpl
{
    // With Clamp

    [require(cpp_hlsl)]
    void WriteSamplerFeedback(Texture2DArray texArray, SamplerState samp, float3 location, float clamp)
    {
        __target_switch
        {
        case cpp: __intrinsic_asm "($0).WriteSamplerFeedback($1, $2, $3, $4)";
        case hlsl: __intrinsic_asm "($0).WriteSamplerFeedback($1, $2, $3, $4)";
        }
    }

    [require(cpp_hlsl)]
    void WriteSamplerFeedbackBias(Texture2DArray texArray, SamplerState samp, float3 location, float bias, float clamp)
    {
        __target_switch
        {
        case cpp: __intrinsic_asm "($0).WriteSamplerFeedbackBias($1, $2, $3, $4, $5)";
        case hlsl: __intrinsic_asm "($0).WriteSamplerFeedbackBias($1, $2, $3, $4, $5)";
        }
    }

    [require(cpp_hlsl)]
    void WriteSamplerFeedbackGrad(Texture2DArray texArray, SamplerState samp, float3 location, float3 ddx, float3 ddy, float clamp)
    {
        __target_switch
        {
        case cpp: __intrinsic_asm "($0).WriteSamplerFeedbackGrad($1, $2, $3, $4, $5, $6)";
        case hlsl: __intrinsic_asm "($0).WriteSamplerFeedbackGrad($1, $2, $3, $4, $5, $6)";
        }
    }

    // Level

    [require(cpp_hlsl)]
    void WriteSamplerFeedbackLevel(Texture2DArray texArray, SamplerState samp, float3 location, float lod)
    {
        __target_switch
        {
        case cpp: __intrinsic_asm "($0).WriteSamplerFeedbackLevel($1, $2, $3, $4)";
        case hlsl: __intrinsic_asm "($0).WriteSamplerFeedbackLevel($1, $2, $3, $4)";
        }
    }

    // Without Clamp

    [require(cpp_hlsl)]
    void WriteSamplerFeedback(Texture2DArray texArray, SamplerState samp, float3 location)
    {
        __target_switch
        {
        case cpp: __intrinsic_asm "($0).WriteSamplerFeedback($1, $2, $3)";
        case hlsl: __intrinsic_asm "($0).WriteSamplerFeedback($1, $2, $3)";
        }
    }

    [require(cpp_hlsl)]
    void WriteSamplerFeedbackBias(Texture2DArray texArray, SamplerState samp, float3 location, float bias)
    {
        __target_switch
        {
        case cpp: __intrinsic_asm "($0).WriteSamplerFeedbackBias($1, $2, $3, $4)";
        case hlsl: __intrinsic_asm "($0).WriteSamplerFeedbackBias($1, $2, $3, $4)";
        }
    }

    [require(cpp_hlsl)]
    void WriteSamplerFeedbackGrad(Texture2DArray texArray, SamplerState samp, float3 location, float3 ddx, float3 ddy)
    {
        __target_switch
        {
        case cpp: __intrinsic_asm "($0).WriteSamplerFeedbackGrad($1, $2, $3, $4, $5)";
        case hlsl: __intrinsic_asm "($0).WriteSamplerFeedbackGrad($1, $2, $3, $4, $5)";
        }
    }
};

//
// DXR 1.1 and `TraceRayInline` support
//

// Get the index of the geometry that was hit in an intersection, any-hit, or closest-hit shader
__glsl_extension(GL_EXT_ray_tracing)
[NonUniformReturn]
[require(glsl_hlsl_spirv, raytracing_anyhit_closesthit_intersection)]
uint GeometryIndex()
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "GeometryIndex";
    case glsl: __intrinsic_asm "(gl_GeometryIndexEXT)";
    case spirv:
        return spirv_asm {
            result:$$uint = OpLoad builtin(RayGeometryIndexKHR:uint);
        };
    }
}

// Get the vertex positions of the currently hit triangle in any-hit or closest-hit shader.
// https://github.com/KhronosGroup/GLSL/blob/master/extensions/ext/GLSL_EXT_ray_tracing_position_fetch.txt
//
// `index` selects one entry of the three-element position built-in.
__glsl_extension(GL_EXT_ray_tracing)
__glsl_extension(GL_EXT_ray_tracing_position_fetch)
[ForceInline]
[require(glsl_spirv, raytracing_position)]
float3 HitTriangleVertexPosition(uint index)
{
    __target_switch
    {
    case glsl:
        __intrinsic_asm "gl_HitTriangleVertexPositionsEXT[$0]";
    case spirv:
        return spirv_asm {
            OpCapability RayTracingKHR;
            OpCapability RayTracingPositionFetchKHR;
            OpExtension "SPV_KHR_ray_tracing";
            OpExtension "SPV_KHR_ray_tracing_position_fetch";
            %_ptr_Input_v3float = OpTypePointer Input $$float3;
            %addr : %_ptr_Input_v3float = OpAccessChain builtin(HitTriangleVertexPositionsKHR:float3[3]) $index;
            result:$$float3 = OpLoad %addr;
        };
    }
}

// Status of whether a (closest) hit has been committed in a `RayQuery`.
typedef uint COMMITTED_STATUS;

// No hit committed.
static const COMMITTED_STATUS COMMITTED_NOTHING = 0;

// Closest hit is a triangle.
//
// This could be an opaque triangle hit found by the fixed-function
// traversal and intersection implementation, or a non-opaque
// triangle hit committed by user code with `RayQuery.CommitNonOpaqueTriangleHit`
//
static const COMMITTED_STATUS COMMITTED_TRIANGLE_HIT = 1;

// Closest hit is a procedural primitive.
//
// A procedural hit primitive is committed using `RayQuery.CommitProceduralPrimitiveHit`.
static const COMMITTED_STATUS COMMITTED_PROCEDURAL_PRIMITIVE_HIT = 2;

// Type of candidate hit that a `RayQuery` is pausing at.
//
// A `RayQuery` can automatically commit hits with opaque triangles,
// but yields to user code for other hits to allow them to be
// dismissed or committed.
//
typedef uint CANDIDATE_TYPE;

// Candidate hit is a non-opaque triangle.
static const CANDIDATE_TYPE CANDIDATE_NON_OPAQUE_TRIANGLE = 0;

// Candidate hit is a procedural primitive.
static const CANDIDATE_TYPE CANDIDATE_PROCEDURAL_PRIMITIVE = 1;

// Handle to state of an in-progress ray-tracing query.
//
// The ray query is effectively a coroutine that user shader
// code can resume to continue tracing the ray, and which yields
// back to the user code at interesting events along the ray.
//
// Note: The treatment of the `RayQuery` type in Slang does not
// perfectly match its semantics in vanilla HLSL in some corner
// cases. Specifically, a `RayQuery` in vanilla HLSL is an
// opaque handle to mutable storage, and assigning a `RayQuery`
// or passing one as a parameter will only copy the *handle*,
// potentially resulting in aliasing of the underlying mutable
// storage.
//
// In contrast, Slang considers a `RayQuery` to own its mutable
// state, and (because the API does not support cloning of queries),
// `RayQuery` values are non-copyable (aka "move-only").
//
// The main place where this arises as a consideration is when
// passing a `RayQuery` down into a function that will perform
// mutating operations on it (e.g., `TraceRay` or `Proceed`):
//
//      void myFunc( inout RayQuery q )
//      {
//          q.Proceed();
//      }
//
// In Slang, a parameter like `q` above should be declared `inout`.
// HLSL does not care about whether `q` is declared `inout` or not.
//
// Cannot use a capability requirement on the struct itself because its
// members have unequal target support, and the rule would propagate to
// the children.
//
// NOTE(review): `TraceRayInline` below reads `rayFlagsGeneric`, which is not
// declared in the header as written here; the struct's generic parameter
// list appears to be missing -- confirm against upstream.
__glsl_extension(GL_EXT_ray_query)
[__NonCopyableType]
__intrinsic_type($(kIROp_RayQueryType))
struct RayQuery
{
    // Create a new ray query, initialized to its default state.
    //
    __intrinsic_op($(kIROp_AllocateOpaqueHandle))
    __init();

    // GLSL/SPIR-V helper used by `TraceRayInline` to (re)initialize the query
    // from an already-unpacked ray description.
    __glsl_extension(GL_EXT_ray_query)
    [require(glsl_spirv, rayquery)]
    [mutating]
    void __rayQueryInitializeEXT(
        RaytracingAccelerationStructure accelerationStructure,
        RAY_FLAG rayFlags,
        uint instanceInclusionMask,
        float3 origin,
        float tMin,
        float3 direction,
        float tMax)
    {
        __target_switch
        {
        case glsl:
            __intrinsic_asm "rayQueryInitializeEXT($0, $1, $2, $3, $4, $5, $6, $7)";
        case spirv:
            spirv_asm {
                OpRayQueryInitializeKHR
                    &this
                    $accelerationStructure
                    $rayFlags
                    $instanceInclusionMask
                    $origin
                    $tMin
                    $direction
                    $tMax;
            };
        }
    }

    // Initialize a ray-tracing query.
    //
    // This method may be called on a "fresh" ray query, or
    // on one that is already tracing a ray. In the latter
    // case any state related to the ray previously being
    // traced is overwritten.
    //
    // The `rayFlags` here will be bitwise ORed with
    // the `rayFlags` passed as a generic argument to
    // `RayQuery` to get the effective ray flags, which
    // must obey any API-imposed restrictions.
    //
    [__unsafeForceInlineEarly]
    [mutating]
    [require(glsl_hlsl_spirv, rayquery)]
    void TraceRayInline(
        RaytracingAccelerationStructure accelerationStructure,
        RAY_FLAG                        rayFlags,
        uint                            instanceInclusionMask,
        RayDesc                         ray)
    {
        __target_switch
        {
        case hlsl:
            __intrinsic_asm ".TraceRayInline";
        case glsl:
        case spirv:
            // The explicit OR is only done here; the HLSL path passes
            // `rayFlags` through to `.TraceRayInline` unchanged.
            __rayQueryInitializeEXT(
                accelerationStructure,
                rayFlags | rayFlagsGeneric,
                instanceInclusionMask,
                ray.Origin,
                ray.TMin,
                ray.Direction,
                ray.TMax);
        }
    }

    // Resume the ray query coroutine.
    //
    // If the coroutine suspends because of encountering
    // a candidate hit that cannot be resolved with fixed-function
    // logic, this function returns `true`, and the `Candidate*()`
    // functions should be used by application code to resolve
    // the candidate hit (by either committing or ignoring it).
    //
    // If the coroutine terminates because traversal is
    // complete (or has been aborted), this function returns
    // `false`, and application code should use the `Committed*()`
    // functions to appropriately handle the closest hit (if any)
    // that was found.
    //
    __glsl_extension(GL_EXT_ray_query)
    [mutating]
    [require(glsl_hlsl_spirv, rayquery)]
    bool Proceed()
    {
        __target_switch
        {
        case hlsl: __intrinsic_asm ".Proceed";
        case glsl: __intrinsic_asm "rayQueryProceedEXT";
        case spirv:
            return spirv_asm {
                result:$$bool = OpRayQueryProceedKHR &this
            };
        }
    }

    // Causes the ray query to terminate.
    //
    // This function causes the ray query to act as if
    // traversal has terminated, so that subsequent
    // `Proceed()` calls will return `false`.
    //
    __glsl_extension(GL_EXT_ray_query)
    [mutating]
    [require(glsl_hlsl_spirv, rayquery)]
    void Abort()
    {
        __target_switch
        {
        case hlsl: __intrinsic_asm ".Abort";
        case glsl: __intrinsic_asm "rayQueryTerminateEXT";
        case spirv:
            spirv_asm {
                OpRayQueryTerminateKHR &this
            };
        }
    }

    // Commit the current non-opaque triangle hit.
    //
    // NOTE(review): tagged `[__NoSideEffect]` although it is `[mutating]` and
    // returns `void` -- confirm the attribute is intended here.
    __glsl_extension(GL_EXT_ray_query)
    [__NoSideEffect]
    [mutating]
    [require(glsl_hlsl_spirv, rayquery)]
    void CommitNonOpaqueTriangleHit()
    {
        __target_switch
        {
        case hlsl: __intrinsic_asm ".CommitNonOpaqueTriangleHit";
        case glsl: __intrinsic_asm "rayQueryConfirmIntersectionEXT";
        case spirv:
            spirv_asm {
                OpRayQueryConfirmIntersectionKHR &this
            };
        }
    }

    // Commit the current procedural primitive hit, with hit time `t`.
    //
    // NOTE(review): tagged `[__NoSideEffect]` although it is `[mutating]` and
    // returns `void` -- confirm the attribute is intended here.
    __glsl_extension(GL_EXT_ray_query)
    [__NoSideEffect]
    [mutating]
    [require(glsl_hlsl_spirv, rayquery)]
    void CommitProceduralPrimitiveHit(float t)
    {
        __target_switch
        {
        case hlsl: __intrinsic_asm ".CommitProceduralPrimitiveHit";
        case glsl: __intrinsic_asm "rayQueryGenerateIntersectionEXT";
        case spirv:
            spirv_asm {
                OpRayQueryGenerateIntersectionKHR &this $t
            };
        }
    }

    // Get the type of candidate hit being considered.
    //
    // The ray query coroutine will suspend when it encounters
    // a hit that cannot be resolved with fixed-function logic
    // (either a non-opaque triangle or a procedural primitive).
    // In either of those cases, `CandidateType()` will return
    // the kind of candidate hit that must be resolved by
    // user code.
    //
    __glsl_extension(GL_EXT_ray_query)
    [__NoSideEffect]
    [NonUniformReturn]
    [require(glsl_hlsl_spirv, rayquery)]
    CANDIDATE_TYPE CandidateType()
    {
        __target_switch
        {
        case hlsl: __intrinsic_asm ".CandidateType";
        case glsl: __intrinsic_asm "rayQueryGetIntersectionTypeEXT($0, false)";
        case spirv:
            // Selector operand: 0 asks for the candidate intersection.
            uint RayQueryCandidateIntersectionKHR = 0;
            return spirv_asm {
                result:$$CANDIDATE_TYPE = OpRayQueryGetIntersectionTypeKHR &this $RayQueryCandidateIntersectionKHR;
            };
        }
    }

    // Get the status of the committed (closest) hit, if any.
// The SPIR-V paths below pass a selector operand: 0 = candidate, 1 = committed.
    __glsl_extension(GL_EXT_ray_query)
    [__NoSideEffect]
    [NonUniformReturn]
    [require(glsl_hlsl_spirv, rayquery)]
    COMMITTED_STATUS CommittedStatus()
    {
        __target_switch
        {
        case hlsl: __intrinsic_asm ".CommittedStatus";
        case glsl: __intrinsic_asm "rayQueryGetIntersectionTypeEXT($0, true)";
        case spirv:
            uint RayQueryCommittedIntersectionKHR = 1;
            return spirv_asm {
                result:$$COMMITTED_STATUS = OpRayQueryGetIntersectionTypeKHR &this $RayQueryCommittedIntersectionKHR;
            };
        }
    }

    // Whether the candidate procedural primitive is non-opaque.
    // GLSL and SPIR-V query the "AABB opaque" state and negate it.
    __glsl_extension(GL_EXT_ray_query)
    [__NoSideEffect]
    [NonUniformReturn]
    [require(glsl_hlsl_spirv, rayquery)]
    bool CandidateProceduralPrimitiveNonOpaque()
    {
        __target_switch
        {
        case hlsl: __intrinsic_asm ".CandidateProceduralPrimitiveNonOpaque";
        case glsl: __intrinsic_asm "(!rayQueryGetIntersectionCandidateAABBOpaqueEXT($0))";
        case spirv:
            // The opcode takes only the ray query, so no selector local is needed.
            return spirv_asm {
                %rr:$$bool = OpRayQueryGetIntersectionCandidateAABBOpaqueKHR &this;
                result:$$bool = OpLogicalNot %rr;
            };
        }
    }

    // Ray `t` of the candidate triangle hit.
    __glsl_extension(GL_EXT_ray_query)
    [__NoSideEffect]
    [NonUniformReturn]
    [require(glsl_hlsl_spirv, rayquery)]
    float CandidateTriangleRayT()
    {
        __target_switch
        {
        case hlsl: __intrinsic_asm ".CandidateTriangleRayT";
        case glsl: __intrinsic_asm "rayQueryGetIntersectionTEXT($0, false)";
        case spirv:
            uint iCandidateOrCommitted = 0;
            return spirv_asm {
                result:$$float = OpRayQueryGetIntersectionTKHR &this $iCandidateOrCommitted;
            };
        }
    }

    // Ray `t` of the committed hit.
    __glsl_extension(GL_EXT_ray_query)
    [__NoSideEffect]
    [NonUniformReturn]
    [require(glsl_hlsl_spirv, rayquery)]
    float CommittedRayT()
    {
        __target_switch
        {
        case hlsl: __intrinsic_asm ".CommittedRayT";
        case glsl: __intrinsic_asm "rayQueryGetIntersectionTEXT($0, true)";
        case spirv:
            uint iCandidateOrCommitted = 1;
            return spirv_asm {
                result:$$float = OpRayQueryGetIntersectionTKHR &this $iCandidateOrCommitted;
            };
        }
    }

    /// Missing HLSL equivalent; only implemented for GLSL & SPIR-V.
    __glsl_extension(GL_EXT_ray_query)
    [__NoSideEffect]
    [require(glsl_spirv, rayquery)]
    int CandidateRayInstanceCustomIndex()
    {
        __target_switch
        {
        case glsl: __intrinsic_asm "rayQueryGetIntersectionInstanceCustomIndexEXT($0, false)";
        case spirv:
            uint iCandidateOrCommitted = 0;
            return spirv_asm {
                result:$$int = OpRayQueryGetIntersectionInstanceCustomIndexKHR &this $iCandidateOrCommitted;
            };
        }
    }

    // Committed-hit counterpart of `CandidateRayInstanceCustomIndex` (GLSL/SPIR-V only).
    __glsl_extension(GL_EXT_ray_query)
    [__NoSideEffect]
    [require(glsl_spirv, rayquery)]
    int CommittedRayInstanceCustomIndex()
    {
        __target_switch
        {
        case glsl: __intrinsic_asm "rayQueryGetIntersectionInstanceCustomIndexEXT($0, true)";
        case spirv:
            uint iCandidateOrCommitted = 1;
            return spirv_asm {
                result:$$int = OpRayQueryGetIntersectionInstanceCustomIndexKHR &this $iCandidateOrCommitted;
            };
        }
    }

    // Instance id of the candidate hit (GLSL/SPIR-V only).
    __glsl_extension(GL_EXT_ray_query)
    [__NoSideEffect]
    [require(glsl_spirv, rayquery)]
    int CandidateRayInstanceId()
    {
        __target_switch
        {
        case glsl: __intrinsic_asm "rayQueryGetIntersectionInstanceIdEXT($0, false)";
        case spirv:
            uint iCandidateOrCommitted = 0;
            return spirv_asm {
                result:$$int = OpRayQueryGetIntersectionInstanceIdKHR &this $iCandidateOrCommitted;
            };
        }
    }

    // Instance id of the committed hit (GLSL/SPIR-V only).
    __glsl_extension(GL_EXT_ray_query)
    [__NoSideEffect]
    [require(glsl_spirv, rayquery)]
    int CommittedRayInstanceId()
    {
        __target_switch
        {
        case glsl: __intrinsic_asm "rayQueryGetIntersectionInstanceIdEXT($0, true)";
        case spirv:
            uint iCandidateOrCommitted = 1;
            return spirv_asm {
                result:$$int = OpRayQueryGetIntersectionInstanceIdKHR &this $iCandidateOrCommitted;
            };
        }
    }

    // Shader-binding-table record offset of the candidate hit's instance (GLSL/SPIR-V only).
    __glsl_extension(GL_EXT_ray_query)
    [__NoSideEffect]
    [require(glsl_spirv, rayquery)]
    uint CandidateRayInstanceShaderBindingTableRecordOffset()
    {
        __target_switch
        {
        case glsl: __intrinsic_asm "rayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetEXT($0, false)";
        case spirv:
            uint iCandidateOrCommitted = 0;
            return spirv_asm {
                result:$$uint = OpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR &this $iCandidateOrCommitted;
            };
        }
    }

    // Shader-binding-table record offset of the committed hit's instance (GLSL/SPIR-V only).
    __glsl_extension(GL_EXT_ray_query)
    [__NoSideEffect]
    [require(glsl_spirv, rayquery)]
    uint CommittedRayInstanceShaderBindingTableRecordOffset()
    {
        __target_switch
        {
        case glsl: __intrinsic_asm "rayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetEXT($0, true)";
        case spirv:
            uint iCandidateOrCommitted = 1;
            return spirv_asm {
                result:$$uint = OpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR &this $iCandidateOrCommitted;
            };
        }
    }

    // Geometry index of the candidate hit (GLSL/SPIR-V only).
    __glsl_extension(GL_EXT_ray_query)
    [__NoSideEffect]
    [require(glsl_spirv, rayquery)]
    int CandidateRayGeometryIndex()
    {
        __target_switch
        {
        case glsl: __intrinsic_asm "rayQueryGetIntersectionGeometryIndexEXT($0, false)";
        case spirv:
            uint iCandidateOrCommitted = 0;
            return spirv_asm {
                result:$$int = OpRayQueryGetIntersectionGeometryIndexKHR &this $iCandidateOrCommitted;
            };
        }
    }

    // Geometry index of the committed hit (GLSL/SPIR-V only).
    __glsl_extension(GL_EXT_ray_query)
    [__NoSideEffect]
    [require(glsl_spirv, rayquery)]
    int CommittedRayGeometryIndex()
    {
        __target_switch
        {
        case glsl: __intrinsic_asm "rayQueryGetIntersectionGeometryIndexEXT($0, true)";
        case spirv:
            uint iCandidateOrCommitted = 1;
            return spirv_asm {
                result:$$int = OpRayQueryGetIntersectionGeometryIndexKHR &this $iCandidateOrCommitted;
            };
        }
    }

    // Primitive index of the candidate hit (GLSL/SPIR-V only).
    __glsl_extension(GL_EXT_ray_query)
    [__NoSideEffect]
    [require(glsl_spirv, rayquery)]
    int CandidateRayPrimitiveIndex()
    {
        __target_switch
        {
        case glsl: __intrinsic_asm "rayQueryGetIntersectionPrimitiveIndexEXT($0, false)";
        case spirv:
            uint iCandidateOrCommitted = 0;
            return spirv_asm {
                result:$$int = OpRayQueryGetIntersectionPrimitiveIndexKHR &this $iCandidateOrCommitted;
            };
        }
    }

    // Primitive index of the committed hit (GLSL/SPIR-V only).
    __glsl_extension(GL_EXT_ray_query)
    [__NoSideEffect]
    [require(glsl_spirv, rayquery)]
    int CommittedRayPrimitiveIndex()
    {
        __target_switch
        {
        case glsl: __intrinsic_asm "rayQueryGetIntersectionPrimitiveIndexEXT($0, true)";
        case spirv:
            uint iCandidateOrCommitted = 1;
            return spirv_asm {
                result:$$int = OpRayQueryGetIntersectionPrimitiveIndexKHR &this $iCandidateOrCommitted;
            };
        }
    }

    __glsl_extension(GL_EXT_ray_query) [__NoSideEffect] [require(glsl_spirv, rayquery)] float2 CandidateRayBarycentrics() { __target_switch { case glsl: __intrinsic_asm "rayQueryGetIntersectionBarycentricsEXT($0, false)"; case
spirv: uint iCandidateOrCommitted = 0; return spirv_asm { result:$$float2 = OpRayQueryGetIntersectionBarycentricsKHR &this $iCandidateOrCommitted; }; } } __glsl_extension(GL_EXT_ray_query) [__NoSideEffect] [require(glsl_spirv, rayquery)] float2 CommittedRayBarycentrics() { __target_switch { case glsl: __intrinsic_asm "rayQueryGetIntersectionBarycentricsEXT($0, true)"; case spirv: uint iCandidateOrCommitted = 1; return spirv_asm { result:$$float2 = OpRayQueryGetIntersectionBarycentricsKHR &this $iCandidateOrCommitted; }; } } __glsl_extension(GL_EXT_ray_query) [__NoSideEffect] [require(glsl_spirv, rayquery)] bool CandidateRayFrontFace() { __target_switch { case glsl: __intrinsic_asm "rayQueryGetIntersectionFrontFaceEXT($0, false)"; case spirv: uint iCandidateOrCommitted = 0; return spirv_asm { result:$$bool = OpRayQueryGetIntersectionFrontFaceKHR &this $iCandidateOrCommitted; }; } } __glsl_extension(GL_EXT_ray_query) [__NoSideEffect] [require(glsl_spirv, rayquery)] bool CommittedRayFrontFace() { __target_switch { case glsl: __intrinsic_asm "rayQueryGetIntersectionFrontFaceEXT($0, true)"; case spirv: uint iCandidateOrCommitted = 1; return spirv_asm { result:$$bool = OpRayQueryGetIntersectionFrontFaceKHR &this $iCandidateOrCommitted; }; } } __glsl_extension(GL_EXT_ray_query) [__NoSideEffect] [require(glsl_spirv, rayquery)] float3 CandidateRayObjectRayDirection() { __target_switch { case glsl: __intrinsic_asm "rayQueryGetIntersectionObjectRayDirectionEXT($0, false)"; case spirv: uint iCandidateOrCommitted = 0; return spirv_asm { result:$$float3 = OpRayQueryGetIntersectionObjectRayDirectionKHR &this $iCandidateOrCommitted; }; } } __glsl_extension(GL_EXT_ray_query) [__NoSideEffect] [require(glsl_spirv, rayquery)] float3 CommittedRayObjectRayDirection() { __target_switch { case glsl: __intrinsic_asm "rayQueryGetIntersectionObjectRayDirectionEXT($0, true)"; case spirv: uint iCandidateOrCommitted = 1; return spirv_asm { result:$$float3 = 
OpRayQueryGetIntersectionObjectRayDirectionKHR &this $iCandidateOrCommitted; }; } } __glsl_extension(GL_EXT_ray_query) [__NoSideEffect] [require(glsl_spirv, rayquery)] float3 CandidateRayObjectRayOrigin() { __target_switch { case glsl: __intrinsic_asm "rayQueryGetIntersectionObjectRayOriginEXT($0, false)"; case spirv: uint iCandidateOrCommitted = 0; return spirv_asm { result:$$float3 = OpRayQueryGetIntersectionObjectRayOriginKHR &this $iCandidateOrCommitted; }; } } __glsl_extension(GL_EXT_ray_query) [__NoSideEffect] [require(glsl_spirv, rayquery)] float3 CommittedRayObjectRayOrigin() { __target_switch { case glsl: __intrinsic_asm "rayQueryGetIntersectionObjectRayOriginEXT($0, true)"; case spirv: uint iCandidateOrCommitted = 1; return spirv_asm { result:$$float3 = OpRayQueryGetIntersectionObjectRayOriginKHR &this $iCandidateOrCommitted; }; } } __glsl_extension(GL_EXT_ray_query) [__NoSideEffect] [require(glsl_spirv, rayquery)] float4x3 CandidateRayObjectToWorld() { __target_switch { case glsl: __intrinsic_asm "rayQueryGetIntersectionObjectToWorldEXT($0, false)"; case spirv: uint iCandidateOrCommitted = 0; return spirv_asm { result:$$float4x3 = OpRayQueryGetIntersectionObjectToWorldKHR &this $iCandidateOrCommitted; }; } } __glsl_extension(GL_EXT_ray_query) [__NoSideEffect] [require(glsl_spirv, rayquery)] float4x3 CommittedRayObjectToWorld() { __target_switch { case glsl: __intrinsic_asm "rayQueryGetIntersectionObjectToWorldEXT($0, true)"; case spirv: uint iCandidateOrCommitted = 1; return spirv_asm { result:$$float4x3 = OpRayQueryGetIntersectionObjectToWorldKHR &this $iCandidateOrCommitted; }; } } __glsl_extension(GL_EXT_ray_query) [__NoSideEffect] [require(glsl_spirv, rayquery)] float4x3 CandidateRayWorldToObject() { __target_switch { case glsl: __intrinsic_asm "rayQueryGetIntersectionWorldToObjectEXT($0, false)"; case spirv: uint iCandidateOrCommitted = 0; return spirv_asm { result:$$float4x3 = OpRayQueryGetIntersectionWorldToObjectKHR &this $iCandidateOrCommitted; 
}; } } __glsl_extension(GL_EXT_ray_query) [__NoSideEffect] [require(glsl_spirv, rayquery)] float4x3 CommittedRayWorldToObject() { __target_switch { case glsl: __intrinsic_asm "rayQueryGetIntersectionWorldToObjectEXT($0, true)"; case spirv: uint iCandidateOrCommitted = 1; return spirv_asm { OpRayQueryGetIntersectionWorldToObjectKHR $$float4x3 result &this $iCandidateOrCommitted; }; } } ///~ ${{{{ const char* kCandidateCommitted[] = {"Candidate", "Committed"}; // Access Candidate and Committed Matrices. for (uint32_t candidateOrCommitted = 0; candidateOrCommitted < 2; candidateOrCommitted++) { auto ccName = kCandidateCommitted[candidateOrCommitted]; auto ccTF = candidateOrCommitted == 0 ? "false" : "true"; }}}} __glsl_extension(GL_EXT_ray_query) __glsl_extension(GL_EXT_ray_tracing_position_fetch) [require(glsl, rayquery_position)] [__NoSideEffect] void __glslGetIntersectionTriangleVertexPositions$(ccName)(out float3 arr[3]) { __intrinsic_asm "rayQueryGetIntersectionTriangleVertexPositionsEXT($0, $(ccTF), $1)"; }; __glsl_extension(GL_EXT_ray_query) [require(glsl, rayquery_position)] [require(spirv, rayquery_position)] [__NoSideEffect] float3[3] $(ccName)GetIntersectionTriangleVertexPositions() { typedef float3[3] float3Arr3; __target_switch { case glsl: float3 output[3]; __glslGetIntersectionTriangleVertexPositions$(ccName)(output); return output; case spirv: uint iCandidateOrCommitted = $(candidateOrCommitted); return spirv_asm { OpCapability RayQueryKHR; OpCapability RayQueryPositionFetchKHR; OpExtension "SPV_KHR_ray_query"; OpExtension "SPV_KHR_ray_tracing_position_fetch"; result: $$float3Arr3 = OpRayQueryGetIntersectionTriangleVertexPositionsKHR &this $iCandidateOrCommitted; }; } }; // CandidateObjectToWorld3x4, CandidateWorldToObject4x3 // CommittedObjectToWorld3x4, CommittedObjectToWorld4x3 ${{{{ const char* kRayQueryMatrixNames[] = {"ObjectToWorld", "WorldToObject"}; for (auto matName : kRayQueryMatrixNames) { }}}} __glsl_extension(GL_EXT_ray_query) 
[__NoSideEffect] [NonUniformReturn] [require(glsl_hlsl_spirv, rayquery)] float3x4 $(ccName)$(matName)3x4() { __target_switch { case glsl: __intrinsic_asm "transpose(rayQueryGetIntersection$(matName)EXT($0, $(ccTF)))"; case hlsl: __intrinsic_asm ".$(ccName)$(matName)3x4"; case spirv: uint iCandidateOrCommitted = $(candidateOrCommitted); return spirv_asm { %m:$$float4x3 = OpRayQueryGetIntersection$(matName)KHR &this $iCandidateOrCommitted; result:$$float3x4 = OpTranspose %m; }; } } __glsl_extension(GL_EXT_ray_query) [__readNone] [NonUniformReturn] [require(glsl_hlsl_spirv, rayquery)] float4x3 $(ccName)$(matName)4x3() { __target_switch { case glsl: __intrinsic_asm "rayQueryGetIntersection$(matName)EXT($0, $(ccTF))"; case hlsl: __intrinsic_asm ".$(ccName)$(matName)4x3"; case spirv: uint iCandidateOrCommitted = $(candidateOrCommitted); return spirv_asm { result:$$float4x3 = OpRayQueryGetIntersection$(matName)KHR &this $iCandidateOrCommitted; }; } } ${{{{ } // ObjectToWorld/WorldToObject. // Access Candidate and Committed properties. 
struct RayQueryMethodEntry { const char* type; const char* hlslName; const char* glslName; }; const RayQueryMethodEntry rayQueryMethods[] = { {"uint", "InstanceIndex", "InstanceId"}, {"uint", "InstanceID", "InstanceCustomIndex"}, {"uint", "PrimitiveIndex", "PrimitiveIndex"}, {"uint", "GeometryIndex", "GeometryIndex"}, {"uint", "InstanceContributionToHitGroupIndex", "InstanceShaderBindingTableRecordOffset"}, {"float3", "ObjectRayOrigin", "ObjectRayOrigin"}, {"float3", "ObjectRayDirection", "ObjectRayDirection"}, {"bool", "TriangleFrontFace", "FrontFace"}, {"float2", "TriangleBarycentrics", "Barycentrics"}, }; for (auto method : rayQueryMethods) { }}}} __glsl_extension(GL_EXT_ray_query) [__NoSideEffect] [NonUniformReturn] [require(glsl_hlsl_spirv, rayquery)] $(method.type) $(ccName)$(method.hlslName)() { __target_switch { case hlsl: __intrinsic_asm ".$(ccName)$(method.hlslName)"; case glsl: __intrinsic_asm "rayQueryGetIntersection$(method.glslName)EXT($0, $(ccTF))"; case spirv: uint iCandidateOrCommitted = $(candidateOrCommitted); return spirv_asm { result:$$$(method.type) = OpRayQueryGetIntersection$(method.glslName)KHR &this $iCandidateOrCommitted; }; } } ${{{{ } // Candidate/Committed properties. } // for ("Candidate", "Committed") }}}} // Access properties of the ray being traced. 
// Ray flags of this query: HLSL `.RayFlags`, GLSL `rayQueryGetRayFlagsEXT`,
// SPIR-V `OpRayQueryGetRayFlagsKHR`.
__glsl_extension(GL_EXT_ray_query)
[__NoSideEffect]
[NonUniformReturn]
[require(glsl_hlsl_spirv, rayquery)]
uint RayFlags()
{
    __target_switch
    {
    case hlsl: __intrinsic_asm ".RayFlags";
    case glsl: __intrinsic_asm "rayQueryGetRayFlagsEXT";
    case spirv:
        return spirv_asm {
            result:$$uint = OpRayQueryGetRayFlagsKHR &this;
        };
    }
}

// World-space ray origin of this query: HLSL `.WorldRayOrigin`,
// GLSL `rayQueryGetWorldRayOriginEXT`, SPIR-V `OpRayQueryGetWorldRayOriginKHR`.
__glsl_extension(GL_EXT_ray_query)
[__NoSideEffect]
[NonUniformReturn]
[require(glsl_hlsl_spirv, rayquery)]
float3 WorldRayOrigin()
{
    __target_switch
    {
    case hlsl: __intrinsic_asm ".WorldRayOrigin";
    case glsl: __intrinsic_asm "rayQueryGetWorldRayOriginEXT";
    case spirv:
        return spirv_asm {
            result:$$float3 = OpRayQueryGetWorldRayOriginKHR &this;
        };
    }
}

// World-space ray direction of this query: HLSL `.WorldRayDirection`,
// GLSL `rayQueryGetWorldRayDirectionEXT`, SPIR-V `OpRayQueryGetWorldRayDirectionKHR`.
__glsl_extension(GL_EXT_ray_query)
[__NoSideEffect]
[NonUniformReturn]
[require(glsl_hlsl_spirv, rayquery)]
float3 WorldRayDirection()
{
    __target_switch
    {
    case hlsl: __intrinsic_asm ".WorldRayDirection";
    case glsl: __intrinsic_asm "rayQueryGetWorldRayDirectionEXT";
    case spirv:
        return spirv_asm {
            result:$$float3 = OpRayQueryGetWorldRayDirectionKHR &this;
        };
    }
}

// Ray TMin of this query: HLSL `.RayTMin`, GLSL `rayQueryGetRayTMinEXT`,
// SPIR-V `OpRayQueryGetRayTMinKHR`.
__glsl_extension(GL_EXT_ray_query)
[__NoSideEffect]
[NonUniformReturn]
[require(glsl_hlsl_spirv, rayquery)]
float RayTMin()
{
    __target_switch
    {
    case hlsl: __intrinsic_asm ".RayTMin";
    case glsl: __intrinsic_asm "rayQueryGetRayTMinEXT";
    case spirv:
        return spirv_asm {
            result:$$float = OpRayQueryGetRayTMinKHR &this;
        };
    }
};
}

//
// SubpassInput
//

// Backing intrinsic type shared by the `SubpassInput` and `SubpassInputMS` aliases below.
__magic_type(SubpassInputType)
__intrinsic_type($(kIROp_SubpassInputType))
[require(glsl_hlsl_spirv, subpass)]
struct __SubpassImpl
{
}

__generic
typealias SubpassInput = __SubpassImpl;

__generic
extension __SubpassImpl
{
    // Loads the subpass input value. The SPIR-V path reads the image at coordinate (0, 0).
    [ForceInline]
    [require(glsl_hlsl_spirv, subpass)]
    T SubpassLoad()
    {
        __target_switch
        {
        case hlsl: __intrinsic_asm "$0.SubpassLoad()";
        case glsl: __intrinsic_asm "subpassLoad($0)";
        case spirv:
            {
                let zeroVec = int2(0);
                return spirv_asm {
                    OpCapability StorageImageReadWithoutFormat;
                    result:$$T = OpImageRead $this $zeroVec
                };
            }
        }
    }
}

__generic
typealias SubpassInputMS = __SubpassImpl;

__generic
extension __SubpassImpl
{
    // Loads the subpass input value for the given sample. The SPIR-V path reads the
    // image at coordinate (0, 0) with the `Sample` image operand set to `sample`.
    [ForceInline]
    [require(glsl_hlsl_spirv, subpass)]
    T SubpassLoad(int sample)
    {
        __target_switch
        {
        case hlsl: __intrinsic_asm "$0.SubpassLoad($1)";
        case glsl: __intrinsic_asm "subpassLoad($0, $1)";
        case spirv:
            {
                let zeroVec = int2(0);
                return spirv_asm {
                    OpCapability StorageImageReadWithoutFormat;
                    result:$$T = OpImageRead $this $zeroVec Sample $sample
                };
            }
        }
    }
}

///
/// Shader Execution Reordering (SER)
///
/// NOTE! This API is currently experimental and may change in the future as SER is made available
/// in different APIs and downstream compilers.
///
/// Based on the NVAPI on D3D12 only currently.
///
/// White paper on SER on NVAPI https://developer.nvidia.com/sites/default/files/akamai/gameworks/ser-whitepaper.pdf
///
/// The NVAPI headers (R520) required for this functionality to work can be found here...
///
/// https://developer.nvidia.com/rtx/path-tracing/nvapi/get-started
///
/// For VK the specification is currently in this PR
///
/// https://github.com/KhronosGroup/GLSL/pull/196/files

/// Internal helper functions

// This is a bit of a hack for GLSL HitObjectAttributes.
// It relies on [ForceInline] removing the surrounding function and just inserting the *contained* `t` as a global.
// The __ref should indicate the desire for the returned value to not be a copy of t, but *t*.
// In practice __ref doesn't have this effect.
//
// We need this to be able to access the payload outside of a function (which is all that TraceRay for example needs).
// We access the HitObjectAttributes via this function for the desired type, and it acts *as if* it's just an access
// to the global t.
[ForceInline]
Ref __hitObjectAttributes()
{
    [__vulkanHitObjectAttributes]
    static T t;
    return t;
}

// Same storage trick as above, but hands back the address of the static
// (via `__get_addr`) instead of a reference.
[ForceInline]
__Addr __allocHitObjectAttributes()
{
    [__vulkanHitObjectAttributes]
    static T t;
    return __get_addr(t);
}

// Next is the custom intrinsic that will compute the hitObjectAttributes location
// for GLSL-based targets.
//
__generic
__intrinsic_op($(kIROp_GetVulkanRayTracingPayloadLocation))
int __hitObjectAttributesLocation(__ref Attributes attributes);

/// Immutable data type representing a ray hit or a miss. Can be used to invoke hit or miss shading,
/// or as a key in ReorderThread. Created by one of several methods described below. HitObject
/// and its related functions are available in raytracing shader types only.
__glsl_extension(GL_NV_shader_invocation_reorder)
__glsl_extension(GL_EXT_ray_tracing)
[__NonCopyableType]
__intrinsic_type($(kIROp_HitObjectType))
struct HitObject
{
// Default initializer; maps to the `kIROp_AllocateOpaqueHandle` intrinsic op.
__intrinsic_op($(kIROp_AllocateOpaqueHandle))
__init();

/// Executes ray traversal (including anyhit and intersection shaders) like TraceRay, but returns the
/// resulting hit information as a HitObject and does not trigger closesthit or miss shaders.
[ForceInline]
[require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)]
static HitObject TraceRay(
    RaytracingAccelerationStructure AccelerationStructure,
    uint RayFlags,
    uint InstanceInclusionMask,
    uint RayContributionToHitGroupIndex,
    uint MultiplierForGeometryContributionToHitGroupIndex,
    uint MissShaderIndex,
    RayDesc Ray,
    inout payload_t Payload)
{
    __target_switch
    {
    case hlsl:
        {
            // The helper fills `hitObj` through its last argument.
            HitObject hitObj;
            __hlslTraceRay(
                AccelerationStructure,
                RayFlags,
                InstanceInclusionMask,
                RayContributionToHitGroupIndex,
                MultiplierForGeometryContributionToHitGroupIndex,
                MissShaderIndex,
                Ray,
                __forceVarIntoStructTemporarily(Payload),
                hitObj);
            return hitObj;
        }
    case glsl:
        {
            // The payload is staged in a static tagged [__vulkanRayPayload] so that
            // its location can be passed to the trace call.
            [__vulkanRayPayload]
            static payload_t p;

            // Save the payload
            p = Payload;

            // The result is written straight into `__return_val`.
            __glslTraceRay(
                __return_val,
                AccelerationStructure,
                RayFlags,                                           // Assumes D3D/VK have the same RayFlags values
                InstanceInclusionMask,                              // cullMask
                RayContributionToHitGroupIndex,                     // sbtRecordOffset
                MultiplierForGeometryContributionToHitGroupIndex,   // sbtRecordStride
                MissShaderIndex,
                Ray.Origin,
                Ray.TMin,
                Ray.Direction,
                Ray.TMax,
                __rayPayloadLocation(p));

            // Write the payload out
            Payload = p;
        }
    case spirv:
        {
            [__vulkanRayPayload]
            static payload_t p;

            // Save the payload
            p = Payload;

            // Copy the ray fields into locals so they can be referenced as `$name` operands.
            let origin = Ray.Origin;
            let direction = Ray.Direction;
            let tmin = Ray.TMin;
            let tmax = Ray.TMax;
            spirv_asm {
                OpExtension "SPV_NV_shader_invocation_reorder";
                OpCapability ShaderInvocationReorderNV;
                OpHitObjectTraceRayNV
                    /**/ &__return_val
                    /**/ $AccelerationStructure
                    /**/ $RayFlags
                    /**/ $InstanceInclusionMask
                    /**/ $RayContributionToHitGroupIndex
                    /**/ $MultiplierForGeometryContributionToHitGroupIndex
                    /**/ $MissShaderIndex
                    /**/ $origin
                    /**/ $tmin
                    /**/ $direction
                    /**/ $tmax
                    /**/ &p;
            };

            // Write the payload out
            Payload = p;
        }
    }
}

/// Executes motion ray traversal (including anyhit and intersection shaders) like TraceRay, but returns the
/// resulting hit information as a HitObject and does not trigger closesthit or miss shaders.
[ForceInline]
[require(glsl_hlsl_spirv, ser_motion_raygen_closesthit_miss)]
static HitObject TraceMotionRay(
    RaytracingAccelerationStructure AccelerationStructure,
    uint RayFlags,
    uint InstanceInclusionMask,
    uint RayContributionToHitGroupIndex,
    uint MultiplierForGeometryContributionToHitGroupIndex,
    uint MissShaderIndex,
    RayDesc Ray,
    float CurrentTime,
    inout payload_t Payload)
{
    __target_switch
    {
    case hlsl:
        // NOTE(review): unlike the hlsl case of TraceRay, the helper's result is neither
        // returned nor stored into `__return_val` here — confirm how `__traceMotionRayHLSL`
        // is expected to produce this function's return value.
        __traceMotionRayHLSL(
            AccelerationStructure,
            RayFlags,
            InstanceInclusionMask,
            RayContributionToHitGroupIndex,
            MultiplierForGeometryContributionToHitGroupIndex,
            MissShaderIndex,
            Ray,
            CurrentTime,
            __forceVarIntoStructTemporarily(Payload));
    case glsl:
        {
            [__vulkanRayPayload]
            static payload_t p;

            // Save the payload
            p = Payload;

            // The result is written straight into `__return_val`.
            __glslTraceMotionRay(
                __return_val,
                AccelerationStructure,
                RayFlags,                                           // Assumes D3D/VK have the same RayFlags values
                InstanceInclusionMask,                              // cullMask
                RayContributionToHitGroupIndex,                     // sbtRecordOffset
                MultiplierForGeometryContributionToHitGroupIndex,   // sbtRecordStride
                MissShaderIndex,
                Ray.Origin,
                Ray.TMin,
                Ray.Direction,
                Ray.TMax,
                CurrentTime,
                __rayPayloadLocation(p));

            // Write the payload out
            Payload = p;
        }
    case spirv:
        {
            [__vulkanRayPayload]
            static payload_t p;

            // Save the payload
            p = Payload;

            // Copy the ray fields into locals so they can be referenced as `$name` operands.
            let origin = Ray.Origin;
            let direction = Ray.Direction;
            let tmin = Ray.TMin;
            let tmax = Ray.TMax;
            spirv_asm {
                OpExtension "SPV_NV_shader_invocation_reorder";
                OpCapability ShaderInvocationReorderNV;
                OpExtension "SPV_NV_ray_tracing_motion_blur";
                OpCapability RayTracingMotionBlurNV;
                OpHitObjectTraceRayMotionNV
                    /**/ &__return_val
                    /**/ $AccelerationStructure
                    /**/ $RayFlags
                    /**/ $InstanceInclusionMask
                    /**/ $RayContributionToHitGroupIndex
                    /**/ $MultiplierForGeometryContributionToHitGroupIndex
                    /**/ $MissShaderIndex
                    /**/ $origin
                    /**/ $tmin
                    /**/ $direction
                    /**/ $tmax
                    /**/ $CurrentTime
                    /**/ &p;
            };

            // Write the payload out
            Payload = p;
        }
    }
}

/// Creates a HitObject representing a hit based on values explicitly passed as arguments, without
/// tracing a ray. The primitive specified by AccelerationStructure, InstanceIndex, GeometryIndex,
/// and PrimitiveIndex must exist. The shader table index is computed using the formula used with
/// TraceRay. The computed index must reference a valid hit group record in the shader table. The
/// Attributes parameter must either be an attribute struct, such as
/// BuiltInTriangleIntersectionAttributes, or another HitObject to copy the attributes from.
[ForceInline]
[require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)]
static HitObject MakeHit(
    RaytracingAccelerationStructure AccelerationStructure,
    uint InstanceIndex,
    uint GeometryIndex,
    uint PrimitiveIndex,
    uint HitKind,
    uint RayContributionToHitGroupIndex,
    uint MultiplierForGeometryContributionToHitGroupIndex,
    RayDesc Ray,
    attr_t attributes)
{
    __target_switch
    {
    case hlsl:
        // The HLSL helper produces the hit object through its trailing argument.
        HitObject madeHit;
        __hlslMakeHit(
            AccelerationStructure,
            InstanceIndex,
            GeometryIndex,
            PrimitiveIndex,
            HitKind,
            RayContributionToHitGroupIndex,
            MultiplierForGeometryContributionToHitGroupIndex,
            Ray,
            attributes,
            madeHit);
        return madeHit;
    case glsl:
        {
            // Stage the attributes in the hit-object-attribute storage first.
            __hitObjectAttributes() = attributes;
            // Note the argument order: primitive index precedes geometry index.
            __glslMakeHit(
                __return_val,
                AccelerationStructure,
                InstanceIndex,
                PrimitiveIndex,
                GeometryIndex,
                HitKind,
                RayContributionToHitGroupIndex,                     /// sbtRecordOffset?
                MultiplierForGeometryContributionToHitGroupIndex,   /// sbtRecordStride?
                Ray.Origin,
                Ray.TMin,
                Ray.Direction,
                Ray.TMax,
                __hitObjectAttributesLocation(__hitObjectAttributes()));
        }
    case spirv:
        {
            // Stage the attributes in the hit-object-attribute storage first.
            __Addr attrStorage = __allocHitObjectAttributes();
            *attrStorage = attributes;

            // Named locals so the ray fields can be used as `$name` operands.
            let rayOrigin = Ray.Origin;
            let rayTMin = Ray.TMin;
            let rayDirection = Ray.Direction;
            let rayTMax = Ray.TMax;
            spirv_asm {
                OpCapability ShaderInvocationReorderNV;
                OpExtension "SPV_NV_shader_invocation_reorder";
                OpHitObjectRecordHitNV
                    /**/ &__return_val
                    /**/ $AccelerationStructure
                    /**/ $InstanceIndex
                    /**/ $PrimitiveIndex
                    /**/ $GeometryIndex
                    /**/ $HitKind
                    /**/ $RayContributionToHitGroupIndex
                    /**/ $MultiplierForGeometryContributionToHitGroupIndex
                    /**/ $rayOrigin
                    /**/ $rayTMin
                    /**/ $rayDirection
                    /**/ $rayTMax
                    /**/ $attrStorage;
            };
        }
    }
}

/// Motion variant of MakeHit: takes an additional CurrentTime.
/// Currently only supported on VK
[ForceInline]
[require(glsl_hlsl_spirv, ser_motion_raygen_closesthit_miss)]
static HitObject MakeMotionHit(
    RaytracingAccelerationStructure AccelerationStructure,
    uint InstanceIndex,
    uint GeometryIndex,
    uint PrimitiveIndex,
    uint HitKind,
    uint RayContributionToHitGroupIndex,
    uint MultiplierForGeometryContributionToHitGroupIndex,
    RayDesc Ray,
    float CurrentTime,
    attr_t attributes)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "MakeMotionHit";
    case glsl:
        {
            // Stage the attributes in the hit-object-attribute storage first.
            __hitObjectAttributes() = attributes;
            // Note the argument order: primitive index precedes geometry index.
            __glslMakeMotionHit(
                __return_val,
                AccelerationStructure,
                InstanceIndex,
                PrimitiveIndex,
                GeometryIndex,
                HitKind,
                RayContributionToHitGroupIndex,                     /// sbtRecordOffset?
                MultiplierForGeometryContributionToHitGroupIndex,   /// sbtRecordStride?
                Ray.Origin,
                Ray.TMin,
                Ray.Direction,
                Ray.TMax,
                CurrentTime,
                __hitObjectAttributesLocation(__hitObjectAttributes()));
        }
    case spirv:
        {
            // Stage the attributes in the hit-object-attribute storage first.
            __Addr attrStorage = __allocHitObjectAttributes();
            *attrStorage = attributes;

            // Named locals so the ray fields can be used as `$name` operands.
            let rayOrigin = Ray.Origin;
            let rayTMin = Ray.TMin;
            let rayDirection = Ray.Direction;
            let rayTMax = Ray.TMax;
            spirv_asm {
                OpCapability RayTracingMotionBlurNV;
                OpExtension "SPV_NV_ray_tracing_motion_blur";
                OpCapability ShaderInvocationReorderNV;
                OpExtension "SPV_NV_shader_invocation_reorder";
                OpHitObjectRecordHitMotionNV
                    /**/ &__return_val
                    /**/ $AccelerationStructure
                    /**/ $InstanceIndex
                    /**/ $PrimitiveIndex
                    /**/ $GeometryIndex
                    /**/ $HitKind
                    /**/ $RayContributionToHitGroupIndex
                    /**/ $MultiplierForGeometryContributionToHitGroupIndex
                    /**/ $rayOrigin
                    /**/ $rayTMin
                    /**/ $rayDirection
                    /**/ $rayTMax
                    /**/ $CurrentTime
                    /**/ $attrStorage;
            };
        }
    }
}

/// Creates a HitObject representing a hit based on values explicitly passed as arguments, without
/// tracing a ray. The primitive specified by AccelerationStructure, InstanceIndex, GeometryIndex,
/// and PrimitiveIndex must exist. The shader table index is explicitly provided as an argument
/// instead of being computed from the indexing formula used in TraceRay. The provided index must
/// reference a valid hit group record in the shader table. The Attributes parameter must either be an
/// attribute struct, such as BuiltInTriangleIntersectionAttributes, or another HitObject to copy the
/// attributes from.
[ForceInline]
[require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)]
static HitObject MakeHit(
    uint HitGroupRecordIndex,
    RaytracingAccelerationStructure AccelerationStructure,
    uint InstanceIndex,
    uint GeometryIndex,
    uint PrimitiveIndex,
    uint HitKind,
    RayDesc Ray,
    attr_t attributes)
{
    __target_switch
    {
    case hlsl:
        // The HLSL helper produces the hit object through its trailing argument.
        HitObject hitObj;
        __hlslMakeHitWithRecordIndex(
            HitGroupRecordIndex,
            AccelerationStructure,
            InstanceIndex,
            GeometryIndex,
            PrimitiveIndex,
            HitKind,
            Ray,
            attributes,
            hitObj);
        return hitObj;
    case glsl:
        {
            // Save the attributes
            __hitObjectAttributes() = attributes;

            // The result is written straight into `__return_val`.
            __glslMakeHitWithIndex(
                __return_val,
                AccelerationStructure,
                InstanceIndex,                  ///? Same as instanceid ?
                PrimitiveIndex,
                GeometryIndex,
                HitKind,                        /// Assuming HitKinds are compatible
                HitGroupRecordIndex,            /// sbtRecordIndex
                Ray.Origin,
                Ray.TMin,
                Ray.Direction,
                Ray.TMax,
                __hitObjectAttributesLocation(__hitObjectAttributes()));
        }
    case spirv:
        {
            // Save the attributes
            __Addr attr = __allocHitObjectAttributes();
            *attr = attributes;

            // Copy the ray fields into locals so they can be referenced as `$name` operands.
            let origin = Ray.Origin;
            let direction = Ray.Direction;
            let tmin = Ray.TMin;
            let tmax = Ray.TMax;
            spirv_asm {
                OpExtension "SPV_NV_shader_invocation_reorder";
                OpCapability ShaderInvocationReorderNV;
                OpHitObjectRecordHitWithIndexNV
                    /**/ &__return_val
                    /**/ $AccelerationStructure
                    /**/ $InstanceIndex
                    /**/ $PrimitiveIndex
                    /**/ $GeometryIndex
                    /**/ $HitKind
                    /**/ $HitGroupRecordIndex
                    /**/ $origin
                    /**/ $tmin
                    /**/ $direction
                    /**/ $tmax
                    /**/ $attr;
            };
        }
    }
}

/// See MakeHit but handles Motion
/// Currently only supported on VK
[ForceInline]
[require(glsl_spirv, ser_motion_raygen_closesthit_miss)]
static HitObject MakeMotionHit(
    uint HitGroupRecordIndex,
    RaytracingAccelerationStructure AccelerationStructure,
    uint InstanceIndex,
    uint GeometryIndex,
    uint PrimitiveIndex,
    uint HitKind,
    RayDesc Ray,
    float CurrentTime,
    attr_t attributes)
{
    __target_switch
    {
    case glsl:
        {
            // Save the attributes
            __hitObjectAttributes() = attributes;

            // The result is written straight into `__return_val`.
            __glslMakeMotionHitWithIndex(
                __return_val,
                AccelerationStructure,
                InstanceIndex,                  ///? Same as instanceid ?
                PrimitiveIndex,
                GeometryIndex,
                HitKind,                        /// Assuming HitKinds are compatible
                HitGroupRecordIndex,            /// sbtRecordIndex
                Ray.Origin,
                Ray.TMin,
                Ray.Direction,
                Ray.TMax,
                CurrentTime,
                __hitObjectAttributesLocation(__hitObjectAttributes()));
        }
    case spirv:
        {
            // Save the attributes
            __Addr attr = __allocHitObjectAttributes();
            *attr = attributes;

            // Copy the ray fields into locals so they can be referenced as `$name` operands.
            let origin = Ray.Origin;
            let direction = Ray.Direction;
            let tmin = Ray.TMin;
            let tmax = Ray.TMax;
            spirv_asm {
                OpExtension "SPV_NV_ray_tracing_motion_blur";
                OpCapability RayTracingMotionBlurNV;
                // Declared explicitly, matching the other motion HitObject ops
                // (MakeMotionHit without record index, MakeMotionMiss).
                OpExtension "SPV_NV_shader_invocation_reorder";
                OpCapability ShaderInvocationReorderNV;
                OpHitObjectRecordHitWithIndexMotionNV
                    /**/ &__return_val
                    /**/ $AccelerationStructure
                    /**/ $InstanceIndex
                    /**/ $PrimitiveIndex
                    /**/ $GeometryIndex
                    /**/ $HitKind
                    /**/ $HitGroupRecordIndex
                    /**/ $origin
                    /**/ $tmin
                    /**/ $direction
                    /**/ $tmax
                    /**/ $CurrentTime
                    /**/ $attr;
            };
        }
    }
}

/// Creates a HitObject representing a miss based on values explicitly passed as arguments, without
/// tracing a ray. The provided shader table index must reference a valid miss record in the shader
/// table.
[__requiresNVAPI]
[ForceInline]
[require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)]
static HitObject MakeMiss(
    uint MissShaderIndex,
    RayDesc Ray)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "($2=NvMakeMiss($0,$1))";
    case glsl:
        // The helper writes the miss record straight into `__return_val`.
        __glslMakeMiss(
            __return_val,
            MissShaderIndex,
            Ray.Origin,
            Ray.TMin,
            Ray.Direction,
            Ray.TMax);
    case spirv:
        {
            // Named locals so the ray fields can be used as `$name` operands.
            let rayOrigin = Ray.Origin;
            let rayTMin = Ray.TMin;
            let rayDirection = Ray.Direction;
            let rayTMax = Ray.TMax;
            spirv_asm {
                OpCapability ShaderInvocationReorderNV;
                OpExtension "SPV_NV_shader_invocation_reorder";
                OpHitObjectRecordMissNV
                    /**/ &__return_val
                    /**/ $MissShaderIndex
                    /**/ $rayOrigin
                    /**/ $rayTMin
                    /**/ $rayDirection
                    /**/ $rayTMax;
            };
        }
    }
}

/// Motion variant of MakeMiss: takes an additional CurrentTime.
/// Currently only supported on VK
[ForceInline]
[require(glsl_hlsl_spirv, ser_motion_raygen_closesthit_miss)]
static HitObject MakeMotionMiss(
    uint MissShaderIndex,
    RayDesc Ray,
    float CurrentTime)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "($3=NvMakeMotionMiss($0,$1,$2))";
    case glsl:
        // The helper writes the miss record straight into `__return_val`.
        __glslMakeMotionMiss(
            __return_val,
            MissShaderIndex,
            Ray.Origin,
            Ray.TMin,
            Ray.Direction,
            Ray.TMax,
            CurrentTime);
    case spirv:
        {
            // Named locals so the ray fields can be used as `$name` operands.
            let rayOrigin = Ray.Origin;
            let rayTMin = Ray.TMin;
            let rayDirection = Ray.Direction;
            let rayTMax = Ray.TMax;
            spirv_asm {
                OpCapability RayTracingMotionBlurNV;
                OpExtension "SPV_NV_ray_tracing_motion_blur";
                OpCapability ShaderInvocationReorderNV;
                OpExtension "SPV_NV_shader_invocation_reorder";
                OpHitObjectRecordMissMotionNV
                    /**/ &__return_val
                    /**/ $MissShaderIndex
                    /**/ $rayOrigin
                    /**/ $rayTMin
                    /**/ $rayDirection
                    /**/ $rayTMax
                    /**/ $CurrentTime;
            };
        }
    }
}

/// Creates a HitObject representing “NOP” (no operation) which is neither a hit nor a miss. Invoking a
/// NOP hit object using HitObject::Invoke has no effect. Reordering by hit objects using
/// ReorderThread will group NOP hit objects together. This can be useful in some reordering
/// scenarios where future control flow for some threads is known to process neither a hit nor a
/// miss.
[__requiresNVAPI]
[ForceInline]
[require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)]
static HitObject MakeNop()
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "($0 = NvMakeNop())";
    case glsl:
        // The helper writes the empty record straight into `__return_val`.
        __glslMakeNop(__return_val);
    case spirv:
        spirv_asm {
            OpCapability ShaderInvocationReorderNV;
            OpExtension "SPV_NV_shader_invocation_reorder";
            OpHitObjectRecordEmptyNV
                /**/ &__return_val;
        };
    }
}

// HLSL-only helper behind Invoke: emits a call to `NvInvokeHitObject`.
[require(hlsl, ser)]
__generic
static void __InvokeHLSL(
    RaytracingAccelerationStructure AccelerationStructure,
    HitObject HitOrMiss,
    inout payload_t Payload)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "NvInvokeHitObject";
    }
}

/// Runs closesthit or miss shading for the given hit object. For a NOP HitObject no
/// shader is invoked.
[__requiresNVAPI]
[ForceInline]
[require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)]
static void Invoke(
    RaytracingAccelerationStructure AccelerationStructure,
    HitObject HitOrMiss,
    inout payload_t Payload)
{
    __target_switch
    {
    case hlsl:
        __InvokeHLSL(
            AccelerationStructure,
            HitOrMiss,
            __forceVarIntoStructTemporarily(Payload));
    case glsl:
        {
            // Payload is staged in a static tagged [__vulkanRayPayload].
            [__vulkanRayPayload]
            static payload_t p;

            // Copy in, invoke, copy back out.
            p = Payload;
            __glslInvoke(HitOrMiss, __rayPayloadLocation(p));
            Payload = p;
        }
    case spirv:
        {
            // Payload is staged in a static tagged [__vulkanRayPayload].
            [__vulkanRayPayload]
            static payload_t p;

            // Copy in, invoke, copy back out.
            p = Payload;
            spirv_asm {
                OpCapability ShaderInvocationReorderNV;
                OpExtension "SPV_NV_shader_invocation_reorder";
                OpHitObjectExecuteShaderNV
                    /**/ &HitOrMiss
                    /**/ &p;
            };
            Payload = p;
        }
    }
}

/// Returns true if the HitObject encodes a miss, otherwise returns false.
[__requiresNVAPI]
[ForceInline]
[require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)]
bool IsMiss()
{
    __target_switch
    {
    case hlsl: __intrinsic_asm ".IsMiss";
    case glsl: __intrinsic_asm "hitObjectIsMissNV($0)";
    case spirv:
        return spirv_asm {
            OpCapability ShaderInvocationReorderNV;
            OpExtension "SPV_NV_shader_invocation_reorder";
            result:$$bool = OpHitObjectIsMissNV &this;
        };
    }
}

/// True when this HitObject encodes a hit; false otherwise.
[__requiresNVAPI]
[ForceInline]
[require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)]
bool IsHit()
{
    __target_switch
    {
    case hlsl: __intrinsic_asm ".IsHit";
    case glsl: __intrinsic_asm "hitObjectIsHitNV($0)";
    case spirv:
        return spirv_asm {
            OpCapability ShaderInvocationReorderNV;
            OpExtension "SPV_NV_shader_invocation_reorder";
            result:$$bool = OpHitObjectIsHitNV &this;
        };
    }
}

/// True when this HitObject encodes a nop; false otherwise.
/// (GLSL and SPIR-V name this state "empty".)
[__requiresNVAPI]
[ForceInline]
[require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)]
bool IsNop()
{
    __target_switch
    {
    case hlsl: __intrinsic_asm ".IsNop";
    case glsl: __intrinsic_asm "hitObjectIsEmptyNV($0)";
    case spirv:
        return spirv_asm {
            OpCapability ShaderInvocationReorderNV;
            OpExtension "SPV_NV_shader_invocation_reorder";
            result:$$bool = OpHitObjectIsEmptyNV &this;
        };
    }
}

/// Queries ray properties from HitObject. Valid if the hit object represents a hit or a miss.
[__requiresNVAPI]
[ForceInline]
[require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)]
RayDesc GetRayDesc()
{
    __target_switch
    {
    case hlsl: __intrinsic_asm ".GetRayDesc";
    case glsl:
        {
            // Assemble the descriptor from the four per-field GLSL queries,
            // in the order origin, tmin, direction, tmax.
            RayDesc desc = {
                __glslGetRayWorldOrigin(),
                __glslGetTMin(),
                __glslGetRayWorldDirection(),
                __glslGetTMax()
            };
            return desc;
        }
    case spirv:
        // Query each field separately, then build the RayDesc composite.
        return spirv_asm {
            OpCapability ShaderInvocationReorderNV;
            OpExtension "SPV_NV_shader_invocation_reorder";
            %origin:$$float3 = OpHitObjectGetWorldRayOriginNV &this;
            %tmin:$$float = OpHitObjectGetRayTMinNV &this;
            %direction:$$float3 = OpHitObjectGetWorldRayDirectionNV &this;
            %tmax:$$float = OpHitObjectGetRayTMaxNV &this;
            result:$$RayDesc = OpCompositeConstruct %origin %tmin %direction %tmax;
        };
    }
}

/// Shader table index stored in the HitObject. Valid when the hit object represents a hit or a miss.
[__requiresNVAPI]
__glsl_extension(GL_EXT_ray_tracing)
[ForceInline]
[require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)]
uint GetShaderTableIndex()
{
    __target_switch
    {
    case hlsl: __intrinsic_asm ".GetShaderTableIndex";
    case glsl: __intrinsic_asm "hitObjectGetShaderBindingTableRecordIndexNV($0)";
    case spirv:
        return spirv_asm {
            OpCapability ShaderInvocationReorderNV;
            OpExtension "SPV_NV_shader_invocation_reorder";
            result:$$uint = OpHitObjectGetShaderBindingTableRecordIndexNV &this;
        };
    }
}

/// Instance index of a hit. Valid when the hit object represents a hit.
/// (Maps to the GLSL/SPIR-V "instance id" query.)
[__requiresNVAPI]
__glsl_extension(GL_EXT_ray_tracing)
[ForceInline]
[require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)]
uint GetInstanceIndex()
{
    __target_switch
    {
    case hlsl: __intrinsic_asm ".GetInstanceIndex";
    case glsl: __intrinsic_asm "hitObjectGetInstanceIdNV($0)";
    case spirv:
        return spirv_asm {
            OpCapability ShaderInvocationReorderNV;
            OpExtension "SPV_NV_shader_invocation_reorder";
            result:$$uint = OpHitObjectGetInstanceIdNV &this;
        };
    }
}

/// Returns the instance ID of a hit. Valid if the hit object represents a hit.
[__requiresNVAPI]
__glsl_extension(GL_EXT_ray_tracing)
[ForceInline]
[require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)]
uint GetInstanceID()
{
    __target_switch
    {
    case hlsl: __intrinsic_asm ".GetInstanceID";
    // The "instance ID" here maps to the GLSL/SPIR-V "InstanceCustomIndex" query
    // (GetInstanceIndex, by contrast, maps to the "InstanceId" query).
    case glsl: __intrinsic_asm "hitObjectGetInstanceCustomIndexNV($0)";
    case spirv:
        return spirv_asm
        {
            OpExtension "SPV_NV_shader_invocation_reorder";
            OpCapability ShaderInvocationReorderNV;
            result:$$uint = OpHitObjectGetInstanceCustomIndexNV &this;
        };
    }
}

/// Returns the geometry index of a hit. Valid if the hit object represents a hit.
[__requiresNVAPI]
__glsl_extension(GL_EXT_ray_tracing)
[ForceInline]
[require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)]
uint GetGeometryIndex()
{
    // Three-target binding: HLSL member call, GLSL builtin on `$0` (this), SPIR-V instruction on `&this`.
    __target_switch
    {
    case hlsl: __intrinsic_asm ".GetGeometryIndex";
    case glsl: __intrinsic_asm "hitObjectGetGeometryIndexNV($0)";
    case spirv:
        return spirv_asm
        {
            OpExtension "SPV_NV_shader_invocation_reorder";
            OpCapability ShaderInvocationReorderNV;
            result:$$uint = OpHitObjectGetGeometryIndexNV &this;
        };
    }
}

/// Returns the primitive index of a hit. Valid if the hit object represents a hit.
[__requiresNVAPI]
__glsl_extension(GL_EXT_ray_tracing)
[ForceInline]
[require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)]
uint GetPrimitiveIndex()
{
    __target_switch
    {
    case hlsl: __intrinsic_asm ".GetPrimitiveIndex";
    case glsl: __intrinsic_asm "hitObjectGetPrimitiveIndexNV($0)";
    case spirv:
        return spirv_asm
        {
            OpExtension "SPV_NV_shader_invocation_reorder";
            OpCapability ShaderInvocationReorderNV;
            result:$$uint = OpHitObjectGetPrimitiveIndexNV &this;
        };
    }
}

/// Returns the hit kind. Valid if the hit object represents a hit.
[__requiresNVAPI]
__glsl_extension(GL_EXT_ray_tracing)
[ForceInline]
[require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)]
uint GetHitKind()
{
    __target_switch
    {
    case hlsl: __intrinsic_asm ".GetHitKind";
    case glsl: __intrinsic_asm "hitObjectGetHitKindNV($0)";
    case spirv:
        return spirv_asm
        {
            OpExtension "SPV_NV_shader_invocation_reorder";
            OpCapability ShaderInvocationReorderNV;
            result:$$uint = OpHitObjectGetHitKindNV &this;
        };
    }
}

// Queries the world-to-object matrix recorded in the hit object (returned as float4x3).
[__requiresNVAPI]
__glsl_extension(GL_EXT_ray_tracing)
[ForceInline]
[require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)]
float4x3 GetWorldToObject()
{
    __target_switch
    {
    case hlsl: __intrinsic_asm ".GetWorldToObject";
    case glsl: __intrinsic_asm "hitObjectGetWorldToObjectNV($0)";
    case spirv:
        return spirv_asm
        {
            OpExtension "SPV_NV_shader_invocation_reorder";
            OpCapability ShaderInvocationReorderNV;
            result:$$float4x3 = OpHitObjectGetWorldToObjectNV &this;
        };
    }
}

// Queries the object-to-world matrix recorded in the hit object (returned as float4x3).
[__requiresNVAPI]
__glsl_extension(GL_EXT_ray_tracing)
[ForceInline]
[require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)]
float4x3 GetObjectToWorld()
{
    __target_switch
    {
    case hlsl: __intrinsic_asm ".GetObjectToWorld";
    case glsl: __intrinsic_asm "hitObjectGetObjectToWorldNV($0)";
    case spirv:
        return spirv_asm
        {
            OpExtension "SPV_NV_shader_invocation_reorder";
            OpCapability ShaderInvocationReorderNV;
            result:$$float4x3 = OpHitObjectGetObjectToWorldNV &this;
        };
    }
}

// The next four queries are declared for GLSL and SPIR-V only (`glsl_spirv`);
// there is no HLSL case and no [__requiresNVAPI] attribute on them.

// Queries the "current time" value of the hit object.
[ForceInline]
[require(glsl_spirv, ser_raygen_closesthit_miss)]
float GetCurrentTime()
{
    __target_switch
    {
    case glsl: __intrinsic_asm "hitObjectGetCurrentTimeNV($0)";
    case spirv:
        return spirv_asm
        {
            OpExtension "SPV_NV_shader_invocation_reorder";
            OpCapability ShaderInvocationReorderNV;
            result:$$float = OpHitObjectGetCurrentTimeNV &this
        };
    }
}

// Queries the object-space ray origin of the hit object.
[ForceInline]
[require(glsl_spirv, ser_raygen_closesthit_miss)]
float3 GetObjectRayOrigin()
{
    __target_switch
    {
    case glsl: __intrinsic_asm "hitObjectGetObjectRayOriginNV($0)";
    case spirv:
        return spirv_asm
        {
            OpExtension "SPV_NV_shader_invocation_reorder";
            OpCapability ShaderInvocationReorderNV;
            result:$$float3 = OpHitObjectGetObjectRayOriginNV &this
        };
    }
}

// Queries the object-space ray direction of the hit object.
[ForceInline]
[require(glsl_spirv, ser_raygen_closesthit_miss)]
float3 GetObjectRayDirection()
{
    __target_switch
    {
    case glsl: __intrinsic_asm "hitObjectGetObjectRayDirectionNV($0)";
    case spirv:
        return spirv_asm
        {
            OpExtension "SPV_NV_shader_invocation_reorder";
            OpCapability ShaderInvocationReorderNV;
            result:$$float3 = OpHitObjectGetObjectRayDirectionNV &this
        };
    }
}

// Queries the shader record buffer handle of the hit object as a uint2.
[ForceInline]
[require(glsl_spirv, ser_raygen_closesthit_miss)]
uint2 GetShaderRecordBufferHandle()
{
    __target_switch
    {
    case glsl: __intrinsic_asm "hitObjectGetShaderRecordBufferHandleNV($0)";
    case spirv:
        return spirv_asm
        {
            OpExtension "SPV_NV_shader_invocation_reorder";
            OpCapability ShaderInvocationReorderNV;
            result:$$uint2 = OpHitObjectGetShaderRecordBufferHandleNV &this
        };
    }
}

/// Returns the attributes of a hit. Valid if the hit object represents a hit or a miss.
// NOTE(review): `attr_t` is not declared anywhere in the visible text; it is
// presumably a generic parameter of this method — confirm against the canonical file.
[ForceInline]
[require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)]
attr_t GetAttributes()
{
    __target_switch
    {
    case hlsl:
        {
            // HLSL: the NVAPI helper fills a local through its `out` parameter.
            attr_t v;
            __hlslGetAttributesFromHitObject(v);
            return v;
        }
    case glsl:
        {
            // Work out the location
            int attributeLocation = __hitObjectAttributesLocation(__hitObjectAttributes());

            // Load the attributes from the location
            __glslGetAttributes(attributeLocation);

            // Return the attributes
            return __hitObjectAttributes();
        }
    case spirv:
        {
            // SPIR-V: the instruction writes through the address obtained from
            // __allocHitObjectAttributes(); the stored value is then read back.
            __Addr attr = __allocHitObjectAttributes();
            spirv_asm
            {
                OpExtension "SPV_NV_shader_invocation_reorder";
                OpCapability ShaderInvocationReorderNV;
                OpHitObjectGetAttributesNV &this $attr;
            };
            return *attr;
        }
    }
}

/// Loads a root constant from the local root table referenced by the hit object. Valid if the hit object
/// represents a hit or a miss. RootConstantOffsetInBytes must be a multiple of 4.
[__requiresNVAPI]
[require(hlsl, ser)]
uint LoadLocalRootTableConstant(uint RootConstantOffsetInBytes)
{
    // HLSL-only: no other target case is provided.
    __target_switch
    {
    case hlsl: __intrinsic_asm ".LoadLocalRootTableConstant";
    }
}

///
/// !!!! Internal NVAPI HLSL impl. Not part of interface! !!!!!!!!!!!!
///

// Member helper: `$0` is the hit object (this) and `$1` the `out` destination.
// NOTE(review): `T` is not declared in the visible text — presumably a generic parameter; confirm.
[__requiresNVAPI]
[require(hlsl, ser_raygen_closesthit_miss)]
void __hlslGetAttributesFromHitObject(out T t)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "NvGetAttributesFromHitObject($0, $1)";
    }
}

// Static binding to NvMakeHitWithRecordIndex; the new hit object is produced through `hitObj`.
[__requiresNVAPI]
[require(hlsl, ser_raygen_closesthit_miss)]
static void __hlslMakeHitWithRecordIndex(
    uint HitGroupRecordIndex,
    RaytracingAccelerationStructure AccelerationStructure,
    uint InstanceIndex,
    uint GeometryIndex,
    uint PrimitiveIndex,
    uint HitKind,
    RayDesc Ray,
    attr_t attributes,
    out HitObject hitObj)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "NvMakeHitWithRecordIndex";
    }
}

// Static binding to NvMakeHit; the new hit object is produced through `hitObj`.
[__requiresNVAPI]
[require(hlsl, ser_raygen_closesthit_miss)]
static void __hlslMakeHit(RaytracingAccelerationStructure AccelerationStructure,
    uint InstanceIndex,
    uint GeometryIndex,
    uint PrimitiveIndex,
    uint HitKind,
    uint RayContributionToHitGroupIndex,
    uint MultiplierForGeometryContributionToHitGroupIndex,
    RayDesc Ray,
    attr_t attributes,
    out HitObject hitObj)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "NvMakeHit";
    }
}

// Static binding to NvTraceRayHitObject; `Payload` is inout and the result is produced through `hitObj`.
// NOTE(review): `payload_t` is not declared in the visible text — presumably a generic parameter; confirm.
[__requiresNVAPI]
[require(hlsl, ser_raygen_closesthit_miss)]
static void __hlslTraceRay(
    RaytracingAccelerationStructure AccelerationStructure,
    uint RayFlags,
    uint InstanceInclusionMask,
    uint RayContributionToHitGroupIndex,
    uint MultiplierForGeometryContributionToHitGroupIndex,
    uint MissShaderIndex,
    RayDesc Ray,
    inout payload_t Payload,
    out HitObject hitObj)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "NvTraceRayHitObject";
    }
}

///
/// !!!! Internal GLSL GL_NV_shader_invocation_reorder impl. Not part of interface! !!!!!!!!!!!!
///

// Static binding to hitObjectRecordMissNV; the recorded object is produced through `hitObj`.
__glsl_extension(GL_NV_shader_invocation_reorder)
__glsl_extension(GL_EXT_ray_tracing)
[require(glsl, ser_raygen_closesthit_miss)]
static void __glslMakeMiss(
    out HitObject hitObj,
    uint MissShaderIndex,
    float3 Origin,
    float TMin,
    float3 Direction,
    float TMax)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "hitObjectRecordMissNV";
    }
}

// "void hitObjectRecordMissNV(hitObjectNV, uint, vec3, float, vec3, float);"
// (The quoted signature above is the non-motion function bound by __glslMakeMiss.
// The motion variant below binds hitObjectRecordMissMotionNV and takes one extra
// trailing float, CurrentTime.)
__glsl_extension(GL_NV_shader_invocation_reorder)
__glsl_extension(GL_EXT_ray_tracing)
__glsl_extension(GL_NV_ray_tracing_motion_blur)
[require(glsl, ser_motion_raygen_closesthit_miss)]
static void __glslMakeMotionMiss(
    out HitObject hitObj,
    uint MissShaderIndex,
    float3 Origin,
    float TMin,
    float3 Direction,
    float TMax,
    float CurrentTime)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "hitObjectRecordMissMotionNV";
    }
}

// Static binding to hitObjectRecordEmptyNV (records a nop/empty hit object into `hitObj`).
__glsl_extension(GL_EXT_ray_tracing)
__glsl_extension(GL_NV_shader_invocation_reorder)
[require(glsl, ser_raygen_closesthit_miss)]
static void __glslMakeNop(out HitObject hitObj)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "hitObjectRecordEmptyNV";
    }
}

// The following member helpers each forward to one GLSL query on `$0` (this hit object).

// NOTE: despite the short name, this binds the *object-space* ray direction query.
__glsl_extension(GL_EXT_ray_tracing)
__glsl_extension(GL_NV_shader_invocation_reorder)
[require(glsl, ser_raygen_closesthit_miss)]
float3 __glslGetRayDirection()
{
    __target_switch
    {
    case glsl: __intrinsic_asm "hitObjectGetObjectRayDirectionNV($0)";
    }
}

// World-space ray direction.
__glsl_extension(GL_EXT_ray_tracing)
__glsl_extension(GL_NV_shader_invocation_reorder)
[require(glsl, ser_raygen_closesthit_miss)]
float3 __glslGetRayWorldDirection()
{
    __target_switch
    {
    case glsl: __intrinsic_asm "hitObjectGetWorldRayDirectionNV($0)";
    }
}

// World-space ray origin.
__glsl_extension(GL_EXT_ray_tracing)
__glsl_extension(GL_NV_shader_invocation_reorder)
[require(glsl, ser_raygen_closesthit_miss)]
float3 __glslGetRayWorldOrigin()
{
    __target_switch
    {
    case glsl: __intrinsic_asm "hitObjectGetWorldRayOriginNV($0)";
    }
}

// Ray TMax.
__glsl_extension(GL_EXT_ray_tracing)
__glsl_extension(GL_NV_shader_invocation_reorder)
[require(glsl, ser_raygen_closesthit_miss)]
float __glslGetTMax()
{
    __target_switch
    {
    case glsl: __intrinsic_asm "hitObjectGetRayTMaxNV($0)";
    }
}

// Ray TMin.
__glsl_extension(GL_EXT_ray_tracing)
__glsl_extension(GL_NV_shader_invocation_reorder)
[require(glsl, ser_raygen_closesthit_miss)]
float __glslGetTMin()
{
    __target_switch
    {
    case glsl: __intrinsic_asm "hitObjectGetRayTMinNV($0)";
    }
}

// "void hitObjectRecordHitWithIndexNV(hitObjectNV, accelerationStructureEXT,int,int,int,uint,uint,vec3,float,vec3,float,int);"
__glsl_extension(GL_EXT_ray_tracing)
__glsl_extension(GL_NV_shader_invocation_reorder)
[require(glsl, ser_raygen_closesthit_miss)]
static void __glslMakeHitWithIndex(
    out HitObject hitObj,
    RaytracingAccelerationStructure accelerationStructure,
    int instanceid,
    int primitiveid,
    int geometryindex,
    uint hitKind,
    uint sbtRecordIndex,
    float3 origin,
    float Tmin,
    float3 direction,
    float Tmax,
    int attributeLocation)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "hitObjectRecordHitWithIndexNV";
    }
}

// "void hitObjectRecordHitWithIndexMotionNV(hitObjectNV, accelerationStructureEXT,int,int,int,uint,uint,vec3,float,vec3,float,float,int);"
__glsl_extension(GL_EXT_ray_tracing)
__glsl_extension(GL_NV_shader_invocation_reorder)
__glsl_extension(GL_NV_ray_tracing_motion_blur)
[require(glsl, ser_motion_raygen_closesthit_miss)]
static void __glslMakeMotionHitWithIndex(
    out HitObject hitObj,
    RaytracingAccelerationStructure accelerationStructure,
    int instanceid,
    int primitiveid,
    int geometryindex,
    uint hitKind,
    uint sbtRecordIndex,
    float3 origin,
    float Tmin,
    float3 direction,
    float Tmax,
    float CurrentTime,
    int attributeLocation)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "hitObjectRecordHitWithIndexMotionNV";
    }
}

// "void hitObjectRecordHitNV(hitObjectNV,accelerationStructureEXT,int,int,int,uint,uint,uint,vec3,float,vec3,float,int);"
__glsl_extension(GL_EXT_ray_tracing)
__glsl_extension(GL_NV_shader_invocation_reorder)
[require(glsl, ser_raygen_closesthit_miss)]
static void __glslMakeHit(
    out HitObject hitObj,
    RaytracingAccelerationStructure accelerationStructure,
    int instanceid,
    int primitiveid,
    int geometryindex,
    uint hitKind,
    uint sbtRecordOffset,
    uint sbtRecordStride,
    float3 origin,
    float Tmin,
    float3 direction,
    float Tmax,
    int attributeLocation)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "hitObjectRecordHitNV";
    }
}

// "void hitObjectRecordHitMotionNV(hitObjectNV,accelerationStructureEXT,int,int,int,uint,uint,uint,vec3,float,vec3,float,float,int);"
__glsl_extension(GL_EXT_ray_tracing)
__glsl_extension(GL_NV_shader_invocation_reorder)
__glsl_extension(GL_NV_ray_tracing_motion_blur)
[require(glsl, ser_motion_raygen_closesthit_miss)]
static void __glslMakeMotionHit(
    out HitObject hitObj,
    RaytracingAccelerationStructure accelerationStructure,
    int instanceid,
    int primitiveid,
    int geometryindex,
    uint hitKind,
    uint sbtRecordOffset,
    uint sbtRecordStride,
    float3 origin,
    float Tmin,
    float3 direction,
    float Tmax,
    float CurrentTime,
    int attributeLocation)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "hitObjectRecordHitMotionNV";
    }
}

// Member helper: `$0` is this hit object, `$1` the attribute location.
__glsl_extension(GL_EXT_ray_tracing)
__glsl_extension(GL_NV_shader_invocation_reorder)
[require(glsl, ser_raygen_closesthit_miss)]
void __glslGetAttributes(int attributeLocation)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "hitObjectGetAttributesNV($0, $1)";
    }
}

// Static binding to hitObjectTraceRayNV. `payload` is an int here
// (presumably a payload location rather than the payload value — confirm against callers).
__glsl_extension(GL_EXT_ray_tracing)
__glsl_extension(GL_NV_shader_invocation_reorder)
[require(glsl, ser_raygen_closesthit_miss)]
static void __glslTraceRay(
    out HitObject hitObject,
    RaytracingAccelerationStructure accelerationStructure,
    uint rayFlags,
    uint cullMask,
    uint sbtRecordOffset,
    uint sbtRecordStride,
    uint missIndex,
    float3 origin,
    float Tmin,
    float3 direction,
    float Tmax,
    int payload)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "hitObjectTraceRayNV";
    }
}

// Motion-blur variant of __glslTraceRay: same parameters plus `currentTime` before `payload`.
__glsl_extension(GL_EXT_ray_tracing)
__glsl_extension(GL_NV_shader_invocation_reorder)
__glsl_extension(GL_NV_ray_tracing_motion_blur)
[require(glsl, ser_motion_raygen_closesthit_miss)]
static void __glslTraceMotionRay(
    out HitObject hitObject,
    RaytracingAccelerationStructure accelerationStructure,
    uint rayFlags,
    uint cullMask,
    uint sbtRecordOffset,
    uint sbtRecordStride,
    uint missIndex,
    float3 origin,
    float Tmin,
    float3 direction,
    float Tmax,
    float currentTime,
    int payload)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "hitObjectTraceRayMotionNV";
    }
}

// Static binding to hitObjectExecuteShaderNV.
__glsl_extension(GL_EXT_ray_tracing)
__glsl_extension(GL_NV_shader_invocation_reorder)
[require(glsl, ser_raygen_closesthit_miss)]
static void __glslInvoke(
    HitObject hitObj,
    int payload)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "hitObjectExecuteShaderNV";
    }
}

// Closes the enclosing struct, whose header lies above this excerpt.
};

/// Reorders threads based on a coherence hint value. NumCoherenceHintBitsFromLSB indicates how many of
/// the least significant bits of CoherenceHint should be considered during reordering (max: 16).
/// Applications should set this to the lowest value required to represent all possible values in
/// CoherenceHint. For best performance, all threads should provide the same value for
/// NumCoherenceHintBitsFromLSB.
/// Where possible, reordering will also attempt to retain locality in the thread’s launch indices
/// (DispatchRaysIndex in DXR).
[__requiresNVAPI]
__glsl_extension(GL_EXT_ray_tracing)
__glsl_extension(GL_NV_shader_invocation_reorder)
[ForceInline]
[require(glsl_hlsl_spirv, ser_raygen)]
void ReorderThread( uint CoherenceHint, uint NumCoherenceHintBitsFromLSB )
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "NvReorderThread";
    case glsl: __intrinsic_asm "reorderThreadNV";
    // SPIR-V: the hint-only form uses OpReorderThreadWithHintNV.
    case spirv:
        spirv_asm
        {
            OpExtension "SPV_NV_shader_invocation_reorder";
            OpCapability ShaderInvocationReorderNV;
            OpReorderThreadWithHintNV $CoherenceHint $NumCoherenceHintBitsFromLSB;
        };
    }
}

/// Reorders threads based on a hit object, optionally extended by a coherence hint value. Coherence
/// hints behave as described in the generic variant of ReorderThread. The maximum number of
/// coherence hint bits in this variant of ReorderThread is 8. If no coherence hint is desired, set
/// NumCoherenceHintBitsFromLSB to zero.
/// Reordering will consider information in the HitObject and coherence hint with the following
/// priority:
///
/// 1. Shader ID stored in the HitObject
/// 2. Coherence hint, with the most significant hint bit having highest priority
/// 3. Spatial information stored in the HitObject
///
/// That is, ReorderThread will first attempt to group threads whose HitObject references the
/// same shader ID. (Miss shaders and NOP HitObjects are grouped separately). Within each of these
/// groups, it will attempt to order threads by the value of their coherence hints. And within ranges
/// of equal coherence hints, it will attempt to maximize locality in 3D space of the ray hit (if any).
[__requiresNVAPI]
__glsl_extension(GL_EXT_ray_tracing)
__glsl_extension(GL_NV_shader_invocation_reorder)
[ForceInline]
[require(glsl_hlsl_spirv, ser_raygen)]
void ReorderThread( HitObject HitOrMiss, uint CoherenceHint, uint NumCoherenceHintBitsFromLSB )
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "NvReorderThread";
    case glsl: __intrinsic_asm "reorderThreadNV";
    // SPIR-V: the hit-object form uses OpReorderThreadWithHitObjectNV with both hint operands.
    case spirv:
        spirv_asm
        {
            OpExtension "SPV_NV_shader_invocation_reorder";
            OpCapability ShaderInvocationReorderNV;
            OpReorderThreadWithHitObjectNV &HitOrMiss $CoherenceHint $NumCoherenceHintBitsFromLSB;
        };
    }
}

/// Is equivalent to
/// ```
/// void ReorderThread( HitObject HitOrMiss, uint CoherenceHint, uint NumCoherenceHintBitsFromLSB );
/// ```
/// With CoherenceHint and NumCoherenceHintBitsFromLSB as 0, meaning they are ignored.
[__requiresNVAPI]
__glsl_extension(GL_EXT_ray_tracing)
__glsl_extension(GL_NV_shader_invocation_reorder)
[ForceInline]
[require(glsl_hlsl_spirv, ser_raygen)]
void ReorderThread( HitObject HitOrMiss )
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "NvReorderThread";
    case glsl: __intrinsic_asm "reorderThreadNV";
    // SPIR-V: same instruction as the hinted overload, with the hint operands omitted.
    case spirv:
        spirv_asm
        {
            OpExtension "SPV_NV_shader_invocation_reorder";
            OpCapability ShaderInvocationReorderNV;
            OpReorderThreadWithHitObjectNV &HitOrMiss;
        };
    }
}

///
/// DebugBreak support
///
/// There doesn't appear to be an equivalent for debugBreak for HLSL

// GLSL-only declaration bound to instruction 1 of the "NonSemantic.DebugBreak" instruction set.
__specialized_for_target(glsl)
[[vk::spirv_instruction(1, "NonSemantic.DebugBreak")]]
void __glslDebugBreak();

// Emits a target-specific breakpoint.
[ForceInline]
[require(cpp_cuda_glsl_hlsl, breakpoint)]
void debugBreak()
{
    __target_switch
    {
    // HLSL: only a comment is emitted, so this is a no-op on that target.
    case hlsl: __intrinsic_asm "/* debugBreak() not currently supported for HLSL */";
    case cuda: __intrinsic_asm "__brkpt()";
    case cpp: __intrinsic_asm "SLANG_BREAKPOINT(0)";
    case glsl: __glslDebugBreak(); return;
    }
}

//
// Realtime Clock support
//

// https://github.com/KhronosGroup/GLSL/blob/master/extensions/ext/GL_EXT_shader_realtime_clock.txt

// Returns a 32-bit clock value. On GLSL and SPIR-V this is the `.x` component of
// getRealtimeClock(); the other targets use their own expressions.
[__requiresNVAPI]
__glsl_extension(GL_EXT_shader_realtime_clock)
[NonUniformReturn]
[require(cpp_cuda_glsl_hlsl_spirv, shaderclock)]
uint getRealtimeClockLow()
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "NvGetSpecial( NV_SPECIALOP_GLOBAL_TIMER_LO)";
    case glsl: return getRealtimeClock().x;
    case cuda: __intrinsic_asm "clock";
    case spirv: return getRealtimeClock().x;
    case cpp: __intrinsic_asm "(uint32_t)std::chrono::high_resolution_clock::now().time_since_epoch().count()";
    }
}

// 64-bit clock source shared by the CUDA and C++ paths of getRealtimeClock().
[NonUniformReturn]
[require(cpp_cuda, shaderclock)]
int64_t __cudaCppGetRealtimeClock()
{
    __target_switch
    {
    case cpp: __intrinsic_asm "std::chrono::high_resolution_clock::now().time_since_epoch().count()";
    case cuda: __intrinsic_asm "clock64";
    }
}

// Returns the clock as a uint2. On the CUDA/C++ path `.x` holds the low 32 bits
// and `.y` the high 32 bits of the 64-bit tick count.
[__requiresNVAPI]
__glsl_extension(GL_EXT_shader_realtime_clock)
[NonUniformReturn]
[require(cpp_cuda_glsl_hlsl_spirv, shaderclock)]
uint2 getRealtimeClock()
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "uint2(NvGetSpecial(NV_SPECIALOP_GLOBAL_TIMER_LO), NvGetSpecial( NV_SPECIALOP_GLOBAL_TIMER_HI))";
    case glsl: __intrinsic_asm "clockRealtime2x32EXT()";
    // `case cuda` deliberately falls through to share the C++ implementation.
    case cuda:
    case cpp:
        int64_t ticks = __cudaCppGetRealtimeClock();
        // Reinterpret as unsigned before shifting so the high word is a logical shift.
        return uint2(uint(ticks), uint(uint64_t(ticks) >> 32));
    case spirv:
        return spirv_asm
        {
            OpCapability ShaderClockKHR;
            OpExtension "SPV_KHR_shader_clock";
            result : $$uint2 = OpReadClockKHR Device
        };
    }
}

//
// CUDA specific
//

// Each of the three helpers below emits the named CUDA built-in variable directly.
[__readNone]
[NonUniformReturn]
[require(cuda)]
uint3 cudaThreadIdx()
{
    __target_switch
    {
    case cuda: __intrinsic_asm "(threadIdx)";
    }
}

[__readNone]
[NonUniformReturn]
[require(cuda)]
uint3 cudaBlockIdx()
{
    __target_switch
    {
    case cuda: __intrinsic_asm "(blockIdx)";
    }
}

[__readNone]
[NonUniformReturn]
[require(cuda)]
uint3 cudaBlockDim()
{
    __target_switch
    {
    case cuda: __intrinsic_asm "(blockDim)";
    }
}

//
// Workgroup cooperation
//

//
// `saturated_cooperation(c, f, s, u)` will call `f(s, u)` if not all lanes in the
// workgroup are currently executing. However, if all lanes are saturated, then
// for each unique `s` across all the active lanes `c(s, u)` is called. The
// return value is the one corresponding to the input `s` from this lane.
//
// Adjacent calls to saturated_cooperation are subject to fusion, i.e.
//     saturated_cooperation(c1, f1, s, u1);
//     saturated_cooperation(c2, f2, s, u2);
// will be transformed to:
//     saturated_cooperation(c1c2, f1f2, s, u1u2);
// where
//     c1c2 is a function which calls c1(s, u1) and then c2(s, u2);
//     f1f2 is a function which calls f1(s, u1) and then f2(s, u2);
//
// When the input differs, calls are fused
//     saturated_cooperation(c1, f1, s1, u1);
//     saturated_cooperation(c2, f2, s2, u2);
// will be transformed to:
//     saturated_cooperation(c1c2, f1f2, s1s2, u1u2);
// where
//     s1s2 is a tuple of s1 and s2
//     c1c2 is a function which calls c1(s1, u1) and then c2(s2, u2);
//     f1f2 is a function which calls f1(s1, u1) and then f2(s2, u2);
// Note that in this case, we will make a call to c1c2 for every unique pair
// s1s2 across all lanes
//
// (This fusion takes place in the fuse-satcoop pass, and as such any changes to
// the signature or behavior of this function should be adjusted for there).
//
[KnownBuiltin("saturated_cooperation")]
func saturated_cooperation(
    cooperate : functype (A, B) -> C,
    fallback : functype (A, B) -> C,
    A input,
    B otherArg) -> C
{
    // Delegate to the configurable variant, plugging in the built-in wave
    // match / broadcast wrappers defined just below.
    return saturated_cooperation_using(
        cooperate,
        fallback,
        __WaveMatchBuitin,
        __WaveReadLaneAtBuiltin,
        input,
        otherArg);
}

// Temporary (circa May 2023) workaround: the two thin wrappers below exist only
// because the overload to hand to saturated_cooperation_using cannot be deduced
// in the call above.
[__unsafeForceInlineEarly]
func __WaveMatchBuitin(T t) -> uint4
{
    return WaveMatch(t);
}

[__unsafeForceInlineEarly]
func __WaveReadLaneAtBuiltin(T t, int i) -> T
{
    return WaveReadLaneAt(t, i);
}

//
// Same contract as saturated_cooperation, except that the caller supplies:
//
// waveMatch: returns a mask of the lanes holding the same input as this one
// broadcast: returns the value that was passed in on the specified lane
//
[KnownBuiltin("saturated_cooperation_using")]
func saturated_cooperation_using(
    cooperate : functype (A, B) -> C,
    fallback : functype (A, B) -> C,
    waveMatch : functype (A) -> uint4,
    broadcast : functype (A, int) -> A,
    A input,
    B otherArg) -> C
{
    // Unless every lane of the wave is active, take the fallback path.
    if (WaveActiveCountBits(true) != WaveGetLaneCount())
        return fallback(input, otherArg);

    // Only the first 32-bit word of the match mask is consulted.
    let peerMask = waveMatch(input).x;

    // Isolate the lowest set bit: the representative lane for our input value.
    let ownLeaderBit = peerMask & -peerMask;

    // Union of every lane's representative bit, i.e. one bit per distinct input.
    var pendingLeaders = WaveActiveBitOr(ownLeaderBit);

    C ret;

    // Visit the representatives from the lowest lane upwards, retiring one bit
    // per iteration.
    while (bool(pendingLeaders))
    {
        // Make the current representative's input uniform across the wave.
        let leaderLane = firstbitlow(pendingLeaders);
        let sharedInput = broadcast(input, int(leaderLane));

        // Every lane takes part in the cooperative computation for that input.
        C produced = cooperate(sharedInput, otherArg);

        // Keep overwriting our result while our own representative is still
        // pending; the final write is the one computed from our own input.
        if (bool(pendingLeaders & ownLeaderBit))
            ret = produced;

        // Retire the lowest pending representative.
        pendingLeaders &= pendingLeaders - 1;
    }

    return ret;
}

${
// The NVAPI operations are defined to take the space/register
// indices of their texture and sampler parameters, rather than
// taking the texture/sampler objects directly.
//
// In order to support this approach, we need intrinsics that
// can magically fetch the binding information for a resource.
//
// TODO: These operations are kind of *screaming* for us to
// have a built-in `interface` that all of the opaque resource
// types conform to, so that we can define builtins that work
// for any resource type.
}

// Compiler intrinsics (no body) that yield the register space of a texture or
// sampler; both overloads map to the same IR op, kIROp_GetRegisterSpace.
__intrinsic_op($(kIROp_GetRegisterSpace))
uint __getRegisterSpace(__TextureImpl texture);

__intrinsic_op($(kIROp_GetRegisterSpace))
uint __getRegisterSpace(SamplerState sampler);

// Compiler intrinsics (no body) that yield the register index of a texture or
// sampler; both overloads map to kIROp_GetRegisterIndex.
__intrinsic_op($(kIROp_GetRegisterIndex))
uint __getRegisterIndex(__TextureImpl texture);

__intrinsic_op($(kIROp_GetRegisterIndex))
uint __getRegisterIndex(SamplerState sampler);

${{{{
//
// Texture Footprint Queries
//
// This section introduces the types and methods related
// to the `GL_NV_shader_texture_footprint` GLSL extension,
// and the matching NVAPI operations.
//
// Footprint queries are allowed on both 2D and 3D textures,
// and are structurally similar for the two, so we will
// use a meta-loop to deduplicate the code for the two
// cases.
//
// A footprint query yields a data structure
// that describes blocks of texels that
// conservatively cover the data that might
// be fetched in the query.
//
// A given sampling operation might access two
// mip levels of a texture when, e.g., trilinear
// filtering is on. A footprint query may ask for
// a footprint in either the coarse or fine level
// of the pair.
//
// We first define a `struct` type that closely maps
// to how a footprint is defined for each of the
// implementations we support, and then wrap that
// in a derived `struct` that includes the extra
// data that is returned by the GLSL API via the
// function result.
//
}}}}

// NOTE(review): in this section `vector` appears without element-type/size
// arguments and `ND` is used without a visible declaration; the generic
// parameter lists may have been lost — confirm against the canonical file.

// Extracts the anchor from footprint data. On HLSL `$!1` is presumably expanded
// to the literal value of `nd` (giving a `...2D`/`...3D` function name) — confirm.
// On SPIR-V the anchor is member 1 of the footprint composite.
[__NoSideEffect]
[__requiresNVAPI]
[require(glsl_hlsl_spirv, texturefootprint)]
vector __textureFootprintGetAnchor(__TextureFootprintData data, int nd)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "NvFootprintExtractAnchorTileLoc$!1D($0)";
    case glsl: __intrinsic_asm "$0.anchor";
    case spirv:
        return spirv_asm
        {
            result:$$vector = OpCompositeExtract $data 1;
        };
    }
}

// Extracts the offset from footprint data; member 2 of the composite on SPIR-V.
[__NoSideEffect]
[__requiresNVAPI]
[require(glsl_hlsl_spirv, texturefootprint)]
vector __textureFootprintGetOffset(__TextureFootprintData data, int nd)
{
    __target_switch
    {
    case hlsl: __intrinsic_asm "NvFootprintExtractOffset$!1D($0)";
    case glsl: __intrinsic_asm "$0.offset";
    case spirv:
        return spirv_asm
        {
            result:$$vector = OpCompositeExtract $data 2;
        };
    }
}

// Target-level footprint record. Fields are exposed as read-only properties;
// on SPIR-V they are composite members 1 (anchor), 2 (offset), 3 (mask),
// 4 (lod) and 5 (granularity).
__intrinsic_type($(kIROp_TextureFootprintType))
[require(glsl_hlsl_spirv, texturefootprint)]
struct __TextureFootprintData
{
    typealias Anchor = vector;
    typealias Offset = vector;
    typealias Mask = uint2;
    typealias LOD = uint;
    typealias Granularity = uint;

    // anchor/offset forward to the free helpers above, passing `ND` as `nd`.
    property anchor : Anchor
    {
        [__NoSideEffect]
        [__requiresNVAPI]
        [ForceInline]
        get { return __textureFootprintGetAnchor(this, ND); }
    }

    property offset : Offset
    {
        [__NoSideEffect]
        [__requiresNVAPI]
        [ForceInline]
        get { return __textureFootprintGetOffset(this, ND); }
    }

    property mask : Mask
    {
        [__NoSideEffect]
        [__requiresNVAPI]
        get
        {
            __target_switch
            {
            case hlsl: __intrinsic_asm "NvFootprintExtractBitmask";
            case glsl: __intrinsic_asm "$0.mask";
            case spirv:
                return spirv_asm
                {
                    result:$$Mask = OpCompositeExtract $this 3;
                };
            }
        }
    }

    property lod : LOD
    {
        [__NoSideEffect]
        [__requiresNVAPI]
        get
        {
            __target_switch
            {
            case hlsl: __intrinsic_asm "NvFootprintExtractLOD";
            case glsl: __intrinsic_asm "$0.lod";
            case spirv:
                return spirv_asm
                {
                    result:$$LOD = OpCompositeExtract $this 4;
                };
            }
        }
    }

    property granularity : Granularity
    {
        [__NoSideEffect]
        [__requiresNVAPI]
        get
        {
            __target_switch
            {
            case hlsl: __intrinsic_asm "NvFootprintExtractReturnGran";
            case glsl: __intrinsic_asm "$0.granularity";
            case spirv:
                return spirv_asm
                {
                    result:$$Granularity = OpCompositeExtract $this 5;
                };
            }
        }
    }
}

// Footprint record plus the "single level" flag, stored in `_isSingleLevel`
// and exposed through the read-only `isSingleLevel` property.
struct TextureFootprint : __TextureFootprintData
{
    bool _isSingleLevel;

    property isSingleLevel : bool
    {
        [__NoSideEffect]
        get { return _isSingleLevel; }
    }
}

typealias TextureFootprint2D = TextureFootprint<2>;
typealias TextureFootprint3D = TextureFootprint<3>;

${
// We define the new operations via an `extension`
// on the relevant texture type(s), rather than
// further clutter the original type declarations.
}

// NOTE: this extension's closing brace lies beyond this excerpt.
__generic
extension __TextureImpl
{
${
// We introduce a few convenience type aliases here,
// which both keep our declarations simpler and easier
// to understand, but which might *also* be useful to
// users of the stdlib, so that they can write things
// like `Texture2D.Footprint`, and also have auto-complete
// help them find such members.
//
// TODO: The `Coords` type really ought to be something
// defined on the base texture types, rather than via
// this `extension`.
}
typealias Coords = vector;
typealias Footprint = TextureFootprint;
typealias __FootprintData = __TextureFootprintData;
typealias FootprintGranularity = Footprint.Granularity;

${
// For the GLSL extension, the choice between the
// coarse and fine level is modeled as a `bool`
// parameter to the query operation(s). We define
// the GLSL functions here as intrinsics, so that
// we can refer to them later in the definitions
// of our stdlib operations.
//
// Note: despite the GLSL extension defining the `granularity`
// member of the query result as having type `uint`, the
// function signatures all take `int` parameters for the
// granularity instead.
//
}

// Common shape of the GLSL/SPIR-V footprint queries in this group:
//  - GLSL forwards to a `textureFootprint*NV` builtin.
//  - SPIR-V issues OpImageSampleFootprintNV, stores the whole result through
//    `footprint`, and returns member 0 of that result as the bool.
//  - NOTE(review): the SPIR-V paths pass `&this` as the image operand and never
//    reference the `sampler` parameter — confirm this is intended.

// Plain query (no extra image operands).
[__NoSideEffect]
__glsl_version(450)
__glsl_extension(GL_NV_shader_texture_footprint)
[require(glsl_spirv, texturefootprint)]
bool __queryFootprintGLSL(
    SamplerState sampler,
    Coords coords,
    int granularity,
    bool useCoarseLevel,
    out __FootprintData footprint)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "textureFootprintNV($p, $*2)";
    case spirv:
        return spirv_asm
        {
            OpCapability ImageFootprintNV;
            OpExtension "SPV_NV_shader_image_footprint";
            %resultVal:$$__FootprintData = OpImageSampleFootprintNV &this $coords $granularity $useCoarseLevel;
            OpStore &footprint %resultVal;
            result:$$bool = OpCompositeExtract %resultVal 0;
        };
    }
}

// Query with an LOD bias (SPIR-V image operand `Bias`).
[__NoSideEffect]
__glsl_version(450)
__glsl_extension(GL_NV_shader_texture_footprint)
[require(glsl_spirv, texturefootprint)]
bool __queryFootprintGLSL(
    SamplerState sampler,
    Coords coords,
    int granularity,
    bool useCoarseLevel,
    out __FootprintData footprint,
    float bias)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "textureFootprintNV($p, $*2)";
    case spirv:
        return spirv_asm
        {
            OpCapability ImageFootprintNV;
            OpExtension "SPV_NV_shader_image_footprint";
            %resultVal:$$__FootprintData = OpImageSampleFootprintNV &this $coords $granularity $useCoarseLevel Bias $bias;
            OpStore &footprint %resultVal;
            result:$$bool = OpCompositeExtract %resultVal 0;
        };
    }
}

// Query with an LOD clamp (SPIR-V image operand `MinLod`, which also needs the MinLod capability).
[__NoSideEffect]
__glsl_version(450)
__glsl_extension(GL_NV_shader_texture_footprint)
__glsl_extension(GL_ARB_sparse_texture_clamp)
[require(glsl_spirv, texturefootprintclamp)]
bool __queryFootprintClampGLSL(
    SamplerState sampler,
    Coords coords,
    float lodClamp,
    int granularity,
    bool useCoarseLevel,
    out __FootprintData footprint)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "textureFootprintClampNV($p, $*2)";
    case spirv:
        return spirv_asm
        {
            OpCapability ImageFootprintNV;
            OpCapability MinLod;
            OpExtension "SPV_NV_shader_image_footprint";
            %resultVal:$$__FootprintData = OpImageSampleFootprintNV &this $coords $granularity $useCoarseLevel MinLod $lodClamp;
            OpStore &footprint %resultVal;
            result:$$bool = OpCompositeExtract %resultVal 0;
        };
    }
}

// Query with both an LOD bias and an LOD clamp (`Bias|MinLod`; operands follow in that order).
[__NoSideEffect]
__glsl_version(450)
__glsl_extension(GL_NV_shader_texture_footprint)
__glsl_extension(GL_ARB_sparse_texture_clamp)
[require(glsl_spirv, texturefootprintclamp)]
bool __queryFootprintClampGLSL(
    SamplerState sampler,
    Coords coords,
    float lodClamp,
    int granularity,
    bool useCoarseLevel,
    out __FootprintData footprint,
    float bias)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "textureFootprintClampNV($p, $*2)";
    case spirv:
        return spirv_asm
        {
            OpCapability ImageFootprintNV;
            OpCapability MinLod;
            OpExtension "SPV_NV_shader_image_footprint";
            %resultVal:$$__FootprintData = OpImageSampleFootprintNV &this $coords $granularity $useCoarseLevel Bias|MinLod $bias $lodClamp;
            OpStore &footprint %resultVal;
            result:$$bool = OpCompositeExtract %resultVal 0;
        };
    }
}

// Query at an explicit LOD (SPIR-V image operand `Lod`).
[__NoSideEffect]
__glsl_version(450)
__glsl_extension(GL_NV_shader_texture_footprint)
[__requiresNVAPI]
[require(glsl_spirv, texturefootprint)]
bool __queryFootprintLodGLSL(
    SamplerState sampler,
    Coords coords,
    float lod,
    int granularity,
    bool useCoarseLevel,
    out __FootprintData footprint)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "textureFootprintLodNV($p, $*2)";
    case spirv:
        return spirv_asm
        {
            OpCapability ImageFootprintNV;
            OpExtension "SPV_NV_shader_image_footprint";
            %resultVal:$$__FootprintData = OpImageSampleFootprintNV &this $coords $granularity $useCoarseLevel Lod $lod;
            OpStore &footprint %resultVal;
            result:$$bool = OpCompositeExtract %resultVal 0;
        };
    }
}

${{{
    // Texture sampling with gradient is only available for 2D textures.
}}}

// Query with explicit gradients (SPIR-V image operand `Grad`).
[__NoSideEffect]
__glsl_version(450)
__glsl_extension(GL_NV_shader_texture_footprint)
[__requiresNVAPI]
[require(glsl_spirv, texturefootprint)]
bool __queryFootprintGradGLSL(
    SamplerState sampler,
    Coords coords,
    Coords dx,
    Coords dy,
    int granularity,
    bool useCoarseLevel,
    out __FootprintData footprint)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "textureFootprintGradNV($p, $*2)";
    case spirv:
        return spirv_asm
        {
            OpCapability ImageFootprintNV;
            OpExtension "SPV_NV_shader_image_footprint";
            %resultVal:$$__FootprintData = OpImageSampleFootprintNV &this $coords $granularity $useCoarseLevel Grad $dx $dy;
            OpStore &footprint %resultVal;
            result:$$bool = OpCompositeExtract %resultVal 0;
        };
    }
}

// Query with explicit gradients and an LOD clamp (`Grad|MinLod`).
[__NoSideEffect]
__glsl_version(450)
__glsl_extension(GL_NV_shader_texture_footprint)
__glsl_extension(GL_ARB_sparse_texture_clamp)
[require(glsl_spirv, texturefootprintclamp)]
bool __queryFootprintGradClampGLSL(
    SamplerState sampler,
    Coords coords,
    Coords dx,
    Coords dy,
    float lodClamp,
    int granularity,
    bool useCoarseLevel,
    out __FootprintData footprint)
{
    __target_switch
    {
    case glsl: __intrinsic_asm "textureFootprintGradClampNV($p, $*2)";
    case spirv:
        return spirv_asm
        {
            OpCapability ImageFootprintNV;
            OpCapability MinLod;
            OpExtension "SPV_NV_shader_image_footprint";
            %resultVal:$$__FootprintData = OpImageSampleFootprintNV &this $coords $granularity $useCoarseLevel Grad|MinLod $dx $dy $lodClamp;
            OpStore &footprint %resultVal;
            result:$$bool = OpCompositeExtract %resultVal 0;
        };
    }
}

${{{
    // End texture2D specific functions.
}}}

${{{{
// The NVAPI texture query operations encode the choice
// between coarse and fine levels as part of the function
// name, and so we are forced to match this convention
// if we want to provide a more portable API.
//
// TODO: We could conceivably define the functions to use
// a parameter for the coarse/fine choice, which is required
// to be `constexpr` for the HLSL/NVAPI target.
//
// Generator table: one entry per emitted variant. `name` is spliced into
// declaration names as `$(CoarseOrFine)`, and `isCoarseVal` is spliced in as
// the boolean literal `$(isCoarseVal)`.
static const struct LevelChoice
{
    char const* name;
    char const* isCoarseVal;
} kLevelChoices[] =
{
    { "Coarse", "true" },
    { "Fine", "false" },
};

// Everything from here until the loop's closing brace (which sits in a later
// generator block) is emitted once per table entry.
for(auto levelChoice : kLevelChoices)
{
    auto CoarseOrFine = levelChoice.name;
    auto isCoarseVal = levelChoice.isCoarseVal;

    // We now go ahead and define the intrinsics provided by NVAPI,
    // which have a very different signature from the GLSL ones.
    //
    // Note: the NVAPI functions also support an optional texel
    // offset parameter. For now we are not including overloads
    // with that parameter, since they have no equivalent in
    // the GLSL extension.
    //
}}}}

// HLSL-only wrapper that expands to `NvFootprint$(CoarseOrFine)(...)`.
// The texture and sampler are identified by register space/index, and the
// single-LOD flag is reported through `isSingleLod`.
// NOTE(review): `NV_EXTN_TEXTURE_$!0D` presumably splices the `nd` argument
// into the NVAPI texture-type constant name; confirm against the
// intrinsic-asm expansion rules.
[__NoSideEffect]
[__requiresNVAPI]
[require(hlsl, texturefootprint)]
static __FootprintData __queryFootprint$(CoarseOrFine)NVAPI(
    int nd,
    uint textureSpace,
    uint textureIndex,
    uint samplerSpace,
    uint samplerIndex,
    float3 coords,
    FootprintGranularity granularity,
    out uint isSingleLod)
{
    __target_switch
    {
    case hlsl:
        __intrinsic_asm "NvFootprint$(CoarseOrFine)($1, $2, $3, $4, NV_EXTN_TEXTURE_$!0D, $*5)";
    }
}

// HLSL-only wrapper that expands to `NvFootprint$(CoarseOrFine)Bias(...)`;
// same as the plain form plus a `lodBias` argument.
[__NoSideEffect]
[__requiresNVAPI]
[require(hlsl, texturefootprint)]
static __FootprintData __queryFootprint$(CoarseOrFine)BiasNVAPI(
    int nd,
    uint textureSpace,
    uint textureIndex,
    uint samplerSpace,
    uint samplerIndex,
    float3 coords,
    FootprintGranularity granularity,
    float lodBias,
    out uint isSingleLod)
{
    __target_switch
    {
    case hlsl:
        __intrinsic_asm "NvFootprint$(CoarseOrFine)Bias($1, $2, $3, $4, NV_EXTN_TEXTURE_$!0D, $*5)";
    }
}

// HLSL-only wrapper that expands to `NvFootprint$(CoarseOrFine)Level(...)`;
// same as the plain form plus an explicit `lod` argument.
[__NoSideEffect]
[__requiresNVAPI]
[require(hlsl, texturefootprint)]
static __FootprintData __queryFootprint$(CoarseOrFine)LevelNVAPI(
    int nd,
    uint textureSpace,
    uint textureIndex,
    uint samplerSpace,
    uint samplerIndex,
    float3 coords,
    FootprintGranularity granularity,
    float lod,
    out uint isSingleLod)
{
    __target_switch
    {
    case hlsl:
        __intrinsic_asm "NvFootprint$(CoarseOrFine)Level($1, $2, $3, $4, NV_EXTN_TEXTURE_$!0D, $*5)";
    }
}

// HLSL-only wrapper that expands to `NvFootprint$(CoarseOrFine)Grad(...)`;
// same as the plain form plus explicit `dx` / `dy` gradients.
[__NoSideEffect]
[__requiresNVAPI]
[require(hlsl, texturefootprint)]
static __FootprintData __queryFootprint$(CoarseOrFine)GradNVAPI(
    int nd,
    uint textureSpace,
    uint textureIndex,
    uint samplerSpace,
    uint samplerIndex,
    float3 coords,
    FootprintGranularity granularity,
    float3 dx,
    float3 dy,
    out uint isSingleLod)
{
    __target_switch
    {
    case hlsl:
        __intrinsic_asm "NvFootprint$(CoarseOrFine)Grad($1, $2, $3, $4, NV_EXTN_TEXTURE_$!0D, $*5)";
    }
}

${
// We now define the portable operations that will be officially
// supported by the standard library. For each operation, we
// need to provide both a version that maps to the GLSL extension,
// and a version that uses the NVAPI functions.
//
// Some function variations are only available with one extension
// or the other, so we try our best to only define them where
// each is available.
//
// Note that these functions cannot be marked as [ForceInline] for now
// because the texture resource may get removed after DCE, since the only
// use of those resources is through __GetRegisterIndex/Space, which is
// replaced early with their binding slot in the compilation process.
// Not inlining these functions is a quick way to make sure the texture
// always has live uses.
//
}

/// Query the footprint that would be accessed by a texture sampling operation.
///
/// The result describes the footprint of a comparable call to:
///
///     t.Sample(sampler, coords);
///
[__NoSideEffect]
Footprint queryFootprint$(CoarseOrFine)(
    FootprintGranularity granularity,
    SamplerState sampler,
    Coords coords)
{
    __target_switch
    {
    case glsl:
    case spirv:
        // The helper's return value becomes the single-level flag; `result`
        // is also handed to the helper as its footprint argument.
        Footprint result;
        result._isSingleLevel = __queryFootprintGLSL(
            sampler,
            coords,
            granularity,
            $(isCoarseVal),
            result);
        return result;

    case hlsl:
        // NVAPI identifies the texture and sampler by register space/index
        // and reports the single-LOD flag through an `out uint`. The `false`
        // initializer is a placeholder that is overwritten right below.
        uint singleLodFlag = 0;
        Footprint result = {__queryFootprint$(CoarseOrFine)NVAPI(
            Shape.dimensions,
            __getRegisterSpace(this),
            __getRegisterIndex(this),
            __getRegisterSpace(sampler),
            __getRegisterIndex(sampler),
            __vectorReshape<3>(coords),
            granularity,
            /* out */singleLodFlag), false};
        result._isSingleLevel = (singleLodFlag != 0);
        return result;
    }
}

/// Query the footprint that would be accessed by a texture sampling operation.
///
/// The result describes the footprint of a comparable call to:
///
///     t.SampleBias(sampler, coords, lodBias);
///
[__NoSideEffect]
Footprint queryFootprint$(CoarseOrFine)Bias(
    FootprintGranularity granularity,
    SamplerState sampler,
    Coords coords,
    float lodBias)
{
    __target_switch
    {
    case glsl:
    case spirv:
        // Same helper as the unbiased query, with `lodBias` appended.
        Footprint result;
        result._isSingleLevel = __queryFootprintGLSL(
            sampler,
            coords,
            granularity,
            $(isCoarseVal),
            result,
            lodBias);
        return result;

    case hlsl:
        // The `false` initializer is a placeholder; the real flag comes from
        // the `out uint` written by the NVAPI wrapper.
        uint singleLodFlag = 0;
        Footprint result = {__queryFootprint$(CoarseOrFine)BiasNVAPI(
            Shape.dimensions,
            __getRegisterSpace(this),
            __getRegisterIndex(this),
            __getRegisterSpace(sampler),
            __getRegisterIndex(sampler),
            __vectorReshape<3>(coords),
            granularity,
            lodBias,
            /* out */singleLodFlag), false};
        result._isSingleLevel = (singleLodFlag != 0);
        return result;
    }
}

/// Query the footprint that would be accessed by a texture sampling operation.
///
/// The result describes the footprint of a comparable call to:
///
///     t.SampleClamp(sampler, coords, lodClamp);
///
[__NoSideEffect]
Footprint queryFootprint$(CoarseOrFine)Clamp(
    FootprintGranularity granularity,
    SamplerState sampler,
    Coords coords,
    float lodClamp)
{
    // Only glsl/spirv cases are provided for the clamp variants.
    __target_switch
    {
    case glsl:
    case spirv:
        Footprint result;
        result._isSingleLevel = __queryFootprintClampGLSL(
            sampler,
            coords,
            lodClamp,
            granularity,
            $(isCoarseVal),
            result);
        return result;
    }
}

/// Query the footprint that would be accessed by a texture sampling operation.
///
/// The result describes the footprint of a comparable call to:
///
///     t.SampleBiasClamp(sampler, coords, lodBias, lodClamp);
///
[__NoSideEffect]
Footprint queryFootprint$(CoarseOrFine)BiasClamp(
    FootprintGranularity granularity,
    SamplerState sampler,
    Coords coords,
    float lodBias,
    float lodClamp)
{
    // Only glsl/spirv cases are provided for the clamp variants.
    __target_switch
    {
    case glsl:
    case spirv:
        // Same helper as the unbiased clamp query, with `lodBias` appended.
        Footprint result;
        result._isSingleLevel = __queryFootprintClampGLSL(
            sampler,
            coords,
            lodClamp,
            granularity,
            $(isCoarseVal),
            result,
            lodBias);
        return result;
    }
}

/// Query the footprint that would be accessed by a texture sampling operation.
///
/// The result describes the footprint of a comparable call to:
///
///     t.SampleLevel(sampler, coords, lod);
///
[__NoSideEffect]
Footprint queryFootprint$(CoarseOrFine)Level(
    FootprintGranularity granularity,
    SamplerState sampler,
    Coords coords,
    float lod)
{
    __target_switch
    {
    case glsl:
    case spirv:
        // The helper's return value becomes the single-level flag; `result`
        // is also handed to the helper as its footprint argument.
        Footprint result;
        result._isSingleLevel = __queryFootprintLodGLSL(
            sampler,
            coords,
            lod,
            granularity,
            $(isCoarseVal),
            result);
        return result;

    case hlsl:
        // The `false` initializer is a placeholder; the real flag comes from
        // the `out uint` written by the NVAPI wrapper.
        uint singleLodFlag = 0;
        Footprint result = {__queryFootprint$(CoarseOrFine)LevelNVAPI(
            Shape.dimensions,
            __getRegisterSpace(this),
            __getRegisterIndex(this),
            __getRegisterSpace(sampler),
            __getRegisterIndex(sampler),
            __vectorReshape<3>(coords),
            granularity,
            lod,
            /* out */singleLodFlag), false};
        result._isSingleLevel = (singleLodFlag != 0);
        return result;
    }
}

${{{
// TODO: Texture sampling with gradient is only available for 2D textures.
}}}

/// Query the footprint that would be accessed by a texture sampling operation.
///
/// The result describes the footprint of a comparable call to:
///
///     t.SampleGrad(sampler, coords, dx, dy);
///
[__NoSideEffect]
[ForceInline]
Footprint queryFootprint$(CoarseOrFine)Grad(
    FootprintGranularity granularity,
    SamplerState sampler,
    Coords coords,
    Coords dx,
    Coords dy)
{
    __target_switch
    {
    case glsl:
    case spirv:
        // The helper's return value becomes the single-level flag; `result`
        // is also handed to the helper as its footprint argument.
        Footprint result;
        result._isSingleLevel = __queryFootprintGradGLSL(
            sampler,
            coords,
            dx,
            dy,
            granularity,
            $(isCoarseVal),
            result);
        return result;

    case hlsl:
        // NOTE(review): this function is [ForceInline], yet this path reaches
        // the texture only through __getRegisterSpace/__getRegisterIndex(this).
        // The generator comment ahead of these portable queries warns against
        // ForceInline for exactly that reason; confirm that the attribute is
        // intentional here.
        uint singleLodFlag = 0;
        Footprint result = {__queryFootprint$(CoarseOrFine)GradNVAPI(
            Shape.dimensions,
            __getRegisterSpace(this),
            __getRegisterIndex(this),
            __getRegisterSpace(sampler),
            __getRegisterIndex(sampler),
            __vectorReshape<3>(coords),
            granularity,
            __vectorReshape<3>(dx),
            __vectorReshape<3>(dy),
            /* out */singleLodFlag), false};
        result._isSingleLevel = (singleLodFlag != 0);
        return result;
    }
}

/// Query the footprint that would be accessed by a texture sampling operation.
///
/// The result describes the footprint of a comparable call to:
///
///     t.SampleGradClamp(sampler, coords, dx, dy, lodClamp);
///
[__NoSideEffect]
[ForceInline]
Footprint queryFootprint$(CoarseOrFine)GradClamp(
    FootprintGranularity granularity,
    SamplerState sampler,
    Coords coords,
    Coords dx,
    Coords dy,
    float lodClamp)
{
    // Only glsl/spirv cases are provided for the clamp variants.
    __target_switch
    {
    case glsl:
    case spirv:
        Footprint result;
        result._isSingleLevel = __queryFootprintGradClampGLSL(
            sampler,
            coords,
            dx,
            dy,
            lodClamp,
            granularity,
            $(isCoarseVal),
            result);
        return result;
    }
}

${{{
// TODO: end texture2D specific functions.
}}}

${{{{
// End of the generator loop over kLevelChoices.
}
}}}}

} // extension

//
// NOTE(review): in this copy `__generic` has no parameter list and
// `__TextureImpl` / `vector` have no arguments; verify against the upstream
// file before editing these declarations.
__generic
extension __TextureImpl
{
    // Atomic float add on the element at `coord`; the value produced by the
    // atomic is written to `originalValue`.
    //   spirv: `__atomicAdd(this[coord], value)`
    //   glsl:  `imageAtomicAdd`
    //   hlsl:  `NvInterlockedAddFp32`
    [__requiresNVAPI]
    [ForceInline]
    __glsl_extension(GL_EXT_shader_atomic_float)
    [require(glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda_metal_float1)]
    void InterlockedAddF32(vector coord, float value, out float originalValue)
    {
        __target_switch
        {
        case spirv:
            originalValue = __atomicAdd(this[coord], value);
            return;
        case glsl:
            __intrinsic_asm "$3 = imageAtomicAdd($0, $1, $2)";
        case hlsl:
            __intrinsic_asm "$3 = NvInterlockedAddFp32($0, $1, $2)";
        }
    }

    // Convenience overload: forwards to the three-argument form and returns
    // what that form wrote to its `out` parameter.
    [ForceInline]
    float InterlockedAddF32(vector coord, float value)
    {
        float originalValue;
        InterlockedAddF32(coord, value, originalValue);
        return originalValue;
    }
}

// Buffer Pointer

namespace vk
{
    // Partial implementation of the vk::buffer_ref proposal:
    // https://github.com/microsoft/hlsl-specs/blob/main/proposals/0010-vk-buffer-ref.md
    //
    // Thin wrapper around a raw `T *`.
    // NOTE(review): `T` and `U` are used below without a visible generic
    // parameter list; verify against the upstream file.
    struct BufferPointer
    {
        T *_ptr;

        // Wrap an existing pointer.
        [ForceInline] __init(T *ptr) { _ptr = ptr; }

        // Build a pointer from a 64-bit integer by casting it to `T *`.
        [ForceInline] __init(uint64_t val) { _ptr = (T *)val; }

        // Dereference the wrapped pointer.
        [ForceInline] Ref Get() { return *_ptr; }

        // Return the wrapped pointer itself.
        [ForceInline] T *getPtr() { return _ptr;}
    }

    // Re-wrap `src`'s pointer after a C-style cast to `U*`.
    [ForceInline] BufferPointer static_pointer_cast(BufferPointer src)
    {
        return BufferPointer((U*)(src.getPtr()));
    }

    // Re-wrap `src`'s pointer after a C-style cast to `U *`; the body performs
    // the same cast as `static_pointer_cast`.
    [ForceInline] BufferPointer reinterpret_pointer_cast(BufferPointer src)
    {
        return BufferPointer((U *)(src.getPtr()));
    }
}

attribute_syntax[vk_aliased_pointer] : VkAliasedPointerAttribute;
attribute_syntax[vk_restrict_pointer] : VkRestrictPointerAttribute;

// Allow constructing a `uint64_t` from a `vk::BufferPointer` by casting its
// wrapped pointer.
extension uint64_t
{
    __init(vk::BufferPointer ptr)
    {
        this = (uint64_t)ptr._ptr;
    }
}

// Read-only buffer-reference pointer available on glsl/spirv.
// NOTE(review): `__generic` has no parameter list in this copy although `T`
// is used below; verify against the upstream file.
__generic
__intrinsic_type($(kIROp_HLSLConstBufferPointerType))
__glsl_extension(GL_EXT_buffer_reference)
__magic_type(ConstBufferPointerType)
[require(glsl_spirv, bufferreference)]
struct ConstBufferPointer
{
    // Load the pointed-to value.
    //   glsl:  emitted as `$0._data`
    //   spirv: `OpLoad` of `$this` with the `Aligned` memory operand
    __glsl_version(450)
    __glsl_extension(GL_EXT_buffer_reference)
    [__NoSideEffect]
    T get()
    {
        __target_switch
        {
        case glsl:
            __intrinsic_asm "$0._data";
        case spirv:
            return spirv_asm
            {
                result:$$T = OpLoad $this Aligned !Alignment;
            };
        }
    }

    // Indexed read: converts the pointer to an integer, adds
    // `__naturalStrideOf() * index`, converts back, and loads.
    __subscript(int index) -> T
    {
        [ForceInline]
        get {return ConstBufferPointer.fromUInt(toUInt() + __naturalStrideOf() * index).get(); }
    }

    // Build a pointer from a 64-bit integer (spirv: `OpConvertUToPtr`).
    __glsl_version(450)
    __glsl_extension(GL_EXT_shader_explicit_arithmetic_types_int64)
    __glsl_extension(GL_EXT_buffer_reference)
    [require(glsl_spirv, bufferreference_int64)]
    static ConstBufferPointer fromUInt(uint64_t val)
    {
        __target_switch
        {
        case glsl:
            __intrinsic_asm "$TR($0)";
        case spirv:
            return spirv_asm
            {
                result:$$ConstBufferPointer = OpConvertUToPtr $val;
            };
        }
    }

    // Convert this pointer to a 64-bit integer (spirv: `OpConvertPtrToU`).
    __glsl_version(450)
    __glsl_extension(GL_EXT_shader_explicit_arithmetic_types_int64)
    __glsl_extension(GL_EXT_buffer_reference)
    [require(glsl_spirv, bufferreference_int64)]
    uint64_t toUInt()
    {
        __target_switch
        {
        case glsl:
            __intrinsic_asm "uint64_t($0)";
        case spirv:
            return spirv_asm
            {
                result:$$uint64_t = OpConvertPtrToU $this;
            };
        }
    }

    // True when the pointer, converted to a 64-bit integer, is non-zero.
    __glsl_version(450)
    __glsl_extension(GL_EXT_shader_explicit_arithmetic_types_int64)
    __glsl_extension(GL_EXT_buffer_reference)
    [__NoSideEffect]
    [ForceInline]
    [require(glsl_spirv, bufferreference_int64)]
    bool isValid()
    {
        __target_switch
        {
        case glsl:
            __intrinsic_asm "(uint64_t($0) != 0)";
        case spirv:
            // A named zero is needed so the asm block can reference it as `$zero`.
            uint64_t zero = 0ULL;
            return spirv_asm
            {
                %ptrval:$$uint64_t = OpConvertPtrToU $this;
                result:$$bool = OpINotEqual %ptrval $zero;
            };
        }
    }
}

//
// HLSL-like dynamic resources
// https://microsoft.github.io/DirectX-Specs/d3d/HLSL_SM_6_6_DynamicResources.html
//
// For Khronos targets, `__DynamicResource` can be used to declare "untyped" global bindings as
// usual (e.g. unsized arrays for descriptor indexing), which will then be materialized into
// new aliased bindings for each distinct cast type.
//

// Untyped resource handle; `as` maps to the `kIROp_CastDynamicResource`
// intrinsic op.
// NOTE(review): the `T as>()` declaration looks truncated in this copy (no
// generic parameter list is visible); verify against the upstream file.
__magic_type(DynamicResourceType)
__intrinsic_type($(kIROp_DynamicResourceType))
struct __DynamicResource
{
    __intrinsic_op($(kIROp_CastDynamicResource))
    T as>();
}

// Marker interface implemented by the extensions below for every type that a
// `__DynamicResource` can be converted to.
interface __IDynamicResourceCastable
{
}

// Category tag used as the argument of `__IDynamicResourceCastable`.
enum __DynamicResourceKind
{
    General = 0, // CBV_SRV_UAV
    Sampler = 1
}

// Textures are castable from `__DynamicResource` with kind `General`; the
// initializer is an implicit conversion backed by `kIROp_CastDynamicResource`.
__generic
extension __TextureImpl : __IDynamicResourceCastable<__DynamicResourceKind.General>
{
    __intrinsic_op($(kIROp_CastDynamicResource))
    __implicit_conversion($(kConversionCost_GenericParamUpcast))
    __init(__DynamicResource res);
}

${{{{
// Type names that receive a generated `__IDynamicResourceCastable` extension.
const char* kDynamicResourceCastableTypes[] = {
    "StructuredBuffer",
    "RWStructuredBuffer",
    "AppendStructuredBuffer",
    "ConsumeStructuredBuffer",
    "RasterizerOrderedStructuredBuffer",
    "ByteAddressBuffer",
    "RWByteAddressBuffer",
    "RasterizerOrderedByteAddressBuffer",
    "SamplerState",
    "SamplerComparisonState",
    "ConstantBuffer",
    "TextureBuffer",
};

for (auto typeName : kDynamicResourceCastableTypes) {
    // Names containing "Sampler" get kind `Sampler`; all others get `General`.
    auto kind = strstr(typeName, "Sampler") ? "Sampler" : "General";
    // Emit a `__generic` prefix line for buffer-like names.
    // NOTE(review): both branches emit identical text in this copy, and the
    // second branch also matches the ByteAddressBuffer names; confirm against
    // the upstream file that this is what is intended.
    if (strstr(typeName, "StructuredBuffer"))
        sb << "__generic\n";
    else if (strstr(typeName, "Buffer"))
        sb << "__generic\n";
}}}}
extension $(typeName) : __IDynamicResourceCastable<__DynamicResourceKind.$(kind)>
{
    __intrinsic_op($(kIROp_CastDynamicResource))
    __implicit_conversion($(kConversionCost_GenericParamUpcast))
    __init(__DynamicResource res);
}

${{{{
// End of the loop over kDynamicResourceCastableTypes.
}
}}}}

// Read the shader clock as a `uint2`.
//   glsl:  `clock2x32ARB`
//   spirv: `OpReadClockKHR` with the scope operand `scopeId_subgroup` (3),
//          using capability `ShaderClockKHR` / extension `SPV_KHR_shader_clock`.
__glsl_version(450)
__glsl_extension(GL_ARB_shader_clock)
[require(glsl_spirv, GL_ARB_shader_clock)]
uint2 clock2x32ARB()
{
    __target_switch
    {
    case glsl:
        __intrinsic_asm "clock2x32ARB";
    case spirv:
        const uint32_t scopeId_subgroup = 3;
        return spirv_asm
        {
            OpCapability ShaderClockKHR;
            OpExtension "SPV_KHR_shader_clock";
            result:$$uint2 = OpReadClockKHR $scopeId_subgroup;
        };
    }
}

// Read the shader clock as a single `uint64_t`.
//   glsl:  `clockARB` (additionally requires `GL_ARB_gpu_shader_int64`)
//   spirv: `OpReadClockKHR` with the scope operand `scopeId_subgroup` (3),
//          using capability `ShaderClockKHR` / extension `SPV_KHR_shader_clock`.
__glsl_version(450)
__glsl_extension(GL_ARB_shader_clock)
__glsl_extension(GL_ARB_gpu_shader_int64)
[require(glsl_spirv, GL_ARB_shader_clock64)]
uint64_t clockARB()
{
    __target_switch
    {
    case glsl:
        __intrinsic_asm "clockARB";
    case spirv:
        const uint32_t scopeId_subgroup = 3;
        return spirv_asm
        {
            OpCapability ShaderClockKHR;
            OpExtension "SPV_KHR_shader_clock";
            result:$$uint64_t = OpReadClockKHR $scopeId_subgroup;
        };
    }
}