From 484c1e618dddf586360c03f90e6c27ece1065acd Mon Sep 17 00:00:00 2001 From: Sriram Murali <85252063+sriramm-nv@users.noreply.github.com> Date: Mon, 22 Apr 2024 19:14:35 -0700 Subject: ForceInline ByteAddressBuffer operations in stdlib (#4003) * ForceInline ByteAddressBuffer operations in stdlib * fixup --- source/slang/hlsl.meta.slang | 51 +++++++++++++++++++++++++++++++++----------- 1 file changed, 38 insertions(+), 13 deletions(-) (limited to 'source') diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index cb01e9e68..16b143529 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -145,6 +145,7 @@ struct ByteAddressBuffer uint4 Load4(int location, out uint status); [__readNone] + [ForceInline] T Load(int location) { return __byteAddressBufferLoad(this, location); @@ -325,8 +326,8 @@ extension __TextureImpl __intrinsic_op($(kIROp_CombinedTextureSamplerGetSampler)) SamplerComparisonState __getComparisonSampler(); - [ForceInline] [__readNone] + [ForceInline] [require(glsl_hlsl_spirv, texture_querylod)] float CalculateLevelOfDetail(TextureCoord location) { @@ -346,8 +347,8 @@ extension __TextureImpl } } - [ForceInline] [__readNone] + [ForceInline] [require(glsl_hlsl_spirv, texture_querylod)] float CalculateLevelOfDetailUnclamped(TextureCoord location) { @@ -368,6 +369,7 @@ extension __TextureImpl } [__readNone] + [ForceInline] [require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_1_fragment)] T Sample(vector location) { @@ -417,6 +419,7 @@ extension __TextureImpl } [__readNone] + [ForceInline] __glsl_extension(GL_ARB_sparse_texture_clamp) [require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] T Sample(vector location, vector offset, float clamp) @@ -439,6 +442,7 @@ extension __TextureImpl } [__readNone] + [ForceInline] __target_intrinsic(hlsl) [require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] T Sample(vector location, vector offset, float clamp, out uint status) @@ -448,6 +452,7 @@ extension __TextureImpl } [__readNone] + [ForceInline] [require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] T SampleBias(vector location, float bias) { @@ -469,6 +474,7 @@ extension __TextureImpl } [__readNone] + [ForceInline] [require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] T SampleBias(vector location, float bias, constexpr vector offset) { @@ -599,6 +605,7 @@ extension __TextureImpl } [__readNone] + [ForceInline] [require(cpp_glsl_hlsl_spirv, texture_sm_4_1)] T SampleGrad(vector location, vector gradX, vector gradY) { @@ -620,6 +627,7 @@ extension __TextureImpl } [__readNone] + [ForceInline] [require(cpp_glsl_hlsl_spirv, texture_sm_4_1)] T SampleGrad(vector location, vector gradX, vector gradY, constexpr vector offset) { @@ -639,8 +647,9 @@ extension __TextureImpl } } - __glsl_extension(GL_ARB_sparse_texture_clamp) [__readNone] + [ForceInline] + __glsl_extension(GL_ARB_sparse_texture_clamp) [require(cpp_glsl_hlsl_spirv, texture_sm_4_1)] T SampleGrad(vector location, vector gradX, vector gradY, constexpr vector offset, float lodClamp) { @@ -785,6 +794,7 @@ __generic { [__readNone] + [ForceInline] [require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_1_fragment)] T Sample(SamplerState s, vector location) { @@ -837,6 +847,7 @@ extension __TextureImpl } [__readNone] + [ForceInline] [require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] T Sample(SamplerState s, vector location, constexpr vector offset) { @@ -858,6 +869,7 @@ extension __TextureImpl } [__readNone] + [ForceInline] __glsl_extension(GL_ARB_sparse_texture_clamp) [require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] T Sample(SamplerState s, vector location, constexpr vector offset, float clamp) @@ -880,8 +892,9 @@ extension __TextureImpl } } - [__readNone] __target_intrinsic(hlsl) + [__readNone] + [ForceInline] T Sample(SamplerState s, vector location, constexpr vector offset, float clamp, out uint status) { status = 0; @@ -889,6 +902,7 @@ extension __TextureImpl } [__readNone] + [ForceInline] [require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] T SampleBias(SamplerState s, vector location, float bias) { @@ -910,6 +924,7 @@ extension __TextureImpl } [__readNone] + [ForceInline] [require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] T SampleBias(SamplerState s, vector location, float bias, constexpr vector offset) { @@ -930,7 +945,8 @@ extension __TextureImpl } } - [__readNone] [ForceInline] + [__readNone] + [ForceInline] [require(glsl_hlsl_spirv, texture_shadowlod)] float SampleCmp(SamplerComparisonState s, vector location, float compareValue) { @@ -960,7 +976,8 @@ extension __TextureImpl } } - [__readNone] [ForceInline] + [__readNone] + [ForceInline] [require(glsl_hlsl_spirv, texture_shadowlod)] float SampleCmpLevelZero(SamplerComparisonState s, vector location, float compareValue) { @@ -987,7 +1004,8 @@ extension __TextureImpl } } - [__readNone] [ForceInline] + [__readNone] + [ForceInline] [require(glsl_hlsl_spirv, texture_shadowlod)] float SampleCmp(SamplerComparisonState s, vector location, float compareValue, constexpr vector offset) { @@ -1013,7 +1031,8 @@ extension __TextureImpl } } - [__readNone] [ForceInline] + [__readNone] + [ForceInline] [require(glsl_hlsl_spirv, texture_shadowlod)] float SampleCmpLevelZero(SamplerComparisonState s, vector location, float compareValue, constexpr vector offset) { @@ -1041,6 +1060,7 @@ extension __TextureImpl } [__readNone] + [ForceInline] [require(cpp_glsl_hlsl_spirv, texture_sm_4_1)] T SampleGrad(SamplerState s, vector location, vector gradX, vector gradY) { @@ -1062,6 +1082,7 @@ extension __TextureImpl } [__readNone] + [ForceInline] [require(cpp_glsl_hlsl_spirv, texture_sm_4_1)] T SampleGrad(SamplerState s, vector location, vector gradX, vector gradY, constexpr vector offset) { @@ -1083,8 +1104,9 @@ extension __TextureImpl } } - __glsl_extension(GL_ARB_sparse_texture_clamp) [__readNone] + [ForceInline] + __glsl_extension(GL_ARB_sparse_texture_clamp) [require(cpp_glsl_hlsl_spirv, texture_sm_4_1)] T SampleGrad(SamplerState s, vector location, vector gradX, vector gradY, constexpr vector offset, float lodClamp) { @@ -2813,7 +2835,6 @@ ${{{{ [__requiresNVAPI] [ForceInline] __cuda_sm_version(2.0) - [ForceInline] [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda_float1)] void InterlockedAddF32(uint byteAddress, float valueToAdd) { @@ -2834,7 +2855,6 @@ ${{{{ // Int64 Add [ForceInline] __cuda_sm_version(6.0) - [ForceInline] [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda6_int64)] void InterlockedAddI64(uint byteAddress, int64_t valueToAdd, out int64_t originalValue) { @@ -2858,7 +2878,6 @@ ${{{{ [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda6_int64)] void InterlockedAddI64(uint byteAddress, int64_t valueToAdd); - [ForceInline] __specialized_for_target(hlsl) [ForceInline] void InterlockedAddI64(uint byteAddress, int64_t valueToAdd) @@ -2866,7 +2885,6 @@ ${{{{ __atomicAdd(this, byteAddress, __asuint2(valueToAdd)); } - [ForceInline] __specialized_for_target(glsl) __specialized_for_target(spirv) [ForceInline] @@ -2906,6 +2924,7 @@ ${{{{ uint64_t InterlockedMaxU64(uint byteAddress, uint64_t value); __specialized_for_target(hlsl) + [ForceInline] uint64_t InterlockedMaxU64(uint byteAddress, uint64_t value) { return __asuint64(__atomicMax(this, byteAddress, __asuint2(value))); } __specialized_for_target(glsl) @@ -2965,6 +2984,7 @@ ${{{{ uint64_t InterlockedMinU64(uint byteAddress, uint64_t value); __specialized_for_target(hlsl) + [ForceInline] uint64_t InterlockedMinU64(uint byteAddress, uint64_t value) { return __asuint64(__atomicMin(this, byteAddress, __asuint2(value))); } __specialized_for_target(glsl) @@ -3024,6 +3044,7 @@ ${{{{ uint64_t InterlockedAndU64(uint byteAddress, uint64_t value); __specialized_for_target(hlsl) + [ForceInline] uint64_t InterlockedAndU64(uint byteAddress, uint64_t value) { return __asuint64(__atomicAnd(this, byteAddress, __asuint2(value))); } __specialized_for_target(glsl) @@ -3063,6 +3084,7 @@ ${{{{ uint64_t InterlockedOrU64(uint byteAddress, uint64_t value); __specialized_for_target(hlsl) + [ForceInline] uint64_t InterlockedOrU64(uint byteAddress, uint64_t value) { return __asuint64(__atomicOr(this, byteAddress, __asuint2(value))); } __specialized_for_target(glsl) @@ -3102,6 +3124,7 @@ ${{{{ uint64_t InterlockedXorU64(uint byteAddress, uint64_t value); __specialized_for_target(hlsl) + [ForceInline] uint64_t InterlockedXorU64(uint byteAddress, uint64_t value) { return __asuint64(__atomicXor(this, byteAddress, __asuint2(value))); } __specialized_for_target(glsl) @@ -3140,6 +3163,7 @@ ${{{{ uint64_t InterlockedExchangeU64(uint byteAddress, uint64_t value); __specialized_for_target(hlsl) + [ForceInline] uint64_t InterlockedExchangeU64(uint byteAddress, uint64_t value) { return __asuint64(__atomicExchange(this, byteAddress, __asuint2(value))); } __specialized_for_target(glsl) @@ -3255,6 +3279,7 @@ ${{{{ return; } } + [ForceInline] void InterlockedCompareExchange64(uint byteAddress, uint64_t compareValue, uint64_t value, out uint64_t outOriginalValue) { -- cgit v1.2.3