diff options
| author | Sriram Murali <85252063+sriramm-nv@users.noreply.github.com> | 2024-04-22 19:14:35 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-04-22 19:14:35 -0700 |
| commit | 484c1e618dddf586360c03f90e6c27ece1065acd (patch) | |
| tree | 90b6b159abdce9bdf48b7545953917e6318aa501 /source | |
| parent | 22fbca520f73007e2b8625bfe7eb1c77d528f301 (diff) | |
ForceInline ByteAddressBuffer operations in stdlib (#4003)
* ForceInline ByteAddressBuffer operations in stdlib
* fixup
Diffstat (limited to 'source')
| -rw-r--r-- | source/slang/hlsl.meta.slang | 51 |
1 file changed, 38 insertions, 13 deletions
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index cb01e9e68..16b143529 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -145,6 +145,7 @@ struct ByteAddressBuffer uint4 Load4(int location, out uint status); [__readNone] + [ForceInline] T Load<T>(int location) { return __byteAddressBufferLoad<T>(this, location); @@ -325,8 +326,8 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,1,format> __intrinsic_op($(kIROp_CombinedTextureSamplerGetSampler)) SamplerComparisonState __getComparisonSampler(); - [ForceInline] [__readNone] + [ForceInline] [require(glsl_hlsl_spirv, texture_querylod)] float CalculateLevelOfDetail(TextureCoord location) { @@ -346,8 +347,8 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,1,format> } } - [ForceInline] [__readNone] + [ForceInline] [require(glsl_hlsl_spirv, texture_querylod)] float CalculateLevelOfDetailUnclamped(TextureCoord location) { @@ -368,6 +369,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,1,format> } [__readNone] + [ForceInline] [require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_1_fragment)] T Sample(vector<float, Shape.dimensions+isArray> location) { @@ -417,6 +419,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,1,format> } [__readNone] + [ForceInline] __glsl_extension(GL_ARB_sparse_texture_clamp) [require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] T Sample(vector<float, Shape.dimensions+isArray> location, vector<int, Shape.planeDimensions> offset, float clamp) @@ -439,6 +442,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,1,format> } [__readNone] + [ForceInline] __target_intrinsic(hlsl) [require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] T Sample(vector<float, Shape.dimensions+isArray> location, vector<int, Shape.planeDimensions> offset, float clamp, out uint status) @@ -448,6 +452,7 @@ extension 
__TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,1,format> } [__readNone] + [ForceInline] [require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] T SampleBias(vector<float, Shape.dimensions+isArray> location, float bias) { @@ -469,6 +474,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,1,format> } [__readNone] + [ForceInline] [require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] T SampleBias(vector<float, Shape.dimensions+isArray> location, float bias, constexpr vector<int, Shape.planeDimensions> offset) { @@ -599,6 +605,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,1,format> } [__readNone] + [ForceInline] [require(cpp_glsl_hlsl_spirv, texture_sm_4_1)] T SampleGrad(vector<float, Shape.dimensions+isArray> location, vector<float, Shape.dimensions> gradX, vector<float, Shape.dimensions> gradY) { @@ -620,6 +627,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,1,format> } [__readNone] + [ForceInline] [require(cpp_glsl_hlsl_spirv, texture_sm_4_1)] T SampleGrad(vector<float, Shape.dimensions+isArray> location, vector<float, Shape.dimensions> gradX, vector<float, Shape.dimensions> gradY, constexpr vector<int, Shape.dimensions> offset) { @@ -639,8 +647,9 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,1,format> } } - __glsl_extension(GL_ARB_sparse_texture_clamp) [__readNone] + [ForceInline] + __glsl_extension(GL_ARB_sparse_texture_clamp) [require(cpp_glsl_hlsl_spirv, texture_sm_4_1)] T SampleGrad(vector<float, Shape.dimensions+isArray> location, vector<float, Shape.dimensions> gradX, vector<float, Shape.dimensions> gradY, constexpr vector<int, Shape.dimensions> offset, float lodClamp) { @@ -785,6 +794,7 @@ __generic<T:IFloat, Shape: __ITextureShape, let isArray:int, let isMS:int, let s extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format> { [__readNone] + [ForceInline] [require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_1_fragment)] T 
Sample(SamplerState s, vector<float, Shape.dimensions+isArray> location) { @@ -837,6 +847,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format> } [__readNone] + [ForceInline] [require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] T Sample(SamplerState s, vector<float, Shape.dimensions+isArray> location, constexpr vector<int, Shape.planeDimensions> offset) { @@ -858,6 +869,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format> } [__readNone] + [ForceInline] __glsl_extension(GL_ARB_sparse_texture_clamp) [require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] T Sample(SamplerState s, vector<float, Shape.dimensions+isArray> location, constexpr vector<int, Shape.planeDimensions> offset, float clamp) @@ -880,8 +892,9 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format> } } - [__readNone] __target_intrinsic(hlsl) + [__readNone] + [ForceInline] T Sample(SamplerState s, vector<float, Shape.dimensions+isArray> location, constexpr vector<int, Shape.planeDimensions> offset, float clamp, out uint status) { status = 0; @@ -889,6 +902,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format> } [__readNone] + [ForceInline] [require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] T SampleBias(SamplerState s, vector<float, Shape.dimensions+isArray> location, float bias) { @@ -910,6 +924,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format> } [__readNone] + [ForceInline] [require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)] T SampleBias(SamplerState s, vector<float, Shape.dimensions+isArray> location, float bias, constexpr vector<int, Shape.planeDimensions> offset) { @@ -930,7 +945,8 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format> } } - [__readNone] [ForceInline] + [__readNone] + [ForceInline] [require(glsl_hlsl_spirv, texture_shadowlod)] float SampleCmp(SamplerComparisonState s, vector<float, 
Shape.dimensions+isArray> location, float compareValue) { @@ -960,7 +976,8 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format> } } - [__readNone] [ForceInline] + [__readNone] + [ForceInline] [require(glsl_hlsl_spirv, texture_shadowlod)] float SampleCmpLevelZero(SamplerComparisonState s, vector<float, Shape.dimensions+isArray> location, float compareValue) { @@ -987,7 +1004,8 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format> } } - [__readNone] [ForceInline] + [__readNone] + [ForceInline] [require(glsl_hlsl_spirv, texture_shadowlod)] float SampleCmp(SamplerComparisonState s, vector<float, Shape.dimensions+isArray> location, float compareValue, constexpr vector<int, Shape.planeDimensions> offset) { @@ -1013,7 +1031,8 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format> } } - [__readNone] [ForceInline] + [__readNone] + [ForceInline] [require(glsl_hlsl_spirv, texture_shadowlod)] float SampleCmpLevelZero(SamplerComparisonState s, vector<float, Shape.dimensions+isArray> location, float compareValue, constexpr vector<int, Shape.planeDimensions> offset) { @@ -1041,6 +1060,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format> } [__readNone] + [ForceInline] [require(cpp_glsl_hlsl_spirv, texture_sm_4_1)] T SampleGrad(SamplerState s, vector<float, Shape.dimensions+isArray> location, vector<float, Shape.dimensions> gradX, vector<float, Shape.dimensions> gradY) { @@ -1062,6 +1082,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format> } [__readNone] + [ForceInline] [require(cpp_glsl_hlsl_spirv, texture_sm_4_1)] T SampleGrad(SamplerState s, vector<float, Shape.dimensions+isArray> location, vector<float, Shape.dimensions> gradX, vector<float, Shape.dimensions> gradY, constexpr vector<int, Shape.dimensions> offset) { @@ -1083,8 +1104,9 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format> } } - 
__glsl_extension(GL_ARB_sparse_texture_clamp) [__readNone] + [ForceInline] + __glsl_extension(GL_ARB_sparse_texture_clamp) [require(cpp_glsl_hlsl_spirv, texture_sm_4_1)] T SampleGrad(SamplerState s, vector<float, Shape.dimensions+isArray> location, vector<float, Shape.dimensions> gradX, vector<float, Shape.dimensions> gradY, constexpr vector<int, Shape.dimensions> offset, float lodClamp) { @@ -2813,7 +2835,6 @@ ${{{{ [__requiresNVAPI] [ForceInline] __cuda_sm_version(2.0) - [ForceInline] [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda_float1)] void InterlockedAddF32(uint byteAddress, float valueToAdd) { @@ -2834,7 +2855,6 @@ ${{{{ // Int64 Add [ForceInline] __cuda_sm_version(6.0) - [ForceInline] [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda6_int64)] void InterlockedAddI64(uint byteAddress, int64_t valueToAdd, out int64_t originalValue) { @@ -2858,7 +2878,6 @@ ${{{{ [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda6_int64)] void InterlockedAddI64(uint byteAddress, int64_t valueToAdd); - [ForceInline] __specialized_for_target(hlsl) [ForceInline] void InterlockedAddI64(uint byteAddress, int64_t valueToAdd) @@ -2866,7 +2885,6 @@ ${{{{ __atomicAdd(this, byteAddress, __asuint2(valueToAdd)); } - [ForceInline] __specialized_for_target(glsl) __specialized_for_target(spirv) [ForceInline] @@ -2906,6 +2924,7 @@ ${{{{ uint64_t InterlockedMaxU64(uint byteAddress, uint64_t value); __specialized_for_target(hlsl) + [ForceInline] uint64_t InterlockedMaxU64(uint byteAddress, uint64_t value) { return __asuint64(__atomicMax(this, byteAddress, __asuint2(value))); } __specialized_for_target(glsl) @@ -2965,6 +2984,7 @@ ${{{{ uint64_t InterlockedMinU64(uint byteAddress, uint64_t value); __specialized_for_target(hlsl) + [ForceInline] uint64_t InterlockedMinU64(uint byteAddress, uint64_t value) { return __asuint64(__atomicMin(this, byteAddress, __asuint2(value))); } __specialized_for_target(glsl) @@ -3024,6 +3044,7 @@ ${{{{ uint64_t InterlockedAndU64(uint byteAddress, uint64_t 
value); __specialized_for_target(hlsl) + [ForceInline] uint64_t InterlockedAndU64(uint byteAddress, uint64_t value) { return __asuint64(__atomicAnd(this, byteAddress, __asuint2(value))); } __specialized_for_target(glsl) @@ -3063,6 +3084,7 @@ ${{{{ uint64_t InterlockedOrU64(uint byteAddress, uint64_t value); __specialized_for_target(hlsl) + [ForceInline] uint64_t InterlockedOrU64(uint byteAddress, uint64_t value) { return __asuint64(__atomicOr(this, byteAddress, __asuint2(value))); } __specialized_for_target(glsl) @@ -3102,6 +3124,7 @@ ${{{{ uint64_t InterlockedXorU64(uint byteAddress, uint64_t value); __specialized_for_target(hlsl) + [ForceInline] uint64_t InterlockedXorU64(uint byteAddress, uint64_t value) { return __asuint64(__atomicXor(this, byteAddress, __asuint2(value))); } __specialized_for_target(glsl) @@ -3140,6 +3163,7 @@ ${{{{ uint64_t InterlockedExchangeU64(uint byteAddress, uint64_t value); __specialized_for_target(hlsl) + [ForceInline] uint64_t InterlockedExchangeU64(uint byteAddress, uint64_t value) { return __asuint64(__atomicExchange(this, byteAddress, __asuint2(value))); } __specialized_for_target(glsl) @@ -3255,6 +3279,7 @@ ${{{{ return; } } + [ForceInline] void InterlockedCompareExchange64(uint byteAddress, uint64_t compareValue, uint64_t value, out uint64_t outOriginalValue) { |
