summary | refs | log | tree | commit | diff | stats
path: root/source
diff options
context:
space:
mode:
author: Sriram Murali <85252063+sriramm-nv@users.noreply.github.com> 2024-04-22 19:14:35 -0700
committer: GitHub <noreply@github.com> 2024-04-22 19:14:35 -0700
commit: 484c1e618dddf586360c03f90e6c27ece1065acd (patch)
tree: 90b6b159abdce9bdf48b7545953917e6318aa501 /source
parent: 22fbca520f73007e2b8625bfe7eb1c77d528f301 (diff)
ForceInline ByteAddressBuffer operations in stdlib (#4003)
* ForceInline ByteAddressBuffer operations in stdlib
* fixup
Diffstat (limited to 'source')
-rw-r--r-- source/slang/hlsl.meta.slang 51
1 file changed, 38 insertions, 13 deletions
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index cb01e9e68..16b143529 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -145,6 +145,7 @@ struct ByteAddressBuffer
uint4 Load4(int location, out uint status);
[__readNone]
+ [ForceInline]
T Load<T>(int location)
{
return __byteAddressBufferLoad<T>(this, location);
@@ -325,8 +326,8 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,1,format>
__intrinsic_op($(kIROp_CombinedTextureSamplerGetSampler))
SamplerComparisonState __getComparisonSampler();
- [ForceInline]
[__readNone]
+ [ForceInline]
[require(glsl_hlsl_spirv, texture_querylod)]
float CalculateLevelOfDetail(TextureCoord location)
{
@@ -346,8 +347,8 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,1,format>
}
}
- [ForceInline]
[__readNone]
+ [ForceInline]
[require(glsl_hlsl_spirv, texture_querylod)]
float CalculateLevelOfDetailUnclamped(TextureCoord location)
{
@@ -368,6 +369,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,1,format>
}
[__readNone]
+ [ForceInline]
[require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_1_fragment)]
T Sample(vector<float, Shape.dimensions+isArray> location)
{
@@ -417,6 +419,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,1,format>
}
[__readNone]
+ [ForceInline]
__glsl_extension(GL_ARB_sparse_texture_clamp)
[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)]
T Sample(vector<float, Shape.dimensions+isArray> location, vector<int, Shape.planeDimensions> offset, float clamp)
@@ -439,6 +442,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,1,format>
}
[__readNone]
+ [ForceInline]
__target_intrinsic(hlsl)
[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)]
T Sample(vector<float, Shape.dimensions+isArray> location, vector<int, Shape.planeDimensions> offset, float clamp, out uint status)
@@ -448,6 +452,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,1,format>
}
[__readNone]
+ [ForceInline]
[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)]
T SampleBias(vector<float, Shape.dimensions+isArray> location, float bias)
{
@@ -469,6 +474,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,1,format>
}
[__readNone]
+ [ForceInline]
[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)]
T SampleBias(vector<float, Shape.dimensions+isArray> location, float bias, constexpr vector<int, Shape.planeDimensions> offset)
{
@@ -599,6 +605,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,1,format>
}
[__readNone]
+ [ForceInline]
[require(cpp_glsl_hlsl_spirv, texture_sm_4_1)]
T SampleGrad(vector<float, Shape.dimensions+isArray> location, vector<float, Shape.dimensions> gradX, vector<float, Shape.dimensions> gradY)
{
@@ -620,6 +627,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,1,format>
}
[__readNone]
+ [ForceInline]
[require(cpp_glsl_hlsl_spirv, texture_sm_4_1)]
T SampleGrad(vector<float, Shape.dimensions+isArray> location, vector<float, Shape.dimensions> gradX, vector<float, Shape.dimensions> gradY, constexpr vector<int, Shape.dimensions> offset)
{
@@ -639,8 +647,9 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,1,format>
}
}
- __glsl_extension(GL_ARB_sparse_texture_clamp)
[__readNone]
+ [ForceInline]
+ __glsl_extension(GL_ARB_sparse_texture_clamp)
[require(cpp_glsl_hlsl_spirv, texture_sm_4_1)]
T SampleGrad(vector<float, Shape.dimensions+isArray> location, vector<float, Shape.dimensions> gradX, vector<float, Shape.dimensions> gradY, constexpr vector<int, Shape.dimensions> offset, float lodClamp)
{
@@ -785,6 +794,7 @@ __generic<T:IFloat, Shape: __ITextureShape, let isArray:int, let isMS:int, let s
extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format>
{
[__readNone]
+ [ForceInline]
[require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_1_fragment)]
T Sample(SamplerState s, vector<float, Shape.dimensions+isArray> location)
{
@@ -837,6 +847,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format>
}
[__readNone]
+ [ForceInline]
[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)]
T Sample(SamplerState s, vector<float, Shape.dimensions+isArray> location, constexpr vector<int, Shape.planeDimensions> offset)
{
@@ -858,6 +869,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format>
}
[__readNone]
+ [ForceInline]
__glsl_extension(GL_ARB_sparse_texture_clamp)
[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)]
T Sample(SamplerState s, vector<float, Shape.dimensions+isArray> location, constexpr vector<int, Shape.planeDimensions> offset, float clamp)
@@ -880,8 +892,9 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format>
}
}
- [__readNone]
__target_intrinsic(hlsl)
+ [__readNone]
+ [ForceInline]
T Sample(SamplerState s, vector<float, Shape.dimensions+isArray> location, constexpr vector<int, Shape.planeDimensions> offset, float clamp, out uint status)
{
status = 0;
@@ -889,6 +902,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format>
}
[__readNone]
+ [ForceInline]
[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)]
T SampleBias(SamplerState s, vector<float, Shape.dimensions+isArray> location, float bias)
{
@@ -910,6 +924,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format>
}
[__readNone]
+ [ForceInline]
[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)]
T SampleBias(SamplerState s, vector<float, Shape.dimensions+isArray> location, float bias, constexpr vector<int, Shape.planeDimensions> offset)
{
@@ -930,7 +945,8 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format>
}
}
- [__readNone] [ForceInline]
+ [__readNone]
+ [ForceInline]
[require(glsl_hlsl_spirv, texture_shadowlod)]
float SampleCmp(SamplerComparisonState s, vector<float, Shape.dimensions+isArray> location, float compareValue)
{
@@ -960,7 +976,8 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format>
}
}
- [__readNone] [ForceInline]
+ [__readNone]
+ [ForceInline]
[require(glsl_hlsl_spirv, texture_shadowlod)]
float SampleCmpLevelZero(SamplerComparisonState s, vector<float, Shape.dimensions+isArray> location, float compareValue)
{
@@ -987,7 +1004,8 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format>
}
}
- [__readNone] [ForceInline]
+ [__readNone]
+ [ForceInline]
[require(glsl_hlsl_spirv, texture_shadowlod)]
float SampleCmp(SamplerComparisonState s, vector<float, Shape.dimensions+isArray> location, float compareValue, constexpr vector<int, Shape.planeDimensions> offset)
{
@@ -1013,7 +1031,8 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format>
}
}
- [__readNone] [ForceInline]
+ [__readNone]
+ [ForceInline]
[require(glsl_hlsl_spirv, texture_shadowlod)]
float SampleCmpLevelZero(SamplerComparisonState s, vector<float, Shape.dimensions+isArray> location, float compareValue, constexpr vector<int, Shape.planeDimensions> offset)
{
@@ -1041,6 +1060,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format>
}
[__readNone]
+ [ForceInline]
[require(cpp_glsl_hlsl_spirv, texture_sm_4_1)]
T SampleGrad(SamplerState s, vector<float, Shape.dimensions+isArray> location, vector<float, Shape.dimensions> gradX, vector<float, Shape.dimensions> gradY)
{
@@ -1062,6 +1082,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format>
}
[__readNone]
+ [ForceInline]
[require(cpp_glsl_hlsl_spirv, texture_sm_4_1)]
T SampleGrad(SamplerState s, vector<float, Shape.dimensions+isArray> location, vector<float, Shape.dimensions> gradX, vector<float, Shape.dimensions> gradY, constexpr vector<int, Shape.dimensions> offset)
{
@@ -1083,8 +1104,9 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format>
}
}
- __glsl_extension(GL_ARB_sparse_texture_clamp)
[__readNone]
+ [ForceInline]
+ __glsl_extension(GL_ARB_sparse_texture_clamp)
[require(cpp_glsl_hlsl_spirv, texture_sm_4_1)]
T SampleGrad(SamplerState s, vector<float, Shape.dimensions+isArray> location, vector<float, Shape.dimensions> gradX, vector<float, Shape.dimensions> gradY, constexpr vector<int, Shape.dimensions> offset, float lodClamp)
{
@@ -2813,7 +2835,6 @@ ${{{{
[__requiresNVAPI]
[ForceInline]
__cuda_sm_version(2.0)
- [ForceInline]
[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda_float1)]
void InterlockedAddF32(uint byteAddress, float valueToAdd)
{
@@ -2834,7 +2855,6 @@ ${{{{
// Int64 Add
[ForceInline]
__cuda_sm_version(6.0)
- [ForceInline]
[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda6_int64)]
void InterlockedAddI64(uint byteAddress, int64_t valueToAdd, out int64_t originalValue)
{
@@ -2858,7 +2878,6 @@ ${{{{
[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda6_int64)]
void InterlockedAddI64(uint byteAddress, int64_t valueToAdd);
- [ForceInline]
__specialized_for_target(hlsl)
[ForceInline]
void InterlockedAddI64(uint byteAddress, int64_t valueToAdd)
@@ -2866,7 +2885,6 @@ ${{{{
__atomicAdd(this, byteAddress, __asuint2(valueToAdd));
}
- [ForceInline]
__specialized_for_target(glsl)
__specialized_for_target(spirv)
[ForceInline]
@@ -2906,6 +2924,7 @@ ${{{{
uint64_t InterlockedMaxU64(uint byteAddress, uint64_t value);
__specialized_for_target(hlsl)
+ [ForceInline]
uint64_t InterlockedMaxU64(uint byteAddress, uint64_t value) { return __asuint64(__atomicMax(this, byteAddress, __asuint2(value))); }
__specialized_for_target(glsl)
@@ -2965,6 +2984,7 @@ ${{{{
uint64_t InterlockedMinU64(uint byteAddress, uint64_t value);
__specialized_for_target(hlsl)
+ [ForceInline]
uint64_t InterlockedMinU64(uint byteAddress, uint64_t value) { return __asuint64(__atomicMin(this, byteAddress, __asuint2(value))); }
__specialized_for_target(glsl)
@@ -3024,6 +3044,7 @@ ${{{{
uint64_t InterlockedAndU64(uint byteAddress, uint64_t value);
__specialized_for_target(hlsl)
+ [ForceInline]
uint64_t InterlockedAndU64(uint byteAddress, uint64_t value) { return __asuint64(__atomicAnd(this, byteAddress, __asuint2(value))); }
__specialized_for_target(glsl)
@@ -3063,6 +3084,7 @@ ${{{{
uint64_t InterlockedOrU64(uint byteAddress, uint64_t value);
__specialized_for_target(hlsl)
+ [ForceInline]
uint64_t InterlockedOrU64(uint byteAddress, uint64_t value) { return __asuint64(__atomicOr(this, byteAddress, __asuint2(value))); }
__specialized_for_target(glsl)
@@ -3102,6 +3124,7 @@ ${{{{
uint64_t InterlockedXorU64(uint byteAddress, uint64_t value);
__specialized_for_target(hlsl)
+ [ForceInline]
uint64_t InterlockedXorU64(uint byteAddress, uint64_t value) { return __asuint64(__atomicXor(this, byteAddress, __asuint2(value))); }
__specialized_for_target(glsl)
@@ -3140,6 +3163,7 @@ ${{{{
uint64_t InterlockedExchangeU64(uint byteAddress, uint64_t value);
__specialized_for_target(hlsl)
+ [ForceInline]
uint64_t InterlockedExchangeU64(uint byteAddress, uint64_t value) { return __asuint64(__atomicExchange(this, byteAddress, __asuint2(value))); }
__specialized_for_target(glsl)
@@ -3255,6 +3279,7 @@ ${{{{
return;
}
}
+
[ForceInline]
void InterlockedCompareExchange64(uint byteAddress, uint64_t compareValue, uint64_t value, out uint64_t outOriginalValue)
{