summaryrefslogtreecommitdiffstats
path: root/source/slang
diff options
context:
space:
mode:
Diffstat (limited to 'source/slang')
-rw-r--r--source/slang/hlsl.meta.slang58
1 files changed, 57 insertions, 1 deletions
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index 329a73a33..46851269f 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -56,9 +56,12 @@ struct ByteAddressBuffer
__target_intrinsic(glsl, "atomicAdd($0, $1)")
__glsl_version(430)
__glsl_extension(GL_EXT_shader_atomic_float)
-//__glsl_extension(GL_EXT_gpu_shader5)
float __atomicAdd(__ref float value, float amount);
+// Helper for HLSL, using NvAPI: expands to NvInterlockedAddUint64(buf, offset, <uint2 addend>).
+__target_intrinsic(hlsl, "NvInterlockedAddUint64($0, $1, $2)")
+uint2 __atomicAdd(RWByteAddressBuffer buf, uint offset, uint2); // the 64-bit addend and the result both travel as uint2
+
// Int versions require glsl 4.30
// https://www.khronos.org/registry/OpenGL-Refpages/gl4/html/atomicAdd.xhtml
@@ -70,6 +73,10 @@ __target_intrinsic(glsl, "atomicAdd($0, $1)")
__glsl_version(430)
uint __atomicAdd(__ref uint value, uint amount);
+__target_intrinsic(glsl, "atomicAdd($0, $1)") // int64_t overload: emitted for GLSL as atomicAdd(value, amount)
+__glsl_version(430)
+__glsl_extension(GL_EXT_shader_atomic_int64) // extension requested for the GLSL output of this overload
+int64_t __atomicAdd(__ref int64_t value, int64_t amount);
// Generic typed load from a ByteAddressBuffer. The declaration has no body:
// it is bound directly to the kIROp_ByteAddressBufferLoad IR op.
// NOTE(review): `offset` is presumably a byte offset into the buffer — confirm.
__intrinsic_op($(kIROp_ByteAddressBufferLoad))
T __byteAddressBufferLoad<T>(ByteAddressBuffer buffer, int offset);
@@ -192,6 +199,9 @@ ${{{{
// NvAPI support on DX
// NOTE! To use this feature on HLSL, the shader needs to include 'nvHLSLExtns.h' from the NvAPI SDK
//
+
+ // Fp32: atomic float add at byteAddress; the value produced by the target intrinsic is stored in originalValue.
+
__target_intrinsic(hlsl, "($3 = NvInterlockedAddFp32($0, $1, $2))") // HLSL: NvAPI call, result assigned to the out argument
__target_intrinsic(cuda, "(*$3 = atomicAdd((float*)$0._getPtrAt($1), $2))") // CUDA: atomicAdd on the float* obtained from _getPtrAt(byteAddress)
void InterlockedAddFp32(uint byteAddress, float valueToAdd, out float originalValue);
@@ -203,6 +213,8 @@ ${{{{
originalValue = __atomicAdd(buf[byteAddress / 4], valueToAdd);
}
+ // Fp32 without returning the original value: same intrinsics as the out-variant, but the result is not stored.
+
__target_intrinsic(hlsl, "(NvInterlockedAddFp32($0, $1, $2))") // HLSL: NvAPI call, return value unused
__target_intrinsic(cuda, "atomicAdd((float*)$0._getPtrAt($1), $2)") // CUDA: atomicAdd on the float* obtained from _getPtrAt(byteAddress)
void InterlockedAddFp32(uint byteAddress, float valueToAdd);
@@ -214,6 +226,50 @@ ${{{{
__atomicAdd(buf[byteAddress / 4], valueToAdd);
}
+ // Int64: CUDA binding of the 64-bit atomic add that also writes the intrinsic's result to originalValue.
+ __cuda_sm_version(6.0)
+ __target_intrinsic(cuda, "(*$3 = atomicAdd((uint64_t*)$0._getPtrAt($1), $2))") // the address from _getPtrAt(byteAddress) is reinterpreted as uint64_t*
+ void InterlockedAddI64(uint byteAddress, int64_t valueToAdd, out int64_t originalValue);
+
+ __specialized_for_target(hlsl)
+ void InterlockedAddI64(uint byteAddress, int64_t inValueToAdd, out int64_t outOriginalValue)
+ {
+ // The uint2-based __atomicAdd helper takes the operand as two 32-bit halves: x = low, y = high.
+ const uint lowBits = uint(inValueToAdd);
+ const uint highBits = uint(uint64_t(inValueToAdd) >> 32);
+ const uint2 previous = __atomicAdd(this, byteAddress, uint2(lowBits, highBits));
+ // Reassemble the returned halves into one 64-bit result for the caller.
+ outOriginalValue = (int64_t(previous.y) << 32) | previous.x;
+ }
+
+ __specialized_for_target(glsl)
+ void InterlockedAddI64(uint byteAddress, int64_t valueToAdd, out int64_t originalValue)
+ {
+ // Reinterpret this buffer as a structured buffer of int64_t so an element can be passed by reference.
+ RWStructuredBuffer<int64_t> int64View = __getEquivalentStructuredBuffer<int64_t>(this);
+ // Dividing by 8 turns the byte address into an index of 64-bit elements.
+ const uint elementIndex = byteAddress / 8;
+ originalValue = __atomicAdd(int64View[elementIndex], valueToAdd);
+ }
+
+ // Int64 without returning the original value: CUDA binding whose atomicAdd result is not stored.
+ __cuda_sm_version(6.0)
+ __target_intrinsic(cuda, "atomicAdd((uint64_t*)$0._getPtrAt($1), $2)") // the address from _getPtrAt(byteAddress) is reinterpreted as uint64_t*
+ void InterlockedAddI64(uint byteAddress, int64_t valueToAdd);
+
+ __specialized_for_target(hlsl)
+ void InterlockedAddI64(uint byteAddress, int64_t inValueToAdd)
+ {
+ // Pack the operand as uint2 (x = low 32 bits, y = high 32 bits) for the uint2-based helper.
+ const uint lowBits = uint(inValueToAdd);
+ const uint highBits = uint(uint64_t(inValueToAdd) >> 32);
+ // The value the helper returns is deliberately ignored in this overload.
+ __atomicAdd(this, byteAddress, uint2(lowBits, highBits));
+ }
+
+ __specialized_for_target(glsl)
+ void InterlockedAddI64(uint byteAddress, int64_t valueToAdd)
+ {
+ // Reinterpret this buffer as a structured buffer of int64_t so an element can be passed by reference.
+ RWStructuredBuffer<int64_t> int64View = __getEquivalentStructuredBuffer<int64_t>(this);
+ // Dividing by 8 turns the byte address into an index of 64-bit elements; the add's result is dropped.
+ const uint elementIndex = byteAddress / 8;
+ __atomicAdd(int64View[elementIndex], valueToAdd);
+ }
+
${{{{
}
}}}}