summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJay Kwak <82421531+jkwak-work@users.noreply.github.com>2024-04-15 19:47:23 -0700
committerGitHub <noreply@github.com>2024-04-15 19:47:23 -0700
commit030d7f45726187b5b23a3cfb9743166aa60fae30 (patch)
treea5618abd8d30034458778543db4122d2df9c7e1b
parent54745ac9aff75c579f886980dd3397c79d0f3e00 (diff)
Support 64bit HLSL atomic functions (#3957)
Resolves #3951 This adds a few atomic functions for SM6.6. The spec can be found from here: https://microsoft.github.io/DirectX-Specs/d3d/HLSL_SM_6_6_Int64_and_Float_Atomics.html The new functions are: void InterlockedAdd(inout XXX dest, in int64_t value, out int64_t original_value); void InterlockedAdd(inout XXX dest, in uint64_t value, out uint64_t original_value); void InterlockedAnd(inout XXX dest, in uint64_t value, out uint64_t original_value); void InterlockedOr(inout XXX dest, in uint64_t value, out uint64_t original_value); void InterlockedXor(inout XXX dest, in uint64_t value, out uint64_t original_value); void InterlockedMin(inout XXX dest, in int64_t value, out int64_t original_value); void InterlockedMin(inout XXX dest, in uint64_t value, out uint64_t original_value); void InterlockedMax(inout XXX dest, in int64_t value, out int64_t original_value); void InterlockedMax(inout XXX dest, in uint64_t value, out uint64_t original_value); void InterlockedExchange(inout XXX dest, in float value, out float original_value); void InterlockedExchange(inout XXX dest, in int64_t value, out int64_t original_value); void InterlockedExchange(inout XXX dest, in uint64_t value, out uint64_t original_value); void InterlockedCompareStore(inout XXX dest, in int64_t compare_value, in int64_t value); void InterlockedCompareStore(inout XXX dest, in uint64_t compare_value, in uint64_t value); void InterlockedCompareStoreFloatBitwise(inout XXX dest, in float compare_value, in float value); void InterlockedCompareExchange(inout XXX dest, in int64_t compare_value, in int64_t value, out int64_t original_value); void InterlockedCompareExchange(inout XXX dest, in uint64_t compare_value, in uint64_t value, out uint64_t original_value); void InterlockedCompareExchangeFloatBitwise(inout XXX dest, in float compare_value, in float value, out float original_value); void RWByteAddressBuffer::InterlockedAnd64(in uint dest_offset, in uint64_t value, out uint64_t original_value); void 
RWByteAddressBuffer::InterlockedOr64(in uint dest_offset, in uint64_t value, out uint64_t original_value); void RWByteAddressBuffer::InterlockedXor64(in uint dest_offset, in uint64_t value, out uint64_t original_value); void RWByteAddressBuffer::InterlockedMin64(in uint dest_offset, in int64_t value, out int64_t original_value); void RWByteAddressBuffer::InterlockedMin64(in uint dest_offset, in uint64_t value, out uint64_t original_value); void RWByteAddressBuffer::InterlockedMax64(in uint dest_offset, in int64_t value, out int64_t original_value); void RWByteAddressBuffer::InterlockedMax64(in uint dest_offset, in uint64_t value, out uint64_t original_value); void RWByteAddressBuffer::InterlockedExchangeFloat(in uint dest_offset, in float value, out float original_value); void RWByteAddressBuffer::InterlockedExchange64(in uint dest_offset, in int64_t value, out int64_t original_value); void RWByteAddressBuffer::InterlockedExchange64(in uint dest_offset, in uint64_t value, out uint64_t original_value); void RWByteAddressBuffer::InterlockedCompareStore64(in uint dest_offset, in int64_t compare_value, in int64_t value); void RWByteAddressBuffer::InterlockedCompareStore64(in uint dest_offset, in uint64_t compare_value, in uint64_t value); void RWByteAddressBuffer::InterlockedCompareStoreFloatBitwise(in uint dest_offset, in float compare_value, in float value); void RWByteAddressBuffer::InterlockedCompareExchangeFloatBitwise(in uint dest_offset, in float compare_value, in float value, out float original_value);
-rw-r--r--source/slang/hlsl.meta.slang588
-rw-r--r--tests/hlsl-intrinsic/atomic/atomic-intrinsics-64bit.slang336
2 files changed, 908 insertions, 16 deletions
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index ef5c3ae5d..3d712559d 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -2707,6 +2707,46 @@ ${{{{
return __atomicMax(buf[byteAddress / 8], value);
}
+ [require(hlsl)]
+ [ForceInline]
+ void InterlockedMax64(uint byteAddress, int64_t value) // int64_t overload with no output for the original value.
+ {
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm ".InterlockedMax64"; // hlsl is the only target case provided here.
+ }
+ }
+
+ [require(hlsl)]
+ [ForceInline]
+ void InterlockedMax64(uint byteAddress, int64_t value, out int64_t outOriginalValue) // int64_t overload with an `out` original-value argument.
+ {
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm ".InterlockedMax64"; // same intrinsic name as the overload without the out argument.
+ }
+ }
+
+ [require(hlsl)]
+ [ForceInline]
+ void InterlockedMax64(uint byteAddress, uint64_t value) // uint64_t overload with no output for the original value.
+ {
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm ".InterlockedMax64"; // hlsl is the only target case provided here.
+ }
+ }
+
+ [require(hlsl)]
+ [ForceInline]
+ void InterlockedMax64(uint byteAddress, uint64_t value, out uint64_t outOriginalValue) // uint64_t overload with an `out` original-value argument.
+ {
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm ".InterlockedMax64"; // same intrinsic name as the overload without the out argument.
+ }
+ }
+
// Min
__cuda_sm_version(3.5)
@@ -2724,6 +2764,46 @@ ${{{{
return __atomicMin(buf[byteAddress / 8], value);
}
+ [require(hlsl)]
+ [ForceInline]
+ void InterlockedMin64(uint byteAddress, int64_t value) // int64_t overload with no output for the original value.
+ {
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm ".InterlockedMin64"; // hlsl is the only target case provided here.
+ }
+ }
+
+ [require(hlsl)]
+ [ForceInline]
+ void InterlockedMin64(uint byteAddress, int64_t value, out int64_t outOriginalValue) // int64_t overload with an `out` original-value argument.
+ {
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm ".InterlockedMin64"; // same intrinsic name as the overload without the out argument.
+ }
+ }
+
+ [require(hlsl)]
+ [ForceInline]
+ void InterlockedMin64(uint byteAddress, uint64_t value) // uint64_t overload with no output for the original value.
+ {
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm ".InterlockedMin64"; // hlsl is the only target case provided here.
+ }
+ }
+
+ [require(hlsl)]
+ [ForceInline]
+ void InterlockedMin64(uint byteAddress, uint64_t value, out uint64_t outOriginalValue) // uint64_t overload with an `out` original-value argument.
+ {
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm ".InterlockedMin64"; // same intrinsic name as the overload without the out argument.
+ }
+ }
+
// And
__target_intrinsic(cuda, "atomicAnd($0._getPtrAt<uint64_t>($1), $2)")
@@ -2740,6 +2820,26 @@ ${{{{
return __atomicAnd(buf[byteAddress / 8], value);
}
+ [require(hlsl)]
+ [ForceInline]
+ void InterlockedAnd64(uint byteAddress, uint64_t value) // uint64_t only; no output for the original value.
+ {
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm ".InterlockedAnd64"; // hlsl is the only target case provided here.
+ }
+ }
+
+ [require(hlsl)]
+ [ForceInline]
+ void InterlockedAnd64(uint byteAddress, uint64_t value, out uint64_t outOriginalValue) // uint64_t overload with an `out` original-value argument.
+ {
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm ".InterlockedAnd64"; // same intrinsic name as the overload without the out argument.
+ }
+ }
+
// Or
__target_intrinsic(cuda, "atomicOr($0._getPtrAt<uint64_t>($1), $2)")
@@ -2756,6 +2856,26 @@ ${{{{
return __atomicOr(buf[byteAddress / 8], value);
}
+ [require(hlsl)]
+ [ForceInline]
+ void InterlockedOr64(uint byteAddress, uint64_t value) // uint64_t only; no output for the original value.
+ {
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm ".InterlockedOr64"; // hlsl is the only target case provided here.
+ }
+ }
+
+ [require(hlsl)]
+ [ForceInline]
+ void InterlockedOr64(uint byteAddress, uint64_t value, out uint64_t outOriginalValue) // uint64_t overload with an `out` original-value argument.
+ {
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm ".InterlockedOr64"; // same intrinsic name as the overload without the out argument.
+ }
+ }
+
// Xor
__target_intrinsic(cuda, "atomicXor($0._getPtrAt<uint64_t>($1), $2)")
@@ -2772,6 +2892,26 @@ ${{{{
return __atomicXor(buf[byteAddress / 8], value);
}
+ [require(hlsl)]
+ [ForceInline]
+ void InterlockedXor64(uint byteAddress, uint64_t value) // uint64_t only; no output for the original value.
+ {
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm ".InterlockedXor64"; // hlsl is the only target case provided here.
+ }
+ }
+
+ [require(hlsl)]
+ [ForceInline]
+ void InterlockedXor64(uint byteAddress, uint64_t value, out uint64_t outOriginalValue) // uint64_t overload with an `out` original-value argument.
+ {
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm ".InterlockedXor64"; // same intrinsic name as the overload without the out argument.
+ }
+ }
+
// Exchange
__target_intrinsic(cuda, "atomicExch($0._getPtrAt<uint64_t>($1), $2)")
@@ -2788,31 +2928,89 @@ ${{{{
return __atomicExchange(buf[byteAddress / 8], value);
}
+ [require(hlsl)]
+ [ForceInline]
+ void InterlockedExchangeFloat(uint byteAddress, float value, out float outOriginalValue) // float exchange; the only overload, always with the out argument.
+ {
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm ".InterlockedExchangeFloat"; // hlsl is the only target case provided here.
+ }
+ }
+
+ [require(hlsl)]
+ [ForceInline]
+ void InterlockedExchange64(uint byteAddress, int64_t value, out int64_t outOriginalValue) // int64_t overload; requires hlsl, unlike the pre-existing variants above this hunk.
+ {
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm ".InterlockedExchange64"; // hlsl is the only target case provided here.
+ }
+ }
+
+ [require(hlsl)]
+ [ForceInline]
+ void InterlockedExchange64(uint byteAddress, uint64_t value, out uint64_t outOriginalValue) // uint64_t counterpart of the int64_t overload.
+ {
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm ".InterlockedExchange64"; // hlsl is the only target case provided here.
+ }
+ }
+
// SM6.6 6 64bit atomics.
- __specialized_for_target(hlsl)
- void InterlockedAdd64(uint byteAddress, int64_t valueToAdd, out int64_t outOriginalValue)
+ [ForceInline]
+ void InterlockedAdd64(uint byteAddress, int64_t valueToAdd) // int64_t add whose result is discarded.
{
- __atomicAdd(this, byteAddress, valueToAdd, outOriginalValue);
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm ".InterlockedAdd64";
+ case glsl: // glsl and spirv share the body that follows.
+ case spirv:
+ let buf = __getEquivalentStructuredBuffer<int64_t>(this);
+ __atomicAdd(buf[byteAddress / 8], valueToAdd); // byte offset is divided by 8 to index 64-bit elements.
+ }
}
- __specialized_for_target(glsl)
- __specialized_for_target(spirv)
- void InterlockedAdd64(uint byteAddress, int64_t valueToAdd, out int64_t originalValue)
+
+ [ForceInline]
+ void InterlockedAdd64(uint byteAddress, int64_t valueToAdd, out int64_t outOriginalValue) // int64_t add; one definition now replaces the two target-specialized ones removed here.
{
- let buf = __getEquivalentStructuredBuffer<int64_t>(this);
- originalValue = __atomicAdd(buf[byteAddress / 8], valueToAdd);
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm ".InterlockedAdd64";
+ case glsl: // glsl and spirv share the body that follows.
+ case spirv:
+ let buf = __getEquivalentStructuredBuffer<int64_t>(this);
+ outOriginalValue = __atomicAdd(buf[byteAddress / 8], valueToAdd); // the result of __atomicAdd is handed back to the caller.
+ }
}
- __specialized_for_target(hlsl)
- void InterlockedAdd64(uint byteAddress, uint64_t valueToAdd, out uint64_t outOriginalValue)
+
+ [ForceInline]
+ void InterlockedAdd64(uint byteAddress, uint64_t valueToAdd) // uint64_t add whose result is discarded.
{
- __atomicAdd(this, byteAddress, valueToAdd, outOriginalValue);
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm ".InterlockedAdd64";
+ case glsl: // glsl and spirv share the body that follows.
+ case spirv:
+ let buf = __getEquivalentStructuredBuffer<uint64_t>(this);
+ __atomicAdd(buf[byteAddress / 8], valueToAdd); // byte offset is divided by 8 to index 64-bit elements.
+ }
}
- __specialized_for_target(glsl)
- __specialized_for_target(spirv)
- void InterlockedAdd64(uint byteAddress, uint64_t valueToAdd, out uint64_t originalValue)
+
+ [ForceInline]
+ void InterlockedAdd64(uint byteAddress, uint64_t valueToAdd, out uint64_t outOriginalValue) // uint64_t add; one definition now replaces the two target-specialized ones removed here.
{
- let buf = __getEquivalentStructuredBuffer<uint64_t>(this);
- originalValue = __atomicAdd(buf[byteAddress / 8], valueToAdd);
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm ".InterlockedAdd64";
+ case glsl: // glsl and spirv share the body that follows.
+ case spirv:
+ let buf = __getEquivalentStructuredBuffer<uint64_t>(this);
+ outOriginalValue = __atomicAdd(buf[byteAddress / 8], valueToAdd); // the result of __atomicAdd is handed back to the caller.
+ }
}
+
__specialized_for_target(hlsl)
void InterlockedCompareExchange64(uint byteAddress, int64_t compareValue, int64_t value, out int64_t outOriginalValue)
{
@@ -2837,6 +3035,66 @@ ${{{{
let buf = __getEquivalentStructuredBuffer<uint64_t>(this);
outOriginalValue = __cas(buf[byteAddress / 8], compareValue, value);
}
+ [require(hlsl)]
+ [ForceInline]
+ void InterlockedCompareStoreFloatBitwise(uint byteAddress, float compareValue, float value) // float compare-and-store; no original-value output.
+ {
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm ".InterlockedCompareStoreFloatBitwise"; // hlsl is the only target case provided here.
+ }
+ }
+
+ [require(hlsl)]
+ [ForceInline]
+ void InterlockedCompareExchangeFloatBitwise(uint byteAddress, float compareValue, float value, out float outOriginalValue) // float compare-and-exchange with an `out` original-value argument.
+ {
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm ".InterlockedCompareExchangeFloatBitwise"; // hlsl is the only target case provided here.
+ }
+ }
+
+ [require(hlsl)]
+ [ForceInline]
+ void InterlockedCompareStore64(uint byteAddress, int64_t compareValue, int64_t value) // int64_t compare-and-store; no original-value output.
+ {
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm ".InterlockedCompareStore64"; // hlsl is the only target case provided here.
+ }
+ }
+
+ [require(hlsl)]
+ [ForceInline]
+ void InterlockedCompareExchange64(uint byteAddress, int64_t compareValue, int64_t value, out int64_t outOriginalValue) // NOTE(review): an hlsl-specialized overload with this same parameter list is visible just above this hunk — confirm the two do not clash.
+ {
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm ".InterlockedCompareExchange64"; // hlsl is the only target case provided here.
+ }
+ }
+
+ [require(hlsl)]
+ [ForceInline]
+ void InterlockedCompareStore64(uint byteAddress, uint64_t compareValue, uint64_t value) // uint64_t compare-and-store; no original-value output.
+ {
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm ".InterlockedCompareStore64"; // hlsl is the only target case provided here.
+ }
+ }
+
+ [require(hlsl)]
+ [ForceInline]
+ void InterlockedCompareExchange64(uint byteAddress, uint64_t compareValue, uint64_t value, out uint64_t outOriginalValue) // NOTE(review): a uint64_t variant appears to exist already above this hunk — confirm this is not a duplicate.
+ {
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm ".InterlockedCompareExchange64"; // hlsl is the only target case provided here.
+ }
+ }
+
${{{{
} // endif (type == RWByteAddressBuffer)
}}}}
@@ -5200,6 +5458,42 @@ void InterlockedAdd(__ref uint dest, uint value, out uint original_value)
}
}
+[ForceInline]
+void InterlockedAdd(__ref int64_t dest, int64_t value) // int64_t free-function overload; no original-value output.
+{
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm "InterlockedAdd"; // only an hlsl case exists; note there is no [require(hlsl)] on this overload.
+ }
+}
+
+[ForceInline]
+void InterlockedAdd(__ref int64_t dest, int64_t value, out int64_t original_value) // int64_t overload with an `out` original-value argument.
+{
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm "InterlockedAdd"; // only an hlsl case exists for this overload.
+ }
+}
+
+[ForceInline]
+void InterlockedAdd(__ref uint64_t dest, uint64_t value) // uint64_t free-function overload; no original-value output.
+{
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm "InterlockedAdd"; // only an hlsl case exists for this overload.
+ }
+}
+
+[ForceInline]
+void InterlockedAdd(__ref uint64_t dest, uint64_t value, out uint64_t original_value) // uint64_t overload with an `out` original-value argument.
+{
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm "InterlockedAdd"; // only an hlsl case exists for this overload.
+ }
+}
+
__glsl_version(430)
void InterlockedAnd(__ref int dest, int value)
{
@@ -5266,6 +5560,24 @@ void InterlockedAnd(__ref uint dest, uint value, out uint original_value)
}
}
+[ForceInline]
+void InterlockedAnd(__ref uint64_t dest, uint64_t value) // uint64_t only (no int64_t overload is added); no original-value output.
+{
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm "InterlockedAnd"; // only an hlsl case exists for this overload.
+ }
+}
+
+[ForceInline]
+void InterlockedAnd(__ref uint64_t dest, uint64_t value, out uint64_t original_value) // uint64_t overload with an `out` original-value argument.
+{
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm "InterlockedAnd"; // only an hlsl case exists for this overload.
+ }
+}
+
__glsl_version(430)
void InterlockedCompareExchange(__ref int dest, int compare_value, int value, out int original_value)
{
@@ -5300,6 +5612,60 @@ void InterlockedCompareExchange(__ref uint dest, uint compare_value, uint value,
}
}
+[ForceInline]
+void InterlockedCompareExchangeFloatBitwise(__ref float dest, float compare_value, float value) // NOTE(review): the commit message lists only the form with `out float original_value` — confirm HLSL accepts this 3-argument call.
+{
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm "InterlockedCompareExchangeFloatBitwise"; // only an hlsl case exists for this overload.
+ }
+}
+
+[ForceInline]
+void InterlockedCompareExchangeFloatBitwise(__ref float dest, float compare_value, float value, out float original_value) // float compare-and-exchange with an `out` original-value argument.
+{
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm "InterlockedCompareExchangeFloatBitwise"; // only an hlsl case exists for this overload.
+ }
+}
+
+[ForceInline]
+void InterlockedCompareExchange(__ref int64_t dest, int64_t compare_value, int64_t value) // NOTE(review): the commit message lists only the form with `out int64_t original_value` — confirm HLSL accepts this 3-argument call.
+{
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm "InterlockedCompareExchange"; // only an hlsl case exists for this overload.
+ }
+}
+
+[ForceInline]
+void InterlockedCompareExchange(__ref int64_t dest, int64_t compare_value, int64_t value, out int64_t original_value) // int64_t compare-and-exchange with an `out` original-value argument.
+{
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm "InterlockedCompareExchange"; // only an hlsl case exists for this overload.
+ }
+}
+
+[ForceInline]
+void InterlockedCompareExchange(__ref uint64_t dest, uint64_t compare_value, uint64_t value) // NOTE(review): the commit message lists only the form with `out uint64_t original_value` — confirm HLSL accepts this 3-argument call.
+{
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm "InterlockedCompareExchange"; // only an hlsl case exists for this overload.
+ }
+}
+
+[ForceInline]
+void InterlockedCompareExchange(__ref uint64_t dest, uint64_t compare_value, uint64_t value, out uint64_t original_value) // uint64_t compare-and-exchange with an `out` original-value argument.
+{
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm "InterlockedCompareExchange"; // only an hlsl case exists for this overload.
+ }
+}
+
__glsl_version(430)
void InterlockedCompareStore(__ref int dest, int compare_value, int value)
{
@@ -5332,6 +5698,33 @@ void InterlockedCompareStore(__ref uint dest, uint compare_value, uint value)
}
}
+[ForceInline]
+void InterlockedCompareStoreFloatBitwise(__ref float dest, float compare_value, float value) // float compare-and-store; no original-value output.
+{
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm "InterlockedCompareStoreFloatBitwise"; // only an hlsl case exists for this overload.
+ }
+}
+
+[ForceInline]
+void InterlockedCompareStore(__ref int64_t dest, int64_t compare_value, int64_t value) // int64_t compare-and-store; the stray ';' that detached the body from this signature is removed.
+{
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm "InterlockedCompareStore"; // only an hlsl case exists for this overload.
+ }
+}
+
+[ForceInline]
+void InterlockedCompareStore(__ref uint64_t dest, uint64_t compare_value, uint64_t value) // uint64_t compare-and-store; the stray ';' that detached the body from this signature is removed.
+{
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm "InterlockedCompareStore"; // only an hlsl case exists for this overload.
+ }
+}
+
__glsl_version(430)
void InterlockedExchange(__ref int dest, int value, out int original_value)
{
@@ -5366,6 +5759,60 @@ void InterlockedExchange(__ref uint dest, uint value, out uint original_value)
}
}
+[ForceInline]
+void InterlockedExchange(__ref float dest, float value) // NOTE(review): the commit message lists only the form with `out float original_value` — confirm HLSL accepts this 2-argument call.
+{
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm "InterlockedExchange"; // only an hlsl case exists for this overload.
+ }
+}
+
+[ForceInline]
+void InterlockedExchange(__ref float dest, float value, out float original_value) // float exchange with an `out` original-value argument.
+{
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm "InterlockedExchange"; // only an hlsl case exists for this overload.
+ }
+}
+
+[ForceInline]
+void InterlockedExchange(__ref int64_t dest, int64_t value) // NOTE(review): the commit message lists only the form with `out int64_t original_value` — confirm HLSL accepts this 2-argument call.
+{
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm "InterlockedExchange"; // only an hlsl case exists for this overload.
+ }
+}
+
+[ForceInline]
+void InterlockedExchange(__ref int64_t dest, int64_t value, out int64_t original_value) // int64_t exchange with an `out` original-value argument.
+{
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm "InterlockedExchange"; // only an hlsl case exists for this overload.
+ }
+}
+
+[ForceInline]
+void InterlockedExchange(__ref uint64_t dest, uint64_t value) // NOTE(review): the commit message lists only the form with `out uint64_t original_value` — confirm HLSL accepts this 2-argument call.
+{
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm "InterlockedExchange"; // only an hlsl case exists for this overload.
+ }
+}
+
+[ForceInline]
+void InterlockedExchange(__ref uint64_t dest, uint64_t value, out uint64_t original_value) // uint64_t exchange with an `out` original-value argument.
+{
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm "InterlockedExchange"; // only an hlsl case exists for this overload.
+ }
+}
+
__glsl_version(430)
void InterlockedMax(__ref int dest, int value)
{
@@ -5432,6 +5879,42 @@ void InterlockedMax(__ref uint dest, uint value, out uint original_value)
}
}
+[ForceInline]
+void InterlockedMax(__ref int64_t dest, int64_t value) // int64_t free-function overload; no original-value output.
+{
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm "InterlockedMax"; // only an hlsl case exists for this overload.
+ }
+}
+
+[ForceInline]
+void InterlockedMax(__ref int64_t dest, int64_t value, out int64_t original_value) // int64_t overload with an `out` original-value argument.
+{
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm "InterlockedMax"; // only an hlsl case exists for this overload.
+ }
+}
+
+[ForceInline]
+void InterlockedMax(__ref uint64_t dest, uint64_t value) // uint64_t free-function overload; no original-value output.
+{
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm "InterlockedMax"; // only an hlsl case exists for this overload.
+ }
+}
+
+[ForceInline]
+void InterlockedMax(__ref uint64_t dest, uint64_t value, out uint64_t original_value) // uint64_t overload with an `out` original-value argument.
+{
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm "InterlockedMax"; // only an hlsl case exists for this overload.
+ }
+}
+
__glsl_version(430)
void InterlockedMin(__ref int dest, int value)
{
@@ -5498,6 +5981,42 @@ void InterlockedMin(__ref uint dest, uint value, out uint original_value)
}
}
+[ForceInline]
+void InterlockedMin(__ref int64_t dest, int64_t value) // int64_t free-function overload; no original-value output.
+{
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm "InterlockedMin"; // only an hlsl case exists for this overload.
+ }
+}
+
+[ForceInline]
+void InterlockedMin(__ref int64_t dest, int64_t value, out int64_t original_value) // int64_t overload with an `out` original-value argument.
+{
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm "InterlockedMin"; // only an hlsl case exists for this overload.
+ }
+}
+
+[ForceInline]
+void InterlockedMin(__ref uint64_t dest, uint64_t value) // uint64_t free-function overload; no original-value output.
+{
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm "InterlockedMin"; // only an hlsl case exists for this overload.
+ }
+}
+
+[ForceInline]
+void InterlockedMin(__ref uint64_t dest, uint64_t value, out uint64_t original_value) // uint64_t overload with an `out` original-value argument.
+{
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm "InterlockedMin"; // only an hlsl case exists for this overload.
+ }
+}
+
__glsl_version(430)
void InterlockedOr(__ref int dest, int value)
{
@@ -5564,6 +6083,24 @@ void InterlockedOr(__ref uint dest, uint value, out uint original_value)
}
}
+[ForceInline]
+void InterlockedOr(__ref uint64_t dest, uint64_t value) // uint64_t only (no int64_t overload is added); no original-value output.
+{
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm "InterlockedOr"; // only an hlsl case exists for this overload.
+ }
+}
+
+[ForceInline]
+void InterlockedOr(__ref uint64_t dest, uint64_t value, out uint64_t original_value) // uint64_t overload with an `out` original-value argument.
+{
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm "InterlockedOr"; // only an hlsl case exists for this overload.
+ }
+}
+
__glsl_version(430)
void InterlockedXor(__ref int dest, int value)
{
@@ -5630,6 +6167,25 @@ void InterlockedXor(__ref uint dest, uint value, out uint original_value)
}
}
+[ForceInline]
+void InterlockedXor(__ref uint64_t dest, uint64_t value) // uint64_t only (no int64_t overload is added); no original-value output.
+{
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm "InterlockedXor"; // only an hlsl case exists for this overload.
+ }
+}
+
+[ForceInline]
+void InterlockedXor(__ref uint64_t dest, uint64_t value, out uint64_t original_value) // uint64_t overload with an `out` original-value argument.
+{
+ __target_switch
+ {
+ case hlsl: __intrinsic_asm "InterlockedXor"; // only an hlsl case exists for this overload.
+ }
+}
+
+
// Is floating-point value finite?
__generic<T : __BuiltinFloatingPointType>
diff --git a/tests/hlsl-intrinsic/atomic/atomic-intrinsics-64bit.slang b/tests/hlsl-intrinsic/atomic/atomic-intrinsics-64bit.slang
new file mode 100644
index 000000000..aa05f9750
--- /dev/null
+++ b/tests/hlsl-intrinsic/atomic/atomic-intrinsics-64bit.slang
@@ -0,0 +1,336 @@
+//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=DX12):-slang -compute -dx12 -profile cs_6_6 -use-dxil -shaderobj -output-using-type
+
+// This is to support 64-bit `Interlocked*` functions defined for HLSL SM6.6
+// https://microsoft.github.io/DirectX-Specs/d3d/HLSL_SM_6_6_Int64_and_Float_Atomics.html
+
+//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):name=f32Buffer
+RWStructuredBuffer<float> f32Buffer;
+//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0], stride=8):name=u64Buffer
+RWStructuredBuffer<uint64_t> u64Buffer;
+//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0], stride=8):name=i64Buffer
+RWStructuredBuffer<int64_t> i64Buffer;
+
+//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]):name=fBuf
+RWByteAddressBuffer fBuf;
+//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]):name=uBuf
+RWByteAddressBuffer uBuf;
+//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]):name=iBuf
+RWByteAddressBuffer iBuf;
+
+groupshared float f32Shared[4] = { 0.f, 0.f, 0.f, 0.f }; // four entries, indexed by dispatchThreadID.x in computeMain.
+groupshared uint64_t u64Shared[4] = { 0, 0, 0, 0 };
+groupshared int64_t i64Shared[4] = { 0, 0, 0, 0 };
+groupshared uint64_t indexAlloc = 0; // NOTE(review): never referenced in the visible test — confirm it is still needed.
+
+//TEST_INPUT: ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer
+RWStructuredBuffer<int> outputBuffer;
+
+[numthreads(4, 1, 1)]
+void computeMain(uint groupIndex : SV_GroupIndex, int3 dispatchThreadID: SV_DispatchThreadID) // Runs every 64-bit/float atomic once per thread and writes 1 to outputBuffer[idx] only if all checks hold.
+{
+ int idx = dispatchThreadID.x; // each thread only ever touches element idx of every buffer.
+ bool result = true; // AND-accumulated across all sections below.
+
+ uint64_t u64Value[9]; // scratch slots that receive the "original value" outputs.
+ int64_t i64Value[9];
+ float f32Value[9];
+
+ // Add
+ InterlockedAdd(u64Shared[idx], uint64_t(1));
+ InterlockedAdd(i64Shared[idx], int64_t(1));
+ InterlockedAdd(u64Buffer[idx], uint64_t(1));
+ InterlockedAdd(i64Buffer[idx], int64_t(1));
+ uBuf.InterlockedAdd64(idx * 8, uint64_t(1));
+ iBuf.InterlockedAdd64(idx * 8, int64_t(1));
+
+ result = result
+ && (u64Shared[idx] == 1)
+ && (i64Shared[idx] == 1)
+ && (u64Buffer[idx] == 1)
+ && (i64Buffer[idx] == 1)
+ && (uBuf.Load<uint64_t>(idx * 8) == 1)
+ && (iBuf.Load< int64_t>(idx * 8) == 1)
+ ;
+
+ // Add - original_value
+ InterlockedAdd(u64Shared[idx], uint64_t(1), u64Value[0]);
+ InterlockedAdd(i64Shared[idx], int64_t(1), i64Value[1]);
+ InterlockedAdd(u64Buffer[idx], uint64_t(1), u64Value[2]);
+ InterlockedAdd(i64Buffer[idx], int64_t(1), i64Value[3]);
+ uBuf.InterlockedAdd64(idx * 8, uint64_t(1), u64Value[4]);
+ iBuf.InterlockedAdd64(idx * 8, int64_t(1), i64Value[5]);
+
+ result = result
+ && (u64Value[0] == 1)
+ && (i64Value[1] == 1)
+ && (u64Value[2] == 1)
+ && (i64Value[3] == 1)
+ && (u64Value[4] == 1)
+ && (i64Value[5] == 1)
+ && (u64Shared[idx] == 2)
+ && (i64Shared[idx] == 2)
+ && (u64Buffer[idx] == 2)
+ && (i64Buffer[idx] == 2)
+ && (uBuf.Load<uint64_t>(idx * 8) == 2)
+ && (iBuf.Load< int64_t>(idx * 8) == 2)
+ ;
+
+ // Bitwise-And
+ InterlockedAnd(u64Shared[idx], uint64_t(3));
+ InterlockedAnd(u64Buffer[idx], uint64_t(3));
+ uBuf.InterlockedAnd64(idx * 8, uint64_t(3));
+
+ result = result
+ && (u64Shared[idx] == 2)
+ && (u64Buffer[idx] == 2)
+ && (uBuf.Load<uint64_t>(idx * 8) == 2)
+ ;
+
+ // And - original_value
+ InterlockedAnd(u64Shared[idx], uint64_t(1), u64Value[0]);
+ InterlockedAnd(u64Buffer[idx], uint64_t(1), u64Value[1]);
+ uBuf.InterlockedAnd64(idx * 8, uint64_t(1), u64Value[2]);
+
+ result = result
+ && (u64Value[0] == 2)
+ && (u64Value[1] == 2)
+ && (u64Value[2] == 2)
+ && (u64Shared[idx] == 0)
+ && (u64Buffer[idx] == 0)
+ && (uBuf.Load<uint64_t>(idx * 8) == 0)
+ ;
+
+ // Bitwise-Or
+ InterlockedOr(u64Shared[idx], uint64_t(1));
+ InterlockedOr(u64Buffer[idx], uint64_t(1));
+ uBuf.InterlockedOr64(idx * 8, uint64_t(1));
+
+ result = result
+ && (u64Shared[idx] == 1)
+ && (u64Buffer[idx] == 1)
+ && (uBuf.Load<uint64_t>(idx * 8) == 1)
+ ;
+
+ // Or - original_value
+ InterlockedOr(u64Shared[idx], uint64_t(2), u64Value[0]);
+ InterlockedOr(u64Buffer[idx], uint64_t(2), u64Value[1]);
+ uBuf.InterlockedOr64(idx * 8, uint64_t(2), u64Value[2]);
+
+ result = result
+ && (u64Value[0] == 1)
+ && (u64Value[1] == 1)
+ && (u64Value[2] == 1)
+ && (u64Shared[idx] == 3)
+ && (u64Buffer[idx] == 3)
+ && (uBuf.Load<uint64_t>(idx * 8) == 3)
+ ;
+
+ // Bitwise-Xor
+ InterlockedXor(u64Shared[idx], uint64_t(5));
+ InterlockedXor(u64Buffer[idx], uint64_t(5));
+ uBuf.InterlockedXor64(idx * 8, uint64_t(5));
+
+ result = result
+ && (u64Shared[idx] == 6)
+ && (u64Buffer[idx] == 6)
+ && (uBuf.Load<uint64_t>(idx * 8) == 6)
+ ;
+
+ // Xor - original_value
+ InterlockedXor(u64Shared[idx], uint64_t(1), u64Value[0]);
+ InterlockedXor(u64Buffer[idx], uint64_t(1), u64Value[1]);
+ uBuf.InterlockedXor64(idx * 8, uint64_t(1), u64Value[2]);
+
+ result = result
+ && (u64Value[0] == 6)
+ && (u64Value[1] == 6)
+ && (u64Value[2] == 6)
+ && (u64Shared[idx] == 7)
+ && (u64Buffer[idx] == 7)
+ && (uBuf.Load<uint64_t>(idx * 8) == 7)
+ ;
+
+ // Min
+ InterlockedMin(u64Shared[idx], uint64_t(1));
+ InterlockedMin(i64Shared[idx], int64_t(1));
+ InterlockedMin(u64Buffer[idx], uint64_t(1));
+ InterlockedMin(i64Buffer[idx], int64_t(1));
+ uBuf.InterlockedMin64(idx * 8, uint64_t(1));
+ iBuf.InterlockedMin64(idx * 8, int64_t(1));
+
+ result = result
+ && (u64Shared[idx] == 1)
+ && (i64Shared[idx] == 1)
+ && (u64Buffer[idx] == 1)
+ && (i64Buffer[idx] == 1)
+ && (uBuf.Load<uint64_t>(idx * 8) == 1)
+ && (iBuf.Load< int64_t>(idx * 8) == 1)
+ ;
+
+ // Min - original_value
+ InterlockedMin(u64Shared[idx], uint64_t(2), u64Value[0]);
+ InterlockedMin(i64Shared[idx], int64_t(2), i64Value[1]);
+ InterlockedMin(u64Buffer[idx], uint64_t(2), u64Value[2]);
+ InterlockedMin(i64Buffer[idx], int64_t(2), i64Value[3]);
+ uBuf.InterlockedMin64(idx * 8, uint64_t(2), u64Value[4]);
+ iBuf.InterlockedMin64(idx * 8, int64_t(2), i64Value[5]);
+
+ result = result
+ && (u64Value[0] == 1)
+ && (i64Value[1] == 1)
+ && (u64Value[2] == 1)
+ && (i64Value[3] == 1)
+ && (u64Value[4] == 1)
+ && (i64Value[5] == 1)
+ && (u64Shared[idx] == 1)
+ && (i64Shared[idx] == 1)
+ && (u64Buffer[idx] == 1)
+ && (i64Buffer[idx] == 1)
+ && (uBuf.Load<uint64_t>(idx * 8) == 1)
+ && (iBuf.Load< int64_t>(idx * 8) == 1)
+ ;
+
+ // Max
+ InterlockedMax(u64Shared[idx], uint64_t(2));
+ InterlockedMax(i64Shared[idx], int64_t(2));
+ InterlockedMax(u64Buffer[idx], uint64_t(2));
+ InterlockedMax(i64Buffer[idx], int64_t(2));
+ uBuf.InterlockedMax64(idx * 8, uint64_t(2));
+ iBuf.InterlockedMax64(idx * 8, int64_t(2));
+
+ result = result
+ && (u64Shared[idx] == 2)
+ && (i64Shared[idx] == 2)
+ && (u64Buffer[idx] == 2)
+ && (i64Buffer[idx] == 2)
+ && (uBuf.Load<uint64_t>(idx * 8) == 2)
+ && (iBuf.Load< int64_t>(idx * 8) == 2)
+ ;
+
+ // Max - original_value
+ InterlockedMax(u64Shared[idx], uint64_t(0), u64Value[0]);
+ InterlockedMax(i64Shared[idx], int64_t(0), i64Value[1]);
+ InterlockedMax(u64Buffer[idx], uint64_t(0), u64Value[2]);
+ InterlockedMax(i64Buffer[idx], int64_t(0), i64Value[3]);
+ uBuf.InterlockedMax64(idx * 8, uint64_t(0), u64Value[4]);
+ iBuf.InterlockedMax64(idx * 8, int64_t(0), i64Value[5]);
+
+ result = result
+ && (u64Value[0] == 2)
+ && (i64Value[1] == 2)
+ && (u64Value[2] == 2)
+ && (i64Value[3] == 2)
+ && (u64Value[4] == 2)
+ && (i64Value[5] == 2)
+ && (u64Shared[idx] == 2)
+ && (i64Shared[idx] == 2)
+ && (u64Buffer[idx] == 2)
+ && (i64Buffer[idx] == 2)
+ && (uBuf.Load<uint64_t>(idx * 8) == 2)
+ && (iBuf.Load< int64_t>(idx * 8) == 2)
+ ;
+
+ // Exchange
+ InterlockedExchange(f32Shared[idx], float(1), f32Value[0]);
+ InterlockedExchange(u64Shared[idx], uint64_t(1), u64Value[1]);
+ InterlockedExchange(i64Shared[idx], int64_t(1), i64Value[2]);
+ InterlockedExchange(f32Buffer[idx], float(1), f32Value[3]);
+ InterlockedExchange(u64Buffer[idx], uint64_t(1), u64Value[4]);
+ InterlockedExchange(i64Buffer[idx], int64_t(1), i64Value[5]);
+ fBuf.InterlockedExchangeFloat(idx * 8, float(1), f32Value[6]);
+ uBuf.InterlockedExchange64(idx * 8, uint64_t(1), u64Value[7]);
+ iBuf.InterlockedExchange64(idx * 8, int64_t(1), i64Value[8]);
+
+ result = result
+ && (f32Value[0] == 0)
+ && (u64Value[1] == 2)
+ && (i64Value[2] == 2)
+ && (f32Value[3] == 0)
+ && (u64Value[4] == 2)
+ && (i64Value[5] == 2)
+ && (f32Value[6] == 0)
+ && (u64Value[7] == 2)
+ && (i64Value[8] == 2)
+ && (f32Shared[idx] == 1.f) // checks the groupshared float; f32Buffer is checked three lines below.
+ && (u64Shared[idx] == 1)
+ && (i64Shared[idx] == 1)
+ && (f32Buffer[idx] == 1.f)
+ && (u64Buffer[idx] == 1)
+ && (i64Buffer[idx] == 1)
+ && (fBuf.Load< float>(idx * 8) == 1.f)
+ && (uBuf.Load<uint64_t>(idx * 8) == 1)
+ && (iBuf.Load< int64_t>(idx * 8) == 1)
+ ;
+
+ // CompareStore
+ InterlockedCompareStore(u64Shared[idx], uint64_t(1), uint64_t(0));
+ InterlockedCompareStore(i64Shared[idx], int64_t(1), int64_t(0));
+ InterlockedCompareStore(u64Buffer[idx], uint64_t(1), uint64_t(0));
+ InterlockedCompareStore(i64Buffer[idx], int64_t(1), int64_t(0));
+ uBuf.InterlockedCompareStore64(idx * 8, uint64_t(1), uint64_t(0));
+ iBuf.InterlockedCompareStore64(idx * 8, int64_t(1), int64_t(0));
+
+ result = result
+ && (u64Shared[idx] == 0)
+ && (i64Shared[idx] == 0)
+ && (u64Buffer[idx] == 0)
+ && (i64Buffer[idx] == 0)
+ && (uBuf.Load<uint64_t>(idx * 8) == 0)
+ && (iBuf.Load< int64_t>(idx * 8) == 0)
+ ;
+
+ // CompareStoreFloatBitwise
+ InterlockedCompareStoreFloatBitwise(f32Shared[idx], float(1), float(0));
+ InterlockedCompareStoreFloatBitwise(f32Buffer[idx], float(1), float(0));
+ fBuf.InterlockedCompareStoreFloatBitwise(idx * 8, float(1), float(0));
+
+ result = result
+ && (f32Shared[idx] == float(0))
+ && (f32Buffer[idx] == float(0))
+ && (fBuf.Load<float>(idx * 8) == float(0))
+ ;
+
+ // CompareExchange
+ InterlockedCompareExchange(u64Shared[idx], uint64_t(0), uint64_t(1), u64Value[0]);
+ InterlockedCompareExchange(i64Shared[idx], int64_t(0), int64_t(1), i64Value[1]);
+ InterlockedCompareExchange(u64Buffer[idx], uint64_t(0), uint64_t(1), u64Value[2]);
+ InterlockedCompareExchange(i64Buffer[idx], int64_t(0), int64_t(1), i64Value[3]);
+ uBuf.InterlockedCompareExchange64(idx * 8, uint64_t(0), uint64_t(1), u64Value[4]);
+ iBuf.InterlockedCompareExchange64(idx * 8, int64_t(0), int64_t(1), i64Value[5]);
+
+ result = result
+ && (u64Value[0] == 0)
+ && (i64Value[1] == 0)
+ && (u64Value[2] == 0)
+ && (i64Value[3] == 0)
+ && (u64Value[4] == 0)
+ && (i64Value[5] == 0)
+ && (u64Shared[idx] == 1)
+ && (i64Shared[idx] == 1)
+ && (u64Buffer[idx] == 1)
+ && (i64Buffer[idx] == 1)
+ && (uBuf.Load<uint64_t>(idx * 8) == 1)
+ && (iBuf.Load< int64_t>(idx * 8) == 1)
+ ;
+
+ // CompareExchangeFloatBitwise
+ InterlockedCompareExchangeFloatBitwise(f32Shared[idx], float(0), float(1), f32Value[0]);
+ InterlockedCompareExchangeFloatBitwise(f32Buffer[idx], float(0), float(1), f32Value[1]);
+ fBuf.InterlockedCompareExchangeFloatBitwise(idx * 8, float(0), float(1), f32Value[2]);
+
+ result = result
+ && (f32Value[0] == float(0))
+ && (f32Value[1] == float(0))
+ && (f32Value[2] == float(0))
+ && (f32Shared[idx] == float(1))
+ && (f32Buffer[idx] == float(1))
+ && (fBuf.Load<float>(idx * 8) == float(1))
+ ;
+
+ outputBuffer[idx] = int(result); // 1 on success; the DX12 filecheck lines after the function expect 1 for all four threads.
+}
+
+// DX12: 1
+// DX12-NEXT: 1
+// DX12-NEXT: 1
+// DX12-NEXT: 1