summary refs log tree commit diff stats
path: root/source
diff options
context:
space:
mode:
author Yong He <yonghe@outlook.com> 2023-03-30 14:34:54 -0700
committer GitHub <noreply@github.com> 2023-03-30 14:34:54 -0700
commit efeda20ec280771348887ae4eb498a8b158c9c0c (patch)
tree 9fad826ca62c6aa8ee121725bfdf4d077a236c4f /source
parent 917416f6db7056cddff9d2a0e4e9b4117359157d (diff)
Fix stdlib definitions for tensor interlocked methods. (#2761)
Co-authored-by: Yong He <yhe@nvidia.com>
Diffstat (limited to 'source')
-rw-r--r-- source/slang/diff.meta.slang 105
1 file changed, 53 insertions, 52 deletions
diff --git a/source/slang/diff.meta.slang b/source/slang/diff.meta.slang
index 252b6f5e9..2bdaccee3 100644
--- a/source/slang/diff.meta.slang
+++ b/source/slang/diff.meta.slang
@@ -74,12 +74,12 @@ struct TensorView
__target_intrinsic(cuda, "$0.store<$G0>($1, $2, $3, $4, $5, $6)")
void store(uint i0, uint i1, uint i2, uint i3, uint i4, T val);
- __target_intrinsic(cuda, "atomicAdd($0.data_ptr_at<$TR>($1), $2)")
- T InterlockedAdd(uint index, T val);
+ // Atomic add on the element at `index`: on CUDA this expands to `atomicAdd` on the
+ // pointer obtained from `data_ptr_at`, and the value `atomicAdd` returns is
+ // written to `oldVal`.
+ __target_intrinsic(cuda, "*($3) = atomicAdd($0.data_ptr_at<$T2>($1), $2)")
+ void InterlockedAdd(uint index, T val, out T oldVal);
__generic<let N:int>
- __target_intrinsic(cuda, "atomicAdd($0.data_ptr_at<$TR>($1), $2)")
- T InterlockedAdd(vector<uint, N> index, T val);
+ // Vector-index overload of InterlockedAdd: same `atomicAdd` expansion, with the
+ // element addressed by an N-component `index`; the returned value goes to `oldVal`.
+ __target_intrinsic(cuda, "*($3) = atomicAdd($0.data_ptr_at<$T2>($1), $2)")
+ void InterlockedAdd(vector<uint, N> index, T val, out T oldVal);
__target_intrinsic(cuda, "$0.dimensionCount")
[__readNone]
@@ -159,61 +159,55 @@ for (auto atomicIntegerTypeName : kCudaAtomicIntegerTypes)
extension TensorView<$(atomicIntegerTypeName)>
{
typealias __Element = $(atomicIntegerTypeName);
- __target_intrinsic(cuda, "atomicInc($0.data_ptr_at<$TR>($1), $2)")
- __Element InterlockedIncrement(uint index, __Element val);
- __generic<let N:int>
- __target_intrinsic(cuda, "atomicInc($0.data_ptr_at<$TR>($1), $2)")
- __Element InterlockedIncrement(vector<uint, N> index, __Element val);
+ // Atomic min on the element at `index`: expands to CUDA `atomicMin`, and the value
+ // it returns is written to `oldVal`.
+ __target_intrinsic(cuda, "*($3) = atomicMin($0.data_ptr_at<$T2>($1), $2)")
+ void InterlockedMin(uint index, __Element val, out __Element oldVal);
- __target_intrinsic(cuda, "atomicMin($0.data_ptr_at<$TR>($1), $2)")
- __Element InterlockedMin(uint index, __Element val);
-
- __generic<let N:int>
- __target_intrinsic(cuda, "atomicMin($0.data_ptr_at<$TR>($1), $2)")
- __Element InterlockedMin(vector<uint, N> index, __Element val);
+ // Vector-index overload of InterlockedMin: same `atomicMin` expansion, addressed
+ // by an N-component `index`; the returned value goes to `oldVal`.
+ __generic<let N : int>
+ __target_intrinsic(cuda, "*($3) = atomicMin($0.data_ptr_at<$T2>($1), $2)")
+ void InterlockedMin(vector<uint, N> index, __Element val, out __Element oldVal);
- __target_intrinsic(cuda, "atomicMax($0.data_ptr_at<$TR>($1), $2)")
- __Element InterlockedMax<T>(uint index, __Element val);
-
- __generic<let N:int>
- __target_intrinsic(cuda, "atomicMax($0.data_ptr_at<$TR>($1), $2)")
- __Element InterlockedMax(vector<uint, N> index, __Element val);
+ // Atomic max on the element at `index`: expands to CUDA `atomicMax`, and the value
+ // it returns is written to `oldVal`. No method-level generic parameter is declared:
+ // the element type is fixed by the enclosing extension (`__Element`), and an unused
+ // `<T>` could not be inferred from the arguments.
+ __target_intrinsic(cuda, "*($3) = atomicMax($0.data_ptr_at<$T2>($1), $2)")
+ void InterlockedMax(uint index, __Element val, out __Element oldVal);
- __target_intrinsic(cuda, "atomicAnd($0.data_ptr_at<$TR>($1), $2)")
- __Element InterlockedAnd<T>(uint index, __Element val);
-
- __generic<let N:int>
- __target_intrinsic(cuda, "atomicAnd($0.data_ptr_at<$TR>($1), $2)")
- __Element InterlockedAnd(vector<uint, N> index, __Element val);
+ // Vector-index overload of InterlockedMax: same `atomicMax` expansion, addressed
+ // by an N-component `index`; the returned value goes to `oldVal`.
+ __generic<let N : int>
+ __target_intrinsic(cuda, "*($3) = atomicMax($0.data_ptr_at<$T2>($1), $2)")
+ void InterlockedMax(vector<uint, N> index, __Element val, out __Element oldVal);
- __target_intrinsic(cuda, "atomicOr($0.data_ptr_at<$TR>($1), $2)")
- __Element InterlockedOr<T>(uint index, __Element val);
-
- __generic<let N:int>
- __target_intrinsic(cuda, "atomicOr($0.data_ptr_at<$TR>($1), $2)")
- __Element InterlockedOr(vector<uint, N> index, __Element val);
+ // Atomic bitwise AND on the element at `index`: expands to CUDA `atomicAnd`, and
+ // the value it returns is written to `oldVal`. No method-level generic parameter is
+ // declared: the element type is fixed by the enclosing extension (`__Element`), and
+ // an unused `<T>` could not be inferred from the arguments.
+ __target_intrinsic(cuda, "*($3) = atomicAnd($0.data_ptr_at<$T2>($1), $2)")
+ void InterlockedAnd(uint index, __Element val, out __Element oldVal);
- __target_intrinsic(cuda, "atomicXor($0.data_ptr_at<$TR>($1), $2)")
- __Element InterlockedXor<T>(uint index, __Element val);
-
- __generic<let N:int>
- __target_intrinsic(cuda, "atomicXor($0.data_ptr_at<$TR>($1), $2)")
- __Element InterlockedXor(vector<uint, N> index, __Element val);
-
- __target_intrinsic(cuda, "atomicExch($0.data_ptr_at<$TR>($1), $2)")
- __Element InterlockedExchange(uint index, __Element val);
+ // Vector-index overload of InterlockedAnd: same `atomicAnd` expansion, addressed
+ // by an N-component `index`; the returned value goes to `oldVal`.
+ __generic<let N : int>
+ __target_intrinsic(cuda, "*($3) = atomicAnd($0.data_ptr_at<$T2>($1), $2)")
+ void InterlockedAnd(vector<uint, N> index, __Element val, out __Element oldVal);
+
+ // Atomic bitwise OR on the element at `index`: expands to CUDA `atomicOr`, and the
+ // value it returns is written to `oldVal`. No method-level generic parameter is
+ // declared: the element type is fixed by the enclosing extension (`__Element`), and
+ // an unused `<T>` could not be inferred from the arguments.
+ __target_intrinsic(cuda, "*($3) = atomicOr($0.data_ptr_at<$T2>($1), $2)")
+ void InterlockedOr(uint index, __Element val, out __Element oldVal);
+
+ // Vector-index overload of InterlockedOr: same `atomicOr` expansion, addressed by
+ // an N-component `index`; the returned value goes to `oldVal`.
+ __generic<let N : int>
+ __target_intrinsic(cuda, "*($3) = atomicOr($0.data_ptr_at<$T2>($1), $2)")
+ void InterlockedOr(vector<uint, N> index, __Element val, out __Element oldVal);
+
+ // Atomic bitwise XOR on the element at `index`: expands to CUDA `atomicXor`, and
+ // the value it returns is written to `oldVal`. No method-level generic parameter is
+ // declared: the element type is fixed by the enclosing extension (`__Element`), and
+ // an unused `<T>` could not be inferred from the arguments.
+ __target_intrinsic(cuda, "*($3) = atomicXor($0.data_ptr_at<$T2>($1), $2)")
+ void InterlockedXor(uint index, __Element val, out __Element oldVal);
+
+ // Vector-index overload of InterlockedXor: same `atomicXor` expansion, addressed
+ // by an N-component `index`; the returned value goes to `oldVal`.
+ __generic<let N : int>
+ __target_intrinsic(cuda, "*($3) = atomicXor($0.data_ptr_at<$T2>($1), $2)")
+ void InterlockedXor(vector<uint, N> index, __Element val, out __Element oldVal);
+
+ // Atomic exchange on the element at `index`: expands to CUDA `atomicExch`, storing
+ // `val` and writing the value `atomicExch` returns to `oldVal`. Parameter names
+ // match the vector-index overload (`val`, `oldVal`).
+ __target_intrinsic(cuda, "*($3) = atomicExch($0.data_ptr_at<$T2>($1), $2)")
+ void InterlockedExchange(uint index, __Element val, out __Element oldVal);
__generic<let N:int>
- __target_intrinsic(cuda, "atomicExch($0.data_ptr_at<$TR>($1), $2)")
- __Element InterlockedExchange(vector<uint, N> index, __Element val);
+ // Vector-index overload of InterlockedExchange: same `atomicExch` expansion,
+ // addressed by an N-component `index`; the returned value goes to `oldVal`.
+ __target_intrinsic(cuda, "*($3) = atomicExch($0.data_ptr_at<$T2>($1), $2)")
+ void InterlockedExchange(vector<uint, N> index, __Element val, out __Element oldVal);
- __target_intrinsic(cuda, "atomicCAS($0.data_ptr_at<$TR>($1), $2, $3)")
- __Element InterlockedCompareExchange(uint index, __Element compare, __Element val);
+ // Compare-and-swap on the element at `index`: expands to CUDA `atomicCAS` with
+ // `compare` and `val`.
+ // NOTE(review): unlike the other Interlocked* methods in this extension there is no
+ // `out` parameter and the return type is void, so the value `atomicCAS` returns is
+ // discarded — confirm whether callers need the previous value.
+ __target_intrinsic(cuda, "atomicCAS($0.data_ptr_at<$T2>($1), $2, $3)")
+ void InterlockedCompareExchange(uint index, __Element compare, __Element val);
__generic<let N:int>
- __target_intrinsic(cuda, "atomicCAS($0.data_ptr_at<$TR>($1), $2, $3)")
- __Element InterlockedCompareExchange(vector<uint, N> index, __Element compare, __Element val);
+ // Vector-index overload of InterlockedCompareExchange: same `atomicCAS` expansion,
+ // addressed by an N-component `index`.
+ // NOTE(review): the value `atomicCAS` returns is discarded (void return, no `out`
+ // parameter) — confirm whether callers need the previous value.
+ __target_intrinsic(cuda, "atomicCAS($0.data_ptr_at<$T2>($1), $2, $3)")
+ void InterlockedCompareExchange(vector<uint, N> index, __Element compare, __Element val);
}
${{{{
@@ -222,12 +216,19 @@ ${{{{
extension TensorView<float>
{
- __target_intrinsic(cuda, "atomicExch($0.data_ptr_at<$G0>($1), $2)")
- float InterlockedExchange(uint index, float val);
+ // Atomic exchange on a float element at `index`: expands to CUDA `atomicExch` on a
+ // `float` pointer, and the value it returns is written to `oldVal`.
+ // NOTE(review): the declared return type is `float`, whereas the integer-typed
+ // Interlocked* methods that take an `out` parameter return void — confirm which is
+ // intended.
+ __target_intrinsic(cuda, "*($3) = atomicExch($0.data_ptr_at<float>($1), $2)")
+ float InterlockedExchange(uint index, float val, out float oldVal);
__generic<let N:int>
- __target_intrinsic(cuda, "atomicExch($0.data_ptr_at<$G0>($1), $2)")
- float InterlockedExchange(vector<uint, N> index, float val);
+ // Vector-index overload of the float InterlockedExchange: same `atomicExch`
+ // expansion, addressed by an N-component `index`; the returned value goes to `oldVal`.
+ // NOTE(review): the declared return type is `float`, whereas the integer-typed
+ // Interlocked* methods that take an `out` parameter return void — confirm which is
+ // intended.
+ __target_intrinsic(cuda, "*($3) = atomicExch($0.data_ptr_at<float>($1), $2)")
+ float InterlockedExchange(vector<uint, N> index, float val, out float oldVal);
+
+ // Compare-and-swap on a float element at `index`, done on its 32-bit pattern: the
+ // element pointer is requested as `uint32_t`, and `compare` and `val` are passed
+ // through `slang_bit_cast<uint32_t>` before the CUDA `atomicCAS` call.
+ // NOTE(review): the value `atomicCAS` returns is discarded (void return, no `out`
+ // parameter) — confirm whether callers need the previous value.
+ __target_intrinsic(cuda, "atomicCAS($0.data_ptr_at<uint32_t>($1), slang_bit_cast<uint32_t>($2), slang_bit_cast<uint32_t>($3))")
+ void InterlockedCompareExchange(uint index, float compare, float val);
+
+ // Vector-index overload of the float InterlockedCompareExchange: same `atomicCAS`
+ // expansion on the `uint32_t` bit pattern, addressed by an N-component `index`.
+ // NOTE(review): the value `atomicCAS` returns is discarded (void return, no `out`
+ // parameter) — confirm whether callers need the previous value.
+ __generic<let N : int>
+ __target_intrinsic(cuda, "atomicCAS($0.data_ptr_at<uint32_t>($1), slang_bit_cast<uint32_t>($2), slang_bit_cast<uint32_t>($3))")
+ void InterlockedCompareExchange(vector<uint, N> index, float compare, float val);
}
__generic<T>