summary | refs | log | tree | commit | diff | stats
path: root/source/slang/diff.meta.slang
diff options
context:
space:
mode:
author: Yong He <yonghe@outlook.com> 2024-05-14 18:01:31 -0700
committer: GitHub <noreply@github.com> 2024-05-14 18:01:31 -0700
commit: 4edc72e4dea47cf549b4e28940e3509a5ab61439 (patch)
tree: 10475236b4a0e1f8a7a0bafdaa05a09d26f9412f /source/slang/diff.meta.slang
parent: d76bed6c1b03e5d7ef19c947fdd5fcaf33b595f7 (diff)
Remove use of `G0` and `__target_intrinsic` in stdlib. (#4170)
* Remove use of `G0` and `__target_intrinsic` in stdlib.
* Fix.
* Fix calling intrinsic in global scope.
Diffstat (limited to 'source/slang/diff.meta.slang')
-rw-r--r-- source/slang/diff.meta.slang 246
1 files changed, 195 insertions, 51 deletions
diff --git a/source/slang/diff.meta.slang b/source/slang/diff.meta.slang
index 769630d50..c912e026c 100644
--- a/source/slang/diff.meta.slang
+++ b/source/slang/diff.meta.slang
@@ -55,81 +55,171 @@ __magic_type(TensorViewType)
__intrinsic_type($(kIROp_TensorViewType))
struct TensorView
{
- __target_intrinsic(cuda, "$0.data_ptr<$G0>()")
[__NoSideEffect]
[require(cuda)]
- Ptr<T> data_ptr();
+ Ptr<T> data_ptr()
+ {
+ __target_switch
+ {
+ case cuda:
+ __intrinsic_asm "$0.data_ptr<$[0]>()", T;
+ }
+ }
- __target_intrinsic(cuda, "$0.data_ptr_at<$G0>($1)")
[__NoSideEffect]
[require(cuda)]
- Ptr<T> data_ptr_at(uint index);
+ Ptr<T> data_ptr_at(uint index)
+ {
+ __target_switch
+ {
+ case cuda:
+ __intrinsic_asm "$0.data_ptr_at<$[0]>($1)", T;
+ }
+ }
- __generic<let N: int>
- __target_intrinsic(cuda, "$0.data_ptr_at<$G0>($1)")
+ __generic<let N : int>
[__NoSideEffect]
[require(cuda)]
- Ptr<T> data_ptr_at(vector<uint, N> index);
+ Ptr<T> data_ptr_at(vector<uint, N> index)
+ {
+ __target_switch
+ {
+ case cuda:
+ __intrinsic_asm "$0.data_ptr_at<$[0]>($1)", T;
+ }
+ }
__implicit_conversion($(kConversionCost_ImplicitDereference))
__intrinsic_op($(kIROp_TorchTensorGetView))
__init(TorchTensor<T> t);
- __target_intrinsic(cuda, "$0.load<$G0>($1)")
[__NoSideEffect]
[require(cuda)]
- T load(uint x);
+ T load(uint x)
+ {
+ __target_switch
+ {
+ case cuda:
+ __intrinsic_asm "$0.load<$[0]>($1)", T;
+ }
+ }
- __target_intrinsic(cuda, "$0.load<$G0>($1, $2)")
[__NoSideEffect]
[require(cuda)]
- T load(uint x, uint y);
+ T load(uint x, uint y)
+ {
+ __target_switch
+ {
+ case cuda:
+ __intrinsic_asm "$0.load<$[0]>($1, $2)", T;
+ }
+ }
- __target_intrinsic(cuda, "$0.load<$G0>($1, $2, $3)")
[__NoSideEffect]
[require(cuda)]
- T load(uint x, uint y, uint z);
+ T load(uint x, uint y, uint z)
+ {
+ __target_switch
+ {
+ case cuda:
+ __intrinsic_asm "$0.load<$[0]>($1, $2, $3)", T;
+ }
+ }
- __target_intrinsic(cuda, "$0.load<$G0>($1, $2, $3, $4)")
[__NoSideEffect]
[require(cuda)]
- T load(uint x, uint y, uint z, uint w);
+ T load(uint x, uint y, uint z, uint w)
+ {
+ __target_switch
+ {
+ case cuda:
+ __intrinsic_asm "$0.load<$[0]>($1, $2, $3, $4)", T;
+ }
+ }
- __target_intrinsic(cuda, "$0.load<$G0>($1, $2, $3, $4, $5)")
[__NoSideEffect]
[require(cuda)]
- T load(uint i0, uint i1, uint i2, uint i3, uint i4);
+ T load(uint i0, uint i1, uint i2, uint i3, uint i4)
+ {
+ __target_switch
+ {
+ case cuda:
+ __intrinsic_asm "$0.load<$[0]>($1, $2, $3, $4, $5)", T;
+ }
+ }
__generic<let N : int>
- __target_intrinsic(cuda, "$0.load<$TR>($1)")
[__NoSideEffect]
[require(cuda)]
- T load(vector<uint, N> index);
+ T load(vector<uint, N> index)
+ {
+ __target_switch
+ {
+ case cuda:
+ __intrinsic_asm "$0.load<$TR>($1)";
+ }
+ }
- __target_intrinsic(cuda, "$0.store<$G0>($1, $2)")
[require(cuda)]
- void store(uint x, T val);
+ void store(uint x, T val)
+ {
+ __target_switch
+ {
+ case cuda:
+ __intrinsic_asm "$0.store<$T2>($1, $2)";
+ }
+ }
- __target_intrinsic(cuda, "$0.store<$G0>($1, $2, $3)")
[require(cuda)]
- void store(uint x, uint y, T val);
+ void store(uint x, uint y, T val)
+ {
+ __target_switch
+ {
+ case cuda:
+ __intrinsic_asm "$0.store<$T3>($1, $2, $3)";
+ }
+ }
- __target_intrinsic(cuda, "$0.store<$G0>($1, $2, $3, $4)")
[require(cuda)]
- void store(uint x, uint y, uint z, T val);
+ void store(uint x, uint y, uint z, T val)
+ {
+ __target_switch
+ {
+ case cuda:
+ __intrinsic_asm "$0.store<$T4>($1, $2, $3, $4)";
+ }
+ }
- __target_intrinsic(cuda, "$0.store<$G0>($1, $2, $3, $4, $5)")
[require(cuda)]
- void store(uint x, uint y, uint z, uint w, T val);
+ void store(uint x, uint y, uint z, uint w, T val)
+ {
+ __target_switch
+ {
+ case cuda:
+ __intrinsic_asm "$0.store<$T5>($1, $2, $3, $4, $5)";
+ }
+ }
- __target_intrinsic(cuda, "$0.store<$G0>($1, $2, $3, $4, $5, $6)")
[require(cuda)]
- void store(uint i0, uint i1, uint i2, uint i3, uint i4, T val);
+ void store(uint i0, uint i1, uint i2, uint i3, uint i4, T val)
+ {
+ __target_switch
+ {
+ case cuda:
+ __intrinsic_asm "$0.store<$T6>($1, $2, $3, $4, $5, $6)";
+ }
+ }
__generic<let N : int>
- __target_intrinsic(cuda, "$0.store<$T2>($1, $2)")
[require(cuda)]
- void store(vector<uint, N> index, T val);
+ void store(vector<uint, N> index, T val)
+ {
+ __target_switch
+ {
+ case cuda:
+ __intrinsic_asm "$0.store<$T2>($1, $2)";
+ }
+ }
[require(cuda)]
void InterlockedAdd(uint index, T val, out T oldVal)
@@ -184,66 +274,114 @@ struct TensorView
{
[ForceInline] [__NoSideEffect] get { return load(index); }
[ForceInline] set { store(index, newValue); }
-
- __target_intrinsic(cuda, "$0.load<$G0>($1)")
+
[__NoSideEffect]
- ref;
+ ref
+ {
+ __target_switch
+ {
+ case cuda:
+ __intrinsic_asm "$0.load<$[0]>($1)", T;
+ }
+ }
}
__subscript(uint i1, uint i2) -> T
{
[ForceInline] [__NoSideEffect] get { return load(i1, i2); }
[ForceInline] set { store(i1, i2, newValue); }
- __target_intrinsic(cuda, "$0.load<$G0>($1, $2)")
[__NoSideEffect]
- ref;
+ ref
+ {
+ __target_switch
+ {
+ case cuda:
+ __intrinsic_asm "$0.load<$[0]>($1, $2)", T;
+ }
+ }
}
__subscript(uint2 i) -> T
{
[ForceInline] [__NoSideEffect] get { return load(i.x, i.y); }
[ForceInline] set { store(i.x, i.y, newValue); }
- __target_intrinsic(cuda, "$0.load<$G0>($1.x, $1.y)")
[__NoSideEffect]
- ref;
+ ref
+ {
+ __target_switch
+ {
+ case cuda:
+ __intrinsic_asm "$0.load<$[0]>($1.x, $1.y)", T;
+ }
+ }
}
__subscript(uint i1, uint i2, uint i3) -> T
{
[ForceInline] [__NoSideEffect] get { return load(i1, i2, i3); }
[ForceInline] set { store(i1, i2, i3, newValue); }
- __target_intrinsic(cuda, "$0.load<$G0>($1, $2, $3)")
[__NoSideEffect]
- ref;
+ ref
+ {
+ __target_switch
+ {
+ case cuda:
+ __intrinsic_asm "$0.load<$[0]>($1, $2, $3)", T;
+ }
+ }
}
__subscript(uint3 i) -> T
{
[ForceInline] [__NoSideEffect] get { return load(i.x, i.y, i.z); }
[ForceInline] set { store(i.x, i.y, i.z, newValue); }
- __target_intrinsic(cuda, "$0.load<$G0>($1.x, $1.y, $1.z)")
[__NoSideEffect]
- ref;
+ ref
+ {
+ __target_switch
+ {
+ case cuda:
+ __intrinsic_asm "$0.load<$[0]>($1.x, $1.y, $1.z)", T;
+ }
+ }
}
__subscript(uint i1, uint i2, uint i3, uint i4) -> T
{
[ForceInline] [__NoSideEffect] get { return load(i1, i2, i3, i4); }
[ForceInline] set { store(i1, i2, i3, i4, newValue); }
- __target_intrinsic(cuda, "$0.load<$G0>($1, $2, $3, $4)")
[__NoSideEffect]
- ref;
+ ref
+ {
+ __target_switch
+ {
+ case cuda:
+ __intrinsic_asm "$0.load<$[0]>($1, $2, $3, $4)", T;
+ }
+ }
}
__subscript(uint4 i) -> T
{
[__NoSideEffect][ForceInline] get { return load(i.x, i.y, i.z, i.w); }
[ForceInline] set { store(i.x, i.y, i.z, i.w, newValue); }
- __target_intrinsic(cuda, "$0.load<$G0>($1.x, $1.y, $1.z, $1.w)")
[__NoSideEffect]
- ref;
+ ref
+ {
+ __target_switch
+ {
+ case cuda:
+ __intrinsic_asm "$0.load<$[0]>($1.x, $1.y, $1.z, $1.w)", T;
+ }
+ }
}
__subscript(uint i1, uint i2, uint i3, uint i4, uint i5) -> T
{
[ForceInline] [__NoSideEffect] get { return load(i1, i2, i3, i4, i5); }
[ForceInline] set { store(i1, i2, i3, i4, i5, newValue); }
- __target_intrinsic(cuda, "$0.load<$G0>($1, $2, $3, $4, $5)")
[__NoSideEffect]
- ref;
+ ref
+ {
+ __target_switch
+ {
+ case cuda:
+ __intrinsic_asm "$0.load<$[0]>($1, $2, $3, $4, $5)", T;
+ }
+ }
}
}
@@ -882,12 +1020,18 @@ struct TorchTensor
}
}
- __target_intrinsic(cpp, "$0.data_ptr<$G0>()")
- __target_intrinsic(cuda, "$0.data_ptr<$G0>()")
[__readNone]
[CudaHost]
[require(cpp_cuda)]
- Ptr<T> data_ptr();
+ Ptr<T> data_ptr()
+ {
+ __target_switch
+ {
+ case cpp:
+ case cuda:
+ __intrinsic_asm "$0.data_ptr<$[0]>()", T;
+ }
+ }
__intrinsic_op($(kIROp_AllocateTorchTensor))
[CudaHost]