diff options
| author | Yong He <yonghe@outlook.com> | 2024-05-14 18:01:31 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-05-14 18:01:31 -0700 |
| commit | 4edc72e4dea47cf549b4e28940e3509a5ab61439 (patch) | |
| tree | 10475236b4a0e1f8a7a0bafdaa05a09d26f9412f /source/slang/diff.meta.slang | |
| parent | d76bed6c1b03e5d7ef19c947fdd5fcaf33b595f7 (diff) | |
Remove use of `G0` and `__target_intrinsic` in stdlib. (#4170)
* Remove use of `G0` and `__target_intrinsic` in stdlib.
* Fix.
* Fix calling intrinsic in global scope.
Diffstat (limited to 'source/slang/diff.meta.slang')
| -rw-r--r-- | source/slang/diff.meta.slang | 246 |
1 file changed, 195 insertions, 51 deletions
diff --git a/source/slang/diff.meta.slang b/source/slang/diff.meta.slang index 769630d50..c912e026c 100644 --- a/source/slang/diff.meta.slang +++ b/source/slang/diff.meta.slang @@ -55,81 +55,171 @@ __magic_type(TensorViewType) __intrinsic_type($(kIROp_TensorViewType)) struct TensorView { - __target_intrinsic(cuda, "$0.data_ptr<$G0>()") [__NoSideEffect] [require(cuda)] - Ptr<T> data_ptr(); + Ptr<T> data_ptr() + { + __target_switch + { + case cuda: + __intrinsic_asm "$0.data_ptr<$[0]>()", T; + } + } - __target_intrinsic(cuda, "$0.data_ptr_at<$G0>($1)") [__NoSideEffect] [require(cuda)] - Ptr<T> data_ptr_at(uint index); + Ptr<T> data_ptr_at(uint index) + { + __target_switch + { + case cuda: + __intrinsic_asm "$0.data_ptr_at<$[0]>($1)", T; + } + } - __generic<let N: int> - __target_intrinsic(cuda, "$0.data_ptr_at<$G0>($1)") + __generic<let N : int> [__NoSideEffect] [require(cuda)] - Ptr<T> data_ptr_at(vector<uint, N> index); + Ptr<T> data_ptr_at(vector<uint, N> index) + { + __target_switch + { + case cuda: + __intrinsic_asm "$0.data_ptr_at<$[0]>($1)", T; + } + } __implicit_conversion($(kConversionCost_ImplicitDereference)) __intrinsic_op($(kIROp_TorchTensorGetView)) __init(TorchTensor<T> t); - __target_intrinsic(cuda, "$0.load<$G0>($1)") [__NoSideEffect] [require(cuda)] - T load(uint x); + T load(uint x) + { + __target_switch + { + case cuda: + __intrinsic_asm "$0.load<$[0]>($1)", T; + } + } - __target_intrinsic(cuda, "$0.load<$G0>($1, $2)") [__NoSideEffect] [require(cuda)] - T load(uint x, uint y); + T load(uint x, uint y) + { + __target_switch + { + case cuda: + __intrinsic_asm "$0.load<$[0]>($1, $2)", T; + } + } - __target_intrinsic(cuda, "$0.load<$G0>($1, $2, $3)") [__NoSideEffect] [require(cuda)] - T load(uint x, uint y, uint z); + T load(uint x, uint y, uint z) + { + __target_switch + { + case cuda: + __intrinsic_asm "$0.load<$[0]>($1, $2, $3)", T; + } + } - __target_intrinsic(cuda, "$0.load<$G0>($1, $2, $3, $4)") [__NoSideEffect] [require(cuda)] - T load(uint x, 
uint y, uint z, uint w); + T load(uint x, uint y, uint z, uint w) + { + __target_switch + { + case cuda: + __intrinsic_asm "$0.load<$[0]>($1, $2, $3, $4)", T; + } + } - __target_intrinsic(cuda, "$0.load<$G0>($1, $2, $3, $4, $5)") [__NoSideEffect] [require(cuda)] - T load(uint i0, uint i1, uint i2, uint i3, uint i4); + T load(uint i0, uint i1, uint i2, uint i3, uint i4) + { + __target_switch + { + case cuda: + __intrinsic_asm "$0.load<$[0]>($1, $2, $3, $4, $5)", T; + } + } __generic<let N : int> - __target_intrinsic(cuda, "$0.load<$TR>($1)") [__NoSideEffect] [require(cuda)] - T load(vector<uint, N> index); + T load(vector<uint, N> index) + { + __target_switch + { + case cuda: + __intrinsic_asm "$0.load<$TR>($1)"; + } + } - __target_intrinsic(cuda, "$0.store<$G0>($1, $2)") [require(cuda)] - void store(uint x, T val); + void store(uint x, T val) + { + __target_switch + { + case cuda: + __intrinsic_asm "$0.store<$T2>($1, $2)"; + } + } - __target_intrinsic(cuda, "$0.store<$G0>($1, $2, $3)") [require(cuda)] - void store(uint x, uint y, T val); + void store(uint x, uint y, T val) + { + __target_switch + { + case cuda: + __intrinsic_asm "$0.store<$T3>($1, $2, $3)"; + } + } - __target_intrinsic(cuda, "$0.store<$G0>($1, $2, $3, $4)") [require(cuda)] - void store(uint x, uint y, uint z, T val); + void store(uint x, uint y, uint z, T val) + { + __target_switch + { + case cuda: + __intrinsic_asm "$0.store<$T4>($1, $2, $3, $4)"; + } + } - __target_intrinsic(cuda, "$0.store<$G0>($1, $2, $3, $4, $5)") [require(cuda)] - void store(uint x, uint y, uint z, uint w, T val); + void store(uint x, uint y, uint z, uint w, T val) + { + __target_switch + { + case cuda: + __intrinsic_asm "$0.store<$T5>($1, $2, $3, $4, $5)"; + } + } - __target_intrinsic(cuda, "$0.store<$G0>($1, $2, $3, $4, $5, $6)") [require(cuda)] - void store(uint i0, uint i1, uint i2, uint i3, uint i4, T val); + void store(uint i0, uint i1, uint i2, uint i3, uint i4, T val) + { + __target_switch + { + case cuda: + 
__intrinsic_asm "$0.store<$T6>($1, $2, $3, $4, $5, $6)"; + } + } __generic<let N : int> - __target_intrinsic(cuda, "$0.store<$T2>($1, $2)") [require(cuda)] - void store(vector<uint, N> index, T val); + void store(vector<uint, N> index, T val) + { + __target_switch + { + case cuda: + __intrinsic_asm "$0.store<$T2>($1, $2)"; + } + } [require(cuda)] void InterlockedAdd(uint index, T val, out T oldVal) @@ -184,66 +274,114 @@ struct TensorView { [ForceInline] [__NoSideEffect] get { return load(index); } [ForceInline] set { store(index, newValue); } - - __target_intrinsic(cuda, "$0.load<$G0>($1)") + [__NoSideEffect] - ref; + ref + { + __target_switch + { + case cuda: + __intrinsic_asm "$0.load<$[0]>($1)", T; + } + } } __subscript(uint i1, uint i2) -> T { [ForceInline] [__NoSideEffect] get { return load(i1, i2); } [ForceInline] set { store(i1, i2, newValue); } - __target_intrinsic(cuda, "$0.load<$G0>($1, $2)") [__NoSideEffect] - ref; + ref + { + __target_switch + { + case cuda: + __intrinsic_asm "$0.load<$[0]>($1, $2)", T; + } + } } __subscript(uint2 i) -> T { [ForceInline] [__NoSideEffect] get { return load(i.x, i.y); } [ForceInline] set { store(i.x, i.y, newValue); } - __target_intrinsic(cuda, "$0.load<$G0>($1.x, $1.y)") [__NoSideEffect] - ref; + ref + { + __target_switch + { + case cuda: + __intrinsic_asm "$0.load<$[0]>($1.x, $1.y)", T; + } + } } __subscript(uint i1, uint i2, uint i3) -> T { [ForceInline] [__NoSideEffect] get { return load(i1, i2, i3); } [ForceInline] set { store(i1, i2, i3, newValue); } - __target_intrinsic(cuda, "$0.load<$G0>($1, $2, $3)") [__NoSideEffect] - ref; + ref + { + __target_switch + { + case cuda: + __intrinsic_asm "$0.load<$[0]>($1, $2, $3)", T; + } + } } __subscript(uint3 i) -> T { [ForceInline] [__NoSideEffect] get { return load(i.x, i.y, i.z); } [ForceInline] set { store(i.x, i.y, i.z, newValue); } - __target_intrinsic(cuda, "$0.load<$G0>($1.x, $1.y, $1.z)") [__NoSideEffect] - ref; + ref + { + __target_switch + { + case cuda: + 
__intrinsic_asm "$0.load<$[0]>($1.x, $1.y, $1.z)", T; + } + } } __subscript(uint i1, uint i2, uint i3, uint i4) -> T { [ForceInline] [__NoSideEffect] get { return load(i1, i2, i3, i4); } [ForceInline] set { store(i1, i2, i3, i4, newValue); } - __target_intrinsic(cuda, "$0.load<$G0>($1, $2, $3, $4)") [__NoSideEffect] - ref; + ref + { + __target_switch + { + case cuda: + __intrinsic_asm "$0.load<$[0]>($1, $2, $3, $4)", T; + } + } } __subscript(uint4 i) -> T { [__NoSideEffect][ForceInline] get { return load(i.x, i.y, i.z, i.w); } [ForceInline] set { store(i.x, i.y, i.z, i.w, newValue); } - __target_intrinsic(cuda, "$0.load<$G0>($1.x, $1.y, $1.z, $1.w)") [__NoSideEffect] - ref; + ref + { + __target_switch + { + case cuda: + __intrinsic_asm "$0.load<$[0]>($1.x, $1.y, $1.z, $1.w)", T; + } + } } __subscript(uint i1, uint i2, uint i3, uint i4, uint i5) -> T { [ForceInline] [__NoSideEffect] get { return load(i1, i2, i3, i4, i5); } [ForceInline] set { store(i1, i2, i3, i4, i5, newValue); } - __target_intrinsic(cuda, "$0.load<$G0>($1, $2, $3, $4, $5)") [__NoSideEffect] - ref; + ref + { + __target_switch + { + case cuda: + __intrinsic_asm "$0.load<$[0]>($1, $2, $3, $4, $5)", T; + } + } } } @@ -882,12 +1020,18 @@ struct TorchTensor } } - __target_intrinsic(cpp, "$0.data_ptr<$G0>()") - __target_intrinsic(cuda, "$0.data_ptr<$G0>()") [__readNone] [CudaHost] [require(cpp_cuda)] - Ptr<T> data_ptr(); + Ptr<T> data_ptr() + { + __target_switch + { + case cpp: + case cuda: + __intrinsic_asm "$0.data_ptr<$[0]>()", T; + } + } __intrinsic_op($(kIROp_AllocateTorchTensor)) [CudaHost] |
