diff options
| author | Yong He <yonghe@outlook.com> | 2023-02-24 10:01:47 -0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-02-24 10:01:47 -0800 |
| commit | bd6306cdaa4a49344658bd026721b6532e103d09 (patch) | |
| tree | bb7f666d426e6cfc7777a3ccac0a1d628588eb39 /source | |
| parent | e8c08e7ecb1124f115a1d1042277776193122b57 (diff) | |
More control flow simplifications. (#2673)
* More control flow and Phi param simplifications.
* Fix.
* Fix gcc error.
* Fix.
* More IR cleanup.
* Fix bug in phi param dce + ifelse simplify.
* Propagate and DCE side-effect-free functions.
* Enhance CFG simplification to remove loops with no side effects.
* Fix.
* Fixes.
* Fix tests. Add [__AlwaysFoldIntoUseSite] for rayPayloadLocation.
* More cleanup.
* Fixes.
* Fix.
---------
Co-authored-by: Yong He <yhe@nvidia.com>
Diffstat (limited to 'source')
29 files changed, 1616 insertions, 314 deletions
diff --git a/source/slang/core.meta.slang b/source/slang/core.meta.slang index 6357d58bd..9da33c755 100644 --- a/source/slang/core.meta.slang +++ b/source/slang/core.meta.slang @@ -2525,21 +2525,25 @@ int __SyntaxError(); __generic<T> __target_intrinsic(cuda, "sizeof($G0)") __target_intrinsic(cpp, "sizeof($G0)") +[__readNone] int __sizeOf(); __generic<T> __target_intrinsic(cuda, "sizeof($T0)") __target_intrinsic(cpp, "sizeof($T0)") +[__readNone] int __sizeOf(T v); __generic<T> __target_intrinsic(cuda, "SLANG_ALIGN_OF($G0)") __target_intrinsic(cpp, "SLANG_ALIGN_OF($G0)") +[__readNone] int __alignOf(); __generic<T> __target_intrinsic(cuda, "SLANG_ALIGN_OF($T0)") __target_intrinsic(cpp, "SLANG_ALIGN_OF($T0)") +[__readNone] int __alignOf(T v); // It would be nice to have offsetof equivalent, but it's not clear how that would work in terms of the Slang language. @@ -2547,6 +2551,7 @@ int __alignOf(T v); __generic<T,F> __target_intrinsic(cuda, "int(((char*)&($1)) - ((char*)&($0)))") __target_intrinsic(cpp, "int(((char*)&($1)) - ((char*)&($0))") +[__readNone] int __offsetOf(in T t, in F field); /// Mark beginning of "interlocked" operations in a fragment shader. 
@@ -2960,6 +2965,9 @@ attribute_syntax [builtin] : BuiltinAttribute; __attributeTarget(DeclBase) attribute_syntax [__requiresNVAPI] : RequiresNVAPIAttribute; +__attributeTarget(DeclBase) +attribute_syntax [__AlwaysFoldIntoUseSiteAttribute] : AlwaysFoldIntoUseSiteAttribute; + __attributeTarget(FunctionDeclBase) attribute_syntax [noinline] : NoInlineAttribute; diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index 7e75d06b3..37cdc205e 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -778,6 +778,7 @@ __target_intrinsic(glsl) __target_intrinsic(cuda, "$P_abs($0)") __target_intrinsic(cpp, "$P_abs($0)") __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 fi(FAbs, SAbs) _0") +[__readNone] T abs(T x); /*{ // Note: this simple definition may not be appropriate for floating-point inputs @@ -788,6 +789,7 @@ __generic<T : __BuiltinIntegerType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 fi(FAbs, SAbs) _0") +[__readNone] vector<T, N> abs(vector<T, N> x) { VECTOR_MAP_UNARY(T, N, abs, x); @@ -795,6 +797,7 @@ vector<T, N> abs(vector<T, N> x) __generic<T : __BuiltinIntegerType, let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<T,N,M> abs(matrix<T,N,M> x) { MATRIX_MAP_UNARY(T, N, M, abs, x); @@ -806,12 +809,14 @@ __target_intrinsic(glsl) __target_intrinsic(cuda, "$P_abs($0)") __target_intrinsic(cpp, "$P_abs($0)") __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 fi(FAbs, SAbs) _0") +[__readNone] T abs(T x); __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 fi(FAbs, SAbs) _0") +[__readNone] vector<T, N> abs(vector<T, N> x) { VECTOR_MAP_UNARY(T, N, abs, x); @@ -819,6 +824,7 @@ vector<T, N> abs(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let 
N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<T,N,M> abs(matrix<T,N,M> x) { MATRIX_MAP_UNARY(T, N, M, abs, x); @@ -832,12 +838,14 @@ __target_intrinsic(glsl) __target_intrinsic(cuda, "$P_acos($0)") __target_intrinsic(cpp, "$P_acos($0)") __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Acos _0") +[__readNone] T acos(T x); __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Acos _0") +[__readNone] vector<T, N> acos(vector<T, N> x) { VECTOR_MAP_UNARY(T, N, acos, x); @@ -845,6 +853,7 @@ vector<T, N> acos(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<T, N, M> acos(matrix<T, N, M> x) { MATRIX_MAP_UNARY(T, N, M, acos, x); @@ -855,11 +864,13 @@ __generic<T : __BuiltinType> __target_intrinsic(cpp, "bool($0)") __target_intrinsic(cuda, "bool($0)") __target_intrinsic(glsl, "bool($0)") +[__readNone] bool all(T x); __generic<T : __BuiltinType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl, "all(bvec$N0($0))") +[__readNone] bool all(vector<T,N> x) { bool result = true; @@ -870,6 +881,7 @@ bool all(vector<T,N> x) __generic<T : __BuiltinType, let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] bool all(matrix<T,N,M> x) { bool result = true; @@ -894,11 +906,13 @@ __generic<T : __BuiltinType> __target_intrinsic(cpp, "bool($0)") __target_intrinsic(cuda, "bool($0)") __target_intrinsic(glsl, "bool($0)") +[__readNone] bool any(T x); __generic<T : __BuiltinType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl, "any(bvec$N0($0))") +[__readNone] bool any(vector<T, N> x) { bool result = false; @@ -909,6 +923,7 @@ bool any(vector<T, N> x) __generic<T : __BuiltinType, let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] bool any(matrix<T, N, M> x) { bool result = false; @@ -926,6 +941,7 @@ 
__target_intrinsic(cpp, "$P_asdouble($0, $1)") __target_intrinsic(cuda, "$P_asdouble($0, $1)") __target_intrinsic(spirv_direct, "%v = OpCompositeConstruct _type(uint2) resultId _0 _1; OpExtInst resultType resultId glsl450 59 %v") __glsl_extension(GL_ARB_gpu_shader5) +[__readNone] double asdouble(uint lowbits, uint highbits); // Reinterpret bits as a float (HLSL SM 4.0) @@ -935,6 +951,7 @@ __target_intrinsic(glsl, "intBitsToFloat") __target_intrinsic(cpp, "$P_asfloat($0)") __target_intrinsic(cuda, "$P_asfloat($0)") __target_intrinsic(spirv_direct, "OpBitcast resultType resultId _0") +[__readNone] float asfloat(int x); __target_intrinsic(hlsl) @@ -942,12 +959,14 @@ __target_intrinsic(glsl, "uintBitsToFloat") __target_intrinsic(cpp, "$P_asfloat($0)") __target_intrinsic(cuda, "$P_asfloat($0)") __target_intrinsic(spirv_direct, "OpBitcast resultType resultId _0") +[__readNone] float asfloat(uint x); __generic<let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl, "intBitsToFloat") __target_intrinsic(spirv_direct, "OpBitcast resultType resultId _0") +[__readNone] vector<float, N> asfloat(vector< int, N> x) { VECTOR_MAP_UNARY(float, N, asfloat, x); @@ -957,6 +976,7 @@ __generic<let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl, "uintBitsToFloat") __target_intrinsic(spirv_direct, "OpBitcast resultType resultId _0") +[__readNone] vector<float,N> asfloat(vector<uint,N> x) { VECTOR_MAP_UNARY(float, N, asfloat, x); @@ -964,6 +984,7 @@ vector<float,N> asfloat(vector<uint,N> x) __generic<let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<float,N,M> asfloat(matrix< int,N,M> x) { MATRIX_MAP_UNARY(float, N, M, asfloat, x); @@ -971,6 +992,7 @@ matrix<float,N,M> asfloat(matrix< int,N,M> x) __generic<let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<float,N,M> asfloat(matrix<uint,N,M> x) { MATRIX_MAP_UNARY(float, N, M, asfloat, x); @@ -978,16 +1000,19 @@ matrix<float,N,M> asfloat(matrix<uint,N,M> x) // No op 
[__unsafeForceInlineEarly] +[__readNone] float asfloat(float x) { return x; } __generic<let N : int> [__unsafeForceInlineEarly] +[__readNone] vector<float,N> asfloat(vector<float,N> x) { return x; } __generic<let N : int, let M : int> [__unsafeForceInlineEarly] +[__readNone] matrix<float,N,M> asfloat(matrix<float,N,M> x) { return x; } @@ -998,12 +1023,14 @@ __target_intrinsic(glsl) __target_intrinsic(cuda, "$P_asin($0)") __target_intrinsic(cpp, "$P_asin($0)") __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Asin _0") +[__readNone] T asin(T x); __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Asin _0") +[__readNone] vector<T, N> asin(vector<T, N> x) { VECTOR_MAP_UNARY(T,N,asin,x); @@ -1011,6 +1038,7 @@ vector<T, N> asin(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<T, N, M> asin(matrix<T, N, M> x) { MATRIX_MAP_UNARY(T,N,M,asin,x); @@ -1023,6 +1051,7 @@ __target_intrinsic(glsl, "floatBitsToInt") __target_intrinsic(cpp, "$P_asint($0)") __target_intrinsic(cuda, "$P_asint($0)") __target_intrinsic(spirv_direct, "OpBitcast resultType resultId _0") +[__readNone] int asint(float x); __target_intrinsic(hlsl) @@ -1030,12 +1059,14 @@ __target_intrinsic(glsl, "int($0)") __target_intrinsic(cpp, "$P_asint($0)") __target_intrinsic(cuda, "$P_asint($0)") __target_intrinsic(spirv_direct, "OpBitcast resultType resultId _0") +[__readNone] int asint(uint x); __generic<let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl, "floatBitsToInt") __target_intrinsic(spirv_direct, "OpBitcast resultType resultId _0") +[__readNone] vector<int, N> asint(vector<float, N> x) { VECTOR_MAP_UNARY(int, N, asint, x); @@ -1045,6 +1076,7 @@ __generic<let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl, "ivec$N0($0)") __target_intrinsic(spirv_direct, 
"OpBitcast resultType resultId _0") +[__readNone] vector<int, N> asint(vector<uint, N> x) { VECTOR_MAP_UNARY(int, N, asint, x); @@ -1052,6 +1084,7 @@ vector<int, N> asint(vector<uint, N> x) __generic<let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<int, N, M> asint(matrix<float, N, M> x) { MATRIX_MAP_UNARY(int, N, M, asint, x); @@ -1059,6 +1092,7 @@ matrix<int, N, M> asint(matrix<float, N, M> x) __generic<let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<int, N, M> asint(matrix<uint, N, M> x) { MATRIX_MAP_UNARY(int, N, M, asint, x); @@ -1066,16 +1100,19 @@ matrix<int, N, M> asint(matrix<uint, N, M> x) // No op [__unsafeForceInlineEarly] +[__readNone] int asint(int x) { return x; } __generic<let N : int> [__unsafeForceInlineEarly] +[__readNone] vector<int,N> asint(vector<int,N> x) { return x; } __generic<let N : int, let M : int> [__unsafeForceInlineEarly] +[__readNone] matrix<int,N,M> asint(matrix<int,N,M> x) { return x; } @@ -1086,6 +1123,7 @@ __target_intrinsic(glsl, "{ uvec2 v = unpackDouble2x32($0); $1 = v.x; $2 = v.y; __glsl_extension(GL_ARB_gpu_shader5) __target_intrinsic(cpp, "$P_asuint($0, $1, $2)") __target_intrinsic(cuda, "$P_asuint($0, $1, $2)") +[__readNone] void asuint(double value, out uint lowbits, out uint highbits); // Reinterpret bits as a uint (HLSL SM 4.0) @@ -1095,6 +1133,7 @@ __target_intrinsic(glsl, "floatBitsToUint") __target_intrinsic(spirv_direct, "OpBitcast resultType resultId _0") __target_intrinsic(cpp, "$P_asuint($0)") __target_intrinsic(cuda, "$P_asuint($0)") +[__readNone] uint asuint(float x); __target_intrinsic(hlsl) @@ -1102,12 +1141,14 @@ __target_intrinsic(glsl, "uint($0)") __target_intrinsic(spirv_direct, "OpBitcast resultType resultId _0") __target_intrinsic(cpp, "$P_asuint($0)") __target_intrinsic(cuda, "$P_asuint($0)") +[__readNone] uint asuint(int x); __generic<let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl, "floatBitsToUint") __target_intrinsic(spirv_direct, 
"OpBitcast resultType resultId _0") +[__readNone] vector<uint,N> asuint(vector<float,N> x) { VECTOR_MAP_UNARY(uint, N, asuint, x); @@ -1117,6 +1158,7 @@ __generic<let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl, "uvec$N0($0)") __target_intrinsic(spirv_direct, "OpBitcast resultType resultId _0") +[__readNone] vector<uint, N> asuint(vector<int, N> x) { VECTOR_MAP_UNARY(uint, N, asuint, x); @@ -1124,6 +1166,7 @@ vector<uint, N> asuint(vector<int, N> x) __generic<let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<uint,N,M> asuint(matrix<float,N,M> x) { MATRIX_MAP_UNARY(uint, N, M, asuint, x); @@ -1131,22 +1174,26 @@ matrix<uint,N,M> asuint(matrix<float,N,M> x) __generic<let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<uint, N, M> asuint(matrix<int, N, M> x) { MATRIX_MAP_UNARY(uint, N, M, asuint, x); } [__unsafeForceInlineEarly] +[__readNone] uint asuint(uint x) { return x; } __generic<let N : int> [__unsafeForceInlineEarly] +[__readNone] vector<uint,N> asuint(vector<uint,N> x) { return x; } __generic<let N : int, let M : int> [__unsafeForceInlineEarly] +[__readNone] matrix<uint,N,M> asuint(matrix<uint,N,M> x) { return x; } @@ -1159,38 +1206,41 @@ matrix<uint,N,M> asuint(matrix<uint,N,M> x) // Identity cases: -[__unsafeForceInlineEarly] float16_t asfloat16(float16_t value) { return value; } -[__unsafeForceInlineEarly] vector<float16_t,N> asfloat16<let N : int>(vector<float16_t,N> value) { return value; } -[__unsafeForceInlineEarly] matrix<float16_t,R,C> asfloat16<let R : int, let C : int>(matrix<float16_t,R,C> value) { return value; } +[__unsafeForceInlineEarly][__readNone] float16_t asfloat16(float16_t value) { return value; } +[__unsafeForceInlineEarly][__readNone] vector<float16_t,N> asfloat16<let N : int>(vector<float16_t,N> value) { return value; } +[__unsafeForceInlineEarly][__readNone] matrix<float16_t,R,C> asfloat16<let R : int, let C : int>(matrix<float16_t,R,C> value) { return value; } 
-[__unsafeForceInlineEarly] int16_t asint16(int16_t value) { return value; } -[__unsafeForceInlineEarly] vector<int16_t,N> asint16<let N : int>(vector<int16_t,N> value) { return value; } -[__unsafeForceInlineEarly] matrix<int16_t,R,C> asint16<let R : int, let C : int>(matrix<int16_t,R,C> value) { return value; } +[__unsafeForceInlineEarly][__readNone] int16_t asint16(int16_t value) { return value; } +[__unsafeForceInlineEarly][__readNone] vector<int16_t,N> asint16<let N : int>(vector<int16_t,N> value) { return value; } +[__unsafeForceInlineEarly][__readNone] matrix<int16_t,R,C> asint16<let R : int, let C : int>(matrix<int16_t,R,C> value) { return value; } -[__unsafeForceInlineEarly] uint16_t asuint16(uint16_t value) { return value; } -[__unsafeForceInlineEarly] vector<uint16_t,N> asuint16<let N : int>(vector<uint16_t,N> value) { return value; } -[__unsafeForceInlineEarly] matrix<uint16_t,R,C> asuint16<let R : int, let C : int>(matrix<uint16_t,R,C> value) { return value; } +[__unsafeForceInlineEarly][__readNone] uint16_t asuint16(uint16_t value) { return value; } +[__unsafeForceInlineEarly][__readNone] vector<uint16_t,N> asuint16<let N : int>(vector<uint16_t,N> value) { return value; } +[__unsafeForceInlineEarly][__readNone] matrix<uint16_t,R,C> asuint16<let R : int, let C : int>(matrix<uint16_t,R,C> value) { return value; } // Signed<->unsigned cases: -[__unsafeForceInlineEarly] int16_t asint16(uint16_t value) { return value; } -[__unsafeForceInlineEarly] vector<int16_t,N> asint16<let N : int>(vector<uint16_t,N> value) { return value; } -[__unsafeForceInlineEarly] matrix<int16_t,R,C> asint16<let R : int, let C : int>(matrix<uint16_t,R,C> value) { return value; } +[__unsafeForceInlineEarly][__readNone] int16_t asint16(uint16_t value) { return value; } +[__unsafeForceInlineEarly][__readNone] vector<int16_t,N> asint16<let N : int>(vector<uint16_t,N> value) { return value; } +[__unsafeForceInlineEarly][__readNone] matrix<int16_t,R,C> asint16<let R : int, let C : 
int>(matrix<uint16_t,R,C> value) { return value; } -[__unsafeForceInlineEarly] uint16_t asuint16(int16_t value) { return value; } -[__unsafeForceInlineEarly] vector<uint16_t,N> asuint16<let N : int>(vector<int16_t,N> value) { return value; } -[__unsafeForceInlineEarly] matrix<uint16_t,R,C> asuint16<let R : int, let C : int>(matrix<int16_t,R,C> value) { return value; } +[__unsafeForceInlineEarly][__readNone] uint16_t asuint16(int16_t value) { return value; } +[__unsafeForceInlineEarly][__readNone] vector<uint16_t,N> asuint16<let N : int>(vector<int16_t,N> value) { return value; } +[__unsafeForceInlineEarly][__readNone] matrix<uint16_t,R,C> asuint16<let R : int, let C : int>(matrix<int16_t,R,C> value) { return value; } // Float->unsigned cases: __target_intrinsic(hlsl) __target_intrinsic(glsl, "uint16_t(packHalf2x16(vec2($0, 0.0)))") __target_intrinsic(cuda, "__half_as_ushort") +[__readNone] uint16_t asuint16(float16_t value); +[__readNone] vector<uint16_t,N> asuint16<let N : int>(vector<float16_t,N> value) { VECTOR_MAP_UNARY(uint16_t, N, asuint16, value); } +[__readNone] matrix<uint16_t,R,C> asuint16<let R : int, let C : int>(matrix<float16_t,R,C> value) { MATRIX_MAP_UNARY(uint16_t, R, C, asuint16, value); } @@ -1199,11 +1249,14 @@ matrix<uint16_t,R,C> asuint16<let R : int, let C : int>(matrix<float16_t,R,C> va __target_intrinsic(hlsl) __target_intrinsic(glsl, "float16_t(unpackHalf2x16($0).x)") __target_intrinsic(cuda, "__ushort_as_half") +[__readNone] float16_t asfloat16(uint16_t value); +[__readNone] vector<float16_t,N> asfloat16<let N : int>(vector<uint16_t,N> value) { VECTOR_MAP_UNARY(float16_t, N, asfloat16, value); } +[__readNone] matrix<float16_t,R,C> asfloat16<let R : int, let C : int>(matrix<uint16_t,R,C> value) { MATRIX_MAP_UNARY(float16_t, R, C, asfloat16, value); } @@ -1211,16 +1264,17 @@ matrix<float16_t,R,C> asfloat16<let R : int, let C : int>(matrix<uint16_t,R,C> v __target_intrinsic(hlsl) __target_intrinsic(cuda, "__half_as_short") 
-[__unsafeForceInlineEarly] int16_t asint16(float16_t value) { return asuint16(value); } -__target_intrinsic(hlsl) [__unsafeForceInlineEarly] vector<int16_t,N> asint16<let N : int>(vector<float16_t,N> value) { return asuint16(value); } -__target_intrinsic(hlsl) [__unsafeForceInlineEarly] matrix<int16_t,R,C> asint16<let R : int, let C : int>(matrix<float16_t,R,C> value) { return asuint16(value); } +[__unsafeForceInlineEarly][__readNone] int16_t asint16(float16_t value) { return asuint16(value); } +__target_intrinsic(hlsl) [__unsafeForceInlineEarly][__readNone] vector<int16_t,N> asint16<let N : int>(vector<float16_t,N> value) { return asuint16(value); } +__target_intrinsic(hlsl) [__unsafeForceInlineEarly][__readNone] matrix<int16_t,R,C> asint16<let R : int, let C : int>(matrix<float16_t,R,C> value) { return asuint16(value); } __target_intrinsic(hlsl) __target_intrinsic(cuda, "__short_as_half") +[__readNone] [__unsafeForceInlineEarly] float16_t asfloat16(int16_t value) { return asfloat16(asuint16(value)); } -__target_intrinsic(hlsl) [__unsafeForceInlineEarly] vector<float16_t,N> asfloat16<let N : int>(vector<int16_t,N> value) { return asfloat16(asuint16(value)); } -__target_intrinsic(hlsl) [__unsafeForceInlineEarly] matrix<float16_t,R,C> asfloat16<let R : int, let C : int>(matrix<int16_t,R,C> value) { return asfloat16(asuint16(value)); } +__target_intrinsic(hlsl) [__unsafeForceInlineEarly][__readNone] vector<float16_t,N> asfloat16<let N : int>(vector<int16_t,N> value) { return asfloat16(asuint16(value)); } +__target_intrinsic(hlsl) [__unsafeForceInlineEarly][__readNone] matrix<float16_t,R,C> asfloat16<let R : int, let C : int>(matrix<int16_t,R,C> value) { return asfloat16(asuint16(value)); } // Inverse tangent (HLSL SM 1.0) __generic<T : __BuiltinFloatingPointType> @@ -1229,12 +1283,14 @@ __target_intrinsic(glsl) __target_intrinsic(cuda, "$P_atan($0)") __target_intrinsic(cpp, "$P_atan($0)") __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Atan 
_0") +[__readNone] T atan(T x); __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Atan _0") +[__readNone] vector<T, N> atan(vector<T, N> x) { VECTOR_MAP_UNARY(T, N, atan, x); @@ -1242,6 +1298,7 @@ vector<T, N> atan(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<T, N, M> atan(matrix<T, N, M> x) { MATRIX_MAP_UNARY(T, N, M, atan, x); @@ -1253,12 +1310,14 @@ __target_intrinsic(glsl,"atan($0,$1)") __target_intrinsic(cuda, "$P_atan2($0, $1)") __target_intrinsic(cpp, "$P_atan2($0, $1)") __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Atan2 _0 _1") +[__readNone] T atan2(T y, T x); __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl,"atan($0,$1)") __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Atan2 _0 _1") +[__readNone] vector<T, N> atan2(vector<T, N> y, vector<T, N> x) { VECTOR_MAP_BINARY(T, N, atan2, y, x); @@ -1266,6 +1325,7 @@ vector<T, N> atan2(vector<T, N> y, vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<T,N,M> atan2(matrix<T,N,M> y, matrix<T,N,M> x) { MATRIX_MAP_BINARY(T, N, M, atan2, y, x); @@ -1278,12 +1338,14 @@ __target_intrinsic(glsl) __target_intrinsic(cuda, "$P_ceil($0)") __target_intrinsic(cpp, "$P_ceil($0)") __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Ceil _0") +[__readNone] T ceil(T x); __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Ceil _0") +[__readNone] vector<T, N> ceil(vector<T, N> x) { VECTOR_MAP_UNARY(T, N, ceil, x); @@ -1291,6 +1353,7 @@ vector<T, N> ceil(vector<T, N> x) __generic<T : 
__BuiltinFloatingPointType, let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<T, N, M> ceil(matrix<T, N, M> x) { MATRIX_MAP_UNARY(T, N, M, ceil, x); @@ -1305,6 +1368,7 @@ __generic<T : __BuiltinIntegerType> __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 fus(FClamp, UClamp, SClamp) _0 _1 _2") +[__readNone] T clamp(T x, T minBound, T maxBound) { return min(max(x, minBound), maxBound); @@ -1314,6 +1378,7 @@ __generic<T : __BuiltinIntegerType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 fus(FClamp, UClamp, SClamp) _0 _1 _2") +[__readNone] vector<T, N> clamp(vector<T, N> x, vector<T, N> minBound, vector<T, N> maxBound) { return min(max(x, minBound), maxBound); @@ -1321,6 +1386,7 @@ vector<T, N> clamp(vector<T, N> x, vector<T, N> minBound, vector<T, N> maxBound) __generic<T : __BuiltinIntegerType, let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<T,N,M> clamp(matrix<T,N,M> x, matrix<T,N,M> minBound, matrix<T,N,M> maxBound) { return min(max(x, minBound), maxBound); @@ -1330,6 +1396,7 @@ __generic<T : __BuiltinFloatingPointType> __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 fus(FClamp, UClamp, SClamp) _0 _1 _2") +[__readNone] T clamp(T x, T minBound, T maxBound) { return min(max(x, minBound), maxBound); @@ -1339,6 +1406,7 @@ __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 fus(FClamp, UClamp, SClamp) _0 _1 _2") +[__readNone] vector<T, N> clamp(vector<T, N> x, vector<T, N> minBound, vector<T, N> maxBound) { return min(max(x, minBound), maxBound); @@ -1346,6 +1414,7 @@ vector<T, N> clamp(vector<T, N> x, vector<T, N> minBound, vector<T, N> maxBound) __generic<T : 
__BuiltinFloatingPointType, let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<T,N,M> clamp(matrix<T,N,M> x, matrix<T,N,M> minBound, matrix<T,N,M> maxBound) { return min(max(x, minBound), maxBound); @@ -1354,6 +1423,7 @@ matrix<T,N,M> clamp(matrix<T,N,M> x, matrix<T,N,M> minBound, matrix<T,N,M> maxBo // Clip (discard) fragment conditionally __generic<T : __BuiltinFloatingPointType> __target_intrinsic(hlsl) +[__readNone] void clip(T x) { if(x < T(0)) discard; @@ -1361,6 +1431,7 @@ void clip(T x) __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) +[__readNone] void clip(vector<T,N> x) { if(any(x < T(0))) discard; @@ -1368,6 +1439,7 @@ void clip(vector<T,N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] void clip(matrix<T,N,M> x) { if(any(x < T(0))) discard; @@ -1380,12 +1452,14 @@ __target_intrinsic(glsl) __target_intrinsic(cuda, "$P_cos($0)") __target_intrinsic(cpp, "$P_cos($0)") __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Cos _0") +[__readNone] T cos(T x); __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Cos _0") +[__readNone] vector<T, N> cos(vector<T, N> x) { VECTOR_MAP_UNARY(T,N, cos, x); @@ -1393,6 +1467,7 @@ vector<T, N> cos(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<T, N, M> cos(matrix<T, N, M> x) { MATRIX_MAP_UNARY(T, N, M, cos, x); @@ -1405,12 +1480,14 @@ __target_intrinsic(glsl) __target_intrinsic(cuda, "$P_cosh($0)") __target_intrinsic(cpp, "$P_cosh($0)") __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Cosh _0") +[__readNone] T cosh(T x); __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv_direct, 
"OpExtInst resultType resultId glsl450 Cosh _0") +[__readNone] vector<T,N> cosh(vector<T,N> x) { VECTOR_MAP_UNARY(T,N, cosh, x); @@ -1418,6 +1495,7 @@ vector<T,N> cosh(vector<T,N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<T, N, M> cosh(matrix<T, N, M> x) { MATRIX_MAP_UNARY(T, N, M, cosh, x); @@ -1428,6 +1506,7 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl, "bitCount") __target_intrinsic(cuda, "$P_countbits($0)") __target_intrinsic(cpp, "$P_countbits($0)") +[__readNone] uint countbits(uint value); // Cross product @@ -1436,6 +1515,7 @@ __generic<T : __BuiltinArithmeticType> __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Cross _0 _1") +[__readNone] vector<T,3> cross(vector<T,3> left, vector<T,3> right) { return vector<T,3>( @@ -1446,6 +1526,7 @@ vector<T,3> cross(vector<T,3> left, vector<T,3> right) // Convert encoded color __target_intrinsic(hlsl) +[__readNone] int4 D3DCOLORtoUBYTE4(float4 color) { let scaled = color.zyxw * 255.001999f; @@ -1455,11 +1536,13 @@ int4 D3DCOLORtoUBYTE4(float4 color) // Partial-difference derivatives __generic<T : __BuiltinFloatingPointType> __target_intrinsic(glsl, dFdx) +[__readNone] T ddx(T x); __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl, dFdx) +[__readNone] vector<T, N> ddx(vector<T, N> x) { VECTOR_MAP_UNARY(T, N, ddx, x); @@ -1467,6 +1550,7 @@ vector<T, N> ddx(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<T, N, M> ddx(matrix<T, N, M> x) { MATRIX_MAP_UNARY(T, N, M, ddx, x); @@ -1476,12 +1560,14 @@ __generic<T : __BuiltinFloatingPointType> __target_intrinsic(hlsl) __glsl_extension(GL_ARB_derivative_control) __target_intrinsic(glsl, dFdxCoarse) +[__readNone] T ddx_coarse(T x); __generic<T : __BuiltinFloatingPointType, let N : int> 
__target_intrinsic(hlsl) __glsl_extension(GL_ARB_derivative_control) __target_intrinsic(glsl, dFdxCoarse) +[__readNone] vector<T, N> ddx_coarse(vector<T, N> x) { VECTOR_MAP_UNARY(T, N, ddx_coarse, x); @@ -1489,6 +1575,7 @@ vector<T, N> ddx_coarse(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<T, N, M> ddx_coarse(matrix<T, N, M> x) { MATRIX_MAP_UNARY(T, N, M, ddx_coarse, x); @@ -1498,12 +1585,14 @@ __generic<T : __BuiltinFloatingPointType> __target_intrinsic(hlsl) __glsl_extension(GL_ARB_derivative_control) __target_intrinsic(glsl, dFdxFine) +[__readNone] T ddx_fine(T x); __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) __glsl_extension(GL_ARB_derivative_control) __target_intrinsic(glsl, dFdxFine) +[__readNone] vector<T, N> ddx_fine(vector<T, N> x) { VECTOR_MAP_UNARY(T, N, ddx_fine, x); @@ -1511,6 +1600,7 @@ vector<T, N> ddx_fine(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<T, N, M> ddx_fine(matrix<T, N, M> x) { MATRIX_MAP_UNARY(T, N, M, ddx_fine, x); @@ -1519,11 +1609,13 @@ matrix<T, N, M> ddx_fine(matrix<T, N, M> x) __generic<T : __BuiltinFloatingPointType> __target_intrinsic(hlsl) __target_intrinsic(glsl, dFdy) +[__readNone] T ddy(T x); __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl, dFdy) +[__readNone] vector<T, N> ddy(vector<T, N> x) { VECTOR_MAP_UNARY(T, N, ddy, x); @@ -1531,6 +1623,7 @@ vector<T, N> ddy(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<T, N, M> ddy(matrix<T, N, M> x) { MATRIX_MAP_UNARY(T, N, M, ddy, x); @@ -1539,12 +1632,14 @@ matrix<T, N, M> ddy(matrix<T, N, M> x) __generic<T : __BuiltinFloatingPointType> __glsl_extension(GL_ARB_derivative_control) __target_intrinsic(glsl, dFdyCoarse) +[__readNone] T 
ddy_coarse(T x); __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) __glsl_extension(GL_ARB_derivative_control) __target_intrinsic(glsl, dFdyCoarse) +[__readNone] vector<T, N> ddy_coarse(vector<T, N> x) { VECTOR_MAP_UNARY(T, N, ddy_coarse, x); @@ -1552,6 +1647,7 @@ vector<T, N> ddy_coarse(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<T, N, M> ddy_coarse(matrix<T, N, M> x) { MATRIX_MAP_UNARY(T, N, M, ddy_coarse, x); @@ -1561,12 +1657,14 @@ __generic<T : __BuiltinFloatingPointType> __target_intrinsic(hlsl) __glsl_extension(GL_ARB_derivative_control) __target_intrinsic(glsl, dFdyFine) +[__readNone] T ddy_fine(T x); __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) __glsl_extension(GL_ARB_derivative_control) __target_intrinsic(glsl, dFdyFine) +[__readNone] vector<T, N> ddy_fine(vector<T, N> x) { VECTOR_MAP_UNARY(T, N, ddy_fine, x); @@ -1574,6 +1672,7 @@ vector<T, N> ddy_fine(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<T, N, M> ddy_fine(matrix<T, N, M> x) { MATRIX_MAP_UNARY(T, N, M, ddy_fine, x); @@ -1586,6 +1685,7 @@ __generic<T : __BuiltinFloatingPointType> __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Degrees _0") +[__readNone] T degrees(T x) { return x * (T(180) / T.getPi()); @@ -1595,6 +1695,7 @@ __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Degrees _0") +[__readNone] vector<T, N> degrees(vector<T, N> x) { VECTOR_MAP_UNARY(T, N, degrees, x); @@ -1602,6 +1703,7 @@ vector<T, N> degrees(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<T, N, M> degrees(matrix<T, 
N, M> x) { MATRIX_MAP_UNARY(T, N, M, degrees, x); @@ -1613,6 +1715,7 @@ __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Determinant _0") +[__readNone] T determinant(matrix<T,N,N> m); // Barrier for device memory @@ -1630,6 +1733,7 @@ __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Distance _0 _1") +[__readNone] T distance(vector<T, N> x, vector<T, N> y) { return length(x - y); @@ -1640,6 +1744,7 @@ T distance(vector<T, N> x, vector<T, N> y) __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl) +[__readNone] T dot(vector<T, N> x, vector<T, N> y) { T result = T(0); @@ -1650,6 +1755,7 @@ T dot(vector<T, N> x, vector<T, N> y) __generic<T : __BuiltinIntegerType, let N : int> __target_intrinsic(hlsl) +[__readNone] T dot(vector<T, N> x, vector<T, N> y) { T result = T(0); @@ -1682,15 +1788,18 @@ RWStructuredBuffer<T> __getEquivalentStructuredBuffer<T>(RWByteAddressBuffer b); __generic<T : __BuiltinArithmeticType> __target_intrinsic(glsl, interpolateAtCentroid) __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 InterpolateAtCentroid _0") +[__readNone] T EvaluateAttributeAtCentroid(T x); __generic<T : __BuiltinArithmeticType, let N : int> __target_intrinsic(glsl, interpolateAtCentroid) __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 InterpolateAtCentroid _0") +[__readNone] vector<T,N> EvaluateAttributeAtCentroid(vector<T,N> x); __generic<T : __BuiltinArithmeticType, let N : int, let M : int> __target_intrinsic(glsl, interpolateAtCentroid) +[__readNone] matrix<T,N,M> EvaluateAttributeAtCentroid(matrix<T,N,M> x) { MATRIX_MAP_UNARY(T, N, M, EvaluateAttributeAtCentroid, x); @@ -1699,15 +1808,18 @@ matrix<T,N,M> 
EvaluateAttributeAtCentroid(matrix<T,N,M> x) __generic<T : __BuiltinArithmeticType> __target_intrinsic(glsl, "interpolateAtSample($0, int($1))") __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 InterpolateAtSample _0 _1") +[__readNone] T EvaluateAttributeAtSample(T x, uint sampleindex); __generic<T : __BuiltinArithmeticType, let N : int> __target_intrinsic(glsl, "interpolateAtSample($0, int($1))") __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 InterpolateAtSample _0 _1") +[__readNone] vector<T,N> EvaluateAttributeAtSample(vector<T,N> x, uint sampleindex); __generic<T : __BuiltinArithmeticType, let N : int, let M : int> __target_intrinsic(glsl, "interpolateAtSample($0, int($1))") +[__readNone] matrix<T,N,M> EvaluateAttributeAtSample(matrix<T,N,M> x, uint sampleindex) { matrix<T,N,M> result; @@ -1721,15 +1833,18 @@ matrix<T,N,M> EvaluateAttributeAtSample(matrix<T,N,M> x, uint sampleindex) __generic<T : __BuiltinArithmeticType> __target_intrinsic(glsl, "interpolateAtOffset($0, vec2($1) / 16.0f)") __target_intrinsic(spirv_direct, "%foffset = OpConvertSToF _type(float2) resultId _1; %offsetdiv16 = 136 _type(float2) resultId %foffset const(float2, 16.0, 16.0); OpExtInst resultType resultId glsl450 78 _0 %offsetdiv16") +[__readNone] T EvaluateAttributeSnapped(T x, int2 offset); __generic<T : __BuiltinArithmeticType, let N : int> __target_intrinsic(glsl, "interpolateAtOffset($0, vec2($1) / 16.0f)") __target_intrinsic(spirv_direct, "%foffset = OpConvertSToF _type(float2) resultId _1; %offsetdiv16 = 136 _type(float2) resultId %foffset const(float2, 16.0, 16.0); OpExtInst resultType resultId glsl450 78 _0 %offsetdiv16") +[__readNone] vector<T,N> EvaluateAttributeSnapped(vector<T,N> x, int2 offset); __generic<T : __BuiltinArithmeticType, let N : int, let M : int> __target_intrinsic(glsl, "interpolateAtOffset($0, vec2($1) / 16.0f)") +[__readNone] matrix<T,N,M> EvaluateAttributeSnapped(matrix<T,N,M> x, int2 offset) { matrix<T,N,M> 
result; @@ -1748,12 +1863,14 @@ __target_intrinsic(glsl) __target_intrinsic(cuda, "$P_exp($0)") __target_intrinsic(cpp, "$P_exp($0)") __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Exp _0") +[__readNone] T exp(T x); __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Exp _0") +[__readNone] vector<T, N> exp(vector<T, N> x) { VECTOR_MAP_UNARY(T, N, exp, x); @@ -1761,6 +1878,7 @@ vector<T, N> exp(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<T, N, M> exp(matrix<T, N, M> x) { MATRIX_MAP_UNARY(T, N, M, exp, x); @@ -1774,12 +1892,14 @@ __target_intrinsic(glsl) __target_intrinsic(cuda, "$P_exp2($0)") __target_intrinsic(cpp, "$P_exp2($0)") __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Exp2 _0") +[__readNone] T exp2(T x); __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Exp2 _0") +[__readNone] vector<T,N> exp2(vector<T,N> x) { VECTOR_MAP_UNARY(T, N, exp2, x); @@ -1787,6 +1907,7 @@ vector<T,N> exp2(vector<T,N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<T,N,M> exp2(matrix<T,N,M> x) { MATRIX_MAP_UNARY(T, N, M, exp2, x); @@ -1799,10 +1920,12 @@ __glsl_version(420) __target_intrinsic(hlsl) __cuda_sm_version(6.0) __target_intrinsic(cuda, "__half2float(__ushort_as_half($0))") +[__readNone] float f16tof32(uint value); __generic<let N : int> __target_intrinsic(hlsl) +[__readNone] vector<float, N> f16tof32(vector<uint, N> value) { VECTOR_MAP_UNARY(float, N, f16tof32, value); @@ -1816,10 +1939,12 @@ __glsl_version(420) __target_intrinsic(hlsl) __cuda_sm_version(6.0) __target_intrinsic(cuda, "__half_as_ushort(__float2half($0))") 
+[__readNone] uint f32tof16(float value); __generic<let N : int> __target_intrinsic(hlsl) +[__readNone] vector<uint, N> f32tof16(vector<float, N> value) { VECTOR_MAP_UNARY(uint, N, f32tof16, value); @@ -1833,11 +1958,13 @@ vector<uint, N> f32tof16(vector<float, N> value) __target_intrinsic(glsl, "unpackHalf2x16($0).x") __target_intrinsic(cuda, "__half2float") __glsl_version(420) +[__readNone] float f16tof32(float16_t value); __generic<let N : int> __target_intrinsic(hlsl) __target_intrinsic(cuda, "__half2float") +[__readNone] vector<float, N> f16tof32(vector<float16_t, N> value) { VECTOR_MAP_UNARY(float, N, f16tof32, value); @@ -1847,10 +1974,12 @@ vector<float, N> f16tof32(vector<float16_t, N> value) __target_intrinsic(glsl, "packHalf2x16(vec2($0,0.0))") __glsl_version(420) __target_intrinsic(cuda, "__float2half") +[__readNone] float16_t f32tof16_(float value); __generic<let N : int> __target_intrinsic(cuda, "__float2half") +[__readNone] vector<float16_t, N> f32tof16_(vector<float, N> value) { VECTOR_MAP_UNARY(uint, N, f32tof16, value); @@ -1862,6 +1991,7 @@ vector<float16_t, N> f32tof16_(vector<float, N> value) __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl) +[__readNone] vector<T,N> faceforward(vector<T,N> n, vector<T,N> i, vector<T,N> ng) { return dot(ng, i) < T(0.0f) ? 
n : -n; @@ -1873,12 +2003,14 @@ __target_intrinsic(glsl,"findMSB") __target_intrinsic(cuda, "$P_firstbithigh($0)") __target_intrinsic(cpp, "$P_firstbithigh($0)") __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 FindSMsb _0") +[__readNone] int firstbithigh(int value); __target_intrinsic(hlsl) __target_intrinsic(glsl,"findMSB") __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 FindSMsb _0") __generic<let N : int> +[__readNone] vector<int, N> firstbithigh(vector<int, N> value) { VECTOR_MAP_UNARY(int, N, firstbithigh, value); @@ -1889,12 +2021,14 @@ __target_intrinsic(glsl,"findMSB") __target_intrinsic(cuda, "$P_firstbithigh($0)") __target_intrinsic(cpp, "$P_firstbithigh($0)") __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 FindUMsb _0") +[__readNone] uint firstbithigh(uint value); __target_intrinsic(hlsl) __target_intrinsic(glsl,"findMSB") __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 FindUMsb _0") __generic<let N : int> +[__readNone] vector<uint,N> firstbithigh(vector<uint,N> value) { VECTOR_MAP_UNARY(uint, N, firstbithigh, value); @@ -1906,12 +2040,14 @@ __target_intrinsic(glsl,"findLSB") __target_intrinsic(cuda, "$P_firstbitlow($0)") __target_intrinsic(cpp, "$P_firstbitlow($0)") __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 FindILsb _0") +[__readNone] int firstbitlow(int value); __target_intrinsic(hlsl) __target_intrinsic(glsl,"findLSB") __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 FindILsb _0") __generic<let N : int> +[__readNone] vector<int,N> firstbitlow(vector<int,N> value) { VECTOR_MAP_UNARY(int, N, firstbitlow, value); @@ -1922,12 +2058,14 @@ __target_intrinsic(glsl,"findLSB") __target_intrinsic(cuda, "$P_firstbitlow($0)") __target_intrinsic(cpp, "$P_firstbitlow($0)") __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 FindILsb _0") +[__readNone] uint firstbitlow(uint value); 
__target_intrinsic(hlsl) __target_intrinsic(glsl,"findLSB") __generic<let N : int> __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 FindILsb _0") +[__readNone] vector<uint,N> firstbitlow(vector<uint,N> value) { VECTOR_MAP_UNARY(uint, N, firstbitlow, value); @@ -1941,12 +2079,14 @@ __target_intrinsic(glsl) __target_intrinsic(cuda, "$P_floor($0)") __target_intrinsic(cpp, "$P_floor($0)") __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Floor _0") +[__readNone] T floor(T x); __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Floor _0") +[__readNone] vector<T, N> floor(vector<T, N> x) { VECTOR_MAP_UNARY(T, N, floor, x); @@ -1954,6 +2094,7 @@ vector<T, N> floor(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<T, N, M> floor(matrix<T, N, M> x) { MATRIX_MAP_UNARY(T, N, M, floor, x); @@ -1965,12 +2106,14 @@ __target_intrinsic(glsl) __target_intrinsic(cuda, "$P_fma($0, $1, $2)") __target_intrinsic(cpp, "$P_fma($0, $1, $2)") __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Fma _0 _1 _2") +[__readNone] double fma(double a, double b, double c); __generic<let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Fma _0 _1 _2") +[__readNone] vector<double, N> fma(vector<double, N> a, vector<double, N> b, vector<double, N> c) { VECTOR_MAP_TRINARY(double, N, fma, a, b, c); @@ -1978,6 +2121,7 @@ vector<double, N> fma(vector<double, N> a, vector<double, N> b, vector<double, N __generic<let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<double, N, M> fma(matrix<double, N, M> a, matrix<double, N, M> b, matrix<double, N, M> c) { MATRIX_MAP_TRINARY(double, N, M, fma, a, b, c); @@ -1988,6 +2132,7 @@ __generic<T : 
__BuiltinFloatingPointType> __target_intrinsic(hlsl) __target_intrinsic(cuda, "$P_fmod($0, $1)") __target_intrinsic(cpp, "$P_fmod($0, $1)") +[__readNone] T fmod(T x, T y) { return x - y * trunc(x/y); @@ -1995,6 +2140,7 @@ T fmod(T x, T y) __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) +[__readNone] vector<T, N> fmod(vector<T, N> x, vector<T, N> y) { VECTOR_MAP_BINARY(T, N, fmod, x, y); @@ -2002,6 +2148,7 @@ vector<T, N> fmod(vector<T, N> x, vector<T, N> y) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<T, N, M> fmod(matrix<T, N, M> x, matrix<T, N, M> y) { MATRIX_MAP_BINARY(T, N, M, fmod, x, y); @@ -2014,18 +2161,21 @@ __target_intrinsic(glsl, fract) __target_intrinsic(cuda, "$P_frac($0)") __target_intrinsic(cpp, "$P_frac($0)") __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Fract _0") +[__readNone] T frac(T x); __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl, fract) __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Fract _0") +[__readNone] vector<T, N> frac(vector<T, N> x) { VECTOR_MAP_UNARY(T, N, frac, x); } __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> +[__readNone] matrix<T, N, M> frac(matrix<T, N, M> x) { MATRIX_MAP_UNARY(T, N, M, frac, x); @@ -2036,12 +2186,14 @@ __generic<T : __BuiltinFloatingPointType> __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Frexp _0 _1") +[__readNone] T frexp(T x, out T exp); __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Frexp _0 _1") +[__readNone] vector<T, N> frexp(vector<T, N> x, out vector<T, N> exp) { VECTOR_MAP_BINARY(T, N, frexp, x, exp); @@ -2049,6 +2201,7 @@ vector<T, N> frexp(vector<T, N> x, out 
vector<T, N> exp) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<T, N, M> frexp(matrix<T, N, M> x, out matrix<T, N, M> exp) { MATRIX_MAP_BINARY(T, N, M, frexp, x, exp); @@ -2056,11 +2209,13 @@ matrix<T, N, M> frexp(matrix<T, N, M> x, out matrix<T, N, M> exp) // Texture filter width __generic<T : __BuiltinFloatingPointType> +[__readNone] T fwidth(T x); __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl) +[__readNone] vector<T, N> fwidth(vector<T, N> x) { VECTOR_MAP_UNARY(T, N, fwidth, x); @@ -2068,6 +2223,7 @@ vector<T, N> fwidth(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<T, N, M> fwidth(matrix<T, N, M> x) { MATRIX_MAP_UNARY(T, N, M, fwidth, x); @@ -2141,9 +2297,11 @@ matrix<T,N,M> GetAttributeAtVertex(matrix<T,N,M> attribute, uint vertexIndex); // Get number of samples in render target +[__readNone] uint GetRenderTargetSampleCount(); // Get position of given sample +[__readNone] float2 GetRenderTargetSamplePosition(int Index); // Group memory barrier @@ -2284,6 +2442,7 @@ __generic<T : __BuiltinFloatingPointType> __target_intrinsic(hlsl) __target_intrinsic(cuda, "$P_isfinite($0)") __target_intrinsic(cpp, "$P_isfinite($0)") +[__readNone] bool isfinite(T x) { return !(isinf(x) || isnan(x)); @@ -2291,6 +2450,7 @@ bool isfinite(T x) __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) +[__readNone] vector<bool, N> isfinite(vector<T, N> x) { VECTOR_MAP_UNARY(bool, N, isfinite, x); @@ -2298,6 +2458,7 @@ vector<bool, N> isfinite(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<bool, N, M> isfinite(matrix<T, N, M> x) { MATRIX_MAP_UNARY(bool, N, M, isfinite, x); @@ -2309,11 +2470,13 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(cuda, 
"$P_isinf($0)") __target_intrinsic(cpp, "$P_isinf($0)") +[__readNone] bool isinf(T x); __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl) +[__readNone] vector<bool, N> isinf(vector<T, N> x) { VECTOR_MAP_UNARY(bool, N, isinf, x); @@ -2321,6 +2484,7 @@ vector<bool, N> isinf(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<bool, N, M> isinf(matrix<T, N, M> x) { MATRIX_MAP_UNARY(bool, N, M, isinf, x); @@ -2332,11 +2496,13 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(cuda, "$P_isnan($0)") __target_intrinsic(cpp, "$P_isnan($0)") +[__readNone] bool isnan(T x); __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl) +[__readNone] vector<bool, N> isnan(vector<T, N> x) { VECTOR_MAP_UNARY(bool, N, isnan, x); @@ -2344,6 +2510,7 @@ vector<bool, N> isnan(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<bool, N, M> isnan(matrix<T, N, M> x) { MATRIX_MAP_UNARY(bool, N, M, isnan, x); @@ -2354,6 +2521,7 @@ matrix<bool, N, M> isnan(matrix<T, N, M> x) __generic<T : __BuiltinFloatingPointType> __target_intrinsic(hlsl) __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Ldexp _0 _1") +[__readNone] T ldexp(T x, T exp) { return x * exp2(exp); @@ -2362,6 +2530,7 @@ T ldexp(T x, T exp) __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Ldexp _0 _1") +[__readNone] vector<T, N> ldexp(vector<T, N> x, vector<T, N> exp) { return x * exp2(exp); @@ -2369,6 +2538,7 @@ vector<T, N> ldexp(vector<T, N> x, vector<T, N> exp) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<T, N, M> ldexp(matrix<T, N, M> x, matrix<T, N, M> exp) { 
MATRIX_MAP_BINARY(T, N, M, ldexp, x, exp); @@ -2379,6 +2549,7 @@ __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Length _0") +[__readNone] T length(vector<T, N> x) { return sqrt(dot(x, x)); @@ -2389,6 +2560,7 @@ __generic<T : __BuiltinFloatingPointType> __target_intrinsic(hlsl) __target_intrinsic(glsl, mix) __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 FMix _0 _1 _2") +[__readNone] T lerp(T x, T y, T s) { return x * (T(1.0f) - s) + y * s; @@ -2398,6 +2570,7 @@ __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl, mix) __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 FMix _0 _1 _2") +[__readNone] vector<T, N> lerp(vector<T, N> x, vector<T, N> y, vector<T, N> s) { return x * (T(1.0f) - s) + y * s; @@ -2405,6 +2578,7 @@ vector<T, N> lerp(vector<T, N> x, vector<T, N> y, vector<T, N> s) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<T,N,M> lerp(matrix<T,N,M> x, matrix<T,N,M> y, matrix<T,N,M> s) { MATRIX_MAP_TRINARY(T, N, M, lerp, x, y, s); @@ -2412,6 +2586,7 @@ matrix<T,N,M> lerp(matrix<T,N,M> x, matrix<T,N,M> y, matrix<T,N,M> s) // Legacy lighting function (obsolete) __target_intrinsic(hlsl) +[__readNone] float4 lit(float n_dot_l, float n_dot_h, float m) { let ambient = 1.0f; @@ -2427,12 +2602,14 @@ __target_intrinsic(glsl) __target_intrinsic(cuda, "$P_log($0)") __target_intrinsic(cpp, "$P_log($0)") __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Log _0") +[__readNone] T log(T x); __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Log _0") +[__readNone] vector<T, N> log(vector<T, N> x) { VECTOR_MAP_UNARY(T, N, log, x); @@ -2440,6 
+2617,7 @@ vector<T, N> log(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<T, N, M> log(matrix<T, N, M> x) { MATRIX_MAP_UNARY(T, N, M, log, x); @@ -2452,12 +2630,14 @@ __target_intrinsic(glsl, "(log( $0 ) * $S0( 0.43429448190325182765112891891661) __target_intrinsic(cuda, "$P_log10($0)") __target_intrinsic(cpp, "$P_log10($0)") __target_intrinsic(spirv_direct, "%baseElog = OpExtInst resultType resultId glsl450 Log _0; OpFMul resultType resultId _0 %baseElog const(_p,0.43429448190325182765112891891661)") +[__readNone] T log10(T x); __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl, "(log( $0 ) * $S0(0.43429448190325182765112891891661) )" ) __target_intrinsic(spirv_direct, "%baseElog = OpExtInst resultType resultId glsl450 Log _0; OpVectorTimesScalar resultType resultId _0 %baseElog const(_p,0.43429448190325182765112891891661)") +[__readNone] vector<T,N> log10(vector<T,N> x) { VECTOR_MAP_UNARY(T, N, log10, x); @@ -2465,6 +2645,7 @@ vector<T,N> log10(vector<T,N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<T,N,M> log10(matrix<T,N,M> x) { MATRIX_MAP_UNARY(T, N, M, log10, x); @@ -2477,12 +2658,14 @@ __target_intrinsic(glsl) __target_intrinsic(cuda, "$P_log2($0)") __target_intrinsic(cpp, "$P_log2($0)") __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Log2 _0") +[__readNone] T log2(T x); __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Log2 _0") +[__readNone] vector<T,N> log2(vector<T,N> x) { VECTOR_MAP_UNARY(T, N, log2, x); @@ -2490,6 +2673,7 @@ vector<T,N> log2(vector<T,N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<T,N,M> log2(matrix<T,N,M> x) { 
MATRIX_MAP_UNARY(T, N, M, log2, x); @@ -2503,12 +2687,14 @@ __target_intrinsic(glsl, fma) __target_intrinsic(cuda, "$P_fma($0, $1, $2)") __target_intrinsic(cpp, "$P_fma($0, $1, $2)") __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Fma _0 _1 _2") +[__readNone] T mad(T mvalue, T avalue, T bvalue); __generic<T : __BuiltinArithmeticType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl, fma) __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Fma _0 _1 _2") +[__readNone] vector<T, N> mad(vector<T, N> mvalue, vector<T, N> avalue, vector<T, N> bvalue) { VECTOR_MAP_TRINARY(T, N, mad, mvalue, avalue, bvalue); @@ -2516,6 +2702,7 @@ vector<T, N> mad(vector<T, N> mvalue, vector<T, N> avalue, vector<T, N> bvalue) __generic<T : __BuiltinArithmeticType, let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<T, N, M> mad(matrix<T, N, M> mvalue, matrix<T, N, M> avalue, matrix<T, N, M> bvalue) { MATRIX_MAP_TRINARY(T, N, M, mad, mvalue, avalue, bvalue); @@ -2528,6 +2715,7 @@ __target_intrinsic(glsl) __target_intrinsic(cuda, "$P_max($0, $1)") __target_intrinsic(cpp, "$P_max($0, $1)") __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 fus(FMax, UMax, SMax) _0") +[__readNone] T max(T x, T y); // Note: a stdlib implementation of `max` (or `min`) will require splitting // floating-point and integer cases apart, because the floating-point @@ -2538,6 +2726,7 @@ __generic<T : __BuiltinIntegerType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 fus(FMax, UMax, SMax) _0") +[__readNone] vector<T, N> max(vector<T, N> x, vector<T, N> y) { VECTOR_MAP_BINARY(T, N, max, x, y); @@ -2545,6 +2734,7 @@ vector<T, N> max(vector<T, N> x, vector<T, N> y) __generic<T : __BuiltinIntegerType, let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<T, N, M> max(matrix<T, N, M> x, matrix<T, N, M> y) { 
MATRIX_MAP_BINARY(T, N, M, max, x, y); @@ -2556,12 +2746,14 @@ __target_intrinsic(glsl) __target_intrinsic(cuda, "$P_max($0, $1)") __target_intrinsic(cpp, "$P_max($0, $1)") __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 fus(FMax, UMax, SMax) _0") +[__readNone] T max(T x, T y); __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 fus(FMax, UMax, SMax) _0") +[__readNone] vector<T, N> max(vector<T, N> x, vector<T, N> y) { VECTOR_MAP_BINARY(T, N, max, x, y); @@ -2569,6 +2761,7 @@ vector<T, N> max(vector<T, N> x, vector<T, N> y) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<T, N, M> max(matrix<T, N, M> x, matrix<T, N, M> y) { MATRIX_MAP_BINARY(T, N, M, max, x, y); @@ -2581,12 +2774,14 @@ __target_intrinsic(glsl) __target_intrinsic(cuda, "$P_min($0, $1)") __target_intrinsic(cpp, "$P_min($0, $1)") __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 fus(FMin, UMin, SMin) _0") +[__readNone] T min(T x, T y); __generic<T : __BuiltinIntegerType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 fus(FMin, UMin, SMin) _0") +[__readNone] vector<T,N> min(vector<T,N> x, vector<T,N> y) { VECTOR_MAP_BINARY(T, N, min, x, y); @@ -2594,6 +2789,7 @@ vector<T,N> min(vector<T,N> x, vector<T,N> y) __generic<T : __BuiltinIntegerType, let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<T,N,M> min(matrix<T,N,M> x, matrix<T,N,M> y) { MATRIX_MAP_BINARY(T, N, M, min, x, y); @@ -2605,12 +2801,14 @@ __target_intrinsic(glsl) __target_intrinsic(cuda, "$P_min($0, $1)") __target_intrinsic(cpp, "$P_min($0, $1)") __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 fus(FMin, UMin, SMin) _0") +[__readNone] T min(T x, T y); __generic<T : 
__BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 fus(FMin, UMin, SMin) _0") +[__readNone] vector<T,N> min(vector<T,N> x, vector<T,N> y) { VECTOR_MAP_BINARY(T, N, min, x, y); @@ -2618,6 +2816,7 @@ vector<T,N> min(vector<T,N> x, vector<T,N> y) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<T,N,M> min(matrix<T,N,M> x, matrix<T,N,M> y) { MATRIX_MAP_BINARY(T, N, M, min, x, y); @@ -2625,11 +2824,13 @@ matrix<T,N,M> min(matrix<T,N,M> x, matrix<T,N,M> y) // split into integer and fractional parts (both with same sign) __generic<T : __BuiltinFloatingPointType> +[__readNone] T modf(T x, out T ip); __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl) +[__readNone] vector<T,N> modf(vector<T,N> x, out vector<T,N> ip) { VECTOR_MAP_BINARY(T, N, modf, x, ip); @@ -2637,6 +2838,7 @@ vector<T,N> modf(vector<T,N> x, out vector<T,N> ip) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<T,N,M> modf(matrix<T,N,M> x, out matrix<T,N,M> ip) { MATRIX_MAP_BINARY(T, N, M, modf, x, ip); @@ -2644,6 +2846,7 @@ matrix<T,N,M> modf(matrix<T,N,M> x, out matrix<T,N,M> ip) // msad4 (whatever that is) __target_intrinsic(hlsl) +[__readNone] uint4 msad4(uint reference, uint2 source, uint4 accum) { int4 bytesRef = (reference >> uint4(24, 16, 8, 0)) & 0xFF; @@ -2665,36 +2868,43 @@ uint4 msad4(uint reference, uint2 source, uint4 accum) // scalar-scalar __generic<T : __BuiltinArithmeticType> __intrinsic_op($(kIROp_Mul)) +[__readNone] T mul(T x, T y); // scalar-vector and vector-scalar __generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op($(kIROp_Mul)) +[__readNone] vector<T, N> mul(vector<T, N> x, T y); __generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op($(kIROp_Mul)) +[__readNone] 
vector<T, N> mul(T x, vector<T, N> y); // scalar-matrix and matrix-scalar __generic<T : __BuiltinArithmeticType, let N : int, let M :int> __intrinsic_op($(kIROp_Mul)) +[__readNone] matrix<T, N, M> mul(matrix<T, N, M> x, T y); __generic<T : __BuiltinArithmeticType, let N : int, let M :int> __intrinsic_op($(kIROp_Mul)) +[__readNone] matrix<T, N, M> mul(T x, matrix<T, N, M> y); // vector-vector (dot product) __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl, "dot") +[__readNone] T mul(vector<T, N> x, vector<T, N> y) { return dot(x, y); } __generic<T : __BuiltinIntegerType, let N : int> __target_intrinsic(hlsl) +[__readNone] T mul(vector<T, N> x, vector<T, N> y) { return dot(x, y); @@ -2704,6 +2914,7 @@ T mul(vector<T, N> x, vector<T, N> y) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __target_intrinsic(hlsl) __target_intrinsic(glsl, "($1 * $0)") +[__readNone] vector<T, M> mul(vector<T, N> left, matrix<T, N, M> right) { vector<T,M> result; @@ -2721,6 +2932,7 @@ vector<T, M> mul(vector<T, N> left, matrix<T, N, M> right) __generic<T : __BuiltinIntegerType, let N : int, let M : int> __target_intrinsic(hlsl) __target_intrinsic(glsl, "($1 * $0)") +[__readNone] vector<T, M> mul(vector<T, N> left, matrix<T, N, M> right) { vector<T,M> result; @@ -2738,6 +2950,7 @@ vector<T, M> mul(vector<T, N> left, matrix<T, N, M> right) __generic<T : __BuiltinLogicalType, let N : int, let M : int> __target_intrinsic(hlsl) __target_intrinsic(glsl, "($1 * $0)") +[__readNone] vector<T, M> mul(vector<T, N> left, matrix<T, N, M> right) { vector<T,M> result; @@ -2757,6 +2970,7 @@ vector<T, M> mul(vector<T, N> left, matrix<T, N, M> right) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __target_intrinsic(hlsl) __target_intrinsic(glsl, "($1 * $0)") +[__readNone] vector<T,N> mul(matrix<T,N,M> left, vector<T,M> right) { vector<T,N> result; @@ -2774,6 +2988,7 @@ vector<T,N> mul(matrix<T,N,M> left, 
vector<T,M> right) __generic<T : __BuiltinIntegerType, let N : int, let M : int> __target_intrinsic(hlsl) __target_intrinsic(glsl, "($1 * $0)") +[__readNone] vector<T,N> mul(matrix<T,N,M> left, vector<T,M> right) { vector<T,N> result; @@ -2791,6 +3006,7 @@ vector<T,N> mul(matrix<T,N,M> left, vector<T,M> right) __generic<T : __BuiltinLogicalType, let N : int, let M : int> __target_intrinsic(hlsl) __target_intrinsic(glsl, "($1 * $0)") +[__readNone] vector<T,N> mul(matrix<T,N,M> left, vector<T,M> right) { vector<T,N> result; @@ -2810,6 +3026,7 @@ vector<T,N> mul(matrix<T,N,M> left, vector<T,M> right) __generic<T : __BuiltinFloatingPointType, let R : int, let N : int, let C : int> __target_intrinsic(hlsl) __target_intrinsic(glsl, "($1 * $0)") +[__readNone] matrix<T,R,C> mul(matrix<T,R,N> right, matrix<T,N,C> left) { matrix<T,R,C> result; @@ -2828,6 +3045,7 @@ matrix<T,R,C> mul(matrix<T,R,N> right, matrix<T,N,C> left) __generic<T : __BuiltinIntegerType, let R : int, let N : int, let C : int> __target_intrinsic(hlsl) __target_intrinsic(glsl, "($1 * $0)") +[__readNone] matrix<T,R,C> mul(matrix<T,R,N> right, matrix<T,N,C> left) { matrix<T,R,C> result; @@ -2846,6 +3064,7 @@ matrix<T,R,C> mul(matrix<T,R,N> right, matrix<T,N,C> left) __generic<T : __BuiltinLogicalType, let R : int, let N : int, let C : int> __target_intrinsic(hlsl) __target_intrinsic(glsl, "($1 * $0)") +[__readNone] matrix<T,R,C> mul(matrix<T,R,N> right, matrix<T,N,C> left) { matrix<T,R,C> result; @@ -2864,11 +3083,13 @@ matrix<T,R,C> mul(matrix<T,R,N> right, matrix<T,N,C> left) // noise (deprecated) +[__readNone] float noise(float x) { return 0; } +[__readNone] __generic<let N : int> float noise(vector<float, N> x) { return 0; @@ -2915,6 +3136,7 @@ __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Normalize _0") +[__readNone] vector<T,N> normalize(vector<T,N> x) { return x / 
length(x); @@ -2927,12 +3149,14 @@ __target_intrinsic(glsl) __target_intrinsic(cuda, "$P_pow($0, $1)") __target_intrinsic(cpp, "$P_pow($0, $1)") __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Pow _0 _1") +[__readNone] T pow(T x, T y); __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Pow _0 _1") +[__readNone] vector<T, N> pow(vector<T, N> x, vector<T, N> y) { VECTOR_MAP_BINARY(T, N, pow, x, y); @@ -2940,6 +3164,7 @@ vector<T, N> pow(vector<T, N> x, vector<T, N> y) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<T,N,M> pow(matrix<T,N,M> x, matrix<T,N,M> y) { MATRIX_MAP_BINARY(T, N, M, pow, x, y); @@ -3087,6 +3312,7 @@ __generic<T : __BuiltinFloatingPointType> __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Radians _0") +[__readNone] T radians(T x) { return x * (T.getPi() / T(180.0f)); @@ -3096,6 +3322,7 @@ __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Radians _0") +[__readNone] vector<T, N> radians(vector<T, N> x) { return x * (T.getPi() / T(180.0f)); @@ -3103,6 +3330,7 @@ vector<T, N> radians(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<T, N, M> radians(matrix<T, N, M> x) { return x * (T.getPi() / T(180.0f)); @@ -3111,6 +3339,7 @@ matrix<T, N, M> radians(matrix<T, N, M> x) // Approximate reciprocal __generic<T : __BuiltinFloatingPointType> __target_intrinsic(hlsl) +[__readNone] T rcp(T x) { return T(1.0) / x; @@ -3118,6 +3347,7 @@ T rcp(T x) __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) +[__readNone] vector<T, N> rcp(vector<T, N> x) 
{ VECTOR_MAP_UNARY(T, N, rcp, x); @@ -3125,6 +3355,7 @@ vector<T, N> rcp(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<T, N, M> rcp(matrix<T, N, M> x) { MATRIX_MAP_UNARY(T, N, M, rcp, x); @@ -3135,6 +3366,7 @@ __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Reflect _0 _1") +[__readNone] vector<T,N> reflect(vector<T,N> i, vector<T,N> n) { return i - T(2) * dot(n,i) * n; @@ -3145,6 +3377,7 @@ __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Refract _0 _1 _2") +[__readNone] vector<T,N> refract(vector<T,N> i, vector<T,N> n, T eta) { let dotNI = dot(n,i); @@ -3158,10 +3391,12 @@ __target_intrinsic(hlsl) __target_intrinsic(glsl, "bitfieldReverse") __target_intrinsic(cuda, "$P_reversebits($0)") __target_intrinsic(cpp, "$P_reversebits($0)") +[__readNone] uint reversebits(uint value); __target_intrinsic(glsl, "bitfieldReverse") __generic<let N : int> +[__readNone] vector<uint, N> reversebits(vector<uint, N> value) { VECTOR_MAP_UNARY(uint, N, reversebits, value); @@ -3174,12 +3409,14 @@ __target_intrinsic(glsl) __target_intrinsic(cuda, "$P_round($0)") __target_intrinsic(cpp, "$P_round($0)") __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Round _0") +[__readNone] T round(T x); __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Round _0") +[__readNone] vector<T, N> round(vector<T, N> x) { VECTOR_MAP_UNARY(T, N, round, x); @@ -3187,6 +3424,7 @@ vector<T, N> round(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] 
matrix<T,N,M> round(matrix<T,N,M> x) { MATRIX_MAP_UNARY(T, N, M, round, x); @@ -3199,6 +3437,7 @@ __target_intrinsic(glsl, "inversesqrt($0)") __target_intrinsic(cuda, "$P_rsqrt($0)") __target_intrinsic(cpp, "$P_rsqrt($0)") __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 InverseSqrt _0") +[__readNone] T rsqrt(T x) { return T(1.0) / sqrt(x); @@ -3208,6 +3447,7 @@ __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl, "inversesqrt($0)") __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 InverseSqrt _0") +[__readNone] vector<T, N> rsqrt(vector<T, N> x) { VECTOR_MAP_UNARY(T, N, rsqrt, x); @@ -3215,6 +3455,7 @@ vector<T, N> rsqrt(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<T, N, M> rsqrt(matrix<T, N, M> x) { MATRIX_MAP_UNARY(T, N, M, rsqrt, x); @@ -3224,6 +3465,7 @@ matrix<T, N, M> rsqrt(matrix<T, N, M> x) __generic<T : __BuiltinFloatingPointType> __target_intrinsic(hlsl) +[__readNone] T saturate(T x) { return clamp<T>(x, T(0), T(1)); @@ -3231,6 +3473,7 @@ T saturate(T x) __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) +[__readNone] vector<T,N> saturate(vector<T,N> x) { return clamp<T,N>(x, @@ -3240,6 +3483,7 @@ vector<T,N> saturate(vector<T,N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<T,N,M> saturate(matrix<T,N,M> x) { MATRIX_MAP_UNARY(T, N, M, saturate, x); @@ -3252,12 +3496,14 @@ __target_intrinsic(glsl, "int(sign($0))") __target_intrinsic(cuda, "$P_sign($0)") __target_intrinsic(cpp, "$P_sign($0)") __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 fi(FSign, SSign) _0") +[__readNone] int sign(T x); __generic<T : __BuiltinSignedArithmeticType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl, "ivec$N0(sign($0))") __target_intrinsic(spirv_direct, 
"OpExtInst resultType resultId glsl450 fi(FSign, SSign) _0") +[__readNone] vector<int, N> sign(vector<T, N> x) { VECTOR_MAP_UNARY(int, N, sign, x); @@ -3265,6 +3511,7 @@ vector<int, N> sign(vector<T, N> x) __generic<T : __BuiltinSignedArithmeticType, let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<int, N, M> sign(matrix<T, N, M> x) { MATRIX_MAP_UNARY(int, N, M, sign, x); @@ -3279,12 +3526,14 @@ __target_intrinsic(glsl) __target_intrinsic(cuda, "$P_sin($0)") __target_intrinsic(cpp, "$P_sin($0)") __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Sin _0") +[__readNone] T sin(T x); __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Sin _0") +[__readNone] vector<T, N> sin(vector<T, N> x) { VECTOR_MAP_UNARY(T, N, sin, x); @@ -3292,6 +3541,7 @@ vector<T, N> sin(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<T, N, M> sin(matrix<T, N, M> x) { MATRIX_MAP_UNARY(T, N, M, sin, x); @@ -3301,6 +3551,7 @@ matrix<T, N, M> sin(matrix<T, N, M> x) __generic<T : __BuiltinFloatingPointType> __target_intrinsic(hlsl) __target_intrinsic(cuda, "$P_sincos($0, $1, $2)") +[__readNone] void sincos(T x, out T s, out T c) { s = sin(x); @@ -3309,6 +3560,7 @@ void sincos(T x, out T s, out T c) __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) +[__readNone] void sincos(vector<T,N> x, out vector<T,N> s, out vector<T,N> c) { s = sin(x); @@ -3317,6 +3569,7 @@ void sincos(vector<T,N> x, out vector<T,N> s, out vector<T,N> c) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] void sincos(matrix<T,N,M> x, out matrix<T,N,M> s, out matrix<T,N,M> c) { s = sin(x); @@ -3330,12 +3583,14 @@ __target_intrinsic(glsl) __target_intrinsic(cuda, "$P_sinh($0)") __target_intrinsic(cpp, 
"$P_sinh($0)") __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Sinh _0") +[__readNone] T sinh(T x); __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Sinh _0") +[__readNone] vector<T, N> sinh(vector<T, N> x) { VECTOR_MAP_UNARY(T, N, sinh, x); @@ -3343,6 +3598,7 @@ vector<T, N> sinh(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<T, N, M> sinh(matrix<T, N, M> x) { MATRIX_MAP_UNARY(T, N, M, sinh, x); @@ -3353,6 +3609,7 @@ __generic<T : __BuiltinFloatingPointType> __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 SmoothStep _0 _1 _2") +[__readNone] T smoothstep(T min, T max, T x) { let t = saturate((x - min) / (max - min)); @@ -3363,6 +3620,7 @@ __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 SmoothStep _0 _1 _2") +[__readNone] vector<T, N> smoothstep(vector<T, N> min, vector<T, N> max, vector<T, N> x) { VECTOR_MAP_TRINARY(T, N, smoothstep, min, max, x); @@ -3370,6 +3628,7 @@ vector<T, N> smoothstep(vector<T, N> min, vector<T, N> max, vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<T, N, M> smoothstep(matrix<T, N, M> min, matrix<T, N, M> max, matrix<T, N, M> x) { MATRIX_MAP_TRINARY(T, N, M, smoothstep, min, max, x); @@ -3382,12 +3641,14 @@ __target_intrinsic(glsl) __target_intrinsic(cuda, "$P_sqrt($0)") __target_intrinsic(cpp, "$P_sqrt($0)") __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Sqrt _0") +[__readNone] T sqrt(T x); __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl) 
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Sqrt _0") +[__readNone] vector<T, N> sqrt(vector<T, N> x) { VECTOR_MAP_UNARY(T, N, sqrt, x); @@ -3395,6 +3656,7 @@ vector<T, N> sqrt(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<T, N, M> sqrt(matrix<T, N, M> x) { MATRIX_MAP_UNARY(T, N, M, sqrt, x); @@ -3405,6 +3667,7 @@ __generic<T : __BuiltinFloatingPointType> __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Step _0 _1") +[__readNone] T step(T y, T x) { return x < y ? T(0.0f) : T(1.0f); @@ -3414,6 +3677,7 @@ __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Step _0 _1") +[__readNone] vector<T,N> step(vector<T,N> y, vector<T,N> x) { VECTOR_MAP_BINARY(T, N, step, y, x); @@ -3421,6 +3685,7 @@ vector<T,N> step(vector<T,N> y, vector<T,N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<T, N, M> step(matrix<T, N, M> y, matrix<T, N, M> x) { MATRIX_MAP_BINARY(T, N, M, step, y, x); @@ -3433,12 +3698,14 @@ __target_intrinsic(glsl) __target_intrinsic(cuda, "$P_tan($0)") __target_intrinsic(cpp, "$P_tan($0)") __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Tan _0") +[__readNone] T tan(T x); __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Tan _0") +[__readNone] vector<T, N> tan(vector<T, N> x) { VECTOR_MAP_UNARY(T, N, tan, x); @@ -3446,6 +3713,7 @@ vector<T, N> tan(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<T, N, M> tan(matrix<T, N, M> x) { MATRIX_MAP_UNARY(T, N, M, tan, x); 
@@ -3458,12 +3726,14 @@ __target_intrinsic(glsl) __target_intrinsic(cuda, "$P_tanh($0)") __target_intrinsic(cpp, "$P_tanh($0)") __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Tanh _0") +[__readNone] T tanh(T x); __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Tanh _0") +[__readNone] vector<T,N> tanh(vector<T,N> x) { VECTOR_MAP_UNARY(T, N, tanh, x); @@ -3471,6 +3741,7 @@ vector<T,N> tanh(vector<T,N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<T,N,M> tanh(matrix<T,N,M> x) { MATRIX_MAP_UNARY(T, N, M, tanh, x); @@ -3480,6 +3751,7 @@ matrix<T,N,M> tanh(matrix<T,N,M> x) __generic<T : __BuiltinType, let N : int, let M : int> __target_intrinsic(hlsl) __target_intrinsic(glsl) +[__readNone] matrix<T, M, N> transpose(matrix<T, N, M> x) { matrix<T,M,N> result; @@ -3496,12 +3768,14 @@ __target_intrinsic(glsl) __target_intrinsic(cuda, "$P_trunc($0)") __target_intrinsic(cpp, "$P_trunc($0)") __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Trunc _0") +[__readNone] T trunc(T x); __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl) __target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Trunc _0") +[__readNone] vector<T, N> trunc(vector<T, N> x) { VECTOR_MAP_UNARY(T, N, trunc, x); @@ -3509,6 +3783,7 @@ vector<T, N> trunc(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __target_intrinsic(hlsl) +[__readNone] matrix<T, N, M> trunc(matrix<T, N, M> x) { MATRIX_MAP_UNARY(T, N, M, trunc, x); @@ -4779,6 +5054,7 @@ void __executeCallable(uint shaderIndex, int payloadLocation); __generic<Payload> __target_intrinsic(__glslRayTracing, "$XC") [__readNone] +[__AlwaysFoldIntoUseSiteAttribute] int __callablePayloadLocation(__ref Payload payload); // 
Now we provide a hard-coded definition of `CallShader()` for GLSL-based @@ -4834,6 +5110,7 @@ void __traceRay( __generic<Payload> __target_intrinsic(__glslRayTracing, "$XP") [__readNone] +[__AlwaysFoldIntoUseSiteAttribute] int __rayPayloadLocation(__ref Payload payload); __generic<payload_t> @@ -5677,6 +5954,7 @@ Ref<T> __hitObjectAttributes<T>() __generic<Attributes> __target_intrinsic(__glslRayTracing, "$XH") [__readNone] +[__AlwaysFoldIntoUseSiteAttribute] int __hitObjectAttributesLocation(__ref Attributes attributes); /// Immutable data type representing a ray hit or a miss. Can be used to invoke hit or miss shading, diff --git a/source/slang/slang-ast-modifier.h b/source/slang/slang-ast-modifier.h index 99e221b1e..6ac464784 100644 --- a/source/slang/slang-ast-modifier.h +++ b/source/slang/slang-ast-modifier.h @@ -1083,6 +1083,14 @@ class RequiresNVAPIAttribute : public Attribute SLANG_AST_CLASS(RequiresNVAPIAttribute) }; + + /// A `[__AlwaysFoldIntoUseSite]` attribute indicates that the calls into the modified + /// function should always be folded into use sites during source emit. +class AlwaysFoldIntoUseSiteAttribute :public Attribute +{ + SLANG_AST_CLASS(AlwaysFoldIntoUseSiteAttribute) +}; + /// The `[ForwardDifferentiable]` attribute indicates that a function can be forward-differentiated. class ForwardDifferentiableAttribute : public DifferentiableAttribute { diff --git a/source/slang/slang-emit-c-like.cpp b/source/slang/slang-emit-c-like.cpp index c664449e5..7840dc450 100644 --- a/source/slang/slang-emit-c-like.cpp +++ b/source/slang/slang-emit-c-like.cpp @@ -1244,14 +1244,24 @@ bool CLikeSourceEmitter::shouldFoldInstIntoUseSites(IRInst* inst) return true; } + // Always hold if inst is a call into an [__alwaysFoldIntoUseSite] function. 
+ if (auto call = as<IRCall>(inst)) + { + auto callee = call->getCallee(); + if (getResolvedInstForDecorations(callee)->findDecoration<IRAlwaysFoldIntoUseSiteDecoration>()) + { + return true; + } + } + // Having dealt with all of the cases where we *must* fold things // above, we can now deal with the more general cases where we // *should not* fold things. - // Don't fold something with no users: if(!inst->hasUses()) return false; + // Don't fold something that has multiple users: if(inst->hasMoreThanOneUse()) return false; diff --git a/source/slang/slang-emit-c-like.h b/source/slang/slang-emit-c-like.h index ff229c38b..1cd2045c7 100644 --- a/source/slang/slang-emit-c-like.h +++ b/source/slang/slang-emit-c-like.h @@ -326,7 +326,7 @@ public: void emitSimpleValue(IRInst* inst) { emitSimpleValueImpl(inst); } - bool shouldFoldInstIntoUseSites(IRInst* inst); + virtual bool shouldFoldInstIntoUseSites(IRInst* inst); void emitOperand(IRInst* inst, EmitOpInfo const& outerPrec) { emitOperandImpl(inst, outerPrec); } diff --git a/source/slang/slang-emit-cpp.cpp b/source/slang/slang-emit-cpp.cpp index ba6b26ec6..795ec74b0 100644 --- a/source/slang/slang-emit-cpp.cpp +++ b/source/slang/slang-emit-cpp.cpp @@ -1557,6 +1557,46 @@ void CPPSourceEmitter::emitGlobalInstImpl(IRInst* inst) } } +bool CPPSourceEmitter::shouldFoldInstIntoUseSites(IRInst* inst) +{ + bool result = Super::shouldFoldInstIntoUseSites(inst); + if (!result) + return result; + if (as<IRVectorType>(inst->getDataType()) || as<IRMatrixType>(inst->getDataType())) + { + // If a vector value is being used in a reshape/cast, + // we should not fold it because the implementation of cast will have multiple references to it. 
+ for (auto use = inst->firstUse; use; use = use->nextUse) + { + switch (use->getUser()->getOp()) + { + case kIROp_MatrixReshape: + case kIROp_VectorReshape: + case kIROp_IntCast: + case kIROp_FloatCast: + case kIROp_CastIntToFloat: + case kIROp_CastFloatToInt: + return false; + default: + break; + } + } + switch (inst->getOp()) + { + case kIROp_MatrixReshape: + case kIROp_VectorReshape: + case kIROp_IntCast: + case kIROp_FloatCast: + case kIROp_CastIntToFloat: + case kIROp_CastFloatToInt: + return false; + default: + break; + } + } + return true; +} + static bool _isExported(IRInst* inst) { for (auto decoration : inst->getDecorations()) diff --git a/source/slang/slang-emit-cpp.h b/source/slang/slang-emit-cpp.h index 92780e0a4..71c382f87 100644 --- a/source/slang/slang-emit-cpp.h +++ b/source/slang/slang-emit-cpp.h @@ -71,6 +71,7 @@ protected: virtual void emitFuncDecorationsImpl(IRFunc* func) SLANG_OVERRIDE; virtual void emitVarDecorationsImpl(IRInst* var) SLANG_OVERRIDE; virtual void emitGlobalInstImpl(IRInst* inst) SLANG_OVERRIDE; + virtual bool shouldFoldInstIntoUseSites(IRInst* inst) SLANG_OVERRIDE; const UnownedStringSlice* getVectorElementNames(BaseType elemType, Index elemCount); diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp index e2f00bf88..a25fae5ae 100644 --- a/source/slang/slang-emit.cpp +++ b/source/slang/slang-emit.cpp @@ -891,8 +891,8 @@ Result linkAndOptimizeIR( } } - // Run a final round of DCE to clean up unused things after phi-elimination. - eliminateDeadCode(irModule); + // Run a final round of simplifications to clean up unused things after phi-elimination. + simplifyNonSSAIR(irModule); // We include one final step to (optionally) dump the IR and validate // it after all of the optimization passes are complete. 
This should diff --git a/source/slang/slang-ir-autodiff-unzip.cpp b/source/slang/slang-ir-autodiff-unzip.cpp index 096751836..a05fe7044 100644 --- a/source/slang/slang-ir-autodiff-unzip.cpp +++ b/source/slang/slang-ir-autodiff-unzip.cpp @@ -559,6 +559,7 @@ IRFunc* DiffUnzipPass::extractPrimalFunc( { if (inst->getOp() == kIROp_Call) { + // The primal calls should be marked as no side effect so they can be DCE'd if possible. builder.addSimpleDecoration<IRNoSideEffectDecoration>(inst); } } diff --git a/source/slang/slang-ir-autodiff.h b/source/slang/slang-ir-autodiff.h index fa01d50ae..a4eb94461 100644 --- a/source/slang/slang-ir-autodiff.h +++ b/source/slang/slang-ir-autodiff.h @@ -212,20 +212,12 @@ struct DifferentiableTypeConformanceContext IRInst* getZeroMethodForType(IRBuilder* builder, IRType* origType) { auto result = lookUpInterfaceMethod(builder, origType, sharedContext->zeroMethodStructKey); - if (result && !result->findDecoration<IRNoSideEffectDecoration>()) - { - builder->addDecoration(result, kIROp_NoSideEffectDecoration); - } return result; } IRInst* getAddMethodForType(IRBuilder* builder, IRType* origType) { auto result = lookUpInterfaceMethod(builder, origType, sharedContext->addMethodStructKey); - if (result && !result->findDecoration<IRNoSideEffectDecoration>()) - { - builder->addDecoration(result, kIROp_NoSideEffectDecoration); - } return result; } }; diff --git a/source/slang/slang-ir-dce.cpp b/source/slang/slang-ir-dce.cpp index 58c9b23f1..e5c9b1fdb 100644 --- a/source/slang/slang-ir-dce.cpp +++ b/source/slang/slang-ir-dce.cpp @@ -24,6 +24,11 @@ struct DeadCodeEliminationContext // These uses will be replaced with `undefInst`. IRInst* undefInst = nullptr; + // Track if we have removed any phi parameters. + // If so we need to rerun dce pass because after removing them + // there could be new DCE opportunities. 
+ bool phiRemoved = false; + // Our overall process is going to be to determine // which instructions in the module are "live" // and then eliminate anything that wasn't found to @@ -98,104 +103,115 @@ struct DeadCodeEliminationContext bool processInst(IRInst* root) { - // First of all, we know that the root instruction - // should be considered as live, because otherwise - // we'd end up eliminating it, so that is a - // good place to start. - // - markInstAsLive(root); - - // Ensure there is a global undef inst that is always alive. - // This undef inst will be used to fill in weak-referencing uses - // whose used value is marked as dead and eliminated. - // We always make sure this undef inst is available to prevent - // infiniate oscilating loops. - markInstAsLive(getUndefInst()); - - // Marking the module as live should have - // seeded our work list, so we can now start - // processing entries off of our work list - // until it goes dry. - // - while (workList.getCount()) + bool result = false; + for (;;) { - auto inst = workList.getLast(); - workList.removeLast(); + liveInsts.Clear(); + workList.clear(); - if (!isChildInstOf(inst, root)) - continue; - - // At this point we know that `inst` is live, - // and we want to start considering which other - // instructions must be live because of that - // knowlege. - // - // A first easy case is that the parent (if any) - // of a live instruction had better be live, or - // else we might delete the parent, and - // the child with it. + // First of all, we know that the root instruction + // should be considered as live, because otherwise + // we'd end up eliminating it, so that is a + // good place to start. // - markInstAsLive(inst->getParent()); - - // Next the type of a live instruction, and all - // of its operands must also be live, or else - // we won't be able to compute its value. + markInstAsLive(root); + + // Ensure there is a global undef inst that is always alive. 
+ // This undef inst will be used to fill in weak-referencing uses + // whose used value is marked as dead and eliminated. + // We always make sure this undef inst is available to prevent + // infiniate oscilating loops. + markInstAsLive(getUndefInst()); + + // Marking the module as live should have + // seeded our work list, so we can now start + // processing entries off of our work list + // until it goes dry. // - markInstAsLive(inst->getFullType()); - UInt operandCount = inst->getOperandCount(); - for (UInt ii = 0; ii < operandCount; ++ii) + while (workList.getCount()) { - // There are some type of operands that needs to be treated as - // "weak" references -- they can never hold things alive, and - // whenever we delete the referenced value, these operands needs - // to be replaced with `undef`. - if (!isWeakReferenceOperand(inst, ii)) - markInstAsLive(inst->getOperand(ii)); - } + auto inst = workList.getLast(); + workList.removeLast(); + + if (!isChildInstOf(inst, root)) + continue; + + // At this point we know that `inst` is live, + // and we want to start considering which other + // instructions must be live because of that + // knowlege. + // + // A first easy case is that the parent (if any) + // of a live instruction had better be live, or + // else we might delete the parent, and + // the child with it. + // + markInstAsLive(inst->getParent()); + + // Next the type of a live instruction, and all + // of its operands must also be live, or else + // we won't be able to compute its value. + // + markInstAsLive(inst->getFullType()); + UInt operandCount = inst->getOperandCount(); + for (UInt ii = 0; ii < operandCount; ++ii) + { + // There are some type of operands that needs to be treated as + // "weak" references -- they can never hold things alive, and + // whenever we delete the referenced value, these operands needs + // to be replaced with `undef`. 
+ if (!isWeakReferenceOperand(inst, ii)) + markInstAsLive(inst->getOperand(ii)); + } - // Finally, we need to consider the children - // and decorations of the instruction. - // - // Note that just because an instruction is - // live doesn't mean its children must be, or - // else we'd never eliminate *anything* (we - // marked the whole module as live, and everything - // is a transitive child of the module). - // - // Decorations, in contrast, are always live if their - // parents are (because we don't want to silently drop - // decorations). It is still important to *mark* - // decorations as live, because they have operands, - // and those operands need to be marked as live. - // We will fold decorations into the same loop - // as children for simplicity. - // - // To keep the code here simple, we'll defer the - // decision of whether a child (or decoration) - // should be live when its parent is to a subroutine. - // - for (auto child : inst->getDecorationsAndChildren()) - { - if (shouldInstBeLiveIfParentIsLive(child)) + // Finally, we need to consider the children + // and decorations of the instruction. + // + // Note that just because an instruction is + // live doesn't mean its children must be, or + // else we'd never eliminate *anything* (we + // marked the whole module as live, and everything + // is a transitive child of the module). + // + // Decorations, in contrast, are always live if their + // parents are (because we don't want to silently drop + // decorations). It is still important to *mark* + // decorations as live, because they have operands, + // and those operands need to be marked as live. + // We will fold decorations into the same loop + // as children for simplicity. + // + // To keep the code here simple, we'll defer the + // decision of whether a child (or decoration) + // should be live when its parent is to a subroutine. 
+ // + for (auto child : inst->getDecorationsAndChildren()) { - // In this case, we know `inst` is live and - // its `child` should be live if its parent is, - // so the `child` must be live too. - // - markInstAsLive(child); + if (shouldInstBeLiveIfParentIsLive(child)) + { + // In this case, we know `inst` is live and + // its `child` should be live if its parent is, + // so the `child` must be live too. + // + markInstAsLive(child); + } } } - } - // If our work list runs dry, that means we've reached a steady - // state where everything that is transitively relevant to - // the "outputs" of the module has been marked as live. - // - // Now we can simply walk through all of our instructions - // recursively and eliminate those that are "dead" by - // virtue of not having been found live. - // - return eliminateDeadInstsRec(root); + // If our work list runs dry, that means we've reached a steady + // state where everything that is transitively relevant to + // the "outputs" of the module has been marked as live. + // + // Now we can simply walk through all of our instructions + // recursively and eliminate those that are "dead" by + // virtue of not having been found live. 
+ // + phiRemoved = false; + result |= eliminateDeadInstsRec(root); + if (!phiRemoved) + break; + } + return result; } // Given the basic infrastructrure above, let's @@ -207,6 +223,25 @@ struct DeadCodeEliminationContext return processInst(module->getModuleInst()); } + void removePhiArgs(IRInst* phiParam) + { + auto block = cast<IRBlock>(phiParam->getParent()); + UInt paramIndex = 0; + for (auto p = block->getFirstParam(); p; p = p->getNextParam()) + { + if (p == phiParam) + break; + paramIndex++; + } + for (auto predBlock : block->getPredecessors()) + { + auto termInst = as<IRUnconditionalBranch>(predBlock->getTerminator()); + SLANG_ASSERT(paramIndex < termInst->getArgCount()); + termInst->removeArgument(paramIndex); + } + phiRemoved = true; + } + bool eliminateDeadInstsRec(IRInst* inst) { bool changed = false; @@ -226,6 +261,12 @@ struct DeadCodeEliminationContext { inst->replaceUsesWith(getUndefInst()); } + + if (inst->getOp() == kIROp_Param) + { + // For Phi parameters, we need to update all branch arguments. + removePhiArgs(inst); + } inst->removeAndDeallocate(); changed = true; } @@ -261,6 +302,16 @@ struct DeadCodeEliminationContext } }; +bool isFirstBlock(IRInst* inst) +{ + auto block = as<IRBlock>(inst); + if (!block) + return false; + if (!block->getParent()) + return false; + return block->getParent()->getFirstBlock() == block; +} + bool shouldInstBeLiveIfParentIsLive(IRInst* inst, IRDeadCodeEliminationOptions options) { // The main source of confusion/complexity here is that @@ -275,7 +326,31 @@ bool shouldInstBeLiveIfParentIsLive(IRInst* inst, IRDeadCodeEliminationOptions o // when it is executed, then we should keep it around. // if (inst->mightHaveSideEffects()) - return true; + { + // If the inst has side effect, we should keep it alive. + // An exception is if we have a call to a pure function + // that writes its output to a local variable, but we + // don't have any uses of that local variable. 
+ auto call = as<IRCall>(inst); + if (!call) + return true; + if (!getResolvedInstForDecorations(call->getCallee())->findDecoration<IRReadNoneDecoration>()) + return true; + auto parentFunc = getParentFunc(inst); + if (!parentFunc) + return true; + for (UInt i = 0; i < call->getArgCount(); i++) + { + auto arg = call->getArg(i); + if (getParentFunc(arg) != parentFunc) + return true; + if (arg->getOp() != kIROp_Var) + return true; + if (arg->hasMoreThanOneUse()) + return true; + } + return false; + } // // The `mightHaveSideEffects` query is conservative, and will // return `true` as its default mode, so once we are past that @@ -352,17 +427,10 @@ bool shouldInstBeLiveIfParentIsLive(IRInst* inst, IRDeadCodeEliminationOptions o switch (inst->getOp()) { // Function parameters obviously shouldn't get eliminated, - // even if nothing references them, and block parameters - // (phi nodes) will be considered live when their block is, - // just so that we don't have to deal with any complications - // around re-writing the relevant inter-block argument passing. - // - // TODO: A smarter DCE pass could deal with this case more - // carefully, or we could improve the interprocedural SCCP - // pass to deal with block parameters instead. + // even if nothing references them. 
// case kIROp_Param: - return true; + return isFirstBlock(inst->getParent()); // IR struct types and witness tables are currently kludged // so that they have child instructions that represent their diff --git a/source/slang/slang-ir-glsl-legalize.cpp b/source/slang/slang-ir-glsl-legalize.cpp index e111a548b..9c16f40ac 100644 --- a/source/slang/slang-ir-glsl-legalize.cpp +++ b/source/slang/slang-ir-glsl-legalize.cpp @@ -2027,8 +2027,8 @@ void legalizeMeshOutputParam( IRBuilderInsertLocScope locScope{builder}; builder->setInsertBefore(p); - auto e = builder->emitElementAddress(meshOutputBlockType, blockParam, p->getIndex()); - auto a = builder->emitFieldAddress(builtin.type, e, builtin.key); + auto e = builder->emitElementAddress(builder->getPtrType(meshOutputBlockType), blockParam, p->getIndex()); + auto a = builder->emitFieldAddress(builder->getPtrType(builtin.type), e, builtin.key); p->replaceUsesWith(a); }); diff --git a/source/slang/slang-ir-inst-defs.h b/source/slang/slang-ir-inst-defs.h index 4dea3985a..4b1037240 100644 --- a/source/slang/slang-ir-inst-defs.h +++ b/source/slang/slang-ir-inst-defs.h @@ -728,6 +728,9 @@ INST(HighLevelDeclDecoration, highLevelDecl, 1, 0) /// Applie to an IR function and signals that inlining should not be performed unless unavoidable. INST(NoInlineDecoration, noInline, 0, 0) + /// A call to the decorated function should always be folded into its use site. 
+ INST(AlwaysFoldIntoUseSiteDecoration, alwaysFold, 0, 0) + INST(PayloadDecoration, payload, 0, 0) /* Mesh Shader outputs */ diff --git a/source/slang/slang-ir-insts.h b/source/slang/slang-ir-insts.h index fe20f17f5..f2e4e05d3 100644 --- a/source/slang/slang-ir-insts.h +++ b/source/slang/slang-ir-insts.h @@ -325,6 +325,7 @@ IR_SIMPLE_DECORATION(HLSLExportDecoration) IR_SIMPLE_DECORATION(KeepAliveDecoration) IR_SIMPLE_DECORATION(RequiresNVAPIDecoration) IR_SIMPLE_DECORATION(NoInlineDecoration) +IR_SIMPLE_DECORATION(AlwaysFoldIntoUseSiteDecoration) struct IRNVAPIMagicDecoration : IRDecoration { @@ -1925,7 +1926,7 @@ struct IRUnconditionalBranch : IRTerminatorInst UInt getArgCount(); IRUse* getArgs(); IRInst* getArg(UInt index); - + void removeArgument(UInt index); IR_PARENT_ISA(UnconditionalBranch); }; @@ -1968,20 +1969,6 @@ struct IRConditionalBranch : IRTerminatorInst IRBlock* getFalseBlock() { return (IRBlock*)falseBlock.get(); } }; -// A conditional branch that represent the test inside a loop -struct IRLoopTest : IRConditionalBranch -{ -}; - -// A conditional branch that represents a one-sided `if`: -// -// if( <condition> ) { <trueBlock> } -// <falseBlock> -struct IRIf : IRConditionalBranch -{ - IRBlock* getAfterBlock() { return getFalseBlock(); } -}; - // A conditional branch that represents a two-sided `if`: // // if( <condition> ) { <trueBlock> } @@ -3361,6 +3348,7 @@ public: IRInst* emitBitOr(IRType* type, IRInst* left, IRInst* right); IRInst* emitBitNot(IRType* type, IRInst* value); IRInst* emitNeg(IRType* type, IRInst* value); + IRInst* emitNot(IRType* type, IRInst* value); IRInst* emitAdd(IRType* type, IRInst* left, IRInst* right); IRInst* emitSub(IRType* type, IRInst* left, IRInst* right); diff --git a/source/slang/slang-ir-loop-unroll.cpp b/source/slang/slang-ir-loop-unroll.cpp index 79b00f60a..2f689ebde 100644 --- a/source/slang/slang-ir-loop-unroll.cpp +++ b/source/slang/slang-ir-loop-unroll.cpp @@ -47,7 +47,7 @@ static bool 
_eliminateDeadBlocks(List<IRBlock*>& blocks, IRBlock* unreachableBlo return changed; } -List<IRBlock*> _collectBlocksInLoop(Dictionary<IRBlock*, int>& blockOrdering, IRLoop* loopInst) +List<IRBlock*> _collectBlocksInLoop(IRDominatorTree* dom, IRLoop* loopInst) { List<IRBlock*> loopBlocks; HashSet<IRBlock*> loopBlocksSet; @@ -58,7 +58,6 @@ List<IRBlock*> _collectBlocksInLoop(Dictionary<IRBlock*, int>& blockOrdering, IR }; auto firstBlock = as<IRBlock>(loopInst->block.get()); auto breakBlock = as<IRBlock>(loopInst->breakBlock.get()); - auto breakBlockOrdering = blockOrdering[breakBlock].GetValue(); addBlock(firstBlock); for (Index i = 0; i < loopBlocks.getCount(); i++) @@ -68,18 +67,19 @@ List<IRBlock*> _collectBlocksInLoop(Dictionary<IRBlock*, int>& blockOrdering, IR { if (succ == breakBlock) continue; - auto successorOrdering = blockOrdering[block].GetValue(); - // The target must be post-dominated by the break block in order to be considered - // the body of the loop. - // Since we don't support arbitrary goto or multi-level continue, the simple - // ordering comparison is sufficient to serve as a post-dominance check. - if (successorOrdering < breakBlockOrdering) + if (dom->dominates(firstBlock, succ) && !dom->dominates(breakBlock, succ)) addBlock(succ); } } return loopBlocks; } +List<IRBlock*> collectBlocksInLoop(IRGlobalValueWithCode* func, IRLoop* loopInst) +{ + auto dom = computeDominatorTree(func); + return _collectBlocksInLoop(dom, loopInst); +} + static int _getLoopMaxIterationsToUnroll(IRLoop* loopInst) { static constexpr int kMaxIterationsToAttempt = 100; @@ -483,15 +483,7 @@ bool unrollLoopsInFunc( // Remove any continue jumps from the loop. 
eliminateContinueBlocks(module, loop); - auto postOrderReverseCFG = getPostorderOnReverseCFG(func); - Dictionary<IRBlock*, int> blockOrdering; - - for (Index i = 0; i < postOrderReverseCFG.getCount(); i++) - { - blockOrdering[postOrderReverseCFG[i]] = (int)i; - } - - auto blocks = _collectBlocksInLoop(blockOrdering, loop); + auto blocks = collectBlocksInLoop(func, loop); auto loopLoc = loop->sourceLoc; if (!_unrollLoop(module, loop, blocks)) { diff --git a/source/slang/slang-ir-loop-unroll.h b/source/slang/slang-ir-loop-unroll.h index d9c31e6be..6f7a41192 100644 --- a/source/slang/slang-ir-loop-unroll.h +++ b/source/slang/slang-ir-loop-unroll.h @@ -1,18 +1,22 @@ // slang-ir-loop-unroll.h #pragma once +#include "../core/slang-list.h" + namespace Slang { struct IRLoop; struct IRGlobalValueWithCode; class DiagnosticSink; struct IRModule; + struct IRBlock; // Return true if successfull, false if errors occurred. bool unrollLoopsInFunc(IRModule* module, IRGlobalValueWithCode* func, DiagnosticSink* sink); bool unrollLoopsInModule(IRModule* module, DiagnosticSink* sink); + List<IRBlock*> collectBlocksInLoop(IRGlobalValueWithCode* func, IRLoop* loop); // Turn a loop with continue block into a loop with only back jumps and breaks. 
// Each iteration will be wrapped in a breakable region, where everything before `continue` diff --git a/source/slang/slang-ir-propagate-func-properties.cpp b/source/slang/slang-ir-propagate-func-properties.cpp new file mode 100644 index 000000000..f98a77fc7 --- /dev/null +++ b/source/slang/slang-ir-propagate-func-properties.cpp @@ -0,0 +1,186 @@ +#include "slang-ir-propagate-func-properties.h" + +#include "slang-ir.h" +#include "slang-ir-insts.h" +#include "slang-ir-util.h" + + +namespace Slang +{ +bool propagateFuncProperties(IRModule* module) +{ + bool result = false; + List<IRFunc*> workList; + HashSet<IRFunc*> workListSet; + + auto addToWorkList = [&](IRFunc* f) + { + if (workListSet.Add(f)) + workList.add(f); + }; + auto addCallersToWorkList = [&](IRFunc* f) + { + if (auto g = findOuterGeneric(f)) + { + for (auto use = g->firstUse; use; use = use->nextUse) + { + if (use->getUser()->getOp() == kIROp_Specialize) + { + auto specialize = use->getUser(); + for (auto iuse = specialize->firstUse; iuse; iuse = iuse->nextUse) + { + if (auto userFunc = getParentFunc(iuse->getUser())) + addToWorkList(userFunc); + } + } + } + return; + } + for (auto use = f->firstUse; use; use = use->nextUse) + { + if (use->getUser()->getOp() == kIROp_Call) + { + if (auto userFunc = getParentFunc(use->getUser())) + addToWorkList(userFunc); + } + } + }; + for (;;) + { + bool changed = false; + workList.clear(); + workListSet.Clear(); + + // Add side effect free functions and their transitive callers to work list. + for (auto inst : module->getGlobalInsts()) + { + auto genericInst = as<IRGeneric>(inst); + if (genericInst) + { + inst = findGenericReturnVal(genericInst); + } + if (auto func = as<IRFunc>(inst)) + { + if (func->findDecoration<IRReadNoneDecoration>()) + { + addCallersToWorkList(func); + } + } + } + + // Add remaining functions to work list. 
+ for (auto inst : module->getGlobalInsts()) + { + auto genericInst = as<IRGeneric>(inst); + if (genericInst) + { + inst = findGenericReturnVal(genericInst); + } + if (auto func = as<IRFunc>(inst)) + { + addToWorkList(func); + } + } + + IRBuilder builder(module); + + for (Index i = 0; i < workList.getCount(); i++) + { + auto f = workList[i]; + bool hasSideEffectCall = false; + if (f->findDecoration<IRReadNoneDecoration>()) + continue; + // Never propagate to functions without a body. + if (f->getFirstBlock() == nullptr) + continue; + if (f->findDecoration<IRTargetIntrinsicDecoration>()) + continue; + for (auto block : f->getBlocks()) + { + for (auto inst : block->getChildren()) + { + // Is this inst known to not have global side effect/analyzable? + if (inst->mightHaveSideEffects()) + { + switch (inst->getOp()) + { + case kIROp_ifElse: + case kIROp_unconditionalBranch: + case kIROp_Switch: + case kIROp_Return: + case kIROp_loop: + case kIROp_Store: + case kIROp_Call: + case kIROp_Param: + case kIROp_Unreachable: + break; + default: + // We have a inst that has side effect and is not understood by this method. + // e.g. bufferStore, discard, etc. + return true; + } + } + + if (auto call = as<IRCall>(inst)) + { + auto callee = getResolvedInstForDecorations(call->getCallee()); + switch (callee->getOp()) + { + default: + // We are calling an unknown function, so we have to assume + // there are side effects in the call. + hasSideEffectCall = true; + break; + case kIROp_Func: + if (!callee->findDecoration<IRReadNoneDecoration>()) + { + hasSideEffectCall = true; + break; + } + } + } + + // Are any operands defined in global scope? 
+ for (UInt o = 0; o < inst->getOperandCount(); o++) + { + auto operand = inst->getOperand(o); + if (getParentFunc(operand) == f) + continue; + if (as<IRConstant>(operand)) + continue; + if (as<IRType>(operand)) + continue; + switch (operand->getOp()) + { + case kIROp_Specialize: + case kIROp_LookupWitness: + case kIROp_StructKey: + case kIROp_WitnessTable: + case kIROp_WitnessTableEntry: + case kIROp_undefined: + case kIROp_Func: + continue; + default: + break; + } + hasSideEffectCall = true; + break; + } + } + if (hasSideEffectCall) + break; + } + if (!hasSideEffectCall) + { + builder.addDecoration(f, kIROp_ReadNoneDecoration); + addCallersToWorkList(f); + changed = true; + } + } + result |= changed; + if (!changed) + break; + } + return result; +} +} diff --git a/source/slang/slang-ir-propagate-func-properties.h b/source/slang/slang-ir-propagate-func-properties.h new file mode 100644 index 000000000..6df2de18e --- /dev/null +++ b/source/slang/slang-ir-propagate-func-properties.h @@ -0,0 +1,7 @@ +#pragma once + +namespace Slang +{ +struct IRModule; +bool propagateFuncProperties(IRModule* module); +} diff --git a/source/slang/slang-ir-redundancy-removal.cpp b/source/slang/slang-ir-redundancy-removal.cpp index f3996fc01..2a2047de9 100644 --- a/source/slang/slang-ir-redundancy-removal.cpp +++ b/source/slang/slang-ir-redundancy-removal.cpp @@ -8,10 +8,118 @@ namespace Slang struct RedundancyRemovalContext { RefPtr<IRDominatorTree> dom; - bool removeRedundancyInBlock(DeduplicateContext& deduplicateContext, IRBlock* block) + bool isMovableInst(IRInst* inst) + { + switch (inst->getOp()) + { + case kIROp_Add: + case kIROp_Sub: + case kIROp_Mul: + case kIROp_Div: + case kIROp_FRem: + case kIROp_IRem: + case kIROp_Lsh: + case kIROp_Rsh: + case kIROp_And: + case kIROp_Or: + case kIROp_Not: + case kIROp_FieldExtract: + case kIROp_FieldAddress: + case kIROp_GetElement: + case kIROp_GetElementPtr: + case kIROp_UpdateElement: + case kIROp_OptionalHasValue: + case 
kIROp_GetOptionalValue: + case kIROp_MakeOptionalValue: + case kIROp_MakeTuple: + case kIROp_GetTupleElement: + case kIROp_MakeStruct: + case kIROp_MakeArray: + case kIROp_MakeArrayFromElement: + case kIROp_MakeVector: + case kIROp_MakeMatrix: + case kIROp_MakeMatrixFromScalar: + case kIROp_MakeVectorFromScalar: + case kIROp_swizzle: + case kIROp_MatrixReshape: + case kIROp_MakeString: + case kIROp_MakeResultError: + case kIROp_MakeResultValue: + case kIROp_GetResultError: + case kIROp_GetResultValue: + case kIROp_CastFloatToInt: + case kIROp_CastIntToFloat: + case kIROp_CastIntToPtr: + case kIROp_CastPtrToBool: + case kIROp_CastPtrToInt: + case kIROp_BitAnd: + case kIROp_BitNot: + case kIROp_BitOr: + case kIROp_BitXor: + case kIROp_BitCast: + case kIROp_Reinterpret: + case kIROp_Greater: + case kIROp_Less: + case kIROp_Geq: + case kIROp_Leq: + case kIROp_Neq: + case kIROp_Eql: + return true; + case kIROp_Call: + return isPureFunctionalCall(as<IRCall>(inst)); + default: + return false; + } + } + + bool tryHoistInstToOuterMostLoop(IRGlobalValueWithCode* func, IRInst* inst) + { + bool changed = false; + for (auto parentBlock = dom->getImmediateDominator(as<IRBlock>(inst->getParent())); + parentBlock; + parentBlock = dom->getImmediateDominator(parentBlock)) + { + auto terminatorInst = parentBlock->getTerminator(); + if (terminatorInst->getOp() == kIROp_loop) + { + // Consider hoisting the inst into this block. + // This is only possible if all operands of the inst are dominating `parentBlock`. + bool canHoist = true; + for (UInt i = 0; i < inst->getOperandCount(); i++) + { + auto operand = inst->getOperand(i); + if (getParentFunc(operand) != func) + { + // Global value won't prevent hoisting. + continue; + } + auto operandParent = as<IRBlock>(operand->getParent()); + if (!operandParent) + { + canHoist = false; + break; + } + canHoist = dom->dominates(operandParent, parentBlock); + if (!canHoist) + break; + } + if (!canHoist) + break; + + // Move inst to parentBlock. 
+ inst->insertBefore(terminatorInst); + changed = true; + + // Continue to consider outer hoisting positions. + } + } + return changed; + } + + bool removeRedundancyInBlock(DeduplicateContext& deduplicateContext, IRGlobalValueWithCode* func, IRBlock* block) { bool result = false; - for (auto instP : block->getChildren()) + for (auto instP : block->getModifiableChildren()) { auto resultInst = deduplicateContext.deduplicate(instP, [&](IRInst* inst) { @@ -20,75 +128,25 @@ struct RedundancyRemovalContext return false; if (dom->isUnreachable(parentBlock)) return false; - - switch (inst->getOp()) - { - case kIROp_Add: - case kIROp_Sub: - case kIROp_Mul: - case kIROp_Div: - case kIROp_Module: - case kIROp_Lsh: - case kIROp_Rsh: - case kIROp_And: - case kIROp_Or: - case kIROp_Not: - case kIROp_FieldExtract: - case kIROp_FieldAddress: - case kIROp_GetElement: - case kIROp_GetElementPtr: - case kIROp_UpdateElement: - case kIROp_OptionalHasValue: - case kIROp_GetOptionalValue: - case kIROp_MakeOptionalValue: - case kIROp_MakeTuple: - case kIROp_GetTupleElement: - case kIROp_MakeStruct: - case kIROp_MakeArray: - case kIROp_MakeArrayFromElement: - case kIROp_MakeVector: - case kIROp_MakeMatrix: - case kIROp_MakeMatrixFromScalar: - case kIROp_MakeVectorFromScalar: - case kIROp_swizzle: - case kIROp_MatrixReshape: - case kIROp_MakeString: - case kIROp_MakeResultError: - case kIROp_MakeResultValue: - case kIROp_GetResultError: - case kIROp_GetResultValue: - case kIROp_CastFloatToInt: - case kIROp_CastIntToFloat: - case kIROp_CastIntToPtr: - case kIROp_CastPtrToBool: - case kIROp_CastPtrToInt: - case kIROp_BitAnd: - case kIROp_BitNot: - case kIROp_BitOr: - case kIROp_BitXor: - case kIROp_BitCast: - case kIROp_Reinterpret: - case kIROp_Greater: - case kIROp_Less: - case kIROp_Geq: - case kIROp_Leq: - case kIROp_Neq: - case kIROp_Eql: - return true; - case kIROp_Call: - return isPureFunctionalCall(as<IRCall>(inst)); - default: - return false; - } + return isMovableInst(inst); }); if 
(resultInst != instP) + { + instP->replaceUsesWith(resultInst); result = true; + } + else if (isMovableInst(resultInst)) + { + // This inst is unique, we should consider hoisting it + // if it is inside a loop. + result |= tryHoistInstToOuterMostLoop(func, resultInst); + } } for (auto child : dom->getImmediatelyDominatedBlocks(block)) { DeduplicateContext subContext; subContext.deduplicateMap = deduplicateContext.deduplicateMap; - result |= removeRedundancyInBlock(subContext, child); + result |= removeRedundancyInBlock(subContext, func, child); } return result; } @@ -122,7 +180,142 @@ bool removeRedundancyInFunc(IRGlobalValueWithCode* func) RedundancyRemovalContext context; context.dom = computeDominatorTree(func); DeduplicateContext deduplicateCtx; - return context.removeRedundancyInBlock(deduplicateCtx, root); + return context.removeRedundancyInBlock(deduplicateCtx, func, root); +} + +static IRInst* _getRootVar(IRInst* inst) +{ + while (inst) + { + switch (inst->getOp()) + { + case kIROp_FieldAddress: + case kIROp_GetElementPtr: + inst = inst->getOperand(0); + break; + default: + return inst; + } + } + return inst; +} + +bool tryRemoveRedundantStore(IRGlobalValueWithCode* func, IRStore* store) +{ + // We perform a quick and conservative check: + // A store is redundant if it is followed by another store to the same address in + // the same basic block, and there are no instructions that may use any addresses + // related to this address. + bool hasAddrUse = false; + bool hasOverridingStore = false; + + // Stores to global variables will never get removed. + auto rootVar = _getRootVar(store->getPtr()); + if (!isChildInstOf(rootVar, func)) + return false; + + // A store can be removed if it stores into a local variable + // that has no other uses than store. + if (auto varInst = as<IRVar>(rootVar)) + { + bool hasNonStoreUse = false; + // If the entire access chain doesn't non-store use, we can safely remove it. 
+ HashSet<IRInst*> knownAccessChain; + for (auto accessChain = store->getPtr(); accessChain;) + { + knownAccessChain.Add(accessChain); + for (auto use = accessChain->firstUse; use; use = use->nextUse) + { + if (as<IRDecoration>(use->getUser())) + continue; + if (knownAccessChain.Contains(use->getUser())) + continue; + if (use->getUser()->getOp() == kIROp_Store && + use == use->getUser()->getOperands()) + { + continue; + } + hasNonStoreUse = true; + break; + } + if (hasNonStoreUse) + break; + switch (accessChain->getOp()) + { + case kIROp_GetElementPtr: + case kIROp_FieldAddress: + accessChain = accessChain->getOperand(0); + continue; + default: + break; + } + break; + } + if (!hasNonStoreUse) + { + store->removeAndDeallocate(); + return true; + } + } + + // A store can be removed if there are subsequent stores to the same variable, + // and there are no insts in between the stores that can read the variable. + + HashSet<IRBlock*> visitedBlocks; + for (auto next = store->getNextInst(); next;) + { + if (auto nextStore = as<IRStore>(next)) + { + if (nextStore->getPtr() == store->getPtr()) + { + hasOverridingStore = true; + break; + } + } + + // If we see any insts that have reads or modifies the address before seeing + // an overriding store, don't remove the store. + // We can make the test more accurate by collecting all addresses related to + // the target address first, and only bail out if any of the related addresses + // are involved. + switch (next->getOp()) + { + case kIROp_Load: + if (canAddressesPotentiallyAlias(func, next->getOperand(0), store->getPtr())) + { + hasAddrUse = true; + } + break; + default: + if (canInstHaveSideEffectAtAddress(func, next, store->getPtr())) + { + hasAddrUse = true; + } + break; + } + if (hasAddrUse) + break; + + // If we are at the end of the current block and see a unconditional branch, + // we can follow the path and check the subsequent block. 
+ if (auto branch = as<IRUnconditionalBranch>(next)) + { + auto nextBlock = branch->getTargetBlock(); + if (visitedBlocks.Add(nextBlock)) + { + next = nextBlock->getFirstInst(); + continue; + } + } + next = next->getNextInst(); + } + + if (!hasAddrUse && hasOverridingStore) + { + store->removeAndDeallocate(); + return true; + } + return false; } bool eliminateRedundantLoadStore(IRGlobalValueWithCode* func) @@ -158,57 +351,7 @@ bool eliminateRedundantLoadStore(IRGlobalValueWithCode* func) } else if (auto store = as<IRStore>(inst)) { - // We perform a quick and conservative check: - // A store is redundant if it is followed by another store to the same address in - // the same basic block, and there are no instructions that may use any addresses - // related to this address. - bool hasAddrUse = false; - bool hasOverridingStore = false; - - // Stores to global variables will never get removed. - if (!isChildInstOf(store->getPtr(), func)) - hasAddrUse = true; - - for (auto next = store->getNextInst(); next; next = next->getNextInst()) - { - if (auto nextStore = as<IRStore>(next)) - { - if (nextStore->getPtr() == store->getPtr()) - { - hasOverridingStore = true; - break; - } - } - - // If we see any insts that have reads or modifies the address before seeing - // an overriding store, don't remove the store. - // We can make the test more accurate by collecting all addresses related to - // the target address first, and only bail out if any of the related addresses - // are involved. 
- switch (next->getOp()) - { - case kIROp_Load: - if (canAddressesPotentiallyAlias(func, next->getOperand(0), store->getPtr())) - { - hasAddrUse = true; - } - break; - default: - if (canInstHaveSideEffectAtAddress(func, next, store->getPtr())) - { - hasAddrUse = true; - } - break; - } - if (hasAddrUse) - break; - } - - if (!hasAddrUse && hasOverridingStore) - { - store->removeAndDeallocate(); - changed = true; - } + changed |= tryRemoveRedundantStore(func, store); } inst = nextInst; } diff --git a/source/slang/slang-ir-sccp.cpp b/source/slang/slang-ir-sccp.cpp index d05527e59..691bd7ff0 100644 --- a/source/slang/slang-ir-sccp.cpp +++ b/source/slang/slang-ir-sccp.cpp @@ -1439,7 +1439,9 @@ struct SCCPContext inst->replaceUsesWith(constantVal); if( !inst->mightHaveSideEffects() ) { - instsToRemove.add(inst); + // Don't delete phi parameters, they will be cleaned up in CFG simplification. + if (inst->getOp() != kIROp_Param) + instsToRemove.add(inst); } } } diff --git a/source/slang/slang-ir-simplify-cfg.cpp b/source/slang/slang-ir-simplify-cfg.cpp index 7e9e105e1..b814442fa 100644 --- a/source/slang/slang-ir-simplify-cfg.cpp +++ b/source/slang/slang-ir-simplify-cfg.cpp @@ -4,6 +4,8 @@ #include "slang-ir.h" #include "slang-ir-dominators.h" #include "slang-ir-restructure.h" +#include "slang-ir-util.h" +#include "slang-ir-loop-unroll.h" namespace Slang { @@ -31,8 +33,7 @@ static BreakableRegion* findBreakableRegion(Region* region) // it is needed and hasn't been generated yet. static bool isTrivialSingleIterationLoop( IRGlobalValueWithCode* func, - IRLoop* loop, - CFGSimplificationContext& inoutContext) + IRLoop* loop) { auto targetBlock = loop->getTargetBlock(); if (targetBlock->getPredecessors().getCount() != 1) return false; @@ -52,14 +53,14 @@ static bool isTrivialSingleIterationLoop( // // We need to verify this is a trivial loop by checking if there is any multi-level breaks // that skips out of this loop. 
- - if (!inoutContext.domTree) - inoutContext.domTree = computeDominatorTree(func); - if (!inoutContext.regionTree) - inoutContext.regionTree = generateRegionTreeForFunc(func, nullptr); + CFGSimplificationContext context; + if (!context.domTree) + context.domTree = computeDominatorTree(func); + if (!context.regionTree) + context.regionTree = generateRegionTreeForFunc(func, nullptr); SimpleRegion* targetBlockRegion = nullptr; - if (!inoutContext.regionTree->mapBlockToRegion.TryGetValue(targetBlock, targetBlockRegion)) + if (!context.regionTree->mapBlockToRegion.TryGetValue(targetBlock, targetBlockRegion)) return false; BreakableRegion* loopBreakableRegion = findBreakableRegion(targetBlockRegion); LoopRegion* loopRegion = as<LoopRegion>(loopBreakableRegion); @@ -67,18 +68,18 @@ static bool isTrivialSingleIterationLoop( return false; for (auto block : func->getBlocks()) { - if (!inoutContext.domTree->dominates(loop->getTargetBlock(), block)) + if (!context.domTree->dominates(loop->getTargetBlock(), block)) continue; - if (inoutContext.domTree->dominates(loop->getBreakBlock(), block)) + if (context.domTree->dominates(loop->getBreakBlock(), block)) continue; SimpleRegion* region = nullptr; - if (!inoutContext.regionTree->mapBlockToRegion.TryGetValue(block, region)) + if (!context.regionTree->mapBlockToRegion.TryGetValue(block, region)) return false; for (auto branchTarget : block->getSuccessors()) { SimpleRegion* targetRegion = nullptr; - if (!inoutContext.regionTree->mapBlockToRegion.TryGetValue(branchTarget, targetRegion)) + if (!context.regionTree->mapBlockToRegion.TryGetValue(branchTarget, targetRegion)) return false; // If multi-level break out that skips over this loop exists, then this is not a trivial loop. 
if (targetRegion->isDescendentOf(loopRegion)) @@ -96,6 +97,104 @@ static bool isTrivialSingleIterationLoop( return true; } +static bool doesLoopHasSideEffect(IRGlobalValueWithCode* func, IRLoop* loopInst) +{ + auto blocks = collectBlocksInLoop(func, loopInst); + HashSet<IRBlock*> loopBlocks; + for (auto b : blocks) + loopBlocks.Add(b); + auto addressHasOutOfLoopUses = [&](IRInst* addr) + { + // The entire access chain of `addr` must have no uses out side the loop. + // The root variable must be a local var. + for (auto chainNode = addr; chainNode;) + { + if (getParentFunc(chainNode) != func) + return true; + for (auto use = chainNode->firstUse; use; use = use->nextUse) + { + if (!loopBlocks.Contains(as<IRBlock>(use->getUser()->getParent()))) + return true; + } + switch (chainNode->getOp()) + { + case kIROp_GetElementPtr: + case kIROp_FieldAddress: + chainNode = chainNode->getOperand(0); + continue; + case kIROp_Var: + break; + default: + return true; + } + break; + } + return false; + }; + + for (auto b : blocks) + { + for (auto inst : b->getChildren()) + { + // Is this inst used anywhere outside the loop? If so the loop has side effect. + for (auto use = inst->firstUse; use; use = use->nextUse) + { + if (!loopBlocks.Contains(as<IRBlock>(use->getUser()->getParent()))) + return true; + } + + // The inst can't possibly have side effect? Skip it. + if (!inst->mightHaveSideEffects()) + continue; + + // This inst might have side effect, try to prove that the + // side effect does not leak beyond the scope of the loop. + if (auto call = as<IRCall>(inst)) + { + auto callee = getResolvedInstForDecorations(call->getCallee()); + if (!callee || !callee->findDecoration<IRReadNoneDecoration>()) + return true; + // We are calling a pure function, check if any of the return + // variables are used outside the loop. 
+ for (UInt i = 0; i < call->getArgCount(); i++) + { + auto arg = call->getArg(i); + if (!isValueType(arg->getDataType())) + { + if (addressHasOutOfLoopUses(arg)) + return true; + } + } + } + else if (auto store = as<IRStore>(inst)) + { + if (addressHasOutOfLoopUses(store->getPtr())) + return true; + } + else if (auto branch = as<IRUnconditionalBranch>(inst)) + { + if (loopBlocks.Contains(branch->getTargetBlock())) + continue; + // Branching out of the loop with some argument is considered + // having a side effect. + if (branch->getArgCount() != 0) + return true; + } + else if (as<IRIfElse>(inst) || as<IRSwitch>(inst) || as<IRLoop>(inst)) + { + // We are starting a sub control flow. + // This is considered side effect free. + } + else + { + // For all other insts, we assume it has a global side effect. + return true; + } + } + } + return false; +} + static bool removeDeadBlocks(IRGlobalValueWithCode* func) { bool changed = false; @@ -142,15 +241,327 @@ static bool removeDeadBlocks(IRGlobalValueWithCode* func) return changed; } +// Return the true of the if-else branch block if the branch is a trivial jump +// to after block with no other insts. +static bool isTrivialIfElseBranch(IRIfElse* condBranch, IRBlock* branchBlock) +{ + if (branchBlock != condBranch->getAfterBlock()) + { + if (auto br = as<IRUnconditionalBranch>(branchBlock->getFirstOrdinaryInst())) + { + if (br->getTargetBlock() == condBranch->getAfterBlock() && br->getOp() == kIROp_unconditionalBranch) + { + return true; + } + } + } + else + { + return true; + } + return false; +} + +static bool arePhiArgsEquivalentInBranches(IRIfElse* ifElse) +{ + // If one of the branch target is afterBlock itself, and the other branch + // is a trivial block that jumps into the afterBlock, this if-else is trivial. + // In this case the argCount must be 0 because a block with phi parameters can't + // be used as targets in a conditional branch. 
+ auto branch1 = ifElse->getTrueBlock(); + auto branch2 = ifElse->getFalseBlock(); + auto afterBlock = ifElse->getAfterBlock(); + + if (branch1 == afterBlock) return true; + if (branch2 == afterBlock) return true; + + auto branchInst1 = as<IRUnconditionalBranch>(branch1->getTerminator()); + auto branchInst2 = as<IRUnconditionalBranch>(branch2->getTerminator()); + if (!branchInst1) return false; + if (!branchInst2) return false; + + // If both branches are trivial blocks, we must compare the arguments. + if (branchInst1->getArgCount() != branchInst2->getArgCount()) + { + // This should never happen, return false now to be safe. + return false; + } + + for (UInt i = 0; i < branchInst1->getArgCount(); i++) + { + if (branchInst1->getArg(i) != branchInst2->getArg(i)) + { + // argument is different, the if-else is non-trivial. + return false; + } + } + return true; +} + +static bool isTrivialIfElse(IRIfElse* condBranch, bool& isTrueBranchTrivial, bool& isFalseBranchTrivial) +{ + isTrueBranchTrivial = isTrivialIfElseBranch(condBranch, condBranch->getTrueBlock()); + isFalseBranchTrivial = isTrivialIfElseBranch(condBranch, condBranch->getFalseBlock()); + if (isTrueBranchTrivial && isFalseBranchTrivial) + { + if (arePhiArgsEquivalentInBranches(condBranch)) + return true; + } + return false; +} + +#if 0 +static bool tryMoveFalseBranchToTrueBranch(IRBuilder& builder, IRIfElse* ifElseInst) +{ + auto falseBlock = ifElseInst->getFalseBlock(); + if (falseBlock == ifElseInst->getAfterBlock()) + return false; + if (auto termInst = as<IRUnconditionalBranch>(falseBlock->getTerminator())) + { + // We can't fold a branch with arguments into the ifElse. 
+ if (termInst->getArgCount() != 0) + return false; + } + ifElseInst->trueBlock.set(falseBlock); + ifElseInst->falseBlock.set(ifElseInst->getAfterBlock()); + builder.setInsertBefore(ifElseInst); + auto newCondition = builder.emitNot(builder.getBoolType(), ifElseInst->getCondition()); + ifElseInst->condition.set(newCondition); + return true; +} +#endif + +static bool tryEliminateFalseBranch(IRIfElse* ifElseInst) +{ + auto falseBlock = ifElseInst->getFalseBlock(); + if (falseBlock == ifElseInst->getAfterBlock()) + return false; + if (auto termInst = as<IRUnconditionalBranch>(falseBlock->getTerminator())) + { + // We can't fold a branch with arguments into the ifElse. + if (termInst->getArgCount() != 0) + return false; + } + ifElseInst->falseBlock.set(ifElseInst->getAfterBlock()); + return true; +} + +static bool trySimplifyIfElse(IRBuilder& builder, IRIfElse* ifElseInst) +{ + bool isTrueBranchTrivial = false; + bool isFalseBranchTrivial = false; + if (isTrivialIfElse(ifElseInst, isTrueBranchTrivial, isFalseBranchTrivial)) + { + // If both branches of `if-else` are trivial jumps into after block, + // we can get rid of the entire conditional branch and replace it + // with a jump into the after block. + if (auto termInst = as<IRUnconditionalBranch>(ifElseInst->getTrueBlock()->getTerminator())) + { + List<IRInst*> args; + for (UInt i = 0; i < termInst->getArgCount(); i++) + args.add(termInst->getArg(i)); + builder.setInsertBefore(ifElseInst); + builder.emitBranch(ifElseInst->getAfterBlock(), (Int)args.getCount(), args.getBuffer()); + ifElseInst->removeAndDeallocate(); + return true; + } + } + else if (isTrueBranchTrivial) + { + // If true branch is empty, we move false branch to true branch and invert the condition. + // TODO: diabled for now since our auto-diff pass can't handle loops whose body is on the false + // side of condition. 
+ //return tryMoveFalseBranchToTrueBranch(builder, ifElseInst); + } + else if (isFalseBranchTrivial) + { + // If false branch is empty, we set it to afterBlock. + return tryEliminateFalseBranch(ifElseInst); + } + return false; +} + +static bool isTrueLit(IRInst* lit) +{ + if (auto boolLit = as<IRBoolLit>(lit)) + return boolLit->getValue(); + return false; +} +static bool isFalseLit(IRInst* lit) +{ + if (auto boolLit = as<IRBoolLit>(lit)) + return !boolLit->getValue(); + return false; +} + +static bool simplifyBoolPhiParam(IRIfElse* ifElse, Array<IRBlock*, 2>& preds, IRParam* param, UInt paramIndex) +{ + // For bool params where its value is assigned from the same `if-else` statement, + // we can simplify it into an expression of the condition of the source `if-else`. + + if (!param->getDataType() || param->getDataType()->getOp() != kIROp_BoolType) + return false; + + auto branch0 = as<IRUnconditionalBranch>(preds[0]->getTerminator()); + if (!branch0) + return false; + if (branch0->getArgCount() <= paramIndex) + return false; + auto branch1 = as<IRUnconditionalBranch>(preds[1]->getTerminator()); + if (!branch1) + return false; + if (branch1->getArgCount() <= paramIndex) + return false; + + IRInst* replacement = nullptr; + if (isTrueLit(branch0->getArg(paramIndex)) && isFalseLit(branch1->getArg(paramIndex))) + { + replacement = ifElse->getCondition(); + } + else if (isFalseLit(branch0->getArg(paramIndex)) && isTrueLit(branch1->getArg(paramIndex))) + { + IRBuilder builder(param); + setInsertBeforeOrdinaryInst(&builder, param); + replacement = builder.emitNot(builder.getBoolType(), ifElse->getCondition()); + } + if (replacement) + { + param->replaceUsesWith(replacement); + param->removeAndDeallocate(); + branch0->removeArgument(paramIndex); + branch1->removeArgument(paramIndex); + return true; + } + return false; +} + +static bool simplifyBoolPhiParams(IRBlock* block) +{ + if (!block) + return false; + + if (block->getPredecessors().getCount() != 2) + return false; + + 
Array<IRBlock*, 2> preds; + for (auto pred : block->getPredecessors()) + preds.add(pred); + + IRBlock* ifElseBlock = nullptr; + if (preds[0]->getPredecessors().getCount() != 1) + return false; + ifElseBlock = *(preds[0]->getPredecessors().begin()); + if (preds[1]->getPredecessors().getCount() != 1) + return false; + auto p = *(preds[1]->getPredecessors().begin()); + if (p != ifElseBlock) + return false; + + auto ifElse = as<IRIfElse>(ifElseBlock->getTerminator()); + if (!ifElse) + return false; + + if (ifElse->getTrueBlock() == preds[1]) + { + Swap(preds[0], preds[1]); + } + SLANG_ASSERT(ifElse->getTrueBlock() == preds[0] && ifElse->getFalseBlock() == preds[1]); + + List<IRParam*> params; + for (auto param : block->getParams()) + params.add(param); + bool changed = false; + for (Index i = params.getCount() - 1; i >= 0; i--) + { + changed |= simplifyBoolPhiParam(ifElse, preds, params[i], (UInt)i); + } + return changed; +} + +static bool removeTrivialPhiParams(IRBlock* block) +{ + // We can remove a phi parmeter if: + // 1. all arguments to a parameter is the same (not really a phi). + // 2. the arguments to the parameter is always the same as arguments to another existing parameter (duplicate phi). 
+ + bool changed = false; + List<IRParam*> params; + struct ParamState + { + bool areKnownValueSame = true; + IRInst* knownValue = nullptr; + OrderedHashSet<UInt> sameAsParamSet; + }; + List<ParamState> args; + List<IRUnconditionalBranch*> termInsts; + for (auto param : block->getParams()) + { + params.add(param); + args.add(ParamState()); + } + + if (!params.getCount()) + return false; + + for (UInt i = 1; i < (UInt)args.getCount(); i++) + for (UInt j = 0; j < i; j++) + args[i].sameAsParamSet.Add(j); + + for (auto pred : block->getPredecessors()) + { + auto termInst = as<IRUnconditionalBranch>(pred->getTerminator()); + if (!termInst) + return false; + SLANG_ASSERT(termInst->getArgCount() == (UInt)args.getCount()); + termInsts.add(termInst); + for (UInt i = 0; i < termInst->getArgCount(); i++) + { + if (args[i].areKnownValueSame) + { + if (args[i].knownValue == nullptr) + args[i].knownValue = termInst->getArg(i); + else if (args[i].knownValue != termInst->getArg(i)) + args[i].areKnownValueSame = false; + } + for (UInt j = 0; j < i; j++) + { + if (termInst->getArg(i) != termInst->getArg(j)) + { + args[i].sameAsParamSet.Remove(j); + } + } + } + } + for (Index i = args.getCount() - 1; i >= 0; i--) + { + IRInst* targetVal = nullptr; + if (args[i].areKnownValueSame) + { + targetVal = args[i].knownValue; + } + else if (args[i].sameAsParamSet.Count()) + { + auto targetParamId = *args[i].sameAsParamSet.begin(); + targetVal = params[targetParamId]; + } + if (targetVal) + { + params[i]->replaceUsesWith(args[i].knownValue); + params[i]->removeAndDeallocate(); + for (auto termInst : termInsts) + termInst->removeArgument((UInt)i); + changed = true; + } + } + return changed; +} + static bool processFunc(IRGlobalValueWithCode* func) { auto firstBlock = func->getFirstBlock(); if (!firstBlock) return false; - // Lazily generated region tree. 
- CFGSimplificationContext simplificationContext; - IRBuilder builder(func->getModule()); bool changed = false; @@ -165,6 +576,14 @@ static bool processFunc(IRGlobalValueWithCode* func) workList.fastRemoveAt(0); while (block) { + // If all arguments to a phi parameter are the known to be the same, + // we can safely replace the phi parameter with the argument. + if (block != func->getFirstBlock()) + { + changed |= simplifyBoolPhiParams(block); + changed |= removeTrivialPhiParams(block); + } + if (auto loop = as<IRLoop>(block->getTerminator())) { // If continue block is unreachable, remove it. @@ -179,7 +598,7 @@ static bool processFunc(IRGlobalValueWithCode* func) // break at the end of the loop, we can remove the header and turn it into // a normal branch. auto targetBlock = loop->getTargetBlock(); - if (isTrivialSingleIterationLoop(func, loop, simplificationContext)) + if (isTrivialSingleIterationLoop(func, loop)) { builder.setInsertBefore(loop); List<IRInst*> args; @@ -189,7 +608,22 @@ static bool processFunc(IRGlobalValueWithCode* func) } builder.emitBranch(targetBlock, args.getCount(), args.getBuffer()); loop->removeAndDeallocate(); + changed = true; } + else if (!doesLoopHasSideEffect(func, loop)) + { + // The loop isn't computing anything useful outside the loop. + // We can delete the entire loop. + builder.setInsertBefore(loop); + SLANG_ASSERT(loop->getBreakBlock()->getFirstParam() == nullptr); + builder.emitBranch(loop->getBreakBlock()); + loop->removeAndDeallocate(); + changed = true; + } + } + else if (auto condBranch = as<IRIfElse>(block->getTerminator())) + { + changed |= trySimplifyIfElse(builder, condBranch); } // If `block` does not end with an unconditional branch, bail. 
@@ -225,6 +659,7 @@ static bool processFunc(IRGlobalValueWithCode* func) branch->removeAndDeallocate(); assert(!successor->hasUses()); successor->removeAndDeallocate(); + break; } for (auto successor : block->getSuccessors()) { diff --git a/source/slang/slang-ir-specialize-function-call.cpp b/source/slang/slang-ir-specialize-function-call.cpp index 894d46cce..a2ebbc0cf 100644 --- a/source/slang/slang-ir-specialize-function-call.cpp +++ b/source/slang/slang-ir-specialize-function-call.cpp @@ -822,6 +822,12 @@ struct FunctionParameterSpecializationContext { decoration->removeAndDeallocate(); } + else if (as<IRReadNoneDecoration>(decoration)) + { + // After specialization, the function may no longer be side effect free + // because the parameter we substituted in may be a global param. + decoration->removeAndDeallocate(); + } } } diff --git a/source/slang/slang-ir-ssa-simplification.cpp b/source/slang/slang-ir-ssa-simplification.cpp index f06fafcb3..beaaae065 100644 --- a/source/slang/slang-ir-ssa-simplification.cpp +++ b/source/slang/slang-ir-ssa-simplification.cpp @@ -10,6 +10,7 @@ #include "slang-ir-deduplicate-generic-children.h" #include "slang-ir-remove-unused-generic-param.h" #include "slang-ir-redundancy-removal.h" +#include "slang-ir-propagate-func-properties.h" namespace Slang { @@ -29,6 +30,7 @@ namespace Slang changed |= peepholeOptimize(module); changed |= removeRedundancy(module); changed |= simplifyCFG(module); + changed |= propagateFuncProperties(module); // Note: we disregard the `changed` state from dead code elimination pass since // SCCP pass could be generating temporarily evaluated constant values and never actually use them. 
@@ -41,6 +43,28 @@ namespace Slang } } + void simplifyNonSSAIR(IRModule* module) + { + bool changed = true; + const int kMaxIterations = 8; + int iterationCounter = 0; + while (changed && iterationCounter < kMaxIterations) + { + changed = false; + changed |= peepholeOptimize(module); + changed |= removeRedundancy(module); + changed |= simplifyCFG(module); + + // Note: we disregard the `changed` state from dead code elimination pass since + // SCCP pass could be generating temporarily evaluated constant values and never actually use them. + // DCE will always remove those newly generated consts and always returns true here. + eliminateDeadCode(module); + + iterationCounter++; + } + } + + void simplifyFunc(IRGlobalValueWithCode* func) { bool changed = true; diff --git a/source/slang/slang-ir-ssa-simplification.h b/source/slang/slang-ir-ssa-simplification.h index ee8343003..39504e102 100644 --- a/source/slang/slang-ir-ssa-simplification.h +++ b/source/slang/slang-ir-ssa-simplification.h @@ -10,5 +10,8 @@ namespace Slang // until no more changes are possible. void simplifyIR(IRModule* module); + // Run simplifications on IR that is out of SSA form. 
+ void simplifyNonSSAIR(IRModule* module); + void simplifyFunc(IRGlobalValueWithCode* func); } diff --git a/source/slang/slang-ir-util.cpp b/source/slang/slang-ir-util.cpp index 3db036a8d..339521f41 100644 --- a/source/slang/slang-ir-util.cpp +++ b/source/slang/slang-ir-util.cpp @@ -157,6 +157,32 @@ IRInst* maybeSpecializeWithGeneric(IRBuilder& builder, IRInst* genericToSpecaili return genericToSpecailize; } +bool isValueType(IRInst* dataType) +{ + dataType = getResolvedInstForDecorations(unwrapAttributedType(dataType)); + if (as<IRBasicType>(dataType)) + return true; + switch (dataType->getOp()) + { + case kIROp_StructType: + case kIROp_InterfaceType: + case kIROp_ClassType: + case kIROp_VectorType: + case kIROp_MatrixType: + case kIROp_TupleType: + case kIROp_ResultType: + case kIROp_OptionalType: + case kIROp_DifferentialPairType: + case kIROp_DynamicType: + case kIROp_AnyValueType: + case kIROp_ArrayType: + case kIROp_FuncType: + return true; + default: + return false; + } +} + IRInst* hoistValueFromGeneric(IRBuilder& inBuilder, IRInst* value, IRInst*& outSpecializedVal, bool replaceExistingValue) { auto outerGeneric = as<IRGeneric>(findOuterGeneric(value)); @@ -402,8 +428,7 @@ bool canInstHaveSideEffectAtAddress(IRGlobalValueWithCode* func, IRInst* inst, I { auto callee = call->getCallee(); if (callee && - callee->findDecoration<IRReadNoneDecoration>() && - callee->findDecoration<IRNoSideEffectDecoration>()) + callee->findDecoration<IRReadNoneDecoration>()) { // An exception is if the callee is side-effect free and is not reading from // memory. @@ -423,6 +448,32 @@ bool canInstHaveSideEffectAtAddress(IRGlobalValueWithCode* func, IRInst* inst, I if (canAddressesPotentiallyAlias(func, call->getArg(i), addr)) return true; } + else if (!isValueType(call->getArg(i)->getDataType())) + { + // This is some unknown handle type, we assume it can have any side effects. 
+ return true; + } + } + } + break; + case kIROp_unconditionalBranch: + case kIROp_loop: + { + auto branch = as<IRUnconditionalBranch>(inst); + // If any pointer typed argument of the branch inst may overlap addr, return true. + for (UInt i = 0; i < branch->getArgCount(); i++) + { + SLANG_RELEASE_ASSERT(branch->getArg(i)->getDataType()); + if (isPtrLikeOrHandleType(branch->getArg(i)->getDataType())) + { + if (canAddressesPotentiallyAlias(func, branch->getArg(i), addr)) + return true; + } + else if (!isValueType(branch->getArg(i)->getDataType())) + { + // This is some unknown handle type, we assume it can have any side effects. + return true; + } } } break; @@ -434,6 +485,11 @@ bool canInstHaveSideEffectAtAddress(IRGlobalValueWithCode* func, IRInst* inst, I if (isPtrLikeOrHandleType(inst->getOperand(0)->getDataType()) && canAddressesPotentiallyAlias(func, inst->getOperand(0), addr)) return true; + else if (!isValueType(inst->getOperand(0)->getDataType())) + { + // This is some unknown handle type, we assume it can have any side effects. + return true; + } } break; default: @@ -520,20 +576,17 @@ bool isPureFunctionalCall(IRCall* call) auto callee = getResolvedInstForDecorations(call->getCallee()); if (callee->findDecoration<IRReadNoneDecoration>()) { - return true; - } - if (callee->findDecoration<IRNoSideEffectDecoration>()) - { // If the function has no side effect and is not writing to any outputs, // we can safely treat the call as a normal inst. bool hasOutArg = false; for (UInt i = 0; i < call->getArgCount(); i++) { - if (as<IRPtrTypeBase>(call->getArg(i)->getDataType())) - { - hasOutArg = true; - break; - } + if (isValueType(call->getArg(i)->getDataType())) + continue; + // If the argument type is not a known value type, + // assume it is a pointer or handle through which side effect can take place. 
+ hasOutArg = true; + break; } return !hasOutArg; } diff --git a/source/slang/slang-ir-util.h b/source/slang/slang-ir-util.h index 8a12ab895..62156cad6 100644 --- a/source/slang/slang-ir-util.h +++ b/source/slang/slang-ir-util.h @@ -83,6 +83,9 @@ inline bool isScalarIntegerType(IRType* type) return getTypeStyle(type->getOp()) == kIROp_IntType; } +// No side effect can take place through a value of a "Value" type. +bool isValueType(IRInst* type); + inline bool isChildInstOf(IRInst* inst, IRInst* parent) { while (inst) diff --git a/source/slang/slang-ir.cpp b/source/slang/slang-ir.cpp index accefc0c9..fd211d05c 100644 --- a/source/slang/slang-ir.cpp +++ b/source/slang/slang-ir.cpp @@ -43,7 +43,10 @@ namespace Slang case kIROp_PreciseDecoration: case kIROp_PublicDecoration: case kIROp_HLSLExportDecoration: - case kIROp_ReadNoneDecoration: + case kIROp_ReadNoneDecoration: + case kIROp_NoSideEffectDecoration: + case kIROp_ForwardDifferentiableDecoration: + case kIROp_BackwardDifferentiableDecoration: case kIROp_RequiresNVAPIDecoration: case kIROp_TriangleAdjInputPrimitiveTypeDecoration: case kIROp_TriangleInputPrimitiveTypeDecoration: @@ -695,6 +698,21 @@ namespace Slang } } + void IRUnconditionalBranch::removeArgument(UInt index) + { + switch (getOp()) + { + case kIROp_unconditionalBranch: + removeOperand(1 + index); + break; + case kIROp_loop: + removeOperand(3 + index); + break; + default: + SLANG_UNEXPECTED("unhandled unconditional branch opcode"); + } + } + IRInst* IRUnconditionalBranch::getArg(UInt index) { return getArgs()[index].usedValue; @@ -5109,6 +5127,17 @@ namespace Slang return inst; } + IRInst* IRBuilder::emitNot(IRType* type, IRInst* value) + { + auto inst = createInst<IRInst>( + this, + kIROp_Not, + type, + value); + addInst(inst); + return inst; + } + IRInst* IRBuilder::emitAdd(IRType* type, IRInst* left, IRInst* right) { auto inst = createInst<IRInst>( @@ -6792,6 +6821,17 @@ namespace Slang } } + void IRInst::removeOperand(Index index) + { + for 
(Index i = index; i < (Index)operandCount - 1; i++) + { + getOperands()[i].set(getOperand(i + 1)); + } + getOperands()[operandCount - 1].clear(); + operandCount--; + return; + } + // Remove this instruction from its parent block, // and then destroy it (it had better have no uses!) void IRInst::removeAndDeallocate() @@ -6879,6 +6919,8 @@ namespace Slang // common subexpression elimination, etc. // auto call = cast<IRCall>(this); + // If the call has been marked as no-side-effect, we + // will treat it so, by-passing all other checks. if (call->findDecoration<IRNoSideEffectDecoration>()) return false; return !isPureFunctionalCall(call); @@ -6894,6 +6936,7 @@ namespace Slang case kIROp_Func: case kIROp_Generic: case kIROp_Var: + case kIROp_Param: case kIROp_GlobalVar: // Note: the IRGlobalVar represents the *address*, so only a load/store would have side effects case kIROp_GlobalConstant: case kIROp_GlobalParam: @@ -7003,12 +7046,6 @@ namespace Slang case kIROp_BackwardDifferentiatePropagate: return false; } - - // Check if the calle has been marked with a catch-all no-side-effect decoration. - if (findDecoration<IRNoSideEffectDecoration>()) - { - return false; - } return true; } diff --git a/source/slang/slang-ir.h b/source/slang/slang-ir.h index 63b7c4ef9..e22ea8a36 100644 --- a/source/slang/slang-ir.h +++ b/source/slang/slang-ir.h @@ -744,6 +744,11 @@ struct IRInst // for those values. void removeArguments(); + // Remove operand `index` from operand list. + // For example, if the inst is `op(a,b,c)`, calling removeOperand(1) on it results in + // `op(a,c)`. + void removeOperand(Index index); + /// Transfer any decorations of this instruction to the `target` instruction. 
void transferDecorationsTo(IRInst* target); diff --git a/source/slang/slang-lower-to-ir.cpp b/source/slang/slang-lower-to-ir.cpp index 681871b6c..d09c35eea 100644 --- a/source/slang/slang-lower-to-ir.cpp +++ b/source/slang/slang-lower-to-ir.cpp @@ -8304,6 +8304,11 @@ struct DeclLoweringVisitor : DeclVisitor<DeclLoweringVisitor, LoweredValInfo> getBuilder()->addSimpleDecoration<IRRequiresNVAPIDecoration>(irFunc); } + if (decl->findModifier<AlwaysFoldIntoUseSiteAttribute>()) + { + getBuilder()->addSimpleDecoration<IRAlwaysFoldIntoUseSiteDecoration>(irFunc); + } + if (decl->findModifier<NoInlineAttribute>()) { getBuilder()->addSimpleDecoration<IRNoInlineDecoration>(irFunc); |
