summaryrefslogtreecommitdiffstats
path: root/source
diff options
context:
space:
mode:
authorYong He <yonghe@outlook.com>2023-02-24 10:01:47 -0800
committerGitHub <noreply@github.com>2023-02-24 10:01:47 -0800
commitbd6306cdaa4a49344658bd026721b6532e103d09 (patch)
treebb7f666d426e6cfc7777a3ccac0a1d628588eb39 /source
parente8c08e7ecb1124f115a1d1042277776193122b57 (diff)
More control flow simplifications. (#2673)
* More control flow and Phi param simplifications. * Fix. * Fix gcc error. * Fix. * More IR cleanup. * Fix bug in phi param dce + ifelse simplify. * Propagate and DCE side-effect-free functions. * Enhance CFG simplification to remove loops with no side effects. * Fix. * Fixes. * Fix tests. Add [__AlwaysFoldIntoUseSite] for rayPayloadLocation. * More cleanup. * Fixes. * Fix. --------- Co-authored-by: Yong He <yhe@nvidia.com>
Diffstat (limited to 'source')
-rw-r--r--source/slang/core.meta.slang8
-rw-r--r--source/slang/hlsl.meta.slang318
-rw-r--r--source/slang/slang-ast-modifier.h8
-rw-r--r--source/slang/slang-emit-c-like.cpp12
-rw-r--r--source/slang/slang-emit-c-like.h2
-rw-r--r--source/slang/slang-emit-cpp.cpp40
-rw-r--r--source/slang/slang-emit-cpp.h1
-rw-r--r--source/slang/slang-emit.cpp4
-rw-r--r--source/slang/slang-ir-autodiff-unzip.cpp1
-rw-r--r--source/slang/slang-ir-autodiff.h8
-rw-r--r--source/slang/slang-ir-dce.cpp264
-rw-r--r--source/slang/slang-ir-glsl-legalize.cpp4
-rw-r--r--source/slang/slang-ir-inst-defs.h3
-rw-r--r--source/slang/slang-ir-insts.h18
-rw-r--r--source/slang/slang-ir-loop-unroll.cpp26
-rw-r--r--source/slang/slang-ir-loop-unroll.h4
-rw-r--r--source/slang/slang-ir-propagate-func-properties.cpp186
-rw-r--r--source/slang/slang-ir-propagate-func-properties.h7
-rw-r--r--source/slang/slang-ir-redundancy-removal.cpp373
-rw-r--r--source/slang/slang-ir-sccp.cpp4
-rw-r--r--source/slang/slang-ir-simplify-cfg.cpp467
-rw-r--r--source/slang/slang-ir-specialize-function-call.cpp6
-rw-r--r--source/slang/slang-ir-ssa-simplification.cpp24
-rw-r--r--source/slang/slang-ir-ssa-simplification.h3
-rw-r--r--source/slang/slang-ir-util.cpp75
-rw-r--r--source/slang/slang-ir-util.h3
-rw-r--r--source/slang/slang-ir.cpp51
-rw-r--r--source/slang/slang-ir.h5
-rw-r--r--source/slang/slang-lower-to-ir.cpp5
29 files changed, 1616 insertions, 314 deletions
diff --git a/source/slang/core.meta.slang b/source/slang/core.meta.slang
index 6357d58bd..9da33c755 100644
--- a/source/slang/core.meta.slang
+++ b/source/slang/core.meta.slang
@@ -2525,21 +2525,25 @@ int __SyntaxError();
__generic<T>
__target_intrinsic(cuda, "sizeof($G0)")
__target_intrinsic(cpp, "sizeof($G0)")
+[__readNone]
int __sizeOf();
__generic<T>
__target_intrinsic(cuda, "sizeof($T0)")
__target_intrinsic(cpp, "sizeof($T0)")
+[__readNone]
int __sizeOf(T v);
__generic<T>
__target_intrinsic(cuda, "SLANG_ALIGN_OF($G0)")
__target_intrinsic(cpp, "SLANG_ALIGN_OF($G0)")
+[__readNone]
int __alignOf();
__generic<T>
__target_intrinsic(cuda, "SLANG_ALIGN_OF($T0)")
__target_intrinsic(cpp, "SLANG_ALIGN_OF($T0)")
+[__readNone]
int __alignOf(T v);
// It would be nice to have offsetof equivalent, but it's not clear how that would work in terms of the Slang language.
@@ -2547,6 +2551,7 @@ int __alignOf(T v);
// Both target expansions cast the addresses of `$1` and `$0` to `char*`, subtract them, and convert the result to `int`.
// NOTE(review): presumably `$0`/`$1` expand to the `t` and `field` arguments — confirm against the intrinsic expander.
__generic<T,F>
__target_intrinsic(cuda, "int(((char*)&($1)) - ((char*)&($0)))")
__target_intrinsic(cpp, "int(((char*)&($1)) - ((char*)&($0)))")
+[__readNone]
int __offsetOf(in T t, in F field);
/// Mark beginning of "interlocked" operations in a fragment shader.
@@ -2960,6 +2965,9 @@ attribute_syntax [builtin] : BuiltinAttribute;
__attributeTarget(DeclBase)
attribute_syntax [__requiresNVAPI] : RequiresNVAPIAttribute;
+__attributeTarget(DeclBase)
+attribute_syntax [__AlwaysFoldIntoUseSiteAttribute] : AlwaysFoldIntoUseSiteAttribute;
+
__attributeTarget(FunctionDeclBase)
attribute_syntax [noinline] : NoInlineAttribute;
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index 7e75d06b3..37cdc205e 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -778,6 +778,7 @@ __target_intrinsic(glsl)
__target_intrinsic(cuda, "$P_abs($0)")
__target_intrinsic(cpp, "$P_abs($0)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 fi(FAbs, SAbs) _0")
+[__readNone]
T abs(T x);
/*{
// Note: this simple definition may not be appropriate for floating-point inputs
@@ -788,6 +789,7 @@ __generic<T : __BuiltinIntegerType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 fi(FAbs, SAbs) _0")
+[__readNone]
vector<T, N> abs(vector<T, N> x)
{
VECTOR_MAP_UNARY(T, N, abs, x);
@@ -795,6 +797,7 @@ vector<T, N> abs(vector<T, N> x)
__generic<T : __BuiltinIntegerType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T,N,M> abs(matrix<T,N,M> x)
{
MATRIX_MAP_UNARY(T, N, M, abs, x);
@@ -806,12 +809,14 @@ __target_intrinsic(glsl)
__target_intrinsic(cuda, "$P_abs($0)")
__target_intrinsic(cpp, "$P_abs($0)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 fi(FAbs, SAbs) _0")
+[__readNone]
T abs(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 fi(FAbs, SAbs) _0")
+[__readNone]
vector<T, N> abs(vector<T, N> x)
{
VECTOR_MAP_UNARY(T, N, abs, x);
@@ -819,6 +824,7 @@ vector<T, N> abs(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T,N,M> abs(matrix<T,N,M> x)
{
MATRIX_MAP_UNARY(T, N, M, abs, x);
@@ -832,12 +838,14 @@ __target_intrinsic(glsl)
__target_intrinsic(cuda, "$P_acos($0)")
__target_intrinsic(cpp, "$P_acos($0)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Acos _0")
+[__readNone]
T acos(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Acos _0")
+[__readNone]
vector<T, N> acos(vector<T, N> x)
{
VECTOR_MAP_UNARY(T, N, acos, x);
@@ -845,6 +853,7 @@ vector<T, N> acos(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> acos(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(T, N, M, acos, x);
@@ -855,11 +864,13 @@ __generic<T : __BuiltinType>
__target_intrinsic(cpp, "bool($0)")
__target_intrinsic(cuda, "bool($0)")
__target_intrinsic(glsl, "bool($0)")
+[__readNone]
bool all(T x);
__generic<T : __BuiltinType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, "all(bvec$N0($0))")
+[__readNone]
bool all(vector<T,N> x)
{
bool result = true;
@@ -870,6 +881,7 @@ bool all(vector<T,N> x)
__generic<T : __BuiltinType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
bool all(matrix<T,N,M> x)
{
bool result = true;
@@ -894,11 +906,13 @@ __generic<T : __BuiltinType>
__target_intrinsic(cpp, "bool($0)")
__target_intrinsic(cuda, "bool($0)")
__target_intrinsic(glsl, "bool($0)")
+[__readNone]
bool any(T x);
__generic<T : __BuiltinType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, "any(bvec$N0($0))")
+[__readNone]
bool any(vector<T, N> x)
{
bool result = false;
@@ -909,6 +923,7 @@ bool any(vector<T, N> x)
__generic<T : __BuiltinType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
bool any(matrix<T, N, M> x)
{
bool result = false;
@@ -926,6 +941,7 @@ __target_intrinsic(cpp, "$P_asdouble($0, $1)")
__target_intrinsic(cuda, "$P_asdouble($0, $1)")
__target_intrinsic(spirv_direct, "%v = OpCompositeConstruct _type(uint2) resultId _0 _1; OpExtInst resultType resultId glsl450 59 %v")
__glsl_extension(GL_ARB_gpu_shader5)
+[__readNone]
double asdouble(uint lowbits, uint highbits);
// Reinterpret bits as a float (HLSL SM 4.0)
@@ -935,6 +951,7 @@ __target_intrinsic(glsl, "intBitsToFloat")
__target_intrinsic(cpp, "$P_asfloat($0)")
__target_intrinsic(cuda, "$P_asfloat($0)")
__target_intrinsic(spirv_direct, "OpBitcast resultType resultId _0")
+[__readNone]
float asfloat(int x);
__target_intrinsic(hlsl)
@@ -942,12 +959,14 @@ __target_intrinsic(glsl, "uintBitsToFloat")
__target_intrinsic(cpp, "$P_asfloat($0)")
__target_intrinsic(cuda, "$P_asfloat($0)")
__target_intrinsic(spirv_direct, "OpBitcast resultType resultId _0")
+[__readNone]
float asfloat(uint x);
__generic<let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, "intBitsToFloat")
__target_intrinsic(spirv_direct, "OpBitcast resultType resultId _0")
+[__readNone]
vector<float, N> asfloat(vector< int, N> x)
{
VECTOR_MAP_UNARY(float, N, asfloat, x);
@@ -957,6 +976,7 @@ __generic<let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, "uintBitsToFloat")
__target_intrinsic(spirv_direct, "OpBitcast resultType resultId _0")
+[__readNone]
vector<float,N> asfloat(vector<uint,N> x)
{
VECTOR_MAP_UNARY(float, N, asfloat, x);
@@ -964,6 +984,7 @@ vector<float,N> asfloat(vector<uint,N> x)
__generic<let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<float,N,M> asfloat(matrix< int,N,M> x)
{
MATRIX_MAP_UNARY(float, N, M, asfloat, x);
@@ -971,6 +992,7 @@ matrix<float,N,M> asfloat(matrix< int,N,M> x)
__generic<let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<float,N,M> asfloat(matrix<uint,N,M> x)
{
MATRIX_MAP_UNARY(float, N, M, asfloat, x);
@@ -978,16 +1000,19 @@ matrix<float,N,M> asfloat(matrix<uint,N,M> x)
// No op
[__unsafeForceInlineEarly]
+[__readNone]
float asfloat(float x)
{ return x; }
__generic<let N : int>
[__unsafeForceInlineEarly]
+[__readNone]
vector<float,N> asfloat(vector<float,N> x)
{ return x; }
__generic<let N : int, let M : int>
[__unsafeForceInlineEarly]
+[__readNone]
matrix<float,N,M> asfloat(matrix<float,N,M> x)
{ return x; }
@@ -998,12 +1023,14 @@ __target_intrinsic(glsl)
__target_intrinsic(cuda, "$P_asin($0)")
__target_intrinsic(cpp, "$P_asin($0)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Asin _0")
+[__readNone]
T asin(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Asin _0")
+[__readNone]
vector<T, N> asin(vector<T, N> x)
{
VECTOR_MAP_UNARY(T,N,asin,x);
@@ -1011,6 +1038,7 @@ vector<T, N> asin(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> asin(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(T,N,M,asin,x);
@@ -1023,6 +1051,7 @@ __target_intrinsic(glsl, "floatBitsToInt")
__target_intrinsic(cpp, "$P_asint($0)")
__target_intrinsic(cuda, "$P_asint($0)")
__target_intrinsic(spirv_direct, "OpBitcast resultType resultId _0")
+[__readNone]
int asint(float x);
__target_intrinsic(hlsl)
@@ -1030,12 +1059,14 @@ __target_intrinsic(glsl, "int($0)")
__target_intrinsic(cpp, "$P_asint($0)")
__target_intrinsic(cuda, "$P_asint($0)")
__target_intrinsic(spirv_direct, "OpBitcast resultType resultId _0")
+[__readNone]
int asint(uint x);
__generic<let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, "floatBitsToInt")
__target_intrinsic(spirv_direct, "OpBitcast resultType resultId _0")
+[__readNone]
vector<int, N> asint(vector<float, N> x)
{
VECTOR_MAP_UNARY(int, N, asint, x);
@@ -1045,6 +1076,7 @@ __generic<let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, "ivec$N0($0)")
__target_intrinsic(spirv_direct, "OpBitcast resultType resultId _0")
+[__readNone]
vector<int, N> asint(vector<uint, N> x)
{
VECTOR_MAP_UNARY(int, N, asint, x);
@@ -1052,6 +1084,7 @@ vector<int, N> asint(vector<uint, N> x)
__generic<let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<int, N, M> asint(matrix<float, N, M> x)
{
MATRIX_MAP_UNARY(int, N, M, asint, x);
@@ -1059,6 +1092,7 @@ matrix<int, N, M> asint(matrix<float, N, M> x)
__generic<let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<int, N, M> asint(matrix<uint, N, M> x)
{
MATRIX_MAP_UNARY(int, N, M, asint, x);
@@ -1066,16 +1100,19 @@ matrix<int, N, M> asint(matrix<uint, N, M> x)
// No op
[__unsafeForceInlineEarly]
+[__readNone]
int asint(int x)
{ return x; }
__generic<let N : int>
[__unsafeForceInlineEarly]
+[__readNone]
vector<int,N> asint(vector<int,N> x)
{ return x; }
__generic<let N : int, let M : int>
[__unsafeForceInlineEarly]
+[__readNone]
matrix<int,N,M> asint(matrix<int,N,M> x)
{ return x; }
@@ -1086,6 +1123,7 @@ __target_intrinsic(glsl, "{ uvec2 v = unpackDouble2x32($0); $1 = v.x; $2 = v.y;
__glsl_extension(GL_ARB_gpu_shader5)
__target_intrinsic(cpp, "$P_asuint($0, $1, $2)")
__target_intrinsic(cuda, "$P_asuint($0, $1, $2)")
+[__readNone]
void asuint(double value, out uint lowbits, out uint highbits);
// Reinterpret bits as a uint (HLSL SM 4.0)
@@ -1095,6 +1133,7 @@ __target_intrinsic(glsl, "floatBitsToUint")
__target_intrinsic(spirv_direct, "OpBitcast resultType resultId _0")
__target_intrinsic(cpp, "$P_asuint($0)")
__target_intrinsic(cuda, "$P_asuint($0)")
+[__readNone]
uint asuint(float x);
__target_intrinsic(hlsl)
@@ -1102,12 +1141,14 @@ __target_intrinsic(glsl, "uint($0)")
__target_intrinsic(spirv_direct, "OpBitcast resultType resultId _0")
__target_intrinsic(cpp, "$P_asuint($0)")
__target_intrinsic(cuda, "$P_asuint($0)")
+[__readNone]
uint asuint(int x);
__generic<let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, "floatBitsToUint")
__target_intrinsic(spirv_direct, "OpBitcast resultType resultId _0")
+[__readNone]
vector<uint,N> asuint(vector<float,N> x)
{
VECTOR_MAP_UNARY(uint, N, asuint, x);
@@ -1117,6 +1158,7 @@ __generic<let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, "uvec$N0($0)")
__target_intrinsic(spirv_direct, "OpBitcast resultType resultId _0")
+[__readNone]
vector<uint, N> asuint(vector<int, N> x)
{
VECTOR_MAP_UNARY(uint, N, asuint, x);
@@ -1124,6 +1166,7 @@ vector<uint, N> asuint(vector<int, N> x)
__generic<let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<uint,N,M> asuint(matrix<float,N,M> x)
{
MATRIX_MAP_UNARY(uint, N, M, asuint, x);
@@ -1131,22 +1174,26 @@ matrix<uint,N,M> asuint(matrix<float,N,M> x)
__generic<let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<uint, N, M> asuint(matrix<int, N, M> x)
{
MATRIX_MAP_UNARY(uint, N, M, asuint, x);
}
[__unsafeForceInlineEarly]
+[__readNone]
uint asuint(uint x)
{ return x; }
__generic<let N : int>
[__unsafeForceInlineEarly]
+[__readNone]
vector<uint,N> asuint(vector<uint,N> x)
{ return x; }
__generic<let N : int, let M : int>
[__unsafeForceInlineEarly]
+[__readNone]
matrix<uint,N,M> asuint(matrix<uint,N,M> x)
{ return x; }
@@ -1159,38 +1206,41 @@ matrix<uint,N,M> asuint(matrix<uint,N,M> x)
// Identity cases:
-[__unsafeForceInlineEarly] float16_t asfloat16(float16_t value) { return value; }
-[__unsafeForceInlineEarly] vector<float16_t,N> asfloat16<let N : int>(vector<float16_t,N> value) { return value; }
-[__unsafeForceInlineEarly] matrix<float16_t,R,C> asfloat16<let R : int, let C : int>(matrix<float16_t,R,C> value) { return value; }
+[__unsafeForceInlineEarly][__readNone] float16_t asfloat16(float16_t value) { return value; }
+[__unsafeForceInlineEarly][__readNone] vector<float16_t,N> asfloat16<let N : int>(vector<float16_t,N> value) { return value; }
+[__unsafeForceInlineEarly][__readNone] matrix<float16_t,R,C> asfloat16<let R : int, let C : int>(matrix<float16_t,R,C> value) { return value; }
-[__unsafeForceInlineEarly] int16_t asint16(int16_t value) { return value; }
-[__unsafeForceInlineEarly] vector<int16_t,N> asint16<let N : int>(vector<int16_t,N> value) { return value; }
-[__unsafeForceInlineEarly] matrix<int16_t,R,C> asint16<let R : int, let C : int>(matrix<int16_t,R,C> value) { return value; }
+[__unsafeForceInlineEarly][__readNone] int16_t asint16(int16_t value) { return value; }
+[__unsafeForceInlineEarly][__readNone] vector<int16_t,N> asint16<let N : int>(vector<int16_t,N> value) { return value; }
+[__unsafeForceInlineEarly][__readNone] matrix<int16_t,R,C> asint16<let R : int, let C : int>(matrix<int16_t,R,C> value) { return value; }
-[__unsafeForceInlineEarly] uint16_t asuint16(uint16_t value) { return value; }
-[__unsafeForceInlineEarly] vector<uint16_t,N> asuint16<let N : int>(vector<uint16_t,N> value) { return value; }
-[__unsafeForceInlineEarly] matrix<uint16_t,R,C> asuint16<let R : int, let C : int>(matrix<uint16_t,R,C> value) { return value; }
+[__unsafeForceInlineEarly][__readNone] uint16_t asuint16(uint16_t value) { return value; }
+[__unsafeForceInlineEarly][__readNone] vector<uint16_t,N> asuint16<let N : int>(vector<uint16_t,N> value) { return value; }
+[__unsafeForceInlineEarly][__readNone] matrix<uint16_t,R,C> asuint16<let R : int, let C : int>(matrix<uint16_t,R,C> value) { return value; }
// Signed<->unsigned cases:
-[__unsafeForceInlineEarly] int16_t asint16(uint16_t value) { return value; }
-[__unsafeForceInlineEarly] vector<int16_t,N> asint16<let N : int>(vector<uint16_t,N> value) { return value; }
-[__unsafeForceInlineEarly] matrix<int16_t,R,C> asint16<let R : int, let C : int>(matrix<uint16_t,R,C> value) { return value; }
+[__unsafeForceInlineEarly][__readNone] int16_t asint16(uint16_t value) { return value; }
+[__unsafeForceInlineEarly][__readNone] vector<int16_t,N> asint16<let N : int>(vector<uint16_t,N> value) { return value; }
+[__unsafeForceInlineEarly][__readNone] matrix<int16_t,R,C> asint16<let R : int, let C : int>(matrix<uint16_t,R,C> value) { return value; }
-[__unsafeForceInlineEarly] uint16_t asuint16(int16_t value) { return value; }
-[__unsafeForceInlineEarly] vector<uint16_t,N> asuint16<let N : int>(vector<int16_t,N> value) { return value; }
-[__unsafeForceInlineEarly] matrix<uint16_t,R,C> asuint16<let R : int, let C : int>(matrix<int16_t,R,C> value) { return value; }
+[__unsafeForceInlineEarly][__readNone] uint16_t asuint16(int16_t value) { return value; }
+[__unsafeForceInlineEarly][__readNone] vector<uint16_t,N> asuint16<let N : int>(vector<int16_t,N> value) { return value; }
+[__unsafeForceInlineEarly][__readNone] matrix<uint16_t,R,C> asuint16<let R : int, let C : int>(matrix<int16_t,R,C> value) { return value; }
// Float->unsigned cases:
__target_intrinsic(hlsl)
__target_intrinsic(glsl, "uint16_t(packHalf2x16(vec2($0, 0.0)))")
__target_intrinsic(cuda, "__half_as_ushort")
+[__readNone]
uint16_t asuint16(float16_t value);
+[__readNone]
vector<uint16_t,N> asuint16<let N : int>(vector<float16_t,N> value)
{ VECTOR_MAP_UNARY(uint16_t, N, asuint16, value); }
+[__readNone]
matrix<uint16_t,R,C> asuint16<let R : int, let C : int>(matrix<float16_t,R,C> value)
{ MATRIX_MAP_UNARY(uint16_t, R, C, asuint16, value); }
@@ -1199,11 +1249,14 @@ matrix<uint16_t,R,C> asuint16<let R : int, let C : int>(matrix<float16_t,R,C> va
__target_intrinsic(hlsl)
__target_intrinsic(glsl, "float16_t(unpackHalf2x16($0).x)")
__target_intrinsic(cuda, "__ushort_as_half")
+[__readNone]
float16_t asfloat16(uint16_t value);
+[__readNone]
vector<float16_t,N> asfloat16<let N : int>(vector<uint16_t,N> value)
{ VECTOR_MAP_UNARY(float16_t, N, asfloat16, value); }
+[__readNone]
matrix<float16_t,R,C> asfloat16<let R : int, let C : int>(matrix<uint16_t,R,C> value)
{ MATRIX_MAP_UNARY(float16_t, R, C, asfloat16, value); }
@@ -1211,16 +1264,17 @@ matrix<float16_t,R,C> asfloat16<let R : int, let C : int>(matrix<uint16_t,R,C> v
__target_intrinsic(hlsl)
__target_intrinsic(cuda, "__half_as_short")
-[__unsafeForceInlineEarly] int16_t asint16(float16_t value) { return asuint16(value); }
-__target_intrinsic(hlsl) [__unsafeForceInlineEarly] vector<int16_t,N> asint16<let N : int>(vector<float16_t,N> value) { return asuint16(value); }
-__target_intrinsic(hlsl) [__unsafeForceInlineEarly] matrix<int16_t,R,C> asint16<let R : int, let C : int>(matrix<float16_t,R,C> value) { return asuint16(value); }
+[__unsafeForceInlineEarly][__readNone] int16_t asint16(float16_t value) { return asuint16(value); }
+__target_intrinsic(hlsl) [__unsafeForceInlineEarly][__readNone] vector<int16_t,N> asint16<let N : int>(vector<float16_t,N> value) { return asuint16(value); }
+__target_intrinsic(hlsl) [__unsafeForceInlineEarly][__readNone] matrix<int16_t,R,C> asint16<let R : int, let C : int>(matrix<float16_t,R,C> value) { return asuint16(value); }
__target_intrinsic(hlsl)
__target_intrinsic(cuda, "__short_as_half")
+[__readNone]
[__unsafeForceInlineEarly] float16_t asfloat16(int16_t value) { return asfloat16(asuint16(value)); }
-__target_intrinsic(hlsl) [__unsafeForceInlineEarly] vector<float16_t,N> asfloat16<let N : int>(vector<int16_t,N> value) { return asfloat16(asuint16(value)); }
-__target_intrinsic(hlsl) [__unsafeForceInlineEarly] matrix<float16_t,R,C> asfloat16<let R : int, let C : int>(matrix<int16_t,R,C> value) { return asfloat16(asuint16(value)); }
+__target_intrinsic(hlsl) [__unsafeForceInlineEarly][__readNone] vector<float16_t,N> asfloat16<let N : int>(vector<int16_t,N> value) { return asfloat16(asuint16(value)); }
+__target_intrinsic(hlsl) [__unsafeForceInlineEarly][__readNone] matrix<float16_t,R,C> asfloat16<let R : int, let C : int>(matrix<int16_t,R,C> value) { return asfloat16(asuint16(value)); }
// Inverse tangent (HLSL SM 1.0)
__generic<T : __BuiltinFloatingPointType>
@@ -1229,12 +1283,14 @@ __target_intrinsic(glsl)
__target_intrinsic(cuda, "$P_atan($0)")
__target_intrinsic(cpp, "$P_atan($0)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Atan _0")
+[__readNone]
T atan(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Atan _0")
+[__readNone]
vector<T, N> atan(vector<T, N> x)
{
VECTOR_MAP_UNARY(T, N, atan, x);
@@ -1242,6 +1298,7 @@ vector<T, N> atan(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> atan(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(T, N, M, atan, x);
@@ -1253,12 +1310,14 @@ __target_intrinsic(glsl,"atan($0,$1)")
__target_intrinsic(cuda, "$P_atan2($0, $1)")
__target_intrinsic(cpp, "$P_atan2($0, $1)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Atan2 _0 _1")
+[__readNone]
T atan2(T y, T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl,"atan($0,$1)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Atan2 _0 _1")
+[__readNone]
vector<T, N> atan2(vector<T, N> y, vector<T, N> x)
{
VECTOR_MAP_BINARY(T, N, atan2, y, x);
@@ -1266,6 +1325,7 @@ vector<T, N> atan2(vector<T, N> y, vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T,N,M> atan2(matrix<T,N,M> y, matrix<T,N,M> x)
{
MATRIX_MAP_BINARY(T, N, M, atan2, y, x);
@@ -1278,12 +1338,14 @@ __target_intrinsic(glsl)
__target_intrinsic(cuda, "$P_ceil($0)")
__target_intrinsic(cpp, "$P_ceil($0)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Ceil _0")
+[__readNone]
T ceil(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Ceil _0")
+[__readNone]
vector<T, N> ceil(vector<T, N> x)
{
VECTOR_MAP_UNARY(T, N, ceil, x);
@@ -1291,6 +1353,7 @@ vector<T, N> ceil(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> ceil(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(T, N, M, ceil, x);
@@ -1305,6 +1368,7 @@ __generic<T : __BuiltinIntegerType>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 fus(FClamp, UClamp, SClamp) _0 _1 _2")
+[__readNone]
T clamp(T x, T minBound, T maxBound)
{
return min(max(x, minBound), maxBound);
@@ -1314,6 +1378,7 @@ __generic<T : __BuiltinIntegerType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 fus(FClamp, UClamp, SClamp) _0 _1 _2")
+[__readNone]
vector<T, N> clamp(vector<T, N> x, vector<T, N> minBound, vector<T, N> maxBound)
{
return min(max(x, minBound), maxBound);
@@ -1321,6 +1386,7 @@ vector<T, N> clamp(vector<T, N> x, vector<T, N> minBound, vector<T, N> maxBound)
__generic<T : __BuiltinIntegerType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T,N,M> clamp(matrix<T,N,M> x, matrix<T,N,M> minBound, matrix<T,N,M> maxBound)
{
return min(max(x, minBound), maxBound);
@@ -1330,6 +1396,7 @@ __generic<T : __BuiltinFloatingPointType>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 fus(FClamp, UClamp, SClamp) _0 _1 _2")
+[__readNone]
T clamp(T x, T minBound, T maxBound)
{
return min(max(x, minBound), maxBound);
@@ -1339,6 +1406,7 @@ __generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 fus(FClamp, UClamp, SClamp) _0 _1 _2")
+[__readNone]
vector<T, N> clamp(vector<T, N> x, vector<T, N> minBound, vector<T, N> maxBound)
{
return min(max(x, minBound), maxBound);
@@ -1346,6 +1414,7 @@ vector<T, N> clamp(vector<T, N> x, vector<T, N> minBound, vector<T, N> maxBound)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T,N,M> clamp(matrix<T,N,M> x, matrix<T,N,M> minBound, matrix<T,N,M> maxBound)
{
return min(max(x, minBound), maxBound);
@@ -1354,6 +1423,7 @@ matrix<T,N,M> clamp(matrix<T,N,M> x, matrix<T,N,M> minBound, matrix<T,N,M> maxBo
// Clip (discard) fragment conditionally
__generic<T : __BuiltinFloatingPointType>
__target_intrinsic(hlsl)
+[__readNone]
void clip(T x)
{
if(x < T(0)) discard;
@@ -1361,6 +1431,7 @@ void clip(T x)
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
+[__readNone]
void clip(vector<T,N> x)
{
if(any(x < T(0))) discard;
@@ -1368,6 +1439,7 @@ void clip(vector<T,N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
void clip(matrix<T,N,M> x)
{
if(any(x < T(0))) discard;
@@ -1380,12 +1452,14 @@ __target_intrinsic(glsl)
__target_intrinsic(cuda, "$P_cos($0)")
__target_intrinsic(cpp, "$P_cos($0)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Cos _0")
+[__readNone]
T cos(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Cos _0")
+[__readNone]
vector<T, N> cos(vector<T, N> x)
{
VECTOR_MAP_UNARY(T,N, cos, x);
@@ -1393,6 +1467,7 @@ vector<T, N> cos(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> cos(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(T, N, M, cos, x);
@@ -1405,12 +1480,14 @@ __target_intrinsic(glsl)
__target_intrinsic(cuda, "$P_cosh($0)")
__target_intrinsic(cpp, "$P_cosh($0)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Cosh _0")
+[__readNone]
T cosh(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Cosh _0")
+[__readNone]
vector<T,N> cosh(vector<T,N> x)
{
VECTOR_MAP_UNARY(T,N, cosh, x);
@@ -1418,6 +1495,7 @@ vector<T,N> cosh(vector<T,N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> cosh(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(T, N, M, cosh, x);
@@ -1428,6 +1506,7 @@ __target_intrinsic(hlsl)
__target_intrinsic(glsl, "bitCount")
__target_intrinsic(cuda, "$P_countbits($0)")
__target_intrinsic(cpp, "$P_countbits($0)")
+[__readNone]
uint countbits(uint value);
// Cross product
@@ -1436,6 +1515,7 @@ __generic<T : __BuiltinArithmeticType>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Cross _0 _1")
+[__readNone]
vector<T,3> cross(vector<T,3> left, vector<T,3> right)
{
return vector<T,3>(
@@ -1446,6 +1526,7 @@ vector<T,3> cross(vector<T,3> left, vector<T,3> right)
// Convert encoded color
__target_intrinsic(hlsl)
+[__readNone]
int4 D3DCOLORtoUBYTE4(float4 color)
{
let scaled = color.zyxw * 255.001999f;
@@ -1455,11 +1536,13 @@ int4 D3DCOLORtoUBYTE4(float4 color)
// Partial-difference derivatives
__generic<T : __BuiltinFloatingPointType>
__target_intrinsic(glsl, dFdx)
+[__readNone]
T ddx(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, dFdx)
+[__readNone]
vector<T, N> ddx(vector<T, N> x)
{
VECTOR_MAP_UNARY(T, N, ddx, x);
@@ -1467,6 +1550,7 @@ vector<T, N> ddx(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> ddx(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(T, N, M, ddx, x);
@@ -1476,12 +1560,14 @@ __generic<T : __BuiltinFloatingPointType>
__target_intrinsic(hlsl)
__glsl_extension(GL_ARB_derivative_control)
__target_intrinsic(glsl, dFdxCoarse)
+[__readNone]
T ddx_coarse(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__glsl_extension(GL_ARB_derivative_control)
__target_intrinsic(glsl, dFdxCoarse)
+[__readNone]
vector<T, N> ddx_coarse(vector<T, N> x)
{
VECTOR_MAP_UNARY(T, N, ddx_coarse, x);
@@ -1489,6 +1575,7 @@ vector<T, N> ddx_coarse(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> ddx_coarse(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(T, N, M, ddx_coarse, x);
@@ -1498,12 +1585,14 @@ __generic<T : __BuiltinFloatingPointType>
__target_intrinsic(hlsl)
__glsl_extension(GL_ARB_derivative_control)
__target_intrinsic(glsl, dFdxFine)
+[__readNone]
T ddx_fine(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__glsl_extension(GL_ARB_derivative_control)
__target_intrinsic(glsl, dFdxFine)
+[__readNone]
vector<T, N> ddx_fine(vector<T, N> x)
{
VECTOR_MAP_UNARY(T, N, ddx_fine, x);
@@ -1511,6 +1600,7 @@ vector<T, N> ddx_fine(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> ddx_fine(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(T, N, M, ddx_fine, x);
@@ -1519,11 +1609,13 @@ matrix<T, N, M> ddx_fine(matrix<T, N, M> x)
__generic<T : __BuiltinFloatingPointType>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, dFdy)
+[__readNone]
T ddy(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, dFdy)
+[__readNone]
vector<T, N> ddy(vector<T, N> x)
{
VECTOR_MAP_UNARY(T, N, ddy, x);
@@ -1531,6 +1623,7 @@ vector<T, N> ddy(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> ddy(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(T, N, M, ddy, x);
@@ -1539,12 +1632,14 @@ matrix<T, N, M> ddy(matrix<T, N, M> x)
__generic<T : __BuiltinFloatingPointType>
__glsl_extension(GL_ARB_derivative_control)
__target_intrinsic(glsl, dFdyCoarse)
+[__readNone]
T ddy_coarse(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__glsl_extension(GL_ARB_derivative_control)
__target_intrinsic(glsl, dFdyCoarse)
+[__readNone]
vector<T, N> ddy_coarse(vector<T, N> x)
{
VECTOR_MAP_UNARY(T, N, ddy_coarse, x);
@@ -1552,6 +1647,7 @@ vector<T, N> ddy_coarse(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> ddy_coarse(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(T, N, M, ddy_coarse, x);
@@ -1561,12 +1657,14 @@ __generic<T : __BuiltinFloatingPointType>
__target_intrinsic(hlsl)
__glsl_extension(GL_ARB_derivative_control)
__target_intrinsic(glsl, dFdyFine)
+[__readNone]
T ddy_fine(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__glsl_extension(GL_ARB_derivative_control)
__target_intrinsic(glsl, dFdyFine)
+[__readNone]
vector<T, N> ddy_fine(vector<T, N> x)
{
VECTOR_MAP_UNARY(T, N, ddy_fine, x);
@@ -1574,6 +1672,7 @@ vector<T, N> ddy_fine(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> ddy_fine(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(T, N, M, ddy_fine, x);
@@ -1586,6 +1685,7 @@ __generic<T : __BuiltinFloatingPointType>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Degrees _0")
+[__readNone]
T degrees(T x)
{
return x * (T(180) / T.getPi());
@@ -1595,6 +1695,7 @@ __generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Degrees _0")
+[__readNone]
vector<T, N> degrees(vector<T, N> x)
{
VECTOR_MAP_UNARY(T, N, degrees, x);
@@ -1602,6 +1703,7 @@ vector<T, N> degrees(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> degrees(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(T, N, M, degrees, x);
@@ -1613,6 +1715,7 @@ __generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Determinant _0")
+[__readNone]
T determinant(matrix<T,N,N> m);
// Barrier for device memory
@@ -1630,6 +1733,7 @@ __generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Distance _0 _1")
+[__readNone]
T distance(vector<T, N> x, vector<T, N> y)
{
return length(x - y);
@@ -1640,6 +1744,7 @@ T distance(vector<T, N> x, vector<T, N> y)
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
+[__readNone]
T dot(vector<T, N> x, vector<T, N> y)
{
T result = T(0);
@@ -1650,6 +1755,7 @@ T dot(vector<T, N> x, vector<T, N> y)
__generic<T : __BuiltinIntegerType, let N : int>
__target_intrinsic(hlsl)
+[__readNone]
T dot(vector<T, N> x, vector<T, N> y)
{
T result = T(0);
@@ -1682,15 +1788,18 @@ RWStructuredBuffer<T> __getEquivalentStructuredBuffer<T>(RWByteAddressBuffer b);
// Scalar overload, declared without a body.
// glsl emits interpolateAtCentroid; spirv_direct emits the glsl450
// InterpolateAtCentroid extended instruction on the single argument (_0).
__generic<T : __BuiltinArithmeticType>
__target_intrinsic(glsl, interpolateAtCentroid)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 InterpolateAtCentroid _0")
+[__readNone]
T EvaluateAttributeAtCentroid(T x);
// Vector overload, declared without a body; uses the same glsl and spirv_direct
// mappings as the scalar overload (interpolateAtCentroid / InterpolateAtCentroid).
__generic<T : __BuiltinArithmeticType, let N : int>
__target_intrinsic(glsl, interpolateAtCentroid)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 InterpolateAtCentroid _0")
+[__readNone]
vector<T,N> EvaluateAttributeAtCentroid(vector<T,N> x);
__generic<T : __BuiltinArithmeticType, let N : int, let M : int>
__target_intrinsic(glsl, interpolateAtCentroid)
+[__readNone]
matrix<T,N,M> EvaluateAttributeAtCentroid(matrix<T,N,M> x)
{
MATRIX_MAP_UNARY(T, N, M, EvaluateAttributeAtCentroid, x);
@@ -1699,15 +1808,18 @@ matrix<T,N,M> EvaluateAttributeAtCentroid(matrix<T,N,M> x)
// Scalar overload, declared without a body.
// glsl emits interpolateAtSample with the sample index cast to int;
// spirv_direct emits glsl450 InterpolateAtSample on both arguments (_0, _1).
__generic<T : __BuiltinArithmeticType>
__target_intrinsic(glsl, "interpolateAtSample($0, int($1))")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 InterpolateAtSample _0 _1")
+[__readNone]
T EvaluateAttributeAtSample(T x, uint sampleindex);
// Vector overload, declared without a body; uses the same glsl and spirv_direct
// mappings as the scalar overload (sample index cast to int on glsl).
__generic<T : __BuiltinArithmeticType, let N : int>
__target_intrinsic(glsl, "interpolateAtSample($0, int($1))")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 InterpolateAtSample _0 _1")
+[__readNone]
vector<T,N> EvaluateAttributeAtSample(vector<T,N> x, uint sampleindex);
__generic<T : __BuiltinArithmeticType, let N : int, let M : int>
__target_intrinsic(glsl, "interpolateAtSample($0, int($1))")
+[__readNone]
matrix<T,N,M> EvaluateAttributeAtSample(matrix<T,N,M> x, uint sampleindex)
{
matrix<T,N,M> result;
@@ -1721,15 +1833,18 @@ matrix<T,N,M> EvaluateAttributeAtSample(matrix<T,N,M> x, uint sampleindex)
// Scalar overload, declared without a body.
// glsl emits interpolateAtOffset with the int2 offset converted to vec2 and
// divided by 16. The spirv_direct snippet converts the offset to float2, applies
// raw opcode 136 with a (16.0, 16.0) constant, then glsl450 instruction 78.
// NOTE(review): 136 is presumably OpFDiv and 78 presumably InterpolateAtOffset,
// mirroring the glsl mapping -- confirm against the SPIR-V / GLSL.std.450 specs.
__generic<T : __BuiltinArithmeticType>
__target_intrinsic(glsl, "interpolateAtOffset($0, vec2($1) / 16.0f)")
__target_intrinsic(spirv_direct, "%foffset = OpConvertSToF _type(float2) resultId _1; %offsetdiv16 = 136 _type(float2) resultId %foffset const(float2, 16.0, 16.0); OpExtInst resultType resultId glsl450 78 _0 %offsetdiv16")
+[__readNone]
T EvaluateAttributeSnapped(T x, int2 offset);
// Vector overload, declared without a body; uses the same glsl and spirv_direct
// mappings as the scalar overload (offset scaled by 1/16 before interpolation).
// NOTE(review): raw opcodes 136 / 78 are presumably OpFDiv / InterpolateAtOffset -- confirm.
__generic<T : __BuiltinArithmeticType, let N : int>
__target_intrinsic(glsl, "interpolateAtOffset($0, vec2($1) / 16.0f)")
__target_intrinsic(spirv_direct, "%foffset = OpConvertSToF _type(float2) resultId _1; %offsetdiv16 = 136 _type(float2) resultId %foffset const(float2, 16.0, 16.0); OpExtInst resultType resultId glsl450 78 _0 %offsetdiv16")
+[__readNone]
vector<T,N> EvaluateAttributeSnapped(vector<T,N> x, int2 offset);
__generic<T : __BuiltinArithmeticType, let N : int, let M : int>
__target_intrinsic(glsl, "interpolateAtOffset($0, vec2($1) / 16.0f)")
+[__readNone]
matrix<T,N,M> EvaluateAttributeSnapped(matrix<T,N,M> x, int2 offset)
{
matrix<T,N,M> result;
@@ -1748,12 +1863,14 @@ __target_intrinsic(glsl)
__target_intrinsic(cuda, "$P_exp($0)")
__target_intrinsic(cpp, "$P_exp($0)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Exp _0")
+[__readNone]
T exp(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Exp _0")
+[__readNone]
vector<T, N> exp(vector<T, N> x)
{
VECTOR_MAP_UNARY(T, N, exp, x);
@@ -1761,6 +1878,7 @@ vector<T, N> exp(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> exp(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(T, N, M, exp, x);
@@ -1774,12 +1892,14 @@ __target_intrinsic(glsl)
__target_intrinsic(cuda, "$P_exp2($0)")
__target_intrinsic(cpp, "$P_exp2($0)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Exp2 _0")
+[__readNone]
T exp2(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Exp2 _0")
+[__readNone]
vector<T,N> exp2(vector<T,N> x)
{
VECTOR_MAP_UNARY(T, N, exp2, x);
@@ -1787,6 +1907,7 @@ vector<T,N> exp2(vector<T,N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T,N,M> exp2(matrix<T,N,M> x)
{
MATRIX_MAP_UNARY(T, N, M, exp2, x);
@@ -1799,10 +1920,12 @@ __glsl_version(420)
__target_intrinsic(hlsl)
__cuda_sm_version(6.0)
__target_intrinsic(cuda, "__half2float(__ushort_as_half($0))")
+[__readNone]
float f16tof32(uint value);
__generic<let N : int>
__target_intrinsic(hlsl)
+[__readNone]
vector<float, N> f16tof32(vector<uint, N> value)
{
VECTOR_MAP_UNARY(float, N, f16tof32, value);
@@ -1816,10 +1939,12 @@ __glsl_version(420)
__target_intrinsic(hlsl)
__cuda_sm_version(6.0)
__target_intrinsic(cuda, "__half_as_ushort(__float2half($0))")
+[__readNone]
uint f32tof16(float value);
__generic<let N : int>
__target_intrinsic(hlsl)
+[__readNone]
vector<uint, N> f32tof16(vector<float, N> value)
{
VECTOR_MAP_UNARY(uint, N, f32tof16, value);
@@ -1833,11 +1958,13 @@ vector<uint, N> f32tof16(vector<float, N> value)
__target_intrinsic(glsl, "unpackHalf2x16($0).x")
__target_intrinsic(cuda, "__half2float")
__glsl_version(420)
+[__readNone]
float f16tof32(float16_t value);
__generic<let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(cuda, "__half2float")
+[__readNone]
vector<float, N> f16tof32(vector<float16_t, N> value)
{
VECTOR_MAP_UNARY(float, N, f16tof32, value);
@@ -1847,10 +1974,12 @@ vector<float, N> f16tof32(vector<float16_t, N> value)
__target_intrinsic(glsl, "packHalf2x16(vec2($0,0.0))")
__glsl_version(420)
__target_intrinsic(cuda, "__float2half")
+[__readNone]
float16_t f32tof16_(float value);
// Vector overload of f32tof16_ (float -> float16_t); cuda emits __float2half.
// The map must name this function's own result element type and scalar
// overload: the previous arguments (uint, f32tof16) were those of the
// uint-returning f32tof16 overload and do not match the float16_t return type.
__generic<let N : int>
__target_intrinsic(cuda, "__float2half")
+[__readNone]
vector<float16_t, N> f32tof16_(vector<float, N> value)
{
VECTOR_MAP_UNARY(float16_t, N, f32tof16_, value);
@@ -1862,6 +1991,7 @@ vector<float16_t, N> f32tof16_(vector<float, N> value)
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
+[__readNone]
vector<T,N> faceforward(vector<T,N> n, vector<T,N> i, vector<T,N> ng)
{
return dot(ng, i) < T(0.0f) ? n : -n;
@@ -1873,12 +2003,14 @@ __target_intrinsic(glsl,"findMSB")
__target_intrinsic(cuda, "$P_firstbithigh($0)")
__target_intrinsic(cpp, "$P_firstbithigh($0)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 FindSMsb _0")
+[__readNone]
int firstbithigh(int value);
__target_intrinsic(hlsl)
__target_intrinsic(glsl,"findMSB")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 FindSMsb _0")
__generic<let N : int>
+[__readNone]
vector<int, N> firstbithigh(vector<int, N> value)
{
VECTOR_MAP_UNARY(int, N, firstbithigh, value);
@@ -1889,12 +2021,14 @@ __target_intrinsic(glsl,"findMSB")
__target_intrinsic(cuda, "$P_firstbithigh($0)")
__target_intrinsic(cpp, "$P_firstbithigh($0)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 FindUMsb _0")
+[__readNone]
uint firstbithigh(uint value);
__target_intrinsic(hlsl)
__target_intrinsic(glsl,"findMSB")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 FindUMsb _0")
__generic<let N : int>
+[__readNone]
vector<uint,N> firstbithigh(vector<uint,N> value)
{
VECTOR_MAP_UNARY(uint, N, firstbithigh, value);
@@ -1906,12 +2040,14 @@ __target_intrinsic(glsl,"findLSB")
__target_intrinsic(cuda, "$P_firstbitlow($0)")
__target_intrinsic(cpp, "$P_firstbitlow($0)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 FindILsb _0")
+[__readNone]
int firstbitlow(int value);
__target_intrinsic(hlsl)
__target_intrinsic(glsl,"findLSB")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 FindILsb _0")
__generic<let N : int>
+[__readNone]
vector<int,N> firstbitlow(vector<int,N> value)
{
VECTOR_MAP_UNARY(int, N, firstbitlow, value);
@@ -1922,12 +2058,14 @@ __target_intrinsic(glsl,"findLSB")
__target_intrinsic(cuda, "$P_firstbitlow($0)")
__target_intrinsic(cpp, "$P_firstbitlow($0)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 FindILsb _0")
+[__readNone]
uint firstbitlow(uint value);
__target_intrinsic(hlsl)
__target_intrinsic(glsl,"findLSB")
__generic<let N : int>
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 FindILsb _0")
+[__readNone]
vector<uint,N> firstbitlow(vector<uint,N> value)
{
VECTOR_MAP_UNARY(uint, N, firstbitlow, value);
@@ -1941,12 +2079,14 @@ __target_intrinsic(glsl)
__target_intrinsic(cuda, "$P_floor($0)")
__target_intrinsic(cpp, "$P_floor($0)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Floor _0")
+[__readNone]
T floor(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Floor _0")
+[__readNone]
vector<T, N> floor(vector<T, N> x)
{
VECTOR_MAP_UNARY(T, N, floor, x);
@@ -1954,6 +2094,7 @@ vector<T, N> floor(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> floor(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(T, N, M, floor, x);
@@ -1965,12 +2106,14 @@ __target_intrinsic(glsl)
__target_intrinsic(cuda, "$P_fma($0, $1, $2)")
__target_intrinsic(cpp, "$P_fma($0, $1, $2)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Fma _0 _1 _2")
+[__readNone]
double fma(double a, double b, double c);
__generic<let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Fma _0 _1 _2")
+[__readNone]
vector<double, N> fma(vector<double, N> a, vector<double, N> b, vector<double, N> c)
{
VECTOR_MAP_TRINARY(double, N, fma, a, b, c);
@@ -1978,6 +2121,7 @@ vector<double, N> fma(vector<double, N> a, vector<double, N> b, vector<double, N
__generic<let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<double, N, M> fma(matrix<double, N, M> a, matrix<double, N, M> b, matrix<double, N, M> c)
{
MATRIX_MAP_TRINARY(double, N, M, fma, a, b, c);
@@ -1988,6 +2132,7 @@ __generic<T : __BuiltinFloatingPointType>
__target_intrinsic(hlsl)
__target_intrinsic(cuda, "$P_fmod($0, $1)")
__target_intrinsic(cpp, "$P_fmod($0, $1)")
+[__readNone]
T fmod(T x, T y)
{
return x - y * trunc(x/y);
@@ -1995,6 +2140,7 @@ T fmod(T x, T y)
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
+[__readNone]
vector<T, N> fmod(vector<T, N> x, vector<T, N> y)
{
VECTOR_MAP_BINARY(T, N, fmod, x, y);
@@ -2002,6 +2148,7 @@ vector<T, N> fmod(vector<T, N> x, vector<T, N> y)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> fmod(matrix<T, N, M> x, matrix<T, N, M> y)
{
MATRIX_MAP_BINARY(T, N, M, fmod, x, y);
@@ -2014,18 +2161,21 @@ __target_intrinsic(glsl, fract)
__target_intrinsic(cuda, "$P_frac($0)")
__target_intrinsic(cpp, "$P_frac($0)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Fract _0")
+[__readNone]
T frac(T x);
// Vector overload of frac.
// Marked as a target intrinsic for hlsl and for glsl (as `fract`); spirv_direct
// emits the glsl450 Fract extended instruction. The body hands the scalar frac
// to VECTOR_MAP_UNARY (presumably an element-wise fallback for targets without
// a mapping -- the macro definition is not visible here).
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, fract)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Fract _0")
+[__readNone]
vector<T, N> frac(vector<T, N> x)
{
VECTOR_MAP_UNARY(T, N, frac, x);
}
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
+[__readNone]
matrix<T, N, M> frac(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(T, N, M, frac, x);
@@ -2036,12 +2186,14 @@ __generic<T : __BuiltinFloatingPointType>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Frexp _0 _1")
+[__readNone]
T frexp(T x, out T exp);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Frexp _0 _1")
+[__readNone]
vector<T, N> frexp(vector<T, N> x, out vector<T, N> exp)
{
VECTOR_MAP_BINARY(T, N, frexp, x, exp);
@@ -2049,6 +2201,7 @@ vector<T, N> frexp(vector<T, N> x, out vector<T, N> exp)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> frexp(matrix<T, N, M> x, out matrix<T, N, M> exp)
{
MATRIX_MAP_BINARY(T, N, M, frexp, x, exp);
@@ -2056,11 +2209,13 @@ matrix<T, N, M> frexp(matrix<T, N, M> x, out matrix<T, N, M> exp)
// Texture filter width
// Scalar overload, declared without a body.
// NOTE(review): no __target_intrinsic mapping is attached here, whereas the
// vector overload that follows is mapped for both hlsl and glsl -- confirm the
// omission is intentional.
__generic<T : __BuiltinFloatingPointType>
+[__readNone]
T fwidth(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
+[__readNone]
vector<T, N> fwidth(vector<T, N> x)
{
VECTOR_MAP_UNARY(T, N, fwidth, x);
@@ -2068,6 +2223,7 @@ vector<T, N> fwidth(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> fwidth(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(T, N, M, fwidth, x);
@@ -2141,9 +2297,11 @@ matrix<T,N,M> GetAttributeAtVertex(matrix<T,N,M> attribute, uint vertexIndex);
// Get number of samples in render target
// Takes no arguments and returns a uint; declared without a body and without
// any per-target intrinsic mapping in this view.
+[__readNone]
uint GetRenderTargetSampleCount();
// Get position of given sample
// Takes the sample index and returns a float2; declared without a body and
// without any per-target intrinsic mapping in this view.
+[__readNone]
float2 GetRenderTargetSamplePosition(int Index);
// Group memory barrier
@@ -2284,6 +2442,7 @@ __generic<T : __BuiltinFloatingPointType>
__target_intrinsic(hlsl)
__target_intrinsic(cuda, "$P_isfinite($0)")
__target_intrinsic(cpp, "$P_isfinite($0)")
+[__readNone]
bool isfinite(T x)
{
return !(isinf(x) || isnan(x));
@@ -2291,6 +2450,7 @@ bool isfinite(T x)
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
+[__readNone]
vector<bool, N> isfinite(vector<T, N> x)
{
VECTOR_MAP_UNARY(bool, N, isfinite, x);
@@ -2298,6 +2458,7 @@ vector<bool, N> isfinite(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<bool, N, M> isfinite(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(bool, N, M, isfinite, x);
@@ -2309,11 +2470,13 @@ __target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(cuda, "$P_isinf($0)")
__target_intrinsic(cpp, "$P_isinf($0)")
+[__readNone]
bool isinf(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
+[__readNone]
vector<bool, N> isinf(vector<T, N> x)
{
VECTOR_MAP_UNARY(bool, N, isinf, x);
@@ -2321,6 +2484,7 @@ vector<bool, N> isinf(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<bool, N, M> isinf(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(bool, N, M, isinf, x);
@@ -2332,11 +2496,13 @@ __target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(cuda, "$P_isnan($0)")
__target_intrinsic(cpp, "$P_isnan($0)")
+[__readNone]
bool isnan(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
+[__readNone]
vector<bool, N> isnan(vector<T, N> x)
{
VECTOR_MAP_UNARY(bool, N, isnan, x);
@@ -2344,6 +2510,7 @@ vector<bool, N> isnan(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<bool, N, M> isnan(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(bool, N, M, isnan, x);
@@ -2354,6 +2521,7 @@ matrix<bool, N, M> isnan(matrix<T, N, M> x)
__generic<T : __BuiltinFloatingPointType>
__target_intrinsic(hlsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Ldexp _0 _1")
+[__readNone]
T ldexp(T x, T exp)
{
return x * exp2(exp);
@@ -2362,6 +2530,7 @@ T ldexp(T x, T exp)
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Ldexp _0 _1")
+[__readNone]
vector<T, N> ldexp(vector<T, N> x, vector<T, N> exp)
{
return x * exp2(exp);
@@ -2369,6 +2538,7 @@ vector<T, N> ldexp(vector<T, N> x, vector<T, N> exp)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> ldexp(matrix<T, N, M> x, matrix<T, N, M> exp)
{
MATRIX_MAP_BINARY(T, N, M, ldexp, x, exp);
@@ -2379,6 +2549,7 @@ __generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Length _0")
+[__readNone]
T length(vector<T, N> x)
{
return sqrt(dot(x, x));
@@ -2389,6 +2560,7 @@ __generic<T : __BuiltinFloatingPointType>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, mix)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 FMix _0 _1 _2")
+[__readNone]
T lerp(T x, T y, T s)
{
return x * (T(1.0f) - s) + y * s;
@@ -2398,6 +2570,7 @@ __generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, mix)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 FMix _0 _1 _2")
+[__readNone]
vector<T, N> lerp(vector<T, N> x, vector<T, N> y, vector<T, N> s)
{
return x * (T(1.0f) - s) + y * s;
@@ -2405,6 +2578,7 @@ vector<T, N> lerp(vector<T, N> x, vector<T, N> y, vector<T, N> s)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T,N,M> lerp(matrix<T,N,M> x, matrix<T,N,M> y, matrix<T,N,M> s)
{
MATRIX_MAP_TRINARY(T, N, M, lerp, x, y, s);
@@ -2412,6 +2586,7 @@ matrix<T,N,M> lerp(matrix<T,N,M> x, matrix<T,N,M> y, matrix<T,N,M> s)
// Legacy lighting function (obsolete)
__target_intrinsic(hlsl)
+[__readNone]
float4 lit(float n_dot_l, float n_dot_h, float m)
{
let ambient = 1.0f;
@@ -2427,12 +2602,14 @@ __target_intrinsic(glsl)
__target_intrinsic(cuda, "$P_log($0)")
__target_intrinsic(cpp, "$P_log($0)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Log _0")
+[__readNone]
T log(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Log _0")
+[__readNone]
vector<T, N> log(vector<T, N> x)
{
VECTOR_MAP_UNARY(T, N, log, x);
@@ -2440,6 +2617,7 @@ vector<T, N> log(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> log(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(T, N, M, log, x);
@@ -2452,12 +2630,14 @@ __target_intrinsic(glsl, "(log( $0 ) * $S0( 0.43429448190325182765112891891661)
__target_intrinsic(cuda, "$P_log10($0)")
__target_intrinsic(cpp, "$P_log10($0)")
__target_intrinsic(spirv_direct, "%baseElog = OpExtInst resultType resultId glsl450 Log _0; OpFMul resultType resultId _0 %baseElog const(_p,0.43429448190325182765112891891661)")
+[__readNone]
T log10(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, "(log( $0 ) * $S0(0.43429448190325182765112891891661) )" )
__target_intrinsic(spirv_direct, "%baseElog = OpExtInst resultType resultId glsl450 Log _0; OpVectorTimesScalar resultType resultId _0 %baseElog const(_p,0.43429448190325182765112891891661)")
+[__readNone]
vector<T,N> log10(vector<T,N> x)
{
VECTOR_MAP_UNARY(T, N, log10, x);
@@ -2465,6 +2645,7 @@ vector<T,N> log10(vector<T,N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T,N,M> log10(matrix<T,N,M> x)
{
MATRIX_MAP_UNARY(T, N, M, log10, x);
@@ -2477,12 +2658,14 @@ __target_intrinsic(glsl)
__target_intrinsic(cuda, "$P_log2($0)")
__target_intrinsic(cpp, "$P_log2($0)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Log2 _0")
+[__readNone]
T log2(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Log2 _0")
+[__readNone]
vector<T,N> log2(vector<T,N> x)
{
VECTOR_MAP_UNARY(T, N, log2, x);
@@ -2490,6 +2673,7 @@ vector<T,N> log2(vector<T,N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T,N,M> log2(matrix<T,N,M> x)
{
MATRIX_MAP_UNARY(T, N, M, log2, x);
@@ -2503,12 +2687,14 @@ __target_intrinsic(glsl, fma)
__target_intrinsic(cuda, "$P_fma($0, $1, $2)")
__target_intrinsic(cpp, "$P_fma($0, $1, $2)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Fma _0 _1 _2")
+[__readNone]
T mad(T mvalue, T avalue, T bvalue);
__generic<T : __BuiltinArithmeticType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, fma)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Fma _0 _1 _2")
+[__readNone]
vector<T, N> mad(vector<T, N> mvalue, vector<T, N> avalue, vector<T, N> bvalue)
{
VECTOR_MAP_TRINARY(T, N, mad, mvalue, avalue, bvalue);
@@ -2516,6 +2702,7 @@ vector<T, N> mad(vector<T, N> mvalue, vector<T, N> avalue, vector<T, N> bvalue)
__generic<T : __BuiltinArithmeticType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> mad(matrix<T, N, M> mvalue, matrix<T, N, M> avalue, matrix<T, N, M> bvalue)
{
MATRIX_MAP_TRINARY(T, N, M, mad, mvalue, avalue, bvalue);
@@ -2528,6 +2715,7 @@ __target_intrinsic(glsl)
__target_intrinsic(cuda, "$P_max($0, $1)")
__target_intrinsic(cpp, "$P_max($0, $1)")
// max is binary: the SPIR-V snippet must forward both operands (_0 and _1),
// as the other two-argument snippets (e.g. Pow, Distance) do.
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 fus(FMax, UMax, SMax) _0 _1")
+[__readNone]
T max(T x, T y);
// Note: a stdlib implementation of `max` (or `min`) will require splitting
// floating-point and integer cases apart, because the floating-point
@@ -2538,6 +2726,7 @@ __generic<T : __BuiltinIntegerType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
// max is binary: the SPIR-V snippet must forward both operands (_0 and _1),
// as the other two-argument snippets (e.g. Pow, Distance) do.
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 fus(FMax, UMax, SMax) _0 _1")
+[__readNone]
vector<T, N> max(vector<T, N> x, vector<T, N> y)
{
VECTOR_MAP_BINARY(T, N, max, x, y);
@@ -2545,6 +2734,7 @@ vector<T, N> max(vector<T, N> x, vector<T, N> y)
__generic<T : __BuiltinIntegerType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> max(matrix<T, N, M> x, matrix<T, N, M> y)
{
MATRIX_MAP_BINARY(T, N, M, max, x, y);
@@ -2556,12 +2746,14 @@ __target_intrinsic(glsl)
__target_intrinsic(cuda, "$P_max($0, $1)")
__target_intrinsic(cpp, "$P_max($0, $1)")
// max is binary: the SPIR-V snippet must forward both operands (_0 and _1),
// as the other two-argument snippets (e.g. Pow, Distance) do.
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 fus(FMax, UMax, SMax) _0 _1")
+[__readNone]
T max(T x, T y);
// Floating-point vector overload of max: an hlsl/glsl target intrinsic, with
// VECTOR_MAP_BINARY over the scalar max as the body.
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
// max is binary: the SPIR-V snippet must forward both operands (_0 and _1),
// as the other two-argument snippets (e.g. Pow, Distance) do.
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 fus(FMax, UMax, SMax) _0 _1")
+[__readNone]
vector<T, N> max(vector<T, N> x, vector<T, N> y)
{
VECTOR_MAP_BINARY(T, N, max, x, y);
@@ -2569,6 +2761,7 @@ vector<T, N> max(vector<T, N> x, vector<T, N> y)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> max(matrix<T, N, M> x, matrix<T, N, M> y)
{
MATRIX_MAP_BINARY(T, N, M, max, x, y);
@@ -2581,12 +2774,14 @@ __target_intrinsic(glsl)
__target_intrinsic(cuda, "$P_min($0, $1)")
__target_intrinsic(cpp, "$P_min($0, $1)")
// min is binary: the SPIR-V snippet must forward both operands (_0 and _1),
// as the other two-argument snippets (e.g. Pow, Distance) do.
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 fus(FMin, UMin, SMin) _0 _1")
+[__readNone]
T min(T x, T y);
// Integer vector overload of min: an hlsl/glsl target intrinsic, with
// VECTOR_MAP_BINARY over the scalar min as the body.
__generic<T : __BuiltinIntegerType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
// min is binary: the SPIR-V snippet must forward both operands (_0 and _1),
// as the other two-argument snippets (e.g. Pow, Distance) do.
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 fus(FMin, UMin, SMin) _0 _1")
+[__readNone]
vector<T,N> min(vector<T,N> x, vector<T,N> y)
{
VECTOR_MAP_BINARY(T, N, min, x, y);
@@ -2594,6 +2789,7 @@ vector<T,N> min(vector<T,N> x, vector<T,N> y)
__generic<T : __BuiltinIntegerType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T,N,M> min(matrix<T,N,M> x, matrix<T,N,M> y)
{
MATRIX_MAP_BINARY(T, N, M, min, x, y);
@@ -2605,12 +2801,14 @@ __target_intrinsic(glsl)
__target_intrinsic(cuda, "$P_min($0, $1)")
__target_intrinsic(cpp, "$P_min($0, $1)")
// min is binary: the SPIR-V snippet must forward both operands (_0 and _1),
// as the other two-argument snippets (e.g. Pow, Distance) do.
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 fus(FMin, UMin, SMin) _0 _1")
+[__readNone]
T min(T x, T y);
// Floating-point vector overload of min: an hlsl/glsl target intrinsic, with
// VECTOR_MAP_BINARY over the scalar min as the body.
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
// min is binary: the SPIR-V snippet must forward both operands (_0 and _1),
// as the other two-argument snippets (e.g. Pow, Distance) do.
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 fus(FMin, UMin, SMin) _0 _1")
+[__readNone]
vector<T,N> min(vector<T,N> x, vector<T,N> y)
{
VECTOR_MAP_BINARY(T, N, min, x, y);
@@ -2618,6 +2816,7 @@ vector<T,N> min(vector<T,N> x, vector<T,N> y)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T,N,M> min(matrix<T,N,M> x, matrix<T,N,M> y)
{
MATRIX_MAP_BINARY(T, N, M, min, x, y);
@@ -2625,11 +2824,13 @@ matrix<T,N,M> min(matrix<T,N,M> x, matrix<T,N,M> y)
// split into integer and fractional parts (both with same sign)
// Scalar overload, declared without a body; `ip` is an out parameter
// (presumably the integer part, going by its name -- confirm).
// NOTE(review): no __target_intrinsic mapping is attached here, whereas the
// vector overload that follows is mapped for both hlsl and glsl -- confirm the
// omission is intentional.
__generic<T : __BuiltinFloatingPointType>
+[__readNone]
T modf(T x, out T ip);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
+[__readNone]
vector<T,N> modf(vector<T,N> x, out vector<T,N> ip)
{
VECTOR_MAP_BINARY(T, N, modf, x, ip);
@@ -2637,6 +2838,7 @@ vector<T,N> modf(vector<T,N> x, out vector<T,N> ip)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T,N,M> modf(matrix<T,N,M> x, out matrix<T,N,M> ip)
{
MATRIX_MAP_BINARY(T, N, M, modf, x, ip);
@@ -2644,6 +2846,7 @@ matrix<T,N,M> modf(matrix<T,N,M> x, out matrix<T,N,M> ip)
// msad4: masked sum of absolute differences of a 4-byte reference against an 8-byte source, accumulated into four sums
__target_intrinsic(hlsl)
+[__readNone]
uint4 msad4(uint reference, uint2 source, uint4 accum)
{
int4 bytesRef = (reference >> uint4(24, 16, 8, 0)) & 0xFF;
@@ -2665,36 +2868,43 @@ uint4 msad4(uint reference, uint2 source, uint4 accum)
// scalar-scalar
// Declared without a body; bound to the IR multiply op through
// __intrinsic_op($(kIROp_Mul)) rather than a per-target intrinsic.
__generic<T : __BuiltinArithmeticType>
__intrinsic_op($(kIROp_Mul))
+[__readNone]
T mul(T x, T y);
// scalar-vector and vector-scalar
// vector * scalar form; declared without a body and bound to the IR multiply
// op through __intrinsic_op($(kIROp_Mul)).
__generic<T : __BuiltinArithmeticType, let N : int>
__intrinsic_op($(kIROp_Mul))
+[__readNone]
vector<T, N> mul(vector<T, N> x, T y);
// scalar * vector form; declared without a body and bound to the IR multiply
// op through __intrinsic_op($(kIROp_Mul)).
__generic<T : __BuiltinArithmeticType, let N : int>
__intrinsic_op($(kIROp_Mul))
+[__readNone]
vector<T, N> mul(T x, vector<T, N> y);
// scalar-matrix and matrix-scalar
// matrix * scalar form; declared without a body and bound to the IR multiply
// op through __intrinsic_op($(kIROp_Mul)).
__generic<T : __BuiltinArithmeticType, let N : int, let M :int>
__intrinsic_op($(kIROp_Mul))
+[__readNone]
matrix<T, N, M> mul(matrix<T, N, M> x, T y);
// scalar * matrix form; declared without a body and bound to the IR multiply
// op through __intrinsic_op($(kIROp_Mul)).
__generic<T : __BuiltinArithmeticType, let N : int, let M :int>
__intrinsic_op($(kIROp_Mul))
+[__readNone]
matrix<T, N, M> mul(T x, matrix<T, N, M> y);
// vector-vector (dot product)
// Floating-point overload: marked as a target intrinsic for hlsl, emitted as
// `dot` on glsl; the body computes the same value by calling dot().
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, "dot")
+[__readNone]
T mul(vector<T, N> x, vector<T, N> y)
{
T dotProduct = dot(x, y);
return dotProduct;
}
__generic<T : __BuiltinIntegerType, let N : int>
__target_intrinsic(hlsl)
+[__readNone]
T mul(vector<T, N> x, vector<T, N> y)
{
return dot(x, y);
@@ -2704,6 +2914,7 @@ T mul(vector<T, N> x, vector<T, N> y)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, "($1 * $0)")
+[__readNone]
vector<T, M> mul(vector<T, N> left, matrix<T, N, M> right)
{
vector<T,M> result;
@@ -2721,6 +2932,7 @@ vector<T, M> mul(vector<T, N> left, matrix<T, N, M> right)
__generic<T : __BuiltinIntegerType, let N : int, let M : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, "($1 * $0)")
+[__readNone]
vector<T, M> mul(vector<T, N> left, matrix<T, N, M> right)
{
vector<T,M> result;
@@ -2738,6 +2950,7 @@ vector<T, M> mul(vector<T, N> left, matrix<T, N, M> right)
__generic<T : __BuiltinLogicalType, let N : int, let M : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, "($1 * $0)")
+[__readNone]
vector<T, M> mul(vector<T, N> left, matrix<T, N, M> right)
{
vector<T,M> result;
@@ -2757,6 +2970,7 @@ vector<T, M> mul(vector<T, N> left, matrix<T, N, M> right)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, "($1 * $0)")
+[__readNone]
vector<T,N> mul(matrix<T,N,M> left, vector<T,M> right)
{
vector<T,N> result;
@@ -2774,6 +2988,7 @@ vector<T,N> mul(matrix<T,N,M> left, vector<T,M> right)
__generic<T : __BuiltinIntegerType, let N : int, let M : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, "($1 * $0)")
+[__readNone]
vector<T,N> mul(matrix<T,N,M> left, vector<T,M> right)
{
vector<T,N> result;
@@ -2791,6 +3006,7 @@ vector<T,N> mul(matrix<T,N,M> left, vector<T,M> right)
__generic<T : __BuiltinLogicalType, let N : int, let M : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, "($1 * $0)")
+[__readNone]
vector<T,N> mul(matrix<T,N,M> left, vector<T,M> right)
{
vector<T,N> result;
@@ -2810,6 +3026,7 @@ vector<T,N> mul(matrix<T,N,M> left, vector<T,M> right)
__generic<T : __BuiltinFloatingPointType, let R : int, let N : int, let C : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, "($1 * $0)")
+[__readNone]
matrix<T,R,C> mul(matrix<T,R,N> right, matrix<T,N,C> left)
{
matrix<T,R,C> result;
@@ -2828,6 +3045,7 @@ matrix<T,R,C> mul(matrix<T,R,N> right, matrix<T,N,C> left)
__generic<T : __BuiltinIntegerType, let R : int, let N : int, let C : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, "($1 * $0)")
+[__readNone]
matrix<T,R,C> mul(matrix<T,R,N> right, matrix<T,N,C> left)
{
matrix<T,R,C> result;
@@ -2846,6 +3064,7 @@ matrix<T,R,C> mul(matrix<T,R,N> right, matrix<T,N,C> left)
__generic<T : __BuiltinLogicalType, let R : int, let N : int, let C : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, "($1 * $0)")
+[__readNone]
matrix<T,R,C> mul(matrix<T,R,N> right, matrix<T,N,C> left)
{
matrix<T,R,C> result;
@@ -2864,11 +3083,13 @@ matrix<T,R,C> mul(matrix<T,R,N> right, matrix<T,N,C> left)
// noise (deprecated)
+[__readNone]
float noise(float x)
{
return 0;
}
+[__readNone]
__generic<let N : int> float noise(vector<float, N> x)
{
return 0;
@@ -2915,6 +3136,7 @@ __generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Normalize _0")
+[__readNone]
vector<T,N> normalize(vector<T,N> x)
{
return x / length(x);
@@ -2927,12 +3149,14 @@ __target_intrinsic(glsl)
__target_intrinsic(cuda, "$P_pow($0, $1)")
__target_intrinsic(cpp, "$P_pow($0, $1)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Pow _0 _1")
+[__readNone]
T pow(T x, T y);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Pow _0 _1")
+[__readNone]
vector<T, N> pow(vector<T, N> x, vector<T, N> y)
{
VECTOR_MAP_BINARY(T, N, pow, x, y);
@@ -2940,6 +3164,7 @@ vector<T, N> pow(vector<T, N> x, vector<T, N> y)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T,N,M> pow(matrix<T,N,M> x, matrix<T,N,M> y)
{
MATRIX_MAP_BINARY(T, N, M, pow, x, y);
@@ -3087,6 +3312,7 @@ __generic<T : __BuiltinFloatingPointType>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Radians _0")
+[__readNone]
T radians(T x)
{
return x * (T.getPi() / T(180.0f));
@@ -3096,6 +3322,7 @@ __generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Radians _0")
+[__readNone]
vector<T, N> radians(vector<T, N> x)
{
return x * (T.getPi() / T(180.0f));
@@ -3103,6 +3330,7 @@ vector<T, N> radians(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> radians(matrix<T, N, M> x)
{
return x * (T.getPi() / T(180.0f));
@@ -3111,6 +3339,7 @@ matrix<T, N, M> radians(matrix<T, N, M> x)
// Approximate reciprocal
__generic<T : __BuiltinFloatingPointType>
__target_intrinsic(hlsl)
+[__readNone]
T rcp(T x)
{
return T(1.0) / x;
@@ -3118,6 +3347,7 @@ T rcp(T x)
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
+[__readNone]
vector<T, N> rcp(vector<T, N> x)
{
VECTOR_MAP_UNARY(T, N, rcp, x);
@@ -3125,6 +3355,7 @@ vector<T, N> rcp(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> rcp(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(T, N, M, rcp, x);
@@ -3135,6 +3366,7 @@ __generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Reflect _0 _1")
+[__readNone]
vector<T,N> reflect(vector<T,N> i, vector<T,N> n)
{
return i - T(2) * dot(n,i) * n;
@@ -3145,6 +3377,7 @@ __generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Refract _0 _1 _2")
+[__readNone]
vector<T,N> refract(vector<T,N> i, vector<T,N> n, T eta)
{
let dotNI = dot(n,i);
@@ -3158,10 +3391,12 @@ __target_intrinsic(hlsl)
__target_intrinsic(glsl, "bitfieldReverse")
__target_intrinsic(cuda, "$P_reversebits($0)")
__target_intrinsic(cpp, "$P_reversebits($0)")
+[__readNone]
uint reversebits(uint value);
__target_intrinsic(glsl, "bitfieldReverse")
__generic<let N : int>
+[__readNone]
vector<uint, N> reversebits(vector<uint, N> value)
{
VECTOR_MAP_UNARY(uint, N, reversebits, value);
@@ -3174,12 +3409,14 @@ __target_intrinsic(glsl)
__target_intrinsic(cuda, "$P_round($0)")
__target_intrinsic(cpp, "$P_round($0)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Round _0")
+[__readNone]
T round(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Round _0")
+[__readNone]
vector<T, N> round(vector<T, N> x)
{
VECTOR_MAP_UNARY(T, N, round, x);
@@ -3187,6 +3424,7 @@ vector<T, N> round(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T,N,M> round(matrix<T,N,M> x)
{
MATRIX_MAP_UNARY(T, N, M, round, x);
@@ -3199,6 +3437,7 @@ __target_intrinsic(glsl, "inversesqrt($0)")
__target_intrinsic(cuda, "$P_rsqrt($0)")
__target_intrinsic(cpp, "$P_rsqrt($0)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 InverseSqrt _0")
+[__readNone]
T rsqrt(T x)
{
return T(1.0) / sqrt(x);
@@ -3208,6 +3447,7 @@ __generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, "inversesqrt($0)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 InverseSqrt _0")
+[__readNone]
vector<T, N> rsqrt(vector<T, N> x)
{
VECTOR_MAP_UNARY(T, N, rsqrt, x);
@@ -3215,6 +3455,7 @@ vector<T, N> rsqrt(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> rsqrt(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(T, N, M, rsqrt, x);
@@ -3224,6 +3465,7 @@ matrix<T, N, M> rsqrt(matrix<T, N, M> x)
__generic<T : __BuiltinFloatingPointType>
__target_intrinsic(hlsl)
+[__readNone]
T saturate(T x)
{
return clamp<T>(x, T(0), T(1));
@@ -3231,6 +3473,7 @@ T saturate(T x)
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
+[__readNone]
vector<T,N> saturate(vector<T,N> x)
{
return clamp<T,N>(x,
@@ -3240,6 +3483,7 @@ vector<T,N> saturate(vector<T,N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T,N,M> saturate(matrix<T,N,M> x)
{
MATRIX_MAP_UNARY(T, N, M, saturate, x);
@@ -3252,12 +3496,14 @@ __target_intrinsic(glsl, "int(sign($0))")
__target_intrinsic(cuda, "$P_sign($0)")
__target_intrinsic(cpp, "$P_sign($0)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 fi(FSign, SSign) _0")
+[__readNone]
int sign(T x);
__generic<T : __BuiltinSignedArithmeticType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, "ivec$N0(sign($0))")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 fi(FSign, SSign) _0")
+[__readNone]
vector<int, N> sign(vector<T, N> x)
{
VECTOR_MAP_UNARY(int, N, sign, x);
@@ -3265,6 +3511,7 @@ vector<int, N> sign(vector<T, N> x)
__generic<T : __BuiltinSignedArithmeticType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<int, N, M> sign(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(int, N, M, sign, x);
@@ -3279,12 +3526,14 @@ __target_intrinsic(glsl)
__target_intrinsic(cuda, "$P_sin($0)")
__target_intrinsic(cpp, "$P_sin($0)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Sin _0")
+[__readNone]
T sin(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Sin _0")
+[__readNone]
vector<T, N> sin(vector<T, N> x)
{
VECTOR_MAP_UNARY(T, N, sin, x);
@@ -3292,6 +3541,7 @@ vector<T, N> sin(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> sin(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(T, N, M, sin, x);
@@ -3301,6 +3551,7 @@ matrix<T, N, M> sin(matrix<T, N, M> x)
__generic<T : __BuiltinFloatingPointType>
__target_intrinsic(hlsl)
__target_intrinsic(cuda, "$P_sincos($0, $1, $2)")
+[__readNone]
void sincos(T x, out T s, out T c)
{
s = sin(x);
@@ -3309,6 +3560,7 @@ void sincos(T x, out T s, out T c)
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
+[__readNone]
void sincos(vector<T,N> x, out vector<T,N> s, out vector<T,N> c)
{
s = sin(x);
@@ -3317,6 +3569,7 @@ void sincos(vector<T,N> x, out vector<T,N> s, out vector<T,N> c)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
void sincos(matrix<T,N,M> x, out matrix<T,N,M> s, out matrix<T,N,M> c)
{
s = sin(x);
@@ -3330,12 +3583,14 @@ __target_intrinsic(glsl)
__target_intrinsic(cuda, "$P_sinh($0)")
__target_intrinsic(cpp, "$P_sinh($0)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Sinh _0")
+[__readNone]
T sinh(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Sinh _0")
+[__readNone]
vector<T, N> sinh(vector<T, N> x)
{
VECTOR_MAP_UNARY(T, N, sinh, x);
@@ -3343,6 +3598,7 @@ vector<T, N> sinh(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> sinh(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(T, N, M, sinh, x);
@@ -3353,6 +3609,7 @@ __generic<T : __BuiltinFloatingPointType>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 SmoothStep _0 _1 _2")
+[__readNone]
T smoothstep(T min, T max, T x)
{
let t = saturate((x - min) / (max - min));
@@ -3363,6 +3620,7 @@ __generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 SmoothStep _0 _1 _2")
+[__readNone]
vector<T, N> smoothstep(vector<T, N> min, vector<T, N> max, vector<T, N> x)
{
VECTOR_MAP_TRINARY(T, N, smoothstep, min, max, x);
@@ -3370,6 +3628,7 @@ vector<T, N> smoothstep(vector<T, N> min, vector<T, N> max, vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> smoothstep(matrix<T, N, M> min, matrix<T, N, M> max, matrix<T, N, M> x)
{
MATRIX_MAP_TRINARY(T, N, M, smoothstep, min, max, x);
@@ -3382,12 +3641,14 @@ __target_intrinsic(glsl)
__target_intrinsic(cuda, "$P_sqrt($0)")
__target_intrinsic(cpp, "$P_sqrt($0)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Sqrt _0")
+[__readNone]
T sqrt(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Sqrt _0")
+[__readNone]
vector<T, N> sqrt(vector<T, N> x)
{
VECTOR_MAP_UNARY(T, N, sqrt, x);
@@ -3395,6 +3656,7 @@ vector<T, N> sqrt(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> sqrt(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(T, N, M, sqrt, x);
@@ -3405,6 +3667,7 @@ __generic<T : __BuiltinFloatingPointType>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Step _0 _1")
+[__readNone]
T step(T y, T x)
{
return x < y ? T(0.0f) : T(1.0f);
@@ -3414,6 +3677,7 @@ __generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Step _0 _1")
+[__readNone]
vector<T,N> step(vector<T,N> y, vector<T,N> x)
{
VECTOR_MAP_BINARY(T, N, step, y, x);
@@ -3421,6 +3685,7 @@ vector<T,N> step(vector<T,N> y, vector<T,N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> step(matrix<T, N, M> y, matrix<T, N, M> x)
{
MATRIX_MAP_BINARY(T, N, M, step, y, x);
@@ -3433,12 +3698,14 @@ __target_intrinsic(glsl)
__target_intrinsic(cuda, "$P_tan($0)")
__target_intrinsic(cpp, "$P_tan($0)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Tan _0")
+[__readNone]
T tan(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Tan _0")
+[__readNone]
vector<T, N> tan(vector<T, N> x)
{
VECTOR_MAP_UNARY(T, N, tan, x);
@@ -3446,6 +3713,7 @@ vector<T, N> tan(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> tan(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(T, N, M, tan, x);
@@ -3458,12 +3726,14 @@ __target_intrinsic(glsl)
__target_intrinsic(cuda, "$P_tanh($0)")
__target_intrinsic(cpp, "$P_tanh($0)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Tanh _0")
+[__readNone]
T tanh(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Tanh _0")
+[__readNone]
vector<T,N> tanh(vector<T,N> x)
{
VECTOR_MAP_UNARY(T, N, tanh, x);
@@ -3471,6 +3741,7 @@ vector<T,N> tanh(vector<T,N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T,N,M> tanh(matrix<T,N,M> x)
{
MATRIX_MAP_UNARY(T, N, M, tanh, x);
@@ -3480,6 +3751,7 @@ matrix<T,N,M> tanh(matrix<T,N,M> x)
__generic<T : __BuiltinType, let N : int, let M : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
+[__readNone]
matrix<T, M, N> transpose(matrix<T, N, M> x)
{
matrix<T,M,N> result;
@@ -3496,12 +3768,14 @@ __target_intrinsic(glsl)
__target_intrinsic(cuda, "$P_trunc($0)")
__target_intrinsic(cpp, "$P_trunc($0)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Trunc _0")
+[__readNone]
T trunc(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Trunc _0")
+[__readNone]
vector<T, N> trunc(vector<T, N> x)
{
VECTOR_MAP_UNARY(T, N, trunc, x);
@@ -3509,6 +3783,7 @@ vector<T, N> trunc(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> trunc(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(T, N, M, trunc, x);
@@ -4779,6 +5054,7 @@ void __executeCallable(uint shaderIndex, int payloadLocation);
__generic<Payload>
__target_intrinsic(__glslRayTracing, "$XC")
[__readNone]
+[__AlwaysFoldIntoUseSiteAttribute]
int __callablePayloadLocation(__ref Payload payload);
// Now we provide a hard-coded definition of `CallShader()` for GLSL-based
@@ -4834,6 +5110,7 @@ void __traceRay(
__generic<Payload>
__target_intrinsic(__glslRayTracing, "$XP")
[__readNone]
+[__AlwaysFoldIntoUseSiteAttribute]
int __rayPayloadLocation(__ref Payload payload);
__generic<payload_t>
@@ -5677,6 +5954,7 @@ Ref<T> __hitObjectAttributes<T>()
__generic<Attributes>
__target_intrinsic(__glslRayTracing, "$XH")
[__readNone]
+[__AlwaysFoldIntoUseSiteAttribute]
int __hitObjectAttributesLocation(__ref Attributes attributes);
/// Immutable data type representing a ray hit or a miss. Can be used to invoke hit or miss shading,
diff --git a/source/slang/slang-ast-modifier.h b/source/slang/slang-ast-modifier.h
index 99e221b1e..6ac464784 100644
--- a/source/slang/slang-ast-modifier.h
+++ b/source/slang/slang-ast-modifier.h
@@ -1083,6 +1083,14 @@ class RequiresNVAPIAttribute : public Attribute
SLANG_AST_CLASS(RequiresNVAPIAttribute)
};
+
+ /// A `[__AlwaysFoldIntoUseSite]` attribute indicates that calls to the modified
+ /// function should always be folded into their use sites during source emit.
+class AlwaysFoldIntoUseSiteAttribute :public Attribute
+{
+ SLANG_AST_CLASS(AlwaysFoldIntoUseSiteAttribute)
+};
+
/// The `[ForwardDifferentiable]` attribute indicates that a function can be forward-differentiated.
class ForwardDifferentiableAttribute : public DifferentiableAttribute
{
diff --git a/source/slang/slang-emit-c-like.cpp b/source/slang/slang-emit-c-like.cpp
index c664449e5..7840dc450 100644
--- a/source/slang/slang-emit-c-like.cpp
+++ b/source/slang/slang-emit-c-like.cpp
@@ -1244,14 +1244,24 @@ bool CLikeSourceEmitter::shouldFoldInstIntoUseSites(IRInst* inst)
return true;
}
+ // Always fold if inst is a call into an [__AlwaysFoldIntoUseSite] function.
+ if (auto call = as<IRCall>(inst))
+ {
+ auto callee = call->getCallee();
+ if (getResolvedInstForDecorations(callee)->findDecoration<IRAlwaysFoldIntoUseSiteDecoration>())
+ {
+ return true;
+ }
+ }
+
// Having dealt with all of the cases where we *must* fold things
// above, we can now deal with the more general cases where we
// *should not* fold things.
-
// Don't fold something with no users:
if(!inst->hasUses())
return false;
+
// Don't fold something that has multiple users:
if(inst->hasMoreThanOneUse())
return false;
diff --git a/source/slang/slang-emit-c-like.h b/source/slang/slang-emit-c-like.h
index ff229c38b..1cd2045c7 100644
--- a/source/slang/slang-emit-c-like.h
+++ b/source/slang/slang-emit-c-like.h
@@ -326,7 +326,7 @@ public:
void emitSimpleValue(IRInst* inst) { emitSimpleValueImpl(inst); }
- bool shouldFoldInstIntoUseSites(IRInst* inst);
+ virtual bool shouldFoldInstIntoUseSites(IRInst* inst);
void emitOperand(IRInst* inst, EmitOpInfo const& outerPrec) { emitOperandImpl(inst, outerPrec); }
diff --git a/source/slang/slang-emit-cpp.cpp b/source/slang/slang-emit-cpp.cpp
index ba6b26ec6..795ec74b0 100644
--- a/source/slang/slang-emit-cpp.cpp
+++ b/source/slang/slang-emit-cpp.cpp
@@ -1557,6 +1557,46 @@ void CPPSourceEmitter::emitGlobalInstImpl(IRInst* inst)
}
}
+bool CPPSourceEmitter::shouldFoldInstIntoUseSites(IRInst* inst)
+{
+ bool result = Super::shouldFoldInstIntoUseSites(inst);
+ if (!result)
+ return result;
+ if (as<IRVectorType>(inst->getDataType()) || as<IRMatrixType>(inst->getDataType()))
+ {
+ // If a vector or matrix value is being used in a reshape/cast,
+ // we should not fold it because the implementation of cast will have multiple references to it.
+ for (auto use = inst->firstUse; use; use = use->nextUse)
+ {
+ switch (use->getUser()->getOp())
+ {
+ case kIROp_MatrixReshape:
+ case kIROp_VectorReshape:
+ case kIROp_IntCast:
+ case kIROp_FloatCast:
+ case kIROp_CastIntToFloat:
+ case kIROp_CastFloatToInt:
+ return false;
+ default:
+ break;
+ }
+ }
+ switch (inst->getOp())
+ {
+ case kIROp_MatrixReshape:
+ case kIROp_VectorReshape:
+ case kIROp_IntCast:
+ case kIROp_FloatCast:
+ case kIROp_CastIntToFloat:
+ case kIROp_CastFloatToInt:
+ return false;
+ default:
+ break;
+ }
+ }
+ return true;
+}
+
static bool _isExported(IRInst* inst)
{
for (auto decoration : inst->getDecorations())
diff --git a/source/slang/slang-emit-cpp.h b/source/slang/slang-emit-cpp.h
index 92780e0a4..71c382f87 100644
--- a/source/slang/slang-emit-cpp.h
+++ b/source/slang/slang-emit-cpp.h
@@ -71,6 +71,7 @@ protected:
virtual void emitFuncDecorationsImpl(IRFunc* func) SLANG_OVERRIDE;
virtual void emitVarDecorationsImpl(IRInst* var) SLANG_OVERRIDE;
virtual void emitGlobalInstImpl(IRInst* inst) SLANG_OVERRIDE;
+ virtual bool shouldFoldInstIntoUseSites(IRInst* inst) SLANG_OVERRIDE;
const UnownedStringSlice* getVectorElementNames(BaseType elemType, Index elemCount);
diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp
index e2f00bf88..a25fae5ae 100644
--- a/source/slang/slang-emit.cpp
+++ b/source/slang/slang-emit.cpp
@@ -891,8 +891,8 @@ Result linkAndOptimizeIR(
}
}
- // Run a final round of DCE to clean up unused things after phi-elimination.
- eliminateDeadCode(irModule);
+ // Run a final round of simplifications to clean up unused things after phi-elimination.
+ simplifyNonSSAIR(irModule);
// We include one final step to (optionally) dump the IR and validate
// it after all of the optimization passes are complete. This should
diff --git a/source/slang/slang-ir-autodiff-unzip.cpp b/source/slang/slang-ir-autodiff-unzip.cpp
index 096751836..a05fe7044 100644
--- a/source/slang/slang-ir-autodiff-unzip.cpp
+++ b/source/slang/slang-ir-autodiff-unzip.cpp
@@ -559,6 +559,7 @@ IRFunc* DiffUnzipPass::extractPrimalFunc(
{
if (inst->getOp() == kIROp_Call)
{
+ // The primal calls should be marked as no side effect so they can be DCE'd if possible.
builder.addSimpleDecoration<IRNoSideEffectDecoration>(inst);
}
}
diff --git a/source/slang/slang-ir-autodiff.h b/source/slang/slang-ir-autodiff.h
index fa01d50ae..a4eb94461 100644
--- a/source/slang/slang-ir-autodiff.h
+++ b/source/slang/slang-ir-autodiff.h
@@ -212,20 +212,12 @@ struct DifferentiableTypeConformanceContext
IRInst* getZeroMethodForType(IRBuilder* builder, IRType* origType)
{
auto result = lookUpInterfaceMethod(builder, origType, sharedContext->zeroMethodStructKey);
- if (result && !result->findDecoration<IRNoSideEffectDecoration>())
- {
- builder->addDecoration(result, kIROp_NoSideEffectDecoration);
- }
return result;
}
IRInst* getAddMethodForType(IRBuilder* builder, IRType* origType)
{
auto result = lookUpInterfaceMethod(builder, origType, sharedContext->addMethodStructKey);
- if (result && !result->findDecoration<IRNoSideEffectDecoration>())
- {
- builder->addDecoration(result, kIROp_NoSideEffectDecoration);
- }
return result;
}
};
diff --git a/source/slang/slang-ir-dce.cpp b/source/slang/slang-ir-dce.cpp
index 58c9b23f1..e5c9b1fdb 100644
--- a/source/slang/slang-ir-dce.cpp
+++ b/source/slang/slang-ir-dce.cpp
@@ -24,6 +24,11 @@ struct DeadCodeEliminationContext
// These uses will be replaced with `undefInst`.
IRInst* undefInst = nullptr;
+ // Track whether we have removed any phi parameters.
+ // If so, we need to rerun the DCE pass, because removing them
+ // may expose new DCE opportunities.
+ bool phiRemoved = false;
+
// Our overall process is going to be to determine
// which instructions in the module are "live"
// and then eliminate anything that wasn't found to
@@ -98,104 +103,115 @@ struct DeadCodeEliminationContext
bool processInst(IRInst* root)
{
- // First of all, we know that the root instruction
- // should be considered as live, because otherwise
- // we'd end up eliminating it, so that is a
- // good place to start.
- //
- markInstAsLive(root);
-
- // Ensure there is a global undef inst that is always alive.
- // This undef inst will be used to fill in weak-referencing uses
- // whose used value is marked as dead and eliminated.
- // We always make sure this undef inst is available to prevent
- // infiniate oscilating loops.
- markInstAsLive(getUndefInst());
-
- // Marking the module as live should have
- // seeded our work list, so we can now start
- // processing entries off of our work list
- // until it goes dry.
- //
- while (workList.getCount())
+ bool result = false;
+ for (;;)
{
- auto inst = workList.getLast();
- workList.removeLast();
+ liveInsts.Clear();
+ workList.clear();
- if (!isChildInstOf(inst, root))
- continue;
-
- // At this point we know that `inst` is live,
- // and we want to start considering which other
- // instructions must be live because of that
- // knowlege.
- //
- // A first easy case is that the parent (if any)
- // of a live instruction had better be live, or
- // else we might delete the parent, and
- // the child with it.
+ // First of all, we know that the root instruction
+ // should be considered as live, because otherwise
+ // we'd end up eliminating it, so that is a
+ // good place to start.
//
- markInstAsLive(inst->getParent());
-
- // Next the type of a live instruction, and all
- // of its operands must also be live, or else
- // we won't be able to compute its value.
+ markInstAsLive(root);
+
+ // Ensure there is a global undef inst that is always alive.
+ // This undef inst will be used to fill in weak-referencing uses
+ // whose used value is marked as dead and eliminated.
+ // We always make sure this undef inst is available to prevent
+ // infinite oscillating loops.
+ markInstAsLive(getUndefInst());
+
+ // Marking the module as live should have
+ // seeded our work list, so we can now start
+ // processing entries off of our work list
+ // until it goes dry.
//
- markInstAsLive(inst->getFullType());
- UInt operandCount = inst->getOperandCount();
- for (UInt ii = 0; ii < operandCount; ++ii)
+ while (workList.getCount())
{
- // There are some type of operands that needs to be treated as
- // "weak" references -- they can never hold things alive, and
- // whenever we delete the referenced value, these operands needs
- // to be replaced with `undef`.
- if (!isWeakReferenceOperand(inst, ii))
- markInstAsLive(inst->getOperand(ii));
- }
+ auto inst = workList.getLast();
+ workList.removeLast();
+
+ if (!isChildInstOf(inst, root))
+ continue;
+
+ // At this point we know that `inst` is live,
+ // and we want to start considering which other
+ // instructions must be live because of that
+ // knowledge.
+ //
+ // A first easy case is that the parent (if any)
+ // of a live instruction had better be live, or
+ // else we might delete the parent, and
+ // the child with it.
+ //
+ markInstAsLive(inst->getParent());
+
+ // Next the type of a live instruction, and all
+ // of its operands must also be live, or else
+ // we won't be able to compute its value.
+ //
+ markInstAsLive(inst->getFullType());
+ UInt operandCount = inst->getOperandCount();
+ for (UInt ii = 0; ii < operandCount; ++ii)
+ {
+ // There are some types of operands that need to be treated as
+ // "weak" references -- they can never hold things alive, and
+ // whenever we delete the referenced value, these operands need
+ // to be replaced with `undef`.
+ if (!isWeakReferenceOperand(inst, ii))
+ markInstAsLive(inst->getOperand(ii));
+ }
- // Finally, we need to consider the children
- // and decorations of the instruction.
- //
- // Note that just because an instruction is
- // live doesn't mean its children must be, or
- // else we'd never eliminate *anything* (we
- // marked the whole module as live, and everything
- // is a transitive child of the module).
- //
- // Decorations, in contrast, are always live if their
- // parents are (because we don't want to silently drop
- // decorations). It is still important to *mark*
- // decorations as live, because they have operands,
- // and those operands need to be marked as live.
- // We will fold decorations into the same loop
- // as children for simplicity.
- //
- // To keep the code here simple, we'll defer the
- // decision of whether a child (or decoration)
- // should be live when its parent is to a subroutine.
- //
- for (auto child : inst->getDecorationsAndChildren())
- {
- if (shouldInstBeLiveIfParentIsLive(child))
+ // Finally, we need to consider the children
+ // and decorations of the instruction.
+ //
+ // Note that just because an instruction is
+ // live doesn't mean its children must be, or
+ // else we'd never eliminate *anything* (we
+ // marked the whole module as live, and everything
+ // is a transitive child of the module).
+ //
+ // Decorations, in contrast, are always live if their
+ // parents are (because we don't want to silently drop
+ // decorations). It is still important to *mark*
+ // decorations as live, because they have operands,
+ // and those operands need to be marked as live.
+ // We will fold decorations into the same loop
+ // as children for simplicity.
+ //
+ // To keep the code here simple, we'll defer the
+ // decision of whether a child (or decoration)
+ // should be live when its parent is to a subroutine.
+ //
+ for (auto child : inst->getDecorationsAndChildren())
{
- // In this case, we know `inst` is live and
- // its `child` should be live if its parent is,
- // so the `child` must be live too.
- //
- markInstAsLive(child);
+ if (shouldInstBeLiveIfParentIsLive(child))
+ {
+ // In this case, we know `inst` is live and
+ // its `child` should be live if its parent is,
+ // so the `child` must be live too.
+ //
+ markInstAsLive(child);
+ }
}
}
- }
- // If our work list runs dry, that means we've reached a steady
- // state where everything that is transitively relevant to
- // the "outputs" of the module has been marked as live.
- //
- // Now we can simply walk through all of our instructions
- // recursively and eliminate those that are "dead" by
- // virtue of not having been found live.
- //
- return eliminateDeadInstsRec(root);
+ // If our work list runs dry, that means we've reached a steady
+ // state where everything that is transitively relevant to
+ // the "outputs" of the module has been marked as live.
+ //
+ // Now we can simply walk through all of our instructions
+ // recursively and eliminate those that are "dead" by
+ // virtue of not having been found live.
+ //
+ phiRemoved = false;
+ result |= eliminateDeadInstsRec(root);
+ if (!phiRemoved)
+ break;
+ }
+ return result;
}
// Given the basic infrastructrure above, let's
@@ -207,6 +223,25 @@ struct DeadCodeEliminationContext
return processInst(module->getModuleInst());
}
+ void removePhiArgs(IRInst* phiParam)
+ {
+ auto block = cast<IRBlock>(phiParam->getParent());
+ UInt paramIndex = 0;
+ for (auto p = block->getFirstParam(); p; p = p->getNextParam())
+ {
+ if (p == phiParam)
+ break;
+ paramIndex++;
+ }
+ for (auto predBlock : block->getPredecessors())
+ {
+ auto termInst = as<IRUnconditionalBranch>(predBlock->getTerminator());
+ SLANG_ASSERT(paramIndex < termInst->getArgCount());
+ termInst->removeArgument(paramIndex);
+ }
+ phiRemoved = true;
+ }
+
bool eliminateDeadInstsRec(IRInst* inst)
{
bool changed = false;
@@ -226,6 +261,12 @@ struct DeadCodeEliminationContext
{
inst->replaceUsesWith(getUndefInst());
}
+
+ if (inst->getOp() == kIROp_Param)
+ {
+ // For phi parameters, we also need to remove the matching argument from each predecessor's branch.
+ removePhiArgs(inst);
+ }
inst->removeAndDeallocate();
changed = true;
}
@@ -261,6 +302,16 @@ struct DeadCodeEliminationContext
}
};
+bool isFirstBlock(IRInst* inst)
+{
+ auto block = as<IRBlock>(inst);
+ if (!block)
+ return false;
+ if (!block->getParent())
+ return false;
+ return block->getParent()->getFirstBlock() == block;
+}
+
bool shouldInstBeLiveIfParentIsLive(IRInst* inst, IRDeadCodeEliminationOptions options)
{
// The main source of confusion/complexity here is that
@@ -275,7 +326,31 @@ bool shouldInstBeLiveIfParentIsLive(IRInst* inst, IRDeadCodeEliminationOptions o
// when it is executed, then we should keep it around.
//
if (inst->mightHaveSideEffects())
- return true;
+ {
+ // If the inst might have side effects, we should keep it alive.
+ // An exception is a call to a `[__readNone]` function whose
+ // arguments are all local variables (`var`s in the same function)
+ // that have no use other than this call.
+ auto call = as<IRCall>(inst);
+ if (!call)
+ return true;
+ if (!getResolvedInstForDecorations(call->getCallee())->findDecoration<IRReadNoneDecoration>())
+ return true;
+ auto parentFunc = getParentFunc(inst);
+ if (!parentFunc)
+ return true;
+ for (UInt i = 0; i < call->getArgCount(); i++)
+ {
+ auto arg = call->getArg(i);
+ if (getParentFunc(arg) != parentFunc)
+ return true;
+ if (arg->getOp() != kIROp_Var)
+ return true;
+ if (arg->hasMoreThanOneUse())
+ return true;
+ }
+ return false;
+ }
//
// The `mightHaveSideEffects` query is conservative, and will
// return `true` as its default mode, so once we are past that
@@ -352,17 +427,10 @@ bool shouldInstBeLiveIfParentIsLive(IRInst* inst, IRDeadCodeEliminationOptions o
switch (inst->getOp())
{
// Function parameters obviously shouldn't get eliminated,
+ // even if nothing references them; only params of a first block are kept unconditionally.
- // (phi nodes) will be considered live when their block is,
- // just so that we don't have to deal with any complications
- // around re-writing the relevant inter-block argument passing.
- //
- // TODO: A smarter DCE pass could deal with this case more
- // carefully, or we could improve the interprocedural SCCP
- // pass to deal with block parameters instead.
+ // even if nothing references them.
//
case kIROp_Param:
- return true;
+ return isFirstBlock(inst->getParent());
// IR struct types and witness tables are currently kludged
// so that they have child instructions that represent their
diff --git a/source/slang/slang-ir-glsl-legalize.cpp b/source/slang/slang-ir-glsl-legalize.cpp
index e111a548b..9c16f40ac 100644
--- a/source/slang/slang-ir-glsl-legalize.cpp
+++ b/source/slang/slang-ir-glsl-legalize.cpp
@@ -2027,8 +2027,8 @@ void legalizeMeshOutputParam(
IRBuilderInsertLocScope locScope{builder};
builder->setInsertBefore(p);
- auto e = builder->emitElementAddress(meshOutputBlockType, blockParam, p->getIndex());
- auto a = builder->emitFieldAddress(builtin.type, e, builtin.key);
+ auto e = builder->emitElementAddress(builder->getPtrType(meshOutputBlockType), blockParam, p->getIndex());
+ auto a = builder->emitFieldAddress(builder->getPtrType(builtin.type), e, builtin.key);
p->replaceUsesWith(a);
});
diff --git a/source/slang/slang-ir-inst-defs.h b/source/slang/slang-ir-inst-defs.h
index 4dea3985a..4b1037240 100644
--- a/source/slang/slang-ir-inst-defs.h
+++ b/source/slang/slang-ir-inst-defs.h
@@ -728,6 +728,9 @@ INST(HighLevelDeclDecoration, highLevelDecl, 1, 0)
/// Applie to an IR function and signals that inlining should not be performed unless unavoidable.
INST(NoInlineDecoration, noInline, 0, 0)
+ /// A call to the decorated function should always be folded into its use site.
+ INST(AlwaysFoldIntoUseSiteDecoration, alwaysFold, 0, 0)
+
INST(PayloadDecoration, payload, 0, 0)
/* Mesh Shader outputs */
diff --git a/source/slang/slang-ir-insts.h b/source/slang/slang-ir-insts.h
index fe20f17f5..f2e4e05d3 100644
--- a/source/slang/slang-ir-insts.h
+++ b/source/slang/slang-ir-insts.h
@@ -325,6 +325,7 @@ IR_SIMPLE_DECORATION(HLSLExportDecoration)
IR_SIMPLE_DECORATION(KeepAliveDecoration)
IR_SIMPLE_DECORATION(RequiresNVAPIDecoration)
IR_SIMPLE_DECORATION(NoInlineDecoration)
+IR_SIMPLE_DECORATION(AlwaysFoldIntoUseSiteDecoration)
struct IRNVAPIMagicDecoration : IRDecoration
{
@@ -1925,7 +1926,7 @@ struct IRUnconditionalBranch : IRTerminatorInst
UInt getArgCount();
IRUse* getArgs();
IRInst* getArg(UInt index);
-
+ void removeArgument(UInt index);
IR_PARENT_ISA(UnconditionalBranch);
};
@@ -1968,20 +1969,6 @@ struct IRConditionalBranch : IRTerminatorInst
IRBlock* getFalseBlock() { return (IRBlock*)falseBlock.get(); }
};
-// A conditional branch that represent the test inside a loop
-struct IRLoopTest : IRConditionalBranch
-{
-};
-
-// A conditional branch that represents a one-sided `if`:
-//
-// if( <condition> ) { <trueBlock> }
-// <falseBlock>
-struct IRIf : IRConditionalBranch
-{
- IRBlock* getAfterBlock() { return getFalseBlock(); }
-};
-
// A conditional branch that represents a two-sided `if`:
//
// if( <condition> ) { <trueBlock> }
@@ -3361,6 +3348,7 @@ public:
IRInst* emitBitOr(IRType* type, IRInst* left, IRInst* right);
IRInst* emitBitNot(IRType* type, IRInst* value);
IRInst* emitNeg(IRType* type, IRInst* value);
+ IRInst* emitNot(IRType* type, IRInst* value);
IRInst* emitAdd(IRType* type, IRInst* left, IRInst* right);
IRInst* emitSub(IRType* type, IRInst* left, IRInst* right);
diff --git a/source/slang/slang-ir-loop-unroll.cpp b/source/slang/slang-ir-loop-unroll.cpp
index 79b00f60a..2f689ebde 100644
--- a/source/slang/slang-ir-loop-unroll.cpp
+++ b/source/slang/slang-ir-loop-unroll.cpp
@@ -47,7 +47,7 @@ static bool _eliminateDeadBlocks(List<IRBlock*>& blocks, IRBlock* unreachableBlo
return changed;
}
-List<IRBlock*> _collectBlocksInLoop(Dictionary<IRBlock*, int>& blockOrdering, IRLoop* loopInst)
+List<IRBlock*> _collectBlocksInLoop(IRDominatorTree* dom, IRLoop* loopInst)
{
List<IRBlock*> loopBlocks;
HashSet<IRBlock*> loopBlocksSet;
@@ -58,7 +58,6 @@ List<IRBlock*> _collectBlocksInLoop(Dictionary<IRBlock*, int>& blockOrdering, IR
};
auto firstBlock = as<IRBlock>(loopInst->block.get());
auto breakBlock = as<IRBlock>(loopInst->breakBlock.get());
- auto breakBlockOrdering = blockOrdering[breakBlock].GetValue();
addBlock(firstBlock);
for (Index i = 0; i < loopBlocks.getCount(); i++)
@@ -68,18 +67,19 @@ List<IRBlock*> _collectBlocksInLoop(Dictionary<IRBlock*, int>& blockOrdering, IR
{
if (succ == breakBlock)
continue;
- auto successorOrdering = blockOrdering[block].GetValue();
- // The target must be post-dominated by the break block in order to be considered
- // the body of the loop.
- // Since we don't support arbitrary goto or multi-level continue, the simple
- // ordering comparison is sufficient to serve as a post-dominance check.
- if (successorOrdering < breakBlockOrdering)
+ if (dom->dominates(firstBlock, succ) && !dom->dominates(breakBlock, succ))
addBlock(succ);
}
}
return loopBlocks;
}
+ List<IRBlock*> collectBlocksInLoop(IRGlobalValueWithCode* func, IRLoop* loopInst)
+ {
+     // Build a dominator tree for `func` and let the internal helper
+     // gather the blocks that belong to the body of `loopInst`.
+     auto dominatorTree = computeDominatorTree(func);
+     return _collectBlocksInLoop(dominatorTree, loopInst);
+ }
+
static int _getLoopMaxIterationsToUnroll(IRLoop* loopInst)
{
static constexpr int kMaxIterationsToAttempt = 100;
@@ -483,15 +483,7 @@ bool unrollLoopsInFunc(
// Remove any continue jumps from the loop.
eliminateContinueBlocks(module, loop);
- auto postOrderReverseCFG = getPostorderOnReverseCFG(func);
- Dictionary<IRBlock*, int> blockOrdering;
-
- for (Index i = 0; i < postOrderReverseCFG.getCount(); i++)
- {
- blockOrdering[postOrderReverseCFG[i]] = (int)i;
- }
-
- auto blocks = _collectBlocksInLoop(blockOrdering, loop);
+ auto blocks = collectBlocksInLoop(func, loop);
auto loopLoc = loop->sourceLoc;
if (!_unrollLoop(module, loop, blocks))
{
diff --git a/source/slang/slang-ir-loop-unroll.h b/source/slang/slang-ir-loop-unroll.h
index d9c31e6be..6f7a41192 100644
--- a/source/slang/slang-ir-loop-unroll.h
+++ b/source/slang/slang-ir-loop-unroll.h
@@ -1,18 +1,22 @@
// slang-ir-loop-unroll.h
#pragma once
+#include "../core/slang-list.h"
+
namespace Slang
{
struct IRLoop;
struct IRGlobalValueWithCode;
class DiagnosticSink;
struct IRModule;
+ struct IRBlock;
// Return true if successfull, false if errors occurred.
bool unrollLoopsInFunc(IRModule* module, IRGlobalValueWithCode* func, DiagnosticSink* sink);
bool unrollLoopsInModule(IRModule* module, DiagnosticSink* sink);
+ List<IRBlock*> collectBlocksInLoop(IRGlobalValueWithCode* func, IRLoop* loop);
// Turn a loop with continue block into a loop with only back jumps and breaks.
// Each iteration will be wrapped in a breakable region, where everything before `continue`
diff --git a/source/slang/slang-ir-propagate-func-properties.cpp b/source/slang/slang-ir-propagate-func-properties.cpp
new file mode 100644
index 000000000..f98a77fc7
--- /dev/null
+++ b/source/slang/slang-ir-propagate-func-properties.cpp
@@ -0,0 +1,186 @@
+#include "slang-ir-propagate-func-properties.h"
+
+#include "slang-ir.h"
+#include "slang-ir-insts.h"
+#include "slang-ir-util.h"
+
+
+namespace Slang
+{
+ /// Adds `IRReadNoneDecoration` to every function in `module` that can be shown
+ /// to have no side effects visible outside of itself.
+ ///
+ /// A function qualifies when it has a body, is not a target intrinsic, contains
+ /// no side-effecting instruction other than the handful understood here, only
+ /// calls functions that already carry `IRReadNoneDecoration`, and references
+ /// nothing from outside its own body except constants, types and a small set
+ /// of known-harmless global opcodes.
+ ///
+ /// The analysis is repeated until a full pass decorates no new function.
+ ///
+ /// @param module The IR module to analyze and decorate in place.
+ /// @return `true` if at least one function received a new decoration.
+ bool propagateFuncProperties(IRModule* module)
+ {
+     bool result = false;
+     List<IRFunc*> workList;
+     HashSet<IRFunc*> workListSet;
+
+     // Append `f` to the work list unless it has already been queued in this pass.
+     auto addToWorkList = [&](IRFunc* f)
+     {
+         if (workListSet.Add(f))
+             workList.add(f);
+     };
+
+     // Queue every function that contains a call to `f`. For a generic function
+     // the callers are found through the users of its `specialize` insts.
+     auto addCallersToWorkList = [&](IRFunc* f)
+     {
+         if (auto g = findOuterGeneric(f))
+         {
+             for (auto use = g->firstUse; use; use = use->nextUse)
+             {
+                 if (use->getUser()->getOp() == kIROp_Specialize)
+                 {
+                     auto specialize = use->getUser();
+                     for (auto iuse = specialize->firstUse; iuse; iuse = iuse->nextUse)
+                     {
+                         if (auto userFunc = getParentFunc(iuse->getUser()))
+                             addToWorkList(userFunc);
+                     }
+                 }
+             }
+             return;
+         }
+         for (auto use = f->firstUse; use; use = use->nextUse)
+         {
+             if (use->getUser()->getOp() == kIROp_Call)
+             {
+                 if (auto userFunc = getParentFunc(use->getUser()))
+                     addToWorkList(userFunc);
+             }
+         }
+     };
+
+     for (;;)
+     {
+         bool changed = false;
+         workList.clear();
+         workListSet.Clear();
+
+         // Add side effect free functions and their transitive callers to work list.
+         for (auto inst : module->getGlobalInsts())
+         {
+             auto genericInst = as<IRGeneric>(inst);
+             if (genericInst)
+             {
+                 inst = findGenericReturnVal(genericInst);
+             }
+             if (auto func = as<IRFunc>(inst))
+             {
+                 if (func->findDecoration<IRReadNoneDecoration>())
+                 {
+                     addCallersToWorkList(func);
+                 }
+             }
+         }
+
+         // Add remaining functions to work list.
+         for (auto inst : module->getGlobalInsts())
+         {
+             auto genericInst = as<IRGeneric>(inst);
+             if (genericInst)
+             {
+                 inst = findGenericReturnVal(genericInst);
+             }
+             if (auto func = as<IRFunc>(inst))
+             {
+                 addToWorkList(func);
+             }
+         }
+
+         IRBuilder builder(module);
+
+         // The list may grow while we iterate: decorating a function queues its callers.
+         for (Index i = 0; i < workList.getCount(); i++)
+         {
+             auto f = workList[i];
+             bool hasSideEffectCall = false;
+             if (f->findDecoration<IRReadNoneDecoration>())
+                 continue;
+             // Never propagate to functions without a body.
+             if (f->getFirstBlock() == nullptr)
+                 continue;
+             if (f->findDecoration<IRTargetIntrinsicDecoration>())
+                 continue;
+             for (auto block : f->getBlocks())
+             {
+                 for (auto inst : block->getChildren())
+                 {
+                     // Is this inst known to not have global side effect/analyzable?
+                     if (inst->mightHaveSideEffects())
+                     {
+                         switch (inst->getOp())
+                         {
+                         case kIROp_ifElse:
+                         case kIROp_unconditionalBranch:
+                         case kIROp_Switch:
+                         case kIROp_Return:
+                         case kIROp_loop:
+                         case kIROp_Store:
+                         case kIROp_Call:
+                         case kIROp_Param:
+                         case kIROp_Unreachable:
+                             break;
+                         default:
+                             // We have a inst that has side effect and is not understood by this method.
+                             // e.g. bufferStore, discard, etc.
+                             // Only this function is disqualified; the remaining
+                             // functions in the work list must still be analyzed.
+                             hasSideEffectCall = true;
+                             break;
+                         }
+                         if (hasSideEffectCall)
+                             break;
+                     }
+
+                     if (auto call = as<IRCall>(inst))
+                     {
+                         auto callee = getResolvedInstForDecorations(call->getCallee());
+                         switch (callee->getOp())
+                         {
+                         default:
+                             // We are calling an unknown function, so we have to assume
+                             // there are side effects in the call.
+                             hasSideEffectCall = true;
+                             break;
+                         case kIROp_Func:
+                             if (!callee->findDecoration<IRReadNoneDecoration>())
+                             {
+                                 hasSideEffectCall = true;
+                             }
+                             break;
+                         }
+                     }
+
+                     // Are any operands defined in global scope?
+                     for (UInt o = 0; o < inst->getOperandCount() && !hasSideEffectCall; o++)
+                     {
+                         auto operand = inst->getOperand(o);
+                         if (getParentFunc(operand) == f)
+                             continue;
+                         if (as<IRConstant>(operand))
+                             continue;
+                         if (as<IRType>(operand))
+                             continue;
+                         switch (operand->getOp())
+                         {
+                         case kIROp_Specialize:
+                         case kIROp_LookupWitness:
+                         case kIROp_StructKey:
+                         case kIROp_WitnessTable:
+                         case kIROp_WitnessTableEntry:
+                         case kIROp_undefined:
+                         case kIROp_Func:
+                             continue;
+                         default:
+                             break;
+                         }
+                         // Any other outside value may be global state.
+                         hasSideEffectCall = true;
+                     }
+
+                     // Once disqualified there is nothing more to learn from this function.
+                     if (hasSideEffectCall)
+                         break;
+                 }
+                 if (hasSideEffectCall)
+                     break;
+             }
+             if (!hasSideEffectCall)
+             {
+                 builder.addDecoration(f, kIROp_ReadNoneDecoration);
+                 addCallersToWorkList(f);
+                 changed = true;
+             }
+         }
+         result |= changed;
+         if (!changed)
+             break;
+     }
+     return result;
+ }
+}
diff --git a/source/slang/slang-ir-propagate-func-properties.h b/source/slang/slang-ir-propagate-func-properties.h
new file mode 100644
index 000000000..6df2de18e
--- /dev/null
+++ b/source/slang/slang-ir-propagate-func-properties.h
@@ -0,0 +1,7 @@
+#pragma once
+
+namespace Slang
+{
+struct IRModule;
+bool propagateFuncProperties(IRModule* module);
+}
diff --git a/source/slang/slang-ir-redundancy-removal.cpp b/source/slang/slang-ir-redundancy-removal.cpp
index f3996fc01..2a2047de9 100644
--- a/source/slang/slang-ir-redundancy-removal.cpp
+++ b/source/slang/slang-ir-redundancy-removal.cpp
@@ -8,10 +8,118 @@ namespace Slang
struct RedundancyRemovalContext
{
RefPtr<IRDominatorTree> dom;
- bool removeRedundancyInBlock(DeduplicateContext& deduplicateContext, IRBlock* block)
+ // Returns true when `inst` has one of the opcodes listed below, i.e. the
+ // opcodes this pass treats as candidates for deduplication and for hoisting
+ // out of loops. A call only qualifies when `isPureFunctionalCall` accepts it;
+ // every opcode that is not listed is rejected.
+ bool isMovableInst(IRInst* inst)
+ {
+ switch (inst->getOp())
+ {
+ case kIROp_Add:
+ case kIROp_Sub:
+ case kIROp_Mul:
+ case kIROp_Div:
+ case kIROp_FRem:
+ case kIROp_IRem:
+ case kIROp_Lsh:
+ case kIROp_Rsh:
+ case kIROp_And:
+ case kIROp_Or:
+ case kIROp_Not:
+ case kIROp_FieldExtract:
+ case kIROp_FieldAddress:
+ case kIROp_GetElement:
+ case kIROp_GetElementPtr:
+ case kIROp_UpdateElement:
+ case kIROp_OptionalHasValue:
+ case kIROp_GetOptionalValue:
+ case kIROp_MakeOptionalValue:
+ case kIROp_MakeTuple:
+ case kIROp_GetTupleElement:
+ case kIROp_MakeStruct:
+ case kIROp_MakeArray:
+ case kIROp_MakeArrayFromElement:
+ case kIROp_MakeVector:
+ case kIROp_MakeMatrix:
+ case kIROp_MakeMatrixFromScalar:
+ case kIROp_MakeVectorFromScalar:
+ case kIROp_swizzle:
+ case kIROp_MatrixReshape:
+ case kIROp_MakeString:
+ case kIROp_MakeResultError:
+ case kIROp_MakeResultValue:
+ case kIROp_GetResultError:
+ case kIROp_GetResultValue:
+ case kIROp_CastFloatToInt:
+ case kIROp_CastIntToFloat:
+ case kIROp_CastIntToPtr:
+ case kIROp_CastPtrToBool:
+ case kIROp_CastPtrToInt:
+ case kIROp_BitAnd:
+ case kIROp_BitNot:
+ case kIROp_BitOr:
+ case kIROp_BitXor:
+ case kIROp_BitCast:
+ case kIROp_Reinterpret:
+ case kIROp_Greater:
+ case kIROp_Less:
+ case kIROp_Geq:
+ case kIROp_Leq:
+ case kIROp_Neq:
+ case kIROp_Eql:
+ return true;
+ case kIROp_Call:
+ // Calls are only movable when they are known to be purely functional.
+ return isPureFunctionalCall(as<IRCall>(inst));
+ default:
+ return false;
+ }
+ }
+
+ // Walks up the dominator chain of `inst`'s block and moves `inst` to the end
+ // of each dominating block that is terminated by a `loop` inst, for as long
+ // as all of its operands are available there. Returns true if `inst` moved.
+ bool tryHoistInstToOuterMostLoop(IRGlobalValueWithCode* func, IRInst* inst)
+ {
+     // An operand is available in `candidate` when it is defined outside of
+     // `func` (a global value), or when its defining block dominates `candidate`.
+     auto operandsAvailableIn = [&](IRBlock* candidate)
+     {
+         for (UInt operandIndex = 0; operandIndex < inst->getOperandCount(); operandIndex++)
+         {
+             auto operand = inst->getOperand(operandIndex);
+             if (getParentFunc(operand) != func)
+                 continue;
+             auto definingBlock = as<IRBlock>(operand->getParent());
+             if (!definingBlock)
+                 return false;
+             if (!dom->dominates(definingBlock, candidate))
+                 return false;
+         }
+         return true;
+     };
+
+     bool hoisted = false;
+     auto candidate = dom->getImmediateDominator(as<IRBlock>(inst->getParent()));
+     while (candidate)
+     {
+         auto terminator = candidate->getTerminator();
+         if (terminator->getOp() == kIROp_loop)
+         {
+             // Stop at the first loop header that the inst cannot be moved into.
+             if (!operandsAvailableIn(candidate))
+                 break;
+
+             // Place the inst right before the `loop` terminator, then keep
+             // looking for an even further out position.
+             inst->insertBefore(terminator);
+             hoisted = true;
+         }
+         candidate = dom->getImmediateDominator(candidate);
+     }
+     return hoisted;
+ }
+
+ bool removeRedundancyInBlock(DeduplicateContext& deduplicateContext, IRGlobalValueWithCode* func, IRBlock* block)
{
bool result = false;
- for (auto instP : block->getChildren())
+ for (auto instP : block->getModifiableChildren())
{
auto resultInst = deduplicateContext.deduplicate(instP, [&](IRInst* inst)
{
@@ -20,75 +128,25 @@ struct RedundancyRemovalContext
return false;
if (dom->isUnreachable(parentBlock))
return false;
-
- switch (inst->getOp())
- {
- case kIROp_Add:
- case kIROp_Sub:
- case kIROp_Mul:
- case kIROp_Div:
- case kIROp_Module:
- case kIROp_Lsh:
- case kIROp_Rsh:
- case kIROp_And:
- case kIROp_Or:
- case kIROp_Not:
- case kIROp_FieldExtract:
- case kIROp_FieldAddress:
- case kIROp_GetElement:
- case kIROp_GetElementPtr:
- case kIROp_UpdateElement:
- case kIROp_OptionalHasValue:
- case kIROp_GetOptionalValue:
- case kIROp_MakeOptionalValue:
- case kIROp_MakeTuple:
- case kIROp_GetTupleElement:
- case kIROp_MakeStruct:
- case kIROp_MakeArray:
- case kIROp_MakeArrayFromElement:
- case kIROp_MakeVector:
- case kIROp_MakeMatrix:
- case kIROp_MakeMatrixFromScalar:
- case kIROp_MakeVectorFromScalar:
- case kIROp_swizzle:
- case kIROp_MatrixReshape:
- case kIROp_MakeString:
- case kIROp_MakeResultError:
- case kIROp_MakeResultValue:
- case kIROp_GetResultError:
- case kIROp_GetResultValue:
- case kIROp_CastFloatToInt:
- case kIROp_CastIntToFloat:
- case kIROp_CastIntToPtr:
- case kIROp_CastPtrToBool:
- case kIROp_CastPtrToInt:
- case kIROp_BitAnd:
- case kIROp_BitNot:
- case kIROp_BitOr:
- case kIROp_BitXor:
- case kIROp_BitCast:
- case kIROp_Reinterpret:
- case kIROp_Greater:
- case kIROp_Less:
- case kIROp_Geq:
- case kIROp_Leq:
- case kIROp_Neq:
- case kIROp_Eql:
- return true;
- case kIROp_Call:
- return isPureFunctionalCall(as<IRCall>(inst));
- default:
- return false;
- }
+ return isMovableInst(inst);
});
if (resultInst != instP)
+ {
+ instP->replaceUsesWith(resultInst);
result = true;
+ }
+ else if (isMovableInst(resultInst))
+ {
+ // This inst is unique, we should consider hoisting it
+ // if it is inside a loop.
+ result |= tryHoistInstToOuterMostLoop(func, resultInst);
+ }
}
for (auto child : dom->getImmediatelyDominatedBlocks(block))
{
DeduplicateContext subContext;
subContext.deduplicateMap = deduplicateContext.deduplicateMap;
- result |= removeRedundancyInBlock(subContext, child);
+ result |= removeRedundancyInBlock(subContext, func, child);
}
return result;
}
@@ -122,7 +180,142 @@ bool removeRedundancyInFunc(IRGlobalValueWithCode* func)
RedundancyRemovalContext context;
context.dom = computeDominatorTree(func);
DeduplicateContext deduplicateCtx;
- return context.removeRedundancyInBlock(deduplicateCtx, root);
+ return context.removeRedundancyInBlock(deduplicateCtx, func, root);
+}
+
+ // Strips `FieldAddress` / `GetElementPtr` links from an address by repeatedly
+ // following operand 0, and returns the first inst that is neither of the two.
+ // Returns null if the chain runs into a null inst.
+ static IRInst* _getRootVar(IRInst* inst)
+ {
+     for (auto current = inst; current; current = current->getOperand(0))
+     {
+         const auto op = current->getOp();
+         if (op != kIROp_FieldAddress && op != kIROp_GetElementPtr)
+             return current;
+     }
+     return nullptr;
+ }
+
+ // Tries to prove that `store` is dead and, if so, removes and deallocates it.
+ // Returns true when the store was removed, false when it was left in place.
+ bool tryRemoveRedundantStore(IRGlobalValueWithCode* func, IRStore* store)
+ {
+ // We perform a quick and conservative check:
+ // A store is redundant if it is followed by another store to the same address,
+ // either in the same basic block or in a block reached by following unconditional
+ // branches, and there are no instructions in between that may use any addresses
+ // related to this address.
+ bool hasAddrUse = false;
+ bool hasOverridingStore = false;
+
+ // Stores whose root address is not defined inside `func` (e.g. stores to
+ // global variables) will never get removed.
+ auto rootVar = _getRootVar(store->getPtr());
+ if (!isChildInstOf(rootVar, func))
+ return false;
+
+ // A store can be removed if it stores into a local variable
+ // that has no other uses than store.
+ if (auto varInst = as<IRVar>(rootVar))
+ {
+ bool hasNonStoreUse = false;
+ // If the entire access chain has no non-store use, we can safely remove the store.
+ HashSet<IRInst*> knownAccessChain;
+ for (auto accessChain = store->getPtr(); accessChain;)
+ {
+ knownAccessChain.Add(accessChain);
+ for (auto use = accessChain->firstUse; use; use = use->nextUse)
+ {
+ // Decorations do not read the address.
+ if (as<IRDecoration>(use->getUser()))
+ continue;
+ // The user is a link of the access chain that was already examined.
+ if (knownAccessChain.Contains(use->getUser()))
+ continue;
+ // The user is a store and this use is its first operand, i.e. the
+ // address is being written to rather than being stored as a value.
+ if (use->getUser()->getOp() == kIROp_Store &&
+ use == use->getUser()->getOperands())
+ {
+ continue;
+ }
+ hasNonStoreUse = true;
+ break;
+ }
+ if (hasNonStoreUse)
+ break;
+ // Step one link up the access chain; stop once the root is reached.
+ switch (accessChain->getOp())
+ {
+ case kIROp_GetElementPtr:
+ case kIROp_FieldAddress:
+ accessChain = accessChain->getOperand(0);
+ continue;
+ default:
+ break;
+ }
+ break;
+ }
+ if (!hasNonStoreUse)
+ {
+ store->removeAndDeallocate();
+ return true;
+ }
+ }
+
+ // A store can be removed if there are subsequent stores to the same variable,
+ // and there are no insts in between the stores that can read the variable.
+
+ // Blocks already entered through an unconditional branch, so that the scan
+ // does not enter the same block twice.
+ HashSet<IRBlock*> visitedBlocks;
+ for (auto next = store->getNextInst(); next;)
+ {
+ if (auto nextStore = as<IRStore>(next))
+ {
+ if (nextStore->getPtr() == store->getPtr())
+ {
+ hasOverridingStore = true;
+ break;
+ }
+ }
+
+ // If we see any inst that reads or modifies the address before seeing
+ // an overriding store, don't remove the store.
+ // We can make the test more accurate by collecting all addresses related to
+ // the target address first, and only bail out if any of the related addresses
+ // are involved.
+ switch (next->getOp())
+ {
+ case kIROp_Load:
+ if (canAddressesPotentiallyAlias(func, next->getOperand(0), store->getPtr()))
+ {
+ hasAddrUse = true;
+ }
+ break;
+ default:
+ if (canInstHaveSideEffectAtAddress(func, next, store->getPtr()))
+ {
+ hasAddrUse = true;
+ }
+ break;
+ }
+ if (hasAddrUse)
+ break;
+
+ // If we are at the end of the current block and see an unconditional branch,
+ // we can follow the path and check the subsequent block.
+ if (auto branch = as<IRUnconditionalBranch>(next))
+ {
+ auto nextBlock = branch->getTargetBlock();
+ if (visitedBlocks.Add(nextBlock))
+ {
+ next = nextBlock->getFirstInst();
+ continue;
+ }
+ }
+ next = next->getNextInst();
+ }
+
+ if (!hasAddrUse && hasOverridingStore)
+ {
+ store->removeAndDeallocate();
+ return true;
+ }
+ return false;
 }
bool eliminateRedundantLoadStore(IRGlobalValueWithCode* func)
@@ -158,57 +351,7 @@ bool eliminateRedundantLoadStore(IRGlobalValueWithCode* func)
}
else if (auto store = as<IRStore>(inst))
{
- // We perform a quick and conservative check:
- // A store is redundant if it is followed by another store to the same address in
- // the same basic block, and there are no instructions that may use any addresses
- // related to this address.
- bool hasAddrUse = false;
- bool hasOverridingStore = false;
-
- // Stores to global variables will never get removed.
- if (!isChildInstOf(store->getPtr(), func))
- hasAddrUse = true;
-
- for (auto next = store->getNextInst(); next; next = next->getNextInst())
- {
- if (auto nextStore = as<IRStore>(next))
- {
- if (nextStore->getPtr() == store->getPtr())
- {
- hasOverridingStore = true;
- break;
- }
- }
-
- // If we see any insts that have reads or modifies the address before seeing
- // an overriding store, don't remove the store.
- // We can make the test more accurate by collecting all addresses related to
- // the target address first, and only bail out if any of the related addresses
- // are involved.
- switch (next->getOp())
- {
- case kIROp_Load:
- if (canAddressesPotentiallyAlias(func, next->getOperand(0), store->getPtr()))
- {
- hasAddrUse = true;
- }
- break;
- default:
- if (canInstHaveSideEffectAtAddress(func, next, store->getPtr()))
- {
- hasAddrUse = true;
- }
- break;
- }
- if (hasAddrUse)
- break;
- }
-
- if (!hasAddrUse && hasOverridingStore)
- {
- store->removeAndDeallocate();
- changed = true;
- }
+ changed |= tryRemoveRedundantStore(func, store);
}
inst = nextInst;
}
diff --git a/source/slang/slang-ir-sccp.cpp b/source/slang/slang-ir-sccp.cpp
index d05527e59..691bd7ff0 100644
--- a/source/slang/slang-ir-sccp.cpp
+++ b/source/slang/slang-ir-sccp.cpp
@@ -1439,7 +1439,9 @@ struct SCCPContext
inst->replaceUsesWith(constantVal);
if( !inst->mightHaveSideEffects() )
{
- instsToRemove.add(inst);
+ // Don't delete phi parameters, they will be cleaned up in CFG simplification.
+ if (inst->getOp() != kIROp_Param)
+ instsToRemove.add(inst);
}
}
}
diff --git a/source/slang/slang-ir-simplify-cfg.cpp b/source/slang/slang-ir-simplify-cfg.cpp
index 7e9e105e1..b814442fa 100644
--- a/source/slang/slang-ir-simplify-cfg.cpp
+++ b/source/slang/slang-ir-simplify-cfg.cpp
@@ -4,6 +4,8 @@
#include "slang-ir.h"
#include "slang-ir-dominators.h"
#include "slang-ir-restructure.h"
+#include "slang-ir-util.h"
+#include "slang-ir-loop-unroll.h"
namespace Slang
{
@@ -31,8 +33,7 @@ static BreakableRegion* findBreakableRegion(Region* region)
// it is needed and hasn't been generated yet.
static bool isTrivialSingleIterationLoop(
IRGlobalValueWithCode* func,
- IRLoop* loop,
- CFGSimplificationContext& inoutContext)
+ IRLoop* loop)
{
auto targetBlock = loop->getTargetBlock();
if (targetBlock->getPredecessors().getCount() != 1) return false;
@@ -52,14 +53,14 @@ static bool isTrivialSingleIterationLoop(
//
// We need to verify this is a trivial loop by checking if there is any multi-level breaks
// that skips out of this loop.
-
- if (!inoutContext.domTree)
- inoutContext.domTree = computeDominatorTree(func);
- if (!inoutContext.regionTree)
- inoutContext.regionTree = generateRegionTreeForFunc(func, nullptr);
+ CFGSimplificationContext context;
+ if (!context.domTree)
+ context.domTree = computeDominatorTree(func);
+ if (!context.regionTree)
+ context.regionTree = generateRegionTreeForFunc(func, nullptr);
SimpleRegion* targetBlockRegion = nullptr;
- if (!inoutContext.regionTree->mapBlockToRegion.TryGetValue(targetBlock, targetBlockRegion))
+ if (!context.regionTree->mapBlockToRegion.TryGetValue(targetBlock, targetBlockRegion))
return false;
BreakableRegion* loopBreakableRegion = findBreakableRegion(targetBlockRegion);
LoopRegion* loopRegion = as<LoopRegion>(loopBreakableRegion);
@@ -67,18 +68,18 @@ static bool isTrivialSingleIterationLoop(
return false;
for (auto block : func->getBlocks())
{
- if (!inoutContext.domTree->dominates(loop->getTargetBlock(), block))
+ if (!context.domTree->dominates(loop->getTargetBlock(), block))
continue;
- if (inoutContext.domTree->dominates(loop->getBreakBlock(), block))
+ if (context.domTree->dominates(loop->getBreakBlock(), block))
continue;
SimpleRegion* region = nullptr;
- if (!inoutContext.regionTree->mapBlockToRegion.TryGetValue(block, region))
+ if (!context.regionTree->mapBlockToRegion.TryGetValue(block, region))
return false;
for (auto branchTarget : block->getSuccessors())
{
SimpleRegion* targetRegion = nullptr;
- if (!inoutContext.regionTree->mapBlockToRegion.TryGetValue(branchTarget, targetRegion))
+ if (!context.regionTree->mapBlockToRegion.TryGetValue(branchTarget, targetRegion))
return false;
// If multi-level break out that skips over this loop exists, then this is not a trivial loop.
if (targetRegion->isDescendentOf(loopRegion))
@@ -96,6 +97,104 @@ static bool isTrivialSingleIterationLoop(
return true;
}
+ // Conservatively decides whether the loop started by `loopInst` has any effect
+ // that is observable outside of the loop. Returns false only when every inst in
+ // the blocks returned by `collectBlocksInLoop` is unused outside those blocks
+ // and each potentially side-effecting inst is one of the cases handled below;
+ // returns true in every other case.
+ static bool doesLoopHasSideEffect(IRGlobalValueWithCode* func, IRLoop* loopInst)
+ {
+ auto blocks = collectBlocksInLoop(func, loopInst);
+ // Set view of `blocks` for membership tests.
+ HashSet<IRBlock*> loopBlocks;
+ for (auto b : blocks)
+ loopBlocks.Add(b);
+ auto addressHasOutOfLoopUses = [&](IRInst* addr)
+ {
+ // The entire access chain of `addr` must have no uses outside the loop.
+ // The root variable must be a local var.
+ for (auto chainNode = addr; chainNode;)
+ {
+ // A chain node that does not belong to `func` is treated as escaping.
+ if (getParentFunc(chainNode) != func)
+ return true;
+ for (auto use = chainNode->firstUse; use; use = use->nextUse)
+ {
+ if (!loopBlocks.Contains(as<IRBlock>(use->getUser()->getParent())))
+ return true;
+ }
+ switch (chainNode->getOp())
+ {
+ case kIROp_GetElementPtr:
+ case kIROp_FieldAddress:
+ chainNode = chainNode->getOperand(0);
+ continue;
+ case kIROp_Var:
+ break;
+ default:
+ // The chain is rooted at something other than a `Var`.
+ return true;
+ }
+ break;
+ }
+ return false;
+ };
+
+ for (auto b : blocks)
+ {
+ for (auto inst : b->getChildren())
+ {
+ // Is this inst used anywhere outside the loop? If so the loop has side effect.
+ for (auto use = inst->firstUse; use; use = use->nextUse)
+ {
+ if (!loopBlocks.Contains(as<IRBlock>(use->getUser()->getParent())))
+ return true;
+ }
+
+ // The inst can't possibly have side effect? Skip it.
+ if (!inst->mightHaveSideEffects())
+ continue;
+
+ // This inst might have side effect, try to prove that the
+ // side effect does not leak beyond the scope of the loop.
+ if (auto call = as<IRCall>(inst))
+ {
+ auto callee = getResolvedInstForDecorations(call->getCallee());
+ if (!callee || !callee->findDecoration<IRReadNoneDecoration>())
+ return true;
+ // We are calling a pure function, check if any of the return
+ // variables are used outside the loop.
+ for (UInt i = 0; i < call->getArgCount(); i++)
+ {
+ auto arg = call->getArg(i);
+ // Only arguments whose type is not a value type are checked as addresses.
+ if (!isValueType(arg->getDataType()))
+ {
+ if (addressHasOutOfLoopUses(arg))
+ return true;
+ }
+ }
+ }
+ else if (auto store = as<IRStore>(inst))
+ {
+ if (addressHasOutOfLoopUses(store->getPtr()))
+ return true;
+ }
+ else if (auto branch = as<IRUnconditionalBranch>(inst))
+ {
+ // Branches that stay inside the loop are harmless.
+ if (loopBlocks.Contains(branch->getTargetBlock()))
+ continue;
+ // Branching out of the loop with some argument is considered
+ // having a side effect.
+ if (branch->getArgCount() != 0)
+ return true;
+ }
+ else if (as<IRIfElse>(inst) || as<IRSwitch>(inst) || as<IRLoop>(inst))
+ {
+ // We are starting a sub control flow.
+ // This is considered side effect free.
+ }
+ else
+ {
+ // For all other insts, we assume it has a global side effect.
+ return true;
+ }
+ }
+ }
+ return false;
+ }
+
static bool removeDeadBlocks(IRGlobalValueWithCode* func)
{
bool changed = false;
@@ -142,15 +241,327 @@ static bool removeDeadBlocks(IRGlobalValueWithCode* func)
return changed;
}
+// Returns true if `branchBlock`, one of the targets of `condBranch`, does no work
+// before reaching the after block: it is either the after block itself, or a block
+// whose first ordinary inst is a plain unconditional branch (opcode
+// `kIROp_unconditionalBranch`) to the after block.
+static bool isTrivialIfElseBranch(IRIfElse* condBranch, IRBlock* branchBlock)
+{
+    auto afterBlock = condBranch->getAfterBlock();
+
+    // Jumping straight to the after block is trivially empty.
+    if (branchBlock == afterBlock)
+        return true;
+
+    // Otherwise the block must begin with a jump into the after block.
+    auto jump = as<IRUnconditionalBranch>(branchBlock->getFirstOrdinaryInst());
+    if (!jump)
+        return false;
+    return jump->getTargetBlock() == afterBlock && jump->getOp() == kIROp_unconditionalBranch;
+}
+
+// Returns true if both branches of `ifElse` hand the same phi arguments to the
+// after block, so that taking either branch is indistinguishable.
+static bool arePhiArgsEquivalentInBranches(IRIfElse* ifElse)
+{
+    auto trueBlock = ifElse->getTrueBlock();
+    auto falseBlock = ifElse->getFalseBlock();
+    auto mergeBlock = ifElse->getAfterBlock();
+
+    // If either target is the after block itself, the after block cannot have phi
+    // parameters (a block with phi parameters can't be the target of a conditional
+    // branch), so there are no arguments to compare.
+    if (trueBlock == mergeBlock || falseBlock == mergeBlock)
+        return true;
+
+    auto trueJump = as<IRUnconditionalBranch>(trueBlock->getTerminator());
+    auto falseJump = as<IRUnconditionalBranch>(falseBlock->getTerminator());
+    if (!trueJump || !falseJump)
+        return false;
+
+    // Both branches end in an unconditional branch: compare them argument by argument.
+    // A differing argument count is not expected; treat it as "not equivalent" to be safe.
+    const UInt argCount = trueJump->getArgCount();
+    if (argCount != falseJump->getArgCount())
+        return false;
+
+    for (UInt argIndex = 0; argIndex < argCount; ++argIndex)
+    {
+        // Any differing argument makes the if-else non-trivial.
+        if (trueJump->getArg(argIndex) != falseJump->getArg(argIndex))
+            return false;
+    }
+    return true;
+}
+
+// Classifies the two branches of `condBranch`.
+// `isTrueBranchTrivial` / `isFalseBranchTrivial` are always written and report whether
+// the respective branch is a trivial jump to the after block.
+// Returns true only when both branches are trivial and pass equivalent phi arguments.
+static bool isTrivialIfElse(IRIfElse* condBranch, bool& isTrueBranchTrivial, bool& isFalseBranchTrivial)
+{
+    isTrueBranchTrivial = isTrivialIfElseBranch(condBranch, condBranch->getTrueBlock());
+    isFalseBranchTrivial = isTrivialIfElseBranch(condBranch, condBranch->getFalseBlock());
+
+    if (!isTrueBranchTrivial || !isFalseBranchTrivial)
+        return false;
+    return arePhiArgsEquivalentInBranches(condBranch);
+}
+
+#if 0 // Compiled out: nothing in this block is built.
+static bool tryMoveFalseBranchToTrueBranch(IRBuilder& builder, IRIfElse* ifElseInst) // Moves the false block to the true side and negates the condition; returns true if rewritten.
+{
+ auto falseBlock = ifElseInst->getFalseBlock();
+ if (falseBlock == ifElseInst->getAfterBlock()) // There is no separate false block to move.
+ return false;
+ if (auto termInst = as<IRUnconditionalBranch>(falseBlock->getTerminator()))
+ {
+ // We can't fold a branch that passes arguments into the ifElse.
+ if (termInst->getArgCount() != 0)
+ return false;
+ }
+ ifElseInst->trueBlock.set(falseBlock); // The old false block becomes the true target...
+ ifElseInst->falseBlock.set(ifElseInst->getAfterBlock()); // ...and the false side now goes straight to the after block.
+ builder.setInsertBefore(ifElseInst);
+ auto newCondition = builder.emitNot(builder.getBoolType(), ifElseInst->getCondition()); // Negate the condition to match the swapped targets.
+ ifElseInst->condition.set(newCondition);
+ return true;
+}
+#endif
+
+// Redirects the false target of `ifElseInst` straight to the after block.
+// Returns false (leaving the inst untouched) when the false target already is the
+// after block, or when the false block ends in an unconditional branch that passes
+// arguments; returns true after retargeting.
+static bool tryEliminateFalseBranch(IRIfElse* ifElseInst)
+{
+    auto afterBlock = ifElseInst->getAfterBlock();
+    auto falseBlock = ifElseInst->getFalseBlock();
+    if (falseBlock == afterBlock)
+        return false;
+
+    // We can't fold a branch with arguments into the ifElse.
+    auto falseJump = as<IRUnconditionalBranch>(falseBlock->getTerminator());
+    if (falseJump && falseJump->getArgCount() != 0)
+        return false;
+
+    ifElseInst->falseBlock.set(afterBlock);
+    return true;
+}
+
+// Try to simplify the `if-else` terminator `ifElseInst`.
+// - If both branches are trivial jumps into the after block with equivalent phi
+//   arguments, the whole conditional branch is replaced by an unconditional branch
+//   to the after block.
+// - If only the false branch is trivial, its target is redirected to the after block.
+// Returns true if the IR was changed. `builder` is used to emit the replacement branch.
+static bool trySimplifyIfElse(IRBuilder& builder, IRIfElse* ifElseInst)
+{
+    bool isTrueBranchTrivial = false;
+    bool isFalseBranchTrivial = false;
+    if (isTrivialIfElse(ifElseInst, isTrueBranchTrivial, isFalseBranchTrivial))
+    {
+        // If both branches of `if-else` are trivial jumps into after block,
+        // we can get rid of the entire conditional branch and replace it
+        // with a jump into the after block.
+        auto afterBlock = ifElseInst->getAfterBlock();
+
+        // The phi arguments must be read from a block that actually jumps *into* the
+        // after block. If a branch target is the after block itself, its terminator
+        // leaves the after block and its arguments belong to some other block, so it
+        // must not be used as the source of arguments.
+        IRBlock* jumpBlock = nullptr;
+        if (ifElseInst->getTrueBlock() != afterBlock)
+            jumpBlock = ifElseInst->getTrueBlock();
+        else if (ifElseInst->getFalseBlock() != afterBlock)
+            jumpBlock = ifElseInst->getFalseBlock();
+
+        // When both targets are the after block there is nothing to forward: a block
+        // with phi parameters can't be the target of a conditional branch.
+        List<IRInst*> args;
+        if (jumpBlock)
+        {
+            auto termInst = as<IRUnconditionalBranch>(jumpBlock->getTerminator());
+            if (!termInst)
+                return false;
+            for (UInt i = 0; i < termInst->getArgCount(); i++)
+                args.add(termInst->getArg(i));
+        }
+        builder.setInsertBefore(ifElseInst);
+        builder.emitBranch(afterBlock, (Int)args.getCount(), args.getBuffer());
+        ifElseInst->removeAndDeallocate();
+        return true;
+    }
+    else if (isTrueBranchTrivial)
+    {
+        // If true branch is empty, we move false branch to true branch and invert the condition.
+        // TODO: disabled for now since our auto-diff pass can't handle loops whose body is on the false
+        // side of condition.
+        //return tryMoveFalseBranchToTrueBranch(builder, ifElseInst);
+    }
+    else if (isFalseBranchTrivial)
+    {
+        // If false branch is empty, we set it to afterBlock.
+        return tryEliminateFalseBranch(ifElseInst);
+    }
+    return false;
+}
+
+// Returns true only when `lit` is a bool literal whose value is true.
+static bool isTrueLit(IRInst* lit)
+{
+    auto asBoolLit = as<IRBoolLit>(lit);
+    if (!asBoolLit)
+        return false;
+    return asBoolLit->getValue();
+}
+// Returns true only when `lit` is a bool literal whose value is false.
+// Anything that is not a bool literal yields false.
+static bool isFalseLit(IRInst* lit)
+{
+    auto asBoolLit = as<IRBoolLit>(lit);
+    if (!asBoolLit)
+        return false;
+    return !asBoolLit->getValue();
+}
+
+static bool simplifyBoolPhiParam(IRIfElse* ifElse, Array<IRBlock*, 2>& preds, IRParam* param, UInt paramIndex) // Returns true if `param` was replaced and removed.
+{
+ // For a bool param whose incoming values come from the two branches of the same `if-else`,
+ // we can replace it with an expression of the condition of that `if-else`.
+
+ if (!param->getDataType() || param->getDataType()->getOp() != kIROp_BoolType) // Only bool-typed params qualify.
+ return false;
+
+ auto branch0 = as<IRUnconditionalBranch>(preds[0]->getTerminator());
+ if (!branch0)
+ return false;
+ if (branch0->getArgCount() <= paramIndex) // This branch passes no argument for the param.
+ return false;
+ auto branch1 = as<IRUnconditionalBranch>(preds[1]->getTerminator());
+ if (!branch1)
+ return false;
+ if (branch1->getArgCount() <= paramIndex) // This branch passes no argument for the param.
+ return false;
+
+ IRInst* replacement = nullptr;
+ if (isTrueLit(branch0->getArg(paramIndex)) && isFalseLit(branch1->getArg(paramIndex))) // preds[0] passes `true`, preds[1] passes `false`.
+ {
+ replacement = ifElse->getCondition(); // The caller orders preds as {true block, false block}, so the param equals the condition.
+ }
+ else if (isFalseLit(branch0->getArg(paramIndex)) && isTrueLit(branch1->getArg(paramIndex))) // The opposite pairing: the param is the negated condition.
+ {
+ IRBuilder builder(param);
+ setInsertBeforeOrdinaryInst(&builder, param);
+ replacement = builder.emitNot(builder.getBoolType(), ifElse->getCondition());
+ }
+ if (replacement)
+ {
+ param->replaceUsesWith(replacement);
+ param->removeAndDeallocate();
+ branch0->removeArgument(paramIndex); // Drop the matching argument from both incoming branches.
+ branch1->removeArgument(paramIndex);
+ return true;
+ }
+ return false;
+}
+
+static bool simplifyBoolPhiParams(IRBlock* block) // Applies simplifyBoolPhiParam to every param of `block`; returns true if any was removed.
+{
+ if (!block)
+ return false;
+
+ if (block->getPredecessors().getCount() != 2) // Only handle blocks with exactly two predecessors.
+ return false;
+
+ Array<IRBlock*, 2> preds;
+ for (auto pred : block->getPredecessors())
+ preds.add(pred);
+
+ IRBlock* ifElseBlock = nullptr;
+ if (preds[0]->getPredecessors().getCount() != 1) // Each predecessor must itself have a single predecessor...
+ return false;
+ ifElseBlock = *(preds[0]->getPredecessors().begin());
+ if (preds[1]->getPredecessors().getCount() != 1)
+ return false;
+ auto p = *(preds[1]->getPredecessors().begin());
+ if (p != ifElseBlock) // ...and it must be the same block for both.
+ return false;
+
+ auto ifElse = as<IRIfElse>(ifElseBlock->getTerminator());
+ if (!ifElse) // That common block must end in an `if-else`.
+ return false;
+
+ if (ifElse->getTrueBlock() == preds[1])
+ {
+ Swap(preds[0], preds[1]); // Normalize so that preds[0] is the true block.
+ }
+ SLANG_ASSERT(ifElse->getTrueBlock() == preds[0] && ifElse->getFalseBlock() == preds[1]);
+
+ List<IRParam*> params; // Snapshot the params first, since the loop below removes some of them.
+ for (auto param : block->getParams())
+ params.add(param);
+ bool changed = false;
+ for (Index i = params.getCount() - 1; i >= 0; i--) // Go from last to first: removing param/argument i does not shift the lower indices still to be visited.
+ {
+ changed |= simplifyBoolPhiParam(ifElse, preds, params[i], (UInt)i);
+ }
+ return changed;
+}
+
+// Removes phi parameters of `block` that carry no information, rewriting their uses
+// and dropping the matching argument from every predecessor's branch.
+// Returns true if at least one parameter was removed. Does nothing (returns false)
+// if the block has no parameters or if any predecessor does not end in an
+// unconditional branch.
+static bool removeTrivialPhiParams(IRBlock* block)
+{
+    // We can remove a phi parameter if:
+    // 1. all arguments to a parameter are the same (not really a phi).
+    // 2. the arguments to the parameter are always the same as the arguments to another
+    //    existing parameter (duplicate phi).
+
+    bool changed = false;
+    List<IRParam*> params;
+    struct ParamState
+    {
+        // True while every predecessor seen so far passed `knownValue`.
+        bool areKnownValueSame = true;
+        // The argument passed by the first predecessor visited.
+        IRInst* knownValue = nullptr;
+        // Indices of lower-numbered params that received the same argument as this
+        // param in every predecessor seen so far.
+        OrderedHashSet<UInt> sameAsParamSet;
+    };
+    List<ParamState> args;
+    List<IRUnconditionalBranch*> termInsts;
+    for (auto param : block->getParams())
+    {
+        params.add(param);
+        args.add(ParamState());
+    }
+
+    if (!params.getCount())
+        return false;
+
+    // Start by assuming each param duplicates every lower-indexed param; predecessors
+    // that disprove this remove entries below.
+    for (UInt i = 1; i < (UInt)args.getCount(); i++)
+        for (UInt j = 0; j < i; j++)
+            args[i].sameAsParamSet.Add(j);
+
+    for (auto pred : block->getPredecessors())
+    {
+        auto termInst = as<IRUnconditionalBranch>(pred->getTerminator());
+        if (!termInst)
+            return false;
+        SLANG_ASSERT(termInst->getArgCount() == (UInt)args.getCount());
+        termInsts.add(termInst);
+        for (UInt i = 0; i < termInst->getArgCount(); i++)
+        {
+            if (args[i].areKnownValueSame)
+            {
+                if (args[i].knownValue == nullptr)
+                    args[i].knownValue = termInst->getArg(i);
+                else if (args[i].knownValue != termInst->getArg(i))
+                    args[i].areKnownValueSame = false;
+            }
+            for (UInt j = 0; j < i; j++)
+            {
+                if (termInst->getArg(i) != termInst->getArg(j))
+                {
+                    args[i].sameAsParamSet.Remove(j);
+                }
+            }
+        }
+    }
+
+    // Walk from the last param to the first so that removing param/argument `i`
+    // does not shift the indices that are still to be processed.
+    for (Index i = args.getCount() - 1; i >= 0; i--)
+    {
+        IRInst* targetVal = nullptr;
+        if (args[i].areKnownValueSame)
+        {
+            targetVal = args[i].knownValue;
+        }
+        else if (args[i].sameAsParamSet.Count())
+        {
+            auto targetParamId = *args[i].sameAsParamSet.begin();
+            targetVal = params[targetParamId];
+        }
+        if (targetVal)
+        {
+            // Use `targetVal`, not `knownValue`: for a duplicate phi the incoming values
+            // differ between predecessors, and `knownValue` is only the argument from the
+            // first predecessor visited. The correct replacement is the equivalent param.
+            params[i]->replaceUsesWith(targetVal);
+            params[i]->removeAndDeallocate();
+            for (auto termInst : termInsts)
+                termInst->removeArgument((UInt)i);
+            changed = true;
+        }
+    }
+    return changed;
+}
+
static bool processFunc(IRGlobalValueWithCode* func)
{
auto firstBlock = func->getFirstBlock();
if (!firstBlock)
return false;
- // Lazily generated region tree.
- CFGSimplificationContext simplificationContext;
-
IRBuilder builder(func->getModule());
bool changed = false;
@@ -165,6 +576,14 @@ static bool processFunc(IRGlobalValueWithCode* func)
workList.fastRemoveAt(0);
while (block)
{
+ // If all arguments to a phi parameter are known to be the same,
+ // we can safely replace the phi parameter with the argument.
+ if (block != func->getFirstBlock())
+ {
+ changed |= simplifyBoolPhiParams(block);
+ changed |= removeTrivialPhiParams(block);
+ }
+
if (auto loop = as<IRLoop>(block->getTerminator()))
{
// If continue block is unreachable, remove it.
@@ -179,7 +598,7 @@ static bool processFunc(IRGlobalValueWithCode* func)
// break at the end of the loop, we can remove the header and turn it into
// a normal branch.
auto targetBlock = loop->getTargetBlock();
- if (isTrivialSingleIterationLoop(func, loop, simplificationContext))
+ if (isTrivialSingleIterationLoop(func, loop))
{
builder.setInsertBefore(loop);
List<IRInst*> args;
@@ -189,7 +608,22 @@ static bool processFunc(IRGlobalValueWithCode* func)
}
builder.emitBranch(targetBlock, args.getCount(), args.getBuffer());
loop->removeAndDeallocate();
+ changed = true;
}
+ else if (!doesLoopHasSideEffect(func, loop))
+ {
+ // The loop isn't computing anything useful outside the loop.
+ // We can delete the entire loop.
+ builder.setInsertBefore(loop);
+ SLANG_ASSERT(loop->getBreakBlock()->getFirstParam() == nullptr);
+ builder.emitBranch(loop->getBreakBlock());
+ loop->removeAndDeallocate();
+ changed = true;
+ }
+ }
+ else if (auto condBranch = as<IRIfElse>(block->getTerminator()))
+ {
+ changed |= trySimplifyIfElse(builder, condBranch);
}
// If `block` does not end with an unconditional branch, bail.
@@ -225,6 +659,7 @@ static bool processFunc(IRGlobalValueWithCode* func)
branch->removeAndDeallocate();
assert(!successor->hasUses());
successor->removeAndDeallocate();
+ break;
}
for (auto successor : block->getSuccessors())
{
diff --git a/source/slang/slang-ir-specialize-function-call.cpp b/source/slang/slang-ir-specialize-function-call.cpp
index 894d46cce..a2ebbc0cf 100644
--- a/source/slang/slang-ir-specialize-function-call.cpp
+++ b/source/slang/slang-ir-specialize-function-call.cpp
@@ -822,6 +822,12 @@ struct FunctionParameterSpecializationContext
{
decoration->removeAndDeallocate();
}
+ else if (as<IRReadNoneDecoration>(decoration))
+ {
+ // After specialization, the function may no longer be side effect free
+ // because the parameter we substituted in may be a global param.
+ decoration->removeAndDeallocate();
+ }
}
}
diff --git a/source/slang/slang-ir-ssa-simplification.cpp b/source/slang/slang-ir-ssa-simplification.cpp
index f06fafcb3..beaaae065 100644
--- a/source/slang/slang-ir-ssa-simplification.cpp
+++ b/source/slang/slang-ir-ssa-simplification.cpp
@@ -10,6 +10,7 @@
#include "slang-ir-deduplicate-generic-children.h"
#include "slang-ir-remove-unused-generic-param.h"
#include "slang-ir-redundancy-removal.h"
+#include "slang-ir-propagate-func-properties.h"
namespace Slang
{
@@ -29,6 +30,7 @@ namespace Slang
changed |= peepholeOptimize(module);
changed |= removeRedundancy(module);
changed |= simplifyCFG(module);
+ changed |= propagateFuncProperties(module);
// Note: we disregard the `changed` state from dead code elimination pass since
// SCCP pass could be generating temporarily evaluated constant values and never actually use them.
@@ -41,6 +43,28 @@ namespace Slang
}
}
+    // Repeatedly runs peephole optimization, redundancy removal, CFG simplification
+    // and dead code elimination on `module`, stopping once an iteration reports no
+    // change or after `kMaxIterations` iterations.
+    void simplifyNonSSAIR(IRModule* module)
+    {
+        const int kMaxIterations = 8;
+        bool changed = true;
+        for (int iterationCounter = 0; changed && iterationCounter < kMaxIterations; iterationCounter++)
+        {
+            changed = false;
+            changed |= peepholeOptimize(module);
+            changed |= removeRedundancy(module);
+            changed |= simplifyCFG(module);
+
+            // Note: the result of dead code elimination is deliberately not folded into
+            // `changed`, so that DCE alone never forces another iteration.
+            eliminateDeadCode(module);
+        }
+    }
+
+
void simplifyFunc(IRGlobalValueWithCode* func)
{
bool changed = true;
diff --git a/source/slang/slang-ir-ssa-simplification.h b/source/slang/slang-ir-ssa-simplification.h
index ee8343003..39504e102 100644
--- a/source/slang/slang-ir-ssa-simplification.h
+++ b/source/slang/slang-ir-ssa-simplification.h
@@ -10,5 +10,8 @@ namespace Slang
// until no more changes are possible.
void simplifyIR(IRModule* module);
+ // Run simplifications on IR that is out of SSA form.
+ void simplifyNonSSAIR(IRModule* module);
+
void simplifyFunc(IRGlobalValueWithCode* func);
}
diff --git a/source/slang/slang-ir-util.cpp b/source/slang/slang-ir-util.cpp
index 3db036a8d..339521f41 100644
--- a/source/slang/slang-ir-util.cpp
+++ b/source/slang/slang-ir-util.cpp
@@ -157,6 +157,32 @@ IRInst* maybeSpecializeWithGeneric(IRBuilder& builder, IRInst* genericToSpecaili
return genericToSpecailize;
}
+// Returns true if `dataType` (after unwrapping attributed types and resolving to the
+// inst that carries decorations) is a basic type or one of the opcodes listed below.
+// Every other type yields false.
+bool isValueType(IRInst* dataType)
+{
+    auto resolvedType = getResolvedInstForDecorations(unwrapAttributedType(dataType));
+    if (as<IRBasicType>(resolvedType))
+        return true;
+
+    switch (resolvedType->getOp())
+    {
+    case kIROp_StructType:
+    case kIROp_InterfaceType:
+    case kIROp_ClassType:
+    case kIROp_VectorType:
+    case kIROp_MatrixType:
+    case kIROp_TupleType:
+    case kIROp_ResultType:
+    case kIROp_OptionalType:
+    case kIROp_DifferentialPairType:
+    case kIROp_DynamicType:
+    case kIROp_AnyValueType:
+    case kIROp_ArrayType:
+    case kIROp_FuncType:
+        return true;
+    default:
+        break;
+    }
+    return false;
+}
+
IRInst* hoistValueFromGeneric(IRBuilder& inBuilder, IRInst* value, IRInst*& outSpecializedVal, bool replaceExistingValue)
{
auto outerGeneric = as<IRGeneric>(findOuterGeneric(value));
@@ -402,8 +428,7 @@ bool canInstHaveSideEffectAtAddress(IRGlobalValueWithCode* func, IRInst* inst, I
{
auto callee = call->getCallee();
if (callee &&
- callee->findDecoration<IRReadNoneDecoration>() &&
- callee->findDecoration<IRNoSideEffectDecoration>())
+ callee->findDecoration<IRReadNoneDecoration>())
{
// An exception is if the callee is side-effect free and is not reading from
// memory.
@@ -423,6 +448,32 @@ bool canInstHaveSideEffectAtAddress(IRGlobalValueWithCode* func, IRInst* inst, I
if (canAddressesPotentiallyAlias(func, call->getArg(i), addr))
return true;
}
+ else if (!isValueType(call->getArg(i)->getDataType()))
+ {
+ // This is some unknown handle type, we assume it can have any side effects.
+ return true;
+ }
+ }
+ }
+ break;
+ case kIROp_unconditionalBranch:
+ case kIROp_loop:
+ {
+ auto branch = as<IRUnconditionalBranch>(inst);
+ // If any pointer typed argument of the branch inst may overlap addr, return true.
+ for (UInt i = 0; i < branch->getArgCount(); i++)
+ {
+ SLANG_RELEASE_ASSERT(branch->getArg(i)->getDataType());
+ if (isPtrLikeOrHandleType(branch->getArg(i)->getDataType()))
+ {
+ if (canAddressesPotentiallyAlias(func, branch->getArg(i), addr))
+ return true;
+ }
+ else if (!isValueType(branch->getArg(i)->getDataType()))
+ {
+ // This is some unknown handle type, we assume it can have any side effects.
+ return true;
+ }
}
}
break;
@@ -434,6 +485,11 @@ bool canInstHaveSideEffectAtAddress(IRGlobalValueWithCode* func, IRInst* inst, I
if (isPtrLikeOrHandleType(inst->getOperand(0)->getDataType()) &&
canAddressesPotentiallyAlias(func, inst->getOperand(0), addr))
return true;
+ else if (!isValueType(inst->getOperand(0)->getDataType()))
+ {
+ // This is some unknown handle type, we assume it can have any side effects.
+ return true;
+ }
}
break;
default:
@@ -520,20 +576,17 @@ bool isPureFunctionalCall(IRCall* call)
auto callee = getResolvedInstForDecorations(call->getCallee());
if (callee->findDecoration<IRReadNoneDecoration>())
{
- return true;
- }
- if (callee->findDecoration<IRNoSideEffectDecoration>())
- {
// If the function has no side effect and is not writing to any outputs,
// we can safely treat the call as a normal inst.
bool hasOutArg = false;
for (UInt i = 0; i < call->getArgCount(); i++)
{
- if (as<IRPtrTypeBase>(call->getArg(i)->getDataType()))
- {
- hasOutArg = true;
- break;
- }
+ if (isValueType(call->getArg(i)->getDataType()))
+ continue;
+ // If the argument type is not a known value type,
+ // assume it is a pointer or handle through which side effect can take place.
+ hasOutArg = true;
+ break;
}
return !hasOutArg;
}
diff --git a/source/slang/slang-ir-util.h b/source/slang/slang-ir-util.h
index 8a12ab895..62156cad6 100644
--- a/source/slang/slang-ir-util.h
+++ b/source/slang/slang-ir-util.h
@@ -83,6 +83,9 @@ inline bool isScalarIntegerType(IRType* type)
return getTypeStyle(type->getOp()) == kIROp_IntType;
}
+// No side effect can take place through a value of a "Value" type.
+bool isValueType(IRInst* type);
+
inline bool isChildInstOf(IRInst* inst, IRInst* parent)
{
while (inst)
diff --git a/source/slang/slang-ir.cpp b/source/slang/slang-ir.cpp
index accefc0c9..fd211d05c 100644
--- a/source/slang/slang-ir.cpp
+++ b/source/slang/slang-ir.cpp
@@ -43,7 +43,10 @@ namespace Slang
case kIROp_PreciseDecoration:
case kIROp_PublicDecoration:
case kIROp_HLSLExportDecoration:
- case kIROp_ReadNoneDecoration:
+ case kIROp_ReadNoneDecoration:
+ case kIROp_NoSideEffectDecoration:
+ case kIROp_ForwardDifferentiableDecoration:
+ case kIROp_BackwardDifferentiableDecoration:
case kIROp_RequiresNVAPIDecoration:
case kIROp_TriangleAdjInputPrimitiveTypeDecoration:
case kIROp_TriangleInputPrimitiveTypeDecoration:
@@ -695,6 +698,21 @@ namespace Slang
}
}
+    // Removes the `index`-th branch argument. Arguments start at operand 1 for a
+    // plain unconditional branch and at operand 3 for a `loop`; any other opcode is
+    // reported as unexpected.
+    void IRUnconditionalBranch::removeArgument(UInt index)
+    {
+        const auto opcode = getOp();
+        if (opcode == kIROp_unconditionalBranch)
+        {
+            removeOperand(1 + index);
+            return;
+        }
+        if (opcode == kIROp_loop)
+        {
+            removeOperand(3 + index);
+            return;
+        }
+        SLANG_UNEXPECTED("unhandled unconditional branch opcode");
+    }
+
IRInst* IRUnconditionalBranch::getArg(UInt index)
{
return getArgs()[index].usedValue;
@@ -5109,6 +5127,17 @@ namespace Slang
return inst;
}
+    // Creates a `kIROp_Not` inst of type `type` with `value` as its single operand,
+    // adds it via `addInst`, and returns it.
+    IRInst* IRBuilder::emitNot(IRType* type, IRInst* value)
+    {
+        auto notInst = createInst<IRInst>(this, kIROp_Not, type, value);
+        addInst(notInst);
+        return notInst;
+    }
+
IRInst* IRBuilder::emitAdd(IRType* type, IRInst* left, IRInst* right)
{
auto inst = createInst<IRInst>(
@@ -6792,6 +6821,17 @@ namespace Slang
}
}
+    // Removes operand `index` by shifting every later operand down one slot,
+    // clearing the now-unused last slot, and shrinking the operand count by one.
+    // For example, removing operand 1 of `op(a,b,c)` leaves `op(a,c)`.
+    void IRInst::removeOperand(Index index)
+    {
+        const Index lastSlot = (Index)operandCount - 1;
+        for (Index slot = index; slot < lastSlot; slot++)
+        {
+            // Overwrite each slot with the value of its right-hand neighbour.
+            getOperands()[slot].set(getOperand(slot + 1));
+        }
+        getOperands()[operandCount - 1].clear();
+        operandCount--;
+    }
+
// Remove this instruction from its parent block,
// and then destroy it (it had better have no uses!)
void IRInst::removeAndDeallocate()
@@ -6879,6 +6919,8 @@ namespace Slang
// common subexpression elimination, etc.
//
auto call = cast<IRCall>(this);
+ // If the call has been marked as no-side-effect, we
+ // will treat it so, by-passing all other checks.
if (call->findDecoration<IRNoSideEffectDecoration>())
return false;
return !isPureFunctionalCall(call);
@@ -6894,6 +6936,7 @@ namespace Slang
case kIROp_Func:
case kIROp_Generic:
case kIROp_Var:
+ case kIROp_Param:
case kIROp_GlobalVar: // Note: the IRGlobalVar represents the *address*, so only a load/store would have side effects
case kIROp_GlobalConstant:
case kIROp_GlobalParam:
@@ -7003,12 +7046,6 @@ namespace Slang
case kIROp_BackwardDifferentiatePropagate:
return false;
}
-
- // Check if the calle has been marked with a catch-all no-side-effect decoration.
- if (findDecoration<IRNoSideEffectDecoration>())
- {
- return false;
- }
return true;
}
diff --git a/source/slang/slang-ir.h b/source/slang/slang-ir.h
index 63b7c4ef9..e22ea8a36 100644
--- a/source/slang/slang-ir.h
+++ b/source/slang/slang-ir.h
@@ -744,6 +744,11 @@ struct IRInst
// for those values.
void removeArguments();
+ // Remove operand `index` from operand list.
+ // For example, if the inst is `op(a,b,c)`, calling removeOperand(inst, 1) will result
+ // `op(a,c)`.
+ void removeOperand(Index index);
+
/// Transfer any decorations of this instruction to the `target` instruction.
void transferDecorationsTo(IRInst* target);
diff --git a/source/slang/slang-lower-to-ir.cpp b/source/slang/slang-lower-to-ir.cpp
index 681871b6c..d09c35eea 100644
--- a/source/slang/slang-lower-to-ir.cpp
+++ b/source/slang/slang-lower-to-ir.cpp
@@ -8304,6 +8304,11 @@ struct DeclLoweringVisitor : DeclVisitor<DeclLoweringVisitor, LoweredValInfo>
getBuilder()->addSimpleDecoration<IRRequiresNVAPIDecoration>(irFunc);
}
+ if (decl->findModifier<AlwaysFoldIntoUseSiteAttribute>())
+ {
+ getBuilder()->addSimpleDecoration<IRAlwaysFoldIntoUseSiteDecoration>(irFunc);
+ }
+
if (decl->findModifier<NoInlineAttribute>())
{
getBuilder()->addSimpleDecoration<IRNoInlineDecoration>(irFunc);