summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--build/visual-studio/slang/slang.vcxproj2
-rw-r--r--build/visual-studio/slang/slang.vcxproj.filters6
-rw-r--r--source/slang/core.meta.slang8
-rw-r--r--source/slang/hlsl.meta.slang318
-rw-r--r--source/slang/slang-ast-modifier.h8
-rw-r--r--source/slang/slang-emit-c-like.cpp12
-rw-r--r--source/slang/slang-emit-c-like.h2
-rw-r--r--source/slang/slang-emit-cpp.cpp40
-rw-r--r--source/slang/slang-emit-cpp.h1
-rw-r--r--source/slang/slang-emit.cpp4
-rw-r--r--source/slang/slang-ir-autodiff-unzip.cpp1
-rw-r--r--source/slang/slang-ir-autodiff.h8
-rw-r--r--source/slang/slang-ir-dce.cpp264
-rw-r--r--source/slang/slang-ir-glsl-legalize.cpp4
-rw-r--r--source/slang/slang-ir-inst-defs.h3
-rw-r--r--source/slang/slang-ir-insts.h18
-rw-r--r--source/slang/slang-ir-loop-unroll.cpp26
-rw-r--r--source/slang/slang-ir-loop-unroll.h4
-rw-r--r--source/slang/slang-ir-propagate-func-properties.cpp186
-rw-r--r--source/slang/slang-ir-propagate-func-properties.h7
-rw-r--r--source/slang/slang-ir-redundancy-removal.cpp373
-rw-r--r--source/slang/slang-ir-sccp.cpp4
-rw-r--r--source/slang/slang-ir-simplify-cfg.cpp467
-rw-r--r--source/slang/slang-ir-specialize-function-call.cpp6
-rw-r--r--source/slang/slang-ir-ssa-simplification.cpp24
-rw-r--r--source/slang/slang-ir-ssa-simplification.h3
-rw-r--r--source/slang/slang-ir-util.cpp75
-rw-r--r--source/slang/slang-ir-util.h3
-rw-r--r--source/slang/slang-ir.cpp51
-rw-r--r--source/slang/slang-ir.h5
-rw-r--r--source/slang/slang-lower-to-ir.cpp5
-rw-r--r--tests/bugs/sample-grad-clamp-lod.slang.glsl6
-rw-r--r--tests/bugs/vk-structured-buffer-load.hlsl.glsl27
-rw-r--r--tests/cross-compile/array-of-buffers.slang.glsl20
-rw-r--r--tests/cross-compile/array-of-buffers.slang.hlsl20
-rw-r--r--tests/cross-compile/glsl-generic-in.slang.glsl12
-rw-r--r--tests/cross-compile/half-conversion.slang.glsl29
-rw-r--r--tests/cross-compile/sign.slang.glsl9
-rw-r--r--tests/diagnostics/interfaces/anyvalue-size-validation.slang4
-rw-r--r--tests/experimental/liveness/liveness-2.slang.expected5
-rw-r--r--tests/experimental/liveness/liveness-3.slang.expected81
-rw-r--r--tests/experimental/liveness/liveness-4.slang.expected19
-rw-r--r--tests/experimental/liveness/liveness-5.slang.expected23
-rw-r--r--tests/experimental/liveness/liveness-6.slang.expected31
-rw-r--r--tests/experimental/liveness/liveness.slang.expected77
-rw-r--r--tests/hlsl-intrinsic/shader-execution-reordering/hit-object-make-hit.slang.1.expected20
-rw-r--r--tests/hlsl-intrinsic/vector-float.slang50
-rw-r--r--tests/ir/loop-dce.slang40
-rw-r--r--tests/ir/loop-dce.slang.expected.txt4
-rw-r--r--tests/nv-extensions/nv-ray-tracing-motion-blur.slang.glsl84
-rw-r--r--tests/pipeline/rasterization/fragment-shader-interlock.slang.glsl14
-rw-r--r--tests/pipeline/rasterization/mesh/passing-outputs.slang.glsl6
-rw-r--r--tests/pipeline/ray-tracing/acceleration-structure-in-compute.slang.glsl3
-rw-r--r--tests/pipeline/ray-tracing/trace-ray-inline.slang.glsl36
-rw-r--r--tests/pipeline/ray-tracing/trace-ray-inline.slang.hlsl157
-rw-r--r--tests/slang-extension/atomic-float-byte-address-buffer-cross.slang.glsl15
-rw-r--r--tests/vkray/anyhit.slang.glsl21
-rw-r--r--tests/vkray/callable-caller.slang.glsl23
-rw-r--r--tests/vkray/raygen.slang.glsl42
59 files changed, 1984 insertions, 832 deletions
diff --git a/build/visual-studio/slang/slang.vcxproj b/build/visual-studio/slang/slang.vcxproj
index 9971333d7..e97d6a2b1 100644
--- a/build/visual-studio/slang/slang.vcxproj
+++ b/build/visual-studio/slang/slang.vcxproj
@@ -407,6 +407,7 @@ IF EXIST ..\..\..\external\slang-glslang\bin\windows-aarch64\release\slang-glsla
<ClInclude Include="..\..\..\source\slang\slang-ir-missing-return.h" />
<ClInclude Include="..\..\..\source\slang\slang-ir-optix-entry-point-uniforms.h" />
<ClInclude Include="..\..\..\source\slang\slang-ir-peephole.h" />
+ <ClInclude Include="..\..\..\source\slang\slang-ir-propagate-func-properties.h" />
<ClInclude Include="..\..\..\source\slang\slang-ir-redundancy-removal.h" />
<ClInclude Include="..\..\..\source\slang\slang-ir-remove-unused-generic-param.h" />
<ClInclude Include="..\..\..\source\slang\slang-ir-restructure-scoping.h" />
@@ -591,6 +592,7 @@ IF EXIST ..\..\..\external\slang-glslang\bin\windows-aarch64\release\slang-glsla
<ClCompile Include="..\..\..\source\slang\slang-ir-missing-return.cpp" />
<ClCompile Include="..\..\..\source\slang\slang-ir-optix-entry-point-uniforms.cpp" />
<ClCompile Include="..\..\..\source\slang\slang-ir-peephole.cpp" />
+ <ClCompile Include="..\..\..\source\slang\slang-ir-propagate-func-properties.cpp" />
<ClCompile Include="..\..\..\source\slang\slang-ir-redundancy-removal.cpp" />
<ClCompile Include="..\..\..\source\slang\slang-ir-remove-unused-generic-param.cpp" />
<ClCompile Include="..\..\..\source\slang\slang-ir-restructure-scoping.cpp" />
diff --git a/build/visual-studio/slang/slang.vcxproj.filters b/build/visual-studio/slang/slang.vcxproj.filters
index 839182de5..64267db4b 100644
--- a/build/visual-studio/slang/slang.vcxproj.filters
+++ b/build/visual-studio/slang/slang.vcxproj.filters
@@ -327,6 +327,9 @@
<ClInclude Include="..\..\..\source\slang\slang-ir-peephole.h">
<Filter>Header Files</Filter>
</ClInclude>
+ <ClInclude Include="..\..\..\source\slang\slang-ir-propagate-func-properties.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
<ClInclude Include="..\..\..\source\slang\slang-ir-redundancy-removal.h">
<Filter>Header Files</Filter>
</ClInclude>
@@ -875,6 +878,9 @@
<ClCompile Include="..\..\..\source\slang\slang-ir-peephole.cpp">
<Filter>Source Files</Filter>
</ClCompile>
+ <ClCompile Include="..\..\..\source\slang\slang-ir-propagate-func-properties.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
<ClCompile Include="..\..\..\source\slang\slang-ir-redundancy-removal.cpp">
<Filter>Source Files</Filter>
</ClCompile>
diff --git a/source/slang/core.meta.slang b/source/slang/core.meta.slang
index 6357d58bd..9da33c755 100644
--- a/source/slang/core.meta.slang
+++ b/source/slang/core.meta.slang
@@ -2525,21 +2525,25 @@ int __SyntaxError();
__generic<T>
__target_intrinsic(cuda, "sizeof($G0)")
__target_intrinsic(cpp, "sizeof($G0)")
+[__readNone]
int __sizeOf();
__generic<T>
__target_intrinsic(cuda, "sizeof($T0)")
__target_intrinsic(cpp, "sizeof($T0)")
+[__readNone]
int __sizeOf(T v);
__generic<T>
__target_intrinsic(cuda, "SLANG_ALIGN_OF($G0)")
__target_intrinsic(cpp, "SLANG_ALIGN_OF($G0)")
+[__readNone]
int __alignOf();
__generic<T>
__target_intrinsic(cuda, "SLANG_ALIGN_OF($T0)")
__target_intrinsic(cpp, "SLANG_ALIGN_OF($T0)")
+[__readNone]
int __alignOf(T v);
// It would be nice to have offsetof equivalent, but it's not clear how that would work in terms of the Slang language.
@@ -2547,6 +2551,7 @@ int __alignOf(T v);
__generic<T,F>
__target_intrinsic(cuda, "int(((char*)&($1)) - ((char*)&($0)))")
__target_intrinsic(cpp, "int(((char*)&($1)) - ((char*)&($0))")
+[__readNone]
int __offsetOf(in T t, in F field);
/// Mark beginning of "interlocked" operations in a fragment shader.
@@ -2960,6 +2965,9 @@ attribute_syntax [builtin] : BuiltinAttribute;
__attributeTarget(DeclBase)
attribute_syntax [__requiresNVAPI] : RequiresNVAPIAttribute;
+__attributeTarget(DeclBase)
+attribute_syntax [__AlwaysFoldIntoUseSiteAttribute] : AlwaysFoldIntoUseSiteAttribute;
+
__attributeTarget(FunctionDeclBase)
attribute_syntax [noinline] : NoInlineAttribute;
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index 7e75d06b3..37cdc205e 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -778,6 +778,7 @@ __target_intrinsic(glsl)
__target_intrinsic(cuda, "$P_abs($0)")
__target_intrinsic(cpp, "$P_abs($0)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 fi(FAbs, SAbs) _0")
+[__readNone]
T abs(T x);
/*{
// Note: this simple definition may not be appropriate for floating-point inputs
@@ -788,6 +789,7 @@ __generic<T : __BuiltinIntegerType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 fi(FAbs, SAbs) _0")
+[__readNone]
vector<T, N> abs(vector<T, N> x)
{
VECTOR_MAP_UNARY(T, N, abs, x);
@@ -795,6 +797,7 @@ vector<T, N> abs(vector<T, N> x)
__generic<T : __BuiltinIntegerType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T,N,M> abs(matrix<T,N,M> x)
{
MATRIX_MAP_UNARY(T, N, M, abs, x);
@@ -806,12 +809,14 @@ __target_intrinsic(glsl)
__target_intrinsic(cuda, "$P_abs($0)")
__target_intrinsic(cpp, "$P_abs($0)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 fi(FAbs, SAbs) _0")
+[__readNone]
T abs(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 fi(FAbs, SAbs) _0")
+[__readNone]
vector<T, N> abs(vector<T, N> x)
{
VECTOR_MAP_UNARY(T, N, abs, x);
@@ -819,6 +824,7 @@ vector<T, N> abs(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T,N,M> abs(matrix<T,N,M> x)
{
MATRIX_MAP_UNARY(T, N, M, abs, x);
@@ -832,12 +838,14 @@ __target_intrinsic(glsl)
__target_intrinsic(cuda, "$P_acos($0)")
__target_intrinsic(cpp, "$P_acos($0)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Acos _0")
+[__readNone]
T acos(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Acos _0")
+[__readNone]
vector<T, N> acos(vector<T, N> x)
{
VECTOR_MAP_UNARY(T, N, acos, x);
@@ -845,6 +853,7 @@ vector<T, N> acos(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> acos(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(T, N, M, acos, x);
@@ -855,11 +864,13 @@ __generic<T : __BuiltinType>
__target_intrinsic(cpp, "bool($0)")
__target_intrinsic(cuda, "bool($0)")
__target_intrinsic(glsl, "bool($0)")
+[__readNone]
bool all(T x);
__generic<T : __BuiltinType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, "all(bvec$N0($0))")
+[__readNone]
bool all(vector<T,N> x)
{
bool result = true;
@@ -870,6 +881,7 @@ bool all(vector<T,N> x)
__generic<T : __BuiltinType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
bool all(matrix<T,N,M> x)
{
bool result = true;
@@ -894,11 +906,13 @@ __generic<T : __BuiltinType>
__target_intrinsic(cpp, "bool($0)")
__target_intrinsic(cuda, "bool($0)")
__target_intrinsic(glsl, "bool($0)")
+[__readNone]
bool any(T x);
__generic<T : __BuiltinType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, "any(bvec$N0($0))")
+[__readNone]
bool any(vector<T, N> x)
{
bool result = false;
@@ -909,6 +923,7 @@ bool any(vector<T, N> x)
__generic<T : __BuiltinType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
bool any(matrix<T, N, M> x)
{
bool result = false;
@@ -926,6 +941,7 @@ __target_intrinsic(cpp, "$P_asdouble($0, $1)")
__target_intrinsic(cuda, "$P_asdouble($0, $1)")
__target_intrinsic(spirv_direct, "%v = OpCompositeConstruct _type(uint2) resultId _0 _1; OpExtInst resultType resultId glsl450 59 %v")
__glsl_extension(GL_ARB_gpu_shader5)
+[__readNone]
double asdouble(uint lowbits, uint highbits);
// Reinterpret bits as a float (HLSL SM 4.0)
@@ -935,6 +951,7 @@ __target_intrinsic(glsl, "intBitsToFloat")
__target_intrinsic(cpp, "$P_asfloat($0)")
__target_intrinsic(cuda, "$P_asfloat($0)")
__target_intrinsic(spirv_direct, "OpBitcast resultType resultId _0")
+[__readNone]
float asfloat(int x);
__target_intrinsic(hlsl)
@@ -942,12 +959,14 @@ __target_intrinsic(glsl, "uintBitsToFloat")
__target_intrinsic(cpp, "$P_asfloat($0)")
__target_intrinsic(cuda, "$P_asfloat($0)")
__target_intrinsic(spirv_direct, "OpBitcast resultType resultId _0")
+[__readNone]
float asfloat(uint x);
__generic<let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, "intBitsToFloat")
__target_intrinsic(spirv_direct, "OpBitcast resultType resultId _0")
+[__readNone]
vector<float, N> asfloat(vector< int, N> x)
{
VECTOR_MAP_UNARY(float, N, asfloat, x);
@@ -957,6 +976,7 @@ __generic<let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, "uintBitsToFloat")
__target_intrinsic(spirv_direct, "OpBitcast resultType resultId _0")
+[__readNone]
vector<float,N> asfloat(vector<uint,N> x)
{
VECTOR_MAP_UNARY(float, N, asfloat, x);
@@ -964,6 +984,7 @@ vector<float,N> asfloat(vector<uint,N> x)
__generic<let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<float,N,M> asfloat(matrix< int,N,M> x)
{
MATRIX_MAP_UNARY(float, N, M, asfloat, x);
@@ -971,6 +992,7 @@ matrix<float,N,M> asfloat(matrix< int,N,M> x)
__generic<let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<float,N,M> asfloat(matrix<uint,N,M> x)
{
MATRIX_MAP_UNARY(float, N, M, asfloat, x);
@@ -978,16 +1000,19 @@ matrix<float,N,M> asfloat(matrix<uint,N,M> x)
// No op
[__unsafeForceInlineEarly]
+[__readNone]
float asfloat(float x)
{ return x; }
__generic<let N : int>
[__unsafeForceInlineEarly]
+[__readNone]
vector<float,N> asfloat(vector<float,N> x)
{ return x; }
__generic<let N : int, let M : int>
[__unsafeForceInlineEarly]
+[__readNone]
matrix<float,N,M> asfloat(matrix<float,N,M> x)
{ return x; }
@@ -998,12 +1023,14 @@ __target_intrinsic(glsl)
__target_intrinsic(cuda, "$P_asin($0)")
__target_intrinsic(cpp, "$P_asin($0)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Asin _0")
+[__readNone]
T asin(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Asin _0")
+[__readNone]
vector<T, N> asin(vector<T, N> x)
{
VECTOR_MAP_UNARY(T,N,asin,x);
@@ -1011,6 +1038,7 @@ vector<T, N> asin(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> asin(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(T,N,M,asin,x);
@@ -1023,6 +1051,7 @@ __target_intrinsic(glsl, "floatBitsToInt")
__target_intrinsic(cpp, "$P_asint($0)")
__target_intrinsic(cuda, "$P_asint($0)")
__target_intrinsic(spirv_direct, "OpBitcast resultType resultId _0")
+[__readNone]
int asint(float x);
__target_intrinsic(hlsl)
@@ -1030,12 +1059,14 @@ __target_intrinsic(glsl, "int($0)")
__target_intrinsic(cpp, "$P_asint($0)")
__target_intrinsic(cuda, "$P_asint($0)")
__target_intrinsic(spirv_direct, "OpBitcast resultType resultId _0")
+[__readNone]
int asint(uint x);
__generic<let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, "floatBitsToInt")
__target_intrinsic(spirv_direct, "OpBitcast resultType resultId _0")
+[__readNone]
vector<int, N> asint(vector<float, N> x)
{
VECTOR_MAP_UNARY(int, N, asint, x);
@@ -1045,6 +1076,7 @@ __generic<let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, "ivec$N0($0)")
__target_intrinsic(spirv_direct, "OpBitcast resultType resultId _0")
+[__readNone]
vector<int, N> asint(vector<uint, N> x)
{
VECTOR_MAP_UNARY(int, N, asint, x);
@@ -1052,6 +1084,7 @@ vector<int, N> asint(vector<uint, N> x)
__generic<let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<int, N, M> asint(matrix<float, N, M> x)
{
MATRIX_MAP_UNARY(int, N, M, asint, x);
@@ -1059,6 +1092,7 @@ matrix<int, N, M> asint(matrix<float, N, M> x)
__generic<let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<int, N, M> asint(matrix<uint, N, M> x)
{
MATRIX_MAP_UNARY(int, N, M, asint, x);
@@ -1066,16 +1100,19 @@ matrix<int, N, M> asint(matrix<uint, N, M> x)
// No op
[__unsafeForceInlineEarly]
+[__readNone]
int asint(int x)
{ return x; }
__generic<let N : int>
[__unsafeForceInlineEarly]
+[__readNone]
vector<int,N> asint(vector<int,N> x)
{ return x; }
__generic<let N : int, let M : int>
[__unsafeForceInlineEarly]
+[__readNone]
matrix<int,N,M> asint(matrix<int,N,M> x)
{ return x; }
@@ -1086,6 +1123,7 @@ __target_intrinsic(glsl, "{ uvec2 v = unpackDouble2x32($0); $1 = v.x; $2 = v.y;
__glsl_extension(GL_ARB_gpu_shader5)
__target_intrinsic(cpp, "$P_asuint($0, $1, $2)")
__target_intrinsic(cuda, "$P_asuint($0, $1, $2)")
+[__readNone]
void asuint(double value, out uint lowbits, out uint highbits);
// Reinterpret bits as a uint (HLSL SM 4.0)
@@ -1095,6 +1133,7 @@ __target_intrinsic(glsl, "floatBitsToUint")
__target_intrinsic(spirv_direct, "OpBitcast resultType resultId _0")
__target_intrinsic(cpp, "$P_asuint($0)")
__target_intrinsic(cuda, "$P_asuint($0)")
+[__readNone]
uint asuint(float x);
__target_intrinsic(hlsl)
@@ -1102,12 +1141,14 @@ __target_intrinsic(glsl, "uint($0)")
__target_intrinsic(spirv_direct, "OpBitcast resultType resultId _0")
__target_intrinsic(cpp, "$P_asuint($0)")
__target_intrinsic(cuda, "$P_asuint($0)")
+[__readNone]
uint asuint(int x);
__generic<let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, "floatBitsToUint")
__target_intrinsic(spirv_direct, "OpBitcast resultType resultId _0")
+[__readNone]
vector<uint,N> asuint(vector<float,N> x)
{
VECTOR_MAP_UNARY(uint, N, asuint, x);
@@ -1117,6 +1158,7 @@ __generic<let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, "uvec$N0($0)")
__target_intrinsic(spirv_direct, "OpBitcast resultType resultId _0")
+[__readNone]
vector<uint, N> asuint(vector<int, N> x)
{
VECTOR_MAP_UNARY(uint, N, asuint, x);
@@ -1124,6 +1166,7 @@ vector<uint, N> asuint(vector<int, N> x)
__generic<let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<uint,N,M> asuint(matrix<float,N,M> x)
{
MATRIX_MAP_UNARY(uint, N, M, asuint, x);
@@ -1131,22 +1174,26 @@ matrix<uint,N,M> asuint(matrix<float,N,M> x)
__generic<let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<uint, N, M> asuint(matrix<int, N, M> x)
{
MATRIX_MAP_UNARY(uint, N, M, asuint, x);
}
[__unsafeForceInlineEarly]
+[__readNone]
uint asuint(uint x)
{ return x; }
__generic<let N : int>
[__unsafeForceInlineEarly]
+[__readNone]
vector<uint,N> asuint(vector<uint,N> x)
{ return x; }
__generic<let N : int, let M : int>
[__unsafeForceInlineEarly]
+[__readNone]
matrix<uint,N,M> asuint(matrix<uint,N,M> x)
{ return x; }
@@ -1159,38 +1206,41 @@ matrix<uint,N,M> asuint(matrix<uint,N,M> x)
// Identity cases:
-[__unsafeForceInlineEarly] float16_t asfloat16(float16_t value) { return value; }
-[__unsafeForceInlineEarly] vector<float16_t,N> asfloat16<let N : int>(vector<float16_t,N> value) { return value; }
-[__unsafeForceInlineEarly] matrix<float16_t,R,C> asfloat16<let R : int, let C : int>(matrix<float16_t,R,C> value) { return value; }
+[__unsafeForceInlineEarly][__readNone] float16_t asfloat16(float16_t value) { return value; }
+[__unsafeForceInlineEarly][__readNone] vector<float16_t,N> asfloat16<let N : int>(vector<float16_t,N> value) { return value; }
+[__unsafeForceInlineEarly][__readNone] matrix<float16_t,R,C> asfloat16<let R : int, let C : int>(matrix<float16_t,R,C> value) { return value; }
-[__unsafeForceInlineEarly] int16_t asint16(int16_t value) { return value; }
-[__unsafeForceInlineEarly] vector<int16_t,N> asint16<let N : int>(vector<int16_t,N> value) { return value; }
-[__unsafeForceInlineEarly] matrix<int16_t,R,C> asint16<let R : int, let C : int>(matrix<int16_t,R,C> value) { return value; }
+[__unsafeForceInlineEarly][__readNone] int16_t asint16(int16_t value) { return value; }
+[__unsafeForceInlineEarly][__readNone] vector<int16_t,N> asint16<let N : int>(vector<int16_t,N> value) { return value; }
+[__unsafeForceInlineEarly][__readNone] matrix<int16_t,R,C> asint16<let R : int, let C : int>(matrix<int16_t,R,C> value) { return value; }
-[__unsafeForceInlineEarly] uint16_t asuint16(uint16_t value) { return value; }
-[__unsafeForceInlineEarly] vector<uint16_t,N> asuint16<let N : int>(vector<uint16_t,N> value) { return value; }
-[__unsafeForceInlineEarly] matrix<uint16_t,R,C> asuint16<let R : int, let C : int>(matrix<uint16_t,R,C> value) { return value; }
+[__unsafeForceInlineEarly][__readNone] uint16_t asuint16(uint16_t value) { return value; }
+[__unsafeForceInlineEarly][__readNone] vector<uint16_t,N> asuint16<let N : int>(vector<uint16_t,N> value) { return value; }
+[__unsafeForceInlineEarly][__readNone] matrix<uint16_t,R,C> asuint16<let R : int, let C : int>(matrix<uint16_t,R,C> value) { return value; }
// Signed<->unsigned cases:
-[__unsafeForceInlineEarly] int16_t asint16(uint16_t value) { return value; }
-[__unsafeForceInlineEarly] vector<int16_t,N> asint16<let N : int>(vector<uint16_t,N> value) { return value; }
-[__unsafeForceInlineEarly] matrix<int16_t,R,C> asint16<let R : int, let C : int>(matrix<uint16_t,R,C> value) { return value; }
+[__unsafeForceInlineEarly][__readNone] int16_t asint16(uint16_t value) { return value; }
+[__unsafeForceInlineEarly][__readNone] vector<int16_t,N> asint16<let N : int>(vector<uint16_t,N> value) { return value; }
+[__unsafeForceInlineEarly][__readNone] matrix<int16_t,R,C> asint16<let R : int, let C : int>(matrix<uint16_t,R,C> value) { return value; }
-[__unsafeForceInlineEarly] uint16_t asuint16(int16_t value) { return value; }
-[__unsafeForceInlineEarly] vector<uint16_t,N> asuint16<let N : int>(vector<int16_t,N> value) { return value; }
-[__unsafeForceInlineEarly] matrix<uint16_t,R,C> asuint16<let R : int, let C : int>(matrix<int16_t,R,C> value) { return value; }
+[__unsafeForceInlineEarly][__readNone] uint16_t asuint16(int16_t value) { return value; }
+[__unsafeForceInlineEarly][__readNone] vector<uint16_t,N> asuint16<let N : int>(vector<int16_t,N> value) { return value; }
+[__unsafeForceInlineEarly][__readNone] matrix<uint16_t,R,C> asuint16<let R : int, let C : int>(matrix<int16_t,R,C> value) { return value; }
// Float->unsigned cases:
__target_intrinsic(hlsl)
__target_intrinsic(glsl, "uint16_t(packHalf2x16(vec2($0, 0.0)))")
__target_intrinsic(cuda, "__half_as_ushort")
+[__readNone]
uint16_t asuint16(float16_t value);
+[__readNone]
vector<uint16_t,N> asuint16<let N : int>(vector<float16_t,N> value)
{ VECTOR_MAP_UNARY(uint16_t, N, asuint16, value); }
+[__readNone]
matrix<uint16_t,R,C> asuint16<let R : int, let C : int>(matrix<float16_t,R,C> value)
{ MATRIX_MAP_UNARY(uint16_t, R, C, asuint16, value); }
@@ -1199,11 +1249,14 @@ matrix<uint16_t,R,C> asuint16<let R : int, let C : int>(matrix<float16_t,R,C> va
__target_intrinsic(hlsl)
__target_intrinsic(glsl, "float16_t(unpackHalf2x16($0).x)")
__target_intrinsic(cuda, "__ushort_as_half")
+[__readNone]
float16_t asfloat16(uint16_t value);
+[__readNone]
vector<float16_t,N> asfloat16<let N : int>(vector<uint16_t,N> value)
{ VECTOR_MAP_UNARY(float16_t, N, asfloat16, value); }
+[__readNone]
matrix<float16_t,R,C> asfloat16<let R : int, let C : int>(matrix<uint16_t,R,C> value)
{ MATRIX_MAP_UNARY(float16_t, R, C, asfloat16, value); }
@@ -1211,16 +1264,17 @@ matrix<float16_t,R,C> asfloat16<let R : int, let C : int>(matrix<uint16_t,R,C> v
__target_intrinsic(hlsl)
__target_intrinsic(cuda, "__half_as_short")
-[__unsafeForceInlineEarly] int16_t asint16(float16_t value) { return asuint16(value); }
-__target_intrinsic(hlsl) [__unsafeForceInlineEarly] vector<int16_t,N> asint16<let N : int>(vector<float16_t,N> value) { return asuint16(value); }
-__target_intrinsic(hlsl) [__unsafeForceInlineEarly] matrix<int16_t,R,C> asint16<let R : int, let C : int>(matrix<float16_t,R,C> value) { return asuint16(value); }
+[__unsafeForceInlineEarly][__readNone] int16_t asint16(float16_t value) { return asuint16(value); }
+__target_intrinsic(hlsl) [__unsafeForceInlineEarly][__readNone] vector<int16_t,N> asint16<let N : int>(vector<float16_t,N> value) { return asuint16(value); }
+__target_intrinsic(hlsl) [__unsafeForceInlineEarly][__readNone] matrix<int16_t,R,C> asint16<let R : int, let C : int>(matrix<float16_t,R,C> value) { return asuint16(value); }
__target_intrinsic(hlsl)
__target_intrinsic(cuda, "__short_as_half")
+[__readNone]
[__unsafeForceInlineEarly] float16_t asfloat16(int16_t value) { return asfloat16(asuint16(value)); }
-__target_intrinsic(hlsl) [__unsafeForceInlineEarly] vector<float16_t,N> asfloat16<let N : int>(vector<int16_t,N> value) { return asfloat16(asuint16(value)); }
-__target_intrinsic(hlsl) [__unsafeForceInlineEarly] matrix<float16_t,R,C> asfloat16<let R : int, let C : int>(matrix<int16_t,R,C> value) { return asfloat16(asuint16(value)); }
+__target_intrinsic(hlsl) [__unsafeForceInlineEarly][__readNone] vector<float16_t,N> asfloat16<let N : int>(vector<int16_t,N> value) { return asfloat16(asuint16(value)); }
+__target_intrinsic(hlsl) [__unsafeForceInlineEarly][__readNone] matrix<float16_t,R,C> asfloat16<let R : int, let C : int>(matrix<int16_t,R,C> value) { return asfloat16(asuint16(value)); }
// Inverse tangent (HLSL SM 1.0)
__generic<T : __BuiltinFloatingPointType>
@@ -1229,12 +1283,14 @@ __target_intrinsic(glsl)
__target_intrinsic(cuda, "$P_atan($0)")
__target_intrinsic(cpp, "$P_atan($0)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Atan _0")
+[__readNone]
T atan(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Atan _0")
+[__readNone]
vector<T, N> atan(vector<T, N> x)
{
VECTOR_MAP_UNARY(T, N, atan, x);
@@ -1242,6 +1298,7 @@ vector<T, N> atan(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> atan(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(T, N, M, atan, x);
@@ -1253,12 +1310,14 @@ __target_intrinsic(glsl,"atan($0,$1)")
__target_intrinsic(cuda, "$P_atan2($0, $1)")
__target_intrinsic(cpp, "$P_atan2($0, $1)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Atan2 _0 _1")
+[__readNone]
T atan2(T y, T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl,"atan($0,$1)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Atan2 _0 _1")
+[__readNone]
vector<T, N> atan2(vector<T, N> y, vector<T, N> x)
{
VECTOR_MAP_BINARY(T, N, atan2, y, x);
@@ -1266,6 +1325,7 @@ vector<T, N> atan2(vector<T, N> y, vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T,N,M> atan2(matrix<T,N,M> y, matrix<T,N,M> x)
{
MATRIX_MAP_BINARY(T, N, M, atan2, y, x);
@@ -1278,12 +1338,14 @@ __target_intrinsic(glsl)
__target_intrinsic(cuda, "$P_ceil($0)")
__target_intrinsic(cpp, "$P_ceil($0)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Ceil _0")
+[__readNone]
T ceil(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Ceil _0")
+[__readNone]
vector<T, N> ceil(vector<T, N> x)
{
VECTOR_MAP_UNARY(T, N, ceil, x);
@@ -1291,6 +1353,7 @@ vector<T, N> ceil(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> ceil(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(T, N, M, ceil, x);
@@ -1305,6 +1368,7 @@ __generic<T : __BuiltinIntegerType>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 fus(FClamp, UClamp, SClamp) _0 _1 _2")
+[__readNone]
T clamp(T x, T minBound, T maxBound)
{
return min(max(x, minBound), maxBound);
@@ -1314,6 +1378,7 @@ __generic<T : __BuiltinIntegerType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 fus(FClamp, UClamp, SClamp) _0 _1 _2")
+[__readNone]
vector<T, N> clamp(vector<T, N> x, vector<T, N> minBound, vector<T, N> maxBound)
{
return min(max(x, minBound), maxBound);
@@ -1321,6 +1386,7 @@ vector<T, N> clamp(vector<T, N> x, vector<T, N> minBound, vector<T, N> maxBound)
__generic<T : __BuiltinIntegerType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T,N,M> clamp(matrix<T,N,M> x, matrix<T,N,M> minBound, matrix<T,N,M> maxBound)
{
return min(max(x, minBound), maxBound);
@@ -1330,6 +1396,7 @@ __generic<T : __BuiltinFloatingPointType>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 fus(FClamp, UClamp, SClamp) _0 _1 _2")
+[__readNone]
T clamp(T x, T minBound, T maxBound)
{
return min(max(x, minBound), maxBound);
@@ -1339,6 +1406,7 @@ __generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 fus(FClamp, UClamp, SClamp) _0 _1 _2")
+[__readNone]
vector<T, N> clamp(vector<T, N> x, vector<T, N> minBound, vector<T, N> maxBound)
{
return min(max(x, minBound), maxBound);
@@ -1346,6 +1414,7 @@ vector<T, N> clamp(vector<T, N> x, vector<T, N> minBound, vector<T, N> maxBound)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T,N,M> clamp(matrix<T,N,M> x, matrix<T,N,M> minBound, matrix<T,N,M> maxBound)
{
return min(max(x, minBound), maxBound);
@@ -1354,6 +1423,7 @@ matrix<T,N,M> clamp(matrix<T,N,M> x, matrix<T,N,M> minBound, matrix<T,N,M> maxBo
// Clip (discard) fragment conditionally
__generic<T : __BuiltinFloatingPointType>
__target_intrinsic(hlsl)
+[__readNone]
void clip(T x)
{
if(x < T(0)) discard;
@@ -1361,6 +1431,7 @@ void clip(T x)
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
+[__readNone]
void clip(vector<T,N> x)
{
if(any(x < T(0))) discard;
@@ -1368,6 +1439,7 @@ void clip(vector<T,N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
void clip(matrix<T,N,M> x)
{
if(any(x < T(0))) discard;
@@ -1380,12 +1452,14 @@ __target_intrinsic(glsl)
__target_intrinsic(cuda, "$P_cos($0)")
__target_intrinsic(cpp, "$P_cos($0)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Cos _0")
+[__readNone]
T cos(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Cos _0")
+[__readNone]
vector<T, N> cos(vector<T, N> x)
{
VECTOR_MAP_UNARY(T,N, cos, x);
@@ -1393,6 +1467,7 @@ vector<T, N> cos(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> cos(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(T, N, M, cos, x);
@@ -1405,12 +1480,14 @@ __target_intrinsic(glsl)
__target_intrinsic(cuda, "$P_cosh($0)")
__target_intrinsic(cpp, "$P_cosh($0)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Cosh _0")
+[__readNone]
T cosh(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Cosh _0")
+[__readNone]
vector<T,N> cosh(vector<T,N> x)
{
VECTOR_MAP_UNARY(T,N, cosh, x);
@@ -1418,6 +1495,7 @@ vector<T,N> cosh(vector<T,N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> cosh(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(T, N, M, cosh, x);
@@ -1428,6 +1506,7 @@ __target_intrinsic(hlsl)
__target_intrinsic(glsl, "bitCount")
__target_intrinsic(cuda, "$P_countbits($0)")
__target_intrinsic(cpp, "$P_countbits($0)")
+[__readNone]
uint countbits(uint value);
// Cross product
@@ -1436,6 +1515,7 @@ __generic<T : __BuiltinArithmeticType>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Cross _0 _1")
+[__readNone]
vector<T,3> cross(vector<T,3> left, vector<T,3> right)
{
return vector<T,3>(
@@ -1446,6 +1526,7 @@ vector<T,3> cross(vector<T,3> left, vector<T,3> right)
// Convert encoded color
__target_intrinsic(hlsl)
+[__readNone]
int4 D3DCOLORtoUBYTE4(float4 color)
{
let scaled = color.zyxw * 255.001999f;
@@ -1455,11 +1536,13 @@ int4 D3DCOLORtoUBYTE4(float4 color)
// Partial-difference derivatives
// Scalar `ddx` overload for any `T` conforming to `__BuiltinFloatingPointType`.
// Declaration only (no body); for GLSL the intrinsic name given is `dFdx`.
// NOTE(review): the scalar `ddy` declaration also carries `__target_intrinsic(hlsl)`;
// no such line is present here — confirm that the omission is intentional.
__generic<T : __BuiltinFloatingPointType>
__target_intrinsic(glsl, dFdx)
+[__readNone]
T ddx(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, dFdx)
+[__readNone]
vector<T, N> ddx(vector<T, N> x)
{
VECTOR_MAP_UNARY(T, N, ddx, x);
@@ -1467,6 +1550,7 @@ vector<T, N> ddx(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> ddx(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(T, N, M, ddx, x);
@@ -1476,12 +1560,14 @@ __generic<T : __BuiltinFloatingPointType>
__target_intrinsic(hlsl)
__glsl_extension(GL_ARB_derivative_control)
__target_intrinsic(glsl, dFdxCoarse)
+[__readNone]
T ddx_coarse(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__glsl_extension(GL_ARB_derivative_control)
__target_intrinsic(glsl, dFdxCoarse)
+[__readNone]
vector<T, N> ddx_coarse(vector<T, N> x)
{
VECTOR_MAP_UNARY(T, N, ddx_coarse, x);
@@ -1489,6 +1575,7 @@ vector<T, N> ddx_coarse(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> ddx_coarse(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(T, N, M, ddx_coarse, x);
@@ -1498,12 +1585,14 @@ __generic<T : __BuiltinFloatingPointType>
__target_intrinsic(hlsl)
__glsl_extension(GL_ARB_derivative_control)
__target_intrinsic(glsl, dFdxFine)
+[__readNone]
T ddx_fine(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__glsl_extension(GL_ARB_derivative_control)
__target_intrinsic(glsl, dFdxFine)
+[__readNone]
vector<T, N> ddx_fine(vector<T, N> x)
{
VECTOR_MAP_UNARY(T, N, ddx_fine, x);
@@ -1511,6 +1600,7 @@ vector<T, N> ddx_fine(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> ddx_fine(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(T, N, M, ddx_fine, x);
@@ -1519,11 +1609,13 @@ matrix<T, N, M> ddx_fine(matrix<T, N, M> x)
// Scalar `ddy` overload for any `T` conforming to `__BuiltinFloatingPointType`.
// Declaration only (no body); tagged as a target intrinsic for HLSL and for
// GLSL, where the intrinsic name given is `dFdy`.
__generic<T : __BuiltinFloatingPointType>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, dFdy)
+[__readNone]
T ddy(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, dFdy)
+[__readNone]
vector<T, N> ddy(vector<T, N> x)
{
VECTOR_MAP_UNARY(T, N, ddy, x);
@@ -1531,6 +1623,7 @@ vector<T, N> ddy(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> ddy(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(T, N, M, ddy, x);
@@ -1539,12 +1632,14 @@ matrix<T, N, M> ddy(matrix<T, N, M> x)
// Scalar `ddy_coarse` overload for any `T` conforming to `__BuiltinFloatingPointType`.
// Declaration only (no body). The GLSL mapping uses `dFdyCoarse` and declares
// a dependency on the `GL_ARB_derivative_control` extension.
// NOTE(review): the scalar `ddx_coarse` and `ddy_fine` declarations also carry
// `__target_intrinsic(hlsl)`; no such line is present here — confirm intent.
__generic<T : __BuiltinFloatingPointType>
__glsl_extension(GL_ARB_derivative_control)
__target_intrinsic(glsl, dFdyCoarse)
+[__readNone]
T ddy_coarse(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__glsl_extension(GL_ARB_derivative_control)
__target_intrinsic(glsl, dFdyCoarse)
+[__readNone]
vector<T, N> ddy_coarse(vector<T, N> x)
{
VECTOR_MAP_UNARY(T, N, ddy_coarse, x);
@@ -1552,6 +1647,7 @@ vector<T, N> ddy_coarse(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> ddy_coarse(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(T, N, M, ddy_coarse, x);
@@ -1561,12 +1657,14 @@ __generic<T : __BuiltinFloatingPointType>
__target_intrinsic(hlsl)
__glsl_extension(GL_ARB_derivative_control)
__target_intrinsic(glsl, dFdyFine)
+[__readNone]
T ddy_fine(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__glsl_extension(GL_ARB_derivative_control)
__target_intrinsic(glsl, dFdyFine)
+[__readNone]
vector<T, N> ddy_fine(vector<T, N> x)
{
VECTOR_MAP_UNARY(T, N, ddy_fine, x);
@@ -1574,6 +1672,7 @@ vector<T, N> ddy_fine(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> ddy_fine(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(T, N, M, ddy_fine, x);
@@ -1586,6 +1685,7 @@ __generic<T : __BuiltinFloatingPointType>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Degrees _0")
+[__readNone]
T degrees(T x)
{
return x * (T(180) / T.getPi());
@@ -1595,6 +1695,7 @@ __generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Degrees _0")
+[__readNone]
vector<T, N> degrees(vector<T, N> x)
{
VECTOR_MAP_UNARY(T, N, degrees, x);
@@ -1602,6 +1703,7 @@ vector<T, N> degrees(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> degrees(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(T, N, M, degrees, x);
@@ -1613,6 +1715,7 @@ __generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Determinant _0")
+[__readNone]
T determinant(matrix<T,N,N> m);
// Barrier for device memory
@@ -1630,6 +1733,7 @@ __generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Distance _0 _1")
+[__readNone]
T distance(vector<T, N> x, vector<T, N> y)
{
return length(x - y);
@@ -1640,6 +1744,7 @@ T distance(vector<T, N> x, vector<T, N> y)
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
+[__readNone]
T dot(vector<T, N> x, vector<T, N> y)
{
T result = T(0);
@@ -1650,6 +1755,7 @@ T dot(vector<T, N> x, vector<T, N> y)
__generic<T : __BuiltinIntegerType, let N : int>
__target_intrinsic(hlsl)
+[__readNone]
T dot(vector<T, N> x, vector<T, N> y)
{
T result = T(0);
@@ -1682,15 +1788,18 @@ RWStructuredBuffer<T> __getEquivalentStructuredBuffer<T>(RWByteAddressBuffer b);
// Scalar `EvaluateAttributeAtCentroid` overload for any `T` conforming to
// `__BuiltinArithmeticType`. Declaration only (no body).
// GLSL: mapped to `interpolateAtCentroid`.
// SPIR-V (direct): an `OpExtInst` naming `glsl450 InterpolateAtCentroid` with
// the single operand `_0` (presumably the first call argument).
__generic<T : __BuiltinArithmeticType>
__target_intrinsic(glsl, interpolateAtCentroid)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 InterpolateAtCentroid _0")
+[__readNone]
T EvaluateAttributeAtCentroid(T x);
// Vector `EvaluateAttributeAtCentroid` overload (`vector<T,N>` in and out).
// Declaration only (no body); it carries the same GLSL name
// (`interpolateAtCentroid`) and the same SPIR-V `OpExtInst` string as the
// scalar overload, i.e. no per-element expansion is written here.
__generic<T : __BuiltinArithmeticType, let N : int>
__target_intrinsic(glsl, interpolateAtCentroid)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 InterpolateAtCentroid _0")
+[__readNone]
vector<T,N> EvaluateAttributeAtCentroid(vector<T,N> x);
__generic<T : __BuiltinArithmeticType, let N : int, let M : int>
__target_intrinsic(glsl, interpolateAtCentroid)
+[__readNone]
matrix<T,N,M> EvaluateAttributeAtCentroid(matrix<T,N,M> x)
{
MATRIX_MAP_UNARY(T, N, M, EvaluateAttributeAtCentroid, x);
@@ -1699,15 +1808,18 @@ matrix<T,N,M> EvaluateAttributeAtCentroid(matrix<T,N,M> x)
// Scalar `EvaluateAttributeAtSample` overload for any `T` conforming to
// `__BuiltinArithmeticType`. Declaration only (no body).
// GLSL: expands to `interpolateAtSample($0, int($1))`; the template wraps `$1`
// in an `int(...)` conversion (`$0`/`$1` presumably stand for `x` and
// `sampleindex`, which is declared `uint`).
// SPIR-V (direct): an `OpExtInst` naming `glsl450 InterpolateAtSample` with
// operands `_0 _1`.
__generic<T : __BuiltinArithmeticType>
__target_intrinsic(glsl, "interpolateAtSample($0, int($1))")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 InterpolateAtSample _0 _1")
+[__readNone]
T EvaluateAttributeAtSample(T x, uint sampleindex);
// Vector `EvaluateAttributeAtSample` overload (`vector<T,N>` in and out).
// Declaration only (no body); it uses the same GLSL template
// (`interpolateAtSample($0, int($1))`) and the same SPIR-V `OpExtInst` string
// as the scalar overload.
__generic<T : __BuiltinArithmeticType, let N : int>
__target_intrinsic(glsl, "interpolateAtSample($0, int($1))")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 InterpolateAtSample _0 _1")
+[__readNone]
vector<T,N> EvaluateAttributeAtSample(vector<T,N> x, uint sampleindex);
__generic<T : __BuiltinArithmeticType, let N : int, let M : int>
__target_intrinsic(glsl, "interpolateAtSample($0, int($1))")
+[__readNone]
matrix<T,N,M> EvaluateAttributeAtSample(matrix<T,N,M> x, uint sampleindex)
{
matrix<T,N,M> result;
@@ -1721,15 +1833,18 @@ matrix<T,N,M> EvaluateAttributeAtSample(matrix<T,N,M> x, uint sampleindex)
// Scalar `EvaluateAttributeSnapped` overload for any `T` conforming to
// `__BuiltinArithmeticType`. Declaration only (no body).
// GLSL: expands to `interpolateAtOffset($0, vec2($1) / 16.0f)`, i.e. the
// second placeholder is converted to `vec2` and divided by 16.
// SPIR-V (direct): a three-step sequence — `OpConvertSToF` of `_1` to float2,
// then raw opcode 136 against a (16.0, 16.0) constant, then `OpExtInst` with
// GLSL.std.450 instruction number 78 on `_0` and the scaled offset.
// NOTE(review): 136 and 78 are written numerically; per the SPIR-V and
// GLSL.std.450 specs these should be OpFDiv and InterpolateAtOffset — confirm.
__generic<T : __BuiltinArithmeticType>
__target_intrinsic(glsl, "interpolateAtOffset($0, vec2($1) / 16.0f)")
__target_intrinsic(spirv_direct, "%foffset = OpConvertSToF _type(float2) resultId _1; %offsetdiv16 = 136 _type(float2) resultId %foffset const(float2, 16.0, 16.0); OpExtInst resultType resultId glsl450 78 _0 %offsetdiv16")
+[__readNone]
T EvaluateAttributeSnapped(T x, int2 offset);
// Vector `EvaluateAttributeSnapped` overload (`vector<T,N>` in and out).
// Declaration only (no body); it uses the same GLSL template
// (`interpolateAtOffset($0, vec2($1) / 16.0f)`) and the same SPIR-V
// instruction sequence as the scalar overload.
// NOTE(review): the SPIR-V string uses raw numbers 136 and 78 instead of
// instruction names — presumably OpFDiv and InterpolateAtOffset; confirm.
__generic<T : __BuiltinArithmeticType, let N : int>
__target_intrinsic(glsl, "interpolateAtOffset($0, vec2($1) / 16.0f)")
__target_intrinsic(spirv_direct, "%foffset = OpConvertSToF _type(float2) resultId _1; %offsetdiv16 = 136 _type(float2) resultId %foffset const(float2, 16.0, 16.0); OpExtInst resultType resultId glsl450 78 _0 %offsetdiv16")
+[__readNone]
vector<T,N> EvaluateAttributeSnapped(vector<T,N> x, int2 offset);
__generic<T : __BuiltinArithmeticType, let N : int, let M : int>
__target_intrinsic(glsl, "interpolateAtOffset($0, vec2($1) / 16.0f)")
+[__readNone]
matrix<T,N,M> EvaluateAttributeSnapped(matrix<T,N,M> x, int2 offset)
{
matrix<T,N,M> result;
@@ -1748,12 +1863,14 @@ __target_intrinsic(glsl)
__target_intrinsic(cuda, "$P_exp($0)")
__target_intrinsic(cpp, "$P_exp($0)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Exp _0")
+[__readNone]
T exp(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Exp _0")
+[__readNone]
vector<T, N> exp(vector<T, N> x)
{
VECTOR_MAP_UNARY(T, N, exp, x);
@@ -1761,6 +1878,7 @@ vector<T, N> exp(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> exp(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(T, N, M, exp, x);
@@ -1774,12 +1892,14 @@ __target_intrinsic(glsl)
__target_intrinsic(cuda, "$P_exp2($0)")
__target_intrinsic(cpp, "$P_exp2($0)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Exp2 _0")
+[__readNone]
T exp2(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Exp2 _0")
+[__readNone]
vector<T,N> exp2(vector<T,N> x)
{
VECTOR_MAP_UNARY(T, N, exp2, x);
@@ -1787,6 +1907,7 @@ vector<T,N> exp2(vector<T,N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T,N,M> exp2(matrix<T,N,M> x)
{
MATRIX_MAP_UNARY(T, N, M, exp2, x);
@@ -1799,10 +1920,12 @@ __glsl_version(420)
__target_intrinsic(hlsl)
__cuda_sm_version(6.0)
__target_intrinsic(cuda, "__half2float(__ushort_as_half($0))")
+[__readNone]
float f16tof32(uint value);
__generic<let N : int>
__target_intrinsic(hlsl)
+[__readNone]
vector<float, N> f16tof32(vector<uint, N> value)
{
VECTOR_MAP_UNARY(float, N, f16tof32, value);
@@ -1816,10 +1939,12 @@ __glsl_version(420)
__target_intrinsic(hlsl)
__cuda_sm_version(6.0)
__target_intrinsic(cuda, "__half_as_ushort(__float2half($0))")
+[__readNone]
uint f32tof16(float value);
__generic<let N : int>
__target_intrinsic(hlsl)
+[__readNone]
vector<uint, N> f32tof16(vector<float, N> value)
{
VECTOR_MAP_UNARY(uint, N, f32tof16, value);
@@ -1833,11 +1958,13 @@ vector<uint, N> f32tof16(vector<float, N> value)
__target_intrinsic(glsl, "unpackHalf2x16($0).x")
__target_intrinsic(cuda, "__half2float")
__glsl_version(420)
+[__readNone]
float f16tof32(float16_t value);
__generic<let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(cuda, "__half2float")
+[__readNone]
vector<float, N> f16tof32(vector<float16_t, N> value)
{
VECTOR_MAP_UNARY(float, N, f16tof32, value);
@@ -1847,10 +1974,12 @@ vector<float, N> f16tof32(vector<float16_t, N> value)
__target_intrinsic(glsl, "packHalf2x16(vec2($0,0.0))")
__glsl_version(420)
__target_intrinsic(cuda, "__float2half")
+[__readNone]
float16_t f32tof16_(float value);
__generic<let N : int>
__target_intrinsic(cuda, "__float2half")
+[__readNone]
vector<float16_t, N> f32tof16_(vector<float, N> value)
{
VECTOR_MAP_UNARY(uint, N, f32tof16, value);
@@ -1862,6 +1991,7 @@ vector<float16_t, N> f32tof16_(vector<float, N> value)
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
+[__readNone]
vector<T,N> faceforward(vector<T,N> n, vector<T,N> i, vector<T,N> ng)
{
return dot(ng, i) < T(0.0f) ? n : -n;
@@ -1873,12 +2003,14 @@ __target_intrinsic(glsl,"findMSB")
__target_intrinsic(cuda, "$P_firstbithigh($0)")
__target_intrinsic(cpp, "$P_firstbithigh($0)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 FindSMsb _0")
+[__readNone]
int firstbithigh(int value);
__target_intrinsic(hlsl)
__target_intrinsic(glsl,"findMSB")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 FindSMsb _0")
__generic<let N : int>
+[__readNone]
vector<int, N> firstbithigh(vector<int, N> value)
{
VECTOR_MAP_UNARY(int, N, firstbithigh, value);
@@ -1889,12 +2021,14 @@ __target_intrinsic(glsl,"findMSB")
__target_intrinsic(cuda, "$P_firstbithigh($0)")
__target_intrinsic(cpp, "$P_firstbithigh($0)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 FindUMsb _0")
+[__readNone]
uint firstbithigh(uint value);
__target_intrinsic(hlsl)
__target_intrinsic(glsl,"findMSB")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 FindUMsb _0")
__generic<let N : int>
+[__readNone]
vector<uint,N> firstbithigh(vector<uint,N> value)
{
VECTOR_MAP_UNARY(uint, N, firstbithigh, value);
@@ -1906,12 +2040,14 @@ __target_intrinsic(glsl,"findLSB")
__target_intrinsic(cuda, "$P_firstbitlow($0)")
__target_intrinsic(cpp, "$P_firstbitlow($0)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 FindILsb _0")
+[__readNone]
int firstbitlow(int value);
__target_intrinsic(hlsl)
__target_intrinsic(glsl,"findLSB")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 FindILsb _0")
__generic<let N : int>
+[__readNone]
vector<int,N> firstbitlow(vector<int,N> value)
{
VECTOR_MAP_UNARY(int, N, firstbitlow, value);
@@ -1922,12 +2058,14 @@ __target_intrinsic(glsl,"findLSB")
__target_intrinsic(cuda, "$P_firstbitlow($0)")
__target_intrinsic(cpp, "$P_firstbitlow($0)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 FindILsb _0")
+[__readNone]
uint firstbitlow(uint value);
__target_intrinsic(hlsl)
__target_intrinsic(glsl,"findLSB")
__generic<let N : int>
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 FindILsb _0")
+[__readNone]
vector<uint,N> firstbitlow(vector<uint,N> value)
{
VECTOR_MAP_UNARY(uint, N, firstbitlow, value);
@@ -1941,12 +2079,14 @@ __target_intrinsic(glsl)
__target_intrinsic(cuda, "$P_floor($0)")
__target_intrinsic(cpp, "$P_floor($0)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Floor _0")
+[__readNone]
T floor(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Floor _0")
+[__readNone]
vector<T, N> floor(vector<T, N> x)
{
VECTOR_MAP_UNARY(T, N, floor, x);
@@ -1954,6 +2094,7 @@ vector<T, N> floor(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> floor(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(T, N, M, floor, x);
@@ -1965,12 +2106,14 @@ __target_intrinsic(glsl)
__target_intrinsic(cuda, "$P_fma($0, $1, $2)")
__target_intrinsic(cpp, "$P_fma($0, $1, $2)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Fma _0 _1 _2")
+[__readNone]
double fma(double a, double b, double c);
__generic<let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Fma _0 _1 _2")
+[__readNone]
vector<double, N> fma(vector<double, N> a, vector<double, N> b, vector<double, N> c)
{
VECTOR_MAP_TRINARY(double, N, fma, a, b, c);
@@ -1978,6 +2121,7 @@ vector<double, N> fma(vector<double, N> a, vector<double, N> b, vector<double, N
__generic<let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<double, N, M> fma(matrix<double, N, M> a, matrix<double, N, M> b, matrix<double, N, M> c)
{
MATRIX_MAP_TRINARY(double, N, M, fma, a, b, c);
@@ -1988,6 +2132,7 @@ __generic<T : __BuiltinFloatingPointType>
__target_intrinsic(hlsl)
__target_intrinsic(cuda, "$P_fmod($0, $1)")
__target_intrinsic(cpp, "$P_fmod($0, $1)")
+[__readNone]
T fmod(T x, T y)
{
return x - y * trunc(x/y);
@@ -1995,6 +2140,7 @@ T fmod(T x, T y)
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
+[__readNone]
vector<T, N> fmod(vector<T, N> x, vector<T, N> y)
{
VECTOR_MAP_BINARY(T, N, fmod, x, y);
@@ -2002,6 +2148,7 @@ vector<T, N> fmod(vector<T, N> x, vector<T, N> y)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> fmod(matrix<T, N, M> x, matrix<T, N, M> y)
{
MATRIX_MAP_BINARY(T, N, M, fmod, x, y);
@@ -2014,18 +2161,21 @@ __target_intrinsic(glsl, fract)
__target_intrinsic(cuda, "$P_frac($0)")
__target_intrinsic(cpp, "$P_frac($0)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Fract _0")
+[__readNone]
T frac(T x);
// Vector `frac` overload for `vector<T, N>` with `T` conforming to
// `__BuiltinFloatingPointType`.
// Tagged as a target intrinsic for HLSL, for GLSL under the name `fract`, and
// for direct SPIR-V as an `OpExtInst` naming `glsl450 Fract` on `_0`.
// The body is a single `VECTOR_MAP_UNARY` invocation naming `frac`; the macro
// is defined elsewhere — presumably it applies the scalar `frac` to each
// element of `x` and returns the result (TODO confirm).
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, fract)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Fract _0")
+[__readNone]
vector<T, N> frac(vector<T, N> x)
{
VECTOR_MAP_UNARY(T, N, frac, x);
}
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
+[__readNone]
matrix<T, N, M> frac(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(T, N, M, frac, x);
@@ -2036,12 +2186,14 @@ __generic<T : __BuiltinFloatingPointType>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Frexp _0 _1")
+[__readNone]
T frexp(T x, out T exp);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Frexp _0 _1")
+[__readNone]
vector<T, N> frexp(vector<T, N> x, out vector<T, N> exp)
{
VECTOR_MAP_BINARY(T, N, frexp, x, exp);
@@ -2049,6 +2201,7 @@ vector<T, N> frexp(vector<T, N> x, out vector<T, N> exp)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> frexp(matrix<T, N, M> x, out matrix<T, N, M> exp)
{
MATRIX_MAP_BINARY(T, N, M, frexp, x, exp);
@@ -2056,11 +2209,13 @@ matrix<T, N, M> frexp(matrix<T, N, M> x, out matrix<T, N, M> exp)
// Texture filter width
// Scalar `fwidth` overload for any `T` conforming to `__BuiltinFloatingPointType`.
// Declaration only (no body). Unlike the vector overload that follows it, this
// declaration carries no `__target_intrinsic(...)` lines.
__generic<T : __BuiltinFloatingPointType>
+[__readNone]
T fwidth(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
+[__readNone]
vector<T, N> fwidth(vector<T, N> x)
{
VECTOR_MAP_UNARY(T, N, fwidth, x);
@@ -2068,6 +2223,7 @@ vector<T, N> fwidth(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> fwidth(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(T, N, M, fwidth, x);
@@ -2141,9 +2297,11 @@ matrix<T,N,M> GetAttributeAtVertex(matrix<T,N,M> attribute, uint vertexIndex);
// Get number of samples in render target
// Declaration only (no body, no parameters); returns `uint`.
// NOTE(review): this patch tags the function `[__readNone]` even though it has
// no arguments — confirm the attribute's contract fits a query of
// render-target state.
+[__readNone]
uint GetRenderTargetSampleCount();
// Get position of given sample
// Declaration only (no body); takes the sample index as `int Index` and
// returns a `float2`.
+[__readNone]
float2 GetRenderTargetSamplePosition(int Index);
// Group memory barrier
@@ -2284,6 +2442,7 @@ __generic<T : __BuiltinFloatingPointType>
__target_intrinsic(hlsl)
__target_intrinsic(cuda, "$P_isfinite($0)")
__target_intrinsic(cpp, "$P_isfinite($0)")
+[__readNone]
bool isfinite(T x)
{
return !(isinf(x) || isnan(x));
@@ -2291,6 +2450,7 @@ bool isfinite(T x)
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
+[__readNone]
vector<bool, N> isfinite(vector<T, N> x)
{
VECTOR_MAP_UNARY(bool, N, isfinite, x);
@@ -2298,6 +2458,7 @@ vector<bool, N> isfinite(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<bool, N, M> isfinite(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(bool, N, M, isfinite, x);
@@ -2309,11 +2470,13 @@ __target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(cuda, "$P_isinf($0)")
__target_intrinsic(cpp, "$P_isinf($0)")
+[__readNone]
bool isinf(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
+[__readNone]
vector<bool, N> isinf(vector<T, N> x)
{
VECTOR_MAP_UNARY(bool, N, isinf, x);
@@ -2321,6 +2484,7 @@ vector<bool, N> isinf(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<bool, N, M> isinf(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(bool, N, M, isinf, x);
@@ -2332,11 +2496,13 @@ __target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(cuda, "$P_isnan($0)")
__target_intrinsic(cpp, "$P_isnan($0)")
+[__readNone]
bool isnan(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
+[__readNone]
vector<bool, N> isnan(vector<T, N> x)
{
VECTOR_MAP_UNARY(bool, N, isnan, x);
@@ -2344,6 +2510,7 @@ vector<bool, N> isnan(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<bool, N, M> isnan(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(bool, N, M, isnan, x);
@@ -2354,6 +2521,7 @@ matrix<bool, N, M> isnan(matrix<T, N, M> x)
__generic<T : __BuiltinFloatingPointType>
__target_intrinsic(hlsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Ldexp _0 _1")
+[__readNone]
T ldexp(T x, T exp)
{
return x * exp2(exp);
@@ -2362,6 +2530,7 @@ T ldexp(T x, T exp)
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Ldexp _0 _1")
+[__readNone]
vector<T, N> ldexp(vector<T, N> x, vector<T, N> exp)
{
return x * exp2(exp);
@@ -2369,6 +2538,7 @@ vector<T, N> ldexp(vector<T, N> x, vector<T, N> exp)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> ldexp(matrix<T, N, M> x, matrix<T, N, M> exp)
{
MATRIX_MAP_BINARY(T, N, M, ldexp, x, exp);
@@ -2379,6 +2549,7 @@ __generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Length _0")
+[__readNone]
T length(vector<T, N> x)
{
return sqrt(dot(x, x));
@@ -2389,6 +2560,7 @@ __generic<T : __BuiltinFloatingPointType>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, mix)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 FMix _0 _1 _2")
+[__readNone]
T lerp(T x, T y, T s)
{
return x * (T(1.0f) - s) + y * s;
@@ -2398,6 +2570,7 @@ __generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, mix)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 FMix _0 _1 _2")
+[__readNone]
vector<T, N> lerp(vector<T, N> x, vector<T, N> y, vector<T, N> s)
{
return x * (T(1.0f) - s) + y * s;
@@ -2405,6 +2578,7 @@ vector<T, N> lerp(vector<T, N> x, vector<T, N> y, vector<T, N> s)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T,N,M> lerp(matrix<T,N,M> x, matrix<T,N,M> y, matrix<T,N,M> s)
{
MATRIX_MAP_TRINARY(T, N, M, lerp, x, y, s);
@@ -2412,6 +2586,7 @@ matrix<T,N,M> lerp(matrix<T,N,M> x, matrix<T,N,M> y, matrix<T,N,M> s)
// Legacy lighting function (obsolete)
__target_intrinsic(hlsl)
+[__readNone]
float4 lit(float n_dot_l, float n_dot_h, float m)
{
let ambient = 1.0f;
@@ -2427,12 +2602,14 @@ __target_intrinsic(glsl)
__target_intrinsic(cuda, "$P_log($0)")
__target_intrinsic(cpp, "$P_log($0)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Log _0")
+[__readNone]
T log(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Log _0")
+[__readNone]
vector<T, N> log(vector<T, N> x)
{
VECTOR_MAP_UNARY(T, N, log, x);
@@ -2440,6 +2617,7 @@ vector<T, N> log(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> log(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(T, N, M, log, x);
@@ -2452,12 +2630,14 @@ __target_intrinsic(glsl, "(log( $0 ) * $S0( 0.43429448190325182765112891891661)
__target_intrinsic(cuda, "$P_log10($0)")
__target_intrinsic(cpp, "$P_log10($0)")
__target_intrinsic(spirv_direct, "%baseElog = OpExtInst resultType resultId glsl450 Log _0; OpFMul resultType resultId _0 %baseElog const(_p,0.43429448190325182765112891891661)")
+[__readNone]
T log10(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, "(log( $0 ) * $S0(0.43429448190325182765112891891661) )" )
__target_intrinsic(spirv_direct, "%baseElog = OpExtInst resultType resultId glsl450 Log _0; OpVectorTimesScalar resultType resultId _0 %baseElog const(_p,0.43429448190325182765112891891661)")
+[__readNone]
vector<T,N> log10(vector<T,N> x)
{
VECTOR_MAP_UNARY(T, N, log10, x);
@@ -2465,6 +2645,7 @@ vector<T,N> log10(vector<T,N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T,N,M> log10(matrix<T,N,M> x)
{
MATRIX_MAP_UNARY(T, N, M, log10, x);
@@ -2477,12 +2658,14 @@ __target_intrinsic(glsl)
__target_intrinsic(cuda, "$P_log2($0)")
__target_intrinsic(cpp, "$P_log2($0)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Log2 _0")
+[__readNone]
T log2(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Log2 _0")
+[__readNone]
vector<T,N> log2(vector<T,N> x)
{
VECTOR_MAP_UNARY(T, N, log2, x);
@@ -2490,6 +2673,7 @@ vector<T,N> log2(vector<T,N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T,N,M> log2(matrix<T,N,M> x)
{
MATRIX_MAP_UNARY(T, N, M, log2, x);
@@ -2503,12 +2687,14 @@ __target_intrinsic(glsl, fma)
__target_intrinsic(cuda, "$P_fma($0, $1, $2)")
__target_intrinsic(cpp, "$P_fma($0, $1, $2)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Fma _0 _1 _2")
+[__readNone]
T mad(T mvalue, T avalue, T bvalue);
__generic<T : __BuiltinArithmeticType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, fma)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Fma _0 _1 _2")
+[__readNone]
vector<T, N> mad(vector<T, N> mvalue, vector<T, N> avalue, vector<T, N> bvalue)
{
VECTOR_MAP_TRINARY(T, N, mad, mvalue, avalue, bvalue);
@@ -2516,6 +2702,7 @@ vector<T, N> mad(vector<T, N> mvalue, vector<T, N> avalue, vector<T, N> bvalue)
__generic<T : __BuiltinArithmeticType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> mad(matrix<T, N, M> mvalue, matrix<T, N, M> avalue, matrix<T, N, M> bvalue)
{
MATRIX_MAP_TRINARY(T, N, M, mad, mvalue, avalue, bvalue);
@@ -2528,6 +2715,7 @@ __target_intrinsic(glsl)
__target_intrinsic(cuda, "$P_max($0, $1)")
__target_intrinsic(cpp, "$P_max($0, $1)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 fus(FMax, UMax, SMax) _0")
+[__readNone]
T max(T x, T y);
// Note: a stdlib implementation of `max` (or `min`) will require splitting
// floating-point and integer cases apart, because the floating-point
@@ -2538,6 +2726,7 @@ __generic<T : __BuiltinIntegerType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 fus(FMax, UMax, SMax) _0")
+[__readNone]
vector<T, N> max(vector<T, N> x, vector<T, N> y)
{
VECTOR_MAP_BINARY(T, N, max, x, y);
@@ -2545,6 +2734,7 @@ vector<T, N> max(vector<T, N> x, vector<T, N> y)
__generic<T : __BuiltinIntegerType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> max(matrix<T, N, M> x, matrix<T, N, M> y)
{
MATRIX_MAP_BINARY(T, N, M, max, x, y);
@@ -2556,12 +2746,14 @@ __target_intrinsic(glsl)
__target_intrinsic(cuda, "$P_max($0, $1)")
__target_intrinsic(cpp, "$P_max($0, $1)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 fus(FMax, UMax, SMax) _0")
+[__readNone]
T max(T x, T y);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 fus(FMax, UMax, SMax) _0")
+[__readNone]
vector<T, N> max(vector<T, N> x, vector<T, N> y)
{
VECTOR_MAP_BINARY(T, N, max, x, y);
@@ -2569,6 +2761,7 @@ vector<T, N> max(vector<T, N> x, vector<T, N> y)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> max(matrix<T, N, M> x, matrix<T, N, M> y)
{
MATRIX_MAP_BINARY(T, N, M, max, x, y);
@@ -2581,12 +2774,14 @@ __target_intrinsic(glsl)
__target_intrinsic(cuda, "$P_min($0, $1)")
__target_intrinsic(cpp, "$P_min($0, $1)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 fus(FMin, UMin, SMin) _0")
+[__readNone]
T min(T x, T y);
__generic<T : __BuiltinIntegerType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 fus(FMin, UMin, SMin) _0")
+[__readNone]
vector<T,N> min(vector<T,N> x, vector<T,N> y)
{
VECTOR_MAP_BINARY(T, N, min, x, y);
@@ -2594,6 +2789,7 @@ vector<T,N> min(vector<T,N> x, vector<T,N> y)
__generic<T : __BuiltinIntegerType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T,N,M> min(matrix<T,N,M> x, matrix<T,N,M> y)
{
MATRIX_MAP_BINARY(T, N, M, min, x, y);
@@ -2605,12 +2801,14 @@ __target_intrinsic(glsl)
__target_intrinsic(cuda, "$P_min($0, $1)")
__target_intrinsic(cpp, "$P_min($0, $1)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 fus(FMin, UMin, SMin) _0")
+[__readNone]
T min(T x, T y);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 fus(FMin, UMin, SMin) _0")
+[__readNone]
vector<T,N> min(vector<T,N> x, vector<T,N> y)
{
VECTOR_MAP_BINARY(T, N, min, x, y);
@@ -2618,6 +2816,7 @@ vector<T,N> min(vector<T,N> x, vector<T,N> y)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T,N,M> min(matrix<T,N,M> x, matrix<T,N,M> y)
{
MATRIX_MAP_BINARY(T, N, M, min, x, y);
@@ -2625,11 +2824,13 @@ matrix<T,N,M> min(matrix<T,N,M> x, matrix<T,N,M> y)
// split into integer and fractional parts (both with same sign)
// Scalar `modf` overload for any `T` conforming to `__BuiltinFloatingPointType`.
// Declaration only (no body); one part is the return value and the other is
// written through the `out` parameter `ip` (presumably the integer part, going
// by the name — confirm).
// NOTE(review): the function has an `out` parameter yet is tagged
// `[__readNone]` by this patch — confirm the attribute permits `out` writes.
__generic<T : __BuiltinFloatingPointType>
+[__readNone]
T modf(T x, out T ip);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
+[__readNone]
vector<T,N> modf(vector<T,N> x, out vector<T,N> ip)
{
VECTOR_MAP_BINARY(T, N, modf, x, ip);
@@ -2637,6 +2838,7 @@ vector<T,N> modf(vector<T,N> x, out vector<T,N> ip)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T,N,M> modf(matrix<T,N,M> x, out matrix<T,N,M> ip)
{
MATRIX_MAP_BINARY(T, N, M, modf, x, ip);
@@ -2644,6 +2846,7 @@ matrix<T,N,M> modf(matrix<T,N,M> x, out matrix<T,N,M> ip)
// msad4 (whatever that is)
__target_intrinsic(hlsl)
+[__readNone]
uint4 msad4(uint reference, uint2 source, uint4 accum)
{
int4 bytesRef = (reference >> uint4(24, 16, 8, 0)) & 0xFF;
@@ -2665,36 +2868,43 @@ uint4 msad4(uint reference, uint2 source, uint4 accum)
// scalar-scalar
// `mul(T, T)` for any `T` conforming to `__BuiltinArithmeticType`.
// Declaration only; bound to the IR opcode `kIROp_Mul` via `__intrinsic_op`.
__generic<T : __BuiltinArithmeticType>
__intrinsic_op($(kIROp_Mul))
+[__readNone]
T mul(T x, T y);
// scalar-vector and vector-scalar
__generic<T : __BuiltinArithmeticType, let N : int>
__intrinsic_op($(kIROp_Mul))
+[__readNone]
vector<T, N> mul(vector<T, N> x, T y);
__generic<T : __BuiltinArithmeticType, let N : int>
__intrinsic_op($(kIROp_Mul))
+[__readNone]
vector<T, N> mul(T x, vector<T, N> y);
// scalar-matrix and matrix-scalar
__generic<T : __BuiltinArithmeticType, let N : int, let M :int>
__intrinsic_op($(kIROp_Mul))
+[__readNone]
matrix<T, N, M> mul(matrix<T, N, M> x, T y);
__generic<T : __BuiltinArithmeticType, let N : int, let M :int>
__intrinsic_op($(kIROp_Mul))
+[__readNone]
matrix<T, N, M> mul(T x, matrix<T, N, M> y);
// vector-vector (dot product)
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, "dot")
+[__readNone]
T mul(vector<T, N> x, vector<T, N> y)
{
return dot(x, y);
}
__generic<T : __BuiltinIntegerType, let N : int>
__target_intrinsic(hlsl)
+[__readNone]
T mul(vector<T, N> x, vector<T, N> y)
{
return dot(x, y);
@@ -2704,6 +2914,7 @@ T mul(vector<T, N> x, vector<T, N> y)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, "($1 * $0)")
+[__readNone]
vector<T, M> mul(vector<T, N> left, matrix<T, N, M> right)
{
vector<T,M> result;
@@ -2721,6 +2932,7 @@ vector<T, M> mul(vector<T, N> left, matrix<T, N, M> right)
__generic<T : __BuiltinIntegerType, let N : int, let M : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, "($1 * $0)")
+[__readNone]
vector<T, M> mul(vector<T, N> left, matrix<T, N, M> right)
{
vector<T,M> result;
@@ -2738,6 +2950,7 @@ vector<T, M> mul(vector<T, N> left, matrix<T, N, M> right)
__generic<T : __BuiltinLogicalType, let N : int, let M : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, "($1 * $0)")
+[__readNone]
vector<T, M> mul(vector<T, N> left, matrix<T, N, M> right)
{
vector<T,M> result;
@@ -2757,6 +2970,7 @@ vector<T, M> mul(vector<T, N> left, matrix<T, N, M> right)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, "($1 * $0)")
+[__readNone]
vector<T,N> mul(matrix<T,N,M> left, vector<T,M> right)
{
vector<T,N> result;
@@ -2774,6 +2988,7 @@ vector<T,N> mul(matrix<T,N,M> left, vector<T,M> right)
__generic<T : __BuiltinIntegerType, let N : int, let M : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, "($1 * $0)")
+[__readNone]
vector<T,N> mul(matrix<T,N,M> left, vector<T,M> right)
{
vector<T,N> result;
@@ -2791,6 +3006,7 @@ vector<T,N> mul(matrix<T,N,M> left, vector<T,M> right)
__generic<T : __BuiltinLogicalType, let N : int, let M : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, "($1 * $0)")
+[__readNone]
vector<T,N> mul(matrix<T,N,M> left, vector<T,M> right)
{
vector<T,N> result;
@@ -2810,6 +3026,7 @@ vector<T,N> mul(matrix<T,N,M> left, vector<T,M> right)
__generic<T : __BuiltinFloatingPointType, let R : int, let N : int, let C : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, "($1 * $0)")
+[__readNone]
matrix<T,R,C> mul(matrix<T,R,N> right, matrix<T,N,C> left)
{
matrix<T,R,C> result;
@@ -2828,6 +3045,7 @@ matrix<T,R,C> mul(matrix<T,R,N> right, matrix<T,N,C> left)
__generic<T : __BuiltinIntegerType, let R : int, let N : int, let C : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, "($1 * $0)")
+[__readNone]
matrix<T,R,C> mul(matrix<T,R,N> right, matrix<T,N,C> left)
{
matrix<T,R,C> result;
@@ -2846,6 +3064,7 @@ matrix<T,R,C> mul(matrix<T,R,N> right, matrix<T,N,C> left)
__generic<T : __BuiltinLogicalType, let R : int, let N : int, let C : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, "($1 * $0)")
+[__readNone]
matrix<T,R,C> mul(matrix<T,R,N> right, matrix<T,N,C> left)
{
matrix<T,R,C> result;
@@ -2864,11 +3083,13 @@ matrix<T,R,C> mul(matrix<T,R,N> right, matrix<T,N,C> left)
// noise (deprecated)
+[__readNone]
float noise(float x)
{
return 0;
}
+[__readNone]
__generic<let N : int> float noise(vector<float, N> x)
{
return 0;
@@ -2915,6 +3136,7 @@ __generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Normalize _0")
+[__readNone]
vector<T,N> normalize(vector<T,N> x)
{
return x / length(x);
@@ -2927,12 +3149,14 @@ __target_intrinsic(glsl)
__target_intrinsic(cuda, "$P_pow($0, $1)")
__target_intrinsic(cpp, "$P_pow($0, $1)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Pow _0 _1")
+[__readNone]
T pow(T x, T y);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Pow _0 _1")
+[__readNone]
vector<T, N> pow(vector<T, N> x, vector<T, N> y)
{
VECTOR_MAP_BINARY(T, N, pow, x, y);
@@ -2940,6 +3164,7 @@ vector<T, N> pow(vector<T, N> x, vector<T, N> y)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T,N,M> pow(matrix<T,N,M> x, matrix<T,N,M> y)
{
MATRIX_MAP_BINARY(T, N, M, pow, x, y);
@@ -3087,6 +3312,7 @@ __generic<T : __BuiltinFloatingPointType>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Radians _0")
+[__readNone]
T radians(T x)
{
return x * (T.getPi() / T(180.0f));
@@ -3096,6 +3322,7 @@ __generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Radians _0")
+[__readNone]
vector<T, N> radians(vector<T, N> x)
{
return x * (T.getPi() / T(180.0f));
@@ -3103,6 +3330,7 @@ vector<T, N> radians(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> radians(matrix<T, N, M> x)
{
return x * (T.getPi() / T(180.0f));
@@ -3111,6 +3339,7 @@ matrix<T, N, M> radians(matrix<T, N, M> x)
// Approximate reciprocal
__generic<T : __BuiltinFloatingPointType>
__target_intrinsic(hlsl)
+[__readNone]
T rcp(T x)
{
return T(1.0) / x;
@@ -3118,6 +3347,7 @@ T rcp(T x)
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
+[__readNone]
vector<T, N> rcp(vector<T, N> x)
{
VECTOR_MAP_UNARY(T, N, rcp, x);
@@ -3125,6 +3355,7 @@ vector<T, N> rcp(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> rcp(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(T, N, M, rcp, x);
@@ -3135,6 +3366,7 @@ __generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Reflect _0 _1")
+[__readNone]
vector<T,N> reflect(vector<T,N> i, vector<T,N> n)
{
return i - T(2) * dot(n,i) * n;
@@ -3145,6 +3377,7 @@ __generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Refract _0 _1 _2")
+[__readNone]
vector<T,N> refract(vector<T,N> i, vector<T,N> n, T eta)
{
let dotNI = dot(n,i);
@@ -3158,10 +3391,12 @@ __target_intrinsic(hlsl)
__target_intrinsic(glsl, "bitfieldReverse")
__target_intrinsic(cuda, "$P_reversebits($0)")
__target_intrinsic(cpp, "$P_reversebits($0)")
+[__readNone]
uint reversebits(uint value);
__target_intrinsic(glsl, "bitfieldReverse")
__generic<let N : int>
+[__readNone]
vector<uint, N> reversebits(vector<uint, N> value)
{
VECTOR_MAP_UNARY(uint, N, reversebits, value);
@@ -3174,12 +3409,14 @@ __target_intrinsic(glsl)
__target_intrinsic(cuda, "$P_round($0)")
__target_intrinsic(cpp, "$P_round($0)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Round _0")
+[__readNone]
T round(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Round _0")
+[__readNone]
vector<T, N> round(vector<T, N> x)
{
VECTOR_MAP_UNARY(T, N, round, x);
@@ -3187,6 +3424,7 @@ vector<T, N> round(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T,N,M> round(matrix<T,N,M> x)
{
MATRIX_MAP_UNARY(T, N, M, round, x);
@@ -3199,6 +3437,7 @@ __target_intrinsic(glsl, "inversesqrt($0)")
__target_intrinsic(cuda, "$P_rsqrt($0)")
__target_intrinsic(cpp, "$P_rsqrt($0)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 InverseSqrt _0")
+[__readNone]
T rsqrt(T x)
{
return T(1.0) / sqrt(x);
@@ -3208,6 +3447,7 @@ __generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, "inversesqrt($0)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 InverseSqrt _0")
+[__readNone]
vector<T, N> rsqrt(vector<T, N> x)
{
VECTOR_MAP_UNARY(T, N, rsqrt, x);
@@ -3215,6 +3455,7 @@ vector<T, N> rsqrt(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> rsqrt(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(T, N, M, rsqrt, x);
@@ -3224,6 +3465,7 @@ matrix<T, N, M> rsqrt(matrix<T, N, M> x)
__generic<T : __BuiltinFloatingPointType>
__target_intrinsic(hlsl)
+[__readNone]
T saturate(T x)
{
return clamp<T>(x, T(0), T(1));
@@ -3231,6 +3473,7 @@ T saturate(T x)
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
+[__readNone]
vector<T,N> saturate(vector<T,N> x)
{
return clamp<T,N>(x,
@@ -3240,6 +3483,7 @@ vector<T,N> saturate(vector<T,N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T,N,M> saturate(matrix<T,N,M> x)
{
MATRIX_MAP_UNARY(T, N, M, saturate, x);
@@ -3252,12 +3496,14 @@ __target_intrinsic(glsl, "int(sign($0))")
__target_intrinsic(cuda, "$P_sign($0)")
__target_intrinsic(cpp, "$P_sign($0)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 fi(FSign, SSign) _0")
+[__readNone]
int sign(T x);
__generic<T : __BuiltinSignedArithmeticType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, "ivec$N0(sign($0))")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 fi(FSign, SSign) _0")
+[__readNone]
vector<int, N> sign(vector<T, N> x)
{
VECTOR_MAP_UNARY(int, N, sign, x);
@@ -3265,6 +3511,7 @@ vector<int, N> sign(vector<T, N> x)
__generic<T : __BuiltinSignedArithmeticType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<int, N, M> sign(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(int, N, M, sign, x);
@@ -3279,12 +3526,14 @@ __target_intrinsic(glsl)
__target_intrinsic(cuda, "$P_sin($0)")
__target_intrinsic(cpp, "$P_sin($0)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Sin _0")
+[__readNone]
T sin(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Sin _0")
+[__readNone]
vector<T, N> sin(vector<T, N> x)
{
VECTOR_MAP_UNARY(T, N, sin, x);
@@ -3292,6 +3541,7 @@ vector<T, N> sin(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> sin(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(T, N, M, sin, x);
@@ -3301,6 +3551,7 @@ matrix<T, N, M> sin(matrix<T, N, M> x)
__generic<T : __BuiltinFloatingPointType>
__target_intrinsic(hlsl)
__target_intrinsic(cuda, "$P_sincos($0, $1, $2)")
+[__readNone]
void sincos(T x, out T s, out T c)
{
s = sin(x);
@@ -3309,6 +3560,7 @@ void sincos(T x, out T s, out T c)
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
+[__readNone]
void sincos(vector<T,N> x, out vector<T,N> s, out vector<T,N> c)
{
s = sin(x);
@@ -3317,6 +3569,7 @@ void sincos(vector<T,N> x, out vector<T,N> s, out vector<T,N> c)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
void sincos(matrix<T,N,M> x, out matrix<T,N,M> s, out matrix<T,N,M> c)
{
s = sin(x);
@@ -3330,12 +3583,14 @@ __target_intrinsic(glsl)
__target_intrinsic(cuda, "$P_sinh($0)")
__target_intrinsic(cpp, "$P_sinh($0)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Sinh _0")
+[__readNone]
T sinh(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Sinh _0")
+[__readNone]
vector<T, N> sinh(vector<T, N> x)
{
VECTOR_MAP_UNARY(T, N, sinh, x);
@@ -3343,6 +3598,7 @@ vector<T, N> sinh(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> sinh(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(T, N, M, sinh, x);
@@ -3353,6 +3609,7 @@ __generic<T : __BuiltinFloatingPointType>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 SmoothStep _0 _1 _2")
+[__readNone]
T smoothstep(T min, T max, T x)
{
let t = saturate((x - min) / (max - min));
@@ -3363,6 +3620,7 @@ __generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 SmoothStep _0 _1 _2")
+[__readNone]
vector<T, N> smoothstep(vector<T, N> min, vector<T, N> max, vector<T, N> x)
{
VECTOR_MAP_TRINARY(T, N, smoothstep, min, max, x);
@@ -3370,6 +3628,7 @@ vector<T, N> smoothstep(vector<T, N> min, vector<T, N> max, vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> smoothstep(matrix<T, N, M> min, matrix<T, N, M> max, matrix<T, N, M> x)
{
MATRIX_MAP_TRINARY(T, N, M, smoothstep, min, max, x);
@@ -3382,12 +3641,14 @@ __target_intrinsic(glsl)
__target_intrinsic(cuda, "$P_sqrt($0)")
__target_intrinsic(cpp, "$P_sqrt($0)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Sqrt _0")
+[__readNone]
T sqrt(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Sqrt _0")
+[__readNone]
vector<T, N> sqrt(vector<T, N> x)
{
VECTOR_MAP_UNARY(T, N, sqrt, x);
@@ -3395,6 +3656,7 @@ vector<T, N> sqrt(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> sqrt(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(T, N, M, sqrt, x);
@@ -3405,6 +3667,7 @@ __generic<T : __BuiltinFloatingPointType>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Step _0 _1")
+[__readNone]
T step(T y, T x)
{
return x < y ? T(0.0f) : T(1.0f);
@@ -3414,6 +3677,7 @@ __generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Step _0 _1")
+[__readNone]
vector<T,N> step(vector<T,N> y, vector<T,N> x)
{
VECTOR_MAP_BINARY(T, N, step, y, x);
@@ -3421,6 +3685,7 @@ vector<T,N> step(vector<T,N> y, vector<T,N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> step(matrix<T, N, M> y, matrix<T, N, M> x)
{
MATRIX_MAP_BINARY(T, N, M, step, y, x);
@@ -3433,12 +3698,14 @@ __target_intrinsic(glsl)
__target_intrinsic(cuda, "$P_tan($0)")
__target_intrinsic(cpp, "$P_tan($0)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Tan _0")
+[__readNone]
T tan(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Tan _0")
+[__readNone]
vector<T, N> tan(vector<T, N> x)
{
VECTOR_MAP_UNARY(T, N, tan, x);
@@ -3446,6 +3713,7 @@ vector<T, N> tan(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> tan(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(T, N, M, tan, x);
@@ -3458,12 +3726,14 @@ __target_intrinsic(glsl)
__target_intrinsic(cuda, "$P_tanh($0)")
__target_intrinsic(cpp, "$P_tanh($0)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Tanh _0")
+[__readNone]
T tanh(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Tanh _0")
+[__readNone]
vector<T,N> tanh(vector<T,N> x)
{
VECTOR_MAP_UNARY(T, N, tanh, x);
@@ -3471,6 +3741,7 @@ vector<T,N> tanh(vector<T,N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T,N,M> tanh(matrix<T,N,M> x)
{
MATRIX_MAP_UNARY(T, N, M, tanh, x);
@@ -3480,6 +3751,7 @@ matrix<T,N,M> tanh(matrix<T,N,M> x)
__generic<T : __BuiltinType, let N : int, let M : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
+[__readNone]
matrix<T, M, N> transpose(matrix<T, N, M> x)
{
matrix<T,M,N> result;
@@ -3496,12 +3768,14 @@ __target_intrinsic(glsl)
__target_intrinsic(cuda, "$P_trunc($0)")
__target_intrinsic(cpp, "$P_trunc($0)")
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Trunc _0")
+[__readNone]
T trunc(T x);
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
__target_intrinsic(spirv_direct, "OpExtInst resultType resultId glsl450 Trunc _0")
+[__readNone]
vector<T, N> trunc(vector<T, N> x)
{
VECTOR_MAP_UNARY(T, N, trunc, x);
@@ -3509,6 +3783,7 @@ vector<T, N> trunc(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
+[__readNone]
matrix<T, N, M> trunc(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(T, N, M, trunc, x);
@@ -4779,6 +5054,7 @@ void __executeCallable(uint shaderIndex, int payloadLocation);
__generic<Payload>
__target_intrinsic(__glslRayTracing, "$XC")
[__readNone]
+[__AlwaysFoldIntoUseSiteAttribute]
int __callablePayloadLocation(__ref Payload payload);
// Now we provide a hard-coded definition of `CallShader()` for GLSL-based
@@ -4834,6 +5110,7 @@ void __traceRay(
__generic<Payload>
__target_intrinsic(__glslRayTracing, "$XP")
[__readNone]
+[__AlwaysFoldIntoUseSiteAttribute]
int __rayPayloadLocation(__ref Payload payload);
__generic<payload_t>
@@ -5677,6 +5954,7 @@ Ref<T> __hitObjectAttributes<T>()
__generic<Attributes>
__target_intrinsic(__glslRayTracing, "$XH")
[__readNone]
+[__AlwaysFoldIntoUseSiteAttribute]
int __hitObjectAttributesLocation(__ref Attributes attributes);
/// Immutable data type representing a ray hit or a miss. Can be used to invoke hit or miss shading,
diff --git a/source/slang/slang-ast-modifier.h b/source/slang/slang-ast-modifier.h
index 99e221b1e..6ac464784 100644
--- a/source/slang/slang-ast-modifier.h
+++ b/source/slang/slang-ast-modifier.h
@@ -1083,6 +1083,14 @@ class RequiresNVAPIAttribute : public Attribute
SLANG_AST_CLASS(RequiresNVAPIAttribute)
};
+
+ /// A `[__AlwaysFoldIntoUseSite]` attribute indicates that the calls into the modified
+ /// function should always be folded into use sites during source emit.
+class AlwaysFoldIntoUseSiteAttribute :public Attribute
+{
+ SLANG_AST_CLASS(AlwaysFoldIntoUseSiteAttribute)
+};
+
/// The `[ForwardDifferentiable]` attribute indicates that a function can be forward-differentiated.
class ForwardDifferentiableAttribute : public DifferentiableAttribute
{
diff --git a/source/slang/slang-emit-c-like.cpp b/source/slang/slang-emit-c-like.cpp
index c664449e5..7840dc450 100644
--- a/source/slang/slang-emit-c-like.cpp
+++ b/source/slang/slang-emit-c-like.cpp
@@ -1244,14 +1244,24 @@ bool CLikeSourceEmitter::shouldFoldInstIntoUseSites(IRInst* inst)
return true;
}
+ // Always fold if inst is a call into an [__AlwaysFoldIntoUseSite] function.
+ if (auto call = as<IRCall>(inst))
+ {
+ auto callee = call->getCallee();
+ if (getResolvedInstForDecorations(callee)->findDecoration<IRAlwaysFoldIntoUseSiteDecoration>())
+ {
+ return true;
+ }
+ }
+
// Having dealt with all of the cases where we *must* fold things
// above, we can now deal with the more general cases where we
// *should not* fold things.
-
// Don't fold something with no users:
if(!inst->hasUses())
return false;
+
// Don't fold something that has multiple users:
if(inst->hasMoreThanOneUse())
return false;
diff --git a/source/slang/slang-emit-c-like.h b/source/slang/slang-emit-c-like.h
index ff229c38b..1cd2045c7 100644
--- a/source/slang/slang-emit-c-like.h
+++ b/source/slang/slang-emit-c-like.h
@@ -326,7 +326,7 @@ public:
void emitSimpleValue(IRInst* inst) { emitSimpleValueImpl(inst); }
- bool shouldFoldInstIntoUseSites(IRInst* inst);
+ virtual bool shouldFoldInstIntoUseSites(IRInst* inst);
void emitOperand(IRInst* inst, EmitOpInfo const& outerPrec) { emitOperandImpl(inst, outerPrec); }
diff --git a/source/slang/slang-emit-cpp.cpp b/source/slang/slang-emit-cpp.cpp
index ba6b26ec6..795ec74b0 100644
--- a/source/slang/slang-emit-cpp.cpp
+++ b/source/slang/slang-emit-cpp.cpp
@@ -1557,6 +1557,46 @@ void CPPSourceEmitter::emitGlobalInstImpl(IRInst* inst)
}
}
+bool CPPSourceEmitter::shouldFoldInstIntoUseSites(IRInst* inst)
+{
+ bool result = Super::shouldFoldInstIntoUseSites(inst);
+ if (!result)
+ return result;
+ if (as<IRVectorType>(inst->getDataType()) || as<IRMatrixType>(inst->getDataType()))
+ {
+ // If a vector or matrix value is being used in a reshape/cast,
+ // we should not fold it because the implementation of the cast will have multiple references to it.
+ for (auto use = inst->firstUse; use; use = use->nextUse)
+ {
+ switch (use->getUser()->getOp())
+ {
+ case kIROp_MatrixReshape:
+ case kIROp_VectorReshape:
+ case kIROp_IntCast:
+ case kIROp_FloatCast:
+ case kIROp_CastIntToFloat:
+ case kIROp_CastFloatToInt:
+ return false;
+ default:
+ break;
+ }
+ }
+ switch (inst->getOp())
+ {
+ case kIROp_MatrixReshape:
+ case kIROp_VectorReshape:
+ case kIROp_IntCast:
+ case kIROp_FloatCast:
+ case kIROp_CastIntToFloat:
+ case kIROp_CastFloatToInt:
+ return false;
+ default:
+ break;
+ }
+ }
+ return true;
+}
+
static bool _isExported(IRInst* inst)
{
for (auto decoration : inst->getDecorations())
diff --git a/source/slang/slang-emit-cpp.h b/source/slang/slang-emit-cpp.h
index 92780e0a4..71c382f87 100644
--- a/source/slang/slang-emit-cpp.h
+++ b/source/slang/slang-emit-cpp.h
@@ -71,6 +71,7 @@ protected:
virtual void emitFuncDecorationsImpl(IRFunc* func) SLANG_OVERRIDE;
virtual void emitVarDecorationsImpl(IRInst* var) SLANG_OVERRIDE;
virtual void emitGlobalInstImpl(IRInst* inst) SLANG_OVERRIDE;
+ virtual bool shouldFoldInstIntoUseSites(IRInst* inst) SLANG_OVERRIDE;
const UnownedStringSlice* getVectorElementNames(BaseType elemType, Index elemCount);
diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp
index e2f00bf88..a25fae5ae 100644
--- a/source/slang/slang-emit.cpp
+++ b/source/slang/slang-emit.cpp
@@ -891,8 +891,8 @@ Result linkAndOptimizeIR(
}
}
- // Run a final round of DCE to clean up unused things after phi-elimination.
- eliminateDeadCode(irModule);
+ // Run a final round of simplifications to clean up unused things after phi-elimination.
+ simplifyNonSSAIR(irModule);
// We include one final step to (optionally) dump the IR and validate
// it after all of the optimization passes are complete. This should
diff --git a/source/slang/slang-ir-autodiff-unzip.cpp b/source/slang/slang-ir-autodiff-unzip.cpp
index 096751836..a05fe7044 100644
--- a/source/slang/slang-ir-autodiff-unzip.cpp
+++ b/source/slang/slang-ir-autodiff-unzip.cpp
@@ -559,6 +559,7 @@ IRFunc* DiffUnzipPass::extractPrimalFunc(
{
if (inst->getOp() == kIROp_Call)
{
+ // The primal calls should be marked as no side effect so they can be DCE'd if possible.
builder.addSimpleDecoration<IRNoSideEffectDecoration>(inst);
}
}
diff --git a/source/slang/slang-ir-autodiff.h b/source/slang/slang-ir-autodiff.h
index fa01d50ae..a4eb94461 100644
--- a/source/slang/slang-ir-autodiff.h
+++ b/source/slang/slang-ir-autodiff.h
@@ -212,20 +212,12 @@ struct DifferentiableTypeConformanceContext
IRInst* getZeroMethodForType(IRBuilder* builder, IRType* origType)
{
auto result = lookUpInterfaceMethod(builder, origType, sharedContext->zeroMethodStructKey);
- if (result && !result->findDecoration<IRNoSideEffectDecoration>())
- {
- builder->addDecoration(result, kIROp_NoSideEffectDecoration);
- }
return result;
}
IRInst* getAddMethodForType(IRBuilder* builder, IRType* origType)
{
auto result = lookUpInterfaceMethod(builder, origType, sharedContext->addMethodStructKey);
- if (result && !result->findDecoration<IRNoSideEffectDecoration>())
- {
- builder->addDecoration(result, kIROp_NoSideEffectDecoration);
- }
return result;
}
};
diff --git a/source/slang/slang-ir-dce.cpp b/source/slang/slang-ir-dce.cpp
index 58c9b23f1..e5c9b1fdb 100644
--- a/source/slang/slang-ir-dce.cpp
+++ b/source/slang/slang-ir-dce.cpp
@@ -24,6 +24,11 @@ struct DeadCodeEliminationContext
// These uses will be replaced with `undefInst`.
IRInst* undefInst = nullptr;
+ // Track if we have removed any phi parameters.
+ // If so, we need to rerun the DCE pass because removing them
+ // may expose new DCE opportunities.
+ bool phiRemoved = false;
+
// Our overall process is going to be to determine
// which instructions in the module are "live"
// and then eliminate anything that wasn't found to
@@ -98,104 +103,115 @@ struct DeadCodeEliminationContext
bool processInst(IRInst* root)
{
- // First of all, we know that the root instruction
- // should be considered as live, because otherwise
- // we'd end up eliminating it, so that is a
- // good place to start.
- //
- markInstAsLive(root);
-
- // Ensure there is a global undef inst that is always alive.
- // This undef inst will be used to fill in weak-referencing uses
- // whose used value is marked as dead and eliminated.
- // We always make sure this undef inst is available to prevent
- // infiniate oscilating loops.
- markInstAsLive(getUndefInst());
-
- // Marking the module as live should have
- // seeded our work list, so we can now start
- // processing entries off of our work list
- // until it goes dry.
- //
- while (workList.getCount())
+ bool result = false;
+ for (;;)
{
- auto inst = workList.getLast();
- workList.removeLast();
+ liveInsts.Clear();
+ workList.clear();
- if (!isChildInstOf(inst, root))
- continue;
-
- // At this point we know that `inst` is live,
- // and we want to start considering which other
- // instructions must be live because of that
- // knowlege.
- //
- // A first easy case is that the parent (if any)
- // of a live instruction had better be live, or
- // else we might delete the parent, and
- // the child with it.
+ // First of all, we know that the root instruction
+ // should be considered as live, because otherwise
+ // we'd end up eliminating it, so that is a
+ // good place to start.
//
- markInstAsLive(inst->getParent());
-
- // Next the type of a live instruction, and all
- // of its operands must also be live, or else
- // we won't be able to compute its value.
+ markInstAsLive(root);
+
+ // Ensure there is a global undef inst that is always alive.
+ // This undef inst will be used to fill in weak-referencing uses
+ // whose used value is marked as dead and eliminated.
+ // We always make sure this undef inst is available to prevent
+ // infinite oscillating loops.
+ markInstAsLive(getUndefInst());
+
+ // Marking the module as live should have
+ // seeded our work list, so we can now start
+ // processing entries off of our work list
+ // until it goes dry.
//
- markInstAsLive(inst->getFullType());
- UInt operandCount = inst->getOperandCount();
- for (UInt ii = 0; ii < operandCount; ++ii)
+ while (workList.getCount())
{
- // There are some type of operands that needs to be treated as
- // "weak" references -- they can never hold things alive, and
- // whenever we delete the referenced value, these operands needs
- // to be replaced with `undef`.
- if (!isWeakReferenceOperand(inst, ii))
- markInstAsLive(inst->getOperand(ii));
- }
+ auto inst = workList.getLast();
+ workList.removeLast();
+
+ if (!isChildInstOf(inst, root))
+ continue;
+
+ // At this point we know that `inst` is live,
+ // and we want to start considering which other
+ // instructions must be live because of that
+ // knowledge.
+ //
+ // A first easy case is that the parent (if any)
+ // of a live instruction had better be live, or
+ // else we might delete the parent, and
+ // the child with it.
+ //
+ markInstAsLive(inst->getParent());
+
+ // Next the type of a live instruction, and all
+ // of its operands must also be live, or else
+ // we won't be able to compute its value.
+ //
+ markInstAsLive(inst->getFullType());
+ UInt operandCount = inst->getOperandCount();
+ for (UInt ii = 0; ii < operandCount; ++ii)
+ {
+ // There are some types of operands that need to be treated as
+ // "weak" references -- they can never hold things alive, and
+ // whenever we delete the referenced value, these operands need
+ // to be replaced with `undef`.
+ if (!isWeakReferenceOperand(inst, ii))
+ markInstAsLive(inst->getOperand(ii));
+ }
- // Finally, we need to consider the children
- // and decorations of the instruction.
- //
- // Note that just because an instruction is
- // live doesn't mean its children must be, or
- // else we'd never eliminate *anything* (we
- // marked the whole module as live, and everything
- // is a transitive child of the module).
- //
- // Decorations, in contrast, are always live if their
- // parents are (because we don't want to silently drop
- // decorations). It is still important to *mark*
- // decorations as live, because they have operands,
- // and those operands need to be marked as live.
- // We will fold decorations into the same loop
- // as children for simplicity.
- //
- // To keep the code here simple, we'll defer the
- // decision of whether a child (or decoration)
- // should be live when its parent is to a subroutine.
- //
- for (auto child : inst->getDecorationsAndChildren())
- {
- if (shouldInstBeLiveIfParentIsLive(child))
+ // Finally, we need to consider the children
+ // and decorations of the instruction.
+ //
+ // Note that just because an instruction is
+ // live doesn't mean its children must be, or
+ // else we'd never eliminate *anything* (we
+ // marked the whole module as live, and everything
+ // is a transitive child of the module).
+ //
+ // Decorations, in contrast, are always live if their
+ // parents are (because we don't want to silently drop
+ // decorations). It is still important to *mark*
+ // decorations as live, because they have operands,
+ // and those operands need to be marked as live.
+ // We will fold decorations into the same loop
+ // as children for simplicity.
+ //
+ // To keep the code here simple, we'll defer the
+ // decision of whether a child (or decoration)
+ // should be live when its parent is to a subroutine.
+ //
+ for (auto child : inst->getDecorationsAndChildren())
{
- // In this case, we know `inst` is live and
- // its `child` should be live if its parent is,
- // so the `child` must be live too.
- //
- markInstAsLive(child);
+ if (shouldInstBeLiveIfParentIsLive(child))
+ {
+ // In this case, we know `inst` is live and
+ // its `child` should be live if its parent is,
+ // so the `child` must be live too.
+ //
+ markInstAsLive(child);
+ }
}
}
- }
- // If our work list runs dry, that means we've reached a steady
- // state where everything that is transitively relevant to
- // the "outputs" of the module has been marked as live.
- //
- // Now we can simply walk through all of our instructions
- // recursively and eliminate those that are "dead" by
- // virtue of not having been found live.
- //
- return eliminateDeadInstsRec(root);
+ // If our work list runs dry, that means we've reached a steady
+ // state where everything that is transitively relevant to
+ // the "outputs" of the module has been marked as live.
+ //
+ // Now we can simply walk through all of our instructions
+ // recursively and eliminate those that are "dead" by
+ // virtue of not having been found live.
+ //
+ phiRemoved = false;
+ result |= eliminateDeadInstsRec(root);
+ if (!phiRemoved)
+ break;
+ }
+ return result;
}
// Given the basic infrastructrure above, let's
@@ -207,6 +223,25 @@ struct DeadCodeEliminationContext
return processInst(module->getModuleInst());
}
+    /// Remove, from every predecessor of `phiParam`'s block, the branch argument
+    /// that corresponds to `phiParam`, and record in `phiRemoved` that a phi
+    /// parameter was eliminated.
+    void removePhiArgs(IRInst* phiParam)
+    {
+        auto ownerBlock = cast<IRBlock>(phiParam->getParent());
+
+        // The argument index is the number of parameters preceding `phiParam`.
+        UInt argIndex = 0;
+        auto param = ownerBlock->getFirstParam();
+        while (param && param != phiParam)
+        {
+            argIndex++;
+            param = param->getNextParam();
+        }
+
+        // Drop the argument at that index from each predecessor's terminator.
+        for (auto pred : ownerBlock->getPredecessors())
+        {
+            auto branch = as<IRUnconditionalBranch>(pred->getTerminator());
+            SLANG_ASSERT(argIndex < branch->getArgCount());
+            branch->removeArgument(argIndex);
+        }
+        phiRemoved = true;
+    }
+
bool eliminateDeadInstsRec(IRInst* inst)
{
bool changed = false;
@@ -226,6 +261,12 @@ struct DeadCodeEliminationContext
{
inst->replaceUsesWith(getUndefInst());
}
+
+ if (inst->getOp() == kIROp_Param)
+ {
+ // For Phi parameters, we need to update all branch arguments.
+ removePhiArgs(inst);
+ }
inst->removeAndDeallocate();
changed = true;
}
@@ -261,6 +302,16 @@ struct DeadCodeEliminationContext
}
};
+/// Returns true when `inst` is a block that has a parent and is that parent's
+/// first block; returns false for non-block instructions and orphaned blocks.
+bool isFirstBlock(IRInst* inst)
+{
+    if (auto block = as<IRBlock>(inst))
+    {
+        auto owner = block->getParent();
+        return owner && owner->getFirstBlock() == block;
+    }
+    return false;
+}
+
bool shouldInstBeLiveIfParentIsLive(IRInst* inst, IRDeadCodeEliminationOptions options)
{
// The main source of confusion/complexity here is that
@@ -275,7 +326,31 @@ bool shouldInstBeLiveIfParentIsLive(IRInst* inst, IRDeadCodeEliminationOptions o
// when it is executed, then we should keep it around.
//
if (inst->mightHaveSideEffects())
- return true;
+ {
+ // If the inst has side effect, we should keep it alive.
+ // An exception is if we have a call to a pure function
+ // that writes its output to a local variable, but we
+ // don't have any uses of that local variable.
+ auto call = as<IRCall>(inst);
+ if (!call)
+ return true;
+ if (!getResolvedInstForDecorations(call->getCallee())->findDecoration<IRReadNoneDecoration>())
+ return true;
+ auto parentFunc = getParentFunc(inst);
+ if (!parentFunc)
+ return true;
+ for (UInt i = 0; i < call->getArgCount(); i++)
+ {
+ auto arg = call->getArg(i);
+ if (getParentFunc(arg) != parentFunc)
+ return true;
+ if (arg->getOp() != kIROp_Var)
+ return true;
+ if (arg->hasMoreThanOneUse())
+ return true;
+ }
+ return false;
+ }
//
// The `mightHaveSideEffects` query is conservative, and will
// return `true` as its default mode, so once we are past that
@@ -352,17 +427,10 @@ bool shouldInstBeLiveIfParentIsLive(IRInst* inst, IRDeadCodeEliminationOptions o
switch (inst->getOp())
{
// Function parameters obviously shouldn't get eliminated,
- // even if nothing references them, and block parameters
- // (phi nodes) will be considered live when their block is,
- // just so that we don't have to deal with any complications
- // around re-writing the relevant inter-block argument passing.
- //
- // TODO: A smarter DCE pass could deal with this case more
- // carefully, or we could improve the interprocedural SCCP
- // pass to deal with block parameters instead.
+ // even if nothing references them.
//
case kIROp_Param:
- return true;
+ return isFirstBlock(inst->getParent());
// IR struct types and witness tables are currently kludged
// so that they have child instructions that represent their
diff --git a/source/slang/slang-ir-glsl-legalize.cpp b/source/slang/slang-ir-glsl-legalize.cpp
index e111a548b..9c16f40ac 100644
--- a/source/slang/slang-ir-glsl-legalize.cpp
+++ b/source/slang/slang-ir-glsl-legalize.cpp
@@ -2027,8 +2027,8 @@ void legalizeMeshOutputParam(
IRBuilderInsertLocScope locScope{builder};
builder->setInsertBefore(p);
- auto e = builder->emitElementAddress(meshOutputBlockType, blockParam, p->getIndex());
- auto a = builder->emitFieldAddress(builtin.type, e, builtin.key);
+ auto e = builder->emitElementAddress(builder->getPtrType(meshOutputBlockType), blockParam, p->getIndex());
+ auto a = builder->emitFieldAddress(builder->getPtrType(builtin.type), e, builtin.key);
p->replaceUsesWith(a);
});
diff --git a/source/slang/slang-ir-inst-defs.h b/source/slang/slang-ir-inst-defs.h
index 4dea3985a..4b1037240 100644
--- a/source/slang/slang-ir-inst-defs.h
+++ b/source/slang/slang-ir-inst-defs.h
@@ -728,6 +728,9 @@ INST(HighLevelDeclDecoration, highLevelDecl, 1, 0)
/// Applie to an IR function and signals that inlining should not be performed unless unavoidable.
INST(NoInlineDecoration, noInline, 0, 0)
+ /// A call to the decorated function should always be folded into its use site.
+ INST(AlwaysFoldIntoUseSiteDecoration, alwaysFold, 0, 0)
+
INST(PayloadDecoration, payload, 0, 0)
/* Mesh Shader outputs */
diff --git a/source/slang/slang-ir-insts.h b/source/slang/slang-ir-insts.h
index fe20f17f5..f2e4e05d3 100644
--- a/source/slang/slang-ir-insts.h
+++ b/source/slang/slang-ir-insts.h
@@ -325,6 +325,7 @@ IR_SIMPLE_DECORATION(HLSLExportDecoration)
IR_SIMPLE_DECORATION(KeepAliveDecoration)
IR_SIMPLE_DECORATION(RequiresNVAPIDecoration)
IR_SIMPLE_DECORATION(NoInlineDecoration)
+IR_SIMPLE_DECORATION(AlwaysFoldIntoUseSiteDecoration)
struct IRNVAPIMagicDecoration : IRDecoration
{
@@ -1925,7 +1926,7 @@ struct IRUnconditionalBranch : IRTerminatorInst
UInt getArgCount();
IRUse* getArgs();
IRInst* getArg(UInt index);
-
+ void removeArgument(UInt index);
IR_PARENT_ISA(UnconditionalBranch);
};
@@ -1968,20 +1969,6 @@ struct IRConditionalBranch : IRTerminatorInst
IRBlock* getFalseBlock() { return (IRBlock*)falseBlock.get(); }
};
-// A conditional branch that represent the test inside a loop
-struct IRLoopTest : IRConditionalBranch
-{
-};
-
-// A conditional branch that represents a one-sided `if`:
-//
-// if( <condition> ) { <trueBlock> }
-// <falseBlock>
-struct IRIf : IRConditionalBranch
-{
- IRBlock* getAfterBlock() { return getFalseBlock(); }
-};
-
// A conditional branch that represents a two-sided `if`:
//
// if( <condition> ) { <trueBlock> }
@@ -3361,6 +3348,7 @@ public:
IRInst* emitBitOr(IRType* type, IRInst* left, IRInst* right);
IRInst* emitBitNot(IRType* type, IRInst* value);
IRInst* emitNeg(IRType* type, IRInst* value);
+ IRInst* emitNot(IRType* type, IRInst* value);
IRInst* emitAdd(IRType* type, IRInst* left, IRInst* right);
IRInst* emitSub(IRType* type, IRInst* left, IRInst* right);
diff --git a/source/slang/slang-ir-loop-unroll.cpp b/source/slang/slang-ir-loop-unroll.cpp
index 79b00f60a..2f689ebde 100644
--- a/source/slang/slang-ir-loop-unroll.cpp
+++ b/source/slang/slang-ir-loop-unroll.cpp
@@ -47,7 +47,7 @@ static bool _eliminateDeadBlocks(List<IRBlock*>& blocks, IRBlock* unreachableBlo
return changed;
}
-List<IRBlock*> _collectBlocksInLoop(Dictionary<IRBlock*, int>& blockOrdering, IRLoop* loopInst)
+List<IRBlock*> _collectBlocksInLoop(IRDominatorTree* dom, IRLoop* loopInst)
{
List<IRBlock*> loopBlocks;
HashSet<IRBlock*> loopBlocksSet;
@@ -58,7 +58,6 @@ List<IRBlock*> _collectBlocksInLoop(Dictionary<IRBlock*, int>& blockOrdering, IR
};
auto firstBlock = as<IRBlock>(loopInst->block.get());
auto breakBlock = as<IRBlock>(loopInst->breakBlock.get());
- auto breakBlockOrdering = blockOrdering[breakBlock].GetValue();
addBlock(firstBlock);
for (Index i = 0; i < loopBlocks.getCount(); i++)
@@ -68,18 +67,19 @@ List<IRBlock*> _collectBlocksInLoop(Dictionary<IRBlock*, int>& blockOrdering, IR
{
if (succ == breakBlock)
continue;
- auto successorOrdering = blockOrdering[block].GetValue();
- // The target must be post-dominated by the break block in order to be considered
- // the body of the loop.
- // Since we don't support arbitrary goto or multi-level continue, the simple
- // ordering comparison is sufficient to serve as a post-dominance check.
- if (successorOrdering < breakBlockOrdering)
+ if (dom->dominates(firstBlock, succ) && !dom->dominates(breakBlock, succ))
addBlock(succ);
}
}
return loopBlocks;
}
+/// Convenience wrapper: compute the dominator tree of `func` and use it to
+/// collect the blocks belonging to the loop started by `loopInst`.
+List<IRBlock*> collectBlocksInLoop(IRGlobalValueWithCode* func, IRLoop* loopInst)
+{
+    auto dominatorTree = computeDominatorTree(func);
+    List<IRBlock*> loopBlocks = _collectBlocksInLoop(dominatorTree, loopInst);
+    return loopBlocks;
+}
+
static int _getLoopMaxIterationsToUnroll(IRLoop* loopInst)
{
static constexpr int kMaxIterationsToAttempt = 100;
@@ -483,15 +483,7 @@ bool unrollLoopsInFunc(
// Remove any continue jumps from the loop.
eliminateContinueBlocks(module, loop);
- auto postOrderReverseCFG = getPostorderOnReverseCFG(func);
- Dictionary<IRBlock*, int> blockOrdering;
-
- for (Index i = 0; i < postOrderReverseCFG.getCount(); i++)
- {
- blockOrdering[postOrderReverseCFG[i]] = (int)i;
- }
-
- auto blocks = _collectBlocksInLoop(blockOrdering, loop);
+ auto blocks = collectBlocksInLoop(func, loop);
auto loopLoc = loop->sourceLoc;
if (!_unrollLoop(module, loop, blocks))
{
diff --git a/source/slang/slang-ir-loop-unroll.h b/source/slang/slang-ir-loop-unroll.h
index d9c31e6be..6f7a41192 100644
--- a/source/slang/slang-ir-loop-unroll.h
+++ b/source/slang/slang-ir-loop-unroll.h
@@ -1,18 +1,22 @@
// slang-ir-loop-unroll.h
#pragma once
+#include "../core/slang-list.h"
+
namespace Slang
{
struct IRLoop;
struct IRGlobalValueWithCode;
class DiagnosticSink;
struct IRModule;
+ struct IRBlock;
// Return true if successfull, false if errors occurred.
bool unrollLoopsInFunc(IRModule* module, IRGlobalValueWithCode* func, DiagnosticSink* sink);
bool unrollLoopsInModule(IRModule* module, DiagnosticSink* sink);
+ List<IRBlock*> collectBlocksInLoop(IRGlobalValueWithCode* func, IRLoop* loop);
// Turn a loop with continue block into a loop with only back jumps and breaks.
// Each iteration will be wrapped in a breakable region, where everything before `continue`
diff --git a/source/slang/slang-ir-propagate-func-properties.cpp b/source/slang/slang-ir-propagate-func-properties.cpp
new file mode 100644
index 000000000..f98a77fc7
--- /dev/null
+++ b/source/slang/slang-ir-propagate-func-properties.cpp
@@ -0,0 +1,186 @@
+#include "slang-ir-propagate-func-properties.h"
+
+#include "slang-ir.h"
+#include "slang-ir-insts.h"
+#include "slang-ir-util.h"
+
+
+namespace Slang
+{
+/// Propagate the "read none" (side-effect free) property through `module`.
+///
+/// A function that has a body and is not a target intrinsic receives an
+/// `IRReadNoneDecoration` when all of the following hold:
+///   - every instruction that `mightHaveSideEffects()` is one of the opcodes
+///     this pass understands (control flow, store, call, param, unreachable);
+///   - every call targets a function already decorated `IRReadNoneDecoration`;
+///   - every operand defined outside the function is a constant, a type, or one
+///     of the explicitly allowed global opcodes listed below.
+///
+/// The analysis is repeated until a full round decorates no new function.
+/// Returns true if at least one function was newly decorated.
+bool propagateFuncProperties(IRModule* module)
+{
+    bool result = false;
+    List<IRFunc*> workList;
+    HashSet<IRFunc*> workListSet;
+
+    // Queue `f` unless it is already queued in the current round.
+    auto addToWorkList = [&](IRFunc* f)
+    {
+        if (workListSet.Add(f))
+            workList.add(f);
+    };
+
+    // Queue the functions that refer to `f`. For a generic function these are the
+    // functions containing a user of any `specialize` of the outer generic; for a
+    // plain function they are the functions containing a `call` that uses `f`.
+    auto addCallersToWorkList = [&](IRFunc* f)
+    {
+        if (auto g = findOuterGeneric(f))
+        {
+            for (auto use = g->firstUse; use; use = use->nextUse)
+            {
+                if (use->getUser()->getOp() == kIROp_Specialize)
+                {
+                    auto specialize = use->getUser();
+                    for (auto iuse = specialize->firstUse; iuse; iuse = iuse->nextUse)
+                    {
+                        if (auto userFunc = getParentFunc(iuse->getUser()))
+                            addToWorkList(userFunc);
+                    }
+                }
+            }
+            return;
+        }
+        for (auto use = f->firstUse; use; use = use->nextUse)
+        {
+            if (use->getUser()->getOp() == kIROp_Call)
+            {
+                if (auto userFunc = getParentFunc(use->getUser()))
+                    addToWorkList(userFunc);
+            }
+        }
+    };
+
+    for (;;)
+    {
+        bool changed = false;
+        workList.clear();
+        workListSet.Clear();
+
+        // Add side effect free functions and their transitive callers to work list.
+        for (auto inst : module->getGlobalInsts())
+        {
+            auto genericInst = as<IRGeneric>(inst);
+            if (genericInst)
+            {
+                inst = findGenericReturnVal(genericInst);
+            }
+            if (auto func = as<IRFunc>(inst))
+            {
+                if (func->findDecoration<IRReadNoneDecoration>())
+                {
+                    addCallersToWorkList(func);
+                }
+            }
+        }
+
+        // Add remaining functions to work list.
+        for (auto inst : module->getGlobalInsts())
+        {
+            auto genericInst = as<IRGeneric>(inst);
+            if (genericInst)
+            {
+                inst = findGenericReturnVal(genericInst);
+            }
+            if (auto func = as<IRFunc>(inst))
+            {
+                addToWorkList(func);
+            }
+        }
+
+        IRBuilder builder(module);
+
+        for (Index i = 0; i < workList.getCount(); i++)
+        {
+            auto f = workList[i];
+            bool hasSideEffectCall = false;
+            if (f->findDecoration<IRReadNoneDecoration>())
+                continue;
+            // Never propagate to functions without a body.
+            if (f->getFirstBlock() == nullptr)
+                continue;
+            if (f->findDecoration<IRTargetIntrinsicDecoration>())
+                continue;
+            for (auto block : f->getBlocks())
+            {
+                for (auto inst : block->getChildren())
+                {
+                    // Is this inst known to not have global side effect/analyzable?
+                    if (inst->mightHaveSideEffects())
+                    {
+                        switch (inst->getOp())
+                        {
+                        case kIROp_ifElse:
+                        case kIROp_unconditionalBranch:
+                        case kIROp_Switch:
+                        case kIROp_Return:
+                        case kIROp_loop:
+                        case kIROp_Store:
+                        case kIROp_Call:
+                        case kIROp_Param:
+                        case kIROp_Unreachable:
+                            break;
+                        default:
+                            // We have a inst that has side effect and is not understood by this method.
+                            // e.g. bufferStore, discard, etc.
+                            // Only this function is disqualified; the pass must keep
+                            // analyzing the remaining functions, so do not return here.
+                            hasSideEffectCall = true;
+                            break;
+                        }
+                    }
+
+                    if (auto call = as<IRCall>(inst))
+                    {
+                        auto callee = getResolvedInstForDecorations(call->getCallee());
+                        switch (callee->getOp())
+                        {
+                        default:
+                            // We are calling an unknown function, so we have to assume
+                            // there are side effects in the call.
+                            hasSideEffectCall = true;
+                            break;
+                        case kIROp_Func:
+                            if (!callee->findDecoration<IRReadNoneDecoration>())
+                            {
+                                hasSideEffectCall = true;
+                                break;
+                            }
+                        }
+                    }
+
+                    // Are any operands defined in global scope?
+                    for (UInt o = 0; o < inst->getOperandCount(); o++)
+                    {
+                        auto operand = inst->getOperand(o);
+                        if (getParentFunc(operand) == f)
+                            continue;
+                        if (as<IRConstant>(operand))
+                            continue;
+                        if (as<IRType>(operand))
+                            continue;
+                        switch (operand->getOp())
+                        {
+                        case kIROp_Specialize:
+                        case kIROp_LookupWitness:
+                        case kIROp_StructKey:
+                        case kIROp_WitnessTable:
+                        case kIROp_WitnessTableEntry:
+                        case kIROp_undefined:
+                        case kIROp_Func:
+                            continue;
+                        default:
+                            break;
+                        }
+                        hasSideEffectCall = true;
+                        break;
+                    }
+
+                    // The verdict for `f` is final once a side effect is found;
+                    // skip the rest of this block.
+                    if (hasSideEffectCall)
+                        break;
+                }
+                if (hasSideEffectCall)
+                    break;
+            }
+            if (!hasSideEffectCall)
+            {
+                builder.addDecoration(f, kIROp_ReadNoneDecoration);
+                addCallersToWorkList(f);
+                changed = true;
+            }
+        }
+        result |= changed;
+        if (!changed)
+            break;
+    }
+    return result;
+}
+}
diff --git a/source/slang/slang-ir-propagate-func-properties.h b/source/slang/slang-ir-propagate-func-properties.h
new file mode 100644
index 000000000..6df2de18e
--- /dev/null
+++ b/source/slang/slang-ir-propagate-func-properties.h
@@ -0,0 +1,7 @@
+#pragma once
+
+namespace Slang
+{
+struct IRModule;
+bool propagateFuncProperties(IRModule* module);
+}
diff --git a/source/slang/slang-ir-redundancy-removal.cpp b/source/slang/slang-ir-redundancy-removal.cpp
index f3996fc01..2a2047de9 100644
--- a/source/slang/slang-ir-redundancy-removal.cpp
+++ b/source/slang/slang-ir-redundancy-removal.cpp
@@ -8,10 +8,118 @@ namespace Slang
struct RedundancyRemovalContext
{
RefPtr<IRDominatorTree> dom;
- bool removeRedundancyInBlock(DeduplicateContext& deduplicateContext, IRBlock* block)
+ // Returns true when `inst` has one of the opcodes whitelisted below, or is a
+ // call accepted by `isPureFunctionalCall`; returns false for every other opcode.
+ // Within this struct the result gates both deduplication and loop hoisting.
+ bool isMovableInst(IRInst* inst)
+ {
+ switch (inst->getOp())
+ {
+ // Arithmetic, shifts and logical operators.
+ case kIROp_Add:
+ case kIROp_Sub:
+ case kIROp_Mul:
+ case kIROp_Div:
+ case kIROp_FRem:
+ case kIROp_IRem:
+ case kIROp_Lsh:
+ case kIROp_Rsh:
+ case kIROp_And:
+ case kIROp_Or:
+ case kIROp_Not:
+ // Field / element access and aggregate updates.
+ case kIROp_FieldExtract:
+ case kIROp_FieldAddress:
+ case kIROp_GetElement:
+ case kIROp_GetElementPtr:
+ case kIROp_UpdateElement:
+ // Optional, tuple and aggregate construction / projection.
+ case kIROp_OptionalHasValue:
+ case kIROp_GetOptionalValue:
+ case kIROp_MakeOptionalValue:
+ case kIROp_MakeTuple:
+ case kIROp_GetTupleElement:
+ case kIROp_MakeStruct:
+ case kIROp_MakeArray:
+ case kIROp_MakeArrayFromElement:
+ case kIROp_MakeVector:
+ case kIROp_MakeMatrix:
+ case kIROp_MakeMatrixFromScalar:
+ case kIROp_MakeVectorFromScalar:
+ case kIROp_swizzle:
+ case kIROp_MatrixReshape:
+ case kIROp_MakeString:
+ case kIROp_MakeResultError:
+ case kIROp_MakeResultValue:
+ case kIROp_GetResultError:
+ case kIROp_GetResultValue:
+ // Casts.
+ case kIROp_CastFloatToInt:
+ case kIROp_CastIntToFloat:
+ case kIROp_CastIntToPtr:
+ case kIROp_CastPtrToBool:
+ case kIROp_CastPtrToInt:
+ // Bitwise operators and reinterpretation.
+ case kIROp_BitAnd:
+ case kIROp_BitNot:
+ case kIROp_BitOr:
+ case kIROp_BitXor:
+ case kIROp_BitCast:
+ case kIROp_Reinterpret:
+ // Comparisons.
+ case kIROp_Greater:
+ case kIROp_Less:
+ case kIROp_Geq:
+ case kIROp_Leq:
+ case kIROp_Neq:
+ case kIROp_Eql:
+ return true;
+ case kIROp_Call:
+ // Calls qualify only when `isPureFunctionalCall` says so.
+ return isPureFunctionalCall(as<IRCall>(inst));
+ default:
+ return false;
+ }
+ }
+
+    /// Walk up the dominator chain of `inst`'s block and move `inst` in front of
+    /// each dominating block's terminator when that terminator is a `loop` inst
+    /// and every operand of `inst` is available there.
+    ///
+    /// The walk stops at the first loop header where some operand is unavailable.
+    /// Returns true if `inst` was moved at least once.
+    bool tryHoistInstToOuterMostLoop(IRGlobalValueWithCode* func, IRInst* inst)
+    {
+        bool hoisted = false;
+        auto dominator = dom->getImmediateDominator(as<IRBlock>(inst->getParent()));
+        while (dominator)
+        {
+            auto terminator = dominator->getTerminator();
+            if (terminator->getOp() == kIROp_loop)
+            {
+                // `inst` may only move here if each operand defined inside `func`
+                // lives in a block that dominates this candidate block.
+                bool operandsAvailable = true;
+                for (UInt operandIndex = 0;
+                     operandsAvailable && operandIndex < inst->getOperandCount();
+                     operandIndex++)
+                {
+                    auto operand = inst->getOperand(operandIndex);
+
+                    // Values defined outside `func` never block hoisting.
+                    if (getParentFunc(operand) != func)
+                        continue;
+
+                    auto definingBlock = as<IRBlock>(operand->getParent());
+                    operandsAvailable =
+                        definingBlock && dom->dominates(definingBlock, dominator);
+                }
+                if (!operandsAvailable)
+                    break;
+
+                // Place `inst` just before the loop instruction, then keep
+                // looking for an even more outward position.
+                inst->insertBefore(terminator);
+                hoisted = true;
+            }
+            dominator = dom->getImmediateDominator(dominator);
+        }
+        return hoisted;
+    }
+
+ bool removeRedundancyInBlock(DeduplicateContext& deduplicateContext, IRGlobalValueWithCode* func, IRBlock* block)
{
bool result = false;
- for (auto instP : block->getChildren())
+ for (auto instP : block->getModifiableChildren())
{
auto resultInst = deduplicateContext.deduplicate(instP, [&](IRInst* inst)
{
@@ -20,75 +128,25 @@ struct RedundancyRemovalContext
return false;
if (dom->isUnreachable(parentBlock))
return false;
-
- switch (inst->getOp())
- {
- case kIROp_Add:
- case kIROp_Sub:
- case kIROp_Mul:
- case kIROp_Div:
- case kIROp_Module:
- case kIROp_Lsh:
- case kIROp_Rsh:
- case kIROp_And:
- case kIROp_Or:
- case kIROp_Not:
- case kIROp_FieldExtract:
- case kIROp_FieldAddress:
- case kIROp_GetElement:
- case kIROp_GetElementPtr:
- case kIROp_UpdateElement:
- case kIROp_OptionalHasValue:
- case kIROp_GetOptionalValue:
- case kIROp_MakeOptionalValue:
- case kIROp_MakeTuple:
- case kIROp_GetTupleElement:
- case kIROp_MakeStruct:
- case kIROp_MakeArray:
- case kIROp_MakeArrayFromElement:
- case kIROp_MakeVector:
- case kIROp_MakeMatrix:
- case kIROp_MakeMatrixFromScalar:
- case kIROp_MakeVectorFromScalar:
- case kIROp_swizzle:
- case kIROp_MatrixReshape:
- case kIROp_MakeString:
- case kIROp_MakeResultError:
- case kIROp_MakeResultValue:
- case kIROp_GetResultError:
- case kIROp_GetResultValue:
- case kIROp_CastFloatToInt:
- case kIROp_CastIntToFloat:
- case kIROp_CastIntToPtr:
- case kIROp_CastPtrToBool:
- case kIROp_CastPtrToInt:
- case kIROp_BitAnd:
- case kIROp_BitNot:
- case kIROp_BitOr:
- case kIROp_BitXor:
- case kIROp_BitCast:
- case kIROp_Reinterpret:
- case kIROp_Greater:
- case kIROp_Less:
- case kIROp_Geq:
- case kIROp_Leq:
- case kIROp_Neq:
- case kIROp_Eql:
- return true;
- case kIROp_Call:
- return isPureFunctionalCall(as<IRCall>(inst));
- default:
- return false;
- }
+ return isMovableInst(inst);
});
if (resultInst != instP)
+ {
+ instP->replaceUsesWith(resultInst);
result = true;
+ }
+ else if (isMovableInst(resultInst))
+ {
+ // This inst is unique, we should consider hoisting it
+ // if it is inside a loop.
+ result |= tryHoistInstToOuterMostLoop(func, resultInst);
+ }
}
for (auto child : dom->getImmediatelyDominatedBlocks(block))
{
DeduplicateContext subContext;
subContext.deduplicateMap = deduplicateContext.deduplicateMap;
- result |= removeRedundancyInBlock(subContext, child);
+ result |= removeRedundancyInBlock(subContext, func, child);
}
return result;
}
@@ -122,7 +180,142 @@ bool removeRedundancyInFunc(IRGlobalValueWithCode* func)
RedundancyRemovalContext context;
context.dom = computeDominatorTree(func);
DeduplicateContext deduplicateCtx;
- return context.removeRedundancyInBlock(deduplicateCtx, root);
+ return context.removeRedundancyInBlock(deduplicateCtx, func, root);
+}
+
+/// Follow operand 0 through any chain of `FieldAddress` / `GetElementPtr`
+/// instructions and return the first instruction that is neither.
+/// Returns null if `inst` is null or the chain runs into a null operand.
+static IRInst* _getRootVar(IRInst* inst)
+{
+    for (IRInst* current = inst; current; current = current->getOperand(0))
+    {
+        const auto op = current->getOp();
+        if (op != kIROp_FieldAddress && op != kIROp_GetElementPtr)
+            return current;
+    }
+    return nullptr;
+}
+
+// Try to delete `store` when it can be shown, conservatively, to be dead.
+// Returns true if the store was removed (and deallocated), false otherwise.
+// Two independent checks are attempted, in this order:
+//   1. the destination is rooted at a local `IRVar` and the access chain from
+//      the stored-to address up to that variable is only ever stored into;
+//   2. a later store to the same address follows, with no instruction in
+//      between that may read or otherwise touch the address.
+bool tryRemoveRedundantStore(IRGlobalValueWithCode* func, IRStore* store)
+{
+ // We perform a quick and conservative check:
+ // A store is redundant if it is followed by another store to the same address
+ // (in the same basic block, or in blocks reached from it through unconditional
+ // branches), and there are no instructions in between that may use any
+ // addresses related to this address.
+ bool hasAddrUse = false;
+ bool hasOverridingStore = false;
+
+ // Stores to global variables will never get removed.
+ auto rootVar = _getRootVar(store->getPtr());
+ if (!isChildInstOf(rootVar, func))
+ return false;
+
+ // A store can be removed if it stores into a local variable
+ // that has no other uses than store.
+ if (auto varInst = as<IRVar>(rootVar))
+ {
+ bool hasNonStoreUse = false;
+ // If the entire access chain has no non-store use, we can safely remove it.
+ // The chain is walked from the stored-to address towards the root variable.
+ HashSet<IRInst*> knownAccessChain;
+ for (auto accessChain = store->getPtr(); accessChain;)
+ {
+ knownAccessChain.Add(accessChain);
+ for (auto use = accessChain->firstUse; use; use = use->nextUse)
+ {
+ // Decorations do not count as uses.
+ if (as<IRDecoration>(use->getUser()))
+ continue;
+ // Uses by chain nodes already visited are the chain itself.
+ if (knownAccessChain.Contains(use->getUser()))
+ continue;
+ // A store is harmless only when this use is the first entry of the
+ // store's operand list (presumably the destination address).
+ if (use->getUser()->getOp() == kIROp_Store &&
+ use == use->getUser()->getOperands())
+ {
+ continue;
+ }
+ hasNonStoreUse = true;
+ break;
+ }
+ if (hasNonStoreUse)
+ break;
+ switch (accessChain->getOp())
+ {
+ case kIROp_GetElementPtr:
+ case kIROp_FieldAddress:
+ // Step to the base address and examine its uses as well.
+ accessChain = accessChain->getOperand(0);
+ continue;
+ default:
+ break;
+ }
+ // Reached a node that is not an address computation: the chain ends here.
+ break;
+ }
+ if (!hasNonStoreUse)
+ {
+ store->removeAndDeallocate();
+ return true;
+ }
+ }
+
+ // A store can be removed if there are subsequent stores to the same variable,
+ // and there are no insts in between the stores that can read the variable.
+
+ // Blocks already entered while following unconditional branches; prevents
+ // scanning the same block twice.
+ HashSet<IRBlock*> visitedBlocks;
+ for (auto next = store->getNextInst(); next;)
+ {
+ if (auto nextStore = as<IRStore>(next))
+ {
+ if (nextStore->getPtr() == store->getPtr())
+ {
+ hasOverridingStore = true;
+ break;
+ }
+ }
+
+ // If we see any inst that reads or modifies the address before seeing
+ // an overriding store, don't remove the store.
+ // We can make the test more accurate by collecting all addresses related to
+ // the target address first, and only bail out if any of the related addresses
+ // are involved.
+ switch (next->getOp())
+ {
+ case kIROp_Load:
+ if (canAddressesPotentiallyAlias(func, next->getOperand(0), store->getPtr()))
+ {
+ hasAddrUse = true;
+ }
+ break;
+ default:
+ if (canInstHaveSideEffectAtAddress(func, next, store->getPtr()))
+ {
+ hasAddrUse = true;
+ }
+ break;
+ }
+ if (hasAddrUse)
+ break;
+
+ // If we are at the end of the current block and see an unconditional branch,
+ // we can follow the path and check the subsequent block.
+ if (auto branch = as<IRUnconditionalBranch>(next))
+ {
+ auto nextBlock = branch->getTargetBlock();
+ if (visitedBlocks.Add(nextBlock))
+ {
+ next = nextBlock->getFirstInst();
+ continue;
+ }
+ }
+ next = next->getNextInst();
+ }
+
+ if (!hasAddrUse && hasOverridingStore)
+ {
+ store->removeAndDeallocate();
+ return true;
+ }
+ return false;
+}
bool eliminateRedundantLoadStore(IRGlobalValueWithCode* func)
@@ -158,57 +351,7 @@ bool eliminateRedundantLoadStore(IRGlobalValueWithCode* func)
}
else if (auto store = as<IRStore>(inst))
{
- // We perform a quick and conservative check:
- // A store is redundant if it is followed by another store to the same address in
- // the same basic block, and there are no instructions that may use any addresses
- // related to this address.
- bool hasAddrUse = false;
- bool hasOverridingStore = false;
-
- // Stores to global variables will never get removed.
- if (!isChildInstOf(store->getPtr(), func))
- hasAddrUse = true;
-
- for (auto next = store->getNextInst(); next; next = next->getNextInst())
- {
- if (auto nextStore = as<IRStore>(next))
- {
- if (nextStore->getPtr() == store->getPtr())
- {
- hasOverridingStore = true;
- break;
- }
- }
-
- // If we see any insts that have reads or modifies the address before seeing
- // an overriding store, don't remove the store.
- // We can make the test more accurate by collecting all addresses related to
- // the target address first, and only bail out if any of the related addresses
- // are involved.
- switch (next->getOp())
- {
- case kIROp_Load:
- if (canAddressesPotentiallyAlias(func, next->getOperand(0), store->getPtr()))
- {
- hasAddrUse = true;
- }
- break;
- default:
- if (canInstHaveSideEffectAtAddress(func, next, store->getPtr()))
- {
- hasAddrUse = true;
- }
- break;
- }
- if (hasAddrUse)
- break;
- }
-
- if (!hasAddrUse && hasOverridingStore)
- {
- store->removeAndDeallocate();
- changed = true;
- }
+ changed |= tryRemoveRedundantStore(func, store);
}
inst = nextInst;
}
diff --git a/source/slang/slang-ir-sccp.cpp b/source/slang/slang-ir-sccp.cpp
index d05527e59..691bd7ff0 100644
--- a/source/slang/slang-ir-sccp.cpp
+++ b/source/slang/slang-ir-sccp.cpp
@@ -1439,7 +1439,9 @@ struct SCCPContext
inst->replaceUsesWith(constantVal);
if( !inst->mightHaveSideEffects() )
{
- instsToRemove.add(inst);
+ // Don't delete phi parameters, they will be cleaned up in CFG simplification.
+ if (inst->getOp() != kIROp_Param)
+ instsToRemove.add(inst);
}
}
}
diff --git a/source/slang/slang-ir-simplify-cfg.cpp b/source/slang/slang-ir-simplify-cfg.cpp
index 7e9e105e1..b814442fa 100644
--- a/source/slang/slang-ir-simplify-cfg.cpp
+++ b/source/slang/slang-ir-simplify-cfg.cpp
@@ -4,6 +4,8 @@
#include "slang-ir.h"
#include "slang-ir-dominators.h"
#include "slang-ir-restructure.h"
+#include "slang-ir-util.h"
+#include "slang-ir-loop-unroll.h"
namespace Slang
{
@@ -31,8 +33,7 @@ static BreakableRegion* findBreakableRegion(Region* region)
// it is needed and hasn't been generated yet.
static bool isTrivialSingleIterationLoop(
IRGlobalValueWithCode* func,
- IRLoop* loop,
- CFGSimplificationContext& inoutContext)
+ IRLoop* loop)
{
auto targetBlock = loop->getTargetBlock();
if (targetBlock->getPredecessors().getCount() != 1) return false;
@@ -52,14 +53,14 @@ static bool isTrivialSingleIterationLoop(
//
// We need to verify this is a trivial loop by checking if there is any multi-level breaks
// that skips out of this loop.
-
- if (!inoutContext.domTree)
- inoutContext.domTree = computeDominatorTree(func);
- if (!inoutContext.regionTree)
- inoutContext.regionTree = generateRegionTreeForFunc(func, nullptr);
+ CFGSimplificationContext context;
+ if (!context.domTree)
+ context.domTree = computeDominatorTree(func);
+ if (!context.regionTree)
+ context.regionTree = generateRegionTreeForFunc(func, nullptr);
SimpleRegion* targetBlockRegion = nullptr;
- if (!inoutContext.regionTree->mapBlockToRegion.TryGetValue(targetBlock, targetBlockRegion))
+ if (!context.regionTree->mapBlockToRegion.TryGetValue(targetBlock, targetBlockRegion))
return false;
BreakableRegion* loopBreakableRegion = findBreakableRegion(targetBlockRegion);
LoopRegion* loopRegion = as<LoopRegion>(loopBreakableRegion);
@@ -67,18 +68,18 @@ static bool isTrivialSingleIterationLoop(
return false;
for (auto block : func->getBlocks())
{
- if (!inoutContext.domTree->dominates(loop->getTargetBlock(), block))
+ if (!context.domTree->dominates(loop->getTargetBlock(), block))
continue;
- if (inoutContext.domTree->dominates(loop->getBreakBlock(), block))
+ if (context.domTree->dominates(loop->getBreakBlock(), block))
continue;
SimpleRegion* region = nullptr;
- if (!inoutContext.regionTree->mapBlockToRegion.TryGetValue(block, region))
+ if (!context.regionTree->mapBlockToRegion.TryGetValue(block, region))
return false;
for (auto branchTarget : block->getSuccessors())
{
SimpleRegion* targetRegion = nullptr;
- if (!inoutContext.regionTree->mapBlockToRegion.TryGetValue(branchTarget, targetRegion))
+ if (!context.regionTree->mapBlockToRegion.TryGetValue(branchTarget, targetRegion))
return false;
// If multi-level break out that skips over this loop exists, then this is not a trivial loop.
if (targetRegion->isDescendentOf(loopRegion))
@@ -96,6 +97,104 @@ static bool isTrivialSingleIterationLoop(
return true;
}
+// Conservatively decide whether the loop started by `loopInst` may have an
+// effect that is observable outside the loop's own blocks.
+// Returns true when any instruction in the loop is used outside the loop, or
+// has a possible side effect that cannot be shown to stay inside the loop;
+// returns false only when every instruction passes the checks below.
+static bool doesLoopHasSideEffect(IRGlobalValueWithCode* func, IRLoop* loopInst)
+{
+ auto blocks = collectBlocksInLoop(func, loopInst);
+ // Set form of `blocks` for fast membership tests.
+ HashSet<IRBlock*> loopBlocks;
+ for (auto b : blocks)
+ loopBlocks.Add(b);
+ // Returns true when `addr`, or any base address it is derived from, is
+ // defined outside `func`, has a user whose parent is not a loop block, or is
+ // rooted at something other than a `Var`.
+ auto addressHasOutOfLoopUses = [&](IRInst* addr)
+ {
+ // The entire access chain of `addr` must have no uses outside the loop.
+ // The root variable must be a local var.
+ for (auto chainNode = addr; chainNode;)
+ {
+ if (getParentFunc(chainNode) != func)
+ return true;
+ for (auto use = chainNode->firstUse; use; use = use->nextUse)
+ {
+ if (!loopBlocks.Contains(as<IRBlock>(use->getUser()->getParent())))
+ return true;
+ }
+ switch (chainNode->getOp())
+ {
+ case kIROp_GetElementPtr:
+ case kIROp_FieldAddress:
+ // Continue with the base address.
+ chainNode = chainNode->getOperand(0);
+ continue;
+ case kIROp_Var:
+ break;
+ default:
+ // Unknown kind of address root: assume it escapes.
+ return true;
+ }
+ break;
+ }
+ return false;
+ };
+
+ for (auto b : blocks)
+ {
+ for (auto inst : b->getChildren())
+ {
+ // Is this inst used anywhere outside the loop? If so the loop has side effect.
+ for (auto use = inst->firstUse; use; use = use->nextUse)
+ {
+ if (!loopBlocks.Contains(as<IRBlock>(use->getUser()->getParent())))
+ return true;
+ }
+
+ // The inst can't possibly have side effect? Skip it.
+ if (!inst->mightHaveSideEffects())
+ continue;
+
+ // This inst might have side effect, try to prove that the
+ // side effect does not leak beyond the scope of the loop.
+ if (auto call = as<IRCall>(inst))
+ {
+ auto callee = getResolvedInstForDecorations(call->getCallee());
+ if (!callee || !callee->findDecoration<IRReadNoneDecoration>())
+ return true;
+ // We are calling a pure function, check if any of the return
+ // variables are used outside the loop.
+ for (UInt i = 0; i < call->getArgCount(); i++)
+ {
+ auto arg = call->getArg(i);
+ // Only arguments that are not value-typed are checked as addresses.
+ if (!isValueType(arg->getDataType()))
+ {
+ if (addressHasOutOfLoopUses(arg))
+ return true;
+ }
+ }
+ }
+ else if (auto store = as<IRStore>(inst))
+ {
+ if (addressHasOutOfLoopUses(store->getPtr()))
+ return true;
+ }
+ else if (auto branch = as<IRUnconditionalBranch>(inst))
+ {
+ // NOTE(review): this test precedes the `IRLoop` test below; if `IRLoop`
+ // is a kind of `IRUnconditionalBranch`, loops are handled here — confirm.
+ if (loopBlocks.Contains(branch->getTargetBlock()))
+ continue;
+ // Branching out of the loop with some argument is considered
+ // having a side effect.
+ if (branch->getArgCount() != 0)
+ return true;
+ }
+ else if (as<IRIfElse>(inst) || as<IRSwitch>(inst) || as<IRLoop>(inst))
+ {
+ // We are starting a sub control flow.
+ // This is considered side effect free.
+ }
+ else
+ {
+ // For all other insts, we assume it has a global side effect.
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
static bool removeDeadBlocks(IRGlobalValueWithCode* func)
{
bool changed = false;
@@ -142,15 +241,327 @@ static bool removeDeadBlocks(IRGlobalValueWithCode* func)
return changed;
}
+// Returns true when `branchBlock` contributes nothing to `condBranch`: it is
+// either the after block itself, or a block whose first ordinary inst is a
+// plain `unconditionalBranch` jumping straight into the after block.
+ static bool isTrivialIfElseBranch(IRIfElse* condBranch, IRBlock* branchBlock)
+ {
+     auto afterBlock = condBranch->getAfterBlock();
+
+     // A branch that targets the after block directly is trivially empty.
+     if (branchBlock == afterBlock)
+         return true;
+
+     // Otherwise the very first ordinary inst of the block must already be
+     // the jump, i.e. the block holds no other insts.
+     auto jump = as<IRUnconditionalBranch>(branchBlock->getFirstOrdinaryInst());
+     if (!jump)
+         return false;
+
+     // `loop` insts are IRUnconditionalBranch as well; only a plain
+     // `unconditionalBranch` into the after block counts as trivial.
+     return jump->getTargetBlock() == afterBlock && jump->getOp() == kIROp_unconditionalBranch;
+ }
+
+
+ // Returns true when both sides of `ifElse` hand the same phi arguments to
+ // whatever block they jump to, so the choice of side cannot be observed
+ // through block parameters.
+ static bool arePhiArgsEquivalentInBranches(IRIfElse* ifElse)
+ {
+     auto mergeBlock = ifElse->getAfterBlock();
+     auto trueSide = ifElse->getTrueBlock();
+     auto falseSide = ifElse->getFalseBlock();
+
+     // If either side targets the after block directly, the after block is a
+     // target of a conditional branch. A block with phi parameters can't be
+     // used that way, so the argument count must be 0 and there is nothing
+     // to compare.
+     if (trueSide == mergeBlock || falseSide == mergeBlock)
+         return true;
+
+     auto trueJump = as<IRUnconditionalBranch>(trueSide->getTerminator());
+     auto falseJump = as<IRUnconditionalBranch>(falseSide->getTerminator());
+     if (!trueJump || !falseJump)
+         return false;
+
+     // Both sides are separate blocks: their arguments must match pairwise.
+     const UInt argCount = trueJump->getArgCount();
+     if (argCount != falseJump->getArgCount())
+     {
+         // This should never happen, return false now to be safe.
+         return false;
+     }
+
+     for (UInt argIndex = 0; argIndex < argCount; ++argIndex)
+     {
+         // A differing argument makes the if-else non-trivial.
+         if (trueJump->getArg(argIndex) != falseJump->getArg(argIndex))
+             return false;
+     }
+     return true;
+ }
+
+
+ // Classifies both sides of `condBranch`. The two out-parameters are always
+ // written; the return value is true only when both sides are trivial and
+ // they pass equivalent phi arguments.
+ static bool isTrivialIfElse(IRIfElse* condBranch, bool& isTrueBranchTrivial, bool& isFalseBranchTrivial)
+ {
+     isTrueBranchTrivial = isTrivialIfElseBranch(condBranch, condBranch->getTrueBlock());
+     isFalseBranchTrivial = isTrivialIfElseBranch(condBranch, condBranch->getFalseBlock());
+
+     // The phi comparison is only attempted when both sides are trivial.
+     if (!isTrueBranchTrivial || !isFalseBranchTrivial)
+         return false;
+     return arePhiArgsEquivalentInBranches(condBranch);
+ }
+
+
+#if 0 // Compiled out: the only call site (in trySimplifyIfElse) is commented out.
+ static bool tryMoveFalseBranchToTrueBranch(IRBuilder& builder, IRIfElse* ifElseInst) // Makes the false block the true block and negates the condition; returns true if the if-else was rewritten.
+ {
+ auto falseBlock = ifElseInst->getFalseBlock();
+ if (falseBlock == ifElseInst->getAfterBlock()) // There is no separate false block to move.
+ return false;
+ if (auto termInst = as<IRUnconditionalBranch>(falseBlock->getTerminator()))
+ {
+ // We can't fold a branch with arguments into the ifElse.
+ if (termInst->getArgCount() != 0)
+ return false;
+ }
+ ifElseInst->trueBlock.set(falseBlock); // The old false block becomes the new true block ...
+ ifElseInst->falseBlock.set(ifElseInst->getAfterBlock()); // ... and the false side now goes straight to the after block.
+ builder.setInsertBefore(ifElseInst);
+ auto newCondition = builder.emitNot(builder.getBoolType(), ifElseInst->getCondition()); // Negate the condition to match the swapped targets.
+ ifElseInst->condition.set(newCondition);
+ return true;
+ }
+#endif
+
+ // Points the false target of `ifElseInst` directly at the after block.
+ // Returns true if the if-else was modified.
+ static bool tryEliminateFalseBranch(IRIfElse* ifElseInst)
+ {
+     auto afterBlock = ifElseInst->getAfterBlock();
+     auto falseBlock = ifElseInst->getFalseBlock();
+
+     // Already targeting the after block: nothing to eliminate.
+     if (falseBlock == afterBlock)
+         return false;
+
+     // We can't fold a branch with arguments into the ifElse.
+     auto jump = as<IRUnconditionalBranch>(falseBlock->getTerminator());
+     if (jump && jump->getArgCount() != 0)
+         return false;
+
+     ifElseInst->falseBlock.set(afterBlock);
+     return true;
+ }
+
+
+ // Tries to simplify `ifElseInst` when one or both of its sides are trivial
+ // jumps into the after block. Returns true if the IR was changed.
+ static bool trySimplifyIfElse(IRBuilder& builder, IRIfElse* ifElseInst)
+ {
+     bool isTrueBranchTrivial = false;
+     bool isFalseBranchTrivial = false;
+     if (isTrivialIfElse(ifElseInst, isTrueBranchTrivial, isFalseBranchTrivial))
+     {
+         // If both branches of `if-else` are trivial jumps into after block,
+         // we can get rid of the entire conditional branch and replace it
+         // with a jump into the after block.
+         auto afterBlock = ifElseInst->getAfterBlock();
+         auto trueBlock = ifElseInst->getTrueBlock();
+
+         List<IRInst*> args;
+         if (trueBlock != afterBlock)
+         {
+             // The true side is a separate trivial block: forward whatever
+             // arguments its jump passes to the after block.
+             auto termInst = as<IRUnconditionalBranch>(trueBlock->getTerminator());
+             if (!termInst)
+                 return false;
+             for (UInt i = 0; i < termInst->getArgCount(); i++)
+                 args.add(termInst->getArg(i));
+         }
+         // Otherwise the if-else targets the after block directly. A block
+         // that is the target of a conditional branch takes no phi arguments,
+         // so `args` stays empty. The terminator of `trueBlock` must NOT be
+         // consulted here: it is the after block's own terminator, and its
+         // arguments belong to a different jump.
+
+         builder.setInsertBefore(ifElseInst);
+         builder.emitBranch(afterBlock, (Int)args.getCount(), args.getBuffer());
+         ifElseInst->removeAndDeallocate();
+         return true;
+     }
+     else if (isTrueBranchTrivial)
+     {
+         // If true branch is empty, we move false branch to true branch and invert the condition.
+         // TODO: disabled for now since our auto-diff pass can't handle loops whose body is on the false
+         // side of condition.
+         //return tryMoveFalseBranchToTrueBranch(builder, ifElseInst);
+     }
+     else if (isFalseBranchTrivial)
+     {
+         // If false branch is empty, we set it to afterBlock.
+         return tryEliminateFalseBranch(ifElseInst);
+     }
+     return false;
+ }
+
+
+ // True only when `lit` is a boolean literal whose value is `true`.
+ static bool isTrueLit(IRInst* lit)
+ {
+     auto asBoolLit = as<IRBoolLit>(lit);
+     return asBoolLit ? asBoolLit->getValue() : false;
+ }
+ // True only when `lit` is a boolean literal whose value is `false`.
+ // Anything that is not a boolean literal yields false.
+ static bool isFalseLit(IRInst* lit)
+ {
+     auto asBoolLit = as<IRBoolLit>(lit);
+     return asBoolLit ? !asBoolLit->getValue() : false;
+ }
+
+ // For a bool phi parameter whose two incoming values are the literals
+ // `true`/`false` (in either order) coming from the two sides of the same
+ // `if-else`, replaces the parameter with the if-else condition (or its
+ // negation) and drops the parameter together with both incoming arguments.
+ // `preds[0]` / `preds[1]` are the two predecessor blocks; the caller
+ // (simplifyBoolPhiParams) orders them as true side, false side.
+ // Returns true if the parameter was removed.
+ static bool simplifyBoolPhiParam(IRIfElse* ifElse, Array<IRBlock*, 2>& preds, IRParam* param, UInt paramIndex)
+ {
+     // Only parameters of bool type are candidates.
+     auto paramType = param->getDataType();
+     if (!paramType || paramType->getOp() != kIROp_BoolType)
+         return false;
+
+     // Both predecessors must end in a jump that actually supplies this argument.
+     auto firstJump = as<IRUnconditionalBranch>(preds[0]->getTerminator());
+     if (!firstJump || firstJump->getArgCount() <= paramIndex)
+         return false;
+     auto secondJump = as<IRUnconditionalBranch>(preds[1]->getTerminator());
+     if (!secondJump || secondJump->getArgCount() <= paramIndex)
+         return false;
+
+     IRInst* firstArg = firstJump->getArg(paramIndex);
+     IRInst* secondArg = secondJump->getArg(paramIndex);
+
+     IRInst* replacement = nullptr;
+     if (isTrueLit(firstArg) && isFalseLit(secondArg))
+     {
+         // (true, false): the parameter is exactly the condition.
+         replacement = ifElse->getCondition();
+     }
+     else if (isFalseLit(firstArg) && isTrueLit(secondArg))
+     {
+         // (false, true): the parameter is the negated condition.
+         IRBuilder builder(param);
+         setInsertBeforeOrdinaryInst(&builder, param);
+         replacement = builder.emitNot(builder.getBoolType(), ifElse->getCondition());
+     }
+
+     if (!replacement)
+         return false;
+
+     param->replaceUsesWith(replacement);
+     param->removeAndDeallocate();
+     firstJump->removeArgument(paramIndex);
+     secondJump->removeArgument(paramIndex);
+     return true;
+ }
+
+
+ // Runs simplifyBoolPhiParam on every parameter of `block`, provided that
+ // `block` has exactly two predecessors and each of them has, as its single
+ // predecessor, the same block ending in an `if-else`.
+ // Returns true if any parameter was removed.
+ static bool simplifyBoolPhiParams(IRBlock* block)
+ {
+     if (!block)
+         return false;
+     if (block->getPredecessors().getCount() != 2)
+         return false;
+
+     Array<IRBlock*, 2> preds;
+     for (auto pred : block->getPredecessors())
+         preds.add(pred);
+
+     // Each predecessor must be reached from exactly one block, and it has
+     // to be the same block for both of them.
+     if (preds[0]->getPredecessors().getCount() != 1)
+         return false;
+     IRBlock* ifElseBlock = *(preds[0]->getPredecessors().begin());
+     if (preds[1]->getPredecessors().getCount() != 1)
+         return false;
+     if (*(preds[1]->getPredecessors().begin()) != ifElseBlock)
+         return false;
+
+     auto ifElse = as<IRIfElse>(ifElseBlock->getTerminator());
+     if (!ifElse)
+         return false;
+
+     // Normalize the order so that preds[0] is the true side.
+     if (ifElse->getTrueBlock() == preds[1])
+     {
+         Swap(preds[0], preds[1]);
+     }
+     SLANG_ASSERT(ifElse->getTrueBlock() == preds[0] && ifElse->getFalseBlock() == preds[1]);
+
+     // Snapshot the parameters first, since they get removed while we work.
+     List<IRParam*> params;
+     for (auto param : block->getParams())
+         params.add(param);
+
+     // Walk from the last parameter to the first so that removing one does
+     // not shift the argument index of those still to be visited.
+     bool changed = false;
+     for (Index i = params.getCount(); i-- > 0;)
+     {
+         changed |= simplifyBoolPhiParam(ifElse, preds, params[i], (UInt)i);
+     }
+     return changed;
+ }
+
+
+ // Removes phi parameters of `block` that carry no information, rewriting
+ // their uses and dropping the matching argument from every predecessor's
+ // jump. Bails out (returns false) if any predecessor does not end in an
+ // unconditional branch. Returns true if at least one parameter was removed.
+ static bool removeTrivialPhiParams(IRBlock* block)
+ {
+     // We can remove a phi parameter if:
+     // 1. all arguments to the parameter are the same (not really a phi).
+     // 2. the arguments to the parameter are always the same as the arguments
+     //    to another existing parameter (duplicate phi).
+
+     bool changed = false;
+     List<IRParam*> params;
+     struct ParamState
+     {
+         // True while every predecessor seen so far passed `knownValue`.
+         bool areKnownValueSame = true;
+         // Argument passed by the first predecessor visited.
+         IRInst* knownValue = nullptr;
+         // Indices of earlier parameters that received the same argument as
+         // this one on every predecessor seen so far.
+         OrderedHashSet<UInt> sameAsParamSet;
+     };
+     List<ParamState> args;
+     List<IRUnconditionalBranch*> termInsts;
+     for (auto param : block->getParams())
+     {
+         params.add(param);
+         args.add(ParamState());
+     }
+
+     if (!params.getCount())
+         return false;
+
+     // Start by assuming each parameter duplicates every earlier one; the
+     // scan below strikes out candidates as soon as a predecessor disagrees.
+     for (UInt i = 1; i < (UInt)args.getCount(); i++)
+         for (UInt j = 0; j < i; j++)
+             args[i].sameAsParamSet.Add(j);
+
+     for (auto pred : block->getPredecessors())
+     {
+         auto termInst = as<IRUnconditionalBranch>(pred->getTerminator());
+         if (!termInst)
+             return false;
+         SLANG_ASSERT(termInst->getArgCount() == (UInt)args.getCount());
+         termInsts.add(termInst);
+         for (UInt i = 0; i < termInst->getArgCount(); i++)
+         {
+             if (args[i].areKnownValueSame)
+             {
+                 if (args[i].knownValue == nullptr)
+                     args[i].knownValue = termInst->getArg(i);
+                 else if (args[i].knownValue != termInst->getArg(i))
+                     args[i].areKnownValueSame = false;
+             }
+             for (UInt j = 0; j < i; j++)
+             {
+                 if (termInst->getArg(i) != termInst->getArg(j))
+                 {
+                     args[i].sameAsParamSet.Remove(j);
+                 }
+             }
+         }
+     }
+
+     // Process from the last parameter to the first so that removing an
+     // argument does not shift the index of those still to be processed.
+     for (Index i = args.getCount() - 1; i >= 0; i--)
+     {
+         IRInst* targetVal = nullptr;
+         if (args[i].areKnownValueSame)
+         {
+             targetVal = args[i].knownValue;
+         }
+         else if (args[i].sameAsParamSet.Count())
+         {
+             auto targetParamId = *args[i].sameAsParamSet.begin();
+             targetVal = params[targetParamId];
+         }
+         if (targetVal)
+         {
+             // Use `targetVal`, not `knownValue`: for a duplicate phi,
+             // `knownValue` only holds the argument passed by the first
+             // predecessor visited and is wrong on the other incoming edges.
+             params[i]->replaceUsesWith(targetVal);
+             params[i]->removeAndDeallocate();
+             for (auto termInst : termInsts)
+                 termInst->removeArgument((UInt)i);
+             changed = true;
+         }
+     }
+     return changed;
+ }
+
+
static bool processFunc(IRGlobalValueWithCode* func)
{
auto firstBlock = func->getFirstBlock();
if (!firstBlock)
return false;
- // Lazily generated region tree.
- CFGSimplificationContext simplificationContext;
-
IRBuilder builder(func->getModule());
bool changed = false;
@@ -165,6 +576,14 @@ static bool processFunc(IRGlobalValueWithCode* func)
workList.fastRemoveAt(0);
while (block)
{
+ // If all arguments to a phi parameter are known to be the same,
+ // we can safely replace the phi parameter with the argument.
+ if (block != func->getFirstBlock())
+ {
+ changed |= simplifyBoolPhiParams(block);
+ changed |= removeTrivialPhiParams(block);
+ }
+
if (auto loop = as<IRLoop>(block->getTerminator()))
{
// If continue block is unreachable, remove it.
@@ -179,7 +598,7 @@ static bool processFunc(IRGlobalValueWithCode* func)
// break at the end of the loop, we can remove the header and turn it into
// a normal branch.
auto targetBlock = loop->getTargetBlock();
- if (isTrivialSingleIterationLoop(func, loop, simplificationContext))
+ if (isTrivialSingleIterationLoop(func, loop))
{
builder.setInsertBefore(loop);
List<IRInst*> args;
@@ -189,7 +608,22 @@ static bool processFunc(IRGlobalValueWithCode* func)
}
builder.emitBranch(targetBlock, args.getCount(), args.getBuffer());
loop->removeAndDeallocate();
+ changed = true;
}
+ else if (!doesLoopHasSideEffect(func, loop))
+ {
+ // The loop isn't computing anything useful outside the loop.
+ // We can delete the entire loop.
+ builder.setInsertBefore(loop);
+ SLANG_ASSERT(loop->getBreakBlock()->getFirstParam() == nullptr);
+ builder.emitBranch(loop->getBreakBlock());
+ loop->removeAndDeallocate();
+ changed = true;
+ }
+ }
+ else if (auto condBranch = as<IRIfElse>(block->getTerminator()))
+ {
+ changed |= trySimplifyIfElse(builder, condBranch);
}
// If `block` does not end with an unconditional branch, bail.
@@ -225,6 +659,7 @@ static bool processFunc(IRGlobalValueWithCode* func)
branch->removeAndDeallocate();
assert(!successor->hasUses());
successor->removeAndDeallocate();
+ break;
}
for (auto successor : block->getSuccessors())
{
diff --git a/source/slang/slang-ir-specialize-function-call.cpp b/source/slang/slang-ir-specialize-function-call.cpp
index 894d46cce..a2ebbc0cf 100644
--- a/source/slang/slang-ir-specialize-function-call.cpp
+++ b/source/slang/slang-ir-specialize-function-call.cpp
@@ -822,6 +822,12 @@ struct FunctionParameterSpecializationContext
{
decoration->removeAndDeallocate();
}
+ else if (as<IRReadNoneDecoration>(decoration))
+ {
+ // After specialization, the function may no longer be side effect free
+ // because the parameter we substituted in may be a global param.
+ decoration->removeAndDeallocate();
+ }
}
}
diff --git a/source/slang/slang-ir-ssa-simplification.cpp b/source/slang/slang-ir-ssa-simplification.cpp
index f06fafcb3..beaaae065 100644
--- a/source/slang/slang-ir-ssa-simplification.cpp
+++ b/source/slang/slang-ir-ssa-simplification.cpp
@@ -10,6 +10,7 @@
#include "slang-ir-deduplicate-generic-children.h"
#include "slang-ir-remove-unused-generic-param.h"
#include "slang-ir-redundancy-removal.h"
+#include "slang-ir-propagate-func-properties.h"
namespace Slang
{
@@ -29,6 +30,7 @@ namespace Slang
changed |= peepholeOptimize(module);
changed |= removeRedundancy(module);
changed |= simplifyCFG(module);
+ changed |= propagateFuncProperties(module);
// Note: we disregard the `changed` state from dead code elimination pass since
// SCCP pass could be generating temporarily evaluated constant values and never actually use them.
@@ -41,6 +43,28 @@ namespace Slang
}
}
+ // Repeatedly runs peephole optimization, redundancy removal, CFG
+ // simplification and dead code elimination on `module`, stopping when an
+ // iteration reports no change or after `kMaxIterations` rounds.
+ void simplifyNonSSAIR(IRModule* module)
+ {
+     const int kMaxIterations = 8;
+     bool changed = true;
+     for (int iteration = 0; changed && iteration < kMaxIterations; iteration++)
+     {
+         changed = peepholeOptimize(module);
+         changed |= removeRedundancy(module);
+         changed |= simplifyCFG(module);
+
+         // Note: we disregard the `changed` state from dead code elimination pass since
+         // SCCP pass could be generating temporarily evaluated constant values and never actually use them.
+         // DCE will always remove those newly generated consts and always returns true here.
+         eliminateDeadCode(module);
+     }
+ }
+
+
+
void simplifyFunc(IRGlobalValueWithCode* func)
{
bool changed = true;
diff --git a/source/slang/slang-ir-ssa-simplification.h b/source/slang/slang-ir-ssa-simplification.h
index ee8343003..39504e102 100644
--- a/source/slang/slang-ir-ssa-simplification.h
+++ b/source/slang/slang-ir-ssa-simplification.h
@@ -10,5 +10,8 @@ namespace Slang
// until no more changes are possible.
void simplifyIR(IRModule* module);
+ // Run simplifications on IR that is out of SSA form.
+ void simplifyNonSSAIR(IRModule* module);
+
void simplifyFunc(IRGlobalValueWithCode* func);
}
diff --git a/source/slang/slang-ir-util.cpp b/source/slang/slang-ir-util.cpp
index 3db036a8d..339521f41 100644
--- a/source/slang/slang-ir-util.cpp
+++ b/source/slang/slang-ir-util.cpp
@@ -157,6 +157,32 @@ IRInst* maybeSpecializeWithGeneric(IRBuilder& builder, IRInst* genericToSpecaili
return genericToSpecailize;
}
+// Returns true if `dataType` (after unwrapping attributed types and resolving
+// it with getResolvedInstForDecorations) is a basic type or one of the
+// composite/value type opcodes listed below. Every other type yields false.
+ bool isValueType(IRInst* dataType)
+ {
+     auto resolvedType = getResolvedInstForDecorations(unwrapAttributedType(dataType));
+
+     // All basic types are value types.
+     if (as<IRBasicType>(resolvedType))
+         return true;
+
+     switch (resolvedType->getOp())
+     {
+     case kIROp_StructType:
+     case kIROp_InterfaceType:
+     case kIROp_ClassType:
+     case kIROp_VectorType:
+     case kIROp_MatrixType:
+     case kIROp_TupleType:
+     case kIROp_ResultType:
+     case kIROp_OptionalType:
+     case kIROp_DifferentialPairType:
+     case kIROp_DynamicType:
+     case kIROp_AnyValueType:
+     case kIROp_ArrayType:
+     case kIROp_FuncType:
+         return true;
+     default:
+         break;
+     }
+     return false;
+ }
+
+
IRInst* hoistValueFromGeneric(IRBuilder& inBuilder, IRInst* value, IRInst*& outSpecializedVal, bool replaceExistingValue)
{
auto outerGeneric = as<IRGeneric>(findOuterGeneric(value));
@@ -402,8 +428,7 @@ bool canInstHaveSideEffectAtAddress(IRGlobalValueWithCode* func, IRInst* inst, I
{
auto callee = call->getCallee();
if (callee &&
- callee->findDecoration<IRReadNoneDecoration>() &&
- callee->findDecoration<IRNoSideEffectDecoration>())
+ callee->findDecoration<IRReadNoneDecoration>())
{
// An exception is if the callee is side-effect free and is not reading from
// memory.
@@ -423,6 +448,32 @@ bool canInstHaveSideEffectAtAddress(IRGlobalValueWithCode* func, IRInst* inst, I
if (canAddressesPotentiallyAlias(func, call->getArg(i), addr))
return true;
}
+ else if (!isValueType(call->getArg(i)->getDataType()))
+ {
+ // This is some unknown handle type, we assume it can have any side effects.
+ return true;
+ }
+ }
+ }
+ break;
+ case kIROp_unconditionalBranch:
+ case kIROp_loop:
+ {
+ auto branch = as<IRUnconditionalBranch>(inst);
+ // If any pointer typed argument of the branch inst may overlap addr, return true.
+ for (UInt i = 0; i < branch->getArgCount(); i++)
+ {
+ SLANG_RELEASE_ASSERT(branch->getArg(i)->getDataType());
+ if (isPtrLikeOrHandleType(branch->getArg(i)->getDataType()))
+ {
+ if (canAddressesPotentiallyAlias(func, branch->getArg(i), addr))
+ return true;
+ }
+ else if (!isValueType(branch->getArg(i)->getDataType()))
+ {
+ // This is some unknown handle type, we assume it can have any side effects.
+ return true;
+ }
}
}
break;
@@ -434,6 +485,11 @@ bool canInstHaveSideEffectAtAddress(IRGlobalValueWithCode* func, IRInst* inst, I
if (isPtrLikeOrHandleType(inst->getOperand(0)->getDataType()) &&
canAddressesPotentiallyAlias(func, inst->getOperand(0), addr))
return true;
+ else if (!isValueType(inst->getOperand(0)->getDataType()))
+ {
+ // This is some unknown handle type, we assume it can have any side effects.
+ return true;
+ }
}
break;
default:
@@ -520,20 +576,17 @@ bool isPureFunctionalCall(IRCall* call)
auto callee = getResolvedInstForDecorations(call->getCallee());
if (callee->findDecoration<IRReadNoneDecoration>())
{
- return true;
- }
- if (callee->findDecoration<IRNoSideEffectDecoration>())
- {
// If the function has no side effect and is not writing to any outputs,
// we can safely treat the call as a normal inst.
bool hasOutArg = false;
for (UInt i = 0; i < call->getArgCount(); i++)
{
- if (as<IRPtrTypeBase>(call->getArg(i)->getDataType()))
- {
- hasOutArg = true;
- break;
- }
+ if (isValueType(call->getArg(i)->getDataType()))
+ continue;
+ // If the argument type is not a known value type,
+ // assume it is a pointer or handle through which side effect can take place.
+ hasOutArg = true;
+ break;
}
return !hasOutArg;
}
diff --git a/source/slang/slang-ir-util.h b/source/slang/slang-ir-util.h
index 8a12ab895..62156cad6 100644
--- a/source/slang/slang-ir-util.h
+++ b/source/slang/slang-ir-util.h
@@ -83,6 +83,9 @@ inline bool isScalarIntegerType(IRType* type)
return getTypeStyle(type->getOp()) == kIROp_IntType;
}
+// No side effect can take place through a value of a "Value" type.
+bool isValueType(IRInst* type);
+
inline bool isChildInstOf(IRInst* inst, IRInst* parent)
{
while (inst)
diff --git a/source/slang/slang-ir.cpp b/source/slang/slang-ir.cpp
index accefc0c9..fd211d05c 100644
--- a/source/slang/slang-ir.cpp
+++ b/source/slang/slang-ir.cpp
@@ -43,7 +43,10 @@ namespace Slang
case kIROp_PreciseDecoration:
case kIROp_PublicDecoration:
case kIROp_HLSLExportDecoration:
- case kIROp_ReadNoneDecoration:
+ case kIROp_ReadNoneDecoration:
+ case kIROp_NoSideEffectDecoration:
+ case kIROp_ForwardDifferentiableDecoration:
+ case kIROp_BackwardDifferentiableDecoration:
case kIROp_RequiresNVAPIDecoration:
case kIROp_TriangleAdjInputPrimitiveTypeDecoration:
case kIROp_TriangleInputPrimitiveTypeDecoration:
@@ -695,6 +698,21 @@ namespace Slang
}
}
+ // Removes the branch argument at `index`. Arguments follow the fixed
+ // operands of the inst: 1 for `unconditionalBranch`, 3 for `loop`.
+ void IRUnconditionalBranch::removeArgument(UInt index)
+ {
+     const auto op = getOp();
+     if (op == kIROp_unconditionalBranch)
+     {
+         removeOperand(1 + index);
+     }
+     else if (op == kIROp_loop)
+     {
+         removeOperand(3 + index);
+     }
+     else
+     {
+         SLANG_UNEXPECTED("unhandled unconditional branch opcode");
+     }
+ }
+
+
IRInst* IRUnconditionalBranch::getArg(UInt index)
{
return getArgs()[index].usedValue;
@@ -5109,6 +5127,17 @@ namespace Slang
return inst;
}
+ // Creates a `kIROp_Not` inst of type `type` with `value` as its single
+ // operand, adds it through addInst, and returns it.
+ IRInst* IRBuilder::emitNot(IRType* type, IRInst* value)
+ {
+     auto notInst = createInst<IRInst>(this, kIROp_Not, type, value);
+     addInst(notInst);
+     return notInst;
+ }
+
IRInst* IRBuilder::emitAdd(IRType* type, IRInst* left, IRInst* right)
{
auto inst = createInst<IRInst>(
@@ -6792,6 +6821,17 @@ namespace Slang
}
}
+ // Removes the operand at `index`: every later operand is shifted down by
+ // one slot, the now-unused last slot is cleared, and the operand count is
+ // decremented.
+ void IRInst::removeOperand(Index index)
+ {
+     const Index lastIndex = (Index)operandCount - 1;
+     for (Index slot = index; slot < lastIndex; ++slot)
+     {
+         // Pull the next operand into this slot.
+         getOperands()[slot].set(getOperand(slot + 1));
+     }
+     getOperands()[operandCount - 1].clear();
+     operandCount--;
+ }
+
// Remove this instruction from its parent block,
// and then destroy it (it had better have no uses!)
void IRInst::removeAndDeallocate()
@@ -6879,6 +6919,8 @@ namespace Slang
// common subexpression elimination, etc.
//
auto call = cast<IRCall>(this);
+ // If the call has been marked as no-side-effect, we
+ // will treat it so, by-passing all other checks.
if (call->findDecoration<IRNoSideEffectDecoration>())
return false;
return !isPureFunctionalCall(call);
@@ -6894,6 +6936,7 @@ namespace Slang
case kIROp_Func:
case kIROp_Generic:
case kIROp_Var:
+ case kIROp_Param:
case kIROp_GlobalVar: // Note: the IRGlobalVar represents the *address*, so only a load/store would have side effects
case kIROp_GlobalConstant:
case kIROp_GlobalParam:
@@ -7003,12 +7046,6 @@ namespace Slang
case kIROp_BackwardDifferentiatePropagate:
return false;
}
-
- // Check if the calle has been marked with a catch-all no-side-effect decoration.
- if (findDecoration<IRNoSideEffectDecoration>())
- {
- return false;
- }
return true;
}
diff --git a/source/slang/slang-ir.h b/source/slang/slang-ir.h
index 63b7c4ef9..e22ea8a36 100644
--- a/source/slang/slang-ir.h
+++ b/source/slang/slang-ir.h
@@ -744,6 +744,11 @@ struct IRInst
// for those values.
void removeArguments();
+ // Remove operand `index` from operand list.
+ // For example, if the inst is `op(a,b,c)`, calling `inst->removeOperand(1)` will result in
+ // `op(a,c)`.
+ void removeOperand(Index index);
+
/// Transfer any decorations of this instruction to the `target` instruction.
void transferDecorationsTo(IRInst* target);
diff --git a/source/slang/slang-lower-to-ir.cpp b/source/slang/slang-lower-to-ir.cpp
index 681871b6c..d09c35eea 100644
--- a/source/slang/slang-lower-to-ir.cpp
+++ b/source/slang/slang-lower-to-ir.cpp
@@ -8304,6 +8304,11 @@ struct DeclLoweringVisitor : DeclVisitor<DeclLoweringVisitor, LoweredValInfo>
getBuilder()->addSimpleDecoration<IRRequiresNVAPIDecoration>(irFunc);
}
+ if (decl->findModifier<AlwaysFoldIntoUseSiteAttribute>())
+ {
+ getBuilder()->addSimpleDecoration<IRAlwaysFoldIntoUseSiteDecoration>(irFunc);
+ }
+
if (decl->findModifier<NoInlineAttribute>())
{
getBuilder()->addSimpleDecoration<IRNoInlineDecoration>(irFunc);
diff --git a/tests/bugs/sample-grad-clamp-lod.slang.glsl b/tests/bugs/sample-grad-clamp-lod.slang.glsl
index b91fb8668..a49983599 100644
--- a/tests/bugs/sample-grad-clamp-lod.slang.glsl
+++ b/tests/bugs/sample-grad-clamp-lod.slang.glsl
@@ -20,10 +20,10 @@ rayPayloadInEXT ShadowRay_0 _S1;
void main()
{
- vec4 val_0 = (textureGradOffsetClampARB(sampler2DArray(t2D_0,samplerState_0), (vec3(_S1.hitDistance_0 * 0.20000000000000001110, _S1.hitDistance_0 * 0.29999999999999998890, 0.20000000000000001110)), (vec2(float(0), float(0))), (vec2(float(0), float(0))), (ivec2(0)), (0.50000000000000000000)));
+ const vec2 _S2 = vec2(0.0, 0.0);
- float _S2 = dot(val_0, val_0);
+ vec4 val_0 = (textureGradOffsetClampARB(sampler2DArray(t2D_0,samplerState_0), (vec3(_S1.hitDistance_0 * 0.20000000298023223877, _S1.hitDistance_0 * 0.30000001192092895508, 0.20000000298023223877)), (_S2), (_S2), (ivec2(0)), (0.5)));
- _S1.hitDistance_0 = _S2;
+ _S1.hitDistance_0 = dot(val_0, val_0);
return;
}
diff --git a/tests/bugs/vk-structured-buffer-load.hlsl.glsl b/tests/bugs/vk-structured-buffer-load.hlsl.glsl
index 7f3ec40a2..1d056944a 100644
--- a/tests/bugs/vk-structured-buffer-load.hlsl.glsl
+++ b/tests/bugs/vk-structured-buffer-load.hlsl.glsl
@@ -1,15 +1,10 @@
-// vk-structured-buffer-load.hlsl.glsl
-//TEST_IGNORE_FILE:
-
#version 460
#extension GL_NV_ray_tracing : require
layout(row_major) uniform;
layout(row_major) buffer;
-
layout(std430, binding = 1) readonly buffer _S1 {
float _data[];
} gParamBlock_sbuf_0;
-
float rcp_0(float x_0)
{
float _S2 = 1.0 / x_0;
@@ -36,37 +31,21 @@ void main()
_S3.PackedHitInfoA_0.x = HitT_0;
float offsfloat_0 = ((gParamBlock_sbuf_0)._data[(0)]);
-
uint use_rcp_0 = 0U | uint(HitT_0 > 0.0);
-
if(use_rcp_0 != 0U)
{
-
- float _S5 = rcp_0(offsfloat_0);
-
- _S3.PackedHitInfoA_0.y = _S5;
-
+ _S3.PackedHitInfoA_0.y = rcp_0(offsfloat_0);
}
else
{
-
if(use_rcp_0 > 0U&&offsfloat_0 == 0.0)
{
-
- float _S6 = (inversesqrt((offsfloat_0 + 1.0)));
-
- _S3.PackedHitInfoA_0.y = _S6;
-
+ _S3.PackedHitInfoA_0.y = (inversesqrt((offsfloat_0 + 1.0)));
}
else
{
- float _S7 = (inversesqrt((offsfloat_0)));
-
- _S3.PackedHitInfoA_0.y = _S7;
-
+ _S3.PackedHitInfoA_0.y = (inversesqrt((offsfloat_0)));
}
-
}
-
return;
}
diff --git a/tests/cross-compile/array-of-buffers.slang.glsl b/tests/cross-compile/array-of-buffers.slang.glsl
index 1f436fad0..21961afd1 100644
--- a/tests/cross-compile/array-of-buffers.slang.glsl
+++ b/tests/cross-compile/array-of-buffers.slang.glsl
@@ -1,8 +1,6 @@
-//TEST_IGNORE_FILE:
#version 450
layout(row_major) uniform;
layout(row_major) buffer;
-
struct SLANG_ParameterGroup_C_0
{
uint index_0;
@@ -13,7 +11,6 @@ layout(std140) uniform _S1
{
SLANG_ParameterGroup_C_0 _data;
} C_0;
-
struct S_0
{
vec4 f_0;
@@ -24,31 +21,26 @@ layout(std140) uniform _S2
{
S_0 _data;
} cb_0[3];
-
layout(std430, binding = 2) readonly buffer _S3 {
S_0 _data[];
} sb1_0[4];
-
layout(std430, binding = 3) buffer _S4 {
vec4 _data[];
} sb2_0[5];
-
layout(std430, binding = 4) readonly buffer _S5
{
uint _data[];
} bb_0[6];
-
layout(location = 0)
out vec4 _S6;
void main()
{
- S_0 _S7 = ((sb1_0[C_0._data.index_0])._data[(C_0._data.index_0)]);
- vec4 _S8 = cb_0[C_0._data.index_0]._data.f_0 + _S7.f_0;
- vec4 _S9 = _S8 + ((sb2_0[C_0._data.index_0])._data[(C_0._data.index_0)]);
- uint _S10 = ((bb_0[C_0._data.index_0])._data[(int(C_0._data.index_0 * 4U))/4]);
-
- _S6 = _S9 + vec4(float(_S10));
-
+ vec4 _S7 = cb_0[C_0._data.index_0]._data.f_0;
+ S_0 _S8 = ((sb1_0[C_0._data.index_0])._data[(C_0._data.index_0)]);
+ vec4 _S9 = _S7 + _S8.f_0;
+ vec4 _S10 = _S9 + ((sb2_0[C_0._data.index_0])._data[(C_0._data.index_0)]);
+ uint _S11 = ((bb_0[C_0._data.index_0])._data[(int(C_0._data.index_0 * 4U))/4]);
+ _S6 = _S10 + vec4(float(_S11));
return;
}
diff --git a/tests/cross-compile/array-of-buffers.slang.hlsl b/tests/cross-compile/array-of-buffers.slang.hlsl
index 501b9c6db..960957789 100644
--- a/tests/cross-compile/array-of-buffers.slang.hlsl
+++ b/tests/cross-compile/array-of-buffers.slang.hlsl
@@ -1,5 +1,3 @@
-//TEST_IGNORE_FILE:
-
#pragma pack_matrix(column_major)
#ifdef SLANG_HLSL_ENABLE_NVAPI
#include "nvHLSLExtns.h"
@@ -15,24 +13,24 @@ cbuffer C_0 : register(b0)
{
SLANG_ParameterGroup_C_0 C_0;
}
-
struct S_0
{
float4 f_0;
};
ConstantBuffer<S_0 > cb_0[int(3)] : register(b1);
+
StructuredBuffer<S_0 > sb1_0[int(4)] : register(t0);
+
RWStructuredBuffer<float4 > sb2_0[int(5)] : register(u0);
-ByteAddressBuffer bb_0[int(6)] : register(t4);
+ByteAddressBuffer bb_0[int(6)] : register(t4);
float4 main() : SV_TARGET
{
- S_0 _S1 = sb1_0[C_0.index_0][C_0.index_0];
-
- float4 _S2 = cb_0[C_0.index_0].f_0 + _S1.f_0;
- float4 _S3 = _S2 + sb2_0[C_0.index_0][C_0.index_0];
- uint _S4 = bb_0[C_0.index_0].Load(int(C_0.index_0 * 4U));
-
- return _S3 + (float4)float(_S4);
+ float4 _S1 = cb_0[C_0.index_0].f_0;
+ S_0 _S2 = sb1_0[C_0.index_0][C_0.index_0];
+ float4 _S3 = _S1 + _S2.f_0;
+ float4 _S4 = _S3 + sb2_0[C_0.index_0][C_0.index_0];
+ uint _S5 = bb_0[C_0.index_0].Load(int(C_0.index_0 * 4U));
+ return _S4 + (float4)float(_S5);
}
diff --git a/tests/cross-compile/glsl-generic-in.slang.glsl b/tests/cross-compile/glsl-generic-in.slang.glsl
index c8d9b1bd1..4bf0598d0 100644
--- a/tests/cross-compile/glsl-generic-in.slang.glsl
+++ b/tests/cross-compile/glsl-generic-in.slang.glsl
@@ -1,8 +1,6 @@
-//TEST_IGNORE_FILE:
#version 450
layout(row_major) uniform;
layout(row_major) buffer;
-
struct F_0
{
vec4 v0_0;
@@ -14,10 +12,9 @@ float F_get_0(F_0 this_0)
return this_0.v0_0.x + this_0.v1_0.x;
}
-
float E_get_0()
{
- return 1.00000000000000000000;
+ return 1.0;
}
layout(location = 0)
@@ -33,14 +30,11 @@ struct VOut_0
{
vec4 projPos_0;
};
-
void main()
{
F_0 _S4 = { _S2, _S3 };
VOut_0 vout_0;
- float _S5 = F_get_0(_S4);
- float _S6 = E_get_0();
- vout_0.projPos_0 = vec4(_S1, _S5 + _S6);
+ vout_0.projPos_0 = vec4(_S1, F_get_0(_S4) + E_get_0());
gl_Position = vout_0.projPos_0;
return;
-} \ No newline at end of file
+}
diff --git a/tests/cross-compile/half-conversion.slang.glsl b/tests/cross-compile/half-conversion.slang.glsl
index 58d20b4fc..fb51809b4 100644
--- a/tests/cross-compile/half-conversion.slang.glsl
+++ b/tests/cross-compile/half-conversion.slang.glsl
@@ -1,8 +1,6 @@
-//half-conversion.slang.glsl
-//TEST_IGNORE_FILE:
-
#version 450
-
+layout(row_major) uniform;
+layout(row_major) buffer;
struct SLANG_ParameterGroup_C_0
{
uvec4 u_0;
@@ -13,29 +11,30 @@ layout(std140) uniform _S1
{
SLANG_ParameterGroup_C_0 _data;
} C_0;
-
vec4 f16tof32_0(uvec4 value_0)
{
vec4 result_0;
- int i_0;
- i_0 = 0;
+ int i_0 = 0;
for(;;)
{
- if(i_0 < 4) {} else break;
-
- float _S2 = (unpackHalf2x16((value_0[i_0])).x);
- result_0[i_0] = _S2;
- i_0 = i_0 + int(1);
+ if(i_0 < 4)
+ {
+ }
+ else
+ {
+ break;
+ }
+ result_0[i_0] = (unpackHalf2x16((value_0[i_0])).x);
+ i_0 = i_0 + 1;
}
return result_0;
}
layout(location = 0)
-out vec4 _S3;
+out vec4 _S2;
void main()
{
- vec4 _S4 = f16tof32_0(C_0._data.u_0);
- _S3 = _S4;
+ _S2 = f16tof32_0(C_0._data.u_0);
return;
}
diff --git a/tests/cross-compile/sign.slang.glsl b/tests/cross-compile/sign.slang.glsl
index 7a3a37c51..44c015967 100644
--- a/tests/cross-compile/sign.slang.glsl
+++ b/tests/cross-compile/sign.slang.glsl
@@ -1,17 +1,12 @@
-//TEST_IGNORE_FILE:
#version 450
layout(row_major) uniform;
layout(row_major) buffer;
-#line 8 0
layout(location = 0)
out vec4 _S1;
-
-#line 8
void main()
{
- ivec4 _S2 = ivec4(sign(vec4(1.50000000000000000000, 1.00000000000000000000, -1.50000000000000000000, -1.00000000000000000000)));
- _S1 = vec4(_S2);
+ _S1 = vec4((ivec4(sign((vec4(1.5, 1.0, -1.5, -1.0))))));
return;
-} \ No newline at end of file
+}
diff --git a/tests/diagnostics/interfaces/anyvalue-size-validation.slang b/tests/diagnostics/interfaces/anyvalue-size-validation.slang
index 6c33b72a9..1ebf7f4c3 100644
--- a/tests/diagnostics/interfaces/anyvalue-size-validation.slang
+++ b/tests/diagnostics/interfaces/anyvalue-size-validation.slang
@@ -21,9 +21,11 @@ T test<T:IInterface>(T s)
return s;
}
+RWStructuredBuffer<uint> output;
+
[numthreads(4, 1, 1)]
void main()
{
S s;
- test(s);
+ output[0] = test(s).a;
} \ No newline at end of file
diff --git a/tests/experimental/liveness/liveness-2.slang.expected b/tests/experimental/liveness/liveness-2.slang.expected
index c742fa1fc..16883c1fd 100644
--- a/tests/experimental/liveness/liveness-2.slang.expected
+++ b/tests/experimental/liveness/liveness-2.slang.expected
@@ -51,9 +51,8 @@ layout(local_size_x = 4, local_size_y = 1, local_size_z = 1) in;
void main()
{
int index_0 = int(gl_GlobalInvocationID.x);
- uint _S4 = uint(index_0);
- int _S5 = calcThing_0(index_0);
- ((outputBuffer_0)._data[(_S4)]) = _S5;
+ int _S4 = calcThing_0(index_0);
+ ((outputBuffer_0)._data[(uint(index_0))]) = _S4;
return;
}
diff --git a/tests/experimental/liveness/liveness-3.slang.expected b/tests/experimental/liveness/liveness-3.slang.expected
index 4dff6b37a..cb093a640 100644
--- a/tests/experimental/liveness/liveness-3.slang.expected
+++ b/tests/experimental/liveness/liveness-3.slang.expected
@@ -53,12 +53,15 @@ int calcThing_0(int offset_0)
idx_0[0] = 0;
idx_0[1] = 0;
idx_0[2] = 0;
+ int _S2 = (k_0 + 7) % 5;
+ bool _S3 = _S2 == 4;
+ int k_1 = k_0 + 1;
int i_0;
livenessStart_1(i_0, 0);
i_0 = 0;
- int _S2;
- livenessStart_1(_S2, 0);
- _S2 = _S1;
+ int _S4;
+ livenessStart_1(_S4, 0);
+ _S4 = _S1;
for(;;)
{
if(i_0 < 17)
@@ -70,74 +73,74 @@ int calcThing_0(int offset_0)
}
int modRange_0 = i_0 % 3;
another_0[i_0 & 1] = another_0[i_0 & 1] + modRange_0;
- int _S3 = i_0 % 3;
- int _S4;
- if(_S3 != 0)
+ int _S5 = i_0 % 3;
+ int _S6;
+ if(modRange_0 != 0)
{
- int _S5 = _S2;
- livenessEnd_0(_S2, 0);
- int _S6 = _S5 + 1;
- livenessStart_1(_S4, 0);
- _S4 = _S6;
+ int _S7 = _S4;
+ livenessEnd_0(_S4, 0);
+ int _S8 = _S7 + 1;
+ livenessStart_1(_S6, 0);
+ _S6 = _S8;
}
else
{
- int _S7 = _S2;
- livenessEnd_0(_S2, 0);
- livenessStart_1(_S4, 0);
- _S4 = _S7;
+ int _S9 = _S4;
+ livenessEnd_0(_S4, 0);
+ livenessStart_1(_S6, 0);
+ _S6 = _S9;
}
- idx_0[modRange_0] = idx_0[modRange_0] + (_S4 + i_0);
+ idx_0[modRange_0] = idx_0[modRange_0] + (_S6 + i_0);
i_0 = i_0 + 1;
- livenessStart_1(_S2, 0);
- int _S8 = _S4;
- livenessEnd_0(_S4, 0);
- _S2 = _S8;
+ livenessStart_1(_S4, 0);
+ int _S10 = _S6;
+ livenessEnd_0(_S6, 0);
+ _S4 = _S10;
}
livenessEnd_0(i_0, 0);
livenessEnd_0(_S1, 0);
- int _S9 = (k_0 + 7) % 5;
- if(_S9 == 4)
+ livenessEnd_0(k_0, 0);
+ if(_S3)
{
- livenessEnd_0(_S2, 0);
+ livenessEnd_0(_S4, 0);
livenessEnd_1(idx_0, 0);
- livenessEnd_0(k_0, 0);
livenessEnd_2(another_0, 0);
return total_0;
}
- int _S10 = idx_0[0] + idx_0[1];
- int _S11 = idx_0[2];
+ int _S11 = idx_0[0] + idx_0[1];
+ int _S12 = idx_0[2];
livenessEnd_1(idx_0, 0);
- int _S12 = _S10 + _S11;
- int _S13 = total_0;
+ int _S13 = _S11 + _S12;
+ int _S14 = total_0;
livenessEnd_0(total_0, 0);
- int total_1 = _S13 + _S12;
- k_0 = k_0 + 1;
+ int total_1 = _S14 + _S13;
+ livenessStart_1(k_0, 0);
+ k_0 = k_1;
livenessStart_1(_S1, 0);
- int _S14 = _S2;
- livenessEnd_0(_S2, 0);
- _S1 = _S14;
+ int _S15 = _S4;
+ livenessEnd_0(_S4, 0);
+ _S1 = _S15;
livenessStart_1(total_0, 0);
total_0 = total_1;
}
livenessEnd_0(_S1, 0);
livenessEnd_0(k_0, 0);
livenessEnd_2(another_0, 0);
- int _S15 = total_0;
+ int _S16 = total_0;
livenessEnd_0(total_0, 0);
- return - _S15;
+ return - _S16;
}
-layout(std430, binding = 0) buffer _S16 {
+layout(std430, binding = 0) buffer _S17 {
int _data[];
} outputBuffer_0;
layout(local_size_x = 4, local_size_y = 1, local_size_z = 1) in;
void main()
{
int index_0 = int(gl_GlobalInvocationID.x);
- uint _S17 = uint(index_0);
- int _S18 = calcThing_0(index_0);
- ((outputBuffer_0)._data[(_S17)]) = _S18;
+ uint _S18 = uint(index_0);
+ int _S19 = calcThing_0(index_0);
+ ((outputBuffer_0)._data[(_S18)]) = _S19;
return;
}
diff --git a/tests/experimental/liveness/liveness-4.slang.expected b/tests/experimental/liveness/liveness-4.slang.expected
index cd97f8057..efc2e3846 100644
--- a/tests/experimental/liveness/liveness-4.slang.expected
+++ b/tests/experimental/liveness/liveness-4.slang.expected
@@ -36,6 +36,9 @@ int calcThing_0(int offset_0)
{
break;
}
+ int _S1 = (k_0 + 7) % 5;
+ bool _S2 = _S1 == 4;
+ int k_1 = k_0 + 1;
int i_0;
livenessStart_1(i_0, 0);
i_0 = 0;
@@ -52,30 +55,30 @@ int calcThing_0(int offset_0)
i_0 = i_0 + 1;
}
livenessEnd_0(i_0, 0);
- int _S1 = (k_0 + 7) % 5;
- if(_S1 == 4)
+ livenessEnd_0(k_0, 0);
+ if(_S2)
{
- livenessEnd_0(k_0, 0);
livenessEnd_1(another_0, 0);
return 1;
}
- k_0 = k_0 + 1;
+ livenessStart_1(k_0, 0);
+ k_0 = k_1;
}
livenessEnd_0(k_0, 0);
livenessEnd_1(another_0, 0);
return -2;
}
-layout(std430, binding = 0) buffer _S2 {
+layout(std430, binding = 0) buffer _S3 {
int _data[];
} outputBuffer_0;
layout(local_size_x = 4, local_size_y = 1, local_size_z = 1) in;
void main()
{
int index_0 = int(gl_GlobalInvocationID.x);
- uint _S3 = uint(index_0);
- int _S4 = calcThing_0(index_0);
- ((outputBuffer_0)._data[(_S3)]) = _S4;
+ uint _S4 = uint(index_0);
+ int _S5 = calcThing_0(index_0);
+ ((outputBuffer_0)._data[(_S4)]) = _S5;
return;
}
diff --git a/tests/experimental/liveness/liveness-5.slang.expected b/tests/experimental/liveness/liveness-5.slang.expected
index 3693d3fde..e9fe9d652 100644
--- a/tests/experimental/liveness/liveness-5.slang.expected
+++ b/tests/experimental/liveness/liveness-5.slang.expected
@@ -39,6 +39,9 @@ int calcThing_0(int offset_0)
{
break;
}
+ int _S1 = (k_0 + 7) % 5;
+ bool _S2 = _S1 == 4;
+ int k_1 = k_0 + 1;
int i_0;
livenessStart_1(i_0, 0);
i_0 = 0;
@@ -55,17 +58,17 @@ int calcThing_0(int offset_0)
i_0 = i_0 + 1;
}
livenessEnd_0(i_0, 0);
- int _S1 = total_0;
+ livenessEnd_0(k_0, 0);
+ int _S3 = total_0;
livenessEnd_0(total_0, 0);
- int total_1 = _S1 + another_0[k_0 & 1];
- int _S2 = (k_0 + 7) % 5;
- if(_S2 == 4)
+ int total_1 = _S3 + another_0[k_0 & 1];
+ if(_S2)
{
- livenessEnd_0(k_0, 0);
livenessEnd_1(another_0, 0);
return 1;
}
- k_0 = k_0 + 1;
+ livenessStart_1(k_0, 0);
+ k_0 = k_1;
livenessStart_1(total_0, 0);
total_0 = total_1;
}
@@ -81,16 +84,16 @@ int calcThing_0(int offset_0)
return total_0;
}
-layout(std430, binding = 0) buffer _S3 {
+layout(std430, binding = 0) buffer _S4 {
int _data[];
} outputBuffer_0;
layout(local_size_x = 4, local_size_y = 1, local_size_z = 1) in;
void main()
{
int index_0 = int(gl_GlobalInvocationID.x);
- uint _S4 = uint(index_0);
- int _S5 = calcThing_0(index_0);
- ((outputBuffer_0)._data[(_S4)]) = _S5;
+ uint _S5 = uint(index_0);
+ int _S6 = calcThing_0(index_0);
+ ((outputBuffer_0)._data[(_S5)]) = _S6;
return;
}
diff --git a/tests/experimental/liveness/liveness-6.slang.expected b/tests/experimental/liveness/liveness-6.slang.expected
index 9c3bae815..b661c09bf 100644
--- a/tests/experimental/liveness/liveness-6.slang.expected
+++ b/tests/experimental/liveness/liveness-6.slang.expected
@@ -43,6 +43,10 @@ int calcThing_0(int offset_0)
livenessStart_0(arr_0, 0);
arr_0[0] = 2;
arr_0[1] = 3;
+ int _S1 = k_0 & 1;
+ int _S2 = (k_0 + 7) % 5;
+ bool _S3 = _S2 == 4;
+ int k_1 = k_0 + 1;
int i_0;
livenessStart_1(i_0, 0);
i_0 = 0;
@@ -56,25 +60,24 @@ int calcThing_0(int offset_0)
break;
}
another_0[i_0 & 1] = another_0[i_0 & 1] + (k_0 + i_0);
- arr_0[k_0 & 1] = arr_0[k_0 & 1] + i_0;
+ arr_0[_S1] = arr_0[_S1] + i_0;
i_0 = i_0 + 1;
}
livenessEnd_0(i_0, 0);
- int _S1 = k_0 & 1;
- int _S2 = total_0;
+ livenessEnd_0(k_0, 0);
+ int _S4 = total_0;
livenessEnd_0(total_0, 0);
- int total_1 = _S2 + another_0[_S1];
- int _S3 = arr_0[_S1];
+ int total_1 = _S4 + another_0[_S1];
+ int _S5 = arr_0[_S1];
livenessEnd_1(arr_0, 0);
- int total_2 = total_1 + _S3;
- int _S4 = (k_0 + 7) % 5;
- if(_S4 == 4)
+ int total_2 = total_1 + _S5;
+ if(_S3)
{
- livenessEnd_0(k_0, 0);
livenessEnd_1(another_0, 0);
return 1;
}
- k_0 = k_0 + 1;
+ livenessStart_1(k_0, 0);
+ k_0 = k_1;
livenessStart_1(total_0, 0);
total_0 = total_2;
}
@@ -90,16 +93,16 @@ int calcThing_0(int offset_0)
return total_0;
}
-layout(std430, binding = 0) buffer _S5 {
+layout(std430, binding = 0) buffer _S6 {
int _data[];
} outputBuffer_0;
layout(local_size_x = 4, local_size_y = 1, local_size_z = 1) in;
void main()
{
int index_0 = int(gl_GlobalInvocationID.x);
- uint _S6 = uint(index_0);
- int _S7 = calcThing_0(index_0);
- ((outputBuffer_0)._data[(_S6)]) = _S7;
+ uint _S7 = uint(index_0);
+ int _S8 = calcThing_0(index_0);
+ ((outputBuffer_0)._data[(_S7)]) = _S8;
return;
}
diff --git a/tests/experimental/liveness/liveness.slang.expected b/tests/experimental/liveness/liveness.slang.expected
index 4a81b8855..06809ffc3 100644
--- a/tests/experimental/liveness/liveness.slang.expected
+++ b/tests/experimental/liveness/liveness.slang.expected
@@ -21,6 +21,7 @@ void livenessEnd_1(spirv_by_reference int _0, spirv_literal int _1);
int someSlowFunc_0(int a_0)
{
uint _S1 = uint(a_0);
+ int _S2 = a_0 * 20;
uint v_0;
livenessStart_0(v_0, 0);
v_0 = _S1;
@@ -29,20 +30,20 @@ int someSlowFunc_0(int a_0)
i_0 = 0;
for(;;)
{
- if(i_0 < a_0 * 20)
+ if(i_0 < _S2)
{
}
else
{
break;
}
- uint _S2 = v_0 >> 1;
- uint _S3 = v_0;
+ uint _S3 = v_0 >> 1;
+ uint _S4 = v_0;
livenessEnd_0(v_0, 0);
- uint _S4 = (_S2 | _S3 << 31) * uint(i_0);
+ uint _S5 = (_S3 | _S4 << 31) * uint(i_0);
int i_1 = i_0 + 1;
livenessStart_0(v_0, 0);
- v_0 = _S4;
+ v_0 = _S5;
i_0 = i_1;
}
livenessEnd_1(i_0, 0);
@@ -58,12 +59,12 @@ struct SomeStruct_0
SomeStruct_0 makeSomeStruct_0()
{
- const int _S5[100] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
- SomeStruct_0 s_0 = { 0, 0, _S5 };
+ const int _S6[100] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ SomeStruct_0 s_0 = { 0, 0, _S6 };
return s_0;
}
-layout(std430, binding = 1) buffer _S6 {
+layout(std430, binding = 1) buffer _S7 {
int _data[];
} anotherBuffer_0;
int doThing_0(SomeStruct_0 s_1)
@@ -73,11 +74,12 @@ int doThing_0(SomeStruct_0 s_1)
int somethingElse_0(inout SomeStruct_0 s_2)
{
- s_2.x_0 = s_2.x_0 + 1;
- return s_2.x_0;
+ int _S8 = s_2.x_0 + 1;
+ s_2.x_0 = _S8;
+ return _S8;
}
-layout(std430, binding = 0) buffer _S7 {
+layout(std430, binding = 0) buffer _S9 {
int _data[];
} outputBuffer_0;
spirv_instruction(id = 256)
@@ -90,6 +92,12 @@ layout(local_size_x = 4, local_size_y = 1, local_size_z = 1) in;
void main()
{
int index_0 = int(gl_GlobalInvocationID.x);
+ const int _S10[100] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ int _S11 = index_0 & 7;
+ SomeStruct_0 _S12 = makeSomeStruct_0();
+ int v_1 = someSlowFunc_0(index_0);
+ bool _S13 = (v_1 & 256) != 0;
+ int _S14 = v_1 & 3;
int i_2;
livenessStart_1(i_2, 0);
i_2 = 0;
@@ -105,23 +113,20 @@ void main()
{
break;
}
- int v_1 = someSlowFunc_0(index_0);
SomeStruct_0 s_3;
livenessStart_2(s_3, 0);
SomeStruct_0 t_0;
livenessStart_2(t_0, 0);
- SomeStruct_0 _S8 = makeSomeStruct_0();
- t_0 = _S8;
- const int _S9[100] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ t_0 = _S12;
SomeStruct_0 u_0;
- if((v_1 & 256) != 0)
+ if(_S13)
{
- s_3.x_0 = ((anotherBuffer_0)._data[(uint(v_1 & 3))]);
- t_0.x_0 = ((anotherBuffer_0)._data[(uint(v_1 & 3))]);
+ s_3.x_0 = ((anotherBuffer_0)._data[(uint(_S14))]);
+ t_0.x_0 = ((anotherBuffer_0)._data[(uint(_S14))]);
livenessStart_2(u_0, 0);
u_0.a_1 = 0;
u_0.x_0 = 0;
- u_0.c_0 = _S9;
+ u_0.c_0 = _S10;
}
else
{
@@ -129,37 +134,35 @@ void main()
livenessStart_2(x_1, 0);
x_1.a_1 = 0;
x_1.x_0 = 0;
- x_1.c_0 = _S9;
- x_1.x_0 = ((anotherBuffer_0)._data[(uint(v_1 & 3))]) + 1;
- SomeStruct_0 _S10 = x_1;
+ x_1.c_0 = _S10;
+ x_1.x_0 = ((anotherBuffer_0)._data[(uint(_S14))]) + 1;
+ SomeStruct_0 _S15 = x_1;
livenessEnd_2(x_1, 0);
livenessStart_2(u_0, 0);
- u_0 = _S10;
+ u_0 = _S15;
}
- s_3.c_0[index_0 & 7] = s_3.c_0[index_0 & 7] + 1;
- int _S11 = s_3.x_0 + t_0.x_0;
- SomeStruct_0 _S12 = u_0;
+ s_3.c_0[_S11] = s_3.c_0[_S11] + 1;
+ int _S16 = s_3.x_0 + t_0.x_0;
+ SomeStruct_0 _S17 = u_0;
livenessEnd_2(u_0, 0);
- int _S13 = _S11 + _S12.x_0;
- int _S14 = doThing_0(t_0);
- int _S15 = _S13 + _S14;
- int _S16 = somethingElse_0(t_0);
+ int _S18 = _S16 + _S17.x_0 + doThing_0(t_0);
+ int _S19 = somethingElse_0(t_0);
livenessEnd_2(t_0, 0);
- int _S17 = _S15 + _S16;
- int _S18 = s_3.c_0[2];
+ int _S20 = _S18 + _S19;
+ int _S21 = s_3.c_0[2];
livenessEnd_2(s_3, 0);
- int _S19 = _S17 + _S18;
- int _S20 = res_0;
+ int _S22 = _S20 + _S21;
+ int _S23 = res_0;
livenessEnd_1(res_0, 0);
- int res_1 = _S20 + _S19;
+ int res_1 = _S23 + _S22;
i_2 = i_2 + 1;
livenessStart_1(res_0, 0);
res_0 = res_1;
}
livenessEnd_1(i_2, 0);
- int _S21 = res_0;
+ int _S24 = res_0;
livenessEnd_1(res_0, 0);
- ((outputBuffer_0)._data[(uint(index_0))]) = _S21;
+ ((outputBuffer_0)._data[(uint(index_0))]) = _S24;
return;
}
diff --git a/tests/hlsl-intrinsic/shader-execution-reordering/hit-object-make-hit.slang.1.expected b/tests/hlsl-intrinsic/shader-execution-reordering/hit-object-make-hit.slang.1.expected
index 09e389c32..09c026980 100644
--- a/tests/hlsl-intrinsic/shader-execution-reordering/hit-object-make-hit.slang.1.expected
+++ b/tests/hlsl-intrinsic/shader-execution-reordering/hit-object-make-hit.slang.1.expected
@@ -90,15 +90,23 @@ void main()
ray_2.TMin_0 = 0.00999999977648258209;
ray_2.Direction_0 = vec3(0.0, 1.0, 0.0);
ray_2.TMax_0 = 10000.0;
- RayDesc_0 _S10 = ray_2;
+ int _S10 = idx_0 * 2;
+ int _S11 = idx_0 * 3;
+ RayDesc_0 _S12 = ray_2;
hitObjectNV hitObj_0;
- hitObjectRecordHitWithIndexNV(hitObj_0, scene_0, int(uint(idx_0)), int(uint(idx_0 * 2)), int(uint(idx_0 * 3)), 0U, 0U, _S10.Origin_0, _S10.TMin_0, _S10.Direction_0, _S10.TMax_0, (0));
+ int _S13 = int(uint(idx_0));
+ int _S14 = int(uint(_S10));
+ int _S15 = int(uint(_S11));
+ hitObjectRecordHitWithIndexNV(hitObj_0, scene_0, _S13, _S14, _S15, 0U, 0U, _S12.Origin_0, _S12.TMin_0, _S12.Direction_0, _S12.TMax_0, (0));
uint r_3 = calcValue_0(hitObj_0);
- RayDesc_0 _S11 = ray_2;
+ RayDesc_0 _S16 = ray_2;
hitObjectNV hitObj_1;
- hitObjectRecordHitNV(hitObj_1, scene_0, int(uint(idx_0)), int(uint(idx_0 * 3)), int(uint(idx_0 * 2)), 0U, 0U, 4U, _S11.Origin_0, _S11.TMin_0, _S11.Direction_0, _S11.TMax_0, (0));
- uint _S12 = calcValue_0(hitObj_1);
- uint r_4 = r_3 + _S12;
+ int _S17 = int(uint(idx_0));
+ int _S18 = int(uint(_S11));
+ int _S19 = int(uint(_S10));
+ hitObjectRecordHitNV(hitObj_1, scene_0, _S17, _S18, _S19, 0U, 0U, 4U, _S16.Origin_0, _S16.TMin_0, _S16.Direction_0, _S16.TMax_0, (0));
+ uint _S20 = calcValue_0(hitObj_1);
+ uint r_4 = r_3 + _S20;
((outputBuffer_0)._data[(uint(idx_0))]) = r_4;
return;
}
diff --git a/tests/hlsl-intrinsic/vector-float.slang b/tests/hlsl-intrinsic/vector-float.slang
index b9cc6b9c8..de49bae73 100644
--- a/tests/hlsl-intrinsic/vector-float.slang
+++ b/tests/hlsl-intrinsic/vector-float.slang
@@ -13,6 +13,32 @@ typedef vector<Float, 3> FloatVector;
typedef vector<int, 3> IntVector;
typedef vector<uint, 3> UIntVector;
+void subf(inout FloatVector ft, FloatVector f, int idx, Float vf)
+{
+
+ ft += log(f + 10.0);
+ ft += log2(f * 3 + 2);
+
+ {
+ float v[] = { 1, 10, 100, 1000 };
+ ft += IntVector(log10(FloatVector(v[idx] + vf) + 0.5f));
+ }
+
+ ft += abs(f * 4 - 2.0f);
+
+ ft += min(0.5, f);
+ ft += max(f, 0.75);
+
+ ft += pow(0.5, f);
+
+ ft += smoothstep(0.2, 0.7, f);
+ ft += lerp(-100, 100, f);
+
+ ft += clamp(f, 0.1, 0.3);
+
+ ft += step(f, 0.5);
+}
+
[numthreads(4, 1, 1)]
void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
{
@@ -84,30 +110,8 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
ft += floor(f * 10 - 7.01);
ft += trunc(f * 7);
-
- ft += log(f + 10.0);
- ft += log2(f * 3 + 2);
-
-
- {
- float v[] = { 1, 10, 100, 1000 };
- ft += IntVector(log10(FloatVector(v[idx] + vf) + 0.5f));
- }
-
-
- ft += abs(f * 4 - 2.0f);
-
- ft += min(0.5, f);
- ft += max(f, 0.75);
- ft += pow(0.5, f);
-
- ft += smoothstep(0.2, 0.7, f);
- ft += lerp(-100, 100, f);
-
- ft += clamp(f, 0.1, 0.3);
-
- ft += step(f, 0.5);
+ subf(ft, f, idx, vf);
{
IntVector vi = asint(f - f) + idx;
diff --git a/tests/ir/loop-dce.slang b/tests/ir/loop-dce.slang
new file mode 100644
index 000000000..f89c1aa38
--- /dev/null
+++ b/tests/ir/loop-dce.slang
@@ -0,0 +1,40 @@
+//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -shaderobj
+//TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj
+
+//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name=outputBuffer
+RWStructuredBuffer<uint> outputBuffer;
+
+__target_intrinsic(hlsl, "@")
+__target_intrinsic(glsl, "@")
+__target_intrinsic(cpp, "@")
+__target_intrinsic(cuda, "@")
+[__readNone]
+int produceSyntaxError() { return 0; }
+
+[numthreads(1, 1, 1)]
+void computeMain(uint3 dispatchThreadID: SV_DispatchThreadID)
+{
+ int sum = 0;
+ int array[100];
+ // Next, this loop will be removed because there is no use of `array`.
+ for (int i = 0; i < 100; i++)
+ {
+ // This loop must be removed, or we will fail downstream compilation.
+ array[i] = i + produceSyntaxError();
+ }
+
+ // First, this loop will be removed because there is no use of `sum`.
+ for (int i = 0; i < 100; i++)
+ {
+ // This loop must be removed, or we will fail downstream compilation.
+ if (i < 50)
+ {
+ sum += array[i] + produceSyntaxError();
+ }
+ else
+ {
+ sum += i * 2 + produceSyntaxError();
+ }
+ }
+ outputBuffer[0] = 1;
+}
diff --git a/tests/ir/loop-dce.slang.expected.txt b/tests/ir/loop-dce.slang.expected.txt
new file mode 100644
index 000000000..968ac3ef0
--- /dev/null
+++ b/tests/ir/loop-dce.slang.expected.txt
@@ -0,0 +1,4 @@
+1
+0
+0
+0
diff --git a/tests/nv-extensions/nv-ray-tracing-motion-blur.slang.glsl b/tests/nv-extensions/nv-ray-tracing-motion-blur.slang.glsl
index 724a0a241..bae5f361d 100644
--- a/tests/nv-extensions/nv-ray-tracing-motion-blur.slang.glsl
+++ b/tests/nv-extensions/nv-ray-tracing-motion-blur.slang.glsl
@@ -3,47 +3,38 @@
#extension GL_NV_ray_tracing_motion_blur : require
layout(row_major) uniform;
layout(row_major) buffer;
-
struct ReflectionRay_0
{
float color_0;
};
-
layout(location = 0)
rayPayloadEXT
ReflectionRay_0 p_0;
-
struct ShadowRay_0
{
float hitDistance_0;
};
-
layout(location = 1)
rayPayloadEXT
ShadowRay_0 p_1;
-
layout(binding = 0)
uniform texture2D samplerPosition_0;
-
layout(binding = 2)
uniform sampler sampler_0;
-
layout(binding = 1)
uniform texture2D samplerNormal_0;
-
struct Light_0
{
vec4 position_0;
vec4 color_1;
};
-
struct Uniforms_0
{
Light_0 light_0;
@@ -52,13 +43,11 @@ struct Uniforms_0
mat4x4 model_0;
};
-
layout(binding = 3)
layout(std140) uniform _S1
{
Uniforms_0 _data;
} ubo_0;
-
struct RayDesc_0
{
vec3 Origin_0;
@@ -67,115 +56,76 @@ struct RayDesc_0
float TMax_0;
};
-
void TraceMotionRay_0(accelerationStructureEXT AccelerationStructure_0, uint RayFlags_0, uint InstanceInclusionMask_0, uint RayContributionToHitGroupIndex_0, uint MultiplierForGeometryContributionToHitGroupIndex_0, uint MissShaderIndex_0, RayDesc_0 Ray_0, float CurrentTime_0, inout ShadowRay_0 Payload_0)
{
-
p_1 = Payload_0;
traceRayMotionNV(AccelerationStructure_0, RayFlags_0, InstanceInclusionMask_0, RayContributionToHitGroupIndex_0, MultiplierForGeometryContributionToHitGroupIndex_0, MissShaderIndex_0, Ray_0.Origin_0, Ray_0.TMin_0, Ray_0.Direction_0, Ray_0.TMax_0, CurrentTime_0, (1));
-
Payload_0 = p_1;
return;
}
-
layout(binding = 5)
uniform accelerationStructureEXT as_0;
-
float saturate_0(float x_0)
{
- float _S2 = clamp(x_0, 0.0, 1.0);
-
- return _S2;
+ return clamp(x_0, 0.0, 1.0);
}
-
void TraceRay_0(accelerationStructureEXT AccelerationStructure_1, uint RayFlags_1, uint InstanceInclusionMask_1, uint RayContributionToHitGroupIndex_1, uint MultiplierForGeometryContributionToHitGroupIndex_1, uint MissShaderIndex_1, RayDesc_0 Ray_1, inout ReflectionRay_0 Payload_1)
{
-
p_0 = Payload_1;
traceRayEXT(AccelerationStructure_1, RayFlags_1, InstanceInclusionMask_1, RayContributionToHitGroupIndex_1, MultiplierForGeometryContributionToHitGroupIndex_1, MissShaderIndex_1, Ray_1.Origin_0, Ray_1.TMin_0, Ray_1.Direction_0, Ray_1.TMax_0, (0));
-
Payload_1 = p_0;
return;
}
-
layout(rgba32f)
layout(binding = 4)
uniform image2D outputImage_0;
-
void main()
{
- uvec3 _S3 = ((gl_LaunchIDEXT));
-
- ivec2 launchID_0 = ivec2(_S3.xy);
- uvec3 _S4 = ((gl_LaunchSizeEXT));
-
- ivec2 launchSize_0 = ivec2(_S4.xy);
-
-
- float _S5 = (float(launchID_0.x) + 0.5) / float(launchSize_0.x);
- float _S6 = (float(launchID_0.y) + 0.5) / float(launchSize_0.y);
-
- vec2 inUV_0 = vec2(_S5, _S6);
-
- vec4 _S7 = (texture(sampler2D(samplerPosition_0,sampler_0), (inUV_0)));
-
- vec3 P_0 = _S7.xyz;
- vec4 _S8 = (texture(sampler2D(samplerNormal_0,sampler_0), (inUV_0)));
-
- vec3 N_0 = _S8.xyz * 2.0 - 1.0;
-
+ uvec3 _S2 = ((gl_LaunchIDEXT));
+ ivec2 launchID_0 = ivec2(_S2.xy);
+ uvec3 _S3 = ((gl_LaunchSizeEXT));
+ ivec2 launchSize_0 = ivec2(_S3.xy);
+
+ float _S4 = (float(launchID_0.x) + 0.5) / float(launchSize_0.x);
+ float _S5 = (float(launchID_0.y) + 0.5) / float(launchSize_0.y);
+ vec2 inUV_0 = vec2(_S4, _S5);
+ vec4 _S6 = (texture(sampler2D(samplerPosition_0,sampler_0), (inUV_0)));
+ vec3 P_0 = _S6.xyz;
+ vec4 _S7 = (texture(sampler2D(samplerNormal_0,sampler_0), (inUV_0)));
+ vec3 N_0 = _S7.xyz * 2.0 - 1.0;
vec3 lightDelta_0 = ubo_0._data.light_0.position_0.xyz - P_0;
float lightDist_0 = length(lightDelta_0);
vec3 L_0 = normalize(lightDelta_0);
- float _S9 = 1.0 / (lightDist_0 * lightDist_0);
-
+ float _S8 = 1.0 / (lightDist_0 * lightDist_0);
RayDesc_0 ray_0;
ray_0.Origin_0 = P_0;
ray_0.TMin_0 = 0.00000099999999747524;
ray_0.Direction_0 = lightDelta_0;
ray_0.TMax_0 = lightDist_0;
-
ShadowRay_0 shadowRay_0;
shadowRay_0.hitDistance_0 = 0.0;
-
-
TraceMotionRay_0(as_0, 1U, 255U, 0U, 0U, 2U, ray_0, 1.0, shadowRay_0);
-
float atten_0;
-
if(shadowRay_0.hitDistance_0 < lightDist_0)
{
-
atten_0 = 0.0;
-
}
else
{
-
- atten_0 = _S9;
-
+ atten_0 = _S8;
}
-
- vec3 _S10 = ubo_0._data.light_0.color_1.xyz;
-
- float _S11 = dot(N_0, L_0);
-
- float _S12 = saturate_0(_S11);
-
- vec3 color_2 = _S10 * _S12 * atten_0;
-
+ vec3 color_2 = ubo_0._data.light_0.color_1.xyz * saturate_0(dot(N_0, L_0)) * atten_0;
ReflectionRay_0 reflectionRay_0;
TraceRay_0(as_0, 1U, 255U, 0U, 0U, 2U, ray_0, reflectionRay_0);
-
imageStore((outputImage_0), ivec2((uvec2(launchID_0))), vec4(color_2 + reflectionRay_0.color_0, 1.0));
return;
-} \ No newline at end of file
+}
diff --git a/tests/pipeline/rasterization/fragment-shader-interlock.slang.glsl b/tests/pipeline/rasterization/fragment-shader-interlock.slang.glsl
index 84eba46f0..7f53576e9 100644
--- a/tests/pipeline/rasterization/fragment-shader-interlock.slang.glsl
+++ b/tests/pipeline/rasterization/fragment-shader-interlock.slang.glsl
@@ -1,10 +1,7 @@
-//TEST_IGNORE_FILE:
-
#version 450
#extension GL_ARB_fragment_shader_interlock : require
layout(row_major) uniform;
layout(row_major) buffer;
-
layout(rgba32f)
layout(binding = 0)
uniform image2D entryPointParams_texture_0;
@@ -17,15 +14,12 @@ out vec4 _S2;
void main()
{
- beginInvocationInterlockARB();
-
- vec2 _S3 = _S1.xy;
-
- vec4 _S4 = (imageLoad((entryPointParams_texture_0), ivec2((uvec2(_S3)))));
- imageStore((entryPointParams_texture_0), ivec2((uvec2(_S3))), _S4 + _S1);
+ beginInvocationInterlockARB();
+ uvec2 _S3 = uvec2(_S1.xy);
+ vec4 _S4 = (imageLoad((entryPointParams_texture_0), ivec2((_S3))));
+ imageStore((entryPointParams_texture_0), ivec2((_S3)), _S4 + _S1);
endInvocationInterlockARB();
-
_S2 = _S4;
return;
}
diff --git a/tests/pipeline/rasterization/mesh/passing-outputs.slang.glsl b/tests/pipeline/rasterization/mesh/passing-outputs.slang.glsl
index 31c2f0db2..1102a838e 100644
--- a/tests/pipeline/rasterization/mesh/passing-outputs.slang.glsl
+++ b/tests/pipeline/rasterization/mesh/passing-outputs.slang.glsl
@@ -160,16 +160,10 @@ void main()
d_0(gl_LocalInvocationIndex);
e_0(gl_LocalInvocationIndex);
}
- else
- {
- }
if(gl_LocalInvocationIndex < 1U)
{
gl_PrimitiveTriangleIndicesEXT[gl_LocalInvocationIndex] = uvec3(0U, 1U, 2U);
}
- else
- {
- }
return;
}
diff --git a/tests/pipeline/ray-tracing/acceleration-structure-in-compute.slang.glsl b/tests/pipeline/ray-tracing/acceleration-structure-in-compute.slang.glsl
index f95321039..83797d2d5 100644
--- a/tests/pipeline/ray-tracing/acceleration-structure-in-compute.slang.glsl
+++ b/tests/pipeline/ray-tracing/acceleration-structure-in-compute.slang.glsl
@@ -1,6 +1,7 @@
#version 460
#extension GL_EXT_ray_tracing : require
-
+layout(row_major) uniform;
+layout(row_major) buffer;
int helper_0(accelerationStructureEXT a_0, int b_0)
{
return b_0;
diff --git a/tests/pipeline/ray-tracing/trace-ray-inline.slang.glsl b/tests/pipeline/ray-tracing/trace-ray-inline.slang.glsl
index 0364d2513..1c2bc8090 100644
--- a/tests/pipeline/ray-tracing/trace-ray-inline.slang.glsl
+++ b/tests/pipeline/ray-tracing/trace-ray-inline.slang.glsl
@@ -3,7 +3,6 @@
#extension GL_EXT_ray_query : require
layout(row_major) uniform;
layout(row_major) buffer;
-
struct SLANG_ParameterGroup_C_0
{
vec3 origin_0;
@@ -20,7 +19,6 @@ layout(std140) uniform _S1
{
SLANG_ParameterGroup_C_0 _data;
} C_0;
-
layout(binding = 0)
uniform accelerationStructureEXT myAccelerationStructure_0;
@@ -70,40 +68,32 @@ void myMiss_0(inout MyRayPayload_0 payload_4)
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
void main()
{
+ MyProceduralHitAttrs_0 committedProceduralAttrs_0;
rayQueryEXT query_0;
MyRayPayload_0 payload_5;
payload_5.value_1 = -1;
rayQueryInitializeEXT((query_0), (myAccelerationStructure_0), (C_0._data.rayFlags_0 | 512), (C_0._data.instanceMask_0), (C_0._data.origin_0), (C_0._data.tMin_0), (C_0._data.direction_0), (C_0._data.tMax_0));
-
- MyProceduralHitAttrs_0 committedProceduralAttrs_0;
-
for(;;)
{
-
bool _S2 = rayQueryProceedEXT(query_0);
-
if(!_S2)
{
break;
}
uint _S3 = (rayQueryGetIntersectionTypeEXT((query_0), false));
-
switch(_S3)
{
case 1U:
{
MyProceduralHitAttrs_0 candidateProceduralAttrs_0;
-
candidateProceduralAttrs_0.value_0 = 0;
float tHit_1 = 0.0;
bool _S4 = myProceduralIntersection_0(tHit_1, candidateProceduralAttrs_0);
-
if(_S4)
{
bool _S5 = myProceduralAnyHit_0(payload_5);
-
if(_S5)
{
rayQueryGenerateIntersectionEXT(query_0, tHit_1);
@@ -112,35 +102,22 @@ void main()
{
rayQueryTerminateEXT(query_0);
}
- else
- {
- }
-
committedProceduralAttrs_0 = _S6;
-
}
else
{
-
committedProceduralAttrs_0 = committedProceduralAttrs_0;
-
}
-
}
else
{
-
committedProceduralAttrs_0 = committedProceduralAttrs_0;
-
}
-
break;
}
case 0U:
{
-
bool _S7 = myTriangleAnyHit_0(payload_5);
-
if(_S7)
{
rayQueryConfirmIntersectionEXT(query_0);
@@ -148,12 +125,6 @@ void main()
{
rayQueryTerminateEXT(query_0);
}
- else
- {
- }
- }
- else
- {
}
break;
}
@@ -162,13 +133,8 @@ void main()
break;
}
}
-
- committedProceduralAttrs_0 = committedProceduralAttrs_0;
-
}
-
uint _S8 = (rayQueryGetIntersectionTypeEXT((query_0), true));
-
switch(_S8)
{
case 1U:
diff --git a/tests/pipeline/ray-tracing/trace-ray-inline.slang.hlsl b/tests/pipeline/ray-tracing/trace-ray-inline.slang.hlsl
index 97d972328..b0c798b2d 100644
--- a/tests/pipeline/ray-tracing/trace-ray-inline.slang.hlsl
+++ b/tests/pipeline/ray-tracing/trace-ray-inline.slang.hlsl
@@ -1,11 +1,14 @@
-// trace-ray-inline.slang.hlsl
-//TEST_IGNORE_FILE:
+#pragma pack_matrix(column_major)
+#ifdef SLANG_HLSL_ENABLE_NVAPI
+#include "nvHLSLExtns.h"
+#endif
+#pragma warning(disable: 3557)
struct SLANG_ParameterGroup_C_0
{
- vector<float,3> origin_0;
+ float3 origin_0;
float tMin_0;
- vector<float,3> direction_0;
+ float3 direction_0;
float tMax_0;
uint rayFlags_0;
uint instanceMask_0;
@@ -16,7 +19,6 @@ cbuffer C_0 : register(b0)
{
SLANG_ParameterGroup_C_0 C_0;
}
-
RaytracingAccelerationStructure myAccelerationStructure_0 : register(t0);
struct MyProceduralHitAttrs_0
@@ -62,170 +64,92 @@ void myMiss_0(inout MyRayPayload_0 payload_4)
return;
}
-
-[shader("compute")]
-[numthreads(1, 1, 1)]
-void main(vector<uint,3> tid_0 : SV_DISPATCHTHREADID)
+[shader("compute")][numthreads(1, 1, 1)]
+void main(uint3 tid_0 : SV_DISPATCHTHREADID)
{
- MyRayPayload_0 payload_5;
MyProceduralHitAttrs_0 committedProceduralAttrs_0;
- MyProceduralHitAttrs_0 committedProceduralAttrs_1;
- MyRayPayload_0 payload_6;
- MyProceduralHitAttrs_0 committedProceduralAttrs_2;
- MyRayPayload_0 payload_7;
- MyProceduralHitAttrs_0 committedProceduralAttrs_3;
RayQuery<int(512) > query_0;
- MyRayPayload_0 _S1 = { int(-1) };
+ MyRayPayload_0 payload_5;
+ payload_5.value_1 = int(-1);
RayDesc ray_0 = { C_0.origin_0, C_0.tMin_0, C_0.direction_0, C_0.tMax_0 };
query_0.TraceRayInline(myAccelerationStructure_0, C_0.rayFlags_0, C_0.instanceMask_0, ray_0);
-
- MyProceduralHitAttrs_0 _S2;
-
- payload_5 = _S1;
- committedProceduralAttrs_0 = _S2;
for(;;)
{
- bool _S3 = query_0.Proceed();
-
- if(!_S3)
+ bool _S1 = query_0.Proceed();
+ if(!_S1)
{
break;
}
- uint _S4 = query_0.CandidateType();
-
- switch(_S4)
+ uint _S2 = query_0.CandidateType();
+ switch(_S2)
{
- case (uint) int(1):
+ case 1U:
{
- MyProceduralHitAttrs_0 candidateProceduralAttrs_0 = { int(0) };
-
- float _S5;
-
- _S5 = 0.00000000000000000000;
-
- MyProceduralHitAttrs_0 _S6;
-
- _S6 = candidateProceduralAttrs_0;
-
- bool _S7 = myProceduralIntersection_0(_S5, _S6);
-
- float tHit_1 = _S5;
-
- MyProceduralHitAttrs_0 candidateProceduralAttrs_1 = _S6;
-
- if(_S7)
+ MyProceduralHitAttrs_0 candidateProceduralAttrs_0;
+ candidateProceduralAttrs_0.value_0 = int(0);
+ float tHit_1 = 0.0;
+ bool _S3 = myProceduralIntersection_0(tHit_1, candidateProceduralAttrs_0);
+ if(_S3)
{
- MyRayPayload_0 _S8;
-
- _S8 = payload_5;
-
- bool _S9 = myProceduralAnyHit_0(_S8);
-
- MyRayPayload_0 _S10 = _S8;
-
- if(_S9)
+ bool _S4 = myProceduralAnyHit_0(payload_5);
+ if(_S4)
{
query_0.CommitProceduralPrimitiveHit(tHit_1);
-
- if((bool) C_0.shouldStopAtFirstHit_0)
+ MyProceduralHitAttrs_0 _S5 = candidateProceduralAttrs_0;
+ if(C_0.shouldStopAtFirstHit_0 != 0U)
{
-
query_0.Abort();
}
- else
- {
- }
-
- committedProceduralAttrs_1 = candidateProceduralAttrs_1;
+ committedProceduralAttrs_0 = _S5;
}
else
{
- committedProceduralAttrs_1 = committedProceduralAttrs_0;
+ committedProceduralAttrs_0 = committedProceduralAttrs_0;
}
-
- payload_6 = _S10;
- committedProceduralAttrs_2 = committedProceduralAttrs_1;
}
else
{
- payload_6 = payload_5;
- committedProceduralAttrs_2 = committedProceduralAttrs_0;
+ committedProceduralAttrs_0 = committedProceduralAttrs_0;
}
-
- payload_7 = payload_6;
- committedProceduralAttrs_3 = committedProceduralAttrs_2;
break;
}
- case (uint) int(0):
+ case 0U:
{
- MyRayPayload_0 _S11;
- _S11 = payload_5;
-
- bool _S12 = myTriangleAnyHit_0(_S11);
- MyRayPayload_0 _S13 = _S11;
-
- if(_S12)
+ bool _S6 = myTriangleAnyHit_0(payload_5);
+ if(_S6)
{
query_0.CommitNonOpaqueTriangleHit();
- if((bool) C_0.shouldStopAtFirstHit_0)
+ if(C_0.shouldStopAtFirstHit_0 != 0U)
{
query_0.Abort();
}
- else
- {
- }
- }
- else
- {
}
-
- payload_7 = _S13;
- committedProceduralAttrs_3 = committedProceduralAttrs_0;
break;
}
default:
{
- payload_7 = payload_5;
- committedProceduralAttrs_3 = committedProceduralAttrs_0;
break;
}
}
-
- payload_5 = payload_7;
- committedProceduralAttrs_0 = committedProceduralAttrs_3;
}
-
- uint _S14 = query_0.CommittedStatus();
-
- switch(_S14)
+ uint _S7 = query_0.CommittedStatus();
+ switch(_S7)
{
- case (uint) int(1):
+ case 1U:
{
- MyRayPayload_0 _S15;
-
- _S15 = payload_5;
-
- myTriangleClosestHit_0(_S15);
+ myTriangleClosestHit_0(payload_5);
break;
}
- case (uint) int(2):
+ case 2U:
{
-
- MyRayPayload_0 _S16;
- _S16 = payload_5;
-
- myProceduralClosestHit_0(_S16, committedProceduralAttrs_0);
+ myProceduralClosestHit_0(payload_5, committedProceduralAttrs_0);
break;
}
- case (uint) int(0):
+ case 0U:
{
- MyRayPayload_0 _S17;
-
- _S17 = payload_5;
-
- myMiss_0(_S17);
+ myMiss_0(payload_5);
break;
}
default:
@@ -233,6 +157,5 @@ void main(vector<uint,3> tid_0 : SV_DISPATCHTHREADID)
break;
}
}
-
return;
}
diff --git a/tests/slang-extension/atomic-float-byte-address-buffer-cross.slang.glsl b/tests/slang-extension/atomic-float-byte-address-buffer-cross.slang.glsl
index 139d55518..fca1fc1fa 100644
--- a/tests/slang-extension/atomic-float-byte-address-buffer-cross.slang.glsl
+++ b/tests/slang-extension/atomic-float-byte-address-buffer-cross.slang.glsl
@@ -2,7 +2,6 @@
#extension GL_EXT_shader_atomic_float : require
layout(row_major) uniform;
layout(row_major) buffer;
-
layout(std430, binding = 1) buffer _S1 {
float _data[];
} anotherBuffer_0;
@@ -11,7 +10,7 @@ layout(std430, binding = 0) buffer _S2 {
} _S3;
void RWByteAddressBuffer_InterlockedAddF32_0(uint _S4, float _S5, out float _S6)
{
- uint _S7 = _S4 / uint(4);
+ uint _S7 = _S4 / 4U;
float _S8 = (atomicAdd((((_S3)._data[(_S7)])), (_S5)));
_S6 = _S8;
return;
@@ -19,7 +18,7 @@ void RWByteAddressBuffer_InterlockedAddF32_0(uint _S4, float _S5, out float _S6)
void RWByteAddressBuffer_InterlockedAddF32_1(uint _S9, float _S10)
{
- uint _S11 = _S9 / uint(4);
+ uint _S11 = _S9 / 4U;
float _S12 = (atomicAdd((((_S3)._data[(_S11)])), (_S10)));
return;
}
@@ -28,11 +27,11 @@ layout(local_size_x = 16, local_size_y = 1, local_size_z = 1) in;
void main()
{
uint tid_0 = gl_GlobalInvocationID.x;
- int idx_0 = int(tid_0 & uint(3) ^ tid_0 >> 2);
+ uint _S13 = tid_0 >> 2;
+ int idx_0 = int(tid_0 & 3U ^ _S13);
float delta_0 = ((anotherBuffer_0)._data[(uint(idx_0 & 3))]);
- float previousValue_0;
- previousValue_0 = float(0);
- RWByteAddressBuffer_InterlockedAddF32_0(uint(idx_0 << 2), 1.00000000000000000000, previousValue_0);
- RWByteAddressBuffer_InterlockedAddF32_1(uint(int(tid_0 >> 2) << 2), delta_0);
+ float previousValue_0 = 0.0;
+ RWByteAddressBuffer_InterlockedAddF32_0(uint(idx_0 << 2), 1.0, previousValue_0);
+ RWByteAddressBuffer_InterlockedAddF32_1(uint(int(_S13) << 2), delta_0);
return;
}
diff --git a/tests/vkray/anyhit.slang.glsl b/tests/vkray/anyhit.slang.glsl
index eb39299c5..345dd6624 100644
--- a/tests/vkray/anyhit.slang.glsl
+++ b/tests/vkray/anyhit.slang.glsl
@@ -1,16 +1,8 @@
// anyhit.slang.glsl
#version 460
-
-#if USE_NV_RT
-#extension GL_NV_ray_tracing : require
-#define hitAttributeEXT hitAttributeNV
-#define rayPayloadInEXT rayPayloadInNV
-#define terminateRayEXT terminateRayNV
-#define ignoreIntersectionEXT ignoreIntersectionNV
-#else
#extension GL_EXT_ray_tracing : require
-#endif
-
+layout(row_major) uniform;
+layout(row_major) buffer;
struct Params_0
{
int mode_0;
@@ -47,16 +39,15 @@ void main()
float val_0 = textureLod(
sampler2D(gParams_alphaMap_0, gParams_sampler_0),
_S2.normal_0.xy,
- float(0)).x;
-
+ (0.0)).x;
- if(val_0 > float(0))
+ if(val_0 > 0.0)
{
- terminateRayEXT;
+ terminateRayEXT;;
}
else
{
- ignoreIntersectionEXT;
+ ignoreIntersectionEXT;;
}
}
diff --git a/tests/vkray/callable-caller.slang.glsl b/tests/vkray/callable-caller.slang.glsl
index 0b7a9677b..11049074f 100644
--- a/tests/vkray/callable-caller.slang.glsl
+++ b/tests/vkray/callable-caller.slang.glsl
@@ -1,18 +1,7 @@
-//TEST_IGNORE_FILE:
#version 460
#extension GL_NV_ray_tracing : require
layout(row_major) uniform;
layout(row_major) buffer;
-struct SLANG_ParameterGroup_C_0
-{
- uint shaderIndex_0;
-};
-
-layout(binding = 0)
-layout(std140) uniform _S1
-{
- SLANG_ParameterGroup_C_0 _data;
-} C_0;
struct MaterialPayload_0
{
vec4 albedo_0;
@@ -23,6 +12,16 @@ layout(location = 0)
callableDataNV
MaterialPayload_0 p_0;
+struct SLANG_ParameterGroup_C_0
+{
+ uint shaderIndex_0;
+};
+
+layout(binding = 0)
+layout(std140) uniform _S1
+{
+ SLANG_ParameterGroup_C_0 _data;
+} C_0;
void CallShader_0(uint shaderIndex_1, inout MaterialPayload_0 payload_0)
{
p_0 = payload_0;
@@ -38,7 +37,7 @@ uniform image2D gImage_0;
void main()
{
MaterialPayload_0 payload_1;
- payload_1.albedo_0 = vec4(0);
+ payload_1.albedo_0 = vec4(0.0);
uvec3 _S2 = ((gl_LaunchIDNV));
vec2 _S3 = vec2(_S2.xy);
uvec3 _S4 = ((gl_LaunchSizeNV));
diff --git a/tests/vkray/raygen.slang.glsl b/tests/vkray/raygen.slang.glsl
index e34f1f6e0..f86f67e82 100644
--- a/tests/vkray/raygen.slang.glsl
+++ b/tests/vkray/raygen.slang.glsl
@@ -76,8 +76,7 @@ uniform accelerationStructureEXT as_0;
float saturate_0(float x_0)
{
- float _S2 = clamp(x_0, 0.0, 1.0);
- return _S2;
+ return clamp(x_0, 0.0, 1.0);
}
layout(rgba32f)
@@ -86,24 +85,24 @@ uniform image2D outputImage_0;
void main()
{
- uvec3 _S3 = ((gl_LaunchIDEXT));
- float _S4 = float(_S3.x) + 0.5;
- uvec3 _S5 = ((gl_LaunchSizeEXT));
- float _S6 = _S4 / float(_S5.x);
- uvec3 _S7 = ((gl_LaunchIDEXT));
- float _S8 = float(_S7.y) + 0.5;
- uvec3 _S9 = ((gl_LaunchSizeEXT));
- float _S10 = _S8 / float(_S9.y);
- vec2 inUV_0 = vec2(_S6, _S10);
- vec4 _S11 = (texture(sampler2D(samplerPosition_0,sampler_0), (inUV_0)));
- vec3 P_0 = _S11.xyz;
- vec4 _S12 = (texture(sampler2D(samplerNormal_0,sampler_0), (inUV_0)));
- vec3 N_0 = _S12.xyz * 2.0 - 1.0;
+ uvec3 _S2 = ((gl_LaunchIDEXT));
+ float _S3 = float(_S2.x) + 0.5;
+ uvec3 _S4 = ((gl_LaunchSizeEXT));
+ float _S5 = _S3 / float(_S4.x);
+ uvec3 _S6 = ((gl_LaunchIDEXT));
+ float _S7 = float(_S6.y) + 0.5;
+ uvec3 _S8 = ((gl_LaunchSizeEXT));
+ float _S9 = _S7 / float(_S8.y);
+ vec2 inUV_0 = vec2(_S5, _S9);
+ vec4 _S10 = (texture(sampler2D(samplerPosition_0,sampler_0), (inUV_0)));
+ vec3 P_0 = _S10.xyz;
+ vec4 _S11 = (texture(sampler2D(samplerNormal_0,sampler_0), (inUV_0)));
+ vec3 N_0 = _S11.xyz * 2.0 - 1.0;
vec3 lightDelta_0 = ubo_0._data.light_0.position_0.xyz - P_0;
float lightDist_0 = length(lightDelta_0);
vec3 L_0 = normalize(lightDelta_0);
- float _S13 = 1.0 / (lightDist_0 * lightDist_0);
+ float _S12 = 1.0 / (lightDist_0 * lightDist_0);
RayDesc_0 ray_0;
ray_0.Origin_0 = P_0;
ray_0.TMin_0 = 0.00000099999999747524;
@@ -120,17 +119,14 @@ void main()
}
else
{
- atten_0 = _S13;
+ atten_0 = _S12;
}
- vec3 _S14 = ubo_0._data.light_0.color_1.xyz;
- float _S15 = dot(N_0, L_0);
- float _S16 = saturate_0(_S15);
- vec3 color_2 = _S14 * _S16 * atten_0;
+ vec3 color_2 = ubo_0._data.light_0.color_1.xyz * saturate_0(dot(N_0, L_0)) * atten_0;
ReflectionRay_0 reflectionRay_0;
TraceRay_1(as_0, 1U, 255U, 0U, 0U, 2U, ray_0, reflectionRay_0);
vec3 color_3 = color_2 + reflectionRay_0.color_0;
- uvec3 _S17 = ((gl_LaunchIDEXT));
- imageStore((outputImage_0), ivec2((uvec2(ivec2(_S17.xy)))), vec4(color_3, 1.0));
+ uvec3 _S13 = ((gl_LaunchIDEXT));
+ imageStore((outputImage_0), ivec2((uvec2(ivec2(_S13.xy)))), vec4(color_3, 1.0));
return;
}