summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorjsmall-nvidia <jsmall@nvidia.com>2020-01-29 15:17:33 -0500
committerGitHub <noreply@github.com>2020-01-29 15:17:33 -0500
commit415409fc10cfd0d6b2eb805df8f37bdabc4f2405 (patch)
treeec5f5cabb6b2a2f88d8f447ffa1378846b414a85
parent2c8b983cf20ba662e351813f3f432b65eef3530c (diff)
Feature/target intrinsic fold (#1190)
* When checking whether an instruction can be folded, take into account whether it is used as an argument in a call to a target intrinsic; if it is, we need to check whether the corresponding parameter is accessed multiple times in the intrinsic definition before allowing the fold. * Tidy up code around folding/target intrinsics. * Fix texture-load.slang. * Fix typo in assert.
-rw-r--r--source/slang/slang-emit-c-like.cpp52
-rw-r--r--tests/cross-compile/texture-load.slang.glsl6
2 files changed, 56 insertions, 2 deletions
diff --git a/source/slang/slang-emit-c-like.cpp b/source/slang/slang-emit-c-like.cpp
index d630d18c9..48ca6009b 100644
--- a/source/slang/slang-emit-c-like.cpp
+++ b/source/slang/slang-emit-c-like.cpp
@@ -900,6 +900,8 @@ bool CLikeSourceEmitter::shouldFoldInstIntoUseSites(IRInst* inst)
}
}
+
+
// If the instruction is at global scope, then it might represent
// a constant (e.g., the value of an enum case).
//
@@ -940,6 +942,56 @@ bool CLikeSourceEmitter::shouldFoldInstIntoUseSites(IRInst* inst)
auto user = use->getUser();
+ // Check if the use is a call using a target intrinsic that uses the parameter more than once
+ // in the intrinsic definition.
+ if (auto callInst = as<IRCall>(user))
+ {
+ const auto funcValue = callInst->getCallee();
+
+ // Let's see if this instruction is an intrinsic call.
+ // This is significant, because a target intrinsic's definition can contain multiple accesses to the same
+ // parameter. This is not indicated in the call, and can lead to output code that computes something multiple
+ // times, as it is folded into the expression of the target intrinsic, which we don't want.
+ if (auto targetIntrinsicDecoration = findTargetIntrinsicDecoration(funcValue))
+ {
+ // Find the index of the original instruction, to see if it's multiply used.
+ IRUse* args = callInst->getArgs();
+ const Index paramIndex = Index(use - args);
+ SLANG_ASSERT(paramIndex >= 0 && paramIndex < Index(callInst->getArgCount()));
+
+ // Look through the slice to see how many times this parameter is used (signified via $0...$9)
+ {
+ UnownedStringSlice slice = targetIntrinsicDecoration->getDefinition();
+
+ const char* cur = slice.begin();
+ const char* end = slice.end();
+
+ // Count the amount of uses
+ Index useCount = 0;
+ while (cur < end)
+ {
+ const char c = *cur;
+ if (c == '$' && cur + 1 < end && cur[1] >= '0' && cur[1] <= '9')
+ {
+ const Index index = Index(cur[1] - '0');
+ useCount += Index(index == paramIndex);
+ cur += 2;
+ }
+ else
+ {
+ cur++;
+ }
+ }
+
+ // If there is more than one use, we can't fold.
+ if (useCount > 1)
+ {
+ return false;
+ }
+ }
+ }
+ }
+
// We'd like to figure out if it is safe to fold our instruction into `user`
// First, let's make sure they are in the same block/parent:
diff --git a/tests/cross-compile/texture-load.slang.glsl b/tests/cross-compile/texture-load.slang.glsl
index 23646db91..bb4514bad 100644
--- a/tests/cross-compile/texture-load.slang.glsl
+++ b/tests/cross-compile/texture-load.slang.glsl
@@ -30,10 +30,12 @@ uniform image2D outputTexture_0;
layout(local_size_x = 16, local_size_y = 16, local_size_z = 1) in;
void main()
{
+ ivec3 _S2 = ivec3(C_0._data.pos_0, 0);
+
vec2 tmp_0 = texelFetch(
inputTexture_0,
- ivec3(C_0._data.pos_0, 0).xy,
- ivec3(C_0._data.pos_0, 0).z).xy;
+ _S2.xy,
+ _S2.z).xy;
imageStore(
outputTexture_0,