summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- source/slang/slang-emit.cpp 8
-rw-r--r-- source/slang/slang-ir-legalize-global-values.cpp 1
-rw-r--r-- source/slang/slang-target.h 3
-rw-r--r-- source/slang/slang-type-layout.cpp 6
-rw-r--r-- tests/bugs/gh-4874.slang 3
5 files changed, 19 insertions, 2 deletions
diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp
index d5e89b1fe..bdd7dfe10 100644
--- a/source/slang/slang-emit.cpp
+++ b/source/slang/slang-emit.cpp
@@ -1270,7 +1270,13 @@ Result linkAndOptimizeIR(
// For CUDA targets, always inline global constants to avoid dynamic initialization
// of __device__ variables rejected by NVRTC. This runs independently of the broader
// resource/existential type legalization, which remains disabled for CUDA.
- if (target == CodeGenTarget::CUDASource || options.shouldLegalizeExistentialAndResourceTypes)
+ //
+ // We also need this pass on the CPU targets in shader mode, as global
+ // constants may reference global parameters, which can't be emitted as
+ // constants.
+ if (target == CodeGenTarget::CUDASource ||
+ (isCPUTarget(targetRequest) && isKernelTarget(target)) ||
+ options.shouldLegalizeExistentialAndResourceTypes)
{
inlineGlobalConstantsForLegalization(irModule);
}
diff --git a/source/slang/slang-ir-legalize-global-values.cpp b/source/slang/slang-ir-legalize-global-values.cpp
index 55676f2d1..f7152d561 100644
--- a/source/slang/slang-ir-legalize-global-values.cpp
+++ b/source/slang/slang-ir-legalize-global-values.cpp
@@ -129,6 +129,7 @@ bool GlobalInstInliningContextGeneric::isInlinableGlobalInst(IRInst* inst)
case kIROp_Neq:
case kIROp_Eql:
case kIROp_Call:
+ case kIROp_Load:
return true;
default:
if (isInlinableGlobalInstForTarget(inst))
diff --git a/source/slang/slang-target.h b/source/slang/slang-target.h
index 8fa1f83f6..f9f653406 100644
--- a/source/slang/slang-target.h
+++ b/source/slang/slang-target.h
@@ -83,6 +83,9 @@ bool isCPUTarget(TargetRequest* targetReq);
bool isWGPUTarget(TargetRequest* targetReq);
bool isWGPUTarget(CodeGenTarget target);
+// Are we generating code for a Kernel-style target (as opposed to a host-style target)?
+bool isKernelTarget(CodeGenTarget codeGenTarget);
+
/// A request to generate output in some target format.
class TargetRequest : public RefObject
{
diff --git a/source/slang/slang-type-layout.cpp b/source/slang/slang-type-layout.cpp
index 5bbcd2eb1..2daf33fdf 100644
--- a/source/slang/slang-type-layout.cpp
+++ b/source/slang/slang-type-layout.cpp
@@ -2712,6 +2712,12 @@ bool isWGPUTarget(TargetRequest* targetReq)
return isWGPUTarget(targetReq->getTarget());
}
+bool isKernelTarget(CodeGenTarget codeGenTarget) // true iff the artifact desc built for this target has Kernel style
+{
+ return ArtifactDescUtil::makeDescForCompileTarget(asExternal(codeGenTarget)).style == // compare the desc's `style` field...
+ ArtifactStyle::Kernel; // ...against the Kernel style enumerator
+}
+
SourceLanguage getIntermediateSourceLanguageForTarget(TargetProgram* targetProgram)
{
// If we are emitting directly, there is no intermediate source language
diff --git a/tests/bugs/gh-4874.slang b/tests/bugs/gh-4874.slang
index 403f6fc50..53a2aaf38 100644
--- a/tests/bugs/gh-4874.slang
+++ b/tests/bugs/gh-4874.slang
@@ -6,6 +6,7 @@
//
//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK): -shaderobj
//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK): -shaderobj -vk
+//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK): -shaderobj -cpu
//
//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name=outputBuffer
RWStructuredBuffer<uint> outputBuffer;
@@ -47,4 +48,4 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
// CHECK: 432
// CHECK: 543
// CHECK: 654
-}
\ No newline at end of file
+}