From 3d0f5ee55788dca324641ae9268ee37dc4d7d9d5 Mon Sep 17 00:00:00 2001 From: "Harsh Aggarwal (NVIDIA)" Date: Wed, 10 Sep 2025 17:31:36 +0530 Subject: CUDA: Fix compiler crash with unsized array field - nonuniformres-as-function-parameter.slang #8315 (#8380) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Root Cause: CUDA compilation crashed with `assert failure: !seenFinalUnsizedArrayField` because unsized arrays like `RWStructuredBuffer globalBuffer[]` were not the final field in generated parameter structs, violating the layout constraint in slang-ir-layout.cpp. Fix: Extended `collectGlobalUniformParameters` to automatically reorder struct fields for CUDA targets - regular fields first, unsized arrays last. Other targets preserve original order. Impact: - Enables CUDA support for nonuniform resource indexing as function parameters - Zero impact on existing GLSL/HLSL/SPIRV targets - Automatic handling - no manual parameter reordering required Files: slang-emit.cpp, slang-ir-collect-global-uniforms.cpp/.h, test file --------- Co-authored-by: slangbot Co-authored-by: slangbot <186143334+slangbot@users.noreply.github.com> Co-authored-by: Ellie Hermaszewska --- source/slang/slang-emit.cpp | 2 +- source/slang/slang-ir-collect-global-uniforms.cpp | 49 +++++++++++++++++++++- source/slang/slang-ir-collect-global-uniforms.h | 5 ++- .../nonuniformres-as-function-parameter.slang | 5 +++ 4 files changed, 57 insertions(+), 4 deletions(-) diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp index f5b818c5d..31e6d17be 100644 --- a/source/slang/slang-emit.cpp +++ b/source/slang/slang-emit.cpp @@ -802,7 +802,7 @@ Result linkAndOptimizeIR( // can assume that all ordinary/uniform data is strictly // passed using constant buffers. 
// - collectGlobalUniformParameters(irModule, outLinkedIR.globalScopeVarLayout); + collectGlobalUniformParameters(irModule, outLinkedIR.globalScopeVarLayout, target); #if 0 dumpIRIfEnabled(codeGenContext, irModule, "GLOBAL UNIFORMS COLLECTED"); #endif diff --git a/source/slang/slang-ir-collect-global-uniforms.cpp b/source/slang/slang-ir-collect-global-uniforms.cpp index fb2a2233c..aad0de44f 100644 --- a/source/slang/slang-ir-collect-global-uniforms.cpp +++ b/source/slang/slang-ir-collect-global-uniforms.cpp @@ -49,6 +49,7 @@ struct CollectGlobalUniformParametersContext // IRModule* module; IRVarLayout* globalScopeVarLayout; + CodeGenTarget target = CodeGenTarget::Unknown; IRGlobalParam* _getGlobalParamFromLayoutFieldKey(IRInst* key) { @@ -174,7 +175,47 @@ struct CollectGlobalUniformParametersContext // parameters that were present in the layout information (they are // represented as the fields of the global-scope `struct` layout). // - for (auto fieldLayoutAttr : globalParamsStructTypeLayout->getFieldLayoutAttrs()) + // For CUDA targets, we need to ensure unsized arrays come last to satisfy + // the layout constraint in slang-ir-layout.cpp + auto fieldAttrs = globalParamsStructTypeLayout->getFieldLayoutAttrs(); + + // Create ordered field list - for CUDA, put unsized arrays last + List orderedFields; + + if (target == CodeGenTarget::CUDASource) + { + // For CUDA: separate regular and unsized array fields + List regularFields; + List unsizedArrayFields; + + for (auto fieldLayoutAttr : fieldAttrs) + { + auto globalParam = + _getGlobalParamFromLayoutFieldKey(fieldLayoutAttr->getFieldKey()); + if (globalParam && as(globalParam->getDataType())) + { + unsizedArrayFields.add(fieldLayoutAttr); + } + else + { + regularFields.add(fieldLayoutAttr); + } + } + + // Add regular fields first, then unsized arrays + for (auto field : regularFields) + orderedFields.add(field); + for (auto field : unsizedArrayFields) + orderedFields.add(field); + } + else + { + // For other targets: 
preserve original order + for (auto field : fieldAttrs) + orderedFields.add(field); + } + + for (auto fieldLayoutAttr : orderedFields) { // We expect the IR layout pass to have encoded field per-field // layout so that the "key" for the field is the corresponding @@ -339,11 +380,15 @@ struct CollectGlobalUniformParametersContext } }; -void collectGlobalUniformParameters(IRModule* module, IRVarLayout* globalScopeVarLayout) +void collectGlobalUniformParameters( + IRModule* module, + IRVarLayout* globalScopeVarLayout, + CodeGenTarget target) { CollectGlobalUniformParametersContext context; context.module = module; context.globalScopeVarLayout = globalScopeVarLayout; + context.target = target; context.processModule(); } diff --git a/source/slang/slang-ir-collect-global-uniforms.h b/source/slang/slang-ir-collect-global-uniforms.h index 76f56f074..5f9393a9b 100644 --- a/source/slang/slang-ir-collect-global-uniforms.h +++ b/source/slang/slang-ir-collect-global-uniforms.h @@ -11,6 +11,9 @@ struct IRVarLayout; /// Collect global-scope shader parameters that use uniform/ordinary /// storage into a single `struct` (possibly wrapped in a constant buffer). 
/// -void collectGlobalUniformParameters(IRModule* module, IRVarLayout* globalScopeVarLayout); +void collectGlobalUniformParameters( + IRModule* module, + IRVarLayout* globalScopeVarLayout, + CodeGenTarget target = CodeGenTarget::Unknown); } // namespace Slang diff --git a/tests/compute/nonuniformres-as-function-parameter.slang b/tests/compute/nonuniformres-as-function-parameter.slang index 38fb2a478..fcf8598f6 100644 --- a/tests/compute/nonuniformres-as-function-parameter.slang +++ b/tests/compute/nonuniformres-as-function-parameter.slang @@ -2,6 +2,7 @@ //TEST:SIMPLE(filecheck=CHECK_GLSL_SPV):-target spirv -entry main -stage compute -emit-spirv-via-glsl //TEST:SIMPLE(filecheck=CHECK_GLSL):-target glsl -entry main -stage compute //TEST:SIMPLE(filecheck=CHECK_HLSL):-target hlsl -entry main -stage compute +//TEST:SIMPLE(filecheck=CHECK_CUDA):-target cuda -entry main -stage compute RWStructuredBuffer globalBuffer[] : register(u0, space1); RWStructuredBuffer outputBuffer; @@ -81,6 +82,7 @@ void main(uint2 pixelIndex : SV_DispatchThreadID) // CHECK_GLSL: func_0({{.*}}nonuniformEXT({{.*}})) // CHECK_HLSL: func_0(globalBuffer_0[NonUniformResourceIndex({{.*}})]) + // CHECK_CUDA: func_{{[0-9]+}}(globalParams_{{[0-9]+}}->globalBuffer_{{[0-9]+}}[{{.*}}]) MyStruct myStruct = func(buffer); int bufferIdx2 = pixelIndex.y; @@ -103,6 +105,7 @@ void main(uint2 pixelIndex : SV_DispatchThreadID) // CHECK_GLSL: func_1({{.*}}nonuniformEXT({{.*}})) // CHECK_HLSL: func_0(globalBuffer_0[NonUniformResourceIndex({{.*}})]) + // CHECK_CUDA: func_{{[0-9]+}}(globalParams_{{[0-9]+}}->globalBuffer_{{[0-9]+}}[{{.*}}]) MyStruct myStruct2 = func(buffer2); // Test case 3: Test the case that we handle the uniformity correctly, the NonUniformResourceIndex will not propagate @@ -114,6 +117,7 @@ void main(uint2 pixelIndex : SV_DispatchThreadID) // Test to make sure this command is not decorated with NonUniform: // CHECK_SPV-NOT: OpDecorate %[[VAR4]] NonUniform + // CHECK_CUDA: 
func_{{[0-9]+}}(globalParams_{{[0-9]+}}->globalBuffer_{{[0-9]+}}[{{.*}}]) MyStruct myStruct3 = func(buffer3); @@ -130,6 +134,7 @@ void main(uint2 pixelIndex : SV_DispatchThreadID) // // Since after the nested cast, the index data type is 'uint' now, make sure it calls the same function as the test case 1. // CHECK_GLSL: func_0({{.*}}nonuniformEXT({{.*}})) + // CHECK_CUDA: func_{{[0-9]+}}(globalParams_{{[0-9]+}}->globalBuffer_{{[0-9]+}}[{{.*}}]) RWStructuredBuffer buffer4 = globalBuffer[(uint)((int)NonUniformResourceIndex(bufferIdx))]; MyStruct myStruct4 = func(buffer4); -- cgit v1.2.3