//TEST:SIMPLE(filecheck=SPV): -target spirv -O0
//TEST:SIMPLE(filecheck=CUDA): -target cuda -entry compute_main -stage compute
//TEST:SIMPLE(filecheck=PTX): -target ptx -entry compute_main -stage compute

// Check that we can specialize buffer loads through user pointers, and
// do not load big struct elements into registers unnecessarily.

// Innermost struct: holds the large array that must never be copied wholesale.
struct Bottom
{
    float bigArray[1024];

    // Returns the single element of `bigArray` at `index`.
    float bottomGetValue(int index)
    {
        return bigArray[index];
    }
}

// Wraps a `Bottom` by value and forwards element lookups to it.
struct Middle
{
    Bottom bottom;

    // Forwards to `Bottom.bottomGetValue` on the nested `bottom` field.
    float middleGetValue(int index)
    {
        return bottom.bottomGetValue(index);
    }
}

// Reaches `Middle` indirectly, through a buffer handle rather than by value.
struct Top
{
    StructuredBuffer<Middle>.Handle middle;

    // Calling `middleGetValue` on `middle[0]` should not cause the entire `Middle`
    // struct to be loaded into registers. Instead, we should be able to specialize
    // `middleGetValue` to take a `Middle*` and recursively specialize `bottomGetValue`
    // to only load the `Bottom.bigArray[index]` element.
    float topGetValue(int index)
    {
        return middle[0].middleGetValue(index);
    }
}

// Layout of the constant buffer `cb`.
struct Root
{
    Top top;
}

ConstantBuffer<Root> cb;
RWStructuredBuffer<float> outputBuffer;

// Check that the generated CUDA code never loads a `Middle` or `Bottom` struct into a local var.
// CUDA-NOT: Middle{{[_A-Za-z0-9]*}} {{[a-zA-Z0-9_]+}} =
// CUDA-NOT: Bottom{{[_A-Za-z0-9]*}} {{[a-zA-Z0-9_]+}} =
// CUDA-NOT: Top{{[_A-Za-z0-9]*}} {{[a-zA-Z0-9_]+}} =

// Check that the generated CUDA code can be compiled by nvrtc correctly into PTX.
// PTX: compute_main

// Check that the generated (unoptimized) SPIR-V contains a specialized Bottom_bottomGetValue function
// that takes in a Bottom* and uses an access chain to load the required array element directly, without
// needing to load the entire Bottom struct.
// SPV: %Bottom_bottomGetValue = OpFunction %float None
// SPV: OpFunctionParameter %_ptr_PhysicalStorageBuffer_Middle_natural
// SPV: %[[INDEX:[A-Za-z0-9_]+]] = OpFunctionParameter %int
// SPV: %[[PTR:[A-Za-z0-9_]+]] = OpAccessChain %_ptr_PhysicalStorageBuffer_float %{{.*}} %[[INDEX]]
// SPV: %[[VALUE:[A-Za-z0-9_]+]] = OpLoad %float %[[PTR]]
// SPV: OpReturnValue %[[VALUE]]

// Entry point: reads one element through the cb -> Top -> Middle -> Bottom chain
// and stores it in `outputBuffer[0]`.
[shader("compute")]
[numthreads(1, 1, 1)]
void compute_main(uint3 tid: SV_DispatchThreadID)
{
    outputBuffer[0] = cb.top.topGetValue(0);
}