diff options
| author | Julius Ikkala <julius.ikkala@gmail.com> | 2025-10-09 02:13:27 +0300 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-10-08 23:13:27 +0000 |
| commit | 1e4265edd4ec4c44e3d8f209fca802727076aa46 (patch) | |
| tree | 811e872cfa374e1a3859d940431f687f628814db | |
| parent | 54d9b345bff4b01949e875366cb1e7cf1c021c61 (diff) | |
Allow 1D SV_DispatchThreadID in CPU targets (#8612)
The varying param legalization pass didn't deal with this 1D form of
SV_DispatchThreadID for CPU targets:
```slang
void computeMain(int i : SV_DispatchThreadID)
```
Instead, it just overrode the type of `i` with a `uint3`, breaking lots
of code that attempted to use `i` for something, like a `switch`
statement for example.
I ran across this when going through `language-feature` tests for the
LLVM target, which will also use this legalization pass. I'm separately
submitting this now because this also fixes the existing CPU target. The
test I enable in this PR is one that was previously generating broken
code on CPU.
(somewhat related issue: #7468)
| -rw-r--r-- | source/slang/slang-ir-legalize-varying-params.cpp | 171 | ||||
| -rw-r--r-- | tests/cuda/dispatch-thread-id-extraction.slang | 9 | ||||
| -rw-r--r-- | tests/language-feature/enums/enum-switch-2.slang | 1 | ||||
| -rw-r--r-- | tests/language-feature/system-value-extraction.slang | 31 |
4 files changed, 104 insertions, 108 deletions
diff --git a/source/slang/slang-ir-legalize-varying-params.cpp b/source/slang/slang-ir-legalize-varying-params.cpp index 39b9b3dd3..5f6c7a34e 100644 --- a/source/slang/slang-ir-legalize-varying-params.cpp +++ b/source/slang/slang-ir-legalize-varying-params.cpp @@ -290,6 +290,46 @@ IRInst* emitCalcGroupIndex(IRBuilder& builder, IRInst* groupThreadID, IRInst* gr return offset; } +IRInst* tryConvertValue(IRBuilder& builder, IRInst* val, IRType* toType) +{ + auto fromType = val->getFullType(); + if (auto fromVector = as<IRVectorType>(fromType)) + { + if (auto toVector = as<IRVectorType>(toType)) + { + if (fromVector->getElementCount() != toVector->getElementCount()) + { + fromType = builder.getVectorType( + fromVector->getElementType(), + toVector->getElementCount()); + val = builder.emitVectorReshape(fromType, val); + } + } + else if (as<IRBasicType>(toType)) + { + UInt index = 0; + val = builder.emitSwizzle(fromVector->getElementType(), val, 1, &index); + if (toType->getOp() == kIROp_VoidType) + return nullptr; + } + } + else if (auto fromBasicType = as<IRBasicType>(fromType)) + { + if (fromBasicType->getOp() == kIROp_VoidType) + return nullptr; + if (!as<IRBasicType>(toType)) + return nullptr; + if (toType->getOp() == kIROp_VoidType) + return nullptr; + } + else + { + return nullptr; + } + return builder.emitCast(toType, val); +} + + /// Context for the IR pass that legalizing entry-point /// varying parameters for a target. /// @@ -1018,6 +1058,25 @@ protected: return LegalizedVaryingVal(); } + + LegalizedVaryingVal createLegalizedSystemVaryingValInst( + VaryingParamInfo const& info, + IRInst* id) + { + IRType* paramType = info.type; + + // CUDA and C++ targets wrap parameters in a BorrowInParamType, but that + // may not always be the case for every target. + if (auto ptr = as<IRBorrowInParamType>(info.type)) + paramType = ptr->getValueType(); + + IRBuilder builder(m_module); + builder.setInsertBefore(m_firstOrdinaryInst); + + auto converted = tryConvertValue(builder, id, as<IRType>(paramType)); + + return LegalizedVaryingVal::makeValue(converted); + } }; // With the target-independent core of the pass out of the way, we can @@ -1274,13 +1333,13 @@ struct CUDAEntryPointVaryingParamLegalizeContext : EntryPointVaryingParamLegaliz switch (info.systemValueSemanticName) { case SystemValueSemanticName::GroupID: - return createLegalizedVal(info, blockIdxGlobalParam); + return createLegalizedSystemVaryingValInst(info, blockIdxGlobalParam); case SystemValueSemanticName::GroupThreadID: - return createLegalizedVal(info, threadIdxGlobalParam); + return createLegalizedSystemVaryingValInst(info, threadIdxGlobalParam); case SystemValueSemanticName::GroupIndex: - return createLegalizedVal(info, groupThreadIndex); + return createLegalizedSystemVaryingValInst(info, groupThreadIndex); case SystemValueSemanticName::DispatchThreadID: - return createLegalizedVal(info, dispatchThreadID); + return createLegalizedSystemVaryingValInst(info, dispatchThreadID); default: return diagnoseUnsupportedSystemVal(info); } @@ -1335,62 +1394,6 @@ struct CUDAEntryPointVaryingParamLegalizeContext : EntryPointVaryingParamLegaliz return diagnoseUnsupportedUserVal(info); } } - - LegalizedVaryingVal createLegalizedVal(VaryingParamInfo const& info, IRInst* id) - { - // If the parameter type is not uint3, we need to extract components as needed - auto paramType = info.type->getOperand(0); - IRBuilder builder(m_module); - builder.setInsertBefore(m_firstOrdinaryInst); - - if (as<IRBasicType>(paramType)) - { - auto uintType = builder.getBasicType(BaseType::UInt); - UInt swizzleIndex = 0; - auto xComponent = builder.emitSwizzle(uintType, id, 1, &swizzleIndex); - - if (auto basicType = as<IRBasicType>(paramType)) - { - if (basicType->getBaseType() != BaseType::UInt) - { - xComponent = builder.emitBitCast(basicType, xComponent); - } - } - return LegalizedVaryingVal::makeValue(xComponent); - } - // For vector types, use a swizzle to extract the needed components - else if (auto vectorType = as<IRVectorType>(paramType)) - { - auto elementCount = getIntVal(vectorType->getElementCount()); - - if (elementCount > 0 && elementCount <= 3) - { - // Setup indices for the swizzle (0 for x, 1 for y, 2 for z) - UInt swizzleIndices[3] = {0, 1, 2}; - auto uintType = builder.getBasicType(BaseType::UInt); - - // Use a swizzle to extract all needed components at once - auto extractedVector = builder.emitSwizzle( - builder.getVectorType(uintType, elementCount), - id, - elementCount, - swizzleIndices); - - // Cast if the element type is not uint - auto elementType = vectorType->getElementType(); - if (auto basicElementType = as<IRBasicType>(elementType)) - { - if (basicElementType->getBaseType() != BaseType::UInt) - { - extractedVector = builder.emitBitCast(vectorType, extractedVector); - } - } - return LegalizedVaryingVal::makeValue(extractedVector); - } - } - // Default to the full uint3 if the parameter type doesn't match our expectations - return LegalizedVaryingVal::makeValue(id); - } }; @@ -1529,14 +1532,13 @@ struct CPUEntryPointVaryingParamLegalizeContext : EntryPointVaryingParamLegalize switch (info.systemValueSemanticName) { case SystemValueSemanticName::GroupID: - return LegalizedVaryingVal::makeValue(groupID); + return createLegalizedSystemVaryingValInst(info, groupID); case SystemValueSemanticName::GroupThreadID: - return LegalizedVaryingVal::makeValue(groupThreadID); + return createLegalizedSystemVaryingValInst(info, groupThreadID); case SystemValueSemanticName::GroupIndex: - return LegalizedVaryingVal::makeValue(groupThreadIndex); + return createLegalizedSystemVaryingValInst(info, groupThreadIndex); case SystemValueSemanticName::DispatchThreadID: - return LegalizedVaryingVal::makeValue(dispatchThreadID); - + return createLegalizedSystemVaryingValInst(info, dispatchThreadID); default: return diagnoseUnsupportedSystemVal(info); } @@ -2997,45 +2999,6 @@ private: fixUpFuncType(func, structType); } - IRInst* tryConvertValue(IRBuilder& builder, IRInst* val, IRType* toType) - { - auto fromType = val->getFullType(); - if (auto fromVector = as<IRVectorType>(fromType)) - { - if (auto toVector = as<IRVectorType>(toType)) - { - if (fromVector->getElementCount() != toVector->getElementCount()) - { - fromType = builder.getVectorType( - fromVector->getElementType(), - toVector->getElementCount()); - val = builder.emitVectorReshape(fromType, val); - } - } - else if (as<IRBasicType>(toType)) - { - UInt index = 0; - val = builder.emitSwizzle(fromVector->getElementType(), val, 1, &index); - if (toType->getOp() == kIROp_VoidType) - return nullptr; - } - } - else if (auto fromBasicType = as<IRBasicType>(fromType)) - { - if (fromBasicType->getOp() == kIROp_VoidType) - return nullptr; - if (!as<IRBasicType>(toType)) - return nullptr; - if (toType->getOp() == kIROp_VoidType) - return nullptr; - } - else - { - return nullptr; - } - return builder.emitCast(toType, val); - } - void legalizeSystemValueParameters(EntryPointInfo entryPoint) { List<SystemValLegalizationWorkItem> systemValWorkItems = diff --git a/tests/cuda/dispatch-thread-id-extraction.slang b/tests/cuda/dispatch-thread-id-extraction.slang index 5fc3c89a6..02705ff24 100644 --- a/tests/cuda/dispatch-thread-id-extraction.slang +++ b/tests/cuda/dispatch-thread-id-extraction.slang @@ -28,8 +28,9 @@ void computeMain3(int2 tid: SV_DispatchThreadID, StructuredBuffer<uint> src, RWS { dst[tid.x] = src[tid.x]; } -// CHECK: int _S3 = (slang_bit_cast<int2 >(uint2 {(blockIdx * blockDim + threadIdx).x, (blockIdx * blockDim + threadIdx).y})).x; - +// CHECK: uint2 _S3 = uint2 {(blockIdx * blockDim + threadIdx).x, (blockIdx * blockDim + threadIdx).y}; +// CHECK: int2 _S4 = make_int2 ((int)_S3.x, (int)_S3.y); +// CHECK: int _S5 = _S4.x; [shader("compute")] [numthreads(1, 1, 1)] @@ -37,7 +38,7 @@ void computeMain4(int tid: SV_DispatchThreadID, StructuredBuffer<uint> src, RWSt { dst[tid.x] = src[tid.x]; } -// CHECK: int _S4 = (slang_bit_cast<int>((blockIdx * blockDim + threadIdx).x)); +// CHECK: int _S6 = int((blockIdx * blockDim + threadIdx).x); [shader("compute")] [numthreads(1, 1, 1)] @@ -45,4 +46,4 @@ void computeMain5(int tid: SV_GroupIndex, StructuredBuffer<uint> src, RWStructur { dst[tid.x] = src[tid.x]; } -// CHECK: int _S5 = (slang_bit_cast<int>((threadIdx.z * blockDim.y + threadIdx.y) * blockDim.x + threadIdx.x));
\ No newline at end of file +// CHECK: int _S7 = int((threadIdx.z * blockDim.y + threadIdx.y) * blockDim.x + threadIdx.x); diff --git a/tests/language-feature/enums/enum-switch-2.slang b/tests/language-feature/enums/enum-switch-2.slang index f5266f35d..de4a4757b 100644 --- a/tests/language-feature/enums/enum-switch-2.slang +++ b/tests/language-feature/enums/enum-switch-2.slang @@ -1,4 +1,5 @@ //TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-slang -compute -shaderobj -output-using-type +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-cpu -compute -shaderobj -output-using-type //TEST(compute, vulkan):COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-vk -compute -shaderobj -output-using-type enum class E : uint32_t diff --git a/tests/language-feature/system-value-extraction.slang b/tests/language-feature/system-value-extraction.slang new file mode 100644 index 000000000..12ca07a96 --- /dev/null +++ b/tests/language-feature/system-value-extraction.slang @@ -0,0 +1,31 @@ +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHECK): -dx11 -compute +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHECK): -dx12 -compute +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHECK): -cpu -compute +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHECK): -cuda -compute +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHECK): -wgsl -compute +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHECK): -mtl -compute +//TEST(compute,vulkan):COMPARE_COMPUTE_EX(filecheck-buffer=CHECK): -vk -compute + +// Slang allows the type of system value semantics to differ from their +// "canonical" type - e.g. `uint8_t tid : SV_DispatchThreadID` is technically +// valid and refers to the first element in the underlying uint3 vector. + +// TEST_INPUT: ubuffer(data=[0 0 0 0 0 0 0 0], stride=4):out,name=outputBuffer +RWStructuredBuffer<int> outputBuffer; + +[numthreads(2, 4, 1)] +void computeMain( + uint ind : SV_GroupIndex, + vector<uint, 2> tid: SV_DispatchThreadID, + int localTid : SV_GroupThreadID +){ + // CHECK: 0 + // CHECK-NEXT: 5 + // CHECK-NEXT: 8 + // CHECK-NEXT: D + // CHECK-NEXT: 10 + // CHECK-NEXT: 15 + // CHECK-NEXT: 18 + // CHECK-NEXT: 1D + outputBuffer[ind] = 4 * (tid.x + 2 * tid.y) + localTid; +} |
