summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJulius Ikkala <julius.ikkala@gmail.com>2025-10-09 02:13:27 +0300
committerGitHub <noreply@github.com>2025-10-08 23:13:27 +0000
commit1e4265edd4ec4c44e3d8f209fca802727076aa46 (patch)
tree811e872cfa374e1a3859d940431f687f628814db
parent54d9b345bff4b01949e875366cb1e7cf1c021c61 (diff)
Allow 1D SV_DispatchThreadID in CPU targets (#8612)
The varying param legalization pass didn't deal with this 1D form of SV_DispatchThreadID for CPU targets: ```slang void computeMain(int i : SV_DispatchThreadID) ``` Instead, it just overrode the type of `i` with a `uint3`, breaking lots of code that attempted to use `i` for something, like a `switch` statement for example. I ran across this when going through `language-feature` tests for the LLVM target, which will also use this legalization pass. I'm separately submitting this now because this also fixes the existing CPU target. The test I enable in this PR is one that was previously generating broken code on CPU. (somewhat related issue: #7468)
-rw-r--r--source/slang/slang-ir-legalize-varying-params.cpp171
-rw-r--r--tests/cuda/dispatch-thread-id-extraction.slang9
-rw-r--r--tests/language-feature/enums/enum-switch-2.slang1
-rw-r--r--tests/language-feature/system-value-extraction.slang31
4 files changed, 104 insertions, 108 deletions
diff --git a/source/slang/slang-ir-legalize-varying-params.cpp b/source/slang/slang-ir-legalize-varying-params.cpp
index 39b9b3dd3..5f6c7a34e 100644
--- a/source/slang/slang-ir-legalize-varying-params.cpp
+++ b/source/slang/slang-ir-legalize-varying-params.cpp
@@ -290,6 +290,46 @@ IRInst* emitCalcGroupIndex(IRBuilder& builder, IRInst* groupThreadID, IRInst* gr
return offset;
}
+IRInst* tryConvertValue(IRBuilder& builder, IRInst* val, IRType* toType)
+{
+ auto fromType = val->getFullType();
+ if (auto fromVector = as<IRVectorType>(fromType))
+ {
+ if (auto toVector = as<IRVectorType>(toType))
+ {
+ if (fromVector->getElementCount() != toVector->getElementCount())
+ {
+ fromType = builder.getVectorType(
+ fromVector->getElementType(),
+ toVector->getElementCount());
+ val = builder.emitVectorReshape(fromType, val);
+ }
+ }
+ else if (as<IRBasicType>(toType))
+ {
+ UInt index = 0;
+ val = builder.emitSwizzle(fromVector->getElementType(), val, 1, &index);
+ if (toType->getOp() == kIROp_VoidType)
+ return nullptr;
+ }
+ }
+ else if (auto fromBasicType = as<IRBasicType>(fromType))
+ {
+ if (fromBasicType->getOp() == kIROp_VoidType)
+ return nullptr;
+ if (!as<IRBasicType>(toType))
+ return nullptr;
+ if (toType->getOp() == kIROp_VoidType)
+ return nullptr;
+ }
+ else
+ {
+ return nullptr;
+ }
+ return builder.emitCast(toType, val);
+}
+
+
/// Context for the IR pass that legalizing entry-point
/// varying parameters for a target.
///
@@ -1018,6 +1058,25 @@ protected:
return LegalizedVaryingVal();
}
+
+ LegalizedVaryingVal createLegalizedSystemVaryingValInst(
+ VaryingParamInfo const& info,
+ IRInst* id)
+ {
+ IRType* paramType = info.type;
+
+ // CUDA and C++ targets wrap parameters in a BorrowInParamType, but that
+ // may not always be the case for every target.
+ if (auto ptr = as<IRBorrowInParamType>(info.type))
+ paramType = ptr->getValueType();
+
+ IRBuilder builder(m_module);
+ builder.setInsertBefore(m_firstOrdinaryInst);
+
+ auto converted = tryConvertValue(builder, id, as<IRType>(paramType));
+
+ return LegalizedVaryingVal::makeValue(converted);
+ }
};
// With the target-independent core of the pass out of the way, we can
@@ -1274,13 +1333,13 @@ struct CUDAEntryPointVaryingParamLegalizeContext : EntryPointVaryingParamLegaliz
switch (info.systemValueSemanticName)
{
case SystemValueSemanticName::GroupID:
- return createLegalizedVal(info, blockIdxGlobalParam);
+ return createLegalizedSystemVaryingValInst(info, blockIdxGlobalParam);
case SystemValueSemanticName::GroupThreadID:
- return createLegalizedVal(info, threadIdxGlobalParam);
+ return createLegalizedSystemVaryingValInst(info, threadIdxGlobalParam);
case SystemValueSemanticName::GroupIndex:
- return createLegalizedVal(info, groupThreadIndex);
+ return createLegalizedSystemVaryingValInst(info, groupThreadIndex);
case SystemValueSemanticName::DispatchThreadID:
- return createLegalizedVal(info, dispatchThreadID);
+ return createLegalizedSystemVaryingValInst(info, dispatchThreadID);
default:
return diagnoseUnsupportedSystemVal(info);
}
@@ -1335,62 +1394,6 @@ struct CUDAEntryPointVaryingParamLegalizeContext : EntryPointVaryingParamLegaliz
return diagnoseUnsupportedUserVal(info);
}
}
-
- LegalizedVaryingVal createLegalizedVal(VaryingParamInfo const& info, IRInst* id)
- {
- // If the parameter type is not uint3, we need to extract components as needed
- auto paramType = info.type->getOperand(0);
- IRBuilder builder(m_module);
- builder.setInsertBefore(m_firstOrdinaryInst);
-
- if (as<IRBasicType>(paramType))
- {
- auto uintType = builder.getBasicType(BaseType::UInt);
- UInt swizzleIndex = 0;
- auto xComponent = builder.emitSwizzle(uintType, id, 1, &swizzleIndex);
-
- if (auto basicType = as<IRBasicType>(paramType))
- {
- if (basicType->getBaseType() != BaseType::UInt)
- {
- xComponent = builder.emitBitCast(basicType, xComponent);
- }
- }
- return LegalizedVaryingVal::makeValue(xComponent);
- }
- // For vector types, use a swizzle to extract the needed components
- else if (auto vectorType = as<IRVectorType>(paramType))
- {
- auto elementCount = getIntVal(vectorType->getElementCount());
-
- if (elementCount > 0 && elementCount <= 3)
- {
- // Setup indices for the swizzle (0 for x, 1 for y, 2 for z)
- UInt swizzleIndices[3] = {0, 1, 2};
- auto uintType = builder.getBasicType(BaseType::UInt);
-
- // Use a swizzle to extract all needed components at once
- auto extractedVector = builder.emitSwizzle(
- builder.getVectorType(uintType, elementCount),
- id,
- elementCount,
- swizzleIndices);
-
- // Cast if the element type is not uint
- auto elementType = vectorType->getElementType();
- if (auto basicElementType = as<IRBasicType>(elementType))
- {
- if (basicElementType->getBaseType() != BaseType::UInt)
- {
- extractedVector = builder.emitBitCast(vectorType, extractedVector);
- }
- }
- return LegalizedVaryingVal::makeValue(extractedVector);
- }
- }
- // Default to the full uint3 if the parameter type doesn't match our expectations
- return LegalizedVaryingVal::makeValue(id);
- }
};
@@ -1529,14 +1532,13 @@ struct CPUEntryPointVaryingParamLegalizeContext : EntryPointVaryingParamLegalize
switch (info.systemValueSemanticName)
{
case SystemValueSemanticName::GroupID:
- return LegalizedVaryingVal::makeValue(groupID);
+ return createLegalizedSystemVaryingValInst(info, groupID);
case SystemValueSemanticName::GroupThreadID:
- return LegalizedVaryingVal::makeValue(groupThreadID);
+ return createLegalizedSystemVaryingValInst(info, groupThreadID);
case SystemValueSemanticName::GroupIndex:
- return LegalizedVaryingVal::makeValue(groupThreadIndex);
+ return createLegalizedSystemVaryingValInst(info, groupThreadIndex);
case SystemValueSemanticName::DispatchThreadID:
- return LegalizedVaryingVal::makeValue(dispatchThreadID);
-
+ return createLegalizedSystemVaryingValInst(info, dispatchThreadID);
default:
return diagnoseUnsupportedSystemVal(info);
}
@@ -2997,45 +2999,6 @@ private:
fixUpFuncType(func, structType);
}
- IRInst* tryConvertValue(IRBuilder& builder, IRInst* val, IRType* toType)
- {
- auto fromType = val->getFullType();
- if (auto fromVector = as<IRVectorType>(fromType))
- {
- if (auto toVector = as<IRVectorType>(toType))
- {
- if (fromVector->getElementCount() != toVector->getElementCount())
- {
- fromType = builder.getVectorType(
- fromVector->getElementType(),
- toVector->getElementCount());
- val = builder.emitVectorReshape(fromType, val);
- }
- }
- else if (as<IRBasicType>(toType))
- {
- UInt index = 0;
- val = builder.emitSwizzle(fromVector->getElementType(), val, 1, &index);
- if (toType->getOp() == kIROp_VoidType)
- return nullptr;
- }
- }
- else if (auto fromBasicType = as<IRBasicType>(fromType))
- {
- if (fromBasicType->getOp() == kIROp_VoidType)
- return nullptr;
- if (!as<IRBasicType>(toType))
- return nullptr;
- if (toType->getOp() == kIROp_VoidType)
- return nullptr;
- }
- else
- {
- return nullptr;
- }
- return builder.emitCast(toType, val);
- }
-
void legalizeSystemValueParameters(EntryPointInfo entryPoint)
{
List<SystemValLegalizationWorkItem> systemValWorkItems =
diff --git a/tests/cuda/dispatch-thread-id-extraction.slang b/tests/cuda/dispatch-thread-id-extraction.slang
index 5fc3c89a6..02705ff24 100644
--- a/tests/cuda/dispatch-thread-id-extraction.slang
+++ b/tests/cuda/dispatch-thread-id-extraction.slang
@@ -28,8 +28,9 @@ void computeMain3(int2 tid: SV_DispatchThreadID, StructuredBuffer<uint> src, RWS
{
dst[tid.x] = src[tid.x];
}
-// CHECK: int _S3 = (slang_bit_cast<int2 >(uint2 {(blockIdx * blockDim + threadIdx).x, (blockIdx * blockDim + threadIdx).y})).x;
-
+// CHECK: uint2 _S3 = uint2 {(blockIdx * blockDim + threadIdx).x, (blockIdx * blockDim + threadIdx).y};
+// CHECK: int2 _S4 = make_int2 ((int)_S3.x, (int)_S3.y);
+// CHECK: int _S5 = _S4.x;
[shader("compute")]
[numthreads(1, 1, 1)]
@@ -37,7 +38,7 @@ void computeMain4(int tid: SV_DispatchThreadID, StructuredBuffer<uint> src, RWSt
{
dst[tid.x] = src[tid.x];
}
-// CHECK: int _S4 = (slang_bit_cast<int>((blockIdx * blockDim + threadIdx).x));
+// CHECK: int _S6 = int((blockIdx * blockDim + threadIdx).x);
[shader("compute")]
[numthreads(1, 1, 1)]
@@ -45,4 +46,4 @@ void computeMain5(int tid: SV_GroupIndex, StructuredBuffer<uint> src, RWStructur
{
dst[tid.x] = src[tid.x];
}
-// CHECK: int _S5 = (slang_bit_cast<int>((threadIdx.z * blockDim.y + threadIdx.y) * blockDim.x + threadIdx.x)); \ No newline at end of file
+// CHECK: int _S7 = int((threadIdx.z * blockDim.y + threadIdx.y) * blockDim.x + threadIdx.x);
diff --git a/tests/language-feature/enums/enum-switch-2.slang b/tests/language-feature/enums/enum-switch-2.slang
index f5266f35d..de4a4757b 100644
--- a/tests/language-feature/enums/enum-switch-2.slang
+++ b/tests/language-feature/enums/enum-switch-2.slang
@@ -1,4 +1,5 @@
//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-slang -compute -shaderobj -output-using-type
+//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-cpu -compute -shaderobj -output-using-type
//TEST(compute, vulkan):COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-vk -compute -shaderobj -output-using-type
enum class E : uint32_t
diff --git a/tests/language-feature/system-value-extraction.slang b/tests/language-feature/system-value-extraction.slang
new file mode 100644
index 000000000..12ca07a96
--- /dev/null
+++ b/tests/language-feature/system-value-extraction.slang
@@ -0,0 +1,31 @@
+//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHECK): -dx11 -compute
+//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHECK): -dx12 -compute
+//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHECK): -cpu -compute
+//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHECK): -cuda -compute
+//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHECK): -wgsl -compute
+//DISABLE_TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHECK): -mtl -compute
+//TEST(compute,vulkan):COMPARE_COMPUTE_EX(filecheck-buffer=CHECK): -vk -compute
+
+// Slang allows the type of system value semantics to differ from their
+// "canonical" type - e.g. `uint8_t tid : SV_DispatchThreadID` is technically
+// valid and refers to the first element in the underlying uint3 vector.
+
+// TEST_INPUT: ubuffer(data=[0 0 0 0 0 0 0 0], stride=4):out,name=outputBuffer
+RWStructuredBuffer<int> outputBuffer;
+
+[numthreads(2, 4, 1)]
+void computeMain(
+ uint ind : SV_GroupIndex,
+ vector<uint, 2> tid: SV_DispatchThreadID,
+ int localTid : SV_GroupThreadID
+){
+ // CHECK: 0
+ // CHECK-NEXT: 5
+ // CHECK-NEXT: 8
+ // CHECK-NEXT: D
+ // CHECK-NEXT: 10
+ // CHECK-NEXT: 15
+ // CHECK-NEXT: 18
+ // CHECK-NEXT: 1D
+ outputBuffer[ind] = 4 * (tid.x + 2 * tid.y) + localTid;
+}