diff options
| author | Yong He <yonghe@outlook.com> | 2024-05-01 16:44:22 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-05-01 16:44:22 -0700 |
| commit | 0bb826f8b92aec330875d0b966c1f4a6b99988bf (patch) | |
| tree | f0d086d4bfb93e302fcb8232816842ccfc182480 /source/slang/slang-ir-lower-buffer-element-type.cpp | |
| parent | 4533c825fe628e08228037b846ee9d10004fd56f (diff) | |
SPIRV: Fix performance issue when handling large arrays. (#4064)
* SPIRV: Fix performance issue when handling large arrays.
* Add test for packing.
* Fix clang.
Diffstat (limited to 'source/slang/slang-ir-lower-buffer-element-type.cpp')
| -rw-r--r-- | source/slang/slang-ir-lower-buffer-element-type.cpp | 76 |
1 files changed, 64 insertions, 12 deletions
```diff
diff --git a/source/slang/slang-ir-lower-buffer-element-type.cpp b/source/slang/slang-ir-lower-buffer-element-type.cpp
index e9fbfc0d1..360596741 100644
--- a/source/slang/slang-ir-lower-buffer-element-type.cpp
+++ b/source/slang/slang-ir-lower-buffer-element-type.cpp
@@ -9,6 +9,8 @@ namespace Slang
 {
     struct LoweredElementTypeContext
     {
+        static const IRIntegerValue kMaxArraySizeToUnroll = 32;
+
         struct LoweredElementTypeInfo
         {
             IRType* originalType;
@@ -161,17 +163,42 @@ namespace Slang
             auto packedParam = builder.emitParam(structType);
             auto packedArray = builder.emitFieldExtract(innerArrayType, packedParam, dataKey);
             auto count = getIntVal(arrayType->getElementCount());
-            List<IRInst*> args;
-            args.setCount((Index)count);
-            for (IRIntegerValue ii = 0; ii < count; ++ii)
+            IRInst* result = nullptr;
+            if (count <= kMaxArraySizeToUnroll)
+            {
+                // If the array is small enough, just process each element directly.
+                List<IRInst*> args;
+                args.setCount((Index)count);
+                for (IRIntegerValue ii = 0; ii < count; ++ii)
+                {
+                    auto packedElement = builder.emitElementExtract(packedArray, ii);
+                    auto originalElement = innerTypeInfo.convertLoweredToOriginal
+                        ? builder.emitCallInst(innerTypeInfo.originalType, innerTypeInfo.convertLoweredToOriginal, 1, &packedElement)
+                        : packedElement;
+                    args[(Index)ii] = originalElement;
+                }
+                result = builder.emitMakeArray(arrayType, (UInt)args.getCount(), args.getBuffer());
+
+            }
+            else
             {
-                auto packedElement = builder.emitElementExtract(packedArray, ii);
+                // The general case for large arrays is to emit a loop through the elements.
+                IRVar* resultVar = builder.emitVar(arrayType);
+                IRBlock* loopBodyBlock;
+                IRBlock* loopBreakBlock;
+                auto loopParam = emitLoopBlocks(&builder, builder.getIntValue(builder.getIntType(), 0), builder.getIntValue(builder.getIntType(), count),
+                    loopBodyBlock, loopBreakBlock);
+
+                builder.setInsertBefore(loopBodyBlock->getFirstOrdinaryInst());
+                auto packedElement = builder.emitElementExtract(packedArray, loopParam);
                 auto originalElement = innerTypeInfo.convertLoweredToOriginal
                     ? builder.emitCallInst(innerTypeInfo.originalType, innerTypeInfo.convertLoweredToOriginal, 1, &packedElement)
                     : packedElement;
-                args[(Index)ii] = originalElement;
+                auto varPtr = builder.emitElementAddress(resultVar, loopParam);
+                builder.emitStore(varPtr, originalElement);
+                builder.setInsertInto(loopBreakBlock);
+                result = builder.emitLoad(resultVar);
             }
-            auto result = builder.emitMakeArray(arrayType, (UInt)args.getCount(), args.getBuffer());
             builder.emitReturn(result);
             return func;
         }
@@ -191,18 +218,43 @@ namespace Slang
             builder.setInsertInto(func);
             builder.emitBlock();
             auto originalParam = builder.emitParam(arrayType);
+            IRInst* packedArray = nullptr;
             auto count = getIntVal(arrayType->getElementCount());
-            List<IRInst*> args;
-            args.setCount((Index)count);
-            for (IRIntegerValue ii = 0; ii < count; ++ii)
+            if (count <= kMaxArraySizeToUnroll)
+            {
+                // If the array is small enough, just process each element directly.
+                List<IRInst*> args;
+                args.setCount((Index)count);
+                for (IRIntegerValue ii = 0; ii < count; ++ii)
+                {
+                    auto originalElement = builder.emitElementExtract(originalParam, ii);
+                    auto packedElement = innerTypeInfo.convertOriginalToLowered
+                        ? builder.emitCallInst(innerTypeInfo.loweredType, innerTypeInfo.convertOriginalToLowered, 1, &originalElement)
+                        : originalElement;
+                    args[(Index)ii] = packedElement;
+                }
+                packedArray = builder.emitMakeArray(innerArrayType, (UInt)args.getCount(), args.getBuffer());
+            }
+            else
             {
-                auto originalElement = builder.emitElementExtract(originalParam, ii);
+                // The general case for large arrays is to emit a loop through the elements.
+                IRVar* packedArrayVar = builder.emitVar(innerArrayType);
+                IRBlock* loopBodyBlock;
+                IRBlock* loopBreakBlock;
+                auto loopParam = emitLoopBlocks(&builder, builder.getIntValue(builder.getIntType(), 0), builder.getIntValue(builder.getIntType(), count),
+                    loopBodyBlock, loopBreakBlock);
+
+                builder.setInsertBefore(loopBodyBlock->getFirstOrdinaryInst());
+                auto originalElement = builder.emitElementExtract(originalParam, loopParam);
                 auto packedElement = innerTypeInfo.convertOriginalToLowered
                     ? builder.emitCallInst(innerTypeInfo.loweredType, innerTypeInfo.convertOriginalToLowered, 1, &originalElement)
                     : originalElement;
-                args[(Index)ii] = packedElement;
+                auto varPtr = builder.emitElementAddress(packedArrayVar, loopParam);
+                builder.emitStore(varPtr, packedElement);
+                builder.setInsertInto(loopBreakBlock);
+                packedArray = builder.emitLoad(packedArrayVar);
             }
-            auto packedArray = builder.emitMakeArray(innerArrayType, (UInt)args.getCount(), args.getBuffer());
+
             auto result = builder.emitMakeStruct(structType, 1, &packedArray);
             builder.emitReturn(result);
             return func;
```
