diff options
| author | kaizhangNV <149626564+kaizhangNV@users.noreply.github.com> | 2024-12-12 16:50:44 -0600 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-12-12 14:50:44 -0800 |
| commit | 78c9bd1c2fbd55889e62a2032e9bc96684ced3b5 (patch) | |
| tree | 63332e647aa597450b642751c8c6b2fd7f66439e | |
| parent | b4e63d7bc44fc969d24202fc51a774378a489294 (diff) | |
Bit extract (#5847)
* promoting bitfield extraction and insertion to become intrinsics for internal compiler use
* removing duplicate intrinsics from glsl.meta.slang
* refactor: update function signatures of bitfield extraction and insertion to use uint as the parameter type for offset and bits.
---------
Co-authored-by: Nate Morrical <natemorrical@gmail.com>
Co-authored-by: Yong He <yonghe@outlook.com>
| -rw-r--r-- | source/slang/core.meta.slang | 17 | ||||
| -rw-r--r-- | source/slang/glsl.meta.slang | 162 | ||||
| -rw-r--r-- | source/slang/slang-emit-c-like.cpp | 376 | ||||
| -rw-r--r-- | source/slang/slang-emit-c-like.h | 7 | ||||
| -rw-r--r-- | source/slang/slang-emit-glsl.cpp | 39 | ||||
| -rw-r--r-- | source/slang/slang-emit-glsl.h | 3 | ||||
| -rw-r--r-- | source/slang/slang-emit-spirv.cpp | 60 | ||||
| -rw-r--r-- | source/slang/slang-ir-inst-defs.h | 3 | ||||
| -rw-r--r-- | source/slang/slang-ir-insts.h | 4 | ||||
| -rw-r--r-- | source/slang/slang-ir.cpp | 34 | ||||
| -rw-r--r-- | tests/glsl-intrinsic/intrinsic-basic.slang | 16 | ||||
| -rw-r--r-- | tests/language-feature/bitfield/bitfield-extract.slang | 64 | ||||
| -rw-r--r-- | tests/language-feature/bitfield/bitfield-insert.slang | 68 |
13 files changed, 683 insertions, 170 deletions
diff --git a/source/slang/core.meta.slang b/source/slang/core.meta.slang index 3a7df8e7a..625f8f608 100644 --- a/source/slang/core.meta.slang +++ b/source/slang/core.meta.slang @@ -2714,6 +2714,23 @@ __generic<T, U> __intrinsic_op($(kIROp_Reinterpret)) T reinterpret(U value); +/// `bitfieldInsert` inserts the `bits` least significant bits of `insert` into `base` at bit offset `offset`. +/// The returned value will have bits [offset, offset + bits - 1] taken from bits [0, bits - 1] of `insert` +/// and all other bits taken directly from the corresponding bits of `base`. +__generic<T> +[__readNone] +[__unsafeForceInlineEarly] +__intrinsic_op($(kIROp_BitfieldInsert)) +T bitfieldInsert(T base, T insert, uint offset, uint bits); + +/// `bitfieldExtract` extracts a subset of the bits of `value` and +/// returns it in the least significant bits of the result. The range of bits extracted is [offset, offset + bits - 1]. +__generic<T> +[__readNone] +[__unsafeForceInlineEarly] +__intrinsic_op($(kIROp_BitfieldExtract)) +T bitfieldExtract(T value, uint offset, uint bits); + /// Use an otherwise unused value /// This can be used to silence the warning about returning before initializing an out paramter. 
__generic<T> diff --git a/source/slang/glsl.meta.slang b/source/slang/glsl.meta.slang index 1d91930e4..361a956f2 100644 --- a/source/slang/glsl.meta.slang +++ b/source/slang/glsl.meta.slang @@ -1153,168 +1153,6 @@ public void imulExtended(highp vector<int,N> x, highp vector<int,N> y, out highp [__readNone] [ForceInline] [require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)] -public int bitfieldExtract(int value, int offset, int bits) -{ - __target_switch - { - case glsl: __intrinsic_asm "bitfieldExtract"; - case spirv: return spirv_asm { - result:$$int = OpBitFieldSExtract $value $offset $bits - }; - default: - return int(uint(value >> offset) & ((1u << bits) - 1)); - } -} - -__generic<let N:int> -[__readNone] -[ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)] -public vector<int,N> bitfieldExtract(vector<int,N> value, int offset, int bits) -{ - __target_switch - { - case glsl: __intrinsic_asm "bitfieldExtract"; - case spirv: return spirv_asm { - result:$$vector<int,N> = OpBitFieldSExtract $value $offset $bits - }; - default: - vector<int,N> result; - [ForceUnroll] - for (int i = 0; i < N; ++i) - { - result[i] = bitfieldExtract(value[i], offset, bits); - } - return result; - } -} - -[__readNone] -[ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)] -public uint bitfieldExtract(uint value, int offset, int bits) -{ - __target_switch - { - case glsl: __intrinsic_asm "bitfieldExtract"; - case spirv: return spirv_asm { - result:$$uint = OpBitFieldUExtract $value $offset $bits - }; - default: - return (value >> offset) & ((1u << bits) - 1); - } -} - -__generic<let N:int> -[__readNone] -[ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)] -public vector<uint,N> bitfieldExtract(vector<uint,N> value, int offset, int bits) -{ - __target_switch - { - case glsl: __intrinsic_asm "bitfieldExtract"; - case spirv: return spirv_asm { - result:$$vector<uint,N> = OpBitFieldUExtract $value $offset $bits - }; - default: - vector<uint,N> result; - [ForceUnroll] - for 
(int i = 0; i < N; ++i) - { - result[i] = bitfieldExtract(value[i], offset, bits); - } - return result; - } -} - -[__readNone] -[ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)] -public uint bitfieldInsert(uint base, uint insert, int offset, int bits) -{ - __target_switch - { - case glsl: __intrinsic_asm "bitfieldInsert"; - case spirv: return spirv_asm { - result:$$uint = OpBitFieldInsert $base $insert $offset $bits - }; - default: - uint clearMask = ~(((1u << bits) - 1u) << offset); - uint clearedBase = base & clearMask; - uint maskedInsert = (insert & ((1u << bits) - 1u)) << offset; - return clearedBase | maskedInsert; - } -} - -__generic<let N:int> -[__readNone] -[ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)] -public vector<uint,N> bitfieldInsert(vector<uint,N> base, vector<uint,N> insert, int offset, int bits) -{ - __target_switch - { - case glsl: __intrinsic_asm "bitfieldInsert"; - case spirv: return spirv_asm { - result:$$vector<uint,N> = OpBitFieldInsert $base $insert $offset $bits - }; - default: - vector<uint,N> result; - [ForceUnroll] - for (int i = 0; i < N; ++i) - { - result[i] = bitfieldInsert(base[i], insert[i], offset, bits); - } - return result; - } -} - -[__readNone] -[ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)] -public int bitfieldInsert(int base, int insert, int offset, int bits) -{ - __target_switch - { - case glsl: __intrinsic_asm "bitfieldInsert"; - case spirv: return spirv_asm { - result:$$int = OpBitFieldInsert $base $insert $offset $bits - }; - default: - uint clearMask = ~(((1u << bits) - 1u) << offset); - uint clearedBase = base & clearMask; - uint maskedInsert = (insert & ((1u << bits) - 1u)) << offset; - return clearedBase | maskedInsert; - } -} - -__generic<let N:int> -[__readNone] -[ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)] -public vector<int,N> bitfieldInsert(vector<int,N> base, vector<int,N> insert, int offset, int bits) -{ - __target_switch - { - case glsl: __intrinsic_asm 
"bitfieldInsert"; - case spirv: return spirv_asm { - result:$$vector<int,N> = OpBitFieldInsert $base $insert $offset $bits - }; - default: - vector<int,N> result; - [ForceUnroll] - for (int i = 0; i < N; ++i) - { - result[i] = bitfieldInsert(base[i], insert[i], offset, bits); - } - return result; - } -} - -[__readNone] -[ForceInline] -[require(cpp_cuda_glsl_hlsl_spirv, GLSL_400)] public int bitfieldReverse(highp int value) { __target_switch diff --git a/source/slang/slang-emit-c-like.cpp b/source/slang/slang-emit-c-like.cpp index 50fb6655e..f38adc67e 100644 --- a/source/slang/slang-emit-c-like.cpp +++ b/source/slang/slang-emit-c-like.cpp @@ -2862,6 +2862,16 @@ void CLikeSourceEmitter::defaultEmitInstExpr(IRInst* inst, const EmitOpInfo& inO m_writer->emit(")"); break; } + case kIROp_BitfieldExtract: + { + emitBitfieldExtractImpl(inst); + break; + } + case kIROp_BitfieldInsert: + { + emitBitfieldInsertImpl(inst); + break; + } case kIROp_PackAnyValue: { m_writer->emit("packAnyValue<"); @@ -3839,6 +3849,372 @@ void CLikeSourceEmitter::emitFuncDecorationsImpl(IRFunc* func) } } +bool CLikeSourceEmitter::tryGetIntInfo(IRType* elementType, bool& isSigned, int& bitWidth) +{ + Slang::IROp type = elementType->getOp(); + if (!(type >= kIROp_Int8Type && type <= kIROp_UInt64Type)) + return false; + isSigned = (type >= kIROp_Int8Type && type <= kIROp_Int64Type); + + Slang::IROp stype = (isSigned) ? 
type : Slang::IROp(type - 4); + bitWidth = 8 << (stype - kIROp_Int8Type); + return true; +} + +void CLikeSourceEmitter::emitVecNOrScalar( + IRVectorType* vectorType, + std::function<void()> emitComponentLogic) +{ + if (vectorType) + { + int N = int(getIntVal(vectorType->getElementCount())); + Slang::IRType* elementType = vectorType->getElementType(); + + // Special handling required for CUDA target + if (isCUDATarget(getTargetReq())) + { + m_writer->emit("make_"); + + switch (elementType->getOp()) + { + case kIROp_Int8Type: + m_writer->emit("char"); + break; + case kIROp_Int16Type: + m_writer->emit("short"); + break; + case kIROp_IntType: + m_writer->emit("int"); + break; + case kIROp_Int64Type: + m_writer->emit("longlong"); + break; + case kIROp_UInt8Type: + m_writer->emit("uchar"); + break; + case kIROp_UInt16Type: + m_writer->emit("ushort"); + break; + case kIROp_UIntType: + m_writer->emit("uint"); + break; + case kIROp_UInt64Type: + m_writer->emit("ulonglong"); + break; + default: + SLANG_ABORT_COMPILATION("Unhandled type emitting CUDA vector"); + } + + m_writer->emitRawText(std::to_string(N).c_str()); + } + + // In other languages, we can output the Slang vector type directly + else + { + emitType(vectorType); + } + + m_writer->emit("("); + for (int i = 0; i < N; ++i) + { + emitType(elementType); + m_writer->emit("("); + emitComponentLogic(); + m_writer->emit(")"); + if (i != N - 1) + m_writer->emit(", "); + } + m_writer->emit(")"); + } + else + { + m_writer->emit("("); + emitComponentLogic(); + m_writer->emit(")"); + } +} + +String CLikeSourceEmitter::_emitLiteralOneWithType(int bitWidth) +{ + if (getTarget() == CodeGenTarget::WGSL) + { + if (bitWidth != 32) + { + SLANG_DIAGNOSE_UNEXPECTED(getSink(), SourceLoc(), "unexpected bit width"); + return String(); + } + else + { + String one; + one = "u32(1)"; + return one; + } + } + + String one; + switch (bitWidth) + { + case 8: + one = "uint8_t(1)"; + break; + case 16: + one = "uint16_t(1)"; + break; + case 32: + 
one = "uint32_t(1)"; + break; + case 64: + one = "uint64_t(1)"; + break; + default: + SLANG_DIAGNOSE_UNEXPECTED(getSink(), SourceLoc(), "unexpected bit width"); + } + return one; +} + +void CLikeSourceEmitter::emitBitfieldExtractImpl(IRInst* inst) +{ + // If unsigned, bfue := ((val>>off)&((1u<<bts)-1)) + // Else signed, bfse := (((val>>off)&((1u<<bts)-1))<<(nbts-bts)>>(nbts-bts)); + // + // Note: In WGSL, the data type for bit operators are more restricted than in other languages. + // The number of bits to shift must be a u32 or vecN<u32>, therefore we have to cast this + // operand to u32 always. Another constraint is that for "&" and "|" operators, the operands + // must have the same type. + // TODO: We can consider to bring the logic to WGSLSourceEmitter::emitBitfieldExtractImpl so + // that we don't have to have those special handling here. + Slang::IRType* dataType = inst->getDataType(); + Slang::IRInst* val = inst->getOperand(0); + Slang::IRInst* off = inst->getOperand(1); + Slang::IRInst* bts = inst->getOperand(2); + + Slang::IRType* elementType = dataType; + IRVectorType* vectorType = as<IRVectorType>(elementType); + IRVectorType* vectorTypeForShiftNumber = nullptr; + + if (vectorType) + { + elementType = vectorType->getElementType(); + + if (getTarget() == CodeGenTarget::WGSL) + { + IRBuilder builder(elementType); + vectorTypeForShiftNumber = + builder.getVectorType(builder.getUIntType(), vectorType->getElementCount()); + } + else + { + vectorTypeForShiftNumber = vectorType; + } + } + + bool isSigned; + int bitWidth; + if (!tryGetIntInfo(elementType, isSigned, bitWidth)) + { + SLANG_DIAGNOSE_UNEXPECTED( + getSink(), + SourceLoc(), + "non-integer element type given to bitfieldExtract"); + return; + } + + String one = _emitLiteralOneWithType(bitWidth); + + // Emit open paren and type cast for later sign extension + if (isSigned) + { + m_writer->emit("("); + emitType(inst->getDataType()); + m_writer->emit("("); + } + + // Emit bitfield extraction ( (val >> 
off) & ((1u << bts) - 1) ) + m_writer->emit("("); + + // In WGSL, "&" operator requires the operands to have the same type, since the + // right operand '((1u << bts) - 1)' is known to be u32, we need to cast the left operand to + // u32. + if (getTarget() == CodeGenTarget::WGSL) + { + (vectorTypeForShiftNumber != nullptr) ? emitType(vectorTypeForShiftNumber) + : m_writer->emit("u32"); + } + + m_writer->emit("("); + + emitOperand(val, getInfo(EmitOp::General)); + m_writer->emit(">>"); + emitVecNOrScalar( + vectorTypeForShiftNumber, + [&]() { emitOperand(off, getInfo(EmitOp::General)); }); + + m_writer->emit(")&("); + emitVecNOrScalar( + vectorTypeForShiftNumber, + [&]() + { + m_writer->emit("((" + one + "<<"); + emitOperand(bts, getInfo(EmitOp::General)); + m_writer->emit(")-" + one + ")"); + }); + m_writer->emit("))"); + + // Emit sign extension logic + // ( type(bitfield << (numBits - bts) ) >> (numBits - bts) ) + // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + if (isSigned) + { + m_writer->emit("<<"); + emitVecNOrScalar( + vectorTypeForShiftNumber, + [&]() + { + m_writer->emit("("); + m_writer->emit(bitWidth); + m_writer->emit("-"); + emitOperand(bts, getInfo(EmitOp::General)); + m_writer->emit(")"); + }); + m_writer->emit(")>>"); + emitVecNOrScalar( + vectorTypeForShiftNumber, + [&]() + { + m_writer->emit("("); + m_writer->emit(bitWidth); + m_writer->emit("-"); + emitOperand(bts, getInfo(EmitOp::General)); + m_writer->emit(")"); + }); + m_writer->emit(")"); + } +} + +void CLikeSourceEmitter::emitBitfieldInsertImpl(IRInst* inst) +{ + // uint clearMask = ~(((1u << bits) - 1u) << offset); + // uint clearedBase = base & clearMask; + // uint maskedInsert = (insert & ((1u << bits) - 1u)) << offset; + // BitfieldInsert := T(uint(clearedBase) | uint(maskedInsert)); + Slang::IRType* dataType = inst->getDataType(); + Slang::IRInst* base = inst->getOperand(0); + Slang::IRInst* insert = inst->getOperand(1); + Slang::IRInst* off = inst->getOperand(2); + Slang::IRInst* bts = 
inst->getOperand(3); + + + Slang::IRType* elementType = dataType; + IRVectorType* vectorType = as<IRVectorType>(elementType); + IRVectorType* vectorTypeForShiftNumber = nullptr; + + if (vectorType) + { + elementType = vectorType->getElementType(); + + if (getTarget() == CodeGenTarget::WGSL) + { + IRBuilder builder(elementType); + vectorTypeForShiftNumber = + builder.getVectorType(builder.getUIntType(), vectorType->getElementCount()); + } + else + { + vectorTypeForShiftNumber = vectorType; + } + } + + bool isSigned; + int bitWidth; + if (!tryGetIntInfo(elementType, isSigned, bitWidth)) + { + SLANG_DIAGNOSE_UNEXPECTED( + getSink(), + SourceLoc(), + "non-integer element type given to bitfieldInsert"); + return; + } + + String one = _emitLiteralOneWithType(bitWidth); + + if (isSigned) + { + emitType(inst->getDataType()); + m_writer->emit("("); + } + m_writer->emit("("); + + // emit clearedBase := uint( base & ~( ((1u << bts) - 1u) << off ) ) + + // In WGSL, "&" operator requires the operands to have the same type, since the + // right operand '~( ((1u << bts) - 1u) << off )' is known to be u32, we need to + // cast the left operand to u32. + if (getTarget() == CodeGenTarget::WGSL) + { + (vectorTypeForShiftNumber != nullptr) ? 
emitType(vectorTypeForShiftNumber) + : m_writer->emit("u32"); + } + + m_writer->emit("("); + emitOperand(base, getInfo(EmitOp::General)); + m_writer->emit(")"); + + m_writer->emit("&"); + emitVecNOrScalar( + vectorTypeForShiftNumber, + [&]() + { + m_writer->emit("~(((" + one + "<<"); + emitOperand(bts, getInfo(EmitOp::General)); + + m_writer->emit(")-" + one + ")<<"); + + emitOperand(off, getInfo(EmitOp::General)); + m_writer->emit(")"); + }); + + // bitwise or clearedBase with maskedInsert + m_writer->emit(")|("); + + // Emit maskedInsert := ((insert & ((1u << bits) - 1u)) << offset); + + // - first emit mask := (insert & ((1u << bits) - 1u)) + m_writer->emit("("); + + // For the same reason as above, we need to cast the left operand to u32 for WGSL target. + if (getTarget() == CodeGenTarget::WGSL) + { + (vectorTypeForShiftNumber != nullptr) ? emitType(vectorTypeForShiftNumber) + : m_writer->emit("u32"); + } + m_writer->emit("("); + emitOperand(insert, getInfo(EmitOp::General)); + m_writer->emit(")"); + + m_writer->emit("&"); + emitVecNOrScalar( + vectorTypeForShiftNumber, + [&]() + { + m_writer->emit("(" + one + "<<"); + emitOperand(bts, getInfo(EmitOp::General)); + m_writer->emit(")-" + one); + }); + m_writer->emit(")"); + + // then emit shift := << offset + m_writer->emit("<<"); + emitVecNOrScalar( + vectorTypeForShiftNumber, + [&]() { emitOperand(off, getInfo(EmitOp::General)); }); + m_writer->emit(")"); + + if (isSigned) + { + m_writer->emit(")"); + } +} + void CLikeSourceEmitter::emitStruct(IRStructType* structType) { ensureTypePrelude(structType); diff --git a/source/slang/slang-emit-c-like.h b/source/slang/slang-emit-c-like.h index 1da3a64dc..7f6f32923 100644 --- a/source/slang/slang-emit-c-like.h +++ b/source/slang/slang-emit-c-like.h @@ -615,6 +615,11 @@ protected: SLANG_UNUSED(baseName); } + bool tryGetIntInfo(IRType* elementType, bool& isSigned, int& bitWidth); + void emitVecNOrScalar(IRVectorType* vectorType, std::function<void()> func); + virtual 
void emitBitfieldExtractImpl(IRInst* inst); + virtual void emitBitfieldInsertImpl(IRInst* inst); + virtual void emitSubpassInputTypeImpl(IRSubpassInputType* type) { SLANG_UNUSED(type); } // Again necessary for & prefix intrinsics. May be removable in the future @@ -671,6 +676,8 @@ protected: // one. void _emitSwizzleStorePerElement(IRInst* inst); + String _emitLiteralOneWithType(int bitWidth); + CodeGenContext* m_codeGenContext = nullptr; IRModule* m_irModule = nullptr; diff --git a/source/slang/slang-emit-glsl.cpp b/source/slang/slang-emit-glsl.cpp index f22419147..2c2447c53 100644 --- a/source/slang/slang-emit-glsl.cpp +++ b/source/slang/slang-emit-glsl.cpp @@ -2949,6 +2949,45 @@ void GLSLSourceEmitter::emitFuncDecorationImpl(IRDecoration* decoration) } } +void GLSLSourceEmitter::emitBitfieldExtractImpl(IRInst* inst) +{ + m_writer->emit("bitfieldExtract("); + + emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); + m_writer->emit(","); + + m_writer->emit("int("); + emitOperand(inst->getOperand(1), getInfo(EmitOp::General)); + m_writer->emit(")"); + m_writer->emit(","); + + m_writer->emit("int("); + emitOperand(inst->getOperand(2), getInfo(EmitOp::General)); + m_writer->emit("))"); +} + +void GLSLSourceEmitter::emitBitfieldInsertImpl(IRInst* inst) +{ + m_writer->emit("bitfieldInsert("); + + emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); + m_writer->emit(","); + + emitOperand(inst->getOperand(1), getInfo(EmitOp::General)); + m_writer->emit(","); + + m_writer->emit("int("); + emitOperand(inst->getOperand(2), getInfo(EmitOp::General)); + m_writer->emit(")"); + m_writer->emit(","); + + m_writer->emit("int("); + emitOperand(inst->getOperand(3), getInfo(EmitOp::General)); + m_writer->emit(")"); + + m_writer->emit(")"); +} + void GLSLSourceEmitter::emitSimpleTypeImpl(IRType* type) { switch (type->getOp()) diff --git a/source/slang/slang-emit-glsl.h b/source/slang/slang-emit-glsl.h index e9ec1dc00..49a7884c2 100644 --- a/source/slang/slang-emit-glsl.h 
+++ b/source/slang/slang-emit-glsl.h @@ -64,6 +64,9 @@ protected: virtual void emitFuncDecorationImpl(IRDecoration* decoration) SLANG_OVERRIDE; virtual void emitGlobalParamDefaultVal(IRGlobalParam* decl) SLANG_OVERRIDE; + virtual void emitBitfieldExtractImpl(IRInst* inst) SLANG_OVERRIDE; + virtual void emitBitfieldInsertImpl(IRInst* inst) SLANG_OVERRIDE; + virtual void handleRequiredCapabilitiesImpl(IRInst* inst) SLANG_OVERRIDE; virtual bool tryEmitGlobalParamImpl(IRGlobalParam* varDecl, IRType* varType) SLANG_OVERRIDE; diff --git a/source/slang/slang-emit-spirv.cpp b/source/slang/slang-emit-spirv.cpp index 8759ea9d4..c618946ec 100644 --- a/source/slang/slang-emit-spirv.cpp +++ b/source/slang/slang-emit-spirv.cpp @@ -3404,6 +3404,12 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex case kIROp_BitCast: result = emitOpBitcast(parent, inst, inst->getDataType(), inst->getOperand(0)); break; + case kIROp_BitfieldExtract: + result = emitBitfieldExtract(parent, inst); + break; + case kIROp_BitfieldInsert: + result = emitBitfieldInsert(parent, inst); + break; case kIROp_MakeUInt64: result = emitMakeUInt64(parent, inst); break; @@ -6537,6 +6543,60 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex inst->getOperand(0)); } + SpvInst* emitBitfieldExtract(SpvInstParent* parent, IRInst* inst) + { + auto dataType = inst->getDataType(); + IRVectorType* vectorType = as<IRVectorType>(dataType); + Slang::IRType* elementType = dataType; + if (vectorType) + elementType = vectorType->getElementType(); + + const IntInfo i = getIntTypeInfo(elementType); + + // NM: technically, using bitfield intrinsics for anything non-32-bit goes against + // VK specification: VUID-StandaloneSpirv-Base-04781. However, it works on at least + // NVIDIA HW. + SpvOp opcode = i.isSigned ? 
SpvOpBitFieldSExtract : SpvOpBitFieldUExtract; + return emitInst( + parent, + inst, + opcode, + inst->getFullType(), + kResultID, + inst->getOperand(0), + inst->getOperand(1), + inst->getOperand(2)); + } + + SpvInst* emitBitfieldInsert(SpvInstParent* parent, IRInst* inst) + { + auto dataType = inst->getDataType(); + IRVectorType* vectorType = as<IRVectorType>(dataType); + Slang::IRType* elementType = dataType; + if (vectorType) + elementType = vectorType->getElementType(); + + const IntInfo i = getIntTypeInfo(elementType); + + if (i.width == 64) + requireSPIRVCapability(SpvCapabilityInt64); + if (i.width == 16) + requireSPIRVCapability(SpvCapabilityInt16); + if (i.width == 8) + requireSPIRVCapability(SpvCapabilityInt8); + + return emitInst( + parent, + inst, + SpvOpBitFieldInsert, + inst->getFullType(), + kResultID, + inst->getOperand(0), + inst->getOperand(1), + inst->getOperand(2), + inst->getOperand(3)); + } + template<typename T, typename Ts> SpvInst* emitCompositeConstruct( SpvInstParent* parent, diff --git a/source/slang/slang-ir-inst-defs.h b/source/slang/slang-ir-inst-defs.h index 6d4f7a2ca..01466ed00 100644 --- a/source/slang/slang-ir-inst-defs.h +++ b/source/slang/slang-ir-inst-defs.h @@ -387,6 +387,9 @@ INST(Alloca, alloca, 1, 0) INST(UpdateElement, updateElement, 2, 0) INST(DetachDerivative, detachDerivative, 1, 0) +INST(BitfieldExtract, bitfieldExtract, 3, 0) +INST(BitfieldInsert, bitfieldInsert, 4, 0) + INST(PackAnyValue, packAnyValue, 1, 0) INST(UnpackAnyValue, unpackAnyValue, 1, 0) diff --git a/source/slang/slang-ir-insts.h b/source/slang/slang-ir-insts.h index 829e72575..53adce87a 100644 --- a/source/slang/slang-ir-insts.h +++ b/source/slang/slang-ir-insts.h @@ -3893,6 +3893,10 @@ public: IRInst* emitGlobalValueRef(IRInst* globalInst); + IRInst* emitBitfieldExtract(IRType* type, IRInst* op0, IRInst* op1, IRInst* op2); + + IRInst* emitBitfieldInsert(IRType* type, IRInst* op0, IRInst* op1, IRInst* op2, IRInst* op3); + IRInst* emitPackAnyValue(IRType* 
type, IRInst* value); IRInst* emitUnpackAnyValue(IRType* type, IRInst* value); diff --git a/source/slang/slang-ir.cpp b/source/slang/slang-ir.cpp index d1c16a3a1..5e5d94b14 100644 --- a/source/slang/slang-ir.cpp +++ b/source/slang/slang-ir.cpp @@ -1951,6 +1951,20 @@ static T* createInst( } template<typename T> +static T* createInst( + IRBuilder* builder, + IROp op, + IRType* type, + IRInst* arg1, + IRInst* arg2, + IRInst* arg3, + IRInst* arg4) +{ + IRInst* args[] = {arg1, arg2, arg3, arg4}; + return createInstImpl<T>(builder, op, type, 4, &args[0]); +} + +template<typename T> static T* createInstWithTrailingArgs( IRBuilder* builder, IROp op, @@ -3625,7 +3639,25 @@ IRInst* IRBuilder::emitLookupInterfaceMethodInst( IRInst* IRBuilder::emitGetSequentialIDInst(IRInst* rttiObj) { auto inst = createInst<IRAlloca>(this, kIROp_GetSequentialID, getUIntType(), rttiObj); + addInst(inst); + return inst; +} +IRInst* IRBuilder::emitBitfieldExtract(IRType* type, IRInst* value, IRInst* offset, IRInst* bits) +{ + auto inst = createInst<IRInst>(this, kIROp_BitfieldExtract, type, value, offset, bits); + addInst(inst); + return inst; +} + +IRInst* IRBuilder::emitBitfieldInsert( + IRType* type, + IRInst* base, + IRInst* insert, + IRInst* offset, + IRInst* bits) +{ + auto inst = createInst<IRInst>(this, kIROp_BitfieldInsert, type, base, insert, offset, bits); addInst(inst); return inst; } @@ -8188,6 +8220,8 @@ bool IRInst::mightHaveSideEffects(SideEffectAnalysisOptions options) case kIROp_PtrCast: case kIROp_CastDynamicResource: case kIROp_AllocObj: + case kIROp_BitfieldExtract: + case kIROp_BitfieldInsert: case kIROp_PackAnyValue: case kIROp_UnpackAnyValue: case kIROp_Reinterpret: diff --git a/tests/glsl-intrinsic/intrinsic-basic.slang b/tests/glsl-intrinsic/intrinsic-basic.slang index 82c7d142c..4e1dfe8c4 100644 --- a/tests/glsl-intrinsic/intrinsic-basic.slang +++ b/tests/glsl-intrinsic/intrinsic-basic.slang @@ -479,13 +479,13 @@ bool Test_ScalarType() // CHECK_GLSL-COUNT-2: 
bitfieldExtract( // CHECK_SPIR: OpBitFieldSExtract{{ }} // CHECK_SPIR: OpBitFieldUExtract{{ }} - && genIType(0) == bitfieldExtract(genIType(zero), int(zero), int(zero)) - && genUType(0) == bitfieldExtract(genUType(zero), int(zero), int(zero)) + && genIType(0) == bitfieldExtract(genIType(zero), uint(zero), uint(zero)) + && genUType(0) == bitfieldExtract(genUType(zero), uint(zero), uint(zero)) // CHECK_GLSL-COUNT-2: bitfieldInsert( // CHECK_SPIR-COUNT-2: OpBitFieldInsert{{ }} - && genIType(0) == bitfieldInsert(genIType(zero), genIType(zero), int(zero), int(zero)) - && genUType(0) == bitfieldInsert(genUType(zero), genUType(zero), int(zero), int(zero)) + && genIType(0) == bitfieldInsert(genIType(zero), genIType(zero), uint(zero), uint(zero)) + && genUType(0) == bitfieldInsert(genUType(zero), genUType(zero), uint(zero), uint(zero)) // CHECK_GLSL-COUNT-2: bitfieldReverse( // CHECK_SPIR-COUNT-2: OpBitReverse{{ }} @@ -1062,15 +1062,15 @@ bool Test_VectorType() // CHECK_SPIR-NOT: OpBitFieldSExtract{{ }} // CHECK_SPIR: OpBitFieldUExtract{{ }} // CHECK_SPIR-NOT: OpBitFieldUExtract{{ }} - && genIType(0) == bitfieldExtract(genIType(zero), int(zero), int(zero)) - && genUType(0) == bitfieldExtract(genUType(zero), int(zero), int(zero)) + && genIType(0) == bitfieldExtract(genIType(zero), uint(zero), uint(zero)) + && genUType(0) == bitfieldExtract(genUType(zero), uint(zero), uint(zero)) // CHECK_GLSL-COUNT-2: bitfieldInsert( // CHECK_GLSL-NOT: bitfieldInsert( // CHECK_SPIR-COUNT-2: OpBitFieldInsert{{ }} // CHECK_SPIR-NOT: OpBitFieldInsert{{ }} - && genIType(0) == bitfieldInsert(genIType(zero), genIType(zero), int(zero), int(zero)) - && genUType(0) == bitfieldInsert(genUType(zero), genUType(zero), int(zero), int(zero)) + && genIType(0) == bitfieldInsert(genIType(zero), genIType(zero), uint(zero), uint(zero)) + && genUType(0) == bitfieldInsert(genUType(zero), genUType(zero), uint(zero), uint(zero)) // CHECK_GLSL-COUNT-2: bitfieldReverse( // CHECK_GLSL-NOT: bitfieldReverse( diff --git 
a/tests/language-feature/bitfield/bitfield-extract.slang b/tests/language-feature/bitfield/bitfield-extract.slang new file mode 100644 index 000000000..c8cfaace4 --- /dev/null +++ b/tests/language-feature/bitfield/bitfield-extract.slang @@ -0,0 +1,64 @@ +//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-vk -compile-arg -skip-spirv-validation -emit-spirv-directly +//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-dx12 -use-dxil +//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-mtl +//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-cpu +//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-cuda +//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-wgpu + +// CHECK: 1 +// CHECK-NEXT: 2 +// CHECK-NEXT: 3 +// CHECK-NEXT: 4 +// CHECK-NEXT: 5 +// CHECK-NEXT: 6 +// CHECK-NEXT: 7 +// CHECK-NEXT: 8 +// CHECK-NEXT: 21 +// CHECK-NEXT: 7A +// CHECK-NEXT: FFFFFFFA +// CHECK-NEXT: A +// CHECK-NEXT: 67 +// CHECK-NEXT: FFFFFFEF +// CHECK-NEXT: 32 +// CHECK-NEXT: FFFFFFA9 + +//TEST_INPUT:ubuffer(data=[-1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1], stride=4):out,name=i32Buffer +RWStructuredBuffer<int> i32Buffer; + +[numthreads(1, 1, 1)] +void computeMain() +{ + // 32-bit tests + { + // Simple hex extraction to test, varying the offset. 
+ uint value = 0x87654321; + i32Buffer[0] = bitfieldExtract(value, 4 * 0, 4); + i32Buffer[1] = bitfieldExtract(value, 4 * 1, 4); + i32Buffer[2] = bitfieldExtract(value, 4 * 2, 4); + i32Buffer[3] = bitfieldExtract(value, 4 * 3, 4); + i32Buffer[4] = bitfieldExtract(value, 4 * 4, 4); + i32Buffer[5] = bitfieldExtract(value, 4 * 5, 4); + i32Buffer[6] = bitfieldExtract(value, 4 * 6, 4); + i32Buffer[7] = bitfieldExtract(value, 4 * 7, 4); + + // Now varying the bit length + value = 0b00111011111011110001111010100001; + i32Buffer[8] = bitfieldExtract(value, 0, 6); + i32Buffer[9] = bitfieldExtract(value, 6, 8); + + // Sign extension case + // - For unsigned data types, the most significant bits of the result will be set to zero. + // - For signed data types, the most significant bits will be set to the value of bit offset + bits - 1 + // (i.e., it is sign extended to the width of the return type). + i32Buffer[10] = bitfieldExtract(0b1010111, 3, 4); // 0b1010 -> 0b11111111111111111111111111111010 + i32Buffer[11] = bitfieldExtract(0b1010111u, 3, 4); // 0b1010 -> 0b00000000000000000000000000001010 + + // Component-wise extraction + int4 val4 = int4(0x12345678, 0x9abcdef0, 0x87654321, 0xfedcba98); + int4 ext4 = bitfieldExtract(val4, 4, 8); + i32Buffer[12] = ext4.x; + i32Buffer[13] = ext4.y; + i32Buffer[14] = ext4.z; + i32Buffer[15] = ext4.w; + } +} diff --git a/tests/language-feature/bitfield/bitfield-insert.slang b/tests/language-feature/bitfield/bitfield-insert.slang new file mode 100644 index 000000000..bc3db0b8d --- /dev/null +++ b/tests/language-feature/bitfield/bitfield-insert.slang @@ -0,0 +1,68 @@ +//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-vk -compile-arg -skip-spirv-validation -emit-spirv-directly +//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-dx12 -use-dxil +//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-mtl +//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-cpu +//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-cuda 
+//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-wgpu + +// CHECK: 8765432F +// CHECK-NEXT: 876543F1 +// CHECK-NEXT: 87654F21 +// CHECK-NEXT: 8765F321 +// CHECK-NEXT: A8 +// CHECK-NEXT: 3FC0 +// CHECK-NEXT: AF +// CHECK-NEXT: 123456F8 +// CHECK-NEXT: 9ABCDE60 +// CHECK-NEXT: 87654331 +// CHECK-NEXT: FEDCBA68 +// CHECK-NEXT: 12345EF8 +// CHECK-NEXT: 9ABCD560 +// CHECK-NEXT: 87654431 +// CHECK-NEXT: FEDCB568 + +//TEST_INPUT:ubuffer(data=[0 1 2 3 4 5 6 7 8 9 10 11 12 13 14], stride=4):out,name=u32Buffer +RWStructuredBuffer<uint> u32Buffer; + +[numthreads(1, 1, 1)] +void computeMain() +{ + // 32-bit tests + { + // Simple hex insertion to test, varying the offset. + uint base = 0x87654321; + uint value = 0xABCDEF; + u32Buffer[0] = bitfieldInsert(base, value, 4 * 0, 4); // 0x8765432F + u32Buffer[1] = bitfieldInsert(base, value, 4 * 1, 4); // 0x876543F1 + u32Buffer[2] = bitfieldInsert(base, value, 4 * 2, 4); // 0x87654F21 + u32Buffer[3] = bitfieldInsert(base, value, 4 * 3, 4); // 0x8765F321 + + // Test with varying bit length + base = 0; + value = 0b101010; + u32Buffer[4] = bitfieldInsert(base, value, 2, 6); // 0b10101000 + value = 0b11111111; + u32Buffer[5] = bitfieldInsert(base, value, 6, 8); // 0b11111111000000 + + // Test with int input + u32Buffer[6] = bitfieldInsert(0b10100000, 0b1111, 0, 4); // 0b10101111 + + // Test with a vector + uint4 base4 = uint4(0x12345678, 0x9abcdef0, 0x87654321, 0xfedcba98); + uint4 value4 = uint4(0xABCDEF, 0x123456, 0x876543, 0x123456); + uint4 output4 = bitfieldInsert(base4, value4, 4, 4); + u32Buffer[7] = output4.x; + u32Buffer[8] = output4.y; + u32Buffer[9] = output4.z; + u32Buffer[10] = output4.w; + + // Test with an int vector + int4 ibase4 = int4(0x12345678, 0x9abcdef0, 0x87654321, 0xfedcba98); + int4 ivalue4 = int4(0xABCDEF, 0x123456, 0x876543, 0x123456); + int4 ioutput4 = bitfieldInsert(ibase4, ivalue4, 4, 8); + u32Buffer[11] = ioutput4.x; + u32Buffer[12] = ioutput4.y; + u32Buffer[13] = ioutput4.z; + u32Buffer[14] = 
ioutput4.w; + } +} |
