diff options
Diffstat (limited to 'source/slang')
| -rw-r--r-- | source/slang/hlsl.meta.slang | 30 | ||||
| -rw-r--r-- | source/slang/slang-emit-c-like.cpp | 24 | ||||
| -rw-r--r-- | source/slang/slang-emit-glsl.cpp | 38 | ||||
| -rw-r--r-- | source/slang/slang-emit-hlsl.cpp | 174 | ||||
| -rw-r--r-- | source/slang/slang-emit.cpp | 89 | ||||
| -rw-r--r-- | source/slang/slang-glsl-extension-tracker.cpp | 2 | ||||
| -rw-r--r-- | source/slang/slang-glsl-extension-tracker.h | 2 | ||||
| -rw-r--r-- | source/slang/slang-ir-byte-address-legalize.cpp | 924 | ||||
| -rw-r--r-- | source/slang/slang-ir-byte-address-legalize.h | 27 | ||||
| -rw-r--r-- | source/slang/slang-ir-inst-defs.h | 50 | ||||
| -rw-r--r-- | source/slang/slang-ir-insts.h | 21 | ||||
| -rw-r--r-- | source/slang/slang-ir-layout.cpp | 239 | ||||
| -rw-r--r-- | source/slang/slang-ir-layout.h | 70 | ||||
| -rw-r--r-- | source/slang/slang.vcxproj | 4 | ||||
| -rw-r--r-- | source/slang/slang.vcxproj.filters | 12 |
15 files changed, 1701 insertions, 5 deletions
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index 67f44cdac..a84e88ca8 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -41,8 +41,28 @@ struct ByteAddressBuffer uint4 Load4(int location); uint4 Load4(int location, out uint status); + + T Load<T>(int location) + { + return __byteAddressBufferLoad<T>(this, location); + } }; +__intrinsic_op($(kIROp_ByteAddressBufferLoad)) +T __byteAddressBufferLoad<T>(ByteAddressBuffer buffer, int offset); + +__intrinsic_op($(kIROp_ByteAddressBufferLoad)) +T __byteAddressBufferLoad<T>(RWByteAddressBuffer buffer, int offset); + +__intrinsic_op($(kIROp_ByteAddressBufferLoad)) +T __byteAddressBufferLoad<T>(RasterizerOrderedByteAddressBuffer buffer, int offset); + +__intrinsic_op($(kIROp_ByteAddressBufferStore)) +void __byteAddressBufferStore<T>(RWByteAddressBuffer buffer, int offset, T value); + +__intrinsic_op($(kIROp_ByteAddressBufferStore)) +void __byteAddressBufferStore<T>(RasterizerOrderedByteAddressBuffer buffer, int offset, T value); + __generic<T> __magic_type(HLSLStructuredBufferType) __intrinsic_type($(kIROp_HLSLStructuredBufferType)) @@ -135,6 +155,11 @@ struct $(item.name) uint4 Load4(int location, out uint status); + T Load<T>(int location) + { + return __byteAddressBufferLoad<T>(this, location); + } + // Added operations: __target_intrinsic(glsl, "($3 = atomicAdd($0._data[$1/4], $2))") @@ -241,6 +266,11 @@ struct $(item.name) void Store4( uint address, uint4 value); + + void Store<T>(int offset, T value) + { + __byteAddressBufferStore(this, offset, value); + } }; ${{{{ diff --git a/source/slang/slang-emit-c-like.cpp b/source/slang/slang-emit-c-like.cpp index 72ee1644d..9da7008b6 100644 --- a/source/slang/slang-emit-c-like.cpp +++ b/source/slang/slang-emit-c-like.cpp @@ -2199,6 +2199,30 @@ void CLikeSourceEmitter::defaultEmitInstExpr(IRInst* inst, const EmitOpInfo& inO emitOperand(inst->getOperand(0), outerPrec); break; + case kIROp_ByteAddressBufferLoad: + 
m_writer->emit("("); + emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); + m_writer->emit(").Load<"); + emitType(inst->getDataType()); + m_writer->emit(">("); + emitOperand(inst->getOperand(1), getInfo(EmitOp::General)); + m_writer->emit(")"); + break; + + case kIROp_ByteAddressBufferStore: + { + auto prec = getInfo(EmitOp::Postfix); + needClose = maybeEmitParens(outerPrec, prec); + + emitOperand(inst->getOperand(0), leftSide(outerPrec, prec)); + m_writer->emit(".Store("); + emitOperand(inst->getOperand(1), getInfo(EmitOp::General)); + m_writer->emit(","); + emitOperand(inst->getOperand(2), getInfo(EmitOp::General)); + m_writer->emit(")"); + } + break; + default: diagnoseUnhandledInst(inst); break; diff --git a/source/slang/slang-emit-glsl.cpp b/source/slang/slang-emit-glsl.cpp index c05f14f25..251b164cd 100644 --- a/source/slang/slang-emit-glsl.cpp +++ b/source/slang/slang-emit-glsl.cpp @@ -1290,7 +1290,44 @@ bool GLSLSourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOu emitOperand(inst->getOperand(0), outerPrec); return true; } + case kIROp_StructuredBufferLoad: + { + auto outerPrec = inOuterPrec; + auto prec = getInfo(EmitOp::Postfix); + bool needClose = maybeEmitParens(outerPrec, prec); + + emitOperand(inst->getOperand(0), leftSide(outerPrec, prec)); + m_writer->emit("._data["); + emitOperand(inst->getOperand(1), getInfo(EmitOp::General)); + m_writer->emit("]"); + + maybeCloseParens(needClose); + return true; + } + case kIROp_StructuredBufferStore: + { + auto outerPrec = inOuterPrec; + + auto assignPrec = getInfo(EmitOp::Assign); + bool assignNeedsClose = maybeEmitParens(outerPrec, assignPrec); + { + auto subscriptPrec = getInfo(EmitOp::Postfix); + bool subscriptNeedsClose = maybeEmitParens(assignPrec, subscriptPrec); + + emitOperand(inst->getOperand(0), leftSide(assignPrec, subscriptPrec)); + m_writer->emit("._data["); + emitOperand(inst->getOperand(1), getInfo(EmitOp::General)); + m_writer->emit("]"); + + 
maybeCloseParens(subscriptNeedsClose); + } + + m_writer->emit(" = "); + emitOperand(inst->getOperand(2), rightSide(assignPrec, outerPrec)); + maybeCloseParens(assignNeedsClose); + return true; + } default: break; } @@ -1458,6 +1495,7 @@ void GLSLSourceEmitter::emitSimpleTypeImpl(IRType* type) case kIROp_FloatType: case kIROp_DoubleType: { + _requireBaseType(cast<IRBasicType>(type)->getBaseType()); m_writer->emit(getDefaultBuiltinTypeName(type->op)); return; } diff --git a/source/slang/slang-emit-hlsl.cpp b/source/slang/slang-emit-hlsl.cpp index f0238ce70..e48a166c5 100644 --- a/source/slang/slang-emit-hlsl.cpp +++ b/source/slang/slang-emit-hlsl.cpp @@ -435,27 +435,86 @@ bool HLSLSourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOu } case kIROp_BitCast: { + // For simplicity, we will handle all bit-cast operations + // by first casting the "from" type to an intermediate + // integer type to hold the bits, and then convert *the* + // type over to the desired "to" type. + // + // A fundamental invariant that must be guaranteed + // by earlier steps is that a bit-cast instruction + // is only generated when the "from" and "to" types + // have the same size, and (in the case where they + // are vectors) number of elements. + // + // In textual order, the conversion to the "to" type + // comes first. + // auto toType = extractBaseType(inst->getDataType()); switch (toType) { default: diagnoseUnhandledInst(inst); break; - case BaseType::UInt: - break; + + case BaseType::Int8: + case BaseType::Int16: case BaseType::Int: + case BaseType::Int64: + case BaseType::UInt8: + case BaseType::UInt16: + case BaseType::UInt: + case BaseType::UInt64: + // Because the intermediate type will always + // be an integer type, we can convert to + // another integer type of the same size + // via a cast. 
m_writer->emit("("); emitType(inst->getDataType()); m_writer->emit(")"); break; + case BaseType::Float: + // Note: at present HLSL only supports + // reinterpreting integer bits as a `float`. + // + // There is no current function (it seems) + // for bit-casting an `int16_t` to a `half`. + // + // TODO: There is an `asdouble` function + // for converting two 32-bit integer values into + // one `double`. We could use that for + // bit casts of 64-bit values with a bit of + // extra work, but doing so might be best + // handled in an IR pass that legalizes + // bit-casts. + // m_writer->emit("asfloat"); break; } - m_writer->emit("("); + int closeCount = 1; + + auto fromType = extractBaseType(inst->getOperand(0)->getDataType()); + switch( fromType ) + { + default: + diagnoseUnhandledInst(inst); + break; + + case BaseType::UInt: + case BaseType::Int: + break; + + case BaseType::Float: + m_writer->emit("asuint("); + closeCount++; + break; + } + emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); - m_writer->emit(")"); + + while(closeCount--) + m_writer->emit(")"); return true; } case kIROp_StringLit: @@ -474,6 +533,113 @@ bool HLSLSourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOu emitOperand(inst->getOperand(0), outerPrec); return true; } + case kIROp_ByteAddressBufferLoad: + { + // HLSL byte-address buffers have two kinds of `Load` operations. + // + // First we have the `Load`, `Load2`, `Load3`, and `Load4` operations, + // which are *not* generic/templated, and always return a scalar + // or vector of `uint`. These are available on all profiles that + // support byte-address buffers. + // + // Second we have the `Load<T>` generic, which itself comes in + // two flavors. The basic version can only handle the case where `T` + // is a scalar or vector, but can handle more types than the + // non-generic operations. 
The more complex version can handle + // aggregate types as well, but we don't need to worry about + // that because we will have legalized such operations out + // already. + // + // Our task here is thus to pick between `Load`/`Load2`/`Load3`/`Load4` + // or `Load<T>`, always preferring the functions that are more + // universally available. + // + // We will thus inspect the type that is being loaded, + // and determine if it is a scalar or vector, and then + // if the element type of that scalar/vector is `uint`. + // + auto elementType = inst->getDataType(); + IRIntegerValue elementCount = 1; + if( auto vecType = as<IRVectorType>(elementType) ) + { + if( auto elementCountInst = as<IRIntLit>(vecType->getElementCount()) ) + { + elementType = vecType->getElementType(); + elementCount = elementCountInst->getValue(); + } + } + + if( elementType->op == kIROp_UIntType ) + { + // If we are in the case that can use `Load`/`Load2`/`Load3`/`Load4`, + // then we will always prefer to use it. + // + auto outerPrec = inOuterPrec; + auto prec = getInfo(EmitOp::Postfix); + bool needClose = maybeEmitParens(outerPrec, prec); + + emitOperand(inst->getOperand(0), leftSide(outerPrec, prec)); + m_writer->emit(".Load"); + if( elementCount != 1 ) + { + m_writer->emit(elementCount); + } + m_writer->emit("("); + emitOperand(inst->getOperand(1), getInfo(EmitOp::General)); + m_writer->emit(")"); + + maybeCloseParens(needClose); + return true; + } + + // Otherwise we fall back to the base case, which + // is already handled by the base `CLikeSourceEmitter` + return false; + } + case kIROp_ByteAddressBufferStore: + { + // Similar to the case for a load, we want to specialize + // the generated code for the case where we store a `uint` + // or a vector of `uint`. 
+ // Note: the value being stored is operand 2; the store instruction itself has no result type to inspect. + auto elementType = inst->getOperand(2)->getDataType(); + IRIntegerValue elementCount = 1; + if( auto vecType = as<IRVectorType>(elementType) ) + { + if( auto elementCountInst = as<IRIntLit>(vecType->getElementCount()) ) + { + elementType = vecType->getElementType(); + elementCount = elementCountInst->getValue(); + } + } + if( elementType->op == kIROp_UIntType ) + { + auto outerPrec = inOuterPrec; + auto prec = getInfo(EmitOp::Postfix); + bool needClose = maybeEmitParens(outerPrec, prec); + + emitOperand(inst->getOperand(0), leftSide(outerPrec, prec)); + m_writer->emit(".Store"); + if( elementCount != 1 ) + { + m_writer->emit(elementCount); + } + m_writer->emit("("); + emitOperand(inst->getOperand(1), getInfo(EmitOp::General)); + m_writer->emit(", "); + emitOperand(inst->getOperand(2), getInfo(EmitOp::General)); + m_writer->emit(")"); + + maybeCloseParens(needClose); + return true; + } + + // Otherwise we fall back to the base case, which + // is already handled by the base `CLikeSourceEmitter` + return false; + } + break; + default: break; } // Not handled diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp index 3caef0a9f..efa56c261 100644 --- a/source/slang/slang-emit.cpp +++ b/source/slang/slang-emit.cpp @@ -5,6 +5,7 @@ #include "../core/slang-type-text-util.h" #include "slang-ir-bind-existentials.h" +#include "slang-ir-byte-address-legalize.h" #include "slang-ir-dce.h" #include "slang-ir-entry-point-uniforms.h" #include "slang-ir-glsl-legalize.h" @@ -411,6 +412,94 @@ Result linkAndOptimizeIR( break; } + // For all targets, we translate load/store operations + // of aggregate types from/to byte-address buffers into + // stores of individual scalar or vector values. + // + { + ByteAddressBufferLegalizationOptions byteAddressBufferOptions; + + // Depending on the target, we may decide to do + // more aggressive translation that reduces the + // load/store operations down to individual scalars + // (splitting up vector ops). 
+ // + switch( target ) + { + default: + break; + + case CodeGenTarget::GLSL: + // For GLSL targets, we want to translate the vector load/store + // operations into scalar ops. This is in part as a simplification, + // but it also ensures that our generated code respects the lax + // alignment rules for D3D byte-address buffers (the base address + // of a buffer need not be more than 4-byte aligned, and loads + // of vectors need only be aligned based on their element type). + // + // TODO: We should consider having an extended variant of `Load<T>` + // on byte-address buffers which expresses a programmer's knowledge + // that the load will have greater alignment than required by D3D. + // That could either come as an explicit guaranteed-alignment + // operand, or instead as something like a `Load4Aligned<T>` operation + // that returns a `vector<4,T>` and assumes `4*sizeof(T)` alignment. + // + byteAddressBufferOptions.scalarizeVectorLoadStore = true; + + // For GLSL targets, there really isn't a low-level concept + // of a byte-address buffer at all, and the standard "shader storage + // buffer" (SSBO) feature is a lot closer to an HLSL structured + // buffer for our purposes. + // + // In particular, each SSBO can only have a single element type, + // so that even with bitcasts we can't have a single buffer declaration + // (e.g., one with `uint` elements) service all load/store operations + // (e.g., a `half` value can't be stored atomically if there are + // `uint` elements, unless we use explicit atomics). + // + // In order to simplify things, we will translate byte-address buffer + // ops to equivalent structured-buffer ops for GLSL targets, where + // each unique type being loaded/stored yields a different global + // parameter declaration of the buffer. 
+ // + byteAddressBufferOptions.translateToStructuredBufferOps = true; + break; + } + + // We also need to decide whether to translate + // any "leaf" load/store operations over to + // use only unsigned-integer types and then + // bit-cast, or if we prefer to leave them + // as load/store of the original type. + // + switch( target ) + { + case CodeGenTarget::HLSL: + { + auto profile = targetRequest->targetProfile; + if( profile.getFamily() == ProfileFamily::DX ) + { + if(profile.GetVersion() <= ProfileVersion::DX_5_0) + { + // Fxc and earlier dxc versions do not support + // a templated `.Load<T>` operation on byte-address + // buffers, and instead need us to emit separate + // `uint` loads and then bit-cast over to + // the correct type. + // + byteAddressBufferOptions.useBitCastFromUInt = true; + } + } + } + break; + + default: + break; + } + + legalizeByteAddressBufferOps(session, irModule, byteAddressBufferOptions); + } + // For GLSL only, we will need to perform "legalization" of // the entry point and any entry-point parameters. 
// diff --git a/source/slang/slang-glsl-extension-tracker.cpp b/source/slang/slang-glsl-extension-tracker.cpp index 30acd8936..53e51d633 100644 --- a/source/slang/slang-glsl-extension-tracker.cpp +++ b/source/slang/slang-glsl-extension-tracker.cpp @@ -41,6 +41,8 @@ void GLSLExtensionTracker::requireBaseTypeExtension(BaseType baseType) switch (baseType) { case BaseType::Half: + case BaseType::UInt16: + case BaseType::Int16: { // https://github.com/KhronosGroup/GLSL/blob/master/extensions/ext/GL_EXT_shader_16bit_storage.txt requireExtension(UnownedStringSlice::fromLiteral("GL_EXT_shader_16bit_storage")); diff --git a/source/slang/slang-glsl-extension-tracker.h b/source/slang/slang-glsl-extension-tracker.h index 79dcd720e..5127674a3 100644 --- a/source/slang/slang-glsl-extension-tracker.h +++ b/source/slang/slang-glsl-extension-tracker.h @@ -37,7 +37,7 @@ public: protected: static uint32_t _getFlag(BaseType baseType) { return uint32_t(1) << int(baseType); } - uint32_t m_hasBaseTypeFlags = 0xffffffff & ~(_getFlag(BaseType::UInt64) + _getFlag(BaseType::Int64) + _getFlag(BaseType::Half)); + uint32_t m_hasBaseTypeFlags = _getFlag(BaseType::Float) | _getFlag(BaseType::Int) | _getFlag(BaseType::UInt) | _getFlag(BaseType::Void) | _getFlag(BaseType::Bool); ProfileVersion m_profileVersion = ProfileVersion::GLSL_110; diff --git a/source/slang/slang-ir-byte-address-legalize.cpp b/source/slang/slang-ir-byte-address-legalize.cpp new file mode 100644 index 000000000..e33408855 --- /dev/null +++ b/source/slang/slang-ir-byte-address-legalize.cpp @@ -0,0 +1,924 @@ +// slang-ir-byte-address-legalize.cpp +#include "slang-ir-byte-address-legalize.h" + +// This file implements an IR pass that translates load/store operations +// on byte-address buffers to be legal for a chosen target. 
+// +// Currently this pass only applies to the operations generated for +// the generic `*ByteAddressBuffer.Load<T>` and `.Store<T>` operations, +// and not the non-generic versions that traffic in `uint` (e.g., +// `Load2` or `Store3`). + +#include "slang-ir-insts.h" +#include "slang-ir-layout.h" + +namespace Slang +{ + +// As is typical for IR passes in Slang, we will encapsulate the state +// while we process the code in a context type. +// +struct ByteAddressBufferLegalizationContext +{ + // We need access to the original session, as well as the options + // that control what constructs we legalize, and how. + // + Session* m_session = nullptr; + ByteAddressBufferLegalizationOptions m_options; + + // We will also use a central IR builder when generating new + // code as part of legalization (rather than create/destroy + // IR builders on the fly). + // + SharedIRBuilder m_sharedBuilder; + IRBuilder m_builder; + + // Everything starts with a request to process a module, + // which delegates to the central recursive walk of the IR. + // + void processModule(IRModule* module) + { + m_sharedBuilder.session = m_session; + m_sharedBuilder.module = module; + + m_builder.sharedBuilder = &m_sharedBuilder; + + processInstRec(module->getModuleInst()); + } + + // We recursively walk the entire IR structure (except + // for decorations), and process any byte-address buffer + // load or store operations. + // + void processInstRec(IRInst* inst) + { + switch( inst->op ) + { + case kIROp_ByteAddressBufferLoad: + processLoad(inst); + break; + + case kIROp_ByteAddressBufferStore: + processStore(inst); + break; + } + + IRInst* nextChild = nullptr; + for( IRInst* child = inst->getFirstChild(); child; child = nextChild ) + { + nextChild = child->getNextInst(); + processInstRec(child); + } + } + + // The logic for both the load and store cases is similar, + // so we will present the entire load case first and then + // move on to stores. 
+ // + void processLoad(IRInst* load) + { + // What we want to do with a load depends on the type + // being loaded. + // + auto type = load->getDataType(); + + // We start by looking at the type being loaded so + // that we can opt out if it is legal. + // + if( isTypeLegalForByteAddressLoadStore(type) ) + return; + + // If the type is one that requires legalization, + // then we will set up to insert new IR instructions + // to replace it. + // + m_builder.setInsertBefore(load); + + // We then emit a "legal load" with the same buffer + // and byte offset from the original. + // + auto buffer = load->getOperand(0); + auto offset = load->getOperand(1); + auto legalLoad = emitLegalLoad(type, buffer, offset, 0); + + // It is currently possible for the legalization + // to fail (perhaps because of something else that + // is invalid in the IR), so we will defensively + // leave the code alone in that case. + // + if(!legalLoad) + return; + + // If we were able to generate a legal load operation, + // then the value it yields can be used to fully + // replace the previous illegal load. + // + load->replaceUsesWith(legalLoad); + load->removeAndDeallocate(); + } + + bool isTypeLegalForByteAddressLoadStore(IRType* type) + { + // Whether or not a type is legal to use for + // byte-address buffer load/store depends on + // properties of the target, which will have + // been passed into this pass via its options. + // + // If we are expected to translate all byte-address + // operations to equivalent structured-buffer + // operations, then that means *no* type is + // legal for byte-address load/store. + // + if(m_options.translateToStructuredBufferOps) + return false; + + // Basic types are usually legal to load/store + // on all targets. + // + if( auto basicType = as<IRBasicType>(type) ) + { + // On targets that require translation to + // make all load/store use `uint` values, + // any scalar type that isn't `uint` is + // illegal. 
+ // + if( m_options.useBitCastFromUInt + && basicType->getBaseType() != BaseType::UInt ) + { + return false; + } + + // Otherwise, scalar types are assumed + // legal for load/store. + // + return true; + } + + // Vector types also depend on the options. + // + if( as<IRVectorType>(type) ) + { + // If we've been asked to scalarize all + // vector load/store, then we need to + // treat them as illegal. + // + if(m_options.scalarizeVectorLoadStore) + return false; + + } + + // All other types are treated as always illegal, + // so that we will legalize the load/store ops + // in all cases. + // + // Note: recent builds of dxc (perhaps coupled with + // recent shader models) support byte-address load/store + // of more complex types, but it is simpler for Slang + // to just legalize all the composite cases rather + // than rely on a downstream compiler. + // + return false; + } + + // The core workhorse routine for the load case is `emitLegalLoad`, + // which tries to emit load operations that read a value of the + // given `type` from the given `buffer` at the required `baseOffset` + // plus the `immediateOffset` if any. + // + IRInst* emitLegalLoad(IRType* type, IRInst* buffer, IRInst* baseOffset, IRIntegerValue immediateOffset) + { + // The right way to load a value depends primarily + // on the type, and secondarily on the options + // that have been specified for this pass. + // + if( auto structType = as<IRStructType>(type) ) + { + // When loading a value of `struct` type, we will + // load each field with its own operation. + // + // Note: A more "clever" implementation might try + // to emit a minimal number of loads of whatever + // is the largest supported type matching the + // alignment of `structType`, and then break those + // loaded values into fields with bit-level ops + // once they are in registers. 
+ // + // Such an approach could conceivably allow more + // types to be loadable even on targets that + // don't directly support them (e.g., a structure + // with an `int` and two `int16_t` could be loadable + // even when targeting DXBC). + // + // The flip side to such an approach would be that + // it would complicate the generated code, and also + // make the rules about when a type is supported + // for byte-address load/store much more complicated. + + // We collect the loaded per-field values into an + // array, which we will then use to construct the + // full value of the `struct` type. + // + List<IRInst*> fieldVals; + for( auto field : structType->getFields() ) + { + auto fieldType = field->getFieldType(); + + // The relative offset of each field is calculated using + // the IR-based layout subsystem, which works with the + // "natural" in-memory layout of types. + // + // It is possible for layout computation to fail (e.g., + // if the field type somehow wasn't one that can be + // laid out "naturally"). If the layout process fails, + // then we fail to legalize this load. + // + IRIntegerValue fieldOffset = 0; + SLANG_RETURN_NULL_ON_FAIL(getNaturalOffset(field, &fieldOffset)); + + // Otherwise, we load the field by recursively calling this function + // on the field type, with an adjusted immediate offset. + // + // If legalizing the field load fails, then we fail the load + // of the structure as well. Any loads that were generated + // for earlier fields will be left behind but can be eliminated + // as dead code. + // + auto fieldVal = emitLegalLoad(fieldType, buffer, baseOffset, immediateOffset + fieldOffset); + if(!fieldVal) + return nullptr; + + fieldVals.add(fieldVal); + } + + // Once all the field values have been loaded, we can bind + // them together to make a single value of the `struct` type, + // representing the result of the legalized load. 
+ // + return m_builder.emitMakeStruct(type, fieldVals); + } + else if( auto arrayType = as<IRArrayTypeBase>(type) ) + { + // Loading a value of array type amounts to loading each + // of its elements. There is shared logic between the + // array, matrix, and vector cases, so we factor it into + // a subroutine that we will explain later. + // + // We need a known constant number of elements in an array + // to be able to emit per-element loads, so we skip + // legalization if the array type isn't in the right form + // for us to proceed. + // + auto elementCountInst = as<IRIntLit>(arrayType->getElementCount()); + if( elementCountInst ) + { + return emitLegalSequenceLoad(type, buffer, baseOffset, immediateOffset, kIROp_makeArray, arrayType->getElementType(), elementCountInst->getValue()); + } + } + else if( auto matType = as<IRMatrixType>(type) ) + { + // Handling a matrix is largely like an array, with the + // small detail that we need to construct the row type + // that we expect to load for each "element." + // + // TODO: The logic here assumes row-major layout, because + // the row-vs-column-major information has been dropped + // by this point in the IR. + // + // In order to allow both row- and column-major matrices + // to be loaded from byte-address buffers, we would need + // to make row-vs-column-major-ness be part of the IR + // type system so that IR layout can take it into account. + // + // For now we have to live with the "natural" layout of + // matrices always being row-major. 
+ // + auto rowCountInst = as<IRIntLit>(matType->getRowCount()); + if( rowCountInst ) + { + auto rowType = m_builder.getVectorType(matType->getElementType(), matType->getColumnCount()); + return emitLegalSequenceLoad(type, buffer, baseOffset, immediateOffset, kIROp_MakeMatrix, rowType, rowCountInst->getValue()); + } + } + else if( auto vecType = as<IRVectorType>(type) ) + { + // One of the options that can vary per-target is whether to + // scalarize vector load/store operations. When that option + // is turned on, we can treat a vector load just like an + // array load. + // + auto elementCountInst = as<IRIntLit>(vecType->getElementCount()); + if( m_options.scalarizeVectorLoadStore && elementCountInst) + { + return emitLegalSequenceLoad(type, buffer, baseOffset, immediateOffset, kIROp_makeVector, vecType->getElementType(), elementCountInst->getValue()); + } + + // If we aren't scalarizing a vector load then we next need + // to consider the case where the target might only support + // byte-address load/store of unsigned integer data (e.g., + // this is the case for D3D11/DXBC). + // + // We can still support loads of vectors with other element + // types by first loading the data as unsigned integers, and + // then bit-casting it to the correct type (e.g., load a + // `uint4` with `Load4()` and then bit-cast to `float4` using + // `asfloat()`). + // + if(m_options.useBitCastFromUInt) + { + // We will look at the element type of the vector (which must + // be a basic type for this to work). + // + if( auto elementType = as<IRBasicType>(vecType->getElementType()) ) + { + // If there is a distinct unsigned integer type of the + // same size as the element type, then we can use that + // for our load. + // + if( auto unsignedElementType = getSameSizeUIntType(elementType) ) + { + // We form the appropriate unsigned-integer vector type, + // and then emit a load for it. 
+ // + auto unsignedVecType = m_builder.getVectorType(unsignedElementType, vecType->getElementCount()); + auto unsignedVecVal = emitSimpleLoad(unsignedVecType, buffer, baseOffset, immediateOffset); + + // Once we have loaded the bits into a temporary, + // we can bit-cast it to the correct type and + // we have our result. + // + return m_builder.emitBitCast(vecType, unsignedVecVal); + } + } + } + + // Any cases of vectors not handled above are allowed to fall through + // and be handled in the catch-all logic below. + } + else if( auto basicType = as<IRBasicType>(type) ) + { + // Most basic scalar types can be handled directly on targets, + // but as described above for vectors, the D3D11/DXBC target + // only supports loading `uint` values, so we need to emulate + // loads of other types (like `float`) by first loading a + // `uint` and then bit-casting. + // + if(m_options.useBitCastFromUInt) + { + if( auto unsignedType = getSameSizeUIntType(basicType) ) + { + auto unsignedVal = emitSimpleLoad(unsignedType, buffer, baseOffset, immediateOffset); + return m_builder.emitBitCast(basicType, unsignedVal); + } + } + } + + // If none of the many special cases above was triggered, then we + // are in the base case and assume we want to emit a single load + // for the type we were given. + // + return emitSimpleLoad(type, buffer, baseOffset, immediateOffset); + } + + // Loading of sequences for arrays, matrices, and vectors is + // bottlenecked through a single function. + // + IRInst* emitLegalSequenceLoad(IRType* type, IRInst* buffer, IRInst* baseOffset, IRIntegerValue immediateOffset, IROp op, IRType* elementType, IRIntegerValue elementCount) + { + // Our goal here is to produce a value of the given `type`, loaded from `buffer` + // at `baseOffset` plus `immediateOffset`. 
+ // + // We will do this by emitting `elementCount` loads for the elements of + // the given `elementType`, and then grouping them into the final sequence + // using the given `op` (which will be something like `kIROp_MakeArray`). + + // To know how many bytes to step between loads, we must compute + // the "stride" of the element type. + // + IRSizeAndAlignment elementLayout; + SLANG_RETURN_NULL_ON_FAIL(getNaturalSizeAndAlignment(elementType, &elementLayout)); + IRIntegerValue elementStride = elementLayout.getStride(); + + // We will collect all the element values into an array so + // that we can construct the sequence when we are done. + // + List<IRInst*> elementVals; + for( IRIntegerValue ii = 0; ii < elementCount; ++ii ) + { + auto elementVal = emitLegalLoad(elementType, buffer, baseOffset, immediateOffset + ii*elementStride); + if(!elementVal) + return nullptr; + + elementVals.add(elementVal); + } + + // Once we are done loading the elements we construct the sequence value. + // + return m_builder.emitIntrinsicInst(type, op, elementVals.getCount(), elementVals.getBuffer()); + } + + // All of the loading operations above eventually bottom out at `emitSimpleLoad`, + // which is meant to handle the base case where we do *not* want to + // recurse on the structure of `type`. + // + IRInst* emitSimpleLoad(IRType* type, IRInst* buffer, IRInst* baseOffset, IRIntegerValue immediateOffset) + { + // For all of the operations above this in the call chain we have been + // tracking a pair of a `baseOffset` as an IR instruction, and an + // `immediateOffset` value. Keeping things split avoided introducing + // a bunch of `add` instructions that could be constant-folded away. + // + // Instead, now that we are about to emit a load "for real" + // we want to turn those two offset values into one. 
+ // + IRInst* offset = emitOffsetAddIfNeeded(baseOffset, immediateOffset); + + // At this point there is one last (major) detail we need to + // get into, which is that some targets (currently just GLSL) + // do not actually have anything like byte-address buffers + // as a built-in feature. + // + // Instead, GLSL has "shader storage buffers" which are + // tied to a particular element type when declared. E.g.,: + // + // buffer MyBuffer { uint _data[]; } myBuffer; + // + // The `myBuffer` declaration above can be used to load + // `uint` values, but isn't much use if you want to load/store + // a `half` or a `double` efficiently (and atomically, + // where possible/guaranteed). + // + // Shader storage buffers like this are closer in spirit to + // HLSL/Slang "structured buffers," so we think of this code + // path as converting byte-address buffer operations into + // structured-buffer operations. + // + // To make things work for GLSL output, we need to generate + // multiple `buffer` declarations that all alias one another + // (accomplished by giving them the same `binding`), but that + // declare buffers with different element types. + // + if( m_options.translateToStructuredBufferOps ) + { + // In order to emit a suitable structured-buffer load, + // we need to find or create a global declaration for + // a structured buffer that is "equivalent" to `buffer`, + // but has `type` as its element type. + // + // That operation could conceivably fail, and when it + // does we will fall back to the default handling of + // emitting a byte-address buffer load (which will + // then fail to generate valid GLSL code). + // + if( auto structuredBuffer = getEquivalentStructuredBuffer(type, buffer) ) + { + // The `offset` instruction represents the byte offset of + // the thing we are trying to load, and we need to translate + // that into an *index* for use with a structured buffer. 
+ // + // We convert the offset to an index by dividing by the + // stride of `type` as computed with our "natural layout" rules. + // + // This logic will be invalid if `offset` isn't a multiple of + // the stride of `type`, but that case would have been + // undefined behavior anyway. + // + auto offsetType = offset->getDataType(); + + IRSizeAndAlignment typeLayout; + SLANG_RETURN_NULL_ON_FAIL(getNaturalSizeAndAlignment(type, &typeLayout)); + auto typeStrideVal = typeLayout.getStride(); + + auto typeStrideInst = m_builder.getIntValue(offsetType, typeStrideVal); + IRInst* divArgs[] = { offset, typeStrideInst }; + auto index = m_builder.emitIntrinsicInst(offsetType, kIROp_Div, 2, divArgs); + + IRInst* args[] = { structuredBuffer, index }; + return m_builder.emitIntrinsicInst(type, kIROp_StructuredBufferLoad, 2, args); + } + } + + // When we finally run out of special cases to handle, we just emit + // a byte-address buffer load operation directly, assuming it will + // work for the chosen target. + // + { + IRInst* loadArgs[] = { buffer, offset }; + return m_builder.emitIntrinsicInst(type, kIROp_ByteAddressBufferLoad, 2, loadArgs); + } + } + + IRInst* emitOffsetAddIfNeeded(IRInst* baseOffset, IRIntegerValue immediateOffset) + { + // We need to create an instruction to represent + // `baseOffset` plus `immediateOffset`. + // + // An important special case is when `immediateOffset` is zero: + // + if(immediateOffset == 0) + return baseOffset; + + // Otherwise, we emit an `add` instruction of the appropriate type + // + auto type = baseOffset->getDataType(); + IRInst* args[] = { baseOffset, m_builder.getIntValue(type, immediateOffset) }; + return m_builder.emitIntrinsicInst(type, kIROp_Add, 2, args); + } + + // At this point we have gone through the main logic of the load path, + // and before we turn our attention to the store path we can go + // ahead and define some of the utility functions that the code above + // requires. 
+
+    // In order to handle interesting types on D3D11/DXBC, we need to
+    // be able to map a base type to another type of the same size.
+    //
+    // Given the opcode `op` of a type, this returns `BaseType::UInt` when
+    // a value of that type can be moved through a `uint` plus a bit-cast,
+    // and the sentinel `BaseType::Void` when no bit-cast should be tried.
+    //
+    BaseType getSameSizeUIntBaseType(IROp op)
+    {
+        // For now we are only handling the 32-bit types here, because
+        // the D3D11/DXBC target will not be able to handle 16- or
+        // 64-bit types anyway. This could be improved over time
+        // if needed.
+        //
+        switch( op )
+        {
+        case kIROp_IntType:
+        case kIROp_FloatType:
+        case kIROp_BoolType:
+            // The basic 32-bit types (and `bool`) can be handled by
+            // loading `uint` values and then bit-casting.
+            //
+            // Note: We aren't listing `kIROp_UIntType` here because
+            // we don't want to introduce a bit-cast in the case where
+            // the load was already for a `uint`.
+            //
+            return BaseType::UInt;
+
+        default:
+            // All other types map to a sentinel value of `Void` to
+            // indicate that a bit-cast solution shouldn't be attempted:
+            // either load the original type, or fail.
+            //
+            return BaseType::Void;
+
+        }
+    }
+
+    // Map `type` to the unsigned integer type of the same size, or
+    // return null when there is no such type (meaning the caller should
+    // not use the bit-cast strategy).
+    //
+    // The vector store path passes the result of `as<IRBasicType>(...)`
+    // here, and that cast yields null for a non-basic element type, so a
+    // null `type` is reported as "no equivalent" rather than dereferenced.
+    //
+    IRBasicType* getSameSizeUIntType(IRType* type)
+    {
+        if(!type)
+            return nullptr;
+
+        auto unsignedBaseType = getSameSizeUIntBaseType(type->op);
+        if(unsignedBaseType == BaseType::Void)
+            return nullptr;
+
+        return m_builder.getBasicType(unsignedBaseType);
+    }
+
+    // When replacing byte-address buffer load/store operations with
+    // structured buffer ones, we need to be able to map an IR instruction
+    // that represents a byte-address buffer to one that represents an
+    // "equivalent" structured buffer.
+    //
+    // An important/tricky detail here is that the byte-address buffer
+    // might have been passed in as a function parameter, or be indexed
+    // from an array, etc.
+    //
+    // The logic here assumes this pass has run after a full legalization
+    // pass on resource parameter usage, so that any references to
+    // buffers in an instruction are "grounded" in a known global shader
+    // parameter.
+
+    // Find (or create) the structured buffer with element type `elementType`
+    // that stands in for `byteAddressBuffer`. Returns null when the operand
+    // does not match one of the shapes handled below.
+    //
+    IRInst* getEquivalentStructuredBuffer(IRType* elementType, IRInst* byteAddressBuffer)
+    {
+        // Base case: the buffer is itself a global shader parameter, which
+        // has a dedicated (caching) routine.
+        //
+        if(auto globalParam = as<IRGlobalParam>(byteAddressBuffer))
+            return getEquivalentStructuredBufferParam(elementType, globalParam);
+
+        // The only other shape we know how to handle is a buffer fetched
+        // out of an array. Anything else means we bail out of the attempt
+        // to legalize things here.
+        //
+        // TODO: Should we make this case an error?
+        //
+        if( byteAddressBuffer->op != kIROp_getElement )
+            return nullptr;
+
+        // For an array element we first build the "equivalent" array of
+        // structured buffers (recursively), and then index into that.
+        //
+        auto sourceArray = byteAddressBuffer->getOperand(0);
+        auto elementIndex = byteAddressBuffer->getOperand(1);
+
+        auto replacementArray = getEquivalentStructuredBuffer(elementType, sourceArray);
+        if(!replacementArray)
+            return nullptr;
+
+        auto replacementArrayType = as<IRArrayTypeBase>(replacementArray->getDataType());
+        if(!replacementArrayType)
+            return nullptr;
+
+        // Indexing the replacement array with the original index yields
+        // the structured buffer that corresponds to `byteAddressBuffer`.
+        //
+        return m_builder.emitElementExtract(
+            replacementArrayType->getElementType(),
+            replacementArray,
+            elementIndex);
+    }
+
+    // Our search for an "equivalent" structured buffer should bottom out when
+    // we find a global shader parameter of byte-address buffer type, or an
+    // array (of array of array of ...) byte-address buffer type.
+    //
+    // We then want to create an equivalent shader parameter of a matching
+    // structured buffer (or array...) type.
+    //
+    // To avoid creating too many buffers (e.g., one per load), we will cache and
+    // re-use the buffers we declare in this way. Note that we do *not* need
+    // to worry if the deduplication is perfect, because we are already assuming
+    // that the target will handle multiple buffers with the same `binding`
+    // correctly.
+    //
+    // The cache is keyed on the (element type, byte-address buffer parameter)
+    // pair. A failed creation is cached as null, so it is not retried.
+    //
+    Dictionary<KeyValuePair<IRInst*, IRInst*>, IRGlobalParam*> m_cachedStructuredBuffers;
+    IRGlobalParam* getEquivalentStructuredBufferParam(IRType* elementType, IRGlobalParam* byteAddressBufferParam)
+    {
+        KeyValuePair<IRInst*, IRInst*> key(elementType, byteAddressBufferParam);
+
+        IRGlobalParam* structuredBufferParam = nullptr;
+        if(!m_cachedStructuredBuffers.TryGetValue(key, structuredBufferParam))
+        {
+            structuredBufferParam = createEquivalentStructuredBufferParam(elementType, byteAddressBufferParam);
+            m_cachedStructuredBuffers.Add(key, structuredBufferParam);
+        }
+        return structuredBufferParam;
+    }
+
+    // Create a new global parameter of structured-buffer type that mirrors
+    // `byteAddressBufferParam`. Returns null if no equivalent type exists.
+    //
+    IRGlobalParam* createEquivalentStructuredBufferParam(IRType* elementType, IRGlobalParam* byteAddressBufferParam)
+    {
+        // When we need to create a new structured buffer to stand in for
+        // some byte-address buffer (with a new `elementType` being used
+        // for load/store), we need to figure out the "equivalent" type
+        // to use for the new buffer.
+        //
+        auto byteAddressBufferParamType = byteAddressBufferParam->getDataType();
+        auto structuredBufferParamType = getEquivalentStructuredBufferParamType(elementType, byteAddressBufferParamType);
+        if(!structuredBufferParamType)
+            return nullptr;
+
+        // Next we will create a global shader parameter using the new
+        // type.
+        //
+        // Note: we are creating a new `IRBuilder` here rather than using
+        // `m_builder` because this logic could get called during the middle
+        // of legalizing a load or store, and we don't want to mess with
+        // the insertion location of `m_builder`.
+        //
+        IRBuilder paramBuilder;
+        paramBuilder.sharedBuilder = &m_sharedBuilder;
+        paramBuilder.setInsertBefore(byteAddressBufferParam);
+
+        auto structuredBufferParam = paramBuilder.createGlobalParam(structuredBufferParamType);
+
+        // The new parameter needs to be given a layout to match the existing
+        // parameter, so that it is given the same `binding` in the generated code.
+        //
+        if( auto layoutDecoration = byteAddressBufferParam->findDecoration<IRLayoutDecoration>() )
+        {
+            paramBuilder.addLayoutDecoration(structuredBufferParam, layoutDecoration->getLayout());
+        }
+
+        return structuredBufferParam;
+    }
+
+    IRType* getEquivalentStructuredBufferParamType(IRType* elementType, IRType* byteAddressBufferType)
+    {
+        // Our task in this function is to compute the type for
+        // a structured buffer that is equivalent to `byteAddressBufferType`,
+        // but with the given `elementType`. We return null for any type
+        // that is not a byte-address buffer or an array of them.
+
+        switch( byteAddressBufferType->op )
+        {
+        // The basic `*ByteAddressBuffer` types map directly to the `*StructuredBuffer<elementType>` cases.
+        case kIROp_HLSLByteAddressBufferType:                   return m_builder.getType(kIROp_HLSLStructuredBufferType,                    elementType);
+        case kIROp_HLSLRWByteAddressBufferType:                 return m_builder.getType(kIROp_HLSLRWStructuredBufferType,                  elementType);
+        case kIROp_HLSLRasterizerOrderedByteAddressBufferType:  return m_builder.getType(kIROp_HLSLRasterizerOrderedStructuredBufferType,   elementType);
+
+        case kIROp_ArrayType:
+        case kIROp_UnsizedArrayType:
+            {
+                // Array types (both sized and unsized) need to translate
+                // their element type to an equivalent structured buffer
+                // and build a new array type with the same element count.
+                //
+                auto arrayType = cast<IRArrayTypeBase>(byteAddressBufferType);
+
+                // The recursive step fails (returns null) when the array
+                // does not ultimately hold byte-address buffers; we must
+                // not build an array type around a null element type.
+                //
+                auto structuredElementType = getEquivalentStructuredBufferParamType(elementType, arrayType->getElementType());
+                if(!structuredElementType)
+                    return nullptr;
+
+                return m_builder.getArrayTypeBase(
+                    byteAddressBufferType->op,
+                    structuredElementType,
+                    arrayType->getElementCount());
+            }
+
+        default:
+            return nullptr;
+        }
+    }
+
+    // At this point we've covered all the logic for the load case down
+    // to the last detail.
+    //
+    // All that remains is to go over the equivalent logic for the case
+    // of byte-address buffer stores, which mostly parallels code we've
+    // already discussed.
+
+    void processStore(IRInst* store)
+    {
+        // Just as for loads, the logic for stores is based on the type
+        // being used, but unlike in the load case we don't care about
+        // the type of the store operation, but instead the operand
+        // that represents the value to be stored.
+        //
+        auto value = store->getOperand(2);
+        auto type = value->getDataType();
+
+        // Types that are already legal to use don't require any processing.
+        //
+        if(isTypeLegalForByteAddressLoadStore(type))
+            return;
+
+        // Otherwise we set up to try and emit a replacement.
+        //
+        m_builder.setInsertBefore(store);
+
+        // It is possible that our attempt to emit a replacement will fail
+        // (this should only happen if we run into types that shouldn't
+        // actually be allowed on a target), and in those cases we will
+        // leave the original store around as well (this is at worst a
+        // performance issue, but we should still consider trying to
+        // tighten this up and make all unhandled cases be hard errors).
+        //
+        auto result = emitLegalStore(type, store->getOperand(0), store->getOperand(1), 0, value);
+        if(SLANG_FAILED(result))
+            return;
+
+        store->removeAndDeallocate();
+    }
+
+    Result emitLegalStore(IRType* type, IRInst* buffer, IRInst* baseOffset, IRIntegerValue immediateOffset, IRInst* value)
+    {
+        // The flow for emitting a legal store is very similar to that for
+        // legal loads; we will recurse on the structure of `type` and
+        // emit stores for fields/elements as needed.
+
+        if( auto structType = as<IRStructType>(type) )
+        {
+            // To store a structure, we store each of its fields at
+            // the appropriate relative offset.
+            //
+            for( auto field : structType->getFields() )
+            {
+                auto fieldType = field->getFieldType();
+
+                IRIntegerValue fieldOffset;
+                SLANG_RETURN_ON_FAIL(getNaturalOffset(field, &fieldOffset));
+
+                auto fieldVal = m_builder.emitFieldExtract(fieldType, value, field->getKey());
+                SLANG_RETURN_ON_FAIL(emitLegalStore(fieldType, buffer, baseOffset, immediateOffset + fieldOffset, fieldVal));
+            }
+            return SLANG_OK;
+        }
+        else if( auto arrayType = as<IRArrayTypeBase>(type) )
+        {
+            // Arrays and other sequences bottleneck through a helper
+            // function, which we will cover later.
+            //
+            // An array whose element count is not a literal falls
+            // through to the simple store at the end of this function.
+            //
+            auto elementCountInst = as<IRIntLit>(arrayType->getElementCount());
+            if( elementCountInst )
+            {
+                return emitLegalSequenceStore(buffer, baseOffset, immediateOffset, value, arrayType->getElementType(), elementCountInst->getValue());
+            }
+        }
+        else if( auto matType = as<IRMatrixType>(type) )
+        {
+            // Matrix stores get the same caveat as the load case:
+            // we are only supporting row-major layout for now.
+            //
+            auto rowCountInst = as<IRIntLit>(matType->getRowCount());
+            if( rowCountInst )
+            {
+                auto rowType = m_builder.getVectorType(matType->getElementType(), matType->getColumnCount());
+                return emitLegalSequenceStore(buffer, baseOffset, immediateOffset, value, rowType, rowCountInst->getValue());
+            }
+        }
+        else if( auto vecType = as<IRVectorType>(type) )
+        {
+            auto elementCountInst = as<IRIntLit>(vecType->getElementCount());
+            if( m_options.scalarizeVectorLoadStore && elementCountInst)
+            {
+                return emitLegalSequenceStore(buffer, baseOffset, immediateOffset, value, vecType->getElementType(), elementCountInst->getValue());
+            }
+
+            if(m_options.useBitCastFromUInt)
+            {
+                // `as<>` yields null when the element type is not a basic
+                // type; in that case there is no same-size `uint` type to
+                // bit-cast through, so we skip this strategy.
+                //
+                auto elementType = as<IRBasicType>(vecType->getElementType());
+                auto unsignedElementType = elementType ? getSameSizeUIntType(elementType) : nullptr;
+                if( unsignedElementType )
+                {
+                    // The bit-cast case for stores is similar to the case
+                    // for loads, except that we cast the value before
+                    // storing it (instead of casting a value after loading).
+                    //
+                    auto unsignedVecType = m_builder.getVectorType(unsignedElementType, vecType->getElementCount());
+                    auto unsignedVecVal = m_builder.emitBitCast(unsignedVecType, value);
+                    return emitSimpleStore(unsignedVecType, buffer, baseOffset, immediateOffset, unsignedVecVal);
+                }
+            }
+        }
+        else if( auto basicType = as<IRBasicType>(type) )
+        {
+            if(m_options.useBitCastFromUInt)
+            {
+                if( auto unsignedType = getSameSizeUIntType(basicType) )
+                {
+                    auto unsignedVal = m_builder.emitBitCast(unsignedType, value);
+                    return emitSimpleStore(unsignedType, buffer, baseOffset, immediateOffset, unsignedVal);
+                }
+            }
+        }
+
+        return emitSimpleStore(type, buffer, baseOffset, immediateOffset, value);
+    }
+
+    // Emit a single store of `value` (of type `type`) without recursing on
+    // the structure of `type`. This is the base case of `emitLegalStore`.
+    //
+    Result emitSimpleStore(IRType* type, IRInst* buffer, IRInst* baseOffset, IRIntegerValue immediateOffset, IRInst* value)
+    {
+        IRInst* offset = emitOffsetAddIfNeeded(baseOffset, immediateOffset);
+
+        if( m_options.translateToStructuredBufferOps )
+        {
+            if( auto structuredBuffer = getEquivalentStructuredBuffer(type, buffer) )
+            {
+                // Similar to the load case, if we are replacing byte-address
+                // buffers with structured buffers, then once we find the
+                // "equivalent" buffer to use, we emit a structured-buffer store,
+                // with an index computed by dividing the offset by the stride.
+                //
+                auto indexType = offset->getDataType();
+
+                IRSizeAndAlignment typeLayout;
+                SLANG_RETURN_ON_FAIL(getNaturalSizeAndAlignment(type, &typeLayout));
+
+                auto typeStride = m_builder.getIntValue(indexType, typeLayout.getStride());
+
+                IRInst* divArgs[] = { offset, typeStride };
+                auto index = m_builder.emitIntrinsicInst(indexType, kIROp_Div, 2, divArgs);
+
+                // A store produces no value, so the instruction is given
+                // `void` type, matching the byte-address store below.
+                //
+                IRInst* args[] = { structuredBuffer, index, value };
+                m_builder.emitIntrinsicInst(m_builder.getVoidType(), kIROp_StructuredBufferStore, 3, args);
+                return SLANG_OK;
+            }
+
+        }
+
+        {
+            IRInst* storeArgs[] = { buffer, offset, value };
+            m_builder.emitIntrinsicInst(m_builder.getVoidType(), kIROp_ByteAddressBufferStore, 3, storeArgs);
+            return SLANG_OK;
+        }
+    }
+
+    Result emitLegalSequenceStore(IRInst* buffer, IRInst* baseOffset, IRIntegerValue immediateOffset, IRInst* value, IRType* elementType, IRIntegerValue elementCount)
+    {
+        // The store case for sequences is similar to the load case.
+        //
+        // We iterate over the elements and fetch then store each one,
+        // stepping by the natural stride of the element type.
+        //
+        IRSizeAndAlignment elementLayout;
+        SLANG_RETURN_ON_FAIL(getNaturalSizeAndAlignment(elementType, &elementLayout));
+        IRIntegerValue elementStride = elementLayout.getStride();
+
+        auto indexType = m_builder.getIntType();
+        for( IRIntegerValue ii = 0; ii < elementCount; ++ii )
+        {
+            auto elementIndex = m_builder.getIntValue(indexType, ii);
+            auto elementVal = m_builder.emitElementExtract(elementType, value, elementIndex);
+            SLANG_RETURN_ON_FAIL(emitLegalStore(elementType, buffer, baseOffset, immediateOffset + ii*elementStride, elementVal));
+        }
+
+        return SLANG_OK;
+    }
+};
+
+
+void legalizeByteAddressBufferOps(
+    Session*                                    session,
+    IRModule*                                   module,
+    ByteAddressBufferLegalizationOptions const& options)
+{
+    ByteAddressBufferLegalizationContext context;
+    context.m_session = session;
+    context.m_options = options;
+    context.processModule(module);
+}
+
+}
+
diff --git a/source/slang/slang-ir-byte-address-legalize.h b/source/slang/slang-ir-byte-address-legalize.h
new file mode 100644
index 000000000..7b5c8ed3e
--- /dev/null
+++ b/source/slang/slang-ir-byte-address-legalize.h
@@ -0,0 +1,27 @@
+// slang-ir-byte-address-legalize.h
+#pragma once
+
+namespace Slang
+{
+class Session;
+struct IRModule;
+
+struct ByteAddressBufferLegalizationOptions
+{
+    bool scalarizeVectorLoadStore = false;
+    bool useBitCastFromUInt = false;
+    bool translateToStructuredBufferOps = false;
+};
+
+    /// Legalize byte-address buffer `Load()` and `Store()` operations.
+    ///
+    /// This function translates load/store operations that involve
+    /// aggregate types into primitive load-store operations on
+    /// scalar or vector types.
+    ///
+void legalizeByteAddressBufferOps(
+    Session*                                    session,
+    IRModule*                                   module,
+    ByteAddressBufferLegalizationOptions const& options);
+}
+
diff --git a/source/slang/slang-ir-inst-defs.h b/source/slang/slang-ir-inst-defs.h
index 6c01a700a..46ad566fd 100644
--- a/source/slang/slang-ir-inst-defs.h
+++ b/source/slang/slang-ir-inst-defs.h
@@ -233,6 +233,50 @@ INST(getElementPtr, getElementPtr, 2, 0)
 // "Subscript" an image at a pixel coordinate to get pointer
 INST(ImageSubscript, imageSubscript, 2, 0)
 
+// Load (almost) arbitrary-type data from a byte-address buffer
+//
+//     %dst = byteAddressBufferLoad(%buffer, %offset)
+//
+// where
+// - `buffer` is a value of some `ByteAddressBufferTypeBase` type
+// - `offset` is an `int`
+// - `dst` is a value of some type containing only ordinary data
+//
+INST(ByteAddressBufferLoad, byteAddressBufferLoad, 2, 0)
+
+// Store (almost) arbitrary-type data to a byte-address buffer
+//
+//     byteAddressBufferStore(%buffer, %offset, %src)
+//
+// where
+// - `buffer` is a value of some `ByteAddressBufferTypeBase` type
+// - `offset` is an `int`
+// - `src` is a value of some type containing only ordinary data
+//
+INST(ByteAddressBufferStore, byteAddressBufferStore, 3, 0)
+
+// Load data from a structured buffer
+//
+//     %dst = structuredBufferLoad(%buffer, %index)
+//
+// where
+// - `buffer` is a value of some `StructuredBufferTypeBase` type with element type T
+// - `index` is an `int`
+// - `dst` is a value of type T
+//
+INST(StructuredBufferLoad, structuredBufferLoad, 2, 0)
+
+// Store data to a structured buffer
+//
+//     structuredBufferStore(%buffer, %index, %src)
+//
+// where
+// - `buffer` is a value of some `StructuredBufferTypeBase` type with element type T
+// - `index` is an `int`
+// - `src` is a value of type T
+//
+INST(StructuredBufferStore, structuredBufferStore, 3, 0)
+
 // Construct a vector from a scalar
 //
 // %dst = constructVectorFromScalar %T %N %val
@@ -453,6 +497,12 @@ INST(HighLevelDeclDecoration, highLevelDecl, 1, 0)
     /// An `[unsafeForceInlineEarly]` decoration specifies that calls to this function should be inline after initial codegen
     INST(UnsafeForceInlineEarlyDecoration, unsafeForceInlineEarly, 0, 0)
 
+    /// A `[naturalSizeAndAlignment(s,a)]` decoration is attached to a type to indicate that it has natural size `s` and alignment `a`
+    INST(NaturalSizeAndAlignmentDecoration, naturalSizeAndAlignment, 2, 0)
+
+    /// A `[naturalOffset(o)]` decoration is attached to a field to indicate that it has natural offset `o` in the parent type
+    INST(NaturalOffsetDecoration, naturalOffset, 1, 0)
+
     /* LinkageDecoration */
     INST(ImportDecoration, import, 1, 0)
     INST(ExportDecoration, export, 1, 0)
diff --git a/source/slang/slang-ir-insts.h b/source/slang/slang-ir-insts.h
index e307dc41e..957a53a0e 100644
--- a/source/slang/slang-ir-insts.h
+++ b/source/slang/slang-ir-insts.h
@@ -359,6 +359,27 @@ struct IRFormatDecoration : IRDecoration
 
 IR_SIMPLE_DECORATION(UnsafeForceInlineEarlyDecoration)
 
+// Decoration carrying a size (operand 0) and an alignment (operand 1),
+// both stored as integer literals.
+struct IRNaturalSizeAndAlignmentDecoration : IRDecoration
+{
+    enum { kOp = kIROp_NaturalSizeAndAlignmentDecoration };
+    IR_LEAF_ISA(NaturalSizeAndAlignmentDecoration)
+
+    IRIntLit* getSizeOperand() { return cast<IRIntLit>(getOperand(0)); }
+    IRIntLit* getAlignmentOperand() { return cast<IRIntLit>(getOperand(1)); }
+
+    IRIntegerValue getSize() { return getSizeOperand()->getValue(); }
+    IRIntegerValue getAlignment() { return getAlignmentOperand()->getValue(); }
+};
+
+// Decoration carrying an offset (operand 0), stored as an integer literal.
+struct IRNaturalOffsetDecoration : IRDecoration
+{
+    enum { kOp = kIROp_NaturalOffsetDecoration };
+    IR_LEAF_ISA(NaturalOffsetDecoration)
+
+    IRIntLit* getOffsetOperand() { return cast<IRIntLit>(getOperand(0)); }
+
+    IRIntegerValue getOffset() { return getOffsetOperand()->getValue(); }
+};
 
 // An instruction that specializes another IR value
 // (representing a generic) to a particular set of generic arguments
diff --git a/source/slang/slang-ir-layout.cpp b/source/slang/slang-ir-layout.cpp
new file mode 100644
index 000000000..0003d279a
--- /dev/null
+++ b/source/slang/slang-ir-layout.cpp
@@ -0,0 +1,239 @@
+// slang-ir-layout.cpp
+#include "slang-ir-layout.h"
+
+#include "slang-ir-insts.h"
+
+// This file implements facilities for computing and caching layout
+// information on IR types.
+//
+// Unlike the AST-level layout system, this code currently only
+// handles the notion of "natural" layout for IR types, which is
+// the layout they use when stored in general-purpose memory
+// without additional constraints.
+//
+// In general, "natural" layout for all targets is assumed to follow
+// the same basic rules:
+//
+// * Scalars are all naturally aligned and have the "obvious" size
+//
+// * Arrays are laid out by separating elements by their "stride" (size rounded up to alignment)
+//
+// * Vectors are laid out as arrays of elements
+//
+// * Matrices are laid out as arrays of rows
+//
+// * Structures are laid out by packing fields in order, placing each field on the "next"
+//   suitably aligned offset. The alignment of a structure is the maximum alignment of
+//   its fields.
+//
+// Right now this file implements a one-size-fits-all version of natural
+// layout that might not be a perfect fit for all targets. In particular
+// this code currently assumes:
+//
+// * The `bool` type is laid out as 4 bytes (equivalent to an `int`)
+//
+// * The size of a structure or array type is *not* rounded up to a multiple
+//   of its alignment. This means that fields may be laid out in
+//   the "tail padding" of previous fields in the same structure. This is
+//   correct behavior for VK/D3D, but does not match the behavior of typical
+//   C/C++ compilers.
+//
+// * All matrices are laid out in row-major order, regardless of any
+//   settings in user code.
+//
+// TODO: Addressing the above issues would require extending this file to somehow
+// get target-specific layout information as an input. 
One option would be
+// to attach information about "natural" layout on the target to the `IRModuleInst`
+// as a decoration, similar to how an LLVM IR module stores a "layout string."
+
+namespace Slang
+{
+
+// Compute the natural size and alignment of `type` from scratch (no
+// cache lookup), writing the result to `outSizeAndAlignment`.
+//
+// As a side effect, laying out a struct type attaches a
+// `NaturalOffsetDecoration` to each of its fields when the type
+// belongs to a module.
+//
+// Returns `SLANG_FAIL` for any type not handled by the switch below.
+//
+static Result _calcNaturalSizeAndAlignment(IRType* type, IRSizeAndAlignment* outSizeAndAlignment)
+{
+    switch( type->op )
+    {
+
+#define CASE(TYPE, SIZE, ALIGNMENT)                                     \
+    case kIROp_##TYPE##Type:                                            \
+        *outSizeAndAlignment = IRSizeAndAlignment(SIZE, ALIGNMENT);     \
+        return SLANG_OK                                                 \
+        /* end */
+
+    // Most base types are "naturally aligned" (meaning alignment and size are the same)
+#define BASE(TYPE, SIZE) CASE(TYPE, SIZE, SIZE)
+
+    BASE(Int8,      1);
+    BASE(UInt8,     1);
+
+    BASE(Int16,     2);
+    BASE(UInt16,    2);
+    BASE(Half,      2);
+
+    BASE(Int,       4);
+    BASE(UInt,      4);
+    BASE(Float,     4);
+
+    BASE(Int64,     8);
+    BASE(UInt64,    8);
+    BASE(Double,    8);
+
+    // We are currently handling `bool` following the HLSL
+    // precedent of storing it in 4 bytes.
+    //
+    // TODO: It would be good to try to make this follow
+    // per-platform conventions, or at least to be able
+    // to use a 1-byte encoding where available.
+    //
+    BASE(Bool,      4);
+
+    // The Slang `void` type is treated as a zero-byte
+    // type, so that it does not influence layout at all.
+    //
+    CASE(Void,      0,  1);
+
+    // Both helper macros are local to this switch, so both are
+    // undefined here to keep them from leaking into later code.
+    //
+#undef BASE
+
+#undef CASE
+
+    case kIROp_StructType:
+        {
+            auto structType = cast<IRStructType>(type);
+            IRSizeAndAlignment structLayout;
+            for( auto field : structType->getFields() )
+            {
+                IRSizeAndAlignment fieldTypeLayout;
+                SLANG_RETURN_ON_FAIL(getNaturalSizeAndAlignment(field->getFieldType(), &fieldTypeLayout));
+
+                // Place the field at the next offset that satisfies its
+                // alignment; the struct takes the largest field alignment.
+                //
+                structLayout.size = align(structLayout.size, fieldTypeLayout.alignment);
+                structLayout.alignment = std::max(structLayout.alignment, fieldTypeLayout.alignment);
+
+                IRIntegerValue fieldOffset = structLayout.size;
+                if( auto module = type->getModule() )
+                {
+                    // If we are in a situation where attaching new
+                    // decorations is possible, then we want to
+                    // cache the field offset on the IR field
+                    // instruction.
+                    //
+                    SharedIRBuilder sharedBuilder;
+                    sharedBuilder.module = module;
+                    sharedBuilder.session = module->getSession();
+
+                    IRBuilder builder;
+                    builder.sharedBuilder = &sharedBuilder;
+
+                    auto intType = builder.getIntType();
+                    builder.addDecoration(
+                        field,
+                        kIROp_NaturalOffsetDecoration,
+                        builder.getIntValue(intType, fieldOffset));
+                }
+
+                structLayout.size += fieldTypeLayout.size;
+            }
+            *outSizeAndAlignment = structLayout;
+            return SLANG_OK;
+        }
+        break;
+
+    case kIROp_ArrayType:
+        {
+            auto arrayType = cast<IRArrayType>(type);
+
+            // Only arrays with a literal element count can be laid out.
+            //
+            auto elementCountLit = as<IRIntLit>(arrayType->getElementCount());
+            if(!elementCountLit)
+                return SLANG_FAIL;
+            auto elementCount = elementCountLit->getValue();
+
+            if( elementCount == 0 )
+            {
+                *outSizeAndAlignment = IRSizeAndAlignment(0, 1);
+                return SLANG_OK;
+            }
+
+            auto elementType = arrayType->getElementType();
+            IRSizeAndAlignment elementTypeLayout;
+            SLANG_RETURN_ON_FAIL(getNaturalSizeAndAlignment(elementType, &elementTypeLayout));
+
+            auto elementStride = elementTypeLayout.getStride();
+
+            // The last element contributes its size rather than its
+            // stride, so the array size is not padded at the end.
+            //
+            *outSizeAndAlignment = IRSizeAndAlignment(
+                elementStride * (elementCount - 1) + elementTypeLayout.size,
+                elementTypeLayout.alignment);
+            return SLANG_OK;
+        }
+        break;
+
+    default:
+        // NOTE(review): there are no cases above for vector or matrix
+        // types, so they currently report failure here even though
+        // natural layout rules for them are described elsewhere;
+        // confirm whether that is intended.
+        //
+        return SLANG_FAIL;
+    }
+}
+
+Result getNaturalSizeAndAlignment(IRType* type, IRSizeAndAlignment* outSizeAndAlignment)
+{
+    // A previous query may already have cached the answer on the type.
+    //
+    if( auto decor = type->findDecoration<IRNaturalSizeAndAlignmentDecoration>() )
+    {
+        *outSizeAndAlignment = IRSizeAndAlignment(decor->getSize(), static_cast<int>(decor->getAlignment()));
+        return SLANG_OK;
+    }
+
+    IRSizeAndAlignment sizeAndAlignment;
+    SLANG_RETURN_ON_FAIL(_calcNaturalSizeAndAlignment(type, &sizeAndAlignment));
+
+    // When the type belongs to a module we can attach a decoration to
+    // it, which serves as the cache consulted at the top of this function.
+    //
+    if( auto module = type->getModule() )
+    {
+        SharedIRBuilder sharedBuilder;
+        sharedBuilder.module = module;
+        sharedBuilder.session = module->getSession();
+
+        IRBuilder builder;
+        builder.sharedBuilder = &sharedBuilder;
+
+        auto intType = builder.getIntType();
+        builder.addDecoration(
+            type,
+            kIROp_NaturalSizeAndAlignmentDecoration,
+            builder.getIntValue(intType, sizeAndAlignment.size),
+            builder.getIntValue(intType, sizeAndAlignment.alignment));
+    }
+
+    *outSizeAndAlignment = sizeAndAlignment;
+    return SLANG_OK;
+}
+
+
+Result getNaturalOffset(IRStructField* field, IRIntegerValue* outOffset)
+{
+    if( auto decor = field->findDecoration<IRNaturalOffsetDecoration>() )
+    {
+        *outOffset = decor->getOffset();
+        return SLANG_OK;
+    }
+
+    // Offsets are computed as part of laying out types,
+    // so we expect that layout of the "parent" type
+    // of the field should add an offset to it if
+    // possible.
+
+    auto structType = as<IRStructType>(field->getParent());
+    if(!structType)
+        return SLANG_FAIL;
+
+    IRSizeAndAlignment structTypeLayout;
+    SLANG_RETURN_ON_FAIL(getNaturalSizeAndAlignment(structType, &structTypeLayout));
+
+    if( auto decor = field->findDecoration<IRNaturalOffsetDecoration>() )
+    {
+        *outOffset = decor->getOffset();
+        return SLANG_OK;
+    }
+
+    // If attempting to lay out the parent type didn't
+    // cause the field to get an offset, then we are
+    // in an unexpected case with no easy answer.
+    //
+    return SLANG_FAIL;
+}
+
+}
diff --git a/source/slang/slang-ir-layout.h b/source/slang/slang-ir-layout.h
new file mode 100644
index 000000000..64653b5f3
--- /dev/null
+++ b/source/slang/slang-ir-layout.h
@@ -0,0 +1,70 @@
+// slang-ir-layout.h
+#pragma once
+
+// This file provides utilities for computing and caching the *natural*
+// layout of types in the IR.
+//
+// The natural layout is the layout a target uses for a type when it is
+// stored in unconstrained general-purpose memory (to the extent that
+// the target supports unconstrained general-purpose memory).
+//
+// For targets like the CPU and CUDA which support a simple flat address
+// space, the natural layout is the only layout used for any type.
+//
+// For targets like D3D DXBC/DXIL and Vulkan SPIR-V, the natural layout
+// matches how a type is stored in a "structured buffer" or "shader
+// storage buffer."
+//
+
+#include "slang-ir.h"
+
+
+namespace Slang
+{
+
+    /// Align `value` to the next multiple of `alignment`, which must be a power of two.
+inline IRIntegerValue align(IRIntegerValue value, int alignment)
+{
+    return (value + alignment-1) & ~IRIntegerValue(alignment-1);
+}
+
+
+    /// The size and alignment of an IR type.
+struct IRSizeAndAlignment
+{
+    IRSizeAndAlignment()
+    {}
+
+    IRSizeAndAlignment(IRIntegerValue size, int alignment)
+        : size(size)
+        , alignment(alignment)
+    {}
+
+    IRIntegerValue size = 0;
+
+    int alignment = 1;
+
+        /// The size rounded up to a multiple of the alignment.
+    inline IRIntegerValue getStride() const
+    {
+        return align(size, alignment);
+    }
+};
+
+    /// Compute (if necessary) and return the natural size and alignment of `type`.
+    ///
+    /// This operation may fail if `type` is not one that can be stored in
+    /// general-purpose memory for the current target. In that case the
+    /// type is considered to have no natural layout.
+ /// +Result getNaturalSizeAndAlignment(IRType* type, IRSizeAndAlignment* outSizeAndAlignment); + + /// Compute (if necessary) and return the natural offset of `field` + /// + /// This operation can fail if the parent type of `field` is not one + /// that can be stored in general-purpose memory. In that case, the + /// field is considered to have no natural offset. + /// +Result getNaturalOffset(IRStructField* field, IRIntegerValue* outOffset); + +} + diff --git a/source/slang/slang.vcxproj b/source/slang/slang.vcxproj index 766893da3..e97d38256 100644 --- a/source/slang/slang.vcxproj +++ b/source/slang/slang.vcxproj @@ -209,6 +209,7 @@ <ClInclude Include="slang-hlsl-intrinsic-set.h" /> <ClInclude Include="slang-image-format-defs.h" /> <ClInclude Include="slang-ir-bind-existentials.h" /> + <ClInclude Include="slang-ir-byte-address-legalize.h" /> <ClInclude Include="slang-ir-clone.h" /> <ClInclude Include="slang-ir-constexpr.h" /> <ClInclude Include="slang-ir-dce.h" /> @@ -218,6 +219,7 @@ <ClInclude Include="slang-ir-inline.h" /> <ClInclude Include="slang-ir-inst-defs.h" /> <ClInclude Include="slang-ir-insts.h" /> + <ClInclude Include="slang-ir-layout.h" /> <ClInclude Include="slang-ir-link.h" /> <ClInclude Include="slang-ir-missing-return.h" /> <ClInclude Include="slang-ir-restructure-scoping.h" /> @@ -295,6 +297,7 @@ <ClCompile Include="slang-glsl-extension-tracker.cpp" /> <ClCompile Include="slang-hlsl-intrinsic-set.cpp" /> <ClCompile Include="slang-ir-bind-existentials.cpp" /> + <ClCompile Include="slang-ir-byte-address-legalize.cpp" /> <ClCompile Include="slang-ir-clone.cpp" /> <ClCompile Include="slang-ir-constexpr.cpp" /> <ClCompile Include="slang-ir-dce.cpp" /> @@ -302,6 +305,7 @@ <ClCompile Include="slang-ir-entry-point-uniforms.cpp" /> <ClCompile Include="slang-ir-glsl-legalize.cpp" /> <ClCompile Include="slang-ir-inline.cpp" /> + <ClCompile Include="slang-ir-layout.cpp" /> <ClCompile Include="slang-ir-legalize-types.cpp" /> <ClCompile 
Include="slang-ir-link.cpp" /> <ClCompile Include="slang-ir-missing-return.cpp" /> diff --git a/source/slang/slang.vcxproj.filters b/source/slang/slang.vcxproj.filters index 442f545c6..f46e77ebc 100644 --- a/source/slang/slang.vcxproj.filters +++ b/source/slang/slang.vcxproj.filters @@ -78,6 +78,9 @@ <ClInclude Include="slang-ir-bind-existentials.h"> <Filter>Header Files</Filter> </ClInclude> + <ClInclude Include="slang-ir-byte-address-legalize.h"> + <Filter>Header Files</Filter> + </ClInclude> <ClInclude Include="slang-ir-clone.h"> <Filter>Header Files</Filter> </ClInclude> @@ -105,6 +108,9 @@ <ClInclude Include="slang-ir-insts.h"> <Filter>Header Files</Filter> </ClInclude> + <ClInclude Include="slang-ir-layout.h"> + <Filter>Header Files</Filter> + </ClInclude> <ClInclude Include="slang-ir-link.h"> <Filter>Header Files</Filter> </ClInclude> @@ -332,6 +338,9 @@ <ClCompile Include="slang-ir-bind-existentials.cpp"> <Filter>Source Files</Filter> </ClCompile> + <ClCompile Include="slang-ir-byte-address-legalize.cpp"> + <Filter>Source Files</Filter> + </ClCompile> <ClCompile Include="slang-ir-clone.cpp"> <Filter>Source Files</Filter> </ClCompile> @@ -353,6 +362,9 @@ <ClCompile Include="slang-ir-inline.cpp"> <Filter>Source Files</Filter> </ClCompile> + <ClCompile Include="slang-ir-layout.cpp"> + <Filter>Source Files</Filter> + </ClCompile> <ClCompile Include="slang-ir-legalize-types.cpp"> <Filter>Source Files</Filter> </ClCompile> |
