From d9c57e613f2dacd221d9c46c10395cf373a8fcaf Mon Sep 17 00:00:00 2001 From: Theresa Foley <10618364+tangent-vector@users.noreply.github.com> Date: Mon, 10 Jul 2023 17:48:51 -0700 Subject: Add support for texture footprint queries (#2970) --- source/slang/hlsl.meta.slang | 645 ++++++++++++++++++++++++++ source/slang/slang-check-conversion.cpp | 16 + source/slang/slang-diagnostic-defs.h | 1 + source/slang/slang-emit.cpp | 5 + source/slang/slang-intrinsic-expand.cpp | 23 + source/slang/slang-ir-inst-defs.h | 8 + source/slang/slang-ir-insts.h | 20 + source/slang/slang-ir-lower-binding-query.cpp | 610 ++++++++++++++++++++++++ source/slang/slang-ir-lower-binding-query.h | 22 + source/slang/slang-lower-to-ir.cpp | 1 + 10 files changed, 1351 insertions(+) create mode 100644 source/slang/slang-ir-lower-binding-query.cpp create mode 100644 source/slang/slang-ir-lower-binding-query.h (limited to 'source') diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index 3847168ab..ebc91095f 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -7105,3 +7105,648 @@ func saturated_cooperation_using( return fallback(input, otherArg); } } + +${{{{ +// +// Texture Footprint Queries +// +// This section introduces the types and methods related +// to the `GL_NV_shader_texture_footprint` GLSL extension, +// and the matching NVAPI operations. +// +// Footprint queries are allowed on both 2D and 3D textures, +// and are structurally similar for the two, so we will +// use a meta-loop to deduplicate the code for the two +// cases. 
+// +static const struct FootprintTextureShape +{ + int rank; + char const* suffix; + char const* texelIndexType; + char const* sampleCoordsType; +} kFootprintTextureShapes[] = +{ + { 2, "2D", "uint2", "float2" }, + { 3, "3D", "uint3", "float3" }, +}; +for(auto shape : kFootprintTextureShapes) +{ + auto ND = shape.suffix; + auto anchorType = shape.texelIndexType; + auto offsetType = shape.texelIndexType; + auto coordsType = shape.sampleCoordsType; + +// A footprint query yields a data structure +// that describes blocks of texels that +// conservatively cover the data that might +// be fetched in the query. +// +// A given sampling operation might access two +// mip levels of a texture when, e.g., trilinear +// filtering is on. A footprint query may ask for +// a footprint in either the coarse or fine level +// of the pair. +// +// We first define a `struct` type that closely maps +// to how a footprint is defined for each of the +// implementations we support, and then wrap that +// in a derived `struct` that includes the extra +// data that is returned by the GLSL API via the +// function result. 
+// +}}}} + +__glsl_version(450) +__glsl_extension(GL_NV_shader_texture_footprint) +__target_intrinsic(glsl, gl_TextureFootprint$(ND)NV) +__target_intrinsic(hlsl, uint4) +struct __TextureFootprintData$(ND) +{ + typealias Anchor = $(anchorType); + typealias Offset = $(offsetType); + typealias Mask = uint2; + typealias LOD = uint; + typealias Granularity = uint; + + property anchor : Anchor + { + [__NoSideEffect] + [__requiresNVAPI] + __target_intrinsic(hlsl, NvFootprintExtractAnchorTileLoc$(ND)) + __target_intrinsic(glsl, "$0.anchor") + get; + } + + property offset : Offset + { + [__NoSideEffect] + [__requiresNVAPI] + __target_intrinsic(hlsl, NvFootprintExtractOffset$(ND)) + __target_intrinsic(glsl, "$0.offset") + get; + } + + property mask : Mask + { + [__NoSideEffect] + [__requiresNVAPI] + __target_intrinsic(hlsl, NvFootprintExtractBitmask) + __target_intrinsic(glsl, "$0.mask") + get; + } + + property lod : LOD + { + [__NoSideEffect] + [__requiresNVAPI] + __target_intrinsic(hlsl, NvFootprintExtractLOD) + __target_intrinsic(glsl, "$0.lod") + get; + } + + property granularity : Granularity + { + [__NoSideEffect] + [__requiresNVAPI] + __target_intrinsic(hlsl, NvFootprintExtractReturnGran) + __target_intrinsic(glsl, "$0.granularity") + get; + } +} + +struct TextureFootprint$(ND) : __TextureFootprintData$(ND) +{ + bool _isSingleLevel; + + property isSingleLevel : bool + { + [__NoSideEffect] + get + { + return _isSingleLevel; + } + } +} + +${ +// The NVAPI operations are defined to take the space/register +// indices of their texture and sampler parameters, rather than +// taking the texture/sampler objects directly. +// +// In order to support this approach, we need intrinsics that +// can magically fetch the binding information for a resource. +// +// TODO: These operations are kind of *screaming* for us to +// have a built-in `interface` that all of the opaque resource +// types conform to, so that we can define builtins that work +// for any resource type. 
+} + +__intrinsic_op($(kIROp_GetRegisterSpace)) uint __getRegisterSpace(Texture2D texture); +__intrinsic_op($(kIROp_GetRegisterSpace)) uint __getRegisterSpace(Texture3D texture); +__intrinsic_op($(kIROp_GetRegisterSpace)) uint __getRegisterSpace(SamplerState sampler); + +__intrinsic_op($(kIROp_GetRegisterIndex)) uint __getRegisterIndex(Texture2D texture); +__intrinsic_op($(kIROp_GetRegisterIndex)) uint __getRegisterIndex(Texture3D texture); +__intrinsic_op($(kIROp_GetRegisterIndex)) uint __getRegisterIndex(SamplerState sampler); + +${ +// We define the new operations via an `extension` +// on the relevant texture type(s), rather than +// further clutter the original type declarations. +} + +__generic +extension Texture$(ND) +{ +${ +// We introduce a few convenience type aliases here, +// which both keep our declarations simpler and easier +// to understand, but which might *also* be useful to +// users of the stdlib, so that they can write things +// like `Texture2D.Footprint`, and also have auto-complete +// help them find such members. +// +// TODO: The `Coords` type really ought to be something +// defined on the base texture types, rather than via +// this `extension`. +} + typealias Coords = $(coordsType); + typealias Footprint = TextureFootprint$(ND); + typealias __FootprintData = __TextureFootprintData$(ND); + typealias FootprintGranularity = Footprint.Granularity; + +${ +// For the GLSL extension, the choice between the +// coarse and fine level is modeled as a `bool` +// parameter to the query operation(s). We define +// the GLSL functions here as intrinsics, so that +// we can refer to them later in the definitions +// of our stdlib operations. +// +// Note: despite the GLSL extension defining the `granularity` +// member of the query result as having type `uint`, the +// function signatures all take `int` parameters for the +// granularity instead. 
+// +} + + [__NoSideEffect] + __glsl_version(450) + __glsl_extension(GL_NV_shader_texture_footprint) + __target_intrinsic(glsl, + "textureFootprintNV($p, $*2)") + bool __queryFootprintGLSL( + SamplerState sampler, + Coords coords, + int granularity, + bool useCoarseLevel, + out __FootprintData footprint); + + [__NoSideEffect] + __glsl_version(450) + __glsl_extension(GL_NV_shader_texture_footprint) + __target_intrinsic(glsl, + "textureFootprintNV($p, $*2)") + bool __queryFootprintGLSL( + SamplerState sampler, + Coords coords, + int granularity, + bool useCoarseLevel, + out __FootprintData footprint, + float bias); + + [__NoSideEffect] + __glsl_version(450) + __glsl_extension(GL_NV_shader_texture_footprint) + __glsl_extension(GL_ARB_sparse_texture_clamp) + __target_intrinsic(glsl, + "textureFootprintClampNV($p, $*2)") + bool __queryFootprintClampGLSL( + SamplerState sampler, + Coords coords, + float lodClamp, + int granularity, + bool useCoarseLevel, + out __FootprintData footprint); + + [__NoSideEffect] + __glsl_version(450) + __glsl_extension(GL_NV_shader_texture_footprint) + __glsl_extension(GL_ARB_sparse_texture_clamp) + __target_intrinsic(glsl, + "textureFootprintClampNV($p, $*2)") + bool __queryFootprintClampGLSL( + SamplerState sampler, + Coords coords, + float lodClamp, + int granularity, + bool useCoarseLevel, + out __FootprintData footprint, + float bias); + + [__NoSideEffect] + __glsl_version(450) + __glsl_extension(GL_NV_shader_texture_footprint) + __target_intrinsic(glsl, + "textureFootprintLodNV($p, $*2)") + [__requiresNVAPI] + bool __queryFootprintLodGLSL( + SamplerState sampler, + Coords coords, + float lod, + int granularity, + bool useCoarseLevel, + out __FootprintData footprint); + + +${{{ + // Texture sampling with gradient is only available for 2D textures. 
+ if(shape.rank == 2) { +}}} + [__NoSideEffect] + __glsl_version(450) + __glsl_extension(GL_NV_shader_texture_footprint) + __target_intrinsic(glsl, + "textureFootprintGradNV($p, $*2)") + [__requiresNVAPI] + bool __queryFootprintGradGLSL( + SamplerState sampler, + Coords coords, + Coords dx, + Coords dy, + int granularity, + bool useCoarseLevel, + out __FootprintData footprint); + + [__NoSideEffect] + __glsl_version(450) + __glsl_extension(GL_NV_shader_texture_footprint) + __glsl_extension(GL_ARB_sparse_texture_clamp) + __target_intrinsic(glsl, + "textureFootprintGradClampNV($p, $*2)") + bool __queryFootprintGradClampGLSL( + SamplerState sampler, + Coords coords, + Coords dx, + Coords dy, + float lodClamp, + int granularity, + bool useCoarseLevel, + out __FootprintData footprint); +${{{ + } +}}} + + +${{{{ +// The NVAPI texture query operations encode the choice +// between coarse and fine levels as part of the function +// name, and so we are forced to match this convention +// if we want to provide a more portable API. +// +// TODO: We could conceivably define the functions to use +// a parameter for the coarse/fine choice, which is required +// to be `constexpr` for the HLSL/NVAPI target. +// +static const struct LevelChoice +{ +char const* name; +char const* isCoarseVal; +} kLevelChoices[] = +{ + { "Coarse", "true" }, + { "Fine", "false" }, +}; +for(auto levelChoice : kLevelChoices) +{ + auto CoarseOrFine = levelChoice.name; + auto isCoarseVal = levelChoice.isCoarseVal; + +// We now go ahead and define the intrinsics provided by NVAPI, +// which have a very different signature from the GLSL ones. +// +// Note: the NVAPI functions also support an optional texel +// offset parameter. For now we are not including overloads +// with that parameter, since they have no equivalent in +// the GLSL extension. 
+// +}}}} + + [__NoSideEffect] + [__requiresNVAPI] + __target_intrinsic(hlsl, + "NvFootprint$(CoarseOrFine)($0, $1, $2, $3, NV_EXTN_TEXTURE_$(ND), $*4)") + static __FootprintData __queryFootprint$(CoarseOrFine)NVAPI( + uint textureSpace, + uint textureIndex, + uint samplerSpace, + uint samplerIndex, + Coords coords, + FootprintGranularity granularity); + + [__NoSideEffect] + [__requiresNVAPI] + __target_intrinsic(hlsl, + "NvFootprint$(CoarseOrFine)Bias($0, $1, $2, $3, NV_EXTN_TEXTURE_$(ND), $*4)") + static __FootprintData __queryFootprint$(CoarseOrFine)BiasNVAPI( + uint textureSpace, + uint textureIndex, + uint samplerSpace, + uint samplerIndex, + Coords coords, + FootprintGranularity granularity, + float lodBias); + + [__NoSideEffect] + [__requiresNVAPI] + __target_intrinsic(hlsl, + "NvFootprint$(CoarseOrFine)Level($0, $1, $2, $3, NV_EXTN_TEXTURE_$(ND), $*4)") + static __FootprintData __queryFootprint$(CoarseOrFine)LevelNVAPI( + uint textureSpace, + uint textureIndex, + uint samplerSpace, + uint samplerIndex, + Coords coords, + FootprintGranularity granularity, + float lod); + +${{{ + // Texture sampling with gradient is only available for 2D textures. + if(shape.rank == 2) { +}}} + [__NoSideEffect] + [__requiresNVAPI] + __target_intrinsic(hlsl, + "NvFootprint$(CoarseOrFine)Grad($0, $1, $2, $3, NV_EXTN_TEXTURE_$(ND), $*4)") + static __FootprintData __queryFootprint$(CoarseOrFine)GradNVAPI( + uint textureSpace, + uint textureIndex, + uint samplerSpace, + uint samplerIndex, + Coords coords, + FootprintGranularity granularity, + Coords dx, + Coords dy); +${{{ + } +}}} + +${ +// We now define the portable operations that will be officially +// supported by the standard library. For each operation, we +// need to provide both a version that maps to the GLSL extension, +// and a version that uses the NVAPI functions. 
+// +// Some function variations are only available with one extension +// or the other, so we try our best to only define them where +// each is available. +} + + /// Query the footprint that would be accessed by a texture sampling operation. + /// + /// This operation queries the footprint that would be accessed + /// by a comparable call to: + /// + /// t.Sample(sampler, coords); + /// + [__NoSideEffect] + __specialized_for_target(glsl) + Footprint queryFootprint$(CoarseOrFine)( + FootprintGranularity granularity, + SamplerState sampler, + Coords coords) + { + Footprint footprint; + footprint._isSingleLevel = __queryFootprintGLSL(sampler, coords, granularity, $(isCoarseVal), footprint); + return footprint; + } + + /// Query the footprint that would be accessed by a texture sampling operation. + /// + /// This operation queries the footprint that would be accessed + /// by a comparable call to: + /// + /// t.Sample(sampler, coords); + /// + [__NoSideEffect] + __specialized_for_target(hlsl) + Footprint queryFootprint$(CoarseOrFine)( + FootprintGranularity granularity, + SamplerState sampler, + Coords coords) + { + return { __queryFootprint$(CoarseOrFine)NVAPI( + __getRegisterSpace(this), __getRegisterIndex(this), + __getRegisterSpace(sampler), __getRegisterIndex(sampler), + coords, granularity), false }; + } + + /// Query the footprint that would be accessed by a texture sampling operation. 
+ /// + /// This operation queries the footprint that would be accessed + /// by a comparable call to: + /// + /// t.SampleBias(sampler, coords, lodBias); + /// + [__NoSideEffect] + __specialized_for_target(glsl) + Footprint queryFootprint$(CoarseOrFine)Bias( + FootprintGranularity granularity, + SamplerState sampler, + Coords coords, + float lodBias) + { + Footprint footprint; + footprint._isSingleLevel = __queryFootprintGLSL(sampler, coords, granularity, $(isCoarseVal), footprint, lodBias); + return footprint; + } + + /// Query the footprint that would be accessed by a texture sampling operation. + /// + /// This operation queries the footprint that would be accessed + /// by a comparable call to: + /// + /// t.SampleBias(sampler, coords, lodBias); + /// + [__NoSideEffect] + __specialized_for_target(hlsl) + Footprint queryFootprint$(CoarseOrFine)Bias( + FootprintGranularity granularity, + SamplerState sampler, + Coords coords, + float lodBias) + { + return { __queryFootprint$(CoarseOrFine)BiasNVAPI( + __getRegisterSpace(this), __getRegisterIndex(this), + __getRegisterSpace(sampler), __getRegisterIndex(sampler), + coords, granularity, lodBias), false }; + } + + /// Query the footprint that would be accessed by a texture sampling operation. + /// + /// This operation queries the footprint that would be accessed + /// by a comparable call to: + /// + /// t.SampleClamp(sampler, coords, lodClamp); + /// + [__NoSideEffect] + __specialized_for_target(glsl) + Footprint queryFootprint$(CoarseOrFine)Clamp( + FootprintGranularity granularity, + SamplerState sampler, + Coords coords, + float lodClamp) + { + Footprint footprint; + footprint._isSingleLevel = __queryFootprintClampGLSL(sampler, coords, lodClamp, granularity, $(isCoarseVal), footprint); + return footprint; + } + + /// Query the footprint that would be accessed by a texture sampling operation. 
+ /// + /// This operation queries the footprint that would be accessed + /// by a comparable call to: + /// + /// t.SampleBiasClamp(sampler, coords, lodBias, lodClamp); + /// + [__NoSideEffect] + __specialized_for_target(glsl) + Footprint queryFootprint$(CoarseOrFine)BiasClamp( + FootprintGranularity granularity, + SamplerState sampler, + Coords coords, + float lodBias, + float lodClamp) + { + Footprint footprint; + footprint._isSingleLevel = __queryFootprintClampGLSL(sampler, coords, lodClamp, granularity, $(isCoarseVal), footprint, lodBias); + return footprint; + } + + /// Query the footprint that would be accessed by a texture sampling operation. + /// + /// This operation queries the footprint that would be accessed + /// by a comparable call to: + /// + /// t.SampleLevel(sampler, coords, lod); + /// + [__NoSideEffect] + __specialized_for_target(glsl) + Footprint queryFootprint$(CoarseOrFine)Level( + FootprintGranularity granularity, + SamplerState sampler, + Coords coords, + float lod) + { + Footprint footprint; + footprint._isSingleLevel = __queryFootprintLodGLSL(sampler, coords, lod, granularity, $(isCoarseVal), footprint); + return footprint; + } + + /// Query the footprint that would be accessed by a texture sampling operation. + /// + /// This operation queries the footprint that would be accessed + /// by a comparable call to: + /// + /// t.SampleLevel(sampler, coords, lod); + /// + [__NoSideEffect] + __specialized_for_target(hlsl) + Footprint queryFootprint$(CoarseOrFine)Level( + FootprintGranularity granularity, + SamplerState sampler, + Coords coords, + float lod) + { + return { __queryFootprint$(CoarseOrFine)LevelNVAPI( + __getRegisterSpace(this), __getRegisterIndex(this), + __getRegisterSpace(sampler), __getRegisterIndex(sampler), + coords, granularity, lod), false }; + } + + +${{{ + // Texture sampling with gradient is only available for 2D textures. 
+ if(shape.rank == 2) { +}}} + + /// Query the footprint that would be accessed by a texture sampling operation. + /// + /// This operation queries the footprint that would be accessed + /// by a comparable call to: + /// + /// t.SampleGrad(sampler, coords, dx, dy); + /// + [__NoSideEffect] + __specialized_for_target(glsl) + Footprint queryFootprint$(CoarseOrFine)Grad( + FootprintGranularity granularity, + SamplerState sampler, + Coords coords, + Coords dx, + Coords dy) + { + Footprint footprint; + footprint._isSingleLevel = __queryFootprintGradGLSL(sampler, coords, dx, dy, granularity, $(isCoarseVal), footprint); + return footprint; + } + + /// Query the footprint that would be accessed by a texture sampling operation. + /// + /// This operation queries the footprint that would be accessed + /// by a comparable call to: + /// + /// t.SampleGrad(sampler, coords, dx, dy); + /// + [__NoSideEffect] + __specialized_for_target(hlsl) + Footprint queryFootprint$(CoarseOrFine)Grad( + FootprintGranularity granularity, + SamplerState sampler, + Coords coords, + Coords dx, + Coords dy) + { + return { __queryFootprint$(CoarseOrFine)GradNVAPI( + __getRegisterSpace(this), __getRegisterIndex(this), + __getRegisterSpace(sampler), __getRegisterIndex(sampler), + coords, granularity, dx, dy), false }; + } + + /// Query the footprint that would be accessed by a texture sampling operation. 
+ /// + /// This operation queries the footprint that would be accessed + /// by a comparable call to: + /// + /// t.SampleGradClamp(sampler, coords, dx, dy, lodClamp); + /// + [__NoSideEffect] + __specialized_for_target(glsl) + Footprint queryFootprint$(CoarseOrFine)GradClamp( + FootprintGranularity granularity, + SamplerState sampler, + Coords coords, + Coords dx, + Coords dy, + float lodClamp) + { + Footprint footprint; + footprint._isSingleLevel = __queryFootprintGradClampGLSL(sampler, coords, dx, dy, lodClamp, granularity, $(isCoarseVal), footprint); + return footprint; + } + +${{{ + } // if(shape.rank == 2) +}}} + +${{{{ +} +}}}} + +} // extension + +${{{{ +} +}}}} diff --git a/source/slang/slang-check-conversion.cpp b/source/slang/slang-check-conversion.cpp index a6130d4e8..357b75cce 100644 --- a/source/slang/slang-check-conversion.cpp +++ b/source/slang/slang-check-conversion.cpp @@ -902,7 +902,23 @@ namespace Slang if(auto witness = tryGetSubtypeWitness(fromType, toAggTypeDeclRef)) { if (outToExpr) + { *outToExpr = createCastToSuperTypeExpr(toType, fromExpr, witness); + + // If the original expression was an l-value, then the result + // of the cast may be an l-value itself. We want to be able + // to invoke `[mutating]` methods on a value that is cast to + // an interface it conforms to, and we also expect to be able + // to pass a value of a derived `struct` type into methods that + // expect a value of its base type. + // + // TODO: vet this logic for correctness. 
+ // + if (fromExpr && fromExpr->type.isLeftValue) + { + (*outToExpr)->type.isLeftValue = true; + } + } if (outCost) *outCost = kConversionCost_CastToInterface; return true; diff --git a/source/slang/slang-diagnostic-defs.h b/source/slang/slang-diagnostic-defs.h index 8f6526d9d..92f79eac5 100644 --- a/source/slang/slang-diagnostic-defs.h +++ b/source/slang/slang-diagnostic-defs.h @@ -710,6 +710,7 @@ DIAGNOSTIC(55102, Error, invalidTorchKernelParamType, "'$0' is not a valid param DIAGNOSTIC(81110, Error, nvapiMacroMismatch, "conflicting definitions for NVAPI macro '$0': '$1' and '$2'") +DIAGNOSTIC(81111, Error, opaqueReferenceMustResolveToGlobal, "could not determine register/space for a resource or sampler used with NVAPI") // 99999 - Internal compiler errors, and not-yet-classified diagnostics. diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp index 4c050ffcc..41230d169 100644 --- a/source/slang/slang-emit.cpp +++ b/source/slang/slang-emit.cpp @@ -30,6 +30,7 @@ #include "slang-ir-legalize-varying-params.h" #include "slang-ir-link.h" #include "slang-ir-com-interface.h" +#include "slang-ir-lower-binding-query.h" #include "slang-ir-lower-generics.h" #include "slang-ir-lower-tuple-types.h" #include "slang-ir-lower-result-type.h" @@ -856,6 +857,10 @@ Result linkAndOptimizeIR( cleanUpVoidType(irModule); + // Lower the `getRegisterIndex` and `getRegisterSpace` intrinsics. + // + lowerBindingQueries(irModule, sink); + // For some small improvement in type safety we represent these as opaque // structs instead of regular arrays. 
// diff --git a/source/slang/slang-intrinsic-expand.cpp b/source/slang/slang-intrinsic-expand.cpp index fd1a4e4d5..3b551f844 100644 --- a/source/slang/slang-intrinsic-expand.cpp +++ b/source/slang/slang-intrinsic-expand.cpp @@ -813,6 +813,29 @@ const char* IntrinsicExpandContext::_emitSpecial(const char* cursor) } break; + case '*': + { + // An escape like `$*3` indicates that all arguments + // from index 3 (in this example) and up should be + // emitted as comma-separated expressions. + // + // We therefore expect the next byte to be a digit: + // + SLANG_RELEASE_ASSERT(*cursor >= '0' && *cursor <= '9'); + Index firstArgIndex = (*cursor++) - '0' + m_argIndexOffset; + SLANG_RELEASE_ASSERT(m_argCount > firstArgIndex); + + for (Index argIndex = firstArgIndex; argIndex < m_argCount; ++argIndex) + { + if (argIndex != firstArgIndex) + { + m_writer->emit(", "); + } + m_emitter->emitOperand(m_args[argIndex].get(), getInfo(EmitOp::General)); + } + } + break; + default: SLANG_UNEXPECTED("bad format in intrinsic definition"); break; diff --git a/source/slang/slang-ir-inst-defs.h b/source/slang/slang-ir-inst-defs.h index 636264a6d..60f07c17f 100644 --- a/source/slang/slang-ir-inst-defs.h +++ b/source/slang/slang-ir-inst-defs.h @@ -620,6 +620,14 @@ INST(TorchTensorGetView, TorchTensorGetView, 0, 0) INST(AllocateOpaqueHandle, allocateOpaqueHandle, 0, 0) + // Return the register index that a resource is bound to. + INST(GetRegisterIndex, getRegisterIndex, 1, 0) + + // Return the register space that a resource is bound to. 
+ INST(GetRegisterSpace, getRegisterSpace, 1, 0) + +INST_RANGE(BindingQuery, GetRegisterIndex, GetRegisterSpace) + /* Decoration */ INST(HighLevelDeclDecoration, highLevelDecl, 1, 0) diff --git a/source/slang/slang-ir-insts.h b/source/slang/slang-ir-insts.h index 18a0677b0..4eb3982d3 100644 --- a/source/slang/slang-ir-insts.h +++ b/source/slang/slang-ir-insts.h @@ -2680,6 +2680,26 @@ struct IRLiveRangeEnd : IRLiveRangeMarker IR_LEAF_ISA(LiveRangeEnd); }; +/// An instruction that queries binding information about an opaque/resource value. +/// +struct IRBindingQuery : IRInst +{ + IR_PARENT_ISA(BindingQuery); + + IRInst* getOpaqueValue() { return getOperand(0); } +}; + +struct IRGetRegisterIndex : IRBindingQuery +{ + IR_LEAF_ISA(GetRegisterIndex); +}; + +struct IRGetRegisterSpace : IRBindingQuery +{ + IR_LEAF_ISA(GetRegisterSpace); +}; + + struct IRBuilderSourceLocRAII; struct IRBuilder diff --git a/source/slang/slang-ir-lower-binding-query.cpp b/source/slang/slang-ir-lower-binding-query.cpp new file mode 100644 index 000000000..d0128d365 --- /dev/null +++ b/source/slang/slang-ir-lower-binding-query.cpp @@ -0,0 +1,610 @@ +// slang-ir-lower-binding-query.cpp + +#include "slang-ir-lower-tuple-types.h" +#include "slang-ir.h" +#include "slang-ir-insts.h" + +// The pass in this file lowers the `getRegisterIndex()` and +// `getRegisterSpace()` intrinsics, by replacing them with literal +// values derived from the binding information on shader parameters. +// +// If these operations are applied to a global shader parameter, +// then we can simply read the binding information from that parameter +// and use it directly. +// +// Otherwise, we expect that the opaque object (resource/sampler/etc.) +// being referenced was passed down into the current function from +// a caller. We thus introduce new function parameters after the +// resource in question, transforming, e.g., this: +// +// void doThings( +// float a, +// Texture2D t, +// float b ) +// { +// ... 
__getRegisterIndex(t) ... +// ... __getRegisterSpace(t) ... +// } +// ... +// doThings(myTexture); +// +// into this: +// +// void doThings( +// float a, +// Texture2D t, +// uint t_index, +// uint t_space, +// float b ) +// { +// ... t_index ... +// ... t_space ... +// } +// ... +// doThings(myTexture, __getRegisterIndex(myTexture), __getRegisterSpace(myTexture)); +// +// At that point we have removed the invocations of `getRegisterIndex` +// and `getRegisterSpace` in the callee function, but introduced new +// invocations in the caller function, so we need to iterate until +// we eventually either bottom out at a global shader parameter, or +// run into a context that we cannot simplify. + +namespace Slang +{ + // There are a ton of passes we've implemented now that use + // some basic work-list structures, and it seems a bit silly + // to be writing that code intermixed with the actual algorithm. + // + // For this file, we break the common work-list functionality + // out into a base type that we can re-use in specific passes. + // + struct WorkListPass + { + public: + IRModule* module; + DiagnosticSink* sink; + + protected: + + // The base type needs to abstract over how the + // concrete pass will process each instruction + // that gets placed into the work list. + + virtual void processInst(IRInst* inst) = 0; + + // Otherwise, the implementation of the work list + // itself is straightforward, and not anything + // that hasn't been seen in other files. 
+ + List workList; + HashSet workListSet; + + void addToWorkList(IRInst* inst) + { + if (workListSet.contains(inst)) + return; + + workList.add(inst); + workListSet.add(inst); + } + + void processWorkList() + { + while (workList.getCount() != 0) + { + IRInst* inst = workList.getLast(); + + workList.removeLast(); + workListSet.remove(inst); + + processInst(inst); + + for (auto child = inst->getLastChild(); child; child = child->getPrevInst()) + { + addToWorkList(child); + } + } + } + + // As long as we are factoring out repeated cruft, + // it seems reasonable to *also* deal with the + // frequent need to buffer up instructions to + // be deleted when a pass is complete. + + List toBeDeleted; + HashSet toBeDeletedSet; + + void addToBeDeleted(IRInst* inst) + { + if (toBeDeletedSet.contains(inst)) + return; + + toBeDeleted.add(inst); + toBeDeletedSet.add(inst); + } + + void processDeletions() + { + for (auto inst : toBeDeleted) + { + inst->removeAndDeallocate(); + } + toBeDeleted.clear(); + toBeDeletedSet.clear(); + } + }; + + // The concrete pass will then be a specialization of + // the base work-list abstraction. + // + struct BindingQueryLoweringContext : public WorkListPass + { + // All of the intrinsics we will be processing use + // the same result type (`uint`), so it is helpful + // to cache a pointer to the IR type at the start + // of the pass and re-use it. + // + IRType* indexType = nullptr; + + void processModule() + { + IRBuilder builder(module); + indexType = builder.getUIntType(); + + // Processing the module consists of recursively + // processing all the instructions in one pass, + // and then potentially revisiting instructions + // that had new intrinsics added to their bodies. + // + addToWorkList(module->getModuleInst()); + processWorkList(); + } + + void processInst(IRInst* inst) + { + // For this pass, we really only care about + // our binding query instructions. 
+ // + if (auto query = as(inst)) + { + processQueryInst(query); + } + } + + void processQueryInst(IRBindingQuery* inst) + { + // Processing one of the query instructions is conceptually + // simple: we find or compute a value to replace it with, + // and then simply *replace* the instruction. + // + auto replacementValue = findOrComputeReplacementValueFor(inst); + if (!replacementValue) + { + // If we cannot find or compute a replacement value, + // then we need to treat it as an error, since the + // binding query intrinsics don't admit any reasonable + // runtime implementation. + // + sink->diagnose( + inst, + Diagnostics::opaqueReferenceMustResolveToGlobal); + return; + } + + inst->replaceUsesWith(replacementValue); + inst->removeAndDeallocate(); + } + + // We want to cache the results of computing the binding + // information for an opaque-type value, in case doing + // so required adding or modifying code. + // + // For that purpose, we introduce a simple data structure + // to hold the two pieces of binding information we + // care about. + // + struct OpaqueValueInfo + { + IRInst* registerIndex = nullptr; + IRInst* registerSpace = nullptr; + }; + + IRInst* findOrComputeReplacementValueFor(IRBindingQuery* query) + { + // Finding the replacement for a given query instruction + // then amounts to computing (or caching) the binding + // information for the opaque-type value it queries, + // and then projecting out the appropriate field. 
+ + auto opaqueValue = query->getOpaqueValue(); + auto opaqueValueInfo = findOrComputeOpaqueValueInfo(opaqueValue); + + switch (query->getOp()) + { + default: + SLANG_UNEXPECTED("unhandled binding query instruction type"); + UNREACHABLE_RETURN(query); + + case kIROp_GetRegisterIndex: + return opaqueValueInfo.registerIndex; + + case kIROp_GetRegisterSpace: + return opaqueValueInfo.registerSpace; + } + } + + // The information will be cached in a dictionary, + // keyed on the opaque-type value that the information + // was computed for. + // + Dictionary mapOpaqueValueToInfo; + + // Looking up the cached information (if any) is a simple + // matter of using the dictionary. + // + // (We have a distinct operation for lookup vs. the + // memo-cached lookup below, because we may want to + // query this information while computing an entry, + // and we don't want to introduce potential recursion.) + // + OpaqueValueInfo* findOpaqueValueInfo(IRInst* opaqueValue) + { + return mapOpaqueValueToInfo.tryGetValue(opaqueValue); + } + + OpaqueValueInfo findOrComputeOpaqueValueInfo(IRInst* opaqueValue) + { + if (auto foundInfo = findOpaqueValueInfo(opaqueValue)) + return *foundInfo; + + // If there is no information registered in the cache, we + // compute it on-demand. + // + // Note that there is no potential for circularity, so + // long as the implementation of `computeOpaqueValueInfo` + // does not itself call `findOrComputeOpaqueValueInfo`. + // + auto computedInfo = computeOpaqueValueInfo(opaqueValue); + mapOpaqueValueToInfo.add(opaqueValue, computedInfo); + return computedInfo; + } + + // We are now (finally) getting into the meat of what this + // pass needs to do. Given an instruction with an opaque + // type, we need to try to compute the register and space + // it is bound to, or conspire to have that information + // passed along. 
+ // + OpaqueValueInfo computeOpaqueValueInfo(IRInst* opaqueValue) + { + if (auto globalParam = as(opaqueValue)) + { + // The simple/base case is when we have a global shader + // parameter that has layout information attached. + // + // Note that this pass needs to run late enough that + // shader parameters declared at other scopes will have + // been massaged into the appropriate form. + // + if (auto layoutDecoration = globalParam->findDecoration()) + { + if (auto layout = as(layoutDecoration->getLayout())) + { + // We expect any shader parameter of an opaque type + // to have a relevant resource kind, but it isn't + // too hard to code defensively. We will iterate + // over the resource kinds that are present and + // take the first one that represents an opaque type. + // + for (auto offsetAttr : layout->getOffsetAttrs()) + { + switch (offsetAttr->getResourceKind()) + { + default: + break; + + case LayoutResourceKind::ShaderResource: + case LayoutResourceKind::UnorderedAccess: + case LayoutResourceKind::ConstantBuffer: + case LayoutResourceKind::SamplerState: + case LayoutResourceKind::DescriptorTableSlot: + { + IRBuilder builder(module); + + OpaqueValueInfo info; + info.registerIndex = builder.getIntValue(indexType, offsetAttr->getOffset()); + info.registerSpace = builder.getIntValue(indexType, offsetAttr->getSpace()); + return info; + } + break; + } + } + } + } + } + else if (auto param = as(opaqueValue)) + { + // The other very interesting case is when the opaque-type + // value is an `IRParam`, which indicates that it is either + // a function parameter or a phi node of a basic block. + // + // Either way, we always expect a parameter to appear as + // a child of a block. + // + auto block = as(param->getParent()); + SLANG_ASSERT(block); + + // When rewriting call sites, we will need to know the + // index of `param` within the parameter list. 
+ // + Index paramIndex = -1; + { + Count paramCounter = 0; + for (auto p : block->getParams()) + { + Index i = paramCounter++; + if (p == param) + { + paramIndex = i; + break; + } + } + SLANG_ASSERT(paramIndex >= 0); + } + + // In either case (function parameter or block parameter), + // we will insert additional parameters after the original + // parameter, so that the register index and space can + // be passed along explicitly. + // + IRBuilder builder(module); + + // We create new parameters to pass along the register index/space, + // and manually insert them where we want them in the parameter list. + // + auto registerIndexParam = builder.createParam(builder.getUIntType()); + auto registerSpaceParam = builder.createParam(builder.getUIntType()); + // + registerSpaceParam->insertAfter(param); + registerIndexParam->insertAfter(param); + + // We would like for the newly-introduced parameters to have + // nice human-readable names, if the original parameter did. + // + if (auto nameHintDecoration = param->findDecoration()) + { + String hint; + hint.append(nameHintDecoration->getName()); + hint.append("."); + builder.addNameHintDecoration(registerIndexParam, (hint + "index").getUnownedSlice()); + builder.addNameHintDecoration(registerSpaceParam, (hint + "space").getUnownedSlice()); + } + + // Similarly, the new parameters should get debugging-related + // source location information from the original parameter, + // if it had any. + // + registerIndexParam->sourceLoc = param->sourceLoc; + registerSpaceParam->sourceLoc = param->sourceLoc; + + // Now we need to scan for the places that the function or block + // that the parameter belongs to gets referenced. At each such + // location, we will pass along arguments to match the additional + // parameters. 
+ // + if (!block->getPrevBlock()) + { + // If this is the first block in the parent function, + // then this is a function parameter, and we will + // iterate over call sites of the function and rewrite + // them to pass along arguments for the new parameters. + // + auto func = block->getParent(); + + for (auto use = func->firstUse; use; use = use->nextUse) + { + auto user = use->getUser(); + if (auto call = as(user)) + { + if (call->getCallee() == func) + { + rewriteCall(call, paramIndex); + } + } + } + } + else + { + // If this is a block parameter, we will iterate over + // the instructions that branch to the block, and rewrite + // their argument lists, similar to what we do for function calls. + // + for (auto use = block->firstUse; use; use = use->nextUse) + { + auto user = use->getUser(); + if (auto branch = as(user)) + { + if (branch->getTargetBlock() == block) + { + rewriteBranch(branch, paramIndex); + } + } + } + } + + // The new parameters that we introduced will be used to + // replace any binding query intrinsics applied to + // this opaque value. + // + OpaqueValueInfo info; + info.registerIndex = registerIndexParam; + info.registerSpace = registerSpaceParam; + return info; + } + + // By default we find that we cannot query binding information + // for the given instruction. + OpaqueValueInfo info; + return info; + } + + // In our IR, there isn't a lot of difference between a `call` + // and an `unconditionalBranch`; indeed, this is part of what + // motivates the use of `IRParam`s for both function parameters + // and phi nodes. + // + // However, while both blocks and functions use the same `IRParam` + // representation, we (currently) do not have a common base + // between the `call` and `unconditionalBranch` instructions. 
+ // + // Rather than have duplicate logic between the two cases, we + // simply observe that for our purposes rewriting either a + // `call` or an `unconditionalBranch` amounts to doing + // special-case work on *one* operand of the original, while + // copying over all the other operands as-is. + // + // Given this observation, we can bottleneck both calls and + // branches into a common worker routine by passing down + // the instruction to be rewritten and a pointer to the + // `IRUse` for the one "interesting" operand. + + void rewriteCall(IRCall* oldCall, Index paramIndex) + { + rewriteCallOrBranch( + oldCall, + oldCall->getArgs() + paramIndex); + } + + void rewriteBranch(IRUnconditionalBranch* oldBranch, Index paramIndex) + { + rewriteCallOrBranch( + oldBranch, + oldBranch->getArgs() + paramIndex); + } + + void rewriteCallOrBranch( + IRInst* oldCallOrBranch, + IRUse* oldOperandToRewrite) + { + // Our goal here is to generate a new version of + // `oldCallOrBranch` that copies over most of the + // operands as-is, but introduces our rewrites + // around the chosen operand. + + IRBuilder builder(module); + builder.setInsertBefore(oldCallOrBranch); + + // We capture the old operand list as a range of + // `IRUse`s, and set up a fresh list to hold the + // new operands. + // + auto oldOperandsBegin = oldCallOrBranch->getOperands(); + auto oldOperandsEnd = oldOperandsBegin + oldCallOrBranch->getOperandCount(); + // + List newOperands; + + // All of the operands that precede the interesting + // one can be copied over from the old list to the + // new one as-is. + // + for (auto u = oldOperandsBegin; u < oldOperandToRewrite; ++u) + { + auto operand = u->get(); + newOperands.add(operand); + } + + // Next we look at the value of the "intersting" + // operand, knowing that we need to pass along + // not only the original value but also the + // binding information. 
+ // + IRInst* arg = oldOperandToRewrite->get(); + IRInst* registerIndex = nullptr; + IRInst* registerSpace = nullptr; + + // As a simple optimization, if we have *already* + // computed and cached binding information for + // the argument, we can re-use that information + // here and now. + // + if (auto info = findOpaqueValueInfo(arg)) + { + registerIndex = info->registerIndex; + registerSpace = info->registerSpace; + } + else + { + // If there is no cached information for + // the argument, we choose *not* to make + // a recursive call into `findOrComputeOpaqueValueInfo`. + // + // Instead we will simply emit additional + // binding query intrinsics into the body + // of the caller (right before the call site), + // and add those instructions to our work + // list, to be eliminated later. + // + registerIndex = builder.emitIntrinsicInst( + indexType, + kIROp_GetRegisterIndex, + 1, &arg); + registerSpace = builder.emitIntrinsicInst( + indexType, + kIROp_GetRegisterSpace, + 1, &arg); + // + addToWorkList(registerIndex); + addToWorkList(registerSpace); + } + + // Whether we have found existing binding information, + // or emitted new intrinsics, we are now ready + // to append the argument and its binding information + // to the new operand list. + // + newOperands.add(arg); + newOperands.add(registerIndex); + newOperands.add(registerSpace); + + // Any operands of the original instruction that come + // after the one we rewrite can be copied over as-is. + // + // Note: we don't currently have any operands that would + // appear after the arguments of a `call` or `branch`, + // but the fact that we encode `IRAttr`s on an instruction + // as additional (trailing) operands means that this could + // conceivably happen at some point. 
+ // + for (auto u = oldOperandToRewrite + 1; u < oldOperandsEnd; ++u) + { + auto operand = u->get(); + newOperands.add(operand); + } + + // Once we've built up the new operand list, we can emit + // a new instruction that has the same opcode and type, + // with the new operands, and then use it to replace + // the existing instruction. + // + auto newCallOrBranch = builder.emitIntrinsicInst( + oldCallOrBranch->getFullType(), + oldCallOrBranch->getOp(), + newOperands.getCount(), + newOperands.getBuffer()); + + oldCallOrBranch->transferDecorationsTo(newCallOrBranch); + oldCallOrBranch->replaceUsesWith(newCallOrBranch); + oldCallOrBranch->removeAndDeallocate(); + } + }; + + void lowerBindingQueries( + IRModule* module, + DiagnosticSink* sink) + { + BindingQueryLoweringContext context; + context.module = module; + context.sink = sink; + context.processModule(); + } +} diff --git a/source/slang/slang-ir-lower-binding-query.h b/source/slang/slang-ir-lower-binding-query.h new file mode 100644 index 000000000..3694eecdb --- /dev/null +++ b/source/slang/slang-ir-lower-binding-query.h @@ -0,0 +1,22 @@ +// slang-ir-lower-binding-query.h +#pragma once + +#include "slang-ir.h" + +namespace Slang +{ + struct IRModule; + class DiagnosticSink; + + /// Lower the `getRegisterIndex` and `getRegisterSpace` intrinsics. + /// + /// These operations semantically return binding information on their + /// argument, which must be a value of an opaque type (resource, + /// sampler, etc.). These operations can only ever work on values that + /// derive (in one way or another) from a global shader parameter. 
+ /// + void lowerBindingQueries( + IRModule* module, + DiagnosticSink* sink); + +} diff --git a/source/slang/slang-lower-to-ir.cpp b/source/slang/slang-lower-to-ir.cpp index 7b649ff0d..d869bf60e 100644 --- a/source/slang/slang-lower-to-ir.cpp +++ b/source/slang/slang-lower-to-ir.cpp @@ -6805,6 +6805,7 @@ struct DeclLoweringVisitor : DeclVisitor // auto irKey = getBuilder()->createStructKey(); addLinkageDecoration(context, irKey, inheritanceDecl); + getBuilder()->addNameHintDecoration(irKey, UnownedTerminatedStringSlice("base")); auto keyVal = LoweredValInfo::simple(irKey); context->setGlobalValue(inheritanceDecl, keyVal); return keyVal; -- cgit v1.2.3