diff options
| author | Yong He <yonghe@outlook.com> | 2024-05-16 10:43:49 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-05-16 10:43:49 -0700 |
| commit | 725735a87b9d223c2afc83bbd049055b1e44a976 (patch) | |
| tree | 225f381774fb9b5209007ae7fc455a14f57a6fe6 | |
| parent | 0a6180299352d7a2ec850004564c7a95b37a41c4 (diff) | |
Support RasterizerOrdered resources for SPIR-V and Metal. (#4175)
* Support RasterizerOrdered resources for SPIR-V and Metal.
Also fixes the byte-address buffer load/store logic for Metal.
* Fix.
* Delete commented lines.
---------
Co-authored-by: Jay Kwak <82421531+jkwak-work@users.noreply.github.com>
| -rw-r--r-- | source/slang/core.meta.slang | 40 | ||||
| -rw-r--r-- | source/slang/hlsl.meta.slang | 125 | ||||
| -rw-r--r-- | source/slang/slang-ast-modifier.h | 1 | ||||
| -rw-r--r-- | source/slang/slang-emit-c-like.cpp | 5 | ||||
| -rw-r--r-- | source/slang/slang-emit-c-like.h | 2 | ||||
| -rw-r--r-- | source/slang/slang-emit-glsl.cpp | 20 | ||||
| -rw-r--r-- | source/slang/slang-emit-metal.cpp | 34 | ||||
| -rw-r--r-- | source/slang/slang-emit-metal.h | 1 | ||||
| -rw-r--r-- | source/slang/slang-emit-spirv-ops.h | 12 | ||||
| -rw-r--r-- | source/slang/slang-emit-spirv.cpp | 9 | ||||
| -rw-r--r-- | source/slang/slang-emit.cpp | 7 | ||||
| -rw-r--r-- | source/slang/slang-ir-byte-address-legalize.cpp | 139 | ||||
| -rw-r--r-- | source/slang/slang-ir-byte-address-legalize.h | 1 | ||||
| -rw-r--r-- | source/slang/slang-ir-inst-defs.h | 3 | ||||
| -rw-r--r-- | source/slang/slang-ir-insts.h | 12 | ||||
| -rw-r--r-- | source/slang/slang-ir-spirv-legalize.cpp | 98 | ||||
| -rw-r--r-- | source/slang/slang-ir.cpp | 10 | ||||
| -rw-r--r-- | tests/hlsl/raster-order-resource.slang | 50 | ||||
| -rw-r--r-- | tests/metal/byte-address-buffer.slang | 30 |
19 files changed, 538 insertions, 61 deletions
diff --git a/source/slang/core.meta.slang b/source/slang/core.meta.slang index bf69eb9ad..272e95c85 100644 --- a/source/slang/core.meta.slang +++ b/source/slang/core.meta.slang @@ -2062,42 +2062,14 @@ int __offsetOf(in T t, in F field) } /// Mark beginning of "interlocked" operations in a fragment shader. -__glsl_extension(GL_ARB_fragment_shader_interlock) -__glsl_version(420) -void beginInvocationInterlock() -{ - __target_switch - { - case glsl: - __intrinsic_asm "beginInvocationInterlockARB"; - case spirv: - spirv_asm { - OpCapability FragmentShaderPixelInterlockEXT; - OpExtension "SPV_EXT_fragment_shader_interlock"; - OpExecutionMode __entryPoint PixelInterlockOrderedEXT; - OpBeginInvocationInterlockEXT; - }; - } -} +[require(glsl_spirv, GL_ARB_fragment_shader_interlock, fragment)] +__intrinsic_op($(kIROp_BeginFragmentShaderInterlock)) +void beginInvocationInterlock(); /// Mark end of "interlocked" operations in a fragment shader. -__glsl_extension(GL_ARB_fragment_shader_interlock) -__glsl_version(420) -void endInvocationInterlock() -{ - __target_switch - { - case glsl: - __intrinsic_asm "endInvocationInterlockARB"; - case spirv: - spirv_asm { - OpCapability FragmentShaderPixelInterlockEXT; - OpExtension "SPV_EXT_fragment_shader_interlock"; - OpExecutionMode __entryPoint PixelInterlockOrderedEXT; - OpEndInvocationInterlockEXT; - }; - } -} +[require(glsl_spirv, GL_ARB_fragment_shader_interlock, fragment)] +__intrinsic_op($(kIROp_EndFragmentShaderInterlock)) +void endInvocationInterlock(); // Operators to apply to `enum` types diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index 26e691ddb..11470212a 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -2599,7 +2599,7 @@ extension __TextureImpl<T,Shape,isArray,0,sampleCount,$(access),isShadow, 0,form { [__readNone] [ForceInline] - [require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_1)] + [require(cpp_cuda_glsl_hlsl_metal_spirv, texture_sm_4_1)] T Load(vector<int, 
Shape.dimensions+isArray> location) { __target_switch @@ -2644,6 +2644,66 @@ extension __TextureImpl<T,Shape,isArray,0,sampleCount,$(access),isShadow, 0,form %sampled:__sampledType(T) = OpImageRead $this $location; __truncate $$T result __sampledType(T) %sampled; }; + case metal: + switch (Shape.flavor) + { + case $(SLANG_TEXTURE_1D): + // lod is not supported for 1D texture + if (isArray == 1) + // Tv read(uint coord, uint array, uint lod = 0) const + __intrinsic_asm "$0.read(uint(($1).x), uint(($1).y))"; + else + // Tv read(uint coord, uint lod = 0) const + __intrinsic_asm "$0.read(uint(($1).x))"; + break; + case $(SLANG_TEXTURE_2D): + if (isShadow == 1) + { + if (isArray == 1) + // T read(uint2 coord, uint array, uint lod = 0) const + __intrinsic_asm "$0.read(vec<uint,2>(($1).xy), uint(($1).z), uint(($1).w))"; + else + // T read(uint2 coord, uint lod = 0) const + __intrinsic_asm "$0.read(vec<uint,2>(($1).xy), uint(($1).z))"; + } + else + { + if (isArray == 1) + // Tv read(uint2 coord, uint array, uint lod = 0) const + __intrinsic_asm "$0.read(vec<uint,2>(($1).xy), uint(($1).z), uint(($1).w))"; + else + // Tv read(uint2 coord, uint lod = 0) const + __intrinsic_asm "$0.read(vec<uint,2>(($1).xy), uint(($1).z))"; + } + break; + case $(SLANG_TEXTURE_3D): + if (isShadow == 0 && isArray == 0) + // Tv read(uint3 coord, uint lod = 0) const + __intrinsic_asm "$0.read(vec<uint,3>(($1).xyz), uint(($1).w))"; + break; + case $(SLANG_TEXTURE_CUBE): + if (isShadow == 1) + { + if (isArray == 1) + // T read(uint2 coord, uint face, uint array, uint lod = 0) const + __intrinsic_asm "$0.read(vec<uint,2>(($1).xy), uint(($1).z)%6, uint(($1).z)/6, uint(($1).w))"; + else + // T read(uint2 coord, uint face, uint lod = 0) const + __intrinsic_asm "$0.read(vec<uint,2>(($1).xy), uint(($1).z), uint(($1).w))"; + } + else + { + if (isArray == 1) + // Tv read(uint2 coord, uint face, uint array, uint lod = 0) const + __intrinsic_asm "$0.read(vec<uint,2>(($1).xy), uint(($1).z)%6, uint(($1).z)/6, 
uint(($1).w))"; + else + // Tv read(uint2 coord, uint face, uint lod = 0) const + __intrinsic_asm "$0.read(vec<uint,2>(($1).xy), uint(($1).z), uint(($1).w))"; + } + break; + } + // TODO: This needs to be handled by the capability system + __intrinsic_asm "<invalid intrinsics>"; } } @@ -2694,7 +2754,7 @@ extension __TextureImpl<T,Shape,isArray,0,sampleCount,$(access),isShadow, 0,form { [__readNone] [ForceInline] - [require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_1)] + [require(cpp_cuda_glsl_hlsl_metal_spirv, texture_sm_4_1)] get { __target_switch @@ -2705,13 +2765,14 @@ extension __TextureImpl<T,Shape,isArray,0,sampleCount,$(access),isShadow, 0,form case glsl: case spirv: case cuda: + case metal: return Load(location); } } [nonmutating] [ForceInline] - [require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_1)] + [require(cpp_cuda_glsl_hlsl_metal_spirv, texture_sm_4_1)] set(T newValue) { __target_switch @@ -2755,6 +2816,64 @@ extension __TextureImpl<T,Shape,isArray,0,sampleCount,$(access),isShadow, 0,form { OpImageWrite $this $location __convertTexel(newValue); }; + case metal: + switch (Shape.flavor) + { + case $(SLANG_TEXTURE_1D): + // lod is not supported for 1D texture + if (isArray == 1) + // void write(Tv color, uint coord, uint array, uint lod = 0) const + __intrinsic_asm "$0.write($2, uint(($1).x))"; + else + // void write(Tv color, uint coord, uint lod = 0) const + __intrinsic_asm "$0.write($2, uint(($1).x))"; + break; + case $(SLANG_TEXTURE_2D): + if (isShadow == 1) + { + if (isArray == 1) + // void write(Tv color, uint2 coord, uint array, uint lod = 0) const + __intrinsic_asm "$0.write($2, vec<uint,2>(($1).xy), uint(($1).z))"; + else + // void write(Tv color, uint2 coord, uint lod = 0) const + __intrinsic_asm "$0.write($2, vec<uint,2>(($1).xy))"; + } + else + { + if (isArray == 1) + // void write(Tv color, uint2 coord, uint array, uint lod = 0) const + __intrinsic_asm "$0.write($2, vec<uint,2>(($1).xy), uint(($1).z))"; + else + // void write(Tv color, uint2 coord, 
uint lod = 0) const + __intrinsic_asm "$0.write($2, vec<uint,2>(($1).xy))"; + } + break; + case $(SLANG_TEXTURE_3D): + if (isShadow == 0 && isArray == 0) + // void write(Tv color, uint3 coord, uint lod = 0) const + __intrinsic_asm "$0.write($2, vec<uint,3>(($1).xyz))"; + break; + case $(SLANG_TEXTURE_CUBE): + if (isShadow == 1) + { + if (isArray == 1) + // void write(Tv color, uint2 coord, uint face, uint array, uint lod = 0) const + __intrinsic_asm "$0.write($2, vec<uint,2>(($1).xy), uint(($1).z)%6, uint(($1).z)/6)"; + else + // void write(Tv color, uint2 coord, uint face, uint lod = 0) const + __intrinsic_asm "$0.write($2, vec<uint,2>(($1).xy), uint(($1).z))"; + } + else + { + if (isArray == 1) + // void write(Tv color, uint2 coord, uint face, uint array, uint lod = 0) const + __intrinsic_asm "$0.write($2, vec<uint,2>(($1).xy), uint(($1).z)%6, uint(($1).z)/6)"; + else + // void write(Tv color, uint2 coord, uint face, uint lod = 0) const + __intrinsic_asm "$0.write($2, vec<uint,2>(($1).xy), uint(($1).z))"; + } + break; + } } } diff --git a/source/slang/slang-ast-modifier.h b/source/slang/slang-ast-modifier.h index 03d3e6fcf..39e167ab8 100644 --- a/source/slang/slang-ast-modifier.h +++ b/source/slang/slang-ast-modifier.h @@ -1638,6 +1638,7 @@ public: kWriteOnly = 0b100, kVolatile = 0b1000, kRestrict = 0b10000, + kRasterizerOrdered = 0b100000, }; }; diff --git a/source/slang/slang-emit-c-like.cpp b/source/slang/slang-emit-c-like.cpp index 7551f4da9..d85c72ae5 100644 --- a/source/slang/slang-emit-c-like.cpp +++ b/source/slang/slang-emit-c-like.cpp @@ -3361,6 +3361,7 @@ void CLikeSourceEmitter::emitSimpleFuncParamImpl(IRParam* param) emitParamType(paramType, paramName); emitSemantics(param); + emitPostDeclarationAttributesForType(paramType); } void CLikeSourceEmitter::emitSimpleFuncParamsImpl(IRFunc* func) @@ -3649,6 +3650,7 @@ void CLikeSourceEmitter::emitStructDeclarationsBlock(IRStructType* structType, b emitMemoryQualifiers(fieldKey); emitType(fieldType, 
getName(fieldKey)); emitSemantics(fieldKey, allowOffsetLayout); + emitPostDeclarationAttributesForType(fieldType); m_writer->emit(";\n"); } @@ -3733,6 +3735,7 @@ void CLikeSourceEmitter::emitClass(IRClassType* classType) emitType(fieldType, getName(fieldKey)); emitSemantics(fieldKey); + emitPostDeclarationAttributesForType(fieldType); m_writer->emit(";\n"); } @@ -3900,8 +3903,8 @@ void CLikeSourceEmitter::emitVar(IRVar* varDecl) emitType(varType, getName(varDecl)); emitSemantics(varDecl); - emitLayoutSemantics(varDecl); + emitPostDeclarationAttributesForType(varType); // TODO: ideally this logic should scan ahead to see if it can find a `store` // instruction that writes to the `var`, within the same block, such that all diff --git a/source/slang/slang-emit-c-like.h b/source/slang/slang-emit-c-like.h index ab0b2c3e4..9b39aa6f4 100644 --- a/source/slang/slang-emit-c-like.h +++ b/source/slang/slang-emit-c-like.h @@ -461,7 +461,7 @@ public: protected: - + virtual void emitPostDeclarationAttributesForType(IRInst* type) { SLANG_UNUSED(type); } virtual bool doesTargetSupportPtrTypes() { return false; } virtual void emitLayoutSemanticsImpl(IRInst* inst, char const* uniformSemanticSpelling = "register") { SLANG_UNUSED(inst); SLANG_UNUSED(uniformSemanticSpelling); } virtual void emitParameterGroupImpl(IRGlobalParam* varDecl, IRUniformParameterGroupType* type) = 0; diff --git a/source/slang/slang-emit-glsl.cpp b/source/slang/slang-emit-glsl.cpp index 5cf508876..8063fcc1d 100644 --- a/source/slang/slang-emit-glsl.cpp +++ b/source/slang/slang-emit-glsl.cpp @@ -2089,6 +2089,20 @@ bool GLSLSourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOu // Handled return true; } + case kIROp_BeginFragmentShaderInterlock: + { + _requireGLSLVersion(420); + _requireGLSLExtension(UnownedStringSlice::fromLiteral("GL_ARB_fragment_shader_interlock")); + m_writer->emit("beginInvocationInterlockARB()"); + return true; + } + case kIROp_EndFragmentShaderInterlock: + { + 
_requireGLSLVersion(420); + _requireGLSLExtension(UnownedStringSlice::fromLiteral("GL_ARB_fragment_shader_interlock")); + m_writer->emit("endInvocationInterlockARB()"); + return true; + } default: break; } @@ -2576,12 +2590,6 @@ void GLSLSourceEmitter::emitSimpleTypeImpl(IRType* type) m_writer->emit("accelerationStructureEXT"); break; } - - // TODO: These "translations" are obviously wrong for GLSL. - case kIROp_HLSLByteAddressBufferType: m_writer->emit("ByteAddressBuffer"); break; - case kIROp_HLSLRWByteAddressBufferType: m_writer->emit("RWByteAddressBuffer"); break; - case kIROp_HLSLRasterizerOrderedByteAddressBufferType: m_writer->emit("RasterizerOrderedByteAddressBuffer"); break; - default: SLANG_DIAGNOSE_UNEXPECTED(getSink(), SourceLoc(), "unhandled buffer type"); break; diff --git a/source/slang/slang-emit-metal.cpp b/source/slang/slang-emit-metal.cpp index ff497a20f..c6ffee953 100644 --- a/source/slang/slang-emit-metal.cpp +++ b/source/slang/slang-emit-metal.cpp @@ -349,7 +349,7 @@ bool MetalSourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inO emitOperand(buffer, getInfo(EmitOp::General)); m_writer->emit("[("); emitOperand(offset, getInfo(EmitOp::General)); - m_writer->emit(")>>2)]"); + m_writer->emit(")>>2])"); return true; } case kIROp_ByteAddressBufferStore: @@ -361,7 +361,7 @@ bool MetalSourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inO emitOperand(buffer, getInfo(EmitOp::General)); m_writer->emit("[("); emitOperand(offset, getInfo(EmitOp::General)); - m_writer->emit(")>>2)] = as_type<uint32_t>("); + m_writer->emit(")>>2] = as_type<uint32_t>("); emitOperand(inst->getOperand(2), getInfo(EmitOp::General)); m_writer->emit(")"); return true; @@ -433,17 +433,8 @@ void MetalSourceEmitter::emitVectorTypeNameImpl(IRType* elementType, IRIntegerVa void MetalSourceEmitter::emitLoopControlDecorationImpl(IRLoopControlDecoration* decl) { - switch (decl->getMode()) - { - case kIRLoopControl_Unroll: - m_writer->emit("[unroll]\n"); - 
break; - case kIRLoopControl_Loop: - m_writer->emit("[loop]\n"); - break; - default: - break; - } + // Metal does not support loop control attributes. + SLANG_UNUSED(decl); } static bool _canEmitExport(const Profile& profile) @@ -866,6 +857,23 @@ void MetalSourceEmitter::emitSimpleFuncParamImpl(IRParam* param) emitFuncParamLayoutImpl(param); } +void MetalSourceEmitter::emitPostDeclarationAttributesForType(IRInst* type) +{ + Super::emitPostDeclarationAttributesForType(type); + if (auto textureType = as<IRTextureTypeBase>(type)) + { + if (textureType->getAccess() == SLANG_RESOURCE_ACCESS_RASTER_ORDERED) + { + m_writer->emit(" [[raster_order_group(0)]]"); + } + } + else if (as<IRHLSLRasterizerOrderedByteAddressBufferType>(type) || + as<IRHLSLRasterizerOrderedStructuredBufferType>(type)) + { + m_writer->emit(" [[raster_order_group(0)]]"); + } +} + static UnownedStringSlice _getInterpolationModifierText(IRInterpolationMode mode) { switch (mode) diff --git a/source/slang/slang-emit-metal.h b/source/slang/slang-emit-metal.h index ddc1c7665..55ad3d4cb 100644 --- a/source/slang/slang-emit-metal.h +++ b/source/slang/slang-emit-metal.h @@ -34,6 +34,7 @@ protected: virtual void emitRateQualifiersAndAddressSpaceImpl(IRRate* rate, IRIntegerValue addressSpace) SLANG_OVERRIDE; virtual void emitSemanticsImpl(IRInst* inst, bool allowOffsets) SLANG_OVERRIDE; virtual void emitSimpleFuncParamImpl(IRParam* param) SLANG_OVERRIDE; + virtual void emitPostDeclarationAttributesForType(IRInst* type) SLANG_OVERRIDE; virtual void emitInterpolationModifiersImpl(IRInst* varInst, IRType* valueType, IRVarLayout* layout) SLANG_OVERRIDE; virtual void emitPackOffsetModifier(IRInst* varInst, IRType* valueType, IRPackOffsetDecoration* decoration) SLANG_OVERRIDE; diff --git a/source/slang/slang-emit-spirv-ops.h b/source/slang/slang-emit-spirv-ops.h index 8f75796a1..c7c301abe 100644 --- a/source/slang/slang-emit-spirv-ops.h +++ b/source/slang/slang-emit-spirv-ops.h @@ -2350,6 +2350,18 @@ SpvInst* 
emitOpUnreachable(SpvInstParent* parent, IRInst* inst) return emitInst(parent, inst, SpvOpUnreachable); } +// https://htmlpreview.github.io/?https://github.com/KhronosGroup/SPIRV-Registry/blob/master/extensions/EXT/SPV_EXT_fragment_shader_interlock.html#shaders-fragment-shader-interlock +SpvInst* emitOpBeginInvocationInterlockEXT(SpvInstParent* parent, IRInst* inst) +{ + return emitInst(parent, inst, SpvOpBeginInvocationInterlockEXT); +} + +// https://htmlpreview.github.io/?https://github.com/KhronosGroup/SPIRV-Registry/blob/master/extensions/EXT/SPV_EXT_fragment_shader_interlock.html#shaders-fragment-shader-interlock +SpvInst* emitOpEndInvocationInterlockEXT(SpvInstParent* parent, IRInst* inst) +{ + return emitInst(parent, inst, SpvOpEndInvocationInterlockEXT); +} + // https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpExecutionModeId template<typename T> SpvInst* emitOpExecutionModeId( diff --git a/source/slang/slang-emit-spirv.cpp b/source/slang/slang-emit-spirv.cpp index 8219d3534..a32e904b7 100644 --- a/source/slang/slang-emit-spirv.cpp +++ b/source/slang/slang-emit-spirv.cpp @@ -2787,6 +2787,15 @@ struct SPIRVEmitContext case kIROp_discard: result = emitOpKill(parent, inst); break; + case kIROp_BeginFragmentShaderInterlock: + ensureExtensionDeclaration(UnownedStringSlice("SPV_EXT_fragment_shader_interlock")); + requireSPIRVCapability(SpvCapabilityFragmentShaderPixelInterlockEXT); + emitOpExecutionMode(getSection(SpvLogicalSectionID::ExecutionModes), nullptr, getParentFunc(inst), SpvExecutionModePixelInterlockOrderedEXT); + result = emitOpBeginInvocationInterlockEXT(parent, inst); + break; + case kIROp_EndFragmentShaderInterlock: + result = emitOpEndInvocationInterlockEXT(parent, inst); + break; case kIROp_unconditionalBranch: { // If we are jumping to the main block of a loop, diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp index 7cc056009..af4add9b7 100644 --- a/source/slang/slang-emit.cpp +++ b/source/slang/slang-emit.cpp 
@@ -753,6 +753,13 @@ Result linkAndOptimizeIR( // byteAddressBufferOptions.translateToStructuredBufferOps = true; break; + case CodeGenTarget::Metal: + case CodeGenTarget::MetalLib: + case CodeGenTarget::MetalLibAssembly: + byteAddressBufferOptions.scalarizeVectorLoadStore = true; + byteAddressBufferOptions.translateToStructuredBufferOps = false; + byteAddressBufferOptions.lowerBasicTypeOps = true; + break; } // We also need to decide whether to translate diff --git a/source/slang/slang-ir-byte-address-legalize.cpp b/source/slang/slang-ir-byte-address-legalize.cpp index 0561d8744..38bee566c 100644 --- a/source/slang/slang-ir-byte-address-legalize.cpp +++ b/source/slang/slang-ir-byte-address-legalize.cpp @@ -15,6 +15,8 @@ namespace Slang { +bool isCPUTarget(TargetRequest* targetReq); + // As is typical for IR passes in Slang, we will encapsulate the state // while we process the code in a context type. // @@ -617,6 +619,66 @@ struct ByteAddressBufferLegalizationContext } } + if (m_options.lowerBasicTypeOps) + { + // Some platforms e.g. Metal does not allow loading basic types that are not 4-byte sized. + // We need to lower such loads. + IRSizeAndAlignment sizeAlignment; + SLANG_RETURN_NULL_ON_FAIL(getNaturalSizeAndAlignment(m_targetProgram->getOptionSet(), type, &sizeAlignment)); + if (sizeAlignment.size == 8) + { + // We need to load the value as two 4-byte values and then combine them. 
+ auto loOffset = offset; + auto hiOffset = emitOffsetAddIfNeeded(offset, 4); + IRInst* loadLoArgs[] = { buffer, loOffset }; + IRInst* loadHiArgs[] = { buffer, hiOffset }; + auto loLoad = m_builder.emitIntrinsicInst(m_builder.getUIntType(), kIROp_ByteAddressBufferLoad, 2, loadLoArgs); + auto hiLoad = m_builder.emitIntrinsicInst(m_builder.getUIntType(), kIROp_ByteAddressBufferLoad, 2, loadHiArgs); + auto lo64 = m_builder.emitCast(m_builder.getUInt64Type(), loLoad); + auto hi64 = m_builder.emitCast(m_builder.getUInt64Type(), hiLoad); + auto shift = m_builder.emitShl(m_builder.getUInt64Type(), hi64, m_builder.getIntValue(m_builder.getUInt64Type(), 32)); + auto fullValue = m_builder.emitBitOr(m_builder.getUInt64Type(), lo64, shift); + return m_builder.emitBitCast(type, fullValue); + } + else if (sizeAlignment.size < 4) + { + auto alignedOffset = m_builder.emitDiv(offset->getDataType(), offset, m_builder.getIntValue(offset->getDataType(), 4)); + alignedOffset = m_builder.emitMul(offset->getDataType(), alignedOffset, m_builder.getIntValue(offset->getDataType(), 4)); + IRInst* loadArgs[] = { buffer, alignedOffset }; + auto val = m_builder.emitIntrinsicInst(m_builder.getUIntType(), kIROp_ByteAddressBufferLoad, 2, loadArgs); + auto shiftAmount = m_builder.emitSub(offset->getDataType(), offset, alignedOffset); + shiftAmount = m_builder.emitMul(offset->getDataType(), shiftAmount, m_builder.getIntValue(offset->getDataType(), 8)); + IRInst* mask = nullptr; + switch (sizeAlignment.size) + { + case 1: + mask = m_builder.getIntValue(m_builder.getUIntType(), 0xFF); + break; + case 2: + mask = m_builder.getIntValue(m_builder.getUIntType(), 0xFFFF); + break; + default: + SLANG_ASSERT(!"Unexpected size"); + break; + } + auto shift = m_builder.emitShr(m_builder.getUIntType(), val, shiftAmount); + auto masked = m_builder.emitBitAnd(m_builder.getUIntType(), shift, mask); + IRInst* casted = nullptr; + switch (sizeAlignment.size) + { + case 1: + casted = 
m_builder.emitCast(m_builder.getUInt8Type(), masked); + break; + case 2: + casted = m_builder.emitCast(m_builder.getUInt16Type(), masked); + break; + default: + SLANG_ASSERT(!"Unexpected size"); + break; + } + return m_builder.emitBitCast(type, casted); + } + } // When we finally run out of special cases to handle, we just emit // a byte-address buffer load operation directly, assuming it will // work for the chosen target. @@ -672,7 +734,25 @@ struct ByteAddressBufferLegalizationContext // the load was already for a `uint`. // return BaseType::UInt; - + case kIROp_Int8Type: + case kIROp_UInt8Type: + return BaseType::UInt8; + case kIROp_Int16Type: + case kIROp_UInt16Type: + case kIROp_HalfType: + return BaseType::UInt16; + case kIROp_Int64Type: + case kIROp_UInt64Type: + case kIROp_DoubleType: + return BaseType::UInt64; + case kIROp_IntPtrType: + case kIROp_UIntPtrType: + case kIROp_RawPointerType: + case kIROp_PtrType: + if (isCPUTarget(m_target) && sizeof(void*) == 4) + return BaseType::UInt; + else + return BaseType::UInt64; default: // All other types map to a sentinel value of `Void` to // indicate that a bit-cast solution shouldn't be attempted: @@ -1071,7 +1151,62 @@ struct ByteAddressBufferLegalizationContext } } - + if (m_options.lowerBasicTypeOps) + { + // Some platforms e.g. Metal does not allow storing basic types that are not 4-byte sized. + // We need to lower such loads. + IRSizeAndAlignment sizeAlignment; + SLANG_RETURN_ON_FAIL(getNaturalSizeAndAlignment(m_targetProgram->getOptionSet(), type, &sizeAlignment)); + if (sizeAlignment.size == 8) + { + // We need to store the value as two 4-byte values. 
+ auto uint64Val = m_builder.emitBitCast(m_builder.getUInt64Type(), value); + auto loVal = m_builder.emitCast(m_builder.getUIntType(), uint64Val); + auto hiVal = m_builder.emitCast( + m_builder.getUIntType(), + m_builder.emitShr(m_builder.getUInt64Type(), + uint64Val, m_builder.getIntValue(m_builder.getUInt64Type(), 32))); + auto loOffset = offset; + auto hiOffset = emitOffsetAddIfNeeded(offset, 4); + IRInst* storeLoArgs[] = { buffer, loOffset, loVal }; + IRInst* storeHiArgs[] = { buffer, hiOffset, hiVal }; + m_builder.emitIntrinsicInst(m_builder.getVoidType(), kIROp_ByteAddressBufferStore, 3, storeLoArgs); + m_builder.emitIntrinsicInst(m_builder.getVoidType(), kIROp_ByteAddressBufferStore, 3, storeHiArgs); + return SLANG_OK; + } + else if (sizeAlignment.size < 4) + { + IRInst* loadArgs[] = {buffer, offset}; + auto existingVal = m_builder.emitIntrinsicInst(m_builder.getUIntType(), kIROp_ByteAddressBufferLoad, 2, loadArgs); + auto alignedOffset = m_builder.emitDiv(offset->getDataType(), offset, m_builder.getIntValue(offset->getDataType(), 4)); + alignedOffset = m_builder.emitMul(offset->getDataType(), alignedOffset, m_builder.getIntValue(offset->getDataType(), 4)); + auto shiftAmount = m_builder.emitSub(offset->getDataType(), offset, alignedOffset); + shiftAmount = m_builder.emitMul(offset->getDataType(), shiftAmount, m_builder.getIntValue(offset->getDataType(), 8)); + auto uintVal = m_builder.emitCast(m_builder.getUIntType(), + m_builder.emitBitCast(getSameSizeUIntType(value->getDataType()), value)); + auto shiftedData = m_builder.emitShl(m_builder.getUIntType(), uintVal, shiftAmount); + IRInst* mask = nullptr; + switch (sizeAlignment.size) + { + case 1: + mask = m_builder.getIntValue(m_builder.getUIntType(), 0xFF); + break; + case 2: + mask = m_builder.getIntValue(m_builder.getUIntType(), 0xFFFF); + break; + default: + SLANG_ASSERT(!"Unexpected size"); + return SLANG_FAIL; + } + mask = m_builder.emitShl(m_builder.getUIntType(), mask, shiftAmount); + mask = 
m_builder.emitBitNot(m_builder.getUIntType(), mask); + auto maskedData = m_builder.emitBitAnd(m_builder.getUIntType(), existingVal, mask); + auto newData = m_builder.emitBitOr(m_builder.getUIntType(), maskedData, shiftedData); + IRInst* storeArgs[] = { buffer, alignedOffset, newData }; + m_builder.emitIntrinsicInst(m_builder.getVoidType(), kIROp_ByteAddressBufferStore, 3, storeArgs); + return SLANG_OK; + } + } { IRInst* storeArgs[] = { buffer, offset, value }; m_builder.emitIntrinsicInst(m_builder.getVoidType(), kIROp_ByteAddressBufferStore, 3, storeArgs); diff --git a/source/slang/slang-ir-byte-address-legalize.h b/source/slang/slang-ir-byte-address-legalize.h index 71ab8a4e1..1ae69070e 100644 --- a/source/slang/slang-ir-byte-address-legalize.h +++ b/source/slang/slang-ir-byte-address-legalize.h @@ -13,6 +13,7 @@ struct ByteAddressBufferLegalizationOptions bool scalarizeVectorLoadStore = false; bool useBitCastFromUInt = false; bool translateToStructuredBufferOps = false; + bool lowerBasicTypeOps = false; }; /// Legalize byte-address buffer `Load()` and `Store()` operations. 
diff --git a/source/slang/slang-ir-inst-defs.h b/source/slang/slang-ir-inst-defs.h index 4bad614b3..b7873b8bd 100644 --- a/source/slang/slang-ir-inst-defs.h +++ b/source/slang/slang-ir-inst-defs.h @@ -1170,6 +1170,9 @@ INST(ExistentialTypeSpecializationDictionary, ExistentialTypeSpecializationDicti /* Differentiable Type Dictionary */ INST(DifferentiableTypeDictionaryItem, DifferentiableTypeDictionaryItem, 0, 0) +INST(BeginFragmentShaderInterlock, BeginFragmentShaderInterlock, 0, 0) +INST(EndFragmentShaderInterlock, BeginFragmentShaderInterlock, 0, 0) + /* DebugInfo */ INST(DebugSource, DebugSource, 2, HOISTABLE) INST(DebugLine, DebugLine, 5, 0) diff --git a/source/slang/slang-ir-insts.h b/source/slang/slang-ir-insts.h index 5a9fa9a32..1729085be 100644 --- a/source/slang/slang-ir-insts.h +++ b/source/slang/slang-ir-insts.h @@ -3391,6 +3391,8 @@ public: IRBasicType* getInt64Type(); IRBasicType* getUIntType(); IRBasicType* getUInt64Type(); + IRBasicType* getUInt16Type(); + IRBasicType* getUInt8Type(); IRBasicType* getCharType(); IRStringType* getStringType(); IRNativeStringType* getNativeStringType(); @@ -4175,6 +4177,16 @@ public: UInt caseArgCount, IRInst* const* caseArgs); + IRInst* emitBeginFragmentShaderInterlock() + { + return emitIntrinsicInst(getVoidType(), kIROp_BeginFragmentShaderInterlock, 0, nullptr); + } + + IRInst* emitEndFragmentShaderInterlock() + { + return emitIntrinsicInst(getVoidType(), kIROp_EndFragmentShaderInterlock, 0, nullptr); + } + IRGlobalGenericParam* emitGlobalGenericParam( IRType* type); diff --git a/source/slang/slang-ir-spirv-legalize.cpp b/source/slang/slang-ir-spirv-legalize.cpp index d2a6bd557..c023537b9 100644 --- a/source/slang/slang-ir-spirv-legalize.cpp +++ b/source/slang/slang-ir-spirv-legalize.cpp @@ -631,9 +631,16 @@ struct SPIRVLegalizationContext : public SourceEmitterBase storageClass = SpvStorageClassStorageBuffer; needLoad = false; + auto memoryFlags = MemoryQualifierSetModifier::Flags::kNone; + // structured buffers in 
GLSL should be annotated as ReadOnly if (as<IRHLSLStructuredBufferType>(structuredBufferType)) - builder.addMemoryQualifierSetDecoration(inst, MemoryQualifierSetModifier::Flags::kReadOnly); + memoryFlags = MemoryQualifierSetModifier::Flags::kReadOnly; + if (as<IRHLSLRasterizerOrderedStructuredBufferType>(structuredBufferType)) + memoryFlags = MemoryQualifierSetModifier::Flags::kRasterizerOrdered; + + if (memoryFlags != MemoryQualifierSetModifier::Flags::kNone) + builder.addMemoryQualifierSetDecoration(inst, memoryFlags); } else if (auto glslShaderStorageBufferType = as<IRGLSLShaderStorageBufferType>(innerType)) { @@ -2289,6 +2296,94 @@ void simplifyIRForSpirvLegalization(TargetProgram* target, DiagnosticSink* sink, } } +static bool isRasterOrderedResource(IRInst* inst) +{ + if (auto memoryQualifierDecoration = inst->findDecoration<IRMemoryQualifierSetDecoration>()) + { + if (memoryQualifierDecoration->getMemoryQualifierBit() & MemoryQualifierSetModifier::Flags::kRasterizerOrdered) + return true; + } + auto type = inst->getDataType(); + for (;;) + { + if (auto ptrType = as<IRPtrTypeBase>(type)) + { + type = ptrType->getValueType(); + continue; + } + if (auto arrayType = as<IRArrayTypeBase>(type)) + { + type = arrayType->getElementType(); + continue; + } + break; + } + if (auto textureType = as<IRTextureTypeBase>(type)) + { + if (textureType->getAccess() == SLANG_RESOURCE_ACCESS_RASTER_ORDERED) + return true; + } + return false; +} + +static bool hasExplicitInterlockInst(IRFunc* func) +{ + for (auto block : func->getBlocks()) + { + for (auto inst : block->getChildren()) + { + if (inst->getOp() == kIROp_BeginFragmentShaderInterlock) + return true; + } + } + return false; +} + +void insertFragmentShaderInterlock(SPIRVEmitSharedContext* context, IRModule* module) +{ + HashSet<IRFunc*> fragmentShaders; + for (auto& [inst, entryPoints] : context->m_referencingEntryPoints) + { + if (isRasterOrderedResource(inst)) + { + for (auto entryPoint : entryPoints) + { + auto 
entryPointDecor = entryPoint->findDecoration<IREntryPointDecoration>(); + if (!entryPointDecor) + continue; + + if (entryPointDecor->getProfile().getStage() == Stage::Fragment) + { + fragmentShaders.add(entryPoint); + } + } + } + } + + IRBuilder builder(module); + for (auto entryPoint : fragmentShaders) + { + if (hasExplicitInterlockInst(entryPoint)) + continue; + auto firstBlock = entryPoint->getFirstBlock(); + if (!firstBlock) + continue; + builder.setInsertBefore(firstBlock->getFirstOrdinaryInst()); + builder.emitBeginFragmentShaderInterlock(); + for (auto block : entryPoint->getBlocks()) + { + if (auto inst = block->getTerminator()) + { + if (inst->getOp() == kIROp_Return || inst->getOp() == kIROp_discard) + { + builder.setInsertBefore(inst); + builder.emitEndFragmentShaderInterlock(); + } + } + } + } +} + void legalizeIRForSPIRV( SPIRVEmitSharedContext* context, IRModule* module, @@ -2299,6 +2394,7 @@ void legalizeIRForSPIRV( legalizeSPIRV(context, module); simplifyIRForSpirvLegalization(context->m_targetProgram, codeGenContext->getSink(), module); buildEntryPointReferenceGraph(context->m_referencingEntryPoints, module); + insertFragmentShaderInterlock(context, module); } } // namespace Slang diff --git a/source/slang/slang-ir.cpp b/source/slang/slang-ir.cpp index c0bec9654..cc095a0cb 100644 --- a/source/slang/slang-ir.cpp +++ b/source/slang/slang-ir.cpp @@ -2665,6 +2665,16 @@ namespace Slang return (IRBasicType*)getType(kIROp_UInt64Type); } + IRBasicType* IRBuilder::getUInt16Type() + { + return (IRBasicType*)getType(kIROp_UInt16Type); + } + + IRBasicType* IRBuilder::getUInt8Type() + { + return (IRBasicType*)getType(kIROp_UInt8Type); + } + IRBasicType* IRBuilder::getCharType() { return (IRBasicType*)getType(kIROp_CharType); diff --git a/tests/hlsl/raster-order-resource.slang b/tests/hlsl/raster-order-resource.slang new file mode 100644 index 000000000..2db9f31dc --- /dev/null +++ b/tests/hlsl/raster-order-resource.slang @@ -0,0 +1,50 @@ 
+//TEST:SIMPLE(filecheck=SPIRV): -target spirv -fvk-use-entrypoint-name +//TEST:SIMPLE(filecheck=METAL): -target metal +//TEST:SIMPLE(filecheck=METALASM): -target metallib + + +// METAL-DAG: {{.*}}{{\[\[}}raster_order_group(0){{\]\]}} {{\[\[}}buffer(0) +// METAL-DAG: {{.*}}{{\[\[}}raster_order_group(0){{\]\]}} {{\[\[}}texture(0) +// METAL-DAG: {{.*}}{{\[\[}}raster_order_group(0){{\]\]}} {{\[\[}}buffer(1) + +// METALASM: @fragMain + +RasterizerOrderedByteAddressBuffer buffer; + +[shader("fragment")] +float4 fragMain() : SV_Target +{ + // SPIRV: %fragMain_0 = OpFunction + // SPIRV: OpBeginInvocationInterlockEXT + // SPIRV: OpEndInvocationInterlockEXT + + buffer.Store(0, 0x12345678); + return float4(1, 0, 0, 1); +} + +RasterizerOrderedTexture2D tex; + +[shader("fragment")] +float4 fragMain2() : SV_Target +{ + // SPIRV: %fragMain2_0 = OpFunction + // SPIRV: OpBeginInvocationInterlockEXT + // SPIRV: OpEndInvocationInterlockEXT + + tex[uint2(0, 0)] = float4(1, 0, 0, 1); + return float4(1, 0, 0, 1); +} + +RasterizerOrderedStructuredBuffer<float> buffer2; + +[shader("fragment")] +float4 fragMain3() : SV_Target +{ + // SPIRV: %fragMain3_0 = OpFunction + // SPIRV: OpBeginInvocationInterlockEXT + // SPIRV: OpEndInvocationInterlockEXT + + buffer2[0] = 1; + return float4(1, 0, 0, 1); +} + diff --git a/tests/metal/byte-address-buffer.slang b/tests/metal/byte-address-buffer.slang new file mode 100644 index 000000000..f3169ebf4 --- /dev/null +++ b/tests/metal/byte-address-buffer.slang @@ -0,0 +1,30 @@ +//TEST:SIMPLE(filecheck=CHECK): -target metal +//TEST:SIMPLE(filecheck=CHECK-ASM): -target metallib + +uniform RWStructuredBuffer<float> outputBuffer; + +RWByteAddressBuffer buffer; + +// CHECK-ASM: define void @main_kernel + +struct TestStruct +{ + uint8_t a; + float16_t h; + float b; + float4 c; + float4x3 d; +} + +[numthreads(1,1,1)] +void main_kernel(uint3 tid: SV_DispatchThreadID) +{ + // CHECK: uint [[WORD0:[a-zA-Z0-9_]+]] = as_type<uint>({{.*}}[(int(0))>>2]); + // CHECK: 
uint8_t [[A:[a-zA-Z0-9_]+]] = uint8_t([[WORD0]] >> int(0) & 255U); + // CHECK: uint [[WORD1:[a-zA-Z0-9_]+]] = as_type<uint>({{.*}}[(int(0))>>2]); + // CHECK: half [[H:[a-zA-Z0-9_]+]] = as_type<half>(uint16_t([[WORD1]] >> int(16) & 65535U)); + + // CHECK: {{.*}}[(int(128))>>2] = as_type<uint32_t>(({{.*}} & 4294967040U) | uint([[A]]) << int(0)); + // CHECK: {{.*}}[(int(128))>>2] = as_type<uint32_t>(({{.*}} & 65535U) | uint(as_type<uint16_t>([[H]])) << int(16)); + buffer.Store(128, buffer.Load<TestStruct>(0)); +} |
