From 9fd74379c22af14f794d48fdc22e772d47f61ca3 Mon Sep 17 00:00:00 2001 From: ArielG-NV <159081215+ArielG-NV@users.noreply.github.com> Date: Wed, 13 Mar 2024 15:03:16 -0400 Subject: Implement glsl atomic's [non image or memory scope] with optional extension(s); resolves #3587 for GLSL & SPIR-V targets (#3755) The following commit implements the atomic operations & types associated with OpenGL 4.6, GL_EXT_vulkan_glsl_relaxed, GL_EXT_shader_atomic_float, and GL_EXT_shader_atomic_float2 for GLSL & SPIR-V targets. It fully implements all functions and built-in types, and resolves https://github.com/shader-slang/slang/issues/3560 for GLSL & SPIR-V targets. [Atomic extensions for GLSL can be found here](https://github.com/KhronosGroup/GLSL/tree/main) Notes of worth: * atomic_uint is well defined in GLSL->OpenGL, although it was removed in GLSL->VK unless a compiler extension is supported (GL_EXT_vulkan_glsl_relaxed). This support entails transforming all atomic_uint operations and references into a storage buffer. SPIR-V has AtomicCounter+AtomicStorage (the atomic_uint parallel) but does not implement these capabilities for SPIR-V->VK in any scenario. Because of this, we transform atomic_uint ourselves (GLSL_Syntax->Slang_IR) so that atomic_uint becomes valid syntax. * GL_EXT_shader_atomic_float2 (all float16_t & some float/double operations) support is minimal and worth watching out for if enabling the tests. 
--- source/slang/glsl.meta.slang | 603 ++++++++++++++++++++- source/slang/slang-ast-modifier.h | 8 + source/slang/slang-ast-type.h | 5 + source/slang/slang-check-modifier.cpp | 2 + source/slang/slang-diagnostic-defs.h | 1 + source/slang/slang-emit-c-like.cpp | 4 + source/slang/slang-ir-inst-defs.h | 3 + source/slang/slang-ir-insts.h | 12 + source/slang/slang-ir-link.cpp | 138 +++++ source/slang/slang-ir-spirv-legalize.cpp | 1 + source/slang/slang-ir-util.cpp | 3 + source/slang/slang-ir.cpp | 12 + source/slang/slang-lower-to-ir.cpp | 5 + source/slang/slang-parameter-binding.cpp | 6 + source/slang/slang-parser.cpp | 81 ++- source/slang/slang-reflection-api.cpp | 4 + source/slang/slang-type-layout.cpp | 8 + source/slang/slang-type-layout.h | 2 + tests/glsl-intrinsic/atomic/atomicCounter.slang | 132 +++++ .../atomic/atomicCounterTestMultiple.slang | 33 ++ tests/glsl-intrinsic/atomic/atomicErrorTest1.slang | 13 + tests/glsl-intrinsic/atomic/atomicErrorTest2.slang | 13 + tests/glsl-intrinsic/atomic/atomicErrorTest3.slang | 13 + tests/glsl-intrinsic/atomic/atomicErrorTest4.slang | 13 + .../atomic/atomicStorageBuffer.slang | 381 +++++++++++++ tools/gfx/vulkan/vk-api.h | 36 +- tools/gfx/vulkan/vk-device.cpp | 17 +- 27 files changed, 1533 insertions(+), 16 deletions(-) create mode 100644 tests/glsl-intrinsic/atomic/atomicCounter.slang create mode 100644 tests/glsl-intrinsic/atomic/atomicCounterTestMultiple.slang create mode 100644 tests/glsl-intrinsic/atomic/atomicErrorTest1.slang create mode 100644 tests/glsl-intrinsic/atomic/atomicErrorTest2.slang create mode 100644 tests/glsl-intrinsic/atomic/atomicErrorTest3.slang create mode 100644 tests/glsl-intrinsic/atomic/atomicErrorTest4.slang create mode 100644 tests/glsl-intrinsic/atomic/atomicStorageBuffer.slang diff --git a/source/slang/glsl.meta.slang b/source/slang/glsl.meta.slang index 824b3e3f3..d07233583 100644 --- a/source/slang/glsl.meta.slang +++ b/source/slang/glsl.meta.slang @@ -4615,4 +4615,605 @@ 
__spirv_version(1.3) [require(spirv)] { shader_subgroup_preamble(); return QuadReadAcrossDiagonal(value); -} \ No newline at end of file +} + +//// GLSL atomic + +// The following type internally is a Shader Storage Buffer +// as per GL_EXT_vulkan_glsl_relaxed +__magic_type(GLSLAtomicUintType) +__intrinsic_type($(kIROp_GLSLAtomicUintType)) +public struct atomic_uint +{ +}; + +// tier of float refers to atomic extension support of float1 or float2. +// if we are inside a atomic_float function we will run the check for float1 tier +// types and operations to enable the according ext needed for these operations + +__generic +[ForceInline] void typeRequireChecks_atomic_using_float0_tier() +{ + __target_switch + { + case glsl: + { + if (__type_equals() || __type_equals()) + __requireGLSLExtension("GL_EXT_shader_atomic_int64"); + } + case spirv: + return; + } +} +__generic +[ForceInline] void typeRequireChecks_atomic_using_float1_tier() +{ + __target_switch + { + case glsl: + { + if (__type_equals()) + __requireGLSLExtension("GL_EXT_shader_atomic_float"); + else if (__type_equals() || __type_equals()) + { + __requireGLSLExtension("GL_EXT_shader_atomic_float2"); + __requireGLSLExtension("GL_EXT_shader_explicit_arithmetic_types"); + } + else if (__type_equals()) + __requireGLSLExtension("GL_EXT_shader_atomic_float"); + else if (__type_equals() || __type_equals()) + __requireGLSLExtension("GL_EXT_shader_atomic_int64"); + } + case spirv: + return; + } +} +__generic +[ForceInline] void typeRequireChecks_atomic_using_float2_tier() +{ + __target_switch + { + case glsl: + { + if (__type_equals()) + __requireGLSLExtension("GL_EXT_shader_atomic_float2"); + else if (__type_equals() || __type_equals()) + { + __requireGLSLExtension("GL_EXT_shader_atomic_float2"); + __requireGLSLExtension("GL_EXT_shader_explicit_arithmetic_types"); + } + else if (__type_equals()) + __requireGLSLExtension("GL_EXT_shader_atomic_float2"); + else if (__type_equals() || __type_equals()) + 
__requireGLSLExtension("GL_EXT_shader_atomic_int64"); + } + case spirv: + return; + } +} + +__generic +void typeRequireChecks_atomic_using_add() +{ + __target_switch + { + case glsl: + return; + case spirv: + { + if (__type_equals()) + { + spirv_asm + { + OpExtension "SPV_EXT_shader_atomic_float_add"; + OpCapability AtomicFloat32AddEXT + }; + } + else if (__type_equals() + || __type_equals()) + { + spirv_asm + { + OpExtension "SPV_EXT_shader_atomic_float_add"; + OpCapability AtomicFloat16AddEXT + }; + } + else if (__type_equals()) + { + spirv_asm + { + OpExtension "SPV_EXT_shader_atomic_float_add"; + OpCapability AtomicFloat64AddEXT + }; + } + else if (__type_equals() + || __type_equals()) + { + spirv_asm + { + OpCapability Int64Atomics + }; + } + } + } +} +__generic +void typeRequireChecks_atomic_using_MinMax() +{ + __target_switch + { + case glsl: + return; + case spirv: + { + if (__type_equals()) + { + spirv_asm + { + OpExtension "SPV_EXT_shader_atomic_float_add"; + OpCapability AtomicFloat32MinMaxEXT + }; + } + else if (__type_equals() + || __type_equals()) + { + spirv_asm + { + OpExtension "SPV_EXT_shader_atomic_float_add"; + OpCapability AtomicFloat16MinMaxEXT + }; + } + else if (__type_equals()) + { + spirv_asm + { + OpExtension "SPV_EXT_shader_atomic_float_add"; + OpCapability AtomicFloat64MinMaxEXT + }; + } + else if (__type_equals() + || __type_equals()) + { + spirv_asm + { + OpCapability Int64Atomics + }; + } + } + } +} +__generic +[ForceInline] void typeRequireChecks_atomic_using_Logical_CAS() +{ + __target_switch + { + case glsl: + return; + case spirv: + { + if (__type_equals() + || __type_equals()) + { + spirv_asm + { + OpCapability Int64Atomics + }; + } + } + } +} + +${{{{ +static const struct { + const char* name; + const char* classType; + const char *subclassType; + const char *suffix; + const bool isFloat; +} atomics[] = + { + { + "uint", "I", "U", "", false + }, + { + "uint64_t", "I", "U", "", false + }, + { + "int", "I", "S", "", false + }, + 
{ + "int64_t", "I", "S", "", false + }, + { + "float16_t", "F", "F", "EXT", true + }, + { + "float", "F", "F", "EXT", true + }, + { + "double", "F", "F", "EXT", true + }, + }; +for (const auto& item : atomics) +{ +}}}} + +__spirv_version(1.0)[require(spirv)] +__glsl_version(430) [require(glsl)] +[ForceInline] public $(item.name) atomicAdd(inout $(item.name) mem, $(item.name) data) +{ + typeRequireChecks_atomic_using_float1_tier<$(item.name)>(); + typeRequireChecks_atomic_using_add<$(item.name)>(); + __target_switch + { + case glsl: __intrinsic_asm "atomicAdd($0, $1)"; + case spirv: + return spirv_asm + { + OpAtomic$(item.classType)Add$(item.suffix) $$$(item.name) result &mem Device UniformMemory $data + }; + } +} + +__spirv_version(1.0)[require(spirv)] +__glsl_version(430) [require(glsl)] +[ForceInline] public $(item.name) atomicMin(inout $(item.name) mem, $(item.name) data) +{ + typeRequireChecks_atomic_using_float2_tier<$(item.name)>(); + typeRequireChecks_atomic_using_MinMax<$(item.name)>(); + __target_switch + { + case glsl: __intrinsic_asm "atomicMin($0, $1)"; + case spirv: + return spirv_asm + { + OpAtomic$(item.subclassType)Min$(item.suffix) $$$(item.name) result &mem Device UniformMemory $data + }; + } +} + +__spirv_version(1.0)[require(spirv)] +__glsl_version(430) [require(glsl)] +[ForceInline] public $(item.name) atomicMax(inout $(item.name) mem, $(item.name) data) +{ + typeRequireChecks_atomic_using_float2_tier<$(item.name)>(); + typeRequireChecks_atomic_using_MinMax<$(item.name)>(); + __target_switch + { + case glsl: __intrinsic_asm "atomicMax($0, $1)"; + case spirv: + return spirv_asm + { + OpAtomic$(item.subclassType)Max$(item.suffix) $$$(item.name) result &mem Device UniformMemory $data + }; + } +} + +__spirv_version(1.0)[require(spirv)] +__glsl_version(430) [require(glsl)] +[ForceInline] public $(item.name) atomicExchange(inout $(item.name) mem, $(item.name) data) +{ + typeRequireChecks_atomic_using_float1_tier<$(item.name)>(); + __target_switch + { 
+ case glsl: __intrinsic_asm "atomicExchange($0, $1)"; + case spirv: + return spirv_asm + { + OpAtomicExchange $$$(item.name) result &mem Device UniformMemory $data + }; + } +} + +${{{{ +if(item.isFloat) + continue; +}}}} + +__spirv_version(1.0)[require(spirv)] +__glsl_version(430) [require(glsl)] +[ForceInline] public $(item.name) atomicAnd(inout $(item.name) mem, $(item.name) data) +{ + typeRequireChecks_atomic_using_float0_tier<$(item.name)>(); + typeRequireChecks_atomic_using_Logical_CAS<$(item.name)>(); + __target_switch + { + case glsl: + { + __intrinsic_asm "atomicAnd($0, $1)"; + } + case spirv: + return spirv_asm + { + OpAtomicAnd $$$(item.name) result &mem Device UniformMemory $data + }; + } +} + +__spirv_version(1.0)[require(spirv)] +__glsl_version(430) [require(glsl)] +[ForceInline] public $(item.name) atomicOr(inout $(item.name) mem, $(item.name) data) +{ + typeRequireChecks_atomic_using_float0_tier<$(item.name)>(); + typeRequireChecks_atomic_using_Logical_CAS<$(item.name)>(); + __target_switch + { + case glsl: __intrinsic_asm "atomicOr($0, $1)"; + case spirv: + return spirv_asm + { + OpAtomicOr $$$(item.name) result &mem Device UniformMemory $data + }; + } +} + +__spirv_version(1.0)[require(spirv)] +__glsl_version(430) [require(glsl)] +[ForceInline] public $(item.name) atomicXor(inout $(item.name) mem, $(item.name) data) +{ + typeRequireChecks_atomic_using_float0_tier<$(item.name)>(); + typeRequireChecks_atomic_using_Logical_CAS<$(item.name)>(); + __target_switch + { + case glsl: __intrinsic_asm "atomicXor($0, $1)"; + case spirv: + return spirv_asm + { + OpAtomicXor $$$(item.name) result &mem Device UniformMemory $data + }; + } +} + +__spirv_version(1.0)[require(spirv)] +__glsl_version(430) [require(glsl)] +[ForceInline] public $(item.name) atomicCompSwap(inout $(item.name) mem, $(item.name) compare, $(item.name) data) +{ + typeRequireChecks_atomic_using_float0_tier<$(item.name)>(); + typeRequireChecks_atomic_using_Logical_CAS<$(item.name)>(); + 
__target_switch + { + case glsl: __intrinsic_asm "atomicCompSwap($0, $1, $2)"; + case spirv: + return spirv_asm + { + result:$$$(item.name) = OpAtomicCompareExchange &mem Device None None $data $compare + }; + } +} + +${{{{ +} +}}}} + +// all atomic_uint functions are mangled at compile time, +// all types are converted into a field address of a 'uint' +// relative to the layout(offset) of the atomic_uint +__spirv_version(1.0)[require(spirv)] +__glsl_version(430) [require(glsl)] +[ForceInline] public uint atomicCounterIncrement(atomic_uint c) +{ + + __target_switch + { + case glsl: __intrinsic_asm "atomicAdd($0, 1)"; + case spirv: + { + return spirv_asm + { + OpAtomicIIncrement $$uint result $c Device UniformMemory + }; + } + } +} + +__glsl_version(430) [require(glsl)] +[ForceInline] public uint atomicCounterDecrement_GLSL_helper(atomic_uint c) +{ + __target_switch + { + case glsl: + { + __intrinsic_asm "atomicExchange($0,$0-1)"; + } + } +} + +__spirv_version(1.0)[require(spirv)] +__glsl_version(430) [require(glsl)] +[ForceInline] public uint atomicCounter(atomic_uint c) +{ + __target_switch + { + case glsl: + { + __intrinsic_asm "($0)"; + } + case spirv: + { + return spirv_asm + { + OpLoad $$uint result $c + }; + } + } +} + +__spirv_version(1.0)[require(spirv)] +__glsl_version(430) [require(glsl)] +[ForceInline] public uint atomicCounterDecrement(atomic_uint c) +{ + __target_switch + { + case glsl: + { + atomicCounterDecrement_GLSL_helper(c); + return atomicCounter(c); + } + case spirv: + { + // spirv OpAtomicIDecrement returns pre-sub-1, glsl returns the new value + // we want a discarded side effect and then return the new value + return spirv_asm + { + %discardedValue:$$uint = OpAtomicIDecrement $c Device UniformMemory; + OpLoad $$uint result $c + }; + } + } +} + +__spirv_version(1.0)[require(spirv)] +__glsl_version(430) [require(glsl)] +[ForceInline] public uint atomicCounterAdd(atomic_uint c, uint data) +{ + __target_switch + { + case glsl: __intrinsic_asm 
"atomicAdd($0, $1)"; + case spirv: + { + return spirv_asm + { + OpAtomicIAdd $$uint result $c Device UniformMemory $data + }; + } + } +} + +__spirv_version(1.0)[require(spirv)] +__glsl_version(430) [require(glsl)] +[ForceInline] public uint atomicCounterSubtract(atomic_uint c, uint data) +{ + __target_switch + { + case glsl: + { + __intrinsic_asm "atomicExchange($0,$0-$1)"; + } + case spirv: + { + return spirv_asm + { + OpAtomicISub $$uint result $c Device UniformMemory $data + }; + } + } +} + +__spirv_version(1.0)[require(spirv)] +__glsl_version(430) [require(glsl)] +[ForceInline] public uint atomicCounterMin(atomic_uint c, uint data) +{ + __target_switch + { + case glsl: __intrinsic_asm "atomicMin($0, $1)"; + case spirv: + { + return spirv_asm + { + OpAtomicUMin $$uint result $c Device UniformMemory $data + }; + } + } +} + +__spirv_version(1.0)[require(spirv)] +__glsl_version(430) [require(glsl)] +[ForceInline] public uint atomicCounterMax(atomic_uint c, uint data) +{ + __target_switch + { + case glsl: __intrinsic_asm "atomicMax($0, $1)"; + case spirv: + { + return spirv_asm + { + OpAtomicUMax $$uint result $c Device UniformMemory $data + }; + } + } +} + +__spirv_version(1.0)[require(spirv)] +__glsl_version(430) [require(glsl)] +[ForceInline] public uint atomicCounterAnd(atomic_uint c, uint data) +{ + __target_switch + { + case glsl: __intrinsic_asm "atomicAnd($0, $1)"; + case spirv: + { + return spirv_asm + { + OpAtomicAnd $$uint result $c Device UniformMemory $data + }; + } + } +} + +__spirv_version(1.0)[require(spirv)] +__glsl_version(430) [require(glsl)] +[ForceInline] public uint atomicCounterOr(atomic_uint c, uint data) +{ + __target_switch + { + case glsl: __intrinsic_asm "atomicOr($0, $1)"; + case spirv: + { + return spirv_asm + { + OpAtomicOr $$uint result $c Device UniformMemory $data + }; + } + } +} + +__spirv_version(1.0)[require(spirv)] +__glsl_version(430) [require(glsl)] +[ForceInline] public uint atomicCounterXor(atomic_uint c, uint data) +{ + 
__target_switch + { + case glsl: __intrinsic_asm "atomicXor($0, $1)"; + case spirv: + { + return spirv_asm + { + OpAtomicXor $$uint result $c Device UniformMemory $data + }; + } + } +} + +__spirv_version(1.0)[require(spirv)] +__glsl_version(430) [require(glsl)] +[ForceInline] public uint atomicCounterExchange(atomic_uint c, uint data) +{ + __target_switch + { + case glsl: __intrinsic_asm "atomicExchange($0, $1)"; + case spirv: + { + return spirv_asm + { + OpAtomicExchange $$uint result $c Device UniformMemory $data + }; + } + } +} + +__spirv_version(1.0)[require(spirv)] +__glsl_version(430) [require(glsl)] +[ForceInline] public uint atomicCounterCompSwap(atomic_uint c, uint compare, uint data) +{ + __target_switch + { + case glsl: __intrinsic_asm "atomicCompSwap($0, $1, $2)"; + case spirv: + { + return spirv_asm + { + OpAtomicCompareExchange $$uint result $c Device UniformMemory UniformMemory $data $compare + }; + } + } +} diff --git a/source/slang/slang-ast-modifier.h b/source/slang/slang-ast-modifier.h index ed8cbf514..49dc1b81f 100644 --- a/source/slang/slang-ast-modifier.h +++ b/source/slang/slang-ast-modifier.h @@ -737,6 +737,14 @@ class GLSLBindingAttribute : public Attribute int32_t set = 0; }; + +class GLSLOffsetLayoutAttribute : public Attribute +{ + SLANG_AST_CLASS(GLSLOffsetLayoutAttribute) + + int64_t offset; +}; + class GLSLSimpleIntegerLayoutAttribute : public Attribute { SLANG_AST_CLASS(GLSLSimpleIntegerLayoutAttribute) diff --git a/source/slang/slang-ast-type.h b/source/slang/slang-ast-type.h index d47e3a496..1e066fb2a 100644 --- a/source/slang/slang-ast-type.h +++ b/source/slang/slang-ast-type.h @@ -263,6 +263,11 @@ class HLSLConsumeStructuredBufferType : public HLSLStructuredBufferTypeBase SLANG_AST_CLASS(HLSLConsumeStructuredBufferType) }; +class GLSLAtomicUintType : public BuiltinType +{ + SLANG_AST_CLASS(GLSLAtomicUintType) +}; + class HLSLPatchType : public BuiltinType { SLANG_AST_CLASS(HLSLPatchType) diff --git 
a/source/slang/slang-check-modifier.cpp b/source/slang/slang-check-modifier.cpp index cf4bf3b02..6359096b9 100644 --- a/source/slang/slang-check-modifier.cpp +++ b/source/slang/slang-check-modifier.cpp @@ -988,6 +988,7 @@ namespace Slang case ASTNodeType::GLSLParsedLayoutModifier: case ASTNodeType::GLSLConstantIDLayoutModifier: case ASTNodeType::GLSLLocationLayoutModifier: + case ASTNodeType::GLSLOffsetLayoutAttribute: case ASTNodeType::GLSLUnparsedLayoutModifier: case ASTNodeType::GLSLLayoutModifierGroupMarker: case ASTNodeType::GLSLLayoutModifierGroupBegin: @@ -1063,6 +1064,7 @@ namespace Slang case ASTNodeType::GLSLParsedLayoutModifier: case ASTNodeType::GLSLConstantIDLayoutModifier: case ASTNodeType::GLSLLocationLayoutModifier: + case ASTNodeType::GLSLOffsetLayoutAttribute: case ASTNodeType::GLSLUnparsedLayoutModifier: case ASTNodeType::GLSLLayoutModifierGroupMarker: case ASTNodeType::GLSLLayoutModifierGroupBegin: diff --git a/source/slang/slang-diagnostic-defs.h b/source/slang/slang-diagnostic-defs.h index 54761e772..8df51211a 100644 --- a/source/slang/slang-diagnostic-defs.h +++ b/source/slang/slang-diagnostic-defs.h @@ -238,6 +238,7 @@ DIAGNOSTIC(20012, Error, invalidSPIRVVersion, "Expecting SPIR-V version as eithe DIAGNOSTIC(20013, Error, invalidCUDASMVersion, "Expecting CUDA SM version as either 'major.minor', or quoted if has patch (eg for '7.0' or \"7.0\"')") DIAGNOSTIC(20014, Error, classIsReservedKeyword, "'class' is a reserved keyword in this context; use 'struct' instead.") DIAGNOSTIC(20015, Error, unknownSPIRVCapability, "unknown SPIR-V capability '$0'.") +DIAGNOSTIC(20016, Error, missingLayoutBindingModifier, "Expecting 'binding' modifier in the layout qualifier here") DIAGNOSTIC(20101, Warning, unintendedEmptyStatement, "potentially unintended empty statement at this location; use {} instead.") diff --git a/source/slang/slang-emit-c-like.cpp b/source/slang/slang-emit-c-like.cpp index 451147dd0..44d74f219 100644 --- 
a/source/slang/slang-emit-c-like.cpp +++ b/source/slang/slang-emit-c-like.cpp @@ -2663,6 +2663,10 @@ void CLikeSourceEmitter::defaultEmitInstExpr(IRInst* inst, const EmitOpInfo& inO } break; } + case kIROp_RequireGLSLExtension: + { + break; //should already have set requirement; case covered for empty intrinsic block + } default: diagnoseUnhandledInst(inst); break; diff --git a/source/slang/slang-ir-inst-defs.h b/source/slang/slang-ir-inst-defs.h index 26e0ebd69..977fca904 100644 --- a/source/slang/slang-ir-inst-defs.h +++ b/source/slang/slang-ir-inst-defs.h @@ -127,6 +127,8 @@ INST(Nop, nop, 0, 0) INST(ComPtrType, ComPtr, 1, HOISTABLE) // A NativePtr type represents a native pointer to a managed resource. INST(NativePtrType, NativePtr, 1, HOISTABLE) + // An AtomicUint is a placeholder type for a storage buffer, and will be mangled during compiling. + INST(GLSLAtomicUintType, GLSLAtomicUint, 0, HOISTABLE) /* SamplerStateTypeBase */ INST(SamplerStateType, SamplerState, 0, HOISTABLE) @@ -848,6 +850,7 @@ INST(HighLevelDeclDecoration, highLevelDecl, 1, 0) INST(GlobalOutputDecoration, output, 0, 0) INST(GlobalInputDecoration, output, 0, 0) INST(GLSLLocationDecoration, glslLocation, 1, 0) + INST(GLSLOffsetDecoration, glslOffset, 1, 0) INST(PayloadDecoration, payload, 0, 0) /* Mesh Shader outputs */ diff --git a/source/slang/slang-ir-insts.h b/source/slang/slang-ir-insts.h index 195bf577c..a85b279b8 100644 --- a/source/slang/slang-ir-insts.h +++ b/source/slang/slang-ir-insts.h @@ -370,6 +370,12 @@ struct IRGLSLLocationDecoration : IRDecoration IRIntLit* getLocation() { return cast(getOperand(0)); } }; +struct IRGLSLOffsetDecoration : IRDecoration +{ + IR_LEAF_ISA(GLSLOffsetDecoration) + IRIntLit* getOffset() { return cast(getOperand(0)); } +}; + struct IRNVAPIMagicDecoration : IRDecoration { enum { kOp = kIROp_NVAPIMagicDecoration }; @@ -3816,6 +3822,7 @@ public: // Create an initially empty `GLSLShaderStorageBufferType` type. 
IRGLSLShaderStorageBufferType* createGLSLShaderStorableBufferType(); + IRGLSLShaderStorageBufferType* createGLSLShaderStorableBufferType(UInt operandCount, IRInst* const* operands); // Create an empty `interface` type. IRInterfaceType* createInterfaceType(UInt operandCount, IRInst* const* operands); @@ -4431,6 +4438,11 @@ public: addDecoration(value, kIROp_DebugLocationDecoration, debugSource, getIntValue(getUIntType(), line), getIntValue(getUIntType(), col)); } + void addUnsafeForceInlineDecoration(IRInst* value) + { + addDecoration(value, kIROp_UnsafeForceInlineEarlyDecoration); + } + void addForceInlineDecoration(IRInst* value) { addDecoration(value, kIROp_ForceInlineDecoration); diff --git a/source/slang/slang-ir-link.cpp b/source/slang/slang-ir-link.cpp index 18cb850c0..a74c0c8f2 100644 --- a/source/slang/slang-ir-link.cpp +++ b/source/slang/slang-ir-link.cpp @@ -8,6 +8,7 @@ #include "slang-ir-string-hash.h" #include "slang-ir-autodiff.h" #include "slang-ir-specialize-target-switch.h" +#include "slang-ir-layout.h" #include "slang-module-library.h" #include "../core/slang-performance-profiler.h" @@ -1520,6 +1521,138 @@ static void diagnoseUnresolvedSymbols(TargetRequest* req, DiagnosticSink* sink, } } +void convertAtomicToStorageBuffer( + IRSpecContext* context, + Dictionary>& bindingToInstMapUnsorted) +{ + // Atomic_uint definitions needs to become a storage buffer to follow GL_EXT_vulkan_glsl_relaxed + // and to allow translation of atomic_uint into SPIRV + + IRBuilder builder = *context->builder; + + for (auto& bindingToInstList : bindingToInstMapUnsorted) + { + int64_t maxOffset = 0; + for (auto& i : bindingToInstList.second) + { + int64_t currOffset = int64_t(i->findDecoration()->getOffset()->getValue()); + maxOffset = (maxOffset < currOffset) ? 
currOffset : maxOffset; + } + auto instToSwitch = *bindingToInstList.second.begin(); + builder.setInsertBefore(instToSwitch); + + auto elementType = builder.getArrayType( + builder.getUIntType(), + builder.getIntValue(builder.getUIntType(), (maxOffset / sizeof(uint32_t))+1) + ); + + StringBuilder nameStruct; + nameStruct << "atomic_uints"; + nameStruct << bindingToInstList.first; + nameStruct << "_t"; + nameStruct << "_paramGroup"; + auto structType = builder.createStructType(); + builder.addNameHintDecoration(structType, nameStruct.produceString().getUnownedSlice()); + + auto elementBufferKey = builder.createStructKey(); + builder.addNameHintDecoration(elementBufferKey, UnownedStringSlice("_data")); + auto elementBufferType = elementType; + auto _dataField = builder.createStructField(structType, elementBufferKey, elementBufferType); + + auto std430 = builder._createInst(sizeof(IRTypeLayoutRules), builder.getType(kIROp_Std430BufferLayoutType), kIROp_Std430BufferLayoutType); + IRGLSLShaderStorageBufferType* storageBuffer; + { + IRInst* ops[] = { structType, std430 }; + storageBuffer = builder.createGLSLShaderStorableBufferType(2, ops); + } + + instToSwitch->setFullType(storageBuffer); + + // All references to a atomic_uint need to be an element ref. 
to emulate storage buffer usage + // All function calls must be inlined since storage buffers cannot pass as parameters to atomic methods + for (auto& i : bindingToInstList.second) + { + int64_t currOffset = int64_t(i->findDecoration()->getOffset()->getValue()); + + // we need a next node to be stored since the following code + // changes IRUse* of the use->user node, meaning we will lose + // our IRUse list of the atomic_uint being swapped out + IRUse* next = nullptr; + for (auto use = i->firstUse; use; use = next) + { + next = use->nextUse; + auto user = use->user; + + switch (user->getOp()) + { + case kIROp_StructFieldLayoutAttr: + { + // Definitions do nothing if unused + break; + } + case kIROp_Call: + { + builder.setInsertBefore(user); + auto fieldAddress = builder.emitFieldAddress( + builder.getPtrType(_dataField->getFieldType()), + instToSwitch, + _dataField->getKey() + ); + auto elementAddr = builder.emitElementAddress( + builder.getPtrType(builder.getUIntType()), + fieldAddress, + builder.getIntValue(builder.getIntType(), currOffset/4)); + + user->setOperand(1, elementAddr); + auto funcTypeInst = (user->getOperand(0)); + auto funcType = funcTypeInst->getFullType(); + + auto paramReplacment = builder.getInOutType(builder.getUIntType()); + funcType->getOperand(1)->replaceUsesWith(paramReplacment); + builder.addForceInlineDecoration(funcTypeInst); + + break; + } + } + } + if (i->typeUse.usedValue->getOp() == kIROp_GLSLAtomicUintType) + { + i->removeAndDeallocate(); + } + } + } +} + +void GLSLReplaceAtomicUint(IRSpecContext* context, TargetProgram* targetProgram, IRModule* irModule) +{ + if (!targetProgram->getOptionSet().getBoolOption(CompilerOptionName::AllowGLSL)) return; + + Dictionary> bindingToInstMapUnsorted; + for (auto inst : irModule->getGlobalInsts()) + { + if (inst->typeUse.usedValue) + { + switch (inst->typeUse.usedValue->getOp()) + { + case kIROp_GLSLAtomicUintType: + { + // atomic_uint are supported by GLSL->VK through converting to a different 
type (GL_EXT_vulkan_glsl_relaxed). + // atomic_uint are not supported by SPIR-V->VK; this means that to get SPIR-V to work we must convert the type ourselves + // to an equivlent representation (storage buffer); the added benifit is that then HLSL is possible to emit as a target as well + // since atomic_uint is not an HLSL concept, but storageBuffer->RWBuffer is and HLSL concept + auto layout = inst->findDecoration()->getLayout(); + auto layoutVal = as(layout->getOperand(1)); + assert(layoutVal != nullptr); + bindingToInstMapUnsorted.getOrAddValue(uint32_t(layoutVal->getOffset()), List()).add(inst); + break; + } + }; + } + } + + convertAtomicToStorageBuffer(context, bindingToInstMapUnsorted); +} + LinkedIR linkIR( CodeGenContext* codeGenContext) { @@ -1728,6 +1861,11 @@ LinkedIR linkIR( // definition. diagnoseUnresolvedSymbols(targetReq, codeGenContext->getSink(), state->irModule); + // type-use reformatter of GLSL types (only if compiler is set to AllowGLSL mode) + // which are not supported by SPIRV->Vulkan but is supported by GLSL->Vulkan through + // compiler magic tricks + GLSLReplaceAtomicUint(context, targetProgram, state->irModule); + // TODO: *technically* we should consider the case where // we have global variables with initializers, since // these should get run whether or not the entry point diff --git a/source/slang/slang-ir-spirv-legalize.cpp b/source/slang/slang-ir-spirv-legalize.cpp index 511c596a4..1feba361b 100644 --- a/source/slang/slang-ir-spirv-legalize.cpp +++ b/source/slang/slang-ir-spirv-legalize.cpp @@ -275,6 +275,7 @@ struct SPIRVLegalizationContext : public SourceEmitterBase switch (type->getOp()) { case kIROp_RaytracingAccelerationStructureType: + case kIROp_GLSLAtomicUintType: case kIROp_RayQueryType: return true; default: diff --git a/source/slang/slang-ir-util.cpp b/source/slang/slang-ir-util.cpp index 5b29d23a8..295fbd642 100644 --- a/source/slang/slang-ir-util.cpp +++ b/source/slang/slang-ir-util.cpp @@ -181,6 +181,7 @@ bool 
isValueType(IRInst* dataType) case kIROp_ArrayType: case kIROp_FuncType: case kIROp_RaytracingAccelerationStructureType: + case kIROp_GLSLAtomicUintType: return true; default: // Read-only resource handles are considered as Value type. @@ -507,6 +508,8 @@ void getTypeNameHint(StringBuilder& sb, IRInst* type) case kIROp_HLSLRasterizerOrderedByteAddressBufferType: sb << "RasterizerOrderedByteAddressBuffer"; break; + case kIROp_GLSLAtomicUintType: + sb << "AtomicCounter"; case kIROp_RaytracingAccelerationStructureType: sb << "RayTracingAccelerationStructure"; break; diff --git a/source/slang/slang-ir.cpp b/source/slang/slang-ir.cpp index 696e862d6..fdc10e774 100644 --- a/source/slang/slang-ir.cpp +++ b/source/slang/slang-ir.cpp @@ -4495,6 +4495,18 @@ namespace Slang return ssboType; } + IRGLSLShaderStorageBufferType* IRBuilder::createGLSLShaderStorableBufferType(UInt operandCount, IRInst* const* operands) + { + IRGLSLShaderStorageBufferType* ssboType = createInst( + this, + kIROp_GLSLShaderStorageBufferType, + getTypeKind(), + operandCount, + operands); + addGlobalValue(this, ssboType); + return ssboType; + } + IRInterfaceType* IRBuilder::createInterfaceType(UInt operandCount, IRInst* const* operands) { IRInterfaceType* interfaceType = createInst( diff --git a/source/slang/slang-lower-to-ir.cpp b/source/slang/slang-lower-to-ir.cpp index 566e5a878..6e6ba6255 100644 --- a/source/slang/slang-lower-to-ir.cpp +++ b/source/slang/slang-lower-to-ir.cpp @@ -2183,6 +2183,11 @@ void addVarDecorations( builder->addDecoration(inst, kIROp_GLSLLocationDecoration, builder->getIntValue(builder->getIntType(), stringToInt(glslLocationMod->valToken.getContent()))); } + else if (auto glslOffsetMod = as(mod)) + { + builder->addDecoration(inst, kIROp_GLSLOffsetDecoration, + builder->getIntValue(builder->getIntType(), glslOffsetMod->offset)); + } else if (auto hlslSemantic = as< HLSLSimpleSemantic>(mod)) { builder->addSemanticDecoration(inst, hlslSemantic->name.getContent()); diff --git 
a/source/slang/slang-parameter-binding.cpp b/source/slang/slang-parameter-binding.cpp index 267f23e6c..ebaa58adb 100644 --- a/source/slang/slang-parameter-binding.cpp +++ b/source/slang/slang-parameter-binding.cpp @@ -934,6 +934,12 @@ static void addExplicitParameterBinding( if (overlappedVarLayout) { + //legal if atomicUint + if(parameterInfo->varLayout->varDecl.getDecl()->getType()->astNodeType == ASTNodeType::GLSLAtomicUintType + && overlappedVarLayout->varDecl.getDecl()->getType()->astNodeType == ASTNodeType::GLSLAtomicUintType) + { + return; + } auto paramA = parameterInfo->varLayout->varDecl.getDecl(); auto paramB = overlappedVarLayout->varDecl.getDecl(); diff --git a/source/slang/slang-parser.cpp b/source/slang/slang-parser.cpp index 3315c786a..cc87a3daa 100644 --- a/source/slang/slang-parser.cpp +++ b/source/slang/slang-parser.cpp @@ -229,6 +229,29 @@ namespace Slang lastErrorLoc = loc; } } + + public: + void setBindingOffset(int binding, int64_t byteOffset) + { + bindingToByteOffset.set(binding, byteOffset); + } + int64_t getNextBindingOffset(int binding) + { + int64_t currentOffset; + if (bindingToByteOffset.addIfNotExists(binding, 0)) + currentOffset = 0; + else + currentOffset = bindingToByteOffset.getValue(binding) + sizeof(uint32_t); + + bindingToByteOffset.set( + binding, + currentOffset + sizeof(uint32_t) + ); + return currentOffset; + } + + private: + Dictionary bindingToByteOffset; }; // Forward Declarations @@ -4365,6 +4388,42 @@ namespace Slang return attrDecl; } + static void addSpecialGLSLModifiersBasedOnType( + Parser* parser, + Decl* decl, + Modifiers* modifiers) + { + auto varDeclBase = as(decl); + if (!varDeclBase) return; + auto declRefExpr = as(varDeclBase->type.exp); + if (!declRefExpr) return; + auto bindingMod = modifiers->findModifier(); + if (!bindingMod) return; + + // here is a problem; we link types into a literal in IR stage post parse + // but, order (top down) mattter when parsing atomic_uint offset + // more over, we can have 
patterns like: offset = 20, no offset [+4], offset = 16. + // Therefore we must parse all in order. The issue then is we will struggle to + // subsitute atomic_uint for storage buffers... + if (auto name = declRefExpr->name) + { + if (name->text.equals("atomic_uint")) + { + if (!modifiers->findModifier()) + { + const int64_t nextOffset = parser->getNextBindingOffset(bindingMod->binding); + GLSLOffsetLayoutAttribute* modifier = parser->astBuilder->create(); + modifier->keywordName = NULL; //no keyword name given + modifier->loc = bindingMod->loc; //has no location in file, set to parent binding + modifier->offset = nextOffset; + + Modifiers newModifier; + newModifier.first = modifier; + _addModifiers(decl, newModifier); + } + } + } + } // Finish up work on a declaration that was parsed static void CompleteDecl( Parser* parser, @@ -4403,6 +4462,10 @@ namespace Slang } else { + if (parser->options.allowGLSLInput) + { + addSpecialGLSLModifiersBasedOnType(parser, declToModify, &modifiers); + } _addModifiers(declToModify, modifiers); } @@ -7787,6 +7850,7 @@ namespace Slang CASE(std140, GLSLStd140Modifier) CASE(std430, GLSLStd430Modifier) CASE(scalar, GLSLScalarModifier) + CASE(offset, GLSLOffsetLayoutAttribute) CASE(location, GLSLLocationLayoutModifier) { modifier = parser->astBuilder->create(); @@ -7797,12 +7861,27 @@ namespace Slang modifier->keywordName = nameAndLoc.name; modifier->loc = nameAndLoc.loc; + // Special handling for GLSLLayoutModifier if (auto glslModifier = as(modifier)) { + // not all GLSLLayoutModifier subtypes have an OpAssign after if (AdvanceIf(parser, TokenType::OpAssign)) - { glslModifier->valToken = parser->ReadToken(TokenType::IntegerLiteral); + } + //Special handling for GLSLOffsetLayoutAttribute to add to the byte offset tracker at a binding location + else if (auto glslOffset = as(modifier)) + { + if (auto binding = listBuilder.find()) + { + // all GLSLOffsetLayoutAttribute have an OpAssign with value token + 
parser->ReadToken(TokenType::OpAssign); + glslOffset->offset = int64_t(getIntegerLiteralValue(parser->ReadToken(TokenType::IntegerLiteral))); + parser->setBindingOffset(binding->binding, glslOffset->offset); + } + else + { + parser->diagnose(modifier->loc, Diagnostics::missingLayoutBindingModifier); } } diff --git a/source/slang/slang-reflection-api.cpp b/source/slang/slang-reflection-api.cpp index d91dd5858..9b20e2933 100644 --- a/source/slang/slang-reflection-api.cpp +++ b/source/slang/slang-reflection-api.cpp @@ -1315,6 +1315,10 @@ namespace Slang return SLANG_BINDING_TYPE_MUTABLE_RAW_BUFFER; } } + else if (as(type)) + { + return SLANG_BINDING_TYPE_MUTABLE_RAW_BUFFER; + } else if( as(type) ) { // TODO Immutable buffers diff --git a/source/slang/slang-type-layout.cpp b/source/slang/slang-type-layout.cpp index 3f49d62d2..f7efd4ee4 100644 --- a/source/slang/slang-type-layout.cpp +++ b/source/slang/slang-type-layout.cpp @@ -3858,6 +3858,14 @@ static TypeLayoutResult _createTypeLayout( type, rules); } + else if (auto atomicType = as(type)) + { + ShaderParameterKind kind = ShaderParameterKind::AtomicUint; + return createSimpleTypeLayout( + rules->GetObjectLayout(kind, context.objectLayoutOptions), + type, + rules); + } // TODO: need a better way to handle this stuff... 
#define CASE(TYPE, KIND) \ diff --git a/source/slang/slang-type-layout.h b/source/slang/slang-type-layout.h index c17f2ebb4..a12c310b9 100644 --- a/source/slang/slang-type-layout.h +++ b/source/slang/slang-type-layout.h @@ -944,6 +944,8 @@ enum class ShaderParameterKind RegisterSpace, AppendConsumeStructuredBuffer, + + AtomicUint, }; struct SimpleLayoutRulesImpl diff --git a/tests/glsl-intrinsic/atomic/atomicCounter.slang b/tests/glsl-intrinsic/atomic/atomicCounter.slang new file mode 100644 index 000000000..a3b938565 --- /dev/null +++ b/tests/glsl-intrinsic/atomic/atomicCounter.slang @@ -0,0 +1,132 @@ +//TEST:SIMPLE(filecheck=CHECK_GLSL): -allow-glsl -stage compute -entry computeMain -target glsl -DTARGET_GLSL +//TEST:SIMPLE(filecheck=CHECK_SPV): -allow-glsl -stage compute -entry computeMain -target spirv -emit-spirv-directly -DTARGET_SPIRV +//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl +//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl -emit-spirv-directly +#version 430 + +//TEST_INPUT:ubuffer(data=[0 0], stride=4):out,name=outputBuffer +buffer MyBlockName +{ + uint data[2]; +} outputBuffer; + +// CHECK_GLSL-DAG: void main( +// CHECK_SPV-DAG: OpEntryPoint + +//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0], stride=4):name=one +layout(binding = 1, offset = 12) uniform atomic_uint one; +bool testSetterAndGetter() +{ + return true + +// CHECK_GLSL-DAG: atomicExchange +// CHECK_SPV-DAG: OpAtomicExchange + && atomicCounterExchange(one, 1) == 0 + +// CHECK_GLSL-DAG: atomicExchange +// no idea how to check the spirv reliably... 
+ && atomicCounter(one) == 1 + && atomicCounterExchange(one, 5) == 1 + && atomicCounter(one) == 5 + ; +} + +bool counterAsParam(atomic_uint param) +{ + return true + && atomicCounterExchange(param, 5) != 100 + +// CHECK_GLSL-DAG: atomicAdd( +// CHECK_SPV-DAG: OpAtomicIAdd + && atomicCounterIncrement(param) == 5 + && atomicCounter(param) == 6 + ; +} + +// GLSL_CHECK-LABEL: bool testAtomicUint +// SPV_CHECK-LABEL: testAtomicUint +bool testAtomicUint() +{ +// ensure the code emits for `one` index into [3] for 12/4 +// CHECK_GLSL-DAG: {{.*}}_data_0[3]{{.*}} + return true + + && atomicCounterExchange(one, 5) != 100 +// CHECK_GLSL-DAG: atomicAdd( +// CHECK_SPV-DAG: OpAtomicIIncrement + && atomicCounterIncrement(one) == 5 + && atomicCounter(one) == 6 + + && atomicCounterExchange(one, 5) != 100 +// CHECK_GLSL-DAG: atomicExchange( +// CHECK_SPV-DAG: OpAtomicIDecrement + && atomicCounterDecrement(one) == 4 + + && atomicCounterExchange(one, 5) != 100 +// CHECK_GLSL-DAG: atomicAdd( + && atomicCounterAdd(one, 1) == 5 + && atomicCounter(one) == 6 + + && atomicCounterExchange(one, 5) != 100 +// CHECK_GLSL-DAG: atomicExchange +// CHECK_SPV-DAG: OpAtomicISub + && atomicCounterSubtract(one, 1) == 5 + && atomicCounter(one) == 4 + + && atomicCounterExchange(one, 5) != 100 +// CHECK_GLSL-DAG: atomicMin( +// CHECK_SPV-DAG: OpAtomicUMin + && atomicCounterMin(one, 1) == 5 + && atomicCounter(one) == 1 + + && atomicCounterExchange(one, 5) != 100 +// CHECK_GLSL-DAG: atomicMax( +// CHECK_SPV-DAG: OpAtomicUMax + && atomicCounterMax(one, 1) == 5 + && atomicCounter(one) == 5 + +// CHECK_GLSL-DAG: atomicAnd( +// CHECK_SPV: OpAtomicAnd + && atomicCounterExchange(one, 5) != 100 + && atomicCounterAnd(one, 2) == 5 + && atomicCounter(one) == 0 + +// CHECK_GLSL-DAG: atomicOr( +// CHECK_SPV-DAG: OpAtomicOr + && atomicCounterExchange(one, 5) != 100 + && atomicCounterOr(one, 8) == 5 + && atomicCounter(one) == 13 + +// CHECK_GLSL-DAG: atomicXor( +// CHECK_SPV-DAG: OpAtomicXor + && 
atomicCounterExchange(one, 5) != 100 + && atomicCounterXor(one, 4) == 5 + && atomicCounter(one) == 1 + +// CHECK_GLSL-DAG: atomicCompSwap( +// CHECK_SPV-DAG: OpAtomicCompareExchange + && atomicCounterExchange(one, 5) != 100 + && atomicCounterCompSwap(one, 5, 3) == 5 + && atomicCounter(one) == 3 + +// CHECK_GLSL-DAG: atomicCompSwap( +// CHECK_SPV-DAG: OpAtomicCompareExchange + && atomicCounterExchange(one, 5) != 100 + && atomicCounterCompSwap(one, 5, 3) == 5 + && atomicCounter(one) == 3 + + && counterAsParam(one); + ; +} + +void computeMain() +{ + outputBuffer.data[0] = true + && testSetterAndGetter() + ; + outputBuffer.data[1] = true + && testAtomicUint() + ; + // BUF: 1 + // BUF-NEXT: 1 +} diff --git a/tests/glsl-intrinsic/atomic/atomicCounterTestMultiple.slang b/tests/glsl-intrinsic/atomic/atomicCounterTestMultiple.slang new file mode 100644 index 000000000..ec296968c --- /dev/null +++ b/tests/glsl-intrinsic/atomic/atomicCounterTestMultiple.slang @@ -0,0 +1,33 @@ +//DIAGNOSTIC_TEST:SIMPLE(filecheck=CHECK_GLSL): -allow-glsl -stage compute -entry computeMain -target glsl -DTARGET_GLSL +#version 430 + +//TEST_INPUT:ubuffer(data=[0], stride=4):out,name=outputBuffer +buffer MyBlockName +{ + uint data[1]; +} outputBuffer; + +layout(binding = 1, offset = 12) uniform atomic_uint one; +layout(binding = 1) uniform atomic_uint two; +layout(binding = 1, offset = 4) uniform atomic_uint three; +layout(binding = 1) uniform atomic_uint four; +layout(binding = 2) uniform atomic_uint five; + +void computeMain() +{ + + outputBuffer.data[0] = true +// CHECK_GLSL: one_0._data_0[3] + && atomicCounter(one) == 0 +// CHECK_GLSL: one_0._data_0[4] + && atomicCounter(two) == 0 +// CHECK_GLSL: one_0._data_0[1] + && atomicCounter(three) == 0 +// CHECK_GLSL: one_0._data_0[2] + && atomicCounter(four) == 0 +// CHECK_GLSL: five_0._data_1[0] + && atomicCounter(five) == 0 + + ; + +} diff --git a/tests/glsl-intrinsic/atomic/atomicErrorTest1.slang b/tests/glsl-intrinsic/atomic/atomicErrorTest1.slang 
new file mode 100644 index 000000000..0512d598e --- /dev/null +++ b/tests/glsl-intrinsic/atomic/atomicErrorTest1.slang @@ -0,0 +1,13 @@ +//DIAGNOSTIC_TEST:SIMPLE(filecheck=CHECK): -allow-glsl -stage compute -entry computeMain -target glsl -DTARGET_GLSL +//DIAGNOSTIC_TEST:SIMPLE(filecheck=CHECK): -allow-glsl -stage compute -entry computeMain -target spirv -emit-spirv-directly -DTARGET_SPIRV +#version 430 + +// CHECK: error 20001 + +//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0], stride=4):name=one +layout(binding = 1, offset = ) uniform atomic_uint one; + +void computeMain() +{ + +} diff --git a/tests/glsl-intrinsic/atomic/atomicErrorTest2.slang b/tests/glsl-intrinsic/atomic/atomicErrorTest2.slang new file mode 100644 index 000000000..ca7d94e54 --- /dev/null +++ b/tests/glsl-intrinsic/atomic/atomicErrorTest2.slang @@ -0,0 +1,13 @@ +//DIAGNOSTIC_TEST:SIMPLE(filecheck=CHECK): -allow-glsl -stage compute -entry computeMain -target glsl -DTARGET_GLSL +//DIAGNOSTIC_TEST:SIMPLE(filecheck=CHECK): -allow-glsl -stage compute -entry computeMain -target spirv -emit-spirv-directly -DTARGET_SPIRV +#version 430 + +// CHECK: error 20001 + +//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0], stride=4):name=one +layout(binding 1) uniform atomic_uint one; + +void computeMain() +{ + +} diff --git a/tests/glsl-intrinsic/atomic/atomicErrorTest3.slang b/tests/glsl-intrinsic/atomic/atomicErrorTest3.slang new file mode 100644 index 000000000..b21d27f6d --- /dev/null +++ b/tests/glsl-intrinsic/atomic/atomicErrorTest3.slang @@ -0,0 +1,13 @@ +//DIAGNOSTIC_TEST:SIMPLE(filecheck=CHECK): -allow-glsl -stage compute -entry computeMain -target glsl -DTARGET_GLSL +//DIAGNOSTIC_TEST:SIMPLE(filecheck=CHECK): -allow-glsl -stage compute -entry computeMain -target spirv -emit-spirv-directly -DTARGET_SPIRV +#version 430 + +// CHECK: error 20016 + +//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0], stride=4):name=one +layout(offset ) uniform atomic_uint one; + +void computeMain() +{ + +} diff --git 
a/tests/glsl-intrinsic/atomic/atomicErrorTest4.slang b/tests/glsl-intrinsic/atomic/atomicErrorTest4.slang new file mode 100644 index 000000000..404e2338c --- /dev/null +++ b/tests/glsl-intrinsic/atomic/atomicErrorTest4.slang @@ -0,0 +1,13 @@ +//DIAGNOSTIC_TEST:SIMPLE(filecheck=CHECK): -allow-glsl -stage compute -entry computeMain -target glsl -DTARGET_GLSL +//DIAGNOSTIC_TEST:SIMPLE(filecheck=CHECK): -allow-glsl -stage compute -entry computeMain -target spirv -emit-spirv-directly -DTARGET_SPIRV +#version 430 + +// CHECK: error 20001 + +//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0], stride=4):name=one +layout(binding = 1, offset) uniform atomic_uint one; + +void computeMain() +{ + +} diff --git a/tests/glsl-intrinsic/atomic/atomicStorageBuffer.slang b/tests/glsl-intrinsic/atomic/atomicStorageBuffer.slang new file mode 100644 index 000000000..5e00e1ec8 --- /dev/null +++ b/tests/glsl-intrinsic/atomic/atomicStorageBuffer.slang @@ -0,0 +1,381 @@ +//TEST:SIMPLE(filecheck=CHECK_GLSL): -allow-glsl -stage compute -entry computeMain -target glsl -DTARGET_GLSL +//TEST:SIMPLE(filecheck=CHECK_SPV): -allow-glsl -stage compute -entry computeMain -target spirv -emit-spirv-directly -DTARGET_SPIRV +//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl +//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl -emit-spirv-directly +#version 430 + +// float2 is currently a very new extension; most hardware lacks +// this extension and will fail the test if attempting to use atomic_float2 +// operations +// #define TEST_when_shader_atomic_float2_is_available + +//TEST_INPUT:ubuffer(data=[0], stride=4):out,name=outputBuffer +buffer MyBlockName +{ + uint data[1]; +} outputBuffer; + +//TEST_INPUT:ubuffer(data=[0], stride=4):name=int32Buffer +buffer MyBlockName1 +{ + int data[1]; +} int32Buffer; + +//TEST_INPUT:ubuffer(data=[0 0], stride=8):name=int64Buffer +buffer MyBlockName2 +{ + uint64_t 
data[1]; +} int64Buffer; + +//TEST_INPUT:ubuffer(data=[0], stride=4):name=uint32Buffer +buffer MyBlockName3 +{ + uint data[1]; +} uint32Buffer; + +//TEST_INPUT:ubuffer(data=[0 0], stride=8):name=uint64Buffer +buffer MyBlockName4 +{ + uint64_t data[1]; +} uint64Buffer; + +//TEST_INPUT:ubuffer(data=[0.0], stride=2):name=float16Buffer +buffer MyBlockName5 +{ + half data[1]; +} float16Buffer; + +//TEST_INPUT:ubuffer(data=[0.0], stride=2):name=float32Buffer +buffer MyBlockName6 +{ + float data[1]; +} float32Buffer; + +//TEST_INPUT:ubuffer(data=[0.0 0.0], stride=8):name=float64Buffer +buffer MyBlockName7 +{ + double data[1]; +} float64Buffer; + +// added to tests `out TYPE data` due to Slang bug +bool i32_init(int val, out int data) +{ + data = val; + return true; +} +bool i32_expect(int val) +{ + return int32Buffer.data[0] == val; +} +bool testAtomicInt32() +{ + return true + + && i32_init(5, int32Buffer.data[0]) + && atomicAdd(int32Buffer.data[0], 1) == 5 + && i32_expect(6) + + && i32_init(5, int32Buffer.data[0]) + && atomicMin(int32Buffer.data[0], 1) == 5 + && i32_expect(1) + + && i32_init(5, int32Buffer.data[0]) + && atomicMax(int32Buffer.data[0], 1) == 5 + && i32_expect(5) + + && i32_init(5, int32Buffer.data[0]) + && atomicExchange(int32Buffer.data[0], 2) == 5 + && i32_expect(2) + + && i32_init(5, int32Buffer.data[0]) + && atomicAnd(int32Buffer.data[0], 1) == 5 + && i32_expect(1) + + && i32_init(5, int32Buffer.data[0]) + && atomicOr(int32Buffer.data[0], 2) == 5 + && i32_expect(7) + + && i32_init(5, int32Buffer.data[0]) + && atomicXor(int32Buffer.data[0], 3) == 5 + && i32_expect(6) + + && i32_init(5, int32Buffer.data[0]) + && atomicCompSwap(int32Buffer.data[0], 5, 2) == 5 + && i32_expect(2) + + && i32_init(5, int32Buffer.data[0]) + && atomicCompSwap(int32Buffer.data[0], 4, 2) == 5 + && i32_expect(5) + ; +} + +bool i64_init(int64_t val, out int64_t data) +{ + data = val; + return true; +} +bool i64_expect(int64_t val) +{ + return int64Buffer.data[0] == val; +} +bool 
testAtomicInt64() +{ + return true + + && i64_init(5, int64Buffer.data[0]) + && atomicAdd(int64Buffer.data[0], 1) == 5 + && i64_expect(6) + + && i64_init(5, int64Buffer.data[0]) + && atomicMin(int64Buffer.data[0], 1) == 5 + && i64_expect(1) + + && i64_init(5, int64Buffer.data[0]) + && atomicMax(int64Buffer.data[0], 1) == 5 + && i64_expect(5) + + && i64_init(5, int64Buffer.data[0]) + && atomicExchange(int64Buffer.data[0], 2) == 5 + && i64_expect(2) + + && i64_init(5, int64Buffer.data[0]) + && atomicAnd(int64Buffer.data[0], 1) == 5 + && i64_expect(1) + + && i64_init(5, int64Buffer.data[0]) + && atomicOr(int64Buffer.data[0], 2) == 5 + && i64_expect(7) + + && i64_init(5, int64Buffer.data[0]) + && atomicXor(int64Buffer.data[0], 3) == 5 + && i64_expect(6) + + && i64_init(5, int64Buffer.data[0]) + && atomicCompSwap(int64Buffer.data[0], 5, 2) == 5 + && i64_expect(2) + + && i64_init(5, int64Buffer.data[0]) + && atomicCompSwap(int64Buffer.data[0], 4, 2) == 5 + && i64_expect(5) + ; +} + +bool u32_init(uint val, out uint data) +{ + data = val; + return true; +} +bool u32_expect(uint val) +{ + return uint32Buffer.data[0] == val; +} +bool testAtomicUint32() +{ + return true + + && u32_init(5, uint32Buffer.data[0]) + && atomicAdd(uint32Buffer.data[0], 1) == 5 + && u32_expect(6) + + && u32_init(5, uint32Buffer.data[0]) + && atomicMin(uint32Buffer.data[0], 1) == 5 + && u32_expect(1) + + && u32_init(5, uint32Buffer.data[0]) + && atomicMax(uint32Buffer.data[0], 1) == 5 + && u32_expect(5) + + && u32_init(5, uint32Buffer.data[0]) + && atomicExchange(uint32Buffer.data[0], 2) == 5 + && u32_expect(2) + + && u32_init(5, uint32Buffer.data[0]) + && atomicAnd(uint32Buffer.data[0], 1) == 5 + && u32_expect(1) + + && u32_init(5, uint32Buffer.data[0]) + && atomicOr(uint32Buffer.data[0], 2) == 5 + && u32_expect(7) + + && u32_init(5, uint32Buffer.data[0]) + && atomicXor(uint32Buffer.data[0], 3) == 5 + && u32_expect(6) + + && u32_init(5, uint32Buffer.data[0]) + && 
atomicCompSwap(uint32Buffer.data[0], 5, 2) == 5 + && u32_expect(2) + + && u32_init(5, uint32Buffer.data[0]) + && atomicCompSwap(uint32Buffer.data[0], 4, 2) == 5 + && u32_expect(5) + ; +} + +bool u64_init(uint64_t val, out uint64_t data) +{ + data = val; + return true; +} +bool u64_expect(uint64_t val) +{ + return uint64Buffer.data[0] == val; +} +bool testAtomicUint64() +{ + return true + + && u64_init(5, uint64Buffer.data[0]) + && atomicAdd(uint64Buffer.data[0], 1) == 5 + && u64_expect(6) + + && u64_init(5, uint64Buffer.data[0]) + && atomicMin(uint64Buffer.data[0], 1) == 5 + && u64_expect(1) + + && u64_init(5, uint64Buffer.data[0]) + && atomicMax(uint64Buffer.data[0], 1) == 5 + && u64_expect(5) + + && u64_init(5, uint64Buffer.data[0]) + && atomicExchange(uint64Buffer.data[0], 2) == 5 + && u64_expect(2) + + && u64_init(5, uint64Buffer.data[0]) + && atomicAnd(uint64Buffer.data[0], 1) == 5 + && u64_expect(1) + + && u64_init(5, uint64Buffer.data[0]) + && atomicOr(uint64Buffer.data[0], 2) == 5 + && u64_expect(7) + + && u64_init(5, uint64Buffer.data[0]) + && atomicXor(uint64Buffer.data[0], 3) == 5 + && u64_expect(6) + + && u64_init(5, uint64Buffer.data[0]) + && atomicCompSwap(uint64Buffer.data[0], 5, 2) == 5 + && u64_expect(2) + + && u64_init(5, uint64Buffer.data[0]) + && atomicCompSwap(uint64Buffer.data[0], 4, 2) == 5 + && u64_expect(5) + ; +} + +bool f16_init(half val, out half data) +{ + data = val; + return true; +} +bool f16_expect(half val) +{ + return float16Buffer.data[0] == val; +} +bool testAtomicFloat16() +{ + return true + +#ifdef TEST_when_shader_atomic_float2_is_available + && f16_init(5, float16Buffer.data[0]) + && atomicAdd(float16Buffer.data[0], half(1)) == half(5) + && f16_expect(6) + + && f16_init(5, float16Buffer.data[0]) + && atomicMin(float16Buffer.data[0], half(1)) == half(5) + && f16_expect(1) + + && f16_init(5, float16Buffer.data[0]) + && atomicMax(float16Buffer.data[0], half(1)) == half(5) + && f16_expect(5) + + && f16_init(5, 
float16Buffer.data[0]) + && atomicExchange(float16Buffer.data[0], half(2)) == half(5) + && f16_expect(2) +#endif // TEST_when_shader_atomic_float2_is_available + ; +} + +bool f32_init(float val, out float data) +{ + data = val; + return true; +} +bool f32_expect(float val) +{ + return float32Buffer.data[0] == val; +} +bool testAtomicFloat32() +{ + return true + + && f32_init(5, float32Buffer.data[0]) + && atomicAdd(float32Buffer.data[0], float(1)) == float(5) + && f32_expect(6) + +#ifdef TEST_when_shader_atomic_float2_is_available + && f32_init(5, float32Buffer.data[0]) + && atomicMin(float32Buffer.data[0], float(1)) == float(5) + && f32_expect(1) + + && f32_init(5, float32Buffer.data[0]) + && atomicMax(float32Buffer.data[0], float(1)) == float(5) + && f32_expect(5) + + && f32_init(5, float32Buffer.data[0]) + && atomicExchange(float32Buffer.data[0], float(2)) == float(5) + && f32_expect(2) +#endif // TEST_when_shader_atomic_float2_is_available + ; +} + +bool f64_init(double val, out double data) +{ + data = val; + return true; +} +bool f64_expect(double val) +{ + return float64Buffer.data[0] == val; +} +bool testAtomicFloat64() +{ + return true + + && f64_init(5, float64Buffer.data[0]) + && atomicAdd(float64Buffer.data[0], double(1)) == double(5) + && f64_expect(6) + +#ifdef TEST_when_shader_atomic_float2_is_available + && f64_init(5, float64Buffer.data[0]) + && atomicMin(float64Buffer.data[0], double(1)) == double(5) + && f64_expect(1) + + && f64_init(5, float64Buffer.data[0]) + && atomicMax(float64Buffer.data[0], double(1)) == double(5) + && f64_expect(5) + + && f64_init(5, float64Buffer.data[0]) + && atomicExchange(float64Buffer.data[0], double(2)) == double(5) + && f64_expect(2) +#endif // TEST_when_shader_atomic_float2_is_available + ; +} + +layout(local_size_x = 1) in; +void computeMain() +{ + // testing has the following pattern in 3 lines per operation: + // set the value, operation on value, test the result + outputBuffer.data[0] = true + && 
testAtomicInt32() + && testAtomicInt64() + && testAtomicUint32() + && testAtomicUint64() + && testAtomicFloat16() + && testAtomicFloat32() + && testAtomicFloat64() + ; + // CHECK_GLSL: void main( + // CHECK_SPV: OpEntryPoint + // BUF: 1 +} diff --git a/tools/gfx/vulkan/vk-api.h b/tools/gfx/vulkan/vk-api.h index f7523eb7f..27a19acfb 100644 --- a/tools/gfx/vulkan/vk-api.h +++ b/tools/gfx/vulkan/vk-api.h @@ -241,32 +241,43 @@ struct VulkanExtendedFeatureProperties { // 16 bit storage features VkPhysicalDevice16BitStorageFeatures storage16BitFeatures = { - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR}; + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR + }; // Atomic Float features VkPhysicalDeviceShaderAtomicFloatFeaturesEXT atomicFloatFeatures = { - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT}; + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT + }; + VkPhysicalDeviceShaderAtomicFloat2FeaturesEXT atomicFloat2Features = { + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_2_FEATURES_EXT + }; // Extended dynamic state features VkPhysicalDeviceExtendedDynamicStateFeaturesEXT extendedDynamicStateFeatures = { - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT}; + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT + }; // Acceleration structure features VkPhysicalDeviceAccelerationStructureFeaturesKHR accelerationStructureFeatures = { - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR}; + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR + }; // Ray tracing pipeline features VkPhysicalDeviceRayTracingPipelineFeaturesKHR rayTracingPipelineFeatures = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_FEATURES_KHR }; // Ray query (inline ray-tracing) features VkPhysicalDeviceRayQueryFeaturesKHR rayQueryFeatures = { - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_QUERY_FEATURES_KHR}; + 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_QUERY_FEATURES_KHR + }; // Inline uniform block features VkPhysicalDeviceInlineUniformBlockFeaturesEXT inlineUniformBlockFeatures = { - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_FEATURES_EXT}; + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_FEATURES_EXT + }; // Robustness2 features VkPhysicalDeviceRobustness2FeaturesEXT robustness2Features = { - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT}; + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT + }; VkPhysicalDeviceRayTracingInvocationReorderFeaturesNV rayTracingInvocationReorderFeatures = { - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_INVOCATION_REORDER_FEATURES_NV}; + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_INVOCATION_REORDER_FEATURES_NV + }; // Clock features VkPhysicalDeviceShaderClockFeaturesKHR clockFeatures = { @@ -280,15 +291,18 @@ struct VulkanExtendedFeatureProperties // Multiview features VkPhysicalDeviceMultiviewFeaturesKHR multiviewFeatures = { - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES_KHR }; + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES_KHR + }; // Fragment shading rate features VkPhysicalDeviceFragmentShadingRateFeaturesKHR fragmentShadingRateFeatures = { - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR }; + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR + }; // Vulkan 1.2 features. 
VkPhysicalDeviceVulkan12Features vulkan12Features = { - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES}; + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES + }; }; struct VulkanApi diff --git a/tools/gfx/vulkan/vk-device.cpp b/tools/gfx/vulkan/vk-device.cpp index 2a914b86b..1b046d8f2 100644 --- a/tools/gfx/vulkan/vk-device.cpp +++ b/tools/gfx/vulkan/vk-device.cpp @@ -476,13 +476,17 @@ Result DeviceImpl::initVulkanInstanceAndDevice( extendedFeatures.clockFeatures.pNext = deviceFeatures2.pNext; deviceFeatures2.pNext = &extendedFeatures.clockFeatures; - // Atomic Float + // Atomic Float // To detect atomic float we need // https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/VkPhysicalDeviceShaderAtomicFloatFeaturesEXT.html extendedFeatures.atomicFloatFeatures.pNext = deviceFeatures2.pNext; deviceFeatures2.pNext = &extendedFeatures.atomicFloatFeatures; + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPhysicalDeviceShaderAtomicFloat2FeaturesEXT.html + extendedFeatures.atomicFloat2Features.pNext = deviceFeatures2.pNext; + deviceFeatures2.pNext = &extendedFeatures.atomicFloat2Features; + // mesh shader features extendedFeatures.meshShaderFeatures.pNext = deviceFeatures2.pNext; deviceFeatures2.pNext = &extendedFeatures.meshShaderFeatures; @@ -543,7 +547,7 @@ Result DeviceImpl::initVulkanInstanceAndDevice( // SIMPLE_EXTENSION_FEATURE(struct, feature member name, extension // name, features...) will check for the presence of the boolean // feature member in struct and the availability of the extensions. If - // they are both present then the extensions are addded, the struct + // they are both present then the extensions are added, the struct // linked into the deviceCreateInfo chain and the features added to the // supported features list. #define SIMPLE_EXTENSION_FEATURE(s, m, e, ...) 
\ @@ -563,11 +567,18 @@ Result DeviceImpl::initVulkanInstanceAndDevice( SIMPLE_EXTENSION_FEATURE( extendedFeatures.atomicFloatFeatures, - shaderBufferFloat32AtomicAdd, + shaderBufferFloat32Atomics, VK_EXT_SHADER_ATOMIC_FLOAT_EXTENSION_NAME, "atomic-float" ); + SIMPLE_EXTENSION_FEATURE( + extendedFeatures.atomicFloat2Features, + shaderBufferFloat16Atomics, + VK_EXT_SHADER_ATOMIC_FLOAT_2_EXTENSION_NAME, + "atomic-float-2" + ); + SIMPLE_EXTENSION_FEATURE( extendedFeatures.extendedDynamicStateFeatures, extendedDynamicState, -- cgit v1.2.3