diff options
Diffstat (limited to 'source')
| -rw-r--r-- | source/compiler-core/slang-gcc-compiler-util.cpp | 2 | ||||
| -rw-r--r-- | source/slang/hlsl.meta.slang | 1249 | ||||
| -rw-r--r-- | source/slang/slang-emit-c-like.cpp | 7 | ||||
| -rw-r--r-- | source/slang/slang-emit.cpp | 9 | ||||
| -rw-r--r-- | source/slang/slang-ir-inst-defs.h | 7 | ||||
| -rw-r--r-- | source/slang/slang-ir-legalize-extract-from-texture-access.cpp | 136 | ||||
| -rw-r--r-- | source/slang/slang-ir-legalize-extract-from-texture-access.h | 11 | ||||
| -rw-r--r-- | source/slang/slang-ir-legalize-image-subscript.cpp | 15 | ||||
| -rw-r--r-- | source/slang/slang-ir-legalize-is-texture-access.cpp | 84 | ||||
| -rw-r--r-- | source/slang/slang-ir-legalize-is-texture-access.h | 11 |
10 files changed, 839 insertions, 692 deletions
diff --git a/source/compiler-core/slang-gcc-compiler-util.cpp b/source/compiler-core/slang-gcc-compiler-util.cpp index 52d4a0c29..01f197875 100644 --- a/source/compiler-core/slang-gcc-compiler-util.cpp +++ b/source/compiler-core/slang-gcc-compiler-util.cpp @@ -476,7 +476,7 @@ static SlangResult _parseGCCFamilyLine(SliceAllocator& allocator, const UnownedS if (targetDesc.payload == ArtifactDesc::Payload::MetalAIR) { - cmdLine.addArg("-std=macos-metal2.3"); + cmdLine.addArg("-std=metal3.1"); } // Our generated code very often casts between dissimilar types with the diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index 59a64a192..9fcd002a8 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -8785,789 +8785,678 @@ __intrinsic_op($(kIROp_MetalAtomicCast)) [require(metal)] T* __getMetalAtomicRef(__ref T x); -${{{{ -for (const char* fetchAndModify : {"add", "and", "max", "min", "or", "sub", "xor"}) -{ -}}}} - __generic<AtomicType, T> - [ForceInline] - [require(metal)] - void __metalInterlocked_$(fetchAndModify)(AtomicType dest, T value) - { - __intrinsic_asm "atomic_fetch_$(fetchAndModify)_explicit($0, $1, memory_order_relaxed)"; - } +// Checks if input is a ImageSubscript +__generic<T> +__intrinsic_op($(kIROp_IsTextureAccess)) +bool __isTextureAccess(__ref T x); - __generic<AtomicType, T> - [ForceInline] - [require(metal)] - void __metalInterlocked_$(fetchAndModify)(AtomicType dest, T value, out T original_value) - { - __intrinsic_asm "((*($2)) = (($[0])(atomic_fetch_$(fetchAndModify)_explicit($0, $1, memory_order_relaxed))))", T; - } -${{{{ -} // fetchAndModify -}}}} +// Checks if input is a texture of T type scalar +__generic<T> +__intrinsic_op($(kIROp_IsTextureScalarAccess)) +bool __isTextureScalarAccess(__ref T x); -__generic<AtomicType, T> -[ForceInline] -[require(metal)] -void __metalInterlocked_exchange(AtomicType dest, T value, out T original_value) -{ - __intrinsic_asm "((*($2)) = (($[0])(atomic_exchange_explicit($0, $1, memory_order_relaxed))))", T; -} +// Checks if input is a texture array +__generic<T> +__intrinsic_op($(kIROp_IsTextureArrayAccess)) +bool __isTextureArrayAccess(__ref T x); + +// Accepts an ImageSubscript +// Gets Texture used with ImageSubscript. +__generic<TextureAccess> +__intrinsic_op($(kIROp_ExtractTextureFromTextureAccess)) +TextureAccess* __extractTextureFromTextureAccess(__ref TextureAccess x); + +// Accepts an ImageSubscript +// Gets Coord from ImageSubscript. Swizzles out ArrayCoord if applicable +__generic<TextureAccess> +__intrinsic_op($(kIROp_ExtractCoordFromTextureAccess)) +uint __extractCoordFromTextureAccess(__ref TextureAccess x); + +// Accepts an ImageSubscript +// Gets ArrayCoord from ImageSubscript +__generic<TextureAccess> +__intrinsic_op($(kIROp_ExtractArrayCoordFromTextureAccess)) +uint __extractArrayCoordFromTextureAccess(__ref TextureAccess x); -__generic<AtomicType, T> -[ForceInline] -[require(metal)] -void __metalInterlocked_compare_exchange(AtomicType dest, __ref T compare_value, T value) +${{{{ +for (bool isArray : {false, true}) { - __intrinsic_asm "atomic_compare_exchange_weak_explicit($0, $1, $2, memory_order_relaxed, memory_order_relaxed)"; -} + StringBuilder coordBuilder; + StringBuilder coordFetchBuilder; + + StringBuilder threeParamsASMBuilder; + StringBuilder threeParamsOutputParamASMBuilder; + + StringBuilder fourParamsASMBuilder; -__generic<AtomicType, T> -[ForceInline] -[require(metal)] -void __metalInterlocked_compare_exchange(AtomicType dest, T compare_value, T value, out T original_value) -{ - __metalInterlocked_compare_exchange(dest, compare_value, value); - original_value = compare_value; -} + coordBuilder << "Coord coord"; + coordFetchBuilder << "coord"; + + threeParamsASMBuilder << "$1, $2"; -__glsl_version(430) -[ForceInline] -[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] -void InterlockedAdd(__ref int dest, int value) -{ - __target_switch + fourParamsASMBuilder << "$1, $2, $3"; + if(isArray) { - case hlsl: __intrinsic_asm "InterlockedAdd"; - case cuda: __intrinsic_asm "atomicAdd($0, $1)"; - case glsl: __intrinsic_asm "$atomicAdd($A, $1)"; - case metal: - __metalInterlocked_add(__getMetalAtomicRef(dest), value); - return; - case spirv: - spirv_asm - { - result:$$int = OpAtomicIAdd &dest Device None $value - }; + coordBuilder << ", uint arrayCoord"; + coordFetchBuilder << ", arrayCoord"; + threeParamsASMBuilder << ", $3"; + fourParamsASMBuilder << ", $4"; + threeParamsOutputParamASMBuilder << "$4"; } -} - -[ForceInline] -__glsl_version(430) -[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] -void InterlockedAdd(__ref uint dest, uint value) -{ - __target_switch + else { - case hlsl: __intrinsic_asm "InterlockedAdd"; - case cuda: __intrinsic_asm "atomicAdd((int*)$0, $1)"; - case glsl: __intrinsic_asm "$atomicAdd($A, $1)"; - case metal: - __metalInterlocked_add(__getMetalAtomicRef(dest), value); - return; - case spirv: - spirv_asm - { - result:$$uint = OpAtomicIAdd &dest Device None $value - }; + threeParamsOutputParamASMBuilder << "$3"; } -} + auto coordString = coordBuilder.toString(); + auto coordFetchString = coordFetchBuilder.toString(); + + auto threeParamsASMString = threeParamsASMBuilder.toString(); + auto threeParamsOutputParamASMString = threeParamsOutputParamASMBuilder.toString(); -[ForceInline] -void InterlockedAdd(__ref uint dest, int value) -{ - InterlockedAdd(dest, (uint)value); -} + auto fourParamsASMString = fourParamsASMBuilder.toString(); +}}}} -[ForceInline] -__glsl_version(430) -[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] -void InterlockedAdd(__ref int dest, int value, out int original_value) -{ - __target_switch +${{{{ + for (const char* atomicOperation : {"add", "and", "max", "min", "or", "sub", "xor"}) { - case hlsl: __intrinsic_asm "InterlockedAdd"; - case cuda: __intrinsic_asm "(*$2 = atomicAdd($0, $1))"; - case glsl: __intrinsic_asm "($2 = $atomicAdd($A, $1))"; - case metal: - __metalInterlocked_add(__getMetalAtomicRef(dest), value, original_value); - return; - case spirv: - spirv_asm +}}}} + __generic<TextureType, T, Coord> + [ForceInline] + [require(metal)] + vector<T, 4> __metalImageInterlocked_$(atomicOperation)(TextureType tex, $(coordString), vector<T, 4> value) { - %original:$$int = OpAtomicIAdd &dest Device None $value; - OpStore &original_value %original - }; - } -} + static_assert(T is int || T is uint, "__metalImageInterlocked only allows 'int'/'uint' textures"); + static_assert(Coord is uint || Coord is vector<uint,2> || Coord is vector<uint,3> || Coord is vector<uint,4>, + "__metalImageInterlocked implementation only allows 'uint' coordinates"); + __intrinsic_asm "$0.atomic_fetch_$(atomicOperation)($(threeParamsASMString))"; + } -[ForceInline] -__glsl_version(430) -[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] -void InterlockedAdd(__ref uint dest, uint value, out uint original_value) -{ - __target_switch - { - case hlsl: __intrinsic_asm "InterlockedAdd"; - case cuda: __intrinsic_asm "(*$2 = (uint)atomicAdd((int*)$0, $1))"; - case glsl: __intrinsic_asm "($2 = $atomicAdd($A, $1))"; - case metal: - __metalInterlocked_add(__getMetalAtomicRef(dest), value, original_value); - return; - case spirv: - spirv_asm + __generic<TextureType, T, Coord> + [ForceInline] + [require(metal)] + void __metalImageInterlocked_$(atomicOperation)(TextureType tex, $(coordString), vector<T, 4> value, out T original_value) { - %original:$$uint = OpAtomicIAdd &dest Device None $value; - OpStore &original_value %original - }; - } -} + static_assert(T is int || T is uint, "__metalImageInterlocked only allows 'int'/'uint' textures"); + static_assert(Coord is uint || Coord is vector<uint,2> || Coord is vector<uint,3> || Coord is vector<uint,4>, + "__metalImageInterlocked implementation only allows 'uint' coordinates"); + original_value = __metalImageInterlocked_$(atomicOperation)(tex, $(coordFetchString), value)[0]; + } +${{{{ + } // atomicOperation +}}}} -[ForceInline] -[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] -void InterlockedAdd(__ref int64_t dest, int64_t value) -{ - __target_switch + __generic<TextureType, T, Coord> + [ForceInline] + [require(metal)] + vector<T, 4> __metalImageInterlocked_exchange(TextureType tex, $(coordString), vector<T, 4> value) { - case hlsl: __intrinsic_asm "InterlockedAdd"; - case cuda: __intrinsic_asm "atomicAdd((uint64_t*)$0, $1)"; - case glsl: - __requireGLSLExtension("GL_EXT_shader_atomic_int64"); - __intrinsic_asm "$atomicAdd($A, $1)"; - case spirv: - spirv_asm - { - OpCapability Int64Atomics; - result:$$int64_t = OpAtomicIAdd &dest Device None $value; - }; + static_assert(T is int || T is uint, "__metalImageInterlocked only allows 'int'/'uint' textures"); + static_assert(Coord is uint || Coord is vector<uint,2> || Coord is vector<uint,3> || Coord is vector<uint,4>, + "__metalImageInterlocked implementation only allows 'uint' coordinates"); + __intrinsic_asm "($0.atomic_exchange($(threeParamsASMString)))"; } -} - -[ForceInline] -void InterlockedAdd(__ref int64_t dest, int64_t value, out int64_t original_value) -{ - __target_switch + __generic<TextureType, T, Coord> + [ForceInline] + [require(metal)] + void __metalImageInterlocked_exchange(TextureType tex, $(coordString), vector<T, 4> value, out T original_value) { - case hlsl: __intrinsic_asm "InterlockedAdd"; - case cuda: __intrinsic_asm "atomicAdd((uint64_t*)$0, $1)"; - case glsl: - __requireGLSLExtension("GL_EXT_shader_atomic_int64"); - __intrinsic_asm "$atomicAdd($A, $1)"; - case spirv: - spirv_asm - { - OpCapability Int64Atomics; - %origin:$$int64_t = OpAtomicIAdd &dest Device None $value; - OpStore &original_value %origin - }; + static_assert(T is int || T is uint, "Metal atomic texture operations only allow 'int'/'uint' textures"); + static_assert(Coord is uint || Coord is vector<uint,2> || Coord is vector<uint,3> || Coord is vector<uint,4>, + "__metalImageInterlocked implementation only allows 'uint' coordinates"); + original_value = __metalImageInterlocked_exchange(tex, $(coordFetchString), value)[0]; } -} -[ForceInline] -[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] -void InterlockedAdd(__ref uint64_t dest, uint64_t value) -{ - __target_switch + __generic<TextureType, T, Coord> + [ForceInline] + [require(metal)] + void __metalImageInterlocked_compare_exchange(TextureType tex, $(coordString), __ref vector<T, 4> compare_value, vector<T, 4> value) { - case hlsl: __intrinsic_asm "InterlockedAdd"; - case cuda: __intrinsic_asm "atomicAdd($0, $1)"; - case glsl: - __requireGLSLExtension("GL_EXT_shader_atomic_int64"); - __intrinsic_asm "$atomicAdd($A, $1)"; - case spirv: - spirv_asm - { - OpCapability Int64Atomics; - result:$$uint64_t = OpAtomicIAdd &dest Device None $value; - }; + static_assert(T is int || T is uint, "__metalImageInterlocked only allows 'int'/'uint' textures"); + static_assert(Coord is uint || Coord is vector<uint,2> || Coord is vector<uint,3> || Coord is vector<uint,4>, + "__metalImageInterlocked implementation only allows 'uint' coordinates"); + __intrinsic_asm "($0.atomic_compare_exchange_weak($(fourParamsASMString)))"; } -} - -[ForceInline] -void InterlockedAdd(__ref uint64_t dest, uint64_t value, out uint64_t original_value) -{ - __target_switch + __generic<TextureType, T, Coord> + [ForceInline] + [require(metal)] + void __metalImageInterlocked_compare_exchange(TextureType tex, $(coordString), vector<T, 4> compare_value, vector<T, 4> value, out T original_value) { - case hlsl: __intrinsic_asm "InterlockedAdd"; - case cuda: __intrinsic_asm "atomicAdd($0, $1)"; - case glsl: - __requireGLSLExtension("GL_EXT_shader_atomic_int64"); - __intrinsic_asm "$atomicAdd($A, $1)"; - case spirv: - spirv_asm - { - OpCapability Int64Atomics; - %origin:$$uint64_t = OpAtomicIAdd &dest Device None $value; - OpStore &original_value %origin - }; + static_assert(T is int || T is uint, "__metalImageInterlocked only allows 'int'/'uint' textures"); + static_assert(Coord is uint || Coord is vector<uint,2> || Coord is vector<uint,3> || Coord is vector<uint,4>, + "__metalImageInterlocked implementation only allows 'uint' coordinates"); + __metalImageInterlocked_compare_exchange(tex, $(coordFetchString), compare_value, value); + original_value = compare_value[0]; } -} +${{{{ +} // isArray +}}}} -__glsl_version(430) -[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] -void InterlockedAnd(__ref int dest, int value) -{ - __target_switch - { - case hlsl: __intrinsic_asm "InterlockedAnd"; - case cuda: __intrinsic_asm "atomicAnd($0, $1)"; - case glsl: __intrinsic_asm "$atomicAnd($A, $1)"; - case metal: - __metalInterlocked_and(__getMetalAtomicRef(dest), value); - return; - case spirv: - spirv_asm - { - result:$$int = OpAtomicAnd &dest Device None $value; - }; - } -} +${{{{ -__glsl_version(430) -[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] -void InterlockedAnd(__ref uint dest, uint value) +// Generated functions: + +// atomicAdd, InterlockedAdd, atomic_fetch_add_explicit, OpAtomicIAdd, OpAtomicFAddEXT +// __cudaInterlocked_add, __glslInterlocked_add, __hlslInterlocked_add, __metalInterlocked_add, __spirvInterlocked_add + +// atomicAnd, InterlockedAnd, atomic_fetch_and_explicit, OpAtomicAnd +// __cudaInterlocked_and, __glslInterlocked_and, __hlslInterlocked_and, __metalInterlocked_and, __spirvInterlocked_and + +// atomicMax, InterlockedMax, atomic_fetch_max_explicit, OpAtomicUMax, OpAtomicSMax, OpAtomicFMaxEXT +// __cudaInterlocked_max, __glslInterlocked_max, __hlslInterlocked_max, __metalInterlocked_max, __spirvInterlocked_max + +// atomicMin, InterlockedMin, atomic_fetch_min_explicit, OpAtomicUMin, OpAtomicSMin, OpAtomicFMinEXT +// __cudaInterlocked_min, __glslInterlocked_min, __hlslInterlocked_min, __metalInterlocked_min, __spirvInterlocked_min + +// atomicOr, InterlockedOr, atomic_fetch_or_explicit, OpAtomicOr +// __cudaInterlocked_or, __glslInterlocked_or, __hlslInterlocked_or, __metalInterlocked_or, __spirvInterlocked_or + +// atomicXor, InterlockedXor, atomic_fetch_xor_explicit, OpAtomicXor +// __cudaInterlocked_xor, __glslInterlocked_xor, __hlslInterlocked_xor, __metalInterlocked_xor, __spirvInterlocked_xor + +// atomicExchange, atomicExch, InterlockedExchange, atomic_exchange_explicit, OpAtomicExchange +// __cudaInterlocked_exchange, __glslInterlocked_exchange, __hlslInterlocked_exchange, __metalInterlocked_exchange, __spirvInterlocked_exchange + +struct InternalAtomicOperationInfo { - __target_switch - { - case hlsl: __intrinsic_asm "InterlockedAnd"; - case cuda: __intrinsic_asm "atomicAnd((int*)$0, $1)"; - case glsl: __intrinsic_asm "$atomicAnd($A, $1)"; - case metal: - __metalInterlocked_and(__getMetalAtomicRef(dest), value); - return; - case spirv: - spirv_asm - { - result:$$uint = OpAtomicAnd &dest Device None $value; - }; - } -} + const char* slangSuffix; + const char* cudaSuffix; + const char* glslSuffix; + const char* hlslSuffix; + const char* metalSuffix; + const char* spirvFloatSuffix; + const char* spirvUIntSuffix; + const char* spirvIntSuffix; -__glsl_version(430) -[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] -void InterlockedAnd(__ref int dest, int value, out int original_value) + const char* assertExpr; +}; + +InternalAtomicOperationInfo internalAtomicOperationInfo[7] = { + { "add", "Add", "Add", "Add", "fetch_add", "FAddEXT", "IAdd", "IAdd", "true" }, + { "and", "And", "And", "And", "fetch_and", "And", "And", "And", "!__isFloat<T>()" }, + { "max", "Max", "Max", "Max", "fetch_max", "FMaxEXT", "UMax", "SMax", "true" }, + { "min", "Min", "Min", "Min", "fetch_min", "FMinEXT", "UMin", "SMin", "true" }, + { "or", "Or", "Or", "Or", "fetch_or", "Or", "Or", "Or", "!__isFloat<T>()" }, + { "xor", "Xor", "Xor", "Xor", "fetch_xor", "Xor", "Xor", "Xor", "!__isFloat<T>()" }, + { "exchange", "Exch", "Exchange", "Exchange", "exchange", "Exchange", "Exchange", "Exchange", "true" }, +}; + +for (InternalAtomicOperationInfo atomicOp : internalAtomicOperationInfo) { - __target_switch +}}}} + __generic<AtomicType, T> + [ForceInline] + [require(metal)] + void __metalInterlocked_$(atomicOp.slangSuffix)(AtomicType dest, T value) { - case hlsl: __intrinsic_asm "InterlockedAnd"; - case cuda: __intrinsic_asm "(*$2 = atomicAnd($0, $1))"; - case glsl: __intrinsic_asm "($2 = $atomicAnd($A, $1))"; - case metal: - __metalInterlocked_and(__getMetalAtomicRef(dest), value, original_value); - return; - case spirv: - spirv_asm - { - %original:$$int = OpAtomicAnd &dest Device None $value; - OpStore &original_value %original - }; + static_assert($(atomicOp.assertExpr), "Unable to use float with Atomic$(atomicOp.slangSuffix)"); + __intrinsic_asm "atomic_$(atomicOp.metalSuffix)_explicit($0, $1, memory_order_relaxed)"; } -} -__glsl_version(430) -[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] -void InterlockedAnd(__ref uint dest, uint value, out uint original_value) -{ - __target_switch + __generic<AtomicType, T> + [ForceInline] + [require(metal)] + void __metalInterlocked_$(atomicOp.slangSuffix)(AtomicType dest, T value, out T original_value) { - case hlsl: __intrinsic_asm "InterlockedAnd"; - case glsl: __intrinsic_asm "($2 = atomicAnd($0, $1))"; - case cuda: __intrinsic_asm "(*$2 = atomicAnd((int*)$0, $1))"; - case metal: - __metalInterlocked_and(__getMetalAtomicRef(dest), value, original_value); - return; - case spirv: - spirv_asm - { - %original:$$uint = OpAtomicAnd &dest Device None $value; - OpStore &original_value %original - }; + static_assert($(atomicOp.assertExpr), "Unable to use float with Atomic$(atomicOp.slangSuffix)"); + __intrinsic_asm "((*($2)) = (atomic_$(atomicOp.metalSuffix)_explicit($0, $1, memory_order_relaxed)))"; } -} -[ForceInline] -void InterlockedAnd(__ref uint64_t dest, uint64_t value) -{ - __target_switch + __generic<T> + [ForceInline] + [require(cuda)] + void __cudaInterlocked_$(atomicOp.slangSuffix)(__ref T dest, T value) { - case hlsl: __intrinsic_asm "InterlockedAnd"; + static_assert($(atomicOp.assertExpr), "Unable to use float with Atomic$(atomicOp.slangSuffix)"); + __intrinsic_asm "atomic$(atomicOp.cudaSuffix)((int*)$0, $1)"; } -} -[ForceInline] -void InterlockedAnd(__ref uint64_t dest, uint64_t value, out uint64_t original_value) -{ - __target_switch + __generic<T> + [ForceInline] + [require(cuda)] + void __cudaInterlocked_$(atomicOp.slangSuffix)(__ref T dest, T value, out T original_value) { - case hlsl: __intrinsic_asm "InterlockedAnd"; + static_assert($(atomicOp.assertExpr), "Unable to use float with Atomic$(atomicOp.slangSuffix)"); + __intrinsic_asm "(*$2 = atomic$(atomicOp.cudaSuffix)((int*)$0, $1))"; } -} -__glsl_version(430) -[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] -void InterlockedCompareExchange(__ref int dest, int compare_value, int value, out int original_value) -{ - __target_switch + __generic<T> + [ForceInline] + [require(glsl)] + void __glslInterlocked_$(atomicOp.slangSuffix)(__ref T dest, T value) { - case hlsl: __intrinsic_asm "InterlockedCompareExchange"; - case glsl: __intrinsic_asm "($3 = $atomicCompSwap($A, $1, $2))"; - case cuda: __intrinsic_asm "(*$3 = atomicCAS($0, $1, $2))"; - case metal: - __metalInterlocked_compare_exchange(__getMetalAtomicRef(dest), compare_value, value, original_value); - return; - case spirv: - spirv_asm - { - %original:$$int = OpAtomicCompareExchange &dest Device None None $value $compare_value; - OpStore &original_value %original - }; + static_assert($(atomicOp.assertExpr), "Unable to use float with Atomic$(atomicOp.slangSuffix)"); + __intrinsic_asm "$atomic$(atomicOp.glslSuffix)($A, $1)"; } -} -__glsl_version(430) -[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] -void InterlockedCompareExchange(__ref uint dest, uint compare_value, uint value, out uint original_value) -{ - __target_switch + __generic<T> + [ForceInline] + [require(glsl)] + void __glslInterlocked_$(atomicOp.slangSuffix)(__ref T dest, T value, out T original_value) { - case hlsl: __intrinsic_asm "InterlockedCompareExchange"; - case glsl: __intrinsic_asm "($3 = $atomicCompSwap($A, $1, $2))"; - case cuda: __intrinsic_asm "(*$3 = (uint)atomicCAS((int*)$0, $1, $2))"; - case metal: - __metalInterlocked_compare_exchange(__getMetalAtomicRef(dest), compare_value, value, original_value); - return; - case spirv: - spirv_asm - { - %original:$$uint = OpAtomicCompareExchange &dest Device None None $value $compare_value; - OpStore &original_value %original - }; + static_assert($(atomicOp.assertExpr), "Unable to use float with Atomic$(atomicOp.slangSuffix)"); + __intrinsic_asm "($2 = $atomic$(atomicOp.glslSuffix)($A, $1))"; } -} -[ForceInline] -void InterlockedCompareExchangeFloatBitwise(__ref float dest, float compare_value, float value) -{ - __target_switch + __generic<T> + [ForceInline] + [require(hlsl)] + void __hlslInterlocked_$(atomicOp.slangSuffix)(__ref T dest, T value) { - case hlsl: __intrinsic_asm "InterlockedCompareExchangeFloatBitwise"; - case metal: - __metalInterlocked_compare_exchange(__getMetalAtomicRef(dest), compare_value, value); - return; + static_assert($(atomicOp.assertExpr), "Unable to use float with Atomic$(atomicOp.slangSuffix)"); + __intrinsic_asm "Interlocked$(atomicOp.hlslSuffix)"; } -} -[ForceInline] -void InterlockedCompareExchangeFloatBitwise(__ref float dest, float compare_value, float value, out float original_value) -{ - __target_switch + __generic<T> + [ForceInline] + [require(hlsl)] + void __hlslInterlocked_$(atomicOp.slangSuffix)(__ref T dest, T value, out T original_value) { - case hlsl: __intrinsic_asm "InterlockedCompareExchangeFloatBitwise"; - case metal: - __metalInterlocked_compare_exchange(__getMetalAtomicRef(dest), compare_value, value, original_value); - return; + static_assert($(atomicOp.assertExpr), "Unable to use float with Atomic$(atomicOp.slangSuffix)"); + __intrinsic_asm "Interlocked$(atomicOp.hlslSuffix)"; } -} -${{{{ -for (const char* T : {"int64_t", "uint64_t"}) -{ -}}}} + __generic<T> [ForceInline] - void InterlockedCompareExchange(__ref $(T) dest, $(T) compare_value, $(T) value) + [require(spirv)] + void __spirvInterlocked_$(atomicOp.slangSuffix)(__ref T dest, T value) { - __target_switch + static_assert($(atomicOp.assertExpr), "Unable to use float with Atomic$(atomicOp.slangSuffix)"); + if (__isFloat<T>()) + { + spirv_asm + { + result:$$T = OpAtomic$(atomicOp.spirvFloatSuffix) &dest Device None $value + }; + } + else if (__isUnsignedInt<T>()) { - case hlsl: __intrinsic_asm "InterlockedCompareExchange"; + spirv_asm + { + result:$$T = OpAtomic$(atomicOp.spirvUIntSuffix) &dest Device None $value + }; + } + else if (__isInt<T>()) + { + spirv_asm + { + result:$$T = OpAtomic$(atomicOp.spirvIntSuffix) &dest Device None $value + }; } } + __generic<T> [ForceInline] - void InterlockedCompareExchange(__ref $(T) dest, $(T) compare_value, $(T) value, out $(T) original_value) + [require(spirv)] + void __spirvInterlocked_$(atomicOp.slangSuffix)(__ref T dest, T value, out T original_value) { - __target_switch + static_assert($(atomicOp.assertExpr), "Unable to use float with Atomic$(atomicOp.slangSuffix)"); + if (__isFloat<T>()) + { + spirv_asm + { + %original:$$T = OpAtomic$(atomicOp.spirvFloatSuffix) &dest Device None $value; + OpStore &original_value %original + }; + } + else if (__isUnsignedInt<T>()) + { + spirv_asm + { + %original:$$T = OpAtomic$(atomicOp.spirvUIntSuffix) &dest Device None $value; + OpStore &original_value %original + }; + } + else if (__isInt<T>()) { - case hlsl: __intrinsic_asm "InterlockedCompareExchange"; + spirv_asm + { + %original:$$T = OpAtomic$(atomicOp.spirvIntSuffix) &dest Device None $value; + OpStore &original_value %original + }; } } + ${{{{ -} // T +} // fetchAndModify }}}} -__glsl_version(430) -[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] -void InterlockedCompareStore(__ref int dest, int compare_value, int value) +__generic<AtomicType, T> +[ForceInline] +[require(metal)] +void __metalInterlocked_compare_exchange(AtomicType dest, __ref T compare_value, T value) { - __target_switch - { - case hlsl: __intrinsic_asm "InterlockedCompareStore"; - case glsl: __intrinsic_asm "$atomicCompSwap($A, $1, $2)"; - case cuda: __intrinsic_asm "atomicCAS($0, $1, $2)"; - case spirv: - spirv_asm - { - result:$$int = OpAtomicCompareExchange &dest Device None None $value $compare_value; - }; - } + __intrinsic_asm "atomic_compare_exchange_weak_explicit($0, $1, $2, memory_order_relaxed, memory_order_relaxed)"; } -__glsl_version(430) -[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] -void InterlockedCompareStore(__ref uint dest, uint compare_value, uint value) +__generic<AtomicType, T> +[ForceInline] +[require(metal)] +void __metalInterlocked_compare_exchange(AtomicType dest, T compare_value, T value, out T original_value) { - __target_switch - { - case hlsl: __intrinsic_asm "InterlockedCompareStore"; - case glsl: __intrinsic_asm "$atomicCompSwap($A, $1, $2)"; - case cuda: __intrinsic_asm "atomicCAS((int*)$0, $1, $2)"; - case spirv: - spirv_asm - { - result:$$uint = OpAtomicCompareExchange &dest Device None None $value $compare_value; - }; - } + __metalInterlocked_compare_exchange(dest, compare_value, value); + original_value = compare_value; } +__generic<T> +__glsl_version(430) [ForceInline] -void InterlockedCompareStoreFloatBitwise(__ref float dest, float compare_value, float value) +[require(cuda)] +void __cudaInterlocked_compare_exchange(__ref T dest, __ref T compare_value, T value) { - __target_switch - { - case hlsl: __intrinsic_asm "InterlockedCompareStoreFloatBitwise"; - } + __intrinsic_asm "atomicCAS($0, $1, $2)"; } +__generic<T> [ForceInline] -void InterlockedCompareStore(__ref int64_t dest, int64_t compare_value, int64_t value); +[require(cuda)] +void __cudaInterlocked_compare_exchange(__ref T dest, T compare_value, T value, out T original_value) { - __target_switch - { - case hlsl: __intrinsic_asm "InterlockedCompareStore"; - } + __intrinsic_asm "*$3 = atomicCAS($0, $1, $2)"; } +__generic<T> [ForceInline] -void InterlockedCompareStore(__ref uint64_t dest, uint64_t compare_value, uint64_t value); +[require(glsl)] +void __glslInterlocked_compare_exchange(__ref T dest, __ref T compare_value, T value) { - __target_switch - { - case hlsl: __intrinsic_asm "InterlockedCompareStore"; - } + __intrinsic_asm "$atomicCompSwap($A, $1, $2)"; } -__glsl_version(430) -[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] -void InterlockedExchange(__ref int dest, int value, out int original_value) +__generic<T> +[ForceInline] +[require(glsl)] +void __glslInterlocked_compare_exchange(__ref T dest, T compare_value, T value, out T original_value) { - __target_switch - { - case hlsl: __intrinsic_asm "InterlockedExchange"; - case glsl: __intrinsic_asm "($2 = $atomicExchange($A, $1))"; - case cuda: __intrinsic_asm "(*$2 = atomicExch($0, $1))"; - case metal: - __metalInterlocked_exchange(__getMetalAtomicRef(dest), value, original_value); - return; - case spirv: - spirv_asm - { - %r:$$int = OpAtomicExchange &dest Device None $value; - OpStore &original_value %r - }; - } + __intrinsic_asm "($3 = $atomicCompSwap($A, $1, $2))"; } -__glsl_version(430) -[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] -void InterlockedExchange(__ref uint dest, uint value, out uint original_value) +__generic<T> +[ForceInline] +[require(hlsl)] +void __hlslInterlocked_compare_exchange(__ref T dest, __ref T compare_value, T value) { - __target_switch - { - case hlsl: __intrinsic_asm "InterlockedExchange"; - case glsl: __intrinsic_asm "($2 = $atomicExchange($A, $1))"; - case cuda: __intrinsic_asm "(*$2 = (uint)atomicExch((int*)$0, $1))"; - case metal: - __metalInterlocked_exchange(__getMetalAtomicRef(dest), value, original_value); - return; - case spirv: - spirv_asm - { - %r:$$uint = OpAtomicExchange &dest Device None $value; - OpStore &original_value %r - }; - } + __intrinsic_asm "InterlockedCompareExchange"; } +__generic<T> [ForceInline] -void InterlockedExchange(__ref float dest, float value) +[require(hlsl)] +void __hlslInterlocked_compare_exchange(__ref T dest, T compare_value, T value, out T original_value) { - __target_switch - { - case hlsl: __intrinsic_asm "InterlockedExchange"; - } + __intrinsic_asm "InterlockedCompareExchange"; } +__generic<T> [ForceInline] -void InterlockedExchange(__ref float dest, float value, out float original_value) +[require(spirv)] +void __spirvInterlocked_compare_exchange(__ref T dest, __ref T compare_value, T value) { - __target_switch + spirv_asm { - case hlsl: __intrinsic_asm "InterlockedExchange"; - case metal: - __metalInterlocked_exchange(__getMetalAtomicRef(dest), value, original_value); - return; - } + %result:$$T = OpAtomicCompareExchange &dest Device None None $value $compare_value; + }; } +__generic<T> [ForceInline] -void InterlockedExchange(__ref int64_t dest, int64_t value) +[require(spirv)] +void __spirvInterlocked_compare_exchange(__ref T dest, T compare_value, T value, out T original_value) { - __target_switch + spirv_asm { - case hlsl: __intrinsic_asm "InterlockedExchange"; - } + %original:$$T = OpAtomicCompareExchange &dest Device None None $value $compare_value; + OpStore &original_value %original + }; } +__generic<T> [ForceInline] -void InterlockedExchange(__ref int64_t dest, int64_t value, out int64_t original_value) +[require(hlsl)] +void __hlslInterlocked_compare_exchange_float_bitwise(__ref T dest, T compare_value, T value) { - __target_switch - { - case hlsl: __intrinsic_asm "InterlockedExchange"; - } + __intrinsic_asm "InterlockedCompareExchangeFloatBitwise"; } +__generic<T> [ForceInline] -void InterlockedExchange(__ref uint64_t dest, uint64_t value) +[require(hlsl)] +void __hlslInterlocked_compare_exchange_float_bitwise(__ref T dest, T compare_value, T value, out T original_value) { - __target_switch - { - case hlsl: __intrinsic_asm "InterlockedExchange"; - } + __intrinsic_asm "InterlockedCompareExchangeFloatBitwise"; } -[ForceInline] -void InterlockedExchange(__ref uint64_t dest, uint64_t value, out uint64_t original_value) +${{{{ +// Generates code for: +// InterlockedAdd, InterlockedAnd, InterlockedOr, InterlockedXor, +// InterlockedMax, InterlockedMin, InterlockedExchange +struct SlangAtomicOperationInfo { - __target_switch + const char* slangCallSuffix; + const char* internalCallSuffix; +}; + +SlangAtomicOperationInfo slangAtomicOperationInfo[7] = { + { "Add", "add" }, + { "And", "and" }, + { "Or", "or" }, + { "Xor", "xor" }, + { "Max", "max" }, + { "Min", "min" }, + { "Exchange", "exchange" }, +}; + +for (SlangAtomicOperationInfo atomicOp : slangAtomicOperationInfo) +{ + for(const char* T : {"int", "uint"}) { - case hlsl: __intrinsic_asm "InterlockedExchange"; - } -} +}}}} +[ForceInline] __glsl_version(430) [require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] -void InterlockedMax(__ref int dest, int value) +void Interlocked$(atomicOp.slangCallSuffix)(__ref $(T) dest, $(T) value) { + static_assert(__isTextureScalarAccess(dest) || !__isTextureAccess(dest), "Atomic must be applied to scalar texture or non-texture"); __target_switch { - case hlsl: __intrinsic_asm "InterlockedMax"; - case glsl: __intrinsic_asm "$atomicMax($A, $1)"; - case cuda: __intrinsic_asm "atomicMax($0, $1)"; + case hlsl: __hlslInterlocked_$(atomicOp.internalCallSuffix)(dest, value); + case cuda: __cudaInterlocked_$(atomicOp.internalCallSuffix)(dest, value); + case glsl: __glslInterlocked_$(atomicOp.internalCallSuffix)(dest, value); + case spirv: __spirvInterlocked_$(atomicOp.internalCallSuffix)(dest, value); case metal: - __metalInterlocked_max(__getMetalAtomicRef(dest), value); - return; - case spirv: - spirv_asm + if (__isTextureAccess(dest)) { - result:$$int = OpAtomicSMax &dest Device None $value; - }; + if(__isTextureArrayAccess(dest)) + { + __metalImageInterlocked_$(atomicOp.internalCallSuffix)(__extractTextureFromTextureAccess(dest), + __extractCoordFromTextureAccess(dest), __extractArrayCoordFromTextureAccess(dest), vector<$(T), 4>(value)); + } + else + { + __metalImageInterlocked_$(atomicOp.internalCallSuffix)(__extractTextureFromTextureAccess(dest), + __extractCoordFromTextureAccess(dest), vector<$(T), 4>(value)); + } + } + else + { + __metalInterlocked_$(atomicOp.internalCallSuffix)(__getMetalAtomicRef(dest), value); + } + return; } } +[ForceInline] __glsl_version(430) [require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] -void InterlockedMax(__ref uint dest, uint value) +void Interlocked$(atomicOp.slangCallSuffix)(__ref $(T) dest, $(T) value, out $(T) original_value) { + static_assert(__isTextureScalarAccess(dest) || !__isTextureAccess(dest), "Atomic must be applied to a scalar texture or non-texture"); __target_switch { - case hlsl: __intrinsic_asm "InterlockedMax"; - case glsl: __intrinsic_asm "$atomicMax($A, $1)"; - case cuda: __intrinsic_asm "atomicMax((int*)$0, $1)"; + case hlsl: __hlslInterlocked_$(atomicOp.internalCallSuffix)(dest, value, original_value); + case cuda: __cudaInterlocked_$(atomicOp.internalCallSuffix)(dest, value, original_value); + case glsl: __glslInterlocked_$(atomicOp.internalCallSuffix)(dest, value, original_value); + case spirv: __spirvInterlocked_$(atomicOp.internalCallSuffix)(dest, value, original_value); case metal: - __metalInterlocked_max(__getMetalAtomicRef(dest), value); + if (__isTextureAccess(dest)) + if(__isTextureArrayAccess(dest)) + { + __metalImageInterlocked_$(atomicOp.internalCallSuffix)(__extractTextureFromTextureAccess(dest), + __extractCoordFromTextureAccess(dest), __extractArrayCoordFromTextureAccess(dest), vector<$(T),4>(value), original_value); + } + else + { + __metalImageInterlocked_$(atomicOp.internalCallSuffix)(__extractTextureFromTextureAccess(dest), + __extractCoordFromTextureAccess(dest), vector<$(T),4>(value), original_value); + } + else + __metalInterlocked_$(atomicOp.internalCallSuffix)(__getMetalAtomicRef(dest), value, original_value); return; - case spirv: - spirv_asm - { - result:$$uint = OpAtomicUMax &dest Device None $value; - }; } } -__glsl_version(430) -[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] -void InterlockedMax(__ref int dest, int value, out int original_value) +${{{{ + } // for(const char* T : {"int64_t", "uint64_t"}) +}}}} + +[ForceInline] +void Interlocked$(atomicOp.slangCallSuffix)(__ref uint dest, int value) +{ + Interlocked$(atomicOp.slangCallSuffix)(dest, (uint)value); +} + +${{{{ +} // for (SlangAtomicOperationInfo atomicOp : slangAtomicOperationInfo) +}}}} + +${{{{ +for(const char* T : {"int64_t", "uint64_t"}) { +}}}} +[ForceInline] +[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] +void InterlockedAdd(__ref $(T) dest, $(T) value) +{ __target_switch { - case hlsl: __intrinsic_asm "InterlockedMax"; - case glsl: __intrinsic_asm "($2 = $atomicMax($A, $1))"; - case cuda: __intrinsic_asm "(*$2 = atomicMax($0, $1))"; - case metal: - __metalInterlocked_max(__getMetalAtomicRef(dest), value, original_value); - return; + case hlsl: __hlslInterlocked_add(dest, value); + case cuda: __cudaInterlocked_add(dest, value); + case glsl: + __requireGLSLExtension("GL_EXT_shader_atomic_int64"); + __glslInterlocked_add(dest, value); case spirv: spirv_asm { - %v:$$int = OpAtomicSMax &dest Device None $value; - OpStore &original_value %v + OpCapability Int64Atomics; + result:$$$(T) = OpAtomicIAdd &dest Device None $value; }; } } -__glsl_version(430) -[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] -void InterlockedMax(__ref uint dest, uint value, out uint original_value) +[ForceInline] +void InterlockedAdd(__ref $(T) dest, $(T) value, out $(T) original_value) { __target_switch { - case hlsl: __intrinsic_asm "InterlockedMax"; - case glsl: __intrinsic_asm "($2 = $atomicMax($A, $1))"; - case cuda: __intrinsic_asm "(*$2 = (uint)atomicMax((int*)$0, $1))"; - case metal: - __metalInterlocked_max(__getMetalAtomicRef(dest), value, original_value); - return; + case hlsl: __hlslInterlocked_add(dest, value, original_value); + case cuda: __cudaInterlocked_add(dest, value, original_value); + case glsl: + __requireGLSLExtension("GL_EXT_shader_atomic_int64"); + __glslInterlocked_add(dest, value, original_value); case spirv: spirv_asm { - %v:$$uint = OpAtomicUMax &dest Device None $value; - OpStore &original_value %v + OpCapability Int64Atomics; + %origin:$$$(T) = OpAtomicIAdd &dest Device None $value; + OpStore &original_value %origin }; } } [ForceInline] -void InterlockedMax(__ref int64_t dest, int64_t value) +void InterlockedAnd(__ref $(T) dest, $(T) value) { __target_switch { - case hlsl: __intrinsic_asm "InterlockedMax"; + case hlsl: __hlslInterlocked_and(dest, value); } } [ForceInline] -void InterlockedMax(__ref int64_t dest, int64_t value, out int64_t original_value) +void InterlockedAnd(__ref $(T) dest, $(T) value, out $(T) original_value) { __target_switch { - case hlsl: __intrinsic_asm "InterlockedMax"; + case hlsl: __hlslInterlocked_and(dest, value, original_value); } } [ForceInline] -void InterlockedMax(__ref uint64_t dest, uint64_t value) +void InterlockedCompareExchange(__ref $(T) dest, $(T) compare_value, $(T) value) { __target_switch { - case hlsl: __intrinsic_asm "InterlockedMax"; + case hlsl: __hlslInterlocked_compare_exchange(dest, compare_value, value); } } [ForceInline] -void InterlockedMax(__ref uint64_t dest, uint64_t value, out uint64_t original_value) +void InterlockedCompareExchange(__ref $(T) dest, $(T) compare_value, $(T) value, out $(T) original_value) { __target_switch { - case hlsl: __intrinsic_asm "InterlockedMax"; + case hlsl: __hlslInterlocked_compare_exchange(dest, compare_value, value, original_value); } } -__glsl_version(430) -[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] -void InterlockedMin(__ref int dest, int value) +[ForceInline] +void InterlockedCompareStore(__ref $(T) dest, $(T) compare_value, $(T) value); { __target_switch { - case hlsl: __intrinsic_asm "InterlockedMin"; - case glsl: __intrinsic_asm "$atomicMin($A, $1)"; - case cuda: __intrinsic_asm "atomicMin($0, $1)"; - case metal: - __metalInterlocked_min(__getMetalAtomicRef(dest), value); - return; - case spirv: - spirv_asm - { - result:$$int = OpAtomicSMin &dest Device None $value; - }; + case hlsl: __intrinsic_asm "InterlockedCompareStore"; } } -__glsl_version(430) -[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] -void InterlockedMin(__ref uint dest, uint value) +[ForceInline] +void InterlockedExchange(__ref $(T) dest, $(T) value) { __target_switch { - case hlsl: __intrinsic_asm "InterlockedMin"; - case glsl: __intrinsic_asm "$atomicMin($A, $1)"; - case cuda: __intrinsic_asm "atomicMin((int*)$0, $1)"; - case metal: - __metalInterlocked_min(__getMetalAtomicRef(dest), value); - return; - case spirv: - spirv_asm - { - result:$$uint = OpAtomicUMin &dest Device None $value; - }; + case hlsl: __intrinsic_asm "InterlockedExchange"; } } -__glsl_version(430) -[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] -void InterlockedMin(__ref int dest, int value, out int original_value) +[ForceInline] +void InterlockedExchange(__ref $(T) dest, $(T) value, out $(T) original_value) { __target_switch { - case hlsl: __intrinsic_asm "InterlockedMin"; - case glsl: __intrinsic_asm "($2 = $atomicMin($A, $1))"; - case cuda: __intrinsic_asm "(*$2 = atomicMin($0, $1))"; - case metal: - __metalInterlocked_min(__getMetalAtomicRef(dest), value, original_value); - return; - case spirv: - spirv_asm - { - %v:$$int = OpAtomicSMin &dest Device None $value; - OpStore &original_value %v - }; + case hlsl: __intrinsic_asm "InterlockedExchange"; } } -__glsl_version(430) -[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] -void InterlockedMin(__ref uint dest, uint value, out uint original_value) +[ForceInline] +void InterlockedMax(__ref $(T) dest, $(T) value) { __target_switch { - case hlsl: __intrinsic_asm "InterlockedMin"; - case glsl: __intrinsic_asm "($2 = $atomicMin($A, $1))"; - case cuda: __intrinsic_asm "(*$2 = (uint)atomicMin((int*)$0, $1))"; - case metal: - __metalInterlocked_min(__getMetalAtomicRef(dest), value, original_value); - return; - case spirv: - spirv_asm - { - %v:$$uint = OpAtomicUMin &dest Device None $value; - OpStore &original_value %v - }; + case hlsl: __intrinsic_asm "InterlockedMax"; } } [ForceInline] -void InterlockedMin(__ref int64_t dest, int64_t value) +void InterlockedMax(__ref $(T) dest, $(T) value, out $(T) original_value) { __target_switch { - case hlsl: __intrinsic_asm "InterlockedMin"; + case hlsl: __intrinsic_asm "InterlockedMax"; } } [ForceInline] -void InterlockedMin(__ref int64_t dest, int64_t value, out int64_t original_value) +void InterlockedMin(__ref $(T) dest, $(T) value) { __target_switch { @@ -9576,7 +9465,7 @@ void InterlockedMin(__ref int64_t dest, int64_t value, out int64_t original_v } [ForceInline] -void InterlockedMin(__ref uint64_t dest, uint64_t value) +void InterlockedMin(__ref $(T) dest, $(T) value, out $(T) original_value) { __target_switch { @@ -9585,215 +9474,219 @@ void InterlockedMin(__ref uint64_t dest, uint64_t value) } [ForceInline] -void InterlockedMin(__ref uint64_t dest, uint64_t value, out uint64_t original_value) +void InterlockedOr(__ref $(T) dest, $(T) value) { __target_switch { - case hlsl: __intrinsic_asm "InterlockedMin"; + case hlsl: __intrinsic_asm "InterlockedOr"; } } -__glsl_version(430) -[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] -void InterlockedOr(__ref int dest, int value) +[ForceInline] +void InterlockedOr(__ref $(T) dest, $(T) value, out $(T) original_value) { __target_switch { case hlsl: __intrinsic_asm "InterlockedOr"; - case cuda: __intrinsic_asm "atomicOr((int*)$0, $1)"; - case glsl: __intrinsic_asm "$atomicOr($A, $1)"; - case metal: - __metalInterlocked_or(__getMetalAtomicRef(dest), value); - return; - case spirv: - spirv_asm - { - result:$$int = OpAtomicOr &dest Device None $value; - }; } } -__glsl_version(430) -[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] -void InterlockedOr(__ref uint dest, uint value) +[ForceInline] +void InterlockedXor(__ref $(T) dest, $(T) value) { __target_switch { - case hlsl: __intrinsic_asm "InterlockedOr"; - case cuda: __intrinsic_asm "atomicOr((int*)$0, $1)"; - case glsl: __intrinsic_asm "$atomicOr($A, $1)"; - case metal: - __metalInterlocked_or(__getMetalAtomicRef(dest), value); - return; - case spirv: - spirv_asm - { - result:$$uint = OpAtomicOr &dest Device None $value; - }; + case hlsl: __intrinsic_asm "InterlockedXor"; } } -__glsl_version(430) -[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] -void InterlockedOr(__ref int dest, int value, out int original_value) +[ForceInline] +void InterlockedXor(__ref $(T) dest, $(T) value, out $(T) original_value) { - __target_switch + __target_switch { - case hlsl: __intrinsic_asm "InterlockedOr"; - case glsl: __intrinsic_asm "($2 = atomicOr($0, $1))"; - case cuda: __intrinsic_asm "(*$2 = atomicOr($0, $1))"; - case metal: - __metalInterlocked_or(__getMetalAtomicRef(dest), value, original_value); - return; - case spirv: - spirv_asm - { - %original:$$int = OpAtomicOr &dest Device None $value; - OpStore &original_value %original - }; + case hlsl: __intrinsic_asm "InterlockedXor"; } } +${{{{ +} // for(const char* T : {"int64_t", "uint64_t"}) +}}}} + +[ForceInline] __glsl_version(430) [require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] -void InterlockedOr(__ref uint dest, uint value, out uint original_value) +void InterlockedCompareExchange(__ref int dest, int compare_value, int value, out int original_value) { - __target_switch + static_assert(__isTextureScalarAccess(dest) || !__isTextureAccess(dest), "Atomic must be applied to scalar texture or non-texture"); + __target_switch { - case hlsl: __intrinsic_asm "InterlockedOr"; - case glsl: __intrinsic_asm "($2 = atomicOr($0, $1))"; - case cuda: __intrinsic_asm "(*$2 = atomicOr((int*)$0, $1))"; + case hlsl: __hlslInterlocked_compare_exchange(dest, compare_value, value, original_value); + case glsl: __glslInterlocked_compare_exchange(dest, compare_value, value, original_value); + case cuda: __cudaInterlocked_compare_exchange(dest, compare_value, value, original_value); + case spirv: __spirvInterlocked_compare_exchange(dest, compare_value, value, original_value); case metal: - __metalInterlocked_or(__getMetalAtomicRef(dest), value, original_value); - return; - case spirv: - spirv_asm + if (__isTextureAccess(dest)) { - %original:$$uint = OpAtomicOr &dest Device None $value; - OpStore &original_value %original - }; + vector<int, 4> vec_compare_value = vector<int, 4>(compare_value); + if(__isTextureArrayAccess(dest)) + { + __metalImageInterlocked_compare_exchange(__extractTextureFromTextureAccess(dest), + __extractCoordFromTextureAccess(dest), __extractArrayCoordFromTextureAccess(dest), vec_compare_value, vector<int, 4>(value), original_value); + } + else + { + __metalImageInterlocked_compare_exchange(__extractTextureFromTextureAccess(dest), + __extractCoordFromTextureAccess(dest), vec_compare_value, vector<int, 4>(value), original_value); + } + } + else + { + __metalInterlocked_compare_exchange(__getMetalAtomicRef(dest), compare_value, value, original_value); + } + return; } } [ForceInline] -void InterlockedOr(__ref uint64_t dest, uint64_t value) +__glsl_version(430) +[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] +void InterlockedCompareExchange(__ref uint dest, uint compare_value, uint value, out uint original_value) { + static_assert(__isTextureScalarAccess(dest) || !__isTextureAccess(dest), "Atomic must be applied to scalar texture or non-texture"); __target_switch { - case hlsl: __intrinsic_asm "InterlockedOr"; + case hlsl: __hlslInterlocked_compare_exchange(dest, compare_value, value, original_value); + case cuda: __cudaInterlocked_compare_exchange(dest, compare_value, value, original_value); + case glsl: __glslInterlocked_compare_exchange(dest, compare_value, value, original_value); + case spirv: __spirvInterlocked_compare_exchange(dest, compare_value, value, original_value); + case metal: + if (__isTextureAccess(dest)) + { + vector<uint, 4> vec_compare_value = vector<uint, 4>(compare_value); + if(__isTextureArrayAccess(dest)) + { + __metalImageInterlocked_compare_exchange(__extractTextureFromTextureAccess(dest), + __extractCoordFromTextureAccess(dest), __extractArrayCoordFromTextureAccess(dest), vec_compare_value, vector<uint, 4>(value), original_value); + } + else + { + __metalImageInterlocked_compare_exchange(__extractTextureFromTextureAccess(dest), + __extractCoordFromTextureAccess(dest), vec_compare_value, vector<uint, 4>(value), original_value); + } + } + else + { + __metalInterlocked_compare_exchange(__getMetalAtomicRef(dest), compare_value, value, original_value); + } + return; } } [ForceInline] -void InterlockedOr(__ref uint64_t dest, uint64_t value, out uint64_t original_value) +void InterlockedCompareExchangeFloatBitwise(__ref float dest, float compare_value, float value) { + static_assert(__isTextureScalarAccess(dest) || !__isTextureAccess(dest), "Atomic must be applied to scalar texture or non-texture"); __target_switch { - case hlsl: __intrinsic_asm "InterlockedOr"; + case hlsl: __hlslInterlocked_compare_exchange_float_bitwise(dest, compare_value, value); + case metal: + static_assert(!__isTextureAccess(dest), "float atomic texture operations are disallowed with Metal target's"); + __metalInterlocked_compare_exchange(__getMetalAtomicRef(dest), compare_value, value); + return; } } -__glsl_version(430) -[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] -void InterlockedXor(__ref int dest, int value) +[ForceInline] +void InterlockedCompareExchangeFloatBitwise(__ref float dest, float compare_value, float value, out float original_value) { + static_assert(__isTextureScalarAccess(dest) || !__isTextureAccess(dest), "Atomic must be applied to scalar texture or non-texture"); __target_switch { - case hlsl: __intrinsic_asm "InterlockedXor"; - case cuda: __intrinsic_asm "atomicXor((int*)$0, $1)"; - case glsl: __intrinsic_asm "$atomicXor($A, $1)"; + case hlsl: __hlslInterlocked_compare_exchange_float_bitwise(dest, compare_value, value, original_value); case metal: - __metalInterlocked_xor(__getMetalAtomicRef(dest), value); + static_assert(!__isTextureAccess(dest), "float atomic texture operations are disallowed with Metal target's"); + __metalInterlocked_compare_exchange(__getMetalAtomicRef(dest), compare_value, value, original_value); return; - case spirv: - spirv_asm - { - result:$$int = OpAtomicXor &dest Device None $value; - }; } } +[ForceInline] __glsl_version(430) -[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] -void InterlockedXor(__ref uint dest, uint value) +[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] +void InterlockedCompareStore(__ref int dest, int compare_value, int value) { __target_switch { - case hlsl: __intrinsic_asm "InterlockedXor"; - case cuda: __intrinsic_asm "atomicXor((int*)$0, $1)"; - case glsl: __intrinsic_asm "$atomicXor($A, $1)"; - case metal: - __metalInterlocked_xor(__getMetalAtomicRef(dest), value); - return; + case hlsl: __intrinsic_asm "InterlockedCompareStore"; + case glsl: __intrinsic_asm "$atomicCompSwap($A, $1, $2)"; + case cuda: __intrinsic_asm "atomicCAS($0, $1, $2)"; case spirv: spirv_asm { - result:$$uint = OpAtomicXor &dest Device None $value; + result:$$int = OpAtomicCompareExchange &dest Device None None $value $compare_value; }; } } +[ForceInline] __glsl_version(430) -[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] -void InterlockedXor(__ref int dest, int value, out int original_value) +[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] +void InterlockedCompareStore(__ref uint dest, uint compare_value, uint value) { - __target_switch + __target_switch { - case hlsl: __intrinsic_asm "InterlockedXor"; - case glsl: __intrinsic_asm "($2 = atomicXor($0, $1))"; - case cuda: __intrinsic_asm "(*$2 = atomicXor($0, $1))"; - case metal: - __metalInterlocked_xor(__getMetalAtomicRef(dest), value, original_value); - return; + case hlsl: __intrinsic_asm "InterlockedCompareStore"; + case glsl: __intrinsic_asm "$atomicCompSwap($A, $1, $2)"; + case cuda: __intrinsic_asm "atomicCAS((int*)$0, $1, $2)"; case spirv: spirv_asm { - %original:$$int = OpAtomicXor &dest Device None $value; - OpStore &original_value %original + result:$$uint = OpAtomicCompareExchange &dest Device None None $value $compare_value; }; } } -__glsl_version(430) -[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)] -void InterlockedXor(__ref uint dest, uint value, out uint original_value) +[ForceInline] +void InterlockedCompareStoreFloatBitwise(__ref float dest, float compare_value, float value) { - __target_switch + __target_switch { - case hlsl: __intrinsic_asm "InterlockedXor"; - case glsl: __intrinsic_asm "($2 = atomicXor($0, $1))"; - case cuda: __intrinsic_asm "(*$2 = (uint)atomicXor((int*)$0, $1))"; - case metal: - __metalInterlocked_xor(__getMetalAtomicRef(dest), value, original_value); - return; - case spirv: - spirv_asm - { - %original:$$uint = OpAtomicXor &dest Device None $value; - OpStore &original_value %original - }; + case hlsl: __intrinsic_asm "InterlockedCompareStoreFloatBitwise"; } } + [ForceInline] -void InterlockedXor(__ref uint64_t dest, uint64_t value) +void InterlockedExchange(__ref float dest, float value) { + static_assert(__isTextureScalarAccess(dest) || !__isTextureAccess(dest), "Atomic must be applied to scalar texture or non-texture"); __target_switch { - case hlsl: __intrinsic_asm "InterlockedXor"; + case hlsl: __hlslInterlocked_exchange(dest, value); + case metal: + static_assert(!__isTextureAccess(dest), "'float' atomic texture operations are disallowed with Metal target's"); + __metalInterlocked_exchange(__getMetalAtomicRef(dest), value); + return; } } [ForceInline] -void InterlockedXor(__ref uint64_t dest, uint64_t value, out uint64_t original_value) +void InterlockedExchange(__ref float dest, float value, out float original_value) { + static_assert(__isTextureScalarAccess(dest) || !__isTextureAccess(dest), "Atomic must be applied to scalar texture or non-texture"); __target_switch { - case hlsl: __intrinsic_asm "InterlockedXor"; + case hlsl: __hlslInterlocked_exchange(dest, value, original_value); + case metal: + static_assert(!__isTextureAccess(dest), "'float' atomic texture operations are disallowed with Metal target's"); + __metalInterlocked_exchange(__getMetalAtomicRef(dest), value, original_value); + return; } } + // Is floating-point value finite? __generic<T : __BuiltinFloatingPointType> diff --git a/source/slang/slang-emit-c-like.cpp b/source/slang/slang-emit-c-like.cpp index 66ee12ca6..7ce2c7900 100644 --- a/source/slang/slang-emit-c-like.cpp +++ b/source/slang/slang-emit-c-like.cpp @@ -2566,10 +2566,15 @@ void CLikeSourceEmitter::defaultEmitInstExpr(IRInst* inst, const EmitOpInfo& inO emitOperand(inst->getOperand(1), rightSide(outerPrec, prec)); break; } + + case kIROp_ImageSubscript: + // We should have legalized ImageSubscript before emit for metal targets + if (isMetalTarget(this->getTargetReq())) + getSink()->diagnose(inst, Diagnostics::unimplemented, "kIROp_ImageSubscript is unimplemented for Metal, expected legalization beforehand"); + [[fallthrough]]; case kIROp_GetElement: case kIROp_MeshOutputRef: case kIROp_GetElementPtr: - case kIROp_ImageSubscript: // HACK: deal with translation of GLSL geometry shader input arrays. if(auto decoration = inst->getOperand(0)->findDecoration<IRGLSLOuterArrayDecoration>()) { diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp index d8f0686d5..243dd65e8 100644 --- a/source/slang/slang-emit.cpp +++ b/source/slang/slang-emit.cpp @@ -50,7 +50,9 @@ #include "slang-ir-lower-l-value-cast.h" #include "slang-ir-lower-reinterpret.h" #include "slang-ir-loop-unroll.h" +#include "slang-ir-legalize-extract-from-texture-access.h" #include "slang-ir-legalize-image-subscript.h" +#include "slang-ir-legalize-is-texture-access.h" #include "slang-ir-legalize-vector-types.h" #include "slang-ir-metadata.h" #include "slang-ir-optix-entry-point-uniforms.h" @@ -907,6 +909,9 @@ Result linkAndOptimizeIR( legalizeVectorTypes(irModule, sink); + // Legalize `__isTextureAccess` and related. + legalizeIsTextureAccess(irModule); + // Once specialization and type legalization have been performed, // we should perform some of our basic optimization steps again, // to see if we can clean up any temporaries created by legalization. @@ -1154,9 +1159,13 @@ Result linkAndOptimizeIR( if(isD3DTarget(targetRequest)) legalizeNonStructParameterToStructForHLSL(irModule); + legalizeExtractFromTextureAccess(irModule); + // Legalize `ImageSubscript` loads. switch (target) { + case CodeGenTarget::MetalLibAssembly: + case CodeGenTarget::MetalLib: case CodeGenTarget::Metal: case CodeGenTarget::GLSL: case CodeGenTarget::SPIRV: diff --git a/source/slang/slang-ir-inst-defs.h b/source/slang/slang-ir-inst-defs.h index 3132536e3..987486eae 100644 --- a/source/slang/slang-ir-inst-defs.h +++ b/source/slang/slang-ir-inst-defs.h @@ -687,6 +687,13 @@ INST(GetPerVertexInputArray, GetPerVertexInputArray, 1, 0) INST(ForceVarIntoStructTemporarily, ForceVarIntoStructTemporarily, 1, 0) INST(MetalAtomicCast, MetalAtomicCast, 1, 0) +INST(IsTextureAccess, IsTextureAccess, 1, 0) +INST(IsTextureScalarAccess, IsTextureScalarAccess, 1, 0) +INST(IsTextureArrayAccess, IsTextureArrayAccess, 1, 0) +INST(ExtractTextureFromTextureAccess, ExtractTextureFromTextureAccess, 1, 0) +INST(ExtractCoordFromTextureAccess, ExtractCoordFromTextureAccess, 1, 0) +INST(ExtractArrayCoordFromTextureAccess, ExtractArrayCoordFromTextureAccess, 1, 0) + INST(MakeArrayList, makeArrayList, 0, 0) INST(MakeTensorView, makeTensorView, 0, 0) INST(AllocateTorchTensor, allocTorchTensor, 0, 0) diff --git a/source/slang/slang-ir-legalize-extract-from-texture-access.cpp b/source/slang/slang-ir-legalize-extract-from-texture-access.cpp new file mode 100644 index 000000000..de1e244a8 --- /dev/null +++ b/source/slang/slang-ir-legalize-extract-from-texture-access.cpp @@ -0,0 +1,136 @@ +#include "slang-ir-legalize-extract-from-texture-access.h" + +#include "slang-ir.h" +#include "slang-ir-insts.h" +#include "slang-ir-util.h" +#include "slang-ir-clone.h" +#include "slang-ir-specialize-address-space.h" +#include "slang-parameter-binding.h" +#include "slang-ir-legalize-image-subscript.h" +#include "slang-ir-legalize-varying-params.h" +#include "slang-ir-simplify-cfg.h" + +namespace Slang +{ + void legalizeExtractTextureFromTextureAccess(IRBuilder& builder, IRInst* inst) + { + SLANG_ASSERT(inst); + + builder.setInsertBefore(inst); + IRImageSubscript* imageSubscript = as<IRImageSubscript>(getRootAddr(inst->getOperand(0))); + SLANG_ASSERT(imageSubscript); + SLANG_ASSERT(imageSubscript->getImage()); + inst->replaceUsesWith(imageSubscript->getImage()); + inst->removeAndDeallocate(); + // Ensure we are done processing the imageSubscript before we remove it + if (!imageSubscript->hasUses()) + imageSubscript->removeAndDeallocate(); + } + + void legalizeExtractArrayCoordFromTextureAccess(IRBuilder& builder, IRInst* inst) + { + SLANG_ASSERT(inst); + + builder.setInsertBefore(inst); + IRImageSubscript* imageSubscript = as<IRImageSubscript>(getRootAddr(inst->getOperand(0))); + SLANG_ASSERT(imageSubscript); + SLANG_ASSERT(imageSubscript->getImage()); + + auto image = as<IRTextureType>(imageSubscript->getImage()->getDataType()); + IRInst* coord = imageSubscript->getCoord(); + if(image->isArray()) + { + // Extract final element which is 'ArrayCoord' + IRVectorType* coordType = as<IRVectorType>(imageSubscript->getCoord()->getDataType()); + SLANG_ASSERT(coordType); + auto coordSize = getIRVectorElementSize(coordType); + + IRType* newArrayCoordType = coordType->getElementType(); + auto arrayCoordLocation = coordSize - 1; + List<UInt> swizzleIndicies = { (UInt)arrayCoordLocation }; + + coord = builder.emitSwizzle(newArrayCoordType, coord, 1, swizzleIndicies.getBuffer()); + } + else + coord = builder.getIntValue(builder.getUIntType(), 0); + + + inst->replaceUsesWith(coord); + inst->removeAndDeallocate(); + // Ensure we are done processing the imageSubscript completly before we remove it + if (!imageSubscript->hasUses()) + imageSubscript->removeAndDeallocate(); + } + + void legalizeExtractCoordFromTextureAccess(IRBuilder& builder, IRInst* inst) + { + SLANG_ASSERT(inst); + + builder.setInsertBefore(inst); + IRImageSubscript* imageSubscript = as<IRImageSubscript>(getRootAddr(inst->getOperand(0))); + SLANG_ASSERT(imageSubscript); + SLANG_ASSERT(imageSubscript->getImage()); + + auto image = as<IRTextureType>(imageSubscript->getImage()->getDataType()); + IRInst* coord = imageSubscript->getCoord(); + if(image->isArray()) + { + // Extract all but final element which is 'ArrayCoord' + IRVectorType* coordType = as<IRVectorType>(imageSubscript->getCoord()->getDataType()); + auto coordSize = getIRVectorElementSize(coordType); + SLANG_ASSERT(coordType); + + IRType* newCoordType = nullptr; + auto newCoordSize = coordSize - 1; + if(newCoordSize != 1) + newCoordType = builder.getVectorType(coordType->getElementType(), newCoordSize); + else + newCoordType = coordType->getElementType(); + List<UInt> swizzleIndicies = {1, 2, 3, 4}; + + coord = builder.emitSwizzle(newCoordType, coord, newCoordSize, swizzleIndicies.getBuffer()); + } + + inst->replaceUsesWith(coord); + inst->removeAndDeallocate(); + // Ensure we are done processing the imageSubscript completly before we remove it + if (!imageSubscript->hasUses()) + imageSubscript->removeAndDeallocate(); + } + + void legalizeExtractFromTextureAccess(IRModule* module) + { + IRBuilder builder(module); + for (auto globalInst : module->getModuleInst()->getChildren()) + { + auto func = as<IRFunc>(globalInst); + if (!func) + continue; + for (auto block : func->getBlocks()) + { + auto inst = block->getFirstInst(); + IRInst* next; + for ( ; inst; inst = next) + { + next = inst->getNextInst(); + switch (inst->getOp()) + { + case kIROp_ExtractArrayCoordFromTextureAccess: + if (as<IRImageSubscript>(getRootAddr(inst->getOperand(0)))) + legalizeExtractArrayCoordFromTextureAccess(builder, inst); + continue; + case kIROp_ExtractCoordFromTextureAccess: + if (as<IRImageSubscript>(getRootAddr(inst->getOperand(0)))) + legalizeExtractCoordFromTextureAccess(builder, inst); + continue; + case kIROp_ExtractTextureFromTextureAccess: + if (as<IRImageSubscript>(getRootAddr(inst->getOperand(0)))) + legalizeExtractTextureFromTextureAccess(builder, inst); + continue; + } + } + } + } + } +} + diff --git a/source/slang/slang-ir-legalize-extract-from-texture-access.h b/source/slang/slang-ir-legalize-extract-from-texture-access.h new file mode 100644 index 000000000..016c86def --- /dev/null +++ b/source/slang/slang-ir-legalize-extract-from-texture-access.h @@ -0,0 +1,11 @@ +#pragma once + +#include "slang-ir.h" +#include "slang-compiler.h" + +namespace Slang +{ + class DiagnosticSink; + + void legalizeExtractFromTextureAccess(IRModule* module); +} diff --git a/source/slang/slang-ir-legalize-image-subscript.cpp b/source/slang/slang-ir-legalize-image-subscript.cpp index b5b240675..4c7de2b95 100644 --- a/source/slang/slang-ir-legalize-image-subscript.cpp +++ b/source/slang/slang-ir-legalize-image-subscript.cpp @@ -15,16 +15,8 @@ namespace Slang SLANG_ASSERT(storeInst); builder.setInsertBefore(storeInst); - IRImageSubscript* imageSubscript = nullptr; auto getElementPtr = as<IRGetElementPtr>(storeInst->getOperand(0)); - if(getElementPtr) - { - imageSubscript = as<IRImageSubscript>(getElementPtr->getBase()); - } - else - { - imageSubscript = as<IRImageSubscript>(storeInst->getOperand(0)); - } + IRImageSubscript* imageSubscript = as<IRImageSubscript>(getRootAddr(storeInst->getOperand(0))); SLANG_ASSERT(imageSubscript); SLANG_ASSERT(imageSubscript->getImage()); IRTextureType* textureType = as<IRTextureType>(imageSubscript->getImage()->getFullType()); @@ -190,10 +182,9 @@ namespace Slang { case kIROp_Store: case kIROp_SwizzledStore: - if (getRootAddr(inst->getOperand(0))->getOp() == kIROp_ImageSubscript) - { + if (as<IRImageSubscript>(getRootAddr(inst->getOperand(0)))) legalizeStore(target, builder, inst, sink); - } + continue; } } } diff --git a/source/slang/slang-ir-legalize-is-texture-access.cpp b/source/slang/slang-ir-legalize-is-texture-access.cpp new file mode 100644 index 000000000..929da591b --- /dev/null +++ b/source/slang/slang-ir-legalize-is-texture-access.cpp @@ -0,0 +1,84 @@ +#include "slang-ir-legalize-is-texture-access.h" + +#include "slang-ir.h" +#include "slang-ir-insts.h" +#include "slang-ir-util.h" +#include "slang-ir-clone.h" +#include "slang-ir-specialize-address-space.h" +#include "slang-parameter-binding.h" +#include "slang-ir-legalize-image-subscript.h" +#include "slang-ir-legalize-varying-params.h" +#include "slang-ir-simplify-cfg.h" + +namespace Slang +{ + IRImageSubscript* getTextureAccess(IRInst* inst) + { + return as<IRImageSubscript>(getRootAddr(inst->getOperand(0))); + } + + void legalizeIsTextureAccess(IRModule* module) + { + HashSet<IRFunc*> functionsToSimplifyCFG; + IRBuilder builder(module); + for (auto globalInst : module->getModuleInst()->getChildren()) + { + auto func = as<IRFunc>(globalInst); + if (!func) + continue; + for (auto block : func->getBlocks()) + { + auto inst = block->getFirstInst(); + IRInst* next; + for ( ; inst; inst = next) + { + next = inst->getNextInst(); + switch (inst->getOp()) + { + case kIROp_IsTextureAccess: + if (getTextureAccess(inst)) + inst->replaceUsesWith(builder.getBoolValue(true)); + else + { + inst->replaceUsesWith(builder.getBoolValue(false)); + functionsToSimplifyCFG.add(func); + } + inst->removeAndDeallocate(); + continue; + case kIROp_IsTextureArrayAccess: + { + auto textureAccess = getTextureAccess(inst); + if (textureAccess && as<IRTextureType>(textureAccess->getImage()->getDataType())->isArray()) + inst->replaceUsesWith(builder.getBoolValue(true)); + else + { + inst->replaceUsesWith(builder.getBoolValue(false)); + functionsToSimplifyCFG.add(func); + } + inst->removeAndDeallocate(); + continue; + } + case kIROp_IsTextureScalarAccess: + { + auto textureAccess = getTextureAccess(inst); + if (textureAccess && !as<IRVectorType>(as<IRTextureType>(textureAccess->getImage()->getDataType())->getElementType())) + inst->replaceUsesWith(builder.getBoolValue(true)); + else + { + inst->replaceUsesWith(builder.getBoolValue(false)); + functionsToSimplifyCFG.add(func); + } + inst->removeAndDeallocate(); + continue; + } + } + } + } + } + // Requires a simplifyCFG to ensure Slang does not evaluate 'IRTextureType' code path for + // 'inst' for when 'inst' is not a 'IRTextureType'/TextureAccessor + for(auto func : functionsToSimplifyCFG) + simplifyCFG(func, CFGSimplificationOptions::getFast()); + } +} + diff --git a/source/slang/slang-ir-legalize-is-texture-access.h b/source/slang/slang-ir-legalize-is-texture-access.h new file mode 100644 index 000000000..eccfe8fcb --- /dev/null +++ b/source/slang/slang-ir-legalize-is-texture-access.h @@ -0,0 +1,11 @@ +#pragma once + +#include "slang-ir.h" +#include "slang-compiler.h" + +namespace Slang +{ + class DiagnosticSink; + + void legalizeIsTextureAccess(IRModule* module); +} |
