summary refs log tree commit diff stats
path: root/source
diff options
context:
space:
mode:
Diffstat (limited to 'source')
-rw-r--r-- source/compiler-core/slang-gcc-compiler-util.cpp 2
-rw-r--r-- source/slang/hlsl.meta.slang 1249
-rw-r--r-- source/slang/slang-emit-c-like.cpp 7
-rw-r--r-- source/slang/slang-emit.cpp 9
-rw-r--r-- source/slang/slang-ir-inst-defs.h 7
-rw-r--r-- source/slang/slang-ir-legalize-extract-from-texture-access.cpp 136
-rw-r--r-- source/slang/slang-ir-legalize-extract-from-texture-access.h 11
-rw-r--r-- source/slang/slang-ir-legalize-image-subscript.cpp 15
-rw-r--r-- source/slang/slang-ir-legalize-is-texture-access.cpp 84
-rw-r--r-- source/slang/slang-ir-legalize-is-texture-access.h 11
10 files changed, 839 insertions, 692 deletions
diff --git a/source/compiler-core/slang-gcc-compiler-util.cpp b/source/compiler-core/slang-gcc-compiler-util.cpp
index 52d4a0c29..01f197875 100644
--- a/source/compiler-core/slang-gcc-compiler-util.cpp
+++ b/source/compiler-core/slang-gcc-compiler-util.cpp
@@ -476,7 +476,7 @@ static SlangResult _parseGCCFamilyLine(SliceAllocator& allocator, const UnownedS
if (targetDesc.payload == ArtifactDesc::Payload::MetalAIR)
{
- cmdLine.addArg("-std=macos-metal2.3");
+ cmdLine.addArg("-std=metal3.1");
}
// Our generated code very often casts between dissimilar types with the
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index 59a64a192..9fcd002a8 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -8785,789 +8785,678 @@ __intrinsic_op($(kIROp_MetalAtomicCast))
[require(metal)]
T* __getMetalAtomicRef(__ref T x);
-${{{{
-for (const char* fetchAndModify : {"add", "and", "max", "min", "or", "sub", "xor"})
-{
-}}}}
- __generic<AtomicType, T>
- [ForceInline]
- [require(metal)]
- void __metalInterlocked_$(fetchAndModify)(AtomicType dest, T value)
- {
- __intrinsic_asm "atomic_fetch_$(fetchAndModify)_explicit($0, $1, memory_order_relaxed)";
- }
+// Returns true when the input `x` is an ImageSubscript (i.e. a texture access).
+__generic<T>
+__intrinsic_op($(kIROp_IsTextureAccess))
+bool __isTextureAccess(__ref T x);
- __generic<AtomicType, T>
- [ForceInline]
- [require(metal)]
- void __metalInterlocked_$(fetchAndModify)(AtomicType dest, T value, out T original_value)
- {
- __intrinsic_asm "((*($2)) = (($[0])(atomic_fetch_$(fetchAndModify)_explicit($0, $1, memory_order_relaxed))))", T;
- }
-${{{{
-} // fetchAndModify
-}}}}
+// Returns true when the input `x` is an access to a texture whose element type is a scalar `T` (presumably as opposed to a vector element type - TODO confirm).
+__generic<T>
+__intrinsic_op($(kIROp_IsTextureScalarAccess))
+bool __isTextureScalarAccess(__ref T x);
-__generic<AtomicType, T>
-[ForceInline]
-[require(metal)]
-void __metalInterlocked_exchange(AtomicType dest, T value, out T original_value)
-{
- __intrinsic_asm "((*($2)) = (($[0])(atomic_exchange_explicit($0, $1, memory_order_relaxed))))", T;
-}
+// Returns true when the input `x` is an access to a texture array.
+__generic<T>
+__intrinsic_op($(kIROp_IsTextureArrayAccess))
+bool __isTextureArrayAccess(__ref T x);
+
+// Accepts an ImageSubscript.
+// Returns the texture that the ImageSubscript is used with.
+__generic<TextureAccess>
+__intrinsic_op($(kIROp_ExtractTextureFromTextureAccess))
+TextureAccess* __extractTextureFromTextureAccess(__ref TextureAccess x);
+
+// Accepts an ImageSubscript.
+// Returns the coordinate of the ImageSubscript, with the array coordinate swizzled out if applicable.
+__generic<TextureAccess>
+__intrinsic_op($(kIROp_ExtractCoordFromTextureAccess))
+uint __extractCoordFromTextureAccess(__ref TextureAccess x);
+
+// Accepts an ImageSubscript.
+// Returns the array coordinate of the ImageSubscript.
+__generic<TextureAccess>
+__intrinsic_op($(kIROp_ExtractArrayCoordFromTextureAccess))
+uint __extractArrayCoordFromTextureAccess(__ref TextureAccess x);
-__generic<AtomicType, T>
-[ForceInline]
-[require(metal)]
-void __metalInterlocked_compare_exchange(AtomicType dest, __ref T compare_value, T value)
+${{{{
+for (bool isArray : {false, true})
{
- __intrinsic_asm "atomic_compare_exchange_weak_explicit($0, $1, $2, memory_order_relaxed, memory_order_relaxed)";
-}
+ StringBuilder coordBuilder;
+ StringBuilder coordFetchBuilder;
+
+ StringBuilder threeParamsASMBuilder;
+ StringBuilder threeParamsOutputParamASMBuilder;
+
+ StringBuilder fourParamsASMBuilder;
-__generic<AtomicType, T>
-[ForceInline]
-[require(metal)]
-void __metalInterlocked_compare_exchange(AtomicType dest, T compare_value, T value, out T original_value)
-{
- __metalInterlocked_compare_exchange(dest, compare_value, value);
- original_value = compare_value;
-}
+ coordBuilder << "Coord coord";
+ coordFetchBuilder << "coord";
+
+ threeParamsASMBuilder << "$1, $2";
-__glsl_version(430)
-[ForceInline]
-[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)]
-void InterlockedAdd(__ref int dest, int value)
-{
- __target_switch
+ fourParamsASMBuilder << "$1, $2, $3";
+ if(isArray)
{
- case hlsl: __intrinsic_asm "InterlockedAdd";
- case cuda: __intrinsic_asm "atomicAdd($0, $1)";
- case glsl: __intrinsic_asm "$atomicAdd($A, $1)";
- case metal:
- __metalInterlocked_add(__getMetalAtomicRef(dest), value);
- return;
- case spirv:
- spirv_asm
- {
- result:$$int = OpAtomicIAdd &dest Device None $value
- };
+ coordBuilder << ", uint arrayCoord";
+ coordFetchBuilder << ", arrayCoord";
+ threeParamsASMBuilder << ", $3";
+ fourParamsASMBuilder << ", $4";
+ threeParamsOutputParamASMBuilder << "$4";
}
-}
-
-[ForceInline]
-__glsl_version(430)
-[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)]
-void InterlockedAdd(__ref uint dest, uint value)
-{
- __target_switch
+ else
{
- case hlsl: __intrinsic_asm "InterlockedAdd";
- case cuda: __intrinsic_asm "atomicAdd((int*)$0, $1)";
- case glsl: __intrinsic_asm "$atomicAdd($A, $1)";
- case metal:
- __metalInterlocked_add(__getMetalAtomicRef(dest), value);
- return;
- case spirv:
- spirv_asm
- {
- result:$$uint = OpAtomicIAdd &dest Device None $value
- };
+ threeParamsOutputParamASMBuilder << "$3";
}
-}
+ auto coordString = coordBuilder.toString();
+ auto coordFetchString = coordFetchBuilder.toString();
+
+ auto threeParamsASMString = threeParamsASMBuilder.toString();
+ auto threeParamsOutputParamASMString = threeParamsOutputParamASMBuilder.toString();
-[ForceInline]
-void InterlockedAdd(__ref uint dest, int value)
-{
- InterlockedAdd(dest, (uint)value);
-}
+ auto fourParamsASMString = fourParamsASMBuilder.toString();
+}}}}
-[ForceInline]
-__glsl_version(430)
-[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)]
-void InterlockedAdd(__ref int dest, int value, out int original_value)
-{
- __target_switch
+${{{{
+ for (const char* atomicOperation : {"add", "and", "max", "min", "or", "sub", "xor"})
{
- case hlsl: __intrinsic_asm "InterlockedAdd";
- case cuda: __intrinsic_asm "(*$2 = atomicAdd($0, $1))";
- case glsl: __intrinsic_asm "($2 = $atomicAdd($A, $1))";
- case metal:
- __metalInterlocked_add(__getMetalAtomicRef(dest), value, original_value);
- return;
- case spirv:
- spirv_asm
+}}}}
+ __generic<TextureType, T, Coord>
+ [ForceInline]
+ [require(metal)]
+ vector<T, 4> __metalImageInterlocked_$(atomicOperation)(TextureType tex, $(coordString), vector<T, 4> value)
{
- %original:$$int = OpAtomicIAdd &dest Device None $value;
- OpStore &original_value %original
- };
- }
-}
+ static_assert(T is int || T is uint, "__metalImageInterlocked only allows 'int'/'uint' textures");
+ static_assert(Coord is uint || Coord is vector<uint,2> || Coord is vector<uint,3> || Coord is vector<uint,4>,
+ "__metalImageInterlocked implementation only allows 'uint' coordinates");
+ __intrinsic_asm "$0.atomic_fetch_$(atomicOperation)($(threeParamsASMString))";
+ }
-[ForceInline]
-__glsl_version(430)
-[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)]
-void InterlockedAdd(__ref uint dest, uint value, out uint original_value)
-{
- __target_switch
- {
- case hlsl: __intrinsic_asm "InterlockedAdd";
- case cuda: __intrinsic_asm "(*$2 = (uint)atomicAdd((int*)$0, $1))";
- case glsl: __intrinsic_asm "($2 = $atomicAdd($A, $1))";
- case metal:
- __metalInterlocked_add(__getMetalAtomicRef(dest), value, original_value);
- return;
- case spirv:
- spirv_asm
+ __generic<TextureType, T, Coord>
+ [ForceInline]
+ [require(metal)]
+ void __metalImageInterlocked_$(atomicOperation)(TextureType tex, $(coordString), vector<T, 4> value, out T original_value)
{
- %original:$$uint = OpAtomicIAdd &dest Device None $value;
- OpStore &original_value %original
- };
- }
-}
+ static_assert(T is int || T is uint, "__metalImageInterlocked only allows 'int'/'uint' textures");
+ static_assert(Coord is uint || Coord is vector<uint,2> || Coord is vector<uint,3> || Coord is vector<uint,4>,
+ "__metalImageInterlocked implementation only allows 'uint' coordinates");
+ original_value = __metalImageInterlocked_$(atomicOperation)(tex, $(coordFetchString), value)[0];
+ }
+${{{{
+ } // atomicOperation
+}}}}
-[ForceInline]
-[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)]
-void InterlockedAdd(__ref int64_t dest, int64_t value)
-{
- __target_switch
+ __generic<TextureType, T, Coord>
+ [ForceInline]
+ [require(metal)]
+ vector<T, 4> __metalImageInterlocked_exchange(TextureType tex, $(coordString), vector<T, 4> value)
{
- case hlsl: __intrinsic_asm "InterlockedAdd";
- case cuda: __intrinsic_asm "atomicAdd((uint64_t*)$0, $1)";
- case glsl:
- __requireGLSLExtension("GL_EXT_shader_atomic_int64");
- __intrinsic_asm "$atomicAdd($A, $1)";
- case spirv:
- spirv_asm
- {
- OpCapability Int64Atomics;
- result:$$int64_t = OpAtomicIAdd &dest Device None $value;
- };
+ static_assert(T is int || T is uint, "__metalImageInterlocked only allows 'int'/'uint' textures");
+ static_assert(Coord is uint || Coord is vector<uint,2> || Coord is vector<uint,3> || Coord is vector<uint,4>,
+ "__metalImageInterlocked implementation only allows 'uint' coordinates");
+ __intrinsic_asm "($0.atomic_exchange($(threeParamsASMString)))";
}
-}
-
-[ForceInline]
-void InterlockedAdd(__ref int64_t dest, int64_t value, out int64_t original_value)
-{
- __target_switch
+ __generic<TextureType, T, Coord>
+ [ForceInline]
+ [require(metal)]
+ void __metalImageInterlocked_exchange(TextureType tex, $(coordString), vector<T, 4> value, out T original_value)
{
- case hlsl: __intrinsic_asm "InterlockedAdd";
- case cuda: __intrinsic_asm "atomicAdd((uint64_t*)$0, $1)";
- case glsl:
- __requireGLSLExtension("GL_EXT_shader_atomic_int64");
- __intrinsic_asm "$atomicAdd($A, $1)";
- case spirv:
- spirv_asm
- {
- OpCapability Int64Atomics;
- %origin:$$int64_t = OpAtomicIAdd &dest Device None $value;
- OpStore &original_value %origin
- };
+ static_assert(T is int || T is uint, "Metal atomic texture operations only allow 'int'/'uint' textures");
+ static_assert(Coord is uint || Coord is vector<uint,2> || Coord is vector<uint,3> || Coord is vector<uint,4>,
+ "__metalImageInterlocked implementation only allows 'uint' coordinates");
+ original_value = __metalImageInterlocked_exchange(tex, $(coordFetchString), value)[0];
}
-}
-[ForceInline]
-[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)]
-void InterlockedAdd(__ref uint64_t dest, uint64_t value)
-{
- __target_switch
+ __generic<TextureType, T, Coord>
+ [ForceInline]
+ [require(metal)]
+ void __metalImageInterlocked_compare_exchange(TextureType tex, $(coordString), __ref vector<T, 4> compare_value, vector<T, 4> value)
{
- case hlsl: __intrinsic_asm "InterlockedAdd";
- case cuda: __intrinsic_asm "atomicAdd($0, $1)";
- case glsl:
- __requireGLSLExtension("GL_EXT_shader_atomic_int64");
- __intrinsic_asm "$atomicAdd($A, $1)";
- case spirv:
- spirv_asm
- {
- OpCapability Int64Atomics;
- result:$$uint64_t = OpAtomicIAdd &dest Device None $value;
- };
+ static_assert(T is int || T is uint, "__metalImageInterlocked only allows 'int'/'uint' textures");
+ static_assert(Coord is uint || Coord is vector<uint,2> || Coord is vector<uint,3> || Coord is vector<uint,4>,
+ "__metalImageInterlocked implementation only allows 'uint' coordinates");
+ __intrinsic_asm "($0.atomic_compare_exchange_weak($(fourParamsASMString)))";
}
-}
-
-[ForceInline]
-void InterlockedAdd(__ref uint64_t dest, uint64_t value, out uint64_t original_value)
-{
- __target_switch
+ __generic<TextureType, T, Coord>
+ [ForceInline]
+ [require(metal)]
+ void __metalImageInterlocked_compare_exchange(TextureType tex, $(coordString), vector<T, 4> compare_value, vector<T, 4> value, out T original_value)
{
- case hlsl: __intrinsic_asm "InterlockedAdd";
- case cuda: __intrinsic_asm "atomicAdd($0, $1)";
- case glsl:
- __requireGLSLExtension("GL_EXT_shader_atomic_int64");
- __intrinsic_asm "$atomicAdd($A, $1)";
- case spirv:
- spirv_asm
- {
- OpCapability Int64Atomics;
- %origin:$$uint64_t = OpAtomicIAdd &dest Device None $value;
- OpStore &original_value %origin
- };
+ static_assert(T is int || T is uint, "__metalImageInterlocked only allows 'int'/'uint' textures");
+ static_assert(Coord is uint || Coord is vector<uint,2> || Coord is vector<uint,3> || Coord is vector<uint,4>,
+ "__metalImageInterlocked implementation only allows 'uint' coordinates");
+ __metalImageInterlocked_compare_exchange(tex, $(coordFetchString), compare_value, value);
+ original_value = compare_value[0];
}
-}
+${{{{
+} // isArray
+}}}}
-__glsl_version(430)
-[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)]
-void InterlockedAnd(__ref int dest, int value)
-{
- __target_switch
- {
- case hlsl: __intrinsic_asm "InterlockedAnd";
- case cuda: __intrinsic_asm "atomicAnd($0, $1)";
- case glsl: __intrinsic_asm "$atomicAnd($A, $1)";
- case metal:
- __metalInterlocked_and(__getMetalAtomicRef(dest), value);
- return;
- case spirv:
- spirv_asm
- {
- result:$$int = OpAtomicAnd &dest Device None $value;
- };
- }
-}
+${{{{
-__glsl_version(430)
-[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)]
-void InterlockedAnd(__ref uint dest, uint value)
+// Generated functions:
+
+// atomicAdd, InterlockedAdd, atomic_fetch_add_explicit, OpAtomicIAdd, OpAtomicFAddEXT
+// __cudaInterlocked_add, __glslInterlocked_add, __hlslInterlocked_add, __metalInterlocked_add, __spirvInterlocked_add
+
+// atomicAnd, InterlockedAnd, atomic_fetch_and_explicit, OpAtomicAnd
+// __cudaInterlocked_and, __glslInterlocked_and, __hlslInterlocked_and, __metalInterlocked_and, __spirvInterlocked_and
+
+// atomicMax, InterlockedMax, atomic_fetch_max_explicit, OpAtomicUMax, OpAtomicSMax, OpAtomicFMaxEXT
+// __cudaInterlocked_max, __glslInterlocked_max, __hlslInterlocked_max, __metalInterlocked_max, __spirvInterlocked_max
+
+// atomicMin, InterlockedMin, atomic_fetch_min_explicit, OpAtomicUMin, OpAtomicSMin, OpAtomicFMinEXT
+// __cudaInterlocked_min, __glslInterlocked_min, __hlslInterlocked_min, __metalInterlocked_min, __spirvInterlocked_min
+
+// atomicOr, InterlockedOr, atomic_fetch_or_explicit, OpAtomicOr
+// __cudaInterlocked_or, __glslInterlocked_or, __hlslInterlocked_or, __metalInterlocked_or, __spirvInterlocked_or
+
+// atomicXor, InterlockedXor, atomic_fetch_xor_explicit, OpAtomicXor
+// __cudaInterlocked_xor, __glslInterlocked_xor, __hlslInterlocked_xor, __metalInterlocked_xor, __spirvInterlocked_xor
+
+// atomicExchange, atomicExch, InterlockedExchange, atomic_exchange_explicit, OpAtomicExchange
+// __cudaInterlocked_exchange, __glslInterlocked_exchange, __hlslInterlocked_exchange, __metalInterlocked_exchange, __spirvInterlocked_exchange
+
+struct InternalAtomicOperationInfo
{
- __target_switch
- {
- case hlsl: __intrinsic_asm "InterlockedAnd";
- case cuda: __intrinsic_asm "atomicAnd((int*)$0, $1)";
- case glsl: __intrinsic_asm "$atomicAnd($A, $1)";
- case metal:
- __metalInterlocked_and(__getMetalAtomicRef(dest), value);
- return;
- case spirv:
- spirv_asm
- {
- result:$$uint = OpAtomicAnd &dest Device None $value;
- };
- }
-}
+ const char* slangSuffix;
+ const char* cudaSuffix;
+ const char* glslSuffix;
+ const char* hlslSuffix;
+ const char* metalSuffix;
+ const char* spirvFloatSuffix;
+ const char* spirvUIntSuffix;
+ const char* spirvIntSuffix;
-__glsl_version(430)
-[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)]
-void InterlockedAnd(__ref int dest, int value, out int original_value)
+ const char* assertExpr;
+};
+
+InternalAtomicOperationInfo internalAtomicOperationInfo[7] = {
+ { "add", "Add", "Add", "Add", "fetch_add", "FAddEXT", "IAdd", "IAdd", "true" },
+ { "and", "And", "And", "And", "fetch_and", "And", "And", "And", "!__isFloat<T>()" },
+ { "max", "Max", "Max", "Max", "fetch_max", "FMaxEXT", "UMax", "SMax", "true" },
+ { "min", "Min", "Min", "Min", "fetch_min", "FMinEXT", "UMin", "SMin", "true" },
+ { "or", "Or", "Or", "Or", "fetch_or", "Or", "Or", "Or", "!__isFloat<T>()" },
+ { "xor", "Xor", "Xor", "Xor", "fetch_xor", "Xor", "Xor", "Xor", "!__isFloat<T>()" },
+ { "exchange", "Exch", "Exchange", "Exchange", "exchange", "Exchange", "Exchange", "Exchange", "true" },
+};
+
+for (InternalAtomicOperationInfo atomicOp : internalAtomicOperationInfo)
{
- __target_switch
+}}}}
+ __generic<AtomicType, T>
+ [ForceInline]
+ [require(metal)]
+ void __metalInterlocked_$(atomicOp.slangSuffix)(AtomicType dest, T value)
{
- case hlsl: __intrinsic_asm "InterlockedAnd";
- case cuda: __intrinsic_asm "(*$2 = atomicAnd($0, $1))";
- case glsl: __intrinsic_asm "($2 = $atomicAnd($A, $1))";
- case metal:
- __metalInterlocked_and(__getMetalAtomicRef(dest), value, original_value);
- return;
- case spirv:
- spirv_asm
- {
- %original:$$int = OpAtomicAnd &dest Device None $value;
- OpStore &original_value %original
- };
+ static_assert($(atomicOp.assertExpr), "Unable to use float with Atomic$(atomicOp.slangSuffix)");
+ __intrinsic_asm "atomic_$(atomicOp.metalSuffix)_explicit($0, $1, memory_order_relaxed)";
}
-}
-__glsl_version(430)
-[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)]
-void InterlockedAnd(__ref uint dest, uint value, out uint original_value)
-{
- __target_switch
+ __generic<AtomicType, T>
+ [ForceInline]
+ [require(metal)]
+ void __metalInterlocked_$(atomicOp.slangSuffix)(AtomicType dest, T value, out T original_value)
{
- case hlsl: __intrinsic_asm "InterlockedAnd";
- case glsl: __intrinsic_asm "($2 = atomicAnd($0, $1))";
- case cuda: __intrinsic_asm "(*$2 = atomicAnd((int*)$0, $1))";
- case metal:
- __metalInterlocked_and(__getMetalAtomicRef(dest), value, original_value);
- return;
- case spirv:
- spirv_asm
- {
- %original:$$uint = OpAtomicAnd &dest Device None $value;
- OpStore &original_value %original
- };
+ static_assert($(atomicOp.assertExpr), "Unable to use float with Atomic$(atomicOp.slangSuffix)");
+ __intrinsic_asm "((*($2)) = (atomic_$(atomicOp.metalSuffix)_explicit($0, $1, memory_order_relaxed)))";
}
-}
-[ForceInline]
-void InterlockedAnd(__ref uint64_t dest, uint64_t value)
-{
- __target_switch
+ __generic<T>
+ [ForceInline]
+ [require(cuda)]
+ void __cudaInterlocked_$(atomicOp.slangSuffix)(__ref T dest, T value)
{
- case hlsl: __intrinsic_asm "InterlockedAnd";
+ static_assert($(atomicOp.assertExpr), "Unable to use float with Atomic$(atomicOp.slangSuffix)");
+ __intrinsic_asm "atomic$(atomicOp.cudaSuffix)((int*)$0, $1)";
}
-}
-[ForceInline]
-void InterlockedAnd(__ref uint64_t dest, uint64_t value, out uint64_t original_value)
-{
- __target_switch
+ __generic<T>
+ [ForceInline]
+ [require(cuda)]
+ void __cudaInterlocked_$(atomicOp.slangSuffix)(__ref T dest, T value, out T original_value)
{
- case hlsl: __intrinsic_asm "InterlockedAnd";
+ static_assert($(atomicOp.assertExpr), "Unable to use float with Atomic$(atomicOp.slangSuffix)");
+ __intrinsic_asm "(*$2 = atomic$(atomicOp.cudaSuffix)((int*)$0, $1))";
}
-}
-__glsl_version(430)
-[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)]
-void InterlockedCompareExchange(__ref int dest, int compare_value, int value, out int original_value)
-{
- __target_switch
+ __generic<T>
+ [ForceInline]
+ [require(glsl)]
+ void __glslInterlocked_$(atomicOp.slangSuffix)(__ref T dest, T value)
{
- case hlsl: __intrinsic_asm "InterlockedCompareExchange";
- case glsl: __intrinsic_asm "($3 = $atomicCompSwap($A, $1, $2))";
- case cuda: __intrinsic_asm "(*$3 = atomicCAS($0, $1, $2))";
- case metal:
- __metalInterlocked_compare_exchange(__getMetalAtomicRef(dest), compare_value, value, original_value);
- return;
- case spirv:
- spirv_asm
- {
- %original:$$int = OpAtomicCompareExchange &dest Device None None $value $compare_value;
- OpStore &original_value %original
- };
+ static_assert($(atomicOp.assertExpr), "Unable to use float with Atomic$(atomicOp.slangSuffix)");
+ __intrinsic_asm "$atomic$(atomicOp.glslSuffix)($A, $1)";
}
-}
-__glsl_version(430)
-[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)]
-void InterlockedCompareExchange(__ref uint dest, uint compare_value, uint value, out uint original_value)
-{
- __target_switch
+ __generic<T>
+ [ForceInline]
+ [require(glsl)]
+ void __glslInterlocked_$(atomicOp.slangSuffix)(__ref T dest, T value, out T original_value)
{
- case hlsl: __intrinsic_asm "InterlockedCompareExchange";
- case glsl: __intrinsic_asm "($3 = $atomicCompSwap($A, $1, $2))";
- case cuda: __intrinsic_asm "(*$3 = (uint)atomicCAS((int*)$0, $1, $2))";
- case metal:
- __metalInterlocked_compare_exchange(__getMetalAtomicRef(dest), compare_value, value, original_value);
- return;
- case spirv:
- spirv_asm
- {
- %original:$$uint = OpAtomicCompareExchange &dest Device None None $value $compare_value;
- OpStore &original_value %original
- };
+ static_assert($(atomicOp.assertExpr), "Unable to use float with Atomic$(atomicOp.slangSuffix)");
+ __intrinsic_asm "($2 = $atomic$(atomicOp.glslSuffix)($A, $1))";
}
-}
-[ForceInline]
-void InterlockedCompareExchangeFloatBitwise(__ref float dest, float compare_value, float value)
-{
- __target_switch
+ __generic<T>
+ [ForceInline]
+ [require(hlsl)]
+ void __hlslInterlocked_$(atomicOp.slangSuffix)(__ref T dest, T value)
{
- case hlsl: __intrinsic_asm "InterlockedCompareExchangeFloatBitwise";
- case metal:
- __metalInterlocked_compare_exchange(__getMetalAtomicRef(dest), compare_value, value);
- return;
+ static_assert($(atomicOp.assertExpr), "Unable to use float with Atomic$(atomicOp.slangSuffix)");
+ __intrinsic_asm "Interlocked$(atomicOp.hlslSuffix)";
}
-}
-[ForceInline]
-void InterlockedCompareExchangeFloatBitwise(__ref float dest, float compare_value, float value, out float original_value)
-{
- __target_switch
+ __generic<T>
+ [ForceInline]
+ [require(hlsl)]
+ void __hlslInterlocked_$(atomicOp.slangSuffix)(__ref T dest, T value, out T original_value)
{
- case hlsl: __intrinsic_asm "InterlockedCompareExchangeFloatBitwise";
- case metal:
- __metalInterlocked_compare_exchange(__getMetalAtomicRef(dest), compare_value, value, original_value);
- return;
+ static_assert($(atomicOp.assertExpr), "Unable to use float with Atomic$(atomicOp.slangSuffix)");
+ __intrinsic_asm "Interlocked$(atomicOp.hlslSuffix)";
}
-}
-${{{{
-for (const char* T : {"int64_t", "uint64_t"})
-{
-}}}}
+ __generic<T>
[ForceInline]
- void InterlockedCompareExchange(__ref $(T) dest, $(T) compare_value, $(T) value)
+ [require(spirv)]
+ void __spirvInterlocked_$(atomicOp.slangSuffix)(__ref T dest, T value)
{
- __target_switch
+ static_assert($(atomicOp.assertExpr), "Unable to use float with Atomic$(atomicOp.slangSuffix)");
+ if (__isFloat<T>())
+ {
+ spirv_asm
+ {
+ result:$$T = OpAtomic$(atomicOp.spirvFloatSuffix) &dest Device None $value
+ };
+ }
+ else if (__isUnsignedInt<T>())
{
- case hlsl: __intrinsic_asm "InterlockedCompareExchange";
+ spirv_asm
+ {
+ result:$$T = OpAtomic$(atomicOp.spirvUIntSuffix) &dest Device None $value
+ };
+ }
+ else if (__isInt<T>())
+ {
+ spirv_asm
+ {
+ result:$$T = OpAtomic$(atomicOp.spirvIntSuffix) &dest Device None $value
+ };
}
}
+ __generic<T>
[ForceInline]
- void InterlockedCompareExchange(__ref $(T) dest, $(T) compare_value, $(T) value, out $(T) original_value)
+ [require(spirv)]
+ void __spirvInterlocked_$(atomicOp.slangSuffix)(__ref T dest, T value, out T original_value)
{
- __target_switch
+ static_assert($(atomicOp.assertExpr), "Unable to use float with Atomic$(atomicOp.slangSuffix)");
+ if (__isFloat<T>())
+ {
+ spirv_asm
+ {
+ %original:$$T = OpAtomic$(atomicOp.spirvFloatSuffix) &dest Device None $value;
+ OpStore &original_value %original
+ };
+ }
+ else if (__isUnsignedInt<T>())
+ {
+ spirv_asm
+ {
+ %original:$$T = OpAtomic$(atomicOp.spirvUIntSuffix) &dest Device None $value;
+ OpStore &original_value %original
+ };
+ }
+ else if (__isInt<T>())
{
- case hlsl: __intrinsic_asm "InterlockedCompareExchange";
+ spirv_asm
+ {
+ %original:$$T = OpAtomic$(atomicOp.spirvIntSuffix) &dest Device None $value;
+ OpStore &original_value %original
+ };
}
}
+
${{{{
-} // T
+} // atomicOp
}}}}
-__glsl_version(430)
-[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)]
-void InterlockedCompareStore(__ref int dest, int compare_value, int value)
+__generic<AtomicType, T>
+[ForceInline]
+[require(metal)]
+void __metalInterlocked_compare_exchange(AtomicType dest, __ref T compare_value, T value)
{
- __target_switch
- {
- case hlsl: __intrinsic_asm "InterlockedCompareStore";
- case glsl: __intrinsic_asm "$atomicCompSwap($A, $1, $2)";
- case cuda: __intrinsic_asm "atomicCAS($0, $1, $2)";
- case spirv:
- spirv_asm
- {
- result:$$int = OpAtomicCompareExchange &dest Device None None $value $compare_value;
- };
- }
+ __intrinsic_asm "atomic_compare_exchange_weak_explicit($0, $1, $2, memory_order_relaxed, memory_order_relaxed)";
}
-__glsl_version(430)
-[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)]
-void InterlockedCompareStore(__ref uint dest, uint compare_value, uint value)
+__generic<AtomicType, T>
+[ForceInline]
+[require(metal)]
+void __metalInterlocked_compare_exchange(AtomicType dest, T compare_value, T value, out T original_value)
{
- __target_switch
- {
- case hlsl: __intrinsic_asm "InterlockedCompareStore";
- case glsl: __intrinsic_asm "$atomicCompSwap($A, $1, $2)";
- case cuda: __intrinsic_asm "atomicCAS((int*)$0, $1, $2)";
- case spirv:
- spirv_asm
- {
- result:$$uint = OpAtomicCompareExchange &dest Device None None $value $compare_value;
- };
- }
+ __metalInterlocked_compare_exchange(dest, compare_value, value);
+ original_value = compare_value;
}
+__generic<T>
+__glsl_version(430)
[ForceInline]
-void InterlockedCompareStoreFloatBitwise(__ref float dest, float compare_value, float value)
+[require(cuda)]
+void __cudaInterlocked_compare_exchange(__ref T dest, __ref T compare_value, T value)
{
- __target_switch
- {
- case hlsl: __intrinsic_asm "InterlockedCompareStoreFloatBitwise";
- }
+ __intrinsic_asm "atomicCAS($0, $1, $2)";
}
+__generic<T>
[ForceInline]
-void InterlockedCompareStore(__ref int64_t dest, int64_t compare_value, int64_t value);
+[require(cuda)]
+void __cudaInterlocked_compare_exchange(__ref T dest, T compare_value, T value, out T original_value)
{
- __target_switch
- {
- case hlsl: __intrinsic_asm "InterlockedCompareStore";
- }
+ __intrinsic_asm "*$3 = atomicCAS($0, $1, $2)";
}
+__generic<T>
[ForceInline]
-void InterlockedCompareStore(__ref uint64_t dest, uint64_t compare_value, uint64_t value);
+[require(glsl)]
+void __glslInterlocked_compare_exchange(__ref T dest, __ref T compare_value, T value)
{
- __target_switch
- {
- case hlsl: __intrinsic_asm "InterlockedCompareStore";
- }
+ __intrinsic_asm "$atomicCompSwap($A, $1, $2)";
}
-__glsl_version(430)
-[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)]
-void InterlockedExchange(__ref int dest, int value, out int original_value)
+__generic<T>
+[ForceInline]
+[require(glsl)]
+void __glslInterlocked_compare_exchange(__ref T dest, T compare_value, T value, out T original_value)
{
- __target_switch
- {
- case hlsl: __intrinsic_asm "InterlockedExchange";
- case glsl: __intrinsic_asm "($2 = $atomicExchange($A, $1))";
- case cuda: __intrinsic_asm "(*$2 = atomicExch($0, $1))";
- case metal:
- __metalInterlocked_exchange(__getMetalAtomicRef(dest), value, original_value);
- return;
- case spirv:
- spirv_asm
- {
- %r:$$int = OpAtomicExchange &dest Device None $value;
- OpStore &original_value %r
- };
- }
+ __intrinsic_asm "($3 = $atomicCompSwap($A, $1, $2))";
}
-__glsl_version(430)
-[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)]
-void InterlockedExchange(__ref uint dest, uint value, out uint original_value)
+__generic<T>
+[ForceInline]
+[require(hlsl)]
+void __hlslInterlocked_compare_exchange(__ref T dest, __ref T compare_value, T value)
{
- __target_switch
- {
- case hlsl: __intrinsic_asm "InterlockedExchange";
- case glsl: __intrinsic_asm "($2 = $atomicExchange($A, $1))";
- case cuda: __intrinsic_asm "(*$2 = (uint)atomicExch((int*)$0, $1))";
- case metal:
- __metalInterlocked_exchange(__getMetalAtomicRef(dest), value, original_value);
- return;
- case spirv:
- spirv_asm
- {
- %r:$$uint = OpAtomicExchange &dest Device None $value;
- OpStore &original_value %r
- };
- }
+ __intrinsic_asm "InterlockedCompareExchange";
}
+__generic<T>
[ForceInline]
-void InterlockedExchange(__ref float dest, float value)
+[require(hlsl)]
+void __hlslInterlocked_compare_exchange(__ref T dest, T compare_value, T value, out T original_value)
{
- __target_switch
- {
- case hlsl: __intrinsic_asm "InterlockedExchange";
- }
+ __intrinsic_asm "InterlockedCompareExchange";
}
+__generic<T>
[ForceInline]
-void InterlockedExchange(__ref float dest, float value, out float original_value)
+[require(spirv)]
+void __spirvInterlocked_compare_exchange(__ref T dest, __ref T compare_value, T value)
{
- __target_switch
+ spirv_asm
{
- case hlsl: __intrinsic_asm "InterlockedExchange";
- case metal:
- __metalInterlocked_exchange(__getMetalAtomicRef(dest), value, original_value);
- return;
- }
+ %result:$$T = OpAtomicCompareExchange &dest Device None None $value $compare_value;
+ };
}
+__generic<T>
[ForceInline]
-void InterlockedExchange(__ref int64_t dest, int64_t value)
+[require(spirv)]
+void __spirvInterlocked_compare_exchange(__ref T dest, T compare_value, T value, out T original_value)
{
- __target_switch
+ spirv_asm
{
- case hlsl: __intrinsic_asm "InterlockedExchange";
- }
+ %original:$$T = OpAtomicCompareExchange &dest Device None None $value $compare_value;
+ OpStore &original_value %original
+ };
}
+__generic<T>
[ForceInline]
-void InterlockedExchange(__ref int64_t dest, int64_t value, out int64_t original_value)
+[require(hlsl)]
+void __hlslInterlocked_compare_exchange_float_bitwise(__ref T dest, T compare_value, T value)
{
- __target_switch
- {
- case hlsl: __intrinsic_asm "InterlockedExchange";
- }
+ __intrinsic_asm "InterlockedCompareExchangeFloatBitwise";
}
+__generic<T>
[ForceInline]
-void InterlockedExchange(__ref uint64_t dest, uint64_t value)
+[require(hlsl)]
+void __hlslInterlocked_compare_exchange_float_bitwise(__ref T dest, T compare_value, T value, out T original_value)
{
- __target_switch
- {
- case hlsl: __intrinsic_asm "InterlockedExchange";
- }
+ __intrinsic_asm "InterlockedCompareExchangeFloatBitwise";
}
-[ForceInline]
-void InterlockedExchange(__ref uint64_t dest, uint64_t value, out uint64_t original_value)
+${{{{
+// Generates code for:
+// InterlockedAdd, InterlockedAnd, InterlockedOr, InterlockedXor,
+// InterlockedMax, InterlockedMin, InterlockedExchange
+struct SlangAtomicOperationInfo
{
- __target_switch
+ const char* slangCallSuffix;
+ const char* internalCallSuffix;
+};
+
+SlangAtomicOperationInfo slangAtomicOperationInfo[7] = {
+ { "Add", "add" },
+ { "And", "and" },
+ { "Or", "or" },
+ { "Xor", "xor" },
+ { "Max", "max" },
+ { "Min", "min" },
+ { "Exchange", "exchange" },
+};
+
+for (SlangAtomicOperationInfo atomicOp : slangAtomicOperationInfo)
+{
+ for(const char* T : {"int", "uint"})
{
- case hlsl: __intrinsic_asm "InterlockedExchange";
- }
-}
+}}}}
+[ForceInline] // emitted once per (atomicOp, T) pair by the enclosing meta loops; no result is returned
__glsl_version(430)
[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)]
-void InterlockedMax(__ref int dest, int value)
+void Interlocked$(atomicOp.slangCallSuffix)(__ref $(T) dest, $(T) value)
{
+ static_assert(__isTextureScalarAccess(dest) || !__isTextureAccess(dest), "Atomic must be applied to scalar texture or non-texture"); // texture accesses must be scalar
__target_switch
{
- case hlsl: __intrinsic_asm "InterlockedMax";
- case glsl: __intrinsic_asm "$atomicMax($A, $1)";
- case cuda: __intrinsic_asm "atomicMax($0, $1)";
+ case hlsl: __hlslInterlocked_$(atomicOp.internalCallSuffix)(dest, value);
+ case cuda: __cudaInterlocked_$(atomicOp.internalCallSuffix)(dest, value);
+ case glsl: __glslInterlocked_$(atomicOp.internalCallSuffix)(dest, value);
+ case spirv: __spirvInterlocked_$(atomicOp.internalCallSuffix)(dest, value);
case metal:
- __metalInterlocked_max(__getMetalAtomicRef(dest), value);
- return;
- case spirv:
- spirv_asm
+ if (__isTextureAccess(dest)) // texture element: use the image atomic helpers
{
- result:$$int = OpAtomicSMax &dest Device None $value;
- };
+ if(__isTextureArrayAccess(dest)) // array textures pass the array coordinate as its own argument
+ {
+ __metalImageInterlocked_$(atomicOp.internalCallSuffix)(__extractTextureFromTextureAccess(dest),
+ __extractCoordFromTextureAccess(dest), __extractArrayCoordFromTextureAccess(dest), vector<$(T), 4>(value));
+ }
+ else
+ {
+ __metalImageInterlocked_$(atomicOp.internalCallSuffix)(__extractTextureFromTextureAccess(dest),
+ __extractCoordFromTextureAccess(dest), vector<$(T), 4>(value));
+ }
+ }
+ else // not a texture access: operate through the Metal atomic reference
+ {
+ __metalInterlocked_$(atomicOp.internalCallSuffix)(__getMetalAtomicRef(dest), value);
+ }
+ return;
}
}
+// Emitted once per (atomicOp, T) pair by the enclosing meta loops.
+// Overload that additionally takes `out original_value` and forwards it to the
+// per-target helper selected by the target switch.
+[ForceInline]
__glsl_version(430)
[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)]
-void InterlockedMax(__ref uint dest, uint value)
+void Interlocked$(atomicOp.slangCallSuffix)(__ref $(T) dest, $(T) value, out $(T) original_value)
{
+ static_assert(__isTextureScalarAccess(dest) || !__isTextureAccess(dest), "Atomic must be applied to scalar texture or non-texture");
__target_switch
{
- case hlsl: __intrinsic_asm "InterlockedMax";
- case glsl: __intrinsic_asm "$atomicMax($A, $1)";
- case cuda: __intrinsic_asm "atomicMax((int*)$0, $1)";
+ case hlsl: __hlslInterlocked_$(atomicOp.internalCallSuffix)(dest, value, original_value);
+ case cuda: __cudaInterlocked_$(atomicOp.internalCallSuffix)(dest, value, original_value);
+ case glsl: __glslInterlocked_$(atomicOp.internalCallSuffix)(dest, value, original_value);
+ case spirv: __spirvInterlocked_$(atomicOp.internalCallSuffix)(dest, value, original_value);
case metal:
- __metalInterlocked_max(__getMetalAtomicRef(dest), value);
+ // Braces are explicit so the outer `else` cannot be mistaken for
+ // belonging to the nested array/non-array test.
+ if (__isTextureAccess(dest))
+ {
+ // Texture element: use the image atomic helpers; array textures pass
+ // the array coordinate as its own argument.
+ if(__isTextureArrayAccess(dest))
+ {
+ __metalImageInterlocked_$(atomicOp.internalCallSuffix)(__extractTextureFromTextureAccess(dest),
+ __extractCoordFromTextureAccess(dest), __extractArrayCoordFromTextureAccess(dest), vector<$(T),4>(value), original_value);
+ }
+ else
+ {
+ __metalImageInterlocked_$(atomicOp.internalCallSuffix)(__extractTextureFromTextureAccess(dest),
+ __extractCoordFromTextureAccess(dest), vector<$(T),4>(value), original_value);
+ }
+ }
+ else
+ {
+ // Not a texture access: operate through the Metal atomic reference.
+ __metalInterlocked_$(atomicOp.internalCallSuffix)(__getMetalAtomicRef(dest), value, original_value);
+ }
return;
- case spirv:
- spirv_asm
- {
- result:$$uint = OpAtomicUMax &dest Device None $value;
- };
}
}
-__glsl_version(430)
-[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)]
-void InterlockedMax(__ref int dest, int value, out int original_value)
+${{{{
+ } // for(const char* T : {"int", "uint"})
+}}}}
+
+[ForceInline] // mixed-sign convenience overload, emitted once per atomicOp (outside the per-type loop)
+void Interlocked$(atomicOp.slangCallSuffix)(__ref uint dest, int value)
+{
+ Interlocked$(atomicOp.slangCallSuffix)(dest, (uint)value); // cast the signed value to uint and forward to the same-named function
+}
+
+${{{{
+} // for (SlangAtomicOperationInfo atomicOp : slangAtomicOperationInfo)
+}}}}
+
+${{{{
+for(const char* T : {"int64_t", "uint64_t"})
{
+}}}}
+[ForceInline] // 64-bit overload (T is int64_t or uint64_t in the enclosing meta loop); no result is returned
+[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)]
+void InterlockedAdd(__ref $(T) dest, $(T) value)
+{
__target_switch
{
- case hlsl: __intrinsic_asm "InterlockedMax";
- case glsl: __intrinsic_asm "($2 = $atomicMax($A, $1))";
- case cuda: __intrinsic_asm "(*$2 = atomicMax($0, $1))";
- case metal:
- __metalInterlocked_max(__getMetalAtomicRef(dest), value, original_value);
- return;
+ case hlsl: __hlslInterlocked_add(dest, value);
+ case cuda: __cudaInterlocked_add(dest, value);
+ case glsl:
+ __requireGLSLExtension("GL_EXT_shader_atomic_int64"); // GLSL path requests the 64-bit atomics extension first
+ __glslInterlocked_add(dest, value);
case spirv:
spirv_asm
{
- %v:$$int = OpAtomicSMax &dest Device None $value;
- OpStore &original_value %v
+ OpCapability Int64Atomics;
+ result:$$$(T) = OpAtomicIAdd &dest Device None $value;
};
}
}
-__glsl_version(430)
-[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)]
-void InterlockedMax(__ref uint dest, uint value, out uint original_value)
+[ForceInline] // 64-bit overload that also writes `original_value`; unlike the two-argument form it carries no [require] attribute
+void InterlockedAdd(__ref $(T) dest, $(T) value, out $(T) original_value)
{
__target_switch
{
- case hlsl: __intrinsic_asm "InterlockedMax";
- case glsl: __intrinsic_asm "($2 = $atomicMax($A, $1))";
- case cuda: __intrinsic_asm "(*$2 = (uint)atomicMax((int*)$0, $1))";
- case metal:
- __metalInterlocked_max(__getMetalAtomicRef(dest), value, original_value);
- return;
+ case hlsl: __hlslInterlocked_add(dest, value, original_value);
+ case cuda: __cudaInterlocked_add(dest, value, original_value);
+ case glsl:
+ __requireGLSLExtension("GL_EXT_shader_atomic_int64"); // GLSL path requests the 64-bit atomics extension first
+ __glslInterlocked_add(dest, value, original_value);
case spirv:
spirv_asm
{
- %v:$$uint = OpAtomicUMax &dest Device None $value;
- OpStore &original_value %v
+ OpCapability Int64Atomics;
+ %origin:$$$(T) = OpAtomicIAdd &dest Device None $value;
+ OpStore &original_value %origin
};
}
}
[ForceInline]
-void InterlockedMax(__ref int64_t dest, int64_t value)
+void InterlockedAnd(__ref $(T) dest, $(T) value) // 64-bit overload; the target switch below only has an hlsl case
{
__target_switch
{
- case hlsl: __intrinsic_asm "InterlockedMax";
+ case hlsl: __hlslInterlocked_and(dest, value); // forwards to the HLSL helper
}
}
[ForceInline]
-void InterlockedMax(__ref int64_t dest, int64_t value, out int64_t original_value)
+void InterlockedAnd(__ref $(T) dest, $(T) value, out $(T) original_value) // 64-bit overload with `original_value`; only an hlsl case below
{
__target_switch
{
- case hlsl: __intrinsic_asm "InterlockedMax";
+ case hlsl: __hlslInterlocked_and(dest, value, original_value); // forwards to the HLSL helper
}
}
[ForceInline]
-void InterlockedMax(__ref uint64_t dest, uint64_t value)
+void InterlockedCompareExchange(__ref $(T) dest, $(T) compare_value, $(T) value) // 64-bit overload without `original_value`; only an hlsl case below
{
__target_switch
{
- case hlsl: __intrinsic_asm "InterlockedMax";
+ case hlsl: __hlslInterlocked_compare_exchange(dest, compare_value, value); // forwards to the HLSL helper
}
}
[ForceInline]
-void InterlockedMax(__ref uint64_t dest, uint64_t value, out uint64_t original_value)
+void InterlockedCompareExchange(__ref $(T) dest, $(T) compare_value, $(T) value, out $(T) original_value) // 64-bit overload with `original_value`; only an hlsl case below
{
__target_switch
{
- case hlsl: __intrinsic_asm "InterlockedMax";
+ case hlsl: __hlslInterlocked_compare_exchange(dest, compare_value, value, original_value); // forwards to the HLSL helper
}
}
-__glsl_version(430)
-[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)]
-void InterlockedMin(__ref int dest, int value)
+// 64-bit overload (T is int64_t or uint64_t in the enclosing meta loop).
+// Only an hlsl case is provided; it is emitted as the HLSL intrinsic of the same name.
+// Note: the signature must not end in `;`, otherwise the body below is detached from it.
+[ForceInline]
+void InterlockedCompareStore(__ref $(T) dest, $(T) compare_value, $(T) value)
{
__target_switch
{
- case hlsl: __intrinsic_asm "InterlockedMin";
- case glsl: __intrinsic_asm "$atomicMin($A, $1)";
- case cuda: __intrinsic_asm "atomicMin($0, $1)";
- case metal:
- __metalInterlocked_min(__getMetalAtomicRef(dest), value);
- return;
- case spirv:
- spirv_asm
- {
- result:$$int = OpAtomicSMin &dest Device None $value;
- };
+ case hlsl: __intrinsic_asm "InterlockedCompareStore";
}
}
-__glsl_version(430)
-[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)]
-void InterlockedMin(__ref uint dest, uint value)
+[ForceInline] // 64-bit overload; the target switch below only has an hlsl case
+void InterlockedExchange(__ref $(T) dest, $(T) value)
{
__target_switch
{
- case hlsl: __intrinsic_asm "InterlockedMin";
- case glsl: __intrinsic_asm "$atomicMin($A, $1)";
- case cuda: __intrinsic_asm "atomicMin((int*)$0, $1)";
- case metal:
- __metalInterlocked_min(__getMetalAtomicRef(dest), value);
- return;
- case spirv:
- spirv_asm
- {
- result:$$uint = OpAtomicUMin &dest Device None $value;
- };
+ case hlsl: __intrinsic_asm "InterlockedExchange"; // uses the HLSL intrinsic name directly rather than a helper
}
}
-__glsl_version(430)
-[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)]
-void InterlockedMin(__ref int dest, int value, out int original_value)
+[ForceInline] // 64-bit overload with `original_value`; the target switch below only has an hlsl case
+void InterlockedExchange(__ref $(T) dest, $(T) value, out $(T) original_value)
{
__target_switch
{
- case hlsl: __intrinsic_asm "InterlockedMin";
- case glsl: __intrinsic_asm "($2 = $atomicMin($A, $1))";
- case cuda: __intrinsic_asm "(*$2 = atomicMin($0, $1))";
- case metal:
- __metalInterlocked_min(__getMetalAtomicRef(dest), value, original_value);
- return;
- case spirv:
- spirv_asm
- {
- %v:$$int = OpAtomicSMin &dest Device None $value;
- OpStore &original_value %v
- };
+ case hlsl: __intrinsic_asm "InterlockedExchange"; // uses the HLSL intrinsic name directly rather than a helper
}
}
-__glsl_version(430)
-[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)]
-void InterlockedMin(__ref uint dest, uint value, out uint original_value)
+[ForceInline] // 64-bit overload; the target switch below only has an hlsl case
+void InterlockedMax(__ref $(T) dest, $(T) value)
{
__target_switch
{
- case hlsl: __intrinsic_asm "InterlockedMin";
- case glsl: __intrinsic_asm "($2 = $atomicMin($A, $1))";
- case cuda: __intrinsic_asm "(*$2 = (uint)atomicMin((int*)$0, $1))";
- case metal:
- __metalInterlocked_min(__getMetalAtomicRef(dest), value, original_value);
- return;
- case spirv:
- spirv_asm
- {
- %v:$$uint = OpAtomicUMin &dest Device None $value;
- OpStore &original_value %v
- };
+ case hlsl: __intrinsic_asm "InterlockedMax"; // uses the HLSL intrinsic name directly rather than a helper
}
}
[ForceInline]
-void InterlockedMin(__ref int64_t dest, int64_t value)
+void InterlockedMax(__ref $(T) dest, $(T) value, out $(T) original_value) // 64-bit overload with `original_value`; only an hlsl case below
{
__target_switch
{
- case hlsl: __intrinsic_asm "InterlockedMin";
+ case hlsl: __intrinsic_asm "InterlockedMax"; // uses the HLSL intrinsic name directly rather than a helper
}
}
[ForceInline]
-void InterlockedMin(__ref int64_t dest, int64_t value, out int64_t original_value)
+void InterlockedMin(__ref $(T) dest, $(T) value)
{
__target_switch
{
@@ -9576,7 +9465,7 @@ void InterlockedMin(__ref int64_t dest, int64_t value, out int64_t original_v
}
[ForceInline]
-void InterlockedMin(__ref uint64_t dest, uint64_t value)
+void InterlockedMin(__ref $(T) dest, $(T) value, out $(T) original_value)
{
__target_switch
{
@@ -9585,215 +9474,219 @@ void InterlockedMin(__ref uint64_t dest, uint64_t value)
}
[ForceInline]
-void InterlockedMin(__ref uint64_t dest, uint64_t value, out uint64_t original_value)
+void InterlockedOr(__ref $(T) dest, $(T) value) // 64-bit overload; the target switch below only has an hlsl case
{
__target_switch
{
- case hlsl: __intrinsic_asm "InterlockedMin";
+ case hlsl: __intrinsic_asm "InterlockedOr"; // uses the HLSL intrinsic name directly rather than a helper
}
}
-__glsl_version(430)
-[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)]
-void InterlockedOr(__ref int dest, int value)
+[ForceInline] // 64-bit overload with `original_value`; only the hlsl case of the old body is kept
+void InterlockedOr(__ref $(T) dest, $(T) value, out $(T) original_value)
{
__target_switch
{
case hlsl: __intrinsic_asm "InterlockedOr";
- case cuda: __intrinsic_asm "atomicOr((int*)$0, $1)";
- case glsl: __intrinsic_asm "$atomicOr($A, $1)";
- case metal:
- __metalInterlocked_or(__getMetalAtomicRef(dest), value);
- return;
- case spirv:
- spirv_asm
- {
- result:$$int = OpAtomicOr &dest Device None $value;
- };
}
}
-__glsl_version(430)
-[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)]
-void InterlockedOr(__ref uint dest, uint value)
+[ForceInline] // 64-bit overload; the target switch below only has an hlsl case
+void InterlockedXor(__ref $(T) dest, $(T) value)
{
__target_switch
{
- case hlsl: __intrinsic_asm "InterlockedOr";
- case cuda: __intrinsic_asm "atomicOr((int*)$0, $1)";
- case glsl: __intrinsic_asm "$atomicOr($A, $1)";
- case metal:
- __metalInterlocked_or(__getMetalAtomicRef(dest), value);
- return;
- case spirv:
- spirv_asm
- {
- result:$$uint = OpAtomicOr &dest Device None $value;
- };
+ case hlsl: __intrinsic_asm "InterlockedXor"; // uses the HLSL intrinsic name directly rather than a helper
}
}
-__glsl_version(430)
-[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)]
-void InterlockedOr(__ref int dest, int value, out int original_value)
+[ForceInline] // 64-bit overload with `original_value`; the target switch below only has an hlsl case
+void InterlockedXor(__ref $(T) dest, $(T) value, out $(T) original_value)
{
- __target_switch
+ __target_switch
{
- case hlsl: __intrinsic_asm "InterlockedOr";
- case glsl: __intrinsic_asm "($2 = atomicOr($0, $1))";
- case cuda: __intrinsic_asm "(*$2 = atomicOr($0, $1))";
- case metal:
- __metalInterlocked_or(__getMetalAtomicRef(dest), value, original_value);
- return;
- case spirv:
- spirv_asm
- {
- %original:$$int = OpAtomicOr &dest Device None $value;
- OpStore &original_value %original
- };
+ case hlsl: __intrinsic_asm "InterlockedXor"; // uses the HLSL intrinsic name directly rather than a helper
}
}
+${{{{
+} // for(const char* T : {"int64_t", "uint64_t"})
+}}}}
+
+[ForceInline] // int overload; dispatches to a per-target compare-exchange helper
__glsl_version(430)
[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)]
-void InterlockedOr(__ref uint dest, uint value, out uint original_value)
+void InterlockedCompareExchange(__ref int dest, int compare_value, int value, out int original_value)
{
- __target_switch
+ static_assert(__isTextureScalarAccess(dest) || !__isTextureAccess(dest), "Atomic must be applied to scalar texture or non-texture"); // texture accesses must be scalar
+ __target_switch
{
- case hlsl: __intrinsic_asm "InterlockedOr";
- case glsl: __intrinsic_asm "($2 = atomicOr($0, $1))";
- case cuda: __intrinsic_asm "(*$2 = atomicOr((int*)$0, $1))";
+ case hlsl: __hlslInterlocked_compare_exchange(dest, compare_value, value, original_value);
+ case glsl: __glslInterlocked_compare_exchange(dest, compare_value, value, original_value);
+ case cuda: __cudaInterlocked_compare_exchange(dest, compare_value, value, original_value);
+ case spirv: __spirvInterlocked_compare_exchange(dest, compare_value, value, original_value);
case metal:
- __metalInterlocked_or(__getMetalAtomicRef(dest), value, original_value);
- return;
- case spirv:
- spirv_asm
+ if (__isTextureAccess(dest)) // texture element: use the image atomic helper
{
- %original:$$uint = OpAtomicOr &dest Device None $value;
- OpStore &original_value %original
- };
+ vector<int, 4> vec_compare_value = vector<int, 4>(compare_value); // shared by both image helper calls below
+ if(__isTextureArrayAccess(dest)) // array textures pass the array coordinate as its own argument
+ {
+ __metalImageInterlocked_compare_exchange(__extractTextureFromTextureAccess(dest),
+ __extractCoordFromTextureAccess(dest), __extractArrayCoordFromTextureAccess(dest), vec_compare_value, vector<int, 4>(value), original_value);
+ }
+ else
+ {
+ __metalImageInterlocked_compare_exchange(__extractTextureFromTextureAccess(dest),
+ __extractCoordFromTextureAccess(dest), vec_compare_value, vector<int, 4>(value), original_value);
+ }
+ }
+ else // not a texture access: operate through the Metal atomic reference
+ {
+ __metalInterlocked_compare_exchange(__getMetalAtomicRef(dest), compare_value, value, original_value);
+ }
+ return;
}
}
[ForceInline]
-void InterlockedOr(__ref uint64_t dest, uint64_t value)
+__glsl_version(430)
+[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)]
+void InterlockedCompareExchange(__ref uint dest, uint compare_value, uint value, out uint original_value) // uint overload; same structure as the int overload
{
+ static_assert(__isTextureScalarAccess(dest) || !__isTextureAccess(dest), "Atomic must be applied to scalar texture or non-texture"); // texture accesses must be scalar
__target_switch
{
- case hlsl: __intrinsic_asm "InterlockedOr";
+ case hlsl: __hlslInterlocked_compare_exchange(dest, compare_value, value, original_value);
+ case cuda: __cudaInterlocked_compare_exchange(dest, compare_value, value, original_value);
+ case glsl: __glslInterlocked_compare_exchange(dest, compare_value, value, original_value);
+ case spirv: __spirvInterlocked_compare_exchange(dest, compare_value, value, original_value);
+ case metal:
+ if (__isTextureAccess(dest)) // texture element: use the image atomic helper
+ {
+ vector<uint, 4> vec_compare_value = vector<uint, 4>(compare_value); // shared by both image helper calls below
+ if(__isTextureArrayAccess(dest)) // array textures pass the array coordinate as its own argument
+ {
+ __metalImageInterlocked_compare_exchange(__extractTextureFromTextureAccess(dest),
+ __extractCoordFromTextureAccess(dest), __extractArrayCoordFromTextureAccess(dest), vec_compare_value, vector<uint, 4>(value), original_value);
+ }
+ else
+ {
+ __metalImageInterlocked_compare_exchange(__extractTextureFromTextureAccess(dest),
+ __extractCoordFromTextureAccess(dest), vec_compare_value, vector<uint, 4>(value), original_value);
+ }
+ }
+ else // not a texture access: operate through the Metal atomic reference
+ {
+ __metalInterlocked_compare_exchange(__getMetalAtomicRef(dest), compare_value, value, original_value);
+ }
+ return;
}
}
[ForceInline]
-void InterlockedOr(__ref uint64_t dest, uint64_t value, out uint64_t original_value)
+void InterlockedCompareExchangeFloatBitwise(__ref float dest, float compare_value, float value) // float overload without `original_value`; hlsl and metal cases only
{
+ static_assert(__isTextureScalarAccess(dest) || !__isTextureAccess(dest), "Atomic must be applied to scalar texture or non-texture");
__target_switch
{
- case hlsl: __intrinsic_asm "InterlockedOr";
+ case hlsl: __hlslInterlocked_compare_exchange_float_bitwise(dest, compare_value, value);
+ case metal:
+ static_assert(!__isTextureAccess(dest), "float atomic texture operations are disallowed with Metal target's"); // stricter than the check above: no texture access at all on Metal
+ __metalInterlocked_compare_exchange(__getMetalAtomicRef(dest), compare_value, value);
+ return;
}
}
-__glsl_version(430)
-[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)]
-void InterlockedXor(__ref int dest, int value)
+[ForceInline] // float overload with `original_value`; hlsl and metal cases only
+void InterlockedCompareExchangeFloatBitwise(__ref float dest, float compare_value, float value, out float original_value)
{
+ static_assert(__isTextureScalarAccess(dest) || !__isTextureAccess(dest), "Atomic must be applied to scalar texture or non-texture");
__target_switch
{
- case hlsl: __intrinsic_asm "InterlockedXor";
- case cuda: __intrinsic_asm "atomicXor((int*)$0, $1)";
- case glsl: __intrinsic_asm "$atomicXor($A, $1)";
+ case hlsl: __hlslInterlocked_compare_exchange_float_bitwise(dest, compare_value, value, original_value);
case metal:
- __metalInterlocked_xor(__getMetalAtomicRef(dest), value);
+ static_assert(!__isTextureAccess(dest), "float atomic texture operations are disallowed with Metal target's"); // stricter than the check above: no texture access at all on Metal
+ __metalInterlocked_compare_exchange(__getMetalAtomicRef(dest), compare_value, value, original_value);
return;
- case spirv:
- spirv_asm
- {
- result:$$int = OpAtomicXor &dest Device None $value;
- };
}
}
+[ForceInline] // int overload; returns nothing and has no out parameter
__glsl_version(430)
-[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)]
-void InterlockedXor(__ref uint dest, uint value)
+[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] // capability list has no metal entry, and there is no metal case below
+void InterlockedCompareStore(__ref int dest, int compare_value, int value)
{
__target_switch
{
- case hlsl: __intrinsic_asm "InterlockedXor";
- case cuda: __intrinsic_asm "atomicXor((int*)$0, $1)";
- case glsl: __intrinsic_asm "$atomicXor($A, $1)";
- case metal:
- __metalInterlocked_xor(__getMetalAtomicRef(dest), value);
- return;
+ case hlsl: __intrinsic_asm "InterlockedCompareStore";
+ case glsl: __intrinsic_asm "$atomicCompSwap($A, $1, $2)";
+ case cuda: __intrinsic_asm "atomicCAS($0, $1, $2)";
case spirv:
spirv_asm
{
- result:$$uint = OpAtomicXor &dest Device None $value;
+ result:$$int = OpAtomicCompareExchange &dest Device None None $value $compare_value;
};
}
}
+[ForceInline] // uint overload; returns nothing and has no out parameter
__glsl_version(430)
-[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)]
-void InterlockedXor(__ref int dest, int value, out int original_value)
+[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda)] // capability list has no metal entry, and there is no metal case below
+void InterlockedCompareStore(__ref uint dest, uint compare_value, uint value)
{
- __target_switch
+ __target_switch
{
- case hlsl: __intrinsic_asm "InterlockedXor";
- case glsl: __intrinsic_asm "($2 = atomicXor($0, $1))";
- case cuda: __intrinsic_asm "(*$2 = atomicXor($0, $1))";
- case metal:
- __metalInterlocked_xor(__getMetalAtomicRef(dest), value, original_value);
- return;
+ case hlsl: __intrinsic_asm "InterlockedCompareStore";
+ case glsl: __intrinsic_asm "$atomicCompSwap($A, $1, $2)";
+ case cuda: __intrinsic_asm "atomicCAS((int*)$0, $1, $2)"; // the uint destination is cast to int* for the CUDA call
case spirv:
spirv_asm
{
- %original:$$int = OpAtomicXor &dest Device None $value;
- OpStore &original_value %original
+ result:$$uint = OpAtomicCompareExchange &dest Device None None $value $compare_value;
};
}
}
-__glsl_version(430)
-[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda)]
-void InterlockedXor(__ref uint dest, uint value, out uint original_value)
+[ForceInline] // float overload; the target switch below only has an hlsl case
+void InterlockedCompareStoreFloatBitwise(__ref float dest, float compare_value, float value)
{
- __target_switch
+ __target_switch
{
- case hlsl: __intrinsic_asm "InterlockedXor";
- case glsl: __intrinsic_asm "($2 = atomicXor($0, $1))";
- case cuda: __intrinsic_asm "(*$2 = (uint)atomicXor((int*)$0, $1))";
- case metal:
- __metalInterlocked_xor(__getMetalAtomicRef(dest), value, original_value);
- return;
- case spirv:
- spirv_asm
- {
- %original:$$uint = OpAtomicXor &dest Device None $value;
- OpStore &original_value %original
- };
+ case hlsl: __intrinsic_asm "InterlockedCompareStoreFloatBitwise"; // uses the HLSL intrinsic name directly rather than a helper
}
}
+
[ForceInline]
-void InterlockedXor(__ref uint64_t dest, uint64_t value)
+void InterlockedExchange(__ref float dest, float value) // float overload without `original_value`; hlsl and metal cases only
{
+ static_assert(__isTextureScalarAccess(dest) || !__isTextureAccess(dest), "Atomic must be applied to scalar texture or non-texture");
__target_switch
{
- case hlsl: __intrinsic_asm "InterlockedXor";
+ case hlsl: __hlslInterlocked_exchange(dest, value);
+ case metal:
+ static_assert(!__isTextureAccess(dest), "'float' atomic texture operations are disallowed with Metal target's"); // stricter than the check above: no texture access at all on Metal
+ __metalInterlocked_exchange(__getMetalAtomicRef(dest), value);
+ return;
}
}
[ForceInline]
-void InterlockedXor(__ref uint64_t dest, uint64_t value, out uint64_t original_value)
+void InterlockedExchange(__ref float dest, float value, out float original_value) // float overload with `original_value`; hlsl and metal cases only
{
+ static_assert(__isTextureScalarAccess(dest) || !__isTextureAccess(dest), "Atomic must be applied to scalar texture or non-texture");
__target_switch
{
- case hlsl: __intrinsic_asm "InterlockedXor";
+ case hlsl: __hlslInterlocked_exchange(dest, value, original_value);
+ case metal:
+ static_assert(!__isTextureAccess(dest), "'float' atomic texture operations are disallowed with Metal target's"); // stricter than the check above: no texture access at all on Metal
+ __metalInterlocked_exchange(__getMetalAtomicRef(dest), value, original_value);
+ return;
}
}
+
// Is floating-point value finite?
__generic<T : __BuiltinFloatingPointType>
diff --git a/source/slang/slang-emit-c-like.cpp b/source/slang/slang-emit-c-like.cpp
index 66ee12ca6..7ce2c7900 100644
--- a/source/slang/slang-emit-c-like.cpp
+++ b/source/slang/slang-emit-c-like.cpp
@@ -2566,10 +2566,15 @@ void CLikeSourceEmitter::defaultEmitInstExpr(IRInst* inst, const EmitOpInfo& inO
emitOperand(inst->getOperand(1), rightSide(outerPrec, prec));
break;
}
+
+ case kIROp_ImageSubscript:
+ // We should have legalized ImageSubscript before emit for metal targets
+ if (isMetalTarget(this->getTargetReq()))
+ getSink()->diagnose(inst, Diagnostics::unimplemented, "kIROp_ImageSubscript is unimplemented for Metal, expected legalization beforehand");
+ [[fallthrough]];
case kIROp_GetElement:
case kIROp_MeshOutputRef:
case kIROp_GetElementPtr:
- case kIROp_ImageSubscript:
// HACK: deal with translation of GLSL geometry shader input arrays.
if(auto decoration = inst->getOperand(0)->findDecoration<IRGLSLOuterArrayDecoration>())
{
diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp
index d8f0686d5..243dd65e8 100644
--- a/source/slang/slang-emit.cpp
+++ b/source/slang/slang-emit.cpp
@@ -50,7 +50,9 @@
#include "slang-ir-lower-l-value-cast.h"
#include "slang-ir-lower-reinterpret.h"
#include "slang-ir-loop-unroll.h"
+#include "slang-ir-legalize-extract-from-texture-access.h"
#include "slang-ir-legalize-image-subscript.h"
+#include "slang-ir-legalize-is-texture-access.h"
#include "slang-ir-legalize-vector-types.h"
#include "slang-ir-metadata.h"
#include "slang-ir-optix-entry-point-uniforms.h"
@@ -907,6 +909,9 @@ Result linkAndOptimizeIR(
legalizeVectorTypes(irModule, sink);
+ // Legalize `__isTextureAccess` and related.
+ legalizeIsTextureAccess(irModule);
+
// Once specialization and type legalization have been performed,
// we should perform some of our basic optimization steps again,
// to see if we can clean up any temporaries created by legalization.
@@ -1154,9 +1159,13 @@ Result linkAndOptimizeIR(
if(isD3DTarget(targetRequest))
legalizeNonStructParameterToStructForHLSL(irModule);
+ legalizeExtractFromTextureAccess(irModule);
+
// Legalize `ImageSubscript` loads.
switch (target)
{
+ case CodeGenTarget::MetalLibAssembly:
+ case CodeGenTarget::MetalLib:
case CodeGenTarget::Metal:
case CodeGenTarget::GLSL:
case CodeGenTarget::SPIRV:
diff --git a/source/slang/slang-ir-inst-defs.h b/source/slang/slang-ir-inst-defs.h
index 3132536e3..987486eae 100644
--- a/source/slang/slang-ir-inst-defs.h
+++ b/source/slang/slang-ir-inst-defs.h
@@ -687,6 +687,13 @@ INST(GetPerVertexInputArray, GetPerVertexInputArray, 1, 0)
INST(ForceVarIntoStructTemporarily, ForceVarIntoStructTemporarily, 1, 0)
INST(MetalAtomicCast, MetalAtomicCast, 1, 0)
+INST(IsTextureAccess, IsTextureAccess, 1, 0) // legalized to a bool: is the operand's root address an image subscript?
+INST(IsTextureScalarAccess, IsTextureScalarAccess, 1, 0) // legalized to a bool: image subscript whose texture element type is not a vector
+INST(IsTextureArrayAccess, IsTextureArrayAccess, 1, 0) // legalized to a bool: image subscript whose texture type is an array
+INST(ExtractTextureFromTextureAccess, ExtractTextureFromTextureAccess, 1, 0) // legalized to the image operand of the image subscript
+INST(ExtractCoordFromTextureAccess, ExtractCoordFromTextureAccess, 1, 0) // legalized to the subscript coordinate (minus the array component for array textures)
+INST(ExtractArrayCoordFromTextureAccess, ExtractArrayCoordFromTextureAccess, 1, 0) // legalized to the last coordinate component, or uint 0 for non-array textures
+
INST(MakeArrayList, makeArrayList, 0, 0)
INST(MakeTensorView, makeTensorView, 0, 0)
INST(AllocateTorchTensor, allocTorchTensor, 0, 0)
diff --git a/source/slang/slang-ir-legalize-extract-from-texture-access.cpp b/source/slang/slang-ir-legalize-extract-from-texture-access.cpp
new file mode 100644
index 000000000..de1e244a8
--- /dev/null
+++ b/source/slang/slang-ir-legalize-extract-from-texture-access.cpp
@@ -0,0 +1,136 @@
+#include "slang-ir-legalize-extract-from-texture-access.h"
+
+#include "slang-ir.h"
+#include "slang-ir-insts.h"
+#include "slang-ir-util.h"
+#include "slang-ir-clone.h"
+#include "slang-ir-specialize-address-space.h"
+#include "slang-parameter-binding.h"
+#include "slang-ir-legalize-image-subscript.h"
+#include "slang-ir-legalize-varying-params.h"
+#include "slang-ir-simplify-cfg.h"
+
+namespace Slang
+{
+ // Replaces an ExtractTextureFromTextureAccess inst with the image operand of the
+ // image subscript found at the root of its operand's address chain, then removes
+ // the inst. The caller is expected to have checked that the root is an image
+ // subscript; this is only asserted here.
+ void legalizeExtractTextureFromTextureAccess(IRBuilder& builder, IRInst* inst)
+ {
+     SLANG_ASSERT(inst);
+     builder.setInsertBefore(inst);
+
+     auto* subscript = as<IRImageSubscript>(getRootAddr(inst->getOperand(0)));
+     SLANG_ASSERT(subscript);
+
+     IRInst* texture = subscript->getImage();
+     SLANG_ASSERT(texture);
+
+     inst->replaceUsesWith(texture);
+     inst->removeAndDeallocate();
+
+     // The subscript may still feed other insts; drop it only once nothing uses it.
+     const bool subscriptIsDead = !subscript->hasUses();
+     if (subscriptIsDead)
+         subscript->removeAndDeallocate();
+ }
+
+ // Replaces an ExtractArrayCoordFromTextureAccess inst with the array coordinate of
+ // the image subscript at the root of its operand's address chain:
+ //  - array texture: the final component of the subscript coordinate vector;
+ //  - otherwise: the constant uint 0.
+ // The inst is removed afterwards, and so is the subscript once it has no uses.
+ void legalizeExtractArrayCoordFromTextureAccess(IRBuilder& builder, IRInst* inst)
+ {
+     SLANG_ASSERT(inst);
+     builder.setInsertBefore(inst);
+
+     auto* subscript = as<IRImageSubscript>(getRootAddr(inst->getOperand(0)));
+     SLANG_ASSERT(subscript);
+     SLANG_ASSERT(subscript->getImage());
+
+     auto textureType = as<IRTextureType>(subscript->getImage()->getDataType());
+     IRInst* arrayCoord = nullptr;
+     if (!textureType->isArray())
+     {
+         arrayCoord = builder.getIntValue(builder.getUIntType(), 0);
+     }
+     else
+     {
+         IRInst* fullCoord = subscript->getCoord();
+         IRVectorType* fullCoordType = as<IRVectorType>(fullCoord->getDataType());
+         SLANG_ASSERT(fullCoordType);
+
+         // Swizzle out the last component (index size - 1) as a scalar.
+         auto componentCount = getIRVectorElementSize(fullCoordType);
+         List<UInt> lastComponent = { (UInt)(componentCount - 1) };
+         arrayCoord = builder.emitSwizzle(
+             fullCoordType->getElementType(), fullCoord, 1, lastComponent.getBuffer());
+     }
+
+     inst->replaceUsesWith(arrayCoord);
+     inst->removeAndDeallocate();
+
+     // The subscript may still feed other insts; drop it only once nothing uses it.
+     if (!subscript->hasUses())
+         subscript->removeAndDeallocate();
+ }
+
+ // Replaces an ExtractCoordFromTextureAccess inst with the coordinate of the image
+ // subscript at the root of its operand's address chain. For array textures the
+ // trailing array-index component is dropped, so only the leading components
+ // remain (a scalar when a single component is left). The inst is removed
+ // afterwards, and so is the subscript once it has no remaining uses.
+ void legalizeExtractCoordFromTextureAccess(IRBuilder& builder, IRInst* inst)
+ {
+     SLANG_ASSERT(inst);
+
+     builder.setInsertBefore(inst);
+     IRImageSubscript* imageSubscript = as<IRImageSubscript>(getRootAddr(inst->getOperand(0)));
+     SLANG_ASSERT(imageSubscript);
+     SLANG_ASSERT(imageSubscript->getImage());
+
+     auto image = as<IRTextureType>(imageSubscript->getImage()->getDataType());
+     SLANG_ASSERT(image);
+     IRInst* coord = imageSubscript->getCoord();
+     if (image->isArray())
+     {
+         // Extract all but the final element, which is the 'ArrayCoord'.
+         IRVectorType* coordType = as<IRVectorType>(coord->getDataType());
+         // Validate the cast before the type is used.
+         SLANG_ASSERT(coordType);
+         auto coordSize = getIRVectorElementSize(coordType);
+
+         IRType* newCoordType = nullptr;
+         auto newCoordSize = coordSize - 1;
+         if (newCoordSize != 1)
+             newCoordType = builder.getVectorType(coordType->getElementType(), newCoordSize);
+         else
+             newCoordType = coordType->getElementType();
+
+         // Swizzle indices are zero-based (the array coordinate lives at index
+         // `coordSize - 1`), so the leading components are 0 .. newCoordSize - 1.
+         List<UInt> swizzleIndicies = {0, 1, 2, 3};
+
+         coord = builder.emitSwizzle(newCoordType, coord, newCoordSize, swizzleIndicies.getBuffer());
+     }
+
+     inst->replaceUsesWith(coord);
+     inst->removeAndDeallocate();
+     // Ensure we are done processing the imageSubscript completely before we remove it.
+     if (!imageSubscript->hasUses())
+         imageSubscript->removeAndDeallocate();
+ }
+
+ // Walks every inst of every function in `module` and legalizes the three
+ // Extract*FromTextureAccess ops whose operand is rooted at an image subscript.
+ // Extract ops rooted at anything else are left untouched.
+ void legalizeExtractFromTextureAccess(IRModule* module)
+ {
+     IRBuilder builder(module);
+     for (auto globalInst : module->getModuleInst()->getChildren())
+     {
+         auto func = as<IRFunc>(globalInst);
+         if (func == nullptr)
+             continue;
+
+         for (auto block : func->getBlocks())
+         {
+             IRInst* current = block->getFirstInst();
+             while (current)
+             {
+                 // Capture the successor first: legalization deallocates `current`.
+                 IRInst* const successor = current->getNextInst();
+
+                 const auto op = current->getOp();
+                 const bool isExtractOp = op == kIROp_ExtractArrayCoordFromTextureAccess
+                     || op == kIROp_ExtractCoordFromTextureAccess
+                     || op == kIROp_ExtractTextureFromTextureAccess;
+
+                 if (isExtractOp && as<IRImageSubscript>(getRootAddr(current->getOperand(0))))
+                 {
+                     if (op == kIROp_ExtractArrayCoordFromTextureAccess)
+                         legalizeExtractArrayCoordFromTextureAccess(builder, current);
+                     else if (op == kIROp_ExtractCoordFromTextureAccess)
+                         legalizeExtractCoordFromTextureAccess(builder, current);
+                     else
+                         legalizeExtractTextureFromTextureAccess(builder, current);
+                 }
+
+                 current = successor;
+             }
+         }
+     }
+ }
+}
+
diff --git a/source/slang/slang-ir-legalize-extract-from-texture-access.h b/source/slang/slang-ir-legalize-extract-from-texture-access.h
new file mode 100644
index 000000000..016c86def
--- /dev/null
+++ b/source/slang/slang-ir-legalize-extract-from-texture-access.h
@@ -0,0 +1,11 @@
+#pragma once
+
+#include "slang-ir.h"
+#include "slang-compiler.h"
+
+namespace Slang
+{
+ class DiagnosticSink; // forward declaration; the declaration below does not take a sink
+
+ void legalizeExtractFromTextureAccess(IRModule* module); // rewrites Extract*FromTextureAccess insts rooted at an image subscript (see the .cpp)
+}
diff --git a/source/slang/slang-ir-legalize-image-subscript.cpp b/source/slang/slang-ir-legalize-image-subscript.cpp
index b5b240675..4c7de2b95 100644
--- a/source/slang/slang-ir-legalize-image-subscript.cpp
+++ b/source/slang/slang-ir-legalize-image-subscript.cpp
@@ -15,16 +15,8 @@ namespace Slang
SLANG_ASSERT(storeInst);
builder.setInsertBefore(storeInst);
- IRImageSubscript* imageSubscript = nullptr;
auto getElementPtr = as<IRGetElementPtr>(storeInst->getOperand(0));
- if(getElementPtr)
- {
- imageSubscript = as<IRImageSubscript>(getElementPtr->getBase());
- }
- else
- {
- imageSubscript = as<IRImageSubscript>(storeInst->getOperand(0));
- }
+ IRImageSubscript* imageSubscript = as<IRImageSubscript>(getRootAddr(storeInst->getOperand(0)));
SLANG_ASSERT(imageSubscript);
SLANG_ASSERT(imageSubscript->getImage());
IRTextureType* textureType = as<IRTextureType>(imageSubscript->getImage()->getFullType());
@@ -190,10 +182,9 @@ namespace Slang
{
case kIROp_Store:
case kIROp_SwizzledStore:
- if (getRootAddr(inst->getOperand(0))->getOp() == kIROp_ImageSubscript)
- {
+ if (as<IRImageSubscript>(getRootAddr(inst->getOperand(0))))
legalizeStore(target, builder, inst, sink);
- }
+ continue;
}
}
}
diff --git a/source/slang/slang-ir-legalize-is-texture-access.cpp b/source/slang/slang-ir-legalize-is-texture-access.cpp
new file mode 100644
index 000000000..929da591b
--- /dev/null
+++ b/source/slang/slang-ir-legalize-is-texture-access.cpp
@@ -0,0 +1,84 @@
+#include "slang-ir-legalize-is-texture-access.h"
+
+#include "slang-ir.h"
+#include "slang-ir-insts.h"
+#include "slang-ir-util.h"
+#include "slang-ir-clone.h"
+#include "slang-ir-specialize-address-space.h"
+#include "slang-parameter-binding.h"
+#include "slang-ir-legalize-image-subscript.h"
+#include "slang-ir-legalize-varying-params.h"
+#include "slang-ir-simplify-cfg.h"
+
+namespace Slang
+{
+    // Resolves the address held in operand 0 of `inst` to its root via `getRootAddr`
+    // and returns that root as an `IRImageSubscript`, or null when the root is not one.
+    IRImageSubscript* getTextureAccess(IRInst* inst)
+    {
+        auto accessedAddr = inst->getOperand(0);
+        auto rootAddr = getRootAddr(accessedAddr);
+        return as<IRImageSubscript>(rootAddr);
+    }
+
+    // Folds the texture-access query instructions (`IsTextureAccess`,
+    // `IsTextureArrayAccess`, `IsTextureScalarAccess`) found in the functions of
+    // `module` into boolean constants:
+    //   * every use of the query is replaced by the constant result,
+    //   * the query instruction itself is removed,
+    //   * functions in which a query folded to `false` get a fast CFG
+    //     simplification afterwards.
+    //
+    // `module` is the IR module whose global-scope functions are scanned.
+    void legalizeIsTextureAccess(IRModule* module)
+    {
+        HashSet<IRFunc*> functionsToSimplifyCFG;
+        IRBuilder builder(module);
+
+        // Returns the texture type of the image behind `access`, or null when
+        // there is no texture access or the image's data type is not an
+        // `IRTextureType`. Guarding the cast here keeps the array/scalar
+        // queries from dereferencing a null type.
+        auto getAccessedTextureType = [](IRImageSubscript* access) -> IRTextureType*
+        {
+            if (!access)
+                return nullptr;
+            return as<IRTextureType>(access->getImage()->getDataType());
+        };
+
+        for (auto globalInst : module->getModuleInst()->getChildren())
+        {
+            auto func = as<IRFunc>(globalInst);
+            if (!func)
+                continue;
+            for (auto block : func->getBlocks())
+            {
+                // `next` is captured before the current inst may be removed.
+                IRInst* next = nullptr;
+                for (auto inst = block->getFirstInst(); inst; inst = next)
+                {
+                    next = inst->getNextInst();
+
+                    bool queryResult = false;
+                    switch (inst->getOp())
+                    {
+                    case kIROp_IsTextureAccess:
+                        queryResult = getTextureAccess(inst) != nullptr;
+                        break;
+                    case kIROp_IsTextureArrayAccess:
+                        {
+                            auto textureType = getAccessedTextureType(getTextureAccess(inst));
+                            queryResult = textureType && textureType->isArray();
+                            break;
+                        }
+                    case kIROp_IsTextureScalarAccess:
+                        {
+                            // "Scalar" means the texture's element type is not a vector type.
+                            auto textureType = getAccessedTextureType(getTextureAccess(inst));
+                            queryResult = textureType && !as<IRVectorType>(textureType->getElementType());
+                            break;
+                        }
+                    default:
+                        // Not a texture-access query; leave the inst untouched.
+                        continue;
+                    }
+
+                    inst->replaceUsesWith(builder.getBoolValue(queryResult));
+                    if (!queryResult)
+                        functionsToSimplifyCFG.add(func);
+                    inst->removeAndDeallocate();
+                }
+            }
+        }
+
+        // A simplifyCFG is required so that Slang does not evaluate the
+        // 'IRTextureType' code path for an 'inst' that is not an
+        // 'IRTextureType'/texture accessor.
+        for (auto func : functionsToSimplifyCFG)
+            simplifyCFG(func, CFGSimplificationOptions::getFast());
+    }
+}
+
diff --git a/source/slang/slang-ir-legalize-is-texture-access.h b/source/slang/slang-ir-legalize-is-texture-access.h
new file mode 100644
index 000000000..eccfe8fcb
--- /dev/null
+++ b/source/slang/slang-ir-legalize-is-texture-access.h
@@ -0,0 +1,11 @@
+#pragma once
+
+#include "slang-ir.h"
+#include "slang-compiler.h"
+
+namespace Slang
+{
+ class DiagnosticSink; // Forward declaration; not referenced by the declaration below.
+ // Folds each IsTextureAccess / IsTextureArrayAccess / IsTextureScalarAccess inst in the module's functions to a bool constant and removes the inst.
+ void legalizeIsTextureAccess(IRModule* module);
+}