diff options
| author | Tim Foley <tfoleyNV@users.noreply.github.com> | 2018-09-24 19:17:12 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2018-09-24 19:17:12 -0700 |
| commit | 4f979d74acf2800d7bd2b38155d2bdc47b57d54b (patch) | |
| tree | 0aba87d2213fc5e6023a8b331c80756559b5f593 /source | |
| parent | 32c8479fb964b1936564ef9cf68e434500d7b7df (diff) | |
Fixes around atomic operations (#652)
* Fixes around atomic operations
Work on #651
The existing handling of atomic operations had a few issues:
* The HLSL atomic functions (`Interlocked*`) didn't have mappings to GLSL
* Atomic operations on images weren't supported at all because the subscript operation on `RWTexture*` types didn't provide a `ref` accessor
* The HLSL atomic functions were only providing the overloads that return the previous value through an `out` parameter, and not the ones that ignore the previous value.
This change fixes these issues with the following changes:
* `RWTexture*` types now have a `ref` accessor on their subscript operation which maps to a new `imageSubscript` operation in the IR. By default this translates back to `tex[idx]` in output HLSL, but it makes a custom mapping possible for GLSL
* The `Interlocked*` function definitions were expanded to include the overloads without the `out` parameter
* GLSL translations were added for the `Interlocked*` functions. These mappings use some new customization points in the intrinsic operation emit logic to support outputting calls to either `atomic*` or `imageAtomic*` as required, and to expand an argument that is a subscript into an image as multiple arguments.
This whole approach is quite hacky, and it doesn't seem like the approach we should take in the long run.
* Fix: typo in InterlockedAnd lowering
One of the cases of `InterlockedAnd` was lowering to `atomicAnd` with a `$0` where we wanted the `$A` substitution to handle the possibility of an image.
Diffstat (limited to 'source')
| -rw-r--r-- | source/slang/core.meta.slang | 6 | ||||
| -rw-r--r-- | source/slang/core.meta.slang.h | 6 | ||||
| -rw-r--r-- | source/slang/emit.cpp | 72 | ||||
| -rw-r--r-- | source/slang/hlsl.meta.slang | 70 | ||||
| -rw-r--r-- | source/slang/hlsl.meta.slang.h | 70 | ||||
| -rw-r--r-- | source/slang/ir-inst-defs.h | 3 |
6 files changed, 217 insertions, 10 deletions
diff --git a/source/slang/core.meta.slang b/source/slang/core.meta.slang index 56f5d8d1f..aa93863df 100644 --- a/source/slang/core.meta.slang +++ b/source/slang/core.meta.slang @@ -784,11 +784,7 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt) default: sb << "__target_intrinsic(glsl, \"imageStore($0, " << ivecN << "($1), $V2)\") set;\n"; - // Note: HLSL doesn't support component-granularity access into typed UAVs, - // and also doesn't support atomic operations on them. As such, there should - // be no reason why a `ref` accessor is required here. - // - // sb << "ref;\n"; + sb << "__intrinsic_op(" << int(kIROp_ImageSubscript) << ") ref;\n"; break; } diff --git a/source/slang/core.meta.slang.h b/source/slang/core.meta.slang.h index 3b6801a25..01bb01137 100644 --- a/source/slang/core.meta.slang.h +++ b/source/slang/core.meta.slang.h @@ -799,11 +799,7 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt) default: sb << "__target_intrinsic(glsl, \"imageStore($0, " << ivecN << "($1), $V2)\") set;\n"; - // Note: HLSL doesn't support component-granularity access into typed UAVs, - // and also doesn't support atomic operations on them. As such, there should - // be no reason why a `ref` accessor is required here. - // - // sb << "ref;\n"; + sb << "__intrinsic_op(" << int(kIROp_ImageSubscript) << ") ref;\n"; break; } diff --git a/source/slang/emit.cpp b/source/slang/emit.cpp index 381b3c7c8..1b527c01f 100644 --- a/source/slang/emit.cpp +++ b/source/slang/emit.cpp @@ -3164,6 +3164,77 @@ struct EmitVisitor } break; + case 'a': + { + // We have an operation that needs to lower to either + // `atomic*` or `imageAtomic*` for GLSL, depending on + // whether its first operand is a subscript into an + // array. This `$a` is the first `a` in `atomic`, + // so we will replace it accordingly. + // + // TODO: This distinction should be made earlier, + // with the front-end picking the right overload + // based on the "address space" of the argument. 
+ + UInt argIndex = 0; + SLANG_RELEASE_ASSERT(argCount > argIndex); + + auto arg = args[argIndex].get(); + if(arg->op == kIROp_ImageSubscript) + { + Emit("imageA"); + } + else + { + Emit("a"); + } + } + break; + + case 'A': + { + // We have an operand that represents the destination + // of an atomic operation in GLSL, and it should + // be lowered based on whether it is an ordinary l-value, + // or an image subscript. In the image subscript case + // this operand will turn into multiple arguments + // to the `imageAtomic*` function. + // + + UInt argIndex = 0; + SLANG_RELEASE_ASSERT(argCount > argIndex); + + auto arg = args[argIndex].get(); + if(arg->op == kIROp_ImageSubscript) + { + if(getTarget(ctx) == CodeGenTarget::GLSL) + { + // TODO: we don't handle the multisample + // case correctly here, where the last + // component of the image coordinate needs + // to be broken out into its own argument. + // + Emit("("); + emitIROperand(ctx, arg->getOperand(0), mode); + Emit("), ("); + emitIROperand(ctx, arg->getOperand(1), mode); + Emit(")"); + } + else + { + Emit("("); + emitIROperand(ctx, arg, mode); + Emit(")"); + } + } + else + { + Emit("("); + emitIROperand(ctx, arg, mode); + Emit(")"); + } + } + break; default: SLANG_UNEXPECTED("bad format in intrinsic definition"); @@ -3542,6 +3613,7 @@ struct EmitVisitor case kIROp_getElement: case kIROp_getElementPtr: + case kIROp_ImageSubscript: // HACK: deal with translation of GLSL geometry shader input arrays. 
if(auto decoration = inst->getOperand(0)->findDecoration<IRGLSLOuterArrayDecoration>()) { diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index 208f98278..ac993179e 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -650,31 +650,101 @@ __target_intrinsic(glsl, "groupMemoryBarrier()); (barrier()") void GroupMemoryBarrierWithGroupSync(); // Atomics + +__target_intrinsic(glsl, "$atomicAdd($A, $1)") +void InterlockedAdd(__ref int dest, int value); + +__target_intrinsic(glsl, "$atomicAdd($A, $1)") +void InterlockedAdd(__ref uint dest, uint value); + +__target_intrinsic(glsl, "($2 = $atomicAdd($A, $1))") void InterlockedAdd(__ref int dest, int value, out int original_value); + +__target_intrinsic(glsl, "($2 = $atomicAdd($A, $1))") void InterlockedAdd(__ref uint dest, uint value, out uint original_value); +__target_intrinsic(glsl, "$atomicAnd($A, $1)") +void InterlockedAnd(__ref int dest, int value); + +__target_intrinsic(glsl, "$atomicAnd($A, $1)") +void InterlockedAnd(__ref uint dest, uint value); + +__target_intrinsic(glsl, "($2 = $atomicAnd($A, $1))") void InterlockedAnd(__ref int dest, int value, out int original_value); + +__target_intrinsic(glsl, "($2 = $atomicAnd($A, $1))") void InterlockedAnd(__ref uint dest, uint value, out uint original_value); +__target_intrinsic(glsl, "($3 = $atomicCompSwap($A, $1, $2))") void InterlockedCompareExchange(__ref int dest, int compare_value, int value, out int original_value); + +__target_intrinsic(glsl, "($3 = $atomicCompSwap($A, $1, $2))") void InterlockedCompareExchange(__ref uint dest, uint compare_value, uint value, out uint original_value); +__target_intrinsic(glsl, "$atomicCompSwap($A, $1, $2)") void InterlockedCompareStore(__ref int dest, int compare_value, int value); + +__target_intrinsic(glsl, "$atomicCompSwap($A, $1, $2)") void InterlockedCompareStore(__ref uint dest, uint compare_value, uint value); +__target_intrinsic(glsl, "$atomicExchange($A, $1)") +void 
InterlockedExchange(__ref int dest, int value); + +__target_intrinsic(glsl, "$atomicExchange($A, $1)") +void InterlockedExchange(__ref uint dest, uint value); + +__target_intrinsic(glsl, "($2 = $atomicExchange($A, $1))") void InterlockedExchange(__ref int dest, int value, out int original_value); + +__target_intrinsic(glsl, "($2 = $atomicExchange($A, $1))") void InterlockedExchange(__ref uint dest, uint value, out uint original_value); +__target_intrinsic(glsl, "$atomicMax($A, $1)") +void InterlockedMax(__ref int dest, int value); + +__target_intrinsic(glsl, "$atomicMax($A, $1)") +void InterlockedMax(__ref uint dest, uint value); + +__target_intrinsic(glsl, "($2 = $atomicMax($A, $1))") void InterlockedMax(__ref int dest, int value, out int original_value); + +__target_intrinsic(glsl, "($2 = $atomicMax($A, $1))") void InterlockedMax(__ref uint dest, uint value, out uint original_value); +__target_intrinsic(glsl, "$atomicMin($A, $1)") +void InterlockedMin(in out int dest, int value); + +__target_intrinsic(glsl, "$atomicMin($A, $1)") +void InterlockedMin(in out uint dest, uint value); + +__target_intrinsic(glsl, "($2 = $atomicMin($A, $1))") void InterlockedMin(in out int dest, int value, out int original_value); + +__target_intrinsic(glsl, "($2 = $atomicMin($A, $1))") void InterlockedMin(in out uint dest, uint value, out uint original_value); +__target_intrinsic(glsl, "$atomicOr($A, $1)") +void InterlockedOr(__ref int dest, int value); + +__target_intrinsic(glsl, "$atomicOr($A, $1)") +void InterlockedOr(__ref uint dest, uint value); + +__target_intrinsic(glsl, "($2 = $atomicOr($A, $1))") void InterlockedOr(__ref int dest, int value, out int original_value); + +__target_intrinsic(glsl, "($2 = $atomicOr($A, $1))") void InterlockedOr(__ref uint dest, uint value, out uint original_value); +__target_intrinsic(glsl, "$atomicXor($A, $1)") +void InterlockedXor(__ref int dest, int value); + +__target_intrinsic(glsl, "$atomicXor($A, $1)") +void InterlockedXor(__ref uint dest, 
uint value); + +__target_intrinsic(glsl, "($2 = $atomicXor($A, $1))") void InterlockedXor(__ref int dest, int value, out int original_value); + +__target_intrinsic(glsl, "($2 = $atomicXor($A, $1))") void InterlockedXor(__ref uint dest, uint value, out uint original_value); // Is floating-point value finite? diff --git a/source/slang/hlsl.meta.slang.h b/source/slang/hlsl.meta.slang.h index f5ed05745..967330729 100644 --- a/source/slang/hlsl.meta.slang.h +++ b/source/slang/hlsl.meta.slang.h @@ -695,31 +695,101 @@ SLANG_RAW("__target_intrinsic(glsl, \"groupMemoryBarrier()); (barrier()\")\n") SLANG_RAW("void GroupMemoryBarrierWithGroupSync();\n") SLANG_RAW("\n") SLANG_RAW("// Atomics\n") +SLANG_RAW("\n") +SLANG_RAW("__target_intrinsic(glsl, \"$atomicAdd($A, $1)\")\n") +SLANG_RAW("void InterlockedAdd(__ref int dest, int value);\n") +SLANG_RAW("\n") +SLANG_RAW("__target_intrinsic(glsl, \"$atomicAdd($A, $1)\")\n") +SLANG_RAW("void InterlockedAdd(__ref uint dest, uint value);\n") +SLANG_RAW("\n") +SLANG_RAW("__target_intrinsic(glsl, \"($2 = $atomicAdd($A, $1))\")\n") SLANG_RAW("void InterlockedAdd(__ref int dest, int value, out int original_value);\n") +SLANG_RAW("\n") +SLANG_RAW("__target_intrinsic(glsl, \"($2 = $atomicAdd($A, $1))\")\n") SLANG_RAW("void InterlockedAdd(__ref uint dest, uint value, out uint original_value);\n") SLANG_RAW("\n") +SLANG_RAW("__target_intrinsic(glsl, \"$atomicAnd($A, $1)\")\n") +SLANG_RAW("void InterlockedAnd(__ref int dest, int value);\n") +SLANG_RAW("\n") +SLANG_RAW("__target_intrinsic(glsl, \"$atomicAnd($A, $1)\")\n") +SLANG_RAW("void InterlockedAnd(__ref uint dest, uint value);\n") +SLANG_RAW("\n") +SLANG_RAW("__target_intrinsic(glsl, \"($2 = $atomicAnd($A, $1))\")\n") SLANG_RAW("void InterlockedAnd(__ref int dest, int value, out int original_value);\n") +SLANG_RAW("\n") +SLANG_RAW("__target_intrinsic(glsl, \"($2 = $atomicAnd($A, $1))\")\n") SLANG_RAW("void InterlockedAnd(__ref uint dest, uint value, out uint original_value);\n") 
SLANG_RAW("\n") +SLANG_RAW("__target_intrinsic(glsl, \"($3 = $atomicCompSwap($A, $1, $2))\")\n") SLANG_RAW("void InterlockedCompareExchange(__ref int dest, int compare_value, int value, out int original_value);\n") +SLANG_RAW("\n") +SLANG_RAW("__target_intrinsic(glsl, \"($3 = $atomicCompSwap($A, $1, $2))\")\n") SLANG_RAW("void InterlockedCompareExchange(__ref uint dest, uint compare_value, uint value, out uint original_value);\n") SLANG_RAW("\n") +SLANG_RAW("__target_intrinsic(glsl, \"$atomicCompSwap($A, $1, $2)\")\n") SLANG_RAW("void InterlockedCompareStore(__ref int dest, int compare_value, int value);\n") +SLANG_RAW("\n") +SLANG_RAW("__target_intrinsic(glsl, \"$atomicCompSwap($A, $1, $2)\")\n") SLANG_RAW("void InterlockedCompareStore(__ref uint dest, uint compare_value, uint value);\n") SLANG_RAW("\n") +SLANG_RAW("__target_intrinsic(glsl, \"$atomicExchange($A, $1)\")\n") +SLANG_RAW("void InterlockedExchange(__ref int dest, int value);\n") +SLANG_RAW("\n") +SLANG_RAW("__target_intrinsic(glsl, \"$atomicExchange($A, $1)\")\n") +SLANG_RAW("void InterlockedExchange(__ref uint dest, uint value);\n") +SLANG_RAW("\n") +SLANG_RAW("__target_intrinsic(glsl, \"($2 = $atomicExchange($A, $1))\")\n") SLANG_RAW("void InterlockedExchange(__ref int dest, int value, out int original_value);\n") +SLANG_RAW("\n") +SLANG_RAW("__target_intrinsic(glsl, \"($2 = $atomicExchange($A, $1))\")\n") SLANG_RAW("void InterlockedExchange(__ref uint dest, uint value, out uint original_value);\n") SLANG_RAW("\n") +SLANG_RAW("__target_intrinsic(glsl, \"$atomicMax($A, $1)\")\n") +SLANG_RAW("void InterlockedMax(__ref int dest, int value);\n") +SLANG_RAW("\n") +SLANG_RAW("__target_intrinsic(glsl, \"$atomicMax($A, $1)\")\n") +SLANG_RAW("void InterlockedMax(__ref uint dest, uint value);\n") +SLANG_RAW("\n") +SLANG_RAW("__target_intrinsic(glsl, \"($2 = $atomicMax($A, $1))\")\n") SLANG_RAW("void InterlockedMax(__ref int dest, int value, out int original_value);\n") +SLANG_RAW("\n") 
+SLANG_RAW("__target_intrinsic(glsl, \"($2 = $atomicMax($A, $1))\")\n") SLANG_RAW("void InterlockedMax(__ref uint dest, uint value, out uint original_value);\n") SLANG_RAW("\n") +SLANG_RAW("__target_intrinsic(glsl, \"$atomicMin($A, $1)\")\n") +SLANG_RAW("void InterlockedMin(in out int dest, int value);\n") +SLANG_RAW("\n") +SLANG_RAW("__target_intrinsic(glsl, \"$atomicMin($A, $1)\")\n") +SLANG_RAW("void InterlockedMin(in out uint dest, uint value);\n") +SLANG_RAW("\n") +SLANG_RAW("__target_intrinsic(glsl, \"($2 = $atomicMin($A, $1))\")\n") SLANG_RAW("void InterlockedMin(in out int dest, int value, out int original_value);\n") +SLANG_RAW("\n") +SLANG_RAW("__target_intrinsic(glsl, \"($2 = $atomicMin($A, $1))\")\n") SLANG_RAW("void InterlockedMin(in out uint dest, uint value, out uint original_value);\n") SLANG_RAW("\n") +SLANG_RAW("__target_intrinsic(glsl, \"$atomicOr($A, $1)\")\n") +SLANG_RAW("void InterlockedOr(__ref int dest, int value);\n") +SLANG_RAW("\n") +SLANG_RAW("__target_intrinsic(glsl, \"$atomicOr($A, $1)\")\n") +SLANG_RAW("void InterlockedOr(__ref uint dest, uint value);\n") +SLANG_RAW("\n") +SLANG_RAW("__target_intrinsic(glsl, \"($2 = $atomicOr($A, $1))\")\n") SLANG_RAW("void InterlockedOr(__ref int dest, int value, out int original_value);\n") +SLANG_RAW("\n") +SLANG_RAW("__target_intrinsic(glsl, \"($2 = $atomicOr($A, $1))\")\n") SLANG_RAW("void InterlockedOr(__ref uint dest, uint value, out uint original_value);\n") SLANG_RAW("\n") +SLANG_RAW("__target_intrinsic(glsl, \"$atomicXor($A, $1)\")\n") +SLANG_RAW("void InterlockedXor(__ref int dest, int value);\n") +SLANG_RAW("\n") +SLANG_RAW("__target_intrinsic(glsl, \"$atomicXor($A, $1)\")\n") +SLANG_RAW("void InterlockedXor(__ref uint dest, uint value);\n") +SLANG_RAW("\n") +SLANG_RAW("__target_intrinsic(glsl, \"($2 = $atomicXor($A, $1))\")\n") SLANG_RAW("void InterlockedXor(__ref int dest, int value, out int original_value);\n") +SLANG_RAW("\n") +SLANG_RAW("__target_intrinsic(glsl, \"($2 = $atomicXor($A, 
$1))\")\n") SLANG_RAW("void InterlockedXor(__ref uint dest, uint value, out uint original_value);\n") SLANG_RAW("\n") SLANG_RAW("// Is floating-point value finite?\n") diff --git a/source/slang/ir-inst-defs.h b/source/slang/ir-inst-defs.h index c12a69304..43437e793 100644 --- a/source/slang/ir-inst-defs.h +++ b/source/slang/ir-inst-defs.h @@ -226,6 +226,9 @@ INST(FieldAddress, get_field_addr, 2, 0) INST(getElement, getElement, 2, 0) INST(getElementPtr, getElementPtr, 2, 0) +// "Subscript" an image at a pixel coordinate to get pointer +INST(ImageSubscript, imageSubscript, 2, 0) + // Construct a vector from a scalar // // %dst = constructVectorFromScalar %T %N %val |
