diff options
| author | Tim Foley <tfoleyNV@users.noreply.github.com> | 2020-09-02 09:51:25 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-09-02 09:51:25 -0700 |
| commit | 7f567df6937b33c653c424af3abb20d32eb80561 (patch) | |
| tree | 34952785f3e4f924ba252e5758cccb1db7b218b2 | |
| parent | c2873f406d544057e0ec61e61fb8580ca768e493 (diff) | |
Add support for (undocumented) HLSL 16-bit bit-cast ops (#1528)
As of SM 6.2, the dxc compiler added support for a set of 16-bit bit-cast operations to mirror the `asuint`, `asfloat`, and `asint` operations that were provided for 32-bit scalar types. These operations are not publicly documented, so we didn't think to add them.
It should be noted that there was already a similar operation in HLSL, called `f32tof16`, that took as input a `float` and then packed a half-precision version of it into the low bits of a `uint`. The problem is that using that operation for `half`->`uint16_t` conversion required a round trip through a `float`, and downstream compilers seemingly can't optimize away that conversion.
This change adds the new operations along with a test that tries to make use of them to ensure the results are what is expected. There are enough cases to cover that I had to write the test in a way where each thread only writes out a subset of the required output.
There are two other changes here that are not directly related to the main feature:
First, it seems like the `[__unsafeForceInlineEarly]` attribute on some of these overloads interacts poorly with generics, and results in an `IRVectorType` appearing at local scope in the output code. That is semantically reasonable given our IR model, but it would ideally be something that gets eliminated as a result of deduplication of types. For now I've introduced a slight hack to make types always get inlined into their use sites during emission, which should handle the case of locally-defined types. I'm not 100% happy with that solution, but it seemed better than introducing a bunch of unrelated fixes into this PR.
Second, the way that conversion operations were being declared for matrix types seems to have been incorrect: we had a single *explicit* initializer added to matrix types via an `extension` that allowed them to be initialized from other matrix types with the same size and *any* element type. In order to support implicit conversions of matrix types, I cribbed the code we were already using to introduce implicit conversion operations for vector types.
| -rw-r--r-- | source/slang/core.meta.slang | 34 | ||||
| -rw-r--r-- | source/slang/hlsl.meta.slang | 65 | ||||
| -rw-r--r-- | source/slang/slang-emit-c-like.cpp | 12 | ||||
| -rw-r--r-- | tests/hlsl-intrinsic/bit-cast/bit-cast-16-bit.slang | 103 | ||||
| -rw-r--r-- | tests/hlsl-intrinsic/bit-cast/bit-cast-16-bit.slang.expected.txt | 12 |
5 files changed, 219 insertions, 7 deletions
diff --git a/source/slang/core.meta.slang b/source/slang/core.meta.slang index 0eaf5cb1a..62039992e 100644 --- a/source/slang/core.meta.slang +++ b/source/slang/core.meta.slang @@ -520,13 +520,6 @@ for( int C = 2; C <= 4; ++C ) } sb << ");\n"; - - // initialize from another matrix of the same size - // - // TODO(tfoley): See comment about how this overlaps - // with implicit conversion, in the `vector` case above - sb << "__generic<U> __init(matrix<U," << R << ", " << C << ">);\n"; - // initialize from a matrix of larger size for(int rr = R; rr <= 4; ++rr) for( int cc = C; cc <= 4; ++cc ) @@ -537,6 +530,33 @@ for( int C = 2; C <= 4; ++C ) sb << "}\n"; } + +for (int tt = 0; tt < kBaseTypeCount; ++tt) +{ + if(kBaseTypes[tt].tag == BaseType::Void) continue; + auto toType = kBaseTypes[tt].name; +}}}} +__generic<let R : int, let C : int> extension matrix<$(toType),R,C> +{ +${{{{ + for (int ff = 0; ff < kBaseTypeCount; ++ff) + { + if(kBaseTypes[ff].tag == BaseType::Void) continue; + if( tt == ff ) continue; + + auto cost = getBaseTypeConversionCost( + kBaseTypes[tt], + kBaseTypes[ff]); + auto fromType = kBaseTypes[ff].name; +}}}} + __implicit_conversion($(cost)) + __init(matrix<$(fromType),R,C> value); +${{{{ + } +}}}} +} +${{{{ +} }}}} diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index c89ab8ff9..aeadf8eba 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -990,6 +990,71 @@ __generic<let N : int, let M : int> matrix<uint,N,M> asuint(matrix<uint,N,M> x) { return x; } + +// 16-bit bitcast ops (HLSL SM 6.2) +// +// TODO: We need to map these to GLSL/SPIR-V +// operations that don't require an intermediate +// conversion to fp32. 
+ +// Identity cases: + +[__unsafeForceInlineEarly] float16_t asfloat16(float16_t value) { return value; } +[__unsafeForceInlineEarly] vector<float16_t,N> asfloat16<let N : int>(vector<float16_t,N> value) { return value; } +[__unsafeForceInlineEarly] matrix<float16_t,R,C> asfloat16<let R : int, let C : int>(matrix<float16_t,R,C> value) { return value; } + +[__unsafeForceInlineEarly] int16_t asint16(int16_t value) { return value; } +[__unsafeForceInlineEarly] vector<int16_t,N> asint16<let N : int>(vector<int16_t,N> value) { return value; } +[__unsafeForceInlineEarly] matrix<int16_t,R,C> asint16<let R : int, let C : int>(matrix<int16_t,R,C> value) { return value; } + +[__unsafeForceInlineEarly] uint16_t asuint16(uint16_t value) { return value; } +[__unsafeForceInlineEarly] vector<uint16_t,N> asuint16<let N : int>(vector<uint16_t,N> value) { return value; } +[__unsafeForceInlineEarly] matrix<uint16_t,R,C> asuint16<let R : int, let C : int>(matrix<uint16_t,R,C> value) { return value; } + +// Signed<->unsigned cases: + +[__unsafeForceInlineEarly] int16_t asint16(uint16_t value) { return value; } +[__unsafeForceInlineEarly] vector<int16_t,N> asint16<let N : int>(vector<uint16_t,N> value) { return value; } +[__unsafeForceInlineEarly] matrix<int16_t,R,C> asint16<let R : int, let C : int>(matrix<uint16_t,R,C> value) { return value; } + +[__unsafeForceInlineEarly] uint16_t asuint16(int16_t value) { return value; } +[__unsafeForceInlineEarly] vector<uint16_t,N> asuint16<let N : int>(vector<int16_t,N> value) { return value; } +[__unsafeForceInlineEarly] matrix<uint16_t,R,C> asuint16<let R : int, let C : int>(matrix<int16_t,R,C> value) { return value; } + +// Float->unsigned cases: + +__target_intrinsic(hlsl) +__target_intrinsic(glsl, "uint16_t(packHalf2x16(vec2($0, 0.0)))") +uint16_t asuint16(float16_t value); + +vector<uint16_t,N> asuint16<let N : int>(vector<float16_t,N> value) +{ VECTOR_MAP_UNARY(uint16_t, N, asuint16, value); } + +matrix<uint16_t,R,C> asuint16<let R : int, 
let C : int>(matrix<float16_t,R,C> value) +{ MATRIX_MAP_UNARY(uint16_t, R, C, asuint16, value); } + +// Unsigned->float cases: + +__target_intrinsic(hlsl) +__target_intrinsic(glsl, "float16_t(unpackHalf2x16($0).x)") +float16_t asfloat16(uint16_t value); + +vector<float16_t,N> asfloat16<let N : int>(vector<uint16_t,N> value) +{ VECTOR_MAP_UNARY(float16_t, N, asfloat16, value); } + +matrix<float16_t,R,C> asfloat16<let R : int, let C : int>(matrix<uint16_t,R,C> value) +{ MATRIX_MAP_UNARY(float16_t, R, C, asfloat16, value); } + +// Float<->signed cases: + +__target_intrinsic(hlsl) [__unsafeForceInlineEarly] int16_t asint16(float16_t value) { return asuint16(value); } +__target_intrinsic(hlsl) [__unsafeForceInlineEarly] vector<int16_t,N> asint16<let N : int>(vector<float16_t,N> value) { return asuint16(value); } +__target_intrinsic(hlsl) [__unsafeForceInlineEarly] matrix<int16_t,R,C> asint16<let R : int, let C : int>(matrix<float16_t,R,C> value) { return asuint16(value); } + +__target_intrinsic(hlsl) [__unsafeForceInlineEarly] float16_t asfloat16(int16_t value) { return asfloat16(asuint16(value)); } +__target_intrinsic(hlsl) [__unsafeForceInlineEarly] vector<float16_t,N> asfloat16<let N : int>(vector<int16_t,N> value) { return asfloat16(asuint16(value)); } +__target_intrinsic(hlsl) [__unsafeForceInlineEarly] matrix<float16_t,R,C> asfloat16<let R : int, let C : int>(matrix<int16_t,R,C> value) { return asfloat16(asuint16(value)); } + // Inverse tangent (HLSL SM 1.0) __generic<T : __BuiltinFloatingPointType> __target_intrinsic(hlsl) diff --git a/source/slang/slang-emit-c-like.cpp b/source/slang/slang-emit-c-like.cpp index d85085639..4b9c01c55 100644 --- a/source/slang/slang-emit-c-like.cpp +++ b/source/slang/slang-emit-c-like.cpp @@ -974,6 +974,18 @@ bool CLikeSourceEmitter::shouldFoldInstIntoUseSites(IRInst* inst) // for temporary variables. auto type = inst->getDataType(); + // We treat instructions that yield a type as things we should *always* fold. 
+ // + // TODO: In general, at the point where we emit code we do not expect to + // find types being constructed locally (inside function bodies), but this + // can end up happening because of interaction between different features. + // Notably, if a generic function gets force-inlined early in codegen, + // then any types it constructs will be inlined into the body of the caller + // by default. + // + if(as<IRType>(inst) || as<IRTypeKind>(type)) + return true; + // Unwrap any layers of array-ness from the type, so that // we can look at the underlying data type, in case we // should *never* expose a value of that type diff --git a/tests/hlsl-intrinsic/bit-cast/bit-cast-16-bit.slang b/tests/hlsl-intrinsic/bit-cast/bit-cast-16-bit.slang new file mode 100644 index 000000000..66f015172 --- /dev/null +++ b/tests/hlsl-intrinsic/bit-cast/bit-cast-16-bit.slang @@ -0,0 +1,103 @@ +// bit-cast-16-bit.slang + +//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile sm_6_2 +//TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute + +//TEST_INPUT:ubuffer(data=[0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16], stride=4):name inputBuffer +RWStructuredBuffer<int> inputBuffer; + +int16_t readI(inout int index) { return inputBuffer[(index++) & 0xF]; } +uint16_t readU(inout int index) { return inputBuffer[(index++) & 0xF]; } +float16_t readF(inout int index) { return float(inputBuffer[(index++) & 0xF]); } + +//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer +RWStructuredBuffer<int> outputBuffer; + +void write(int initial, inout int index, uint value) +{ + let tmp = index++; + if((tmp & 3) == initial) + { + outputBuffer[tmp & 0xF] = value; + } +} + +void write(int initial, inout int index, uint16_t value) +{ + write(initial, index, uint(value)); +} + +void write(int initial, inout int index, int16_t value) +{ + write(initial, index, uint(int(value))); +} + +void write(int initial, inout int index, float16_t value) +{ + write(initial, 
index, asuint(float(value))); +} + + +void test(int initial) +{ + int input = initial; + int output = 0; + + // Scalar + { + let i = readI(input); + let u = readU(input); + let f = readF(input); + + // int->float + let a = asfloat16(i); + write(initial, output, a); + + // float->uint + let b = asuint16(f); + write(initial, output, b); + + // uint->int + let c = asint16(u); + write(initial, output, c); + + // float->float + let d = asfloat16(f); + write(initial, output, d); + } + + // Vector + { + let i = int16_t2(readI(input), readI(input)); + let u = uint16_t2(readU(input), readU(input)); + let f = float16_t2(readF(input), readF(input)); + + // uint->float + let a = asfloat16(u); + write(initial, output, a.x); + write(initial, output, a.y); + + // float->int + let b = asint16(f); + write(initial, output, b.x); + write(initial, output, b.y); + + // int->uint + let c = asuint16(i); + write(initial, output, c.x); + write(initial, output, c.y); + + // int->int + let d = asint16(i); + write(initial, output, d.x); + write(initial, output, d.y); + } + +} + + +[numthreads(4, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + test(dispatchThreadID.x); +} diff --git a/tests/hlsl-intrinsic/bit-cast/bit-cast-16-bit.slang.expected.txt b/tests/hlsl-intrinsic/bit-cast/bit-cast-16-bit.slang.expected.txt new file mode 100644 index 000000000..5d8824457 --- /dev/null +++ b/tests/hlsl-intrinsic/bit-cast/bit-cast-16-bit.slang.expected.txt @@ -0,0 +1,12 @@ +0 +4200 +3 +40A00000 +34A00000 +34E00000 +4880 +4980 +3 +5 +5 +7 |
