summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorjsmall-nvidia <jsmall@nvidia.com>2022-11-09 09:15:15 -0500
committerGitHub <noreply@github.com>2022-11-09 09:15:15 -0500
commite743ddd49045284b706cc2cbbb615acc6fe3d882 (patch)
tree71aa7e68dc38410abb86defabe7550e940882a7c
parentbf67309454032b4f92d0bc9735b608e56b16882f (diff)
f32tof16 and f16tof32 support for CPU targets (#2500)
* #include of an absolute path didn't work, because paths were always taken to be relative. * Float16 support for C++/CPU based targets with f16tof32 and f32tof16. * Small correction around INF/NAN handling for f32tof16. * Small improvement to f16tof32. * Disable CUDA test for now.
-rw-r--r--prelude/slang-cpp-scalar-intrinsics.h96
-rw-r--r--tests/hlsl-intrinsic/f16tof32.slang18
-rw-r--r--tests/hlsl-intrinsic/f16tof32.slang.expected.txt8
-rw-r--r--tests/hlsl-intrinsic/f32tof16.slang37
-rw-r--r--tests/hlsl-intrinsic/f32tof16.slang.expected.txt8
5 files changed, 166 insertions, 1 deletions
diff --git a/prelude/slang-cpp-scalar-intrinsics.h b/prelude/slang-cpp-scalar-intrinsics.h
index 60f1dd278..66035260d 100644
--- a/prelude/slang-cpp-scalar-intrinsics.h
+++ b/prelude/slang-cpp-scalar-intrinsics.h
@@ -18,7 +18,6 @@ namespace SLANG_PRELUDE_NAMESPACE {
# define SLANG_PRELUDE_PI 3.14159265358979323846
#endif
-// ----------------------------- F32 -----------------------------------------
union Union32
{
@@ -34,6 +33,101 @@ union Union64
double d;
};
+// Reinterpret the 32 bits of a float as an integer (and back) by going through Union32.
+SLANG_FORCE_INLINE int32_t _bitCastFloatToInt(float f) { Union32 pun; pun.f = f; return pun.i; }
+SLANG_FORCE_INLINE float _bitCastIntToFloat(int32_t i) { Union32 pun; pun.i = i; return pun.f; }
+SLANG_FORCE_INLINE uint32_t _bitCastFloatToUInt(float f) { Union32 pun; pun.f = f; return pun.u; }
+SLANG_FORCE_INLINE float _bitCastUIntToFloat(uint32_t ui) { Union32 pun; pun.u = ui; return pun.f; }
+
+// ----------------------------- F16 -----------------------------------------
+
+
+// Converts a float32 to IEEE 754 binary16 bits, returned in the low 16 bits of the result.
+// Based on FloatToHalf from the Slang codebase. Defined in a header, so it must be inline.
+SLANG_FORCE_INLINE uint32_t f32tof16(const float value)
+{
+    const uint32_t inBits = _bitCastFloatToUInt(value);
+
+    // bits initially holds just the sign bit, moved to its f16 position (bit 15).
+    uint32_t bits = (inBits >> 16) & 0x8000;
+    // Top 11 mantissa bits: the 10 that f16 keeps, plus one extra low bit used for rounding.
+    uint32_t m = (inBits >> 12) & 0x07ff;
+    const uint32_t e = (inBits >> 23) & 0xff;
+
+    if (e < 103)
+    {
+        // Magnitude is below 2^-24 (the smallest f16 denormal): flush to signed zero.
+        return bits;
+    }
+    if (e == 0xff)
+    {
+        // NAN or INF. It is INF only if the *input* mantissa is 0.
+        // Drop the rounding bit to get the 10 bit output mantissa.
+        m >>= 1;
+        // We *assume* the float16/float32 signaling bit and remaining bits have the same
+        // semantics. (The signaling bit convention is target specific, as is the usage of
+        // the other mantissa bits of a NAN.)
+        // m can be 0 because the input is INF, but also because all the bits that made the
+        // input a NAN were dropped, as f16 has fewer mantissa bits.
+        // To fix this we make m non zero if it is 0 and the input mantissa was not.
+        // This will (typically) produce a signaling NAN.
+        m += uint32_t(m == 0 && (inBits & 0x007fffffu));
+        // Combine for output
+        return (bits | 0x7c00u | m);
+    }
+    if (e > 142)
+    {
+        // Exponent is too large for f16 (2^16 or more): overflow to signed INF.
+        return bits | 0x7c00u;
+    }
+    if (e < 113)
+    {
+        // Result is an f16 denormal: restore the implicit leading 1, shift it into place and
+        // round using the last bit shifted out.
+        m |= 0x0800u;
+        bits |= (m >> (114 - e)) + ((m >> (113 - e)) & 1);
+        return bits;
+    }
+    // Normalized: rebias the exponent (127 -> 15), drop the rounding bit from the mantissa,
+    // then add it back in. A carry out of the mantissa correctly bumps the exponent.
+    bits |= ((e - 112) << 10) | (m >> 1);
+    bits += m & 1;
+    return bits;
+}
+
+// 2^112 as a float: multiplying by it rescales f16 denormal bits that were placed in an f32.
+static const float g_f16tof32Magic = _bitCastIntToFloat((127 + (127 - 15)) << 23);
+
+// Converts IEEE 754 binary16 bits (the low 16 bits of value) to a float. Inline as it is in a header.
+SLANG_FORCE_INLINE float f16tof32(const uint32_t value)
+{
+    const uint32_t sign = (value & 0x8000) << 16;
+    uint32_t exponent = (value & 0x7c00) >> 10;
+    const uint32_t mantissa = (value & 0x03ff);
+
+    if (exponent == 0)
+    {
+        // A zero mantissa is a signed 0 and falls through; otherwise we have a denormal.
+        if (mantissa)
+        {
+            // Use the magic to do the exponent adjust. Cast as unsigned: sign may set bit 31.
+            return _bitCastUIntToFloat(sign | ((value & 0x7fff) << 13)) * g_f16tof32Magic;
+        }
+    }
+    else
+    {
+        // An input exponent of 0x1f means INF or NAN: the output exponent becomes 0xff and
+        // the mantissa bits are just copied (0 is INF, anything else is NAN).
+        // Otherwise the value is normalized and the exponent is rebiased from 15 to 127.
+        exponent = (exponent == 0x1F) ? 0xff : (exponent + (-15 + 127));
+    }
+
+    // Shared by zero, INF/NAN and normalized values.
+    return _bitCastUIntToFloat(sign | (exponent << 23) | (mantissa << 13));
+}
+
+// ----------------------------- F32 -----------------------------------------
+
// Helpers
SLANG_FORCE_INLINE float F32_calcSafeRadians(float radians);
diff --git a/tests/hlsl-intrinsic/f16tof32.slang b/tests/hlsl-intrinsic/f16tof32.slang
new file mode 100644
index 000000000..b73ade4cf
--- /dev/null
+++ b/tests/hlsl-intrinsic/f16tof32.slang
@@ -0,0 +1,18 @@
+//TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute
+//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute
+//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12
+//TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute
+//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute
+
+//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer
+RWStructuredBuffer<float> outputBuffer;
+
+[numthreads(8, 1, 1)]
+void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
+{
+    // One half-precision bit pattern per thread; each thread converts its own entry.
+    uint halfBits[] = { 0, 0xC600, 0x4A00, 0xCE00, 0x2555, 0xA155, 0x1D55, 0x9955 };
+    int threadIndex = int(dispatchThreadID.x);
+    float converted = f16tof32(halfBits[threadIndex]);
+    outputBuffer[threadIndex] = converted;
+} \ No newline at end of file
diff --git a/tests/hlsl-intrinsic/f16tof32.slang.expected.txt b/tests/hlsl-intrinsic/f16tof32.slang.expected.txt
new file mode 100644
index 000000000..833998d8e
--- /dev/null
+++ b/tests/hlsl-intrinsic/f16tof32.slang.expected.txt
@@ -0,0 +1,8 @@
+0
+C0C00000
+41400000
+C1C00000
+3CAAA000
+BC2AA000
+3BAAA000
+BB2AA000
diff --git a/tests/hlsl-intrinsic/f32tof16.slang b/tests/hlsl-intrinsic/f32tof16.slang
new file mode 100644
index 000000000..465b2840a
--- /dev/null
+++ b/tests/hlsl-intrinsic/f32tof16.slang
@@ -0,0 +1,37 @@
+//TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute
+//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute
+//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12
+//TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute
+//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute
+
+//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer
+RWStructuredBuffer<uint> outputBuffer;
+
+[numthreads(8, 1, 1)]
+void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
+{
+    int threadIndex = int(dispatchThreadID.x);
+
+    // Thread 0 converts 0; every other thread converts a value built from 3 << threadIndex.
+    float input = 0.0f;
+    if (threadIndex != 0)
+    {
+        bool negate = (threadIndex & 1) != 0;
+        bool reciprocal = (threadIndex & 4) != 0;
+
+        // Start from 3 << threadIndex.
+        input = (3 << threadIndex);
+        // Odd threads flip the sign.
+        if (negate)
+        {
+            input = -input;
+        }
+        // Threads with bit 2 set use the reciprocal instead.
+        if (reciprocal)
+        {
+            input = 1.0f / input;
+        }
+    }
+
+    outputBuffer[threadIndex] = f32tof16(input);
+} \ No newline at end of file
diff --git a/tests/hlsl-intrinsic/f32tof16.slang.expected.txt b/tests/hlsl-intrinsic/f32tof16.slang.expected.txt
new file mode 100644
index 000000000..2a2175a43
--- /dev/null
+++ b/tests/hlsl-intrinsic/f32tof16.slang.expected.txt
@@ -0,0 +1,8 @@
+0
+C600
+4A00
+CE00
+2555
+A155
+1D55
+9955