From e510a287deb25f2542a68bf21382f2557740d70c Mon Sep 17 00:00:00 2001
From: jsmall-nvidia <jsmall@nvidia.com>
Date: Thu, 6 May 2021 12:45:00 -0400
Subject: Half texture support (#1836)

* #include of an absolute path didn't work, because paths were always taken to be relative.

* Split out StringEscapeUtil.

* Added StringEscapeUtil.

* Fix typo in unix quoting type.

* Small comment improvements.

* Try to fix linux linking issue.

* Fix typo.

* Attempt to fix linux link issue.

* Update VS proj even though nothing really changed.

* Fix another typo issue.

* Fix for windows issue.
Fixed bug.

* Make separate Utils for escaping.

* Fix typo.

* Split out into StringEscapeHandler.

* Windows shell does handle removing quotes (so remove code to remove them).

* Handle unescaping if not initiating using the shell.

* Slight improvement around shell like decoding.

* Simplify command extraction.

* Add shared-library category type.

* Fix bug in command extraction.

* Typo in transcendental category.

* Enable unit-test in the smoke test category.

* Report output that fails to parse as a failing test.

* Fixes for transcendental tests. Disable tests that do not work.

* Changed category parsing.

* Removed the TestResult parameter from _gatherTestsForFile.
Made testsList only output.

* Remove testing if all tests were disabled.

* Make args of CommandLine always unescaped.

* Add category.

* Don't need escaping on unix/linux.

* Remove some no longer used functions.

* Add requireSMVersion to CUDAExtensionTracker.

* half-calc.slang now works for CUDA.

* bit-cast-16-bit works on CUDA.

* WIP handling of CUDA vector<half> types.

* Half swizzle CUDA.

* Half vector test.

* Fix swizzle half bug.

* Fix compilation issue with narrowing to Index.

* Add unary ops.

* Add some vector scalar maths ops.

* Add half vector conversions for CUDA.

* Fix erroneous comment.

* Support for half comparisons.

* First pass test for half compare.

* Fix bug in CUDA specialized emit control.
Updated tests to have pre and post inc/dec.

* Removed unneeded parts of the cuda prelude.

* Half structured buffer works on CUDA.

* Added name lookup for Gfx::Format

* Support half texture type in test system.

* Test for half reading on CUDA.

* Add half formats to Vk and D3D utils.

* Fix getAt for CUDA - where there might not be a .x member in a vector.
---
 tools/gfx/cpu/render-cpu.cpp | 73 ++++++++++++++++++++++++++------------------
 1 file changed, 44 insertions(+), 29 deletions(-)

(limited to 'tools/gfx/cpu/render-cpu.cpp')
diff --git a/tools/gfx/cpu/render-cpu.cpp b/tools/gfx/cpu/render-cpu.cpp
index 2d7d858c4..0bdc06ad6 100644
--- a/tools/gfx/cpu/render-cpu.cpp
+++ b/tools/gfx/cpu/render-cpu.cpp
@@ -89,6 +89,18 @@ void _unpackFloatTexel(void const* texelData, void* outData, size_t outSize)
     memcpy(outData, temp, outSize);
 }
 
+template<int N> // N = number of 16-bit components read from the texel; must not exceed 4 (the size of temp below).
+void _unpackFloat16Texel(void const* texelData, void* outData, size_t outSize) // Converts N half values at texelData to float and copies outSize bytes of the result to outData.
+{
+    auto input = (int16_t const*)texelData; // Texel viewed as 16-bit values; each one is handed to HalfToFloat below.
+
+    float temp[4] = { 0.0f, 0.0f, 0.0f, 1.0f }; // Components the texel does not supply keep these defaults (0, 0, 0, 1).
+    for (int i = 0; i < N; ++i)
+        temp[i] = HalfToFloat(input[i]);
+
+    memcpy(outData, temp, outSize); // NOTE(review): assumes outSize <= sizeof(temp); not checked here - confirm with callers.
+}
+
 static inline float _unpackUnorm8Value(uint8_t value)
 {
     return value / 255.0f;
@@ -143,42 +155,45 @@ void _unpackUInt32Texel(void const* texelData, void* outData, size_t outSize)
     memcpy(outData, temp, outSize);
 }
 
-#define TEXTURE_FORMAT_INFO(FORMAT) static const CPUTextureFormatInfo kCPUTextureFormatInfo_##FORMAT
+struct CPUFormatInfoMap // Table of CPUTextureFormatInfo entries indexed by Format; the constructor registers the unpack functions.
+{
+    CPUFormatInfoMap()
+    {
+        memset(m_infos, 0, sizeof(m_infos)); // Zero every entry first, so formats not registered below are left with a zeroed unpackFunc.
 
-TEXTURE_FORMAT_INFO(RGBA_Float32)      = { &_unpackFloatTexel<4> };
-TEXTURE_FORMAT_INFO(RGB_Float32)       = { &_unpackFloatTexel<3> };
-TEXTURE_FORMAT_INFO(RG_Float32)        = { &_unpackFloatTexel<2> };
-TEXTURE_FORMAT_INFO(R_Float32)         = { &_unpackFloatTexel<1> };
-TEXTURE_FORMAT_INFO(RGBA_Unorm_UInt8)  = { &_unpackUnorm8Texel<4> };
-TEXTURE_FORMAT_INFO(BGRA_Unorm_UInt8)  = { &_unpackUnormBGRA8Texel };
-TEXTURE_FORMAT_INFO(R_UInt16)          = { &_unpackUInt16Texel<1> };
-TEXTURE_FORMAT_INFO(R_UInt32)          = { &_unpackUInt32Texel<1> };
-TEXTURE_FORMAT_INFO(D_Float32)         = { &_unpackFloatTexel<1> };
+        set(Format::RGBA_Float32, &_unpackFloatTexel<4>); // 32-bit float formats.
+        set(Format::RGB_Float32, &_unpackFloatTexel<3>);
 
-#undef TEXTURE_FORMAT_INFO
+        set(Format::RG_Float32, &_unpackFloatTexel<2>);
+        set(Format::R_Float32, &_unpackFloatTexel<1>);
 
-static CPUTextureFormatInfo const* _getFormatInfo(Format format)
-{
-    switch(format)
+        set(Format::RGBA_Float16, &_unpackFloat16Texel<4>); // 16-bit (half) float formats.
+        set(Format::RG_Float16, &_unpackFloat16Texel<2>);
+        set(Format::R_Float16, &_unpackFloat16Texel<1>);
+
+        set(Format::RGBA_Unorm_UInt8, &_unpackUnorm8Texel<4>);
+        set(Format::BGRA_Unorm_UInt8, &_unpackUnormBGRA8Texel);
+        set(Format::R_UInt16, &_unpackUInt16Texel<1>);
+        set(Format::R_UInt32, &_unpackUInt32Texel<1>);
+        set(Format::D_Float32, &_unpackFloatTexel<1>); // Uses the same single-component float unpacker as R_Float32.
+    }
+
+    void set(Format format, CPUTextureUnpackFunc func) // Stores func as the unpack function of the entry for format.
     {
-    case Format::D_Unorm24_S8:
-    default:
-        return nullptr;
+        auto& info = m_infos[Index(format)]; // NOTE(review): no bounds check; assumes Index(format) < Index(Format::CountOf).
+        info.unpackFunc = func;
+    }
+    SLANG_FORCE_INLINE const CPUTextureFormatInfo& get(Format format) const { return m_infos[Index(format)]; } // Returns the entry for format; its unpackFunc is still zeroed if set() was never called for it.
 
+    CPUTextureFormatInfo m_infos[Index(Format::CountOf)]; // Sized by Format::CountOf and indexed by Index(format).
+};
 
-#define CASE(FORMAT) case Format::FORMAT: return &kCPUTextureFormatInfo_##FORMAT;
-    CASE(RGBA_Float32)
-    CASE(RGB_Float32)
-    CASE(RG_Float32)
-    CASE(R_Float32)
-    CASE(RGBA_Unorm_UInt8)
-    CASE(BGRA_Unorm_UInt8)
-    CASE(R_UInt16)
-    CASE(R_UInt32)
-    CASE(D_Float32)
+static const CPUFormatInfoMap g_formatInfoMap; // File-local table instance; its constructor registers the supported formats.
 
-#undef CASE
-    }
+static CPUTextureFormatInfo const* _getFormatInfo(Format format) // Looks up format in g_formatInfoMap.
+{
+    const CPUTextureFormatInfo& info = g_formatInfoMap.get(format);
+    return info.unpackFunc ? &info : nullptr; // nullptr when no unpack function is registered for this format.
 }
 
 class CPUTextureResource : public TextureResource
-- 
cgit v1.2.3