From bbd6df7e5eb2c1d8811fbcd1ac37a86e2f9237bb Mon Sep 17 00:00:00 2001
From: jsmall-nvidia <jsmall@nvidia.com>
Date: Sun, 6 Jun 2021 12:43:19 -0400
Subject: Fixed issue around 4xFloat16 texture on CUDA (#1874)

* #include with an absolute path didn't work, because paths were always treated as relative.

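* Fixes around Float16: 'elementSize' was calculated incorrectly (it held the size of a single channel rather than the size of the whole texel, i.e. channel size * channel count).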
---
 tools/gfx/cuda/render-cuda.cpp            | 8 +++++---
 tools/render-test/shader-input-layout.cpp | 9 ++++-----
 2 files changed, 9 insertions(+), 8 deletions(-)

(limited to 'tools')

diff --git a/tools/gfx/cuda/render-cuda.cpp b/tools/gfx/cuda/render-cuda.cpp
index 263360018..0859524f0 100644
--- a/tools/gfx/cuda/render-cuda.cpp
+++ b/tools/gfx/cuda/render-cuda.cpp
@@ -1229,6 +1229,8 @@ public:
         tex->m_cudaContext = m_context;
 
         CUresourcetype resourceType;
+
+        // The size of the element/texel in bytes
         size_t elementSize = 0;
 
         // Our `ITextureResource::Desc` uses an enumeration to specify
@@ -1266,7 +1268,7 @@ public:
         {
             CUarray_format format = CU_AD_FORMAT_FLOAT;
             int numChannels = 0;
-
+            
             switch (desc.format)
             {
             case Format::RGBA_Float32:
@@ -1278,7 +1280,7 @@ public:
                     const FormatInfo info = gfxGetFormatInfo(desc.format);
                     format = CU_AD_FORMAT_FLOAT;
                     numChannels = info.channelCount;
-                    elementSize = sizeof(float);
+                    elementSize = sizeof(float) * numChannels;
                     break;
                 }
             case Format::RGBA_Float16:
@@ -1288,7 +1290,7 @@ public:
                     const FormatInfo info = gfxGetFormatInfo(desc.format);
                     format = CU_AD_FORMAT_HALF;
                     numChannels = info.channelCount;
-                    elementSize = sizeof(uint16_t);
+                    elementSize = sizeof(uint16_t) * numChannels;
                     break;
                 }
             case Format::RGBA_Unorm_UInt8:
diff --git a/tools/render-test/shader-input-layout.cpp b/tools/render-test/shader-input-layout.cpp
index 3ab0366a5..11356a29a 100644
--- a/tools/render-test/shader-input-layout.cpp
+++ b/tools/render-test/shader-input-layout.cpp
@@ -928,7 +928,6 @@ namespace renderer_test
         {
             // TODO(JS):
             // Bool is here, because it's not clear across APIs how bool is laid out in memory
-            // Float16 is here as we don't have a convert Float16 to float function laying around
             default:
             case ScalarType::None:
             case ScalarType::Void:
@@ -1180,10 +1179,10 @@ namespace renderer_test
                             for (Index j = 0; j < pixelCount; ++j, srcPixels += 4, dstPixels += 4)
                             {
                                 // Copy out rgba
-                                dstPixels[0] = FloatToHalf(srcPixels[0] * 1.0f / 255);
-                                dstPixels[1] = FloatToHalf(srcPixels[1] * 1.0f / 255);
-                                dstPixels[2] = FloatToHalf(srcPixels[2] * 1.0f / 255);
-                                dstPixels[3] = FloatToHalf(srcPixels[3] * 1.0f / 255);
+                                dstPixels[0] = srcPixels[0] * (1.0f / 255);
+                                dstPixels[1] = srcPixels[1] * (1.0f / 255);
+                                dstPixels[2] = srcPixels[2] * (1.0f / 255);
+                                dstPixels[3] = srcPixels[3] * (1.0f / 255);
                             }
                             break;
                         }
-- 
cgit v1.2.3