From bbd6df7e5eb2c1d8811fbcd1ac37a86e2f9237bb Mon Sep 17 00:00:00 2001 From: jsmall-nvidia Date: Sun, 6 Jun 2021 12:43:19 -0400 Subject: Fixed issue around 4xFloat16 texture on CUDA (#1874) * #include an absolute path didn't work - because paths were taken to always be relative. * Fixes around Float16. Incorrect calculation of 'elementSize'. --- tools/gfx/cuda/render-cuda.cpp | 8 +++++--- tools/render-test/shader-input-layout.cpp | 9 ++++----- 2 files changed, 9 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/gfx/cuda/render-cuda.cpp b/tools/gfx/cuda/render-cuda.cpp index 263360018..0859524f0 100644 --- a/tools/gfx/cuda/render-cuda.cpp +++ b/tools/gfx/cuda/render-cuda.cpp @@ -1229,6 +1229,8 @@ public: tex->m_cudaContext = m_context; CUresourcetype resourceType; + + // The size of the element/texel in bytes size_t elementSize = 0; // Our `ITextureResource::Desc` uses an enumeration to specify @@ -1266,7 +1268,7 @@ public: { CUarray_format format = CU_AD_FORMAT_FLOAT; int numChannels = 0; - + switch (desc.format) { case Format::RGBA_Float32: @@ -1278,7 +1280,7 @@ public: const FormatInfo info = gfxGetFormatInfo(desc.format); format = CU_AD_FORMAT_FLOAT; numChannels = info.channelCount; - elementSize = sizeof(float); + elementSize = sizeof(float) * numChannels; break; } case Format::RGBA_Float16: @@ -1288,7 +1290,7 @@ public: const FormatInfo info = gfxGetFormatInfo(desc.format); format = CU_AD_FORMAT_HALF; numChannels = info.channelCount; - elementSize = sizeof(uint16_t); + elementSize = sizeof(uint16_t) * numChannels; break; } case Format::RGBA_Unorm_UInt8: diff --git a/tools/render-test/shader-input-layout.cpp b/tools/render-test/shader-input-layout.cpp index 3ab0366a5..11356a29a 100644 --- a/tools/render-test/shader-input-layout.cpp +++ b/tools/render-test/shader-input-layout.cpp @@ -928,7 +928,6 @@ namespace renderer_test { // TODO(JS): // Bool is here, because it's not clear across APIs how bool is laid out in memory - // Float16 is here as we don't have a convert Float16 to float function laying around default: case ScalarType::None: case ScalarType::Void: @@ -1180,10 +1179,10 @@ namespace renderer_test for (Index j = 0; j < pixelCount; ++j, srcPixels += 4, dstPixels += 4) { // Copy out rgba - dstPixels[0] = FloatToHalf(srcPixels[0] * 1.0f / 255); - dstPixels[1] = FloatToHalf(srcPixels[1] * 1.0f / 255); - dstPixels[2] = FloatToHalf(srcPixels[2] * 1.0f / 255); - dstPixels[3] = FloatToHalf(srcPixels[3] * 1.0f / 255); + dstPixels[0] = srcPixels[0] * (1.0f / 255); + dstPixels[1] = srcPixels[1] * (1.0f / 255); + dstPixels[2] = srcPixels[2] * (1.0f / 255); + dstPixels[3] = srcPixels[3] * (1.0f / 255); } break; } -- cgit v1.2.3