Fix RWTexture issues on CUDA (#1876)

* Fix `#include` of an absolute path, which didn't work because paths were always assumed to be relative.
* Re-enable CUDA RWTexture tests. Re-enable the RWTexture1D test. Make sure tests use only a single mip level for RWTexture (required for CUDA).
* Fix an issue with reading a CUDA surface. Re-enable the working CUDA RWTexture test. Enable the 1D case.
author: jsmall-nvidia <jsmall@nvidia.com> 2021-06-08 08:48:47 -0400
committer: GitHub <noreply@github.com> 2021-06-08 08:48:47 -0400
commit: fb50fab76a723f46026474ea5bb0226c297d1fd5 (patch)
tree: f489f30ccb94a130c942d907b9f009e7cc6aabd4
parent: 5974f3e543b56cd11f28093c5a9d7410b2b3f979 (diff)
7 files changed, 35 insertions, 31 deletions
diff --git a/source/slang/core.meta.slang b/source/slang/core.meta.slang
index 6b73630a3..afdd96029 100644
--- a/source/slang/core.meta.slang
+++ b/source/slang/core.meta.slang
@@ -934,6 +934,12 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
                                     {
                                         sb << '.' << char(i + 'x');
                                     }
+
+                                    // Surface access is *byte* addressed in x in CUDA
+                                    if (i == 0) 
+                                    {
+                                        sb << " * $E";
+                                    }
                                 }
                                 if (isArray)
                                 {
@@ -949,7 +955,9 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
                                     sb << "Layered";
                                 }
                                 sb << "read";
-                                sb << "<$T0>($0, ($1).x, ($1).y, ($1).z"; 
+
+                                // Surface access is *byte* addressed in x in CUDA
+                                sb << "<$T0>($0, ($1).x * $E, ($1).y, ($1).z"; 
                                 if (isArray)
                                 {
                                     sb << ", int(($1).w)";
@@ -1090,12 +1098,12 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
                                 sb << ", ($1)";
                                 if (vecCount > 1)
                                 {
-                                    sb << '.' << char(i + 'x');
-                                    // Surface access is *byte* addressed in x in CUDA
-                                    if (i == 0)
-                                    {
-                                        sb << " * $E";
-                                    }
+                                    sb << '.' << char(i + 'x');    
+                                }
+                                // Surface access is *byte* addressed in x in CUDA
+                                if (i == 0)
+                                {
+                                    sb << " * $E";
                                 }
                             }
 
diff --git a/tests/compute/half-rw-texture-convert.slang b/tests/compute/half-rw-texture-convert.slang
index 161033637..d28ccd7b0 100644
--- a/tests/compute/half-rw-texture-convert.slang
+++ b/tests/compute/half-rw-texture-convert.slang
@@ -11,15 +11,9 @@
 //DIABLE_TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute -output-using-type -shaderobj
 
 // Note that this test is a little silly. The output does not confirm that the write actually worked.
-// This is because it's not trivial on CUDA to do so. If there is a write conversion, the RWTexture
-// is backed by a surface. There is a texture converting write (in sust.p) but not in reading.
-// 
-// In practice if if you wanted a texture read, you'd either only be able to read *without* format 
-// conversion, or via a separate binding of the same surface as a Texture.
-// There's no simple way to describe either, so this test just confirms it outputs PTX that can 
-// be executed, and unfortunately doesn't test if the write conversion actually *worked*
-
-//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -output-using-type -shaderobj -render-features half
+// half-rw-texture-convert2.slang tests this
+
+//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -output-using-type -shaderobj -render-features half
 
 //TEST_INPUT: RWTexture2D(format=R_Float16, size=4, content = one, mipMaps = 1):name rwt2D
 [format("r16f")]
diff --git a/tests/compute/half-rw-texture-simple.slang b/tests/compute/half-rw-texture-simple.slang
index e5b5ec08e..0438c6964 100644
--- a/tests/compute/half-rw-texture-simple.slang
+++ b/tests/compute/half-rw-texture-simple.slang
@@ -10,6 +10,9 @@
 //DIABLE_TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute -output-using-type -shaderobj
 //TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -output-using-type -shaderobj -render-features half
 
+//TEST_INPUT: RWTexture1D(format=R_Float16, size=4, content = one, mipMaps = 1):name rwt1D
+RWTexture1D<half> rwt1D;
+
 //TEST_INPUT: RWTexture2D(format=R_Float16, size=4, content = one, mipMaps = 1):name rwt2D
 RWTexture2D<half> rwt2D;
 
@@ -26,7 +29,7 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
     
     float val = 0.0f;
  
-    //val += rwt1D[idx];
+    val += rwt1D[idx];
     
     half h0 = rwt2D[uint2(idx, idx)];
 
diff --git a/tests/compute/half-rw-texture-simple.slang.expected.txt b/tests/compute/half-rw-texture-simple.slang.expected.txt
index c0e0cfb4b..164570278 100644
--- a/tests/compute/half-rw-texture-simple.slang.expected.txt
+++ b/tests/compute/half-rw-texture-simple.slang.expected.txt
@@ -1,5 +1,5 @@
 type: float
-5.000000
 6.000000
 7.000000
 8.000000
+9.000000
diff --git a/tests/compute/rw-texture-simple.slang b/tests/compute/rw-texture-simple.slang
index a5a103200..2752afe3a 100644
--- a/tests/compute/rw-texture-simple.slang
+++ b/tests/compute/rw-texture-simple.slang
@@ -6,14 +6,14 @@
 //TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -profile cs_6_0 -use-dxil -output-using-type -shaderobj
 // TODO(JS): Doesn't work on vk currently, because createTextureView not implemented on vk renderer
 //DISABLE_TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute -output-using-type -shaderobj
-// TODO(JS): Doesn't work on certain CI systems. 
-//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -output-using-type -shaderobj
 
-//TEST_INPUT: RWTexture1D(format=R_Float32, size=4, content = one):name rwt1D
+//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -output-using-type -shaderobj
+
+//TEST_INPUT: RWTexture1D(format=R_Float32, size=4, content = one, mipMaps = 1):name rwt1D
 RWTexture1D<float> rwt1D;
-//TEST_INPUT: RWTexture2D(format=R_Float32, size=4, content = one):name rwt2D
+//TEST_INPUT: RWTexture2D(format=R_Float32, size=4, content = one, mipMaps = 1):name rwt2D
 RWTexture2D<float> rwt2D;
-//TEST_INPUT: RWTexture3D(format=R_Float32, size=4, content = one):name rwt3D
+//TEST_INPUT: RWTexture3D(format=R_Float32, size=4, content = one, mipMaps = 1):name rwt3D
 RWTexture3D<float> rwt3D;
 
 //TEST_INPUT: ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer
@@ -26,15 +26,11 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
     
     float val = 0.0f;
  
-    //val += rwt1D.Load(idx);
     val += rwt1D[idx];
     val += rwt2D[uint2(idx, idx)];
     val += rwt3D[uint3(idx, idx, idx)];
     
-    // NOTE! This is disabled because on CUDA, whilst this has an effect it is not what is expected.
-    // The value read back has changed but seems to always be 1.
-    // rwt1D[idx] = idx;
-    
+    rwt1D[idx] = idx;
     rwt2D[uint2(idx, idx)] = idx;    
     rwt3D[uint3(idx, idx, idx)] = idx;    
             
diff --git a/tests/compute/rw-texture-simple.slang.expected.txt b/tests/compute/rw-texture-simple.slang.expected.txt
index 78ed77898..1d9023742 100644
--- a/tests/compute/rw-texture-simple.slang.expected.txt
+++ b/tests/compute/rw-texture-simple.slang.expected.txt
@@ -1,5 +1,5 @@
 type: float
-4.000000
+3.000000
 6.000000
-8.000000
-10.000000
+9.000000
+12.000000
diff --git a/tools/gfx/cuda/render-cuda.cpp b/tools/gfx/cuda/render-cuda.cpp
index 0859524f0..64da1721d 100644
--- a/tools/gfx/cuda/render-cuda.cpp
+++ b/tools/gfx/cuda/render-cuda.cpp
@@ -1625,6 +1625,9 @@ public:
             //
             if (desc.allowedStates.contains(ResourceState::UnorderedAccess))
             {
+                // On CUDA surfaces only support a single MIP map
+                SLANG_ASSERT(desc.numMipLevels == 1);
+
                 SLANG_CUDA_RETURN_ON_FAIL(cuSurfObjectCreate(&tex->m_cudaSurfObj, &resDesc));
             }
author	jsmall-nvidia <jsmall@nvidia.com>	2021-06-08 08:48:47 -0400
committer	GitHub <noreply@github.com>	2021-06-08 08:48:47 -0400
commit	fb50fab76a723f46026474ea5bb0226c297d1fd5 (patch)
tree	f489f30ccb94a130c942d907b9f009e7cc6aabd4
parent	5974f3e543b56cd11f28093c5a9d7410b2b3f979 (diff)