Support for reads from RWTexture<half> (#1837)

* #include an absolute path didn't work - because paths were taken to always be relative. * Split out StringEscapeUtil. * Added StringEscapeUtil. * Fix typo in unix quoting type. * Small comment improvements. * Try to fix linux linking issue. * Fix typo. * Attempt to fix linux link issue. * Update VS proj even though nothing really changed. * Fix another typo issue. * Fix for windows issue. Fixed bug. * Make separate Utils for escaping. * Fix typo. * Split out into StringEscapeHandler. * Windows shell does handle removing quotes (so remove code to remove them). * Handle unescaping if not initiating using the shell. * Slight improvement around shell like decoding. * Simplify command extraction. * Add shared-library category type. * Fix bug in command extraction. * Typo in transcendental category. * Enable unit-test on in smoke test category. * Make parsing failing output as a failing test. * Fixes for transcendental tests. Disable tests that do not work. * Changed category parsing. * Removed the TestResult parameter from _gatherTestsForFile. Made testsList only output. * Remove testing if all tests were disabled. * Make args of CommandLine always unescaped. * Add category. * Don't need escaping on unix/linux. * Remove some no longer used functions. * Add requireSMVersion to CUDAExtensionTracker. * half-calc.slang now works for CUDA. * bit-cast-16-bit works on CUDA. * WIP handling of CUDA vector<half> types. * Half swizzle CUDA. * Half vector test. * Fix swizzle half bug. * Fix compilation issue with narrowing to Index. * Add unary ops. * Add some vector scalar maths ops. * Add half vector conversions for CUDA. * Fix erroneous comment. * Support for half comparisons. * First pass test for half compare. * Fix bug in CUDA specialized emit control. Updated tests to have pre and post inc/dec. * Removed unneeded parts of the cuda prelude. * Half structured buffer works on CUDA. * Added name lookup for Gfx::Format * Support half texture type in test system. * Test for half reading on CUDA. * Add half formats to Vk and D3D utils. * Fix getAt for CUDA - where there might not be a .x member in a vector. * Template specialization for half surface access works. * Half RWTexture support. * Test for half RWTexture access. * Update half-rw-texture test. * Remove test function from CUDA prelude.
author: jsmall-nvidia <jsmall@nvidia.com> 2021-05-06 18:09:44 -0400
committer: GitHub <noreply@github.com> 2021-05-06 15:09:44 -0700
commit: 8ee5e4501c746e34a1b59c643422cca56e2be214 (patch)
tree: 439531e7e025565778776c8302e457c526a84406
parent: e510a287deb25f2542a68bf21382f2557740d70c (diff)
3 files changed, 128 insertions, 0 deletions
diff --git a/prelude/slang-cuda-prelude.h b/prelude/slang-cuda-prelude.h
index a627cc652..7aaa4c462 100644
--- a/prelude/slang-cuda-prelude.h
+++ b/prelude/slang-cuda-prelude.h
@@ -294,6 +294,78 @@ SLANG_FORCE_INLINE SLANG_CUDA_CALL __half4 operator/(__half lh, const __half4& r
 SLANG_FORCE_INLINE SLANG_CUDA_CALL __half4 operator-(const __half4& h) { return __half4{__hneg2(h.xy), __hneg2(h.zw)}; }
 SLANG_FORCE_INLINE SLANG_CUDA_CALL __half4 operator+(const __half4& h) { return h; }
 
+// Convenience functions ushort -> half
+
+SLANG_FORCE_INLINE SLANG_CUDA_CALL __half2 __ushort_as_half(const ushort2& i) { return __halves2half2(__ushort_as_half(i.x), __ushort_as_half(i.y)); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL __half3 __ushort_as_half(const ushort3& i) { return __half3{__halves2half2(__ushort_as_half(i.x), __ushort_as_half(i.y)), __ushort_as_half(i.z)}; }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL __half4 __ushort_as_half(const ushort4& i) { return __half4{ __halves2half2(__ushort_as_half(i.x), __ushort_as_half(i.y)), __halves2half2(__ushort_as_half(i.z), __ushort_as_half(i.w)) }; }
+
+// Convenience functions half -> ushort
+
+SLANG_FORCE_INLINE SLANG_CUDA_CALL ushort2 __half_as_ushort(const __half2& i) { return make_ushort2(__half_as_ushort(i.x), __half_as_ushort(i.y)); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL ushort3 __half_as_ushort(const __half3& i) { return make_ushort3(__half_as_ushort(i.xy.x), __half_as_ushort(i.xy.y), __half_as_ushort(i.z)); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL ushort4 __half_as_ushort(const __half4& i) { return make_ushort4(__half_as_ushort(i.xy.x), __half_as_ushort(i.xy.y), __half_as_ushort(i.zw.x), __half_as_ushort(i.zw.y)); }
+
+
+#define SLANG_DROP_PARENS(...) __VA_ARGS__
+
+#define SLANG_SURFACE_READ(FUNC_NAME, TYPE_ARGS, ARGS) \
+template <> \
+SLANG_FORCE_INLINE SLANG_CUDA_CALL __half FUNC_NAME<__half>(cudaSurfaceObject_t surfObj, SLANG_DROP_PARENS TYPE_ARGS, cudaSurfaceBoundaryMode boundaryMode) \
+{ \
+    return __ushort_as_half(FUNC_NAME<ushort>(surfObj, SLANG_DROP_PARENS ARGS, boundaryMode)); \
+} \
+\
+template <> \
+SLANG_FORCE_INLINE SLANG_CUDA_CALL __half2 FUNC_NAME<__half2>(cudaSurfaceObject_t surfObj, SLANG_DROP_PARENS TYPE_ARGS, cudaSurfaceBoundaryMode boundaryMode) \
+{ \
+    return __ushort_as_half(FUNC_NAME<ushort2>(surfObj, SLANG_DROP_PARENS ARGS, boundaryMode)); \
+} \
+\
+template <> \
+SLANG_FORCE_INLINE SLANG_CUDA_CALL __half4 FUNC_NAME<__half4>(cudaSurfaceObject_t surfObj, SLANG_DROP_PARENS TYPE_ARGS, cudaSurfaceBoundaryMode boundaryMode) \
+{ \
+    return __ushort_as_half(FUNC_NAME<ushort4>(surfObj, SLANG_DROP_PARENS ARGS, boundaryMode)); \
+}
+
+SLANG_SURFACE_READ(surf1Dread, (int x), (x))
+SLANG_SURFACE_READ(surf2Dread, (int x, int y), (x, y))
+SLANG_SURFACE_READ(surf3Dread, (int x, int y, int z), (x, y, z))
+SLANG_SURFACE_READ(surf1DLayeredread, (int x, int layer), (x, layer))
+SLANG_SURFACE_READ(surf2DLayeredread, (int x, int y, int layer), (x, y, layer))
+SLANG_SURFACE_READ(surfCubemapread, (int x, int y, int face), (x, y, face))
+SLANG_SURFACE_READ(surfCubemapLayeredread, (int x, int y, int layerFace), (x, y, layerFace))
+
+// The following doesn't quite work, for reasons currently not determined
+#if 0
+#define SLANG_SURFACE_WRITE(FUNC_NAME, TYPE_ARGS, ARGS) \
+template <> \
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void FUNC_NAME<__half>(__half data, cudaSurfaceObject_t surfObj, SLANG_DROP_PARENS TYPE_ARGS, cudaSurfaceBoundaryMode boundaryMode) \
+{ \
+    FUNC_NAME<ushort>(__half_as_ushort(data), surfObj, SLANG_DROP_PARENS ARGS, boundaryMode);  \
+} \
+\
+template <> \
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void FUNC_NAME<__half2>(__half2 data, cudaSurfaceObject_t surfObj, SLANG_DROP_PARENS TYPE_ARGS, cudaSurfaceBoundaryMode boundaryMode) \
+{ \
+    FUNC_NAME<ushort2>(__half_as_ushort(data), surfObj, SLANG_DROP_PARENS ARGS, boundaryMode);  \
+} \
+\
+template <> \
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void FUNC_NAME<__half4>(__half4 data, cudaSurfaceObject_t surfObj, SLANG_DROP_PARENS TYPE_ARGS, cudaSurfaceBoundaryMode boundaryMode) \
+{ \
+    FUNC_NAME<ushort4>(__half_as_ushort(data), surfObj, SLANG_DROP_PARENS ARGS, boundaryMode); \
+}
+
+SLANG_SURFACE_WRITE(surf1Dwrite, (int x), (x))
+SLANG_SURFACE_WRITE(surf2Dwrite, (int x, int y), (x, y))
+SLANG_SURFACE_WRITE(surf3Dwrite, (int x, int y, int z), (x, y, z))
+SLANG_SURFACE_WRITE(surf1DLayeredwrite, (int x, int layer), (x, layer))
+SLANG_SURFACE_WRITE(surf2DLayeredwrite, (int x, int y, int layer), (x, y, layer))
+SLANG_SURFACE_WRITE(surfCubemapwrite, (int x, int y, int face), (x, y, face))
+SLANG_SURFACE_WRITE(surfCubemapLayeredwrite, (int x, int y, int layerFace), (x, y, layerFace))
+#endif     
+     
 #endif
 
 // ----------------------------- F32 -----------------------------------------
diff --git a/tests/compute/half-rw-texture-simple.slang b/tests/compute/half-rw-texture-simple.slang
new file mode 100644
index 000000000..ffc34e1f9
--- /dev/null
+++ b/tests/compute/half-rw-texture-simple.slang
@@ -0,0 +1,51 @@
+
+// Native half not supported on CPU currently
+//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute -output-using-type -shaderobj
+// Doesn't work on DX11 currently - locks up on binding
+//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -output-using-type -shaderobj
+// Produces a different result on DX12 with DXBC than expected(!). So disabled for now
+//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -output-using-type -shaderobj
+//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -profile cs_6_0 -use-dxil -output-using-type -shaderobj
+// TODO(JS): Doesn't work on vk currently, because createTextureView not implemented on vk renderer
+//DIABLE_TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute -output-using-type -shaderobj
+// TODO(JS): Doesn't work on certain CI systems. 
+//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -output-using-type -shaderobj -render-features half
+
+//TEST_INPUT: RWTexture2D(format=R_Float16, size=4, content = one, mipMaps = 1):name rwt2D
+RWTexture2D<half> rwt2D;
+
+//TEST_INPUT: RWTexture2D(format=RGBA_Float16, size=4, content = one, mipMaps = 1):name rwt2D_4
+RWTexture2D<half4> rwt2D_4;
+
+//TEST_INPUT: ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer
+RWStructuredBuffer<float> outputBuffer;
+
+[numthreads(4, 1, 1)]
+void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
+{
+    int idx = dispatchThreadID.x;
+    
+    float val = 0.0f;
+ 
+    //val += rwt1D[idx];
+    
+    half h0 = rwt2D[uint2(idx, idx)];
+
+    val += float(h0);
+    
+    half4 h1 = rwt2D_4[uint2(idx, idx)];
+    float4 f1 = h1;
+   
+    val += f1.x + f1.y + f1.z + f1.w;
+    
+    // NOTE! This is disabled because on CUDA, whilst this has an effect it is not what is expected.
+    // The value read back has changed but seems to always be 1.
+    // rwt1D[idx] = idx;
+    //rwt2D[uint2(idx, idx)] = half(idx);    
+    
+    //val += rwt1D[idx];
+    //val += rwt2D[uint2(idx, idx)];
+    //val += rwt3D[uint3(idx, idx, idx)];
+ 
+    outputBuffer[idx] = val;
+}
diff --git a/tests/compute/half-rw-texture-simple.slang.expected.txt b/tests/compute/half-rw-texture-simple.slang.expected.txt
new file mode 100644
index 000000000..6891ba983
--- /dev/null
+++ b/tests/compute/half-rw-texture-simple.slang.expected.txt
@@ -0,0 +1,5 @@
+type: float
+5.000000
+5.000000
+5.000000
+5.000000
author	jsmall-nvidia <jsmall@nvidia.com>	2021-05-06 18:09:44 -0400
committer	GitHub <noreply@github.com>	2021-05-06 15:09:44 -0700
commit	8ee5e4501c746e34a1b59c643422cca56e2be214 (patch)
tree	439531e7e025565778776c8302e457c526a84406
parent	e510a287deb25f2542a68bf21382f2557740d70c (diff)