summaryrefslogtreecommitdiffstats
path: root/prelude
diff options
context:
space:
mode:
authorjsmall-nvidia <jsmall@nvidia.com>2021-05-14 16:59:35 -0400
committerGitHub <noreply@github.com>2021-05-14 16:59:35 -0400
commit12bcc039c2a2c0c69486b670503a7437931d73e4 (patch)
tree5e447359944d492d29b18c3c2f702c7fddeae269 /prelude
parenta2725fd03febf32051811af2fa50fd0de3b61dde (diff)
CUDA half RWTexture write support/doc improvements (#1839)
* Fix #include of an absolute path, which didn't work because paths were always taken to be relative. * Fix for writing to RWTexture with half types on CUDA. * CUDA half functionality doc updates.
Diffstat (limited to 'prelude')
-rw-r--r--prelude/slang-cuda-prelude.h12
1 files changed, 9 insertions, 3 deletions
diff --git a/prelude/slang-cuda-prelude.h b/prelude/slang-cuda-prelude.h
index 7aaa4c462..a00e8f744 100644
--- a/prelude/slang-cuda-prelude.h
+++ b/prelude/slang-cuda-prelude.h
@@ -306,6 +306,15 @@ SLANG_FORCE_INLINE SLANG_CUDA_CALL ushort2 __half_as_ushort(const __half2& i) {
SLANG_FORCE_INLINE SLANG_CUDA_CALL ushort3 __half_as_ushort(const __half3& i) { return make_ushort3(__half_as_ushort(i.xy.x), __half_as_ushort(i.xy.y), __half_as_ushort(i.z)); }
SLANG_FORCE_INLINE SLANG_CUDA_CALL ushort4 __half_as_ushort(const __half4& i) { return make_ushort4(__half_as_ushort(i.xy.x), __half_as_ushort(i.xy.y), __half_as_ushort(i.zw.x), __half_as_ushort(i.zw.y)); }
+// This is a little bit of a hack. Fortunately CUDA has the definitions of the templated types in
+// include/surface_indirect_functions.h
+// In that header we find that the template definition requires a specialization of
+// __nv_isurf_trait for a type in order to allow a specialization of the surface write functions.
+// This *isn't* a problem for the read functions, as their return type doesn't use this mechanism.
+
+template<> struct __nv_isurf_trait<__half> { typedef void type; };
+template<> struct __nv_isurf_trait<__half2> { typedef void type; };
+template<> struct __nv_isurf_trait<__half4> { typedef void type; };
#define SLANG_DROP_PARENS(...) __VA_ARGS__
@@ -336,8 +345,6 @@ SLANG_SURFACE_READ(surf2DLayeredread, (int x, int y, int layer), (x, y, layer))
SLANG_SURFACE_READ(surfCubemapread, (int x, int y, int face), (x, y, face))
SLANG_SURFACE_READ(surfCubemapLayeredread, (int x, int y, int layerFace), (x, y, layerFace))
-// The following doesn't quite work, for reasons currently not determined
-#if 0
#define SLANG_SURFACE_WRITE(FUNC_NAME, TYPE_ARGS, ARGS) \
template <> \
SLANG_FORCE_INLINE SLANG_CUDA_CALL void FUNC_NAME<__half>(__half data, cudaSurfaceObject_t surfObj, SLANG_DROP_PARENS TYPE_ARGS, cudaSurfaceBoundaryMode boundaryMode) \
@@ -364,7 +371,6 @@ SLANG_SURFACE_WRITE(surf1DLayeredwrite, (int x, int layer), (x, layer))
SLANG_SURFACE_WRITE(surf2DLayeredwrite, (int x, int y, int layer), (x, y, layer))
SLANG_SURFACE_WRITE(surfCubemapwrite, (int x, int y, int face), (x, y, face))
SLANG_SURFACE_WRITE(surfCubemapLayeredwrite, (int x, int y, int layerFace), (x, y, layerFace))
-#endif
#endif