diff options
| author | jsmall-nvidia <jsmall@nvidia.com> | 2021-05-14 16:59:35 -0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2021-05-14 16:59:35 -0400 |
| commit | 12bcc039c2a2c0c69486b670503a7437931d73e4 (patch) | |
| tree | 5e447359944d492d29b18c3c2f702c7fddeae269 /prelude | |
| parent | a2725fd03febf32051811af2fa50fd0de3b61dde (diff) | |
CUDA half RWTexture write support/doc improvements (#1839)
* #include with an absolute path didn't work, because paths were always assumed to be relative.
* Fix for writing to RWTexture with half types on CUDA.
* CUDA half functionality doc updates.
Diffstat (limited to 'prelude')
| -rw-r--r-- | prelude/slang-cuda-prelude.h | 12 |
1 file changed, 9 insertions, 3 deletions
diff --git a/prelude/slang-cuda-prelude.h b/prelude/slang-cuda-prelude.h index 7aaa4c462..a00e8f744 100644 --- a/prelude/slang-cuda-prelude.h +++ b/prelude/slang-cuda-prelude.h @@ -306,6 +306,15 @@ SLANG_FORCE_INLINE SLANG_CUDA_CALL ushort2 __half_as_ushort(const __half2& i) { SLANG_FORCE_INLINE SLANG_CUDA_CALL ushort3 __half_as_ushort(const __half3& i) { return make_ushort3(__half_as_ushort(i.xy.x), __half_as_ushort(i.xy.y), __half_as_ushort(i.z)); } SLANG_FORCE_INLINE SLANG_CUDA_CALL ushort4 __half_as_ushort(const __half4& i) { return make_ushort4(__half_as_ushort(i.xy.x), __half_as_ushort(i.xy.y), __half_as_ushort(i.zw.x), __half_as_ushort(i.zw.y)); } +// This is a little bit of a hack. Fortunately CUDA has the definitions of the templated types in +// include/surface_indirect_functions.h +// Here we find the template definition requires a specialization of __nv_isurf_trait to allow +// a specialization of the surface write functions. +// This *isn't* a problem on the read functions as they don't have a return type that uses this mechanism + +template<> struct __nv_isurf_trait<__half> { typedef void type; }; +template<> struct __nv_isurf_trait<__half2> { typedef void type; }; +template<> struct __nv_isurf_trait<__half4> { typedef void type; }; #define SLANG_DROP_PARENS(...) 
__VA_ARGS__ @@ -336,8 +345,6 @@ SLANG_SURFACE_READ(surf2DLayeredread, (int x, int y, int layer), (x, y, layer)) SLANG_SURFACE_READ(surfCubemapread, (int x, int y, int face), (x, y, face)) SLANG_SURFACE_READ(surfCubemapLayeredread, (int x, int y, int layerFace), (x, y, layerFace)) -// The following doesn't quite work, for reasons currently not determined -#if 0 #define SLANG_SURFACE_WRITE(FUNC_NAME, TYPE_ARGS, ARGS) \ template <> \ SLANG_FORCE_INLINE SLANG_CUDA_CALL void FUNC_NAME<__half>(__half data, cudaSurfaceObject_t surfObj, SLANG_DROP_PARENS TYPE_ARGS, cudaSurfaceBoundaryMode boundaryMode) \ @@ -364,7 +371,6 @@ SLANG_SURFACE_WRITE(surf1DLayeredwrite, (int x, int layer), (x, layer)) SLANG_SURFACE_WRITE(surf2DLayeredwrite, (int x, int y, int layer), (x, y, layer)) SLANG_SURFACE_WRITE(surfCubemapwrite, (int x, int y, int face), (x, y, face)) SLANG_SURFACE_WRITE(surfCubemapLayeredwrite, (int x, int y, int layerFace), (x, y, layerFace)) -#endif #endif |
