summaryrefslogtreecommitdiffstats
path: root/prelude
diff options
context:
space:
mode:
authorjsmall-nvidia <jsmall@nvidia.com>2021-05-15 10:52:55 -0400
committerGitHub <noreply@github.com>2021-05-15 10:52:55 -0400
commit1027225ac7ec8da0e471b633f358333c8a95b010 (patch)
tree39575cd03fed47dffb56d7ca0ec7eff3385d1407 /prelude
parent1856b8ad85266ed66985b42bd2321a35f8573a00 (diff)
Support for HW format conversions for RWTexture on CUDA (#1840)
* #include of an absolute path didn't work, because paths were always taken to be relative. * Fix for writing to RWTexture with half types on CUDA. * CUDA half functionality doc updates. * First pass support for sust.p RWTexture format conversion on write. * Tidy up implementation of $C. Made the clamping mode #define-able. * A simple test for RWTexture CUDA format conversion.
Diffstat (limited to 'prelude')
-rw-r--r--prelude/slang-cuda-prelude.h33
1 files changed, 32 insertions, 1 deletions
diff --git a/prelude/slang-cuda-prelude.h b/prelude/slang-cuda-prelude.h
index a00e8f744..a439d274a 100644
--- a/prelude/slang-cuda-prelude.h
+++ b/prelude/slang-cuda-prelude.h
@@ -63,6 +63,15 @@
#ifndef SLANG_CUDA_BOUNDARY_MODE
# define SLANG_CUDA_BOUNDARY_MODE cudaBoundaryModeZero
+
+// SLANG_PTX_BOUNDARY_MODE can be one of the modes listed below. It only applies to CUDA operations emitted
+// as *PTX*, which currently is just RWTexture format-converting writes.
+//
+// .trap causes an execution trap on out-of-bounds addresses
+// .clamp stores data at the nearest surface location (sized appropriately)
+// .zero drops stores to out-of-bounds addresses
+
+# define SLANG_PTX_BOUNDARY_MODE "zero"
#endif
struct TypeInfo
@@ -371,9 +380,31 @@ SLANG_SURFACE_WRITE(surf1DLayeredwrite, (int x, int layer), (x, layer))
SLANG_SURFACE_WRITE(surf2DLayeredwrite, (int x, int y, int layer), (x, y, layer))
SLANG_SURFACE_WRITE(surfCubemapwrite, (int x, int y, int face), (x, y, face))
SLANG_SURFACE_WRITE(surfCubemapLayeredwrite, (int x, int y, int layerFace), (x, y, layerFace))
-
+
#endif
+// Support for doing format conversion when writing to a surface/RWTexture
+
+template <typename T>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf1Dwrite_convert(T, cudaSurfaceObject_t surfObj, int x, cudaSurfaceBoundaryMode boundaryMode);
+template <typename T>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf2Dwrite_convert(T, cudaSurfaceObject_t surfObj, int x, int y, cudaSurfaceBoundaryMode boundaryMode);
+
+// https://docs.nvidia.com/cuda/inline-ptx-assembly/index.html
+// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#surface-instructions-sust
+
+template <>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf1Dwrite_convert<float>(float v, cudaSurfaceObject_t surfObj, int x, cudaSurfaceBoundaryMode boundaryMode)
+{
+ asm volatile ( "{sust.p.1d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1}], {%2};}\n\t" :: "l"(surfObj),"r"(x),"f"(v));
+}
+
+template <>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf2Dwrite_convert<float>(float v, cudaSurfaceObject_t surfObj, int x, int y, cudaSurfaceBoundaryMode boundaryMode)
+{
+ asm volatile ( "{sust.p.2d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2}], {%3};}\n\t" :: "l"(surfObj),"r"(x),"r"(y),"f"(v));
+}
+
// ----------------------------- F32 -----------------------------------------
// Unary