diff options
| author | jsmall-nvidia <jsmall@nvidia.com> | 2021-05-15 10:52:55 -0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2021-05-15 10:52:55 -0400 |
| commit | 1027225ac7ec8da0e471b633f358333c8a95b010 (patch) | |
| tree | 39575cd03fed47dffb56d7ca0ec7eff3385d1407 /prelude | |
| parent | 1856b8ad85266ed66985b42bd2321a35f8573a00 (diff) | |
Support for HW format conversions for RWTexture on CUDA (#1840)
* #include an absolute path didn't work - because paths were taken to always be relative.
* Fix for writing to RWTexture with half types on CUDA.
* CUDA half functionality doc updates.
* First pass support for sust.p RWTexture format conversion on write.
* Tidy up implementation of $C.
Made clamping mode #define able.
* A simple test for RWTexture CUDA format conversion.
Diffstat (limited to 'prelude')
| -rw-r--r-- | prelude/slang-cuda-prelude.h | 33 |
1 files changed, 32 insertions, 1 deletions
diff --git a/prelude/slang-cuda-prelude.h b/prelude/slang-cuda-prelude.h index a00e8f744..a439d274a 100644 --- a/prelude/slang-cuda-prelude.h +++ b/prelude/slang-cuda-prelude.h @@ -63,6 +63,15 @@ #ifndef SLANG_CUDA_BOUNDARY_MODE # define SLANG_CUDA_BOUNDARY_MODE cudaBoundaryModeZero + +// Can be one of SLANG_CUDA_PTX_BOUNDARY_MODE. Only applies *PTX* emitted CUDA operations +// which currently is just RWTextureRW format writes +// +// .trap causes an execution trap on out-of-bounds addresses +// .clamp stores data at the nearest surface location (sized appropriately) +// .zero drops stores to out-of-bounds addresses + +# define SLANG_PTX_BOUNDARY_MODE "zero" #endif struct TypeInfo @@ -371,9 +380,31 @@ SLANG_SURFACE_WRITE(surf1DLayeredwrite, (int x, int layer), (x, layer)) SLANG_SURFACE_WRITE(surf2DLayeredwrite, (int x, int y, int layer), (x, y, layer)) SLANG_SURFACE_WRITE(surfCubemapwrite, (int x, int y, int face), (x, y, face)) SLANG_SURFACE_WRITE(surfCubemapLayeredwrite, (int x, int y, int layerFace), (x, y, layerFace)) - + #endif +// Support for doing format conversion when writing to a surface/RWTexture + +template <typename T> +SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf1Dwrite_convert(T, cudaSurfaceObject_t surfObj, int x, cudaSurfaceBoundaryMode boundaryMode); +template <typename T> +SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf2Dwrite_convert(T, cudaSurfaceObject_t surfObj, int x, int y, cudaSurfaceBoundaryMode boundaryMode); + +// https://docs.nvidia.com/cuda/inline-ptx-assembly/index.html +// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#surface-instructions-sust + +template <> +SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf1Dwrite_convert<float>(float v, cudaSurfaceObject_t surfObj, int x, cudaSurfaceBoundaryMode boundaryMode) +{ + asm volatile ( "{sust.p.1d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1}], {%2};}\n\t" :: "l"(surfObj),"r"(x),"f"(v)); +} + +template <> +SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf2Dwrite_convert<float>(float v, cudaSurfaceObject_t surfObj, int x, int y, cudaSurfaceBoundaryMode boundaryMode) +{ + asm volatile ( "{sust.p.2d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2}], {%3};}\n\t" :: "l"(surfObj),"r"(x),"r"(y),"f"(v)); +} + // ----------------------------- F32 ----------------------------------------- // Unary |
