diff options
Diffstat (limited to 'prelude')
| -rw-r--r-- | prelude/slang-cuda-prelude.h | 33 |
1 files changed, 32 insertions, 1 deletions
diff --git a/prelude/slang-cuda-prelude.h b/prelude/slang-cuda-prelude.h index a00e8f744..a439d274a 100644 --- a/prelude/slang-cuda-prelude.h +++ b/prelude/slang-cuda-prelude.h @@ -63,6 +63,15 @@ #ifndef SLANG_CUDA_BOUNDARY_MODE # define SLANG_CUDA_BOUNDARY_MODE cudaBoundaryModeZero + +// Can be one of SLANG_CUDA_PTX_BOUNDARY_MODE. Only applies *PTX* emitted CUDA operations +// which currently is just RWTextureRW format writes +// +// .trap causes an execution trap on out-of-bounds addresses +// .clamp stores data at the nearest surface location (sized appropriately) +// .zero drops stores to out-of-bounds addresses + +# define SLANG_PTX_BOUNDARY_MODE "zero" #endif struct TypeInfo @@ -371,9 +380,31 @@ SLANG_SURFACE_WRITE(surf1DLayeredwrite, (int x, int layer), (x, layer)) SLANG_SURFACE_WRITE(surf2DLayeredwrite, (int x, int y, int layer), (x, y, layer)) SLANG_SURFACE_WRITE(surfCubemapwrite, (int x, int y, int face), (x, y, face)) SLANG_SURFACE_WRITE(surfCubemapLayeredwrite, (int x, int y, int layerFace), (x, y, layerFace)) - + #endif +// Support for doing format conversion when writing to a surface/RWTexture + +template <typename T> +SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf1Dwrite_convert(T, cudaSurfaceObject_t surfObj, int x, cudaSurfaceBoundaryMode boundaryMode); +template <typename T> +SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf2Dwrite_convert(T, cudaSurfaceObject_t surfObj, int x, int y, cudaSurfaceBoundaryMode boundaryMode); + +// https://docs.nvidia.com/cuda/inline-ptx-assembly/index.html +// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#surface-instructions-sust + +template <> +SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf1Dwrite_convert<float>(float v, cudaSurfaceObject_t surfObj, int x, cudaSurfaceBoundaryMode boundaryMode) +{ + asm volatile ( "{sust.p.1d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1}], {%2};}\n\t" :: "l"(surfObj),"r"(x),"f"(v)); +} + +template <> +SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf2Dwrite_convert<float>(float v, cudaSurfaceObject_t surfObj, int x, int y, cudaSurfaceBoundaryMode boundaryMode) +{ + asm volatile ( "{sust.p.2d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2}], {%3};}\n\t" :: "l"(surfObj),"r"(x),"r"(y),"f"(v)); +} + // ----------------------------- F32 ----------------------------------------- // Unary |
