diff options
| author | Mukund Keshava <mkeshava@nvidia.com> | 2025-04-30 16:07:02 +0530 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-04-30 10:37:02 +0000 |
| commit | b0e150511a6a536c8ad9e74910b30ae179a10ec9 (patch) | |
| tree | cb749d757e0e556d987d6a30020971ed5a6aa41d /prelude | |
| parent | 41ac7a0d8b4e9c08eccc2153020900e0262cae84 (diff) | |
Add subscript operator support in cuda (#6830)
* cuda: Add support for subscript operator
This CL adds support for the subscript operator on read-only
textures in CUDA. It also adds a test for this.
Fixes #6781
* format code
* fix review comments
* format code
---------
Co-authored-by: slangbot <186143334+slangbot@users.noreply.github.com>
Co-authored-by: Ellie Hermaszewska <ellieh@nvidia.com>
Diffstat (limited to 'prelude')
| -rw-r--r-- | prelude/slang-cuda-prelude.h | 67 |
1 files changed, 67 insertions, 0 deletions
diff --git a/prelude/slang-cuda-prelude.h b/prelude/slang-cuda-prelude.h index 38e018e3e..738f2fa16 100644 --- a/prelude/slang-cuda-prelude.h +++ b/prelude/slang-cuda-prelude.h @@ -3366,3 +3366,70 @@ struct TensorView *reinterpret_cast<T*>(data + offset) = val; } }; + +// Implementations for texture fetch/load functions using tex PTX intrinsics +// These are used for read-only texture access with integer coordinates +// See #6781 for details. + +// 1D is not supported via PTX. Keeping this placeholder in case it ever gets +// supported. +template<typename T> +SLANG_FORCE_INLINE SLANG_CUDA_CALL T tex1Dfetch_int(CUtexObject texObj, int x) +{ + T result; + float dummy; + asm("tex.1d.v4.f32.s32 {%0, %1, %2, %3}, [%4, {%5}];" + : "=f"(result), "=f"(dummy), "=f"(dummy), "=f"(dummy) + : "l"(texObj), "r"(x)); + return result; +} + +template<typename T> +SLANG_FORCE_INLINE SLANG_CUDA_CALL T tex2Dfetch_int(CUtexObject texObj, int x, int y) +{ + T result; + float dummy; + asm("tex.2d.v4.f32.s32 {%0, %1, %2, %3}, [%4, {%5, %6}];" + : "=f"(result), "=f"(dummy), "=f"(dummy), "=f"(dummy) + : "l"(texObj), "r"(x), "r"(y)); + return result; +} + +template<typename T> +SLANG_FORCE_INLINE SLANG_CUDA_CALL T tex3Dfetch_int(CUtexObject texObj, int x, int y, int z) +{ + T result; + float dummy; + asm("tex.3d.v4.f32.s32 {%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}];" + : "=f"(result), "=f"(dummy), "=f"(dummy), "=f"(dummy) + : "l"(texObj), "r"(x), "r"(y), "r"(z), "r"(z)); + // Note: The repeated z is a dummy used as the fourth operand in ptx. + // From the docs: + // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#texture-instructions-tex + // Operand c is a scalar or singleton tuple for 1d textures; is a two-element vector for 2d + // textures; and is a four-element vector for 3d textures. 
+ return result; +} + +template<typename T> +SLANG_FORCE_INLINE SLANG_CUDA_CALL T tex1DArrayfetch_int(CUtexObject texObj, int x, int layer) +{ + T result; + float dummy; + asm("tex.a1d.v4.f32.s32 {%0, %1, %2, %3}, [%4, {%5, %6}];" + : "=f"(result), "=f"(dummy), "=f"(dummy), "=f"(dummy) + : "l"(texObj), "r"(x), "r"(layer)); + return result; +} + +template<typename T> +SLANG_FORCE_INLINE SLANG_CUDA_CALL T +tex2DArrayfetch_int(CUtexObject texObj, int x, int y, int layer) +{ + T result; + float dummy; + asm("tex.a2d.v4.f32.s32 {%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}];" + : "=f"(result), "=f"(dummy), "=f"(dummy), "=f"(dummy) + : "l"(texObj), "r"(x), "r"(y), "r"(layer), "r"(layer)); + return result; +}
\ No newline at end of file |
