summaryrefslogtreecommitdiff
path: root/prelude
diff options
context:
space:
mode:
authorMukund Keshava <mkeshava@nvidia.com>2025-04-30 16:07:02 +0530
committerGitHub <noreply@github.com>2025-04-30 10:37:02 +0000
commitb0e150511a6a536c8ad9e74910b30ae179a10ec9 (patch)
treecb749d757e0e556d987d6a30020971ed5a6aa41d /prelude
parent41ac7a0d8b4e9c08eccc2153020900e0262cae84 (diff)
Add subscript operator support in cuda (#6830)
* cuda: Add support for subscript operator

  This CL adds support for the subscript operator for Read Only textures in cuda. Also adds a test for this.

  Fixes #6781

* format code
* fix review comments
* format code

---------

Co-authored-by: slangbot <186143334+slangbot@users.noreply.github.com>
Co-authored-by: Ellie Hermaszewska <ellieh@nvidia.com>
Diffstat (limited to 'prelude')
-rw-r--r--prelude/slang-cuda-prelude.h67
1 files changed, 67 insertions, 0 deletions
diff --git a/prelude/slang-cuda-prelude.h b/prelude/slang-cuda-prelude.h
index 38e018e3e..738f2fa16 100644
--- a/prelude/slang-cuda-prelude.h
+++ b/prelude/slang-cuda-prelude.h
@@ -3366,3 +3366,70 @@ struct TensorView
*reinterpret_cast<T*>(data + offset) = val;
}
};
+
+// Implementations for texture fetch/load functions using tex PTX intrinsics
+// These are used for read-only texture access with integer coordinates
+// See #6781 for details.
+
+// 1D is not supported via PTX. Keeping this placeholder in case it ever gets
+// supported.
+// Issues a PTX `tex.1d` fetch on `texObj` at integer coordinate `x`.
+// The instruction writes four f32 channels; only the first one is returned,
+// the other three are written to a scratch variable and dropped.
+// T is bound to an "=f" output operand, so it has to be usable as one.
+template<typename T>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL T tex1Dfetch_int(CUtexObject texObj, int x)
+{
+    float unusedChannel; // sink for channels 1..3
+    T firstChannel;      // channel 0, the value handed back to the caller
+    asm("tex.1d.v4.f32.s32 {%0, %1, %2, %3}, [%4, {%5}];"
+        : "=f"(firstChannel), "=f"(unusedChannel), "=f"(unusedChannel), "=f"(unusedChannel)
+        : "l"(texObj), "r"(x));
+    return firstChannel;
+}
+
+// Issues a PTX `tex.2d` fetch on `texObj` at integer coordinates (`x`, `y`).
+// The instruction writes four f32 channels; only the first one is returned,
+// the other three are written to a scratch variable and dropped.
+// T is bound to an "=f" output operand, so it has to be usable as one.
+template<typename T>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL T tex2Dfetch_int(CUtexObject texObj, int x, int y)
+{
+    float unusedChannel; // sink for channels 1..3
+    T firstChannel;      // channel 0, the value handed back to the caller
+    asm("tex.2d.v4.f32.s32 {%0, %1, %2, %3}, [%4, {%5, %6}];"
+        : "=f"(firstChannel), "=f"(unusedChannel), "=f"(unusedChannel), "=f"(unusedChannel)
+        : "l"(texObj), "r"(x), "r"(y));
+    return firstChannel;
+}
+
+// Issues a PTX `tex.3d` fetch on `texObj` at integer coordinates (`x`, `y`, `z`).
+// The instruction writes four f32 channels; only the first one is returned,
+// the other three are written to a scratch variable and dropped.
+// T is bound to an "=f" output operand, so it has to be usable as one.
+template<typename T>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL T tex3Dfetch_int(CUtexObject texObj, int x, int y, int z)
+{
+    float unusedChannel; // sink for channels 1..3
+    T firstChannel;      // channel 0, the value handed back to the caller
+
+    // PTX wants a four-element coordinate vector for 3d textures, so `z` is
+    // passed a second time purely as a placeholder for the fourth element.
+    // From the docs:
+    // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#texture-instructions-tex
+    //   Operand c is a scalar or singleton tuple for 1d textures; is a
+    //   two-element vector for 2d textures; and is a four-element vector
+    //   for 3d textures.
+    asm("tex.3d.v4.f32.s32 {%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}];"
+        : "=f"(firstChannel), "=f"(unusedChannel), "=f"(unusedChannel), "=f"(unusedChannel)
+        : "l"(texObj), "r"(x), "r"(y), "r"(z), "r"(z));
+    return firstChannel;
+}
+
+// Issues a PTX `tex.a1d` fetch on `texObj`: selects array element `layer`
+// and reads it at integer coordinate `x`.
+// The instruction writes four f32 channels; only the first one is returned,
+// the other three are written to `dummy` and dropped.
+// T is bound to an "=f" output operand, so it has to be usable as one.
+template<typename T>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL T tex1DArrayfetch_int(CUtexObject texObj, int x, int layer)
+{
+    T result;
+    float dummy;
+    // For texture arrays the PTX coordinate vector is {array index, x}: the
+    // array index comes FIRST, followed by the coordinate inside the selected
+    // texture. Passing {x, layer} would swap the layer and the coordinate.
+    // See:
+    // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#texture-instructions-tex
+    asm("tex.a1d.v4.f32.s32 {%0, %1, %2, %3}, [%4, {%5, %6}];"
+        : "=f"(result), "=f"(dummy), "=f"(dummy), "=f"(dummy)
+        : "l"(texObj), "r"(layer), "r"(x));
+    return result;
+}
+
+// Issues a PTX `tex.a2d` fetch on `texObj`: selects array element `layer`
+// and reads it at integer coordinates (`x`, `y`).
+// The instruction writes four f32 channels; only the first one is returned,
+// the other three are written to `dummy` and dropped.
+// T is bound to an "=f" output operand, so it has to be usable as one.
+template<typename T>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL T
+tex2DArrayfetch_int(CUtexObject texObj, int x, int y, int layer)
+{
+    T result;
+    float dummy;
+    // For 2d texture arrays the PTX coordinate vector has four elements:
+    // {array index, x, y, ignored}. The array index comes FIRST; the fourth
+    // element is ignored by the instruction, so `y` is repeated there purely
+    // as a placeholder. Passing {x, y, layer, layer} would use `x` as the
+    // layer and (`y`, `layer`) as the coordinates.
+    // See:
+    // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#texture-instructions-tex
+    asm("tex.a2d.v4.f32.s32 {%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}];"
+        : "=f"(result), "=f"(dummy), "=f"(dummy), "=f"(dummy)
+        : "l"(texObj), "r"(layer), "r"(x), "r"(y), "r"(y));
+    return result;
+}
\ No newline at end of file