summaryrefslogtreecommitdiff
path: root/prelude
diff options
context:
space:
mode:
Diffstat (limited to 'prelude')
-rw-r--r--prelude/slang-cuda-prelude.h67
1 files changed, 67 insertions, 0 deletions
diff --git a/prelude/slang-cuda-prelude.h b/prelude/slang-cuda-prelude.h
index 38e018e3e..738f2fa16 100644
--- a/prelude/slang-cuda-prelude.h
+++ b/prelude/slang-cuda-prelude.h
@@ -3366,3 +3366,70 @@ struct TensorView
*reinterpret_cast<T*>(data + offset) = val;
}
};
+
+// Implementations for texture fetch/load functions using tex PTX intrinsics
+// These are used for read-only texture access with integer coordinates
+// See #6781 for details.
+
+// 1D is not supported via PTX. Keeping this placeholder in case it ever gets
+// supported.
+//
+// Integer-coordinate fetch from a 1D texture object via the PTX `tex.1d`
+// instruction. The instruction writes four f32 components; only the first one
+// is returned and the other three are discarded.
+// NOTE(review): the "=f" constraint binds the result to a 32-bit float
+// register, so T is presumably expected to be `float` - confirm against callers.
+template<typename T>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL T tex1Dfetch_int(CUtexObject texObj, int x)
+{
+    float unused; // sink for the three components that are not returned
+    T texel;
+    asm("tex.1d.v4.f32.s32 {%0, %1, %2, %3}, [%4, {%5}];"
+        : "=f"(texel), "=f"(unused), "=f"(unused), "=f"(unused)
+        : "l"(texObj), "r"(x));
+    return texel;
+}
+
+// Integer-coordinate fetch from a 2D texture object via the PTX `tex.2d`
+// instruction. The coordinate tuple is {x, y}. The instruction writes four f32
+// components; only the first one is returned and the other three are discarded.
+// NOTE(review): the "=f" constraint binds the result to a 32-bit float
+// register, so T is presumably expected to be `float` - confirm against callers.
+template<typename T>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL T tex2Dfetch_int(CUtexObject texObj, int x, int y)
+{
+    float unused; // sink for the three components that are not returned
+    T texel;
+    asm("tex.2d.v4.f32.s32 {%0, %1, %2, %3}, [%4, {%5, %6}];"
+        : "=f"(texel), "=f"(unused), "=f"(unused), "=f"(unused)
+        : "l"(texObj), "r"(x), "r"(y));
+    return texel;
+}
+
+// Integer-coordinate fetch from a 3D texture object via the PTX `tex.3d`
+// instruction. The instruction writes four f32 components; only the first one
+// is returned and the other three are discarded.
+// NOTE(review): the "=f" constraint binds the result to a 32-bit float
+// register, so T is presumably expected to be `float` - confirm against callers.
+template<typename T>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL T tex3Dfetch_int(CUtexObject texObj, int x, int y, int z)
+{
+    float unused; // sink for the three components that are not returned
+    T texel;
+    // PTX requires a four-element coordinate vector for 3d textures, so `z` is
+    // passed a second time purely as padding for the fourth slot. From the docs
+    // (https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#texture-instructions-tex):
+    // "Operand c is a scalar or singleton tuple for 1d textures; is a
+    // two-element vector for 2d textures; and is a four-element vector for 3d
+    // textures."
+    asm("tex.3d.v4.f32.s32 {%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}];"
+        : "=f"(texel), "=f"(unused), "=f"(unused), "=f"(unused)
+        : "l"(texObj), "r"(x), "r"(y), "r"(z), "r"(z));
+    return texel;
+}
+
+// Integer-coordinate fetch from one layer of a 1D texture array via the PTX
+// `tex.a1d` instruction. The instruction writes four f32 components; only the
+// first one is returned and the other three are discarded.
+//
+// Parameters:
+//   texObj - texture object handle passed as the 64-bit texture operand
+//   x      - integer texel coordinate within the selected layer
+//   layer  - index of the array layer to read from
+//
+// NOTE(review): the "=f" constraint binds the result to a 32-bit float
+// register, so T is presumably expected to be `float` - confirm against callers.
+template<typename T>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL T tex1DArrayfetch_int(CUtexObject texObj, int x, int layer)
+{
+    T result;
+    float dummy; // sink for the three components that are not returned
+    // For `tex.a1d` the PTX ISA defines the coordinate tuple as
+    // {array index, coordinate}: the FIRST element selects the layer and the
+    // second is the 1d texel coordinate. The layer must therefore be bound to
+    // %5 and x to %6.
+    asm("tex.a1d.v4.f32.s32 {%0, %1, %2, %3}, [%4, {%5, %6}];"
+        : "=f"(result), "=f"(dummy), "=f"(dummy), "=f"(dummy)
+        : "l"(texObj), "r"(layer), "r"(x));
+    return result;
+}
+
+// Integer-coordinate fetch from one layer of a 2D texture array via the PTX
+// `tex.a2d` instruction. The instruction writes four f32 components; only the
+// first one is returned and the other three are discarded.
+//
+// Parameters:
+//   texObj - texture object handle passed as the 64-bit texture operand
+//   x, y   - integer texel coordinates within the selected layer
+//   layer  - index of the array layer to read from
+//
+// NOTE(review): the "=f" constraint binds the result to a 32-bit float
+// register, so T is presumably expected to be `float` - confirm against callers.
+template<typename T>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL T
+tex2DArrayfetch_int(CUtexObject texObj, int x, int y, int layer)
+{
+    T result;
+    float dummy; // sink for the three components that are not returned
+    // For `tex.a2d` the PTX ISA defines the coordinate vector as
+    // {array index, x, y, ignored}: the FIRST element selects the layer, the
+    // next two are the 2d texel coordinates, and the fourth element is ignored.
+    // `y` is repeated only to fill that ignored fourth slot.
+    asm("tex.a2d.v4.f32.s32 {%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}];"
+        : "=f"(result), "=f"(dummy), "=f"(dummy), "=f"(dummy)
+        : "l"(texObj), "r"(layer), "r"(x), "r"(y), "r"(y));
+    return result;
+}
\ No newline at end of file