diff options
Diffstat (limited to 'prelude')
| -rw-r--r-- | prelude/slang-cuda-prelude.h | 67 |
1 files changed, 67 insertions, 0 deletions
diff --git a/prelude/slang-cuda-prelude.h b/prelude/slang-cuda-prelude.h index 38e018e3e..738f2fa16 100644 --- a/prelude/slang-cuda-prelude.h +++ b/prelude/slang-cuda-prelude.h @@ -3366,3 +3366,70 @@ struct TensorView *reinterpret_cast<T*>(data + offset) = val; } };
+
+// Texture fetch/load helpers for read-only access with integer coordinates, built on the
+// PTX tex instruction. Each one requests a .v4.f32 result addressed by .s32 coordinates and
+// returns only the first component; the rest go to a scratch float. See #6781 for details.
+
+// 1D is not supported via PTX. Keeping this placeholder in case it ever gets supported.
+template<typename T>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL T tex1Dfetch_int(CUtexObject texObj, int x)
+{
+    T result;
+    float dummy;
+    asm("tex.1d.v4.f32.s32 {%0, %1, %2, %3}, [%4, {%5}];"
+        : "=f"(result), "=f"(dummy), "=f"(dummy), "=f"(dummy)
+        : "l"(texObj), "r"(x));
+    return result;
+}
+
+template<typename T>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL T tex2Dfetch_int(CUtexObject texObj, int x, int y)
+{
+    T result;
+    float dummy;
+    asm("tex.2d.v4.f32.s32 {%0, %1, %2, %3}, [%4, {%5, %6}];"
+        : "=f"(result), "=f"(dummy), "=f"(dummy), "=f"(dummy)
+        : "l"(texObj), "r"(x), "r"(y));
+    return result;
+}
+
+template<typename T>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL T tex3Dfetch_int(CUtexObject texObj, int x, int y, int z)
+{
+    T result;
+    float dummy;
+    // PTX wants a four-element coordinate vector for 3d textures (two elements for 2d, a
+    // scalar for 1d); the fourth element is only a filler here, so z is repeated. See
+    // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#texture-instructions-tex
+    asm("tex.3d.v4.f32.s32 {%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}];"
+        : "=f"(result), "=f"(dummy), "=f"(dummy), "=f"(dummy)
+        : "l"(texObj), "r"(x), "r"(y), "r"(z), "r"(z));
+    return result;
+}
+
+// Array variants: per the tex documentation linked above, the array index is the FIRST
+// element of the coordinate vector, ahead of the texel coordinates, so layer leads.
+template<typename T>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL T tex1DArrayfetch_int(CUtexObject texObj, int x, int layer)
+{
+    T result;
+    float dummy;
+    asm("tex.a1d.v4.f32.s32 {%0, %1, %2, %3}, [%4, {%5, %6}];"
+        : "=f"(result), "=f"(dummy), "=f"(dummy), "=f"(dummy)
+        : "l"(texObj), "r"(layer), "r"(x));
+    return result;
+}
+
+template<typename T>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL T
+tex2DArrayfetch_int(CUtexObject texObj, int x, int y, int layer)
+{
+    T result;
+    float dummy;
+    // a2d takes {index, x, y, ignored}; layer is repeated only to fill the ignored slot.
+    asm("tex.a2d.v4.f32.s32 {%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}];"
+        : "=f"(result), "=f"(dummy), "=f"(dummy), "=f"(dummy)
+        : "l"(texObj), "r"(layer), "r"(x), "r"(y), "r"(layer));
+    return result;
+}
\ No newline at end of file |
