summary refs log tree commit diff stats
path: root/prelude
diff options
context:
space:
mode:
author Mukund Keshava <mkeshava@nvidia.com> 2025-07-16 20:46:06 +0530
committer GitHub <noreply@github.com> 2025-07-16 15:16:06 +0000
commit 0bc89f27b08fa7241a7be4f80c2161f25ea3bf78 (patch)
tree f441047c0cb7a8d8ce4814e941d2d2ad39245cad /prelude
parent 56e91e91e425b77a9d1512cca3d26ce446781935 (diff)
Fix CUDA issues with texture reads and surface writes (#7780)
* Fix 1D texture reads in CUDA target

  Fixes #7570: 1D surface writes don't work

  The issue was that the Load function for read-only textures (hlsl.meta.slang lines 3629-3656) only supported 2D and 3D textures for CUDA targets, causing 1D texture reads to fall through to <invalid intrinsic>. This affected the srcTexture[tid.x] read operation in the reproduction case.

  Changes:
  - Updated static_assert to include SLANG_TEXTURE_1D support
  - Added tex1DArrayfetch_int<T> for 1D array texture reads
  - Added tex1Dfetch_int<T> for regular 1D texture reads

  🤖 Generated with [Claude Code](https://claude.ai/code)

  Co-Authored-By: Mukund Keshava <mkeshavaNV@users.noreply.github.com>

* Add 1D texture read support for CUDA target

  - Add tex1Dfetch_int template specializations for float2, float4, uint, uint2, uint4
  - Remove TODO comment about 1D PTX not being supported
  - Enable 1D texture test in texture-subscript-cuda.slang
  - Fix assembly code issues in original template specializations

  🤖 Generated with [Claude Code](https://claude.ai/code)

  Co-Authored-By: Mukund Keshava <mkeshavaNV@users.noreply.github.com>

* Update slang-cuda-prelude.h

* Fix texture3d ptx issue

* undo 1D texture changes

* Update hlsl.meta.slang

* Update hlsl.meta.slang

* Update hlsl.meta.slang

* Update hlsl.meta.slang

* Extend texture-subscript-cuda.slang test with uint and int format variants

  Add test cases for newly supported texture formats in CUDA:
  - 2D textures with uint, uint2, uint4
  - 2D textures with int, int2, int4
  - 3D textures with uint, uint2, uint4
  - 3D textures with int, int2, int4

  This ensures the texture subscript operations work correctly for all the format variants added in the CUDA texture fixes.

  Co-authored-by: Mukund Keshava <mkeshavaNV@users.noreply.github.com>

* update expected file

---------

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: Mukund Keshava <mkeshavaNV@users.noreply.github.com>
Diffstat (limited to 'prelude')
-rw-r--r-- prelude/slang-cuda-prelude.h 237
1 files changed, 232 insertions, 5 deletions
diff --git a/prelude/slang-cuda-prelude.h b/prelude/slang-cuda-prelude.h
index f0c053168..fd79b77aa 100644
--- a/prelude/slang-cuda-prelude.h
+++ b/prelude/slang-cuda-prelude.h
@@ -1324,10 +1324,11 @@ SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf3Dwrite_convert<float>(
cudaSurfaceBoundaryMode boundaryMode)
{
asm volatile(
- "{sust.p.2d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2,%3}], {%4};}\n\t" ::"l"(surfObj),
+ "{sust.p.3d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2,%3,%4}], {%5};}\n\t" ::"l"(surfObj),
"r"(x),
"r"(y),
"r"(z),
+ "r"(0),
"f"(v));
}
@@ -1376,11 +1377,12 @@ SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf3Dwrite_convert<float2>(
{
const float vx = v.x, vy = v.y;
asm volatile(
- "{sust.p.2d.v2.b32." SLANG_PTX_BOUNDARY_MODE
- " [%0, {%1,%2,%3}], {%4,%5};}\n\t" ::"l"(surfObj),
+ "{sust.p.3d.v2.b32." SLANG_PTX_BOUNDARY_MODE
+ " [%0, {%1,%2,%3,%4}], {%5,%6};}\n\t" ::"l"(surfObj),
"r"(x),
"r"(y),
"r"(z),
+ "r"(0),
"f"(vx),
"f"(vy));
}
@@ -1435,17 +1437,242 @@ SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf3Dwrite_convert<float4>(
{
const float vx = v.x, vy = v.y, vz = v.z, vw = v.w;
asm volatile(
- "{sust.p.2d.v4.b32." SLANG_PTX_BOUNDARY_MODE
- " [%0, {%1,%2,%3}], {%4,%5,%6,%7};}\n\t" ::"l"(surfObj),
+ "{sust.p.3d.v4.b32." SLANG_PTX_BOUNDARY_MODE
+ " [%0, {%1,%2,%3,%4}], {%5,%6,%7,%8};}\n\t" ::"l"(surfObj),
"r"(x),
"r"(y),
"r"(z),
+ "r"(0),
"f"(vx),
"f"(vy),
"f"(vz),
"f"(vw));
}
+template<>  // Explicit specialization: 2D surface write of a single uint.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf2Dwrite_convert<uint>(
+ uint v,  // value to store (asm operand %3)
+ cudaSurfaceObject_t surfObj,  // destination surface handle (asm operand %0)
+ int x,  // first coordinate (%1)
+ int y,  // second coordinate (%2)
+ cudaSurfaceBoundaryMode boundaryMode)  // not referenced in the body; the mode suffix is taken from SLANG_PTX_BOUNDARY_MODE
+{
+ asm volatile(
+ "{sust.p.2d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2}], {%3};}\n\t" ::"l"(surfObj),  // PTX formatted 2D surface store; input operands only, no outputs
+ "r"(x),
+ "r"(y),
+ "r"(v));  // the 32-bit payload is passed in a general-purpose register
+}
+template<>  // Explicit specialization: 3D surface write of a single uint.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf3Dwrite_convert<uint>(
+ uint v,  // value to store (asm operand %5)
+ cudaSurfaceObject_t surfObj,  // destination surface handle (asm operand %0)
+ int x,  // first coordinate (%1)
+ int y,  // second coordinate (%2)
+ int z,  // third coordinate (%3)
+ cudaSurfaceBoundaryMode boundaryMode)  // not referenced in the body; the mode suffix is taken from SLANG_PTX_BOUNDARY_MODE
+{
+ asm volatile(
+ "{sust.p.3d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2,%3,%4}], {%5};}\n\t" ::"l"(surfObj),  // PTX formatted 3D surface store; input operands only, no outputs
+ "r"(x),
+ "r"(y),
+ "r"(z),
+ "r"(0),  // %4: the .3d form takes a four-element coordinate vector; the last slot is filled with constant 0 (per the PTX ISA it is ignored)
+ "r"(v));  // the 32-bit payload is passed in a general-purpose register
+}
+
+template<>  // Explicit specialization: 2D surface write of a two-component uint2.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf2Dwrite_convert<uint2>(
+ uint2 v,  // value to store; its .x/.y components become asm operands %3/%4
+ cudaSurfaceObject_t surfObj,  // destination surface handle (asm operand %0)
+ int x,  // first coordinate (%1)
+ int y,  // second coordinate (%2)
+ cudaSurfaceBoundaryMode boundaryMode)  // not referenced in the body; the mode suffix is taken from SLANG_PTX_BOUNDARY_MODE
+{
+ const uint vx = v.x, vy = v.y;  // split into scalars so each component binds to its own asm operand
+ asm volatile(
+ "{sust.p.2d.v2.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2}], {%3,%4};}\n\t" ::"l"(surfObj),  // PTX formatted 2D store of a 2 x 32-bit vector; input operands only
+ "r"(x),
+ "r"(y),
+ "r"(vx),
+ "r"(vy));
+}
+template<>  // Explicit specialization: 3D surface write of a two-component uint2.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf3Dwrite_convert<uint2>(
+ uint2 v,  // value to store; its .x/.y components become asm operands %5/%6
+ cudaSurfaceObject_t surfObj,  // destination surface handle (asm operand %0)
+ int x,  // first coordinate (%1)
+ int y,  // second coordinate (%2)
+ int z,  // third coordinate (%3)
+ cudaSurfaceBoundaryMode boundaryMode)  // not referenced in the body; the mode suffix is taken from SLANG_PTX_BOUNDARY_MODE
+{
+ const uint vx = v.x, vy = v.y;  // split into scalars so each component binds to its own asm operand
+ asm volatile(
+ "{sust.p.3d.v2.b32." SLANG_PTX_BOUNDARY_MODE
+ " [%0, {%1,%2,%3,%4}], {%5,%6};}\n\t" ::"l"(surfObj),  // PTX formatted 3D store of a 2 x 32-bit vector; input operands only
+ "r"(x),
+ "r"(y),
+ "r"(z),
+ "r"(0),  // %4: the .3d form takes a four-element coordinate vector; the last slot is filled with constant 0 (per the PTX ISA it is ignored)
+ "r"(vx),
+ "r"(vy));
+}
+
+template<>  // Explicit specialization: 2D surface write of a four-component uint4.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf2Dwrite_convert<uint4>(
+ uint4 v,  // value to store; its .x/.y/.z/.w components become asm operands %3..%6
+ cudaSurfaceObject_t surfObj,  // destination surface handle (asm operand %0)
+ int x,  // first coordinate (%1)
+ int y,  // second coordinate (%2)
+ cudaSurfaceBoundaryMode boundaryMode)  // not referenced in the body; the mode suffix is taken from SLANG_PTX_BOUNDARY_MODE
+{
+ const uint vx = v.x, vy = v.y, vz = v.z, vw = v.w;  // split into scalars so each component binds to its own asm operand
+ asm volatile(
+ "{sust.p.2d.v4.b32." SLANG_PTX_BOUNDARY_MODE
+ " [%0, {%1,%2}], {%3,%4,%5,%6};}\n\t" ::"l"(surfObj),  // PTX formatted 2D store of a 4 x 32-bit vector; input operands only
+ "r"(x),
+ "r"(y),
+ "r"(vx),
+ "r"(vy),
+ "r"(vz),
+ "r"(vw));
+}
+template<>  // Explicit specialization: 3D surface write of a four-component uint4.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf3Dwrite_convert<uint4>(
+ uint4 v,  // value to store; its .x/.y/.z/.w components become asm operands %5..%8
+ cudaSurfaceObject_t surfObj,  // destination surface handle (asm operand %0)
+ int x,  // first coordinate (%1)
+ int y,  // second coordinate (%2)
+ int z,  // third coordinate (%3)
+ cudaSurfaceBoundaryMode boundaryMode)  // not referenced in the body; the mode suffix is taken from SLANG_PTX_BOUNDARY_MODE
+{
+ const uint vx = v.x, vy = v.y, vz = v.z, vw = v.w;  // split into scalars so each component binds to its own asm operand
+ asm volatile(
+ "{sust.p.3d.v4.b32." SLANG_PTX_BOUNDARY_MODE
+ " [%0, {%1,%2,%3,%4}], {%5,%6,%7,%8};}\n\t" ::"l"(surfObj),  // PTX formatted 3D store of a 4 x 32-bit vector; input operands only
+ "r"(x),
+ "r"(y),
+ "r"(z),
+ "r"(0),  // %4: the .3d form takes a four-element coordinate vector; the last slot is filled with constant 0 (per the PTX ISA it is ignored)
+ "r"(vx),
+ "r"(vy),
+ "r"(vz),
+ "r"(vw));
+}
+
+template<>  // Explicit specialization: 2D surface write of a single int (same .b32 instruction as the uint variant).
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf2Dwrite_convert<int>(
+ int v,  // value to store (asm operand %3)
+ cudaSurfaceObject_t surfObj,  // destination surface handle (asm operand %0)
+ int x,  // first coordinate (%1)
+ int y,  // second coordinate (%2)
+ cudaSurfaceBoundaryMode boundaryMode)  // not referenced in the body; the mode suffix is taken from SLANG_PTX_BOUNDARY_MODE
+{
+ asm volatile(
+ "{sust.p.2d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2}], {%3};}\n\t" ::"l"(surfObj),  // PTX formatted 2D surface store; input operands only, no outputs
+ "r"(x),
+ "r"(y),
+ "r"(v));  // the 32-bit payload is passed in a general-purpose register
+}
+// Int2: 2D surface write of a two-component int2.
+template<>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf2Dwrite_convert<int2>(
+ int2 v,  // value to store; its .x/.y components become asm operands %3/%4
+ cudaSurfaceObject_t surfObj,  // destination surface handle (asm operand %0)
+ int x,  // first coordinate (%1)
+ int y,  // second coordinate (%2)
+ cudaSurfaceBoundaryMode boundaryMode)  // not referenced in the body; the mode suffix is taken from SLANG_PTX_BOUNDARY_MODE
+{
+ const int vx = v.x, vy = v.y;  // split into scalars so each component binds to its own asm operand
+ asm volatile(
+ "{sust.p.2d.v2.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2}], {%3,%4};}\n\t" ::"l"(surfObj),  // PTX formatted 2D store of a 2 x 32-bit vector; input operands only
+ "r"(x),
+ "r"(y),
+ "r"(vx),
+ "r"(vy));
+}
+template<>  // Explicit specialization: 2D surface write of a four-component int4.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf2Dwrite_convert<int4>(
+ int4 v,  // value to store; its .x/.y/.z/.w components become asm operands %3..%6
+ cudaSurfaceObject_t surfObj,  // destination surface handle (asm operand %0)
+ int x,  // first coordinate (%1)
+ int y,  // second coordinate (%2)
+ cudaSurfaceBoundaryMode boundaryMode)  // not referenced in the body; the mode suffix is taken from SLANG_PTX_BOUNDARY_MODE
+{
+ const int vx = v.x, vy = v.y, vz = v.z, vw = v.w;  // split into scalars so each component binds to its own asm operand
+ asm volatile(
+ "{sust.p.2d.v4.b32." SLANG_PTX_BOUNDARY_MODE
+ " [%0, {%1,%2}], {%3,%4,%5,%6};}\n\t" ::"l"(surfObj),  // PTX formatted 2D store of a 4 x 32-bit vector; input operands only
+ "r"(x),
+ "r"(y),
+ "r"(vx),
+ "r"(vy),
+ "r"(vz),
+ "r"(vw));
+}
+
+template<>  // Explicit specialization: 3D surface write of a single int (same .b32 instruction as the uint variant).
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf3Dwrite_convert<int>(
+ int v,  // value to store (asm operand %5)
+ cudaSurfaceObject_t surfObj,  // destination surface handle (asm operand %0)
+ int x,  // first coordinate (%1)
+ int y,  // second coordinate (%2)
+ int z,  // third coordinate (%3)
+ cudaSurfaceBoundaryMode boundaryMode)  // not referenced in the body; the mode suffix is taken from SLANG_PTX_BOUNDARY_MODE
+{
+ asm volatile(
+ "{sust.p.3d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2,%3,%4}], {%5};}\n\t" ::"l"(surfObj),  // PTX formatted 3D surface store; input operands only, no outputs
+ "r"(x),
+ "r"(y),
+ "r"(z),
+ "r"(0),  // %4: the .3d form takes a four-element coordinate vector; the last slot is filled with constant 0 (per the PTX ISA it is ignored)
+ "r"(v));  // the 32-bit payload is passed in a general-purpose register
+}
+// Int2: 3D surface write of a two-component int2.
+template<>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf3Dwrite_convert<int2>(
+ int2 v,  // value to store; its .x/.y components become asm operands %5/%6
+ cudaSurfaceObject_t surfObj,  // destination surface handle (asm operand %0)
+ int x,  // first coordinate (%1)
+ int y,  // second coordinate (%2)
+ int z,  // third coordinate (%3)
+ cudaSurfaceBoundaryMode boundaryMode)  // not referenced in the body; the mode suffix is taken from SLANG_PTX_BOUNDARY_MODE
+{
+ const int vx = v.x, vy = v.y;  // split into scalars so each component binds to its own asm operand
+ asm volatile(
+ "{sust.p.3d.v2.b32." SLANG_PTX_BOUNDARY_MODE
+ " [%0, {%1,%2,%3,%4}], {%5,%6};}\n\t" ::"l"(surfObj),  // PTX formatted 3D store of a 2 x 32-bit vector; input operands only
+ "r"(x),
+ "r"(y),
+ "r"(z),
+ "r"(0),  // %4: the .3d form takes a four-element coordinate vector; the last slot is filled with constant 0 (per the PTX ISA it is ignored)
+ "r"(vx),
+ "r"(vy));
+}
+// Int4: 3D surface write of a four-component int4.
+template<>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf3Dwrite_convert<int4>(
+ int4 v,  // value to store; its .x/.y/.z/.w components become asm operands %5..%8
+ cudaSurfaceObject_t surfObj,  // destination surface handle (asm operand %0)
+ int x,  // first coordinate (%1)
+ int y,  // second coordinate (%2)
+ int z,  // third coordinate (%3)
+ cudaSurfaceBoundaryMode boundaryMode)  // not referenced in the body; the mode suffix is taken from SLANG_PTX_BOUNDARY_MODE
+{
+ const int vx = v.x, vy = v.y, vz = v.z, vw = v.w;  // split into scalars so each component binds to its own asm operand
+ asm volatile(
+ "{sust.p.3d.v4.b32." SLANG_PTX_BOUNDARY_MODE
+ " [%0, {%1,%2,%3,%4}], {%5,%6,%7,%8};}\n\t" ::"l"(surfObj),  // PTX formatted 3D store of a 4 x 32-bit vector; input operands only
+ "r"(x),
+ "r"(y),
+ "r"(z),
+ "r"(0),  // %4: the .3d form takes a four-element coordinate vector; the last slot is filled with constant 0 (per the PTX ISA it is ignored)
+ "r"(vx),
+ "r"(vy),
+ "r"(vz),
+ "r"(vw));
+}
+
// ----------------------------- F32 -----------------------------------------
// Unary