4 files changed, 342 insertions, 8 deletions
diff --git a/prelude/slang-cuda-prelude.h b/prelude/slang-cuda-prelude.h
index f0c053168..fd79b77aa 100644
--- a/prelude/slang-cuda-prelude.h
+++ b/prelude/slang-cuda-prelude.h
@@ -1324,10 +1324,11 @@ SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf3Dwrite_convert<float>(
     cudaSurfaceBoundaryMode boundaryMode)
 {
     asm volatile(
-        "{sust.p.2d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2,%3}], {%4};}\n\t" ::"l"(surfObj),
+        "{sust.p.3d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2,%3,%4}], {%5};}\n\t" ::"l"(surfObj),
         "r"(x),
         "r"(y),
         "r"(z),
+        "r"(0),
         "f"(v));
 }
 
@@ -1376,11 +1377,12 @@ SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf3Dwrite_convert<float2>(
 {
     const float vx = v.x, vy = v.y;
     asm volatile(
-        "{sust.p.2d.v2.b32." SLANG_PTX_BOUNDARY_MODE
-        " [%0, {%1,%2,%3}], {%4,%5};}\n\t" ::"l"(surfObj),
+        "{sust.p.3d.v2.b32." SLANG_PTX_BOUNDARY_MODE
+        " [%0, {%1,%2,%3,%4}], {%5,%6};}\n\t" ::"l"(surfObj),
         "r"(x),
         "r"(y),
         "r"(z),
+        "r"(0),
         "f"(vx),
         "f"(vy));
 }
@@ -1435,17 +1437,242 @@ SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf3Dwrite_convert<float4>(
 {
     const float vx = v.x, vy = v.y, vz = v.z, vw = v.w;
     asm volatile(
-        "{sust.p.2d.v4.b32." SLANG_PTX_BOUNDARY_MODE
-        " [%0, {%1,%2,%3}], {%4,%5,%6,%7};}\n\t" ::"l"(surfObj),
+        "{sust.p.3d.v4.b32." SLANG_PTX_BOUNDARY_MODE
+        " [%0, {%1,%2,%3,%4}], {%5,%6,%7,%8};}\n\t" ::"l"(surfObj),
         "r"(x),
         "r"(y),
         "r"(z),
+        "r"(0),
         "f"(vx),
         "f"(vy),
         "f"(vz),
         "f"(vw));
 }
 
+template<> // uint texel, 2D surface: stores v at (x, y) with PTX sust.p.2d.b32
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf2Dwrite_convert<uint>(
+    uint v,
+    cudaSurfaceObject_t surfObj,
+    int x,
+    int y,
+    cudaSurfaceBoundaryMode boundaryMode) // not referenced in the body; SLANG_PTX_BOUNDARY_MODE is spliced into the opcode
+{
+    asm volatile( // operands: %0 = surfObj, %1..%2 = (x, y), %3 = v
+        "{sust.p.2d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2}], {%3};}\n\t" ::"l"(surfObj),
+        "r"(x),
+        "r"(y),
+        "r"(v));
+}
+template<> // uint texel, 3D surface: stores v at (x, y, z) with PTX sust.p.3d.b32
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf3Dwrite_convert<uint>(
+    uint v,
+    cudaSurfaceObject_t surfObj,
+    int x,
+    int y,
+    int z,
+    cudaSurfaceBoundaryMode boundaryMode) // not referenced in the body; SLANG_PTX_BOUNDARY_MODE is spliced into the opcode
+{
+    asm volatile( // operands: %0 = surfObj, %1..%3 = (x, y, z), %4 = constant 0, %5 = v
+        "{sust.p.3d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2,%3,%4}], {%5};}\n\t" ::"l"(surfObj),
+        "r"(x),
+        "r"(y),
+        "r"(z),
+        "r"(0), // fills the fourth slot of the coordinate vector; NOTE(review): presumably ignored by PTX for 3d surfaces - confirm
+        "r"(v));
+}
+
+template<> // uint2 texel, 2D surface: stores (v.x, v.y) at (x, y) with PTX sust.p.2d.v2.b32
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf2Dwrite_convert<uint2>(
+    uint2 v,
+    cudaSurfaceObject_t surfObj,
+    int x,
+    int y,
+    cudaSurfaceBoundaryMode boundaryMode) // not referenced in the body; SLANG_PTX_BOUNDARY_MODE is spliced into the opcode
+{
+    const uint vx = v.x, vy = v.y; // scalar copies, each bound to its own "r" operand below
+    asm volatile( // operands: %0 = surfObj, %1..%2 = (x, y), %3..%4 = (vx, vy)
+        "{sust.p.2d.v2.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2}], {%3,%4};}\n\t" ::"l"(surfObj),
+        "r"(x),
+        "r"(y),
+        "r"(vx),
+        "r"(vy));
+}
+template<> // uint2 texel, 3D surface: stores (v.x, v.y) at (x, y, z) with PTX sust.p.3d.v2.b32
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf3Dwrite_convert<uint2>(
+    uint2 v,
+    cudaSurfaceObject_t surfObj,
+    int x,
+    int y,
+    int z,
+    cudaSurfaceBoundaryMode boundaryMode) // not referenced in the body; SLANG_PTX_BOUNDARY_MODE is spliced into the opcode
+{
+    const uint vx = v.x, vy = v.y; // scalar copies, each bound to its own "r" operand below
+    asm volatile( // operands: %0 = surfObj, %1..%3 = (x, y, z), %4 = constant 0, %5..%6 = (vx, vy)
+        "{sust.p.3d.v2.b32." SLANG_PTX_BOUNDARY_MODE
+        " [%0, {%1,%2,%3,%4}], {%5,%6};}\n\t" ::"l"(surfObj),
+        "r"(x),
+        "r"(y),
+        "r"(z),
+        "r"(0), // fills the fourth slot of the coordinate vector; NOTE(review): presumably ignored by PTX for 3d surfaces - confirm
+        "r"(vx),
+        "r"(vy));
+}
+
+template<> // uint4 texel, 2D surface: stores (v.x, v.y, v.z, v.w) at (x, y) with PTX sust.p.2d.v4.b32
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf2Dwrite_convert<uint4>(
+    uint4 v,
+    cudaSurfaceObject_t surfObj,
+    int x,
+    int y,
+    cudaSurfaceBoundaryMode boundaryMode) // not referenced in the body; SLANG_PTX_BOUNDARY_MODE is spliced into the opcode
+{
+    const uint vx = v.x, vy = v.y, vz = v.z, vw = v.w; // scalar copies, each bound to its own "r" operand below
+    asm volatile( // operands: %0 = surfObj, %1..%2 = (x, y), %3..%6 = (vx, vy, vz, vw)
+        "{sust.p.2d.v4.b32." SLANG_PTX_BOUNDARY_MODE
+        " [%0, {%1,%2}], {%3,%4,%5,%6};}\n\t" ::"l"(surfObj),
+        "r"(x),
+        "r"(y),
+        "r"(vx),
+        "r"(vy),
+        "r"(vz),
+        "r"(vw));
+}
+template<> // uint4 texel, 3D surface: stores (v.x, v.y, v.z, v.w) at (x, y, z) with PTX sust.p.3d.v4.b32
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf3Dwrite_convert<uint4>(
+    uint4 v,
+    cudaSurfaceObject_t surfObj,
+    int x,
+    int y,
+    int z,
+    cudaSurfaceBoundaryMode boundaryMode) // not referenced in the body; SLANG_PTX_BOUNDARY_MODE is spliced into the opcode
+{
+    const uint vx = v.x, vy = v.y, vz = v.z, vw = v.w; // scalar copies, each bound to its own "r" operand below
+    asm volatile( // operands: %0 = surfObj, %1..%3 = (x, y, z), %4 = constant 0, %5..%8 = (vx, vy, vz, vw)
+        "{sust.p.3d.v4.b32." SLANG_PTX_BOUNDARY_MODE
+        " [%0, {%1,%2,%3,%4}], {%5,%6,%7,%8};}\n\t" ::"l"(surfObj),
+        "r"(x),
+        "r"(y),
+        "r"(z),
+        "r"(0), // fills the fourth slot of the coordinate vector; NOTE(review): presumably ignored by PTX for 3d surfaces - confirm
+        "r"(vx),
+        "r"(vy),
+        "r"(vz),
+        "r"(vw));
+}
+
+template<> // int texel, 2D surface: stores v at (x, y) with PTX sust.p.2d.b32 (same instruction string as the uint overload)
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf2Dwrite_convert<int>(
+    int v,
+    cudaSurfaceObject_t surfObj,
+    int x,
+    int y,
+    cudaSurfaceBoundaryMode boundaryMode) // not referenced in the body; SLANG_PTX_BOUNDARY_MODE is spliced into the opcode
+{
+    asm volatile( // operands: %0 = surfObj, %1..%2 = (x, y), %3 = v
+        "{sust.p.2d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2}], {%3};}\n\t" ::"l"(surfObj),
+        "r"(x),
+        "r"(y),
+        "r"(v));
+}
+// int2 texel, 2D surface: stores (v.x, v.y) at (x, y) with PTX sust.p.2d.v2.b32
+template<>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf2Dwrite_convert<int2>(
+    int2 v,
+    cudaSurfaceObject_t surfObj,
+    int x,
+    int y,
+    cudaSurfaceBoundaryMode boundaryMode) // not referenced in the body; SLANG_PTX_BOUNDARY_MODE is spliced into the opcode
+{
+    const int vx = v.x, vy = v.y; // scalar copies, each bound to its own "r" operand below
+    asm volatile( // operands: %0 = surfObj, %1..%2 = (x, y), %3..%4 = (vx, vy)
+        "{sust.p.2d.v2.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2}], {%3,%4};}\n\t" ::"l"(surfObj),
+        "r"(x),
+        "r"(y),
+        "r"(vx),
+        "r"(vy));
+}
+template<> // int4 texel, 2D surface: stores (v.x, v.y, v.z, v.w) at (x, y) with PTX sust.p.2d.v4.b32
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf2Dwrite_convert<int4>(
+    int4 v,
+    cudaSurfaceObject_t surfObj,
+    int x,
+    int y,
+    cudaSurfaceBoundaryMode boundaryMode) // not referenced in the body; SLANG_PTX_BOUNDARY_MODE is spliced into the opcode
+{
+    const int vx = v.x, vy = v.y, vz = v.z, vw = v.w; // scalar copies, each bound to its own "r" operand below
+    asm volatile( // operands: %0 = surfObj, %1..%2 = (x, y), %3..%6 = (vx, vy, vz, vw)
+        "{sust.p.2d.v4.b32." SLANG_PTX_BOUNDARY_MODE
+        " [%0, {%1,%2}], {%3,%4,%5,%6};}\n\t" ::"l"(surfObj),
+        "r"(x),
+        "r"(y),
+        "r"(vx),
+        "r"(vy),
+        "r"(vz),
+        "r"(vw));
+}
+
+template<> // int texel, 3D surface: stores v at (x, y, z) with PTX sust.p.3d.b32 (same instruction string as the uint overload)
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf3Dwrite_convert<int>(
+    int v,
+    cudaSurfaceObject_t surfObj,
+    int x,
+    int y,
+    int z,
+    cudaSurfaceBoundaryMode boundaryMode) // not referenced in the body; SLANG_PTX_BOUNDARY_MODE is spliced into the opcode
+{
+    asm volatile( // operands: %0 = surfObj, %1..%3 = (x, y, z), %4 = constant 0, %5 = v
+        "{sust.p.3d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2,%3,%4}], {%5};}\n\t" ::"l"(surfObj),
+        "r"(x),
+        "r"(y),
+        "r"(z),
+        "r"(0), // fills the fourth slot of the coordinate vector; NOTE(review): presumably ignored by PTX for 3d surfaces - confirm
+        "r"(v));
+}
+// int2 texel, 3D surface: stores (v.x, v.y) at (x, y, z) with PTX sust.p.3d.v2.b32
+template<>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf3Dwrite_convert<int2>(
+    int2 v,
+    cudaSurfaceObject_t surfObj,
+    int x,
+    int y,
+    int z,
+    cudaSurfaceBoundaryMode boundaryMode) // not referenced in the body; SLANG_PTX_BOUNDARY_MODE is spliced into the opcode
+{
+    const int vx = v.x, vy = v.y; // scalar copies, each bound to its own "r" operand below
+    asm volatile( // operands: %0 = surfObj, %1..%3 = (x, y, z), %4 = constant 0, %5..%6 = (vx, vy)
+        "{sust.p.3d.v2.b32." SLANG_PTX_BOUNDARY_MODE
+        " [%0, {%1,%2,%3,%4}], {%5,%6};}\n\t" ::"l"(surfObj),
+        "r"(x),
+        "r"(y),
+        "r"(z),
+        "r"(0), // fills the fourth slot of the coordinate vector; NOTE(review): presumably ignored by PTX for 3d surfaces - confirm
+        "r"(vx),
+        "r"(vy));
+}
+// int4 texel, 3D surface: stores (v.x, v.y, v.z, v.w) at (x, y, z) with PTX sust.p.3d.v4.b32
+template<>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf3Dwrite_convert<int4>(
+    int4 v,
+    cudaSurfaceObject_t surfObj,
+    int x,
+    int y,
+    int z,
+    cudaSurfaceBoundaryMode boundaryMode) // not referenced in the body; SLANG_PTX_BOUNDARY_MODE is spliced into the opcode
+{
+    const int vx = v.x, vy = v.y, vz = v.z, vw = v.w; // scalar copies, each bound to its own "r" operand below
+    asm volatile( // operands: %0 = surfObj, %1..%3 = (x, y, z), %4 = constant 0, %5..%8 = (vx, vy, vz, vw)
+        "{sust.p.3d.v4.b32." SLANG_PTX_BOUNDARY_MODE
+        " [%0, {%1,%2,%3,%4}], {%5,%6,%7,%8};}\n\t" ::"l"(surfObj),
+        "r"(x),
+        "r"(y),
+        "r"(z),
+        "r"(0), // fills the fourth slot of the coordinate vector; NOTE(review): presumably ignored by PTX for 3d surfaces - confirm
+        "r"(vx),
+        "r"(vy),
+        "r"(vz),
+        "r"(vw));
+}
+
 // ----------------------------- F32 -----------------------------------------
 
 // Unary
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index 216dfc04a..264098bec 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -28388,4 +28388,4 @@ void InterlockedAddF16Emulated(half* dest, half value, out half originalValue)
 void InterlockedAddF16x2(half2* dest, half2 value, out half2 originalValue)
 {
     originalValue = __atomic_add(*dest, value);
-}
-\ No newline at end of file
+}
diff --git a/tests/compute/texture-subscript-cuda.slang b/tests/compute/texture-subscript-cuda.slang
index 7c4a2cc78..26b1f9e09 100644
--- a/tests/compute/texture-subscript-cuda.slang
+++ b/tests/compute/texture-subscript-cuda.slang
@@ -24,10 +24,37 @@ Texture2DArray<float2> cudaT2DArray_f2;
 //TEST_INPUT: Texture2D(size=16, content = one, arrayLength=3):name cudaT2DArray_f4
 Texture2DArray<float4> cudaT2DArray_f4;
 
-//TEST_INPUT: ubuffer(data=[0 0 0 0 0 0 0], stride=4):out,name cudaOutputBuffer
+// Integer-element textures (uint and int; 1, 2 and 4 components) read by cases 5-8 of computeMain
+//TEST_INPUT: Texture2D(size=8, content = one):name cudaT2D_uint
+Texture2D<uint> cudaT2D_uint;
+//TEST_INPUT: Texture2D(size=8, content = one):name cudaT2D_uint2
+Texture2D<uint2> cudaT2D_uint2;
+//TEST_INPUT: Texture2D(size=8, content = one):name cudaT2D_uint4
+Texture2D<uint4> cudaT2D_uint4;
+//TEST_INPUT: Texture2D(size=8, content = one):name cudaT2D_int
+Texture2D<int> cudaT2D_int;
+//TEST_INPUT: Texture2D(size=8, content = one):name cudaT2D_int2
+Texture2D<int2> cudaT2D_int2;
+//TEST_INPUT: Texture2D(size=8, content = one):name cudaT2D_int4
+Texture2D<int4> cudaT2D_int4;
+
+//TEST_INPUT: Texture3D(size=8, content = one):name cudaT3D_uint
+Texture3D<uint> cudaT3D_uint;
+//TEST_INPUT: Texture3D(size=8, content = one):name cudaT3D_uint2
+Texture3D<uint2> cudaT3D_uint2;
+//TEST_INPUT: Texture3D(size=8, content = one):name cudaT3D_uint4
+Texture3D<uint4> cudaT3D_uint4;
+//TEST_INPUT: Texture3D(size=8, content = one):name cudaT3D_int
+Texture3D<int> cudaT3D_int;
+//TEST_INPUT: Texture3D(size=8, content = one):name cudaT3D_int2
+Texture3D<int2> cudaT3D_int2;
+//TEST_INPUT: Texture3D(size=8, content = one):name cudaT3D_int4
+Texture3D<int4> cudaT3D_int4;
+
+//TEST_INPUT: ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0], stride=4):out,name cudaOutputBuffer
 RWStructuredBuffer<float> cudaOutputBuffer;
 
-[numthreads(7, 1, 1)]
+[numthreads(11, 1, 1)]
 [shader("compute")]
 void computeMain(int3 dispatchThreadID : SV_DispatchThreadID)
 {
@@ -97,5 +124,81 @@ void computeMain(int3 dispatchThreadID : SV_DispatchThreadID)
             cudaOutputBuffer[idx] += result4.w;
         }
     break;
+
+    case 5:
+        {
+            // Test 2D textures with uint variants
+            int2 var = int2(1, 2);
+            uint result = cudaT2D_uint[var];
+            cudaOutputBuffer[idx] = float(result);
+            
+            uint2 result2 = cudaT2D_uint2[var];
+            cudaOutputBuffer[idx] += float(result2.x);
+            cudaOutputBuffer[idx] += float(result2.y);
+
+            uint4 result4 = cudaT2D_uint4[var];
+            cudaOutputBuffer[idx] += float(result4.x);
+            cudaOutputBuffer[idx] += float(result4.y);
+            cudaOutputBuffer[idx] += float(result4.z);
+            cudaOutputBuffer[idx] += float(result4.w);
+        }
+    break;
+
+    case 6:
+        {
+            // Test 2D textures with int variants
+            int2 var = int2(1, 2);
+            int result = cudaT2D_int[var];
+            cudaOutputBuffer[idx] = float(result);
+            
+            int2 result2 = cudaT2D_int2[var];
+            cudaOutputBuffer[idx] += float(result2.x);
+            cudaOutputBuffer[idx] += float(result2.y);
+
+            int4 result4 = cudaT2D_int4[var];
+            cudaOutputBuffer[idx] += float(result4.x);
+            cudaOutputBuffer[idx] += float(result4.y);
+            cudaOutputBuffer[idx] += float(result4.z);
+            cudaOutputBuffer[idx] += float(result4.w);
+        }
+    break;
+
+    case 7:
+        {
+            // Test 3D textures with uint variants
+            int3 var = int3(1, 1, 1);
+            uint result = cudaT3D_uint[var];
+            cudaOutputBuffer[idx] = float(result);
+            
+            uint2 result2 = cudaT3D_uint2[var];
+            cudaOutputBuffer[idx] += float(result2.x);
+            cudaOutputBuffer[idx] += float(result2.y);
+
+            uint4 result4 = cudaT3D_uint4[var];
+            cudaOutputBuffer[idx] += float(result4.x);
+            cudaOutputBuffer[idx] += float(result4.y);
+            cudaOutputBuffer[idx] += float(result4.z);
+            cudaOutputBuffer[idx] += float(result4.w);
+        }
+    break;
+
+    case 8:
+        {
+            // Test 3D textures with int variants
+            int3 var = int3(1, 1, 1);
+            int result = cudaT3D_int[var];
+            cudaOutputBuffer[idx] = float(result);
+            
+            int2 result2 = cudaT3D_int2[var];
+            cudaOutputBuffer[idx] += float(result2.x);
+            cudaOutputBuffer[idx] += float(result2.y);
+
+            int4 result4 = cudaT3D_int4[var];
+            cudaOutputBuffer[idx] += float(result4.x);
+            cudaOutputBuffer[idx] += float(result4.y);
+            cudaOutputBuffer[idx] += float(result4.z);
+            cudaOutputBuffer[idx] += float(result4.w);
+        }
+    break;
     }
 } 
 \ No newline at end of file
diff --git a/tests/compute/texture-subscript-cuda.slang.expected.txt b/tests/compute/texture-subscript-cuda.slang.expected.txt
index 698717361..ceb358832 100644
--- a/tests/compute/texture-subscript-cuda.slang.expected.txt
+++ b/tests/compute/texture-subscript-cuda.slang.expected.txt
@@ -3,5 +3,9 @@
 40E00000
 40E00000
 40E00000
+4FDE4000
+4FDE4000
+4FDE4000
+4FDE4000
 0
 0
 \ No newline at end of file