summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- prelude/slang-cuda-prelude.h 237
-rw-r--r-- source/slang/hlsl.meta.slang 2
-rw-r--r-- tests/compute/texture-subscript-cuda.slang 107
-rw-r--r-- tests/compute/texture-subscript-cuda.slang.expected.txt 4
4 files changed, 342 insertions, 8 deletions
diff --git a/prelude/slang-cuda-prelude.h b/prelude/slang-cuda-prelude.h
index f0c053168..fd79b77aa 100644
--- a/prelude/slang-cuda-prelude.h
+++ b/prelude/slang-cuda-prelude.h
@@ -1324,10 +1324,11 @@ SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf3Dwrite_convert<float>(
cudaSurfaceBoundaryMode boundaryMode)
{
asm volatile(
- "{sust.p.2d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2,%3}], {%4};}\n\t" ::"l"(surfObj),
+ "{sust.p.3d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2,%3,%4}], {%5};}\n\t" ::"l"(surfObj),
"r"(x),
"r"(y),
"r"(z),
+ "r"(0),
"f"(v));
}
@@ -1376,11 +1377,12 @@ SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf3Dwrite_convert<float2>(
{
const float vx = v.x, vy = v.y;
asm volatile(
- "{sust.p.2d.v2.b32." SLANG_PTX_BOUNDARY_MODE
- " [%0, {%1,%2,%3}], {%4,%5};}\n\t" ::"l"(surfObj),
+ "{sust.p.3d.v2.b32." SLANG_PTX_BOUNDARY_MODE
+ " [%0, {%1,%2,%3,%4}], {%5,%6};}\n\t" ::"l"(surfObj),
"r"(x),
"r"(y),
"r"(z),
+ "r"(0),
"f"(vx),
"f"(vy));
}
@@ -1435,17 +1437,242 @@ SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf3Dwrite_convert<float4>(
{
const float vx = v.x, vy = v.y, vz = v.z, vw = v.w;
asm volatile(
- "{sust.p.2d.v4.b32." SLANG_PTX_BOUNDARY_MODE
- " [%0, {%1,%2,%3}], {%4,%5,%6,%7};}\n\t" ::"l"(surfObj),
+ "{sust.p.3d.v4.b32." SLANG_PTX_BOUNDARY_MODE
+ " [%0, {%1,%2,%3,%4}], {%5,%6,%7,%8};}\n\t" ::"l"(surfObj),
"r"(x),
"r"(y),
"r"(z),
+ "r"(0),
"f"(vx),
"f"(vy),
"f"(vz),
"f"(vw));
}
+template<>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf2Dwrite_convert<uint>(
+ uint v,
+ cudaSurfaceObject_t surfObj,
+ int x,
+ int y,
+ cudaSurfaceBoundaryMode boundaryMode)
+{
+ asm volatile(
+ "{sust.p.2d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2}], {%3};}\n\t" ::"l"(surfObj),
+ "r"(x),
+ "r"(y),
+ "r"(v));
+}
+template<>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf3Dwrite_convert<uint>(
+ uint v,
+ cudaSurfaceObject_t surfObj,
+ int x,
+ int y,
+ int z,
+ cudaSurfaceBoundaryMode boundaryMode)
+{
+ asm volatile(
+ "{sust.p.3d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2,%3,%4}], {%5};}\n\t" ::"l"(surfObj),
+ "r"(x),
+ "r"(y),
+ "r"(z),
+ "r"(0),
+ "r"(v));
+}
+
+template<>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf2Dwrite_convert<uint2>(
+ uint2 v,
+ cudaSurfaceObject_t surfObj,
+ int x,
+ int y,
+ cudaSurfaceBoundaryMode boundaryMode)
+{
+ const uint vx = v.x, vy = v.y;
+ asm volatile(
+ "{sust.p.2d.v2.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2}], {%3,%4};}\n\t" ::"l"(surfObj),
+ "r"(x),
+ "r"(y),
+ "r"(vx),
+ "r"(vy));
+}
+template<>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf3Dwrite_convert<uint2>(
+ uint2 v,
+ cudaSurfaceObject_t surfObj,
+ int x,
+ int y,
+ int z,
+ cudaSurfaceBoundaryMode boundaryMode)
+{
+ const uint vx = v.x, vy = v.y;
+ asm volatile(
+ "{sust.p.3d.v2.b32." SLANG_PTX_BOUNDARY_MODE
+ " [%0, {%1,%2,%3,%4}], {%5,%6};}\n\t" ::"l"(surfObj),
+ "r"(x),
+ "r"(y),
+ "r"(z),
+ "r"(0),
+ "r"(vx),
+ "r"(vy));
+}
+
+template<>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf2Dwrite_convert<uint4>(
+ uint4 v,
+ cudaSurfaceObject_t surfObj,
+ int x,
+ int y,
+ cudaSurfaceBoundaryMode boundaryMode)
+{
+ const uint vx = v.x, vy = v.y, vz = v.z, vw = v.w;
+ asm volatile(
+ "{sust.p.2d.v4.b32." SLANG_PTX_BOUNDARY_MODE
+ " [%0, {%1,%2}], {%3,%4,%5,%6};}\n\t" ::"l"(surfObj),
+ "r"(x),
+ "r"(y),
+ "r"(vx),
+ "r"(vy),
+ "r"(vz),
+ "r"(vw));
+}
+template<>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf3Dwrite_convert<uint4>(
+ uint4 v,
+ cudaSurfaceObject_t surfObj,
+ int x,
+ int y,
+ int z,
+ cudaSurfaceBoundaryMode boundaryMode)
+{
+ const uint vx = v.x, vy = v.y, vz = v.z, vw = v.w;
+ asm volatile(
+ "{sust.p.3d.v4.b32." SLANG_PTX_BOUNDARY_MODE
+ " [%0, {%1,%2,%3,%4}], {%5,%6,%7,%8};}\n\t" ::"l"(surfObj),
+ "r"(x),
+ "r"(y),
+ "r"(z),
+ "r"(0),
+ "r"(vx),
+ "r"(vy),
+ "r"(vz),
+ "r"(vw));
+}
+
+template<>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf2Dwrite_convert<int>(
+ int v,
+ cudaSurfaceObject_t surfObj,
+ int x,
+ int y,
+ cudaSurfaceBoundaryMode boundaryMode)
+{
+ asm volatile(
+ "{sust.p.2d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2}], {%3};}\n\t" ::"l"(surfObj),
+ "r"(x),
+ "r"(y),
+ "r"(v));
+}
+// Int2
+template<>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf2Dwrite_convert<int2>(
+ int2 v,
+ cudaSurfaceObject_t surfObj,
+ int x,
+ int y,
+ cudaSurfaceBoundaryMode boundaryMode)
+{
+ const int vx = v.x, vy = v.y;
+ asm volatile(
+ "{sust.p.2d.v2.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2}], {%3,%4};}\n\t" ::"l"(surfObj),
+ "r"(x),
+ "r"(y),
+ "r"(vx),
+ "r"(vy));
+}
+template<>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf2Dwrite_convert<int4>(
+ int4 v,
+ cudaSurfaceObject_t surfObj,
+ int x,
+ int y,
+ cudaSurfaceBoundaryMode boundaryMode)
+{
+ const int vx = v.x, vy = v.y, vz = v.z, vw = v.w;
+ asm volatile(
+ "{sust.p.2d.v4.b32." SLANG_PTX_BOUNDARY_MODE
+ " [%0, {%1,%2}], {%3,%4,%5,%6};}\n\t" ::"l"(surfObj),
+ "r"(x),
+ "r"(y),
+ "r"(vx),
+ "r"(vy),
+ "r"(vz),
+ "r"(vw));
+}
+
+template<>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf3Dwrite_convert<int>(
+ int v,
+ cudaSurfaceObject_t surfObj,
+ int x,
+ int y,
+ int z,
+ cudaSurfaceBoundaryMode boundaryMode)
+{
+ asm volatile(
+ "{sust.p.3d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2,%3,%4}], {%5};}\n\t" ::"l"(surfObj),
+ "r"(x),
+ "r"(y),
+ "r"(z),
+ "r"(0),
+ "r"(v));
+}
+// Int2
+template<>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf3Dwrite_convert<int2>(
+ int2 v,
+ cudaSurfaceObject_t surfObj,
+ int x,
+ int y,
+ int z,
+ cudaSurfaceBoundaryMode boundaryMode)
+{
+ const int vx = v.x, vy = v.y;
+ asm volatile(
+ "{sust.p.3d.v2.b32." SLANG_PTX_BOUNDARY_MODE
+ " [%0, {%1,%2,%3,%4}], {%5,%6};}\n\t" ::"l"(surfObj),
+ "r"(x),
+ "r"(y),
+ "r"(z),
+ "r"(0),
+ "r"(vx),
+ "r"(vy));
+}
+// Int4
+template<>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf3Dwrite_convert<int4>(
+ int4 v,
+ cudaSurfaceObject_t surfObj,
+ int x,
+ int y,
+ int z,
+ cudaSurfaceBoundaryMode boundaryMode)
+{
+ const int vx = v.x, vy = v.y, vz = v.z, vw = v.w;
+ asm volatile(
+ "{sust.p.3d.v4.b32." SLANG_PTX_BOUNDARY_MODE
+ " [%0, {%1,%2,%3,%4}], {%5,%6,%7,%8};}\n\t" ::"l"(surfObj),
+ "r"(x),
+ "r"(y),
+ "r"(z),
+ "r"(0),
+ "r"(vx),
+ "r"(vy),
+ "r"(vz),
+ "r"(vw));
+}
+
// ----------------------------- F32 -----------------------------------------
// Unary
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index 216dfc04a..264098bec 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -28388,4 +28388,4 @@ void InterlockedAddF16Emulated(half* dest, half value, out half originalValue)
void InterlockedAddF16x2(half2* dest, half2 value, out half2 originalValue)
{
originalValue = __atomic_add(*dest, value);
-}
\ No newline at end of file
+}
diff --git a/tests/compute/texture-subscript-cuda.slang b/tests/compute/texture-subscript-cuda.slang
index 7c4a2cc78..26b1f9e09 100644
--- a/tests/compute/texture-subscript-cuda.slang
+++ b/tests/compute/texture-subscript-cuda.slang
@@ -24,10 +24,37 @@ Texture2DArray<float2> cudaT2DArray_f2;
//TEST_INPUT: Texture2D(size=16, content = one, arrayLength=3):name cudaT2DArray_f4
Texture2DArray<float4> cudaT2DArray_f4;
-//TEST_INPUT: ubuffer(data=[0 0 0 0 0 0 0], stride=4):out,name cudaOutputBuffer
+// New texture declarations for uint and int variants
+//TEST_INPUT: Texture2D(size=8, content = one):name cudaT2D_uint
+Texture2D<uint> cudaT2D_uint;
+//TEST_INPUT: Texture2D(size=8, content = one):name cudaT2D_uint2
+Texture2D<uint2> cudaT2D_uint2;
+//TEST_INPUT: Texture2D(size=8, content = one):name cudaT2D_uint4
+Texture2D<uint4> cudaT2D_uint4;
+//TEST_INPUT: Texture2D(size=8, content = one):name cudaT2D_int
+Texture2D<int> cudaT2D_int;
+//TEST_INPUT: Texture2D(size=8, content = one):name cudaT2D_int2
+Texture2D<int2> cudaT2D_int2;
+//TEST_INPUT: Texture2D(size=8, content = one):name cudaT2D_int4
+Texture2D<int4> cudaT2D_int4;
+
+//TEST_INPUT: Texture3D(size=8, content = one):name cudaT3D_uint
+Texture3D<uint> cudaT3D_uint;
+//TEST_INPUT: Texture3D(size=8, content = one):name cudaT3D_uint2
+Texture3D<uint2> cudaT3D_uint2;
+//TEST_INPUT: Texture3D(size=8, content = one):name cudaT3D_uint4
+Texture3D<uint4> cudaT3D_uint4;
+//TEST_INPUT: Texture3D(size=8, content = one):name cudaT3D_int
+Texture3D<int> cudaT3D_int;
+//TEST_INPUT: Texture3D(size=8, content = one):name cudaT3D_int2
+Texture3D<int2> cudaT3D_int2;
+//TEST_INPUT: Texture3D(size=8, content = one):name cudaT3D_int4
+Texture3D<int4> cudaT3D_int4;
+
+//TEST_INPUT: ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0], stride=4):out,name cudaOutputBuffer
RWStructuredBuffer<float> cudaOutputBuffer;
-[numthreads(7, 1, 1)]
+[numthreads(11, 1, 1)]
[shader("compute")]
void computeMain(int3 dispatchThreadID : SV_DispatchThreadID)
{
@@ -97,5 +124,81 @@ void computeMain(int3 dispatchThreadID : SV_DispatchThreadID)
cudaOutputBuffer[idx] += result4.w;
}
break;
+
+ case 5:
+ {
+ // Test 2D textures with uint variants
+ int2 var = int2(1, 2);
+ uint result = cudaT2D_uint[var];
+ cudaOutputBuffer[idx] = float(result);
+
+ uint2 result2 = cudaT2D_uint2[var];
+ cudaOutputBuffer[idx] += float(result2.x);
+ cudaOutputBuffer[idx] += float(result2.y);
+
+ uint4 result4 = cudaT2D_uint4[var];
+ cudaOutputBuffer[idx] += float(result4.x);
+ cudaOutputBuffer[idx] += float(result4.y);
+ cudaOutputBuffer[idx] += float(result4.z);
+ cudaOutputBuffer[idx] += float(result4.w);
+ }
+ break;
+
+ case 6:
+ {
+ // Test 2D textures with int variants
+ int2 var = int2(1, 2);
+ int result = cudaT2D_int[var];
+ cudaOutputBuffer[idx] = float(result);
+
+ int2 result2 = cudaT2D_int2[var];
+ cudaOutputBuffer[idx] += float(result2.x);
+ cudaOutputBuffer[idx] += float(result2.y);
+
+ int4 result4 = cudaT2D_int4[var];
+ cudaOutputBuffer[idx] += float(result4.x);
+ cudaOutputBuffer[idx] += float(result4.y);
+ cudaOutputBuffer[idx] += float(result4.z);
+ cudaOutputBuffer[idx] += float(result4.w);
+ }
+ break;
+
+ case 7:
+ {
+ // Test 3D textures with uint variants
+ int3 var = int3(1, 1, 1);
+ uint result = cudaT3D_uint[var];
+ cudaOutputBuffer[idx] = float(result);
+
+ uint2 result2 = cudaT3D_uint2[var];
+ cudaOutputBuffer[idx] += float(result2.x);
+ cudaOutputBuffer[idx] += float(result2.y);
+
+ uint4 result4 = cudaT3D_uint4[var];
+ cudaOutputBuffer[idx] += float(result4.x);
+ cudaOutputBuffer[idx] += float(result4.y);
+ cudaOutputBuffer[idx] += float(result4.z);
+ cudaOutputBuffer[idx] += float(result4.w);
+ }
+ break;
+
+ case 8:
+ {
+ // Test 3D textures with int variants
+ int3 var = int3(1, 1, 1);
+ int result = cudaT3D_int[var];
+ cudaOutputBuffer[idx] = float(result);
+
+ int2 result2 = cudaT3D_int2[var];
+ cudaOutputBuffer[idx] += float(result2.x);
+ cudaOutputBuffer[idx] += float(result2.y);
+
+ int4 result4 = cudaT3D_int4[var];
+ cudaOutputBuffer[idx] += float(result4.x);
+ cudaOutputBuffer[idx] += float(result4.y);
+ cudaOutputBuffer[idx] += float(result4.z);
+ cudaOutputBuffer[idx] += float(result4.w);
+ }
+ break;
}
}
\ No newline at end of file
diff --git a/tests/compute/texture-subscript-cuda.slang.expected.txt b/tests/compute/texture-subscript-cuda.slang.expected.txt
index 698717361..ceb358832 100644
--- a/tests/compute/texture-subscript-cuda.slang.expected.txt
+++ b/tests/compute/texture-subscript-cuda.slang.expected.txt
@@ -3,5 +3,9 @@
40E00000
40E00000
40E00000
+4FDE4000
+4FDE4000
+4FDE4000
+4FDE4000
0
0
\ No newline at end of file