From 03f9242489d5598c9c7594ac12e269f57a018cda Mon Sep 17 00:00:00 2001
From: Mukund Keshava
Date: Mon, 12 May 2025 11:09:41 +0530
Subject: cuda: Add more formats for texture read/write (#7012)

* WiP: Add more formats for texture reads

* fix test

* format code

* add float2/float4 versions for 1D and 3D as well

* fixed review comment

* fix review comments

---------

Co-authored-by: slangbot <186143334+slangbot@users.noreply.github.com>
Co-authored-by: Ellie Hermaszewska
---
 tests/compute/rw-texture-simple.slang              |  63 +++++++++++-
 tests/compute/rw-texture-simple.slang.expected.txt |   6 +-
 tests/compute/texture-subscript-cuda.slang         | 114 ++++++++++++++-------
 .../texture-subscript-cuda.slang.expected.txt      |   6 +-
 tests/compute/texture-subscript-uint-cuda.slang    |  86 ++++++++++++++++
 5 files changed, 231 insertions(+), 44 deletions(-)
 create mode 100644 tests/compute/texture-subscript-uint-cuda.slang

(limited to 'tests/compute')

diff --git a/tests/compute/rw-texture-simple.slang b/tests/compute/rw-texture-simple.slang
index f5befcd57..d0d90acb9 100644
--- a/tests/compute/rw-texture-simple.slang
+++ b/tests/compute/rw-texture-simple.slang
@@ -14,6 +14,18 @@ RWTexture1D<float> rwt1D;
 RWTexture2D<float> rwt2D;
 //TEST_INPUT: RWTexture3D(format=R32Float, size=4, content = one, mipMaps = 1):name rwt3D
 RWTexture3D<float> rwt3D;
+//TEST_INPUT: RWTexture1D(format=RG32Float, size=4, content = one, mipMaps = 1):name rwt1D_float2
+RWTexture1D<float2> rwt1D_float2;
+//TEST_INPUT: RWTexture1D(format=RGBA32Float, size=4, content = one, mipMaps = 1):name rwt1D_float4
+RWTexture1D<float4> rwt1D_float4;
+//TEST_INPUT: RWTexture2D(format=RG32Float, size=4, content = one, mipMaps = 1):name rwt2D_float2
+RWTexture2D<float2> rwt2D_float2;
+//TEST_INPUT: RWTexture2D(format=RGBA32Float, size=4, content = one, mipMaps = 1):name rwt2D_float4
+RWTexture2D<float4> rwt2D_float4;
+//TEST_INPUT: RWTexture3D(format=RG32Float, size=4, content = one, mipMaps = 1):name rwt3D_float2
+RWTexture3D<float2> rwt3D_float2;
+//TEST_INPUT: RWTexture3D(format=RGBA32Float, size=4, content = one, mipMaps = 1):name rwt3D_float4
+RWTexture3D<float4> rwt3D_float4;
 
 //TEST_INPUT: ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer
 RWStructuredBuffer<float> outputBuffer;
@@ -25,6 +37,7 @@ void computeMain(int3 dispatchThreadID : SV_DispatchThreadID)
 
     float val = 0.0f;
 
+    // float texture operations
     val += rwt1D[idx];
     val += rwt2D[uint2(idx, idx)];
     val += rwt3D[uint3(idx, idx, idx)];
@@ -36,6 +49,54 @@ void computeMain(int3 dispatchThreadID : SV_DispatchThreadID)
     val += rwt1D[idx];
     val += rwt2D[uint2(idx, idx)];
     val += rwt3D[uint3(idx, idx, idx)];
-    
+
+    // float2 texture operations for 1D
+    float2 val2_1d = rwt1D_float2[idx];
+    rwt1D_float2[idx] = float2(idx, idx);
+    val2_1d = rwt1D_float2[idx];
+    val += val2_1d.x;
+    val += val2_1d.y;
+
+    // float2 texture operations for 2D
+    float2 val2 = rwt2D_float2[uint2(idx, idx)];
+    rwt2D_float2[uint2(idx, idx)] = float2(idx, idx);
+    val2 = rwt2D_float2[uint2(idx, idx)];
+    val += val2.x;
+    val += val2.y;
+
+    // float2 texture operations for 3D
+    float2 val2_3d = rwt3D_float2[uint3(idx, idx, idx)];
+    rwt3D_float2[uint3(idx, idx, idx)] = float2(idx, idx);
+    val2_3d = rwt3D_float2[uint3(idx, idx, idx)];
+    val += val2_3d.x;
+    val += val2_3d.y;
+
+    // float4 texture operations for 1D
+    float4 val4_1d = rwt1D_float4[idx];
+    rwt1D_float4[idx] = float4(idx, idx, idx, idx);
+    val4_1d = rwt1D_float4[idx];
+    val += val4_1d.x;
+    val += val4_1d.y;
+    val += val4_1d.z;
+    val += val4_1d.w;
+
+    // float4 texture operations for 2D
+    float4 val4 = rwt2D_float4[uint2(idx, idx)];
+    rwt2D_float4[uint2(idx, idx)] = float4(idx, idx, idx, idx);
+    val4 = rwt2D_float4[uint2(idx, idx)];
+    val += val4.x;
+    val += val4.y;
+    val += val4.z;
+    val += val4.w;
+
+    // float4 texture operations for 3D
+    float4 val4_3d = rwt3D_float4[uint3(idx, idx, idx)];
+    rwt3D_float4[uint3(idx, idx, idx)] = float4(idx, idx, idx, idx);
+    val4_3d = rwt3D_float4[uint3(idx, idx, idx)];
+    val += val4_3d.x;
+    val += val4_3d.y;
+    val += val4_3d.z;
+    val += val4_3d.w;
+
     outputBuffer[idx] = val;
 }
diff --git a/tests/compute/rw-texture-simple.slang.expected.txt b/tests/compute/rw-texture-simple.slang.expected.txt
index 1d9023742..3c49364c4 100644
--- a/tests/compute/rw-texture-simple.slang.expected.txt
+++ b/tests/compute/rw-texture-simple.slang.expected.txt
@@ -1,5 +1,5 @@
 type: float
 3.000000
-6.000000
-9.000000
-12.000000
+24.000000
+45.000000
+66.000000
diff --git a/tests/compute/texture-subscript-cuda.slang b/tests/compute/texture-subscript-cuda.slang
index e64f42b19..7c4a2cc78 100644
--- a/tests/compute/texture-subscript-cuda.slang
+++ b/tests/compute/texture-subscript-cuda.slang
@@ -5,14 +5,24 @@
 Texture1D<float> cudaT1D;
 //TEST_INPUT: Texture2D(size=8, content = one):name cudaT2D
 Texture2D<float> cudaT2D;
+//TEST_INPUT: Texture2D(size=8, content = one):name cudaT2D_f2
+Texture2D<float2> cudaT2D_f2;
+//TEST_INPUT: Texture2D(size=8, content = one):name cudaT2D_f4
+Texture2D<float4> cudaT2D_f4;
 //TEST_INPUT: Texture3D(size=8, content = one):name cudaT3D
 Texture3D<float> cudaT3D;
+//TEST_INPUT: Texture3D(size=8, content = one):name cudaT3D_f2
+Texture3D<float2> cudaT3D_f2;
+//TEST_INPUT: Texture3D(size=8, content = one):name cudaT3D_f4
+Texture3D<float4> cudaT3D_f4;
 //TEST_INPUT: TextureCube(size=16, content = one):name cudaTCube
 TextureCube<float> cudaTCube;
 //TEST_INPUT: Texture2D(size=16, content = one, arrayLength=3):name cudaT2DArray
 Texture2DArray<float> cudaT2DArray;
-//TEST_INPUT: TextureCube(size=16, content = one, arrayLength=1):name cudaTCubeArray
-TextureCubeArray<float> cudaTCubeArray;
+//TEST_INPUT: Texture2D(size=16, content = one, arrayLength=3):name cudaT2DArray_f2
+Texture2DArray<float2> cudaT2DArray_f2;
+//TEST_INPUT: Texture2D(size=16, content = one, arrayLength=3):name cudaT2DArray_f4
+Texture2DArray<float4> cudaT2DArray_f4;
 
 //TEST_INPUT: ubuffer(data=[0 0 0 0 0 0 0], stride=4):out,name cudaOutputBuffer
 RWStructuredBuffer<float> cudaOutputBuffer;
@@ -23,39 +33,69 @@ void computeMain(int3 dispatchThreadID : SV_DispatchThreadID)
 {
     int idx = dispatchThreadID.x;
 
-	switch (idx)
-	{
-		case 1:
-		{
-			int var = 0;
-			float result = cudaT1D[0];
-			// This is not supported in PTX.
-			//cudaOutputBuffer[idx] = result;
-		}
-		break;
-
-		case 2:
-		{
-			int2 var = int2(1, 2);
-			float result = cudaT2D[var];
-			cudaOutputBuffer[idx] = result;
-		}
-		break;
-
-		case 3:
-		{
-			int3 var = int3(1, 1, 1);
-			float result = cudaT3D[var];
-			cudaOutputBuffer[idx] = result;
-		}
-		break;
-
-		case 4:
-		{
-			int3 var = int3(0, 0, 1);
-			float result = cudaT2DArray[var];
-			cudaOutputBuffer[idx] = result;
-		}
-		break;
-	}
+    switch (idx)
+    {
+        case 1:
+        {
+            int var = 0;
+            float result = cudaT1D[0];
+            // This is not supported in PTX.
+            //cudaOutputBuffer[idx] = result;
+        }
+        break;
+
+        case 2:
+        {
+            int2 var = int2(1, 2);
+            float result = cudaT2D[var];
+            cudaOutputBuffer[idx] = result;
+
+            float2 result2 = cudaT2D_f2[var];
+            cudaOutputBuffer[idx] += result2.x;
+            cudaOutputBuffer[idx] += result2.y;
+
+            float4 result4 = cudaT2D_f4[var];
+            cudaOutputBuffer[idx] += result4.x;
+            cudaOutputBuffer[idx] += result4.y;
+            cudaOutputBuffer[idx] += result4.z;
+            cudaOutputBuffer[idx] += result4.w;
+        }
+        break;
+
+        case 3:
+        {
+            int3 var = int3(1, 1, 1);
+            float result = cudaT3D[var];
+            cudaOutputBuffer[idx] = result;
+
+            float2 result2 = cudaT3D_f2[var];
+            cudaOutputBuffer[idx] += result2.x;
+            cudaOutputBuffer[idx] += result2.y;
+
+            float4 result4 = cudaT3D_f4[var];
+            cudaOutputBuffer[idx] += result4.x;
+            cudaOutputBuffer[idx] += result4.y;
+            cudaOutputBuffer[idx] += result4.z;
+            cudaOutputBuffer[idx] += result4.w;
+        }
+        break;
+
+        case 4:
+        {
+            int3 var = int3(0, 0, 1);
+            float result = cudaT2DArray[var];
+            cudaOutputBuffer[idx] = result;
+
+            float2 result2 = cudaT2DArray_f2[var];
+            cudaOutputBuffer[idx] += result2.x;
+            cudaOutputBuffer[idx] += result2.y;
+
+            float4 result4 = cudaT2DArray_f4[var];
+            cudaOutputBuffer[idx] += result4.x;
+            cudaOutputBuffer[idx] += result4.y;
+            cudaOutputBuffer[idx] += result4.z;
+            cudaOutputBuffer[idx] += result4.w;
+        }
+        break;
+    }
 }
\ No newline at end of file
diff --git a/tests/compute/texture-subscript-cuda.slang.expected.txt b/tests/compute/texture-subscript-cuda.slang.expected.txt
index 133a47e56..698717361 100644
--- a/tests/compute/texture-subscript-cuda.slang.expected.txt
+++ b/tests/compute/texture-subscript-cuda.slang.expected.txt
@@ -1,7 +1,7 @@
 0
 0
-3F800000
-3F800000
-3F800000
+40E00000
+40E00000
+40E00000
 0
 0
\ No newline at end of file
diff --git a/tests/compute/texture-subscript-uint-cuda.slang b/tests/compute/texture-subscript-uint-cuda.slang
new file mode 100644
index 000000000..d2b6cec59
--- /dev/null
+++ b/tests/compute/texture-subscript-uint-cuda.slang
@@ -0,0 +1,86 @@
+// Test for verifying subscript operator support with uint types in cuda.
+
+//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-cuda -compute -shaderobj
+//TEST_INPUT: Texture1D(size=4, content = one):name cudaT1D_u
+Texture1D<uint> cudaT1D_u;
+//TEST_INPUT: Texture2D(size=8, content = one):name cudaT2D_u
+Texture2D<uint> cudaT2D_u;
+//TEST_INPUT: Texture2D(size=8, content = one):name cudaT2D_u2
+Texture2D<uint2> cudaT2D_u2;
+//TEST_INPUT: Texture2D(size=8, content = one):name cudaT2D_u4
+Texture2D<uint4> cudaT2D_u4;
+//TEST_INPUT: Texture3D(size=8, content = one):name cudaT3D_u
+Texture3D<uint> cudaT3D_u;
+//TEST_INPUT: Texture3D(size=8, content = one):name cudaT3D_u2
+Texture3D<uint2> cudaT3D_u2;
+//TEST_INPUT: Texture3D(size=8, content = one):name cudaT3D_u4
+Texture3D<uint4> cudaT3D_u4;
+//TEST_INPUT: TextureCube(size=16, content = one):name cudaTCube_u
+TextureCube<uint> cudaTCube_u;
+//TEST_INPUT: Texture2D(size=16, content = one, arrayLength=3):name cudaT2DArray_u
+Texture2DArray<uint> cudaT2DArray_u;
+//TEST_INPUT: Texture2D(size=16, content = one, arrayLength=3):name cudaT2DArray_u2
+Texture2DArray<uint2> cudaT2DArray_u2;
+//TEST_INPUT: Texture2D(size=16, content = one, arrayLength=3):name cudaT2DArray_u4
+Texture2DArray<uint4> cudaT2DArray_u4;
+
+//TEST_INPUT: ubuffer(data=[0 0 0], stride=4):out,name cudaOutputBuffer
+RWStructuredBuffer<uint> cudaOutputBuffer;
+
+[numthreads(3, 1, 1)]
+[shader("compute")]
+void computeMain(int3 dispatchThreadID : SV_DispatchThreadID)
+{
+    int idx = dispatchThreadID.x;
+
+    switch (idx)
+    {
+        case 0:
+        {
+            int2 var = int2(1, 2);
+            uint result = cudaT2D_u[var];
+            cudaOutputBuffer[idx] = result;
+
+            uint2 result2 = cudaT2D_u2[var];
+            cudaOutputBuffer[idx] += result2.x;
+
+            uint4 result4 = cudaT2D_u4[var];
+            cudaOutputBuffer[idx] += result4.x;
+            cudaOutputBuffer[idx] += result4.y;
+        }
+        break;
+
+        case 1:
+        {
+            int3 var = int3(1, 1, 1);
+            uint result = cudaT3D_u[var];
+            cudaOutputBuffer[idx] = result;
+
+            uint2 result2 = cudaT3D_u2[var];
+            cudaOutputBuffer[idx] += result2.x;
+
+            uint4 result4 = cudaT3D_u4[var];
+            cudaOutputBuffer[idx] += result4.x;
+            cudaOutputBuffer[idx] += result4.w;
+        }
+        break;
+
+        case 2:
+        {
+            int3 var = int3(0, 0, 1);
+            uint result = cudaT2DArray_u[var];
+            cudaOutputBuffer[idx] = result;
+
+            uint2 result2 = cudaT2DArray_u2[var];
+            cudaOutputBuffer[idx] += result2.y;
+
+            uint4 result4 = cudaT2DArray_u4[var];
+            cudaOutputBuffer[idx] += result4.x;
+            cudaOutputBuffer[idx] += result4.y;
+        }
+        break;
+    }
+}
+// CHECK:FE000000
+// CHECK:FE000000
+// CHECK:FE000000
\ No newline at end of file
-- 
cgit v1.2.3