diff options
| author | Mukund Keshava <mkeshava@nvidia.com> | 2025-05-12 11:09:41 +0530 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-05-12 11:09:41 +0530 |
| commit | 03f9242489d5598c9c7594ac12e269f57a018cda (patch) | |
| tree | 2315d185985aa9d65dd4e3db8e1bf8d7a79d77c8 /tests | |
| parent | b46c342f47b61119a0dc517ce6eb75eab3398504 (diff) | |
cuda: Add more formats for texture read/write (#7012)
* WiP: Add more formats for texture reads
* fix test
* format code
* add float2/float4 versions for 1D and 3D as well
* fixed review comment
* fix review comments
---------
Co-authored-by: slangbot <186143334+slangbot@users.noreply.github.com>
Co-authored-by: Ellie Hermaszewska <ellieh@nvidia.com>
Diffstat (limited to 'tests')
| -rw-r--r-- | tests/compute/rw-texture-simple.slang | 63 | ||||
| -rw-r--r-- | tests/compute/rw-texture-simple.slang.expected.txt | 6 | ||||
| -rw-r--r-- | tests/compute/texture-subscript-cuda.slang | 114 | ||||
| -rw-r--r-- | tests/compute/texture-subscript-cuda.slang.expected.txt | 6 | ||||
| -rw-r--r-- | tests/compute/texture-subscript-uint-cuda.slang | 86 |
5 files changed, 231 insertions, 44 deletions
diff --git a/tests/compute/rw-texture-simple.slang b/tests/compute/rw-texture-simple.slang index f5befcd57..d0d90acb9 100644 --- a/tests/compute/rw-texture-simple.slang +++ b/tests/compute/rw-texture-simple.slang @@ -14,6 +14,18 @@ RWTexture1D<float> rwt1D; RWTexture2D<float> rwt2D; //TEST_INPUT: RWTexture3D(format=R32Float, size=4, content = one, mipMaps = 1):name rwt3D RWTexture3D<float> rwt3D; +//TEST_INPUT: RWTexture1D(format=RG32Float, size=4, content = one, mipMaps = 1):name rwt1D_float2 +RWTexture1D<float2> rwt1D_float2; +//TEST_INPUT: RWTexture1D(format=RGBA32Float, size=4, content = one, mipMaps = 1):name rwt1D_float4 +RWTexture1D<float4> rwt1D_float4; +//TEST_INPUT: RWTexture2D(format=RG32Float, size=4, content = one, mipMaps = 1):name rwt2D_float2 +RWTexture2D<float2> rwt2D_float2; +//TEST_INPUT: RWTexture2D(format=RGBA32Float, size=4, content = one, mipMaps = 1):name rwt2D_float4 +RWTexture2D<float4> rwt2D_float4; +//TEST_INPUT: RWTexture3D(format=RG32Float, size=4, content = one, mipMaps = 1):name rwt3D_float2 +RWTexture3D<float2> rwt3D_float2; +//TEST_INPUT: RWTexture3D(format=RGBA32Float, size=4, content = one, mipMaps = 1):name rwt3D_float4 +RWTexture3D<float4> rwt3D_float4; //TEST_INPUT: ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer RWStructuredBuffer<float> outputBuffer; @@ -25,6 +37,7 @@ void computeMain(int3 dispatchThreadID : SV_DispatchThreadID) float val = 0.0f; + // float texture operations val += rwt1D[idx]; val += rwt2D[uint2(idx, idx)]; val += rwt3D[uint3(idx, idx, idx)]; @@ -36,6 +49,54 @@ void computeMain(int3 dispatchThreadID : SV_DispatchThreadID) val += rwt1D[idx]; val += rwt2D[uint2(idx, idx)]; val += rwt3D[uint3(idx, idx, idx)]; - + + // float2 texture operations for 1D + float2 val2_1d = rwt1D_float2[idx]; + rwt1D_float2[idx] = float2(idx, idx); + val2_1d = rwt1D_float2[idx]; + val += val2_1d.x; + val += val2_1d.y; + + // float2 texture operations for 2D + float2 val2 = rwt2D_float2[uint2(idx, idx)]; + rwt2D_float2[uint2(idx, idx)] = float2(idx, idx); + val2 = rwt2D_float2[uint2(idx, idx)]; + val += val2.x; + val += val2.y; + + // float2 texture operations for 3D + float2 val2_3d = rwt3D_float2[uint3(idx, idx, idx)]; + rwt3D_float2[uint3(idx, idx, idx)] = float2(idx, idx); + val2_3d = rwt3D_float2[uint3(idx, idx, idx)]; + val += val2_3d.x; + val += val2_3d.y; + + // float4 texture operations for 1D + float4 val4_1d = rwt1D_float4[idx]; + rwt1D_float4[idx] = float4(idx, idx, idx, idx); + val4_1d = rwt1D_float4[idx]; + val += val4_1d.x; + val += val4_1d.y; + val += val4_1d.z; + val += val4_1d.w; + + // float4 texture operations for 2D + float4 val4 = rwt2D_float4[uint2(idx, idx)]; + rwt2D_float4[uint2(idx, idx)] = float4(idx, idx, idx, idx); + val4 = rwt2D_float4[uint2(idx, idx)]; + val += val4.x; + val += val4.y; + val += val4.z; + val += val4.w; + + // float4 texture operations for 3D + float4 val4_3d = rwt3D_float4[uint3(idx, idx, idx)]; + rwt3D_float4[uint3(idx, idx, idx)] = float4(idx, idx, idx, idx); + val4_3d = rwt3D_float4[uint3(idx, idx, idx)]; + val += val4_3d.x; + val += val4_3d.y; + val += val4_3d.z; + val += val4_3d.w; + outputBuffer[idx] = val; } diff --git a/tests/compute/rw-texture-simple.slang.expected.txt b/tests/compute/rw-texture-simple.slang.expected.txt index 1d9023742..3c49364c4 100644 --- a/tests/compute/rw-texture-simple.slang.expected.txt +++ b/tests/compute/rw-texture-simple.slang.expected.txt @@ -1,5 +1,5 @@ type: float 3.000000 -6.000000 -9.000000 -12.000000 +24.000000 +45.000000 +66.000000 diff --git a/tests/compute/texture-subscript-cuda.slang b/tests/compute/texture-subscript-cuda.slang index e64f42b19..7c4a2cc78 100644 --- a/tests/compute/texture-subscript-cuda.slang +++ b/tests/compute/texture-subscript-cuda.slang @@ -5,14 +5,24 @@ Texture1D<float> cudaT1D; //TEST_INPUT: Texture2D(size=8, content = one):name cudaT2D Texture2D<float> cudaT2D; +//TEST_INPUT: Texture2D(size=8, content = one):name cudaT2D_f2 +Texture2D<float2> cudaT2D_f2; +//TEST_INPUT: Texture2D(size=8, content = one):name cudaT2D_f4 +Texture2D<float4> cudaT2D_f4; //TEST_INPUT: Texture3D(size=8, content = one):name cudaT3D Texture3D<float> cudaT3D; +//TEST_INPUT: Texture3D(size=8, content = one):name cudaT3D_f2 +Texture3D<float2> cudaT3D_f2; +//TEST_INPUT: Texture3D(size=8, content = one):name cudaT3D_f4 +Texture3D<float4> cudaT3D_f4; //TEST_INPUT: TextureCube(size=16, content = one):name cudaTCube TextureCube<float> cudaTCube; //TEST_INPUT: Texture2D(size=16, content = one, arrayLength=3):name cudaT2DArray Texture2DArray<float> cudaT2DArray; -//TEST_INPUT: TextureCube(size=16, content = one, arrayLength=1):name cudaTCubeArray -TextureCubeArray<float> cudaTCubeArray; +//TEST_INPUT: Texture2D(size=16, content = one, arrayLength=3):name cudaT2DArray_f2 +Texture2DArray<float2> cudaT2DArray_f2; +//TEST_INPUT: Texture2D(size=16, content = one, arrayLength=3):name cudaT2DArray_f4 +Texture2DArray<float4> cudaT2DArray_f4; //TEST_INPUT: ubuffer(data=[0 0 0 0 0 0 0], stride=4):out,name cudaOutputBuffer RWStructuredBuffer<float> cudaOutputBuffer; @@ -23,39 +33,69 @@ void computeMain(int3 dispatchThreadID : SV_DispatchThreadID) { int idx = dispatchThreadID.x; - switch (idx) - { - case 1: - { - int var = 0; - float result = cudaT1D[0]; - // This is not supported in PTX. - //cudaOutputBuffer[idx] = result; - } - break; - - case 2: - { - int2 var = int2(1, 2); - float result = cudaT2D[var]; - cudaOutputBuffer[idx] = result; - } - break; - - case 3: - { - int3 var = int3(1, 1, 1); - float result = cudaT3D[var]; - cudaOutputBuffer[idx] = result; - } - break; - - case 4: - { - int3 var = int3(0, 0, 1); - float result = cudaT2DArray[var]; - cudaOutputBuffer[idx] = result; - } - break; - } + switch (idx) + { + case 1: + { + int var = 0; + float result = cudaT1D[0]; + // This is not supported in PTX. + //cudaOutputBuffer[idx] = result; + } + break; + + case 2: + { + int2 var = int2(1, 2); + float result = cudaT2D[var]; + cudaOutputBuffer[idx] = result; + + float2 result2 = cudaT2D_f2[var]; + cudaOutputBuffer[idx] += result2.x; + cudaOutputBuffer[idx] += result2.y; + + float4 result4 = cudaT2D_f4[var]; + cudaOutputBuffer[idx] += result4.x; + cudaOutputBuffer[idx] += result4.y; + cudaOutputBuffer[idx] += result4.z; + cudaOutputBuffer[idx] += result4.w; + } + break; + + case 3: + { + int3 var = int3(1, 1, 1); + float result = cudaT3D[var]; + cudaOutputBuffer[idx] = result; + + float2 result2 = cudaT3D_f2[var]; + cudaOutputBuffer[idx] += result2.x; + cudaOutputBuffer[idx] += result2.y; + + float4 result4 = cudaT3D_f4[var]; + cudaOutputBuffer[idx] += result4.x; + cudaOutputBuffer[idx] += result4.y; + cudaOutputBuffer[idx] += result4.z; + cudaOutputBuffer[idx] += result4.w; + } + break; + + case 4: + { + int3 var = int3(0, 0, 1); + float result = cudaT2DArray[var]; + cudaOutputBuffer[idx] = result; + + float2 result2 = cudaT2DArray_f2[var]; + cudaOutputBuffer[idx] += result2.x; + cudaOutputBuffer[idx] += result2.y; + + float4 result4 = cudaT2DArray_f4[var]; + cudaOutputBuffer[idx] += result4.x; + cudaOutputBuffer[idx] += result4.y; + cudaOutputBuffer[idx] += result4.z; + cudaOutputBuffer[idx] += result4.w; + } + break; + } }
\ No newline at end of file diff --git a/tests/compute/texture-subscript-cuda.slang.expected.txt b/tests/compute/texture-subscript-cuda.slang.expected.txt index 133a47e56..698717361 100644 --- a/tests/compute/texture-subscript-cuda.slang.expected.txt +++ b/tests/compute/texture-subscript-cuda.slang.expected.txt @@ -1,7 +1,7 @@ 0 0 -3F800000 -3F800000 -3F800000 +40E00000 +40E00000 +40E00000 0 0
\ No newline at end of file diff --git a/tests/compute/texture-subscript-uint-cuda.slang b/tests/compute/texture-subscript-uint-cuda.slang new file mode 100644 index 000000000..d2b6cec59 --- /dev/null +++ b/tests/compute/texture-subscript-uint-cuda.slang @@ -0,0 +1,86 @@ +// Test for verifying subscript operator support with uint types in cuda. + +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-cuda -compute -shaderobj +//TEST_INPUT: Texture1D(size=4, content = one):name cudaT1D_u +Texture1D<uint> cudaT1D_u; +//TEST_INPUT: Texture2D(size=8, content = one):name cudaT2D_u +Texture2D<uint> cudaT2D_u; +//TEST_INPUT: Texture2D(size=8, content = one):name cudaT2D_u2 +Texture2D<uint2> cudaT2D_u2; +//TEST_INPUT: Texture2D(size=8, content = one):name cudaT2D_u4 +Texture2D<uint4> cudaT2D_u4; +//TEST_INPUT: Texture3D(size=8, content = one):name cudaT3D_u +Texture3D<uint> cudaT3D_u; +//TEST_INPUT: Texture3D(size=8, content = one):name cudaT3D_u2 +Texture3D<uint2> cudaT3D_u2; +//TEST_INPUT: Texture3D(size=8, content = one):name cudaT3D_u4 +Texture3D<uint4> cudaT3D_u4; +//TEST_INPUT: TextureCube(size=16, content = one):name cudaTCube_u +TextureCube<uint> cudaTCube_u; +//TEST_INPUT: Texture2D(size=16, content = one, arrayLength=3):name cudaT2DArray_u +Texture2DArray<uint> cudaT2DArray_u; +//TEST_INPUT: Texture2D(size=16, content = one, arrayLength=3):name cudaT2DArray_u2 +Texture2DArray<uint2> cudaT2DArray_u2; +//TEST_INPUT: Texture2D(size=16, content = one, arrayLength=3):name cudaT2DArray_u4 +Texture2DArray<uint4> cudaT2DArray_u4; + +//TEST_INPUT: ubuffer(data=[0 0 0], stride=4):out,name cudaOutputBuffer +RWStructuredBuffer<uint> cudaOutputBuffer; + +[numthreads(3, 1, 1)] +[shader("compute")] +void computeMain(int3 dispatchThreadID : SV_DispatchThreadID) +{ + int idx = dispatchThreadID.x; + + switch (idx) + { + case 0: + { + int2 var = int2(1, 2); + uint result = cudaT2D_u[var]; + cudaOutputBuffer[idx] = result; + + uint2 result2 = cudaT2D_u2[var]; + cudaOutputBuffer[idx] += result2.x; + + uint4 result4 = cudaT2D_u4[var]; + cudaOutputBuffer[idx] += result4.x; + cudaOutputBuffer[idx] += result4.y; + } + break; + + case 1: + { + int3 var = int3(1, 1, 1); + uint result = cudaT3D_u[var]; + cudaOutputBuffer[idx] = result; + + uint2 result2 = cudaT3D_u2[var]; + cudaOutputBuffer[idx] += result2.x; + + uint4 result4 = cudaT3D_u4[var]; + cudaOutputBuffer[idx] += result4.x; + cudaOutputBuffer[idx] += result4.w; + } + break; + + case 2: + { + int3 var = int3(0, 0, 1); + uint result = cudaT2DArray_u[var]; + cudaOutputBuffer[idx] = result; + + uint2 result2 = cudaT2DArray_u2[var]; + cudaOutputBuffer[idx] += result2.y; + + uint4 result4 = cudaT2DArray_u4[var]; + cudaOutputBuffer[idx] += result4.x; + cudaOutputBuffer[idx] += result4.y; + } + break; + } +} +// CHECK:FE000000 +// CHECK:FE000000 +// CHECK:FE000000
\ No newline at end of file |
