diff options
| author | Mukund Keshava <mkeshava@nvidia.com> | 2025-05-12 11:09:41 +0530 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-05-12 11:09:41 +0530 |
| commit | 03f9242489d5598c9c7594ac12e269f57a018cda (patch) | |
| tree | 2315d185985aa9d65dd4e3db8e1bf8d7a79d77c8 | |
| parent | b46c342f47b61119a0dc517ce6eb75eab3398504 (diff) | |
cuda: Add more formats for texture read/write (#7012)
* WiP: Add more formats for texture reads
* fix test
* format code
* add float2/float4 versions for 1D and 3D as well
* fixed review comment
* fix review comments
---------
Co-authored-by: slangbot <186143334+slangbot@users.noreply.github.com>
Co-authored-by: Ellie Hermaszewska <ellieh@nvidia.com>
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | prelude/slang-cuda-prelude.h | 186 |
| -rw-r--r-- | tests/compute/rw-texture-simple.slang | 63 |
| -rw-r--r-- | tests/compute/rw-texture-simple.slang.expected.txt | 6 |
| -rw-r--r-- | tests/compute/texture-subscript-cuda.slang | 114 |
| -rw-r--r-- | tests/compute/texture-subscript-cuda.slang.expected.txt | 6 |
| -rw-r--r-- | tests/compute/texture-subscript-uint-cuda.slang | 86 |

6 files changed, 406 insertions, 55 deletions
diff --git a/prelude/slang-cuda-prelude.h b/prelude/slang-cuda-prelude.h index 7c68384b3..a9e99683e 100644 --- a/prelude/slang-cuda-prelude.h +++ b/prelude/slang-cuda-prelude.h @@ -3408,9 +3408,9 @@ template<typename T> SLANG_FORCE_INLINE SLANG_CUDA_CALL T tex1Dfetch_int(CUtexObject texObj, int x) { T result; - float dummy; + float stub; asm("tex.1d.v4.f32.s32 {%0, %1, %2, %3}, [%4, {%5}];" - : "=f"(result), "=f"(dummy), "=f"(dummy), "=f"(dummy) + : "=f"(result), "=f"(stub), "=f"(stub), "=f"(stub) : "l"(texObj), "r"(x)); return result; } @@ -3419,22 +3419,75 @@ template<typename T> SLANG_FORCE_INLINE SLANG_CUDA_CALL T tex2Dfetch_int(CUtexObject texObj, int x, int y) { T result; - float dummy; + float stub; asm("tex.2d.v4.f32.s32 {%0, %1, %2, %3}, [%4, {%5, %6}];" - : "=f"(result), "=f"(dummy), "=f"(dummy), "=f"(dummy) + : "=f"(result), "=f"(stub), "=f"(stub), "=f"(stub) : "l"(texObj), "r"(x), "r"(y)); return result; } +template<> +SLANG_FORCE_INLINE SLANG_CUDA_CALL float2 tex2Dfetch_int(CUtexObject texObj, int x, int y) +{ + float result_x, result_y; + float stub; + asm("tex.2d.v4.f32.s32 {%0, %1, %2, %3}, [%4, {%5, %6}];" + : "=f"(result_x), "=f"(result_y), "=f"(stub), "=f"(stub) + : "l"(texObj), "r"(x), "r"(y)); + return make_float2(result_x, result_y); +} + +template<> +SLANG_FORCE_INLINE SLANG_CUDA_CALL float4 tex2Dfetch_int(CUtexObject texObj, int x, int y) +{ + float result_x, result_y, result_z, result_w; + asm("tex.2d.v4.f32.s32 {%0, %1, %2, %3}, [%4, {%5, %6}];" + : "=f"(result_x), "=f"(result_y), "=f"(result_z), "=f"(result_w) + : "l"(texObj), "r"(x), "r"(y)); + return make_float4(result_x, result_y, result_z, result_w); +} + +template<> +SLANG_FORCE_INLINE SLANG_CUDA_CALL uint tex2Dfetch_int(CUtexObject texObj, int x, int y) +{ + uint result; + uint stub; + asm("tex.2d.v4.f32.s32 {%0, %1, %2, %3}, [%4, {%5, %6}];" + : "=r"(result), "=r"(stub), "=r"(stub), "=r"(stub) + : "l"(texObj), "r"(x), "r"(y)); + return result; +} + +template<> +SLANG_FORCE_INLINE 
SLANG_CUDA_CALL uint2 tex2Dfetch_int(CUtexObject texObj, int x, int y) +{ + uint result_x, result_y; + uint stub; + asm("tex.2d.v4.f32.s32 {%0, %1, %2, %3}, [%4, {%5, %6}];" + : "=r"(result_x), "=r"(result_y), "=r"(stub), "=r"(stub) + : "l"(texObj), "r"(x), "r"(y)); + return make_uint2(result_x, result_y); +} + +template<> +SLANG_FORCE_INLINE SLANG_CUDA_CALL uint4 tex2Dfetch_int(CUtexObject texObj, int x, int y) +{ + uint result_x, result_y, result_z, result_w; + asm("tex.2d.v4.f32.s32 {%0, %1, %2, %3}, [%4, {%5, %6}];" + : "=r"(result_x), "=r"(result_y), "=r"(result_z), "=r"(result_w) + : "l"(texObj), "r"(x), "r"(y)); + return make_uint4(result_x, result_y, result_z, result_w); +} + template<typename T> SLANG_FORCE_INLINE SLANG_CUDA_CALL T tex3Dfetch_int(CUtexObject texObj, int x, int y, int z) { T result; - float dummy; + float stub; asm("tex.3d.v4.f32.s32 {%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}];" - : "=f"(result), "=f"(dummy), "=f"(dummy), "=f"(dummy) + : "=f"(result), "=f"(stub), "=f"(stub), "=f"(stub) : "l"(texObj), "r"(x), "r"(y), "r"(z), "r"(z)); - // Note: The repeated z is a dummy used as the fourth operand in ptx. + // Note: The repeated z is a stub used as the fourth operand in ptx. 
// From the docs: // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#texture-instructions-tex // Operand c is a scalar or singleton tuple for 1d textures; is a two-element vector for 2d @@ -3442,13 +3495,66 @@ SLANG_FORCE_INLINE SLANG_CUDA_CALL T tex3Dfetch_int(CUtexObject texObj, int x, i return result; } +template<> +SLANG_FORCE_INLINE SLANG_CUDA_CALL float2 tex3Dfetch_int(CUtexObject texObj, int x, int y, int z) +{ + float result_x, result_y; + float stub; + asm("tex.3d.v4.f32.s32 {%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}];" + : "=f"(result_x), "=f"(result_y), "=f"(stub), "=f"(stub) + : "l"(texObj), "r"(x), "r"(y), "r"(z), "r"(z)); + return make_float2(result_x, result_y); +} + +template<> +SLANG_FORCE_INLINE SLANG_CUDA_CALL float4 tex3Dfetch_int(CUtexObject texObj, int x, int y, int z) +{ + float result_x, result_y, result_z, result_w; + asm("tex.3d.v4.f32.s32 {%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}];" + : "=f"(result_x), "=f"(result_y), "=f"(result_z), "=f"(result_w) + : "l"(texObj), "r"(x), "r"(y), "r"(z), "r"(z)); + return make_float4(result_x, result_y, result_z, result_w); +} + +template<> +SLANG_FORCE_INLINE SLANG_CUDA_CALL uint tex3Dfetch_int(CUtexObject texObj, int x, int y, int z) +{ + uint result; + uint stub; + asm("tex.3d.v4.f32.s32 {%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}];" + : "=r"(result), "=r"(stub), "=r"(stub), "=r"(stub) + : "l"(texObj), "r"(x), "r"(y), "r"(z), "r"(z)); + return result; +} + +template<> +SLANG_FORCE_INLINE SLANG_CUDA_CALL uint2 tex3Dfetch_int(CUtexObject texObj, int x, int y, int z) +{ + uint result_x, result_y; + uint stub; + asm("tex.3d.v4.f32.s32 {%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}];" + : "=r"(result_x), "=r"(result_y), "=r"(stub), "=r"(stub) + : "l"(texObj), "r"(x), "r"(y), "r"(z), "r"(z)); + return make_uint2(result_x, result_y); +} + +template<> +SLANG_FORCE_INLINE SLANG_CUDA_CALL uint4 tex3Dfetch_int(CUtexObject texObj, int x, int y, int z) +{ + uint result_x, result_y, result_z, result_w; + 
asm("tex.3d.v4.f32.s32 {%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}];" + : "=r"(result_x), "=r"(result_y), "=r"(result_z), "=r"(result_w) + : "l"(texObj), "r"(x), "r"(y), "r"(z), "r"(z)); + return make_uint4(result_x, result_y, result_z, result_w); +} + template<typename T> SLANG_FORCE_INLINE SLANG_CUDA_CALL T tex1DArrayfetch_int(CUtexObject texObj, int x, int layer) { T result; - float dummy; + float stub; asm("tex.a1d.v4.f32.s32 {%0, %1, %2, %3}, [%4, {%5, %6}];" - : "=f"(result), "=f"(dummy), "=f"(dummy), "=f"(dummy) + : "=f"(result), "=f"(stub), "=f"(stub), "=f"(stub) : "l"(texObj), "r"(x), "r"(layer)); return result; } @@ -3458,9 +3564,67 @@ SLANG_FORCE_INLINE SLANG_CUDA_CALL T tex2DArrayfetch_int(CUtexObject texObj, int x, int y, int layer) { T result; - float dummy; + float stub; asm("tex.a2d.v4.f32.s32 {%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}];" - : "=f"(result), "=f"(dummy), "=f"(dummy), "=f"(dummy) + : "=f"(result), "=f"(stub), "=f"(stub), "=f"(stub) : "l"(texObj), "r"(x), "r"(y), "r"(layer), "r"(layer)); return result; +} + +template<> +SLANG_FORCE_INLINE SLANG_CUDA_CALL float2 +tex2DArrayfetch_int(CUtexObject texObj, int x, int y, int layer) +{ + float result_x, result_y; + float stub; + asm("tex.a2d.v4.f32.s32 {%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}];" + : "=f"(result_x), "=f"(result_y), "=f"(stub), "=f"(stub) + : "l"(texObj), "r"(x), "r"(y), "r"(layer), "r"(layer)); + return make_float2(result_x, result_y); +} + +template<> +SLANG_FORCE_INLINE SLANG_CUDA_CALL float4 +tex2DArrayfetch_int(CUtexObject texObj, int x, int y, int layer) +{ + float result_x, result_y, result_z, result_w; + asm("tex.a2d.v4.f32.s32 {%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}];" + : "=f"(result_x), "=f"(result_y), "=f"(result_z), "=f"(result_w) + : "l"(texObj), "r"(x), "r"(y), "r"(layer), "r"(layer)); + return make_float4(result_x, result_y, result_z, result_w); +} + +template<> +SLANG_FORCE_INLINE SLANG_CUDA_CALL uint +tex2DArrayfetch_int(CUtexObject texObj, int x, int y, int layer) +{ 
+ uint result; + uint stub; + asm("tex.a2d.v4.f32.s32 {%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}];" + : "=r"(result), "=r"(stub), "=r"(stub), "=r"(stub) + : "l"(texObj), "r"(x), "r"(y), "r"(layer), "r"(layer)); + return result; +} + +template<> +SLANG_FORCE_INLINE SLANG_CUDA_CALL uint2 +tex2DArrayfetch_int(CUtexObject texObj, int x, int y, int layer) +{ + uint result_x, result_y; + uint stub; + asm("tex.a2d.v4.f32.s32 {%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}];" + : "=r"(result_x), "=r"(result_y), "=r"(stub), "=r"(stub) + : "l"(texObj), "r"(x), "r"(y), "r"(layer), "r"(layer)); + return make_uint2(result_x, result_y); +} + +template<> +SLANG_FORCE_INLINE SLANG_CUDA_CALL uint4 +tex2DArrayfetch_int(CUtexObject texObj, int x, int y, int layer) +{ + uint result_x, result_y, result_z, result_w; + asm("tex.a2d.v4.f32.s32 {%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}];" + : "=r"(result_x), "=r"(result_y), "=r"(result_z), "=r"(result_w) + : "l"(texObj), "r"(x), "r"(y), "r"(layer), "r"(layer)); + return make_uint4(result_x, result_y, result_z, result_w); }
\ No newline at end of file diff --git a/tests/compute/rw-texture-simple.slang b/tests/compute/rw-texture-simple.slang index f5befcd57..d0d90acb9 100644 --- a/tests/compute/rw-texture-simple.slang +++ b/tests/compute/rw-texture-simple.slang @@ -14,6 +14,18 @@ RWTexture1D<float> rwt1D; RWTexture2D<float> rwt2D; //TEST_INPUT: RWTexture3D(format=R32Float, size=4, content = one, mipMaps = 1):name rwt3D RWTexture3D<float> rwt3D; +//TEST_INPUT: RWTexture1D(format=RG32Float, size=4, content = one, mipMaps = 1):name rwt1D_float2 +RWTexture1D<float2> rwt1D_float2; +//TEST_INPUT: RWTexture1D(format=RGBA32Float, size=4, content = one, mipMaps = 1):name rwt1D_float4 +RWTexture1D<float4> rwt1D_float4; +//TEST_INPUT: RWTexture2D(format=RG32Float, size=4, content = one, mipMaps = 1):name rwt2D_float2 +RWTexture2D<float2> rwt2D_float2; +//TEST_INPUT: RWTexture2D(format=RGBA32Float, size=4, content = one, mipMaps = 1):name rwt2D_float4 +RWTexture2D<float4> rwt2D_float4; +//TEST_INPUT: RWTexture3D(format=RG32Float, size=4, content = one, mipMaps = 1):name rwt3D_float2 +RWTexture3D<float2> rwt3D_float2; +//TEST_INPUT: RWTexture3D(format=RGBA32Float, size=4, content = one, mipMaps = 1):name rwt3D_float4 +RWTexture3D<float4> rwt3D_float4; //TEST_INPUT: ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer RWStructuredBuffer<float> outputBuffer; @@ -25,6 +37,7 @@ void computeMain(int3 dispatchThreadID : SV_DispatchThreadID) float val = 0.0f; + // float texture operations val += rwt1D[idx]; val += rwt2D[uint2(idx, idx)]; val += rwt3D[uint3(idx, idx, idx)]; @@ -36,6 +49,54 @@ void computeMain(int3 dispatchThreadID : SV_DispatchThreadID) val += rwt1D[idx]; val += rwt2D[uint2(idx, idx)]; val += rwt3D[uint3(idx, idx, idx)]; - + + // float2 texture operations for 1D + float2 val2_1d = rwt1D_float2[idx]; + rwt1D_float2[idx] = float2(idx, idx); + val2_1d = rwt1D_float2[idx]; + val += val2_1d.x; + val += val2_1d.y; + + // float2 texture operations for 2D + float2 val2 = rwt2D_float2[uint2(idx, 
idx)]; + rwt2D_float2[uint2(idx, idx)] = float2(idx, idx); + val2 = rwt2D_float2[uint2(idx, idx)]; + val += val2.x; + val += val2.y; + + // float2 texture operations for 3D + float2 val2_3d = rwt3D_float2[uint3(idx, idx, idx)]; + rwt3D_float2[uint3(idx, idx, idx)] = float2(idx, idx); + val2_3d = rwt3D_float2[uint3(idx, idx, idx)]; + val += val2_3d.x; + val += val2_3d.y; + + // float4 texture operations for 1D + float4 val4_1d = rwt1D_float4[idx]; + rwt1D_float4[idx] = float4(idx, idx, idx, idx); + val4_1d = rwt1D_float4[idx]; + val += val4_1d.x; + val += val4_1d.y; + val += val4_1d.z; + val += val4_1d.w; + + // float4 texture operations for 2D + float4 val4 = rwt2D_float4[uint2(idx, idx)]; + rwt2D_float4[uint2(idx, idx)] = float4(idx, idx, idx, idx); + val4 = rwt2D_float4[uint2(idx, idx)]; + val += val4.x; + val += val4.y; + val += val4.z; + val += val4.w; + + // float4 texture operations for 3D + float4 val4_3d = rwt3D_float4[uint3(idx, idx, idx)]; + rwt3D_float4[uint3(idx, idx, idx)] = float4(idx, idx, idx, idx); + val4_3d = rwt3D_float4[uint3(idx, idx, idx)]; + val += val4_3d.x; + val += val4_3d.y; + val += val4_3d.z; + val += val4_3d.w; + outputBuffer[idx] = val; } diff --git a/tests/compute/rw-texture-simple.slang.expected.txt b/tests/compute/rw-texture-simple.slang.expected.txt index 1d9023742..3c49364c4 100644 --- a/tests/compute/rw-texture-simple.slang.expected.txt +++ b/tests/compute/rw-texture-simple.slang.expected.txt @@ -1,5 +1,5 @@ type: float 3.000000 -6.000000 -9.000000 -12.000000 +24.000000 +45.000000 +66.000000 diff --git a/tests/compute/texture-subscript-cuda.slang b/tests/compute/texture-subscript-cuda.slang index e64f42b19..7c4a2cc78 100644 --- a/tests/compute/texture-subscript-cuda.slang +++ b/tests/compute/texture-subscript-cuda.slang @@ -5,14 +5,24 @@ Texture1D<float> cudaT1D; //TEST_INPUT: Texture2D(size=8, content = one):name cudaT2D Texture2D<float> cudaT2D; +//TEST_INPUT: Texture2D(size=8, content = one):name cudaT2D_f2 +Texture2D<float2> 
cudaT2D_f2; +//TEST_INPUT: Texture2D(size=8, content = one):name cudaT2D_f4 +Texture2D<float4> cudaT2D_f4; //TEST_INPUT: Texture3D(size=8, content = one):name cudaT3D Texture3D<float> cudaT3D; +//TEST_INPUT: Texture3D(size=8, content = one):name cudaT3D_f2 +Texture3D<float2> cudaT3D_f2; +//TEST_INPUT: Texture3D(size=8, content = one):name cudaT3D_f4 +Texture3D<float4> cudaT3D_f4; //TEST_INPUT: TextureCube(size=16, content = one):name cudaTCube TextureCube<float> cudaTCube; //TEST_INPUT: Texture2D(size=16, content = one, arrayLength=3):name cudaT2DArray Texture2DArray<float> cudaT2DArray; -//TEST_INPUT: TextureCube(size=16, content = one, arrayLength=1):name cudaTCubeArray -TextureCubeArray<float> cudaTCubeArray; +//TEST_INPUT: Texture2D(size=16, content = one, arrayLength=3):name cudaT2DArray_f2 +Texture2DArray<float2> cudaT2DArray_f2; +//TEST_INPUT: Texture2D(size=16, content = one, arrayLength=3):name cudaT2DArray_f4 +Texture2DArray<float4> cudaT2DArray_f4; //TEST_INPUT: ubuffer(data=[0 0 0 0 0 0 0], stride=4):out,name cudaOutputBuffer RWStructuredBuffer<float> cudaOutputBuffer; @@ -23,39 +33,69 @@ void computeMain(int3 dispatchThreadID : SV_DispatchThreadID) { int idx = dispatchThreadID.x; - switch (idx) - { - case 1: - { - int var = 0; - float result = cudaT1D[0]; - // This is not supported in PTX. - //cudaOutputBuffer[idx] = result; - } - break; - - case 2: - { - int2 var = int2(1, 2); - float result = cudaT2D[var]; - cudaOutputBuffer[idx] = result; - } - break; - - case 3: - { - int3 var = int3(1, 1, 1); - float result = cudaT3D[var]; - cudaOutputBuffer[idx] = result; - } - break; - - case 4: - { - int3 var = int3(0, 0, 1); - float result = cudaT2DArray[var]; - cudaOutputBuffer[idx] = result; - } - break; - } + switch (idx) + { + case 1: + { + int var = 0; + float result = cudaT1D[0]; + // This is not supported in PTX. 
+ //cudaOutputBuffer[idx] = result; + } + break; + + case 2: + { + int2 var = int2(1, 2); + float result = cudaT2D[var]; + cudaOutputBuffer[idx] = result; + + float2 result2 = cudaT2D_f2[var]; + cudaOutputBuffer[idx] += result2.x; + cudaOutputBuffer[idx] += result2.y; + + float4 result4 = cudaT2D_f4[var]; + cudaOutputBuffer[idx] += result4.x; + cudaOutputBuffer[idx] += result4.y; + cudaOutputBuffer[idx] += result4.z; + cudaOutputBuffer[idx] += result4.w; + } + break; + + case 3: + { + int3 var = int3(1, 1, 1); + float result = cudaT3D[var]; + cudaOutputBuffer[idx] = result; + + float2 result2 = cudaT3D_f2[var]; + cudaOutputBuffer[idx] += result2.x; + cudaOutputBuffer[idx] += result2.y; + + float4 result4 = cudaT3D_f4[var]; + cudaOutputBuffer[idx] += result4.x; + cudaOutputBuffer[idx] += result4.y; + cudaOutputBuffer[idx] += result4.z; + cudaOutputBuffer[idx] += result4.w; + } + break; + + case 4: + { + int3 var = int3(0, 0, 1); + float result = cudaT2DArray[var]; + cudaOutputBuffer[idx] = result; + + float2 result2 = cudaT2DArray_f2[var]; + cudaOutputBuffer[idx] += result2.x; + cudaOutputBuffer[idx] += result2.y; + + float4 result4 = cudaT2DArray_f4[var]; + cudaOutputBuffer[idx] += result4.x; + cudaOutputBuffer[idx] += result4.y; + cudaOutputBuffer[idx] += result4.z; + cudaOutputBuffer[idx] += result4.w; + } + break; + } }
\ No newline at end of file diff --git a/tests/compute/texture-subscript-cuda.slang.expected.txt b/tests/compute/texture-subscript-cuda.slang.expected.txt index 133a47e56..698717361 100644 --- a/tests/compute/texture-subscript-cuda.slang.expected.txt +++ b/tests/compute/texture-subscript-cuda.slang.expected.txt @@ -1,7 +1,7 @@ 0 0 -3F800000 -3F800000 -3F800000 +40E00000 +40E00000 +40E00000 0 0
\ No newline at end of file diff --git a/tests/compute/texture-subscript-uint-cuda.slang b/tests/compute/texture-subscript-uint-cuda.slang new file mode 100644 index 000000000..d2b6cec59 --- /dev/null +++ b/tests/compute/texture-subscript-uint-cuda.slang @@ -0,0 +1,86 @@ +// Test for verifying subscript operator support with uint types in cuda. + +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-cuda -compute -shaderobj +//TEST_INPUT: Texture1D(size=4, content = one):name cudaT1D_u +Texture1D<uint> cudaT1D_u; +//TEST_INPUT: Texture2D(size=8, content = one):name cudaT2D_u +Texture2D<uint> cudaT2D_u; +//TEST_INPUT: Texture2D(size=8, content = one):name cudaT2D_u2 +Texture2D<uint2> cudaT2D_u2; +//TEST_INPUT: Texture2D(size=8, content = one):name cudaT2D_u4 +Texture2D<uint4> cudaT2D_u4; +//TEST_INPUT: Texture3D(size=8, content = one):name cudaT3D_u +Texture3D<uint> cudaT3D_u; +//TEST_INPUT: Texture3D(size=8, content = one):name cudaT3D_u2 +Texture3D<uint2> cudaT3D_u2; +//TEST_INPUT: Texture3D(size=8, content = one):name cudaT3D_u4 +Texture3D<uint4> cudaT3D_u4; +//TEST_INPUT: TextureCube(size=16, content = one):name cudaTCube_u +TextureCube<uint> cudaTCube_u; +//TEST_INPUT: Texture2D(size=16, content = one, arrayLength=3):name cudaT2DArray_u +Texture2DArray<uint> cudaT2DArray_u; +//TEST_INPUT: Texture2D(size=16, content = one, arrayLength=3):name cudaT2DArray_u2 +Texture2DArray<uint2> cudaT2DArray_u2; +//TEST_INPUT: Texture2D(size=16, content = one, arrayLength=3):name cudaT2DArray_u4 +Texture2DArray<uint4> cudaT2DArray_u4; + +//TEST_INPUT: ubuffer(data=[0 0 0], stride=4):out,name cudaOutputBuffer +RWStructuredBuffer<uint> cudaOutputBuffer; + +[numthreads(3, 1, 1)] +[shader("compute")] +void computeMain(int3 dispatchThreadID : SV_DispatchThreadID) +{ + int idx = dispatchThreadID.x; + + switch (idx) + { + case 0: + { + int2 var = int2(1, 2); + uint result = cudaT2D_u[var]; + cudaOutputBuffer[idx] = result; + + uint2 result2 = cudaT2D_u2[var]; + 
cudaOutputBuffer[idx] += result2.x; + + uint4 result4 = cudaT2D_u4[var]; + cudaOutputBuffer[idx] += result4.x; + cudaOutputBuffer[idx] += result4.y; + } + break; + + case 1: + { + int3 var = int3(1, 1, 1); + uint result = cudaT3D_u[var]; + cudaOutputBuffer[idx] = result; + + uint2 result2 = cudaT3D_u2[var]; + cudaOutputBuffer[idx] += result2.x; + + uint4 result4 = cudaT3D_u4[var]; + cudaOutputBuffer[idx] += result4.x; + cudaOutputBuffer[idx] += result4.w; + } + break; + + case 2: + { + int3 var = int3(0, 0, 1); + uint result = cudaT2DArray_u[var]; + cudaOutputBuffer[idx] = result; + + uint2 result2 = cudaT2DArray_u2[var]; + cudaOutputBuffer[idx] += result2.y; + + uint4 result4 = cudaT2DArray_u4[var]; + cudaOutputBuffer[idx] += result4.x; + cudaOutputBuffer[idx] += result4.y; + } + break; + } +} +// CHECK:FE000000 +// CHECK:FE000000 +// CHECK:FE000000
\ No newline at end of file |
