diff options
| -rw-r--r-- | prelude/slang-cuda-prelude.h | 237 | ||||
| -rw-r--r-- | source/slang/hlsl.meta.slang | 2 | ||||
| -rw-r--r-- | tests/compute/texture-subscript-cuda.slang | 107 | ||||
| -rw-r--r-- | tests/compute/texture-subscript-cuda.slang.expected.txt | 4 |
4 files changed, 342 insertions, 8 deletions
diff --git a/prelude/slang-cuda-prelude.h b/prelude/slang-cuda-prelude.h index f0c053168..fd79b77aa 100644 --- a/prelude/slang-cuda-prelude.h +++ b/prelude/slang-cuda-prelude.h @@ -1324,10 +1324,11 @@ SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf3Dwrite_convert<float>( cudaSurfaceBoundaryMode boundaryMode) { asm volatile( - "{sust.p.2d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2,%3}], {%4};}\n\t" ::"l"(surfObj), + "{sust.p.3d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2,%3,%4}], {%5};}\n\t" ::"l"(surfObj), "r"(x), "r"(y), "r"(z), + "r"(0), "f"(v)); } @@ -1376,11 +1377,12 @@ SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf3Dwrite_convert<float2>( { const float vx = v.x, vy = v.y; asm volatile( - "{sust.p.2d.v2.b32." SLANG_PTX_BOUNDARY_MODE - " [%0, {%1,%2,%3}], {%4,%5};}\n\t" ::"l"(surfObj), + "{sust.p.3d.v2.b32." SLANG_PTX_BOUNDARY_MODE + " [%0, {%1,%2,%3,%4}], {%5,%6};}\n\t" ::"l"(surfObj), "r"(x), "r"(y), "r"(z), + "r"(0), "f"(vx), "f"(vy)); } @@ -1435,17 +1437,242 @@ SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf3Dwrite_convert<float4>( { const float vx = v.x, vy = v.y, vz = v.z, vw = v.w; asm volatile( - "{sust.p.2d.v4.b32." SLANG_PTX_BOUNDARY_MODE - " [%0, {%1,%2,%3}], {%4,%5,%6,%7};}\n\t" ::"l"(surfObj), + "{sust.p.3d.v4.b32." SLANG_PTX_BOUNDARY_MODE + " [%0, {%1,%2,%3,%4}], {%5,%6,%7,%8};}\n\t" ::"l"(surfObj), "r"(x), "r"(y), "r"(z), + "r"(0), "f"(vx), "f"(vy), "f"(vz), "f"(vw)); } +template<> +SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf2Dwrite_convert<uint>( + uint v, + cudaSurfaceObject_t surfObj, + int x, + int y, + cudaSurfaceBoundaryMode boundaryMode) +{ + asm volatile( + "{sust.p.2d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2}], {%3};}\n\t" ::"l"(surfObj), + "r"(x), + "r"(y), + "r"(v)); +} +template<> +SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf3Dwrite_convert<uint>( + uint v, + cudaSurfaceObject_t surfObj, + int x, + int y, + int z, + cudaSurfaceBoundaryMode boundaryMode) +{ + asm volatile( + "{sust.p.3d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2,%3,%4}], {%5};}\n\t" ::"l"(surfObj), + "r"(x), + "r"(y), + "r"(z), + "r"(0), + "r"(v)); +} + +template<> +SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf2Dwrite_convert<uint2>( + uint2 v, + cudaSurfaceObject_t surfObj, + int x, + int y, + cudaSurfaceBoundaryMode boundaryMode) +{ + const uint vx = v.x, vy = v.y; + asm volatile( + "{sust.p.2d.v2.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2}], {%3,%4};}\n\t" ::"l"(surfObj), + "r"(x), + "r"(y), + "r"(vx), + "r"(vy)); +} +template<> +SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf3Dwrite_convert<uint2>( + uint2 v, + cudaSurfaceObject_t surfObj, + int x, + int y, + int z, + cudaSurfaceBoundaryMode boundaryMode) +{ + const uint vx = v.x, vy = v.y; + asm volatile( + "{sust.p.3d.v2.b32." SLANG_PTX_BOUNDARY_MODE + " [%0, {%1,%2,%3,%4}], {%5,%6};}\n\t" ::"l"(surfObj), + "r"(x), + "r"(y), + "r"(z), + "r"(0), + "r"(vx), + "r"(vy)); +} + +template<> +SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf2Dwrite_convert<uint4>( + uint4 v, + cudaSurfaceObject_t surfObj, + int x, + int y, + cudaSurfaceBoundaryMode boundaryMode) +{ + const uint vx = v.x, vy = v.y, vz = v.z, vw = v.w; + asm volatile( + "{sust.p.2d.v4.b32." SLANG_PTX_BOUNDARY_MODE + " [%0, {%1,%2}], {%3,%4,%5,%6};}\n\t" ::"l"(surfObj), + "r"(x), + "r"(y), + "r"(vx), + "r"(vy), + "r"(vz), + "r"(vw)); +} +template<> +SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf3Dwrite_convert<uint4>( + uint4 v, + cudaSurfaceObject_t surfObj, + int x, + int y, + int z, + cudaSurfaceBoundaryMode boundaryMode) +{ + const uint vx = v.x, vy = v.y, vz = v.z, vw = v.w; + asm volatile( + "{sust.p.3d.v4.b32." SLANG_PTX_BOUNDARY_MODE + " [%0, {%1,%2,%3,%4}], {%5,%6,%7,%8};}\n\t" ::"l"(surfObj), + "r"(x), + "r"(y), + "r"(z), + "r"(0), + "r"(vx), + "r"(vy), + "r"(vz), + "r"(vw)); +} + +template<> +SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf2Dwrite_convert<int>( + int v, + cudaSurfaceObject_t surfObj, + int x, + int y, + cudaSurfaceBoundaryMode boundaryMode) +{ + asm volatile( + "{sust.p.2d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2}], {%3};}\n\t" ::"l"(surfObj), + "r"(x), + "r"(y), + "r"(v)); +} +// Int2 +template<> +SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf2Dwrite_convert<int2>( + int2 v, + cudaSurfaceObject_t surfObj, + int x, + int y, + cudaSurfaceBoundaryMode boundaryMode) +{ + const int vx = v.x, vy = v.y; + asm volatile( + "{sust.p.2d.v2.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2}], {%3,%4};}\n\t" ::"l"(surfObj), + "r"(x), + "r"(y), + "r"(vx), + "r"(vy)); +} +template<> +SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf2Dwrite_convert<int4>( + int4 v, + cudaSurfaceObject_t surfObj, + int x, + int y, + cudaSurfaceBoundaryMode boundaryMode) +{ + const int vx = v.x, vy = v.y, vz = v.z, vw = v.w; + asm volatile( + "{sust.p.2d.v4.b32." SLANG_PTX_BOUNDARY_MODE + " [%0, {%1,%2}], {%3,%4,%5,%6};}\n\t" ::"l"(surfObj), + "r"(x), + "r"(y), + "r"(vx), + "r"(vy), + "r"(vz), + "r"(vw)); +} + +template<> +SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf3Dwrite_convert<int>( + int v, + cudaSurfaceObject_t surfObj, + int x, + int y, + int z, + cudaSurfaceBoundaryMode boundaryMode) +{ + asm volatile( + "{sust.p.3d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2,%3,%4}], {%5};}\n\t" ::"l"(surfObj), + "r"(x), + "r"(y), + "r"(z), + "r"(0), + "r"(v)); +} +// Int2 +template<> +SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf3Dwrite_convert<int2>( + int2 v, + cudaSurfaceObject_t surfObj, + int x, + int y, + int z, + cudaSurfaceBoundaryMode boundaryMode) +{ + const int vx = v.x, vy = v.y; + asm volatile( + "{sust.p.3d.v2.b32." SLANG_PTX_BOUNDARY_MODE + " [%0, {%1,%2,%3,%4}], {%5,%6};}\n\t" ::"l"(surfObj), + "r"(x), + "r"(y), + "r"(z), + "r"(0), + "r"(vx), + "r"(vy)); +} +// Int4 +template<> +SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf3Dwrite_convert<int4>( + int4 v, + cudaSurfaceObject_t surfObj, + int x, + int y, + int z, + cudaSurfaceBoundaryMode boundaryMode) +{ + const int vx = v.x, vy = v.y, vz = v.z, vw = v.w; + asm volatile( + "{sust.p.3d.v4.b32." SLANG_PTX_BOUNDARY_MODE + " [%0, {%1,%2,%3,%4}], {%5,%6,%7,%8};}\n\t" ::"l"(surfObj), + "r"(x), + "r"(y), + "r"(z), + "r"(0), + "r"(vx), + "r"(vy), + "r"(vz), + "r"(vw)); +} + // ----------------------------- F32 ----------------------------------------- // Unary diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index 216dfc04a..264098bec 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -28388,4 +28388,4 @@ void InterlockedAddF16Emulated(half* dest, half value, out half originalValue) void InterlockedAddF16x2(half2* dest, half2 value, out half2 originalValue) { originalValue = __atomic_add(*dest, value); -}
\ No newline at end of file +} diff --git a/tests/compute/texture-subscript-cuda.slang b/tests/compute/texture-subscript-cuda.slang index 7c4a2cc78..26b1f9e09 100644 --- a/tests/compute/texture-subscript-cuda.slang +++ b/tests/compute/texture-subscript-cuda.slang @@ -24,10 +24,37 @@ Texture2DArray<float2> cudaT2DArray_f2; //TEST_INPUT: Texture2D(size=16, content = one, arrayLength=3):name cudaT2DArray_f4 Texture2DArray<float4> cudaT2DArray_f4; -//TEST_INPUT: ubuffer(data=[0 0 0 0 0 0 0], stride=4):out,name cudaOutputBuffer +// New texture declarations for uint and int variants +//TEST_INPUT: Texture2D(size=8, content = one):name cudaT2D_uint +Texture2D<uint> cudaT2D_uint; +//TEST_INPUT: Texture2D(size=8, content = one):name cudaT2D_uint2 +Texture2D<uint2> cudaT2D_uint2; +//TEST_INPUT: Texture2D(size=8, content = one):name cudaT2D_uint4 +Texture2D<uint4> cudaT2D_uint4; +//TEST_INPUT: Texture2D(size=8, content = one):name cudaT2D_int +Texture2D<int> cudaT2D_int; +//TEST_INPUT: Texture2D(size=8, content = one):name cudaT2D_int2 +Texture2D<int2> cudaT2D_int2; +//TEST_INPUT: Texture2D(size=8, content = one):name cudaT2D_int4 +Texture2D<int4> cudaT2D_int4; + +//TEST_INPUT: Texture3D(size=8, content = one):name cudaT3D_uint +Texture3D<uint> cudaT3D_uint; +//TEST_INPUT: Texture3D(size=8, content = one):name cudaT3D_uint2 +Texture3D<uint2> cudaT3D_uint2; +//TEST_INPUT: Texture3D(size=8, content = one):name cudaT3D_uint4 +Texture3D<uint4> cudaT3D_uint4; +//TEST_INPUT: Texture3D(size=8, content = one):name cudaT3D_int +Texture3D<int> cudaT3D_int; +//TEST_INPUT: Texture3D(size=8, content = one):name cudaT3D_int2 +Texture3D<int2> cudaT3D_int2; +//TEST_INPUT: Texture3D(size=8, content = one):name cudaT3D_int4 +Texture3D<int4> cudaT3D_int4; + +//TEST_INPUT: ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0], stride=4):out,name cudaOutputBuffer RWStructuredBuffer<float> cudaOutputBuffer; -[numthreads(7, 1, 1)] +[numthreads(11, 1, 1)] [shader("compute")] void computeMain(int3 dispatchThreadID : SV_DispatchThreadID) { @@ -97,5 +124,81 @@ void computeMain(int3 dispatchThreadID : SV_DispatchThreadID) cudaOutputBuffer[idx] += result4.w; } break; + + case 5: + { + // Test 2D textures with uint variants + int2 var = int2(1, 2); + uint result = cudaT2D_uint[var]; + cudaOutputBuffer[idx] = float(result); + + uint2 result2 = cudaT2D_uint2[var]; + cudaOutputBuffer[idx] += float(result2.x); + cudaOutputBuffer[idx] += float(result2.y); + + uint4 result4 = cudaT2D_uint4[var]; + cudaOutputBuffer[idx] += float(result4.x); + cudaOutputBuffer[idx] += float(result4.y); + cudaOutputBuffer[idx] += float(result4.z); + cudaOutputBuffer[idx] += float(result4.w); + } + break; + + case 6: + { + // Test 2D textures with int variants + int2 var = int2(1, 2); + int result = cudaT2D_int[var]; + cudaOutputBuffer[idx] = float(result); + + int2 result2 = cudaT2D_int2[var]; + cudaOutputBuffer[idx] += float(result2.x); + cudaOutputBuffer[idx] += float(result2.y); + + int4 result4 = cudaT2D_int4[var]; + cudaOutputBuffer[idx] += float(result4.x); + cudaOutputBuffer[idx] += float(result4.y); + cudaOutputBuffer[idx] += float(result4.z); + cudaOutputBuffer[idx] += float(result4.w); + } + break; + + case 7: + { + // Test 3D textures with uint variants + int3 var = int3(1, 1, 1); + uint result = cudaT3D_uint[var]; + cudaOutputBuffer[idx] = float(result); + + uint2 result2 = cudaT3D_uint2[var]; + cudaOutputBuffer[idx] += float(result2.x); + cudaOutputBuffer[idx] += float(result2.y); + + uint4 result4 = cudaT3D_uint4[var]; + cudaOutputBuffer[idx] += float(result4.x); + cudaOutputBuffer[idx] += float(result4.y); + cudaOutputBuffer[idx] += float(result4.z); + cudaOutputBuffer[idx] += float(result4.w); + } + break; + + case 8: + { + // Test 3D textures with int variants + int3 var = int3(1, 1, 1); + int result = cudaT3D_int[var]; + cudaOutputBuffer[idx] = float(result); + + int2 result2 = cudaT3D_int2[var]; + cudaOutputBuffer[idx] += float(result2.x); + cudaOutputBuffer[idx] += float(result2.y); + + int4 result4 = cudaT3D_int4[var]; + cudaOutputBuffer[idx] += float(result4.x); + cudaOutputBuffer[idx] += float(result4.y); + cudaOutputBuffer[idx] += float(result4.z); + cudaOutputBuffer[idx] += float(result4.w); + } + break; } }
\ No newline at end of file diff --git a/tests/compute/texture-subscript-cuda.slang.expected.txt b/tests/compute/texture-subscript-cuda.slang.expected.txt index 698717361..ceb358832 100644 --- a/tests/compute/texture-subscript-cuda.slang.expected.txt +++ b/tests/compute/texture-subscript-cuda.slang.expected.txt @@ -3,5 +3,9 @@ 40E00000 40E00000 40E00000 +4FDE4000 +4FDE4000 +4FDE4000 +4FDE4000 0 0
\ No newline at end of file |
