diff options
| author | Mukund Keshava <mkeshava@nvidia.com> | 2025-04-30 16:07:02 +0530 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-04-30 10:37:02 +0000 |
| commit | b0e150511a6a536c8ad9e74910b30ae179a10ec9 (patch) | |
| tree | cb749d757e0e556d987d6a30020971ed5a6aa41d | |
| parent | 41ac7a0d8b4e9c08eccc2153020900e0262cae84 (diff) | |
Add subscript operator support in cuda (#6830)
* cuda: Add support for subscript operator
This CL adds support for the subscript operator on read-only
textures in CUDA, and adds a test covering it.
Fixes #6781
* format code
* fix review comments
* format code
---------
Co-authored-by: slangbot <186143334+slangbot@users.noreply.github.com>
Co-authored-by: Ellie Hermaszewska <ellieh@nvidia.com>
| -rw-r--r-- | prelude/slang-cuda-prelude.h | 67 | ||||
| -rw-r--r-- | source/slang/hlsl.meta.slang | 33 | ||||
| -rw-r--r-- | tests/compute/texture-subscript-cuda.slang | 61 | ||||
| -rw-r--r-- | tests/compute/texture-subscript-cuda.slang.expected.txt | 7 | ||||
| -rw-r--r-- | tests/language-feature/capability/capability-invalid-fragment-in-compute.slang | 16 |
5 files changed, 171 insertions, 13 deletions
diff --git a/prelude/slang-cuda-prelude.h b/prelude/slang-cuda-prelude.h index 38e018e3e..738f2fa16 100644 --- a/prelude/slang-cuda-prelude.h +++ b/prelude/slang-cuda-prelude.h @@ -3366,3 +3366,70 @@ struct TensorView *reinterpret_cast<T*>(data + offset) = val; } }; + +// Implementations for texture fetch/load functions using tex PTX intrinsics +// These are used for read-only texture access with integer coordinates +// See #6781 for details. + +// 1D is not supported via PTX. Keeping this placeholder in case it ever gets +// supported. +template<typename T> +SLANG_FORCE_INLINE SLANG_CUDA_CALL T tex1Dfetch_int(CUtexObject texObj, int x) +{ + T result; + float dummy; + asm("tex.1d.v4.f32.s32 {%0, %1, %2, %3}, [%4, {%5}];" + : "=f"(result), "=f"(dummy), "=f"(dummy), "=f"(dummy) + : "l"(texObj), "r"(x)); + return result; +} + +template<typename T> +SLANG_FORCE_INLINE SLANG_CUDA_CALL T tex2Dfetch_int(CUtexObject texObj, int x, int y) +{ + T result; + float dummy; + asm("tex.2d.v4.f32.s32 {%0, %1, %2, %3}, [%4, {%5, %6}];" + : "=f"(result), "=f"(dummy), "=f"(dummy), "=f"(dummy) + : "l"(texObj), "r"(x), "r"(y)); + return result; +} + +template<typename T> +SLANG_FORCE_INLINE SLANG_CUDA_CALL T tex3Dfetch_int(CUtexObject texObj, int x, int y, int z) +{ + T result; + float dummy; + asm("tex.3d.v4.f32.s32 {%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}];" + : "=f"(result), "=f"(dummy), "=f"(dummy), "=f"(dummy) + : "l"(texObj), "r"(x), "r"(y), "r"(z), "r"(z)); + // Note: The repeated z is a dummy used as the fourth operand in ptx. + // From the docs: + // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#texture-instructions-tex + // Operand c is a scalar or singleton tuple for 1d textures; is a two-element vector for 2d + // textures; and is a four-element vector for 3d textures. 
+ return result; +} + +template<typename T> +SLANG_FORCE_INLINE SLANG_CUDA_CALL T tex1DArrayfetch_int(CUtexObject texObj, int x, int layer) +{ + T result; + float dummy; + asm("tex.a1d.v4.f32.s32 {%0, %1, %2, %3}, [%4, {%5, %6}];" + : "=f"(result), "=f"(dummy), "=f"(dummy), "=f"(dummy) + : "l"(texObj), "r"(x), "r"(layer)); + return result; +} + +template<typename T> +SLANG_FORCE_INLINE SLANG_CUDA_CALL T +tex2DArrayfetch_int(CUtexObject texObj, int x, int y, int layer) +{ + T result; + float dummy; + asm("tex.a2d.v4.f32.s32 {%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}];" + : "=f"(result), "=f"(dummy), "=f"(dummy), "=f"(dummy) + : "l"(texObj), "r"(x), "r"(y), "r"(layer), "r"(layer)); + return result; +}
\ No newline at end of file diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index 44b9a8860..a6e1196e3 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -3610,7 +3610,7 @@ extension _Texture<T,Shape,isArray,0,sampleCount,0,isShadow,isCombined,format> //@public: [__readNone] [ForceInline] - [require(cpp_glsl_hlsl_metal_spirv_wgsl, texture_sm_4_1_samplerless)] + [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, texture_sm_4_1_samplerless)] T Load(vector<int, Shape.dimensions+isArray+1> location) { __target_switch @@ -3618,6 +3618,34 @@ extension _Texture<T,Shape,isArray,0,sampleCount,0,isShadow,isCombined,format> case cpp: case hlsl: __intrinsic_asm ".Load"; + case cuda: + if (isArray != 0) + { + static_assert(Shape.flavor == $(SLANG_TEXTURE_2D) || Shape.flavor == $(SLANG_TEXTURE_3D), + "Integer coordinates are supported for texture reads only for 2D and 3D textures and 2D array textures."); + + if (Shape.flavor == $(SLANG_TEXTURE_2D)) + { + __intrinsic_asm "tex2DArrayfetch_int<$T0>($0, ($1).x, ($1).y, ($1).z)"; + } + else + { + __intrinsic_asm "<invalid intrinsic>"; + } + } + else + { + switch(Shape.flavor) + { + case $(SLANG_TEXTURE_2D): + __intrinsic_asm "tex2Dfetch_int<$T0>($0, ($1).x, ($1).y)"; + case $(SLANG_TEXTURE_3D): + __intrinsic_asm "tex3Dfetch_int<$T0>($0, ($1).x, ($1).y, ($1).z)"; + case $(SLANG_TEXTURE_CUBE): + default: + __intrinsic_asm "<invalid intrinsic>"; + } + } case metal: switch (Shape.flavor) { @@ -3824,7 +3852,7 @@ extension _Texture<T,Shape,isArray,0,sampleCount,0,isShadow,isCombined,format> { [__readNone] [ForceInline] - [require(cpp_glsl_hlsl_metal_spirv_wgsl, texture_sm_4_1_samplerless)] + [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, texture_sm_4_1_samplerless)] get { __target_switch @@ -3833,6 +3861,7 @@ extension _Texture<T,Shape,isArray,0,sampleCount,0,isShadow,isCombined,format> case hlsl: __intrinsic_asm ".operator[]"; case metal: + case cuda: return Load(__makeVector(location, 0)); 
case glsl: if (isCombined == 0) diff --git a/tests/compute/texture-subscript-cuda.slang b/tests/compute/texture-subscript-cuda.slang new file mode 100644 index 000000000..e64f42b19 --- /dev/null +++ b/tests/compute/texture-subscript-cuda.slang @@ -0,0 +1,61 @@ +// Test for verifying subscript operator support in cuda. + +//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -shaderobj +//TEST_INPUT: Texture1D(size=4, content = one):name cudaT1D +Texture1D<float> cudaT1D; +//TEST_INPUT: Texture2D(size=8, content = one):name cudaT2D +Texture2D<float> cudaT2D; +//TEST_INPUT: Texture3D(size=8, content = one):name cudaT3D +Texture3D<float> cudaT3D; +//TEST_INPUT: TextureCube(size=16, content = one):name cudaTCube +TextureCube<float> cudaTCube; +//TEST_INPUT: Texture2D(size=16, content = one, arrayLength=3):name cudaT2DArray +Texture2DArray<float> cudaT2DArray; +//TEST_INPUT: TextureCube(size=16, content = one, arrayLength=1):name cudaTCubeArray +TextureCubeArray<float> cudaTCubeArray; + +//TEST_INPUT: ubuffer(data=[0 0 0 0 0 0 0], stride=4):out,name cudaOutputBuffer +RWStructuredBuffer<float> cudaOutputBuffer; + +[numthreads(7, 1, 1)] +[shader("compute")] +void computeMain(int3 dispatchThreadID : SV_DispatchThreadID) +{ + int idx = dispatchThreadID.x; + + switch (idx) + { + case 1: + { + int var = 0; + float result = cudaT1D[0]; + // This is not supported in PTX. + //cudaOutputBuffer[idx] = result; + } + break; + + case 2: + { + int2 var = int2(1, 2); + float result = cudaT2D[var]; + cudaOutputBuffer[idx] = result; + } + break; + + case 3: + { + int3 var = int3(1, 1, 1); + float result = cudaT3D[var]; + cudaOutputBuffer[idx] = result; + } + break; + + case 4: + { + int3 var = int3(0, 0, 1); + float result = cudaT2DArray[var]; + cudaOutputBuffer[idx] = result; + } + break; + } +}
\ No newline at end of file diff --git a/tests/compute/texture-subscript-cuda.slang.expected.txt b/tests/compute/texture-subscript-cuda.slang.expected.txt new file mode 100644 index 000000000..133a47e56 --- /dev/null +++ b/tests/compute/texture-subscript-cuda.slang.expected.txt @@ -0,0 +1,7 @@ +0 +0 +3F800000 +3F800000 +3F800000 +0 +0
\ No newline at end of file diff --git a/tests/language-feature/capability/capability-invalid-fragment-in-compute.slang b/tests/language-feature/capability/capability-invalid-fragment-in-compute.slang index 946ba4470..ccf834167 100644 --- a/tests/language-feature/capability/capability-invalid-fragment-in-compute.slang +++ b/tests/language-feature/capability/capability-invalid-fragment-in-compute.slang @@ -1,7 +1,7 @@ -//TEST:SIMPLE(filecheck=CHECK): -target hlsl -emit-spirv-directly -entry computeMain -stage compute -allow-glsl -DPRE -//TEST:SIMPLE(filecheck=CHECK): -target hlsl -emit-spirv-directly -entry computeMain -stage compute -allow-glsl -DPOST -//TEST:SIMPLE(filecheck=CHECK_IGNORE_CAPS): -target hlsl -emit-spirv-directly -entry computeMain -stage compute -allow-glsl -ignore-capabilities -DPRE -//TEST:SIMPLE(filecheck=CHECK_IGNORE_CAPS): -target hlsl -emit-spirv-directly -entry computeMain -stage compute -allow-glsl -ignore-capabilities -DPOST +//TEST:SIMPLE(filecheck=CHECK): -target hlsl -emit-spirv-directly -entry computeMain -stage compute -allow-glsl +//TEST:SIMPLE(filecheck=CHECK): -target hlsl -emit-spirv-directly -entry computeMain -stage compute -allow-glsl +//TEST:SIMPLE(filecheck=CHECK_IGNORE_CAPS): -target hlsl -emit-spirv-directly -entry computeMain -stage compute -allow-glsl -ignore-capabilities +//TEST:SIMPLE(filecheck=CHECK_IGNORE_CAPS): -target hlsl -emit-spirv-directly -entry computeMain -stage compute -allow-glsl -ignore-capabilities // CHECK_IGNORE_CAPS-NOT: error 36107 // CHECK: error 36107 @@ -11,11 +11,5 @@ Texture2D<int> rw; [numthreads(1,1,1)] void computeMain() { -#ifdef PRE - rw.Load(0); -#endif - clip(0.0f); -#ifdef POST - rw.Load(0); -#endif + clip(0.0f); // clip is not supported in compute shader, so this throws an error. }
\ No newline at end of file |
