summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMukund Keshava <mkeshava@nvidia.com>2025-04-30 16:07:02 +0530
committerGitHub <noreply@github.com>2025-04-30 10:37:02 +0000
commitb0e150511a6a536c8ad9e74910b30ae179a10ec9 (patch)
treecb749d757e0e556d987d6a30020971ed5a6aa41d
parent41ac7a0d8b4e9c08eccc2153020900e0262cae84 (diff)
Add subscript operator support in cuda (#6830)
* cuda: Add support for subscript operator. This CL adds support for the subscript operator on read-only textures in CUDA, and adds a test for it. Fixes #6781. * format code * fix review comments * format code --------- Co-authored-by: slangbot <186143334+slangbot@users.noreply.github.com> Co-authored-by: Ellie Hermaszewska <ellieh@nvidia.com>
-rw-r--r--prelude/slang-cuda-prelude.h67
-rw-r--r--source/slang/hlsl.meta.slang33
-rw-r--r--tests/compute/texture-subscript-cuda.slang61
-rw-r--r--tests/compute/texture-subscript-cuda.slang.expected.txt7
-rw-r--r--tests/language-feature/capability/capability-invalid-fragment-in-compute.slang16
5 files changed, 171 insertions, 13 deletions
diff --git a/prelude/slang-cuda-prelude.h b/prelude/slang-cuda-prelude.h
index 38e018e3e..738f2fa16 100644
--- a/prelude/slang-cuda-prelude.h
+++ b/prelude/slang-cuda-prelude.h
@@ -3366,3 +3366,70 @@ struct TensorView
*reinterpret_cast<T*>(data + offset) = val;
}
};
+
+// Implementations for texture fetch/load functions using tex PTX intrinsics
+// These are used for read-only texture access with integer coordinates
+// See #6781 for details.
+
+// 1D is not supported via PTX. Keeping this placeholder in case it ever gets
+// supported.
+// Fetches one texel from a 1D texture at integer coordinate `x` and returns
+// its first component. The PTX instruction always produces four f32
+// components; the remaining three are written to a scratch variable and
+// discarded.
+// NOTE(review): the "=f" output constraint presumably requires T to be a
+// 32-bit float type -- confirm before instantiating with anything else.
+template<typename T>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL T tex1Dfetch_int(CUtexObject texObj, int x)
+{
+ T texel;
+ float discarded;
+ asm("tex.1d.v4.f32.s32 {%0, %1, %2, %3}, [%4, {%5}];"
+ : "=f"(texel), "=f"(discarded), "=f"(discarded), "=f"(discarded)
+ : "l"(texObj), "r"(x));
+ return texel;
+}
+
+// Fetches one texel from a 2D texture at integer coordinates (`x`, `y`) and
+// returns its first component. The PTX instruction always produces four f32
+// components; the remaining three are written to a scratch variable and
+// discarded.
+// NOTE(review): the "=f" output constraint presumably requires T to be a
+// 32-bit float type -- confirm before instantiating with anything else.
+template<typename T>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL T tex2Dfetch_int(CUtexObject texObj, int x, int y)
+{
+ T texel;
+ float discarded;
+ asm("tex.2d.v4.f32.s32 {%0, %1, %2, %3}, [%4, {%5, %6}];"
+ : "=f"(texel), "=f"(discarded), "=f"(discarded), "=f"(discarded)
+ : "l"(texObj), "r"(x), "r"(y));
+ return texel;
+}
+
+// Fetches one texel from a 3D texture at integer coordinates (`x`, `y`, `z`)
+// and returns its first component. The PTX instruction always produces four
+// f32 components; the remaining three are written to a scratch variable and
+// discarded.
+// NOTE(review): the "=f" output constraint presumably requires T to be a
+// 32-bit float type -- confirm before instantiating with anything else.
+template<typename T>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL T tex3Dfetch_int(CUtexObject texObj, int x, int y, int z)
+{
+ T texel;
+ float discarded;
+ // PTX requires a four-element coordinate vector for 3d textures, so `z` is
+ // passed a second time purely as padding for the fourth element.
+ // From the docs:
+ // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#texture-instructions-tex
+ // Operand c is a scalar or singleton tuple for 1d textures; is a two-element vector for 2d
+ // textures; and is a four-element vector for 3d textures.
+ asm("tex.3d.v4.f32.s32 {%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}];"
+ : "=f"(texel), "=f"(discarded), "=f"(discarded), "=f"(discarded)
+ : "l"(texObj), "r"(x), "r"(y), "r"(z), "r"(z));
+ return texel;
+}
+
+// Fetches one texel from layer `layer` of a 1D texture array at integer
+// coordinate `x` and returns its first component. The PTX instruction always
+// produces four f32 components; the remaining three are written to a scratch
+// variable and discarded.
+// NOTE(review): the "=f" output constraint presumably requires T to be a
+// 32-bit float type -- confirm before instantiating with anything else.
+template<typename T>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL T tex1DArrayfetch_int(CUtexObject texObj, int x, int layer)
+{
+ T result;
+ float dummy;
+ // For tex.a1d, PTX takes the coordinate tuple as {array index, x}: the
+ // array index comes FIRST, followed by the coordinate within the layer.
+ // See:
+ // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#texture-instructions-tex
+ asm("tex.a1d.v4.f32.s32 {%0, %1, %2, %3}, [%4, {%5, %6}];"
+ : "=f"(result), "=f"(dummy), "=f"(dummy), "=f"(dummy)
+ : "l"(texObj), "r"(layer), "r"(x));
+ return result;
+}
+
+// Fetches one texel from layer `layer` of a 2D texture array at integer
+// coordinates (`x`, `y`) and returns its first component. The PTX instruction
+// always produces four f32 components; the remaining three are written to a
+// scratch variable and discarded.
+// NOTE(review): the "=f" output constraint presumably requires T to be a
+// 32-bit float type -- confirm before instantiating with anything else.
+template<typename T>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL T
+tex2DArrayfetch_int(CUtexObject texObj, int x, int y, int layer)
+{
+ T result;
+ float dummy;
+ // For tex.a2d, PTX takes a four-element tuple {array index, x, y, ignored}:
+ // the array index comes FIRST, then the 2d coordinates; the fourth element
+ // is ignored, so `y` is repeated there purely as padding.
+ // See:
+ // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#texture-instructions-tex
+ asm("tex.a2d.v4.f32.s32 {%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];"
+ : "=f"(result), "=f"(dummy), "=f"(dummy), "=f"(dummy)
+ : "l"(texObj), "r"(layer), "r"(x), "r"(y));
+ return result;
+} \ No newline at end of file
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index 44b9a8860..a6e1196e3 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -3610,7 +3610,7 @@ extension _Texture<T,Shape,isArray,0,sampleCount,0,isShadow,isCombined,format>
//@public:
[__readNone]
[ForceInline]
- [require(cpp_glsl_hlsl_metal_spirv_wgsl, texture_sm_4_1_samplerless)]
+ [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, texture_sm_4_1_samplerless)]
T Load(vector<int, Shape.dimensions+isArray+1> location)
{
__target_switch
@@ -3618,6 +3618,34 @@ extension _Texture<T,Shape,isArray,0,sampleCount,0,isShadow,isCombined,format>
case cpp:
case hlsl:
__intrinsic_asm ".Load";
+ case cuda:
+ if (isArray != 0)
+ {
+ static_assert(Shape.flavor == $(SLANG_TEXTURE_2D) || Shape.flavor == $(SLANG_TEXTURE_3D),
+ "Integer coordinates are supported for texture reads only for 2D and 3D textures and 2D array textures.");
+
+ if (Shape.flavor == $(SLANG_TEXTURE_2D))
+ {
+ __intrinsic_asm "tex2DArrayfetch_int<$T0>($0, ($1).x, ($1).y, ($1).z)";
+ }
+ else
+ {
+ __intrinsic_asm "<invalid intrinsic>";
+ }
+ }
+ else
+ {
+ switch(Shape.flavor)
+ {
+ case $(SLANG_TEXTURE_2D):
+ __intrinsic_asm "tex2Dfetch_int<$T0>($0, ($1).x, ($1).y)";
+ case $(SLANG_TEXTURE_3D):
+ __intrinsic_asm "tex3Dfetch_int<$T0>($0, ($1).x, ($1).y, ($1).z)";
+ case $(SLANG_TEXTURE_CUBE):
+ default:
+ __intrinsic_asm "<invalid intrinsic>";
+ }
+ }
case metal:
switch (Shape.flavor)
{
@@ -3824,7 +3852,7 @@ extension _Texture<T,Shape,isArray,0,sampleCount,0,isShadow,isCombined,format>
{
[__readNone]
[ForceInline]
- [require(cpp_glsl_hlsl_metal_spirv_wgsl, texture_sm_4_1_samplerless)]
+ [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, texture_sm_4_1_samplerless)]
get
{
__target_switch
@@ -3833,6 +3861,7 @@ extension _Texture<T,Shape,isArray,0,sampleCount,0,isShadow,isCombined,format>
case hlsl:
__intrinsic_asm ".operator[]";
case metal:
+ case cuda:
return Load(__makeVector(location, 0));
case glsl:
if (isCombined == 0)
diff --git a/tests/compute/texture-subscript-cuda.slang b/tests/compute/texture-subscript-cuda.slang
new file mode 100644
index 000000000..e64f42b19
--- /dev/null
+++ b/tests/compute/texture-subscript-cuda.slang
@@ -0,0 +1,61 @@
+// Test for verifying subscript operator support in cuda.
+
+//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -shaderobj
+//TEST_INPUT: Texture1D(size=4, content = one):name cudaT1D
+Texture1D<float> cudaT1D;
+//TEST_INPUT: Texture2D(size=8, content = one):name cudaT2D
+Texture2D<float> cudaT2D;
+//TEST_INPUT: Texture3D(size=8, content = one):name cudaT3D
+Texture3D<float> cudaT3D;
+//TEST_INPUT: TextureCube(size=16, content = one):name cudaTCube
+TextureCube<float> cudaTCube;
+//TEST_INPUT: Texture2D(size=16, content = one, arrayLength=3):name cudaT2DArray
+Texture2DArray<float> cudaT2DArray;
+//TEST_INPUT: TextureCube(size=16, content = one, arrayLength=1):name cudaTCubeArray
+TextureCubeArray<float> cudaTCubeArray;
+
+//TEST_INPUT: ubuffer(data=[0 0 0 0 0 0 0], stride=4):out,name cudaOutputBuffer
+RWStructuredBuffer<float> cudaOutputBuffer;
+
+// Each thread exercises the texture subscript operator on one texture shape
+// and stores the fetched value in its own slot of cudaOutputBuffer. Threads
+// whose index has no case below leave their slot at its initial value.
+[numthreads(7, 1, 1)]
+[shader("compute")]
+void computeMain(int3 dispatchThreadID : SV_DispatchThreadID)
+{
+ int idx = dispatchThreadID.x;
+
+ switch (idx)
+ {
+ case 1:
+ {
+ // 1D subscript: the result is deliberately not written out, so
+ // this slot stays 0 in the expected output.
+ float result = cudaT1D[0];
+ // This is not supported in PTX.
+ //cudaOutputBuffer[idx] = result;
+ }
+ break;
+
+ case 2:
+ {
+ int2 var = int2(1, 2);
+ float result = cudaT2D[var];
+ cudaOutputBuffer[idx] = result;
+ }
+ break;
+
+ case 3:
+ {
+ int3 var = int3(1, 1, 1);
+ float result = cudaT3D[var];
+ cudaOutputBuffer[idx] = result;
+ }
+ break;
+
+ case 4:
+ {
+ // For a 2D array texture the third component selects the layer.
+ int3 var = int3(0, 0, 1);
+ float result = cudaT2DArray[var];
+ cudaOutputBuffer[idx] = result;
+ }
+ break;
+ }
+} \ No newline at end of file
diff --git a/tests/compute/texture-subscript-cuda.slang.expected.txt b/tests/compute/texture-subscript-cuda.slang.expected.txt
new file mode 100644
index 000000000..133a47e56
--- /dev/null
+++ b/tests/compute/texture-subscript-cuda.slang.expected.txt
@@ -0,0 +1,7 @@
+0
+0
+3F800000
+3F800000
+3F800000
+0
+0 \ No newline at end of file
diff --git a/tests/language-feature/capability/capability-invalid-fragment-in-compute.slang b/tests/language-feature/capability/capability-invalid-fragment-in-compute.slang
index 946ba4470..ccf834167 100644
--- a/tests/language-feature/capability/capability-invalid-fragment-in-compute.slang
+++ b/tests/language-feature/capability/capability-invalid-fragment-in-compute.slang
@@ -1,7 +1,7 @@
-//TEST:SIMPLE(filecheck=CHECK): -target hlsl -emit-spirv-directly -entry computeMain -stage compute -allow-glsl -DPRE
-//TEST:SIMPLE(filecheck=CHECK): -target hlsl -emit-spirv-directly -entry computeMain -stage compute -allow-glsl -DPOST
-//TEST:SIMPLE(filecheck=CHECK_IGNORE_CAPS): -target hlsl -emit-spirv-directly -entry computeMain -stage compute -allow-glsl -ignore-capabilities -DPRE
-//TEST:SIMPLE(filecheck=CHECK_IGNORE_CAPS): -target hlsl -emit-spirv-directly -entry computeMain -stage compute -allow-glsl -ignore-capabilities -DPOST
+//TEST:SIMPLE(filecheck=CHECK): -target hlsl -emit-spirv-directly -entry computeMain -stage compute -allow-glsl
+//TEST:SIMPLE(filecheck=CHECK): -target hlsl -emit-spirv-directly -entry computeMain -stage compute -allow-glsl
+//TEST:SIMPLE(filecheck=CHECK_IGNORE_CAPS): -target hlsl -emit-spirv-directly -entry computeMain -stage compute -allow-glsl -ignore-capabilities
+//TEST:SIMPLE(filecheck=CHECK_IGNORE_CAPS): -target hlsl -emit-spirv-directly -entry computeMain -stage compute -allow-glsl -ignore-capabilities
// CHECK_IGNORE_CAPS-NOT: error 36107
// CHECK: error 36107
@@ -11,11 +11,5 @@ Texture2D<int> rw;
[numthreads(1,1,1)]
void computeMain()
{
-#ifdef PRE
- rw.Load(0);
-#endif
- clip(0.0f);
-#ifdef POST
- rw.Load(0);
-#endif
+ clip(0.0f); // clip is not supported in compute shader, so this throws an error.
} \ No newline at end of file