Immutable access qualifier for pointers and use `__ldg` on cuda. (#8710)

This PR implements `Access.Immutable` to allow pointers to immutable data. The new type `ImmutablePtr<T>` is defined as an alias of `Ptr<T, Address.Immutable>`. By forming a immutable pointer, the programmer is conveying to the compiler that the data at the pointer address will never change during the execution of the current program. Therefore loads from immutable pointers can be deduplicated by the compiler, and will translate to `__ldg` when generating code for CUDA. The SPIRV backend is not changed in this PR, since the current SPIRV spec makes it very difficult to specify loads from immutable address without generating tons of wrappers and boilerplate type declarations. We would like to see the spec evolved a bit to around its support of `NonWritable` physical storage pointers or immutable loads before we attempt to express such immutability in SPIRV. For now we simply emit ordinary pointers and loads when generating spirv. --------- Co-authored-by: slangbot <186143334+slangbot@users.noreply.github.com>
author: Yong He <yonghe@outlook.com> 2025-10-15 20:59:47 -0700
committer: GitHub <noreply@github.com> 2025-10-16 03:59:47 +0000
commit: 01510f2c922af8629c7a730ef92a31fa83bd9f49 (patch)
tree: bbec0cd5424e99670573dc3fa10fdf441320b684 /tests/optimization
parent: d1a935c683ac1eb93d95587ee26bdaae7eb17e31 (diff)
3 files changed, 60 insertions, 1 deletions
diff --git a/tests/optimization/buffer-load-defer-ptr.slang b/tests/optimization/buffer-load-defer-ptr.slang
new file mode 100644
index 000000000..cde006dcf
--- /dev/null
+++ b/tests/optimization/buffer-load-defer-ptr.slang
@@ -0,0 +1,38 @@
+//TEST:SIMPLE(filecheck=SPV): -target spirv
+
+struct Bottom
+{
+    float bigArray[1024];
+    float bottomGetValue(int index) { return bigArray[index]; }
+}
+
+struct Middle
+{
+    Bottom bottom;
+    float middleGetValue(int index) { return bottom.bottomGetValue(index); }
+}
+
+struct Top
+{
+    Middle middle;
+    float topGetValue(int index) { return middle.middleGetValue(index); }
+}
+
+struct Root
+{
+    Top top;
+}
+
+uniform ImmutablePtr<Root> cb;
+
+RWStructuredBuffer<float> outputBuffer;
+
+// SPV: OpEntryPoint
+// SPV-NOT: OpCompositeConstruct
+
+[shader("compute")]
+[numthreads(1, 1, 1)]
+void compute_main(uint3 tid: SV_DispatchThreadID)
+{
+    outputBuffer[0] = cb.top.topGetValue(0);
+}
diff --git a/tests/optimization/defer-structured-buffer-load.slang b/tests/optimization/defer-structured-buffer-load.slang
index f7f9b1888..22f93bcd1 100644
--- a/tests/optimization/defer-structured-buffer-load.slang
+++ b/tests/optimization/defer-structured-buffer-load.slang
@@ -28,7 +28,7 @@ RWStructuredBuffer<float> outputBuffer;
 // SPV: OpStore %{{.*}} %[[VALUE]]
 
 // CUDA: __device__ float Bottom_bottomGetValue{{.*}}(uint [[PARAM0:[A-Za-z0-9_]+]], int [[PARAM1:[A-Za-z0-9_]+]])
-// CUDA:  return (&(&(globalParams_0->sb_0){{\[}}[[PARAM0]]{{\]}})->bottom_0)->bigArray_0{{\[}}[[PARAM1]]{{\]}};
+// CUDA:  __ldg(&(&(&(globalParams_0->sb_0){{\[}}[[PARAM0]]{{\]}})->bottom_0)->bigArray_0{{\[}}[[PARAM1]]{{\]}});
 
 [shader("compute")]
 [numthreads(1, 1, 1)]
diff --git a/tests/optimization/immutable-buffer-load.slang b/tests/optimization/immutable-buffer-load.slang
new file mode 100644
index 000000000..251a1ce9c
--- /dev/null
+++ b/tests/optimization/immutable-buffer-load.slang
@@ -0,0 +1,21 @@
+// Test that we can use ImmutablePtr<T> to result in more optimized buffer loads
+// in SPIR-V and CUDA.
+
+//TEST:SIMPLE(filecheck=PTX): -target ptx -entry computeMain -stage compute
+//TEST:SIMPLE(filecheck=SPV): -target spirv -O0
+uniform ImmutablePtr<float4> data;
+
+uniform float4* result;
+
+float4 work(ImmutablePtr<float4> ptr)
+{
+    return *ptr;
+}
+
+[numthreads(1,1,1)]
+void computeMain()
+{
+    // SPV: Restrict
+    // PTX: ld.global.nc.v4.f32
+    *result = work(data) + float4(1,2,3,4);
+}
+\ No newline at end of file
author	Yong He <yonghe@outlook.com>	2025-10-15 20:59:47 -0700
committer	GitHub <noreply@github.com>	2025-10-16 03:59:47 +0000
commit	01510f2c922af8629c7a730ef92a31fa83bd9f49 (patch)
tree	bbec0cd5424e99670573dc3fa10fdf441320b684 /tests/optimization
parent	d1a935c683ac1eb93d95587ee26bdaae7eb17e31 (diff)