summaryrefslogtreecommitdiff
path: root/tests/cuda
diff options
context:
space:
mode:
authorYong He <yonghe@outlook.com>2025-10-15 20:59:47 -0700
committerGitHub <noreply@github.com>2025-10-16 03:59:47 +0000
commit01510f2c922af8629c7a730ef92a31fa83bd9f49 (patch)
treebbec0cd5424e99670573dc3fa10fdf441320b684 /tests/cuda
parentd1a935c683ac1eb93d95587ee26bdaae7eb17e31 (diff)
Immutable access qualifier for pointers and use `__ldg` on cuda. (#8710)
This PR implements `Access.Immutable` to allow pointers to immutable data. The new type `ImmutablePtr<T>` is defined as an alias of `Ptr<T, Address.Immutable>`. By forming a immutable pointer, the programmer is conveying to the compiler that the data at the pointer address will never change during the execution of the current program. Therefore loads from immutable pointers can be deduplicated by the compiler, and will translate to `__ldg` when generating code for CUDA. The SPIRV backend is not changed in this PR, since the current SPIRV spec makes it very difficult to specify loads from immutable address without generating tons of wrappers and boilerplate type declarations. We would like to see the spec evolved a bit to around its support of `NonWritable` physical storage pointers or immutable loads before we attempt to express such immutability in SPIRV. For now we simply emit ordinary pointers and loads when generating spirv. --------- Co-authored-by: slangbot <186143334+slangbot@users.noreply.github.com>
Diffstat (limited to 'tests/cuda')
-rw-r--r--tests/cuda/copy-elision-this-1.slang2
-rw-r--r--tests/cuda/dispatch-thread-id-extraction.slang12
2 files changed, 6 insertions, 8 deletions
diff --git a/tests/cuda/copy-elision-this-1.slang b/tests/cuda/copy-elision-this-1.slang
index 273e6dc58..376ef1f80 100644
--- a/tests/cuda/copy-elision-this-1.slang
+++ b/tests/cuda/copy-elision-this-1.slang
@@ -10,7 +10,7 @@ struct Data {
// CUDA: __device__ float Data_fetch{{.*}}(int {{.*}}, int {{.*}})
// CUDA-NEXT: {
- // CUDA-NEXT: return globalParams{{.*}}->data{{.*}}->input{{.*}}[{{.*}}].Load
+ // CUDA-NEXT: globalParams{{.*}}->data{{.*}}->input{{.*}}[{{.*}}]
float fetch(int buffer, int index)
{
return input[buffer][index];
diff --git a/tests/cuda/dispatch-thread-id-extraction.slang b/tests/cuda/dispatch-thread-id-extraction.slang
index 02705ff24..b1fac2d00 100644
--- a/tests/cuda/dispatch-thread-id-extraction.slang
+++ b/tests/cuda/dispatch-thread-id-extraction.slang
@@ -12,7 +12,7 @@ void computeMain(uint tid: SV_DispatchThreadID, StructuredBuffer<uint> src, RWSt
{
dst[tid.x] = src[tid.x];
}
-// CHECK: uint _S1 = (blockIdx * blockDim + threadIdx).x;
+// CHECK: uint {{.*}} = (blockIdx * blockDim + threadIdx).x;
[shader("compute")]
[numthreads(1, 1, 1)]
@@ -20,7 +20,7 @@ void computeMain2(uint2 tid: SV_DispatchThreadID, StructuredBuffer<uint> src, RW
{
dst[tid.x] = src[tid.y];
}
-// CHECK: uint2 _S2 = uint2 {(blockIdx * blockDim + threadIdx).x, (blockIdx * blockDim + threadIdx).y};
+// CHECK: uint2 {{.*}} = uint2 {(blockIdx * blockDim + threadIdx).x, (blockIdx * blockDim + threadIdx).y};
[shader("compute")]
[numthreads(1, 1, 1)]
@@ -28,9 +28,7 @@ void computeMain3(int2 tid: SV_DispatchThreadID, StructuredBuffer<uint> src, RWS
{
dst[tid.x] = src[tid.x];
}
-// CHECK: uint2 _S3 = uint2 {(blockIdx * blockDim + threadIdx).x, (blockIdx * blockDim + threadIdx).y};
-// CHECK: int2 _S4 = make_int2 ((int)_S3.x, (int)_S3.y);
-// CHECK: int _S5 = _S4.x;
+// CHECK: uint2 {{.*}} = uint2 {(blockIdx * blockDim + threadIdx).x, (blockIdx * blockDim + threadIdx).y};
[shader("compute")]
[numthreads(1, 1, 1)]
@@ -38,7 +36,7 @@ void computeMain4(int tid: SV_DispatchThreadID, StructuredBuffer<uint> src, RWSt
{
dst[tid.x] = src[tid.x];
}
-// CHECK: int _S6 = int((blockIdx * blockDim + threadIdx).x);
+// CHECK: int {{.*}} = int((blockIdx * blockDim + threadIdx).x);
[shader("compute")]
[numthreads(1, 1, 1)]
@@ -46,4 +44,4 @@ void computeMain5(int tid: SV_GroupIndex, StructuredBuffer<uint> src, RWStructur
{
dst[tid.x] = src[tid.x];
}
-// CHECK: int _S7 = int((threadIdx.z * blockDim.y + threadIdx.y) * blockDim.x + threadIdx.x);
+// CHECK: int {{.*}} = int((threadIdx.z * blockDim.y + threadIdx.y) * blockDim.x + threadIdx.x);