summary | refs | log | tree | commit | diff | stats
path: root/tests
diff options
context:
space:
mode:
Diffstat (limited to 'tests')
-rw-r--r-- tests/cuda/copy-elision-this-1.slang 2
-rw-r--r-- tests/cuda/dispatch-thread-id-extraction.slang 12
-rw-r--r-- tests/optimization/buffer-load-defer-ptr.slang 38
-rw-r--r-- tests/optimization/defer-structured-buffer-load.slang 2
-rw-r--r-- tests/optimization/immutable-buffer-load.slang 21
5 files changed, 66 insertions, 9 deletions
diff --git a/tests/cuda/copy-elision-this-1.slang b/tests/cuda/copy-elision-this-1.slang
index 273e6dc58..376ef1f80 100644
--- a/tests/cuda/copy-elision-this-1.slang
+++ b/tests/cuda/copy-elision-this-1.slang
@@ -10,7 +10,7 @@ struct Data {
// CUDA: __device__ float Data_fetch{{.*}}(int {{.*}}, int {{.*}})
// CUDA-NEXT: {
- // CUDA-NEXT: return globalParams{{.*}}->data{{.*}}->input{{.*}}[{{.*}}].Load
+ // CUDA-NEXT: globalParams{{.*}}->data{{.*}}->input{{.*}}[{{.*}}]
float fetch(int buffer, int index)
{
return input[buffer][index];
diff --git a/tests/cuda/dispatch-thread-id-extraction.slang b/tests/cuda/dispatch-thread-id-extraction.slang
index 02705ff24..b1fac2d00 100644
--- a/tests/cuda/dispatch-thread-id-extraction.slang
+++ b/tests/cuda/dispatch-thread-id-extraction.slang
@@ -12,7 +12,7 @@ void computeMain(uint tid: SV_DispatchThreadID, StructuredBuffer<uint> src, RWSt
{
dst[tid.x] = src[tid.x];
}
-// CHECK: uint _S1 = (blockIdx * blockDim + threadIdx).x;
+// CHECK: uint {{.*}} = (blockIdx * blockDim + threadIdx).x;
[shader("compute")]
[numthreads(1, 1, 1)]
@@ -20,7 +20,7 @@ void computeMain2(uint2 tid: SV_DispatchThreadID, StructuredBuffer<uint> src, RW
{
dst[tid.x] = src[tid.y];
}
-// CHECK: uint2 _S2 = uint2 {(blockIdx * blockDim + threadIdx).x, (blockIdx * blockDim + threadIdx).y};
+// CHECK: uint2 {{.*}} = uint2 {(blockIdx * blockDim + threadIdx).x, (blockIdx * blockDim + threadIdx).y};
[shader("compute")]
[numthreads(1, 1, 1)]
@@ -28,9 +28,7 @@ void computeMain3(int2 tid: SV_DispatchThreadID, StructuredBuffer<uint> src, RWS
{
dst[tid.x] = src[tid.x];
}
-// CHECK: uint2 _S3 = uint2 {(blockIdx * blockDim + threadIdx).x, (blockIdx * blockDim + threadIdx).y};
-// CHECK: int2 _S4 = make_int2 ((int)_S3.x, (int)_S3.y);
-// CHECK: int _S5 = _S4.x;
+// CHECK: uint2 {{.*}} = uint2 {(blockIdx * blockDim + threadIdx).x, (blockIdx * blockDim + threadIdx).y};
[shader("compute")]
[numthreads(1, 1, 1)]
@@ -38,7 +36,7 @@ void computeMain4(int tid: SV_DispatchThreadID, StructuredBuffer<uint> src, RWSt
{
dst[tid.x] = src[tid.x];
}
-// CHECK: int _S6 = int((blockIdx * blockDim + threadIdx).x);
+// CHECK: int {{.*}} = int((blockIdx * blockDim + threadIdx).x);
[shader("compute")]
[numthreads(1, 1, 1)]
@@ -46,4 +44,4 @@ void computeMain5(int tid: SV_GroupIndex, StructuredBuffer<uint> src, RWStructur
{
dst[tid.x] = src[tid.x];
}
-// CHECK: int _S7 = int((threadIdx.z * blockDim.y + threadIdx.y) * blockDim.x + threadIdx.x);
+// CHECK: int {{.*}} = int((threadIdx.z * blockDim.y + threadIdx.y) * blockDim.x + threadIdx.x);
diff --git a/tests/optimization/buffer-load-defer-ptr.slang b/tests/optimization/buffer-load-defer-ptr.slang
new file mode 100644
index 000000000..cde006dcf
--- /dev/null
+++ b/tests/optimization/buffer-load-defer-ptr.slang
@@ -0,0 +1,38 @@
+//TEST:SIMPLE(filecheck=SPV): -target spirv
+
+struct Bottom
+{
+ float bigArray[1024];
+ float bottomGetValue(int index) { return bigArray[index]; }
+}
+
+struct Middle
+{
+ Bottom bottom;
+ float middleGetValue(int index) { return bottom.bottomGetValue(index); }
+}
+
+struct Top
+{
+ Middle middle;
+ float topGetValue(int index) { return middle.middleGetValue(index); }
+}
+
+struct Root
+{
+ Top top;
+}
+
+uniform ImmutablePtr<Root> cb;
+
+RWStructuredBuffer<float> outputBuffer;
+
+// SPV: OpEntryPoint
+// SPV-NOT: OpCompositeConstruct
+
+[shader("compute")]
+[numthreads(1, 1, 1)]
+void compute_main(uint3 tid: SV_DispatchThreadID)
+{
+ outputBuffer[0] = cb.top.topGetValue(0);
+}
diff --git a/tests/optimization/defer-structured-buffer-load.slang b/tests/optimization/defer-structured-buffer-load.slang
index f7f9b1888..22f93bcd1 100644
--- a/tests/optimization/defer-structured-buffer-load.slang
+++ b/tests/optimization/defer-structured-buffer-load.slang
@@ -28,7 +28,7 @@ RWStructuredBuffer<float> outputBuffer;
// SPV: OpStore %{{.*}} %[[VALUE]]
// CUDA: __device__ float Bottom_bottomGetValue{{.*}}(uint [[PARAM0:[A-Za-z0-9_]+]], int [[PARAM1:[A-Za-z0-9_]+]])
-// CUDA: return (&(&(globalParams_0->sb_0){{\[}}[[PARAM0]]{{\]}})->bottom_0)->bigArray_0{{\[}}[[PARAM1]]{{\]}};
+// CUDA: __ldg(&(&(&(globalParams_0->sb_0){{\[}}[[PARAM0]]{{\]}})->bottom_0)->bigArray_0{{\[}}[[PARAM1]]{{\]}});
[shader("compute")]
[numthreads(1, 1, 1)]
diff --git a/tests/optimization/immutable-buffer-load.slang b/tests/optimization/immutable-buffer-load.slang
new file mode 100644
index 000000000..251a1ce9c
--- /dev/null
+++ b/tests/optimization/immutable-buffer-load.slang
@@ -0,0 +1,21 @@
+// Test that we can use ImmutablePtr<T> to result in more optimized buffer loads
+// in SPIR-V and CUDA.
+
+//TEST:SIMPLE(filecheck=PTX): -target ptx -entry computeMain -stage compute
+//TEST:SIMPLE(filecheck=SPV): -target spirv -O0
+uniform ImmutablePtr<float4> data;
+
+uniform float4* result;
+
+float4 work(ImmutablePtr<float4> ptr)
+{
+ return *ptr;
+}
+
+[numthreads(1,1,1)]
+void computeMain()
+{
+ // SPV: Restrict
+ // PTX: ld.global.nc.v4.f32
+ *result = work(data) + float4(1,2,3,4);
+}
\ No newline at end of file