diff options
Diffstat (limited to 'tests')
| -rw-r--r-- | tests/cuda/copy-elision-this-1.slang | 2 | ||||
| -rw-r--r-- | tests/cuda/dispatch-thread-id-extraction.slang | 12 | ||||
| -rw-r--r-- | tests/optimization/buffer-load-defer-ptr.slang | 38 | ||||
| -rw-r--r-- | tests/optimization/defer-structured-buffer-load.slang | 2 | ||||
| -rw-r--r-- | tests/optimization/immutable-buffer-load.slang | 21 |
5 files changed, 66 insertions, 9 deletions
diff --git a/tests/cuda/copy-elision-this-1.slang b/tests/cuda/copy-elision-this-1.slang index 273e6dc58..376ef1f80 100644 --- a/tests/cuda/copy-elision-this-1.slang +++ b/tests/cuda/copy-elision-this-1.slang @@ -10,7 +10,7 @@ struct Data { // CUDA: __device__ float Data_fetch{{.*}}(int {{.*}}, int {{.*}}) // CUDA-NEXT: { - // CUDA-NEXT: return globalParams{{.*}}->data{{.*}}->input{{.*}}[{{.*}}].Load + // CUDA-NEXT: globalParams{{.*}}->data{{.*}}->input{{.*}}[{{.*}}] float fetch(int buffer, int index) { return input[buffer][index]; diff --git a/tests/cuda/dispatch-thread-id-extraction.slang b/tests/cuda/dispatch-thread-id-extraction.slang index 02705ff24..b1fac2d00 100644 --- a/tests/cuda/dispatch-thread-id-extraction.slang +++ b/tests/cuda/dispatch-thread-id-extraction.slang @@ -12,7 +12,7 @@ void computeMain(uint tid: SV_DispatchThreadID, StructuredBuffer<uint> src, RWSt { dst[tid.x] = src[tid.x]; } -// CHECK: uint _S1 = (blockIdx * blockDim + threadIdx).x; +// CHECK: uint {{.*}} = (blockIdx * blockDim + threadIdx).x; [shader("compute")] [numthreads(1, 1, 1)] @@ -20,7 +20,7 @@ void computeMain2(uint2 tid: SV_DispatchThreadID, StructuredBuffer<uint> src, RW { dst[tid.x] = src[tid.y]; } -// CHECK: uint2 _S2 = uint2 {(blockIdx * blockDim + threadIdx).x, (blockIdx * blockDim + threadIdx).y}; +// CHECK: uint2 {{.*}} = uint2 {(blockIdx * blockDim + threadIdx).x, (blockIdx * blockDim + threadIdx).y}; [shader("compute")] [numthreads(1, 1, 1)] @@ -28,9 +28,7 @@ void computeMain3(int2 tid: SV_DispatchThreadID, StructuredBuffer<uint> src, RWS { dst[tid.x] = src[tid.x]; } -// CHECK: uint2 _S3 = uint2 {(blockIdx * blockDim + threadIdx).x, (blockIdx * blockDim + threadIdx).y}; -// CHECK: int2 _S4 = make_int2 ((int)_S3.x, (int)_S3.y); -// CHECK: int _S5 = _S4.x; +// CHECK: uint2 {{.*}} = uint2 {(blockIdx * blockDim + threadIdx).x, (blockIdx * blockDim + threadIdx).y}; [shader("compute")] [numthreads(1, 1, 1)] @@ -38,7 +36,7 @@ void computeMain4(int tid: SV_DispatchThreadID, StructuredBuffer<uint> src, RWSt { dst[tid.x] = src[tid.x]; } -// CHECK: int _S6 = int((blockIdx * blockDim + threadIdx).x); +// CHECK: int {{.*}} = int((blockIdx * blockDim + threadIdx).x); [shader("compute")] [numthreads(1, 1, 1)] @@ -46,4 +44,4 @@ void computeMain5(int tid: SV_GroupIndex, StructuredBuffer<uint> src, RWStructur { dst[tid.x] = src[tid.x]; } -// CHECK: int _S7 = int((threadIdx.z * blockDim.y + threadIdx.y) * blockDim.x + threadIdx.x); +// CHECK: int {{.*}} = int((threadIdx.z * blockDim.y + threadIdx.y) * blockDim.x + threadIdx.x); diff --git a/tests/optimization/buffer-load-defer-ptr.slang b/tests/optimization/buffer-load-defer-ptr.slang new file mode 100644 index 000000000..cde006dcf --- /dev/null +++ b/tests/optimization/buffer-load-defer-ptr.slang @@ -0,0 +1,38 @@ +//TEST:SIMPLE(filecheck=SPV): -target spirv + +struct Bottom +{ + float bigArray[1024]; + float bottomGetValue(int index) { return bigArray[index]; } +} + +struct Middle +{ + Bottom bottom; + float middleGetValue(int index) { return bottom.bottomGetValue(index); } +} + +struct Top +{ + Middle middle; + float topGetValue(int index) { return middle.middleGetValue(index); } +} + +struct Root +{ + Top top; +} + +uniform ImmutablePtr<Root> cb; + +RWStructuredBuffer<float> outputBuffer; + +// SPV: OpEntryPoint +// SPV-NOT: OpCompositeConstruct + +[shader("compute")] +[numthreads(1, 1, 1)] +void compute_main(uint3 tid: SV_DispatchThreadID) +{ + outputBuffer[0] = cb.top.topGetValue(0); +} diff --git a/tests/optimization/defer-structured-buffer-load.slang b/tests/optimization/defer-structured-buffer-load.slang index f7f9b1888..22f93bcd1 100644 --- a/tests/optimization/defer-structured-buffer-load.slang +++ b/tests/optimization/defer-structured-buffer-load.slang @@ -28,7 +28,7 @@ RWStructuredBuffer<float> outputBuffer; // SPV: OpStore %{{.*}} %[[VALUE]] // CUDA: __device__ float Bottom_bottomGetValue{{.*}}(uint [[PARAM0:[A-Za-z0-9_]+]], int [[PARAM1:[A-Za-z0-9_]+]]) -// CUDA: return (&(&(globalParams_0->sb_0){{\[}}[[PARAM0]]{{\]}})->bottom_0)->bigArray_0{{\[}}[[PARAM1]]{{\]}}; +// CUDA: __ldg(&(&(&(globalParams_0->sb_0){{\[}}[[PARAM0]]{{\]}})->bottom_0)->bigArray_0{{\[}}[[PARAM1]]{{\]}}); [shader("compute")] [numthreads(1, 1, 1)] diff --git a/tests/optimization/immutable-buffer-load.slang b/tests/optimization/immutable-buffer-load.slang new file mode 100644 index 000000000..251a1ce9c --- /dev/null +++ b/tests/optimization/immutable-buffer-load.slang @@ -0,0 +1,21 @@ +// Test that we can use ImmutablePtr<T> to result in more optimized buffer loads +// in SPIR-V and CUDA. + +//TEST:SIMPLE(filecheck=PTX): -target ptx -entry computeMain -stage compute +//TEST:SIMPLE(filecheck=SPV): -target spirv -O0 +uniform ImmutablePtr<float4> data; + +uniform float4* result; + +float4 work(ImmutablePtr<float4> ptr) +{ + return *ptr; +} + +[numthreads(1,1,1)] +void computeMain() +{ + // SPV: Restrict + // PTX: ld.global.nc.v4.f32 + *result = work(data) + float4(1,2,3,4); +}
\ No newline at end of file |
