diff options
Diffstat (limited to 'tests')
| -rw-r--r-- | tests/cuda/copy-elision-this-1.slang | 10 | ||||
| -rw-r--r-- | tests/glsl/global-uniform-with-varyings.slang | 6 | ||||
| -rw-r--r-- | tests/metal/out-param.slang | 19 | ||||
| -rw-r--r-- | tests/optimization/buffer-load-defer-aliasing-1.slang | 45 | ||||
| -rw-r--r-- | tests/optimization/buffer-load-defer-aliasing.slang | 38 | ||||
| -rw-r--r-- | tests/optimization/buffer-load-defer-bindless.slang | 58 | ||||
| -rw-r--r-- | tests/optimization/buffer-load-defer-user-pointer.slang | 63 | ||||
| -rw-r--r-- | tests/optimization/buffer-load-defer.slang | 38 | ||||
| -rw-r--r-- | tests/optimization/buffer-load-specialize-1.slang | 35 | ||||
| -rw-r--r-- | tests/optimization/buffer-store-defer.slang | 51 | ||||
| -rw-r--r-- | tests/optimization/defer-structured-buffer-load.slang | 38 | ||||
| -rw-r--r-- | tests/vkray/raygen-trace-ray-param-non-struct.slang | 12 | ||||
| -rw-r--r-- | tests/wgsl/switch-case.slang | 9 |
13 files changed, 399 insertions, 23 deletions
diff --git a/tests/cuda/copy-elision-this-1.slang b/tests/cuda/copy-elision-this-1.slang index 295b45c73..273e6dc58 100644 --- a/tests/cuda/copy-elision-this-1.slang +++ b/tests/cuda/copy-elision-this-1.slang @@ -1,4 +1,6 @@ -//TEST:SIMPLE(filecheck=CUDA): -stage compute -entry computeMain -target cuda +//TEST:SIMPLE(filecheck=CUDA): -stage compute -entry computeMain -target cuda -line-directive-mode none +//TEST:SIMPLE(filecheck=PTX): -stage compute -entry computeMain -target cuda + struct Data { StructuredBuffer<float> input[2]; RWStructuredBuffer<float> output; @@ -6,7 +8,9 @@ struct Data { StructuredBuffer<uint> index_buffer; uint index_count; - // CUDA: fetch{{.*}}Data{{.*}}*{{.*}}this + // CUDA: __device__ float Data_fetch{{.*}}(int {{.*}}, int {{.*}}) + // CUDA-NEXT: { + // CUDA-NEXT: return globalParams{{.*}}->data{{.*}}->input{{.*}}[{{.*}}].Load float fetch(int buffer, int index) { return input[buffer][index]; @@ -15,6 +19,8 @@ struct Data { ParameterBlock<Data> data; +// PTX: computeMain + [shader("compute")] [numthreads(8, 8, 1)] void computeMain(uint3 tid: SV_DispatchThreadID) diff --git a/tests/glsl/global-uniform-with-varyings.slang b/tests/glsl/global-uniform-with-varyings.slang index 678855dbf..174560840 100644 --- a/tests/glsl/global-uniform-with-varyings.slang +++ b/tests/glsl/global-uniform-with-varyings.slang @@ -3,9 +3,9 @@ // CHECK_SPIRV: OpEntryPoint // CHECK_SPIRV: OpVariable {{.*}} Input {{.*}} Location 0 -// CHECK_SPIRV: OpVariable {{.*}} Uniform -// CHECK_SPIRV: OpVariable {{.*}} Input {{.*}} Location 1 -// CHECK_SPIRV: OpVariable {{.*}} Output {{.*}} Location 0 +// CHECK_SPIRV-DAG: OpVariable {{.*}} Uniform +// CHECK_SPIRV-DAG: OpVariable {{.*}} Input {{.*}} Location 1 +// CHECK_SPIRV-DAG: OpVariable {{.*}} Output {{.*}} Location 0 // CHECK_GLSL: layout(location = 0) // CHECK_GLSL-NEXT: in diff --git a/tests/metal/out-param.slang b/tests/metal/out-param.slang index e488f8844..68b6e2b62 100644 --- a/tests/metal/out-param.slang +++ b/tests/metal/out-param.slang @@ -9,13 +9,20 @@ //TEST_INPUT: ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer RWStructuredBuffer<int> outputBuffer; -// METAL: void Test_out_param{{.*}}(int thread* value{{.*}}) -// METAL: void Test_out_param{{.*}}(int device* value{{.*}}) -// METAL: void Test_out_param{{.*}}(int threadgroup* value{{.*}}) +// CHECK-DAG: void Test_out_param{{.*}}(int thread* value{{.*}}) +// CHECK-DAG: void Test_out_param{{.*}}(int threadgroup* value{{.*}}) +// CHECK-DAG: void Test_out_param{{.*}}(int {{.*}}, KernelContext{{.*}} thread* {{.*}}) + +// CHECK-DAG: void Test_out_param_wrapper{{.*}}(int {{.*}}, KernelContext{{.*}} thread* {{.*}}) +// CHECK-DAG: void Test_out_param_wrapper{{.*}}(int thread* value{{.*}}) +// CHECK-DAG: void Test_out_param_wrapper{{.*}}(int threadgroup* value{{.*}}) + +// METAL-DAG: void Test_out_param{{.*}}(int thread* value{{.*}}) +// METAL-DAG: void Test_out_param{{.*}}(int threadgroup* value{{.*}}) + +// METAL-DAG: void Test_out_param_wrapper{{.*}}(int thread* value{{.*}}) +// METAL-DAG: void Test_out_param_wrapper{{.*}}(int threadgroup* value{{.*}}) -// METAL: void Test_out_param_wrapper{{.*}}(int thread* value{{.*}}) -// METAL: void Test_out_param_wrapper{{.*}}(int device* value{{.*}}) -// METAL: void Test_out_param_wrapper{{.*}}(int threadgroup* value{{.*}}) void Test_out_param(out int value) { diff --git a/tests/optimization/buffer-load-defer-aliasing-1.slang b/tests/optimization/buffer-load-defer-aliasing-1.slang new file mode 100644 index 000000000..f50d5306c --- /dev/null +++ b/tests/optimization/buffer-load-defer-aliasing-1.slang @@ -0,0 +1,45 @@ +//TEST:SIMPLE(filecheck=SPV): -target spirv -O0 + +// Test that we can defer buffer loads by ruling out potential aliasing writes. + +struct Bottom +{ + float bigArray[1024]; + + float bottomGetValue(int index) + { + // RWStructuredBuffer is considered to not alias with anything else. + // this write should not prevent deferring loading bigArray. + gOther[0] = 100; + // this write should not prevent deferring loading bigArray. + gSharedVar = 1; + // this write should not prevent deferring loading bigArray. + gStaticVar = 2; + + // We should return the value from bigArray from a previously loaded value of `this`. + return bigArray[index]; + } +} + +struct Root +{ + Bottom bottom1; + Bottom bottom2; +} + +uniform Root* gRoot; +uniform RWStructuredBuffer<int> gOther; +static int gStaticVar; +groupshared int gSharedVar; + + +RWStructuredBuffer<float> outputBuffer; + +[shader("compute")] +[numthreads(1, 1, 1)] +void compute_main(uint3 tid: SV_DispatchThreadID) +{ + // SPV: OpEntryPoint + // SPV-NOT: OpLoad %Bottom_natural + outputBuffer[0] = gRoot.bottom1.bottomGetValue(0); +} diff --git a/tests/optimization/buffer-load-defer-aliasing.slang b/tests/optimization/buffer-load-defer-aliasing.slang new file mode 100644 index 000000000..a0240cc40 --- /dev/null +++ b/tests/optimization/buffer-load-defer-aliasing.slang @@ -0,0 +1,38 @@ +//TEST:SIMPLE(filecheck=SPV): -target spirv -O0 + +// Test that we are not deferring buffer loads due to potential aliasing writes. + +struct Bottom +{ + float bigArray[1024]; + + float bottomGetValue(int index) + { + // this write may cause data stored at gRoot to be modified, + // thus bigArray[index] may be different from what it was before the call to + // bottomGetValue. So we should not defer loading bigArray until after this write. + *gOther = 100; + + // We should return the value from bigArray from a previously loaded value of `this`. + return bigArray[index]; + } +} + +struct Root +{ + Bottom bottom1; + Bottom bottom2; +} + +uniform Root* gRoot; +uniform int* gOther; + +RWStructuredBuffer<float> outputBuffer; + +[shader("compute")] +[numthreads(1, 1, 1)] +void compute_main(uint3 tid: SV_DispatchThreadID) +{ + // SPV: OpLoad %Bottom_natural + outputBuffer[0] = gRoot.bottom1.bottomGetValue(0); +} diff --git a/tests/optimization/buffer-load-defer-bindless.slang b/tests/optimization/buffer-load-defer-bindless.slang new file mode 100644 index 000000000..2108d562c --- /dev/null +++ b/tests/optimization/buffer-load-defer-bindless.slang @@ -0,0 +1,58 @@ +//TEST:SIMPLE(filecheck=CUDA): -target cuda -entry compute_main -stage compute +//TEST:SIMPLE(filecheck=PTX): -target ptx -entry compute_main -stage compute + +//TEST:SIMPLE(filecheck=SPV): -target spirv + +// Check that we can specialize buffer loads through bindless handles, and +// do not load big struct elements into registers unnecessarily. + +struct Bottom +{ + float bigArray[1024]; + float bottomGetValue(int index) { return bigArray[index]; } +} + +struct Middle +{ + Bottom bottom; + float middleGetValue(int index) { return bottom.bottomGetValue(index); } +} + +struct Top +{ + StructuredBuffer<Middle>.Handle middle; + + // Calling `middleGetValue` on `middle[0]` should not causing the entire `Middle` + // struct to be loaded into registers. Instead, we should be able to specialize + // `middleGetValue` to take a `StructuredBuffer<Middle>.Handle` and an `int` + // index, and recursively specialize `bottomGetValue` to only load the `Bottom.bigArray[index]` element. + float topGetValue(int index) { return middle[0].middleGetValue(index); } +} + +struct Root +{ + Top top; +} + +ConstantBuffer<Root> cb; + +RWStructuredBuffer<float> outputBuffer; + +// SPV: OpEntryPoint +// SPV-NOT: OpLoad %Middle +// SPV: %[[REG:[A-Za-z0-9_]+]] = OpLoad %float +// SPV: OpStore {{.*}} %[[REG]] + +// Check that the generated CUDA code contains a specialized `bottomGetValue` function that has +// the complete parameter list to access the `bigArray` element directly, without needing to load +// the entire `Bottom` struct from the caller. +// +// CUDA-DAG: __device__ float Bottom_bottomGetValue{{.*}}(StructuredBuffer<Middle{{.*}}> {{.*}}, int {{.*}}, int {{.*}}) +// PTX: compute_main + +[shader("compute")] +[numthreads(1, 1, 1)] +void compute_main(uint3 tid: SV_DispatchThreadID) +{ + outputBuffer[0] = cb.top.topGetValue(0); +} diff --git a/tests/optimization/buffer-load-defer-user-pointer.slang b/tests/optimization/buffer-load-defer-user-pointer.slang new file mode 100644 index 000000000..58e6386f9 --- /dev/null +++ b/tests/optimization/buffer-load-defer-user-pointer.slang @@ -0,0 +1,63 @@ +//TEST:SIMPLE(filecheck=SPV): -target spirv -O0 +//TEST:SIMPLE(filecheck=CUDA): -target cuda -entry compute_main -stage compute +//TEST:SIMPLE(filecheck=PTX): -target ptx -entry compute_main -stage compute + +// Check that we can specialize buffer loads through user pointers, and +// do not load big struct elements into registers unnecessarily. + +struct Bottom +{ + float bigArray[1024]; + float bottomGetValue(int index) { return bigArray[index]; } +} + +struct Middle +{ + Bottom bottom; + float middleGetValue(int index) { return bottom.bottomGetValue(index); } +} + +struct Top +{ + StructuredBuffer<Middle*>.Handle middle; + + // Calling `middleGetValue` on `middle[0]` should not causing the entire `Middle` + // struct to be loaded into registers. Instead, we should be able to specialize + // `middleGetValue` to take a `Middle*` and recursively specialize `bottomGetValue` + // to only load the `Bottom.bigArray[index]` element. + float topGetValue(int index) { return middle[0].middleGetValue(index); } +} + +struct Root +{ + Top top; +} + +ConstantBuffer<Root> cb; + +RWStructuredBuffer<float> outputBuffer; + +// Check that the generated CUDA code never loads a `Middle` or `Bottom` struct into a local var. +// CUDA-NOT: Middle{{[_A-Za-z0-9]*}} {{[a-zA-Z0-9_]+}} = +// CUDA-NOT: Bottom{{[_A-Za-z0-9]*}} {{[a-zA-Z0-9_]+}} = +// CUDA-NOT: Top{{[_A-Za-z0-9]*}} {{[a-zA-Z0-9_]+}} = + +// Check that the generated CUDA code can be compiled by nvrtc correctly into PTX. +// PTX: compute_main + +// Check that the generated (unoptimized) SPIR-V contains a specialized Bottom_bottomGetValue function +// that takes in a Bottom* and use access chain to load the required array element directly, without +// needing to load the entire Bottom struct. +// SPV: %Bottom_bottomGetValue = OpFunction %float None +// SPV: OpFunctionParameter %_ptr_PhysicalStorageBuffer_Middle_natural +// SPV: %[[INDEX:[A-Za-z0-9_]+]] = OpFunctionParameter %int +// SPV: %[[PTR:[A-Za-z0-9_]+]] = OpAccessChain %_ptr_PhysicalStorageBuffer_float %{{.*}} %[[INDEX]] +// SPV: %[[VALUE:[A-Za-z0-9_]+]] = OpLoad %float %[[PTR]] +// SPV: OpReturnValue %[[VALUE]] + +[shader("compute")] +[numthreads(1, 1, 1)] +void compute_main(uint3 tid: SV_DispatchThreadID) +{ + outputBuffer[0] = cb.top.topGetValue(0); +} diff --git a/tests/optimization/buffer-load-defer.slang b/tests/optimization/buffer-load-defer.slang new file mode 100644 index 000000000..b2df43c13 --- /dev/null +++ b/tests/optimization/buffer-load-defer.slang @@ -0,0 +1,38 @@ +//TEST:SIMPLE(filecheck=SPV): -target spirv + +struct Bottom +{ + float bigArray[1024]; + float bottomGetValue(int index) { return bigArray[index]; } +} + +struct Middle +{ + Bottom bottom; + float middleGetValue(int index) { return bottom.bottomGetValue(index); } +} + +struct Top +{ + Middle middle; + float topGetValue(int index) { return middle.middleGetValue(index); } +} + +struct Root +{ + Top top; +} + +ConstantBuffer<Root> cb; + +RWStructuredBuffer<float> outputBuffer; + +// SPV: OpEntryPoint +// SPV-NOT: OpCompositeConstruct + +[shader("compute")] +[numthreads(1, 1, 1)] +void compute_main(uint3 tid: SV_DispatchThreadID) +{ + outputBuffer[0] = cb.top.topGetValue(0); +} diff --git a/tests/optimization/buffer-load-specialize-1.slang b/tests/optimization/buffer-load-specialize-1.slang new file mode 100644 index 000000000..55f2df473 --- /dev/null +++ b/tests/optimization/buffer-load-specialize-1.slang @@ -0,0 +1,35 @@ +//TEST:SIMPLE(filecheck=SPV): -target spirv -O0 + +struct Bottom +{ + float bigArray[1024]; + // SPV: %Bottom_bottomGetValue = OpFunction %float None %{{.*}} + // SPV-NEXT: %{{.*}} = OpFunctionParameter %int + // SPV-NEXT: OpLabel + // SPV-NOT: OpCompositeConstruct + // SPV: OpFunctionEnd + + // SPV: %Bottom_bottomGetValue_0 = OpFunction %float None %{{.*}} + // SPV-NEXT: %{{.*}} = OpFunctionParameter %int + // SPV-NEXT: OpLabel + float bottomGetValue(int index) { return bigArray[index]; } +} + +struct Root +{ + Bottom bottom1; + Bottom bottom2; +} + +ConstantBuffer<Root> cb; + +RWStructuredBuffer<float> outputBuffer; + +[shader("compute")] +[numthreads(1, 1, 1)] +void compute_main(uint3 tid: SV_DispatchThreadID) +{ + outputBuffer[0] = cb.bottom1.bottomGetValue(0); + outputBuffer[1] = cb.bottom2.bottomGetValue(1); + outputBuffer[2] = cb.bottom2.bottomGetValue(2); +} diff --git a/tests/optimization/buffer-store-defer.slang b/tests/optimization/buffer-store-defer.slang new file mode 100644 index 000000000..14362a477 --- /dev/null +++ b/tests/optimization/buffer-store-defer.slang @@ -0,0 +1,51 @@ +//TEST:SIMPLE(filecheck=CUDA): -target cuda -stage compute -entry compute_main +//TEST:SIMPLE(filecheck=PTX): -target ptx -stage compute -entry compute_main +//TEST:SIMPLE(filecheck=SPV): -target spirv + +struct Bottom +{ + float bigArray[1024]; + + [mutating] + void setVal(int index, float value) { bigArray[index] = value; } +} + +struct Root +{ + Bottom top[2]; + [mutating] + void setTopVal(int x, int y, float value) + { + top[x].setVal(y, value); + } +} + +RWStructuredBuffer<Root> sb; + +// Check that we don't load the entire `Root` struct, modify it, and then write it back. +// Instead we should generate a single store instruction to write the single float value +// directly to the buffer. + +// SPV: OpEntryPoint +// SPV: OpLabel +// SPV-NEXT: OpAccessChain +// SPV-NOT: OpCompositeInsert +// SPV-NOT: OpLoad +// SPV: OpStore +// SPV-NOT: OpLoad +// SPV-NOT: OpCompositeInsert +// SPV: OpStore +// SPV: OpReturn + +// CUDA: __device__ void Bottom_setVal_0(int [[INDEX0:[A-Za-z0-9_]+]], int [[INDEX1:[A-Za-z0-9_]+]], int [[INDEX2:[A-Za-z0-9_]+]], float [[VAL:[A-Za-z0-9_]+]]) +// CUDA: (&(&(globalParams{{.*}}->sb{{.*}}){{\[}}[[INDEX0]]{{\]}})->top{{.*}}{{\[}}[[INDEX1]]{{\]}})->bigArray{{.*}}{{\[}}[[INDEX2]]{{\]}} = [[VAL]]; +// PTX: compute_main + +[shader("compute")] +[numthreads(1, 1, 1)] +void compute_main(uint3 tid: SV_DispatchThreadID) +{ + sb[0].setTopVal(1, 2, 100.0f); + + sb[3].top[1].setVal(8, 200.0f); +} diff --git a/tests/optimization/defer-structured-buffer-load.slang b/tests/optimization/defer-structured-buffer-load.slang new file mode 100644 index 000000000..f7f9b1888 --- /dev/null +++ b/tests/optimization/defer-structured-buffer-load.slang @@ -0,0 +1,38 @@ +//TEST:SIMPLE(filecheck=CUDA): -target cuda -entry compute_main -stage compute +//TEST:SIMPLE(filecheck=SPV): -target spirv + +// Test that we can defer loading big structured buffer elements. + +struct Bottom +{ + float bigArray[1024]; + float bottomGetValue(int index) { return bigArray[index]; } +} + +struct Root +{ + Bottom bottom; +} + +StructuredBuffer<Root> sb; + +RWStructuredBuffer<float> outputBuffer; + +// Check that we don't load the entire `Root` struct and then do ElementExtract to get to `bigArray[0]`. +// Instead we use access chain all the way to point to the required array element, and load just a single float. + +// SPV: OpEntryPoint +// SPV: %[[SBPTRARRAY:[A-Za-z0-9_]+]] = OpAccessChain %_ptr_StorageBuffer__arr_float_int_1024 +// SPV: %[[SBPTR:[A-Za-z0-9_]+]] = OpAccessChain %_ptr_StorageBuffer_float %[[SBPTRARRAY]] +// SPV: %[[VALUE:[A-Za-z0-9_]+]] = OpLoad %float %[[SBPTR]] +// SPV: OpStore %{{.*}} %[[VALUE]] + +// CUDA: __device__ float Bottom_bottomGetValue{{.*}}(uint [[PARAM0:[A-Za-z0-9_]+]], int [[PARAM1:[A-Za-z0-9_]+]]) +// CUDA: return (&(&(globalParams_0->sb_0){{\[}}[[PARAM0]]{{\]}})->bottom_0)->bigArray_0{{\[}}[[PARAM1]]{{\]}}; + +[shader("compute")] +[numthreads(1, 1, 1)] +void compute_main(uint3 tid: SV_DispatchThreadID) +{ + outputBuffer[0] = sb[tid.x].bottom.bottomGetValue(0); +} diff --git a/tests/vkray/raygen-trace-ray-param-non-struct.slang b/tests/vkray/raygen-trace-ray-param-non-struct.slang index b0a129761..72d85ed02 100644 --- a/tests/vkray/raygen-trace-ray-param-non-struct.slang +++ b/tests/vkray/raygen-trace-ray-param-non-struct.slang @@ -28,7 +28,7 @@ void main() // CHECK: rayPayload{{_[0-9]}}.data{{_[0-9]}} = {{.*}} // CHECK: TraceRay( - // CHECK: {{.*}} = rayPayload{{.*}}.data{{.*}}; + // CHECK: rayPayload{{.*}}.data{{.*}}; TraceRay(as, 1, 0xff, @@ -39,9 +39,9 @@ void main() someInData1); outputBuffer1[0] = outputBuffer1[0]+someInData1; - // CHECK: rayPayload{{_[0-9]}}.data{{_[0-9]}} = {{.*}} + // CHECK: rayPayload{{_[0-9]}}.data{{_[0-9]}} = rayPayload{{.*}}.data{{.*}}; // CHECK: TraceMotionRay( - // CHECK: {{.*}} = rayPayload{{.*}}.data{{.*}}; + // CHECK: rayPayload{{.*}}.data{{.*}}; TraceMotionRay(as, 1, 0xff, @@ -55,7 +55,7 @@ void main() // CHECK: rayPayload{{_[0-9]}}.data{{_[0-9]}} = {{.*}} // CHECK: NvTraceRayHitObject( - // CHECK: {{.*}} = rayPayload{{.*}}.data{{.*}}; + // CHECK: rayPayload{{.*}}.data{{.*}}; HitObject::TraceRay(as, 1, 0xff, @@ -68,7 +68,7 @@ void main() // CHECK: rayPayload{{_[0-9]}}.data{{_[0-9]}} = {{.*}} // CHECK: TraceMotionRay( - // CHECK: {{.*}} = rayPayload{{.*}}.data{{.*}}; + // CHECK: rayPayload{{.*}}.data{{.*}}; HitObject::TraceMotionRay(as, 1, 0xff, @@ -82,7 +82,7 @@ void main() // CHECK: rayPayload{{_[0-9]}}.data{{_[0-9]}} = {{.*}} // CHECK: NvInvokeHitObject( - // CHECK: {{.*}} = rayPayload{{.*}}.data{{.*}}; + // CHECK: rayPayload{{.*}}.data{{.*}}; HitObject hitObject_HitOrMiss; HitObject::Invoke( as, diff --git a/tests/wgsl/switch-case.slang b/tests/wgsl/switch-case.slang index c4ff0996e..fc24bd67a 100644 --- a/tests/wgsl/switch-case.slang +++ b/tests/wgsl/switch-case.slang @@ -70,17 +70,14 @@ func fs_main(VertexOutput input)->FragmentOutput return output; } -//WGSL: fn _S9( _S10 : Tuple_0) -> f32 -//WGSL-NEXT: { -//WGSL-NEXT: switch(_S10.value1_0.x) +//WGSL: switch({{.*}}) //WGSL-NEXT: { //WGSL-NEXT: case u32(0): //WGSL-NEXT: { -//WGSL-NEXT: return Circle_getArea_0(unpackAnyValue16_0(_S10.value2_0)); +//WGSL-NEXT: return Circle_getArea_0 //WGSL-NEXT: } //WGSL-NEXT: default : //WGSL-NEXT: { -//WGSL-NEXT: return Rectangle_getArea_0(unpackAnyValue16_1(_S10.value2_0)); +//WGSL-NEXT: return Rectangle_getArea_0 //WGSL-NEXT: } //WGSL-NEXT: } -//WGSL-NEXT: } |
