diff options
| -rw-r--r-- | prelude/slang-cuda-prelude.h | 6 | ||||
| -rwxr-xr-x | source/slang/slang-compiler.h | 2 | ||||
| -rw-r--r-- | source/slang/slang-emit.cpp | 10 | ||||
| -rw-r--r-- | source/slang/slang-ir-lower-buffer-element-type.cpp | 4 | ||||
| -rw-r--r-- | tests/compute/column-major.slang | 60 | ||||
| -rw-r--r-- | tests/compute/column-major.slang.expected.txt | 5 | ||||
| -rw-r--r-- | tests/compute/constant-buffer-memory-packing.slang | 118 |
7 files changed, 173 insertions, 32 deletions
diff --git a/prelude/slang-cuda-prelude.h b/prelude/slang-cuda-prelude.h index 74a9d0cec..89f33e24b 100644 --- a/prelude/slang-cuda-prelude.h +++ b/prelude/slang-cuda-prelude.h @@ -207,6 +207,12 @@ union Union64 double d; }; +template<typename T> +SLANG_FORCE_INLINE SLANG_CUDA_CALL float make_float(T val) +{ + return (float)val; +} + SLANG_FORCE_INLINE SLANG_CUDA_CALL float _slang_fmod(float x, float y) { return ::fmodf(x, y); diff --git a/source/slang/slang-compiler.h b/source/slang/slang-compiler.h index 2409cedfb..7fc43d778 100755 --- a/source/slang/slang-compiler.h +++ b/source/slang/slang-compiler.h @@ -1769,6 +1769,8 @@ namespace Slang /// Are we generating code for a CUDA API (CUDA / OptiX)? bool isCUDATarget(TargetRequest* targetReq); + // Are we generating code for a CPU target + bool isCPUTarget(TargetRequest* targetReq); /// A request to generate output in some target format. class TargetRequest : public RefObject diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp index 679d8ce88..678b4137a 100644 --- a/source/slang/slang-emit.cpp +++ b/source/slang/slang-emit.cpp @@ -1267,15 +1267,7 @@ Result linkAndOptimizeIR( if (requiredLoweringPassSet.meshOutput) legalizeMeshOutputTypes(irModule); - if (options.shouldLegalizeExistentialAndResourceTypes) - { - if (!isMetalTarget(targetRequest)) - { - // We need to lower any types used in a buffer resource (e.g. ContantBuffer or StructuredBuffer) into - // a simple storage type that has target independent layout based on the kind of buffer resource. - lowerBufferElementTypeToStorageType(targetProgram, irModule); - } - } + lowerBufferElementTypeToStorageType(targetProgram, irModule); // Rewrite functions that return arrays to return them via `out` parameter, // since our target languages doesn't allow returning arrays. diff --git a/source/slang/slang-ir-lower-buffer-element-type.cpp b/source/slang/slang-ir-lower-buffer-element-type.cpp index 981e29697..d042aae43 100644 --- a/source/slang/slang-ir-lower-buffer-element-type.cpp +++ b/source/slang/slang-ir-lower-buffer-element-type.cpp @@ -877,7 +877,9 @@ namespace Slang void lowerBufferElementTypeToStorageType(TargetProgram* target, IRModule* module, bool lowerBufferPointer) { SlangMatrixLayoutMode defaultMatrixMode = (SlangMatrixLayoutMode)target->getOptionSet().getMatrixLayoutMode(); - if (defaultMatrixMode == SLANG_MATRIX_LAYOUT_MODE_UNKNOWN) + if ((isCPUTarget(target->getTargetReq()) || isCUDATarget(target->getTargetReq()) || isMetalTarget(target->getTargetReq()))) + defaultMatrixMode = SLANG_MATRIX_LAYOUT_ROW_MAJOR; + else if (defaultMatrixMode == SLANG_MATRIX_LAYOUT_MODE_UNKNOWN) defaultMatrixMode = SLANG_MATRIX_LAYOUT_ROW_MAJOR; LoweredElementTypeContext context(target, lowerBufferPointer, defaultMatrixMode); context.processModule(module); diff --git a/tests/compute/column-major.slang b/tests/compute/column-major.slang index 19d863260..1cd08434b 100644 --- a/tests/compute/column-major.slang +++ b/tests/compute/column-major.slang @@ -1,33 +1,59 @@ // column-major.slang -// Unfortunately CPU and CUDA only work with row layout, so they have to be disabled here. - -//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute -output-using-type -compile-arg -O3 -shaderobj -//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -output-using-type -shaderobj -Xslang -matrix-layout-column-major -//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -output-using-type -dx12 -shaderobj -Xslang -matrix-layout-column-major -//TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute -output-using-type -shaderobj -Xslang -matrix-layout-column-major -//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -output-using-type -shaderobj -//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -output-using-type -mtl -shaderobj -Xslang -matrix-layout-column-major +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-cpu -compute -compile-arg -O3 -shaderobj -Xslang -matrix-layout-column-major +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-slang -compute -shaderobj -Xslang -matrix-layout-column-major +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-slang -compute -dx12 -shaderobj -Xslang -matrix-layout-column-major +//TEST(compute, vulkan):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-vk -compute -shaderobj -Xslang -matrix-layout-column-major +//TEST(compute, vulkan):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-vk -compute -emit-spirv-via-glsl -Xslang -matrix-layout-column-major +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-cuda -compute -shaderobj -Xslang -matrix-layout-column-major +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-mtl -compute -shaderobj -Xslang -matrix-layout-column-major // This data is in column major layout order.... //TEST_INPUT:cbuffer(data=[1.0 0.0 0.0 10.0 0.0 1.0 0.0 20.0 0.0 0.0 1.0 30.0 0.0 0.0 0.0 1.0]):name matrixBuffer ConstantBuffer<float4x4> matrixBuffer; -//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name output -RWStructuredBuffer<float> output; +//TEST_INPUT:ubuffer(data=[0], stride=4):out,name output +RWStructuredBuffer<uint> output; + +bool floatCheck(float data, float valueToCheckFor) +{ + return data < (valueToCheckFor + 0.001) && data > valueToCheckFor - 0.001; +} [numthreads(1, 1, 1)] void computeMain(uint3 tid : SV_DispatchThreadID) { float4 v = float4(1, 2, 3, 1); - float4x4 M = matrixBuffer; + float4x4 M1 = matrixBuffer; - float4 r = mul(v, M); - - output[0] = r.x; - output[1] = r.y; - output[2] = r.z; - output[3] = r.w; + float4 r = mul(v, M1); + + float4x4 M2 = mul(M1, M1); + + float4x4 M3 = float4x4( + 1.0, 0.0, 0.0, 10.0, + 0.0, 1.0, 0.0, 20.0, + 0.0, 0.0, 1.0, 30.0, + 0.0, 0.0, 0.0, 1.0 + ); + + output[0] = uint(true + && floatCheck(r.x, 11) + && floatCheck(r.y, 22) + && floatCheck(r.z, 33) + && floatCheck(r.w, 1) + + && floatCheck(M1[3][0], 10) + + && floatCheck(M2[3][0], 20) + && floatCheck(M2._41, 20) + && floatCheck(M2._41_32[0], 20) + && floatCheck(M2._33_42[0], 1) + && floatCheck(M2._42_33[0], 40) + + && floatCheck(M3[0][3], 10) + ); + //BUF: 1 } diff --git a/tests/compute/column-major.slang.expected.txt b/tests/compute/column-major.slang.expected.txt deleted file mode 100644 index 1e24f3253..000000000 --- a/tests/compute/column-major.slang.expected.txt +++ /dev/null @@ -1,5 +0,0 @@ -type: float -11.000000 -22.000000 -33.000000 -1.000000 diff --git a/tests/compute/constant-buffer-memory-packing.slang b/tests/compute/constant-buffer-memory-packing.slang new file mode 100644 index 000000000..5246c4d33 --- /dev/null +++ b/tests/compute/constant-buffer-memory-packing.slang @@ -0,0 +1,118 @@ +// column-major-with-row-major-operations.slang + +// Metal/CPP/CUDA do not deal with packing currently, different results will occur. +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-cpu -compute -xslang -DTARGET_WITHOUT_PACKING +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-cuda -compute -xslang -DTARGET_WITHOUT_PACKING +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-mtl -compute -xslang -DTARGET_WITHOUT_PACKING + +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-slang -compute -dx12 +//TEST(compute, vulkan):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-vk -compute +//TEST(compute, vulkan):COMPARE_COMPUTE_EX(filecheck-buffer=BUF):-vk -compute -emit-spirv-via-glsl + +// CPP/Metal/CUDA due to not having memory packing will recieve the following ROW matrix: +// {1,2,3} +// {0,4,5} +// {6,0,7} + +// GLSL/SPIRV/HLSL due to having memory packing will recieve the following ROW/COL matrix: +// {1,2,3} +// {0,4,5} +// {6,0,7} + +//TEST_INPUT:cbuffer(data=[1.0 2.0 3.0 0.0 4.0 5.0 6.0 0.0 7.0 8.0 9.0 0]):name matrixTestCBuf1 +ConstantBuffer<row_major float3x3> matrixTestCBuf1; + +// CPP/Metal/CUDA due to not having memory packing will recieve the following COL matrix post-transpose: +// {1,0,8} +// {4,2,0} +// {7,5,3} + +//TEST_INPUT:cbuffer(data=[1.0 4.0 7.0 0.0 2.0 5.0 8.0 0.0 3.0 6.0 9.0 0.0]):name matrixTestCBuf2 +ConstantBuffer<column_major float3x3> matrixTestCBuf2; + +//TEST_INPUT:cbuffer(data=[1.0 2.0 3.0 0.0 4.0 5.0 6.0 0.0]):name NeedsPadding +cbuffer NeedsPadding +{ + float3 data1; + float3 data2; +}; + +//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name output +RWStructuredBuffer<uint> output; + +bool floatCheck(float data, float valueToCheckFor) +{ + return data < (valueToCheckFor + 0.001) && data > valueToCheckFor - 0.001; +} + +[numthreads(1, 1, 1)] +void computeMain(uint3 tid : SV_DispatchThreadID) +{ + float3x3 matrixTest1; + matrixTest1 = matrixTestCBuf1; + + float3x3 matrixTest2; + matrixTest2 = matrixTestCBuf2; + + output[0] = uint(true +#ifndef TARGET_WITHOUT_PACKING + && floatCheck(matrixTest1[0][0], 1) + && floatCheck(matrixTest1[0][1], 2) + && floatCheck(matrixTest1[0][2], 3) + && floatCheck(matrixTest1[1][0], 4) + && floatCheck(matrixTest1[1][1], 5) + && floatCheck(matrixTest1[1][2], 6) + && floatCheck(matrixTest1[2][0], 7) + && floatCheck(matrixTest1[2][1], 8) + && floatCheck(matrixTest1[2][2], 9) + + && floatCheck(matrixTest2[0][0], 1) + && floatCheck(matrixTest2[0][1], 2) + && floatCheck(matrixTest2[0][2], 3) + && floatCheck(matrixTest2[1][0], 4) + && floatCheck(matrixTest2[1][1], 5) + && floatCheck(matrixTest2[1][2], 6) + && floatCheck(matrixTest2[2][0], 7) + && floatCheck(matrixTest2[2][1], 8) + && floatCheck(matrixTest2[2][2], 9) + + && floatCheck(data1[0], 1) + && floatCheck(data1[1], 2) + && floatCheck(data1[2], 3) + && floatCheck(data2[0], 4) + && floatCheck(data2[1], 5) + && floatCheck(data2[2], 6) +#else + && floatCheck(matrixTest1[0][0], 1) + && floatCheck(matrixTest1[0][1], 2) + && floatCheck(matrixTest1[0][2], 3) + && floatCheck(matrixTest1[1][0], 0) + && floatCheck(matrixTest1[1][1], 4) + && floatCheck(matrixTest1[1][2], 5) + && floatCheck(matrixTest1[2][0], 6) + && floatCheck(matrixTest1[2][1], 0) + && floatCheck(matrixTest1[2][2], 7) + + && floatCheck(matrixTest2[0][0], 1) + && floatCheck(matrixTest2[0][1], 0) + && floatCheck(matrixTest2[0][2], 8) + && floatCheck(matrixTest2[1][0], 4) + && floatCheck(matrixTest2[1][1], 2) + && floatCheck(matrixTest2[1][2], 0) + && floatCheck(matrixTest2[2][0], 7) + && floatCheck(matrixTest2[2][1], 5) + && floatCheck(matrixTest2[2][2], 3) + + && floatCheck(data1[0], 1) + && floatCheck(data1[1], 2) + && floatCheck(data1[2], 3) + && floatCheck(data2[0], 0) + && floatCheck(data2[1], 4) + && floatCheck(data2[2], 5) +#endif + ); + output[1] = (uint)matrixTest2[0][0]; + output[2] = (uint)matrixTest2[0][1]; + output[3] = (uint)matrixTest2[0][2]; + //BUF: 1 +} |
