diff options
| -rw-r--r-- | docs/64bit-type-support.md | 78 | ||||
| -rw-r--r-- | prelude/slang-cpp-scalar-intrinsics.h | 35 | ||||
| -rw-r--r-- | prelude/slang-cuda-prelude.h | 26 | ||||
| -rw-r--r-- | source/slang/slang-emit-cpp.cpp | 1 | ||||
| -rw-r--r-- | tests/hlsl-intrinsic/scalar-int64.slang | 19 | ||||
| -rw-r--r-- | tests/hlsl-intrinsic/scalar-int64.slang.expected.txt | 8 | ||||
| -rw-r--r-- | tests/hlsl-intrinsic/scalar-uint64.slang | 24 | ||||
| -rw-r--r-- | tests/hlsl-intrinsic/scalar-uint64.slang.expected.txt | 8 |
8 files changed, 180 insertions, 19 deletions
diff --git a/docs/64bit-type-support.md b/docs/64bit-type-support.md new file mode 100644 index 000000000..6b026d3b2 --- /dev/null +++ b/docs/64bit-type-support.md @@ -0,0 +1,78 @@ +Slang 64-bit Type Support +========================= + +The Slang language supports 64-bit built-in types, such as + +* double +* uint64_t +* int64_t + +This also applies to vector and matrix versions of these types. + +Unfortunately, whether a specific target supports the type or the typical HLSL intrinsic functions (such as sin/cos/max/min etc) depends very much on the target. + +Note this initial testing only tested scalar usage, and not vector or matrix intrinsics. + +Double support +============== + +Target | Compiler/Binary | Double Type | Intrinsics | Notes +---------|------------------|----------------|-----------------------|----------- +CPU | | Yes | Yes | 1 +CUDA | NVRTC/PTX | Yes | Yes | 1 +D3D12 | DXC/DXIL | Yes | No | 2 +Vulkan | GlSlang/Spir-V | Yes | No | 3 +D3D11 | FXC/DXBC | No | No | +D3D12 | FXC/DXBC | No | No | + +1) CUDA and CPU support most intrinsics, with the notable exception currently of matrix invert +2) Requires SM 6.0 and above https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/hlsl-shader-model-6-0-features-for-direct3d-12 +3) Restriction is described in https://www.khronos.org/registry/spir-v/specs/1.0/GLSL.std.450.html +Note that GlSlang does produce spir-v that contains double intrinsic calls; the failure happens when validating the Spir-V + +``` +Validation: error 0: [ UNASSIGNED-CoreValidation-Shader-InconsistentSpirv ] Object: VK_NULL_HANDLE (Type = 0) | SPIR-V module not valid: GLSL.std.450 Sin: expected Result Type to be a 16 or 32-bit scalar or vector float type + %57 = OpExtInst %double %1 Sin %56 +``` + +D3D12 and VK may have some very limited intrinsic support such as sqrt, rsqrt + +uint64_t Support +================= + +Target | Compiler/Binary | uint64_t Type | Intrinsic support | Notes 
+---------|------------------|----------------|--------------------|-------- +CPU | | Yes | Yes | +CUDA | NVRTC/PTX | Yes | Yes | +D3D12 | DXC/DXIL | Yes | Yes | +Vulkan | GlSlang/Spir-V | Yes | Yes | +D3D11 | FXC/DXBC | No | No | 1 +D3D12 | FXC/DXBC | No | No | 1 + +1) uint64_t support requires SM 6.0 (https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/hlsl-shader-model-6-0-features-for-direct3d-12), so DXBC is not a target. + +The intrinsics available on uint64_t type are `abs`, `min`, `max`, `clamp` and `countbits`. + +int64_t Support +================ + +Target | Compiler/Binary | int64_t Type | Intrinsic support | Notes +---------|------------------|----------------|--------------------|-------- +CPU | | Yes | Yes | +CUDA | NVRTC/PTX | Yes | Yes | +Vulkan | GlSlang/Spir-V | Yes | Yes | +D3D12 | DXC/DXIL | Yes | Yes | 1 +D3D11 | FXC/DXBC | No | No | 2 +D3D12 | FXC/DXBC | No | No | 2 + +1) The sm6.0 docs (https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/hlsl-shader-model-6-0-features-for-direct3d-12) describe support only for uint64_t, but the dxc compiler page says int64_t is supported in HLSL 2016 (https://github.com/Microsoft/DirectXShaderCompiler/wiki/Language-Versions). Tests show that this is indeed the case. + +2) int64_t support requires SM 6.0 (https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/hlsl-shader-model-6-0-features-for-direct3d-12), so DXBC is not a target. + +The intrinsics available on int64_t type are `abs`, `min`, `max` and `clamp`. + +GLSL +==== + +GLSL/Spir-v based targets do not support 'generated' intrinsics on matrix types. For example 'sin(mat)' will not work on GLSL/Spir-v. 
+ diff --git a/prelude/slang-cpp-scalar-intrinsics.h b/prelude/slang-cpp-scalar-intrinsics.h index 6c577733d..9fc387b6e 100644 --- a/prelude/slang-cpp-scalar-intrinsics.h +++ b/prelude/slang-cpp-scalar-intrinsics.h @@ -197,6 +197,7 @@ SLANG_FORCE_INLINE double U32_asdouble(uint32_t low, uint32_t hi) return u.d; } + SLANG_FORCE_INLINE uint32_t U32_countbits(uint32_t v) { #if SLANG_GCC_FAMILY @@ -214,6 +215,40 @@ SLANG_FORCE_INLINE uint32_t U32_countbits(uint32_t v) #endif } +// ----------------------------- U64 ----------------------------------------- + +SLANG_FORCE_INLINE uint64_t U64_abs(uint64_t f) { return f; } + +SLANG_FORCE_INLINE uint64_t U64_min(uint64_t a, uint64_t b) { return a < b ? a : b; } +SLANG_FORCE_INLINE uint64_t U64_max(uint64_t a, uint64_t b) { return a > b ? a : b; } + +SLANG_FORCE_INLINE uint64_t U64_clamp(uint64_t x, uint64_t min, uint64_t max) { return ( x < min) ? min : ((x > max) ? max : x); } + +SLANG_FORCE_INLINE uint32_t U64_countbits(uint64_t v) +{ +#if SLANG_GCC_FAMILY + return __builtin_popcountll(v); +#elif SLANG_PROCESSOR_X86_64 && SLANG_VC + return __popcnt64(v); +#else + uint32_t c = 0; + while (v) + { + c++; + v &= v - 1; + } + return c; +#endif +} + +// ----------------------------- I64 ----------------------------------------- + +SLANG_FORCE_INLINE int64_t I64_abs(int64_t f) { return (f < 0) ? -f : f; } + +SLANG_FORCE_INLINE int64_t I64_min(int64_t a, int64_t b) { return a < b ? a : b; } +SLANG_FORCE_INLINE int64_t I64_max(int64_t a, int64_t b) { return a > b ? a : b; } + +SLANG_FORCE_INLINE int64_t I64_clamp(int64_t x, int64_t min, int64_t max) { return ( x < min) ? min : ((x > max) ? 
max : x); } #ifdef SLANG_PRELUDE_NAMESPACE } diff --git a/prelude/slang-cuda-prelude.h b/prelude/slang-cuda-prelude.h index 7e6e5957d..233903134 100644 --- a/prelude/slang-cuda-prelude.h +++ b/prelude/slang-cuda-prelude.h @@ -214,6 +214,32 @@ SLANG_CUDA_CALL uint32_t U32_countbits(uint32_t v) return __popc(v); } + +// ----------------------------- I64 ----------------------------------------- + +SLANG_CUDA_CALL int64_t I64_abs(int64_t f) { return (f < 0) ? -f : f; } + +SLANG_CUDA_CALL int64_t I64_min(int64_t a, int64_t b) { return a < b ? a : b; } +SLANG_CUDA_CALL int64_t I64_max(int64_t a, int64_t b) { return a > b ? a : b; } + +SLANG_CUDA_CALL int64_t I64_clamp(int64_t x, int64_t min, int64_t max) { return ( x < min) ? min : ((x > max) ? max : x); } + +// ----------------------------- U64 ----------------------------------------- + +SLANG_CUDA_CALL uint64_t U64_abs(uint64_t f) { return f; } + +SLANG_CUDA_CALL uint64_t U64_min(uint64_t a, uint64_t b) { return a < b ? a : b; } +SLANG_CUDA_CALL uint64_t U64_max(uint64_t a, uint64_t b) { return a > b ? a : b; } + +SLANG_CUDA_CALL uint64_t U64_clamp(uint64_t x, uint64_t min, uint64_t max) { return ( x < min) ? min : ((x > max) ? 
max : x); } + +SLANG_CUDA_CALL uint32_t U64_countbits(uint64_t v) +{ + // https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH__INTRINSIC__INT.html#group__CUDA__MATH__INTRINSIC__INT_1g43c9c7d2b9ebf202ff1ef5769989be46 + return __popcll(v); +} + + // ----------------------------- ResourceType ----------------------------------------- diff --git a/source/slang/slang-emit-cpp.cpp b/source/slang/slang-emit-cpp.cpp index 99cc2f61c..93d1cf2dd 100644 --- a/source/slang/slang-emit-cpp.cpp +++ b/source/slang/slang-emit-cpp.cpp @@ -72,6 +72,7 @@ static UnownedStringSlice _getTypePrefix(IROp op) case kIROp_UIntType: return UnownedStringSlice::fromLiteral("U32"); case kIROp_FloatType: return UnownedStringSlice::fromLiteral("F32"); case kIROp_Int64Type: return UnownedStringSlice::fromLiteral("I64"); + case kIROp_UInt64Type: return UnownedStringSlice::fromLiteral("U64"); case kIROp_DoubleType: return UnownedStringSlice::fromLiteral("F64"); default: return UnownedStringSlice::fromLiteral("?"); } diff --git a/tests/hlsl-intrinsic/scalar-int64.slang b/tests/hlsl-intrinsic/scalar-int64.slang index 4da2a553e..4ad805081 100644 --- a/tests/hlsl-intrinsic/scalar-int64.slang +++ b/tests/hlsl-intrinsic/scalar-int64.slang @@ -1,9 +1,9 @@ //TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute -// No support for int64_t on dx11 +// No support for int64_t on dx11 (no sm 6.0) //DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -// No support for int64_t on HLSL +// No support with Dx12 with dxbc. 
Needs SM6.0 + dxil //DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil +//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -profile cs_6_0 -dx12 -use-dxil //TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute //TEST(compute, vulkan):COMPARE_COMPUTE_EX:-cuda -compute @@ -13,9 +13,18 @@ RWStructuredBuffer<int> outputBuffer; [numthreads(4, 1, 1)] void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) { - uint idx = dispatchThreadID.x; + int64_t idx = int64_t(dispatchThreadID.x); - int64_t v = int64_t(idx) * 0x400010035435435ll; + int64_t ti =0; + + ti += max(2, idx); + ti += min(idx, 1); + ti += abs(idx - 2); + ti += (idx * 3) % 5; + + ti += clamp(idx * 10, 11, 23); + + int64_t v = (ti * 0x400010035435435ll) / 3ll + 7ll - 9ll; outputBuffer[idx] = int(v) ^ int(((v >> 32) & 0xffffffff)); }
\ No newline at end of file diff --git a/tests/hlsl-intrinsic/scalar-int64.slang.expected.txt b/tests/hlsl-intrinsic/scalar-int64.slang.expected.txt index c0bb016cd..6ca5a87e0 100644 --- a/tests/hlsl-intrinsic/scalar-int64.slang.expected.txt +++ b/tests/hlsl-intrinsic/scalar-int64.slang.expected.txt @@ -1,4 +1,4 @@ -0 -31435535 -6286AA6A -93C9FF9F +1E50A006 +2793FF3D +8A1AA9A7 +ED76E236 diff --git a/tests/hlsl-intrinsic/scalar-uint64.slang b/tests/hlsl-intrinsic/scalar-uint64.slang index a990ccc22..dd165d8b8 100644 --- a/tests/hlsl-intrinsic/scalar-uint64.slang +++ b/tests/hlsl-intrinsic/scalar-uint64.slang @@ -4,7 +4,7 @@ // No support for uint64_t on fxc - we need SM6.0 and dxil // https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/hlsl-shader-model-6-0-features-for-direct3d-12 //DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -profile cs_6_3 -use-dxil +//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -profile cs_6_0 -use-dxil //TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute //TEST(compute, vulkan):COMPARE_COMPUTE_EX:-cuda -compute @@ -13,12 +13,24 @@ RWStructuredBuffer<int> outputBuffer; [numthreads(4, 1, 1)] void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) -{ - uint idx = dispatchThreadID.x; +{ + uint64_t idx = dispatchThreadID.x; - uint64_t v = uint64_t(idx) * 0x8000100354354354ull; + uint64_t ti = 0; + + ti += max(2, idx); + ti += min(idx, 1ull); + ti += (idx * 3) % 5; + + ti += clamp(idx * 10, 11, 23); + + ti += countbits(idx * 13); + + uint64_t v = uint64_t(ti) * 0x8000100354354354ull; // Let's check all the bits make it - v |= 0x8000000000000000ull; + uint64_t u = v | 0x8000000000000000ull; - outputBuffer[idx] = int(v) ^ int(v >> 32); + v = max(u, v); + + outputBuffer[dispatchThreadID.x] = int(v) ^ int(v >> 32); }
\ No newline at end of file diff --git a/tests/hlsl-intrinsic/scalar-uint64.slang.expected.txt b/tests/hlsl-intrinsic/scalar-uint64.slang.expected.txt index b8be0469a..d3843ea52 100644 --- a/tests/hlsl-intrinsic/scalar-uint64.slang.expected.txt +++ b/tests/hlsl-intrinsic/scalar-uint64.slang.expected.txt @@ -1,4 +1,4 @@ -80000000 -D4355357 -286AA6AE -7C9FF9F5 +C6B4BB6F +142802D2 +619FA985 +34A0408 |
