From dbd8e8dc0847338a2a93d35385f48b5ce5671dd6 Mon Sep 17 00:00:00 2001 From: jsmall-nvidia Date: Mon, 2 Mar 2020 17:22:03 -0500 Subject: Feature/glsl wave intrinsic (#1253) * Test for some wave intrinsics. More wave intrinsic support on CUDA. * Use shfl_xor_sync. * Improvements around wave intrinsics. Fix built in integer types belong to __BuiltinIntegerType. * Improvements and fixes around Wave intrinsics. * Added WaveIsFirstLane test. No longer use __wavemask_lt, as appears not available as an intrinsic. * Small fixes to CUDA prelude. * Add wave-active-product test. Handle the special case for arbitray sums. * Used macro to implement CUDA wave intrinsics. * First pass at glsl wave intrinsics. Doesn't work in practice because require mechanism to set spir-v version Replace use of _lanemask_lt() for CUDA. --- prelude/slang-cuda-prelude.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'prelude') diff --git a/prelude/slang-cuda-prelude.h b/prelude/slang-cuda-prelude.h index 1ca93d9d1..ce8e925a2 100644 --- a/prelude/slang-cuda-prelude.h +++ b/prelude/slang-cuda-prelude.h @@ -441,6 +441,12 @@ __forceinline__ __device__ uint32_t _getLaneId() } #endif +// Return mask of all the lanes less than the current lane (bit i set for each lane i below this one; shift is unsigned because signed (1 << 31) - 1 overflows for lane 31) +__forceinline__ __device__ int _getLaneLtMask() +{ + return int((1u << _getLaneId()) - 1u); +} + // Note! Note will return true if mask is 0, but thats okay, because there must be one // lane active to execute anything __inline__ __device__ bool _waveIsSingleLane(int mask) -- cgit v1.2.3