summary refs log tree commit diff
path: root/prelude
diff options
context:
space:
mode:
Diffstat (limited to 'prelude')
-rw-r--r-- prelude/slang-cpp-scalar-intrinsics.h 54
-rw-r--r-- prelude/slang-cuda-prelude.h 29
2 files changed, 81 insertions, 2 deletions
diff --git a/prelude/slang-cpp-scalar-intrinsics.h b/prelude/slang-cpp-scalar-intrinsics.h
index 9b045941a..731fd02be 100644
--- a/prelude/slang-cpp-scalar-intrinsics.h
+++ b/prelude/slang-cpp-scalar-intrinsics.h
@@ -717,6 +717,50 @@ SLANG_FORCE_INLINE uint32_t U32_countbits(uint32_t v)
#endif
}
+// Returns the 0-based index of the lowest set bit of `v`,
+// or ~0u when `v` has no bit set.
+SLANG_FORCE_INLINE uint32_t U32_firstbitlow(uint32_t v)
+{
+    if (!v)
+    {
+        return ~0u;
+    }
+
+#if SLANG_GCC_FAMILY && !defined(SLANG_LLVM)
+    // The number of trailing zeros is exactly the index of the lowest set bit.
+    return __builtin_ctz(v);
+#elif SLANG_PROCESSOR_X86_64 && SLANG_VC
+    // _BitScanForward stores the bit position and reports whether a set bit was found.
+    unsigned long bitPos;
+    if (_BitScanForward(&bitPos, v))
+    {
+        return bitPos;
+    }
+    return ~0u;
+#else
+    // Portable fallback: walk upward from bit 0 until a set bit is reached.
+    uint32_t pos = 0;
+    for (; pos < 32; ++pos)
+    {
+        if (v & (1u << pos))
+        {
+            break;
+        }
+    }
+    return pos;
+#endif
+}
+
+// Returns the 0-based index of the highest set bit of the unsigned value `v`,
+// or ~0u when `v` is zero.
+//
+// The input is treated as purely unsigned: a value with bit 31 set reports 31.
+// The signed rule (scan for the highest 0 bit of a negative value) belongs to
+// I32_firstbithigh, which complements negative inputs before delegating here.
+SLANG_FORCE_INLINE uint32_t U32_firstbithigh(uint32_t v)
+{
+    if (v == 0)
+        return ~0u;
+#if SLANG_GCC_FAMILY && !defined(SLANG_LLVM)
+    // __builtin_clz returns number of leading zeros (undefined for 0, excluded above)
+    // firstbithigh should return 0-based bit position of MSB
+    return 31 - __builtin_clz(v);
+#elif SLANG_PROCESSOR_X86_64 && SLANG_VC
+    // _BitScanReverse returns 1 on success, 0 on failure, and sets index
+    unsigned long index;
+    return _BitScanReverse(&index, v) ? index : ~0u;
+#else
+    // Generic implementation - find highest set bit
+    int result = 31;
+    while (result >= 0 && !(v & (1u << result)))
+        result--;
+    return result;
+#endif
+}
+
// ----------------------------- I32 -----------------------------------------
SLANG_FORCE_INLINE int32_t I32_abs(int32_t f)
@@ -755,6 +799,16 @@ SLANG_FORCE_INLINE uint32_t I32_countbits(int32_t v)
return U32_countbits(uint32_t(v));
}
+// Returns the 0-based index of the lowest set bit of `v`, or ~0u when `v` is zero.
+// The bit pattern is scanned as-is, so the sign does not matter.
+SLANG_FORCE_INLINE uint32_t I32_firstbitlow(int32_t v)
+{
+    return U32_firstbitlow(uint32_t(v));
+}
+
+// Signed firstbithigh: for non-negative `v` returns the index of the highest
+// set bit; for negative `v` returns the index of the highest 0 bit.
+// Returns ~0u for 0 and for -1, where no such bit exists.
+SLANG_FORCE_INLINE uint32_t I32_firstbithigh(int32_t v)
+{
+    // Complementing a negative value turns its highest 0 bit into the highest
+    // set bit, so the unsigned scan can be reused for both cases.
+    const uint32_t bits = (v < 0) ? ~uint32_t(v) : uint32_t(v);
+    return U32_firstbithigh(bits);
+}
+
// ----------------------------- U64 -----------------------------------------
SLANG_FORCE_INLINE uint64_t U64_abs(uint64_t f)
diff --git a/prelude/slang-cuda-prelude.h b/prelude/slang-cuda-prelude.h
index fd79b77aa..3ebdbe777 100644
--- a/prelude/slang-cuda-prelude.h
+++ b/prelude/slang-cuda-prelude.h
@@ -2081,10 +2081,26 @@ SLANG_FORCE_INLINE SLANG_CUDA_CALL double U32_asdouble(uint32_t low, uint32_t hi
SLANG_FORCE_INLINE SLANG_CUDA_CALL uint32_t U32_countbits(uint32_t v)
{
- // https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH__INTRINSIC__INT.html#group__CUDA__MATH__INTRINSIC__INT_1g43c9c7d2b9ebf202ff1ef5769989be46
return __popc(v); // CUDA integer intrinsic: number of bits set to 1 in the 32-bit value
}
+// Returns the 0-based index of the lowest set bit of `v`,
+// or ~0u when `v` has no bit set.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL uint32_t U32_firstbitlow(uint32_t v)
+{
+    if (v == 0)
+    {
+        // Nothing to find: report the "no bit set" sentinel.
+        return ~0u;
+    }
+    // __ffs reports a 1-based position, so shift it down to a 0-based index.
+    const uint32_t oneBased = __ffs(v);
+    return oneBased - 1;
+}
+
+// Returns the 0-based index of the highest set bit of the unsigned value `v`,
+// or ~0u when `v` is zero (maps to the unsigned form of hlsl firstbithigh).
+//
+// The input is treated as purely unsigned: a value with bit 31 set reports 31.
+// The signed rule (scan for the highest 0 bit of a negative value) belongs to
+// I32_firstbithigh, which complements negative inputs before delegating here.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL uint32_t U32_firstbithigh(uint32_t v)
+{
+    if (v == 0)
+        return ~0u;
+    // __clz counts leading zero bits; 31 minus that count is the MSB index.
+    return 31 - __clz(v);
+}
+
// ----------------------------- I32 -----------------------------------------
// Unary
@@ -2125,6 +2141,16 @@ SLANG_FORCE_INLINE SLANG_CUDA_CALL uint32_t I32_countbits(int32_t v)
return U32_countbits(uint32_t(v));
}
+// Returns the 0-based index of the lowest set bit of `v`, or ~0u when `v` is zero.
+// The bit pattern is scanned as-is, so the sign does not matter.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL uint32_t I32_firstbitlow(int32_t v)
+{
+    return U32_firstbitlow(uint32_t(v));
+}
+
+// Signed firstbithigh (maps to the signed form of hlsl firstbithigh): for
+// non-negative `v` returns the index of the highest set bit; for negative `v`
+// returns the index of the highest 0 bit. Returns ~0u for 0 and for -1.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL uint32_t I32_firstbithigh(int32_t v)
+{
+    // Complementing a negative value turns its highest 0 bit into the highest
+    // set bit, so the unsigned scan can be reused for both cases.
+    const uint32_t bits = (v < 0) ? ~uint32_t(v) : uint32_t(v);
+    return U32_firstbithigh(bits);
+}
+
// ----------------------------- U64 -----------------------------------------
SLANG_FORCE_INLINE SLANG_CUDA_CALL int64_t U64_abs(uint64_t f)
@@ -2143,7 +2169,6 @@ SLANG_FORCE_INLINE SLANG_CUDA_CALL int64_t U64_max(uint64_t a, uint64_t b)
SLANG_FORCE_INLINE SLANG_CUDA_CALL uint32_t U64_countbits(uint64_t v)
{
- // https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH__INTRINSIC__INT.html#group__CUDA__MATH__INTRINSIC__INT_1g43c9c7d2b9ebf202ff1ef5769989be46
return __popcll(v); // CUDA integer intrinsic: number of bits set to 1 in the 64-bit value
}