summaryrefslogtreecommitdiff
path: root/prelude
diff options
context:
space:
mode:
authorEllie Hermaszewska <ellieh@nvidia.com>2024-10-29 14:49:26 +0800
committerGitHub <noreply@github.com>2024-10-29 14:49:26 +0800
commitf65d756bff8d4c5cbc15bd0322a2ae8e6b896a21 (patch)
treeea1d61342cd29368e19135000ec2948813096205 /prelude
parenta729c15e9dce9f5116a38afc66329ab2ca4cea54 (diff)
format
* format * Minor test fixes * enable checking cpp format in ci
Diffstat (limited to 'prelude')
-rw-r--r--prelude/slang-cpp-host-prelude.h57
-rw-r--r--prelude/slang-cpp-prelude.h362
-rw-r--r--prelude/slang-cpp-scalar-intrinsics.h591
-rw-r--r--prelude/slang-cpp-types-core.h569
-rw-r--r--prelude/slang-cpp-types.h1062
-rw-r--r--prelude/slang-cuda-prelude.h2635
-rw-r--r--prelude/slang-hlsl-prelude.h4
-rw-r--r--prelude/slang-llvm.h322
-rw-r--r--prelude/slang-torch-prelude.h93
9 files changed, 3741 insertions, 1954 deletions
diff --git a/prelude/slang-cpp-host-prelude.h b/prelude/slang-cpp-host-prelude.h
index 48056169d..8bc0f5cad 100644
--- a/prelude/slang-cpp-host-prelude.h
+++ b/prelude/slang-cpp-host-prelude.h
@@ -1,8 +1,8 @@
#ifndef SLANG_CPP_HOST_PRELUDE_H
#define SLANG_CPP_HOST_PRELUDE_H
-#include <cstdio>
#include <cmath>
+#include <cstdio>
#include <cstring>
#define SLANG_COM_PTR_ENABLE_REF_OPERATOR 1
@@ -14,42 +14,45 @@
#ifdef SLANG_LLVM
#include "slang-llvm.h"
#else // SLANG_LLVM
-# if SLANG_GCC_FAMILY && __GNUC__ < 6
-# include <cmath>
-# define SLANG_PRELUDE_STD std::
-# else
-# include <math.h>
-# define SLANG_PRELUDE_STD
-# endif
-
-# include <assert.h>
-# include <stdlib.h>
-# include <string.h>
-# include <stdint.h>
+#if SLANG_GCC_FAMILY && __GNUC__ < 6
+#include <cmath>
+#define SLANG_PRELUDE_STD std::
+#else
+#include <math.h>
+#define SLANG_PRELUDE_STD
+#endif
+
+#include <assert.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
#endif // SLANG_LLVM
#if defined(_MSC_VER)
-# define SLANG_PRELUDE_SHARED_LIB_EXPORT __declspec(dllexport)
+#define SLANG_PRELUDE_SHARED_LIB_EXPORT __declspec(dllexport)
#else
-# define SLANG_PRELUDE_SHARED_LIB_EXPORT __attribute__((__visibility__("default")))
-//# define SLANG_PRELUDE_SHARED_LIB_EXPORT __attribute__ ((dllexport)) __attribute__((__visibility__("default")))
-#endif
-
-#ifdef __cplusplus
-# define SLANG_PRELUDE_EXTERN_C extern "C"
-# define SLANG_PRELUDE_EXTERN_C_START extern "C" {
-# define SLANG_PRELUDE_EXTERN_C_END }
+#define SLANG_PRELUDE_SHARED_LIB_EXPORT __attribute__((__visibility__("default")))
+// # define SLANG_PRELUDE_SHARED_LIB_EXPORT __attribute__ ((dllexport))
+// __attribute__((__visibility__("default")))
+#endif
+
+#ifdef __cplusplus
+#define SLANG_PRELUDE_EXTERN_C extern "C"
+#define SLANG_PRELUDE_EXTERN_C_START \
+ extern "C" \
+ {
+#define SLANG_PRELUDE_EXTERN_C_END }
#else
-# define SLANG_PRELUDE_EXTERN_C
-# define SLANG_PRELUDE_EXTERN_C_START
-# define SLANG_PRELUDE_EXTERN_C_END
-#endif
+#define SLANG_PRELUDE_EXTERN_C
+#define SLANG_PRELUDE_EXTERN_C_START
+#define SLANG_PRELUDE_EXTERN_C_END
+#endif
#include "slang-cpp-scalar-intrinsics.h"
using namespace Slang;
template<typename TResult, typename... Args>
-using Slang_FuncType = TResult(SLANG_MCALL *)(Args...);
+using Slang_FuncType = TResult(SLANG_MCALL*)(Args...);
#endif
diff --git a/prelude/slang-cpp-prelude.h b/prelude/slang-cpp-prelude.h
index 2b848dc3b..4dacac9c5 100644
--- a/prelude/slang-cpp-prelude.h
+++ b/prelude/slang-cpp-prelude.h
@@ -2,42 +2,45 @@
#define SLANG_CPP_PRELUDE_H
// Because the signiture of isnan, isfinite, and is isinf changed in C++, we use the macro
-// to use the version in the std namespace.
+// to use the version in the std namespace.
// https://stackoverflow.com/questions/39130040/cmath-hides-isnan-in-math-h-in-c14-c11
-
+
#ifdef SLANG_LLVM
#include "slang-llvm.h"
#else // SLANG_LLVM
-# if SLANG_GCC_FAMILY && __GNUC__ < 6
-# include <cmath>
-# define SLANG_PRELUDE_STD std::
-# else
-# include <math.h>
-# define SLANG_PRELUDE_STD
-# endif
-
-# include <assert.h>
-# include <stdlib.h>
-# include <string.h>
-# include <stdint.h>
+#if SLANG_GCC_FAMILY && __GNUC__ < 6
+#include <cmath>
+#define SLANG_PRELUDE_STD std::
+#else
+#include <math.h>
+#define SLANG_PRELUDE_STD
+#endif
+
+#include <assert.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
#endif // SLANG_LLVM
#if defined(_MSC_VER)
-# define SLANG_PRELUDE_SHARED_LIB_EXPORT __declspec(dllexport)
+#define SLANG_PRELUDE_SHARED_LIB_EXPORT __declspec(dllexport)
#else
-# define SLANG_PRELUDE_SHARED_LIB_EXPORT __attribute__((__visibility__("default")))
-//# define SLANG_PRELUDE_SHARED_LIB_EXPORT __attribute__ ((dllexport)) __attribute__((__visibility__("default")))
-#endif
-
-#ifdef __cplusplus
-# define SLANG_PRELUDE_EXTERN_C extern "C"
-# define SLANG_PRELUDE_EXTERN_C_START extern "C" {
-# define SLANG_PRELUDE_EXTERN_C_END }
+#define SLANG_PRELUDE_SHARED_LIB_EXPORT __attribute__((__visibility__("default")))
+// # define SLANG_PRELUDE_SHARED_LIB_EXPORT __attribute__ ((dllexport))
+// __attribute__((__visibility__("default")))
+#endif
+
+#ifdef __cplusplus
+#define SLANG_PRELUDE_EXTERN_C extern "C"
+#define SLANG_PRELUDE_EXTERN_C_START \
+ extern "C" \
+ {
+#define SLANG_PRELUDE_EXTERN_C_END }
#else
-# define SLANG_PRELUDE_EXTERN_C
-# define SLANG_PRELUDE_EXTERN_C_START
-# define SLANG_PRELUDE_EXTERN_C_END
-#endif
+#define SLANG_PRELUDE_EXTERN_C
+#define SLANG_PRELUDE_EXTERN_C_START
+#define SLANG_PRELUDE_EXTERN_C_END
+#endif
#define SLANG_PRELUDE_EXPORT SLANG_PRELUDE_EXTERN_C SLANG_PRELUDE_SHARED_LIB_EXPORT
#define SLANG_PRELUDE_EXPORT_START SLANG_PRELUDE_EXTERN_C_START SLANG_PRELUDE_SHARED_LIB_EXPORT
@@ -45,65 +48,65 @@
#ifndef INFINITY
// Must overflow for double
-# define INFINITY float(1e+300 * 1e+300)
+#define INFINITY float(1e+300 * 1e+300)
#endif
#ifndef SLANG_INFINITY
-# define SLANG_INFINITY INFINITY
+#define SLANG_INFINITY INFINITY
#endif
// Detect the compiler type
#ifndef SLANG_COMPILER
-# define SLANG_COMPILER
+#define SLANG_COMPILER
/*
Compiler defines, see http://sourceforge.net/p/predef/wiki/Compilers/
NOTE that SLANG_VC holds the compiler version - not just 1 or 0
*/
-# if defined(_MSC_VER)
-# if _MSC_VER >= 1900
-# define SLANG_VC 14
-# elif _MSC_VER >= 1800
-# define SLANG_VC 12
-# elif _MSC_VER >= 1700
-# define SLANG_VC 11
-# elif _MSC_VER >= 1600
-# define SLANG_VC 10
-# elif _MSC_VER >= 1500
-# define SLANG_VC 9
-# else
-# error "unknown version of Visual C++ compiler"
-# endif
-# elif defined(__clang__)
-# define SLANG_CLANG 1
-# elif defined(__SNC__)
-# define SLANG_SNC 1
-# elif defined(__ghs__)
-# define SLANG_GHS 1
-# elif defined(__GNUC__) /* note: __clang__, __SNC__, or __ghs__ imply __GNUC__ */
-# define SLANG_GCC 1
-# else
-# error "unknown compiler"
-# endif
+#if defined(_MSC_VER)
+#if _MSC_VER >= 1900
+#define SLANG_VC 14
+#elif _MSC_VER >= 1800
+#define SLANG_VC 12
+#elif _MSC_VER >= 1700
+#define SLANG_VC 11
+#elif _MSC_VER >= 1600
+#define SLANG_VC 10
+#elif _MSC_VER >= 1500
+#define SLANG_VC 9
+#else
+#error "unknown version of Visual C++ compiler"
+#endif
+#elif defined(__clang__)
+#define SLANG_CLANG 1
+#elif defined(__SNC__)
+#define SLANG_SNC 1
+#elif defined(__ghs__)
+#define SLANG_GHS 1
+#elif defined(__GNUC__) /* note: __clang__, __SNC__, or __ghs__ imply __GNUC__ */
+#define SLANG_GCC 1
+#else
+#error "unknown compiler"
+#endif
/*
Any compilers not detected by the above logic are now now explicitly zeroed out.
*/
-# ifndef SLANG_VC
-# define SLANG_VC 0
-# endif
-# ifndef SLANG_CLANG
-# define SLANG_CLANG 0
-# endif
-# ifndef SLANG_SNC
-# define SLANG_SNC 0
-# endif
-# ifndef SLANG_GHS
-# define SLANG_GHS 0
-# endif
-# ifndef SLANG_GCC
-# define SLANG_GCC 0
-# endif
+#ifndef SLANG_VC
+#define SLANG_VC 0
+#endif
+#ifndef SLANG_CLANG
+#define SLANG_CLANG 0
+#endif
+#ifndef SLANG_SNC
+#define SLANG_SNC 0
+#endif
+#ifndef SLANG_GHS
+#define SLANG_GHS 0
+#endif
+#ifndef SLANG_GCC
+#define SLANG_GCC 0
+#endif
#endif /* SLANG_COMPILER */
/*
@@ -116,89 +119,90 @@ used later in the file.
Most applications should not need to touch this section.
*/
#ifndef SLANG_PLATFORM
-# define SLANG_PLATFORM
+#define SLANG_PLATFORM
/**
Operating system defines, see http://sourceforge.net/p/predef/wiki/OperatingSystems/
*/
-# if defined(WINAPI_FAMILY) && WINAPI_FAMILY == WINAPI_PARTITION_APP
-# define SLANG_WINRT 1 /* Windows Runtime, either on Windows RT or Windows 8 */
-# elif defined(XBOXONE)
-# define SLANG_XBOXONE 1
-# elif defined(_WIN64) /* note: XBOXONE implies _WIN64 */
-# define SLANG_WIN64 1
-# elif defined(_M_PPC)
-# define SLANG_X360 1
-# elif defined(_WIN32) /* note: _M_PPC implies _WIN32 */
-# define SLANG_WIN32 1
-# elif defined(__ANDROID__)
-# define SLANG_ANDROID 1
-# elif defined(__linux__) || defined(__CYGWIN__) /* note: __ANDROID__ implies __linux__ */
-# define SLANG_LINUX 1
-# elif defined(__APPLE__) && !defined(SLANG_LLVM)
-# include "TargetConditionals.h"
-# if TARGET_OS_MAC
-# define SLANG_OSX 1
-# else
-# define SLANG_IOS 1
-# endif
-# elif defined(__APPLE__)
-// On `slang-llvm` we can't inclue "TargetConditionals.h" in general, so for now assume its OSX.
-# define SLANG_OSX 1
-# elif defined(__CELLOS_LV2__)
-# define SLANG_PS3 1
-# elif defined(__ORBIS__)
-# define SLANG_PS4 1
-# elif defined(__SNC__) && defined(__arm__)
-# define SLANG_PSP2 1
-# elif defined(__ghs__)
-# define SLANG_WIIU 1
-# else
-# error "unknown target platform"
-# endif
+#if defined(WINAPI_FAMILY) && WINAPI_FAMILY == WINAPI_PARTITION_APP
+#define SLANG_WINRT 1 /* Windows Runtime, either on Windows RT or Windows 8 */
+#elif defined(XBOXONE)
+#define SLANG_XBOXONE 1
+#elif defined(_WIN64) /* note: XBOXONE implies _WIN64 */
+#define SLANG_WIN64 1
+#elif defined(_M_PPC)
+#define SLANG_X360 1
+#elif defined(_WIN32) /* note: _M_PPC implies _WIN32 */
+#define SLANG_WIN32 1
+#elif defined(__ANDROID__)
+#define SLANG_ANDROID 1
+#elif defined(__linux__) || defined(__CYGWIN__) /* note: __ANDROID__ implies __linux__ */
+#define SLANG_LINUX 1
+#elif defined(__APPLE__) && !defined(SLANG_LLVM)
+#include "TargetConditionals.h"
+#if TARGET_OS_MAC
+#define SLANG_OSX 1
+#else
+#define SLANG_IOS 1
+#endif
+#elif defined(__APPLE__)
+// On `slang-llvm` we can't include "TargetConditionals.h" in general, so for now assume it's
+// OSX.
+#define SLANG_OSX 1
+#elif defined(__CELLOS_LV2__)
+#define SLANG_PS3 1
+#elif defined(__ORBIS__)
+#define SLANG_PS4 1
+#elif defined(__SNC__) && defined(__arm__)
+#define SLANG_PSP2 1
+#elif defined(__ghs__)
+#define SLANG_WIIU 1
+#else
+#error "unknown target platform"
+#endif
/*
Any platforms not detected by the above logic are now now explicitly zeroed out.
*/
-# ifndef SLANG_WINRT
-# define SLANG_WINRT 0
-# endif
-# ifndef SLANG_XBOXONE
-# define SLANG_XBOXONE 0
-# endif
-# ifndef SLANG_WIN64
-# define SLANG_WIN64 0
-# endif
-# ifndef SLANG_X360
-# define SLANG_X360 0
-# endif
-# ifndef SLANG_WIN32
-# define SLANG_WIN32 0
-# endif
-# ifndef SLANG_ANDROID
-# define SLANG_ANDROID 0
-# endif
-# ifndef SLANG_LINUX
-# define SLANG_LINUX 0
-# endif
-# ifndef SLANG_IOS
-# define SLANG_IOS 0
-# endif
-# ifndef SLANG_OSX
-# define SLANG_OSX 0
-# endif
-# ifndef SLANG_PS3
-# define SLANG_PS3 0
-# endif
-# ifndef SLANG_PS4
-# define SLANG_PS4 0
-# endif
-# ifndef SLANG_PSP2
-# define SLANG_PSP2 0
-# endif
-# ifndef SLANG_WIIU
-# define SLANG_WIIU 0
-# endif
+#ifndef SLANG_WINRT
+#define SLANG_WINRT 0
+#endif
+#ifndef SLANG_XBOXONE
+#define SLANG_XBOXONE 0
+#endif
+#ifndef SLANG_WIN64
+#define SLANG_WIN64 0
+#endif
+#ifndef SLANG_X360
+#define SLANG_X360 0
+#endif
+#ifndef SLANG_WIN32
+#define SLANG_WIN32 0
+#endif
+#ifndef SLANG_ANDROID
+#define SLANG_ANDROID 0
+#endif
+#ifndef SLANG_LINUX
+#define SLANG_LINUX 0
+#endif
+#ifndef SLANG_IOS
+#define SLANG_IOS 0
+#endif
+#ifndef SLANG_OSX
+#define SLANG_OSX 0
+#endif
+#ifndef SLANG_PS3
+#define SLANG_PS3 0
+#endif
+#ifndef SLANG_PS4
+#define SLANG_PS4 0
+#endif
+#ifndef SLANG_PSP2
+#define SLANG_PSP2 0
+#endif
+#ifndef SLANG_WIIU
+#define SLANG_WIIU 0
+#endif
#endif /* SLANG_PLATFORM */
/* Shorthands for "families" of compilers/platforms */
@@ -206,37 +210,38 @@ Any platforms not detected by the above logic are now now explicitly zeroed out.
#define SLANG_WINDOWS_FAMILY (SLANG_WINRT || SLANG_WIN32 || SLANG_WIN64)
#define SLANG_MICROSOFT_FAMILY (SLANG_XBOXONE || SLANG_X360 || SLANG_WINDOWS_FAMILY)
#define SLANG_LINUX_FAMILY (SLANG_LINUX || SLANG_ANDROID)
-#define SLANG_APPLE_FAMILY (SLANG_IOS || SLANG_OSX) /* equivalent to #if __APPLE__ */
-#define SLANG_UNIX_FAMILY (SLANG_LINUX_FAMILY || SLANG_APPLE_FAMILY) /* shortcut for unix/posix platforms */
+#define SLANG_APPLE_FAMILY (SLANG_IOS || SLANG_OSX) /* equivalent to #if __APPLE__ */
+#define SLANG_UNIX_FAMILY \
+ (SLANG_LINUX_FAMILY || SLANG_APPLE_FAMILY) /* shortcut for unix/posix platforms */
// GCC Specific
#if SLANG_GCC_FAMILY
-# define SLANG_ALIGN_OF(T) __alignof__(T)
+#define SLANG_ALIGN_OF(T) __alignof__(T)
-# define SLANG_BREAKPOINT(id) __builtin_trap()
+#define SLANG_BREAKPOINT(id) __builtin_trap()
-// Use this macro instead of offsetof, because gcc produces warning if offsetof is used on a
+// Use this macro instead of offsetof, because gcc produces warning if offsetof is used on a
// non POD type, even though it produces the correct result
-# define SLANG_OFFSET_OF(T, ELEMENT) (size_t(&((T*)1)->ELEMENT) - 1)
+#define SLANG_OFFSET_OF(T, ELEMENT) (size_t(&((T*)1)->ELEMENT) - 1)
#endif // SLANG_GCC_FAMILY
// Microsoft VC specific
#if SLANG_VC
-# define SLANG_ALIGN_OF(T) __alignof(T)
+#define SLANG_ALIGN_OF(T) __alignof(T)
-# define SLANG_BREAKPOINT(id) __debugbreak();
+#define SLANG_BREAKPOINT(id) __debugbreak();
#endif // SLANG_VC
// Default impls
#ifndef SLANG_OFFSET_OF
-# define SLANG_OFFSET_OF(X, Y) offsetof(X, Y)
+#define SLANG_OFFSET_OF(X, Y) offsetof(X, Y)
#endif
#ifndef SLANG_BREAKPOINT
// Make it crash with a write to 0!
-# define SLANG_BREAKPOINT(id) (*((int*)0) = int(id));
+#define SLANG_BREAKPOINT(id) (*((int*)0) = int(id));
#endif
// If slang.h has been included we don't need any of these definitions
@@ -244,33 +249,33 @@ Any platforms not detected by the above logic are now now explicitly zeroed out.
/* Macro for declaring if a method is no throw. Should be set before the return parameter. */
#ifndef SLANG_NO_THROW
-# if SLANG_WINDOWS_FAMILY && !defined(SLANG_DISABLE_EXCEPTIONS)
-# define SLANG_NO_THROW __declspec(nothrow)
-# endif
+#if SLANG_WINDOWS_FAMILY && !defined(SLANG_DISABLE_EXCEPTIONS)
+#define SLANG_NO_THROW __declspec(nothrow)
+#endif
#endif
#ifndef SLANG_NO_THROW
-# define SLANG_NO_THROW
+#define SLANG_NO_THROW
#endif
/* The `SLANG_STDCALL` and `SLANG_MCALL` defines are used to set the calling
convention for interface methods.
*/
#ifndef SLANG_STDCALL
-# if SLANG_MICROSOFT_FAMILY
-# define SLANG_STDCALL __stdcall
-# else
-# define SLANG_STDCALL
-# endif
+#if SLANG_MICROSOFT_FAMILY
+#define SLANG_STDCALL __stdcall
+#else
+#define SLANG_STDCALL
+#endif
#endif
#ifndef SLANG_MCALL
-# define SLANG_MCALL SLANG_STDCALL
+#define SLANG_MCALL SLANG_STDCALL
#endif
#ifndef SLANG_FORCE_INLINE
-# define SLANG_FORCE_INLINE inline
+#define SLANG_FORCE_INLINE inline
#endif
-// TODO(JS): Should these be in slang-cpp-types.h?
+// TODO(JS): Should these be in slang-cpp-types.h?
// They are more likely to clash with slang.h
struct SlangUUID
@@ -278,24 +283,25 @@ struct SlangUUID
uint32_t data1;
uint16_t data2;
uint16_t data3;
- uint8_t data4[8];
+ uint8_t data4[8];
};
typedef int32_t SlangResult;
struct ISlangUnknown
{
- virtual SLANG_NO_THROW SlangResult SLANG_MCALL queryInterface(SlangUUID const& uuid, void** outObject) = 0;
+ virtual SLANG_NO_THROW SlangResult SLANG_MCALL
+ queryInterface(SlangUUID const& uuid, void** outObject) = 0;
virtual SLANG_NO_THROW uint32_t SLANG_MCALL addRef() = 0;
virtual SLANG_NO_THROW uint32_t SLANG_MCALL release() = 0;
};
-#define SLANG_COM_INTERFACE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \
- public: \
- SLANG_FORCE_INLINE static const SlangUUID& getTypeGuid() \
- { \
- static const SlangUUID guid = { a, b, c, d0, d1, d2, d3, d4, d5, d6, d7 }; \
- return guid; \
+#define SLANG_COM_INTERFACE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \
+public: \
+ SLANG_FORCE_INLINE static const SlangUUID& getTypeGuid() \
+ { \
+ static const SlangUUID guid = {a, b, c, d0, d1, d2, d3, d4, d5, d6, d7}; \
+ return guid; \
}
#endif // SLANG_H
@@ -304,13 +310,13 @@ struct ISlangUnknown
#include "slang-cpp-scalar-intrinsics.h"
#include "slang-cpp-types.h"
-// TODO(JS): Hack! Output C++ code from slang can copy uninitialized variables.
+// TODO(JS): Hack! Output C++ code from slang can copy uninitialized variables.
#if defined(_MSC_VER)
-# pragma warning(disable : 4700)
+#pragma warning(disable : 4700)
#endif
#ifndef SLANG_UNROLL
-# define SLANG_UNROLL
+#define SLANG_UNROLL
#endif
#endif
diff --git a/prelude/slang-cpp-scalar-intrinsics.h b/prelude/slang-cpp-scalar-intrinsics.h
index 1ade8614f..6aa72df4f 100644
--- a/prelude/slang-cpp-scalar-intrinsics.h
+++ b/prelude/slang-cpp-scalar-intrinsics.h
@@ -2,24 +2,26 @@
#define SLANG_PRELUDE_SCALAR_INTRINSICS_H
#if !defined(SLANG_LLVM) && SLANG_PROCESSOR_X86_64 && SLANG_VC
-// If we have visual studio and 64 bit processor, we can assume we have popcnt, and can include x86 intrinsics
-# include <intrin.h>
+// If we have visual studio and 64 bit processor, we can assume we have popcnt, and can include
+// x86 intrinsics
+#include <intrin.h>
#endif
#ifndef SLANG_FORCE_INLINE
-# define SLANG_FORCE_INLINE inline
+#define SLANG_FORCE_INLINE inline
#endif
#ifdef SLANG_PRELUDE_NAMESPACE
-namespace SLANG_PRELUDE_NAMESPACE {
+namespace SLANG_PRELUDE_NAMESPACE
+{
#endif
#ifndef SLANG_PRELUDE_PI
-# define SLANG_PRELUDE_PI 3.14159265358979323846
+#define SLANG_PRELUDE_PI 3.14159265358979323846
#endif
-union Union32
+union Union32
{
uint32_t u;
int32_t i;
@@ -34,10 +36,30 @@ union Union64
};
// 32 bit cast conversions
-SLANG_FORCE_INLINE int32_t _bitCastFloatToInt(float f) { Union32 u; u.f = f; return u.i; }
-SLANG_FORCE_INLINE float _bitCastIntToFloat(int32_t i) { Union32 u; u.i = i; return u.f; }
-SLANG_FORCE_INLINE uint32_t _bitCastFloatToUInt(float f) { Union32 u; u.f = f; return u.u; }
-SLANG_FORCE_INLINE float _bitCastUIntToFloat(uint32_t ui) { Union32 u; u.u = ui; return u.f; }
+SLANG_FORCE_INLINE int32_t _bitCastFloatToInt(float f)
+{
+ Union32 u;
+ u.f = f;
+ return u.i;
+}
+SLANG_FORCE_INLINE float _bitCastIntToFloat(int32_t i)
+{
+ Union32 u;
+ u.i = i;
+ return u.f;
+}
+SLANG_FORCE_INLINE uint32_t _bitCastFloatToUInt(float f)
+{
+ Union32 u;
+ u.f = f;
+ return u.u;
+}
+SLANG_FORCE_INLINE float _bitCastUIntToFloat(uint32_t ui)
+{
+ Union32 u;
+ u.u = ui;
+ return u.f;
+}
// ----------------------------- F16 -----------------------------------------
@@ -61,27 +83,27 @@ SLANG_FORCE_INLINE uint32_t f32tof16(const float value)
if (e == 0xff)
{
// Could be a NAN or INF. Is INF if *input* mantissa is 0.
-
+
// Remove last bit for rounding to make output mantissa.
m >>= 1;
-
+
// We *assume* float16/float32 signaling bit and remaining bits
// semantics are the same. (The signalling bit convention is target specific!).
// Non signal bit's usage within mantissa for a NAN are also target specific.
-
- // If the m is 0, it could be because the result is INF, but it could also be because all the
- // bits that made NAN were dropped as we have less mantissa bits in f16.
-
+
+ // If the m is 0, it could be because the result is INF, but it could also be because all
+ // the bits that made NAN were dropped as we have less mantissa bits in f16.
+
// To fix for this we make non zero if m is 0 and the input mantissa was not.
// This will (typically) produce a signalling NAN.
m += uint32_t(m == 0 && (inBits & 0x007fffffu));
-
+
// Combine for output
return (bits | 0x7c00u | m);
}
if (e > 142)
{
- // INF.
+ // INF.
return bits | 0x7c00u;
}
if (e < 113)
@@ -105,7 +127,7 @@ SLANG_FORCE_INLINE float f16tof32(const uint32_t value)
if (exponent == 0)
{
- // If mantissa is 0 we are done, as output is 0.
+ // If mantissa is 0 we are done, as output is 0.
// If it's not zero we must have a denormal.
if (mantissa)
{
@@ -113,16 +135,17 @@ SLANG_FORCE_INLINE float f16tof32(const uint32_t value)
return _bitCastIntToFloat(sign | ((value & 0x7fff) << 13)) * g_f16tof32Magic;
}
}
- else
+ else
{
- // If the exponent is NAN or INF exponent is 0x1f on input.
+ // If the exponent is NAN or INF exponent is 0x1f on input.
// If that's the case, we just need to set the exponent to 0xff on output
- // and the mantissa can just stay the same. If its 0 it's INF, else it is NAN and we just copy the bits
+ // and the mantissa can just stay the same. If its 0 it's INF, else it is NAN and we just
+ // copy the bits
//
// Else we need to correct the exponent in the normalized case.
exponent = (exponent == 0x1F) ? 0xff : (exponent + (-15 + 127));
}
-
+
return _bitCastUIntToFloat(sign | (exponent << 23) | (mantissa << 13));
}
@@ -135,7 +158,7 @@ SLANG_FORCE_INLINE float F32_calcSafeRadians(float radians);
SLANG_PRELUDE_EXTERN_C_START
-// Unary
+// Unary
float F32_ceil(float f);
float F32_floor(float f);
float F32_round(float f);
@@ -158,12 +181,18 @@ float F32_trunc(float f);
float F32_sqrt(float f);
bool F32_isnan(float f);
-bool F32_isfinite(float f);
+bool F32_isfinite(float f);
bool F32_isinf(float f);
// Binary
-SLANG_FORCE_INLINE float F32_min(float a, float b) { return a < b ? a : b; }
-SLANG_FORCE_INLINE float F32_max(float a, float b) { return a > b ? a : b; }
+SLANG_FORCE_INLINE float F32_min(float a, float b)
+{
+ return a < b ? a : b;
+}
+SLANG_FORCE_INLINE float F32_max(float a, float b)
+{
+ return a > b ? a : b;
+}
float F32_pow(float a, float b);
float F32_fmod(float a, float b);
float F32_remainder(float a, float b);
@@ -174,47 +203,140 @@ float F32_frexp(float x, int* e);
float F32_modf(float x, float* ip);
// Ternary
-SLANG_FORCE_INLINE float F32_fma(float a, float b, float c) { return a * b + c; }
+SLANG_FORCE_INLINE float F32_fma(float a, float b, float c)
+{
+ return a * b + c;
+}
SLANG_PRELUDE_EXTERN_C_END
#else
-// Unary
-SLANG_FORCE_INLINE float F32_ceil(float f) { return ::ceilf(f); }
-SLANG_FORCE_INLINE float F32_floor(float f) { return ::floorf(f); }
-SLANG_FORCE_INLINE float F32_round(float f) { return ::roundf(f); }
-SLANG_FORCE_INLINE float F32_sin(float f) { return ::sinf(f); }
-SLANG_FORCE_INLINE float F32_cos(float f) { return ::cosf(f); }
-SLANG_FORCE_INLINE float F32_tan(float f) { return ::tanf(f); }
-SLANG_FORCE_INLINE float F32_asin(float f) { return ::asinf(f); }
-SLANG_FORCE_INLINE float F32_acos(float f) { return ::acosf(f); }
-SLANG_FORCE_INLINE float F32_atan(float f) { return ::atanf(f); }
-SLANG_FORCE_INLINE float F32_sinh(float f) { return ::sinhf(f); }
-SLANG_FORCE_INLINE float F32_cosh(float f) { return ::coshf(f); }
-SLANG_FORCE_INLINE float F32_tanh(float f) { return ::tanhf(f); }
-SLANG_FORCE_INLINE float F32_log2(float f) { return ::log2f(f); }
-SLANG_FORCE_INLINE float F32_log(float f) { return ::logf(f); }
-SLANG_FORCE_INLINE float F32_log10(float f) { return ::log10f(f); }
-SLANG_FORCE_INLINE float F32_exp2(float f) { return ::exp2f(f); }
-SLANG_FORCE_INLINE float F32_exp(float f) { return ::expf(f); }
-SLANG_FORCE_INLINE float F32_abs(float f) { return ::fabsf(f); }
-SLANG_FORCE_INLINE float F32_trunc(float f) { return ::truncf(f); }
-SLANG_FORCE_INLINE float F32_sqrt(float f) { return ::sqrtf(f); }
-
-SLANG_FORCE_INLINE bool F32_isnan(float f) { return SLANG_PRELUDE_STD isnan(f); }
-SLANG_FORCE_INLINE bool F32_isfinite(float f) { return SLANG_PRELUDE_STD isfinite(f); }
-SLANG_FORCE_INLINE bool F32_isinf(float f) { return SLANG_PRELUDE_STD isinf(f); }
+// Unary
+SLANG_FORCE_INLINE float F32_ceil(float f)
+{
+ return ::ceilf(f);
+}
+SLANG_FORCE_INLINE float F32_floor(float f)
+{
+ return ::floorf(f);
+}
+SLANG_FORCE_INLINE float F32_round(float f)
+{
+ return ::roundf(f);
+}
+SLANG_FORCE_INLINE float F32_sin(float f)
+{
+ return ::sinf(f);
+}
+SLANG_FORCE_INLINE float F32_cos(float f)
+{
+ return ::cosf(f);
+}
+SLANG_FORCE_INLINE float F32_tan(float f)
+{
+ return ::tanf(f);
+}
+SLANG_FORCE_INLINE float F32_asin(float f)
+{
+ return ::asinf(f);
+}
+SLANG_FORCE_INLINE float F32_acos(float f)
+{
+ return ::acosf(f);
+}
+SLANG_FORCE_INLINE float F32_atan(float f)
+{
+ return ::atanf(f);
+}
+SLANG_FORCE_INLINE float F32_sinh(float f)
+{
+ return ::sinhf(f);
+}
+SLANG_FORCE_INLINE float F32_cosh(float f)
+{
+ return ::coshf(f);
+}
+SLANG_FORCE_INLINE float F32_tanh(float f)
+{
+ return ::tanhf(f);
+}
+SLANG_FORCE_INLINE float F32_log2(float f)
+{
+ return ::log2f(f);
+}
+SLANG_FORCE_INLINE float F32_log(float f)
+{
+ return ::logf(f);
+}
+SLANG_FORCE_INLINE float F32_log10(float f)
+{
+ return ::log10f(f);
+}
+SLANG_FORCE_INLINE float F32_exp2(float f)
+{
+ return ::exp2f(f);
+}
+SLANG_FORCE_INLINE float F32_exp(float f)
+{
+ return ::expf(f);
+}
+SLANG_FORCE_INLINE float F32_abs(float f)
+{
+ return ::fabsf(f);
+}
+SLANG_FORCE_INLINE float F32_trunc(float f)
+{
+ return ::truncf(f);
+}
+SLANG_FORCE_INLINE float F32_sqrt(float f)
+{
+ return ::sqrtf(f);
+}
+
+SLANG_FORCE_INLINE bool F32_isnan(float f)
+{
+ return SLANG_PRELUDE_STD isnan(f);
+}
+SLANG_FORCE_INLINE bool F32_isfinite(float f)
+{
+ return SLANG_PRELUDE_STD isfinite(f);
+}
+SLANG_FORCE_INLINE bool F32_isinf(float f)
+{
+ return SLANG_PRELUDE_STD isinf(f);
+}
// Binary
-SLANG_FORCE_INLINE float F32_min(float a, float b) { return ::fminf(a, b); }
-SLANG_FORCE_INLINE float F32_max(float a, float b) { return ::fmaxf(a, b); }
-SLANG_FORCE_INLINE float F32_pow(float a, float b) { return ::powf(a, b); }
-SLANG_FORCE_INLINE float F32_fmod(float a, float b) { return ::fmodf(a, b); }
-SLANG_FORCE_INLINE float F32_remainder(float a, float b) { return ::remainderf(a, b); }
-SLANG_FORCE_INLINE float F32_atan2(float a, float b) { return float(::atan2(a, b)); }
+SLANG_FORCE_INLINE float F32_min(float a, float b)
+{
+ return ::fminf(a, b);
+}
+SLANG_FORCE_INLINE float F32_max(float a, float b)
+{
+ return ::fmaxf(a, b);
+}
+SLANG_FORCE_INLINE float F32_pow(float a, float b)
+{
+ return ::powf(a, b);
+}
+SLANG_FORCE_INLINE float F32_fmod(float a, float b)
+{
+ return ::fmodf(a, b);
+}
+SLANG_FORCE_INLINE float F32_remainder(float a, float b)
+{
+ return ::remainderf(a, b);
+}
+SLANG_FORCE_INLINE float F32_atan2(float a, float b)
+{
+ return float(::atan2(a, b));
+}
-SLANG_FORCE_INLINE float F32_frexp(float x, int* e) { return ::frexpf(x, e); }
+SLANG_FORCE_INLINE float F32_frexp(float x, int* e)
+{
+ return ::frexpf(x, e);
+}
SLANG_FORCE_INLINE float F32_modf(float x, float* ip)
{
@@ -222,26 +344,48 @@ SLANG_FORCE_INLINE float F32_modf(float x, float* ip)
}
// Ternary
-SLANG_FORCE_INLINE float F32_fma(float a, float b, float c) { return ::fmaf(a, b, c); }
+SLANG_FORCE_INLINE float F32_fma(float a, float b, float c)
+{
+ return ::fmaf(a, b, c);
+}
#endif
SLANG_FORCE_INLINE float F32_calcSafeRadians(float radians)
{
- // Put 0 to 2pi cycles to cycle around 0 to 1
- float a = radians * (1.0f / float(SLANG_PRELUDE_PI * 2));
+ // Put 0 to 2pi cycles to cycle around 0 to 1
+ float a = radians * (1.0f / float(SLANG_PRELUDE_PI * 2));
// Get truncated fraction, as value in 0 - 1 range
a = a - F32_floor(a);
// Convert back to 0 - 2pi range
- return (a * float(SLANG_PRELUDE_PI * 2));
+ return (a * float(SLANG_PRELUDE_PI * 2));
}
-SLANG_FORCE_INLINE float F32_rsqrt(float f) { return 1.0f / F32_sqrt(f); }
-SLANG_FORCE_INLINE float F32_sign(float f) { return ( f == 0.0f) ? f : (( f < 0.0f) ? -1.0f : 1.0f); }
-SLANG_FORCE_INLINE float F32_frac(float f) { return f - F32_floor(f); }
+SLANG_FORCE_INLINE float F32_rsqrt(float f)
+{
+ return 1.0f / F32_sqrt(f);
+}
+SLANG_FORCE_INLINE float F32_sign(float f)
+{
+ return (f == 0.0f) ? f : ((f < 0.0f) ? -1.0f : 1.0f);
+}
+SLANG_FORCE_INLINE float F32_frac(float f)
+{
+ return f - F32_floor(f);
+}
-SLANG_FORCE_INLINE uint32_t F32_asuint(float f) { Union32 u; u.f = f; return u.u; }
-SLANG_FORCE_INLINE int32_t F32_asint(float f) { Union32 u; u.f = f; return u.i; }
+SLANG_FORCE_INLINE uint32_t F32_asuint(float f)
+{
+ Union32 u;
+ u.f = f;
+ return u.u;
+}
+SLANG_FORCE_INLINE int32_t F32_asint(float f)
+{
+ Union32 u;
+ u.f = f;
+ return u.i;
+}
// ----------------------------- F64 -----------------------------------------
@@ -251,7 +395,7 @@ SLANG_FORCE_INLINE double F64_calcSafeRadians(double radians);
SLANG_PRELUDE_EXTERN_C_START
-// Unary
+// Unary
double F64_ceil(double f);
double F64_floor(double f);
double F64_round(double f);
@@ -278,8 +422,14 @@ bool F64_isfinite(double f);
bool F64_isinf(double f);
// Binary
-SLANG_FORCE_INLINE double F64_min(double a, double b) { return a < b ? a : b; }
-SLANG_FORCE_INLINE double F64_max(double a, double b) { return a > b ? a : b; }
+SLANG_FORCE_INLINE double F64_min(double a, double b)
+{
+ return a < b ? a : b;
+}
+SLANG_FORCE_INLINE double F64_max(double a, double b)
+{
+ return a > b ? a : b;
+}
double F64_pow(double a, double b);
double F64_fmod(double a, double b);
double F64_remainder(double a, double b);
@@ -290,48 +440,141 @@ double F64_frexp(double x, int* e);
double F64_modf(double x, double* ip);
// Ternary
-SLANG_FORCE_INLINE double F64_fma(double a, double b, double c) { return a * b + c; }
+SLANG_FORCE_INLINE double F64_fma(double a, double b, double c)
+{
+ return a * b + c;
+}
SLANG_PRELUDE_EXTERN_C_END
#else // SLANG_LLVM
-// Unary
-SLANG_FORCE_INLINE double F64_ceil(double f) { return ::ceil(f); }
-SLANG_FORCE_INLINE double F64_floor(double f) { return ::floor(f); }
-SLANG_FORCE_INLINE double F64_round(double f) { return ::round(f); }
-SLANG_FORCE_INLINE double F64_sin(double f) { return ::sin(f); }
-SLANG_FORCE_INLINE double F64_cos(double f) { return ::cos(f); }
-SLANG_FORCE_INLINE double F64_tan(double f) { return ::tan(f); }
-SLANG_FORCE_INLINE double F64_asin(double f) { return ::asin(f); }
-SLANG_FORCE_INLINE double F64_acos(double f) { return ::acos(f); }
-SLANG_FORCE_INLINE double F64_atan(double f) { return ::atan(f); }
-SLANG_FORCE_INLINE double F64_sinh(double f) { return ::sinh(f); }
-SLANG_FORCE_INLINE double F64_cosh(double f) { return ::cosh(f); }
-SLANG_FORCE_INLINE double F64_tanh(double f) { return ::tanh(f); }
-SLANG_FORCE_INLINE double F64_log2(double f) { return ::log2(f); }
-SLANG_FORCE_INLINE double F64_log(double f) { return ::log(f); }
-SLANG_FORCE_INLINE double F64_log10(float f) { return ::log10(f); }
-SLANG_FORCE_INLINE double F64_exp2(double f) { return ::exp2(f); }
-SLANG_FORCE_INLINE double F64_exp(double f) { return ::exp(f); }
-SLANG_FORCE_INLINE double F64_abs(double f) { return ::fabs(f); }
-SLANG_FORCE_INLINE double F64_trunc(double f) { return ::trunc(f); }
-SLANG_FORCE_INLINE double F64_sqrt(double f) { return ::sqrt(f); }
-
-
-SLANG_FORCE_INLINE bool F64_isnan(double f) { return SLANG_PRELUDE_STD isnan(f); }
-SLANG_FORCE_INLINE bool F64_isfinite(double f) { return SLANG_PRELUDE_STD isfinite(f); }
-SLANG_FORCE_INLINE bool F64_isinf(double f) { return SLANG_PRELUDE_STD isinf(f); }
+// Unary
+SLANG_FORCE_INLINE double F64_ceil(double f)
+{
+ return ::ceil(f);
+}
+SLANG_FORCE_INLINE double F64_floor(double f)
+{
+ return ::floor(f);
+}
+SLANG_FORCE_INLINE double F64_round(double f)
+{
+ return ::round(f);
+}
+SLANG_FORCE_INLINE double F64_sin(double f)
+{
+ return ::sin(f);
+}
+SLANG_FORCE_INLINE double F64_cos(double f)
+{
+ return ::cos(f);
+}
+SLANG_FORCE_INLINE double F64_tan(double f)
+{
+ return ::tan(f);
+}
+SLANG_FORCE_INLINE double F64_asin(double f)
+{
+ return ::asin(f);
+}
+SLANG_FORCE_INLINE double F64_acos(double f)
+{
+ return ::acos(f);
+}
+SLANG_FORCE_INLINE double F64_atan(double f)
+{
+ return ::atan(f);
+}
+SLANG_FORCE_INLINE double F64_sinh(double f)
+{
+ return ::sinh(f);
+}
+SLANG_FORCE_INLINE double F64_cosh(double f)
+{
+ return ::cosh(f);
+}
+SLANG_FORCE_INLINE double F64_tanh(double f)
+{
+ return ::tanh(f);
+}
+SLANG_FORCE_INLINE double F64_log2(double f)
+{
+ return ::log2(f);
+}
+SLANG_FORCE_INLINE double F64_log(double f)
+{
+ return ::log(f);
+}
+SLANG_FORCE_INLINE double F64_log10(double f) // double, not float: avoids rounding the argument
+{
+ return ::log10(f);
+}
+SLANG_FORCE_INLINE double F64_exp2(double f)
+{
+ return ::exp2(f);
+}
+SLANG_FORCE_INLINE double F64_exp(double f)
+{
+ return ::exp(f);
+}
+SLANG_FORCE_INLINE double F64_abs(double f)
+{
+ return ::fabs(f);
+}
+SLANG_FORCE_INLINE double F64_trunc(double f)
+{
+ return ::trunc(f);
+}
+SLANG_FORCE_INLINE double F64_sqrt(double f)
+{
+ return ::sqrt(f);
+}
+
+
+SLANG_FORCE_INLINE bool F64_isnan(double f)
+{
+ return SLANG_PRELUDE_STD isnan(f);
+}
+SLANG_FORCE_INLINE bool F64_isfinite(double f)
+{
+ return SLANG_PRELUDE_STD isfinite(f);
+}
+SLANG_FORCE_INLINE bool F64_isinf(double f)
+{
+ return SLANG_PRELUDE_STD isinf(f);
+}
// Binary
-SLANG_FORCE_INLINE double F64_min(double a, double b) { return ::fmin(a, b); }
-SLANG_FORCE_INLINE double F64_max(double a, double b) { return ::fmax(a, b); }
-SLANG_FORCE_INLINE double F64_pow(double a, double b) { return ::pow(a, b); }
-SLANG_FORCE_INLINE double F64_fmod(double a, double b) { return ::fmod(a, b); }
-SLANG_FORCE_INLINE double F64_remainder(double a, double b) { return ::remainder(a, b); }
-SLANG_FORCE_INLINE double F64_atan2(double a, double b) { return ::atan2(a, b); }
+SLANG_FORCE_INLINE double F64_min(double a, double b) // Minimum computed by ::fmin, not by a hand-written comparison.
+{
+ return ::fmin(a, b);
+}
+SLANG_FORCE_INLINE double F64_max(double a, double b) // Maximum computed by ::fmax, not by a hand-written comparison.
+{
+ return ::fmax(a, b);
+}
+SLANG_FORCE_INLINE double F64_pow(double a, double b) // Forwards to ::pow with a as base and b as exponent.
+{
+ return ::pow(a, b);
+}
+SLANG_FORCE_INLINE double F64_fmod(double a, double b) // Forwards (a, b) to the global ::fmod.
+{
+ return ::fmod(a, b);
+}
+SLANG_FORCE_INLINE double F64_remainder(double a, double b) // Forwards (a, b) to the global ::remainder.
+{
+ return ::remainder(a, b);
+}
+SLANG_FORCE_INLINE double F64_atan2(double a, double b) // a is passed as the first argument of ::atan2, b as the second.
+{
+ return ::atan2(a, b);
+}
-SLANG_FORCE_INLINE double F64_frexp(double x, int* e) { return ::frexp(x, e); }
+SLANG_FORCE_INLINE double F64_frexp(double x, int* e) // Forwards to ::frexp; the exponent is written through e.
+{
+ return ::frexp(x, e);
+}
SLANG_FORCE_INLINE double F64_modf(double x, double* ip)
{
@@ -339,13 +582,25 @@ SLANG_FORCE_INLINE double F64_modf(double x, double* ip)
}
// Ternary
-SLANG_FORCE_INLINE double F64_fma(double a, double b, double c) { return ::fma(a, b, c); }
+SLANG_FORCE_INLINE double F64_fma(double a, double b, double c) // Forwards (a, b, c) to the global ::fma.
+{
+ return ::fma(a, b, c);
+}
#endif // SLANG_LLVM
-SLANG_FORCE_INLINE double F64_rsqrt(double f) { return 1.0 / F64_sqrt(f); }
-SLANG_FORCE_INLINE double F64_sign(double f) { return (f == 0.0) ? f : ((f < 0.0) ? -1.0 : 1.0); }
-SLANG_FORCE_INLINE double F64_frac(double f) { return f - F64_floor(f); }
+// Reciprocal square root: 1.0 divided by F64_sqrt(f).
+SLANG_FORCE_INLINE double F64_rsqrt(double f)
+{
+    const double root = F64_sqrt(f);
+    return 1.0 / root;
+}
+// Sign of f as a double.
+// A value comparing equal to zero is returned unchanged, values below zero give -1.0,
+// and every other input (including one for which both comparisons are false) gives 1.0.
+SLANG_FORCE_INLINE double F64_sign(double f)
+{
+    if (f == 0.0)
+        return f;
+    return (f < 0.0) ? -1.0 : 1.0;
+}
+// Fractional part of f, computed as f minus F64_floor(f).
+SLANG_FORCE_INLINE double F64_frac(double f)
+{
+    const double whole = F64_floor(f);
+    return f - whole;
+}
SLANG_FORCE_INLINE void F64_asuint(double d, uint32_t* low, uint32_t* hi)
{
@@ -365,24 +620,41 @@ SLANG_FORCE_INLINE void F64_asint(double d, int32_t* low, int32_t* hi)
SLANG_FORCE_INLINE double F64_calcSafeRadians(double radians)
{
- // Put 0 to 2pi cycles to cycle around 0 to 1
- double a = radians * (1.0f / (SLANG_PRELUDE_PI * 2));
+ // Put 0 to 2pi cycles to cycle around 0 to 1
+ double a = radians * (1.0f / (SLANG_PRELUDE_PI * 2));
// Get truncated fraction, as value in 0 - 1 range
a = a - F64_floor(a);
// Convert back to 0 - 2pi range
- return (a * (SLANG_PRELUDE_PI * 2));
+ return (a * (SLANG_PRELUDE_PI * 2));
}
// ----------------------------- I32 -----------------------------------------
-SLANG_FORCE_INLINE int32_t I32_abs(int32_t f) { return (f < 0) ? -f : f; }
+// Absolute value of a signed 32-bit integer.
+// The negation is carried out in uint32_t so that INT32_MIN does not trigger signed-overflow
+// undefined behaviour; for that single input the result wraps back to INT32_MIN.
+SLANG_FORCE_INLINE int32_t I32_abs(int32_t f)
+{
+    return (f < 0) ? int32_t(uint32_t(0) - uint32_t(f)) : f;
+}
-SLANG_FORCE_INLINE int32_t I32_min(int32_t a, int32_t b) { return a < b ? a : b; }
-SLANG_FORCE_INLINE int32_t I32_max(int32_t a, int32_t b) { return a > b ? a : b; }
+// Smaller of two signed 32-bit integers (b when neither is smaller).
+SLANG_FORCE_INLINE int32_t I32_min(int32_t a, int32_t b)
+{
+    if (a < b)
+        return a;
+    return b;
+}
+// Larger of two signed 32-bit integers (b when neither is larger).
+SLANG_FORCE_INLINE int32_t I32_max(int32_t a, int32_t b)
+{
+    if (a > b)
+        return a;
+    return b;
+}
-SLANG_FORCE_INLINE float I32_asfloat(int32_t x) { Union32 u; u.i = x; return u.f; }
-SLANG_FORCE_INLINE uint32_t I32_asuint(int32_t x) { return uint32_t(x); }
-SLANG_FORCE_INLINE double I32_asdouble(int32_t low, int32_t hi )
+SLANG_FORCE_INLINE float I32_asfloat(int32_t x) // Converts via Union32: presumably a union whose i and f members overlap (declared elsewhere) - confirm.
+{
+ Union32 u;
+ u.i = x; // store through the integer member
+ return u.f; // read back through the float member
+}
+// Converts a signed 32-bit integer to uint32_t with an ordinary integral conversion.
+SLANG_FORCE_INLINE uint32_t I32_asuint(int32_t x)
+{
+    return static_cast<uint32_t>(x);
+}
+SLANG_FORCE_INLINE double I32_asdouble(int32_t low, int32_t hi)
{
Union64 u;
u.u = (uint64_t(hi) << 32) | uint32_t(low);
@@ -391,13 +663,30 @@ SLANG_FORCE_INLINE double I32_asdouble(int32_t low, int32_t hi )
// ----------------------------- U32 -----------------------------------------
-SLANG_FORCE_INLINE uint32_t U32_abs(uint32_t f) { return f; }
+SLANG_FORCE_INLINE uint32_t U32_abs(uint32_t f) // Identity: the unsigned argument is returned as-is.
+{
+ return f;
+}
-SLANG_FORCE_INLINE uint32_t U32_min(uint32_t a, uint32_t b) { return a < b ? a : b; }
-SLANG_FORCE_INLINE uint32_t U32_max(uint32_t a, uint32_t b) { return a > b ? a : b; }
+// Smaller of two unsigned 32-bit integers (b when neither is smaller).
+SLANG_FORCE_INLINE uint32_t U32_min(uint32_t a, uint32_t b)
+{
+    if (a < b)
+        return a;
+    return b;
+}
+// Larger of two unsigned 32-bit integers (b when neither is larger).
+SLANG_FORCE_INLINE uint32_t U32_max(uint32_t a, uint32_t b)
+{
+    if (a > b)
+        return a;
+    return b;
+}
-SLANG_FORCE_INLINE float U32_asfloat(uint32_t x) { Union32 u; u.u = x; return u.f; }
-SLANG_FORCE_INLINE uint32_t U32_asint(int32_t x) { return uint32_t(x); }
+SLANG_FORCE_INLINE float U32_asfloat(uint32_t x) // Converts via Union32: presumably a union whose u and f members overlap (declared elsewhere) - confirm.
+{
+ Union32 u;
+ u.u = x; // store through the unsigned member
+ return u.f; // read back through the float member
+}
+SLANG_FORCE_INLINE uint32_t U32_asint(int32_t x) // NOTE(review): takes int32_t and returns uint32_t, exactly like I32_asuint; the name suggests uint32_t -> int32_t - confirm intent.
+{
+ return uint32_t(x);
+}
SLANG_FORCE_INLINE double U32_asdouble(uint32_t low, uint32_t hi)
{
@@ -413,7 +702,7 @@ SLANG_FORCE_INLINE uint32_t U32_countbits(uint32_t v)
return __builtin_popcount(v);
#elif SLANG_PROCESSOR_X86_64 && SLANG_VC
return __popcnt(v);
-#else
+#else
uint32_t c = 0;
while (v)
{
@@ -426,21 +715,30 @@ SLANG_FORCE_INLINE uint32_t U32_countbits(uint32_t v)
// ----------------------------- U64 -----------------------------------------
-SLANG_FORCE_INLINE uint64_t U64_abs(uint64_t f) { return f; }
+SLANG_FORCE_INLINE uint64_t U64_abs(uint64_t f) // Identity: the unsigned argument is returned as-is.
+{
+ return f;
+}
-SLANG_FORCE_INLINE uint64_t U64_min(uint64_t a, uint64_t b) { return a < b ? a : b; }
-SLANG_FORCE_INLINE uint64_t U64_max(uint64_t a, uint64_t b) { return a > b ? a : b; }
+// Smaller of two unsigned 64-bit integers (b when neither is smaller).
+SLANG_FORCE_INLINE uint64_t U64_min(uint64_t a, uint64_t b)
+{
+    if (a < b)
+        return a;
+    return b;
+}
+// Larger of two unsigned 64-bit integers (b when neither is larger).
+SLANG_FORCE_INLINE uint64_t U64_max(uint64_t a, uint64_t b)
+{
+    if (a > b)
+        return a;
+    return b;
+}
// TODO(JS): We don't define countbits for 64bit in the core module currently.
-// It's not clear from documentation if it should return 32 or 64 bits, if it exists.
-// 32 bits can always hold the result, and will be implicitly promoted.
+// It's not clear from documentation if it should return 32 or 64 bits, if it exists.
+// 32 bits can always hold the result, and will be implicitly promoted.
SLANG_FORCE_INLINE uint32_t U64_countbits(uint64_t v)
{
-#if SLANG_GCC_FAMILY && !defined(SLANG_LLVM)
+#if SLANG_GCC_FAMILY && !defined(SLANG_LLVM)
return uint32_t(__builtin_popcountl(v));
#elif SLANG_PROCESSOR_X86_64 && SLANG_VC
return uint32_t(__popcnt64(v));
-#else
+#else
uint32_t c = 0;
while (v)
{
@@ -453,10 +751,19 @@ SLANG_FORCE_INLINE uint32_t U64_countbits(uint64_t v)
// ----------------------------- I64 -----------------------------------------
-SLANG_FORCE_INLINE int64_t I64_abs(int64_t f) { return (f < 0) ? -f : f; }
+// Absolute value of a signed 64-bit integer.
+// The negation is carried out in uint64_t so that INT64_MIN does not trigger signed-overflow
+// undefined behaviour; for that single input the result wraps back to INT64_MIN.
+SLANG_FORCE_INLINE int64_t I64_abs(int64_t f)
+{
+    return (f < 0) ? int64_t(uint64_t(0) - uint64_t(f)) : f;
+}
-SLANG_FORCE_INLINE int64_t I64_min(int64_t a, int64_t b) { return a < b ? a : b; }
-SLANG_FORCE_INLINE int64_t I64_max(int64_t a, int64_t b) { return a > b ? a : b; }
+// Smaller of two signed 64-bit integers (b when neither is smaller).
+SLANG_FORCE_INLINE int64_t I64_min(int64_t a, int64_t b)
+{
+    if (a < b)
+        return a;
+    return b;
+}
+// Larger of two signed 64-bit integers (b when neither is larger).
+SLANG_FORCE_INLINE int64_t I64_max(int64_t a, int64_t b)
+{
+    if (a > b)
+        return a;
+    return b;
+}
// ----------------------------- Interlocked ---------------------------------
@@ -465,17 +772,17 @@ SLANG_FORCE_INLINE int64_t I64_max(int64_t a, int64_t b) { return a > b ? a : b;
#else // SLANG_LLVM
-# ifdef _WIN32
-# include <intrin.h>
-# endif
+#ifdef _WIN32
+#include <intrin.h>
+#endif
SLANG_FORCE_INLINE void InterlockedAdd(uint32_t* dest, uint32_t value, uint32_t* oldValue)
{
-# ifdef _WIN32
+#ifdef _WIN32
*oldValue = _InterlockedExchangeAdd((long*)dest, (long)value);
-# else
+#else
*oldValue = __sync_fetch_and_add(dest, value);
-# endif
+#endif
}
#endif // SLANG_LLVM
@@ -492,7 +799,7 @@ SLANG_FORCE_INLINE double _slang_fmod(double x, double y)
}
#ifdef SLANG_PRELUDE_NAMESPACE
-}
+}
#endif
#endif
diff --git a/prelude/slang-cpp-types-core.h b/prelude/slang-cpp-types-core.h
index 25fe47202..6c0bb7544 100644
--- a/prelude/slang-cpp-types-core.h
+++ b/prelude/slang-cpp-types-core.h
@@ -2,11 +2,11 @@
#define SLANG_PRELUDE_CPP_TYPES_CORE_H
#ifndef SLANG_PRELUDE_ASSERT
-# ifdef SLANG_PRELUDE_ENABLE_ASSERT
-# define SLANG_PRELUDE_ASSERT(VALUE) assert(VALUE)
-# else
-# define SLANG_PRELUDE_ASSERT(VALUE)
-# endif
+#ifdef SLANG_PRELUDE_ENABLE_ASSERT
+#define SLANG_PRELUDE_ASSERT(VALUE) assert(VALUE)
+#else
+#define SLANG_PRELUDE_ASSERT(VALUE)
+#endif
#endif
// Since we are using unsigned arithmetic, care is needed in this comparison.
@@ -15,35 +15,42 @@
// Asserts for bounds checking.
// It is assumed index/count are unsigned types.
-#define SLANG_BOUND_ASSERT(index, count) SLANG_PRELUDE_ASSERT(index < count);
-#define SLANG_BOUND_ASSERT_BYTE_ADDRESS(index, elemSize, sizeInBytes) SLANG_PRELUDE_ASSERT(index <= (sizeInBytes - elemSize) && (index & 3) == 0);
+// Asserts (through SLANG_PRELUDE_ASSERT) that index < count.
+// Arguments are parenthesised so expression arguments expand with the intended precedence.
+// The trailing ';' is part of the macro, as before.
+#define SLANG_BOUND_ASSERT(index, count) SLANG_PRELUDE_ASSERT((index) < (count));
+// Asserts that a byte-address access is in range (index <= sizeInBytes - elemSize) and that
+// index is a multiple of 4 ((index & 3) == 0).
+// Arguments are parenthesised so expression arguments expand with the intended precedence.
+#define SLANG_BOUND_ASSERT_BYTE_ADDRESS(index, elemSize, sizeInBytes) \
+    SLANG_PRELUDE_ASSERT((index) <= ((sizeInBytes) - (elemSize)) && ((index) & 3) == 0);
// Macros to zero index if an access is out of range
-#define SLANG_BOUND_ZERO_INDEX(index, count) index = (index < count) ? index : 0;
-#define SLANG_BOUND_ZERO_INDEX_BYTE_ADDRESS(index, elemSize, sizeInBytes) index = (index <= (sizeInBytes - elemSize)) ? index : 0;
-
-// The 'FIX' macro define how the index is fixed. The default is to do nothing. If SLANG_ENABLE_BOUND_ZERO_INDEX
-// the fix macro will zero the index, if out of range
-#ifdef SLANG_ENABLE_BOUND_ZERO_INDEX
-# define SLANG_BOUND_FIX(index, count) SLANG_BOUND_ZERO_INDEX(index, count)
-# define SLANG_BOUND_FIX_BYTE_ADDRESS(index, elemSize, sizeInBytes) SLANG_BOUND_ZERO_INDEX_BYTE_ADDRESS(index, elemSize, sizeInBytes)
-# define SLANG_BOUND_FIX_FIXED_ARRAY(index, count) SLANG_BOUND_ZERO_INDEX(index, count)
+// Resets index to 0 unless index < count. `index` must be an assignable lvalue.
+// Arguments are parenthesised so expression arguments expand with the intended precedence.
+#define SLANG_BOUND_ZERO_INDEX(index, count) (index) = ((index) < (count)) ? (index) : 0;
+// Byte-address variant: resets index to 0 unless index <= sizeInBytes - elemSize.
+// `index` must be an assignable lvalue. Arguments are parenthesised so expression arguments
+// expand with the intended precedence.
+#define SLANG_BOUND_ZERO_INDEX_BYTE_ADDRESS(index, elemSize, sizeInBytes) \
+    (index) = ((index) <= ((sizeInBytes) - (elemSize))) ? (index) : 0;
+
+// The 'FIX' macros define how the index is fixed. The default is to do nothing. If
+// SLANG_ENABLE_BOUND_ZERO_INDEX is defined, the fix macros zero the index if it is out of range.
+#ifdef SLANG_ENABLE_BOUND_ZERO_INDEX
+#define SLANG_BOUND_FIX(index, count) SLANG_BOUND_ZERO_INDEX(index, count) // zero-index configuration: delegate to SLANG_BOUND_ZERO_INDEX
+#define SLANG_BOUND_FIX_BYTE_ADDRESS(index, elemSize, sizeInBytes) \
+ SLANG_BOUND_ZERO_INDEX_BYTE_ADDRESS(index, elemSize, sizeInBytes) // byte-address form of the same fix
+#define SLANG_BOUND_FIX_FIXED_ARRAY(index, count) SLANG_BOUND_ZERO_INDEX(index, count) // fixed arrays use the same rule as SLANG_BOUND_FIX
#else
-# define SLANG_BOUND_FIX(index, count)
-# define SLANG_BOUND_FIX_BYTE_ADDRESS(index, elemSize, sizeInBytes)
-# define SLANG_BOUND_FIX_FIXED_ARRAY(index, count)
+#define SLANG_BOUND_FIX(index, count) // default configuration: expands to nothing, index is left untouched
+#define SLANG_BOUND_FIX_BYTE_ADDRESS(index, elemSize, sizeInBytes) // expands to nothing
+#define SLANG_BOUND_FIX_FIXED_ARRAY(index, count) // expands to nothing
#endif
#ifndef SLANG_BOUND_CHECK
-# define SLANG_BOUND_CHECK(index, count) SLANG_BOUND_ASSERT(index, count) SLANG_BOUND_FIX(index, count)
+#define SLANG_BOUND_CHECK(index, count) \
+ SLANG_BOUND_ASSERT(index, count) SLANG_BOUND_FIX(index, count) // assert first, then apply the configured fix
#endif
#ifndef SLANG_BOUND_CHECK_BYTE_ADDRESS
-# define SLANG_BOUND_CHECK_BYTE_ADDRESS(index, elemSize, sizeInBytes) SLANG_BOUND_ASSERT_BYTE_ADDRESS(index, elemSize, sizeInBytes) SLANG_BOUND_FIX_BYTE_ADDRESS(index, elemSize, sizeInBytes)
+#define SLANG_BOUND_CHECK_BYTE_ADDRESS(index, elemSize, sizeInBytes) \
+ SLANG_BOUND_ASSERT_BYTE_ADDRESS(index, elemSize, sizeInBytes) \
+ SLANG_BOUND_FIX_BYTE_ADDRESS(index, elemSize, sizeInBytes) // assert first, then apply the configured byte-address fix
#endif
#ifndef SLANG_BOUND_CHECK_FIXED_ARRAY
-# define SLANG_BOUND_CHECK_FIXED_ARRAY(index, count) SLANG_BOUND_ASSERT(index, count) SLANG_BOUND_FIX_FIXED_ARRAY(index, count)
+#define SLANG_BOUND_CHECK_FIXED_ARRAY(index, count) \
+ SLANG_BOUND_ASSERT(index, count) SLANG_BOUND_FIX_FIXED_ARRAY(index, count) // assert first, then apply the fixed-array fix
#endif
struct TypeInfo
@@ -51,34 +58,51 @@ struct TypeInfo
size_t typeSize;
};
-template <typename T, size_t SIZE>
+template<typename T, size_t SIZE>
struct FixedArray
{
- const T& operator[](size_t index) const { SLANG_BOUND_CHECK_FIXED_ARRAY(index, SIZE); return m_data[index]; }
- T& operator[](size_t index) { SLANG_BOUND_CHECK_FIXED_ARRAY(index, SIZE); return m_data[index]; }
+ const T& operator[](size_t index) const
+ {
+ SLANG_BOUND_CHECK_FIXED_ARRAY(index, SIZE);
+ return m_data[index];
+ }
+ T& operator[](size_t index)
+ {
+ SLANG_BOUND_CHECK_FIXED_ARRAY(index, SIZE);
+ return m_data[index];
+ }
T m_data[SIZE];
};
-// An array that has no specified size, becomes a 'Array'. This stores the size so it can potentially
-// do bounds checking.
-template <typename T>
+// An array that has no specified size becomes an 'Array'. This stores the size so it can
+// potentially do bounds checking.
+template<typename T>
struct Array
{
- const T& operator[](size_t index) const { SLANG_BOUND_CHECK(index, count); return data[index]; }
- T& operator[](size_t index) { SLANG_BOUND_CHECK(index, count); return data[index]; }
+ const T& operator[](size_t index) const
+ {
+ SLANG_BOUND_CHECK(index, count);
+ return data[index];
+ }
+ T& operator[](size_t index)
+ {
+ SLANG_BOUND_CHECK(index, count);
+ return data[index];
+ }
T* data;
size_t count;
};
-/* Constant buffers become a pointer to the contained type, so ConstantBuffer<T> becomes T* in C++ code.
-*/
+/* Constant buffers become a pointer to the contained type, so ConstantBuffer<T> becomes T* in C++
+ * code.
+ */
-template <typename T, int COUNT>
+template<typename T, int COUNT>
struct Vector;
-template <typename T>
+template<typename T>
struct Vector<T, 1>
{
T x;
@@ -86,58 +110,54 @@ struct Vector<T, 1>
T& operator[](size_t /*index*/) { return x; }
operator T() const { return x; }
Vector() = default;
- Vector(T scalar)
- {
- x = scalar;
- }
- template <typename U>
+ Vector(T scalar) { x = scalar; }
+ template<typename U>
Vector(Vector<U, 1> other)
{
x = (T)other.x;
}
- template <typename U, int otherSize>
+ template<typename U, int otherSize>
Vector(Vector<U, otherSize> other)
{
int minSize = 1;
- if (otherSize < minSize) minSize = otherSize;
+ if (otherSize < minSize)
+ minSize = otherSize;
for (int i = 0; i < minSize; i++)
(*this)[i] = (T)other[i];
}
};
-template <typename T>
+template<typename T>
struct Vector<T, 2>
{
T x, y;
const T& operator[](size_t index) const { return index == 0 ? x : y; }
T& operator[](size_t index) { return index == 0 ? x : y; }
Vector() = default;
- Vector(T scalar)
- {
- x = y = scalar;
- }
+ Vector(T scalar) { x = y = scalar; }
Vector(T _x, T _y)
{
x = _x;
y = _y;
}
- template <typename U>
+ template<typename U>
Vector(Vector<U, 2> other)
{
x = (T)other.x;
y = (T)other.y;
}
- template <typename U, int otherSize>
+ template<typename U, int otherSize>
Vector(Vector<U, otherSize> other)
{
int minSize = 2;
- if (otherSize < minSize) minSize = otherSize;
+ if (otherSize < minSize)
+ minSize = otherSize;
for (int i = 0; i < minSize; i++)
(*this)[i] = (T)other[i];
}
};
-template <typename T>
+template<typename T>
struct Vector<T, 3>
{
T x, y, z;
@@ -145,34 +165,32 @@ struct Vector<T, 3>
T& operator[](size_t index) { return *((T*)(this) + index); }
Vector() = default;
- Vector(T scalar)
- {
- x = y = z = scalar;
- }
+ Vector(T scalar) { x = y = z = scalar; }
Vector(T _x, T _y, T _z)
{
x = _x;
y = _y;
z = _z;
}
- template <typename U>
+ template<typename U>
Vector(Vector<U, 3> other)
{
x = (T)other.x;
y = (T)other.y;
z = (T)other.z;
}
- template <typename U, int otherSize>
+ template<typename U, int otherSize>
Vector(Vector<U, otherSize> other)
{
int minSize = 3;
- if (otherSize < minSize) minSize = otherSize;
+ if (otherSize < minSize)
+ minSize = otherSize;
for (int i = 0; i < minSize; i++)
(*this)[i] = (T)other[i];
}
};
-template <typename T>
+template<typename T>
struct Vector<T, 4>
{
T x, y, z, w;
@@ -180,10 +198,7 @@ struct Vector<T, 4>
const T& operator[](size_t index) const { return *((T*)(this) + index); }
T& operator[](size_t index) { return *((T*)(this) + index); }
Vector() = default;
- Vector(T scalar)
- {
- x = y = z = w = scalar;
- }
+ Vector(T scalar) { x = y = z = w = scalar; }
Vector(T _x, T _y, T _z, T _w)
{
x = _x;
@@ -191,19 +206,22 @@ struct Vector<T, 4>
z = _z;
w = _w;
}
- template <typename U, int otherSize>
+ template<typename U, int otherSize>
Vector(Vector<U, otherSize> other)
{
int minSize = 4;
- if (otherSize < minSize) minSize = otherSize;
+ if (otherSize < minSize)
+ minSize = otherSize;
for (int i = 0; i < minSize; i++)
(*this)[i] = (T)other[i];
}
-
};
template<typename T, int N>
-SLANG_FORCE_INLINE Vector<T, N> _slang_select(Vector<bool, N> condition, Vector<T, N> v0, Vector<T, N> v1)
+SLANG_FORCE_INLINE Vector<T, N> _slang_select(
+ Vector<bool, N> condition,
+ Vector<T, N> v0,
+ Vector<T, N> v1)
{
Vector<T, N> result;
for (int i = 0; i < N; i++)
@@ -228,7 +246,7 @@ SLANG_FORCE_INLINE T _slang_vector_get_element(Vector<T, N> x, int index)
template<typename T, int N>
SLANG_FORCE_INLINE const T* _slang_vector_get_element_ptr(const Vector<T, N>* x, int index)
{
- return &((*const_cast<Vector<T,N>*>(x))[index]);
+ return &((*const_cast<Vector<T, N>*>(x))[index]);
}
template<typename T, int N>
@@ -253,66 +271,70 @@ SLANG_FORCE_INLINE Vector<T, n> _slang_vector_reshape(const Vector<OtherT, m> ot
typedef uint32_t uint;
-#define SLANG_VECTOR_BINARY_OP(T, op) \
- template<int n> \
- SLANG_FORCE_INLINE Vector<T, n> operator op(const Vector<T, n>& thisVal, const Vector<T, n>& other) \
- { \
- Vector<T, n> result;\
- for (int i = 0; i < n; i++) \
- result[i] = thisVal[i] op other[i]; \
- return result;\
- }
-#define SLANG_VECTOR_BINARY_COMPARE_OP(T, op) \
- template<int n> \
- SLANG_FORCE_INLINE Vector<bool, n> operator op(const Vector<T, n>& thisVal, const Vector<T, n>& other) \
- { \
- Vector<bool, n> result;\
- for (int i = 0; i < n; i++) \
- result[i] = thisVal[i] op other[i]; \
- return result;\
- }
-
-#define SLANG_VECTOR_UNARY_OP(T, op) \
- template<int n> \
+#define SLANG_VECTOR_BINARY_OP(T, op) /* defines element-wise `a op b` on Vector<T, n>, returning Vector<T, n> */ \
+ template<int n> \
+ SLANG_FORCE_INLINE Vector<T, n> operator op( \
+ const Vector<T, n>& thisVal, \
+ const Vector<T, n>& other) \
+ { \
+ Vector<T, n> result; \
+ for (int i = 0; i < n; i++) \
+ result[i] = thisVal[i] op other[i]; \
+ return result; \
+ }
+#define SLANG_VECTOR_BINARY_COMPARE_OP(T, op) /* defines element-wise `a op b` on Vector<T, n>, returning Vector<bool, n> */ \
+ template<int n> \
+ SLANG_FORCE_INLINE Vector<bool, n> operator op( \
+ const Vector<T, n>& thisVal, \
+ const Vector<T, n>& other) \
+ { \
+ Vector<bool, n> result; \
+ for (int i = 0; i < n; i++) \
+ result[i] = thisVal[i] op other[i]; \
+ return result; \
+ }
+
+#define SLANG_VECTOR_UNARY_OP(T, op) \
+ template<int n> \
SLANG_FORCE_INLINE Vector<T, n> operator op(const Vector<T, n>& thisVal) \
- { \
- Vector<T, n> result;\
- for (int i = 0; i < n; i++) \
- result[i] = op thisVal[i]; \
- return result;\
- }
-#define SLANG_INT_VECTOR_OPS(T) \
- SLANG_VECTOR_BINARY_OP(T, +)\
- SLANG_VECTOR_BINARY_OP(T, -)\
- SLANG_VECTOR_BINARY_OP(T, *)\
- SLANG_VECTOR_BINARY_OP(T, / )\
- SLANG_VECTOR_BINARY_OP(T, &)\
- SLANG_VECTOR_BINARY_OP(T, |)\
- SLANG_VECTOR_BINARY_OP(T, &&)\
- SLANG_VECTOR_BINARY_OP(T, ||)\
- SLANG_VECTOR_BINARY_OP(T, ^)\
- SLANG_VECTOR_BINARY_OP(T, %)\
- SLANG_VECTOR_BINARY_OP(T, >>)\
- SLANG_VECTOR_BINARY_OP(T, <<)\
- SLANG_VECTOR_BINARY_COMPARE_OP(T, >)\
- SLANG_VECTOR_BINARY_COMPARE_OP(T, <)\
- SLANG_VECTOR_BINARY_COMPARE_OP(T, >=)\
- SLANG_VECTOR_BINARY_COMPARE_OP(T, <=)\
- SLANG_VECTOR_BINARY_COMPARE_OP(T, ==)\
- SLANG_VECTOR_BINARY_COMPARE_OP(T, !=)\
- SLANG_VECTOR_UNARY_OP(T, !)\
+ { \
+ Vector<T, n> result; \
+ for (int i = 0; i < n; i++) \
+ result[i] = op thisVal[i]; \
+ return result; \
+ }
+#define SLANG_INT_VECTOR_OPS(T) \
+ SLANG_VECTOR_BINARY_OP(T, +) \
+ SLANG_VECTOR_BINARY_OP(T, -) \
+ SLANG_VECTOR_BINARY_OP(T, *) \
+ SLANG_VECTOR_BINARY_OP(T, /) \
+ SLANG_VECTOR_BINARY_OP(T, &) \
+ SLANG_VECTOR_BINARY_OP(T, |) \
+ SLANG_VECTOR_BINARY_OP(T, &&) \
+ SLANG_VECTOR_BINARY_OP(T, ||) \
+ SLANG_VECTOR_BINARY_OP(T, ^) \
+ SLANG_VECTOR_BINARY_OP(T, %) \
+ SLANG_VECTOR_BINARY_OP(T, >>) \
+ SLANG_VECTOR_BINARY_OP(T, <<) \
+ SLANG_VECTOR_BINARY_COMPARE_OP(T, >) \
+ SLANG_VECTOR_BINARY_COMPARE_OP(T, <) \
+ SLANG_VECTOR_BINARY_COMPARE_OP(T, >=) \
+ SLANG_VECTOR_BINARY_COMPARE_OP(T, <=) \
+ SLANG_VECTOR_BINARY_COMPARE_OP(T, ==) \
+ SLANG_VECTOR_BINARY_COMPARE_OP(T, !=) \
+ SLANG_VECTOR_UNARY_OP(T, !) \
SLANG_VECTOR_UNARY_OP(T, ~)
-#define SLANG_FLOAT_VECTOR_OPS(T) \
- SLANG_VECTOR_BINARY_OP(T, +)\
- SLANG_VECTOR_BINARY_OP(T, -)\
- SLANG_VECTOR_BINARY_OP(T, *)\
- SLANG_VECTOR_BINARY_OP(T, /)\
- SLANG_VECTOR_UNARY_OP(T, -)\
- SLANG_VECTOR_BINARY_COMPARE_OP(T, >)\
- SLANG_VECTOR_BINARY_COMPARE_OP(T, <)\
- SLANG_VECTOR_BINARY_COMPARE_OP(T, >=)\
- SLANG_VECTOR_BINARY_COMPARE_OP(T, <=)\
- SLANG_VECTOR_BINARY_COMPARE_OP(T, ==)\
+#define SLANG_FLOAT_VECTOR_OPS(T) \
+ SLANG_VECTOR_BINARY_OP(T, +) \
+ SLANG_VECTOR_BINARY_OP(T, -) \
+ SLANG_VECTOR_BINARY_OP(T, *) \
+ SLANG_VECTOR_BINARY_OP(T, /) \
+ SLANG_VECTOR_UNARY_OP(T, -) \
+ SLANG_VECTOR_BINARY_COMPARE_OP(T, >) \
+ SLANG_VECTOR_BINARY_COMPARE_OP(T, <) \
+ SLANG_VECTOR_BINARY_COMPARE_OP(T, >=) \
+ SLANG_VECTOR_BINARY_COMPARE_OP(T, <=) \
+ SLANG_VECTOR_BINARY_COMPARE_OP(T, ==) \
SLANG_VECTOR_BINARY_COMPARE_OP(T, !=)
SLANG_INT_VECTOR_OPS(bool)
@@ -328,14 +350,14 @@ SLANG_INT_VECTOR_OPS(uint64_t)
SLANG_FLOAT_VECTOR_OPS(float)
SLANG_FLOAT_VECTOR_OPS(double)
-#define SLANG_VECTOR_INT_NEG_OP(T) \
- template<int N>\
+#define SLANG_VECTOR_INT_NEG_OP(T) \
+ template<int N> \
Vector<T, N> operator-(const Vector<T, N>& thisVal) \
- { \
- Vector<T, N> result;\
- for (int i = 0; i < N; i++) \
- result[i] = 0 - thisVal[i]; \
- return result;\
+ { \
+ Vector<T, N> result; \
+ for (int i = 0; i < N; i++) \
+ result[i] = 0 - thisVal[i]; \
+ return result; \
}
SLANG_VECTOR_INT_NEG_OP(int)
SLANG_VECTOR_INT_NEG_OP(int8_t)
@@ -346,14 +368,14 @@ SLANG_VECTOR_INT_NEG_OP(uint8_t)
SLANG_VECTOR_INT_NEG_OP(uint16_t)
SLANG_VECTOR_INT_NEG_OP(uint64_t)
-#define SLANG_FLOAT_VECTOR_MOD(T)\
- template<int N> \
+#define SLANG_FLOAT_VECTOR_MOD(T) \
+ template<int N> \
Vector<T, N> operator%(const Vector<T, N>& left, const Vector<T, N>& right) \
- {\
- Vector<T, N> result;\
- for (int i = 0; i < N; i++) \
- result[i] = _slang_fmod(left[i], right[i]); \
- return result;\
+ { \
+ Vector<T, N> result; \
+ for (int i = 0; i < N; i++) \
+ result[i] = _slang_fmod(left[i], right[i]); \
+ return result; \
}
SLANG_FLOAT_VECTOR_MOD(float)
@@ -366,7 +388,7 @@ SLANG_FLOAT_VECTOR_MOD(double)
#undef SLANG_VECTOR_INT_NEG_OP
#undef SLANG_FLOAT_VECTOR_MOD
-template <typename T, int ROWS, int COLS>
+template<typename T, int ROWS, int COLS>
struct Matrix
{
Vector<T, COLS> rows[ROWS];
@@ -377,10 +399,7 @@ struct Matrix
for (int i = 0; i < ROWS; i++)
rows[i] = Vector<T, COLS>(scalar);
}
- Matrix(const Vector<T, COLS>& row0)
- {
- rows[0] = row0;
- }
+ Matrix(const Vector<T, COLS>& row0) { rows[0] = row0; }
Matrix(const Vector<T, COLS>& row0, const Vector<T, COLS>& row1)
{
rows[0] = row0;
@@ -392,7 +411,11 @@ struct Matrix
rows[1] = row1;
rows[2] = row2;
}
- Matrix(const Vector<T, COLS>& row0, const Vector<T, COLS>& row1, const Vector<T, COLS>& row2, const Vector<T, COLS>& row3)
+ Matrix(
+ const Vector<T, COLS>& row0,
+ const Vector<T, COLS>& row1,
+ const Vector<T, COLS>& row2,
+ const Vector<T, COLS>& row3)
{
rows[0] = row0;
rows[1] = row1;
@@ -404,116 +427,188 @@ struct Matrix
{
int minRow = ROWS;
int minCol = COLS;
- if (minRow > otherRow) minRow = otherRow;
- if (minCol > otherCol) minCol = otherCol;
+ if (minRow > otherRow)
+ minRow = otherRow;
+ if (minCol > otherCol)
+ minCol = otherCol;
for (int i = 0; i < minRow; i++)
for (int j = 0; j < minCol; j++)
rows[i][j] = (T)other.rows[i][j];
}
Matrix(T v0, T v1, T v2, T v3)
{
- rows[0][0] = v0; rows[0][1] = v1;
- rows[1][0] = v2; rows[1][1] = v3;
+ rows[0][0] = v0;
+ rows[0][1] = v1;
+ rows[1][0] = v2;
+ rows[1][1] = v3;
}
Matrix(T v0, T v1, T v2, T v3, T v4, T v5)
{
if (COLS == 3)
{
- rows[0][0] = v0; rows[0][1] = v1; rows[0][2] = v2;
- rows[1][0] = v3; rows[1][1] = v4; rows[1][2] = v5;
+ rows[0][0] = v0;
+ rows[0][1] = v1;
+ rows[0][2] = v2;
+ rows[1][0] = v3;
+ rows[1][1] = v4;
+ rows[1][2] = v5;
}
else
{
- rows[0][0] = v0; rows[0][1] = v1;
- rows[1][0] = v2; rows[1][1] = v3;
- rows[2][0] = v4; rows[2][1] = v5;
+ rows[0][0] = v0;
+ rows[0][1] = v1;
+ rows[1][0] = v2;
+ rows[1][1] = v3;
+ rows[2][0] = v4;
+ rows[2][1] = v5;
}
}
Matrix(T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7)
{
if (COLS == 4)
{
- rows[0][0] = v0; rows[0][1] = v1; rows[0][2] = v2; rows[0][3] = v3;
- rows[1][0] = v4; rows[1][1] = v5; rows[1][2] = v6; rows[1][3] = v7;
+ rows[0][0] = v0;
+ rows[0][1] = v1;
+ rows[0][2] = v2;
+ rows[0][3] = v3;
+ rows[1][0] = v4;
+ rows[1][1] = v5;
+ rows[1][2] = v6;
+ rows[1][3] = v7;
}
else
{
- rows[0][0] = v0; rows[0][1] = v1;
- rows[1][0] = v2; rows[1][1] = v3;
- rows[2][0] = v4; rows[2][1] = v5;
- rows[3][0] = v6; rows[3][1] = v7;
+ rows[0][0] = v0;
+ rows[0][1] = v1;
+ rows[1][0] = v2;
+ rows[1][1] = v3;
+ rows[2][0] = v4;
+ rows[2][1] = v5;
+ rows[3][0] = v6;
+ rows[3][1] = v7;
}
}
Matrix(T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, T v8)
{
- rows[0][0] = v0; rows[0][1] = v1; rows[0][2] = v2;
- rows[1][0] = v3; rows[1][1] = v4; rows[1][2] = v5;
- rows[2][0] = v6; rows[2][1] = v7; rows[2][2] = v8;
+ rows[0][0] = v0;
+ rows[0][1] = v1;
+ rows[0][2] = v2;
+ rows[1][0] = v3;
+ rows[1][1] = v4;
+ rows[1][2] = v5;
+ rows[2][0] = v6;
+ rows[2][1] = v7;
+ rows[2][2] = v8;
}
Matrix(T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, T v8, T v9, T v10, T v11)
{
if (COLS == 4)
{
- rows[0][0] = v0; rows[0][1] = v1; rows[0][2] = v2; rows[0][3] = v3;
- rows[1][0] = v4; rows[1][1] = v5; rows[1][2] = v6; rows[1][3] = v7;
- rows[2][0] = v8; rows[2][1] = v9; rows[2][2] = v10; rows[2][3] = v11;
+ rows[0][0] = v0;
+ rows[0][1] = v1;
+ rows[0][2] = v2;
+ rows[0][3] = v3;
+ rows[1][0] = v4;
+ rows[1][1] = v5;
+ rows[1][2] = v6;
+ rows[1][3] = v7;
+ rows[2][0] = v8;
+ rows[2][1] = v9;
+ rows[2][2] = v10;
+ rows[2][3] = v11;
}
else
{
- rows[0][0] = v0; rows[0][1] = v1; rows[0][2] = v2;
- rows[1][0] = v3; rows[1][1] = v4; rows[1][2] = v5;
- rows[2][0] = v6; rows[2][1] = v7; rows[2][2] = v8;
- rows[3][0] = v9; rows[3][1] = v10; rows[3][2] = v11;
+ rows[0][0] = v0;
+ rows[0][1] = v1;
+ rows[0][2] = v2;
+ rows[1][0] = v3;
+ rows[1][1] = v4;
+ rows[1][2] = v5;
+ rows[2][0] = v6;
+ rows[2][1] = v7;
+ rows[2][2] = v8;
+ rows[3][0] = v9;
+ rows[3][1] = v10;
+ rows[3][2] = v11;
}
}
- Matrix(T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15)
+ Matrix(
+ T v0,
+ T v1,
+ T v2,
+ T v3,
+ T v4,
+ T v5,
+ T v6,
+ T v7,
+ T v8,
+ T v9,
+ T v10,
+ T v11,
+ T v12,
+ T v13,
+ T v14,
+ T v15)
{
- rows[0][0] = v0; rows[0][1] = v1; rows[0][2] = v2; rows[0][3] = v3;
- rows[1][0] = v4; rows[1][1] = v5; rows[1][2] = v6; rows[1][3] = v7;
- rows[2][0] = v8; rows[2][1] = v9; rows[2][2] = v10; rows[2][3] = v11;
- rows[3][0] = v12; rows[3][1] = v13; rows[3][2] = v14; rows[3][3] = v15;
+ rows[0][0] = v0;
+ rows[0][1] = v1;
+ rows[0][2] = v2;
+ rows[0][3] = v3;
+ rows[1][0] = v4;
+ rows[1][1] = v5;
+ rows[1][2] = v6;
+ rows[1][3] = v7;
+ rows[2][0] = v8;
+ rows[2][1] = v9;
+ rows[2][2] = v10;
+ rows[2][3] = v11;
+ rows[3][0] = v12;
+ rows[3][1] = v13;
+ rows[3][2] = v14;
+ rows[3][3] = v15;
}
};
-#define SLANG_MATRIX_BINARY_OP(T, op) \
- template<int R, int C> \
+#define SLANG_MATRIX_BINARY_OP(T, op) \
+ template<int R, int C> \
Matrix<T, R, C> operator op(const Matrix<T, R, C>& thisVal, const Matrix<T, R, C>& other) \
- { \
- Matrix<T, R, C> result;\
- for (int i = 0; i < R; i++) \
- for (int j = 0; j < C; j++) \
- result.rows[i][j] = thisVal.rows[i][j] op other.rows[i][j]; \
- return result;\
+ { \
+ Matrix<T, R, C> result; \
+ for (int i = 0; i < R; i++) \
+ for (int j = 0; j < C; j++) \
+ result.rows[i][j] = thisVal.rows[i][j] op other.rows[i][j]; \
+ return result; \
}
-#define SLANG_MATRIX_UNARY_OP(T, op) \
- template<int R, int C> \
+#define SLANG_MATRIX_UNARY_OP(T, op) \
+ template<int R, int C> \
Matrix<T, R, C> operator op(const Matrix<T, R, C>& thisVal) \
- { \
- Matrix<T, R, C> result;\
- for (int i = 0; i < R; i++) \
- for (int j = 0; j < C; j++) \
- result[i].rows[i][j] = op thisVal.rows[i][j]; \
- return result;\
- }
-#define SLANG_INT_MATRIX_OPS(T) \
- SLANG_MATRIX_BINARY_OP(T, +)\
- SLANG_MATRIX_BINARY_OP(T, -)\
- SLANG_MATRIX_BINARY_OP(T, *)\
- SLANG_MATRIX_BINARY_OP(T, / )\
- SLANG_MATRIX_BINARY_OP(T, &)\
- SLANG_MATRIX_BINARY_OP(T, |)\
- SLANG_MATRIX_BINARY_OP(T, &&)\
- SLANG_MATRIX_BINARY_OP(T, ||)\
- SLANG_MATRIX_BINARY_OP(T, ^)\
- SLANG_MATRIX_BINARY_OP(T, %)\
- SLANG_MATRIX_UNARY_OP(T, !)\
+ { \
+ Matrix<T, R, C> result; \
+ for (int i = 0; i < R; i++) \
+ for (int j = 0; j < C; j++) \
+ result.rows[i][j] = op thisVal.rows[i][j]; /* was result[i].rows[i][j]; write element (i, j) of result directly */ \
+ return result; \
+ }
+#define SLANG_INT_MATRIX_OPS(T) \
+ SLANG_MATRIX_BINARY_OP(T, +) \
+ SLANG_MATRIX_BINARY_OP(T, -) \
+ SLANG_MATRIX_BINARY_OP(T, *) \
+ SLANG_MATRIX_BINARY_OP(T, /) \
+ SLANG_MATRIX_BINARY_OP(T, &) \
+ SLANG_MATRIX_BINARY_OP(T, |) \
+ SLANG_MATRIX_BINARY_OP(T, &&) \
+ SLANG_MATRIX_BINARY_OP(T, ||) \
+ SLANG_MATRIX_BINARY_OP(T, ^) \
+ SLANG_MATRIX_BINARY_OP(T, %) \
+ SLANG_MATRIX_UNARY_OP(T, !) \
SLANG_MATRIX_UNARY_OP(T, ~)
#define SLANG_FLOAT_MATRIX_OPS(T) \
- SLANG_MATRIX_BINARY_OP(T, +)\
- SLANG_MATRIX_BINARY_OP(T, -)\
- SLANG_MATRIX_BINARY_OP(T, *)\
- SLANG_MATRIX_BINARY_OP(T, /)\
+ SLANG_MATRIX_BINARY_OP(T, +) \
+ SLANG_MATRIX_BINARY_OP(T, -) \
+ SLANG_MATRIX_BINARY_OP(T, *) \
+ SLANG_MATRIX_BINARY_OP(T, /) \
SLANG_MATRIX_UNARY_OP(T, -)
SLANG_INT_MATRIX_OPS(int)
SLANG_INT_MATRIX_OPS(int8_t)
@@ -527,38 +622,38 @@ SLANG_INT_MATRIX_OPS(uint64_t)
SLANG_FLOAT_MATRIX_OPS(float)
SLANG_FLOAT_MATRIX_OPS(double)
-#define SLANG_MATRIX_INT_NEG_OP(T) \
- template<int R, int C>\
+#define SLANG_MATRIX_INT_NEG_OP(T) \
+ template<int R, int C> \
SLANG_FORCE_INLINE Matrix<T, R, C> operator-(Matrix<T, R, C> thisVal) \
- { \
- Matrix<T, R, C> result;\
- for (int i = 0; i < R; i++) \
- for (int j = 0; j < C; j++) \
- result.rows[i][j] = 0 - thisVal.rows[i][j]; \
- return result;\
- }
- SLANG_MATRIX_INT_NEG_OP(int)
- SLANG_MATRIX_INT_NEG_OP(int8_t)
- SLANG_MATRIX_INT_NEG_OP(int16_t)
- SLANG_MATRIX_INT_NEG_OP(int64_t)
- SLANG_MATRIX_INT_NEG_OP(uint)
- SLANG_MATRIX_INT_NEG_OP(uint8_t)
- SLANG_MATRIX_INT_NEG_OP(uint16_t)
- SLANG_MATRIX_INT_NEG_OP(uint64_t)
-
-#define SLANG_FLOAT_MATRIX_MOD(T)\
- template<int R, int C> \
+ { \
+ Matrix<T, R, C> result; \
+ for (int i = 0; i < R; i++) \
+ for (int j = 0; j < C; j++) \
+ result.rows[i][j] = 0 - thisVal.rows[i][j]; \
+ return result; \
+ }
+SLANG_MATRIX_INT_NEG_OP(int)
+SLANG_MATRIX_INT_NEG_OP(int8_t)
+SLANG_MATRIX_INT_NEG_OP(int16_t)
+SLANG_MATRIX_INT_NEG_OP(int64_t)
+SLANG_MATRIX_INT_NEG_OP(uint)
+SLANG_MATRIX_INT_NEG_OP(uint8_t)
+SLANG_MATRIX_INT_NEG_OP(uint16_t)
+SLANG_MATRIX_INT_NEG_OP(uint64_t)
+
+#define SLANG_FLOAT_MATRIX_MOD(T) \
+ template<int R, int C> \
SLANG_FORCE_INLINE Matrix<T, R, C> operator%(Matrix<T, R, C> left, Matrix<T, R, C> right) \
- {\
- Matrix<T, R, C> result;\
- for (int i = 0; i < R; i++) \
- for (int j = 0; j < C; j++) \
- result.rows[i][j] = _slang_fmod(left.rows[i][j], right.rows[i][j]); \
- return result;\
+ { \
+ Matrix<T, R, C> result; \
+ for (int i = 0; i < R; i++) \
+ for (int j = 0; j < C; j++) \
+ result.rows[i][j] = _slang_fmod(left.rows[i][j], right.rows[i][j]); \
+ return result; \
}
- SLANG_FLOAT_MATRIX_MOD(float)
- SLANG_FLOAT_MATRIX_MOD(double)
+SLANG_FLOAT_MATRIX_MOD(float)
+SLANG_FLOAT_MATRIX_MOD(double)
#undef SLANG_FLOAT_MATRIX_MOD
#undef SLANG_MATRIX_BINARY_OP
#undef SLANG_MATRIX_UNARY_OP
@@ -574,5 +669,3 @@ TResult slang_bit_cast(TInput val)
}
#endif
-
-
diff --git a/prelude/slang-cpp-types.h b/prelude/slang-cpp-types.h
index 3f805a8b7..010ab8d6c 100644
--- a/prelude/slang-cpp-types.h
+++ b/prelude/slang-cpp-types.h
@@ -2,11 +2,12 @@
#define SLANG_PRELUDE_CPP_TYPES_H
#ifdef SLANG_PRELUDE_NAMESPACE
-namespace SLANG_PRELUDE_NAMESPACE {
+namespace SLANG_PRELUDE_NAMESPACE
+{
#endif
#ifndef SLANG_FORCE_INLINE
-# define SLANG_FORCE_INLINE inline
+#define SLANG_FORCE_INLINE inline
#endif
#include "slang-cpp-types-core.h"
@@ -23,8 +24,8 @@ typedef Vector<uint32_t, 2> uint2;
typedef Vector<uint32_t, 3> uint3;
typedef Vector<uint32_t, 4> uint4;
-// We can just map `NonUniformResourceIndex` type directly to the index type on CPU, as CPU does not require
-// any special handling around such accesses.
+// We can just map `NonUniformResourceIndex` type directly to the index type on CPU, as CPU does not
+// require any special handling around such accesses.
typedef size_t NonUniformResourceIndex;
// ----------------------------- ResourceType -----------------------------------------
@@ -32,47 +33,87 @@ typedef size_t NonUniformResourceIndex;
// https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/sm5-object-structuredbuffer-getdimensions
// Missing Load(_In_ int Location, _Out_ uint Status);
-template <typename T>
+template<typename T>
struct RWStructuredBuffer
{
- SLANG_FORCE_INLINE T& operator[](size_t index) const { SLANG_BOUND_CHECK(index, count); return data[index]; }
- const T& Load(size_t index) const { SLANG_BOUND_CHECK(index, count); return data[index]; }
- void GetDimensions(uint32_t* outNumStructs, uint32_t* outStride) { *outNumStructs = uint32_t(count); *outStride = uint32_t(sizeof(T)); }
-
+ SLANG_FORCE_INLINE T& operator[](size_t index) const
+ {
+ SLANG_BOUND_CHECK(index, count);
+ return data[index];
+ }
+ const T& Load(size_t index) const
+ {
+ SLANG_BOUND_CHECK(index, count);
+ return data[index];
+ }
+ void GetDimensions(uint32_t* outNumStructs, uint32_t* outStride)
+ {
+ *outNumStructs = uint32_t(count);
+ *outStride = uint32_t(sizeof(T));
+ }
+
T* data;
size_t count;
};
-template <typename T>
+template<typename T>
struct StructuredBuffer
{
- SLANG_FORCE_INLINE const T& operator[](size_t index) const { SLANG_BOUND_CHECK(index, count); return data[index]; }
- const T& Load(size_t index) const { SLANG_BOUND_CHECK(index, count); return data[index]; }
- void GetDimensions(uint32_t* outNumStructs, uint32_t* outStride) { *outNumStructs = uint32_t(count); *outStride = uint32_t(sizeof(T)); }
-
+ SLANG_FORCE_INLINE const T& operator[](size_t index) const
+ {
+ SLANG_BOUND_CHECK(index, count);
+ return data[index];
+ }
+ const T& Load(size_t index) const
+ {
+ SLANG_BOUND_CHECK(index, count);
+ return data[index];
+ }
+ void GetDimensions(uint32_t* outNumStructs, uint32_t* outStride)
+ {
+ *outNumStructs = uint32_t(count);
+ *outStride = uint32_t(sizeof(T));
+ }
+
T* data;
size_t count;
};
-template <typename T>
+template<typename T>
struct RWBuffer
{
- SLANG_FORCE_INLINE T& operator[](size_t index) const { SLANG_BOUND_CHECK(index, count); return data[index]; }
- const T& Load(size_t index) const { SLANG_BOUND_CHECK(index, count); return data[index]; }
+ SLANG_FORCE_INLINE T& operator[](size_t index) const
+ {
+ SLANG_BOUND_CHECK(index, count);
+ return data[index];
+ }
+ const T& Load(size_t index) const
+ {
+ SLANG_BOUND_CHECK(index, count);
+ return data[index];
+ }
void GetDimensions(uint32_t* outCount) { *outCount = uint32_t(count); }
-
+
T* data;
size_t count;
};
-template <typename T>
+template<typename T>
struct Buffer
{
- SLANG_FORCE_INLINE const T& operator[](size_t index) const { SLANG_BOUND_CHECK(index, count); return data[index]; }
- const T& Load(size_t index) const { SLANG_BOUND_CHECK(index, count); return data[index]; }
+ SLANG_FORCE_INLINE const T& operator[](size_t index) const
+ {
+ SLANG_BOUND_CHECK(index, count);
+ return data[index];
+ }
+ const T& Load(size_t index) const
+ {
+ SLANG_BOUND_CHECK(index, count);
+ return data[index];
+ }
void GetDimensions(uint32_t* outCount) { *outCount = uint32_t(count); }
-
+
T* data;
size_t count;
};
@@ -81,28 +122,28 @@ struct Buffer
struct ByteAddressBuffer
{
void GetDimensions(uint32_t* outDim) const { *outDim = uint32_t(sizeInBytes); }
- uint32_t Load(size_t index) const
- {
+ uint32_t Load(size_t index) const
+ {
SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 4, sizeInBytes);
- return data[index >> 2];
+ return data[index >> 2];
}
- uint2 Load2(size_t index) const
- {
+ uint2 Load2(size_t index) const
+ {
SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 8, sizeInBytes);
- const size_t dataIdx = index >> 2;
- return uint2{data[dataIdx], data[dataIdx + 1]};
+ const size_t dataIdx = index >> 2;
+ return uint2{data[dataIdx], data[dataIdx + 1]};
}
- uint3 Load3(size_t index) const
- {
+ uint3 Load3(size_t index) const
+ {
SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 12, sizeInBytes);
- const size_t dataIdx = index >> 2;
- return uint3{data[dataIdx], data[dataIdx + 1], data[dataIdx + 2]};
+ const size_t dataIdx = index >> 2;
+ return uint3{data[dataIdx], data[dataIdx + 1], data[dataIdx + 2]};
}
- uint4 Load4(size_t index) const
- {
+ uint4 Load4(size_t index) const
+ {
SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 16, sizeInBytes);
- const size_t dataIdx = index >> 2;
- return uint4{data[dataIdx], data[dataIdx + 1], data[dataIdx + 2], data[dataIdx + 3]};
+ const size_t dataIdx = index >> 2;
+ return uint4{data[dataIdx], data[dataIdx + 1], data[dataIdx + 2], data[dataIdx + 3]};
}
template<typename T>
T Load(size_t index) const
@@ -110,40 +151,40 @@ struct ByteAddressBuffer
SLANG_BOUND_CHECK_BYTE_ADDRESS(index, sizeof(T), sizeInBytes);
return *(const T*)(((const char*)data) + index);
}
-
+
const uint32_t* data;
- size_t sizeInBytes; //< Must be multiple of 4
+ size_t sizeInBytes; //< Must be multiple of 4
};
// https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/sm5-object-rwbyteaddressbuffer
-// Missing support for Atomic operations
+// Missing support for Atomic operations
// Missing support for Load with status
struct RWByteAddressBuffer
{
void GetDimensions(uint32_t* outDim) const { *outDim = uint32_t(sizeInBytes); }
-
- uint32_t Load(size_t index) const
- {
+
+ uint32_t Load(size_t index) const
+ {
SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 4, sizeInBytes);
- return data[index >> 2];
+ return data[index >> 2];
}
- uint2 Load2(size_t index) const
- {
+ uint2 Load2(size_t index) const
+ {
SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 8, sizeInBytes);
- const size_t dataIdx = index >> 2;
- return uint2{data[dataIdx], data[dataIdx + 1]};
+ const size_t dataIdx = index >> 2;
+ return uint2{data[dataIdx], data[dataIdx + 1]};
}
- uint3 Load3(size_t index) const
- {
+ uint3 Load3(size_t index) const
+ {
SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 12, sizeInBytes);
- const size_t dataIdx = index >> 2;
- return uint3{data[dataIdx], data[dataIdx + 1], data[dataIdx + 2]};
+ const size_t dataIdx = index >> 2;
+ return uint3{data[dataIdx], data[dataIdx + 1], data[dataIdx + 2]};
}
- uint4 Load4(size_t index) const
- {
+ uint4 Load4(size_t index) const
+ {
SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 16, sizeInBytes);
- const size_t dataIdx = index >> 2;
- return uint4{data[dataIdx], data[dataIdx + 1], data[dataIdx + 2], data[dataIdx + 3]};
+ const size_t dataIdx = index >> 2;
+ return uint4{data[dataIdx], data[dataIdx + 1], data[dataIdx + 2], data[dataIdx + 3]};
}
template<typename T>
T Load(size_t index) const
@@ -152,30 +193,30 @@ struct RWByteAddressBuffer
return *(const T*)(((const char*)data) + index);
}
- void Store(size_t index, uint32_t v) const
- {
+ void Store(size_t index, uint32_t v) const
+ {
SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 4, sizeInBytes);
- data[index >> 2] = v;
+ data[index >> 2] = v;
}
- void Store2(size_t index, uint2 v) const
- {
+ void Store2(size_t index, uint2 v) const
+ {
SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 8, sizeInBytes);
- const size_t dataIdx = index >> 2;
+ const size_t dataIdx = index >> 2;
data[dataIdx + 0] = v.x;
data[dataIdx + 1] = v.y;
}
- void Store3(size_t index, uint3 v) const
- {
+ void Store3(size_t index, uint3 v) const
+ {
SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 12, sizeInBytes);
- const size_t dataIdx = index >> 2;
+ const size_t dataIdx = index >> 2;
data[dataIdx + 0] = v.x;
data[dataIdx + 1] = v.y;
data[dataIdx + 2] = v.z;
}
- void Store4(size_t index, uint4 v) const
- {
+ void Store4(size_t index, uint4 v) const
+ {
SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 16, sizeInBytes);
- const size_t dataIdx = index >> 2;
+ const size_t dataIdx = index >> 2;
data[dataIdx + 0] = v.x;
data[dataIdx + 1] = v.y;
data[dataIdx + 2] = v.z;
@@ -189,7 +230,7 @@ struct RWByteAddressBuffer
}
uint32_t* data;
- size_t sizeInBytes; //< Must be multiple of 4
+ size_t sizeInBytes; //< Must be multiple of 4
};
struct ISamplerState;
@@ -206,7 +247,7 @@ struct SamplerComparisonState
};
#ifndef SLANG_RESOURCE_SHAPE
-# define SLANG_RESOURCE_SHAPE
+#define SLANG_RESOURCE_SHAPE
typedef unsigned int SlangResourceShape;
enum
{
@@ -243,7 +284,7 @@ enum
};
#endif
-//
+//
struct TextureDimensions
{
void reset()
@@ -259,25 +300,25 @@ struct TextureDimensions
int count = 0;
switch (baseShape)
{
- case SLANG_TEXTURE_1D:
+ case SLANG_TEXTURE_1D:
{
outDims[count++] = width;
break;
}
- case SLANG_TEXTURE_2D:
+ case SLANG_TEXTURE_2D:
{
outDims[count++] = width;
outDims[count++] = height;
break;
}
- case SLANG_TEXTURE_3D:
+ case SLANG_TEXTURE_3D:
{
outDims[count++] = width;
outDims[count++] = height;
outDims[count++] = depth;
break;
}
- case SLANG_TEXTURE_CUBE:
+ case SLANG_TEXTURE_CUBE:
{
outDims[count++] = width;
outDims[count++] = height;
@@ -298,19 +339,19 @@ struct TextureDimensions
int count = 0;
switch (baseShape)
{
- case SLANG_TEXTURE_1D:
+ case SLANG_TEXTURE_1D:
{
outDims[count++] = width;
break;
}
- case SLANG_TEXTURE_CUBE:
- case SLANG_TEXTURE_2D:
+ case SLANG_TEXTURE_CUBE:
+ case SLANG_TEXTURE_2D:
{
outDims[count++] = width;
outDims[count++] = height;
break;
}
- case SLANG_TEXTURE_3D:
+ case SLANG_TEXTURE_3D:
{
outDims[count++] = width;
outDims[count++] = height;
@@ -345,97 +386,146 @@ struct TextureDimensions
uint32_t shape;
uint32_t width, height, depth;
uint32_t numberOfLevels;
- uint32_t arrayElementCount; ///< For array types, 0 otherwise
+ uint32_t arrayElementCount; ///< For array types, 0 otherwise
};
-
-
-
// Texture
struct ITexture
{
virtual TextureDimensions GetDimensions(int mipLevel = -1) = 0;
virtual void Load(const int32_t* v, void* outData, size_t dataSize) = 0;
- virtual void Sample(SamplerState samplerState, const float* loc, void* outData, size_t dataSize) = 0;
- virtual void SampleLevel(SamplerState samplerState, const float* loc, float level, void* outData, size_t dataSize) = 0;
+ virtual void Sample(
+ SamplerState samplerState,
+ const float* loc,
+ void* outData,
+ size_t dataSize) = 0;
+ virtual void SampleLevel(
+ SamplerState samplerState,
+ const float* loc,
+ float level,
+ void* outData,
+ size_t dataSize) = 0;
};
-template <typename T>
+template<typename T>
struct Texture1D
{
void GetDimensions(uint32_t* outWidth) { *outWidth = texture->GetDimensions().width; }
- void GetDimensions(uint32_t mipLevel, uint32_t* outWidth, uint32_t* outNumberOfLevels)
- {
- auto dims = texture->GetDimensions(mipLevel);
- *outWidth = dims.width;
- *outNumberOfLevels = dims.numberOfLevels;
+ void GetDimensions(uint32_t mipLevel, uint32_t* outWidth, uint32_t* outNumberOfLevels)
+ {
+ auto dims = texture->GetDimensions(mipLevel);
+ *outWidth = dims.width;
+ *outNumberOfLevels = dims.numberOfLevels;
}
-
+
void GetDimensions(float* outWidth) { *outWidth = texture->GetDimensions().width; }
- void GetDimensions(uint32_t mipLevel, float* outWidth, float* outNumberOfLevels)
- {
- auto dims = texture->GetDimensions(mipLevel);
- *outWidth = dims.width;
- *outNumberOfLevels = dims.numberOfLevels;
+ void GetDimensions(uint32_t mipLevel, float* outWidth, float* outNumberOfLevels)
+ {
+ auto dims = texture->GetDimensions(mipLevel);
+ *outWidth = dims.width;
+ *outNumberOfLevels = dims.numberOfLevels;
+ }
+
+ T Load(const int2& loc) const
+ {
+ T out;
+ texture->Load(&loc.x, &out, sizeof(out));
+ return out;
}
-
- T Load(const int2& loc) const { T out; texture->Load(&loc.x, &out, sizeof(out)); return out; }
- T Sample(SamplerState samplerState, float loc) const { T out; texture->Sample(samplerState, &loc, &out, sizeof(out)); return out; }
- T SampleLevel(SamplerState samplerState, float loc, float level) { T out; texture->SampleLevel(samplerState, &loc, level, &out, sizeof(out)); return out; }
-
- ITexture* texture;
+ T Sample(SamplerState samplerState, float loc) const
+ {
+ T out;
+ texture->Sample(samplerState, &loc, &out, sizeof(out));
+ return out;
+ }
+ T SampleLevel(SamplerState samplerState, float loc, float level)
+ {
+ T out;
+ texture->SampleLevel(samplerState, &loc, level, &out, sizeof(out));
+ return out;
+ }
+
+ ITexture* texture;
};
-template <typename T>
+template<typename T>
struct Texture2D
{
- void GetDimensions(uint32_t* outWidth, uint32_t* outHeight)
- {
- const auto dims = texture->GetDimensions();
- *outWidth = dims.width;
- *outHeight = dims.height;
+ void GetDimensions(uint32_t* outWidth, uint32_t* outHeight)
+ {
+ const auto dims = texture->GetDimensions();
+ *outWidth = dims.width;
+ *outHeight = dims.height;
}
- void GetDimensions(uint32_t mipLevel, uint32_t* outWidth, uint32_t* outHeight, uint32_t* outNumberOfLevels)
+ void GetDimensions(
+ uint32_t mipLevel,
+ uint32_t* outWidth,
+ uint32_t* outHeight,
+ uint32_t* outNumberOfLevels)
{
const auto dims = texture->GetDimensions(mipLevel);
*outWidth = dims.width;
*outHeight = dims.height;
*outNumberOfLevels = dims.numberOfLevels;
}
- void GetDimensions(float* outWidth, float* outHeight)
- {
- const auto dims = texture->GetDimensions();
- *outWidth = dims.width;
- *outHeight = dims.height;
+ void GetDimensions(float* outWidth, float* outHeight)
+ {
+ const auto dims = texture->GetDimensions();
+ *outWidth = dims.width;
+ *outHeight = dims.height;
}
- void GetDimensions(uint32_t mipLevel, float* outWidth, float* outHeight, float* outNumberOfLevels)
+ void GetDimensions(
+ uint32_t mipLevel,
+ float* outWidth,
+ float* outHeight,
+ float* outNumberOfLevels)
{
const auto dims = texture->GetDimensions(mipLevel);
*outWidth = dims.width;
*outHeight = dims.height;
*outNumberOfLevels = dims.numberOfLevels;
}
-
- T Load(const int3& loc) const { T out; texture->Load(&loc.x, &out, sizeof(out)); return out; }
- T Sample(SamplerState samplerState, const float2& loc) const { T out; texture->Sample(samplerState, &loc.x, &out, sizeof(out)); return out; }
- T SampleLevel(SamplerState samplerState, const float2& loc, float level) { T out; texture->SampleLevel(samplerState, &loc.x, level, &out, sizeof(out)); return out; }
-
- ITexture* texture;
+
+ T Load(const int3& loc) const
+ {
+ T out;
+ texture->Load(&loc.x, &out, sizeof(out));
+ return out;
+ }
+ T Sample(SamplerState samplerState, const float2& loc) const
+ {
+ T out;
+ texture->Sample(samplerState, &loc.x, &out, sizeof(out));
+ return out;
+ }
+ T SampleLevel(SamplerState samplerState, const float2& loc, float level)
+ {
+ T out;
+ texture->SampleLevel(samplerState, &loc.x, level, &out, sizeof(out));
+ return out;
+ }
+
+ ITexture* texture;
};
-template <typename T>
+template<typename T>
struct Texture3D
{
void GetDimensions(uint32_t* outWidth, uint32_t* outHeight, uint32_t* outDepth)
{
- const auto dims = texture->GetDimensions();
- *outWidth = dims.width;
- *outHeight = dims.height;
+ const auto dims = texture->GetDimensions();
+ *outWidth = dims.width;
+ *outHeight = dims.height;
*outDepth = dims.depth;
}
- void GetDimensions(uint32_t mipLevel, uint32_t* outWidth, uint32_t* outHeight, uint32_t* outDepth, uint32_t* outNumberOfLevels)
+ void GetDimensions(
+ uint32_t mipLevel,
+ uint32_t* outWidth,
+ uint32_t* outHeight,
+ uint32_t* outDepth,
+ uint32_t* outNumberOfLevels)
{
const auto dims = texture->GetDimensions(mipLevel);
*outWidth = dims.width;
@@ -445,12 +535,17 @@ struct Texture3D
}
void GetDimensions(float* outWidth, float* outHeight, float* outDepth)
{
- const auto dims = texture->GetDimensions();
- *outWidth = dims.width;
- *outHeight = dims.height;
+ const auto dims = texture->GetDimensions();
+ *outWidth = dims.width;
+ *outHeight = dims.height;
*outDepth = dims.depth;
}
- void GetDimensions(uint32_t mipLevel, float* outWidth, float* outHeight, float* outDepth, float* outNumberOfLevels)
+ void GetDimensions(
+ uint32_t mipLevel,
+ float* outWidth,
+ float* outHeight,
+ float* outDepth,
+ float* outNumberOfLevels)
{
const auto dims = texture->GetDimensions(mipLevel);
*outWidth = dims.width;
@@ -458,78 +553,144 @@ struct Texture3D
*outDepth = dims.depth;
*outNumberOfLevels = dims.numberOfLevels;
}
-
- T Load(const int4& loc) const { T out; texture->Load(&loc.x, &out, sizeof(out)); return out; }
- T Sample(SamplerState samplerState, const float3& loc) const { T out; texture->Sample(samplerState, &loc.x, &out, sizeof(out)); return out; }
- T SampleLevel(SamplerState samplerState, const float3& loc, float level) { T out; texture->SampleLevel(samplerState, &loc.x, level, &out, sizeof(out)); return out; }
-
- ITexture* texture;
+
+ T Load(const int4& loc) const
+ {
+ T out;
+ texture->Load(&loc.x, &out, sizeof(out));
+ return out;
+ }
+ T Sample(SamplerState samplerState, const float3& loc) const
+ {
+ T out;
+ texture->Sample(samplerState, &loc.x, &out, sizeof(out));
+ return out;
+ }
+ T SampleLevel(SamplerState samplerState, const float3& loc, float level)
+ {
+ T out;
+ texture->SampleLevel(samplerState, &loc.x, level, &out, sizeof(out));
+ return out;
+ }
+
+ ITexture* texture;
};
-template <typename T>
+template<typename T>
struct TextureCube
{
- void GetDimensions(uint32_t* outWidth, uint32_t* outHeight)
- {
- const auto dims = texture->GetDimensions();
- *outWidth = dims.width;
- *outHeight = dims.height;
+ void GetDimensions(uint32_t* outWidth, uint32_t* outHeight)
+ {
+ const auto dims = texture->GetDimensions();
+ *outWidth = dims.width;
+ *outHeight = dims.height;
}
- void GetDimensions(uint32_t mipLevel, uint32_t* outWidth, uint32_t* outHeight, uint32_t* outNumberOfLevels)
+ void GetDimensions(
+ uint32_t mipLevel,
+ uint32_t* outWidth,
+ uint32_t* outHeight,
+ uint32_t* outNumberOfLevels)
{
const auto dims = texture->GetDimensions(mipLevel);
*outWidth = dims.width;
*outHeight = dims.height;
*outNumberOfLevels = dims.numberOfLevels;
}
- void GetDimensions(float* outWidth, float* outHeight)
- {
- const auto dims = texture->GetDimensions();
- *outWidth = dims.width;
- *outHeight = dims.height;
+ void GetDimensions(float* outWidth, float* outHeight)
+ {
+ const auto dims = texture->GetDimensions();
+ *outWidth = dims.width;
+ *outHeight = dims.height;
}
- void GetDimensions(uint32_t mipLevel, float* outWidth, float* outHeight, float* outNumberOfLevels)
+ void GetDimensions(
+ uint32_t mipLevel,
+ float* outWidth,
+ float* outHeight,
+ float* outNumberOfLevels)
{
const auto dims = texture->GetDimensions(mipLevel);
*outWidth = dims.width;
*outHeight = dims.height;
*outNumberOfLevels = dims.numberOfLevels;
}
-
- T Sample(SamplerState samplerState, const float3& loc) const { T out; texture->Sample(samplerState, &loc.x, &out, sizeof(out)); return out; }
- T SampleLevel(SamplerState samplerState, const float3& loc, float level) { T out; texture->SampleLevel(samplerState, &loc.x, level, &out, sizeof(out)); return out; }
-
- ITexture* texture;
+
+ T Sample(SamplerState samplerState, const float3& loc) const
+ {
+ T out;
+ texture->Sample(samplerState, &loc.x, &out, sizeof(out));
+ return out;
+ }
+ T SampleLevel(SamplerState samplerState, const float3& loc, float level)
+ {
+ T out;
+ texture->SampleLevel(samplerState, &loc.x, level, &out, sizeof(out));
+ return out;
+ }
+
+ ITexture* texture;
};
-template <typename T>
+template<typename T>
struct Texture1DArray
{
- void GetDimensions(uint32_t* outWidth, uint32_t* outElements) { auto dims = texture->GetDimensions(); *outWidth = dims.width; *outElements = dims.arrayElementCount; }
- void GetDimensions(uint32_t mipLevel, uint32_t* outWidth, uint32_t* outElements, uint32_t* outNumberOfLevels)
+ void GetDimensions(uint32_t* outWidth, uint32_t* outElements)
{
- auto dims = texture->GetDimensions(mipLevel);
- *outWidth = dims.width;
+ auto dims = texture->GetDimensions();
+ *outWidth = dims.width;
+ *outElements = dims.arrayElementCount;
+ }
+ void GetDimensions(
+ uint32_t mipLevel,
+ uint32_t* outWidth,
+ uint32_t* outElements,
+ uint32_t* outNumberOfLevels)
+ {
+ auto dims = texture->GetDimensions(mipLevel);
+ *outWidth = dims.width;
*outNumberOfLevels = dims.numberOfLevels;
- *outElements = dims.arrayElementCount;
- }
- void GetDimensions(float* outWidth, float* outElements) { auto dims = texture->GetDimensions(); *outWidth = dims.width; *outElements = dims.arrayElementCount; }
- void GetDimensions(uint32_t mipLevel, float* outWidth, float* outElements, float* outNumberOfLevels)
+ *outElements = dims.arrayElementCount;
+ }
+ void GetDimensions(float* outWidth, float* outElements)
{
- auto dims = texture->GetDimensions(mipLevel);
- *outWidth = dims.width;
+ auto dims = texture->GetDimensions();
+ *outWidth = dims.width;
+ *outElements = dims.arrayElementCount;
+ }
+ void GetDimensions(
+ uint32_t mipLevel,
+ float* outWidth,
+ float* outElements,
+ float* outNumberOfLevels)
+ {
+ auto dims = texture->GetDimensions(mipLevel);
+ *outWidth = dims.width;
*outNumberOfLevels = dims.numberOfLevels;
- *outElements = dims.arrayElementCount;
+ *outElements = dims.arrayElementCount;
+ }
+
+ T Load(const int3& loc) const
+ {
+ T out;
+ texture->Load(&loc.x, &out, sizeof(out));
+ return out;
+ }
+ T Sample(SamplerState samplerState, const float2& loc) const
+ {
+ T out;
+ texture->Sample(samplerState, &loc.x, &out, sizeof(out));
+ return out;
+ }
+ T SampleLevel(SamplerState samplerState, const float2& loc, float level)
+ {
+ T out;
+ texture->SampleLevel(samplerState, &loc.x, level, &out, sizeof(out));
+ return out;
}
-
- T Load(const int3& loc) const { T out; texture->Load(&loc.x, &out, sizeof(out)); return out; }
- T Sample(SamplerState samplerState, const float2& loc) const { T out; texture->Sample(samplerState, &loc.x, &out, sizeof(out)); return out; }
- T SampleLevel(SamplerState samplerState, const float2& loc, float level) { T out; texture->SampleLevel(samplerState, &loc.x, level, &out, sizeof(out)); return out; }
-
- ITexture* texture;
+
+ ITexture* texture;
};
-template <typename T>
+template<typename T>
struct Texture2DArray
{
void GetDimensions(uint32_t* outWidth, uint32_t* outHeight, uint32_t* outElements)
@@ -539,7 +700,12 @@ struct Texture2DArray
*outHeight = dims.height;
*outElements = dims.arrayElementCount;
}
- void GetDimensions(uint32_t mipLevel, uint32_t* outWidth, uint32_t* outHeight, uint32_t* outElements, uint32_t* outNumberOfLevels)
+ void GetDimensions(
+ uint32_t mipLevel,
+ uint32_t* outWidth,
+ uint32_t* outHeight,
+ uint32_t* outElements,
+ uint32_t* outNumberOfLevels)
{
auto dims = texture->GetDimensions(mipLevel);
*outWidth = dims.width;
@@ -547,7 +713,7 @@ struct Texture2DArray
*outElements = dims.arrayElementCount;
*outNumberOfLevels = dims.numberOfLevels;
}
-
+
void GetDimensions(uint32_t* outWidth, float* outHeight, float* outElements)
{
auto dims = texture->GetDimensions();
@@ -555,7 +721,12 @@ struct Texture2DArray
*outHeight = dims.height;
*outElements = dims.arrayElementCount;
}
- void GetDimensions(uint32_t mipLevel, float* outWidth, float* outHeight, float* outElements, float* outNumberOfLevels)
+ void GetDimensions(
+ uint32_t mipLevel,
+ float* outWidth,
+ float* outHeight,
+ float* outElements,
+ float* outNumberOfLevels)
{
auto dims = texture->GetDimensions(mipLevel);
*outWidth = dims.width;
@@ -563,15 +734,30 @@ struct Texture2DArray
*outElements = dims.arrayElementCount;
*outNumberOfLevels = dims.numberOfLevels;
}
-
- T Load(const int4& loc) const { T out; texture->Load(&loc.x, &out, sizeof(out)); return out; }
- T Sample(SamplerState samplerState, const float3& loc) const { T out; texture->Sample(samplerState, &loc.x, &out, sizeof(out)); return out; }
- T SampleLevel(SamplerState samplerState, const float3& loc, float level) { T out; texture->SampleLevel(samplerState, &loc.x, level, &out, sizeof(out)); return out; }
-
- ITexture* texture;
+
+ T Load(const int4& loc) const
+ {
+ T out;
+ texture->Load(&loc.x, &out, sizeof(out));
+ return out;
+ }
+ T Sample(SamplerState samplerState, const float3& loc) const
+ {
+ T out;
+ texture->Sample(samplerState, &loc.x, &out, sizeof(out));
+ return out;
+ }
+ T SampleLevel(SamplerState samplerState, const float3& loc, float level)
+ {
+ T out;
+ texture->SampleLevel(samplerState, &loc.x, level, &out, sizeof(out));
+ return out;
+ }
+
+ ITexture* texture;
};
-template <typename T>
+template<typename T>
struct TextureCubeArray
{
void GetDimensions(uint32_t* outWidth, uint32_t* outHeight, uint32_t* outElements)
@@ -581,7 +767,12 @@ struct TextureCubeArray
*outHeight = dims.height;
*outElements = dims.arrayElementCount;
}
- void GetDimensions(uint32_t mipLevel, uint32_t* outWidth, uint32_t* outHeight, uint32_t* outElements, uint32_t* outNumberOfLevels)
+ void GetDimensions(
+ uint32_t mipLevel,
+ uint32_t* outWidth,
+ uint32_t* outHeight,
+ uint32_t* outElements,
+ uint32_t* outNumberOfLevels)
{
auto dims = texture->GetDimensions(mipLevel);
*outWidth = dims.width;
@@ -589,7 +780,7 @@ struct TextureCubeArray
*outElements = dims.arrayElementCount;
*outNumberOfLevels = dims.numberOfLevels;
}
-
+
void GetDimensions(uint32_t* outWidth, float* outHeight, float* outElements)
{
auto dims = texture->GetDimensions();
@@ -597,7 +788,12 @@ struct TextureCubeArray
*outHeight = dims.height;
*outElements = dims.arrayElementCount;
}
- void GetDimensions(uint32_t mipLevel, float* outWidth, float* outHeight, float* outElements, float* outNumberOfLevels)
+ void GetDimensions(
+ uint32_t mipLevel,
+ float* outWidth,
+ float* outHeight,
+ float* outElements,
+ float* outNumberOfLevels)
{
auto dims = texture->GetDimensions(mipLevel);
*outWidth = dims.width;
@@ -605,81 +801,124 @@ struct TextureCubeArray
*outElements = dims.arrayElementCount;
*outNumberOfLevels = dims.numberOfLevels;
}
-
- T Sample(SamplerState samplerState, const float4& loc) const { T out; texture->Sample(samplerState, &loc.x, &out, sizeof(out)); return out; }
- T SampleLevel(SamplerState samplerState, const float4& loc, float level) { T out; texture->SampleLevel(samplerState, &loc.x, level, &out, sizeof(out)); return out; }
-
- ITexture* texture;
+
+ T Sample(SamplerState samplerState, const float4& loc) const
+ {
+ T out;
+ texture->Sample(samplerState, &loc.x, &out, sizeof(out));
+ return out;
+ }
+ T SampleLevel(SamplerState samplerState, const float4& loc, float level)
+ {
+ T out;
+ texture->SampleLevel(samplerState, &loc.x, level, &out, sizeof(out));
+ return out;
+ }
+
+ ITexture* texture;
};
/* !!!!!!!!!!!!!!!!!!!!!!!!!!! RWTexture !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! */
struct IRWTexture : ITexture
{
- /// Get the reference to the element at loc.
+ /// Get the reference to the element at loc.
virtual void* refAt(const uint32_t* loc) = 0;
};
-template <typename T>
+template<typename T>
struct RWTexture1D
{
void GetDimensions(uint32_t* outWidth) { *outWidth = texture->GetDimensions().width; }
- void GetDimensions(uint32_t mipLevel, uint32_t* outWidth, uint32_t* outNumberOfLevels) { auto dims = texture->GetDimensions(mipLevel); *outWidth = dims.width; *outNumberOfLevels = dims.numberOfLevels; }
-
+ void GetDimensions(uint32_t mipLevel, uint32_t* outWidth, uint32_t* outNumberOfLevels)
+ {
+ auto dims = texture->GetDimensions(mipLevel);
+ *outWidth = dims.width;
+ *outNumberOfLevels = dims.numberOfLevels;
+ }
+
void GetDimensions(float* outWidth) { *outWidth = texture->GetDimensions().width; }
- void GetDimensions(uint32_t mipLevel, float* outWidth, float* outNumberOfLevels) { auto dims = texture->GetDimensions(mipLevel); *outWidth = dims.width; *outNumberOfLevels = dims.numberOfLevels; }
-
- T Load(int32_t loc) const { T out; texture->Load(&loc, &out, sizeof(out)); return out; }
+ void GetDimensions(uint32_t mipLevel, float* outWidth, float* outNumberOfLevels)
+ {
+ auto dims = texture->GetDimensions(mipLevel);
+ *outWidth = dims.width;
+ *outNumberOfLevels = dims.numberOfLevels;
+ }
+
+ T Load(int32_t loc) const
+ {
+ T out;
+ texture->Load(&loc, &out, sizeof(out));
+ return out;
+ }
T& operator[](uint32_t loc) { return *(T*)texture->refAt(&loc); }
- IRWTexture* texture;
+ IRWTexture* texture;
};
-template <typename T>
+template<typename T>
struct RWTexture2D
{
- void GetDimensions(uint32_t* outWidth, uint32_t* outHeight)
- {
- const auto dims = texture->GetDimensions();
- *outWidth = dims.width;
- *outHeight = dims.height;
+ void GetDimensions(uint32_t* outWidth, uint32_t* outHeight)
+ {
+ const auto dims = texture->GetDimensions();
+ *outWidth = dims.width;
+ *outHeight = dims.height;
}
- void GetDimensions(uint32_t mipLevel, uint32_t* outWidth, uint32_t* outHeight, uint32_t* outNumberOfLevels)
+ void GetDimensions(
+ uint32_t mipLevel,
+ uint32_t* outWidth,
+ uint32_t* outHeight,
+ uint32_t* outNumberOfLevels)
{
const auto dims = texture->GetDimensions(mipLevel);
*outWidth = dims.width;
*outHeight = dims.height;
*outNumberOfLevels = dims.numberOfLevels;
}
- void GetDimensions(float* outWidth, float* outHeight)
- {
- const auto dims = texture->GetDimensions();
- *outWidth = dims.width;
- *outHeight = dims.height;
+ void GetDimensions(float* outWidth, float* outHeight)
+ {
+ const auto dims = texture->GetDimensions();
+ *outWidth = dims.width;
+ *outHeight = dims.height;
}
- void GetDimensions(uint32_t mipLevel, float* outWidth, float* outHeight, float* outNumberOfLevels)
+ void GetDimensions(
+ uint32_t mipLevel,
+ float* outWidth,
+ float* outHeight,
+ float* outNumberOfLevels)
{
const auto dims = texture->GetDimensions(mipLevel);
*outWidth = dims.width;
*outHeight = dims.height;
*outNumberOfLevels = dims.numberOfLevels;
}
-
- T Load(const int2& loc) const { T out; texture->Load(&loc.x, &out, sizeof(out)); return out; }
+
+ T Load(const int2& loc) const
+ {
+ T out;
+ texture->Load(&loc.x, &out, sizeof(out));
+ return out;
+ }
T& operator[](const uint2& loc) { return *(T*)texture->refAt(&loc.x); }
IRWTexture* texture;
};
-template <typename T>
+template<typename T>
struct RWTexture3D
{
void GetDimensions(uint32_t* outWidth, uint32_t* outHeight, uint32_t* outDepth)
{
- const auto dims = texture->GetDimensions();
- *outWidth = dims.width;
- *outHeight = dims.height;
+ const auto dims = texture->GetDimensions();
+ *outWidth = dims.width;
+ *outHeight = dims.height;
*outDepth = dims.depth;
}
- void GetDimensions(uint32_t mipLevel, uint32_t* outWidth, uint32_t* outHeight, uint32_t* outDepth, uint32_t* outNumberOfLevels)
+ void GetDimensions(
+ uint32_t mipLevel,
+ uint32_t* outWidth,
+ uint32_t* outHeight,
+ uint32_t* outDepth,
+ uint32_t* outNumberOfLevels)
{
const auto dims = texture->GetDimensions(mipLevel);
*outWidth = dims.width;
@@ -689,12 +928,17 @@ struct RWTexture3D
}
void GetDimensions(float* outWidth, float* outHeight, float* outDepth)
{
- const auto dims = texture->GetDimensions();
- *outWidth = dims.width;
- *outHeight = dims.height;
+ const auto dims = texture->GetDimensions();
+ *outWidth = dims.width;
+ *outHeight = dims.height;
*outDepth = dims.depth;
}
- void GetDimensions(uint32_t mipLevel, float* outWidth, float* outHeight, float* outDepth, float* outNumberOfLevels)
+ void GetDimensions(
+ uint32_t mipLevel,
+ float* outWidth,
+ float* outHeight,
+ float* outDepth,
+ float* outNumberOfLevels)
{
const auto dims = texture->GetDimensions(mipLevel);
*outWidth = dims.width;
@@ -702,60 +946,83 @@ struct RWTexture3D
*outDepth = dims.depth;
*outNumberOfLevels = dims.numberOfLevels;
}
-
- T Load(const int3& loc) const { T out; texture->Load(&loc.x, &out, sizeof(out)); return out; }
+
+ T Load(const int3& loc) const
+ {
+ T out;
+ texture->Load(&loc.x, &out, sizeof(out));
+ return out;
+ }
T& operator[](const uint3& loc) { return *(T*)texture->refAt(&loc.x); }
IRWTexture* texture;
};
-template <typename T>
+template<typename T>
struct RWTexture1DArray
{
- void GetDimensions(uint32_t* outWidth, uint32_t* outElements)
- {
- auto dims = texture->GetDimensions();
- *outWidth = dims.width;
- *outElements = dims.arrayElementCount;
+ void GetDimensions(uint32_t* outWidth, uint32_t* outElements)
+ {
+ auto dims = texture->GetDimensions();
+ *outWidth = dims.width;
+ *outElements = dims.arrayElementCount;
}
- void GetDimensions(uint32_t mipLevel, uint32_t* outWidth, uint32_t* outElements, uint32_t* outNumberOfLevels)
+ void GetDimensions(
+ uint32_t mipLevel,
+ uint32_t* outWidth,
+ uint32_t* outElements,
+ uint32_t* outNumberOfLevels)
{
const auto dims = texture->GetDimensions(mipLevel);
*outWidth = dims.width;
*outElements = dims.arrayElementCount;
*outNumberOfLevels = dims.numberOfLevels;
}
- void GetDimensions(float* outWidth, float* outElements)
- {
- auto dims = texture->GetDimensions();
- *outWidth = dims.width;
- *outElements = dims.arrayElementCount;
+ void GetDimensions(float* outWidth, float* outElements)
+ {
+ auto dims = texture->GetDimensions();
+ *outWidth = dims.width;
+ *outElements = dims.arrayElementCount;
}
- void GetDimensions(uint32_t mipLevel, float* outWidth, float* outElements, float* outNumberOfLevels)
+ void GetDimensions(
+ uint32_t mipLevel,
+ float* outWidth,
+ float* outElements,
+ float* outNumberOfLevels)
{
const auto dims = texture->GetDimensions(mipLevel);
*outWidth = dims.width;
*outElements = dims.arrayElementCount;
*outNumberOfLevels = dims.numberOfLevels;
}
-
- T Load(int2 loc) const { T out; texture->Load(&loc.x, &out, sizeof(out)); return out; }
+
+ T Load(int2 loc) const
+ {
+ T out;
+ texture->Load(&loc.x, &out, sizeof(out));
+ return out;
+ }
T& operator[](uint2 loc) { return *(T*)texture->refAt(&loc.x); }
IRWTexture* texture;
};
-template <typename T>
+template<typename T>
struct RWTexture2DArray
{
void GetDimensions(uint32_t* outWidth, uint32_t* outHeight, uint32_t* outElements)
{
- auto dims = texture->GetDimensions();
- *outWidth = dims.width;
+ auto dims = texture->GetDimensions();
+ *outWidth = dims.width;
*outHeight = dims.height;
- *outElements = dims.arrayElementCount;
+ *outElements = dims.arrayElementCount;
}
- void GetDimensions(uint32_t mipLevel, uint32_t* outWidth, uint32_t* outHeight, uint32_t* outElements, uint32_t* outNumberOfLevels)
+ void GetDimensions(
+ uint32_t mipLevel,
+ uint32_t* outWidth,
+ uint32_t* outHeight,
+ uint32_t* outElements,
+ uint32_t* outNumberOfLevels)
{
const auto dims = texture->GetDimensions(mipLevel);
*outWidth = dims.width;
@@ -765,12 +1032,17 @@ struct RWTexture2DArray
}
void GetDimensions(float* outWidth, float* outHeight, float* outElements)
{
- auto dims = texture->GetDimensions();
- *outWidth = dims.width;
+ auto dims = texture->GetDimensions();
+ *outWidth = dims.width;
*outHeight = dims.height;
- *outElements = dims.arrayElementCount;
+ *outElements = dims.arrayElementCount;
}
- void GetDimensions(uint32_t mipLevel, float* outWidth, float* outHeight, float* outElements, float* outNumberOfLevels)
+ void GetDimensions(
+ uint32_t mipLevel,
+ float* outWidth,
+ float* outHeight,
+ float* outElements,
+ float* outNumberOfLevels)
{
const auto dims = texture->GetDimensions(mipLevel);
*outWidth = dims.width;
@@ -778,8 +1050,13 @@ struct RWTexture2DArray
*outElements = dims.arrayElementCount;
*outNumberOfLevels = dims.numberOfLevels;
}
-
- T Load(const int3& loc) const { T out; texture->Load(&loc.x, &out, sizeof(out)); return out; }
+
+ T Load(const int3& loc) const
+ {
+ T out;
+ texture->Load(&loc.x, &out, sizeof(out));
+ return out;
+ }
T& operator[](const uint3& loc) { return *(T*)texture->refAt(&loc.x); }
IRWTexture* texture;
@@ -787,91 +1064,167 @@ struct RWTexture2DArray
// FeedbackTexture
-struct FeedbackType {};
-struct SAMPLER_FEEDBACK_MIN_MIP : FeedbackType {};
-struct SAMPLER_FEEDBACK_MIP_REGION_USED : FeedbackType {};
+struct FeedbackType
+{
+};
+struct SAMPLER_FEEDBACK_MIN_MIP : FeedbackType
+{
+};
+struct SAMPLER_FEEDBACK_MIP_REGION_USED : FeedbackType
+{
+};
struct IFeedbackTexture
{
virtual TextureDimensions GetDimensions(int mipLevel = -1) = 0;
- // Note here we pass the optional clamp parameter as a pointer. Passing nullptr means no clamp.
- // This was preferred over having two function definitions, and having to differentiate their names
- virtual void WriteSamplerFeedback(ITexture* tex, SamplerState samp, const float* location, const float* clamp = nullptr) = 0;
- virtual void WriteSamplerFeedbackBias(ITexture* tex, SamplerState samp, const float* location, float bias, const float* clamp = nullptr) = 0;
- virtual void WriteSamplerFeedbackGrad(ITexture* tex, SamplerState samp, const float* location, const float* ddx, const float* ddy, const float* clamp = nullptr) = 0;
-
- virtual void WriteSamplerFeedbackLevel(ITexture* tex, SamplerState samp, const float* location, float lod) = 0;
+ // Note here we pass the optional clamp parameter as a pointer. Passing nullptr means no clamp.
+ // This was preferred over having two function definitions, and having to differentiate their
+ // names
+ virtual void WriteSamplerFeedback(
+ ITexture* tex,
+ SamplerState samp,
+ const float* location,
+ const float* clamp = nullptr) = 0;
+ virtual void WriteSamplerFeedbackBias(
+ ITexture* tex,
+ SamplerState samp,
+ const float* location,
+ float bias,
+ const float* clamp = nullptr) = 0;
+ virtual void WriteSamplerFeedbackGrad(
+ ITexture* tex,
+ SamplerState samp,
+ const float* location,
+ const float* ddx,
+ const float* ddy,
+ const float* clamp = nullptr) = 0;
+
+ virtual void WriteSamplerFeedbackLevel(
+ ITexture* tex,
+ SamplerState samp,
+ const float* location,
+ float lod) = 0;
};
-template <typename T>
+template<typename T>
struct FeedbackTexture2D
{
- void GetDimensions(uint32_t* outWidth, uint32_t* outHeight)
- {
- const auto dims = texture->GetDimensions();
- *outWidth = dims.width;
- *outHeight = dims.height;
+ void GetDimensions(uint32_t* outWidth, uint32_t* outHeight)
+ {
+ const auto dims = texture->GetDimensions();
+ *outWidth = dims.width;
+ *outHeight = dims.height;
}
- void GetDimensions(uint32_t mipLevel, uint32_t* outWidth, uint32_t* outHeight, uint32_t* outNumberOfLevels)
+ void GetDimensions(
+ uint32_t mipLevel,
+ uint32_t* outWidth,
+ uint32_t* outHeight,
+ uint32_t* outNumberOfLevels)
{
const auto dims = texture->GetDimensions(mipLevel);
*outWidth = dims.width;
*outHeight = dims.height;
*outNumberOfLevels = dims.numberOfLevels;
}
- void GetDimensions(float* outWidth, float* outHeight)
- {
- const auto dims = texture->GetDimensions();
- *outWidth = dims.width;
- *outHeight = dims.height;
+ void GetDimensions(float* outWidth, float* outHeight)
+ {
+ const auto dims = texture->GetDimensions();
+ *outWidth = dims.width;
+ *outHeight = dims.height;
}
- void GetDimensions(uint32_t mipLevel, float* outWidth, float* outHeight, float* outNumberOfLevels)
+ void GetDimensions(
+ uint32_t mipLevel,
+ float* outWidth,
+ float* outHeight,
+ float* outNumberOfLevels)
{
const auto dims = texture->GetDimensions(mipLevel);
*outWidth = dims.width;
*outHeight = dims.height;
*outNumberOfLevels = dims.numberOfLevels;
}
-
- template <typename S>
- void WriteSamplerFeedback(Texture2D<S> tex, SamplerState samp, float2 location, float clamp) { texture->WriteSamplerFeedback(tex.texture, samp, &location.x, &clamp); }
- template <typename S>
- void WriteSamplerFeedbackBias(Texture2D<S> tex, SamplerState samp, float2 location, float bias, float clamp) { texture->WriteSamplerFeedbackBias(tex.texture, samp, &location.x, bias, &clamp); }
+ template<typename S>
+ void WriteSamplerFeedback(Texture2D<S> tex, SamplerState samp, float2 location, float clamp)
+ {
+ texture->WriteSamplerFeedback(tex.texture, samp, &location.x, &clamp);
+ }
- template <typename S>
- void WriteSamplerFeedbackGrad(Texture2D<S> tex, SamplerState samp, float2 location, float2 ddx, float2 ddy, float clamp) { texture->WriteSamplerFeedbackGrad(tex.texture, samp, &location.x, &ddx.x, &ddy.x, &clamp); }
+ template<typename S>
+ void WriteSamplerFeedbackBias(
+ Texture2D<S> tex,
+ SamplerState samp,
+ float2 location,
+ float bias,
+ float clamp)
+ {
+ texture->WriteSamplerFeedbackBias(tex.texture, samp, &location.x, bias, &clamp);
+ }
+
+ template<typename S>
+ void WriteSamplerFeedbackGrad(
+ Texture2D<S> tex,
+ SamplerState samp,
+ float2 location,
+ float2 ddx,
+ float2 ddy,
+ float clamp)
+ {
+ texture->WriteSamplerFeedbackGrad(tex.texture, samp, &location.x, &ddx.x, &ddy.x, &clamp);
+ }
// Level
- template <typename S>
- void WriteSamplerFeedbackLevel(Texture2D<S> tex, SamplerState samp, float2 location, float lod) { texture->WriteSamplerFeedbackLevel(tex.texture, samp, &location.x, lod); }
-
+ template<typename S>
+ void WriteSamplerFeedbackLevel(Texture2D<S> tex, SamplerState samp, float2 location, float lod)
+ {
+ texture->WriteSamplerFeedbackLevel(tex.texture, samp, &location.x, lod);
+ }
+
// Without Clamp
- template <typename S>
- void WriteSamplerFeedback(Texture2D<S> tex, SamplerState samp, float2 location) { texture->WriteSamplerFeedback(tex.texture, samp, &location.x); }
+ template<typename S>
+ void WriteSamplerFeedback(Texture2D<S> tex, SamplerState samp, float2 location)
+ {
+ texture->WriteSamplerFeedback(tex.texture, samp, &location.x);
+ }
+
+ template<typename S>
+ void WriteSamplerFeedbackBias(Texture2D<S> tex, SamplerState samp, float2 location, float bias)
+ {
+ texture->WriteSamplerFeedbackBias(tex.texture, samp, &location.x, bias);
+ }
- template <typename S>
- void WriteSamplerFeedbackBias(Texture2D<S> tex, SamplerState samp, float2 location, float bias) { texture->WriteSamplerFeedbackBias(tex.texture, samp, &location.x, bias); }
+ template<typename S>
+ void WriteSamplerFeedbackGrad(
+ Texture2D<S> tex,
+ SamplerState samp,
+ float2 location,
+ float2 ddx,
+ float2 ddy)
+ {
+ texture->WriteSamplerFeedbackGrad(tex.texture, samp, &location.x, &ddx.x, &ddy.x);
+ }
- template <typename S>
- void WriteSamplerFeedbackGrad(Texture2D<S> tex, SamplerState samp, float2 location, float2 ddx, float2 ddy) { texture->WriteSamplerFeedbackGrad(tex.texture, samp, &location.x, &ddx.x, &ddy.x); }
-
IFeedbackTexture* texture;
};
-template <typename T>
+template<typename T>
struct FeedbackTexture2DArray
{
void GetDimensions(uint32_t* outWidth, uint32_t* outHeight, uint32_t* outElements)
{
- auto dims = texture->GetDimensions();
- *outWidth = dims.width;
+ auto dims = texture->GetDimensions();
+ *outWidth = dims.width;
*outHeight = dims.height;
- *outElements = dims.arrayElementCount;
+ *outElements = dims.arrayElementCount;
}
- void GetDimensions(uint32_t mipLevel, uint32_t* outWidth, uint32_t* outHeight, uint32_t* outElements, uint32_t* outNumberOfLevels)
+ void GetDimensions(
+ uint32_t mipLevel,
+ uint32_t* outWidth,
+ uint32_t* outHeight,
+ uint32_t* outElements,
+ uint32_t* outNumberOfLevels)
{
const auto dims = texture->GetDimensions(mipLevel);
*outWidth = dims.width;
@@ -881,12 +1234,17 @@ struct FeedbackTexture2DArray
}
void GetDimensions(float* outWidth, float* outHeight, float* outElements)
{
- auto dims = texture->GetDimensions();
- *outWidth = dims.width;
+ auto dims = texture->GetDimensions();
+ *outWidth = dims.width;
*outHeight = dims.height;
- *outElements = dims.arrayElementCount;
+ *outElements = dims.arrayElementCount;
}
- void GetDimensions(uint32_t mipLevel, float* outWidth, float* outHeight, float* outElements, float* outNumberOfLevels)
+ void GetDimensions(
+ uint32_t mipLevel,
+ float* outWidth,
+ float* outHeight,
+ float* outElements,
+ float* outNumberOfLevels)
{
const auto dims = texture->GetDimensions(mipLevel);
*outWidth = dims.width;
@@ -894,31 +1252,81 @@ struct FeedbackTexture2DArray
*outElements = dims.arrayElementCount;
*outNumberOfLevels = dims.numberOfLevels;
}
-
- template <typename S>
- void WriteSamplerFeedback(Texture2DArray<S> texArray, SamplerState samp, float3 location, float clamp) { texture->WriteSamplerFeedback(texArray.texture, samp, &location.x, &clamp); }
- template <typename S>
- void WriteSamplerFeedbackBias(Texture2DArray<S> texArray, SamplerState samp, float3 location, float bias, float clamp) { texture->WriteSamplerFeedbackBias(texArray.texture, samp, &location.x, bias, &clamp); }
+ template<typename S>
+ void WriteSamplerFeedback(
+ Texture2DArray<S> texArray,
+ SamplerState samp,
+ float3 location,
+ float clamp)
+ {
+ texture->WriteSamplerFeedback(texArray.texture, samp, &location.x, &clamp);
+ }
+
+ template<typename S>
+ void WriteSamplerFeedbackBias(
+ Texture2DArray<S> texArray,
+ SamplerState samp,
+ float3 location,
+ float bias,
+ float clamp)
+ {
+ texture->WriteSamplerFeedbackBias(texArray.texture, samp, &location.x, bias, &clamp);
+ }
- template <typename S>
- void WriteSamplerFeedbackGrad(Texture2DArray<S> texArray, SamplerState samp, float3 location, float3 ddx, float3 ddy, float clamp) { texture->WriteSamplerFeedbackGrad(texArray.texture, samp, &location.x, &ddx.x, &ddy.x, &clamp); }
+ template<typename S>
+ void WriteSamplerFeedbackGrad(
+ Texture2DArray<S> texArray,
+ SamplerState samp,
+ float3 location,
+ float3 ddx,
+ float3 ddy,
+ float clamp)
+ {
+ texture
+ ->WriteSamplerFeedbackGrad(texArray.texture, samp, &location.x, &ddx.x, &ddy.x, &clamp);
+ }
// Level
- template <typename S>
- void WriteSamplerFeedbackLevel(Texture2DArray<S> texArray, SamplerState samp, float3 location, float lod) { texture->WriteSamplerFeedbackLevel(texArray.texture, samp, &location.x, lod); }
+ template<typename S>
+ void WriteSamplerFeedbackLevel(
+ Texture2DArray<S> texArray,
+ SamplerState samp,
+ float3 location,
+ float lod)
+ {
+ texture->WriteSamplerFeedbackLevel(texArray.texture, samp, &location.x, lod);
+ }
// Without Clamp
- template <typename S>
- void WriteSamplerFeedback(Texture2DArray<S> texArray, SamplerState samp, float3 location) { texture->WriteSamplerFeedback(texArray.texture, samp, &location.x); }
+ template<typename S>
+ void WriteSamplerFeedback(Texture2DArray<S> texArray, SamplerState samp, float3 location)
+ {
+ texture->WriteSamplerFeedback(texArray.texture, samp, &location.x);
+ }
- template <typename S>
- void WriteSamplerFeedbackBias(Texture2DArray<S> texArray, SamplerState samp, float3 location, float bias) { texture->WriteSamplerFeedbackBias(texArray.texture, samp, &location.x, bias); }
+ template<typename S>
+ void WriteSamplerFeedbackBias(
+ Texture2DArray<S> texArray,
+ SamplerState samp,
+ float3 location,
+ float bias)
+ {
+ texture->WriteSamplerFeedbackBias(texArray.texture, samp, &location.x, bias);
+ }
+
+ template<typename S>
+ void WriteSamplerFeedbackGrad(
+ Texture2DArray<S> texArray,
+ SamplerState samp,
+ float3 location,
+ float3 ddx,
+ float3 ddy)
+ {
+ texture->WriteSamplerFeedbackGrad(texArray.texture, samp, &location.x, &ddx.x, &ddy.x);
+ }
- template <typename S>
- void WriteSamplerFeedbackGrad(Texture2DArray<S> texArray, SamplerState samp, float3 location, float3 ddx, float3 ddy) { texture->WriteSamplerFeedbackGrad(texArray.texture, samp, &location.x, &ddx.x, &ddy.x); }
-
IFeedbackTexture* texture;
};
@@ -933,20 +1341,24 @@ struct ComputeThreadVaryingInput
struct ComputeVaryingInput
{
- uint3 startGroupID; ///< start groupID
- uint3 endGroupID; ///< Non inclusive end groupID
+ uint3 startGroupID; ///< start groupID
+ uint3 endGroupID; ///< Non inclusive end groupID
};
-// The uniformEntryPointParams and uniformState must be set to structures that match layout that the kernel expects.
-// This can be determined via reflection for example.
+// The uniformEntryPointParams and uniformState must be set to structures that match the layout
+// that the kernel expects. This can be determined via reflection, for example.
-typedef void(*ComputeThreadFunc)(ComputeThreadVaryingInput* varyingInput, void* uniformEntryPointParams, void* uniformState);
-typedef void(*ComputeFunc)(ComputeVaryingInput* varyingInput, void* uniformEntryPointParams, void* uniformState);
+typedef void (*ComputeThreadFunc)(
+ ComputeThreadVaryingInput* varyingInput,
+ void* uniformEntryPointParams,
+ void* uniformState);
+typedef void (*ComputeFunc)(
+ ComputeVaryingInput* varyingInput,
+ void* uniformEntryPointParams,
+ void* uniformState);
#ifdef SLANG_PRELUDE_NAMESPACE
}
#endif
#endif
-
-
diff --git a/prelude/slang-cuda-prelude.h b/prelude/slang-cuda-prelude.h
index e0335f08a..9ac903955 100644
--- a/prelude/slang-cuda-prelude.h
+++ b/prelude/slang-cuda-prelude.h
@@ -15,51 +15,53 @@
#endif
-// Define SLANG_CUDA_ENABLE_HALF to use the cuda_fp16 include to add half support.
+// Define SLANG_CUDA_ENABLE_HALF to use the cuda_fp16 include to add half support.
// For this to work NVRTC needs to have the path to the CUDA SDK.
//
-// As it stands the includes paths defined for Slang are passed down to NVRTC. Similarly defines defined for the Slang compile
-// are passed down.
+// As it stands, the include paths defined for Slang are passed down to NVRTC. Similarly, defines
+// defined for the Slang compile are passed down.
#ifdef SLANG_CUDA_ENABLE_HALF
-// We don't want half2 operators, because it will implement comparison operators that return a bool(!). We want to generate
-// those functions. Doing so means that we will have to define all the other half2 operators.
-# define __CUDA_NO_HALF2_OPERATORS__
-# include <cuda_fp16.h>
+// We don't want half2 operators, because it will implement comparison operators that return a
+// bool(!). We want to generate those functions. Doing so means that we will have to define all
+// the other half2 operators.
+#define __CUDA_NO_HALF2_OPERATORS__
+#include <cuda_fp16.h>
#endif
#ifdef SLANG_CUDA_ENABLE_OPTIX
#include <optix.h>
#endif
-// Define slang offsetof implementation
+// Define slang offsetof implementation
#ifndef SLANG_OFFSET_OF
-# define SLANG_OFFSET_OF(type, member) (size_t)((char*)&(((type *)0)->member) - (char*)0)
+#define SLANG_OFFSET_OF(type, member) (size_t)((char*)&(((type*)0)->member) - (char*)0)
#endif
#ifndef SLANG_ALIGN_OF
-# define SLANG_ALIGN_OF(type) __alignof__(type)
+#define SLANG_ALIGN_OF(type) __alignof__(type)
#endif
// Must be large enough to cause overflow and therefore infinity
#ifndef SLANG_INFINITY
-# define SLANG_INFINITY ((float)(1e+300 * 1e+300))
+#define SLANG_INFINITY ((float)(1e+300 * 1e+300))
#endif
// For now we'll disable any asserts in this prelude
-#define SLANG_PRELUDE_ASSERT(x)
+#define SLANG_PRELUDE_ASSERT(x)
-#ifndef SLANG_CUDA_WARP_SIZE
-# define SLANG_CUDA_WARP_SIZE 32
+#ifndef SLANG_CUDA_WARP_SIZE
+#define SLANG_CUDA_WARP_SIZE 32
#endif
-#define SLANG_CUDA_WARP_MASK (SLANG_CUDA_WARP_SIZE - 1) // Used for masking threadIdx.x to the warp lane index
+#define SLANG_CUDA_WARP_MASK \
+ (SLANG_CUDA_WARP_SIZE - 1) // Used for masking threadIdx.x to the warp lane index
#define SLANG_CUDA_WARP_BITMASK (~int(0))
//
#define SLANG_FORCE_INLINE inline
-#define SLANG_CUDA_CALL __device__
+#define SLANG_CUDA_CALL __device__
#define SLANG_FORCE_INLINE inline
#define SLANG_INLINE inline
@@ -71,54 +73,63 @@
// Asserts for bounds checking.
// It is assumed index/count are unsigned types.
-#define SLANG_BOUND_ASSERT(index, count) SLANG_PRELUDE_ASSERT(index < count);
-#define SLANG_BOUND_ASSERT_BYTE_ADDRESS(index, elemSize, sizeInBytes) SLANG_PRELUDE_ASSERT(index <= (sizeInBytes - elemSize) && (index & 3) == 0);
+#define SLANG_BOUND_ASSERT(index, count) SLANG_PRELUDE_ASSERT(index < count);
+#define SLANG_BOUND_ASSERT_BYTE_ADDRESS(index, elemSize, sizeInBytes) \
+ SLANG_PRELUDE_ASSERT(index <= (sizeInBytes - elemSize) && (index & 3) == 0);
// Macros to zero index if an access is out of range
-#define SLANG_BOUND_ZERO_INDEX(index, count) index = (index < count) ? index : 0;
-#define SLANG_BOUND_ZERO_INDEX_BYTE_ADDRESS(index, elemSize, sizeInBytes) index = (index <= (sizeInBytes - elemSize)) ? index : 0;
-
-// The 'FIX' macro define how the index is fixed. The default is to do nothing. If SLANG_ENABLE_BOUND_ZERO_INDEX
-// the fix macro will zero the index, if out of range
-#ifdef SLANG_ENABLE_BOUND_ZERO_INDEX
-# define SLANG_BOUND_FIX(index, count) SLANG_BOUND_ZERO_INDEX(index, count)
-# define SLANG_BOUND_FIX_BYTE_ADDRESS(index, elemSize, sizeInBytes) SLANG_BOUND_ZERO_INDEX_BYTE_ADDRESS(index, elemSize, sizeInBytes)
-# define SLANG_BOUND_FIX_FIXED_ARRAY(index, count) SLANG_BOUND_ZERO_INDEX(index, count) SLANG_BOUND_ZERO_INDEX(index, count)
+#define SLANG_BOUND_ZERO_INDEX(index, count) index = (index < count) ? index : 0;
+#define SLANG_BOUND_ZERO_INDEX_BYTE_ADDRESS(index, elemSize, sizeInBytes) \
+ index = (index <= (sizeInBytes - elemSize)) ? index : 0;
+
+// The 'FIX' macros define how the index is fixed. The default is to do nothing. If
+// SLANG_ENABLE_BOUND_ZERO_INDEX is defined, the fix macros zero the index if it is out of range.
+#ifdef SLANG_ENABLE_BOUND_ZERO_INDEX
+#define SLANG_BOUND_FIX(index, count) SLANG_BOUND_ZERO_INDEX(index, count)
+#define SLANG_BOUND_FIX_BYTE_ADDRESS(index, elemSize, sizeInBytes) \
+ SLANG_BOUND_ZERO_INDEX_BYTE_ADDRESS(index, elemSize, sizeInBytes)
+#define SLANG_BOUND_FIX_FIXED_ARRAY(index, count) \
+ SLANG_BOUND_ZERO_INDEX(index, count) SLANG_BOUND_ZERO_INDEX(index, count)
#else
-# define SLANG_BOUND_FIX(index, count)
-# define SLANG_BOUND_FIX_BYTE_ADDRESS(index, elemSize, sizeInBytes)
-# define SLANG_BOUND_FIX_FIXED_ARRAY(index, count)
+#define SLANG_BOUND_FIX(index, count)
+#define SLANG_BOUND_FIX_BYTE_ADDRESS(index, elemSize, sizeInBytes)
+#define SLANG_BOUND_FIX_FIXED_ARRAY(index, count)
#endif
#ifndef SLANG_BOUND_CHECK
-# define SLANG_BOUND_CHECK(index, count) SLANG_BOUND_ASSERT(index, count) SLANG_BOUND_FIX(index, count)
+#define SLANG_BOUND_CHECK(index, count) \
+ SLANG_BOUND_ASSERT(index, count) SLANG_BOUND_FIX(index, count)
#endif
#ifndef SLANG_BOUND_CHECK_BYTE_ADDRESS
-# define SLANG_BOUND_CHECK_BYTE_ADDRESS(index, elemSize, sizeInBytes) SLANG_BOUND_ASSERT_BYTE_ADDRESS(index, elemSize, sizeInBytes) SLANG_BOUND_FIX_BYTE_ADDRESS(index, elemSize, sizeInBytes)
+#define SLANG_BOUND_CHECK_BYTE_ADDRESS(index, elemSize, sizeInBytes) \
+ SLANG_BOUND_ASSERT_BYTE_ADDRESS(index, elemSize, sizeInBytes) \
+ SLANG_BOUND_FIX_BYTE_ADDRESS(index, elemSize, sizeInBytes)
#endif
#ifndef SLANG_BOUND_CHECK_FIXED_ARRAY
-# define SLANG_BOUND_CHECK_FIXED_ARRAY(index, count) SLANG_BOUND_ASSERT(index, count) SLANG_BOUND_FIX_FIXED_ARRAY(index, count)
+#define SLANG_BOUND_CHECK_FIXED_ARRAY(index, count) \
+ SLANG_BOUND_ASSERT(index, count) SLANG_BOUND_FIX_FIXED_ARRAY(index, count)
#endif
- // This macro handles how out-of-range surface coordinates are handled;
- // I can equal
- // cudaBoundaryModeClamp, in which case out-of-range coordinates are clamped to the valid range
- // cudaBoundaryModeZero, in which case out-of-range reads return zero and out-of-range writes are ignored
- // cudaBoundaryModeTrap, in which case out-of-range accesses cause the kernel execution to fail.
-
+// This macro handles how out-of-range surface coordinates are handled;
+// It can equal
+// cudaBoundaryModeClamp, in which case out-of-range coordinates are clamped to the valid range
+// cudaBoundaryModeZero, in which case out-of-range reads return zero and out-of-range writes are
+// ignored
+// cudaBoundaryModeTrap, in which case out-of-range accesses cause the kernel execution to fail.
+
#ifndef SLANG_CUDA_BOUNDARY_MODE
-# define SLANG_CUDA_BOUNDARY_MODE cudaBoundaryModeZero
+#define SLANG_CUDA_BOUNDARY_MODE cudaBoundaryModeZero
// Can be one of SLANG_CUDA_PTX_BOUNDARY_MODE. Only applies *PTX* emitted CUDA operations
// which currently is just RWTextureRW format writes
-//
+//
// .trap causes an execution trap on out-of-bounds addresses
// .clamp stores data at the nearest surface location (sized appropriately)
-// .zero drops stores to out-of-bounds addresses
+// .zero drops stores to out-of-bounds addresses
-# define SLANG_PTX_BOUNDARY_MODE "zero"
+#define SLANG_PTX_BOUNDARY_MODE "zero"
#endif
struct TypeInfo
@@ -126,51 +137,67 @@ struct TypeInfo
size_t typeSize;
};
-template <typename T, size_t SIZE>
+template<typename T, size_t SIZE>
struct FixedArray
{
- SLANG_CUDA_CALL const T& operator[](size_t index) const { SLANG_BOUND_CHECK_FIXED_ARRAY(index, SIZE); return m_data[index]; }
- SLANG_CUDA_CALL T& operator[](size_t index) { SLANG_BOUND_CHECK_FIXED_ARRAY(index, SIZE); return m_data[index]; }
-
+ SLANG_CUDA_CALL const T& operator[](size_t index) const
+ {
+ SLANG_BOUND_CHECK_FIXED_ARRAY(index, SIZE);
+ return m_data[index];
+ }
+ SLANG_CUDA_CALL T& operator[](size_t index)
+ {
+ SLANG_BOUND_CHECK_FIXED_ARRAY(index, SIZE);
+ return m_data[index];
+ }
+
T m_data[SIZE];
};
-// An array that has no specified size, becomes a 'Array'. This stores the size so it can potentially
-// do bounds checking.
-template <typename T>
+// An array that has no specified size becomes an 'Array'. This stores the size so it can
+// potentially do bounds checking.
+template<typename T>
struct Array
{
- SLANG_CUDA_CALL const T& operator[](size_t index) const { SLANG_BOUND_CHECK(index, count); return data[index]; }
- SLANG_CUDA_CALL T& operator[](size_t index) { SLANG_BOUND_CHECK(index, count); return data[index]; }
-
+ SLANG_CUDA_CALL const T& operator[](size_t index) const
+ {
+ SLANG_BOUND_CHECK(index, count);
+ return data[index];
+ }
+ SLANG_CUDA_CALL T& operator[](size_t index)
+ {
+ SLANG_BOUND_CHECK(index, count);
+ return data[index];
+ }
+
T* data;
size_t count;
};
// Typically defined in cuda.h, but we can't ship/rely on that, so just define here
-typedef unsigned long long CUtexObject;
-typedef unsigned long long CUsurfObject;
+typedef unsigned long long CUtexObject;
+typedef unsigned long long CUsurfObject;
-// On CUDA sampler state is actually bound up with the texture object. We have a SamplerState type,
-// backed as a pointer, to simplify code generation, with the downside that such a binding will take up
-// uniform space, even though it will have no effect.
+// On CUDA sampler state is actually bound up with the texture object. We have a SamplerState type,
+// backed as a pointer, to simplify code generation, with the downside that such a binding will take
+// up uniform space, even though it will have no effect.
// TODO(JS): Consider ways to strip use of variables of this type so have no binding,
struct SamplerStateUnused;
typedef SamplerStateUnused* SamplerState;
// TODO(JS): Not clear yet if this can be handled on CUDA, by just ignoring.
-// For now, just map to the index type.
+// For now, just map to the index type.
typedef size_t NonUniformResourceIndex;
// Code generator will generate the specific type
-template <typename T, int ROWS, int COLS>
+template<typename T, int ROWS, int COLS>
struct Matrix;
typedef int1 bool1;
typedef int2 bool2;
typedef int3 bool3;
-typedef int4 bool4;
+typedef int4 bool4;
#if SLANG_CUDA_RTC
@@ -193,7 +220,7 @@ typedef unsigned char uchar;
typedef unsigned short ushort;
typedef unsigned int uint;
-union Union32
+union Union32
{
uint32_t u;
int32_t i;
@@ -225,16 +252,37 @@ SLANG_FORCE_INLINE SLANG_CUDA_CALL double _slang_fmod(double x, double y)
#if SLANG_CUDA_ENABLE_HALF
// Add the other vector half types
-struct __half1 { __half x; };
-struct __align__(4) __half3 { __half x, y, z; };
-struct __align__(4) __half4 { __half x, y, z, w; };
+struct __half1
+{
+ __half x;
+};
+struct __align__(4) __half3
+{
+ __half x, y, z;
+};
+struct __align__(4) __half4
+{
+ __half x, y, z, w;
+};
#endif
-#define SLANG_VECTOR_GET_ELEMENT(T) \
- SLANG_FORCE_INLINE SLANG_CUDA_CALL T _slang_vector_get_element(T##1 x, int index) { return ((T*)(&x))[index]; }\
- SLANG_FORCE_INLINE SLANG_CUDA_CALL T _slang_vector_get_element(T##2 x, int index) { return ((T*)(&x))[index]; }\
- SLANG_FORCE_INLINE SLANG_CUDA_CALL T _slang_vector_get_element(T##3 x, int index) { return ((T*)(&x))[index]; }\
- SLANG_FORCE_INLINE SLANG_CUDA_CALL T _slang_vector_get_element(T##4 x, int index) { return ((T*)(&x))[index]; }
+#define SLANG_VECTOR_GET_ELEMENT(T) \
+ SLANG_FORCE_INLINE SLANG_CUDA_CALL T _slang_vector_get_element(T##1 x, int index) \
+ { \
+ return ((T*)(&x))[index]; \
+ } \
+ SLANG_FORCE_INLINE SLANG_CUDA_CALL T _slang_vector_get_element(T##2 x, int index) \
+ { \
+ return ((T*)(&x))[index]; \
+ } \
+ SLANG_FORCE_INLINE SLANG_CUDA_CALL T _slang_vector_get_element(T##3 x, int index) \
+ { \
+ return ((T*)(&x))[index]; \
+ } \
+ SLANG_FORCE_INLINE SLANG_CUDA_CALL T _slang_vector_get_element(T##4 x, int index) \
+ { \
+ return ((T*)(&x))[index]; \
+ }
SLANG_VECTOR_GET_ELEMENT(int)
SLANG_VECTOR_GET_ELEMENT(uint)
SLANG_VECTOR_GET_ELEMENT(short)
@@ -246,11 +294,23 @@ SLANG_VECTOR_GET_ELEMENT(ulonglong)
SLANG_VECTOR_GET_ELEMENT(float)
SLANG_VECTOR_GET_ELEMENT(double)
-#define SLANG_VECTOR_GET_ELEMENT_PTR(T) \
- SLANG_FORCE_INLINE SLANG_CUDA_CALL T* _slang_vector_get_element_ptr(T##1* x, int index) { return ((T*)(x)) + index; }\
- SLANG_FORCE_INLINE SLANG_CUDA_CALL T* _slang_vector_get_element_ptr(T##2* x, int index) { return ((T*)(x)) + index; }\
- SLANG_FORCE_INLINE SLANG_CUDA_CALL T* _slang_vector_get_element_ptr(T##3* x, int index) { return ((T*)(x)) + index; }\
- SLANG_FORCE_INLINE SLANG_CUDA_CALL T* _slang_vector_get_element_ptr(T##4* x, int index) { return ((T*)(x)) + index; }
+#define SLANG_VECTOR_GET_ELEMENT_PTR(T) \
+ SLANG_FORCE_INLINE SLANG_CUDA_CALL T* _slang_vector_get_element_ptr(T##1 * x, int index) \
+ { \
+ return ((T*)(x)) + index; \
+ } \
+ SLANG_FORCE_INLINE SLANG_CUDA_CALL T* _slang_vector_get_element_ptr(T##2 * x, int index) \
+ { \
+ return ((T*)(x)) + index; \
+ } \
+ SLANG_FORCE_INLINE SLANG_CUDA_CALL T* _slang_vector_get_element_ptr(T##3 * x, int index) \
+ { \
+ return ((T*)(x)) + index; \
+ } \
+ SLANG_FORCE_INLINE SLANG_CUDA_CALL T* _slang_vector_get_element_ptr(T##4 * x, int index) \
+ { \
+ return ((T*)(x)) + index; \
+ }
SLANG_VECTOR_GET_ELEMENT_PTR(int)
SLANG_VECTOR_GET_ELEMENT_PTR(uint)
SLANG_VECTOR_GET_ELEMENT_PTR(short)
@@ -267,57 +327,60 @@ SLANG_VECTOR_GET_ELEMENT(__half)
SLANG_VECTOR_GET_ELEMENT_PTR(__half)
#endif
-#define SLANG_CUDA_VECTOR_BINARY_OP(T, n, op) \
- SLANG_FORCE_INLINE SLANG_CUDA_CALL T##n operator op(T##n thisVal, T##n other) \
- { \
- T##n result;\
- for (int i = 0; i < n; i++) \
- *_slang_vector_get_element_ptr(&result, i) = _slang_vector_get_element(thisVal,i) op _slang_vector_get_element(other,i); \
- return result;\
+#define SLANG_CUDA_VECTOR_BINARY_OP(T, n, op) \
+ SLANG_FORCE_INLINE SLANG_CUDA_CALL T##n operator op(T##n thisVal, T##n other) \
+ { \
+ T##n result; \
+ for (int i = 0; i < n; i++) \
+ *_slang_vector_get_element_ptr(&result, i) = \
+ _slang_vector_get_element(thisVal, i) op _slang_vector_get_element(other, i); \
+ return result; \
}
-#define SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, op) \
+#define SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, op) \
SLANG_FORCE_INLINE SLANG_CUDA_CALL bool##n operator op(T##n thisVal, T##n other) \
- { \
- bool##n result;\
- for (int i = 0; i < n; i++) \
- *_slang_vector_get_element_ptr(&result, i) = (int)(_slang_vector_get_element(thisVal,i) op _slang_vector_get_element(other,i)); \
- return result;\
- }
-#define SLANG_CUDA_VECTOR_UNARY_OP(T, n, op) \
- SLANG_FORCE_INLINE SLANG_CUDA_CALL T##n operator op(T##n thisVal) \
- { \
- T##n result;\
- for (int i = 0; i < n; i++) \
- *_slang_vector_get_element_ptr(&result, i) = op _slang_vector_get_element(thisVal,i); \
- return result;\
- }
-
-#define SLANG_CUDA_VECTOR_INT_OP(T, n) \
- SLANG_CUDA_VECTOR_BINARY_OP(T, n, +)\
- SLANG_CUDA_VECTOR_BINARY_OP(T, n, -)\
- SLANG_CUDA_VECTOR_BINARY_OP(T, n, *)\
- SLANG_CUDA_VECTOR_BINARY_OP(T, n, /)\
- SLANG_CUDA_VECTOR_BINARY_OP(T, n, %)\
- SLANG_CUDA_VECTOR_BINARY_OP(T, n, ^)\
- SLANG_CUDA_VECTOR_BINARY_OP(T, n, &)\
- SLANG_CUDA_VECTOR_BINARY_OP(T, n, |)\
- SLANG_CUDA_VECTOR_BINARY_OP(T, n, &&)\
- SLANG_CUDA_VECTOR_BINARY_OP(T, n, ||)\
- SLANG_CUDA_VECTOR_BINARY_OP(T, n, >>)\
- SLANG_CUDA_VECTOR_BINARY_OP(T, n, <<)\
- SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, >)\
- SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, <)\
- SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, >=)\
- SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, <=)\
- SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, ==)\
- SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, !=)\
- SLANG_CUDA_VECTOR_UNARY_OP(T, n, !)\
- SLANG_CUDA_VECTOR_UNARY_OP(T, n, -)\
+ { \
+ bool##n result; \
+ for (int i = 0; i < n; i++) \
+ *_slang_vector_get_element_ptr(&result, i) = \
+ (int)(_slang_vector_get_element(thisVal, i) \
+ op _slang_vector_get_element(other, i)); \
+ return result; \
+ }
+#define SLANG_CUDA_VECTOR_UNARY_OP(T, n, op) \
+ SLANG_FORCE_INLINE SLANG_CUDA_CALL T##n operator op(T##n thisVal) \
+ { \
+ T##n result; \
+ for (int i = 0; i < n; i++) \
+ *_slang_vector_get_element_ptr(&result, i) = op _slang_vector_get_element(thisVal, i); \
+ return result; \
+ }
+
+#define SLANG_CUDA_VECTOR_INT_OP(T, n) \
+ SLANG_CUDA_VECTOR_BINARY_OP(T, n, +) \
+ SLANG_CUDA_VECTOR_BINARY_OP(T, n, -) \
+ SLANG_CUDA_VECTOR_BINARY_OP(T, n, *) \
+ SLANG_CUDA_VECTOR_BINARY_OP(T, n, /) \
+ SLANG_CUDA_VECTOR_BINARY_OP(T, n, %) \
+ SLANG_CUDA_VECTOR_BINARY_OP(T, n, ^) \
+ SLANG_CUDA_VECTOR_BINARY_OP(T, n, &) \
+ SLANG_CUDA_VECTOR_BINARY_OP(T, n, |) \
+ SLANG_CUDA_VECTOR_BINARY_OP(T, n, &&) \
+ SLANG_CUDA_VECTOR_BINARY_OP(T, n, ||) \
+ SLANG_CUDA_VECTOR_BINARY_OP(T, n, >>) \
+ SLANG_CUDA_VECTOR_BINARY_OP(T, n, <<) \
+ SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, >) \
+ SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, <) \
+ SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, >=) \
+ SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, <=) \
+ SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, ==) \
+ SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, !=) \
+ SLANG_CUDA_VECTOR_UNARY_OP(T, n, !) \
+ SLANG_CUDA_VECTOR_UNARY_OP(T, n, -) \
SLANG_CUDA_VECTOR_UNARY_OP(T, n, ~)
#define SLANG_CUDA_VECTOR_INT_OPS(T) \
- SLANG_CUDA_VECTOR_INT_OP(T, 2) \
- SLANG_CUDA_VECTOR_INT_OP(T, 3) \
+ SLANG_CUDA_VECTOR_INT_OP(T, 2) \
+ SLANG_CUDA_VECTOR_INT_OP(T, 3) \
SLANG_CUDA_VECTOR_INT_OP(T, 4)
SLANG_CUDA_VECTOR_INT_OPS(int)
@@ -329,23 +392,23 @@ SLANG_CUDA_VECTOR_INT_OPS(uchar)
SLANG_CUDA_VECTOR_INT_OPS(longlong)
SLANG_CUDA_VECTOR_INT_OPS(ulonglong)
-#define SLANG_CUDA_VECTOR_FLOAT_OP(T, n) \
- SLANG_CUDA_VECTOR_BINARY_OP(T, n, +)\
- SLANG_CUDA_VECTOR_BINARY_OP(T, n, -)\
- SLANG_CUDA_VECTOR_BINARY_OP(T, n, *)\
- SLANG_CUDA_VECTOR_BINARY_OP(T, n, /)\
- SLANG_CUDA_VECTOR_BINARY_OP(T, n, &&)\
- SLANG_CUDA_VECTOR_BINARY_OP(T, n, ||)\
- SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, >)\
- SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, <)\
- SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, >=)\
- SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, <=)\
- SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, ==)\
- SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, !=)\
+#define SLANG_CUDA_VECTOR_FLOAT_OP(T, n) \
+ SLANG_CUDA_VECTOR_BINARY_OP(T, n, +) \
+ SLANG_CUDA_VECTOR_BINARY_OP(T, n, -) \
+ SLANG_CUDA_VECTOR_BINARY_OP(T, n, *) \
+ SLANG_CUDA_VECTOR_BINARY_OP(T, n, /) \
+ SLANG_CUDA_VECTOR_BINARY_OP(T, n, &&) \
+ SLANG_CUDA_VECTOR_BINARY_OP(T, n, ||) \
+ SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, >) \
+ SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, <) \
+ SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, >=) \
+ SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, <=) \
+ SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, ==) \
+ SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, !=) \
SLANG_CUDA_VECTOR_UNARY_OP(T, n, -)
#define SLANG_CUDA_VECTOR_FLOAT_OPS(T) \
- SLANG_CUDA_VECTOR_FLOAT_OP(T, 2) \
- SLANG_CUDA_VECTOR_FLOAT_OP(T, 3) \
+ SLANG_CUDA_VECTOR_FLOAT_OP(T, 2) \
+ SLANG_CUDA_VECTOR_FLOAT_OP(T, 3) \
SLANG_CUDA_VECTOR_FLOAT_OP(T, 4)
SLANG_CUDA_VECTOR_FLOAT_OPS(float)
@@ -353,27 +416,38 @@ SLANG_CUDA_VECTOR_FLOAT_OPS(double)
#if SLANG_CUDA_ENABLE_HALF
SLANG_CUDA_VECTOR_FLOAT_OPS(__half)
#endif
-#define SLANG_CUDA_FLOAT_VECTOR_MOD_IMPL(T, n)\
+#define SLANG_CUDA_FLOAT_VECTOR_MOD_IMPL(T, n) \
SLANG_FORCE_INLINE SLANG_CUDA_CALL T##n operator%(const T##n& left, const T##n& right) \
- {\
- T##n result;\
- for (int i = 0; i < n; i++) \
- *_slang_vector_get_element_ptr(&result, i) = _slang_fmod(_slang_vector_get_element(left,i), _slang_vector_get_element(right,i)); \
- return result;\
- }
-#define SLANG_CUDA_FLOAT_VECTOR_MOD(T) \
- SLANG_CUDA_FLOAT_VECTOR_MOD_IMPL(T, 2)\
- SLANG_CUDA_FLOAT_VECTOR_MOD_IMPL(T, 3)\
+ { \
+ T##n result; \
+ for (int i = 0; i < n; i++) \
+ *_slang_vector_get_element_ptr(&result, i) = _slang_fmod( \
+ _slang_vector_get_element(left, i), \
+ _slang_vector_get_element(right, i)); \
+ return result; \
+ }
+#define SLANG_CUDA_FLOAT_VECTOR_MOD(T) \
+ SLANG_CUDA_FLOAT_VECTOR_MOD_IMPL(T, 2) \
+ SLANG_CUDA_FLOAT_VECTOR_MOD_IMPL(T, 3) \
SLANG_CUDA_FLOAT_VECTOR_MOD_IMPL(T, 4)
SLANG_CUDA_FLOAT_VECTOR_MOD(float)
SLANG_CUDA_FLOAT_VECTOR_MOD(double)
#if SLANG_CUDA_RTC || SLANG_CUDA_ENABLE_HALF
-#define SLANG_MAKE_VECTOR(T) \
- SLANG_FORCE_INLINE SLANG_CUDA_CALL T##2 make_##T##2(T x, T y) { return T##2{x, y}; }\
- SLANG_FORCE_INLINE SLANG_CUDA_CALL T##3 make_##T##3(T x, T y, T z) { return T##3{ x, y, z }; }\
- SLANG_FORCE_INLINE SLANG_CUDA_CALL T##4 make_##T##4(T x, T y, T z, T w) { return T##4{ x, y, z, w }; }
+#define SLANG_MAKE_VECTOR(T) \
+ SLANG_FORCE_INLINE SLANG_CUDA_CALL T##2 make_##T##2(T x, T y) \
+ { \
+ return T##2 {x, y}; \
+ } \
+ SLANG_FORCE_INLINE SLANG_CUDA_CALL T##3 make_##T##3(T x, T y, T z) \
+ { \
+ return T##3 {x, y, z}; \
+ } \
+ SLANG_FORCE_INLINE SLANG_CUDA_CALL T##4 make_##T##4(T x, T y, T z, T w) \
+ { \
+ return T##4 {x, y, z, w}; \
+ }
#endif
#if SLANG_CUDA_RTC
@@ -393,25 +467,67 @@ SLANG_MAKE_VECTOR(ulonglong)
SLANG_MAKE_VECTOR(__half)
#endif
-SLANG_FORCE_INLINE SLANG_CUDA_CALL bool1 make_bool1(bool x) { return bool1{ x }; }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL bool2 make_bool2(bool x, bool y) { return bool2{ x, y }; }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL bool3 make_bool3(bool x, bool y, bool z) { return bool3{ x, y, z }; }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL bool4 make_bool4(bool x, bool y, bool z, bool w) { return bool4{ x, y, z, w }; }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL bool2 make_bool2(bool x) { return bool2{ x, x }; }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL bool3 make_bool3(bool x) { return bool3{ x, x, x }; }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL bool4 make_bool4(bool x) { return bool4{ x, x, x, x }; }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL bool1 make_bool1(bool x)
+{
+ return bool1{x};
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL bool2 make_bool2(bool x, bool y)
+{
+ return bool2{x, y};
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL bool3 make_bool3(bool x, bool y, bool z)
+{
+ return bool3{x, y, z};
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL bool4 make_bool4(bool x, bool y, bool z, bool w)
+{
+ return bool4{x, y, z, w};
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL bool2 make_bool2(bool x)
+{
+ return bool2{x, x};
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL bool3 make_bool3(bool x)
+{
+ return bool3{x, x, x};
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL bool4 make_bool4(bool x)
+{
+ return bool4{x, x, x, x};
+}
#if SLANG_CUDA_RTC
-#define SLANG_MAKE_VECTOR_FROM_SCALAR(T) \
- SLANG_FORCE_INLINE SLANG_CUDA_CALL T##1 make_##T##1(T x) { return T##1{x}; }\
- SLANG_FORCE_INLINE SLANG_CUDA_CALL T##2 make_##T##2(T x) { return make_##T##2(x, x); }\
- SLANG_FORCE_INLINE SLANG_CUDA_CALL T##3 make_##T##3(T x) { return make_##T##3(x, x, x); }\
- SLANG_FORCE_INLINE SLANG_CUDA_CALL T##4 make_##T##4(T x) { return make_##T##4(x, x, x, x); }
+#define SLANG_MAKE_VECTOR_FROM_SCALAR(T) \
+ SLANG_FORCE_INLINE SLANG_CUDA_CALL T##1 make_##T##1(T x) \
+ { \
+ return T##1 {x}; \
+ } \
+ SLANG_FORCE_INLINE SLANG_CUDA_CALL T##2 make_##T##2(T x) \
+ { \
+ return make_##T##2(x, x); \
+ } \
+ SLANG_FORCE_INLINE SLANG_CUDA_CALL T##3 make_##T##3(T x) \
+ { \
+ return make_##T##3(x, x, x); \
+ } \
+ SLANG_FORCE_INLINE SLANG_CUDA_CALL T##4 make_##T##4(T x) \
+ { \
+ return make_##T##4(x, x, x, x); \
+ }
#else
-#define SLANG_MAKE_VECTOR_FROM_SCALAR(T) \
- SLANG_FORCE_INLINE SLANG_CUDA_CALL T##2 make_##T##2(T x) { return make_##T##2(x, x); }\
- SLANG_FORCE_INLINE SLANG_CUDA_CALL T##3 make_##T##3(T x) { return make_##T##3(x, x, x); }\
- SLANG_FORCE_INLINE SLANG_CUDA_CALL T##4 make_##T##4(T x) { return make_##T##4(x, x, x, x); }
+#define SLANG_MAKE_VECTOR_FROM_SCALAR(T) \
+ SLANG_FORCE_INLINE SLANG_CUDA_CALL T##2 make_##T##2(T x) \
+ { \
+ return make_##T##2(x, x); \
+ } \
+ SLANG_FORCE_INLINE SLANG_CUDA_CALL T##3 make_##T##3(T x) \
+ { \
+ return make_##T##3(x, x, x); \
+ } \
+ SLANG_FORCE_INLINE SLANG_CUDA_CALL T##4 make_##T##4(T x) \
+ { \
+ return make_##T##4(x, x, x, x); \
+ }
#endif
SLANG_MAKE_VECTOR_FROM_SCALAR(int)
SLANG_MAKE_VECTOR_FROM_SCALAR(uint)
@@ -426,18 +542,22 @@ SLANG_MAKE_VECTOR_FROM_SCALAR(double)
#if SLANG_CUDA_ENABLE_HALF
SLANG_MAKE_VECTOR_FROM_SCALAR(__half)
#if !SLANG_CUDA_RTC
-SLANG_FORCE_INLINE SLANG_CUDA_CALL __half1 make___half1(__half x) { return __half1{x}; }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL __half1 make___half1(__half x)
+{
+ return __half1{x};
+}
#endif
#endif
-#define SLANG_CUDA_VECTOR_ATOMIC_BINARY_IMPL(Fn,T,N) \
- SLANG_FORCE_INLINE SLANG_CUDA_CALL T##N Fn(T##N* address, T##N val) \
- {\
- T##N result; \
- for (int i = 0; i < N; i++) \
- *_slang_vector_get_element_ptr(&result, i) = Fn(_slang_vector_get_element_ptr(address, i), _slang_vector_get_element(val, i)); \
- return result; \
- }\
+#define SLANG_CUDA_VECTOR_ATOMIC_BINARY_IMPL(Fn, T, N) \
+ SLANG_FORCE_INLINE SLANG_CUDA_CALL T##N Fn(T##N* address, T##N val) \
+ { \
+ T##N result; \
+ for (int i = 0; i < N; i++) \
+ *_slang_vector_get_element_ptr(&result, i) = \
+ Fn(_slang_vector_get_element_ptr(address, i), _slang_vector_get_element(val, i)); \
+ return result; \
+ }
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 900
SLANG_CUDA_VECTOR_ATOMIC_BINARY_IMPL(atomicAdd, float, 2)
@@ -455,19 +575,24 @@ SLANG_CUDA_VECTOR_ATOMIC_BINARY_IMPL(atomicAdd, ulonglong, 3)
SLANG_CUDA_VECTOR_ATOMIC_BINARY_IMPL(atomicAdd, ulonglong, 4)
template<typename T, int n>
-struct GetVectorTypeImpl {};
-
-#define GET_VECTOR_TYPE_IMPL(T, n)\
-template<>\
-struct GetVectorTypeImpl<T,n>\
-{\
- typedef T##n type;\
- static SLANG_FORCE_INLINE SLANG_CUDA_CALL T##n fromScalar(T v) { return make_##T##n(v); } \
+struct GetVectorTypeImpl
+{
};
-#define GET_VECTOR_TYPE_IMPL_N(T)\
- GET_VECTOR_TYPE_IMPL(T, 1)\
- GET_VECTOR_TYPE_IMPL(T, 2)\
- GET_VECTOR_TYPE_IMPL(T, 3)\
+
+#define GET_VECTOR_TYPE_IMPL(T, n) \
+ template<> \
+ struct GetVectorTypeImpl<T, n> \
+ { \
+ typedef T##n type; \
+ static SLANG_FORCE_INLINE SLANG_CUDA_CALL T##n fromScalar(T v) \
+ { \
+ return make_##T##n(v); \
+ } \
+ };
+#define GET_VECTOR_TYPE_IMPL_N(T) \
+ GET_VECTOR_TYPE_IMPL(T, 1) \
+ GET_VECTOR_TYPE_IMPL(T, 2) \
+ GET_VECTOR_TYPE_IMPL(T, 3) \
GET_VECTOR_TYPE_IMPL(T, 4)
GET_VECTOR_TYPE_IMPL_N(int)
@@ -500,11 +625,14 @@ SLANG_FORCE_INLINE SLANG_CUDA_CALL Vector<T, n> _slang_vector_reshape(const Vect
return result;
}
-template <typename T, int ROWS, int COLS>
+template<typename T, int ROWS, int COLS>
struct Matrix
{
Vector<T, COLS> rows[ROWS];
- SLANG_FORCE_INLINE SLANG_CUDA_CALL Vector<T, COLS>& operator[](size_t index) { return rows[index]; }
+ SLANG_FORCE_INLINE SLANG_CUDA_CALL Vector<T, COLS>& operator[](size_t index)
+ {
+ return rows[index];
+ }
};
@@ -515,7 +643,6 @@ SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, ROWS, COLS> makeMatrix(T scalar)
for (int i = 0; i < ROWS; i++)
result.rows[i] = GetVectorTypeImpl<T, COLS>::fromScalar(scalar);
return result;
-
}
template<typename T, int ROWS, int COLS>
@@ -527,7 +654,9 @@ SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, ROWS, COLS> makeMatrix(const Vector
}
template<typename T, int ROWS, int COLS>
-SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, ROWS, COLS> makeMatrix(const Vector<T, COLS>& row0, const Vector<T, COLS>& row1)
+SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, ROWS, COLS> makeMatrix(
+ const Vector<T, COLS>& row0,
+ const Vector<T, COLS>& row1)
{
Matrix<T, ROWS, COLS> result;
result.rows[0] = row0;
@@ -536,7 +665,10 @@ SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, ROWS, COLS> makeMatrix(const Vector
}
template<typename T, int ROWS, int COLS>
-SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, ROWS, COLS> makeMatrix(const Vector<T, COLS>& row0, const Vector<T, COLS>& row1, const Vector<T, COLS>& row2)
+SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, ROWS, COLS> makeMatrix(
+ const Vector<T, COLS>& row0,
+ const Vector<T, COLS>& row1,
+ const Vector<T, COLS>& row2)
{
Matrix<T, ROWS, COLS> result;
result.rows[0] = row0;
@@ -546,7 +678,11 @@ SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, ROWS, COLS> makeMatrix(const Vector
}
template<typename T, int ROWS, int COLS>
-SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, ROWS, COLS> makeMatrix(const Vector<T, COLS>& row0, const Vector<T, COLS>& row1, const Vector<T, COLS>& row2, const Vector<T, COLS>& row3)
+SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, ROWS, COLS> makeMatrix(
+ const Vector<T, COLS>& row0,
+ const Vector<T, COLS>& row1,
+ const Vector<T, COLS>& row2,
+ const Vector<T, COLS>& row3)
{
Matrix<T, ROWS, COLS> result;
result.rows[0] = row0;
@@ -557,16 +693,20 @@ SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, ROWS, COLS> makeMatrix(const Vector
}
template<typename T, int ROWS, int COLS, typename U, int otherRow, int otherCol>
-SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, ROWS, COLS> makeMatrix(const Matrix<U, otherRow, otherCol>& other)
+SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, ROWS, COLS> makeMatrix(
+ const Matrix<U, otherRow, otherCol>& other)
{
Matrix<T, ROWS, COLS> result;
int minRow = ROWS;
int minCol = COLS;
- if (minRow > otherRow) minRow = otherRow;
- if (minCol > otherCol) minCol = otherCol;
+ if (minRow > otherRow)
+ minRow = otherRow;
+ if (minCol > otherCol)
+ minCol = otherCol;
for (int i = 0; i < minRow; i++)
for (int j = 0; j < minCol; j++)
- *_slang_vector_get_element_ptr(result.rows + i, j) = (T)_slang_vector_get_element(other.rows[i], j);
+ *_slang_vector_get_element_ptr(result.rows + i, j) =
+ (T)_slang_vector_get_element(other.rows[i], j);
return result;
}
@@ -574,129 +714,238 @@ template<typename T, int ROWS, int COLS>
SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, ROWS, COLS> makeMatrix(T v0, T v1, T v2, T v3)
{
Matrix<T, ROWS, COLS> rs;
- rs.rows[0].x = v0; rs.rows[0].y = v1;
- rs.rows[1].x = v2; rs.rows[1].y = v3;
+ rs.rows[0].x = v0;
+ rs.rows[0].y = v1;
+ rs.rows[1].x = v2;
+ rs.rows[1].y = v3;
return rs;
}
template<typename T, int ROWS, int COLS>
-SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, ROWS, COLS> makeMatrix(T v0, T v1, T v2, T v3, T v4, T v5)
+SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, ROWS, COLS> makeMatrix(
+ T v0,
+ T v1,
+ T v2,
+ T v3,
+ T v4,
+ T v5)
{
Matrix<T, ROWS, COLS> rs;
if (COLS == 3)
{
- rs.rows[0].x = v0; rs.rows[0].y = v1; rs.rows[0].z = v2;
- rs.rows[1].x = v3; rs.rows[1].y = v4; rs.rows[1].z = v5;
+ rs.rows[0].x = v0;
+ rs.rows[0].y = v1;
+ rs.rows[0].z = v2;
+ rs.rows[1].x = v3;
+ rs.rows[1].y = v4;
+ rs.rows[1].z = v5;
}
else
{
- rs.rows[0].x = v0; rs.rows[0].y = v1;
- rs.rows[1].x = v2; rs.rows[1].y = v3;
- rs.rows[2].x = v4; rs.rows[2].y = v5;
+ rs.rows[0].x = v0;
+ rs.rows[0].y = v1;
+ rs.rows[1].x = v2;
+ rs.rows[1].y = v3;
+ rs.rows[2].x = v4;
+ rs.rows[2].y = v5;
}
return rs;
-
}
template<typename T, int ROWS, int COLS>
-SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, ROWS, COLS> makeMatrix(T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7)
+SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, ROWS, COLS> makeMatrix(
+ T v0,
+ T v1,
+ T v2,
+ T v3,
+ T v4,
+ T v5,
+ T v6,
+ T v7)
{
Matrix<T, ROWS, COLS> rs;
if (COLS == 4)
{
- rs.rows[0].x = v0; rs.rows[0].y = v1; rs.rows[0].z = v2; rs.rows[0].w = v3;
- rs.rows[1].x = v4; rs.rows[1].y = v5; rs.rows[1].z = v6; rs.rows[1].w = v7;
+ rs.rows[0].x = v0;
+ rs.rows[0].y = v1;
+ rs.rows[0].z = v2;
+ rs.rows[0].w = v3;
+ rs.rows[1].x = v4;
+ rs.rows[1].y = v5;
+ rs.rows[1].z = v6;
+ rs.rows[1].w = v7;
}
else
{
- rs.rows[0].x = v0; rs.rows[0].y = v1;
- rs.rows[1].x = v2; rs.rows[1].y = v3;
- rs.rows[2].x = v4; rs.rows[2].y = v5;
- rs.rows[3].x = v6; rs.rows[3].y = v7;
+ rs.rows[0].x = v0;
+ rs.rows[0].y = v1;
+ rs.rows[1].x = v2;
+ rs.rows[1].y = v3;
+ rs.rows[2].x = v4;
+ rs.rows[2].y = v5;
+ rs.rows[3].x = v6;
+ rs.rows[3].y = v7;
}
return rs;
}
template<typename T, int ROWS, int COLS>
-SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, ROWS, COLS> makeMatrix(T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, T v8)
+SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, ROWS, COLS> makeMatrix(
+ T v0,
+ T v1,
+ T v2,
+ T v3,
+ T v4,
+ T v5,
+ T v6,
+ T v7,
+ T v8)
{
Matrix<T, ROWS, COLS> rs;
- rs.rows[0].x = v0; rs.rows[0].y = v1; rs.rows[0].z = v2;
- rs.rows[1].x = v3; rs.rows[1].y = v4; rs.rows[1].z = v5;
- rs.rows[2].x = v6; rs.rows[2].y = v7; rs.rows[2].z = v8;
+ rs.rows[0].x = v0;
+ rs.rows[0].y = v1;
+ rs.rows[0].z = v2;
+ rs.rows[1].x = v3;
+ rs.rows[1].y = v4;
+ rs.rows[1].z = v5;
+ rs.rows[2].x = v6;
+ rs.rows[2].y = v7;
+ rs.rows[2].z = v8;
return rs;
}
template<typename T, int ROWS, int COLS>
-SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, ROWS, COLS> makeMatrix(T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, T v8, T v9, T v10, T v11)
+SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, ROWS, COLS> makeMatrix(
+ T v0,
+ T v1,
+ T v2,
+ T v3,
+ T v4,
+ T v5,
+ T v6,
+ T v7,
+ T v8,
+ T v9,
+ T v10,
+ T v11)
{
Matrix<T, ROWS, COLS> rs;
if (COLS == 4)
{
- rs.rows[0].x = v0; rs.rows[0].y = v1; rs.rows[0].z = v2; rs.rows[0].w = v3;
- rs.rows[1].x = v4; rs.rows[1].y = v5; rs.rows[1].z = v6; rs.rows[1].w = v7;
- rs.rows[2].x = v8; rs.rows[2].y = v9; rs.rows[2].z = v10; rs.rows[2].w = v11;
+ rs.rows[0].x = v0;
+ rs.rows[0].y = v1;
+ rs.rows[0].z = v2;
+ rs.rows[0].w = v3;
+ rs.rows[1].x = v4;
+ rs.rows[1].y = v5;
+ rs.rows[1].z = v6;
+ rs.rows[1].w = v7;
+ rs.rows[2].x = v8;
+ rs.rows[2].y = v9;
+ rs.rows[2].z = v10;
+ rs.rows[2].w = v11;
}
else
{
- rs.rows[0].x = v0; rs.rows[0].y = v1; rs.rows[0].z = v2;
- rs.rows[1].x = v3; rs.rows[1].y = v4; rs.rows[1].z = v5;
- rs.rows[2].x = v6; rs.rows[2].y = v7; rs.rows[2].z = v8;
- rs.rows[3].x = v9; rs.rows[3].y = v10; rs.rows[3].z = v11;
+ rs.rows[0].x = v0;
+ rs.rows[0].y = v1;
+ rs.rows[0].z = v2;
+ rs.rows[1].x = v3;
+ rs.rows[1].y = v4;
+ rs.rows[1].z = v5;
+ rs.rows[2].x = v6;
+ rs.rows[2].y = v7;
+ rs.rows[2].z = v8;
+ rs.rows[3].x = v9;
+ rs.rows[3].y = v10;
+ rs.rows[3].z = v11;
}
return rs;
}
template<typename T, int ROWS, int COLS>
-SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, ROWS, COLS> makeMatrix(T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15)
+SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, ROWS, COLS> makeMatrix(
+ T v0,
+ T v1,
+ T v2,
+ T v3,
+ T v4,
+ T v5,
+ T v6,
+ T v7,
+ T v8,
+ T v9,
+ T v10,
+ T v11,
+ T v12,
+ T v13,
+ T v14,
+ T v15)
{
Matrix<T, ROWS, COLS> rs;
- rs.rows[0].x = v0; rs.rows[0].y = v1; rs.rows[0].z = v2; rs.rows[0].w = v3;
- rs.rows[1].x = v4; rs.rows[1].y = v5; rs.rows[1].z = v6; rs.rows[1].w = v7;
- rs.rows[2].x = v8; rs.rows[2].y = v9; rs.rows[2].z = v10; rs.rows[2].w = v11;
- rs.rows[3].x = v12; rs.rows[3].y = v13; rs.rows[3].z = v14; rs.rows[3].w = v15;
+ rs.rows[0].x = v0;
+ rs.rows[0].y = v1;
+ rs.rows[0].z = v2;
+ rs.rows[0].w = v3;
+ rs.rows[1].x = v4;
+ rs.rows[1].y = v5;
+ rs.rows[1].z = v6;
+ rs.rows[1].w = v7;
+ rs.rows[2].x = v8;
+ rs.rows[2].y = v9;
+ rs.rows[2].z = v10;
+ rs.rows[2].w = v11;
+ rs.rows[3].x = v12;
+ rs.rows[3].y = v13;
+ rs.rows[3].z = v14;
+ rs.rows[3].w = v15;
return rs;
}
-#define SLANG_MATRIX_BINARY_OP(T, op) \
- template<int R, int C> \
- SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, R, C> operator op(const Matrix<T, R, C>& thisVal, const Matrix<T, R, C>& other) \
- { \
- Matrix<T, R, C> result;\
- for (int i = 0; i < R; i++) \
- for (int j = 0; j < C; j++) \
- *_slang_vector_get_element_ptr(result.rows+i,j) = _slang_vector_get_element(thisVal.rows[i], j) op _slang_vector_get_element(other.rows[i], j); \
- return result;\
+#define SLANG_MATRIX_BINARY_OP(T, op) \
+ template<int R, int C> \
+ SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, R, C> operator op( \
+ const Matrix<T, R, C>& thisVal, \
+ const Matrix<T, R, C>& other) \
+ { \
+ Matrix<T, R, C> result; \
+ for (int i = 0; i < R; i++) \
+ for (int j = 0; j < C; j++) \
+ *_slang_vector_get_element_ptr(result.rows + i, j) = \
+ _slang_vector_get_element(thisVal.rows[i], j) \
+ op _slang_vector_get_element(other.rows[i], j); \
+ return result; \
}
-#define SLANG_MATRIX_UNARY_OP(T, op) \
- template<int R, int C> \
+#define SLANG_MATRIX_UNARY_OP(T, op) \
+ template<int R, int C> \
SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, R, C> operator op(const Matrix<T, R, C>& thisVal) \
- { \
- Matrix<T, R, C> result;\
- for (int i = 0; i < R; i++) \
- for (int j = 0; j < C; j++) \
- *_slang_vector_get_element_ptr(result.rows+i,j) = op _slang_vector_get_element(thisVal.rows[i], j); \
- return result;\
- }
-#define SLANG_INT_MATRIX_OPS(T) \
- SLANG_MATRIX_BINARY_OP(T, +)\
- SLANG_MATRIX_BINARY_OP(T, -)\
- SLANG_MATRIX_BINARY_OP(T, *)\
- SLANG_MATRIX_BINARY_OP(T, / )\
- SLANG_MATRIX_BINARY_OP(T, &)\
- SLANG_MATRIX_BINARY_OP(T, |)\
- SLANG_MATRIX_BINARY_OP(T, &&)\
- SLANG_MATRIX_BINARY_OP(T, ||)\
- SLANG_MATRIX_BINARY_OP(T, ^)\
- SLANG_MATRIX_BINARY_OP(T, %)\
- SLANG_MATRIX_UNARY_OP(T, !)\
+ { \
+ Matrix<T, R, C> result; \
+ for (int i = 0; i < R; i++) \
+ for (int j = 0; j < C; j++) \
+ *_slang_vector_get_element_ptr(result.rows + i, j) = \
+ op _slang_vector_get_element(thisVal.rows[i], j); \
+ return result; \
+ }
+#define SLANG_INT_MATRIX_OPS(T) \
+ SLANG_MATRIX_BINARY_OP(T, +) \
+ SLANG_MATRIX_BINARY_OP(T, -) \
+ SLANG_MATRIX_BINARY_OP(T, *) \
+ SLANG_MATRIX_BINARY_OP(T, /) \
+ SLANG_MATRIX_BINARY_OP(T, &) \
+ SLANG_MATRIX_BINARY_OP(T, |) \
+ SLANG_MATRIX_BINARY_OP(T, &&) \
+ SLANG_MATRIX_BINARY_OP(T, ||) \
+ SLANG_MATRIX_BINARY_OP(T, ^) \
+ SLANG_MATRIX_BINARY_OP(T, %) \
+ SLANG_MATRIX_UNARY_OP(T, !) \
SLANG_MATRIX_UNARY_OP(T, ~)
#define SLANG_FLOAT_MATRIX_OPS(T) \
- SLANG_MATRIX_BINARY_OP(T, +)\
- SLANG_MATRIX_BINARY_OP(T, -)\
- SLANG_MATRIX_BINARY_OP(T, *)\
- SLANG_MATRIX_BINARY_OP(T, /)\
+ SLANG_MATRIX_BINARY_OP(T, +) \
+ SLANG_MATRIX_BINARY_OP(T, -) \
+ SLANG_MATRIX_BINARY_OP(T, *) \
+ SLANG_MATRIX_BINARY_OP(T, /) \
SLANG_MATRIX_UNARY_OP(T, -)
SLANG_INT_MATRIX_OPS(int)
SLANG_INT_MATRIX_OPS(uint)
@@ -711,48 +960,57 @@ SLANG_FLOAT_MATRIX_OPS(double)
#if SLANG_CUDA_ENABLE_HALF
SLANG_FLOAT_MATRIX_OPS(__half)
#endif
-#define SLANG_MATRIX_INT_NEG_OP(T) \
- template<int R, int C>\
+#define SLANG_MATRIX_INT_NEG_OP(T) \
+ template<int R, int C> \
SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, R, C> operator-(Matrix<T, R, C> thisVal) \
- { \
- Matrix<T, R, C> result;\
- for (int i = 0; i < R; i++) \
- for (int j = 0; j < C; j++) \
- *_slang_vector_get_element_ptr(result.rows+i,j) = 0 - _slang_vector_get_element(thisVal.rows[i], j); \
- return result;\
- }
- SLANG_MATRIX_INT_NEG_OP(int)
- SLANG_MATRIX_INT_NEG_OP(uint)
- SLANG_MATRIX_INT_NEG_OP(short)
- SLANG_MATRIX_INT_NEG_OP(ushort)
- SLANG_MATRIX_INT_NEG_OP(char)
- SLANG_MATRIX_INT_NEG_OP(uchar)
- SLANG_MATRIX_INT_NEG_OP(longlong)
- SLANG_MATRIX_INT_NEG_OP(ulonglong)
-
-#define SLANG_FLOAT_MATRIX_MOD(T)\
- template<int R, int C> \
- SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, R, C> operator%(Matrix<T, R, C> left, Matrix<T, R, C> right) \
- {\
- Matrix<T, R, C> result;\
- for (int i = 0; i < R; i++) \
- for (int j = 0; j < C; j++) \
- *_slang_vector_get_element_ptr(result.rows+i,j) = _slang_fmod(_slang_vector_get_element(left.rows[i], j), _slang_vector_get_element(right.rows[i], j)); \
- return result;\
- }
-
- SLANG_FLOAT_MATRIX_MOD(float)
- SLANG_FLOAT_MATRIX_MOD(double)
+ { \
+ Matrix<T, R, C> result; \
+ for (int i = 0; i < R; i++) \
+ for (int j = 0; j < C; j++) \
+ *_slang_vector_get_element_ptr(result.rows + i, j) = \
+ 0 - _slang_vector_get_element(thisVal.rows[i], j); \
+ return result; \
+ }
+SLANG_MATRIX_INT_NEG_OP(int)
+SLANG_MATRIX_INT_NEG_OP(uint)
+SLANG_MATRIX_INT_NEG_OP(short)
+SLANG_MATRIX_INT_NEG_OP(ushort)
+SLANG_MATRIX_INT_NEG_OP(char)
+SLANG_MATRIX_INT_NEG_OP(uchar)
+SLANG_MATRIX_INT_NEG_OP(longlong)
+SLANG_MATRIX_INT_NEG_OP(ulonglong)
+
+#define SLANG_FLOAT_MATRIX_MOD(T) \
+ template<int R, int C> \
+ SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, R, C> operator%( \
+ Matrix<T, R, C> left, \
+ Matrix<T, R, C> right) \
+ { \
+ Matrix<T, R, C> result; \
+ for (int i = 0; i < R; i++) \
+ for (int j = 0; j < C; j++) \
+ *_slang_vector_get_element_ptr(result.rows + i, j) = _slang_fmod( \
+ _slang_vector_get_element(left.rows[i], j), \
+ _slang_vector_get_element(right.rows[i], j)); \
+ return result; \
+ }
+
+SLANG_FLOAT_MATRIX_MOD(float)
+SLANG_FLOAT_MATRIX_MOD(double)
#if SLANG_CUDA_ENABLE_HALF
- template<int R, int C>
- SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<__half, R, C> operator%(Matrix<__half, R, C> left, Matrix<__half, R, C> right)
- {
- Matrix<__half, R, C> result;
- for (int i = 0; i < R; i++)
- for (int j = 0; j < C; j++)
- * _slang_vector_get_element_ptr(result.rows + i, j) = __float2half(_slang_fmod(__half2float(_slang_vector_get_element(left.rows[i], j)), __half2float(_slang_vector_get_element(right.rows[i], j))));
- return result;
- }
+template<int R, int C>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<__half, R, C> operator%(
+ Matrix<__half, R, C> left,
+ Matrix<__half, R, C> right)
+{
+ Matrix<__half, R, C> result;
+ for (int i = 0; i < R; i++)
+ for (int j = 0; j < C; j++)
+ *_slang_vector_get_element_ptr(result.rows + i, j) = __float2half(_slang_fmod(
+ __half2float(_slang_vector_get_element(left.rows[i], j)),
+ __half2float(_slang_vector_get_element(right.rows[i], j))));
+ return result;
+}
#endif
#undef SLANG_FLOAT_MATRIX_MOD
#undef SLANG_MATRIX_BINARY_OP
@@ -762,19 +1020,24 @@ SLANG_FLOAT_MATRIX_OPS(__half)
#undef SLANG_MATRIX_INT_NEG_OP
#undef SLANG_FLOAT_MATRIX_MOD
-#define SLANG_SELECT_IMPL(T, N)\
-SLANG_FORCE_INLINE SLANG_CUDA_CALL Vector<T, N> _slang_select(bool##N condition, Vector<T, N> v0, Vector<T, N> v1) \
-{ \
- Vector<T, N> result; \
- for (int i = 0; i < N; i++) \
- { \
- *_slang_vector_get_element_ptr(&result, i) = _slang_vector_get_element(condition, i) ? _slang_vector_get_element(v0, i) : _slang_vector_get_element(v1, i); \
- } \
- return result; \
-}
-#define SLANG_SELECT_T(T)\
- SLANG_SELECT_IMPL(T, 2)\
- SLANG_SELECT_IMPL(T, 3)\
+#define SLANG_SELECT_IMPL(T, N) \
+ SLANG_FORCE_INLINE SLANG_CUDA_CALL Vector<T, N> _slang_select( \
+ bool##N condition, \
+ Vector<T, N> v0, \
+ Vector<T, N> v1) \
+ { \
+ Vector<T, N> result; \
+ for (int i = 0; i < N; i++) \
+ { \
+ *_slang_vector_get_element_ptr(&result, i) = _slang_vector_get_element(condition, i) \
+ ? _slang_vector_get_element(v0, i) \
+ : _slang_vector_get_element(v1, i); \
+ } \
+ return result; \
+ }
+#define SLANG_SELECT_T(T) \
+ SLANG_SELECT_IMPL(T, 2) \
+ SLANG_SELECT_IMPL(T, 3) \
SLANG_SELECT_IMPL(T, 4)
SLANG_SELECT_T(int)
@@ -794,53 +1057,103 @@ SLANG_FORCE_INLINE SLANG_CUDA_CALL T _slang_select(bool condition, T v0, T v1)
//
// Half support
-//
+//
#if SLANG_CUDA_ENABLE_HALF
SLANG_SELECT_T(__half)
// Convenience functions ushort -> half
-SLANG_FORCE_INLINE SLANG_CUDA_CALL __half2 __ushort_as_half(const ushort2& i) { return __halves2half2(__ushort_as_half(i.x), __ushort_as_half(i.y)); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL __half3 __ushort_as_half(const ushort3& i) { return __half3{__ushort_as_half(i.x), __ushort_as_half(i.y), __ushort_as_half(i.z)}; }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL __half4 __ushort_as_half(const ushort4& i) { return __half4{ __ushort_as_half(i.x), __ushort_as_half(i.y), __ushort_as_half(i.z), __ushort_as_half(i.w) }; }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL __half2 __ushort_as_half(const ushort2& i)
+{
+ return __halves2half2(__ushort_as_half(i.x), __ushort_as_half(i.y));
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL __half3 __ushort_as_half(const ushort3& i)
+{
+ return __half3{__ushort_as_half(i.x), __ushort_as_half(i.y), __ushort_as_half(i.z)};
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL __half4 __ushort_as_half(const ushort4& i)
+{
+ return __half4{
+ __ushort_as_half(i.x),
+ __ushort_as_half(i.y),
+ __ushort_as_half(i.z),
+ __ushort_as_half(i.w)};
+}
// Convenience functions half -> ushort
-SLANG_FORCE_INLINE SLANG_CUDA_CALL ushort2 __half_as_ushort(const __half2& i) { return make_ushort2(__half_as_ushort(i.x), __half_as_ushort(i.y)); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL ushort3 __half_as_ushort(const __half3& i) { return make_ushort3(__half_as_ushort(i.x), __half_as_ushort(i.y), __half_as_ushort(i.z)); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL ushort4 __half_as_ushort(const __half4& i) { return make_ushort4(__half_as_ushort(i.x), __half_as_ushort(i.y), __half_as_ushort(i.z), __half_as_ushort(i.w)); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL ushort2 __half_as_ushort(const __half2& i)
+{
+ return make_ushort2(__half_as_ushort(i.x), __half_as_ushort(i.y));
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL ushort3 __half_as_ushort(const __half3& i)
+{
+ return make_ushort3(__half_as_ushort(i.x), __half_as_ushort(i.y), __half_as_ushort(i.z));
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL ushort4 __half_as_ushort(const __half4& i)
+{
+ return make_ushort4(
+ __half_as_ushort(i.x),
+ __half_as_ushort(i.y),
+ __half_as_ushort(i.z),
+ __half_as_ushort(i.w));
+}
-// This is a little bit of a hack. Fortunately CUDA has the definitions of the templated types in
+// This is a little bit of a hack. Fortunately CUDA has the definitions of the templated types in
// include/surface_indirect_functions.h
-// Here we find the template definition requires a specialization of __nv_isurf_trait to allow
-// a specialization of the surface write functions.
-// This *isn't* a problem on the read functions as they don't have a return type that uses this mechanism
+// Here we find the template definition requires a specialization of __nv_isurf_trait to allow
+// a specialization of the surface write functions.
+// This *isn't* a problem on the read functions as they don't have a return type that uses this
+// mechanism
-template<> struct __nv_isurf_trait<__half> { typedef void type; };
-template<> struct __nv_isurf_trait<__half2> { typedef void type; };
-template<> struct __nv_isurf_trait<__half4> { typedef void type; };
+template<>
+struct __nv_isurf_trait<__half>
+{
+ typedef void type;
+};
+template<>
+struct __nv_isurf_trait<__half2>
+{
+ typedef void type;
+};
+template<>
+struct __nv_isurf_trait<__half4>
+{
+ typedef void type;
+};
#define SLANG_DROP_PARENS(...) __VA_ARGS__
-#define SLANG_SURFACE_READ(FUNC_NAME, TYPE_ARGS, ARGS) \
-template <> \
-SLANG_FORCE_INLINE SLANG_CUDA_CALL __half FUNC_NAME<__half>(cudaSurfaceObject_t surfObj, SLANG_DROP_PARENS TYPE_ARGS, cudaSurfaceBoundaryMode boundaryMode) \
-{ \
- return __ushort_as_half(FUNC_NAME<ushort>(surfObj, SLANG_DROP_PARENS ARGS, boundaryMode)); \
-} \
-\
-template <> \
-SLANG_FORCE_INLINE SLANG_CUDA_CALL __half2 FUNC_NAME<__half2>(cudaSurfaceObject_t surfObj, SLANG_DROP_PARENS TYPE_ARGS, cudaSurfaceBoundaryMode boundaryMode) \
-{ \
- return __ushort_as_half(FUNC_NAME<ushort2>(surfObj, SLANG_DROP_PARENS ARGS, boundaryMode)); \
-} \
-\
-template <> \
-SLANG_FORCE_INLINE SLANG_CUDA_CALL __half4 FUNC_NAME<__half4>(cudaSurfaceObject_t surfObj, SLANG_DROP_PARENS TYPE_ARGS, cudaSurfaceBoundaryMode boundaryMode) \
-{ \
- return __ushort_as_half(FUNC_NAME<ushort4>(surfObj, SLANG_DROP_PARENS ARGS, boundaryMode)); \
-}
+#define SLANG_SURFACE_READ(FUNC_NAME, TYPE_ARGS, ARGS) \
+ template<> \
+ SLANG_FORCE_INLINE SLANG_CUDA_CALL __half FUNC_NAME<__half>( \
+ cudaSurfaceObject_t surfObj, \
+ SLANG_DROP_PARENS TYPE_ARGS, \
+ cudaSurfaceBoundaryMode boundaryMode) \
+ { \
+ return __ushort_as_half(FUNC_NAME<ushort>(surfObj, SLANG_DROP_PARENS ARGS, boundaryMode)); \
+ } \
+ \
+ template<> \
+ SLANG_FORCE_INLINE SLANG_CUDA_CALL __half2 FUNC_NAME<__half2>( \
+ cudaSurfaceObject_t surfObj, \
+ SLANG_DROP_PARENS TYPE_ARGS, \
+ cudaSurfaceBoundaryMode boundaryMode) \
+ { \
+ return __ushort_as_half( \
+ FUNC_NAME<ushort2>(surfObj, SLANG_DROP_PARENS ARGS, boundaryMode)); \
+ } \
+ \
+ template<> \
+ SLANG_FORCE_INLINE SLANG_CUDA_CALL __half4 FUNC_NAME<__half4>( \
+ cudaSurfaceObject_t surfObj, \
+ SLANG_DROP_PARENS TYPE_ARGS, \
+ cudaSurfaceBoundaryMode boundaryMode) \
+ { \
+ return __ushort_as_half( \
+ FUNC_NAME<ushort4>(surfObj, SLANG_DROP_PARENS ARGS, boundaryMode)); \
+ }
SLANG_SURFACE_READ(surf1Dread, (int x), (x))
SLANG_SURFACE_READ(surf2Dread, (int x, int y), (x, y))
@@ -850,24 +1163,36 @@ SLANG_SURFACE_READ(surf2DLayeredread, (int x, int y, int layer), (x, y, layer))
SLANG_SURFACE_READ(surfCubemapread, (int x, int y, int face), (x, y, face))
SLANG_SURFACE_READ(surfCubemapLayeredread, (int x, int y, int layerFace), (x, y, layerFace))
-#define SLANG_SURFACE_WRITE(FUNC_NAME, TYPE_ARGS, ARGS) \
-template <> \
-SLANG_FORCE_INLINE SLANG_CUDA_CALL void FUNC_NAME<__half>(__half data, cudaSurfaceObject_t surfObj, SLANG_DROP_PARENS TYPE_ARGS, cudaSurfaceBoundaryMode boundaryMode) \
-{ \
- FUNC_NAME<ushort>(__half_as_ushort(data), surfObj, SLANG_DROP_PARENS ARGS, boundaryMode); \
-} \
-\
-template <> \
-SLANG_FORCE_INLINE SLANG_CUDA_CALL void FUNC_NAME<__half2>(__half2 data, cudaSurfaceObject_t surfObj, SLANG_DROP_PARENS TYPE_ARGS, cudaSurfaceBoundaryMode boundaryMode) \
-{ \
- FUNC_NAME<ushort2>(__half_as_ushort(data), surfObj, SLANG_DROP_PARENS ARGS, boundaryMode); \
-} \
-\
-template <> \
-SLANG_FORCE_INLINE SLANG_CUDA_CALL void FUNC_NAME<__half4>(__half4 data, cudaSurfaceObject_t surfObj, SLANG_DROP_PARENS TYPE_ARGS, cudaSurfaceBoundaryMode boundaryMode) \
-{ \
- FUNC_NAME<ushort4>(__half_as_ushort(data), surfObj, SLANG_DROP_PARENS ARGS, boundaryMode); \
-}
+#define SLANG_SURFACE_WRITE(FUNC_NAME, TYPE_ARGS, ARGS) \
+ template<> \
+ SLANG_FORCE_INLINE SLANG_CUDA_CALL void FUNC_NAME<__half>( \
+ __half data, \
+ cudaSurfaceObject_t surfObj, \
+ SLANG_DROP_PARENS TYPE_ARGS, \
+ cudaSurfaceBoundaryMode boundaryMode) \
+ { \
+ FUNC_NAME<ushort>(__half_as_ushort(data), surfObj, SLANG_DROP_PARENS ARGS, boundaryMode); \
+ } \
+ \
+ template<> \
+ SLANG_FORCE_INLINE SLANG_CUDA_CALL void FUNC_NAME<__half2>( \
+ __half2 data, \
+ cudaSurfaceObject_t surfObj, \
+ SLANG_DROP_PARENS TYPE_ARGS, \
+ cudaSurfaceBoundaryMode boundaryMode) \
+ { \
+ FUNC_NAME<ushort2>(__half_as_ushort(data), surfObj, SLANG_DROP_PARENS ARGS, boundaryMode); \
+ } \
+ \
+ template<> \
+ SLANG_FORCE_INLINE SLANG_CUDA_CALL void FUNC_NAME<__half4>( \
+ __half4 data, \
+ cudaSurfaceObject_t surfObj, \
+ SLANG_DROP_PARENS TYPE_ARGS, \
+ cudaSurfaceBoundaryMode boundaryMode) \
+ { \
+ FUNC_NAME<ushort4>(__half_as_ushort(data), surfObj, SLANG_DROP_PARENS ARGS, boundaryMode); \
+ }
SLANG_SURFACE_WRITE(surf1Dwrite, (int x), (x))
SLANG_SURFACE_WRITE(surf2Dwrite, (int x, int y), (x, y))
@@ -878,38 +1203,54 @@ SLANG_SURFACE_WRITE(surfCubemapwrite, (int x, int y, int face), (x, y, face))
SLANG_SURFACE_WRITE(surfCubemapLayeredwrite, (int x, int y, int layerFace), (x, y, layerFace))
// ! Hack to test out reading !!!
-// Only works converting *from* half
-
-//template <typename T>
-//SLANG_FORCE_INLINE SLANG_CUDA_CALL T surf2Dread_convert(cudaSurfaceObject_t surfObj, int x, int y, cudaSurfaceBoundaryMode boundaryMode);
-
-#define SLANG_SURFACE_READ_HALF_CONVERT(FUNC_NAME, TYPE_ARGS, ARGS) \
-\
-template <typename T> \
-SLANG_FORCE_INLINE SLANG_CUDA_CALL T FUNC_NAME##_convert(cudaSurfaceObject_t surfObj, SLANG_DROP_PARENS TYPE_ARGS, cudaSurfaceBoundaryMode boundaryMode); \
-\
-template <> \
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float FUNC_NAME##_convert<float>(cudaSurfaceObject_t surfObj, SLANG_DROP_PARENS TYPE_ARGS, cudaSurfaceBoundaryMode boundaryMode) \
-{ \
- return __ushort_as_half(FUNC_NAME<uint16_t>(surfObj, SLANG_DROP_PARENS ARGS, boundaryMode)); \
-} \
-\
-template <> \
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float2 FUNC_NAME##_convert<float2>(cudaSurfaceObject_t surfObj, SLANG_DROP_PARENS TYPE_ARGS, cudaSurfaceBoundaryMode boundaryMode) \
-{ \
- const __half2 v = __ushort_as_half(FUNC_NAME<ushort2>(surfObj, SLANG_DROP_PARENS ARGS, boundaryMode)); \
- return float2{v.x, v.y}; \
-} \
-\
-template <> \
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float4 FUNC_NAME##_convert<float4>(cudaSurfaceObject_t surfObj, SLANG_DROP_PARENS TYPE_ARGS, cudaSurfaceBoundaryMode boundaryMode) \
-{ \
- const __half4 v = __ushort_as_half(FUNC_NAME<ushort4>(surfObj, SLANG_DROP_PARENS ARGS, boundaryMode)); \
- return float4{v.x, v.y, v.z, v.w}; \
-}
-
-SLANG_SURFACE_READ_HALF_CONVERT(surf1Dread, (int x), (x))
-SLANG_SURFACE_READ_HALF_CONVERT(surf2Dread, (int x, int y), (x, y))
+// Only works converting *from* half
+
+// template <typename T>
+// SLANG_FORCE_INLINE SLANG_CUDA_CALL T surf2Dread_convert(cudaSurfaceObject_t surfObj, int x, int
+// y, cudaSurfaceBoundaryMode boundaryMode);
+
+#define SLANG_SURFACE_READ_HALF_CONVERT(FUNC_NAME, TYPE_ARGS, ARGS) \
+ \
+ template<typename T> \
+ SLANG_FORCE_INLINE SLANG_CUDA_CALL T FUNC_NAME##_convert( \
+ cudaSurfaceObject_t surfObj, \
+ SLANG_DROP_PARENS TYPE_ARGS, \
+ cudaSurfaceBoundaryMode boundaryMode); \
+ \
+ template<> \
+ SLANG_FORCE_INLINE SLANG_CUDA_CALL float FUNC_NAME##_convert<float>( \
+ cudaSurfaceObject_t surfObj, \
+ SLANG_DROP_PARENS TYPE_ARGS, \
+ cudaSurfaceBoundaryMode boundaryMode) \
+ { \
+ return __ushort_as_half( \
+ FUNC_NAME<uint16_t>(surfObj, SLANG_DROP_PARENS ARGS, boundaryMode)); \
+ } \
+ \
+ template<> \
+ SLANG_FORCE_INLINE SLANG_CUDA_CALL float2 FUNC_NAME##_convert<float2>( \
+ cudaSurfaceObject_t surfObj, \
+ SLANG_DROP_PARENS TYPE_ARGS, \
+ cudaSurfaceBoundaryMode boundaryMode) \
+ { \
+ const __half2 v = \
+ __ushort_as_half(FUNC_NAME<ushort2>(surfObj, SLANG_DROP_PARENS ARGS, boundaryMode)); \
+ return float2{v.x, v.y}; \
+ } \
+ \
+ template<> \
+ SLANG_FORCE_INLINE SLANG_CUDA_CALL float4 FUNC_NAME##_convert<float4>( \
+ cudaSurfaceObject_t surfObj, \
+ SLANG_DROP_PARENS TYPE_ARGS, \
+ cudaSurfaceBoundaryMode boundaryMode) \
+ { \
+ const __half4 v = \
+ __ushort_as_half(FUNC_NAME<ushort4>(surfObj, SLANG_DROP_PARENS ARGS, boundaryMode)); \
+ return float4{v.x, v.y, v.z, v.w}; \
+ }
+
+SLANG_SURFACE_READ_HALF_CONVERT(surf1Dread, (int x), (x))
+SLANG_SURFACE_READ_HALF_CONVERT(surf2Dread, (int x, int y), (x, y))
SLANG_SURFACE_READ_HALF_CONVERT(surf3Dread, (int x, int y, int z), (x, y, z))
#endif
@@ -917,178 +1258,506 @@ SLANG_SURFACE_READ_HALF_CONVERT(surf3Dread, (int x, int y, int z), (x, y, z))
// Support for doing format conversion when writing to a surface/RWTexture
// NOTE! For normal surface access x values are *byte* addressed.
-// For the _convert versions they are *not*. They don't need to be because sust.p does not require it.
+// For the _convert versions they are *not*. They don't need to be because sust.p does not require
+// it.
-template <typename T>
-SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf1Dwrite_convert(T, cudaSurfaceObject_t surfObj, int x, cudaSurfaceBoundaryMode boundaryMode);
-template <typename T>
-SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf2Dwrite_convert(T, cudaSurfaceObject_t surfObj, int x, int y, cudaSurfaceBoundaryMode boundaryMode);
-template <typename T>
-SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf3Dwrite_convert(T, cudaSurfaceObject_t surfObj, int x, int y, int z, cudaSurfaceBoundaryMode boundaryMode);
+template<typename T>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf1Dwrite_convert(
+ T,
+ cudaSurfaceObject_t surfObj,
+ int x,
+ cudaSurfaceBoundaryMode boundaryMode);
+template<typename T>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf2Dwrite_convert(
+ T,
+ cudaSurfaceObject_t surfObj,
+ int x,
+ int y,
+ cudaSurfaceBoundaryMode boundaryMode);
+template<typename T>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf3Dwrite_convert(
+ T,
+ cudaSurfaceObject_t surfObj,
+ int x,
+ int y,
+ int z,
+ cudaSurfaceBoundaryMode boundaryMode);
// https://docs.nvidia.com/cuda/inline-ptx-assembly/index.html
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#surface-instructions-sust
// Float
-template <>
-SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf1Dwrite_convert<float>(float v, cudaSurfaceObject_t surfObj, int x, cudaSurfaceBoundaryMode boundaryMode)
+template<>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf1Dwrite_convert<float>(
+ float v,
+ cudaSurfaceObject_t surfObj,
+ int x,
+ cudaSurfaceBoundaryMode boundaryMode)
{
- asm volatile ( "{sust.p.1d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1}], {%2};}\n\t" :: "l"(surfObj),"r"(x),"f"(v));
+ asm volatile(
+ "{sust.p.1d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1}], {%2};}\n\t" ::"l"(surfObj),
+ "r"(x),
+ "f"(v));
}
-
-template <>
-SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf2Dwrite_convert<float>(float v, cudaSurfaceObject_t surfObj, int x, int y, cudaSurfaceBoundaryMode boundaryMode)
+
+template<>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf2Dwrite_convert<float>(
+ float v,
+ cudaSurfaceObject_t surfObj,
+ int x,
+ int y,
+ cudaSurfaceBoundaryMode boundaryMode)
{
- asm volatile ( "{sust.p.2d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2}], {%3};}\n\t" :: "l"(surfObj),"r"(x),"r"(y),"f"(v));
+ asm volatile(
+ "{sust.p.2d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2}], {%3};}\n\t" ::"l"(surfObj),
+ "r"(x),
+ "r"(y),
+ "f"(v));
}
-template <>
-SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf3Dwrite_convert<float>(float v, cudaSurfaceObject_t surfObj, int x, int y, int z, cudaSurfaceBoundaryMode boundaryMode)
-{
- asm volatile ( "{sust.p.2d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2,%3}], {%4};}\n\t" :: "l"(surfObj),"r"(x),"r"(y),"r"(z),"f"(v));
+// Formatted (sust.p) store of one float to a 3D surface at element coordinate (x, y, z).
+// PTX .3d geometry takes a four-element coordinate vector (4th ignored); boundaryMode is unused.
+template<>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf3Dwrite_convert<float>(
+ float v,
+ cudaSurfaceObject_t surfObj,
+ int x,
+ int y,
+ int z,
+ cudaSurfaceBoundaryMode boundaryMode)
+{
+ asm volatile(
+ "{sust.p.3d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2,%3,%3}], {%4};}\n\t" ::"l"(surfObj),
+ "r"(x), "r"(y), "r"(z),
+ "f"(v));
}
// Float2
-template <>
-SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf1Dwrite_convert<float2>(float2 v, cudaSurfaceObject_t surfObj, int x, cudaSurfaceBoundaryMode boundaryMode)
+// Formatted store of two floats to a 1D surface; two data registers need the .v2 qualifier.
+template<>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf1Dwrite_convert<float2>(
+ float2 v,
+ cudaSurfaceObject_t surfObj,
+ int x,
+ cudaSurfaceBoundaryMode boundaryMode)
{
const float vx = v.x, vy = v.y;
- asm volatile ( "{sust.p.1d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1}], {%2,%3};}\n\t" :: "l"(surfObj),"r"(x),"f"(vx),"f"(vy));
+ asm volatile(
+ "{sust.p.1d.v2.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1}], {%2,%3};}\n\t" ::"l"(surfObj),
+ "r"(x),
+ "f"(vx), "f"(vy));
}
-
-template <>
-SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf2Dwrite_convert<float2>(float2 v, cudaSurfaceObject_t surfObj, int x, int y, cudaSurfaceBoundaryMode boundaryMode)
+
+// Formatted store of two floats to a 2D surface; two data registers need the .v2 qualifier.
+template<>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf2Dwrite_convert<float2>(
+ float2 v,
+ cudaSurfaceObject_t surfObj,
+ int x,
+ int y,
+ cudaSurfaceBoundaryMode boundaryMode)
{
const float vx = v.x, vy = v.y;
- asm volatile ( "{sust.p.2d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2}], {%3,%4};}\n\t" :: "l"(surfObj),"r"(x),"r"(y),"f"(vx),"f"(vy));
+ asm volatile(
+ "{sust.p.2d.v2.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2}], {%3,%4};}\n\t" ::"l"(surfObj),
+ "r"(x), "r"(y),
+ "f"(vx),
+ "f"(vy));
}
-template <>
-SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf3Dwrite_convert<float2>(float2 v, cudaSurfaceObject_t surfObj, int x, int y, int z, cudaSurfaceBoundaryMode boundaryMode)
+// Formatted store of two floats to a 3D surface at element coordinate (x, y, z).
+// Uses .3d geometry (four-element coordinate vector, 4th ignored) and .v2 for the two values.
+template<>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf3Dwrite_convert<float2>(
+ float2 v,
+ cudaSurfaceObject_t surfObj,
+ int x,
+ int y,
+ int z,
+ cudaSurfaceBoundaryMode boundaryMode)
{
const float vx = v.x, vy = v.y;
- asm volatile ( "{sust.p.2d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2,%3}], {%4,%5};}\n\t" :: "l"(surfObj),"r"(x),"r"(y),"r"(z),"f"(vx),"f"(vy));
+ asm volatile(
+ "{sust.p.3d.v2.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2,%3,%3}], {%4,%5};}\n\t" ::"l"(surfObj),
+ "r"(x), "r"(y), "r"(z),
+ "f"(vx),
+ "f"(vy));
}
// Float4
-template <>
-SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf1Dwrite_convert<float4>(float4 v, cudaSurfaceObject_t surfObj, int x, cudaSurfaceBoundaryMode boundaryMode)
+// Formatted store of four floats to a 1D surface; four data registers need the .v4 qualifier.
+template<>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf1Dwrite_convert<float4>(
+ float4 v,
+ cudaSurfaceObject_t surfObj,
+ int x,
+ cudaSurfaceBoundaryMode boundaryMode)
{
const float vx = v.x, vy = v.y, vz = v.z, vw = v.w;
- asm volatile ( "{sust.p.1d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1}], {%2,%3,%4,%5};}\n\t" :: "l"(surfObj),"r"(x),"f"(vx),"f"(vy),"f"(vz),"f"(vw));
+ asm volatile(
+ "{sust.p.1d.v4.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1}], {%2,%3,%4,%5};}\n\t" ::"l"(surfObj),
+ "r"(x),
+ "f"(vx), "f"(vy),
+ "f"(vz),
+ "f"(vw));
}
-
-template <>
-SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf2Dwrite_convert<float4>(float4 v, cudaSurfaceObject_t surfObj, int x, int y, cudaSurfaceBoundaryMode boundaryMode)
+
+// Formatted store of four floats to a 2D surface; four data registers need the .v4 qualifier.
+template<>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf2Dwrite_convert<float4>(
+ float4 v,
+ cudaSurfaceObject_t surfObj,
+ int x,
+ int y,
+ cudaSurfaceBoundaryMode boundaryMode)
{
const float vx = v.x, vy = v.y, vz = v.z, vw = v.w;
- asm volatile ( "{sust.p.2d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2}], {%3,%4,%5,%6};}\n\t" :: "l"(surfObj),"r"(x),"r"(y),"f"(vx),"f"(vy),"f"(vz),"f"(vw));
+ asm volatile(
+ "{sust.p.2d.v4.b32." SLANG_PTX_BOUNDARY_MODE
+ " [%0, {%1,%2}], {%3,%4,%5,%6};}\n\t" ::"l"(surfObj),
+ "r"(x), "r"(y),
+ "f"(vx),
+ "f"(vy),
+ "f"(vz),
+ "f"(vw));
}
-template <>
-SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf3Dwrite_convert<float4>(float4 v, cudaSurfaceObject_t surfObj, int x, int y, int z, cudaSurfaceBoundaryMode boundaryMode)
+// Formatted store of four floats to a 3D surface at element coordinate (x, y, z).
+// Uses .3d geometry (four-element coordinate vector, 4th ignored) and .v4 for the four values.
+template<>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf3Dwrite_convert<float4>(
+ float4 v,
+ cudaSurfaceObject_t surfObj,
+ int x,
+ int y,
+ int z,
+ cudaSurfaceBoundaryMode boundaryMode)
{
const float vx = v.x, vy = v.y, vz = v.z, vw = v.w;
- asm volatile ( "{sust.p.2d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2,%3}], {%4,%5,%6,%7};}\n\t" :: "l"(surfObj),"r"(x),"r"(y),"r"(z),"f"(vx),"f"(vy),"f"(vz),"f"(vw));
+ asm volatile(
+ "{sust.p.3d.v4.b32." SLANG_PTX_BOUNDARY_MODE
+ " [%0, {%1,%2,%3,%3}], {%4,%5,%6,%7};}\n\t" ::"l"(surfObj),
+ "r"(x), "r"(y), "r"(z),
+ "f"(vx),
+ "f"(vy),
+ "f"(vz),
+ "f"(vw));
}
// ----------------------------- F32 -----------------------------------------
-// Unary
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_ceil(float f) { return ::ceilf(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_floor(float f) { return ::floorf(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_round(float f) { return ::roundf(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_sin(float f) { return ::sinf(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_cos(float f) { return ::cosf(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL void F32_sincos(float f, float* s, float* c) { ::sincosf(f, s, c); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_tan(float f) { return ::tanf(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_asin(float f) { return ::asinf(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_acos(float f) { return ::acosf(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_atan(float f) { return ::atanf(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_sinh(float f) { return ::sinhf(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_cosh(float f) { return ::coshf(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_tanh(float f) { return ::tanhf(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_log2(float f) { return ::log2f(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_log(float f) { return ::logf(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_log10(float f) { return ::log10f(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_exp2(float f) { return ::exp2f(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_exp(float f) { return ::expf(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_abs(float f) { return ::fabsf(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_trunc(float f) { return ::truncf(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_sqrt(float f) { return ::sqrtf(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_rsqrt(float f) { return ::rsqrtf(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_sign(float f) { return ( f == 0.0f) ? f : (( f < 0.0f) ? -1.0f : 1.0f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_frac(float f) { return f - F32_floor(f); }
-
-SLANG_FORCE_INLINE SLANG_CUDA_CALL bool F32_isnan(float f) { return isnan(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL bool F32_isfinite(float f) { return isfinite(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL bool F32_isinf(float f) { return isinf(f); }
+// Unary
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_ceil(float f)
+{
+ return ::ceilf(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_floor(float f)
+{
+ return ::floorf(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_round(float f)
+{
+ return ::roundf(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_sin(float f)
+{
+ return ::sinf(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_cos(float f)
+{
+ return ::cosf(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void F32_sincos(float f, float* s, float* c)
+{
+ ::sincosf(f, s, c);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_tan(float f)
+{
+ return ::tanf(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_asin(float f)
+{
+ return ::asinf(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_acos(float f)
+{
+ return ::acosf(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_atan(float f)
+{
+ return ::atanf(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_sinh(float f)
+{
+ return ::sinhf(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_cosh(float f)
+{
+ return ::coshf(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_tanh(float f)
+{
+ return ::tanhf(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_log2(float f)
+{
+ return ::log2f(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_log(float f)
+{
+ return ::logf(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_log10(float f)
+{
+ return ::log10f(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_exp2(float f)
+{
+ return ::exp2f(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_exp(float f)
+{
+ return ::expf(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_abs(float f)
+{
+ return ::fabsf(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_trunc(float f)
+{
+ return ::truncf(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_sqrt(float f)
+{
+ return ::sqrtf(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_rsqrt(float f)
+{
+ return ::rsqrtf(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_sign(float f)
+{
+ return (f == 0.0f) ? f : ((f < 0.0f) ? -1.0f : 1.0f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_frac(float f)
+{
+ return f - F32_floor(f);
+}
+
+SLANG_FORCE_INLINE SLANG_CUDA_CALL bool F32_isnan(float f)
+{
+ return isnan(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL bool F32_isfinite(float f)
+{
+ return isfinite(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL bool F32_isinf(float f)
+{
+ return isinf(f);
+}
// Binary
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_min(float a, float b) { return ::fminf(a, b); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_max(float a, float b) { return ::fmaxf(a, b); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_pow(float a, float b) { return ::powf(a, b); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_fmod(float a, float b) { return ::fmodf(a, b); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_remainder(float a, float b) { return ::remainderf(a, b); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_atan2(float a, float b) { return float(::atan2(a, b)); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_min(float a, float b)
+{
+ return ::fminf(a, b);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_max(float a, float b)
+{
+ return ::fmaxf(a, b);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_pow(float a, float b)
+{
+ return ::powf(a, b);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_fmod(float a, float b)
+{
+ return ::fmodf(a, b);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_remainder(float a, float b)
+{
+ return ::remainderf(a, b);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_atan2(float a, float b)
+{
+ return ::atan2f(a, b); // explicit single-precision call, like the other F32_* wrappers
+}
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_frexp(float x, int* e) { return frexpf(x, e); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_frexp(float x, int* e)
+{
+ return frexpf(x, e);
+}
SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_modf(float x, float* ip)
{
return ::modff(x, ip);
}
-SLANG_FORCE_INLINE SLANG_CUDA_CALL uint32_t F32_asuint(float f) { Union32 u; u.f = f; return u.u; }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL int32_t F32_asint(float f) { Union32 u; u.f = f; return u.i; }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL uint32_t F32_asuint(float f)
+{
+ Union32 u;
+ u.f = f;
+ return u.u;
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL int32_t F32_asint(float f)
+{
+ Union32 u;
+ u.f = f;
+ return u.i;
+}
// Ternary
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_fma(float a, float b, float c) { return ::fmaf(a, b, c); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_fma(float a, float b, float c)
+{
+ return ::fmaf(a, b, c);
+}
// ----------------------------- F64 -----------------------------------------
-// Unary
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_ceil(double f) { return ::ceil(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_floor(double f) { return ::floor(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_round(double f) { return ::round(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_sin(double f) { return ::sin(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_cos(double f) { return ::cos(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL void F64_sincos(double f, double* s, double* c) { ::sincos(f, s, c); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_tan(double f) { return ::tan(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_asin(double f) { return ::asin(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_acos(double f) { return ::acos(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_atan(double f) { return ::atan(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_sinh(double f) { return ::sinh(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_cosh(double f) { return ::cosh(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_tanh(double f) { return ::tanh(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_log2(double f) { return ::log2(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_log(double f) { return ::log(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_log10(float f) { return ::log10(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_exp2(double f) { return ::exp2(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_exp(double f) { return ::exp(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_abs(double f) { return ::fabs(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_trunc(double f) { return ::trunc(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_sqrt(double f) { return ::sqrt(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_rsqrt(double f) { return ::rsqrt(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_sign(double f) { return (f == 0.0) ? f : ((f < 0.0) ? -1.0 : 1.0); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_frac(double f) { return f - F64_floor(f); }
-
-SLANG_FORCE_INLINE SLANG_CUDA_CALL bool F64_isnan(double f) { return isnan(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL bool F64_isfinite(double f) { return isfinite(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL bool F64_isinf(double f) { return isinf(f); }
+// Unary
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_ceil(double f)
+{
+ return ::ceil(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_floor(double f)
+{
+ return ::floor(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_round(double f)
+{
+ return ::round(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_sin(double f)
+{
+ return ::sin(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_cos(double f)
+{
+ return ::cos(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void F64_sincos(double f, double* s, double* c)
+{
+ ::sincos(f, s, c);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_tan(double f)
+{
+ return ::tan(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_asin(double f)
+{
+ return ::asin(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_acos(double f)
+{
+ return ::acos(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_atan(double f)
+{
+ return ::atan(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_sinh(double f)
+{
+ return ::sinh(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_cosh(double f)
+{
+ return ::cosh(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_tanh(double f)
+{
+ return ::tanh(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_log2(double f)
+{
+ return ::log2(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_log(double f)
+{
+ return ::log(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_log10(double f)
+{
+ return ::log10(f); // parameter is double so the argument is not narrowed to float first
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_exp2(double f)
+{
+ return ::exp2(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_exp(double f)
+{
+ return ::exp(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_abs(double f)
+{
+ return ::fabs(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_trunc(double f)
+{
+ return ::trunc(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_sqrt(double f)
+{
+ return ::sqrt(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_rsqrt(double f)
+{
+ return ::rsqrt(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_sign(double f)
+{
+ return (f == 0.0) ? f : ((f < 0.0) ? -1.0 : 1.0);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_frac(double f)
+{
+ return f - F64_floor(f);
+}
+
+SLANG_FORCE_INLINE SLANG_CUDA_CALL bool F64_isnan(double f)
+{
+ return isnan(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL bool F64_isfinite(double f)
+{
+ return isfinite(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL bool F64_isinf(double f)
+{
+ return isinf(f);
+}
// Binary
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_min(double a, double b) { return ::fmin(a, b); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_max(double a, double b) { return ::fmax(a, b); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_pow(double a, double b) { return ::pow(a, b); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_fmod(double a, double b) { return ::fmod(a, b); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_remainder(double a, double b) { return ::remainder(a, b); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_atan2(double a, double b) { return ::atan2(a, b); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_min(double a, double b)
+{
+ return ::fmin(a, b);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_max(double a, double b)
+{
+ return ::fmax(a, b);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_pow(double a, double b)
+{
+ return ::pow(a, b);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_fmod(double a, double b)
+{
+ return ::fmod(a, b);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_remainder(double a, double b)
+{
+ return ::remainder(a, b);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_atan2(double a, double b)
+{
+ return ::atan2(a, b);
+}
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_frexp(double x, int* e) { return ::frexp(x, e); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_frexp(double x, int* e)
+{
+ return ::frexp(x, e);
+}
SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_modf(double x, double* ip)
{
@@ -1112,20 +1781,40 @@ SLANG_FORCE_INLINE SLANG_CUDA_CALL void F64_asint(double d, int32_t* low, int32_
}
// Ternary
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_fma(double a, double b, double c) { return ::fma(a, b, c); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_fma(double a, double b, double c)
+{
+ return ::fma(a, b, c);
+}
// ----------------------------- I32 -----------------------------------------
// Unary
-SLANG_FORCE_INLINE SLANG_CUDA_CALL int32_t I32_abs(int32_t f) { return (f < 0) ? -f : f; }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL int32_t I32_abs(int32_t f)
+{
+ return (f < 0) ? -f : f;
+}
// Binary
-SLANG_FORCE_INLINE SLANG_CUDA_CALL int32_t I32_min(int32_t a, int32_t b) { return a < b ? a : b; }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL int32_t I32_max(int32_t a, int32_t b) { return a > b ? a : b; }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL int32_t I32_min(int32_t a, int32_t b)
+{
+ return a < b ? a : b;
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL int32_t I32_max(int32_t a, int32_t b)
+{
+ return a > b ? a : b;
+}
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float I32_asfloat(int32_t x) { Union32 u; u.i = x; return u.f; }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL uint32_t I32_asuint(int32_t x) { return uint32_t(x); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double I32_asdouble(int32_t low, int32_t hi )
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float I32_asfloat(int32_t x)
+{
+ Union32 u;
+ u.i = x;
+ return u.f;
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL uint32_t I32_asuint(int32_t x)
+{
+ return uint32_t(x);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double I32_asdouble(int32_t low, int32_t hi)
{
Union64 u;
u.u = (uint64_t(hi) << 32) | uint32_t(low);
@@ -1134,15 +1823,32 @@ SLANG_FORCE_INLINE SLANG_CUDA_CALL double I32_asdouble(int32_t low, int32_t hi )
// ----------------------------- U32 -----------------------------------------
-// Unary
-SLANG_FORCE_INLINE SLANG_CUDA_CALL uint32_t U32_abs(uint32_t f) { return f; }
+// Unary
+SLANG_FORCE_INLINE SLANG_CUDA_CALL uint32_t U32_abs(uint32_t f)
+{
+ return f;
+}
// Binary
-SLANG_FORCE_INLINE SLANG_CUDA_CALL uint32_t U32_min(uint32_t a, uint32_t b) { return a < b ? a : b; }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL uint32_t U32_max(uint32_t a, uint32_t b) { return a > b ? a : b; }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL uint32_t U32_min(uint32_t a, uint32_t b)
+{
+ return a < b ? a : b;
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL uint32_t U32_max(uint32_t a, uint32_t b)
+{
+ return a > b ? a : b;
+}
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float U32_asfloat(uint32_t x) { Union32 u; u.u = x; return u.f; }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL uint32_t U32_asint(int32_t x) { return uint32_t(x); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float U32_asfloat(uint32_t x)
+{
+ Union32 u;
+ u.u = x;
+ return u.f;
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL int32_t U32_asint(uint32_t x)
+{
+ return int32_t(x); // unsigned in, signed out: the mirror image of I32_asuint
+}
SLANG_FORCE_INLINE SLANG_CUDA_CALL double U32_asdouble(uint32_t low, uint32_t hi)
{
@@ -1160,17 +1866,35 @@ SLANG_FORCE_INLINE SLANG_CUDA_CALL uint32_t U32_countbits(uint32_t v)
// ----------------------------- I64 -----------------------------------------
-SLANG_FORCE_INLINE SLANG_CUDA_CALL int64_t I64_abs(int64_t f) { return (f < 0) ? -f : f; }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL int64_t I64_abs(int64_t f)
+{
+ return (f < 0) ? -f : f;
+}
-SLANG_FORCE_INLINE SLANG_CUDA_CALL int64_t I64_min(int64_t a, int64_t b) { return a < b ? a : b; }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL int64_t I64_max(int64_t a, int64_t b) { return a > b ? a : b; }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL int64_t I64_min(int64_t a, int64_t b)
+{
+ return a < b ? a : b;
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL int64_t I64_max(int64_t a, int64_t b)
+{
+ return a > b ? a : b;
+}
// ----------------------------- U64 -----------------------------------------
-SLANG_FORCE_INLINE SLANG_CUDA_CALL int64_t U64_abs(uint64_t f) { return f; }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL uint64_t U64_abs(uint64_t f)
+{
+ return f; // unsigned input is its own absolute value; keep the unsigned type
+}
-SLANG_FORCE_INLINE SLANG_CUDA_CALL int64_t U64_min(uint64_t a, uint64_t b) { return a < b ? a : b; }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL int64_t U64_max(uint64_t a, uint64_t b) { return a > b ? a : b; }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL uint64_t U64_min(uint64_t a, uint64_t b)
+{
+ return a < b ? a : b; // returned as uint64_t so values >= 2^63 are not seen as negative
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL uint64_t U64_max(uint64_t a, uint64_t b)
+{
+ return a > b ? a : b; // returned as uint64_t so values >= 2^63 are not seen as negative
+}
SLANG_FORCE_INLINE SLANG_CUDA_CALL uint32_t U64_countbits(uint64_t v)
{
@@ -1185,7 +1909,7 @@ SLANG_FORCE_INLINE SLANG_CUDA_CALL uint32_t U64_countbits(uint64_t v)
// https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/sm5-object-structuredbuffer-getdimensions
// Missing Load(_In_ int Location, _Out_ uint Status);
-template <typename T>
+template<typename T>
struct StructuredBuffer
{
SLANG_CUDA_CALL const T& operator[](size_t index) const
@@ -1205,7 +1929,11 @@ struct StructuredBuffer
}
#ifndef SLANG_CUDA_STRUCTURED_BUFFER_NO_COUNT
- SLANG_CUDA_CALL void GetDimensions(uint32_t* outNumStructs, uint32_t* outStride) { *outNumStructs = uint32_t(count); *outStride = uint32_t(sizeof(T)); }
+ SLANG_CUDA_CALL void GetDimensions(uint32_t* outNumStructs, uint32_t* outStride)
+ {
+ *outNumStructs = uint32_t(count);
+ *outStride = uint32_t(sizeof(T));
+ }
#endif
T* data;
@@ -1214,7 +1942,7 @@ struct StructuredBuffer
#endif
};
-template <typename T>
+template<typename T>
struct RWStructuredBuffer : StructuredBuffer<T>
{
SLANG_CUDA_CALL T& operator[](size_t index) const
@@ -1230,28 +1958,28 @@ struct RWStructuredBuffer : StructuredBuffer<T>
struct ByteAddressBuffer
{
SLANG_CUDA_CALL void GetDimensions(uint32_t* outDim) const { *outDim = uint32_t(sizeInBytes); }
- SLANG_CUDA_CALL uint32_t Load(size_t index) const
- {
+ SLANG_CUDA_CALL uint32_t Load(size_t index) const
+ {
SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 4, sizeInBytes);
- return data[index >> 2];
+ return data[index >> 2];
}
- SLANG_CUDA_CALL uint2 Load2(size_t index) const
- {
- SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 8, sizeInBytes);
- const size_t dataIdx = index >> 2;
- return uint2{data[dataIdx], data[dataIdx + 1]};
+ SLANG_CUDA_CALL uint2 Load2(size_t index) const
+ {
+ SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 8, sizeInBytes);
+ const size_t dataIdx = index >> 2;
+ return uint2{data[dataIdx], data[dataIdx + 1]};
}
- SLANG_CUDA_CALL uint3 Load3(size_t index) const
- {
+ SLANG_CUDA_CALL uint3 Load3(size_t index) const
+ {
SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 12, sizeInBytes);
- const size_t dataIdx = index >> 2;
- return uint3{data[dataIdx], data[dataIdx + 1], data[dataIdx + 2]};
+ const size_t dataIdx = index >> 2;
+ return uint3{data[dataIdx], data[dataIdx + 1], data[dataIdx + 2]};
}
- SLANG_CUDA_CALL uint4 Load4(size_t index) const
- {
+ SLANG_CUDA_CALL uint4 Load4(size_t index) const
+ {
SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 16, sizeInBytes);
- const size_t dataIdx = index >> 2;
- return uint4{data[dataIdx], data[dataIdx + 1], data[dataIdx + 2], data[dataIdx + 3]};
+ const size_t dataIdx = index >> 2;
+ return uint4{data[dataIdx], data[dataIdx + 1], data[dataIdx + 2], data[dataIdx + 3]};
}
template<typename T>
SLANG_CUDA_CALL T Load(size_t index) const
@@ -1270,38 +1998,38 @@ struct ByteAddressBuffer
return rs;
}
const uint32_t* data;
- size_t sizeInBytes; //< Must be multiple of 4
+ size_t sizeInBytes; //< Must be multiple of 4
};
// https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/sm5-object-rwbyteaddressbuffer
-// Missing support for Atomic operations
+// Missing support for Atomic operations
// Missing support for Load with status
struct RWByteAddressBuffer
{
SLANG_CUDA_CALL void GetDimensions(uint32_t* outDim) const { *outDim = uint32_t(sizeInBytes); }
-
- SLANG_CUDA_CALL uint32_t Load(size_t index) const
- {
+
+ SLANG_CUDA_CALL uint32_t Load(size_t index) const
+ {
SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 4, sizeInBytes);
- return data[index >> 2];
+ return data[index >> 2];
}
- SLANG_CUDA_CALL uint2 Load2(size_t index) const
- {
+ SLANG_CUDA_CALL uint2 Load2(size_t index) const
+ {
SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 8, sizeInBytes);
- const size_t dataIdx = index >> 2;
- return uint2{data[dataIdx], data[dataIdx + 1]};
+ const size_t dataIdx = index >> 2;
+ return uint2{data[dataIdx], data[dataIdx + 1]};
}
- SLANG_CUDA_CALL uint3 Load3(size_t index) const
- {
+ SLANG_CUDA_CALL uint3 Load3(size_t index) const
+ {
SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 12, sizeInBytes);
- const size_t dataIdx = index >> 2;
- return uint3{data[dataIdx], data[dataIdx + 1], data[dataIdx + 2]};
+ const size_t dataIdx = index >> 2;
+ return uint3{data[dataIdx], data[dataIdx + 1], data[dataIdx + 2]};
}
- SLANG_CUDA_CALL uint4 Load4(size_t index) const
- {
+ SLANG_CUDA_CALL uint4 Load4(size_t index) const
+ {
SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 16, sizeInBytes);
- const size_t dataIdx = index >> 2;
- return uint4{data[dataIdx], data[dataIdx + 1], data[dataIdx + 2], data[dataIdx + 3]};
+ const size_t dataIdx = index >> 2;
+ return uint4{data[dataIdx], data[dataIdx + 1], data[dataIdx + 2], data[dataIdx + 3]};
}
template<typename T>
SLANG_CUDA_CALL T Load(size_t index) const
@@ -1311,31 +2039,31 @@ struct RWByteAddressBuffer
memcpy(&data, ((const char*)this->data) + index, sizeof(T));
return data;
}
-
- SLANG_CUDA_CALL void Store(size_t index, uint32_t v) const
- {
+
+ SLANG_CUDA_CALL void Store(size_t index, uint32_t v) const
+ {
SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 4, sizeInBytes);
- data[index >> 2] = v;
+ data[index >> 2] = v;
}
- SLANG_CUDA_CALL void Store2(size_t index, uint2 v) const
- {
+ SLANG_CUDA_CALL void Store2(size_t index, uint2 v) const
+ {
SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 8, sizeInBytes);
- const size_t dataIdx = index >> 2;
+ const size_t dataIdx = index >> 2;
data[dataIdx + 0] = v.x;
data[dataIdx + 1] = v.y;
}
- SLANG_CUDA_CALL void Store3(size_t index, uint3 v) const
- {
+ SLANG_CUDA_CALL void Store3(size_t index, uint3 v) const
+ {
SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 12, sizeInBytes);
- const size_t dataIdx = index >> 2;
+ const size_t dataIdx = index >> 2;
data[dataIdx + 0] = v.x;
data[dataIdx + 1] = v.y;
data[dataIdx + 2] = v.z;
}
- SLANG_CUDA_CALL void Store4(size_t index, uint4 v) const
- {
+ SLANG_CUDA_CALL void Store4(size_t index, uint4 v) const
+ {
SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 16, sizeInBytes);
- const size_t dataIdx = index >> 2;
+ const size_t dataIdx = index >> 2;
data[dataIdx + 0] = v.x;
data[dataIdx + 1] = v.y;
data[dataIdx + 2] = v.z;
@@ -1347,9 +2075,9 @@ struct RWByteAddressBuffer
SLANG_BOUND_CHECK_BYTE_ADDRESS(index, sizeof(T), sizeInBytes);
memcpy((char*)data + index, &value, sizeof(T));
}
-
- /// Can be used in the core module to gain access
- template <typename T>
+
+ /// Can be used in the core module to gain access
+ template<typename T>
SLANG_CUDA_CALL T* _getPtrAt(size_t index)
{
SLANG_BOUND_CHECK_BYTE_ADDRESS(index, sizeof(T), sizeInBytes);
@@ -1364,69 +2092,71 @@ struct RWByteAddressBuffer
return rs;
}
uint32_t* data;
- size_t sizeInBytes; //< Must be multiple of 4
+ size_t sizeInBytes; //< Must be multiple of 4
};
// ---------------------- Wave --------------------------------------
-// TODO(JS): It appears that cuda does not have a simple way to get a lane index.
-//
-// Another approach could be...
-// laneId = ((threadIdx.z * blockDim.y + threadIdx.y) * blockDim.x + threadIdx.x) & SLANG_CUDA_WARP_MASK
-// If that is really true another way to do this, would be for code generator to add this function
-// with the [numthreads] baked in.
-//
-// For now I'll just assume you have a launch that makes the following correct if the kernel uses WaveGetLaneIndex()
+// TODO(JS): It appears that cuda does not have a simple way to get a lane index.
+//
+// Another approach could be...
+// laneId = ((threadIdx.z * blockDim.y + threadIdx.y) * blockDim.x + threadIdx.x) &
+// SLANG_CUDA_WARP_MASK. If that is really true, another way to do this would be for the code
+// generator to add this function with the [numthreads] baked in.
+//
+// For now I'll just assume you have a launch that makes the following correct if the kernel uses
+// WaveGetLaneIndex()
#ifndef SLANG_USE_ASM_LANE_ID
- __forceinline__ __device__ uint32_t _getLaneId()
+__forceinline__ __device__ uint32_t _getLaneId()
{
- // If the launch is (or I guess some multiple of the warp size)
- // we try this mechanism, which is apparently faster.
+ // If the launch is (or I guess some multiple of the warp size)
+ // we try this mechanism, which is apparently faster.
return threadIdx.x & SLANG_CUDA_WARP_MASK;
}
#else
__forceinline__ __device__ uint32_t _getLaneId()
{
// https://stackoverflow.com/questions/44337309/whats-the-most-efficient-way-to-calculate-the-warp-id-lane-id-in-a-1-d-grid#
- // This mechanism is not the fastest way to do it, and that is why the other mechanism
- // is the default. But the other mechanism relies on a launch that makes the assumption
+ // This mechanism is not the fastest way to do it, and that is why the other mechanism
+ // is the default. But the other mechanism relies on a launch that makes the assumption
// true.
- unsigned ret;
- asm volatile ("mov.u32 %0, %laneid;" : "=r"(ret));
+ unsigned ret;
+ asm volatile("mov.u32 %0, %laneid;" : "=r"(ret));
return ret;
}
#endif
typedef int WarpMask;
-// It appears that the __activemask() cannot always be used because
-// threads need to be converged.
-//
+// It appears that the __activemask() cannot always be used because
+// threads need to be converged.
+//
// For CUDA the article claims mask has to be used carefully
// https://devblogs.nvidia.com/using-cuda-warp-level-primitives/
-// With the Warp intrinsics there is no mask, and it's just the 'active lanes'.
+// With the Warp intrinsics there is no mask, and it's just the 'active lanes'.
// __activemask() though does not require there is convergence, so that doesn't work.
-//
-// '__ballot_sync' produces a convergance.
-//
+//
+// '__ballot_sync' produces a convergence.
+//
// From the CUDA docs:
-// ```For __all_sync, __any_sync, and __ballot_sync, a mask must be passed that specifies the threads
-// participating in the call. A bit, representing the thread's lane ID, must be set for each participating thread
-// to ensure they are properly converged before the intrinsic is executed by the hardware. All active threads named
-// in mask must execute the same intrinsic with the same mask, or the result is undefined.```
+// ```For __all_sync, __any_sync, and __ballot_sync, a mask must be passed that specifies the
+// threads participating in the call. A bit, representing the thread's lane ID, must be set for each
+// participating thread to ensure they are properly converged before the intrinsic is executed by
+// the hardware. All active threads named in mask must execute the same intrinsic with the same
+// mask, or the result is undefined.```
//
// Currently there isn't a mechanism to correctly get the mask without it being passed through.
-// Doing so will most likely require some changes to slang code generation to track masks, for now then we use
-// _getActiveMask.
+// Doing so will most likely require some changes to slang code generation to track masks; for now
+// we use _getActiveMask.
// Return mask of all the lanes less than the current lane
__forceinline__ __device__ WarpMask _getLaneLtMask()
{
return (int(1) << _getLaneId()) - 1;
-}
+}
-// TODO(JS):
+// TODO(JS):
// THIS IS NOT CORRECT! That determining the appropriate active mask requires appropriate
// mask tracking.
__forceinline__ __device__ WarpMask _getActiveMask()
@@ -1478,30 +2208,30 @@ __inline__ __device__ int _waveCalcPow2Offset(WarpMask mask)
__inline__ __device__ bool _waveIsFirstLane()
{
const WarpMask mask = __activemask();
- // We special case bit 0, as that most warps are expected to be fully active.
-
+ // We special case bit 0, as that most warps are expected to be fully active.
+
// mask & -mask, isolates the lowest set bit.
- //return (mask & 1 ) || ((mask & -mask) == (1 << _getLaneId()));
-
- // This mechanism is most similar to what was in an nVidia post, so assume it is prefered.
- return (mask & 1 ) || ((__ffs(mask) - 1) == _getLaneId());
+ // return (mask & 1 ) || ((mask & -mask) == (1 << _getLaneId()));
+
+ // This mechanism is most similar to what was in an nVidia post, so assume it is preferred.
+ return (mask & 1) || ((__ffs(mask) - 1) == _getLaneId());
}
-template <typename T>
+template<typename T>
struct WaveOpOr
{
__inline__ __device__ static T getInitial(T a) { return 0; }
__inline__ __device__ static T doOp(T a, T b) { return a | b; }
};
-template <typename T>
+template<typename T>
struct WaveOpAnd
{
__inline__ __device__ static T getInitial(T a) { return ~T(0); }
__inline__ __device__ static T doOp(T a, T b) { return a & b; }
};
-template <typename T>
+template<typename T>
struct WaveOpXor
{
__inline__ __device__ static T getInitial(T a) { return 0; }
@@ -1509,7 +2239,7 @@ struct WaveOpXor
__inline__ __device__ static T doInverse(T a, T b) { return a ^ b; }
};
-template <typename T>
+template<typename T>
struct WaveOpAdd
{
__inline__ __device__ static T getInitial(T a) { return 0; }
@@ -1517,77 +2247,166 @@ struct WaveOpAdd
__inline__ __device__ static T doInverse(T a, T b) { return a - b; }
};
-template <typename T>
+template<typename T>
struct WaveOpMul
{
__inline__ __device__ static T getInitial(T a) { return T(1); }
__inline__ __device__ static T doOp(T a, T b) { return a * b; }
- // Using this inverse for int is probably undesirable - because in general it requires T to have more precision
- // There is also a performance aspect to it, where divides are generally significantly slower
+ // Using this inverse for int is probably undesirable - because in general it requires T to have
+ // more precision. There is also a performance aspect to it, where divides are generally
+ // significantly slower.
__inline__ __device__ static T doInverse(T a, T b) { return a / b; }
};
-template <typename T>
+template<typename T>
struct WaveOpMax
{
__inline__ __device__ static T getInitial(T a) { return a; }
__inline__ __device__ static T doOp(T a, T b) { return a > b ? a : b; }
};
-template <typename T>
+template<typename T>
struct WaveOpMin
{
- __inline__ __device__ static T getInitial(T a) { return a; }
+ __inline__ __device__ static T getInitial(T a) { return a; }
__inline__ __device__ static T doOp(T a, T b) { return a < b ? a : b; }
};
-template <typename T>
+template<typename T>
struct ElementTypeTrait;
// Scalar
-template <> struct ElementTypeTrait<int> { typedef int Type; };
-template <> struct ElementTypeTrait<uint> { typedef uint Type; };
-template <> struct ElementTypeTrait<float> { typedef float Type; };
-template <> struct ElementTypeTrait<double> { typedef double Type; };
-template <> struct ElementTypeTrait<uint64_t> { typedef uint64_t Type; };
-template <> struct ElementTypeTrait<int64_t> { typedef int64_t Type; };
+template<>
+struct ElementTypeTrait<int>
+{
+ typedef int Type;
+};
+template<>
+struct ElementTypeTrait<uint>
+{
+ typedef uint Type;
+};
+template<>
+struct ElementTypeTrait<float>
+{
+ typedef float Type;
+};
+template<>
+struct ElementTypeTrait<double>
+{
+ typedef double Type;
+};
+template<>
+struct ElementTypeTrait<uint64_t>
+{
+ typedef uint64_t Type;
+};
+template<>
+struct ElementTypeTrait<int64_t>
+{
+ typedef int64_t Type;
+};
// Vector
-template <> struct ElementTypeTrait<int1> { typedef int Type; };
-template <> struct ElementTypeTrait<int2> { typedef int Type; };
-template <> struct ElementTypeTrait<int3> { typedef int Type; };
-template <> struct ElementTypeTrait<int4> { typedef int Type; };
-
-template <> struct ElementTypeTrait<uint1> { typedef uint Type; };
-template <> struct ElementTypeTrait<uint2> { typedef uint Type; };
-template <> struct ElementTypeTrait<uint3> { typedef uint Type; };
-template <> struct ElementTypeTrait<uint4> { typedef uint Type; };
-
-template <> struct ElementTypeTrait<float1> { typedef float Type; };
-template <> struct ElementTypeTrait<float2> { typedef float Type; };
-template <> struct ElementTypeTrait<float3> { typedef float Type; };
-template <> struct ElementTypeTrait<float4> { typedef float Type; };
-
-template <> struct ElementTypeTrait<double1> { typedef double Type; };
-template <> struct ElementTypeTrait<double2> { typedef double Type; };
-template <> struct ElementTypeTrait<double3> { typedef double Type; };
-template <> struct ElementTypeTrait<double4> { typedef double Type; };
+template<>
+struct ElementTypeTrait<int1>
+{
+ typedef int Type;
+};
+template<>
+struct ElementTypeTrait<int2>
+{
+ typedef int Type;
+};
+template<>
+struct ElementTypeTrait<int3>
+{
+ typedef int Type;
+};
+template<>
+struct ElementTypeTrait<int4>
+{
+ typedef int Type;
+};
+
+template<>
+struct ElementTypeTrait<uint1>
+{
+ typedef uint Type;
+};
+template<>
+struct ElementTypeTrait<uint2>
+{
+ typedef uint Type;
+};
+template<>
+struct ElementTypeTrait<uint3>
+{
+ typedef uint Type;
+};
+template<>
+struct ElementTypeTrait<uint4>
+{
+ typedef uint Type;
+};
+
+template<>
+struct ElementTypeTrait<float1>
+{
+ typedef float Type;
+};
+template<>
+struct ElementTypeTrait<float2>
+{
+ typedef float Type;
+};
+template<>
+struct ElementTypeTrait<float3>
+{
+ typedef float Type;
+};
+template<>
+struct ElementTypeTrait<float4>
+{
+ typedef float Type;
+};
+
+template<>
+struct ElementTypeTrait<double1>
+{
+ typedef double Type;
+};
+template<>
+struct ElementTypeTrait<double2>
+{
+ typedef double Type;
+};
+template<>
+struct ElementTypeTrait<double3>
+{
+ typedef double Type;
+};
+template<>
+struct ElementTypeTrait<double4>
+{
+ typedef double Type;
+};
// Matrix
-template <typename T, int ROWS, int COLS>
-struct ElementTypeTrait<Matrix<T, ROWS, COLS> >
-{
- typedef T Type;
+template<typename T, int ROWS, int COLS>
+struct ElementTypeTrait<Matrix<T, ROWS, COLS>>
+{
+ typedef T Type;
};
-// Scalar
-template <typename INTF, typename T>
+// Scalar
+template<typename INTF, typename T>
__device__ T _waveReduceScalar(WarpMask mask, T val)
{
const int offsetSize = _waveCalcPow2Offset(mask);
if (offsetSize > 0)
{
- // Fast path O(log2(activeLanes))
+ // Fast path O(log2(activeLanes))
for (int offset = offsetSize >> 1; offset > 0; offset >>= 1)
{
val = INTF::doOp(val, __shfl_xor_sync(mask, val, offset));
@@ -1600,9 +2419,9 @@ __device__ T _waveReduceScalar(WarpMask mask, T val)
while (remaining)
{
const int laneBit = remaining & -remaining;
- // Get the sourceLane
+ // Get the sourceLane
const int srcLane = __ffs(laneBit) - 1;
- // Broadcast (can also broadcast to self)
+ // Broadcast (can also broadcast to self)
result = INTF::doOp(result, __shfl_sync(mask, val, srcLane));
remaining &= ~laneBit;
}
@@ -1613,13 +2432,13 @@ __device__ T _waveReduceScalar(WarpMask mask, T val)
// Multiple values
-template <typename INTF, typename T, size_t COUNT>
+template<typename INTF, typename T, size_t COUNT>
__device__ void _waveReduceMultiple(WarpMask mask, T* val)
{
const int offsetSize = _waveCalcPow2Offset(mask);
if (offsetSize > 0)
{
- // Fast path O(log2(activeLanes))
+ // Fast path O(log2(activeLanes))
for (int offset = offsetSize >> 1; offset > 0; offset >>= 1)
{
for (size_t i = 0; i < COUNT; ++i)
@@ -1638,14 +2457,14 @@ __device__ void _waveReduceMultiple(WarpMask mask, T* val)
originalVal[i] = v;
val[i] = INTF::getInitial(v);
}
-
+
int remaining = mask;
while (remaining)
{
const int laneBit = remaining & -remaining;
- // Get the sourceLane
+ // Get the sourceLane
const int srcLane = __ffs(laneBit) - 1;
- // Broadcast (can also broadcast to self)
+ // Broadcast (can also broadcast to self)
for (size_t i = 0; i < COUNT; ++i)
{
val[i] = INTF::doOp(val[i], __shfl_sync(mask, originalVal[i], srcLane));
@@ -1655,99 +2474,182 @@ __device__ void _waveReduceMultiple(WarpMask mask, T* val)
}
}
-template <typename INTF, typename T>
+template<typename INTF, typename T>
__device__ void _waveReduceMultiple(WarpMask mask, T* val)
{
- typedef typename ElementTypeTrait<T>::Type ElemType;
+ typedef typename ElementTypeTrait<T>::Type ElemType;
_waveReduceMultiple<INTF, ElemType, sizeof(T) / sizeof(ElemType)>(mask, (ElemType*)val);
}
-template <typename T>
-__inline__ __device__ T _waveOr(WarpMask mask, T val) { return _waveReduceScalar<WaveOpOr<T>, T>(mask, val); }
+template<typename T>
+__inline__ __device__ T _waveOr(WarpMask mask, T val)
+{
+ return _waveReduceScalar<WaveOpOr<T>, T>(mask, val);
+}
-template <typename T>
-__inline__ __device__ T _waveAnd(WarpMask mask, T val) { return _waveReduceScalar<WaveOpAnd<T>, T>(mask, val); }
+template<typename T>
+__inline__ __device__ T _waveAnd(WarpMask mask, T val)
+{
+ return _waveReduceScalar<WaveOpAnd<T>, T>(mask, val);
+}
-template <typename T>
-__inline__ __device__ T _waveXor(WarpMask mask, T val) { return _waveReduceScalar<WaveOpXor<T>, T>(mask, val); }
+template<typename T>
+__inline__ __device__ T _waveXor(WarpMask mask, T val)
+{
+ return _waveReduceScalar<WaveOpXor<T>, T>(mask, val);
+}
-template <typename T>
-__inline__ __device__ T _waveProduct(WarpMask mask, T val) { return _waveReduceScalar<WaveOpMul<T>, T>(mask, val); }
+template<typename T>
+__inline__ __device__ T _waveProduct(WarpMask mask, T val)
+{
+ return _waveReduceScalar<WaveOpMul<T>, T>(mask, val);
+}
-template <typename T>
-__inline__ __device__ T _waveSum(WarpMask mask, T val) { return _waveReduceScalar<WaveOpAdd<T>, T>(mask, val); }
+template<typename T>
+__inline__ __device__ T _waveSum(WarpMask mask, T val)
+{
+ return _waveReduceScalar<WaveOpAdd<T>, T>(mask, val);
+}
-template <typename T>
-__inline__ __device__ T _waveMin(WarpMask mask, T val) { return _waveReduceScalar<WaveOpMin<T>, T>(mask, val); }
+template<typename T>
+__inline__ __device__ T _waveMin(WarpMask mask, T val)
+{
+ return _waveReduceScalar<WaveOpMin<T>, T>(mask, val);
+}
-template <typename T>
-__inline__ __device__ T _waveMax(WarpMask mask, T val) { return _waveReduceScalar<WaveOpMax<T>, T>(mask, val); }
+template<typename T>
+__inline__ __device__ T _waveMax(WarpMask mask, T val)
+{
+ return _waveReduceScalar<WaveOpMax<T>, T>(mask, val);
+}
// Fast-path specializations when CUDA warp reduce operators are available
#if __CUDA_ARCH__ >= 800 // 8.x or higher
template<>
-__inline__ __device__ unsigned _waveOr<unsigned>(WarpMask mask, unsigned val) { return __reduce_or_sync(mask, val); }
+__inline__ __device__ unsigned _waveOr<unsigned>(WarpMask mask, unsigned val)
+{
+ return __reduce_or_sync(mask, val);
+}
template<>
-__inline__ __device__ unsigned _waveAnd<unsigned>(WarpMask mask, unsigned val) { return __reduce_and_sync(mask, val); }
+__inline__ __device__ unsigned _waveAnd<unsigned>(WarpMask mask, unsigned val)
+{
+ return __reduce_and_sync(mask, val);
+}
template<>
-__inline__ __device__ unsigned _waveXor<unsigned>(WarpMask mask, unsigned val) { return __reduce_xor_sync(mask, val); }
+__inline__ __device__ unsigned _waveXor<unsigned>(WarpMask mask, unsigned val)
+{
+ return __reduce_xor_sync(mask, val);
+}
template<>
-__inline__ __device__ unsigned _waveSum<unsigned>(WarpMask mask, unsigned val) { return __reduce_add_sync(mask, val); }
+__inline__ __device__ unsigned _waveSum<unsigned>(WarpMask mask, unsigned val)
+{
+ return __reduce_add_sync(mask, val);
+}
template<>
-__inline__ __device__ int _waveSum<int>(WarpMask mask, int val) { return __reduce_add_sync(mask, val); }
+__inline__ __device__ int _waveSum<int>(WarpMask mask, int val)
+{
+ return __reduce_add_sync(mask, val);
+}
template<>
-__inline__ __device__ unsigned _waveMin<unsigned>(WarpMask mask, unsigned val) { return __reduce_min_sync(mask, val); }
+__inline__ __device__ unsigned _waveMin<unsigned>(WarpMask mask, unsigned val)
+{
+ return __reduce_min_sync(mask, val);
+}
template<>
-__inline__ __device__ int _waveMin<int>(WarpMask mask, int val) { return __reduce_min_sync(mask, val); }
+__inline__ __device__ int _waveMin<int>(WarpMask mask, int val)
+{
+ return __reduce_min_sync(mask, val);
+}
template<>
-__inline__ __device__ unsigned _waveMax<unsigned>(WarpMask mask, unsigned val) { return __reduce_max_sync(mask, val); }
+__inline__ __device__ unsigned _waveMax<unsigned>(WarpMask mask, unsigned val)
+{
+ return __reduce_max_sync(mask, val);
+}
template<>
-__inline__ __device__ int _waveMax<int>(WarpMask mask, int val) { return __reduce_max_sync(mask, val); }
+__inline__ __device__ int _waveMax<int>(WarpMask mask, int val)
+{
+ return __reduce_max_sync(mask, val);
+}
#endif
// Multiple
-template <typename T>
-__inline__ __device__ T _waveOrMultiple(WarpMask mask, T val) { typedef typename ElementTypeTrait<T>::Type ElemType; _waveReduceMultiple<WaveOpOr<ElemType> >(mask, &val); return val; }
+template<typename T>
+__inline__ __device__ T _waveOrMultiple(WarpMask mask, T val)
+{
+ typedef typename ElementTypeTrait<T>::Type ElemType;
+ _waveReduceMultiple<WaveOpOr<ElemType>>(mask, &val);
+ return val;
+}
-template <typename T>
-__inline__ __device__ T _waveAndMultiple(WarpMask mask, T val) { typedef typename ElementTypeTrait<T>::Type ElemType; _waveReduceMultiple<WaveOpAnd<ElemType> >(mask, &val); return val; }
+template<typename T>
+__inline__ __device__ T _waveAndMultiple(WarpMask mask, T val)
+{
+ typedef typename ElementTypeTrait<T>::Type ElemType;
+ _waveReduceMultiple<WaveOpAnd<ElemType>>(mask, &val);
+ return val;
+}
-template <typename T>
-__inline__ __device__ T _waveXorMultiple(WarpMask mask, T val) { typedef typename ElementTypeTrait<T>::Type ElemType; _waveReduceMultiple<WaveOpXor<ElemType> >(mask, &val); return val; }
+template<typename T>
+__inline__ __device__ T _waveXorMultiple(WarpMask mask, T val)
+{
+ typedef typename ElementTypeTrait<T>::Type ElemType;
+ _waveReduceMultiple<WaveOpXor<ElemType>>(mask, &val);
+ return val;
+}
-template <typename T>
-__inline__ __device__ T _waveProductMultiple(WarpMask mask, T val) { typedef typename ElementTypeTrait<T>::Type ElemType; _waveReduceMultiple<WaveOpMul<ElemType> >(mask, &val); return val; }
+template<typename T>
+__inline__ __device__ T _waveProductMultiple(WarpMask mask, T val)
+{
+ typedef typename ElementTypeTrait<T>::Type ElemType;
+ _waveReduceMultiple<WaveOpMul<ElemType>>(mask, &val);
+ return val;
+}
-template <typename T>
-__inline__ __device__ T _waveSumMultiple(WarpMask mask, T val) { typedef typename ElementTypeTrait<T>::Type ElemType; _waveReduceMultiple<WaveOpAdd<ElemType> >(mask, &val); return val; }
+template<typename T>
+__inline__ __device__ T _waveSumMultiple(WarpMask mask, T val)
+{
+ typedef typename ElementTypeTrait<T>::Type ElemType;
+ _waveReduceMultiple<WaveOpAdd<ElemType>>(mask, &val);
+ return val;
+}
-template <typename T>
-__inline__ __device__ T _waveMinMultiple(WarpMask mask, T val) { typedef typename ElementTypeTrait<T>::Type ElemType; _waveReduceMultiple<WaveOpMin<ElemType> >(mask, &val); return val; }
+template<typename T>
+__inline__ __device__ T _waveMinMultiple(WarpMask mask, T val)
+{
+ typedef typename ElementTypeTrait<T>::Type ElemType;
+ _waveReduceMultiple<WaveOpMin<ElemType>>(mask, &val);
+ return val;
+}
-template <typename T>
-__inline__ __device__ T _waveMaxMultiple(WarpMask mask, T val) { typedef typename ElementTypeTrait<T>::Type ElemType; _waveReduceMultiple<WaveOpMax<ElemType> >(mask, &val); return val; }
+template<typename T>
+__inline__ __device__ T _waveMaxMultiple(WarpMask mask, T val)
+{
+ typedef typename ElementTypeTrait<T>::Type ElemType;
+ _waveReduceMultiple<WaveOpMax<ElemType>>(mask, &val);
+ return val;
+}
-template <typename T>
-__inline__ __device__ bool _waveAllEqual(WarpMask mask, T val)
+template<typename T>
+__inline__ __device__ bool _waveAllEqual(WarpMask mask, T val)
{
int pred;
__match_all_sync(mask, val, &pred);
return pred != 0;
}
-template <typename T>
-__inline__ __device__ bool _waveAllEqualMultiple(WarpMask mask, T inVal)
+template<typename T>
+__inline__ __device__ bool _waveAllEqualMultiple(WarpMask mask, T inVal)
{
typedef typename ElementTypeTrait<T>::Type ElemType;
const size_t count = sizeof(T) / sizeof(ElemType);
@@ -1764,15 +2666,15 @@ __inline__ __device__ bool _waveAllEqualMultiple(WarpMask mask, T inVal)
return true;
}
-template <typename T>
-__inline__ __device__ T _waveReadFirst(WarpMask mask, T val)
+template<typename T>
+__inline__ __device__ T _waveReadFirst(WarpMask mask, T val)
{
const int lowestLaneId = __ffs(mask) - 1;
- return __shfl_sync(mask, val, lowestLaneId);
+ return __shfl_sync(mask, val, lowestLaneId);
}
-template <typename T>
-__inline__ __device__ T _waveReadFirstMultiple(WarpMask mask, T inVal)
+template<typename T>
+__inline__ __device__ T _waveReadFirstMultiple(WarpMask mask, T inVal)
{
typedef typename ElementTypeTrait<T>::Type ElemType;
const size_t count = sizeof(T) / sizeof(ElemType);
@@ -1782,12 +2684,12 @@ __inline__ __device__ T _waveReadFirstMultiple(WarpMask mask, T inVal)
const int lowestLaneId = __ffs(mask) - 1;
for (size_t i = 0; i < count; ++i)
{
- dst[i] = __shfl_sync(mask, src[i], lowestLaneId);
+ dst[i] = __shfl_sync(mask, src[i], lowestLaneId);
}
return outVal;
}
-template <typename T>
+template<typename T>
__inline__ __device__ T _waveShuffleMultiple(WarpMask mask, T inVal, int lane)
{
typedef typename ElementTypeTrait<T>::Type ElemType;
@@ -1797,27 +2699,27 @@ __inline__ __device__ T _waveShuffleMultiple(WarpMask mask, T inVal, int lane)
ElemType* dst = (ElemType*)&outVal;
for (size_t i = 0; i < count; ++i)
{
- dst[i] = __shfl_sync(mask, src[i], lane);
+ dst[i] = __shfl_sync(mask, src[i], lane);
}
return outVal;
}
-// Scalar
+// Scalar
-// Invertable means that when we get to the end of the reduce, we can remove val (to make exclusive), using
-// the inverse of the op.
-template <typename INTF, typename T>
+// Invertable means that when we get to the end of the reduce, we can remove val (to make
+// exclusive), using the inverse of the op.
+template<typename INTF, typename T>
__device__ T _wavePrefixInvertableScalar(WarpMask mask, T val)
{
const int offsetSize = _waveCalcPow2Offset(mask);
-
+
const int laneId = _getLaneId();
T result;
if (offsetSize > 0)
- {
+ {
// Sum is calculated inclusive of this lanes value
result = val;
- for (int i = 1; i < offsetSize; i += i)
+ for (int i = 1; i < offsetSize; i += i)
{
const T readVal = __shfl_up_sync(mask, result, i, offsetSize);
if (laneId >= i)
@@ -1828,7 +2730,7 @@ __device__ T _wavePrefixInvertableScalar(WarpMask mask, T val)
// Remove val from the result, by applyin inverse
result = INTF::doInverse(result, val);
}
- else
+ else
{
result = INTF::getInitial(val);
if (!_waveIsSingleLane(mask))
@@ -1837,9 +2739,9 @@ __device__ T _wavePrefixInvertableScalar(WarpMask mask, T val)
while (remaining)
{
const int laneBit = remaining & -remaining;
- // Get the sourceLane
+ // Get the sourceLane
const int srcLane = __ffs(laneBit) - 1;
- // Broadcast (can also broadcast to self)
+ // Broadcast (can also broadcast to self)
const T readValue = __shfl_sync(mask, val, srcLane);
// Only accumulate if srcLane is less than this lane
if (srcLane < laneId)
@@ -1848,27 +2750,28 @@ __device__ T _wavePrefixInvertableScalar(WarpMask mask, T val)
}
remaining &= ~laneBit;
}
- }
+ }
}
return result;
}
-
+
// This implementation separately tracks the value to be propogated, and the value
-// that is the final result
-template <typename INTF, typename T>
+// that is the final result
+template<typename INTF, typename T>
__device__ T _wavePrefixScalar(WarpMask mask, T val)
{
const int offsetSize = _waveCalcPow2Offset(mask);
-
+
const int laneId = _getLaneId();
- T result = INTF::getInitial(val);
+ T result = INTF::getInitial(val);
if (offsetSize > 0)
- {
+ {
// For transmitted value we will do it inclusively with this lanes value
- // For the result we do not include the lanes value. This means an extra multiply for each iteration
- // but means we don't need to have a divide at the end and also removes overflow issues in that scenario.
- for (int i = 1; i < offsetSize; i += i)
+ // For the result we do not include the lanes value. This means an extra multiply for each
+ // iteration but means we don't need to have a divide at the end and also removes overflow
+ // issues in that scenario.
+ for (int i = 1; i < offsetSize; i += i)
{
const T readVal = __shfl_up_sync(mask, val, i, offsetSize);
if (laneId >= i)
@@ -1878,7 +2781,7 @@ __device__ T _wavePrefixScalar(WarpMask mask, T val)
}
}
}
- else
+ else
{
if (!_waveIsSingleLane(mask))
{
@@ -1886,9 +2789,9 @@ __device__ T _wavePrefixScalar(WarpMask mask, T val)
while (remaining)
{
const int laneBit = remaining & -remaining;
- // Get the sourceLane
+ // Get the sourceLane
const int srcLane = __ffs(laneBit) - 1;
- // Broadcast (can also broadcast to self)
+ // Broadcast (can also broadcast to self)
const T readValue = __shfl_sync(mask, val, srcLane);
// Only accumulate if srcLane is less than this lane
if (srcLane < laneId)
@@ -1903,51 +2806,51 @@ __device__ T _wavePrefixScalar(WarpMask mask, T val)
}
-template <typename INTF, typename T, size_t COUNT>
+template<typename INTF, typename T, size_t COUNT>
__device__ T _waveOpCopy(T* dst, const T* src)
{
for (size_t j = 0; j < COUNT; ++j)
{
dst[j] = src[j];
}
-}
+}
-template <typename INTF, typename T, size_t COUNT>
+template<typename INTF, typename T, size_t COUNT>
__device__ T _waveOpDoInverse(T* inOut, const T* val)
{
for (size_t j = 0; j < COUNT; ++j)
{
inOut[j] = INTF::doInverse(inOut[j], val[j]);
}
-}
+}
-template <typename INTF, typename T, size_t COUNT>
+template<typename INTF, typename T, size_t COUNT>
__device__ T _waveOpSetInitial(T* out, const T* val)
{
for (size_t j = 0; j < COUNT; ++j)
{
out[j] = INTF::getInitial(val[j]);
}
-}
+}
-template <typename INTF, typename T, size_t COUNT>
+template<typename INTF, typename T, size_t COUNT>
__device__ T _wavePrefixInvertableMultiple(WarpMask mask, T* val)
{
const int offsetSize = _waveCalcPow2Offset(mask);
-
+
const int laneId = _getLaneId();
T originalVal[COUNT];
_waveOpCopy<INTF, T, COUNT>(originalVal, val);
-
+
if (offsetSize > 0)
- {
+ {
// Sum is calculated inclusive of this lanes value
- for (int i = 1; i < offsetSize; i += i)
+ for (int i = 1; i < offsetSize; i += i)
{
// TODO(JS): Note that here I don't split the laneId outside so it's only tested once.
- // This may be better but it would also mean that there would be shfl between lanes
- // that are on different (albeit identical) instructions. So this seems more likely to
+ // This may be better but it would also mean that there would be shfl between lanes
+ // that are on different (albeit identical) instructions. So this seems more likely to
// work as expected with everything in lock step.
for (size_t j = 0; j < COUNT; ++j)
{
@@ -1961,7 +2864,7 @@ __device__ T _wavePrefixInvertableMultiple(WarpMask mask, T* val)
// Remove originalVal from the result, by applyin inverse
_waveOpDoInverse<INTF, T, COUNT>(val, originalVal);
}
- else
+ else
{
_waveOpSetInitial<INTF, T, COUNT>(val, val);
if (!_waveIsSingleLane(mask))
@@ -1970,12 +2873,12 @@ __device__ T _wavePrefixInvertableMultiple(WarpMask mask, T* val)
while (remaining)
{
const int laneBit = remaining & -remaining;
- // Get the sourceLane
+ // Get the sourceLane
const int srcLane = __ffs(laneBit) - 1;
-
+
for (size_t j = 0; j < COUNT; ++j)
{
- // Broadcast (can also broadcast to self)
+ // Broadcast (can also broadcast to self)
const T readValue = __shfl_sync(mask, originalVal[j], srcLane);
// Only accumulate if srcLane is less than this lane
if (srcLane < laneId)
@@ -1985,27 +2888,28 @@ __device__ T _wavePrefixInvertableMultiple(WarpMask mask, T* val)
remaining &= ~laneBit;
}
}
- }
+ }
}
}
-
-template <typename INTF, typename T, size_t COUNT>
+
+template<typename INTF, typename T, size_t COUNT>
__device__ T _wavePrefixMultiple(WarpMask mask, T* val)
{
const int offsetSize = _waveCalcPow2Offset(mask);
-
+
const int laneId = _getLaneId();
-
+
T work[COUNT];
_waveOpCopy<INTF, T, COUNT>(work, val);
_waveOpSetInitial<INTF, T, COUNT>(val, val);
-
+
if (offsetSize > 0)
- {
+ {
// For transmitted value we will do it inclusively with this lanes value
- // For the result we do not include the lanes value. This means an extra op for each iteration
- // but means we don't need to have a divide at the end and also removes overflow issues in that scenario.
- for (int i = 1; i < offsetSize; i += i)
+ // For the result we do not include the lanes value. This means an extra op for each
+ // iteration but means we don't need to have a divide at the end and also removes overflow
+ // issues in that scenario.
+ for (int i = 1; i < offsetSize; i += i)
{
for (size_t j = 0; j < COUNT; ++j)
{
@@ -2013,12 +2917,12 @@ __device__ T _wavePrefixMultiple(WarpMask mask, T* val)
if (laneId >= i)
{
work[j] = INTF::doOp(work[j], readVal);
- val[j] = INTF::doOp(val[j], readVal);
+ val[j] = INTF::doOp(val[j], readVal);
}
}
}
}
- else
+ else
{
if (!_waveIsSingleLane(mask))
{
@@ -2026,12 +2930,12 @@ __device__ T _wavePrefixMultiple(WarpMask mask, T* val)
while (remaining)
{
const int laneBit = remaining & -remaining;
- // Get the sourceLane
+ // Get the sourceLane
const int srcLane = __ffs(laneBit) - 1;
-
+
for (size_t j = 0; j < COUNT; ++j)
{
- // Broadcast (can also broadcast to self)
+ // Broadcast (can also broadcast to self)
const T readValue = __shfl_sync(mask, work[j], srcLane);
// Only accumulate if srcLane is less than this lane
if (srcLane < laneId)
@@ -2045,71 +2949,96 @@ __device__ T _wavePrefixMultiple(WarpMask mask, T* val)
}
}
-template <typename T>
-__inline__ __device__ T _wavePrefixProduct(WarpMask mask, T val) { return _wavePrefixScalar<WaveOpMul<T>, T>(mask, val); }
-
-template <typename T>
-__inline__ __device__ T _wavePrefixSum(WarpMask mask, T val) { return _wavePrefixInvertableScalar<WaveOpAdd<T>, T>(mask, val); }
-
-template <typename T>
-__inline__ __device__ T _wavePrefixXor(WarpMask mask, T val) { return _wavePrefixInvertableScalar<WaveOpXor<T>, T>(mask, val); }
-
-template <typename T>
-__inline__ __device__ T _wavePrefixOr(WarpMask mask, T val) { return _wavePrefixScalar<WaveOpOr<T>, T>(mask, val); }
-
-template <typename T>
-__inline__ __device__ T _wavePrefixAnd(WarpMask mask, T val) { return _wavePrefixScalar<WaveOpAnd<T>, T>(mask, val); }
-
-
-template <typename T>
-__inline__ __device__ T _wavePrefixProductMultiple(WarpMask mask, T val)
-{
- typedef typename ElementTypeTrait<T>::Type ElemType;
- _wavePrefixInvertableMultiple<WaveOpMul<ElemType>, ElemType, sizeof(T) / sizeof(ElemType)>(mask, (ElemType*)&val);
+template<typename T>
+__inline__ __device__ T _wavePrefixProduct(WarpMask mask, T val)
+{
+ return _wavePrefixScalar<WaveOpMul<T>, T>(mask, val);
+}
+
+template<typename T>
+__inline__ __device__ T _wavePrefixSum(WarpMask mask, T val)
+{
+ return _wavePrefixInvertableScalar<WaveOpAdd<T>, T>(mask, val);
+}
+
+template<typename T>
+__inline__ __device__ T _wavePrefixXor(WarpMask mask, T val)
+{
+ return _wavePrefixInvertableScalar<WaveOpXor<T>, T>(mask, val);
+}
+
+template<typename T>
+__inline__ __device__ T _wavePrefixOr(WarpMask mask, T val)
+{
+ return _wavePrefixScalar<WaveOpOr<T>, T>(mask, val);
+}
+
+template<typename T>
+__inline__ __device__ T _wavePrefixAnd(WarpMask mask, T val)
+{
+ return _wavePrefixScalar<WaveOpAnd<T>, T>(mask, val);
+}
+
+
+template<typename T>
+__inline__ __device__ T _wavePrefixProductMultiple(WarpMask mask, T val)
+{
+ typedef typename ElementTypeTrait<T>::Type ElemType;
+ _wavePrefixInvertableMultiple<WaveOpMul<ElemType>, ElemType, sizeof(T) / sizeof(ElemType)>(
+ mask,
+ (ElemType*)&val);
return val;
}
-template <typename T>
-__inline__ __device__ T _wavePrefixSumMultiple(WarpMask mask, T val)
-{
- typedef typename ElementTypeTrait<T>::Type ElemType;
- _wavePrefixInvertableMultiple<WaveOpAdd<ElemType>, ElemType, sizeof(T) / sizeof(ElemType)>(mask, (ElemType*)&val);
+template<typename T>
+__inline__ __device__ T _wavePrefixSumMultiple(WarpMask mask, T val)
+{
+ typedef typename ElementTypeTrait<T>::Type ElemType;
+ _wavePrefixInvertableMultiple<WaveOpAdd<ElemType>, ElemType, sizeof(T) / sizeof(ElemType)>(
+ mask,
+ (ElemType*)&val);
return val;
}
-template <typename T>
-__inline__ __device__ T _wavePrefixXorMultiple(WarpMask mask, T val)
-{
- typedef typename ElementTypeTrait<T>::Type ElemType;
- _wavePrefixInvertableMultiple<WaveOpXor<ElemType>, ElemType, sizeof(T) / sizeof(ElemType)>(mask, (ElemType*)&val);
+template<typename T>
+__inline__ __device__ T _wavePrefixXorMultiple(WarpMask mask, T val)
+{
+ typedef typename ElementTypeTrait<T>::Type ElemType;
+ _wavePrefixInvertableMultiple<WaveOpXor<ElemType>, ElemType, sizeof(T) / sizeof(ElemType)>(
+ mask,
+ (ElemType*)&val);
return val;
}
-template <typename T>
-__inline__ __device__ T _wavePrefixOrMultiple(WarpMask mask, T val)
-{
- typedef typename ElementTypeTrait<T>::Type ElemType;
- _wavePrefixMultiple<WaveOpOr<ElemType>, ElemType, sizeof(T) / sizeof(ElemType)>(mask, (ElemType*)&val);
+template<typename T>
+__inline__ __device__ T _wavePrefixOrMultiple(WarpMask mask, T val)
+{
+ typedef typename ElementTypeTrait<T>::Type ElemType;
+ _wavePrefixMultiple<WaveOpOr<ElemType>, ElemType, sizeof(T) / sizeof(ElemType)>(
+ mask,
+ (ElemType*)&val);
return val;
}
-template <typename T>
-__inline__ __device__ T _wavePrefixAndMultiple(WarpMask mask, T val)
-{
- typedef typename ElementTypeTrait<T>::Type ElemType;
- _wavePrefixMultiple<WaveOpAnd<ElemType>, ElemType, sizeof(T) / sizeof(ElemType)>(mask, (ElemType*)&val);
+template<typename T>
+__inline__ __device__ T _wavePrefixAndMultiple(WarpMask mask, T val)
+{
+ typedef typename ElementTypeTrait<T>::Type ElemType;
+ _wavePrefixMultiple<WaveOpAnd<ElemType>, ElemType, sizeof(T) / sizeof(ElemType)>(
+ mask,
+ (ElemType*)&val);
return val;
}
-template <typename T>
-__inline__ __device__ uint4 _waveMatchScalar(WarpMask mask, T val)
+template<typename T>
+__inline__ __device__ uint4 _waveMatchScalar(WarpMask mask, T val)
{
int pred;
return make_uint4(__match_all_sync(mask, val, &pred), 0, 0, 0);
}
-template <typename T>
-__inline__ __device__ uint4 _waveMatchMultiple(WarpMask mask, const T& inVal)
+template<typename T>
+__inline__ __device__ uint4 _waveMatchMultiple(WarpMask mask, const T& inVal)
{
typedef typename ElementTypeTrait<T>::Type ElemType;
const size_t count = sizeof(T) / sizeof(ElemType);
@@ -2123,7 +3052,7 @@ __inline__ __device__ uint4 _waveMatchMultiple(WarpMask mask, const T& inVal)
return make_uint4(matchBits, 0, 0, 0);
}
-__device__ uint getAt(dim3 a, int b)
+__device__ uint getAt(dim3 a, int b)
{
SLANG_PRELUDE_ASSERT(b >= 0 && b < 3);
return (&a.x)[b];
@@ -2146,8 +3075,9 @@ __inline__ __device__ TResult slang_bit_cast(TInput val)
/* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! */
-/* Type that defines the uniform entry point params. The actual content of this type is dependent on the entry point parameters, and can be
-found via reflection or defined such that it matches the shader appropriately.
+/* Type that defines the uniform entry point params. The actual content of this type is dependent on
+the entry point parameters, and can be found via reflection or defined such that it matches the
+shader appropriately.
*/
struct UniformEntryPointParams;
struct UniformState;
@@ -2157,28 +3087,29 @@ struct UniformState;
struct RayDesc
{
float3 Origin;
- float TMin;
+ float TMin;
float3 Direction;
- float TMax;
+ float TMax;
};
-static __forceinline__ __device__
-void *unpackOptiXRayPayloadPointer(uint32_t i0, uint32_t i1)
+static __forceinline__ __device__ void* unpackOptiXRayPayloadPointer(uint32_t i0, uint32_t i1)
{
const uint64_t uptr = static_cast<uint64_t>(i0) << 32 | i1;
- void* ptr = reinterpret_cast<void*>(uptr);
+ void* ptr = reinterpret_cast<void*>(uptr);
return ptr;
}
-static __forceinline__ __device__
-void packOptiXRayPayloadPointer(void* ptr, uint32_t& i0, uint32_t& i1)
+static __forceinline__ __device__ void packOptiXRayPayloadPointer(
+ void* ptr,
+ uint32_t& i0,
+ uint32_t& i1)
{
const uint64_t uptr = reinterpret_cast<uint64_t>(ptr);
i0 = uptr >> 32;
i1 = uptr & 0x00000000ffffffff;
}
-static __forceinline__ __device__ void *getOptiXRayPayloadPtr()
+static __forceinline__ __device__ void* getOptiXRayPayloadPtr()
{
const uint32_t u0 = optixGetPayload_0();
const uint32_t u1 = optixGetPayload_1();
@@ -2186,7 +3117,7 @@ static __forceinline__ __device__ void *getOptiXRayPayloadPtr()
}
template<typename T>
-__forceinline__ __device__ void *traceOptiXRay(
+__forceinline__ __device__ void* traceOptiXRay(
OptixTraversableHandle AccelerationStructure,
uint32_t RayFlags,
uint32_t InstanceInclusionMask,
@@ -2194,8 +3125,8 @@ __forceinline__ __device__ void *traceOptiXRay(
uint32_t MultiplierForGeometryContributionToHitGroupIndex,
uint32_t MissShaderIndex,
RayDesc Ray,
- T *Payload
-) {
+ T* Payload)
+{
uint32_t r0, r1;
packOptiXRayPayloadPointer((void*)Payload, r0, r1);
optixTrace(
@@ -2210,8 +3141,8 @@ __forceinline__ __device__ void *traceOptiXRay(
RayContributionToHitGroupIndex,
MultiplierForGeometryContributionToHitGroupIndex,
MissShaderIndex,
- r0, r1
- );
+ r0,
+ r1);
}
#endif
@@ -2256,7 +3187,8 @@ struct TensorView
template<typename T>
__device__ T* data_ptr_at(uint4 index)
{
- uint64_t offset = strides[0] * index.x + strides[1] * index.y + strides[2] * index.z + strides[3] * index.w;
+ uint64_t offset = strides[0] * index.x + strides[1] * index.y + strides[2] * index.z +
+ strides[3] * index.w;
return reinterpret_cast<T*>(data + offset);
}
@@ -2294,22 +3226,28 @@ struct TensorView
template<typename T>
__device__ T& load(uint3 index)
{
- return *reinterpret_cast<T*>(data + strides[0] * index.x + strides[1] * index.y + strides[2] * index.z);
+ return *reinterpret_cast<T*>(
+ data + strides[0] * index.x + strides[1] * index.y + strides[2] * index.z);
}
template<typename T>
__device__ T& load(uint32_t x, uint32_t y, uint32_t z, uint32_t w)
{
- return *reinterpret_cast<T*>(data + strides[0] * x + strides[1] * y + strides[2] * z + strides[3] * w);
+ return *reinterpret_cast<T*>(
+ data + strides[0] * x + strides[1] * y + strides[2] * z + strides[3] * w);
}
template<typename T>
__device__ T& load(uint4 index)
{
- return *reinterpret_cast<T*>(data + strides[0] * index.x + strides[1] * index.y + strides[2] * index.z + strides[3] * index.w);
+ return *reinterpret_cast<T*>(
+ data + strides[0] * index.x + strides[1] * index.y + strides[2] * index.z +
+ strides[3] * index.w);
}
template<typename T>
__device__ T& load(uint32_t i0, uint32_t i1, uint32_t i2, uint32_t i3, uint32_t i4)
{
- return *reinterpret_cast<T*>(data + strides[0] * i0 + strides[1] * i1 + strides[2] * i2 + strides[3] * i3 + strides[4] * i4);
+ return *reinterpret_cast<T*>(
+ data + strides[0] * i0 + strides[1] * i1 + strides[2] * i2 + strides[3] * i3 +
+ strides[4] * i4);
}
// Generic version of load
@@ -2347,7 +3285,8 @@ struct TensorView
template<typename T>
__device__ void store(uint3 index, T val)
{
- *reinterpret_cast<T*>(data + strides[0] * index.x + strides[1] * index.y + strides[2] * index.z) = val;
+ *reinterpret_cast<T*>(
+ data + strides[0] * index.x + strides[1] * index.y + strides[2] * index.z) = val;
}
template<typename T>
__device__ void store(uint32_t x, uint32_t y, uint32_t z, uint32_t w, T val)
@@ -2358,12 +3297,16 @@ struct TensorView
template<typename T>
__device__ void store(uint4 index, T val)
{
- *reinterpret_cast<T*>(data + strides[0] * index.x + strides[1] * index.y + strides[2] * index.z + strides[3] * index.w) = val;
+ *reinterpret_cast<T*>(
+ data + strides[0] * index.x + strides[1] * index.y + strides[2] * index.z +
+ strides[3] * index.w) = val;
}
template<typename T>
__device__ void store(uint32_t i0, uint32_t i1, uint32_t i2, uint32_t i3, uint32_t i4, T val)
{
- *reinterpret_cast<T*>(data + strides[0] * i0 + strides[1] * i1 + strides[2] * i2 + strides[3] * i3 + strides[4] * i4) = val;
+ *reinterpret_cast<T*>(
+ data + strides[0] * i0 + strides[1] * i1 + strides[2] * i2 + strides[3] * i3 +
+ strides[4] * i4) = val;
}
// Generic version
diff --git a/prelude/slang-hlsl-prelude.h b/prelude/slang-hlsl-prelude.h
index d892f228c..8e77201f9 100644
--- a/prelude/slang-hlsl-prelude.h
+++ b/prelude/slang-hlsl-prelude.h
@@ -3,6 +3,6 @@
#endif
#ifndef __DXC_VERSION_MAJOR
- // warning X3557: loop doesn't seem to do anything, forcing loop to unroll
- #pragma warning(disable: 3557)
+// warning X3557: loop doesn't seem to do anything, forcing loop to unroll
+#pragma warning(disable : 3557)
#endif
diff --git a/prelude/slang-llvm.h b/prelude/slang-llvm.h
index b41380581..e0bbbd14a 100644
--- a/prelude/slang-llvm.h
+++ b/prelude/slang-llvm.h
@@ -1,46 +1,54 @@
#ifndef SLANG_LLVM_H
#define SLANG_LLVM_H
-// TODO(JS):
+// TODO(JS):
// Disable exception declspecs, as not supported on LLVM without some extra options.
// We could enable with `-fms-extensions`
#define SLANG_DISABLE_EXCEPTIONS 1
#ifndef SLANG_PRELUDE_ASSERT
-# ifdef SLANG_PRELUDE_ENABLE_ASSERT
+#ifdef SLANG_PRELUDE_ENABLE_ASSERT
extern "C" void assertFailure(const char* msg);
-# define SLANG_PRELUDE_EXPECT(VALUE, MSG) if(VALUE) {} else assertFailure("assertion failed: '" MSG "'")
-# define SLANG_PRELUDE_ASSERT(VALUE) SLANG_PRELUDE_EXPECT(VALUE, #VALUE)
-# else // SLANG_PRELUDE_ENABLE_ASSERT
-# define SLANG_PRELUDE_EXPECT(VALUE, MSG)
-# define SLANG_PRELUDE_ASSERT(x)
-# endif // SLANG_PRELUDE_ENABLE_ASSERT
+#define SLANG_PRELUDE_EXPECT(VALUE, MSG) \
+ if (VALUE) \
+ { \
+ } \
+ else \
+ assertFailure("assertion failed: '" MSG "'")
+#define SLANG_PRELUDE_ASSERT(VALUE) SLANG_PRELUDE_EXPECT(VALUE, #VALUE)
+#else // SLANG_PRELUDE_ENABLE_ASSERT
+#define SLANG_PRELUDE_EXPECT(VALUE, MSG)
+#define SLANG_PRELUDE_ASSERT(x)
+#endif // SLANG_PRELUDE_ENABLE_ASSERT
#endif
/*
-Taken from stddef.h
+Taken from stddef.h
*/
typedef __PTRDIFF_TYPE__ ptrdiff_t;
typedef __SIZE_TYPE__ size_t;
typedef __SIZE_TYPE__ rsize_t;
-//typedef __WCHAR_TYPE__ wchar_t;
+// typedef __WCHAR_TYPE__ wchar_t;
#if defined(__need_NULL)
#undef NULL
#ifdef __cplusplus
-# if !defined(__MINGW32__) && !defined(_MSC_VER)
-# define NULL __null
-# else
-# define NULL 0
-# endif
+#if !defined(__MINGW32__) && !defined(_MSC_VER)
+#define NULL __null
#else
-# define NULL ((void*)0)
+#define NULL 0
+#endif
+#else
+#define NULL ((void*)0)
#endif
#ifdef __cplusplus
#if defined(_MSC_EXTENSIONS) && defined(_NATIVE_NULLPTR_SUPPORTED)
-namespace std { typedef decltype(nullptr) nullptr_t; }
+namespace std
+{
+typedef decltype(nullptr) nullptr_t;
+}
using ::std::nullptr_t;
#endif
#endif
@@ -49,18 +57,18 @@ using ::std::nullptr_t;
/*
-The following are taken verbatim from stdint.h from Clang in LLVM. Only 8/16/32/64 types are needed.
+The following are taken verbatim from stdint.h from Clang in LLVM. Only 8/16/32/64 types are needed.
*/
// LLVM/Clang types such that we can use LLVM/Clang without headers for C++ output from Slang
#ifdef __INT64_TYPE__
-# ifndef __int8_t_defined /* glibc sys/types.h also defines int64_t*/
+#ifndef __int8_t_defined /* glibc sys/types.h also defines int64_t*/
typedef __INT64_TYPE__ int64_t;
-# endif /* __int8_t_defined */
+#endif /* __int8_t_defined */
typedef __UINT64_TYPE__ uint64_t;
-# define __int_least64_t int64_t
-# define __uint_least64_t uint64_t
+#define __int_least64_t int64_t
+#define __uint_least64_t uint64_t
#endif /* __INT64_TYPE__ */
#ifdef __int_least64_t
@@ -72,17 +80,17 @@ typedef __uint_least64_t uint_fast64_t;
#ifdef __INT32_TYPE__
-# ifndef __int8_t_defined /* glibc sys/types.h also defines int32_t*/
+#ifndef __int8_t_defined /* glibc sys/types.h also defines int32_t*/
typedef __INT32_TYPE__ int32_t;
-# endif /* __int8_t_defined */
+#endif /* __int8_t_defined */
-# ifndef __uint32_t_defined /* more glibc compatibility */
-# define __uint32_t_defined
+#ifndef __uint32_t_defined /* more glibc compatibility */
+#define __uint32_t_defined
typedef __UINT32_TYPE__ uint32_t;
-# endif /* __uint32_t_defined */
+#endif /* __uint32_t_defined */
-# define __int_least32_t int32_t
-# define __uint_least32_t uint32_t
+#define __int_least32_t int32_t
+#define __uint_least32_t uint32_t
#endif /* __INT32_TYPE__ */
#ifdef __int_least32_t
@@ -97,8 +105,8 @@ typedef __uint_least32_t uint_fast32_t;
typedef __INT16_TYPE__ int16_t;
#endif /* __int8_t_defined */
typedef __UINT16_TYPE__ uint16_t;
-# define __int_least16_t int16_t
-# define __uint_least16_t uint16_t
+#define __int_least16_t int16_t
+#define __uint_least16_t uint16_t
#endif /* __INT16_TYPE__ */
#ifdef __int_least16_t
@@ -109,12 +117,12 @@ typedef __uint_least16_t uint_fast16_t;
#endif /* __int_least16_t */
#ifdef __INT8_TYPE__
-#ifndef __int8_t_defined /* glibc sys/types.h also defines int8_t*/
+#ifndef __int8_t_defined /* glibc sys/types.h also defines int8_t*/
typedef __INT8_TYPE__ int8_t;
#endif /* __int8_t_defined */
typedef __UINT8_TYPE__ uint8_t;
-# define __int_least8_t int8_t
-# define __uint_least8_t uint8_t
+#define __int_least8_t int8_t
+#define __uint_least8_t uint8_t
#endif /* __INT8_TYPE__ */
#ifdef __int_least8_t
@@ -126,12 +134,12 @@ typedef __uint_least8_t uint_fast8_t;
/* prevent glibc sys/types.h from defining conflicting types */
#ifndef __int8_t_defined
-# define __int8_t_defined
+#define __int8_t_defined
#endif /* __int8_t_defined */
/* C99 7.18.1.4 Integer types capable of holding object pointers.
*/
-#define __stdint_join3(a,b,c) a ## b ## c
+#define __stdint_join3(a, b, c) a##b##c
#ifndef _INTPTR_T
#ifndef __intptr_t_defined
@@ -148,7 +156,7 @@ typedef __UINTPTR_TYPE__ uintptr_t;
/* C99 7.18.1.5 Greatest-width integer types.
*/
-typedef __INTMAX_TYPE__ intmax_t;
+typedef __INTMAX_TYPE__ intmax_t;
typedef __UINTMAX_TYPE__ uintmax_t;
/* C99 7.18.4 Macros for minimum-width integer constants.
@@ -168,82 +176,82 @@ typedef __UINTMAX_TYPE__ uintmax_t;
* claims of the C standard (see C++ 18.3.1p2, [cstdint.syn]).
*/
-#define __int_c_join(a, b) a ## b
+#define __int_c_join(a, b) a##b
#define __int_c(v, suffix) __int_c_join(v, suffix)
#define __uint_c(v, suffix) __int_c_join(v##U, suffix)
#ifdef __INT64_TYPE__
-# ifdef __INT64_C_SUFFIX__
-# define __int64_c_suffix __INT64_C_SUFFIX__
-# else
-# undef __int64_c_suffix
-# endif /* __INT64_C_SUFFIX__ */
+#ifdef __INT64_C_SUFFIX__
+#define __int64_c_suffix __INT64_C_SUFFIX__
+#else
+#undef __int64_c_suffix
+#endif /* __INT64_C_SUFFIX__ */
#endif /* __INT64_TYPE__ */
#ifdef __int_least64_t
-# ifdef __int64_c_suffix
-# define INT64_C(v) __int_c(v, __int64_c_suffix)
-# define UINT64_C(v) __uint_c(v, __int64_c_suffix)
-# else
-# define INT64_C(v) v
-# define UINT64_C(v) v ## U
-# endif /* __int64_c_suffix */
+#ifdef __int64_c_suffix
+#define INT64_C(v) __int_c(v, __int64_c_suffix)
+#define UINT64_C(v) __uint_c(v, __int64_c_suffix)
+#else
+#define INT64_C(v) v
+#define UINT64_C(v) v##U
+#endif /* __int64_c_suffix */
#endif /* __int_least64_t */
#ifdef __INT32_TYPE__
-# ifdef __INT32_C_SUFFIX__
-# define __int32_c_suffix __INT32_C_SUFFIX__
+#ifdef __INT32_C_SUFFIX__
+#define __int32_c_suffix __INT32_C_SUFFIX__
#else
-# undef __int32_c_suffix
-# endif /* __INT32_C_SUFFIX__ */
+#undef __int32_c_suffix
+#endif /* __INT32_C_SUFFIX__ */
#endif /* __INT32_TYPE__ */
#ifdef __int_least32_t
-# ifdef __int32_c_suffix
-# define INT32_C(v) __int_c(v, __int32_c_suffix)
-# define UINT32_C(v) __uint_c(v, __int32_c_suffix)
-# else
-# define INT32_C(v) v
-# define UINT32_C(v) v ## U
-# endif /* __int32_c_suffix */
+#ifdef __int32_c_suffix
+#define INT32_C(v) __int_c(v, __int32_c_suffix)
+#define UINT32_C(v) __uint_c(v, __int32_c_suffix)
+#else
+#define INT32_C(v) v
+#define UINT32_C(v) v##U
+#endif /* __int32_c_suffix */
#endif /* __int_least32_t */
#ifdef __INT16_TYPE__
-# ifdef __INT16_C_SUFFIX__
-# define __int16_c_suffix __INT16_C_SUFFIX__
+#ifdef __INT16_C_SUFFIX__
+#define __int16_c_suffix __INT16_C_SUFFIX__
#else
-# undef __int16_c_suffix
-# endif /* __INT16_C_SUFFIX__ */
+#undef __int16_c_suffix
+#endif /* __INT16_C_SUFFIX__ */
#endif /* __INT16_TYPE__ */
#ifdef __int_least16_t
-# ifdef __int16_c_suffix
-# define INT16_C(v) __int_c(v, __int16_c_suffix)
-# define UINT16_C(v) __uint_c(v, __int16_c_suffix)
-# else
-# define INT16_C(v) v
-# define UINT16_C(v) v ## U
-# endif /* __int16_c_suffix */
+#ifdef __int16_c_suffix
+#define INT16_C(v) __int_c(v, __int16_c_suffix)
+#define UINT16_C(v) __uint_c(v, __int16_c_suffix)
+#else
+#define INT16_C(v) v
+#define UINT16_C(v) v##U
+#endif /* __int16_c_suffix */
#endif /* __int_least16_t */
#ifdef __INT8_TYPE__
-# ifdef __INT8_C_SUFFIX__
-# define __int8_c_suffix __INT8_C_SUFFIX__
+#ifdef __INT8_C_SUFFIX__
+#define __int8_c_suffix __INT8_C_SUFFIX__
#else
-# undef __int8_c_suffix
-# endif /* __INT8_C_SUFFIX__ */
+#undef __int8_c_suffix
+#endif /* __INT8_C_SUFFIX__ */
#endif /* __INT8_TYPE__ */
#ifdef __int_least8_t
-# ifdef __int8_c_suffix
-# define INT8_C(v) __int_c(v, __int8_c_suffix)
-# define UINT8_C(v) __uint_c(v, __int8_c_suffix)
-# else
-# define INT8_C(v) v
-# define UINT8_C(v) v ## U
-# endif /* __int8_c_suffix */
+#ifdef __int8_c_suffix
+#define INT8_C(v) __int_c(v, __int8_c_suffix)
+#define UINT8_C(v) __uint_c(v, __int8_c_suffix)
+#else
+#define INT8_C(v) v
+#define UINT8_C(v) v##U
+#endif /* __int8_c_suffix */
#endif /* __int_least8_t */
/* C99 7.18.2.1 Limits of exact-width integer types.
@@ -266,133 +274,131 @@ typedef __UINTMAX_TYPE__ uintmax_t;
*/
#ifdef __INT64_TYPE__
-# define INT64_MAX INT64_C( 9223372036854775807)
-# define INT64_MIN (-INT64_C( 9223372036854775807)-1)
-# define UINT64_MAX UINT64_C(18446744073709551615)
-# define __INT_LEAST64_MIN INT64_MIN
-# define __INT_LEAST64_MAX INT64_MAX
-# define __UINT_LEAST64_MAX UINT64_MAX
+#define INT64_MAX INT64_C(9223372036854775807)
+#define INT64_MIN (-INT64_C(9223372036854775807) - 1)
+#define UINT64_MAX UINT64_C(18446744073709551615)
+#define __INT_LEAST64_MIN INT64_MIN
+#define __INT_LEAST64_MAX INT64_MAX
+#define __UINT_LEAST64_MAX UINT64_MAX
#endif /* __INT64_TYPE__ */
#ifdef __INT_LEAST64_MIN
-# define INT_LEAST64_MIN __INT_LEAST64_MIN
-# define INT_LEAST64_MAX __INT_LEAST64_MAX
-# define UINT_LEAST64_MAX __UINT_LEAST64_MAX
-# define INT_FAST64_MIN __INT_LEAST64_MIN
-# define INT_FAST64_MAX __INT_LEAST64_MAX
-# define UINT_FAST64_MAX __UINT_LEAST64_MAX
+#define INT_LEAST64_MIN __INT_LEAST64_MIN
+#define INT_LEAST64_MAX __INT_LEAST64_MAX
+#define UINT_LEAST64_MAX __UINT_LEAST64_MAX
+#define INT_FAST64_MIN __INT_LEAST64_MIN
+#define INT_FAST64_MAX __INT_LEAST64_MAX
+#define UINT_FAST64_MAX __UINT_LEAST64_MAX
#endif /* __INT_LEAST64_MIN */
#ifdef __INT32_TYPE__
-# define INT32_MAX INT32_C(2147483647)
-# define INT32_MIN (-INT32_C(2147483647)-1)
-# define UINT32_MAX UINT32_C(4294967295)
-# define __INT_LEAST32_MIN INT32_MIN
-# define __INT_LEAST32_MAX INT32_MAX
-# define __UINT_LEAST32_MAX UINT32_MAX
+#define INT32_MAX INT32_C(2147483647)
+#define INT32_MIN (-INT32_C(2147483647) - 1)
+#define UINT32_MAX UINT32_C(4294967295)
+#define __INT_LEAST32_MIN INT32_MIN
+#define __INT_LEAST32_MAX INT32_MAX
+#define __UINT_LEAST32_MAX UINT32_MAX
#endif /* __INT32_TYPE__ */
#ifdef __INT_LEAST32_MIN
-# define INT_LEAST32_MIN __INT_LEAST32_MIN
-# define INT_LEAST32_MAX __INT_LEAST32_MAX
-# define UINT_LEAST32_MAX __UINT_LEAST32_MAX
-# define INT_FAST32_MIN __INT_LEAST32_MIN
-# define INT_FAST32_MAX __INT_LEAST32_MAX
-# define UINT_FAST32_MAX __UINT_LEAST32_MAX
+#define INT_LEAST32_MIN __INT_LEAST32_MIN
+#define INT_LEAST32_MAX __INT_LEAST32_MAX
+#define UINT_LEAST32_MAX __UINT_LEAST32_MAX
+#define INT_FAST32_MIN __INT_LEAST32_MIN
+#define INT_FAST32_MAX __INT_LEAST32_MAX
+#define UINT_FAST32_MAX __UINT_LEAST32_MAX
#endif /* __INT_LEAST32_MIN */
#ifdef __INT16_TYPE__
-#define INT16_MAX INT16_C(32767)
-#define INT16_MIN (-INT16_C(32767)-1)
-#define UINT16_MAX UINT16_C(65535)
-# define __INT_LEAST16_MIN INT16_MIN
-# define __INT_LEAST16_MAX INT16_MAX
-# define __UINT_LEAST16_MAX UINT16_MAX
+#define INT16_MAX INT16_C(32767)
+#define INT16_MIN (-INT16_C(32767) - 1)
+#define UINT16_MAX UINT16_C(65535)
+#define __INT_LEAST16_MIN INT16_MIN
+#define __INT_LEAST16_MAX INT16_MAX
+#define __UINT_LEAST16_MAX UINT16_MAX
#endif /* __INT16_TYPE__ */
#ifdef __INT_LEAST16_MIN
-# define INT_LEAST16_MIN __INT_LEAST16_MIN
-# define INT_LEAST16_MAX __INT_LEAST16_MAX
-# define UINT_LEAST16_MAX __UINT_LEAST16_MAX
-# define INT_FAST16_MIN __INT_LEAST16_MIN
-# define INT_FAST16_MAX __INT_LEAST16_MAX
-# define UINT_FAST16_MAX __UINT_LEAST16_MAX
+#define INT_LEAST16_MIN __INT_LEAST16_MIN
+#define INT_LEAST16_MAX __INT_LEAST16_MAX
+#define UINT_LEAST16_MAX __UINT_LEAST16_MAX
+#define INT_FAST16_MIN __INT_LEAST16_MIN
+#define INT_FAST16_MAX __INT_LEAST16_MAX
+#define UINT_FAST16_MAX __UINT_LEAST16_MAX
#endif /* __INT_LEAST16_MIN */
#ifdef __INT8_TYPE__
-# define INT8_MAX INT8_C(127)
-# define INT8_MIN (-INT8_C(127)-1)
-# define UINT8_MAX UINT8_C(255)
-# define __INT_LEAST8_MIN INT8_MIN
-# define __INT_LEAST8_MAX INT8_MAX
-# define __UINT_LEAST8_MAX UINT8_MAX
+#define INT8_MAX INT8_C(127)
+#define INT8_MIN (-INT8_C(127) - 1)
+#define UINT8_MAX UINT8_C(255)
+#define __INT_LEAST8_MIN INT8_MIN
+#define __INT_LEAST8_MAX INT8_MAX
+#define __UINT_LEAST8_MAX UINT8_MAX
#endif /* __INT8_TYPE__ */
#ifdef __INT_LEAST8_MIN
-# define INT_LEAST8_MIN __INT_LEAST8_MIN
-# define INT_LEAST8_MAX __INT_LEAST8_MAX
-# define UINT_LEAST8_MAX __UINT_LEAST8_MAX
-# define INT_FAST8_MIN __INT_LEAST8_MIN
-# define INT_FAST8_MAX __INT_LEAST8_MAX
-# define UINT_FAST8_MAX __UINT_LEAST8_MAX
+#define INT_LEAST8_MIN __INT_LEAST8_MIN
+#define INT_LEAST8_MAX __INT_LEAST8_MAX
+#define UINT_LEAST8_MAX __UINT_LEAST8_MAX
+#define INT_FAST8_MIN __INT_LEAST8_MIN
+#define INT_FAST8_MAX __INT_LEAST8_MAX
+#define UINT_FAST8_MAX __UINT_LEAST8_MAX
#endif /* __INT_LEAST8_MIN */
/* Some utility macros */
-#define __INTN_MIN(n) __stdint_join3( INT, n, _MIN)
-#define __INTN_MAX(n) __stdint_join3( INT, n, _MAX)
-#define __UINTN_MAX(n) __stdint_join3(UINT, n, _MAX)
-#define __INTN_C(n, v) __stdint_join3( INT, n, _C(v))
+#define __INTN_MIN(n) __stdint_join3(INT, n, _MIN)
+#define __INTN_MAX(n) __stdint_join3(INT, n, _MAX)
+#define __UINTN_MAX(n) __stdint_join3(UINT, n, _MAX)
+#define __INTN_C(n, v) __stdint_join3(INT, n, _C(v))
#define __UINTN_C(n, v) __stdint_join3(UINT, n, _C(v))
/* C99 7.18.2.4 Limits of integer types capable of holding object pointers. */
/* C99 7.18.3 Limits of other integer types. */
-#define INTPTR_MIN (-__INTPTR_MAX__-1)
-#define INTPTR_MAX __INTPTR_MAX__
-#define UINTPTR_MAX __UINTPTR_MAX__
-#define PTRDIFF_MIN (-__PTRDIFF_MAX__-1)
-#define PTRDIFF_MAX __PTRDIFF_MAX__
-#define SIZE_MAX __SIZE_MAX__
+#define INTPTR_MIN (-__INTPTR_MAX__ - 1)
+#define INTPTR_MAX __INTPTR_MAX__
+#define UINTPTR_MAX __UINTPTR_MAX__
+#define PTRDIFF_MIN (-__PTRDIFF_MAX__ - 1)
+#define PTRDIFF_MAX __PTRDIFF_MAX__
+#define SIZE_MAX __SIZE_MAX__
/* ISO9899:2011 7.20 (C11 Annex K): Define RSIZE_MAX if __STDC_WANT_LIB_EXT1__
* is enabled. */
#if defined(__STDC_WANT_LIB_EXT1__) && __STDC_WANT_LIB_EXT1__ >= 1
-#define RSIZE_MAX (SIZE_MAX >> 1)
+#define RSIZE_MAX (SIZE_MAX >> 1)
#endif
/* C99 7.18.2.5 Limits of greatest-width integer types. */
-#define INTMAX_MIN (-__INTMAX_MAX__-1)
-#define INTMAX_MAX __INTMAX_MAX__
-#define UINTMAX_MAX __UINTMAX_MAX__
+#define INTMAX_MIN (-__INTMAX_MAX__ - 1)
+#define INTMAX_MAX __INTMAX_MAX__
+#define UINTMAX_MAX __UINTMAX_MAX__
/* C99 7.18.3 Limits of other integer types. */
#define SIG_ATOMIC_MIN __INTN_MIN(__SIG_ATOMIC_WIDTH__)
#define SIG_ATOMIC_MAX __INTN_MAX(__SIG_ATOMIC_WIDTH__)
#ifdef __WINT_UNSIGNED__
-# define WINT_MIN __UINTN_C(__WINT_WIDTH__, 0)
-# define WINT_MAX __UINTN_MAX(__WINT_WIDTH__)
+#define WINT_MIN __UINTN_C(__WINT_WIDTH__, 0)
+#define WINT_MAX __UINTN_MAX(__WINT_WIDTH__)
#else
-# define WINT_MIN __INTN_MIN(__WINT_WIDTH__)
-# define WINT_MAX __INTN_MAX(__WINT_WIDTH__)
+#define WINT_MIN __INTN_MIN(__WINT_WIDTH__)
+#define WINT_MAX __INTN_MAX(__WINT_WIDTH__)
#endif
#ifndef WCHAR_MAX
-# define WCHAR_MAX __WCHAR_MAX__
+#define WCHAR_MAX __WCHAR_MAX__
#endif
#ifndef WCHAR_MIN
-# if __WCHAR_MAX__ == __INTN_MAX(__WCHAR_WIDTH__)
-# define WCHAR_MIN __INTN_MIN(__WCHAR_WIDTH__)
-# else
-# define WCHAR_MIN __UINTN_C(__WCHAR_WIDTH__, 0)
-# endif
+#if __WCHAR_MAX__ == __INTN_MAX(__WCHAR_WIDTH__)
+#define WCHAR_MIN __INTN_MIN(__WCHAR_WIDTH__)
+#else
+#define WCHAR_MIN __UINTN_C(__WCHAR_WIDTH__, 0)
+#endif
#endif
/* 7.18.4.2 Macros for greatest-width integer constants. */
-#define INTMAX_C(v) __int_c(v, __INTMAX_C_SUFFIX__)
+#define INTMAX_C(v) __int_c(v, __INTMAX_C_SUFFIX__)
#define UINTMAX_C(v) __int_c(v, __UINTMAX_C_SUFFIX__)
#endif // SLANG_LLVM_H
-
-
diff --git a/prelude/slang-torch-prelude.h b/prelude/slang-torch-prelude.h
index 11ffe3b66..d303c1045 100644
--- a/prelude/slang-torch-prelude.h
+++ b/prelude/slang-torch-prelude.h
@@ -1,64 +1,67 @@
// Prelude for PyTorch cpp binding.
-#include <torch/extension.h>
#include <ATen/cuda/CUDAContext.h>
#include <ATen/cuda/CUDAUtils.h>
-#include <vector>
#include <stdexcept>
#include <string>
+#include <torch/extension.h>
+#include <vector>
#ifdef SLANG_LLVM
#include "slang-llvm.h"
#else // SLANG_LLVM
-# if SLANG_GCC_FAMILY && __GNUC__ < 6
-# include <cmath>
-# define SLANG_PRELUDE_STD std::
-# else
-# include <math.h>
-# define SLANG_PRELUDE_STD
-# endif
-
-# include <assert.h>
-# include <stdlib.h>
-# include <string.h>
-# include <stdint.h>
+#if SLANG_GCC_FAMILY && __GNUC__ < 6
+#include <cmath>
+#define SLANG_PRELUDE_STD std::
+#else
+#include <math.h>
+#define SLANG_PRELUDE_STD
+#endif
+
+#include <assert.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
#endif // SLANG_LLVM
#include "../source/core/slang-string.h"
#if defined(_MSC_VER)
-# define SLANG_PRELUDE_SHARED_LIB_EXPORT __declspec(dllexport)
+#define SLANG_PRELUDE_SHARED_LIB_EXPORT __declspec(dllexport)
#else
-# define SLANG_PRELUDE_SHARED_LIB_EXPORT __attribute__((__visibility__("default")))
-//# define SLANG_PRELUDE_SHARED_LIB_EXPORT __attribute__ ((dllexport)) __attribute__((__visibility__("default")))
-#endif
-
-#ifdef __cplusplus
-# define SLANG_PRELUDE_EXTERN_C extern "C"
-# define SLANG_PRELUDE_EXTERN_C_START extern "C" {
-# define SLANG_PRELUDE_EXTERN_C_END }
+#define SLANG_PRELUDE_SHARED_LIB_EXPORT __attribute__((__visibility__("default")))
+// # define SLANG_PRELUDE_SHARED_LIB_EXPORT __attribute__ ((dllexport))
+// __attribute__((__visibility__("default")))
+#endif
+
+#ifdef __cplusplus
+#define SLANG_PRELUDE_EXTERN_C extern "C"
+#define SLANG_PRELUDE_EXTERN_C_START \
+ extern "C" \
+ {
+#define SLANG_PRELUDE_EXTERN_C_END }
#else
-# define SLANG_PRELUDE_EXTERN_C
-# define SLANG_PRELUDE_EXTERN_C_START
-# define SLANG_PRELUDE_EXTERN_C_END
-#endif
+#define SLANG_PRELUDE_EXTERN_C
+#define SLANG_PRELUDE_EXTERN_C_START
+#define SLANG_PRELUDE_EXTERN_C_END
+#endif
#define SLANG_PRELUDE_NAMESPACE
#ifndef SLANG_NO_THROW
-# define SLANG_NO_THROW
+#define SLANG_NO_THROW
#endif
#ifndef SLANG_STDCALL
-# define SLANG_STDCALL
+#define SLANG_STDCALL
#endif
#ifndef SLANG_MCALL
-# define SLANG_MCALL SLANG_STDCALL
+#define SLANG_MCALL SLANG_STDCALL
#endif
#ifndef SLANG_FORCE_INLINE
-# define SLANG_FORCE_INLINE inline
+#define SLANG_FORCE_INLINE inline
#endif
-#include "slang-cpp-types-core.h"
#include "slang-cpp-scalar-intrinsics.h"
+#include "slang-cpp-types-core.h"
static const int kSlangTorchTensorMaxDim = 5;
@@ -72,20 +75,26 @@ struct TensorView
};
-TensorView make_tensor_view(torch::Tensor val, const char* name, torch::ScalarType targetScalarType, bool requireContiguous)
+TensorView make_tensor_view(
+ torch::Tensor val,
+ const char* name,
+ torch::ScalarType targetScalarType,
+ bool requireContiguous)
{
// We're currently not trying to implicitly cast or transfer to device for two reasons:
// 1. There appears to be a bug with .to() where successive calls after the first one fail.
- // 2. Silent casts like this can cause large memory allocations & unexpected overheads.
+ // 2. Silent casts like this can cause large memory allocations & unexpected overheads.
// It's better to be explicit.
// Expect tensors to be on CUDA device
if (!val.device().is_cuda())
- throw std::runtime_error(std::string(name).append(": tensor is not on CUDA device.").c_str());
+ throw std::runtime_error(
+ std::string(name).append(": tensor is not on CUDA device.").c_str());
// Expect tensors to be the right type.
if (val.dtype() != targetScalarType)
- throw std::runtime_error(std::string(name).append(": tensor is not of the expected type.").c_str());
+ throw std::runtime_error(
+ std::string(name).append(": tensor is not of the expected type.").c_str());
// Check that the tensor is contiguous
if (requireContiguous && !val.is_contiguous())
@@ -138,14 +147,22 @@ TensorView make_tensor_view(torch::Tensor val, const char* name, torch::ScalarTy
}
if (val.dim() > kSlangTorchTensorMaxDim)
- throw std::runtime_error(std::string(name).append(": number of dimensions exceeds limit (").append(std::to_string(kSlangTorchTensorMaxDim)).append(")").c_str());
+ throw std::runtime_error(std::string(name)
+ .append(": number of dimensions exceeds limit (")
+ .append(std::to_string(kSlangTorchTensorMaxDim))
+ .append(")")
+ .c_str());
bool isEmpty = true;
for (int i = 0; i < val.dim(); ++i)
{
res.strides[i] = val.stride(i) * elementSize;
if (res.strides[i] == 0)
- throw std::runtime_error(std::string(name).append(": tensors with broadcasted dimensions are not supported (use tensor.contiguous() to make tensor whole)").c_str());
+ throw std::runtime_error(
+ std::string(name)
+ .append(": tensors with broadcasted dimensions are not supported (use "
+ "tensor.contiguous() to make tensor whole)")
+ .c_str());
res.sizes[i] = val.size(i);
if (res.sizes[i] > 0)