summaryrefslogtreecommitdiffstats
path: root/source/slang/slang-stdlib.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'source/slang/slang-stdlib.cpp')
-rw-r--r--source/slang/slang-stdlib.cpp1855
1 files changed, 1855 insertions, 0 deletions
diff --git a/source/slang/slang-stdlib.cpp b/source/slang/slang-stdlib.cpp
new file mode 100644
index 000000000..f74fcd603
--- /dev/null
+++ b/source/slang/slang-stdlib.cpp
@@ -0,0 +1,1855 @@
+// slang-stdlib.cpp
+
+#include "slang-stdlib.h"
+#include "syntax.h"
+
+#define STRINGIZE(x) STRINGIZE2(x)
+#define STRINGIZE2(x) #x
+#define LINE_STRING STRINGIZE(__LINE__)
+
+enum { kLibIncludeStringLine = __LINE__+1 };
+const char * LibIncludeStringChunks[] = { R"(
+
+typedef uint UINT;
+
+__generic<T> __intrinsic(Assign) T operator=(out T left, T right);
+
+__generic<T,U> __intrinsic(Sequence) U operator,(T left, U right);
+
+__generic<T> __intrinsic(Select) T operator?:(bool condition, T ifTrue, T ifFalse);
+__generic<T, let N : int> __intrinsic(Select) vector<T,N> operator?:(vector<bool,N> condition, vector<T,N> ifTrue, vector<T,N> ifFalse);
+
+__generic<T> __magic_type(HLSLAppendStructuredBufferType) struct AppendStructuredBuffer
+{
+ __intrinsic void Append(T value);
+
+ __intrinsic void GetDimensions(
+ out uint numStructs,
+ out uint stride);
+};
+
+__generic<T> __magic_type(HLSLBufferType) struct Buffer
+{
+ __intrinsic void GetDimensions(
+ out uint dim);
+
+ __intrinsic T Load(int location);
+ __intrinsic T Load(int location, out uint status);
+
+ __intrinsic __subscript(uint index) -> T;
+};
+
+__magic_type(HLSLByteAddressBufferType) struct ByteAddressBuffer
+{
+ __intrinsic void GetDimensions(
+ out uint dim);
+
+ __intrinsic uint Load(int location);
+ __intrinsic uint Load(int location, out uint status);
+
+ __intrinsic uint2 Load2(int location);
+ __intrinsic uint2 Load2(int location, out uint status);
+
+ __intrinsic uint3 Load3(int location);
+ __intrinsic uint3 Load3(int location, out uint status);
+
+ __intrinsic uint4 Load4(int location);
+ __intrinsic uint4 Load4(int location, out uint status);
+};
+
+__generic<T> __magic_type(HLSLStructuredBufferType) struct StructuredBuffer
+{
+ __intrinsic void GetDimensions(
+ out uint numStructs,
+ out uint stride);
+
+ __intrinsic T Load(int location);
+ __intrinsic T Load(int location, out uint status);
+
+ __intrinsic __subscript(uint index) -> T;
+};
+
+__generic<T> __magic_type(HLSLConsumeStructuredBufferType) struct ConsumeStructuredBuffer
+{
+ __intrinsic T Consume();
+
+ __intrinsic void GetDimensions(
+ out uint numStructs,
+ out uint stride);
+};
+
+__generic<T> __magic_type(HLSLInputPatchType) struct InputPatch
+{
+ __intrinsic __subscript(uint index) -> T;
+};
+
+__generic<T> __magic_type(HLSLOutputPatchType) struct OutputPatch
+{
+ __intrinsic __subscript(uint index) -> T { set; }
+};
+
+__generic<T> __magic_type(HLSLRWBufferType) struct RWBuffer
+{
+ // Note(tfoley): duplication with declaration of `Buffer`
+
+ __intrinsic void GetDimensions(
+ out uint dim);
+
+ __intrinsic T Load(int location);
+ __intrinsic T Load(int location, out uint status);
+
+ __intrinsic __subscript(uint index) -> T { get; set; }
+};
+
+__magic_type(HLSLRWByteAddressBufferType) struct RWByteAddressBuffer
+{
+ // Note(tfoley): supports alll operations from `ByteAddressBuffer`
+ // TODO(tfoley): can this be made a sub-type?
+
+ __intrinsic void GetDimensions(
+ out uint dim);
+
+ __intrinsic uint Load(int location);
+ __intrinsic uint Load(int location, out uint status);
+
+ __intrinsic uint2 Load2(int location);
+ __intrinsic uint2 Load2(int location, out uint status);
+
+ __intrinsic uint3 Load3(int location);
+ __intrinsic uint3 Load3(int location, out uint status);
+
+ __intrinsic uint4 Load4(int location);
+ __intrinsic uint4 Load4(int location, out uint status);
+
+ // Added operations:
+
+ __intrinsic void InterlockedAdd(
+ UINT dest,
+ UINT value,
+ out UINT original_value);
+ __intrinsic void InterlockedAdd(
+ UINT dest,
+ UINT value);
+
+ __intrinsic void InterlockedAnd(
+ UINT dest,
+ UINT value,
+ out UINT original_value);
+ __intrinsic void InterlockedAnd(
+ UINT dest,
+ UINT value);
+
+ __intrinsic void InterlockedCompareExchange(
+ UINT dest,
+ UINT compare_value,
+ UINT value,
+ out UINT original_value);
+ __intrinsic void InterlockedCompareExchange(
+ UINT dest,
+ UINT compare_value,
+ UINT value);
+
+ __intrinsic void InterlockedCompareStore(
+ UINT dest,
+ UINT compare_value,
+ UINT value);
+ __intrinsic void InterlockedCompareStore(
+ UINT dest,
+ UINT compare_value);
+
+ __intrinsic void InterlockedExchange(
+ UINT dest,
+ UINT value,
+ out UINT original_value);
+ __intrinsic void InterlockedExchange(
+ UINT dest,
+ UINT value);
+
+ __intrinsic void InterlockedMax(
+ UINT dest,
+ UINT value,
+ out UINT original_value);
+ __intrinsic void InterlockedMax(
+ UINT dest,
+ UINT value);
+
+ __intrinsic void InterlockedMin(
+ UINT dest,
+ UINT value,
+ out UINT original_value);
+ __intrinsic void InterlockedMin(
+ UINT dest,
+ UINT value);
+
+ __intrinsic void InterlockedOr(
+ UINT dest,
+ UINT value,
+ out UINT original_value);
+ __intrinsic void InterlockedOr(
+ UINT dest,
+ UINT value);
+
+ __intrinsic void InterlockedXor(
+ UINT dest,
+ UINT value,
+ out UINT original_value);
+ __intrinsic void InterlockedXor(
+ UINT dest,
+ UINT value);
+
+ __intrinsic void Store(
+ uint address,
+ uint value);
+
+ __intrinsic void Store2(
+ uint address,
+ uint2 value);
+
+ __intrinsic void Store3(
+ uint address,
+ uint3 value);
+
+ __intrinsic void Store4(
+ uint address,
+ uint4 value);
+};
+
+__generic<T> __magic_type(HLSLRWStructuredBufferType) struct RWStructuredBuffer
+{
+ __intrinsic uint DecrementCounter();
+
+ __intrinsic void GetDimensions(
+ out uint numStructs,
+ out uint stride);
+
+ __intrinsic void IncrementCounter();
+
+ __intrinsic T Load(int location);
+ __intrinsic T Load(int location, out uint status);
+
+ __intrinsic __subscript(uint index) -> T { get; set; }
+};
+
+__generic<T> __magic_type(HLSLPointStreamType) struct PointStream {};
+__generic<T> __magic_type(HLSLLineStreamType) struct LineStream {};
+__generic<T> __magic_type(HLSLLineStreamType) struct TriangleStream {};
+
+)", R"(
+
+// Note(tfoley): Trying to systematically add all the HLSL builtins
+
+// A type that can be used as an operand for builtins
+__trait __BuiltinType {}
+
+// A type that can be used for arithmetic operations
+__trait __BuiltinArithmeticType : __BuiltinType {}
+
+// A type that logically has a sign (positive/negative/zero)
+__trait __BuiltinSignedArithmeticType : __BuiltinArithmeticType {}
+
+// A type that can represent integers
+__trait __BuiltinIntegerType : __BuiltinArithmeticType {}
+
+// A type that can represent non-integers
+__trait __BuiltinRealType : __BuiltinArithmeticType {}
+
+// A type that uses a floating-point representation
+__trait __BuiltinFloatingPointType : __BuiltinRealType, __BuiltinSignedType {}
+
+// Try to terminate the current draw or dispatch call (HLSL SM 4.0)
+__intrinsic void abort();
+
+// Absolute value (HLSL SM 1.0)
+__generic<T : __BuiltinSignedArithmeticType> __intrinsic T abs(T x);
+__generic<T : __BuiltinSignedArithmeticType, let N : int> __intrinsic vector<T,N> abs(vector<T,N> x);
+__generic<T : __BuiltinSignedArithmeticType, let N : int, let M : int> __intrinsic matrix<T,N,M> abs(matrix<T,N,M> x);
+
+// Inverse cosine (HLSL SM 1.0)
+__generic<T : __BuiltinFloatingPointType> __intrinsic T acos(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic vector<T,N> acos(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic matrix<T,N,M> acos(matrix<T,N,M> x);
+
+// Test if all components are non-zero (HLSL SM 1.0)
+__generic<T : __BuiltinType> __intrinsic T all(T x);
+__generic<T : __BuiltinType, let N : int> __intrinsic vector<T,N> all(vector<T,N> x);
+__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic matrix<T,N,M> all(matrix<T,N,M> x);
+
+// Barrier for writes to all memory spaces (HLSL SM 5.0)
+__intrinsic void AllMemoryBarrier();
+
+// Thread-group sync and barrier for writes to all memory spaces (HLSL SM 5.0)
+__intrinsic void AllMemoryBarrierWithGroupSync();
+
+// Test if any components is non-zero (HLSL SM 1.0)
+__generic<T : __BuiltinType> __intrinsic T any(T x);
+__generic<T : __BuiltinType, let N : int> __intrinsic vector<T,N> any(vector<T,N> x);
+__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic matrix<T,N,M> any(matrix<T,N,M> x);
+
+
+// Reinterpret bits as a double (HLSL SM 5.0)
+__intrinsic double asdouble(uint lowbits, uint highbits);
+
+// Reinterpret bits as a float (HLSL SM 4.0)
+__intrinsic float asfloat( int x);
+__intrinsic float asfloat(uint x);
+__generic<let N : int> __intrinsic vector<float,N> asfloat(vector< int,N> x);
+__generic<let N : int> __intrinsic vector<float,N> asfloat(vector<uint,N> x);
+__generic<let N : int, let M : int> __intrinsic matrix<float,N,M> asfloat(matrix< int,N,M> x);
+__generic<let N : int, let M : int> __intrinsic matrix<float,N,M> asfloat(matrix<uint,N,M> x);
+
+
+// Inverse sine (HLSL SM 1.0)
+__generic<T : __BuiltinFloatingPointType> __intrinsic T asin(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic vector<T,N> asin(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic matrix<T,N,M> asin(matrix<T,N,M> x);
+
+// Reinterpret bits as an int (HLSL SM 4.0)
+__intrinsic int asint(float x);
+__intrinsic int asint(uint x);
+__generic<let N : int> __intrinsic vector<int,N> asint(vector<float,N> x);
+__generic<let N : int> __intrinsic vector<int,N> asint(vector<uint,N> x);
+__generic<let N : int, let M : int> __intrinsic matrix<int,N,M> asint(matrix<float,N,M> x);
+__generic<let N : int, let M : int> __intrinsic matrix<int,N,M> asint(matrix<uint,N,M> x);
+
+// Reinterpret bits of double as a uint (HLSL SM 5.0)
+__intrinsic void asuint(double value, out uint lowbits, out uint highbits);
+
+// Reinterpret bits as a uint (HLSL SM 4.0)
+__intrinsic uint asuint(float x);
+__intrinsic uint asuint(int x);
+__generic<let N : int> __intrinsic vector<uint,N> asuint(vector<float,N> x);
+__generic<let N : int> __intrinsic vector<uint,N> asuint(vector<int,N> x);
+__generic<let N : int, let M : int> __intrinsic matrix<uint,N,M> asuint(matrix<float,N,M> x);
+__generic<let N : int, let M : int> __intrinsic matrix<uint,N,M> asuint(matrix<int,N,M> x);
+
+// Inverse tangent (HLSL SM 1.0)
+__generic<T : __BuiltinFloatingPointType> __intrinsic T atan(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic vector<T,N> atan(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic matrix<T,N,M> atan(matrix<T,N,M> x);
+
+__generic<T : __BuiltinFloatingPointType> __intrinsic T atan2(T y, T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic vector<T,N> atan2(vector<T,N> y, vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic matrix<T,N,M> atan2(matrix<T,N,M> y, matrix<T,N,M> x);
+
+// Ceiling (HLSL SM 1.0)
+__generic<T : __BuiltinFloatingPointType> __intrinsic T ceil(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic vector<T,N> ceil(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic matrix<T,N,M> ceil(matrix<T,N,M> x);
+
+
+// Check access status to tiled resource
+__intrinsic bool CheckAccessFullyMapped(uint status);
+
+// Clamp (HLSL SM 1.0)
+__generic<T : __BuiltinArithmeticType> __intrinsic T clamp(T x, T min, T max);
+__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic vector<T,N> clamp(vector<T,N> x, vector<T,N> min, vector<T,N> max);
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic matrix<T,N,M> clamp(matrix<T,N,M> x, matrix<T,N,M> min, matrix<T,N,M> max);
+
+// Clip (discard) fragment conditionally
+__generic<T : __BuiltinFloatingPointType> __intrinsic void clip(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic void clip(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic void clip(matrix<T,N,M> x);
+
+// Cosine
+__generic<T : __BuiltinFloatingPointType> __intrinsic T cos(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic vector<T,N> cos(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic matrix<T,N,M> cos(matrix<T,N,M> x);
+
+// Hyperbolic cosine
+__generic<T : __BuiltinFloatingPointType> __intrinsic T cosh(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic vector<T,N> cosh(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic matrix<T,N,M> cosh(matrix<T,N,M> x);
+
+// Population count
+__intrinsic uint countbits(uint value);
+
+// Cross product
+__generic<T : __BuiltinArithmeticType> __intrinsic vector<T,3> cross(vector<T,3> x, vector<T,3> y);
+
+// Convert encoded color
+__intrinsic int4 D3DCOLORtoUBYTE4(float4 x);
+
+// Partial-difference derivatives
+__generic<T : __BuiltinFloatingPointType> __intrinsic T ddx(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic vector<T,N> ddx(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic matrix<T,N,M> ddx(matrix<T,N,M> x);
+
+__generic<T : __BuiltinFloatingPointType> __intrinsic T ddx_coarse(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic vector<T,N> ddx_coarse(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic matrix<T,N,M> ddx_coarse(matrix<T,N,M> x);
+
+__generic<T : __BuiltinFloatingPointType> __intrinsic T ddx_fine(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic vector<T,N> ddx_fine(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic matrix<T,N,M> ddx_fine(matrix<T,N,M> x);
+
+__generic<T : __BuiltinFloatingPointType> __intrinsic T ddy(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic vector<T,N> ddy(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic matrix<T,N,M> ddy(matrix<T,N,M> x);
+
+__generic<T : __BuiltinFloatingPointType> __intrinsic T ddy_coarse(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic vector<T,N> ddy_coarse(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic matrix<T,N,M> ddy_coarse(matrix<T,N,M> x);
+
+__generic<T : __BuiltinFloatingPointType> __intrinsic T ddy_fine(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic vector<T,N> ddy_fine(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic matrix<T,N,M> ddy_fine(matrix<T,N,M> x);
+
+
+// Radians to degrees
+__generic<T : __BuiltinFloatingPointType> __intrinsic T degrees(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic vector<T,N> degrees(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic matrix<T,N,M> degrees(matrix<T,N,M> x);
+
+// Matrix determinant
+
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic T determinant(matrix<T,N,N> m);
+
+// Barrier for device memory
+__intrinsic void DeviceMemoryBarrier();
+__intrinsic void DeviceMemoryBarrierWithGroupSync();
+
+// Vector distance
+
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic T distance(vector<T,N> x, vector<T,N> y);
+
+// Vector dot product
+
+__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic T dot(vector<T,N> x, vector<T,N> y);
+
+// Helper for computing distance terms for lighting (obsolete)
+
+__generic<T : __BuiltinFloatingPointType> __intrinsic vector<T,4> dst(vector<T,4> x, vector<T,4> y);
+
+// Error message
+
+// __intrinsic void errorf( string format, ... );
+
+// Attribute evaluation
+
+__generic<T : __BuiltinArithmeticType> __intrinsic T EvaluateAttributeAtCentroid(T x);
+__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic vector<T,N> EvaluateAttributeAtCentroid(vector<T,N> x);
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic matrix<T,N,M> EvaluateAttributeAtCentroid(matrix<T,N,M> x);
+
+__generic<T : __BuiltinArithmeticType> __intrinsic T EvaluateAttributeAtSample(T x, uint sampleindex);
+__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic vector<T,N> EvaluateAttributeAtSample(vector<T,N> x, uint sampleindex);
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic matrix<T,N,M> EvaluateAttributeAtSample(matrix<T,N,M> x, uint sampleindex);
+
+__generic<T : __BuiltinArithmeticType> __intrinsic T EvaluateAttributeSnapped(T x, int2 offset);
+__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic vector<T,N> EvaluateAttributeSnapped(vector<T,N> x, int2 offset);
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic matrix<T,N,M> EvaluateAttributeSnapped(matrix<T,N,M> x, int2 offset);
+
+// Base-e exponent
+__generic<T : __BuiltinFloatingPointType> __intrinsic T exp(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic vector<T,N> exp(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic matrix<T,N,M> exp(matrix<T,N,M> x);
+
+// Base-2 exponent
+__generic<T : __BuiltinFloatingPointType> __intrinsic T exp2(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic vector<T,N> exp2(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic matrix<T,N,M> exp2(matrix<T,N,M> x);
+
+// Convert 16-bit float stored in low bits of integer
+__intrinsic float f16tof32(uint value);
+__generic<let N : int> __intrinsic vector<float,N> f16tof32(vector<uint,N> value);
+
+// Convert to 16-bit float stored in low bits of integer
+__intrinsic uint f32tof16(float value);
+__generic<let N : int> __intrinsic vector<uint,N> f32tof16(vector<float,N> value);
+
+// Flip surface normal to face forward, if needed
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic vector<T,N> faceforward(vector<T,N> n, vector<T,N> i, vector<T,N> ng);
+
+// Find first set bit starting at high bit and working down
+__intrinsic int firstbithigh(int value);
+__generic<let N : int> __intrinsic vector<int,N> firstbithigh(vector<int,N> value);
+
+__intrinsic uint firstbithigh(uint value);
+__generic<let N : int> __intrinsic vector<uint,N> firstbithigh(vector<uint,N> value);
+
+// Find first set bit starting at low bit and working up
+__intrinsic int firstbitlow(int value);
+__generic<let N : int> __intrinsic vector<int,N> firstbitlow(vector<int,N> value);
+
+__intrinsic uint firstbitlow(uint value);
+__generic<let N : int> __intrinsic vector<uint,N> firstbitlow(vector<uint,N> value);
+
+// Floor (HLSL SM 1.0)
+__generic<T : __BuiltinFloatingPointType> __intrinsic T floor(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic vector<T,N> floor(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic matrix<T,N,M> floor(matrix<T,N,M> x);
+
+// Fused multiply-add for doubles
+__intrinsic double fma(double a, double b, double c);
+__generic<let N : int> __intrinsic vector<double, N> fma(vector<double, N> a, vector<double, N> b, vector<double, N> c);
+__generic<let N : int, let M : int> __intrinsic matrix<double,N,M> fma(matrix<double,N,M> a, matrix<double,N,M> b, matrix<double,N,M> c);
+
+// Floating point remainder of x/y
+__generic<T : __BuiltinFloatingPointType> __intrinsic T fmod(T x, T y);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic vector<T,N> fmod(vector<T,N> x, vector<T,N> y);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic matrix<T,N,M> fmod(matrix<T,N,M> x, matrix<T,N,M> y);
+
+// Fractional part
+__generic<T : __BuiltinFloatingPointType> __intrinsic T frac(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic vector<T,N> frac(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic matrix<T,N,M> frac(matrix<T,N,M> x);
+
+// Split float into mantissa and exponent
+__generic<T : __BuiltinFloatingPointType> __intrinsic T frexp(T x, out T exp);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic vector<T,N> frexp(vector<T,N> x, out vector<T,N> exp);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic matrix<T,N,M> frexp(matrix<T,N,M> x, out matrix<T,N,M> exp);
+
+// Texture filter width
+__generic<T : __BuiltinFloatingPointType> __intrinsic T fwidth(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic vector<T,N> fwidth(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic matrix<T,N,M> fwidth(matrix<T,N,M> x);
+
+)", R"(
+
+// Get number of samples in render target
+__intrinsic uint GetRenderTargetSampleCount();
+
+// Get position of given sample
+__intrinsic float2 GetRenderTargetSamplePosition(int Index);
+
+// Group memory barrier
+__intrinsic void GroupMemoryBarrier();
+__intrinsic void GroupMemoryBarrierWithGroupSync();
+
+// Atomics
+__intrinsic void InterlockedAdd(in out int dest, int value, out int original_value);
+__intrinsic void InterlockedAdd(in out uint dest, uint value, out uint original_value);
+
+__intrinsic void InterlockedAnd(in out int dest, int value, out int original_value);
+__intrinsic void InterlockedAnd(in out uint dest, uint value, out uint original_value);
+
+__intrinsic void InterlockedCompareExchange(in out int dest, int compare_value, int value, out int original_value);
+__intrinsic void InterlockedCompareExchange(in out uint dest, uint compare_value, uint value, out uint original_value);
+
+__intrinsic void InterlockedCompareStore(in out int dest, int compare_value, int value);
+__intrinsic void InterlockedCompareStore(in out uint dest, uint compare_value, uint value);
+
+__intrinsic void InterlockedExchange(in out int dest, int value, out int original_value);
+__intrinsic void InterlockedExchange(in out uint dest, uint value, out uint original_value);
+
+__intrinsic void InterlockedMax(in out int dest, int value, out int original_value);
+__intrinsic void InterlockedMax(in out uint dest, uint value, out uint original_value);
+
+__intrinsic void InterlockedMin(in out int dest, int value, out int original_value);
+__intrinsic void InterlockedMin(in out uint dest, uint value, out uint original_value);
+
+__intrinsic void InterlockedOr(in out int dest, int value, out int original_value);
+__intrinsic void InterlockedOr(in out uint dest, uint value, out uint original_value);
+
+__intrinsic void InterlockedXor(in out int dest, int value, out int original_value);
+__intrinsic void InterlockedXor(in out uint dest, uint value, out uint original_value);
+
+// Is floating-point value finite?
+__generic<T : __BuiltinFloatingPointType> __intrinsic bool isfinite(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic vector<bool,N> isfinite(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic matrix<bool,N,M> isfinite(matrix<T,N,M> x);
+
+// Is floating-point value infinite?
+__generic<T : __BuiltinFloatingPointType> __intrinsic bool isinf(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic vector<bool,N> isinf(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic matrix<bool,N,M> isinf(matrix<T,N,M> x);
+
+// Is floating-point value not-a-number?
+__generic<T : __BuiltinFloatingPointType> __intrinsic bool isnan(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic vector<bool,N> isnan(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic matrix<bool,N,M> isnan(matrix<T,N,M> x);
+
+// Construct float from mantissa and exponent
+__generic<T : __BuiltinFloatingPointType> __intrinsic T ldexp(T x, T exp);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic vector<T,N> ldexp(vector<T,N> x, vector<T,N> exp);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic matrix<T,N,M> ldexp(matrix<T,N,M> x, matrix<T,N,M> exp);
+
+// Vector length
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic T length(vector<T,N> x);
+
+// Linear interpolation
+__generic<T : __BuiltinFloatingPointType> __intrinsic T lerp(T x, T y, T s);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic vector<T,N> lerp(vector<T,N> x, vector<T,N> y, vector<T,N> s);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic matrix<T,N,M> lerp(matrix<T,N,M> x, matrix<T,N,M> y, matrix<T,N,M> s);
+
+// Legacy lighting function (obsolete)
+__intrinsic float4 lit(float n_dot_l, float n_dot_h, float m);
+
+// Base-e logarithm
+__generic<T : __BuiltinFloatingPointType> __intrinsic T log(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic vector<T,N> log(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic matrix<T,N,M> log(matrix<T,N,M> x);
+
+// Base-10 logarithm
+__generic<T : __BuiltinFloatingPointType> __intrinsic T log10(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic vector<T,N> log10(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic matrix<T,N,M> log10(matrix<T,N,M> x);
+
+// Base-2 logarithm
+__generic<T : __BuiltinFloatingPointType> __intrinsic T log2(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic vector<T,N> log2(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic matrix<T,N,M> log2(matrix<T,N,M> x);
+
+// multiply-add
+__generic<T : __BuiltinArithmeticType> __intrinsic T mad(T mvalue, T avalue, T bvalue);
+__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic vector<T,N> mad(vector<T,N> mvalue, vector<T,N> avalue, vector<T,N> bvalue);
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic matrix<T,N,M> mad(matrix<T,N,M> mvalue, matrix<T,N,M> avalue, matrix<T,N,M> bvalue);
+
+// maximum
+__generic<T : __BuiltinArithmeticType> __intrinsic T max(T x, T y);
+__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic vector<T,N> max(vector<T,N> x, vector<T,N> y);
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic matrix<T,N,M> max(matrix<T,N,M> x, matrix<T,N,M> y);
+
+// minimum
+__generic<T : __BuiltinArithmeticType> __intrinsic T min(T x, T y);
+__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic vector<T,N> min(vector<T,N> x, vector<T,N> y);
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic matrix<T,N,M> min(matrix<T,N,M> x, matrix<T,N,M> y);
+
+// split into integer and fractional parts (both with same sign)
+__generic<T : __BuiltinFloatingPointType> __intrinsic T modf(T x, out T ip);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic vector<T,N> modf(vector<T,N> x, out vector<T,N> ip);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic matrix<T,N,M> modf(matrix<T,N,M> x, out matrix<T,N,M> ip);
+
+// msad4 (whatever that is)
+__intrinsic uint4 msad4(uint reference, uint2 source, uint4 accum);
+
+// General inner products
+
+// scalar-scalar
+__generic<T : __BuiltinArithmeticType> __intrinsic(Mul_Scalar_Scalar) T mul(T x, T y);
+
+// scalar-vector and vector-scalar
+__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic(Mul_Vector_Scalar) vector<T,N> mul(vector<T,N> x, T y);
+__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic(Mul_Scalar_Vector) vector<T,N> mul(T x, vector<T,N> y);
+
+// scalar-matrix and matrix-scalar
+__generic<T : __BuiltinArithmeticType, let N : int, let M :int> __intrinsic(Mul_Matrix_Scalar) matrix<T,N,M> mul(matrix<T,N,M> x, T y);
+__generic<T : __BuiltinArithmeticType, let N : int, let M :int> __intrinsic(Mul_Scalar_Matrix) matrix<T,N,M> mul(T x, matrix<T,N,M> y);
+
+// vector-vector (dot product)
+__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic(InnerProduct_Vector_Vector) T mul(vector<T,N> x, vector<T,N> y);
+
+// vector-matrix
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic(InnerProduct_Vector_Matrix) vector<T,M> mul(vector<T,N> x, matrix<T,N,M> y);
+
+// matrix-vector
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic(InnerProduct_Matrix_Vector) vector<T,N> mul(matrix<T,N,M> x, vector<T,M> y);
+
+// matrix-matrix
+__generic<T : __BuiltinArithmeticType, let R : int, let N : int, let C : int> __intrinsic(InnerProduct_Matrix_Matrix) matrix<T,R,C> mul(matrix<T,R,N> x, matrix<T,N,C> y);
+
+// noise (deprecated)
+__intrinsic float noise(float x);
+__generic<let N : int> __intrinsic float noise(vector<float, N> x);
+
+// Normalize a vector
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic vector<T,N> normalize(vector<T,N> x);
+
+// Raise to a power
+__generic<T : __BuiltinFloatingPointType> __intrinsic T pow(T x, T y);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic vector<T,N> pow(vector<T,N> x, vector<T,N> y);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic matrix<T,N,M> pow(matrix<T,N,M> x, matrix<T,N,M> y);
+
+// Output message
+
+// __intrinsic void printf( string format, ... );
+
+// Tessellation factor fixup routines
+
+__intrinsic void Process2DQuadTessFactorsAvg(
+ in float4 RawEdgeFactors,
+ in float2 InsideScale,
+ out float4 RoundedEdgeTessFactors,
+ out float2 RoundedInsideTessFactors,
+ out float2 UnroundedInsideTessFactors);
+
+__intrinsic void Process2DQuadTessFactorsMax(
+ in float4 RawEdgeFactors,
+ in float2 InsideScale,
+ out float4 RoundedEdgeTessFactors,
+ out float2 RoundedInsideTessFactors,
+ out float2 UnroundedInsideTessFactors);
+
+__intrinsic void Process2DQuadTessFactorsMin(
+ in float4 RawEdgeFactors,
+ in float2 InsideScale,
+ out float4 RoundedEdgeTessFactors,
+ out float2 RoundedInsideTessFactors,
+ out float2 UnroundedInsideTessFactors);
+
+__intrinsic void ProcessIsolineTessFactors(
+ in float RawDetailFactor,
+ in float RawDensityFactor,
+ out float RoundedDetailFactor,
+ out float RoundedDensityFactor);
+
+__intrinsic void ProcessQuadTessFactorsAvg(
+ in float4 RawEdgeFactors,
+ in float InsideScale,
+ out float4 RoundedEdgeTessFactors,
+ out float2 RoundedInsideTessFactors,
+ out float2 UnroundedInsideTessFactors);
+
+__intrinsic void ProcessQuadTessFactorsMax(
+ in float4 RawEdgeFactors,
+ in float InsideScale,
+ out float4 RoundedEdgeTessFactors,
+ out float2 RoundedInsideTessFactors,
+ out float2 UnroundedInsideTessFactors);
+
+__intrinsic void ProcessQuadTessFactorsMin(
+ in float4 RawEdgeFactors,
+ in float InsideScale,
+ out float4 RoundedEdgeTessFactors,
+ out float2 RoundedInsideTessFactors,
+ out float2 UnroundedInsideTessFactors);
+
+__intrinsic void ProcessTriTessFactorsAvg(
+ in float3 RawEdgeFactors,
+ in float InsideScale,
+ out float3 RoundedEdgeTessFactors,
+ out float RoundedInsideTessFactor,
+ out float UnroundedInsideTessFactor);
+
+__intrinsic void ProcessTriTessFactorsMax(
+ in float3 RawEdgeFactors,
+ in float InsideScale,
+ out float3 RoundedEdgeTessFactors,
+ out float RoundedInsideTessFactor,
+ out float UnroundedInsideTessFactor);
+
+__intrinsic void ProcessTriTessFactorsMin(
+ in float3 RawEdgeFactors,
+ in float InsideScale,
+ out float3 RoundedEdgeTessFactors,
+ out float RoundedInsideTessFactors,
+ out float UnroundedInsideTessFactors);
+
+// Degrees to radians
+__generic<T : __BuiltinFloatingPointType> __intrinsic T radians(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic vector<T,N> radians(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic matrix<T,N,M> radians(matrix<T,N,M> x);
+
+// Approximate reciprocal
+__generic<T : __BuiltinFloatingPointType> __intrinsic T rcp(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic vector<T,N> rcp(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic matrix<T,N,M> rcp(matrix<T,N,M> x);
+
+// Reflect incident vector across plane with given normal
+__generic<T : __BuiltinFloatingPointType, let N : int>
+__intrinsic
+vector<T,N> reflect(vector<T,N> i, vector<T,N> n);
+
+// Refract incident vector given surface normal and index of refraction
+__generic<T : __BuiltinFloatingPointType, let N : int>
+__intrinsic
+vector<T,N> refract(vector<T,N> i, vector<T,N> n, float eta);
+
+// Reverse order of bits
+__intrinsic uint reversebits(uint value);
+__generic<let N : int> vector<uint,N> reversebits(vector<uint,N> value);
+
+// Round-to-nearest
+__generic<T : __BuiltinFloatingPointType> __intrinsic T round(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic vector<T,N> round(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic matrix<T,N,M> round(matrix<T,N,M> x);
+
+// Reciprocal of square root
+__generic<T : __BuiltinFloatingPointType> __intrinsic T rsqrt(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic vector<T,N> rsqrt(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic matrix<T,N,M> rsqrt(matrix<T,N,M> x);
+
+// Clamp value to [0,1] range
+__generic<T : __BuiltinFloatingPointType> __intrinsic T saturate(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic vector<T,N> saturate(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic matrix<T,N,M> saturate(matrix<T,N,M> x);
+
+
+// Extract sign of value
+__generic<T : __BuiltinSignedArithmeticType> __intrinsic int sign(T x);
+__generic<T : __BuiltinSignedArithmeticType, let N : int> __intrinsic vector<int,N> sign(vector<T,N> x);
+__generic<T : __BuiltinSignedArithmeticType, let N : int, let M : int> __intrinsic matrix<int,N,M> sign(matrix<T,N,M> x);
+
+)", R"(
+
+
+// Sine
+__generic<T : __BuiltinFloatingPointType> __intrinsic T sin(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic vector<T,N> sin(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic matrix<T,N,M> sin(matrix<T,N,M> x);
+
+// Sine and cosine
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic void sincos(T x, out T s, out T c);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic void sincos(vector<T,N> x, out vector<T,N> s, out vector<T,N> c);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic void sincos(matrix<T,N,M> x, out matrix<T,N,M> s, out matrix<T,N,M> c);
+
+// Hyperbolic Sine
+__generic<T : __BuiltinFloatingPointType> __intrinsic T sinh(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic vector<T,N> sinh(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic matrix<T,N,M> sinh(matrix<T,N,M> x);
+
+// Smooth step (Hermite interpolation)
+__generic<T : __BuiltinFloatingPointType> __intrinsic T smoothstep(T min, T max, T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic vector<T,N> smoothstep(vector<T,N> min, vector<T,N> max, vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic matrix<T,N,M> smoothstep(matrix<T,N,M> min, matrix<T,N,M> max, matrix<T,N,M> x);
+
+// Square root
+__generic<T : __BuiltinFloatingPointType> __intrinsic T sqrt(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic vector<T,N> sqrt(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic matrix<T,N,M> sqrt(matrix<T,N,M> x);
+
+// Step function
+__generic<T : __BuiltinFloatingPointType> __intrinsic T step(T y, T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic vector<T,N> step(vector<T,N> y, vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic matrix<T,N,M> step(matrix<T,N,M> y, matrix<T,N,M> x);
+
+// Tangent
+__generic<T : __BuiltinFloatingPointType> __intrinsic T tan(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic vector<T,N> tan(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic matrix<T,N,M> tan(matrix<T,N,M> x);
+
+// Hyperbolic tangent
+__generic<T : __BuiltinFloatingPointType> __intrinsic T tanh(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic vector<T,N> tanh(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic matrix<T,N,M> tanh(matrix<T,N,M> x);
+
+// Legacy texture-fetch operations
+
+/*
+__intrinsic float4 tex1D(sampler1D s, float t);
+__intrinsic float4 tex1D(sampler1D s, float t, float ddx, float ddy);
+__intrinsic float4 tex1Dbias(sampler1D s, float4 t);
+__intrinsic float4 tex1Dgrad(sampler1D s, float t, float ddx, float ddy);
+__intrinsic float4 tex1Dlod(sampler1D s, float4 t);
+__intrinsic float4 tex1Dproj(sampler1D s, float4 t);
+
+__intrinsic float4 tex2D(sampler2D s, float2 t);
+__intrinsic float4 tex2D(sampler2D s, float2 t, float2 ddx, float2 ddy);
+__intrinsic float4 tex2Dbias(sampler2D s, float4 t);
+__intrinsic float4 tex2Dgrad(sampler2D s, float2 t, float2 ddx, float2 ddy);
+__intrinsic float4 tex2Dlod(sampler2D s, float4 t);
+__intrinsic float4 tex2Dproj(sampler2D s, float4 t);
+
+__intrinsic float4 tex3D(sampler3D s, float3 t);
+__intrinsic float4 tex3D(sampler3D s, float3 t, float3 ddx, float3 ddy);
+__intrinsic float4 tex3Dbias(sampler3D s, float4 t);
+__intrinsic float4 tex3Dgrad(sampler3D s, float3 t, float3 ddx, float3 ddy);
+__intrinsic float4 tex3Dlod(sampler3D s, float4 t);
+__intrinsic float4 tex3Dproj(sampler3D s, float4 t);
+
+__intrinsic float4 texCUBE(samplerCUBE s, float3 t);
+__intrinsic float4 texCUBE(samplerCUBE s, float3 t, float3 ddx, float3 ddy);
+__intrinsic float4 texCUBEbias(samplerCUBE s, float4 t);
+__intrinsic float4 texCUBEgrad(samplerCUBE s, float3 t, float3 ddx, float3 ddy);
+__intrinsic float4 texCUBElod(samplerCUBE s, float4 t);
+__intrinsic float4 texCUBEproj(samplerCUBE s, float4 t);
+*/
+
+// Matrix transpose
+__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic matrix<T,M,N> transpose(matrix<T,N,M> x);
+
+// Truncate to integer
+__generic<T : __BuiltinFloatingPointType> __intrinsic T trunc(T x);
+__generic<T : __BuiltinFloatingPointType, let N : int> __intrinsic vector<T,N> trunc(vector<T,N> x);
+__generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __intrinsic matrix<T,N,M> trunc(matrix<T,N,M> x);
+
+
+)", R"(
+
+// Shader model 6.0 stuff
+
+__intrinsic uint GlobalOrderedCountIncrement(uint countToAppendForThisLane);
+
+__generic<T : __BuiltinType> __intrinsic T QuadReadLaneAt(T sourceValue, int quadLaneID);
+__generic<T : __BuiltinType, let N : int> __intrinsic vector<T,N> QuadReadLaneAt(vector<T,N> sourceValue, int quadLaneID);
+__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic matrix<T,N,M> QuadReadLaneAt(matrix<T,N,M> sourceValue, int quadLaneID);
+
+__generic<T : __BuiltinType> __intrinsic T QuadSwapX(T localValue);
+__generic<T : __BuiltinType, let N : int> __intrinsic vector<T,N> QuadSwapX(vector<T,N> localValue);
+__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic matrix<T,N,M> QuadSwapX(matrix<T,N,M> localValue);
+
+__generic<T : __BuiltinType> __intrinsic T QuadSwapY(T localValue);
+__generic<T : __BuiltinType, let N : int> __intrinsic vector<T,N> QuadSwapY(vector<T,N> localValue);
+__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic matrix<T,N,M> QuadSwapY(matrix<T,N,M> localValue);
+
+__generic<T : __BuiltinIntegerType> __intrinsic T WaveAllBitAnd(T expr);
+__generic<T : __BuiltinIntegerType, let N : int> __intrinsic vector<T,N> WaveAllBitAnd(vector<T,N> expr);
+__generic<T : __BuiltinIntegerType, let N : int, let M : int> __intrinsic matrix<T,N,M> WaveAllBitAnd(matrix<T,N,M> expr);
+
+__generic<T : __BuiltinIntegerType> __intrinsic T WaveAllBitOr(T expr);
+__generic<T : __BuiltinIntegerType, let N : int> __intrinsic vector<T,N> WaveAllBitOr(vector<T,N> expr);
+__generic<T : __BuiltinIntegerType, let N : int, let M : int> __intrinsic matrix<T,N,M> WaveAllBitOr(matrix<T,N,M> expr);
+
+__generic<T : __BuiltinIntegerType> __intrinsic T WaveAllBitXor(T expr);
+__generic<T : __BuiltinIntegerType, let N : int> __intrinsic vector<T,N> WaveAllBitXor(vector<T,N> expr);
+__generic<T : __BuiltinIntegerType, let N : int, let M : int> __intrinsic matrix<T,N,M> WaveAllBitXor(matrix<T,N,M> expr);
+
+__generic<T : __BuiltinArithmeticType> __intrinsic T WaveAllMax(T expr);
+__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic vector<T,N> WaveAllMax(vector<T,N> expr);
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic matrix<T,N,M> WaveAllMax(matrix<T,N,M> expr);
+
+__generic<T : __BuiltinArithmeticType> __intrinsic T WaveAllMin(T expr);
+__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic vector<T,N> WaveAllMin(vector<T,N> expr);
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic matrix<T,N,M> WaveAllMin(matrix<T,N,M> expr);
+
+__generic<T : __BuiltinArithmeticType> __intrinsic T WaveAllProduct(T expr);
+__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic vector<T,N> WaveAllProduct(vector<T,N> expr);
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic matrix<T,N,M> WaveAllProduct(matrix<T,N,M> expr);
+
+__generic<T : __BuiltinArithmeticType> __intrinsic T WaveAllSum(T expr);
+__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic vector<T,N> WaveAllSum(vector<T,N> expr);
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic matrix<T,N,M> WaveAllSum(matrix<T,N,M> expr);
+
+__intrinsic bool WaveAllEqual(bool expr);
+__intrinsic bool WaveAllTrue(bool expr);
+__intrinsic bool WaveAnyTrue(bool expr);
+
+uint64_t WaveBallot(bool expr);
+
+uint WaveGetLaneCount();
+uint WaveGetLaneIndex();
+uint WaveGetOrderedIndex();
+
+bool WaveIsHelperLane();
+
+bool WaveOnce();
+
+__generic<T : __BuiltinArithmeticType> __intrinsic T WavePrefixProduct(T expr);
+__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic vector<T,N> WavePrefixProduct(vector<T,N> expr);
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic matrix<T,N,M> WavePrefixProduct(matrix<T,N,M> expr);
+
+__generic<T : __BuiltinArithmeticType> __intrinsic T WavePrefixSum(T expr);
+__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic vector<T,N> WavePrefixSum(vector<T,N> expr);
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic matrix<T,N,M> WavePrefixSum(matrix<T,N,M> expr);
+
+__generic<T : __BuiltinType> __intrinsic T WaveReadFirstLane(T expr);
+__generic<T : __BuiltinType, let N : int> __intrinsic vector<T,N> WaveReadFirstLane(vector<T,N> expr);
+__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic matrix<T,N,M> WaveReadFirstLane(matrix<T,N,M> expr);
+
+__generic<T : __BuiltinType> __intrinsic T WaveReadLaneAt(T expr, int laneIndex);
+__generic<T : __BuiltinType, let N : int> __intrinsic vector<T,N> WaveReadLaneAt(vector<T,N> expr, int laneIndex);
+__generic<T : __BuiltinType, let N : int, let M : int> __intrinsic matrix<T,N,M> WaveReadLaneAt(matrix<T,N,M> expr, int laneIndex);
+
+
+)", R"(
+
+// `typedef`s to help with the fact that HLSL has been sorta-kinda case insensitive at various points
+typedef Texture2D texture2D;
+
+#line default
+)" };
+
+
+using namespace CoreLib::Basic;
+
+namespace Slang
+{
+ namespace Compiler
+ {
+ static String stdlibPath;
+
+ String getStdlibPath()
+ {
+ if(stdlibPath.Length() != 0)
+ return stdlibPath;
+
+ StringBuilder pathBuilder;
+ for( auto cc = __FILE__; *cc; ++cc )
+ {
+ switch( *cc )
+ {
+ case '\n':
+ case '\t':
+ case '\\':
+ pathBuilder << "\\";
+ default:
+ pathBuilder << *cc;
+ break;
+ }
+ }
+ stdlibPath = pathBuilder.ProduceString();
+
+ return stdlibPath;
+ }
+
+ String SlangStdLib::code;
+
+ enum
+ {
+ SINT_MASK = 1 << 0,
+ FLOAT_MASK = 1 << 1,
+ COMPARISON = 1 << 2,
+ BOOL_MASK = 1 << 3,
+ UINT_MASK = 1 << 4,
+ ASSIGNMENT = 1 << 5,
+ POSTFIX = 1 << 6,
+
+ INT_MASK = SINT_MASK | UINT_MASK,
+ ARITHMETIC_MASK = INT_MASK | FLOAT_MASK,
+ LOGICAL_MASK = INT_MASK | BOOL_MASK,
+ ANY_MASK = INT_MASK | FLOAT_MASK | BOOL_MASK,
+ };
+
+ String SlangStdLib::GetCode()
+ {
+ if (code.Length() > 0)
+ return code;
+ StringBuilder sb;
+
+ // generate operator overloads
+
+
+
+ struct OpInfo { IntrinsicOp opCode; char const* opName; unsigned flags; };
+
+ OpInfo unaryOps[] = {
+ { IntrinsicOp::Neg, "-", ARITHMETIC_MASK },
+ { IntrinsicOp::Not, "!", ANY_MASK },
+ { IntrinsicOp::Not, "~", INT_MASK },
+ { IntrinsicOp::PreInc, "++", ARITHMETIC_MASK | ASSIGNMENT },
+ { IntrinsicOp::PreDec, "--", ARITHMETIC_MASK | ASSIGNMENT },
+ { IntrinsicOp::PostInc, "++", ARITHMETIC_MASK | ASSIGNMENT | POSTFIX },
+ { IntrinsicOp::PostDec, "--", ARITHMETIC_MASK | ASSIGNMENT | POSTFIX },
+ };
+
+ OpInfo binaryOps[] = {
+ { IntrinsicOp::Add, "+", ARITHMETIC_MASK },
+ { IntrinsicOp::Sub, "-", ARITHMETIC_MASK },
+ { IntrinsicOp::Mul, "*", ARITHMETIC_MASK },
+ { IntrinsicOp::Div, "/", ARITHMETIC_MASK },
+ { IntrinsicOp::Mod, "%", INT_MASK },
+
+ { IntrinsicOp::And, "&&", LOGICAL_MASK },
+ { IntrinsicOp::Or, "||", LOGICAL_MASK },
+
+ { IntrinsicOp::BitAnd, "&", LOGICAL_MASK },
+ { IntrinsicOp::BitOr, "|", LOGICAL_MASK },
+ { IntrinsicOp::BitXor, "^", LOGICAL_MASK },
+
+ { IntrinsicOp::Lsh, "<<", INT_MASK },
+ { IntrinsicOp::Rsh, ">>", INT_MASK },
+
+ { IntrinsicOp::Eql, "==", ANY_MASK | COMPARISON },
+ { IntrinsicOp::Neq, "!=", ANY_MASK | COMPARISON },
+
+ { IntrinsicOp::Greater, ">", ARITHMETIC_MASK | COMPARISON },
+ { IntrinsicOp::Less, "<", ARITHMETIC_MASK | COMPARISON },
+ { IntrinsicOp::Geq, ">=", ARITHMETIC_MASK | COMPARISON },
+ { IntrinsicOp::Leq, "<=", ARITHMETIC_MASK | COMPARISON },
+
+ { IntrinsicOp::AddAssign, "+=", ASSIGNMENT | ARITHMETIC_MASK },
+ { IntrinsicOp::SubAssign, "-=", ASSIGNMENT | ARITHMETIC_MASK },
+ { IntrinsicOp::MulAssign, "*=", ASSIGNMENT | ARITHMETIC_MASK },
+ { IntrinsicOp::DivAssign, "/=", ASSIGNMENT | ARITHMETIC_MASK },
+ { IntrinsicOp::ModAssign, "%=", ASSIGNMENT | ARITHMETIC_MASK },
+ { IntrinsicOp::AndAssign, "&=", ASSIGNMENT | LOGICAL_MASK },
+ { IntrinsicOp::OrAssign, "|=", ASSIGNMENT | LOGICAL_MASK },
+ { IntrinsicOp::XorAssign, "^=", ASSIGNMENT | LOGICAL_MASK },
+ { IntrinsicOp::LshAssign, "<<=", ASSIGNMENT | INT_MASK },
+ { IntrinsicOp::RshAssign, ">>=", ASSIGNMENT | INT_MASK },
+
+
+ };
+
+ /*
+ String floatTypes[] = { "float", "float2", "float3", "float4" };
+ String intTypes[] = { "int", "int2", "int3", "int4" };
+ String uintTypes[] = { "uint", "uint2", "uint3", "uint4" };
+ */
+
+ String path = getStdlibPath();
+
+
+
+#define EMIT_LINE_DIRECTIVE() sb << "#line " << (__LINE__+1) << " \"" << path << "\"\n"
+
+ // Generate declarations for all the base types
+
+ static const struct {
+ char const* name;
+ BaseType tag;
+ unsigned flags;
+ } kBaseTypes[] = {
+ { "void", BaseType::Void, 0 },
+ { "int", BaseType::Int, SINT_MASK },
+ { "float", BaseType::Float, FLOAT_MASK },
+ { "uint", BaseType::UInt, UINT_MASK },
+ { "bool", BaseType::Bool, BOOL_MASK },
+ { "uint64_t", BaseType::UInt64, UINT_MASK },
+ };
+ static const int kBaseTypeCount = sizeof(kBaseTypes) / sizeof(kBaseTypes[0]);
+ for (int tt = 0; tt < kBaseTypeCount; ++tt)
+ {
+ EMIT_LINE_DIRECTIVE();
+ sb << "__builtin_type(" << int(kBaseTypes[tt].tag) << ") struct " << kBaseTypes[tt].name << "\n{\n";
+
+ // Declare trait conformances for this type
+
+ sb << "__conforms __BuiltinType;\n";
+
+ switch( kBaseTypes[tt].tag )
+ {
+ case BaseType::Float:
+ sb << "__conforms __BuiltinFloatingPointType;\n";
+ sb << "__conforms __BuiltinRealType;\n";
+ // fall through to:
+ case BaseType::Int:
+ sb << "__conforms __BuiltinSignedArithmeticType;\n";
+ // fall through to:
+ case BaseType::UInt:
+ case BaseType::UInt64:
+ sb << "__conforms __BuiltinArithmeticType;\n";
+ // fall through to:
+ case BaseType::Bool:
+ sb << "__conforms __BuiltinType;\n";
+ break;
+
+ default:
+ break;
+ }
+
+ // Declare initializers to convert from various other types
+ for( int ss = 0; ss < kBaseTypeCount; ++ss )
+ {
+ if( kBaseTypes[ss].tag == BaseType::Void )
+ continue;
+
+ EMIT_LINE_DIRECTIVE();
+ sb << "__init(" << kBaseTypes[ss].name << " value);\n";
+ }
+
+ sb << "};\n";
+ }
+
+ // Declare ad hoc aliases for some types, just to get things compiling
+ //
+ // TODO(tfoley): At the very least, `double` should be treated as a distinct type.
+ sb << "typedef float double;\n";
+ sb << "typedef float half;\n";
+
+ // Declare vector and matrix types
+
+ sb << "__generic<T = float, let N : int = 4> __magic_type(Vector) struct vector\n{\n";
+ sb << " __init(T value);\n"; // initialize from single scalar
+ sb << "};\n";
+ sb << "__generic<T = float, let R : int = 4, let C : int = 4> __magic_type(Matrix) struct matrix {};\n";
+
+ static const struct {
+ char const* name;
+ char const* glslPrefix;
+ } kTypes[] =
+ {
+ {"float", ""},
+ {"int", "i"},
+ {"uint", "u"},
+ {"bool", "b"},
+ };
+ static const int kTypeCount = sizeof(kTypes) / sizeof(kTypes[0]);
+
+ for (int tt = 0; tt < kTypeCount; ++tt)
+ {
+ // Declare HLSL vector types
+ for (int ii = 1; ii <= 4; ++ii)
+ {
+ sb << "typedef vector<" << kTypes[tt].name << "," << ii << "> " << kTypes[tt].name << ii << ";\n";
+ }
+
+ // Declare HLSL matrix types
+ for (int rr = 2; rr <= 4; ++rr)
+ for (int cc = 2; cc <= 4; ++cc)
+ {
+ sb << "typedef matrix<" << kTypes[tt].name << "," << rr << "," << cc << "> " << kTypes[tt].name << rr << "x" << cc << ";\n";
+ }
+ }
+
+ static const char* kComponentNames[]{ "x", "y", "z", "w" };
+ static const char* kVectorNames[]{ "", "x", "xy", "xyz", "xyzw" };
+
+ // Need to add constructors to the types above
+ for (int N = 2; N <= 4; ++N)
+ {
+ sb << "__generic<T> __extension vector<T, " << N << ">\n{\n";
+
+ // initialize from N scalars
+ sb << "__init(";
+ for (int ii = 0; ii < N; ++ii)
+ {
+ if (ii != 0) sb << ", ";
+ sb << "T " << kComponentNames[ii];
+ }
+ sb << ");\n";
+
+ // Initialize from an M-vector and then scalars
+ for (int M = 2; M < N; ++M)
+ {
+ sb << "__init(vector<T," << M << "> " << kVectorNames[M];
+ for (int ii = M; ii < N; ++ii)
+ {
+ sb << ", T " << kComponentNames[ii];
+ }
+ sb << ");\n";
+ }
+
+ // initialize from another vector of the same size
+ //
+ // TODO(tfoley): this overlaps with implicit conversions.
+ // We should look for a way that we can define implicit
+ // conversions directly in the stdlib instead...
+ sb << "__generic<U> __init(vector<U," << N << ">);\n";
+
+ sb << "}\n";
+ }
+
+ for( int R = 2; R <= 4; ++R )
+ for( int C = 2; C <= 4; ++C )
+ {
+ sb << "__generic<T> __extension matrix<T, " << R << "," << C << ">\n{\n";
+
+ // initialize from R*C scalars
+ sb << "__init(";
+ for( int ii = 0; ii < R; ++ii )
+ for( int jj = 0; jj < C; ++jj )
+ {
+ if ((ii+jj) != 0) sb << ", ";
+ sb << "T m" << ii << jj;
+ }
+ sb << ");\n";
+
+ // Initialize from R C-vectors
+ sb << "__init(";
+ for (int ii = 0; ii < R; ++ii)
+ {
+ if(ii != 0) sb << ", ";
+ sb << "vector<T," << C << "> row" << ii;
+ }
+ sb << ");\n";
+
+
+ // initialize from another matrix of the same size
+ //
+ // TODO(tfoley): See comment about how this overlaps
+ // with implicit conversion, in the `vector` case above
+ sb << "__generic<U> __init(matrix<U," << R << ", " << C << ">);\n";
+
+ sb << "}\n";
+ }
+
+
+ // Declare built-in texture and sampler types
+
+ sb << "__magic_type(SamplerState," << int(SamplerStateType::Flavor::SamplerState) << ") struct SamplerState {};";
+ sb << "__magic_type(SamplerState," << int(SamplerStateType::Flavor::SamplerComparisonState) << ") struct SamplerComparisonState {};";
+
+ // TODO(tfoley): Need to handle `RW*` variants of texture types as well...
+ static const struct {
+ char const* name;
+ TextureType::Shape baseShape;
+ int coordCount;
+ } kBaseTextureTypes[] = {
+ { "Texture1D", TextureType::Shape1D, 1 },
+ { "Texture2D", TextureType::Shape2D, 2 },
+ { "Texture3D", TextureType::Shape3D, 3 },
+ { "TextureCube", TextureType::ShapeCube, 3 },
+ };
+ static const int kBaseTextureTypeCount = sizeof(kBaseTextureTypes) / sizeof(kBaseTextureTypes[0]);
+
+
+ static const struct {
+ char const* name;
+ SlangResourceAccess access;
+ } kBaseTextureAccessLevels[] = {
+ { "", SLANG_RESOURCE_ACCESS_READ },
+ { "RW", SLANG_RESOURCE_ACCESS_READ_WRITE },
+ { "RasterizerOrdered", SLANG_RESOURCE_ACCESS_RASTER_ORDERED },
+ };
+ static const int kBaseTextureAccessLevelCount = sizeof(kBaseTextureAccessLevels) / sizeof(kBaseTextureAccessLevels[0]);
+
+ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
+ {
+ char const* name = kBaseTextureTypes[tt].name;
+ TextureType::Shape baseShape = kBaseTextureTypes[tt].baseShape;
+
+ for (int isArray = 0; isArray < 2; ++isArray)
+ {
+ // Arrays of 3D textures aren't allowed
+ if (isArray && baseShape == TextureType::Shape3D) continue;
+
+ for (int isMultisample = 0; isMultisample < 2; ++isMultisample)
+ for (int accessLevel = 0; accessLevel < kBaseTextureAccessLevelCount; ++accessLevel)
+ {
+ auto access = kBaseTextureAccessLevels[accessLevel].access;
+
+ // TODO: any constraints to enforce on what gets to be multisampled?
+
+ unsigned flavor = baseShape;
+ if (isArray) flavor |= TextureType::ArrayFlag;
+ if (isMultisample) flavor |= TextureType::MultisampleFlag;
+// if (isShadow) flavor |= TextureType::ShadowFlag;
+
+ flavor |= (access << 8);
+
+
+ // emit a generic signature
+ // TODO: allow for multisample count to come in as well...
+ sb << "__generic<T = float4> ";
+
+ sb << "__magic_type(Texture," << int(flavor) << ") struct ";
+ sb << kBaseTextureAccessLevels[accessLevel].name;
+ sb << name;
+ if (isMultisample) sb << "MS";
+ if (isArray) sb << "Array";
+// if (isShadow) sb << "Shadow";
+ sb << "\n{";
+
+ if( !isMultisample )
+ {
+ sb << "float CalculateLevelOfDetail(SamplerState s, ";
+ sb << "float" << kBaseTextureTypes[tt].coordCount << " location);\n";
+
+ sb << "float CalculateLevelOfDetailUnclamped(SamplerState s, ";
+ sb << "float" << kBaseTextureTypes[tt].coordCount << " location);\n";
+
+ // TODO: `Gather` operation
+ // (tricky because it returns a 4-vector of the element type
+ // of the texture components...)
+ }
+
+ // TODO: `GetDimensions` operations
+
+ for(int isFloat = 0; isFloat < 2; ++isFloat)
+ for(int includeMipInfo = 0; includeMipInfo < 2; ++includeMipInfo)
+ {
+ char const* t = isFloat ? "out float " : "out UINT ";
+
+ sb << "void GetDimensions(";
+ if(includeMipInfo)
+ sb << "UINT mipLevel, ";
+
+ switch(baseShape)
+ {
+ case TextureType::Shape1D:
+ sb << t << "width";
+ break;
+
+ case TextureType::Shape2D:
+ case TextureType::ShapeCube:
+ sb << t << "width,";
+ sb << t << "height";
+ break;
+
+ case TextureType::Shape3D:
+ sb << t << "width,";
+ sb << t << "height,";
+ sb << t << "depth";
+ break;
+
+ default:
+ assert(!"unexpected");
+ break;
+ }
+
+ if(isArray)
+ {
+ sb << ", " << t << "elements";
+ }
+
+ if(includeMipInfo)
+ sb << ", " << t << "numberOfLevels";
+
+ sb << ");\n";
+ }
+
+ // `GetSamplePosition()`
+ if( isMultisample )
+ {
+ sb << "float2 GetSamplePosition(int s);\n";
+ }
+
+ // `Load()`
+
+ if( kBaseTextureTypes[tt].coordCount + isArray < 4 )
+ {
+ sb << "T Load(";
+ sb << "int" << kBaseTextureTypes[tt].coordCount + isArray + 1 << " location);\n";
+
+ if( !isMultisample )
+ {
+ sb << "T Load(";
+ sb << "int" << kBaseTextureTypes[tt].coordCount + isArray + 1 << " location, ";
+ sb << "int" << kBaseTextureTypes[tt].coordCount << " offset);\n";
+ }
+ else
+ {
+ sb << "T Load(";
+ sb << "int" << kBaseTextureTypes[tt].coordCount + isArray + 1 << " location, ";
+ sb << "int sampleIndex, ";
+ sb << "int" << kBaseTextureTypes[tt].coordCount << " offset);\n";
+ }
+ }
+
+ if(baseShape != TextureType::ShapeCube)
+ {
+ // subscript operator
+ sb << "__intrinsic __subscript(uint" << kBaseTextureTypes[tt].coordCount + isArray << " location) -> T;\n";
+ }
+
+ if( !isMultisample )
+ {
+ // `Sample()`
+
+ sb << "T Sample(SamplerState s, ";
+ sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location);\n";
+
+ if( baseShape != TextureType::ShapeCube )
+ {
+ sb << "T Sample(SamplerState s, ";
+ sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+ sb << "int" << kBaseTextureTypes[tt].coordCount << " offset);\n";
+ }
+
+ sb << "T Sample(SamplerState s, ";
+ sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+ if( baseShape != TextureType::ShapeCube )
+ {
+ sb << "int" << kBaseTextureTypes[tt].coordCount << " offset, ";
+ }
+ sb << "float clamp);\n";
+
+ sb << "T Sample(SamplerState s, ";
+ sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+ if( baseShape != TextureType::ShapeCube )
+ {
+ sb << "int" << kBaseTextureTypes[tt].coordCount << " offset, ";
+ }
+ sb << "float clamp, out uint status);\n";
+
+
+ // `SampleBias()`
+ sb << "T SampleBias(SamplerState s, ";
+ sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, float bias);\n";
+
+ if( baseShape != TextureType::ShapeCube )
+ {
+ sb << "T SampleBias(SamplerState s, ";
+ sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, float bias, ";
+ sb << "int" << kBaseTextureTypes[tt].coordCount << " offset);\n";
+ }
+
+ // `SampleCmp()` and `SampleCmpLevelZero`
+ sb << "T SampleCmp(SamplerComparisonState s, ";
+ sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+ sb << "float compareValue";
+ sb << ");\n";
+
+ sb << "T SampleCmpLevelZero(SamplerComparisonState s, ";
+ sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+ sb << "float compareValue";
+ sb << ");\n";
+
+ if( baseShape != TextureType::ShapeCube )
+ {
+ // Note(tfoley): MSDN seems confused, and claims that the `offset`
+ // parameter for `SampleCmp` is available for everything but 3D
+ // textures, while `Sample` and `SampleBias` are consistent in
+ // saying they only exclude `offset` for cube maps (which makes
+ // sense). I'm going to assume the documentation for `SampleCmp`
+ // is just wrong.
+
+ sb << "T SampleCmp(SamplerState s, ";
+ sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+ sb << "float compareValue, ";
+ sb << "int" << kBaseTextureTypes[tt].coordCount << " offset);\n";
+
+ sb << "T SampleCmpLevelZero(SamplerState s, ";
+ sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+ sb << "float compareValue, ";
+ sb << "int" << kBaseTextureTypes[tt].coordCount << " offset);\n";
+ }
+
+ sb << "T SampleGrad(SamplerState s, ";
+ sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+ sb << "float" << kBaseTextureTypes[tt].coordCount << " gradX, ";
+ sb << "float" << kBaseTextureTypes[tt].coordCount << " gradY";
+ sb << ");\n";
+
+ if( baseShape != TextureType::ShapeCube )
+ {
+ sb << "T SampleGrad(SamplerState s, ";
+ sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+ sb << "float" << kBaseTextureTypes[tt].coordCount << " gradX, ";
+ sb << "float" << kBaseTextureTypes[tt].coordCount << " gradY, ";
+ sb << "int" << kBaseTextureTypes[tt].coordCount << " offset);\n";
+ }
+
+ // `SampleLevel`
+
+ sb << "T SampleLevel(SamplerState s, ";
+ sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+ sb << "float level);\n";
+
+ if( baseShape != TextureType::ShapeCube )
+ {
+ sb << "T SampleLevel(SamplerState s, ";
+ sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
+ sb << "float level, ";
+ sb << "int" << kBaseTextureTypes[tt].coordCount << " offset);\n";
+ }
+ }
+
+ sb << "\n};\n";
+ }
+ }
+ }
+
+ // Declare additional built-in generic types
+
+ sb << "__generic<T> __magic_type(ConstantBuffer) struct ConstantBuffer {};\n";
+ sb << "__generic<T> __magic_type(TextureBuffer) struct TextureBuffer {};\n";
+
+ sb << "__generic<T> __magic_type(PackedBuffer) struct PackedBuffer {};\n";
+ sb << "__generic<T> __magic_type(Uniform) struct Uniform {};\n";
+ sb << "__generic<T> __magic_type(Patch) struct Patch {};\n";
+
+
+ // Stale declarations for GLSL inner-product builtins
+#if 0
+ sb << "__intrinsic vec3 operator * (vec3, mat3);\n";
+ sb << "__intrinsic vec3 operator * (mat3, vec3);\n";
+
+ sb << "__intrinsic vec4 operator * (vec4, mat4);\n";
+ sb << "__intrinsic vec4 operator * (mat4, vec4);\n";
+
+ sb << "__intrinsic mat3 operator * (mat3, mat3);\n";
+ sb << "__intrinsic mat4 operator * (mat4, mat4);\n";
+#endif
+
+#if 0
+ sb << "__intrinsic(And) bool operator && (bool, bool);\n";
+ sb << "__intrinsic(Or) bool operator || (bool, bool);\n";
+
+ for (auto type : intTypes)
+ {
+ sb << "__intrinsic(And) bool operator && (bool, " << type << ");\n";
+ sb << "__intrinsic(Or) bool operator || (bool, " << type << ");\n";
+ sb << "__intrinsic(And) bool operator && (" << type << ", bool);\n";
+ sb << "__intrinsic(Or) bool operator || (" << type << ", bool);\n";
+ }
+#endif
+
+ for (auto op : unaryOps)
+ {
+ for (auto type : kBaseTypes)
+ {
+ if ((type.flags & op.flags) == 0)
+ continue;
+
+ char const* fixity = (op.flags & POSTFIX) != 0 ? "__postfix " : "__prefix ";
+ char const* qual = (op.flags & ASSIGNMENT) != 0 ? "in out " : "";
+
+ // scalar version
+ sb << fixity;
+ sb << "__intrinsic(" << int(op.opCode) << ") " << type.name << " operator" << op.opName << "(" << qual << type.name << " value);\n";
+
+ // vector version
+ sb << "__generic<let N : int> ";
+ sb << fixity;
+ sb << "__intrinsic(" << int(op.opCode) << ") vector<" << type.name << ",N> operator" << op.opName << "(" << qual << "vector<" << type.name << ",N> value);\n";
+
+ // matrix version
+ sb << "__generic<let N : int, let M : int> ";
+ sb << fixity;
+ sb << "__intrinsic(" << int(op.opCode) << ") matrix<" << type.name << ",N,M> operator" << op.opName << "(" << qual << "matrix<" << type.name << ",N,M> value);\n";
+ }
+ }
+
+ for (auto op : binaryOps)
+ {
+ for (auto type : kBaseTypes)
+ {
+ if ((type.flags & op.flags) == 0)
+ continue;
+
+ char const* leftType = type.name;
+ char const* rightType = leftType;
+ char const* resultType = leftType;
+
+ if (op.flags & COMPARISON) resultType = "bool";
+
+ char const* leftQual = "";
+ if(op.flags & ASSIGNMENT) leftQual = "in out ";
+
+ // TODO: handle `SHIFT`
+
+ // scalar version
+ sb << "__intrinsic(" << int(op.opCode) << ") " << resultType << " operator" << op.opName << "(" << leftQual << leftType << " left, " << rightType << " right);\n";
+
+ // vector version
+ sb << "__generic<let N : int> ";
+ sb << "__intrinsic(" << int(op.opCode) << ") vector<" << resultType << ",N> operator" << op.opName << "(" << leftQual << "vector<" << leftType << ",N> left, vector<" << rightType << ",N> right);\n";
+
+ // matrix version
+ sb << "__generic<let N : int, let M : int> ";
+ sb << "__intrinsic(" << int(op.opCode) << ") matrix<" << resultType << ",N,M> operator" << op.opName << "(" << leftQual << "matrix<" << leftType << ",N,M> left, matrix<" << rightType << ",N,M> right);\n";
+ }
+ }
+
+#if 0
+ for (auto op : intUnaryOps)
+ {
+ String opName = GetOperatorFunctionName(op);
+ for (int i = 0; i < 4; i++)
+ {
+ auto itype = intTypes[i];
+ auto utype = uintTypes[i];
+ for (int j = 0; j < 2; j++)
+ {
+ auto retType = (op == Operator::Not) ? "bool" : j == 0 ? itype : utype;
+ sb << "__intrinsic " << retType << " operator " << opName << "(" << (j == 0 ? itype : utype) << ");\n";
+ }
+ }
+ }
+
+ for (auto op : floatUnaryOps)
+ {
+ String opName = GetOperatorFunctionName(op);
+ for (int i = 0; i < 4; i++)
+ {
+ auto type = floatTypes[i];
+ auto retType = (op == Operator::Not) ? "bool" : type;
+ sb << "__intrinsic " << retType << " operator " << opName << "(" << type << ");\n";
+ }
+ }
+
+ for (auto op : floatOps)
+ {
+ String opName = GetOperatorFunctionName(op);
+ for (int i = 0; i < 4; i++)
+ {
+ auto type = floatTypes[i];
+ auto itype = intTypes[i];
+ auto utype = uintTypes[i];
+ auto retType = ((op >= Operator::Eql && op <= Operator::Leq) || op == Operator::And || op == Operator::Or) ? "bool" : type;
+ sb << "__intrinsic " << retType << " operator " << opName << "(" << type << ", " << type << ");\n";
+ sb << "__intrinsic " << retType << " operator " << opName << "(" << itype << ", " << type << ");\n";
+ sb << "__intrinsic " << retType << " operator " << opName << "(" << utype << ", " << type << ");\n";
+ sb << "__intrinsic " << retType << " operator " << opName << "(" << type << ", " << itype << ");\n";
+ sb << "__intrinsic " << retType << " operator " << opName << "(" << type << ", " << utype << ");\n";
+ if (i > 0)
+ {
+ sb << "__intrinsic " << retType << " operator " << opName << "(" << type << ", " << floatTypes[0] << ");\n";
+ sb << "__intrinsic " << retType << " operator " << opName << "(" << floatTypes[0] << ", " << type << ");\n";
+
+ sb << "__intrinsic " << retType << " operator " << opName << "(" << type << ", " << intTypes[0] << ");\n";
+ sb << "__intrinsic " << retType << " operator " << opName << "(" << intTypes[0] << ", " << type << ");\n";
+
+ sb << "__intrinsic " << retType << " operator " << opName << "(" << type << ", " << uintTypes[0] << ");\n";
+ sb << "__intrinsic " << retType << " operator " << opName << "(" << uintTypes[0] << ", " << type << ");\n";
+ }
+ }
+ }
+
+ for (auto op : intOps)
+ {
+ String opName = GetOperatorFunctionName(op);
+ for (int i = 0; i < 4; i++)
+ {
+ auto type = intTypes[i];
+ auto utype = uintTypes[i];
+ auto retType = ((op >= Operator::Eql && op <= Operator::Leq) || op == Operator::And || op == Operator::Or) ? "bool" : type;
+ sb << "__intrinsic " << retType << " operator " << opName << "(" << type << ", " << type << ");\n";
+ sb << "__intrinsic " << retType << " operator " << opName << "(" << utype << ", " << type << ");\n";
+ sb << "__intrinsic " << retType << " operator " << opName << "(" << type << ", " << utype << ");\n";
+ sb << "__intrinsic " << retType << " operator " << opName << "(" << utype << ", " << utype << ");\n";
+ if (i > 0)
+ {
+ sb << "__intrinsic " << retType << " operator " << opName << "(" << type << ", " << intTypes[0] << ");\n";
+ sb << "__intrinsic " << retType << " operator " << opName << "(" << intTypes[0] << ", " << type << ");\n";
+
+ sb << "__intrinsic " << retType << " operator " << opName << "(" << type << ", " << uintTypes[0] << ");\n";
+ sb << "__intrinsic " << retType << " operator " << opName << "(" << uintTypes[0] << ", " << type << ");\n";
+ }
+ }
+ }
+#endif
+
+ // Output a suitable `#line` directive to point at our raw stdlib code above
+ sb << "\n#line " << kLibIncludeStringLine << " \"" << path << "\"\n";
+
+ int chunkCount = sizeof(LibIncludeStringChunks) / sizeof(LibIncludeStringChunks[0]);
+ for (int cc = 0; cc < chunkCount; ++cc)
+ {
+ sb << LibIncludeStringChunks[cc];
+ }
+
+ code = sb.ProduceString();
+ return code;
+ }
+
+
+ // GLSL-specific library code
+
+ String glslLibraryCode;
+
+ String getGLSLLibraryCode()
+ {
+ if(glslLibraryCode.Length() != 0)
+ return glslLibraryCode;
+
+ String path = getStdlibPath();
+
+ StringBuilder sb;
+
+#define RAW(TEXT) \
+ EMIT_LINE_DIRECTIVE(); \
+ sb << TEXT;
+
+ static const struct {
+ char const* name;
+ char const* glslPrefix;
+ } kTypes[] =
+ {
+ {"float", ""},
+ {"int", "i"},
+ {"uint", "u"},
+ {"bool", "b"},
+ };
+ static const int kTypeCount = sizeof(kTypes) / sizeof(kTypes[0]);
+
+ for( int tt = 0; tt < kTypeCount; ++tt )
+ {
+ // Declare GLSL aliases for HLSL types
+ for (int vv = 2; vv <= 4; ++vv)
+ {
+ sb << "typedef " << kTypes[tt].name << vv << " " << kTypes[tt].glslPrefix << "vec" << vv << ";\n";
+ sb << "typedef " << kTypes[tt].name << vv << "x" << vv << " " << kTypes[tt].glslPrefix << "mat" << vv << ";\n";
+ }
+ for (int rr = 2; rr <= 4; ++rr)
+ for (int cc = 2; cc <= 4; ++cc)
+ {
+ sb << "typedef " << kTypes[tt].name << rr << "x" << cc << " " << kTypes[tt].glslPrefix << "mat" << rr << "x" << cc << ";\n";
+ }
+ }
+
+ // TODO(tfoley): Need to handle `RW*` variants of texture types as well...
+ static const struct {
+ char const* name;
+ TextureType::Shape baseShape;
+ int coordCount;
+ } kBaseTextureTypes[] = {
+ { "1D", TextureType::Shape1D, 1 },
+ { "2D", TextureType::Shape2D, 2 },
+ { "3D", TextureType::Shape3D, 3 },
+ { "Cube", TextureType::ShapeCube, 3 },
+ };
+ static const int kBaseTextureTypeCount = sizeof(kBaseTextureTypes) / sizeof(kBaseTextureTypes[0]);
+
+
+ static const struct {
+ char const* name;
+ SlangResourceAccess access;
+ } kBaseTextureAccessLevels[] = {
+ { "", SLANG_RESOURCE_ACCESS_READ },
+ { "RW", SLANG_RESOURCE_ACCESS_READ_WRITE },
+ { "RasterizerOrdered", SLANG_RESOURCE_ACCESS_RASTER_ORDERED },
+ };
+ static const int kBaseTextureAccessLevelCount = sizeof(kBaseTextureAccessLevels) / sizeof(kBaseTextureAccessLevels[0]);
+
+ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
+ {
+ char const* shapeName = kBaseTextureTypes[tt].name;
+ TextureType::Shape baseShape = kBaseTextureTypes[tt].baseShape;
+
+ for (int isArray = 0; isArray < 2; ++isArray)
+ {
+ // Arrays of 3D textures aren't allowed
+ if (isArray && baseShape == TextureType::Shape3D) continue;
+
+ for (int isMultisample = 0; isMultisample < 2; ++isMultisample)
+ {
+ auto access = SLANG_RESOURCE_ACCESS_READ;
+
+ // TODO: any constraints to enforce on what gets to be multisampled?
+
+
+ unsigned flavor = baseShape;
+ if (isArray) flavor |= TextureType::ArrayFlag;
+ if (isMultisample) flavor |= TextureType::MultisampleFlag;
+// if (isShadow) flavor |= TextureType::ShadowFlag;
+
+ flavor |= (access << 8);
+
+ StringBuilder nameBuilder;
+ nameBuilder << shapeName;
+ if (isMultisample) nameBuilder << "MS";
+ if (isArray) nameBuilder << "Array";
+ auto name = nameBuilder.ProduceString();
+
+ sb << "__generic<T> ";
+ sb << "__magic_type(TextureSampler," << int(flavor) << ") struct ";
+ sb << "__sampler" << name;
+ sb << " {};\n";
+
+ sb << "__generic<T> ";
+ sb << "__magic_type(Texture," << int(flavor) << ") struct ";
+ sb << "__texture" << name;
+ sb << " {};\n";
+
+ sb << "__generic<T> ";
+ sb << "__magic_type(GLSLImageType," << int(flavor) << ") struct ";
+ sb << "__image" << name;
+ sb << " {};\n";
+
+ // TODO(tfoley): flesh this out for all the available prefixes
+ static const struct
+ {
+ char const* prefix;
+ char const* elementType;
+ } kTextureElementTypes[] = {
+ { "", "vec4" },
+ { "i", "ivec4" },
+ { "u", "uvec4" },
+ { nullptr, nullptr },
+ };
+ for( auto ee = kTextureElementTypes; ee->prefix; ++ee )
+ {
+ sb << "typedef __sampler" << name << "<" << ee->elementType << "> " << ee->prefix << "sampler" << name << ";\n";
+ sb << "typedef __texture" << name << "<" << ee->elementType << "> " << ee->prefix << "texture" << name << ";\n";
+ sb << "typedef __image" << name << "<" << ee->elementType << "> " << ee->prefix << "image" << name << ";\n";
+ }
+ }
+ }
+ }
+
+ sb << "__generic<T> __magic_type(GLSLInputParameterBlockType) struct __GLSLInputParameterBlock {};\n";
+ sb << "__generic<T> __magic_type(GLSLOutputParameterBlockType) struct __GLSLOutputParameterBlock {};\n";
+ sb << "__generic<T> __magic_type(GLSLShaderStorageBufferType) struct __GLSLShaderStorageBuffer {};\n";
+
+ sb << "__magic_type(SamplerState," << int(SamplerStateType::Flavor::SamplerState) << ") struct sampler {};";
+
+ sb << "__magic_type(GLSLInputAttachmentType) struct subpassInput {};";
+
+ // Define additional keywords
+ sb << "__modifier(GLSLBufferModifier) buffer;\n";
+ sb << "__modifier(GLSLWriteOnlyModifier) writeonly;\n";
+ sb << "__modifier(GLSLReadOnlyModifier) readonly;\n";
+ sb << "__modifier(GLSLPatchModifier) patch;\n";
+
+ sb << "__modifier(SimpleModifier) flat;\n";
+
+ glslLibraryCode = sb.ProduceString();
+ return glslLibraryCode;
+ }
+
+
+
+ //
+
+ void SlangStdLib::Finalize()
+ {
+ code = nullptr;
+ stdlibPath = String();
+ glslLibraryCode = String();
+ }
+
+ }
+}
+