From eaafafe772366a23ed847cbb10770c72aa5cfc28 Mon Sep 17 00:00:00 2001 From: Tim Foley Date: Wed, 3 Oct 2018 16:03:37 -0700 Subject: Update DXR API definitions for final spec. (#659) * Update DXR API definitions for final spec. The final version of the DXR API has changed the result type of the `DispatchRaysIndex()` and `DispatchRaysDimensions()` builtins to `uint3` (from `uint2`). * Add updates for DXR object<->world transformations The `ObjectToWorld()` and `WorldToObject()` functions were renamed to `ObjectToWorld3x4()` and `WorldToObject3x4()`, respectively, and then new functions `ObjectToWorld4x3()` and `WorldToObject4x3()` were added to give convenient access to the transpose of these matrices. (No, I'm not clear on why users couldn't just call `transpose()`, either) I've left the old function names in the standard library as forwarding functions just so that we don't break existing DXR code that relied on the old names. --- source/slang/hlsl.meta.slang | 2824 ++++++++++++++++++++-------------------- source/slang/hlsl.meta.slang.h | 23 +- 2 files changed, 1438 insertions(+), 1409 deletions(-) (limited to 'source') diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index a9609e13e..5bcff1762 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -1,1405 +1,1419 @@ -// Slang HLSL compatibility library - -typedef uint UINT; - -__generic -__magic_type(HLSLAppendStructuredBufferType) -__intrinsic_type($(kIROp_HLSLAppendStructuredBufferType)) -struct AppendStructuredBuffer -{ - void Append(T value); - - void GetDimensions( - out uint numStructs, - out uint stride); -}; - -__magic_type(HLSLByteAddressBufferType) -__intrinsic_type($(kIROp_HLSLByteAddressBufferType)) -struct ByteAddressBuffer -{ - void GetDimensions( - out uint dim); - - uint Load(int location); - uint Load(int location, out uint status); - - uint2 Load2(int location); - uint2 Load2(int location, out uint status); - - uint3 Load3(int location); - uint3 
Load3(int location, out uint status); - - uint4 Load4(int location); - uint4 Load4(int location, out uint status); -}; - -__generic -__magic_type(HLSLStructuredBufferType) -__intrinsic_type($(kIROp_HLSLStructuredBufferType)) -struct StructuredBuffer -{ - void GetDimensions( - out uint numStructs, - out uint stride); - - T Load(int location); - T Load(int location, out uint status); - - __subscript(uint index) -> T { __intrinsic_op(bufferLoad) get; }; -}; - -__generic -__magic_type(HLSLConsumeStructuredBufferType) -__intrinsic_type($(kIROp_HLSLConsumeStructuredBufferType)) -struct ConsumeStructuredBuffer -{ - T Consume(); - - void GetDimensions( - out uint numStructs, - out uint stride); -}; - -__generic -__magic_type(HLSLInputPatchType) -__intrinsic_type($(kIROp_HLSLInputPatchType)) -struct InputPatch -{ - __subscript(uint index) -> T; -}; - -__generic -__magic_type(HLSLOutputPatchType) -__intrinsic_type($(kIROp_HLSLOutputPatchType)) -struct OutputPatch -{ - __subscript(uint index) -> T; -}; - -${{{{ -static const struct { - IROp op; - char const* name; -} kMutableByteAddressBufferCases[] = -{ - { kIROp_HLSLRWByteAddressBufferType, "RWByteAddressBuffer" }, - { kIROp_HLSLRasterizerOrderedByteAddressBufferType, "RasterizerOrderedByteAddressBuffer" }, -}; -for(auto item : kMutableByteAddressBufferCases) { -}}}} - -__magic_type(HLSL$(item.name)Type) -__intrinsic_type($(item.op)) -struct $(item.name) -{ - // Note(tfoley): supports alll operations from `ByteAddressBuffer` - // TODO(tfoley): can this be made a sub-type? 
- - void GetDimensions( - out uint dim); - - uint Load(int location); - uint Load(int location, out uint status); - - uint2 Load2(int location); - uint2 Load2(int location, out uint status); - - uint3 Load3(int location); - uint3 Load3(int location, out uint status); - - uint4 Load4(int location); - uint4 Load4(int location, out uint status); - - // Added operations: - - void InterlockedAdd( - UINT dest, - UINT value, - out UINT original_value); - void InterlockedAdd( - UINT dest, - UINT value); - - void InterlockedAnd( - UINT dest, - UINT value, - out UINT original_value); - void InterlockedAnd( - UINT dest, - UINT value); - - void InterlockedCompareExchange( - UINT dest, - UINT compare_value, - UINT value, - out UINT original_value); - void InterlockedCompareExchange( - UINT dest, - UINT compare_value, - UINT value); - - void InterlockedCompareStore( - UINT dest, - UINT compare_value, - UINT value); - void InterlockedCompareStore( - UINT dest, - UINT compare_value); - - void InterlockedExchange( - UINT dest, - UINT value, - out UINT original_value); - void InterlockedExchange( - UINT dest, - UINT value); - - void InterlockedMax( - UINT dest, - UINT value, - out UINT original_value); - void InterlockedMax( - UINT dest, - UINT value); - - void InterlockedMin( - UINT dest, - UINT value, - out UINT original_value); - void InterlockedMin( - UINT dest, - UINT value); - - void InterlockedOr( - UINT dest, - UINT value, - out UINT original_value); - void InterlockedOr( - UINT dest, - UINT value); - - void InterlockedXor( - UINT dest, - UINT value, - out UINT original_value); - void InterlockedXor( - UINT dest, - UINT value); - - void Store( - uint address, - uint value); - - void Store2( - uint address, - uint2 value); - - void Store3( - uint address, - uint3 value); - - void Store4( - uint address, - uint4 value); -}; - -${{{{ -} -}}}} - -${{{{ -static const struct { - IROp op; - char const* name; -} kMutableStructuredBufferCases[] = -{ - { 
kIROp_HLSLRWStructuredBufferType, "RWStructuredBuffer" }, - { kIROp_HLSLRasterizerOrderedStructuredBufferType, "RasterizerOrderedStructuredBuffer" }, -}; -for(auto item : kMutableStructuredBufferCases) { -}}}} - - -__generic -__magic_type(HLSL$(item.name)Type) -__intrinsic_type($(item.op)) -struct $(item.name) -{ - uint DecrementCounter(); - - void GetDimensions( - out uint numStructs, - out uint stride); - - uint IncrementCounter(); - - T Load(int location); - T Load(int location, out uint status); - - __subscript(uint index) -> T - { - __intrinsic_op(bufferElementRef) - ref; - } -}; - -${{{{ -} -}}}} - -__generic -__magic_type(HLSLPointStreamType) -__intrinsic_type($(kIROp_HLSLPointStreamType)) -struct PointStream -{ - __target_intrinsic(glsl, "EmitVertex()") - void Append(T value); - - __target_intrinsic(glsl, "EndPrimitive()") - void RestartStrip(); -}; - -__generic -__magic_type(HLSLLineStreamType) -__intrinsic_type($(kIROp_HLSLLineStreamType)) -struct LineStream -{ - __target_intrinsic(glsl, "EmitVertex()") - void Append(T value); - - __target_intrinsic(glsl, "EndPrimitive()") - void RestartStrip(); -}; - -__generic -__magic_type(HLSLTriangleStreamType) -__intrinsic_type($(kIROp_HLSLTriangleStreamType)) -struct TriangleStream -{ - __target_intrinsic(glsl, "EmitVertex()") - void Append(T value); - - __target_intrinsic(glsl, "EndPrimitive()") - void RestartStrip(); -}; - -// Note(tfoley): Trying to systematically add all the HLSL builtins - -// Try to terminate the current draw or dispatch call (HLSL SM 4.0) -void abort(); - -// Absolute value (HLSL SM 1.0) -__generic T abs(T x); -__generic vector abs(vector x); -__generic matrix abs(matrix x); - -// Inverse cosine (HLSL SM 1.0) -__generic T acos(T x); -__generic vector acos(vector x); -__generic matrix acos(matrix x); - -// Test if all components are non-zero (HLSL SM 1.0) -__generic bool all(T x); -__generic bool all(vector x); -__generic bool all(matrix x); - -// Barrier for writes to all memory spaces (HLSL 
SM 5.0) -void AllMemoryBarrier(); - -// Thread-group sync and barrier for writes to all memory spaces (HLSL SM 5.0) -void AllMemoryBarrierWithGroupSync(); - -// Test if any components is non-zero (HLSL SM 1.0) - -__generic -__target_intrinsic(glsl, "bool($0)") -bool any(T x); - -__generic -__target_intrinsic(glsl, "any(bvec$N0($0))") -bool any(vector x); - -__generic -// TODO: need to define GLSL mapping -bool any(matrix x); - - -// Reinterpret bits as a double (HLSL SM 5.0) -double asdouble(uint lowbits, uint highbits); - -// Reinterpret bits as a float (HLSL SM 4.0) -float asfloat( int x); -float asfloat(uint x); -__generic vector asfloat(vector< int,N> x); -__generic vector asfloat(vector x); -__generic matrix asfloat(matrix< int,N,M> x); -__generic matrix asfloat(matrix x); - - -// Inverse sine (HLSL SM 1.0) -__generic T asin(T x); -__generic vector asin(vector x); -__generic matrix asin(matrix x); - -// Reinterpret bits as an int (HLSL SM 4.0) -int asint(float x); -int asint(uint x); -__generic vector asint(vector x); -__generic vector asint(vector x); -__generic matrix asint(matrix x); -__generic matrix asint(matrix x); - -// Reinterpret bits of double as a uint (HLSL SM 5.0) -void asuint(double value, out uint lowbits, out uint highbits); - -// Reinterpret bits as a uint (HLSL SM 4.0) -uint asuint(float x); -uint asuint(int x); -__generic vector asuint(vector x); -__generic vector asuint(vector x); -__generic matrix asuint(matrix x); -__generic matrix asuint(matrix x); - -// Inverse tangent (HLSL SM 1.0) -__generic T atan(T x); -__generic vector atan(vector x); -__generic matrix atan(matrix x); - -__generic -__target_intrinsic(glsl,"atan($0,$1)") -T atan2(T y, T x); - -__generic -__target_intrinsic(glsl,"atan($0,$1)") -vector atan2(vector y, vector x); - -__generic -__target_intrinsic(glsl,"atan($0,$1)") -matrix atan2(matrix y, matrix x); - -// Ceiling (HLSL SM 1.0) -__generic T ceil(T x); -__generic vector ceil(vector x); -__generic matrix ceil(matrix x); - 
- -// Check access status to tiled resource -bool CheckAccessFullyMapped(uint status); - -// Clamp (HLSL SM 1.0) -__generic T clamp(T x, T min, T max); -__generic vector clamp(vector x, vector min, vector max); -__generic matrix clamp(matrix x, matrix min, matrix max); - -// Clip (discard) fragment conditionally -__generic void clip(T x); -__generic void clip(vector x); -__generic void clip(matrix x); - -// Cosine -__generic T cos(T x); -__generic vector cos(vector x); -__generic matrix cos(matrix x); - -// Hyperbolic cosine -__generic T cosh(T x); -__generic vector cosh(vector x); -__generic matrix cosh(matrix x); - -// Population count -__target_intrinsic(glsl, "bitCount") -uint countbits(uint value); - -// Cross product -__generic vector cross(vector x, vector y); - -// Convert encoded color -int4 D3DCOLORtoUBYTE4(float4 x); - -// Partial-difference derivatives -__generic -__target_intrinsic(glsl, dFdx) -T ddx(T x); - -__generic -__target_intrinsic(glsl, dFdx) -vector ddx(vector x); - -__generic -__target_intrinsic(glsl, dFdx) -matrix ddx(matrix x); - -__generic -__glsl_extension(GL_ARB_derivative_control) -__target_intrinsic(glsl, dFdxCoarse) -T ddx_coarse(T x); - -__generic -__glsl_extension(GL_ARB_derivative_control) -__target_intrinsic(glsl, dFdxCoarse) -vector ddx_coarse(vector x); - -__generic -__glsl_extension(GL_ARB_derivative_control) -__target_intrinsic(glsl, dFdxCoarse) -matrix ddx_coarse(matrix x); - -__generic -__glsl_extension(GL_ARB_derivative_control) -__target_intrinsic(glsl, dFdxFine) -T ddx_fine(T x); - -__generic -__glsl_extension(GL_ARB_derivative_control) -__target_intrinsic(glsl, dFdxFine) -vector ddx_fine(vector x); - -__generic -__glsl_extension(GL_ARB_derivative_control) -__target_intrinsic(glsl, dFdxFine) -matrix ddx_fine(matrix x); - -__generic -__target_intrinsic(glsl, dFdy) -T ddy(T x); - -__generic -__target_intrinsic(glsl, dFdy) -vector ddy(vector x); - -__generic -__target_intrinsic(glsl, dFdy) - matrix ddy(matrix x); - 
-__generic -__glsl_extension(GL_ARB_derivative_control) -__target_intrinsic(glsl, dFdyCoarse) -T ddy_coarse(T x); - -__generic -__glsl_extension(GL_ARB_derivative_control) -__target_intrinsic(glsl, dFdyCoarse) -vector ddy_coarse(vector x); - -__generic -__glsl_extension(GL_ARB_derivative_control) -__target_intrinsic(glsl, dFdyCoarse) -matrix ddy_coarse(matrix x); - -__generic -__glsl_extension(GL_ARB_derivative_control) -__target_intrinsic(glsl, dFdyFine) -T ddy_fine(T x); - -__generic -__glsl_extension(GL_ARB_derivative_control) -__target_intrinsic(glsl, dFdyFine) -vector ddy_fine(vector x); - -__generic -__glsl_extension(GL_ARB_derivative_control) -__target_intrinsic(glsl, dFdyFine) -matrix ddy_fine(matrix x); - - -// Radians to degrees -__generic T degrees(T x); -__generic vector degrees(vector x); -__generic matrix degrees(matrix x); - -// Matrix determinant - -__generic T determinant(matrix m); - -// Barrier for device memory -void DeviceMemoryBarrier(); -void DeviceMemoryBarrierWithGroupSync(); - -// Vector distance - -__generic T distance(vector x, vector y); - -// Vector dot product - -__generic T dot(vector x, vector y); - -// Helper for computing distance terms for lighting (obsolete) - -__generic vector dst(vector x, vector y); - -// Error message - -// void errorf( string format, ... 
); - -// Attribute evaluation - -__generic T EvaluateAttributeAtCentroid(T x); -__generic vector EvaluateAttributeAtCentroid(vector x); -__generic matrix EvaluateAttributeAtCentroid(matrix x); - -__generic T EvaluateAttributeAtSample(T x, uint sampleindex); -__generic vector EvaluateAttributeAtSample(vector x, uint sampleindex); -__generic matrix EvaluateAttributeAtSample(matrix x, uint sampleindex); - -__generic T EvaluateAttributeSnapped(T x, int2 offset); -__generic vector EvaluateAttributeSnapped(vector x, int2 offset); -__generic matrix EvaluateAttributeSnapped(matrix x, int2 offset); - -// Base-e exponent -__generic T exp(T x); -__generic vector exp(vector x); -__generic matrix exp(matrix x); - -// Base-2 exponent -__generic T exp2(T x); -__generic vector exp2(vector x); -__generic matrix exp2(matrix x); - -// Convert 16-bit float stored in low bits of integer -float f16tof32(uint value); -__generic vector f16tof32(vector value); - -// Convert to 16-bit float stored in low bits of integer -uint f32tof16(float value); -__generic vector f32tof16(vector value); - -// Flip surface normal to face forward, if needed -__generic vector faceforward(vector n, vector i, vector ng); - -// Find first set bit starting at high bit and working down -__target_intrinsic(glsl,"findMSB") -int firstbithigh(int value); - -__target_intrinsic(glsl,"findMSB") -__generic vector firstbithigh(vector value); - -__target_intrinsic(glsl,"findMSB") -uint firstbithigh(uint value); - -__target_intrinsic(glsl,"findMSB") -__generic vector firstbithigh(vector value); - -// Find first set bit starting at low bit and working up -__target_intrinsic(glsl,"findLSB") -int firstbitlow(int value); - -__target_intrinsic(glsl,"findLSB") -__generic vector firstbitlow(vector value); - -__target_intrinsic(glsl,"findLSB") -uint firstbitlow(uint value); - -__target_intrinsic(glsl,"findLSB") -__generic vector firstbitlow(vector value); - -// Floor (HLSL SM 1.0) -__generic T floor(T x); -__generic vector 
floor(vector x); -__generic matrix floor(matrix x); - -// Fused multiply-add for doubles -double fma(double a, double b, double c); -__generic vector fma(vector a, vector b, vector c); -__generic matrix fma(matrix a, matrix b, matrix c); - -// Floating point remainder of x/y -__generic T fmod(T x, T y); -__generic vector fmod(vector x, vector y); -__generic matrix fmod(matrix x, matrix y); - -// Fractional part -__generic -__target_intrinsic(glsl, fract) -T frac(T x); - -__generic -__target_intrinsic(glsl, fract) -vector frac(vector x); - -__generic -__target_intrinsic(glsl, fract) -matrix frac(matrix x); - -// Split float into mantissa and exponent -__generic T frexp(T x, out T exp); -__generic vector frexp(vector x, out vector exp); -__generic matrix frexp(matrix x, out matrix exp); - -// Texture filter width -__generic T fwidth(T x); -__generic vector fwidth(vector x); -__generic matrix fwidth(matrix x); - -// Get number of samples in render target -uint GetRenderTargetSampleCount(); - -// Get position of given sample -float2 GetRenderTargetSamplePosition(int Index); - -// Group memory barrier -__target_intrinsic(glsl, "groupMemoryBarrier") -void GroupMemoryBarrier(); - -// Note: the unmatched parentheses in the GLSL lowering are -// to cancel out the parens that the emit logic uses, so that -// we can emit this as if it were an expression. -// -// TODO: investigate whether we can just use "operator comma" here. 
-__target_intrinsic(glsl, "groupMemoryBarrier()); (barrier()") -void GroupMemoryBarrierWithGroupSync(); - -// Atomics - -__target_intrinsic(glsl, "$atomicAdd($A, $1)") -void InterlockedAdd(__ref int dest, int value); - -__target_intrinsic(glsl, "$atomicAdd($A, $1)") -void InterlockedAdd(__ref uint dest, uint value); - -__target_intrinsic(glsl, "($2 = $atomicAdd($A, $1))") -void InterlockedAdd(__ref int dest, int value, out int original_value); - -__target_intrinsic(glsl, "($2 = $atomicAdd($A, $1))") -void InterlockedAdd(__ref uint dest, uint value, out uint original_value); - -__target_intrinsic(glsl, "$atomicAnd($A, $1)") -void InterlockedAnd(__ref int dest, int value); - -__target_intrinsic(glsl, "$atomicAnd($A, $1)") -void InterlockedAnd(__ref uint dest, uint value); - -__target_intrinsic(glsl, "($2 = $atomicAnd($A, $1))") -void InterlockedAnd(__ref int dest, int value, out int original_value); - -__target_intrinsic(glsl, "($2 = $atomicAnd($A, $1))") -void InterlockedAnd(__ref uint dest, uint value, out uint original_value); - -__target_intrinsic(glsl, "($3 = $atomicCompSwap($A, $1, $2))") -void InterlockedCompareExchange(__ref int dest, int compare_value, int value, out int original_value); - -__target_intrinsic(glsl, "($3 = $atomicCompSwap($A, $1, $2))") -void InterlockedCompareExchange(__ref uint dest, uint compare_value, uint value, out uint original_value); - -__target_intrinsic(glsl, "$atomicCompSwap($A, $1, $2)") -void InterlockedCompareStore(__ref int dest, int compare_value, int value); - -__target_intrinsic(glsl, "$atomicCompSwap($A, $1, $2)") -void InterlockedCompareStore(__ref uint dest, uint compare_value, uint value); - -__target_intrinsic(glsl, "$atomicExchange($A, $1)") -void InterlockedExchange(__ref int dest, int value); - -__target_intrinsic(glsl, "$atomicExchange($A, $1)") -void InterlockedExchange(__ref uint dest, uint value); - -__target_intrinsic(glsl, "($2 = $atomicExchange($A, $1))") -void InterlockedExchange(__ref int dest, int value, 
out int original_value); - -__target_intrinsic(glsl, "($2 = $atomicExchange($A, $1))") -void InterlockedExchange(__ref uint dest, uint value, out uint original_value); - -__target_intrinsic(glsl, "$atomicMax($A, $1)") -void InterlockedMax(__ref int dest, int value); - -__target_intrinsic(glsl, "$atomicMax($A, $1)") -void InterlockedMax(__ref uint dest, uint value); - -__target_intrinsic(glsl, "($2 = $atomicMax($A, $1))") -void InterlockedMax(__ref int dest, int value, out int original_value); - -__target_intrinsic(glsl, "($2 = $atomicMax($A, $1))") -void InterlockedMax(__ref uint dest, uint value, out uint original_value); - -__target_intrinsic(glsl, "$atomicMin($A, $1)") -void InterlockedMin(in out int dest, int value); - -__target_intrinsic(glsl, "$atomicMin($A, $1)") -void InterlockedMin(in out uint dest, uint value); - -__target_intrinsic(glsl, "($2 = $atomicMin($A, $1))") -void InterlockedMin(in out int dest, int value, out int original_value); - -__target_intrinsic(glsl, "($2 = $atomicMin($A, $1))") -void InterlockedMin(in out uint dest, uint value, out uint original_value); - -__target_intrinsic(glsl, "$atomicOr($A, $1)") -void InterlockedOr(__ref int dest, int value); - -__target_intrinsic(glsl, "$atomicOr($A, $1)") -void InterlockedOr(__ref uint dest, uint value); - -__target_intrinsic(glsl, "($2 = $atomicOr($A, $1))") -void InterlockedOr(__ref int dest, int value, out int original_value); - -__target_intrinsic(glsl, "($2 = $atomicOr($A, $1))") -void InterlockedOr(__ref uint dest, uint value, out uint original_value); - -__target_intrinsic(glsl, "$atomicXor($A, $1)") -void InterlockedXor(__ref int dest, int value); - -__target_intrinsic(glsl, "$atomicXor($A, $1)") -void InterlockedXor(__ref uint dest, uint value); - -__target_intrinsic(glsl, "($2 = $atomicXor($A, $1))") -void InterlockedXor(__ref int dest, int value, out int original_value); - -__target_intrinsic(glsl, "($2 = $atomicXor($A, $1))") -void InterlockedXor(__ref uint dest, uint value, out uint 
original_value); - -// Is floating-point value finite? -__generic bool isfinite(T x); -__generic vector isfinite(vector x); -__generic matrix isfinite(matrix x); - -// Is floating-point value infinite? -__generic bool isinf(T x); -__generic vector isinf(vector x); -__generic matrix isinf(matrix x); - -// Is floating-point value not-a-number? -__generic bool isnan(T x); -__generic vector isnan(vector x); -__generic matrix isnan(matrix x); - -// Construct float from mantissa and exponent -__generic T ldexp(T x, T exp); -__generic vector ldexp(vector x, vector exp); -__generic matrix ldexp(matrix x, matrix exp); - -// Vector length -__generic T length(vector x); - -// Linear interpolation -__generic -__target_intrinsic(glsl, mix) -T lerp(T x, T y, T s); - -__generic -__target_intrinsic(glsl, mix) -vector lerp(vector x, vector y, vector s); - -__generic -__target_intrinsic(glsl, mix) -matrix lerp(matrix x, matrix y, matrix s); - -// Legacy lighting function (obsolete) -float4 lit(float n_dot_l, float n_dot_h, float m); - -// Base-e logarithm -__generic T log(T x); -__generic vector log(vector x); -__generic matrix log(matrix x); - -// Base-10 logarithm -__generic T log10(T x); -__generic vector log10(vector x); -__generic matrix log10(matrix x); - -// Base-2 logarithm -__generic T log2(T x); -__generic vector log2(vector x); -__generic matrix log2(matrix x); - -// multiply-add -__generic T mad(T mvalue, T avalue, T bvalue); -__generic vector mad(vector mvalue, vector avalue, vector bvalue); -__generic matrix mad(matrix mvalue, matrix avalue, matrix bvalue); - -// maximum -__generic T max(T x, T y); -__generic vector max(vector x, vector y); -__generic matrix max(matrix x, matrix y); - -// minimum -__generic T min(T x, T y); -__generic vector min(vector x, vector y); -__generic matrix min(matrix x, matrix y); - -// split into integer and fractional parts (both with same sign) -__generic T modf(T x, out T ip); -__generic vector modf(vector x, out vector ip); -__generic 
matrix modf(matrix x, out matrix ip); - -// msad4 (whatever that is) -uint4 msad4(uint reference, uint2 source, uint4 accum); - -// General inner products - -// scalar-scalar -__generic T mul(T x, T y); - -// scalar-vector and vector-scalar -__generic vector mul(vector x, T y); -__generic vector mul(T x, vector y); - -// scalar-matrix and matrix-scalar -__generic matrix mul(matrix x, T y); -__generic matrix mul(T x, matrix y); - -// vector-vector (dot product) -__generic __intrinsic_op(dot) T mul(vector x, vector y); - -// vector-matrix -__generic __intrinsic_op(mulVectorMatrix) vector mul(vector x, matrix y); - -// matrix-vector -__generic __intrinsic_op(mulMatrixVector) vector mul(matrix x, vector y); - -// matrix-matrix -__generic __intrinsic_op(mulMatrixMatrix) matrix mul(matrix x, matrix y); - -// noise (deprecated) -float noise(float x); -__generic float noise(vector x); - -/// Indicate that an index may be non-uniform at execution time. -/// -/// Shader Model 5.1 and 6.x introduce support for dynamic indexing -/// of arrays of resources, but place the restriction that *by default* -/// the implementation can assume that any value used as an index into -/// such arrays will be dynamically uniform across an entire `Draw` or `Dispatch` -/// (when using instancing, the value must be uniform across all instances; -/// it does not seem that the restriction extends to draws within a multi-draw). -/// -/// In order to indicate to the implementation that it cannot make the -/// uniformity assumption, a shader programmer is required to pass the index -/// to the `NonUniformResourceIndex` function before using it as an index. -/// The function superficially acts like an identity function. -/// -/// Note: a future version of Slang may take responsibility for inserting calls -/// to this function as necessary in output code, rather than make this -/// the user's responsibility, so that the default behavior of the language -/// is more semantically "correct." 
-uint NonUniformResourceIndex(uint index); -int NonUniformResourceIndex(int index); - -// Normalize a vector -__generic vector normalize(vector x); - -// Raise to a power -__generic T pow(T x, T y); -__generic vector pow(vector x, vector y); -__generic matrix pow(matrix x, matrix y); - -// Output message - -// void printf( string format, ... ); - -// Tessellation factor fixup routines - -void Process2DQuadTessFactorsAvg( - in float4 RawEdgeFactors, - in float2 InsideScale, - out float4 RoundedEdgeTessFactors, - out float2 RoundedInsideTessFactors, - out float2 UnroundedInsideTessFactors); - -void Process2DQuadTessFactorsMax( - in float4 RawEdgeFactors, - in float2 InsideScale, - out float4 RoundedEdgeTessFactors, - out float2 RoundedInsideTessFactors, - out float2 UnroundedInsideTessFactors); - -void Process2DQuadTessFactorsMin( - in float4 RawEdgeFactors, - in float2 InsideScale, - out float4 RoundedEdgeTessFactors, - out float2 RoundedInsideTessFactors, - out float2 UnroundedInsideTessFactors); - -void ProcessIsolineTessFactors( - in float RawDetailFactor, - in float RawDensityFactor, - out float RoundedDetailFactor, - out float RoundedDensityFactor); - -void ProcessQuadTessFactorsAvg( - in float4 RawEdgeFactors, - in float InsideScale, - out float4 RoundedEdgeTessFactors, - out float2 RoundedInsideTessFactors, - out float2 UnroundedInsideTessFactors); - -void ProcessQuadTessFactorsMax( - in float4 RawEdgeFactors, - in float InsideScale, - out float4 RoundedEdgeTessFactors, - out float2 RoundedInsideTessFactors, - out float2 UnroundedInsideTessFactors); - -void ProcessQuadTessFactorsMin( - in float4 RawEdgeFactors, - in float InsideScale, - out float4 RoundedEdgeTessFactors, - out float2 RoundedInsideTessFactors, - out float2 UnroundedInsideTessFactors); - -void ProcessTriTessFactorsAvg( - in float3 RawEdgeFactors, - in float InsideScale, - out float3 RoundedEdgeTessFactors, - out float RoundedInsideTessFactor, - out float UnroundedInsideTessFactor); - -void 
ProcessTriTessFactorsMax( - in float3 RawEdgeFactors, - in float InsideScale, - out float3 RoundedEdgeTessFactors, - out float RoundedInsideTessFactor, - out float UnroundedInsideTessFactor); - -void ProcessTriTessFactorsMin( - in float3 RawEdgeFactors, - in float InsideScale, - out float3 RoundedEdgeTessFactors, - out float RoundedInsideTessFactors, - out float UnroundedInsideTessFactors); - -// Degrees to radians -__generic T radians(T x); -__generic vector radians(vector x); -__generic matrix radians(matrix x); - -// Approximate reciprocal -__generic T rcp(T x); -__generic vector rcp(vector x); -__generic matrix rcp(matrix x); - -// Reflect incident vector across plane with given normal -__generic -vector reflect(vector i, vector n); - -// Refract incident vector given surface normal and index of refraction -__generic -vector refract(vector i, vector n, float eta); - -// Reverse order of bits -__target_intrinsic(glsl, "bitfieldReverse") -uint reversebits(uint value); - -__target_intrinsic(glsl, "bitfieldReverse") -__generic vector reversebits(vector value); - -// Round-to-nearest -__generic T round(T x); -__generic vector round(vector x); -__generic matrix round(matrix x); - -// Reciprocal of square root -__generic T rsqrt(T x); -__generic vector rsqrt(vector x); -__generic matrix rsqrt(matrix x); - -// Clamp value to [0,1] range -__generic -__target_intrinsic(glsl, "clamp($0, 0, 1)") -T saturate(T x); - -__generic -__target_intrinsic(glsl, "clamp($0, 0, 1)") -vector saturate(vector x); - -__generic -__target_intrinsic(glsl, "clamp($0, 0, 1)") -matrix saturate(matrix x); - -__generic -__specialized_for_target(glsl) -T saturate(T x) -{ - return clamp(x, T(0), T(1)); -} - -__generic -__specialized_for_target(glsl) -vector saturate(vector x) -{ - return clamp(x, - vector(T(0)), - vector(T(1))); -} - -// HACK: need a helper to turn a scalar into a matrix, -// because GLSL and HLSL disagree on the semantics of -// constructing a matrix from a single scalar. 
-__generic -matrix __scalarToMatrix(T value); - -__generic -__specialized_for_target(glsl) -matrix saturate(matrix x) -{ - return clamp(x, - __scalarToMatrix(T(0)), - __scalarToMatrix(T(1))); -} - - -// Extract sign of value -__generic int sign(T x); -__generic vector sign(vector x); -__generic matrix sign(matrix x); - - -// Sine -__generic T sin(T x); -__generic vector sin(vector x); -__generic matrix sin(matrix x); - -// Sine and cosine -__generic void sincos(T x, out T s, out T c); -__generic void sincos(vector x, out vector s, out vector c); -__generic void sincos(matrix x, out matrix s, out matrix c); - -// Hyperbolic Sine -__generic T sinh(T x); -__generic vector sinh(vector x); -__generic matrix sinh(matrix x); - -// Smooth step (Hermite interpolation) -__generic T smoothstep(T min, T max, T x); -__generic vector smoothstep(vector min, vector max, vector x); -__generic matrix smoothstep(matrix min, matrix max, matrix x); - -// Square root -__generic T sqrt(T x); -__generic vector sqrt(vector x); -__generic matrix sqrt(matrix x); - -// Step function -__generic T step(T y, T x); -__generic vector step(vector y, vector x); -__generic matrix step(matrix y, matrix x); - -// Tangent -__generic T tan(T x); -__generic vector tan(vector x); -__generic matrix tan(matrix x); - -// Hyperbolic tangent -__generic T tanh(T x); -__generic vector tanh(vector x); -__generic matrix tanh(matrix x); - -// Legacy texture-fetch operations - -/* -float4 tex1D(sampler1D s, float t); -float4 tex1D(sampler1D s, float t, float ddx, float ddy); -float4 tex1Dbias(sampler1D s, float4 t); -float4 tex1Dgrad(sampler1D s, float t, float ddx, float ddy); -float4 tex1Dlod(sampler1D s, float4 t); -float4 tex1Dproj(sampler1D s, float4 t); - -float4 tex2D(sampler2D s, float2 t); -float4 tex2D(sampler2D s, float2 t, float2 ddx, float2 ddy); -float4 tex2Dbias(sampler2D s, float4 t); -float4 tex2Dgrad(sampler2D s, float2 t, float2 ddx, float2 ddy); -float4 tex2Dlod(sampler2D s, float4 t); -float4 
tex2Dproj(sampler2D s, float4 t); - -float4 tex3D(sampler3D s, float3 t); -float4 tex3D(sampler3D s, float3 t, float3 ddx, float3 ddy); -float4 tex3Dbias(sampler3D s, float4 t); -float4 tex3Dgrad(sampler3D s, float3 t, float3 ddx, float3 ddy); -float4 tex3Dlod(sampler3D s, float4 t); -float4 tex3Dproj(sampler3D s, float4 t); - -float4 texCUBE(samplerCUBE s, float3 t); -float4 texCUBE(samplerCUBE s, float3 t, float3 ddx, float3 ddy); -float4 texCUBEbias(samplerCUBE s, float4 t); -float4 texCUBEgrad(samplerCUBE s, float3 t, float3 ddx, float3 ddy); -float4 texCUBElod(samplerCUBE s, float4 t); -float4 texCUBEproj(samplerCUBE s, float4 t); -*/ - -// Matrix transpose -__generic matrix transpose(matrix x); - -// Truncate to integer -__generic T trunc(T x); -__generic vector trunc(vector x); -__generic matrix trunc(matrix x); - -// Shader model 6.0 stuff - -uint GlobalOrderedCountIncrement(uint countToAppendForThisLane); - -__generic T QuadReadLaneAt(T sourceValue, int quadLaneID); -__generic vector QuadReadLaneAt(vector sourceValue, int quadLaneID); -__generic matrix QuadReadLaneAt(matrix sourceValue, int quadLaneID); - -__generic T QuadSwapX(T localValue); -__generic vector QuadSwapX(vector localValue); -__generic matrix QuadSwapX(matrix localValue); - -__generic T QuadSwapY(T localValue); -__generic vector QuadSwapY(vector localValue); -__generic matrix QuadSwapY(matrix localValue); - -__generic T WaveAllBitAnd(T expr); -__generic vector WaveAllBitAnd(vector expr); -__generic matrix WaveAllBitAnd(matrix expr); - -__generic T WaveAllBitOr(T expr); -__generic vector WaveAllBitOr(vector expr); -__generic matrix WaveAllBitOr(matrix expr); - -__generic T WaveAllBitXor(T expr); -__generic vector WaveAllBitXor(vector expr); -__generic matrix WaveAllBitXor(matrix expr); - -__generic T WaveAllMax(T expr); -__generic vector WaveAllMax(vector expr); -__generic matrix WaveAllMax(matrix expr); - -__generic T WaveAllMin(T expr); -__generic vector WaveAllMin(vector expr); -__generic 
matrix WaveAllMin(matrix expr); - -__generic T WaveAllProduct(T expr); -__generic vector WaveAllProduct(vector expr); -__generic matrix WaveAllProduct(matrix expr); - -__generic T WaveAllSum(T expr); -__generic vector WaveAllSum(vector expr); -__generic matrix WaveAllSum(matrix expr); - -bool WaveAllEqual(bool expr); -bool WaveAllTrue(bool expr); -bool WaveAnyTrue(bool expr); - -uint64_t WaveBallot(bool expr); - -uint WaveGetLaneCount(); -uint WaveGetLaneIndex(); -uint WaveGetOrderedIndex(); - -bool WaveIsHelperLane(); - -bool WaveOnce(); - -__generic T WavePrefixProduct(T expr); -__generic vector WavePrefixProduct(vector expr); -__generic matrix WavePrefixProduct(matrix expr); - -__generic T WavePrefixSum(T expr); -__generic vector WavePrefixSum(vector expr); -__generic matrix WavePrefixSum(matrix expr); - -__generic T WaveReadFirstLane(T expr); -__generic vector WaveReadFirstLane(vector expr); -__generic matrix WaveReadFirstLane(matrix expr); - -__generic T WaveReadLaneAt(T expr, int laneIndex); -__generic vector WaveReadLaneAt(vector expr, int laneIndex); -__generic matrix WaveReadLaneAt(matrix expr, int laneIndex); - -// `typedef`s to help with the fact that HLSL has been sorta-kinda case insensitive at various points -typedef Texture2D texture2D; - -${{{{ -// Component-wise multiplication ops -for(auto op : binaryOps) -{ - switch (op.opCode) - { - default: - continue; - - case kIROp_Mul: - case kIRPseudoOp_MulAssign: - break; - } - - for (auto type : kBaseTypes) - { - if ((type.flags & op.flags) == 0) - continue; - - char const* leftType = type.name; - char const* rightType = leftType; - char const* resultType = leftType; - - char const* leftQual = ""; - if(op.flags & ASSIGNMENT) leftQual = "in out "; - - sb << "__generic "; - sb << "__intrinsic_op(" << int(op.opCode) << ") matrix<" << resultType << ",N,M> operator" << op.opName << "(" << leftQual << "matrix<" << leftType << ",N,M> left, matrix<" << rightType << ",N,M> right);\n"; - } -} - -// - -// Buffer 
types - -static const struct { - char const* name; - SlangResourceAccess access; -} kBaseBufferAccessLevels[] = { - { "", SLANG_RESOURCE_ACCESS_READ }, - { "RW", SLANG_RESOURCE_ACCESS_READ_WRITE }, - { "RasterizerOrdered", SLANG_RESOURCE_ACCESS_RASTER_ORDERED }, -}; -static const int kBaseBufferAccessLevelCount = sizeof(kBaseBufferAccessLevels) / sizeof(kBaseBufferAccessLevels[0]); - -for (int aa = 0; aa < kBaseBufferAccessLevelCount; ++aa) -{ - auto flavor = TextureFlavor::create(TextureFlavor::Shape::ShapeBuffer, kBaseBufferAccessLevels[aa].access).flavor; - sb << "__generic\n"; - sb << "__magic_type(Texture," << int(flavor) << ")\n"; - sb << "__intrinsic_type(" << (kIROp_TextureType + (int(flavor) << kIROpMeta_OtherShift)) << ")\n"; - sb << "struct "; - sb << kBaseBufferAccessLevels[aa].name; - sb << "Buffer {\n"; - - sb << "void GetDimensions(out uint dim);\n"; - - sb << "__glsl_extension(GL_EXT_samplerless_texture_functions)"; - sb << "__target_intrinsic(glsl, \"texelFetch($0, $1)$z\")\n"; - sb << "T Load(int location);\n"; - - sb << "T Load(int location, out uint status);\n"; - - sb << "__subscript(uint index) -> T {\n"; - - sb << "__glsl_extension(GL_EXT_samplerless_texture_functions)"; - sb << "__target_intrinsic(glsl, \"texelFetch($0, int($1))$z\") get;\n"; - - if (kBaseBufferAccessLevels[aa].access != SLANG_RESOURCE_ACCESS_READ) - { - sb << "ref;\n"; - } - - sb << "}\n"; - - sb << "};\n"; -} -}}}} - - -// DirectX Raytracing (DXR) Support -// -// The following is based on the experimental DXR SDK v0.09.01. 
-// -// Numbering follows the sections in the "D3D12 Raytracing Functional Spec" v0.09 (2018-03-12) -// - -// 10.1.1 - Ray Flags - -typedef uint RAY_FLAG; - -static const RAY_FLAG RAY_FLAG_NONE = 0x00; -static const RAY_FLAG RAY_FLAG_FORCE_OPAQUE = 0x01; -static const RAY_FLAG RAY_FLAG_FORCE_NON_OPAQUE = 0x02; -static const RAY_FLAG RAY_FLAG_ACCEPT_FIRST_HIT_AND_END_SEARCH = 0x04; -static const RAY_FLAG RAY_FLAG_SKIP_CLOSEST_HIT_SHADER = 0x08; -static const RAY_FLAG RAY_FLAG_CULL_BACK_FACING_TRIANGLES = 0x10; -static const RAY_FLAG RAY_FLAG_CULL_FRONT_FACING_TRIANGLES = 0x20; -static const RAY_FLAG RAY_FLAG_CULL_OPAQUE = 0x40; -static const RAY_FLAG RAY_FLAG_CULL_NON_OPAQUE = 0x80; - -// 10.1.2 - Ray Description Structure - -__builtin -__magic_type(RayDescType) -__intrinsic_type($(kIROp_RayDescType)) -struct RayDesc -{ - __target_intrinsic(hlsl, Origin) - float3 Origin; - - __target_intrinsic(hlsl, TMin) - float TMin; - - __target_intrinsic(hlsl, Direction) - float3 Direction; - - __target_intrinsic(hlsl, TMax) - float TMax; -}; - -// 10.1.3 - Ray Acceleration Structure - -__builtin -__magic_type(RaytracingAccelerationStructureType) -__intrinsic_type($(kIROp_RaytracingAccelerationStructureType)) -struct RaytracingAccelerationStructure {}; - -// 10.1.4 - Subobject Definitions - -// TODO: We may decide to support these, but their reliance on C++ implicit -// constructor call syntax (`SomeType someVar(arg0, arg1);`) makes them -// annoying for the current Slang parsing strategy, and using global variables -// for this stuff comes across as a kludge rather than the best possible design. 
- -// 10.1.5 - Intersection Attributes Structure - -__builtin -__magic_type(BuiltInTriangleIntersectionAttributesType) -__intrinsic_type($(kIROp_BuiltInTriangleIntersectionAttributesType)) -struct BuiltInTriangleIntersectionAttributes -{ - __target_intrinsic(hlsl, barycentrics) - float2 barycentrics; -}; - -// 10.2 Shaders - -// Right now new shader stages need to be added directly to the compiler -// implementation, rather than being something that can be declared in the stdlib. - -// 10.3 - Intrinsics - -// 10.3.1 -void CallShader(uint ShaderIndex, inout param_t Parameter); - -// 10.3.2 -void TraceRay( - RaytracingAccelerationStructure AccelerationStructure, - uint RayFlags, - uint InstanceInclusionMask, - uint RayContributionToHitGroupIndex, - uint MultiplierForGeometryContributionToHitGroupIndex, - uint MissShaderIndex, - RayDesc Ray, - inout payload_t Payload); - -// 10.3.3 -bool ReportHit(float THit, uint HitKind, attr_t Attributes); - -// 10.3.4 -void IgnoreHit(); - -// 10.3.5 -void AcceptHitAndEndSearch(); - -// 10.4 - System Values and Special Semantics - -// TODO: Many of these functions need to be restricted so that -// they can only be accessed from specific stages. 
- -// 10.4.1 - Ray Dispatch System Values -uint2 DispatchRaysIndex(); -uint2 DispatchRaysDimensions(); - -// 10.4.2 - Ray System Values -float3 WorldRayOrigin(); -float3 WorldRayDirection(); -float RayTMin(); -float RayTCurrent(); -uint RayFlags(); - -// 10.4.3 - Primitive/Object Space System Values -uint InstanceIndex(); -uint InstanceID(); -uint PrimitiveIndex(); -float3 ObjectRayOrigin(); -float3 ObjectRayDirection(); -float3x4 ObjectToWorld(); -float3x4 WorldToObject(); - -// 10.4.4 - Hit Specific System values -uint HitKind(); +// Slang HLSL compatibility library + +typedef uint UINT; + +__generic +__magic_type(HLSLAppendStructuredBufferType) +__intrinsic_type($(kIROp_HLSLAppendStructuredBufferType)) +struct AppendStructuredBuffer +{ + void Append(T value); + + void GetDimensions( + out uint numStructs, + out uint stride); +}; + +__magic_type(HLSLByteAddressBufferType) +__intrinsic_type($(kIROp_HLSLByteAddressBufferType)) +struct ByteAddressBuffer +{ + void GetDimensions( + out uint dim); + + uint Load(int location); + uint Load(int location, out uint status); + + uint2 Load2(int location); + uint2 Load2(int location, out uint status); + + uint3 Load3(int location); + uint3 Load3(int location, out uint status); + + uint4 Load4(int location); + uint4 Load4(int location, out uint status); +}; + +__generic +__magic_type(HLSLStructuredBufferType) +__intrinsic_type($(kIROp_HLSLStructuredBufferType)) +struct StructuredBuffer +{ + void GetDimensions( + out uint numStructs, + out uint stride); + + T Load(int location); + T Load(int location, out uint status); + + __subscript(uint index) -> T { __intrinsic_op(bufferLoad) get; }; +}; + +__generic +__magic_type(HLSLConsumeStructuredBufferType) +__intrinsic_type($(kIROp_HLSLConsumeStructuredBufferType)) +struct ConsumeStructuredBuffer +{ + T Consume(); + + void GetDimensions( + out uint numStructs, + out uint stride); +}; + +__generic +__magic_type(HLSLInputPatchType) +__intrinsic_type($(kIROp_HLSLInputPatchType)) +struct 
InputPatch +{ + __subscript(uint index) -> T; +}; + +__generic +__magic_type(HLSLOutputPatchType) +__intrinsic_type($(kIROp_HLSLOutputPatchType)) +struct OutputPatch +{ + __subscript(uint index) -> T; +}; + +${{{{ +static const struct { + IROp op; + char const* name; +} kMutableByteAddressBufferCases[] = +{ + { kIROp_HLSLRWByteAddressBufferType, "RWByteAddressBuffer" }, + { kIROp_HLSLRasterizerOrderedByteAddressBufferType, "RasterizerOrderedByteAddressBuffer" }, +}; +for(auto item : kMutableByteAddressBufferCases) { +}}}} + +__magic_type(HLSL$(item.name)Type) +__intrinsic_type($(item.op)) +struct $(item.name) +{ + // Note(tfoley): supports alll operations from `ByteAddressBuffer` + // TODO(tfoley): can this be made a sub-type? + + void GetDimensions( + out uint dim); + + uint Load(int location); + uint Load(int location, out uint status); + + uint2 Load2(int location); + uint2 Load2(int location, out uint status); + + uint3 Load3(int location); + uint3 Load3(int location, out uint status); + + uint4 Load4(int location); + uint4 Load4(int location, out uint status); + + // Added operations: + + void InterlockedAdd( + UINT dest, + UINT value, + out UINT original_value); + void InterlockedAdd( + UINT dest, + UINT value); + + void InterlockedAnd( + UINT dest, + UINT value, + out UINT original_value); + void InterlockedAnd( + UINT dest, + UINT value); + + void InterlockedCompareExchange( + UINT dest, + UINT compare_value, + UINT value, + out UINT original_value); + void InterlockedCompareExchange( + UINT dest, + UINT compare_value, + UINT value); + + void InterlockedCompareStore( + UINT dest, + UINT compare_value, + UINT value); + void InterlockedCompareStore( + UINT dest, + UINT compare_value); + + void InterlockedExchange( + UINT dest, + UINT value, + out UINT original_value); + void InterlockedExchange( + UINT dest, + UINT value); + + void InterlockedMax( + UINT dest, + UINT value, + out UINT original_value); + void InterlockedMax( + UINT dest, + UINT value); + + 
void InterlockedMin( + UINT dest, + UINT value, + out UINT original_value); + void InterlockedMin( + UINT dest, + UINT value); + + void InterlockedOr( + UINT dest, + UINT value, + out UINT original_value); + void InterlockedOr( + UINT dest, + UINT value); + + void InterlockedXor( + UINT dest, + UINT value, + out UINT original_value); + void InterlockedXor( + UINT dest, + UINT value); + + void Store( + uint address, + uint value); + + void Store2( + uint address, + uint2 value); + + void Store3( + uint address, + uint3 value); + + void Store4( + uint address, + uint4 value); +}; + +${{{{ +} +}}}} + +${{{{ +static const struct { + IROp op; + char const* name; +} kMutableStructuredBufferCases[] = +{ + { kIROp_HLSLRWStructuredBufferType, "RWStructuredBuffer" }, + { kIROp_HLSLRasterizerOrderedStructuredBufferType, "RasterizerOrderedStructuredBuffer" }, +}; +for(auto item : kMutableStructuredBufferCases) { +}}}} + + +__generic +__magic_type(HLSL$(item.name)Type) +__intrinsic_type($(item.op)) +struct $(item.name) +{ + uint DecrementCounter(); + + void GetDimensions( + out uint numStructs, + out uint stride); + + uint IncrementCounter(); + + T Load(int location); + T Load(int location, out uint status); + + __subscript(uint index) -> T + { + __intrinsic_op(bufferElementRef) + ref; + } +}; + +${{{{ +} +}}}} + +__generic +__magic_type(HLSLPointStreamType) +__intrinsic_type($(kIROp_HLSLPointStreamType)) +struct PointStream +{ + __target_intrinsic(glsl, "EmitVertex()") + void Append(T value); + + __target_intrinsic(glsl, "EndPrimitive()") + void RestartStrip(); +}; + +__generic +__magic_type(HLSLLineStreamType) +__intrinsic_type($(kIROp_HLSLLineStreamType)) +struct LineStream +{ + __target_intrinsic(glsl, "EmitVertex()") + void Append(T value); + + __target_intrinsic(glsl, "EndPrimitive()") + void RestartStrip(); +}; + +__generic +__magic_type(HLSLTriangleStreamType) +__intrinsic_type($(kIROp_HLSLTriangleStreamType)) +struct TriangleStream +{ + __target_intrinsic(glsl, 
"EmitVertex()") + void Append(T value); + + __target_intrinsic(glsl, "EndPrimitive()") + void RestartStrip(); +}; + +// Note(tfoley): Trying to systematically add all the HLSL builtins + +// Try to terminate the current draw or dispatch call (HLSL SM 4.0) +void abort(); + +// Absolute value (HLSL SM 1.0) +__generic T abs(T x); +__generic vector abs(vector x); +__generic matrix abs(matrix x); + +// Inverse cosine (HLSL SM 1.0) +__generic T acos(T x); +__generic vector acos(vector x); +__generic matrix acos(matrix x); + +// Test if all components are non-zero (HLSL SM 1.0) +__generic bool all(T x); +__generic bool all(vector x); +__generic bool all(matrix x); + +// Barrier for writes to all memory spaces (HLSL SM 5.0) +void AllMemoryBarrier(); + +// Thread-group sync and barrier for writes to all memory spaces (HLSL SM 5.0) +void AllMemoryBarrierWithGroupSync(); + +// Test if any components is non-zero (HLSL SM 1.0) + +__generic +__target_intrinsic(glsl, "bool($0)") +bool any(T x); + +__generic +__target_intrinsic(glsl, "any(bvec$N0($0))") +bool any(vector x); + +__generic +// TODO: need to define GLSL mapping +bool any(matrix x); + + +// Reinterpret bits as a double (HLSL SM 5.0) +double asdouble(uint lowbits, uint highbits); + +// Reinterpret bits as a float (HLSL SM 4.0) +float asfloat( int x); +float asfloat(uint x); +__generic vector asfloat(vector< int,N> x); +__generic vector asfloat(vector x); +__generic matrix asfloat(matrix< int,N,M> x); +__generic matrix asfloat(matrix x); + + +// Inverse sine (HLSL SM 1.0) +__generic T asin(T x); +__generic vector asin(vector x); +__generic matrix asin(matrix x); + +// Reinterpret bits as an int (HLSL SM 4.0) +int asint(float x); +int asint(uint x); +__generic vector asint(vector x); +__generic vector asint(vector x); +__generic matrix asint(matrix x); +__generic matrix asint(matrix x); + +// Reinterpret bits of double as a uint (HLSL SM 5.0) +void asuint(double value, out uint lowbits, out uint highbits); + +// 
Reinterpret bits as a uint (HLSL SM 4.0) +uint asuint(float x); +uint asuint(int x); +__generic vector asuint(vector x); +__generic vector asuint(vector x); +__generic matrix asuint(matrix x); +__generic matrix asuint(matrix x); + +// Inverse tangent (HLSL SM 1.0) +__generic T atan(T x); +__generic vector atan(vector x); +__generic matrix atan(matrix x); + +__generic +__target_intrinsic(glsl,"atan($0,$1)") +T atan2(T y, T x); + +__generic +__target_intrinsic(glsl,"atan($0,$1)") +vector atan2(vector y, vector x); + +__generic +__target_intrinsic(glsl,"atan($0,$1)") +matrix atan2(matrix y, matrix x); + +// Ceiling (HLSL SM 1.0) +__generic T ceil(T x); +__generic vector ceil(vector x); +__generic matrix ceil(matrix x); + + +// Check access status to tiled resource +bool CheckAccessFullyMapped(uint status); + +// Clamp (HLSL SM 1.0) +__generic T clamp(T x, T min, T max); +__generic vector clamp(vector x, vector min, vector max); +__generic matrix clamp(matrix x, matrix min, matrix max); + +// Clip (discard) fragment conditionally +__generic void clip(T x); +__generic void clip(vector x); +__generic void clip(matrix x); + +// Cosine +__generic T cos(T x); +__generic vector cos(vector x); +__generic matrix cos(matrix x); + +// Hyperbolic cosine +__generic T cosh(T x); +__generic vector cosh(vector x); +__generic matrix cosh(matrix x); + +// Population count +__target_intrinsic(glsl, "bitCount") +uint countbits(uint value); + +// Cross product +__generic vector cross(vector x, vector y); + +// Convert encoded color +int4 D3DCOLORtoUBYTE4(float4 x); + +// Partial-difference derivatives +__generic +__target_intrinsic(glsl, dFdx) +T ddx(T x); + +__generic +__target_intrinsic(glsl, dFdx) +vector ddx(vector x); + +__generic +__target_intrinsic(glsl, dFdx) +matrix ddx(matrix x); + +__generic +__glsl_extension(GL_ARB_derivative_control) +__target_intrinsic(glsl, dFdxCoarse) +T ddx_coarse(T x); + +__generic +__glsl_extension(GL_ARB_derivative_control) +__target_intrinsic(glsl, 
dFdxCoarse) +vector ddx_coarse(vector x); + +__generic +__glsl_extension(GL_ARB_derivative_control) +__target_intrinsic(glsl, dFdxCoarse) +matrix ddx_coarse(matrix x); + +__generic +__glsl_extension(GL_ARB_derivative_control) +__target_intrinsic(glsl, dFdxFine) +T ddx_fine(T x); + +__generic +__glsl_extension(GL_ARB_derivative_control) +__target_intrinsic(glsl, dFdxFine) +vector ddx_fine(vector x); + +__generic +__glsl_extension(GL_ARB_derivative_control) +__target_intrinsic(glsl, dFdxFine) +matrix ddx_fine(matrix x); + +__generic +__target_intrinsic(glsl, dFdy) +T ddy(T x); + +__generic +__target_intrinsic(glsl, dFdy) +vector ddy(vector x); + +__generic +__target_intrinsic(glsl, dFdy) + matrix ddy(matrix x); + +__generic +__glsl_extension(GL_ARB_derivative_control) +__target_intrinsic(glsl, dFdyCoarse) +T ddy_coarse(T x); + +__generic +__glsl_extension(GL_ARB_derivative_control) +__target_intrinsic(glsl, dFdyCoarse) +vector ddy_coarse(vector x); + +__generic +__glsl_extension(GL_ARB_derivative_control) +__target_intrinsic(glsl, dFdyCoarse) +matrix ddy_coarse(matrix x); + +__generic +__glsl_extension(GL_ARB_derivative_control) +__target_intrinsic(glsl, dFdyFine) +T ddy_fine(T x); + +__generic +__glsl_extension(GL_ARB_derivative_control) +__target_intrinsic(glsl, dFdyFine) +vector ddy_fine(vector x); + +__generic +__glsl_extension(GL_ARB_derivative_control) +__target_intrinsic(glsl, dFdyFine) +matrix ddy_fine(matrix x); + + +// Radians to degrees +__generic T degrees(T x); +__generic vector degrees(vector x); +__generic matrix degrees(matrix x); + +// Matrix determinant + +__generic T determinant(matrix m); + +// Barrier for device memory +void DeviceMemoryBarrier(); +void DeviceMemoryBarrierWithGroupSync(); + +// Vector distance + +__generic T distance(vector x, vector y); + +// Vector dot product + +__generic T dot(vector x, vector y); + +// Helper for computing distance terms for lighting (obsolete) + +__generic vector dst(vector x, vector y); + +// Error message 
+ +// void errorf( string format, ... ); + +// Attribute evaluation + +__generic T EvaluateAttributeAtCentroid(T x); +__generic vector EvaluateAttributeAtCentroid(vector x); +__generic matrix EvaluateAttributeAtCentroid(matrix x); + +__generic T EvaluateAttributeAtSample(T x, uint sampleindex); +__generic vector EvaluateAttributeAtSample(vector x, uint sampleindex); +__generic matrix EvaluateAttributeAtSample(matrix x, uint sampleindex); + +__generic T EvaluateAttributeSnapped(T x, int2 offset); +__generic vector EvaluateAttributeSnapped(vector x, int2 offset); +__generic matrix EvaluateAttributeSnapped(matrix x, int2 offset); + +// Base-e exponent +__generic T exp(T x); +__generic vector exp(vector x); +__generic matrix exp(matrix x); + +// Base-2 exponent +__generic T exp2(T x); +__generic vector exp2(vector x); +__generic matrix exp2(matrix x); + +// Convert 16-bit float stored in low bits of integer +float f16tof32(uint value); +__generic vector f16tof32(vector value); + +// Convert to 16-bit float stored in low bits of integer +uint f32tof16(float value); +__generic vector f32tof16(vector value); + +// Flip surface normal to face forward, if needed +__generic vector faceforward(vector n, vector i, vector ng); + +// Find first set bit starting at high bit and working down +__target_intrinsic(glsl,"findMSB") +int firstbithigh(int value); + +__target_intrinsic(glsl,"findMSB") +__generic vector firstbithigh(vector value); + +__target_intrinsic(glsl,"findMSB") +uint firstbithigh(uint value); + +__target_intrinsic(glsl,"findMSB") +__generic vector firstbithigh(vector value); + +// Find first set bit starting at low bit and working up +__target_intrinsic(glsl,"findLSB") +int firstbitlow(int value); + +__target_intrinsic(glsl,"findLSB") +__generic vector firstbitlow(vector value); + +__target_intrinsic(glsl,"findLSB") +uint firstbitlow(uint value); + +__target_intrinsic(glsl,"findLSB") +__generic vector firstbitlow(vector value); + +// Floor (HLSL SM 1.0) +__generic T 
floor(T x); +__generic vector floor(vector x); +__generic matrix floor(matrix x); + +// Fused multiply-add for doubles +double fma(double a, double b, double c); +__generic vector fma(vector a, vector b, vector c); +__generic matrix fma(matrix a, matrix b, matrix c); + +// Floating point remainder of x/y +__generic T fmod(T x, T y); +__generic vector fmod(vector x, vector y); +__generic matrix fmod(matrix x, matrix y); + +// Fractional part +__generic +__target_intrinsic(glsl, fract) +T frac(T x); + +__generic +__target_intrinsic(glsl, fract) +vector frac(vector x); + +__generic +__target_intrinsic(glsl, fract) +matrix frac(matrix x); + +// Split float into mantissa and exponent +__generic T frexp(T x, out T exp); +__generic vector frexp(vector x, out vector exp); +__generic matrix frexp(matrix x, out matrix exp); + +// Texture filter width +__generic T fwidth(T x); +__generic vector fwidth(vector x); +__generic matrix fwidth(matrix x); + +// Get number of samples in render target +uint GetRenderTargetSampleCount(); + +// Get position of given sample +float2 GetRenderTargetSamplePosition(int Index); + +// Group memory barrier +__target_intrinsic(glsl, "groupMemoryBarrier") +void GroupMemoryBarrier(); + +// Note: the unmatched parentheses in the GLSL lowering are +// to cancel out the parens that the emit logic uses, so that +// we can emit this as if it were an expression. +// +// TODO: investigate whether we can just use "operator comma" here. 
+__target_intrinsic(glsl, "groupMemoryBarrier()); (barrier()") +void GroupMemoryBarrierWithGroupSync(); + +// Atomics + +__target_intrinsic(glsl, "$atomicAdd($A, $1)") +void InterlockedAdd(__ref int dest, int value); + +__target_intrinsic(glsl, "$atomicAdd($A, $1)") +void InterlockedAdd(__ref uint dest, uint value); + +__target_intrinsic(glsl, "($2 = $atomicAdd($A, $1))") +void InterlockedAdd(__ref int dest, int value, out int original_value); + +__target_intrinsic(glsl, "($2 = $atomicAdd($A, $1))") +void InterlockedAdd(__ref uint dest, uint value, out uint original_value); + +__target_intrinsic(glsl, "$atomicAnd($A, $1)") +void InterlockedAnd(__ref int dest, int value); + +__target_intrinsic(glsl, "$atomicAnd($A, $1)") +void InterlockedAnd(__ref uint dest, uint value); + +__target_intrinsic(glsl, "($2 = $atomicAnd($A, $1))") +void InterlockedAnd(__ref int dest, int value, out int original_value); + +__target_intrinsic(glsl, "($2 = $atomicAnd($A, $1))") +void InterlockedAnd(__ref uint dest, uint value, out uint original_value); + +__target_intrinsic(glsl, "($3 = $atomicCompSwap($A, $1, $2))") +void InterlockedCompareExchange(__ref int dest, int compare_value, int value, out int original_value); + +__target_intrinsic(glsl, "($3 = $atomicCompSwap($A, $1, $2))") +void InterlockedCompareExchange(__ref uint dest, uint compare_value, uint value, out uint original_value); + +__target_intrinsic(glsl, "$atomicCompSwap($A, $1, $2)") +void InterlockedCompareStore(__ref int dest, int compare_value, int value); + +__target_intrinsic(glsl, "$atomicCompSwap($A, $1, $2)") +void InterlockedCompareStore(__ref uint dest, uint compare_value, uint value); + +__target_intrinsic(glsl, "$atomicExchange($A, $1)") +void InterlockedExchange(__ref int dest, int value); + +__target_intrinsic(glsl, "$atomicExchange($A, $1)") +void InterlockedExchange(__ref uint dest, uint value); + +__target_intrinsic(glsl, "($2 = $atomicExchange($A, $1))") +void InterlockedExchange(__ref int dest, int value, 
out int original_value); + +__target_intrinsic(glsl, "($2 = $atomicExchange($A, $1))") +void InterlockedExchange(__ref uint dest, uint value, out uint original_value); + +__target_intrinsic(glsl, "$atomicMax($A, $1)") +void InterlockedMax(__ref int dest, int value); + +__target_intrinsic(glsl, "$atomicMax($A, $1)") +void InterlockedMax(__ref uint dest, uint value); + +__target_intrinsic(glsl, "($2 = $atomicMax($A, $1))") +void InterlockedMax(__ref int dest, int value, out int original_value); + +__target_intrinsic(glsl, "($2 = $atomicMax($A, $1))") +void InterlockedMax(__ref uint dest, uint value, out uint original_value); + +__target_intrinsic(glsl, "$atomicMin($A, $1)") +void InterlockedMin(in out int dest, int value); + +__target_intrinsic(glsl, "$atomicMin($A, $1)") +void InterlockedMin(in out uint dest, uint value); + +__target_intrinsic(glsl, "($2 = $atomicMin($A, $1))") +void InterlockedMin(in out int dest, int value, out int original_value); + +__target_intrinsic(glsl, "($2 = $atomicMin($A, $1))") +void InterlockedMin(in out uint dest, uint value, out uint original_value); + +__target_intrinsic(glsl, "$atomicOr($A, $1)") +void InterlockedOr(__ref int dest, int value); + +__target_intrinsic(glsl, "$atomicOr($A, $1)") +void InterlockedOr(__ref uint dest, uint value); + +__target_intrinsic(glsl, "($2 = $atomicOr($A, $1))") +void InterlockedOr(__ref int dest, int value, out int original_value); + +__target_intrinsic(glsl, "($2 = $atomicOr($A, $1))") +void InterlockedOr(__ref uint dest, uint value, out uint original_value); + +__target_intrinsic(glsl, "$atomicXor($A, $1)") +void InterlockedXor(__ref int dest, int value); + +__target_intrinsic(glsl, "$atomicXor($A, $1)") +void InterlockedXor(__ref uint dest, uint value); + +__target_intrinsic(glsl, "($2 = $atomicXor($A, $1))") +void InterlockedXor(__ref int dest, int value, out int original_value); + +__target_intrinsic(glsl, "($2 = $atomicXor($A, $1))") +void InterlockedXor(__ref uint dest, uint value, out uint 
original_value); + +// Is floating-point value finite? +__generic bool isfinite(T x); +__generic vector isfinite(vector x); +__generic matrix isfinite(matrix x); + +// Is floating-point value infinite? +__generic bool isinf(T x); +__generic vector isinf(vector x); +__generic matrix isinf(matrix x); + +// Is floating-point value not-a-number? +__generic bool isnan(T x); +__generic vector isnan(vector x); +__generic matrix isnan(matrix x); + +// Construct float from mantissa and exponent +__generic T ldexp(T x, T exp); +__generic vector ldexp(vector x, vector exp); +__generic matrix ldexp(matrix x, matrix exp); + +// Vector length +__generic T length(vector x); + +// Linear interpolation +__generic +__target_intrinsic(glsl, mix) +T lerp(T x, T y, T s); + +__generic +__target_intrinsic(glsl, mix) +vector lerp(vector x, vector y, vector s); + +__generic +__target_intrinsic(glsl, mix) +matrix lerp(matrix x, matrix y, matrix s); + +// Legacy lighting function (obsolete) +float4 lit(float n_dot_l, float n_dot_h, float m); + +// Base-e logarithm +__generic T log(T x); +__generic vector log(vector x); +__generic matrix log(matrix x); + +// Base-10 logarithm +__generic T log10(T x); +__generic vector log10(vector x); +__generic matrix log10(matrix x); + +// Base-2 logarithm +__generic T log2(T x); +__generic vector log2(vector x); +__generic matrix log2(matrix x); + +// multiply-add +__generic T mad(T mvalue, T avalue, T bvalue); +__generic vector mad(vector mvalue, vector avalue, vector bvalue); +__generic matrix mad(matrix mvalue, matrix avalue, matrix bvalue); + +// maximum +__generic T max(T x, T y); +__generic vector max(vector x, vector y); +__generic matrix max(matrix x, matrix y); + +// minimum +__generic T min(T x, T y); +__generic vector min(vector x, vector y); +__generic matrix min(matrix x, matrix y); + +// split into integer and fractional parts (both with same sign) +__generic T modf(T x, out T ip); +__generic vector modf(vector x, out vector ip); +__generic 
matrix modf(matrix x, out matrix ip); + +// msad4 (whatever that is) +uint4 msad4(uint reference, uint2 source, uint4 accum); + +// General inner products + +// scalar-scalar +__generic T mul(T x, T y); + +// scalar-vector and vector-scalar +__generic vector mul(vector x, T y); +__generic vector mul(T x, vector y); + +// scalar-matrix and matrix-scalar +__generic matrix mul(matrix x, T y); +__generic matrix mul(T x, matrix y); + +// vector-vector (dot product) +__generic __intrinsic_op(dot) T mul(vector x, vector y); + +// vector-matrix +__generic __intrinsic_op(mulVectorMatrix) vector mul(vector x, matrix y); + +// matrix-vector +__generic __intrinsic_op(mulMatrixVector) vector mul(matrix x, vector y); + +// matrix-matrix +__generic __intrinsic_op(mulMatrixMatrix) matrix mul(matrix x, matrix y); + +// noise (deprecated) +float noise(float x); +__generic float noise(vector x); + +/// Indicate that an index may be non-uniform at execution time. +/// +/// Shader Model 5.1 and 6.x introduce support for dynamic indexing +/// of arrays of resources, but place the restriction that *by default* +/// the implementation can assume that any value used as an index into +/// such arrays will be dynamically uniform across an entire `Draw` or `Dispatch` +/// (when using instancing, the value must be uniform across all instances; +/// it does not seem that the restriction extends to draws within a multi-draw). +/// +/// In order to indicate to the implementation that it cannot make the +/// uniformity assumption, a shader programmer is required to pass the index +/// to the `NonUniformResourceIndex` function before using it as an index. +/// The function superficially acts like an identity function. +/// +/// Note: a future version of Slang may take responsibility for inserting calls +/// to this function as necessary in output code, rather than make this +/// the user's responsibility, so that the default behavior of the language +/// is more semantically "correct." 
+uint NonUniformResourceIndex(uint index); +int NonUniformResourceIndex(int index); + +// Normalize a vector +__generic vector normalize(vector x); + +// Raise to a power +__generic T pow(T x, T y); +__generic vector pow(vector x, vector y); +__generic matrix pow(matrix x, matrix y); + +// Output message + +// void printf( string format, ... ); + +// Tessellation factor fixup routines + +void Process2DQuadTessFactorsAvg( + in float4 RawEdgeFactors, + in float2 InsideScale, + out float4 RoundedEdgeTessFactors, + out float2 RoundedInsideTessFactors, + out float2 UnroundedInsideTessFactors); + +void Process2DQuadTessFactorsMax( + in float4 RawEdgeFactors, + in float2 InsideScale, + out float4 RoundedEdgeTessFactors, + out float2 RoundedInsideTessFactors, + out float2 UnroundedInsideTessFactors); + +void Process2DQuadTessFactorsMin( + in float4 RawEdgeFactors, + in float2 InsideScale, + out float4 RoundedEdgeTessFactors, + out float2 RoundedInsideTessFactors, + out float2 UnroundedInsideTessFactors); + +void ProcessIsolineTessFactors( + in float RawDetailFactor, + in float RawDensityFactor, + out float RoundedDetailFactor, + out float RoundedDensityFactor); + +void ProcessQuadTessFactorsAvg( + in float4 RawEdgeFactors, + in float InsideScale, + out float4 RoundedEdgeTessFactors, + out float2 RoundedInsideTessFactors, + out float2 UnroundedInsideTessFactors); + +void ProcessQuadTessFactorsMax( + in float4 RawEdgeFactors, + in float InsideScale, + out float4 RoundedEdgeTessFactors, + out float2 RoundedInsideTessFactors, + out float2 UnroundedInsideTessFactors); + +void ProcessQuadTessFactorsMin( + in float4 RawEdgeFactors, + in float InsideScale, + out float4 RoundedEdgeTessFactors, + out float2 RoundedInsideTessFactors, + out float2 UnroundedInsideTessFactors); + +void ProcessTriTessFactorsAvg( + in float3 RawEdgeFactors, + in float InsideScale, + out float3 RoundedEdgeTessFactors, + out float RoundedInsideTessFactor, + out float UnroundedInsideTessFactor); + +void 
ProcessTriTessFactorsMax( + in float3 RawEdgeFactors, + in float InsideScale, + out float3 RoundedEdgeTessFactors, + out float RoundedInsideTessFactor, + out float UnroundedInsideTessFactor); + +void ProcessTriTessFactorsMin( + in float3 RawEdgeFactors, + in float InsideScale, + out float3 RoundedEdgeTessFactors, + out float RoundedInsideTessFactors, + out float UnroundedInsideTessFactors); + +// Degrees to radians +__generic T radians(T x); +__generic vector radians(vector x); +__generic matrix radians(matrix x); + +// Approximate reciprocal +__generic T rcp(T x); +__generic vector rcp(vector x); +__generic matrix rcp(matrix x); + +// Reflect incident vector across plane with given normal +__generic +vector reflect(vector i, vector n); + +// Refract incident vector given surface normal and index of refraction +__generic +vector refract(vector i, vector n, float eta); + +// Reverse order of bits +__target_intrinsic(glsl, "bitfieldReverse") +uint reversebits(uint value); + +__target_intrinsic(glsl, "bitfieldReverse") +__generic vector reversebits(vector value); + +// Round-to-nearest +__generic T round(T x); +__generic vector round(vector x); +__generic matrix round(matrix x); + +// Reciprocal of square root +__generic T rsqrt(T x); +__generic vector rsqrt(vector x); +__generic matrix rsqrt(matrix x); + +// Clamp value to [0,1] range +__generic +__target_intrinsic(glsl, "clamp($0, 0, 1)") +T saturate(T x); + +__generic +__target_intrinsic(glsl, "clamp($0, 0, 1)") +vector saturate(vector x); + +__generic +__target_intrinsic(glsl, "clamp($0, 0, 1)") +matrix saturate(matrix x); + +__generic +__specialized_for_target(glsl) +T saturate(T x) +{ + return clamp(x, T(0), T(1)); +} + +__generic +__specialized_for_target(glsl) +vector saturate(vector x) +{ + return clamp(x, + vector(T(0)), + vector(T(1))); +} + +// HACK: need a helper to turn a scalar into a matrix, +// because GLSL and HLSL disagree on the semantics of +// constructing a matrix from a single scalar. 
+__generic +matrix __scalarToMatrix(T value); + +__generic +__specialized_for_target(glsl) +matrix saturate(matrix x) +{ + return clamp(x, + __scalarToMatrix(T(0)), + __scalarToMatrix(T(1))); +} + + +// Extract sign of value +__generic int sign(T x); +__generic vector sign(vector x); +__generic matrix sign(matrix x); + + +// Sine +__generic T sin(T x); +__generic vector sin(vector x); +__generic matrix sin(matrix x); + +// Sine and cosine +__generic void sincos(T x, out T s, out T c); +__generic void sincos(vector x, out vector s, out vector c); +__generic void sincos(matrix x, out matrix s, out matrix c); + +// Hyperbolic Sine +__generic T sinh(T x); +__generic vector sinh(vector x); +__generic matrix sinh(matrix x); + +// Smooth step (Hermite interpolation) +__generic T smoothstep(T min, T max, T x); +__generic vector smoothstep(vector min, vector max, vector x); +__generic matrix smoothstep(matrix min, matrix max, matrix x); + +// Square root +__generic T sqrt(T x); +__generic vector sqrt(vector x); +__generic matrix sqrt(matrix x); + +// Step function +__generic T step(T y, T x); +__generic vector step(vector y, vector x); +__generic matrix step(matrix y, matrix x); + +// Tangent +__generic T tan(T x); +__generic vector tan(vector x); +__generic matrix tan(matrix x); + +// Hyperbolic tangent +__generic T tanh(T x); +__generic vector tanh(vector x); +__generic matrix tanh(matrix x); + +// Legacy texture-fetch operations + +/* +float4 tex1D(sampler1D s, float t); +float4 tex1D(sampler1D s, float t, float ddx, float ddy); +float4 tex1Dbias(sampler1D s, float4 t); +float4 tex1Dgrad(sampler1D s, float t, float ddx, float ddy); +float4 tex1Dlod(sampler1D s, float4 t); +float4 tex1Dproj(sampler1D s, float4 t); + +float4 tex2D(sampler2D s, float2 t); +float4 tex2D(sampler2D s, float2 t, float2 ddx, float2 ddy); +float4 tex2Dbias(sampler2D s, float4 t); +float4 tex2Dgrad(sampler2D s, float2 t, float2 ddx, float2 ddy); +float4 tex2Dlod(sampler2D s, float4 t); +float4 
tex2Dproj(sampler2D s, float4 t); + +float4 tex3D(sampler3D s, float3 t); +float4 tex3D(sampler3D s, float3 t, float3 ddx, float3 ddy); +float4 tex3Dbias(sampler3D s, float4 t); +float4 tex3Dgrad(sampler3D s, float3 t, float3 ddx, float3 ddy); +float4 tex3Dlod(sampler3D s, float4 t); +float4 tex3Dproj(sampler3D s, float4 t); + +float4 texCUBE(samplerCUBE s, float3 t); +float4 texCUBE(samplerCUBE s, float3 t, float3 ddx, float3 ddy); +float4 texCUBEbias(samplerCUBE s, float4 t); +float4 texCUBEgrad(samplerCUBE s, float3 t, float3 ddx, float3 ddy); +float4 texCUBElod(samplerCUBE s, float4 t); +float4 texCUBEproj(samplerCUBE s, float4 t); +*/ + +// Matrix transpose +__generic matrix transpose(matrix x); + +// Truncate to integer +__generic T trunc(T x); +__generic vector trunc(vector x); +__generic matrix trunc(matrix x); + +// Shader model 6.0 stuff + +uint GlobalOrderedCountIncrement(uint countToAppendForThisLane); + +__generic T QuadReadLaneAt(T sourceValue, int quadLaneID); +__generic vector QuadReadLaneAt(vector sourceValue, int quadLaneID); +__generic matrix QuadReadLaneAt(matrix sourceValue, int quadLaneID); + +__generic T QuadSwapX(T localValue); +__generic vector QuadSwapX(vector localValue); +__generic matrix QuadSwapX(matrix localValue); + +__generic T QuadSwapY(T localValue); +__generic vector QuadSwapY(vector localValue); +__generic matrix QuadSwapY(matrix localValue); + +__generic T WaveAllBitAnd(T expr); +__generic vector WaveAllBitAnd(vector expr); +__generic matrix WaveAllBitAnd(matrix expr); + +__generic T WaveAllBitOr(T expr); +__generic vector WaveAllBitOr(vector expr); +__generic matrix WaveAllBitOr(matrix expr); + +__generic T WaveAllBitXor(T expr); +__generic vector WaveAllBitXor(vector expr); +__generic matrix WaveAllBitXor(matrix expr); + +__generic T WaveAllMax(T expr); +__generic vector WaveAllMax(vector expr); +__generic matrix WaveAllMax(matrix expr); + +__generic T WaveAllMin(T expr); +__generic vector WaveAllMin(vector expr); +__generic 
matrix WaveAllMin(matrix expr); + +__generic T WaveAllProduct(T expr); +__generic vector WaveAllProduct(vector expr); +__generic matrix WaveAllProduct(matrix expr); + +__generic T WaveAllSum(T expr); +__generic vector WaveAllSum(vector expr); +__generic matrix WaveAllSum(matrix expr); + +bool WaveAllEqual(bool expr); +bool WaveAllTrue(bool expr); +bool WaveAnyTrue(bool expr); + +uint64_t WaveBallot(bool expr); + +uint WaveGetLaneCount(); +uint WaveGetLaneIndex(); +uint WaveGetOrderedIndex(); + +bool WaveIsHelperLane(); + +bool WaveOnce(); + +__generic T WavePrefixProduct(T expr); +__generic vector WavePrefixProduct(vector expr); +__generic matrix WavePrefixProduct(matrix expr); + +__generic T WavePrefixSum(T expr); +__generic vector WavePrefixSum(vector expr); +__generic matrix WavePrefixSum(matrix expr); + +__generic T WaveReadFirstLane(T expr); +__generic vector WaveReadFirstLane(vector expr); +__generic matrix WaveReadFirstLane(matrix expr); + +__generic T WaveReadLaneAt(T expr, int laneIndex); +__generic vector WaveReadLaneAt(vector expr, int laneIndex); +__generic matrix WaveReadLaneAt(matrix expr, int laneIndex); + +// `typedef`s to help with the fact that HLSL has been sorta-kinda case insensitive at various points +typedef Texture2D texture2D; + +${{{{ +// Component-wise multiplication ops +for(auto op : binaryOps) +{ + switch (op.opCode) + { + default: + continue; + + case kIROp_Mul: + case kIRPseudoOp_MulAssign: + break; + } + + for (auto type : kBaseTypes) + { + if ((type.flags & op.flags) == 0) + continue; + + char const* leftType = type.name; + char const* rightType = leftType; + char const* resultType = leftType; + + char const* leftQual = ""; + if(op.flags & ASSIGNMENT) leftQual = "in out "; + + sb << "__generic "; + sb << "__intrinsic_op(" << int(op.opCode) << ") matrix<" << resultType << ",N,M> operator" << op.opName << "(" << leftQual << "matrix<" << leftType << ",N,M> left, matrix<" << rightType << ",N,M> right);\n"; + } +} + +// + +// Buffer 
types + +static const struct { + char const* name; + SlangResourceAccess access; +} kBaseBufferAccessLevels[] = { + { "", SLANG_RESOURCE_ACCESS_READ }, + { "RW", SLANG_RESOURCE_ACCESS_READ_WRITE }, + { "RasterizerOrdered", SLANG_RESOURCE_ACCESS_RASTER_ORDERED }, +}; +static const int kBaseBufferAccessLevelCount = sizeof(kBaseBufferAccessLevels) / sizeof(kBaseBufferAccessLevels[0]); + +for (int aa = 0; aa < kBaseBufferAccessLevelCount; ++aa) +{ + auto flavor = TextureFlavor::create(TextureFlavor::Shape::ShapeBuffer, kBaseBufferAccessLevels[aa].access).flavor; + sb << "__generic\n"; + sb << "__magic_type(Texture," << int(flavor) << ")\n"; + sb << "__intrinsic_type(" << (kIROp_TextureType + (int(flavor) << kIROpMeta_OtherShift)) << ")\n"; + sb << "struct "; + sb << kBaseBufferAccessLevels[aa].name; + sb << "Buffer {\n"; + + sb << "void GetDimensions(out uint dim);\n"; + + sb << "__glsl_extension(GL_EXT_samplerless_texture_functions)"; + sb << "__target_intrinsic(glsl, \"texelFetch($0, $1)$z\")\n"; + sb << "T Load(int location);\n"; + + sb << "T Load(int location, out uint status);\n"; + + sb << "__subscript(uint index) -> T {\n"; + + sb << "__glsl_extension(GL_EXT_samplerless_texture_functions)"; + sb << "__target_intrinsic(glsl, \"texelFetch($0, int($1))$z\") get;\n"; + + if (kBaseBufferAccessLevels[aa].access != SLANG_RESOURCE_ACCESS_READ) + { + sb << "ref;\n"; + } + + sb << "}\n"; + + sb << "};\n"; +} +}}}} + + +// DirectX Raytracing (DXR) Support +// +// The following is based on the experimental DXR SDK v0.09.01. 
+// +// Numbering follows the sections in the "D3D12 Raytracing Functional Spec" v0.09 (2018-03-12) +// + +// 10.1.1 - Ray Flags + +typedef uint RAY_FLAG; + +static const RAY_FLAG RAY_FLAG_NONE = 0x00; +static const RAY_FLAG RAY_FLAG_FORCE_OPAQUE = 0x01; +static const RAY_FLAG RAY_FLAG_FORCE_NON_OPAQUE = 0x02; +static const RAY_FLAG RAY_FLAG_ACCEPT_FIRST_HIT_AND_END_SEARCH = 0x04; +static const RAY_FLAG RAY_FLAG_SKIP_CLOSEST_HIT_SHADER = 0x08; +static const RAY_FLAG RAY_FLAG_CULL_BACK_FACING_TRIANGLES = 0x10; +static const RAY_FLAG RAY_FLAG_CULL_FRONT_FACING_TRIANGLES = 0x20; +static const RAY_FLAG RAY_FLAG_CULL_OPAQUE = 0x40; +static const RAY_FLAG RAY_FLAG_CULL_NON_OPAQUE = 0x80; + +// 10.1.2 - Ray Description Structure + +__builtin +__magic_type(RayDescType) +__intrinsic_type($(kIROp_RayDescType)) +struct RayDesc +{ + __target_intrinsic(hlsl, Origin) + float3 Origin; + + __target_intrinsic(hlsl, TMin) + float TMin; + + __target_intrinsic(hlsl, Direction) + float3 Direction; + + __target_intrinsic(hlsl, TMax) + float TMax; +}; + +// 10.1.3 - Ray Acceleration Structure + +__builtin +__magic_type(RaytracingAccelerationStructureType) +__intrinsic_type($(kIROp_RaytracingAccelerationStructureType)) +struct RaytracingAccelerationStructure {}; + +// 10.1.4 - Subobject Definitions + +// TODO: We may decide to support these, but their reliance on C++ implicit +// constructor call syntax (`SomeType someVar(arg0, arg1);`) makes them +// annoying for the current Slang parsing strategy, and using global variables +// for this stuff comes across as a kludge rather than the best possible design. 
+ +// 10.1.5 - Intersection Attributes Structure + +__builtin +__magic_type(BuiltInTriangleIntersectionAttributesType) +__intrinsic_type($(kIROp_BuiltInTriangleIntersectionAttributesType)) +struct BuiltInTriangleIntersectionAttributes +{ + __target_intrinsic(hlsl, barycentrics) + float2 barycentrics; +}; + +// 10.2 Shaders + +// Right now new shader stages need to be added directly to the compiler +// implementation, rather than being something that can be declared in the stdlib. + +// 10.3 - Intrinsics + +// 10.3.1 +void CallShader(uint ShaderIndex, inout param_t Parameter); + +// 10.3.2 +void TraceRay( + RaytracingAccelerationStructure AccelerationStructure, + uint RayFlags, + uint InstanceInclusionMask, + uint RayContributionToHitGroupIndex, + uint MultiplierForGeometryContributionToHitGroupIndex, + uint MissShaderIndex, + RayDesc Ray, + inout payload_t Payload); + +// 10.3.3 +bool ReportHit(float THit, uint HitKind, attr_t Attributes); + +// 10.3.4 +void IgnoreHit(); + +// 10.3.5 +void AcceptHitAndEndSearch(); + +// 10.4 - System Values and Special Semantics + +// TODO: Many of these functions need to be restricted so that +// they can only be accessed from specific stages. + +// 10.4.1 - Ray Dispatch System Values +uint3 DispatchRaysIndex(); +uint3 DispatchRaysDimensions(); + +// 10.4.2 - Ray System Values +float3 WorldRayOrigin(); +float3 WorldRayDirection(); +float RayTMin(); +float RayTCurrent(); +uint RayFlags(); + +// 10.4.3 - Primitive/Object Space System Values +uint InstanceIndex(); +uint InstanceID(); +uint PrimitiveIndex(); +float3 ObjectRayOrigin(); +float3 ObjectRayDirection(); + +float3x4 ObjectToWorld3x4(); +float4x3 ObjectToWorld4x3(); +float3x4 WorldToObject3x4(); +float4x3 WorldToObject4x3(); +// Note: The provisional DXR spec included these unadorned +// `ObjectToWorld()` and `WorldToObject()` functions, so +// we will forward them to the new names as a convenience +// for users who are porting their code. 
+// +// TODO: Should we provide a deprecation warning on these +// declarations, so that users can know they aren't coding +// against the final spec? +// +float3x4 ObjectToWorld() { return ObjectToWorld3x4(); } +float3x4 WorldToObject() { return WorldToObject3x4(); } + +// 10.4.4 - Hit Specific System values +uint HitKind(); diff --git a/source/slang/hlsl.meta.slang.h b/source/slang/hlsl.meta.slang.h index 54aa2710d..21a9305f8 100644 --- a/source/slang/hlsl.meta.slang.h +++ b/source/slang/hlsl.meta.slang.h @@ -1436,8 +1436,8 @@ SLANG_RAW("// TODO: Many of these functions need to be restricted so that\n") SLANG_RAW("// they can only be accessed from specific stages.\n") SLANG_RAW("\n") SLANG_RAW("// 10.4.1 - Ray Dispatch System Values\n") -SLANG_RAW("uint2 DispatchRaysIndex();\n") -SLANG_RAW("uint2 DispatchRaysDimensions();\n") +SLANG_RAW("uint3 DispatchRaysIndex();\n") +SLANG_RAW("uint3 DispatchRaysDimensions();\n") SLANG_RAW("\n") SLANG_RAW("// 10.4.2 - Ray System Values\n") SLANG_RAW("float3 WorldRayOrigin();\n") @@ -1452,8 +1452,23 @@ SLANG_RAW("uint InstanceID();\n") SLANG_RAW("uint PrimitiveIndex();\n") SLANG_RAW("float3 ObjectRayOrigin();\n") SLANG_RAW("float3 ObjectRayDirection();\n") -SLANG_RAW("float3x4 ObjectToWorld();\n") -SLANG_RAW("float3x4 WorldToObject();\n") +SLANG_RAW("\n") +SLANG_RAW("float3x4 ObjectToWorld3x4();\n") +SLANG_RAW("float4x3 ObjectToWorld4x3();\n") +SLANG_RAW("float3x4 WorldToObject3x4();\n") +SLANG_RAW("float4x3 WorldToObject4x3();\n") +SLANG_RAW("\n") +SLANG_RAW("// Note: The provisional DXR spec included these unadorned\n") +SLANG_RAW("// `ObjectToWorld()` and `WorldToObject()` functions, so\n") +SLANG_RAW("// we will forward them to the new names as a convience\n") +SLANG_RAW("// for users who are porting their code.\n") +SLANG_RAW("//\n") +SLANG_RAW("// TODO: Should we provide a deprecation warning on these\n") +SLANG_RAW("// declarations, so that users can know they aren't coding\n") +SLANG_RAW("// against the final spec?\n") 
+SLANG_RAW("//\n") +SLANG_RAW("float3x4 ObjectToWorld() { return ObjectToWorld3x4(); }\n") +SLANG_RAW("float3x4 WorldToObject() { return WorldToObject3x4(); }\n") SLANG_RAW("\n") SLANG_RAW("// 10.4.4 - Hit Specific System values\n") SLANG_RAW("uint HitKind();\n") -- cgit v1.2.3