summary refs log tree commit diff stats
path: root/source
diff options
context:
space:
mode:
Diffstat (limited to 'source')
-rw-r--r-- source/slang/hlsl.meta.slang 202
-rw-r--r-- source/slang/slang-emit.cpp 11
-rw-r--r-- source/slang/slang-ir-strip-witness-tables.cpp 33
-rw-r--r-- source/slang/slang-ir-strip-witness-tables.h 10
-rw-r--r-- source/slang/slang.vcxproj 2
-rw-r--r-- source/slang/slang.vcxproj.filters 8
6 files changed, 211 insertions, 55 deletions
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index 572b64b21..d9e40dd4f 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -1199,9 +1199,24 @@ matrix<T, N, M> floor(matrix<T, N, M> x)
}
// Fused multiply-add for doubles
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl)
double fma(double a, double b, double c);
-__generic<let N : int> vector<double, N> fma(vector<double, N> a, vector<double, N> b, vector<double, N> c);
-__generic<let N : int, let M : int> matrix<double,N,M> fma(matrix<double,N,M> a, matrix<double,N,M> b, matrix<double,N,M> c);
+
+__generic<let N : int>
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl)
+vector<double, N> fma(vector<double, N> a, vector<double, N> b, vector<double, N> c)
+{
+ VECTOR_MAP_TRINARY(double, N, fma, a, b, c);
+}
+
+__generic<let N : int, let M : int>
+__target_intrinsic(hlsl)
+matrix<double, N, M> fma(matrix<double, N, M> a, matrix<double, N, M> b, matrix<double, N, M> c)
+{
+ MATRIX_MAP_TRINARY(double, N, M, fma, a, b, c);
+}
// Floating point remainder of x/y
__generic<T : __BuiltinFloatingPointType>
@@ -1425,7 +1440,6 @@ __generic<T : __BuiltinFloatingPointType>
__target_intrinsic(hlsl)
__target_intrinsic(cpu)
__target_intrinsic(cuda)
-//__target_intrinsic(glsl, "(!(isinf($0) || isnan($0)))")
bool isfinite(T x)
{
return !(isinf(x) || isnan(x));
@@ -1433,7 +1447,6 @@ bool isfinite(T x)
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
-//__target_intrinsic(glsl, "(!(isinf($0) || isnan($0)))")
vector<bool, N> isfinite(vector<T, N> x)
{
VECTOR_MAP_UNARY(bool, N, isfinite, x);
@@ -1488,18 +1501,16 @@ matrix<bool, N, M> isnan(matrix<T, N, M> x)
__generic<T : __BuiltinFloatingPointType>
__target_intrinsic(hlsl)
-__target_intrinsic(glsl, "($0 * pow(2.0f, $1))")
-T ldexp(T x, T exp);
-/*{
+T ldexp(T x, T exp)
+{
return x * exp2(exp);
-}*/
+}
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
-__target_intrinsic(glsl, "($0 * pow(2.0f, $1))")
vector<T, N> ldexp(vector<T, N> x, vector<T, N> exp)
{
- VECTOR_MAP_BINARY(T, N, ldexp, x, exp);
+ return x * exp2(exp);
}
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
@@ -1522,17 +1533,17 @@ T length(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, mix)
-T lerp(T x, T y, T s);
-/*{
- return x * (1 - s) + y * s;
-}*/
+T lerp(T x, T y, T s)
+{
+ return x * (T(1.0f) - s) + y * s;
+}
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl, mix)
vector<T, N> lerp(vector<T, N> x, vector<T, N> y, vector<T, N> s)
{
- VECTOR_MAP_TRINARY(T, N, lerp, x, y, s);
+ return x * (T(1.0f) - s) + y * s;
}
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
@@ -1543,7 +1554,14 @@ matrix<T,N,M> lerp(matrix<T,N,M> x, matrix<T,N,M> y, matrix<T,N,M> s)
}
// Legacy lighting function (obsolete)
-float4 lit(float n_dot_l, float n_dot_h, float m);
+__target_intrinsic(hlsl)
+float4 lit(float n_dot_l, float n_dot_h, float m)
+{
+ let ambient = 1.0f;
+ let diffuse = max(n_dot_l, 0.0f);
+ let specular = step(0.0f, n_dot_l) * max(n_dot_h * m, 0.0f);
+ return float4(ambient, diffuse, specular, 1.0f);
+}
// Base-e logarithm
__generic<T : __BuiltinFloatingPointType>
@@ -1606,14 +1624,25 @@ matrix<T,N,M> log2(matrix<T,N,M> x)
// multiply-add
+__generic<T : __BuiltinArithmeticType>
+__target_intrinsic(hlsl)
__target_intrinsic(glsl, fma)
-__generic<T : __BuiltinArithmeticType> T mad(T mvalue, T avalue, T bvalue);
+T mad(T mvalue, T avalue, T bvalue);
+__generic<T : __BuiltinArithmeticType, let N : int>
+__target_intrinsic(hlsl)
__target_intrinsic(glsl, fma)
-__generic<T : __BuiltinArithmeticType, let N : int> vector<T,N> mad(vector<T,N> mvalue, vector<T,N> avalue, vector<T,N> bvalue);
+vector<T, N> mad(vector<T, N> mvalue, vector<T, N> avalue, vector<T, N> bvalue)
+{
+ VECTOR_MAP_TRINARY(T, N, mad, mvalue, avalue, bvalue);
+}
-__target_intrinsic(glsl, fma)
-__generic<T : __BuiltinArithmeticType, let N : int, let M : int> matrix<T,N,M> mad(matrix<T,N,M> mvalue, matrix<T,N,M> avalue, matrix<T,N,M> bvalue);
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int>
+__target_intrinsic(hlsl)
+matrix<T, N, M> mad(matrix<T, N, M> mvalue, matrix<T, N, M> avalue, matrix<T, N, M> bvalue)
+{
+ MATRIX_MAP_TRINARY(T, N, M, mad, mvalue, avalue, bvalue);
+}
// maximum
__generic<T : __BuiltinArithmeticType>
@@ -1677,32 +1706,79 @@ matrix<T,N,M> modf(matrix<T,N,M> x, out matrix<T,N,M> ip)
}
// msad4 (whatever that is)
-uint4 msad4(uint reference, uint2 source, uint4 accum);
+__target_intrinsic(hlsl)
+uint4 msad4(uint reference, uint2 source, uint4 accum)
+{
+ int4 bytesRef = (reference >> uint4(24, 16, 8, 0)) & 0xFF;
+ int4 bytesX = (source.x >> uint4(24, 16, 8, 0)) & 0xFF;
+ int4 bytesY = (source.y >> uint4(24, 16, 8, 0)) & 0xFF;
+
+ uint4 mask = bytesRef == 0 ? 0 : 0xFFFFFFFFu;
+
+ uint4 result = accum;
+ result += mask.x & abs(bytesRef - int4(bytesX.x, bytesY.y, bytesY.z, bytesY.w));
+ result += mask.y & abs(bytesRef - int4(bytesX.x, bytesX.y, bytesY.z, bytesY.w));
+ result += mask.z & abs(bytesRef - int4(bytesX.x, bytesX.y, bytesX.z, bytesY.w));
+ result += mask.w & abs(bytesRef - int4(bytesX.x, bytesX.y, bytesX.z, bytesX.w));
+ return result;
+}
// General inner products
// scalar-scalar
-__generic<T : __BuiltinArithmeticType> T mul(T x, T y);
+__generic<T : __BuiltinArithmeticType>
+__intrinsic_op($(kIROp_Mul))
+T mul(T x, T y);
// scalar-vector and vector-scalar
-__generic<T : __BuiltinArithmeticType, let N : int> vector<T,N> mul(vector<T,N> x, T y);
-__generic<T : __BuiltinArithmeticType, let N : int> vector<T,N> mul(T x, vector<T,N> y);
+__generic<T : __BuiltinArithmeticType, let N : int>
+__intrinsic_op($(kIROp_Mul))
+vector<T, N> mul(vector<T, N> x, T y);
+
+__generic<T : __BuiltinArithmeticType, let N : int>
+__intrinsic_op($(kIROp_Mul))
+vector<T, N> mul(T x, vector<T, N> y);
// scalar-matrix and matrix-scalar
-__generic<T : __BuiltinArithmeticType, let N : int, let M :int> matrix<T,N,M> mul(matrix<T,N,M> x, T y);
-__generic<T : __BuiltinArithmeticType, let N : int, let M :int> matrix<T,N,M> mul(T x, matrix<T,N,M> y);
+__generic<T : __BuiltinArithmeticType, let N : int, let M :int>
+__intrinsic_op($(kIROp_Mul))
+matrix<T, N, M> mul(matrix<T, N, M> x, T y);
+
+__generic<T : __BuiltinArithmeticType, let N : int, let M :int>
+__intrinsic_op($(kIROp_Mul))
+matrix<T, N, M> mul(T x, matrix<T, N, M> y);
// vector-vector (dot product)
-__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op(dot) T mul(vector<T,N> x, vector<T,N> y);
+__generic<T : __BuiltinArithmeticType, let N : int>
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl, "dot")
+T mul(vector<T, N> x, vector<T, N> y)
+{
+ return dot(x, y);
+}
+
+${{{{
+// TODO: The following functions could conceivably be defined
+// in the stdlib for the benefit of targets without direct
+// support for matrices, but the use of `__intrinsic_op` to
+// map them to a dedicated IR instruction interferes with
+// that choice.
+}}}}
// vector-matrix
-__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op(mulVectorMatrix) vector<T,M> mul(vector<T,N> x, matrix<T,N,M> y);
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int>
+__intrinsic_op(mulVectorMatrix)
+vector<T,M> mul(vector<T,N> x, matrix<T,N,M> y);
// matrix-vector
-__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op(mulMatrixVector) vector<T,N> mul(matrix<T,N,M> x, vector<T,M> y);
+__generic<T : __BuiltinArithmeticType, let N : int, let M : int>
+__intrinsic_op(mulMatrixVector)
+vector<T,N> mul(matrix<T,N,M> x, vector<T,M> y);
// matrix-matrix
-__generic<T : __BuiltinArithmeticType, let R : int, let N : int, let C : int> __intrinsic_op(mulMatrixMatrix) matrix<T,R,C> mul(matrix<T,R,N> x, matrix<T,N,C> y);
+__generic<T : __BuiltinArithmeticType, let R : int, let N : int, let C : int>
+__intrinsic_op(mulMatrixMatrix)
+matrix<T,R,C> mul(matrix<T,R,N> x, matrix<T,N,C> y);
// noise (deprecated)
@@ -1753,10 +1829,13 @@ int NonUniformResourceIndex(int index)
}
// Normalize a vector
-__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> normalize(vector<T,N> x);
-/*{
+__generic<T : __BuiltinFloatingPointType, let N : int>
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl)
+vector<T,N> normalize(vector<T,N> x)
+{
return x / length(x);
-}*/
+}
// Raise to a power
__generic<T : __BuiltinFloatingPointType>
@@ -1856,31 +1935,33 @@ void ProcessTriTessFactorsMin(
__generic<T : __BuiltinFloatingPointType>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
-T radians(T x);
+T radians(T x)
+{
+ return x * (T.getPi() / T(180.0f));
+}
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
vector<T, N> radians(vector<T, N> x)
{
- VECTOR_MAP_UNARY(T, N, radians, x);
+ return x * (T.getPi() / T(180.0f));
}
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
matrix<T, N, M> radians(matrix<T, N, M> x)
{
- MATRIX_MAP_UNARY(T, N, M, radians, x);
+ return x * (T.getPi() / T(180.0f));
}
// Approximate reciprocal
__generic<T : __BuiltinFloatingPointType>
__target_intrinsic(hlsl)
-__target_intrinsic(glsl, "1.0/($0)")
-T rcp(T x);
-/*{
- return T(1) / x;
-}*/
+T rcp(T x)
+{
+ return T(1.0) / x;
+}
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
@@ -1891,7 +1972,6 @@ vector<T, N> rcp(vector<T, N> x)
__generic<T : __BuiltinFloatingPointType, let N : int, let M : int>
__target_intrinsic(hlsl)
-// Note: GLSL doesn't define a vector `rcp`, so not intrinsic there
matrix<T, N, M> rcp(matrix<T, N, M> x)
{
MATRIX_MAP_UNARY(T, N, M, rcp, x);
@@ -1899,27 +1979,35 @@ matrix<T, N, M> rcp(matrix<T, N, M> x)
// Reflect incident vector across plane with given normal
__generic<T : __BuiltinFloatingPointType, let N : int>
-vector<T,N> reflect(vector<T,N> i, vector<T,N> n);
-/*{
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl)
+vector<T,N> reflect(vector<T,N> i, vector<T,N> n)
+{
return i - T(2) * dot(n,i) * n;
-}*/
+}
// Refract incident vector given surface normal and index of refraction
__generic<T : __BuiltinFloatingPointType, let N : int>
-vector<T,N> refract(vector<T,N> i, vector<T,N> n, float eta);
-/*{
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl)
+vector<T,N> refract(vector<T,N> i, vector<T,N> n, T eta)
+{
let dotNI = dot(n,i);
let k = T(1) - eta*eta*(T(1) - dotNI * dotNI);
- if(k < 0) return vector<T,N>(T(0));
+ if(k < T(0)) return vector<T,N>(T(0));
return eta * i - (eta * dotNI + sqrt(k)) * n;
-}*/
+}
// Reverse order of bits
__target_intrinsic(glsl, "bitfieldReverse")
uint reversebits(uint value);
__target_intrinsic(glsl, "bitfieldReverse")
-__generic<let N : int> vector<uint,N> reversebits(vector<uint,N> value);
+__generic<let N : int>
+vector<uint, N> reversebits(vector<uint, N> value)
+{
+ VECTOR_MAP_UNARY(uint, N, reversebits, value);
+}
// Round-to-nearest
__generic<T : __BuiltinFloatingPointType>
@@ -2073,7 +2161,13 @@ matrix<T, N, M> sinh(matrix<T, N, M> x)
// Smooth step (Hermite interpolation)
__generic<T : __BuiltinFloatingPointType>
-T smoothstep(T min, T max, T x);
+__target_intrinsic(hlsl)
+__target_intrinsic(glsl)
+T smoothstep(T min, T max, T x)
+{
+ let t = saturate((x - min) / (max - min));
+ return t * t * (T(3.0f) - (t + t));
+}
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
@@ -2113,10 +2207,10 @@ matrix<T, N, M> sqrt(matrix<T, N, M> x)
__generic<T : __BuiltinFloatingPointType>
__target_intrinsic(hlsl)
__target_intrinsic(glsl)
-T step(T y, T x);
-/*{
+T step(T y, T x)
+{
return x < y ? T(0.0f) : T(1.0f);
-}*/
+}
__generic<T : __BuiltinFloatingPointType, let N : int>
__target_intrinsic(hlsl)
diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp
index dcca7d25e..e613f5462 100644
--- a/source/slang/slang-emit.cpp
+++ b/source/slang/slang-emit.cpp
@@ -15,6 +15,7 @@
#include "slang-ir-specialize.h"
#include "slang-ir-specialize-resources.h"
#include "slang-ir-ssa.h"
+#include "slang-ir-strip-witness-tables.h"
#include "slang-ir-union.h"
#include "slang-ir-validate.h"
#include "slang-ir-wrap-structured-buffers.h"
@@ -441,6 +442,16 @@ Result linkAndOptimizeIR(
break;
}
+ // For all targets that don't support true dynamic dispatch through
+ // witness tables (that is, all targets at present), we need
+ // to eliminate witness tables from the IR so that they
+ // don't keep symbols live that we don't actually need.
+ stripWitnessTables(irModule);
+#if 0
+ dumpIRIfEnabled(compileRequest, irModule, "AFTER STRIP WITNESS TABLES");
+#endif
+ validateIRModuleIfEnabled(compileRequest, irModule);
+
// The resource-based specialization pass above
// may create specialized versions of functions, but
// it does not try to completely eliminate the original
diff --git a/source/slang/slang-ir-strip-witness-tables.cpp b/source/slang/slang-ir-strip-witness-tables.cpp
new file mode 100644
index 000000000..8536508ba
--- /dev/null
+++ b/source/slang/slang-ir-strip-witness-tables.cpp
@@ -0,0 +1,33 @@
+// slang-ir-strip-witness-tables.cpp
+#include "slang-ir-strip-witness-tables.h"
+
+#include "slang-ir.h"
+#include "slang-ir-insts.h"
+
+namespace Slang
+{
+
+void stripWitnessTables(IRModule* module)
+{
+ // Our goal here is to empty out any witness tables in
+ // the IR so that they don't keep other symbols alive
+ // further into compilation. Luckily we expect all
+ // witness tables to live directly at the global scope
+ // (or inside of a generic, which we can ignore for
+ // now because the emit logic also ignores generics),
+ // and there is a single function we can call to
+ // remove all of the content from the witness tables
+ // (since the key-value associations are stored as
+ // children of each table).
+
+ for( auto inst : module->getGlobalInsts() )
+ {
+ auto witnessTable = as<IRWitnessTable>(inst);
+ if(!witnessTable)
+ continue;
+
+ witnessTable->removeAndDeallocateAllDecorationsAndChildren();
+ }
+}
+
+}
\ No newline at end of file
diff --git a/source/slang/slang-ir-strip-witness-tables.h b/source/slang/slang-ir-strip-witness-tables.h
new file mode 100644
index 000000000..43bd0127d
--- /dev/null
+++ b/source/slang/slang-ir-strip-witness-tables.h
@@ -0,0 +1,10 @@
+// slang-ir-strip-witness-tables.h
+#pragma once
+
+namespace Slang
+{
+struct IRModule;
+
+ /// Strip the contents of all witness table instructions from the given IR `module`
+void stripWitnessTables(IRModule* module);
+}
\ No newline at end of file
diff --git a/source/slang/slang.vcxproj b/source/slang/slang.vcxproj
index 76cffe08b..2f55fffdc 100644
--- a/source/slang/slang.vcxproj
+++ b/source/slang/slang.vcxproj
@@ -230,6 +230,7 @@
<ClInclude Include="slang-ir-specialize.h" />
<ClInclude Include="slang-ir-ssa.h" />
<ClInclude Include="slang-ir-string-hash.h" />
+ <ClInclude Include="slang-ir-strip-witness-tables.h" />
<ClInclude Include="slang-ir-strip.h" />
<ClInclude Include="slang-ir-type-set.h" />
<ClInclude Include="slang-ir-union.h" />
@@ -313,6 +314,7 @@
<ClCompile Include="slang-ir-specialize.cpp" />
<ClCompile Include="slang-ir-ssa.cpp" />
<ClCompile Include="slang-ir-string-hash.cpp" />
+ <ClCompile Include="slang-ir-strip-witness-tables.cpp" />
<ClCompile Include="slang-ir-strip.cpp" />
<ClCompile Include="slang-ir-type-set.cpp" />
<ClCompile Include="slang-ir-union.cpp" />
diff --git a/source/slang/slang.vcxproj.filters b/source/slang/slang.vcxproj.filters
index 449f72069..7a0f465fe 100644
--- a/source/slang/slang.vcxproj.filters
+++ b/source/slang/slang.vcxproj.filters
@@ -1,4 +1,4 @@
-<?xml version="1.0" encoding="utf-8"?>
+<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<Filter Include="Header Files">
@@ -249,6 +249,9 @@
<ClInclude Include="slang-visitor.h">
<Filter>Header Files</Filter>
</ClInclude>
+ <ClInclude Include="slang-ir-strip-witness-tables.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="slang-check-conformance.cpp">
@@ -467,6 +470,9 @@
<ClCompile Include="slang.cpp">
<Filter>Source Files</Filter>
</ClCompile>
+ <ClCompile Include="slang-ir-strip-witness-tables.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
</ItemGroup>
<ItemGroup>
<None Include="..\core\core.natvis">