diff options
Diffstat (limited to 'source')
| -rw-r--r-- | source/slang/hlsl.meta.slang | 202 | ||||
| -rw-r--r-- | source/slang/slang-emit.cpp | 11 | ||||
| -rw-r--r-- | source/slang/slang-ir-strip-witness-tables.cpp | 33 | ||||
| -rw-r--r-- | source/slang/slang-ir-strip-witness-tables.h | 10 | ||||
| -rw-r--r-- | source/slang/slang.vcxproj | 2 | ||||
| -rw-r--r-- | source/slang/slang.vcxproj.filters | 8 |
6 files changed, 211 insertions, 55 deletions
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index 572b64b21..d9e40dd4f 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -1199,9 +1199,24 @@ matrix<T, N, M> floor(matrix<T, N, M> x) } // Fused multiply-add for doubles +__target_intrinsic(hlsl) +__target_intrinsic(glsl) double fma(double a, double b, double c); -__generic<let N : int> vector<double, N> fma(vector<double, N> a, vector<double, N> b, vector<double, N> c); -__generic<let N : int, let M : int> matrix<double,N,M> fma(matrix<double,N,M> a, matrix<double,N,M> b, matrix<double,N,M> c); + +__generic<let N : int> +__target_intrinsic(hlsl) +__target_intrinsic(glsl) +vector<double, N> fma(vector<double, N> a, vector<double, N> b, vector<double, N> c) +{ + VECTOR_MAP_TRINARY(double, N, fma, a, b, c); +} + +__generic<let N : int, let M : int> +__target_intrinsic(hlsl) +matrix<double, N, M> fma(matrix<double, N, M> a, matrix<double, N, M> b, matrix<double, N, M> c) +{ + MATRIX_MAP_TRINARY(double, N, M, fma, a, b, c); +} // Floating point remainder of x/y __generic<T : __BuiltinFloatingPointType> @@ -1425,7 +1440,6 @@ __generic<T : __BuiltinFloatingPointType> __target_intrinsic(hlsl) __target_intrinsic(cpu) __target_intrinsic(cuda) -//__target_intrinsic(glsl, "(!(isinf($0) || isnan($0)))") bool isfinite(T x) { return !(isinf(x) || isnan(x)); @@ -1433,7 +1447,6 @@ bool isfinite(T x) __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) -//__target_intrinsic(glsl, "(!(isinf($0) || isnan($0)))") vector<bool, N> isfinite(vector<T, N> x) { VECTOR_MAP_UNARY(bool, N, isfinite, x); @@ -1488,18 +1501,16 @@ matrix<bool, N, M> isnan(matrix<T, N, M> x) __generic<T : __BuiltinFloatingPointType> __target_intrinsic(hlsl) -__target_intrinsic(glsl, "($0 * pow(2.0f, $1))") -T ldexp(T x, T exp); -/*{ +T ldexp(T x, T exp) +{ return x * exp2(exp); -}*/ +} __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) -__target_intrinsic(glsl, "($0 * pow(2.0f, $1))") vector<T, N> ldexp(vector<T, N> x, vector<T, N> exp) { - VECTOR_MAP_BINARY(T, N, ldexp, x, exp); + return x * exp2(exp); } __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> @@ -1522,17 +1533,17 @@ T length(vector<T, N> x) __generic<T : __BuiltinFloatingPointType> __target_intrinsic(hlsl) __target_intrinsic(glsl, mix) -T lerp(T x, T y, T s); -/*{ - return x * (1 - s) + y * s; -}*/ +T lerp(T x, T y, T s) +{ + return x * (T(1.0f) - s) + y * s; +} __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl, mix) vector<T, N> lerp(vector<T, N> x, vector<T, N> y, vector<T, N> s) { - VECTOR_MAP_TRINARY(T, N, lerp, x, y, s); + return x * (T(1.0f) - s) + y * s; } __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> @@ -1543,7 +1554,14 @@ matrix<T,N,M> lerp(matrix<T,N,M> x, matrix<T,N,M> y, matrix<T,N,M> s) } // Legacy lighting function (obsolete) -float4 lit(float n_dot_l, float n_dot_h, float m); +__target_intrinsic(hlsl) +float4 lit(float n_dot_l, float n_dot_h, float m) +{ + let ambient = 1.0f; + let diffuse = max(n_dot_l, 0.0f); + let specular = step(0.0f, n_dot_l) * max(n_dot_h * m, 0.0f); + return float4(ambient, diffuse, specular, 1.0f); +} // Base-e logarithm __generic<T : __BuiltinFloatingPointType> @@ -1606,14 +1624,25 @@ matrix<T,N,M> log2(matrix<T,N,M> x) // multiply-add +__generic<T : __BuiltinArithmeticType> +__target_intrinsic(hlsl) __target_intrinsic(glsl, fma) -__generic<T : __BuiltinArithmeticType> T mad(T mvalue, T avalue, T bvalue); +T mad(T mvalue, T avalue, T bvalue); +__generic<T : __BuiltinArithmeticType, let N : int> +__target_intrinsic(hlsl) __target_intrinsic(glsl, fma) -__generic<T : __BuiltinArithmeticType, let N : int> vector<T,N> mad(vector<T,N> mvalue, vector<T,N> avalue, vector<T,N> bvalue); +vector<T, N> mad(vector<T, N> mvalue, vector<T, N> avalue, vector<T, N> bvalue) +{ + VECTOR_MAP_TRINARY(T, N, mad, mvalue, avalue, bvalue); +} -__target_intrinsic(glsl, fma) -__generic<T : __BuiltinArithmeticType, let N : int, let M : int> matrix<T,N,M> mad(matrix<T,N,M> mvalue, matrix<T,N,M> avalue, matrix<T,N,M> bvalue); +__generic<T : __BuiltinArithmeticType, let N : int, let M : int> +__target_intrinsic(hlsl) +matrix<T, N, M> mad(matrix<T, N, M> mvalue, matrix<T, N, M> avalue, matrix<T, N, M> bvalue) +{ + MATRIX_MAP_TRINARY(T, N, M, mad, mvalue, avalue, bvalue); +} // maximum __generic<T : __BuiltinArithmeticType> @@ -1677,32 +1706,79 @@ matrix<T,N,M> modf(matrix<T,N,M> x, out matrix<T,N,M> ip) } // msad4 (whatever that is) -uint4 msad4(uint reference, uint2 source, uint4 accum); +__target_intrinsic(hlsl) +uint4 msad4(uint reference, uint2 source, uint4 accum) +{ + int4 bytesRef = (reference >> uint4(24, 16, 8, 0)) & 0xFF; + int4 bytesX = (source.x >> uint4(24, 16, 8, 0)) & 0xFF; + int4 bytesY = (source.y >> uint4(24, 16, 8, 0)) & 0xFF; + + uint4 mask = bytesRef == 0 ? 0 : 0xFFFFFFFFu; + + uint4 result = accum; + result += mask.x & abs(bytesRef - int4(bytesX.x, bytesY.y, bytesY.z, bytesY.w)); + result += mask.y & abs(bytesRef - int4(bytesX.x, bytesX.y, bytesY.z, bytesY.w)); + result += mask.z & abs(bytesRef - int4(bytesX.x, bytesX.y, bytesX.z, bytesY.w)); + result += mask.w & abs(bytesRef - int4(bytesX.x, bytesX.y, bytesX.z, bytesX.w)); + return result; +} // General inner products // scalar-scalar -__generic<T : __BuiltinArithmeticType> T mul(T x, T y); +__generic<T : __BuiltinArithmeticType> +__intrinsic_op($(kIROp_Mul)) +T mul(T x, T y); // scalar-vector and vector-scalar -__generic<T : __BuiltinArithmeticType, let N : int> vector<T,N> mul(vector<T,N> x, T y); -__generic<T : __BuiltinArithmeticType, let N : int> vector<T,N> mul(T x, vector<T,N> y); +__generic<T : __BuiltinArithmeticType, let N : int> +__intrinsic_op($(kIROp_Mul)) +vector<T, N> mul(vector<T, N> x, T y); + +__generic<T : __BuiltinArithmeticType, let N : int> +__intrinsic_op($(kIROp_Mul)) +vector<T, N> mul(T x, vector<T, N> y); // scalar-matrix and matrix-scalar -__generic<T : __BuiltinArithmeticType, let N : int, let M :int> matrix<T,N,M> mul(matrix<T,N,M> x, T y); -__generic<T : __BuiltinArithmeticType, let N : int, let M :int> matrix<T,N,M> mul(T x, matrix<T,N,M> y); +__generic<T : __BuiltinArithmeticType, let N : int, let M :int> +__intrinsic_op($(kIROp_Mul)) +matrix<T, N, M> mul(matrix<T, N, M> x, T y); + +__generic<T : __BuiltinArithmeticType, let N : int, let M :int> +__intrinsic_op($(kIROp_Mul)) +matrix<T, N, M> mul(T x, matrix<T, N, M> y); // vector-vector (dot product) -__generic<T : __BuiltinArithmeticType, let N : int> __intrinsic_op(dot) T mul(vector<T,N> x, vector<T,N> y); +__generic<T : __BuiltinArithmeticType, let N : int> +__target_intrinsic(hlsl) +__target_intrinsic(glsl, "dot") +T mul(vector<T, N> x, vector<T, N> y) +{ + return dot(x, y); +} + +${{{{ +// TODO: The following functions could conceivably be defined +// in the stdlib for the benefit of targets without direct +// support for matrices, but the use of `__intrinsic_op` to +// map them to a dedicated IR instruction interferes with +// that choice. +}}}} // vector-matrix -__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op(mulVectorMatrix) vector<T,M> mul(vector<T,N> x, matrix<T,N,M> y); +__generic<T : __BuiltinArithmeticType, let N : int, let M : int> +__intrinsic_op(mulVectorMatrix) +vector<T,M> mul(vector<T,N> x, matrix<T,N,M> y); // matrix-vector -__generic<T : __BuiltinArithmeticType, let N : int, let M : int> __intrinsic_op(mulMatrixVector) vector<T,N> mul(matrix<T,N,M> x, vector<T,M> y); +__generic<T : __BuiltinArithmeticType, let N : int, let M : int> +__intrinsic_op(mulMatrixVector) +vector<T,N> mul(matrix<T,N,M> x, vector<T,M> y); // matrix-matrix -__generic<T : __BuiltinArithmeticType, let R : int, let N : int, let C : int> __intrinsic_op(mulMatrixMatrix) matrix<T,R,C> mul(matrix<T,R,N> x, matrix<T,N,C> y); +__generic<T : __BuiltinArithmeticType, let R : int, let N : int, let C : int> +__intrinsic_op(mulMatrixMatrix) +matrix<T,R,C> mul(matrix<T,R,N> x, matrix<T,N,C> y); // noise (deprecated) @@ -1753,10 +1829,13 @@ int NonUniformResourceIndex(int index) } // Normalize a vector -__generic<T : __BuiltinFloatingPointType, let N : int> vector<T,N> normalize(vector<T,N> x); -/*{ +__generic<T : __BuiltinFloatingPointType, let N : int> +__target_intrinsic(hlsl) +__target_intrinsic(glsl) +vector<T,N> normalize(vector<T,N> x) +{ return x / length(x); -}*/ +} // Raise to a power __generic<T : __BuiltinFloatingPointType> @@ -1856,31 +1935,33 @@ void ProcessTriTessFactorsMin( __generic<T : __BuiltinFloatingPointType> __target_intrinsic(hlsl) __target_intrinsic(glsl) -T radians(T x); +T radians(T x) +{ + return x * (T.getPi() / T(180.0f)); +} __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) __target_intrinsic(glsl) vector<T, N> radians(vector<T, N> x) { - VECTOR_MAP_UNARY(T, N, radians, x); + return x * (T.getPi() / T(180.0f)); } __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __target_intrinsic(hlsl) matrix<T, N, M> radians(matrix<T, N, M> x) { - MATRIX_MAP_UNARY(T, N, M, radians, x); + return x * (T.getPi() / T(180.0f)); } // Approximate reciprocal __generic<T : __BuiltinFloatingPointType> __target_intrinsic(hlsl) -__target_intrinsic(glsl, "1.0/($0)") -T rcp(T x); -/*{ - return T(1) / x; -}*/ +T rcp(T x) +{ + return T(1.0) / x; +} __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) @@ -1891,7 +1972,6 @@ vector<T, N> rcp(vector<T, N> x) __generic<T : __BuiltinFloatingPointType, let N : int, let M : int> __target_intrinsic(hlsl) -// Note: GLSL doesn't define a vector `rcp`, so not intrinsic there matrix<T, N, M> rcp(matrix<T, N, M> x) { MATRIX_MAP_UNARY(T, N, M, rcp, x); @@ -1899,27 +1979,35 @@ matrix<T, N, M> rcp(matrix<T, N, M> x) // Reflect incident vector across plane with given normal __generic<T : __BuiltinFloatingPointType, let N : int> -vector<T,N> reflect(vector<T,N> i, vector<T,N> n); -/*{ +__target_intrinsic(hlsl) +__target_intrinsic(glsl) +vector<T,N> reflect(vector<T,N> i, vector<T,N> n) +{ return i - T(2) * dot(n,i) * n; -}*/ +} // Refract incident vector given surface normal and index of refraction __generic<T : __BuiltinFloatingPointType, let N : int> -vector<T,N> refract(vector<T,N> i, vector<T,N> n, float eta); -/*{ +__target_intrinsic(hlsl) +__target_intrinsic(glsl) +vector<T,N> refract(vector<T,N> i, vector<T,N> n, T eta) +{ let dotNI = dot(n,i); let k = T(1) - eta*eta*(T(1) - dotNI * dotNI); - if(k < 0) return vector<T,N>(T(0)); + if(k < T(0)) return vector<T,N>(T(0)); return eta * i - (eta * dotNI + sqrt(k)) * n; -}*/ +} // Reverse order of bits __target_intrinsic(glsl, "bitfieldReverse") uint reversebits(uint value); __target_intrinsic(glsl, "bitfieldReverse") -__generic<let N : int> vector<uint,N> reversebits(vector<uint,N> value); +__generic<let N : int> +vector<uint, N> reversebits(vector<uint, N> value) +{ + VECTOR_MAP_UNARY(uint, N, reversebits, value); +} // Round-to-nearest __generic<T : __BuiltinFloatingPointType> @@ -2073,7 +2161,13 @@ matrix<T, N, M> sinh(matrix<T, N, M> x) // Smooth step (Hermite interpolation) __generic<T : __BuiltinFloatingPointType> -T smoothstep(T min, T max, T x); +__target_intrinsic(hlsl) +__target_intrinsic(glsl) +T smoothstep(T min, T max, T x) +{ + let t = saturate((x - min) / (max - min)); + return t * t * (T(3.0f) - (t + t)); +} __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) @@ -2113,10 +2207,10 @@ matrix<T, N, M> sqrt(matrix<T, N, M> x) __generic<T : __BuiltinFloatingPointType> __target_intrinsic(hlsl) __target_intrinsic(glsl) -T step(T y, T x); -/*{ +T step(T y, T x) +{ return x < y ? T(0.0f) : T(1.0f); -}*/ +} __generic<T : __BuiltinFloatingPointType, let N : int> __target_intrinsic(hlsl) diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp index dcca7d25e..e613f5462 100644 --- a/source/slang/slang-emit.cpp +++ b/source/slang/slang-emit.cpp @@ -15,6 +15,7 @@ #include "slang-ir-specialize.h" #include "slang-ir-specialize-resources.h" #include "slang-ir-ssa.h" +#include "slang-ir-strip-witness-tables.h" #include "slang-ir-union.h" #include "slang-ir-validate.h" #include "slang-ir-wrap-structured-buffers.h" @@ -441,6 +442,16 @@ Result linkAndOptimizeIR( break; } + // For all targets that don't support true dynamic dispatch through + // witness tables (that is all targets at present), we need + // to eliminate witness tables from the IR so that they + // don't keep symbols live that we don't actually need. + stripWitnessTables(irModule); +#if 0 + dumpIRIfEnabled(compileRequest, irModule, "AFTER STRIP WITNESS TABLES"); +#endif + validateIRModuleIfEnabled(compileRequest, irModule); + // The resource-based specialization pass above // may create specialized versions of functions, but // it does not try to completely eliminate the original diff --git a/source/slang/slang-ir-strip-witness-tables.cpp b/source/slang/slang-ir-strip-witness-tables.cpp new file mode 100644 index 000000000..8536508ba --- /dev/null +++ b/source/slang/slang-ir-strip-witness-tables.cpp @@ -0,0 +1,33 @@ +// slang-ir-strip-witness-tables.cpp +#include "slang-ir-strip-witness-tables.h" + +#include "slang-ir.h" +#include "slang-ir-insts.h" + +namespace Slang +{ + +void stripWitnessTables(IRModule* module) +{ + // Our goal here is to empty out any witness tables in + // the IR so that they don't keep other symbols alive + // further into compilation. Luckily we expect all + // witness tables to live directly at the global scope + // (or inside of a generic, which we can ignore for + // now because the emit logic also ignores generics), + // and there is a single function we can call to + // remove all of the content from the witness tables + // (since the key-value associations are stored as + // children of each table). + + for( auto inst : module->getGlobalInsts() ) + { + auto witnessTable = as<IRWitnessTable>(inst); + if(!witnessTable) + continue; + + witnessTable->removeAndDeallocateAllDecorationsAndChildren(); + } +} + +}
\ No newline at end of file diff --git a/source/slang/slang-ir-strip-witness-tables.h b/source/slang/slang-ir-strip-witness-tables.h new file mode 100644 index 000000000..43bd0127d --- /dev/null +++ b/source/slang/slang-ir-strip-witness-tables.h @@ -0,0 +1,10 @@ +// slang-ir-strip-witness-tables.cpp +#pragma once + +namespace Slang +{ +struct IRModule; + + /// Strip the contents of all witness table instructions from the given IR `module` +void stripWitnessTables(IRModule* module); +}
\ No newline at end of file diff --git a/source/slang/slang.vcxproj b/source/slang/slang.vcxproj index 76cffe08b..2f55fffdc 100644 --- a/source/slang/slang.vcxproj +++ b/source/slang/slang.vcxproj @@ -230,6 +230,7 @@ <ClInclude Include="slang-ir-specialize.h" /> <ClInclude Include="slang-ir-ssa.h" /> <ClInclude Include="slang-ir-string-hash.h" /> + <ClInclude Include="slang-ir-strip-witness-tables.h" /> <ClInclude Include="slang-ir-strip.h" /> <ClInclude Include="slang-ir-type-set.h" /> <ClInclude Include="slang-ir-union.h" /> @@ -313,6 +314,7 @@ <ClCompile Include="slang-ir-specialize.cpp" /> <ClCompile Include="slang-ir-ssa.cpp" /> <ClCompile Include="slang-ir-string-hash.cpp" /> + <ClCompile Include="slang-ir-strip-witness-tables.cpp" /> <ClCompile Include="slang-ir-strip.cpp" /> <ClCompile Include="slang-ir-type-set.cpp" /> <ClCompile Include="slang-ir-union.cpp" /> diff --git a/source/slang/slang.vcxproj.filters b/source/slang/slang.vcxproj.filters index 449f72069..7a0f465fe 100644 --- a/source/slang/slang.vcxproj.filters +++ b/source/slang/slang.vcxproj.filters @@ -1,4 +1,4 @@ -<?xml version="1.0" encoding="utf-8"?> +<?xml version="1.0" encoding="utf-8"?> <Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> <ItemGroup> <Filter Include="Header Files"> @@ -249,6 +249,9 @@ <ClInclude Include="slang-visitor.h"> <Filter>Header Files</Filter> </ClInclude> + <ClInclude Include="slang-ir-strip-witness-tables.h"> + <Filter>Header Files</Filter> + </ClInclude> </ItemGroup> <ItemGroup> <ClCompile Include="slang-check-conformance.cpp"> @@ -467,6 +470,9 @@ <ClCompile Include="slang.cpp"> <Filter>Source Files</Filter> </ClCompile> + <ClCompile Include="slang-ir-strip-witness-tables.cpp"> + <Filter>Source Files</Filter> + </ClCompile> </ItemGroup> <ItemGroup> <None Include="..\core\core.natvis"> |
