From c2873f406d544057e0ec61e61fb8580ca768e493 Mon Sep 17 00:00:00 2001
From: Tim Foley <tfoleyNV@users.noreply.github.com>
Date: Tue, 1 Sep 2020 18:37:54 -0700
Subject: Mark f32tof16 and f16tof32 as HLSL intrinsics (#1526)

Fixes GitLab issue 85

These functions are intrinsics in HLSL, but were not marked as such, so the emitted HLSL code looped manually in the vector case. The looping code resulted in lower performance for some users, because apparently dxc was unable (or unwilling?) to unroll the loop, and ended up generating temporary ("stack-allocated") arrays for the vectors produced.

As a longer-term solution, we may need to consider how the `VECTOR_MAP...` and `MATRIX_MAP...` idioms used in the stdlib get lowered, so that we can emit fully-unrolled versions in cases where the vector/matrix shape is known at the time we generate code. This PR does not attempt to address that larger issue.
---
 source/slang/hlsl.meta.slang | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'source')
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index 167a60b1e..c89ab8ff9 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -1477,9 +1477,11 @@ matrix<T,N,M> exp2(matrix<T,N,M> x)
 // Convert 16-bit float stored in low bits of integer
 __target_intrinsic(glsl, "unpackHalf2x16($0).x")
 __glsl_version(420)
+__target_intrinsic(hlsl)
 float f16tof32(uint value);
 
 __generic<let N : int>
+__target_intrinsic(hlsl)
 vector<float, N> f16tof32(vector<uint, N> value)
 {
     VECTOR_MAP_UNARY(float, N, f16tof32, value);
@@ -1488,9 +1490,11 @@ vector<float, N> f16tof32(vector<uint, N> value)
 // Convert to 16-bit float stored in low bits of integer
 __target_intrinsic(glsl, "packHalf2x16(vec2($0,0.0))")
 __glsl_version(420)
+__target_intrinsic(hlsl)
 uint f32tof16(float value);
 
 __generic<let N : int>
+__target_intrinsic(hlsl)
 vector<uint, N> f32tof16(vector<float, N> value)
 {
     VECTOR_MAP_UNARY(uint, N, f32tof16, value);
-- 
cgit v1.2.3