From 3e312b3062ab493c80d7d7eddf43c94ec59ecdb7 Mon Sep 17 00:00:00 2001
From: jsmall-nvidia <jsmall@nvidia.com>
Date: Thu, 10 Nov 2022 18:38:19 -0500
Subject: Improvements to NVRTC diagnostic parsing (#2504)

* #include of an absolute path didn't work, because paths were always taken to be relative.

* Float16 support for C++/CPU based targets with f16tof32 and f32tof16.

* Small correction around INF/NAN handling for f32tof16

* Small improvement to f16tof32

* Disable CUDA test for now.

* Improvements to NVRTC diagnostic parsing.
Handle compilerSpecificArgs.
Fix issue with terminating nul ending up in diagnostic string.

* Improved NVRTC error parsing.
f32tof16 and f16tof32 work in principle on CUDA.

* Small update to test, although they remain disabled.

* Work around SLANG_E_NOT_AVAILABLE being turned into ignored, when a legitimate error is found

* A more tightly constrained fallback NVRTC diagnostic parsing.

* Remove CharUtil, as it is not needed.

Co-authored-by: Yong He <yonghe@outlook.com>
---
 source/slang/hlsl.meta.slang | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'source/slang')
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index ec2e6de95..1cff7d6f3 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -1742,6 +1742,8 @@ matrix<T,N,M> exp2(matrix<T,N,M> x)
 __target_intrinsic(glsl, "unpackHalf2x16($0).x")
 __glsl_version(420)
 __target_intrinsic(hlsl)
+__cuda_sm_version(6.0)
+__target_intrinsic(cuda, "__half2float(__short_as_half($0))")
 float f16tof32(uint value);
 
 __generic<let N : int>
@@ -1757,6 +1759,8 @@ vector<float, N> f16tof32(vector<uint, N> value)
 __target_intrinsic(glsl, "packHalf2x16(vec2($0,0.0))")
 __glsl_version(420)
 __target_intrinsic(hlsl)
+__cuda_sm_version(6.0)
+__target_intrinsic(cuda, "__half_as_ushort(__float2half($0))")
 uint f32tof16(float value);
 
 __generic<let N : int>
-- 
cgit v1.2.3