From 3e312b3062ab493c80d7d7eddf43c94ec59ecdb7 Mon Sep 17 00:00:00 2001 From: jsmall-nvidia Date: Thu, 10 Nov 2022 18:38:19 -0500 Subject: Improvements to NVRTC diagnostic parsing (#2504) * #include an absolute path didn't work - because paths were taken to always be relative. * Float16 support for C++/CPU based targets with f16tof32 and f32tof16. * Small correction around INF/NAN handling for f32tof16 * Small improvement to f16tof32 * Disable CUDA test for now. * Improvements to NVRTC diagnostic parsing. Handle compilerSpecificArgs. Fix issue with terminating nul ending up in diagnostic string. * Improved NVRTC error parsing. f32tof16 and f16tof32 work in principle on CUDA. * Small update to test, although they remain disabled. * Work around SLANG_E_NOT_AVAILABLE being turned into ignored, when a legitimate error is found * A more tightly constrained fallback NVRTC diagnostic parsing. * Remove CharUtil, as not needed. Co-authored-by: Yong He --- source/slang/hlsl.meta.slang | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'source/slang') diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index ec2e6de95..1cff7d6f3 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -1742,6 +1742,8 @@ matrix exp2(matrix x) __target_intrinsic(glsl, "unpackHalf2x16($0).x") __glsl_version(420) __target_intrinsic(hlsl) +__cuda_sm_version(6.0) +__target_intrinsic(cuda, "__half2float(__short_as_half($0))") float f16tof32(uint value); __generic @@ -1757,6 +1759,8 @@ vector f16tof32(vector value) __target_intrinsic(glsl, "packHalf2x16(vec2($0,0.0))") __glsl_version(420) __target_intrinsic(hlsl) +__cuda_sm_version(6.0) +__target_intrinsic(cuda, "__half_as_ushort(__float2half($0))") uint f32tof16(float value); __generic -- cgit v1.2.3