From 421941993d169c943f2c364bfe9c48b603339fd1 Mon Sep 17 00:00:00 2001 From: Neil Bickford <57467222+NeilBickford-NV@users.noreply.github.com> Date: Tue, 7 Nov 2023 11:47:18 -0800 Subject: CUDA: Fixes for NVRTC 12.x and warp mask ambiguity; adds CC 8.x warp reduction intrinsics. (#3314) * CUDA: Fixes for NVRTC 12.x, warp mask ambiguity; add reduction partial specializations. * Fixes running NVRTC on CUDA 12 without a specified profile (used in testing, e.g. `slang-test -api cuda -category wave`) * Fixes mask ambiguity between getting the lane index from threadId.x and a full mask of threads. * Adds partial specializations for compute capability 8.x warp reduction intrinsics. * Fix formatting --- source/compiler-core/slang-nvrtc-compiler.cpp | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'source') diff --git a/source/compiler-core/slang-nvrtc-compiler.cpp b/source/compiler-core/slang-nvrtc-compiler.cpp index daa392120..b84b1a403 100644 --- a/source/compiler-core/slang-nvrtc-compiler.cpp +++ b/source/compiler-core/slang-nvrtc-compiler.cpp @@ -820,15 +820,21 @@ SlangResult NVRTCDownstreamCompiler::compile(const DownstreamCompileOptions& inO { // The lowest supported CUDA architecture version supported - // by NVRTC is `compute_30`. + // by any version of NVRTC we support is `compute_30`. // SemanticVersion version(3); - // Newer releases of NVRTC only support `compute_35` and up - // (with everything before `compute_52` being deprecated). - // - if( m_desc.version.m_major >= 11 ) + // Newer releases of NVRTC only support newer CUDA architectures. + if ( m_desc.version.m_major >= 12 ) + { + // NVRTC in CUDA 12 only supports `compute_50` and up + // (with everything before `compute_52` being deprecated). + version = SemanticVersion(5, 0); + } + else if ( m_desc.version.m_major == 11 ) { + // NVRTC in CUDA 11 only supports `compute_35` and up + // (with everything before `compute_52` being deprecated). version = SemanticVersion(3, 5); } -- cgit v1.2.3