21 files changed, 289 insertions, 51 deletions
diff --git a/source/core/slang-downstream-compiler.h b/source/core/slang-downstream-compiler.h
index f9e33ed6c..3ffa32097 100644
--- a/source/core/slang-downstream-compiler.h
+++ b/source/core/slang-downstream-compiler.h
@@ -7,6 +7,7 @@
 #include "slang-process-util.h"
 
 #include "slang-platform.h"
+#include "slang-semantic-version.h"
 
 #include "slang-io.h"
 
@@ -207,6 +208,16 @@ public:
         String value;
     };
 
+    struct CapabilityVersion            // A version of some capability that the source being compiled requires
+    {
+        enum class Kind                 ///< What the version is for
+        {
+            CUDASM,                     ///< CUDA SM version (the NVRTC compiler uses this to choose -arch=compute_XY)
+        };
+        Kind kind;                      ///< Which capability `version` applies to
+        SemanticVersion version;        ///< The version of that capability that is required
+    };
+
     struct CompileOptions
     {
         typedef uint32_t Flags;
@@ -247,6 +258,8 @@ public:
 
         List<String> includePaths;
         List<String> libraryPaths;
+
+        List<CapabilityVersion> requiredCapabilityVersions;
     };
 
     typedef uint32_t ProductFlags;
diff --git a/source/core/slang-nvrtc-compiler.cpp b/source/core/slang-nvrtc-compiler.cpp
index 5d5a1ce0f..0e167bf80 100644
--- a/source/core/slang-nvrtc-compiler.cpp
+++ b/source/core/slang-nvrtc-compiler.cpp
@@ -10,6 +10,7 @@
 
 #include "slang-io.h"
 #include "slang-shared-library.h"
+#include "slang-semantic-version.h"
 
 namespace nvrtc
 {
@@ -307,14 +308,30 @@ SlangResult NVRTCDownstreamCompiler::compile(const CompileOptions& options, RefP
         // This is arguably too much - but nvrtc does not appear to have a mechanism to switch off individual warnings.
         // I tried the -Xcudafe mechanism but that does not appear to work for nvrtc
         cmdLine.addArg("-w");
+    }
 
-        //
-#if 0
-        cmdLine.addArg("-arch=compute_70");
-#else
-        // Needed for Warp intrinsics
-        cmdLine.addArg("-arch=compute_30");
-#endif
+    {
+        // Pick the -arch to compile for: the highest CUDA SM version required, but never lower than 3.0 (lowest supported)
+        SemanticVersion version(3);
+        for (const auto& capabilityVersion : options.requiredCapabilityVersions)
+        {
+            if (capabilityVersion.kind == DownstreamCompiler::CapabilityVersion::Kind::CUDASM)
+            {
+                if (capabilityVersion.version > version)
+                {
+                    version = capabilityVersion.version;
+                }
+            }
+        }
+        // The option has the form -arch=compute_<major><minor>, eg SM 7.0 becomes -arch=compute_70
+        StringBuilder builder;
+        builder << "-arch=compute_";
+        builder << version.m_major;
+        // The minor version is written as a single digit, so it has to be in the range 0-9
+        SLANG_ASSERT(version.m_minor >= 0 && version.m_minor <= 9);
+        builder << char('0' + version.m_minor);
+
+        cmdLine.addArg(builder);
     }
 
     nvrtcProgram program = nullptr;
diff --git a/source/core/slang-semantic-version.cpp b/source/core/slang-semantic-version.cpp
index 93536e007..7f603fd9c 100644
--- a/source/core/slang-semantic-version.cpp
+++ b/source/core/slang-semantic-version.cpp
@@ -7,13 +7,13 @@
 
 namespace Slang {
 
-SlangResult SemanticVersion::parse(const UnownedStringSlice& value, SemanticVersion& outVersion)
+SlangResult SemanticVersion::parse(const UnownedStringSlice& value, char separatorChar, SemanticVersion& outVersion)
 {
     outVersion.reset();
 
     UnownedStringSlice slices[3];
     Index splitCount;
-    SLANG_RETURN_ON_FAIL(StringUtil::split(value, '.', 3, slices, splitCount));
+    SLANG_RETURN_ON_FAIL(StringUtil::split(value, separatorChar, 3, slices, splitCount));
     if (splitCount <= 0)
     {
         return SLANG_FAIL;
@@ -38,6 +38,11 @@ SlangResult SemanticVersion::parse(const UnownedStringSlice& value, SemanticVers
     return SLANG_OK;
 }
 
+SlangResult SemanticVersion::parse(const UnownedStringSlice& value,  SemanticVersion& outVersion)
+{
+    return parse(value, '.', outVersion);       // Convenience overload: the components are separated by '.'
+}
+
 void SemanticVersion::append(StringBuilder& buf) const
 {
     buf << Int32(m_major) << "." << Int32(m_minor);
diff --git a/source/core/slang-semantic-version.h b/source/core/slang-semantic-version.h
index bbfcb663e..d33116de6 100644
--- a/source/core/slang-semantic-version.h
+++ b/source/core/slang-semantic-version.h
@@ -15,9 +15,9 @@ struct SemanticVersion
 
     SemanticVersion():m_major(0), m_minor(0), m_patch(0) {}
     SemanticVersion(int inMajor, int inMinor = 0, int inPatch = 0):
-        m_major(uint8_t(inMajor)),
-        m_minor(uint8_t(inMinor)),
-        m_patch(uint8_t(inPatch))
+        m_major(uint32_t(inMajor)),
+        m_minor(uint16_t(inMinor)),
+        m_patch(uint16_t(inPatch))
     {}
 
     void reset()
@@ -27,15 +27,26 @@ struct SemanticVersion
         m_patch = 0;
     }
 
+        /// True if any of major, minor or patch is non-zero. An all zero version is treated as 'not set'.
+    bool isSet() const { return m_major || m_minor || m_patch; }
+
     IntegerType toInteger() const { return (IntegerType(m_major) << 32) | (uint32_t(m_minor) << 16) | m_patch; }
     void setFromInteger(IntegerType v)
     {
-        m_major = (v >> 32);
-        m_minor = uint16_t(v >> 16);
-        m_patch = uint16_t(v);
+        set(int(v >> 32), int((v >> 16) & 0xffff), int(v & 0xffff));
+    }
+    void set(int major, int minor, int patch = 0)   // Sets all components. minor and patch have to fit in 16 bits.
+    {
+        SLANG_ASSERT(major >= 0 && minor >= 0 && minor <= 0xffff && patch >= 0 && patch <= 0xffff);
+        // The upper bounds are checked because the 16 bit casts below would otherwise silently truncate
+        m_major = uint32_t(major);
+        m_minor = uint16_t(minor);
+        m_patch = uint16_t(patch);
     }
 
     static SlangResult parse(const UnownedStringSlice& value, SemanticVersion& outVersion);
+    static SlangResult parse(const UnownedStringSlice& value, char separatorChar, SemanticVersion& outVersion);
+
     void append(StringBuilder& buf) const;
 
     bool operator>(const ThisType& rhs) const { return toInteger() > rhs.toInteger(); }
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index e2e745773..f82f7b5f4 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -2616,15 +2616,18 @@ __generic<T : __BuiltinType>
 __glsl_extension(GL_KHR_shader_subgroup_vote)
 __spirv_version(1.3)
 __target_intrinsic(glsl, "subgroupAllEqual($0)")
+__cuda_sm_version(7.0)
 __target_intrinsic(cuda, "_waveAllEqual($0)")
 bool WaveActiveAllEqual(T value);
 __generic<T : __BuiltinType, let N : int> 
 __glsl_extension(GL_KHR_shader_subgroup_vote)
 __spirv_version(1.3)
 __target_intrinsic(glsl, "subgroupAllEqual($0)")
+__cuda_sm_version(7.0)
 __target_intrinsic(cuda, "_waveAllEqualMultiple($0)")
 bool WaveActiveAllEqual(vector<T,N> value);
 __generic<T : __BuiltinType, let N : int, let M : int>
+__cuda_sm_version(7.0)
 __target_intrinsic(cuda, "_waveAllEqualMultiple($0)")
 bool WaveActiveAllEqual(matrix<T,N,M> value);
 
@@ -2796,14 +2799,17 @@ uint WavePrefixCountBits(bool value);
 
 __generic<T : __BuiltinType>
 __target_intrinsic(hlsl)
+__cuda_sm_version(7.0)
 __target_intrinsic(cuda, "_waveMatchScalar($0)")
 uint4 WaveMatch(T value);
 __generic<T : __BuiltinType, let N : int>
 __target_intrinsic(hlsl)
+__cuda_sm_version(7.0)
 __target_intrinsic(cuda, "_waveMatchMultiple($0)")
 uint4 WaveMatch(vector<T,N> value);
 __generic<T : __BuiltinType, let N : int, let M : int>
 __target_intrinsic(hlsl)
+__cuda_sm_version(7.0)
 __target_intrinsic(cuda, "_waveMatchMultiple($0)")
 uint4 WaveMatch(matrix<T,N,M> value);
 
diff --git a/source/slang/slang-compiler.cpp b/source/slang/slang-compiler.cpp
index 7eb5f145b..53a028483 100644
--- a/source/slang/slang-compiler.cpp
+++ b/source/slang/slang-compiler.cpp
@@ -22,6 +22,7 @@
 #include "slang-emit.h"
 
 #include "slang-glsl-extension-tracker.h"
+#include "slang-emit-cuda.h"
 
 #include "slang-ir-serialize.h"
 
@@ -1292,6 +1293,19 @@ SlangResult dissassembleDXILUsingDXC(
             SourceResult source;
             SLANG_RETURN_ON_FAIL(emitEntryPointSource(slangRequest, entryPointIndex, targetReq, sourceTarget, endToEndReq, source));
 
+            // Look for the version
+            if (auto cudaTracker = as<CUDAExtensionTracker>(source.extensionTracker))
+            {
+                if (cudaTracker->m_smVersion.isSet())
+                {
+                    DownstreamCompiler::CapabilityVersion version;
+                    version.kind = DownstreamCompiler::CapabilityVersion::Kind::CUDASM;
+                    version.version = cudaTracker->m_smVersion;
+
+                    options.requiredCapabilityVersions.add(version);
+                }
+            }
+
             options.sourceContents = source.source;
             
             maybeDumpIntermediate(slangRequest, options.sourceContents.getBuffer(), sourceTarget);
diff --git a/source/slang/slang-diagnostic-defs.h b/source/slang/slang-diagnostic-defs.h
index a28bfe77e..6e77aa45d 100644
--- a/source/slang/slang-diagnostic-defs.h
+++ b/source/slang/slang-diagnostic-defs.h
@@ -208,7 +208,7 @@ DIAGNOSTIC(20004, Error, unexpectedTokenExpectedComponentDefinition, "unexpected
 DIAGNOSTIC(20008, Error, invalidOperator, "invalid operator '$0'.");
 DIAGNOSTIC(20011, Error, unexpectedColon, "unexpected ':'.")
 DIAGNOSTIC(20012, Error, invalidSPIRVVersion, "Expecting SPIR-V version as either 'major.minor', or quoted if has patch (eg for SPIR-V 1.2, '1.2' or \"1.2\"')")
-
+DIAGNOSTIC(20013, Error, invalidCUDASMVersion, "Expecting CUDA SM version as either 'major.minor', or quoted if has patch (eg for '7.0' or \"7.0\"')")
 //
 // 3xxxx - Semantic analysis
 //
diff --git a/source/slang/slang-emit-cuda.cpp b/source/slang/slang-emit-cuda.cpp
index 64cb240fc..a728df755 100644
--- a/source/slang/slang-emit-cuda.cpp
+++ b/source/slang/slang-emit-cuda.cpp
@@ -375,6 +375,32 @@ bool CUDASourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOu
     return Super::tryEmitInstExprImpl(inst, inOuterPrec);
 }
 
+void CUDASourceEmitter::handleCallExprDecorationsImpl(IRInst* funcValue)
+{
+    // Tracks the highest CUDA SM version required by the function being called.
+    // The result accumulates in m_extensionTracker->m_smVersion.
+
+    // Step through any specializations to reach the value whose decorations are examined
+    IRInst* callee = funcValue;
+    for (auto specialize = as<IRSpecialize>(callee); specialize; specialize = as<IRSpecialize>(callee))
+    {
+        callee = getSpecializedValue(specialize);
+    }
+
+    for (auto decoration : callee->getDecorations())
+    {
+        auto requireSM = as<IRRequireCUDASMVersionDecoration>(decoration);
+        if (!requireSM) continue;
+        // The decoration holds the version in integer form
+        SemanticVersion required;
+        required.setFromInteger(SemanticVersion::IntegerType(requireSM->getCUDASMVersion()));
+        if (required > m_extensionTracker->m_smVersion)
+        {
+            m_extensionTracker->m_smVersion = required;
+        }
+    }
+}
+
 void CUDASourceEmitter::emitLayoutDirectivesImpl(TargetRequest* targetReq)
 {
     SLANG_UNUSED(targetReq);
diff --git a/source/slang/slang-emit-cuda.h b/source/slang/slang-emit-cuda.h
index 3d23fd80f..dce3b4eb8 100644
--- a/source/slang/slang-emit-cuda.h
+++ b/source/slang/slang-emit-cuda.h
@@ -7,6 +7,13 @@
 namespace Slang
 {
 
+class CUDAExtensionTracker : public RefObject     // Holds requirements found while emitting CUDA source
+{
+public:
+        /// The highest CUDA SM version required by calls seen during emit. All zeros if nothing has been required.
+    SemanticVersion m_smVersion;
+};
+
 class CUDASourceEmitter : public CPPSourceEmitter
 {
 public:
@@ -26,8 +33,11 @@ public:
     static UnownedStringSlice getBuiltinTypeName(IROp op);
     static UnownedStringSlice getVectorPrefix(IROp op);
 
+    virtual RefObject* getExtensionTracker() SLANG_OVERRIDE { return m_extensionTracker; }
+
     CUDASourceEmitter(const Desc& desc) :
-        Super(desc)
+        Super(desc),
+        m_extensionTracker(new CUDAExtensionTracker)
     {}
 
 protected:
@@ -51,6 +61,7 @@ protected:
 
     virtual void emitLoopControlDecorationImpl(IRLoopControlDecoration* decl) SLANG_OVERRIDE;
 
+    virtual void handleCallExprDecorationsImpl(IRInst* funcValue) SLANG_OVERRIDE;
 
     //virtual bool tryEmitGlobalParamImpl(IRGlobalParam* varDecl, IRType* varType) SLANG_OVERRIDE;
     virtual bool tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOuterPrec) SLANG_OVERRIDE;
@@ -64,6 +75,8 @@ protected:
     virtual SlangResult calcScalarFuncName(HLSLIntrinsic::Op op, IRBasicType* type, StringBuilder& outBuilder) SLANG_OVERRIDE;
     
     SlangResult _calcCUDATextureTypeName(IRTextureTypeBase* texType, StringBuilder& outName);
+
+    RefPtr<CUDAExtensionTracker> m_extensionTracker;
 };
 
 }
diff --git a/source/slang/slang-ir-inst-defs.h b/source/slang/slang-ir-inst-defs.h
index c6aaf57ca..6c01a700a 100644
--- a/source/slang/slang-ir-inst-defs.h
+++ b/source/slang/slang-ir-inst-defs.h
@@ -404,6 +404,8 @@ INST(HighLevelDeclDecoration,               highLevelDecl,          1, 0)
     INST(RequireSPIRVVersionDecoration,     requireSPIRVVersion,    1, 0)
     INST(RequireGLSLVersionDecoration,      requireGLSLVersion,     1, 0)
     INST(RequireGLSLExtensionDecoration,    requireGLSLExtension,   1, 0)
+    INST(RequireCUDASMVersionDecoration,    requireCUDASMVersion,   1, 0)
+
     INST(ReadNoneDecoration,                readNone,               0, 0)
     INST(VulkanCallablePayloadDecoration,   vulkanCallablePayload,  0, 0)
     INST(EarlyDepthStencilDecoration,       earlyDepthStencil,      0, 0)
diff --git a/source/slang/slang-ir-insts.h b/source/slang/slang-ir-insts.h
index 448bf9f0e..e307dc41e 100644
--- a/source/slang/slang-ir-insts.h
+++ b/source/slang/slang-ir-insts.h
@@ -190,6 +190,18 @@ struct IRRequireSPIRVVersionDecoration : IRDecoration
     }
 };
 
+struct IRRequireCUDASMVersionDecoration : IRDecoration     // Decoration recording a required CUDA SM version
+{
+    enum { kOp = kIROp_RequireCUDASMVersionDecoration };
+    IR_LEAF_ISA(RequireCUDASMVersionDecoration)
+        /// Operand 0 is a constant holding the version
+    IRConstant* getCUDASMVersionOperand() { return cast<IRConstant>(getOperand(0)); }
+    IntegerLiteralValue getCUDASMVersion()      // The version as an integer (the form SemanticVersion::setFromInteger takes)
+    {
+        return getCUDASMVersionOperand()->value.intVal;
+    }
+};
+
 struct IRRequireGLSLExtensionDecoration : IRDecoration
 {
     enum { kOp = kIROp_RequireGLSLExtensionDecoration };
@@ -2131,6 +2143,12 @@ struct IRBuilder
         addDecoration(value, kIROp_RequireSPIRVVersionDecoration, getIntValue(getBasicType(BaseType::UInt64), intValue));
     }
 
+    void addRequireCUDASMVersionDecoration(IRInst* value, const SemanticVersion& version)
+    {
+        // The version is attached as a single UInt64 constant operand, holding SemanticVersion::toInteger()
+        addDecoration(value, kIROp_RequireCUDASMVersionDecoration, getIntValue(getBasicType(BaseType::UInt64), version.toInteger()));
+    }
+
     void addPatchConstantFuncDecoration(IRInst* value, IRInst* patchConstantFunc)
     {
         addDecoration(value, kIROp_PatchConstantFuncDecoration, patchConstantFunc);
diff --git a/source/slang/slang-lower-to-ir.cpp b/source/slang/slang-lower-to-ir.cpp
index 16dc14819..ea1196a6c 100644
--- a/source/slang/slang-lower-to-ir.cpp
+++ b/source/slang/slang-lower-to-ir.cpp
@@ -6196,7 +6196,10 @@ struct DeclLoweringVisitor : DeclVisitor<DeclLoweringVisitor, LoweredValInfo>
         {
             getBuilder()->addRequireSPIRVVersionDecoration(irFunc, versionMod->version);
         }
-
+        for (auto versionMod : decl->GetModifiersOfType<RequiredCUDASMVersionModifier>())
+        {
+            getBuilder()->addRequireCUDASMVersionDecoration(irFunc, versionMod->version);
+        }
 
         if (auto attr = decl->FindModifier<InstanceAttribute>())
         {
diff --git a/source/slang/slang-modifier-defs.h b/source/slang/slang-modifier-defs.h
index 7ea1d0101..0c7156c72 100644
--- a/source/slang/slang-modifier-defs.h
+++ b/source/slang/slang-modifier-defs.h
@@ -80,6 +80,12 @@ SYNTAX_CLASS(RequiredSPIRVVersionModifier, Modifier)
 FIELD(SemanticVersion, version)
 END_SYNTAX_CLASS()
 
+// A modifier to tag something as an intrinsic that requires
+// a certain CUDA SM version to be enabled when used. Specified as "major.minor"
+SYNTAX_CLASS(RequiredCUDASMVersionModifier, Modifier)
+FIELD(SemanticVersion, version)
+END_SYNTAX_CLASS()
+
 SIMPLE_SYNTAX_CLASS(InOutModifier, OutModifier)
 
 // `__ref` modifier for by-reference parameter passing
@@ -486,4 +492,4 @@ SIMPLE_SYNTAX_CLASS(ExternAttribute, Attribute)
 // An `[__unsafeForceInlineExternal]` attribute indicates that the callee should be inlined
 // into call sites after initial IR generation (that is, as early as possible).
 //
-SIMPLE_SYNTAX_CLASS(UnsafeForceInlineEarlyAttribute, Attribute)
-\ No newline at end of file
+SIMPLE_SYNTAX_CLASS(UnsafeForceInlineEarlyAttribute, Attribute)
diff --git a/source/slang/slang-parser.cpp b/source/slang/slang-parser.cpp
index 46dd617a1..5a99f6ffb 100644
--- a/source/slang/slang-parser.cpp
+++ b/source/slang/slang-parser.cpp
@@ -4819,17 +4819,17 @@ namespace Slang
 
         return modifier;
     }
-    static RefPtr<RefObject> parseSPIRVVersionModifier(Parser* parser, void* /*userData*/)
-    {
-        auto modifier = new RequiredSPIRVVersionModifier();
 
+
+    static SlangResult parseSemanticVersion(Parser* parser, Token& outToken, SemanticVersion& outVersion)
+    {
         parser->ReadToken(TokenType::LParent);
-        Token token = parser->ReadToken();
+        outToken = parser->ReadToken();
         parser->ReadToken(TokenType::RParent);
 
-        UnownedStringSlice content = token.Content;
+        UnownedStringSlice content = outToken.Content;
         // We allow specified as major.minor or as a string (in quotes)
-        switch (token.type)
+        switch (outToken.type)
         {
             case TokenType::FloatingPointLiteral:
             {
@@ -4838,26 +4838,44 @@ namespace Slang
             case TokenType::StringLiteral:
             {
                 // We need to trim quotes if needed
-                SLANG_ASSERT(content.getLength() >= 2 && content[0] == '"' && content[content.getLength() -1] == '"');
+                SLANG_ASSERT(content.getLength() >= 2 && content[0] == '"' && content[content.getLength() - 1] == '"');
                 content = UnownedStringSlice(content.begin() + 1, content.end() - 1);
                 break;
             }
             default:
             {
-                parser->sink->diagnose(token, Diagnostics::invalidSPIRVVersion);
-                return RefPtr<RefObject>();
+                return SLANG_FAIL;
             }
         }
-        
+        return SemanticVersion::parse(content, outVersion);
+    }
+
+    static RefPtr<RefObject> parseSPIRVVersionModifier(Parser* parser, void* /*userData*/)
+    {
+        Token token;
         SemanticVersion version;
-        if (SLANG_FAILED(SemanticVersion::parse(content, modifier->version)))
+        if (SLANG_SUCCEEDED(parseSemanticVersion(parser, token, version)))
         {
-            // Unable to parse the error so fail
-            parser->sink->diagnose(token, Diagnostics::invalidSPIRVVersion);
-            return RefPtr<RefObject>();
+            auto modifier = new RequiredSPIRVVersionModifier();
+            modifier->version = version;
+            return modifier;
         }
+        parser->sink->diagnose(token, Diagnostics::invalidSPIRVVersion);
+        return RefPtr<RefObject>();
+    }
 
-        return modifier;
+    static RefPtr<RefObject> parseCUDASMVersionModifier(Parser* parser, void* /*userData*/)
+    {
+        Token versionToken;
+        SemanticVersion smVersion;
+        if (SLANG_FAILED(parseSemanticVersion(parser, versionToken, smVersion)))   // Reads the parenthesized version
+        {
+            parser->sink->diagnose(versionToken, Diagnostics::invalidCUDASMVersion);
+            return RefPtr<RefObject>();
+        }
+        auto result = new RequiredCUDASMVersionModifier();     // Only created once the version is known to be valid
+        result->version = smVersion;
+        return result;
     }
 
     static RefPtr<RefObject> parseLayoutModifier(Parser* parser, void* /*userData*/)
@@ -5149,6 +5167,7 @@ namespace Slang
         MODIFIER(__glsl_extension,  parseGLSLExtensionModifier);
         MODIFIER(__glsl_version,    parseGLSLVersionModifier);
         MODIFIER(__spirv_version,   parseSPIRVVersionModifier);
+        MODIFIER(__cuda_sm_version, parseCUDASMVersionModifier);
 
         MODIFIER(__builtin_type,    parseBuiltinTypeModifier);
         MODIFIER(__magic_type,      parseMagicTypeModifier);
diff --git a/tests/hlsl-intrinsic/wave-equality.slang b/tests/hlsl-intrinsic/wave-equality.slang
index eb9e3e6a3..7ed67b632 100644
--- a/tests/hlsl-intrinsic/wave-equality.slang
+++ b/tests/hlsl-intrinsic/wave-equality.slang
@@ -2,8 +2,7 @@
 //DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute
 //TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile cs_6_0
 //TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute
-// TODO(JS): Requires compute_7_0 which isn't available on all CI systems with CUDA
-//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute
+//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -render-features cuda_sm_7_0
 
 //TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer
 RWStructuredBuffer<int> outputBuffer;
diff --git a/tests/hlsl-intrinsic/wave-multi-prefix.slang b/tests/hlsl-intrinsic/wave-multi-prefix.slang
index fb649d6ef..a1eb0e7a9 100644
--- a/tests/hlsl-intrinsic/wave-multi-prefix.slang
+++ b/tests/hlsl-intrinsic/wave-multi-prefix.slang
@@ -5,8 +5,7 @@
 //DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile sm_6_5
 // Disabled because we don't have GLSL intrinsics for these it seems 
 //DISABLE_TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute
-// TODO(JS): Disabled because requires compute_7_0 which isn't available on all CI with CUDA
-//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute
+//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -render-features cuda_sm_7_0
 
 //TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer
 RWStructuredBuffer<int> outputBuffer;
diff --git a/tools/render-test/cpu-compute-util.cpp b/tools/render-test/cpu-compute-util.cpp
index 2ea74052f..e8b9e8b32 100644
--- a/tools/render-test/cpu-compute-util.cpp
+++ b/tools/render-test/cpu-compute-util.cpp
@@ -350,6 +350,13 @@ static SlangResult _newTexture(const InputTextureDesc& desc, slang::TypeLayoutRe
     return SLANG_FAIL;
 }
 
+/* static */bool CPUComputeUtil::hasFeature(const UnownedStringSlice& feature)
+{
+    SLANG_UNUSED(feature);
+    // No render features are recognized for CPU, so any feature that is asked for is reported as unavailable
+    return false;
+}
+
 /* static */SlangResult CPUComputeUtil::calcBindings(const ShaderCompilerUtil::OutputAndLayout& compilationAndLayout, Context& outContext)
 {
     auto request = compilationAndLayout.output.request;
diff --git a/tools/render-test/cpu-compute-util.h b/tools/render-test/cpu-compute-util.h
index e6e896b6a..c66650506 100644
--- a/tools/render-test/cpu-compute-util.h
+++ b/tools/render-test/cpu-compute-util.h
@@ -49,7 +49,9 @@ struct CPUComputeUtil
         void* m_uniformEntryPointParams;
     };
 
-    
+        /// True if this feature is available on CPU
+    static bool hasFeature(const Slang::UnownedStringSlice& feature);
+
         /// Runs code across run styles and makes sure output buffers match
     static SlangResult checkStyleConsistency(ISlangSharedLibrary* sharedLib, const uint32_t dispatchSize[3], const ShaderCompilerUtil::OutputAndLayout& compilationAndLayout);
 
diff --git a/tools/render-test/cuda/cuda-compute-util.cpp b/tools/render-test/cuda/cuda-compute-util.cpp
index af7c0e6c2..48d73fa93 100644
--- a/tools/render-test/cuda/cuda-compute-util.cpp
+++ b/tools/render-test/cuda/cuda-compute-util.cpp
@@ -5,6 +5,7 @@
 
 #include "../../source/core/slang-std-writers.h"
 #include "../../source/core/slang-token-reader.h"
+#include "../../source/core/slang-semantic-version.h"
 
 #include "../bind-location.h"
 
@@ -307,7 +308,7 @@ static int _calcSMCountPerMultiProcessor(int major, int minor)
     return last.coreCount;
 }
 
-static SlangResult _findMaxFlopsDeviceId(int* outDevice)
+static SlangResult _findMaxFlopsDeviceIndex(int* outDeviceIndex)
 {
     int smPerMultiproc = 0;
     int maxPerfDevice = -1;
@@ -360,7 +361,7 @@ static SlangResult _findMaxFlopsDeviceId(int* outDevice)
         return SLANG_FAIL;
     }
 
-    *outDevice = maxPerfDevice;
+    *outDeviceIndex = maxPerfDevice;
     return SLANG_OK;
 }
 
@@ -374,9 +375,13 @@ static SlangResult _initCuda(CUDAReportStyle reportType = CUDAReportStyle::Norma
 class ScopeCUDAContext
 {
 public:
-    ScopeCUDAContext() : m_context(nullptr) {}
+    ScopeCUDAContext() :            // Initializers are in member declaration order (avoids reorder warnings)
+        m_deviceIndex(-1),
+        m_device(-1),
+        m_context(nullptr)
+    {}
 
-    SlangResult init(unsigned int flags, CUdevice device, CUDAReportStyle reportType = CUDAReportStyle::Normal)
+    SlangResult init(unsigned int flags, int deviceIndex, CUDAReportStyle reportType = CUDAReportStyle::Normal)
     {
         SLANG_RETURN_ON_FAIL(_initCuda(reportType));
 
@@ -386,7 +391,10 @@ public:
             m_context = nullptr;
         }
 
-        SLANG_CUDA_RETURN_WITH_REPORT_ON_FAIL(cuCtxCreate(&m_context, flags, device), reportType);
+        m_deviceIndex = deviceIndex;
+        SLANG_CUDA_RETURN_ON_FAIL(cuDeviceGet(&m_device, deviceIndex));
+
+        SLANG_CUDA_RETURN_WITH_REPORT_ON_FAIL(cuCtxCreate(&m_context, flags, m_device), reportType);
         return SLANG_OK;
     }
 
@@ -394,9 +402,8 @@ public:
     {
         SLANG_RETURN_ON_FAIL(_initCuda(reportType));
 
-        int deviceId;
-        SLANG_RETURN_ON_FAIL(_findMaxFlopsDeviceId(&deviceId));
-        SLANG_CUDA_RETURN_WITH_REPORT_ON_FAIL(cudaSetDevice(deviceId), reportType);
+        SLANG_RETURN_ON_FAIL(_findMaxFlopsDeviceIndex(&m_deviceIndex));
+        SLANG_CUDA_RETURN_WITH_REPORT_ON_FAIL(cudaSetDevice(m_deviceIndex), reportType);
 
         if (m_context)
         {
@@ -404,7 +411,9 @@ public:
             m_context = nullptr;
         }
 
-        SLANG_CUDA_RETURN_WITH_REPORT_ON_FAIL(cuCtxCreate(&m_context, flags, deviceId), reportType);
+        SLANG_CUDA_RETURN_ON_FAIL(cuDeviceGet(&m_device, m_deviceIndex));
+
+        SLANG_CUDA_RETURN_WITH_REPORT_ON_FAIL(cuCtxCreate(&m_context, flags, m_device), reportType);
         return SLANG_OK;
     }
 
@@ -417,9 +426,57 @@ public:
     }
     SLANG_FORCE_INLINE operator CUcontext () const { return m_context; }
 
+    int m_deviceIndex;
+    CUdevice m_device;
     CUcontext m_context;
 };
 
+/* static */SlangResult CUDAComputeUtil::parseFeature(const Slang::UnownedStringSlice& feature, bool& outResult)
+{
+    // Determines if a named render feature is available.
+    // Only features of the form `cuda_sm_<version>` (eg cuda_sm_7_0) are understood; anything else fails.
+    // On success outResult is true if the device's compute capability is at least the version asked for.
+    outResult = false;
+
+    if (!feature.startsWith("cuda_sm_"))
+    {
+        return SLANG_FAIL;
+    }
+
+    // Everything after the 8 character "cuda_sm_" prefix is the version, with '_' as the separator
+    SemanticVersion wantedVersion;
+    const UnownedStringSlice versionText(feature.begin() + 8, feature.end());
+    SLANG_RETURN_ON_FAIL(SemanticVersion::parse(versionText, '_', wantedVersion));
+
+    // The context supplies the index of the device to query
+    ScopeCUDAContext cudaContext;
+    SLANG_RETURN_ON_FAIL(cudaContext.init(0, CUDAReportStyle::Silent));
+    const int device = cudaContext.m_deviceIndex;
+
+    // If compute is prohibited on the device, nothing can execute
+    int mode = -1;
+    SLANG_CUDA_RETURN_ON_FAIL(cudaDeviceGetAttribute(&mode, cudaDevAttrComputeMode, device));
+    if (mode == cudaComputeModeProhibited)
+    {
+        return SLANG_FAIL;
+    }
+
+    int smMajor, smMinor;
+    SLANG_CUDA_RETURN_ON_FAIL(cudaDeviceGetAttribute(&smMajor, cudaDevAttrComputeCapabilityMajor, device));
+    SLANG_CUDA_RETURN_ON_FAIL(cudaDeviceGetAttribute(&smMinor, cudaDevAttrComputeCapabilityMinor, device));
+
+    SemanticVersion deviceVersion;
+    deviceVersion.set(smMajor, smMinor);
+    outResult = (deviceVersion >= wantedVersion);
+    return SLANG_OK;
+}
+
+/* static */bool CUDAComputeUtil::hasFeature(const Slang::UnownedStringSlice& feature)
+{
+    bool isAvailable = false;       // A feature that can't be parsed or queried counts as unavailable
+    return SLANG_SUCCEEDED(parseFeature(feature, isAvailable)) && isAvailable;
+}
+
 /* static */bool CUDAComputeUtil::canCreateDevice()
 {
     ScopeCUDAContext context;
diff --git a/tools/render-test/cuda/cuda-compute-util.h b/tools/render-test/cuda/cuda-compute-util.h
index f15c9d4e3..bc3d7d233 100644
--- a/tools/render-test/cuda/cuda-compute-util.h
+++ b/tools/render-test/cuda/cuda-compute-util.h
@@ -46,6 +46,10 @@ struct CUDAComputeUtil
         List<BindSet::Value*> m_buffers;
     };
 
+    static SlangResult parseFeature(const Slang::UnownedStringSlice& feature, bool& outResult);
+
+    static bool hasFeature(const Slang::UnownedStringSlice& feature);
+
     static SlangResult createTextureResource(const ShaderInputLayoutEntry& srcEntry, slang::TypeLayoutReflection* typeLayout, RefPtr<CUDAResource>& outResource);
 
     static SlangResult execute(const ShaderCompilerUtil::OutputAndLayout& outputAndLayout, const uint32_t dispatchSize[3], Context& outContext);
diff --git a/tools/render-test/render-test-main.cpp b/tools/render-test/render-test-main.cpp
index ab041b5bc..1d88ee500 100644
--- a/tools/render-test/render-test-main.cpp
+++ b/tools/render-test/render-test-main.cpp
@@ -544,6 +544,15 @@ static SlangResult _innerMain(Slang::StdWriters* stdWriters, SlangSession* sessi
     // If it's CPU testing we don't need a window or a renderer
     if (gOptions.rendererType == RendererType::CPU)
     {
+        // Check we have all the required features
+        for (const auto& renderFeature : gOptions.renderFeatures)
+        {
+            if (!CPUComputeUtil::hasFeature(renderFeature.getUnownedSlice()))
+            {
+                return SLANG_E_NOT_AVAILABLE;
+            }
+        }
+
         ShaderCompilerUtil::OutputAndLayout compilationAndLayout;
         SLANG_RETURN_ON_FAIL(ShaderCompilerUtil::compileWithLayout(session, gOptions.sourcePath, gOptions.compileArgs, gOptions.shaderType, input, compilationAndLayout));
 
@@ -604,12 +613,20 @@ static SlangResult _innerMain(Slang::StdWriters* stdWriters, SlangSession* sessi
     }
 
     if (gOptions.rendererType == RendererType::CUDA)
-    {
+    {        
+#if RENDER_TEST_CUDA
+        // Check we have all the required features
+        for (const auto& renderFeature : gOptions.renderFeatures)
+        {
+            if (!CUDAComputeUtil::hasFeature(renderFeature.getUnownedSlice()))
+            {
+                return SLANG_E_NOT_AVAILABLE;
+            }
+        }
+
         ShaderCompilerUtil::OutputAndLayout compilationAndLayout;
         SLANG_RETURN_ON_FAIL(ShaderCompilerUtil::compileWithLayout(session, gOptions.sourcePath, gOptions.compileArgs, gOptions.shaderType, input, compilationAndLayout));
 
-#if RENDER_TEST_CUDA
-
         const uint64_t startTicks = ProcessUtil::getClockTick();
 
         CUDAComputeUtil::Context context;