diff options
| author | jsmall-nvidia <jsmall@nvidia.com> | 2020-03-30 19:23:09 -0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-03-30 23:23:09 +0000 |
| commit | ea7690558bca71ce3a9453adff4e0135352a352f (patch) | |
| tree | 3eb983d3f8e6b1c215f6d2818a0f3e793ecb4485 | |
| parent | ad5b60c8b5868c69a979779f201748fb7837fdc9 (diff) | |
CUDA version handling (#1301)
* Add a render-test feature (e.g. `cuda_sm_7_0`) for the CUDA compute model.
* Use SemanticVersion type.
* Enable CUDA wave tests that require CUDA SM 7.0.
Provide mechanism for DownstreamCompiler to specify version numbers.
* Enabled wave-equality.slang
* Allow the CUDA SM major version to be more than a single digit.
* Fix assert.
* DownstreamCompiler::Version -> CapabilityVersion
21 files changed, 289 insertions, 51 deletions
diff --git a/source/core/slang-downstream-compiler.h b/source/core/slang-downstream-compiler.h index f9e33ed6c..3ffa32097 100644 --- a/source/core/slang-downstream-compiler.h +++ b/source/core/slang-downstream-compiler.h @@ -7,6 +7,7 @@ #include "slang-process-util.h" #include "slang-platform.h" +#include "slang-semantic-version.h" #include "slang-io.h" @@ -207,6 +208,16 @@ public: String value; }; + struct CapabilityVersion + { + enum class Kind + { + CUDASM, ///< What the version is for + }; + Kind kind; + SemanticVersion version; + }; + struct CompileOptions { typedef uint32_t Flags; @@ -247,6 +258,8 @@ public: List<String> includePaths; List<String> libraryPaths; + + List<CapabilityVersion> requiredCapabilityVersions; }; typedef uint32_t ProductFlags; diff --git a/source/core/slang-nvrtc-compiler.cpp b/source/core/slang-nvrtc-compiler.cpp index 5d5a1ce0f..0e167bf80 100644 --- a/source/core/slang-nvrtc-compiler.cpp +++ b/source/core/slang-nvrtc-compiler.cpp @@ -10,6 +10,7 @@ #include "slang-io.h" #include "slang-shared-library.h" +#include "slang-semantic-version.h" namespace nvrtc { @@ -307,14 +308,30 @@ SlangResult NVRTCDownstreamCompiler::compile(const CompileOptions& options, RefP // This is arguably too much - but nvrtc does not appear to have a mechanism to switch off individual warnings. 
// I tried the -Xcudafe mechanism but that does not appear to work for nvrtc cmdLine.addArg("-w"); + } - // -#if 0 - cmdLine.addArg("-arch=compute_70"); -#else - // Needed for Warp intrinsics - cmdLine.addArg("-arch=compute_30"); -#endif + { + // Lowest supported is 3.0 + SemanticVersion version(3); + for (const auto& capabilityVersion : options.requiredCapabilityVersions) + { + if (capabilityVersion.kind == DownstreamCompiler::CapabilityVersion::Kind::CUDASM) + { + if (capabilityVersion.version > version) + { + version = capabilityVersion.version; + } + } + } + + StringBuilder builder; + builder << "-arch=compute_"; + builder << version.m_major; + + SLANG_ASSERT(version.m_minor >= 0 && version.m_minor <= 9); + builder << char('0' + version.m_minor); + + cmdLine.addArg(builder); } nvrtcProgram program = nullptr; diff --git a/source/core/slang-semantic-version.cpp b/source/core/slang-semantic-version.cpp index 93536e007..7f603fd9c 100644 --- a/source/core/slang-semantic-version.cpp +++ b/source/core/slang-semantic-version.cpp @@ -7,13 +7,13 @@ namespace Slang { -SlangResult SemanticVersion::parse(const UnownedStringSlice& value, SemanticVersion& outVersion) +SlangResult SemanticVersion::parse(const UnownedStringSlice& value, char separatorChar, SemanticVersion& outVersion) { outVersion.reset(); UnownedStringSlice slices[3]; Index splitCount; - SLANG_RETURN_ON_FAIL(StringUtil::split(value, '.', 3, slices, splitCount)); + SLANG_RETURN_ON_FAIL(StringUtil::split(value, separatorChar, 3, slices, splitCount)); if (splitCount <= 0) { return SLANG_FAIL; @@ -38,6 +38,11 @@ SlangResult SemanticVersion::parse(const UnownedStringSlice& value, SemanticVers return SLANG_OK; } +SlangResult SemanticVersion::parse(const UnownedStringSlice& value, SemanticVersion& outVersion) +{ + return parse(value, '.', outVersion); +} + void SemanticVersion::append(StringBuilder& buf) const { buf << Int32(m_major) << "." 
<< Int32(m_minor); diff --git a/source/core/slang-semantic-version.h b/source/core/slang-semantic-version.h index bbfcb663e..d33116de6 100644 --- a/source/core/slang-semantic-version.h +++ b/source/core/slang-semantic-version.h @@ -15,9 +15,9 @@ struct SemanticVersion SemanticVersion():m_major(0), m_minor(0), m_patch(0) {} SemanticVersion(int inMajor, int inMinor = 0, int inPatch = 0): - m_major(uint8_t(inMajor)), - m_minor(uint8_t(inMinor)), - m_patch(uint8_t(inPatch)) + m_major(uint32_t(inMajor)), + m_minor(uint16_t(inMinor)), + m_patch(uint16_t(inPatch)) {} void reset() @@ -27,15 +27,26 @@ struct SemanticVersion m_patch = 0; } + /// All zeros means nothing is set + bool isSet() const { return m_major || m_minor || m_patch; } + IntegerType toInteger() const { return (IntegerType(m_major) << 32) | (uint32_t(m_minor) << 16) | m_patch; } void setFromInteger(IntegerType v) { - m_major = (v >> 32); - m_minor = uint16_t(v >> 16); - m_patch = uint16_t(v); + set(int(v >> 32), int((v >> 16) & 0xffff), int(v & 0xffff)); + } + void set(int major, int minor, int patch = 0) + { + SLANG_ASSERT(major >= 0 && minor >=0 && patch >= 0); + + m_major = uint32_t(major); + m_minor = uint16_t(minor); + m_patch = uint16_t(patch); } static SlangResult parse(const UnownedStringSlice& value, SemanticVersion& outVersion); + static SlangResult parse(const UnownedStringSlice& value, char separatorChar, SemanticVersion& outVersion); + void append(StringBuilder& buf) const; bool operator>(const ThisType& rhs) const { return toInteger() > rhs.toInteger(); } diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index e2e745773..f82f7b5f4 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -2616,15 +2616,18 @@ __generic<T : __BuiltinType> __glsl_extension(GL_KHR_shader_subgroup_vote) __spirv_version(1.3) __target_intrinsic(glsl, "subgroupAllEqual($0)") +__cuda_sm_version(7.0) __target_intrinsic(cuda, "_waveAllEqual($0)") bool WaveActiveAllEqual(T 
value); __generic<T : __BuiltinType, let N : int> __glsl_extension(GL_KHR_shader_subgroup_vote) __spirv_version(1.3) __target_intrinsic(glsl, "subgroupAllEqual($0)") +__cuda_sm_version(7.0) __target_intrinsic(cuda, "_waveAllEqualMultiple($0)") bool WaveActiveAllEqual(vector<T,N> value); __generic<T : __BuiltinType, let N : int, let M : int> +__cuda_sm_version(7.0) __target_intrinsic(cuda, "_waveAllEqualMultiple($0)") bool WaveActiveAllEqual(matrix<T,N,M> value); @@ -2796,14 +2799,17 @@ uint WavePrefixCountBits(bool value); __generic<T : __BuiltinType> __target_intrinsic(hlsl) +__cuda_sm_version(7.0) __target_intrinsic(cuda, "_waveMatchScalar($0)") uint4 WaveMatch(T value); __generic<T : __BuiltinType, let N : int> __target_intrinsic(hlsl) +__cuda_sm_version(7.0) __target_intrinsic(cuda, "_waveMatchMultiple($0)") uint4 WaveMatch(vector<T,N> value); __generic<T : __BuiltinType, let N : int, let M : int> __target_intrinsic(hlsl) +__cuda_sm_version(7.0) __target_intrinsic(cuda, "_waveMatchMultiple($0)") uint4 WaveMatch(matrix<T,N,M> value); diff --git a/source/slang/slang-compiler.cpp b/source/slang/slang-compiler.cpp index 7eb5f145b..53a028483 100644 --- a/source/slang/slang-compiler.cpp +++ b/source/slang/slang-compiler.cpp @@ -22,6 +22,7 @@ #include "slang-emit.h" #include "slang-glsl-extension-tracker.h" +#include "slang-emit-cuda.h" #include "slang-ir-serialize.h" @@ -1292,6 +1293,19 @@ SlangResult dissassembleDXILUsingDXC( SourceResult source; SLANG_RETURN_ON_FAIL(emitEntryPointSource(slangRequest, entryPointIndex, targetReq, sourceTarget, endToEndReq, source)); + // Look for the version + if (auto cudaTracker = as<CUDAExtensionTracker>(source.extensionTracker)) + { + if (cudaTracker->m_smVersion.isSet()) + { + DownstreamCompiler::CapabilityVersion version; + version.kind = DownstreamCompiler::CapabilityVersion::Kind::CUDASM; + version.version = cudaTracker->m_smVersion; + + options.requiredCapabilityVersions.add(version); + } + } + options.sourceContents = 
source.source; maybeDumpIntermediate(slangRequest, options.sourceContents.getBuffer(), sourceTarget); diff --git a/source/slang/slang-diagnostic-defs.h b/source/slang/slang-diagnostic-defs.h index a28bfe77e..6e77aa45d 100644 --- a/source/slang/slang-diagnostic-defs.h +++ b/source/slang/slang-diagnostic-defs.h @@ -208,7 +208,7 @@ DIAGNOSTIC(20004, Error, unexpectedTokenExpectedComponentDefinition, "unexpected DIAGNOSTIC(20008, Error, invalidOperator, "invalid operator '$0'."); DIAGNOSTIC(20011, Error, unexpectedColon, "unexpected ':'.") DIAGNOSTIC(20012, Error, invalidSPIRVVersion, "Expecting SPIR-V version as either 'major.minor', or quoted if has patch (eg for SPIR-V 1.2, '1.2' or \"1.2\"')") - +DIAGNOSTIC(20013, Error, invalidCUDASMVersion, "Expecting CUDA SM version as either 'major.minor', or quoted if has patch (eg for '7.0' or \"7.0\"')") // // 3xxxx - Semantic analysis // diff --git a/source/slang/slang-emit-cuda.cpp b/source/slang/slang-emit-cuda.cpp index 64cb240fc..a728df755 100644 --- a/source/slang/slang-emit-cuda.cpp +++ b/source/slang/slang-emit-cuda.cpp @@ -375,6 +375,32 @@ bool CUDASourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOu return Super::tryEmitInstExprImpl(inst, inOuterPrec); } +void CUDASourceEmitter::handleCallExprDecorationsImpl(IRInst* funcValue) +{ + // Does this function declare any requirements on GLSL version or + // extensions, which should affect our output? 
+ + auto decoratedValue = funcValue; + while (auto specInst = as<IRSpecialize>(decoratedValue)) + { + decoratedValue = getSpecializedValue(specInst); + } + + for (auto decoration : decoratedValue->getDecorations()) + { + if( auto smDecoration = as<IRRequireCUDASMVersionDecoration>(decoration)) + { + SemanticVersion version; + version.setFromInteger(SemanticVersion::IntegerType(smDecoration->getCUDASMVersion())); + + if (version > m_extensionTracker->m_smVersion) + { + m_extensionTracker->m_smVersion = version; + } + } + } +} + void CUDASourceEmitter::emitLayoutDirectivesImpl(TargetRequest* targetReq) { SLANG_UNUSED(targetReq); diff --git a/source/slang/slang-emit-cuda.h b/source/slang/slang-emit-cuda.h index 3d23fd80f..dce3b4eb8 100644 --- a/source/slang/slang-emit-cuda.h +++ b/source/slang/slang-emit-cuda.h @@ -7,6 +7,13 @@ namespace Slang { +class CUDAExtensionTracker : public RefObject +{ +public: + + SemanticVersion m_smVersion; +}; + class CUDASourceEmitter : public CPPSourceEmitter { public: @@ -26,8 +33,11 @@ public: static UnownedStringSlice getBuiltinTypeName(IROp op); static UnownedStringSlice getVectorPrefix(IROp op); + virtual RefObject* getExtensionTracker() SLANG_OVERRIDE { return m_extensionTracker; } + CUDASourceEmitter(const Desc& desc) : - Super(desc) + Super(desc), + m_extensionTracker(new CUDAExtensionTracker) {} protected: @@ -51,6 +61,7 @@ protected: virtual void emitLoopControlDecorationImpl(IRLoopControlDecoration* decl) SLANG_OVERRIDE; + virtual void handleCallExprDecorationsImpl(IRInst* funcValue) SLANG_OVERRIDE; //virtual bool tryEmitGlobalParamImpl(IRGlobalParam* varDecl, IRType* varType) SLANG_OVERRIDE; virtual bool tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOuterPrec) SLANG_OVERRIDE; @@ -64,6 +75,8 @@ protected: virtual SlangResult calcScalarFuncName(HLSLIntrinsic::Op op, IRBasicType* type, StringBuilder& outBuilder) SLANG_OVERRIDE; SlangResult _calcCUDATextureTypeName(IRTextureTypeBase* texType, StringBuilder& outName); + + 
RefPtr<CUDAExtensionTracker> m_extensionTracker; }; } diff --git a/source/slang/slang-ir-inst-defs.h b/source/slang/slang-ir-inst-defs.h index c6aaf57ca..6c01a700a 100644 --- a/source/slang/slang-ir-inst-defs.h +++ b/source/slang/slang-ir-inst-defs.h @@ -404,6 +404,8 @@ INST(HighLevelDeclDecoration, highLevelDecl, 1, 0) INST(RequireSPIRVVersionDecoration, requireSPIRVVersion, 1, 0) INST(RequireGLSLVersionDecoration, requireGLSLVersion, 1, 0) INST(RequireGLSLExtensionDecoration, requireGLSLExtension, 1, 0) + INST(RequireCUDASMVersionDecoration, requireCUDASMVersion, 1, 0) + INST(ReadNoneDecoration, readNone, 0, 0) INST(VulkanCallablePayloadDecoration, vulkanCallablePayload, 0, 0) INST(EarlyDepthStencilDecoration, earlyDepthStencil, 0, 0) diff --git a/source/slang/slang-ir-insts.h b/source/slang/slang-ir-insts.h index 448bf9f0e..e307dc41e 100644 --- a/source/slang/slang-ir-insts.h +++ b/source/slang/slang-ir-insts.h @@ -190,6 +190,18 @@ struct IRRequireSPIRVVersionDecoration : IRDecoration } }; +struct IRRequireCUDASMVersionDecoration : IRDecoration +{ + enum { kOp = kIROp_RequireCUDASMVersionDecoration }; + IR_LEAF_ISA(RequireCUDASMVersionDecoration) + + IRConstant* getCUDASMVersionOperand() { return cast<IRConstant>(getOperand(0)); } + IntegerLiteralValue getCUDASMVersion() + { + return getCUDASMVersionOperand()->value.intVal; + } +}; + struct IRRequireGLSLExtensionDecoration : IRDecoration { enum { kOp = kIROp_RequireGLSLExtensionDecoration }; @@ -2131,6 +2143,12 @@ struct IRBuilder addDecoration(value, kIROp_RequireSPIRVVersionDecoration, getIntValue(getBasicType(BaseType::UInt64), intValue)); } + void addRequireCUDASMVersionDecoration(IRInst* value, const SemanticVersion& version) + { + SemanticVersion::IntegerType intValue = version.toInteger(); + addDecoration(value, kIROp_RequireCUDASMVersionDecoration, getIntValue(getBasicType(BaseType::UInt64), intValue)); + } + void addPatchConstantFuncDecoration(IRInst* value, IRInst* patchConstantFunc) { 
addDecoration(value, kIROp_PatchConstantFuncDecoration, patchConstantFunc); diff --git a/source/slang/slang-lower-to-ir.cpp b/source/slang/slang-lower-to-ir.cpp index 16dc14819..ea1196a6c 100644 --- a/source/slang/slang-lower-to-ir.cpp +++ b/source/slang/slang-lower-to-ir.cpp @@ -6196,7 +6196,10 @@ struct DeclLoweringVisitor : DeclVisitor<DeclLoweringVisitor, LoweredValInfo> { getBuilder()->addRequireSPIRVVersionDecoration(irFunc, versionMod->version); } - + for (auto versionMod : decl->GetModifiersOfType<RequiredCUDASMVersionModifier>()) + { + getBuilder()->addRequireCUDASMVersionDecoration(irFunc, versionMod->version); + } if (auto attr = decl->FindModifier<InstanceAttribute>()) { diff --git a/source/slang/slang-modifier-defs.h b/source/slang/slang-modifier-defs.h index 7ea1d0101..0c7156c72 100644 --- a/source/slang/slang-modifier-defs.h +++ b/source/slang/slang-modifier-defs.h @@ -80,6 +80,12 @@ SYNTAX_CLASS(RequiredSPIRVVersionModifier, Modifier) FIELD(SemanticVersion, version) END_SYNTAX_CLASS() +// A modifier to tag something as an intrinsic that requires +// a certain CUDA SM version to be enabled when used. Specified as "major.minor" +SYNTAX_CLASS(RequiredCUDASMVersionModifier, Modifier) +FIELD(SemanticVersion, version) +END_SYNTAX_CLASS() + SIMPLE_SYNTAX_CLASS(InOutModifier, OutModifier) // `__ref` modifier for by-reference parameter passing @@ -486,4 +492,4 @@ SIMPLE_SYNTAX_CLASS(ExternAttribute, Attribute) // An `[__unsafeForceInlineExternal]` attribute indicates that the callee should be inlined // into call sites after initial IR generation (that is, as early as possible). // -SIMPLE_SYNTAX_CLASS(UnsafeForceInlineEarlyAttribute, Attribute)
\ No newline at end of file +SIMPLE_SYNTAX_CLASS(UnsafeForceInlineEarlyAttribute, Attribute) diff --git a/source/slang/slang-parser.cpp b/source/slang/slang-parser.cpp index 46dd617a1..5a99f6ffb 100644 --- a/source/slang/slang-parser.cpp +++ b/source/slang/slang-parser.cpp @@ -4819,17 +4819,17 @@ namespace Slang return modifier; } - static RefPtr<RefObject> parseSPIRVVersionModifier(Parser* parser, void* /*userData*/) - { - auto modifier = new RequiredSPIRVVersionModifier(); + + static SlangResult parseSemanticVersion(Parser* parser, Token& outToken, SemanticVersion& outVersion) + { parser->ReadToken(TokenType::LParent); - Token token = parser->ReadToken(); + outToken = parser->ReadToken(); parser->ReadToken(TokenType::RParent); - UnownedStringSlice content = token.Content; + UnownedStringSlice content = outToken.Content; // We allow specified as major.minor or as a string (in quotes) - switch (token.type) + switch (outToken.type) { case TokenType::FloatingPointLiteral: { @@ -4838,26 +4838,44 @@ namespace Slang case TokenType::StringLiteral: { // We need to trim quotes if needed - SLANG_ASSERT(content.getLength() >= 2 && content[0] == '"' && content[content.getLength() -1] == '"'); + SLANG_ASSERT(content.getLength() >= 2 && content[0] == '"' && content[content.getLength() - 1] == '"'); content = UnownedStringSlice(content.begin() + 1, content.end() - 1); break; } default: { - parser->sink->diagnose(token, Diagnostics::invalidSPIRVVersion); - return RefPtr<RefObject>(); + return SLANG_FAIL; } } - + return SemanticVersion::parse(content, outVersion); + } + + static RefPtr<RefObject> parseSPIRVVersionModifier(Parser* parser, void* /*userData*/) + { + Token token; SemanticVersion version; - if (SLANG_FAILED(SemanticVersion::parse(content, modifier->version))) + if (SLANG_SUCCEEDED(parseSemanticVersion(parser, token, version))) { - // Unable to parse the error so fail - parser->sink->diagnose(token, Diagnostics::invalidSPIRVVersion); - return RefPtr<RefObject>(); + auto 
modifier = new RequiredSPIRVVersionModifier(); + modifier->version = version; + return modifier; } + parser->sink->diagnose(token, Diagnostics::invalidSPIRVVersion); + return RefPtr<RefObject>(); + } - return modifier; + static RefPtr<RefObject> parseCUDASMVersionModifier(Parser* parser, void* /*userData*/) + { + Token token; + SemanticVersion version; + if (SLANG_SUCCEEDED(parseSemanticVersion(parser, token, version))) + { + auto modifier = new RequiredCUDASMVersionModifier(); + modifier->version = version; + return modifier; + } + parser->sink->diagnose(token, Diagnostics::invalidCUDASMVersion); + return RefPtr<RefObject>(); } static RefPtr<RefObject> parseLayoutModifier(Parser* parser, void* /*userData*/) @@ -5149,6 +5167,7 @@ namespace Slang MODIFIER(__glsl_extension, parseGLSLExtensionModifier); MODIFIER(__glsl_version, parseGLSLVersionModifier); MODIFIER(__spirv_version, parseSPIRVVersionModifier); + MODIFIER(__cuda_sm_version, parseCUDASMVersionModifier); MODIFIER(__builtin_type, parseBuiltinTypeModifier); MODIFIER(__magic_type, parseMagicTypeModifier); diff --git a/tests/hlsl-intrinsic/wave-equality.slang b/tests/hlsl-intrinsic/wave-equality.slang index eb9e3e6a3..7ed67b632 100644 --- a/tests/hlsl-intrinsic/wave-equality.slang +++ b/tests/hlsl-intrinsic/wave-equality.slang @@ -2,8 +2,7 @@ //DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute //TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile cs_6_0 //TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute -// TODO(JS): Requires compute_7_0 which isn't available on all CI systems with CUDA -//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute +//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -render-features cuda_sm_7_0 //TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer RWStructuredBuffer<int> outputBuffer; diff --git a/tests/hlsl-intrinsic/wave-multi-prefix.slang b/tests/hlsl-intrinsic/wave-multi-prefix.slang index fb649d6ef..a1eb0e7a9 100644 --- 
a/tests/hlsl-intrinsic/wave-multi-prefix.slang +++ b/tests/hlsl-intrinsic/wave-multi-prefix.slang @@ -5,8 +5,7 @@ //DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -use-dxil -profile sm_6_5 // Disabled because we don't have GLSL intrinsics for these it seems //DISABLE_TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute -// TODO(JS): Disabled because requires compute_7_0 which isn't available on all CI with CUDA -//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute +//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -render-features cuda_sm_7_0 //TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer RWStructuredBuffer<int> outputBuffer; diff --git a/tools/render-test/cpu-compute-util.cpp b/tools/render-test/cpu-compute-util.cpp index 2ea74052f..e8b9e8b32 100644 --- a/tools/render-test/cpu-compute-util.cpp +++ b/tools/render-test/cpu-compute-util.cpp @@ -350,6 +350,13 @@ static SlangResult _newTexture(const InputTextureDesc& desc, slang::TypeLayoutRe return SLANG_FAIL; } +/* static */bool CPUComputeUtil::hasFeature(const UnownedStringSlice& feature) +{ + SLANG_UNUSED(feature); + // CPU has no specific support requirements + return false; +} + /* static */SlangResult CPUComputeUtil::calcBindings(const ShaderCompilerUtil::OutputAndLayout& compilationAndLayout, Context& outContext) { auto request = compilationAndLayout.output.request; diff --git a/tools/render-test/cpu-compute-util.h b/tools/render-test/cpu-compute-util.h index e6e896b6a..c66650506 100644 --- a/tools/render-test/cpu-compute-util.h +++ b/tools/render-test/cpu-compute-util.h @@ -49,7 +49,9 @@ struct CPUComputeUtil void* m_uniformEntryPointParams; }; - + /// True if this feature is available on CPU + static bool hasFeature(const Slang::UnownedStringSlice& feature); + /// Runs code across run styles and makes sure output buffers match static SlangResult checkStyleConsistency(ISlangSharedLibrary* sharedLib, const uint32_t dispatchSize[3], const 
ShaderCompilerUtil::OutputAndLayout& compilationAndLayout); diff --git a/tools/render-test/cuda/cuda-compute-util.cpp b/tools/render-test/cuda/cuda-compute-util.cpp index af7c0e6c2..48d73fa93 100644 --- a/tools/render-test/cuda/cuda-compute-util.cpp +++ b/tools/render-test/cuda/cuda-compute-util.cpp @@ -5,6 +5,7 @@ #include "../../source/core/slang-std-writers.h" #include "../../source/core/slang-token-reader.h" +#include "../../source/core/slang-semantic-version.h" #include "../bind-location.h" @@ -307,7 +308,7 @@ static int _calcSMCountPerMultiProcessor(int major, int minor) return last.coreCount; } -static SlangResult _findMaxFlopsDeviceId(int* outDevice) +static SlangResult _findMaxFlopsDeviceIndex(int* outDeviceIndex) { int smPerMultiproc = 0; int maxPerfDevice = -1; @@ -360,7 +361,7 @@ static SlangResult _findMaxFlopsDeviceId(int* outDevice) return SLANG_FAIL; } - *outDevice = maxPerfDevice; + *outDeviceIndex = maxPerfDevice; return SLANG_OK; } @@ -374,9 +375,13 @@ static SlangResult _initCuda(CUDAReportStyle reportType = CUDAReportStyle::Norma class ScopeCUDAContext { public: - ScopeCUDAContext() : m_context(nullptr) {} + ScopeCUDAContext() : + m_context(nullptr), + m_device(-1), + m_deviceIndex(-1) + {} - SlangResult init(unsigned int flags, CUdevice device, CUDAReportStyle reportType = CUDAReportStyle::Normal) + SlangResult init(unsigned int flags, int deviceIndex, CUDAReportStyle reportType = CUDAReportStyle::Normal) { SLANG_RETURN_ON_FAIL(_initCuda(reportType)); @@ -386,7 +391,10 @@ public: m_context = nullptr; } - SLANG_CUDA_RETURN_WITH_REPORT_ON_FAIL(cuCtxCreate(&m_context, flags, device), reportType); + m_deviceIndex = deviceIndex; + SLANG_CUDA_RETURN_ON_FAIL(cuDeviceGet(&m_device, deviceIndex)); + + SLANG_CUDA_RETURN_WITH_REPORT_ON_FAIL(cuCtxCreate(&m_context, flags, m_device), reportType); return SLANG_OK; } @@ -394,9 +402,8 @@ public: { SLANG_RETURN_ON_FAIL(_initCuda(reportType)); - int deviceId; - 
SLANG_RETURN_ON_FAIL(_findMaxFlopsDeviceId(&deviceId)); - SLANG_CUDA_RETURN_WITH_REPORT_ON_FAIL(cudaSetDevice(deviceId), reportType); + SLANG_RETURN_ON_FAIL(_findMaxFlopsDeviceIndex(&m_deviceIndex)); + SLANG_CUDA_RETURN_WITH_REPORT_ON_FAIL(cudaSetDevice(m_deviceIndex), reportType); if (m_context) { @@ -404,7 +411,9 @@ public: m_context = nullptr; } - SLANG_CUDA_RETURN_WITH_REPORT_ON_FAIL(cuCtxCreate(&m_context, flags, deviceId), reportType); + SLANG_CUDA_RETURN_ON_FAIL(cuDeviceGet(&m_device, m_deviceIndex)); + + SLANG_CUDA_RETURN_WITH_REPORT_ON_FAIL(cuCtxCreate(&m_context, flags, m_device), reportType); return SLANG_OK; } @@ -417,9 +426,57 @@ public: } SLANG_FORCE_INLINE operator CUcontext () const { return m_context; } + int m_deviceIndex; + CUdevice m_device; CUcontext m_context; }; +/* static */SlangResult CUDAComputeUtil::parseFeature(const Slang::UnownedStringSlice& feature, bool& outResult) +{ + outResult = false; + + if (feature.startsWith("cuda_sm_")) + { + const UnownedStringSlice versionSlice = UnownedStringSlice(feature.begin() + 8, feature.end()); + SemanticVersion requiredVersion; + SLANG_RETURN_ON_FAIL(SemanticVersion::parse(versionSlice, '_', requiredVersion)); + + // Need to get the version from the cuda device + ScopeCUDAContext context; + SLANG_RETURN_ON_FAIL(context.init(0, CUDAReportStyle::Silent)); + + const int deviceIndex = context.m_deviceIndex; + + int computeMode = -1; + SLANG_CUDA_RETURN_ON_FAIL(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, deviceIndex)); + + // If we don't have compute mode availability, we can't execute + if (computeMode == cudaComputeModeProhibited) + { + return SLANG_FAIL; + } + + int major, minor; + SLANG_CUDA_RETURN_ON_FAIL(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, deviceIndex)); + SLANG_CUDA_RETURN_ON_FAIL(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, deviceIndex)); + + SemanticVersion actualVersion; + actualVersion.set(major, minor); + + outResult = 
actualVersion >= requiredVersion; + + return SLANG_OK; + } + + return SLANG_FAIL; +} + +/* static */bool CUDAComputeUtil::hasFeature(const Slang::UnownedStringSlice& feature) +{ + bool res; + return SLANG_SUCCEEDED(parseFeature(feature, res)) ? res : false; +} + /* static */bool CUDAComputeUtil::canCreateDevice() { ScopeCUDAContext context; diff --git a/tools/render-test/cuda/cuda-compute-util.h b/tools/render-test/cuda/cuda-compute-util.h index f15c9d4e3..bc3d7d233 100644 --- a/tools/render-test/cuda/cuda-compute-util.h +++ b/tools/render-test/cuda/cuda-compute-util.h @@ -46,6 +46,10 @@ struct CUDAComputeUtil List<BindSet::Value*> m_buffers; }; + static SlangResult parseFeature(const Slang::UnownedStringSlice& feature, bool& outResult); + + static bool hasFeature(const Slang::UnownedStringSlice& feature); + static SlangResult createTextureResource(const ShaderInputLayoutEntry& srcEntry, slang::TypeLayoutReflection* typeLayout, RefPtr<CUDAResource>& outResource); static SlangResult execute(const ShaderCompilerUtil::OutputAndLayout& outputAndLayout, const uint32_t dispatchSize[3], Context& outContext); diff --git a/tools/render-test/render-test-main.cpp b/tools/render-test/render-test-main.cpp index ab041b5bc..1d88ee500 100644 --- a/tools/render-test/render-test-main.cpp +++ b/tools/render-test/render-test-main.cpp @@ -544,6 +544,15 @@ static SlangResult _innerMain(Slang::StdWriters* stdWriters, SlangSession* sessi // If it's CPU testing we don't need a window or a renderer if (gOptions.rendererType == RendererType::CPU) { + // Check we have all the required features + for (const auto& renderFeature : gOptions.renderFeatures) + { + if (!CPUComputeUtil::hasFeature(renderFeature.getUnownedSlice())) + { + return SLANG_E_NOT_AVAILABLE; + } + } + ShaderCompilerUtil::OutputAndLayout compilationAndLayout; SLANG_RETURN_ON_FAIL(ShaderCompilerUtil::compileWithLayout(session, gOptions.sourcePath, gOptions.compileArgs, gOptions.shaderType, input, compilationAndLayout)); @@ 
-604,12 +613,20 @@ static SlangResult _innerMain(Slang::StdWriters* stdWriters, SlangSession* sessi } if (gOptions.rendererType == RendererType::CUDA) - { + { +#if RENDER_TEST_CUDA + // Check we have all the required features + for (const auto& renderFeature : gOptions.renderFeatures) + { + if (!CUDAComputeUtil::hasFeature(renderFeature.getUnownedSlice())) + { + return SLANG_E_NOT_AVAILABLE; + } + } + ShaderCompilerUtil::OutputAndLayout compilationAndLayout; SLANG_RETURN_ON_FAIL(ShaderCompilerUtil::compileWithLayout(session, gOptions.sourcePath, gOptions.compileArgs, gOptions.shaderType, input, compilationAndLayout)); -#if RENDER_TEST_CUDA - const uint64_t startTicks = ProcessUtil::getClockTick(); CUDAComputeUtil::Context context; |
