From 40d8f3aeedf018c7c6766e98ec64733abd90671e Mon Sep 17 00:00:00 2001 From: jsmall-nvidia Date: Mon, 16 Sep 2019 09:38:21 -0400 Subject: CPU Performance/Testing improvements (#1055) * First pass of render-test refactor. * Make window construction a function that can choose an implementation. * Remove OpenGL as currently has windows dependency. * Disable Vulkan as Renderer impl has dependency on windows. * Pass Window in as parameter of 'update'. * Add win-window.cpp as was missing. * Fix warning on windows about signs during comparison. * * Added mechanism to add random arrays as buffer inputs and select type * Improved RenderGenerator to generate more types, and to be more careful around int32 ranges. * Added support for security checks (for Visual Studio C++) * Disable Exception handling being on by default when compiling kernels * Added a 'Group' version of the entry point that will evaluate all threads in a group in a single call. In test code use this method if available. * Added -compile-arg to be able to pass arguments to the compile within render-test * Add documentation for the _Group execution feature. 
* Fix some typos in cpu-target.md --- source/core/slang-cpp-compiler.h | 3 +- source/core/slang-random-generator.cpp | 24 ++- source/core/slang-random-generator.h | 8 +- source/core/slang-visual-studio-compiler-util.cpp | 9 + source/slang/slang-compiler.cpp | 6 +- source/slang/slang-emit-cpp.cpp | 221 ++++++++++++++++------ source/slang/slang-emit-cpp.h | 4 + 7 files changed, 215 insertions(+), 60 deletions(-) (limited to 'source') diff --git a/source/core/slang-cpp-compiler.h b/source/core/slang-cpp-compiler.h index 22c17606a..f1592d240 100644 --- a/source/core/slang-cpp-compiler.h +++ b/source/core/slang-cpp-compiler.h @@ -95,7 +95,8 @@ public: enum Enum : Flags { EnableExceptionHandling = 0x01, - Verbose = 0x02, + Verbose = 0x02, + EnableSecurityChecks = 0x04, }; }; diff --git a/source/core/slang-random-generator.cpp b/source/core/slang-random-generator.cpp index 7e8476c30..ce43067aa 100644 --- a/source/core/slang-random-generator.cpp +++ b/source/core/slang-random-generator.cpp @@ -32,15 +32,33 @@ int64_t RandomGenerator::nextInt64() return (int64_t(high) << 32) | low; } -int32_t RandomGenerator::nextInt32InRange(int32_t min, int32_t max) +uint32_t RandomGenerator::nextUInt32InRange(uint32_t min, uint32_t max) { - int32_t diff = max - min; + // Make sure max is at least in + max = (max >= min) ? max : min; + + // Make 64 bit so can be lazier than having to take care of 32 bit overflow/underflow issues + uint32_t diff = max - min; if (diff <= 1) { return min; } + return (nextUInt32() % diff) + min; +} - return (nextPositiveInt32() % diff) + min; + +int32_t RandomGenerator::nextInt32InRange(int32_t min, int32_t max) +{ + // Make sure max is at least in + max = (max >= min) ? 
max : min; + + // Make 64 bit so can be lazier than having to take care of 32 bit overflow/underflow issues + uint32_t diff = uint32_t(int64_t(max) - int64_t(min)); + if (diff <= 1) + { + return min; + } + return int32_t(int64_t(nextUInt32() % diff) + min); } int64_t RandomGenerator::nextInt64InRange(int64_t min, int64_t max) diff --git a/source/core/slang-random-generator.h b/source/core/slang-random-generator.h index 8b4d1759b..57f0e8630 100644 --- a/source/core/slang-random-generator.h +++ b/source/core/slang-random-generator.h @@ -30,6 +30,9 @@ class RandomGenerator: public RefObject /// Get the next bool virtual bool nextBool(); + /// Next uint32_t + uint32_t nextUInt32() { return uint32_t(nextInt32()); } + /// Next Int32 which can only be positive int32_t nextPositiveInt32() { return nextInt32() & 0x7fffffff; } /// Next Int64 which can only be positive @@ -38,9 +41,12 @@ class RandomGenerator: public RefObject /// Returns value up to BUT NOT INCLUDING maxValue. int32_t nextInt32UpTo(int32_t maxValue) { assert(maxValue > 0); return (maxValue <= 1) ? 0 : (nextPositiveInt32() % maxValue); } - /// Returns value from min up to BUT NOT INCLUDING max + /// Returns value from min up to BUT NOT INCLUDING max. int32_t nextInt32InRange(int32_t min, int32_t max); + /// Returns value from min up to BUT NOT INCLUDING max + uint32_t nextUInt32InRange(uint32_t min, uint32_t max); + /// Returns value up to BUT NOT INCLUDING maxValue int64_t nextInt64UpTo(int64_t maxValue) { assert(maxValue > 0); return (maxValue <= 1) ? 
0 : (nextPositiveInt64() % maxValue); } diff --git a/source/core/slang-visual-studio-compiler-util.cpp b/source/core/slang-visual-studio-compiler-util.cpp index 48ef108e4..3d0cfdc61 100644 --- a/source/core/slang-visual-studio-compiler-util.cpp +++ b/source/core/slang-visual-studio-compiler-util.cpp @@ -95,6 +95,15 @@ namespace Slang // Doesn't appear to be a VS equivalent } + if (options.flags & CompileOptions::Flag::EnableSecurityChecks) + { + cmdLine.addArg("/GS"); + } + else + { + cmdLine.addArg("/GS-"); + } + switch (options.debugInfoType) { default: diff --git a/source/slang/slang-compiler.cpp b/source/slang/slang-compiler.cpp index 2d5557371..dbb900ab6 100644 --- a/source/slang/slang-compiler.cpp +++ b/source/slang/slang-compiler.cpp @@ -1357,11 +1357,15 @@ SlangResult dissassembleDXILUsingDXC( } } - CPPCompiler::CompileOptions options; + typedef CPPCompiler::CompileOptions CompileOptions; + CompileOptions options; // Set the source type options.sourceType = (rawSourceLanguage == SourceLanguage::C) ? 
CPPCompiler::SourceType::C : CPPCompiler::SourceType::CPP; + // Disable exceptions and security checks + options.flags &= ~(CompileOptions::Flag::EnableExceptionHandling | CompileOptions::Flag::EnableSecurityChecks); + // Generate a path a temporary filename for output module String modulePath; SLANG_RETURN_ON_FAIL(File::generateTemporary(UnownedStringSlice::fromLiteral("slang-generated"), modulePath)); diff --git a/source/slang/slang-emit-cpp.cpp b/source/slang/slang-emit-cpp.cpp index a5173549a..df6d1bee8 100644 --- a/source/slang/slang-emit-cpp.cpp +++ b/source/slang/slang-emit-cpp.cpp @@ -2463,6 +2463,103 @@ struct GlobalParamInfo UInt size; }; +void CPPSourceEmitter::_emitEntryPointDefinitionStart(IRFunc* func, IRGlobalParam* entryPointGlobalParams, const String& funcName) +{ + auto resultType = func->getResultType(); + + auto entryPointLayout = asEntryPoint(func); + + // Emit the actual function + emitEntryPointAttributes(func, entryPointLayout); + emitType(resultType, funcName); + + m_writer->emit("(ComputeVaryingInput* varyingInput, UniformEntryPointParams* params, UniformState* uniformState)\n{\n"); + emitSemantics(func); + + m_writer->indent(); + // Initialize when constructing so that globals are zeroed + m_writer->emit("Context context = {};\n"); + m_writer->emit("context.uniformState = uniformState;\n"); + m_writer->emit("context.varyingInput = *varyingInput;\n"); + + if (entryPointGlobalParams) + { + auto varDecl = entryPointGlobalParams; + auto rawType = varDecl->getDataType(); + + auto varType = rawType; + + m_writer->emit("context."); + m_writer->emit(getName(varDecl)); + m_writer->emit(" = ("); + emitType(varType); + m_writer->emit("*)params; \n"); + } +} + +void CPPSourceEmitter::_emitEntryPointDefinitionEnd(IRFunc* func) +{ + SLANG_UNUSED(func); + m_writer->dedent(); + m_writer->emit("}\n"); +} + +// We want to order such that the largest range is the inner loop + +void CPPSourceEmitter::_emitEntryPointGroup(const UInt sizeAlongAxis[3], const 
String& funcName) +{ + struct AxisWithSize + { + typedef AxisWithSize ThisType; + bool operator<(const ThisType& rhs) const { return size < rhs.size; } + + int axis; + UInt size; + }; + List axes; + + for (int i = 0; i < 3; ++i) + { + if (sizeAlongAxis[i] > 1) + { + AxisWithSize axisWithSize; + axisWithSize.axis = i; + axisWithSize.size = sizeAlongAxis[i]; + axes.add(axisWithSize); + } + } + + axes.sort(); + + // Open all the loops + StringBuilder builder; + for (Index i = 0; i < axes.getCount(); ++i) + { + const auto& axis = axes[i]; + builder.Clear(); + const char elem[2] = { s_elemNames[axis.axis], 0 }; + builder << "for (uint32_t " << elem << " = start." << elem << "; " << elem << " < start." << elem << " + " << axis.size << "; ++" << elem << ")\n{\n"; + m_writer->emit(builder); + m_writer->indent(); + + builder.Clear(); + builder << "context.dispatchThreadID." << elem << " = " << elem << ";\n"; + m_writer->emit(builder); + } + + // just call at inner loop point + m_writer->emit("context._"); + m_writer->emit(funcName); + m_writer->emit("();\n"); + + // Close all the loops + for (Index i = Index(axes.getCount() - 1); i >= 0; --i) + { + m_writer->dedent(); + m_writer->emit("}\n"); + } +} + void CPPSourceEmitter::emitModuleImpl(IRModule* module) { List actions; @@ -2600,77 +2697,93 @@ void CPPSourceEmitter::emitModuleImpl(IRModule* module) auto entryPointLayout = asEntryPoint(func); if (entryPointLayout) { - auto resultType = func->getResultType(); - auto name = getFuncName(func); + // https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/sv-dispatchthreadid + // SV_DispatchThreadID is the sum of SV_GroupID * numthreads and GroupThreadID. 
- // Emit the actual function - emitEntryPointAttributes(func, entryPointLayout); - emitType(resultType, name); + static const UInt kAxisCount = 3; + UInt sizeAlongAxis[kAxisCount]; - m_writer->emit("(ComputeVaryingInput* varyingInput, UniformEntryPointParams* params, UniformState* uniformState)\n{\n"); - emitSemantics(func); + String funcName = getFuncName(func); - m_writer->indent(); - // Initialize when constructing so that globals are zeroed - m_writer->emit("Context context = {};\n"); - m_writer->emit("context.uniformState = uniformState;\n"); - m_writer->emit("context.varyingInput = *varyingInput;\n"); + { + _emitEntryPointDefinitionStart(func, entryPointGlobalParams, funcName); - if (entryPointGlobalParams) - { - auto varDecl = entryPointGlobalParams; - auto rawType = varDecl->getDataType(); + // Emit dispatchThreadID + if (entryPointLayout->profile.GetStage() == Stage::Compute) + { + // TODO: this is kind of gross because we are using a public + // reflection API function, rather than some kind of internal + // utility it forwards to... + spReflectionEntryPoint_getComputeThreadGroupSize((SlangReflectionEntryPoint*)entryPointLayout, kAxisCount, &sizeAlongAxis[0]); - auto varType = rawType; + m_writer->emit("context.dispatchThreadID = {\n"); + m_writer->indent(); - m_writer->emit("context."); - m_writer->emit(getName(varDecl)); - m_writer->emit(" = ("); - emitType(varType); - m_writer->emit("*)params; \n"); - } - - // Emit dispatchThreadID - if (entryPointLayout->profile.GetStage() == Stage::Compute) - { - // https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/sv-dispatchthreadid - // SV_DispatchThreadID is the sum of SV_GroupID * numthreads and GroupThreadID. + StringBuilder builder; + for (int i = 0; i < kAxisCount; ++i) + { + builder.Clear(); + const char elem[2] = {s_elemNames[i], 0}; + builder << "varyingInput->groupID." << elem << " * " << sizeAlongAxis[i] << " + varyingInput->groupThreadID." 
<< elem; + if (i < kAxisCount - 1) + { + builder << ","; + } + builder << "\n"; + m_writer->emit(builder); + } - static const UInt kAxisCount = 3; - UInt sizeAlongAxis[kAxisCount]; + m_writer->dedent(); + m_writer->emit("};\n"); + } - // TODO: this is kind of gross because we are using a public - // reflection API function, rather than some kind of internal - // utility it forwards to... - spReflectionEntryPoint_getComputeThreadGroupSize((SlangReflectionEntryPoint*)entryPointLayout, kAxisCount, &sizeAlongAxis[0]); + m_writer->emit("context._"); + m_writer->emit(funcName); + m_writer->emit("();\n"); - m_writer->emit("context.dispatchThreadID = {\n"); - m_writer->indent(); + _emitEntryPointDefinitionEnd(func); + } + // Emit the group version which runs for all elements in a thread group + { StringBuilder builder; - - for (int i = 0; i < kAxisCount; ++i) + builder << getFuncName(func); + builder << "_Group"; + + String groupFuncName = builder; + + _emitEntryPointDefinitionStart(func, entryPointGlobalParams, groupFuncName); + + // Emit dispatchThreadID + if (entryPointLayout->profile.GetStage() == Stage::Compute) { - builder.Clear(); - const char elem[2] = {s_elemNames[i], 0}; - builder << "varyingInput->groupID." << elem << " * " << sizeAlongAxis[i] << " + varyingInput->groupThreadID." << elem; - if (i < kAxisCount - 1) + spReflectionEntryPoint_getComputeThreadGroupSize((SlangReflectionEntryPoint*)entryPointLayout, kAxisCount, &sizeAlongAxis[0]); + { - builder << ","; + m_writer->emit("const uint3 start = {\n"); + m_writer->indent(); + for (int i = 0; i < kAxisCount; ++i) + { + builder.Clear(); + const char elem[2] = { s_elemNames[i], 0 }; + builder << "varyingInput->groupID." 
<< elem << " * " << sizeAlongAxis[i]; + if (i < kAxisCount - 1) + { + builder << ","; + } + builder << "\n"; + m_writer->emit(builder); + } + m_writer->dedent(); + m_writer->emit("};\n"); } - builder << "\n"; - m_writer->emit(builder); + m_writer->emit("context.dispatchThreadID = start;\n"); + + _emitEntryPointGroup(sizeAlongAxis, funcName); } - m_writer->dedent(); - m_writer->emit("};\n"); + _emitEntryPointDefinitionEnd(func); } - - m_writer->emit("context._"); - m_writer->emit(name); - m_writer->emit("();\n"); - m_writer->dedent(); - m_writer->emit("}\n"); } } } diff --git a/source/slang/slang-emit-cpp.h b/source/slang/slang-emit-cpp.h index 6c300320a..906086d71 100644 --- a/source/slang/slang-emit-cpp.h +++ b/source/slang/slang-emit-cpp.h @@ -257,6 +257,10 @@ protected: SlangResult _calcTextureTypeName(IRTextureTypeBase* texType, StringBuilder& outName); + void _emitEntryPointDefinitionStart(IRFunc* func, IRGlobalParam* entryPointGlobalParams, const String& funcName); + void _emitEntryPointDefinitionEnd(IRFunc* func); + void _emitEntryPointGroup(const UInt sizeAlongAxis[3], const String& funcName); + Dictionary m_intrinsicNameMap; Dictionary m_typeNameMap; -- cgit v1.2.3