diff options
| author | jsmall-nvidia <jsmall@nvidia.com> | 2019-09-16 09:38:21 -0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2019-09-16 09:38:21 -0400 |
| commit | 40d8f3aeedf018c7c6766e98ec64733abd90671e (patch) | |
| tree | 0c9cae7bc88d4344dd53596a88c3ce9918f2df13 /tools/render-test/cpu-compute-util.cpp | |
| parent | c2e5d2468ad6a38cdb8a067da0678302f6cc6066 (diff) | |
CPU Performance/Testing improvements (#1055)
* First pass of render-test refactor.
* Make window construction a function that can choose an implementation.
* Remove OpenGL as it currently has a Windows dependency.
* Disable Vulkan as the Renderer impl has a dependency on Windows.
* Pass Window in as parameter of 'update'.
* Add win-window.cpp as it was missing.
* Fix warning on windows about signs during comparison.
* Added mechanism to add random arrays as buffer inputs and select type
* Improved RenderGenerator to generate more types, and to be more careful around int32 ranges.
* Added support for security checks (for Visual Studio C++)
* Disable Exception handling being on by default when compiling kernels
* Added a 'Group' version of the entry point that will evaluate all threads in a group in a single call. In test code use this method if available.
* Added -compile-arg to be able to pass arguments to the compile within render-test
* Add documentation for the _Group execution feature.
* Fix some typos in cpu-target.md
Diffstat (limited to 'tools/render-test/cpu-compute-util.cpp')
| -rw-r--r-- | tools/render-test/cpu-compute-util.cpp | 45 |
1 files changed, 32 insertions, 13 deletions
diff --git a/tools/render-test/cpu-compute-util.cpp b/tools/render-test/cpu-compute-util.cpp index 85a8fb1b0..4294ad539 100644 --- a/tools/render-test/cpu-compute-util.cpp +++ b/tools/render-test/cpu-compute-util.cpp @@ -316,6 +316,7 @@ static CPUComputeUtil::Resource* _newOneTexture2D(int elemCount) slang::EntryPointReflection* entryPoint = nullptr; Func func = nullptr; + Func groupFunc = nullptr; { auto entryPointCount = reflection->getEntryPointCount(); SLANG_ASSERT(entryPointCount == 1); @@ -325,15 +326,19 @@ static CPUComputeUtil::Resource* _newOneTexture2D(int elemCount) const char* entryPointName = entryPoint->getName(); func = (Func)sharedLibrary->findFuncByName(entryPointName); - if (!func) + StringBuilder groupEntryPointName; + groupEntryPointName << entryPointName << "_Group"; + + groupFunc = (Func)sharedLibrary->findFuncByName(groupEntryPointName.getBuffer()); + + if (func == nullptr && groupFunc == nullptr) { return SLANG_FAIL; } } - SlangUInt numThreadsPerAxis[3]; - entryPoint->getComputeThreadGroupSize(3, numThreadsPerAxis); - + // If we have the group function, that's the faster way to execute all threads in group... 
+ if (groupFunc) { UniformState* uniformState = (UniformState*)context.binding.m_rootBuffer.m_data; CPPPrelude::UniformEntryPointParams* uniformEntryPointParams = (CPPPrelude::UniformEntryPointParams*)context.binding.m_entryPointBuffer.m_data; @@ -341,17 +346,33 @@ static CPUComputeUtil::Resource* _newOneTexture2D(int elemCount) CPPPrelude::ComputeVaryingInput varying; varying.groupID = {}; - for (int z = 0; z < int(numThreadsPerAxis[2]); ++z) + groupFunc(&varying, uniformEntryPointParams, uniformState); + } + else + { + // We can also fire off each thread individually + SlangUInt numThreadsPerAxis[3]; + entryPoint->getComputeThreadGroupSize(3, numThreadsPerAxis); + { - varying.groupThreadID.z = z; - for (int y = 0; y < int(numThreadsPerAxis[1]); ++y) + UniformState* uniformState = (UniformState*)context.binding.m_rootBuffer.m_data; + CPPPrelude::UniformEntryPointParams* uniformEntryPointParams = (CPPPrelude::UniformEntryPointParams*)context.binding.m_entryPointBuffer.m_data; + + CPPPrelude::ComputeVaryingInput varying; + varying.groupID = {}; + + for (int z = 0; z < int(numThreadsPerAxis[2]); ++z) { - varying.groupThreadID.y = y; - for (int x = 0; x < int(numThreadsPerAxis[0]); ++x) + varying.groupThreadID.z = z; + for (int y = 0; y < int(numThreadsPerAxis[1]); ++y) { - varying.groupThreadID.x = x; + varying.groupThreadID.y = y; + for (int x = 0; x < int(numThreadsPerAxis[0]); ++x) + { + varying.groupThreadID.x = x; - func(&varying, uniformEntryPointParams, uniformState); + func(&varying, uniformEntryPointParams, uniformState); + } } } } @@ -360,6 +381,4 @@ static CPUComputeUtil::Resource* _newOneTexture2D(int elemCount) return SLANG_OK; } - - } // renderer_test |
