diff options
Diffstat (limited to 'tools/render-test/cpu-compute-util.cpp')
| -rw-r--r-- | tools/render-test/cpu-compute-util.cpp | 45 |
1 files changed, 32 insertions, 13 deletions
diff --git a/tools/render-test/cpu-compute-util.cpp b/tools/render-test/cpu-compute-util.cpp index 85a8fb1b0..4294ad539 100644 --- a/tools/render-test/cpu-compute-util.cpp +++ b/tools/render-test/cpu-compute-util.cpp @@ -316,6 +316,7 @@ static CPUComputeUtil::Resource* _newOneTexture2D(int elemCount) slang::EntryPointReflection* entryPoint = nullptr; Func func = nullptr; + Func groupFunc = nullptr; { auto entryPointCount = reflection->getEntryPointCount(); SLANG_ASSERT(entryPointCount == 1); @@ -325,15 +326,19 @@ static CPUComputeUtil::Resource* _newOneTexture2D(int elemCount) const char* entryPointName = entryPoint->getName(); func = (Func)sharedLibrary->findFuncByName(entryPointName); - if (!func) + StringBuilder groupEntryPointName; + groupEntryPointName << entryPointName << "_Group"; + + groupFunc = (Func)sharedLibrary->findFuncByName(groupEntryPointName.getBuffer()); + + if (func == nullptr && groupFunc == nullptr) { return SLANG_FAIL; } } - SlangUInt numThreadsPerAxis[3]; - entryPoint->getComputeThreadGroupSize(3, numThreadsPerAxis); - + // If we have the group function, that's the faster way to execute all threads in group... + if (groupFunc) { UniformState* uniformState = (UniformState*)context.binding.m_rootBuffer.m_data; CPPPrelude::UniformEntryPointParams* uniformEntryPointParams = (CPPPrelude::UniformEntryPointParams*)context.binding.m_entryPointBuffer.m_data; @@ -341,17 +346,33 @@ static CPUComputeUtil::Resource* _newOneTexture2D(int elemCount) CPPPrelude::ComputeVaryingInput varying; varying.groupID = {}; - for (int z = 0; z < int(numThreadsPerAxis[2]); ++z) + groupFunc(&varying, uniformEntryPointParams, uniformState); + } + else + { + // We can also fire off each thread individually + SlangUInt numThreadsPerAxis[3]; + entryPoint->getComputeThreadGroupSize(3, numThreadsPerAxis); + { - varying.groupThreadID.z = z; - for (int y = 0; y < int(numThreadsPerAxis[1]); ++y) + UniformState* uniformState = (UniformState*)context.binding.m_rootBuffer.m_data; + CPPPrelude::UniformEntryPointParams* uniformEntryPointParams = (CPPPrelude::UniformEntryPointParams*)context.binding.m_entryPointBuffer.m_data; + + CPPPrelude::ComputeVaryingInput varying; + varying.groupID = {}; + + for (int z = 0; z < int(numThreadsPerAxis[2]); ++z) { - varying.groupThreadID.y = y; - for (int x = 0; x < int(numThreadsPerAxis[0]); ++x) + varying.groupThreadID.z = z; + for (int y = 0; y < int(numThreadsPerAxis[1]); ++y) { - varying.groupThreadID.x = x; + varying.groupThreadID.y = y; + for (int x = 0; x < int(numThreadsPerAxis[0]); ++x) + { + varying.groupThreadID.x = x; - func(&varying, uniformEntryPointParams, uniformState); + func(&varying, uniformEntryPointParams, uniformState); + } } } } @@ -360,6 +381,4 @@ static CPUComputeUtil::Resource* _newOneTexture2D(int elemCount) return SLANG_OK; } - - } // renderer_test |
