summaryrefslogtreecommitdiffstats
path: root/tools/render-test
diff options
context:
space:
mode:
authorjsmall-nvidia <jsmall@nvidia.com>2019-09-18 11:40:59 -0400
committerGitHub <noreply@github.com>2019-09-18 11:40:59 -0400
commit31c7abcc27a33d63ac8d335387a0ce7b3ad74954 (patch)
tree3b4254df7bdbf8b497aa8a3e5f08f8927c1afbc6 /tools/render-test
parent3af404da7f7f125464b78159940cb3fc06e69cc5 (diff)
Improvements to testing and ABI for CPU (#1057)
* WIP: Improving CPU performance/ABI * Optionally output code on CPU for groupThreadID and groupID. * Added ability to set compute dispatch size on command line for render-test. Dispatch compute tests taking into account dispatch size. Added test that semantics are working. * Test using GroupRange. * Fix problem with adding \n for external diagnostic - only do it if there isn't a \n at the end. Change the output order (put result before) so last value is diagnostic string. * Made GroupRange the default exposed CPU ABI entry point style. Removed CPU_EXECUTE test style - as tested via the now cross-platform render-test. * Split out execution from setup for execution to improve perf. * For better code coverage/testing test all styles of CPU compute entry point. * Improve documentation for ABI changes for CPU code. Add 'expecting' to error message from review. * Fix small typos.
Diffstat (limited to 'tools/render-test')
-rw-r--r--tools/render-test/cpu-compute-util.cpp223
-rw-r--r--tools/render-test/cpu-compute-util.h29
-rw-r--r--tools/render-test/options.cpp2
-rw-r--r--tools/render-test/render-test-main.cpp23
4 files changed, 205 insertions, 72 deletions
diff --git a/tools/render-test/cpu-compute-util.cpp b/tools/render-test/cpu-compute-util.cpp
index 1b1adef82..81325ce80 100644
--- a/tools/render-test/cpu-compute-util.cpp
+++ b/tools/render-test/cpu-compute-util.cpp
@@ -301,127 +301,220 @@ static CPUComputeUtil::Resource* _newOneTexture2D(int elemCount)
return SLANG_OK;
}
-/* static */SlangResult CPUComputeUtil::execute(const uint32_t dispatchSize[3], const ShaderCompilerUtil::OutputAndLayout& compilationAndLayout, Context& context)
+/* static */SlangResult CPUComputeUtil::calcExecuteInfo(ExecuteStyle style, const uint32_t dispatchSize[3], const ShaderCompilerUtil::OutputAndLayout& compilationAndLayout, Context& context, ExecuteInfo& out)
{
auto request = compilationAndLayout.output.request;
auto reflection = (slang::ShaderReflection*) spGetReflection(request);
+ slang::EntryPointReflection* entryPoint = nullptr;
+ auto entryPointCount = reflection->getEntryPointCount();
+ SLANG_ASSERT(entryPointCount == 1);
+
+ entryPoint = reflection->getEntryPointByIndex(0);
+
+ const char* entryPointName = entryPoint->getName();
+
ComPtr<ISlangSharedLibrary> sharedLibrary;
SLANG_RETURN_ON_FAIL(spGetEntryPointHostCallable(request, 0, 0, sharedLibrary.writeRef()));
- // Use reflection to find the entry point name
-
- struct UniformState;
- typedef void(*Func)(CPPPrelude::ComputeVaryingInput* varyingInput, CPPPrelude::UniformEntryPointParams* uniformEntryPointParams, UniformState* uniformState);
- typedef void(*GroupRangeFunc)(CPPPrelude::GroupComputeVaryingInput* varyingInput, CPPPrelude::UniformEntryPointParams* uniformEntryPointParams, UniformState* uniformState);
-
- slang::EntryPointReflection* entryPoint = nullptr;
- Func func = nullptr;
- Func groupFunc = nullptr;
- GroupRangeFunc groupRangeFunc = nullptr;
+ // Copy dispatch size
+ for (int i = 0; i < 3; ++i)
{
- auto entryPointCount = reflection->getEntryPointCount();
- SLANG_ASSERT(entryPointCount == 1);
-
- entryPoint = reflection->getEntryPointByIndex(0);
+ out.m_dispatchSize[i] = dispatchSize[i];
+ }
- const char* entryPointName = entryPoint->getName();
- func = (Func)sharedLibrary->findFuncByName(entryPointName);
+ out.m_style = style;
+ out.m_uniformState = (void*)context.binding.m_rootBuffer.m_data;
+ out.m_uniformEntryPointParams = (void*)context.binding.m_entryPointBuffer.m_data;
+ switch (style)
+ {
+ case ExecuteStyle::Group:
{
StringBuilder groupEntryPointName;
groupEntryPointName << entryPointName << "_Group";
- groupFunc = (Func)sharedLibrary->findFuncByName(groupEntryPointName.getBuffer());
- }
+ CPPPrelude::ComputeFunc groupFunc = (CPPPrelude::ComputeFunc)sharedLibrary->findFuncByName(groupEntryPointName.getBuffer());
+ if (!groupFunc)
+ {
+ return SLANG_FAIL;
+ }
+ out.m_func = (ExecuteInfo::Func)groupFunc;
+ break;
+ }
+ case ExecuteStyle::GroupRange:
{
- StringBuilder groupRangeEntryPointName;
- groupRangeEntryPointName << entryPointName << "_GroupRange";
-
- groupRangeFunc = (GroupRangeFunc)sharedLibrary->findFuncByName(groupRangeEntryPointName.getBuffer());
+ CPPPrelude::ComputeFunc groupRangeFunc = nullptr;
+ groupRangeFunc = (CPPPrelude::ComputeFunc)sharedLibrary->findFuncByName(entryPointName);
+ if (!groupRangeFunc)
+ {
+ return SLANG_FAIL;
+ }
+ out.m_func = (ExecuteInfo::Func)groupRangeFunc;
+ break;
}
+ case ExecuteStyle::Thread:
+ {
+ StringBuilder threadEntryPointName;
+ threadEntryPointName << entryPointName << "_Thread";
- if (func == nullptr && groupFunc == nullptr && groupRangeFunc == nullptr)
+ CPPPrelude::ComputeThreadFunc threadFunc = (CPPPrelude::ComputeThreadFunc)sharedLibrary->findFuncByName(threadEntryPointName.getBuffer());
+ if (!threadFunc)
+ {
+ return SLANG_FAIL;
+ }
+
+ SlangUInt numThreadsPerAxis[3];
+ entryPoint->getComputeThreadGroupSize(3, numThreadsPerAxis);
+ for (int i = 0; i < 3; ++i)
+ {
+ out.m_numThreadsPerAxis[i] = uint32_t(numThreadsPerAxis[i]);
+ }
+ out.m_func = (ExecuteInfo::Func)threadFunc;
+ break;
+ }
+ default:
{
return SLANG_FAIL;
}
}
- // If we have the group function, that's the faster way to execute all threads in group...
- if (groupRangeFunc)
- {
- UniformState* uniformState = (UniformState*)context.binding.m_rootBuffer.m_data;
- CPPPrelude::UniformEntryPointParams* uniformEntryPointParams = (CPPPrelude::UniformEntryPointParams*)context.binding.m_entryPointBuffer.m_data;
- CPPPrelude::GroupComputeVaryingInput varying;
-
- varying.startGroupID = {};
- varying.endGroupID = { dispatchSize[0], dispatchSize[1], dispatchSize[2] };
-
- groupRangeFunc(&varying, uniformEntryPointParams, uniformState);
- }
- else if (groupFunc)
- {
- CPPPrelude::ComputeVaryingInput varying;
+ return SLANG_OK;
+}
+
+/* static */SlangResult CPUComputeUtil::execute(const ExecuteInfo& info)
+{
+ CPPPrelude::UniformState* uniformState = (CPPPrelude::UniformState*)info.m_uniformState;
+ CPPPrelude::UniformEntryPointParams* uniformEntryPointParams = (CPPPrelude::UniformEntryPointParams*)info.m_uniformEntryPointParams;
- for (uint32_t groupZ = 0; groupZ < dispatchSize[2]; ++groupZ)
+ switch (info.m_style)
+ {
+ case ExecuteStyle::Group:
{
- for (uint32_t groupY = 0; groupY < dispatchSize[1]; ++groupY)
- {
- for (uint32_t groupX = 0; groupX < dispatchSize[0]; ++groupX)
- {
- UniformState* uniformState = (UniformState*)context.binding.m_rootBuffer.m_data;
- CPPPrelude::UniformEntryPointParams* uniformEntryPointParams = (CPPPrelude::UniformEntryPointParams*)context.binding.m_entryPointBuffer.m_data;
+ CPPPrelude::ComputeFunc groupFunc = (CPPPrelude::ComputeFunc)info.m_func;
+ CPPPrelude::ComputeVaryingInput varying;
- varying.groupID = {groupX, groupY, groupZ};
+ const uint32_t groupXCount = info.m_dispatchSize[0];
+ const uint32_t groupYCount = info.m_dispatchSize[1];
+ const uint32_t groupZCount = info.m_dispatchSize[2];
- groupFunc(&varying, uniformEntryPointParams, uniformState);
+ for (uint32_t groupZ = 0; groupZ < groupZCount; ++groupZ)
+ {
+ for (uint32_t groupY = 0; groupY < groupYCount; ++groupY)
+ {
+ for (uint32_t groupX = 0; groupX < groupXCount; ++groupX)
+ {
+ varying.startGroupID = { groupX, groupY, groupZ };
+ groupFunc(&varying, uniformEntryPointParams, uniformState);
+ }
}
}
+ break;
}
+ case ExecuteStyle::GroupRange:
+ {
+ CPPPrelude::ComputeFunc groupRangeFunc = (CPPPrelude::ComputeFunc)info.m_func;
+ CPPPrelude::ComputeVaryingInput varying;
- }
- else
- {
- // We can also fire off each thread individually
- SlangUInt numThreadsPerAxis[3];
- entryPoint->getComputeThreadGroupSize(3, numThreadsPerAxis);
+ varying.startGroupID = {};
+ varying.endGroupID = { info.m_dispatchSize[0], info.m_dispatchSize[1], info.m_dispatchSize[2] };
+ groupRangeFunc(&varying, uniformEntryPointParams, uniformState);
+ break;
+ }
+ case ExecuteStyle::Thread:
{
- UniformState* uniformState = (UniformState*)context.binding.m_rootBuffer.m_data;
- CPPPrelude::UniformEntryPointParams* uniformEntryPointParams = (CPPPrelude::UniformEntryPointParams*)context.binding.m_entryPointBuffer.m_data;
+ CPPPrelude::ComputeThreadFunc threadFunc = (CPPPrelude::ComputeThreadFunc)info.m_func;
+ CPPPrelude::ComputeThreadVaryingInput varying;
- CPPPrelude::ComputeVaryingInput varying;
+ const uint32_t groupXCount = info.m_dispatchSize[0];
+ const uint32_t groupYCount = info.m_dispatchSize[1];
+ const uint32_t groupZCount = info.m_dispatchSize[2];
+
+ const uint32_t threadXCount = uint32_t(info.m_numThreadsPerAxis[0]);
+ const uint32_t threadYCount = uint32_t(info.m_numThreadsPerAxis[1]);
+ const uint32_t threadZCount = uint32_t(info.m_numThreadsPerAxis[2]);
- for (uint32_t groupZ = 0; groupZ < dispatchSize[2]; ++groupZ)
+ for (uint32_t groupZ = 0; groupZ < groupZCount; ++groupZ)
{
- for (uint32_t groupY = 0; groupY < dispatchSize[1]; ++groupY)
+ for (uint32_t groupY = 0; groupY < groupYCount; ++groupY)
{
- for (uint32_t groupX = 0; groupX < dispatchSize[0]; ++groupX)
+ for (uint32_t groupX = 0; groupX < groupXCount; ++groupX)
{
- varying.groupID = {groupX, groupY, groupZ};
+ varying.groupID = { groupX, groupY, groupZ };
- for (int z = 0; z < int(numThreadsPerAxis[2]); ++z)
+ for (uint32_t z = 0; z < threadZCount; ++z)
{
varying.groupThreadID.z = z;
- for (int y = 0; y < int(numThreadsPerAxis[1]); ++y)
+ for (uint32_t y = 0; y < threadYCount; ++y)
{
varying.groupThreadID.y = y;
- for (int x = 0; x < int(numThreadsPerAxis[0]); ++x)
+ for (uint32_t x = 0; x < threadXCount; ++x)
{
varying.groupThreadID.x = x;
- func(&varying, uniformEntryPointParams, uniformState);
+ threadFunc(&varying, uniformEntryPointParams, uniformState);
}
}
}
}
}
}
+ break;
+ }
+ default: return SLANG_FAIL;
+ }
+
+ return SLANG_OK;
+}
+
+
+/* static */ SlangResult CPUComputeUtil::checkStyleConsistency(const uint32_t dispatchSize[3], const ShaderCompilerUtil::OutputAndLayout& compilationAndLayout)
+{
+ Context context;
+ SLANG_RETURN_ON_FAIL(CPUComputeUtil::calcBindings(compilationAndLayout, context));
+
+ // Run the thread style to test against
+ {
+ ExecuteInfo info;
+ SLANG_RETURN_ON_FAIL(calcExecuteInfo(ExecuteStyle::Thread, dispatchSize, compilationAndLayout, context, info));
+ SLANG_RETURN_ON_FAIL(execute(info));
+ }
+
+ ExecuteStyle styles[] = { ExecuteStyle::Group, ExecuteStyle::GroupRange };
+ for (auto style: styles)
+ {
+ Context checkContext;
+ SLANG_RETURN_ON_FAIL(CPUComputeUtil::calcBindings(compilationAndLayout, checkContext));
+
+ ExecuteInfo info;
+ SLANG_RETURN_ON_FAIL(calcExecuteInfo(style, dispatchSize, compilationAndLayout, checkContext, info));
+ SLANG_RETURN_ON_FAIL(execute(info));
+
+ // Make sure the out buffers are all the same
+
+ const auto& entries = compilationAndLayout.layout.entries;
+
+ for (int i = 0; i < entries.getCount(); ++i)
+ {
+ const auto& entry = entries[i];
+ if (entry.isOutput)
+ {
+ const auto& buffer = context.buffers[i];
+ const auto& checkBuffer = checkContext.buffers[i];
+
+ if (buffer.m_sizeInBytes != checkBuffer.m_sizeInBytes ||
+ memcmp(buffer.m_data, checkBuffer.m_data, buffer.m_sizeInBytes) != 0)
+ {
+ return SLANG_FAIL;
+ }
+ }
}
}
return SLANG_OK;
}
+
} // renderer_test
diff --git a/tools/render-test/cpu-compute-util.h b/tools/render-test/cpu-compute-util.h
index b30ef146b..1284735c0 100644
--- a/tools/render-test/cpu-compute-util.h
+++ b/tools/render-test/cpu-compute-util.h
@@ -11,6 +11,14 @@ namespace renderer_test {
struct CPUComputeUtil
{
+ enum class ExecuteStyle
+ {
+ Unknown,
+ Thread,
+ Group,
+ GroupRange,
+ };
+
struct Resource : public RefObject
{
void* getInterface() const { return m_interface; }
@@ -27,9 +35,28 @@ struct CPUComputeUtil
List<RefPtr<Resource> > m_resources;
};
+ struct ExecuteInfo
+ {
+ typedef void (*Func)();
+
+ ExecuteStyle m_style;
+ Func m_func;
+ uint32_t m_dispatchSize[3];
+ uint32_t m_numThreadsPerAxis[3];
+
+ void* m_uniformState;
+ void* m_uniformEntryPointParams;
+ };
+
+
+ /// Runs code across run styles and makes sure output buffers match
+ static SlangResult checkStyleConsistency(const uint32_t dispatchSize[3], const ShaderCompilerUtil::OutputAndLayout& compilationAndLayout);
+
static SlangResult calcBindings(const ShaderCompilerUtil::OutputAndLayout& compilationAndLayout, Context& outContext);
- static SlangResult execute(const uint32_t dispatchSize[3], const ShaderCompilerUtil::OutputAndLayout& compilationAndLayout, Context& outContext);
+ static SlangResult calcExecuteInfo(ExecuteStyle style, const uint32_t dispatchSize[3], const ShaderCompilerUtil::OutputAndLayout& compilationAndLayout, Context& context, ExecuteInfo& out);
+
+ static SlangResult execute(const ExecuteInfo& info);
static SlangResult writeBindings(const ShaderInputLayout& layout, const List<CPUMemoryBinding::Buffer>& buffers, const Slang::String& fileName);
};
diff --git a/tools/render-test/options.cpp b/tools/render-test/options.cpp
index d2f21a5d9..e13a2b88f 100644
--- a/tools/render-test/options.cpp
+++ b/tools/render-test/options.cpp
@@ -183,7 +183,7 @@ SlangResult parseOptions(int argc, const char*const* argv, Slang::WriterHelper s
{
if (argCursor == argEnd)
{
- stdError.print("error: comma separated compute dispatch size for '%s'\n", arg);
+ stdError.print("error: expecting a comma separated compute dispatch size for '%s'\n", arg);
return SLANG_FAIL;
}
List<UnownedStringSlice> slices;
diff --git a/tools/render-test/render-test-main.cpp b/tools/render-test/render-test-main.cpp
index 2a0b9a6c9..3a8871618 100644
--- a/tools/render-test/render-test-main.cpp
+++ b/tools/render-test/render-test-main.cpp
@@ -459,12 +459,25 @@ SLANG_TEST_TOOL_API SlangResult innerMain(Slang::StdWriters* stdWriters, SlangSe
ShaderCompilerUtil::OutputAndLayout compilationAndLayout;
SLANG_RETURN_ON_FAIL(ShaderCompilerUtil::compileWithLayout(session, gOptions.sourcePath, gOptions.compileArgs, gOptions.shaderType, input, compilationAndLayout));
- CPUComputeUtil::Context context;
- SLANG_RETURN_ON_FAIL(CPUComputeUtil::calcBindings(compilationAndLayout, context));
- SLANG_RETURN_ON_FAIL(CPUComputeUtil::execute(gOptions.computeDispatchSize, compilationAndLayout, context));
+
+ {
+ CPUComputeUtil::Context context;
+ SLANG_RETURN_ON_FAIL(CPUComputeUtil::calcBindings(compilationAndLayout, context));
+
+ CPUComputeUtil::ExecuteInfo info;
+ SLANG_RETURN_ON_FAIL(CPUComputeUtil::calcExecuteInfo(CPUComputeUtil::ExecuteStyle::GroupRange, gOptions.computeDispatchSize, compilationAndLayout, context, info));
+ SLANG_RETURN_ON_FAIL(CPUComputeUtil::execute(info));
+
+ // Dump everything out that was written
+ SLANG_RETURN_ON_FAIL(CPUComputeUtil::writeBindings(compilationAndLayout.layout, context.buffers, gOptions.outputPath));
+ }
+
+ {
+ // Check all execution styles produce the same result
+ SLANG_RETURN_ON_FAIL(CPUComputeUtil::checkStyleConsistency(gOptions.computeDispatchSize, compilationAndLayout));
+ }
- // Dump everything out that was written
- return CPUComputeUtil::writeBindings(compilationAndLayout.layout, context.buffers, gOptions.outputPath);
+ return SLANG_OK;
}
Slang::RefPtr<Renderer> renderer;