summaryrefslogtreecommitdiffstats
path: root/tools/render-test
diff options
context:
space:
mode:
authorjsmall-nvidia <jsmall@nvidia.com>2019-09-17 12:25:45 -0400
committerGitHub <noreply@github.com>2019-09-17 12:25:45 -0400
commit3af404da7f7f125464b78159940cb3fc06e69cc5 (patch)
treed1640fc1ac08be8a15420a8603eba991833a1792 /tools/render-test
parent3758a50dae81973b00541f2a151e3ee9cd2d1645 (diff)
CPU ABI improvements (#1056)
* WIP: Improving CPU performance/ABI * Optionally output code on CPU for groupThreadID and groupID. * Added ability to set compute dispatch size on command line for render-test. Dispatch compute tests taking into account dispatch size. Added test that semantics are working. * Test using GroupRange. * Fix problem with adding \n for external diagnostic - only add it if there isn't already a \n at the end. Change the output order (put result before) so the last value is the diagnostic string.
Diffstat (limited to 'tools/render-test')
-rw-r--r--tools/render-test/cpu-compute-util.cpp75
-rw-r--r--tools/render-test/cpu-compute-util.h2
-rw-r--r--tools/render-test/options.cpp28
-rw-r--r--tools/render-test/options.h2
-rw-r--r--tools/render-test/render-test-main.cpp4
5 files changed, 92 insertions, 19 deletions
diff --git a/tools/render-test/cpu-compute-util.cpp b/tools/render-test/cpu-compute-util.cpp
index 4294ad539..1b1adef82 100644
--- a/tools/render-test/cpu-compute-util.cpp
+++ b/tools/render-test/cpu-compute-util.cpp
@@ -301,7 +301,7 @@ static CPUComputeUtil::Resource* _newOneTexture2D(int elemCount)
return SLANG_OK;
}
-/* static */SlangResult CPUComputeUtil::execute(const ShaderCompilerUtil::OutputAndLayout& compilationAndLayout, Context& context)
+/* static */SlangResult CPUComputeUtil::execute(const uint32_t dispatchSize[3], const ShaderCompilerUtil::OutputAndLayout& compilationAndLayout, Context& context)
{
auto request = compilationAndLayout.output.request;
auto reflection = (slang::ShaderReflection*) spGetReflection(request);
@@ -313,10 +313,12 @@ static CPUComputeUtil::Resource* _newOneTexture2D(int elemCount)
struct UniformState;
typedef void(*Func)(CPPPrelude::ComputeVaryingInput* varyingInput, CPPPrelude::UniformEntryPointParams* uniformEntryPointParams, UniformState* uniformState);
+ typedef void(*GroupRangeFunc)(CPPPrelude::GroupComputeVaryingInput* varyingInput, CPPPrelude::UniformEntryPointParams* uniformEntryPointParams, UniformState* uniformState);
slang::EntryPointReflection* entryPoint = nullptr;
Func func = nullptr;
Func groupFunc = nullptr;
+ GroupRangeFunc groupRangeFunc = nullptr;
{
auto entryPointCount = reflection->getEntryPointCount();
SLANG_ASSERT(entryPointCount == 1);
@@ -326,27 +328,58 @@ static CPUComputeUtil::Resource* _newOneTexture2D(int elemCount)
const char* entryPointName = entryPoint->getName();
func = (Func)sharedLibrary->findFuncByName(entryPointName);
- StringBuilder groupEntryPointName;
- groupEntryPointName << entryPointName << "_Group";
+ {
+ StringBuilder groupEntryPointName;
+ groupEntryPointName << entryPointName << "_Group";
+
+ groupFunc = (Func)sharedLibrary->findFuncByName(groupEntryPointName.getBuffer());
+ }
- groupFunc = (Func)sharedLibrary->findFuncByName(groupEntryPointName.getBuffer());
+ {
+ StringBuilder groupRangeEntryPointName;
+ groupRangeEntryPointName << entryPointName << "_GroupRange";
+
+ groupRangeFunc = (GroupRangeFunc)sharedLibrary->findFuncByName(groupRangeEntryPointName.getBuffer());
+ }
- if (func == nullptr && groupFunc == nullptr)
+ if (func == nullptr && groupFunc == nullptr && groupRangeFunc == nullptr)
{
return SLANG_FAIL;
}
}
// If we have the group function, that's the faster way to execute all threads in group...
- if (groupFunc)
+ if (groupRangeFunc)
{
UniformState* uniformState = (UniformState*)context.binding.m_rootBuffer.m_data;
CPPPrelude::UniformEntryPointParams* uniformEntryPointParams = (CPPPrelude::UniformEntryPointParams*)context.binding.m_entryPointBuffer.m_data;
+ CPPPrelude::GroupComputeVaryingInput varying;
+ varying.startGroupID = {};
+ varying.endGroupID = { dispatchSize[0], dispatchSize[1], dispatchSize[2] };
+
+ groupRangeFunc(&varying, uniformEntryPointParams, uniformState);
+ }
+ else if (groupFunc)
+ {
CPPPrelude::ComputeVaryingInput varying;
- varying.groupID = {};
- groupFunc(&varying, uniformEntryPointParams, uniformState);
+ for (uint32_t groupZ = 0; groupZ < dispatchSize[2]; ++groupZ)
+ {
+ for (uint32_t groupY = 0; groupY < dispatchSize[1]; ++groupY)
+ {
+ for (uint32_t groupX = 0; groupX < dispatchSize[0]; ++groupX)
+ {
+ UniformState* uniformState = (UniformState*)context.binding.m_rootBuffer.m_data;
+ CPPPrelude::UniformEntryPointParams* uniformEntryPointParams = (CPPPrelude::UniformEntryPointParams*)context.binding.m_entryPointBuffer.m_data;
+
+ varying.groupID = {groupX, groupY, groupZ};
+
+ groupFunc(&varying, uniformEntryPointParams, uniformState);
+ }
+ }
+ }
+
}
else
{
@@ -359,19 +392,29 @@ static CPUComputeUtil::Resource* _newOneTexture2D(int elemCount)
CPPPrelude::UniformEntryPointParams* uniformEntryPointParams = (CPPPrelude::UniformEntryPointParams*)context.binding.m_entryPointBuffer.m_data;
CPPPrelude::ComputeVaryingInput varying;
- varying.groupID = {};
- for (int z = 0; z < int(numThreadsPerAxis[2]); ++z)
+ for (uint32_t groupZ = 0; groupZ < dispatchSize[2]; ++groupZ)
{
- varying.groupThreadID.z = z;
- for (int y = 0; y < int(numThreadsPerAxis[1]); ++y)
+ for (uint32_t groupY = 0; groupY < dispatchSize[1]; ++groupY)
{
- varying.groupThreadID.y = y;
- for (int x = 0; x < int(numThreadsPerAxis[0]); ++x)
+ for (uint32_t groupX = 0; groupX < dispatchSize[0]; ++groupX)
{
- varying.groupThreadID.x = x;
+ varying.groupID = {groupX, groupY, groupZ};
- func(&varying, uniformEntryPointParams, uniformState);
+ for (int z = 0; z < int(numThreadsPerAxis[2]); ++z)
+ {
+ varying.groupThreadID.z = z;
+ for (int y = 0; y < int(numThreadsPerAxis[1]); ++y)
+ {
+ varying.groupThreadID.y = y;
+ for (int x = 0; x < int(numThreadsPerAxis[0]); ++x)
+ {
+ varying.groupThreadID.x = x;
+
+ func(&varying, uniformEntryPointParams, uniformState);
+ }
+ }
+ }
}
}
}
diff --git a/tools/render-test/cpu-compute-util.h b/tools/render-test/cpu-compute-util.h
index cbc4e6e58..b30ef146b 100644
--- a/tools/render-test/cpu-compute-util.h
+++ b/tools/render-test/cpu-compute-util.h
@@ -29,7 +29,7 @@ struct CPUComputeUtil
static SlangResult calcBindings(const ShaderCompilerUtil::OutputAndLayout& compilationAndLayout, Context& outContext);
- static SlangResult execute(const ShaderCompilerUtil::OutputAndLayout& compilationAndLayout, Context& outContext);
+ static SlangResult execute(const uint32_t dispatchSize[3], const ShaderCompilerUtil::OutputAndLayout& compilationAndLayout, Context& outContext);
static SlangResult writeBindings(const ShaderInputLayout& layout, const List<CPUMemoryBinding::Buffer>& buffers, const Slang::String& fileName);
};
diff --git a/tools/render-test/options.cpp b/tools/render-test/options.cpp
index 1cf0ffbe8..d2f21a5d9 100644
--- a/tools/render-test/options.cpp
+++ b/tools/render-test/options.cpp
@@ -179,6 +179,34 @@ SlangResult parseOptions(int argc, const char*const* argv, Slang::WriterHelper s
gOptions.adapter = *argCursor++;
}
+ else if (strcmp(arg, "-compute-dispatch") == 0)
+ {
+ if (argCursor == argEnd)
+ {
+ stdError.print("error: comma separated compute dispatch size for '%s'\n", arg);
+ return SLANG_FAIL;
+ }
+ List<UnownedStringSlice> slices;
+ StringUtil::split(UnownedStringSlice(*argCursor++), ',', slices);
+ if (slices.getCount() != 3)
+ {
+ stdError.print("error: expected 3 comma separated integers for compute dispatch size for '%s'\n", arg);
+ return SLANG_FAIL;
+ }
+
+ String string;
+ for (Index i = 0; i < 3; ++i)
+ {
+ string = slices[i];
+ int v = StringToInt(string);
+ if (v < 1)
+ {
+ stdError.print("error: expected 3 comma positive integers for compute dispatch size for '%s'\n", arg);
+ return SLANG_FAIL;
+ }
+ gOptions.computeDispatchSize[i] = v;
+ }
+ }
else
{
// Lookup
diff --git a/tools/render-test/options.h b/tools/render-test/options.h
index a57c94ed0..67eae6603 100644
--- a/tools/render-test/options.h
+++ b/tools/render-test/options.h
@@ -64,6 +64,8 @@ struct Options
Slang::List<Slang::CommandLine::Arg> compileArgs;
Slang::String adapter; ///< The adapter to use either name or index
+
+ uint32_t computeDispatchSize[3] = { 1, 1, 1 };
};
extern Options gOptions;
diff --git a/tools/render-test/render-test-main.cpp b/tools/render-test/render-test-main.cpp
index 0e457f9e4..2a0b9a6c9 100644
--- a/tools/render-test/render-test-main.cpp
+++ b/tools/render-test/render-test-main.cpp
@@ -232,7 +232,7 @@ void RenderTestApp::runCompute()
auto pipelineType = PipelineType::Compute;
m_renderer->setPipelineState(pipelineType, m_pipelineState);
m_bindingState->apply(m_renderer, pipelineType);
- m_renderer->dispatchCompute(1, 1, 1);
+ m_renderer->dispatchCompute(m_options.computeDispatchSize[0], m_options.computeDispatchSize[1], m_options.computeDispatchSize[2]);
}
void RenderTestApp::finalize()
@@ -461,7 +461,7 @@ SLANG_TEST_TOOL_API SlangResult innerMain(Slang::StdWriters* stdWriters, SlangSe
CPUComputeUtil::Context context;
SLANG_RETURN_ON_FAIL(CPUComputeUtil::calcBindings(compilationAndLayout, context));
- SLANG_RETURN_ON_FAIL(CPUComputeUtil::execute(compilationAndLayout, context));
+ SLANG_RETURN_ON_FAIL(CPUComputeUtil::execute(gOptions.computeDispatchSize, compilationAndLayout, context));
// Dump everything out that was written
return CPUComputeUtil::writeBindings(compilationAndLayout.layout, context.buffers, gOptions.outputPath);