CPU Performance/Testing improvements (#1055)

* First pass of render-test refactor. * Make window construction a function that can choose an implementation. * Remove OpenGL as it currently has a Windows dependency. * Disable Vulkan as the Renderer impl has a dependency on Windows. * Pass Window in as parameter of 'update'. * Add win-window.cpp as it was missing. * Fix warning on Windows about signs during comparison. * Added mechanism to add random arrays as buffer inputs and select type * Improved RenderGenerator to generate more types, and to be more careful around int32 ranges. * Added support for security checks (for Visual Studio C++) * Disable Exception handling being on by default when compiling kernels * Added a 'Group' version of the entry point that will evaluate all threads in a group in a single call. In test code use this method if available. * Added -compile-arg to be able to pass arguments to the compile within render-test * Add documentation for the _Group execution feature. * Fix some typos in cpu-target.md
author: jsmall-nvidia <jsmall@nvidia.com> 2019-09-16 09:38:21 -0400
committer: GitHub <noreply@github.com> 2019-09-16 09:38:21 -0400
commit: 40d8f3aeedf018c7c6766e98ec64733abd90671e (patch)
tree: 0c9cae7bc88d4344dd53596a88c3ce9918f2df13 /tools/render-test/cpu-compute-util.cpp
parent: c2e5d2468ad6a38cdb8a067da0678302f6cc6066 (diff)
1 files changed, 32 insertions, 13 deletions
diff --git a/tools/render-test/cpu-compute-util.cpp b/tools/render-test/cpu-compute-util.cpp
index 85a8fb1b0..4294ad539 100644
--- a/tools/render-test/cpu-compute-util.cpp
+++ b/tools/render-test/cpu-compute-util.cpp
@@ -316,6 +316,7 @@ static CPUComputeUtil::Resource* _newOneTexture2D(int elemCount)
 
     slang::EntryPointReflection* entryPoint = nullptr;
     Func func = nullptr;
+    Func groupFunc = nullptr;
     {
         auto entryPointCount = reflection->getEntryPointCount();
         SLANG_ASSERT(entryPointCount == 1);
@@ -325,15 +326,19 @@ static CPUComputeUtil::Resource* _newOneTexture2D(int elemCount)
         const char* entryPointName = entryPoint->getName();
         func = (Func)sharedLibrary->findFuncByName(entryPointName);
 
-        if (!func)
+        StringBuilder groupEntryPointName;
+        groupEntryPointName << entryPointName << "_Group";
+
+        groupFunc = (Func)sharedLibrary->findFuncByName(groupEntryPointName.getBuffer());
+
+        if (func == nullptr && groupFunc == nullptr)
         {
             return SLANG_FAIL;
         }
     }
 
-    SlangUInt numThreadsPerAxis[3];
-    entryPoint->getComputeThreadGroupSize(3, numThreadsPerAxis);
-
+    // If we have the group function, that's the faster way to execute all threads in group...
+    if (groupFunc)
     {
         UniformState* uniformState = (UniformState*)context.binding.m_rootBuffer.m_data;
         CPPPrelude::UniformEntryPointParams* uniformEntryPointParams = (CPPPrelude::UniformEntryPointParams*)context.binding.m_entryPointBuffer.m_data;
@@ -341,17 +346,33 @@ static CPUComputeUtil::Resource* _newOneTexture2D(int elemCount)
         CPPPrelude::ComputeVaryingInput varying;
         varying.groupID = {};
 
-        for (int z = 0; z < int(numThreadsPerAxis[2]); ++z)
+        groupFunc(&varying, uniformEntryPointParams, uniformState);
+    }
+    else
+    {
+        // We can also fire off each thread individually
+        SlangUInt numThreadsPerAxis[3];
+        entryPoint->getComputeThreadGroupSize(3, numThreadsPerAxis);
+
         {
-            varying.groupThreadID.z = z;
-            for (int y = 0; y < int(numThreadsPerAxis[1]); ++y)
+            UniformState* uniformState = (UniformState*)context.binding.m_rootBuffer.m_data;
+            CPPPrelude::UniformEntryPointParams* uniformEntryPointParams = (CPPPrelude::UniformEntryPointParams*)context.binding.m_entryPointBuffer.m_data;
+
+            CPPPrelude::ComputeVaryingInput varying;
+            varying.groupID = {};
+
+            for (int z = 0; z < int(numThreadsPerAxis[2]); ++z)
             {
-                varying.groupThreadID.y = y;
-                for (int x = 0; x < int(numThreadsPerAxis[0]); ++x)
+                varying.groupThreadID.z = z;
+                for (int y = 0; y < int(numThreadsPerAxis[1]); ++y)
                 {
-                    varying.groupThreadID.x = x;
+                    varying.groupThreadID.y = y;
+                    for (int x = 0; x < int(numThreadsPerAxis[0]); ++x)
+                    {
+                        varying.groupThreadID.x = x;
 
-                    func(&varying, uniformEntryPointParams, uniformState);
+                        func(&varying, uniformEntryPointParams, uniformState);
+                    }
                 }
             }
         }
@@ -360,6 +381,4 @@ static CPUComputeUtil::Resource* _newOneTexture2D(int elemCount)
     return SLANG_OK;
 }
 
-
-
 } // renderer_test
author	jsmall-nvidia <jsmall@nvidia.com>	2019-09-16 09:38:21 -0400
committer	GitHub <noreply@github.com>	2019-09-16 09:38:21 -0400
commit	40d8f3aeedf018c7c6766e98ec64733abd90671e (patch)
tree	0c9cae7bc88d4344dd53596a88c3ce9918f2df13 /tools/render-test/cpu-compute-util.cpp
parent	c2e5d2468ad6a38cdb8a067da0678302f6cc6066 (diff)