summary | refs | log | tree | commit | diff | stats
path: root/tests
diff options
context:
space:
mode:
author jsmall-nvidia <jsmall@nvidia.com> 2019-09-23 15:38:25 -0400
committer GitHub <noreply@github.com> 2019-09-23 15:38:25 -0400
commit 05af41d21d74d24871507e6f8f50574ea08c48a2 (patch)
tree 3197b021ed71c40f6035fdfa7d450b4b3b945422 /tests
parent ede0792fd9b4c7bc5c2653092ba1d492e67ca190 (diff)
Simple test profiling (#1062)
* First pass support for performance profiling
* Test across all elements
* Fix bug - sourceContents is not used, should use rawSource.
* Add ability to get prelude from API.
* Allow specifying source language for render-test
* Made it possible to compile a test input file as C++
* Special handling for reflection
* Added C++ impl to performance-profile.slang
* Remove some clang warnings.
* Output profile timings on appveyor and other TC.
* Remove passing around of StdWriters (can use global). Small comment improvements.
Diffstat (limited to 'tests')
-rw-r--r-- tests/compute/performance-profile.slang 52
1 file changed, 52 insertions, 0 deletions
diff --git a/tests/compute/performance-profile.slang b/tests/compute/performance-profile.slang
new file mode 100644
index 000000000..6ec8ecd94
--- /dev/null
+++ b/tests/compute/performance-profile.slang
@@ -0,0 +1,52 @@
+//TEST(compute):PERFORMANCE_PROFILE:-cpu -compute -compile-arg -O3 -compute-dispatch 256,1,1
+//TEST(compute):PERFORMANCE_PROFILE:-cpu -compute -source-language cpp -compile-arg -O3 -compute-dispatch 256,1,1
+//TEST(compute):PERFORMANCE_PROFILE:-slang -compute -compute-dispatch 256,1,1
+//TEST(compute):PERFORMANCE_PROFILE:-slang -compute -dx12 -compute-dispatch 256,1,1
+//TEST(compute, vulkan):PERFORMANCE_PROFILE:-vk -compute -compute-dispatch 256,1,1
+
+//TEST_INPUT:ubuffer(random(float, 4096, -1, 1), stride=4):dxbinding(0),glbinding(0),out,name outputBuffer
+
+#ifndef __cplusplus
+
+RWStructuredBuffer<float> outputBuffer; // buffer that computeMain reads from and writes back to
+
+[numthreads(16, 1, 1)] // thread-group size of 16 along x
+void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) // Slang entry point: rewrites one element of outputBuffer per invocation
+{
+ uint i = dispatchThreadID.x; // element index is the x component of the dispatch thread ID
+ float v = outputBuffer[i];
+ v = v < 0.0f ? (v * v) : (v + v + v); // negative values are squared; all other values are tripled
+ outputBuffer[i] = v; // result overwrites the element it was read from
+}
+
+
+#else
+
+namespace { // anonymous
+
+struct LocalUniformState // uniform state passed by pointer to the C++ computeMain below
+{
+ RWStructuredBuffer<float> outputBuffer_0; // buffer that computeMain forwards to _calc
+};
+
+} // anonymous
+
+static void _calc(const RWStructuredBuffer<float>& buf, int start, int end) // rewrites buf elements with index in [start, end) in place
+{
+ assert(start >= 0 && end <= buf.count); // the requested range must lie inside the buffer
+ float* data = buf.data; // elements are written through this pointer although buf itself is a const reference
+
+ for (int i = start; i < end; ++i) // executes zero times when start >= end
+ {
+ float v = data[i];
+ data[i] = v < 0.0f ? (v * v) : (v + v + v); // same formula as the Slang branch: square negatives, triple everything else
+ }
+}
+
+SLANG_PRELUDE_EXPORT
+void computeMain(ComputeVaryingInput* varyingInput, UniformEntryPointParams* params, LocalUniformState* uniformState) // C++ entry point; params is not used in this body
+{
+ _calc(uniformState->outputBuffer_0, varyingInput->startGroupID.x * 16, varyingInput->endGroupID.x * 16); // scales the start/end group IDs by 16 to get an element range; 16 presumably mirrors numthreads(16, 1, 1) in the Slang branch - confirm
+}
+
+#endif
\ No newline at end of file