From b3e0b0d491c55bfdc1c40d26a421910103c1b9f2 Mon Sep 17 00:00:00 2001 From: jsmall-nvidia Date: Tue, 28 Jan 2020 12:41:09 -0500 Subject: Synthesizing CUDA tests (#1183) * When using setUniform clamp the amount of data written to the buffer size. * CUDA implement StructuredBuffer/ByteAddressBuffer as pointer/count as is on CPU. Allow bounds check to zero index. Update docs. * Synthesize tests. * Fix bug in CUDA output. * Fixing more tests to run on CUDA. * Added BaseType for layout of Vector and Matrix - as they are held as int32_t vector array types. * Enable unbound array support on CUDA. * Added unsized array support for CUDA documentation. --- tools/render-test/cuda/cuda-compute-util.cpp | 12 ++--- tools/render-test/cuda/cuda-compute-util.h | 5 +++ tools/render-test/render-test-main.cpp | 8 ++++ tools/slang-test/options.h | 2 +- tools/slang-test/slang-test-main.cpp | 67 +++++++++++++++++++++++----- 5 files changed, 78 insertions(+), 16 deletions(-) (limited to 'tools') diff --git a/tools/render-test/cuda/cuda-compute-util.cpp b/tools/render-test/cuda/cuda-compute-util.cpp index aa82d8d70..a50295063 100644 --- a/tools/render-test/cuda/cuda-compute-util.cpp +++ b/tools/render-test/cuda/cuda-compute-util.cpp @@ -572,13 +572,15 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp auto elementCount = int(typeLayout->getElementCount()); if (elementCount == 0) { - void** array = location.getUniform(); - // If set, we setup the data needed for array on CPU side - if (value && array) + CUDAComputeUtil::Array array = { nullptr, 0 }; + auto resource = CUDAResource::getCUDAResource(value); + if (resource) { - // TODO(JS): For now we'll just assume a pointer... - *array = CUDAResource::getCUDAData(value); + array.data = resource->m_cudaMemory; + array.count = value->m_elementCount; } + + location.setUniform(&array, sizeof(array)); } break; } diff --git a/tools/render-test/cuda/cuda-compute-util.h b/tools/render-test/cuda/cuda-compute-util.h index ea58b6343..f739ade91 100644 --- a/tools/render-test/cuda/cuda-compute-util.h +++ b/tools/render-test/cuda/cuda-compute-util.h @@ -21,6 +21,11 @@ struct CUDAComputeUtil void* data; size_t count; }; + struct Array + { + void* data; + size_t count; + }; struct Context { diff --git a/tools/render-test/render-test-main.cpp b/tools/render-test/render-test-main.cpp index 050a6d2c8..16f2d78d1 100644 --- a/tools/render-test/render-test-main.cpp +++ b/tools/render-test/render-test-main.cpp @@ -600,9 +600,17 @@ SLANG_TEST_TOOL_API SlangResult innerMain(Slang::StdWriters* stdWriters, SlangSe #if RENDER_TEST_CUDA + const uint64_t startTicks = ProcessUtil::getClockTick(); + CUDAComputeUtil::Context context; SLANG_RETURN_ON_FAIL(CUDAComputeUtil::execute(compilationAndLayout, context)); + if (gOptions.performanceProfile) + { + const uint64_t endTicks = ProcessUtil::getClockTick(); + _outputProfileTime(startTicks, endTicks); + } + if (gOptions.outputPath) { // Dump everything out that was written diff --git a/tools/slang-test/options.h b/tools/slang-test/options.h index 12869a945..ffad16fdc 100644 --- a/tools/slang-test/options.h +++ b/tools/slang-test/options.h @@ -86,7 +86,7 @@ struct Options // OpenGL is disabled for now // CPU is disabled by default // CUDA is disabled by default - Slang::RenderApiFlags synthesizedTestApis = Slang::RenderApiFlag::AllOf & ~(Slang::RenderApiFlag::Vulkan | Slang::RenderApiFlag::OpenGl | Slang::RenderApiFlag::CPU | Slang::RenderApiFlag::CUDA); + Slang::RenderApiFlags synthesizedTestApis = Slang::RenderApiFlag::AllOf & ~(Slang::RenderApiFlag::Vulkan | Slang::RenderApiFlag::OpenGl | Slang::RenderApiFlag::CPU); // The adapter to use. If empty will match first found adapter. Slang::String adapter; diff --git a/tools/slang-test/slang-test-main.cpp b/tools/slang-test/slang-test-main.cpp index ac073bcee..a6ad0cff3 100644 --- a/tools/slang-test/slang-test-main.cpp +++ b/tools/slang-test/slang-test-main.cpp @@ -2514,22 +2514,61 @@ bool testPassesCategoryMask( static void _calcSynthesizedTests(TestContext* context, RenderApiType synthRenderApiType, const List& srcTests, List& ioSynthTests) { // Add the explicit parameter - for (const auto& testDetails: srcTests) + for (const auto& srcTest: srcTests) { - const auto& requirements = testDetails.requirements; + const auto& requirements = srcTest.requirements; // Render tests use renderApis... // If it's an explicit test, we don't synth from it now - // TODO(JS): Arguably we should synthesize from explicit tests. In principal we can remove the explicit api apply another - // although that may not always work. - if (requirements.usedRenderApiFlags == 0 || - requirements.explicitRenderApi != RenderApiType::Unknown) + // In the case of CUDA, we can only synth from a CPU source + if (synthRenderApiType == RenderApiType::CUDA) { - continue; + if (requirements.explicitRenderApi != RenderApiType::CPU) + { + continue; + } + + // If the source language is defined, and it's + + const Index index = srcTest.options.args.indexOf("-source-language"); + if (index >= 0) + { + // + const auto& language = srcTest.options.args[index + 1]; + SlangSourceLanguage sourceLanguage = DownstreamCompiler::getSourceLanguageFromName(language.getUnownedSlice()); + + bool isCrossCompile = true; + + switch (sourceLanguage) + { + case SLANG_SOURCE_LANGUAGE_GLSL: + case SLANG_SOURCE_LANGUAGE_C: + case SLANG_SOURCE_LANGUAGE_CPP: + { + isCrossCompile = false; + } + default: break; + } + + if (!isCrossCompile) + { + continue; + } + } + } + else + { + // TODO(JS): Arguably we should synthesize from explicit tests. In principal we can remove the explicit api apply another + // although that may not always work. + if (requirements.usedRenderApiFlags == 0 || + requirements.explicitRenderApi != RenderApiType::Unknown) + { + continue; + } } - TestDetails synthTestDetails(testDetails.options); + TestDetails synthTestDetails(srcTest.options); TestOptions& synthOptions = synthTestDetails.options; // Mark as synthesized @@ -2544,8 +2583,16 @@ static void _calcSynthesizedTests(TestContext* context, RenderApiType synthRende // If the target is vulkan remove the -hlsl option if (synthRenderApiType == RenderApiType::Vulkan) { - Index index = synthOptions.args.indexOf("-hlsl"); - if (index != Index(-1)) + const Index index = synthOptions.args.indexOf("-hlsl"); + if (index >= 0) + { + synthOptions.args.removeAt(index); + } + } + else if (synthRenderApiType == RenderApiType::CUDA) + { + const Index index = synthOptions.args.indexOf("-cpu"); + if (index >= 0) { synthOptions.args.removeAt(index); } -- cgit v1.2.3