From cae5ddd4a2c9343ec7367c9049c5cc0c8628a9c4 Mon Sep 17 00:00:00 2001 From: jsmall-nvidia Date: Wed, 8 Jan 2020 11:09:20 -0500 Subject: Setup of runtime cuda device (#1162) * CUDA generated first test compiles. * WIP on enabling CUDA in render-test. * Detect CUDA_PATH environmental variable to build build cuda support into render-test. Added WIP cuda-compute-util.cpp/h Added CUDA as a renderer type. * Fix libraries needed for cuda in premake. * Added -enable-cuda premake option. Defaults to false. * Creates CUDA device, loads PTX and finds entry point. * Fix some erroneous cruft from slang-cuda-prelude.h --- prelude/slang-cuda-prelude.h | 2 +- premake5.lua | 31 ++++ source/core/slang-render-api-util.cpp | 6 + source/core/slang-render-api-util.h | 2 + source/slang/slang-emit-cuda.cpp | 2 +- tools/gfx/render.cpp | 2 + tools/gfx/render.h | 4 +- tools/render-test/cpu-compute-util.h | 2 +- tools/render-test/cuda/cuda-compute-util.cpp | 206 +++++++++++++++++++++++++++ tools/render-test/cuda/cuda-compute-util.h | 21 +++ tools/render-test/options.cpp | 5 +- tools/render-test/render-test-main.cpp | 54 +++++-- tools/render-test/slang-support.h | 13 ++ tools/slang-test/slang-test-main.cpp | 5 + 14 files changed, 341 insertions(+), 14 deletions(-) create mode 100644 tools/render-test/cuda/cuda-compute-util.cpp create mode 100644 tools/render-test/cuda/cuda-compute-util.h diff --git a/prelude/slang-cuda-prelude.h b/prelude/slang-cuda-prelude.h index ac299eac0..4d4681baf 100644 --- a/prelude/slang-cuda-prelude.h +++ b/prelude/slang-cuda-prelude.h @@ -1,5 +1,5 @@ -#line 1 "slang-cuda-prelude.h" +// For now we'll disable any asserts in this prelude #define SLANG_PRELUDE_ASSERT(x) template diff --git a/premake5.lua b/premake5.lua index 1905bd2f4..4de256321 100644 --- a/premake5.lua +++ b/premake5.lua @@ -84,10 +84,26 @@ newoption { allowed = { { "true", "True"}, { "false", "False" } } } +newoption { + trigger = "enable-cuda", + description = "(Optional) If true will enable cuda tests, if CUDA is found via CUDA_PATH", + value = "bool", + default = "false", + allowed = { { "true", "True"}, { "false", "False" } } +} + buildLocation = _OPTIONS["build-location"] executeBinary = (_OPTIONS["execute-binary"] == "true") targetDetail = _OPTIONS["target-detail"] buildGlslang = (_OPTIONS["build-glslang"] == "true") +enableCuda = (_OPTIONS["enable-cuda"] == "true") + +-- cudaPath is only set if cuda is enabled, and CUDA_PATH enviromental variable is set +cudaPath = nil +if enableCuda then + -- Get the CUDA path from the environment variable. If set, CUDA will be assumed installed + cudaPath = os.getenv("CUDA_PATH") +end -- Is true when the target is really windows (ie not something on top of windows like cygwin) local isTargetWindows = (os.target() == "windows") and not (targetDetail == "mingw" or targetDetail == "cygwin") @@ -529,6 +545,21 @@ toolSharedLibrary "render-test" -- d3dcompiler_47.dll is copied from the external/slang-binaries submodule. postbuildcommands { '"$(SolutionDir)tools\\copy-hlsl-libs.bat" "$(WindowsSdkDir)Redist/D3D/%{cfg.platform:lower()}/" "%{cfg.targetdir}/" "windows-%{cfg.platform:lower()}"'} end + + if type(cudaPath) == "string" and isTargetWindows then + addSourceDir "tools/render-test/cuda" + defines { "RENDER_TEST_CUDA" } + includedirs { cudaPath .. "/include" } + includedirs { cudaPath .. "/include", cudaPath .. "/common/inc" } + + filter { "platforms:x86" } + libdirs { cudaPath .. "/lib/Win32/" } + + filter { "platforms:x64" } + libdirs { cudaPath .. "/lib/x64/" } + + links { "cuda", "cudart" } + end -- -- `gfx` is a utility library for doing GPU rendering diff --git a/source/core/slang-render-api-util.cpp b/source/core/slang-render-api-util.cpp index a9339c14e..960537a0b 100644 --- a/source/core/slang-render-api-util.cpp +++ b/source/core/slang-render-api-util.cpp @@ -18,6 +18,7 @@ namespace Slang { { RenderApiType::D3D12, "dx12,d3d12", ""}, { RenderApiType::D3D11, "dx11,d3d11", "hlsl,hlsl-rewrite,slang"}, { RenderApiType::CPU, "cpu", ""}, + { RenderApiType::CUDA, "cuda", "cuda,ptx"}, }; static int _calcAvailableApis() @@ -268,6 +269,11 @@ static bool _canLoadSharedLibrary(const char* libName) case RenderApiType::D3D11: return _canLoadSharedLibrary("d3d11"); case RenderApiType::D3D12: return _canLoadSharedLibrary("d3d12"); case RenderApiType::CPU: return true; + case RenderApiType::CUDA: + { + // We'll assume it's available, and if not trying to create it will detect it + return true; + } default: break; } #elif SLANG_UNIX_FAMILY diff --git a/source/core/slang-render-api-util.h b/source/core/slang-render-api-util.h index 48b599653..b028d3996 100644 --- a/source/core/slang-render-api-util.h +++ b/source/core/slang-render-api-util.h @@ -16,6 +16,7 @@ enum class RenderApiType D3D12, D3D11, CPU, + CUDA, CountOf, }; @@ -29,6 +30,7 @@ struct RenderApiFlag D3D12 = 1 << int(RenderApiType::D3D12), D3D11 = 1 << int(RenderApiType::D3D11), CPU = 1 << int(RenderApiType::CPU), + CUDA = 1 << int(RenderApiType::CUDA), AllOf = (1 << int(RenderApiType::CountOf)) - 1 ///< All bits set }; }; diff --git a/source/slang/slang-emit-cuda.cpp b/source/slang/slang-emit-cuda.cpp index f2c9a1e80..980e94a29 100644 --- a/source/slang/slang-emit-cuda.cpp +++ b/source/slang/slang-emit-cuda.cpp @@ -270,7 +270,7 @@ void CUDASourceEmitter::emitEntryPointAttributesImpl(IRFunc* irFunc, IREntryPoin m_writer->emit(")]\n"); #endif - m_writer->emit("__global__ "); + m_writer->emit("extern \"C\" __global__ "); break; } diff --git a/tools/gfx/render.cpp b/tools/gfx/render.cpp index 785343927..43a255817 100644 --- a/tools/gfx/render.cpp +++ b/tools/gfx/render.cpp @@ -75,6 +75,7 @@ const Resource::DescBase& Resource::getDescBase() const BindingStyle::OpenGl, // OpenGl, BindingStyle::Vulkan, // Vulkan BindingStyle::CPU, // CPU + BindingStyle::CUDA, // CUDA }; /* static */void RendererUtil::compileTimeAsserts() @@ -405,6 +406,7 @@ ProjectionStyle RendererUtil::getProjectionStyle(RendererType type) case RendererType::Vulkan: return UnownedStringSlice::fromLiteral("Vulkan"); case RendererType::Unknown: return UnownedStringSlice::fromLiteral("Unknown"); case RendererType::CPU: return UnownedStringSlice::fromLiteral("CPU"); + case RendererType::CUDA: return UnownedStringSlice::fromLiteral("CUDA"); default: return UnownedStringSlice::fromLiteral("?!?"); } } diff --git a/tools/gfx/render.h b/tools/gfx/render.h index 65f3c00c0..a4d042a9a 100644 --- a/tools/gfx/render.h +++ b/tools/gfx/render.h @@ -68,13 +68,14 @@ enum class RendererType OpenGl, Vulkan, CPU, + CUDA, CountOf, }; enum class ProjectionStyle { Unknown, - OpenGl, + OpenGl, DirectX, Vulkan, CountOf, @@ -88,6 +89,7 @@ enum class BindingStyle OpenGl, Vulkan, CPU, + CUDA, CountOf, }; diff --git a/tools/render-test/cpu-compute-util.h b/tools/render-test/cpu-compute-util.h index 9430eb841..179985f6f 100644 --- a/tools/render-test/cpu-compute-util.h +++ b/tools/render-test/cpu-compute-util.h @@ -64,4 +64,4 @@ struct CPUComputeUtil } // renderer_test -#endif //CPU_MEMORY_BINDING_H +#endif //CPU_COMPUTE_UTIL_H diff --git a/tools/render-test/cuda/cuda-compute-util.cpp b/tools/render-test/cuda/cuda-compute-util.cpp new file mode 100644 index 000000000..138f842b4 --- /dev/null +++ b/tools/render-test/cuda/cuda-compute-util.cpp @@ -0,0 +1,206 @@ + +#include "cuda-compute-util.h" + +#include "../../slang-com-helper.h" + +#include "../../source/core/slang-std-writers.h" +#include "../../source/core/slang-token-reader.h" + +#include +#include + +namespace renderer_test { +using namespace Slang; + +#define SLANG_CUDA_RETURN_ON_FAIL(x) { int _res = (int)(x); if (_res != 0) return SLANG_FAIL; } + +static int _calcSMCountPerMultiProcessor(int major, int minor) +{ + // Defines for GPU Architecture types (using the SM version to determine + // the # of cores per SM + struct SMInfo + { + int sm; // 0xMm (hexadecimal notation), M = SM Major version, and m = SM minor version + int coreCount; + }; + + static const SMInfo infos[] = + { + {0x30, 192}, + {0x32, 192}, + {0x35, 192}, + {0x37, 192}, + {0x50, 128}, + {0x52, 128}, + {0x53, 128}, + {0x60, 64}, + {0x61, 128}, + {0x62, 128}, + {0x70, 64}, + {0x72, 64}, + {0x75, 64} + }; + + const int sm = ((major << 4) + minor); + for (Index i = 0; i < SLANG_COUNT_OF(infos); ++i) + { + if (infos[i].sm == sm) + { + return infos[i].coreCount; + } + } + + const auto& last = infos[SLANG_COUNT_OF(infos) - 1]; + + // It must be newer presumably + SLANG_ASSERT(sm > last.coreCount ); + + // Default to the last entry + return last.coreCount; +} + +static SlangResult _findMaxFlopsDeviceId(int* outDevice) +{ + int smPerMultiproc = 0; + int maxPerfDevice = -1; + int deviceCount = 0; + int devicesProhibited = 0; + + uint64_t maxComputePerf = 0; + SLANG_CUDA_RETURN_ON_FAIL(cudaGetDeviceCount(&deviceCount)); + + // Find the best CUDA capable GPU device + for (int currentDevice = 0; currentDevice < deviceCount; ++currentDevice) + { + int computeMode = -1, major = 0, minor = 0; + SLANG_CUDA_RETURN_ON_FAIL(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, currentDevice)); + SLANG_CUDA_RETURN_ON_FAIL(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, currentDevice)); + SLANG_CUDA_RETURN_ON_FAIL(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, currentDevice)); + + // If this GPU is not running on Compute Mode prohibited, + // then we can add it to the list + if (computeMode != cudaComputeModeProhibited) + { + if (major == 9999 && minor == 9999) + { + smPerMultiproc = 1; + } + else + { + smPerMultiproc = _calcSMCountPerMultiProcessor(major, minor); + } + + int multiProcessorCount = 0, clockRate = 0; + SLANG_CUDA_RETURN_ON_FAIL(cudaDeviceGetAttribute(&multiProcessorCount, cudaDevAttrMultiProcessorCount, currentDevice)); + SLANG_CUDA_RETURN_ON_FAIL(cudaDeviceGetAttribute(&clockRate, cudaDevAttrClockRate, currentDevice)); + uint64_t compute_perf = uint64_t(multiProcessorCount) * smPerMultiproc * clockRate; + + if (compute_perf > maxComputePerf) + { + maxComputePerf = compute_perf; + maxPerfDevice = currentDevice; + } + } + else + { + devicesProhibited++; + } + } + + if (maxPerfDevice < 0) + { + return SLANG_FAIL; + } + + *outDevice = maxPerfDevice; + return SLANG_OK; +} + +static SlangResult _initCuda() +{ + static CUresult res = cuInit(0); + SLANG_CUDA_RETURN_ON_FAIL(res); + + return SLANG_OK; +} + + + +/* static */SlangResult _createDevice(CUcontext* outContext) +{ + SLANG_RETURN_ON_FAIL(_initCuda()); + + int deviceId; + SLANG_RETURN_ON_FAIL(_findMaxFlopsDeviceId(&deviceId)); + SLANG_CUDA_RETURN_ON_FAIL(cudaSetDevice(deviceId)); + + CUcontext context; + + // Create context + SLANG_CUDA_RETURN_ON_FAIL(cuCtxCreate(&context, 0, deviceId)); + + *outContext = context; + return SLANG_OK; +} + +/* static */bool CUDAComputeUtil::canCreateDevice() +{ + CUcontext context; + if (SLANG_SUCCEEDED(_createDevice(&context))) + { + cuCtxDestroy(context); + return true; + } + + return false; +} + +static SlangResult _compute(CUcontext context, CUmodule module, const ShaderCompilerUtil::OutputAndLayout& outputAndLayout) +{ + auto request = outputAndLayout.output.request; + auto reflection = (slang::ShaderReflection*) spGetReflection(request); + + slang::EntryPointReflection* entryPoint = nullptr; + auto entryPointCount = reflection->getEntryPointCount(); + SLANG_ASSERT(entryPointCount == 1); + + entryPoint = reflection->getEntryPointByIndex(0); + + const char* entryPointName = entryPoint->getName(); + + // Get the entry point + CUfunction kernel; + + SLANG_CUDA_RETURN_ON_FAIL(cuModuleGetFunction(&kernel, module, entryPointName)); + + + return SLANG_OK; +} + +/* static */SlangResult CUDAComputeUtil::execute(const ShaderCompilerUtil::OutputAndLayout& outputAndLayout) +{ + CUcontext context; + SLANG_RETURN_ON_FAIL(_createDevice(&context)); + + const Index index = outputAndLayout.output.findKernelDescIndex(StageType::Compute); + if (index < 0) + { + return SLANG_FAIL; + } + + const auto& kernel = outputAndLayout.output.kernelDescs[index]; + + CUmodule module = 0; + SLANG_CUDA_RETURN_ON_FAIL(cuModuleLoadData(&module, kernel.codeBegin)); + + SLANG_RETURN_ON_FAIL(_compute(context, module, outputAndLayout)); + + SLANG_CUDA_RETURN_ON_FAIL(cuModuleUnload(module)); + + cuCtxDestroy(context); + + return SLANG_OK; +} + + +} // renderer_test diff --git a/tools/render-test/cuda/cuda-compute-util.h b/tools/render-test/cuda/cuda-compute-util.h new file mode 100644 index 000000000..9c7d83b1f --- /dev/null +++ b/tools/render-test/cuda/cuda-compute-util.h @@ -0,0 +1,21 @@ +#ifndef CUDA_COMPUTE_UTIL_H +#define CUDA_COMPUTE_UTIL_H + +#include "../slang-support.h" +#include "../options.h" + +#include "../../source/core/slang-smart-pointer.h" + +namespace renderer_test { + +struct CUDAComputeUtil +{ + static SlangResult execute(const ShaderCompilerUtil::OutputAndLayout& outputAndLayout); + + static bool canCreateDevice(); +}; + + +} // renderer_test + +#endif //CPU_MEMORY_BINDING_H diff --git a/tools/render-test/options.cpp b/tools/render-test/options.cpp index a614336e7..4d5d83ce5 100644 --- a/tools/render-test/options.cpp +++ b/tools/render-test/options.cpp @@ -25,11 +25,12 @@ static gfx::RendererType _toRenderType(Slang::RenderApiType apiType) using namespace Slang; switch (apiType) { - case RenderApiType::D3D11: return gfx::RendererType::DirectX11; - case RenderApiType::D3D12: return gfx::RendererType::DirectX12; + case RenderApiType::D3D11: return gfx::RendererType::DirectX11; + case RenderApiType::D3D12: return gfx::RendererType::DirectX12; case RenderApiType::OpenGl: return gfx::RendererType::OpenGl; case RenderApiType::Vulkan: return gfx::RendererType::Vulkan; case RenderApiType::CPU: return gfx::RendererType::CPU; + case RenderApiType::CUDA: return gfx::RendererType::CUDA; default: return gfx::RendererType::Unknown; } } diff --git a/tools/render-test/render-test-main.cpp b/tools/render-test/render-test-main.cpp index 1355402cf..a110b6ca9 100644 --- a/tools/render-test/render-test-main.cpp +++ b/tools/render-test/render-test-main.cpp @@ -25,6 +25,10 @@ #include "cpu-compute-util.h" +#if RENDER_TEST_CUDA +# include "cuda/cuda-compute-util.h" +#endif + namespace renderer_test { using Slang::Result; @@ -461,6 +465,13 @@ SLANG_TEST_TOOL_API SlangResult innerMain(Slang::StdWriters* stdWriters, SlangSe nativeLanguage = SLANG_SOURCE_LANGUAGE_CPP; slangPassThrough = SLANG_PASS_THROUGH_GENERIC_C_CPP; break; + case RendererType::CUDA: + input.target = SLANG_PTX; + input.profile = ""; + nativeLanguage = SLANG_SOURCE_LANGUAGE_CUDA; + slangPassThrough = SLANG_PASS_THROUGH_NVRTC; + break; + default: fprintf(stderr, "error: unexpected\n"); return SLANG_FAIL; @@ -502,20 +513,30 @@ SLANG_TEST_TOOL_API SlangResult innerMain(Slang::StdWriters* stdWriters, SlangSe rendererName << "'" << gOptions.adapter << "'"; } - // If it's CPU testing we don't need a window or a renderer - if (gOptions.rendererType == RendererType::CPU) + if (gOptions.onlyStartup) { - if (gOptions.onlyStartup) + switch (gOptions.rendererType) { - // Need generic C/C++ - if (SLANG_FAILED(spSessionCheckPassThroughSupport(session, SLANG_PASS_THROUGH_GENERIC_C_CPP))) + case RendererType::CUDA: { +#if RENDER_TEST_CUDA + return SLANG_SUCCEEDED(spSessionCheckPassThroughSupport(session, SLANG_PASS_THROUGH_NVRTC)) && CUDAComputeUtil::canCreateDevice() ? SLANG_OK : SLANG_FAIL; +#else return SLANG_FAIL; +#endif + } + case RendererType::CPU: + { + // As long as we have CPU, then this should work + return spSessionCheckPassThroughSupport(session, SLANG_PASS_THROUGH_GENERIC_C_CPP); } - // Should work ... - return SLANG_OK; + default: break; } + } + // If it's CPU testing we don't need a window or a renderer + if (gOptions.rendererType == RendererType::CPU) + { ShaderCompilerUtil::OutputAndLayout compilationAndLayout; SLANG_RETURN_ON_FAIL(ShaderCompilerUtil::compileWithLayout(session, gOptions.sourcePath, gOptions.compileArgs, gOptions.shaderType, input, compilationAndLayout)); @@ -564,10 +585,27 @@ SLANG_TEST_TOOL_API SlangResult innerMain(Slang::StdWriters* stdWriters, SlangSe } } - return SLANG_OK; } + if (gOptions.rendererType == RendererType::CUDA) + { + ShaderCompilerUtil::OutputAndLayout compilationAndLayout; + SLANG_RETURN_ON_FAIL(ShaderCompilerUtil::compileWithLayout(session, gOptions.sourcePath, gOptions.compileArgs, gOptions.shaderType, input, compilationAndLayout)); + +#if RENDER_TEST_CUDA + + // TODO(JS): + // We don't know how to execute it yet.. + + SLANG_RETURN_ON_FAIL(CUDAComputeUtil::execute(compilationAndLayout)); + + return SLANG_OK; +#else + return SLANG_FAIL; +#endif + } + Slang::RefPtr renderer; { RendererUtil::CreateFunc createFunc = RendererUtil::getCreateFunc(gOptions.rendererType); diff --git a/tools/render-test/slang-support.h b/tools/render-test/slang-support.h index cd3223c87..97b85ff8f 100644 --- a/tools/render-test/slang-support.h +++ b/tools/render-test/slang-support.h @@ -55,6 +55,19 @@ struct ShaderCompilerUtil spDestroyCompileRequest(request); } } + + Slang::Index findKernelDescIndex(gfx::StageType stage) const + { + for (Slang::Index i = 0; i < kernelDescs.getCount(); ++i) + { + if (kernelDescs[i].stage == stage) + { + return i; + } + } + return -1; + } + List kernelDescs; ShaderProgram::Desc desc; SlangCompileRequest* request = nullptr; diff --git a/tools/slang-test/slang-test-main.cpp b/tools/slang-test/slang-test-main.cpp index 6401ac852..9ccf6840f 100644 --- a/tools/slang-test/slang-test-main.cpp +++ b/tools/slang-test/slang-test-main.cpp @@ -670,6 +670,11 @@ static SlangResult _extractRenderTestRequirements(const CommandLine& cmdLine, Te nativeLanguage = SLANG_SOURCE_LANGUAGE_CPP; passThru = SLANG_PASS_THROUGH_GENERIC_C_CPP; break; + case RenderApiType::CUDA: + target = SLANG_PTX; + nativeLanguage = SLANG_SOURCE_LANGUAGE_CUDA; + passThru = SLANG_PASS_THROUGH_NVRTC; + break; } SlangSourceLanguage sourceLanguage = nativeLanguage; -- cgit v1.2.3