summaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
authorjsmall-nvidia <jsmall@nvidia.com>2020-01-08 11:09:20 -0500
committerGitHub <noreply@github.com>2020-01-08 11:09:20 -0500
commitcae5ddd4a2c9343ec7367c9049c5cc0c8628a9c4 (patch)
treec8200a495f3c0bc5a841ce752fdfb13a73278faf /tools
parent17285faf9b4fe7f6c28b43972212068465bdb42e (diff)
Setup of runtime cuda device (#1162)
* CUDA generated first test compiles. * WIP on enabling CUDA in render-test. * Detect CUDA_PATH environmental variable to build build cuda support into render-test. Added WIP cuda-compute-util.cpp/h Added CUDA as a renderer type. * Fix libraries needed for cuda in premake. * Added -enable-cuda premake option. Defaults to false. * Creates CUDA device, loads PTX and finds entry point. * Fix some erroneous cruft from slang-cuda-prelude.h
Diffstat (limited to 'tools')
-rw-r--r--tools/gfx/render.cpp2
-rw-r--r--tools/gfx/render.h4
-rw-r--r--tools/render-test/cpu-compute-util.h2
-rw-r--r--tools/render-test/cuda/cuda-compute-util.cpp206
-rw-r--r--tools/render-test/cuda/cuda-compute-util.h21
-rw-r--r--tools/render-test/options.cpp5
-rw-r--r--tools/render-test/render-test-main.cpp54
-rw-r--r--tools/render-test/slang-support.h13
-rw-r--r--tools/slang-test/slang-test-main.cpp5
9 files changed, 300 insertions, 12 deletions
diff --git a/tools/gfx/render.cpp b/tools/gfx/render.cpp
index 785343927..43a255817 100644
--- a/tools/gfx/render.cpp
+++ b/tools/gfx/render.cpp
@@ -75,6 +75,7 @@ const Resource::DescBase& Resource::getDescBase() const
BindingStyle::OpenGl, // OpenGl,
BindingStyle::Vulkan, // Vulkan
BindingStyle::CPU, // CPU
+ BindingStyle::CUDA, // CUDA
};
/* static */void RendererUtil::compileTimeAsserts()
@@ -405,6 +406,7 @@ ProjectionStyle RendererUtil::getProjectionStyle(RendererType type)
case RendererType::Vulkan: return UnownedStringSlice::fromLiteral("Vulkan");
case RendererType::Unknown: return UnownedStringSlice::fromLiteral("Unknown");
case RendererType::CPU: return UnownedStringSlice::fromLiteral("CPU");
+ case RendererType::CUDA: return UnownedStringSlice::fromLiteral("CUDA");
default: return UnownedStringSlice::fromLiteral("?!?");
}
}
diff --git a/tools/gfx/render.h b/tools/gfx/render.h
index 65f3c00c0..a4d042a9a 100644
--- a/tools/gfx/render.h
+++ b/tools/gfx/render.h
@@ -68,13 +68,14 @@ enum class RendererType
OpenGl,
Vulkan,
CPU,
+ CUDA,
CountOf,
};
enum class ProjectionStyle
{
Unknown,
- OpenGl,
+ OpenGl,
DirectX,
Vulkan,
CountOf,
@@ -88,6 +89,7 @@ enum class BindingStyle
OpenGl,
Vulkan,
CPU,
+ CUDA,
CountOf,
};
diff --git a/tools/render-test/cpu-compute-util.h b/tools/render-test/cpu-compute-util.h
index 9430eb841..179985f6f 100644
--- a/tools/render-test/cpu-compute-util.h
+++ b/tools/render-test/cpu-compute-util.h
@@ -64,4 +64,4 @@ struct CPUComputeUtil
} // renderer_test
-#endif //CPU_MEMORY_BINDING_H
+#endif //CPU_COMPUTE_UTIL_H
diff --git a/tools/render-test/cuda/cuda-compute-util.cpp b/tools/render-test/cuda/cuda-compute-util.cpp
new file mode 100644
index 000000000..138f842b4
--- /dev/null
+++ b/tools/render-test/cuda/cuda-compute-util.cpp
@@ -0,0 +1,206 @@
+
+#include "cuda-compute-util.h"
+
+#include "../../slang-com-helper.h"
+
+#include "../../source/core/slang-std-writers.h"
+#include "../../source/core/slang-token-reader.h"
+
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+
+namespace renderer_test {
+using namespace Slang;
+
+#define SLANG_CUDA_RETURN_ON_FAIL(x) { int _res = (int)(x); if (_res != 0) return SLANG_FAIL; }
+
+static int _calcSMCountPerMultiProcessor(int major, int minor)
+{
+ // Defines for GPU Architecture types (using the SM version to determine
+ // the # of cores per SM
+ struct SMInfo
+ {
+ int sm; // 0xMm (hexadecimal notation), M = SM Major version, and m = SM minor version
+ int coreCount;
+ };
+
+ static const SMInfo infos[] =
+ {
+ {0x30, 192},
+ {0x32, 192},
+ {0x35, 192},
+ {0x37, 192},
+ {0x50, 128},
+ {0x52, 128},
+ {0x53, 128},
+ {0x60, 64},
+ {0x61, 128},
+ {0x62, 128},
+ {0x70, 64},
+ {0x72, 64},
+ {0x75, 64}
+ };
+
+ const int sm = ((major << 4) + minor);
+ for (Index i = 0; i < SLANG_COUNT_OF(infos); ++i)
+ {
+ if (infos[i].sm == sm)
+ {
+ return infos[i].coreCount;
+ }
+ }
+
+ const auto& last = infos[SLANG_COUNT_OF(infos) - 1];
+
+ // It must be newer presumably
+ SLANG_ASSERT(sm > last.coreCount );
+
+ // Default to the last entry
+ return last.coreCount;
+}
+
+static SlangResult _findMaxFlopsDeviceId(int* outDevice)
+{
+ int smPerMultiproc = 0;
+ int maxPerfDevice = -1;
+ int deviceCount = 0;
+ int devicesProhibited = 0;
+
+ uint64_t maxComputePerf = 0;
+ SLANG_CUDA_RETURN_ON_FAIL(cudaGetDeviceCount(&deviceCount));
+
+ // Find the best CUDA capable GPU device
+ for (int currentDevice = 0; currentDevice < deviceCount; ++currentDevice)
+ {
+ int computeMode = -1, major = 0, minor = 0;
+ SLANG_CUDA_RETURN_ON_FAIL(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, currentDevice));
+ SLANG_CUDA_RETURN_ON_FAIL(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, currentDevice));
+ SLANG_CUDA_RETURN_ON_FAIL(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, currentDevice));
+
+ // If this GPU is not running on Compute Mode prohibited,
+ // then we can add it to the list
+ if (computeMode != cudaComputeModeProhibited)
+ {
+ if (major == 9999 && minor == 9999)
+ {
+ smPerMultiproc = 1;
+ }
+ else
+ {
+ smPerMultiproc = _calcSMCountPerMultiProcessor(major, minor);
+ }
+
+ int multiProcessorCount = 0, clockRate = 0;
+ SLANG_CUDA_RETURN_ON_FAIL(cudaDeviceGetAttribute(&multiProcessorCount, cudaDevAttrMultiProcessorCount, currentDevice));
+ SLANG_CUDA_RETURN_ON_FAIL(cudaDeviceGetAttribute(&clockRate, cudaDevAttrClockRate, currentDevice));
+ uint64_t compute_perf = uint64_t(multiProcessorCount) * smPerMultiproc * clockRate;
+
+ if (compute_perf > maxComputePerf)
+ {
+ maxComputePerf = compute_perf;
+ maxPerfDevice = currentDevice;
+ }
+ }
+ else
+ {
+ devicesProhibited++;
+ }
+ }
+
+ if (maxPerfDevice < 0)
+ {
+ return SLANG_FAIL;
+ }
+
+ *outDevice = maxPerfDevice;
+ return SLANG_OK;
+}
+
+static SlangResult _initCuda()
+{
+ static CUresult res = cuInit(0);
+ SLANG_CUDA_RETURN_ON_FAIL(res);
+
+ return SLANG_OK;
+}
+
+
+
+/* static */SlangResult _createDevice(CUcontext* outContext)
+{
+ SLANG_RETURN_ON_FAIL(_initCuda());
+
+ int deviceId;
+ SLANG_RETURN_ON_FAIL(_findMaxFlopsDeviceId(&deviceId));
+ SLANG_CUDA_RETURN_ON_FAIL(cudaSetDevice(deviceId));
+
+ CUcontext context;
+
+ // Create context
+ SLANG_CUDA_RETURN_ON_FAIL(cuCtxCreate(&context, 0, deviceId));
+
+ *outContext = context;
+ return SLANG_OK;
+}
+
+/* static */bool CUDAComputeUtil::canCreateDevice()
+{
+ CUcontext context;
+ if (SLANG_SUCCEEDED(_createDevice(&context)))
+ {
+ cuCtxDestroy(context);
+ return true;
+ }
+
+ return false;
+}
+
+static SlangResult _compute(CUcontext context, CUmodule module, const ShaderCompilerUtil::OutputAndLayout& outputAndLayout)
+{
+ auto request = outputAndLayout.output.request;
+ auto reflection = (slang::ShaderReflection*) spGetReflection(request);
+
+ slang::EntryPointReflection* entryPoint = nullptr;
+ auto entryPointCount = reflection->getEntryPointCount();
+ SLANG_ASSERT(entryPointCount == 1);
+
+ entryPoint = reflection->getEntryPointByIndex(0);
+
+ const char* entryPointName = entryPoint->getName();
+
+ // Get the entry point
+ CUfunction kernel;
+
+ SLANG_CUDA_RETURN_ON_FAIL(cuModuleGetFunction(&kernel, module, entryPointName));
+
+
+ return SLANG_OK;
+}
+
+/* static */SlangResult CUDAComputeUtil::execute(const ShaderCompilerUtil::OutputAndLayout& outputAndLayout)
+{
+ CUcontext context;
+ SLANG_RETURN_ON_FAIL(_createDevice(&context));
+
+ const Index index = outputAndLayout.output.findKernelDescIndex(StageType::Compute);
+ if (index < 0)
+ {
+ return SLANG_FAIL;
+ }
+
+ const auto& kernel = outputAndLayout.output.kernelDescs[index];
+
+ CUmodule module = 0;
+ SLANG_CUDA_RETURN_ON_FAIL(cuModuleLoadData(&module, kernel.codeBegin));
+
+ SLANG_RETURN_ON_FAIL(_compute(context, module, outputAndLayout));
+
+ SLANG_CUDA_RETURN_ON_FAIL(cuModuleUnload(module));
+
+ cuCtxDestroy(context);
+
+ return SLANG_OK;
+}
+
+
+} // renderer_test
diff --git a/tools/render-test/cuda/cuda-compute-util.h b/tools/render-test/cuda/cuda-compute-util.h
new file mode 100644
index 000000000..9c7d83b1f
--- /dev/null
+++ b/tools/render-test/cuda/cuda-compute-util.h
@@ -0,0 +1,21 @@
+#ifndef CUDA_COMPUTE_UTIL_H
+#define CUDA_COMPUTE_UTIL_H
+
+#include "../slang-support.h"
+#include "../options.h"
+
+#include "../../source/core/slang-smart-pointer.h"
+
+namespace renderer_test {
+
+struct CUDAComputeUtil
+{
+ static SlangResult execute(const ShaderCompilerUtil::OutputAndLayout& outputAndLayout);
+
+ static bool canCreateDevice();
+};
+
+
+} // renderer_test
+
+#endif //CPU_MEMORY_BINDING_H
diff --git a/tools/render-test/options.cpp b/tools/render-test/options.cpp
index a614336e7..4d5d83ce5 100644
--- a/tools/render-test/options.cpp
+++ b/tools/render-test/options.cpp
@@ -25,11 +25,12 @@ static gfx::RendererType _toRenderType(Slang::RenderApiType apiType)
using namespace Slang;
switch (apiType)
{
- case RenderApiType::D3D11: return gfx::RendererType::DirectX11;
- case RenderApiType::D3D12: return gfx::RendererType::DirectX12;
+ case RenderApiType::D3D11: return gfx::RendererType::DirectX11;
+ case RenderApiType::D3D12: return gfx::RendererType::DirectX12;
case RenderApiType::OpenGl: return gfx::RendererType::OpenGl;
case RenderApiType::Vulkan: return gfx::RendererType::Vulkan;
case RenderApiType::CPU: return gfx::RendererType::CPU;
+ case RenderApiType::CUDA: return gfx::RendererType::CUDA;
default: return gfx::RendererType::Unknown;
}
}
diff --git a/tools/render-test/render-test-main.cpp b/tools/render-test/render-test-main.cpp
index 1355402cf..a110b6ca9 100644
--- a/tools/render-test/render-test-main.cpp
+++ b/tools/render-test/render-test-main.cpp
@@ -25,6 +25,10 @@
#include "cpu-compute-util.h"
+#if RENDER_TEST_CUDA
+# include "cuda/cuda-compute-util.h"
+#endif
+
namespace renderer_test {
using Slang::Result;
@@ -461,6 +465,13 @@ SLANG_TEST_TOOL_API SlangResult innerMain(Slang::StdWriters* stdWriters, SlangSe
nativeLanguage = SLANG_SOURCE_LANGUAGE_CPP;
slangPassThrough = SLANG_PASS_THROUGH_GENERIC_C_CPP;
break;
+ case RendererType::CUDA:
+ input.target = SLANG_PTX;
+ input.profile = "";
+ nativeLanguage = SLANG_SOURCE_LANGUAGE_CUDA;
+ slangPassThrough = SLANG_PASS_THROUGH_NVRTC;
+ break;
+
default:
fprintf(stderr, "error: unexpected\n");
return SLANG_FAIL;
@@ -502,20 +513,30 @@ SLANG_TEST_TOOL_API SlangResult innerMain(Slang::StdWriters* stdWriters, SlangSe
rendererName << "'" << gOptions.adapter << "'";
}
- // If it's CPU testing we don't need a window or a renderer
- if (gOptions.rendererType == RendererType::CPU)
+ if (gOptions.onlyStartup)
{
- if (gOptions.onlyStartup)
+ switch (gOptions.rendererType)
{
- // Need generic C/C++
- if (SLANG_FAILED(spSessionCheckPassThroughSupport(session, SLANG_PASS_THROUGH_GENERIC_C_CPP)))
+ case RendererType::CUDA:
{
+#if RENDER_TEST_CUDA
+ return SLANG_SUCCEEDED(spSessionCheckPassThroughSupport(session, SLANG_PASS_THROUGH_NVRTC)) && CUDAComputeUtil::canCreateDevice() ? SLANG_OK : SLANG_FAIL;
+#else
return SLANG_FAIL;
+#endif
+ }
+ case RendererType::CPU:
+ {
+ // As long as we have CPU, then this should work
+ return spSessionCheckPassThroughSupport(session, SLANG_PASS_THROUGH_GENERIC_C_CPP);
}
- // Should work ...
- return SLANG_OK;
+ default: break;
}
+ }
+ // If it's CPU testing we don't need a window or a renderer
+ if (gOptions.rendererType == RendererType::CPU)
+ {
ShaderCompilerUtil::OutputAndLayout compilationAndLayout;
SLANG_RETURN_ON_FAIL(ShaderCompilerUtil::compileWithLayout(session, gOptions.sourcePath, gOptions.compileArgs, gOptions.shaderType, input, compilationAndLayout));
@@ -564,10 +585,27 @@ SLANG_TEST_TOOL_API SlangResult innerMain(Slang::StdWriters* stdWriters, SlangSe
}
}
-
return SLANG_OK;
}
+ if (gOptions.rendererType == RendererType::CUDA)
+ {
+ ShaderCompilerUtil::OutputAndLayout compilationAndLayout;
+ SLANG_RETURN_ON_FAIL(ShaderCompilerUtil::compileWithLayout(session, gOptions.sourcePath, gOptions.compileArgs, gOptions.shaderType, input, compilationAndLayout));
+
+#if RENDER_TEST_CUDA
+
+ // TODO(JS):
+ // We don't know how to execute it yet..
+
+ SLANG_RETURN_ON_FAIL(CUDAComputeUtil::execute(compilationAndLayout));
+
+ return SLANG_OK;
+#else
+ return SLANG_FAIL;
+#endif
+ }
+
Slang::RefPtr<Renderer> renderer;
{
RendererUtil::CreateFunc createFunc = RendererUtil::getCreateFunc(gOptions.rendererType);
diff --git a/tools/render-test/slang-support.h b/tools/render-test/slang-support.h
index cd3223c87..97b85ff8f 100644
--- a/tools/render-test/slang-support.h
+++ b/tools/render-test/slang-support.h
@@ -55,6 +55,19 @@ struct ShaderCompilerUtil
spDestroyCompileRequest(request);
}
}
+
+ Slang::Index findKernelDescIndex(gfx::StageType stage) const
+ {
+ for (Slang::Index i = 0; i < kernelDescs.getCount(); ++i)
+ {
+ if (kernelDescs[i].stage == stage)
+ {
+ return i;
+ }
+ }
+ return -1;
+ }
+
List<ShaderProgram::KernelDesc> kernelDescs;
ShaderProgram::Desc desc;
SlangCompileRequest* request = nullptr;
diff --git a/tools/slang-test/slang-test-main.cpp b/tools/slang-test/slang-test-main.cpp
index 6401ac852..9ccf6840f 100644
--- a/tools/slang-test/slang-test-main.cpp
+++ b/tools/slang-test/slang-test-main.cpp
@@ -670,6 +670,11 @@ static SlangResult _extractRenderTestRequirements(const CommandLine& cmdLine, Te
nativeLanguage = SLANG_SOURCE_LANGUAGE_CPP;
passThru = SLANG_PASS_THROUGH_GENERIC_C_CPP;
break;
+ case RenderApiType::CUDA:
+ target = SLANG_PTX;
+ nativeLanguage = SLANG_SOURCE_LANGUAGE_CUDA;
+ passThru = SLANG_PASS_THROUGH_NVRTC;
+ break;
}
SlangSourceLanguage sourceLanguage = nativeLanguage;