diff options
| author | jsmall-nvidia <jsmall@nvidia.com> | 2020-01-08 11:09:20 -0500 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-01-08 11:09:20 -0500 |
| commit | cae5ddd4a2c9343ec7367c9049c5cc0c8628a9c4 (patch) | |
| tree | c8200a495f3c0bc5a841ce752fdfb13a73278faf /tools/render-test/cuda/cuda-compute-util.cpp | |
| parent | 17285faf9b4fe7f6c28b43972212068465bdb42e (diff) | |
Setup of runtime cuda device (#1162)
* CUDA generated first test compiles.
* WIP on enabling CUDA in render-test.
* Detect CUDA_PATH environment variable to build CUDA support into render-test.
Added WIP cuda-compute-util.cpp/h
Added CUDA as a renderer type.
* Fix libraries needed for cuda in premake.
* Added -enable-cuda premake option. Defaults to false.
* Creates CUDA device, loads PTX and finds entry point.
* Fix some erroneous cruft from slang-cuda-prelude.h
Diffstat (limited to 'tools/render-test/cuda/cuda-compute-util.cpp')
| -rw-r--r-- | tools/render-test/cuda/cuda-compute-util.cpp | 206 |
1 files changed, 206 insertions, 0 deletions
diff --git a/tools/render-test/cuda/cuda-compute-util.cpp b/tools/render-test/cuda/cuda-compute-util.cpp new file mode 100644 index 000000000..138f842b4 --- /dev/null +++ b/tools/render-test/cuda/cuda-compute-util.cpp @@ -0,0 +1,206 @@ + +#include "cuda-compute-util.h" + +#include "../../slang-com-helper.h" + +#include "../../source/core/slang-std-writers.h" +#include "../../source/core/slang-token-reader.h" + +#include <cuda.h> +#include <cuda_runtime_api.h> + +namespace renderer_test { +using namespace Slang; + +#define SLANG_CUDA_RETURN_ON_FAIL(x) { int _res = (int)(x); if (_res != 0) return SLANG_FAIL; } + +static int _calcSMCountPerMultiProcessor(int major, int minor) +{ + // Defines for GPU Architecture types (using the SM version to determine + // the # of cores per SM + struct SMInfo + { + int sm; // 0xMm (hexadecimal notation), M = SM Major version, and m = SM minor version + int coreCount; + }; + + static const SMInfo infos[] = + { + {0x30, 192}, + {0x32, 192}, + {0x35, 192}, + {0x37, 192}, + {0x50, 128}, + {0x52, 128}, + {0x53, 128}, + {0x60, 64}, + {0x61, 128}, + {0x62, 128}, + {0x70, 64}, + {0x72, 64}, + {0x75, 64} + }; + + const int sm = ((major << 4) + minor); + for (Index i = 0; i < SLANG_COUNT_OF(infos); ++i) + { + if (infos[i].sm == sm) + { + return infos[i].coreCount; + } + } + + const auto& last = infos[SLANG_COUNT_OF(infos) - 1]; + + // It must be newer presumably + SLANG_ASSERT(sm > last.coreCount ); + + // Default to the last entry + return last.coreCount; +} + +static SlangResult _findMaxFlopsDeviceId(int* outDevice) +{ + int smPerMultiproc = 0; + int maxPerfDevice = -1; + int deviceCount = 0; + int devicesProhibited = 0; + + uint64_t maxComputePerf = 0; + SLANG_CUDA_RETURN_ON_FAIL(cudaGetDeviceCount(&deviceCount)); + + // Find the best CUDA capable GPU device + for (int currentDevice = 0; currentDevice < deviceCount; ++currentDevice) + { + int computeMode = -1, major = 0, minor = 0; + 
SLANG_CUDA_RETURN_ON_FAIL(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, currentDevice)); + SLANG_CUDA_RETURN_ON_FAIL(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, currentDevice)); + SLANG_CUDA_RETURN_ON_FAIL(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, currentDevice)); + + // If this GPU is not running on Compute Mode prohibited, + // then we can add it to the list + if (computeMode != cudaComputeModeProhibited) + { + if (major == 9999 && minor == 9999) + { + smPerMultiproc = 1; + } + else + { + smPerMultiproc = _calcSMCountPerMultiProcessor(major, minor); + } + + int multiProcessorCount = 0, clockRate = 0; + SLANG_CUDA_RETURN_ON_FAIL(cudaDeviceGetAttribute(&multiProcessorCount, cudaDevAttrMultiProcessorCount, currentDevice)); + SLANG_CUDA_RETURN_ON_FAIL(cudaDeviceGetAttribute(&clockRate, cudaDevAttrClockRate, currentDevice)); + uint64_t compute_perf = uint64_t(multiProcessorCount) * smPerMultiproc * clockRate; + + if (compute_perf > maxComputePerf) + { + maxComputePerf = compute_perf; + maxPerfDevice = currentDevice; + } + } + else + { + devicesProhibited++; + } + } + + if (maxPerfDevice < 0) + { + return SLANG_FAIL; + } + + *outDevice = maxPerfDevice; + return SLANG_OK; +} + +static SlangResult _initCuda() +{ + static CUresult res = cuInit(0); + SLANG_CUDA_RETURN_ON_FAIL(res); + + return SLANG_OK; +} + + + +/* static */SlangResult _createDevice(CUcontext* outContext) +{ + SLANG_RETURN_ON_FAIL(_initCuda()); + + int deviceId; + SLANG_RETURN_ON_FAIL(_findMaxFlopsDeviceId(&deviceId)); + SLANG_CUDA_RETURN_ON_FAIL(cudaSetDevice(deviceId)); + + CUcontext context; + + // Create context + SLANG_CUDA_RETURN_ON_FAIL(cuCtxCreate(&context, 0, deviceId)); + + *outContext = context; + return SLANG_OK; +} + +/* static */bool CUDAComputeUtil::canCreateDevice() +{ + CUcontext context; + if (SLANG_SUCCEEDED(_createDevice(&context))) + { + cuCtxDestroy(context); + return true; + } + + return false; +} + +static SlangResult 
_compute(CUcontext context, CUmodule module, const ShaderCompilerUtil::OutputAndLayout& outputAndLayout) +{ + auto request = outputAndLayout.output.request; + auto reflection = (slang::ShaderReflection*) spGetReflection(request); + + slang::EntryPointReflection* entryPoint = nullptr; + auto entryPointCount = reflection->getEntryPointCount(); + SLANG_ASSERT(entryPointCount == 1); + + entryPoint = reflection->getEntryPointByIndex(0); + + const char* entryPointName = entryPoint->getName(); + + // Get the entry point + CUfunction kernel; + + SLANG_CUDA_RETURN_ON_FAIL(cuModuleGetFunction(&kernel, module, entryPointName)); + + + return SLANG_OK; +} + +/* static */SlangResult CUDAComputeUtil::execute(const ShaderCompilerUtil::OutputAndLayout& outputAndLayout) +{ + CUcontext context; + SLANG_RETURN_ON_FAIL(_createDevice(&context)); + + const Index index = outputAndLayout.output.findKernelDescIndex(StageType::Compute); + if (index < 0) + { + return SLANG_FAIL; + } + + const auto& kernel = outputAndLayout.output.kernelDescs[index]; + + CUmodule module = 0; + SLANG_CUDA_RETURN_ON_FAIL(cuModuleLoadData(&module, kernel.codeBegin)); + + SLANG_RETURN_ON_FAIL(_compute(context, module, outputAndLayout)); + + SLANG_CUDA_RETURN_ON_FAIL(cuModuleUnload(module)); + + cuCtxDestroy(context); + + return SLANG_OK; +} + + +} // renderer_test |
