summary | refs | log | tree | commit | diff | stats
path: root/tools
diff options
context:
space:
mode:
author: jsmall-nvidia <jsmall@nvidia.com> 2020-03-30 19:23:09 -0400
committer: GitHub <noreply@github.com> 2020-03-30 23:23:09 +0000
commit: ea7690558bca71ce3a9453adff4e0135352a352f (patch)
tree: 3eb983d3f8e6b1c215f6d2818a0f3e793ecb4485 /tools
parent: ad5b60c8b5868c69a979779f201748fb7837fdc9 (diff)
CUDA version handling (#1301)
* render feature for CUDA compute model.
* Use SemanticVersion type.
* Enable CUDA wave tests that require CUDA SM 7.0. Provide mechanism for DownstreamCompiler to specify version numbers.
* Enabled wave-equality.slang
* Make CUDA SM version major version not just a single digit.
* Fix assert.
* DownstreamCompiler::Version -> CapabilityVersion
Diffstat (limited to 'tools')
-rw-r--r-- tools/render-test/cpu-compute-util.cpp 7
-rw-r--r-- tools/render-test/cpu-compute-util.h 4
-rw-r--r-- tools/render-test/cuda/cuda-compute-util.cpp 75
-rw-r--r-- tools/render-test/cuda/cuda-compute-util.h 4
-rw-r--r-- tools/render-test/render-test-main.cpp 23
5 files changed, 100 insertions, 13 deletions
diff --git a/tools/render-test/cpu-compute-util.cpp b/tools/render-test/cpu-compute-util.cpp
index 2ea74052f..e8b9e8b32 100644
--- a/tools/render-test/cpu-compute-util.cpp
+++ b/tools/render-test/cpu-compute-util.cpp
@@ -350,6 +350,13 @@ static SlangResult _newTexture(const InputTextureDesc& desc, slang::TypeLayoutRe
return SLANG_FAIL;
}
+/// Reports whether the named render feature is available for the CPU target.
+/// The feature name is not inspected and the answer is always false. The CPU path
+/// in render-test-main.cpp returns SLANG_E_NOT_AVAILABLE as soon as hasFeature()
+/// reports a requested render feature as missing, so requesting any render
+/// feature makes a CPU run bail out.
+/* static */bool CPUComputeUtil::hasFeature(const UnownedStringSlice& feature)
+{
+ SLANG_UNUSED(feature);
+ // Every feature query is answered 'not available' on CPU; the name is never looked at
+ return false;
+}
+
/* static */SlangResult CPUComputeUtil::calcBindings(const ShaderCompilerUtil::OutputAndLayout& compilationAndLayout, Context& outContext)
{
auto request = compilationAndLayout.output.request;
diff --git a/tools/render-test/cpu-compute-util.h b/tools/render-test/cpu-compute-util.h
index e6e896b6a..c66650506 100644
--- a/tools/render-test/cpu-compute-util.h
+++ b/tools/render-test/cpu-compute-util.h
@@ -49,7 +49,9 @@ struct CPUComputeUtil
void* m_uniformEntryPointParams;
};
-
+ /// True if this feature is available on CPU
+ static bool hasFeature(const Slang::UnownedStringSlice& feature);
+
/// Runs code across run styles and makes sure output buffers match
static SlangResult checkStyleConsistency(ISlangSharedLibrary* sharedLib, const uint32_t dispatchSize[3], const ShaderCompilerUtil::OutputAndLayout& compilationAndLayout);
diff --git a/tools/render-test/cuda/cuda-compute-util.cpp b/tools/render-test/cuda/cuda-compute-util.cpp
index af7c0e6c2..48d73fa93 100644
--- a/tools/render-test/cuda/cuda-compute-util.cpp
+++ b/tools/render-test/cuda/cuda-compute-util.cpp
@@ -5,6 +5,7 @@
#include "../../source/core/slang-std-writers.h"
#include "../../source/core/slang-token-reader.h"
+#include "../../source/core/slang-semantic-version.h"
#include "../bind-location.h"
@@ -307,7 +308,7 @@ static int _calcSMCountPerMultiProcessor(int major, int minor)
return last.coreCount;
}
-static SlangResult _findMaxFlopsDeviceId(int* outDevice)
+static SlangResult _findMaxFlopsDeviceIndex(int* outDeviceIndex)
{
int smPerMultiproc = 0;
int maxPerfDevice = -1;
@@ -360,7 +361,7 @@ static SlangResult _findMaxFlopsDeviceId(int* outDevice)
return SLANG_FAIL;
}
- *outDevice = maxPerfDevice;
+ *outDeviceIndex = maxPerfDevice;
return SLANG_OK;
}
@@ -374,9 +375,13 @@ static SlangResult _initCuda(CUDAReportStyle reportType = CUDAReportStyle::Norma
class ScopeCUDAContext
{
public:
- ScopeCUDAContext() : m_context(nullptr) {}
+ ScopeCUDAContext() :
+ m_context(nullptr),
+ m_device(-1),
+ m_deviceIndex(-1)
+ {}
- SlangResult init(unsigned int flags, CUdevice device, CUDAReportStyle reportType = CUDAReportStyle::Normal)
+ SlangResult init(unsigned int flags, int deviceIndex, CUDAReportStyle reportType = CUDAReportStyle::Normal)
{
SLANG_RETURN_ON_FAIL(_initCuda(reportType));
@@ -386,7 +391,10 @@ public:
m_context = nullptr;
}
- SLANG_CUDA_RETURN_WITH_REPORT_ON_FAIL(cuCtxCreate(&m_context, flags, device), reportType);
+ m_deviceIndex = deviceIndex;
+ SLANG_CUDA_RETURN_ON_FAIL(cuDeviceGet(&m_device, deviceIndex));
+
+ SLANG_CUDA_RETURN_WITH_REPORT_ON_FAIL(cuCtxCreate(&m_context, flags, m_device), reportType);
return SLANG_OK;
}
@@ -394,9 +402,8 @@ public:
{
SLANG_RETURN_ON_FAIL(_initCuda(reportType));
- int deviceId;
- SLANG_RETURN_ON_FAIL(_findMaxFlopsDeviceId(&deviceId));
- SLANG_CUDA_RETURN_WITH_REPORT_ON_FAIL(cudaSetDevice(deviceId), reportType);
+ SLANG_RETURN_ON_FAIL(_findMaxFlopsDeviceIndex(&m_deviceIndex));
+ SLANG_CUDA_RETURN_WITH_REPORT_ON_FAIL(cudaSetDevice(m_deviceIndex), reportType);
if (m_context)
{
@@ -404,7 +411,9 @@ public:
m_context = nullptr;
}
- SLANG_CUDA_RETURN_WITH_REPORT_ON_FAIL(cuCtxCreate(&m_context, flags, deviceId), reportType);
+ SLANG_CUDA_RETURN_ON_FAIL(cuDeviceGet(&m_device, m_deviceIndex));
+
+ SLANG_CUDA_RETURN_WITH_REPORT_ON_FAIL(cuCtxCreate(&m_context, flags, m_device), reportType);
return SLANG_OK;
}
@@ -417,9 +426,57 @@ public:
}
SLANG_FORCE_INLINE operator CUcontext () const { return m_context; }
+ int m_deviceIndex;
+ CUdevice m_device;
CUcontext m_context;
};
+/// Tests a render feature name against the compute capability of a CUDA device.
+///
+/// Only names that start with "cuda_sm_" are handled. The text after that prefix is
+/// handed to SemanticVersion::parse together with '_' (presumably the separator between
+/// version components, e.g. "cuda_sm_7_0" -> 7.0 -- confirm against SemanticVersion) and
+/// the result is compared with the device's (major, minor) compute capability.
+///
+/// @param feature    Feature name to test.
+/// @param outResult  Set to false on entry. On SLANG_OK it holds whether the device's
+///                   version is >= the version named by the feature.
+/// @return SLANG_OK when the comparison was performed. SLANG_FAIL when the name does not
+///         start with "cuda_sm_" or the device's compute mode is cudaComputeModeProhibited.
+///         Failures from version parsing, context creation or the attribute queries go
+///         through SLANG_RETURN_ON_FAIL / SLANG_CUDA_RETURN_ON_FAIL (presumably early
+///         returns of the failing result -- the macro definitions are not visible here).
+/* static */SlangResult CUDAComputeUtil::parseFeature(const Slang::UnownedStringSlice& feature, bool& outResult)
+{
+ outResult = false;
+
+ if (feature.startsWith("cuda_sm_"))
+ {
+ // 8 is the length of the "cuda_sm_" prefix matched above; the slice keeps only what follows it
+ const UnownedStringSlice versionSlice = UnownedStringSlice(feature.begin() + 8, feature.end());
+ SemanticVersion requiredVersion;
+ SLANG_RETURN_ON_FAIL(SemanticVersion::parse(versionSlice, '_', requiredVersion));
+
+ // Need to get the version from the cuda device
+ // NOTE(review): this two-argument call presumably resolves to the ScopeCUDAContext::init overload
+ // that picks the device via _findMaxFlopsDeviceIndex (its signature is outside this hunk) -- confirm.
+ ScopeCUDAContext context;
+ SLANG_RETURN_ON_FAIL(context.init(0, CUDAReportStyle::Silent));
+
+ // Index of the device the context was initialized for; used for all attribute queries below
+ const int deviceIndex = context.m_deviceIndex;
+
+ int computeMode = -1;
+ SLANG_CUDA_RETURN_ON_FAIL(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, deviceIndex));
+
+ // If we don't have compute mode availability, we can't execute
+ if (computeMode == cudaComputeModeProhibited)
+ {
+ return SLANG_FAIL;
+ }
+
+ // Left uninitialized here; each is filled in by its attribute query before being read
+ int major, minor;
+ SLANG_CUDA_RETURN_ON_FAIL(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, deviceIndex));
+ SLANG_CUDA_RETURN_ON_FAIL(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, deviceIndex));
+
+ SemanticVersion actualVersion;
+ actualVersion.set(major, minor);
+
+ // The feature is available when the device is at least as new as the requested version
+ outResult = actualVersion >= requiredVersion;
+
+ return SLANG_OK;
+ }
+
+ // Any name without the "cuda_sm_" prefix is not understood by this function
+ return SLANG_FAIL;
+}
+
+/// Boolean wrapper around parseFeature().
+/// Returns the value parseFeature() produced when it succeeds, and false when it fails,
+/// so a feature name parseFeature() does not recognize is reported as unavailable.
+/* static */bool CUDAComputeUtil::hasFeature(const Slang::UnownedStringSlice& feature)
+{
+ // Not initialized here: parseFeature() assigns outResult = false before doing anything else
+ bool res;
+ return SLANG_SUCCEEDED(parseFeature(feature, res)) ? res : false;
+}
+
/* static */bool CUDAComputeUtil::canCreateDevice()
{
ScopeCUDAContext context;
diff --git a/tools/render-test/cuda/cuda-compute-util.h b/tools/render-test/cuda/cuda-compute-util.h
index f15c9d4e3..bc3d7d233 100644
--- a/tools/render-test/cuda/cuda-compute-util.h
+++ b/tools/render-test/cuda/cuda-compute-util.h
@@ -46,6 +46,10 @@ struct CUDAComputeUtil
List<BindSet::Value*> m_buffers;
};
+ static SlangResult parseFeature(const Slang::UnownedStringSlice& feature, bool& outResult);
+
+ static bool hasFeature(const Slang::UnownedStringSlice& feature);
+
static SlangResult createTextureResource(const ShaderInputLayoutEntry& srcEntry, slang::TypeLayoutReflection* typeLayout, RefPtr<CUDAResource>& outResource);
static SlangResult execute(const ShaderCompilerUtil::OutputAndLayout& outputAndLayout, const uint32_t dispatchSize[3], Context& outContext);
diff --git a/tools/render-test/render-test-main.cpp b/tools/render-test/render-test-main.cpp
index ab041b5bc..1d88ee500 100644
--- a/tools/render-test/render-test-main.cpp
+++ b/tools/render-test/render-test-main.cpp
@@ -544,6 +544,15 @@ static SlangResult _innerMain(Slang::StdWriters* stdWriters, SlangSession* sessi
// If it's CPU testing we don't need a window or a renderer
if (gOptions.rendererType == RendererType::CPU)
{
+ // Check we have all the required features
+ for (const auto& renderFeature : gOptions.renderFeatures)
+ {
+ if (!CPUComputeUtil::hasFeature(renderFeature.getUnownedSlice()))
+ {
+ return SLANG_E_NOT_AVAILABLE;
+ }
+ }
+
ShaderCompilerUtil::OutputAndLayout compilationAndLayout;
SLANG_RETURN_ON_FAIL(ShaderCompilerUtil::compileWithLayout(session, gOptions.sourcePath, gOptions.compileArgs, gOptions.shaderType, input, compilationAndLayout));
@@ -604,12 +613,20 @@ static SlangResult _innerMain(Slang::StdWriters* stdWriters, SlangSession* sessi
}
if (gOptions.rendererType == RendererType::CUDA)
- {
+ {
+#if RENDER_TEST_CUDA
+ // Check we have all the required features
+ for (const auto& renderFeature : gOptions.renderFeatures)
+ {
+ if (!CUDAComputeUtil::hasFeature(renderFeature.getUnownedSlice()))
+ {
+ return SLANG_E_NOT_AVAILABLE;
+ }
+ }
+
ShaderCompilerUtil::OutputAndLayout compilationAndLayout;
SLANG_RETURN_ON_FAIL(ShaderCompilerUtil::compileWithLayout(session, gOptions.sourcePath, gOptions.compileArgs, gOptions.shaderType, input, compilationAndLayout));
-#if RENDER_TEST_CUDA
-
const uint64_t startTicks = ProcessUtil::getClockTick();
CUDAComputeUtil::Context context;