Better diagnostics on failure on CUDA. (#1288)

* Better diagnostics on failure on CUDA. * Catch exceptions in render-test * Added ability to disable reporting on CUDA failures * Stopped using exception for reporting (just write to StdWriter::out()) * Removed CUDAResult type * Don't set arch type on nvrtc to see if that fixes CI issues. * Try compute_30 on CUDA. * Added ability to IGNORE_ a test. Disabled rw-texture-simple and texture-get-dimensions * Disable tests that require CUDA SM7.0. Use DISABLE_ prefix to disable tests. * Disable signalUnexpectedError doing printf.
author: jsmall-nvidia <jsmall@nvidia.com> 2020-03-25 14:08:21 -0400
committer: GitHub <noreply@github.com> 2020-03-25 14:08:21 -0400
commit: 28a0ca96a1ad2a3f0e09cc97b866f3b6338a09fa (patch)
tree: 271232cf6aa1917c23298d3dc1dc995ef65832f9 /tools/render-test/cuda/cuda-compute-util.cpp
parent: 889132e7e3c79ae364fa3882646861e5b14df503 (diff)
1 files changed, 97 insertions, 20 deletions
diff --git a/tools/render-test/cuda/cuda-compute-util.cpp b/tools/render-test/cuda/cuda-compute-util.cpp
index 96b4e3b3e..af7c0e6c2 100644
--- a/tools/render-test/cuda/cuda-compute-util.cpp
+++ b/tools/render-test/cuda/cuda-compute-util.cpp
@@ -18,14 +18,96 @@ using namespace Slang;
 SLANG_FORCE_INLINE static bool _isError(CUresult result) { return result != 0; }
 SLANG_FORCE_INLINE static bool _isError(cudaError_t result) { return result != 0; }
 
-#if 0
-#define SLANG_CUDA_RETURN_ON_FAIL(x) { auto _res = x; if (_isError(_res)) return SLANG_FAIL; }
+// An enum used to control whether errors are reported when a CUDA call fails.
+enum class CUDAReportStyle
+{
+    Normal,
+    Silent,
+};
+
+struct CUDAErrorInfo
+{
+    CUDAErrorInfo(const char* filePath, int lineNo, const char* errorName = nullptr, const char* errorString = nullptr):
+        m_filePath(filePath),
+        m_lineNo(lineNo),
+        m_errorName(errorName),
+        m_errorString(errorString)
+    {
+    }
+    SlangResult handle() const
+    {
+        StringBuilder builder;
+        builder << "Error: " << m_filePath << " (" << m_lineNo << ") :";
+
+        if (m_errorName)
+        {
+            builder << m_errorName << " : ";
+        }
+        if (m_errorString)
+        {
+            builder << m_errorString;
+        }
+
+        StdWriters::getError().put(builder.getUnownedSlice());
+
+        //Slang::signalUnexpectedError(builder.getBuffer());
+        return SLANG_FAIL;
+    }
+
+    const char* m_filePath;
+    int m_lineNo;
+    const char* m_errorName;
+    const char* m_errorString;
+};
+
+#if 1
+// If this code path is enabled, CUDA errors are written directly to the StdWriters error stream.
+
+static SlangResult _handleCUDAError(CUresult cuResult, const char* file, int line)
+{
+    CUDAErrorInfo info(file, line);
+    cuGetErrorString(cuResult, &info.m_errorString);
+    cuGetErrorName(cuResult, &info.m_errorName);
+    return info.handle();
+}
+
+static SlangResult _handleCUDAError(cudaError_t error, const char* file, int line)
+{
+    return CUDAErrorInfo(file, line, cudaGetErrorName(error), cudaGetErrorString(error)).handle();
+}
+
+#define SLANG_CUDA_HANDLE_ERROR(x) _handleCUDAError(_res, __FILE__, __LINE__)
+
 #else
+// If this code path is enabled, errors are not reported, but an assert can be enabled.
 
-#define SLANG_CUDA_RETURN_ON_FAIL(x) { auto _res = x; if (_isError(_res)) { SLANG_ASSERT(!"Failed CUDA call"); return SLANG_FAIL; } }
+static SlangResult _handleCUDAError(CUresult cuResult)
+{
+    SLANG_UNUSED(cuResult);
+    //SLANG_ASSERT(!"Failed CUDA call");
+    return SLANG_FAIL;
+}
 
+static SlangResult _handleCUDAError(cudaError_t error)
+{
+    SLANG_UNUSED(error);
+    //SLANG_ASSERT(!"Failed CUDA call");
+    return SLANG_FAIL;
+}
+
+#define SLANG_CUDA_HANDLE_ERROR(x) _handleCUDAError(_res)
 #endif
 
+#define SLANG_CUDA_RETURN_ON_FAIL(x) { auto _res = x; if (_isError(_res)) return SLANG_CUDA_HANDLE_ERROR(_res); }
+#define SLANG_CUDA_RETURN_WITH_REPORT_ON_FAIL(x, r) \
+    { \
+        auto _res = x; \
+        if (_isError(_res)) \
+        { \
+            return (r == CUDAReportStyle::Normal) ? SLANG_CUDA_HANDLE_ERROR(_res) : SLANG_FAIL; \
+        } \
+    } \
+
 #define SLANG_CUDA_ASSERT_ON_FAIL(x) { auto _res = x; if (_isError(_res)) { SLANG_ASSERT(!"Failed CUDA call"); }; }
 
 class MemoryCUDAResource : public CUDAResource
@@ -282,11 +364,10 @@ static SlangResult _findMaxFlopsDeviceId(int* outDevice)
     return SLANG_OK;
 }
 
-static SlangResult _initCuda()
+static SlangResult _initCuda(CUDAReportStyle reportType = CUDAReportStyle::Normal)
 {
     static CUresult res = cuInit(0);
-    SLANG_CUDA_RETURN_ON_FAIL(res);
-
+    SLANG_CUDA_RETURN_WITH_REPORT_ON_FAIL(res, reportType);
     return SLANG_OK;
 }
 
@@ -295,39 +376,35 @@ class ScopeCUDAContext
 public:
     ScopeCUDAContext() : m_context(nullptr) {}
 
-    SlangResult init(unsigned int flags, CUdevice device)
+    SlangResult init(unsigned int flags, CUdevice device, CUDAReportStyle reportType = CUDAReportStyle::Normal)
     {
-        SLANG_RETURN_ON_FAIL(_initCuda());
+        SLANG_RETURN_ON_FAIL(_initCuda(reportType));
 
         if (m_context)
         {
             cuCtxDestroy(m_context);
             m_context = nullptr;
         }
-        if (_isError(cuCtxCreate(&m_context, flags, device)))
-        {
-            return SLANG_FAIL;
-        }
+
+        SLANG_CUDA_RETURN_WITH_REPORT_ON_FAIL(cuCtxCreate(&m_context, flags, device), reportType);
         return SLANG_OK;
     }
 
-    SlangResult init(unsigned int flags)
+    SlangResult init(unsigned int flags, CUDAReportStyle reportType = CUDAReportStyle::Normal)
     {
-        SLANG_RETURN_ON_FAIL(_initCuda());
+        SLANG_RETURN_ON_FAIL(_initCuda(reportType));
 
         int deviceId;
         SLANG_RETURN_ON_FAIL(_findMaxFlopsDeviceId(&deviceId));
-        SLANG_CUDA_RETURN_ON_FAIL(cudaSetDevice(deviceId));
+        SLANG_CUDA_RETURN_WITH_REPORT_ON_FAIL(cudaSetDevice(deviceId), reportType);
 
         if (m_context)
         {
             cuCtxDestroy(m_context);
             m_context = nullptr;
         }
-        if (_isError(cuCtxCreate(&m_context, flags, deviceId)))
-        {
-            return SLANG_FAIL;
-        }
+
+        SLANG_CUDA_RETURN_WITH_REPORT_ON_FAIL(cuCtxCreate(&m_context, flags, deviceId), reportType);
         return SLANG_OK;
     }
 
@@ -346,7 +423,7 @@ public:
 /* static */bool CUDAComputeUtil::canCreateDevice()
 {
     ScopeCUDAContext context;
-    return SLANG_SUCCEEDED(context.init(0));
+    return SLANG_SUCCEEDED(context.init(0, CUDAReportStyle::Silent));
 }
 
 static bool _hasReadAccess(SlangResourceAccess access)
author	jsmall-nvidia <jsmall@nvidia.com>	2020-03-25 14:08:21 -0400
committer	GitHub <noreply@github.com>	2020-03-25 14:08:21 -0400
commit	28a0ca96a1ad2a3f0e09cc97b866f3b6338a09fa (patch)
tree	271232cf6aa1917c23298d3dc1dc995ef65832f9 /tools/render-test/cuda/cuda-compute-util.cpp
parent	889132e7e3c79ae364fa3882646861e5b14df503 (diff)