summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTim Foley <tfoleyNV@users.noreply.github.com>2020-09-16 17:45:07 -0700
committerGitHub <noreply@github.com>2020-09-16 17:45:07 -0700
commit3e2cb34bf112722368c296130b1424de267d2b3c (patch)
tree17babf78043854903a7ea300d33db5445e948572
parent8dd0d26466b7b84b0575031bff2ced8b3b1a1bac (diff)
Fix some issues around dim3 for CUDA (#1544)
The logic we use to compute `SV_DispatchThreadID` and friends for CUDA makes use of the `gridDim` and `blockDim` built-in variables in CUDA. These variables have type `dim3`, which is similar to `uint3` but is considered a distinct type for some reason. The logic for computing the `SV`s currently pretends that `gridDim` and `blockDim` are `uint3`s, and this means that the code it emits doesn't always compile cleanly (although it does in our existing test cases...). This change adds a few overloads that work on `dim3` to the CUDA prelude and that seem to make the code we emit work again. Note: This change should be seen as a somewhat hacky quick fix rather than a real resolution to the underlying issue. It would probably be better if we emitted code that replaces uses of `gridDim` with `uint3(gridDim.x, gridDim.y, gridDim.z)` instead, to ensure that we get the typing correct, even if the result looks less idiomatic.
-rw-r--r--prelude/slang-cuda-prelude.h14
1 files changed, 14 insertions, 0 deletions
diff --git a/prelude/slang-cuda-prelude.h b/prelude/slang-cuda-prelude.h
index feec1562a..9b485dbe5 100644
--- a/prelude/slang-cuda-prelude.h
+++ b/prelude/slang-cuda-prelude.h
@@ -1203,6 +1203,20 @@ __inline__ __device__ uint4 _waveMatchMultiple(WarpMask mask, const T& inVal)
return make_uint4(matchBits, 0, 0, 0);
}
+/// Returns component `b` of the `dim3` value `a` (0 -> x, 1 -> y, 2 -> z).
+///
+/// This overload exists so that emitted code which treats `dim3` values
+/// as if they were `uint3` still compiles.
+///
+/// `b` must lie in [0, 3); this is checked with `assert`.
+///
+/// Declared `__inline__` like the other prelude helpers, since the definition
+/// lives in a header.
+__inline__ __device__ uint getAt(dim3 a, int b)
+{
+    assert(b >= 0 && b < 3);
+    // Pick the member by name instead of `(&a.x)[b]`, which would step a pointer
+    // past a single `unsigned int` member object.
+    switch (b)
+    {
+    case 0: return a.x;
+    case 1: return a.y;
+    default: return a.z;
+    }
+}
+/// Component-wise product of a `uint3` and a `dim3`.
+///
+/// `dim3` is a distinct type from `uint3`, so the mixed-type product needs its
+/// own overload.
+///
+/// Returns a `uint3` whose x/y/z are `a.x * b.x`, `a.y * b.y`, `a.z * b.z`.
+///
+/// Declared `__inline__` like the other prelude helpers, since the definition
+/// lives in a header.
+__inline__ __device__ uint3 operator*(uint3 a, dim3 b)
+{
+    return make_uint3(a.x * b.x, a.y * b.y, a.z * b.z);
+}
+
/* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! */