summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTim Foley <tfoleyNV@users.noreply.github.com>2020-10-05 11:10:53 -0700
committerGitHub <noreply@github.com>2020-10-05 11:10:53 -0700
commit41d8610653cacfb763e3e1a1c538e17037703108 (patch)
tree4f9b0c9c7e46a47feac40ecfa6b6b3a9cb027921
parentd930c65e7fef6414af363e1f8d4fff52beb448af (diff)
Small fixes for CUDA code emit (#1564)
* Small fixes for CUDA code emit * Add a CUDA translation to `GroupMemoryBarrierWithWaveSync()`. We map this to `__syncwarp()` for CUDA (with no mask, implying a full-warp sync). * Consistently use `SLANG_PRELUDE_ASSERT` for assertions introduced in code emit (rather than just using the bare `assert(...)` function, which is not included by our CUDA prelude by default) * Add a new `SLANG_CUDA_STRUCTURED_BUFFER_NO_COUNT` flag to the CUDA prelude that allows the `count` field to be omitted from `(RW)StructuredBuffer<T>`. This is a bit of a hacky because the computed layouts will still assume the `count` field is present, but this feature is required by at least one client application for now. A better long-term fix will take more time to design and implement. * fixup: CUDA prelude code fix for pedantic compilers Co-authored-by: Tim Foley <tim.foley.is@gmail.com> Co-authored-by: Yong He <yonghe@outlook.com>
-rw-r--r--prelude/slang-cuda-prelude.h44
-rw-r--r--source/slang/hlsl.meta.slang1
-rw-r--r--source/slang/slang-emit-cpp.cpp4
3 files changed, 35 insertions, 14 deletions
diff --git a/prelude/slang-cuda-prelude.h b/prelude/slang-cuda-prelude.h
index 9b485dbe5..aebcffc10 100644
--- a/prelude/slang-cuda-prelude.h
+++ b/prelude/slang-cuda-prelude.h
@@ -339,27 +339,47 @@ SLANG_CUDA_CALL uint32_t U64_countbits(uint64_t v)
// Missing Load(_In_ int Location, _Out_ uint Status);
template <typename T>
-struct RWStructuredBuffer
+struct StructuredBuffer
{
- SLANG_CUDA_CALL T& operator[](size_t index) const { SLANG_CUDA_BOUND_CHECK(index, count); return data[index]; }
- SLANG_CUDA_CALL const T& Load(size_t index) const { SLANG_CUDA_BOUND_CHECK(index, count); return data[index]; }
+ SLANG_CUDA_CALL const T& operator[](size_t index) const
+ {
+#ifndef SLANG_CUDA_STRUCTURED_BUFFER_NO_COUNT
+ SLANG_CUDA_BOUND_CHECK(index, count);
+#endif
+ return data[index];
+ }
+
+ SLANG_CUDA_CALL const T& Load(size_t index) const
+ {
+#ifndef SLANG_CUDA_STRUCTURED_BUFFER_NO_COUNT
+ SLANG_CUDA_BOUND_CHECK(index, count);
+#endif
+ return data[index];
+ }
+
+#ifndef SLANG_CUDA_STRUCTURED_BUFFER_NO_COUNT
SLANG_CUDA_CALL void GetDimensions(uint32_t* outNumStructs, uint32_t* outStride) { *outNumStructs = uint32_t(count); *outStride = uint32_t(sizeof(T)); }
-
+#endif
+
T* data;
+#ifndef SLANG_CUDA_STRUCTURED_BUFFER_NO_COUNT
size_t count;
+#endif
};
template <typename T>
-struct StructuredBuffer
+struct RWStructuredBuffer : StructuredBuffer<T>
{
- SLANG_CUDA_CALL const T& operator[](size_t index) const { SLANG_CUDA_BOUND_CHECK(index, count); return data[index]; }
- SLANG_CUDA_CALL const T& Load(size_t index) const { SLANG_CUDA_BOUND_CHECK(index, count); return data[index]; }
- SLANG_CUDA_CALL void GetDimensions(uint32_t* outNumStructs, uint32_t* outStride) { *outNumStructs = uint32_t(count); *outStride = uint32_t(sizeof(T)); }
-
- T* data;
- size_t count;
+ SLANG_CUDA_CALL T& operator[](size_t index) const
+ {
+#ifndef SLANG_CUDA_STRUCTURED_BUFFER_NO_COUNT
+ SLANG_CUDA_BOUND_CHECK(index, this->count);
+#endif
+ return this->data[index];
+ }
};
+
// Missing Load(_In_ int Location, _Out_ uint Status);
struct ByteAddressBuffer
@@ -1205,7 +1225,7 @@ __inline__ __device__ uint4 _waveMatchMultiple(WarpMask mask, const T& inVal)
__device__ uint getAt(dim3 a, int b)
{
- assert(b >= 0 && b < 3);
+ SLANG_PRELUDE_ASSERT(b >= 0 && b < 3);
return (&a.x)[b];
}
__device__ uint3 operator*(uint3 a, dim3 b)
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index 7c5ca0027..e677b9020 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -2987,6 +2987,7 @@ __glsl_extension(GL_KHR_shader_subgroup_basic)
__spirv_version(1.3)
__target_intrinsic(glsl, "subgroupBarrier()")
__target_intrinsic(hlsl, "GroupMemoryBarrier()")
+__target_intrinsic(cuda, "__syncwarp()")
void GroupMemoryBarrierWithWaveSync();
// NOTE! WaveMaskBroadcastLaneAt is *NOT* standard HLSL
diff --git a/source/slang/slang-emit-cpp.cpp b/source/slang/slang-emit-cpp.cpp
index 5359740b4..1e2482bb5 100644
--- a/source/slang/slang-emit-cpp.cpp
+++ b/source/slang/slang-emit-cpp.cpp
@@ -984,7 +984,7 @@ void CPPSourceEmitter::_emitGetAtDefinition(const UnownedStringSlice& funcName,
{
int vecSize = int(getIntVal(vectorType->getElementCount()));
- writer->emit("assert(b >= 0 && b < ");
+ writer->emit("SLANG_PRELUDE_ASSERT(b >= 0 && b < ");
writer->emit(vecSize);
writer->emit(");\n");
if (lValue)
@@ -997,7 +997,7 @@ void CPPSourceEmitter::_emitGetAtDefinition(const UnownedStringSlice& funcName,
//int colCount = int(getIntVal(matrixType->getColumnCount()));
int rowCount = int(getIntVal(matrixType->getRowCount()));
- writer->emit("assert(b >= 0 && b < ");
+ writer->emit("SLANG_PRELUDE_ASSERT(b >= 0 && b < ");
writer->emit(rowCount);
writer->emit(");\n");