From 41d8610653cacfb763e3e1a1c538e17037703108 Mon Sep 17 00:00:00 2001 From: Tim Foley Date: Mon, 5 Oct 2020 11:10:53 -0700 Subject: Small fixes for CUDA code emit (#1564) * Small fixes for CUDA code emit * Add a CUDA translation to `GroupMemoryBarrierWithWaveSync()`. We map this to `__syncwarp()` for CUDA (with no mask, implying a full-warp sync). * Consistently use `SLANG_PRELUDE_ASSERT` for assertions introduced in code emit (rather than just using the bare `assert(...)` function, which is not included by our CUDA prelude by default) * Add a new `SLANG_CUDA_STRUCTURED_BUFFER_NO_COUNT` flag to the CUDA prelude that allows the `count` field to be omitted from `(RW)StructuredBuffer`. This is a bit of a hacky because the computed layouts will still assume the `count` field is present, but this feature is required by at least one client application for now. A better long-term fix will take more time to design and implement. * fixup: CUDA prelude code fix for pedantic compilers Co-authored-by: Tim Foley Co-authored-by: Yong He --- prelude/slang-cuda-prelude.h | 44 ++++++++++++++++++++++++++++++----------- source/slang/hlsl.meta.slang | 1 + source/slang/slang-emit-cpp.cpp | 4 ++-- 3 files changed, 35 insertions(+), 14 deletions(-) diff --git a/prelude/slang-cuda-prelude.h b/prelude/slang-cuda-prelude.h index 9b485dbe5..aebcffc10 100644 --- a/prelude/slang-cuda-prelude.h +++ b/prelude/slang-cuda-prelude.h @@ -339,27 +339,47 @@ SLANG_CUDA_CALL uint32_t U64_countbits(uint64_t v) // Missing Load(_In_ int Location, _Out_ uint Status); template -struct RWStructuredBuffer +struct StructuredBuffer { - SLANG_CUDA_CALL T& operator[](size_t index) const { SLANG_CUDA_BOUND_CHECK(index, count); return data[index]; } - SLANG_CUDA_CALL const T& Load(size_t index) const { SLANG_CUDA_BOUND_CHECK(index, count); return data[index]; } + SLANG_CUDA_CALL const T& operator[](size_t index) const + { +#ifndef SLANG_CUDA_STRUCTURED_BUFFER_NO_COUNT + SLANG_CUDA_BOUND_CHECK(index, count); +#endif + return data[index]; + } + + SLANG_CUDA_CALL const T& Load(size_t index) const + { +#ifndef SLANG_CUDA_STRUCTURED_BUFFER_NO_COUNT + SLANG_CUDA_BOUND_CHECK(index, count); +#endif + return data[index]; + } + +#ifndef SLANG_CUDA_STRUCTURED_BUFFER_NO_COUNT SLANG_CUDA_CALL void GetDimensions(uint32_t* outNumStructs, uint32_t* outStride) { *outNumStructs = uint32_t(count); *outStride = uint32_t(sizeof(T)); } - +#endif + T* data; +#ifndef SLANG_CUDA_STRUCTURED_BUFFER_NO_COUNT size_t count; +#endif }; template -struct StructuredBuffer +struct RWStructuredBuffer : StructuredBuffer { - SLANG_CUDA_CALL const T& operator[](size_t index) const { SLANG_CUDA_BOUND_CHECK(index, count); return data[index]; } - SLANG_CUDA_CALL const T& Load(size_t index) const { SLANG_CUDA_BOUND_CHECK(index, count); return data[index]; } - SLANG_CUDA_CALL void GetDimensions(uint32_t* outNumStructs, uint32_t* outStride) { *outNumStructs = uint32_t(count); *outStride = uint32_t(sizeof(T)); } - - T* data; - size_t count; + SLANG_CUDA_CALL T& operator[](size_t index) const + { +#ifndef SLANG_CUDA_STRUCTURED_BUFFER_NO_COUNT + SLANG_CUDA_BOUND_CHECK(index, this->count); +#endif + return this->data[index]; + } }; + // Missing Load(_In_ int Location, _Out_ uint Status); struct ByteAddressBuffer @@ -1205,7 +1225,7 @@ __inline__ __device__ uint4 _waveMatchMultiple(WarpMask mask, const T& inVal) __device__ uint getAt(dim3 a, int b) { - assert(b >= 0 && b < 3); + SLANG_PRELUDE_ASSERT(b >= 0 && b < 3); return (&a.x)[b]; } __device__ uint3 operator*(uint3 a, dim3 b) diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index 7c5ca0027..e677b9020 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -2987,6 +2987,7 @@ __glsl_extension(GL_KHR_shader_subgroup_basic) __spirv_version(1.3) __target_intrinsic(glsl, "subgroupBarrier()") __target_intrinsic(hlsl, "GroupMemoryBarrier()") +__target_intrinsic(cuda, "__syncwarp()") void GroupMemoryBarrierWithWaveSync(); // NOTE! WaveMaskBroadcastLaneAt is *NOT* standard HLSL diff --git a/source/slang/slang-emit-cpp.cpp b/source/slang/slang-emit-cpp.cpp index 5359740b4..1e2482bb5 100644 --- a/source/slang/slang-emit-cpp.cpp +++ b/source/slang/slang-emit-cpp.cpp @@ -984,7 +984,7 @@ void CPPSourceEmitter::_emitGetAtDefinition(const UnownedStringSlice& funcName, { int vecSize = int(getIntVal(vectorType->getElementCount())); - writer->emit("assert(b >= 0 && b < "); + writer->emit("SLANG_PRELUDE_ASSERT(b >= 0 && b < "); writer->emit(vecSize); writer->emit(");\n"); if (lValue) @@ -997,7 +997,7 @@ void CPPSourceEmitter::_emitGetAtDefinition(const UnownedStringSlice& funcName, //int colCount = int(getIntVal(matrixType->getColumnCount())); int rowCount = int(getIntVal(matrixType->getRowCount())); - writer->emit("assert(b >= 0 && b < "); + writer->emit("SLANG_PRELUDE_ASSERT(b >= 0 && b < "); writer->emit(rowCount); writer->emit(");\n"); -- cgit v1.2.3