diff options
| author | Tim Foley <tfoleyNV@users.noreply.github.com> | 2020-10-05 11:10:53 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-10-05 11:10:53 -0700 |
| commit | 41d8610653cacfb763e3e1a1c538e17037703108 (patch) | |
| tree | 4f9b0c9c7e46a47feac40ecfa6b6b3a9cb027921 | |
| parent | d930c65e7fef6414af363e1f8d4fff52beb448af (diff) | |
Small fixes for CUDA code emit (#1564)
* Small fixes for CUDA code emit
* Add a CUDA translation to `GroupMemoryBarrierWithWaveSync()`. We map this to `__syncwarp()` for CUDA (with no mask, implying a full-warp sync).
* Consistently use `SLANG_PRELUDE_ASSERT` for assertions introduced in code emit (rather than just using the bare `assert(...)` function, which is not included by our CUDA prelude by default)
* Add a new `SLANG_CUDA_STRUCTURED_BUFFER_NO_COUNT` flag to the CUDA prelude that allows the `count` field to be omitted from `(RW)StructuredBuffer<T>`. This is a bit of a hacky because the computed layouts will still assume the `count` field is present, but this feature is required by at least one client application for now. A better long-term fix will take more time to design and implement.
* fixup: CUDA prelude code fix for pedantic compilers
Co-authored-by: Tim Foley <tim.foley.is@gmail.com>
Co-authored-by: Yong He <yonghe@outlook.com>
| -rw-r--r-- | prelude/slang-cuda-prelude.h | 44 | ||||
| -rw-r--r-- | source/slang/hlsl.meta.slang | 1 | ||||
| -rw-r--r-- | source/slang/slang-emit-cpp.cpp | 4 |
3 files changed, 35 insertions, 14 deletions
diff --git a/prelude/slang-cuda-prelude.h b/prelude/slang-cuda-prelude.h index 9b485dbe5..aebcffc10 100644 --- a/prelude/slang-cuda-prelude.h +++ b/prelude/slang-cuda-prelude.h @@ -339,27 +339,47 @@ SLANG_CUDA_CALL uint32_t U64_countbits(uint64_t v) // Missing Load(_In_ int Location, _Out_ uint Status); template <typename T> -struct RWStructuredBuffer +struct StructuredBuffer { - SLANG_CUDA_CALL T& operator[](size_t index) const { SLANG_CUDA_BOUND_CHECK(index, count); return data[index]; } - SLANG_CUDA_CALL const T& Load(size_t index) const { SLANG_CUDA_BOUND_CHECK(index, count); return data[index]; } + SLANG_CUDA_CALL const T& operator[](size_t index) const + { +#ifndef SLANG_CUDA_STRUCTURED_BUFFER_NO_COUNT + SLANG_CUDA_BOUND_CHECK(index, count); +#endif + return data[index]; + } + + SLANG_CUDA_CALL const T& Load(size_t index) const + { +#ifndef SLANG_CUDA_STRUCTURED_BUFFER_NO_COUNT + SLANG_CUDA_BOUND_CHECK(index, count); +#endif + return data[index]; + } + +#ifndef SLANG_CUDA_STRUCTURED_BUFFER_NO_COUNT SLANG_CUDA_CALL void GetDimensions(uint32_t* outNumStructs, uint32_t* outStride) { *outNumStructs = uint32_t(count); *outStride = uint32_t(sizeof(T)); } - +#endif + T* data; +#ifndef SLANG_CUDA_STRUCTURED_BUFFER_NO_COUNT size_t count; +#endif }; template <typename T> -struct StructuredBuffer +struct RWStructuredBuffer : StructuredBuffer<T> { - SLANG_CUDA_CALL const T& operator[](size_t index) const { SLANG_CUDA_BOUND_CHECK(index, count); return data[index]; } - SLANG_CUDA_CALL const T& Load(size_t index) const { SLANG_CUDA_BOUND_CHECK(index, count); return data[index]; } - SLANG_CUDA_CALL void GetDimensions(uint32_t* outNumStructs, uint32_t* outStride) { *outNumStructs = uint32_t(count); *outStride = uint32_t(sizeof(T)); } - - T* data; - size_t count; + SLANG_CUDA_CALL T& operator[](size_t index) const + { +#ifndef SLANG_CUDA_STRUCTURED_BUFFER_NO_COUNT + SLANG_CUDA_BOUND_CHECK(index, this->count); +#endif + return this->data[index]; + } }; + // Missing Load(_In_ int Location, _Out_ uint Status); struct ByteAddressBuffer @@ -1205,7 +1225,7 @@ __inline__ __device__ uint4 _waveMatchMultiple(WarpMask mask, const T& inVal) __device__ uint getAt(dim3 a, int b) { - assert(b >= 0 && b < 3); + SLANG_PRELUDE_ASSERT(b >= 0 && b < 3); return (&a.x)[b]; } __device__ uint3 operator*(uint3 a, dim3 b) diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index 7c5ca0027..e677b9020 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -2987,6 +2987,7 @@ __glsl_extension(GL_KHR_shader_subgroup_basic) __spirv_version(1.3) __target_intrinsic(glsl, "subgroupBarrier()") __target_intrinsic(hlsl, "GroupMemoryBarrier()") +__target_intrinsic(cuda, "__syncwarp()") void GroupMemoryBarrierWithWaveSync(); // NOTE! WaveMaskBroadcastLaneAt is *NOT* standard HLSL diff --git a/source/slang/slang-emit-cpp.cpp b/source/slang/slang-emit-cpp.cpp index 5359740b4..1e2482bb5 100644 --- a/source/slang/slang-emit-cpp.cpp +++ b/source/slang/slang-emit-cpp.cpp @@ -984,7 +984,7 @@ void CPPSourceEmitter::_emitGetAtDefinition(const UnownedStringSlice& funcName, { int vecSize = int(getIntVal(vectorType->getElementCount())); - writer->emit("assert(b >= 0 && b < "); + writer->emit("SLANG_PRELUDE_ASSERT(b >= 0 && b < "); writer->emit(vecSize); writer->emit(");\n"); if (lValue) @@ -997,7 +997,7 @@ void CPPSourceEmitter::_emitGetAtDefinition(const UnownedStringSlice& funcName, //int colCount = int(getIntVal(matrixType->getColumnCount())); int rowCount = int(getIntVal(matrixType->getRowCount())); - writer->emit("assert(b >= 0 && b < "); + writer->emit("SLANG_PRELUDE_ASSERT(b >= 0 && b < "); writer->emit(rowCount); writer->emit(");\n"); |
