summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorjsmall-nvidia <jsmall@nvidia.com>2021-06-10 14:57:09 -0400
committerGitHub <noreply@github.com>2021-06-10 11:57:09 -0700
commit37e8917d10626b519470f2e34625f0efe741352f (patch)
tree4e8e51bd63ebc03fcdf0c893b675906cf507d73c
parent0d9bd79e8fd4d57e1a723ca6b6a45efec2b42872 (diff)
CUDA layout corner cases/testing (#1881)
* #include an absolute path didn't work - because paths were taken to always be relative. * Add support for sizeOf/alignOf/offsetOf to stdlib. Add $G intrinsic expansion that works of the generic parameters not the param type * Test cuda layout. * Fix CUDA layout issues. Fix reflection to handle other built in types. Fix __offsetOf * Tests of reflection and layout as reported directly from CUDA. * Comment about use of aligned size as size. * Fix warning from VS. * Check alignment is pow2. * Small improvements to alignment calcs. * Tab to spaces. * Fix alignment pointer sizes on 32 bit OS for CUDA. * Fix CUDA reflection on 32 bit.
-rw-r--r--prelude/slang-cpp-prelude.h75
-rw-r--r--prelude/slang-cuda-prelude.h9
-rw-r--r--source/slang/core.meta.slang29
-rw-r--r--source/slang/slang-intrinsic-expand.cpp29
-rw-r--r--source/slang/slang-type-layout.cpp213
-rw-r--r--tests/cuda/cuda-array-layout.slang32
-rw-r--r--tests/cuda/cuda-array-layout.slang.expected.txt9
-rw-r--r--tests/cuda/cuda-layout.slang24
-rw-r--r--tests/cuda/cuda-layout.slang.expected.txt33
-rw-r--r--tests/cuda/cuda-reflection.slang28
-rw-r--r--tests/cuda/cuda-reflection.slang.expected250
-rw-r--r--tools/slang-reflection-test/slang-reflection-test-main.cpp6
12 files changed, 656 insertions, 81 deletions
diff --git a/prelude/slang-cpp-prelude.h b/prelude/slang-cpp-prelude.h
index 725be4b42..ffd18cf32 100644
--- a/prelude/slang-cpp-prelude.h
+++ b/prelude/slang-cpp-prelude.h
@@ -36,6 +36,81 @@
# define SLANG_INFINITY INFINITY
#endif
+// Detect the compiler type
+
+#ifndef SLANG_COMPILER
+# define SLANG_COMPILER
+
+/*
+Compiler defines, see http://sourceforge.net/p/predef/wiki/Compilers/
+NOTE that SLANG_VC holds the compiler version - not just 1 or 0
+*/
+# if defined(_MSC_VER)
+# if _MSC_VER >= 1900
+# define SLANG_VC 14
+# elif _MSC_VER >= 1800
+# define SLANG_VC 12
+# elif _MSC_VER >= 1700
+# define SLANG_VC 11
+# elif _MSC_VER >= 1600
+# define SLANG_VC 10
+# elif _MSC_VER >= 1500
+# define SLANG_VC 9
+# else
+# error "unknown version of Visual C++ compiler"
+# endif
+# elif defined(__clang__)
+# define SLANG_CLANG 1
+# elif defined(__SNC__)
+# define SLANG_SNC 1
+# elif defined(__ghs__)
+# define SLANG_GHS 1
+# elif defined(__GNUC__) /* note: __clang__, __SNC__, or __ghs__ imply __GNUC__ */
+# define SLANG_GCC 1
+# else
+# error "unknown compiler"
+# endif
+/*
+Any compilers not detected by the above logic are now explicitly zeroed out.
+*/
+# ifndef SLANG_VC
+# define SLANG_VC 0
+# endif
+# ifndef SLANG_CLANG
+# define SLANG_CLANG 0
+# endif
+# ifndef SLANG_SNC
+# define SLANG_SNC 0
+# endif
+# ifndef SLANG_GHS
+# define SLANG_GHS 0
+# endif
+# ifndef SLANG_GCC
+# define SLANG_GCC 0
+# endif
+#endif /* SLANG_COMPILER */
+
+#define SLANG_GCC_FAMILY (SLANG_CLANG || SLANG_SNC || SLANG_GHS || SLANG_GCC)
+
+// GCC Specific
+#if SLANG_GCC_FAMILY
+# define SLANG_ALIGN_OF(T) __alignof__(T)
+// Use this macro instead of offsetof, because gcc produces warning if offsetof is used on a
+// non POD type, even though it produces the correct result
+# define SLANG_OFFSET_OF(T, ELEMENT) (size_t(&((T*)1)->ELEMENT) - 1)
+#endif // SLANG_GCC_FAMILY
+
+// Microsoft VC specific
+#if SLANG_VC
+# define SLANG_ALIGN_OF(T) __alignof(T)
+#endif // SLANG_VC
+
+// Default impls
+
+#ifndef SLANG_OFFSET_OF
+# define SLANG_OFFSET_OF(X, Y) offsetof(X, Y)
+#endif
+
#include "slang-cpp-types.h"
#include "slang-cpp-scalar-intrinsics.h"
diff --git a/prelude/slang-cuda-prelude.h b/prelude/slang-cuda-prelude.h
index 91094a75e..01c658e0b 100644
--- a/prelude/slang-cuda-prelude.h
+++ b/prelude/slang-cuda-prelude.h
@@ -15,6 +15,15 @@
#include <optix.h>
#endif
+// Define slang offsetof implementation
+#ifndef SLANG_OFFSET_OF
+# define SLANG_OFFSET_OF(type, member) (size_t)((char*)&(((type *)0)->member) - (char*)0)
+#endif
+
+#ifndef SLANG_ALIGN_OF
+# define SLANG_ALIGN_OF(type) __alignof__(type)
+#endif
+
// Must be large enough to cause overflow and therefore infinity
#ifndef SLANG_INFINITY
# define SLANG_INFINITY ((float)(1e+300 * 1e+300))
diff --git a/source/slang/core.meta.slang b/source/slang/core.meta.slang
index afdd96029..761316d86 100644
--- a/source/slang/core.meta.slang
+++ b/source/slang/core.meta.slang
@@ -1962,6 +1962,35 @@ __target_intrinsic(cuda, " @ ")
__target_intrinsic(cpp, " @ ")
int __SyntaxError();
+/// For downstream compilers that allow sizeof/alignof/offsetof
+/// Can't be called in the C/C++ style. Need to use __size_of<some_type>() as opposed to sizeof(some_type).
+__generic<T>
+__target_intrinsic(cuda, "sizeof($G0)")
+__target_intrinsic(cpp, "sizeof($G0)")
+int __sizeOf();
+
+__generic<T>
+__target_intrinsic(cuda, "sizeof($T0)")
+__target_intrinsic(cpp, "sizeof($T0)")
+int __sizeOf(T v);
+
+__generic<T>
+__target_intrinsic(cuda, "SLANG_ALIGN_OF($G0)")
+__target_intrinsic(cpp, "SLANG_ALIGN_OF($G0)")
+int __alignOf();
+
+__generic<T>
+__target_intrinsic(cuda, "SLANG_ALIGN_OF($T0)")
+__target_intrinsic(cpp, "SLANG_ALIGN_OF($T0)")
+int __alignOf(T v);
+
+// It would be nice to have an offsetof equivalent, but it's not clear how that would work in terms of the Slang language.
+// Here we allow calculating the offset of a field in bytes from an *instance* of the type.
+/// Returns the distance in bytes from the address of `t` to the address of `field`.
+/// The expansion text is emitted into the generated CUDA/C++ code, so its parentheses must balance;
+/// the cuda and cpp strings below are the same expression.
+__generic<T,F>
+__target_intrinsic(cuda, "int(((char*)&($1)) - ((char*)&($0)))")
+__target_intrinsic(cpp, "int(((char*)&($1)) - ((char*)&($0)))")
+int __offsetOf(in T t, in F field);
+
/// Mark beginning of "interlocked" operations in a fragment shader.
__target_intrinsic(glsl, "beginInvocationInterlockARB")
__glsl_extension(GL_ARB_fragment_shader_interlock)
diff --git a/source/slang/slang-intrinsic-expand.cpp b/source/slang/slang-intrinsic-expand.cpp
index bd2e17b28..045b7e6c5 100644
--- a/source/slang/slang-intrinsic-expand.cpp
+++ b/source/slang/slang-intrinsic-expand.cpp
@@ -236,6 +236,35 @@ const char* IntrinsicExpandContext::_emitSpecial(const char* cursor)
}
break;
+ case 'G':
+ {
+ // `$G<digit>`: emit the generic argument at that index of the callee's specialization.
+ // A type argument is emitted as a type, anything else is emitted as a general value.
+
+ SLANG_RELEASE_ASSERT(*cursor >= '0' && *cursor <= '9');
+ const Index argIndex = (*cursor++) - '0';
+
+ // The callee and the index are used unconditionally below, so they are checked in
+ // release builds too (like the digit check above) rather than only in debug builds.
+ IRSpecialize* specialize = as<IRSpecialize>(m_callInst->getCallee());
+ SLANG_RELEASE_ASSERT(specialize);
+
+ const auto argCount = Index(specialize->getArgCount());
+ SLANG_RELEASE_ASSERT(argIndex < argCount);
+
+ auto arg = specialize->getArg(argIndex);
+
+ if (auto type = as<IRType>(arg))
+ {
+ m_emitter->emitType(type);
+ }
+ else
+ {
+ m_emitter->emitVal(arg, getInfo(EmitOp::General));
+ }
+ }
+ break;
+
case 'T':
// Get the the 'element' type for the type of the param at the index
{
diff --git a/source/slang/slang-type-layout.cpp b/source/slang/slang-type-layout.cpp
index ad38e11cb..0fc7958d0 100644
--- a/source/slang/slang-type-layout.cpp
+++ b/source/slang/slang-type-layout.cpp
@@ -8,25 +8,30 @@
namespace Slang {
-size_t RoundToAlignment(size_t offset, size_t alignment)
+static bool _isPow2(size_t v)
{
- size_t remainder = offset % alignment;
- if (remainder == 0)
- return offset;
- else
- return offset + (alignment - remainder);
+ return v > 0 && ((v - 1) & v) == 0;
+}
+
+static size_t _roundToAlignment(size_t offset, size_t alignment)
+{
+ // Must also be a power of 2
+ SLANG_ASSERT(_isPow2(alignment));
+
+ const size_t mask = alignment - 1;
+ return (offset + mask) & ~mask;
}
-LayoutSize RoundToAlignment(LayoutSize offset, size_t alignment)
+static LayoutSize _roundToAlignment(LayoutSize offset, size_t alignment)
{
// An infinite size is assumed to be maximally aligned.
if(offset.isInfinite())
return LayoutSize::infinite();
- return RoundToAlignment(offset.getFiniteValue(), alignment);
+ return _roundToAlignment(offset.getFiniteValue(), alignment);
}
-static size_t RoundUpToPowerOfTwo( size_t value )
+static size_t _roundUpToPowerOfTwo( size_t value )
{
// TODO(tfoley): I know this isn't a fast approach
size_t result = 1;
@@ -35,6 +40,21 @@ static size_t RoundUpToPowerOfTwo( size_t value )
return result;
}
+// Returns true if `size` is a multiple of `alignment`. `alignment` is asserted to be a power of 2.
+static bool _isAligned(size_t size, size_t alignment)
+{
+ SLANG_ASSERT(_isPow2(alignment));
+ // For a power of 2 alignment, the bits selected by (alignment - 1) are the remainder of size / alignment.
+ return ((alignment - 1) & size) == 0;
+}
+
+// This is a workaround to keep functions from causing warnings in release builds, and therefore causing compilation to fail.
+// In the code visible here `_isAligned` and `_isPow2` are only referenced from inside SLANG_ASSERT,
+// so taking their addresses gives them a use (presumably SLANG_ASSERT compiles away in release builds - TODO confirm).
+void _typeLayout_keepFunctions()
+{
+ auto a = _isAligned;
+ auto b = _isPow2;
+ SLANG_UNUSED(a);
+ SLANG_UNUSED(b);
+}
+
//
struct DefaultLayoutRulesImpl : SimpleLayoutRulesImpl
@@ -81,7 +101,7 @@ struct DefaultLayoutRulesImpl : SimpleLayoutRulesImpl
SLANG_RELEASE_ASSERT(elementInfo.size.isFinite());
auto elementSize = elementInfo.size.getFiniteValue();
auto elementAlignment = elementInfo.alignment;
- auto elementStride = RoundToAlignment(elementSize, elementAlignment);
+ auto elementStride = _roundToAlignment(elementSize, elementAlignment);
// An array with no elements will have zero size.
//
@@ -155,7 +175,7 @@ struct DefaultLayoutRulesImpl : SimpleLayoutRulesImpl
auto fieldBaseOffset = ioStructInfo->size;
// We need to ensure that the offset for the field will respect its alignment
- auto fieldOffset = RoundToAlignment(fieldBaseOffset, fieldInfo.alignment);
+ auto fieldOffset = _roundToAlignment(fieldBaseOffset, fieldInfo.alignment);
// The size of the struct must be adjusted to cover the bytes consumed
// by this field.
@@ -222,7 +242,7 @@ struct GLSLBaseLayoutRulesImpl : DefaultLayoutRulesImpl
SimpleLayoutInfo vectorInfo(
LayoutResourceKind::Uniform,
size,
- RoundUpToPowerOfTwo(size));
+ _roundUpToPowerOfTwo(size));
return vectorInfo;
}
@@ -231,7 +251,7 @@ struct GLSLBaseLayoutRulesImpl : DefaultLayoutRulesImpl
// The size of an array must be rounded up to be a multiple of its alignment.
//
auto info = Super::GetArrayLayout(elementInfo, elementCount);
- info.size = RoundToAlignment(info.size, info.alignment);
+ info.size = _roundToAlignment(info.size, info.alignment);
return info;
}
@@ -239,7 +259,7 @@ struct GLSLBaseLayoutRulesImpl : DefaultLayoutRulesImpl
{
// The size of a `struct` must be rounded up to be a multiple of its alignment.
//
- ioStructInfo->size = RoundToAlignment(ioStructInfo->size, ioStructInfo->alignment);
+ ioStructInfo->size = _roundToAlignment(ioStructInfo->size, ioStructInfo->alignment);
}
};
@@ -329,7 +349,7 @@ struct HLSLConstantBufferLayoutRulesImpl : DefaultLayoutRulesImpl
return ioStructInfo->size;
ioStructInfo->alignment = std::max(ioStructInfo->alignment, fieldInfo.alignment);
- ioStructInfo->size = RoundToAlignment(ioStructInfo->size, fieldInfo.alignment);
+ ioStructInfo->size = _roundToAlignment(ioStructInfo->size, fieldInfo.alignment);
LayoutSize fieldOffset = ioStructInfo->size;
LayoutSize fieldSize = fieldInfo.size;
@@ -340,7 +360,7 @@ struct HLSLConstantBufferLayoutRulesImpl : DefaultLayoutRulesImpl
auto endRegister = (fieldOffset + fieldSize - 1) / registerSize;
if (startRegister != endRegister)
{
- ioStructInfo->size = RoundToAlignment(ioStructInfo->size, size_t(registerSize));
+ ioStructInfo->size = _roundToAlignment(ioStructInfo->size, size_t(registerSize));
fieldOffset = ioStructInfo->size;
}
@@ -396,10 +416,38 @@ struct CPULayoutRulesImpl : DefaultLayoutRulesImpl
void EndStructLayout(UniformLayoutInfo* ioStructInfo) override
{
// Conform to C/C++ size is adjusted to the largest alignment
- ioStructInfo->size = RoundToAlignment(ioStructInfo->size, ioStructInfo->alignment);
+ ioStructInfo->size = _roundToAlignment(ioStructInfo->size, ioStructInfo->alignment);
}
};
+// The CUDA compiler NVRTC only works on 64 bit operating systems.
+// So instead of using native host type sizes we use these types instead
+//
+// NOTE! This implies that our CUDA reflection (even if produced on 32 bit host environment) is always 64 bit.
+// This is unlikely to be a problem in practice.
+
+// NOTE! For the moment the CUDA prelude uses size_t - but that's ok as we currently use these types for
+// sizes
+
+// Memory sizes, and memory offsets (signed)
+typedef int64_t CUDASize;
+typedef int64_t CUDAOffset;
+
+// TODO(JS): This could be better as CudaUSize if we accepted LowerCamel Acronyms...
+typedef uint64_t CUDAUSize;
+
+// A type that is the size of a pointer
+typedef CUDASize CUDAPtr;
+// For CUtexObject and CUsurfObject
+typedef CUDAPtr CUDAHandle;
+
+// This is not strictly speaking needed - but exists to be consistent with cuda-prelude.h and the current CUDA emit.
+typedef CUDAPtr CUDASamplerState;
+
+// TODO(JS): Perhaps there is an argument these should be 32 bit?
+typedef CUDASize CUDACount;
+typedef CUDASize CUDAIndex;
+
struct CUDALayoutRulesImpl : DefaultLayoutRulesImpl
{
typedef DefaultLayoutRulesImpl Super;
@@ -421,54 +469,23 @@ struct CUDALayoutRulesImpl : DefaultLayoutRulesImpl
SimpleArrayLayoutInfo GetArrayLayout(SimpleLayoutInfo elementInfo, LayoutSize elementCount) override
{
SLANG_RELEASE_ASSERT(elementInfo.size.isFinite());
- auto elementSize = elementInfo.size.getFiniteValue();
- auto elementAlignment = elementInfo.alignment;
- auto elementStride = RoundToAlignment(elementSize, elementAlignment);
-
+
if (elementCount.isInfinite())
{
// This is an unsized array, get information for element
auto info = Super::GetArrayLayout(elementInfo, LayoutSize(1));
// So it is actually a Array<T> on CUDA which is a pointer and a size
- info.size = sizeof(void*) * 2;
- info.alignment = SLANG_ALIGN_OF(void*);
+ // The size is a pointer plus a count, rounded up to pointer alignment. The fixed-width CUDA
+ // types are used (not host `void*`) so the result does not depend on the host pointer size.
+ info.size = _roundToAlignment(sizeof(CUDAPtr) + sizeof(CUDACount), sizeof(CUDAPtr));
+ info.alignment = sizeof(CUDAPtr);
return info;
}
+
+ // It's fine to use the Default impl, as long as every element's size is rounded up to its alignment (as happens in EndStructLayout).
+ // If that weren't the case the array may be smaller than elementSize * elementCount which would be wrong for CUDA.
+ SLANG_ASSERT(_isAligned(elementInfo.size.getFiniteValue(), elementInfo.alignment));
- // An array with no elements will have zero size.
- //
- LayoutSize arraySize = 0;
- //
- // Any array with a non-zero number of elements will need
- // to have space for N elements of size `elementSize`, with
- // the constraints that there must be `elementStride` bytes
- // between consecutive elements.
- //
- if (elementCount > 0)
- {
- // We can think of this as either allocating (N-1)
- // chunks of size `elementStride` (for most of the elements)
- // and then one final chunk of size `elementSize` for
- // the last element, or equivalently as allocating
- // N chunks of size `elementStride` and then "giving back"
- // the final `elementStride - elementSize` bytes.
- //
- arraySize = (elementStride * (elementCount - 1)) + elementSize;
- }
-
- SimpleArrayLayoutInfo arrayInfo;
- arrayInfo.kind = elementInfo.kind;
- arrayInfo.size = arraySize;
- arrayInfo.alignment = elementAlignment;
- arrayInfo.elementStride = elementStride;
- return arrayInfo;
- }
-
- // Computes the alignment of a vector type given element size and element count.
- uint32_t getVectorAlignment(uint32_t elementSize, uint32_t elementCount)
- {
- return elementCount == 3 ? elementSize : elementSize * elementCount;
+ return Super::GetArrayLayout(elementInfo, elementCount);
}
SimpleLayoutInfo GetVectorLayout(BaseType elementType, SimpleLayoutInfo elementInfo, size_t elementCount) override
@@ -478,28 +495,53 @@ struct CUDALayoutRulesImpl : DefaultLayoutRulesImpl
{
SimpleLayoutInfo fixInfo(elementInfo);
fixInfo.size = sizeof(int32_t);
- fixInfo.alignment = SLANG_ALIGN_OF(int32_t);
+ fixInfo.alignment = sizeof(int32_t);
return GetVectorLayout(BaseType::Int, fixInfo, elementCount);
}
+
+ const auto elementSize = elementInfo.size.getFiniteValue();
+
+ // These rules can largely be determined by looking at
+ // 'vector_types.h' in the CUDA SDK
+
+ // Size in bytes of vector
+ size_t size = elementSize * elementCount;
+ // Special case 3, as uses alignment of the elementSize
+ size_t alignment = (elementCount == 3) ? elementSize : size;
+
+ // special case half
+ if (elementType == BaseType::Half && elementCount >= 3)
+ {
+ alignment = elementSize * 2;
+ size = _roundToAlignment(size, alignment);
+ }
+
+ // Nothing is aligned more than 16
+ alignment = std::min(alignment, size_t(16));
+
+ // TODO(JS): It's not 100% clear what is right in terms of size in respect of *alignment*. If the size is the 'used' bytes, then
+ // it can be less than the aligned size. If that's the case the GetArrayLayout (and MatrixLayout) is *wrong* in that on the last element
+ // it uses the size (not the aligned size/stride).
+ //
+ // Here I am assuming it's reasonable for the size to be the aligned size. That being the case the GetArrayLayout/GetMatrixLayout will be
+ // correct without special handling.
+ //
+ // The assert below checks that is indeed the case.
+
+ // The size must be a multiple of the alignment
+ SLANG_ASSERT(_isAligned(size, alignment));
SimpleLayoutInfo vectorInfo;
vectorInfo.kind = elementInfo.kind;
- vectorInfo.size = elementInfo.size * elementCount;
- vectorInfo.alignment = getVectorAlignment((uint32_t)elementInfo.size.getFiniteValue(), (uint32_t)elementCount);
+ vectorInfo.size = size;
+ vectorInfo.alignment = alignment;
+
return vectorInfo;
}
SimpleArrayLayoutInfo GetMatrixLayout(BaseType elementType, SimpleLayoutInfo elementInfo, size_t rowCount, size_t columnCount) override
{
- // Special case bool
- if (elementType == BaseType::Bool)
- {
- SimpleLayoutInfo fixInfo(elementInfo);
- fixInfo.size = sizeof(int32_t);
- fixInfo.alignment = SLANG_ALIGN_OF(int32_t);
- return GetMatrixLayout(BaseType::Int, fixInfo, rowCount, columnCount);
- }
-
+ // The default behavior is to calculate the size as an array of rowCount vectors, which is correct here
return Super::GetMatrixLayout(elementType, elementInfo, rowCount, columnCount);
}
@@ -511,7 +553,7 @@ struct CUDALayoutRulesImpl : DefaultLayoutRulesImpl
void EndStructLayout(UniformLayoutInfo* ioStructInfo) override
{
// Conform to CUDA/C/C++ size is adjusted to the largest alignment
- ioStructInfo->size = RoundToAlignment(ioStructInfo->size, ioStructInfo->alignment);
+ ioStructInfo->size = _roundToAlignment(ioStructInfo->size, ioStructInfo->alignment);
}
};
@@ -874,9 +916,10 @@ struct CUDAObjectLayoutRulesImpl : CPUObjectLayoutRulesImpl
switch (kind)
{
case ShaderParameterKind::ConstantBuffer:
+ {
// It's a pointer to the actual uniform data
- return SimpleLayoutInfo(LayoutResourceKind::Uniform, sizeof(void*), SLANG_ALIGN_OF(void*));
-
+ return SimpleLayoutInfo(LayoutResourceKind::Uniform, sizeof(CUDAPtr), sizeof(CUDAPtr));
+ }
case ShaderParameterKind::TextureSampler:
case ShaderParameterKind::MutableTextureSampler:
// That there is no distinct Sampler on CUDA, so TextureSampler is the same as a Texture
@@ -884,29 +927,37 @@ struct CUDAObjectLayoutRulesImpl : CPUObjectLayoutRulesImpl
case ShaderParameterKind::MutableTexture:
case ShaderParameterKind::TextureUniformBuffer:
case ShaderParameterKind::Texture:
- // It's a pointer to a texture interface
- return SimpleLayoutInfo(LayoutResourceKind::Uniform, sizeof(ObjectHandle), SLANG_ALIGN_OF(ObjectHandle));
+ {
+ // It's a CUtexObject or CUsurfObject which is an opaque CUDAHandle sized
+ return SimpleLayoutInfo(LayoutResourceKind::Uniform, sizeof(CUDAHandle), sizeof(CUDAPtr));
+ }
case ShaderParameterKind::StructuredBuffer:
case ShaderParameterKind::MutableStructuredBuffer:
- // It's a pointer and a size
- return SimpleLayoutInfo(LayoutResourceKind::Uniform, sizeof(void*) * 2, SLANG_ALIGN_OF(void*));
-
+ {
+ // It's a ptr and a count of the amount of elements
+ const size_t size = _roundToAlignment(sizeof(CUDAPtr) + sizeof(CUDACount), sizeof(CUDAPtr));
+ return SimpleLayoutInfo(LayoutResourceKind::Uniform, size, sizeof(CUDAPtr));
+ }
case ShaderParameterKind::RawBuffer:
case ShaderParameterKind::Buffer:
case ShaderParameterKind::MutableRawBuffer:
case ShaderParameterKind::MutableBuffer:
- // It's a pointer and a size in bytes
- return SimpleLayoutInfo(LayoutResourceKind::Uniform, sizeof(void*) * 2, SLANG_ALIGN_OF(void*));
-
+ {
+ // It's a ptr and a count of the amount of elements
+ const size_t size = _roundToAlignment(sizeof(CUDAPtr) + sizeof(CUDACount), sizeof(CUDAPtr));
+ return SimpleLayoutInfo(LayoutResourceKind::Uniform, size, sizeof(CUDAPtr));
+ }
case ShaderParameterKind::SamplerState:
+ {
// In CUDA it seems that sampler states are combined into texture objects.
// So it's a binding issue to combine a sampler with a texture - and sampler are ignored
// For simplicity here though - we do create a variable and that variable takes up
// uniform binding space.
// TODO(JS): If we wanted to remove these variables we'd want to do it as a pass. The pass
// would presumably have to remove use of variables of this kind throughout IR.
- return SimpleLayoutInfo(LayoutResourceKind::Uniform, sizeof(void*), SLANG_ALIGN_OF(void*));
+ return SimpleLayoutInfo(LayoutResourceKind::Uniform, sizeof(CUDASamplerState), sizeof(CUDAPtr));
+ }
case ShaderParameterKind::InputRenderTarget:
// TODO: how to handle these?
@@ -4007,7 +4058,7 @@ static TypeLayoutResult _createTypeLayout(
// The tag is always a `uint` for now.
//
auto tagInfo = context.rules->GetScalarLayout(BaseType::UInt);
- info.size = RoundToAlignment(info.size, tagInfo.alignment);
+ info.size = _roundToAlignment(info.size, tagInfo.alignment);
taggedUnionLayout->tagOffset = info.size;
diff --git a/tests/cuda/cuda-array-layout.slang b/tests/cuda/cuda-array-layout.slang
new file mode 100644
index 000000000..7fee3b192
--- /dev/null
+++ b/tests/cuda/cuda-array-layout.slang
@@ -0,0 +1,32 @@
+//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -output-using-type -shaderobj
+
+//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0], stride=4):out,name=outputBuffer
+RWStructuredBuffer<int> outputBuffer : register(u0);
+
+struct PadLadenStruct
+{
+ double a;
+ uint8_t b;
+};
+
+// This is to check if the last half can be inserted 'inside' the spare padding of a. It should not be
+struct StructWithArray
+{
+ PadLadenStruct a[1];
+ uint8_t b;
+
+ matrix<half, 3, 3> c;
+ uint8_t d;
+};
+
+[numthreads(1, 1, 1)]
+void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
+{
+ StructWithArray s;
+ outputBuffer[0] = __sizeOf(s);
+
+ outputBuffer[1] = __offsetOf(s, s.a);
+ outputBuffer[2] = __offsetOf(s, s.b);
+ outputBuffer[3] = __offsetOf(s, s.c);
+ outputBuffer[4] = __offsetOf(s, s.d);
+}
diff --git a/tests/cuda/cuda-array-layout.slang.expected.txt b/tests/cuda/cuda-array-layout.slang.expected.txt
new file mode 100644
index 000000000..bc3e8bd6c
--- /dev/null
+++ b/tests/cuda/cuda-array-layout.slang.expected.txt
@@ -0,0 +1,9 @@
+type: int32_t
+48
+0
+16
+20
+44
+0
+0
+0
diff --git a/tests/cuda/cuda-layout.slang b/tests/cuda/cuda-layout.slang
new file mode 100644
index 000000000..725bf798e
--- /dev/null
+++ b/tests/cuda/cuda-layout.slang
@@ -0,0 +1,24 @@
+//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -output-using-type -shaderobj
+
+//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0], stride=4):out,name=outputBuffer
+RWStructuredBuffer<int> outputBuffer : register(u0);
+
+#define WRITE_TYPE_ALIGN(base, type) \
+ outputBuffer[base * 4 + 0] = __alignOf<type>(); \
+ outputBuffer[base * 4 + 1] = __alignOf<vector<type, 2> >(); \
+ outputBuffer[base * 4 + 2] = __alignOf<vector<type, 3> >(); \
+ outputBuffer[base * 4 + 3] = __alignOf<vector<type, 4> >();
+
+
+[numthreads(1, 1, 1)]
+void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
+{
+ WRITE_TYPE_ALIGN(0, uint8_t)
+ WRITE_TYPE_ALIGN(1, uint16_t)
+ WRITE_TYPE_ALIGN(2, int)
+ WRITE_TYPE_ALIGN(3, int64_t)
+
+ WRITE_TYPE_ALIGN(4, half)
+ WRITE_TYPE_ALIGN(5, float)
+ WRITE_TYPE_ALIGN(6, double)
+}
diff --git a/tests/cuda/cuda-layout.slang.expected.txt b/tests/cuda/cuda-layout.slang.expected.txt
new file mode 100644
index 000000000..bf4edf064
--- /dev/null
+++ b/tests/cuda/cuda-layout.slang.expected.txt
@@ -0,0 +1,33 @@
+type: int32_t
+1
+2
+1
+4
+2
+4
+2
+8
+4
+8
+4
+16
+8
+16
+8
+16
+2
+4
+4
+4
+4
+8
+4
+16
+8
+16
+8
+16
+0
+0
+0
+0
diff --git a/tests/cuda/cuda-reflection.slang b/tests/cuda/cuda-reflection.slang
new file mode 100644
index 000000000..95bf591c9
--- /dev/null
+++ b/tests/cuda/cuda-reflection.slang
@@ -0,0 +1,28 @@
+// cuda-reflection.slang
+
+//TEST:REFLECTION:-stage compute -entry main -target cuda
+
+struct PadLadenStruct
+{
+ double a;
+ uint8_t b;
+};
+
+// This is to check if the last half can be inserted 'inside' the spare padding of a. It should not be
+struct StructWithArray
+{
+ PadLadenStruct a[1];
+ uint8_t c;
+
+ matrix<half, 3, 3> d;
+ uint8_t e;
+};
+
+ConstantBuffer<StructWithArray> cb;
+RWStructuredBuffer<StructWithArray> sb;
+
+[numthreads(1, 1, 1)]
+void main(
+ uint3 dispatchThreadID : SV_DispatchThreadID)
+{
+} \ No newline at end of file
diff --git a/tests/cuda/cuda-reflection.slang.expected b/tests/cuda/cuda-reflection.slang.expected
new file mode 100644
index 000000000..d27d99557
--- /dev/null
+++ b/tests/cuda/cuda-reflection.slang.expected
@@ -0,0 +1,250 @@
+result code = 0
+standard error = {
+}
+standard output = {
+{
+ "parameters": [
+ {
+ "name": "cb",
+ "binding": {"kind": "uniform", "offset": 0, "size": 8},
+ "type": {
+ "kind": "constantBuffer",
+ "elementType": {
+ "kind": "struct",
+ "name": "StructWithArray",
+ "fields": [
+ {
+ "name": "a",
+ "type": {
+ "kind": "array",
+ "elementCount": 1,
+ "elementType": {
+ "kind": "struct",
+ "name": "PadLadenStruct",
+ "fields": [
+ {
+ "name": "a",
+ "type": {
+ "kind": "scalar",
+ "scalarType": "float64"
+ },
+ "binding": {"kind": "uniform", "offset": 0, "size": 8}
+ },
+ {
+ "name": "b",
+ "type": {
+ "kind": "scalar",
+ "scalarType": "uint8"
+ },
+ "binding": {"kind": "uniform", "offset": 8, "size": 1}
+ }
+ ]
+ },
+ "uniformStride": 16
+ },
+ "binding": {"kind": "uniform", "offset": 0, "size": 16}
+ },
+ {
+ "name": "c",
+ "type": {
+ "kind": "scalar",
+ "scalarType": "uint8"
+ },
+ "binding": {"kind": "uniform", "offset": 16, "size": 1}
+ },
+ {
+ "name": "d",
+ "type": {
+ "kind": "matrix",
+ "rowCount": 3,
+ "columnCount": 3,
+ "elementType": {
+ "kind": "scalar",
+ "scalarType": "float16"
+ }
+ },
+ "binding": {"kind": "uniform", "offset": 20, "size": 24}
+ },
+ {
+ "name": "e",
+ "type": {
+ "kind": "scalar",
+ "scalarType": "uint8"
+ },
+ "binding": {"kind": "uniform", "offset": 44, "size": 1}
+ }
+ ]
+ },
+ "containerVarLayout": {
+ "binding": {"kind": "uniform", "offset": 0, "size": 8}
+ },
+ "elementVarLayout": {
+ "type": {
+ "kind": "struct",
+ "name": "StructWithArray",
+ "fields": [
+ {
+ "name": "a",
+ "type": {
+ "kind": "array",
+ "elementCount": 1,
+ "elementType": {
+ "kind": "struct",
+ "name": "PadLadenStruct",
+ "fields": [
+ {
+ "name": "a",
+ "type": {
+ "kind": "scalar",
+ "scalarType": "float64"
+ },
+ "binding": {"kind": "uniform", "offset": 0, "size": 8}
+ },
+ {
+ "name": "b",
+ "type": {
+ "kind": "scalar",
+ "scalarType": "uint8"
+ },
+ "binding": {"kind": "uniform", "offset": 8, "size": 1}
+ }
+ ]
+ },
+ "uniformStride": 16
+ },
+ "binding": {"kind": "uniform", "offset": 0, "size": 16}
+ },
+ {
+ "name": "c",
+ "type": {
+ "kind": "scalar",
+ "scalarType": "uint8"
+ },
+ "binding": {"kind": "uniform", "offset": 16, "size": 1}
+ },
+ {
+ "name": "d",
+ "type": {
+ "kind": "matrix",
+ "rowCount": 3,
+ "columnCount": 3,
+ "elementType": {
+ "kind": "scalar",
+ "scalarType": "float16"
+ }
+ },
+ "binding": {"kind": "uniform", "offset": 20, "size": 24}
+ },
+ {
+ "name": "e",
+ "type": {
+ "kind": "scalar",
+ "scalarType": "uint8"
+ },
+ "binding": {"kind": "uniform", "offset": 44, "size": 1}
+ }
+ ]
+ },
+ "binding": {"kind": "uniform", "offset": 0, "size": 48}
+ }
+ }
+ },
+ {
+ "name": "sb",
+ "binding": {"kind": "uniform", "offset": 8, "size": 16},
+ "type": {
+ "kind": "resource",
+ "baseShape": "structuredBuffer",
+ "access": "readWrite",
+ "resultType": {
+ "kind": "struct",
+ "name": "StructWithArray",
+ "fields": [
+ {
+ "name": "a",
+ "type": {
+ "kind": "array",
+ "elementCount": 1,
+ "elementType": {
+ "kind": "struct",
+ "name": "PadLadenStruct",
+ "fields": [
+ {
+ "name": "a",
+ "type": {
+ "kind": "scalar",
+ "scalarType": "float64"
+ },
+ "binding": {"kind": "uniform", "offset": 0, "size": 8}
+ },
+ {
+ "name": "b",
+ "type": {
+ "kind": "scalar",
+ "scalarType": "uint8"
+ },
+ "binding": {"kind": "uniform", "offset": 8, "size": 1}
+ }
+ ]
+ },
+ "uniformStride": 16
+ },
+ "binding": {"kind": "uniform", "offset": 0, "size": 16}
+ },
+ {
+ "name": "c",
+ "type": {
+ "kind": "scalar",
+ "scalarType": "uint8"
+ },
+ "binding": {"kind": "uniform", "offset": 16, "size": 1}
+ },
+ {
+ "name": "d",
+ "type": {
+ "kind": "matrix",
+ "rowCount": 3,
+ "columnCount": 3,
+ "elementType": {
+ "kind": "scalar",
+ "scalarType": "float16"
+ }
+ },
+ "binding": {"kind": "uniform", "offset": 20, "size": 24}
+ },
+ {
+ "name": "e",
+ "type": {
+ "kind": "scalar",
+ "scalarType": "uint8"
+ },
+ "binding": {"kind": "uniform", "offset": 44, "size": 1}
+ }
+ ]
+ }
+ }
+ }
+ ],
+ "entryPoints": [
+ {
+ "name": "main",
+ "stage:": "compute",
+ "parameters": [
+ {
+ "name": "dispatchThreadID",
+ "semanticName": "SV_DISPATCHTHREADID",
+ "type": {
+ "kind": "vector",
+ "elementCount": 3,
+ "elementType": {
+ "kind": "scalar",
+ "scalarType": "uint32"
+ }
+ }
+ }
+ ],
+ "threadGroupSize": [1, 1, 1]
+ }
+ ]
+}
+}
diff --git a/tools/slang-reflection-test/slang-reflection-test-main.cpp b/tools/slang-reflection-test/slang-reflection-test-main.cpp
index 655b4e41d..0b8e88d68 100644
--- a/tools/slang-reflection-test/slang-reflection-test-main.cpp
+++ b/tools/slang-reflection-test/slang-reflection-test-main.cpp
@@ -489,10 +489,16 @@ static void emitReflectionScalarTypeInfoJSON(
#define CASE(TAG, ID) case slang::TypeReflection::ScalarType::TAG: write(writer, #ID); break
CASE(Void, void);
CASE(Bool, bool);
+
+ CASE(Int8, int8);
+ CASE(UInt8, uint8);
+ CASE(Int16, int16);
+ CASE(UInt16, uint16);
CASE(Int32, int32);
CASE(UInt32, uint32);
CASE(Int64, int64);
CASE(UInt64, uint64);
+
CASE(Float16, float16);
CASE(Float32, float32);
CASE(Float64, float64);