diff options
| -rw-r--r-- | prelude/slang-cpp-prelude.h | 75 | ||||
| -rw-r--r-- | prelude/slang-cuda-prelude.h | 9 | ||||
| -rw-r--r-- | source/slang/core.meta.slang | 29 | ||||
| -rw-r--r-- | source/slang/slang-intrinsic-expand.cpp | 29 | ||||
| -rw-r--r-- | source/slang/slang-type-layout.cpp | 213 | ||||
| -rw-r--r-- | tests/cuda/cuda-array-layout.slang | 32 | ||||
| -rw-r--r-- | tests/cuda/cuda-array-layout.slang.expected.txt | 9 | ||||
| -rw-r--r-- | tests/cuda/cuda-layout.slang | 24 | ||||
| -rw-r--r-- | tests/cuda/cuda-layout.slang.expected.txt | 33 | ||||
| -rw-r--r-- | tests/cuda/cuda-reflection.slang | 28 | ||||
| -rw-r--r-- | tests/cuda/cuda-reflection.slang.expected | 250 | ||||
| -rw-r--r-- | tools/slang-reflection-test/slang-reflection-test-main.cpp | 6 |
12 files changed, 656 insertions, 81 deletions
diff --git a/prelude/slang-cpp-prelude.h b/prelude/slang-cpp-prelude.h index 725be4b42..ffd18cf32 100644 --- a/prelude/slang-cpp-prelude.h +++ b/prelude/slang-cpp-prelude.h @@ -36,6 +36,81 @@ # define SLANG_INFINITY INFINITY #endif +// Detect the compiler type + +#ifndef SLANG_COMPILER +# define SLANG_COMPILER + +/* +Compiler defines, see http://sourceforge.net/p/predef/wiki/Compilers/ +NOTE that SLANG_VC holds the compiler version - not just 1 or 0 +*/ +# if defined(_MSC_VER) +# if _MSC_VER >= 1900 +# define SLANG_VC 14 +# elif _MSC_VER >= 1800 +# define SLANG_VC 12 +# elif _MSC_VER >= 1700 +# define SLANG_VC 11 +# elif _MSC_VER >= 1600 +# define SLANG_VC 10 +# elif _MSC_VER >= 1500 +# define SLANG_VC 9 +# else +# error "unknown version of Visual C++ compiler" +# endif +# elif defined(__clang__) +# define SLANG_CLANG 1 +# elif defined(__SNC__) +# define SLANG_SNC 1 +# elif defined(__ghs__) +# define SLANG_GHS 1 +# elif defined(__GNUC__) /* note: __clang__, __SNC__, or __ghs__ imply __GNUC__ */ +# define SLANG_GCC 1 +# else +# error "unknown compiler" +# endif +/* +Any compilers not detected by the above logic are now explicitly zeroed out. 
+*/ +# ifndef SLANG_VC +# define SLANG_VC 0 +# endif +# ifndef SLANG_CLANG +# define SLANG_CLANG 0 +# endif +# ifndef SLANG_SNC +# define SLANG_SNC 0 +# endif +# ifndef SLANG_GHS +# define SLANG_GHS 0 +# endif +# ifndef SLANG_GCC +# define SLANG_GCC 0 +# endif +#endif /* SLANG_COMPILER */ + +#define SLANG_GCC_FAMILY (SLANG_CLANG || SLANG_SNC || SLANG_GHS || SLANG_GCC) + +// GCC Specific +#if SLANG_GCC_FAMILY +# define SLANG_ALIGN_OF(T) __alignof__(T) +// Use this macro instead of offsetof, because gcc produces warning if offsetof is used on a +// non POD type, even though it produces the correct result +# define SLANG_OFFSET_OF(T, ELEMENT) (size_t(&((T*)1)->ELEMENT) - 1) +#endif // SLANG_GCC_FAMILY + +// Microsoft VC specific +#if SLANG_VC +# define SLANG_ALIGN_OF(T) __alignof(T) +#endif // SLANG_VC + +// Default impls + +#ifndef SLANG_OFFSET_OF +# define SLANG_OFFSET_OF(X, Y) offsetof(X, Y) +#endif + #include "slang-cpp-types.h" #include "slang-cpp-scalar-intrinsics.h" diff --git a/prelude/slang-cuda-prelude.h b/prelude/slang-cuda-prelude.h index 91094a75e..01c658e0b 100644 --- a/prelude/slang-cuda-prelude.h +++ b/prelude/slang-cuda-prelude.h @@ -15,6 +15,15 @@ #include <optix.h> #endif +// Define slang offsetof implementation +#ifndef SLANG_OFFSET_OF +# define SLANG_OFFSET_OF(type, member) (size_t)((char*)&(((type *)0)->member) - (char*)0) +#endif + +#ifndef SLANG_ALIGN_OF +# define SLANG_ALIGN_OF(type) __alignof__(type) +#endif + // Must be large enough to cause overflow and therefore infinity #ifndef SLANG_INFINITY # define SLANG_INFINITY ((float)(1e+300 * 1e+300)) diff --git a/source/slang/core.meta.slang b/source/slang/core.meta.slang index afdd96029..761316d86 100644 --- a/source/slang/core.meta.slang +++ b/source/slang/core.meta.slang @@ -1962,6 +1962,35 @@ __target_intrinsic(cuda, " @ ") __target_intrinsic(cpp, " @ ") int __SyntaxError(); +/// For downstream compilers that allow sizeof/alignof/offsetof +/// Can't be called in the C/C++ style. 
Need to use __sizeOf<some_type>() as opposed to sizeof(some_type). +__generic<T> +__target_intrinsic(cuda, "sizeof($G0)") +__target_intrinsic(cpp, "sizeof($G0)") +int __sizeOf(); + +__generic<T> +__target_intrinsic(cuda, "sizeof($T0)") +__target_intrinsic(cpp, "sizeof($T0)") +int __sizeOf(T v); + +__generic<T> +__target_intrinsic(cuda, "SLANG_ALIGN_OF($G0)") +__target_intrinsic(cpp, "SLANG_ALIGN_OF($G0)") +int __alignOf(); + +__generic<T> +__target_intrinsic(cuda, "SLANG_ALIGN_OF($T0)") +__target_intrinsic(cpp, "SLANG_ALIGN_OF($T0)") +int __alignOf(T v); + +// It would be nice to have an offsetof equivalent, but it's not clear how that would work in terms of the Slang language. +// Here we allow calculating the offset of a field in bytes from an *instance* of the type. +__generic<T,F> +__target_intrinsic(cuda, "int(((char*)&($1)) - ((char*)&($0)))") +__target_intrinsic(cpp, "int(((char*)&($1)) - ((char*)&($0))") +int __offsetOf(in T t, in F field); + /// Mark beginning of "interlocked" operations in a fragment shader. 
__target_intrinsic(glsl, "beginInvocationInterlockARB") __glsl_extension(GL_ARB_fragment_shader_interlock) diff --git a/source/slang/slang-intrinsic-expand.cpp b/source/slang/slang-intrinsic-expand.cpp index bd2e17b28..045b7e6c5 100644 --- a/source/slang/slang-intrinsic-expand.cpp +++ b/source/slang/slang-intrinsic-expand.cpp @@ -236,6 +236,35 @@ const char* IntrinsicExpandContext::_emitSpecial(const char* cursor) } break; + case 'G': + { + // Get the type/value at the index of the specialization of this generic + + SLANG_RELEASE_ASSERT(*cursor >= '0' && *cursor <= '9'); + Index argIndex = (*cursor++) - '0'; + + IRSpecialize* specialize = as<IRSpecialize>(m_callInst->getCallee()); + SLANG_ASSERT(specialize); + + { + auto argCount = Index(specialize->getArgCount()); + SLANG_UNUSED(argCount); + SLANG_ASSERT(argIndex < argCount); + + auto arg = specialize->getArg(argIndex); + + if (auto type = as<IRType>(arg)) + { + m_emitter->emitType(type); + } + else + { + m_emitter->emitVal(arg, getInfo(EmitOp::General)); + } + } + } + break; + case 'T': // Get the the 'element' type for the type of the param at the index { diff --git a/source/slang/slang-type-layout.cpp b/source/slang/slang-type-layout.cpp index ad38e11cb..0fc7958d0 100644 --- a/source/slang/slang-type-layout.cpp +++ b/source/slang/slang-type-layout.cpp @@ -8,25 +8,30 @@ namespace Slang { -size_t RoundToAlignment(size_t offset, size_t alignment) +static bool _isPow2(size_t v) { - size_t remainder = offset % alignment; - if (remainder == 0) - return offset; - else - return offset + (alignment - remainder); + return v > 0 && ((v - 1) & v) == 0; +} + +static size_t _roundToAlignment(size_t offset, size_t alignment) +{ + // Must also be a power of 2 + SLANG_ASSERT(_isPow2(alignment)); + + const size_t mask = alignment - 1; + return (offset + mask) & ~mask; } -LayoutSize RoundToAlignment(LayoutSize offset, size_t alignment) +static LayoutSize _roundToAlignment(LayoutSize offset, size_t alignment) { // An infinite size 
is assumed to be maximally aligned. if(offset.isInfinite()) return LayoutSize::infinite(); - return RoundToAlignment(offset.getFiniteValue(), alignment); + return _roundToAlignment(offset.getFiniteValue(), alignment); } -static size_t RoundUpToPowerOfTwo( size_t value ) +static size_t _roundUpToPowerOfTwo( size_t value ) { // TODO(tfoley): I know this isn't a fast approach size_t result = 1; @@ -35,6 +40,21 @@ static size_t RoundUpToPowerOfTwo( size_t value ) return result; } +static bool _isAligned(size_t size, size_t alignment) +{ + SLANG_ASSERT(_isPow2(alignment)); + return ((alignment - 1) & size) == 0; +} + +// This is a workaround to keep functions from causing warnings in release builds, and therefore causing compilation to fail. +void _typeLayout_keepFunctions() +{ + auto a = _isAligned; + auto b = _isPow2; + SLANG_UNUSED(a); + SLANG_UNUSED(b); +} + // struct DefaultLayoutRulesImpl : SimpleLayoutRulesImpl @@ -81,7 +101,7 @@ struct DefaultLayoutRulesImpl : SimpleLayoutRulesImpl SLANG_RELEASE_ASSERT(elementInfo.size.isFinite()); auto elementSize = elementInfo.size.getFiniteValue(); auto elementAlignment = elementInfo.alignment; - auto elementStride = RoundToAlignment(elementSize, elementAlignment); + auto elementStride = _roundToAlignment(elementSize, elementAlignment); // An array with no elements will have zero size. // @@ -155,7 +175,7 @@ struct DefaultLayoutRulesImpl : SimpleLayoutRulesImpl auto fieldBaseOffset = ioStructInfo->size; // We need to ensure that the offset for the field will respect its alignment - auto fieldOffset = RoundToAlignment(fieldBaseOffset, fieldInfo.alignment); + auto fieldOffset = _roundToAlignment(fieldBaseOffset, fieldInfo.alignment); // The size of the struct must be adjusted to cover the bytes consumed // by this field. 
@@ -222,7 +242,7 @@ struct GLSLBaseLayoutRulesImpl : DefaultLayoutRulesImpl SimpleLayoutInfo vectorInfo( LayoutResourceKind::Uniform, size, - RoundUpToPowerOfTwo(size)); + _roundUpToPowerOfTwo(size)); return vectorInfo; } @@ -231,7 +251,7 @@ struct GLSLBaseLayoutRulesImpl : DefaultLayoutRulesImpl // The size of an array must be rounded up to be a multiple of its alignment. // auto info = Super::GetArrayLayout(elementInfo, elementCount); - info.size = RoundToAlignment(info.size, info.alignment); + info.size = _roundToAlignment(info.size, info.alignment); return info; } @@ -239,7 +259,7 @@ struct GLSLBaseLayoutRulesImpl : DefaultLayoutRulesImpl { // The size of a `struct` must be rounded up to be a multiple of its alignment. // - ioStructInfo->size = RoundToAlignment(ioStructInfo->size, ioStructInfo->alignment); + ioStructInfo->size = _roundToAlignment(ioStructInfo->size, ioStructInfo->alignment); } }; @@ -329,7 +349,7 @@ struct HLSLConstantBufferLayoutRulesImpl : DefaultLayoutRulesImpl return ioStructInfo->size; ioStructInfo->alignment = std::max(ioStructInfo->alignment, fieldInfo.alignment); - ioStructInfo->size = RoundToAlignment(ioStructInfo->size, fieldInfo.alignment); + ioStructInfo->size = _roundToAlignment(ioStructInfo->size, fieldInfo.alignment); LayoutSize fieldOffset = ioStructInfo->size; LayoutSize fieldSize = fieldInfo.size; @@ -340,7 +360,7 @@ struct HLSLConstantBufferLayoutRulesImpl : DefaultLayoutRulesImpl auto endRegister = (fieldOffset + fieldSize - 1) / registerSize; if (startRegister != endRegister) { - ioStructInfo->size = RoundToAlignment(ioStructInfo->size, size_t(registerSize)); + ioStructInfo->size = _roundToAlignment(ioStructInfo->size, size_t(registerSize)); fieldOffset = ioStructInfo->size; } @@ -396,10 +416,38 @@ struct CPULayoutRulesImpl : DefaultLayoutRulesImpl void EndStructLayout(UniformLayoutInfo* ioStructInfo) override { // Conform to C/C++ size is adjusted to the largest alignment - ioStructInfo->size = 
RoundToAlignment(ioStructInfo->size, ioStructInfo->alignment); + ioStructInfo->size = _roundToAlignment(ioStructInfo->size, ioStructInfo->alignment); } }; +// The CUDA compiler NVRTC only works on 64 bit operating systems. +// So rather than using native host type sizes we use these types +// +// NOTE! This implies that our CUDA reflection (even if produced on 32 bit host environment) is always 64 bit. +// This is unlikely to be a problem in practice. + +// NOTE! For the moment, in the CUDA prelude we use size_t - but that's ok as we currently use these types for +// sizes + +// Memory sizes, and memory offsets (signed) +typedef int64_t CUDASize; +typedef int64_t CUDAOffset; + +// TODO(JS): This could be better as CudaUSize if we accepted LowerCamel Acronyms... +typedef uint64_t CUDAUSize; + +// A type that is the size of a pointer +typedef CUDASize CUDAPtr; +// For CUtexObject and CUsurfObject +typedef CUDAPtr CUDAHandle; + +// This is not strictly speaking needed - but exists to be consistent with slang-cuda-prelude.h and the current CUDA emit. +typedef CUDAPtr CUDASamplerState; + +// TODO(JS): Perhaps there is an argument these should be 32 bit? 
+typedef CUDASize CUDACount; +typedef CUDASize CUDAIndex; + struct CUDALayoutRulesImpl : DefaultLayoutRulesImpl { typedef DefaultLayoutRulesImpl Super; @@ -421,54 +469,23 @@ struct CUDALayoutRulesImpl : DefaultLayoutRulesImpl SimpleArrayLayoutInfo GetArrayLayout(SimpleLayoutInfo elementInfo, LayoutSize elementCount) override { SLANG_RELEASE_ASSERT(elementInfo.size.isFinite()); - auto elementSize = elementInfo.size.getFiniteValue(); - auto elementAlignment = elementInfo.alignment; - auto elementStride = RoundToAlignment(elementSize, elementAlignment); - + if (elementCount.isInfinite()) { // This is an unsized array, get information for element auto info = Super::GetArrayLayout(elementInfo, LayoutSize(1)); // So it is actually an Array<T> on CUDA which is a pointer and a size - info.size = sizeof(void*) * 2; - info.alignment = SLANG_ALIGN_OF(void*); + info.size = _roundToAlignment((CUDAPtr) + sizeof(CUDACount), sizeof(CUDAPtr)); + info.alignment = sizeof(CUDAPtr); return info; } + + // It's fine to use the Default impl, as long as any element's size is alignment rounded (as happens in EndStructLayout). + // If that weren't the case the array may be smaller than elementSize * elementCount which would be wrong for CUDA. + SLANG_ASSERT(_isAligned(elementInfo.size.getFiniteValue(), elementInfo.alignment)); - // An array with no elements will have zero size. - // - LayoutSize arraySize = 0; - // - // Any array with a non-zero number of elements will need - // to have space for N elements of size `elementSize`, with - // the constraints that there must be `elementStride` bytes - // between consecutive elements. - // - if (elementCount > 0) - { - // We can think of this as either allocating (N-1) - // chunks of size `elementStride` (for most of the elements) - // and then one final chunk of size `elementSize` for - // the last element, or equivalently as allocating - // N chunks of size `elementStride` and then "giving back" - // the final `elementStride - elementSize` bytes. 
- // - arraySize = (elementStride * (elementCount - 1)) + elementSize; - } - - SimpleArrayLayoutInfo arrayInfo; - arrayInfo.kind = elementInfo.kind; - arrayInfo.size = arraySize; - arrayInfo.alignment = elementAlignment; - arrayInfo.elementStride = elementStride; - return arrayInfo; - } - - // Computes the alignment of a vector type given element size and element count. - uint32_t getVectorAlignment(uint32_t elementSize, uint32_t elementCount) - { - return elementCount == 3 ? elementSize : elementSize * elementCount; + return Super::GetArrayLayout(elementInfo, elementCount); } SimpleLayoutInfo GetVectorLayout(BaseType elementType, SimpleLayoutInfo elementInfo, size_t elementCount) override @@ -478,28 +495,53 @@ struct CUDALayoutRulesImpl : DefaultLayoutRulesImpl { SimpleLayoutInfo fixInfo(elementInfo); fixInfo.size = sizeof(int32_t); - fixInfo.alignment = SLANG_ALIGN_OF(int32_t); + fixInfo.alignment = sizeof(int32_t); return GetVectorLayout(BaseType::Int, fixInfo, elementCount); } + + const auto elementSize = elementInfo.size.getFiniteValue(); + + // These rules can largely be determined by looking at + // 'vector_types.h' in the CUDA SDK + + // Size in bytes of vector + size_t size = elementSize * elementCount; + // Special case 3, as it uses the alignment of the elementSize + size_t alignment = (elementCount == 3) ? elementSize : size; + + // special case half + if (elementType == BaseType::Half && elementCount >= 3) + { + alignment = elementSize * 2; + size = _roundToAlignment(size, alignment); + } + + // Nothing is aligned more than 16 + alignment = std::min(alignment, size_t(16)); + + // TODO(JS): It's not 100% clear what is right in terms of size in respect of *alignment*. If the size is the 'used' bytes, then + // it can be less than the aligned size. If that's the case the GetArrayLayout (and MatrixLayout) is *wrong* in that on the last element + // it uses the size (not the aligned size/stride). 
+ // + // Here I am assuming it's reasonable for the size to be the aligned size. That being the case the GetArrayLayout/GetMatrixLayout will be + // correct without special handling. + // + // The assert below checks that is indeed the case. + + // The size must be a multiple of the alignment + SLANG_ASSERT(_isAligned(size, alignment)); SimpleLayoutInfo vectorInfo; vectorInfo.kind = elementInfo.kind; - vectorInfo.size = elementInfo.size * elementCount; - vectorInfo.alignment = getVectorAlignment((uint32_t)elementInfo.size.getFiniteValue(), (uint32_t)elementCount); + vectorInfo.size = size; + vectorInfo.alignment = alignment; + return vectorInfo; } SimpleArrayLayoutInfo GetMatrixLayout(BaseType elementType, SimpleLayoutInfo elementInfo, size_t rowCount, size_t columnCount) override { - // Special case bool - if (elementType == BaseType::Bool) - { - SimpleLayoutInfo fixInfo(elementInfo); - fixInfo.size = sizeof(int32_t); - fixInfo.alignment = SLANG_ALIGN_OF(int32_t); - return GetMatrixLayout(BaseType::Int, fixInfo, rowCount, columnCount); - } - + // The default behavior is to calculate the size as an array of rowCount vectors, which is correct here return Super::GetMatrixLayout(elementType, elementInfo, rowCount, columnCount); } @@ -511,7 +553,7 @@ struct CUDALayoutRulesImpl : DefaultLayoutRulesImpl void EndStructLayout(UniformLayoutInfo* ioStructInfo) override { // Conform to CUDA/C/C++ size is adjusted to the largest alignment - ioStructInfo->size = RoundToAlignment(ioStructInfo->size, ioStructInfo->alignment); + ioStructInfo->size = _roundToAlignment(ioStructInfo->size, ioStructInfo->alignment); } }; @@ -874,9 +916,10 @@ struct CUDAObjectLayoutRulesImpl : CPUObjectLayoutRulesImpl switch (kind) { case ShaderParameterKind::ConstantBuffer: + { // It's a pointer to the actual uniform data - return SimpleLayoutInfo(LayoutResourceKind::Uniform, sizeof(void*), SLANG_ALIGN_OF(void*)); - + return SimpleLayoutInfo(LayoutResourceKind::Uniform, sizeof(CUDAPtr), 
sizeof(CUDAPtr)); + } case ShaderParameterKind::TextureSampler: case ShaderParameterKind::MutableTextureSampler: // That there is no distinct Sampler on CUDA, so TextureSampler is the same as a Texture @@ -884,29 +927,37 @@ struct CUDAObjectLayoutRulesImpl : CPUObjectLayoutRulesImpl case ShaderParameterKind::MutableTexture: case ShaderParameterKind::TextureUniformBuffer: case ShaderParameterKind::Texture: - // It's a pointer to a texture interface - return SimpleLayoutInfo(LayoutResourceKind::Uniform, sizeof(ObjectHandle), SLANG_ALIGN_OF(ObjectHandle)); + { + // It's a CUtexObject or CUsurfObject which is an opaque CUDAHandle sized + return SimpleLayoutInfo(LayoutResourceKind::Uniform, sizeof(CUDAHandle), sizeof(CUDAPtr)); + } case ShaderParameterKind::StructuredBuffer: case ShaderParameterKind::MutableStructuredBuffer: - // It's a pointer and a size - return SimpleLayoutInfo(LayoutResourceKind::Uniform, sizeof(void*) * 2, SLANG_ALIGN_OF(void*)); - + { + // It's a ptr and a count of the amount of elements + const size_t size = _roundToAlignment(sizeof(CUDAPtr) + sizeof(CUDACount), sizeof(CUDAPtr)); + return SimpleLayoutInfo(LayoutResourceKind::Uniform, size, sizeof(CUDAPtr)); + } case ShaderParameterKind::RawBuffer: case ShaderParameterKind::Buffer: case ShaderParameterKind::MutableRawBuffer: case ShaderParameterKind::MutableBuffer: - // It's a pointer and a size in bytes - return SimpleLayoutInfo(LayoutResourceKind::Uniform, sizeof(void*) * 2, SLANG_ALIGN_OF(void*)); - + { + // It's a ptr and a count of the amount of elements + const size_t size = _roundToAlignment(sizeof(CUDAPtr) + sizeof(CUDACount), sizeof(CUDAPtr)); + return SimpleLayoutInfo(LayoutResourceKind::Uniform, size, sizeof(CUDAPtr)); + } case ShaderParameterKind::SamplerState: + { // In CUDA it seems that sampler states are combined into texture objects. 
// So it's a binding issue to combine a sampler with a texture - and sampler are ignored // For simplicity here though - we do create a variable and that variable takes up // uniform binding space. // TODO(JS): If we wanted to remove these variables we'd want to do it as a pass. The pass // would presumably have to remove use of variables of this kind throughout IR. - return SimpleLayoutInfo(LayoutResourceKind::Uniform, sizeof(void*), SLANG_ALIGN_OF(void*)); + return SimpleLayoutInfo(LayoutResourceKind::Uniform, sizeof(CUDASamplerState), sizeof(CUDAPtr)); + } case ShaderParameterKind::InputRenderTarget: // TODO: how to handle these? @@ -4007,7 +4058,7 @@ static TypeLayoutResult _createTypeLayout( // The tag is always a `uint` for now. // auto tagInfo = context.rules->GetScalarLayout(BaseType::UInt); - info.size = RoundToAlignment(info.size, tagInfo.alignment); + info.size = _roundToAlignment(info.size, tagInfo.alignment); taggedUnionLayout->tagOffset = info.size; diff --git a/tests/cuda/cuda-array-layout.slang b/tests/cuda/cuda-array-layout.slang new file mode 100644 index 000000000..7fee3b192 --- /dev/null +++ b/tests/cuda/cuda-array-layout.slang @@ -0,0 +1,32 @@ +//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -output-using-type -shaderobj + +//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0], stride=4):out,name=outputBuffer +RWStructuredBuffer<int> outputBuffer : register(u0); + +struct PadLadenStruct +{ + double a; + uint8_t b; +}; + +// This is to check if the last half can be inserted 'inside' the spare padding of a. 
It should not be +struct StructWithArray +{ + PadLadenStruct a[1]; + uint8_t b; + + matrix<half, 3, 3> c; + uint8_t d; +}; + +[numthreads(1, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + StructWithArray s; + outputBuffer[0] = __sizeOf(s); + + outputBuffer[1] = __offsetOf(s, s.a); + outputBuffer[2] = __offsetOf(s, s.b); + outputBuffer[3] = __offsetOf(s, s.c); + outputBuffer[4] = __offsetOf(s, s.d); +} diff --git a/tests/cuda/cuda-array-layout.slang.expected.txt b/tests/cuda/cuda-array-layout.slang.expected.txt new file mode 100644 index 000000000..bc3e8bd6c --- /dev/null +++ b/tests/cuda/cuda-array-layout.slang.expected.txt @@ -0,0 +1,9 @@ +type: int32_t +48 +0 +16 +20 +44 +0 +0 +0 diff --git a/tests/cuda/cuda-layout.slang b/tests/cuda/cuda-layout.slang new file mode 100644 index 000000000..725bf798e --- /dev/null +++ b/tests/cuda/cuda-layout.slang @@ -0,0 +1,24 @@ +//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -output-using-type -shaderobj + +//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0], stride=4):out,name=outputBuffer +RWStructuredBuffer<int> outputBuffer : register(u0); + +#define WRITE_TYPE_ALIGN(base, type) \ + outputBuffer[base * 4 + 0] = __alignOf<type>(); \ + outputBuffer[base * 4 + 1] = __alignOf<vector<type, 2> >(); \ + outputBuffer[base * 4 + 2] = __alignOf<vector<type, 3> >(); \ + outputBuffer[base * 4 + 3] = __alignOf<vector<type, 4> >(); + + +[numthreads(1, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + WRITE_TYPE_ALIGN(0, uint8_t) + WRITE_TYPE_ALIGN(1, uint16_t) + WRITE_TYPE_ALIGN(2, int) + WRITE_TYPE_ALIGN(3, int64_t) + + WRITE_TYPE_ALIGN(4, half) + WRITE_TYPE_ALIGN(5, float) + WRITE_TYPE_ALIGN(6, double) +} diff --git a/tests/cuda/cuda-layout.slang.expected.txt b/tests/cuda/cuda-layout.slang.expected.txt new file mode 100644 index 000000000..bf4edf064 --- /dev/null +++ b/tests/cuda/cuda-layout.slang.expected.txt @@ -0,0 +1,33 @@ +type: 
int32_t +1 +2 +1 +4 +2 +4 +2 +8 +4 +8 +4 +16 +8 +16 +8 +16 +2 +4 +4 +4 +4 +8 +4 +16 +8 +16 +8 +16 +0 +0 +0 +0 diff --git a/tests/cuda/cuda-reflection.slang b/tests/cuda/cuda-reflection.slang new file mode 100644 index 000000000..95bf591c9 --- /dev/null +++ b/tests/cuda/cuda-reflection.slang @@ -0,0 +1,28 @@ +// cuda-reflection.slang + +//TEST:REFLECTION:-stage compute -entry main -target cuda + +struct PadLadenStruct +{ + double a; + uint8_t b; +}; + +// This is to check if the last half can be inserted 'inside' the spare padding of a. It should not be +struct StructWithArray +{ + PadLadenStruct a[1]; + uint8_t c; + + matrix<half, 3, 3> d; + uint8_t e; +}; + +ConstantBuffer<StructWithArray> cb; +RWStructuredBuffer<StructWithArray> sb; + +[numthreads(1, 1, 1)] +void main( + uint3 dispatchThreadID : SV_DispatchThreadID) +{ +}
\ No newline at end of file diff --git a/tests/cuda/cuda-reflection.slang.expected b/tests/cuda/cuda-reflection.slang.expected new file mode 100644 index 000000000..d27d99557 --- /dev/null +++ b/tests/cuda/cuda-reflection.slang.expected @@ -0,0 +1,250 @@ +result code = 0 +standard error = { +} +standard output = { +{ + "parameters": [ + { + "name": "cb", + "binding": {"kind": "uniform", "offset": 0, "size": 8}, + "type": { + "kind": "constantBuffer", + "elementType": { + "kind": "struct", + "name": "StructWithArray", + "fields": [ + { + "name": "a", + "type": { + "kind": "array", + "elementCount": 1, + "elementType": { + "kind": "struct", + "name": "PadLadenStruct", + "fields": [ + { + "name": "a", + "type": { + "kind": "scalar", + "scalarType": "float64" + }, + "binding": {"kind": "uniform", "offset": 0, "size": 8} + }, + { + "name": "b", + "type": { + "kind": "scalar", + "scalarType": "uint8" + }, + "binding": {"kind": "uniform", "offset": 8, "size": 1} + } + ] + }, + "uniformStride": 16 + }, + "binding": {"kind": "uniform", "offset": 0, "size": 16} + }, + { + "name": "c", + "type": { + "kind": "scalar", + "scalarType": "uint8" + }, + "binding": {"kind": "uniform", "offset": 16, "size": 1} + }, + { + "name": "d", + "type": { + "kind": "matrix", + "rowCount": 3, + "columnCount": 3, + "elementType": { + "kind": "scalar", + "scalarType": "float16" + } + }, + "binding": {"kind": "uniform", "offset": 20, "size": 24} + }, + { + "name": "e", + "type": { + "kind": "scalar", + "scalarType": "uint8" + }, + "binding": {"kind": "uniform", "offset": 44, "size": 1} + } + ] + }, + "containerVarLayout": { + "binding": {"kind": "uniform", "offset": 0, "size": 8} + }, + "elementVarLayout": { + "type": { + "kind": "struct", + "name": "StructWithArray", + "fields": [ + { + "name": "a", + "type": { + "kind": "array", + "elementCount": 1, + "elementType": { + "kind": "struct", + "name": "PadLadenStruct", + "fields": [ + { + "name": "a", + "type": { + "kind": "scalar", + "scalarType": 
"float64" + }, + "binding": {"kind": "uniform", "offset": 0, "size": 8} + }, + { + "name": "b", + "type": { + "kind": "scalar", + "scalarType": "uint8" + }, + "binding": {"kind": "uniform", "offset": 8, "size": 1} + } + ] + }, + "uniformStride": 16 + }, + "binding": {"kind": "uniform", "offset": 0, "size": 16} + }, + { + "name": "c", + "type": { + "kind": "scalar", + "scalarType": "uint8" + }, + "binding": {"kind": "uniform", "offset": 16, "size": 1} + }, + { + "name": "d", + "type": { + "kind": "matrix", + "rowCount": 3, + "columnCount": 3, + "elementType": { + "kind": "scalar", + "scalarType": "float16" + } + }, + "binding": {"kind": "uniform", "offset": 20, "size": 24} + }, + { + "name": "e", + "type": { + "kind": "scalar", + "scalarType": "uint8" + }, + "binding": {"kind": "uniform", "offset": 44, "size": 1} + } + ] + }, + "binding": {"kind": "uniform", "offset": 0, "size": 48} + } + } + }, + { + "name": "sb", + "binding": {"kind": "uniform", "offset": 8, "size": 16}, + "type": { + "kind": "resource", + "baseShape": "structuredBuffer", + "access": "readWrite", + "resultType": { + "kind": "struct", + "name": "StructWithArray", + "fields": [ + { + "name": "a", + "type": { + "kind": "array", + "elementCount": 1, + "elementType": { + "kind": "struct", + "name": "PadLadenStruct", + "fields": [ + { + "name": "a", + "type": { + "kind": "scalar", + "scalarType": "float64" + }, + "binding": {"kind": "uniform", "offset": 0, "size": 8} + }, + { + "name": "b", + "type": { + "kind": "scalar", + "scalarType": "uint8" + }, + "binding": {"kind": "uniform", "offset": 8, "size": 1} + } + ] + }, + "uniformStride": 16 + }, + "binding": {"kind": "uniform", "offset": 0, "size": 16} + }, + { + "name": "c", + "type": { + "kind": "scalar", + "scalarType": "uint8" + }, + "binding": {"kind": "uniform", "offset": 16, "size": 1} + }, + { + "name": "d", + "type": { + "kind": "matrix", + "rowCount": 3, + "columnCount": 3, + "elementType": { + "kind": "scalar", + "scalarType": "float16" + } + 
}, + "binding": {"kind": "uniform", "offset": 20, "size": 24} + }, + { + "name": "e", + "type": { + "kind": "scalar", + "scalarType": "uint8" + }, + "binding": {"kind": "uniform", "offset": 44, "size": 1} + } + ] + } + } + } + ], + "entryPoints": [ + { + "name": "main", + "stage:": "compute", + "parameters": [ + { + "name": "dispatchThreadID", + "semanticName": "SV_DISPATCHTHREADID", + "type": { + "kind": "vector", + "elementCount": 3, + "elementType": { + "kind": "scalar", + "scalarType": "uint32" + } + } + } + ], + "threadGroupSize": [1, 1, 1] + } + ] +} +} diff --git a/tools/slang-reflection-test/slang-reflection-test-main.cpp b/tools/slang-reflection-test/slang-reflection-test-main.cpp index 655b4e41d..0b8e88d68 100644 --- a/tools/slang-reflection-test/slang-reflection-test-main.cpp +++ b/tools/slang-reflection-test/slang-reflection-test-main.cpp @@ -489,10 +489,16 @@ static void emitReflectionScalarTypeInfoJSON( #define CASE(TAG, ID) case slang::TypeReflection::ScalarType::TAG: write(writer, #ID); break CASE(Void, void); CASE(Bool, bool); + + CASE(Int8, int8); + CASE(UInt8, uint8); + CASE(Int16, int16); + CASE(UInt16, uint16); CASE(Int32, int32); CASE(UInt32, uint32); CASE(Int64, int64); CASE(UInt64, uint64); + CASE(Float16, float16); CASE(Float32, float32); CASE(Float64, float64); |
