diff options
| -rw-r--r-- | docs/cpu-target.md | 213 | ||||
| -rw-r--r-- | source/slang/slang-emit-c-like.cpp | 10 | ||||
| -rw-r--r-- | source/slang/slang-emit-c-like.h | 23 | ||||
| -rw-r--r-- | source/slang/slang-emit-cpp.cpp | 1014 | ||||
| -rw-r--r-- | source/slang/slang-emit-cpp.h | 38 | ||||
| -rw-r--r-- | source/slang/slang-emit.cpp | 102 | ||||
| -rw-r--r-- | source/slang/slang-ir-entry-point-uniforms.cpp | 33 | ||||
| -rw-r--r-- | source/slang/slang-ir-entry-point-uniforms.h | 5 | ||||
| -rw-r--r-- | source/slang/slang-parameter-binding.cpp | 37 | ||||
| -rw-r--r-- | source/slang/slang-type-layout.cpp | 230 | ||||
| -rw-r--r-- | source/slang/slang-type-layout.h | 12 | ||||
| -rw-r--r-- | tests/cross-compile/c-cross-compile.slang | 4 | ||||
| -rw-r--r-- | tests/cross-compile/cpp-execute-simple.slang | 8 | ||||
| -rw-r--r-- | tests/cross-compile/cpp-execute.slang | 7 | ||||
| -rw-r--r-- | tests/cross-compile/cpp-resource-reflection.slang | 42 | ||||
| -rw-r--r-- | tests/cross-compile/cpp-resource-reflection.slang.expected | 163 | ||||
| -rw-r--r-- | tests/cross-compile/cpp-resource.slang | 53 | ||||
| -rw-r--r-- | tests/cross-compile/slang-cpp-prelude.h | 625 | ||||
| -rw-r--r-- | tools/slang-test/slang-test-main.cpp | 25 |
19 files changed, 2426 insertions, 218 deletions
diff --git a/docs/cpu-target.md b/docs/cpu-target.md new file mode 100644 index 000000000..cc1e15b08 --- /dev/null +++ b/docs/cpu-target.md @@ -0,0 +1,213 @@ +Slang CPU target Support +======================== + +Slang has preliminary support for producing CPU source and binaries. + +# Features + +* Can compile C/C++/Slang to binaries (executables and or shared libraries) +* Can compile Slang source into C++ source code +* Supports compute style shaders +* C/C++ backend abstracts the command line options, and parses the compiler errors/out such that all supported compilers output available in same format + +# Limitations + +These limitations apply to Slang source, with C/C++ the limitations are whatever the compiler requires + +* Only supports 64 bit targets (specifically it assumes all pointers are 64 bit) +* Barriers are not supported (making these work would require an ABI change) +* Atomics are not supported +* Complex resource types (such as say Texture2d) are work in progress +* Out of bounds access to resources has undefined behavior +* ParameterBlocks are not currently supported + +For current C++ source output, the compiler needs to support partial specialization. + +# How it works + +The initial version works by adding 'back end' compiler support for C/C++ compilers. Currently this is tested to work with Visual Studio, Clang and G++/Gcc on Windows and Linux. The C/C++ backend can be directly accessed much like 'dxc', 'fxc' of 'glslang' can, using the pass-through mechanism with the following new backends... + +``` +SLANG_PASS_THROUGH_CLANG, ///< Clang C/C++ compiler +SLANG_PASS_THROUGH_VISUAL_STUDIO, ///< Visual studio C/C++ compiler +SLANG_PASS_THROUGH_GCC, ///< GCC C/C++ compiler +SLANG_PASS_THROUGH_GENERIC_C_CPP, ///< Generic C or C++ compiler, which is decided by the source type +``` + +Sometimes it is not important which C/C++ compiler is used, and this can be specified via the 'Generic C/C++' option. 
This will aim to use the compiler that is most likely binary compatible with the compiler that was used to build the slang binary being used. + +To make it possible for slang to produce CPU code, we now need a mechanism to convert slang code into C/C++. The first iteration only supports C++ generation. If source is desired instead of a binary this can be specified via the SlangCompileTarget. These can be specified on the slangc command line as `-target c` or `-target cpp` + +In the API the `SlangCompileTarget`s are + +``` +SLANG_C_SOURCE, ///< The C language +SLANG_CPP_SOURCE, ///< The C++ language +``` + +If a CPU binary is required this can be specified as a `SlangCompileTarget` of + +``` +SLANG_EXECUTABLE, ///< Executable (for hosting CPU/OS) +SLANG_SHARED_LIBRARY, ///< A shared library/Dll (for hosting CPU/OS) +``` + +These can also be specified on the slang command line as `-target exe` and `-target dll` or `-target sharedlib`. + +In order to be able to use the slang code on CPU, there needs to be binding via values passed to a function that the C/C++ code will produce and export. How this works is described in the ABI section. + +Note that if a binary target is requested, the binary contents will be returned in an ISlangBlob just like for other targets. To use the CPU binary typically it must be saved as a file and then potentially marked for execution by the OS before executing. It may be possible to load shared libraries or dlls from memory - but this is a non-standard feature, that requires unusual work arounds. + +Under the covers when slang is used to generate a binary via a C/C++ compiler, it must do so through the file system. Currently this means that the source (say generated by slang) and the binary (produced by the C/C++ compiler) must all be files. To make this work slang uses temporary files. 
The reasoning for hiding this mechanism - and not returning say filenames - is so that in the future when binaries are produced directly (for example with LLVM), nothing will need to change. + +ABI +=== + +Say we have some slang source like the following. + +``` +struct Thing { int a; int b; } + +Texture2D<float> tex; +SamplerState sampler; + +[numthreads(4, 1, 1)] +void computeMain( + uint3 dispatchThreadID : SV_DispatchThreadID, + uniform Thing thing, + uniform Thing thing2) +{ + // ... +} +``` + +When it is compiled into a shared library/dll - how is it invoked? The entry point is exported with a signature + +``` +void computeMain(ComputeVaryingInput* varyingInput, UniformState* uniformState); +``` + +The UniformState struct typically varies by shader, and it holds all of the bindings. Where these are located can be determined by reflection. For example + +``` +struct UniformState +{ + Thing_0* thing3_0; + RWStructuredBuffer<int32_t> outputBuffer_0; + Texture2D<float > tex_0; + SamplerState sampler_0; + _S1* _S2; +}; +``` + +Note that for C++ targets, the templated types are defined in the slang-cpp-prelude.h that is included. Note that `slang-cpp-prelude.h` *MUST* currently be within the search path passed to the compiler. By default with the CPU path, the path to the slang file is included as a 'system' include path, such that placing the slang-cpp-prelude.h file in the same directory as the slang source file should mean that it is found. + +ConstantBuffers will become pointers to the type they hold (as thing3_0 is in the above structure). + +StructuredBuffer/RWStructuredBuffer/ByteAddressBuffer/RWByteAddressBuffer become in effect (where in ByteAddressBuffers T is uint32_t). + +``` + T* data; + size_t count; +``` + +Resource types become pointers to interfaces that implement their features. For example `Texture2D` becomes a pointer to an `ITexture2D` interface that has to be implemented in client side code. 
Similarly SamplerState and SamplerComparisonState become `ISamplerState` and `ISamplerComparisonState`. + +The `_S1` struct in the example above (which may have different names) is actually a struct that holds all of the entry point uniforms if there are any, in this case + +``` +struct _S1 +{ + Thing_0 thing_0; + Thing_0 thing2_0; +}; +``` + +Note that this pointer is not directly reflected (although the layout of the uniform parameters in the struct is). Currently this pointer is just placed after all the other reflected bindings. + + +It may be useful to be able to include `slang-cpp-prelude.h` in C++ code to access the types that are used in the generated code. This introduces a problem in that the types used in the generated code might clash with types in client code. To work around this problem, you can wrap all of the types defined in the prelude with a namespace of your choosing. For example + +``` +#define SLANG_PRELUDE_NAMESPACE CPPPrelude +#include "../../tests/cross-compile/slang-cpp-prelude.h" +``` + +Would wrap all the slang prelude types in the namespace `CPPPrelude`. + +Language aspects +================ + +# Arrays passed by Value + +Slang follows the HLSL convention that arrays are passed by value. This is in contrast to C/C++ where arrays are passed by reference. To make generated C/C++ follow this convention an array is turned into a 'FixedArray' struct type. Since structs/classes by default in C/C++ are passed by value, the wrapped array is also. + +To get something more similar to C/C++ operation the array can be marked in out or inout to make it passed by reference. + +Limitations +=========== + +# Out of bounds access + +In HLSL code if an access is made out of bounds of a StructuredBuffer, execution proceeds. If an out of bounds read is performed, a zeroed value is returned. If an out of bounds write is performed it's effectively a noop, as the value is discarded. + +On the CPU target this behaviour is *NOT* supported. 
For a debug CPU build an out of bounds access will assert, for a release build the behaviour is undefined. + +The reason for this is that such an access is quite difficult and/or slow to implement on the CPU. The underlying reason is that operator[] typically returns a reference to the contained value. If this is out of bounds - it's not clear what to return, in particular because the value may be read or written and moreover elements of the type might be written. In practice this means a global zeroed value cannot be returned. + +This could be supported if code gen worked as follows for say + +``` +RWStructuredBuffer<float4> values; + +values[3].x = 10; +``` + +Produces + +``` +template <typename T> +struct RWStructuredBuffer +{ + T& at(size_t index, T& defValue) { return index < size ? values[index] : defValue; } + + T* values; + size_t size; +}; + +RWStructuredBuffer<float4> values; + +// ... +Vector<float, 4> defValue = {}; // Zero initialize such that an out of bounds access refers to a zeroed value +values.at(3, defValue).x = 10; +``` + +Note that [] would be turned into the `at` function, which takes the default value as a parameter provided by the caller. If this is then written to then only the defValue is corrupted. Even this mechanism may not be quite right, because if we write and then read again from the out of bounds reference in HLSL we may expect that 0 is returned, whereas here we get the value that was written. 
+ +TODO +==== + +# Main + +* Complete support (in terms of interfaces) for 'complex' resource types - such as Texture +* Interface implementation for complex resource types +* Parameter block support (the difficulty is around layout) +* Split out entry point uniforms into a separate pointer passed to the entry point +* Test system executes and tests for CPU targets +* Slang API allows for compilation into loaded binary such that functions can be directly executed +* Output C/C++ compiler errors as 'externalCompiler' errors through diagnostic system +* Improve documentation +* Output of header files +* Mechanism to specify where C/C++ binaries are located + +# Internal Slang compiler features + +These issues are more internal Slang features/improvements + +* Currently we only support 64 bit targets (it is assumed in layout that pointers are 64 bit) +* Slang compute tests work (where appropriate) +* Currently only generates C++ code, it would be fairly straight forward to support C (especially if we have 'intrinsic definitions') +* Have 'intrinsic definitions' in standard library - such that they can be generated where appropriate + + This will simplify the C/C++ code generation as means slang language will generate must of the appropriate code +* Currently 'construct' IR inst is supported as is, we may want to split out to separate instructions for specific scenarios + diff --git a/source/slang/slang-emit-c-like.cpp b/source/slang/slang-emit-c-like.cpp index 26af7b9f4..870523a3b 100644 --- a/source/slang/slang-emit-c-like.cpp +++ b/source/slang/slang-emit-c-like.cpp @@ -705,7 +705,7 @@ void CLikeSourceEmitter::emitDeclarator(IRDeclaratorInfo* declarator) } } -void CLikeSourceEmitter::emitSimpleValue(IRInst* inst) +void CLikeSourceEmitter::emitSimpleValueImpl(IRInst* inst) { switch(inst->op) { @@ -927,7 +927,7 @@ bool CLikeSourceEmitter::shouldFoldInstIntoUseSites(IRInst* inst) return true; } -void CLikeSourceEmitter::emitOperand(IRInst* inst, EmitOpInfo const& 
outerPrec) +void CLikeSourceEmitter::emitOperandImpl(IRInst* inst, EmitOpInfo const& outerPrec) { if( shouldFoldInstIntoUseSites(inst) ) { @@ -2541,7 +2541,7 @@ void CLikeSourceEmitter::emitSimpleFuncParamImpl(IRParam* param) emitSemantics(param); } -void CLikeSourceEmitter::emitSimpleFunc(IRFunc* func) +void CLikeSourceEmitter::emitSimpleFuncImpl(IRFunc* func) { auto resultType = func->getResultType(); @@ -2592,7 +2592,7 @@ void CLikeSourceEmitter::emitSimpleFunc(IRFunc* func) } } -void CLikeSourceEmitter::emitParamType(IRType* type, String const& name) +void CLikeSourceEmitter::emitParamTypeImpl(IRType* type, String const& name) { // An `out` or `inout` parameter will have been // encoded as a parameter of pointer type, so @@ -3242,7 +3242,7 @@ void CLikeSourceEmitter::executeEmitActions(List<EmitAction> const& actions) } } -void CLikeSourceEmitter::emitModule(IRModule* module) +void CLikeSourceEmitter::emitModuleImpl(IRModule* module) { // The IR will usually come in an order that respects // dependencies between global declarations, but this diff --git a/source/slang/slang-emit-c-like.h b/source/slang/slang-emit-c-like.h index 926a67e31..ee906010b 100644 --- a/source/slang/slang-emit-c-like.h +++ b/source/slang/slang-emit-c-like.h @@ -86,13 +86,13 @@ public: EmitVarChain* next; EmitVarChain() - : varLayout(0) - , next(0) + : varLayout(nullptr) + , next(nullptr) {} EmitVarChain(VarLayout* varLayout) : varLayout(varLayout) - , next(0) + , next(nullptr) {} EmitVarChain(VarLayout* varLayout, EmitVarChain* next) @@ -175,11 +175,11 @@ public: String getName(IRInst* inst); void emitDeclarator(IRDeclaratorInfo* declarator); - void emitSimpleValue(IRInst* inst); + void emitSimpleValue(IRInst* inst) { emitSimpleValueImpl(inst); } bool shouldFoldInstIntoUseSites(IRInst* inst); - void emitOperand(IRInst* inst, EmitOpInfo const& outerPrec); + void emitOperand(IRInst* inst, EmitOpInfo const& outerPrec) { emitOperandImpl(inst, outerPrec); } void emitArgs(IRInst* inst); @@ 
-218,7 +218,7 @@ public: void emitSemantics(VarLayout* varLayout); void emitSemantics(IRInst* inst); - VarLayout* getVarLayout(IRInst* var); + static VarLayout* getVarLayout(IRInst* var); void emitLayoutSemantics(IRInst* inst, char const* uniformSemanticSpelling = "register"); @@ -247,9 +247,9 @@ public: /// Emit high-level statements for the body of a function. void emitFunctionBody(IRGlobalValueWithCode* code); - void emitSimpleFunc(IRFunc* func); + void emitSimpleFunc(IRFunc* func) { emitSimpleFuncImpl(func); } - void emitParamType(IRType* type, String const& name); + void emitParamType(IRType* type, String const& name) { emitParamTypeImpl(type, name); } IRInst* getSpecializedValue(IRSpecialize* specInst); @@ -305,7 +305,7 @@ public: void computeEmitActions(IRModule* module, List<EmitAction>& ioActions); void executeEmitActions(List<EmitAction> const& actions); - void emitModule(IRModule* module); + void emitModule(IRModule* module) { emitModuleImpl(module); } void emitPreprocessorDirectives() { emitPreprocessorDirectivesImpl(); } void emitSimpleType(IRType* type); @@ -335,6 +335,11 @@ public: virtual void emitVarDecorationsImpl(IRInst* varDecl) { SLANG_UNUSED(varDecl); } virtual void emitMatrixLayoutModifiersImpl(VarLayout* layout) { SLANG_UNUSED(layout); } virtual void emitTypeImpl(IRType* type, const StringSliceLoc* nameLoc); + virtual void emitSimpleValueImpl(IRInst* inst); + virtual void emitModuleImpl(IRModule* module); + virtual void emitSimpleFuncImpl(IRFunc* func); + virtual void emitOperandImpl(IRInst* inst, EmitOpInfo const& outerPrec); + virtual void emitParamTypeImpl(IRType* type, String const& name); // Only needed for glsl output with $ prefix intrinsics - so perhaps removable in the future virtual void emitTextureOrTextureSamplerTypeImpl(IRTextureTypeBase* type, char const* baseName) { SLANG_UNUSED(type); SLANG_UNUSED(baseName); } diff --git a/source/slang/slang-emit-cpp.cpp b/source/slang/slang-emit-cpp.cpp index 2ec087eef..0228955dc 100644 --- 
a/source/slang/slang-emit-cpp.cpp +++ b/source/slang/slang-emit-cpp.cpp @@ -10,6 +10,69 @@ #include <assert.h> +/* +ABI +--- + +In terms of ABI we need to discuss the variety of variables/resources that need to be defined by the host for appropriate execution +of the output code. + +https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-variable-syntax + +Broadly we could categorize these as.. + +1) Varying entry point parameters (or 'varying') +2) Uniform entry point parameters +3) Uniform globals +4) Thread shared (such as group shared) or ('thread shared') +5) Thread local ('static') + +If we can invoke a bunch of threads as a single invocation we could effectively have the ThreadShared not part of the ABI, but something +that is say allocated on the stack before the threads are kicked off. If we kick of threads individually then we would need to pass this +in as part of ABI. NOTE that it isn't right in so far as memory barriers etc couldn't work, as each thread would run to completion, but +we aren't going to worry about barriers for now. + +On 1 - there could be potentially input and outputs (perhaps in out?). On CPU I guess that's fine. + +On 2 and 3 they are effectively the same, and so for now 2+3 will be referred to together as 'uniforms'. +They should be copied into a single structure that has a well known order. + +On 1 these are parameters that vary on an invocation. Thus a caller might call many times with same globals structure +and different varying entry point parameters. 
+ +On 5 - This would be a global that can be set and then accessed within the context of single thread + +So in order of rate of change + +1 : Probably change on every invocation (in the future such an invocation might be behind the API) +2 + 3 : Changes per group of 'threads' executed together +4 : Does not change between invocations +5 : Could be placed on the stack, and so not necessarily part of the ABI + +For now we are only going to implement something 'Compute shader'-like. Doing so makes the varying parameter always the same. + +So for now we would need to pass in + +ComputeVaryingInput - Fixed because we are doing compute shader +Uniform - All the uniform data in a big blob, both from uniform entry point parameters, and uniform globals + +When called we can have a structure that holds the thread local variables, and these two pointers. + + +We can stick pointers to these in a structure lets call it 'Context'. On C++ we could make all the functions 'methods', and then +we don't need to pass around the context as a parameter. For C this doesn't work, so it might be worth just biting the bullet and +just adding the context to the output. + +Issues: + +* How does this work with layout? The layout if it's going to specify offsets will need to know that they will be allocated into each +of these structs AND that the order they are placed needs to be consistent. + +* When variables access one of these sources, we will now need code that will add the dereferencing. Hopefully this can be done by looking +at the type of the variable, and then adding the appropriate access via part of emit. 
+ +*/ + namespace Slang { static const char s_elemNames[] = "xyzw"; @@ -345,7 +408,23 @@ UnownedStringSlice CPPSourceEmitter::_getTypeName(IRType* inType) return m_slicePool.getSlice(handle); } - handle = _calcTypeName(type); + if (type->op == kIROp_MatrixType) + { + auto matType = static_cast<IRMatrixType*>(type); + + auto elementType = matType->getElementType(); + const auto rowCount = int(GetIntVal(matType->getRowCount())); + const auto colCount = int(GetIntVal(matType->getColumnCount())); + + // Make sure the vector type the matrix is built on is added + useType(_getVecType(elementType, colCount)); + } + + StringBuilder builder; + if (SLANG_SUCCEEDED(_calcTypeName(type, m_target, builder))) + { + handle = m_slicePool.add(builder); + } m_typeNameMap.Add(type, handle); @@ -353,14 +432,63 @@ UnownedStringSlice CPPSourceEmitter::_getTypeName(IRType* inType) return m_slicePool.getSlice(handle); } -StringSlicePool::Handle CPPSourceEmitter::_calcTypeName(IRType* type) +SlangResult CPPSourceEmitter::_calcTextureTypeName(IRTextureTypeBase* texType, StringBuilder& outName) +{ + switch (texType->getAccess()) + { + case SLANG_RESOURCE_ACCESS_READ: + break; + case SLANG_RESOURCE_ACCESS_READ_WRITE: + outName << "RW"; + break; + case SLANG_RESOURCE_ACCESS_RASTER_ORDERED: + outName << "RasterizerOrdered"; + break; + case SLANG_RESOURCE_ACCESS_APPEND: + outName << "Append"; + break; + case SLANG_RESOURCE_ACCESS_CONSUME: + outName << "Consume"; + break; + default: + SLANG_DIAGNOSE_UNEXPECTED(getSink(), SourceLoc(), "unhandled resource access mode"); + return SLANG_FAIL; + } + + switch (texType->GetBaseShape()) + { + case TextureFlavor::Shape::Shape1D: outName << "Texture1D"; break; + case TextureFlavor::Shape::Shape2D: outName << "Texture2D"; break; + case TextureFlavor::Shape::Shape3D: outName << "Texture3D"; break; + case TextureFlavor::Shape::ShapeCube: outName << "TextureCube"; break; + case TextureFlavor::Shape::ShapeBuffer: outName << "Buffer"; break; + default: + 
SLANG_DIAGNOSE_UNEXPECTED(getSink(), SourceLoc(), "unhandled resource shape"); + return SLANG_FAIL; + } + + if (texType->isMultisample()) + { + outName << "MS"; + } + if (texType->isArray()) + { + outName << "Array"; + } + outName << "<" << _getTypeName(texType->getElementType()) << " >"; + + return SLANG_OK; +} + +SlangResult CPPSourceEmitter::_calcTypeName(IRType* type, CodeGenTarget target, StringBuilder& out) { switch (type->op) { case kIROp_HalfType: { // Special case half - return m_slicePool.add(getBuiltinTypeName(kIROp_FloatType)); + out << getBuiltinTypeName(kIROp_FloatType); + return SLANG_OK; } case kIROp_VectorType: { @@ -368,26 +496,23 @@ StringSlicePool::Handle CPPSourceEmitter::_calcTypeName(IRType* type) auto vecCount = int(GetIntVal(vecType->getElementCount())); const IROp elemType = vecType->getElementType()->op; - if (m_target == CodeGenTarget::CPPSource) + if (target == CodeGenTarget::CPPSource) { - StringBuilder builder; - builder << "Vector<" << getBuiltinTypeName(elemType) << ", " << vecCount << ">"; - return m_slicePool.add(builder); + out << "Vector<" << getBuiltinTypeName(elemType) << ", " << vecCount << ">"; } else { - StringBuilder builder; - builder << "Vec"; + out << "Vec"; UnownedStringSlice postFix = _getCTypeVecPostFix(elemType); - builder << postFix; + out << postFix; if (postFix.size() > 1) { - builder << "_"; + out << "_"; } - builder << vecCount; - return m_slicePool.add(builder); + out << vecCount; } + return SLANG_OK; } case kIROp_MatrixType: { @@ -397,42 +522,32 @@ StringSlicePool::Handle CPPSourceEmitter::_calcTypeName(IRType* type) const auto rowCount = int(GetIntVal(matType->getRowCount())); const auto colCount = int(GetIntVal(matType->getColumnCount())); - if (m_target == CodeGenTarget::CPPSource) + if (target == CodeGenTarget::CPPSource) { - StringBuilder builder; - builder << "Matrix<" << getBuiltinTypeName(elementType->op) << ", " << rowCount << ", " << colCount << ">"; - return m_slicePool.add(builder); + out << 
"Matrix<" << getBuiltinTypeName(elementType->op) << ", " << rowCount << ", " << colCount << ">"; } else { - // Make sure there is the vector name too - _getTypeName(_getVecType(elementType, colCount)); - - StringBuilder builder; - - builder << "Mat"; + out << "Mat"; const UnownedStringSlice postFix = _getCTypeVecPostFix(_getCType(elementType->op)); - builder << postFix; + out << postFix; if (postFix.size() > 1) { - builder << "_"; + out << "_"; } - builder << rowCount; - builder << colCount; - - return m_slicePool.add(builder); + out << rowCount; + out << colCount; } + return SLANG_OK; } case kIROp_HLSLRWStructuredBufferType: { auto bufType = static_cast<IRHLSLRWStructuredBufferType*>(type); - StringBuilder builder; - builder << "RWStructuredBuffer<"; - builder << _getTypeName(bufType->getElementType()); - builder << ">"; - - return m_slicePool.add(builder); + out << "RWStructuredBuffer<"; + SLANG_RETURN_ON_FAIL(_calcTypeName(bufType->getElementType(), target, out)); + out << ">"; + return SLANG_OK; } case kIROp_ArrayType: { @@ -440,24 +555,44 @@ StringSlicePool::Handle CPPSourceEmitter::_calcTypeName(IRType* type) auto elementType = arrayType->getElementType(); int elementCount = int(GetIntVal(arrayType->getElementCount())); - StringBuilder builder; - builder << "FixedArray<"; - builder << _getTypeName(elementType); - builder << ", " << elementCount << ">"; - - return m_slicePool.add(builder); + out << "FixedArray<"; + SLANG_RETURN_ON_FAIL(_calcTypeName(elementType, target, out)); + out << ", " << elementCount << ">"; + return SLANG_OK; + } + case kIROp_SamplerStateType: + { + out << "SamplerState"; + return SLANG_OK; + } + case kIROp_SamplerComparisonStateType: + { + out << "SamplerComparisonState"; + return SLANG_OK; } default: { if (IRBasicType::isaImpl(type->op)) { - return m_slicePool.add(getBuiltinTypeName(type->op)); + out << getBuiltinTypeName(type->op); + return SLANG_OK; } + + if (auto texType = as<IRTextureTypeBase>(type)) + { + // We don't support 
TextureSampler, so ignore that + if (texType->op != kIROp_TextureSamplerType) + { + return _calcTextureTypeName(texType, out); + } + } + break; } } - return StringSlicePool::kNullHandle; + SLANG_DIAGNOSE_UNEXPECTED(getSink(), SourceLoc(), "unhandled type for C/C++ emit"); + return SLANG_FAIL; } void CPPSourceEmitter::useType(IRType* type) @@ -615,8 +750,12 @@ static IRBasicType* _getElementType(IRType* type) static bool _isOperator(const UnownedStringSlice& funcName) { - const char c = funcName[0]; - return !((c >= 'a' && c <='z') || (c >= 'A' && c <= 'Z') || c == '_'); + if (funcName.size() > 0) + { + const char c = funcName[0]; + return !((c >= 'a' && c <='z') || (c >= 'A' && c <= 'Z') || c == '_'); + } + return false; } void CPPSourceEmitter::_emitAryDefinition(const SpecializedIntrinsic& specOp) @@ -989,6 +1128,130 @@ void CPPSourceEmitter::_emitNormalizeDefinition(const UnownedStringSlice& funcNa writer->emit("}\n\n"); } +void CPPSourceEmitter::_emitConstructConvertDefinition(const UnownedStringSlice& funcName, const SpecializedIntrinsic& specOp) +{ + SourceWriter* writer = getSourceWriter(); + IRFuncType* funcType = specOp.signatureType; + + SLANG_ASSERT(funcType->getParamCount() == 2); + + IRType* srcType = funcType->getParamType(1); + IRType* retType = specOp.returnType; + + emitType(retType); + writer->emit(" "); + writer->emit(funcName); + writer->emit("("); + emitType(srcType); + writer->emitChar(' '); + writer->emitChar(char('a' + 0)); + writer->emit(")"); + + writer->emit("\n{\n"); + writer->indent(); + + writer->emit("return "); + emitType(retType); + writer->emit("{ "); + + IRType* dstElemType = _getElementType(retType); + //IRType* srcElemType = _getElementType(srcType); + + TypeDimension dim = _getTypeDimension(srcType, false); + + for (int i = 0; i < dim.rowCount; ++i) + { + if (dim.rowCount > 1) + { + if (i > 0) + { + writer->emit(", \n"); + } + writer->emit("{ "); + } + + for (int j = 0; j < dim.colCount; ++j) + { + if (j > 0) + { + 
writer->emit(", "); + } + + emitType(dstElemType); + writer->emit("("); + _emitAccess(UnownedStringSlice::fromLiteral("a"), dim, i, j, writer); + writer->emit(")"); + } + if (dim.rowCount > 1) + { + writer->emit("}"); + } + } + + writer->emit("};\n"); + + writer->dedent(); + writer->emit("}\n\n"); +} + +void CPPSourceEmitter::_emitConstructFromScalarDefinition(const UnownedStringSlice& funcName, const SpecializedIntrinsic& specOp) +{ + SourceWriter* writer = getSourceWriter(); + IRFuncType* funcType = specOp.signatureType; + + SLANG_ASSERT(funcType->getParamCount() == 2); + + IRType* srcType = funcType->getParamType(1); + IRType* retType = specOp.returnType; + + emitType(retType); + writer->emit(" "); + writer->emit(funcName); + writer->emit("("); + emitType(srcType); + writer->emitChar(' '); + writer->emitChar(char('a' + 0)); + writer->emit(")"); + + writer->emit("\n{\n"); + writer->indent(); + + writer->emit("return "); + emitType(retType); + writer->emit("{ "); + + const TypeDimension dim = _getTypeDimension(retType, false); + + for (int i = 0; i < dim.rowCount; ++i) + { + if (dim.rowCount > 1) + { + if (i > 0) + { + writer->emit(", \n"); + } + writer->emit("{ "); + } + for (int j = 0; j < dim.colCount; ++j) + { + if (j > 0) + { + writer->emit(", "); + } + writer->emit("a"); + } + if (dim.rowCount > 1) + { + writer->emit("}"); + } + } + + writer->emit("};\n"); + + writer->dedent(); + writer->emit("}\n\n"); +} + void CPPSourceEmitter::_emitReflectDefinition(const UnownedStringSlice& funcName, const SpecializedIntrinsic& specOp) { SourceWriter* writer = getSourceWriter(); @@ -1061,6 +1324,14 @@ void CPPSourceEmitter::emitSpecializedOperationDefinition(const SpecializedIntri { return _emitReflectDefinition(_getFuncName(specOp), specOp); } + case IntrinsicOp::ConstructConvert: + { + return _emitConstructConvertDefinition(_getFuncName(specOp), specOp); + } + case IntrinsicOp::ConstructFromScalar: + { + return _emitConstructFromScalarDefinition(_getFuncName(specOp), 
specOp); + } default: { const auto& info = getOperationInfo(specOp.op); @@ -1183,47 +1454,40 @@ void CPPSourceEmitter::emitCall(const SpecializedIntrinsic& specOp, IRInst* inst } case IntrinsicOp::Swizzle: { - // For C++ we don't need to emit a swizzle function - // For C we need a construction function + // Currently only works for C++ (we use {} constuction) - which means we don't need to generate a function. + // For C we need to generate suitable construction function auto swizzleInst = static_cast<IRSwizzle*>(inst); const Index elementCount = Index(swizzleInst->getElementCount()); - if (elementCount == 1) - { - defaultEmitInstExpr(inst, inOuterPrec); - } - else - { - // TODO(JS): Not sure this is correct on the parens handling front - IRType* retType = specOp.returnType; - emitType(retType); - writer->emit("{"); + // TODO(JS): Not 100% sure this is correct on the parens handling front + IRType* retType = specOp.returnType; + emitType(retType); + writer->emit("{"); - for (Index i = 0; i < elementCount; ++i) + for (Index i = 0; i < elementCount; ++i) + { + if (i > 0) { - if (i > 0) - { - writer->emit(", "); - } - - auto outerPrec = getInfo(EmitOp::General); + writer->emit(", "); + } - auto prec = getInfo(EmitOp::Postfix); - emitOperand(swizzleInst->getBase(), leftSide(outerPrec, prec)); + auto outerPrec = getInfo(EmitOp::General); - writer->emit("."); + auto prec = getInfo(EmitOp::Postfix); + emitOperand(swizzleInst->getBase(), leftSide(outerPrec, prec)); - IRInst* irElementIndex = swizzleInst->getElementIndex(i); - SLANG_RELEASE_ASSERT(irElementIndex->op == kIROp_IntLit); - IRConstant* irConst = (IRConstant*)irElementIndex; - UInt elementIndex = (UInt)irConst->value.intVal; - SLANG_RELEASE_ASSERT(elementIndex < 4); + writer->emit("."); - writer->emitChar(s_elemNames[elementIndex]); - } + IRInst* irElementIndex = swizzleInst->getElementIndex(i); + SLANG_RELEASE_ASSERT(irElementIndex->op == kIROp_IntLit); + IRConstant* irConst = (IRConstant*)irElementIndex; + 
UInt elementIndex = (UInt)irConst->value.intVal; + SLANG_RELEASE_ASSERT(elementIndex < 4); - writer->emit("}"); + writer->emitChar(s_elemNames[elementIndex]); } + + writer->emit("}"); break; } default: @@ -1302,6 +1566,46 @@ StringSlicePool::Handle CPPSourceEmitter::_calcFuncName(const SpecializedIntrins } else { + switch (specOp.op) + { + case IntrinsicOp::ConstructConvert: + { + // Work out the function name + IRFuncType* signatureType = specOp.signatureType; + SLANG_ASSERT(signatureType->getParamCount() == 2); + + IRType* dstType = signatureType->getParamType(0); + //IRType* srcType = signatureType->getParamType(1); + + StringBuilder builder; + builder << "convert_"; + // I need a function that is called that will construct this + if (SLANG_FAILED(_calcTypeName(dstType, CodeGenTarget::CSource, builder))) + { + return StringSlicePool::kNullHandle; + } + return m_slicePool.add(builder); + } + case IntrinsicOp::ConstructFromScalar: + { + // Work out the function name + IRFuncType* signatureType = specOp.signatureType; + SLANG_ASSERT(signatureType->getParamCount() == 2); + + IRType* dstType = signatureType->getParamType(0); + + StringBuilder builder; + builder << "constructFromScalar_"; + // I need a function that is called that will construct this + if (SLANG_FAILED(_calcTypeName(dstType, CodeGenTarget::CSource, builder))) + { + return StringSlicePool::kNullHandle; + } + return m_slicePool.add(builder); + } + default: break; + } + const auto& info = getOperationInfo(specOp.op); if (info.funcName.size()) { @@ -1316,6 +1620,38 @@ StringSlicePool::Handle CPPSourceEmitter::_calcFuncName(const SpecializedIntrins void CPPSourceEmitter::emitOperationCall(IntrinsicOp op, IRInst* inst, IRUse* operands, int operandCount, IRType* retType, const EmitOpInfo& inOuterPrec) { + switch (op) + { + case IntrinsicOp::ConstructFromScalar: + { + SLANG_ASSERT(operandCount == 1); + IRType* dstType = inst->getDataType(); + IRType* srcType = _getElementType(dstType); + IRType* argTypes[2] = 
{ dstType, srcType }; + + SpecializedIntrinsic specOp = getSpecializedOperation(op, argTypes, 2, retType); + + emitCall(specOp, inst, operands, operandCount, inOuterPrec); + return; + } + case IntrinsicOp::ConstructConvert: + { + SLANG_ASSERT(inst->getOperandCount() == 1); + IRType* argTypes[2] = {inst->getDataType(), inst->getOperand(0)->getDataType() }; + + SpecializedIntrinsic specOp = getSpecializedOperation(op, argTypes, 2, retType); + + IRFuncType* signatureType = specOp.signatureType; + SLANG_UNUSED(signatureType); + + SLANG_ASSERT(signatureType->getParamType(0) != signatureType->getParamType(1)); + + emitCall(specOp, inst, operands, operandCount, inOuterPrec); + return; + } + default: break; + } + if (operandCount > 8) { List<IRType*> argTypes; @@ -1381,11 +1717,94 @@ CPPSourceEmitter::CPPSourceEmitter(const Desc& desc): } } -void CPPSourceEmitter::emitParameterGroupImpl(IRGlobalParam* varDecl, IRUniformParameterGroupType* type) +void CPPSourceEmitter::_emitInOutParamType(IRType* type, String const& name, IRType* valueType) +{ + StringSliceLoc nameAndLoc(name.getUnownedSlice()); + + if (auto refType = as<IRRefType>(type)) + { + m_writer->emit("const "); + } + + UnownedStringSlice slice = _getTypeName(valueType); + m_writer->emit(slice); + m_writer->emit("& "); + m_writer->emitName(nameAndLoc); +} + +void CPPSourceEmitter::emitParamTypeImpl(IRType* type, String const& name) +{ + // An `out` or `inout` parameter will have been + // encoded as a parameter of pointer type, so + // we need to decode that here. 
+ // + if (auto outType = as<IROutType>(type)) + { + return _emitInOutParamType(type, name, outType->getValueType()); + } + else if (auto inOutType = as<IRInOutType>(type)) + { + return _emitInOutParamType(type, name, inOutType->getValueType()); + } + else if (auto refType = as<IRRefType>(type)) + { + return _emitInOutParamType(type, name, refType->getValueType()); + } + + emitType(type, name); +} + +bool CPPSourceEmitter::tryEmitGlobalParamImpl(IRGlobalParam* varDecl, IRType* varType) { SLANG_UNUSED(varDecl); - SLANG_UNUSED(type); - SLANG_ASSERT(!"Not implemented"); + SLANG_UNUSED(varType); + + switch (varType->op) + { + case kIROp_StructType: + { + String name = getName(varDecl); + + UnownedStringSlice typeName = _getTypeName(varType); + m_writer->emit(typeName); + m_writer->emit("* "); + m_writer->emit(name); + m_writer->emit(";\n"); + return true; + } + } + + return false; +} + +void CPPSourceEmitter::emitParameterGroupImpl(IRGlobalParam* varDecl, IRUniformParameterGroupType* type) +{ + // Output global parameters + auto varLayout = getVarLayout(varDecl); + SLANG_RELEASE_ASSERT(varLayout); + + String name = getName(varDecl); + auto elementType = type->getElementType(); + + switch (type->op) + { + case kIROp_ParameterBlockType: + case kIROp_ConstantBufferType: + { + UnownedStringSlice typeName = _getTypeName(elementType); + m_writer->emit(typeName); + m_writer->emit("* "); + m_writer->emit(name); + m_writer->emit(";\n"); + break; + } + default: + { + emitType(elementType, name); + m_writer->emit(";\n"); + break; + } + } } void CPPSourceEmitter::emitEntryPointAttributesImpl(IRFunc* irFunc, EntryPointLayout* entryPointLayout) @@ -1426,6 +1845,87 @@ void CPPSourceEmitter::emitEntryPointAttributesImpl(IRFunc* irFunc, EntryPointLa m_writer->emit("SLANG_PRELUDE_EXPORT\n"); } +void CPPSourceEmitter::emitSimpleFuncImpl(IRFunc* func) +{ + auto resultType = func->getResultType(); + + auto name = getFuncName(func); + + // Deal with decorations that need + // to be emitted 
as attributes + + // We are going to ignore the parameters passed and just pass in the Context + + auto entryPointLayout = asEntryPoint(func); + if (entryPointLayout) + { + StringBuilder prefixName; + prefixName << "_" << name; + emitType(resultType, prefixName); + m_writer->emit("()\n"); + } + else + { + emitType(resultType, name); + + m_writer->emit("("); + auto firstParam = func->getFirstParam(); + for (auto pp = firstParam; pp; pp = pp->getNextParam()) + { + if (pp != firstParam) + m_writer->emit(", "); + + emitSimpleFuncParamImpl(pp); + } + m_writer->emit(")"); + + emitSemantics(func); + } + + // TODO: encode declaration vs. definition + if (isDefinition(func)) + { + m_writer->emit("\n{\n"); + m_writer->indent(); + + // HACK: forward-declare all the local variables needed for the + // parameters of non-entry blocks. + emitPhiVarDecls(func); + + // Need to emit the operations in the blocks of the function + emitFunctionBody(func); + + m_writer->dedent(); + m_writer->emit("}\n\n"); + } + else + { + m_writer->emit(";\n\n"); + } +} + +void CPPSourceEmitter::emitSimpleValueImpl(IRInst* inst) +{ + switch (inst->op) + { + case kIROp_FloatLit: + { + IRConstant* constantInst = static_cast<IRConstant*>(inst); + + m_writer->emit(constantInst->value.floatVal); + + // If the literal is a float, then we need to add 'f' at end + IRType* type = constantInst->getDataType(); + if (type && type->op == kIROp_FloatType ) + { + m_writer->emitChar('f'); + } + break; + } + default: Super::emitSimpleValueImpl(inst); + } +} + void CPPSourceEmitter::emitVectorTypeNameImpl(IRType* elementType, IRIntegerValue elementCount) { emitSimpleType(_getVecType(elementType, int(elementCount))); @@ -1578,7 +2078,44 @@ bool CPPSourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOut switch (inst->op) { + case kIROp_constructVectorFromScalar: + { + SLANG_ASSERT(inst->getOperandCount() == 1); + IRType* dstType = inst->getDataType(); + + // Check it's a vector + SLANG_ASSERT(dstType->op 
== kIROp_VectorType); + // Source must be a scalar + SLANG_ASSERT(as<IRBasicType>(inst->getOperand(0)->getDataType())); + + emitOperationCall(IntrinsicOp::ConstructFromScalar, inst, inst->getOperands(), int(inst->getOperandCount()), dstType, inOuterPrec); + return true; + } case kIROp_Construct: + { + IRType* dstType = inst->getDataType(); + IRType* srcType = inst->getOperand(0)->getDataType(); + + if ((dstType->op == kIROp_VectorType || dstType->op == kIROp_MatrixType) && + inst->getOperandCount() == 1) + { + if (as<IRBasicType>(srcType)) + { + emitOperationCall(IntrinsicOp::ConstructFromScalar, inst, inst->getOperands(), int(inst->getOperandCount()), dstType, inOuterPrec); + } + else + { + SLANG_ASSERT(_getElementType(dstType) != _getElementType(srcType)); + // If it's constructed from a type conversion + emitOperationCall(IntrinsicOp::ConstructConvert, inst, inst->getOperands(), int(inst->getOperandCount()), dstType, inOuterPrec); + } + } + else + { + emitOperationCall(IntrinsicOp::Init, inst, inst->getOperands(), int(inst->getOperandCount()), inst->getDataType(), inOuterPrec); + } + return true; + } case kIROp_makeVector: case kIROp_MakeMatrix: { @@ -1599,7 +2136,44 @@ bool CPPSourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOut } case kIROp_swizzle: { - emitOperationCall(IntrinsicOp::Swizzle, inst, inst->getOperands(), int(inst->getOperandCount()), inst->getDataType(), inOuterPrec); + // For C++ we don't need to emit a swizzle function + // For C we need a construction function + auto swizzleInst = static_cast<IRSwizzle*>(inst); + + IRInst* baseInst = swizzleInst->getBase(); + IRType* baseType = baseInst->getDataType(); + + // If we are swizzling from a built in type, + if (as<IRBasicType>(baseType)) + { + // We can swizzle a scalar type to be a vector, or just a scalar + IRType* dstType = swizzleInst->getDataType(); + if (as<IRBasicType>(dstType)) + { + // If the output is a scalar, then could only have been a .x, which we can just ignore 
the '.x' part + emitOperand(baseInst, inOuterPrec); + } + else + { + SLANG_ASSERT(dstType->op == kIROp_VectorType); + emitOperationCall(IntrinsicOp::ConstructFromScalar, inst, inst->getOperands(), 1, dstType, inOuterPrec); + } + } + else + { + const Index elementCount = Index(swizzleInst->getElementCount()); + if (elementCount == 1) + { + // If just one thing is extracted then the . syntax will just work + defaultEmitInstExpr(inst, inOuterPrec); + } + else + { + // Will need to generate a swizzle method + emitOperationCall(IntrinsicOp::Swizzle, inst, inst->getOperands(), int(inst->getOperandCount()), inst->getDataType(), inOuterPrec); + } + } + return true; } case kIROp_Call: @@ -1619,6 +2193,7 @@ bool CPPSourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOut return false; } + default: { IntrinsicOp op = getOperation(inst->op); @@ -1652,15 +2227,290 @@ void CPPSourceEmitter::emitPreprocessorDirectivesImpl() { emitSpecializedOperationDefinition(keyValue.Key); } +} + +void CPPSourceEmitter::emitOperandImpl(IRInst* inst, EmitOpInfo const& outerPrec) +{ + if (shouldFoldInstIntoUseSites(inst)) + { + emitInstExpr(inst, outerPrec); + return; + } + + switch (inst->op) + { + case 0: // nothing yet + case kIROp_GlobalParam: + { + // It's in UniformState + String name = getName(inst); + m_writer->emit("("); + switch (inst->getDataType()->op) + { + case kIROp_ParameterBlockType: + case kIROp_ConstantBufferType: + case kIROp_StructType: + { + m_writer->emit("*"); + break; + } + default: break; + } + m_writer->emit("uniformState->"); + m_writer->emit(name); + m_writer->emit(")"); + break; + } + case kIROp_Param: + { + auto varLayout = getVarLayout(inst); + + if (varLayout) + { + auto semanticNameSpelling = varLayout->systemValueSemantic; + if (semanticNameSpelling.getLength()) + { + semanticNameSpelling = semanticNameSpelling.toLower(); + + if (semanticNameSpelling == "sv_dispatchthreadid") + { + + m_writer->emit("dispatchThreadID"); + return; + } + else if 
(semanticNameSpelling == "sv_groupid") + { + m_writer->emit("varyingInput.groupID"); + return; + } + else if (semanticNameSpelling == "sv_groupthreadid") + { + m_writer->emit("varyingInput.groupThreadID"); + return; + } + } + } + + ; // Fall-thru + } + case kIROp_GlobalVar: + default: + // GlobalVar should be fine as should just be a member of Context + m_writer->emit(getName(inst)); + break; + } +} + +static bool _isVariable(IROp op) +{ + switch (op) + { + case kIROp_GlobalVar: + case kIROp_GlobalParam: + //case kIROp_Var: + { + return true; + } + default: return false; + } +} + +static bool _isFunction(IROp op) +{ + return op == kIROp_Func; +} + +struct GlobalParamInfo +{ + typedef GlobalParamInfo ThisType; + bool operator<(const ThisType& rhs) const { return offset < rhs.offset; } + bool operator==(const ThisType& rhs) const { return offset == rhs.offset; } + bool operator!=(const ThisType& rhs) const { return !(*this == rhs); } + + IRInst* inst; + UInt offset; + UInt size; +}; + +void CPPSourceEmitter::emitModuleImpl(IRModule* module) +{ + List<EmitAction> actions; + computeEmitActions(module, actions); + + // Emit forward declarations. 
Don't emit variables that need to be grouped or function definitions (which will ref those types) + for (auto action : actions) + { + switch (action.level) + { + case EmitAction::Level::ForwardDeclaration: + emitFuncDecl(cast<IRFunc>(action.inst)); + break; + + case EmitAction::Level::Definition: + if (_isVariable(action.inst->op) || _isFunction(action.inst->op)) + { + // Don't emit functions or variables that have to be grouped into structures yet + } + else + { + emitGlobalInst(action.inst); + } + break; + } + } - // Lets take a look at layout + // Output the global parameters in a 'UniformState' structure + { + m_writer->emit("struct UniformState\n{\n"); + m_writer->indent(); - ProgramLayout* programLayout = m_programLayout; + List<GlobalParamInfo> params; - if (programLayout) + for (auto action : actions) + { + if (action.level == EmitAction::Level::Definition && action.inst->op == kIROp_GlobalParam) + { + VarLayout* varLayout = CLikeSourceEmitter::getVarLayout(action.inst); + SLANG_ASSERT(varLayout); + const VarLayout::ResourceInfo* varInfo = varLayout->FindResourceInfo(LayoutResourceKind::Uniform); + TypeLayout* typeLayout = varLayout->getTypeLayout(); + TypeLayout::ResourceInfo* typeInfo = typeLayout->FindResourceInfo(LayoutResourceKind::Uniform); + + GlobalParamInfo paramInfo; + paramInfo.inst = action.inst; + // Index is the byte offset for uniform + paramInfo.offset = varInfo ? varInfo->index : 0; + paramInfo.size = typeInfo ? 
typeInfo->count.raw : 0; + + params.add(paramInfo); + } + } + + // We want to sort by layout offset, and insert suitable padding + params.sort(); + + int padIndex = 0; + size_t offset = 0; + for (const auto& paramInfo : params) + { + if (offset < paramInfo.offset) + { + // We want to output some padding + StringBuilder builder; + builder << "uint8_t _pad" << (padIndex++) << "[" << (paramInfo.offset - offset) << "];\n"; + } + + emitGlobalInst(paramInfo.inst); + // Set offset after this + offset = paramInfo.offset + paramInfo.size; + } + + m_writer->emit("\n"); + m_writer->dedent(); + m_writer->emit("\n};\n\n"); + } + + // Output the 'Context' which will be used for execution { + m_writer->emit("struct Context\n{\n"); + m_writer->indent(); + m_writer->emit("UniformState* uniformState;\n"); + m_writer->emit("ComputeVaryingInput varyingInput;\n"); + m_writer->emit("uint3 dispatchThreadID;\n"); + // Output all the thread locals + for (auto action : actions) + { + if (action.level == EmitAction::Level::Definition && action.inst->op == kIROp_GlobalVar) + { + emitGlobalInst(action.inst); + } + } + + // Finally output the functions as methods on the context + for (auto action : actions) + { + if (action.level == EmitAction::Level::Definition && _isFunction(action.inst->op)) + { + emitGlobalInst(action.inst); + } + } + + m_writer->dedent(); + m_writer->emit("};\n\n"); + } + + // Finally we need to output dll entry points + + for (auto action : actions) + { + if (action.level == EmitAction::Level::Definition && _isFunction(action.inst->op)) + { + IRFunc* func = as<IRFunc>(action.inst); + + auto entryPointLayout = asEntryPoint(func); + if (entryPointLayout) + { + auto resultType = func->getResultType(); + auto name = getFuncName(func); + + // Emit the actual function + emitEntryPointAttributes(func, entryPointLayout); + emitType(resultType, name); + + m_writer->emit("(ComputeVaryingInput* varyingInput, UniformState* uniformState)\n{\n"); + emitSemantics(func); + + 
m_writer->indent(); + // Initialize when constructing so that globals are zeroed + m_writer->emit("Context context = {};\n"); + m_writer->emit("context.uniformState = uniformState;\n"); + m_writer->emit("context.varyingInput = *varyingInput;\n"); + + // Emit dispatchThreadID + if (entryPointLayout->profile.GetStage() == Stage::Compute) + { + // https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/sv-dispatchthreadid + // SV_DispatchThreadID is the sum of SV_GroupID * numthreads and GroupThreadID. + + static const UInt kAxisCount = 3; + UInt sizeAlongAxis[kAxisCount]; + + // TODO: this is kind of gross because we are using a public + // reflection API function, rather than some kind of internal + // utility it forwards to... + spReflectionEntryPoint_getComputeThreadGroupSize((SlangReflectionEntryPoint*)entryPointLayout, kAxisCount, &sizeAlongAxis[0]); + + m_writer->emit("context.dispatchThreadID = {\n"); + m_writer->indent(); + + StringBuilder builder; + + for (int i = 0; i < kAxisCount; ++i) + { + builder.Clear(); + const char elem[2] = {s_elemNames[i], 0}; + builder << "varyingInput->groupID." << elem << " * " << sizeAlongAxis[i] << " + varyingInput->groupThreadID." << elem; + if (i < kAxisCount - 1) + { + builder << ","; + } + builder << "\n"; + m_writer->emit(builder); + } + + m_writer->dedent(); + m_writer->emit("};\n"); + } + + m_writer->emit("context._"); + m_writer->emit(name); + m_writer->emit("();\n"); + m_writer->dedent(); + m_writer->emit("}\n"); + } + } } } diff --git a/source/slang/slang-emit-cpp.h b/source/slang/slang-emit-cpp.h index ea793ee95..4280bdc80 100644 --- a/source/slang/slang-emit-cpp.h +++ b/source/slang/slang-emit-cpp.h @@ -10,6 +10,24 @@ namespace Slang { +/* TODO(JS): Note that there are multiple methods to handle 'construction' operations. That is because 'construct' is used as a kind of +generic 'construction' for built in types including vectors and matrices. 
+ +For the moment the cpp emit code, determines what kind of construct is needed, and has special handling for ConstructConvert and +ConstructFromScalar. + +That currently we do not see constructVectorFromScalar - for example when we do... + +int2 fromScalar = 1; + +This appears as a construction from an int. + +That the better thing to do would be that there were IR instructions for the specific types of construction. I suppose there is a question +about whether there should be separate instructions for vector/matrix, or emit code should just use the destination type. In practice I think +it's fine that there isn't an instruction separating vector/matrix. That being the case I guess we arguably don't need constructVectorFromScalar, +just constructXXXFromScalar. Would be good if there was a suitable name to encompass vector/matrix. +*/ + #define SLANG_CPP_INTRINSIC_OP(x) \ x(Invalid, "", -1) \ x(Init, "", -1) \ @@ -93,7 +111,10 @@ namespace Slang \ x(AsFloat, "asfloat", 1) \ x(AsInt, "asint", 1) \ - x(AsUInt, "asuint", 1) + x(AsUInt, "asuint", 1) \ + \ + x(ConstructConvert, "", 1) \ + x(ConstructFromScalar, "", 1) class CPPSourceEmitter: public CLikeSourceEmitter @@ -181,6 +202,13 @@ protected: virtual void emitVectorTypeNameImpl(IRType* elementType, IRIntegerValue elementCount) SLANG_OVERRIDE; virtual bool tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOuterPrec) SLANG_OVERRIDE; virtual void emitPreprocessorDirectivesImpl() SLANG_OVERRIDE; + virtual void emitSimpleValueImpl(IRInst* value) SLANG_OVERRIDE; + virtual void emitModuleImpl(IRModule* module) SLANG_OVERRIDE; + virtual void emitSimpleFuncImpl(IRFunc* func) SLANG_OVERRIDE; + virtual void emitOperandImpl(IRInst* inst, EmitOpInfo const& outerPrec) SLANG_OVERRIDE; + virtual void emitParamTypeImpl(IRType* type, String const& name) SLANG_OVERRIDE; + + virtual bool tryEmitGlobalParamImpl(IRGlobalParam* varDecl, IRType* varType) SLANG_OVERRIDE; void emitIntrinsicCallExpr(IRCall* inst, IRFunc* func, 
EmitOpInfo const& inOuterPrec); @@ -194,9 +222,13 @@ protected: void _emitLengthDefinition(const UnownedStringSlice& funcName, const SpecializedIntrinsic& specOp); void _emitNormalizeDefinition(const UnownedStringSlice& funcName, const SpecializedIntrinsic& specOp); void _emitReflectDefinition(const UnownedStringSlice& funcName, const SpecializedIntrinsic& specOp); + void _emitConstructConvertDefinition(const UnownedStringSlice& funcName, const SpecializedIntrinsic& specOp); + void _emitConstructFromScalarDefinition(const UnownedStringSlice& funcName, const SpecializedIntrinsic& specOp); void _emitSignature(const UnownedStringSlice& funcName, const SpecializedIntrinsic& specOp); + void _emitInOutParamType(IRType* type, String const& name, IRType* valueType); + UnownedStringSlice _getAndEmitSpecializedOperationDefinition(IntrinsicOp op, IRType*const* argTypes, Int argCount, IRType* retType); static TypeDimension _getTypeDimension(IRType* type, bool vecSwap); @@ -216,6 +248,10 @@ protected: UnownedStringSlice _getTypeName(IRType* type); StringSlicePool::Handle _calcTypeName(IRType* type); + SlangResult _calcTypeName(IRType* type, CodeGenTarget target, StringBuilder& out); + + SlangResult _calcTextureTypeName(IRTextureTypeBase* texType, StringBuilder& outName); + Dictionary<SpecializedIntrinsic, StringSlicePool::Handle> m_intrinsicNameMap; Dictionary<IRType*, StringSlicePool::Handle> m_typeNameMap; diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp index d5dcaca98..205f8ee0e 100644 --- a/source/slang/slang-emit.cpp +++ b/source/slang/slang-emit.cpp @@ -305,7 +305,7 @@ String emitEntryPoint( // parameters of a shader entry point and move them into // the global scope instead. 
// - moveEntryPointUniformParamsToGlobalScope(irModule); + moveEntryPointUniformParamsToGlobalScope(irModule, target); #if 0 dumpIRIfEnabled(compileRequest, irModule, "ENTRY POINT UNIFORMS MOVED"); #endif @@ -361,56 +361,60 @@ String emitEntryPoint( #endif validateIRModuleIfEnabled(compileRequest, irModule); - // The Slang language allows interfaces to be used like - // ordinary types (including placing them in constant - // buffers and entry-point parameter lists), but then - // getting them to lay out in a reasonable way requires - // us to treat fields/variables with interface type - // *as if* they were pointers to heap-allocated "objects." - // - // Specialization will have replaced fields/variables - // with interface types like `IFoo` with fields/variables - // with pointer-like types like `ExistentialBox<SomeType>`. - // - // We need to legalize these pointer-like types away, - // which involves two main changes: - // - // 1. Any `ExistentialBox<...>` fields need to be moved - // out of their enclosing `struct` type, so that the layout - // of the enclosing type is computed as if the field had - // zero size. - // - // 2. Once an `ExistentialBox<X>` has been floated out - // of its parent and landed somwhere permanent (e.g., either - // a dedicated variable, or a field of constant buffer), - // we need to replace it with just an `X`, after which we - // will have (more) legal shader code. 
- // - legalizeExistentialTypeLayout( - irModule, - sink); - eliminateDeadCode(compileRequest, irModule); - + // We don't need the legalize pass for C/C++ based types + if (!(sourceStyle == SourceStyle::CPP || sourceStyle == SourceStyle::C)) + { + // The Slang language allows interfaces to be used like + // ordinary types (including placing them in constant + // buffers and entry-point parameter lists), but then + // getting them to lay out in a reasonable way requires + // us to treat fields/variables with interface type + // *as if* they were pointers to heap-allocated "objects." + // + // Specialization will have replaced fields/variables + // with interface types like `IFoo` with fields/variables + // with pointer-like types like `ExistentialBox<SomeType>`. + // + // We need to legalize these pointer-like types away, + // which involves two main changes: + // + // 1. Any `ExistentialBox<...>` fields need to be moved + // out of their enclosing `struct` type, so that the layout + // of the enclosing type is computed as if the field had + // zero size. + // + // 2. Once an `ExistentialBox<X>` has been floated out + // of its parent and landed somwhere permanent (e.g., either + // a dedicated variable, or a field of constant buffer), + // we need to replace it with just an `X`, after which we + // will have (more) legal shader code. + // + legalizeExistentialTypeLayout( + irModule, + sink); + eliminateDeadCode(compileRequest, irModule); + #if 0 - dumpIRIfEnabled(compileRequest, irModule, "EXISTENTIALS LEGALIZED"); + dumpIRIfEnabled(compileRequest, irModule, "EXISTENTIALS LEGALIZED"); #endif - validateIRModuleIfEnabled(compileRequest, irModule); - - // Many of our target languages and/or downstream compilers - // don't support `struct` types that have resource-type fields. 
- // In order to work around this limitation, we will rewrite the - // IR so that any structure types with resource-type fields get - // split into a "tuple" that comprises the ordinary fields (still - // bundles up as a `struct`) and one element for each resource-type - // field (recursively). - // - // What used to be individual variables/parameters/arguments/etc. - // then become multiple variables/parameters/arguments/etc. - // - legalizeResourceTypes( - irModule, - sink); - eliminateDeadCode(compileRequest, irModule); + validateIRModuleIfEnabled(compileRequest, irModule); + + // Many of our target languages and/or downstream compilers + // don't support `struct` types that have resource-type fields. + // In order to work around this limitation, we will rewrite the + // IR so that any structure types with resource-type fields get + // split into a "tuple" that comprises the ordinary fields (still + // bundles up as a `struct`) and one element for each resource-type + // field (recursively). + // + // What used to be individual variables/parameters/arguments/etc. + // then become multiple variables/parameters/arguments/etc. + // + legalizeResourceTypes( + irModule, + sink); + eliminateDeadCode(compileRequest, irModule); + } // Debugging output of legalization #if 0 diff --git a/source/slang/slang-ir-entry-point-uniforms.cpp b/source/slang/slang-ir-entry-point-uniforms.cpp index 20e726f25..da036d798 100644 --- a/source/slang/slang-ir-entry-point-uniforms.cpp +++ b/source/slang/slang-ir-entry-point-uniforms.cpp @@ -98,6 +98,12 @@ struct MoveEntryPointUniformParametersToGlobalScope // IRModule* module; + // The target can determine how a variable is moved out into global scope + CodeGenTarget codeGenTarget; + + // If true the target needs constant buffer wrapping (for uniforms say) + bool targetNeedsConstantBuffer; + // We will process a whole module by visiting all // its global functions, looking for entry points. 
// @@ -162,7 +168,7 @@ struct MoveEntryPointUniformParametersToGlobalScope // an explicit IR constant buffer for that wrapper, // auto entryPointParamsLayout = entryPointLayout->parametersLayout; - bool needConstantBuffer = entryPointParamsLayout->typeLayout.is<ParameterGroupTypeLayout>(); + bool needConstantBuffer = targetNeedsConstantBuffer && entryPointParamsLayout->typeLayout.is<ParameterGroupTypeLayout>(); // We will set up an IR builder so that we are ready to generate code. // @@ -369,6 +375,10 @@ struct MoveEntryPointUniformParametersToGlobalScope return true; } + // TODO(JS): We probably want a more accurate way of determining if system semantic value + // We can use the flags Flag::SemanticValue for one. But main issue with this test, is for some + // targets currently (CPU) no resources are consumed. Perhaps this is fixed elsewhere by using a 'notional' resource. + // Varying parameters with "system value" semantics currently show up as // consuming no resources, so we need to special-case that here. 
// @@ -415,10 +425,29 @@ struct MoveEntryPointUniformParametersToGlobalScope }; void moveEntryPointUniformParamsToGlobalScope( - IRModule* module) + IRModule* module, + CodeGenTarget target) { MoveEntryPointUniformParametersToGlobalScope context; + context.module = module; + context.codeGenTarget = target; + context.targetNeedsConstantBuffer = true; + + // Check if this target needs constant buffer wrapping + switch (target) + { + case CodeGenTarget::CPPSource: + case CodeGenTarget::CSource: + case CodeGenTarget::Executable: + case CodeGenTarget::SharedLibrary: + { + context.targetNeedsConstantBuffer = false; + break; + } + default: break; + } + context.processModule(); } diff --git a/source/slang/slang-ir-entry-point-uniforms.h b/source/slang/slang-ir-entry-point-uniforms.h index 49994c202..0e978b9eb 100644 --- a/source/slang/slang-ir-entry-point-uniforms.h +++ b/source/slang/slang-ir-entry-point-uniforms.h @@ -1,12 +1,15 @@ // slang-ir-entry-point-uniform.h #pragma once +#include "slang-compiler.h" + namespace Slang { struct IRModule; /// Move any uniform parameters of entry points to the global scope instead. void moveEntryPointUniformParamsToGlobalScope( - IRModule* module); + IRModule* module, + CodeGenTarget target); } diff --git a/source/slang/slang-parameter-binding.cpp b/source/slang/slang-parameter-binding.cpp index 722725af7..f9657e776 100644 --- a/source/slang/slang-parameter-binding.cpp +++ b/source/slang/slang-parameter-binding.cpp @@ -2632,6 +2632,21 @@ static int _calcTotalNumUsedRegistersForLayoutResourceKind(ParameterBindingConte return numUsed; } +static bool _isCPUTarget(CodeGenTarget target) +{ + switch (target) + { + case CodeGenTarget::CPPSource: + case CodeGenTarget::CSource: + case CodeGenTarget::Executable: + case CodeGenTarget::SharedLibrary: + { + return true; + } + default: return false; + } +} + /// Keep track of the running global counter for entry points and global parameters visited. 
/// /// Because of explicit `register` and `[[vk::binding(...)]]` support, parameter binding @@ -3022,16 +3037,22 @@ RefPtr<ProgramLayout> generateParameterBindings( // want to do so through a different feature. // bool needDefaultConstantBuffer = false; - for( auto& parameterInfo : sharedContext.parameters ) - { - SLANG_RELEASE_ASSERT(parameterInfo->varLayouts.getCount() != 0); - auto firstVarLayout = parameterInfo->varLayouts.getFirst(); - // Does the field have any uniform data? - if( firstVarLayout->typeLayout->FindResourceInfo(LayoutResourceKind::Uniform) ) + // On a CPU target, it's okay to have global scope parameters that use Uniform resources (because on CPU + // all resources are 'Uniform') + if (!_isCPUTarget(targetReq->target)) + { + for( auto& parameterInfo : sharedContext.parameters ) { - needDefaultConstantBuffer = true; - diagnoseGlobalUniform(&sharedContext, firstVarLayout->varDecl); + SLANG_RELEASE_ASSERT(parameterInfo->varLayouts.getCount() != 0); + auto firstVarLayout = parameterInfo->varLayouts.getFirst(); + + // Does the field have any uniform data? + if( firstVarLayout->typeLayout->FindResourceInfo(LayoutResourceKind::Uniform) ) + { + needDefaultConstantBuffer = true; + diagnoseGlobalUniform(&sharedContext, firstVarLayout->varDecl); + } } } diff --git a/source/slang/slang-type-layout.cpp b/source/slang/slang-type-layout.cpp index f76b29a51..3b14b74c2 100644 --- a/source/slang/slang-type-layout.cpp +++ b/source/slang/slang-type-layout.cpp @@ -346,6 +346,38 @@ struct HLSLConstantBufferLayoutRulesImpl : DefaultLayoutRulesImpl } }; +struct CPULayoutRulesImpl : DefaultLayoutRulesImpl +{ + typedef DefaultLayoutRulesImpl Super; + + SimpleLayoutInfo GetScalarLayout(BaseType baseType) override + { + switch (baseType) + { + case BaseType::Bool: + { + // TODO(JS): Much like ptr this is a problem - in knowing how to return this value. In the past it's been a word + // on some compilers for example. 
+ // On checking though current compilers (clang, g++, visual studio) it is a single byte + return SimpleLayoutInfo( LayoutResourceKind::Uniform, 1, 1 ); + } + + default: return Super::GetScalarLayout(baseType); + } + } + + UniformLayoutInfo BeginStructLayout() override + { + return Super::BeginStructLayout(); + } + + void EndStructLayout(UniformLayoutInfo* ioStructInfo) override + { + // Conform to C/C++ size is adjusted to the largest alignment + ioStructInfo->size = RoundToAlignment(ioStructInfo->size, ioStructInfo->alignment); + } +}; + struct HLSLStructuredBufferLayoutRulesImpl : DefaultLayoutRulesImpl { // HLSL structured buffers drop the restrictions added for constant buffers, @@ -558,6 +590,8 @@ struct GLSLLayoutRulesFamilyImpl : LayoutRulesFamilyImpl LayoutRulesImpl* getHitAttributesParameterRules() override; LayoutRulesImpl* getShaderRecordConstantBufferRules() override; + + LayoutRulesImpl* getStructuredBufferRules() override; }; struct HLSLLayoutRulesFamilyImpl : LayoutRulesFamilyImpl @@ -576,11 +610,86 @@ struct HLSLLayoutRulesFamilyImpl : LayoutRulesFamilyImpl LayoutRulesImpl* getHitAttributesParameterRules() override; LayoutRulesImpl* getShaderRecordConstantBufferRules() override; + + LayoutRulesImpl* getStructuredBufferRules() override; +}; + +struct CPULayoutRulesFamilyImpl : LayoutRulesFamilyImpl +{ + virtual LayoutRulesImpl* getConstantBufferRules() override; + virtual LayoutRulesImpl* getPushConstantBufferRules() override; + virtual LayoutRulesImpl* getTextureBufferRules() override; + virtual LayoutRulesImpl* getVaryingInputRules() override; + virtual LayoutRulesImpl* getVaryingOutputRules() override; + virtual LayoutRulesImpl* getSpecializationConstantRules() override; + virtual LayoutRulesImpl* getShaderStorageBufferRules() override; + virtual LayoutRulesImpl* getParameterBlockRules() override; + + LayoutRulesImpl* getRayPayloadParameterRules() override; + LayoutRulesImpl* getCallablePayloadParameterRules() override; + LayoutRulesImpl* 
getHitAttributesParameterRules() override; + + LayoutRulesImpl* getShaderRecordConstantBufferRules() override; + LayoutRulesImpl* getStructuredBufferRules() override; }; GLSLLayoutRulesFamilyImpl kGLSLLayoutRulesFamilyImpl; HLSLLayoutRulesFamilyImpl kHLSLLayoutRulesFamilyImpl; +CPULayoutRulesFamilyImpl kCPULayoutRulesFamilyImpl; +// CPU case + +struct CPUObjectLayoutRulesImpl : ObjectLayoutRulesImpl +{ + virtual SimpleLayoutInfo GetObjectLayout(ShaderParameterKind kind) override + { + switch (kind) + { + case ShaderParameterKind::ConstantBuffer: + // It's a pointer to the actual uniform data + return SimpleLayoutInfo(LayoutResourceKind::Uniform, sizeof(void*), sizeof(void*)); + + case ShaderParameterKind::MutableTexture: + case ShaderParameterKind::TextureUniformBuffer: + case ShaderParameterKind::Texture: + // It's a pointer to a texture interface + return SimpleLayoutInfo(LayoutResourceKind::Uniform, sizeof(void*), sizeof(void*)); + + case ShaderParameterKind::StructuredBuffer: + case ShaderParameterKind::MutableStructuredBuffer: + // It's a ptr and a size of the amount of elements + return SimpleLayoutInfo(LayoutResourceKind::Uniform, sizeof(void*) * 2, sizeof(void*)); + + case ShaderParameterKind::RawBuffer: + case ShaderParameterKind::Buffer: + case ShaderParameterKind::MutableRawBuffer: + case ShaderParameterKind::MutableBuffer: + // It's a pointer and a size in bytes + return SimpleLayoutInfo(LayoutResourceKind::Uniform, sizeof(void*) * 2, sizeof(void*)); + + case ShaderParameterKind::SamplerState: + // It's a pointer + return SimpleLayoutInfo(LayoutResourceKind::Uniform, sizeof(void*), sizeof(void*)); + + case ShaderParameterKind::TextureSampler: + case ShaderParameterKind::MutableTextureSampler: + case ShaderParameterKind::InputRenderTarget: + // TODO: how to handle these? 
+ default: + SLANG_UNEXPECTED("unhandled shader parameter kind"); + UNREACHABLE_RETURN(SimpleLayoutInfo()); + } + } +}; + + + +static CPUObjectLayoutRulesImpl kCPUObjectLayoutRulesImpl; +static CPULayoutRulesImpl kCPULayoutRulesImpl; + +LayoutRulesImpl kCPULayoutRulesImpl_ = { + &kCPULayoutRulesFamilyImpl, &kCPULayoutRulesImpl, &kCPUObjectLayoutRulesImpl, +}; // GLSL cases @@ -624,6 +733,11 @@ LayoutRulesImpl kGLSLHitAttributesParameterLayoutRulesImpl_ = { &kGLSLLayoutRulesFamilyImpl, &kGLSLHitAttributesParameterLayoutRulesImpl, &kGLSLObjectLayoutRulesImpl, }; +LayoutRulesImpl kGLSLStructuredBufferLayoutRulesImpl_ = { + &kGLSLLayoutRulesFamilyImpl, &kStd430LayoutRulesImpl, &kGLSLObjectLayoutRulesImpl, +}; + + // HLSL cases LayoutRulesImpl kHLSLConstantBufferLayoutRulesImpl_ = { @@ -654,7 +768,7 @@ LayoutRulesImpl kHLSLHitAttributesParameterLayoutRulesImpl_ = { &kHLSLLayoutRulesFamilyImpl, &kHLSLHitAttributesParameterLayoutRulesImpl, &kHLSLObjectLayoutRulesImpl, }; -// +// GLSL Family LayoutRulesImpl* GLSLLayoutRulesFamilyImpl::getConstantBufferRules() { @@ -717,7 +831,12 @@ LayoutRulesImpl* GLSLLayoutRulesFamilyImpl::getHitAttributesParameterRules() return &kGLSLHitAttributesParameterLayoutRulesImpl_; } -// +LayoutRulesImpl* GLSLLayoutRulesFamilyImpl::getStructuredBufferRules() +{ + return &kGLSLStructuredBufferLayoutRulesImpl_; +} + +// HLSL Family LayoutRulesImpl* HLSLLayoutRulesFamilyImpl::getConstantBufferRules() { @@ -741,6 +860,11 @@ LayoutRulesImpl* HLSLLayoutRulesFamilyImpl::getShaderRecordConstantBufferRules() return &kHLSLConstantBufferLayoutRulesImpl_; } +LayoutRulesImpl* HLSLLayoutRulesFamilyImpl::getStructuredBufferRules() +{ + return &kHLSLStructuredBufferLayoutRulesImpl_; +} + LayoutRulesImpl* HLSLLayoutRulesFamilyImpl::getTextureBufferRules() { return nullptr; @@ -781,21 +905,65 @@ LayoutRulesImpl* HLSLLayoutRulesFamilyImpl::getHitAttributesParameterRules() return &kHLSLHitAttributesParameterLayoutRulesImpl_; } +// CPU Family +LayoutRulesImpl* 
CPULayoutRulesFamilyImpl::getConstantBufferRules() +{ + return &kCPULayoutRulesImpl_; +} -// +LayoutRulesImpl* CPULayoutRulesFamilyImpl::getPushConstantBufferRules() +{ + return &kCPULayoutRulesImpl_; +} -LayoutRulesImpl* GetLayoutRulesImpl(LayoutRule rule) +LayoutRulesImpl* CPULayoutRulesFamilyImpl::getTextureBufferRules() { - switch (rule) - { - case LayoutRule::Std140: return &kStd140LayoutRulesImpl_; - case LayoutRule::Std430: return &kStd430LayoutRulesImpl_; - case LayoutRule::HLSLConstantBuffer: return &kHLSLConstantBufferLayoutRulesImpl_; - case LayoutRule::HLSLStructuredBuffer: return &kHLSLStructuredBufferLayoutRulesImpl_; - default: - return nullptr; - } + return nullptr; +} + +LayoutRulesImpl* CPULayoutRulesFamilyImpl::getVaryingInputRules() +{ + return nullptr; +} +LayoutRulesImpl* CPULayoutRulesFamilyImpl::getVaryingOutputRules() +{ + return nullptr; +} +LayoutRulesImpl* CPULayoutRulesFamilyImpl::getSpecializationConstantRules() +{ + return nullptr; +} +LayoutRulesImpl* CPULayoutRulesFamilyImpl::getShaderStorageBufferRules() +{ + return nullptr; +} +LayoutRulesImpl* CPULayoutRulesFamilyImpl::getParameterBlockRules() +{ + // Not clear - just use similar to CPU + return &kCPULayoutRulesImpl_; +} +LayoutRulesImpl* CPULayoutRulesFamilyImpl::getRayPayloadParameterRules() +{ + return nullptr; +} +LayoutRulesImpl* CPULayoutRulesFamilyImpl::getCallablePayloadParameterRules() +{ + return nullptr; +} +LayoutRulesImpl* CPULayoutRulesFamilyImpl::getHitAttributesParameterRules() +{ + return nullptr; +} +LayoutRulesImpl* CPULayoutRulesFamilyImpl::getShaderRecordConstantBufferRules() +{ + // Just following HLSLs lead for the moment + return &kCPULayoutRulesImpl_; +} + +LayoutRulesImpl* CPULayoutRulesFamilyImpl::getStructuredBufferRules() +{ + return &kCPULayoutRulesImpl_; } LayoutRulesFamilyImpl* getDefaultLayoutRulesFamilyForTarget(TargetRequest* targetReq) @@ -820,13 +988,15 @@ LayoutRulesFamilyImpl* getDefaultLayoutRulesFamilyForTarget(TargetRequest* targe case 
CodeGenTarget::CPPSource: case CodeGenTarget::CSource: { + // For now let's use some fairly simple CPU binding rules + // We just need to decide here what style of layout is appropriate, in terms of memory // and binding. That in terms of the actual binding that will be injected into functions // in the form of a BindContext. For now we'll go with HLSL layout - // that we may want to rethink that with the use of arrays and binding VK style binding might be // more appropriate in some ways. - return &kHLSLLayoutRulesFamilyImpl; + return &kCPULayoutRulesFamilyImpl; } default: @@ -1253,7 +1423,10 @@ static bool _usesOrdinaryData(RefPtr<TypeLayout> typeLayout) /// to the resource usage of a container like a `ConstantBuffer<X>` or /// `ParameterBlock<X>`. /// + /// TODO: letUniformBleedThrough is a (hopefully temporary) hack that was added to enable CPU targets to + /// produce workable layout. CPU targets have all bindings/variables laid out as uniforms static void _addUnmaskedResourceUsage( + bool letUniformBleedThrough, TypeLayout* dstTypeLayout, TypeLayout* srcTypeLayout, bool haveFullRegisterSpaceOrSet) @@ -1264,6 +1437,10 @@ static void _addUnmaskedResourceUsage( { case LayoutResourceKind::Uniform: // Ordinary/uniform resource usage will always be masked. + if (letUniformBleedThrough) + { + dstTypeLayout->addResourceUsage(resInfo); + } break; case LayoutResourceKind::RegisterSpace: @@ -1453,6 +1630,13 @@ static RefPtr<TypeLayout> _createParameterGroupTypeLayout( for( auto elementTypeResInfo : rawElementTypeLayout->resourceInfos ) { auto kind = elementTypeResInfo.kind; + + // TODO: Added to make layout work correctly for CPU target + if(kind == LayoutResourceKind::Uniform) + { + continue; + } + auto elementVarResInfo = elementVarLayout->findOrAddResourceInfo(kind); // If the container part of things is using the same resource kind @@ -1518,7 +1702,7 @@ static RefPtr<TypeLayout> _createParameterGroupTypeLayout( // buffer. 
Its resource usage will only bleed through if we // didn't allocate a full `space` or `set`. // - _addUnmaskedResourceUsage(typeLayout, containerTypeLayout, wantSpaceOrSet); + _addUnmaskedResourceUsage(true, typeLayout, containerTypeLayout, wantSpaceOrSet); // next we turn to the element type, where the cases are slightly // more involved (technically we could use this same logic for @@ -1526,7 +1710,7 @@ static RefPtr<TypeLayout> _createParameterGroupTypeLayout( // just special-case the container). // - _addUnmaskedResourceUsage(typeLayout, rawElementTypeLayout, wantSpaceOrSet); + _addUnmaskedResourceUsage(false, typeLayout, rawElementTypeLayout, wantSpaceOrSet); // At this point we have handled all the complexities that // arise for a parameter group that doesn't include interface-type @@ -1684,8 +1868,8 @@ static RefPtr<TypeLayout> _createParameterGroupTypeLayout( // up the hierarchy. // RefPtr<TypeLayout> unmaskedPendingDataTypeLayout = new TypeLayout(); - _addUnmaskedResourceUsage(unmaskedPendingDataTypeLayout, pendingContainerTypeLayout, wantSpaceOrSet); - _addUnmaskedResourceUsage(unmaskedPendingDataTypeLayout, pendingElementTypeLayout, wantSpaceOrSet); + _addUnmaskedResourceUsage(true, unmaskedPendingDataTypeLayout, pendingContainerTypeLayout, wantSpaceOrSet); + _addUnmaskedResourceUsage(false, unmaskedPendingDataTypeLayout, pendingElementTypeLayout, wantSpaceOrSet); // TODO: we should probably optimize for the case where there is no unmasked // usage that needs to be reported out, since it should be a common case. 
@@ -1836,9 +2020,7 @@ createStructuredBufferTypeLayout( typeLayout->elementTypeLayout = elementTypeLayout; typeLayout->uniformAlignment = info.alignment; - SLANG_RELEASE_ASSERT(!typeLayout->FindResourceInfo(LayoutResourceKind::Uniform)); - SLANG_RELEASE_ASSERT(typeLayout->uniformAlignment == 1); - + if( info.size != 0 ) { typeLayout->addResourceUsage(info.kind, info.size); @@ -1859,10 +2041,8 @@ createStructuredBufferTypeLayout( RefPtr<Type> structuredBufferType, RefPtr<Type> elementType) { - // TODO(tfoley): we should be looking up the appropriate rules - // via the `LayoutRulesFamily` in use here... - auto structuredBufferLayoutRules = GetLayoutRulesImpl( - LayoutRule::HLSLStructuredBuffer); + // look up the appropriate rules via the `LayoutRulesFamily` + auto structuredBufferLayoutRules = context.getRulesFamily()->getStructuredBufferRules(); // Create and save type layout for the buffer contents. auto elementTypeLayout = createTypeLayout( diff --git a/source/slang/slang-type-layout.h b/source/slang/slang-type-layout.h index b7b3c3207..5e86be113 100644 --- a/source/slang/slang-type-layout.h +++ b/source/slang/slang-type-layout.h @@ -17,14 +17,6 @@ class Type; // -enum class LayoutRule -{ - Std140, - Std430, - HLSLConstantBuffer, - HLSLStructuredBuffer, -}; - #if 0 enum class LayoutRulesFamily { @@ -392,7 +384,7 @@ public: typedef unsigned int VarLayoutFlags; enum VarLayoutFlag : VarLayoutFlags { - HasSemantic = 1 << 1 + HasSemantic = 1 << 0 }; // A reified layout for a particular variable, field, etc. 
@@ -924,6 +916,8 @@ struct LayoutRulesFamilyImpl virtual LayoutRulesImpl* getHitAttributesParameterRules()= 0; virtual LayoutRulesImpl* getShaderRecordConstantBufferRules() = 0; + + virtual LayoutRulesImpl* getStructuredBufferRules() = 0; }; struct TypeLayoutContext diff --git a/tests/cross-compile/c-cross-compile.slang b/tests/cross-compile/c-cross-compile.slang index 29078f0b6..646ba3445 100644 --- a/tests/cross-compile/c-cross-compile.slang +++ b/tests/cross-compile/c-cross-compile.slang @@ -65,6 +65,9 @@ struct Thing //TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):dxbinding(0),glbinding(0),out RWStructuredBuffer<int> outputBuffer; +RWStructuredBuffer<int> outputBuffer2; + + [numthreads(4, 1, 1)] void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) { @@ -104,4 +107,5 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) val = test(val); outputBuffer[tid] = val + int(dot(vec2, vec4)); + outputBuffer2[tid] = int(tid); }
\ No newline at end of file diff --git a/tests/cross-compile/cpp-execute-simple.slang b/tests/cross-compile/cpp-execute-simple.slang index 74a3ec634..72c77b653 100644 --- a/tests/cross-compile/cpp-execute-simple.slang +++ b/tests/cross-compile/cpp-execute-simple.slang @@ -1,10 +1,12 @@ //TEST:CPU_EXECUTE: -profile cs_5_0 -entry computeMain -target sharedlib +//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):dxbinding(0),glbinding(0),out +RWStructuredBuffer<int> outputBuffer; + + [numthreads(4, 1, 1)] void computeMain( - uint3 dispatchThreadID : SV_DispatchThreadID, -//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):dxbinding(0),glbinding(0),out - RWStructuredBuffer<int> outputBuffer) + uint3 dispatchThreadID : SV_DispatchThreadID) { uint tid = dispatchThreadID.x; diff --git a/tests/cross-compile/cpp-execute.slang b/tests/cross-compile/cpp-execute.slang index 1c90c8dd2..2700aa49f 100644 --- a/tests/cross-compile/cpp-execute.slang +++ b/tests/cross-compile/cpp-execute.slang @@ -62,11 +62,12 @@ struct Thing float b; }; +//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):dxbinding(0),glbinding(0),out +RWStructuredBuffer<int> outputBuffer; + [numthreads(4, 1, 1)] void computeMain( - uint3 dispatchThreadID : SV_DispatchThreadID, -//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):dxbinding(0),glbinding(0),out - RWStructuredBuffer<int> outputBuffer) + uint3 dispatchThreadID : SV_DispatchThreadID) { uint tid = dispatchThreadID.x; diff --git a/tests/cross-compile/cpp-resource-reflection.slang b/tests/cross-compile/cpp-resource-reflection.slang new file mode 100644 index 000000000..122e7b4de --- /dev/null +++ b/tests/cross-compile/cpp-resource-reflection.slang @@ -0,0 +1,42 @@ +//TEST:REFLECTION: -profile cs_5_0 -entry computeMain -target cpp + + +struct Thing +{ + int a; + float b; + float c; +}; + +static int value; + +// Don't use parameter block for now +//ParameterBlock<AnotherThing> s_paramBlock; + +ConstantBuffer<Thing> thing3; + +//TEST_INPUT:ubuffer(data=[0 0 0 0], 
stride=4):dxbinding(0),glbinding(0),out +RWStructuredBuffer<int> outputBuffer; + +Texture2D<float> tex; +SamplerState sampler; + +[numthreads(4, 1, 1)] +void computeMain( + uint3 dispatchThreadID : SV_DispatchThreadID, + uniform Thing thing, + uniform Thing thing2) +{ + uint tid = dispatchThreadID.x; + + // TODO(JS): Doesn't emit correctly on c++ becomes... + // Vector<float, 2> loc_0 = Vector<float, 2>{Vector<uint32_t, 2>{dispatchThreadID_0.x, dispatchThreadID_0.y}} * 0.50000000000000000000f; + //float2 loc = dispatchThreadID.xy * 0.5f; + + float2 loc = float2(dispatchThreadID.x * 0.5f, dispatchThreadID.y * 0.5f); + + float v = tex.Load(int3(tid, tid, 0)); + float s = tex.Sample(sampler, loc); + + outputBuffer[tid] = int(tid * tid) + thing.a + thing3.a + int(v + s) + value; // + thing.a; +}
\ No newline at end of file diff --git a/tests/cross-compile/cpp-resource-reflection.slang.expected b/tests/cross-compile/cpp-resource-reflection.slang.expected new file mode 100644 index 000000000..a0e5241f0 --- /dev/null +++ b/tests/cross-compile/cpp-resource-reflection.slang.expected @@ -0,0 +1,163 @@ +result code = 0 +standard error = { +} +standard output = { +{ + "parameters": [ + { + "name": "thing3", + "binding": {"kind": "uniform", "offset": 0, "size": 8}, + "type": { + "kind": "constantBuffer", + "elementType": { + "kind": "struct", + "name": "Thing", + "fields": [ + { + "name": "a", + "type": { + "kind": "scalar", + "scalarType": "int32" + }, + "binding": {"kind": "uniform", "offset": 0, "size": 4} + }, + { + "name": "b", + "type": { + "kind": "scalar", + "scalarType": "float32" + }, + "binding": {"kind": "uniform", "offset": 4, "size": 4} + }, + { + "name": "c", + "type": { + "kind": "scalar", + "scalarType": "float32" + }, + "binding": {"kind": "uniform", "offset": 8, "size": 4} + } + ] + } + } + }, + { + "name": "outputBuffer", + "binding": {"kind": "uniform", "offset": 8, "size": 16}, + "type": { + "kind": "resource", + "baseShape": "structuredBuffer", + "access": "readWrite", + "resultType": { + "kind": "scalar", + "scalarType": "int32" + } + } + }, + { + "name": "tex", + "binding": {"kind": "uniform", "offset": 24, "size": 8}, + "type": { + "kind": "resource", + "baseShape": "texture2D" + } + }, + { + "name": "sampler", + "binding": {"kind": "uniform", "offset": 32, "size": 8}, + "type": { + "kind": "samplerState" + } + } + ], + "entryPoints": [ + { + "name": "computeMain", + "stage:": "compute", + "parameters": [ + { + "name": "dispatchThreadID", + "semanticName": "SV_DISPATCHTHREADID", + "type": { + "kind": "vector", + "elementCount": 3, + "elementType": { + "kind": "scalar", + "scalarType": "uint32" + } + } + }, + { + "name": "thing", + "binding": {"kind": "uniform", "offset": 0, "size": 12}, + "type": { + "kind": "struct", + "name": "Thing", + 
"fields": [ + { + "name": "a", + "type": { + "kind": "scalar", + "scalarType": "int32" + }, + "binding": {"kind": "uniform", "offset": 0, "size": 4} + }, + { + "name": "b", + "type": { + "kind": "scalar", + "scalarType": "float32" + }, + "binding": {"kind": "uniform", "offset": 4, "size": 4} + }, + { + "name": "c", + "type": { + "kind": "scalar", + "scalarType": "float32" + }, + "binding": {"kind": "uniform", "offset": 8, "size": 4} + } + ] + } + }, + { + "name": "thing2", + "binding": {"kind": "uniform", "offset": 12, "size": 12}, + "type": { + "kind": "struct", + "name": "Thing", + "fields": [ + { + "name": "a", + "type": { + "kind": "scalar", + "scalarType": "int32" + }, + "binding": {"kind": "uniform", "offset": 0, "size": 4} + }, + { + "name": "b", + "type": { + "kind": "scalar", + "scalarType": "float32" + }, + "binding": {"kind": "uniform", "offset": 4, "size": 4} + }, + { + "name": "c", + "type": { + "kind": "scalar", + "scalarType": "float32" + }, + "binding": {"kind": "uniform", "offset": 8, "size": 4} + } + ] + } + } + ], + "threadGroupSize": [4, 1, 1] + } + ] +} +} diff --git a/tests/cross-compile/cpp-resource.slang b/tests/cross-compile/cpp-resource.slang new file mode 100644 index 000000000..f443249ad --- /dev/null +++ b/tests/cross-compile/cpp-resource.slang @@ -0,0 +1,53 @@ +//TEST:CPP_COMPILER_COMPILE: -profile cs_5_0 -entry computeMain -target cpp + +struct Thing +{ + int a; + float b; +}; + +static int value; + +// Don't use parameter block for now +//ParameterBlock<AnotherThing> s_paramBlock; + +ConstantBuffer<Thing> thing3; + +//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):dxbinding(0),glbinding(0),out +RWStructuredBuffer<int> outputBuffer; + +Texture2D<float> tex; +SamplerState sampler; + +void doSomething(int a, inout float3 v[2]) +{ + v[0] = float3(float(a)); + v[1] = float3(float(a + 1)); +} + +[numthreads(4, 1, 1)] +void computeMain( + uint3 dispatchThreadID : SV_DispatchThreadID, + uniform Thing thing, + uniform Thing thing2) +{ + uint 
tid = dispatchThreadID.x; + + int2 fromScalar = tid.x; + uint2 another = {}; + + float2 loc = dispatchThreadID.xy * 0.5f; + + float v = tex.Load(int3(tid, tid, 0)); + float s = tex.Sample(sampler, loc); + + // This should promote the 0.0 into a float2, + float l = tex.Sample(sampler, 0.0); + + float3 m = float(3).xxx; + + float3 arr[2] = { float3(3), float3(4) }; + doSomething(int(tid), arr); + + outputBuffer[tid] = int(tid * tid) + thing.a + thing3.a + int(v + s) + value + fromScalar.y + int(another.y) + int(m.x) + int(l) + int(arr[0].y); // + thing.a; +}
\ No newline at end of file diff --git a/tests/cross-compile/slang-cpp-prelude.h b/tests/cross-compile/slang-cpp-prelude.h index f2635eb2c..612e04a0b 100644 --- a/tests/cross-compile/slang-cpp-prelude.h +++ b/tests/cross-compile/slang-cpp-prelude.h @@ -1,17 +1,442 @@ #ifndef SLANG_CPP_PRELUDE_H #define SLANG_CPP_PRELUDE_H +/* --------------- START From slang.h ----------------- */ + +#ifndef SLANG_COMPILER +# define SLANG_COMPILER + +/* +Compiler defines, see http://sourceforge.net/p/predef/wiki/Compilers/ +NOTE that SLANG_VC holds the compiler version - not just 1 or 0 +*/ +# if defined(_MSC_VER) +# if _MSC_VER >= 1900 +# define SLANG_VC 14 +# elif _MSC_VER >= 1800 +# define SLANG_VC 12 +# elif _MSC_VER >= 1700 +# define SLANG_VC 11 +# elif _MSC_VER >= 1600 +# define SLANG_VC 10 +# elif _MSC_VER >= 1500 +# define SLANG_VC 9 +# else +# error "unknown version of Visual C++ compiler" +# endif +# elif defined(__clang__) +# define SLANG_CLANG 1 +# elif defined(__SNC__) +# define SLANG_SNC 1 +# elif defined(__ghs__) +# define SLANG_GHS 1 +# elif defined(__GNUC__) /* note: __clang__, __SNC__, or __ghs__ imply __GNUC__ */ +# define SLANG_GCC 1 +# else +# error "unknown compiler" +# endif +/* +Any compilers not detected by the above logic are now now explicitly zeroed out. +*/ +# ifndef SLANG_VC +# define SLANG_VC 0 +# endif +# ifndef SLANG_CLANG +# define SLANG_CLANG 0 +# endif +# ifndef SLANG_SNC +# define SLANG_SNC 0 +# endif +# ifndef SLANG_GHS +# define SLANG_GHS 0 +# endif +# ifndef SLANG_GCC +# define SLANG_GCC 0 +# endif +#endif /* SLANG_COMPILER */ + +/* +The following section attempts to detect the target platform being compiled for. + +If an application defines `SLANG_PLATFORM` before including this header, +they take responsibility for setting any compiler-dependent macros +used later in the file. + +Most applications should not need to touch this section. 
+*/ +#ifndef SLANG_PLATFORM +# define SLANG_PLATFORM +/** +Operating system defines, see http://sourceforge.net/p/predef/wiki/OperatingSystems/ +*/ +# if defined(WINAPI_FAMILY) && WINAPI_FAMILY == WINAPI_PARTITION_APP +# define SLANG_WINRT 1 /* Windows Runtime, either on Windows RT or Windows 8 */ +# elif defined(XBOXONE) +# define SLANG_XBOXONE 1 +# elif defined(_WIN64) /* note: XBOXONE implies _WIN64 */ +# define SLANG_WIN64 1 +# elif defined(_M_PPC) +# define SLANG_X360 1 +# elif defined(_WIN32) /* note: _M_PPC implies _WIN32 */ +# define SLANG_WIN32 1 +# elif defined(__ANDROID__) +# define SLANG_ANDROID 1 +# elif defined(__linux__) || defined(__CYGWIN__) /* note: __ANDROID__ implies __linux__ */ +# define SLANG_LINUX 1 +# elif defined(__APPLE__) && (defined(__arm__) || defined(__arm64__)) +# define SLANG_IOS 1 +# elif defined(__APPLE__) +# define SLANG_OSX 1 +# elif defined(__CELLOS_LV2__) +# define SLANG_PS3 1 +# elif defined(__ORBIS__) +# define SLANG_PS4 1 +# elif defined(__SNC__) && defined(__arm__) +# define SLANG_PSP2 1 +# elif defined(__ghs__) +# define SLANG_WIIU 1 +# else +# error "unknown target platform" +# endif +/* +Any platforms not detected by the above logic are now now explicitly zeroed out. 
+*/ +# ifndef SLANG_WINRT +# define SLANG_WINRT 0 +# endif +# ifndef SLANG_XBOXONE +# define SLANG_XBOXONE 0 +# endif +# ifndef SLANG_WIN64 +# define SLANG_WIN64 0 +# endif +# ifndef SLANG_X360 +# define SLANG_X360 0 +# endif +# ifndef SLANG_WIN32 +# define SLANG_WIN32 0 +# endif +# ifndef SLANG_ANDROID +# define SLANG_ANDROID 0 +# endif +# ifndef SLANG_LINUX +# define SLANG_LINUX 0 +# endif +# ifndef SLANG_IOS +# define SLANG_IOS 0 +# endif +# ifndef SLANG_OSX +# define SLANG_OSX 0 +# endif +# ifndef SLANG_PS3 +# define SLANG_PS3 0 +# endif +# ifndef SLANG_PS4 +# define SLANG_PS4 0 +# endif +# ifndef SLANG_PSP2 +# define SLANG_PSP2 0 +# endif +# ifndef SLANG_WIIU +# define SLANG_WIIU 0 +# endif +#endif /* SLANG_PLATFORM */ + +/* Shorthands for "families" of compilers/platforms */ +#define SLANG_GCC_FAMILY (SLANG_CLANG || SLANG_SNC || SLANG_GHS || SLANG_GCC) +#define SLANG_WINDOWS_FAMILY (SLANG_WINRT || SLANG_WIN32 || SLANG_WIN64) +#define SLANG_MICROSOFT_FAMILY (SLANG_XBOXONE || SLANG_X360 || SLANG_WINDOWS_FAMILY) +#define SLANG_LINUX_FAMILY (SLANG_LINUX || SLANG_ANDROID) +#define SLANG_APPLE_FAMILY (SLANG_IOS || SLANG_OSX) /* equivalent to #if __APPLE__ */ +#define SLANG_UNIX_FAMILY (SLANG_LINUX_FAMILY || SLANG_APPLE_FAMILY) /* shortcut for unix/posix platforms */ + +/* Macro for declaring if a method is no throw. Should be set before the return parameter. */ +#ifndef SLANG_NO_THROW +# if SLANG_WINDOWS_FAMILY && !defined(SLANG_DISABLE_EXCEPTIONS) +# define SLANG_NO_THROW __declspec(nothrow) +# endif +#endif +#ifndef SLANG_NO_THROW +# define SLANG_NO_THROW +#endif + +/* The `SLANG_STDCALL` and `SLANG_MCALL` defines are used to set the calling +convention for interface methods. 
+*/ +#ifndef SLANG_STDCALL +# if SLANG_MICROSOFT_FAMILY +# define SLANG_STDCALL __stdcall +# else +# define SLANG_STDCALL +# endif +#endif +#ifndef SLANG_MCALL +# define SLANG_MCALL SLANG_STDCALL +#endif + + +#if !defined(SLANG_STATIC) && !defined(SLANG_STATIC) + #define SLANG_DYNAMIC +#endif + +#if defined(_MSC_VER) +# define SLANG_DLL_EXPORT __declspec(dllexport) +#else +# define SLANG_DLL_EXPORT __attribute__((__visibility__("default"))) +#endif + +#if defined(SLANG_DYNAMIC) +# if defined(_MSC_VER) +# ifdef SLANG_DYNAMIC_EXPORT +# define SLANG_API SLANG_DLL_EXPORT +# else +# define SLANG_API __declspec(dllimport) +# endif +# else + // TODO: need to consider compiler capabilities +//# ifdef SLANG_DYNAMIC_EXPORT +# define SLANG_API SLANG_DLL_EXPORT +//# endif +# endif +#endif + +#ifndef SLANG_API +# define SLANG_API +#endif + +// GCC Specific +#if SLANG_GCC_FAMILY +// This doesn't work on clang - because the typedef is seen as multiply defined, use the line numbered version defined later +# if !defined(__clang__) && (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)) || defined(__ORBIS__)) +# define SLANG_COMPILE_TIME_ASSERT(exp) typedef char SlangCompileTimeAssert_Dummy[(exp) ? 
1 : -1] __attribute__((unused)) +# endif + +# define SLANG_NO_INLINE __attribute__((noinline)) +# define SLANG_FORCE_INLINE inline __attribute__((always_inline)) +# define SLANG_BREAKPOINT(id) __builtin_trap(); +# define SLANG_ALIGN_OF(T) __alignof__(T) + +// Use this macro instead of offsetof, because gcc produces warning if offsetof is used on a +// non POD type, even though it produces the correct result +# define SLANG_OFFSET_OF(T, ELEMENT) (size_t(&((T*)1)->ELEMENT) - 1) +#endif // SLANG_GCC_FAMILY + +// Microsoft VC specific +#if SLANG_MICROSOFT_FAMILY +# define SLANG_NO_INLINE __declspec(noinline) +# define SLANG_FORCE_INLINE __forceinline +# define SLANG_BREAKPOINT(id) __debugbreak(); +# define SLANG_ALIGN_OF(T) __alignof(T) + +# define SLANG_INT64(x) (x##i64) +# define SLANG_UINT64(x) (x##ui64) +#endif // SLANG_MICROSOFT_FAMILY + +#ifndef SLANG_FORCE_INLINE +# define SLANG_FORCE_INLINE inline +#endif +#ifndef SLANG_NO_INLINE +# define SLANG_NO_INLINE +#endif + +#ifndef SLANG_COMPILE_TIME_ASSERT +# define SLANG_COMPILE_TIME_ASSERT(exp) typedef char SLANG_CONCAT(SlangCompileTimeAssert,__LINE__)[(exp) ? 1 : -1] +#endif + +#ifndef SLANG_OFFSET_OF +# define SLANG_OFFSET_OF(X, Y) offsetof(X, Y) +#endif + +#ifndef SLANG_BREAKPOINT +// Make it crash with a write to 0! +# define SLANG_BREAKPOINT(id) (*((int*)0) = int(id)); +#endif + +// Use for getting the amount of members of a standard C array. 
+#define SLANG_COUNT_OF(x) (sizeof(x)/sizeof(x[0])) +/// SLANG_INLINE exists to have a way to inline consistent with SLANG_ALWAYS_INLINE +#define SLANG_INLINE inline + +// Other defines +#define SLANG_STRINGIZE_HELPER(X) #X +#define SLANG_STRINGIZE(X) SLANG_STRINGIZE_HELPER(X) + +#define SLANG_CONCAT_HELPER(X, Y) X##Y +#define SLANG_CONCAT(X, Y) SLANG_CONCAT_HELPER(X, Y) + +#ifndef SLANG_UNUSED +# define SLANG_UNUSED(v) (void)v; +#endif + +// Used for doing constant literals +#ifndef SLANG_INT64 +# define SLANG_INT64(x) (x##ll) +#endif +#ifndef SLANG_UINT64 +# define SLANG_UINT64(x) (x##ull) +#endif + + +#ifdef __cplusplus +# define SLANG_EXTERN_C extern "C" +#else +# define SLANG_EXTERN_C +#endif + +#ifdef __cplusplus +// C++ specific macros +// Clang +#if SLANG_CLANG +# if (__clang_major__*10 + __clang_minor__) >= 33 +# define SLANG_HAS_MOVE_SEMANTICS 1 +# define SLANG_HAS_ENUM_CLASS 1 +# define SLANG_OVERRIDE override +# endif +// Gcc +#elif SLANG_GCC_FAMILY +// Check for C++11 +# if (__cplusplus >= 201103L) +# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 405 +# define SLANG_HAS_MOVE_SEMANTICS 1 +# endif +# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 406 +# define SLANG_HAS_ENUM_CLASS 1 +# endif +# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 407 +# define SLANG_OVERRIDE override +# endif +# endif +# endif // SLANG_GCC_FAMILY + +// Visual Studio + +# if SLANG_VC +// C4481: nonstandard extension used: override specifier 'override' +# if _MSC_VER < 1700 +# pragma warning(disable : 4481) +# endif +# define SLANG_OVERRIDE override +# if _MSC_VER >= 1600 +# define SLANG_HAS_MOVE_SEMANTICS 1 +# endif +# if _MSC_VER >= 1700 +# define SLANG_HAS_ENUM_CLASS 1 +# endif + +# endif // SLANG_VC + +// Set non set +# ifndef SLANG_OVERRIDE +# define SLANG_OVERRIDE +# endif +# ifndef SLANG_HAS_ENUM_CLASS +# define SLANG_HAS_ENUM_CLASS 0 +# endif +# ifndef SLANG_HAS_MOVE_SEMANTICS +# define SLANG_HAS_MOVE_SEMANTICS 0 +# endif + +#endif // __cplusplus + +/* Macros for detecting processor */ +#if 
defined(_M_ARM) || defined(__ARM_EABI__) +// This is special case for nVidia tegra +# define SLANG_PROCESSOR_ARM 1 +#elif defined(__i386__) || defined(_M_IX86) +# define SLANG_PROCESSOR_X86 1 +#elif defined(_M_AMD64) || defined(_M_X64) || defined(__amd64) || defined(__x86_64) +# define SLANG_PROCESSOR_X86_64 1 +#elif defined(_PPC_) || defined(__ppc__) || defined(__POWERPC__) || defined(_M_PPC) +# if defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__) || defined(__64BIT__) || defined(_LP64) || defined(__LP64__) +# define SLANG_PROCESSOR_POWER_PC_64 1 +# else +# define SLANG_PROCESSOR_POWER_PC 1 +# endif +#elif defined(__arm__) +# define SLANG_PROCESSOR_ARM 1 +#elif defined(__aarch64__) +# define SLANG_PROCESSOR_ARM_64 1 +#endif + +#ifndef SLANG_PROCESSOR_ARM +# define SLANG_PROCESSOR_ARM 0 +#endif + +#ifndef SLANG_PROCESSOR_ARM_64 +# define SLANG_PROCESSOR_ARM_64 0 +#endif + +#ifndef SLANG_PROCESSOR_X86 +# define SLANG_PROCESSOR_X86 0 +#endif + +#ifndef SLANG_PROCESSOR_X86_64 +# define SLANG_PROCESSOR_X86_64 0 +#endif + +#ifndef SLANG_PROCESSOR_POWER_PC +# define SLANG_PROCESSOR_POWER_PC 0 +#endif + +#ifndef SLANG_PROCESSOR_POWER_PC_64 +# define SLANG_PROCESSOR_POWER_PC_64 0 +#endif + +// Processor families + +#define SLANG_PROCESSOR_FAMILY_X86 (SLANG_PROCESSOR_X86_64 | SLANG_PROCESSOR_X86) +#define SLANG_PROCESSOR_FAMILY_ARM (SLANG_PROCESSOR_ARM | SLANG_PROCESSOR_ARM_64) +#define SLANG_PROCESSOR_FAMILY_POWER_PC (SLANG_PROCESSOR_POWER_PC_64 | SLANG_PROCESSOR_POWER_PC) + +// Pointer size +#define SLANG_PTR_IS_64 (SLANG_PROCESSOR_ARM_64 | SLANG_PROCESSOR_X86_64 | SLANG_PROCESSOR_POWER_PC_64) +#define SLANG_PTR_IS_32 (SLANG_PTR_IS_64 ^ 1) + +// Processor features +#if SLANG_PROCESSOR_FAMILY_X86 +# define SLANG_LITTLE_ENDIAN 1 +# define SLANG_UNALIGNED_ACCESS 1 +#elif SLANG_PROCESSOR_FAMILY_ARM +# if defined(__ARMEB__) +# define SLANG_BIG_ENDIAN 1 +# else +# define SLANG_LITTLE_ENDIAN 1 +# endif +#elif SLANG_PROCESSOR_FAMILY_POWER_PC +# define 
SLANG_BIG_ENDIAN 1 +#endif + +#ifndef SLANG_LITTLE_ENDIAN +# define SLANG_LITTLE_ENDIAN 0 +#endif + +#ifndef SLANG_BIG_ENDIAN +# define SLANG_BIG_ENDIAN 0 +#endif + +#ifndef SLANG_UNALIGNED_ACCESS +# define SLANG_UNALIGNED_ACCESS 0 +#endif + +// One endianness must be set +#if ((SLANG_BIG_ENDIAN | SLANG_LITTLE_ENDIAN) == 0) +# error "Couldn't determine endianess" +#endif + +#ifndef SLANG_NO_INTTYPES #include <inttypes.h> -#include <math.h> -#include <inttypes.h> +#endif // ! SLANG_NO_INTTYPES + +#ifndef SLANG_NO_STDDEF +#include <stddef.h> +#endif // ! SLANG_NO_STDDEF + +/* --------------- END From slang.h ----------------- */ + #include <math.h> #include <assert.h> #include <stdlib.h> -#ifndef SLANG_FORCE_INLINE -# define SLANG_FORCE_INLINE inline -#endif - #ifndef SLANG_PRELUDE_PI # define SLANG_PRELUDE_PI 3.14159265358979323846 #endif @@ -35,6 +460,7 @@ namespace SLANG_PRELUDE_NAMESPACE { #endif + template <typename T, size_t SIZE> struct FixedArray { @@ -44,14 +470,12 @@ struct FixedArray T m_data[SIZE]; }; -template <typename T> -struct RWStructuredBuffer + +// Hmm... 
I guess a constant buffer should be unwrapped to be just a struct passed in +/* template <typename T> +struct ConstantBuffer { - T& operator[](size_t index) const { return data[index]; } - - T* data; - size_t count; -}; +}; */ template <typename T, int COUNT> struct Vector; @@ -80,12 +504,187 @@ struct Vector<T, 4> T x, y, z, w; }; + +typedef Vector<float, 2> float2; +typedef Vector<float, 3> float3; +typedef Vector<float, 4> float4; + +typedef Vector<int32_t, 2> int2; +typedef Vector<int32_t, 3> int3; +typedef Vector<int32_t, 4> int4; + +typedef Vector<uint32_t, 2> uint2; +typedef Vector<uint32_t, 3> uint3; +typedef Vector<uint32_t, 4> uint4; + template <typename T, int ROWS, int COLS> struct Matrix { Vector<T, COLS> rows[ROWS]; }; +// ----------------------------- ResourceType ----------------------------------------- + +// https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/sm5-object-structuredbuffer-getdimensions +// Missing Load(_In_ int Location, _Out_ uint Status); + +template <typename T> +struct RWStructuredBuffer +{ + T& operator[](size_t index) const { assert(index < count); return data[index]; } + const T& Load(size_t index) const { assert(index < count); return data[index]; } + void GetDimensions(uint32_t& outNumStructs, uint32_t& outStride) { outNumStructs = uint32_t(count); outStride = uint32_t(sizeof(T)); } + + T* data; + size_t count; +}; + +template <typename T> +struct StructuredBuffer +{ + const T& operator[](size_t index) const { assert(index < count); return data[index]; } + const T& Load(size_t index) const { assert(index < count); return data[index]; } + void GetDimensions(uint32_t& outNumStructs, uint32_t& outStride) { outNumStructs = uint32_t(count); outStride = uint32_t(sizeof(T)); } + + T* data; + size_t count; +}; + +// Missing Load(_In_ int Location, _Out_ uint Status); +struct ByteAddressBuffer +{ + void GetDimensions(uint32_t& outDim) const { outDim = uint32_t(sizeInBytes); } + uint32_t Load(size_t index) const + { + 
assert(index + 4 <= sizeInBytes && (index & 3) == 0); + return data[index >> 2]; + } + uint2 Load2(size_t index) const + { + assert(index + 8 <= sizeInBytes && (index & 3) == 0); + const size_t dataIdx = index >> 2; + return uint2{data[dataIdx], data[dataIdx + 1]}; + } + uint3 Load3(size_t index) const + { + assert(index + 12 <= sizeInBytes && (index & 3) == 0); + const size_t dataIdx = index >> 2; + return uint3{data[dataIdx], data[dataIdx + 1], data[dataIdx + 2]}; + } + uint4 Load4(size_t index) const + { + assert(index + 16 <= sizeInBytes && (index & 3) == 0); + const size_t dataIdx = index >> 2; + return uint4{data[dataIdx], data[dataIdx + 1], data[dataIdx + 2], data[dataIdx + 3]}; + } + + const uint32_t* data; + size_t sizeInBytes; //< Must be multiple of 4 +}; + +// https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/sm5-object-rwbyteaddressbuffer +// Missing support for Atomic operations +// Missing support for Load with status +struct RWByteAddressBuffer +{ + void GetDimensions(uint32_t& outDim) const { outDim = uint32_t(sizeInBytes); } + + uint32_t Load(size_t index) const + { + assert(index + 4 <= sizeInBytes && (index & 3) == 0); + return data[index >> 2]; + } + uint2 Load2(size_t index) const + { + assert(index + 8 <= sizeInBytes && (index & 3) == 0); + const size_t dataIdx = index >> 2; + return uint2{data[dataIdx], data[dataIdx + 1]}; + } + uint3 Load3(size_t index) const + { + assert(index + 12 <= sizeInBytes && (index & 3) == 0); + const size_t dataIdx = index >> 2; + return uint3{data[dataIdx], data[dataIdx + 1], data[dataIdx + 2]}; + } + uint4 Load4(size_t index) const + { + assert(index + 16 <= sizeInBytes && (index & 3) == 0); + const size_t dataIdx = index >> 2; + return uint4{data[dataIdx], data[dataIdx + 1], data[dataIdx + 2], data[dataIdx + 3]}; + } + + void Store(size_t index, uint32_t v) const + { + assert(index + 4 <= sizeInBytes && (index & 3) == 0); + data[index >> 2] = v; + } + void Store2(size_t index, uint2 v) const + { + 
assert(index + 8 <= sizeInBytes && (index & 3) == 0); + const size_t dataIdx = index >> 2; + data[dataIdx + 0] = v.x; + data[dataIdx + 1] = v.y; + } + void Store3(size_t index, uint3 v) const + { + assert(index + 12 <= sizeInBytes && (index & 3) == 0); + const size_t dataIdx = index >> 2; + data[dataIdx + 0] = v.x; + data[dataIdx + 1] = v.y; + data[dataIdx + 2] = v.z; + } + void Store4(size_t index, uint4 v) const + { + assert(index + 16 <= sizeInBytes && (index & 3) == 0); + const size_t dataIdx = index >> 2; + data[dataIdx + 0] = v.x; + data[dataIdx + 1] = v.y; + data[dataIdx + 2] = v.z; + data[dataIdx + 3] = v.w; + } + + uint32_t* data; + size_t sizeInBytes; //< Must be multiple of 4 +}; + +struct ISamplerState; +struct ISamplerComparisonState; + +struct SamplerState +{ + ISamplerState* state; +}; + +struct SamplerComparisonState +{ + ISamplerComparisonState* state; +}; + +// Texture + +struct ITexture2D +{ + virtual void Load(const int3& v, void* out) = 0; + virtual void Sample(SamplerState samplerState, const float2& loc, void* out) = 0; +}; + +template <typename T> +struct Texture2D +{ + T Load(const int3& v) const { T out; texture->Load(v, &out); return out; } + T Sample(SamplerState samplerState, const float2& v) const { T out; texture->Sample(samplerState, v, &out); return out; } + + ITexture2D* texture; +}; + +/* Varying input for Compute */ + +struct ComputeVaryingInput +{ + uint3 groupID; + uint3 groupThreadID; +}; + // ----------------------------- F32 ----------------------------------------- union Union32 diff --git a/tools/slang-test/slang-test-main.cpp b/tools/slang-test/slang-test-main.cpp index 5e7a3d6f3..3235cc785 100644 --- a/tools/slang-test/slang-test-main.cpp +++ b/tools/slang-test/slang-test-main.cpp @@ -1130,7 +1130,7 @@ TestResult runCPUExecuteTest(TestContext* context, TestInput& input) StringBuilder actualOutput; - // TODO(JS): For moment just assume function 
name/data/parameters { SharedLibrary::FuncPtr func = SharedLibrary::findFuncByName(sharedLibrary, "computeMain"); if (!func) @@ -1139,22 +1139,32 @@ TestResult runCPUExecuteTest(TestContext* context, TestInput& input) return TestResult::Fail; } - typedef void (*Func)(CPPPrelude::Vector<uint32_t,3> threadID, CPPPrelude::RWStructuredBuffer<int32_t> buffer); + + struct UniformState + { + CPPPrelude::RWStructuredBuffer<int> buffer; + }; + + typedef void (*Func)(CPPPrelude::ComputeVaryingInput* varyingInput, UniformState* uniformState); Func runFunc = Func(func); int32_t data[4] = { 0, 0, 0, 0}; - CPPPrelude::RWStructuredBuffer<int32_t> buffer{data, 4}; + UniformState state; + + state.buffer = CPPPrelude::RWStructuredBuffer<int32_t>{data, 4}; + + CPPPrelude::ComputeVaryingInput varyingInput = {}; for (Int i = 0; i < 4; ++i) { - CPPPrelude::Vector<uint32_t, 3> threadID{ uint32_t(i), 0, 0}; - runFunc(threadID, buffer); + varyingInput.groupThreadID.x = uint32_t(i); + runFunc(&varyingInput, &state); } SharedLibrary::unload(sharedLibrary); // Write the data - _writeBuffer(buffer, actualOutput); + _writeBuffer(state.buffer, actualOutput); } String expectedOutputPath = outputStem + ".expected"; @@ -1314,13 +1324,12 @@ static TestResult runCPPCompilerCompile(TestContext* context, TestInput& input) // need to execute the stand-alone Slang compiler on the file, and compare its output to what we expect - auto filePath999 = input.filePath; auto outputStem = input.outputStem; CommandLine cmdLine; _initSlangCompiler(context, cmdLine); - cmdLine.addArg(filePath999); + cmdLine.addArg(input.filePath); for (auto arg : input.testOptions->args) { |
