diff options
Diffstat (limited to 'source')
| -rw-r--r-- | source/slang/slang-ast-dump.cpp | 41 | ||||
| -rw-r--r-- | source/slang/slang-ast-dump.h | 12 | ||||
| -rw-r--r-- | source/slang/slang-ast-serialize.cpp | 122 | ||||
| -rw-r--r-- | source/slang/slang-ast-serialize.h | 184 | ||||
| -rw-r--r-- | source/slang/slang.cpp | 16 |
5 files changed, 340 insertions, 35 deletions
diff --git a/source/slang/slang-ast-dump.cpp b/source/slang/slang-ast-dump.cpp index 1ae51615f..16b264f9b 100644 --- a/source/slang/slang-ast-dump.cpp +++ b/source/slang/slang-ast-dump.cpp @@ -10,9 +10,8 @@ namespace Slang { -namespace { // anonymous -struct Context +struct ASTDumpContext { struct ObjectInfo { @@ -23,7 +22,7 @@ struct Context struct ScopeWrite { - ScopeWrite(Context* context): + ScopeWrite(ASTDumpContext* context): m_context(context) { if (m_context->m_scopeWriteCount == 0) @@ -45,7 +44,7 @@ struct Context operator StringBuilder&() { return m_context->m_buf; } - Context* m_context; + ASTDumpContext* m_context; }; void dumpObject(const ReflectClassInfo& type, NodeBase* obj); @@ -138,6 +137,11 @@ struct Context void dump(const Scope* scope) { + if (m_dumpFlags & ASTDumpUtil::Flag::HideScope) + { + return; + } + if (scope == nullptr) { _dumpPtr(nullptr); @@ -212,6 +216,11 @@ struct Context void dump(SourceLoc sourceLoc) { + if (m_dumpFlags & ASTDumpUtil::Flag::HideSourceLoc) + { + return; + } + SourceManager* manager = m_writer->getSourceManager(); { @@ -562,13 +571,15 @@ struct Context void dumpObjectFull(NodeBase* node); - Context(SourceWriter* writer, ASTDumpUtil::Style dumpStyle): + ASTDumpContext(SourceWriter* writer, ASTDumpUtil::Flags flags, ASTDumpUtil::Style dumpStyle): m_writer(writer), m_scopeWriteCount(0), - m_dumpStyle(dumpStyle) + m_dumpStyle(dumpStyle), + m_dumpFlags(flags) { } + ASTDumpUtil::Flags m_dumpFlags; ASTDumpUtil::Style m_dumpStyle; Index m_scopeWriteCount; @@ -582,8 +593,6 @@ struct Context StringBuilder m_buf; }; -} // anonymous - // Lets generate functions one for each that attempts to write out *it's* fields. 
// We can write out the Super types fields by looking that up @@ -592,7 +601,7 @@ struct ASTDumpAccess #define SLANG_AST_DUMP_FIELD(FIELD_NAME, TYPE, param) context.dumpField(#FIELD_NAME, node->FIELD_NAME); #define SLANG_AST_DUMP_FIELDS_IMPL(NAME, SUPER, ORIGIN, LAST, MARKER, TYPE, param) \ -static void dumpFields_##NAME(NAME* node, Context& context) \ +static void dumpFields_##NAME(NAME* node, ASTDumpContext& context) \ { \ SLANG_UNUSED(node); \ SLANG_UNUSED(context); \ @@ -605,7 +614,7 @@ SLANG_ALL_ASTNode_NodeBase(SLANG_AST_DUMP_FIELDS_IMPL, _) #define SLANG_AST_GET_DUMP_FUNC(NAME, SUPER, ORIGIN, LAST, MARKER, TYPE, param) m_funcs[Index(ASTNodeType::NAME)] = (DumpFieldsFunc)&ASTDumpAccess::dumpFields_##NAME; -typedef void (*DumpFieldsFunc)(NodeBase* obj, Context& context); +typedef void (*DumpFieldsFunc)(NodeBase* obj, ASTDumpContext& context); struct DumpFieldFuncs { @@ -620,13 +629,13 @@ struct DumpFieldFuncs static const DumpFieldFuncs s_funcs; -void Context::dumpObjectReference(const ReflectClassInfo& type, NodeBase* obj, Index objIndex) +void ASTDumpContext::dumpObjectReference(const ReflectClassInfo& type, NodeBase* obj, Index objIndex) { SLANG_UNUSED(obj); ScopeWrite(this).getBuf() << type.m_name << ":" << objIndex; } -void Context::dumpObjectFull(const ReflectClassInfo& type, NodeBase* obj, Index objIndex) +void ASTDumpContext::dumpObjectFull(const ReflectClassInfo& type, NodeBase* obj, Index objIndex) { ObjectInfo& info = m_objects[objIndex]; SLANG_ASSERT(info.m_isDumped == false); @@ -662,7 +671,7 @@ void Context::dumpObjectFull(const ReflectClassInfo& type, NodeBase* obj, Index m_writer->emit("}\n"); } -void Context::dumpObject(const ReflectClassInfo& typeInfo, NodeBase* obj) +void ASTDumpContext::dumpObject(const ReflectClassInfo& typeInfo, NodeBase* obj) { Index index = getObjectIndex(typeInfo, obj); @@ -677,7 +686,7 @@ void Context::dumpObject(const ReflectClassInfo& typeInfo, NodeBase* obj) } } -void Context::dumpObjectFull(NodeBase* node) +void 
ASTDumpContext::dumpObjectFull(NodeBase* node) { if (!node) { @@ -691,9 +700,9 @@ void Context::dumpObjectFull(NodeBase* node) } } -/* static */void ASTDumpUtil::dump(NodeBase* node, Style style, SourceWriter* writer) +/* static */void ASTDumpUtil::dump(NodeBase* node, Style style, Flags flags, SourceWriter* writer) { - Context context(writer, style); + ASTDumpContext context(writer, flags, style); context.dumpObjectFull(node); context.dumpRemaining(); } diff --git a/source/slang/slang-ast-dump.h b/source/slang/slang-ast-dump.h index 0a83479b1..7a2b30c3e 100644 --- a/source/slang/slang-ast-dump.h +++ b/source/slang/slang-ast-dump.h @@ -19,7 +19,17 @@ struct ASTDumpUtil Flat, }; - static void dump(NodeBase* node, Style style, SourceWriter* writer); + typedef uint32_t Flags; + struct Flag + { + enum Enum : Flags + { + HideSourceLoc = 0x1, + HideScope = 0x2, + }; + }; + + static void dump(NodeBase* node, Style style, Flags flags, SourceWriter* writer); }; } // namespace Slang diff --git a/source/slang/slang-ast-serialize.cpp b/source/slang/slang-ast-serialize.cpp index f8364e777..bc58ebebf 100644 --- a/source/slang/slang-ast-serialize.cpp +++ b/source/slang/slang-ast-serialize.cpp @@ -7,6 +7,8 @@ #include "slang-compiler.h" #include "slang-type-layout.h" +#include "slang-ast-dump.h" + #include "slang-ast-support-types.h" #include "../core/slang-byte-encode-util.h" @@ -220,6 +222,24 @@ struct ASTSerialTypeInfo<T*> } }; +// Special case Name +template <> +struct ASTSerialTypeInfo<Name*> : public ASTSerialTypeInfo<RefObject*> +{ + // Special case + typedef Name* NativeType; + static void toNative(ASTSerialReader* reader, const void* inSerial, void* outNative) + { + *(Name**)outNative = reader->getName(*(const SerialType*)inSerial); + } +}; + +template <> +struct ASTSerialTypeInfo<const Name*> : public ASTSerialTypeInfo<Name*> +{ +}; + + struct ASTSerialDeclRefBaseTypeInfo { typedef DeclRefBase NativeType; @@ -651,7 +671,7 @@ struct ASTSerialTypeInfo<TypeExp> auto& dst = 
*(NativeType*)native; dst.type = reader->getPointer(src.type).dynamicCast<Type>(); - dst.exp = reader->getPointer(src.type).dynamicCast<Expr>(); + dst.exp = reader->getPointer(src.expr).dynamicCast<Expr>(); } }; @@ -779,7 +799,15 @@ struct ASTSerialTypeInfo<Token> ASTSerialTypeInfo<TokenType>::toSerial(writer, &src.type, &dst.type); ASTSerialTypeInfo<SourceLoc>::toSerial(writer, &src.loc, &dst.loc); - dst.name = writer->addName(src.getName()); + + if (src.flags & TokenFlag::Name) + { + dst.name = writer->addName(src.getName()); + } + else + { + dst.name = writer->addString(src.getContent()); + } } static void toNative(ASTSerialReader* reader, const void* serial, void* native) { @@ -792,6 +820,7 @@ struct ASTSerialTypeInfo<Token> ASTSerialTypeInfo<TokenType>::toNative(reader, &src.type, &dst.type); ASTSerialTypeInfo<SourceLoc>::toNative(reader, &src.loc, &dst.loc); + // At the other end all token content will appear as Names. if (src.name != ASTSerialIndex(0)) { dst.charsNameUnion.name = reader->getName(src.name); @@ -1682,7 +1711,7 @@ SlangResult ASTSerialReader::load(const uint8_t* data, size_t dataCount, ASTBuil // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ASTSerializeUtil !!!!!!!!!!!!!!!!!!!!!!!!!!!! 
-/* static */SlangResult ASTSerializeUtil::selfTest() +/* static */SlangResult ASTSerialTestUtil::selfTest() { RefPtr<ASTSerialClasses> classes = new ASTSerialClasses; @@ -1720,5 +1749,92 @@ SlangResult ASTSerialReader::load(const uint8_t* data, size_t dataCount, ASTBuil return SLANG_OK; } +/* static */SlangResult ASTSerialTestUtil::testSerialize(NodeBase* node, RootNamePool* rootNamePool, SharedASTBuilder* sharedASTBuilder, SourceManager* sourceManager) +{ + RefPtr<ASTSerialClasses> classes = new ASTSerialClasses; + + List<uint8_t> contents; + + { + OwnedMemoryStream stream(FileAccess::ReadWrite); + + ASTSerialWriter writer(classes); + + // Lets serialize it all + writer.addPointer(node); + // Let's stick it all in a stream + writer.write(&stream); + + stream.swapContents(contents); + + NamePool namePool; + namePool.setRootNamePool(rootNamePool); + + ASTSerialReader reader(classes); + + ASTBuilder builder(sharedASTBuilder, "Serialize Check"); + + // We could now check that the loaded data matches + + { + const List<ASTSerialInfo::Entry*>& writtenEntries = writer.getEntries(); + List<const ASTSerialInfo::Entry*> readEntries; + + SlangResult res = reader.loadEntries(contents.getBuffer(), contents.getCount(), readEntries); + SLANG_UNUSED(res); + + SLANG_ASSERT(writtenEntries.getCount() == readEntries.getCount()); + + // They should be identical up to the + for (Index i = 1; i < readEntries.getCount(); ++i) + { + auto writtenEntry = writtenEntries[i]; + auto readEntry = readEntries[i]; + + const size_t writtenSize = writtenEntry->calcSize(classes); + const size_t readSize = readEntry->calcSize(classes); + + SLANG_ASSERT(readSize == writtenSize); + // Check the payload is the same + SLANG_ASSERT(memcmp(readEntry, writtenEntry, readSize) == 0); + } + + } + + { + SlangResult res = reader.load(contents.getBuffer(), contents.getCount(), &builder, &namePool); + SLANG_UNUSED(res); + } + + // Lets see what we have + const ASTDumpUtil::Flags dumpFlags = 
ASTDumpUtil::Flag::HideSourceLoc | ASTDumpUtil::Flag::HideScope; + + String readDump; + { + SourceWriter sourceWriter(sourceManager, LineDirectiveMode::None); + ASTDumpUtil::dump(reader.getPointer(ASTSerialIndex(1)).dynamicCast<NodeBase>(), ASTDumpUtil::Style::Hierachical, dumpFlags, &sourceWriter); + readDump = sourceWriter.getContentAndClear(); + + } + String origDump; + { + SourceWriter sourceWriter(sourceManager, LineDirectiveMode::None); + ASTDumpUtil::dump(node, ASTDumpUtil::Style::Hierachical, dumpFlags, &sourceWriter); + origDump = sourceWriter.getContentAndClear(); + } + + // Write out + File::writeAllText("ast-read.ast-dump", readDump); + File::writeAllText("ast-orig.ast-dump", origDump); + + if (readDump != origDump) + { + return SLANG_FAIL; + } + } + + return SLANG_OK; +} + } // namespace Slang diff --git a/source/slang/slang-ast-serialize.h b/source/slang/slang-ast-serialize.h index 453102835..c2086f434 100644 --- a/source/slang/slang-ast-serialize.h +++ b/source/slang/slang-ast-serialize.h @@ -7,6 +7,8 @@ #include "slang-ast-support-types.h" #include "slang-ast-all.h" +#include "slang-ast-builder.h" + #include "../core/slang-byte-encode-util.h" #include "../core/slang-stream.h" @@ -14,6 +16,179 @@ namespace Slang { +/* +AST Serialization Overview +========================== + +The AST node types are generally types derived from the NodeBase. The C++ extractor is used to associate an ASTNodeType with +every NodeBase type, such that casting is fast and simple and we have a simple integer to uniquely identify those types. The +extractor also performs another task of associating with the type name all of the fields held in just that type. 
The definition +of the fields is stored in an 'x macro' which is in the slang-ast-generated-macro.h file, for example + +``` +#define SLANG_FIELDS_ASTNode_DeclRefExpr(_x_, _param_)\ + _x_(scope, (RefPtr<Scope>), _param_)\ + _x_(declRef, (DeclRef<Decl>), _param_)\ + _x_(name, (Name*), _param_) +``` + +For the type DeclRefExpr, this holds all of the fields held in just DeclRefExpr - in this case `scope`, `declRef` and `name`. +DeclRefExpr derives from Expr and this might hold other fields and so forth. + +The implementation makes a distinction between the 'native' types, the regular C++ in memory types and 'serial' types. +Each serializable C++ type has an associated 'serial' type - with the distinction that it can be written out and (with perhaps some other data) +read back in to recreate the C++ type. The serial type can be a C++ type, but is such that it can be written and read from disk and still +represent the same data. + +We need a mechanism to be able to do a conversion between native and serial types. To make the association we use the template + +``` +template <typename T> +struct ASTSerialTypeInfo; +``` + +and specialize it for each native type. The specialization holds + +SerialType - The type that will be used to represent the native type +NativeType - The native type +SerialAlignment - A value that holds what kind of alignment the SerialType needs to be serializable (it may be different from SLANG_ALIGN_OF(SerialType)!) +toSerial - A function that with the help of ASTSerialWriter converts the NativeType into the SerialType +toNative - A function that with the help of ASTSerialReader converts the SerialType into the NativeType + +It is useful to have a structure that holds the type information, so it can be stored. 
That is achieved with + +``` +template <typename T> +struct ASTSerialGetType; +``` + +This template can be specialized for specific native types - but all it holds is just a function getType, which returns an ASTSerialType*, +which just holds the information held in the ASTSerialTypeInfo template, but additionally including the size of the SerialType. + +So we need to define a specialized ASTSerialTypeInfo for each type that can be a field in a NodeBase derived type. We don't need to define +anything explicitly for the NodeBase derived types, as we will just generate the layout from the fields. How do we know the fields? We just +use the macros generated from the C++ extractor. + +So first a few things to observe... + +1) Some types don't need any conversion to be serializable - for int8_t or float, the bits can just be written out and read in +2) Some types need a conversion but it's very simple - for example an enum without explicit size, being written as an explicit size +3) Some types can be written out but would not be directly readable or usable with different targets/processors, so need converting +4) Some types require complex conversions that require programmer code - like Dictionary/List + +For types that need no conversion (1), we can just use the template ASTSerialIdentityTypeInfo + +``` +template <> +struct ASTSerialTypeInfo<SomeType> : public ASTSerialIdentityTypeInfo<SomeType> {}; +``` + +This specialization means that SomeType can be written out and read in across targets/compilers without problems. + +For (2) we have another template that will do the conversion for us + +``` +template <typename NATIVE_T, typename SERIAL_T> +struct ASTSerialConvertTypeInfo; +``` + +That we can use as above, and specify the native and serial types. + +For (3) there are a few scenarios. For any field in a serial type we must store in the serialized type such that the representation +will work across all processors/compilers. So one problematic type is `bool`. 
It's not specified how it's laid out in memory - and +some compilers have stored it as a word. Most recently it's been stored as a byte. To make sure bool is ok for serialization therefore +we store as a uint8_t. + +Another example would be double. It's 64 bits, but on some arches/compilers its SLANG_ALIGN_OF is 4 and on others it's 8. On some +arches a non aligned read will lead to a fault. To work around this problem therefore we have to ensure double has the alignment that +will work across all targets - and that alignment is 8. In that specific case that issue is handled via ASTSerialBasicTypeInfo, which +makes the SerialAlignment the sizeof the type. + +For (4) there are a few things to say. First a type can always implement a custom version of how to do a conversion by specializing +`ASTSerialTypeInfo`. But there remains another nagging issue - types which allocate/use other memory that changes at runtime. Clearly +we cannot define 'any size of memory' in a fixed SerialType defined in a specialization of ASTSerialTypeInfo. The mechanism to work around +this is to allow arbitrary arrays to be stored, that can be accessed via an ASTSerialIndex. This will be discussed more once we discuss +a little more about the file system, and ASTSerialIndex. + +Serialization Format +==================== + +The serialization format used is 'stream-like' with each 'object' stored in order. Each object is given an index starting from 1. +0 is used to be in effect nullptr. The stream looks like + +``` +ASTSerialInfo::Entry (for index 1) +Payload for type in entry + +ASTSerialInfo::Entry (for index 2) +Payload for type in entry + +... +... +``` + +When writing we have an array that maps each index to a pointer to the associated header. We also have a map that maps native pointers +to their indices. The Payload *is* the SerialType for the thing saved. The payload directly follows the Entry data. 
+ +Each object in this list can only be a few types of things - those derived from ASTSerialInfo::Type. + +The actual Entry followed by the payloads are allocated and stored when writing in a MemoryArena. When we want to write into a stream, we +can just iterate over each entry in order and write it out. + +You may have spotted a problem here - that some Entry types can be stored without alignment (for example a string - which stores the length +VarInt encoded followed by the characters). Others require an alignment - for example a NodeBase derived type that contains an int64_t will +*require* 8 byte alignment. As a feature of the serialization format we want to be able to just map the data into memory, and be able +to access all the SerialType as is on the CPU. For that to work we *require* that the payload for each entry has the right alignment for +the associated SerialType. + +To achieve this we store in the Entry its alignment requirement *AND* the next entry's alignment. With this, when we read, as we are stepping +through the entries we can find where the next Entry starts. Because the payload comes directly after the Entry - the Entry's size must be +a multiple of the largest alignment the payload can have. + +For the code that does the conversion between native and serial types it uses either the ASTSerialWriter or ASTSerialReader. This provides +the mechanism to turn a pointer into a serializable ASTSerialIndex and vice versa. There are some special functions for turning string like +types back and forth. + +The final mechanism is that of 'Arrays'. An array allows reading or writing a chunk of data associated with an ASTSerialIndex. The chunk of +data *must* hold data that is serializable. If the array holds pointers - then the serialized array must hold ASTSerialIndices that +represent those pointers. When reading back in they are converted back. + +Arrays are the escape hatch that allows for more complex types to serialize. 
Dictionaries for example are saved as a serial type that is +two ASTSerialIndices one to a keys array and one to a values array. + +Note that writing has two phases, serializing out into an ASTSerialWriter, and then secondly writing out to a stream. + +NodeBase Types +============== + +The ASTSerialTypeInfo mechanism is generally for *fields* of NodeBase types. For NodeBase derived types we use the C++ extractor's +field list to work out the native field offsets and types. With this we can then calculate the layout for NodeBase types such that they +follow the requirements for serialization - such as alignment and so forth. + +This information is held in the ASTSerialClasses, which for a given ASTNodeType gives an ASTSerialClassInfo, that specifies fields for +just that type. Super type fields need to be serialized too, and this information can be found by using the ReflectClassInfo to find the +super type. + +Reading +======= + +Due to the care in writing, reading is relatively simple. We can just take the contents of the file and put it in memory, as long as in memory +it has an alignment of at least MAX_ALIGNMENT. Then we can build up an entries table by stepping through the data and writing the pointer. + +The toNative functions take an ASTSerialReader - this allows the implementation to ask for pointers and arrays from other parts of the serialized +data. It also allows for types to be lazily reconstructed if necessary. + +Lazy reconstruction may be useful in the future to partially reconstruct a sub part of the serialized data. In the current implementation, lazy +evaluation is used on Strings. The m_objects array holds all of the recreated native 'objects'. Since the objects can be derived from different +base classes the associated Entry will describe what it really is. + +For the String type, we initially store the object pointer as null. 
If a string is requested from that index, we see if the object pointer is null, +if it is we have to construct the StringRepresentation that will be used. + +An extra wrinkle is that we allow accessing of a serialized String as a Name or a string or an UnownedSubString. Fortunately a Name just holds a string, +and a Name remains in scope as long as its NamePool (which is passed in) does. +*/ + + class ASTSerialClasses; // Type used to implement mechanisms to convert to and from serial types. @@ -360,9 +535,16 @@ protected: ASTSerialClass m_classes[Index(ASTNodeType::CountOf)]; }; -struct ASTSerializeUtil + +/* None of the functions in this util should be called from production code, +they exist to test features of AST Serialization */ +struct ASTSerialTestUtil { static SlangResult selfTest(); + + /// Tries to serialize out, read back in and test the results are the same. + /// Will write dumped out node to files + static SlangResult testSerialize(NodeBase* node, RootNamePool* rootNamePool, SharedASTBuilder* sharedASTBuilder, SourceManager* sourceManager); }; } // namespace Slang diff --git a/source/slang/slang.cpp b/source/slang/slang.cpp index 6d4a27733..b79cecb59 100644 --- a/source/slang/slang.cpp +++ b/source/slang/slang.cpp @@ -1064,7 +1064,7 @@ void FrontEndCompileRequest::parseTranslationUnit( StringBuilder buf; SourceWriter writer(linkage->getSourceManager(), LineDirectiveMode::None); - ASTDumpUtil::dump(translationUnit->getModuleDecl(), ASTDumpUtil::Style::Flat, &writer); + ASTDumpUtil::dump(translationUnit->getModuleDecl(), ASTDumpUtil::Style::Flat, 0, &writer); const String& path = sourceFile->getPathInfo().foundPath; if (path.getLength()) @@ -1079,19 +1079,7 @@ void FrontEndCompileRequest::parseTranslationUnit( #if 0 // Test serialization { - RefPtr<ASTSerialClasses> classes = new ASTSerialClasses; - - OwnedMemoryStream stream(FileAccess::ReadWrite); - - { - ASTSerialWriter writer(classes); - - // Lets serialize it all - 
writer.addPointer(translationUnit->getModuleDecl()); - // Let's stick it all in a stream - writer.write(&stream); - } - + ASTSerialTestUtil::testSerialize(translationUnit->getModuleDecl(), getSession()->getRootNamePool(), getLinkage()->getASTBuilder()->getSharedASTBuilder(), getSourceManager()); } #endif |
