diff --git a/cmake/libprotoc.cmake b/cmake/libprotoc.cmake index 8215827d878a..5883e612a762 100644 --- a/cmake/libprotoc.cmake +++ b/cmake/libprotoc.cmake @@ -87,8 +87,8 @@ set(libprotoc_files set(libprotoc_headers ${protobuf_SOURCE_DIR}/src/google/protobuf/compiler/code_generator.h ${protobuf_SOURCE_DIR}/src/google/protobuf/compiler/command_line_interface.h - ${protobuf_SOURCE_DIR}/src/google/protobuf/compiler/cpp/file.h ${protobuf_SOURCE_DIR}/src/google/protobuf/compiler/cpp/cpp_generator.h + ${protobuf_SOURCE_DIR}/src/google/protobuf/compiler/cpp/file.h ${protobuf_SOURCE_DIR}/src/google/protobuf/compiler/cpp/generator.h ${protobuf_SOURCE_DIR}/src/google/protobuf/compiler/cpp/helpers.h ${protobuf_SOURCE_DIR}/src/google/protobuf/compiler/cpp/names.h diff --git a/src/Makefile.am b/src/Makefile.am index 9c63fbff22c9..fc80935c04ad 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -74,8 +74,8 @@ nobase_include_HEADERS = \ google/protobuf/arenaz_sampler.h \ google/protobuf/compiler/code_generator.h \ google/protobuf/compiler/command_line_interface.h \ - google/protobuf/compiler/cpp/file.h \ google/protobuf/compiler/cpp/cpp_generator.h \ + google/protobuf/compiler/cpp/file.h \ google/protobuf/compiler/cpp/generator.h \ google/protobuf/compiler/cpp/helpers.h \ google/protobuf/compiler/cpp/names.h \ diff --git a/src/google/protobuf/compiler/cpp/enum_field.cc b/src/google/protobuf/compiler/cpp/enum_field.cc index 4b91547a3223..8ffb699e87f5 100644 --- a/src/google/protobuf/compiler/cpp/enum_field.cc +++ b/src/google/protobuf/compiler/cpp/enum_field.cc @@ -37,6 +37,7 @@ #include #include #include +#include #include namespace google { @@ -55,8 +56,9 @@ void SetEnumVariables(const FieldDescriptor* descriptor, (*variables)["default"] = Int32ToString(default_value->number()); (*variables)["full_name"] = descriptor->full_name(); (*variables)["cached_byte_size_name"] = MakeVarintCachedSizeName(descriptor); + bool cold = ShouldSplit(descriptor, options); (*variables)["cached_byte_size_field"] = - MakeVarintCachedSizeFieldName(descriptor); + MakeVarintCachedSizeFieldName(descriptor, cold); } } // namespace @@ -110,6 +112,7 @@ void EnumFieldGenerator::GenerateInlineAccessorDefinitions( " $field$ = value;\n" "}\n" "inline void $classname$::set_$name$($type$ value) {\n" + "$maybe_prepare_split_message$" " _internal_set_$name$(value);\n" "$annotate_set$" " // @@protoc_insertion_point(field_set:$full_name$)\n" @@ -162,6 +165,10 @@ void EnumFieldGenerator::GenerateConstexprAggregateInitializer( void EnumFieldGenerator::GenerateAggregateInitializer( io::Printer* printer) const { Formatter format(printer, variables_); + if (ShouldSplit(descriptor_, options_)) { + format("decltype(Impl_::Split::$name$_){$default$}"); + return; + } format("decltype($field$){$default$}"); } diff --git a/src/google/protobuf/compiler/cpp/enum_field.h b/src/google/protobuf/compiler/cpp/enum_field.h index 44269a615fb5..61bae855cd64 100644 --- a/src/google/protobuf/compiler/cpp/enum_field.h +++ b/src/google/protobuf/compiler/cpp/enum_field.h @@ -101,7 +101,9 @@ class RepeatedEnumFieldGenerator : public FieldGenerator { void GenerateMergingCode(io::Printer* printer) const override; void GenerateSwappingCode(io::Printer* printer) const override; void GenerateConstructorCode(io::Printer* printer) const override; - void GenerateCopyConstructorCode(io::Printer* printer) const override {} + void GenerateCopyConstructorCode(io::Printer* /*printer*/) const override { + GOOGLE_CHECK(!ShouldSplit(descriptor_, options_)); + } void GenerateDestructorCode(io::Printer* printer) const override; void GenerateSerializeWithCachedSizesToArray( io::Printer* printer) const override; diff --git a/src/google/protobuf/compiler/cpp/field.cc b/src/google/protobuf/compiler/cpp/field.cc index a242c59db587..90d20848b3af 100644 --- a/src/google/protobuf/compiler/cpp/field.cc +++ b/src/google/protobuf/compiler/cpp/field.cc @@ -244,7 +244,8 @@ void SetCommonFieldVariables(const FieldDescriptor* descriptor, (*variables)["number"] = StrCat(descriptor->number()); (*variables)["classname"] = ClassName(FieldScope(descriptor), false); (*variables)["declared_type"] = DeclaredTypeMethodName(descriptor->type()); - (*variables)["field"] = FieldMemberName(descriptor); + bool split = ShouldSplit(descriptor, options); + (*variables)["field"] = FieldMemberName(descriptor, split); (*variables)["tag_size"] = StrCat( WireFormat::TagSize(descriptor->number(), descriptor->type())); @@ -252,6 +253,8 @@ void SetCommonFieldVariables(const FieldDescriptor* descriptor, (*variables)["set_hasbit"] = ""; (*variables)["clear_hasbit"] = ""; + (*variables)["maybe_prepare_split_message"] = + split ? " PrepareSplitMessageForWrite();\n" : ""; AddAccessorAnnotations(descriptor, options, variables); @@ -299,6 +302,10 @@ void FieldGenerator::SetInlinedStringIndex(int32_t inlined_string_index) { void FieldGenerator::GenerateAggregateInitializer(io::Printer* printer) const { Formatter format(printer, variables_); + if (ShouldSplit(descriptor_, options_)) { + format("decltype(Impl_::Split::$name$_){arena}"); + return; + } format("decltype($field$){arena}"); } @@ -314,6 +321,15 @@ void FieldGenerator::GenerateCopyAggregateInitializer( format("decltype($field$){from.$field$}"); } +void FieldGenerator::GenerateCopyConstructorCode(io::Printer* printer) const { + if (ShouldSplit(descriptor_, options_)) { + // There is no copy constructor for the `Split` struct, so we need to copy + // the value here. + Formatter format(printer, variables_); + format("$field$ = from.$field$;\n"); + } +} + void SetCommonOneofFieldVariables( const FieldDescriptor* descriptor, std::map* variables) { diff --git a/src/google/protobuf/compiler/cpp/field.h b/src/google/protobuf/compiler/cpp/field.h index 52098e7352b7..dd2a51a2fe59 100644 --- a/src/google/protobuf/compiler/cpp/field.h +++ b/src/google/protobuf/compiler/cpp/field.h @@ -136,7 +136,7 @@ class FieldGenerator { virtual void GenerateMergingCode(io::Printer* printer) const = 0; // Generates a copy constructor - virtual void GenerateCopyConstructorCode(io::Printer* printer) const = 0; + virtual void GenerateCopyConstructorCode(io::Printer* printer) const; // Generate lines of code (statements, not declarations) which swaps // this field and the corresponding field of another message, which @@ -150,6 +150,9 @@ class FieldGenerator { // method, invoked by each of the generated constructors. virtual void GenerateConstructorCode(io::Printer* printer) const = 0; + // Generate initialization code for private members in the cold struct. + virtual void GenerateCreateSplitMessageCode(io::Printer* printer) const {} + // Generate any code that needs to go in the class's SharedDtor() method, // invoked by the destructor. // Most field types don't need this, so the default implementation is empty. diff --git a/src/google/protobuf/compiler/cpp/file.cc b/src/google/protobuf/compiler/cpp/file.cc index d409661dc145..838e0ab9b414 100644 --- a/src/google/protobuf/compiler/cpp/file.cc +++ b/src/google/protobuf/compiler/cpp/file.cc @@ -480,10 +480,40 @@ void FileGenerator::GenerateSourceDefaultInstance(int idx, io::Printer* printer) { Formatter format(printer, variables_); MessageGenerator* generator = message_generators_[idx].get(); + // Generate the split instance first because it's needed in the constexpr + // constructor. + if (ShouldSplit(generator->descriptor_, options_)) { + // Use a union to disable the destructor of the _instance member. + // We can constant initialize, but the object will still have a non-trivial + // destructor that we need to elide. + format( + "struct $1$ {\n" + " PROTOBUF_CONSTEXPR $1$()\n" + " : _instance{", + DefaultInstanceType(generator->descriptor_, options_, + /*split=*/true)); + generator->GenerateInitDefaultSplitInstance(printer); + format( + "} {}\n" + " ~$1$() {}\n" + " union {\n" + " $2$ _instance;\n" + " };\n" + "};\n", + DefaultInstanceType(generator->descriptor_, options_, /*split=*/true), + StrCat(generator->classname_, "::Impl_::Split")); + // NO_DESTROY is not necessary for correctness. The empty destructor is + // enough. However, the empty destructor fails to be elided in some + // configurations (like non-opt or with certain sanitizers). NO_DESTROY is + // there just to improve performance and binary size in these builds. + format( + "PROTOBUF_ATTRIBUTE_NO_DESTROY PROTOBUF_CONSTINIT " + "PROTOBUF_ATTRIBUTE_INIT_PRIORITY1 $1$ $2$;\n", + DefaultInstanceType(generator->descriptor_, options_, /*split=*/true), + DefaultInstanceName(generator->descriptor_, options_, /*split=*/true)); + } + generator->GenerateConstexprConstructor(printer); - // Use a union to disable the destructor of the _instance member. - // We can constant initialize, but the object will still have a non-trivial - // destructor that we need to elide. format( "struct $1$ {\n" " PROTOBUF_CONSTEXPR $1$()\n" @@ -495,14 +525,11 @@ void FileGenerator::GenerateSourceDefaultInstance(int idx, "};\n", DefaultInstanceType(generator->descriptor_, options_), generator->classname_); - // NO_DESTROY is not necessary for correctness. The empty destructor is - // enough. However, the empty destructor fails to be elided in some - // configurations (like non-opt or with certain sanitizers). NO_DESTROY is - // there just to improve performance and binary size in these builds. - format("PROTOBUF_ATTRIBUTE_NO_DESTROY PROTOBUF_CONSTINIT " - "PROTOBUF_ATTRIBUTE_INIT_PRIORITY1 $1$ $2$;\n", - DefaultInstanceType(generator->descriptor_, options_), - DefaultInstanceName(generator->descriptor_, options_)); + format( + "PROTOBUF_ATTRIBUTE_NO_DESTROY PROTOBUF_CONSTINIT " + "PROTOBUF_ATTRIBUTE_INIT_PRIORITY1 $1$ $2$;\n", + DefaultInstanceType(generator->descriptor_, options_), + DefaultInstanceName(generator->descriptor_, options_)); for (int i = 0; i < generator->descriptor_->field_count(); i++) { const FieldDescriptor* field = generator->descriptor_->field(i); @@ -514,7 +541,7 @@ void FileGenerator::GenerateSourceDefaultInstance(int idx, "($3$._instance.$4$.Init(), std::true_type{});\n", ClassName(generator->descriptor_), FieldName(field), DefaultInstanceName(generator->descriptor_, options_), - FieldMemberName(field)); + FieldMemberName(field, ShouldSplit(field, options_))); } } @@ -947,6 +974,7 @@ class FileGenerator::ForwardDeclarations { public: void AddMessage(const Descriptor* d) { classes_[ClassName(d)] = d; } void AddEnum(const EnumDescriptor* d) { enums_[ClassName(d)] = d; } + void AddSplit(const Descriptor* d) { splits_[ClassName(d)] = d; } void Print(const Formatter& format, const Options& options) const { for (const auto& p : enums_) { @@ -967,6 +995,14 @@ class FileGenerator::ForwardDeclarations { class_desc, classname, DefaultInstanceType(class_desc, options), DefaultInstanceName(class_desc, options)); } + for (const auto& p : splits_) { + const Descriptor* class_desc = p.second; + format( + "struct $1$;\n" + "$dllexport_decl $extern $1$ $2$;\n", + DefaultInstanceType(class_desc, options, /*split=*/true), + DefaultInstanceName(class_desc, options, /*split=*/true)); + } } void PrintTopLevelDecl(const Formatter& format, @@ -982,6 +1018,7 @@ class FileGenerator::ForwardDeclarations { private: std::map classes_; std::map enums_; + std::map splits_; }; static void PublicImportDFS(const FileDescriptor* fd, @@ -1027,6 +1064,12 @@ void FileGenerator::GenerateForwardDeclarations(io::Printer* printer) { if (d && !public_set.count(d->file())) decls[Namespace(d, options_)].AddEnum(d); } + for (const auto& mg : message_generators_) { + const Descriptor* d = mg->descriptor_; + if ((d != nullptr) && (public_set.count(d->file()) == 0u) && + ShouldSplit(mg->descriptor_, options_)) + decls[Namespace(d, options_)].AddSplit(d); + } { NamespaceOpener ns(format); diff --git a/src/google/protobuf/compiler/cpp/helpers.cc b/src/google/protobuf/compiler/cpp/helpers.cc index 0c37858aea96..79800f135c67 100644 --- a/src/google/protobuf/compiler/cpp/helpers.cc +++ b/src/google/protobuf/compiler/cpp/helpers.cc @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -185,6 +186,23 @@ bool AllocExpected(const Descriptor* descriptor) { return false; } +// Describes different approaches to detect non-canonical int32 encoding. Only +// kNever or kAlways is eligible for *simple* verification methods. +enum class VerifyInt32Type { + kCustom, // Only check if field number matches. + kNever, // Do not check. + kAlways, // Always check. +}; + +inline VerifySimpleType VerifyInt32TypeToVerifyCustom(VerifyInt32Type t) { + static VerifySimpleType kCustomTypes[] = { + VerifySimpleType::kCustom, VerifySimpleType::kCustomInt32Never, + VerifySimpleType::kCustomInt32Always}; + return kCustomTypes[static_cast(t) - + static_cast(VerifyInt32Type::kCustom)]; +} + + } // namespace bool IsLazy(const FieldDescriptor* field, const Options& options, @@ -261,6 +279,8 @@ void SetCommonMessageDataVariables( (*variables)["oneof_case"] = prefix + "_oneof_case_"; (*variables)["tracker"] = "Impl_::_tracker_"; (*variables)["weak_field_map"] = prefix + "_weak_field_map_"; + (*variables)["split"] = prefix + "_split_"; + (*variables)["cached_split_ptr"] = "cached_split_ptr"; } void SetUnknownFieldsVariable(const Descriptor* descriptor, @@ -425,29 +445,32 @@ std::string Namespace(const EnumDescriptor* d, const Options& options) { } std::string DefaultInstanceType(const Descriptor* descriptor, - const Options& options) { - return ClassName(descriptor) + "DefaultTypeInternal"; + const Options& /*options*/, bool split) { + return ClassName(descriptor) + (split ? "__Impl_Split" : "") + + "DefaultTypeInternal"; } std::string DefaultInstanceName(const Descriptor* descriptor, - const Options& options) { - return "_" + ClassName(descriptor, false) + "_default_instance_"; + const Options& /*options*/, bool split) { + return "_" + ClassName(descriptor, false) + (split ? "__Impl_Split" : "") + + "_default_instance_"; } std::string DefaultInstancePtr(const Descriptor* descriptor, - const Options& options) { - return DefaultInstanceName(descriptor, options) + "ptr_"; + const Options& options, bool split) { + return DefaultInstanceName(descriptor, options, split) + "ptr_"; } std::string QualifiedDefaultInstanceName(const Descriptor* descriptor, - const Options& options) { + const Options& options, bool split) { return QualifiedFileLevelSymbol( - descriptor->file(), DefaultInstanceName(descriptor, options), options); + descriptor->file(), DefaultInstanceName(descriptor, options, split), + options); } std::string QualifiedDefaultInstancePtr(const Descriptor* descriptor, - const Options& options) { - return QualifiedDefaultInstanceName(descriptor, options) + "ptr_"; + const Options& options, bool split) { + return QualifiedDefaultInstanceName(descriptor, options, split) + "ptr_"; } std::string DescriptorTableName(const FileDescriptor* file, @@ -487,12 +510,15 @@ std::string FieldName(const FieldDescriptor* field) { return result; } -std::string FieldMemberName(const FieldDescriptor* field) { +std::string FieldMemberName(const FieldDescriptor* field, bool split) { StringPiece prefix = IsMapEntryMessage(field->containing_type()) ? "" : "_impl_."; + StringPiece split_prefix = split ? "_split_->" : ""; if (field->real_containing_oneof() == nullptr) { - return StrCat(prefix, FieldName(field), "_"); + return StrCat(prefix, split_prefix, FieldName(field), "_"); } + // Oneof fields are never split. + GOOGLE_CHECK(!split); return StrCat(prefix, field->containing_oneof()->name(), "_.", FieldName(field), "_"); } @@ -875,6 +901,9 @@ bool HasLazyFields(const FileDescriptor* file, const Options& options, return false; } +bool ShouldSplit(const Descriptor*, const Options&) { return false; } +bool ShouldSplit(const FieldDescriptor*, const Options&) { return false; } + static bool HasRepeatedFields(const Descriptor* descriptor) { for (int i = 0; i < descriptor->field_count(); ++i) { if (descriptor->field(i)->label() == FieldDescriptor::LABEL_REPEATED) { @@ -1016,9 +1045,9 @@ bool IsUtf8String(const FieldDescriptor* field) { field->type() == FieldDescriptor::TYPE_STRING; } -bool ShouldVerifySimple(const Descriptor* descriptor) { +VerifySimpleType ShouldVerifySimple(const Descriptor* descriptor) { (void)descriptor; - return false; + return VerifySimpleType::kCustom; } bool IsStringOrMessage(const FieldDescriptor* field) { diff --git a/src/google/protobuf/compiler/cpp/helpers.h b/src/google/protobuf/compiler/cpp/helpers.h index 7ad724447fb5..d8dcda7248ab 100644 --- a/src/google/protobuf/compiler/cpp/helpers.h +++ b/src/google/protobuf/compiler/cpp/helpers.h @@ -153,24 +153,26 @@ std::string QualifiedExtensionName(const FieldDescriptor* d); // Type name of default instance. std::string DefaultInstanceType(const Descriptor* descriptor, - const Options& options); + const Options& options, bool split = false); // Non-qualified name of the default_instance of this message. std::string DefaultInstanceName(const Descriptor* descriptor, - const Options& options); + const Options& options, bool split = false); // Non-qualified name of the default instance pointer. This is used only for // implicit weak fields, where we need an extra indirection. std::string DefaultInstancePtr(const Descriptor* descriptor, - const Options& options); + const Options& options, bool split = false); // Fully qualified name of the default_instance of this message. std::string QualifiedDefaultInstanceName(const Descriptor* descriptor, - const Options& options); + const Options& options, + bool split = false); // Fully qualified name of the default instance pointer. std::string QualifiedDefaultInstancePtr(const Descriptor* descriptor, - const Options& options); + const Options& options, + bool split = false); // DescriptorTable variable name. std::string DescriptorTableName(const FileDescriptor* file, @@ -194,7 +196,7 @@ std::string ResolveKeyword(const std::string& name); std::string FieldName(const FieldDescriptor* field); // Returns the (unqualified) private member name for this field in C++ code. -std::string FieldMemberName(const FieldDescriptor* field); +std::string FieldMemberName(const FieldDescriptor* field, bool split); // Returns an estimate of the compiler's alignment for the field. This // can't guarantee to be correct because the generated code could be compiled on @@ -369,6 +371,12 @@ bool IsEagerlyVerifiedLazy(const FieldDescriptor* field, const Options& options, bool IsLazilyVerifiedLazy(const FieldDescriptor* field, const Options& options); +// Is the given message being split (go/pdsplit)? +bool ShouldSplit(const Descriptor* desc, const Options& options); + +// Is the given field being split out? +bool ShouldSplit(const FieldDescriptor* field, const Options& options); + inline bool IsFieldUsed(const FieldDescriptor* /* field */, const Options& /* options */) { return true; @@ -508,8 +516,10 @@ inline std::string MakeVarintCachedSizeName(const FieldDescriptor* field) { // MakeVarintCachedSizeFieldName, in case the field exists at some nested level // like: // internal_container_._field_cached_byte_size_; -inline std::string MakeVarintCachedSizeFieldName(const FieldDescriptor* field) { - return StrCat("_impl_._", FieldName(field), "_cached_byte_size_"); +inline std::string MakeVarintCachedSizeFieldName(const FieldDescriptor* field, + bool split) { + return StrCat("_impl_.", split ? "_split_->" : "", "_", + FieldName(field), "_cached_byte_size_"); } // Note: A lot of libraries detect Any protos based on Descriptor::full_name() @@ -1021,7 +1031,24 @@ bool ShouldVerify(const Descriptor* descriptor, const Options& options, bool ShouldVerify(const FileDescriptor* file, const Options& options, MessageSCCAnalyzer* scc_analyzer); -bool ShouldVerifySimple(const Descriptor* descriptor); +// Indicates whether to use predefined verify methods for a given message. If a +// message is "simple" and needs no special verification per field (e.g. message +// field, repeated packed, UTF8 string, etc.), we can use either VerifySimple or +// VerifySimpleAlwaysCheckInt32 methods as all verification can be done based on +// the wire type. +// +// Otherwise, we need "custom" verify methods tailored to a message to pass +// which field needs a special verification; i.e. InternalVerify. +enum class VerifySimpleType { + kSimpleInt32Never, // Use VerifySimple + kSimpleInt32Always, // Use VerifySimpleAlwaysCheckInt32 + kCustom, // Use InternalVerify and check only for int32 + kCustomInt32Never, // Use InternalVerify but never check for int32 + kCustomInt32Always, // Use InternalVerify and always check for int32 +}; + +// Returns VerifySimpleType if messages can be verified by predefined methods. +VerifySimpleType ShouldVerifySimple(const Descriptor* descriptor); bool IsUtf8String(const FieldDescriptor* field); diff --git a/src/google/protobuf/compiler/cpp/map_field.cc b/src/google/protobuf/compiler/cpp/map_field.cc index 0091bc9500e3..3a55ef535207 100644 --- a/src/google/protobuf/compiler/cpp/map_field.cc +++ b/src/google/protobuf/compiler/cpp/map_field.cc @@ -136,6 +136,7 @@ void MapFieldGenerator::GenerateInlineAccessorDefinitions( "}\n" "inline ::$proto_ns$::Map< $key_cpp$, $val_cpp$ >*\n" "$classname$::_internal_mutable_$name$() {\n" + "$maybe_prepare_split_message$" " return $field$.MutableMap();\n" "}\n" "inline ::$proto_ns$::Map< $key_cpp$, $val_cpp$ >*\n" @@ -291,6 +292,12 @@ void MapFieldGenerator::GenerateCopyAggregateInitializer( void MapFieldGenerator::GenerateAggregateInitializer( io::Printer* printer) const { Formatter format(printer, variables_); + if (ShouldSplit(descriptor_, options_)) { + format( + "/*decltype($classname$::Split::$name$_)*/" + "{::_pbi::ArenaInitialized(), arena}"); + return; + } // MapField has no move constructor. format("/*decltype($field$)*/{::_pbi::ArenaInitialized(), arena}"); } @@ -299,6 +306,11 @@ void MapFieldGenerator::GenerateDestructorCode(io::Printer* printer) const { GOOGLE_CHECK(!IsFieldStripped(descriptor_, options_)); Formatter format(printer, variables_); + if (ShouldSplit(descriptor_, options_)) { + format("$cached_split_ptr$->$name$_.Destruct();\n"); + format("$cached_split_ptr$->$name$_.~MapField$lite$();\n"); + return; + } format("$field$.Destruct();\n"); format("$field$.~MapField$lite$();\n"); } diff --git a/src/google/protobuf/compiler/cpp/message.cc b/src/google/protobuf/compiler/cpp/message.cc index b33ee6bee5f2..69069dacdd76 100644 --- a/src/google/protobuf/compiler/cpp/message.cc +++ b/src/google/protobuf/compiler/cpp/message.cc @@ -346,10 +346,10 @@ bool IsRequired(const std::vector& v) { return v.front()->is_required(); } -bool HasSingularString(const Descriptor* desc, const Options& options) { +bool HasNonSplitOptionalString(const Descriptor* desc, const Options& options) { for (const auto* field : FieldRange(desc)) { if (IsString(field, options) && !field->is_repeated() && - !field->real_containing_oneof()) { + !field->real_containing_oneof() && !ShouldSplit(field, options)) { return true; } } @@ -1202,6 +1202,9 @@ void MessageGenerator::GenerateFieldClear(const FieldDescriptor* field, format.Outdent(); format("}\n"); } else { + if (ShouldSplit(field, options_)) { + format("if (IsSplitMessageDefault()) return;\n"); + } field_generators_.get(field).GenerateClearingCode(format.printer()); if (HasHasbit(field)) { int has_bit_index = HasBitIndex(field); @@ -1242,13 +1245,12 @@ void MessageGenerator::GenerateFieldAccessorDefinitions(io::Printer* printer) { } else { format( "inline int $classname$::_internal_$name$_size() const {\n" - " return $1$$2$.size();\n" + " return $field$$1$.size();\n" "}\n" "inline int $classname$::$name$_size() const {\n" "$annotate_size$" " return _internal_$name$_size();\n" "}\n", - FieldMemberName(field), IsImplicitWeakField(field, options_, scc_analyzer_) && field->message_type() ? ".weak" @@ -1752,6 +1754,17 @@ void MessageGenerator::GenerateClassDefinition(io::Printer* printer) { "\n"); } + if (ShouldSplit(descriptor_, options_)) { + format( + "private:\n" + "inline bool IsSplitMessageDefault() const {\n" + " return $split$ == reinterpret_cast(&$1$);\n" + "}\n" + "PROTOBUF_NOINLINE void PrepareSplitMessageForWrite();\n" + "public:\n", + DefaultInstanceName(descriptor_, options_, /*split=*/true)); + } + format( "// nested types ----------------------------------------------------\n" "\n"); @@ -1896,7 +1909,24 @@ void MessageGenerator::GenerateClassDefinition(io::Printer* printer) { for (auto field : optimized_order_) { const FieldGenerator& generator = field_generators_.get(field); generator.GenerateStaticMembers(printer); - generator.GeneratePrivateMembers(printer); + if (!ShouldSplit(field, options_)) { + generator.GeneratePrivateMembers(printer); + } + } + if (ShouldSplit(descriptor_, options_)) { + format("struct Split {\n"); + format.Indent(); + for (auto field : optimized_order_) { + if (!ShouldSplit(field, options_)) continue; + const FieldGenerator& generator = field_generators_.get(field); + generator.GeneratePrivateMembers(printer); + } + format.Outdent(); + format( + " typedef void InternalArenaConstructable_;\n" + " typedef void DestructorSkippable_;\n" + "};\n" + "Split* _split_;\n"); } // For each oneof generate a union @@ -1955,6 +1985,14 @@ void MessageGenerator::GenerateClassDefinition(io::Printer* printer) { format("union { Impl_ _impl_; };\n"); } + if (ShouldSplit(descriptor_, options_)) { + format( + "static Impl_::Split* CreateSplitMessage(" + "::$proto_ns$::Arena* arena);\n"); + format("friend struct $1$;\n", + DefaultInstanceType(descriptor_, options_, /*split=*/true)); + } + // The TableStruct struct needs access to the private parts, in order to // construct the offsets of all members. format("friend struct ::$tablename$;\n"); @@ -2094,7 +2132,8 @@ void MessageGenerator::GenerateClassMethods(io::Printer* printer) { format("};\n\n"); for (auto field : FieldRange(descriptor_)) { if (!IsFieldStripped(field, options_)) { - field_generators_.get(field).GenerateInternalAccessorDefinitions(printer); + field_generators_.get(field).GenerateInternalAccessorDefinitions( + printer); } } @@ -2154,6 +2193,15 @@ void MessageGenerator::GenerateClassMethods(io::Printer* printer) { format("\n"); } + if (ShouldSplit(descriptor_, options_)) { + format( + "void $classname$::PrepareSplitMessageForWrite() {\n" + " if (IsSplitMessageDefault()) {\n" + " $split$ = CreateSplitMessage(GetArenaForAllocation());\n" + " }\n" + "}\n"); + } + GenerateVerify(printer); GenerateSwap(printer); @@ -2242,7 +2290,11 @@ std::pair MessageGenerator::GenerateOffsets( // Don't use the top bit because that is for unused fields. format("::_pbi::kInvalidFieldOffsetTag"); } else { - format("PROTOBUF_FIELD_OFFSET($classtype$, $1$)", FieldMemberName(field)); + format("PROTOBUF_FIELD_OFFSET($classtype$$1$, $2$)", + ShouldSplit(field, options_) ? "::Impl_::Split" : "", + ShouldSplit(field, options_) + ? FieldName(field) + "_" + : FieldMemberName(field, /*cold=*/false)); } // Some information about a field is in the pdproto profile. The profile is @@ -2338,9 +2390,17 @@ void MessageGenerator::GenerateSharedConstructorCode(io::Printer* printer) { // Initialize member variables with arena constructor. for (auto field : optimized_order_) { GOOGLE_DCHECK(!IsFieldStripped(field, options_)); + if (ShouldSplit(field, options_)) { + continue; + } put_sep(); field_generators_.get(field).GenerateAggregateInitializer(printer); } + if (ShouldSplit(descriptor_, options_)) { + put_sep(); + format("decltype($split$){reinterpret_cast(&$1$)}", + DefaultInstanceName(descriptor_, options_, /*split=*/true)); + } for (auto oneof : OneOfRange(descriptor_)) { put_sep(); format("decltype(_impl_.$1$_){}", oneof->name()); @@ -2394,6 +2454,9 @@ void MessageGenerator::GenerateSharedConstructorCode(io::Printer* printer) { } for (const FieldDescriptor* field : optimized_order_) { + if (ShouldSplit(field, options_)) { + continue; + } field_generators_.get(field).GenerateConstructorCode(printer); } @@ -2405,6 +2468,63 @@ void MessageGenerator::GenerateSharedConstructorCode(io::Printer* printer) { format("}\n\n"); } +void MessageGenerator::GenerateCreateSplitMessage(io::Printer* printer) { + Formatter format(printer, variables_); + format( + "$classname$::Impl_::Split* " + "$classname$::CreateSplitMessage(::$proto_ns$::Arena* arena) {\n"); + format.Indent(); + const char* field_sep = " "; + const auto put_sep = [&] { + format("\n$1$ ", field_sep); + field_sep = ","; + }; + format( + "const size_t size = sizeof(Impl_::Split);\n" + "void* chunk = (arena == nullptr) ?\n" + " ::operator new(size) :\n" + " arena->AllocateAligned(size, alignof(Impl_::Split));\n" + "Impl_::Split* ptr = reinterpret_cast(chunk);\n" + "new (ptr) Impl_::Split{"); + format.Indent(); + for (const FieldDescriptor* field : optimized_order_) { + GOOGLE_DCHECK(!IsFieldStripped(field, options_)); + if (ShouldSplit(field, options_)) { + put_sep(); + field_generators_.get(field).GenerateAggregateInitializer(printer); + } + } + format.Outdent(); + format("};\n"); + for (const FieldDescriptor* field : optimized_order_) { + GOOGLE_DCHECK(!IsFieldStripped(field, options_)); + if (ShouldSplit(field, options_)) { + field_generators_.get(field).GenerateCreateSplitMessageCode(printer); + } + } + format("return ptr;\n"); + format.Outdent(); + format("}\n"); +} + +void MessageGenerator::GenerateInitDefaultSplitInstance(io::Printer* printer) { + if (!ShouldSplit(descriptor_, options_)) return; + + Formatter format(printer, variables_); + const char* field_sep = " "; + const auto put_sep = [&] { + format("\n$1$ ", field_sep); + field_sep = ","; + }; + for (const auto* field : optimized_order_) { + if (ShouldSplit(field, options_)) { + put_sep(); + field_generators_.get(field).GenerateConstexprAggregateInitializer( + printer); + } + } +} + void MessageGenerator::GenerateSharedDestructorCode(io::Printer* printer) { if (HasSimpleBaseClass(descriptor_, options_)) return; Formatter format(printer, variables_); @@ -2420,8 +2540,24 @@ void MessageGenerator::GenerateSharedDestructorCode(io::Printer* printer) { // Write the destructors for each field except oneof members. // optimized_order_ does not contain oneof fields. for (auto field : optimized_order_) { + if (ShouldSplit(field, options_)) { + continue; + } field_generators_.get(field).GenerateDestructorCode(printer); } + if (ShouldSplit(descriptor_, options_)) { + format("if (!IsSplitMessageDefault()) {\n"); + format.Indent(); + format("auto* $cached_split_ptr$ = $split$;\n"); + for (auto field : optimized_order_) { + if (ShouldSplit(field, options_)) { + field_generators_.get(field).GenerateDestructorCode(printer); + } + } + format("delete $cached_split_ptr$;\n"); + format.Outdent(); + format("}\n"); + } // Generate code to destruct oneofs. Clearing should do the work. for (auto oneof : OneOfRange(descriptor_)) { @@ -2475,10 +2611,23 @@ void MessageGenerator::GenerateArenaDestructorCode(io::Printer* printer) { // Process non-oneof fields first. for (auto field : optimized_order_) { - if (IsFieldStripped(field, options_)) continue; + if (IsFieldStripped(field, options_) || ShouldSplit(field, options_)) + continue; const FieldGenerator& fg = field_generators_.get(field); fg.GenerateArenaDestructorCode(printer); } + if (ShouldSplit(descriptor_, options_)) { + format("if (!_this->IsSplitMessageDefault()) {\n"); + format.Indent(); + for (auto field : optimized_order_) { + if (IsFieldStripped(field, options_) || !ShouldSplit(field, options_)) + continue; + const FieldGenerator& fg = field_generators_.get(field); + fg.GenerateArenaDestructorCode(printer); + } + format.Outdent(); + format("}\n"); + } // Process oneof fields. for (auto oneof : OneOfRange(descriptor_)) { @@ -2532,9 +2681,19 @@ void MessageGenerator::GenerateConstexprConstructor(io::Printer* printer) { } } for (auto field : optimized_order_) { + if (ShouldSplit(field, options_)) { + continue; + } + put_sep(); + field_generators_.get(field).GenerateConstexprAggregateInitializer( + printer); + } + if (ShouldSplit(descriptor_, options_)) { put_sep(); - field_generators_.get(field).GenerateConstexprAggregateInitializer(printer); + format("/*decltype($split$)*/&$1$._instance", + DefaultInstanceName(descriptor_, options_, /*split=*/true)); } + for (auto oneof : OneOfRange(descriptor_)) { put_sep(); format("/*decltype(_impl_.$1$_)*/{}", oneof->name()); @@ -2570,16 +2729,33 @@ void MessageGenerator::GenerateCopyConstructorBody(io::Printer* printer) const { Formatter format(printer, variables_); const RunMap runs = - FindRuns(optimized_order_, - [](const FieldDescriptor* field) { return IsPOD(field); }); + FindRuns(optimized_order_, [this](const FieldDescriptor* field) { + return IsPOD(field) && !ShouldSplit(field, options_); + }); std::string pod_template = "::memcpy(&$first$, &from.$first$,\n" " static_cast(reinterpret_cast(&$last$) -\n" " reinterpret_cast(&$first$)) + sizeof($last$));\n"; + if (ShouldSplit(descriptor_, options_)) { + format("if (!from.IsSplitMessageDefault()) {\n"); + format.Indent(); + format("_this->PrepareSplitMessageForWrite();\n"); + for (auto field : optimized_order_) { + if (ShouldSplit(field, options_)) { + field_generators_.get(field).GenerateCopyConstructorCode(printer); + } + } + format.Outdent(); + format("}\n"); + } + for (size_t i = 0; i < optimized_order_.size(); ++i) { const FieldDescriptor* field = optimized_order_[i]; + if (ShouldSplit(field, options_)) { + continue; + } const auto it = runs.find(field); // We only apply the memset technique to runs of more than one field, as @@ -2587,9 +2763,10 @@ void MessageGenerator::GenerateCopyConstructorBody(io::Printer* printer) const { if (it != runs.end() && it->second > 1) { // Use a memset, then skip run_length fields. const size_t run_length = it->second; - const std::string first_field_name = FieldMemberName(field); + const std::string first_field_name = + FieldMemberName(field, /*cold=*/false); const std::string last_field_name = - FieldMemberName(optimized_order_[i + run_length - 1]); + FieldMemberName(optimized_order_[i + run_length - 1], /*cold=*/false); format.Set("first", first_field_name); format.Set("last", last_field_name); @@ -2682,9 +2859,17 @@ void MessageGenerator::GenerateStructors(io::Printer* printer) { // Initialize member variables with arena constructor. for (auto field : optimized_order_) { + if (ShouldSplit(field, options_)) { + continue; + } put_sep(); field_generators_.get(field).GenerateCopyAggregateInitializer(printer); } + if (ShouldSplit(descriptor_, options_)) { + put_sep(); + format("decltype($split$){reinterpret_cast(&$1$)}", + DefaultInstanceName(descriptor_, options_, /*split=*/true)); + } for (auto oneof : OneOfRange(descriptor_)) { put_sep(); format("decltype(_impl_.$1$_){}", oneof->name()); @@ -2760,6 +2945,10 @@ void MessageGenerator::GenerateStructors(io::Printer* printer) { // Generate the shared constructor code. GenerateSharedConstructorCode(printer); + if (ShouldSplit(descriptor_, options_)) { + GenerateCreateSplitMessage(printer); + } + // Generate the destructor. if (!HasSimpleBaseClass(descriptor_, options_)) { format( @@ -2857,6 +3046,7 @@ void MessageGenerator::GenerateClear(io::Printer* printer) { // (memset) per chunk, and if present it will be at the beginning. bool same = HasByteIndex(a) == HasByteIndex(b) && a->is_repeated() == b->is_repeated() && + ShouldSplit(a, options_) == ShouldSplit(b, options_) && (CanInitializeByZeroing(a) == CanInitializeByZeroing(b) || (CanInitializeByZeroing(a) && (chunk_count == 1 || merge_zero_init))); @@ -2875,7 +3065,7 @@ void MessageGenerator::GenerateClear(io::Printer* printer) { const FieldDescriptor* memset_start = nullptr; const FieldDescriptor* memset_end = nullptr; bool saw_non_zero_init = false; - + bool chunk_is_cold = !chunk.empty() && ShouldSplit(chunk.front(), options_); for (const auto& field : chunk) { if (CanInitializeByZeroing(field)) { GOOGLE_CHECK(!saw_non_zero_init); @@ -2915,17 +3105,25 @@ void MessageGenerator::GenerateClear(io::Printer* printer) { format.Indent(); } + if (chunk_is_cold) { + format("if (!IsSplitMessageDefault()) {\n"); + format.Indent(); + } + if (memset_start) { if (memset_start == memset_end) { // For clarity, do not memset a single field. field_generators_.get(memset_start) .GenerateMessageClearingCode(printer); } else { + GOOGLE_CHECK_EQ(chunk_is_cold, ShouldSplit(memset_start, options_)); + GOOGLE_CHECK_EQ(chunk_is_cold, ShouldSplit(memset_end, options_)); format( "::memset(&$1$, 0, static_cast(\n" " reinterpret_cast(&$2$) -\n" " reinterpret_cast(&$1$)) + sizeof($2$));\n", - FieldMemberName(memset_start), FieldMemberName(memset_end)); + FieldMemberName(memset_start, chunk_is_cold), + FieldMemberName(memset_end, chunk_is_cold)); } } @@ -2954,6 +3152,11 @@ void MessageGenerator::GenerateClear(io::Printer* printer) { } } + if (chunk_is_cold) { + format.Outdent(); + format("}\n"); + } + if (have_outer_if) { format.Outdent(); format("}\n"); @@ -3052,7 +3255,7 @@ void MessageGenerator::GenerateSwap(io::Printer* printer) { std::map vars; SetUnknownFieldsVariable(descriptor_, options_, &vars); format.AddMap(vars); - if (HasSingularString(descriptor_, options_)) { + if (HasNonSplitOptionalString(descriptor_, options_)) { format( "auto* lhs_arena = GetArenaForAllocation();\n" "auto* rhs_arena = other->GetArenaForAllocation();\n"); @@ -3068,11 +3271,15 @@ void MessageGenerator::GenerateSwap(io::Printer* printer) { // If possible, we swap several fields at once, including padding. const RunMap runs = FindRuns(optimized_order_, [this](const FieldDescriptor* field) { - return CanBeManipulatedAsRawBytes(field, options_, scc_analyzer_); + return !ShouldSplit(field, options_) && + CanBeManipulatedAsRawBytes(field, options_, scc_analyzer_); }); - for (int i = 0; i < optimized_order_.size(); ++i) { + for (size_t i = 0; i < optimized_order_.size(); ++i) { const FieldDescriptor* field = optimized_order_[i]; + if (ShouldSplit(field, options_)) { + continue; + } const auto it = runs.find(field); // We only apply the memswap technique to runs of more than one field, as @@ -3081,9 +3288,10 @@ void MessageGenerator::GenerateSwap(io::Printer* printer) { if (it != runs.end() && it->second > 1) { // Use a memswap, then skip run_length fields. const size_t run_length = it->second; - const std::string first_field_name = FieldMemberName(field); - const std::string last_field_name = - FieldMemberName(optimized_order_[i + run_length - 1]); + const std::string first_field_name = + FieldMemberName(field, /*cold=*/false); + const std::string last_field_name = FieldMemberName( + optimized_order_[i + run_length - 1], /*cold=*/false); format.Set("first", first_field_name); format.Set("last", last_field_name); @@ -3102,6 +3310,9 @@ void MessageGenerator::GenerateSwap(io::Printer* printer) { field_generators_.get(field).GenerateSwappingCode(printer); } } + if (ShouldSplit(descriptor_, options_)) { + format("swap($split$, other->$split$);\n"); + } for (auto oneof : OneOfRange(descriptor_)) { format("swap(_impl_.$1$_, other->_impl_.$1$_);\n", oneof->name()); @@ -3208,10 +3419,18 @@ void MessageGenerator::GenerateClassSpecificMergeImpl(io::Printer* printer) { "$uint32$ cached_has_bits = 0;\n" "(void) cached_has_bits;\n\n"); + if (ShouldSplit(descriptor_, options_)) { + format( + "if (!from.IsSplitMessageDefault()) {\n" + " _this->PrepareSplitMessageForWrite();\n" + "}\n"); + } + std::vector> chunks = CollectFields( optimized_order_, [&](const FieldDescriptor* a, const FieldDescriptor* b) -> bool { - return HasByteIndex(a) == HasByteIndex(b); + return HasByteIndex(a) == HasByteIndex(b) && + ShouldSplit(a, options_) == ShouldSplit(b, options_); }); ColdChunkSkipper cold_skipper(descriptor_, options_, chunks, has_bit_indices_, @@ -3999,7 +4218,8 @@ void MessageGenerator::GenerateByteSize(io::Printer* printer) { std::vector> chunks = CollectFields( optimized_order_, [&](const FieldDescriptor* a, const FieldDescriptor* b) -> bool { - return a->label() == b->label() && HasByteIndex(a) == HasByteIndex(b); + return a->label() == b->label() && HasByteIndex(a) == HasByteIndex(b) && + ShouldSplit(a, options_) == ShouldSplit(b, options_); }); // Remove chunks with required fields. diff --git a/src/google/protobuf/compiler/cpp/message.h b/src/google/protobuf/compiler/cpp/message.h index ba2e21577494..5bdfcb35e216 100644 --- a/src/google/protobuf/compiler/cpp/message.h +++ b/src/google/protobuf/compiler/cpp/message.h @@ -119,6 +119,9 @@ class MessageGenerator { // default instance. void GenerateConstexprConstructor(io::Printer* printer); + void GenerateCreateSplitMessage(io::Printer* printer); + void GenerateInitDefaultSplitInstance(io::Printer* printer); + // Generate standard Message methods. void GenerateClear(io::Printer* printer); void GenerateOneofClear(io::Printer* printer); diff --git a/src/google/protobuf/compiler/cpp/message_field.cc b/src/google/protobuf/compiler/cpp/message_field.cc index 9c8b8a060bdf..7e87a079931b 100644 --- a/src/google/protobuf/compiler/cpp/message_field.cc +++ b/src/google/protobuf/compiler/cpp/message_field.cc @@ -35,6 +35,7 @@ #include #include +#include #include #include @@ -178,6 +179,7 @@ void MessageFieldGenerator::GenerateInlineAccessorDefinitions( format( "inline void $classname$::unsafe_arena_set_allocated_$name$(\n" " $type$* $name$) {\n" + "$maybe_prepare_split_message$" // If we're not on an arena, free whatever we were holding before. // (If we are on arena, we can just forget the earlier pointer.) " if (GetArenaForAllocation() == nullptr) {\n" @@ -203,6 +205,7 @@ void MessageFieldGenerator::GenerateInlineAccessorDefinitions( "inline $type$* $classname$::$release_name$() {\n" "$type_reference_function$" "$annotate_release$" + "$maybe_prepare_split_message$" " $clear_hasbit$\n" " $type$* temp = $casted_member$;\n" " $field$ = nullptr;\n" @@ -221,6 +224,7 @@ void MessageFieldGenerator::GenerateInlineAccessorDefinitions( "$annotate_release$" " // @@protoc_insertion_point(field_release:$full_name$)\n" "$type_reference_function$" + "$maybe_prepare_split_message$" " $clear_hasbit$\n" " $type$* temp = $casted_member$;\n" " $field$ = nullptr;\n" @@ -243,6 +247,9 @@ void MessageFieldGenerator::GenerateInlineAccessorDefinitions( " return $casted_member$;\n" "}\n" "inline $type$* $classname$::mutable_$name$() {\n" + // TODO(b/122856539): add tests to make sure all write accessors are able + // to prepare split message allocation. + "$maybe_prepare_split_message$" " $type$* _msg = _internal_mutable_$name$();\n" "$annotate_mutable$" " // @@protoc_insertion_point(field_mutable:$full_name$)\n" @@ -254,7 +261,9 @@ void MessageFieldGenerator::GenerateInlineAccessorDefinitions( format( "inline void $classname$::set_allocated_$name$($type$* $name$) {\n" " ::$proto_ns$::Arena* message_arena = GetArenaForAllocation();\n"); - format(" if (message_arena == nullptr) {\n"); + format( + "$maybe_prepare_split_message$" + " if (message_arena == nullptr) {\n"); if (IsCrossFileMessage(descriptor_)) { format( " delete reinterpret_cast< ::$proto_ns$::MessageLite*>($field$);\n"); @@ -434,6 +443,10 @@ void MessageFieldGenerator::GenerateDestructorCode(io::Printer* printer) const { // care when handling them. format("if (this != internal_default_instance()) "); } + if (ShouldSplit(descriptor_, options_)) { + format("delete $cached_split_ptr$->$name$_;\n"); + return; + } format("delete $field$;\n"); } @@ -504,6 +517,10 @@ void MessageFieldGenerator::GenerateCopyAggregateInitializer( void MessageFieldGenerator::GenerateAggregateInitializer( io::Printer* printer) const { Formatter format(printer, variables_); + if (ShouldSplit(descriptor_, options_)) { + format("decltype(Impl_::Split::$name$_){nullptr}"); + return; + } format("decltype($field$){nullptr}"); } diff --git a/src/google/protobuf/compiler/cpp/parse_function_generator.cc b/src/google/protobuf/compiler/cpp/parse_function_generator.cc index 41fc215a5bd6..0f1d767c816b 100644 --- a/src/google/protobuf/compiler/cpp/parse_function_generator.cc +++ b/src/google/protobuf/compiler/cpp/parse_function_generator.cc @@ -899,11 +899,14 @@ void ParseFunctionGenerator::GenerateFastFieldEntries(Formatter& format) { if (info.func_name.empty()) { format("{::_pbi::TcParser::MiniParse, {}},\n"); } else { + bool cold = ShouldSplit(info.field, options_); format( "{$1$,\n" - " {$2$, $3$, $4$, PROTOBUF_FIELD_OFFSET($classname$, $5$)}},\n", + " {$2$, $3$, $4$, PROTOBUF_FIELD_OFFSET($classname$$5$, $6$)}},\n", info.func_name, info.coded_tag, info.hasbit_idx, info.aux_idx, - FieldMemberName(info.field)); + cold ? "::Impl_::Split" : "", + cold ? FieldName(info.field) + "_" + : FieldMemberName(info.field, /*cold=*/false)); } } } @@ -1048,8 +1051,11 @@ void ParseFunctionGenerator::GenerateFieldEntries(Formatter& format) { format("/* weak */ 0, 0, 0, 0"); } else { const OneofDescriptor* oneof = field->real_containing_oneof(); - format("PROTOBUF_FIELD_OFFSET($classname$, $1$), $2$, $3$,\n ", - FieldMemberName(field), + bool cold = ShouldSplit(field, options_); + format("PROTOBUF_FIELD_OFFSET($classname$$1$, $2$), $3$, $4$,\n ", + cold ? "::Impl_::Split" : "", + cold ? FieldName(field) + "_" + : FieldMemberName(field, /*cold=*/false), (oneof ? oneof->index() : entry.hasbit_idx), entry.aux_idx); FormatFieldKind(format, entry, options_, scc_analyzer_); } @@ -1550,7 +1556,8 @@ void ParseFunctionGenerator::GenerateFieldSwitch( format.Indent(); for (const auto* field : fields) { - format.Set("field", FieldMemberName(field)); + bool cold = ShouldSplit(field, options_); + format.Set("field", FieldMemberName(field, cold)); PrintFieldComment(format, field); format("case $1$:\n", field->number()); format.Indent(); @@ -1559,6 +1566,9 @@ void ParseFunctionGenerator::GenerateFieldSwitch( format("if (PROTOBUF_PREDICT_TRUE(static_cast<$uint8$>(tag) == $1$)) {\n", expected_tag & 0xFF); format.Indent(); + if (cold) { + format("$msg$PrepareSplitMessageForWrite();\n"); + } auto wiretype = WireFormatLite::GetTagWireType(expected_tag); uint32_t tag = WireFormatLite::MakeTag(field->number(), wiretype); int tag_size = io::CodedOutputStream::VarintSize32(tag); diff --git a/src/google/protobuf/compiler/cpp/primitive_field.cc b/src/google/protobuf/compiler/cpp/primitive_field.cc index db0ae2d606ea..6c92ede251f0 100644 --- a/src/google/protobuf/compiler/cpp/primitive_field.cc +++ b/src/google/protobuf/compiler/cpp/primitive_field.cc @@ -105,8 +105,9 @@ void SetPrimitiveVariables(const FieldDescriptor* descriptor, (*variables)["type"] = PrimitiveTypeName(options, descriptor->cpp_type()); (*variables)["default"] = DefaultValue(options, descriptor); (*variables)["cached_byte_size_name"] = MakeVarintCachedSizeName(descriptor); + bool cold = ShouldSplit(descriptor, options); (*variables)["cached_byte_size_field"] = - MakeVarintCachedSizeFieldName(descriptor); + MakeVarintCachedSizeFieldName(descriptor, cold); (*variables)["tag"] = StrCat(internal::WireFormat::MakeTag(descriptor)); int fixed_size = FixedSize(descriptor->type()); if (fixed_size != -1) { @@ -165,6 +166,7 @@ void PrimitiveFieldGenerator::GenerateInlineAccessorDefinitions( " $field$ = value;\n" "}\n" "inline void $classname$::set_$name$($type$ value) {\n" + "$maybe_prepare_split_message$" " _internal_set_$name$(value);\n" "$annotate_set$" " // @@protoc_insertion_point(field_set:$full_name$)\n" @@ -233,6 +235,10 @@ void PrimitiveFieldGenerator::GenerateConstexprAggregateInitializer( void PrimitiveFieldGenerator::GenerateAggregateInitializer( io::Printer* printer) const { Formatter format(printer, variables_); + if (ShouldSplit(descriptor_, options_)) { + format("decltype(Impl_::Split::$name$_){$default$}"); + return; + } format("decltype($field$){$default$}"); } diff --git a/src/google/protobuf/compiler/cpp/primitive_field.h b/src/google/protobuf/compiler/cpp/primitive_field.h index ec1fcc1850d3..bb8a08aa1415 100644 --- a/src/google/protobuf/compiler/cpp/primitive_field.h +++ b/src/google/protobuf/compiler/cpp/primitive_field.h @@ -102,7 +102,9 @@ class RepeatedPrimitiveFieldGenerator : public FieldGenerator { void GenerateMergingCode(io::Printer* printer) const override; void GenerateSwappingCode(io::Printer* printer) const override; void GenerateConstructorCode(io::Printer* printer) const override {} - void GenerateCopyConstructorCode(io::Printer* printer) const override {} + void GenerateCopyConstructorCode(io::Printer* /*printer*/) const override { + GOOGLE_CHECK(!ShouldSplit(descriptor_, options_)); + } void GenerateDestructorCode(io::Printer* printer) const override; void GenerateSerializeWithCachedSizesToArray( io::Printer* printer) const override; diff --git a/src/google/protobuf/compiler/cpp/string_field.cc b/src/google/protobuf/compiler/cpp/string_field.cc index a8d1f58a34b7..9e7c96d7ca57 100644 --- a/src/google/protobuf/compiler/cpp/string_field.cc +++ b/src/google/protobuf/compiler/cpp/string_field.cc @@ -115,11 +115,7 @@ void StringFieldGenerator::GeneratePrivateMembers(io::Printer* printer) const { // allocating arena is null. This is required to support message-owned // arena (go/path-to-arenas) where a root proto is destroyed but // InlinedStringField may have arena-allocated memory. - // - // `_init_inline_xxx` is used for initializing default instances. - format( - "::$proto_ns$::internal::InlinedStringField $name$_;\n" - "static std::true_type _init_inline_$name$_;\n"); + format("::$proto_ns$::internal::InlinedStringField $name$_;\n"); } } @@ -130,6 +126,10 @@ void StringFieldGenerator::GenerateStaticMembers(io::Printer* printer) const { "static const ::$proto_ns$::internal::LazyString" " $default_variable_name$;\n"); } + if (inlined_) { + // `_init_inline_xxx` is used for initializing default instances. + format("static std::true_type _init_inline_$name$_;\n"); + } } void StringFieldGenerator::GenerateAccessorDeclarations( @@ -215,6 +215,7 @@ void StringFieldGenerator::GenerateInlineAccessorDefinitions( "template \n" "inline PROTOBUF_ALWAYS_INLINE\n" "void $classname$::set_$name$(ArgT0&& arg0, ArgT... args) {\n" + "$maybe_prepare_split_message$" " $set_hasbit$\n" " $field$.$setter$(static_cast(arg0)," " args..., GetArenaForAllocation());\n" @@ -226,6 +227,7 @@ void StringFieldGenerator::GenerateInlineAccessorDefinitions( "template \n" "inline PROTOBUF_ALWAYS_INLINE\n" "void $classname$::set_$name$(ArgT0&& arg0, ArgT... args) {\n" + "$maybe_prepare_split_message$" " $set_hasbit$\n" " $field$.$setter$(static_cast(arg0)," " args..., GetArenaForAllocation(), _internal_$name$_donated(), " @@ -240,6 +242,7 @@ void StringFieldGenerator::GenerateInlineAccessorDefinitions( } format( "inline std::string* $classname$::mutable_$name$() {\n" + "$maybe_prepare_split_message$" " std::string* _s = _internal_mutable_$name$();\n" "$annotate_mutable$" " // @@protoc_insertion_point(field_mutable:$full_name$)\n" @@ -280,6 +283,7 @@ void StringFieldGenerator::GenerateInlineAccessorDefinitions( format( "inline std::string* $classname$::$release_name$() {\n" "$annotate_release$" + "$maybe_prepare_split_message$" " // @@protoc_insertion_point(field_release:$full_name$)\n"); if (HasHasbit(descriptor_)) { @@ -311,6 +315,7 @@ void StringFieldGenerator::GenerateInlineAccessorDefinitions( format( "}\n" "inline void $classname$::set_allocated_$name$(std::string* $name$) {\n" + "$maybe_prepare_split_message$" " if ($name$ != nullptr) {\n" " $set_hasbit$\n" " } else {\n" @@ -440,6 +445,21 @@ void StringFieldGenerator::GenerateConstructorCode(io::Printer* printer) const { } } +void StringFieldGenerator::GenerateCreateSplitMessageCode( + io::Printer* printer) const { + GOOGLE_CHECK(ShouldSplit(descriptor_, options_)); + GOOGLE_CHECK(!inlined_); + Formatter format(printer, variables_); + format("ptr->$name$_.InitDefault();\n"); + if (IsString(descriptor_, options_) && + descriptor_->default_value_string().empty()) { + format( + "#ifdef PROTOBUF_FORCE_COPY_DEFAULT_STRING\n" + " ptr->$name$_.Set(\"\", GetArenaForAllocation());\n" + "#endif // PROTOBUF_FORCE_COPY_DEFAULT_STRING\n"); + } +} + void StringFieldGenerator::GenerateCopyConstructorCode( io::Printer* printer) const { Formatter format(printer, variables_); @@ -474,6 +494,10 @@ void StringFieldGenerator::GenerateCopyConstructorCode( void StringFieldGenerator::GenerateDestructorCode(io::Printer* printer) const { Formatter format(printer, variables_); if (!inlined_) { + if (ShouldSplit(descriptor_, options_)) { + format("$cached_split_ptr$->$name$_.Destroy();\n"); + return; + } format("$field$.Destroy();\n"); return; } @@ -481,6 +505,7 @@ void StringFieldGenerator::GenerateDestructorCode(io::Printer* printer) const { // Destructor has been implicitly skipped as a union, and even the // message-owned arena is enabled, arena could still be missing for // Arena::CreateMessage(nullptr). + GOOGLE_DCHECK(!ShouldSplit(descriptor_, options_)); format("$field$.~InlinedStringField();\n"); } @@ -541,6 +566,11 @@ void StringFieldGenerator::GenerateConstexprAggregateInitializer( void StringFieldGenerator::GenerateAggregateInitializer( io::Printer* printer) const { Formatter format(printer, variables_); + if (ShouldSplit(descriptor_, options_)) { + GOOGLE_CHECK(!inlined_); + format("decltype(Impl_::Split::$name$_){}"); + return; + } if (!inlined_) { format("decltype($field$){}"); } else { diff --git a/src/google/protobuf/compiler/cpp/string_field.h b/src/google/protobuf/compiler/cpp/string_field.h index 0de057384a1d..db5f18bfb7de 100644 --- a/src/google/protobuf/compiler/cpp/string_field.h +++ b/src/google/protobuf/compiler/cpp/string_field.h @@ -63,6 +63,7 @@ class StringFieldGenerator : public FieldGenerator { void GenerateMergingCode(io::Printer* printer) const override; void GenerateSwappingCode(io::Printer* printer) const override; void GenerateConstructorCode(io::Printer* printer) const override; + void GenerateCreateSplitMessageCode(io::Printer* printer) const override; void GenerateCopyConstructorCode(io::Printer* printer) const override; void GenerateDestructorCode(io::Printer* printer) const override; void GenerateArenaDestructorCode(io::Printer* printer) const override; @@ -115,7 +116,9 @@ class RepeatedStringFieldGenerator : public FieldGenerator { void GenerateMergingCode(io::Printer* printer) const override; void GenerateSwappingCode(io::Printer* printer) const override; void GenerateConstructorCode(io::Printer* printer) const override {} - void GenerateCopyConstructorCode(io::Printer* printer) const override {} + void GenerateCopyConstructorCode(io::Printer* /*printer*/) const override { + GOOGLE_CHECK(!ShouldSplit(descriptor_, options_)); + } void GenerateDestructorCode(io::Printer* printer) const override; void GenerateSerializeWithCachedSizesToArray( io::Printer* printer) const override; diff --git a/src/google/protobuf/unittest.proto b/src/google/protobuf/unittest.proto index 44f8a4861b20..f1b0bd04baad 100644 --- a/src/google/protobuf/unittest.proto +++ b/src/google/protobuf/unittest.proto @@ -1286,4 +1286,133 @@ message TestExtensionRangeSerialize { } } +message TestVerifyInt32Simple { + optional int32 optional_int32_1 = 1; + optional int32 optional_int32_2 = 2; + optional int32 optional_int32_63 = 63; + optional int32 optional_int32_64 = 64; +} + +message TestVerifyInt32 { + optional int32 optional_int32_1 = 1; + optional int32 optional_int32_2 = 2; + optional int32 optional_int32_63 = 63; + optional int32 optional_int32_64 = 64; + + optional TestAllTypes optional_all_types = 9; + repeated TestAllTypes repeated_all_types = 10; +} + +message TestVerifyMostlyInt32 { + optional int64 optional_int64_30 = 30; + + optional int32 optional_int32_1 = 1; + optional int32 optional_int32_2 = 2; + optional int32 optional_int32_3 = 3; + optional int32 optional_int32_4 = 4; + optional int32 optional_int32_63 = 63; + optional int32 optional_int32_64 = 64; + + optional TestAllTypes optional_all_types = 9; + repeated TestAllTypes repeated_all_types = 10; +} + +message TestVerifyMostlyInt32BigFieldNumber { + optional int64 optional_int64_30 = 30; + optional int32 optional_int32_300 = 300; + + optional int32 optional_int32_1 = 1; + optional int32 optional_int32_2 = 2; + optional int32 optional_int32_3 = 3; + optional int32 optional_int32_4 = 4; + optional int32 optional_int32_63 = 63; + optional int32 optional_int32_64 = 64; + + optional TestAllTypes optional_all_types = 9; + repeated TestAllTypes repeated_all_types = 10; +} + +message TestVerifyUint32Simple { + optional uint32 optional_uint32_1 = 1; + optional uint32 optional_uint32_2 = 2; + optional uint32 optional_uint32_63 = 63; + optional uint32 optional_uint32_64 = 64; +} + +message TestVerifyUint32 { + optional uint32 optional_uint32_1 = 1; + optional uint32 optional_uint32_2 = 2; + optional uint32 optional_uint32_63 = 63; + optional uint32 optional_uint32_64 = 64; + + optional TestAllTypes optional_all_types = 9; + repeated TestAllTypes repeated_all_types = 10; +} + +message TestVerifyOneUint32 { + optional uint32 optional_uint32_1 = 1; + optional int32 optional_int32_2 = 2; + optional int32 optional_int32_63 = 63; + optional int32 optional_int32_64 = 64; + + optional TestAllTypes optional_all_types = 9; + repeated TestAllTypes repeated_all_types = 10; +} + +message TestVerifyOneInt32BigFieldNumber { + optional int32 optional_int32_65 = 65; + + optional int64 optional_int64_1 = 1; + optional int64 optional_int64_2 = 2; + optional int64 optional_int64_63 = 63; + optional int64 optional_int64_64 = 64; + + optional TestAllTypes optional_all_types = 9; + repeated TestAllTypes repeated_all_types = 10; +} + +message TestVerifyInt32BigFieldNumber { + optional int32 optional_int32_1000 = 1000; + optional int32 optional_int32_65 = 65; + + optional int32 optional_int32_1 = 1; + optional int32 optional_int32_2 = 2; + optional int32 optional_int32_63 = 63; + optional int32 optional_int32_64 = 64; + + optional TestAllTypes optional_all_types = 9; + repeated TestAllTypes repeated_all_types = 10; +} + +message TestVerifyUint32BigFieldNumber { + optional uint32 optional_uint32_1000 = 1000; + optional uint32 optional_uint32_65 = 65; + + optional uint32 optional_uint32_1 = 1; + optional uint32 optional_uint32_2 = 2; + optional uint32 optional_uint32_63 = 63; + optional uint32 optional_uint32_64 = 64; + + optional TestAllTypes optional_all_types = 9; + repeated TestAllTypes repeated_all_types = 10; +} + +message TestVerifyBigFieldNumberUint32 { + message Nested { + optional uint32 optional_uint32_5000 = 5000; + optional uint32 optional_uint32_1000 = 1000; + optional uint32 optional_uint32_66 = 66; + optional uint32 optional_uint32_65 = 65; + + optional uint32 optional_uint32_1 = 1; + optional uint32 optional_uint32_2 = 2; + optional uint32 optional_uint32_63 = 63; + optional uint32 optional_uint32_64 = 64; + + optional Nested optional_nested = 9; + repeated Nested repeated_nested = 10; + } + optional Nested optional_nested = 1; +} +