You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

message_differencer.h 47 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980
  1. // Protocol Buffers - Google's data interchange format
  2. // Copyright 2008 Google Inc. All rights reserved.
  3. // https://developers.google.com/protocol-buffers/
  4. //
  5. // Redistribution and use in source and binary forms, with or without
  6. // modification, are permitted provided that the following conditions are
  7. // met:
  8. //
  9. // * Redistributions of source code must retain the above copyright
  10. // notice, this list of conditions and the following disclaimer.
  11. // * Redistributions in binary form must reproduce the above
  12. // copyright notice, this list of conditions and the following disclaimer
  13. // in the documentation and/or other materials provided with the
  14. // distribution.
  15. // * Neither the name of Google Inc. nor the names of its
  16. // contributors may be used to endorse or promote products derived from
  17. // this software without specific prior written permission.
  18. //
  19. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  20. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  21. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  22. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  23. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  25. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  26. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  27. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  28. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  29. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30. // Author: jschorr@google.com (Joseph Schorr)
  31. // Based on original Protocol Buffers design by
  32. // Sanjay Ghemawat, Jeff Dean, and others.
  33. //
  34. // This file defines static methods and classes for comparing Protocol
  35. // Messages.
  36. //
  37. // Aug. 2008: Added Unknown Fields Comparison for messages.
  38. // Aug. 2009: Added different options to compare repeated fields.
  39. // Apr. 2010: Moved field comparison to FieldComparator
  40. // Sep. 2020: Added option to output map keys in path
  41. #ifndef GOOGLE_PROTOBUF_UTIL_MESSAGE_DIFFERENCER_H__
  42. #define GOOGLE_PROTOBUF_UTIL_MESSAGE_DIFFERENCER_H__
  43. #include <functional>
  44. #include <map>
  45. #include <memory>
  46. #include <set>
  47. #include <string>
  48. #include <vector>
  49. #include <google/protobuf/descriptor.h> // FieldDescriptor
  50. #include <google/protobuf/message.h> // Message
  51. #include <google/protobuf/unknown_field_set.h>
  52. #include <google/protobuf/util/field_comparator.h>
  53. // Always include as last one, otherwise it can break compilation
  54. #include <google/protobuf/port_def.inc>
  55. namespace google {
  56. namespace protobuf {
  57. class DynamicMessageFactory;
  58. class FieldDescriptor;
  59. namespace io {
  60. class ZeroCopyOutputStream;
  61. class Printer;
  62. } // namespace io
  63. namespace util {
  64. class DefaultFieldComparator;
  65. class FieldContext; // declared below MessageDifferencer
  66. // Defines a collection of field descriptors.
  67. // In case of internal google codebase we are using absl::FixedArray instead
  68. // of vector. It significantly speeds up proto comparison (by ~30%) by
  69. // reducing the number of malloc/free operations
  70. typedef std::vector<const FieldDescriptor*> FieldDescriptorArray;
  71. // A basic differencer that can be used to determine
  72. // the differences between two specified Protocol Messages. If any differences
  73. // are found, the Compare method will return false, and any differencer reporter
  74. // specified via ReportDifferencesTo will have its reporting methods called (see
  75. // below for implementation of the report). Based off of the original
  76. // ProtocolDifferencer implementation in //net/proto/protocol-differencer.h
  77. // (Thanks Todd!).
  78. //
  79. // MessageDifferencer REQUIRES that compared messages be the same type, defined
  80. // as messages that share the same descriptor. If not, the behavior of this
  81. // class is undefined.
  82. //
  83. // People disagree on what MessageDifferencer should do when asked to compare
  84. // messages with different descriptors. Some people think it should always
  85. // return false. Others expect it to try to look for similar fields and
  86. // compare them anyway -- especially if the descriptors happen to be identical.
  87. // If we chose either of these behaviors, some set of people would find it
  88. // surprising, and could end up writing code expecting the other behavior
  89. // without realizing their error. Therefore, we forbid that usage.
  90. //
  91. // This class is implemented based on the proto2 reflection. The performance
  92. // should be good enough for normal usages. However, for places where the
  93. // performance is extremely sensitive, there are several alternatives:
  94. // - Comparing serialized string
  95. // Downside: false negatives (there are messages that are the same but their
  96. // serialized strings are different).
  97. // - Equals code generator by compiler plugin (net/proto2/contrib/equals_plugin)
  98. // Downside: more generated code; maintenance overhead for the additional rule
  99. // (must be in sync with the original proto_library).
  100. //
  101. // Note on handling of google.protobuf.Any: MessageDifferencer automatically
  102. // unpacks Any::value into a Message and compares its individual fields.
  103. // Messages encoded in a repeated Any cannot be compared using TreatAsMap.
  104. //
  105. // Note on thread-safety: MessageDifferencer is *not* thread-safe. You need to
  106. // guard it with a lock to use the same MessageDifferencer instance from
  107. // multiple threads. Note that it's fine to call static comparison methods
  108. // (like MessageDifferencer::Equals) concurrently, but it's not recommended for
  109. // performance critical code as it leads to extra allocations.
  110. class PROTOBUF_EXPORT MessageDifferencer {
  111. public:
  112. // Determines whether the supplied messages are equal. Equality is defined as
  113. // all fields within the two messages being set to the same value. Primitive
  114. // fields and strings are compared by value while embedded messages/groups
  115. // are compared as if via a recursive call. Use Compare() with IgnoreField()
  116. // if some fields should be ignored in the comparison. Use Compare() with
  117. // TreatAsSet() if there are repeated fields where ordering does not matter.
  118. //
  119. // This method REQUIRES that the two messages have the same
  120. // Descriptor (message1.GetDescriptor() == message2.GetDescriptor()).
  121. static bool Equals(const Message& message1, const Message& message2);
  122. // Determines whether the supplied messages are equivalent. Equivalency is
  123. // defined as all fields within the two messages having the same value. This
  124. // differs from the Equals method above in that fields with default values
  125. // are considered set to said value automatically. For details on how default
  126. // values are defined for each field type, see:
  127. // https://developers.google.com/protocol-buffers/docs/proto?csw=1#optional.
  128. // Also, Equivalent() ignores unknown fields. Use IgnoreField() and Compare()
  129. // if some fields should be ignored in the comparison.
  130. //
  131. // This method REQUIRES that the two messages have the same
  132. // Descriptor (message1.GetDescriptor() == message2.GetDescriptor()).
  133. static bool Equivalent(const Message& message1, const Message& message2);
  134. // Determines whether the supplied messages are approximately equal.
  135. // Approximate equality is defined as all fields within the two messages
  136. // being approximately equal. Primitive (non-float) fields and strings are
  137. // compared by value, floats are compared using MathUtil::AlmostEquals() and
  138. // embedded messages/groups are compared as if via a recursive call. Use
  139. // IgnoreField() and Compare() if some fields should be ignored in the
  140. // comparison.
  141. //
  142. // This method REQUIRES that the two messages have the same
  143. // Descriptor (message1.GetDescriptor() == message2.GetDescriptor()).
  144. static bool ApproximatelyEquals(const Message& message1,
  145. const Message& message2);
  146. // Determines whether the supplied messages are approximately equivalent.
  147. // Approximate equivalency is defined as all fields within the two messages
  148. // being approximately equivalent. As in
  149. // MessageDifferencer::ApproximatelyEquals, primitive (non-float) fields and
  150. // strings are compared by value, floats are compared using
  151. // MathUtil::AlmostEquals() and embedded messages/groups are compared as if
  152. // via a recursive call. However, fields with default values are considered
  153. // set to said value, as per MessageDifferencer::Equivalent. Use IgnoreField()
  154. // and Compare() if some fields should be ignored in the comparison.
  155. //
  156. // This method REQUIRES that the two messages have the same
  157. // Descriptor (message1.GetDescriptor() == message2.GetDescriptor()).
  158. static bool ApproximatelyEquivalent(const Message& message1,
  159. const Message& message2);
  160. // Identifies an individual field in a message instance. Used for field_path,
  161. // below.
  162. struct SpecificField {
  163. // For known fields, "field" is filled in and "unknown_field_number" is -1.
  164. // For unknown fields, "field" is NULL, "unknown_field_number" is the field
  165. // number, and "unknown_field_type" is its type.
  166. const FieldDescriptor* field = nullptr;
  167. int unknown_field_number = -1;
  168. UnknownField::Type unknown_field_type = UnknownField::Type::TYPE_VARINT;
  169. // If this a repeated field, "index" is the index within it. For unknown
  170. // fields, this is the index of the field among all unknown fields of the
  171. // same field number and type.
  172. int index = -1;
  173. // If "field" is a repeated field which is being treated as a map or
  174. // a set (see TreatAsMap() and TreatAsSet(), below), new_index indicates
  175. // the index the position to which the element has moved. If the element
  176. // has not moved, "new_index" will have the same value as "index".
  177. int new_index = -1;
  178. // If "field" is a map field, point to the map entry.
  179. const Message* map_entry1 = nullptr;
  180. const Message* map_entry2 = nullptr;
  181. // For unknown fields, these are the pointers to the UnknownFieldSet
  182. // containing the unknown fields. In certain cases (e.g. proto1's
  183. // MessageSet, or nested groups of unknown fields), these may differ from
  184. // the messages' internal UnknownFieldSets.
  185. const UnknownFieldSet* unknown_field_set1 = nullptr;
  186. const UnknownFieldSet* unknown_field_set2 = nullptr;
  187. // For unknown fields, these are the index of the field within the
  188. // UnknownFieldSets. One or the other will be -1 when
  189. // reporting an addition or deletion.
  190. int unknown_field_index1 = -1;
  191. int unknown_field_index2 = -1;
  192. };
  193. // Abstract base class from which all MessageDifferencer
  194. // reporters derive. The five Report* methods below will be called when
  195. // a field has been added, deleted, modified, moved, or matched. The third
  196. // argument is a vector of FieldDescriptor pointers which describes the chain
  197. // of fields that was taken to find the current field. For example, for a
  198. // field found in an embedded message, the vector will contain two
  199. // FieldDescriptors. The first will be the field of the embedded message
  200. // itself and the second will be the actual field in the embedded message
  201. // that was added/deleted/modified.
  202. // Fields will be reported in PostTraversalOrder.
  203. // For example, given following proto, if both baz and mooo are changed.
  204. // foo {
  205. // bar {
  206. // baz: 1
  207. // mooo: 2
  208. // }
  209. // }
  210. // ReportModified will be invoked with following order:
  211. // 1. foo.bar.baz or foo.bar.mooo
  212. // 2. foo.bar.mooo or foo.bar.baz
  213. // 3. foo.bar
  214. // 4. foo
  215. class PROTOBUF_EXPORT Reporter {
  216. public:
  217. Reporter();
  218. virtual ~Reporter();
  219. // Reports that a field has been added into Message2.
  220. virtual void ReportAdded(const Message& message1, const Message& message2,
  221. const std::vector<SpecificField>& field_path) = 0;
  222. // Reports that a field has been deleted from Message1.
  223. virtual void ReportDeleted(
  224. const Message& message1, const Message& message2,
  225. const std::vector<SpecificField>& field_path) = 0;
  226. // Reports that the value of a field has been modified.
  227. virtual void ReportModified(
  228. const Message& message1, const Message& message2,
  229. const std::vector<SpecificField>& field_path) = 0;
  230. // Reports that a repeated field has been moved to another location. This
  231. // only applies when using TreatAsSet or TreatAsMap() -- see below. Also
  232. // note that for any given field, ReportModified and ReportMoved are
  233. // mutually exclusive. If a field has been both moved and modified, then
  234. // only ReportModified will be called.
  235. virtual void ReportMoved(
  236. const Message& /* message1 */, const Message& /* message2 */,
  237. const std::vector<SpecificField>& /* field_path */) {}
  238. // Reports that two fields match. Useful for doing side-by-side diffs.
  239. // This function is mutually exclusive with ReportModified and ReportMoved.
  240. // Note that you must call set_report_matches(true) before calling Compare
  241. // to make use of this function.
  242. virtual void ReportMatched(
  243. const Message& /* message1 */, const Message& /* message2 */,
  244. const std::vector<SpecificField>& /* field_path */) {}
  245. // Reports that two fields would have been compared, but the
  246. // comparison has been skipped because the field was marked as
  247. // 'ignored' using IgnoreField(). This function is mutually
  248. // exclusive with all the other Report() functions.
  249. //
  250. // The contract of ReportIgnored is slightly different than the
  251. // other Report() functions, in that |field_path.back().index| is
  252. // always equal to -1, even if the last field is repeated. This is
  253. // because while the other Report() functions indicate where in a
  254. // repeated field the action (Addition, Deletion, etc...)
  255. // happened, when a repeated field is 'ignored', the differencer
  256. // simply calls ReportIgnored on the repeated field as a whole and
  257. // moves on without looking at its individual elements.
  258. //
  259. // Furthermore, ReportIgnored() does not indicate whether the
  260. // fields were in fact equal or not, as Compare() does not inspect
  261. // these fields at all. It is up to the Reporter to decide whether
  262. // the fields are equal or not (perhaps with a second call to
  263. // Compare()), if it cares.
  264. virtual void ReportIgnored(
  265. const Message& /* message1 */, const Message& /* message2 */,
  266. const std::vector<SpecificField>& /* field_path */) {}
  267. // Report that an unknown field is ignored. (see comment above).
  268. // Note this is a different function since the last SpecificField in field
  269. // path has a null field. This could break existing Reporter.
  270. virtual void ReportUnknownFieldIgnored(
  271. const Message& /* message1 */, const Message& /* message2 */,
  272. const std::vector<SpecificField>& /* field_path */) {}
  273. private:
  274. GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(Reporter);
  275. };
  276. // MapKeyComparator is used to determine if two elements have the same key
  277. // when comparing elements of a repeated field as a map.
  278. class PROTOBUF_EXPORT MapKeyComparator {
  279. public:
  280. MapKeyComparator();
  281. virtual ~MapKeyComparator();
  282. virtual bool IsMatch(
  283. const Message& /* message1 */, const Message& /* message2 */,
  284. const std::vector<SpecificField>& /* parent_fields */) const {
  285. GOOGLE_CHECK(false) << "IsMatch() is not implemented.";
  286. return false;
  287. }
  288. private:
  289. GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(MapKeyComparator);
  290. };
  291. // Abstract base class from which all IgnoreCriteria derive.
  292. // By adding IgnoreCriteria more complex ignore logic can be implemented.
  293. // IgnoreCriteria are registered with AddIgnoreCriteria. For each compared
  294. // field IsIgnored is called on each added IgnoreCriteria until one returns
  295. // true or all return false.
  296. // IsIgnored is called for fields where at least one side has a value.
  297. class PROTOBUF_EXPORT IgnoreCriteria {
  298. public:
  299. IgnoreCriteria();
  300. virtual ~IgnoreCriteria();
  301. // Returns true if the field should be ignored.
  302. virtual bool IsIgnored(
  303. const Message& /* message1 */, const Message& /* message2 */,
  304. const FieldDescriptor* /* field */,
  305. const std::vector<SpecificField>& /* parent_fields */) = 0;
  306. // Returns true if the unknown field should be ignored.
  307. // Note: This will be called for unknown fields as well in which case
  308. // field.field will be null.
  309. virtual bool IsUnknownFieldIgnored(
  310. const Message& /* message1 */, const Message& /* message2 */,
  311. const SpecificField& /* field */,
  312. const std::vector<SpecificField>& /* parent_fields */) {
  313. return false;
  314. }
  315. };
  316. // To add a Reporter, construct default here, then use ReportDifferencesTo or
  317. // ReportDifferencesToString.
  318. explicit MessageDifferencer();
  319. ~MessageDifferencer();
  enum MessageFieldComparison {
    // Fields must be present in both messages for the messages to be
    // considered the same.
    EQUAL,
    // Fields with default values are considered set for comparison purposes
    // even if not explicitly set in the messages themselves. Unknown fields
    // are ignored.
    EQUIVALENT,
  };
  enum Scope {
    // All fields of both messages are considered in the comparison.
    FULL,
    // Only fields present in the first message are considered; fields set
    // only in the second message will be skipped during comparison.
    PARTIAL
  };
  // DEPRECATED. Use FieldComparator::FloatComparison instead.
  enum FloatComparison {
    // Floats and doubles are compared exactly.
    EXACT,
    // Floats and doubles are compared using the MathUtil::AlmostEquals
    // method.
    APPROXIMATE
  };
  enum RepeatedFieldComparison {
    // Repeated fields are compared in order. Differing values at the same
    // index are reported using ReportModified(). If the repeated fields have
    // different numbers of elements, the unpaired elements are reported
    // using ReportAdded() or ReportDeleted().
    AS_LIST,
    // Treat all the repeated fields as sets. See TreatAsSet(), as below.
    AS_SET,
    // Similar to AS_SET, but preserve the order and find the longest
    // matching sequence from the first matching element. To use an optimal
    // solution, call SetMatchIndicesForSmartListCallback() to pass it in.
    AS_SMART_LIST,
    // Similar to AS_SET, but match elements with fewest diffs.
    AS_SMART_SET,
  };
  354. // The elements of the given repeated field will be treated as a set for
  355. // diffing purposes, so different orderings of the same elements will be
  356. // considered equal. Elements which are present on both sides of the
  357. // comparison but which have changed position will be reported with
  358. // ReportMoved(). Elements which only exist on one side or the other are
  359. // reported with ReportAdded() and ReportDeleted() regardless of their
  360. // positions. ReportModified() is never used for this repeated field. If
  361. // the only difference between the compared messages is that some fields
  362. // have been moved, then the comparison returns true.
  363. //
  364. // Note that despite the name of this method, this is really
  365. // comparison as multisets: if one side of the comparison has a duplicate
  366. // in the repeated field but the other side doesn't, this will count as
  367. // a mismatch.
  368. //
  369. // If the scope of comparison is set to PARTIAL, then in addition to what's
  370. // above, extra values added to repeated fields of the second message will
  371. // not cause the comparison to fail.
  372. //
  373. // Note that set comparison is currently O(k * n^2) (where n is the total
  374. // number of elements, and k is the average size of each element). In theory
  375. // it could be made O(n * k) with a more complex hashing implementation. Feel
  376. // free to contribute one if the current implementation is too slow for you.
  377. // If partial matching is also enabled, the time complexity will be O(k * n^2
  378. // + n^3) in which n^3 is the time complexity of the maximum matching
  379. // algorithm.
  380. //
  381. // REQUIRES: field->is_repeated() and field not registered with TreatAsMap*
  382. void TreatAsSet(const FieldDescriptor* field);
  383. void TreatAsSmartSet(const FieldDescriptor* field);
  384. // The elements of the given repeated field will be treated as a list for
  385. // diffing purposes, so different orderings of the same elements will NOT be
  386. // considered equal.
  387. //
  388. // REQUIRES: field->is_repeated() and field not registered with TreatAsMap*
  389. void TreatAsList(const FieldDescriptor* field);
  390. // Note that the complexity is similar to treating as SET.
  391. void TreatAsSmartList(const FieldDescriptor* field);
  392. // The elements of the given repeated field will be treated as a map for
  393. // diffing purposes, with |key| being the map key. Thus, elements with the
  394. // same key will be compared even if they do not appear at the same index.
  395. // Differences are reported similarly to TreatAsSet(), except that
  396. // ReportModified() is used to report elements with the same key but
  397. // different values. Note that if an element is both moved and modified,
  398. // only ReportModified() will be called. As with TreatAsSet, if the only
  399. // difference between the compared messages is that some fields have been
  400. // moved, then the comparison returns true. See TreatAsSet for notes on
  401. // performance.
  402. //
  403. // REQUIRES: field->is_repeated()
  404. // REQUIRES: field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE
  405. // REQUIRES: key->containing_type() == field->message_type()
  406. void TreatAsMap(const FieldDescriptor* field, const FieldDescriptor* key);
  407. // Same as TreatAsMap except that this method will use multiple fields as
  408. // the key in comparison. All specified fields in 'key_fields' should be
  409. // present in the compared elements. Two elements will be treated as having
  410. // the same key iff they have the same value for every specified field. There
  411. // are two steps in the comparison process. The first one is key matching.
  412. // Every element from one message will be compared to every element from
  413. // the other message. Only fields in 'key_fields' are compared in this step
  414. // to decide if two elements have the same key. The second step is value
  415. // comparison. Those pairs of elements with the same key (with equal value
  416. // for every field in 'key_fields') will be compared in this step.
  417. // Time complexity of the first step is O(s * m * n ^ 2) where s is the
  418. // average size of the fields specified in 'key_fields', m is the number of
  419. // fields in 'key_fields' and n is the number of elements. If partial
  420. // matching is enabled, an extra O(n^3) will be incurred by the maximum
  421. // matching algorithm. The second step is O(k * n) where k is the average
  422. // size of each element.
  423. void TreatAsMapWithMultipleFieldsAsKey(
  424. const FieldDescriptor* field,
  425. const std::vector<const FieldDescriptor*>& key_fields);
  426. // Same as TreatAsMapWithMultipleFieldsAsKey, except that each of the fields
  427. // does not necessarily need to be a direct subfield. Each element in
  428. // key_field_paths indicates a path from the message being compared, listing
  429. // successive subfields to reach the key field.
  430. //
  431. // REQUIRES:
  432. // for key_field_path in key_field_paths:
  433. // key_field_path[0]->containing_type() == field->message_type()
  434. // for i in [0, key_field_path.size() - 1):
  435. // key_field_path[i+1]->containing_type() ==
  436. // key_field_path[i]->message_type()
  437. // key_field_path[i]->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE
  438. // !key_field_path[i]->is_repeated()
  439. void TreatAsMapWithMultipleFieldPathsAsKey(
  440. const FieldDescriptor* field,
  441. const std::vector<std::vector<const FieldDescriptor*> >& key_field_paths);
  442. // Uses a custom MapKeyComparator to determine if two elements have the same
  443. // key when comparing a repeated field as a map.
  444. // The caller is responsible for deleting the key_comparator.
  445. // This method varies from TreatAsMapWithMultipleFieldsAsKey only in the
  446. // first key matching step. Rather than comparing some specified fields, it
  447. // will invoke the IsMatch method of the given 'key_comparator' to decide if
  448. // two elements have the same key.
  449. void TreatAsMapUsingKeyComparator(const FieldDescriptor* field,
  450. const MapKeyComparator* key_comparator);
  451. // Initiates and returns a new instance of MultipleFieldsMapKeyComparator.
  452. MapKeyComparator* CreateMultipleFieldsMapKeyComparator(
  453. const std::vector<std::vector<const FieldDescriptor*> >& key_field_paths);
  454. // Add a custom ignore criteria that is evaluated in addition to the
  455. // ignored fields added with IgnoreField.
  456. // Takes ownership of ignore_criteria.
  457. void AddIgnoreCriteria(IgnoreCriteria* ignore_criteria);
  458. // Indicates that any field with the given descriptor should be
  459. // ignored for the purposes of comparing two messages. This applies
  460. // to fields nested in the message structure as well as top level
  461. // ones. When the MessageDifferencer encounters an ignored field,
  462. // ReportIgnored is called on the reporter, if one is specified.
  463. //
  464. // The only place where the field's 'ignored' status is not applied is when
  465. // it is being used as a key in a field passed to TreatAsMap or is one of
  466. // the fields passed to TreatAsMapWithMultipleFieldsAsKey.
  467. // In this case it is compared in key matching but after that it's ignored
  468. // in value comparison.
  469. void IgnoreField(const FieldDescriptor* field);
  470. // Sets the field comparator used to determine differences between protocol
  471. // buffer fields. By default it's set to a DefaultFieldComparator instance.
  472. // MessageDifferencer doesn't take ownership over the passed object.
  473. // Note that this method must be called before Compare for the comparator to
  474. // be used.
  475. void set_field_comparator(FieldComparator* comparator);
  476. #ifdef PROTOBUF_FUTURE_BREAKING_CHANGES
  477. void set_field_comparator(DefaultFieldComparator* comparator);
  478. #endif // PROTOBUF_FUTURE_BREAKING_CHANGES
  479. // DEPRECATED. Pass a DefaultFieldComparator instance instead.
  480. // Sets the fraction and margin for the float comparison of a given field.
  481. // Uses MathUtil::WithinFractionOrMargin to compare the values.
  482. // NOTE: this method does nothing if differencer's field comparator has been
  483. // set to a custom object.
  484. //
  485. // REQUIRES: field->cpp_type() == FieldDescriptor::CPPTYPE_DOUBLE or
  486. // field->cpp_type() == FieldDescriptor::CPPTYPE_FLOAT
  487. // REQUIRES: float_comparison_ == APPROXIMATE
  488. void SetFractionAndMargin(const FieldDescriptor* field, double fraction,
  489. double margin);
  490. // Sets the type of comparison (as defined in the MessageFieldComparison
  491. // enumeration above) that is used by this differencer when determining how
  492. // to compare fields in messages.
  493. void set_message_field_comparison(MessageFieldComparison comparison);
  494. // Returns the current message field comparison used in this differencer.
  495. MessageFieldComparison message_field_comparison() const;
  496. // Tells the differencer whether or not to report matches. This method must
  497. // be called before Compare. The default for a new differencer is false.
  498. void set_report_matches(bool report_matches) {
  499. report_matches_ = report_matches;
  500. }
  501. // Tells the differencer whether or not to report moves (in a set or map
  502. // repeated field). This method must be called before Compare. The default for
  503. // a new differencer is true.
  504. void set_report_moves(bool report_moves) { report_moves_ = report_moves; }
  505. // Tells the differencer whether or not to report ignored values. This method
  506. // must be called before Compare. The default for a new differencer is true.
  507. void set_report_ignores(bool report_ignores) {
  508. report_ignores_ = report_ignores;
  509. }
  510. // Sets the scope of the comparison (as defined in the Scope enumeration
  511. // above) that is used by this differencer when determining which fields to
  512. // compare between the messages.
  513. void set_scope(Scope scope);
  514. // Returns the current scope used by this differencer.
  515. Scope scope() const;
  516. // DEPRECATED. Pass a DefaultFieldComparator instance instead.
  517. // Sets the type of comparison (as defined in the FloatComparison enumeration
  518. // above) that is used by this differencer when comparing float (and double)
  519. // fields in messages.
  520. // NOTE: this method does nothing if differencer's field comparator has been
  521. // set to a custom object.
  522. void set_float_comparison(FloatComparison comparison);
  // Sets the type of comparison for repeated fields (as defined in the
  // RepeatedFieldComparison enumeration above) that is used by this
  // differencer when comparing repeated fields in messages.
  526. void set_repeated_field_comparison(RepeatedFieldComparison comparison);
  527. // Returns the current repeated field comparison used by this differencer.
  528. RepeatedFieldComparison repeated_field_comparison() const;
  529. // Compares the two specified messages, returning true if they are the same,
  530. // false otherwise. If this method returns false, any changes between the
  531. // two messages will be reported if a Reporter was specified via
  532. // ReportDifferencesTo (see also ReportDifferencesToString).
  533. //
  534. // This method REQUIRES that the two messages have the same
  535. // Descriptor (message1.GetDescriptor() == message2.GetDescriptor()).
  536. bool Compare(const Message& message1, const Message& message2);
  537. // Same as above, except comparing only the list of fields specified by the
  538. // two vectors of FieldDescriptors.
  539. bool CompareWithFields(
  540. const Message& message1, const Message& message2,
  541. const std::vector<const FieldDescriptor*>& message1_fields,
  542. const std::vector<const FieldDescriptor*>& message2_fields);
  543. // Automatically creates a reporter that will output the differences
  544. // found (if any) to the specified output string pointer. Note that this
  545. // method must be called before Compare.
  546. void ReportDifferencesToString(std::string* output);
  547. // Tells the MessageDifferencer to report differences via the specified
  548. // reporter. Note that this method must be called before Compare for
  549. // the reporter to be used. It is the responsibility of the caller to delete
  550. // this object.
  551. // If the provided pointer equals NULL, the MessageDifferencer stops reporting
  552. // differences to any previously set reporters or output strings.
  553. void ReportDifferencesTo(Reporter* reporter);
  554. private:
  555. // Class for processing Any deserialization. This logic is used by both the
  556. // MessageDifferencer and StreamReporter classes.
  557. class UnpackAnyField {
  558. private:
  559. std::unique_ptr<DynamicMessageFactory> dynamic_message_factory_;
  560. public:
  561. UnpackAnyField() = default;
  562. ~UnpackAnyField() = default;
  563. // If "any" is of type google.protobuf.Any, extract its payload using
  564. // DynamicMessageFactory and store in "data".
  565. bool UnpackAny(const Message& any, std::unique_ptr<Message>* data);
  566. };
  567. public:
  568. // An implementation of the MessageDifferencer Reporter that outputs
  569. // any differences found in human-readable form to the supplied
  570. // ZeroCopyOutputStream or Printer. If a printer is used, the delimiter
  571. // *must* be '$'.
  572. //
  573. // WARNING: this reporter does not necessarily flush its output until it is
  574. // destroyed. As a result, it is not safe to assume the output is valid or
  575. // complete until after you destroy the reporter. For example, if you use a
  576. // StreamReporter to write to a StringOutputStream, the target string may
  577. // contain uninitialized data until the reporter is destroyed.
  578. class PROTOBUF_EXPORT StreamReporter : public Reporter {
  579. public:
  580. explicit StreamReporter(io::ZeroCopyOutputStream* output);
  581. explicit StreamReporter(io::Printer* printer); // delimiter '$'
  582. ~StreamReporter() override;
  583. // When set to true, the stream reporter will also output aggregates nodes
  584. // (i.e. messages and groups) whose subfields have been modified. When
  585. // false, will only report the individual subfields. Defaults to false.
  586. void set_report_modified_aggregates(bool report) {
  587. report_modified_aggregates_ = report;
  588. }
  589. // The following are implementations of the methods described above.
  590. void ReportAdded(const Message& message1, const Message& message2,
  591. const std::vector<SpecificField>& field_path) override;
  592. void ReportDeleted(const Message& message1, const Message& message2,
  593. const std::vector<SpecificField>& field_path) override;
  594. void ReportModified(const Message& message1, const Message& message2,
  595. const std::vector<SpecificField>& field_path) override;
  596. void ReportMoved(const Message& message1, const Message& message2,
  597. const std::vector<SpecificField>& field_path) override;
  598. void ReportMatched(const Message& message1, const Message& message2,
  599. const std::vector<SpecificField>& field_path) override;
  600. void ReportIgnored(const Message& message1, const Message& message2,
  601. const std::vector<SpecificField>& field_path) override;
  602. void ReportUnknownFieldIgnored(
  603. const Message& message1, const Message& message2,
  604. const std::vector<SpecificField>& field_path) override;
  605. // Messages that are being compared must be provided to StreamReporter prior
  606. // to processing
  607. void SetMessages(const Message& message1, const Message& message2);
  608. protected:
  609. // Prints the specified path of fields to the buffer.
  610. virtual void PrintPath(const std::vector<SpecificField>& field_path,
  611. bool left_side);
  612. // Prints the value of fields to the buffer. left_side is true if the
  613. // given message is from the left side of the comparison, false if it
  614. // was the right. This is relevant only to decide whether to follow
  615. // unknown_field_index1 or unknown_field_index2 when an unknown field
  616. // is encountered in field_path.
  617. virtual void PrintValue(const Message& message,
  618. const std::vector<SpecificField>& field_path,
  619. bool left_side);
  620. // Prints the specified path of unknown fields to the buffer.
  621. virtual void PrintUnknownFieldValue(const UnknownField* unknown_field);
  622. // Just print a string
  623. void Print(const std::string& str);
  624. private:
  625. // helper function for PrintPath that contains logic for printing maps
  626. void PrintMapKey(bool left_side, const SpecificField& specific_field);
  627. io::Printer* printer_;
  628. bool delete_printer_;
  629. bool report_modified_aggregates_;
  630. const Message* message1_;
  631. const Message* message2_;
  632. MessageDifferencer::UnpackAnyField unpack_any_field_;
  633. GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(StreamReporter);
  634. };
  635. private:
  636. friend class SimpleFieldComparator;
  637. // A MapKeyComparator to be used in TreatAsMapUsingKeyComparator.
  638. // Implementation of this class needs to do field value comparison which
  639. // relies on some private methods of MessageDifferencer. That's why this
  640. // class is declared as a nested class of MessageDifferencer.
  641. class MultipleFieldsMapKeyComparator;
  642. // A MapKeyComparator for use with map_entries.
  643. class PROTOBUF_EXPORT MapEntryKeyComparator : public MapKeyComparator {
  644. public:
  645. explicit MapEntryKeyComparator(MessageDifferencer* message_differencer);
  646. bool IsMatch(
  647. const Message& message1, const Message& message2,
  648. const std::vector<SpecificField>& parent_fields) const override;
  649. private:
  650. MessageDifferencer* message_differencer_;
  651. };
  652. // Returns true if field1's number() is less than field2's.
  653. static bool FieldBefore(const FieldDescriptor* field1,
  654. const FieldDescriptor* field2);
  655. // Retrieve all the set fields, including extensions.
  656. FieldDescriptorArray RetrieveFields(const Message& message,
  657. bool base_message);
  658. // Combine the two lists of fields into the combined_fields output vector.
  659. // All fields present in both lists will always be included in the combined
  660. // list. Fields only present in one of the lists will only appear in the
  661. // combined list if the corresponding fields_scope option is set to FULL.
  662. FieldDescriptorArray CombineFields(const FieldDescriptorArray& fields1,
  663. Scope fields1_scope,
  664. const FieldDescriptorArray& fields2,
  665. Scope fields2_scope);
  666. // Internal version of the Compare method which performs the actual
  667. // comparison. The parent_fields vector is a vector containing field
  668. // descriptors of all fields accessed to get to this comparison operation
  669. // (i.e. if the current message is an embedded message, the parent_fields
  670. // vector will contain the field that has this embedded message).
  671. bool Compare(const Message& message1, const Message& message2,
  672. std::vector<SpecificField>* parent_fields);
  673. // Compares all the unknown fields in two messages.
  674. bool CompareUnknownFields(const Message& message1, const Message& message2,
  675. const UnknownFieldSet&, const UnknownFieldSet&,
  676. std::vector<SpecificField>* parent_fields);
  677. // Compares the specified messages for the requested field lists. The field
  678. // lists are modified depending on comparison settings, and then passed to
  679. // CompareWithFieldsInternal.
  680. bool CompareRequestedFieldsUsingSettings(
  681. const Message& message1, const Message& message2,
  682. const FieldDescriptorArray& message1_fields,
  683. const FieldDescriptorArray& message2_fields,
  684. std::vector<SpecificField>* parent_fields);
  685. // Compares the specified messages with the specified field lists.
  686. bool CompareWithFieldsInternal(const Message& message1,
  687. const Message& message2,
  688. const FieldDescriptorArray& message1_fields,
  689. const FieldDescriptorArray& message2_fields,
  690. std::vector<SpecificField>* parent_fields);
  // Compares the repeated fields, and reports the error.
  692. bool CompareRepeatedField(const Message& message1, const Message& message2,
  693. const FieldDescriptor* field,
  694. std::vector<SpecificField>* parent_fields);
  // Compares map fields, and reports the error.
  696. bool CompareMapField(const Message& message1, const Message& message2,
  697. const FieldDescriptor* field,
  698. std::vector<SpecificField>* parent_fields);
  699. // Helper for CompareRepeatedField and CompareMapField: compares and reports
  700. // differences element-wise. This is the implementation for non-map fields,
  701. // and can also compare map fields by using the underlying representation.
  702. bool CompareRepeatedRep(const Message& message1, const Message& message2,
  703. const FieldDescriptor* field,
  704. std::vector<SpecificField>* parent_fields);
  705. // Helper for CompareMapField: compare the map fields using map reflection
  706. // instead of sync to repeated.
  707. bool CompareMapFieldByMapReflection(const Message& message1,
  708. const Message& message2,
  709. const FieldDescriptor* field,
  710. std::vector<SpecificField>* parent_fields,
  711. DefaultFieldComparator* comparator);
  712. // Shorthand for CompareFieldValueUsingParentFields with NULL parent_fields.
  713. bool CompareFieldValue(const Message& message1, const Message& message2,
  714. const FieldDescriptor* field, int index1, int index2);
  715. // Compares the specified field on the two messages, returning
  716. // true if they are the same, false otherwise. For repeated fields,
  717. // this method only compares the value in the specified index. This method
  718. // uses Compare functions to recurse into submessages.
  // The parent_fields vector is used in calls to a Reporter instance.
  // It can be NULL, in which case the MessageDifferencer will create a new
  // list of parent messages if it needs to recursively compare the given field.
  722. // To avoid confusing users you should not set it to NULL unless you modified
  723. // Reporter to handle the change of parent_fields correctly.
  724. bool CompareFieldValueUsingParentFields(
  725. const Message& message1, const Message& message2,
  726. const FieldDescriptor* field, int index1, int index2,
  727. std::vector<SpecificField>* parent_fields);
  728. // Compares the specified field on the two messages, returning comparison
  729. // result, as returned by appropriate FieldComparator.
  730. FieldComparator::ComparisonResult GetFieldComparisonResult(
  731. const Message& message1, const Message& message2,
  732. const FieldDescriptor* field, int index1, int index2,
  733. const FieldContext* field_context);
  // Checks whether the two elements in the repeated field match each other.
  // If key_comparator is NULL, this function returns true when the two
  // elements are equal.
  737. bool IsMatch(const FieldDescriptor* repeated_field,
  738. const MapKeyComparator* key_comparator, const Message* message1,
  739. const Message* message2,
  740. const std::vector<SpecificField>& parent_fields,
  741. Reporter* reporter, int index1, int index2);
  742. // Returns true when this repeated field has been configured to be treated
  743. // as a Set / SmartSet / SmartList.
  744. bool IsTreatedAsSet(const FieldDescriptor* field);
  745. bool IsTreatedAsSmartSet(const FieldDescriptor* field);
  746. bool IsTreatedAsSmartList(const FieldDescriptor* field);
  // When treating as SMART_LIST, it uses MatchIndicesPostProcessorForSmartList
  // by default to find the longest matching sequence from the first matching
  // element. The callback takes two vectors showing the matching indices from
  // the other vector, where -1 means no match.
  751. void SetMatchIndicesForSmartListCallback(
  752. std::function<void(std::vector<int>*, std::vector<int>*)> callback);
  753. // Returns true when this repeated field is to be compared as a subset, ie.
  754. // has been configured to be treated as a set or map and scope is set to
  755. // PARTIAL.
  756. bool IsTreatedAsSubset(const FieldDescriptor* field);
  757. // Returns true if this field is to be ignored when this
  758. // MessageDifferencer compares messages.
  759. bool IsIgnored(const Message& message1, const Message& message2,
  760. const FieldDescriptor* field,
  761. const std::vector<SpecificField>& parent_fields);
  762. // Returns true if this unknown field is to be ignored when this
  763. // MessageDifferencer compares messages.
  764. bool IsUnknownFieldIgnored(const Message& message1, const Message& message2,
  765. const SpecificField& field,
  766. const std::vector<SpecificField>& parent_fields);
  // Returns MapKeyComparator* when this field has been configured to be treated
  // as a map or its is_map() returns true. If not, returns NULL.
  769. const MapKeyComparator* GetMapKeyComparator(
  770. const FieldDescriptor* field) const;
  771. // Attempts to match indices of a repeated field, so that the contained values
  772. // match. Clears output vectors and sets their values to indices of paired
  773. // messages, ie. if message1[0] matches message2[1], then match_list1[0] == 1
  774. // and match_list2[1] == 0. The unmatched indices are indicated by -1.
  775. // Assumes the repeated field is not treated as a simple list.
  776. // This method returns false if the match failed. However, it doesn't mean
  777. // that the comparison succeeds when this method returns true (you need to
  778. // double-check in this case).
  779. bool MatchRepeatedFieldIndices(
  780. const Message& message1, const Message& message2,
  781. const FieldDescriptor* repeated_field,
  782. const MapKeyComparator* key_comparator,
  783. const std::vector<SpecificField>& parent_fields,
  784. std::vector<int>* match_list1, std::vector<int>* match_list2);
  785. // Checks if index is equal to new_index in all the specific fields.
  786. static bool CheckPathChanged(const std::vector<SpecificField>& parent_fields);
  787. // CHECKs that the given repeated field can be compared according to
  788. // new_comparison.
  789. void CheckRepeatedFieldComparisons(
  790. const FieldDescriptor* field,
  791. const RepeatedFieldComparison& new_comparison);
  792. // Defines a map between field descriptors and their MapKeyComparators.
  793. // Used for repeated fields when they are configured as TreatAsMap.
  794. typedef std::map<const FieldDescriptor*, const MapKeyComparator*>
  795. FieldKeyComparatorMap;
  796. // Defines a set to store field descriptors. Used for repeated fields when
  797. // they are configured as TreatAsSet.
  798. typedef std::set<const FieldDescriptor*> FieldSet;
  799. typedef std::map<const FieldDescriptor*, RepeatedFieldComparison> FieldMap;
  800. Reporter* reporter_;
  801. DefaultFieldComparator default_field_comparator_;
  802. MessageFieldComparison message_field_comparison_;
  803. Scope scope_;
  804. RepeatedFieldComparison repeated_field_comparison_;
  805. FieldMap repeated_field_comparisons_;
  806. // Keeps track of MapKeyComparators that are created within
  807. // MessageDifferencer. These MapKeyComparators should be deleted
  808. // before MessageDifferencer is destroyed.
  809. // When TreatAsMap or TreatAsMapWithMultipleFieldsAsKey is called, we don't
  810. // store the supplied FieldDescriptors directly. Instead, a new
  811. // MapKeyComparator is created for comparison purpose.
  812. std::vector<MapKeyComparator*> owned_key_comparators_;
  813. FieldKeyComparatorMap map_field_key_comparator_;
  814. MapEntryKeyComparator map_entry_key_comparator_;
  815. std::vector<IgnoreCriteria*> ignore_criteria_;
  816. // Reused multiple times in RetrieveFields to avoid extra allocations
  817. std::vector<const FieldDescriptor*> tmp_message_fields_;
  818. FieldSet ignored_fields_;
  819. union {
  820. DefaultFieldComparator* default_impl;
  821. FieldComparator* base;
  822. } field_comparator_ = {&default_field_comparator_};
  823. enum { kFCDefault, kFCBase } field_comparator_kind_ = kFCDefault;
  824. bool report_matches_;
  825. bool report_moves_;
  826. bool report_ignores_;
  827. std::string* output_string_;
  828. // Callback to post-process the matched indices to support SMART_LIST.
  829. std::function<void(std::vector<int>*, std::vector<int>*)>
  830. match_indices_for_smart_list_callback_;
  831. MessageDifferencer::UnpackAnyField unpack_any_field_;
  832. GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(MessageDifferencer);
  833. };
  834. // This class provides extra information to the FieldComparator::Compare
  835. // function.
  836. class PROTOBUF_EXPORT FieldContext {
  837. public:
  838. explicit FieldContext(
  839. std::vector<MessageDifferencer::SpecificField>* parent_fields)
  840. : parent_fields_(parent_fields) {}
  841. std::vector<MessageDifferencer::SpecificField>* parent_fields() const {
  842. return parent_fields_;
  843. }
  844. private:
  845. std::vector<MessageDifferencer::SpecificField>* parent_fields_;
  846. };
  847. } // namespace util
  848. } // namespace protobuf
  849. } // namespace google
  850. #include <google/protobuf/port_undef.inc>
  851. #endif // GOOGLE_PROTOBUF_UTIL_MESSAGE_DIFFERENCER_H__