You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

decode.h 7.2 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228
  1. /*
  2. * Copyright (c) 2009-2021, Google LLC
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are met:
  7. * * Redistributions of source code must retain the above copyright
  8. * notice, this list of conditions and the following disclaimer.
  9. * * Redistributions in binary form must reproduce the above copyright
  10. * notice, this list of conditions and the following disclaimer in the
  11. * documentation and/or other materials provided with the distribution.
  12. * * Neither the name of Google LLC nor the
  13. * names of its contributors may be used to endorse or promote products
  14. * derived from this software without specific prior written permission.
  15. *
  16. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  17. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  18. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  19. * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
  20. * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  21. * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  22. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  23. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  24. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  25. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26. */
  27. /*
  28. * Internal implementation details of the decoder that are shared between
  29. * decode.c and decode_fast.c.
  30. */
  31. #ifndef UPB_INTERNAL_DECODE_H_
  32. #define UPB_INTERNAL_DECODE_H_
  33. #include <setjmp.h>
  34. #include "upb/decode.h"
  35. #include "upb/internal/upb.h"
  36. #include "upb/msg_internal.h"
  37. #include "third_party/utf8_range/utf8_range.h"
  38. /* Must be last. */
  39. #include "upb/port_def.inc"
  /* Sentinel meaning "no END_GROUP tag is expected": the largest uint32_t
   * value. The expansion is fully parenthesized so that it always behaves as a
   * single operand, e.g. `sizeof DECODE_NOGROUP` yields sizeof(uint32_t)
   * rather than `sizeof(uint32_t) - 1`. */
  #define DECODE_NOGROUP ((uint32_t)-1)
  41. typedef struct upb_Decoder
  42. {
  43. const char* end; /* Can read up to 16 bytes slop beyond this. */
  44. const char* limit_ptr; /* = end + UPB_MIN(limit, 0) */
  45. upb_Message* unknown_msg; /* If non-NULL, add unknown data at buffer flip. */
  46. const char* unknown; /* Start of unknown data. */
  47. const upb_ExtensionRegistry*
  48. extreg; /* For looking up extensions during the parse. */
  49. int limit; /* Submessage limit relative to end. */
  50. int depth; /* Tracks recursion depth to bound stack usage. */
  51. uint32_t end_group; /* field number of END_GROUP tag, else DECODE_NOGROUP */
  52. uint16_t options;
  53. bool missing_required;
  54. char patch[32];
  55. upb_Arena arena;
  56. jmp_buf err;
  57. #ifndef NDEBUG
  58. const char* debug_tagstart;
  59. const char* debug_valstart;
  60. #endif
  61. } upb_Decoder;
  62. /* Error function that will abort decoding with longjmp(). We can't declare this
  63. * UPB_NORETURN, even though it is appropriate, because if we do then compilers
  64. * will "helpfully" refuse to tailcall to it
  65. * (see: https://stackoverflow.com/a/55657013), which will defeat a major goal
  66. * of our optimizations. That is also why we must declare it in a separate file,
  67. * otherwise the compiler will see that it calls longjmp() and deduce that it is
  68. * noreturn. */
  69. const char* fastdecode_err(upb_Decoder* d, int status);
  70. extern const uint8_t upb_utf8_offsets[];
  71. UPB_INLINE
  72. bool decode_verifyutf8_inl(const char* ptr, int len)
  73. {
  74. const char* end = ptr + len;
  75. // Check 8 bytes at a time for any non-ASCII char.
  76. while (end - ptr >= 8)
  77. {
  78. uint64_t data;
  79. memcpy(&data, ptr, 8);
  80. if (data & 0x8080808080808080)
  81. goto non_ascii;
  82. ptr += 8;
  83. }
  84. // Check one byte at a time for non-ASCII.
  85. while (ptr < end)
  86. {
  87. if (*ptr & 0x80)
  88. goto non_ascii;
  89. ptr++;
  90. }
  91. return true;
  92. non_ascii:
  93. return utf8_range2((const unsigned char*)ptr, end - ptr) == 0;
  94. }
  95. const char* decode_checkrequired(upb_Decoder* d, const char* ptr, const upb_Message* msg, const upb_MiniTable* l);
  96. /* x86-64 pointers always have the high 16 bits matching. So we can shift
  97. * left 8 and right 8 without loss of information. */
  98. UPB_INLINE intptr_t decode_totable(const upb_MiniTable* tablep)
  99. {
  100. return ((intptr_t)tablep << 8) | tablep->table_mask;
  101. }
  102. UPB_INLINE const upb_MiniTable* decode_totablep(intptr_t table)
  103. {
  104. return (const upb_MiniTable*)(table >> 8);
  105. }
  106. UPB_INLINE
  107. const char* decode_isdonefallback_inl(upb_Decoder* d, const char* ptr, int overrun, int* status)
  108. {
  109. if (overrun < d->limit)
  110. {
  111. /* Need to copy remaining data into patch buffer. */
  112. UPB_ASSERT(overrun < 16);
  113. if (d->unknown_msg)
  114. {
  115. if (!_upb_Message_AddUnknown(d->unknown_msg, d->unknown, ptr - d->unknown, &d->arena))
  116. {
  117. *status = kUpb_DecodeStatus_OutOfMemory;
  118. return NULL;
  119. }
  120. d->unknown = &d->patch[0] + overrun;
  121. }
  122. memset(d->patch + 16, 0, 16);
  123. memcpy(d->patch, d->end, 16);
  124. ptr = &d->patch[0] + overrun;
  125. d->end = &d->patch[16];
  126. d->limit -= 16;
  127. d->limit_ptr = d->end + d->limit;
  128. d->options &= ~kUpb_DecodeOption_AliasString;
  129. UPB_ASSERT(ptr < d->limit_ptr);
  130. return ptr;
  131. }
  132. else
  133. {
  134. *status = kUpb_DecodeStatus_Malformed;
  135. return NULL;
  136. }
  137. }
  138. const char* decode_isdonefallback(upb_Decoder* d, const char* ptr, int overrun);
  139. UPB_INLINE
  140. bool decode_isdone(upb_Decoder* d, const char** ptr)
  141. {
  142. int overrun = *ptr - d->end;
  143. if (UPB_LIKELY(*ptr < d->limit_ptr))
  144. {
  145. return false;
  146. }
  147. else if (UPB_LIKELY(overrun == d->limit))
  148. {
  149. return true;
  150. }
  151. else
  152. {
  153. *ptr = decode_isdonefallback(d, *ptr, overrun);
  154. return false;
  155. }
  156. }
  #if UPB_FASTTABLE
  /* Selects a fasttable entry from `tag` and tail-calls its field parser.
   *
   * The low 8 bits of `table` are used as a mask on the tag; the masked value
   * (asserted to be a multiple of 8) divided by 8 is the fasttable index. The
   * parser receives the entry's field_data XOR'ed with the tag. */
  UPB_INLINE
  const char* fastdecode_tagdispatch(upb_Decoder* d, const char* ptr,
                                     upb_Message* msg, intptr_t table,
                                     uint64_t hasbits, uint64_t tag) {
    const upb_MiniTable* mt = decode_totablep(table);
    uint8_t slot_mask = table;
    size_t slot = tag & slot_mask;
    uint64_t parser_data;

    UPB_ASSUME((slot & 7) == 0);
    slot >>= 3;

    parser_data = mt->fasttable[slot].field_data ^ tag;
    UPB_MUSTTAIL return mt->fasttable[slot].field_parser(d, ptr, msg, table,
                                                         hasbits, parser_data);
  }
  #endif
  171. UPB_INLINE uint32_t fastdecode_loadtag(const char* ptr)
  172. {
  173. uint16_t tag;
  174. memcpy(&tag, ptr, 2);
  175. return tag;
  176. }
  177. UPB_INLINE void decode_checklimit(upb_Decoder* d)
  178. {
  179. UPB_ASSERT(d->limit_ptr == d->end + UPB_MIN(0, d->limit));
  180. }
  181. UPB_INLINE int decode_pushlimit(upb_Decoder* d, const char* ptr, int size)
  182. {
  183. int limit = size + (int)(ptr - d->end);
  184. int delta = d->limit - limit;
  185. decode_checklimit(d);
  186. d->limit = limit;
  187. d->limit_ptr = d->end + UPB_MIN(0, limit);
  188. decode_checklimit(d);
  189. return delta;
  190. }
  191. UPB_INLINE void decode_poplimit(upb_Decoder* d, const char* ptr, int saved_delta)
  192. {
  193. UPB_ASSERT(ptr - d->end == d->limit);
  194. decode_checklimit(d);
  195. d->limit += saved_delta;
  196. d->limit_ptr = d->end + UPB_MIN(0, d->limit);
  197. decode_checklimit(d);
  198. }
  199. #include "upb/port_undef.inc"
  200. #endif /* UPB_INTERNAL_DECODE_H_ */