You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

strutil.h 46 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001
  1. // Protocol Buffers - Google's data interchange format
  2. // Copyright 2008 Google Inc. All rights reserved.
  3. // https://developers.google.com/protocol-buffers/
  4. //
  5. // Redistribution and use in source and binary forms, with or without
  6. // modification, are permitted provided that the following conditions are
  7. // met:
  8. //
  9. // * Redistributions of source code must retain the above copyright
  10. // notice, this list of conditions and the following disclaimer.
  11. // * Redistributions in binary form must reproduce the above
  12. // copyright notice, this list of conditions and the following disclaimer
  13. // in the documentation and/or other materials provided with the
  14. // distribution.
  15. // * Neither the name of Google Inc. nor the names of its
  16. // contributors may be used to endorse or promote products derived from
  17. // this software without specific prior written permission.
  18. //
  19. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  20. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  21. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  22. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  23. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  25. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  26. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  27. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  28. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  29. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30. // from google3/strings/strutil.h
  31. #ifndef GOOGLE_PROTOBUF_STUBS_STRUTIL_H__
  32. #define GOOGLE_PROTOBUF_STUBS_STRUTIL_H__
  33. #include <google/protobuf/stubs/common.h>
  34. #include <google/protobuf/stubs/stringpiece.h>
  35. #include <stdlib.h>
  36. #include <cstring>
  37. #include <google/protobuf/port_def.inc>
  38. #include <vector>
  39. namespace google
  40. {
  41. namespace protobuf
  42. {
  43. #if defined(_MSC_VER) && _MSC_VER < 1800
  44. #define strtoll _strtoi64
  45. #define strtoull _strtoui64
  46. #elif defined(__DECCXX) && defined(__osf__)
  47. // HP C++ on Tru64 does not have strtoll, but strtol is already 64-bit.
  48. #define strtoll strtol
  49. #define strtoull strtoul
  50. #endif
  51. // ----------------------------------------------------------------------
  52. // ascii_isalnum()
  53. // Check if an ASCII character is alphanumeric. We can't use ctype's
  54. // isalnum() because it is affected by locale. This function is applied
  55. // to identifiers in the protocol buffer language, not to natural-language
  56. // strings, so locale should not be taken into account.
  57. // ascii_isdigit()
  58. // Like above, but only accepts digits.
  59. // ascii_isspace()
  60. // Check if the character is a space character.
  61. // ----------------------------------------------------------------------
  // Returns true when `c` is an ASCII letter ('a'-'z', 'A'-'Z') or an ASCII
  // decimal digit ('0'-'9'). Only plain character comparisons are used, so
  // the result never depends on the process locale.
  inline bool ascii_isalnum(char c) {
    if (c >= '0' && c <= '9') return true;
    if (c >= 'A' && c <= 'Z') return true;
    return c >= 'a' && c <= 'z';
  }
  // Returns true when `c` is one of the ASCII decimal digits '0' through '9'.
  inline bool ascii_isdigit(char c) {
    const bool below_zero = c < '0';
    const bool above_nine = c > '9';
    return !(below_zero || above_nine);
  }
  // Returns true when `c` is one of the six ASCII whitespace characters:
  // space, horizontal tab, newline, vertical tab, form feed, carriage return.
  inline bool ascii_isspace(char c) {
    switch (c) {
      case ' ':
      case '\t':
      case '\n':
      case '\v':
      case '\f':
      case '\r':
        return true;
      default:
        return false;
    }
  }
  // Returns true when `c` lies in the ASCII range 'A'..'Z'.
  inline bool ascii_isupper(char c) {
    return !(c < 'A' || c > 'Z');
  }
  // Returns true when `c` lies in the ASCII range 'a'..'z'.
  inline bool ascii_islower(char c) {
    return !(c < 'a' || c > 'z');
  }
  85. inline char ascii_toupper(char c)
  86. {
  87. return ascii_islower(c) ? c - ('a' - 'A') : c;
  88. }
  89. inline char ascii_tolower(char c)
  90. {
  91. return ascii_isupper(c) ? c + ('a' - 'A') : c;
  92. }
  // Converts one hexadecimal digit character to its numeric value (0-15).
  // Assumes ASCII: codes above '9' are shifted by 9 so that the low nibble of
  // 'a'/'A' becomes 10, then only the low four bits are kept. The input is not
  // validated; a non-hex character still yields some value in 0..15.
  inline int hex_digit_to_int(char c) {
    const int code = static_cast<unsigned char>(c);
    const int shifted = (code > '9') ? code + 9 : code;
    return shifted & 0xf;
  }
  103. // ----------------------------------------------------------------------
  104. // HasPrefixString()
  105. // Check if a string begins with a given prefix.
  106. // StripPrefixString()
  107. // Given a string and a putative prefix, returns the string minus the
  108. // prefix string if the prefix matches, otherwise the original
  109. // string.
  110. // ----------------------------------------------------------------------
  111. inline bool HasPrefixString(StringPiece str, StringPiece prefix)
  112. {
  113. return str.size() >= prefix.size() &&
  114. memcmp(str.data(), prefix.data(), prefix.size()) == 0;
  115. }
  116. inline std::string StripPrefixString(const std::string& str, const std::string& prefix)
  117. {
  118. if (HasPrefixString(str, prefix))
  119. {
  120. return str.substr(prefix.size());
  121. }
  122. else
  123. {
  124. return str;
  125. }
  126. }
  127. // ----------------------------------------------------------------------
  128. // HasSuffixString()
  129. // Return true if str ends in suffix.
  130. // StripSuffixString()
  131. // Given a string and a putative suffix, returns the string minus the
  132. // suffix string if the suffix matches, otherwise the original
  133. // string.
  134. // ----------------------------------------------------------------------
  135. inline bool HasSuffixString(StringPiece str, StringPiece suffix)
  136. {
  137. return str.size() >= suffix.size() &&
  138. memcmp(str.data() + str.size() - suffix.size(), suffix.data(), suffix.size()) == 0;
  139. }
  140. inline std::string StripSuffixString(const std::string& str, const std::string& suffix)
  141. {
  142. if (HasSuffixString(str, suffix))
  143. {
  144. return str.substr(0, str.size() - suffix.size());
  145. }
  146. else
  147. {
  148. return str;
  149. }
  150. }
  151. // ----------------------------------------------------------------------
  152. // ReplaceCharacters
  153. // Replaces any occurrence of the character 'remove' (or the characters
  154. // in 'remove') with the character 'replacewith'.
  155. // Good for keeping html characters or protocol characters (\t) out
  156. // of places where they might cause a problem.
  157. // StripWhitespace
  158. // Removes whitespaces from both ends of the given string.
  159. // ----------------------------------------------------------------------
  160. PROTOBUF_EXPORT void ReplaceCharacters(std::string* s, const char* remove, char replacewith);
  161. PROTOBUF_EXPORT void StripWhitespace(std::string* s);
  162. // ----------------------------------------------------------------------
  163. // LowerString()
  164. // UpperString()
  165. // ToUpper()
  166. // Convert the characters in "s" to lowercase or uppercase. ASCII-only:
  167. // these functions intentionally ignore locale because they are applied to
  168. // identifiers used in the Protocol Buffer language, not to natural-language
  169. // strings.
  170. // ----------------------------------------------------------------------
  // Lowercases every ASCII uppercase letter of `*s` in place. Other bytes are
  // left untouched. tolower() is deliberately avoided because its result
  // changes with the locale.
  inline void LowerString(std::string* s) {
    for (char& ch : *s) {
      if (ch >= 'A' && ch <= 'Z') {
        ch += 'a' - 'A';
      }
    }
  }
  // Uppercases every ASCII lowercase letter of `*s` in place. Other bytes are
  // left untouched. toupper() is deliberately avoided because its result
  // changes with the locale.
  inline void UpperString(std::string* s) {
    const std::string::size_type length = s->size();
    for (std::string::size_type pos = 0; pos < length; ++pos) {
      char& ch = (*s)[pos];
      if (ch >= 'a' && ch <= 'z') {
        ch += 'A' - 'a';
      }
    }
  }
  191. inline void ToUpper(std::string* s)
  192. {
  193. UpperString(s);
  194. }
  195. inline std::string ToUpper(const std::string& s)
  196. {
  197. std::string out = s;
  198. UpperString(&out);
  199. return out;
  200. }
  201. // ----------------------------------------------------------------------
  202. // StringReplace()
  203. // Given a string and two patterns "oldsub" and "newsub", replaces the
  204. // first instance of "oldsub" in the string with "newsub", if it exists
  205. // (presumably every instance when replace_all is true). RETURNS a new
  206. // string, regardless of whether any replacement happened.
  207. // ----------------------------------------------------------------------
  208. PROTOBUF_EXPORT std::string StringReplace(const std::string& s, const std::string& oldsub, const std::string& newsub, bool replace_all);
  209. // ----------------------------------------------------------------------
  210. // SplitStringUsing()
  211. // Split a string using a character delimiter. Append the components
  212. // to 'res'. If there are consecutive delimiters, this function skips
  213. // over all of them.
  214. // ----------------------------------------------------------------------
  215. PROTOBUF_EXPORT void SplitStringUsing(StringPiece full, const char* delim, std::vector<std::string>* res);
  216. // Split a string using one or more byte delimiters, presented
  217. // as a nul-terminated c string. Append the components to 'result'.
  218. // If there are consecutive delimiters, this function will return
  219. // corresponding empty strings. If you want to drop the empty
  220. // strings, try SplitStringUsing().
  221. //
  222. // If "full" is the empty string, yields an empty string as the only value.
  223. // ----------------------------------------------------------------------
  224. PROTOBUF_EXPORT void SplitStringAllowEmpty(StringPiece full, const char* delim, std::vector<std::string>* result);
  225. // ----------------------------------------------------------------------
  226. // Split()
  227. // Split a string using a character delimiter.
  228. // ----------------------------------------------------------------------
  229. inline std::vector<std::string> Split(StringPiece full, const char* delim, bool skip_empty = true)
  230. {
  231. std::vector<std::string> result;
  232. if (skip_empty)
  233. {
  234. SplitStringUsing(full, delim, &result);
  235. }
  236. else
  237. {
  238. SplitStringAllowEmpty(full, delim, &result);
  239. }
  240. return result;
  241. }
  242. // ----------------------------------------------------------------------
  243. // JoinStrings()
  244. // These methods concatenate a vector of strings into a C++ string, using
  245. // the C-string "delim" as a separator between components. There are two
  246. // flavors of the function, one flavor returns the concatenated string,
  247. // another takes a pointer to the target string. In the latter case the
  248. // target string is cleared and overwritten.
  249. // ----------------------------------------------------------------------
  250. PROTOBUF_EXPORT void JoinStrings(const std::vector<std::string>& components, const char* delim, std::string* result);
  251. inline std::string JoinStrings(const std::vector<std::string>& components, const char* delim)
  252. {
  253. std::string result;
  254. JoinStrings(components, delim, &result);
  255. return result;
  256. }
  257. // ----------------------------------------------------------------------
  258. // UnescapeCEscapeSequences()
  259. // Copies "source" to "dest", rewriting C-style escape sequences
  260. // -- '\n', '\r', '\\', '\ooo', etc -- to their ASCII
  261. // equivalents. "dest" must be sufficiently large to hold all
  262. // the characters in the rewritten string (i.e. at least as large
  263. // as strlen(source) + 1 should be safe, since the replacements
  264. // are always shorter than the original escaped sequences). It's
  265. // safe for source and dest to be the same. RETURNS the length
  266. // of dest.
  267. //
  268. // It allows hex sequences \xhh, or generally \xhhhhh with an
  269. // arbitrary number of hex digits, but all of them together must
  270. // specify a value of a single byte (e.g. \x0045 is equivalent
  271. // to \x45, and \x1234 is erroneous).
  272. //
  273. // It also allows escape sequences of the form \uhhhh (exactly four
  274. // hex digits, upper or lower case) or \Uhhhhhhhh (exactly eight
  275. // hex digits, upper or lower case) to specify a Unicode code
  276. // point. The dest array will contain the UTF8-encoded version of
  277. // that code-point (e.g., if source contains \u2019, then dest will
  278. // contain the three bytes 0xE2, 0x80, and 0x99).
  279. //
  280. // Errors: In the first form of the call, errors are reported with
  281. // LOG(ERROR). The same is true for the second form of the call if
  282. // the pointer to the string std::vector is nullptr; otherwise, error
  283. // messages are stored in the std::vector. In either case, the effect on
  284. // the dest array is not defined, but rest of the source will be
  285. // processed.
  286. // ----------------------------------------------------------------------
  287. PROTOBUF_EXPORT int UnescapeCEscapeSequences(const char* source, char* dest);
  288. PROTOBUF_EXPORT int UnescapeCEscapeSequences(const char* source, char* dest, std::vector<std::string>* errors);
  289. // ----------------------------------------------------------------------
  290. // UnescapeCEscapeString()
  291. // This does the same thing as UnescapeCEscapeSequences, but creates
  292. // a new string. The caller does not need to worry about allocating
  293. // a dest buffer. This should be used for non performance critical
  294. // tasks such as printing debug messages. It is safe for src and dest
  295. // to be the same.
  296. //
  297. // The second call stores its errors in a supplied string vector.
  298. // If the string vector pointer is nullptr, it reports the errors with LOG().
  299. //
  300. // In the first and second calls, the length of dest is returned. In
  301. // the third call, the new string is returned.
  302. // ----------------------------------------------------------------------
  303. PROTOBUF_EXPORT int UnescapeCEscapeString(const std::string& src, std::string* dest);
  304. PROTOBUF_EXPORT int UnescapeCEscapeString(const std::string& src, std::string* dest, std::vector<std::string>* errors);
  305. PROTOBUF_EXPORT std::string UnescapeCEscapeString(const std::string& src);
  306. // ----------------------------------------------------------------------
  307. // CEscape()
  308. // Escapes 'src' using C-style escape sequences and returns the resulting
  309. // string.
  310. //
  311. // Escaped chars: \n, \r, \t, ", ', \, and !isprint().
  312. // ----------------------------------------------------------------------
  313. PROTOBUF_EXPORT std::string CEscape(const std::string& src);
  314. // ----------------------------------------------------------------------
  315. // CEscapeAndAppend()
  316. // Escapes 'src' using C-style escape sequences, and appends the escaped
  317. // string to 'dest'.
  318. // ----------------------------------------------------------------------
  319. PROTOBUF_EXPORT void CEscapeAndAppend(StringPiece src, std::string* dest);
  320. namespace strings
  321. {
  322. // Like CEscape() but does not escape bytes with the upper bit set.
  323. PROTOBUF_EXPORT std::string Utf8SafeCEscape(const std::string& src);
  324. // Like CEscape() but uses hex (\x) escapes instead of octals.
  325. PROTOBUF_EXPORT std::string CHexEscape(const std::string& src);
  326. } // namespace strings
  327. // ----------------------------------------------------------------------
  328. // strto32()
  329. // strtou32()
  330. // strto64()
  331. // strtou64()
  332. // Architecture-neutral plug compatible replacements for strtol() and
  333. // strtoul(). Long's have different lengths on ILP-32 and LP-64
  334. // platforms, so using these is safer, from the point of view of
  335. // overflow behavior, than using the standard libc functions.
  336. // ----------------------------------------------------------------------
  337. PROTOBUF_EXPORT int32_t strto32_adaptor(const char* nptr, char** endptr, int base);
  338. PROTOBUF_EXPORT uint32_t strtou32_adaptor(const char* nptr, char** endptr, int base);
  339. inline int32_t strto32(const char* nptr, char** endptr, int base)
  340. {
  341. if (sizeof(int32_t) == sizeof(long))
  342. return strtol(nptr, endptr, base);
  343. else
  344. return strto32_adaptor(nptr, endptr, base);
  345. }
  346. inline uint32_t strtou32(const char* nptr, char** endptr, int base)
  347. {
  348. if (sizeof(uint32_t) == sizeof(unsigned long))
  349. return strtoul(nptr, endptr, base);
  350. else
  351. return strtou32_adaptor(nptr, endptr, base);
  352. }
  353. // For now, long long is 64-bit on all the platforms we care about, so these
  354. // functions can simply pass the call to strto[u]ll.
  // 64-bit counterpart of strtol(): forwards to strtoll(). The static_assert
  // rejects any platform where `long long` is not the same size as int64_t.
  inline int64_t strto64(const char* nptr, char** endptr, int base) {
    static_assert(sizeof(long long) == sizeof(int64_t), "sizeof int64_t is not sizeof long long");
    const long long parsed = strtoll(nptr, endptr, base);
    return parsed;
  }
  // 64-bit counterpart of strtoul(): forwards to strtoull(). The static_assert
  // rejects any platform where `unsigned long long` is not the same size as
  // uint64_t.
  inline uint64_t strtou64(const char* nptr, char** endptr, int base) {
    static_assert(sizeof(unsigned long long) == sizeof(uint64_t), "sizeof uint64_t is not sizeof unsigned long long");
    const unsigned long long parsed = strtoull(nptr, endptr, base);
    return parsed;
  }
  365. // ----------------------------------------------------------------------
  366. // safe_strtob()
  367. // safe_strto32()
  368. // safe_strtou32()
  369. // safe_strto64()
  370. // safe_strtou64()
  371. // safe_strtof()
  372. // safe_strtod()
  373. // ----------------------------------------------------------------------
  374. PROTOBUF_EXPORT bool safe_strtob(StringPiece str, bool* value);
  375. PROTOBUF_EXPORT bool safe_strto32(const std::string& str, int32_t* value);
  376. PROTOBUF_EXPORT bool safe_strtou32(const std::string& str, uint32_t* value);
  377. inline bool safe_strto32(const char* str, int32_t* value)
  378. {
  379. return safe_strto32(std::string(str), value);
  380. }
  381. inline bool safe_strto32(StringPiece str, int32_t* value)
  382. {
  383. return safe_strto32(str.ToString(), value);
  384. }
  385. inline bool safe_strtou32(const char* str, uint32_t* value)
  386. {
  387. return safe_strtou32(std::string(str), value);
  388. }
  389. inline bool safe_strtou32(StringPiece str, uint32_t* value)
  390. {
  391. return safe_strtou32(str.ToString(), value);
  392. }
  393. PROTOBUF_EXPORT bool safe_strto64(const std::string& str, int64_t* value);
  394. PROTOBUF_EXPORT bool safe_strtou64(const std::string& str, uint64_t* value);
  395. inline bool safe_strto64(const char* str, int64_t* value)
  396. {
  397. return safe_strto64(std::string(str), value);
  398. }
  399. inline bool safe_strto64(StringPiece str, int64_t* value)
  400. {
  401. return safe_strto64(str.ToString(), value);
  402. }
  403. inline bool safe_strtou64(const char* str, uint64_t* value)
  404. {
  405. return safe_strtou64(std::string(str), value);
  406. }
  407. inline bool safe_strtou64(StringPiece str, uint64_t* value)
  408. {
  409. return safe_strtou64(str.ToString(), value);
  410. }
  411. PROTOBUF_EXPORT bool safe_strtof(const char* str, float* value);
  412. PROTOBUF_EXPORT bool safe_strtod(const char* str, double* value);
  413. inline bool safe_strtof(const std::string& str, float* value)
  414. {
  415. return safe_strtof(str.c_str(), value);
  416. }
  417. inline bool safe_strtod(const std::string& str, double* value)
  418. {
  419. return safe_strtod(str.c_str(), value);
  420. }
  421. inline bool safe_strtof(StringPiece str, float* value)
  422. {
  423. return safe_strtof(str.ToString(), value);
  424. }
  425. inline bool safe_strtod(StringPiece str, double* value)
  426. {
  427. return safe_strtod(str.ToString(), value);
  428. }
  429. // ----------------------------------------------------------------------
  430. // FastIntToBuffer()
  431. // FastHexToBuffer()
  432. // FastHex64ToBuffer()
  433. // FastHex32ToBuffer()
  434. // FastTimeToBuffer()
  435. // These are intended for speed. FastIntToBuffer() assumes the
  436. // integer is non-negative. FastHexToBuffer() puts output in
  437. // hex rather than decimal. FastTimeToBuffer() puts the output
  438. // into RFC822 format.
  439. //
  440. // FastHex64ToBuffer() puts a 64-bit unsigned value in hex-format,
  441. // padded to exactly 16 bytes (plus one byte for '\0')
  442. //
  443. // FastHex32ToBuffer() puts a 32-bit unsigned value in hex-format,
  444. // padded to exactly 8 bytes (plus one byte for '\0')
  445. //
  446. // All functions take the output buffer as an arg.
  447. // They all return a pointer to the beginning of the output,
  448. // which may not be the beginning of the input buffer.
  449. // ----------------------------------------------------------------------
  450. // Suggested buffer size for FastToBuffer functions. Also works with
  451. // DoubleToBuffer() and FloatToBuffer().
  452. static const int kFastToBufferSize = 32;
  453. PROTOBUF_EXPORT char* FastInt32ToBuffer(int32_t i, char* buffer);
  454. PROTOBUF_EXPORT char* FastInt64ToBuffer(int64_t i, char* buffer);
  455. char* FastUInt32ToBuffer(uint32_t i, char* buffer); // inline below
  456. char* FastUInt64ToBuffer(uint64_t i, char* buffer); // inline below
  457. PROTOBUF_EXPORT char* FastHexToBuffer(int i, char* buffer);
  458. PROTOBUF_EXPORT char* FastHex64ToBuffer(uint64_t i, char* buffer);
  459. PROTOBUF_EXPORT char* FastHex32ToBuffer(uint32_t i, char* buffer);
  460. // For the functions below, buffer must be at least 22 bytes long
  460. // (kFastToBufferSize is sufficient).
  461. inline char* FastIntToBuffer(int i, char* buffer)
  462. {
  463. return (sizeof(i) == 4 ? FastInt32ToBuffer(i, buffer) : FastInt64ToBuffer(i, buffer));
  464. }
  465. inline char* FastUIntToBuffer(unsigned int i, char* buffer)
  466. {
  467. return (sizeof(i) == 4 ? FastUInt32ToBuffer(i, buffer) : FastUInt64ToBuffer(i, buffer));
  468. }
  469. inline char* FastLongToBuffer(long i, char* buffer)
  470. {
  471. return (sizeof(i) == 4 ? FastInt32ToBuffer(i, buffer) : FastInt64ToBuffer(i, buffer));
  472. }
  473. inline char* FastULongToBuffer(unsigned long i, char* buffer)
  474. {
  475. return (sizeof(i) == 4 ? FastUInt32ToBuffer(i, buffer) : FastUInt64ToBuffer(i, buffer));
  476. }
  477. // ----------------------------------------------------------------------
  478. // FastInt32ToBufferLeft()
  479. // FastUInt32ToBufferLeft()
  480. // FastInt64ToBufferLeft()
  481. // FastUInt64ToBufferLeft()
  482. //
  483. // Like the Fast*ToBuffer() functions above, these are intended for speed.
  484. // Unlike the Fast*ToBuffer() functions, however, these functions write
  485. // their output to the beginning of the buffer (hence the name, as the
  486. // output is left-aligned). The caller is responsible for ensuring that
  487. // the buffer has enough space to hold the output.
  488. //
  489. // Returns a pointer to the end of the string (i.e. the null character
  490. // terminating the string).
  491. // ----------------------------------------------------------------------
  492. PROTOBUF_EXPORT char* FastInt32ToBufferLeft(int32_t i, char* buffer);
  493. PROTOBUF_EXPORT char* FastUInt32ToBufferLeft(uint32_t i, char* buffer);
  494. PROTOBUF_EXPORT char* FastInt64ToBufferLeft(int64_t i, char* buffer);
  495. PROTOBUF_EXPORT char* FastUInt64ToBufferLeft(uint64_t i, char* buffer);
  496. // Just define these in terms of the above.
  497. inline char* FastUInt32ToBuffer(uint32_t i, char* buffer)
  498. {
  499. FastUInt32ToBufferLeft(i, buffer);
  500. return buffer;
  501. }
  502. inline char* FastUInt64ToBuffer(uint64_t i, char* buffer)
  503. {
  504. FastUInt64ToBufferLeft(i, buffer);
  505. return buffer;
  506. }
  // Returns the string "true" for true and "false" for false.
  inline std::string SimpleBtoa(bool value) {
    if (value) {
      return std::string("true");
    }
    return std::string("false");
  }
  511. // ----------------------------------------------------------------------
  512. // SimpleItoa()
  513. // Description: converts an integer to a string.
  514. //
  515. // Return value: string
  516. // ----------------------------------------------------------------------
  517. PROTOBUF_EXPORT std::string SimpleItoa(int i);
  518. PROTOBUF_EXPORT std::string SimpleItoa(unsigned int i);
  519. PROTOBUF_EXPORT std::string SimpleItoa(long i);
  520. PROTOBUF_EXPORT std::string SimpleItoa(unsigned long i);
  521. PROTOBUF_EXPORT std::string SimpleItoa(long long i);
  522. PROTOBUF_EXPORT std::string SimpleItoa(unsigned long long i);
  523. // ----------------------------------------------------------------------
  524. // SimpleDtoa()
  525. // SimpleFtoa()
  526. // DoubleToBuffer()
  527. // FloatToBuffer()
  528. // Description: converts a double or float to a string which, if
  529. // passed to NoLocaleStrtod(), will produce the exact same original double
  530. // (except in case of NaN; all NaNs are considered the same value).
  531. // We try to keep the string short but it's not guaranteed to be as
  532. // short as possible.
  533. //
  534. // DoubleToBuffer() and FloatToBuffer() write the text to the given
  535. // buffer and return it. The buffer must be at least
  536. // kDoubleToBufferSize bytes for doubles and kFloatToBufferSize
  537. // bytes for floats. kFastToBufferSize is also guaranteed to be large
  538. // enough to hold either.
  539. //
  540. // Return value: string
  541. // ----------------------------------------------------------------------
  542. PROTOBUF_EXPORT std::string SimpleDtoa(double value);
  543. PROTOBUF_EXPORT std::string SimpleFtoa(float value);
  544. PROTOBUF_EXPORT char* DoubleToBuffer(double i, char* buffer);
  545. PROTOBUF_EXPORT char* FloatToBuffer(float i, char* buffer);
  546. // In practice, doubles should never need more than 24 bytes and floats
  547. // should never need more than 14 (including null terminators), but we
  548. // overestimate to be safe.
  549. static const int kDoubleToBufferSize = 32;
  550. static const int kFloatToBufferSize = 24;
  551. namespace strings
  552. {
  // Padding selector carried by Hex. The enumerators are numbered
  // consecutively from NO_PAD == 1, so ZERO_PAD_n has the numeric value n.
  // NOTE(review): presumably ZERO_PAD_n asks for zero-padding to n hex
  // digits; the formatting code that consumes it is not in this header.
  enum PadSpec {
    NO_PAD = 1,
    ZERO_PAD_2,
    ZERO_PAD_3,
    ZERO_PAD_4,
    ZERO_PAD_5,
    ZERO_PAD_6,
    ZERO_PAD_7,
    ZERO_PAD_8,
    ZERO_PAD_9,
    ZERO_PAD_10,
    ZERO_PAD_11,
    ZERO_PAD_12,
    ZERO_PAD_13,
    ZERO_PAD_14,
    ZERO_PAD_15,
    ZERO_PAD_16,
  };

  // Holds an integer widened to 64 bits together with a PadSpec.
  //
  //   value: the input reinterpreted as the unsigned type of the SAME width
  //          and then zero-extended, so a negative 8/16/32-bit input does not
  //          sign-extend into the upper bits.
  //   spec:  the padding selector supplied by the caller (NO_PAD by default).
  struct Hex {
    uint64_t value;
    enum PadSpec spec;

    template <class Int>
    explicit Hex(Int v, PadSpec s = NO_PAD) : spec(s) {
      // Only 1-, 2-, 4- and 8-byte types are supported. This check used to be
      // compiled only under LANG_CXX11; the rest of the header already uses
      // static_assert unconditionally, so it is now always enforced.
      static_assert(
          sizeof(v) == 1 || sizeof(v) == 2 || sizeof(v) == 4 || sizeof(v) == 8,
          "Unknown integer type");
      // Prevent sign-extension by casting integers to their unsigned
      // counterparts before widening to uint64_t.
      value = sizeof(v) == 1   ? static_cast<uint8_t>(v)
              : sizeof(v) == 2 ? static_cast<uint16_t>(v)
              : sizeof(v) == 4 ? static_cast<uint32_t>(v)
                               : static_cast<uint64_t>(v);
    }
  };
  593. struct PROTOBUF_EXPORT AlphaNum
  594. {
  595. const char* piece_data_; // move these to string_ref eventually
  596. size_t piece_size_; // move these to string_ref eventually
  597. char digits[kFastToBufferSize];
  598. // No bool ctor -- bools convert to an integral type.
  599. // A bool ctor would also convert incoming pointers (bletch).
  600. AlphaNum(int i32) :
  601. piece_data_(digits),
  602. piece_size_(FastInt32ToBufferLeft(i32, digits) - &digits[0])
  603. {
  604. }
  605. AlphaNum(unsigned int u32) :
  606. piece_data_(digits),
  607. piece_size_(FastUInt32ToBufferLeft(u32, digits) - &digits[0])
  608. {
  609. }
  610. AlphaNum(long long i64) :
  611. piece_data_(digits),
  612. piece_size_(FastInt64ToBufferLeft(i64, digits) - &digits[0])
  613. {
  614. }
  615. AlphaNum(unsigned long long u64) :
  616. piece_data_(digits),
  617. piece_size_(FastUInt64ToBufferLeft(u64, digits) - &digits[0])
  618. {
  619. }
  620. // Note: on some architectures, "long" is only 32 bits, not 64, but the
  621. // performance hit of using FastInt64ToBufferLeft to handle 32-bit values
  622. // is quite minor.
  623. AlphaNum(long i64) :
  624. piece_data_(digits),
  625. piece_size_(FastInt64ToBufferLeft(i64, digits) - &digits[0])
  626. {
  627. }
  628. AlphaNum(unsigned long u64) :
  629. piece_data_(digits),
  630. piece_size_(FastUInt64ToBufferLeft(u64, digits) - &digits[0])
  631. {
  632. }
  633. AlphaNum(float f) :
  634. piece_data_(digits),
  635. piece_size_(strlen(FloatToBuffer(f, digits)))
  636. {
  637. }
  638. AlphaNum(double f) :
  639. piece_data_(digits),
  640. piece_size_(strlen(DoubleToBuffer(f, digits)))
  641. {
  642. }
  643. AlphaNum(Hex hex);
  644. AlphaNum(const char* c_str) :
  645. piece_data_(c_str),
  646. piece_size_(strlen(c_str))
  647. {
  648. }
  649. // TODO: Add a string_ref constructor, eventually
  650. // AlphaNum(const StringPiece &pc) : piece(pc) {}
  651. AlphaNum(const std::string& str) :
  652. piece_data_(str.data()),
  653. piece_size_(str.size())
  654. {
  655. }
  656. AlphaNum(StringPiece str) :
  657. piece_data_(str.data()),
  658. piece_size_(str.size())
  659. {
  660. }
  661. size_t size() const
  662. {
  663. return piece_size_;
  664. }
  665. const char* data() const
  666. {
  667. return piece_data_;
  668. }
  669. private:
  670. // Use ":" not ':'
  671. AlphaNum(char c); // NOLINT(runtime/explicit)
  672. // Disallow copy and assign.
  673. AlphaNum(const AlphaNum&);
  674. void operator=(const AlphaNum&);
  675. };
  676. } // namespace strings
  677. using strings::AlphaNum;
  678. // ----------------------------------------------------------------------
  679. // StrCat()
  680. // This merges the given strings or numbers, with no delimiter. This
  681. // is designed to be the fastest possible way to construct a string out
  682. // of a mix of raw C strings, strings, bool values,
  683. // and numeric values.
  684. //
  685. // Don't use this for user-visible strings. The localization process
  686. // works poorly on strings built up out of fragments.
  687. //
  688. // For clarity and performance, don't use StrCat when appending to a
  689. // string. In particular, avoid using any of these (anti-)patterns:
  690. // str.append(StrCat(...))
  691. // str += StrCat(...)
  692. // str = StrCat(str, ...)
  693. // where the last is the worst, with the potential to change a loop
  694. // from a linear time operation with O(1) dynamic allocations into a
  695. // quadratic time operation with O(n) dynamic allocations. StrAppend
  696. // is a better choice than any of the above, subject to the restriction
  697. // of StrAppend(&str, a, b, c, ...) that none of the a, b, c, ... may
  698. // be a reference into str.
  699. // ----------------------------------------------------------------------
  700. PROTOBUF_EXPORT std::string StrCat(const AlphaNum& a, const AlphaNum& b);
  701. PROTOBUF_EXPORT std::string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c);
  702. PROTOBUF_EXPORT std::string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c, const AlphaNum& d);
  703. PROTOBUF_EXPORT std::string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c, const AlphaNum& d, const AlphaNum& e);
  704. PROTOBUF_EXPORT std::string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c, const AlphaNum& d, const AlphaNum& e, const AlphaNum& f);
  705. PROTOBUF_EXPORT std::string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c, const AlphaNum& d, const AlphaNum& e, const AlphaNum& f, const AlphaNum& g);
  706. PROTOBUF_EXPORT std::string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c, const AlphaNum& d, const AlphaNum& e, const AlphaNum& f, const AlphaNum& g, const AlphaNum& h);
  707. PROTOBUF_EXPORT std::string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c, const AlphaNum& d, const AlphaNum& e, const AlphaNum& f, const AlphaNum& g, const AlphaNum& h, const AlphaNum& i);
  708. inline std::string StrCat(const AlphaNum& a)
  709. {
  710. return std::string(a.data(), a.size());
  711. }
  712. // ----------------------------------------------------------------------
  713. // StrAppend()
  714. // Same as above, but adds the output to the given string.
  715. // WARNING: For speed, StrAppend does not try to check each of its input
  716. // arguments to be sure that they are not a subset of the string being
  717. // appended to. That is, while this will work:
  718. //
  719. // string s = "foo";
  720. // s += s;
  721. //
  722. // This will not (necessarily) work:
  723. //
  724. // string s = "foo";
  725. // StrAppend(&s, s);
  726. //
  727. // Note: while StrCat supports appending up to 9 arguments, StrAppend
  728. // is currently limited to 4. That's rarely an issue except when
  729. // automatically transforming StrCat to StrAppend, and can easily be
  730. // worked around as consecutive calls to StrAppend are quite efficient.
  731. // ----------------------------------------------------------------------
  732. PROTOBUF_EXPORT void StrAppend(std::string* dest, const AlphaNum& a);
  733. PROTOBUF_EXPORT void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b);
  734. PROTOBUF_EXPORT void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b, const AlphaNum& c);
  735. PROTOBUF_EXPORT void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b, const AlphaNum& c, const AlphaNum& d);
  736. // ----------------------------------------------------------------------
  737. // Join()
  738. // These methods concatenate a range of components into a C++ string, using
  739. // the C-string "delim" as a separator between components.
  740. // ----------------------------------------------------------------------
  // Appends every element of [start, end) to *result via StrAppend(),
  // writing `delim` between consecutive elements. Nothing is written before
  // the first element or after the last one, and any text already present in
  // *result is kept.
  template<typename Iterator>
  void Join(Iterator start, Iterator end, const char* delim, std::string* result)
  {
    bool need_delim = false; // becomes true once the first element is written
    Iterator cursor = start;
    while (cursor != end)
    {
      if (need_delim)
      {
        result->append(delim);
      }
      StrAppend(result, *cursor);
      need_delim = true;
      ++cursor;
    }
  }
  // Convenience overload: joins all elements of `components` (anything that
  // exposes begin()/end()) with `delim` and returns the text as a new string.
  template<typename Range>
  std::string Join(const Range& components, const char* delim)
  {
    std::string joined;
    Join(components.begin(), components.end(), delim, &joined);
    return joined;
  }
  760. // ----------------------------------------------------------------------
  761. // ToHex()
  762. // Return a lower-case hex string representation of the given integer.
  763. // ----------------------------------------------------------------------
  764. PROTOBUF_EXPORT std::string ToHex(uint64_t num);
  765. // ----------------------------------------------------------------------
  766. // GlobalReplaceSubstring()
  767. // Replaces all instances of a substring in a string. Does nothing
  768. // if 'substring' is empty. Returns the number of replacements.
  769. //
  770. // NOTE: The string pieces must not overlap s.
  771. // ----------------------------------------------------------------------
  772. PROTOBUF_EXPORT int GlobalReplaceSubstring(const std::string& substring, const std::string& replacement, std::string* s);
  773. // ----------------------------------------------------------------------
  774. // Base64Unescape()
  775. // Converts "src" which is encoded in Base64 to its binary equivalent and
  776. // writes it to "dest". If src contains invalid characters, dest is cleared
  777. // and the function returns false. Returns true on success.
  778. // ----------------------------------------------------------------------
  779. PROTOBUF_EXPORT bool Base64Unescape(StringPiece src, std::string* dest);
  780. // ----------------------------------------------------------------------
  781. // WebSafeBase64Unescape()
  782. // This is a variation of Base64Unescape which uses '-' instead of '+', and
  783. // '_' instead of '/'. src is not null terminated, instead specify len. I
  784. // recommend that slen<szdest, but we honor szdest anyway.
  785. // RETURNS the length of dest, or -1 if src contains invalid chars.
  786. // The variation that stores into a string clears the string first, and
  787. // returns false (with dest empty) if src contains invalid chars; for
  788. // this version src and dest must be different strings.
  789. // ----------------------------------------------------------------------
  790. PROTOBUF_EXPORT int WebSafeBase64Unescape(const char* src, int slen, char* dest, int szdest);
  791. PROTOBUF_EXPORT bool WebSafeBase64Unescape(StringPiece src, std::string* dest);
  792. // Return the length to use for the output buffer given to the base64 escape
  793. // routines. Make sure to use the same value for do_padding in both.
  794. // This function may return incorrect results if given input_len values that
  795. // are extremely high, which should happen rarely.
  796. PROTOBUF_EXPORT int CalculateBase64EscapedLen(int input_len, bool do_padding);
  797. // Use this version when calling Base64Escape without a do_padding arg.
  798. PROTOBUF_EXPORT int CalculateBase64EscapedLen(int input_len);
  799. // ----------------------------------------------------------------------
  800. // Base64Escape()
  801. // WebSafeBase64Escape()
  802. // Encode "src" to "dest" using base64 encoding.
  803. // src is not null terminated, instead specify len.
  804. // 'dest' should have at least CalculateBase64EscapedLen() length.
  805. // RETURNS the length of dest.
  806. // The WebSafe variation uses '-' instead of '+' and '_' instead of '/'
  807. // so that we can place the output in the URL or cookies without having
  808. // to escape them. It also has an extra parameter "do_padding",
  809. // which when set to false will prevent padding with "=".
  810. // ----------------------------------------------------------------------
  811. PROTOBUF_EXPORT int Base64Escape(const unsigned char* src, int slen, char* dest, int szdest);
  812. PROTOBUF_EXPORT int WebSafeBase64Escape(const unsigned char* src, int slen, char* dest, int szdest, bool do_padding);
  813. // Encode src into dest with padding.
  814. PROTOBUF_EXPORT void Base64Escape(StringPiece src, std::string* dest);
  815. // Encode src into dest web-safely without padding.
  816. PROTOBUF_EXPORT void WebSafeBase64Escape(StringPiece src, std::string* dest);
  817. // Encode src into dest web-safely with padding.
  818. PROTOBUF_EXPORT void WebSafeBase64EscapeWithPadding(StringPiece src, std::string* dest);
  819. PROTOBUF_EXPORT void Base64Escape(const unsigned char* src, int szsrc, std::string* dest, bool do_padding);
  820. PROTOBUF_EXPORT void WebSafeBase64Escape(const unsigned char* src, int szsrc, std::string* dest, bool do_padding);
  // Returns true when `code_point` is at most 0x10FFFF and lies outside the
  // range [0xD800, 0xDFFF] (the UTF-16 surrogate range).
  inline bool IsValidCodePoint(uint32_t code_point)
  {
    if (code_point > 0x10FFFF)
    {
      return false; // beyond the last code point
    }
    const bool below_surrogates = code_point < 0xD800;
    const bool above_surrogates = code_point >= 0xE000;
    return below_surrogates || above_surrogates;
  }
  826. static const int UTFmax = 4; // presumably the max bytes in one UTF-8 encoded code point (EncodeAsUTF8Char asks for a 4-byte buffer) -- TODO confirm
  827. // ----------------------------------------------------------------------
  828. // EncodeAsUTF8Char()
  829. // Helper to append a Unicode code point to a string as UTF8, without bringing
  830. // in any external dependencies. The output buffer must be at least 4 bytes
  831. // large.
  832. // ----------------------------------------------------------------------
  833. PROTOBUF_EXPORT int EncodeAsUTF8Char(uint32_t code_point, char* output);
  834. // ----------------------------------------------------------------------
  835. // UTF8FirstLetterNumBytes()
  836. // Length of the first UTF-8 character.
  837. // ----------------------------------------------------------------------
  838. PROTOBUF_EXPORT int UTF8FirstLetterNumBytes(const char* src, int len);
  839. // From google3/third_party/absl/strings/escaping.h
  840. // ----------------------------------------------------------------------
  841. // CleanStringLineEndings()
  842. // Clean up a multi-line string to conform to Unix line endings.
  843. // Reads from src and appends to dst, so usually dst should be empty.
  844. //
  845. // If there is no line ending at the end of a non-empty string, it can
  846. // be added automatically.
  847. //
  848. // Four different types of input are correctly handled:
  849. //
  850. // - Unix/Linux files: line ending is LF: pass through unchanged
  851. //
  852. // - DOS/Windows files: line ending is CRLF: convert to LF
  853. //
  854. // - Legacy Mac files: line ending is CR: convert to LF
  855. //
  856. // - Garbled files: random line endings: convert gracefully
  857. // lonely CR, lonely LF, CRLF: convert to LF
  858. //
  859. // @param src The multi-line string to convert
  860. // @param dst The converted string is appended to this string
  861. // @param auto_end_last_line Automatically terminate the last line
  862. //
  863. // Limitations:
  864. //
  865. // This does not do the right thing for CRCRLF files created by
  866. // broken programs that do another Unix->DOS conversion on files
  867. // that are already in CRLF format. For this, a brute-force
  868. // two-pass approach would be needed that
  869. //
  870. // (1) determines the presence of LF (first one is ok)
  871. // (2) if yes, removes any CR, else convert every CR to LF
  872. PROTOBUF_EXPORT void CleanStringLineEndings(const std::string& src, std::string* dst, bool auto_end_last_line);
  873. // Same as above, but transforms the argument in place.
  874. PROTOBUF_EXPORT void CleanStringLineEndings(std::string* str, bool auto_end_last_line);
  875. namespace strings
  876. {
  877. inline bool EndsWith(StringPiece text, StringPiece suffix)
  878. {
  879. return suffix.empty() ||
  880. (text.size() >= suffix.size() &&
  881. memcmp(text.data() + (text.size() - suffix.size()), suffix.data(), suffix.size()) == 0);
  882. }
  883. } // namespace strings
  884. namespace internal
  885. {
  886. // A locale-independent version of the standard strtod(), which always
  887. // uses a dot as the decimal separator.
  888. double NoLocaleStrtod(const char* str, char** endptr);
  889. } // namespace internal
  890. } // namespace protobuf
  891. } // namespace google
  892. #include <google/protobuf/port_undef.inc>
  893. #endif // GOOGLE_PROTOBUF_STUBS_STRUTIL_H__