You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

charconv_bigint.h 20 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501
  1. // Copyright 2018 The Abseil Authors.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // https://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. #ifndef ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_
  15. #define ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_
  16. #include <algorithm>
  17. #include <cstdint>
  18. #include <iostream>
  19. #include <string>
  20. #include "absl/base/config.h"
  21. #include "absl/strings/ascii.h"
  22. #include "absl/strings/internal/charconv_parse.h"
  23. #include "absl/strings/string_view.h"
  24. namespace absl
  25. {
  26. ABSL_NAMESPACE_BEGIN
  27. namespace strings_internal
  28. {
  29. // The largest power that 5 that can be raised to, and still fit in a uint32_t.
  30. constexpr int kMaxSmallPowerOfFive = 13;
  31. // The largest power that 10 that can be raised to, and still fit in a uint32_t.
  32. constexpr int kMaxSmallPowerOfTen = 9;
  33. ABSL_DLL extern const uint32_t
  34. kFiveToNth[kMaxSmallPowerOfFive + 1];
  35. ABSL_DLL extern const uint32_t kTenToNth[kMaxSmallPowerOfTen + 1];
  36. // Large, fixed-width unsigned integer.
  37. //
  38. // Exact rounding for decimal-to-binary floating point conversion requires very
  39. // large integer math, but a design goal of absl::from_chars is to avoid
  40. // allocating memory. The integer precision needed for decimal-to-binary
  41. // conversions is large but bounded, so a huge fixed-width integer class
  42. // suffices.
  43. //
  44. // This is an intentionally limited big integer class. Only needed operations
  45. // are implemented. All storage lives in an array data member, and all
  46. // arithmetic is done in-place, to avoid requiring separate storage for operand
  47. // and result.
  48. //
  49. // This is an internal class. Some methods live in the .cc file, and are
  50. // instantiated only for the values of max_words we need.
  51. template<int max_words>
  52. class BigUnsigned
  53. {
  54. public:
  55. static_assert(max_words == 4 || max_words == 84, "unsupported max_words value");
  56. BigUnsigned() :
  57. size_(0),
  58. words_{}
  59. {
  60. }
  61. explicit constexpr BigUnsigned(uint64_t v) :
  62. size_((v >> 32) ? 2 : v ? 1 :
  63. 0),
  64. words_{static_cast<uint32_t>(v & 0xffffffffu), static_cast<uint32_t>(v >> 32)}
  65. {
  66. }
  67. // Constructs a BigUnsigned from the given string_view containing a decimal
  68. // value. If the input string is not a decimal integer, constructs a 0
  69. // instead.
  70. explicit BigUnsigned(absl::string_view sv) :
  71. size_(0),
  72. words_{}
  73. {
  74. // Check for valid input, returning a 0 otherwise. This is reasonable
  75. // behavior only because this constructor is for unit tests.
  76. if (std::find_if_not(sv.begin(), sv.end(), ascii_isdigit) != sv.end() ||
  77. sv.empty())
  78. {
  79. return;
  80. }
  81. int exponent_adjust =
  82. ReadDigits(sv.data(), sv.data() + sv.size(), Digits10() + 1);
  83. if (exponent_adjust > 0)
  84. {
  85. MultiplyByTenToTheNth(exponent_adjust);
  86. }
  87. }
  88. // Loads the mantissa value of a previously-parsed float.
  89. //
  90. // Returns the associated decimal exponent. The value of the parsed float is
  91. // exactly *this * 10**exponent.
  92. int ReadFloatMantissa(const ParsedFloat& fp, int significant_digits);
  93. // Returns the number of decimal digits of precision this type provides. All
  94. // numbers with this many decimal digits or fewer are representable by this
  95. // type.
  96. //
  97. // Analagous to std::numeric_limits<BigUnsigned>::digits10.
  98. static constexpr int Digits10()
  99. {
  100. // 9975007/1035508 is very slightly less than log10(2**32).
  101. return static_cast<uint64_t>(max_words) * 9975007 / 1035508;
  102. }
  103. // Shifts left by the given number of bits.
  104. void ShiftLeft(int count)
  105. {
  106. if (count > 0)
  107. {
  108. const int word_shift = count / 32;
  109. if (word_shift >= max_words)
  110. {
  111. SetToZero();
  112. return;
  113. }
  114. size_ = (std::min)(size_ + word_shift, max_words);
  115. count %= 32;
  116. if (count == 0)
  117. {
  118. std::copy_backward(words_, words_ + size_ - word_shift, words_ + size_);
  119. }
  120. else
  121. {
  122. for (int i = (std::min)(size_, max_words - 1); i > word_shift; --i)
  123. {
  124. words_[i] = (words_[i - word_shift] << count) |
  125. (words_[i - word_shift - 1] >> (32 - count));
  126. }
  127. words_[word_shift] = words_[0] << count;
  128. // Grow size_ if necessary.
  129. if (size_ < max_words && words_[size_])
  130. {
  131. ++size_;
  132. }
  133. }
  134. std::fill(words_, words_ + word_shift, 0u);
  135. }
  136. }
  137. // Multiplies by v in-place.
  138. void MultiplyBy(uint32_t v)
  139. {
  140. if (size_ == 0 || v == 1)
  141. {
  142. return;
  143. }
  144. if (v == 0)
  145. {
  146. SetToZero();
  147. return;
  148. }
  149. const uint64_t factor = v;
  150. uint64_t window = 0;
  151. for (int i = 0; i < size_; ++i)
  152. {
  153. window += factor * words_[i];
  154. words_[i] = window & 0xffffffff;
  155. window >>= 32;
  156. }
  157. // If carry bits remain and there's space for them, grow size_.
  158. if (window && size_ < max_words)
  159. {
  160. words_[size_] = window & 0xffffffff;
  161. ++size_;
  162. }
  163. }
  164. void MultiplyBy(uint64_t v)
  165. {
  166. uint32_t words[2];
  167. words[0] = static_cast<uint32_t>(v);
  168. words[1] = static_cast<uint32_t>(v >> 32);
  169. if (words[1] == 0)
  170. {
  171. MultiplyBy(words[0]);
  172. }
  173. else
  174. {
  175. MultiplyBy(2, words);
  176. }
  177. }
  178. // Multiplies in place by 5 to the power of n. n must be non-negative.
  179. void MultiplyByFiveToTheNth(int n)
  180. {
  181. while (n >= kMaxSmallPowerOfFive)
  182. {
  183. MultiplyBy(kFiveToNth[kMaxSmallPowerOfFive]);
  184. n -= kMaxSmallPowerOfFive;
  185. }
  186. if (n > 0)
  187. {
  188. MultiplyBy(kFiveToNth[n]);
  189. }
  190. }
  191. // Multiplies in place by 10 to the power of n. n must be non-negative.
  192. void MultiplyByTenToTheNth(int n)
  193. {
  194. if (n > kMaxSmallPowerOfTen)
  195. {
  196. // For large n, raise to a power of 5, then shift left by the same amount.
  197. // (10**n == 5**n * 2**n.) This requires fewer multiplications overall.
  198. MultiplyByFiveToTheNth(n);
  199. ShiftLeft(n);
  200. }
  201. else if (n > 0)
  202. {
  203. // We can do this more quickly for very small N by using a single
  204. // multiplication.
  205. MultiplyBy(kTenToNth[n]);
  206. }
  207. }
  208. // Returns the value of 5**n, for non-negative n. This implementation uses
  209. // a lookup table, and is faster then seeding a BigUnsigned with 1 and calling
  210. // MultiplyByFiveToTheNth().
  211. static BigUnsigned FiveToTheNth(int n);
  212. // Multiplies by another BigUnsigned, in-place.
  213. template<int M>
  214. void MultiplyBy(const BigUnsigned<M>& other)
  215. {
  216. MultiplyBy(other.size(), other.words());
  217. }
  218. void SetToZero()
  219. {
  220. std::fill(words_, words_ + size_, 0u);
  221. size_ = 0;
  222. }
  223. // Returns the value of the nth word of this BigUnsigned. This is
  224. // range-checked, and returns 0 on out-of-bounds accesses.
  225. uint32_t GetWord(int index) const
  226. {
  227. if (index < 0 || index >= size_)
  228. {
  229. return 0;
  230. }
  231. return words_[index];
  232. }
  233. // Returns this integer as a decimal string. This is not used in the decimal-
  234. // to-binary conversion; it is intended to aid in testing.
  235. std::string ToString() const;
  236. int size() const
  237. {
  238. return size_;
  239. }
  240. const uint32_t* words() const
  241. {
  242. return words_;
  243. }
  244. private:
  245. // Reads the number between [begin, end), possibly containing a decimal point,
  246. // into this BigUnsigned.
  247. //
  248. // Callers are required to ensure [begin, end) contains a valid number, with
  249. // one or more decimal digits and at most one decimal point. This routine
  250. // will behave unpredictably if these preconditions are not met.
  251. //
  252. // Only the first `significant_digits` digits are read. Digits beyond this
  253. // limit are "sticky": If the final significant digit is 0 or 5, and if any
  254. // dropped digit is nonzero, then that final significant digit is adjusted up
  255. // to 1 or 6. This adjustment allows for precise rounding.
  256. //
  257. // Returns `exponent_adjustment`, a power-of-ten exponent adjustment to
  258. // account for the decimal point and for dropped significant digits. After
  259. // this function returns,
  260. // actual_value_of_parsed_string ~= *this * 10**exponent_adjustment.
  261. int ReadDigits(const char* begin, const char* end, int significant_digits);
  262. // Performs a step of big integer multiplication. This computes the full
  263. // (64-bit-wide) values that should be added at the given index (step), and
  264. // adds to that location in-place.
  265. //
  266. // Because our math all occurs in place, we must multiply starting from the
  267. // highest word working downward. (This is a bit more expensive due to the
  268. // extra carries involved.)
  269. //
  270. // This must be called in steps, for each word to be calculated, starting from
  271. // the high end and working down to 0. The first value of `step` should be
  272. // `std::min(original_size + other.size_ - 2, max_words - 1)`.
  273. // The reason for this expression is that multiplying the i'th word from one
  274. // multiplicand and the j'th word of another multiplicand creates a
  275. // two-word-wide value to be stored at the (i+j)'th element. The highest
  276. // word indices we will access are `original_size - 1` from this object, and
  277. // `other.size_ - 1` from our operand. Therefore,
  278. // `original_size + other.size_ - 2` is the first step we should calculate,
  279. // but limited on an upper bound by max_words.
  280. // Working from high-to-low ensures that we do not overwrite the portions of
  281. // the initial value of *this which are still needed for later steps.
  282. //
  283. // Once called with step == 0, *this contains the result of the
  284. // multiplication.
  285. //
  286. // `original_size` is the size_ of *this before the first call to
  287. // MultiplyStep(). `other_words` and `other_size` are the contents of our
  288. // operand. `step` is the step to perform, as described above.
  289. void MultiplyStep(int original_size, const uint32_t* other_words, int other_size, int step);
  290. void MultiplyBy(int other_size, const uint32_t* other_words)
  291. {
  292. const int original_size = size_;
  293. const int first_step =
  294. (std::min)(original_size + other_size - 2, max_words - 1);
  295. for (int step = first_step; step >= 0; --step)
  296. {
  297. MultiplyStep(original_size, other_words, other_size, step);
  298. }
  299. }
  300. // Adds a 32-bit value to the index'th word, with carry.
  301. void AddWithCarry(int index, uint32_t value)
  302. {
  303. if (value)
  304. {
  305. while (index < max_words && value > 0)
  306. {
  307. words_[index] += value;
  308. // carry if we overflowed in this word:
  309. if (value > words_[index])
  310. {
  311. value = 1;
  312. ++index;
  313. }
  314. else
  315. {
  316. value = 0;
  317. }
  318. }
  319. size_ = (std::min)(max_words, (std::max)(index + 1, size_));
  320. }
  321. }
  322. void AddWithCarry(int index, uint64_t value)
  323. {
  324. if (value && index < max_words)
  325. {
  326. uint32_t high = value >> 32;
  327. uint32_t low = value & 0xffffffff;
  328. words_[index] += low;
  329. if (words_[index] < low)
  330. {
  331. ++high;
  332. if (high == 0)
  333. {
  334. // Carry from the low word caused our high word to overflow.
  335. // Short circuit here to do the right thing.
  336. AddWithCarry(index + 2, static_cast<uint32_t>(1));
  337. return;
  338. }
  339. }
  340. if (high > 0)
  341. {
  342. AddWithCarry(index + 1, high);
  343. }
  344. else
  345. {
  346. // Normally 32-bit AddWithCarry() sets size_, but since we don't call
  347. // it when `high` is 0, do it ourselves here.
  348. size_ = (std::min)(max_words, (std::max)(index + 1, size_));
  349. }
  350. }
  351. }
  352. // Divide this in place by a constant divisor. Returns the remainder of the
  353. // division.
  354. template<uint32_t divisor>
  355. uint32_t DivMod()
  356. {
  357. uint64_t accumulator = 0;
  358. for (int i = size_ - 1; i >= 0; --i)
  359. {
  360. accumulator <<= 32;
  361. accumulator += words_[i];
  362. // accumulator / divisor will never overflow an int32_t in this loop
  363. words_[i] = static_cast<uint32_t>(accumulator / divisor);
  364. accumulator = accumulator % divisor;
  365. }
  366. while (size_ > 0 && words_[size_ - 1] == 0)
  367. {
  368. --size_;
  369. }
  370. return static_cast<uint32_t>(accumulator);
  371. }
  372. // The number of elements in words_ that may carry significant values.
  373. // All elements beyond this point are 0.
  374. //
  375. // When size_ is 0, this BigUnsigned stores the value 0.
  376. // When size_ is nonzero, is *not* guaranteed that words_[size_ - 1] is
  377. // nonzero. This can occur due to overflow truncation.
  378. // In particular, x.size_ != y.size_ does *not* imply x != y.
  379. int size_;
  380. uint32_t words_[max_words];
  381. };
  382. // Compares two big integer instances.
  383. //
  384. // Returns -1 if lhs < rhs, 0 if lhs == rhs, and 1 if lhs > rhs.
  385. template<int N, int M>
  386. int Compare(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs)
  387. {
  388. int limit = (std::max)(lhs.size(), rhs.size());
  389. for (int i = limit - 1; i >= 0; --i)
  390. {
  391. const uint32_t lhs_word = lhs.GetWord(i);
  392. const uint32_t rhs_word = rhs.GetWord(i);
  393. if (lhs_word < rhs_word)
  394. {
  395. return -1;
  396. }
  397. else if (lhs_word > rhs_word)
  398. {
  399. return 1;
  400. }
  401. }
  402. return 0;
  403. }
  404. template<int N, int M>
  405. bool operator==(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs)
  406. {
  407. int limit = (std::max)(lhs.size(), rhs.size());
  408. for (int i = 0; i < limit; ++i)
  409. {
  410. if (lhs.GetWord(i) != rhs.GetWord(i))
  411. {
  412. return false;
  413. }
  414. }
  415. return true;
  416. }
  417. template<int N, int M>
  418. bool operator!=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs)
  419. {
  420. return !(lhs == rhs);
  421. }
  422. template<int N, int M>
  423. bool operator<(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs)
  424. {
  425. return Compare(lhs, rhs) == -1;
  426. }
  427. template<int N, int M>
  428. bool operator>(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs)
  429. {
  430. return rhs < lhs;
  431. }
  432. template<int N, int M>
  433. bool operator<=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs)
  434. {
  435. return !(rhs < lhs);
  436. }
  437. template<int N, int M>
  438. bool operator>=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs)
  439. {
  440. return !(lhs < rhs);
  441. }
  442. // Output operator for BigUnsigned, for testing purposes only.
  443. template<int N>
  444. std::ostream& operator<<(std::ostream& os, const BigUnsigned<N>& num)
  445. {
  446. return os << num.ToString();
  447. }
  448. // Explicit instantiation declarations for the sizes of BigUnsigned that we
  449. // are using.
  450. //
  451. // For now, the choices of 4 and 84 are arbitrary; 4 is a small value that is
  452. // still bigger than an int128, and 84 is a large value we will want to use
  453. // in the from_chars implementation.
  454. //
  455. // Comments justifying the use of 84 belong in the from_chars implementation,
  456. // and will be added in a follow-up CL.
  457. extern template class BigUnsigned<4>;
  458. extern template class BigUnsigned<84>;
  459. } // namespace strings_internal
  460. ABSL_NAMESPACE_END
  461. } // namespace absl
  462. #endif // ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_