Loading...
Searching...
No Matches
diyfp.h
1// Tencent is pleased to support the open source community by making RapidJSON available.
2//
3// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
4//
5// Licensed under the MIT License (the "License"); you may not use this file except
6// in compliance with the License. You may obtain a copy of the License at
7//
8// http://opensource.org/licenses/MIT
9//
10// Unless required by applicable law or agreed to in writing, software distributed
11// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12// CONDITIONS OF ANY KIND, either express or implied. See the License for the
13// specific language governing permissions and limitations under the License.
14
15// This is a C++ header-only implementation of Grisu2 algorithm from the publication:
16// Loitsch, Florian. "Printing floating-point numbers quickly and accurately with
17// integers." ACM Sigplan Notices 45.6 (2010): 233-243.
18
19#ifndef RAPIDJSON_DIYFP_H_
20#define RAPIDJSON_DIYFP_H_
21
22#include "../rapidjson.h"
23#include "clzll.h"
24#include <limits>
25
26#if defined(_MSC_VER) && defined(_M_AMD64) && !defined(__INTEL_COMPILER)
27#include <intrin.h>
28#if !defined(_ARM64EC_)
29#pragma intrinsic(_umul128)
30#else
31#pragma comment(lib,"softintrin")
32#endif
33#endif
34
35RAPIDJSON_NAMESPACE_BEGIN
36namespace internal {
37
38#ifdef __GNUC__
39RAPIDJSON_DIAG_PUSH
40RAPIDJSON_DIAG_OFF(effc++)
41#endif
42
43#ifdef __clang__
44RAPIDJSON_DIAG_PUSH
45RAPIDJSON_DIAG_OFF(padded)
46#endif
47
48struct DiyFp {
49 DiyFp() : f(), e() {}
50
51 DiyFp(uint64_t fp, int exp) : f(fp), e(exp) {}
52
53 explicit DiyFp(double d) {
54 union {
55 double d;
56 uint64_t u64;
57 } u = { d };
58
59 int biased_e = static_cast<int>((u.u64 & kDpExponentMask) >> kDpSignificandSize);
60 uint64_t significand = (u.u64 & kDpSignificandMask);
61 if (biased_e != 0) {
62 f = significand + kDpHiddenBit;
63 e = biased_e - kDpExponentBias;
64 }
65 else {
66 f = significand;
67 e = kDpMinExponent + 1;
68 }
69 }
70
71 DiyFp operator-(const DiyFp& rhs) const {
72 return DiyFp(f - rhs.f, e);
73 }
74
75 DiyFp operator*(const DiyFp& rhs) const {
76#if defined(_MSC_VER) && defined(_M_AMD64)
77 uint64_t h;
78 uint64_t l = _umul128(f, rhs.f, &h);
79 if (l & (uint64_t(1) << 63)) // rounding
80 h++;
81 return DiyFp(h, e + rhs.e + 64);
82#elif (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) && defined(__x86_64__)
83 __extension__ typedef unsigned __int128 uint128;
84 uint128 p = static_cast<uint128>(f) * static_cast<uint128>(rhs.f);
85 uint64_t h = static_cast<uint64_t>(p >> 64);
86 uint64_t l = static_cast<uint64_t>(p);
87 if (l & (uint64_t(1) << 63)) // rounding
88 h++;
89 return DiyFp(h, e + rhs.e + 64);
90#else
91 const uint64_t M32 = 0xFFFFFFFF;
92 const uint64_t a = f >> 32;
93 const uint64_t b = f & M32;
94 const uint64_t c = rhs.f >> 32;
95 const uint64_t d = rhs.f & M32;
96 const uint64_t ac = a * c;
97 const uint64_t bc = b * c;
98 const uint64_t ad = a * d;
99 const uint64_t bd = b * d;
100 uint64_t tmp = (bd >> 32) + (ad & M32) + (bc & M32);
101 tmp += 1U << 31; /// mult_round
102 return DiyFp(ac + (ad >> 32) + (bc >> 32) + (tmp >> 32), e + rhs.e + 64);
103#endif
104 }
105
106 DiyFp Normalize() const {
107 int s = static_cast<int>(clzll(f));
108 return DiyFp(f << s, e - s);
109 }
110
111 DiyFp NormalizeBoundary() const {
112 DiyFp res = *this;
113 while (!(res.f & (kDpHiddenBit << 1))) {
114 res.f <<= 1;
115 res.e--;
116 }
117 res.f <<= (kDiySignificandSize - kDpSignificandSize - 2);
118 res.e = res.e - (kDiySignificandSize - kDpSignificandSize - 2);
119 return res;
120 }
121
122 void NormalizedBoundaries(DiyFp* minus, DiyFp* plus) const {
123 DiyFp pl = DiyFp((f << 1) + 1, e - 1).NormalizeBoundary();
124 DiyFp mi = (f == kDpHiddenBit) ? DiyFp((f << 2) - 1, e - 2) : DiyFp((f << 1) - 1, e - 1);
125 mi.f <<= mi.e - pl.e;
126 mi.e = pl.e;
127 *plus = pl;
128 *minus = mi;
129 }
130
131 double ToDouble() const {
132 union {
133 double d;
134 uint64_t u64;
135 }u;
136 RAPIDJSON_ASSERT(f <= kDpHiddenBit + kDpSignificandMask);
137 if (e < kDpDenormalExponent) {
138 // Underflow.
139 return 0.0;
140 }
141 if (e >= kDpMaxExponent) {
142 // Overflow.
143 return std::numeric_limits<double>::infinity();
144 }
145 const uint64_t be = (e == kDpDenormalExponent && (f & kDpHiddenBit) == 0) ? 0 :
146 static_cast<uint64_t>(e + kDpExponentBias);
147 u.u64 = (f & kDpSignificandMask) | (be << kDpSignificandSize);
148 return u.d;
149 }
150
151 static const int kDiySignificandSize = 64;
152 static const int kDpSignificandSize = 52;
153 static const int kDpExponentBias = 0x3FF + kDpSignificandSize;
154 static const int kDpMaxExponent = 0x7FF - kDpExponentBias;
155 static const int kDpMinExponent = -kDpExponentBias;
156 static const int kDpDenormalExponent = -kDpExponentBias + 1;
157 static const uint64_t kDpExponentMask = RAPIDJSON_UINT64_C2(0x7FF00000, 0x00000000);
158 static const uint64_t kDpSignificandMask = RAPIDJSON_UINT64_C2(0x000FFFFF, 0xFFFFFFFF);
159 static const uint64_t kDpHiddenBit = RAPIDJSON_UINT64_C2(0x00100000, 0x00000000);
160
161 uint64_t f;
162 int e;
163};
164
165inline DiyFp GetCachedPowerByIndex(size_t index) {
166 // 10^-348, 10^-340, ..., 10^340
167 static const uint64_t kCachedPowers_F[] = {
168 RAPIDJSON_UINT64_C2(0xfa8fd5a0, 0x081c0288), RAPIDJSON_UINT64_C2(0xbaaee17f, 0xa23ebf76),
169 RAPIDJSON_UINT64_C2(0x8b16fb20, 0x3055ac76), RAPIDJSON_UINT64_C2(0xcf42894a, 0x5dce35ea),
170 RAPIDJSON_UINT64_C2(0x9a6bb0aa, 0x55653b2d), RAPIDJSON_UINT64_C2(0xe61acf03, 0x3d1a45df),
171 RAPIDJSON_UINT64_C2(0xab70fe17, 0xc79ac6ca), RAPIDJSON_UINT64_C2(0xff77b1fc, 0xbebcdc4f),
172 RAPIDJSON_UINT64_C2(0xbe5691ef, 0x416bd60c), RAPIDJSON_UINT64_C2(0x8dd01fad, 0x907ffc3c),
173 RAPIDJSON_UINT64_C2(0xd3515c28, 0x31559a83), RAPIDJSON_UINT64_C2(0x9d71ac8f, 0xada6c9b5),
174 RAPIDJSON_UINT64_C2(0xea9c2277, 0x23ee8bcb), RAPIDJSON_UINT64_C2(0xaecc4991, 0x4078536d),
175 RAPIDJSON_UINT64_C2(0x823c1279, 0x5db6ce57), RAPIDJSON_UINT64_C2(0xc2109436, 0x4dfb5637),
176 RAPIDJSON_UINT64_C2(0x9096ea6f, 0x3848984f), RAPIDJSON_UINT64_C2(0xd77485cb, 0x25823ac7),
177 RAPIDJSON_UINT64_C2(0xa086cfcd, 0x97bf97f4), RAPIDJSON_UINT64_C2(0xef340a98, 0x172aace5),
178 RAPIDJSON_UINT64_C2(0xb23867fb, 0x2a35b28e), RAPIDJSON_UINT64_C2(0x84c8d4df, 0xd2c63f3b),
179 RAPIDJSON_UINT64_C2(0xc5dd4427, 0x1ad3cdba), RAPIDJSON_UINT64_C2(0x936b9fce, 0xbb25c996),
180 RAPIDJSON_UINT64_C2(0xdbac6c24, 0x7d62a584), RAPIDJSON_UINT64_C2(0xa3ab6658, 0x0d5fdaf6),
181 RAPIDJSON_UINT64_C2(0xf3e2f893, 0xdec3f126), RAPIDJSON_UINT64_C2(0xb5b5ada8, 0xaaff80b8),
182 RAPIDJSON_UINT64_C2(0x87625f05, 0x6c7c4a8b), RAPIDJSON_UINT64_C2(0xc9bcff60, 0x34c13053),
183 RAPIDJSON_UINT64_C2(0x964e858c, 0x91ba2655), RAPIDJSON_UINT64_C2(0xdff97724, 0x70297ebd),
184 RAPIDJSON_UINT64_C2(0xa6dfbd9f, 0xb8e5b88f), RAPIDJSON_UINT64_C2(0xf8a95fcf, 0x88747d94),
185 RAPIDJSON_UINT64_C2(0xb9447093, 0x8fa89bcf), RAPIDJSON_UINT64_C2(0x8a08f0f8, 0xbf0f156b),
186 RAPIDJSON_UINT64_C2(0xcdb02555, 0x653131b6), RAPIDJSON_UINT64_C2(0x993fe2c6, 0xd07b7fac),
187 RAPIDJSON_UINT64_C2(0xe45c10c4, 0x2a2b3b06), RAPIDJSON_UINT64_C2(0xaa242499, 0x697392d3),
188 RAPIDJSON_UINT64_C2(0xfd87b5f2, 0x8300ca0e), RAPIDJSON_UINT64_C2(0xbce50864, 0x92111aeb),
189 RAPIDJSON_UINT64_C2(0x8cbccc09, 0x6f5088cc), RAPIDJSON_UINT64_C2(0xd1b71758, 0xe219652c),
190 RAPIDJSON_UINT64_C2(0x9c400000, 0x00000000), RAPIDJSON_UINT64_C2(0xe8d4a510, 0x00000000),
191 RAPIDJSON_UINT64_C2(0xad78ebc5, 0xac620000), RAPIDJSON_UINT64_C2(0x813f3978, 0xf8940984),
192 RAPIDJSON_UINT64_C2(0xc097ce7b, 0xc90715b3), RAPIDJSON_UINT64_C2(0x8f7e32ce, 0x7bea5c70),
193 RAPIDJSON_UINT64_C2(0xd5d238a4, 0xabe98068), RAPIDJSON_UINT64_C2(0x9f4f2726, 0x179a2245),
194 RAPIDJSON_UINT64_C2(0xed63a231, 0xd4c4fb27), RAPIDJSON_UINT64_C2(0xb0de6538, 0x8cc8ada8),
195 RAPIDJSON_UINT64_C2(0x83c7088e, 0x1aab65db), RAPIDJSON_UINT64_C2(0xc45d1df9, 0x42711d9a),
196 RAPIDJSON_UINT64_C2(0x924d692c, 0xa61be758), RAPIDJSON_UINT64_C2(0xda01ee64, 0x1a708dea),
197 RAPIDJSON_UINT64_C2(0xa26da399, 0x9aef774a), RAPIDJSON_UINT64_C2(0xf209787b, 0xb47d6b85),
198 RAPIDJSON_UINT64_C2(0xb454e4a1, 0x79dd1877), RAPIDJSON_UINT64_C2(0x865b8692, 0x5b9bc5c2),
199 RAPIDJSON_UINT64_C2(0xc83553c5, 0xc8965d3d), RAPIDJSON_UINT64_C2(0x952ab45c, 0xfa97a0b3),
200 RAPIDJSON_UINT64_C2(0xde469fbd, 0x99a05fe3), RAPIDJSON_UINT64_C2(0xa59bc234, 0xdb398c25),
201 RAPIDJSON_UINT64_C2(0xf6c69a72, 0xa3989f5c), RAPIDJSON_UINT64_C2(0xb7dcbf53, 0x54e9bece),
202 RAPIDJSON_UINT64_C2(0x88fcf317, 0xf22241e2), RAPIDJSON_UINT64_C2(0xcc20ce9b, 0xd35c78a5),
203 RAPIDJSON_UINT64_C2(0x98165af3, 0x7b2153df), RAPIDJSON_UINT64_C2(0xe2a0b5dc, 0x971f303a),
204 RAPIDJSON_UINT64_C2(0xa8d9d153, 0x5ce3b396), RAPIDJSON_UINT64_C2(0xfb9b7cd9, 0xa4a7443c),
205 RAPIDJSON_UINT64_C2(0xbb764c4c, 0xa7a44410), RAPIDJSON_UINT64_C2(0x8bab8eef, 0xb6409c1a),
206 RAPIDJSON_UINT64_C2(0xd01fef10, 0xa657842c), RAPIDJSON_UINT64_C2(0x9b10a4e5, 0xe9913129),
207 RAPIDJSON_UINT64_C2(0xe7109bfb, 0xa19c0c9d), RAPIDJSON_UINT64_C2(0xac2820d9, 0x623bf429),
208 RAPIDJSON_UINT64_C2(0x80444b5e, 0x7aa7cf85), RAPIDJSON_UINT64_C2(0xbf21e440, 0x03acdd2d),
209 RAPIDJSON_UINT64_C2(0x8e679c2f, 0x5e44ff8f), RAPIDJSON_UINT64_C2(0xd433179d, 0x9c8cb841),
210 RAPIDJSON_UINT64_C2(0x9e19db92, 0xb4e31ba9), RAPIDJSON_UINT64_C2(0xeb96bf6e, 0xbadf77d9),
211 RAPIDJSON_UINT64_C2(0xaf87023b, 0x9bf0ee6b)
212 };
213 static const int16_t kCachedPowers_E[] = {
214 -1220, -1193, -1166, -1140, -1113, -1087, -1060, -1034, -1007, -980,
215 -954, -927, -901, -874, -847, -821, -794, -768, -741, -715,
216 -688, -661, -635, -608, -582, -555, -529, -502, -475, -449,
217 -422, -396, -369, -343, -316, -289, -263, -236, -210, -183,
218 -157, -130, -103, -77, -50, -24, 3, 30, 56, 83,
219 109, 136, 162, 189, 216, 242, 269, 295, 322, 348,
220 375, 402, 428, 455, 481, 508, 534, 561, 588, 614,
221 641, 667, 694, 720, 747, 774, 800, 827, 853, 880,
222 907, 933, 960, 986, 1013, 1039, 1066
223 };
224 RAPIDJSON_ASSERT(index < 87);
225 return DiyFp(kCachedPowers_F[index], kCachedPowers_E[index]);
226}
227
228inline DiyFp GetCachedPower(int e, int* K) {
229
230 //int k = static_cast<int>(ceil((-61 - e) * 0.30102999566398114)) + 374;
231 double dk = (-61 - e) * 0.30102999566398114 + 347; // dk must be positive, so can do ceiling in positive
232 int k = static_cast<int>(dk);
233 if (dk - k > 0.0)
234 k++;
235
236 unsigned index = static_cast<unsigned>((k >> 3) + 1);
237 *K = -(-348 + static_cast<int>(index << 3)); // decimal exponent no need lookup table
238
239 return GetCachedPowerByIndex(index);
240}
241
242inline DiyFp GetCachedPower10(int exp, int *outExp) {
243 RAPIDJSON_ASSERT(exp >= -348);
244 unsigned index = static_cast<unsigned>(exp + 348) / 8u;
245 *outExp = -348 + static_cast<int>(index) * 8;
246 return GetCachedPowerByIndex(index);
247}
248
249#ifdef __GNUC__
250RAPIDJSON_DIAG_POP
251#endif
252
253#ifdef __clang__
254RAPIDJSON_DIAG_POP
255RAPIDJSON_DIAG_OFF(padded)
256#endif
257
258} // namespace internal
259RAPIDJSON_NAMESPACE_END
260
261#endif // RAPIDJSON_DIYFP_H_
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition: rapidjson.h:437
#define RAPIDJSON_UINT64_C2(high32, low32)
Construct a 64-bit literal by a pair of 32-bit integer.
Definition: rapidjson.h:320