|
1 | 1 | // SPDX-License-Identifier: CAL |
2 | 2 | pragma solidity ^0.8.25; |
3 | 3 |
|
4 | | -import {ExponentOverflow, Log10Negative, Log10Zero} from "../../error/ErrDecimalFloat.sol"; |
| 4 | +import {ExponentOverflow, Log10Negative, Log10Zero, MulDivOverflow} from "../../error/ErrDecimalFloat.sol"; |
5 | 5 | import { |
6 | 6 | LOG_TABLES, |
7 | 7 | LOG_TABLES_SMALL, |
@@ -188,13 +188,149 @@ library LibDecimalFloatImplementation { |
188 | 188 | pure |
189 | 189 | returns (int256, int256) |
190 | 190 | { |
| 191 | + uint256 scale = 1e76; |
| 192 | + int256 adjustExponent = 76; |
| 193 | + int256 signedCoefficient; |
| 194 | + |
191 | 195 | unchecked { |
| 196 | + // Move both coefficients into the e75/e76 range, so that the result |
| 197 | + // of division will not cause a mulDiv overflow. |
192 | 198 | (signedCoefficientA, exponentA) = maximize(signedCoefficientA, exponentA); |
193 | | - (signedCoefficientB, exponentB) = normalize(signedCoefficientB, exponentB); |
| 199 | + (signedCoefficientB, exponentB) = maximize(signedCoefficientB, exponentB); |
| 200 | + |
| 201 | + // mulDiv only works with unsigned integers, so get the absolute |
| 202 | + // values of the coefficients. |
| 203 | + uint256 signedCoefficientAAbs; |
| 204 | + if (signedCoefficientA > 0) { |
| 205 | + signedCoefficientAAbs = uint256(signedCoefficientA); |
| 206 | + } else if (signedCoefficientA < 0) { |
| 207 | + if (signedCoefficientA == type(int256).min) { |
| 208 | + signedCoefficientAAbs = uint256(type(int256).max) + 1; |
| 209 | + } else { |
| 210 | + signedCoefficientAAbs = uint256(-signedCoefficientA); |
| 211 | + } |
| 212 | + } else { |
| 213 | + return (MAXIMIZED_ZERO_SIGNED_COEFFICIENT, MAXIMIZED_ZERO_EXPONENT); |
| 214 | + } |
| 215 | + uint256 signedCoefficientBAbs; |
| 216 | + if (signedCoefficientB < 0) { |
| 217 | + if (signedCoefficientB == type(int256).min) { |
| 218 | + signedCoefficientBAbs = uint256(type(int256).max) + 1; |
| 219 | + } else { |
| 220 | + signedCoefficientBAbs = uint256(-signedCoefficientB); |
| 221 | + } |
| 222 | + } else { |
| 223 | + signedCoefficientBAbs = uint256(signedCoefficientB); |
| 224 | + } |
194 | 225 |
|
195 | | - int256 signedCoefficient = signedCoefficientA / signedCoefficientB; |
196 | | - int256 exponent = exponentA - exponentB; |
197 | | - return (signedCoefficient, exponent); |
| 226 | + // We are going to scale the numerator up by the largest power of ten |
| 227 | + // that is smaller than the denominator. This will always overflow |
| 228 | + // internally to the mulDiv during the initial multiplication, in |
| 229 | + // 512 bits, but will subsequently always be reduced back down to |
| 230 | + // fit in 256 bits by the division of a denominator that is larger |
| 231 | + // than the scale up. |
| 232 | + if (signedCoefficientBAbs < scale) { |
| 233 | + scale = 1e75; |
| 234 | + adjustExponent = 75; |
| 235 | + } |
| 236 | + uint256 signedCoefficientAbs = mulDiv(signedCoefficientAAbs, scale, signedCoefficientBAbs); |
| 237 | + signedCoefficient = (signedCoefficientA ^ signedCoefficientB) < 0 |
| 238 | + ? -int256(signedCoefficientAbs) |
| 239 | + : int256(signedCoefficientAbs); |
| 240 | + } |
| 241 | + |
| 242 | + // Keep the exponent calculation outside the unchecked block so that we |
| 243 | + // don't silently under/overflow. |
| 244 | + int256 exponent = exponentA - exponentB - adjustExponent; |
| 245 | + return (signedCoefficient, exponent); |
| 246 | + } |
| 247 | + |
| 248 | + /// mulDiv as seen in Open Zeppelin, PRB Math, Solady, and other libraries. |
| 249 | + /// Credit to Remco Bloemen under MIT license: https://2π.com/21/muldiv |
| 250 | + function mulDiv(uint256 x, uint256 y, uint256 denominator) internal pure returns (uint256 result) { |
| 251 | + // 512-bit multiply [prod1 prod0] = x * y. Compute the product mod 2^256 and mod 2^256 - 1, then use |
| 252 | + // use the Chinese Remainder Theorem to reconstruct the 512-bit result. The result is stored in two 256 |
| 253 | + // variables such that product = prod1 * 2^256 + prod0. |
| 254 | + uint256 prod0; // Least significant 256 bits of the product |
| 255 | + uint256 prod1; // Most significant 256 bits of the product |
| 256 | + assembly ("memory-safe") { |
| 257 | + let mm := mulmod(x, y, not(0)) |
| 258 | + prod0 := mul(x, y) |
| 259 | + prod1 := sub(sub(mm, prod0), lt(mm, prod0)) |
| 260 | + } |
| 261 | + |
| 262 | + // Handle non-overflow cases, 256 by 256 division. |
| 263 | + if (prod1 == 0) { |
| 264 | + unchecked { |
| 265 | + return prod0 / denominator; |
| 266 | + } |
| 267 | + } |
| 268 | + |
| 269 | + // Make sure the result is less than 2^256. Also prevents denominator == 0. |
| 270 | + if (prod1 >= denominator) { |
| 271 | + revert MulDivOverflow(x, y, denominator); |
| 272 | + } |
| 273 | + |
| 274 | + //////////////////////////////////////////////////////////////////////////// |
| 275 | + // 512 by 256 division |
| 276 | + //////////////////////////////////////////////////////////////////////////// |
| 277 | + |
| 278 | + // Make division exact by subtracting the remainder from [prod1 prod0]. |
| 279 | + uint256 remainder; |
| 280 | + assembly ("memory-safe") { |
| 281 | + // Compute remainder using the mulmod Yul instruction. |
| 282 | + remainder := mulmod(x, y, denominator) |
| 283 | + |
| 284 | + // Subtract 256 bit number from 512-bit number. |
| 285 | + prod1 := sub(prod1, gt(remainder, prod0)) |
| 286 | + prod0 := sub(prod0, remainder) |
| 287 | + } |
| 288 | + |
| 289 | + unchecked { |
| 290 | + // Calculate the largest power of two divisor of the denominator using the unary operator ~. This operation cannot overflow |
| 291 | + // because the denominator cannot be zero at this point in the function execution. The result is always >= 1. |
| 292 | + // For more detail, see https://cs.stackexchange.com/q/138556/92363. |
| 293 | + uint256 lpotdod = denominator & (~denominator + 1); |
| 294 | + uint256 flippedLpotdod; |
| 295 | + |
| 296 | + assembly ("memory-safe") { |
| 297 | + // Factor powers of two out of denominator. |
| 298 | + // slither-disable-next-line divide-before-multiply |
| 299 | + denominator := div(denominator, lpotdod) |
| 300 | + |
| 301 | + // Divide [prod1 prod0] by lpotdod. |
| 302 | + // slither-disable-next-line divide-before-multiply |
| 303 | + prod0 := div(prod0, lpotdod) |
| 304 | + |
| 305 | + // Get the flipped value `2^256 / lpotdod`. If the `lpotdod` is zero, the flipped value is one. |
| 306 | + // `sub(0, lpotdod)` produces the two's complement version of `lpotdod`, which is equivalent to flipping all the bits. |
| 307 | + // However, `div` interprets this value as an unsigned value: https://ethereum.stackexchange.com/q/147168/24693 |
| 308 | + flippedLpotdod := add(div(sub(0, lpotdod), lpotdod), 1) |
| 309 | + } |
| 310 | + |
| 311 | + // Shift in bits from prod1 into prod0. |
| 312 | + prod0 |= prod1 * flippedLpotdod; |
| 313 | + |
| 314 | + // Invert denominator mod 2^256. Now that denominator is an odd number, it has an inverse modulo 2^256 such |
| 315 | + // that denominator * inv = 1 mod 2^256. Compute the inverse by starting with a seed that is correct for |
| 316 | + // four bits. That is, denominator * inv = 1 mod 2^4. |
| 317 | + // slither-disable-next-line incorrect-exp |
| 318 | + uint256 inverse = (3 * denominator) ^ 2; |
| 319 | + |
| 320 | + // Use the Newton-Raphson iteration to improve the precision. Thanks to Hensel's lifting lemma, this also works |
| 321 | + // in modular arithmetic, doubling the correct bits in each step. |
| 322 | + inverse *= 2 - denominator * inverse; // inverse mod 2^8 |
| 323 | + inverse *= 2 - denominator * inverse; // inverse mod 2^16 |
| 324 | + inverse *= 2 - denominator * inverse; // inverse mod 2^32 |
| 325 | + inverse *= 2 - denominator * inverse; // inverse mod 2^64 |
| 326 | + inverse *= 2 - denominator * inverse; // inverse mod 2^128 |
| 327 | + inverse *= 2 - denominator * inverse; // inverse mod 2^256 |
| 328 | + |
| 329 | + // Because the division is now exact we can divide by multiplying with the modular inverse of denominator. |
| 330 | + // This will give us the correct result modulo 2^256. Since the preconditions guarantee that the outcome is |
| 331 | + // less than 2^256, this is the final result. We don't need to compute the high bits of the result and prod1 |
| 332 | + // is no longer required. |
| 333 | + result = prod0 * inverse; |
198 | 334 | } |
199 | 335 | } |
200 | 336 |
|
@@ -367,14 +503,9 @@ library LibDecimalFloatImplementation { |
367 | 503 | return signedCoefficientA == signedCoefficientB; |
368 | 504 | } |
369 | 505 |
|
370 | | - /// Inverts a float. Equivalent to `1 / x` with modest gas optimizations. |
| 506 | + /// Inverts a float. Equivalent to `1 / x`. |
371 | 507 | function inv(int256 signedCoefficient, int256 exponent) internal pure returns (int256, int256) { |
372 | | - (signedCoefficient, exponent) = normalize(signedCoefficient, exponent); |
373 | | - |
374 | | - signedCoefficient = 1e76 / signedCoefficient; |
375 | | - exponent = -exponent - 76; |
376 | | - |
377 | | - return (signedCoefficient, exponent); |
| 508 | + return div(1e76, -76, signedCoefficient, exponent); |
378 | 509 | } |
379 | 510 |
|
380 | 511 | /// log10(x) for a float x. |
|
0 commit comments