Rework subtraction rounding (again) for more accuracy

- Go back to the old method of computing the mantissa, but when post
  processing, expand the mantissa to slightly larger than maxMantissa,
  then in doRoundDown, if the result is not exact, subtract one.
  Finally, let doNormalize figure out the rounding of the result.
This commit is contained in:
Ed Hennis
2026-06-05 18:02:45 -04:00
parent 74c66d0944
commit 012c67a7eb
3 changed files with 254 additions and 89 deletions

View File

@@ -877,6 +877,26 @@ to_string(MantissaRange::MantissaScale const& scale)
}
}
inline std::string
to_string(Number::RoundingMode const& round)
{
switch (round)
{
enum class RoundingMode { ToNearest, TowardsZero, Downward, Upward };
case Number::RoundingMode::ToNearest:
return "ToNearest";
case Number::RoundingMode::TowardsZero:
return "TowardsZero";
case Number::RoundingMode::Downward:
return "Downward";
case Number::RoundingMode::Upward:
return "Upward";
default:
throw std::runtime_error("Bad rounding mode");
}
}
class SaveNumberRoundMode
{
Number::RoundingMode mode_;

View File

@@ -208,6 +208,10 @@ public:
unsigned
pop() noexcept;
// if true, there are no more digits to recover with pop()
bool
empty() const noexcept;
/** Drop a digit from the mantissa, and increment the exponent, storing the dropped digit in
* this Guard.
*
@@ -221,8 +225,17 @@ public:
doDropDigit(T& mantissa, int& exponent) noexcept;
enum class Round {
// The result is exact. No rounding is needed.
Exact = -2,
// Round down. Since we use integer math, that usually means no change is needed.
// Exceptions are for when the result is between kMaxRap and kMaxRepUp (round to kMaxRep),
// or after subtraction where _any_ remainder will modify the result. The latter is what
// distinguishes Exact from Down.
Down = -1,
// The result was exactly half-way between two integers. This will round to whichever of
// the two is even.
Even = 0,
// Round up. Always adds 1 (or subtracts 1 in some cases if cuspRoundingFix is not enabled)
Up = 1,
};
@@ -302,6 +315,13 @@ Number::Guard::pop() noexcept
return d;
}
// if true, there are no more digits to recover with pop()
inline bool
Number::Guard::empty() const noexcept
{
return digits_ == 0 && !xbit_;
}
template <class T>
void
Number::Guard::doDropDigit(T& mantissa, int& exponent) noexcept
@@ -332,6 +352,12 @@ Number::Guard::round() const noexcept
{
auto mode = Number::getround();
if (cuspRoundingFix_ != MantissaRange::CuspRoundingFix::Disabled && empty())
{
// No remainder
return Round::Exact;
}
if (mode == RoundingMode::TowardsZero)
return Round::Down;
@@ -445,13 +471,31 @@ void
Number::Guard::doRoundDown(bool& negative, T& mantissa, int& exponent)
{
auto r = round();
if (r == Round::Up || (r == Round::Even && (mantissa & 1) == 1))
if (cuspRoundingFix_ != MantissaRange::CuspRoundingFix::Disabled)
{
--mantissa;
if (mantissa < minMantissa_)
// If there was any remainder, subtract 1 from the result, and pad with 9s.
// Example with 4 digit mantissas:
// 1000 - 0.0000000001 = 999.9999999999
// In operator+=, the result will be: 1000, with Guard holding (0, xbit=true)
// * Rounding away from zero, the result should be 1000
// * Rounding towards zero, the result should be 999.9
// * Rounding to nearest, the result should be 999.9
// The most accurate result is always 999.9
if (r != Round::Exact)
{
mantissa *= 10;
--exponent;
--mantissa;
}
}
else
{
if (r == Round::Up || (r == Round::Even && (mantissa & 1) == 1))
{
--mantissa;
if (mantissa < minMantissa_)
{
mantissa *= 10;
--exponent;
}
}
}
bringIntoRange(negative, mantissa, exponent);
@@ -720,46 +764,24 @@ Number::operator+=(Number const& y)
// Bring the exponents of both values into agreement, so the mantissas are on the same scale
// and can be added directly together
// expandM / expandE: First try to expand the mantissa and bring the exponent down
// shringM / shrinkE: Then shrink the mantissa and bring the exponent up, if necessary
auto const adjust = [&g](uint128_t& expandM, int& expandE, uint128_t& shrinkM, int& shrinkE) {
constexpr uint128_t kSafeLimit = kPowerOfTenImpl<uint128_t, detail::kUint128Digits>[37];
if (g.cuspRoundingFix_ == MantissaRange::CuspRoundingFix::Enabled)
{
while (shrinkE < expandE && shrinkM % 10 == 0)
{
g.doDropDigit(shrinkM, shrinkE);
}
// We've got 128 bits of mantissa to work with here. Don't throw away data unless we
// have to
while (shrinkE < expandE && expandE > kMinExponent && expandM < kSafeLimit)
{
expandM *= 10;
--expandE;
}
}
while (shrinkE < expandE)
{
g.doDropDigit(shrinkM, shrinkE);
}
};
// Then shrink the mantissa and bring the exponent up of the value with the lower exponent
if (xe < ye)
{
if (xn)
g.setNegative();
adjust(ym, ye, xm, xe);
do
{
g.doDropDigit(xm, xe);
} while (xe < ye);
}
else if (xe > ye)
{
if (yn)
g.setNegative();
adjust(xm, xe, ym, ye);
do
{
g.doDropDigit(ym, ye);
} while (xe > ye);
}
if (xn == yn)
@@ -793,31 +815,38 @@ Number::operator+=(Number const& y)
xe = ye;
xn = yn;
}
while (xm < minMantissa && xm * 10 <= kMaxRep)
if (cuspRoundingFix == MantissaRange::CuspRoundingFix::Enabled)
{
xm *= 10;
xm -= g.pop();
--xe;
}
g.doRoundDown(xn, xm, xe);
if (cuspRoundingFix == MantissaRange::CuspRoundingFix::Enabled && xm != 0)
{
// this will be going away
Guard g(kRange);
if (xn)
g.setNegative();
while (xm > maxMantissa || xm > kMaxRep)
// Grow xm/xe and pull digits out of the Guard until it's just past the range, so that
// normalize will have enough information to make an accurate rounding decision, but
// stop if the Guard empties out. Note that if the xbit is set, the Guard will never be
// empty.
while (xm <= maxMantissa && !g.empty())
{
g.doDropDigit(xm, xe);
xm *= 10;
xm -= g.pop();
--xe;
}
g.doRoundUp(xn, xm, xe, "Number::addition overflow");
}
else
{
// Grow xm/xe and pull digits out of the Guard until it's back in range.
while (xm < minMantissa && xm * 10 <= kMaxRep)
{
xm *= 10;
xm -= g.pop();
--xe;
}
}
// Round down, based on whether there is any data left in the Guard (depending on
// cuspRoundingFix)
g.doRoundDown(xn, xm, xe);
}
doNormalize(xn, xm, xe, minMantissa, maxMantissa, cuspRoundingFix, false);
negative_ = xn;
mantissa_ = static_cast<internalrep>(xm);
exponent_ = xe;
normalize(g);
return *this;
}

View File

@@ -43,12 +43,17 @@ class Number_test : public beast::unit_test::Suite
return out;
}
static BigInt
BigInt
toBigInt(Number const& n)
{
BigInt v = n.mantissa();
for (int i = 0; i < n.exponent(); ++i)
v *= 10;
for (int i = 0; i > n.exponent(); --i)
{
BEAST_EXPECT(v % 10 == 0);
v /= 10;
}
return v;
}
@@ -1923,49 +1928,160 @@ public:
}
{
testcase << "operator+ TowardsZero rounds away from zero " << to_string(scale);
testcase << "subtraction rounding " << to_string(scale);
Number const a{1LL, 20};
Number const b{-1'000'000'000'000'000'001LL};
BEAST_EXPECT(toBigInt(a) == BigInt{"100000000000000000000"});
if (scale != MantissaRange::MantissaScale::Small)
{
BEAST_EXPECT(toBigInt(b) == BigInt{"-1000000000000000001"});
}
else
{
BEAST_EXPECT(toBigInt(b) == BigInt{"-1000000000000000000"});
}
Number sum;
{
NumberRoundModeGuard const roundGuard{Number::RoundingMode::TowardsZero};
sum = a + b;
}
BigInt const exact = toBigInt(a) + toBigInt(b);
BigInt const stored = toBigInt(sum);
BigInt const diff = stored - exact;
log << "\n a = " << a << "\n b = " << b
<< "\n exact a + b = " << exact.str() << "\n TowardsZero = " << stored.str()
<< "\n difference = " << diff.str() << "\n";
log.flush();
auto const exp = Number::mantissaLog();
Number const a{1LL, exp + 2};
Number const b{-(Number{1, exp} + 1)};
if (scale == MantissaRange::MantissaScale::Small)
{
BEAST_EXPECT(stored == exact);
}
else if (scale == MantissaRange::MantissaScale::LargeLegacy)
{
BEAST_EXPECT(stored > exact);
BEAST_EXPECT(toBigInt(a) == BigInt{"100000000000000000"});
BEAST_EXPECT(toBigInt(b) == BigInt{"-1000000000000001"});
}
else
{
BEAST_EXPECT(stored < exact);
BEAST_EXPECT(diff < 0);
BEAST_EXPECT(-diff < pow10<BigInt>(sum.exponent()));
BEAST_EXPECT(toBigInt(a) == BigInt{"100000000000000000000"});
BEAST_EXPECT(toBigInt(b) == BigInt{"-1000000000000000001"});
}
auto construct = [&a, &b, this](Number::RoundingMode r) {
NumberRoundModeGuard const roundGuard{r};
auto const sum = a + b;
BigInt const stored = toBigInt(sum);
return std::make_pair(r, std::make_pair(stored, sum));
};
auto const bigA = toBigInt(a);
auto const bigB = toBigInt(b);
BigInt const exact = bigA + bigB;
auto const sums = [&]() {
std::map<Number::RoundingMode, std::pair<BigInt, Number>> sums;
sums.emplace(construct(Number::RoundingMode::TowardsZero));
sums.emplace(construct(Number::RoundingMode::Upward));
sums.emplace(construct(Number::RoundingMode::Downward));
sums.emplace(construct(Number::RoundingMode::ToNearest));
return sums;
}();
log << "\n a = " << a << " (" << fmt(bigA) << ")\n b = " << b
<< " (" << fmt(bigB) << ")\n exact a + b = " << fmt(exact) << "\n";
for (auto const& [r, sum] : sums)
{
auto const diff = sum.first - exact;
auto const rLabel = to_string(r);
log << std::string(15 - rLabel.length(), ' ') << rLabel << " = " << fmt(sum.first)
<< "\n difference = " << fmt(diff) << "\n";
}
log.flush();
switch (scale)
{
case MantissaRange::MantissaScale::Small:
case MantissaRange::MantissaScale::LargeLegacy: {
// Without the fix, all the results but one round up
BEAST_EXPECT(sums.at(Number::RoundingMode::TowardsZero).first > exact);
BEAST_EXPECT(sums.at(Number::RoundingMode::Upward).first > exact);
BEAST_EXPECT(sums.at(Number::RoundingMode::ToNearest).first > exact);
// Downward works because the Guard sign is negative, and Downward returns Up
// instead of Down if negative and there's a remainder, whereas TowardsZero
// always returns Down.
BEAST_EXPECT(sums.at(Number::RoundingMode::Downward).first < exact);
break;
}
default: {
for (auto const& [r, sum] : sums)
{
auto const epsilon = pow10<BigInt>(sum.second.exponent());
BEAST_EXPECT(epsilon == 100);
auto diff = sum.first - exact;
switch (r)
{
case Number::RoundingMode::Upward:
case Number::RoundingMode::ToNearest:
BEAST_EXPECT(sum.first > exact);
BEAST_EXPECT(diff < epsilon);
break;
default:
BEAST_EXPECT(sum.first < exact);
BEAST_EXPECT(-diff < epsilon);
}
}
}
}
}
{
auto const offset = 30;
testcase << "subtraction rounding offset of " << offset << " " << to_string(scale);
auto const exp = Number::mantissaLog();
Number const a{1LL, exp + offset};
Number const b{-1};
auto construct = [&a, &b, this](Number::RoundingMode r) {
NumberRoundModeGuard const roundGuard{r};
auto const sum = a + b;
BigInt const stored = toBigInt(sum);
return std::make_pair(r, std::make_pair(stored, sum));
};
auto const bigA = toBigInt(a);
auto const bigB = toBigInt(b);
BigInt const exact = bigA + bigB;
auto const sums = [&]() {
std::map<Number::RoundingMode, std::pair<BigInt, Number>> sums;
sums.emplace(construct(Number::RoundingMode::TowardsZero));
sums.emplace(construct(Number::RoundingMode::Upward));
sums.emplace(construct(Number::RoundingMode::Downward));
sums.emplace(construct(Number::RoundingMode::ToNearest));
return sums;
}();
log << "\n a = " << a << " (" << fmt(bigA) << ")\n b = " << b
<< " (" << fmt(bigB) << ")\n exact a + b = " << fmt(exact) << "\n";
for (auto const& [r, sum] : sums)
{
auto const diff = sum.first - exact;
auto const rLabel = to_string(r);
log << std::string(15 - rLabel.length(), ' ') << rLabel << " = " << fmt(sum.first)
<< "\n difference = " << fmt(diff) << "\n";
}
log.flush();
switch (scale)
{
case MantissaRange::MantissaScale::Small:
case MantissaRange::MantissaScale::LargeLegacy: {
// Without the fix, all the results but one round up
BEAST_EXPECT(sums.at(Number::RoundingMode::TowardsZero).first > exact);
BEAST_EXPECT(sums.at(Number::RoundingMode::Upward).first > exact);
BEAST_EXPECT(sums.at(Number::RoundingMode::ToNearest).first > exact);
// Downward works because the Guard sign is negative, and Downward returns Up
// instead of Down if negative and there's a remainder, whereas TowardsZero
// always returns Down.
BEAST_EXPECT(sums.at(Number::RoundingMode::Downward).first < exact);
break;
}
default: {
for (auto const& [r, sum] : sums)
{
auto const epsilon = pow10<BigInt>(sum.second.exponent());
auto diff = sum.first - exact;
switch (r)
{
case Number::RoundingMode::Upward:
case Number::RoundingMode::ToNearest:
BEAST_EXPECT(sum.first > exact);
BEAST_EXPECT(diff < epsilon);
break;
default:
BEAST_EXPECT(sum.first < exact);
BEAST_EXPECT(-diff < epsilon);
}
}
}
}
}
}