/* This file is part of the dynarmic project. * Copyright (c) 2018 MerryMage * This software may be used and distributed according to the terms of the GNU * General Public License version 2 or any later version. */ #include #include #include "common/assert.h" #include "common/common_types.h" #include "common/fp/fpcr.h" #include "common/fp/fpsr.h" #include "common/fp/info.h" #include "common/fp/op/FPRecipEstimate.h" #include "common/fp/process_exception.h" #include "common/fp/process_nan.h" #include "common/fp/unpacked.h" #include "common/safe_ops.h" namespace Dynarmic::FP { /// Input is a u0.9 fixed point number. Only values in [0.5, 1.0) are valid. /// Output is a u0.8 fixed point number, with an implied 1 prefixed. /// i.e.: The output is a value in [1.0, 2.0). static u8 RecipEstimate(u64 a) { constexpr u64 offset = 256; using LUT = std::array; static const LUT lut = [] { LUT result{}; for (u64 i = 0; i < result.size(); i++) { u64 a = i + offset; a = a * 2 + 1; u64 b = (1u << 19) / a; result[i] = static_cast((b + 1) / 2); } return result; }(); return lut[a - offset]; } template FPT FPRecipEstimate(FPT op, FPCR fpcr, FPSR& fpsr) { FPType type; bool sign; FPUnpacked value; std::tie(type, sign, value) = FPUnpack(op, fpcr, fpsr); if (type == FPType::SNaN || type == FPType::QNaN) { return FPProcessNaN(type, op, fpcr, fpsr); } if (type == FPType::Infinity) { return FPInfo::Zero(sign); } if (type == FPType::Zero) { FPProcessException(FPExc::DivideByZero, fpcr, fpsr); return FPInfo::Infinity(sign); } if (value.exponent < FPInfo::exponent_min - 2) { const bool overflow_to_inf = [&]{ switch (fpcr.RMode()) { case RoundingMode::ToNearest_TieEven: return true; case RoundingMode::TowardsPlusInfinity: return !sign; case RoundingMode::TowardsMinusInfinity: return sign; case RoundingMode::TowardsZero: return false; default: UNREACHABLE(); } return false; }(); FPProcessException(FPExc::Overflow, fpcr, fpsr); FPProcessException(FPExc::Inexact, fpcr, fpsr); return overflow_to_inf ? FPInfo::Infinity(sign) : FPInfo::MaxNormal(sign); } if ((fpcr.FZ() && !std::is_same_v) || (fpcr.FZ16() && std::is_same_v)) { if (value.exponent >= -FPInfo::exponent_min) { fpsr.UFC(true); return FPInfo::Zero(sign); } } const u64 scaled = Safe::LogicalShiftRight(value.mantissa, normalized_point_position - 8); u64 estimate = static_cast(RecipEstimate(scaled)) << (FPInfo::explicit_mantissa_width - 8); int result_exponent = -value.exponent; if (result_exponent < FPInfo::exponent_min) { switch (result_exponent) { case (FPInfo::exponent_min - 1): estimate |= FPInfo::implicit_leading_bit; estimate >>= 1; break; case (FPInfo::exponent_min - 2): estimate |= FPInfo::implicit_leading_bit; estimate >>= 2; result_exponent = 0; break; default: UNREACHABLE(); } } const FPT bits_exponent = static_cast(result_exponent + FPInfo::exponent_bias); const FPT bits_mantissa = static_cast(estimate); return (bits_exponent << FPInfo::explicit_mantissa_width) | (bits_mantissa & FPInfo::mantissa_mask); } template u32 FPRecipEstimate(u32 op, FPCR fpcr, FPSR& fpsr); template u64 FPRecipEstimate(u64 op, FPCR fpcr, FPSR& fpsr); } // namespace Dynarmic::FP