Jacobi: add safe scaling to real-scalar makeGivens

libeigen/eigen!2513

Co-authored-by: Rasmus Munk Larsen <rmlarsen@gmail.com>
diff --git a/Eigen/src/Jacobi/Jacobi.h b/Eigen/src/Jacobi/Jacobi.h
index a473412..35890c1 100644
--- a/Eigen/src/Jacobi/Jacobi.h
+++ b/Eigen/src/Jacobi/Jacobi.h
@@ -213,24 +213,68 @@
     m_c = p < Scalar(0) ? Scalar(-1) : Scalar(1);
     m_s = Scalar(0);
     if (r) *r = abs(p);
-  } else if (numext::is_exactly_zero(p)) {
+    return;
+  }
+  if (numext::is_exactly_zero(p)) {
     m_c = Scalar(0);
     m_s = q < Scalar(0) ? Scalar(1) : Scalar(-1);
     if (r) *r = abs(q);
-  } else if (abs(p) > abs(q)) {
-    Scalar t = q / p;
-    Scalar u = sqrt(Scalar(1) + numext::abs2(t));
-    if (p < Scalar(0)) u = -u;
-    m_c = Scalar(1) / u;
-    m_s = -t * m_c;
-    if (r) *r = p * u;
+    return;
+  }
+
+  // Safe-range thresholds following Anderson, "Algorithm 978: Safe Scaling
+  // in the Level 1 BLAS", ACM TOMS 44(1), 2017.  When both |p| and |q| lie
+  // in (rtmin, rtmax), the direct formula r = p * sqrt(1 + (q/p)^2) cannot
+  // over- or underflow before the true result would.  Outside that range
+  // we prescale by max(|p|, |q|) (clamped into [safmin, safmax]) so that
+  // the squared sum stays in the representable range.  This preserves the
+  // existing Eigen sign convention (r >= 0, sign carried in c).
+  const Scalar safmin = (std::numeric_limits<Scalar>::min)();
+  const Scalar safmax = Scalar(1) / safmin;
+  const Scalar rtmin = sqrt(safmin);
+  const Scalar rtmax = sqrt(safmax / Scalar(2));
+  const Scalar abs_p = abs(p);
+  const Scalar abs_q = abs(q);
+  const Scalar mx = numext::maxi(abs_p, abs_q);
+  const Scalar mn = numext::mini(abs_p, abs_q);
+
+  if (EIGEN_PREDICT_TRUE(mx < rtmax && mn > rtmin)) {
+    // Safe range: existing direct formulas are stable.
+    if (abs_p > abs_q) {
+      Scalar t = q / p;
+      Scalar u = sqrt(Scalar(1) + numext::abs2(t));
+      if (p < Scalar(0)) u = -u;
+      m_c = Scalar(1) / u;
+      m_s = -t * m_c;
+      if (r) *r = p * u;
+    } else {
+      Scalar t = p / q;
+      Scalar u = sqrt(Scalar(1) + numext::abs2(t));
+      if (q < Scalar(0)) u = -u;
+      m_s = -Scalar(1) / u;
+      m_c = -t * m_s;
+      if (r) *r = q * u;
+    }
   } else {
-    Scalar t = p / q;
-    Scalar u = sqrt(Scalar(1) + numext::abs2(t));
-    if (q < Scalar(0)) u = -u;
-    m_s = -Scalar(1) / u;
-    m_c = -t * m_s;
-    if (r) *r = q * u;
+    // Out of safe range: prescale by max(|p|, |q|) clamped into [safmin, safmax].
+    const Scalar scale = numext::mini(safmax, numext::maxi(safmin, numext::maxi(abs_p, abs_q)));
+    const Scalar ps = p / scale;
+    const Scalar qs = q / scale;
+    if (abs_p > abs_q) {
+      Scalar t = qs / ps;
+      Scalar u = sqrt(Scalar(1) + numext::abs2(t));
+      if (ps < Scalar(0)) u = -u;
+      m_c = Scalar(1) / u;
+      m_s = -t * m_c;
+      if (r) *r = (ps * u) * scale;
+    } else {
+      Scalar t = ps / qs;
+      Scalar u = sqrt(Scalar(1) + numext::abs2(t));
+      if (qs < Scalar(0)) u = -u;
+      m_s = -Scalar(1) / u;
+      m_c = -t * m_s;
+      if (r) *r = (qs * u) * scale;
+    }
   }
 }
 
diff --git a/test/jacobi.cpp b/test/jacobi.cpp
index 36e17cc..eab2d45 100644
--- a/test/jacobi.cpp
+++ b/test/jacobi.cpp
@@ -54,8 +54,77 @@
   }
 }
 
+// Verify that JacobiRotation::makeGivens(p, q, &r) produces a rotation that
+// zeros out q, even when (p, q) straddle the over-/underflow thresholds
+// where the direct formula r = p * sqrt(1 + (q/p)^2) would over- or
+// underflow.  Eigen's convention is r >= 0 with sign carried in c.
+template <typename Scalar>
+void verify_makeGivens(const Scalar& p, const Scalar& q) {
+  using std::abs;
+  Scalar r;
+  JacobiRotation<Scalar> rot;
+  rot.makeGivens(p, q, &r);
+
+  // Eigen's J^T * [p; q] = [r; 0] with J = [c s; -s c], so:
+  //   c*p - s*q = r,  s*p + c*q = 0.
+  Scalar rotated0 = rot.c() * p - rot.s() * q;
+  Scalar rotated1 = rot.s() * p + rot.c() * q;
+
+  Scalar tol = NumTraits<Scalar>::epsilon() * (abs(r) + (std::numeric_limits<Scalar>::min)()) * Scalar(8);
+  VERIFY(abs(rotated0 - r) <= tol);
+  VERIFY(abs(rotated1) <= tol);
+  VERIFY(r >= Scalar(0));
+  VERIFY_IS_APPROX(numext::abs2(rot.c()) + numext::abs2(rot.s()), Scalar(1));
+}
+
+template <typename Scalar>
+void jacobi_makegivens_safe_scaling() {
+  using std::sqrt;
+  const Scalar safmin = (std::numeric_limits<Scalar>::min)();
+  const Scalar safmax = Scalar(1) / safmin;
+  const Scalar rtmin = sqrt(safmin);
+  const Scalar rtmax = sqrt(safmax / Scalar(2));
+  const Scalar one(1);
+  const Scalar two(2);
+  const Scalar half(0.5);
+
+  // Safe-range cases (regression — must keep existing fast path working).
+  verify_makeGivens<Scalar>(Scalar(3), Scalar(4));
+  verify_makeGivens<Scalar>(Scalar(-3), Scalar(4));
+  verify_makeGivens<Scalar>(Scalar(3), Scalar(-4));
+  verify_makeGivens<Scalar>(Scalar(-3), Scalar(-4));
+
+  // Both inputs near overflow: direct formula r = p * sqrt(1+(q/p)^2) would
+  // overflow because sqrt(1+1) > 1.  Prescaling avoids this.
+  verify_makeGivens<Scalar>(rtmax * two, rtmax);
+  verify_makeGivens<Scalar>(-rtmax * two, rtmax);
+  verify_makeGivens<Scalar>(rtmax, rtmax);
+  verify_makeGivens<Scalar>(rtmax * Scalar(1.5), rtmax * Scalar(1.5));
+
+  // Both inputs near underflow / subnormal: direct (q/p)^2 underflows to 0.
+  verify_makeGivens<Scalar>(rtmin * half, rtmin * half);
+  verify_makeGivens<Scalar>(safmin, safmin);
+  verify_makeGivens<Scalar>(-safmin, safmin);
+
+  // Mixed: one near overflow, one normal.
+  verify_makeGivens<Scalar>(rtmax * Scalar(1.5), one);
+  verify_makeGivens<Scalar>(one, rtmax * Scalar(1.5));
+  verify_makeGivens<Scalar>(-rtmax * Scalar(1.5), one);
+
+  // Mixed: one near underflow, one normal.
+  verify_makeGivens<Scalar>(safmin, one);
+  verify_makeGivens<Scalar>(one, safmin);
+
+  // Mixed: subnormal and near-overflow simultaneously.
+  verify_makeGivens<Scalar>(safmin, rtmax);
+  verify_makeGivens<Scalar>(rtmax, safmin);
+}
+
 EIGEN_DECLARE_TEST(jacobi) {
   for (int i = 0; i < g_repeat; i++) {
+    CALL_SUBTEST_7((jacobi_makegivens_safe_scaling<float>()));
+    CALL_SUBTEST_7((jacobi_makegivens_safe_scaling<double>()));
+
     CALL_SUBTEST_1((jacobi<Matrix3f, float>()));
     CALL_SUBTEST_2((jacobi<Matrix4d, double>()));
     CALL_SUBTEST_3((jacobi<Matrix4cf, float>()));