Skip to content
13 changes: 13 additions & 0 deletions doc/distributions/students_t.qbk
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,11 @@
RealType beta,
RealType sd,
RealType hint = 100);

// degrees of freedom inversion from a quantile and probability:
BOOST_MATH_GPU_ENABLED static RealType invert_probability_with_respect_to_degrees_of_freedom(
RealType x,
RealType p);
};

}} // namespaces
Expand Down Expand Up @@ -106,6 +111,13 @@ For more information on this function see the
[@http://www.itl.nist.gov/div898/handbook/prc/section2/prc222.htm
NIST Engineering Statistics Handbook].

BOOST_MATH_GPU_ENABLED static RealType invert_probability_with_respect_to_degrees_of_freedom(
RealType x,
RealType p);

Returns the degrees of freedom [nu] such that CDF(/x/; [nu]) = /p/.
Requires 0 < /p/ < 1 and /x/ [ne] 0, otherwise calls __domain_error.

[h4 Non-member Accessors]

All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions] that are generic to all
Expand Down Expand Up @@ -164,6 +176,7 @@ without the subtraction implied above.]]
[[skewness][if (v > 3) 0 else NaN ]]
[[kurtosis][if (v > 4) 3 * (v - 2) \/ (v - 4) else NaN]]
[[kurtosis excess][if (v > 4) 6 \/ (df - 4) else NaN]]
[[invert_probability_with_respect_to_degrees_of_freedom][Uses a 2nd-order Edgeworth expansion as initial guess for a numerical root finder]]
]

If the moment index /k/ is less than /v/, then the moment is undefined.
Expand Down
143 changes: 135 additions & 8 deletions include/boost/math/distributions/students_t.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,10 @@ class students_t_distribution
RealType sd,
RealType hint = 100);

BOOST_MATH_GPU_ENABLED static RealType invert_probability_with_respect_to_degrees_of_freedom(
RealType x,
RealType p);

private:
// Data member:
RealType df_; // degrees of freedom is a real number > 0 or +infinity.
Expand Down Expand Up @@ -308,6 +312,98 @@ struct sample_size_func
RealType alpha, beta, ratio;
};

template <class RealType, class Policy>
struct cdf_to_df_func
{
BOOST_MATH_GPU_ENABLED cdf_to_df_func(RealType x_val, RealType p_val, bool c)
: x(x_val), p(p_val), comp(c) {}

BOOST_MATH_GPU_ENABLED RealType operator()(const RealType& df)
{
students_t_distribution<RealType, Policy> t(df);
return comp ?
RealType(p - cdf(complement(t, x)))
: RealType(cdf(t, x) - p);
}
RealType x, p;
bool comp;
};

//
// Shared root-finding helper used by both find_degrees_of_freedom and
// invert_probability_with_respect_to_degrees_of_freedom.
//
template <class RealType, class Policy, class Func>
BOOST_MATH_GPU_ENABLED RealType solve_for_degrees_of_freedom(
Func f,
RealType hint,
bool rising,
const char* function,
Policy const&)
{
tools::eps_tolerance<RealType> tol(policies::digits<RealType, Policy>());
boost::math::uintmax_t max_iter = policies::get_max_root_iterations<Policy>();
boost::math::pair<RealType, RealType> r = tools::bracket_and_solve_root(
f, hint, RealType(2), rising, tol, max_iter, Policy());
RealType result = r.first + (r.second - r.first) / 2;
if (max_iter >= policies::get_max_root_iterations<Policy>())
{
return policies::raise_evaluation_error<RealType>(function, // LCOV_EXCL_LINE
"Unable to locate solution in a reasonable time: either there is no answer to " // LCOV_EXCL_LINE
"the degrees of freedom or the answer is infinite. Current best guess is %1%", // LCOV_EXCL_LINE
result, Policy()); // LCOV_EXCL_LINE
}
return result;
}

//
// Edgeworth warm-start for invert_probability_with_respect_to_degrees_of_freedom.
// Returns the best df estimate from the quadratic Edgeworth approximation,
// or a small fallback value (0.01) when no positive root is found.
//
template <class RealType, class Policy>
BOOST_MATH_GPU_ENABLED RealType approximate_df_with_edgeworth_expansion(RealType x_abs, RealType p_adj)
{
BOOST_MATH_STD_USING
// F(x; nu) ~ cdf_normal(x) - pdf_normal(x)*(x + x^3)/(4*nu) + pdf_normal(x)*(3x + 5x^3 + 7x^5 - 3x^7)/(96*nu^2)
// Substituting u = 1/nu and setting F(x; nu) = p gives b*u^2 - a*u + c = 0, where:
// a = pdf_normal(x) * (x + x^3) / 4
// b = pdf_normal(x) * (3x + 5x^3 + 7x^5 - 3x^7) / 96
// c = cdf_normal(x) - p
normal_distribution<RealType, Policy> std_normal(0, 1);
RealType pdf_val = pdf(std_normal, x_abs);
RealType cdf_val = cdf(std_normal, x_abs);

RealType x2 = x_abs * x_abs;
RealType x3 = x2 * x_abs;
RealType x5 = x3 * x2;
RealType x7 = x5 * x2;

RealType a = pdf_val * (x_abs + x3) / 4;
RealType b = pdf_val * (3 * x_abs + 5 * x3 + 7 * x5 - 3 * x7) / 96;
RealType c = cdf_val - p_adj;

RealType discriminant = a * a - 4 * b * c;
if (discriminant >= 0 && b != 0)
{
RealType sqrt_disc = sqrt(discriminant);
// Pick the smallest positive u (= largest df = 1/u).
RealType u1 = (a - sqrt_disc) / (2 * b);
RealType u2 = (a + sqrt_disc) / (2 * b);
RealType u = -1;
if (u1 > 0 && u2 > 0)
u = (u1 < u2) ? u1 : u2;
else if (u1 > 0)
u = u1;
else if (u2 > 0)
u = u2;

if (u > 0)
return 1 / u;
}
return static_cast<RealType>(0.01); // fallback
}

} // namespace detail

template <class RealType, class Policy>
Expand All @@ -332,16 +428,47 @@ BOOST_MATH_GPU_ENABLED RealType students_t_distribution<RealType, Policy>::find_
hint = 1;

detail::sample_size_func<RealType, Policy> f(alpha, beta, sd, difference_from_mean);
tools::eps_tolerance<RealType> tol(policies::digits<RealType, Policy>());
boost::math::uintmax_t max_iter = policies::get_max_root_iterations<Policy>();
boost::math::pair<RealType, RealType> r = tools::bracket_and_solve_root(f, hint, RealType(2), false, tol, max_iter, Policy());
RealType result = r.first + (r.second - r.first) / 2;
if(max_iter >= policies::get_max_root_iterations<Policy>())
return detail::solve_for_degrees_of_freedom(f, hint, false, function, Policy());
}

template <class RealType, class Policy>
BOOST_MATH_GPU_ENABLED RealType students_t_distribution<RealType, Policy>::invert_probability_with_respect_to_degrees_of_freedom(
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We already have a function for this here:

BOOST_MATH_GPU_ENABLED RealType students_t_distribution<RealType, Policy>::find_degrees_of_freedom(

However, you have a better initial guess to the root finder which would be a very welcome addition!

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I found this function but was not sure how to map the inversion problem we have here (finding degrees of freedom given a probability and a quantile) to this one arising from sample size calculation.

The signatures are:
find_degrees_of_freedom(RealType difference_from_mean, RealType alpha, RealType beta, RealType sd, RealType hint)
invert_probability_with_respect_to_degrees_of_freedom(RealType x, RealType p)

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah OK, slightly different problems, I would still overload the existing function though for consistency and call it:

find_degrees_of_freedom(RealType t_stat, RealType p)

RealType x,
RealType p)
{
BOOST_MATH_STD_USING // for ADL of std functions
constexpr auto function = "boost::math::students_t_distribution<%1%>::invert_probability_with_respect_to_degrees_of_freedom";
//
// Check for domain errors:
//
RealType error_result;
if (false == detail::check_probability(function, p, &error_result, Policy()))
return error_result;

// Use symmetry: CDF(-x; df) = 1 - CDF(x; df), so always work with x >= 0.
RealType x_abs = (x < 0) ? -x : x;
RealType p_adj = (x < 0) ? (1 - p) : p;

if (x_abs == 0)
{
return policies::raise_evaluation_error<RealType>(function, "Unable to locate solution in a reasonable time: either there is no answer to how many degrees of freedom are required" // LCOV_EXCL_LINE
" or the answer is infinite. Current best guess is %1%", result, Policy()); // LCOV_EXCL_LINE
// CDF(0; df) = 0.5 for all df; only solvable when p == 0.5.
if (p_adj == static_cast<RealType>(0.5))
return policies::raise_overflow_error<RealType>(function, 0, Policy());
return policies::raise_domain_error<RealType>(
function,
"No degrees of freedom can satisfy CDF(0; df) == %1% (must be 0.5).",
p, Policy());
}
return result;

// Edgeworth warm start: falls back to 0.01 if no positive root was found.
RealType hint = detail::approximate_df_with_edgeworth_expansion<RealType, Policy>(x_abs, p_adj);

// Root-find on f(df) = CDF(x; df) - p. For x > 0, CDF(x; df) is strictly
// increasing in df. Pass min(p_adj, q_adj) and use the complement CDF when
// p_adj > q_adj to avoid cancellation near 1.
RealType q_adj = 1 - p_adj;
detail::cdf_to_df_func<RealType, Policy> f(x_abs, p_adj < q_adj ? p_adj : q_adj, p_adj >= q_adj);
return detail::solve_for_degrees_of_freedom(f, hint, true, function, Policy());
}

template <class RealType, class Policy>
Expand Down
94 changes: 94 additions & 0 deletions test/test_students_t.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -548,6 +548,100 @@ void test_spots(RealType)
static_cast<RealType>(1.0))),
9);

// Tests for invert_probability_with_respect_to_degrees_of_freedom.
// Each case is derived from the CDF spot tests above: the exact df is known,
// so we verify that inverting CDF(x; df) = p recovers df to tight tolerance.
// float has ~7 significant digits; large-df inversion is ill-conditioned at that precision,
// so use a looser tolerance for single precision.
RealType tol_inv_df = std::is_same<RealType, float>::value
? static_cast<RealType>(0.1) // float: 0.1%
: static_cast<RealType>(0.01); // double+: 0.01%
BOOST_CHECK_CLOSE(
students_t_distribution<RealType>::invert_probability_with_respect_to_degrees_of_freedom(
static_cast<RealType>(-6.96455673428326),
static_cast<RealType>(0.01)),
static_cast<RealType>(2),
tol_inv_df);
BOOST_CHECK_CLOSE(
students_t_distribution<RealType>::invert_probability_with_respect_to_degrees_of_freedom(
static_cast<RealType>(-3.36492999890721),
static_cast<RealType>(0.01)),
static_cast<RealType>(5),
tol_inv_df);
BOOST_CHECK_CLOSE(
students_t_distribution<RealType>::invert_probability_with_respect_to_degrees_of_freedom(
static_cast<RealType>(-0.559429644),
static_cast<RealType>(0.3)),
static_cast<RealType>(5),
tol_inv_df);
BOOST_CHECK_CLOSE(
students_t_distribution<RealType>::invert_probability_with_respect_to_degrees_of_freedom(
static_cast<RealType>(1.475884049),
static_cast<RealType>(0.9)),
static_cast<RealType>(5),
tol_inv_df);
BOOST_CHECK_CLOSE(
students_t_distribution<RealType>::invert_probability_with_respect_to_degrees_of_freedom(
static_cast<RealType>(-1.475884049),
static_cast<RealType>(0.1)),
static_cast<RealType>(5),
tol_inv_df);
BOOST_CHECK_CLOSE(
students_t_distribution<RealType>::invert_probability_with_respect_to_degrees_of_freedom(
static_cast<RealType>(-5.2410429995425),
static_cast<RealType>(0.00001)),
static_cast<RealType>(25),
tol_inv_df);
BOOST_CHECK_CLOSE(
students_t_distribution<RealType>::invert_probability_with_respect_to_degrees_of_freedom(
static_cast<RealType>(6.96455673428326),
static_cast<RealType>(0.99)),
static_cast<RealType>(2),
tol_inv_df);
BOOST_CHECK_CLOSE(
students_t_distribution<RealType>::invert_probability_with_respect_to_degrees_of_freedom(
static_cast<RealType>(2),
static_cast<RealType>(0.610822886098362)),
static_cast<RealType>(0.1),
tol_inv_df);
BOOST_CHECK_CLOSE(
students_t_distribution<RealType>::invert_probability_with_respect_to_degrees_of_freedom(
static_cast<RealType>(2),
static_cast<RealType>(0.777242554908434)),
static_cast<RealType>(0.5),
tol_inv_df);
BOOST_CHECK_CLOSE(
students_t_distribution<RealType>::invert_probability_with_respect_to_degrees_of_freedom(
static_cast<RealType>(2),
static_cast<RealType>(0.822925875908677)),
static_cast<RealType>(0.75),
tol_inv_df);
BOOST_CHECK_CLOSE(
students_t_distribution<RealType>::invert_probability_with_respect_to_degrees_of_freedom(
static_cast<RealType>(2),
static_cast<RealType>(0.977114826753374)),
static_cast<RealType>(1000),
tol_inv_df);
BOOST_CHECK_CLOSE(
students_t_distribution<RealType>::invert_probability_with_respect_to_degrees_of_freedom(
static_cast<RealType>(1),
static_cast<RealType>(0.8413326478347855)),
static_cast<RealType>(1e4),
tol_inv_df * static_cast<RealType>(5)); // Looser tolerance for ill-conditioned problem with very large df
// Domain error: p outside (0,1)
#ifndef BOOST_NO_EXCEPTIONS
BOOST_MATH_CHECK_THROW(
students_t_distribution<RealType>::invert_probability_with_respect_to_degrees_of_freedom(
static_cast<RealType>(2),
static_cast<RealType>(-0.1)),
std::domain_error);
BOOST_MATH_CHECK_THROW(
students_t_distribution<RealType>::invert_probability_with_respect_to_degrees_of_freedom(
static_cast<RealType>(2),
static_cast<RealType>(1.1)),
std::domain_error);
#endif

// Test for large degrees of freedom when should be same as normal.
RealType inf = std::numeric_limits<RealType>::infinity();
RealType nan = std::numeric_limits<RealType>::quiet_NaN();
Expand Down
Loading