Skip to content

Commit b61d325

Browse files
yasumorishimaclaude
andcommitted
Add geometric distribution (lpmf, cdf, lcdf, lccdf, rng)
Implements the geometric distribution as requested in #3098. The geometric distribution models the number of failures before the first success, with PMF: P(n|theta) = theta * (1-theta)^n where n in {0, 1, 2, ...} and theta in (0, 1]. This uses the number-of-failures parameterization, consistent with the geometric distribution being a special case of the negative binomial distribution with r=1 (neg_binomial(1, theta/(1-theta))). Verified: geometric_lpmf(n, theta) == neg_binomial_lpmf(n, 1, theta/(1-theta)) for all tested values. Includes: - geometric_lpmf: log probability mass function with autodiff - geometric_cdf: cumulative distribution function with autodiff - geometric_lcdf: log CDF with autodiff - geometric_lccdf: log complementary CDF with autodiff - geometric_rng: random number generation via inverse CDF method - Unit tests for all functions including distribution checks - CDF test fixture for gradient verification Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 8ee3d96 commit b61d325

8 files changed

Lines changed: 625 additions & 0 deletions

File tree

stan/math/prim/prob.hpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,11 @@
111111
#include <stan/math/prim/prob/gamma_lcdf.hpp>
112112
#include <stan/math/prim/prob/gamma_lpdf.hpp>
113113
#include <stan/math/prim/prob/gamma_rng.hpp>
114+
#include <stan/math/prim/prob/geometric_cdf.hpp>
115+
#include <stan/math/prim/prob/geometric_lccdf.hpp>
116+
#include <stan/math/prim/prob/geometric_lcdf.hpp>
117+
#include <stan/math/prim/prob/geometric_lpmf.hpp>
118+
#include <stan/math/prim/prob/geometric_rng.hpp>
114119
#include <stan/math/prim/prob/gaussian_dlm_obs_lpdf.hpp>
115120
#include <stan/math/prim/prob/gaussian_dlm_obs_rng.hpp>
116121
#include <stan/math/prim/prob/gumbel_ccdf_log.hpp>
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
#ifndef STAN_MATH_PRIM_PROB_GEOMETRIC_CDF_HPP
2+
#define STAN_MATH_PRIM_PROB_GEOMETRIC_CDF_HPP
3+
4+
#include <stan/math/prim/meta.hpp>
5+
#include <stan/math/prim/err.hpp>
6+
#include <stan/math/prim/fun/constants.hpp>
7+
#include <stan/math/prim/fun/exp.hpp>
8+
#include <stan/math/prim/fun/log1m.hpp>
9+
#include <stan/math/prim/fun/max_size.hpp>
10+
#include <stan/math/prim/fun/scalar_seq_view.hpp>
11+
#include <stan/math/prim/fun/size.hpp>
12+
#include <stan/math/prim/fun/size_zero.hpp>
13+
#include <stan/math/prim/fun/value_of.hpp>
14+
#include <stan/math/prim/functor/partials_propagator.hpp>
15+
#include <limits>
16+
17+
namespace stan {
18+
namespace math {
19+
20+
/** \ingroup prob_dists
21+
* Returns the CDF of the geometric distribution with success probability
22+
* parameter. Given containers of matching sizes, returns the product of
23+
* probabilities.
24+
*
25+
* CDF: F(n | theta) = 1 - (1 - theta)^(n + 1)
26+
*
27+
* @tparam T_n type of outcome variable
28+
* @tparam T_prob type of success probability parameter
29+
*
30+
* @param n outcome variable (number of failures before first success)
31+
* @param theta success probability parameter
32+
* @return probability or product of probabilities
33+
* @throw std::domain_error if theta is not in [0, 1]
34+
* @throw std::invalid_argument if container sizes mismatch
35+
*/
36+
template <typename T_n, typename T_prob>
37+
inline return_type_t<T_prob> geometric_cdf(const T_n& n,
38+
const T_prob& theta) {
39+
using T_partials_return = partials_return_t<T_n, T_prob>;
40+
using T_n_ref = ref_type_t<T_n>;
41+
using T_prob_ref = ref_type_t<T_prob>;
42+
static constexpr const char* function = "geometric_cdf";
43+
44+
check_consistent_sizes(function, "Outcome variable", n,
45+
"Success probability parameter", theta);
46+
if (size_zero(n, theta)) {
47+
return 1.0;
48+
}
49+
50+
T_n_ref n_ref = n;
51+
T_prob_ref theta_ref = theta;
52+
check_bounded(function, "Success probability parameter",
53+
value_of(theta_ref), 0.0, 1.0);
54+
55+
scalar_seq_view<T_n_ref> n_vec(n_ref);
56+
scalar_seq_view<T_prob_ref> theta_vec(theta_ref);
57+
const size_t max_size_seq_view = max_size(n_ref, theta_ref);
58+
59+
for (int i = 0; i < stan::math::size(n); i++) {
60+
if (n_vec.val(i) < 0) {
61+
return 0.0;
62+
}
63+
}
64+
65+
T_partials_return cdf(1.0);
66+
auto ops_partials = make_partials_propagator(theta_ref);
67+
for (size_t i = 0; i < max_size_seq_view; i++) {
68+
const auto theta_val = theta_vec.val(i);
69+
const auto n_val = n_vec.val(i);
70+
const auto np1 = n_val + 1.0;
71+
const auto log1m_theta = log1m(theta_val);
72+
const auto ccdf_i = stan::math::exp(np1 * log1m_theta);
73+
const auto cdf_i = 1.0 - ccdf_i;
74+
75+
cdf *= cdf_i;
76+
77+
if constexpr (is_autodiff_v<T_prob>) {
78+
partials<0>(ops_partials)[i]
79+
+= np1 * ccdf_i / ((1.0 - theta_val) * cdf_i);
80+
}
81+
}
82+
83+
if constexpr (is_autodiff_v<T_prob>) {
84+
for (size_t i = 0; i < stan::math::size(theta); ++i) {
85+
partials<0>(ops_partials)[i] *= cdf;
86+
}
87+
}
88+
89+
return ops_partials.build(cdf);
90+
}
91+
92+
} // namespace math
93+
} // namespace stan
94+
#endif
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
#ifndef STAN_MATH_PRIM_PROB_GEOMETRIC_LCCDF_HPP
2+
#define STAN_MATH_PRIM_PROB_GEOMETRIC_LCCDF_HPP
3+
4+
#include <stan/math/prim/meta.hpp>
5+
#include <stan/math/prim/err.hpp>
6+
#include <stan/math/prim/fun/constants.hpp>
7+
#include <stan/math/prim/fun/log1m.hpp>
8+
#include <stan/math/prim/fun/max_size.hpp>
9+
#include <stan/math/prim/fun/scalar_seq_view.hpp>
10+
#include <stan/math/prim/fun/size.hpp>
11+
#include <stan/math/prim/fun/size_zero.hpp>
12+
#include <stan/math/prim/fun/value_of.hpp>
13+
#include <stan/math/prim/functor/partials_propagator.hpp>
14+
15+
namespace stan {
16+
namespace math {
17+
18+
/** \ingroup prob_dists
19+
* Returns the log CCDF of the geometric distribution with success probability
20+
* parameter. Given containers of matching sizes, returns the log sum of
21+
* probabilities.
22+
*
23+
* log CCDF: (n + 1) * log(1 - theta)
24+
*
25+
* @tparam T_n type of outcome variable
26+
* @tparam T_prob type of success probability parameter
27+
*
28+
* @param n outcome variable (number of failures before first success)
29+
* @param theta success probability parameter
30+
* @return log complementary probability or log sum
31+
* @throw std::domain_error if theta is not in [0, 1]
32+
* @throw std::invalid_argument if container sizes mismatch
33+
*/
34+
template <typename T_n, typename T_prob>
35+
inline return_type_t<T_prob> geometric_lccdf(const T_n& n,
36+
const T_prob& theta) {
37+
using T_partials_return = partials_return_t<T_n, T_prob>;
38+
using T_n_ref = ref_type_t<T_n>;
39+
using T_prob_ref = ref_type_t<T_prob>;
40+
static constexpr const char* function = "geometric_lccdf";
41+
42+
check_consistent_sizes(function, "Outcome variable", n,
43+
"Success probability parameter", theta);
44+
if (size_zero(n, theta)) {
45+
return 0.0;
46+
}
47+
48+
T_n_ref n_ref = n;
49+
T_prob_ref theta_ref = theta;
50+
check_bounded(function, "Success probability parameter",
51+
value_of(theta_ref), 0.0, 1.0);
52+
53+
scalar_seq_view<T_n_ref> n_vec(n_ref);
54+
scalar_seq_view<T_prob_ref> theta_vec(theta_ref);
55+
const size_t max_size_seq_view = max_size(n_ref, theta_ref);
56+
57+
for (int i = 0; i < stan::math::size(n); i++) {
58+
if (n_vec.val(i) < 0) {
59+
return 0.0;
60+
}
61+
}
62+
63+
T_partials_return log_ccdf(0.0);
64+
auto ops_partials = make_partials_propagator(theta_ref);
65+
for (size_t i = 0; i < max_size_seq_view; i++) {
66+
const auto theta_val = theta_vec.val(i);
67+
const auto n_val = n_vec.val(i);
68+
const auto np1 = n_val + 1.0;
69+
const auto log1m_theta = log1m(theta_val);
70+
71+
log_ccdf += np1 * log1m_theta;
72+
73+
if constexpr (is_autodiff_v<T_prob>) {
74+
partials<0>(ops_partials)[i] += -np1 / (1.0 - theta_val);
75+
}
76+
}
77+
78+
return ops_partials.build(log_ccdf);
79+
}
80+
81+
} // namespace math
82+
} // namespace stan
83+
#endif
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
#ifndef STAN_MATH_PRIM_PROB_GEOMETRIC_LCDF_HPP
2+
#define STAN_MATH_PRIM_PROB_GEOMETRIC_LCDF_HPP
3+
4+
#include <stan/math/prim/meta.hpp>
5+
#include <stan/math/prim/err.hpp>
6+
#include <stan/math/prim/fun/constants.hpp>
7+
#include <stan/math/prim/fun/exp.hpp>
8+
#include <stan/math/prim/fun/log1m.hpp>
9+
#include <stan/math/prim/fun/log1m_exp.hpp>
10+
#include <stan/math/prim/fun/max_size.hpp>
11+
#include <stan/math/prim/fun/scalar_seq_view.hpp>
12+
#include <stan/math/prim/fun/size.hpp>
13+
#include <stan/math/prim/fun/size_zero.hpp>
14+
#include <stan/math/prim/fun/value_of.hpp>
15+
#include <stan/math/prim/functor/partials_propagator.hpp>
16+
17+
namespace stan {
18+
namespace math {
19+
20+
/** \ingroup prob_dists
21+
* Returns the log CDF of the geometric distribution with success probability
22+
* parameter. Given containers of matching sizes, returns the log sum of
23+
* probabilities.
24+
*
25+
* log CDF: log(1 - (1 - theta)^(n + 1))
26+
*
27+
* @tparam T_n type of outcome variable
28+
* @tparam T_prob type of success probability parameter
29+
*
30+
* @param n outcome variable (number of failures before first success)
31+
* @param theta success probability parameter
32+
* @return log probability or log sum of probabilities
33+
* @throw std::domain_error if theta is not in [0, 1]
34+
* @throw std::invalid_argument if container sizes mismatch
35+
*/
36+
template <typename T_n, typename T_prob>
37+
inline return_type_t<T_prob> geometric_lcdf(const T_n& n,
38+
const T_prob& theta) {
39+
using T_partials_return = partials_return_t<T_n, T_prob>;
40+
using T_n_ref = ref_type_t<T_n>;
41+
using T_prob_ref = ref_type_t<T_prob>;
42+
static constexpr const char* function = "geometric_lcdf";
43+
44+
check_consistent_sizes(function, "Outcome variable", n,
45+
"Success probability parameter", theta);
46+
if (size_zero(n, theta)) {
47+
return 0.0;
48+
}
49+
50+
T_n_ref n_ref = n;
51+
T_prob_ref theta_ref = theta;
52+
check_bounded(function, "Success probability parameter",
53+
value_of(theta_ref), 0.0, 1.0);
54+
55+
scalar_seq_view<T_n_ref> n_vec(n_ref);
56+
scalar_seq_view<T_prob_ref> theta_vec(theta_ref);
57+
const size_t max_size_seq_view = max_size(n_ref, theta_ref);
58+
59+
for (int i = 0; i < stan::math::size(n); i++) {
60+
if (n_vec.val(i) < 0) {
61+
return negative_infinity();
62+
}
63+
}
64+
65+
T_partials_return log_cdf(0.0);
66+
auto ops_partials = make_partials_propagator(theta_ref);
67+
for (size_t i = 0; i < max_size_seq_view; i++) {
68+
const auto theta_val = theta_vec.val(i);
69+
const auto n_val = n_vec.val(i);
70+
const auto np1 = n_val + 1.0;
71+
const auto log1m_theta = log1m(theta_val);
72+
const auto log_ccdf = np1 * log1m_theta;
73+
74+
log_cdf += log1m_exp(log_ccdf);
75+
76+
if constexpr (is_autodiff_v<T_prob>) {
77+
const auto ccdf = stan::math::exp(log_ccdf);
78+
partials<0>(ops_partials)[i]
79+
+= np1 * ccdf / ((1.0 - theta_val) * (1.0 - ccdf));
80+
}
81+
}
82+
83+
return ops_partials.build(log_cdf);
84+
}
85+
86+
} // namespace math
87+
} // namespace stan
88+
#endif
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
#ifndef STAN_MATH_PRIM_PROB_GEOMETRIC_LPMF_HPP
2+
#define STAN_MATH_PRIM_PROB_GEOMETRIC_LPMF_HPP
3+
4+
#include <stan/math/prim/meta.hpp>
5+
#include <stan/math/prim/err.hpp>
6+
#include <stan/math/prim/fun/constants.hpp>
7+
#include <stan/math/prim/fun/log.hpp>
8+
#include <stan/math/prim/fun/log1m.hpp>
9+
#include <stan/math/prim/fun/max_size.hpp>
10+
#include <stan/math/prim/fun/scalar_seq_view.hpp>
11+
#include <stan/math/prim/fun/size.hpp>
12+
#include <stan/math/prim/fun/size_zero.hpp>
13+
#include <stan/math/prim/fun/value_of.hpp>
14+
#include <stan/math/prim/functor/partials_propagator.hpp>
15+
16+
namespace stan {
17+
namespace math {
18+
19+
/** \ingroup prob_dists
20+
* Returns the log PMF of the geometric distribution. If containers
21+
* of matching sizes are supplied, returns the log sum of probabilities.
22+
*
23+
* The geometric distribution with success probability \f$\theta\f$ has PMF
24+
* \f[
25+
* P(n \mid \theta) = \theta (1 - \theta)^{n}
26+
* \f]
27+
* where \f$n \in \{0, 1, 2, \ldots\}\f$ and
28+
* \f$\theta \in (0, 1]\f$.
29+
*
30+
* This is the number-of-failures parameterization, consistent with
31+
* the geometric distribution being a special case of the negative
32+
* binomial distribution with r = 1.
33+
*
34+
* @tparam T_n type of outcome variable
35+
* @tparam T_prob type of success probability parameter
36+
*
37+
* @param n outcome variable (number of failures before first success)
38+
* @param theta success probability parameter
39+
* @return log probability or log sum of probabilities
40+
* @throw std::domain_error if theta is not in [0, 1]
41+
* @throw std::domain_error if n is negative
42+
* @throw std::invalid_argument if container sizes mismatch
43+
*/
44+
template <bool propto, typename T_n, typename T_prob,
45+
require_all_not_nonscalar_prim_or_rev_kernel_expression_t<
46+
T_n, T_prob>* = nullptr>
47+
inline return_type_t<T_prob> geometric_lpmf(const T_n& n,
48+
const T_prob& theta) {
49+
using std::log;
50+
using T_partials_return = partials_return_t<T_n, T_prob>;
51+
using T_n_ref = ref_type_t<T_n>;
52+
using T_prob_ref = ref_type_t<T_prob>;
53+
static constexpr const char* function = "geometric_lpmf";
54+
check_consistent_sizes(function, "Outcome variable", n,
55+
"Success probability parameter", theta);
56+
if (size_zero(n, theta)) {
57+
return 0.0;
58+
}
59+
60+
T_n_ref n_ref = n;
61+
T_prob_ref theta_ref = theta;
62+
check_nonnegative(function, "Outcome variable", n_ref);
63+
check_bounded(function, "Success probability parameter",
64+
value_of(theta_ref), 0.0, 1.0);
65+
66+
if constexpr (!include_summand<propto, T_prob>::value) {
67+
return 0.0;
68+
}
69+
70+
auto ops_partials = make_partials_propagator(theta_ref);
71+
72+
scalar_seq_view<T_n_ref> n_vec(n_ref);
73+
scalar_seq_view<T_prob_ref> theta_vec(theta_ref);
74+
const size_t max_size_seq_view = max_size(n_ref, theta_ref);
75+
T_partials_return logp(0.0);
76+
77+
for (size_t i = 0; i < max_size_seq_view; i++) {
78+
const auto theta_val = theta_vec.val(i);
79+
const auto n_val = n_vec.val(i);
80+
81+
// When theta == 1.0, P(n=0) = 1, P(n>0) = 0
82+
if (theta_val == 1.0) {
83+
if (n_val > 0) {
84+
return negative_infinity();
85+
}
86+
// logp += 0 for n=0, theta=1
87+
continue;
88+
}
89+
90+
logp += log(theta_val) + n_val * log1m(theta_val);
91+
92+
if constexpr (is_autodiff_v<T_prob>) {
93+
partials<0>(ops_partials)[i]
94+
+= 1.0 / theta_val - n_val / (1.0 - theta_val);
95+
}
96+
}
97+
return ops_partials.build(logp);
98+
}
99+
100+
template <typename T_n, typename T_prob>
101+
inline return_type_t<T_prob> geometric_lpmf(const T_n& n,
102+
const T_prob& theta) {
103+
return geometric_lpmf<false>(n, theta);
104+
}
105+
106+
} // namespace math
107+
} // namespace stan
108+
#endif

0 commit comments

Comments
 (0)