Merge pull request #3169 from stan-dev/sum-to-zero-matrix

WardBrian · web-flow · commit bc4b4bc37ff8 · 2025-03-27T15:16:47.000-04:00
Add sum_to_zero_constrain and _free for matrix types
diff --git a/stan/math/prim/constraint/sum_to_zero_constrain.hpp b/stan/math/prim/constraint/sum_to_zero_constrain.hpp
@@ -50,7 +50,7 @@ inline plain_type_t<Vec> sum_to_zero_constrain(const Vec& y) {
   value_type_t<Vec> sum_w(0);
   for (int i = N; i > 0; --i) {
     double n = static_cast<double>(i);
-    auto w = y_ref(i - 1) * inv_sqrt(n * (n + 1));
+    auto w = y_ref.coeff(i - 1) * inv_sqrt(n * (n + 1));
     sum_w += w;
 
     z.coeffRef(i - 1) += sum_w;
@@ -61,47 +61,81 @@ inline plain_type_t<Vec> sum_to_zero_constrain(const Vec& y) {
 }
 
 /**
- * Return a vector with sum zero corresponding to the specified
- * free vector.
+ * Return a matrix that sums to zero over both the rows
+ * and columns corresponding to the free matrix x.
  *
- * The sum-to-zero transform is defined using a modified version of the
- * the inverse of the isometric log ratio transform (ILR).
- * See:
- * Egozcue, Juan Jose; Pawlowsky-Glahn, Vera; Mateu-Figueras, Gloria;
- * Barcelo-Vidal, Carles (2003), "Isometric logratio transformations for
- * compositional data analysis", Mathematical Geology, 35 (3): 279–300,
- * doi:10.1023/A:1023818214614, S2CID 122844634
+ * This is a linear transform, with no Jacobian.
  *
- * This implementation is closer to the description of the same using "pivot
- * coordinates" in
- * Filzmoser, P., Hron, K., Templ, M. (2018). Geometrical Properties of
- * Compositional Data. In: Applied Compositional Data Analysis. Springer Series
- * in Statistics. Springer, Cham. https://doi.org/10.1007/978-3-319-96422-5_3
+ * @tparam Mat type of the matrix
+ * @param x Free matrix input of dimensionality (N - 1, M - 1).
+ * @return Zero-sum matrix of dimensionality (N, M).
+ */
+template <typename Mat, require_eigen_matrix_dynamic_t<Mat>* = nullptr,
+          require_not_st_var<Mat>* = nullptr>
+inline plain_type_t<Mat> sum_to_zero_constrain(const Mat& x) {
+  const auto N = x.rows();
+  const auto M = x.cols();
+
+  plain_type_t<Mat> Z = Eigen::MatrixXd::Zero(N + 1, M + 1);
+  if (unlikely(N == 0 || M == 0)) {
+    return Z;
+  }
+  auto&& x_ref = to_ref(x);
+
+  Eigen::Matrix<value_type_t<Mat>, -1, 1> beta = Eigen::VectorXd::Zero(N);
+
+  for (int j = M - 1; j >= 0; --j) {
+    value_type_t<Mat> ax_previous(0);
+
+    double a_j = inv_sqrt((j + 1.0) * (j + 2.0));
+    double b_j = (j + 1.0) * a_j;
+
+    for (int i = N - 1; i >= 0; --i) {
+      double a_i = inv_sqrt((i + 1.0) * (i + 2.0));
+      double b_i = (i + 1.0) * a_i;
+
+      auto b_i_x = b_i * x_ref.coeff(i, j) - ax_previous;
+
+      Z.coeffRef(i, j) = (b_j * b_i_x) - beta.coeff(i);
+      beta.coeffRef(i) += a_j * b_i_x;
+
+      Z.coeffRef(N, j) -= Z.coeff(i, j);
+      Z.coeffRef(i, M) -= Z.coeff(i, j);
+
+      ax_previous += a_i * x_ref.coeff(i, j);
+    }
+    Z.coeffRef(N, M) -= Z.coeff(N, j);
+  }
+
+  return Z;
+}
+
+/**
+ * Return a vector or matrix with sum zero corresponding to the specified
+ * free input.
  *
  * This is a linear transform, with no Jacobian.
  *
- * @tparam Vec type of the vector
+ * @tparam T type of the input, either a vector or a matrix
  * @tparam Lp unused
- * @param y Free vector input of dimensionality K - 1.
+ * @param y Free vector or matrix
  * @param lp unused
- * @return Zero-sum vector of dimensionality K.
+ * @return Zero-sum vector or matrix which is one larger in each dimension
  */
-template <typename Vec, typename Lp, require_eigen_col_vector_t<Vec>* = nullptr,
-          require_not_st_var<Vec>* = nullptr>
-inline plain_type_t<Vec> sum_to_zero_constrain(const Vec& y, Lp& lp) {
+template <typename T, typename Lp, require_not_st_var<T>* = nullptr>
+inline plain_type_t<T> sum_to_zero_constrain(const T& y, Lp& lp) {
   return sum_to_zero_constrain(y);
 }
 
 /**
- * Return a vector with sum zero corresponding to the specified
- * free vector.
+ * Return a vector or matrix with sum zero corresponding to the specified
+ * free input.
  * This overload handles looping over the elements of a standard vector.
  *
- * @tparam Vec A standard vector with inner type inheriting from
- * `Eigen::DenseBase` or a `var_value` with inner type inheriting from
- * `Eigen::DenseBase` with compile time dynamic rows and 1 column
- * @param[in] y free vector
- * @return Zero-sum vectors of dimensionality one greater than `y`
+ * @tparam T A standard vector with inner type that is either a vector or a
+ * matrix
+ * @param[in] y free vector or matrix
+ * @return Zero-sum vectors or matrices which are one larger in each dimension
  */
 template <typename T, require_std_vector_t<T>* = nullptr>
 inline auto sum_to_zero_constrain(const T& y) {
@@ -114,13 +148,12 @@ inline auto sum_to_zero_constrain(const T& y) {
  * free vector.
  * This overload handles looping over the elements of a standard vector.
  *
- * @tparam Vec A standard vector with inner type inheriting from
- * `Eigen::DenseBase` or a `var_value` with inner type inheriting from
- * `Eigen::DenseBase` with compile time dynamic rows and 1 column
+ * @tparam T A standard vector with inner type that is either a vector or a
+ * matrix
  * @tparam Lp unused
- * @param[in] y free vector
+ * @param[in] y free vector or matrix
  * @param[in, out] lp unused
- * @return Zero-sum vectors of dimensionality one greater than `y`
+ * @return Zero-sum vectors or matrices which are one larger in each dimension
  */
 template <typename T, typename Lp, require_std_vector_t<T>* = nullptr,
           require_convertible_t<return_type_t<T>, Lp>* = nullptr>
@@ -130,36 +163,19 @@ inline auto sum_to_zero_constrain(const T& y, Lp& lp) {
 }
 
 /**
- * Return a vector with sum zero corresponding to the specified
- * free vector.
- *
- * The sum-to-zero transform is defined using a modified version of
- * the inverse of the isometric log ratio transform (ILR).
- * See:
- * Egozcue, Juan Jose; Pawlowsky-Glahn, Vera; Mateu-Figueras, Gloria;
- * Barcelo-Vidal, Carles (2003), "Isometric logratio transformations for
- * compositional data analysis", Mathematical Geology, 35 (3): 279–300,
- * doi:10.1023/A:1023818214614, S2CID 122844634
- *
- * This implementation is closer to the description of the same using "pivot
- * coordinates" in
- * Filzmoser, P., Hron, K., Templ, M. (2018). Geometrical Properties of
- * Compositional Data. In: Applied Compositional Data Analysis. Springer Series
- * in Statistics. Springer, Cham. https://doi.org/10.1007/978-3-319-96422-5_3
- *
+ * Return a vector or matrix with sum zero corresponding to the specified
+ * free input.
  * This is a linear transform, with no Jacobian.
  *
  * @tparam Jacobian unused
- * @tparam Vec A type inheriting from `Eigen::DenseBase` or a `var_value` with
- *  inner type inheriting from `Eigen::DenseBase` with compile time dynamic rows
- *  and 1 column, or a standard vector thereof
+ * @tparam T type of the input
  * @tparam Lp unused
- * @param[in] y free vector
+ * @param[in] y free vector or matrix
  * @param[in, out] lp unused
- * @return Zero-sum vector of dimensionality one greater than `y`
+ * @return Zero-sum vector or matrix which is one larger in each dimension
  */
-template <bool Jacobian, typename Vec, typename Lp>
-inline plain_type_t<Vec> sum_to_zero_constrain(const Vec& y, Lp& lp) {
+template <bool Jacobian, typename T, typename Lp>
+inline plain_type_t<T> sum_to_zero_constrain(const T& y, Lp& lp) {
   return sum_to_zero_constrain(y);
 }
 
diff --git a/stan/math/prim/constraint/sum_to_zero_free.hpp b/stan/math/prim/constraint/sum_to_zero_free.hpp
@@ -5,6 +5,7 @@
 #include <stan/math/prim/err.hpp>
 #include <stan/math/prim/fun/Eigen.hpp>
 #include <stan/math/prim/fun/to_ref.hpp>
+#include <stan/math/prim/fun/inv_sqrt.hpp>
 #include <stan/math/prim/fun/sqrt.hpp>
 #include <stan/math/prim/functor/apply_vector_unary.hpp>
 #include <cmath>
@@ -47,25 +48,70 @@ inline plain_type_t<Vec> sum_to_zero_free(const Vec& z) {
     return y;
   }
 
-  y.coeffRef(N - 1) = -z_ref(N) * sqrt(N * (N + 1)) / N;
+  y.coeffRef(N - 1) = -z_ref.coeff(N) * sqrt(N * (N + 1)) / N;
 
   value_type_t<Vec> sum_w(0);
 
   for (int i = N - 2; i >= 0; --i) {
     double n = static_cast<double>(i + 1);
-    auto w = y(i + 1) / sqrt((n + 1) * (n + 2));
+    auto w = y.coeff(i + 1) / sqrt((n + 1) * (n + 2));
     sum_w += w;
-    y.coeffRef(i) = (sum_w - z_ref(i + 1)) * sqrt(n * (n + 1)) / n;
+    y.coeffRef(i) = (sum_w - z_ref.coeff(i + 1)) * sqrt(n * (n + 1)) / n;
   }
 
   return y;
 }
 
 /**
- * Overload of `sum_to_zero_free()` to untransform each Eigen vector
+ * Return an unconstrained matrix.
+ *
+ * @tparam Mat a column vector type
+ * @param z Matrix of size (N, M)
+ * @return Free matrix of length (N - 1, M - 1)
+ * @throw std::domain_error if z does not sum to zero
+ */
+template <typename Mat, require_eigen_matrix_dynamic_t<Mat>* = nullptr>
+inline plain_type_t<Mat> sum_to_zero_free(const Mat& z) {
+  const auto& z_ref = to_ref(z);
+  check_sum_to_zero("stan::math::sum_to_zero_free", "sum_to_zero variable",
+                    z_ref);
+
+  const auto N = z_ref.rows() - 1;
+  const auto M = z_ref.cols() - 1;
+
+  plain_type_t<Mat> x = Eigen::MatrixXd::Zero(N, M);
+  if (unlikely(N == 0 || M == 0)) {
+    return x;
+  }
+
+  Eigen::Matrix<value_type_t<Mat>, -1, 1> beta = Eigen::VectorXd::Zero(N);
+
+  for (int j = M - 1; j >= 0; --j) {
+    value_type_t<Mat> ax_previous(0);
+
+    double a_j = inv_sqrt((j + 1.0) * (j + 2.0));
+    double b_j = (j + 1.0) * a_j;
+
+    for (int i = N - 1; i >= 0; --i) {
+      double a_i = inv_sqrt((i + 1.0) * (i + 2.0));
+      double b_i = (i + 1.0) * a_i;
+
+      auto alpha_plus_beta = z_ref.coeff(i, j) + beta.coeff(i);
+
+      x.coeffRef(i, j) = (alpha_plus_beta + b_j * ax_previous) / (b_j * b_i);
+      beta.coeffRef(i) += a_j * (b_i * x.coeff(i, j) - ax_previous);
+      ax_previous += a_i * x.coeff(i, j);
+    }
+  }
+
+  return x;
+}
+
+/**
+ * Overload of `sum_to_zero_free()` to untransform each Eigen type
  * in a standard vector.
  * @tparam T A standard vector with with a `value_type` which inherits from
- *  `Eigen::MatrixBase` with compile time rows or columns equal to 1.
+ *  `Eigen::MatrixBase`
  * @param z The standard vector to untransform.
  */
 template <typename T, require_std_vector_t<T>* = nullptr>
diff --git a/stan/math/rev/constraint/sum_to_zero_constrain.hpp b/stan/math/rev/constraint/sum_to_zero_constrain.hpp
@@ -5,6 +5,7 @@
 #include <stan/math/rev/core/reverse_pass_callback.hpp>
 #include <stan/math/rev/core/arena_matrix.hpp>
 #include <stan/math/prim/fun/Eigen.hpp>
+#include <stan/math/prim/fun/inv_sqrt.hpp>
 #include <stan/math/prim/fun/sqrt.hpp>
 #include <stan/math/prim/constraint/sum_to_zero_constrain.hpp>
 #include <cmath>
@@ -55,14 +56,66 @@ inline auto sum_to_zero_constrain(T&& y) {
       double n = static_cast<double>(i + 1);
 
       // adjoint of the reverse cumulative sum computed in the forward mode
-      sum_u_adj += arena_z.adj()(i);
+      sum_u_adj += arena_z.adj().coeff(i);
 
       // adjoint of the offset subtraction
-      double v_adj = -arena_z.adj()(i + 1) * n;
+      double v_adj = -arena_z.adj().coeff(i + 1) * n;
 
       double w_adj = v_adj + sum_u_adj;
 
-      arena_y.adj()(i) += w_adj / sqrt(n * (n + 1));
+      arena_y.adj().coeffRef(i) += w_adj / sqrt(n * (n + 1));
+    }
+  });
+
+  return arena_z;
+}
+
+/**
+ * Return a matrix that sums to zero over both the rows
+ * and columns corresponding to the free matrix x.
+ *
+ * This is a linear transform, with no Jacobian.
+ *
+ * @tparam Mat type of the matrix
+ * @param x Free matrix input of dimensionality (N - 1, M - 1).
+ * @return Zero-sum matrix of dimensionality (N, M).
+ */
+template <typename T, require_rev_matrix_t<T>* = nullptr,
+          require_not_t<is_rev_vector<T>>* = nullptr>
+inline auto sum_to_zero_constrain(T&& x) {
+  using ret_type = plain_type_t<T>;
+  if (unlikely(x.size() == 0)) {
+    return arena_t<ret_type>(Eigen::MatrixXd{{0}});
+  }
+  auto arena_x = to_arena(std::forward<T>(x));
+  arena_t<ret_type> arena_z = sum_to_zero_constrain(arena_x.val());
+
+  reverse_pass_callback([arena_x, arena_z]() mutable {
+    const auto Nf = arena_x.val().rows();
+    const auto Mf = arena_x.val().cols();
+
+    Eigen::VectorXd d_beta = Eigen::VectorXd::Zero(Nf);
+
+    for (int j = 0; j < Mf; ++j) {
+      double a_j = inv_sqrt((j + 1.0) * (j + 2.0));
+      double b_j = (j + 1.0) * a_j;
+
+      double d_ax = 0.0;
+
+      for (int i = 0; i < Nf; ++i) {
+        double a_i = inv_sqrt((i + 1.0) * (i + 2.0));
+        double b_i = (i + 1.0) * a_i;
+
+        double dY = arena_z.adj().coeff(i, j) - arena_z.adj().coeff(Nf, j)
+                    + arena_z.adj().coeff(Nf, Mf) - arena_z.adj().coeff(i, Mf);
+        double dI_from_beta = a_j * d_beta.coeff(i);
+        d_beta.coeffRef(i) += -dY;
+
+        double dI_from_alpha = b_j * dY;
+        double dI = dI_from_alpha + dI_from_beta;
+        arena_x.adj().coeffRef(i, j) += b_i * dI + a_i * d_ax;
+        d_ax -= dI;
+      }
     }
   });
 
@@ -89,12 +142,12 @@ inline auto sum_to_zero_constrain(T&& y) {
  *
  * This is a linear transform, with no Jacobian.
  *
- * @tparam Vec type of the vector
- * @param y Free vector input of dimensionality K - 1.
+ * @tparam T type of the vector or matrix
+ * @param y Free vector or matrix.
  * @param lp unused
- * @return Zero-sum vector of dimensionality K.
+ * @return Zero-sum vector or matrix which is one larger in each dimension
  */
-template <typename T, typename Lp, require_rev_col_vector_t<T>* = nullptr>
+template <typename T, typename Lp, is_rev_matrix<T>* = nullptr>
 inline auto sum_to_zero_constrain(T&& y, Lp& lp) {
   return sum_to_zero_constrain(std::forward<T>(y));
 }
diff --git a/test/unit/math/mix/constraint/sum_to_zero_constrain_test.cpp b/test/unit/math/mix/constraint/sum_to_zero_constrain_test.cpp
@@ -56,3 +56,24 @@ TEST(MathMixMatFun, sum_to_zeroTransform) {
   v5 << 1, -3, 2, 0, -1;
   sum_to_zero_constrain_test::expect_sum_to_zero_transform(v5);
 }
+
+TEST(MathMixMatFun, sum_to_zero_matrixTransform) {
+  Eigen::MatrixXd m0_0(0, 0);
+  sum_to_zero_constrain_test::expect_sum_to_zero_transform(m0_0);
+
+  Eigen::MatrixXd m1_1(1, 1);
+  m1_1 << 1;
+  sum_to_zero_constrain_test::expect_sum_to_zero_transform(m1_1);
+
+  Eigen::MatrixXd m2_2(2, 2);
+  m2_2 << 1, 2, -3, 4;
+  sum_to_zero_constrain_test::expect_sum_to_zero_transform(m2_2);
+
+  Eigen::MatrixXd m3_4(3, 4);
+  m3_4 << 1, 2, -3, 4, 5, 6, -7, 8, 9, -10, 11, -12;
+
+  sum_to_zero_constrain_test::expect_sum_to_zero_transform(m3_4);
+
+  Eigen::MatrixXd m4_3 = m3_4.transpose();
+  sum_to_zero_constrain_test::expect_sum_to_zero_transform(m4_3);
+}
diff --git a/test/unit/math/prim/constraint/sum_to_zero_transform_test.cpp b/test/unit/math/prim/constraint/sum_to_zero_transform_test.cpp