Skip to content

Commit 873561c

Browse files
committed
began adding docs
1 parent 253a936 commit 873561c

4 files changed

Lines changed: 119 additions & 15 deletions

File tree

todos.txt

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,21 @@
11
Need to do
22
----------
33

4-
- multinomial
5-
- huber
6-
- Poisson
7-
- Gamma
8-
- max pen val for Tikhonov
9-
- total variation CV and max pen val
10-
- extend LLA algorithm to group lasso and nuclear norm
11-
124
- testing testing testing
135
- documentation documentation documentation
146
- speed comparison of opt module
157
- compare fits to sklearn baseline
168

17-
- 1se rule for ENet
9+
- Gamma
10+
- max pen val for Tikhonov
11+
- total variation CV and max pen val
1812

13+
- 1se rule for ENet
1914

2015
Eventually
2116
---------
22-
- sample weights
2317
- cox
2418
- cv over other parameters with path algorithms
25-
- quantile regression (this will require a different default solver -- perhaps cvxpy?)
2619
- constraints: positive, simplex
2720
- build coordinate descent framework (e.g. based on https://arxiv.org/abs/1410.1386)
2821
- for cv_scorer figure out how to have custom fit_metrics (e.g. for n_nonzero) instead of the ugly train/test

ya_glm/pen_glms/GlmLasso.py

Lines changed: 88 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from textwrap import dedent
2+
13
from ya_glm.base.Glm import Glm
24
from ya_glm.base.GlmCV import GlmCVSinglePen, GlmCVENet
35
from ya_glm.cv.CVPath import CVPathMixin
@@ -11,12 +13,41 @@
1113
from ya_glm.processing import check_estimator_type
1214

1315

16+
_glm_lasso_params = dedent("""
17+
pen_val: float
18+
The penalty value.
19+
20+
lasso_weights: None, array-like
21+
Optional weights to put on each term in the penalty.
22+
23+
groups: None, list of ints
24+
Optional groups of variables. If groups is provided then each element in the list should be a list of feature indices. Variables not in a group are not penalized.
25+
26+
ridge_pen_val: None, float
27+
Penalty strength for an optional ridge penalty.
28+
29+
ridge_weights: None, array-like shape (n_features, )
30+
Optional feature weights for the ridge penalty.
31+
32+
tikhonov: None, array-like (K, n_features)
33+
Optional Tikhonov matrix for the ridge penalty. tikhonov and ridge_weights cannot both be provided at the same time.
34+
""")
35+
36+
1437
class GlmLasso(Glm):
1538

39+
descr = dedent("""
40+
Lasso or group lasso penalty with an optional ridge penalty.
41+
""")
42+
43+
descr_mr = dedent("""
44+
Lasso, group lasso, multi-task lasso or nuclear norm penalty with an optional ridge penalty.
45+
""")
46+
1647
@add_from_classes(Glm)
17-
def __init__(self, pen_val=1, lasso_weights=None,
18-
ridge_pen_val=None, ridge_weights=None, tikhonov=None,
19-
groups=None): pass
48+
def __init__(self, pen_val=1, lasso_weights=None, groups=None,
49+
ridge_pen_val=None, ridge_weights=None, tikhonov=None
50+
): pass
2051

2152
def _get_solve_kws(self):
2253
"""
@@ -89,6 +120,10 @@ def _get_pen_val_max_from_pro(self, X, y, sample_weight=None):
89120

90121
class GlmLassoCVPath(CVPathMixin, GlmCVSinglePen):
91122

123+
descr = dedent("""
124+
Tunes the lasso penalty parameter via cross-validation using a path algorithm.
125+
""")
126+
92127
def _get_solve_path_kws(self):
93128
if not hasattr(self, 'pen_val_seq_'):
94129
raise RuntimeError("pen_val_seq_ has not yet been set")
@@ -103,12 +138,53 @@ def _check_base_estimator(self, estimator):
103138

104139

105140
class GlmLassoCVGridSearch(CVGridSearchMixin, GlmCVSinglePen):
141+
descr = dedent("""
142+
Tunes the lasso penalty parameter via cross-validation.
143+
""")
144+
106145
def _check_base_estimator(self, estimator):
107146
check_estimator_type(estimator, GlmLasso)
108147

109148

149+
_glm_lasso_params = dedent("""
150+
pen_val: float
151+
The penalty strength (corresponds to lambda in glmnet)
152+
153+
l1_ratio: float
154+
The ElasticNet mixing parameter, with ``0 <= l1_ratio <= 1``. For
155+
``l1_ratio = 0`` the penalty is an L2 penalty. ``For l1_ratio = 1`` it
156+
is an L1 penalty. For ``0 < l1_ratio < 1``, the penalty is a
157+
combination of L1 and L2.
158+
159+
lasso_weights: None, array-like
160+
Optional weights to put on each term in the penalty.
161+
162+
groups: None, list of ints
163+
Optional groups of variables. If groups is provided then each element in the list should be a list of feature indices. Variables not in a group are not penalized.
164+
165+
tikhonov: None, array-like (K, n_features)
166+
Optional tikhonov matrix for the ridge penalty.
167+
""")
168+
169+
110170
class GlmENet(Glm):
111171

172+
descr = dedent("""
173+
Elastic net penalty
174+
175+
pen_val * (l1_ratio) Lasso(coef) + pen_val * (1 - l1_ratio) * Ridge(coef)
176+
177+
where Lasso(coef) is either the Lasso or group Lasso penalty.
178+
""")
179+
180+
descr_mr = dedent("""
181+
Elastic net penalty
182+
183+
pen_val * (l1_ratio) Lasso(coef) + pen_val * (1 - l1_ratio) * Ridge(coef)
184+
185+
where Lasso(coef) is either the Lasso, group Lasso, multi-task Lasso or nuclear norm.
186+
""")
187+
112188
@add_from_classes(Glm)
113189
def __init__(self, pen_val=1, l1_ratio=0.5,
114190
lasso_weights=None, ridge_weights=None, tikhonov=None,
@@ -192,6 +268,10 @@ def _get_pen_val_max_from_pro(self, X, y, sample_weight=None):
192268
class GlmENetCVPath(ENetCVPathMixin, GlmCVENet):
193269
solve_glm_path = None
194270

271+
descr = dedent("""
272+
Tunes the ElasticNet penalty parameter and/or the l1_ratio via cross-validation. Makes use of a path algorithm for computing the penalty value tuning path.
273+
""")
274+
195275
def _get_solve_path_enet_base_kws(self):
196276
kws = self.estimator._get_solve_kws()
197277
del kws['lasso_pen']
@@ -203,5 +283,10 @@ def _check_base_estimator(self, estimator):
203283

204284

205285
class GlmENetCVGridSearch(CVGridSearchMixin, GlmCVSinglePen):
286+
287+
descr = dedent("""
288+
Tunes the ElasticNet penalty parameter and/or the l1_ratio via cross-validation.
289+
""")
290+
206291
def _check_base_estimator(self, estimator):
207292
check_estimator_type(estimator, GlmENet)

ya_glm/pen_glms/GlmRidge.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from textwrap import dedent
2+
13
from ya_glm.base.Glm import Glm
24
from ya_glm.base.GlmCV import GlmCVSinglePen
35
from ya_glm.cv.CVPath import CVPathMixin
@@ -9,8 +11,24 @@
911
from ya_glm.processing import check_estimator_type
1012

1113

14+
_glm_ridge_params = dedent("""
15+
pen_val: float
16+
The penalty value.
17+
18+
weights: None, array-like shape (n_features, )
19+
Optional feature weights for the ridge penalty.
20+
21+
tikhonov: None, array-like (K, n_features)
22+
Optional tikhonov matrix for the ridge penalty. Both tikhonov and weights cannot be provided at the same time.
23+
""")
24+
25+
1226
class GlmRidge(Glm):
1327

28+
descr = dedent("""
29+
Ridge penalty.
30+
""")
31+
1432
@add_from_classes(Glm)
1533
def __init__(self, pen_val=1, weights=None, tikhonov=None): pass
1634

@@ -59,6 +77,10 @@ def _get_pen_val_max_from_pro(self, X, y, sample_weight=None):
5977

6078
class GlmRidgeCVPath(CVPathMixin, GlmCVSinglePen):
6179

80+
descr = dedent("""
81+
Tunes the ridge penalty parameter via cross-validation using a path algorithm.
82+
""")
83+
6284
def _get_solve_path_kws(self):
6385
if not hasattr(self, 'pen_val_seq_'):
6486
raise RuntimeError("pen_val_seq_ has not yet been set")
@@ -74,5 +96,9 @@ def _check_base_estimator(self, estimator):
7496

7597
class GlmRidgeCVGridSearch(CVGridSearchMixin, GlmCVSinglePen):
7698

99+
descr = dedent("""
100+
Tunes the ridge penalty parameter via cross-validation.
101+
""")
102+
77103
def _check_base_estimator(self, estimator):
78104
check_estimator_type(estimator, GlmRidge)

ya_glm/pen_glms/GlmVanilla.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,5 +10,5 @@ def _get_solve_kws(self):
1010
'loss_kws': loss_kws,
1111

1212
'fit_intercept': self.fit_intercept,
13-
**self.opt_kws,
13+
**self.opt_kws
1414
}

0 commit comments

Comments
 (0)