Skip to content

Commit 253a936

Browse files
committed
reorg glms with init
1 parent 7dc5857 commit 253a936

5 files changed

Lines changed: 273 additions & 141 deletions

File tree

ya_glm/base/Glm.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,13 @@
1515

1616
_glm_base_params = dedent("""
1717
fit_intercept: bool
18-
Whether or not to fit an intercept.
18+
Whether or not to fit an intercept. The intercept will not be penalized.
1919
2020
standardize: bool
21-
Whether or not to perform internal standardization before fitting the data. Here standardization means mean centering and scaling each column by its standard deviation. Putting each column on the same scale makes sense for fitting penalized models. Note the fitted coefficient/intercept is transformed to be on the original scale of the input data.
21+
Whether or not to perform internal standardization before fitting the data. Standardization means mean centering and scaling each column by its standard deviation. For the group lasso penalty an additional scaling is applied that scales each variable by 1 / sqrt(group size). Putting each variable on the same scale makes sense for fitting penalized models. Note the fitted coefficient/intercept is transformed to be on the original scale of the input data.
2222
2323
opt_kws: dict
24-
Keyword arguments to the glm solver optimization algorithm.
24+
Additional keyword arguments for solve_glm.
2525
""")
2626

2727

@@ -102,7 +102,7 @@ def _validate_data(self, X, y, sample_weight=None, accept_sparse=True):
102102

103103
def preprocess(self, X, y, sample_weight=None, copy=True):
104104
"""
105-
Preprocesses the data for fitting. This method may transform the data e.g. centering and scaling X. If sample weights are provided then these are used for computing weighted means / standard deviations for standardization.
105+
Preprocesses the data for fitting. This method may transform the data e.g. centering and scaling X. If sample weights are provided then these are used for computing weighted means / standard deviations for standardization. For the group lasso penalty an additional scaling is applied that scales each variable by 1 / sqrt(group size).
106106
107107
Parameters
108108
----------
@@ -138,8 +138,8 @@ def preprocess(self, X, y, sample_weight=None, copy=True):
138138
groups=groups,
139139
sample_weight=sample_weight,
140140
copy=copy,
141-
check_input=False,
142-
accept_sparse=False, # TODO!
141+
check_input=True,
142+
accept_sparse=True,
143143
allow_const_cols=not self.fit_intercept)
144144

145145
y, y_out = self._process_y(y, sample_weight=sample_weight, copy=copy)

ya_glm/base/GlmCV.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from ya_glm.cv.cv_select import CVSlectMixin # select_best_cv_tune_param
1515

1616

17+
# TODO: move estimator description to subclasses
1718
_cv_params = dedent(
1819
"""
1920
estimator: estimator object
@@ -251,7 +252,6 @@ def get_tuning_param_grid(self):
251252

252253

253254
_enet_cv_params = dedent("""
254-
255255
l1_ratio: float, str, list
256256
The l1_ratio value to use. If a float is provided then this parameter is fixed and not tuned over. If l1_ratio='tune' then the l1_ratio is tuned over using an automatically generated tuning parameter sequence. Alternatively, the user may provide a list of l1_ratio values to tune over.
257257

ya_glm/base/GlmWithInit.py

Lines changed: 2 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,9 @@
33

44
from ya_glm.utils import fit_if_unfitted
55
from ya_glm.utils import get_coef_and_intercept
6-
from ya_glm.processing import process_init_data
76

87

9-
class InitMixin:
10-
"""
11-
init
12-
13-
_get_defualt_init
14-
15-
_get_init_data_from_fit_est
16-
"""
8+
class GlmWithInitMixin:
179

1810
def get_init_data(self, X, y=None, **fit_params):
1911
"""
@@ -60,44 +52,6 @@ def get_init_data(self, X, y=None, **fit_params):
6052
**fit_params)
6153
return self._get_init_data_from_fit_est(est=init_est)
6254

63-
def _get_defualt_init(self):
64-
raise NotImplementedError
65-
66-
def _get_init_data_from_fit_est(self, est, X, y):
67-
raise NotImplementedError
68-
69-
70-
class GlmWithInitMixin(InitMixin):
71-
72-
def fit(self, X, y, sample_weight=None):
73-
74-
# validate the data!
75-
X, y, sample_weight = self._validate_data(X, y,
76-
sample_weight=sample_weight)
77-
78-
# get data for initialization
79-
init_data = self.get_init_data(X, y)
80-
if 'est' in init_data:
81-
self.init_est_ = init_data['est']
82-
del init_data['est']
83-
84-
# pre-process data
85-
X_pro, y_pro, pre_pro_out = self.preprocess(X, y,
86-
sample_weight=sample_weight,
87-
copy=True)
88-
89-
# possibly process the init data e.g. shift/scale
90-
init_data_pro = process_init_data(init_data=init_data,
91-
pre_pro_out=pre_pro_out)
92-
93-
# Fit!
94-
fit_out = self.compute_fit(X=X_pro, y=y_pro,
95-
init_data=init_data_pro,
96-
sample_weight=sample_weight)
97-
98-
self._set_fit(fit_out=fit_out, pre_pro_out=pre_pro_out)
99-
return self
100-
10155
def _get_init_data_from_fit_est(self, est):
10256
out = {}
10357
coef, intercept = get_coef_and_intercept(est, copy=True, error=True)
@@ -112,23 +66,5 @@ def _get_init_data_from_fit_est(self, est):
11266

11367
return out
11468

115-
def get_pen_val_max(self, X, y, init_data=None, sample_weight=None):
116-
if init_data is None:
117-
init_data = self.get_init_data(X, y, sample_weight=sample_weight)
118-
119-
X_pro, y_pro, pre_pro_out = self.preprocess(X, y,
120-
sample_weight=sample_weight,
121-
copy=True)
122-
123-
init_data_pro = process_init_data(init_data=init_data,
124-
pre_pro_out=pre_pro_out)
125-
126-
return self._get_pen_val_max_from_pro(X=X_pro, y=y_pro,
127-
init_data=init_data_pro,
128-
sample_weight=sample_weight)
129-
130-
def _get_pen_val_max_from_pro(self, X, y, init_data, sample_weight=None):
131-
raise NotImplementedError
132-
133-
def compute_fit(self, X, y, init_data, sample_weight=None):
69+
def _get_defualt_init(self):
13470
raise NotImplementedError

0 commit comments

Comments (0)