import warnings
from time import time

import numpy as np
from scipy.optimize import linprog
from scipy.optimize import OptimizeWarning
from scipy.linalg import LinAlgWarning


def solve_lin_prog(X, y, fit_intercept=True, quantile=0.5, lasso_pen=1,
                   sample_weights=None,
                   lasso_weights=None,
                   tol=None,
                   solver='highs'):
    """
    Solves the L1-penalized quantile regression problem using scipy's
    linprog solver. The code is adapted from
    https://github.com/benchopt/benchmark_quantile_regression and
    https://github.com/scikit-learn/scikit-learn/blob/0d064cfd4eda6dd4f7c8711a4870d2f02fda52fb/sklearn/linear_model/_quantile.py#L195-L209

    Parameters
    ----------
    X: array-like, shape (n_samples, n_features)
        The training covariate data.

    y: array-like, shape (n_samples, )
        The training response data.

    fit_intercept: bool
        Whether or not to fit an intercept.

    quantile: float
        The quantile to estimate; must lie in (0, 1).

    lasso_pen: float
        The multiplicative penalty strength parameter.

    sample_weights: None, array-like shape (n_samples, )
        The observation weights.

    lasso_weights: None, array-like shape (n_features, )
        Feature weights for the L1 norm.

    tol: None, float
        Tolerance for stopping criteria.

    solver: str
        Which linprog solver to use; see scipy.optimize.linprog.

    Output
    ------
    coef, intercept, opt_out
    """
    start_time = time()

    A_eq, b_eq, c, n_params = \
        get_lin_prog_data(X=X, y=y,
                          fit_intercept=fit_intercept,
                          quantile=quantile,
                          lasso_pen=lasso_pen,
                          sample_weights=sample_weights,
                          lasso_weights=lasso_weights)

    # the HiGHS solvers name the tolerance option differently
    if 'highs' in solver:
        options = {'primal_feasibility_tolerance': tol}
    else:
        options = {'tol': tol}

    # silence benign solver warnings without permanently modifying
    # the global warning filters
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', category=OptimizeWarning)
        warnings.filterwarnings('ignore', category=LinAlgWarning)

        result = linprog(
            c=c,
            A_eq=A_eq,
            b_eq=b_eq,
            method=solver,
            options=options
        )

    coef, intercept = get_coef_inter(solution=result.x,
                                     n_params=n_params,
                                     fit_intercept=fit_intercept)

    opt_out = scipy_result_to_dict(result)
    opt_out['runtime'] = time() - start_time

    return coef, intercept, opt_out


def get_coef_inter(solution, n_params, fit_intercept):
    # the solution vector is laid out as (params_pos, params_neg, u, v),
    # where u and v are the positive and negative residual slacks;
    # recover the signed parameters as params_pos - params_neg
    params = solution[:n_params] - solution[n_params:2 * n_params]

    if fit_intercept:
        coef = params[1:]
        intercept = params[0]
    else:
        coef = params
        intercept = None

    return coef, intercept


def scipy_result_to_dict(result):
    # extract the relevant fields of scipy's OptimizeResult
    return {'opt_val': result.fun,
            'success': result.success,
            'status': result.status,
            'nit': result.nit,
            'message': result.message}


def get_lin_prog_data(X, y, fit_intercept=True, quantile=0.5, lasso_pen=1,
                      sample_weights=None,
                      lasso_weights=None):
    """
    Builds the data for the linear programming formulation of the
    L1-penalized quantile regression problem.

    Output
    ------
    A_eq, b_eq, c, n_params
    """

    n_samples, n_features = X.shape

    # TODO: perhaps filter zero sample weights as in https://github.com/scikit-learn/scikit-learn/blob/0d064cfd4eda6dd4f7c8711a4870d2f02fda52fb/sklearn/linear_model/_quantile.py#L195-L209

    # format the sample weights vector
    if sample_weights is None:
        sample_weights = np.ones(n_samples) / n_samples
    else:
        sample_weights = np.asarray(sample_weights, dtype=float) / n_samples

    # format the L1 penalty weights vector
    if lasso_weights is None:
        L1_vec = np.ones(n_features)
    else:
        assert len(lasso_weights) == n_features
        L1_vec = np.array(lasso_weights)

    # the coefficient vector is split into positive and negative parts,
    # so the penalty weights are duplicated accordingly
    if fit_intercept:
        n_params = n_features + 1
        L1_vec = np.concatenate([[0], L1_vec,  # 0 = do not penalize intercept
                                 [0], L1_vec])
    else:
        n_params = n_features
        L1_vec = np.concatenate([L1_vec, L1_vec])

    # the linear programming formulation of quantile regression
    # follows https://stats.stackexchange.com/questions/384909/

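    # Spelled out (a sketch of the standard reformulation, using the
    # variable names above): with beta = beta_pos - beta_neg and
    # nonnegative residual slacks u, v, the LP being assembled is
    #
    #   min  lasso_pen * <L1_vec, (beta_pos, beta_neg)>
    #        + sum_i w_i * [quantile * u_i + (1 - quantile) * v_i]
    #   s.t. X (beta_pos - beta_neg) + u - v = y
    #        beta_pos, beta_neg, u, v >= 0
    #
    # linprog's default bounds of (0, None) supply the nonnegativity
    # constraints, so only the equality constraint is passed explicitly.
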
    c = np.concatenate([
        L1_vec * lasso_pen,
        sample_weights * quantile,
        sample_weights * (1 - quantile),
    ])

    # columns of A_eq follow the variable layout
    # (params_pos, params_neg, u, v); when the intercept is fit, its
    # column of ones comes first within each parameter block
    if fit_intercept:
        A_eq = np.concatenate([
            np.ones((n_samples, 1)),
            X,
            -np.ones((n_samples, 1)),
            -X,
            np.eye(n_samples),
            -np.eye(n_samples),
        ], axis=1)
    else:
        A_eq = np.concatenate([
            X,
            -X,
            np.eye(n_samples),
            -np.eye(n_samples),
        ], axis=1)

    # the equality constraint right-hand side is simply y
    return A_eq, y, c, n_params
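

# A minimal usage sketch on synthetic data. The problem size, the true
# coefficient vector and the penalty strength below are arbitrary
# illustrative choices, not values from any particular experiment.
if __name__ == '__main__':
    rng = np.random.RandomState(0)
    X = rng.randn(100, 5)
    y = X @ np.array([1.0, -2.0, 0.0, 0.0, 3.0]) + rng.randn(100)

    # median regression with a mild lasso penalty
    coef, intercept, opt_out = solve_lin_prog(X, y,
                                              fit_intercept=True,
                                              quantile=0.5,
                                              lasso_pen=0.1)

    print('coef:', coef)
    print('intercept:', intercept)
    print('success: {}, runtime: {:.3f}s'.format(opt_out['success'],
                                                 opt_out['runtime']))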