Skip to content

Commit 4cbe8f8

Browse files
committed
added docs
1 parent 873561c commit 4cbe8f8

18 files changed

+564
-288
lines changed

ya_glm/base/Glm.py

Lines changed: 37 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,24 @@
1313
from ya_glm.opt.utils import euclid_norm
1414

1515

16-
_glm_base_params = dedent("""
16+
class Glm(BaseEstimator):
17+
"""
18+
Base class for a penalized generalized linear model.
19+
"""
20+
21+
# subclass may implement the following
22+
23+
# function that solves the penalized GLM optimization problem.
24+
solve_glm = None
25+
26+
# description of the GLM penalty
27+
_pen_descr = None
28+
29+
# description of the GLM penalty for multiple response loss GLMs
30+
_pen_descr_mr = None
31+
32+
# description of the parameters
33+
_params_descr = dedent("""
1734
fit_intercept: bool
1835
Whether or not to fit an intercept. The intercept will not be penalized.
1936
@@ -24,11 +41,27 @@
2441
Additional keyword arguments for solve_glm.
2542
""")
2643

44+
_attr_descr = dedent("""
45+
coef_: array-like, (n_features, )
46+
The estimated coefficient vector.
2747
28-
class Glm(BaseEstimator):
48+
intercept_: float
49+
The estimated intercept.
2950
30-
# subclass should implement
31-
solve_glm = None
51+
opt_data_: dict
52+
Output from the optimization algorithm.
53+
""")
54+
55+
_attr_descr_mr = dedent("""
56+
coef_: array-like, (n_features, n_responses)
57+
The estimated coefficient matrix.
58+
59+
intercept_: array-like, (n_responses, )
60+
The estimated intercept.
61+
62+
opt_data_: dict
63+
Output from the optimization algorithm.
64+
""")
3265

3366
@autoassign
3467
def __init__(self, fit_intercept=True, standardize=False, opt_kws={}):
@@ -349,14 +382,3 @@ def get_loss_info(self):
349382
Keyword arguments for the loss function.
350383
"""
351384
raise NotImplementedError
352-
353-
354-
Glm.__doc__ = dedent(
355-
"""
356-
Base class for Lasso generalized linear model.
357-
358-
Parameters
359-
----------
360-
{}
361-
""".format(_glm_base_params)
362-
)

ya_glm/base/GlmAdptLasso.py

Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
import numpy as np
2+
from ya_glm.base.Glm import Glm
3+
from ya_glm.base.GlmWithInit import GlmWithInitMixin
4+
5+
from ya_glm.pen_max.lasso import get_pen_max
6+
7+
from ya_glm.init_signature import keep_agreeable
8+
from ya_glm.opt.penalty.concave_penalty import get_penalty_func
9+
from ya_glm.processing import process_init_data
10+
11+
12+
# TODO: the way we set the adaptive weights is a little ugly
13+
# let's see if we can figure out a better solution.
14+
# this is difficult because we need to know the original data (to get n_samples)
15+
# but this needs to work with cross-validation where we clone the estimator
16+
# this destroys any attributes derived from the original data
17+
18+
19+
class GlmAdaptiveLassoBase(GlmWithInitMixin, Glm):
20+
21+
def fit(self, X, y, sample_weight=None):
22+
23+
# get the adaptive weights and preprocessed data
24+
adpt_weights, X_pro, y_pro, pre_pro_out, init_data_pro = \
25+
self._get_adpt_weights_and_pro_data(X, y, sample_weight)
26+
27+
##########################
28+
# solve the GLM problem! #
29+
##########################
30+
31+
kws = self._get_solve_kws()
32+
if sample_weight is not None:
33+
kws['sample_weight'] = sample_weight
34+
kws['lasso_weights'] = adpt_weights
35+
36+
coef, intercept, opt_data = self.solve_glm(X=X, y=y, **kws)
37+
38+
fit_out = {'coef': coef, 'intercept': intercept, 'opt_data': opt_data}
39+
self._set_fit(fit_out=fit_out, pre_pro_out=pre_pro_out)
40+
self.adpt_weights_ = adpt_weights
41+
42+
return self
43+
44+
def _get_adpt_weights_and_pro_data(self, X, y, sample_weight=None):
45+
# validate the data!
46+
X, y, sample_weight = self._validate_data(X, y,
47+
sample_weight=sample_weight)
48+
49+
if self.adpt_weights is None:
50+
51+
# get data for initialization if we have not already provided
52+
# the adaptive weights
53+
init_data = self.get_init_data(X, y)
54+
if 'est' in init_data:
55+
self.init_est_ = init_data['est']
56+
del init_data['est']
57+
58+
else:
59+
init_data = None
60+
adpt_weights = self.adpt_weights
61+
62+
# pre-process data for fitting
63+
X_pro, y_pro, pre_pro_out = self.preprocess(X, y,
64+
sample_weight=sample_weight,
65+
copy=True)
66+
67+
if self.adpt_weights is None:
68+
# if we have not already provided the adaptive weights
69+
# then compute them now
70+
71+
# possibly process the init data e.g. shift/scale
72+
init_data_pro = process_init_data(init_data=init_data,
73+
pre_pro_out=pre_pro_out)
74+
75+
adpt_weights = \
76+
self._get_adpt_weights_from_pro_init(init_data=init_data_pro,
77+
n_samples=X.shape[0])
78+
79+
else:
80+
init_data_pro = None
81+
82+
return adpt_weights, X_pro, y_pro, pre_pro_out, init_data_pro
83+
84+
def _get_adpt_weights_from_pro_init(self, init_data, n_samples=None):
85+
"""
86+
Gets the adaptive lasso weights from the processed init data
87+
"""
88+
coef = np.array(init_data['coef'])
89+
transform = self._get_coef_transform()
90+
t = transform(coef)
91+
92+
if type(self.pertub_init) == str and self.pertub_init == 'n_samples':
93+
t += 1 / n_samples
94+
95+
elif self.pertub_init is not None:
96+
t += self.pertub_init
97+
98+
# Setup penalty function
99+
penalty_func = get_penalty_func(pen_func=self.pen_func,
100+
pen_val=1,
101+
pen_func_kws=self.pen_func_kws)
102+
weights = penalty_func.grad(t)
103+
return weights
104+
105+
def _get_pen_max_lasso(self, X, y, init_data, sample_weight=None):
106+
107+
# get the adaptive weights and processed data
108+
adpt_weights, X_pro, y_pro, pre_pro_out, init_data_pro = \
109+
self._get_adpt_weights_and_pro_data(X, y, sample_weight)
110+
111+
loss_func, loss_kws = self.get_loss_info()
112+
pen_kind = self._get_penalty_kind()
113+
114+
kws = {'X': X,
115+
'y': y,
116+
'fit_intercept': self.fit_intercept,
117+
'loss_func': loss_func,
118+
'loss_kws': loss_kws,
119+
'weights': adpt_weights,
120+
'sample_weight': sample_weight
121+
}
122+
123+
if pen_kind == 'group':
124+
kws['groups'] = self.groups
125+
126+
return get_pen_max(pen_kind, **kws)
127+
128+
def _kws_for_default_init(self, c=None):
129+
"""
130+
Returns the keyword arguments for the default initialization estimator.
131+
132+
Parameters
133+
----------
134+
c: None, class
135+
If a class is provided we only return keyword arguments that
136+
agree with c.__init__
137+
"""
138+
139+
keys = ['fit_intercept', 'standardize', 'opt_kws',
140+
'ridge_weights', 'tikhonov',
141+
'groups']
142+
143+
if hasattr(self, 'multi_task'):
144+
keys.append('multi_task')
145+
146+
if hasattr(self, 'nuc'):
147+
keys.append('nuc')
148+
149+
if c is not None:
150+
keys = keep_agreeable(keys, func=c.__init__)
151+
152+
return {k: self.__dict__[k] for k in keys}
153+
154+
155+
class AdptCVMixin:
156+
def _pre_fit(self, X, y, init_data, estimator, sample_weight=None):
157+
"""
158+
Sets the adaptive weights parameter.
159+
"""
160+
161+
# get the adaptive weights and preprocessed data
162+
adpt_weights, X_pro, y_pro, pre_pro_out, init_data_pro = \
163+
estimator._get_adpt_weights_and_pro_data(X, y, sample_weight)
164+
165+
estimator.set_params(adpt_weights=adpt_weights)
166+
self.adpt_weights_ = adpt_weights
167+
return estimator

0 commit comments

Comments
 (0)