@@ -91,10 +91,12 @@ log-likelihood values in memory.
9191
9292The log-likelihood in R can be coded as follows:
9393``` {r llfun_logistic}
94- llfun_logistic <- function(data_i, draws) {
94+ # we'll add an argument log to toggle whether this is a log-likelihood or
95+ # likelihood function. this will be useful later in the vignette.
96+ llfun_logistic <- function(data_i, draws, log = TRUE) {
9597 x_i <- as.matrix(data_i[, which(grepl(colnames(data_i), pattern = "X")), drop=FALSE])
9698 logit_pred <- draws %*% t(x_i)
97- dbinom(x = data_i$y, size = 1, prob = 1/(1 + exp(-logit_pred)), log = TRUE )
99+ dbinom(x = data_i$y, size = 1, prob = 1/(1 + exp(-logit_pred)), log = log )
98100}
99101```
100102
@@ -133,15 +135,28 @@ function we wrote is working as it should. The `loo_i()` function is a helper
133135function that can be used to test a log-likelihood function on a single observation.
134136
135137``` {r, eval=FALSE}
138+ # used for draws argument to loo_i
136139parameter_draws_1 <- extract(fit_1)$beta
140+
141+ # used for data argument to loo_i
137142stan_df_1 <- as.data.frame(standata)
138- loo_i(1, llfun_logistic, data = stan_df_1, draws = parameter_draws_1)
143+
144+ # compute relative efficiency (this is slow and optional but is recommended to allow
145+ # for adjusting PSIS effective sample size based on MCMC effective sample size)
146+ r_eff <- relative_eff(llfun_logistic,
147+ log = FALSE, # relative_eff wants likelihood not log-likelihood values
148+ chain_id = rep(1:4, each = 1000),
149+ data = stan_df_1,
150+ draws = parameter_draws_1,
151+ cores = 2)
152+
153+ loo_i(i = 1, llfun_logistic, r_eff = r_eff, data = stan_df_1, draws = parameter_draws_1)
139154```
140155
141156```
142157$pointwise
143- elpd_loo mcse_elpd_loo p_loo looic
144- 1 -0.3310342 0.0002908997 0.0003487243 0.6620683
158+ elpd_loo mcse_elpd_loo p_loo looic influence_pareto_k
159+ 1 -0.3314552 0.0002887608 0.0003361772 0.6629103 -0.05679886
145160...
146161```
147162
@@ -151,16 +166,16 @@ We can then use the `loo_subsample()` function to compute the efficient PSIS-LOO
151166approximation to exact LOO-CV using subsampling:
152167
153168``` {r, eval=FALSE}
154- parameter_draws_1 <- extract(fit_1)$beta
155- stan_df_1 <- as.data.frame(standata)
156-
157169set.seed(4711)
158170loo_ss_1 <-
159171 loo_subsample(
160172 llfun_logistic,
173+ observations = 100, # take a subsample of size 100
174+ cores = 2,
175+ # these next objects were computed above
176+ r_eff = r_eff,
161177 draws = parameter_draws_1,
162- data = stan_df_1,
163- observations = 100 # take a subsample of size 100
178+ data = stan_df_1
164179 )
165180print(loo_ss_1)
166181```
@@ -205,8 +220,14 @@ simply add more samples until we are satisfied using the `update()` method.
205220
206221``` {r, eval=FALSE}
207222set.seed(4711)
208- loo_ss_1b <- update(loo_ss_1, draws = parameter_draws_1, data = stan_df_1,
209- observations = 200) # subsample 200 instead of 100
223+ loo_ss_1b <-
224+ update(
225+ loo_ss_1,
226+ observations = 200, # subsample 200 instead of 100
227+ r_eff = r_eff,
228+ draws = parameter_draws_1,
229+ data = stan_df_1
230+ )
210231print(loo_ss_1b)
211232```
212233
@@ -241,12 +262,14 @@ set.seed(4711)
241262loo_ss_1c <-
242263 loo_subsample(
243264 x = llfun_logistic,
265+ r_eff = r_eff,
244266 draws = parameter_draws_1,
245267 data = stan_df_1,
246268 observations = 100,
247269 estimator = "hh_pps", # use Hansen-Hurwitz
248270 loo_approximation = "lpd", # use lpd instead of plpd
249- loo_approximation_draws = 100
271+ loo_approximation_draws = 100,
272+ cores = 2
250273 )
251274print(loo_ss_1c)
252275```
@@ -297,7 +320,8 @@ loo_ap_1 <-
297320 draws = parameter_draws_laplace,
298321 data = stan_df_1,
299322 log_p = log_p,
300- log_g = log_g
323+ log_g = log_g,
324+ cores = 2
301325 )
302326print(loo_ap_1)
303327```
@@ -340,7 +364,8 @@ loo_ap_ss_1 <-
340364 data = stan_df_1,
341365 log_p = log_p,
342366 log_g = log_g,
343- observations = 100
367+ observations = 100,
368+ cores = 2
344369 )
345370print(loo_ap_ss_1)
346371```
@@ -387,23 +412,46 @@ parameter_draws_2 <- extract(fit_2)$beta
387412stan_df_2 <- as.data.frame(standata)
388413
389414# recompute subsampling loo for first model for demonstration purposes
415+
416+ # compute relative efficiency (this is slow and optional but is recommended to allow
417+ # for adjusting PSIS effective sample size based on MCMC effective sample size)
418+ r_eff_1 <- relative_eff(
419+ llfun_logistic,
420+ log = FALSE, # relative_eff wants likelihood not log-likelihood values
421+ chain_id = rep(1:4, each = 1000),
422+ data = stan_df_1,
423+ draws = parameter_draws_1,
424+ cores = 2
425+ )
426+
390427set.seed(4711)
391- loo_ss_1 <-
392- loo_subsample(
393- x = llfun_logistic,
394- draws = parameter_draws_1,
395- data = stan_df_1,
396- observations = 200
397- )
428+ loo_ss_1 <- loo_subsample(
429+ x = llfun_logistic,
430+ r_eff = r_eff_1,
431+ draws = parameter_draws_1,
432+ data = stan_df_1,
433+ observations = 200,
434+ cores = 2
435+ )
398436
399437# compute subsampling loo for a second model (with log-arsenic)
400- loo_ss_2 <-
401- loo_subsample(
402- x = llfun_logistic,
403- draws = parameter_draws_2,
404- data = stan_df_2,
405- observations = 200
406- )
438+
439+ r_eff_2 <- relative_eff(
440+ llfun_logistic,
441+ log = FALSE, # relative_eff wants likelihood not log-likelihood values
442+ chain_id = rep(1:4, each = 1000),
443+ data = stan_df_2,
444+ draws = parameter_draws_2,
445+ cores = 2
446+ )
447+ loo_ss_2 <- loo_subsample(
448+ x = llfun_logistic,
449+ r_eff = r_eff_2,
450+ draws = parameter_draws_2,
451+ data = stan_df_2,
452+ observations = 200,
453+ cores = 2
454+ )
407455
408456print(loo_ss_2)
409457```
@@ -455,9 +503,11 @@ we can simply extract the observations used in `loo_ss_1` and use them in
455503loo_ss_2 <-
456504 loo_subsample(
457505 x = llfun_logistic,
506+ r_eff = r_eff_2,
458507 draws = parameter_draws_2,
459508 data = stan_df_2,
460- observations = loo_ss_1
509+ observations = loo_ss_1,
510+ cores = 2
461511 )
462512```
463513
@@ -466,13 +516,14 @@ helper function:
466516
467517``` {r, eval=FALSE}
468518idx <- obs_idx(loo_ss_1)
469- loo_ss_2 <-
470- loo_subsample(
471- x = llfun_logistic,
472- draws = parameter_draws_2,
473- data = stan_df_2,
474- observations = idx
475- )
519+ loo_ss_2 <- loo_subsample(
520+ x = llfun_logistic,
521+ r_eff = r_eff_2,
522+ draws = parameter_draws_2,
523+ data = stan_df_2,
524+ observations = idx,
525+ cores = 2
526+ )
476527```
477528
478529```
@@ -510,7 +561,13 @@ It is also possible to compare a subsampled loo computation with a full loo obje
510561
511562``` {r, eval=FALSE}
512563# use loo() instead of loo_subsample() to compute full PSIS-LOO for model 2
513- loo_full_2 <- loo(x = llfun_logistic, draws = parameter_draws_2, data = stan_df_2)
564+ loo_full_2 <- loo(
565+ x = llfun_logistic,
566+ r_eff = r_eff_2,
567+ draws = parameter_draws_2,
568+ data = stan_df_2,
569+ cores = 2
570+ )
514571loo_compare(loo_ss_1, loo_full_2)
515572```
516573
0 commit comments