Skip to content

Commit 62442a6

Browse files
committed
solved bonus nb on finches - included test with multivariate
1 parent 15dc75e commit 62442a6

1 file changed

Lines changed: 160 additions & 3 deletions

File tree

notebooks/bonus-exploration-finches.ipynb

Lines changed: 160 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -254,7 +254,7 @@
254254
"\n",
255255
" mu = pm.HalfNormal('mu', sd=20, shape=(2,))\n",
256256
" \n",
257-
" like = pm.MvNormal('like', mu=mu, cov=sigma, observed=df[['beak_depth', 'beak_length']].values)"
257+
" like = pm.MvNormal('like', mu=mu, cov=sigma, observed=df.iloc[scandens_idx][['beak_depth', 'beak_length']].values)"
258258
]
259259
},
260260
{
@@ -300,15 +300,172 @@
300300
"metadata": {},
301301
"outputs": [],
302302
"source": [
303-
"samples_mv = pm.sample_ppc(trace, model=mv_beaks)"
303+
"samples_mv = pm.sample_ppc(trace_mv, model=mv_beaks)"
304304
]
305305
},
306306
{
307307
"cell_type": "code",
308308
"execution_count": null,
309309
"metadata": {},
310310
"outputs": [],
311-
"source": []
311+
"source": [
312+
"samples_mv['like'][:, 0] # beak_depth\n",
313+
"samples_mv['like'][:, 1] # beak_length"
314+
]
315+
},
316+
{
317+
"cell_type": "code",
318+
"execution_count": null,
319+
"metadata": {},
320+
"outputs": [],
321+
"source": [
322+
"fig = plt.figure()\n",
323+
"ax1 = fig.add_subplot(121)\n",
324+
"ax2 = fig.add_subplot(122)\n",
325+
"\n",
326+
"x, y = ECDF(samples_mv['like'][:, 0])\n",
327+
"ax1.plot(x, y, label='ppc')\n",
328+
"x, y = ECDF(df.iloc[scandens_idx]['beak_depth'])\n",
329+
"ax1.plot(x, y, label='data')\n",
330+
"ax1.set_title('beak depth')\n",
331+
"ax1.legend()\n",
332+
"\n",
333+
"x, y = ECDF(samples_mv['like'][:, 1])\n",
334+
"ax2.plot(x, y, label='ppc')\n",
335+
"x, y = ECDF(df.iloc[scandens_idx]['beak_length'])\n",
336+
"ax2.plot(x, y, label='data')\n",
337+
"ax2.set_title('beak length')\n",
338+
"ax2.legend()"
339+
]
340+
},
341+
{
342+
"cell_type": "code",
343+
"execution_count": null,
344+
"metadata": {},
345+
"outputs": [],
346+
"source": [
347+
"fig = plt.figure()\n",
348+
"ax = fig.add_subplot(111)\n",
349+
"\n",
350+
"x, y = ECDF(trace_mv['sigma'][:, 0, 1])\n",
351+
"ax.plot(x, y, label='samples')\n",
352+
"x, y = ECDF(df.iloc[scandens_idx]['shape'])\n",
353+
"ax.plot(x, y)"
354+
]
355+
},
356+
{
357+
"cell_type": "markdown",
358+
"metadata": {},
359+
"source": [
360+
"Maybe the right way to compute shape is to regress depth on length, and compute the slope. After all, that's all that depth/length really is.\n",
361+
"\n",
362+
"We will assume a model: $y=mx$, no intercept."
363+
]
364+
},
365+
{
366+
"cell_type": "code",
367+
"execution_count": null,
368+
"metadata": {},
369+
"outputs": [],
370+
"source": [
371+
"with pm.Model() as shape_model:\n",
372+
" shape = pm.Normal('shape', mu=0, sd=100)\n",
373+
" sd = pm.HalfCauchy('sd', beta=100)\n",
374+
" \n",
375+
" mu = shape * df.iloc[scandens_idx]['beak_length'].values\n",
376+
" \n",
377+
" like = pm.Normal('like', mu=mu, sd=sd, observed=df.iloc[scandens_idx]['beak_depth'].values)"
378+
]
379+
},
380+
{
381+
"cell_type": "code",
382+
"execution_count": null,
383+
"metadata": {},
384+
"outputs": [],
385+
"source": [
386+
"with shape_model:\n",
387+
" trace_shape = pm.sample(2000)"
388+
]
389+
},
390+
{
391+
"cell_type": "code",
392+
"execution_count": null,
393+
"metadata": {},
394+
"outputs": [],
395+
"source": [
396+
"fig = plt.figure()\n",
397+
"ax = fig.add_subplot(111)\n",
398+
"\n",
399+
"x, y = ECDF(trace_shape['shape'])\n",
400+
"ax.plot(x, y, label='sample')\n",
401+
"x, y = ECDF(df.iloc[scandens_idx]['shape'].values)\n",
402+
"ax.plot(x, y, label='data')\n",
403+
"ax.legend()"
404+
]
405+
},
406+
{
407+
"cell_type": "markdown",
408+
"metadata": {},
409+
"source": [
410+
"I have the model mis-specified - I get the posterior distribution over the slope, but not the distribution of shapes. I guess shapes and slopes are kind of different. \n",
411+
"\n",
412+
"Let's try just estimating shape directly."
413+
]
414+
},
415+
{
416+
"cell_type": "code",
417+
"execution_count": null,
418+
"metadata": {},
419+
"outputs": [],
420+
"source": [
421+
"with pm.Model() as shape_model:\n",
422+
" mu = pm.HalfNormal('mu', sd=100)\n",
423+
" sd = pm.HalfCauchy('sd', beta=100)\n",
424+
" \n",
425+
" like = pm.Normal('shape', mu=mu, sd=sd, observed=df.iloc[scandens_idx]['shape'].values)"
426+
]
427+
},
428+
{
429+
"cell_type": "code",
430+
"execution_count": null,
431+
"metadata": {},
432+
"outputs": [],
433+
"source": [
434+
"with shape_model:\n",
435+
" trace = pm.sample(2000)"
436+
]
437+
},
438+
{
439+
"cell_type": "code",
440+
"execution_count": null,
441+
"metadata": {},
442+
"outputs": [],
443+
"source": [
444+
"samples = pm.sample_ppc(trace, model=shape_model)"
445+
]
446+
},
447+
{
448+
"cell_type": "code",
449+
"execution_count": null,
450+
"metadata": {},
451+
"outputs": [],
452+
"source": [
453+
"fig = plt.figure()\n",
454+
"ax = fig.add_subplot(111)\n",
455+
"\n",
456+
"x, y = ECDF(samples['shape'])\n",
457+
"ax.plot(x, y, label='samples')\n",
458+
"x, y = ECDF(df.iloc[scandens_idx]['shape'])\n",
459+
"ax.plot(x, y, label='data')\n",
460+
"ax.legend()"
461+
]
462+
},
463+
{
464+
"cell_type": "markdown",
465+
"metadata": {},
466+
"source": [
467+
"As it turns out, the simplest model is the best fitting one..."
468+
]
312469
}
313470
],
314471
"metadata": {

0 commit comments

Comments
 (0)