|
254 | 254 | "\n", |
255 | 255 | " mu = pm.HalfNormal('mu', sd=20, shape=(2,))\n", |
256 | 256 | " \n", |
257 | | - " like = pm.MvNormal('like', mu=mu, cov=sigma, observed=df[['beak_depth', 'beak_length']].values)" |
| 257 | + " like = pm.MvNormal('like', mu=mu, cov=sigma, observed=df.iloc[scandens_idx][['beak_depth', 'beak_length']].values)" |
258 | 258 | ] |
259 | 259 | }, |
260 | 260 | { |
|
300 | 300 | "metadata": {}, |
301 | 301 | "outputs": [], |
302 | 302 | "source": [ |
303 | | - "samples_mv = pm.sample_ppc(trace, model=mv_beaks)" |
| 303 | + "samples_mv = pm.sample_ppc(trace_mv, model=mv_beaks)" |
304 | 304 | ] |
305 | 305 | }, |
306 | 306 | { |
307 | 307 | "cell_type": "code", |
308 | 308 | "execution_count": null, |
309 | 309 | "metadata": {}, |
310 | 310 | "outputs": [], |
311 | | - "source": [] |
| 311 | + "source": [ |
| 312 | + "samples_mv['like'][:, 0] # beak_depth\n", |
| 313 | + "samples_mv['like'][:, 1] # beak_length" |
| 314 | + ] |
| 315 | + }, |
| 316 | + { |
| 317 | + "cell_type": "code", |
| 318 | + "execution_count": null, |
| 319 | + "metadata": {}, |
| 320 | + "outputs": [], |
| 321 | + "source": [ |
| 322 | + "fig = plt.figure()\n", |
| 323 | + "ax1 = fig.add_subplot(121)\n", |
| 324 | + "ax2 = fig.add_subplot(122)\n", |
| 325 | + "\n", |
| 326 | + "x, y = ECDF(samples_mv['like'][:, 0])\n", |
| 327 | + "ax1.plot(x, y, label='ppc')\n", |
| 328 | + "x, y = ECDF(df.iloc[scandens_idx]['beak_depth'])\n", |
| 329 | + "ax1.plot(x, y, label='data')\n", |
| 330 | + "ax1.set_title('beak depth')\n", |
| 331 | + "ax1.legend()\n", |
| 332 | + "\n", |
| 333 | + "x, y = ECDF(samples_mv['like'][:, 1])\n", |
| 334 | + "ax2.plot(x, y, label='ppc')\n", |
| 335 | + "x, y = ECDF(df.iloc[scandens_idx]['beak_length'])\n", |
| 336 | + "ax2.plot(x, y, label='data')\n", |
| 337 | + "ax2.set_title('beak length')\n", |
| 338 | + "ax2.legend()" |
| 339 | + ] |
| 340 | + }, |
| 341 | + { |
| 342 | + "cell_type": "code", |
| 343 | + "execution_count": null, |
| 344 | + "metadata": {}, |
| 345 | + "outputs": [], |
| 346 | + "source": [ |
| 347 | + "fig = plt.figure()\n", |
| 348 | + "ax = fig.add_subplot(111)\n", |
| 349 | + "\n", |
| 350 | + "x, y = ECDF(trace_mv['sigma'][:, 0, 1])\n", |
| 351 | + "ax.plot(x, y, label='samples')\n", |
| 352 | + "x, y = ECDF(df.iloc[scandens_idx]['shape'])\n", |
| 353 | + "ax.plot(x, y)" |
| 354 | + ] |
| 355 | + }, |
| 356 | + { |
| 357 | + "cell_type": "markdown", |
| 358 | + "metadata": {}, |
| 359 | + "source": [ |
| 360 | + "Maybe the right way to compute shape is to regress depth on length, and compute the slope. After all, that's all that depth/length really is.\n", |
| 361 | + "\n", |
| 362 | + "We will assume a model: $y=mx$, no intercept." |
| 363 | + ] |
| 364 | + }, |
| 365 | + { |
| 366 | + "cell_type": "code", |
| 367 | + "execution_count": null, |
| 368 | + "metadata": {}, |
| 369 | + "outputs": [], |
| 370 | + "source": [ |
| 371 | + "with pm.Model() as shape_model:\n", |
| 372 | + " shape = pm.Normal('shape', mu=0, sd=100)\n", |
| 373 | + " sd = pm.HalfCauchy('sd', beta=100)\n", |
| 374 | + " \n", |
| 375 | + " mu = shape * df.iloc[scandens_idx]['beak_length'].values\n", |
| 376 | + " \n", |
| 377 | + " like = pm.Normal('like', mu=mu, sd=sd, observed=df.iloc[scandens_idx]['beak_depth'].values)" |
| 378 | + ] |
| 379 | + }, |
| 380 | + { |
| 381 | + "cell_type": "code", |
| 382 | + "execution_count": null, |
| 383 | + "metadata": {}, |
| 384 | + "outputs": [], |
| 385 | + "source": [ |
| 386 | + "with shape_model:\n", |
| 387 | + " trace_shape = pm.sample(2000)" |
| 388 | + ] |
| 389 | + }, |
| 390 | + { |
| 391 | + "cell_type": "code", |
| 392 | + "execution_count": null, |
| 393 | + "metadata": {}, |
| 394 | + "outputs": [], |
| 395 | + "source": [ |
| 396 | + "fig = plt.figure()\n", |
| 397 | + "ax = fig.add_subplot(111)\n", |
| 398 | + "\n", |
| 399 | + "x, y = ECDF(trace_shape['shape'])\n", |
| 400 | + "ax.plot(x, y, label='sample')\n", |
| 401 | + "x, y = ECDF(df.iloc[scandens_idx]['shape'].values)\n", |
| 402 | + "ax.plot(x, y, label='data')\n", |
| 403 | + "ax.legend()" |
| 404 | + ] |
| 405 | + }, |
| 406 | + { |
| 407 | + "cell_type": "markdown", |
| 408 | + "metadata": {}, |
| 409 | + "source": [ |
| 410 | + "I have the model mis-specified - I get the posterior distribution over the slope, but not the distribution of shapes. I guess shapes and slopes are kind of different. \n", |
| 411 | + "\n", |
| 412 | + "Let's try just estimating shape directly." |
| 413 | + ] |
| 414 | + }, |
| 415 | + { |
| 416 | + "cell_type": "code", |
| 417 | + "execution_count": null, |
| 418 | + "metadata": {}, |
| 419 | + "outputs": [], |
| 420 | + "source": [ |
| 421 | + "with pm.Model() as shape_model:\n", |
| 422 | + " mu = pm.HalfNormal('mu', sd=100)\n", |
| 423 | + " sd = pm.HalfCauchy('sd', beta=100)\n", |
| 424 | + " \n", |
| 425 | + " like = pm.Normal('shape', mu=mu, sd=sd, observed=df.iloc[scandens_idx]['shape'].values)" |
| 426 | + ] |
| 427 | + }, |
| 428 | + { |
| 429 | + "cell_type": "code", |
| 430 | + "execution_count": null, |
| 431 | + "metadata": {}, |
| 432 | + "outputs": [], |
| 433 | + "source": [ |
| 434 | + "with shape_model:\n", |
| 435 | + " trace = pm.sample(2000)" |
| 436 | + ] |
| 437 | + }, |
| 438 | + { |
| 439 | + "cell_type": "code", |
| 440 | + "execution_count": null, |
| 441 | + "metadata": {}, |
| 442 | + "outputs": [], |
| 443 | + "source": [ |
| 444 | + "samples = pm.sample_ppc(trace, model=shape_model)" |
| 445 | + ] |
| 446 | + }, |
| 447 | + { |
| 448 | + "cell_type": "code", |
| 449 | + "execution_count": null, |
| 450 | + "metadata": {}, |
| 451 | + "outputs": [], |
| 452 | + "source": [ |
| 453 | + "fig = plt.figure()\n", |
| 454 | + "ax = fig.add_subplot(111)\n", |
| 455 | + "\n", |
| 456 | + "x, y = ECDF(samples['shape'])\n", |
| 457 | + "ax.plot(x, y, label='samples')\n", |
| 458 | + "x, y = ECDF(df.iloc[scandens_idx]['shape'])\n", |
| 459 | + "ax.plot(x, y, label='data')\n", |
| 460 | + "ax.legend()" |
| 461 | + ] |
| 462 | + }, |
| 463 | + { |
| 464 | + "cell_type": "markdown", |
| 465 | + "metadata": {}, |
| 466 | + "source": [ |
| 467 | + "As it turns out, the simplest model is the best fitting one..." |
| 468 | + ] |
312 | 469 | } |
313 | 470 | ], |
314 | 471 | "metadata": { |
|
0 commit comments