added multivariate model

ericmjl · ericmjl · commit 15dc75ea2e47 · 2018-07-10T09:57:48.000-05:00
diff --git a/notebooks/bonus-exploration-finches.ipynb b/notebooks/bonus-exploration-finches.ipynb
@@ -78,19 +78,17 @@
     "    nu = pm.Exponential('nu', lam=1/29.) + 1\n",
     "    \n",
     "    # Define the likelihood distribution for the data.\n",
-    "    depth = pm.StudentT('depth', \n",
+    "    depth = pm.StudentT('beak_depth', \n",
     "                        nu=nu,\n",
     "                        mu=mean_depth[df['species_enc']], \n",
     "                        sd=sd_depth[df['species_enc']], \n",
     "                        observed=df['beak_depth'])\n",
     "    \n",
-    "    length = pm.StudentT('length',\n",
+    "    length = pm.StudentT('beak_length',\n",
     "                         nu=nu,\n",
     "                         mu=mean_length[df['species_enc']],\n",
     "                         sd=sd_length[df['species_enc']],\n",
-    "                         observed=df['beak_length'])\n",
-    "    \n",
-    "    shape = pm.Deterministic('shape', depth / length)"
+    "                         observed=df['beak_length'])"
    ]
   },
   {
@@ -131,6 +129,73 @@
     "samples"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "PPC check for Fortis"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fig = plt.figure()\n",
+    "ax1 = fig.add_subplot(121)\n",
+    "ax2 = fig.add_subplot(122, sharex=ax1)\n",
+    "\n",
+    "def plot_ppc_data(samples, df, idxs, column, ax):\n",
+    "    # Overlay the ECDF of the PPC draws for `column` with the ECDF of the observed rows.\n",
+    "    ppc_x, ppc_y = ECDF(samples[column][:, idxs].flatten())\n",
+    "    ax.plot(ppc_x, ppc_y, label='ppc')\n",
+    "    obs_x, obs_y = ECDF(df.iloc[idxs][column])\n",
+    "    ax.plot(obs_x, obs_y, label='data')\n",
+    "    ax.set(xlabel=column, ylabel='cumulative fraction')\n",
+    "    return ax\n",
+    "\n",
+    "ax1 = plot_ppc_data(samples, df, fortis_idx, 'beak_depth', ax1)\n",
+    "ax2 = plot_ppc_data(samples, df, fortis_idx, 'beak_length', ax2)\n",
+    "\n",
+    "fig.suptitle('Fortis')\n",
+    "plt.tight_layout()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "PPC check for Scandens"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fig = plt.figure()\n",
+    "ax1 = fig.add_subplot(121)\n",
+    "ax2 = fig.add_subplot(122, sharex=ax1)\n",
+    "\n",
+    "ax1 = plot_ppc_data(samples, df, scandens_idx, 'beak_depth', ax1)\n",
+    "ax2 = plot_ppc_data(samples, df, scandens_idx, 'beak_length', ax2)\n",
+    "\n",
+    "fig.suptitle('Scandens')\n",
+    "plt.tight_layout()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.hist(samples['beak_depth'].flatten())\n",
+    "plt.hist(samples['beak_length'].flatten())"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -139,7 +204,7 @@
    "source": [
     "fig = plt.figure()\n",
     "ax = fig.add_subplot(111)\n",
-    "x, y = ECDF((samples['depth'][:, fortis_idx] / samples['length'][:, fortis_idx]).flatten())\n",
+    "x, y = ECDF((samples['beak_depth'][:, fortis_idx] / samples['beak_length'][:, fortis_idx]).flatten())\n",
     "ax.plot(x, y)\n",
     "x, y = ECDF(df.loc[fortis_idx, 'shape'])\n",
     "ax.plot(x, y)"
@@ -151,12 +216,91 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "fig = plt.figure()\n",
-    "ax = fig.add_subplot(111)\n",
-    "x, y = ECDF(df['shape'])\n",
-    "ax.plot(x, y, label='data')\n",
-    "# x, y = ECDF(trace['shape'][0, :])\n",
-    "# ax.plot(x, y, label='posterior')\n"
+    "fig = plt.figure(figsize=(12, 4))\n",
+    "\n",
+    "def plot_length_depth_scatter(df, idxs, title, ax):\n",
+    "    # Scatter beak depth (y) against beak length (x) for the rows picked by `idxs`.\n",
+    "    selected = df.iloc[idxs]\n",
+    "    ax.scatter(selected['beak_length'], selected['beak_depth'])\n",
+    "    ax.set(xlabel='beak_length', ylabel='beak_depth', title=title)\n",
+    "    return ax\n",
+    "\n",
+    "\n",
+    "ax1 = fig.add_subplot(121)\n",
+    "ax1 = plot_length_depth_scatter(df, scandens_idx, 'scandens', ax1)\n",
+    "\n",
+    "ax2 = fig.add_subplot(122, sharex=ax1, sharey=ax1)\n",
+    "ax2 = plot_length_depth_scatter(df, fortis_idx, 'fortis', ax2)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Going to try a new model: we explicitly model depth and length jointly, as a multivariate Gaussian."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "with pm.Model() as mv_beaks:  # multivariate beak model\n",
+    "    packed_L = pm.LKJCholeskyCov('packed_L', n=2,\n",
+    "                             eta=2., sd_dist=pm.HalfCauchy.dist(2.5))\n",
+    "    L = pm.expand_packed_triangular(2, packed_L)\n",
+    "    sigma = pm.Deterministic('sigma', L.dot(L.T))\n",
+    "\n",
+    "    mu = pm.HalfNormal('mu', sd=20, shape=(2,))\n",
+    "    \n",
+    "    like = pm.MvNormal('like', mu=mu, cov=sigma, observed=df[['beak_depth', 'beak_length']].values)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "with mv_beaks:\n",
+    "    trace_mv = pm.sample(2000, njobs=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pm.traceplot(trace_mv)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pm.forestplot(trace_mv, varnames=['sigma'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pm.forestplot(trace_mv, varnames=['mu'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "samples_mv = pm.sample_ppc(trace_mv, model=mv_beaks)  # PPC draws must come from the trace sampled under mv_beaks"
    ]
   },
   {