From 6faff67c5931b6f3b98e9f7440fc0814e727df89 Mon Sep 17 00:00:00 2001 From: BaileeEgan Date: Fri, 17 Nov 2017 13:21:30 -0500 Subject: [PATCH 1/2] Added exercise 1 --- exercise1.ipynb | 718 +++ exercise1_output.html | 12802 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 13520 insertions(+) create mode 100755 exercise1.ipynb create mode 100755 exercise1_output.html diff --git a/exercise1.ipynb b/exercise1.ipynb new file mode 100755 index 0000000..54f6ee4 --- /dev/null +++ b/exercise1.ipynb @@ -0,0 +1,718 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Step-by-step Creating a maximum likelihood t-test in Python\n", + "\n", + "Hi, this is a tutorial on how to code a t-test using maximum likelihood in Python. This tutorial will explain how to create separate negative log-likelihood functions for the null and alternate model as well as produce a t-test that compares these models.\n", + "\n", + "## Stats language:\n", + "\n", + "Before we jump into the code, we need to define the statistical models for the t-test. The two models are the **null model** and the **alternative model**. \n", + "\n", + "In the null model of a t-test, we hypothesize that there is no difference between the means of two populations. We describe the model using the equation:\n", + "\n", + "$$y = \\beta_0 + ε$$\n", + "\n", + "In the alternative model, we hypothesize that there is a significance between the means.\n", + "\n", + "$$y = \\beta_0 + \\beta_1x + ε$$" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Complete code:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas\n", + "import numpy\n", + "import scipy.stats\n", + "from scipy.optimize import minimize\n", + "from plotnine import *\n", + "\n", + "#Import our data set\n", + "data = pandas.read_csv(\"chickwts.txt\")\n", + "\n", + "#Graph a summary plot of means for each seed type\n", + "ggplot(data=data) + geom_bar(aes(x='factor(feed)', y='weight'), stat = \"summary\", fun_y = numpy.mean) + theme_classic() + xlab(\"feed type\") + ylab(\"mean weight\")\n", + "\n", + "#A function to return the negative log-likelihood for a given observation\n", + "def nllike (p, obs):\n", + " B0 = p[0]\n", + " B1 = p[1]\n", + " sigma = p[2]\n", + " expected = B0 + B1 * obs.x\n", + " nll = -1 * scipy.stats.norm(expected, sigma).logpdf(obs.y).sum()\n", + " return nll\n", + "\n", + "#A function to return the negative log-likelihood for the null model\n", + "def nllike_null (p, obs):\n", + " B0 = p[0]\n", + " sigma = p[1]\n", + " expected = B0\n", + " nll = -1 * scipy.stats.norm(expected, sigma).logpdf(obs.y).sum()\n", + " return nll\n", + "\n", + "#function for returning p-value for t-test\n", + "def ttest (data, group1, group2, x, y):\n", + " #define a temporary slice of the data\n", + " temp_df = data[(data[x]==group1) | (data[x]==group2)]\n", + " \n", + " #Make new column 'x' set as 0 or 1 based on group\n", + " temp_df[\"x\"] = 0\n", + " temp_df[\"x\"][temp_df[x] == group2] = 1\n", + " temp_df[\"y\"] = temp_df[y]\n", + " \n", + " # y = B0 + B1*x + E\n", + " model = minimize(nllike, [1, 1, 1], method = \"Nelder-Mead\", options={'disp': True}, args = temp_df)\n", + " \n", + " # y = B0 + E\n", + " null_model = minimize(nllike_null, [1, 1], method = \"Nelder-Mead\", options={'disp': True}, args = temp_df)\n", + " \n", + " #Get differences in fit\n", + " D = (null_model.fun - model.fun) * 2\n", + " \n", + " #Use chi3.sf() for returning p-value\n", + " p = scipy.stats.chi2.sf(D,1)\n", + " \n", + " #Print results\n", + " print (\"-----------------------------\")\n", + " print (group1 + \" vs. \" + group2)\n", + " print (\"p-value = \" + str(p))\n", + " if p <= 0.05:\n", + " print (\"Significance\")\n", + " else:\n", + " print (\"No significance\")\n", + " print (\"-----------------------------\")\n", + "\n", + "#Perform t-tests\n", + "ttest(data, group1=\"horsebean\", group2=\"casein\", x=\"feed\", y=\"weight\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 1: Importing our tools and dataset\n", + "\n", + "We import pandas, numpy, scipy.stats, minimize from scipy.optimize, and all functions from plotnine (this is different than *import plotnine*).\n", + "\n", + "We then use pandas.read_csv() to import a text file as a pandas Dataframe." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import pandas\n", + "import numpy\n", + "import scipy.stats\n", + "from scipy.optimize import minimize\n", + "from plotnine import *\n", + "\n", + "#Import our data set\n", + "data = pandas.read_csv(\"chickwts.txt\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can check whether the data have been loaded. The dataframe contains 70 rows of feed types and the chick weights produced from that type of feed." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " weight feed\n", + "0 179 horsebean\n", + "1 160 horsebean\n", + "2 136 horsebean\n", + "3 227 horsebean\n", + "4 217 horsebean\n", + "5 168 horsebean\n", + "6 108 horsebean\n", + "7 124 horsebean\n", + "8 143 horsebean\n", + "9 140 horsebean\n", + "10 309 linseed\n", + "11 229 linseed\n", + "12 181 linseed\n", + "13 141 linseed\n", + "14 260 linseed\n", + "15 203 linseed\n", + "16 148 linseed\n", + "17 169 linseed\n", + "18 213 linseed\n", + "19 257 linseed\n", + "20 244 linseed\n", + "21 271 linseed\n", + "22 243 soybean\n", + "23 230 soybean\n", + "24 248 soybean\n", + "25 327 soybean\n", + "26 329 soybean\n", + "27 250 soybean\n", + "28 193 soybean\n", + "29 271 soybean\n", + ".. ... ...\n", + "41 226 sunflower\n", + "42 320 sunflower\n", + "43 295 sunflower\n", + "44 334 sunflower\n", + "45 322 sunflower\n", + "46 297 sunflower\n", + "47 318 sunflower\n", + "48 325 meatmeal\n", + "49 257 meatmeal\n", + "50 303 meatmeal\n", + "51 315 meatmeal\n", + "52 380 meatmeal\n", + "53 153 meatmeal\n", + "54 263 meatmeal\n", + "55 242 meatmeal\n", + "56 206 meatmeal\n", + "57 344 meatmeal\n", + "58 258 meatmeal\n", + "59 368 casein\n", + "60 390 casein\n", + "61 379 casein\n", + "62 260 casein\n", + "63 404 casein\n", + "64 318 casein\n", + "65 352 casein\n", + "66 359 casein\n", + "67 216 casein\n", + "68 222 casein\n", + "69 283 casein\n", + "70 332 casein\n", + "\n", + "[71 rows x 2 columns]\n" + ] + } + ], + "source": [ + "print data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can visualize the data using the following line of code, which creates a bar graph of mean chick weights by feed type." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAhoAAAFzCAYAAAB4qqApAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAIABJREFUeJzt3Xl8Tdf+//H3yTxJiQgxJeEiiKpQ2pqLXpevqVKX1FhT\n8f0qV6vVVlvtbd1LW1XT1ZZLG0FbOperLaqUqqS01DwkQSSIMfNJ1u8PP+c2DZVDtki8no9HH4+e\ns/bZ+7NWTrZ39tqDzRhjBAAAYAGXki4AAACUXQQNAABgGYIGAACwDEEDAABYhqABAAAsQ9AAAACW\nIWgAAADLEDQAAIBlCBoAAMAyBA0AAGCZ2y5oZGRkKD4+XhkZGSVdCgAAZd5tFzT27Nmjpk2bas+e\nPSVdCgAAZd5tFzQAAMDNQ9AAAACWIWgAAADLEDQAAIBlCBoAAMAyBA0AAGAZggYAALAMQQMAAFiG\noAEAACxD0AAAAJYhaAAAAMsQNAAAgGUIGgAAwDJuJV0AAAA3W3R0dEmXcEuJjY21bN0c0QAAAJYh\naAAAAMsQNAAAgGUIGgAAwDKcDHoNnDBUkJUnDAEAyh6OaAAAAMsQNAAAgGUIGgAAwDIEDQAAYBmC\nBgAAsAxBAwAAWIagAQAALEPQAAAAliFoAAAAyxA0AACAZQgaAADAMgQNAABgGYIGAACwDEEDAABY\n5pZ4TPzs2bO1bds2ZWZmqly5cnrggQfUp08fSVJCQoJmzZqlI0eOqHLlyhoxYoQaN27s+OymTZu0\nePFipaWlKTw8XGPHjlVQUFBJdQUAAPzGLXFEo3v37po3b56WL1+uqVOn6ttvv9XGjRtlt9v10ksv\nqXnz5lq6dKn69u2rqVOn6uzZs5KkpKQkzZw5U6NGjdKSJUsUGhqqadOmlXBvAADAZbdE0KhZs6a8\nvb0dr202m44fP65ffvlF2dnZioqKkru7u1q3bq2QkBBt2rRJkrR+/XpFRkaqSZMm8vT0VHR0tI4c\nOaLExMSS6goAAPiNW2LqRJIWL16szz//XNnZ2QoKClL79u31/fffKzQ0VC4u/81DYWFhSkhIkHRp\nWqVOnTqONh8fH1WpUkUJCQmqWbPmTe8DAAAo6JYJGoMGDdLAgQN14MABbdmyRb6+vsrMzJSvr2+B\n5Xx9fZWamipJysrKKtTu4+OjzMzMAu8lJycrOTlZkrR7924LewEAAH7rlgka0qUpkzp16iguLk5L\nly5VYGCg0tPTCyyTnp7umGbx8vJSRkbGVdsvmz9/vqZMmWJt8QAAoJBb4hyN38vPz1dycrJq1qyp\nhIQE5efnO9oOHTqkkJAQSVJISIgOHTrkaMvIyFBKSoqj/bKRI0cqLi5OcXFxiomJuTmdAAAAJR80\nLl68qHXr1ikjI0P5+fn69ddftWrVKt11111q1KiRPDw8tHLlSuXm5mrjxo1KTExUy5YtJUnt2rVT\nfHy8tm/frpycHMXGxio0NLTQ+RnBwcGKjIxUZGSk6tevXxLdBADgtnRLTJ18/fXXeuutt5Sfn6+A\ngAD17NlTXbt2lc1m07PPPqvZs2dr2bJlCgoK0qRJk1S+fHlJUo0aNTR27FjNmTNHZ86cUb169TRx\n4sQS7g0AALisxIOGn5+fXn755au2h4aG6tVXX71qe6tWrdSqVSsrSgMAADeoxKdOAABA2UXQAAAA\nliFoAAAAyxA0AACAZQgaAADAMgQNAABgGYIGAACwDEEDAABYhqABAAAsQ9AAAACWIWgAAADLEDQA\nAIBlCBoAAMAyBA0AAGCZEn9MPAD8kejo6JIu4ZYSGxtb0iUATuGIBgAAsAxBAwAAWIagAQAALEPQ\nAAAAliFoAAAAyxA0AACAZQgaAADAMgQNAABgGYIGAACwDEEDAABYhqABAAAsQ9AAAACWIWgAAADL\nEDQAAIBlCBoAAMAyBA0AAGAZggYAALAMQQMAAFiGoAEAACxD0AAAAJYhaAAAAMsQNAAAgGUIGgAA\nwDIEDQAAYBmCBgAAsIxbSRcAACge0dHRJV3CLSU2NrakS4A4ogEAACxE0AAAAJYhaAAAAMsQNAAA\ngGUIGgAAwDIEDQAAYBmCBgAAsAxBAwAAWIagAQAALEPQAAAAliFoAAAAyxA0AACAZQgaAADAMgQN\nAABgGYIGAACwDEEDAABYhqABAAAsQ9AAAACWIWgAAADLOB00XnzxRR0/fvyKbcnJyXrxxRdvuCgA\nAFA2OB00pkyZoqNHj16x7fjx45oyZcoNFwUAAMoGp4OGMUY2m+2KbcnJySpfvvwNFwUAAMoGt6Is\ntHTpUi1dulSSZLPZNGHChEKBIisrS9u2bVPLli2Lv0oAAFAqFSlo5OTk6MKFC5IuHdFIT0+Xq6tr\ngWU8PDw0cOBATZw4sfirBMqY6Ojoki7hlhIbG1vSJQCwSJGCxqBBgzRo0CBJUvv27TVv3jyFh4db\nWhgAACj9ihQ0fmvdunVW1AEAAMogp4OGJO3du1crVqzQ0aNHlZWVVaDNZrNpwYIFxVIcAAAo3ZwO\nGu+9956GDBkiLy8vhYSEyMPDo0D71a5IAQAAtx+ng8ZLL72kqKgoLVy4UD4+PlbUBAAAygin76Nx\n/PhxDR8+nJABAACuyemg0aZNG+3cudOKWgAAQBlTpKmTtLQ0x/+/8sor6t+/v7y8vNSpU6cr3gk0\nICCg+CoEAAClVpGCRmBgYIGTPI0xGjVq1FVP/MzLyyue6gAAQKlWpKCxcOFCriYBAABOK1LQGDx4\nsGUF5Obm6l//+pd27NihCxcuKDAwUH369FHbtm0lSQkJCZo1a5aOHDmiypUra8SIEWrcuLHj85s2\nbdLixYuVlpam8PBwjR07VkFBQZbVCwAAis7pk0GLW15engICAvT3v/9dy5Yt05gxYzRv3jzt2bNH\ndrtdL730kpo3b66lS5eqb9++mjp1qs6ePStJSkpK0syZMzVq1CgtWbJEoaGhmjZtWgn3CAAAXOb0\nfTTCwsKuOo3i4uKiO+64Q3fddZfGjBmjyMjIa67Py8tLDz/8sON1gwYNVL9+fe3evVuZmZnKzs5W\nVFSUXFxc1Lp1a33++efatGmTunbtqvXr1ysyMlJNmjSRdOlBVQMHDlRiYqJq1qzpbNcAAEAxc/qI\nRo8ePZSXl6czZ84oMjJSnTt3VmRkpM6cOaPc3Fw1btxYGzZs0D333KOvv/7a6YKysrJ04MABhYSE\nKDExUaGhoXJx+W+ZYWFhSkhIkHRpWiUsLMzR5uPjoypVqjjaAQBAyXL6iEZoaKhCQkK0atUq+fr6\nOt6/ePGiunTpovDwcM2fP19dunTR888/r44dOxZ53fn5+XrjjTdUp04dNWnSRPv27SuwDUny9fVV\namqqpEuh5PftPj4+yszMLPBecnKykpOTJUm7d+92qr8AAOD6OX1EY8aMGZo4cWKhf+D9/Pz0xBNP\n6M0335S7u7tGjRqlHTt2FHm9xhjNnTtXaWlpeuKJJ2Sz2eTt7a309PQCy6Wnp8vb21vSpWmXjIyM\nq7ZfNn/+fDVt2lRNmzZV//79nekuAAC4AU4HjVOnTun8+fNXbDt37pzOnDkjybmbdhlj9K9//UuH\nDx/WCy+84AgKNWvWVEJCgvLz8x3LHjp0SCEhIZKkkJAQHTp0yNGWkZGhlJQUR/tlI0eOVFxcnOLi\n4hQTE1PkugAAwI1xOmi0b99eTz31lL7//vsC72/cuFGTJk3S/fffL+nSo+RDQ0OLtM758+dr7969\nmjJlSoFnqDRq1EgeHh5auXKlcnNztXHjRiUmJqply5aSpHbt2ik+Pl7bt29XTk6OYmNjFRoaWuhE\n0ODgYEVGRioyMlL169d3tssAAOA6OX2Oxvz589W9e3e1bt1a5cuXV6VKlXTy5EmdPXtWTZo00fz5\n8yVdugLlySefvOb6UlNT9eWXX8rd3V2PPPKI4/2oqCj16dNHzz77rGbPnq1ly5YpKChIkyZNctz2\nvEaNGho7dqzmzJmjM2fOqF69epo4caKzXQIAABZxOmhUq1ZNcXFx+vLLL7Vt2zYlJycrODhYd999\nt/7yl784lhs+fHiR1hcUFKRPP/30qu2hoaF69dVXr9reqlUrtWrVqugdAAAAN43TQeOyLl26qEuX\nLsVZCwAAKGOK/PTW8uXLy8XFpcCTXK+Gp7cCAACpiEGjUqVK2rx5s5o3b17oSa5XwtNbAQCA5MTT\nW2vXru34f57kCgAAiqJIQWPQoEGO/7fySa4AAKBsue6nt545c0bfffedYmNjHTfpysrKKnBzLQAA\ncHtzOmjk5+fr6aefVo0aNdS2bVsNGDBAhw8fliQ9+OCDeumll4q9SAAAUDo5HTSee+45zZ49W6+9\n9pr27dsnY4yjrXv37vrss8+KtUAAAFB6OX0fjUWLFumVV17RyJEjC11dUrt2bR08eLDYigMAAKWb\n00c0Tp8+fdXnheTl5Sk3N/eGiwIAAGWD00Gjbt26+uqrr67Ytn79ekVERNxwUQAAoGxweupk/Pjx\nGj58uNzd3RUVFSVJOnr0qDZv3qw333xTixYtKu4aAQBAKeV00Bg8eLDS0tL0wgsv6JVXXpEk9ezZ\nU76+vvr73/+uPn36FHuRAACgdLquh6r97W9/04gRI/T999/r1KlTCggI0L333qs77rijuOsDAACl\nmNNBwxgjm80mPz8/PfDAA1bUBAAAyging0aFChXUsmVLtWnTRm3atNHdd98tN7frfto8AAAow5y+\n6uSf//ynypcvrzlz5qhly5a64447dP/99+uFF17Q2rVrlZmZaUWdAACgFHI6aIwcOVJLlixRYmKi\nDh48qLlz5yosLEyxsbHq1KmTKlSoYEWdAACgFLruh6pJkt1ul91uV05OjrKysmSMUa1atYqrNgAA\nUMo5fXLFvHnztGHDBm3YsEEpKSmKiIhQmzZtNGPGDLVp00aVKlWyok4AAFAKOR00xowZI29vbw0b\nNkwTJ05UtWrVrKgLAACUAU5PnUyfPl0dO3ZUTEyMatWqpfvuu09PPfWUVq1apQsXLlhRIwAAKKWc\nDhoTJkzQJ598otOnT+vHH3/Uww8/rMOHD2vYsGEKCAhQs2bNrKgTAACUQjd0MmhgYKACAgJUoUIF\n+fv7Ky8vTz///HNx1QYAAEo5p8/R+Pe//+04GfTIkSNyc3NTs2bN1LNnT7Vp00atWrWyok4AAFAK\nOR00/vd//1ctWrTQgAED1KZNG917773y9va2ojYAAFDKOR00zp49K3d3dytqAQAAZYzT52gQMgAA\nQFHd0MmgAAAAf4THrqLYREdHl3QJt5TY2NiSLgEAShxHNAAAgGUIGgAAwDI3NHWSmpqqrKysQu/X\nrFnzRlYLAADKCKeDxunTp/V///d/WrlypXJzcwu0GWNks9mUl5dXbAUCAIDSy+mgMWzYMH377bea\nNGmSGjRoIA8PDyvqAgAAZYDTQWPdunV68803NXDgQCvqAQAAZYjTJ4OWL19egYGBVtQCAADKGKeD\nxsSJEzVr1izZ7XYr6gEAAGWI01Mnu3fv1q+//qratWurbdu2Kl++fIF2m82mmTNnFluBAACg9HI6\naHz++edycbl0IOS7774r1E7QAAAAlzkdNA4fPmxFHQAAoAzizqAAAMAy131n0AMHDmjfvn1XvDPo\ngw8+eENFAQCAssHpoHH+/Hn16tVL69evl3TpbqDSpXMzLuPOoAAAQLqOqZMnn3xSJ06c0HfffSdj\njD766COtX79eQ4cOVVhYmLZs2WJFnQAAoBRyOmisXr1azzzzjFq0aCFJqlq1qtq0aaO33npLPXr0\n0GuvvVbsRQIAgNLJ6aCRmpqqGjVqyNXVVb6+vjp9+rSjrUuXLlq9enWxFggAAEovp4NGjRo1lJKS\nIkmqU6eOPv30U0fb5s2b5eXlVXzVAQCAUs3pk0E7deqkb775RlFRURo/frwGDRqkH374QR4eHtq6\ndasmTJhgRZ0AAKAUcjpo/POf/1RGRoYkacCAAfLz89OHH36ozMxMzZ49WyNHjiz2IgEAQOnkdNDw\n8fGRj4+P43WvXr3Uq1evYi0KAACUDdd9w67du3dr27ZtSkpK0iOPPKIqVarowIEDqly5ssqVK1ec\nNQIAgFLK6aCRkZGhYcOG6f3335fNZlN+fr46d+6sKlWqaNKkSQoLC9O0adOsqBUAAJQyTl918vjj\nj2vt2rX68ssvdf78ecedQSUubwUAAAU5fUTjww8/1PTp0/XAAw8UutV4aGiojhw5Uly1AQCAUs7p\nIxoXL15UcHDwFdvS09NvuCAAAFB2OB007rzzTq1YseKKbV988YWaNWt2w0UBAICywempk8mTJ6tH\njx7KyMjQQw89JJvNpq1bt2rp0qVauHChvvzySyvqBAAApZDTRzS6du2qZcuWaePGjerZs6eMMRo9\nerSWL1+uJUuWqEOHDlbUCQAASqHruo9GVFSUoqKitG/fPp06dUoBAQEKDw8v7toAAEApd9037JKk\nunXrqm7dusVVCwAAKGOuK2gkJSXp448/VlJSkrKysgq02Ww2zZw5s1iKAwAApZvTQeP999/XgAED\nlJ+fr6CgIHl4eBRoJ2gAAIDLnA4aTz/9tHr27Km33npLd9xxhxU1AQCAMsLpq05OnjypESNGEDIA\nAMA1OR00OnfurC1btlhRCwAAKGOcnjr517/+pb/+9a/KyMhQhw4dVL58+ULLREZGFktxAACgdHM6\naFy4cEEZGRmaOnWq/vGPfxRoM8bIZrMVetgaAAC4PTkdNAYOHKjExETNmjVLdevWLXTVCQAAwGVO\nB42tW7cqNjZWPXv2tKIeAABQhjh9MmidOnVkt9utqAUAAJQxTgeN119/XS+//LL27NljRT0AAKAM\ncXrqZNy4cTpx4oQiIiJUtWrVQled2Gw27dixo9gKBAAApZfTQaNp06ay2WxW1AIAAMoYp4PGokWL\nLCgDAACURTf0mPji8Pnnn2vt2rU6cuSI7r33Xj3xxBOOtoSEBM2aNUtHjhxR5cqVNWLECDVu3NjR\nvmnTJi1evFhpaWkKDw/X2LFjFRQUVBLdAAAAV+D0yaDFLSAgQH369NEDDzxQ4H273a6XXnpJzZs3\n19KlS9W3b19NnTpVZ8+elXTpUfUzZ87UqFGjtGTJEoWGhmratGkl0QUAAHAVJR407rvvPt1zzz3y\n9/cv8P4vv/yi7OxsRUVFyd3dXa1bt1ZISIg2bdokSVq/fr0iIyPVpEkTeXp6Kjo6WkeOHFFiYmJJ\ndAMAAFxBiQeNq0lMTFRoaKhcXP5bYlhYmBISEiRdmlYJCwtztPn4+KhKlSqOdgAAUPJK/ByNq8nM\nzJSvr2+B93x9fZWamipJysrKKtTu4+OjzMzMQutKTk5WcnKyJGn37t0WVQwAAH7vlg0a3t7eSk9P\nL/Beenq6vL29JUleXl7KyMi4avtvzZ8/X1OmTLGuWAAAcEW37NRJzZo1lZCQoPz8fMd7hw4dUkhI\niCQpJCREhw4dcrRlZGQoJSXF0f5bI0eOVFxcnOLi4hQTE2N98QAAQNItEDTy8vKUk5Oj/Px85efn\nKycnR3a7XY0aNZKHh4dWrlyp3Nxcbdy4UYmJiWrZsqUkqV27doqPj9f27duVk5Oj2NhYhYaGqmbN\nmoW2ERwcrMjISEVGRqp+/fo3u4sAANy2SnzqZPny5Vq2bJnj9aZNm3T//fdr3LhxevbZZzV79mwt\nW7ZMQUFBmjRpkuOW5zVq1NDYsWM1Z84cnTlzRvXq1dPEiRNLqhsAAOAKSjxoREdHKzo6+optoaGh\nevXVV6/62VatWqlVq1ZWlQYAAG5QiU+dAACAsougAQAALEPQAAAAliFoAAAAyxA0AACAZQgaAADA\nMgQNAABgGYIGAACwDEEDAABYhqABAAAsQ9AAAACWIWgAAADLEDQAAIBlCBoAAMAyBA0AAGAZggYA\nALAMQQMAAFiGoAEAACxD0AAAAJYhaAAAAMsQNAAAgGUIGgAAwDIEDQAAYBmCBgAAsAxBAwAAWIag\nAQAALEPQAAAAliFoAAAAyxA0AACAZQgaAADAMgQNAABgGYIGAACwDEEDAABYhqABAAAsQ9AAAACW\nIWgAAADLEDQAAIBlCBoAAMAyBA0AAGAZggYAALAMQQMAAFiGoAEAACxD0AAAAJYhaAAAAMsQNAAA\ngGUIGgAAwDIEDQAAYBmCBgAAsAxBAwAAWIagAQAALEPQAAAAliFoAAAAyxA0AACAZQgaAADAMgQN\nAABgGYIGAACwDEEDAABYhqABAAAsQ9AAAACWIWgAAADLEDQAAIBlCBoAAMAyBA0AAGAZggYAALAM\nQQMAAFiGoAEAACxD0AAAAJYhaAAAAMsQNAAAgGUIGgAAwDIEDQAAYBmCBgAAsAxBAwAAWIagAQAA\nLEPQAAAAlnEr6QJu1MWLFzVnzhzFx8fL29tbvXr1Uo8ePUq6LAAAoDIQNObPn6/c3Fz9+9//Vmpq\nqiZPnqzq1auradOmJV0aAAC3vVI9dZKVlaVNmzZpwIAB8vHxUWhoqP785z/rq6++KunSAACASnnQ\nOHbsmIwxCgkJcbwXFhamxMTEEqwKAABcVqqnTrKysuTj41PgPR8fH2VmZhZ4Lzk5WcnJyZKk3bt3\n37T6AAC43ZXqoOHl5VUoVKSnp8vb27vAe/Pnz9eUKVOuaxuxsbHXXd/thrEqOsaq6BiromOsio6x\nunlK9dRJtWrVJKnAVMnhw4dVs2bNAsuNHDlScXFxiouLU0xMzE2tEQCA21mpDhpeXl5q2bKl3nvv\nPWVkZOjIkSNas2aNOnXqVGC54OBgRUZGKjIyUvXr1y+hagEAuP2U6qkT6dLRitmzZ2vw4MHy9vZW\nVFQUl7YCAHCLKPVBw8/PT0899VRJlwEAAK6gVE+dAACAWxtBAwAAWIagAQAALEPQAAAAliFoAAAA\nyxA0AACAZQgaAADAMgQNAABgmVJ/wy5nXX4IG09xBQDg+oSHhxd6evrV3HZB48iRI5Kk/v37l2wh\nAACUUnFxcYqMjCzSsjZjjLG4nlvKqVOn9J///EehoaGFHid/q9q9e7f69++vmJgYHgp3DYxV0TFW\nRcdYFR1jVXSleaw4ovEHAgMD9fDDD5d0Gdelfv36RU6QtzvGqugYq6JjrIqOsSq6sj5WnAwKAAAs\n4/rCCy+8UNJF4Nr8/PzUrl07lStXrqRLueUxVkXHWBUdY1V0jFXR3Q5jddudowEAAG4epk4AAIBl\nCBoAAMAyBI0yZO7cuYqNjS3pMiRJw4YNU1xcXInWEBsbq+nTp5doDcXp8pi+//77euONN0q6nAIW\nL158y9VU2nXv3l1Hjx4t6TKK5I033tDixYtLuoxb1rlz5/TMM8/or3/9q2bNmlXm9k3Xcttd3lqW\njR49uqRLwE3Qp0+fki7htjBs2DCNGjVKTZs2LelSUMr95z//kZeXl5YtWyabzXbL/EF4s3BEA7e0\nvLy8ki4BAG5ISkqKatasKZvNVtKlSLr5+1WOaFjs9OnTWrhwoX7++WfZ7XZFRETomWee0fTp07Vz\n505lZ2crNDRUjz76qEJDQyVdurXrokWLlJqaKk9PT7Vv315DhgyRJO3fv18LFixQQkKCKlSooP79\n++u+++6TdOnwZYUKFTRo0CD98ssvmj59uvr06aMPPvhA+fn56tWrlx588MGb1vfExETFxMTo+PHj\natCggSZMmCA/Pz/FxcVp8eLFSk1NVfXq1TVs2DCFh4c7+uDh4aGzZ89q+/btGjNmjKpWrar58+cr\nKSlJ7u7uatKkiSZMmCBJOn78uN566y3t379fvr6+6tmzp7p06eKoITc3V6+99pq2bt2qwMBAjRo1\nShEREZKkjIwMLVq0SD/++KPy8vJ033336ZFHHpGHh4cyMjL02muvad++fbLb7QoPD9fo0aNVqVIl\nSdLTTz+tBg0aaPfu3Tpw4IBCQkL0+OOPKygoyPJxjY2N1bFjx/TEE08oJSVFw4cP1/jx47VkyRKl\np6erQ4cOGjZsmCTpxIkTmjVrlg4ePChXV1fVqFFD//jHPyRJZ86c0dtvv61ffvlF7u7u6tixo/r2\n7SsXl0t/f6xbt04rVqzQ6dOnFRISotGjR6tmzZqSpMOHD2vWrFk6duyYGjZsqIoVK1re78uGDRum\nLl26aMOGDTp27JiaNGmisWPH6u2339aWLVsUFBSkxx9/XCEhIX/YxxMnTmj27Nk6fPiwJOmuu+7S\nqFGj5Ofnp+nTp+vkyZOaOnWqXFxc1K1bNw0YMEDdu3fXqFGj9Omnn+rUqVNq3769Hn74Yc2cOVO7\ndu1SSEiIJk6cqMDAQEl//P3cv3+/3n77bcf3+t5779WwYcPk7u5u+Rh+9NFH+uyzz5Seni5/f38N\nGDBArVu31sqVK7V69WplZGSoQYMGevTRR1WxYkV99NFH+vnnn/X888871rFixQrt2rVLzz33nCTp\n4sWLmjJlin799VdVr15dY8eOVUhIiKQ//q790c/h8s+7a9eu2rBhQ6F9yc0am6NHjzp+5yQpJydH\nUVFRevvtt1W5cmW98cYb8vT01JkzZ7Rjxw4FBQVp/PjxqlWrll5//XV99913stls+uKLLzRu3LhC\n27zaPvHUqVMaNWqUYmNj5e7urpiYGK1YsUKxsbHy9vbWhx9+qKNHj2rcuHHKzc3V0qVLtWHDBmVm\nZqpJkyZ69NFH5efn59hPPPbYY1q6dKk8PT01Z84cS8bvigwsY7fbzbhx48zcuXNNenq6yc3NNb/8\n8osxxpivvvrKpKenm5ycHPPOO++YMWPGOD43cOBAs3btWmOMMRkZGWbPnj3GGGNOnz5toqOjzebN\nm43dbjd79uwx/fr1M4mJicYYY2bMmGEWLVpkjDHm559/Nj169DALFy40OTk5Zu/evaZXr17m2LFj\nN6XvQ4cONWPHjjWpqakmIyPDPPHEEyYmJsYcO3bM9O7d22zdutXY7XbzzTffmL59+5pz5845+vDQ\nQw+ZHTt2mPz8fJOVlWUef/xxs3z5cpOXl2eys7PNrl27jDHGZGVlmSFDhpgvvvjC5ObmmqSkJDNk\nyBATHx+DdFMzAAAUzElEQVRvjDFmyZIlpkePHmbt2rXGbrebr7/+2vTt29dcuHDBGGPMK6+8YmbM\nmGHS09PNhQsXzHPPPWfeffddY4wxFy5cMBs3bjRZWVkmIyPDTJs2zUyZMsXRv0mTJplHHnnEHDly\nxOTk5JipU6ea119/3fIx3bZtm1myZImZNm2aMcaYEydOmG7dupnXX3/dZGZmmuTkZNO3b1+zfft2\nY4wx06ZNM3PmzDG5ubkmNzfX7Ny50xhjTF5envnb3/5m3n33XZOdnW1OnTplxo4da1avXm2MMeaH\nH34wQ4cONYcPHzZ2u92sWrXKDBs2zOTk5Jjc3FwzdOhQs3z5cpObm2vi4+NN7969zYwZMyzt/2/H\nYdy4cebUqVPm3LlzZuTIkWbkyJFm27Ztxm63m/nz55vJkydfs4/JyckmPj7e5OTkmHPnzplJkyaZ\nefPmFRrv3+rWrZt5/vnnzYULF0xKSorp16+feeyxx8y+fftMbm6u+fvf/25mzZpljLn29/PAgQPm\n119/NXa73aSkpJjRo0eblStXFthWUlJSsY9fUlKS6d27t2Pdp0+fNgkJCebrr782Q4cONUlJSSYr\nK8vMmTPHTJw40RhjTFpamundu7dJS0tzrGfMmDFm48aNxphLv7e9e/c2P/30k8nNzTXLli0zw4cP\nN3a7vVh+Dlfal1jhamPz2985Y4zJzs423bp1MydOnHD0v2/fvmbXrl3Gbrebt956yzz55JOO5X+7\nbzbGFFjftfaJw4cPd+zznnjiCTN8+HDH9/K5554zX3/9tTHGmHfeecc899xz5syZMyYrK8vMmDHD\nvPrqq8aY/+4npk2bZtLT001WVpYl43c1TJ1YaP/+/UpJSdGwYcPk4+MjNzc3x1/THTt2lI+Pj9zd\n3dW3b18lJibq/PnzkiQ3NzclJyfr/Pnz8vb2Vr169SRd+guzcePGuueee+Tq6qp69erpnnvu0aZN\nm664fRcXF/Xv31/u7u6qW7euqlWr5vir4Wbo3r27KlWqJG9vb9133306ePCgvvvuO0VGRuruu++W\nq6ur7r//flWrVk1btmxxfO7uu+/WnXfeKZvNJk9PT7m5uSk1NVVpaWny8PBQgwYNJEk//vijAgIC\n1KVLF7m5ual69ep64IEHtGHDBse6wsLC1L59e7m6uqpDhw4KCgrSjz/+qLNnz2rr1q0aMWKEfHx8\n5Ofnpz59+jg+6+fnp5YtW8rT01Pe3t6KiorSrl27CvSvQ4cOCgkJkbu7u9q0aaODBw/ehFG9sujo\naHl5ealKlSpq2LChDh06JOnSdyktLU2pqalyc3NTw4YNJUkHDhzQqVOn1L9/f3l4eKhixYrq2bOn\no/+rVq3Sgw8+qNDQULm6uqpz586y2Wzau3ev9uzZo+zsbEVFRcnNzU1NmjS56bdP/p//+R9VrFhR\n/v7+ioyMVOXKldW0aVO5urqqdevWOnjw4DX7WKVKFTVp0kTu7u7y9/dX9+7dC/2Mr6R3797y8/NT\nUFCQGjRooLp166pOnTpyc3NTq1atHGN/re9n7dq1Vb9+fbm6uiooKEh//vOftXPnTusG7f9zdXWV\ndOmIY3Z2tgICAlSzZk2tX79e3bt3V/Xq1eXp6akhQ4Zo3759Sk5OVoUKFXTXXXfp22+/lXTp+5OW\nlqbmzZs71tu0aVPdddddcnNzU1RUlDIyMrR3795i+TlcaV9yM8emKFq0aKEGDRo49muXvwfXcq19\nYqNGjfTLL78oKytLycnJ6tq1q3bu3Cm73a7du3crIiJCxhitXr1aw4YNU/ny5eXp6amHH35YmzZt\nKjBN0q9fP/n4+MjT09PJkbkxTJ1Y6NSpU6pUqVKhQ6F5eXmKiYnRpk2bdO7cOceh6vPnz8vf319P\nP/20li9frpEjRyo4OFj9+vXT3XffrdTUVP3www/q169fgXW1a9fuitv38/MrsG1PT09lZWUVf0ev\nonz58oW2ffr06ULTC5UrV1ZaWprj9eXpicvGjh2r2NhYjR8/Xv7+/urZs6c6deqklJQUHTp0qMB4\n5OfnO4LIldYVFBTk+Ic3Pz9fQ4cOdbQZY5Sfny9Jys7O1jvvvKP4+HhdvHhRkpSZmanc3FzHmFao\nUKFQ/0rK72vJzMyUJA0ZMkSxsbF69tln5erqqj//+c+KiopSamqqzp07p+joaMfn8vPzHYf8U1NT\ntWjRIr333nuO9tzcXJ0+fVo2m00BAQGO7610aZzT09Ot7qbD779bV/pZXKuPZ86c0TvvvKNdu3Yp\nMzNTxpgiPWjx99v6fS2Xx/5a389jx45pwYIFOnDggLKzs5WXl6ewsDBnh8JpwcHBGjdunD777DPN\nnDlTDRs21COPPFLod9Pb21vlypXT6dOnFRwcrI4dO2rp0qXq2bOn1q1bp9atWxfYv/z2d83V1VUV\nK1Z0fF9u9OdwpX2JFa42NkVxvfuDa+0TIyIi9M0336hu3bqqV6+eGjdurNmzZ2v//v3y9/dX5cqV\ndfbsWWVnZ2vixIkF1mOz2XT27FnH69/vD28WgoaFAgMDdfLkSdntdrm5/XeoN2zYoM2bN+vFF19U\n5cqVlZGRUWBnVLt2bT399NPKy8vTxo0b9Y9//ENLlixRpUqV1Lp16yvO8ZUWFStWLPTXSEpKiho3\nbux4/fsTpoKDgzVhwgQZY7Rz5049//zzatiwoSpVqqTw8HC98sorV93eyZMnC72+7777VKlSJbm6\nuurdd9+94pz4Rx99pKSkJE2fPl0BAQE6fPiwHnvsMZlSdiPd8uXLO65GOnTokCZPnqw6deooMDBQ\ngYGBeuedd674ucDAQD344IPq0KFDobadO3cqLS1N+fn5jrBx8uTJIj/J8Wa5Vh/fe+895efn6803\n35S/v7+2bNmiuXPnFtv2r/X9nDdvnuPcHh8fH3366acFjsZZqVWrVmrVqpWys7O1ePFizZ49WxUr\nVlRqaqpjmczMTF24cMFx/k2zZs00Z84cHTx4UBs2bNCzzz5bYJ2//V3Ly8vT6dOnVbFiRbm4uJTo\nz8FZVxqbFi1aKDs727HMmTNnim1719onNmrUSHPmzNFPP/2kRo0aKSQkRCdPntTWrVsdR8j9/f3l\n4eGhN954Q5UrVy60jZSUFEmF9603C1MnFqpTp44qVaqkBQsWKCMjQ3a7XTt37lRmZqbc3d1Vrlw5\n5eTkKCYmxvGZ3NxcrVu3ThcvXpSrq6t8fX1ls9nk4uKidu3aKS4uTlu3blVeXp5yc3O1d+9eJSUl\nlWAvndOqVSvFx8crLi5OeXl5WrdunY4dO6Z77rnnqp9Zu3atzp49K5vNJl9fX0mXpoUuH+VZs2aN\ncnNzlZeXpyNHjmj//v2Ozx4+fFjffvutY1snTpxQs2bNVKFCBTVr1kxvv/22Ll68KGOMTp486bj3\nR2Zmpjw8POTr66uLFy9q+fLl1g6MRTZu3Oj4B8DX11cuLi5ycXFRnTp15O/vr2XLlikrK0v5+fk6\nfvy449D9X/7yF3344Yc6fPiwjDHKzMzU1q1blZGRofDwcHl4eGjlypWy2+3avn274uPjS7KbV3St\nPmZmZsrLy0u+vr46ffq0Pv744wKfL1++vE6cOHHd27/W9zMzM1M+Pj7y9vbWsWPHtHr16uvvrBOO\nHj2q7du3KycnR25ubvLy8pKLi4vatm2rTz/9VMeOHVNOTo4WL16sOnXqKDg4WNKlabh27dpp5syZ\nKleunGNK97L4+Hjt2LFDdrtdK1askLe3t2Na6UZ+DjfT1camVq1a2rVrl06cOKGsrCwtW7as2LZ5\nrX1iYGCgAgICtGbNGjVq1Eg2m03h4eFatWqVGjVqJOnS/rBz585asGCB40jI2bNnC0xJlySOaFjI\n1dVVkydP1jvvvKMRI0YoPz9fjRo10rhx4xQfH68hQ4aoXLlyhR5b/+233+rtt99WXl6egoKCNHHi\nRHl4eCgwMFDPP/+8Fi1apJkzZ0qSQkNDCxz+v9VVq1ZNTz75pBYtWqSTJ0+qatWqmjx5svz9/a/6\nme3bt+vf//63srOzVaFCBT366KOqUqWKJOnFF1/UwoUL9d5778lut6t69erq37+/47PNmzfXtm3b\nNHfuXAUGBmrSpEmOhxeNGzdOMTExeuyxx3Tx4kUFBgaqc+fOatq0qbp3767XXntNAwYMUEBAgHr2\n7Knvv//e2sGxwIEDB7RgwQJdvHhR5cqVU7du3Rw7p8mTJ2vRokV69NFHlZWVpcqVK6t3796SpHvu\nuUfZ2dl64403lJKSIk9PTzVo0EARERFyc3PTM888o9mzZ+v9999XRESE2rdvr9zc3JLsaiGXf/+u\n1sd+/fppxowZ6tevn4KDg9WuXTt99NFHjs9fvqogJiZGXbt2LfC9Kgpvb+8//H4OGTJEc+bM0Sef\nfKJatWqpZcuW+umnn4pvAK4iNzdXMTExSkpKcvwjOnr0aFWtWlVnzpzR888/77jq5PeH4jt06KBP\nPvlEAwcOLLTedu3a6eOPP9bLL7+s6tWr6+mnn3Ycyb2Rn8PNdLWxqV69uu6//36NHz9evr6+6t+/\nv7755pti2WZR9omNGjXS5s2bHVcmXn59+YiGJA0aNEgffPCBnnrqKZ07d0533HGHWrdu/Yd/xN0s\nPFQNAFAkFy5c0KBBg/TWW285zrEAroWpEwDANRlj9OmnnyoyMpKQAacwdQIA+EO5ubl6+OGHVaFC\nBU2ePLmky0Epw9QJAACwDFMnAADAMgQNAABgGYIGAACwDEEDAABYhqABAAAsQ9AAbiMzZsxQzZo1\n5erqqp49e96Ubfbs2fOqD/677OOPPy7R51sAsA730QBuE/v379eECRP05JNPqlu3brfUTZc+/vhj\nbdu2zfEAOABlB0EDuE3s3btXxhgNHz5ctWrVKulyANwmmDoBbgODBw9Wt27dJEm1a9eWzWbTokWL\nJF16yuPo0aMVHBwsT09PNW3aVGvWrCm0ji+++EItWrSQt7e3KlWqpFGjRik9Pb3AMrt371bbtm3l\n5eWl2rVra/HixUWqbfHixdq1a5dsNptsNpsGDx6szz77TDabrcDTeKVLj+j29vZ2TLUMHjxYERER\nWrVqlSIiIuTl5aWmTZte8cmVixYt0p133ikvLy9Vq1ZNzzzzjPLy8oo0hgCukwFQ5h04cMD885//\nNJLMypUrzebNm01qaqrJzs42zZo1MzVq1DALFiwwq1evNv379zdubm7m559/dnz+gw8+MC4uLmbo\n0KFm1apVZuHChSYoKMj89a9/dSyTmZlpqlevburVq2fef/998/7775vw8HBTtWpV07Zt2z+srUuX\nLqZWrVpm8+bNZvPmzebAgQPGbrebatWqmaeeeqrA8rNnzzZeXl7mzJkzxhhjBg0aZAICAkxoaKhZ\ntGiR+eSTT8y9995r/P39TUpKiuNzr732mnF1dTWPP/64WbNmjZk5c6bx8/MzTz75ZDGNMoArIWgA\nt4mPPvrISDKHDx92vLdw4ULj5uZmdu3aVWDZFi1amIceesgYY0x+fr4JCQkx/fr1K7DMqlWrjM1m\nMzt37jTGGDNv3jzj4uJi9u3b51hm//79xsXF5Q+DhjGXwkLDhg0Lvf/ss8+aqlWrGrvd7ngvMjLS\nREdHF/isJPPNN9843jt79qwpV66cI6ScP3/e+Pn5mUmTJhVY/7x584y3t7c5derUH9YH4PoxdQLc\nxtasWaNGjRqpbt26stvtjv86deqkH3/8UZK0b98+JSQkqE+fPgWWadu2rVxcXLRt2zZJ0g8//KCI\niAjVqVPHsf4//elPaty48XXXN3ToUCUnJ2v16tWSpJ9//lnx8fEaOnRogeXuuOMO3X///QVed+zY\nUT/88IMk6fvvv9fFixf10EMPFehDx44dlZmZqZ07d153jQD+GCeDArexU6dO6aeffpK7u3uhNldX\nV8cyktSrV68rriMpKUmSlJycrKCgoELtlStXVmZm5nXVFxoaqk6dOmnBggXq2rWrFi5cqLCwMLVv\n377AcpUqVbridnfv3l2gD5GRkX/YBwDFj6AB3MYCAgJ05513asGCBX+4jCTNnj1bLVq0KNRetWpV\nSVJwcLDi4+MLtaekpMjf3/+6axw+fLiio6N17NgxLVmyRGPHjpXNZiuwzMmTJ6+43eDg4AJ9WLly\npWrUqFFo2bCwsOuuD8AfI2gAt7GOHTvqyy+/VNWqVR2B4ffCw8NVvXp1HTp0SGPGjLnqupo3b653\n331XBw4c0J/+9CdJ0oEDB7Rjxw61bt36D+vw8PBQVlbWFdt69OihChUqKDo6WmlpaRo8eHChZc6d\nO6e1a9c6pk/OnTunr7/+2lHvvffeKx8fHx09evSqR2YAWMP1hRdeeKGkiwBgvT179mj58uUaN26c\nypcvL0mKiIjQJ598ovnz58vT01MXLlzQ9u3btXTpUq1Zs0YdO3aUzWZTjRo1NHHiRB0/flzGGB07\ndkzr16/Xyy+/rLvuuksVK1ZURESEFi5cqBUrVqhy5cr69ddfNWLECLm7u6tixYpXDAiXHThwQB98\n8IHq1Kmj7Oxs5eTkOGp0dXXVyZMntXTpUj3wwAOFws7HH3+spKQk/ec//1FAQIASEhI0atQopaSk\nKDY2Vn5+fvLy8pKXl5cmTZqkCxcuyG636/Dhw/rqq680efJkPfjgg1ecPgJQDEr6bFQAN8eVrjox\nxphz586Z8ePHm5o1axp3d3cTHBxsunTpYj7//PMCy61Zs8a0bdvW+Pr6Gl9fX9OwYUMzYcIEc/bs\nWccyO3fuNK1btzYeHh4mLCzMLFy40PTo0eOaV52cO3fO9O3b11SsWNFIMoMGDSrQ/v333xtJZvny\n5YU+e/mKlc8//9zUr1/feHh4mCZNmphNmzYVWnbp0qXm7rvvNt7e3sbf3980adLETJ482eTm5v7x\n4AG4bjZjjCnhrAMAf+i5557T3LlzdezYMXl6ehZoGzx4sLZt28aVI8AtinM0ANyy9u7dq71792rW\nrFkaM2ZMoZAB4NZH0ABwyxo5cqS2bNmizp07a9KkSSVdDoDrwNQJAACwDHcGBQAAliFoAAAAyxA0\nAACAZQgaAADAMgQNAABgGYIGAACwDEEDAABYhqABAAAs8/8A43DQj2N8EcsAAAAASUVORK5CYII=\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Graph a summary plot of means for each feed type\n", + "ggplot(data=data) + geom_bar(aes(x='factor(feed)', y='weight'), stat = \"summary\", fun_y = numpy.mean) + theme_classic() + xlab(\"feed type\") + ylab(\"mean weight\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 2: Defining functions for computing negative loglikelihood for the models\n", + "\n", + "Here, we'll be coding two functions to calculate the negative loglikelihood for a given observation for each of our models.\n", + "Remember, the equation for the null model is:\n", + "$$y = \\beta_0 + ε$$\n", + "\n", + "The equation for the alternative model is:\n", + "\n", + "$$y = \\beta_0 + \\beta_1x + ε$$\n", + "\n", + "We'll start with the null model.\n", + "\n", + "**First define the function**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def nllike_null (p, obs):" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Now we will unpack the parameters**\n", + "\n", + "Here, *p* is a list of two terms: the intercept (β0) and the error (ε)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def nllike_null (p, obs):\n", + " B0 = p[0]\n", + " sigma = p[1]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**We calculate the expected value (y).**\n", + "\n", + "*Where's the error in the formula? Stick around to find out!*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def nllike_null (p, obs):\n", + " B0 = p[0]\n", + " sigma = p[1]\n", + " expected = B0" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Then we calculate the negative loglikelihood (nll)**\n", + "\n", + "The error *sigma* pops back in this equation. We then return nll." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def nllike_null (p, obs):\n", + " B0 = p[0]\n", + " sigma = p[1]\n", + " expected = B0\n", + " nll = -1 * scipy.stats.norm(expected, sigma).logpdf(obs.y).sum()\n", + " return nll" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "***Our completed function for the negative loglikelihood for the null model:***" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "#A function to return the negative log-likelihood for the null model\n", + "def nllike_null (p, obs):\n", + " B0 = p[0]\n", + " sigma = p[1]\n", + " expected = B0\n", + " nll = -1 * scipy.stats.norm(expected, sigma).logpdf(obs.y).sum()\n", + " return nll" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**We then create a similar function for the alternative model:**\n", + "\n", + "Notice that we have more parameters to unpack as well as another term in the expected variable. *Why B1 * obs.x?*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "#A function to return the negative log-likelihood for a given observation\n", + "def nllike (p, obs):\n", + " B0 = p[0]\n", + " B1 = p[1]\n", + " sigma = p[2]\n", + " expected = B0 + B1 * obs.x\n", + " nll = -1 * scipy.stats.norm(expected, sigma).logpdf(obs.y).sum()\n", + " return nll" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 3: Creating a function for the t-test\n", + "\n", + "Finally, we get to the actual test. We will create a function with four parameters.\n", + "\n", + " data will be our dataset\n", + " group1 will be the feed type compared\n", + " group2 will be the second feed type compared\n", + " x is the independent categorical variable, i.e. \"feed\"\n", + " y is the response variable, i.e. \"weight\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "#function for returning p-value for t-test\n", + "def ttest (data, group1, group2, x, y):" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**We'll just subset the data, named *temp_df* of the feed types that we're only interested**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "#function for returning p-value for t-test\n", + "def ttest (data, group1, group2, x, y):\n", + " #define a temporary slice of the data\n", + " temp_df = data[(data[x]==group1) | (data[x]==group2)]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**We'll create a new column called *x* in *temp_df* called x with the values as 0 or 1 depending on the feed type of the row.**\n", + "\n", + "**We then make another new column called *y* that's just a copy of the response column**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "#function for returning p-value for t-test\n", + "def ttest (data, group1, group2, x, y):\n", + " #define a temporary slice of the data\n", + " temp_df = data[(data[x]==group1) | (data[x]==group2)]\n", + " \n", + " #Make new column 'x' set as 0 or 1 based on group\n", + " temp_df[\"x\"] = 0\n", + " temp_df[\"x\"][temp_df[x] == group2] = 1\n", + " temp_df[\"y\"] = temp_df[y]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**We use *minimize* in the scipy package to calculate the fit for the two models.**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "#function for returning p-value for t-test\n", + "def ttest (data, group1, group2, x, y):\n", + " #define a temporary slice of the data\n", + " temp_df = data[(data[x]==group1) | (data[x]==group2)]\n", + " \n", + " #Make new column 'x' set as 0 or 1 based on group\n", + " temp_df[\"x\"] = 0\n", + " temp_df[\"x\"][temp_df[x] == group2] = 1\n", + " temp_df[\"y\"] = temp_df[y]\n", + " \n", + " # y = B0 + B1*x + E\n", + " model = minimize(nllike, [1, 1, 1], method = \"Nelder-Mead\", options={'disp': True}, args = temp_df)\n", + " \n", + " # y = B0 + E\n", + " null_model = minimize(nllike_null, [1, 1], method = \"Nelder-Mead\", options={'disp': True}, args = temp_df)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**To compute the p-value of the t-test**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "#function for returning p-value for t-test\n", + "def ttest (data, group1, group2, x, y):\n", + " #define a temporary slice of the data\n", + " temp_df = data[(data[x]==group1) | (data[x]==group2)]\n", + " \n", + " #Make new column 'x' set as 0 or 1 based on group\n", + " temp_df[\"x\"] = 0\n", + " temp_df[\"x\"][temp_df[x] == group2] = 1\n", + " temp_df[\"y\"] = temp_df[y]\n", + " \n", + " # y = B0 + B1*x + E\n", + " model = minimize(nllike, [1, 1, 1], method = \"Nelder-Mead\", options={'disp': True}, args = temp_df)\n", + " \n", + " # y = B0 + E\n", + " null_model = minimize(nllike_null, [1, 1], method = \"Nelder-Mead\", options={'disp': True}, args = temp_df)\n", + " \n", + " #Get differences in fit\n", + " D = (null_model.fun - model.fun) * 2\n", + " \n", + " #Use chi3.sf() for returning p-value\n", + " p = scipy.stats.chi2.sf(D,1)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Finally, we display the output in a nicely formated way.**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "#function for returning p-value for t-test\n", + "def ttest (data, group1, group2, x, y):\n", + " #define a temporary slice of the data\n", + " temp_df = data[(data[x]==group1) | (data[x]==group2)]\n", + " \n", + " #Make new column 'x' set as 0 or 1 based on group\n", + " temp_df[\"x\"] = 0\n", + " temp_df[\"x\"][temp_df[x] == group2] = 1\n", + " temp_df[\"y\"] = temp_df[y]\n", + " \n", + " # y = B0 + B1*x + E\n", + " model = minimize(nllike, [1, 1, 1], method = \"Nelder-Mead\", options={'disp': True}, args = temp_df)\n", + " \n", + " # y = B0 + E\n", + " null_model = minimize(nllike_null, [1, 1], method = \"Nelder-Mead\", options={'disp': True}, args = temp_df)\n", + " \n", + " #Get differences in fit\n", + " D = (null_model.fun - model.fun) * 2\n", + " \n", + " #Use chi3.sf() for returning p-value\n", + " p = scipy.stats.chi2.sf(D,1)\n", + " \n", + " #Print results\n", + " print (\"-----------------------------\")\n", + " print (group1 + \" vs. \" + group2)\n", + " print (\"p-value = \" + str(p))\n", + " if p <= 0.05:\n", + " print (\"Significance\")\n", + " else:\n", + " print (\"No significance\")\n", + " print (\"-----------------------------\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 4: Run the t-test function!" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Perform t-tests\n", + "ttest(data, group1=\"horsebean\", group2=\"casein\", x=\"feed\", y=\"weight\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Completed code (again)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import pandas\n", + "import numpy\n", + "import scipy.stats\n", + "from scipy.optimize import minimize\n", + "from plotnine import *\n", + "\n", + "data = pandas.read_csv(\"chickwts.txt\")\n", + "\n", + "ggplot(data=data) + geom_bar(aes(x='factor(feed)', y='weight'), stat = \"summary\", fun_y = numpy.mean) + theme_classic() + xlab(\"feed type\") + ylab(\"mean weight\")\n", + "\n", + "#function for returning negative log likelihood for t-test model\n", + "def nllike (p, obs):\n", + " B0 = p[0]\n", + " B1 = p[1]\n", + " sigma = p[2]\n", + " expected = B0 + B1 * obs.x\n", + " nll = -1 * scipy.stats.norm(expected, sigma).logpdf(obs.y).sum()\n", + " return nll\n", + "\n", + "#function for returning negative log likelihood for null model\n", + "def nllike_null (p, obs):\n", + " B0 = p[0]\n", + " sigma = p[1]\n", + " expected = B0\n", + " nll = -1 * scipy.stats.norm(expected, sigma).logpdf(obs.y).sum()\n", + " return nll\n", + "\n", + "#function for returning p-value for t-test\n", + "def ttest (data, group1, group2, x, y):\n", + " #define a temporary slice of the data\n", + " temp_df = data[(data[x]==group1) | (data[x]==group2)]\n", + " \n", + " #Make new column 'x' set as 0 or 1 based on group\n", + " temp_df[\"x\"] = 0\n", + " temp_df[\"x\"][temp_df[x] == group2] = 1\n", + " temp_df[\"y\"] = temp_df[y]\n", + " \n", + " # y = B0 + B1*x + E\n", + " model = minimize(nllike, [1, 1, 1], method = \"Nelder-Mead\", options={'disp': True}, args = temp_df)\n", + " \n", + " # y = B0 + E\n", + " null_model = minimize(nllike_null, [1, 1], method = \"Nelder-Mead\", options={'disp': True}, args = temp_df)\n", + " \n", + " #Get differences in fit\n", + " D = (null_model.fun - model.fun) * 2\n", + " \n", + " #Use chi3.sf() for returning p-value\n", + " p = scipy.stats.chi2.sf(D,1)\n", + " \n", + " #Print results\n", + " print (\"-----------------------------\")\n", + " print (group1 + \" vs. \" + group2)\n", + " print (\"p-value = \" + str(p))\n", + " if p <= 0.05:\n", + " print (\"Significance\")\n", + " else:\n", + " print (\"No significance\")\n", + " print (\"-----------------------------\")\n", + "\n", + "#Perform t-tests\n", + "ttest(data, group1=\"horsebean\", group2=\"casein\", x=\"feed\", y=\"weight\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.13" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/exercise1_output.html b/exercise1_output.html new file mode 100755 index 0000000..09a8742 --- /dev/null +++ b/exercise1_output.html @@ -0,0 +1,12802 @@ + + + +exercise1 + + + + + + + + + + + + + + + + + + + +
+
+ +
+
+
+
+
+

Step-by-step Creating a maximum likelihood t-test in Python

Hi, this is a tutorial on how to code a t-test using maximum likelihood in Python. This tutorial will explain how to create separate negative log-likelihood functions for the null and alternate model as well as produce a t-test that compares these models.

+

Stats language:

Before we jump into the code, we need to define the statistical models for the t-test. The two models are the null model and the alternative model.

+

In the null model of a t-test, we hypothesize that there is no difference between the means of two populations. We describe the model using the equation:

+$$y = \beta_0 + ε$$

In the alternative model, we hypothesize that there is a significance between the means.

+$$y = \beta_0 + \beta_1x + ε$$ +
+
+
+
+
+
+
+
+

Complete code:

+
+
+
+
+
+
In [ ]:
+
+
+
import pandas
+import numpy
+import scipy.stats
+from scipy.optimize import minimize
+from plotnine import *
+
+#Import our data set
+data = pandas.read_csv("chickwts.txt")
+
+#Graph a summary plot of means for each seed type
+ggplot(data=data) + geom_bar(aes(x='factor(feed)', y='weight'), stat = "summary", fun_y = numpy.mean) + theme_classic() + xlab("feed type") + ylab("mean weight")
+
+#A function to return the negative log-likelihood for a given observation
+def nllike (p, obs):
+    B0 = p[0]
+    B1 = p[1]
+    sigma = p[2]
+    expected = B0 + B1 * obs.x
+    nll = -1 * scipy.stats.norm(expected, sigma).logpdf(obs.y).sum()
+    return nll
+
+#A function to return the negative log-likelihood for the null model
+def nllike_null (p, obs):
+    B0 = p[0]
+    sigma = p[1]
+    expected = B0
+    nll = -1 * scipy.stats.norm(expected, sigma).logpdf(obs.y).sum()
+    return nll
+
+#function for returning p-value for t-test
+def ttest (data, group1, group2, x, y):
+    #define a temporary slice of the data
+    temp_df = data[(data[x]==group1) | (data[x]==group2)]
+    
+    #Make new column 'x' set as 0 or 1 based on group
+    temp_df["x"] = 0
+    temp_df["x"][temp_df[x] == group2] = 1
+    temp_df["y"] = temp_df[y]
+    
+    # y = B0 + B1*x + E
+    model = minimize(nllike, [1, 1, 1], method = "Nelder-Mead", options={'disp': True}, args = temp_df)
+    
+    # y = B0 + E
+    null_model = minimize(nllike_null, [1, 1], method = "Nelder-Mead", options={'disp': True}, args = temp_df)
+    
+    #Get differences in fit
+    D = (null_model.fun - model.fun) * 2
+    
+    #Use chi3.sf() for returning p-value
+    p = scipy.stats.chi2.sf(D,1)
+    
+    #Print results
+    print ("-----------------------------")
+    print (group1 + " vs. " + group2)
+    print ("p-value = " + str(p))
+    if p <= 0.05:
+        print ("Significance")
+    else:
+        print ("No significance")
+    print ("-----------------------------")
+
+#Perform t-tests
+ttest(data, group1="horsebean", group2="casein", x="feed", y="weight")
+
+ +
+
+
+ +
+
+
+
+
+
+

Step 1: Importing our tools and dataset

We import pandas, numpy, scipy.stats, minimize from scipy.optimize, and all functions from plotnine (this is different than import plotnine).

+

We then use pandas.read_csv() to import a text file as a pandas Dataframe.

+ +
+
+
+
+
+
In [ ]:
+
+
+
import pandas
+import numpy
+import scipy.stats
+from scipy.optimize import minimize
+from plotnine import *
+
+#Import our data set
+data = pandas.read_csv("chickwts.txt")
+
+ +
+
+
+ +
+
+
+
+
+
+

We can check whether the data have been loaded. The dataframe contains 70 rows of feed types and the chick weights produced from that type of feed.

+ +
+
+
+
+
+
In [9]:
+
+
+
print data
+
+ +
+
+
+ +
+
+ + +
+
+ +
+
    weight       feed
+0      179  horsebean
+1      160  horsebean
+2      136  horsebean
+3      227  horsebean
+4      217  horsebean
+5      168  horsebean
+6      108  horsebean
+7      124  horsebean
+8      143  horsebean
+9      140  horsebean
+10     309    linseed
+11     229    linseed
+12     181    linseed
+13     141    linseed
+14     260    linseed
+15     203    linseed
+16     148    linseed
+17     169    linseed
+18     213    linseed
+19     257    linseed
+20     244    linseed
+21     271    linseed
+22     243    soybean
+23     230    soybean
+24     248    soybean
+25     327    soybean
+26     329    soybean
+27     250    soybean
+28     193    soybean
+29     271    soybean
+..     ...        ...
+41     226  sunflower
+42     320  sunflower
+43     295  sunflower
+44     334  sunflower
+45     322  sunflower
+46     297  sunflower
+47     318  sunflower
+48     325   meatmeal
+49     257   meatmeal
+50     303   meatmeal
+51     315   meatmeal
+52     380   meatmeal
+53     153   meatmeal
+54     263   meatmeal
+55     242   meatmeal
+56     206   meatmeal
+57     344   meatmeal
+58     258   meatmeal
+59     368     casein
+60     390     casein
+61     379     casein
+62     260     casein
+63     404     casein
+64     318     casein
+65     352     casein
+66     359     casein
+67     216     casein
+68     222     casein
+69     283     casein
+70     332     casein
+
+[71 rows x 2 columns]
+
+
+
+ +
+
+ +
+
+
+
+
+
+

We can visualize the data using the following line of code, which creates a bar graph of mean chick weights by feed type.

+ +
+
+
+
+
+
In [10]:
+
+
+
#Graph a summary plot of means for each feed type
+ggplot(data=data) + geom_bar(aes(x='factor(feed)', y='weight'), stat = "summary", fun_y = numpy.mean) + theme_classic() + xlab("feed type") + ylab("mean weight")
+
+ +
+
+
+ +
+
+ + +
+
+ + + +
+ +
+ +
+ +
+
Out[10]:
+ + + +
+
<ggplot: (6866962)>
+
+ +
+ +
+
+ +
+
+
+
+
+
+

Step 2: Defining functions for computing negative loglikelihood for the models

Here, we'll be coding two functions to calculate the negative loglikelihood for a given observation for each of our models. +Remember, the equation for the null model is: +$$y = \beta_0 + ε$$

+

The equation for the alternative model is:

+$$y = \beta_0 + \beta_1x + ε$$

We'll start with the null model.

+

First define the function

+ +
+
+
+
+
+
In [ ]:
+
+
+
def nllike_null (p, obs):
+
+ +
+
+
+ +
+
+
+
+
+
+

Now we will unpack the parameters

+

Here, p is a list of two terms: the intercept (β0) and the error (ε)

+ +
+
+
+
+
+
In [ ]:
+
+
+
def nllike_null (p, obs):
+    B0 = p[0]
+    sigma = p[1]
+
+ +
+
+
+ +
+
+
+
+
+
+

We calculate the expected value (y).

+

Where's the error in the formula? Stick around to find out!

+ +
+
+
+
+
+
In [ ]:
+
+
+
def nllike_null (p, obs):
+    B0 = p[0]
+    sigma = p[1]
+    expected = B0
+
+ +
+
+
+ +
+
+
+
+
+
+

Then we calculate the negative loglikelihood (nll)

+

The error sigma pops back in this equation. We then return nll.

+ +
+
+
+
+
+
In [ ]:
+
+
+
def nllike_null (p, obs):
+    B0 = p[0]
+    sigma = p[1]
+    expected = B0
+    nll = -1 * scipy.stats.norm(expected, sigma).logpdf(obs.y).sum()
+    return nll
+
+ +
+
+
+ +
+
+
+
+
+
+

Our completed function for the negative loglikelihood for the null model:

+ +
+
+
+
+
+
In [ ]:
+
+
+
#A function to return the negative log-likelihood for the null model
+def nllike_null (p, obs):
+    B0 = p[0]
+    sigma = p[1]
+    expected = B0
+    nll = -1 * scipy.stats.norm(expected, sigma).logpdf(obs.y).sum()
+    return nll
+
+ +
+
+
+ +
+
+
+
+
+
+

We then create a similar function for the alternative model:

+

Notice that we have more parameters to unpack as well as another term in the expected variable. Why B1 obs.x?*

+ +
+
+
+
+
+
In [ ]:
+
+
+
#A function to return the negative log-likelihood for a given observation
+def nllike (p, obs):
+    B0 = p[0]
+    B1 = p[1]
+    sigma = p[2]
+    expected = B0 + B1 * obs.x
+    nll = -1 * scipy.stats.norm(expected, sigma).logpdf(obs.y).sum()
+    return nll
+
+ +
+
+
+ +
+
+
+
+
+
+

Step 3: Creating a function for the t-test

Finally, we get to the actual test. We will create a function with four parameters.

+ +
data will be our dataset
+group1 will be the feed type compared
+group2 will be the second feed type compared
+x is the independent categorical variable, i.e. "feed"
+y is the response variable, i.e. "weight"
+ +
+
+
+
+
+
In [ ]:
+
+
+
#function for returning p-value for t-test
+def ttest (data, group1, group2, x, y):
+
+ +
+
+
+ +
+
+
+
+
+
+

We'll just subset the data, named temp_df of the feed types that we're only interested

+ +
+
+
+
+
+
In [ ]:
+
+
+
#function for returning p-value for t-test
+def ttest (data, group1, group2, x, y):
+    #define a temporary slice of the data
+    temp_df = data[(data[x]==group1) | (data[x]==group2)]
+
+ +
+
+
+ +
+
+
+
+
+
+

We'll create a new column called x in temp_df called x with the values as 0 or 1 depending on the feed type of the row.

+

We then make another new column called y that's just a copy of the response column

+ +
+
+
+
+
+
In [ ]:
+
+
+
#function for returning p-value for t-test
+def ttest (data, group1, group2, x, y):
+    #define a temporary slice of the data
+    temp_df = data[(data[x]==group1) | (data[x]==group2)]
+    
+    #Make new column 'x' set as 0 or 1 based on group
+    temp_df["x"] = 0
+    temp_df["x"][temp_df[x] == group2] = 1
+    temp_df["y"] = temp_df[y]
+
+ +
+
+
+ +
+
+
+
+
+
+

We use minimize in the scipy package to calculate the fit for the two models.

+ +
+
+
+
+
+
In [ ]:
+
+
+
#function for returning p-value for t-test
+def ttest (data, group1, group2, x, y):
+    #define a temporary slice of the data
+    temp_df = data[(data[x]==group1) | (data[x]==group2)]
+    
+    #Make new column 'x' set as 0 or 1 based on group
+    temp_df["x"] = 0
+    temp_df["x"][temp_df[x] == group2] = 1
+    temp_df["y"] = temp_df[y]
+    
+    # y = B0 + B1*x + E
+    model = minimize(nllike, [1, 1, 1], method = "Nelder-Mead", options={'disp': True}, args = temp_df)
+    
+    # y = B0 + E
+    null_model = minimize(nllike_null, [1, 1], method = "Nelder-Mead", options={'disp': True}, args = temp_df)
+
+ +
+
+
+ +
+
+
+
+
+
+

To compute the p-value of the t-test

+ +
+
+
+
+
+
In [ ]:
+
+
+
#function for returning p-value for t-test
+def ttest (data, group1, group2, x, y):
+    #define a temporary slice of the data
+    temp_df = data[(data[x]==group1) | (data[x]==group2)]
+    
+    #Make new column 'x' set as 0 or 1 based on group
+    temp_df["x"] = 0
+    temp_df["x"][temp_df[x] == group2] = 1
+    temp_df["y"] = temp_df[y]
+    
+    # y = B0 + B1*x + E
+    model = minimize(nllike, [1, 1, 1], method = "Nelder-Mead", options={'disp': True}, args = temp_df)
+    
+    # y = B0 + E
+    null_model = minimize(nllike_null, [1, 1], method = "Nelder-Mead", options={'disp': True}, args = temp_df)
+    
+    #Get differences in fit
+    D = (null_model.fun - model.fun) * 2
+    
+    #Use chi3.sf() for returning p-value
+    p = scipy.stats.chi2.sf(D,1)
+
+ +
+
+
+ +
+
+
+
+
+
+

Finally, we display the output in a nicely formated way.

+ +
+
+
+
+
+
In [ ]:
+
+
+
#function for returning p-value for t-test
+def ttest (data, group1, group2, x, y):
+    #define a temporary slice of the data
+    temp_df = data[(data[x]==group1) | (data[x]==group2)]
+    
+    #Make new column 'x' set as 0 or 1 based on group
+    temp_df["x"] = 0
+    temp_df["x"][temp_df[x] == group2] = 1
+    temp_df["y"] = temp_df[y]
+    
+    # y = B0 + B1*x + E
+    model = minimize(nllike, [1, 1, 1], method = "Nelder-Mead", options={'disp': True}, args = temp_df)
+    
+    # y = B0 + E
+    null_model = minimize(nllike_null, [1, 1], method = "Nelder-Mead", options={'disp': True}, args = temp_df)
+    
+    #Get differences in fit
+    D = (null_model.fun - model.fun) * 2
+    
+    #Use chi3.sf() for returning p-value
+    p = scipy.stats.chi2.sf(D,1)
+    
+    #Print results
+    print ("-----------------------------")
+    print (group1 + " vs. " + group2)
+    print ("p-value = " + str(p))
+    if p <= 0.05:
+        print ("Significance")
+    else:
+        print ("No significance")
+    print ("-----------------------------")
+
+ +
+
+
+ +
+
+
+
+
+
+

Step 4: Run the t-test function!

+
+
+
+
+
+
In [ ]:
+
+
+
#Perform t-tests
+ttest(data, group1="horsebean", group2="casein", x="feed", y="weight")
+
+ +
+
+
+ +
+
+
+
+
+
+

Completed code (again)

+
+
+
+
+
+
In [ ]:
+
+
+
import pandas
+import numpy
+import scipy.stats
+from scipy.optimize import minimize
+from plotnine import *
+
+data = pandas.read_csv("chickwts.txt")
+
+ggplot(data=data) + geom_bar(aes(x='factor(feed)', y='weight'), stat = "summary", fun_y = numpy.mean) + theme_classic() + xlab("feed type") + ylab("mean weight")
+
+#function for returning negative log likelihood for t-test model
+def nllike (p, obs):
+    B0 = p[0]
+    B1 = p[1]
+    sigma = p[2]
+    expected = B0 + B1 * obs.x
+    nll = -1 * scipy.stats.norm(expected, sigma).logpdf(obs.y).sum()
+    return nll
+
+#function for returning negative log likelihood for null model
+def nllike_null (p, obs):
+    B0 = p[0]
+    sigma = p[1]
+    expected = B0
+    nll = -1 * scipy.stats.norm(expected, sigma).logpdf(obs.y).sum()
+    return nll
+
+#function for returning p-value for t-test
+def ttest (data, group1, group2, x, y):
+    #define a temporary slice of the data
+    temp_df = data[(data[x]==group1) | (data[x]==group2)]
+    
+    #Make new column 'x' set as 0 or 1 based on group
+    temp_df["x"] = 0
+    temp_df["x"][temp_df[x] == group2] = 1
+    temp_df["y"] = temp_df[y]
+    
+    # y = B0 + B1*x + E
+    model = minimize(nllike, [1, 1, 1], method = "Nelder-Mead", options={'disp': True}, args = temp_df)
+    
+    # y = B0 + E
+    null_model = minimize(nllike_null, [1, 1], method = "Nelder-Mead", options={'disp': True}, args = temp_df)
+    
+    #Get differences in fit
+    D = (null_model.fun - model.fun) * 2
+    
+    #Use chi3.sf() for returning p-value
+    p = scipy.stats.chi2.sf(D,1)
+    
+    #Print results
+    print ("-----------------------------")
+    print (group1 + " vs. " + group2)
+    print ("p-value = " + str(p))
+    if p <= 0.05:
+        print ("Significance")
+    else:
+        print ("No significance")
+    print ("-----------------------------")
+
+#Perform t-tests
+ttest(data, group1="horsebean", group2="casein", x="feed", y="weight")
+
+ +
+
+
+ +
+
+
+ + + + + + From ea0b2ee4fc8417a43dcf0b6e687d8a176650bc3b Mon Sep 17 00:00:00 2001 From: Aaron Long Date: Mon, 20 Nov 2017 15:30:03 -0500 Subject: [PATCH 2/2] Finished exercise 2, the one with the regex --- Regular+Expressions+Practice.html | 11953 +++++++++++++++++++++++++++ Regular+Expressions+Practice.ipynb | 145 + 2 files changed, 12098 insertions(+) create mode 100755 Regular+Expressions+Practice.html create mode 100755 Regular+Expressions+Practice.ipynb diff --git a/Regular+Expressions+Practice.html b/Regular+Expressions+Practice.html new file mode 100755 index 0000000..6e13b35 --- /dev/null +++ b/Regular+Expressions+Practice.html @@ -0,0 +1,11953 @@ + + + +Regular Expressions Practice + + + + + + + + + + + + + + + + + + + +
+
+ +
+
+
+
+

a.) Military times after noon

+ +
+
+
+
+
+
In [1]:
+
+
+
import numpy
+import pandas
+import re
+
+Times=['15:30', '12:00', '2:00', '21:54', '01:69', '01:20', '6:00', '30:00', '14:50','12:01']
+
+strtimes=' '.join(Times)
+
+MT=re.compile('1[0-9]:[0-5][0-9]|2[0-3]:[0-5][0-9]')
+
+militarytimes=re.findall(MT,strtimes)
+print(militarytimes)
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
['15:30', '12:00', '21:54', '14:50', '12:01']
+
+
+
+ +
+
+ +
+
+
+
+
+

The regex here is defined as the variable 'MT'. The first half (before the |) describes the situation in which the time begins with a 1 while the second half describes the situation in which the time begins with a 2. Combining these two halfs lets the regex search for all times that include and come after 12:00 without matching any fake times or times before noon.

+ +
+
+
+
+
+
+
+

b.) Correctly formatted genus and species

+ +
+
+
+
+
+
In [2]:
+
+
+
Species=['H. Sapien', 'Homo sapien', 'H. sapien', 'D. melanogaster', 'D. Melanogaster', 'Drosophila melanogaster']
+
+strspecies=' '.join(Species)
+
+sregex=re.compile('[A-Z]\. [a-z]+')
+
+slist=re.findall(sregex,strspecies)
+print(slist)
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
['H. sapien', 'D. melanogaster']
+
+
+
+ +
+
+ +
+
+
+
+
+

The regex, sregex, utilizes the escape character to ensure it searches for a period after the capital letter. The plus next to the "all lowercase letters" set ensures that the regex will find any species names (correctly written in all lowercase) no matter how long.

+ +
+
+
+
+
+
+
+

c.) Social Security Number

+ +
+
+
+
+
+
In [3]:
+
+
+
socialsecurity=['555-55-5555', '123-45-6789', '55555-55555', '1234567890']
+
+strssn=' '.join(socialsecurity)
+
+ssnregex=re.compile('[0-9]{3}\-[0-9]{2}\-[0-9]{4}')
+
+ssnlist=re.findall(ssnregex,strssn)
+print(ssnlist)
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
['555-55-5555', '123-45-6789']
+
+
+
+ +
+
+ +
+
+
+
+
+

The curly brackets in the regex, ssnregex, tell the regex to search for 3 digits in a row, then 2 digits in a row, and finally 4 digits in a row. The escape character forces the regex to search for litteral hyphens instead of some other operator.

+ +
+
+
+
+
+ + + + + + diff --git a/Regular+Expressions+Practice.ipynb b/Regular+Expressions+Practice.ipynb new file mode 100755 index 0000000..de6c1fc --- /dev/null +++ b/Regular+Expressions+Practice.ipynb @@ -0,0 +1,145 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "**a.) Military times after noon**" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['15:30', '12:00', '21:54', '14:50', '12:01']\n" + ] + } + ], + "source": [ + "import numpy\n", + "import pandas\n", + "import re\n", + "\n", + "Times=['15:30', '12:00', '2:00', '21:54', '01:69', '01:20', '6:00', '30:00', '14:50','12:01']\n", + "\n", + "strtimes=' '.join(Times)\n", + "\n", + "MT=re.compile('1[0-9]:[0-5][0-9]|2[0-3]:[0-5][0-9]')\n", + "\n", + "militarytimes=re.findall(MT,strtimes)\n", + "print(militarytimes)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The regex here is defined as the variable 'MT'. The first half (before the |) describes the situation in which the time begins with a 1 while the second half describes the situation in which the time begins with a 2. Combining these two halfs lets the regex search for all times that include and come after 12:00 without matching any fake times or times before noon." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**b.) Correctly formatted genus and species**" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['H. sapien', 'D. melanogaster']\n" + ] + } + ], + "source": [ + "Species=['H. Sapien', 'Homo sapien', 'H. sapien', 'D. melanogaster', 'D. Melanogaster', 'Drosophila melanogaster']\n", + "\n", + "strspecies=' '.join(Species)\n", + "\n", + "sregex=re.compile('[A-Z]\\. [a-z]+')\n", + "\n", + "slist=re.findall(sregex,strspecies)\n", + "print(slist)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The regex, sregex, utilizes the escape character to ensure it searches for a period after the capital letter. The plus next to the \"all lowercase letters\" set ensures that the regex will find any species names (correctly written in all lowercase) no matter how long." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**c.) Social Security Number**" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['555-55-5555', '123-45-6789']\n" + ] + } + ], + "source": [ + "socialsecurity=['555-55-5555', '123-45-6789', '55555-55555', '1234567890']\n", + "\n", + "strssn=' '.join(socialsecurity)\n", + "\n", + "ssnregex=re.compile('[0-9]{3}\\-[0-9]{2}\\-[0-9]{4}')\n", + "\n", + "ssnlist=re.findall(ssnregex,strssn)\n", + "print(ssnlist)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The curly brackets in the regex, ssnregex, tell the regex to search for 3 digits in a row, then 2 digits in a row, and finally 4 digits in a row. The escape character forces the regex to search for litteral hyphens instead of some other operator." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}