diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/exercise+12A+dan+edits.html b/exercise+12A+dan+edits.html new file mode 100755 index 0000000..f941745 --- /dev/null +++ b/exercise+12A+dan+edits.html @@ -0,0 +1,12445 @@ + + + +exercise 12A dan edits + + + + + + + + + + + + + + + + + + + +
+
+ +
+
+
+
+
+

The Dataset

    +
  • Column 1 is weight
  • +
  • Column 2 is feed type
  • +
+

The data looks like this:

+
+
+
+
+
+
In [3]:
+
+
+
import pandas
+chicks = pandas.read_csv("chickwts.txt")
+import plotnine
+from plotnine import *
+p=(ggplot(data=chicks) +
+    aes(x="feed",y="weight") +
+    geom_point(color="blue") +
+    theme_classic())
+print p
+
+ +
+
+
+ +
+
+ + +
+
+ + + +
+ +
+ +
+ +
+
+ +
+
<ggplot: (16069457)>
+
+
+
+ +
+
+ +
+
+
+
+
+
+

Is chick weight different when they're fed soybean feed vs sunflower feed?

Hypotheses:

    +
  • Null hypothesis: There's no difference
  • +
+ +
+
+
+
+
+
In [36]:
+
+
+
def null(p,obs):
+    B0=p[0]
+    sigma=p[1]
+    
+    expected=B0
+    nll= -1*scipy.stats.norm(expected,sigma).logpdf(obs.weight).sum()
+    return nll
+
+ +
+
+
+ +
+
+
+
+
+
+
    +
  • Alternate hypothesis:
  • +
+ +
+
+
+
+
+
In [37]:
+
+
+
def alter(p,obs):
+    B0=p[0]
+    B1=p[1]
+    sigma=p[2]
+    
+    expected=B0+B1*obs.x # error?
+    nll= -1*scipy.stats.norm(expected,sigma).logpdf(obs.weight).sum()
+    return nll
+
+ +
+
+
+ +
+
+
+
+
+
+

Then we have to subset the data to only include chicks fed soybeans or sunflowers:

Note: Must change feed names to integers so that numpy can understand

    +
  • soybean=0
  • +
  • sunflower=1
  • +
+ +
+
+
+
+
+
In [38]:
+
+
+
chicks = pandas.read_csv("chickwts.txt")
+
+#fixed subset
+subset1 = chicks.loc[chicks['feed'].isin(['sunflower','soybean'])]
+subset1['x'] = [0 if ele  == "soybean" else 1 for ele in subset1["feed"]]
+
+ +
+
+
+ +
+
+ + +
+
+ +
+
C:\Users\NicCage\Anaconda2\lib\site-packages\ipykernel_launcher.py:5: SettingWithCopyWarning: 
+A value is trying to be set on a copy of a slice from a DataFrame.
+Try using .loc[row_indexer,col_indexer] = value instead
+
+See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
+  """
+
+
+
+ +
+
+ +
+
+
+
+
+
+

Let's test it using a likelihood ratio test!

First import your packages

+
+
+
+
+
+
In [39]:
+
+
+
import numpy
+import scipy
+from scipy import optimize
+from scipy import stats
+from scipy.stats import norm
+
+ +
+
+
+ +
+
+
+
+
+
+

The fit

+
+
+
+
+
+
In [40]:
+
+
+
initialGuess=numpy.array([1,1])
+alterGuess=numpy.array([1,1,1])
+fitNull=scipy.optimize.minimize(null,initialGuess,method="Nelder-Mead",options={'disp':True},args=subset1)
+fitAlter=scipy.optimize.minimize(alter,alterGuess,method="Nelder-Mead",options={'disp':True},args=subset1)
+print(fitNull)
+print(fitAlter)
+
+ +
+
+
+ +
+
+ + +
+
+ +
+
Optimization terminated successfully.
+         Current function value: 145.240592
+         Iterations: 85
+         Function evaluations: 162
+Optimization terminated successfully.
+         Current function value: 138.469162
+         Iterations: 200
+         Function evaluations: 363
+ final_simplex: (array([[ 284.49999051,   64.53691755],
+       [ 284.49994818,   64.53700563],
+       [ 284.50008783,   64.53694034]]), array([ 145.2405921,  145.2405921,  145.2405921]))
+           fun: 145.24059209701645
+       message: 'Optimization terminated successfully.'
+          nfev: 162
+           nit: 85
+        status: 0
+       success: True
+             x: array([ 284.49999051,   64.53691755])
+ final_simplex: (array([[ 246.42855057,   82.48813575,   49.73948886],
+       [ 246.42852198,   82.48806896,   49.73943687],
+       [ 246.42847238,   82.48817448,   49.73945349],
+       [ 246.42854646,   82.48810237,   49.73950669]]), array([ 138.46916182,  138.46916182,  138.46916182,  138.46916182]))
+           fun: 138.4691618247918
+       message: 'Optimization terminated successfully.'
+          nfev: 363
+           nit: 200
+        status: 0
+       success: True
+             x: array([ 246.42855057,   82.48813575,   49.73948886])
+
+
+
+ +
+
+ +
+
+
+
In [41]:
+
+
+
#### The results
+
+ +
+
+
+ +
+
+
+
In [43]:
+
+
+
D=2*(fitNull.fun-fitAlter.fun)
+print "result"
+print 1-scipy.stats.chi2.cdf(x=D,df=1)
+# sunflower seed sig increases weight
+
+ +
+
+
+ +
+
+ + +
+
+ +
+
result
+0.000233176728695
+
+
+
+ +
+
+ +
+
+
+
In [ ]:
+
+
+
 
+
+ +
+
+
+ +
+
+
+ + + + + + diff --git a/exercise12A.ipynb b/exercise12A.ipynb new file mode 100755 index 0000000..27ef87d --- /dev/null +++ b/exercise12A.ipynb @@ -0,0 +1,288 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "## The Dataset\n", + "* Column 1 is weight\n", + "* Column 2 is feed type\n", + "\n", + "## The data looks like this:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAhoAAAFzCAYAAAB4qqApAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAIABJREFUeJzs3XlYlXX+//Hn4bDIIgkqaC5gjrkyjphmLrlVYzaao34b\nM1ssy8zrUvvZZlOWVuN3tEVSa5wsc1TU76RONWPLmKlpiwppamhuCG4IgqSs53Du3x93gAQUIof7\ncHg9rutcet8HOO/75uY+7/NZ3h+bYRgGIiIiIm7gY3UAIiIi4r2UaIiIiIjbKNEQERERt1GiISIi\nIm6jRENERETcRomGiIiIuI0SDREREXEbJRoiIiLiNko0RERExG2UaIiIiIjb1LtEIzc3l8TERHJz\nc60ORURExOvVu0TjwIEDdO/enQMHDlgdioiIiNerd4mGiIiI1B4lGiIiIuI2SjRERETEbZRoiIiI\niNso0RARERG3UaIhIiIibqNEQ0RERNxGiYaIiIi4jRINERERcRslGiIiIhZwuSA/3+oo3E+JhoiI\nSC0yDJg5EwIDzUeXLvDDD1ZH5T5KNERERGpRXBzMmQOFheb2wYMweDBcvGhtXO6iRENERKQWLV0K\nTmfpttMJJ0/Czp3WxeROSjRERERqkWGU32ezVbzfGyjREBERqUV33w2+vqXbdjtERkKPHtbF5E5K\nNERERGrR9Onw6KNmggHQti189hk0bGhtXO6iRENERKQW+fjA3Lnm1NbsbHMwaMeOVkflPko0RERE\nLODrC6GhVkfhfko0RERExG2UaIiIiIjbKNEQERERt1GiISIiIm6jRENERETcRomGiIiIuI0SDRER\nEXEbJRoiIiLiNko06oCMDDhyBBwOqyMRERG5PEo0PJjDAffdB02bwm9+Ay1bwo4dVkclIiJSdUo0\nPNjs2RAfX7qdng5DhkBWlnUxiYiIXA7fX/+S2vPjjz8yadIkmjdvzssvvwzA8ePHWbBgAcnJyURG\nRvLQQw/RtWvXku/Zvn07y5YtIzMzkw4dOjBlyhQiIiKsOoQatX592e4SwzAX4ElIgJtusi4uERGR\nqvKoFo2lS5fSqlWrkm2n08kLL7xAz549WbVqFWPGjGHOnDmcP38egNTUVOLi4pg0aRIrV64kOjqa\nuXPnWhV+jQsIKL/PMMDfv/ZjERERqQ6PSTT27dvHqVOnuOmSj+p79+6loKCA0aNH4+fnR79+/YiK\nimL79u0AbN68mdjYWLp160ZAQABjx44lOTmZlJQUqw6jRk2ebC4nXMzPD9q1g549rYtJRETkcnhE\nouFwOFi8eDEPP/wwNputZH9KSgrR0dH4XPJu26ZNG44fPw6Y3Spt2rQpeS4oKIhmzZqVPF/s9OnT\nJCYmkpiYSFJSkpuPpuaMHw+vvw6RkRAUBDfeCJs2QYMGVkcmIiJSNR4xRmPt2rV07dqVNm3acPTo\n0ZL9eXl5BAcHl/na4OBgzp49C0B+fn6554OCgsjLyyuzb/HixcyaNctN0buPzWa2akyebHUkIiIi\n1WN5i8apU6f47LPPGDt2bLnnAgMDycnJKbMvJyeHwMBAABo0aEBubm6lzxebOHEiCQkJJCQksGLF\niho+AhEREamM5S0aSUlJZGVl8fDDDwNQWFhIYWEh99xzD4888gjHjx/H5XKVdJ8cPXqU/v37AxAV\nFVWmBSQ3N5e0tDSioqLKvEbz5s1p3rx5LR2RiIiIFLM80ejbty+xsbEl21988QWff/45M2fOpGHD\nhvj7+7Nu3Tpuv/12vvnmG1JSUujTpw8AAwYMYPr06ezevZtOnToRHx9PdHQ0rVu3tupwRERE5BKW\nJxoBAQEEXDKPMzg4GLvdTlhYGADPPPMMCxcuZPXq1URERDBjxgwaNWoEQKtWrZgyZQqLFi0iKyuL\n9u3b88QTT1hyHCIiIlWVmQlr1sD589C7N/zUUO+VbIZhGFYHUZsSExPp3r07CQkJZVpSREREakNq\nqlmmIDPTLGFQUABz5sCTT1odmXtYPhhUfllCAowbB7feCn/9KzidVkckIiJXYupUOHcOCgshP98s\nxDhjBhw+bHVk7mF514lU7quvzNoZLpf5+Owzc1G1994zp76KiEjds29f+dW4fXzg0CFzAU1voxYN\nD/b006VJBpgX5rp18O231sYlIiLVFx0NdnvZfUVF5grd3kiJhgc7dao0yShmt8NP9cpERKQOeuUV\nc80qPz+zddrXFx58EGJirI7MPdR14sGuuw6Sk81+vEt17GhJOCIiUgNiYmDPHli8GLKyoG9fuO8+\nq6NyHyUaHiwuDnbuhGPHzIy3sBD+9jf4WT0yERGpY9q1g5dftjqK2qFEw4M1aQK7d8NHH5lzra+/\nHrp0sToqERGRqlOi4eGCgmDUKKujEBERqR4NBhURERG3UaIhIvILNmwwS0R37AiTJsHFi1ZHJFK3\nKNHwcEuWQKtW0KgRDB0KaWlWRyRSf2zYAMOGmcXzDhyAd94xq/QWFVkdmUjdoUTDgy1fDhMnwokT\nkJ0NGzfCoEFmXXwRcb8XXyxby6awELZtg127rItJpK5RouHB4uLK3uQcDkhKMqe8ioj7ZWWV32e3\nm7PARKRqlGh4sLy88vtsNnMRHhFxvwEDzAqOl/Lzg65dLQlHpE5SouHBhg83b2rFbDYICQGtbi9S\nO+bNM5fzBrMlo0EDWLMGmjWzNi6RukR1NDzYrFlmVdA1a8ztsDD44AMID7c2LpH6IiQENm82x2Sc\nP2+2ZCjJELk8SjQ8mL8/rF5tLsCTnQ3XXGN+ohKR2mO3m1V5RaR6lGjUAS1amA8REZG6RmM0RERE\nxG2UaNQBGRlw5Ig5vVVERKQuUaLhwRwOuO8+aNoUfvMbaNkSduywOioREZGqU6LhwWbPhvj40u30\ndBgypOIiQiIiIp5IiYYHW7++bHeJYZizTxISrItJRETkcijR8GABAeX3GUb5SoUiIiKeSomGB5s8\nGXwu+Q35+UG7dqWVCkVERDydEg0PNn48vP46REZCUBDceCNs2qSiXSIiUneoYJcHs9nMVo3Jk62O\nREREpHrUolEHqI6GiIjUVUo0PJjqaIiISF2nRMODqY6GiIjUdUo0PJjqaIiISF2nRMODqY6GiIjU\ndUo0PJjqaIiISF2nRMODqY6GiIjUdUo0PJjNZnaf+PmZD39/8FXlExERqUOUaHiw5cth4kQ4ccIc\nBLpxIwwaBAUFVkcmIiJSNUo0PFhcHLhcpdsOByQlwc6d1sUkIiJyOZRoeLC8vPL7bDbIz6/9WERE\nRKpDiYYHGz7cHJtRzGaDkBCIjbUuJhERkcuhRMODzZoFI0eWboeFwYYNEB5uXUwiIiKXQ4mGB/P3\nh9WrzcGg+/fDyZPQp4/VUYmIiFSdEo06oEUL6NRJ9TNErLBhA/TuDR07wqRJcPGi1RGJ1C2qyiAi\nUokNG2DYsNLZX0ePwr59sHkz2O2WhiZSZ6hFQ0SkEi++WHaKeWEhbNsGu3ZZF5N4j4wMOHKk7OKZ\n3kiJhohIJbKyyu+z2+H8+dqPRbyHwwH33gtNm8JvfgNXXw07dlgdlfso0RARqcSAAeVXS/bzg65d\nLQlHvMSzz5qVn4tlZMDgwRUntt5AiYaISCXmzStdLdluNwdkr1kDzZpZG5fUbYsXg2GU3XfxornM\nhDfSYFARkUqEhJgDP3ftMrtLunZVkiFX7scfK96/Zw/8z//Ubiy1QYmGiMgvsNvh+uutjkK8iY9P\n2UHGxUJDaz+W2qCuExERkVrUrl3F+/v3r904aosSDRERkVq0eLG5dtWlBgwoHQ/kbZRo1AH1Za61\niEh9kJxcNtHw8YG0NLNOizdSouHBHA64777SudYtW3r3XGsRkfogLq7sGA2XCw4cgJ07rYvJnZRo\neLDZsyE+vnQ7PR2GDPHeudYiIvVBXl75fTYb5OfXfiy1QYmGB1u/vmx3iWFAdjYkJFgXk4hIZTIz\n4c03Yc4c2LLF6mg81/DhZuG3YjabOZU6Nta6mNxJ01s9WEBA+X2GUb5SoYiI1VJTzcGMmZnmmIOC\nAjPhePJJqyPzPLNmwbFjZvE3gLAw+OADCA+3Ni53UYuGB5s82fyDLebnZ06L8taRySJSd02dag5c\nLyw0uwAMA2bMgMOHrY7M8/j7w+rVcOIE7N8PJ09Cnz5WR+U+SjQ82Pjx8PrrEBkJQUFw442waZNZ\nBllExJPs2gVOZ/n9P/xQ+7HUFS1aQKdO3n9PV6LhwWw2s1XjzBnIyTHr4LdoYXVUIvXLkiXQqhU0\nagRDh5rTEKW8ixfL7zMMuHCh9mMRz6JEQ0SkEsuXw8SJZhN3draZ7A8aZI4/kLIqK5991VW1G4d4\nHiUaIiKV+Hm9A4cDkpK8t97BlejTp+xMCjDHmHXsaE08ns7lgt27zUX7MjKsjsa9lGiIiFSivtU7\nuBJxcRAdDb6+5pgDHx+z1HZUlNWReZ7cXLj5ZujWzWwha90aPvrI6qjcR9NbRUQqMXw4HDpUWs/G\n2+sdXIkmTcxP6B99BOfPmyvedulidVSe6ckn4YsvzP8bhpnQjhxpliaPjLQ0NLdQoiEiUon6Vu/g\nSgUFwahRVkfh+T7+uPzaVQUFsGcP3HKLNTG5k7pOPFxCAowbB7feCn/9a8XTx0TEPepbvQOpHefO\nld9XXPnZG6lFw4N99ZVZO8PlMh+ffWYuqvbee+WXGBYR92nRQlPLpeb4VPIR31vraahFw4M9/XRp\nkgFmU9u6dfDtt9bGJVLfuFwaAFoVqjlSNZfOZLpUbm7txlFblGh4sFOnyl+QdjucPWtNPCL1jWHA\nzJkQGGg+unRRpcvKqOZI1VVUc8Rm896aI0o0PNh111W8gJrmpYvUjrg4c2GwwkJz++BBGDy44iqY\n9Z1qjlRdRTVHbDbvvbcr0fBgcXHmHPRL56X/7W+aly5SW5YuLTsA2+k0B4TqzbM81RypuvpWc0SD\nQT2Y5qWLWMswyu+z2SreX9+p5kjV1bd7uxIND6d56SLWuftuc1B2cauG3Q4REdCjh7VxeSLVHLk8\n9enerq4TkXpI9VmqZvp0ePRRM8EAaNvWnGbesKG1cXki1RyRytgMo341AiYmJtK9e3cSEhKIVZue\n1EM/r8/i5wfDhqk+yy9xOs2ph5WtUCoilVOLhkg9o/osl8/XV0mGSHUp0RCpZ1SfRURqkxINkXpG\n9VlEpDYp0RCpZ1Sf5fLk58OHH8KKFWbBLhG5PJreKlLP1Lc5/FciMxP69TPLjtvt5niWd96Be++1\nOjKRusMjEo2FCxeya9cu8vLyaNiwIbfccgt33HEHAMePH2fBggUkJycTGRnJQw89RNeuXUu+d/v2\n7SxbtozMzEw6dOjAlClTiIiIsOpQROqE+jSH/0pMmwaHD5uzToqnAD/wgLmGR6tW1sYmUld4RNfJ\n8OHDefPNN1mzZg1z5sxhy5YtbNu2DafTyQsvvEDPnj1ZtWoVY8aMYc6cOZw/fx6A1NRU4uLimDRp\nEitXriQ6Opq5c+dafDQi4i2++aZ0nZNL7d9f+7GI1FUekWi0bt2awMDAkm2bzcapU6fYu3cvBQUF\njB49Gj8/P/r160dUVBTbt28HYPPmzcTGxtKtWzcCAgIYO3YsycnJpKSkWHUoIuJFIiPL1xYpKoKm\nTa2JR6Qu8oiuE4Bly5bx73//m4KCAiIiIhg4cCBffvkl0dHR+PiU5kNt2rTh+PHjgNmt0q5du5Ln\ngoKCaNasGcePH6d169Yl+0+fPs3p06cBSEpKqqUjEpG67qWXYOBAc22T4uJmv/+91u+QmuFymS1m\nDRpYHYl7eUyice+993LPPfdw+PBhvv76a4KDg8nLyyM4OLjM1wUHB3P2pwn/+fn55Z4PCgoi72fL\nCC5evJhZs2a59wBExOv06wfbtsHLL0NGhjk246mnVEFVroxhwHPPmeX/Cwuhc2ezaN6111odmXt4\nTKIBZpdJu3btSEhIYNWqVTRp0oScnJwyX5OTk1PSzdKgQQNyc3Mrfb7YxIkTGT58OGC2aIwbN86N\nRyEi3qRXL7M8u0hNiYuDOXNKBxgfOACDB0NSkrnirbfxiDEaP+dyuTh9+jStW7fm+PHjuC4pY3j0\n6FGifprwHxUVxdGjR0uey83NJS0treT5Ys2bNyc2NpbY2Fg6qiqRiIhY6O23yy5kWFRkLkK3c6d1\nMbmT5YnGxYsX+fzzz8nNzcXlcvH999/z0Ucf8bvf/Y6YmBj8/f1Zt24dDoeDbdu2kZKSQp+flgQc\nMGAAiYmJ7N69m8LCQuLj44mOji4zPkNERMST/DRksAzDMOu2eCOP6DrZuHEjf//733G5XISHhzNi\nxAhuu+02bDYbzzzzDAsXLmT16tVEREQwY8YMGjVqBECrVq2YMmUKixYtIisri/bt2/PEE09YfDQi\nIiKV86nkI77dXrtx1BYtEy8iIlKLevUya7T83Jdfwg031H487latrpOtW7dy8eLFCp+7ePEiW7du\nvaKgREREvNWMGWVbNfz9oWdP6NHDupjcqVqJxsCBA/n+++8rfO7gwYMMHDjwioISERHxVrffDmvX\nmvVY2rSBu+6CTz81Fzr0RtU6rF/qbaloeqmIiIiUGjHCfNQHVU40vv76a7788suS7fj4eLZt21bm\na/Lz83n//fc1hVRERESAy0g0Pvnkk5Lqmjabjddff73c1/j5+dGxY0feeOONmotQRERE6qwqj9F4\n7rnncLlcuFwuDMPg66+/LtkufhQUFLB792569+7tzphFRESkjqjWGI1LK3WKex07Bm+9BdnZ0Ls3\njB2rdRZERKTuqPYY16KiIr755htOnDhBfn5+uefvueeeKwpM4Pvv4frroaDAXOVv8WL46itYuNDq\nyERERKqmWolGYmIiI0eOJDU1tcIZKDabTYlGDZg+HfLyzDr4xRYtgkmTzNX+REREPF21Eo1JkyZx\n1VVXsWzZMjp16oS/v39NxyXAkSNlkwwwS9SmpCjREBGRuqFaicb+/fv55z//Sf/+/Ws6HrlEp07m\nGI2fr/LXtq11MYmIiFyOalUGvfbaa/nxxx9rOhb5mfnzISwMAgLMh48PzJoF115rdWQiIiJVU60W\njddee42pU6fStWtXOnToUNMxyU+io2HfPoiPh/PnoU8fuPlmq6MSERGpuionGjExMdgumVd5+vRp\nunTpwtVXX12ybHsxm83Gnj17ai7KeiwiAqZNszoKERGR6qlyotG9e/cyiYaIiIjIr6lyovHuu++6\nMQwRERHxRtUaDCoiIiJSFdUaDHr//fdX+pyPjw9XXXUV3bp1Y+TIkQQFBVU7OBFxjyVLzBlMFy6Y\npe2XLoXISKujkrpO15VUxGZUVNrzV3Tr1o1Tp06Rnp5OeHg4ERERnD17lszMTJo2bUpwcDApKSm0\naNGCTZs20daDCj8kJibSvXt3EhISiI2NtTockVq3fDncd59Z1h7Azw/atYPERHMatUh16LqSylSr\n62TevHmEhobyxRdfkJGRwffff09GRgZbtmwhNDSURYsWkZSUREBAAE888URNxywiVyAurvTNAMDh\ngKQk2LnTupik7tN1JZWpVqLx2GOP8fzzz9OnT58y+/v168fMmTN5/PHHadeuHTNmzODzzz+vkUBF\npGbk5ZXfZ7NBBWsjilSZriupTLUSjYMHD5arnVEsLCyMI0eOANC2bVvyKrr6RMQyw4ebzdrFbDYI\nCQH1JMqVqKiYoL+/riupZqLRoUMHXn75ZXJzc8vsz8nJYd68eXTq1AmAU6dOEamRQCIeZdYsGDmy\ndDssDDZsgPBw62KSui8jw1wm4VKFhZCTY0084jmqNetkwYIF3HrrrbRs2ZKBAwfStGlT0tPT2bRp\nE06nk48//hiA7777jtGjR9dowCJyZfz9YfVqeOUVyM6Ga66BBg2sjkrqup07y47RALO1bP9+aNXK\nmpjEM1SrRaNv374cOnSICRMmkJWVxZYtW8jKyuKhhx7i0KFDJWM3/vKXv/Dyyy/XaMAiUjNatDBX\nCFaS8cs2bDCnanbsCJMmwcWLVkfkmSIjzcTiUkVF0LSpNfGI56hWiwZAs2bNmDt3bk3GIiLiUTZs\ngGHDSj+pHz1qLnS4eTPY7ZaG5nFeegkGDgTDMM+Xnx/8/vcaoyGqDFonZGTAkSPmdDGRmqLr6te9\n+GLZ7oDCQti2DXbtsi4mT9Wvn3lu/vhH6N8fnnkG1q4t38oh9U+VWzR++9vfEh8fT5cuXcqt5Ppz\nWr21Zjgc8OCDsGyZuR0RAR9+CD17WhuX1G26rqouK6v8Prsdzp+v/Vjqgl694L33rI5CPM1lrd4a\nHBxc8n+t5Op+s2dDfHzpdno6DBlifgoNC7MuLqnbdF1V3YAB5Vt9/Pyga1fLQhKpc6pVgrwuq0sl\nyLt0MUdsX8rHBz75BG66yZqYpO7TdVV1KSnQvn3ZolNTp8L8+dbFJFLXXPEYDcMwOHXqFE6nsybi\nkUtUtD6AYZjTE0Wqq6LGSJdL11VFHn20fGXL11+H1FRr4hGpi6qdaHzyySf06tWLBg0a0KpVK777\n7jsAHnroIVauXFljAdZnkyeXLYBTvEiR+tLlSlSWUDRuXLtx1AWfflp+n2HAf/9b+7GI1FXVSjRW\nrVrF0KFDadOmDW+88QaX9r60bduWpUuX1liA9dn48eanp8hICAqCG2+ETZtU90CuzIUL5ffZ7WY3\ngZRVWUOtammIVF21Eo0XXniBadOmsWrVKu67774yz3Xu3Jl9+/bVRGz1ns1mtmqcOWOW8d240Syy\nJHIloqPL14AoKoKWLS0Jx6N16VLx/gEDajUMkTqtWonG0aNHGTp0aIXPBQcHk52dfUVBiYj7vPKK\n2X3i52cms76+5nTXmBirI/M877xjjpUqHtdis8H998Nvf2ttXCJ1SbUqgzZr1owDBw4wePDgcs99\n9913REVFXXFgIuIeMTGwZw8sXmzWiejbF37WMCk/iYmBvXt1rqrK5YLvvjPrjHTpAk2aWB2ReIJq\nJRpjx47l+eefp0OHDgz4qQ3RZrOxb98+5s6dy6RJk2oyRhGpYe3agZYhqhqdq6rJzTXLtW/aZLb8\nNGhgVga99VarIxOrVSvReP7559m/fz8333wzjX8aqn7rrbeSnp7OH/7wB5566qkaDVJERDzbk0/C\n1q3m/w0D8vJg5EhITjYHtEv9Va1Ew9/fn/fff5/PP/+cTz/9lHPnzhEeHs5NN93ETar4IyJS73z4\nYflZOvn5ZjfdLbdYE5N4hmolGsOGDaN///7069ePF198EbuWMRQRqddOnap4/8GDSjTqu2rNOgkJ\nCWH+/PnccMMNNGrUiJtvvpnZs2ezefNmCgoKajpGERHxcJeucnupc+dqNw7xPNUu2HXixAl++OEH\n4uLiaNmyJcuWLWPw4MFcddVV9OvXr6bjFJEatGQJtGoFjRrB0KGQlmZ1RFLXXXVVxfs1bVpqZFG1\nI0eOsHnzZlauXMnmzZux2WwUFRXVRHw1ri4tqibiDsuXm1M0iz+BFpe2T0yseH0dgYwMyM6G1q3N\n8yXlrV0Lo0eX3demDRw9ak084jmq1aKRlJTE4sWLGTt2LC1btqR9+/a8/vrrdO7cmTVr1nCqss46\nEbFcXFzZZm6HA5KSYOdO62LyVA6HmZQ1bQq/+Y1ZPXXHDquj8kypqWXXZrLZoKBA5dqlmoNBO3fu\nTGBgIPfeey9vvvkmffv2JSwsrKZjExE3yMsrv89mK79KqcDs2RAfX7qdng5DhsCRI6BbXllLl5ZN\nYA0DTp82E9iBA62LS6xXrRaN2267jYCAAN5++23++te/Mm/ePD755BMuKnUV8XjDh5dt/rfZICQE\n1JNY3vr1ZqtGMcMwu1ASEqyLyVNV1Alvs1W8X+qXaiUaH374IefOnWPHjh386U9/4tChQ9x7772E\nh4fTo0cPHnvssZqOU0RqyKxZZiGlYmFhsGEDhIdbF5OnqmjMimGYa8VIWXffba6bU8xuNwt19ehh\nXUziGWpkMOjp06fZsmULixcvZsuWLRoMKlIHnDxpfjq/5hqzXLSU98475oJzlw6cbdPGLEKlc1aW\nywVPPQWvvmquBnzttfCvf0HHjlZHJlar1hiNY8eOsXXr1pLH0aNH8fPzo1u3bjz++OP079+/puMU\nkRrWooX5kMqNH2+OaXnhBbhwAW64AZYtU5JRER8fmDsX/vIXc92T0FCrIxJPUa0WDR8fHxo0aEDP\nnj258cYb6d+/PzfccANBQUHuiLFGqUVDRESk9lSrRWPr1q307NkTf3VUioiIyC+oVqLRt2/fmo5D\nREREvFC1Zp2IiIiIVEW1WjRERER+LjMT1qyB8+ehd2/QvAABJRoi9dKxY/DWW+b01t69YexYs7iS\nSHWlpkLPnmay4eNjlh+fMweefNLqyMRqSjRE6pnvv4frrzffCFwuWLwYvvoKFi60OjKpy6ZONZeE\nv7SS6owZMGqUuU6M1F8aoyFeY8MG89N5x44waZIWc6rM9OlmbQiHwyysVFQEixbB/v1WRyZ12b59\nZZMMMFs2Dh2yJh7xHEo0xCts2ADDhpmfzA8cMCs63nqr+SYqZR05Uv682O2QkmJNPOIdoqPN6+hS\nRUXmirdS3pIl0KoVNGoEQ4dCWprVEbmPEg3xCi++WHblyMJC2LYNdu2yLiZP1alT2TUpwHxDaNvW\nmnjEO7zyirkGjJ+fOd7H19cs3x4TY3Vknmf5cpg4EU6cMMdJbdwIgwaZ3ZneSGM0xCtkZZXfZ7eb\no9+lrPnz4csv4ccfzW2HA557zlybQqS6YmLMNWAWLzb/Hvv2hfvuszoqzxQXV/aDkcMBSUmwc6d5\n3ryNEg0Pl58P//2vmfX26AHt21sdkWcaMACOHjVbMor5+UHXrpaF5LGio83+9Ph4MxHr0wduvtnq\nqMQbtGsHL79sdRSeLy+v/D6bzbzfeyMlGh4sMxP69YMffjA/nTsc5tiDe++1OjLPM2+e+ea5bZt5\nrvz8zPn8zZpZHZlnioiAadOsjkKkfho+3BwkWzx41maDkBDw1uW3NEbDg02bBocPg9NZOhXxgQfM\n+epSVkhHnR+xAAAgAElEQVQIbN4MX38N//mPWSdi+HCroxIRKW/WLBg5snQ7LMwc0B4ebl1M7qQW\nDQ/2zTdluwKK7d9vjlaWsux2sz6EiIgn8/eH1avNAbTZ2XDNNdCggdVRuY9aNDxYZGT5ao1FRdC0\nqTXxiIhIzWnRwpwF5s1JBijR8GgvvWQWvPH56bfk5wd/+IP39uNdqfo0L11EpK5QouHB+vaFe+4B\nwzC3GzUy1w7QmhTl1bd56SIidYUSDQ8WF2e+gRYnGllZZrVLldYu75fmpYuIiHWUaHiwpUvNGSfF\nnE44eVJvnhWpb/PSr1R+Pnz4IaxYAQcPWh2NiHgzzTrxYMUtGZey2SreX9/Vt3npV0L1WUSkNqlF\nw4PdfXfZNSnsdnMmSo8e1sXkqerbvPQrofosIlKblGh4sOnT4dFHS1dEbNsWPvsMGja0Ni5PVDwv\n/cQJs87IyZNmaW0p75fqs4iI1DQlGh7MxwfmzjX707Ozzb70jh2tjsqz1Zd56VdC9VlEpDYp0agD\nfH0hNNTqKMRbqD6LiNQmDQYVqWf69TMXn3v5ZcjIMOuNPPWU6rP8kowMs1WxdWszMRORqlOiIVIP\n9eoF771ndRSez+GABx+EZcvM7YgIc1pwz57WxiVSl6jrRESkErNnQ3x86XZ6OgwZYhbPE5GqUaIh\nUg9lZsKbb5ol7bdssToaz7V+fWltFjBr2GRnQ0KCdTGJ1DWWd504HA7+9re/sWfPHi5cuECTJk24\n44476N+/PwDHjx9nwYIFJCcnExkZyUMPPUTXrl1Lvn/79u0sW7aMzMxMOnTowJQpU4iIiLDqcEQ8\nXmqq2fSfmWkOCC0oMBOOJ5+0OjLPExBQfp9hmNOpRaRqLG/RKCoqIjw8nBdffJHVq1czefJk3nzz\nTQ4cOIDT6eSFF16gZ8+erFq1ijFjxjBnzhzOnz8PQGpqKnFxcUyaNImVK1cSHR3N3LlzLT4iEc82\ndSqcO2fW0sjPN984Z8wwi3hJWZMnl87OAXMgaLt2GqMhcjksTzQaNGjAXXfdRbNmzbDZbHTq1ImO\nHTuSlJTE3r17KSgoYPTo0fj5+dGvXz+ioqLYvn07AJs3byY2NpZu3boREBDA2LFjSU5OJiUlxeKj\nEvFc+/aV7Q4A88300CFr4vFk48fD66+btUeCguDGG2HTJtVpEbkclicaP5efn8/hw4eJiooiJSWF\n6OhofC75SNGmTRuOHz8OmN0qbdq0KXkuKCiIZs2alTwvIuVFR5dWmy1WVAQtW1oSjkez2cxWjTNn\nICcHNm40i8JJxZYsgVatoFEjGDoU0tKsjkg8gUclGi6Xi/nz59OuXTu6detGXl4ewcHBZb4mODiY\nvJ+W6szPzy/3fFBQUMnzxU6fPk1iYiKJiYkkJSW59yBEPNwrr5hjDPz8zDdSX19zCmdMjNWRSV22\nfDlMnGguA5CdbSZlgwaZY4CkfrN8MGgxwzB44403yMzMZNasWdhsNgIDA8nJySnzdTk5OQQGBgJm\nt0tubm6lzxdbvHgxs2bNcu8BiNQRMTGwZw8sXmxO0+zbF+67z+qopK6LizMX6CvmcEBSEuzcaV5j\nUn95RKJhGAZ/+9vfOHbsGC+88EJJotC6dWvWrl2Ly+Uq6T45evRoyYyUqKgojh49WvJzcnNzSUtL\nIyoqqszPnzhxIsOHDwcgKSmJcePG1cZhiXisdu3MyqAiNeVnDcmA2WKWn1/7sYhn8Yiuk8WLF3Pw\n4EFmzZpFUFBQyf6YmBj8/f1Zt24dDoeDbdu2kZKSQp+fluUcMGAAiYmJ7N69m8LCQuLj44mOjqZ1\n69Zlfn7z5s2JjY0lNjaWjlqVTESkxg0fXrY8u80GISFaQ0fAZhiGYWUAZ8+eZcKECfj5+WG/ZITa\n6NGjueOOO0hOTmbhwoUkJycTERHBxIkTy9TR2LZtG8uWLSMrK4v27dszderUX6yjkZiYSPfu3UlI\nSCBWfwEiIjWisBBGjYJ//9vcDg6Gjz9Wt4l4QKJR25RoiIjUvO+/h+uvN7tKisdqTJoECxdaG5dY\nzyO6TkREpG6bPt0cp+F0momGywWLFsH+/VZHJlZToiEiIlfsyBGzHsul7HZQ/URRoiEiIlesUyez\nJsulioqgbVtr4hHPoURDRESu2Pz5EBZmLkQXEGCWtZ81C6691urIxGoeUUdDRETqtuhocx2d+Hg4\nfx769IGbb7Y6KvEESjRERKRGRETAtGlWRyGeRl0nIiIi4jZKNERERMRtlGiIiIiI2yjREKmHliyB\nVq2gUSMYOhTS0qyOSES8lRIN8SoZGWbhIIfD6kg81/LlMHEinDgB2dmwcSMMGgQFBVZHJiLeSImG\neAWHA+67D5o2hd/8Blq2hB07rI7KM8XFla5FAea5S0qCnTuti0lEvJcSDfEKs2eb8/eLpafDkCGQ\nlWVdTJ4qL6/8PpvNXAxLRKSmKdEQr7B+fdnuEsMwuwUSEqyLyVMNHw5+fqXbNhuEhIAWMxYRd1Ci\nIV4hIKD8PsMAf//aj8XTzZoFI0eWboeFwYYNEB5uXUwi4r2UaIhXmDzZXFuhmJ8ftGsHPXtaF5On\n8veH1avNwaD798PJk2a5aBERd1CiIV5h/Hh4/XWIjISgILjxRti0CRo0sDoyz9Wihbnips6RiLiT\n1joRr2Czma0akydbHYlI/eZyQWGhElgppRYNERG5YoYBM2dCYKD56NIFfvjB6qjEEyjREKmHXC7Y\nvRs2bzaLnIlcqbg4mDPHbM0AOHgQBg+GixetjUusp0RDvEZ+Pnz4IaxYYd7kpGK5uXDzzdCtm1kR\ntHVr+Ogjq6OSum7pUnA6S7edTnOgsQrBicZoiFfIzIR+/cymWrvdrKnxzjtw771WR+Z5nnwStm0z\n/28YZgGvkSMhOdkcTCtSHYZRfp/NVvF+qV/UoiFeYdo0OHzY/BRVUGB2DTzwAKSmWh2Z59m4sbR5\nu5jDAXv2WBOPeIe77wbfSz662u1m4tqjh3UxiWdQoiFe4Ztvyr95glknQspq1Kj8vqIiaNiw9mMR\n7zF9Ojz6qJlgALRtC599putKlGiIl4iMNJtpL1VUZC6yJmU99VTZ4mb+/mZhM33ylCvh4wNz55pj\npbKzzXFSHTtaHZV4AiUa4hVeesm80RW/gfr5wR/+oPU7KnL77bB2rXlu2rSBu+6CTz8t2+wtUl2+\nvhAaanUU4kl0axGv0K+fOcDx5ZfN6ZqDBpmf3H/eyiGmESPMh4iIuynREK/Rsyc88wycP28WC9KC\naiIi1lPXiXgF1YYQEfFMatEQr6DaECLWy8yENWvMVsXevaF/f6sjEk+gREO8wi/VhrjlFmtiEqlP\nUlPN7svMTHNQdkGBWZL8ySetjkyspq4T8QqqDSFiralT4dw5M+HPzzdbFmfMMAvpSf2mREO8gmpD\niFhr3z6zFfFSPj5w6JA18YjnUKIhXkG1IcSdXC7zU7pULjq6tCposaIiaNnSknDEgyjREK8xYgQk\nJMDRo+aCalddZXVEUtcZBsycCYGB5qNLF3PhPinvlVfMlkQ/P7N+ja8vPPggxMRYHZlYTZ/3REQq\nERdnDmgsXv784EEYPBiSkiAkxNrYPE1MjDn4evFiyMqCvn3hvvusjko8gRINEZFKLF1ammSA+f+T\nJ2HnThg40Lq4PFW7dmZ1XpFLqetERKQShlF+n81W8X4RqZgSDZF66NgxePppmDwZVq7UG2dl7r67\n7IBiu90sAKfZTCJVp64TkXrm++/h+uvNgkoul9mn/tVXsHCh1ZF5nunTIT0dXn3VnEHRti3861+q\nzyJyOdSiIVLPTJ9ulmh3OMw3z6IiWLQI9u+3OjLP4+MDc+eaU1uzs83BoB07Wh2VSN2iREOknjly\nxEwuLmW3Q0qKNfHUBb6+EBpqdRQidZMSDfEqGRnmG+nPKxRKqU6dyhcyK+4WEBGpaUo0xCs4HOac\n/aZN4Te/MasR7thhdVSeaf58CAuDgADz4eMDs2bBtddaHZmIeCMNBhWvMHs2xMeXbqenw5AhZutG\nWJh1cXmi6GjYtcs8Z5mZ8Ic/wP33Wx2ViHgrJRriFdavL9tdYhjm4L2EBLjpJuvi8kSZmXDrrWYp\nbbsd3n/f/Pfee62OTES8kbpOxCvYbOX3uVzm2gtS1rRp5tLdTmfpFNcHHoDUVKsjExFvpERDvEJl\nCUXjxrUbR13wzTdQWFh+v6a3iog7KNEQr3DhQvl9mrJZscjI8i1ARUXmQFoRkZqmREO8QnS0OXvi\nUkVF5uwTKeull8xzVXy+/PzMAaGxsdbGJSLeSYmGeIUBA8yxBpdq1EhTNivSrx9s2wZ//CP07w/P\nPANr11Y8zkVE5Epp1ol4hXXryu87f95czrtv39qPx9P16gXvvWd1FCJSH6hFQ7xCXl75fT4+5hoV\nIiJiHSUa4hWGDzfHGhSz2SAkROMORESspkRDvMKsWTByZOl2WBhs2ADh4dbFJCIiSjTES/j7w+rV\ncOKEWQ/i5Eno08fqqERERINBxau0aGE+RETEM6hFQ0RERNxGiYaIiIi4jbpOxGu4XPDdd2b9jC5d\noEkTqyMSERG1aIhXyM2Fm2+Gbt1g0CBo3Ro++sjqqERERImGeIUnnzTLagMYhlnAa+RISEuzNi4R\nkfpOiYZ4hY0byy997nDAnj3WxCMiIiYlGuIVGjUqv6+oCBo2rP1YRESklBIN8QpPPVV2mXh/f+jZ\nE3r0sC4mERFRoiFe4vbbzaXOY2OhTRu46y749FPw1bwqERFL6TYsXmPECPMhIiKeQy0aIiIi4jZK\nNERERMRtlGiIiIiI2yjREBEREbdRoiEiIiJuo0RDRERE3EaJhngVlwvy862OQkREiinREK9gGDBz\nJgQGmo8uXeCHH6yOSkRElGiIV4iLgzlzShdWO3gQBg+GixetjUtEpL5ToiFeYelScDpLt51OOHkS\ndu60LiYREVGiIV7CMMrvs9kq3i8iIrVHiYZ4hbvvLruAmt0OkZFavVVExGqWL6r273//m02bNpGc\nnMwNN9zA448/XvLc8ePHWbBgAcnJyURGRvLQQw/RtWvXkue3b9/OsmXLyMzMpEOHDkyZMoWIiAgr\nDkMsNn06pKfDq69CURG0bQv/+hc0bGh1ZCIi9ZvlLRrh4eHccccd3HLLLWX2O51OXnjhBXr27Mmq\nVasYM2YMc+bM4fz58wCkpqYSFxfHpEmTWLlyJdHR0cydO9eKQxAP4OMDc+eaU1uzs83BoB07Wh2V\niIhYnmj07t2bXr16ERoaWmb/3r17KSgoYPTo0fj5+dGvXz+ioqLYvn07AJs3byY2NpZu3boREBDA\n2LFjSU5OJiUlxYrDEA/h6ws/u5RERMRClicalUlJSSE6Ohofn9IQ27Rpw/HjxwGzW6VNmzYlzwUF\nBdGsWbOS50VERMR6lo/RqExeXh7BwcFl9gUHB3P27FkA8vPzyz0fFBREXl5euZ91+vRpTp8+DUBS\nUpKbIhYREZGf89hEIzAwkJycnDL7cnJyCAwMBKBBgwbk5uZW+vylFi9ezKxZs9wXrIiIiFTIY7tO\nWrduzfHjx3G5XCX7jh49SlRUFABRUVEcPXq05Lnc3FzS0tJKnr/UxIkTSUhIICEhgRUrVrg/eBER\nEQE8INEoKiqisLAQl8uFy+WisLAQp9NJTEwM/v7+rFu3DofDwbZt20hJSaFPnz4ADBgwgMTERHbv\n3k1hYSHx8fFER0fTunXrcq/RvHlzYmNjiY2NpaOmIoiIiNQam2FYWzsxPj6e1atXl9k3aNAgpk2b\nRnJyMgsXLiQ5OZmIiAgmTpxYpo7Gtm3bWLZsGVlZWbRv356pU6f+ah2NxMREunfvTkJCArGxsW45\nJhERETFZnmjUNiUaIiIitcfyrhMRERHxXh4768Rdiqe/apqriIhI9XTo0IGgoKAqfW29SzSSk5MB\nGDdunLWBiIiI1FGXM/yg3o3RyMjI4JNPPiE6OrrCmhueKCkpiXHjxrFixQrNmvkVOldVp3NVdTpX\nVadzVXV1+VypReMXNGnShLvuusvqMKqlY8eOGsBaRTpXVadzVXU6V1Wnc1V13n6uNBhURERE3Mb+\n/PPPP291EPLrQkJCGDBgAA0bNrQ6FI+nc1V1OldVp3NVdTpXVVcfzlW9G6MhIiIitUddJyIiIuI2\nSjRERETEbZRoeJE33niD+Ph4q8MAYMKECSQkJFgaQ3x8PPPmzbM0hppUfE7/7//+j/nz51sdThnL\nli3zuJjquuHDh3PixAmrw6iS+fPns2zZMqvD8FjZ2dn8+c9/5k9/+hMLFizwunvTr6l301u92SOP\nPGJ1CFIL7rjjDqtDqBcmTJjApEmT6N69u9WhSB33ySef0KBBA1avXo3NZvOYD4S1RS0a4tGKioqs\nDkFE5IqkpaXRunVrbDab1aEAtX9fVYuGm507d4533nmH7777DqfTSZcuXfjzn//MvHnz2LdvHwUF\nBURHR/Pwww8THR0NmKVd3333Xc6ePUtAQAADBw5k/PjxABw6dIi3336b48ePExYWxrhx4+jduzdg\nNl+GhYVx7733snfvXubNm8cdd9zBP//5T1wuF3/84x8ZOXJkrR17SkoKK1as4NSpU3Tq1Inp06cT\nEhJCQkICy5Yt4+zZs7Rs2ZIJEybQoUOHkmPw9/fn/Pnz7N69m8mTJ3P11VezePFiUlNT8fPzo1u3\nbkyfPh2AU6dO8fe//51Dhw4RHBzMiBEjGDp0aEkMDoeDV155hR07dtCkSRMmTZpEly5dAMjNzeXd\nd99l586dFBUV0bt3b+6//378/f3Jzc3llVde4YcffsDpdNKhQwceeeQRmjZtCsDTTz9Np06dSEpK\n4vDhw0RFRfHYY48RERHh9vMaHx/PyZMnefzxx0lLS+PBBx/k0UcfZeXKleTk5DB48GAmTJgAwJkz\nZ1iwYAFHjhzBbrfTqlUr/vd//xeArKws3nrrLfbu3Yufnx833XQTY8aMwcfH/Pzx+eefs3btWs6d\nO0dUVBSPPPIIrVu3BuDYsWMsWLCAkydP0rlzZxo3buz24y42YcIEhg4dytatWzl58iTdunVjypQp\nvPXWW3z99ddERETw2GOPERUV9YvHeObMGRYuXMixY8cA+N3vfsekSZMICQlh3rx5pKenM2fOHHx8\nfBg2bBh33303w4cPZ9KkSXzwwQdkZGQwcOBA7rrrLuLi4ti/fz9RUVE88cQTNGnSBPjl6/PQoUO8\n9dZbJdf1DTfcwIQJE/Dz83P7OVy/fj0ffvghOTk5hIaGcvfdd9OvXz/WrVvHxx9/TG5uLp06deLh\nhx+mcePGrF+/nu+++47nnnuu5GesXbuW/fv3M3PmTAAuXrzIrFmz+P7772nZsiVTpkwhKioK+OVr\n7Zd+D8W/79tuu42tW7eWu5fU1rk5ceJEyd8cQGFhIaNHj+att94iMjKS+fPnExAQQFZWFnv27CEi\nIoJHH32Ua665hldffZUvvvgCm83Gf/7zH6ZNm1buNSu7J2ZkZDBp0iTi4+Px8/NjxYoVrF27lvj4\neAIDA3nvvfc4ceIE06ZNw+FwsGrVKrZu3UpeXh7dunXj4YcfJiQkpOQ+MXXqVFatWkVAQACLFi1y\ny/mrkCFu43Q6jWnTphlvvPGGkZOTYzgcDmPv3r2GYRjGf//7XyMnJ8coLCw0lixZYkyePLnk++65\n5x5j06ZNhmEYRm5urnHgwAHDMAzj3LlzxtixY42vvvrKcDqdxoEDB4w777zTSElJMQzDMF577TXj\n3XffNQzDML777jvj9ttvN9555x2jsLDQOHjwoPHHP/7ROHnyZK0c+wMPPGBMmTLFOHv2rJGbm2s8\n/vjjxooVK4yTJ08ao0aNMnbs2GE4nU7js88+M8aMGWNkZ2eXHMP//M//GHv27DFcLpeRn59vPPbY\nY8aaNWuMoqIio6CgwNi/f79hGIaRn59vjB8/3vjPf/5jOBwOIzU11Rg/fryRmJhoGIZhrFy50rj9\n9tuNTZs2GU6n09i4caMxZswY48KFC4ZhGMZf/vIX47XXXjNycnKMCxcuGDNnzjT+8Y9/GIZhGBcu\nXDC2bdtm5OfnG7m5ucbcuXONWbNmlRzfjBkzjPvvv99ITk42CgsLjTlz5hivvvqq28/prl27jJUr\nVxpz5841DMMwzpw5YwwbNsx49dVXjby8POP06dPGmDFjjN27dxuGYRhz5841Fi1aZDgcDsPhcBj7\n9u0zDMMwioqKjP/3//6f8Y9//MMoKCgwMjIyjClTphgff/yxYRiG8c033xgPPPCAcezYMcPpdBof\nffSRMWHCBKOwsNBwOBzGAw88YKxZs8ZwOBxGYmKiMWrUKOO1115z6/Ffeh6mTZtmZGRkGNnZ2cbE\niRONiRMnGrt27TKcTqexePFi49lnn/3VYzx9+rSRmJhoFBYWGtnZ2caMGTOMN998s9z5vtSwYcOM\n5557zrhw4YKRlpZm3HnnncbUqVONH374wXA4HMaLL75oLFiwwDCMX78+Dx8+bHz//feG0+k00tLS\njEceecRYt25dmddKTU2t8fOXmppqjBo1quRnnzt3zjh+/LixceNG44EHHjBSU1ON/Px8Y9GiRcYT\nTzxhGIZhZGZmGqNGjTIyMzNLfs7kyZONbdu2GYZh/t2OGjXK+Pbbbw2Hw2GsXr3aePDBBw2n01kj\nv4eK7iXuUNm5ufRvzjAMo6CgwBg2bJhx5syZkuMfM2aMsX//fsPpdBp///vfjSeffLLk6y+9NxuG\nUebn/do98cEHHyy55z3++OPGgw8+WHJdzpw509i4caNhGIaxZMkSY+bMmUZWVpaRn59vvPbaa8bL\nL79sGEbpfWLu3LlGTk6OkZ+f75bzVxl1nbjRoUOHSEtLY8KECQQFBeHr61vyafqmm24iKCgIPz8/\nxowZQ0pKCj/++CMAvr6+nD59mh9//JHAwEDat28PmJ8wu3btSq9evbDb7bRv355evXqxffv2Cl/f\nx8eHcePG4efnx7XXXkuLFi1KPjXUhuHDh9O0aVMCAwPp3bs3R44c4YsvviA2NpYePXpgt9sZNGgQ\nLVq04Ouvvy75vh49evDb3/4Wm81GQEAAvr6+nD17lszMTPz9/enUqRMAO3fuJDw8nKFDh+Lr60vL\nli255ZZb2Lp1a8nPatOmDQMHDsRutzN48GAiIiLYuXMn58+fZ8eOHTz00EMEBQUREhLCHXfcUfK9\nISEh9OnTh4CAAAIDAxk9ejT79+8vc3yDBw8mKioKPz8/brzxRo4cOVILZ7ViY8eOpUGDBjRr1ozO\nnTtz9OhRwLyWMjMzOXv2LL6+vnTu3BmAw4cPk5GRwbhx4/D396dx48aMGDGi5Pg/+ugjRo4cSXR0\nNHa7nSFDhmCz2Th48CAHDhygoKCA0aNH4+vrS7du3Wq9fPIf/vAHGjduTGhoKLGxsURGRtK9e3fs\ndjv9+vXjyJEjv3qMzZo1o1u3bvj5+REaGsrw4cPL/Y4rMmrUKEJCQoiIiKBTp05ce+21tGvXDl9f\nX/r27Vty7n/t+mzbti0dO3bEbrcTERHB73//e/bt2+e+k/YTu90OmC2OBQUFhIeH07p1azZv3szw\n4cNp2bIlAQEBjB8/nh9++IHTp08TFhbG7373O7Zs2QKY109mZiY9e/Ys+bndu3fnd7/7Hb6+vowe\nPZrc3FwOHjxYI7+Hiu4ltXluquL666+nU6dOJfe14uvg1/zaPTEmJoa9e/eSn5/P6dOnue2229i3\nbx9Op5OkpCS6dOmCYRh8/PHHTJgwgUaNGhEQEMBdd93F9u3by3ST3HnnnQQFBREQEHCZZ+bKqOvE\njTIyMmjatGm5ptCioiJWrFjB9u3byc7OLmmq/vHHHwkNDeXpp59mzZo1TJw4kebNm3PnnXfSo0cP\nzp49yzfffMOdd95Z5mcNGDCgwtcPCQkp89oBAQHk5+fX/IFWolGjRuVe+9y5c+W6FyIjI8nMzCzZ\nLu6eKDZlyhTi4+N59NFHCQ0NZcSIEdx8882kpaVx9OjRMufD5XKVJCIV/ayIiIiSN16Xy8UDDzxQ\n8pxhGLhcLgAKCgpYsmQJiYmJXLx4EYC8vDwcDkfJOQ0LCyt3fFb5eSx5eXkAjB8/nvj4eJ555hns\ndju///3vGT16NGfPniU7O5uxY8eWfJ/L5Spp8j979izvvvsuy5cvL3ne4XBw7tw5bDYb4eHhJdct\nmOc5JyfH3YdZ4ufXVkW/i187xqysLJYsWcL+/fvJy8vDMIwqLbT489f6eSzF5/7Xrs+TJ0/y9ttv\nc/jwYQoKCigqKqJNmzaXeyouW/PmzZk2bRoffvghcXFxdO7cmfvvv7/c32ZgYCANGzbk3LlzNG/e\nnJtuuolVq1YxYsQIPv/8c/r161fm/nLp35rdbqdx48Yl18uV/h4qupe4Q2Xnpiqqez/4tXtily5d\n+Oyzz7j22mtp3749Xbt2ZeHChRw6dIjQ0FAiIyM5f/48BQUFPPHEE2V+js1m4/z58yXbP78f1hYl\nGm7UpEkT0tPTcTqd+PqWnuqtW7fy1VdfMXv2bCIjI8nNzS1zM2rbti1PP/00RUVFbNu2jf/93/9l\n5cqVNG3alH79+lXYx1dXNG7cuNynkbS0NLp27Vqy/fMBU82bN2f69OkYhsG+fft47rnn6Ny5M02b\nNqVDhw785S9/qfT10tPTy2337t2bpk2bYrfb+cc//lFhn/j69etJTU1l3rx5hIeHc+zYMaZOnYpR\nxwrpNmrUqGQ20tGjR3n22Wdp164dTZo0oUmTJixZsqTC72vSpAkjR45k8ODB5Z7bt28fmZmZuFyu\nkmQjPT29yis51pZfO8bly5fjcrl4/fXXCQ0N5euvv+aNN96osdf/tevzzTffLBnbExQUxAcffFCm\nNc6d+vbtS9++fSkoKGDZsmUsXLiQxo0bc/bs2ZKvycvL48KFCyXjb6677joWLVrEkSNH2Lp1K888\n8ynwRroAAAjdSURBVEyZn3np31pRURHnzp2jcePG+Pj4WPp7uFwVnZvrr7+egoKCkq/Jysqqsdf7\ntXtiTEwMixYt4ttvvyUmJoaoqCjS09PZsWNHSQt5aGgo/v7+zJ8/n8jIyHKvkZaWBpS/t9YWdZ24\nUbt27WjatClvv/02ubm5OJ1O9u3bR15eHn5+fjRs2JDCwkJWrFhR8j0Oh4PPP/+cixcvYrfbCQ4O\nxmaz4ePjw4ABA0hISGDHjh0UFRXhcDg4ePAgqampFh7l5enbty+JiYkkJCRQVFTE559/zsmTJ+nV\nq1el37Np0ybOnz+PzWYjODgYMLuFilt5Pv30UxwOB0VFRSQnJ3Po0KGS7z127Bhbtmwpea0zZ85w\n3XXXERYWxnXXXcdbb73FxYsXMQyD9PT0ktofeXl5+Pv7ExwczMWLF1mzZo17T4ybbNu2reQNIDg4\nGB8fH3x8fGjXrh2hoaGsXr2a/Px8XC4Xp06dKmm6v/XWW3nvvfc4duwYhmGQl5fHjh07yM3NpUOH\nDvj7+7Nu3TqcTie7d+8mMTHRysOs0K8dY15eHg0aNCA4OJhz587xr3/9q8z3N2rUiDNnzlT79X/t\n+szLyyMoKIjAwEBOnjzJxx9/XP2DvQwnTpxg9+7dFBYW4uvrS4MGDfDx8aF///588MEHnDx5ksLC\nQpYtW0a7du1o3rw5YHbDDRgwgLi4OBo2bFjSpVssMTGRPXv24HQ6Wbt2LYGBgSXdSlfye6hNlZ2b\na665hv3793PmzBny8/NZvXp1jb3mr90TmzRpQnh4OJ9++ikxMTHYbDY6dOjARx99RExMDGDeD4cM\nGcLbb79d0hJy/vz5Ml3SVlKLhhvZ7XaeffZZlixZwkMPPYTL5SImJoZp06aRmJjI+PHjadiwYbll\n67ds2cJbb71FUVERERERPPHEE/j7+9OkSROee+453n33XeLi4gCIjo4u0/zv6Vq0aMGTTz7Ju+++\nS3p6OldffTXPPvssoaGhlX7P7t27Wbp0KQUFBYSFhfHwww/TrFkzAGbPns0777zD8uXLcTqdtGzZ\nknHjxpV8b8+ePdm1axdvvPEGTZo0YcaMGSWLF02bNo0VK1YwdepULl68SJMmTRgyZAjdu3dn+PDh\nvPLKK9x9992Eh4czYsQIvvzyS/eeHDc4fPgwb7/9NhcvXqRhw4YMGzas5Ob07LPP8u677/Lwww+T\nn59PZGQko0aNAqBXr14UFBQwf/580tLSCAgIoFOnTnTp0gVfX1/+/Oc/s3DhQv7v//6PLl26MHDg\nQBwOh5WHWk7x319lx3jnnXfy2muvceedd9K8eXMGDBjA+vXrS76/eFbBihUruO2228pcV1URGBj4\ni9fn+PHjWbRoEe+//z7XXHMNffr04dtvv625E1AJh8PBihUrSE1NLXkTfeSRR7j66qvJysriueee\nK5l18vOm+MGDB/P+++9zzz33lPu5AwYM4F//+hcvvfQSLVu25Omnny5pyb2S30NtquzctGzZkkGD\nBvHoo48SHBzMuHHj+Oyzz2rkNatyT4yJieGrr74qmZlYvF3cogH/v717CYmyi+M4/nUsTKQrlYxG\nWU5DRBcqaBYhs8kIQtCFRRewiBbaskA3E0VMLdsEQVB02xndwIJZRNsolKCIVBJpMQhBkxF2o9Nu\nwNcurzKPFnw/MKvzn2fO2f2e85/nOdDW1kZ3dzddXV28f/+e+fPn09DQ8NubuOnioWqSpP/lw4cP\ntLW1cfHixeJ/LKQ/sXUiSfqjEAL37t1j8+bNhgxNiq0TSdJvff36lf3797Nw4UIymcxMT0f/GFsn\nkiQpMrZOJElSZAwakiQpMgYNSZIUGYOGJEmKjEFDkiRFxqAhacrOnTvH8uXLKS8vp7m5OfLfa25u\n/uUhgpL+Tr5HQ9KUDAwMcOzYMTo7O2lqavIlTpJ+yqAhaUpevXpFCIEjR46watWqmZ6OpL+UrRNJ\nk3bw4EGampoAqK+vp6ysjCtXrlAoFOjo6CAej1NRUcGWLVvI5XITvt/T00MqlaKyspIlS5bQ3t7O\nx48fx9W8fPmSdDrNnDlzqK+v5+rVq9OyNkml5Y6GpEnLZDKsXbuWzs5Obt26RTweZ+XKlTQ2NjIy\nMkI2m6W2trZ48mlvb2/x1NibN2+yZ88eDh06xKlTp8jn83R1dfHu3bvi8dufPn1ix44dVFVVcf36\ndQBOnDjB6Ogoq1evnrF1S5qCIElTcPv27QCEoaGhEEIIly9fDrNmzQovXrwYV5dKpUJra2sIIYTv\n37+HFStWhL17946refDgQSgrKwvPnz8PIYRw4cKFEIvFQn9/f7FmYGAgxGKxkE6no1uUpJKzdSKp\nJHK5HOvXryeZTPLt27fip7GxkSdPngDQ39/P8PAwu3fvHleTTqeJxWI8ffoUgMePH7Nu3bpxuxeJ\nRIKNGzfOyNokTZ2tE0kl8fbtW/r6+pg9e/aEsfLy8mINQEtLy0+v8ebNGwDy+TxLly6dMF5dXc3Y\n2FippixpGhg0JJXEokWL2LBhA5cuXfptDcD58+dJpVITxmtqagCIx+P09vZOGB8ZGWHevHklmrGk\n6WDQkFQS27dv5/79+9TU1BQDw3+tWbOGZcuW8fr1a44ePfrLa23dupVr164xODhIIpEAYHBwkGfP\nntHQ0BDJ/CVFoyyEEGZ6EpL+PXfu3KGlpYWhoSHq6ur4/Pkz27ZtY3R0lOPHj5NMJikUCvT19fHl\nyxfOnj0LQHd3N/v27ePw4cPs2rWLqqoqhoeH6enp4cyZMySTScbGxkgkEsydO5fTp08D4586efTo\n0QyuXNJkuKMhqSQqKip4+PAhJ0+eJJvNks/nWbx4MZs2baKjo6NY19rayoIFC8hms9y4cQOAuro6\ndu7cSXV1NQCVlZXkcjna29s5cOAAtbW1ZDIZ7t69S6FQmJH1SZoadzQkSVJkfLxVkiRFxqAhSZIi\nY9CQJEmRMWhIkqTIGDQkSVJkDBqSJCkyBg1JkhQZg4YkSYqMQUOSJEXGoCFJkiJj0JAkSZH5AR2A\n8DGkoVJvAAAAAElFTkSuQmCC\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "import pandas\n", + "chicks = pandas.read_csv(\"chickwts.txt\")\n", + "import plotnine\n", + "from plotnine import *\n", + "p=(ggplot(data=chicks) +\n", + " aes(x=\"feed\",y=\"weight\") +\n", + " geom_point(color=\"blue\") +\n", + " theme_classic())\n", + "print p" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Is chick weight different when they're fed soybean feed vs sunflower feed?\n", + "### Hypotheses:\n", + "* Null hypothesis: There's no difference" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def null(p,obs):\n", + " B0=p[0]\n", + " sigma=p[1]\n", + " \n", + " expected=B0\n", + " nll= -1*scipy.stats.norm(expected,sigma).logpdf(obs.weight).sum()\n", + " return nll" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* Alternate hypothesis:" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def alter(p,obs):\n", + " B0=p[0]\n", + " B1=p[1]\n", + " sigma=p[2]\n", + " \n", + " expected=B0+B1*obs.x # error?\n", + " nll= -1*scipy.stats.norm(expected,sigma).logpdf(obs.weight).sum()\n", + " return nll" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Then we have to subset the data to only include chicks fed soybeans or sunflowers:\n", + "#### Note: Must change feed names to integers so that numpy can understand\n", + "* soybean=0\n", + "* sunflower=1" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\NicCage\\Anaconda2\\lib\\site-packages\\ipykernel_launcher.py:5: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + " \"\"\"\n" + ] + } + ], + "source": [ + "chicks = pandas.read_csv(\"chickwts.txt\")\n", + "\n", + "#fixed subset\n", + "subset1 = chicks.loc[chicks['feed'].isin(['sunflower','soybean'])]\n", + "subset1['x'] = [0 if ele == \"soybean\" else 1 for ele in subset1[\"feed\"]]\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Let's test it using a likelihood ratio test!\n", + "#### First import your packages" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import numpy\n", + "import scipy\n", + "from scipy import optimize\n", + "from scipy import stats\n", + "from scipy.stats import norm\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### The fit" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Optimization terminated successfully.\n", + " Current function value: 145.240592\n", + " Iterations: 85\n", + " Function evaluations: 162\n", + "Optimization terminated successfully.\n", + " Current function value: 138.469162\n", + " Iterations: 200\n", + " Function evaluations: 363\n", + " final_simplex: (array([[ 284.49999051, 64.53691755],\n", + " [ 284.49994818, 64.53700563],\n", + " [ 284.50008783, 64.53694034]]), array([ 145.2405921, 145.2405921, 145.2405921]))\n", + " fun: 145.24059209701645\n", + " message: 'Optimization terminated successfully.'\n", + " nfev: 162\n", + " nit: 85\n", + " status: 0\n", + " success: True\n", + " x: array([ 284.49999051, 64.53691755])\n", + " final_simplex: (array([[ 246.42855057, 82.48813575, 49.73948886],\n", + " [ 246.42852198, 82.48806896, 49.73943687],\n", + " [ 246.42847238, 82.48817448, 49.73945349],\n", + " [ 246.42854646, 82.48810237, 49.73950669]]), array([ 138.46916182, 138.46916182, 138.46916182, 138.46916182]))\n", + " fun: 138.4691618247918\n", + " message: 'Optimization terminated successfully.'\n", + " nfev: 363\n", + " nit: 200\n", + " status: 0\n", + " success: True\n", + " x: array([ 246.42855057, 82.48813575, 49.73948886])\n" + ] + } + ], + "source": [ + "initialGuess=numpy.array([1,1])\n", + "alterGuess=numpy.array([1,1,1])\n", + "fitNull=scipy.optimize.minimize(null,initialGuess,method=\"Nelder-Mead\",options={'disp':True},args=subset1)\n", + "fitAlter=scipy.optimize.minimize(alter,alterGuess,method=\"Nelder-Mead\",options={'disp':True},args=subset1)\n", + "print(fitNull)\n", + "print(fitAlter)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "#### The results" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "result\n", + "0.000233176728695\n" + ] + } + ], + "source": [ + "D=2*(fitNull.fun-fitAlter.fun)\n", + "print \"result\"\n", + "print 1-scipy.stats.chi2.cdf(x=D,df=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "# sunflower seed sig increases weight!" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.2" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/exercise12B.html b/exercise12B.html new file mode 100755 index 0000000..ddfa666 --- /dev/null +++ b/exercise12B.html @@ -0,0 +1,12028 @@ + + + +Untitled3 + + + + + + + + + + + + + + + + + + + +
+
+ +
+
+
+
+
+

Exercise 12 problem 2

Regular experesions are a powerful tool to select only the text string you want.

+

Follow along on this exercise due the day after thanksgiving :( for three useful scenarios.

+ +
+
+
+
+
+
+
+
+

Load the library

+
+
+
+
+
+
In [35]:
+
+
+
import re
+
+ +
+
+
+ +
+
+
+
+
+
+

2-1 selecting times with regex

times after noon but before midnight in military times (12:01 to 23:59)

+ +
+
+
+
+
+
In [36]:
+
+
+
# the regular expression
+a = r'(([1][3-9]|[2][0-3]):[0-5][0-9])|(12:[0-5][1-9])' 
+
+ +
+
+
+ +
+
+
+
+
+
+

Our regular expression captures times by breaking down the military time into selectable portions. +We first select the hour digits with an expression "[1][3-9]|[2][0-3]". This experession captures hours 13-19 or hours 20-23. We then select the ":" and then minutes 00-59 with "[0-5][0-9]". Finally, because we did not want to select noon we have to create an additional or statement that says select everying from 12:01 to 12:59 we do this with: "12:[0-5][1-9]".

+

Let's see this in action.

+ +
+
+
+
+
+
In [37]:
+
+
+
# create an example list, notice there are times we don't want to select
+times = ["1:00","2:40","4:50","10:50","11:59","12:00","12:01", "13:50", "15:10", "17:20", "20:20", "22:22",
+         "23:00", "23:59", "24:00"]
+
+# example
+select_times = re.compile(a)
+time_result = filter(select_times.match, times)
+print str(time_result) 
+
+ +
+
+
+ +
+
+ + +
+
+ +
+
['12:01', '13:50', '15:10', '17:20', '20:20', '22:22', '23:00', '23:59']
+
+
+
+ +
+
+ +
+
+
+
+
+
+

2-2 matching abbreviated species names (G. species) with regex

+
+
+
+
+
+
In [ ]:
+
+
+
# the regular expression
+b = r'[A-Z]\.\s[a-z]+'
+
+ +
+
+
+ +
+
+
+
+
+
+

This regular expression captures only strings containing the abbreviated Genus (G.) with "[A-Z]." It then caputres strings containing a space followed by a lowercase species name of any length with this "\s[a-z]+".

+

Let's check out the example below!

+ +
+
+
+
+
+
In [39]:
+
+
+
# create an example list, notice there are things we don't want to select
+species = ["H. sapiens", "turkey", "t. rot", "thanksgiving", "holiday", "time off", "M. pecosella", "R. pomonella",
+           "Homo. sapiens", "T. Urkey", "T. urkey", "R. elaxing", "stuffing", "Pumpkin pie", "M. stellela"]
+
+# example
+select_species = re.compile(b)
+species_result = filter(select_species.match, species)
+print str(species_result)
+
+ +
+
+
+ +
+
+ + +
+
+ +
+
['H. sapiens', 'M. pecosella', 'R. pomonella', 'T. urkey', 'R. elaxing', 'M. stellela']
+
+
+
+ +
+
+ +
+
+
+
+
+
+

2-3 matching social security numbers (xxx-xx-xxxx) with regex

+
+
+
+
+
+
In [41]:
+
+
+
# the regular expression 
+c = r'[0-9]{3}-[0-9]{2}-[0-9]{4}'
+
+ +
+
+
+ +
+
+
+
+
+
+

Here in this last example, we see that regex is useful for matching the exact format of social security numbers. +"[0-9]{3}-[0-9]{2}-[0-9]{4}" matches 3 of any number, a dash, 2 of any number, a dash, and finally 4 of any number.

+

This code is useful if you ever want to steal social security numbers from a large dataframe! Follow alog in the example below!

+ +
+
+
+
+
+
In [42]:
+
+
+
# create an example list
+social = ["1234567", "1234345", "123-34353-346", "321-33-5543", "234-432-5453", "321-34-454", "232-31-5436", "Turkey", "232-2a-4323"]
+
+# example
+select_social = re.compile(c)
+social_result = filter(select_social.match, social)
+print str(social_result)
+
+ +
+
+
+ +
+
+ + +
+
+ +
+
['321-33-5543', '232-31-5436']
+
+
+
+ +
+
+ +
+
+
+ + + + + + diff --git a/exercise12B.ipynb b/exercise12B.ipynb new file mode 100755 index 0000000..b275989 --- /dev/null +++ b/exercise12B.ipynb @@ -0,0 +1,213 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Exercise 12 problem 2\n", + "\n", + "Regular experesions are a powerful tool to select only the text string you want.\n", + "\n", + "Follow along on this exercise ***due the day after thanksgiving :(*** for three useful scenarios. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Load the library" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import re" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2-1 selecting times with regex\n", + "times after noon but before midnight in military times (12:01 to 23:59)" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# the regular expression\n", + "a = r'(([1][3-9]|[2][0-3]):[0-5][0-9])|(12:[0-5][1-9])' " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Our regular expression captures times by breaking down the military time into selectable portions.\n", + "We first select the hour digits with an expression \"[1][3-9]|[2][0-3]\". This experession captures hours 13-19 or hours 20-23. We then select the \":\" and then minutes 00-59 with \"[0-5][0-9]\". Finally, because we did not want to select noon we have to create an additional or statement that says select everying from 12:01 to 12:59 we do this with: \"12:[0-5][1-9]\".\n", + "\n", + "Let's see this in action. " + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['12:01', '13:50', '15:10', '17:20', '20:20', '22:22', '23:00', '23:59']\n" + ] + } + ], + "source": [ + "# create an example list, notice there are times we don't want to select\n", + "times = [\"1:00\",\"2:40\",\"4:50\",\"10:50\",\"11:59\",\"12:00\",\"12:01\", \"13:50\", \"15:10\", \"17:20\", \"20:20\", \"22:22\",\n", + " \"23:00\", \"23:59\", \"24:00\"]\n", + "\n", + "# example\n", + "select_times = re.compile(a)\n", + "time_result = filter(select_times.match, times)\n", + "print str(time_result) " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2-2 matching abbreviated species names (G. species) with regex" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# the regular expression\n", + "b = r'[A-Z]\\.\\s[a-z]+'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This regular expression captures only strings containing the abbreviated Genus (G.) with \"[A-Z]\\.\" It then caputres strings containing a space followed by a lowercase species name of any length with this \"\\s[a-z]+\". \n", + "\n", + "Let's check out the example below!" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['H. sapiens', 'M. pecosella', 'R. pomonella', 'T. urkey', 'R. elaxing', 'M. stellela']\n" + ] + } + ], + "source": [ + "# create an example list, notice there are things we don't want to select\n", + "species = [\"H. sapiens\", \"turkey\", \"t. rot\", \"thanksgiving\", \"holiday\", \"time off\", \"M. pecosella\", \"R. pomonella\",\n", + " \"Homo. sapiens\", \"T. Urkey\", \"T. urkey\", \"R. elaxing\", \"stuffing\", \"Pumpkin pie\", \"M. stellela\"]\n", + "\n", + "# example\n", + "select_species = re.compile(b)\n", + "species_result = filter(select_species.match, species)\n", + "print str(species_result)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2-3 matching social security numbers (xxx-xx-xxxx) with regex" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# the regular expression \n", + "c = r'[0-9]{3}-[0-9]{2}-[0-9]{4}'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here in this last example, we see that regex is useful for matching the exact format of social security numbers. \n", + "\"[0-9]{3}-[0-9]{2}-[0-9]{4}\" matches 3 of any number, a dash, 2 of any number, a dash, and finally 4 of any number.\n", + "\n", + "\n", + "This code is useful if you ever want to steal social security numbers from a large dataframe! Follow alog in the example below!" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['321-33-5543', '232-31-5436']\n" + ] + } + ], + "source": [ + "# create an example list\n", + "social = [\"1234567\", \"1234345\", \"123-34353-346\", \"321-33-5543\", \"234-432-5453\", \"321-34-454\", \"232-31-5436\", \"Turkey\", \"232-2a-4323\"]\n", + "\n", + "# example\n", + "select_social = re.compile(c)\n", + "social_result = filter(select_social.match, social)\n", + "print str(social_result)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.13" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/exercise12Bcode.py b/exercise12Bcode.py new file mode 100755 index 0000000..11de7ea --- /dev/null +++ b/exercise12Bcode.py @@ -0,0 +1,7 @@ +import re +a = r'([1][2-9]|[2][0-3]):[0-5][0-9]' +re.search(a, "21:30") +b = r'[A-Z]\.\s[a-z]+' +re.search(b,"H. sapiens") +c = r'[0-9]{3}-[0-9]{2}-[0-9]{4}' +re.search(c,"389-05-4771")