From c9866c75d17d30f54b9d21ae4d37104d0293e35e Mon Sep 17 00:00:00 2001 From: Bridgette Drummond Date: Fri, 17 Nov 2017 13:28:20 -0500 Subject: [PATCH 1/4] Problem 2 html --- Problem+2.html | 11922 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 11922 insertions(+) create mode 100644 Problem+2.html diff --git a/Problem+2.html b/Problem+2.html new file mode 100644 index 0000000..52087e6 --- /dev/null +++ b/Problem+2.html @@ -0,0 +1,11922 @@ + + + +Problem 2 + + + + + + + + + + + + + + + + + + + +
+
+ +
+
+
In [10]:
+
+
+
import re
+list1 = "13:01", "14:02", "15:03", "16:07", "09:00", "10:00"
+regex=r"(13)|(14)|(15)|(16)|(17)|(18)|(19)|(20)|(21)|(22)|(23)\:[0-9]{2}"
+var1=re.compile(regex)
+filter(var1.match,list1)
+
+ +
+
+
+ +
+
+ + +
+
Out[10]:
+ + + +
+
('13:01', '14:02', '15:03', '16:07')
+
+ +
+ +
+
+ +
+
+
+
+
+
+

1) First, import your re package.

+

2) Then create a list that you want to use to check your regex. Put each time in quotes so that Python doesn't get confused by the colons.

+

3) Name your regex and type it in between quotes preceded by an r.

+

4) Use re.compile to turn your regex into a compiled pattern object; its match method looks for a match starting at the beginning of a string. Remember to assign the compiled pattern to a name.

+

5) Use the filter function to display the times that match the regex from the list.

+ +
+
+
+
+
+
In [11]:
+
+
+
list2 = 'H. sapien', 'D. rerio', 'Doggo', 'pupper', 'D. melangaster'  #test names; the bare commas make this a tuple
+regex = r"[A-Z]\. [a-z]+"  #one capital letter, a literal period, a space, then one or more lowercase letters
+var2 = re.compile (regex)
+filter(var2.match, list2)  #match() anchors at the start of each string; keeps the names that fit the pattern
+
+ +
+
+
+ +
+
+ + +
+
Out[11]:
+ + + +
+
('H. sapien', 'D. rerio', 'D. melangaster')
+
+ +
+ +
+
+ +
+
+
+
+
+
+

1) Create a list

+

2) Create a regex that does the following: a set of square brackets indicating a search for one capital letter between A and Z, followed by a period, a space, and then one or more lowercase letters between a and z.

+

3) Use re.compile and the filter functions to search for your regex within your list.

+ +
+
+
+
+
+
In [12]:
+
+
+
list3 = '389-05-4771', '234-444-12', '123-45-6789', '1234'  #candidate strings; only two have the 3-2-4 digit layout
+regex = r"[0-9]{3}-[0-9]{2}-[0-9]{4}"  #3 digits, dash, 2 digits, dash, 4 digits
+var3 = re.compile (regex)
+filter(var3.match, list3)  #match() anchors only at the start, so extra characters after the last 4 digits would still pass
+
+ +
+
+
+ +
+
+ + +
+
Out[12]:
+ + + +
+
('389-05-4771', '123-45-6789')
+
+ +
+ +
+
+ +
+
+
+
+
+
+

Repeat steps 1-5 of the first example, except this time your regex should be structured with 0-9 in square brackets followed by the number of repetitions in curly brackets, so that it searches for: 3 digits, followed by a dash, 2 digits, followed by a dash, and 4 digits.

+ +
+
+
+
+
+ + + + + + From 1eaa3d87bf79e116a655a2c9d2c575fab2092218 Mon Sep 17 00:00:00 2001 From: Bridgette Drummond Date: Fri, 17 Nov 2017 13:31:12 -0500 Subject: [PATCH 2/4] ipynb version of Problem 2 --- Problem_2.ipynb | 129 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 129 insertions(+) create mode 100644 Problem_2.ipynb diff --git a/Problem_2.ipynb b/Problem_2.ipynb new file mode 100644 index 0000000..db09bd3 --- /dev/null +++ b/Problem_2.ipynb @@ -0,0 +1,129 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "('13:01', '14:02', '15:03', '16:07')" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import re\n", + "list1 = \"13:01\", \"14:02\", \"15:03\", \"16:07\", \"09:00\", \"10:00\"\n", + "regex=r\"(13)|(14)|(15)|(16)|(17)|(18)|(19)|(20)|(21)|(22)|(23)\\:[0-9]{2}\"\n", + "var1=re.compile(regex)\n", + "filter(var1.match,list1)\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "1) First, import your re package.\n", + "\n", + "2) Then create a list that you want use to check your regex. Put the time in quotes so that Python doesn't get confused by the colons.\n", + "\n", + "3) Name your regex and type it in between quotes proceeded by an r.\n", + "\n", + "4) Use re.compile to find exact matches to your regex (begins search at the beginning of a string). Remember to name this dataset.\n", + "\n", + "5) Use the filter function to display the times that match the regex from the list." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "('H. sapien', 'D. rerio', 'D. melangaster')" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "list2 = 'H. sapien', 'D. rerio', 'Doggo', 'pupper', 'D. melangaster'\n", + "regex = r\"[A-Z]\\. 
[a-z]+\"\n", + "var2 = re.compile (regex)\n", + "filter(var2.match, list2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "1) Create a list\n", + "\n", + "2) Create a regex that does the following; A set of squeare brackets indicating a search for one letter between A and Z that is capitalized, followed by a period, a space, and then one or more lower case letters between a and z.\n", + "\n", + "3) Use re.compile and the filter functions to search for your regex within your list." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "('389-05-4771', '123-45-6789')" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "list3 = '389-05-4771', '234-444-12', '123-45-6789', '1234'\n", + "regex = r\"[0-9]{3}-[0-9]{2}-[0-9]{4}\"\n", + "var3 = re.compile (regex)\n", + "filter(var3.match, list3)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Repeat steps 1-5 of the first example except this time your regex should be structured with 0-9 in square brackets followed by the number of instances of those numbers in curly brackets in a way that tells the regex to search for; 3 numbers, followed by a dash, two numbers, followed by a dash, and 4 numbers, followed by a dash." 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.13" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 036de5b0d50857f88f01f7aa9cf0be09fdc41410 Mon Sep 17 00:00:00 2001 From: Brooke Date: Sun, 19 Nov 2017 19:17:28 -0500 Subject: [PATCH 3/4] Ex12Q1 done --- Exercise12Q1.html | 12320 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 12320 insertions(+) create mode 100755 Exercise12Q1.html diff --git a/Exercise12Q1.html b/Exercise12Q1.html new file mode 100755 index 0000000..27bd29a --- /dev/null +++ b/Exercise12Q1.html @@ -0,0 +1,12320 @@ + + + +Exercise12Q1 + + + + + + + + + + + + + + + + + + + +
+
+ +
+
+
+
+
+

The first column consists of the weights of chicks and the second column consists of the feed type (either horsebean, linseed, soybean, sunflower, meatmeal, or casein). The two columns are separated by a comma, so the file is a "csv".

+
+
+
+
+
+
+
+
+

Import all the necessary packages

+ +
+
+
+
+
+
In [36]:
+
+
+
#Load packages
+import numpy
+import pandas
+from plotnine import *
+
+ +
+
+
+ +
+
+
+
+
+
+

Load the csv file with pandas.read_csv, using a comma as the separator

+ +
+
+
+
+
+
In [37]:
+
+
+
#Load file: comma-separated text whose first row (header=0) supplies the column names
+chicks=pandas.read_csv("chickwts.txt", header=0, sep=",")
+
+ +
+
+
+ +
+
+
+
+
+
+

Generating a plot that summarizes the mean weights of chicks when fed different feed types

+
+
+
+
+
+
In [38]:
+
+
+
#Make a bar graph of the mean weight for each feed type
+d=ggplot(chicks)+theme_classic()+xlab("feed")+ylab("weight")  #base plot: classic theme and axis labels
+d+geom_bar(aes(x="factor(feed)",y="weight"),stat="summary",fun_y=numpy.mean)  #stat="summary" with fun_y=numpy.mean draws one bar per feed at its mean weight
+
+ +
+
+
+ +
+
+ + +
+
+ + + +
+ +
+ +
+ +
+
Out[38]:
+ + + +
+
<ggplot: (14264296)>
+
+ +
+ +
+
+ +
+
+
+
+
+
+

Hnull : chick weight when fed soybean = chick weight when fed sunflower

+

Halternative: chick weight when fed soybean ≠ chick weight when fed sunflower

+ +
+
+
+
+
+
+
+
+

Testing the null hypothesis using a likelihood ratio test

+
+
+
+
+
+
In [39]:
+
+
+
#Import packages
+import numpy
+import pandas
+from scipy.optimize import minimize
+from scipy.stats import norm
+from plotnine import *
+
+ +
+
+
+ +
+
+
+
In [40]:
+
+
+
#Subsetting data so it only includes sunflower & soybean data points
+SFvsSB=chicks.loc[chicks.feed.isin(['sunflower', 'soybean']),:]  #boolean row mask on feed; ':' keeps every column
+
+ +
+
+
+ +
+
+
+
In [41]:
+
+
+
#Creating a new dataframe with weight as y and feed recoded as a 0/1 x column
+#x starts at 0 for every row (soybean) and is then set to 1 for the sunflower rows
+SFvsSBdf=pandas.DataFrame({'y':SFvsSB.weight, 'x':0})
+SFvsSBdf.loc[SFvsSB.feed=='sunflower', 'x']=1
+
+ +
+
+
+ +
+
+
+
In [42]:
+
+
+
#Null model negative log-likelihood: one mean (B0) shared by every observation
+def nllike(p,obs):
+    B0=p[0]  #mean weight for all observations
+    sigma=p[1]  #standard deviation of the normal error
+    expected=B0
+    nll=-1*norm(expected,sigma).logpdf(obs.y).sum()  #sum the normal log-densities of obs.y, then negate
+    return nll
+
+ +
+
+
+ +
+
+
+
In [43]:
+
+
+
#Alternative model negative log-likelihood: the mean shifts by B1 where obs.x is 1
+def nllike2(p,obs):
+    B0=p[0]  #mean weight where x is 0
+    B1=p[1]  #change in mean weight where x is 1
+    sigma=p[2]  #standard deviation of the normal error
+    expected=B0+B1*obs.x
+    nll=-1*norm(expected,sigma).logpdf(obs.y).sum()  #sum the normal log-densities of obs.y, then negate
+    return nll
+
+ +
+
+
+ +
+
+
+
In [44]:
+
+
+
#Starting guesses handed to the optimizer for the model fits
+initialVals1=numpy.array([1,1,1])
+
+ +
+
+
+ +
+
+
+
In [45]:
+
+
+
#Fit the null and alternative models by minimizing each nll with the Nelder-Mead method
+fitNull=minimize(nllike,initialVals1, method="Nelder-Mead",options={'disp': True}, args=SFvsSBdf)  #nllike reads only p[0] and p[1]; the third start value is an unused free dimension
+fitAlt=minimize(nllike2,initialVals1, method="Nelder-Mead",options={'disp': True}, args=SFvsSBdf)
+print(fitNull.x)  #fitted parameters; the third printed value is not used by nllike
+print(fitAlt.x)  #fitted B0, B1, sigma
+
+ +
+
+
+ +
+
+ + +
+
+ +
+
Optimization terminated successfully.
+         Current function value: 145.240592
+         Iterations: 138
+         Function evaluations: 261
+Optimization terminated successfully.
+         Current function value: 138.469162
+         Iterations: 200
+         Function evaluations: 363
+[ 284.49996562   64.53698087  -87.59469238]
+[ 246.42855057   82.48813575   49.73948886]
+
+
+
+ +
+
+ +
+
+
+
In [46]:
+
+
+
#Calculating p-value for the likelihood ratio test
+from scipy.stats import chi2
+D=(2*(fitNull.fun-fitAlt.fun))  #test statistic: twice the difference in minimized nll
+feedanswer=(1-chi2.cdf(x=D,df=1))  #upper-tail chi-square probability; df=1 because nllike2 has one more parameter (B1) than nllike
+print('weight p value')
+print(feedanswer)
+
+ +
+
+
+ +
+
+ + +
+
+ +
+
weight p value
+0.000233176728697
+
+
+
+ +
+
+ +
+
+
+
+
+
+

Because p=0.00023, which is less than 0.01, we reject the null hypothesis in favor of the alternative hypothesis

+
+
+
+
+
+
+
+
+

There is a statistically significant difference between the weights of chicks fed sunflower versus soybean

+
+
+
+
+
+ + + + + + From c4b1f513f869e0ab700dfa6d207261e89231047c Mon Sep 17 00:00:00 2001 From: Brooke Date: Sun, 19 Nov 2017 19:23:13 -0500 Subject: [PATCH 4/4] python code for Q1 --- Ex12Q1.py | 70 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100755 Ex12Q1.py diff --git a/Ex12Q1.py b/Ex12Q1.py new file mode 100755 index 0000000..5f44a43 --- /dev/null +++ b/Ex12Q1.py @@ -0,0 +1,70 @@ +#Exercise 12 Question 1 +############################################################################################################### +##Generating a plot + +#Load packages +import numpy +import pandas +from plotnine import * + +#Load file +chicks=pandas.read_csv("chickwts.txt", header=0, sep=",") + +#Generate bar graph that summarizes means of data +d=ggplot(chicks)+theme_classic()+xlab("feed")+ylab("weight") +d+geom_bar(aes(x="factor(feed)",y="weight"),stat="summary",fun_y=numpy.mean) + +############################################################################################################## +##Running the likelihood ratio test + +#Import packages +import numpy +import pandas +from scipy.optimize import minimize +from scipy.stats import norm +from plotnine import * + +#Subsetting data so it only includes sunflower & soybean data points +SFvsSB=chicks.loc[chicks.feed.isin(['sunflower', 'soybean']),:] + +#Creating a new dataframe to change x column to 0's and 1's +SFvsSBdf=pandas.DataFrame({'y':SFvsSB.weight, 'x':0}) +SFvsSBdf.loc[SFvsSB.feed=='sunflower', 'x']=1 + +#Null hypothesis likelihood ratio equation +def nllike(p,obs): + B0=p[0] + sigma=p[1] + expected=B0 + nll=-1*norm(expected,sigma).logpdf(obs.y).sum() + return nll + +#Alternative hypothesis likelihood ratio equation +def nllike2(p,obs): + B0=p[0] + B1=p[1] + sigma=p[2] + expected=B0+B1*obs.x + nll=-1*norm(expected,sigma).logpdf(obs.y).sum() + return nll + +#Estimating parameters by minimizing the nll +initialVals1=numpy.array([1,1,1]) + +#Fit null and alternative 
hypotheseses to Nelder-Mead model +fitNull=minimize(nllike,initialVals1, method="Nelder-Mead",options={'disp': True}, args=SFvsSBdf) +fitAlt=minimize(nllike2,initialVals1, method="Nelder-Mead",options={'disp': True}, args=SFvsSBdf) +print(fitNull.x) +print(fitAlt.x) + +#Calculating p-value +from scipy.stats import chi2 +D=(2*(fitNull.fun-fitAlt.fun)) +feedanswer=(1-chi2.cdf(x=D,df=1)) +print('weight p value') +print(feedanswer) + + + + +