diff --git a/Ex12Q1.py b/Ex12Q1.py new file mode 100755 index 0000000..5f44a43 --- /dev/null +++ b/Ex12Q1.py @@ -0,0 +1,70 @@ +#Exercise 12 Question 1 +############################################################################################################### +##Generating a plot + +#Load packages +import numpy +import pandas +from plotnine import * + +#Load file +chicks=pandas.read_csv("chickwts.txt", header=0, sep=",") + +#Generate bar graph that summarizes means of data +d=ggplot(chicks)+theme_classic()+xlab("feed")+ylab("weight") +d+geom_bar(aes(x="factor(feed)",y="weight"),stat="summary",fun_y=numpy.mean) + +############################################################################################################## +##Running the likelihood ratio test + +#Import packages +import numpy +import pandas +from scipy.optimize import minimize +from scipy.stats import norm +from plotnine import * + +#Subsetting data so it only includes sunflower & soybean data points +SFvsSB=chicks.loc[chicks.feed.isin(['sunflower', 'soybean']),:] + +#Creating a new dataframe to change x column to 0's and 1's +SFvsSBdf=pandas.DataFrame({'y':SFvsSB.weight, 'x':0}) +SFvsSBdf.loc[SFvsSB.feed=='sunflower', 'x']=1 + +#Null hypothesis likelihood ratio equation +def nllike(p,obs): + B0=p[0] + sigma=p[1] + expected=B0 + nll=-1*norm(expected,sigma).logpdf(obs.y).sum() + return nll + +#Alternative hypothesis likelihood ratio equation +def nllike2(p,obs): + B0=p[0] + B1=p[1] + sigma=p[2] + expected=B0+B1*obs.x + nll=-1*norm(expected,sigma).logpdf(obs.y).sum() + return nll + +#Estimating parameters by minimizing the nll +initialVals1=numpy.array([1,1,1]) + +#Fit null and alternative hypotheseses to Nelder-Mead model +fitNull=minimize(nllike,initialVals1, method="Nelder-Mead",options={'disp': True}, args=SFvsSBdf) +fitAlt=minimize(nllike2,initialVals1, method="Nelder-Mead",options={'disp': True}, args=SFvsSBdf) +print(fitNull.x) +print(fitAlt.x) + +#Calculating p-value +from scipy.stats 
import chi2 +D=(2*(fitNull.fun-fitAlt.fun)) +feedanswer=(1-chi2.cdf(x=D,df=1)) +print('weight p value') +print(feedanswer) + + + + + diff --git a/Exercise12Q1.html b/Exercise12Q1.html new file mode 100755 index 0000000..27bd29a --- /dev/null +++ b/Exercise12Q1.html @@ -0,0 +1,12320 @@ + + +
+Import all the necessary packages
+ +#Load packages
+import numpy
+import pandas
+from plotnine import *
+Load the CSV file with pandas.read_csv, using a comma as the separator
#Read the comma-separated chick weight table; row 0 supplies the column names
chicks = pandas.read_csv("chickwts.txt", sep=",", header=0)
#Bar graph of the mean weight for each feed type
d = ggplot(chicks) + theme_classic() + xlab("feed") + ylab("weight")
meanBars = geom_bar(aes(x="factor(feed)", y="weight"), stat="summary", fun_y=numpy.mean)
d + meanBars
+#Import packages
+import numpy
+import pandas
+from scipy.optimize import minimize
+from scipy.stats import norm
+from plotnine import *
#Keep only the rows whose feed is sunflower or soybean
isSForSB = chicks.feed.isin(['sunflower', 'soybean'])
SFvsSB = chicks.loc[isSForSB, :]
#Build the dataframe used for model fitting: y is the weight and x is a 0/1
#indicator for the feed (x=1 for "sunflower", x=0 for "soybean")
SFvsSBdf = pandas.DataFrame({'y': SFvsSB.weight, 'x': 0})
isSunflower = SFvsSB.feed == 'sunflower'
SFvsSBdf.loc[isSunflower, 'x'] = 1
#Negative log-likelihood of the null model (one shared mean, no effect of x)
def nllike(p, obs):
    """Return the negative log-likelihood of the null (intercept-only) model.

    Every value in ``obs.y`` is modelled as normally distributed around the
    same mean.

    Parameters
    ----------
    p : sequence of float
        ``p[0]`` is the mean (B0) and ``p[1]`` is the standard deviation
        (sigma). Any further entries are ignored.
    obs : object with a ``y`` attribute
        Observed responses, e.g. a pandas DataFrame with a ``y`` column.

    Returns
    -------
    float
        The negated sum of the normal log-densities of ``obs.y``, or ``inf``
        when sigma is not strictly positive.
    """
    B0 = p[0]
    sigma = p[1]
    # A normal distribution needs sigma > 0. scipy returns NaN for an invalid
    # scale, and an optimizer cannot rank NaN against other values, so report
    # an infinitely bad fit instead (this also catches a NaN sigma).
    if not sigma > 0:
        return float("inf")
    expected = B0
    nll = -1 * norm(expected, sigma).logpdf(obs.y).sum()
    return nll
#Negative log-likelihood of the alternative model (mean depends linearly on x)
def nllike2(p, obs):
    """Return the negative log-likelihood of the alternative (linear) model.

    Each value in ``obs.y`` is modelled as normally distributed around
    ``B0 + B1 * obs.x``.

    Parameters
    ----------
    p : sequence of float
        ``p[0]`` is the intercept (B0), ``p[1]`` is the slope on x (B1) and
        ``p[2]`` is the standard deviation (sigma).
    obs : object with ``x`` and ``y`` attributes
        Predictor and observed responses, e.g. a pandas DataFrame with ``x``
        and ``y`` columns.

    Returns
    -------
    float
        The negated sum of the normal log-densities of ``obs.y``, or ``inf``
        when sigma is not strictly positive.
    """
    B0 = p[0]
    B1 = p[1]
    sigma = p[2]
    # A normal distribution needs sigma > 0. scipy returns NaN for an invalid
    # scale, and an optimizer cannot rank NaN against other values, so report
    # an infinitely bad fit instead (this also catches a NaN sigma).
    if not sigma > 0:
        return float("inf")
    expected = B0 + B1 * obs.x
    nll = -1 * norm(expected, sigma).logpdf(obs.y).sum()
    return nll
#Estimating parameters by minimizing the nll
#Start each search at the sample mean and standard deviation of y instead of
#at an arbitrary value of 1
startMean = SFvsSBdf.y.mean()
startSigma = SFvsSBdf.y.std()
#The null model has two parameters (B0, sigma); the alternative model has
#three (B0, B1, sigma). Each fit gets a start vector of the matching length so
#the optimizer never searches over a parameter the model does not use.
initialValsNull = numpy.array([startMean, startSigma])
initialVals1 = numpy.array([startMean, 1.0, startSigma])

#Fit the null and alternative models with the Nelder-Mead method
#(args is passed as a tuple of the extra arguments handed to the nll function)
fitNull = minimize(nllike, initialValsNull, method="Nelder-Mead", options={'disp': True}, args=(SFvsSBdf,))
fitAlt = minimize(nllike2, initialVals1, method="Nelder-Mead", options={'disp': True}, args=(SFvsSBdf,))
print(fitNull.x)
print(fitAlt.x)
#Calculating p-value
from scipy.stats import chi2
#Likelihood ratio statistic: twice the reduction in negative log-likelihood
#obtained by going from the null fit to the alternative fit
D = 2 * (fitNull.fun - fitAlt.fun)
#chi2.sf is the survival function (1 - cdf). Calling it directly avoids the
#loss of precision that 1 - cdf suffers when the p-value is very small.
#df=1 because the alternative model has one more parameter (B1) than the null.
feedanswer = chi2.sf(D, df=1)
print('weight p value')
print(feedanswer)
+import re
#Sample times used to check the regex
list1 = "13:01", "14:02", "15:03", "16:07", "09:00", "10:00"
#Hours 13-23, then a colon, then minutes 00-59. The hour alternatives sit in
#one group so that ":MM" is required after every hour; with a flat
#(13)|(14)|...|(23):MM alternation only 23 needs the minutes and a bare "13"
#would match.
regex = r"(1[3-9]|2[0-3]):[0-5][0-9]"
var1 = re.compile(regex)
#filter() returns a lazy iterator in Python 3, so build a list in order to
#actually see the matching times
matches1 = list(filter(var1.match, list1))
print(matches1)
+1) First, import your re package.
2) Then create a list that you want to use to check your regex. Put each time in quotes so that Python treats it as a string and doesn't get confused by the colons.
3) Name your regex and type it in between quotes preceded by an r.
4) Use re.compile to turn your regex into a pattern object. Its match method only succeeds when the regex matches starting at the beginning of a string. Remember to give the compiled pattern a name.
+5) Use the filter function to display the times that match the regex from the list.
#Sample names used to check the regex
list2 = 'H. sapien', 'D. rerio', 'Doggo', 'pupper', 'D. melangaster'
#One capital letter, a literal period, a space, then one or more lowercase letters
regex = r"[A-Z]\. [a-z]+"
var2 = re.compile(regex)
#filter() returns a lazy iterator in Python 3, so build a list in order to
#actually see the matching names
matches2 = list(filter(var2.match, list2))
print(matches2)
+1) Create a list
2) Create a regex that does the following: a set of square brackets indicating a search for one capital letter between A and Z, followed by a period, a space, and then one or more lowercase letters between a and z.
+3) Use re.compile and the filter functions to search for your regex within your list.
#Sample strings used to check the regex
list3 = '389-05-4771', '234-444-12', '123-45-6789', '1234'
#Three digits, a dash, two digits, a dash, then four digits
regex = r"[0-9]{3}-[0-9]{2}-[0-9]{4}"
var3 = re.compile(regex)
#filter() returns a lazy iterator in Python 3, so build a list in order to
#actually see the matching strings
matches3 = list(filter(var3.match, list3))
print(matches3)
Repeat steps 1-5 of the first example, except this time your regex should be structured with 0-9 in square brackets followed by the number of repetitions in curly brackets, so that the regex searches for 3 digits, followed by a dash, 2 digits, followed by a dash, and 4 digits.
+ +