diff --git a/12.1.html b/12.1.html new file mode 100644 index 0000000..8c3bc41 --- /dev/null +++ b/12.1.html @@ -0,0 +1,12555 @@ + + +
+import pandas as pd
+import numpy as np
+from scipy.stats import chi2
+from scipy.optimize import minimize
+from scipy.stats import norm
+from plotnine import *
+
# Load the chick-weight data; the code below relies on its `feed` and
# `weight` columns.
chickens = pd.read_csv("chickwts.txt", sep=",")

# Mean weight per feed, keyed by feed name. Taking the mean of the `weight`
# column directly avoids `np.mean(DataFrame)[0]`, whose result (Series vs.
# scalar) depends on the installed pandas version. groupby sorts its keys,
# which is the same order np.unique produced.
means = chickens.groupby("feed")["weight"].mean().to_dict()

# One-row summary table: one column per feed holding that feed's mean weight.
means_df = pd.DataFrame(means, index=[0])
means_df

# Box plot of weight for each feed type.
ggplot(chickens, aes(x="feed", y="weight")) + geom_boxplot() + theme_classic()
# Keep only the sunflower and soybean rows. The explicit .copy() makes this an
# independent frame, so adding a column below never writes into a view of
# `chickens`.
sun_soy_chickens = chickens[chickens.feed.isin(["sunflower", "soybean"])].copy()

# Indicator column for the model: 1 for sunflower, 0 for soybean. Built in one
# vectorised step instead of writing cell-by-cell through a hard-coded column
# position.
sun_soy_chickens["Factor"] = (sun_soy_chickens.feed == "sunflower").astype(int)

sun_soy_chickens
def nllalt(p, obs):
    """Return the negative log-likelihood of the two-group normal model.

    The model is ``weight ~ Normal(B0 + B1 * Factor, sigma)``.

    Parameters
    ----------
    p : sequence of float
        Parameters in the order ``(B0, sigma, B1)``.
    obs : object with ``Factor`` and ``weight`` attributes
        The observations, e.g. a pandas DataFrame with those two columns.

    Returns
    -------
    float
        The summed negative log-density of ``obs.weight``; ``inf`` when
        ``sigma`` is not positive.
    """
    b0, sigma, b1 = p[0], p[1], p[2]

    # A normal scale must be positive. Returning +inf (instead of the NaN
    # scipy would produce) lets a simplex optimizer step back out of the
    # invalid region.
    if sigma <= 0:
        return float("inf")

    expected = b0 + b1 * obs.Factor
    return -norm.logpdf(obs.weight, loc=expected, scale=sigma).sum()
+
+
def nllnull(p, obs):
    """Return the negative log-likelihood of the single-mean normal model.

    The model is ``weight ~ Normal(B0, sigma)``; every observation shares
    the same mean.

    Parameters
    ----------
    p : sequence of float
        Parameters in the order ``(B0, sigma)``. Any further elements are
        ignored.
    obs : object with a ``weight`` attribute
        The observations, e.g. a pandas DataFrame with a ``weight`` column.

    Returns
    -------
    float
        The summed negative log-density of ``obs.weight``; ``inf`` when
        ``sigma`` is not positive.
    """
    b0, sigma = p[0], p[1]

    # A normal scale must be positive. Returning +inf (instead of the NaN
    # scipy would produce) lets a simplex optimizer step back out of the
    # invalid region.
    if sigma <= 0:
        return float("inf")

    return -norm.logpdf(obs.weight, loc=b0, scale=sigma).sum()
# Start both searches near the data (overall mean and spread, no group
# effect) rather than at an arbitrary point far from the weight scale.
weights = sun_soy_chickens.weight
guess = [weights.mean(), weights.std(), 0]
# The null model only has (B0, sigma), so it gets a two-element start vector
# instead of carrying an unused third dimension through the simplex.
guess_null = guess[:2]

fit_alt = minimize(
    nllalt,
    guess,
    args=(sun_soy_chickens,),
    method="Nelder-Mead",
    options={"disp": True},
)
fit_null = minimize(
    nllnull,
    guess_null,
    args=(sun_soy_chickens,),
    method="Nelder-Mead",
    options={"disp": True},
)

# Likelihood-ratio statistic: twice the gap between the two minimised
# negative log-likelihoods.
D = 2 * (fit_null.fun - fit_alt.fun)

# p-value against a chi-square with 1 degree of freedom (the models differ by
# one parameter). sf() is the upper tail, computed without the precision loss
# of 1 - cdf().
chi2.sf(D, df=1)
+| | casein | horsebean | linseed | meatmeal | soybean | sunflower |
|---|---|---|---|---|---|---|
| 0 | 323.583333 | 160.2 | 218.75 | 276.909091 | 246.428571 | 328.916667 |
| | weight | feed | Factor |
|---|---|---|---|
| 22 | 243 | soybean | 0 |
| 23 | 230 | soybean | 0 |
| 24 | 248 | soybean | 0 |
| 25 | 327 | soybean | 0 |
| 26 | 329 | soybean | 0 |
| 27 | 250 | soybean | 0 |
| 28 | 193 | soybean | 0 |
| 29 | 271 | soybean | 0 |
| 30 | 316 | soybean | 0 |
| 31 | 267 | soybean | 0 |
| 32 | 199 | soybean | 0 |
| 33 | 171 | soybean | 0 |
| 34 | 158 | soybean | 0 |
| 35 | 248 | soybean | 0 |
| 36 | 423 | sunflower | 1 |
| 37 | 340 | sunflower | 1 |
| 38 | 392 | sunflower | 1 |
| 39 | 339 | sunflower | 1 |
| 40 | 341 | sunflower | 1 |
| 41 | 226 | sunflower | 1 |
| 42 | 320 | sunflower | 1 |
| 43 | 295 | sunflower | 1 |
| 44 | 334 | sunflower | 1 |
| 45 | 322 | sunflower | 1 |
| 46 | 297 | sunflower | 1 |
| 47 | 318 | sunflower | 1 |
import re
# Candidate clock times in 24-hour HH:MM form.
time = "12:01", "09:25", "23:30", "11:59"

# Times after noon: 13:00-23:59, or 12:01-12:59. The 12 o'clock branch spells
# the minutes as 01-09 or 10-59 so that only 12:00 is excluded (a last digit
# of [1-9] would also reject 12:10, 12:20, ... 12:50).
reg1 = re.compile(r"(([1][3-9]|[2][0-3]):[0-5][0-9])|(12:(0[1-9]|[1-5][0-9]))")

# fullmatch rejects strings with trailing characters; list() is needed because
# filter() returns a lazy iterator, which print() would not expand.
print(list(filter(reg1.fullmatch, time)))
+In this example, we used a regex to filter out any times that do not match our target. The first alternative, (([1][3-9]|[2][0-3]):[0-5][0-9]), matches anything from 13:00 to 23:59, and the second alternative, which begins with 12:, is intended to cover the times from 12:01 to 12:59.
# Candidate organism names; only some are in the abbreviated "G. species" form.
names = "M. tuberculosis", "e. Coli", "E. coli", "Staph aureus"

# One capital letter, a literal period, one whitespace character, then one or
# more lowercase letters.
reg2 = re.compile(r"[A-Z]\.\s[a-z]+")

# fullmatch requires the whole name to fit the pattern; list() is needed
# because filter() returns a lazy iterator, which print() would not expand.
print(list(filter(reg2.fullmatch, names)))
+In this example we used the expression [A-Z]\.\s[a-z]+ to indicate that we wanted terms with a single capital letter, followed by a period, a space, and then one or more lowercase letters (indicated by the plus sign after [a-z]).
# Candidate strings; only some follow the xxx-xx-xxxx digit layout.
numbers = "123-12-1234", "Bob", "234-234-234", "999-88-7777", "Using this to find SS numbers to commit fraud is illegal"

# Three digits, a dash, two digits, a dash, four digits.
reg3 = re.compile(r"[0-9]{3}-[0-9]{2}-[0-9]{4}")

# fullmatch rejects strings with extra trailing characters (for example a
# fifth digit in the last group); list() is needed because filter() returns a
# lazy iterator, which print() would not expand.
print(list(filter(reg3.fullmatch, numbers)))
+In this last example, we used the expression [0-9]{3}-[0-9]{2}-[0-9]{4} to filter out anything that didn't follow the xxx-xx-xxxx format, including the dashes. Only numbers were allowed to fill the "x" spaces in the example.
+With these 3 examples, you should have a firmer grasp on the benefits of using Regular Expressions to find terms in a series of strings.
+ +