diff --git a/EX_12_Q1.html b/EX_12_Q1.html new file mode 100644 index 0000000..db66152 --- /dev/null +++ b/EX_12_Q1.html @@ -0,0 +1,12568 @@ + + +
+import numpy
+import os
+import pandas
+import plotnine
+from plotnine import *
+import scipy
+from scipy.optimize import minimize
+from scipy.stats import norm
+from scipy.stats import chi2
+from plotnine import *
+os.listdir('.')  # list the current directory; the return value is not stored
+os.chdir('/Users/sampathkumarbalaji/EX_12/Intro_Biocom_ND_319_Tutorial12')  # NOTE(review): hard-coded absolute path; the relative read below depends on it
+chick_weights = pandas.read_csv("chickwts.txt")  # load the data set from the working directory set above
+print (chick_weights)
+ggplot(chick_weights,aes(x="weight",y="feed"))+geom_point(aes(x="weight",y="feed"))  # point plot with the "weight" column on x and the "feed" column on y
+Sun_Soy_feed_DS = chick_weights.query('feed == "sunflower" or feed == "soybean"')
+Sun_Soy_feed_DS['x'] = [0 if feedtype == 'sunflower' else 1 for feedtype in Sun_Soy_feed_DS.feed]
+def null_hyp(p,obs):  # negative log-likelihood of a single-mean normal model; p = [B0, sigma], obs must expose a .weight column
+ B0=p[0]  # mean shared by every observation
+ sigma=p[1]  # second argument given to norm() below
+
+ expected=B0
+ nll= -1*norm(expected,sigma).logpdf(obs.weight).sum()  # sum the log-densities of obs.weight and negate, so that minimising nll maximises the likelihood
+ return nll
+
+def alt_hyp(p,obs):  # negative log-likelihood of a normal model whose mean depends on obs.x; p = [B0, B1, sigma], obs must expose .x and .weight
+ B0=p[0]  # intercept: expected weight where x == 0
+ B1=p[1]  # coefficient multiplied by obs.x
+ sigma=p[2]  # second argument given to norm() below
+
+ expected=B0+B1*obs.x  # one expected value per observation
+ nll=-1*norm(expected,sigma).logpdf(obs.weight).sum()  # sum the log-densities of obs.weight and negate, so that minimising nll maximises the likelihood
+ return nll
+initialGuess_null_hyp=numpy.array([1,1])
+initialGuess_alt_hyp=numpy.array([1,1,1])
+fitnull=minimize(null_hyp,initialGuess_null_hyp,method="Nelder-Mead",options={'disp': True},args=Sun_Soy_feed_DS)
+fitalt=minimize(alt_hyp,initialGuess_alt_hyp,method="Nelder-Mead",options={'disp': True},args=Sun_Soy_feed_DS)
+1-scipy.stats.chi2.cdf(x=-2*(fitalt.fun-fitnull.fun),df=1)
+Since the p-value is small (< 0.05), the null hypothesis that feed type makes no difference to the weight gained can be rejected.
+ +Exercise 12, Part 2:
+Exercise 12, Part 2 consists of three parts, each requiring a regular expression that matches strings of a certain type. This file provides answers to the three prompts, along with explanations of why each regular expression matches the desired strings and excludes others.
+ +# Regular expression 1: Times after noon, but before midnight when reported in military format:
+
+import re
+
+^([1][2-9]|2[0-3]):?([0-5]\d)$
+
+# Match: 19:15
+
+# Non-match 1: 11:15
+
+# Non-match 2: 24:00
+
+# Non-match 3: 35:65
+Explanation for solution to Part One:
+The ^ character indicates the start of the string, while the ( character indicates where to begin capturing the group. The [1] indicates that the first character must be a 1, and [2-9] requires it to be followed by any digit between 2 and 9. This portion of the regular expression captures hours 12 through 19, i.e. times from noon to 7:59 PM. The | operates as an or, which allows the first character to alternatively be a 2. [0-3] then allows the second character to be 0, 1, 2 or 3 - this captures the remaining hours, 20 through 23, i.e. times from 8 PM to 11:59 PM.
+The :? allows for an optional colon; some times are reported with the colon, whereas others may not use a colon when reporting time. (e.g. 19:00 vs. 1900).
+The [0-5] portion restricts the first digit of the minutes to 0 through 5, and the \d that follows matches any digit, so together they ensure that the minutes fall between 00 and 59. The \d could alternatively have been written as [0-9].
+Each ) character indicates the end of a captured group (the hours and the minutes are captured as separate groups), and the $ indicates the end of the string.
+ +# Regular expression 2: Genus species names expressed in the format G. species (e.g. H. sapiens)
+
+import re
+
+^([A-Z]\.\s[a-z]{1,21})$
+
+# Match: H. sapiens
+
+# Non-match 1: Homo sapiens
+
+# Non-match 2: m. musculus
+Explanation for solution to Part Two:
+Again, the ^ character indicates the start of the string, while the ( character indicates where to begin capturing the group. The [A-Z] portion of the regular expression will accept any single capital letter while excluding lowercase input (i.e. [a-z]). Since a period must follow the capitalized initial of the genus name, an escaped period, \., is added after it (an unescaped . would match any character). The space required by the prompt is matched by adding \s.
+The longest species name is 21 characters long, so the regular expression should accept the full set of lowercase letters of length 1 to 21. This is accomplished through [a-z]{1,21}. The ) character indicates the end of the captured group, and the $ indicates the end of the string.
+ +# Regular expression 3: Social Security numbers (e.g. 389-05-4771)
+
+import re
+
+^([\d]{3}[-][\d]{2}[-][\d]{4})$
+
+# Match: 022-57-9213
+
+# Non-match 1: 0225-7-92-13
+
+# Non-match 2: A0B-12-3456
+Explanation for solution to Part Three:
+Based upon the example, Social Security numbers obey the format AAA-BB-CCCC. This can be captured rather easily using regular expressions.
+As in the regular expressions for parts one and two, the ^ indicates the start of the string and the ( character indicates where to begin capturing the group. The [\d]{3} will capture any sequence of three digits. The [-] requires a dash separating the first three digits from the next two digits, which are denoted as [\d]{2}. A second [-] separates those from the last portion, which consists of any four-digit pattern and can be written as [\d]{4}. The ) character indicates the end of the captured group, and the $ indicates the end of the string.
+ +