lyy005 · kkilgoreND · Oct 6, 2017 · Oct 6, 2017 · Oct 6, 2017 · Oct 6, 2017
diff --git a/Exercise07-3.py b/Exercise07-3.py
@@ -0,0 +1,24 @@
+#Part 3
+import pandas
+import numpy
+from plotnine import *
+
+#Load the observation table; the code below reads its 'region' and 'observations' columns
+data=pandas.read_csv("data.txt")
+
+#Barplot for the population means: stat="summary" with fun_y=numpy.mean draws one bar per region
+means=ggplot(data)+theme_classic()+xlab("Populations")+ylab("Mean Number of Observations")
+means+geom_bar(aes(x="factor(region)",y="observations",fill="region"),stat="summary",fun_y=numpy.mean)+ggtitle("Population Means")
+
+#Barplot means calculated for check (not necessary)
+data.groupby(['region'])['observations'].mean()
+#means are slightly different
+
+#Scatterplot of every individual observation, jittered and colored by region
+scatter=ggplot(data,aes('observations','region'))
+scatter+geom_jitter(aes(color='factor(region)'))+theme_classic()+ggtitle('All Observations')
+#could have used scatter+geom_jitter()+coord_cartesian() instead
+#The scatterplot shows that although the average observations seem to be similar across the regions, the observation distributions are different.
+
+#Why?
+#The barplot shows only the mean of each region's observations, whilst the scatterplot shows the values of all of the observations from each region.
diff --git a/Exercise07-Full.py b/Exercise07-Full.py
@@ -0,0 +1,59 @@
+#Part 1
+import numpy
+import pandas
+from plotnine import *
+
+#Load data: parse the FASTA file inside `with` so the handle is always closed
+sequenceID=[]
+sequenceLength=[]
+percentGC=[]
+
+#Determine G:C content (assumes each sequence sits on a single line - TODO confirm)
+with open("Lecture11.fasta", "r") as Ex7:
+    for line in Ex7:
+        line=line.strip()
+        if not line:
+            continue  #skip blank lines; a zero length would divide by zero below
+        if line.startswith('>'):
+            sequenceID.append(line[1:])  #header line: keep the ID without the '>'
+        else:
+            seqLen=float(len(line))
+            sequenceLength.append(seqLen)  #record stats for sequence lines only
+            percentGC.append((line.count("G")+line.count("C"))/seqLen*100)
+
+#Generate histogram of G:C content; ggplot needs a DataFrame, not the last line read
+gcData=pandas.DataFrame({"sequenceLength":sequenceLength,"percentGC":percentGC})
+b=ggplot(gcData,aes(x="percentGC"))
+b+geom_histogram()+theme_classic()
+
+#Part 2
+import pandas
+from plotnine import *
+football=pandas.read_csv("NFL-graph-for-class.txt",sep='\t',header=0)  #tab-separated; first row holds the column names
+football.shape  #(rows, columns) sanity check; only echoed in an interactive session
+(ggplot(football,aes(x='Pats-Win-Tot',y='Lions-Win-Tot'))+geom_point()+geom_smooth(method='lm'))  #points plus linear fit
+
+#Part 3
+import pandas
+import numpy
+from plotnine import *
+
+#Load the observation table; the code below reads its 'region' and 'observations' columns
+data=pandas.read_csv("data.txt")
+
+#Barplot for the population means: stat="summary" with fun_y=numpy.mean draws one bar per region
+means=ggplot(data)+theme_classic()+xlab("Populations")+ylab("Mean Number of Observations")
+means+geom_bar(aes(x="factor(region)",y="observations",fill="region"),stat="summary",fun_y=numpy.mean)+ggtitle("Population Means")
+
+#Barplot means calculated for check (not necessary)
+data.groupby(['region'])['observations'].mean()
+#means are slightly different
+
+#Scatterplot of every individual observation, jittered and colored by region
+scatter=ggplot(data,aes('observations','region'))
+scatter+geom_jitter(aes(color='factor(region)'))+theme_classic()+ggtitle('All Observations')
+#could have used scatter+geom_jitter()+coord_cartesian() instead
+#The scatterplot shows that although the average observations seem to be similar across the regions, the observation distributions are different.
+
+#Why?
+#The barplot shows only the mean of each region's observations, whilst the scatterplot shows the values of all of the observations from each region.
diff --git a/Exercise7-1.py b/Exercise7-1.py
@@ -0,0 +1,26 @@
+import numpy
+import pandas
+from plotnine import *
+
+#Load data: parse the FASTA file inside `with` so the handle is always closed
+sequenceID=[]
+sequenceLength=[]
+percentGC=[]
+
+#Determine G:C content (assumes each sequence sits on a single line - TODO confirm)
+with open("Lecture11.fasta", "r") as Ex7:
+    for line in Ex7:
+        line=line.strip()
+        if not line:
+            continue  #skip blank lines; a zero length would divide by zero below
+        if line.startswith('>'):
+            sequenceID.append(line[1:])  #header line: keep the ID without the '>'
+        else:
+            seqLen=float(len(line))
+            sequenceLength.append(seqLen)  #record stats for sequence lines only
+            percentGC.append((line.count("G")+line.count("C"))/seqLen*100)
+
+#Generate histogram of G:C content; ggplot needs a DataFrame, not the last line read
+gcData=pandas.DataFrame({"sequenceLength":sequenceLength,"percentGC":percentGC})
+b=ggplot(gcData,aes(x="percentGC"))
+b+geom_histogram()+theme_classic()
diff --git a/Exercise7-2.py b/Exercise7-2.py
@@ -0,0 +1,5 @@
+import pandas
+from plotnine import *
+football=pandas.read_csv("NFL-graph-for-class.txt",sep='\t',header=0)  #tab-separated; first row holds the column names
+football.shape  #(rows, columns) sanity check; only echoed in an interactive session
+(ggplot(football,aes(x='Pats-Win-Tot',y='Lions-Win-Tot'))+geom_point()+geom_smooth(method='lm'))  #points plus linear fit
diff --git a/NFL-graph-for-class.csv b/NFL-graph-for-class.csv
@@ -0,0 +1,58 @@
+Year,Pats-Wins,Pats-Win-Tot,Lions-Wins,Lions-Win-Tot
+1960,5,5,7,7
+1961,9,14,8,15
+1962,9,23,11,26
+1963,7,30,5,31
+1964,10,40,7,38
+1965,4,44,6,44
+1966,8,52,4,48
+1967,3,55,5,53
+1968,4,59,4,57
+1969,4,63,9,66
+1970,2,65,10,76
+1971,6,71,7,83
+1972,3,74,8,91
+1973,5,79,6,97
+1974,7,86,7,104
+1975,3,89,7,111
+1976,11,100,6,117
+1977,9,109,6,123
+1978,11,120,7,130
+1979,9,129,2,132
+1980,10,139,9,141
+1981,2,141,8,149
+1982,5,146,4,153
+1983,8,154,9,162
+1984,9,163,4,166
+1985,11,174,7,173
+1986,11,185,5,178
+1987,8,193,4,182
+1988,9,202,4,186
+1989,5,207,7,193
+1990,1,208,6,199
+1991,6,214,12,211
+1992,2,216,5,216
+1993,5,221,10,226
+1994,10,231,9,235
+1995,6,237,10,245
+1996,11,248,5,250
+1997,10,258,9,259
+1998,9,267,5,264
+1999,8,275,8,272
+2000,5,280,9,281
+2001,11,291,2,283
+2002,9,300,3,286
+2003,14,314,5,291
+2004,14,328,6,297
+2005,10,338,5,302
+2006,12,350,3,305
+2007,16,366,7,312
+2008,11,377,0,312
+2009,10,387,2,314
+2010,14,401,6,320
+2011,13,414,10,330
+2012,12,426,4,334
+2013,12,438,7,341
+2014,12,450,11,352
+2015,12,462,7,359
+2016,14,476,9,368
diff --git a/NFL-graph-for-class.txt b/NFL-graph-for-class.txt
@@ -0,0 +1,58 @@
+Year	Pats-Wins	Pats-Win-Tot	Lions-Wins	Lions-Win-Tot
+1960	5	5	7	7
+1961	9	14	8	15
+1962	9	23	11	26
+1963	7	30	5	31
+1964	10	40	7	38
+1965	4	44	6	44
+1966	8	52	4	48
+1967	3	55	5	53
+1968	4	59	4	57
+1969	4	63	9	66
+1970	2	65	10	76
+1971	6	71	7	83
+1972	3	74	8	91
+1973	5	79	6	97
+1974	7	86	7	104
+1975	3	89	7	111
+1976	11	100	6	117
+1977	9	109	6	123
+1978	11	120	7	130
+1979	9	129	2	132
+1980	10	139	9	141
+1981	2	141	8	149
+1982	5	146	4	153
+1983	8	154	9	162
+1984	9	163	4	166
+1985	11	174	7	173
+1986	11	185	5	178
+1987	8	193	4	182
+1988	9	202	4	186
+1989	5	207	7	193
+1990	1	208	6	199
+1991	6	214	12	211
+1992	2	216	5	216
+1993	5	221	10	226
+1994	10	231	9	235
+1995	6	237	10	245
+1996	11	248	5	250
+1997	10	258	9	259
+1998	9	267	5	264
+1999	8	275	8	272
+2000	5	280	9	281
+2001	11	291	2	283
+2002	9	300	3	286
+2003	14	314	5	291
+2004	14	328	6	297
+2005	10	338	5	302
+2006	12	350	3	305
+2007	16	366	7	312
+2008	11	377	0	312
+2009	10	387	2	314
+2010	14	401	6	320
+2011	13	414	10	330
+2012	12	426	4	334
+2013	12	438	7	341
+2014	12	450	11	352
+2015	12	462	7	359
+2016	14	476	9	368