From bc6dfc7ce692c5e345f67bd706d82d3551a0b51c Mon Sep 17 00:00:00 2001
From: Mati Nemera <mnemera@nd.edu>
Date: Fri, 6 Oct 2017 20:21:06 -0400
Subject: [PATCH 1/4] first commit

---
 exercise7.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)
 create mode 100755 exercise7.py

diff --git a/exercise7.py b/exercise7.py
new file mode 100755
index 0000000..99ac1ce
--- /dev/null
+++ b/exercise7.py
@@ -0,0 +1,12 @@
+import pandas
+InFile=open("Lecture11.fasta","r")
+sequenceLength=[]
+percentGC = []
+for line in InFile:
+    line = line.strip()
+    if ">" in line:
+        next
+    else: 
+        sequenceLength.append(len(line)-1)
+        percentGC.append(1.0*(line.count("G")+line.count("C"))/len(line))
+print(percentGC)
\ No newline at end of file

From 5b3cb066f11cfd72111e005b37c9e4557d0ada6d Mon Sep 17 00:00:00 2001
From: Soren Holm <sholm@prudence.campus.nd.edu>
Date: Wed, 11 Oct 2017 20:49:27 -0400
Subject: [PATCH 2/4] Part 2 and 3 ready for review

---
 exercise72.py |  8 ++++++++
 exercise73.py | 17 +++++++++++++++++
 icecream.txt  | 18 ++++++++++++++++++
 3 files changed, 43 insertions(+)
 create mode 100644 exercise72.py
 create mode 100644 exercise73.py
 create mode 100644 icecream.txt

diff --git a/exercise72.py b/exercise72.py
new file mode 100644
index 0000000..1d97d6f
--- /dev/null
+++ b/exercise72.py
@@ -0,0 +1,8 @@
+import numpy
+import pandas
+from plotnine import *
+ice=pandas.read_csv("icecream.txt",sep=",",header=0)
+
+
+scatter=ggplot(ice,aes(x="Temperature C",y="How much I want ice cream"))
+scatter+geom_point()+coord_cartesian() + stat_smooth(method="lm")
\ No newline at end of file
diff --git a/exercise73.py b/exercise73.py
new file mode 100644
index 0000000..b52f706
--- /dev/null
+++ b/exercise73.py
@@ -0,0 +1,17 @@
+import numpy
+import pandas
+from plotnine import *
+data=pandas.read_csv("data.txt",sep=",",header=0)
+
+
+#produces bar plot for means of populations
+barplot=ggplot(data)+theme_classic()+xlab("region")+ylab("observations")
+barplot+geom_bar(aes(x="factor(region)",y="observations"),stat="summary",fun_y=numpy.mean)
+
+
+#produces scatter plot with jittering of observations
+scatterplot=ggplot(data,aes(x="region",y="observations"))
+scatterplot+geom_point()+coord_cartesian()+geom_jitter()
+
+#The bar plot clearly shows that the mean is about the same for each population. The scatterplot also shows that but less clearly.
+#The new information the scatter plot reveals is the spread and grouping of the data, which was hidden in the bar plot.
\ No newline at end of file
diff --git a/icecream.txt b/icecream.txt
new file mode 100644
index 0000000..1b92598
--- /dev/null
+++ b/icecream.txt
@@ -0,0 +1,18 @@
+"Temperature C","How much I want ice cream"
+2,5
+1,5
+3,4
+5,5
+6,7
+10,7
+9,7
+8,6
+11,7
+15,8
+13,8
+20,9
+25,10
+29,10
+31,11
+22,8
+27,9

From f15103785e2c1233250d73de504612218e9db728 Mon Sep 17 00:00:00 2001
From: Soren Holm <sholm@prudence.campus.nd.edu>
Date: Wed, 11 Oct 2017 21:16:14 -0400
Subject: [PATCH 3/4] Problem 1 complete

---
 exercise7.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/exercise7.py b/exercise7.py
index 99ac1ce..a2108ae 100755
--- a/exercise7.py
+++ b/exercise7.py
@@ -9,4 +9,13 @@
     else: 
         sequenceLength.append(len(line)-1)
         percentGC.append(1.0*(line.count("G")+line.count("C"))/len(line))
-print(percentGC)
\ No newline at end of file
+#print(percentGC)
+
+#So we need the data in a datafram to be used by ggplot aparantly. So here I put it in a dataframe
+data=pandas.DataFrame({"Sequence Length": sequenceLength, "Percent GC": percentGC})
+
+length=ggplot(data,aes(x="Sequence Length"))
+length+geom_histogram()+theme_classic()
+
+gc=ggplot(data,aes(x="Percent GC"))
+gc+geom_histogram()+theme_classic()
\ No newline at end of file

From c3b3028561b02bb174148d9d189900a5b7ced6de Mon Sep 17 00:00:00 2001
From: Mati Nemera <mnemera@nd.edu>
Date: Thu, 12 Oct 2017 00:25:52 -0400
Subject: [PATCH 4/4] exercise7 question 1

---
 exercise7.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/exercise7.py b/exercise7.py
index a2108ae..20a5d6e 100755
--- a/exercise7.py
+++ b/exercise7.py
@@ -3,19 +3,19 @@
 sequenceLength=[]
 percentGC = []
 for line in InFile:
-    line = line.strip()
+    line = line.strip() #remove leading/trailing whitespace, including the newline
     if ">" in line:
         next
     else: 
         sequenceLength.append(len(line)-1)
         percentGC.append(1.0*(line.count("G")+line.count("C"))/len(line))
-#print(percentGC)
+print(percentGC)
 
-#So we need the data in a datafram to be used by ggplot aparantly. So here I put it in a dataframe
+#Puts data in dataframe
 data=pandas.DataFrame({"Sequence Length": sequenceLength, "Percent GC": percentGC})
-
+from plotnine import *
 length=ggplot(data,aes(x="Sequence Length"))
 length+geom_histogram()+theme_classic()
 
 gc=ggplot(data,aes(x="Percent GC"))
-gc+geom_histogram()+theme_classic()
\ No newline at end of file
+gc+geom_histogram()+theme_classic()