From 0327e7943f33a9370de28e06345e1b5bf53318a2 Mon Sep 17 00:00:00 2001
From: omegadan01 <dbruzzes@nd.edi>
Date: Fri, 6 Oct 2017 11:00:46 -0400
Subject: [PATCH 1/9] made file

---
 .idea/vcs.xml |  6 ++++++
 exercise 7.py | 18 ++++++++++++++++++
 2 files changed, 24 insertions(+)
 create mode 100644 .idea/vcs.xml
 create mode 100644 exercise 7.py
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..94a25f7
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+  </component>
+</project>
\ No newline at end of file
diff --git a/exercise 7.py b/exercise 7.py
new file mode 100644
index 0000000..295cf84
--- /dev/null
+++ b/exercise 7.py	
@@ -0,0 +1,18 @@
+#exercise 7#
+#Dan Bruzzese and Zoe Loh
+
+
+
+
+
+# question 1
+
+
+#question2
+
+
+
+
+
+
+#question 3
\ No newline at end of file

From a77b8108893293d15f562f87fb4aff5d5bbd42e1 Mon Sep 17 00:00:00 2001
From: omegadan01 <dbruzzes@nd.edi>
Date: Fri, 6 Oct 2017 11:15:47 -0400
Subject: [PATCH 2/9] started q3

---
 exercise 7.py | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/exercise 7.py b/exercise 7.py
index 295cf84..7d22240 100644
--- a/exercise 7.py	
+++ b/exercise 7.py	
@@ -8,6 +8,8 @@
 # question 1
 
 
+
+
 #question2
 
 
@@ -15,4 +17,22 @@
 
 
 
-#question 3
\ No newline at end of file
+#question 3
+#making the plot
+from plotnine import *
+
+import pandas
+dat = pandas.read_csv("data.txt")
+
+print dat.head(n=5)
+
+#need graph for mean
+
+p=(ggplot(data=dat)
+   + aes( "region", "observations")
+   + geom_bar(stat = "identity")
+   + theme_classic()
+)
+
+print p
+

From 80473e5766f2613f9103979a2384dc2d0d74a263 Mon Sep 17 00:00:00 2001
From: omegadan01 <dbruzzes@nd.edi>
Date: Tue, 10 Oct 2017 02:25:22 -0400
Subject: [PATCH 3/9] work on q3

---
 exercise 7.py | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/exercise 7.py b/exercise 7.py
index 7d22240..2af229f 100644
--- a/exercise 7.py	
+++ b/exercise 7.py	
@@ -24,15 +24,14 @@
 import pandas
 dat = pandas.read_csv("data.txt")
 
-print dat.head(n=5)
+#barplot  for mean observations in a region
+dat_grp= dat['observations'].groupby(dat['region']) #group observations by region
+dat_mean= dat_grp.mean() # mean of the dat grp into a list
 
-#need graph for mean
+df = pandas.DataFrame({'col':dat_mean})  #turn list into a dataframe
+print (df)
+print df[0:4]
+#only has one row....
 
-p=(ggplot(data=dat)
-   + aes( "region", "observations")
-   + geom_bar(stat = "identity")
-   + theme_classic()
-)
 
-print p
 

From d7ee4b5e44bf6e94b176c88662858e5003f9570c Mon Sep 17 00:00:00 2001
From: omegadan01 <dbruzzes@nd.edi>
Date: Tue, 10 Oct 2017 12:17:41 -0400
Subject: [PATCH 4/9] q3 barplot

---
 exercise 7.py | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/exercise 7.py b/exercise 7.py
index 2af229f..27ce2b5 100644
--- a/exercise 7.py	
+++ b/exercise 7.py	
@@ -17,21 +17,22 @@
 
 
 
-#question 3
-#making the plot
+#########question 3################
 from plotnine import *
-
-import pandas
-dat = pandas.read_csv("data.txt")
+import pandas as pd
+dat = pd.read_csv("data.txt")
 
 #barplot  for mean observations in a region
-dat_grp= dat['observations'].groupby(dat['region']) #group observations by region
-dat_mean= dat_grp.mean() # mean of the dat grp into a list
+grouped= dat.groupby(["region"]).mean().reset_index() #mean observations by region
+print grouped
+grouped.columns = ['region', 'mean_observations']
+p= (ggplot(data=grouped)
+    + aes(x='region', y= 'mean_observations',fill= 'region')
+    + geom_bar(stat = "identity")
+    + theme_classic()
+    )
+print p
 
-df = pandas.DataFrame({'col':dat_mean})  #turn list into a dataframe
-print (df)
-print df[0:4]
-#only has one row....
 
 
 

From 399fc3496492433a787ce38573154c801ed867d1 Mon Sep 17 00:00:00 2001
From: omegadan01 <dbruzzes@nd.edi>
Date: Tue, 10 Oct 2017 12:43:11 -0400
Subject: [PATCH 5/9] q3 scatterplot

---
 exercise 7.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/exercise 7.py b/exercise 7.py
index 27ce2b5..7b9d24b 100644
--- a/exercise 7.py	
+++ b/exercise 7.py	
@@ -33,6 +33,11 @@
     )
 print p
 
-
-
+#scatterplot
+d= (ggplot(data=dat)
+    + aes(y='observations', x='region', fill= 'region')
+    + geom_point(alpha= .01)
+    + theme_classic()
+    )
+print d
 

From d69f5f3b871b4a0b6c8a946a7f25c976cc9628bc Mon Sep 17 00:00:00 2001
From: omegadan01 <dbruzzes@nd.edi>
Date: Tue, 10 Oct 2017 12:47:33 -0400
Subject: [PATCH 6/9] q3 finished! and works! let me know if you want me to
 help with q1

---
 exercise 7.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/exercise 7.py b/exercise 7.py
index 7b9d24b..60fadd5 100644
--- a/exercise 7.py	
+++ b/exercise 7.py	
@@ -40,4 +40,5 @@
     + theme_classic()
     )
 print d
-
+# Why: the bar chart shows us the mean of the observations from each region,
+# while the scatter plot shows us the value of every observation from each region.

From afe0ca396955e0999f6ef08edba8df82a43e322c Mon Sep 17 00:00:00 2001
From: Zoe Loh <zloh@nd.edu>
Date: Wed, 11 Oct 2017 21:49:00 -0400
Subject: [PATCH 7/9] problem 1 and 2

---
 exercise 7.py | 34 +++++++++++++++++++++++++++++-----
 1 file changed, 29 insertions(+), 5 deletions(-)

diff --git a/exercise 7.py b/exercise 7.py
index 7d22240..7940b3f 100644
--- a/exercise 7.py	
+++ b/exercise 7.py	
@@ -6,15 +6,39 @@
 
 
 # question 1
-
-
-
+import pandas
+File=open("Lecture11.fasta","r")
+plotData = pandas.DataFrame(columns = ["Sequence Length" , "GC content"])
+
+for line in File:
+    line = line.strip()
+    if ">" in line:
+        continue
+    else:
+        #First the sequence length and the GC content (as a fraction, not a percent) are calculated
+        Length = (len(line)-1)
+        #Because "/" on two integers is integer division here, we force Python to divide as real numbers by using float()
+        GCcount = (float((line.count("G"))+line.count("C"))/len(line))
+        #The values are inserted into a dataframe for plotting
+        row = pandas.DataFrame({"Sequence Length": Length, "GC content": GCcount}, index=[0])
+        plotData = plotData.append(row)
+#GC histogram plot
+a=ggplot(plotData,aes(x="GC content"))
+a+geom_histogram()+theme_classic()        
+
+#sequence length histogram plot
+b=ggplot(plotData,aes(x="Sequence Length"))
+b+geom_histogram()+theme_classic()
 
 #question2
 
+import pandas
+from plotnine import *
+data=pandas.read_csv("heartrate.txt",sep=",",header=0)
 
-
-
+#Here I make the scatter plot showing how running speed and heart rate are related
+plot=ggplot(data,aes(x="Heart rate",y="Running speed"))
+plot+geom_point()+coord_cartesian()+stat_smooth(method="lm")
 
 
 #question 3

From 248cb606f2460ab951331db1f0e7e6f8d94bcf04 Mon Sep 17 00:00:00 2001
From: Zoe Loh <zloh@nd.edu>
Date: Wed, 11 Oct 2017 22:41:23 -0400
Subject: [PATCH 8/9] file for graph

---
 heartrate.txt | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)
 create mode 100644 heartrate.txt

diff --git a/heartrate.txt b/heartrate.txt
new file mode 100644
index 0000000..63dcc94
--- /dev/null
+++ b/heartrate.txt
@@ -0,0 +1,22 @@
+"Heart rate","Running speed"
+80,0
+85,2
+87,3
+90,3
+94,3
+97,4
+102,4
+60,-5
+110,5
+117,6
+120,6
+124,7
+130,7
+138,8
+143,8
+150,9
+157,10
+160,11
+165,12
+170,12.5
+185,14

From 6fcd03057b90d8b233c5ba42df31855af4276851 Mon Sep 17 00:00:00 2001
From: omegadan01 <dbruzzes@nd.edi>
Date: Thu, 12 Oct 2017 00:16:57 -0400
Subject: [PATCH 9/9] added print command to ggplots (didnt work for me
 otherwise) tweaked the alpha in my code Code looks great and runs!!

---
 exercise 7.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/exercise 7.py b/exercise 7.py
index 088d49d..37d91f2 100644
--- a/exercise 7.py	
+++ b/exercise 7.py	
@@ -7,6 +7,7 @@
 
 # question 1
 import pandas
+from plotnine import *
 File=open("Lecture11.fasta","r")
 plotData = pandas.DataFrame(columns = ["Sequence Length" , "GC content"])
 
@@ -24,11 +25,13 @@
         plotData = plotData.append(row)
 #GC histogram plot
 a=ggplot(plotData,aes(x="GC content"))
-a+geom_histogram()+theme_classic()        
+aa= a+geom_histogram()+theme_classic()
+print aa
 
 #sequence length histogram plot
 b=ggplot(plotData,aes(x="Sequence Length"))
-b+geom_histogram()+theme_classic()
+bb=b+geom_histogram()+theme_classic()
+print bb
 
 #question2
 
@@ -38,8 +41,8 @@
 
 #Here I make the scatter plot showing how running speed and heart rate are related
 plot=ggplot(data,aes(x="Heart rate",y="Running speed"))
-plot+geom_point()+coord_cartesian()+stat_smooth(method="lm")
-
+p=plot+geom_point()+coord_cartesian()+stat_smooth(method="lm")
+print p
 
 #########question 3################
 from plotnine import *
@@ -60,7 +63,7 @@
 #scatterplot
 d= (ggplot(data=dat)
     + aes(y='observations', x='region', fill= 'region')
-    + geom_point(alpha= .01)
+    + geom_point(alpha= .1)
     + theme_classic()
     )
 print d