From 6f77eda14b3d35f8399b01774b76cd607aef4042 Mon Sep 17 00:00:00 2001
From: Marlee Shaffer <mshaffe2@nd.edu>
Date: Fri, 19 Nov 2021 10:19:58 -0500
Subject: [PATCH 1/5] Sets up structure of code. Creates bar graph

---
 Exercise10R.R | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)
 create mode 100644 Exercise10R.R

diff --git a/Exercise10R.R b/Exercise10R.R
new file mode 100644
index 0000000..ff3da27
--- /dev/null
+++ b/Exercise10R.R
@@ -0,0 +1,31 @@
+#Marlee Shaffer
+#Exercise 10 - Analysis and Plotting 
+
+#Set working directory 
+setwd("~/Desktop/Biocomp_tutorial12") # NOTE(review): machine-specific path; the relative file reads in this script depend on it - adjust for your machine
+
+#Load packages
+library(ggplot2)
+library(cowplot)
+
+##Part 1
+#Find data on two variables that you would expect to be related to each other 
+#Write a script that loads the text file and produces a scatter plot with a trendline
+
+
+##Part 2
+#Write a script that generates two figures from data.txt
+#Read the data.txt file 
+data<-read.table("data.txt", header = TRUE, sep=",", stringsAsFactors = FALSE)
+
+#Produce a bar plot of means of 4 populations
+ggplot(data, aes(x = region, y = observations, color = region))+
+  stat_summary(geom = "bar", fun = "mean")+ xlab("Region")+
+  ylab("Mean Observations")+ ggtitle("Mean Observations for each Region")+
+  theme_bw() + theme(legend.position = "none")
+
+#Produce a scatterplot of all the observtions 
+
+
+#Do the bar and scatter plots tell different stories? Why?
+

From f82d554eda9a83ae99800dd54c71c48c0c5f4e57 Mon Sep 17 00:00:00 2001
From: Marlee Shaffer <mshaffe2@nd.edu>
Date: Fri, 19 Nov 2021 10:35:54 -0500
Subject: [PATCH 2/5] Adds scatter plot and description

---
 Exercise10R.R | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/Exercise10R.R b/Exercise10R.R
index ff3da27..d30470c 100644
--- a/Exercise10R.R
+++ b/Exercise10R.R
@@ -19,13 +19,28 @@ library(cowplot)
 data<-read.table("data.txt", header = TRUE, sep=",", stringsAsFactors = FALSE)
 
 #Produce a bar plot of means of 4 populations
-ggplot(data, aes(x = region, y = observations, color = region))+
-  stat_summary(geom = "bar", fun = "mean")+ xlab("Region")+
+plot1<-ggplot(data, aes(x = region, y = observations, fill = region))+
+  stat_summary(fun = mean, geom = "bar")+ # bar height = per-region mean; `fun` replaces the deprecated `fun.y`
+  stat_summary(fun.data = mean_se, geom = "errorbar", width = 0.5 )+ xlab("Region")+
   ylab("Mean Observations")+ ggtitle("Mean Observations for each Region")+
   theme_bw() + theme(legend.position = "none")
+plot1
 
-#Produce a scatterplot of all the observtions 
+#Produce a scatter plot of all the observations 
+plot2<-ggplot(data, aes(region, observations, color = region))+
+  geom_jitter()+ # jittered points: one point per observation
+  labs(x = "Region", y = "Observations", title = "Observations for each Region")+
+  theme_bw()+ theme(legend.position = "none") # no legend: region is already on the x axis
+plot2
 
+#Show plots side by side for comparison
+plot_grid(plot1, plot2, ncol = 2, nrow = 1, # one row, two columns
+          rel_widths = c(4, 4),               # both panels get the same relative width
+          labels = c("a", "b"))               # panel labels
 
 #Do the bar and scatter plots tell different stories? Why?
+#The bar graph and the scatter plot of the same data tell different stories.
+#The bar graph shows only the mean of the observations for each region, and these means are
+#similar to each other. The scatter plot shows every observation, thereby revealing the variation
+#among the observations. Error bars were added to the bar graph to better represent the data.
 

From 52e7a22031127b18953ab5724d24b0432ea08aea Mon Sep 17 00:00:00 2001
From: Marlee Shaffer <mshaffe2@nd.edu>
Date: Sun, 21 Nov 2021 12:56:52 -0500
Subject: [PATCH 3/5] Adds solution to part 1

---
 Exercise10R.R |  4 ++++
 Polio.txt     | 37 +++++++++++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+)
 create mode 100644 Polio.txt

diff --git a/Exercise10R.R b/Exercise10R.R
index d30470c..93a07de 100644
--- a/Exercise10R.R
+++ b/Exercise10R.R
@@ -12,6 +12,10 @@ library(cowplot)
 #Find data on two variables that you would expect to be related to each other 
 #Write a script that loads the text file and produces a scatter plot with a trendline
 
+#Load the text file 
+
+#Create the scatter plot with a trend line 
+ggplot( aes()) + geom_point() + stat_smooth(method = "lm")
 
 ##Part 2
 #Write a script that generates two figures from data.txt
diff --git a/Polio.txt b/Polio.txt
new file mode 100644
index 0000000..6b18889
--- /dev/null
+++ b/Polio.txt
@@ -0,0 +1,37 @@
+Cases	Vaccine
+45.982689	20
+30.680101	23
+30.122467	31
+17.625319	30
+11.16941	38
+17.514452	48
+11.931867	52
+8.7925091	54
+6.6822147	63
+6.4030457	66
+4.042563	71
+5.5830231	67
+4.8590789	65
+6.1091552	63
+2.467695	61
+1.8725411	69
+1.2334731	66
+2.8445969	68
+1.230391	71
+1.9823751	73
+1.0716786	73
+0.29694039	74
+0.22351779	74
+0.2246893	75
+0.36383402	77
+1.3877367	77
+0.19927098	75
+0.10566861	75
+0.31011206	75
+0.30686665	75
+0.29852691	76
+0.49573576	79
+0.20091401	78
+0.62201583	79
+0.57006198	80
+0.11664868	80
\ No newline at end of file

From f86fbfeaca3786f4d96148260cb6dac13518526c Mon Sep 17 00:00:00 2001
From: Marlee Shaffer <mshaffe2@nd.edu>
Date: Sun, 21 Nov 2021 13:01:05 -0500
Subject: [PATCH 4/5] Final Code

---
 Exercise10R.R | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/Exercise10R.R b/Exercise10R.R
index 93a07de..4a30f05 100644
--- a/Exercise10R.R
+++ b/Exercise10R.R
@@ -10,14 +10,23 @@ library(cowplot)
 
 ##Part 1
 #Find data on two variables that you would expect to be related to each other 
-#Write a script that loads the text file and produces a scatter plot with a trendline
+#Write a script that loads the text file and produces a scatter plot with a trend line
+
+#Data was found online (TODO: source URL was not recorded - add the citation here)
+#The data show polio cases per million people and the vaccination rate (percent) of one-year-olds
 
 #Load the text file 
+data1<-read.table(file = "Polio.txt", header = TRUE, sep="\t", stringsAsFactors = FALSE) # tab-separated file with a header row
 
 #Create the scatter plot with a trend line 
-ggplot( aes()) + geom_point() + stat_smooth(method = "lm")
-
-##Part 2
+ggplot(data = data1, aes(x = Vaccine, y = Cases)) + 
+  geom_point() + stat_smooth(method = "lm") + # method = "lm" fits a linear model and draws it as the trend line
+  xlab("Percent of Vaccinated One Year Olds") + 
+  ylab("Cases per 1 Million People") + 
+  ggtitle("Polio Cases vs Vaccination Rate of One Year Olds") + 
+  theme_bw()
+
+l##Part 2
 #Write a script that generates two figures from data.txt
 #Read the data.txt file 
 data<-read.table("data.txt", header = TRUE, sep=",", stringsAsFactors = FALSE)

From 4853141e15afe974fc1b116fbfefc38d1dd86db3 Mon Sep 17 00:00:00 2001
From: Marlee Shaffer <mshaffe2@nd.edu>
Date: Sun, 21 Nov 2021 13:02:03 -0500
Subject: [PATCH 5/5] Final Code

---
 Exercise10R.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Exercise10R.R b/Exercise10R.R
index 4a30f05..2af204a 100644
--- a/Exercise10R.R
+++ b/Exercise10R.R
@@ -26,7 +26,7 @@ ggplot(data = data1, aes(x = Vaccine, y = Cases)) +
   ggtitle("Polio Cases vs Vaccination Rate of One Year Olds") + 
   theme_bw()
 
-l##Part 2
+##Part 2
 #Write a script that generates two figures from data.txt
 #Read the data.txt file 
 data<-read.table("data.txt", header = TRUE, sep=",", stringsAsFactors = FALSE)