From 6f77eda14b3d35f8399b01774b76cd607aef4042 Mon Sep 17 00:00:00 2001 From: Marlee Shaffer Date: Fri, 19 Nov 2021 10:19:58 -0500 Subject: [PATCH 1/5] Sets up structure of code. Creates bar graph --- Exercise10R.R | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 Exercise10R.R diff --git a/Exercise10R.R b/Exercise10R.R new file mode 100644 index 0000000..ff3da27 --- /dev/null +++ b/Exercise10R.R @@ -0,0 +1,31 @@ +#Marlee Shaffer +#Exercise 10 - Analysis and Plotting + +#Set working directory +setwd("~/Desktop/Biocomp_tutorial12") + +#Load packages +library(ggplot2) +library(cowplot) + +##Part 1 +#Find data on two variables that you would expect to be related to each other +#Write a script that loads the text file and produces a scatter plot with a trendline + + +##Part 2 +#Write a script that generates two figures from data.txt +#Read the data.txt file +data<-read.table("data.txt", header = TRUE, sep=",", stringsAsFactors = FALSE) + +#Produce a bar plot of means of 4 populations +ggplot(data, aes(x = region, y = observations, color = region))+ + stat_summary(geom = "bar", fun = "mean")+ xlab("Region")+ + ylab("Mean Observations")+ ggtitle("Mean Observations for each Region")+ + theme_bw() + theme(legend.position = "none") + +#Produce a scatterplot of all the observtions + + +#Do the bar and scatter plots tell different stories? Why? 
+ From f82d554eda9a83ae99800dd54c71c48c0c5f4e57 Mon Sep 17 00:00:00 2001 From: Marlee Shaffer Date: Fri, 19 Nov 2021 10:35:54 -0500 Subject: [PATCH 2/5] Adds scatter plot and description --- Exercise10R.R | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/Exercise10R.R b/Exercise10R.R index ff3da27..d30470c 100644 --- a/Exercise10R.R +++ b/Exercise10R.R @@ -19,13 +19,28 @@ library(cowplot) data<-read.table("data.txt", header = TRUE, sep=",", stringsAsFactors = FALSE) #Produce a bar plot of means of 4 populations -ggplot(data, aes(x = region, y = observations, color = region))+ - stat_summary(geom = "bar", fun = "mean")+ xlab("Region")+ +plot1<-ggplot(data, aes(x = region, y = observations, fill = region))+ + stat_summary(fun.y = mean, geom = "bar")+ + stat_summary(fun.data = mean_se, geom = "errorbar", width = 0.5 )+ xlab("Region")+ ylab("Mean Observations")+ ggtitle("Mean Observations for each Region")+ theme_bw() + theme(legend.position = "none") +plot1 -#Produce a scatterplot of all the observtions +#Produce a scatter plot of all the observations +plot2<-ggplot(data, aes(x = region, y = observations, color = region))+ + geom_jitter() + xlab("Region") + ylab("Observations")+ + ggtitle("Observations for each Region")+ theme_bw()+ + theme(legend.position = "none") +plot2 +#Show plots side by side for comparison +plot_grid(plot1, plot2, labels = c("a", "b"), + rel_widths = c(4, 4), + ncol = 2, nrow = 1) #Do the bar and scatter plots tell different stories? Why? +#The bar graph and the scatter plot of the same data tell different stories. +#The bar graph is showing the mean of the overall data for each region, which were similar +#to each other. The scatter plot shows each observation, thereby showing the variation between +#the observations. Error bars were added to the bar graph to better represent the data. 
From 52e7a22031127b18953ab5724d24b0432ea08aea Mon Sep 17 00:00:00 2001 From: Marlee Shaffer Date: Sun, 21 Nov 2021 12:56:52 -0500 Subject: [PATCH 3/5] Adds solution to part 1 --- Exercise10R.R | 4 ++++ Polio.txt | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) create mode 100644 Polio.txt diff --git a/Exercise10R.R b/Exercise10R.R index d30470c..93a07de 100644 --- a/Exercise10R.R +++ b/Exercise10R.R @@ -12,6 +12,10 @@ library(cowplot) #Find data on two variables that you would expect to be related to each other #Write a script that loads the text file and produces a scatter plot with a trendline +#Load the text file + +#Create the scatter plot with a trend line +ggplot( aes()) + geom_point() + stat_smooth(method = "lm") ##Part 2 #Write a script that generates two figures from data.txt diff --git a/Polio.txt b/Polio.txt new file mode 100644 index 0000000..6b18889 --- /dev/null +++ b/Polio.txt @@ -0,0 +1,37 @@ +Cases Vaccine +45.982689 20 +30.680101 23 +30.122467 31 +17.625319 30 +11.16941 38 +17.514452 48 +11.931867 52 +8.7925091 54 +6.6822147 63 +6.4030457 66 +4.042563 71 +5.5830231 67 +4.8590789 65 +6.1091552 63 +2.467695 61 +1.8725411 69 +1.2334731 66 +2.8445969 68 +1.230391 71 +1.9823751 73 +1.0716786 73 +0.29694039 74 +0.22351779 74 +0.2246893 75 +0.36383402 77 +1.3877367 77 +0.19927098 75 +0.10566861 75 +0.31011206 75 +0.30686665 75 +0.29852691 76 +0.49573576 79 +0.20091401 78 +0.62201583 79 +0.57006198 80 +0.11664868 80 \ No newline at end of file From f86fbfeaca3786f4d96148260cb6dac13518526c Mon Sep 17 00:00:00 2001 From: Marlee Shaffer Date: Sun, 21 Nov 2021 13:01:05 -0500 Subject: [PATCH 4/5] Final Code --- Exercise10R.R | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/Exercise10R.R b/Exercise10R.R index 93a07de..4a30f05 100644 --- a/Exercise10R.R +++ b/Exercise10R.R @@ -10,14 +10,23 @@ library(cowplot) ##Part 1 #Find data on two variables that you would expect to be related to each other 
-#Write a script that loads the text file and produces a scatter plot with a trendline +#Write a script that loads the text file and produces a scatter plot with a trend line + +#Data was found online from: +#Data shows Polio cases per million people and the vaccination rate of one-year-olds #Load the text file +data1<-read.table(file = "Polio.txt", header = TRUE, sep="\t", stringsAsFactors = FALSE) #Create the scatter plot with a trend line -ggplot( aes()) + geom_point() + stat_smooth(method = "lm") - -##Part 2 +ggplot(data = data1, aes(x = Vaccine, y = Cases)) + + geom_point() + stat_smooth(method = "lm") + + xlab("Percent of Vaccinated One Year Olds") + + ylab("Cases per 1 Million People") + + ggtitle("Polio Cases vs Vaccination Rate of One Year Olds") + + theme_bw() + +l##Part 2 #Write a script that generates two figures from data.txt #Read the data.txt file data<-read.table("data.txt", header = TRUE, sep=",", stringsAsFactors = FALSE) From 4853141e15afe974fc1b116fbfefc38d1dd86db3 Mon Sep 17 00:00:00 2001 From: Marlee Shaffer Date: Sun, 21 Nov 2021 13:02:03 -0500 Subject: [PATCH 5/5] Final Code --- Exercise10R.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Exercise10R.R b/Exercise10R.R index 4a30f05..2af204a 100644 --- a/Exercise10R.R +++ b/Exercise10R.R @@ -26,7 +26,7 @@ ggplot(data = data1, aes(x = Vaccine, y = Cases)) + ggtitle("Polio Cases vs Vaccination Rate of One Year Olds") + theme_bw() -l##Part 2 +##Part 2 #Write a script that generates two figures from data.txt #Read the data.txt file data<-read.table("data.txt", header = TRUE, sep=",", stringsAsFactors = FALSE)