From 3b7a2f0821de4926469f7b22bf6c33bdd3fc287b Mon Sep 17 00:00:00 2001
From: Casey Novak
Date: Fri, 26 Nov 2021 08:29:24 -0800
Subject: [PATCH 1/4] Answers for question 1

---
 Exercise10_Answers.R | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 Exercise10_Answers.R

diff --git a/Exercise10_Answers.R b/Exercise10_Answers.R
new file mode 100644
index 0000000..6bc94ee
--- /dev/null
+++ b/Exercise10_Answers.R
@@ -0,0 +1,27 @@
+# Tutorial 12
+## Exercise 10
+
+# Question 1
+
+# Set working directory
+setwd("~/Desktop/Fall-2021/Biocomputing/Biocomp_tutorial12/")
+
+# Load marathon pace data text file
+marathon <- read.table("Marathon_Pace.txt", header = FALSE, sep = "\t", stringsAsFactors = FALSE)
+colnames(marathon) <- c("mile","time")
+
+# Load ggplot library and check marathon file for headers
+head(marathon)
+library(ggplot2)
+
+# Scatter plot of two variables: mile count vs time in minutes
+ggplot(marathon, aes(x=mile,y=time))+
+  geom_point(size=3)+
+  xlab("Mile Count")+
+  ylab("Time Spent Running (minutes)")+
+  theme_classic()+
+  geom_smooth(method="lm") # Include trend line
+
+
+# Question 2
+

From d301bdd0a7309a2dfdb598f9fcc1bd12afb9f2a8 Mon Sep 17 00:00:00 2001
From: Casey Novak
Date: Fri, 26 Nov 2021 09:34:53 -0800
Subject: [PATCH 2/4] Added plots for problem two

---
 Exercise10_Answers.R | 30 +++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

diff --git a/Exercise10_Answers.R b/Exercise10_Answers.R
index 6bc94ee..a7e3594 100644
--- a/Exercise10_Answers.R
+++ b/Exercise10_Answers.R
@@ -15,7 +15,7 @@ head(marathon)
 library(ggplot2)
 
 # Scatter plot of two variables: mile count vs time in minutes
-ggplot(marathon, aes(x=mile,y=time))+
+ggplot(data=marathon, aes(x=mile,y=time))+
   geom_point(size=3)+
   xlab("Mile Count")+
   ylab("Time Spent Running (minutes)")+
@@ -25,3 +25,31 @@ ggplot(marathon, aes(x=mile,y=time))+
 
 # Question 2
 
+# Load data from data.txt
+data <- read.table("data.txt", sep = ",", header=TRUE, stringsAsFactors = TRUE)
+head(data)
+
+# Bar plot of the means of four observations
+ggplot(data=data, aes(x=region,y=observations,color=region))+
+  stat_summary(fun='mean',geom='bar')+
+  xlab("Regions")+
+  ylab("Mean of Observations")+
+  theme_classic()
+
+# Scatter plot of all observations
+ggplot(data=data, aes(x=region, y=observations, color=region))+
+  geom_point()+
+  geom_jitter()+
+  xlab("Region")+
+  ylab("Observations")+
+  theme_classic()
+
+
+
+
+
+
+
+
+
+

From f65510fe49d5e240623603a99bfa7f0f2e08bbd7 Mon Sep 17 00:00:00 2001
From: Casey Novak
Date: Fri, 26 Nov 2021 09:50:44 -0800
Subject: [PATCH 3/4] Final Answers

---
 Exercise10_Answers.R | 29 +++++++++++++++++++----------
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/Exercise10_Answers.R b/Exercise10_Answers.R
index a7e3594..cb2b099 100644
--- a/Exercise10_Answers.R
+++ b/Exercise10_Answers.R
@@ -1,18 +1,20 @@
 # Tutorial 12
 ## Exercise 10
 
-# Question 1
-
 # Set working directory
 setwd("~/Desktop/Fall-2021/Biocomputing/Biocomp_tutorial12/")
 
+# Load ggplot library
+library(ggplot2)
+
+
+
+# Question 1
+
 # Load marathon pace data text file
 marathon <- read.table("Marathon_Pace.txt", header = FALSE, sep = "\t", stringsAsFactors = FALSE)
 colnames(marathon) <- c("mile","time")
-
-# Load ggplot library and check marathon file for headers
 head(marathon)
-library(ggplot2)
 
 # Scatter plot of two variables: mile count vs time in minutes
 ggplot(data=marathon, aes(x=mile,y=time))+
@@ -23,29 +25,36 @@ ggplot(data=marathon, aes(x=mile,y=time))+
   geom_smooth(method="lm") # Include trend line
 
 
+
 # Question 2
 
 # Load data from data.txt
 data <- read.table("data.txt", sep = ",", header=TRUE, stringsAsFactors = TRUE)
 head(data)
 
-# Bar plot of the means of four observations
-ggplot(data=data, aes(x=region,y=observations,color=region))+
+# Bar plot of the means of the four populations
+ggplot(data=data, aes(x=region,y=observations,fill=region))+
   stat_summary(fun='mean',geom='bar')+
-  xlab("Regions")+
+  xlab("Region")+
   ylab("Mean of Observations")+
   theme_classic()
 
 # Scatter plot of all observations
-ggplot(data=data, aes(x=region, y=observations, color=region))+
+ggplot(data=data, aes(x=region,y=observations,color=region))+
   geom_point()+
   geom_jitter()+
   xlab("Region")+
   ylab("Observations")+
   theme_classic()
 
+## Do the bar and scatter plots tell you different stories? Why?
 
-
+# Yes, the scatter plots and bar plots tell very different stories.
+# The bar plot only shows the mean of each population, which are very similar for all of the regions.
+# This makes it look like the populations are very similar to each other in general.
+# The scatter plot shows every observation, and gives a more accurate picture of what each population looks like.
+# The East and West regions are more spread out, while the North region is concentrated in the middle and the South is concentrated on both ends.
+# While the mean for each region is almost the same, the distributions are very different.
 
 
 

From 577486427ded31aa3074c421df19fae82076162f Mon Sep 17 00:00:00 2001
From: Casey Novak
Date: Fri, 26 Nov 2021 12:00:44 -0800
Subject: [PATCH 4/4] Marathon Pace text file

---
 Marathon_Pace.txt | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 Marathon_Pace.txt

diff --git a/Marathon_Pace.txt b/Marathon_Pace.txt
new file mode 100644
index 0000000..2d18f61
--- /dev/null
+++ b/Marathon_Pace.txt
@@ -0,0 +1,27 @@
+1	8
+2	16
+3	24
+4	32
+5	40
+6	48
+7	56
+8	64
+9	72
+10	80
+11	88
+12	96
+13	104
+14	112
+15	120
+16	128
+17	136
+18	144
+19	152
+20	160
+21	168
+22	176
+23	184
+24	192
+25	200
+26	208
+26.2	209.45
\ No newline at end of file