diff --git a/Exercise10R.R b/Exercise10R.R
new file mode 100644
index 0000000..2af204a
--- /dev/null
+++ b/Exercise10R.R
@@ -0,0 +1,76 @@
+#Marlee Shaffer
+#Exercise 10 - Analysis and Plotting
+
+#Set working directory
+#NOTE: the relative paths "Polio.txt" and "data.txt" below are resolved against this folder
+setwd("~/Desktop/Biocomp_tutorial12")
+
+#Load packages
+#ggplot2 builds the plots; cowplot supplies plot_grid() for the side-by-side layout
+library(ggplot2)
+library(cowplot)
+
+##Part 1
+#Find data on two variables that you would expect to be related to each other
+#Write a script that loads the text file and produces a scatter plot with a trend line
+
+#Data was found online from: (TODO: source URL is missing here - add the citation)
+#Data shows Polio cases per million people and the vaccination rate of one-year olds
+
+#Load the text file
+#Tab-separated with a header row; the plot below uses the Cases and Vaccine columns
+data1 <- read.table(file = "Polio.txt", header = TRUE, sep = "\t", stringsAsFactors = FALSE)
+
+#Create the scatter plot with a trend line
+#The trend line is a linear-model fit; formula = y ~ x spells out the default explicitly
+ggplot(data = data1, aes(x = Vaccine, y = Cases)) +
+  geom_point() +
+  stat_smooth(method = "lm", formula = y ~ x) +
+  xlab("Percent of Vaccinated One Year Olds") +
+  ylab("Cases per 1 Million People") +
+  ggtitle("Polio Cases vs Vaccination Rate of One Year Olds") +
+  theme_bw()
+
+##Part 2
+#Write a script that generates two figures from data.txt
+#Read the data.txt file
+#Comma-separated with a header row; the plots below use the region and observations columns
+data <- read.table("data.txt", header = TRUE, sep = ",", stringsAsFactors = FALSE)
+
+#Produce a bar plot of means of 4 populations
+#Bars show the per-region mean; error bars show mean_se (mean +/- one standard error)
+#stat_summary() is given `fun` because `fun.y` is deprecated since ggplot2 3.3.0
+plot1 <- ggplot(data, aes(x = region, y = observations, fill = region)) +
+  stat_summary(fun = mean, geom = "bar") +
+  stat_summary(fun.data = mean_se, geom = "errorbar", width = 0.5) +
+  xlab("Region") +
+  ylab("Mean Observations") +
+  ggtitle("Mean Observations for each Region") +
+  theme_bw() +
+  theme(legend.position = "none")
+plot1
+
+#Produce a scatter plot of all the observations
+#geom_jitter() adds a little random offset so overlapping points stay visible
+plot2 <- ggplot(data, aes(x = region, y = observations, color = region)) +
+  geom_jitter() +
+  xlab("Region") +
+  ylab("Observations") +
+  ggtitle("Observations for each Region") +
+  theme_bw() +
+  theme(legend.position = "none")
+plot2
+
+#Show plots side by side for comparison
+plot_grid(plot1, plot2,
+  labels = c("a", "b"),
+  rel_widths = c(4, 4),
+  ncol = 2, nrow = 1
+)
+
+#Do the bar and scatter plots tell different stories? Why? +#The bar graph and the scatter plot of the same data tell different stories. +#The bar graph is showing the mean of the overall data for each region, which were similar +#to each other. The scatter plot shows each observation, thereby showing the variation between +#the observations. Error bars were added to the bar graph to better represent the data. + diff --git a/Polio.txt b/Polio.txt new file mode 100644 index 0000000..6b18889 --- /dev/null +++ b/Polio.txt @@ -0,0 +1,37 @@ +Cases Vaccine +45.982689 20 +30.680101 23 +30.122467 31 +17.625319 30 +11.16941 38 +17.514452 48 +11.931867 52 +8.7925091 54 +6.6822147 63 +6.4030457 66 +4.042563 71 +5.5830231 67 +4.8590789 65 +6.1091552 63 +2.467695 61 +1.8725411 69 +1.2334731 66 +2.8445969 68 +1.230391 71 +1.9823751 73 +1.0716786 73 +0.29694039 74 +0.22351779 74 +0.2246893 75 +0.36383402 77 +1.3877367 77 +0.19927098 75 +0.10566861 75 +0.31011206 75 +0.30686665 75 +0.29852691 76 +0.49573576 79 +0.20091401 78 +0.62201583 79 +0.57006198 80 +0.11664868 80 \ No newline at end of file