From b643f1d5fd9bd59723e8b02208361a288de6fb9e Mon Sep 17 00:00:00 2001 From: Nicholas Ross Date: Mon, 22 Nov 2021 14:31:11 -0500 Subject: [PATCH 1/2] Exercise 10 completion with supplementary files --- Exercise10.R | 74 ++++++++++++++++++++++++++++++++++++++ NFL_Defense_stats_2021.csv | 33 +++++++++++++++++ 2 files changed, 107 insertions(+) create mode 100644 Exercise10.R create mode 100644 NFL_Defense_stats_2021.csv diff --git a/Exercise10.R b/Exercise10.R new file mode 100644 index 0000000..59dbaca --- /dev/null +++ b/Exercise10.R @@ -0,0 +1,74 @@ +## Exercise 10 - Analysis and plotting ## +## by Nicholas Ross on 11/22/2021 + +## Part 1- Plotting and comparing two variables from data set ## + +# load ggplot library +library (ggplot2) + +#read in data from outside source +stats = read.csv("NFL_Defense_stats_2021.csv", header = T, stringsAsFactors = F) + +# set variable for ggplot we want to run +plot0 = ggplot(stats, aes(x = Rank, y = YDS.G))+ + + #give color to points that relate to rankings + geom_point(aes(color = Rank))+ + scale_color_gradientn(colors = terrain.colors(100))+ # set the color gradient to use + xlab("Team Rank")+ # create x axis label + ylab("Yards per Game Allowed")+ # create y axis label + stat_smooth(method="lm")+ # implement trend line + + #set background to black to display colors clearly + theme(panel.background = element_rect(fill = "black")) + + +## Part 2- Side-by-side comparisons using plots ## + +#if used in separate script, be sure load ggplot library with "library(ggplot2)" + +#read in data from outside source +data = read.table("data.txt", header = T, sep = ",", stringsAsFactors = F) + +# set variable equal to ggplot we want to implement +plot1 = ggplot(data, aes(x= region, y = observations, color = factor(region)))+ + stat_summary(fun = mean, geom = "bar")+ # plot as a function of the mean using bar graph + xlab("Region")+ # create x axis label + ylab("Average Population")+ # create y axis label + theme_classic() # set plot theme to classic + +## the code below can be used to find the mean population for each region to ensure plot is accurate +## one could either replace each instance of "north" and "northMean" with a different +## region or extend the if-else statements to include all regions + +# north = 0 +# south = 0 +# east = 0 +# west = 0 +# northMean = 0 +# southMean = 0 +# eastMean = 0 +# westMean = 0 +# +# for (i in 1:nrow(data)){ +# if (data[i,1] == "north"){ +# north [i] = data[i,2] +# } +# } +# northMean = mean(north) + +# create second variable for ggplot to compare side by side with plot1 +plot2 = ggplot(data, aes(x= region, y = observations))+ + geom_jitter(aes(color = factor(region)))+ # create scatter plot using regions as the color factor + xlab("Region")+ # create x axis label + ylab("Average Population")+ # create y axis label + theme_classic() # set plot theme to classic + +# display plot1 and plot2 in the same viewing window +plot_grid(plot1,plot2) + +## In analyzing both plots, one can see that plot1 displays the average population +## per region. However, plot2 shows that plot1 doesn't tell the full story. The north +## region shows that almost all populations in that region are close to the average. The south, however, +## displays that none of the populations are equal to the average, but rather the populations are +## quite larger or relatively small. \ No newline at end of file diff --git a/NFL_Defense_stats_2021.csv b/NFL_Defense_stats_2021.csv new file mode 100644 index 0000000..7cc4b07 --- /dev/null +++ b/NFL_Defense_stats_2021.csv @@ -0,0 +1,33 @@ +Rank,TEAM,GP,PA ,PA/G,YDS/G,PYDS,PYDS/G,RYDS,RYDS/G,SCK,YDS,INT,INTTD,FF,FUMR,FUMTD,HUR,PDEF, +12,Bills,9,127,14.1,274.1,"1,712",190.2,755,83.9,19,99,15,1,9,9,0,58,50, +8,Patriots,11,165,15,313.2,"2,326",211.5,"1,119",101.7,28,200,18,3,6,3,0,50,57, +20,Broncos,10,173,17.3,328.2,"2,181",218.1,"1,101",110.1,24,153,8,0,4,3,0,59,44, +1,Cardinals,10,173,17.3,323,"2,064",206.4,"1,166",116.6,25,149,8,1,15,11,0,37,44, +17,Saints,9,176,19.6,337.8,"2,384",264.9,656,72.9,20,143,11,2,6,2,0,26,46, +3,Packers,10,178,17.8,309.9,"2,027",202.7,"1,072",107.2,24,183,11,0,5,5,0,41,40, +13,Steelers,9,179,19.9,348.9,"2,033",225.9,"1,107",123,26,166,3,0,11,5,0,42,39, +22,Panthers,10,181,18.1,280.7,"1,737",173.7,"1,070",107,27,206,8,0,9,4,0,37,39, +5,Cowboys,9,183,20.3,354,"2,275",252.8,911,101.2,18,156,14,3,5,3,0,37,42, +28,Seahawks,9,184,20.4,400.6,"2,478",275.3,"1,127",125.2,15,111,4,0,4,5,0,43,41, +10,Bengals,9,187,20.8,361.2,"2,343",260.3,908,100.9,23,139,7,0,7,2,0,52,31, +16,Vikings,9,199,22.1,369.1,"2,147",238.6,"1,175",130.6,29,198,8,1,6,4,0,54,36, +7,Buccaneers,9,202,22.4,334,"2,288",254.2,718,79.8,22,127,10,2,7,5,0,43,43, +6,Rams,10,203,20.3,346.9,"2,417",241.7,"1,052",105.2,29,216,12,0,7,2,0,49,37, +4,Ravens,9,205,22.8,371.6,"2,550",283.3,794,88.2,19,137,5,0,5,2,0,38,40, +18,49ers,9,206,22.9,331.4,"1,878",208.7,"1,105",122.8,20,151,4,2,10,3,0,31,31, +26,Giants,9,210,23.3,372.4,"2,249",249.9,"1,103",122.6,19,120,9,1,7,5,0,30,45, +30,Jaguars,9,214,23.8,366.4,"2,344",260.4,954,106,16,129,4,0,3,1,0,46,26, +19,Raiders,9,214,23.8,360.1,"2,079",231,"1,162",129.1,21,109,4,0,9,7,0,38,36, +14,Colts,10,216,21.6,363.6,"2,499",249.9,"1,137",113.7,22,135,10,0,11,11,1,22,38, +11,Chargers,9,218,24.2,361.1,"1,854",206,"1,396",155.1,17,116,6,0,11,5,0,41,21, +21,Eagles,10,221,22.1,350.6,"2,333",233.3,"1,173",117.3,18,137,6,0,8,3,3,40,21, +27,Bears,9,222,24.7,348.9,"2,035",226.1,"1,105",122.8,25,192,4,1,6,5,1,26,31, +9,Chiefs,10,225,22.5,373.2,"2,586",258.6,"1,146",114.6,14,106,8,1,8,4,0,56,29, +2,Titans,10,230,23,366.5,"2,679",267.9,986,98.6,27,192,9,2,10,5,1,50,52, +15,Browns,10,239,23.9,323.9,"2,292",229.2,947,94.7,29,194,5,1,6,3,0,34,46, +23,Football Team,9,246,27.3,376.4,"2,514",279.3,874,97.1,19,97,6,0,7,5,0,39,32, +32,Lions,9,246,27.3,379.8,"2,197",244.1,"1,221",135.7,14,116,5,0,6,5,0,33,33, +25,Dolphins,10,250,25,383.1,"2,738",273.8,"1,093",109.3,21,170,7,1,12,8,1,36,55, +24,Falcons,10,256,25.6,362.3,"2,390",239,"1,233",123.3,14,124,4,0,8,5,0,34,46, +29,Texans,9,258,28.7,385.9,"2,241",249,"1,232",136.9,19,133,9,0,11,5,0,29,34, +31,Jets,9,294,32.7,417.1,"2,549",283.2,"1,205",133.9,20,145,2,,,,,,, \ No newline at end of file From c3c02dcbc7f7c4573d17f4d14c8abaf97357e5f6 Mon Sep 17 00:00:00 2001 From: Nicholas Ross Date: Mon, 22 Nov 2021 14:35:24 -0500 Subject: [PATCH 2/2] updated comments for part 1 --- Exercise10.R | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/Exercise10.R b/Exercise10.R index 59dbaca..aa86fb9 100644 --- a/Exercise10.R +++ b/Exercise10.R @@ -12,15 +12,15 @@ stats = read.csv("NFL_Defense_stats_2021.csv", header = T, stringsAsFactors = F) # set variable for ggplot we want to run plot0 = ggplot(stats, aes(x = Rank, y = YDS.G))+ - #give color to points that relate to rankings - geom_point(aes(color = Rank))+ - scale_color_gradientn(colors = terrain.colors(100))+ # set the color gradient to use - xlab("Team Rank")+ # create x axis label - ylab("Yards per Game Allowed")+ # create y axis label - stat_smooth(method="lm")+ # implement trend line - - #set background to black to display colors clearly - theme(panel.background = element_rect(fill = "black")) + #give color to points that relate to rankings + geom_point(aes(color = Rank))+ + scale_color_gradientn(colors = terrain.colors(100))+ # set the color gradient to use + xlab("Team Rank")+ # create x axis label + ylab("Yards per Game Allowed")+ # create y axis label + stat_smooth(method="lm")+ # implement trend line + + #set background to black to display colors clearly + theme(panel.background = element_rect(fill = "black")) ## Part 2- Side-by-side comparisons using plots ## @@ -59,10 +59,10 @@ plot1 = ggplot(data, aes(x= region, y = observations, color = factor(region)))+ # create second variable for ggplot to compare side by side with plot1 plot2 = ggplot(data, aes(x= region, y = observations))+ - geom_jitter(aes(color = factor(region)))+ # create scatter plot using regions as the color factor - xlab("Region")+ # create x axis label - ylab("Average Population")+ # create y axis label - theme_classic() # set plot theme to classic + geom_jitter(aes(color = factor(region)))+ # create scatter plot using regions as the color factor + xlab("Region")+ # create x axis label + ylab("Average Population")+ # create y axis label + theme_classic() # set plot theme to classic # display plot1 and plot2 in the same viewing window plot_grid(plot1,plot2)