diff --git a/Exercise 10 Script.R b/Exercise 10 Script.R
new file mode 100644
index 0000000..01cfc65
--- /dev/null
+++ b/Exercise 10 Script.R
@@ -0,0 +1,53 @@
+# load ggplot
+library(ggplot2)
+
+## PROBLEM 1
+# read in data: tab-delimited with a header row; read.table() makes the
+# "Crime Rate" header syntactically valid, so the column becomes Crime.Rate
+crime <- read.table(
+  "crime.txt",
+  header = TRUE,
+  sep = "\t",
+  stringsAsFactors = FALSE
+)
+
+# use ggplot to produce scatterplot of crime rate against year, with a loess
+# smooth drawn over the points
+ggplot(crime, aes(x = Year, y = Crime.Rate)) +
+  geom_point() +
+  stat_smooth(method = "loess", formula = y ~ x) +
+  xlab("Year") +
+  ylab("Crime Rate (per 100,000 population)") +
+  ggtitle("Crime Rate Over Time") +
+  theme_classic()
+
+
+## PROBLEM 2
+# read in data: comma-delimited with a header row; the plots below use the
+# region and observations columns
+data <- read.table(
+  "data.txt",
+  header = TRUE,
+  sep = ",",
+  stringsAsFactors = FALSE
+)
+
+# use ggplot to produce barplot of the mean of observations in each region
+ggplot(data, aes(x = region, y = observations)) +
+  stat_summary(fun = mean, geom = "bar") +
+  xlab("Region") +
+  ylab("Average Population") +
+  theme_classic()
+
+# produce scatterplot of every observation by region; geom_jitter() already
+# draws each point with a small random offset, so geom_point() is not added
+# as well -- that would plot every observation twice
+ggplot(data, aes(x = region, y = observations)) +
+  geom_jitter() +
+  theme_classic()
+
+# The barplot and scatterplot differ because the scatterplot shows the
+# distribution of the individual observations, whereas the barplot reports
+# only the average for each region, which is not a very precise summary. The
+# scatterplot shows that for south the data is very split (a bimodal
+# distribution); the barplot does not show this.
\ No newline at end of file
diff --git a/crime.txt b/crime.txt
new file mode 100644
index 0000000..7d69fd5
--- /dev/null
+++ b/crime.txt
@@ -0,0 +1,27 @@
+Year	Crime Rate
+1965	26036
+1966	27857
+1967	28846
+1968	33209
+1969	38801
+1970	38018
+1971	41645
+1972	44785
+1973	49433
+1974	52398
+1975	61966
+1976	62207
+1977	58980
+1978	60462
+1979	62037
+1980	56457
+1981	65949
+1982	62126
+1983	60186
+1984	61152
+1985	58770
+1986	62459
+1987	53775
+1988	49216
+1989	47799
+	
\ No newline at end of file