Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions Exercise 10 Script.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
## PROBLEM 1
# Read the crime data (expected to be tab-delimited with a header row).
# With read.table's default check.names = TRUE, a "Crime Rate" header is
# converted to the syntactic column name Crime.Rate used below.
crime <- read.table(
  "crime.txt",
  header = TRUE,
  sep = "\t",
  stringsAsFactors = FALSE
)

# load ggplot
library(ggplot2)

# Scatterplot of crime rate by year with a loess trend line.
# The smoother formula is spelled out so ggplot2 does not have to guess it
# (and print a message about the guess) on every run.
ggplot(crime, aes(x = Year, y = Crime.Rate)) +
  geom_point() +
  stat_smooth(method = "loess", formula = y ~ x) +
  xlab("Year") +
  ylab("Crime Rate (per 100,000 population)") +
  ggtitle("Crime Rate Over Time") +
  theme_classic()


## PROBLEM 2
# Read the comma-delimited data; the plots below use its `region` and
# `observations` columns.
data <- read.table(
  "data.txt",
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE
)

# Bar plot: one bar per region whose height is the mean of `observations`.
ggplot(data, aes(x = region, y = observations)) +
  stat_summary(fun = mean, geom = "bar") +
  xlab("Region") +
  ylab("Average Population") +
  theme_classic()

# Scatterplot of every individual observation by region.
# Only geom_jitter() is used: stacking geom_point() and geom_jitter() would
# draw each observation twice (once at its true position, once jittered).
# Jitter is limited to the horizontal axis (height = 0) so the plotted
# y values remain the exact observed values.
ggplot(data, aes(x = region, y = observations)) +
  geom_jitter(width = 0.2, height = 0) +
  theme_classic()

# The two plots differ because the scatterplot shows the full distribution of
# the data within each region, whereas the bar plot reports only the mean.
# The mean alone hides the shape of the data: the scatterplot shows that the
# observations for south are split into two clusters (a bimodal
# distribution), which the bar plot cannot reveal.
27 changes: 27 additions & 0 deletions crime.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
Year Crime Rate
1965 26036
1966 27857
1967 28846
1968 33209
1969 38801
1970 38018
1971 41645
1972 44785
1973 49433
1974 52398
1975 61966
1976 62207
1977 58980
1978 60462
1979 62037
1980 56457
1981 65949
1982 62126
1983 60186
1984 61152
1985 58770
1986 62459
1987 53775
1988 49216
1989 47799