-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathIntro_to_R_Tutorial.R
More file actions
87 lines (77 loc) · 3.33 KB
/
Copy pathIntro_to_R_Tutorial.R
File metadata and controls
87 lines (77 loc) · 3.33 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
## INTRO TO R TUTORIAL
# By Elisabetta Canaletti and Casey Hon
# Load one of the data frames that ships with R. Calling data() with no
# arguments lists the datasets that are available to load.
data("iris")
# Open the data frame in a separate viewer window with View() (capital V).
View(iris)
# iris holds sepal length, sepal width, petal length, petal width and species.
# Each of these variables is a column, and together they form the data frame.
# Some functions and datasets live in add-on packages, which have to be
# installed once and then loaded with library() in each new session.
# Install ggplot2 only when it is missing, so that re-running this script
# does not download and reinstall the package every time.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)
# mpg is available once ggplot2 is loaded; entering its name at the console
# prints it.
mpg
# You'll usually want to import your own data (often Excel sheets) from
# another application or file.
# There are a variety of functions that help you do this, depending on the
# file format you're importing, e.g. read_excel(), read_csv(), import().
# readxl provides read_excel(); install it only if it is not already present.
if (!requireNamespace("readxl", quietly = TRUE)) {
  install.packages("readxl")
}
library(readxl)
# Path to the workbook, relative to the current working directory.
gpi_path <- "./GPI-2022-overall-scores-and-domains-2008-2022.xlsx"
# Calling read_excel() on its own reads the file and prints the result at the
# console, but nothing is kept.
read_excel(gpi_path)
# To actually use the data frame afterwards, assign it to a named object.
GPI <- read_excel(gpi_path)
# Inspect the imported object in the viewer. Note the capital V, and that we
# pass the object we just created (GPI).
View(GPI)
# DATA CLEANING
# Selecting columns in a data frame.
# dplyr is a useful package for manipulating data; install it only when it is
# missing so the script can be re-run without reinstalling.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
library(dplyr)
# Keep the Country and iso3c columns by name, plus columns 4 through 17 by
# position.
GPI_1 <- GPI %>%
  select("Country", "iso3c", 4:17)
# TO-DO: section below still needs to be updated
# #selecting and renaming variables in a dataframe
# gallupExtracted <- GPI %>%
# select(WPID, WP1220.AGE = WP1220, INCOME_5,
# COUNTRYNEW, COUNTRY_ISO3, WGT,
# YEAR_WAVE, YEAR_CALENDAR, WP1219.MALE = WP1219,
# EMP_2010, WP1223.MARITAL = WP1223, WP27.COUNT_ON_HELP = WP27,
# WP117.MONEY_STOLEN = WP117, WP118.ASSAULTED = WP118, WP43.SHELTER = WP43,
# WP40.FOOD = WP40, WP97.HEALTH_CARE = WP97, WP23.HEALTH_PROB = WP23,
# WP68.PHYS_PAIN = WP68, WP60.WELL_RESTED = WP60, WP134.CITIZ_ENGA = WP134,
# M30.ECON_COND = M30, WP2319.FEEL_HSEINCOME = WP2319, stress = WP71)
# write.csv(gallupExtracted, "gallupExtracted.csv", row.names = FALSE)
#
#
# #converting from wide to long format: compare GPI_1 (wide) with GPI_2 (long)
# GPI_2 <- gather(GPI_1, key = "Year", value = "GPI")
#
#
# #You can even create your own function. E.g. Kenith made a function to change "DK" and "Refused" responses in Gallup data to NA
# na_codes <- function(x, ...) {
# for(y in list(...))
# x[x == y] <- NA
# x
# }
#
# #recoding responses
# #originally, no social support was coded 0 and social support was coded 1
# #here the coding is flipped (social support = 0, no social support = 1), so results read as: having no social support increases/reduces stress by this much.
# GWP_GPI_data$WP27.COUNT_ON_HELP <- 1 - GWP_GPI_data$WP27.COUNT_ON_HELP
#
#
# #merging dataframes
# GallupWithGDP <- left_join(gallupCleaned, GDPbase, by = c("YEAR_WAVE" = "Year", "COUNTRY_ISO3" = "Country Code"))
#
#
# #standardizing variables (by default scale() centers to mean 0 and scales to SD 1)
# GWP_GPI_data$WP1220.AGE <-scale(GWP_GPI_data$WP1220.AGE)
#
# #factoring variables
# GWP_GPI_data$WP1219.MALE <-as.factor(GWP_GPI_data$WP1219.MALE)
#
#
# #DATA ANALYSIS
# #Model 1: Predict how perceived stress may be linked to GPI scores with GDP, age, gender, income as controls
# mod_base <- glmer(stress~ GPI + year + GDPperCapPPP + INCOME_5 + WP1220.AGE + WP1219.MALE + (GPI_within + year|COUNTRYNEW), dataframe)
#
#
# #DATA VISUALIZATION
# ggplot(data = iris, aes(x = Sepal.Length, y = Sepal.Width)) +
# geom_point()
#
# flower_plot <- ggplot(data = iris, aes(x = Sepal.Length, y = Sepal.Width)) +
# geom_point()