-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun_analysis.R
More file actions
108 lines (78 loc) · 3.18 KB
/
run_analysis.R
File metadata and controls
108 lines (78 loc) · 3.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# Get Clean Data - Project
###
### Part 1 - Setup and load data
###
# Load required packages.
# The workspace is deliberately NOT cleared with rm(list = ls()): doing so
# would silently delete the caller's objects whenever this script is source()d,
# and every object used below is created by the script itself.
library(readr)
library(dplyr)
# Download the zip file that contains the data files and unzip the data into
# the project folder
url <- "https://d396qusza40orc.cloudfront.net/getdata%2Fprojectfiles%2FUCI%20HAR%20Dataset.zip"
# mode = "wb" requests a binary transfer so the archive is not altered by
# text-mode line-ending conversion on Windows
download.file(url, "project.zip", mode = "wb")
unzip("project.zip", list = TRUE) # view the files in the zip folder
unzip("project.zip") # unzip file into project directory
###
### Part 2 - Combine the train and test data
###
# Load in the test data.
# The measurement file inside the archive is spelled "X_test.txt" (capital X);
# a lowercase "x_test.txt" only resolves on case-insensitive file systems.
subject_test <- read.table("UCI HAR Dataset/test/subject_test.txt", header = FALSE)
y_test <- read.table("UCI HAR Dataset/test/y_test.txt", header = FALSE)
x_test <- read.table("UCI HAR Dataset/test/X_test.txt", header = FALSE)
# See variable names in features file; its second column (V2) supplies the
# column names applied to the measurement data below
features <- read.table("UCI HAR Dataset/features.txt", header = FALSE)
head(features)
# Rename the columns of the test data
colnames(subject_test) <- "Subject"
head(subject_test)
colnames(y_test) <- "Activity"
head(y_test)
# Label the data set with descriptive variable names.
# Guard first: assigning a too-short name vector to a data frame pads the
# remaining names with NA instead of failing.
stopifnot(nrow(features) == ncol(x_test))
head(x_test)[1:5]
colnames(x_test) <- features$V2
head(x_test)[1:5]
# Column order of the combined table: Subject, Activity, then the measurements
test_data <- cbind(subject_test, y_test, x_test)
# Load in the training data.
# The measurement file inside the archive is spelled "X_train.txt" (capital X);
# a lowercase "x_train.txt" only resolves on case-insensitive file systems.
unzip("project.zip", list = TRUE) # view file names
subject_train <- read.table("UCI HAR Dataset/train/subject_train.txt", header = FALSE)
y_train <- read.table("UCI HAR Dataset/train/y_train.txt", header = FALSE)
x_train <- read.table("UCI HAR Dataset/train/X_train.txt", header = FALSE)
# Rename the columns of the training data
head(subject_train)
colnames(subject_train) <- "Subject"
head(subject_train)
colnames(y_train) <- "Activity"
head(y_train)
# Label the data set with descriptive variable names (taken from the
# `features` table read earlier in the script).
# Guard first: assigning a too-short name vector to a data frame pads the
# remaining names with NA instead of failing.
stopifnot(nrow(features) == ncol(x_train))
head(x_train)[1:5]
colnames(x_train) <- features$V2
head(x_train)[1:5]
# Column order of the combined table: Subject, Activity, then the measurements
train_data <- cbind(subject_train, y_train, x_train)
# Stack the two tables row-wise: training rows first, test rows after them
total_data <- do.call(rbind, list(train_data, test_data))
###
### Part 3 - Extract only the measurements on the mean and standard deviation for each measurement.
###
# Keep the identifier columns plus every column whose name contains "mean" or
# "std". Columns come out in the order the helpers are listed: Subject,
# Activity, the "mean" matches, then the "std" matches.
# contains() ignores case by default, so names containing "Mean" match as well.
data_mean_std <- select(
  total_data,
  contains("Subject"),
  contains("Activity"),
  contains("mean"),
  contains("std")
)
###
### Part 4 - Use descriptive activity names to name the activities in the data set
###
# Load the lookup table of activity labels
activities <- read.table("UCI HAR Dataset/activity_labels.txt", header = FALSE)
activities
names(activities) <- c("Activity", "Activity_Label")
activities
# Attach "Activity_Label" to every row by matching on the activity value, then
# move the three identifier columns to the front of the table
data_mean_std <- select(
  left_join(data_mean_std, activities, by = "Activity"),
  Subject, Activity, Activity_Label, everything()
)
names(data_mean_std)
###
### Part 5 - From the prior data set, create a second, independent tidy data set with the average of each variable for each activity and each subject.
###
# Create the new data set: one row per Subject / Activity_Label pair holding
# the mean of every remaining column. The numeric Activity column is averaged
# too; it is expected to be constant within a group (assuming each label maps
# to a single activity value).
# .groups = "drop" returns an ungrouped table, so no grouping is left behind
# and summarise() does not emit its regrouping message.
tidydata <- data_mean_std %>%
  group_by(Subject, Activity_Label) %>%
  summarise(across(everything(), mean), .groups = "drop")
# Write the new data as a text file (argument spelled out in full rather than
# relying on partial matching of `row.names`)
write.table(tidydata, file = "tidydata.txt", row.names = FALSE)