From 8369633a6c284ee656747b7e0e72ce2732eae003 Mon Sep 17 00:00:00 2001 From: Marlee Shaffer Date: Sun, 14 Nov 2021 09:49:01 -0500 Subject: [PATCH 01/10] Makes base of code structure --- Exercise9R.R | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 Exercise9R.R diff --git a/Exercise9R.R b/Exercise9R.R new file mode 100644 index 0000000..a52eebd --- /dev/null +++ b/Exercise9R.R @@ -0,0 +1,37 @@ +#Marlee Shaffer +##Exercise 9 + +#Write a function that takes a directory name as an argument called dir plus any other arguments required to +#accomplish the specified task. + +#The function should read data from each file in the specified directory and calculate the coefficient of variation +#(standard deviation divided by the mean) for a user specified column. These values should be returned as a +#vector. + +#To calculate a reliable coefficient of variation we would like to have 50 observations, but we also don’t want +#to force the user to use our high standard for the data. Make your function, by default, report an error if any +#file has less than 50 observations, but allow the user to override this behavior and only receive a warning if 50 +#observations are not present in a file. + +#For an extra credit point, add arguments and associated code to your function to situations where a file +#doesn’t have the correct number of columns or the provided data includes NA’s. + +VarCof<-function(dir, column, x=50){ + #List files in a variable + file.list<-list.files(dir) + vector<-c() + #For each of the files, pull out the column of interest + for (i in 1:length(file.list)){ + interestcol<-file.list[, file.list$column] + if{(length(interestcol)>x) + M<-mean(interestcol) + SD<-sd(interestcol, na.rm = FALSE) + output<-SD/M + }else if {(length(interestcol) Date: Sun, 14 Nov 2021 10:09:21 -0500 Subject: [PATCH 02/10] Lots of debugging is happening. Need the file to be a table with headers to call the column. --- Exercise9R.R | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Exercise9R.R b/Exercise9R.R index a52eebd..b2ee2fa 100644 --- a/Exercise9R.R +++ b/Exercise9R.R @@ -19,16 +19,19 @@ VarCof<-function(dir, column, x=50){ #List files in a variable file.list<-list.files(dir) + #Empty vector to add results to vector<-c() #For each of the files, pull out the column of interest for (i in 1:length(file.list)){ + #Read the file as a table so the column can be pulled out + file<-read.table(file.list[i], header = TRUE, stringsAsFactors = FALSE) interestcol<-file.list[, file.list$column] if{(length(interestcol)>x) M<-mean(interestcol) SD<-sd(interestcol, na.rm = FALSE) - output<-SD/M + Var<-SD/M }else if {(length(interestcol) Date: Sun, 14 Nov 2021 13:22:26 -0500 Subject: [PATCH 03/10] Adds defaults and math operations to solve --- Exercise9R.R | 36 ++++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/Exercise9R.R b/Exercise9R.R index b2ee2fa..7869eb3 100644 --- a/Exercise9R.R +++ b/Exercise9R.R @@ -16,7 +16,11 @@ #For an extra credit point, add arguments and associated code to your function to situations where a file #doesn’t have the correct number of columns or the provided data includes NA’s. -VarCof<-function(dir, column, x=50){ +#Need to figure out how to override a default for a warning not an error. By having two defaults, changing them gives the output of 50+, one +#gives an error for being under 50 with req="Yes", and the last gives a Warning for req="No".By changing x, the minimum number of observations +#is changed. + +VarCof<-function(dir, column, x=50, req="Yes"){ #List files in a variable file.list<-list.files(dir) #Empty vector to add results to @@ -25,16 +29,28 @@ VarCof<-function(dir, column, x=50){ for (i in 1:length(file.list)){ #Read the file as a table so the column can be pulled out file<-read.table(file.list[i], header = TRUE, stringsAsFactors = FALSE) - interestcol<-file.list[, file.list$column] - if{(length(interestcol)>x) + #Pull out the column of interest + interestcol<-file[, column] + #Find the number of observations + if(length(interestcol)>x){ #More than X, no error message + #Calculate, mean, standard deviation, and variation M<-mean(interestcol) SD<-sd(interestcol, na.rm = FALSE) Var<-SD/M - }else if {(length(interestcol) Date: Mon, 15 Nov 2021 21:09:16 -0500 Subject: [PATCH 04/10] Fixes minor bugs. Need column number to be the input --- Exercise9R.R | 38 +++++++++++++++----------------------- 1 file changed, 15 insertions(+), 23 deletions(-) diff --git a/Exercise9R.R b/Exercise9R.R index 7869eb3..6c35a9e 100644 --- a/Exercise9R.R +++ b/Exercise9R.R @@ -1,52 +1,44 @@ #Marlee Shaffer ##Exercise 9 -#Write a function that takes a directory name as an argument called dir plus any other arguments required to -#accomplish the specified task. +#VarCof is a function that reads data from each file in the specified directory +#and calculates the coefficient of variation. By default, 50 observations are required +#to get an output. -#The function should read data from each file in the specified directory and calculate the coefficient of variation -#(standard deviation divided by the mean) for a user specified column. These values should be returned as a -#vector. - -#To calculate a reliable coefficient of variation we would like to have 50 observations, but we also don’t want -#to force the user to use our high standard for the data. Make your function, by default, report an error if any -#file has less than 50 observations, but allow the user to override this behavior and only receive a warning if 50 -#observations are not present in a file. - -#For an extra credit point, add arguments and associated code to your function to situations where a file -#doesn’t have the correct number of columns or the provided data includes NA’s. - -#Need to figure out how to override a default for a warning not an error. By having two defaults, changing them gives the output of 50+, one -#gives an error for being under 50 with req="Yes", and the last gives a Warning for req="No".By changing x, the minimum number of observations -#is changed. +#Usage: VarCof(dir = "full directory map", column = "number of column of interest", +#x = minimum number of observations, req = is override on or off (off by default)) VarCof<-function(dir, column, x=50, req="Yes"){ + #Set the working directory to the directory entered + setwd(dir) #List files in a variable file.list<-list.files(dir) #Empty vector to add results to vector<-c() #For each of the files, pull out the column of interest - for (i in 1:length(file.list)){ + for (files in file.list){ #Read the file as a table so the column can be pulled out - file<-read.table(file.list[i], header = TRUE, stringsAsFactors = FALSE) + file<-read.table(files, header = TRUE, sep=",", stringsAsFactors = FALSE) #Pull out the column of interest interestcol<-file[, column] #Find the number of observations - if(length(interestcol)>x){ #More than X, no error message + if(length(interestcol)>=x){ #More than or equal to X, no error message #Calculate, mean, standard deviation, and variation M<-mean(interestcol) SD<-sd(interestcol, na.rm = FALSE) Var<-SD/M vector<-c(vector, Var) - }else if (length(interestcol) Date: Mon, 15 Nov 2021 21:45:39 -0500 Subject: [PATCH 05/10] Creates a data frame as an output for easier reading --- Exercise9R.R | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/Exercise9R.R b/Exercise9R.R index 6c35a9e..14cb5d2 100644 --- a/Exercise9R.R +++ b/Exercise9R.R @@ -3,7 +3,7 @@ #VarCof is a function that reads data from each file in the specified directory #and calculates the coefficient of variation. By default, 50 observations are required -#to get an output. +#to get an output. The column of interest must be numerical. #Usage: VarCof(dir = "full directory map", column = "number of column of interest", #x = minimum number of observations, req = is override on or off (off by default)) @@ -12,22 +12,22 @@ VarCof<-function(dir, column, x=50, req="Yes"){ #Set the working directory to the directory entered setwd(dir) #List files in a variable - file.list<-list.files(dir) + File<-list.files(dir) #Empty vector to add results to - vector<-c() + Variation<-c() #For each of the files, pull out the column of interest - for (files in file.list){ + for (files in File){ #Read the file as a table so the column can be pulled out - file<-read.table(files, header = TRUE, sep=",", stringsAsFactors = FALSE) + file_i<-read.table(files, header = TRUE, sep=",", stringsAsFactors = FALSE) #Pull out the column of interest - interestcol<-file[, column] + interestcol<-file_i[, column] #Find the number of observations if(length(interestcol)>=x){ #More than or equal to X, no error message #Calculate, mean, standard deviation, and variation M<-mean(interestcol) SD<-sd(interestcol, na.rm = FALSE) Var<-SD/M - vector<-c(vector, Var) + Variation<-c(Variation, Var) }else if (length(interestcol) Date: Thu, 18 Nov 2021 11:26:36 -0500 Subject: [PATCH 06/10] Redid entire code. Still not working completely, but is better. Same base. Working on changing x and override and getting proper output --- WorkingExercise9?R.R | 49 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 WorkingExercise9?R.R diff --git a/WorkingExercise9?R.R b/WorkingExercise9?R.R new file mode 100644 index 0000000..df99716 --- /dev/null +++ b/WorkingExercise9?R.R @@ -0,0 +1,49 @@ +#VarCof is a function that reads data from each file in the specified directory +#and calculates the coefficient of variation. By default, 50 observations are required +#to get an output. The column of interest must be numerical. +VarCof<-function(dir, col, x=50, override="No"){ + #Set the working directory to the directory entered + setwd(dir) + #List all files in the directory and save them as a variable + File<-list.files(dir) + #Create an empty vector for variance + Variance<-c() + #Create an empty vector for error + Error<-c() + #For each file.. + for (files in File){ + #Read the file as a table so the columns can be pulled out + file_i<-read.table(files, header = TRUE, sep=",", stringsAsFactors = FALSE) + #Pull out column of interest + interestcol<-file_i[, col] + interestcol.na<-na.omit(interestcol) + #Find the number of observations and do math depending on value of x and override + if (length(interestcol.na)>=x){ #Greater than or equal to 50, or changed value + #Calculate the mean, standard deviation, and variation + M<-mean(interestcol.na) + SD<-sd(interestcol.na, na.rm = FALSE) + Var<-SD/M + Variance<-c(Variance, Var) + Er<-0 + Error<-c(Error, Er) + }else if (length(interestcol.na) Date: Thu, 18 Nov 2021 11:27:53 -0500 Subject: [PATCH 07/10] Made changes to get correct variance. Working on changing defaults and still getting outputs --- Exercise9R.R | 75 +++++++++++++++++++++++++++++----------------------- 1 file changed, 42 insertions(+), 33 deletions(-) diff --git a/Exercise9R.R b/Exercise9R.R index 14cb5d2..b601e9a 100644 --- a/Exercise9R.R +++ b/Exercise9R.R @@ -6,43 +6,52 @@ #to get an output. The column of interest must be numerical. #Usage: VarCof(dir = "full directory map", column = "number of column of interest", -#x = minimum number of observations, req = is override on or off (off by default)) +#x = minimum number of observations (by default, it is 50)) -VarCof<-function(dir, column, x=50, req="Yes"){ - #Set the working directory to the directory entered +VarCof<-function(dir, col, x="50", override="No"){ + #Set the working directory to the directory entered setwd(dir) - #List files in a variable + #List all files in the directory and save them as a variable File<-list.files(dir) - #Empty vector to add results to - Variation<-c() - #For each of the files, pull out the column of interest + #Create an empty vector for variance + Variance<-c() + #Create an empty vector for error + Error<-c() + #For each file.. for (files in File){ - #Read the file as a table so the column can be pulled out - file_i<-read.table(files, header = TRUE, sep=",", stringsAsFactors = FALSE) - #Pull out the column of interest - interestcol<-file_i[, column] - #Find the number of observations - if(length(interestcol)>=x){ #More than or equal to X, no error message - #Calculate, mean, standard deviation, and variation - M<-mean(interestcol) - SD<-sd(interestcol, na.rm = FALSE) + #Read the file as a table so the columns can be pulled out + file_i<-read.table(files, header = TRUE, sep=",", stringsAsFactors = FALSE, fill = TRUE) + #Pull out column of interest + interestcol<-file_i[, col] + interestcol.na<-na.omit(interestcol) + #Find the number of observations and do math depending on value of x and override + if (length(interestcol.na)>=x){ #Greater than or equal to 50, or changed value + #Calculate the mean, standard deviation, and variation + M<-mean(interestcol.na) + SD<-sd(interestcol.na, na.rm = FALSE) Var<-SD/M - Variation<-c(Variation, Var) - }else if (length(interestcol) Date: Thu, 18 Nov 2021 11:30:27 -0500 Subject: [PATCH 08/10] Tested for override = yes and x=50. Worked --- Exercise9R.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Exercise9R.R b/Exercise9R.R index b601e9a..8cec8f0 100644 --- a/Exercise9R.R +++ b/Exercise9R.R @@ -6,7 +6,7 @@ #to get an output. The column of interest must be numerical. #Usage: VarCof(dir = "full directory map", column = "number of column of interest", -#x = minimum number of observations (by default, it is 50)) +#x = minimum number of observations (by default, it is 50), override = "Yes" or "No" for if the number of observations needs to be met) VarCof<-function(dir, col, x="50", override="No"){ #Set the working directory to the directory entered From f5f1817702912aa44eeb0befef5b57adf4a4f771 Mon Sep 17 00:00:00 2001 From: Marlee Shaffer Date: Thu, 18 Nov 2021 11:42:58 -0500 Subject: [PATCH 09/10] Finished code. --- Exercise9R.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Exercise9R.R b/Exercise9R.R index 8cec8f0..bb412e2 100644 --- a/Exercise9R.R +++ b/Exercise9R.R @@ -3,10 +3,11 @@ #VarCof is a function that reads data from each file in the specified directory #and calculates the coefficient of variation. By default, 50 observations are required -#to get an output. The column of interest must be numerical. +#to get an output. The column of interest must be numerical and have a header. #Usage: VarCof(dir = "full directory map", column = "number of column of interest", -#x = minimum number of observations (by default, it is 50), override = "Yes" or "No" for if the number of observations needs to be met) +#x = minimum number of observations (by default, it is 50), override = "Yes" or "No" for +#if the number of observations needs to be met) VarCof<-function(dir, col, x="50", override="No"){ #Set the working directory to the directory entered From cdc2740de93b6d817e1d53f68e3e0c12a204f98c Mon Sep 17 00:00:00 2001 From: mshaffer16 <91496613+mshaffer16@users.noreply.github.com> Date: Thu, 18 Nov 2021 11:46:33 -0500 Subject: [PATCH 10/10] Delete WorkingExercise9?R.R --- WorkingExercise9?R.R | 49 -------------------------------------------- 1 file changed, 49 deletions(-) delete mode 100644 WorkingExercise9?R.R diff --git a/WorkingExercise9?R.R b/WorkingExercise9?R.R deleted file mode 100644 index df99716..0000000 --- a/WorkingExercise9?R.R +++ /dev/null @@ -1,49 +0,0 @@ -#VarCof is a function that reads data from each file in the specified directory -#and calculates the coefficient of variation. By default, 50 observations are required -#to get an output. The column of interest must be numerical. -VarCof<-function(dir, col, x=50, override="No"){ - #Set the working directory to the directory entered - setwd(dir) - #List all files in the directory and save them as a variable - File<-list.files(dir) - #Create an empty vector for variance - Variance<-c() - #Create an empty vector for error - Error<-c() - #For each file.. - for (files in File){ - #Read the file as a table so the columns can be pulled out - file_i<-read.table(files, header = TRUE, sep=",", stringsAsFactors = FALSE) - #Pull out column of interest - interestcol<-file_i[, col] - interestcol.na<-na.omit(interestcol) - #Find the number of observations and do math depending on value of x and override - if (length(interestcol.na)>=x){ #Greater than or equal to 50, or changed value - #Calculate the mean, standard deviation, and variation - M<-mean(interestcol.na) - SD<-sd(interestcol.na, na.rm = FALSE) - Var<-SD/M - Variance<-c(Variance, Var) - Er<-0 - Error<-c(Error, Er) - }else if (length(interestcol.na)