-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathpreprocessing.R
More file actions
56 lines (48 loc) · 2.66 KB
/
preprocessing.R
File metadata and controls
56 lines (48 loc) · 2.66 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
library(readxl)
library(dplyr)
library(tidyr)
library(stringr)
library(purrr)
source('utils.R')
#' Extract the worker roster (ID and name) from the raw time-clock sheet.
#'
#' Rows whose `Fecha` value does not contain an `NN/NN/NNNN` date-like pattern
#' are treated as worker header rows and split on "-" into `ID` and `Name`.
#'
#' @param workerData Data frame with a `Fecha` column.
#' @param workersId Optional vector of worker IDs to keep. `NULL` or an empty
#'   vector keeps every worker.
#' @return A data frame with an integer `ID` column and a whitespace-trimmed
#'   `Name` column, excluding the IDs listed in `IGNORE_WORKERS_IDS`
#'   (not defined in this file; presumably provided by utils.R).
getWorkersId <- function(workerData, workersId = NULL) {
  roster <- workerData %>%
    filter(!str_detect(Fecha, "\\d{2}\\/\\d{2}\\/\\d{4}")) %>%
    select(worker = Fecha) %>%
    separate(worker, c("ID", "Name"), sep = "\\-") %>%
    mutate(ID = as.integer(ID), Name = str_trim(Name)) %>%
    filter(!(ID %in% IGNORE_WORKERS_IDS))

  # A plain `if` is required here: `ifelse()` shapes its result like the
  # (length-1) test, so it would hand back only the first column wrapped in a
  # list instead of the filtered data frame.
  if (length(workersId) > 0) {
    roster <- filter(roster, ID %in% workersId)
  }
  roster
}
#' Build the per-day time-clock records of the workers from the raw sheet.
#'
#' Header rows (a `Fecha` value starting with digits followed by a space) supply
#' the worker ID, which is carried down onto the rows below it. Only rows whose
#' `Fecha` contains an `NN/NN/NNNN` date-like pattern and whose `Registros` is
#' neither missing, "--", nor "VACACIONES" (case-insensitive) are kept.
#' `Registros` is split on "|" into up to four stamps `T1`..`T4`.
#'
#' @param workerData Data frame with at least the columns `Fecha`, `Registros`,
#'   `Observaciones`, `Viatico alimentacion` and `Viatico de transporte`. An
#'   `Entrada` column is optional; when present, rows where it is `NA` are
#'   dropped.
#' @param workersId Optional vector of worker IDs to keep. `NULL` or an empty
#'   vector keeps every worker.
#' @return A data frame with columns `ID` (integer), `Fecha`, `T1`..`T4`,
#'   `Observaciones`, `viatico_alimentacion`, `viatico_transporte` (all trimmed
#'   character) and `incomplete` (logical, `TRUE` when no fourth stamp existed).
#'   Workers listed in `IGNORE_WORKERS_IDS` (not defined in this file;
#'   presumably provided by utils.R) are excluded.
getWorkerTimeClock <- function(workerData, workersId = NULL) {
  date_pattern <- "\\d{2}\\/\\d{2}\\/\\d{4}"
  header_pattern <- "^([0-9]+) .+"

  # Tag header rows with their leading numeric ID, then carry it downwards so
  # every daily row knows which worker it belongs to.
  records <- workerData %>%
    mutate(
      ID = if_else(
        grepl(pattern = header_pattern, x = Fecha),
        gsub(pattern = header_pattern, replacement = "\\1", x = Fecha),
        NA_character_
      )
    ) %>%
    fill(ID)

  # `Entrada` is optional: only filter on it when the sheet actually has it.
  if ("Entrada" %in% colnames(records)) {
    records <- filter(records, !is.na(.data$Entrada))
  }

  if (length(workersId) > 0) {
    records <- filter(records, ID %in% workersId)
  }

  records %>%
    filter(!(ID %in% IGNORE_WORKERS_IDS)) %>%
    mutate(
      # Vacation days carry no stamps; blank them so they are dropped below.
      Registros = ifelse(toupper(Registros) == "VACACIONES", NA, Registros),
      viatico_alimentacion = `Viatico alimentacion`,
      viatico_transporte = `Viatico de transporte`
    ) %>%
    filter(
      str_detect(Fecha, date_pattern),
      Registros != "--",
      !is.na(Registros)
    ) %>%
    select(
      ID, Fecha, Registros, Observaciones,
      viatico_alimentacion, viatico_transporte
    ) %>%
    # Rows with fewer than four stamps are padded with NA on the right
    # (tidyr's default, which also emits a warning).
    separate(Registros, c("T1", "T2", "T3", "T4"), sep = "\\|") %>%
    # str_trim() coerces every column to character, as mutate_all() did.
    mutate(across(everything(), str_trim)) %>%
    filter(!is.na(T2)) %>%
    mutate(ID = as.integer(ID)) %>%
    mutate(
      incomplete = is.na(T4),
      # When there is no fourth stamp, the second stamp is used in its place.
      T4 = if_else(incomplete, T2, T4)
    )
}
#' Read a time-clock Excel file and turn it into tidy per-day records.
#'
#' @param sourceFile Path of the Excel file, handed to `read_excel()`.
#' @param workersId Optional vector of worker IDs to keep; forwarded to
#'   `getWorkerTimeClock()`.
#' @param strickColumns When `TRUE`, stop with an error if any of the required
#'   columns is absent from the sheet. Defaults to `FALSE` (no check).
#' @return The result of `getWorkerTimeClock()` inner-joined on `ID` with the
#'   worker roster, which adds the `Name` column.
processTimeClock <- function(sourceFile, workersId = NULL,
                             strickColumns = FALSE) {
  workers <- read_excel(sourceFile)

  # TODO: move these required columns into a config file
  requiredColumns <- c(
    "Fecha", "Registros", "Viatico alimentacion",
    "Viatico de transporte", "Observaciones"
  )
  missingColumns <- setdiff(requiredColumns, colnames(workers))
  # Scalar condition, so use the short-circuiting `&&` rather than `&`.
  if (length(missingColumns) > 0 && strickColumns) {
    stop(paste(
      "Columnas insuficientes para general el reporte",
      paste(missingColumns, collapse = ",")
    ))
  }

  # Worker roster: rows without a date-like `Fecha` are "<ID>-<Name>" headers.
  worker_id <- workers %>%
    filter(!str_detect(Fecha, "\\d{2}\\/\\d{2}\\/\\d{4}")) %>%
    select(worker = Fecha) %>%
    separate(worker, c("ID", "Name"), sep = "\\-") %>%
    mutate(ID = as.integer(ID), Name = str_trim(Name)) %>%
    filter(!(ID %in% IGNORE_WORKERS_IDS))

  # `ID` is the only column the two tables share; naming it explicitly keeps
  # the join stable and avoids dplyr's "Joining with `by = ...`" message.
  getWorkerTimeClock(workers, workersId) %>%
    inner_join(worker_id, by = "ID")
}