Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
.Rproj.user
docs
.Rhistory
329 changes: 329 additions & 0 deletions r/adsl.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,329 @@
# Note to Reviewer
# To rerun the code below, please refer ADRG appendix.
# After required package are installed.
# The path variable needs to be defined by using example code below
#
# nolint start
# path <- list(
# sdtm = "path/to/esub/tabulations/sdtm", # Modify path to the sdtm location
# adam = "path/to/esub/analysis/adam" # Modify path to the adam location
# )
# nolint end

###########################################################################
#' developers : Steven Haesendonckx/Declan Hodges/Thomas Neitmann
#' date: 09DEC2022
#' modification History:
#'
###########################################################################

# Set up ------------------------------------------------------------------

library(haven)
library(admiral)
library(dplyr)
library(tidyr)
library(metacore)
library(metatools)
library(pilot3utils)
library(xportr)
library(janitor)

# read source -------------------------------------------------------------
# When SAS datasets are imported into R using read_sas(), missing
# character values from SAS appear as "" characters in R, instead of appearing
# as NA values. Further details can be obtained via the following link:
# https://pharmaverse.github.io/admiral/articles/admiral.html#handling-of-missing-values

dm <- convert_blanks_to_na(read_xpt(file.path(path$sdtm, "dm.xpt")))
ds <- convert_blanks_to_na(read_xpt(file.path(path$sdtm, "ds.xpt")))
ex <- convert_blanks_to_na(read_xpt(file.path(path$sdtm, "ex.xpt")))
qs <- convert_blanks_to_na(read_xpt(file.path(path$sdtm, "qs.xpt")))
sv <- convert_blanks_to_na(read_xpt(file.path(path$sdtm, "sv.xpt")))
vs <- convert_blanks_to_na(read_xpt(file.path(path$sdtm, "vs.xpt")))
sc <- convert_blanks_to_na(read_xpt(file.path(path$sdtm, "sc.xpt")))
mh <- convert_blanks_to_na(read_xpt(file.path(path$sdtm, "mh.xpt")))

## placeholder for origin=predecessor, use metatool::build_from_derived()
metacore <- spec_to_metacore(file.path(path$adam, "adam-pilot-3.xlsx"), where_sep_sheet = FALSE)
# Get the specifications for the dataset we are currently building
adsl_spec <- metacore %>%
select_dataset("ADSL")

ds00 <- ds %>%
filter(DSCAT == "DISPOSITION EVENT", DSDECOD != "SCREEN FAILURE") %>%
derive_vars_dt(
dtc = DSSTDTC,
new_vars_prefix = "EOS",
highest_imputation = "n",
) %>%
mutate(
DISCONFL = ifelse(!is.na(EOSDT) & DSDECOD != "COMPLETED", "Y", NA),
DSRAEFL = ifelse(DSTERM == "ADVERSE EVENT", "Y", NA),
DCDECOD = DSDECOD
) %>%
select(STUDYID, USUBJID, EOSDT, DISCONFL, DSRAEFL, DSDECOD, DSTERM, DCDECOD)

# Treatment information ---------------------------------------------------

ex_dt <- ex %>%
derive_vars_dt(
dtc = EXSTDTC,
new_vars_prefix = "EXST",
highest_imputation = "n",
) %>%
# treatment end is imputed by discontinuation if subject discontinued after visit 3 = randomization as per protocol
derive_vars_merged(
dataset_add = ds00,
by_vars = exprs(STUDYID, USUBJID),
new_vars = exprs(EOSDT = EOSDT),
filter_add = DCDECOD != "COMPLETED"
) %>%
derive_vars_dt(
dtc = EXENDTC,
new_vars_prefix = "EXEN",
highest_imputation = "Y",
max_dates = exprs(EOSDT),
date_imputation = "last",
flag_imputation = "none"
) %>%
mutate(DOSE = EXDOSE * (EXENDT - EXSTDT + 1))

ex_dose <- ex_dt %>%
group_by(STUDYID, USUBJID, EXTRT) %>%
summarise(cnt = n_distinct(EXTRT), CUMDOSE = sum(DOSE))

ex_dose[which(ex_dose[["cnt"]] > 1), "USUBJID"] # are there subjects with mixed treatments?

adsl00 <- dm %>%
select(-DOMAIN) %>%
filter(ACTARMCD != "Scrnfail") %>%
# planned treatment
mutate(
TRT01P = ARM,
TRT01PN = case_when(
ARM == "Placebo" ~ 0,
ARM == "Xanomeline High Dose" ~ 81,
ARM == "Xanomeline Low Dose" ~ 54
)
) %>%
# actual treatment - It is assumed TRT01A=TRT01P which is not really true.
mutate(
TRT01A = TRT01P,
TRT01AN = TRT01PN
) %>%
# treatment start
derive_vars_merged(
dataset_add = ex_dt,
filter_add = (EXDOSE > 0 |
(EXDOSE == 0 &
grepl("PLACEBO", EXTRT, fixed = TRUE))) &
!is.na(EXSTDT),
new_vars = exprs(TRTSDT = EXSTDT),
order = exprs(EXSTDT, EXSEQ),
mode = "first",
by_vars = exprs(STUDYID, USUBJID)
) %>%
# treatment end
derive_vars_merged(
dataset_add = ex_dt,
filter_add = (EXDOSE > 0 |
(EXDOSE == 0 &
grepl("PLACEBO", EXTRT, fixed = TRUE))) &
!is.na(EXENDT),
new_vars = exprs(TRTEDT = EXENDT),
order = exprs(EXENDT, EXSEQ),
mode = "last",
by_vars = exprs(STUDYID, USUBJID)
) %>%
# treatment duration
derive_var_trtdurd() %>%
# dosing
left_join(ex_dose, by = c("STUDYID", "USUBJID")) %>%
select(-cnt) %>%
mutate(AVGDD = round_half_up(as.numeric(CUMDOSE) / TRTDURD, digits = 1))

# Demographic grouping ----------------------------------------------------
adsl01 <- adsl00 %>%
create_cat_var(adsl_spec, AGE, AGEGR1, AGEGR1N) %>%
create_var_from_codelist(adsl_spec, RACE, RACEN) %>%
mutate(
SITEGR1 = format_sitegr1(SITEID)
)

# Population flag ---------------------------------------------------------
# SAFFL - Y if ITTFL='Y' and TRTSDT ne missing. N otherwise
# ITTFL - Y if ARMCD ne ' '. N otherwise
# EFFFL - Y if SAFFL='Y AND at least one record in QS for ADAS-Cog and for CIBIC+ with VISITNUM>3, N otherwise
# these variables are also in suppdm, but define said derived

qstest <- distinct(qs[, c("QSTESTCD", "QSTEST")])

eff <- qs %>%
filter(VISITNUM > 3, QSTESTCD %in% c("CIBIC", "ACTOT")) %>%
group_by(STUDYID, USUBJID) %>%
summarise(effcnt = n_distinct(QSTESTCD))

adsl02 <- adsl01 %>%
left_join(eff, by = c("STUDYID", "USUBJID")) %>%
mutate(
SAFFL = case_when(
ARMCD != "Scrnfail" & ARMCD != "" & !is.na(TRTSDT) ~ "Y",
ARMCD == "Scrnfail" ~ NA_character_,
TRUE ~ "N"
),
ITTFL = case_when(
ARMCD != "Scrnfail" & ARMCD != "" ~ "Y",
ARMCD == "Scrnfail" ~ NA_character_,
TRUE ~ "N"
),
EFFFL = case_when(
ARMCD != "Scrnfail" & ARMCD != "" & !is.na(TRTSDT) & effcnt == 2 ~ "Y",
ARMCD == "Scrnfail" ~ NA_character_,
TRUE ~ "N"
)
)

# Study Visit compliance --------------------------------------------------
# these variables are also in suppdm, but define said derived

sv00 <- sv %>%
select(STUDYID, USUBJID, VISIT, VISITDY, SVSTDTC) %>%
mutate(
FLG = "Y",
VISITCMP = case_when(
VISIT == "WEEK 8" ~ "COMP8FL",
VISIT == "WEEK 16" ~ "COMP16FL",
VISIT == "WEEK 24" ~ "COMP24FL",
TRUE ~ "ZZZ" # ensures every subject with one visit will get a row with minimally 'N'
)
) %>%
arrange(STUDYID, USUBJID, VISITDY) %>%
distinct(STUDYID, USUBJID, VISITCMP, FLG) %>%
pivot_wider(names_from = VISITCMP, values_from = FLG, values_fill = "N") %>%
select(-ZZZ)

adsl03 <- adsl02 %>%
left_join(sv00, by = c("STUDYID", "USUBJID"))

# Disposition -------------------------------------------------------------

adsl04 <- adsl03 %>%
left_join(ds00, by = c("STUDYID", "USUBJID")) %>%
select(-DSDECOD) %>%
derive_var_merged_cat(
dataset_add = ds00,
by_vars = exprs(STUDYID, USUBJID),
new_var = EOSSTT,
source_var = DSDECOD,
cat_fun = format_eosstt,
filter_add = !is.na(USUBJID),
) %>%
derive_var_merged_cat(
dataset_add = ds00,
by_vars = exprs(STUDYID, USUBJID),
new_var = DCSREAS,
source_var = DSDECOD,
cat_fun = format_dcsreas, # could not include dsterm in formatting logic
filter_add = !is.na(USUBJID),
) %>%
mutate(DCSREAS = ifelse(DSTERM == "PROTOCOL ENTRY CRITERIA NOT MET", "I/E Not Met", DCSREAS))

# Baseline variables ------------------------------------------------------
# selection definition from define

vs00 <- vs %>%
filter((VSTESTCD == "HEIGHT" & VISITNUM == 1) | (VSTESTCD == "WEIGHT" & VISITNUM == 3)) %>%
mutate(AVAL = round_half_up(VSSTRESN, digits = 1)) %>%
select(STUDYID, USUBJID, VSTESTCD, AVAL) %>%
pivot_wider(names_from = VSTESTCD, values_from = AVAL, names_glue = "{VSTESTCD}BL") %>%
mutate(
BMIBL = round_half_up(WEIGHTBL / (HEIGHTBL / 100)^2, digits = 1)
) %>%
create_cat_var(adsl_spec, BMIBL, BMIBLGR1)

sc00 <- sc %>%
filter(SCTESTCD == "EDLEVEL") %>%
select(STUDYID, USUBJID, SCTESTCD, SCSTRESN) %>%
pivot_wider(names_from = SCTESTCD, values_from = SCSTRESN, names_glue = "EDUCLVL")

adsl05 <- adsl04 %>%
left_join(vs00, by = c("STUDYID", "USUBJID")) %>%
left_join(sc00, by = c("STUDYID", "USUBJID"))

# Disease information -----------------------------------------------------

visit1dt <- sv %>%
filter(VISITNUM == 1) %>%
derive_vars_dt(
dtc = SVSTDTC,
new_vars_prefix = "VISIT1",
) %>%
select(STUDYID, USUBJID, VISIT1DT)

visnumen <- sv %>%
filter(VISITNUM < 100) %>%
arrange(STUDYID, USUBJID, SVSTDTC) %>%
group_by(STUDYID, USUBJID) %>%
slice(n()) %>%
ungroup() %>%
mutate(VISNUMEN = ifelse(round_half_up(VISITNUM, digits = 0) == 13, 12, round_half_up(VISITNUM, digits = 0))) %>%
select(STUDYID, USUBJID, VISNUMEN)

disonsdt <- mh %>%
filter(MHCAT == "PRIMARY DIAGNOSIS") %>%
derive_vars_dt(
dtc = MHSTDTC,
new_vars_prefix = "DISONS",
) %>%
select(STUDYID, USUBJID, DISONSDT)

adsl06 <- adsl05 %>%
left_join(visit1dt, by = c("STUDYID", "USUBJID")) %>%
left_join(visnumen, by = c("STUDYID", "USUBJID")) %>%
left_join(disonsdt, by = c("STUDYID", "USUBJID")) %>%
derive_vars_duration(
new_var = DURDIS,
start_date = DISONSDT,
end_date = VISIT1DT,
out_unit = "days",
add_one = TRUE
) %>%
# derive_vars_duration(..., out_unit = "months") is not used here because
# it calculates months based on date internals, while the original CDISC
# adsl.DURDIS was derived assuming each month has the same number of days,
# i.e., 365.25/12=30.4375.
# Feature requested: https://github.com/pharmaverse/admiral/issues/1875
# Workaround: derive days first and then convert it to months
mutate(
DURDIS = round_half_up(DURDIS / (365.25 / 12), digits = 1)
) %>%
create_cat_var(adsl_spec, DURDIS, DURDSGR1) %>%
derive_vars_dt(
dtc = RFENDTC,
new_vars_prefix = "RFEN",
)

mmsetot <- qs %>%
filter(QSCAT == "MINI-MENTAL STATE") %>%
group_by(STUDYID, USUBJID) %>%
summarise(MMSETOT = sum(as.numeric(QSORRES), na.rm = TRUE)) %>%
select(STUDYID, USUBJID, MMSETOT)

adsl07 <- adsl06 %>%
left_join(mmsetot, by = c("STUDYID", "USUBJID"))

# Export to xpt -----------------------------------------------------
adsl07 %>%
drop_unspec_vars(adsl_spec) %>% # Check all variables specified are present and no more
check_ct_data(adsl_spec, na_acceptable = TRUE) %>% # Checks all variables with CT only contain values within the CT
order_cols(adsl_spec) %>% # Orders the columns according to the spec
sort_by_key(adsl_spec) %>% # Sorts the rows by the sort keys
xportr_length(adsl_spec) %>% # Assigns SAS length from a variable level metadata
xportr_label(adsl_spec) %>% # Assigns variable label from metacore specifications
xportr_df_label(adsl_spec) %>% # Assigns dataset label from metacore specifications
xportr_format(adsl_spec$var_spec %>%
mutate_at(c("format"), ~ replace_na(., "")), "ADSL") %>%
xportr_write(file.path(path$adam, "adsl.xpt"),
label = "Subject-Level Analysis Dataset"
)
42 changes: 42 additions & 0 deletions r/getdependency.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@


dir_to_programs <- file.path(getwd(), "r")

# Scrape scripts ----------------------------------------------------------

scrapemea <- list.files(dir_to_programs, pattern = "\\.r$", full.names = TRUE)
scrapemeb <- list.files(dir_to_programs, pattern = "^ad", full.names = TRUE)

scrapeme <- intersect(scrapemea, scrapemeb)

scrape_inputfiles <- function(script=scrapeme){

# do some checks on input


# read file to extract lines where input is read into the script, ignoring comments
tmp <- data.frame(lines = trimws(suppressWarnings(readLines(script)), which=("both")), stringsAsFactors = F)
tmp <- data.frame(lines = tmp$lines[which(!grepl("#", tmp$lines) & !grepl("write", tmp$lines))], stringsAsFactors = F)
lines <- tolower(tmp$lines)

# extract file types to detect
filetypes <- c("rds", "xpt", "sas7bdat", "json")


# Extract input files for each type and simplify the regex operation
inputfiles <- list()

for (type in filetypes) {
pattern <- paste0("\\b\\S+\\.", type, "\\b")
inputfiles[[type]] <- unlist(regmatches(lines, gregexpr(pattern, lines)))
}

# Include excel files specifically for .xls and .xlsx
inputfiles$excel <- unlist(regmatches(lines, gregexpr("\\b\\S+\\.xls[x]?\\b", lines)))

return(inputfiles)
}

infiles <- sapply(scrapeme, FUN = scrape_inputfiles, simplify = F, USE.NAMES = F)
names(infiles) <- sapply(scrapeme, basename)
print(infiles)