Skip to content
Merged

Main #21

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: tame
Title: Timing, Anatomical, Therapeutic and Chemical Based Medication
Clustering
Version: 0.2.0
Version: 0.2.1
Authors@R:
person("Anna", "Laksafoss", , "adls@ssi.dk", role = c("aut", "cre"),
comment = c(ORCID = "0000-0002-9898-2924"))
Expand All @@ -14,7 +14,7 @@ License: GPL (>= 3) | file LICENSE
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.2
Imports: dplyr (>= 1.0.0), fuzzyjoin, magrittr, purrr (>= 1.0.0),
Imports: dplyr (>= 1.0.0), purrr (>= 1.0.0),
Rfast, rlang (>= 1.0.0), stats, stringr, tibble (>= 3.0.0),
tidyr (>= 1.2.0), tidyselect, Rcpp (>= 1.0.8), ggplot2 (>=
3.3.0), scales, utils
Expand Down
1 change: 0 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@ export(summary_crop)
export(timing_atc_group)
export(timing_trajectory)
importFrom(Rcpp,evalCpp)
importFrom(magrittr,"%>%")
importFrom(rlang,":=")
importFrom(rlang,.data)
importFrom(utils,as.roman)
Expand Down
132 changes: 66 additions & 66 deletions R/employ.R
Original file line number Diff line number Diff line change
Expand Up @@ -109,31 +109,31 @@ employ <- function(
selected_analyses <- method_selector(clust, {{ only }})
selected_names <- selected_analyses$cluster_name

parameters <- clust$parameters %>%
parameters <- clust$parameters |>
dplyr::filter(.data$cluster_name %in% selected_names)

old <- clust$data %>%
old <- clust$data |>
dplyr::select(
!!rlang::sym(clust$variables$id),
!!rlang::sym(clust$variables$atc),
dplyr::all_of(clust$variables$timing),
!!!rlang::syms(selected_names)
) %>%
) |>
tidyr::nest(pattern = c(clust$variables$atc, clust$variables$timing))

old_distinct <- old %>%
dplyr::select(-!!rlang::sym(clust$variables$id)) %>%
old_distinct <- old |>
dplyr::select(-!!rlang::sym(clust$variables$id)) |>
dplyr::distinct()

new <- new_data %>%
new <- new_data |>
dplyr::select(
!!rlang::sym(clust$variables$id),
!!rlang::sym(clust$variables$atc),
dplyr::all_of(clust$variables$timing)
) %>%
) |>
tidyr::nest(pattern = c(clust$variables$atc, clust$variables$timing))

matching <- new %>%
matching <- new |>
dplyr::full_join(old_distinct, by = "pattern")


Expand All @@ -142,44 +142,44 @@ employ <- function(

if (assignment_method == "exact_only") {

exact_clusters <- matching %>%
dplyr::select(-"pattern") %>%
exact_clusters <- matching |>
dplyr::select(-"pattern") |>
dplyr::mutate(
dplyr::across(dplyr::all_of(selected_names), list("new_exact" = ~.))
)

old_clusters <- clust$clustering %>%
old_clusters <- clust$clustering |>
dplyr::select(
!!rlang::sym(clust$variables$id),
".analysis_order",
!!!rlang::syms(selected_names)
) %>%
dplyr::distinct() %>%
) |>
dplyr::distinct() |>
dplyr::mutate(
dplyr::across(dplyr::all_of(selected_names), list("old" = ~.))
) %>%
) |>
dplyr::arrange(
match(
!!rlang::sym(clust$variables$id),
clust$data[[clust$variables$id]]
)
)

all_clusterings <- dplyr::bind_rows(exact_clusters, old_clusters) %>%
all_clusterings <- dplyr::bind_rows(exact_clusters, old_clusters) |>
dplyr::relocate(dplyr::all_of(clust$variables$id), ".analysis_order")

final_clusters <- all_clusterings %>%
final_clusters <- all_clusterings |>
dplyr::select(
!!rlang::sym(clust$variables$id),
!!!rlang::syms(selected_names)
)

joined_data <- clust$data %>%
dplyr::select(-dplyr::all_of(selected_names)) %>%
dplyr::mutate(.origin = "old") %>%
dplyr::bind_rows(new_data) %>%
dplyr::mutate(.origin = tidyr::replace_na(.data$.origin, "new")) %>%
dplyr::left_join(final_clusters, by = clust$variables$id) %>%
joined_data <- clust$data |>
dplyr::select(-dplyr::all_of(selected_names)) |>
dplyr::mutate(.origin = "old") |>
dplyr::bind_rows(new_data) |>
dplyr::mutate(.origin = tidyr::replace_na(.data$.origin, "new")) |>
dplyr::left_join(final_clusters, by = clust$variables$id) |>
dplyr::relocate(dplyr::all_of(clust$variables$id))

return(
Expand All @@ -201,42 +201,42 @@ employ <- function(
# === Non-exact Matching ===============================================

# --- Split new into exact and missed -----------------------------------
exact <- matching %>%
exact <- matching |>
dplyr::filter(
!is.na(!!rlang::sym(selected_names[1])),
!is.na(!!rlang::sym(clust$variables$id))
) %>%
) |>
dplyr::select(
!!rlang::sym(clust$variables$id),
!!!rlang::syms(selected_names)
)

missed <- matching %>%
missed <- matching |>
dplyr::filter(
is.na(!!rlang::sym(selected_names[1])),
!is.na(!!rlang::sym(clust$variables$id))
) %>%
) |>
dplyr::select(!!rlang::sym(clust$variables$id))

# --- construct the new data set for pseudo-clustering ------------------
missed_full_info <- new_data %>%
missed_full_info <- new_data |>
dplyr::select(
!!rlang::sym(clust$variables$id),
!!rlang::sym(clust$variables$atc),
dplyr::all_of(clust$variables$timing)
) %>%
dplyr::inner_join(missed, by = clust$variables$id) %>%
) |>
dplyr::inner_join(missed, by = clust$variables$id) |>
dplyr::mutate(.origin = "new")

missed_with_old <- clust$data %>%
missed_with_old <- clust$data |>
dplyr::select(
!!rlang::sym(clust$variables$id),
!!rlang::sym(clust$variables$atc),
dplyr::all_of(clust$variables$timing),
!!!rlang::syms(selected_names)
) %>%
dplyr::mutate(.origin = "old") %>%
dplyr::bind_rows(missed_full_info) %>%
) |>
dplyr::mutate(.origin = "old") |>
dplyr::bind_rows(missed_full_info) |>
dplyr::mutate(
.internal_character_id = paste0(
.data$.origin, !!rlang::sym(clust$variables$id)
Expand All @@ -252,8 +252,8 @@ employ <- function(
dplyr::all_of(clust$variables$timing)
)

old_patterns <- keys$base_clustering %>% dplyr::filter(.data$.origin == "old")
new_patterns <- keys$base_clustering %>% dplyr::filter(.data$.origin == "new")
old_patterns <- keys$base_clustering |> dplyr::filter(.data$.origin == "old")
new_patterns <- keys$base_clustering |> dplyr::filter(.data$.origin == "new")

lookup_tables <- lookup_constructor(keys, parameters)

Expand Down Expand Up @@ -308,8 +308,8 @@ employ <- function(
keys,
method,
cur_tables,
old_patterns = old_patterns %>% dplyr::pull(.data$unique_pattern_key),
new_patterns = new_patterns %>% dplyr::pull(.data$unique_pattern_key)
old_patterns = old_patterns |> dplyr::pull(.data$unique_pattern_key),
new_patterns = new_patterns |> dplyr::pull(.data$unique_pattern_key)
)

chosen_linkage <- switch(
Expand All @@ -331,7 +331,7 @@ employ <- function(
function(d) {
clust_dist <- tapply(
d,
old_patterns %>% dplyr::pull(method[["cluster_name"]]),
old_patterns |> dplyr::pull(method[["cluster_name"]]),
FUN = chosen_linkage
)
closest <- which(clust_dist == min(clust_dist))
Expand All @@ -343,8 +343,8 @@ employ <- function(
}
)

new_clusters <- new_patterns %>%
dplyr::select("unique_pattern_key") %>%
new_clusters <- new_patterns |>
dplyr::select("unique_pattern_key") |>
dplyr::mutate(!!method[["cluster_name"]] := new_clusters)

return(new_clusters)
Expand All @@ -369,8 +369,8 @@ employ <- function(
keys,
method,
cur_tables,
old_patterns = old_patterns %>% dplyr::pull(.data$unique_pattern_key),
new_patterns = new_patterns %>% dplyr::pull(.data$unique_pattern_key)
old_patterns = old_patterns |> dplyr::pull(.data$unique_pattern_key),
new_patterns = new_patterns |> dplyr::pull(.data$unique_pattern_key)
)

chosen_linkage <- switch(
Expand All @@ -392,7 +392,7 @@ employ <- function(
function(d) {
clust_dist <- tapply(
d,
old_patterns %>% dplyr::pull(method[["cluster_name"]]),
old_patterns |> dplyr::pull(method[["cluster_name"]]),
FUN = chosen_linkage
)
closest <- which(clust_dist == min(clust_dist))
Expand All @@ -404,49 +404,49 @@ employ <- function(
}
)

new_clusters <- new_patterns %>%
dplyr::select("unique_pattern_key") %>%
new_clusters <- new_patterns |>
dplyr::select("unique_pattern_key") |>
dplyr::mutate(!!method[["cluster_name"]] := new_clusters)

return(new_clusters)
}
)
}

all_new_closest_clusterings <- clusterings %>%
all_new_closest_clusterings <- clusterings |>
purrr::reduce(dplyr::left_join, by = "unique_pattern_key")

missed_clusters <- keys$key %>%
dplyr::filter(.data$.origin == "new") %>%
missed_clusters <- keys$key |>
dplyr::filter(.data$.origin == "new") |>
dplyr::select(
!!rlang::sym(clust$variables$id),
"unique_pattern_key"
) %>%
dplyr::distinct() %>%
dplyr::left_join(all_new_closest_clusterings, by = "unique_pattern_key") %>%
dplyr::select(-"unique_pattern_key") %>%
) |>
dplyr::distinct() |>
dplyr::left_join(all_new_closest_clusterings, by = "unique_pattern_key") |>
dplyr::select(-"unique_pattern_key") |>
dplyr::mutate(
dplyr::across(
dplyr::all_of(selected_names),
list("new_closest" = ~.)
)
)

exact_clusters <- exact %>%
exact_clusters <- exact |>
dplyr::mutate(
dplyr::across(dplyr::all_of(selected_names), list("new_exact" = ~.))
)

old_clusters <- clust$clustering %>%
old_clusters <- clust$clustering |>
dplyr::select(
!!rlang::sym(clust$variables$id),
".analysis_order",
!!!rlang::syms(selected_names)
) %>%
dplyr::distinct() %>%
) |>
dplyr::distinct() |>
dplyr::mutate(
dplyr::across(dplyr::all_of(selected_names), list("old" = ~.))
) %>%
) |>
dplyr::arrange(
match(
!!rlang::sym(clust$variables$id),
Expand All @@ -455,31 +455,31 @@ employ <- function(
)


all_new_clusterings <- dplyr::bind_rows(missed_clusters, exact_clusters) %>%
all_new_clusterings <- dplyr::bind_rows(missed_clusters, exact_clusters) |>
dplyr::arrange(
match(
!!rlang::sym(clust$variables$id),
new_data[[clust$variables$id]]
)
)

all_clusterings <- dplyr::bind_rows(all_new_clusterings, old_clusters) %>%
all_clusterings <- dplyr::bind_rows(all_new_clusterings, old_clusters) |>
dplyr::relocate(dplyr::all_of(clust$variables$id), ".analysis_order")


final_clusters <- all_clusterings %>%
final_clusters <- all_clusterings |>
dplyr::select(
!!rlang::sym(clust$variables$id),
!!!rlang::syms(selected_names)
)


joined_data <- clust$data %>%
dplyr::select(-dplyr::all_of(selected_names)) %>%
dplyr::mutate(.origin = "old") %>%
dplyr::bind_rows(new_data) %>%
dplyr::mutate(.origin = tidyr::replace_na(.data$.origin, "new")) %>%
dplyr::left_join(final_clusters, by = clust$variables$id) %>%
joined_data <- clust$data |>
dplyr::select(-dplyr::all_of(selected_names)) |>
dplyr::mutate(.origin = "old") |>
dplyr::bind_rows(new_data) |>
dplyr::mutate(.origin = tidyr::replace_na(.data$.origin, "new")) |>
dplyr::left_join(final_clusters, by = clust$variables$id) |>
dplyr::relocate(dplyr::all_of(clust$variables$id))

names(keys)[which(names(keys) == "base_clustering")] <- "clustered_patterns"
Expand Down
2 changes: 1 addition & 1 deletion R/enrich.R
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ enrich <- function(object, additional_data = NULL, by = NULL) {
bys <- by
}
new <- object
new$parameters <- new$parameters %>%
new$parameters <- new$parameters |>
dplyr::left_join(additional_data, by = bys)
return(new)
}
Expand Down
Loading