diff --git a/DESCRIPTION b/DESCRIPTION index d4baf3d..870b921 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: tame Title: Timing, Anatomical, Therapeutic and Chemical Based Medication Clustering -Version: 0.2.0 +Version: 0.2.1 Authors@R: person("Anna", "Laksafoss", , "adls@ssi.dk", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-9898-2924")) @@ -14,7 +14,7 @@ License: GPL (>= 3) | file LICENSE Encoding: UTF-8 Roxygen: list(markdown = TRUE) RoxygenNote: 7.3.2 -Imports: dplyr (>= 1.0.0), fuzzyjoin, magrittr, purrr (>= 1.0.0), +Imports: dplyr (>= 1.0.0), purrr (>= 1.0.0), Rfast, rlang (>= 1.0.0), stats, stringr, tibble (>= 3.0.0), tidyr (>= 1.2.0), tidyselect, Rcpp (>= 1.0.8), ggplot2 (>= 3.3.0), scales, utils diff --git a/NAMESPACE b/NAMESPACE index ecbb785..bb04695 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -45,7 +45,6 @@ export(summary_crop) export(timing_atc_group) export(timing_trajectory) importFrom(Rcpp,evalCpp) -importFrom(magrittr,"%>%") importFrom(rlang,":=") importFrom(rlang,.data) importFrom(utils,as.roman) diff --git a/R/employ.R b/R/employ.R index e9c745d..c41671a 100644 --- a/R/employ.R +++ b/R/employ.R @@ -109,31 +109,31 @@ employ <- function( selected_analyses <- method_selector(clust, {{ only }}) selected_names <- selected_analyses$cluster_name - parameters <- clust$parameters %>% + parameters <- clust$parameters |> dplyr::filter(.data$cluster_name %in% selected_names) - old <- clust$data %>% + old <- clust$data |> dplyr::select( !!rlang::sym(clust$variables$id), !!rlang::sym(clust$variables$atc), dplyr::all_of(clust$variables$timing), !!!rlang::syms(selected_names) - ) %>% + ) |> tidyr::nest(pattern = c(clust$variables$atc, clust$variables$timing)) - old_distinct <- old %>% - dplyr::select(-!!rlang::sym(clust$variables$id)) %>% + old_distinct <- old |> + dplyr::select(-!!rlang::sym(clust$variables$id)) |> dplyr::distinct() - new <- new_data %>% + new <- new_data |> dplyr::select( !!rlang::sym(clust$variables$id), !!rlang::sym(clust$variables$atc), dplyr::all_of(clust$variables$timing) - ) %>% + ) |> tidyr::nest(pattern = c(clust$variables$atc, clust$variables$timing)) - matching <- new %>% + matching <- new |> dplyr::full_join(old_distinct, by = "pattern") @@ -142,22 +142,22 @@ employ <- function( if (assignment_method == "exact_only") { - exact_clusters <- matching %>% - dplyr::select(-"pattern") %>% + exact_clusters <- matching |> + dplyr::select(-"pattern") |> dplyr::mutate( dplyr::across(dplyr::all_of(selected_names), list("new_exact" = ~.)) ) - old_clusters <- clust$clustering %>% + old_clusters <- clust$clustering |> dplyr::select( !!rlang::sym(clust$variables$id), ".analysis_order", !!!rlang::syms(selected_names) - ) %>% - dplyr::distinct() %>% + ) |> + dplyr::distinct() |> dplyr::mutate( dplyr::across(dplyr::all_of(selected_names), list("old" = ~.)) - ) %>% + ) |> dplyr::arrange( match( !!rlang::sym(clust$variables$id), @@ -165,21 +165,21 @@ employ <- function( ) ) - all_clusterings <- dplyr::bind_rows(exact_clusters, old_clusters) %>% + all_clusterings <- dplyr::bind_rows(exact_clusters, old_clusters) |> dplyr::relocate(dplyr::all_of(clust$variables$id), ".analysis_order") - final_clusters <- all_clusterings %>% + final_clusters <- all_clusterings |> dplyr::select( !!rlang::sym(clust$variables$id), !!!rlang::syms(selected_names) ) - joined_data <- clust$data %>% - dplyr::select(-dplyr::all_of(selected_names)) %>% - dplyr::mutate(.origin = "old") %>% - dplyr::bind_rows(new_data) %>% - dplyr::mutate(.origin = tidyr::replace_na(.data$.origin, "new")) %>% - dplyr::left_join(final_clusters, by = clust$variables$id) %>% + joined_data <- clust$data |> + dplyr::select(-dplyr::all_of(selected_names)) |> + dplyr::mutate(.origin = "old") |> + dplyr::bind_rows(new_data) |> + dplyr::mutate(.origin = tidyr::replace_na(.data$.origin, "new")) |> + dplyr::left_join(final_clusters, by = clust$variables$id) |> dplyr::relocate(dplyr::all_of(clust$variables$id)) return( @@ -201,42 +201,42 @@ employ <- function( # === Non-exact Matching =============================================== # --- Split new into exact and missed ----------------------------------- - exact <- matching %>% + exact <- matching |> dplyr::filter( !is.na(!!rlang::sym(selected_names[1])), !is.na(!!rlang::sym(clust$variables$id)) - ) %>% + ) |> dplyr::select( !!rlang::sym(clust$variables$id), !!!rlang::syms(selected_names) ) - missed <- matching %>% + missed <- matching |> dplyr::filter( is.na(!!rlang::sym(selected_names[1])), !is.na(!!rlang::sym(clust$variables$id)) - ) %>% + ) |> dplyr::select(!!rlang::sym(clust$variables$id)) # --- construct the new data set for pseudo-clustering ------------------ - missed_full_info <- new_data %>% + missed_full_info <- new_data |> dplyr::select( !!rlang::sym(clust$variables$id), !!rlang::sym(clust$variables$atc), dplyr::all_of(clust$variables$timing) - ) %>% - dplyr::inner_join(missed, by = clust$variables$id) %>% + ) |> + dplyr::inner_join(missed, by = clust$variables$id) |> dplyr::mutate(.origin = "new") - missed_with_old <- clust$data %>% + missed_with_old <- clust$data |> dplyr::select( !!rlang::sym(clust$variables$id), !!rlang::sym(clust$variables$atc), dplyr::all_of(clust$variables$timing), !!!rlang::syms(selected_names) - ) %>% - dplyr::mutate(.origin = "old") %>% - dplyr::bind_rows(missed_full_info) %>% + ) |> + dplyr::mutate(.origin = "old") |> + dplyr::bind_rows(missed_full_info) |> dplyr::mutate( .internal_character_id = paste0( .data$.origin, !!rlang::sym(clust$variables$id) @@ -252,8 +252,8 @@ employ <- function( dplyr::all_of(clust$variables$timing) ) - old_patterns <- keys$base_clustering %>% dplyr::filter(.data$.origin == "old") - new_patterns <- keys$base_clustering %>% dplyr::filter(.data$.origin == "new") + old_patterns <- keys$base_clustering |> dplyr::filter(.data$.origin == "old") + new_patterns <- keys$base_clustering |> dplyr::filter(.data$.origin == "new") lookup_tables <- lookup_constructor(keys, parameters) @@ -308,8 +308,8 @@ employ <- function( keys, method, cur_tables, - old_patterns = old_patterns %>% dplyr::pull(.data$unique_pattern_key), - new_patterns = new_patterns %>% dplyr::pull(.data$unique_pattern_key) + old_patterns = old_patterns |> dplyr::pull(.data$unique_pattern_key), + new_patterns = new_patterns |> dplyr::pull(.data$unique_pattern_key) ) chosen_linkage <- switch( @@ -331,7 +331,7 @@ employ <- function( function(d) { clust_dist <- tapply( d, - old_patterns %>% dplyr::pull(method[["cluster_name"]]), + old_patterns |> dplyr::pull(method[["cluster_name"]]), FUN = chosen_linkage ) closest <- which(clust_dist == min(clust_dist)) @@ -343,8 +343,8 @@ employ <- function( } ) - new_clusters <- new_patterns %>% - dplyr::select("unique_pattern_key") %>% + new_clusters <- new_patterns |> + dplyr::select("unique_pattern_key") |> dplyr::mutate(!!method[["cluster_name"]] := new_clusters) return(new_clusters) @@ -369,8 +369,8 @@ employ <- function( keys, method, cur_tables, - old_patterns = old_patterns %>% dplyr::pull(.data$unique_pattern_key), - new_patterns = new_patterns %>% dplyr::pull(.data$unique_pattern_key) + old_patterns = old_patterns |> dplyr::pull(.data$unique_pattern_key), + new_patterns = new_patterns |> dplyr::pull(.data$unique_pattern_key) ) chosen_linkage <- switch( @@ -392,7 +392,7 @@ employ <- function( function(d) { clust_dist <- tapply( d, - old_patterns %>% dplyr::pull(method[["cluster_name"]]), + old_patterns |> dplyr::pull(method[["cluster_name"]]), FUN = chosen_linkage ) closest <- which(clust_dist == min(clust_dist)) @@ -404,8 +404,8 @@ employ <- function( } ) - new_clusters <- new_patterns %>% - dplyr::select("unique_pattern_key") %>% + new_clusters <- new_patterns |> + dplyr::select("unique_pattern_key") |> dplyr::mutate(!!method[["cluster_name"]] := new_clusters) return(new_clusters) @@ -413,18 +413,18 @@ employ <- function( ) } - all_new_closest_clusterings <- clusterings %>% + all_new_closest_clusterings <- clusterings |> purrr::reduce(dplyr::left_join, by = "unique_pattern_key") - missed_clusters <- keys$key %>% - dplyr::filter(.data$.origin == "new") %>% + missed_clusters <- keys$key |> + dplyr::filter(.data$.origin == "new") |> dplyr::select( !!rlang::sym(clust$variables$id), "unique_pattern_key" - ) %>% - dplyr::distinct() %>% - dplyr::left_join(all_new_closest_clusterings, by = "unique_pattern_key") %>% - dplyr::select(-"unique_pattern_key") %>% + ) |> + dplyr::distinct() |> + dplyr::left_join(all_new_closest_clusterings, by = "unique_pattern_key") |> + dplyr::select(-"unique_pattern_key") |> dplyr::mutate( dplyr::across( dplyr::all_of(selected_names), @@ -432,21 +432,21 @@ employ <- function( ) ) - exact_clusters <- exact %>% + exact_clusters <- exact |> dplyr::mutate( dplyr::across(dplyr::all_of(selected_names), list("new_exact" = ~.)) ) - old_clusters <- clust$clustering %>% + old_clusters <- clust$clustering |> dplyr::select( !!rlang::sym(clust$variables$id), ".analysis_order", !!!rlang::syms(selected_names) - ) %>% - dplyr::distinct() %>% + ) |> + dplyr::distinct() |> dplyr::mutate( dplyr::across(dplyr::all_of(selected_names), list("old" = ~.)) - ) %>% + ) |> dplyr::arrange( match( !!rlang::sym(clust$variables$id), @@ -455,7 +455,7 @@ employ <- function( ) - all_new_clusterings <- dplyr::bind_rows(missed_clusters, exact_clusters) %>% + all_new_clusterings <- dplyr::bind_rows(missed_clusters, exact_clusters) |> dplyr::arrange( match( !!rlang::sym(clust$variables$id), @@ -463,23 +463,23 @@ employ <- function( ) ) - all_clusterings <- dplyr::bind_rows(all_new_clusterings, old_clusters) %>% + all_clusterings <- dplyr::bind_rows(all_new_clusterings, old_clusters) |> dplyr::relocate(dplyr::all_of(clust$variables$id), ".analysis_order") - final_clusters <- all_clusterings %>% + final_clusters <- all_clusterings |> dplyr::select( !!rlang::sym(clust$variables$id), !!!rlang::syms(selected_names) ) - joined_data <- clust$data %>% - dplyr::select(-dplyr::all_of(selected_names)) %>% - dplyr::mutate(.origin = "old") %>% - dplyr::bind_rows(new_data) %>% - dplyr::mutate(.origin = tidyr::replace_na(.data$.origin, "new")) %>% - dplyr::left_join(final_clusters, by = clust$variables$id) %>% + joined_data <- clust$data |> + dplyr::select(-dplyr::all_of(selected_names)) |> + dplyr::mutate(.origin = "old") |> + dplyr::bind_rows(new_data) |> + dplyr::mutate(.origin = tidyr::replace_na(.data$.origin, "new")) |> + dplyr::left_join(final_clusters, by = clust$variables$id) |> dplyr::relocate(dplyr::all_of(clust$variables$id)) names(keys)[which(names(keys) == "base_clustering")] <- "clustered_patterns" diff --git a/R/enrich.R b/R/enrich.R index fa12700..b590223 100644 --- a/R/enrich.R +++ b/R/enrich.R @@ -63,7 +63,7 @@ enrich <- function(object, additional_data = NULL, by = NULL) { bys <- by } new <- object - new$parameters <- new$parameters %>% + new$parameters <- new$parameters |> dplyr::left_join(additional_data, by = bys) return(new) } diff --git a/R/extractors.R b/R/extractors.R index c137007..c3b359e 100644 --- a/R/extractors.R +++ b/R/extractors.R @@ -42,9 +42,9 @@ method_selector <- function(clustering, only, additional_data = NULL) { return(clust$parameters) } - res <- clust$parameters %>% - dplyr::rowwise() %>% - dplyr::filter({{ only }}) %>% + res <- clust$parameters |> + dplyr::rowwise() |> + dplyr::filter({{ only }}) |> dplyr::ungroup() return(res) @@ -78,26 +78,26 @@ method_selector <- function(clustering, only, additional_data = NULL) { cluster_selector <- function(clustering, clusters = NULL) { all_names <- clustering$parameters$cluster_name - all_clusters <- clustering$clustering %>% - dplyr::select(dplyr::all_of(all_names)) %>% - dplyr::distinct() %>% + all_clusters <- clustering$clustering |> + dplyr::select(dplyr::all_of(all_names)) |> + dplyr::distinct() |> tidyr::pivot_longer(dplyr::everything(), names_to = "cluster_name", - values_to = "cluster") %>% + values_to = "cluster") |> dplyr::filter(!is.na(.data$cluster)) # Defaulte NULL handling if (rlang::quo_is_null(rlang::enquo(clusters))) { - return(all_clusters %>% dplyr::pull(.data$cluster) %>% unique()) + return(all_clusters |> dplyr::pull(.data$cluster) |> unique()) } # General handling - chosen_clusters <- all_clusters %>% - dplyr::distinct() %>% - dplyr::arrange(as.numeric(as.roman(as.character(.data$cluster)))) %>% - dplyr::mutate(dummy = 1) %>% - tidyr::pivot_wider(names_from = "cluster", values_from = "dummy") %>% - dplyr::select({{ clusters }}) %>% + chosen_clusters <- all_clusters |> + dplyr::distinct() |> + dplyr::arrange(as.numeric(as.roman(as.character(.data$cluster)))) |> + dplyr::mutate(dummy = 1) |> + tidyr::pivot_wider(names_from = "cluster", values_from = "dummy") |> + dplyr::select({{ clusters }}) |> names() return(chosen_clusters) diff --git a/R/medic-utils.R b/R/medic-utils.R index 95be7a5..6e5be21 100644 --- a/R/medic-utils.R +++ b/R/medic-utils.R @@ -35,8 +35,8 @@ parameters_constructor <- function( ... ) { - na_cols <- data %>% - dplyr::select({{ id }}, {{ atc }}, {{ timing }}, {{ base_clustering }}) %>% + na_cols <- data |> + dplyr::select({{ id }}, {{ atc }}, {{ timing }}, {{ base_clustering }}) |> dplyr::summarise(dplyr::across(dplyr::everything(), ~ any(is.na(.)))) if (any(na_cols)) { @@ -66,13 +66,13 @@ parameters_constructor <- function( } if (! missing(base_clustering)) { - test <- data %>% - dplyr::group_by({{ id }}) %>% + test <- data |> + dplyr::group_by({{ id }}) |> dplyr::summarise( n = dplyr::n_distinct({{ base_clustering }}), .groups = "drop" - ) %>% - dplyr::summarise(test = any(.data$n > 1), .groups = "drop") %>% + ) |> + dplyr::summarise(test = any(.data$n > 1), .groups = "drop") |> dplyr::pull(.data$test) if (test) { stop( @@ -98,11 +98,11 @@ parameters_constructor <- function( gamma = gamma, p = p, theta_list = if (is.list(theta)) theta else list(theta) - ) %>% + ) |> dplyr::mutate( theta = as.character(theta_list), clustering = paste0("cluster_", dplyr::row_number()) - ) %>% + ) |> dplyr::relocate("clustering") }, error = function(cond) { @@ -132,7 +132,7 @@ parameters_constructor <- function( #' @noRd key_constructor <- function(data, id, base_clustering, atc, timing) { - key <- data %>% + key <- data |> dplyr::select( ".internal_character_id", {{ id }}, @@ -144,15 +144,15 @@ key_constructor <- function(data, id, base_clustering, atc, timing) { # === Unique ATC codes ================================================= - if (data %>% dplyr::select({{ atc }}) %>% ncol() != 0) { + if (data |> dplyr::select({{ atc }}) |> ncol() != 0) { - unique_atc <- data %>% - dplyr::select({{ atc }}) %>% - dplyr::distinct() %>% - dplyr::mutate(unique_atc_key = dplyr::row_number()) %>% + unique_atc <- data |> + dplyr::select({{ atc }}) |> + dplyr::distinct() |> + dplyr::mutate(unique_atc_key = dplyr::row_number()) |> dplyr::relocate("unique_atc_key") - key <- key %>% + key <- key |> dplyr::left_join(unique_atc, by = names(unique_atc)[-1]) out <- list(unique_atc = unique_atc) @@ -166,15 +166,15 @@ key_constructor <- function(data, id, base_clustering, atc, timing) { # === Unique Timing codes ============================================== - if (data %>% dplyr::select({{ timing }}) %>% ncol() != 0) { + if (data |> dplyr::select({{ timing }}) |> ncol() != 0) { - unique_timing <- data %>% - dplyr::select({{ timing }}) %>% - dplyr::distinct() %>% - dplyr::mutate(unique_timing_key = dplyr::row_number()) %>% + unique_timing <- data |> + dplyr::select({{ timing }}) |> + dplyr::distinct() |> + dplyr::mutate(unique_timing_key = dplyr::row_number()) |> dplyr::relocate("unique_timing_key") - key <- key %>% + key <- key |> dplyr::left_join(unique_timing, by = names(unique_timing)[-1]) out <- c(out, list(unique_timing = unique_timing)) @@ -183,13 +183,13 @@ key_constructor <- function(data, id, base_clustering, atc, timing) { # === Unique Exposure ================================================== - unique_exposure <- key %>% - dplyr::select(dplyr::any_of(c("unique_atc_key", "unique_timing_key"))) %>% - dplyr::distinct() %>% - dplyr::mutate(unique_exposure_key = dplyr::row_number()) %>% + unique_exposure <- key |> + dplyr::select(dplyr::any_of(c("unique_atc_key", "unique_timing_key"))) |> + dplyr::distinct() |> + dplyr::mutate(unique_exposure_key = dplyr::row_number()) |> dplyr::relocate("unique_exposure_key") - key <- key %>% + key <- key |> dplyr::left_join(unique_exposure, by = names(unique_exposure)[-1]) out <- c(out, list(unique_exposure = unique_exposure)) @@ -198,25 +198,25 @@ key_constructor <- function(data, id, base_clustering, atc, timing) { # === Unique patterns =================================================== - nest_key <- key %>% + nest_key <- key |> tidyr::nest(pattern = unique(unlist(lapply(out, names)))) - unique_patterns <- nest_key %>% - dplyr::select("pattern") %>% - dplyr::distinct() %>% + unique_patterns <- nest_key |> + dplyr::select("pattern") |> + dplyr::distinct() |> dplyr::mutate(unique_pattern_key = dplyr::row_number(), - n_unique_exposures = sapply(.data$pattern, nrow)) %>% + n_unique_exposures = sapply(.data$pattern, nrow)) |> dplyr::relocate("unique_pattern_key", "n_unique_exposures") - key <- nest_key %>% - dplyr::left_join(unique_patterns, by = "pattern") %>% + key <- nest_key |> + dplyr::left_join(unique_patterns, by = "pattern") |> tidyr::unnest("pattern") out <- c(out, list(unique_patterns = unique_patterns)) - if (data %>% dplyr::select({{ base_clustering }}) %>% ncol() != 0) { - base_clu <- key %>% - dplyr::select({{ base_clustering }}, "unique_pattern_key") %>% + if (data |> dplyr::select({{ base_clustering }}) |> ncol() != 0) { + base_clu <- key |> + dplyr::select({{ base_clustering }}, "unique_pattern_key") |> dplyr::distinct() out <- c(out, list(base_clustering = base_clu)) } @@ -230,18 +230,18 @@ key_constructor <- function(data, id, base_clustering, atc, timing) { # === Construct Reduced Keys =========================================== - rms <- key %>% + rms <- key |> dplyr::select( ".internal_character_id", {{ id }}, {{ base_clustering }}, {{ atc }}, {{ timing }} - ) %>% + ) |> names() - reduced_key <- key %>% - dplyr::select(-dplyr::any_of(rms)) %>% + reduced_key <- key |> + dplyr::select(-dplyr::any_of(rms)) |> dplyr::distinct() # === Return Results =================================================== @@ -309,8 +309,8 @@ lookup_constructor <- function(keys, parameters) { #' @noRd atc_metric_lookup_constructor <- function(unique_atc) { - atc_codes <- unique_atc %>% - dplyr::select(-"unique_atc_key") %>% + atc_codes <- unique_atc |> + dplyr::select(-"unique_atc_key") |> dplyr::pull(1) atc_levels <- data.frame( @@ -327,7 +327,7 @@ atc_metric_lookup_constructor <- function(unique_atc) { }) }) - atc_names <- unique_atc %>% dplyr::pull(.data$unique_atc_key) + atc_names <- unique_atc |> dplyr::pull(.data$unique_atc_key) dimnames(res) <- list(atc_names, atc_names) return(res + 1) @@ -351,7 +351,7 @@ normalizing_lookup_constructor <- function( summation_methods = "double_sum" ) { - int_exposure_in_pattern <- unique_patterns %>% + int_exposure_in_pattern <- unique_patterns |> dplyr::pull(.data$n_unique_exposures, name = .data$unique_pattern_key) numeric_exposure_in_pattern <- as.numeric(int_exposure_in_pattern) @@ -519,9 +519,9 @@ distance_matrix_constructor <- function( ) { if ((!is.null(old_patterns)) && (!is.null(new_patterns))) { - rows <- keys$reduced_key %>% + rows <- keys$reduced_key |> dplyr::filter(.data$unique_pattern_key %in% old_patterns) - cols <- keys$reduced_key %>% + cols <- keys$reduced_key |> dplyr::filter(.data$unique_pattern_key %in% new_patterns) calc <- "full" @@ -734,17 +734,17 @@ hierarchical_clustering <- function( cluster_names <- paste0(method$clustering, "_k=", k) - pattern_clusters <- data.frame(stats::cutree(dendogram, k)) %>% + pattern_clusters <- data.frame(stats::cutree(dendogram, k)) |> dplyr::rename_at(dplyr::vars(tidyselect::everything()), ~cluster_names) if (is.null(members)) { - pattern_clusters <- pattern_clusters %>% - tibble::rownames_to_column(var = "unique_pattern_key") %>% + pattern_clusters <- pattern_clusters |> + tibble::rownames_to_column(var = "unique_pattern_key") |> dplyr::mutate(unique_pattern_key = as.numeric(.data$unique_pattern_key)) joiner <- "unique_pattern_key" } else { - pattern_clusters <- pattern_clusters %>% + pattern_clusters <- pattern_clusters |> tibble::rownames_to_column(var = names(keys$base_clustering)[1]) joiner <- names(keys$base_clustering)[1] } @@ -752,15 +752,15 @@ hierarchical_clustering <- function( # === ORGANISING RESULTS ============================================= - cluster_assignment <- keys$key %>% + cluster_assignment <- keys$key |> dplyr::select( ".internal_character_id", "unique_pattern_key", dplyr::any_of(names(keys$base_clustering)[1]) - ) %>% - dplyr::distinct() %>% - dplyr::left_join(pattern_clusters, by = joiner) %>% - dplyr::select(".internal_character_id", dplyr::all_of(cluster_names)) %>% + ) |> + dplyr::distinct() |> + dplyr::left_join(pattern_clusters, by = joiner) |> + dplyr::select(".internal_character_id", dplyr::all_of(cluster_names)) |> dplyr::mutate( dplyr::across( dplyr::all_of(cluster_names), diff --git a/R/medic.R b/R/medic.R index 1c9ed56..45fbaf9 100644 --- a/R/medic.R +++ b/R/medic.R @@ -205,7 +205,7 @@ medic <- function( # create character id key - saves us some pain when naming if (is.numeric(set_seed)) set.seed(set_seed) - data <- data %>% + data <- data |> dplyr::mutate( .original_order = dplyr::row_number(), .analysis_order = sample( @@ -213,8 +213,8 @@ medic <- function( size = dplyr::n(), replace = FALSE ) - ) %>% - dplyr::arrange(.data$.analysis_order) %>% + ) |> + dplyr::arrange(.data$.analysis_order) |> dplyr::mutate(.internal_character_id = as.character({{ id }})) # make keys @@ -357,24 +357,24 @@ medic <- function( ) # for nice output data - cluster_data <- data %>% - dplyr::arrange(.data$.original_order) %>% - dplyr::select({{ id }}, ".analysis_order", ".internal_character_id") %>% - dplyr::distinct() %>% - dplyr::left_join(cluster_assignment, by = ".internal_character_id") %>% + cluster_data <- data |> + dplyr::arrange(.data$.original_order) |> + dplyr::select({{ id }}, ".analysis_order", ".internal_character_id") |> + dplyr::distinct() |> + dplyr::left_join(cluster_assignment, by = ".internal_character_id") |> dplyr::select(-".internal_character_id") # for nice output data - out_data <- data %>% - dplyr::arrange(.data$.original_order) %>% - dplyr::left_join(cluster_assignment, by = ".internal_character_id") %>% + out_data <- data |> + dplyr::arrange(.data$.original_order) |> + dplyr::left_join(cluster_assignment, by = ".internal_character_id") |> dplyr::select(-".internal_character_id", -".original_order") distance_matrix <- lapply(clusterings, function(d) d$distance_matrix) - expanded_options <- parameters %>% - dplyr::cross_join(data.frame(k = k)) %>% - dplyr::mutate(cluster_name = paste0(.data$clustering, "_k=", .data$k)) %>% + expanded_options <- parameters |> + dplyr::cross_join(data.frame(k = k)) |> + dplyr::mutate(cluster_name = paste0(.data$clustering, "_k=", .data$k)) |> dplyr::relocate("cluster_name") @@ -384,7 +384,7 @@ medic <- function( return( structure( list( - data = out_data %>% dplyr::select(-".analysis_order"), + data = out_data |> dplyr::select(-".analysis_order"), clustering = cluster_data, variables = input_variables, parameters = expanded_options, diff --git a/R/refactor.R b/R/refactor.R index 591aca6..b0528cf 100644 --- a/R/refactor.R +++ b/R/refactor.R @@ -47,11 +47,11 @@ refactor <- function(object, ..., inheret_parameters = TRUE) { changes <- rlang::enquos(..., .named = TRUE, .homonyms = "last") clust <- object - clust$data <- clust$data %>% dplyr::mutate(...) - clust$clustering <- clust$clustering %>% dplyr::mutate(...) + clust$data <- clust$data |> dplyr::mutate(...) + clust$clustering <- clust$clustering |> dplyr::mutate(...) - only_changed_clusters <- clust$clustering %>% - dplyr::transmute(...) %>% + only_changed_clusters <- clust$clustering |> + dplyr::transmute(...) |> colnames() all_cluster_names <- colnames(clust$clustering)[-1] @@ -67,13 +67,13 @@ refactor <- function(object, ..., inheret_parameters = TRUE) { ) } - clust$parameters <- clust$parameters %>% # this used to be object$parameters - dplyr::left_join(tmp, by = "cluster_name") %>% + clust$parameters <- clust$parameters |> # this used to be object$parameters + dplyr::left_join(tmp, by = "cluster_name") |> dplyr::arrange(order(match(.data$cluster_name, all_cluster_names))) # inheritance is only name based at the moment -- can we do better????? if (!inheret_parameters) { - clust$parameters <- clust$parameters %>% + clust$parameters <- clust$parameters |> dplyr::mutate( dplyr::across( .data$clustering:.data$p, diff --git a/R/summary-utils.R b/R/summary-utils.R index 8a95f33..da90c70 100644 --- a/R/summary-utils.R +++ b/R/summary-utils.R @@ -89,4 +89,30 @@ default_atc_groups <- function(object, min_n = 2) { } found_atc <- sort(unique(stringr::str_sub(observed_atc, 1, i))) return(data.frame(regex = paste0("^", found_atc), atc_groups = found_atc)) -} \ No newline at end of file +} + + +#' Quickfix regex many-to-many inner join +#' +#' fuzzyjoin was kicked out of CRAN, so I quickly made an extremely simple +#' version of regex_inner_join, that would suit our needs here. +#' +#' @param x A data frame with valid ATC codes. +#' @param y A data frame with regex codes and corresponding groups. +#' @param by A named vector of length 1 where the name is the name of the +#' ATC column in `x` and the value is the regex column in `y`. +#' +#' This function assumes that x has the full ATC codes and that y has the +#' regex, and that by is only of length 1. And we're simply doing a cross-join +#' caus i'm lazy like that. +#' +#' @return A data frame with added columns from y to x based on a regex match. +regex_inner_join <- function(x, y, by) { + string_name <- names(by)[1] + regex_name <- by[[1]] + + dplyr::cross_join(x, y) |> + dplyr::filter( + stringr::str_detect(!!dplyr::sym(string_name), !!dplyr::sym(regex_name)) + ) +} diff --git a/R/summary.R b/R/summary.R index a2ff639..6692014 100644 --- a/R/summary.R +++ b/R/summary.R @@ -752,7 +752,7 @@ if (length(clust$variables$timing) == 0) { values_to = "Cluster" ) |> dplyr::filter(.data$Cluster %in% output_clusters) |> - fuzzyjoin::regex_inner_join(atc_groups, by = by_name) |> + regex_inner_join(atc_groups, by = by_name) |> dplyr::rename("ATC Groups" = "atc_groups") |> dplyr::mutate( "ATC Groups" = factor(.data$`ATC Groups`, levels = all_atc_groups), diff --git a/R/tame_package.R b/R/tame_package.R index 88317e7..230c621 100644 --- a/R/tame_package.R +++ b/R/tame_package.R @@ -7,7 +7,7 @@ #' #' @importFrom rlang .data #' @importFrom rlang := -#' @importFrom magrittr %>% + #' @importFrom utils as.roman str #' @importFrom Rcpp evalCpp #' @useDynLib tame, .registration = TRUE diff --git a/man/regex_inner_join.Rd b/man/regex_inner_join.Rd new file mode 100644 index 0000000..a79e305 --- /dev/null +++ b/man/regex_inner_join.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/summary-utils.R +\name{regex_inner_join} +\alias{regex_inner_join} +\title{Quickfix regex many-to-many inner join} +\usage{ +regex_inner_join(x, y, by) +} +\arguments{ +\item{x}{A data frame with valid ATC codes.} + +\item{y}{A data frame with regex codes and corresponding groups.} + +\item{by}{A named vector of length 1 where the name is the name of the +ATC column in \code{x} and the value is the regex column in \code{y}. + +This function assumes that x has the full ATC codes and that y has the +regex, and that by is only of length 1. And we're simply doing a cross-join +caus i'm lazy like that.} +} +\value{ +A data frame with added columns from y to x based on a regex match. +} +\description{ +fuzzyjoin was kicked out of CRAN, so I quickly made an extremely simple +version of regex_inner_join, that would suit our needs here. +}