From 91fc9ecfda329b82e73c5c4dacbc45587c8d960f Mon Sep 17 00:00:00 2001 From: Bryan Roessler Date: Mon, 16 Sep 2024 17:32:22 -0400 Subject: [PATCH] Convert filter_data to dplyr --- .../apps/r/calculate_interaction_zscores.R | 36 +++++++------------ 1 file changed, 13 insertions(+), 23 deletions(-) diff --git a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R index 08e4dcda..30e7d1ed 100644 --- a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R +++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R @@ -776,45 +776,35 @@ generate_correlation_plot_configs <- function(df, variables) { return(configs) } -filter_data <- function(df, variables, nf = FALSE, missing = FALSE, limits_map = NULL) { +filter_data <- function(df, variables, nf = FALSE, missing = FALSE, limits_map = NULL, verbose = TRUE) { - # Loop through each variable to filter and print missing/out-of-range data for (variable in variables) { - y_var_sym <- sym(variable) - if (nf) { - non_finite <- df %>% filter(!is.finite(!!y_var_sym)) - if (nrow(non_finite) > 0) { + non_finite <- df %>% filter(!is.finite(.data[[variable]])) + if (verbose && nrow(non_finite) > 0) { message("Non-finite rows for variable ", variable, ":") print(non_finite) } - df <- df %>% filter(is.finite(!!y_var_sym)) + df <- df %>% filter(is.finite(.data[[variable]])) } - # Filter missing data if (missing) { - missing_data <- df %>% filter(is.na(!!y_var_sym)) - if (nrow(missing_data) > 0) { + missing_data <- df %>% filter(is.na(.data[[variable]])) + if (verbose && nrow(missing_data) > 0) { message("Missing data for variable ", variable, ":") print(missing_data) } - df <- df %>% filter(!is.na(!!y_var_sym)) + df <- df %>% filter(!is.na(.data[[variable]])) } - # Filter out-of-range data if limits_map is provided - if (!is.null(limits_map)) { + if (!is.null(limits_map) && !is.null(limits_map[[variable]])) { ylim_vals <- limits_map[[variable]] - - # Print and filter out-of-range data - out_of_range_data <- df %>% filter( - !!y_var_sym < ylim_vals[1] | !!y_var_sym > ylim_vals[2] - ) - if (nrow(out_of_range_data) > 0) { + out_of_range_data <- df %>% filter(.data[[variable]] < ylim_vals[1] | .data[[variable]] > ylim_vals[2]) + if (verbose && nrow(out_of_range_data) > 0) { message("Out-of-range data for variable ", variable, ":") print(out_of_range_data) - df <- df %>% - filter(!!y_var_sym >= ylim_vals[1] & !!y_var_sym <= ylim_vals[2]) } + df <- df %>% filter(.data[[variable]] >= ylim_vals[1] & .data[[variable]] <= ylim_vals[2]) } } @@ -1209,7 +1199,7 @@ main <- function() { message("Generating rank plots") # Generate rank plots for L and K using standard ranks rank_plot_configs <- generate_rank_plot_configs( - df = zscores_interactions, + df = zscores_interactions_joined, variables = interaction_vars, is_lm = FALSE, adjust = TRUE @@ -1222,7 +1212,7 @@ main <- function() { message("Generating ranked linear model plots") # Generate rank plots for L and K using linear model (`lm`) ranks rank_lm_plot_configs <- generate_rank_plot_configs( - df = zscores_interactions, + df = zscores_interactions_joined, variables = interaction_vars, is_lm = TRUE, adjust = TRUE