diff --git a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R index 9cbf0184..cccf5fa9 100644 --- a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R +++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R @@ -826,12 +826,21 @@ generate_correlation_plot_configs <- function(df) { filter_data <- function(df, variables, nf = FALSE, missing = FALSE, adjust = FALSE, rank = FALSE, limits_map = NULL, verbose = TRUE) { - # Precompute column names for efficiency + # Precompute Column Names for Efficiency avg_zscore_cols <- paste0("Avg_Zscore_", variables) z_lm_cols <- paste0("Z_lm_", variables) + # # Optional: Validate that the expected columns exist in the dataframe + # expected_cols <- c(avg_zscore_cols, z_lm_cols, variables) + # missing_cols <- setdiff(expected_cols, names(df)) + # if (length(missing_cols) > 0) { + # stop("The following expected columns are missing from the dataframe: ", + # paste(missing_cols, collapse = ", ")) + # } + + # Adjust NAs if 'adjust' is TRUE if (adjust) { - if (verbose) message("Replacing NA with 0.001 for Avg_Zscore_ and Z_lm_ columns") + if (verbose) message("Replacing NA with 0.001 for Avg_Zscore_ and Z_lm_ columns.") df <- df %>% mutate( across(all_of(avg_zscore_cols), ~ replace_na(., 0.001)), @@ -839,10 +848,13 @@ filter_data <- function(df, variables, nf = FALSE, missing = FALSE, adjust = FAL ) } - # Filter non-finite values + # Filter Non-Finite Values if 'nf' is TRUE if (nf) { + if (verbose) message("Filtering non-finite values for variables: ", paste(variables, collapse = ", ")) + + # Identify non-finite rows for logging non_finite_df <- df %>% - filter(across(all_of(variables), ~ !is.finite(.))) + filter(if_any(all_of(variables), ~ !is.finite(.))) if (verbose && nrow(non_finite_df) > 0) { message("Non-finite rows for variables ", paste(variables, collapse = ", "), ":") @@ -851,13 +863,16 @@ filter_data <- function(df, variables, nf = FALSE, missing = FALSE, adjust = FAL # Keep only rows where all specified variables are finite df <- df %>% - filter(across(all_of(variables), ~ is.finite(.))) + filter(if_all(all_of(variables), ~ is.finite(.))) } - # Filter missing malues + # Filter Missing Values if 'missing' is TRUE if (missing) { + if (verbose) message("Filtering missing values for variables: ", paste(variables, collapse = ", ")) + + # Identify missing rows for logging missing_df <- df %>% - filter(across(all_of(variables), ~ is.na(.))) + filter(if_any(all_of(variables), ~ is.na(.))) if (verbose && nrow(missing_df) > 0) { message("Missing data for variables ", paste(variables, collapse = ", "), ":") @@ -866,16 +881,18 @@ filter_data <- function(df, variables, nf = FALSE, missing = FALSE, adjust = FAL # Keep only rows where all specified variables are not missing df <- df %>% - filter(across(all_of(variables), ~ !is.na(.))) + filter(if_all(all_of(variables), ~ !is.na(.))) } - # Filter data outside of y-limits (for plotting) + # Apply Limits from 'limits_map' if Provided if (!is.null(limits_map)) { for (variable in names(limits_map)) { if (variable %in% variables) { ylim_vals <- limits_map[[variable]] - # Identify out-of-range data + if (verbose) message("Applying limits for variable ", variable, ": [", ylim_vals[1], ", ", ylim_vals[2], "].") + + # Identify out-of-range data for logging out_of_range_df <- df %>% filter(.data[[variable]] < ylim_vals[1] | .data[[variable]] > ylim_vals[2]) @@ -890,7 +907,8 @@ filter_data <- function(df, variables, nf = FALSE, missing = FALSE, adjust = FAL } } } - + + # Calculate Rank Columns if 'rank' is TRUE if (rank) { if (verbose) message("Calculating rank columns for variables: ", paste(variables, collapse = ", ")) @@ -898,21 +916,20 @@ filter_data <- function(df, variables, nf = FALSE, missing = FALSE, adjust = FAL df <- df %>% mutate( # Rank based on Avg_Zscore_ - across(all_of(avg_zscore_cols), ~ rank(., na.last = "keep"), .names = "Rank_{.col}"), + across(all_of(avg_zscore_cols), ~ rank(., na.last = "keep"), .names = "Rank_Avg_Zscore_{.col}"), # Rank_lm based on Z_lm_ - across(all_of(z_lm_cols), ~ rank(., na.last = "keep"), .names = "Rank_lm_{.col}") + across(all_of(z_lm_cols), ~ rank(., na.last = "keep"), .names = "Rank_lm_Z_lm_{.col}") ) - # Rename the newly created rank columns to match desired names - for (variable in variables) { - old_rank_col <- paste0("Rank_Avg_Zscore_", variable) - new_rank_col <- paste0("Rank_", variable) - df <- df %>% rename(!!new_rank_col := !!sym(old_rank_col)) - - old_rank_lm_col <- paste0("Rank_lm_Z_lm_", variable) - new_rank_lm_col <- paste0("Rank_lm_", variable) - df <- df %>% rename(!!new_rank_lm_col := !!sym(old_rank_lm_col)) - } + # Prepare a named vector for renaming columns: new_name = old_name + rename_vector <- c( + setNames(paste0("Rank_", variables), paste0("Rank_Avg_Zscore_", avg_zscore_cols)), + setNames(paste0("Rank_lm_", variables), paste0("Rank_lm_Z_lm_", z_lm_cols)) + ) + + # Rename the rank columns in a single step + df <- df %>% + rename(!!!rename_vector) } return(df)