diff --git a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R index 8f590039..caef72f6 100644 --- a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R +++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R @@ -579,7 +579,7 @@ generate_interaction_plot_configs <- function(df, variables) { AUC = c(-6500, 6500) ) - df_filtered <- filter_data_for_plots(df, variables, limits_map)$filtered_data + df_filtered <- filter_data_for_plots(df, variables, missing = TRUE, limits_map) # Define annotation label functions generate_annotation_labels <- function(df, var, annotation_name) { @@ -670,10 +670,11 @@ generate_interaction_plot_configs <- function(df, variables) { return(configs) } +generate_rank_plot_configs <- function(df, variables, is_lm = FALSE, adjust = FALSE) { -generate_rank_plot_configs <- function(df, interaction_vars, rank_vars = c("L", "K"), is_lm = FALSE, adjust = FALSE) { + df_filtered <- filter_data_for_plots(df, variables, missing = TRUE) - for (var in interaction_vars) { + for (var in variables) { avg_zscore_col <- paste0("Avg_Zscore_", var) z_lm_col <- paste0("Z_lm_", var) rank_col <- paste0("Rank_", var) @@ -694,7 +695,7 @@ generate_rank_plot_configs <- function(df, interaction_vars, rank_vars = c("L", configs <- list() # Generate plot configurations for rank variables (L and K) with sd bands - for (var in rank_vars) { + for (var in c("L", "K")) { if (is_lm) { rank_var <- paste0("Rank_lm_", var) zscore_var <- paste0("Z_lm_", var) @@ -779,53 +780,55 @@ filter_and_print_non_finite <- function(df, vars_to_check, print_vars) { non_finite_rows <- df %>% filter(if_any(all_of(vars_to_check), ~ !is.finite(.))) if (nrow(non_finite_rows) > 0) { - message("Removing the following non-finite rows:") + message("Filtering non-finite rows:") print(non_finite_rows %>% select(all_of(print_vars)), n = 200) } df %>% filter(if_all(all_of(vars_to_check), is.finite)) } -filter_data_for_plots <- function(df, variables, limits_map = NULL) { +filter_data_for_plots <- function(df, variables, missing = TRUE, limits_map = NULL) { - # Initialize lists to store lm lines and filtered data + # Initialize lists to store lm lines lm_lines <- list() - # Print out NA and out-of-range data separately + # Check for missing and out-of-range data for (variable in variables) { - # Get y-limits for the variable - ylim_vals <- limits_map[[variable]] - - # Extract precomputed linear model coefficients - lm_lines[[variable]] <- list( - intercept = df[[paste0("lm_intercept_", variable)]], - slope = df[[paste0("lm_slope_", variable)]] - ) - - # Convert variable name to symbol for dplyr y_var_sym <- sym(variable) - - # Identify missing data and print it - missing_data <- df %>% filter(is.na(!!y_var_sym)) - if (nrow(missing_data) > 0) { - message("Missing data for variable ", variable, ":") - print(missing_data) + + # Print missing data if requested + if (missing) { + missing_data <- df %>% filter(is.na(!!y_var_sym)) + if (nrow(missing_data) > 0) { + message("Filtering missing data for variable ", variable, " for plotting:") + print(head(missing_data, 10)) # Print only the first 10 rows to avoid too much output + } } - - # Identify out-of-range data and print it - out_of_range_data <- df %>% filter( - !is.na(!!y_var_sym) & - (!!y_var_sym < min(ylim_vals, na.rm = TRUE) | !!y_var_sym > max(ylim_vals, na.rm = TRUE)) - ) - if (nrow(out_of_range_data) > 0) { - message("Out-of-range data for variable ", variable, ":") - print(out_of_range_data) + + # Print out-of-range data if limits_map is provided + if (!is.null(limits_map)) { + ylim_vals <- limits_map[[variable]] + out_of_range_data <- df %>% filter( + !is.na(!!y_var_sym) & + (!!y_var_sym < min(ylim_vals, na.rm = TRUE) | !!y_var_sym > max(ylim_vals, na.rm = TRUE)) + ) + if (nrow(out_of_range_data) > 0) { + message("Filtering out-of-range data for variable ", variable, " for plotting:") + print(head(out_of_range_data, 10)) # Print only the first 10 rows + } } } - # Perform all filtering at once for all variables - df_filtered <- df %>% filter(across(all_of(variables), ~ !is.na(.))) %>% - filter(across(all_of(variables), ~ between(., limits_map[[cur_column()]][1], limits_map[[cur_column()]][2]), .names = "filter_{col}")) + # Apply filtering across all variables in one step using if_any and if_all + if (!is.null(limits_map)) { + df_filtered <- df %>% + filter(if_all(all_of(variables), ~ !is.na(.))) %>% + filter(if_all(all_of(variables), + ~ between(., limits_map[[cur_column()]][1], limits_map[[cur_column()]][2]) + )) + } else { + df_filtered <- df %>% filter(if_all(all_of(variables), ~ !is.na(.))) + } return(df_filtered) }