Extend plot filtering

This commit is contained in:
2024-09-16 15:04:15 -04:00
parent b13f2df26b
commit 4045b31543

View File

@@ -579,7 +579,7 @@ generate_interaction_plot_configs <- function(df, variables) {
AUC = c(-6500, 6500) AUC = c(-6500, 6500)
) )
df_filtered <- filter_data_for_plots(df, variables, limits_map)$filtered_data df_filtered <- filter_data_for_plots(df, variables, missing = TRUE, limits_map)
# Define annotation label functions # Define annotation label functions
generate_annotation_labels <- function(df, var, annotation_name) { generate_annotation_labels <- function(df, var, annotation_name) {
@@ -670,10 +670,11 @@ generate_interaction_plot_configs <- function(df, variables) {
return(configs) return(configs)
} }
generate_rank_plot_configs <- function(df, variables, is_lm = FALSE, adjust = FALSE) {
generate_rank_plot_configs <- function(df, interaction_vars, rank_vars = c("L", "K"), is_lm = FALSE, adjust = FALSE) { df_filtered <- filter_data_for_plots(df, variables, missing = TRUE)
for (var in interaction_vars) { for (var in variables) {
avg_zscore_col <- paste0("Avg_Zscore_", var) avg_zscore_col <- paste0("Avg_Zscore_", var)
z_lm_col <- paste0("Z_lm_", var) z_lm_col <- paste0("Z_lm_", var)
rank_col <- paste0("Rank_", var) rank_col <- paste0("Rank_", var)
@@ -694,7 +695,7 @@ generate_rank_plot_configs <- function(df, interaction_vars, rank_vars = c("L",
configs <- list() configs <- list()
# Generate plot configurations for rank variables (L and K) with sd bands # Generate plot configurations for rank variables (L and K) with sd bands
for (var in rank_vars) { for (var in c("L", "K")) {
if (is_lm) { if (is_lm) {
rank_var <- paste0("Rank_lm_", var) rank_var <- paste0("Rank_lm_", var)
zscore_var <- paste0("Z_lm_", var) zscore_var <- paste0("Z_lm_", var)
@@ -779,53 +780,55 @@ filter_and_print_non_finite <- function(df, vars_to_check, print_vars) {
non_finite_rows <- df %>% filter(if_any(all_of(vars_to_check), ~ !is.finite(.))) non_finite_rows <- df %>% filter(if_any(all_of(vars_to_check), ~ !is.finite(.)))
if (nrow(non_finite_rows) > 0) { if (nrow(non_finite_rows) > 0) {
message("Removing the following non-finite rows:") message("Filtering non-finite rows:")
print(non_finite_rows %>% select(all_of(print_vars)), n = 200) print(non_finite_rows %>% select(all_of(print_vars)), n = 200)
} }
df %>% filter(if_all(all_of(vars_to_check), is.finite)) df %>% filter(if_all(all_of(vars_to_check), is.finite))
} }
#' Drop rows that cannot be plotted for the requested variables
#'
#' Rows are removed when any column named in `variables` is `NA`, or when a
#' column that has an entry in `limits_map` falls outside that entry's range.
#' Before filtering, up to 10 offending rows per variable are printed so the
#' caller can see what is about to be dropped.
#'
#' @param df Data frame (or tibble) containing every column in `variables`.
#' @param variables Character vector of column names to check.
#' @param missing If `TRUE`, print the rows with missing values for each
#'   variable. This only controls the report: rows with `NA` in any of
#'   `variables` are removed regardless of this flag.
#' @param limits_map Optional named list mapping a variable name to a numeric
#'   vector of plotting limits. The smallest and largest non-`NA` values are
#'   used as the inclusive bounds, so `c(lo, hi)` and `c(hi, lo)` are
#'   equivalent. Variables without a usable entry are not range-filtered.
#' @return `df` restricted to the rows that passed every check.
filter_data_for_plots <- function(df, variables, missing = TRUE, limits_map = NULL) {
  # Normalise the limits once so the report and the filter share the same
  # bounds (the report previously used min/max while the filter used [1]/[2]).
  limits <- list()
  for (variable in variables) {
    # `[[` on NULL or on a list without this name yields NULL
    ylim_vals <- limits_map[[variable]]
    if (!is.null(ylim_vals) && any(!is.na(ylim_vals))) {
      limits[[variable]] <- range(ylim_vals, na.rm = TRUE)
    }
  }

  # Report what is about to be filtered, one variable at a time
  for (variable in variables) {
    if (missing) {
      missing_data <- df %>% filter(is.na(.data[[variable]]))
      if (nrow(missing_data) > 0) {
        message("Filtering missing data for variable ", variable, " for plotting:")
        print(head(missing_data, 10)) # Cap the output at 10 rows
      }
    }

    bounds <- limits[[variable]]
    if (!is.null(bounds)) {
      out_of_range_data <- df %>% filter(
        !is.na(.data[[variable]]) &
          (.data[[variable]] < bounds[1] | .data[[variable]] > bounds[2])
      )
      if (nrow(out_of_range_data) > 0) {
        message("Filtering out-of-range data for variable ", variable, " for plotting:")
        print(head(out_of_range_data, 10)) # Cap the output at 10 rows
      }
    }
  }

  # Missing values are always dropped; the range filter is applied on top of
  # that, and only to the variables that actually have limits.
  df_filtered <- df %>% filter(if_all(all_of(variables), ~ !is.na(.)))

  limited_vars <- names(limits)
  if (length(limited_vars) > 0) {
    df_filtered <- df_filtered %>%
      filter(if_all(
        all_of(limited_vars),
        ~ between(., limits[[cur_column()]][1], limits[[cur_column()]][2])
      ))
  }

  df_filtered
}