Extend plot filtering

This commit is contained in:
2024-09-16 15:04:15 -04:00
parent b13f2df26b
commit 4045b31543

View File

@@ -579,7 +579,7 @@ generate_interaction_plot_configs <- function(df, variables) {
AUC = c(-6500, 6500)
)
df_filtered <- filter_data_for_plots(df, variables, limits_map)$filtered_data
df_filtered <- filter_data_for_plots(df, variables, missing = TRUE, limits_map)
# Define annotation label functions
generate_annotation_labels <- function(df, var, annotation_name) {
@@ -670,10 +670,11 @@ generate_interaction_plot_configs <- function(df, variables) {
return(configs)
}
generate_rank_plot_configs <- function(df, variables, is_lm = FALSE, adjust = FALSE) {
generate_rank_plot_configs <- function(df, interaction_vars, rank_vars = c("L", "K"), is_lm = FALSE, adjust = FALSE) {
df_filtered <- filter_data_for_plots(df, variables, missing = TRUE)
for (var in interaction_vars) {
for (var in variables) {
avg_zscore_col <- paste0("Avg_Zscore_", var)
z_lm_col <- paste0("Z_lm_", var)
rank_col <- paste0("Rank_", var)
@@ -694,7 +695,7 @@ generate_rank_plot_configs <- function(df, interaction_vars, rank_vars = c("L",
configs <- list()
# Generate plot configurations for rank variables (L and K) with sd bands
for (var in rank_vars) {
for (var in c("L", "K")) {
if (is_lm) {
rank_var <- paste0("Rank_lm_", var)
zscore_var <- paste0("Z_lm_", var)
@@ -779,53 +780,55 @@ filter_and_print_non_finite <- function(df, vars_to_check, print_vars) {
non_finite_rows <- df %>% filter(if_any(all_of(vars_to_check), ~ !is.finite(.)))
if (nrow(non_finite_rows) > 0) {
message("Removing the following non-finite rows:")
message("Filtering non-finite rows:")
print(non_finite_rows %>% select(all_of(print_vars)), n = 200)
}
df %>% filter(if_all(all_of(vars_to_check), is.finite))
}
filter_data_for_plots <- function(df, variables, limits_map = NULL) {
filter_data_for_plots <- function(df, variables, missing = TRUE, limits_map = NULL) {
# Initialize lists to store lm lines and filtered data
# Initialize lists to store lm lines
lm_lines <- list()
# Print out NA and out-of-range data separately
# Check for missing and out-of-range data
for (variable in variables) {
# Get y-limits for the variable
ylim_vals <- limits_map[[variable]]
# Extract precomputed linear model coefficients
lm_lines[[variable]] <- list(
intercept = df[[paste0("lm_intercept_", variable)]],
slope = df[[paste0("lm_slope_", variable)]]
)
# Convert variable name to symbol for dplyr
y_var_sym <- sym(variable)
# Identify missing data and print it
missing_data <- df %>% filter(is.na(!!y_var_sym))
if (nrow(missing_data) > 0) {
message("Missing data for variable ", variable, ":")
print(missing_data)
# Print missing data if requested
if (missing) {
missing_data <- df %>% filter(is.na(!!y_var_sym))
if (nrow(missing_data) > 0) {
message("Filtering missing data for variable ", variable, " for plotting:")
print(head(missing_data, 10)) # Print only the first 10 rows to avoid too much output
}
}
# Identify out-of-range data and print it
out_of_range_data <- df %>% filter(
!is.na(!!y_var_sym) &
(!!y_var_sym < min(ylim_vals, na.rm = TRUE) | !!y_var_sym > max(ylim_vals, na.rm = TRUE))
)
if (nrow(out_of_range_data) > 0) {
message("Out-of-range data for variable ", variable, ":")
print(out_of_range_data)
# Print out-of-range data if limits_map is provided
if (!is.null(limits_map)) {
ylim_vals <- limits_map[[variable]]
out_of_range_data <- df %>% filter(
!is.na(!!y_var_sym) &
(!!y_var_sym < min(ylim_vals, na.rm = TRUE) | !!y_var_sym > max(ylim_vals, na.rm = TRUE))
)
if (nrow(out_of_range_data) > 0) {
message("Filtering out-of-range data for variable ", variable, " for plotting:")
print(head(out_of_range_data, 10)) # Print only the first 10 rows
}
}
}
# Perform all filtering at once for all variables
df_filtered <- df %>% filter(across(all_of(variables), ~ !is.na(.))) %>%
filter(across(all_of(variables), ~ between(., limits_map[[cur_column()]][1], limits_map[[cur_column()]][2]), .names = "filter_{col}"))
# Apply filtering across all variables in one step using if_any and if_all
if (!is.null(limits_map)) {
df_filtered <- df %>%
filter(if_all(all_of(variables), ~ !is.na(.))) %>%
filter(if_all(all_of(variables),
~ between(., limits_map[[cur_column()]][1], limits_map[[cur_column()]][2])
))
} else {
df_filtered <- df %>% filter(if_all(all_of(variables), ~ !is.na(.)))
}
return(df_filtered)
}