|
@@ -579,7 +579,7 @@ generate_interaction_plot_configs <- function(df, variables) {
|
|
AUC = c(-6500, 6500)
|
|
AUC = c(-6500, 6500)
|
|
)
|
|
)
|
|
|
|
|
|
- df_filtered <- filter_data_for_plots(df, variables, limits_map)$filtered_data
|
|
|
|
|
|
+ df_filtered <- filter_data_for_plots(df, variables, missing = TRUE, limits_map)
|
|
|
|
|
|
# Define annotation label functions
|
|
# Define annotation label functions
|
|
generate_annotation_labels <- function(df, var, annotation_name) {
|
|
generate_annotation_labels <- function(df, var, annotation_name) {
|
|
@@ -670,10 +670,11 @@ generate_interaction_plot_configs <- function(df, variables) {
|
|
return(configs)
|
|
return(configs)
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+generate_rank_plot_configs <- function(df, variables, is_lm = FALSE, adjust = FALSE) {
|
|
|
|
|
|
-generate_rank_plot_configs <- function(df, interaction_vars, rank_vars = c("L", "K"), is_lm = FALSE, adjust = FALSE) {
|
|
|
|
|
|
+ df_filtered <- filter_data_for_plots(df, variables, missing = TRUE)
|
|
|
|
|
|
- for (var in interaction_vars) {
|
|
|
|
|
|
+ for (var in variables) {
|
|
avg_zscore_col <- paste0("Avg_Zscore_", var)
|
|
avg_zscore_col <- paste0("Avg_Zscore_", var)
|
|
z_lm_col <- paste0("Z_lm_", var)
|
|
z_lm_col <- paste0("Z_lm_", var)
|
|
rank_col <- paste0("Rank_", var)
|
|
rank_col <- paste0("Rank_", var)
|
|
@@ -694,7 +695,7 @@ generate_rank_plot_configs <- function(df, interaction_vars, rank_vars = c("L",
|
|
configs <- list()
|
|
configs <- list()
|
|
|
|
|
|
# Generate plot configurations for rank variables (L and K) with sd bands
|
|
# Generate plot configurations for rank variables (L and K) with sd bands
|
|
- for (var in rank_vars) {
|
|
|
|
|
|
+ for (var in c("L", "K")) {
|
|
if (is_lm) {
|
|
if (is_lm) {
|
|
rank_var <- paste0("Rank_lm_", var)
|
|
rank_var <- paste0("Rank_lm_", var)
|
|
zscore_var <- paste0("Z_lm_", var)
|
|
zscore_var <- paste0("Z_lm_", var)
|
|
@@ -779,53 +780,55 @@ filter_and_print_non_finite <- function(df, vars_to_check, print_vars) {
|
|
non_finite_rows <- df %>% filter(if_any(all_of(vars_to_check), ~ !is.finite(.)))
|
|
non_finite_rows <- df %>% filter(if_any(all_of(vars_to_check), ~ !is.finite(.)))
|
|
|
|
|
|
if (nrow(non_finite_rows) > 0) {
|
|
if (nrow(non_finite_rows) > 0) {
|
|
- message("Removing the following non-finite rows:")
|
|
|
|
|
|
+ message("Filtering non-finite rows:")
|
|
print(non_finite_rows %>% select(all_of(print_vars)), n = 200)
|
|
print(non_finite_rows %>% select(all_of(print_vars)), n = 200)
|
|
}
|
|
}
|
|
|
|
|
|
df %>% filter(if_all(all_of(vars_to_check), is.finite))
|
|
df %>% filter(if_all(all_of(vars_to_check), is.finite))
|
|
}
|
|
}
|
|
|
|
|
|
-filter_data_for_plots <- function(df, variables, limits_map = NULL) {
|
|
|
|
|
|
+filter_data_for_plots <- function(df, variables, missing = TRUE, limits_map = NULL) {
|
|
|
|
|
|
- # Initialize lists to store lm lines and filtered data
|
|
|
|
|
|
+ # Initialize lists to store lm lines
|
|
lm_lines <- list()
|
|
lm_lines <- list()
|
|
|
|
|
|
- # Print out NA and out-of-range data separately
|
|
|
|
|
|
+ # Check for missing and out-of-range data
|
|
for (variable in variables) {
|
|
for (variable in variables) {
|
|
- # Get y-limits for the variable
|
|
|
|
- ylim_vals <- limits_map[[variable]]
|
|
|
|
-
|
|
|
|
- # Extract precomputed linear model coefficients
|
|
|
|
- lm_lines[[variable]] <- list(
|
|
|
|
- intercept = df[[paste0("lm_intercept_", variable)]],
|
|
|
|
- slope = df[[paste0("lm_slope_", variable)]]
|
|
|
|
- )
|
|
|
|
-
|
|
|
|
- # Convert variable name to symbol for dplyr
|
|
|
|
y_var_sym <- sym(variable)
|
|
y_var_sym <- sym(variable)
|
|
-
|
|
|
|
- # Identify missing data and print it
|
|
|
|
- missing_data <- df %>% filter(is.na(!!y_var_sym))
|
|
|
|
- if (nrow(missing_data) > 0) {
|
|
|
|
- message("Missing data for variable ", variable, ":")
|
|
|
|
- print(missing_data)
|
|
|
|
|
|
+
|
|
|
|
+ # Print missing data if requested
|
|
|
|
+ if (missing) {
|
|
|
|
+ missing_data <- df %>% filter(is.na(!!y_var_sym))
|
|
|
|
+ if (nrow(missing_data) > 0) {
|
|
|
|
+ message("Filtering missing data for variable ", variable, " for plotting:")
|
|
|
|
+ print(head(missing_data, 10)) # Print only the first 10 rows to avoid too much output
|
|
|
|
+ }
|
|
}
|
|
}
|
|
-
|
|
|
|
- # Identify out-of-range data and print it
|
|
|
|
- out_of_range_data <- df %>% filter(
|
|
|
|
- !is.na(!!y_var_sym) &
|
|
|
|
- (!!y_var_sym < min(ylim_vals, na.rm = TRUE) | !!y_var_sym > max(ylim_vals, na.rm = TRUE))
|
|
|
|
- )
|
|
|
|
- if (nrow(out_of_range_data) > 0) {
|
|
|
|
- message("Out-of-range data for variable ", variable, ":")
|
|
|
|
- print(out_of_range_data)
|
|
|
|
|
|
+
|
|
|
|
+ # Print out-of-range data if limits_map is provided
|
|
|
|
+ if (!is.null(limits_map)) {
|
|
|
|
+ ylim_vals <- limits_map[[variable]]
|
|
|
|
+ out_of_range_data <- df %>% filter(
|
|
|
|
+ !is.na(!!y_var_sym) &
|
|
|
|
+ (!!y_var_sym < min(ylim_vals, na.rm = TRUE) | !!y_var_sym > max(ylim_vals, na.rm = TRUE))
|
|
|
|
+ )
|
|
|
|
+ if (nrow(out_of_range_data) > 0) {
|
|
|
|
+ message("Filtering out-of-range data for variable ", variable, " for plotting:")
|
|
|
|
+ print(head(out_of_range_data, 10)) # Print only the first 10 rows
|
|
|
|
+ }
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
- # Perform all filtering at once for all variables
|
|
|
|
- df_filtered <- df %>% filter(across(all_of(variables), ~ !is.na(.))) %>%
|
|
|
|
- filter(across(all_of(variables), ~ between(., limits_map[[cur_column()]][1], limits_map[[cur_column()]][2]), .names = "filter_{col}"))
|
|
|
|
|
|
+ # Apply filtering across all variables in one step using if_any and if_all
|
|
|
|
+ if (!is.null(limits_map)) {
|
|
|
|
+ df_filtered <- df %>%
|
|
|
|
+ filter(if_all(all_of(variables), ~ !is.na(.))) %>%
|
|
|
|
+ filter(if_all(all_of(variables),
|
|
|
|
+ ~ between(., limits_map[[cur_column()]][1], limits_map[[cur_column()]][2])
|
|
|
|
+ ))
|
|
|
|
+ } else {
|
|
|
|
+ df_filtered <- df %>% filter(if_all(all_of(variables), ~ !is.na(.)))
|
|
|
|
+ }
|
|
|
|
|
|
return(df_filtered)
|
|
return(df_filtered)
|
|
}
|
|
}
|