|
@@ -826,12 +826,21 @@ generate_correlation_plot_configs <- function(df) {
|
|
|
filter_data <- function(df, variables, nf = FALSE, missing = FALSE, adjust = FALSE,
|
|
|
rank = FALSE, limits_map = NULL, verbose = TRUE) {
|
|
|
|
|
|
- # Precompute column names for efficiency
|
|
|
+ # Precompute Column Names for Efficiency
|
|
|
avg_zscore_cols <- paste0("Avg_Zscore_", variables)
|
|
|
z_lm_cols <- paste0("Z_lm_", variables)
|
|
|
|
|
|
+ # # Optional: Validate that the expected columns exist in the dataframe
|
|
|
+ # expected_cols <- c(avg_zscore_cols, z_lm_cols, variables)
|
|
|
+ # missing_cols <- setdiff(expected_cols, names(df))
|
|
|
+ # if (length(missing_cols) > 0) {
|
|
|
+ # stop("The following expected columns are missing from the dataframe: ",
|
|
|
+ # paste(missing_cols, collapse = ", "))
|
|
|
+ # }
|
|
|
+
|
|
|
+ # Adjust NAs if 'adjust' is TRUE
|
|
|
if (adjust) {
|
|
|
- if (verbose) message("Replacing NA with 0.001 for Avg_Zscore_ and Z_lm_ columns")
|
|
|
+ if (verbose) message("Replacing NA with 0.001 for Avg_Zscore_ and Z_lm_ columns.")
|
|
|
df <- df %>%
|
|
|
mutate(
|
|
|
across(all_of(avg_zscore_cols), ~ replace_na(., 0.001)),
|
|
@@ -839,10 +848,13 @@ filter_data <- function(df, variables, nf = FALSE, missing = FALSE, adjust = FAL
|
|
|
)
|
|
|
}
|
|
|
|
|
|
- # Filter non-finite values
|
|
|
+ # Filter Non-Finite Values if 'nf' is TRUE
|
|
|
if (nf) {
|
|
|
+ if (verbose) message("Filtering non-finite values for variables: ", paste(variables, collapse = ", "))
|
|
|
+
|
|
|
+ # Identify non-finite rows for logging
|
|
|
non_finite_df <- df %>%
|
|
|
- filter(across(all_of(variables), ~ !is.finite(.)))
|
|
|
+ filter(if_any(all_of(variables), ~ !is.finite(.)))
|
|
|
|
|
|
if (verbose && nrow(non_finite_df) > 0) {
|
|
|
message("Non-finite rows for variables ", paste(variables, collapse = ", "), ":")
|
|
@@ -851,13 +863,16 @@ filter_data <- function(df, variables, nf = FALSE, missing = FALSE, adjust = FAL
|
|
|
|
|
|
# Keep only rows where all specified variables are finite
|
|
|
df <- df %>%
|
|
|
- filter(across(all_of(variables), ~ is.finite(.)))
|
|
|
+ filter(if_all(all_of(variables), ~ is.finite(.)))
|
|
|
}
|
|
|
|
|
|
- # Filter missing malues
|
|
|
+ # Filter Missing Values if 'missing' is TRUE
|
|
|
if (missing) {
|
|
|
+ if (verbose) message("Filtering missing values for variables: ", paste(variables, collapse = ", "))
|
|
|
+
|
|
|
+ # Identify missing rows for logging
|
|
|
missing_df <- df %>%
|
|
|
- filter(across(all_of(variables), ~ is.na(.)))
|
|
|
+ filter(if_any(all_of(variables), ~ is.na(.)))
|
|
|
|
|
|
if (verbose && nrow(missing_df) > 0) {
|
|
|
message("Missing data for variables ", paste(variables, collapse = ", "), ":")
|
|
@@ -866,16 +881,18 @@ filter_data <- function(df, variables, nf = FALSE, missing = FALSE, adjust = FAL
|
|
|
|
|
|
# Keep only rows where all specified variables are not missing
|
|
|
df <- df %>%
|
|
|
- filter(across(all_of(variables), ~ !is.na(.)))
|
|
|
+ filter(if_all(all_of(variables), ~ !is.na(.)))
|
|
|
}
|
|
|
|
|
|
- # Filter data outside of y-limits (for plotting)
|
|
|
+ # Apply Limits from 'limits_map' if Provided
|
|
|
if (!is.null(limits_map)) {
|
|
|
for (variable in names(limits_map)) {
|
|
|
if (variable %in% variables) {
|
|
|
ylim_vals <- limits_map[[variable]]
|
|
|
|
|
|
- # Identify out-of-range data
|
|
|
+ if (verbose) message("Applying limits for variable ", variable, ": [", ylim_vals[1], ", ", ylim_vals[2], "].")
|
|
|
+
|
|
|
+ # Identify out-of-range data for logging
|
|
|
out_of_range_df <- df %>%
|
|
|
filter(.data[[variable]] < ylim_vals[1] | .data[[variable]] > ylim_vals[2])
|
|
|
|
|
@@ -890,7 +907,8 @@ filter_data <- function(df, variables, nf = FALSE, missing = FALSE, adjust = FAL
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
-
|
|
|
+
|
|
|
+ # Calculate Rank Columns if 'rank' is TRUE
|
|
|
if (rank) {
|
|
|
if (verbose) message("Calculating rank columns for variables: ", paste(variables, collapse = ", "))
|
|
|
|
|
@@ -898,21 +916,20 @@ filter_data <- function(df, variables, nf = FALSE, missing = FALSE, adjust = FAL
|
|
|
df <- df %>%
|
|
|
mutate(
|
|
|
# Rank based on Avg_Zscore_
|
|
|
- across(all_of(avg_zscore_cols), ~ rank(., na.last = "keep"), .names = "Rank_{.col}"),
|
|
|
+ across(all_of(avg_zscore_cols), ~ rank(., na.last = "keep"), .names = "Rank_Avg_Zscore_{.col}"),
|
|
|
# Rank_lm based on Z_lm_
|
|
|
- across(all_of(z_lm_cols), ~ rank(., na.last = "keep"), .names = "Rank_lm_{.col}")
|
|
|
+ across(all_of(z_lm_cols), ~ rank(., na.last = "keep"), .names = "Rank_lm_Z_lm_{.col}")
|
|
|
)
|
|
|
|
|
|
- # Rename the newly created rank columns to match desired names
|
|
|
- for (variable in variables) {
|
|
|
- old_rank_col <- paste0("Rank_Avg_Zscore_", variable)
|
|
|
- new_rank_col <- paste0("Rank_", variable)
|
|
|
- df <- df %>% rename(!!new_rank_col := !!sym(old_rank_col))
|
|
|
-
|
|
|
- old_rank_lm_col <- paste0("Rank_lm_Z_lm_", variable)
|
|
|
- new_rank_lm_col <- paste0("Rank_lm_", variable)
|
|
|
- df <- df %>% rename(!!new_rank_lm_col := !!sym(old_rank_lm_col))
|
|
|
- }
|
|
|
+ # Prepare a named vector for renaming columns: new_name = old_name
|
|
|
+ rename_vector <- c(
|
|
|
+ setNames(paste0("Rank_", variables), paste0("Rank_Avg_Zscore_", avg_zscore_cols)),
|
|
|
+ setNames(paste0("Rank_lm_", variables), paste0("Rank_lm_Z_lm_", z_lm_cols))
|
|
|
+ )
|
|
|
+
|
|
|
+ # Rename the rank columns in a single step
|
|
|
+ df <- df %>%
|
|
|
+ rename(!!!rename_vector)
|
|
|
}
|
|
|
|
|
|
return(df)
|