Convert filter_data to dplyr

This commit is contained in:
2024-09-16 17:32:22 -04:00
parent 28c9fda051
commit 91fc9ecfda

View File

@@ -776,45 +776,35 @@ generate_correlation_plot_configs <- function(df, variables) {
return(configs) return(configs)
} }
filter_data <- function(df, variables, nf = FALSE, missing = FALSE, limits_map = NULL) { filter_data <- function(df, variables, nf = FALSE, missing = FALSE, limits_map = NULL, verbose = TRUE) {
# Loop through each variable to filter and print missing/out-of-range data
for (variable in variables) { for (variable in variables) {
y_var_sym <- sym(variable)
if (nf) { if (nf) {
non_finite <- df %>% filter(!is.finite(!!y_var_sym)) non_finite <- df %>% filter(!is.finite(.data[[variable]]))
if (nrow(non_finite) > 0) { if (verbose && nrow(non_finite) > 0) {
message("Non-finite rows for variable ", variable, ":") message("Non-finite rows for variable ", variable, ":")
print(non_finite) print(non_finite)
} }
df <- df %>% filter(is.finite(!!y_var_sym)) df <- df %>% filter(is.finite(.data[[variable]]))
} }
# Filter missing data
if (missing) { if (missing) {
missing_data <- df %>% filter(is.na(!!y_var_sym)) missing_data <- df %>% filter(is.na(.data[[variable]]))
if (nrow(missing_data) > 0) { if (verbose && nrow(missing_data) > 0) {
message("Missing data for variable ", variable, ":") message("Missing data for variable ", variable, ":")
print(missing_data) print(missing_data)
} }
df <- df %>% filter(!is.na(!!y_var_sym)) df <- df %>% filter(!is.na(.data[[variable]]))
} }
# Filter out-of-range data if limits_map is provided if (!is.null(limits_map) && !is.null(limits_map[[variable]])) {
if (!is.null(limits_map)) {
ylim_vals <- limits_map[[variable]] ylim_vals <- limits_map[[variable]]
out_of_range_data <- df %>% filter(.data[[variable]] < ylim_vals[1] | .data[[variable]] > ylim_vals[2])
# Print and filter out-of-range data if (verbose && nrow(out_of_range_data) > 0) {
out_of_range_data <- df %>% filter(
!!y_var_sym < ylim_vals[1] | !!y_var_sym > ylim_vals[2]
)
if (nrow(out_of_range_data) > 0) {
message("Out-of-range data for variable ", variable, ":") message("Out-of-range data for variable ", variable, ":")
print(out_of_range_data) print(out_of_range_data)
df <- df %>%
filter(!!y_var_sym >= ylim_vals[1] & !!y_var_sym <= ylim_vals[2])
} }
df <- df %>% filter(.data[[variable]] >= ylim_vals[1] & .data[[variable]] <= ylim_vals[2])
} }
} }
@@ -1209,7 +1199,7 @@ main <- function() {
message("Generating rank plots") message("Generating rank plots")
# Generate rank plots for L and K using standard ranks # Generate rank plots for L and K using standard ranks
rank_plot_configs <- generate_rank_plot_configs( rank_plot_configs <- generate_rank_plot_configs(
df = zscores_interactions, df = zscores_interactions_joined,
variables = interaction_vars, variables = interaction_vars,
is_lm = FALSE, is_lm = FALSE,
adjust = TRUE adjust = TRUE
@@ -1222,7 +1212,7 @@ main <- function() {
message("Generating ranked linear model plots") message("Generating ranked linear model plots")
# Generate rank plots for L and K using linear model (`lm`) ranks # Generate rank plots for L and K using linear model (`lm`) ranks
rank_lm_plot_configs <- generate_rank_plot_configs( rank_lm_plot_configs <- generate_rank_plot_configs(
df = zscores_interactions, df = zscores_interactions_joined,
variables = interaction_vars, variables = interaction_vars,
is_lm = TRUE, is_lm = TRUE,
adjust = TRUE adjust = TRUE