Explorar o código

Convert filter_data to dplyr

Bryan Roessler hai 9 meses
pai
achega
91fc9ecfda
Modificouse 1 ficheiro con 13 adicións e 23 borrados
  1. 13 23
      qhtcp-workflow/apps/r/calculate_interaction_zscores.R

+ 13 - 23
qhtcp-workflow/apps/r/calculate_interaction_zscores.R

@@ -776,45 +776,35 @@ generate_correlation_plot_configs <- function(df, variables) {
   return(configs)
 }
 
-filter_data <- function(df, variables, nf = FALSE, missing = FALSE, limits_map = NULL) {
+filter_data <- function(df, variables, nf = FALSE, missing = FALSE, limits_map = NULL, verbose = TRUE) {
   
-  # Loop through each variable to filter and print missing/out-of-range data
   for (variable in variables) {
-    y_var_sym <- sym(variable)
-
     if (nf) {
-      non_finite <- df %>% filter(!is.finite(!!y_var_sym))
-      if (nrow(non_finite) > 0) {
+      non_finite <- df %>% filter(!is.finite(.data[[variable]]))
+      if (verbose && nrow(non_finite) > 0) {
         message("Non-finite rows for variable ", variable, ":")
         print(non_finite)
       }
-      df <- df %>% filter(is.finite(!!y_var_sym))
+      df <- df %>% filter(is.finite(.data[[variable]]))
     }
     
-    # Filter missing data
     if (missing) {
-      missing_data <- df %>% filter(is.na(!!y_var_sym))
-      if (nrow(missing_data) > 0) {
+      missing_data <- df %>% filter(is.na(.data[[variable]]))
+      if (verbose && nrow(missing_data) > 0) {
         message("Missing data for variable ", variable, ":")
         print(missing_data)
       }
-      df <- df %>% filter(!is.na(!!y_var_sym))
+      df <- df %>% filter(!is.na(.data[[variable]]))
     }
     
-    # Filter out-of-range data if limits_map is provided
-    if (!is.null(limits_map)) {
+    if (!is.null(limits_map) && !is.null(limits_map[[variable]])) {
       ylim_vals <- limits_map[[variable]]
-      
-      # Print and filter out-of-range data
-      out_of_range_data <- df %>% filter(
-        !!y_var_sym < ylim_vals[1] | !!y_var_sym > ylim_vals[2]
-      )
-      if (nrow(out_of_range_data) > 0) {
+      out_of_range_data <- df %>% filter(.data[[variable]] < ylim_vals[1] | .data[[variable]] > ylim_vals[2])
+      if (verbose && nrow(out_of_range_data) > 0) {
         message("Out-of-range data for variable ", variable, ":")
         print(out_of_range_data)
-      df <- df %>%
-        filter(!!y_var_sym >= ylim_vals[1] & !!y_var_sym <= ylim_vals[2])
       }
+      df <- df %>% filter(.data[[variable]] >= ylim_vals[1] & .data[[variable]] <= ylim_vals[2])
     }
   }
   
@@ -1209,7 +1199,7 @@ main <- function() {
       message("Generating rank plots")
       # Generate rank plots for L and K using standard ranks
       rank_plot_configs <- generate_rank_plot_configs(
-        df = zscores_interactions,
+        df = zscores_interactions_joined,
         variables = interaction_vars,
         is_lm = FALSE,
         adjust = TRUE
@@ -1222,7 +1212,7 @@ main <- function() {
       message("Generating ranked linear model plots")
       # Generate rank plots for L and K using linear model (`lm`) ranks
       rank_lm_plot_configs <- generate_rank_plot_configs(
-        df = zscores_interactions,
+        df = zscores_interactions_joined,
         variables = interaction_vars,
         is_lm = TRUE,
         adjust = TRUE