9 months ago · 4fd3453bd6
--- a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R
+++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R
@@ -826,12 +826,21 @@ generate_correlation_plot_configs <- function(df) {
 
				 filter_data <- function(df, variables, nf = FALSE, missing = FALSE, adjust = FALSE,
			
 
				   rank = FALSE, limits_map = NULL, verbose = TRUE) {
			
 
				   
			
 
				-  # Precompute column names for efficiency
			
 
				+  # Precompute Column Names for Efficiency
			
 
				   avg_zscore_cols <- paste0("Avg_Zscore_", variables)
			
 
				   z_lm_cols <- paste0("Z_lm_", variables)
			
 
				   
			
 
				+  # # Optional: Validate that the expected columns exist in the dataframe
			
 
				+  # expected_cols <- c(avg_zscore_cols, z_lm_cols, variables)
			
 
				+  # missing_cols <- setdiff(expected_cols, names(df))
			
 
				+  # if (length(missing_cols) > 0) {
			
 
				+  #   stop("The following expected columns are missing from the dataframe: ",
			
 
				+  #        paste(missing_cols, collapse = ", "))
			
 
				+  # }
			
 
				+  
			
 
				+  # Adjust NAs if 'adjust' is TRUE
			
 
				   if (adjust) {
			
 
				-    if (verbose) message("Replacing NA with 0.001 for Avg_Zscore_ and Z_lm_ columns")
			
 
				+    if (verbose) message("Replacing NA with 0.001 for Avg_Zscore_ and Z_lm_ columns.")
			
 
				     df <- df %>%
			
 
				       mutate(
			
 
				         across(all_of(avg_zscore_cols), ~ replace_na(., 0.001)),
			
@@ -839,10 +848,13 @@ filter_data <- function(df, variables, nf = FALSE, missing = FALSE, adjust = FAL
 
				       )
			
 
				   }
			
 
				   
			
 
				-  # Filter non-finite values
			
 
				+  # Filter Non-Finite Values if 'nf' is TRUE
			
 
				   if (nf) {
			
 
				+    if (verbose) message("Filtering non-finite values for variables: ", paste(variables, collapse = ", "))
			
 
				+    
			
 
				+    # Identify non-finite rows for logging
			
 
				     non_finite_df <- df %>%
			
 
				-      filter(across(all_of(variables), ~ !is.finite(.)))
			
 
				+      filter(if_any(all_of(variables), ~ !is.finite(.)))
			
 
				     
			
 
				     if (verbose && nrow(non_finite_df) > 0) {
			
 
				       message("Non-finite rows for variables ", paste(variables, collapse = ", "), ":")
			
@@ -851,13 +863,16 @@ filter_data <- function(df, variables, nf = FALSE, missing = FALSE, adjust = FAL
 
				     
			
 
				     # Keep only rows where all specified variables are finite
			
 
				     df <- df %>%
			
 
				-      filter(across(all_of(variables), ~ is.finite(.)))
			
 
				+      filter(if_all(all_of(variables), ~ is.finite(.)))
			
 
				   }
			
 
				   
			
 
				-  # Filter missing malues
			
 
				+  # Filter Missing Values if 'missing' is TRUE
			
 
				   if (missing) {
			
 
				+    if (verbose) message("Filtering missing values for variables: ", paste(variables, collapse = ", "))
			
 
				+    
			
 
				+    # Identify missing rows for logging
			
 
				     missing_df <- df %>%
			
 
				-      filter(across(all_of(variables), ~ is.na(.)))
			
 
				+      filter(if_any(all_of(variables), ~ is.na(.)))
			
 
				     
			
 
				     if (verbose && nrow(missing_df) > 0) {
			
 
				       message("Missing data for variables ", paste(variables, collapse = ", "), ":")
			
@@ -866,16 +881,18 @@ filter_data <- function(df, variables, nf = FALSE, missing = FALSE, adjust = FAL
 
				     
			
 
				     # Keep only rows where all specified variables are not missing
			
 
				     df <- df %>%
			
 
				-      filter(across(all_of(variables), ~ !is.na(.)))
			
 
				+      filter(if_all(all_of(variables), ~ !is.na(.)))
			
 
				   }
			
 
				   
			
 
				-  # Filter data outside of y-limits (for plotting)
			
 
				+  # Apply Limits from 'limits_map' if Provided
			
 
				   if (!is.null(limits_map)) {
			
 
				     for (variable in names(limits_map)) {
			
 
				       if (variable %in% variables) {
			
 
				         ylim_vals <- limits_map[[variable]]
			
 
				         
			
 
				-        # Identify out-of-range data
			
 
				+        if (verbose) message("Applying limits for variable ", variable, ": [", ylim_vals[1], ", ", ylim_vals[2], "].")
			
 
				+        
			
 
				+        # Identify out-of-range data for logging
			
 
				         out_of_range_df <- df %>%
			
 
				           filter(.data[[variable]] < ylim_vals[1] | .data[[variable]] > ylim_vals[2])
			
 
				         
			
@@ -890,7 +907,8 @@ filter_data <- function(df, variables, nf = FALSE, missing = FALSE, adjust = FAL
 
				       }
			
 
				     }
			
 
				   }
			
 
				-
			
 
				+  
			
 
				+  # Calculate Rank Columns if 'rank' is TRUE
			
 
				   if (rank) {
			
 
				     if (verbose) message("Calculating rank columns for variables: ", paste(variables, collapse = ", "))
			
 
				     
			
@@ -898,21 +916,20 @@ filter_data <- function(df, variables, nf = FALSE, missing = FALSE, adjust = FAL
 
				     df <- df %>%
			
 
				       mutate(
			
 
				         # Rank based on Avg_Zscore_
			
 
				-        across(all_of(avg_zscore_cols), ~ rank(., na.last = "keep"), .names = "Rank_{.col}"),
			
 
				+        across(all_of(avg_zscore_cols), ~ rank(., na.last = "keep"), .names = "Rank_Avg_Zscore_{.col}"),
			
 
				         # Rank_lm based on Z_lm_
			
 
				-        across(all_of(z_lm_cols), ~ rank(., na.last = "keep"), .names = "Rank_lm_{.col}")
			
 
				+        across(all_of(z_lm_cols), ~ rank(., na.last = "keep"), .names = "Rank_lm_Z_lm_{.col}")
			
 
				       )
			
 
				     
			
 
				-    # Rename the newly created rank columns to match desired names
			
 
				-    for (variable in variables) {
			
 
				-      old_rank_col <- paste0("Rank_Avg_Zscore_", variable)
			
 
				-      new_rank_col <- paste0("Rank_", variable)
			
 
				-      df <- df %>% rename(!!new_rank_col := !!sym(old_rank_col))
			
 
				-      
			
 
				-      old_rank_lm_col <- paste0("Rank_lm_Z_lm_", variable)
			
 
				-      new_rank_lm_col <- paste0("Rank_lm_", variable)
			
 
				-      df <- df %>% rename(!!new_rank_lm_col := !!sym(old_rank_lm_col))
			
 
				-    }
			
 
				+    # Prepare a named vector for renaming columns: new_name = old_name
			
 
				+    rename_vector <- c(
			
 
				+      setNames(paste0("Rank_", variables), paste0("Rank_Avg_Zscore_", avg_zscore_cols)),
			
 
				+      setNames(paste0("Rank_lm_", variables), paste0("Rank_lm_Z_lm_", z_lm_cols))
			
 
				+    )
			
 
				+    
			
 
				+    # Rename the rank columns in a single step
			
 
				+    df <- df %>%
			
 
				+      rename(!!!rename_vector)
			
 
				   }
			
 
				   
			
 
				   return(df)