浏览代码

Simplify rank columns

Bryan Roessler 7 月之前
父节点
当前提交
a58c8db90d
共有 1 个文件被更改,包括 13 次插入和 22 次删除
  1. 13 22
      qhtcp-workflow/apps/r/calculate_interaction_zscores.R

+ 13 - 22
qhtcp-workflow/apps/r/calculate_interaction_zscores.R

@@ -862,8 +862,8 @@ filter_data <- function(df, variables, nf = FALSE, missing = FALSE, adjust = FAL
     if (verbose) message("Replacing NA with 0.001 for Avg_Zscore_ and Z_lm_ columns.")
     df <- df %>%
       mutate(
-        across(all_of(avg_zscore_cols), ~ replace_na(., 0.001)),
-        across(all_of(z_lm_cols), ~ replace_na(., 0.001))
+        across(all_of(avg_zscore_cols), ~ ifelse(is.na(.), 0.001, .)),
+        across(all_of(z_lm_cols), ~ ifelse(is.na(.), 0.001, .))
       )
   }
   
@@ -930,27 +930,18 @@ filter_data <- function(df, variables, nf = FALSE, missing = FALSE, adjust = FAL
   # Calculate Rank Columns if 'rank' is TRUE
   if (rank) {
     if (verbose) message("Calculating rank columns for variables: ", paste(variables, collapse = ", "))
-    
-    # Create Rank and Rank_lm columns using mutate and across
-    df <- df %>%
-      mutate(
-        # Rank based on Avg_Zscore_
-        across(all_of(avg_zscore_cols), ~ rank(., na.last = "keep"), .names = "Rank_Avg_Zscore_{.col}"),
-        # Rank_lm based on Z_lm_
-        across(all_of(z_lm_cols), ~ rank(., na.last = "keep"), .names = "Rank_lm_Z_lm_{.col}")
-      )
-    
-    # Prepare a named vector for renaming columns: new_name = old_name
-    rename_vector <- c(
-      setNames(paste0("Rank_", variables), paste0("Rank_Avg_Zscore_", avg_zscore_cols)),
-      setNames(paste0("Rank_lm_", variables), paste0("Rank_lm_Z_lm_", z_lm_cols))
-    )
-    
-    # Rename the rank columns in a single step
-    df <- df %>%
-      rename(!!!rename_vector)
+
+    for (col in avg_zscore_cols) {
+      rank_col <- paste0("Rank_", col)
+      df[[rank_col]] <- rank(df[[col]], na.last = "keep")
+    }
+
+    for (col in z_lm_cols) {
+      rank_lm_col <- paste0("Rank_lm_", col)
+      df[[rank_lm_col]] <- rank(df[[col]], na.last = "keep")
+    }
   }
-  
+
   return(df)
 }