From 00984b50601a1a0a1c8594f8023466a0c25b2554 Mon Sep 17 00:00:00 2001 From: Bryan Roessler Date: Sun, 1 Sep 2024 03:28:28 -0400 Subject: [PATCH] Fix update_gene_names --- .../apps/r/calculate_interaction_zscores5.R | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/workflow/apps/r/calculate_interaction_zscores5.R b/workflow/apps/r/calculate_interaction_zscores5.R index 7ba92dd8..492c7a7e 100644 --- a/workflow/apps/r/calculate_interaction_zscores5.R +++ b/workflow/apps/r/calculate_interaction_zscores5.R @@ -140,18 +140,24 @@ load_and_process_data <- function(easy_results_file, sd = 3) { # Update Gene names using the SGD gene list update_gene_names <- function(df, sgd_gene_list) { + # Load SGD gene list genes <- read.delim(file = sgd_gene_list, quote = "", header = FALSE, colClasses = c(rep("NULL", 3), rep("character", 2), rep("NULL", 11))) - + + # Create a named vector for mapping ORF to GeneName gene_map <- setNames(genes$V5, genes$V4) - df <- df %>% - rowwise() %>% - mutate(Gene = ifelse(OrfRep != "YDL227C", gene_map[[ORF]], Gene)) %>% - ungroup() %>% - mutate(Gene = ifelse(Gene == "" | Gene == "OCT1", OrfRep, Gene)) + # Vectorized match to find the GeneName from gene_map + mapped_genes <- gene_map[df$ORF] + # Replace NAs in mapped_genes with original Gene names (preserves existing Gene names if ORF is not found) + updated_genes <- ifelse(is.na(mapped_genes) | df$OrfRep == "YDL227C", df$Gene, mapped_genes) + + # Ensure Gene is not left blank or incorrectly updated to "OCT1" + df <- df %>% + mutate(Gene = ifelse(updated_genes == "" | updated_genes == "OCT1", OrfRep, updated_genes)) + return(df) } @@ -453,6 +459,7 @@ adjust_missing_and_rank <- function(df) { r_Rank_lm = rank(Z_lm_r), AUC_Rank_lm = rank(Z_lm_AUC) ) + return(df) }