Fix update_gene_names
This commit is contained in:
@@ -140,18 +140,24 @@ load_and_process_data <- function(easy_results_file, sd = 3) {
|
||||
|
||||
# Update the Gene column of `df` using the SGD gene list.
#
# Arguments:
#   df            Data frame with columns `ORF` (lookup key) and `OrfRep`
#                 (fallback label). An existing `Gene` column is used as the
#                 default for rows that are not remapped; if it is absent,
#                 those rows start out as NA.
#   sgd_gene_list Path to a tab-delimited, header-less file with 16 columns;
#                 column 4 is read as the ORF and column 5 as the gene name.
#                 NOTE(review): presumably SGD's feature table - confirm.
#
# Returns `df` with `Gene` replaced by an unnamed character vector:
#   * rows whose ORF is found in the list get the listed gene name, except
#     rows with OrfRep == "YDL227C", which always keep their current Gene;
#   * rows whose ORF is not found keep their current Gene;
#   * a resulting Gene of "" or "OCT1" is replaced by OrfRep.
update_gene_names <- function(df, sgd_gene_list) {
  missing_cols <- setdiff(c("ORF", "OrfRep"), names(df))
  if (length(missing_cols) > 0) {
    stop(
      "update_gene_names(): `df` is missing column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # Load SGD gene list, keeping only columns 4 (ORF) and 5 (gene name)
  genes <- read.delim(
    file = sgd_gene_list,
    quote = "", header = FALSE,
    colClasses = c(rep("NULL", 3), rep("character", 2), rep("NULL", 11))
  )

  # Create a named vector for mapping ORF to GeneName
  gene_map <- setNames(genes$V5, genes$V4)

  # Vectorized lookup. as.character() stops a factor ORF from indexing by its
  # integer codes; unname() keeps the ORF names out of the Gene column.
  # ORFs that are not in the list come back as NA.
  mapped_genes <- unname(gene_map[as.character(df[["ORF"]])])

  orf_rep <- as.character(df[["OrfRep"]])
  if (is.null(df[["Gene"]])) {
    updated_genes <- rep(NA_character_, nrow(df))
  } else {
    updated_genes <- as.character(df[["Gene"]])
  }

  # Take the mapped name where one exists, but never for YDL227C.
  # %in% (rather than ==) keeps an NA OrfRep from turning the result into NA.
  use_mapped <- !is.na(mapped_genes) & !(orf_rep %in% "YDL227C")
  updated_genes[use_mapped] <- mapped_genes[use_mapped]

  # Ensure Gene is not left blank or incorrectly updated to "OCT1"
  use_orf_rep <- updated_genes %in% c("", "OCT1")
  updated_genes[use_orf_rep] <- orf_rep[use_orf_rep]

  # Plain column assignment: works on data.frames and tibbles, grouped or not
  df[["Gene"]] <- updated_genes

  return(df)
}
|
||||
|
||||
@@ -453,6 +459,7 @@ adjust_missing_and_rank <- function(df) {
|
||||
r_Rank_lm = rank(Z_lm_r),
|
||||
AUC_Rank_lm = rank(Z_lm_AUC)
|
||||
)
|
||||
|
||||
return(df)
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user