Fix update_gene_names

This commit is contained in:
2024-09-01 03:28:28 -04:00
parent a484a2b104
commit 00984b5060

View File

@@ -140,17 +140,23 @@ load_and_process_data <- function(easy_results_file, sd = 3) {
# Update Gene names using the SGD gene list
update_gene_names <- function(df, sgd_gene_list) {
  # Refresh df$Gene from the SGD gene list, keyed by df$ORF.
  #
  # Args:
  #   df: data frame with ORF, OrfRep and Gene columns.
  #   sgd_gene_list: path to a tab-delimited, header-less file with 16
  #     columns; column 4 is used as the ORF and column 5 as the gene name.
  #
  # Returns:
  #   df with the Gene column replaced by a character vector:
  #     * the gene name mapped from ORF, when the ORF is in the list;
  #     * the existing Gene when the ORF is not in the list, or when
  #       OrfRep is "YDL227C" (that row is never renamed);
  #     * OrfRep when the resulting name is "" or "OCT1".

  # Only columns 4 (ORF) and 5 (gene name) are read; the rest are dropped.
  genes <- read.delim(
    file = sgd_gene_list,
    quote = "",
    header = FALSE,
    colClasses = c(rep("NULL", 3), rep("character", 2), rep("NULL", 11))
  )

  # Named lookup vector: names are ORFs, values are gene names.
  gene_map <- setNames(genes$V5, genes$V4)

  # Coerce to character so a factor column is neither used as integer
  # indices into gene_map nor collapsed to integer codes by ifelse().
  current_genes <- as.character(df$Gene)
  orf_rep <- as.character(df$OrfRep)

  # Vectorized lookup: ORFs absent from the list yield NA instead of the
  # "subscript out of bounds" error a per-row `[[` lookup would raise.
  # unname() keeps the ORF names from leaking into the Gene column.
  mapped_genes <- unname(gene_map[as.character(df$ORF)])

  # Keep the existing name when there is no mapping or for YDL227C.
  keep_current <- is.na(mapped_genes) | orf_rep == "YDL227C"
  updated_genes <- ifelse(keep_current, current_genes, mapped_genes)

  # Blank names and "OCT1" fall back to OrfRep. An NA name stays NA.
  use_orf_rep <- updated_genes == "" | updated_genes == "OCT1"

  # as.character() keeps Gene a character column even when ifelse() returns
  # a logical vector (zero-row input, or every value NA).
  df$Gene <- as.character(ifelse(use_orf_rep, orf_rep, updated_genes))
  df
}
@@ -453,6 +459,7 @@ adjust_missing_and_rank <- function(df) {
r_Rank_lm = rank(Z_lm_r),
AUC_Rank_lm = rank(Z_lm_AUC)
)
return(df)
}