From 7b0b2c9bbd0878d578323136ed6eb6acaa6021b7 Mon Sep 17 00:00:00 2001 From: Bryan Roessler Date: Sat, 31 Aug 2024 12:32:05 -0400 Subject: [PATCH] Auto-commit: apps/r/calculate_interaction_zscores5.R --- .../apps/r/calculate_interaction_zscores5.R | 175 ++++++++---------- 1 file changed, 82 insertions(+), 93 deletions(-) diff --git a/workflow/apps/r/calculate_interaction_zscores5.R b/workflow/apps/r/calculate_interaction_zscores5.R index 46df32d9..d6c7a65b 100644 --- a/workflow/apps/r/calculate_interaction_zscores5.R +++ b/workflow/apps/r/calculate_interaction_zscores5.R @@ -289,9 +289,30 @@ process_strains <- function(df, df_stats_by_l_within_2sd_k, strain, output_dir) return(df_strains) } -calculate_interaction_scores <- function(interaction_scores, df, df_stats_by_l, df_stats_by_k, - df_stats_by_r, df_stats_by_auc, background_means, - max_conc, variables, out_dir, is_reference = TRUE) { +calculate_interaction_scores <- function(df, df_stats_by_l, df_stats_by_k, + df_stats_by_r, df_stats_by_auc, background_means, max_conc, variables, out_dir) { + + # Initialize the dataframe + interaction_scores <- df %>% + distinct(OrfRep) %>% + mutate( + Gene = NA, + raw_shift_l = NA, z_shift_l = NA, lm_score_l = NA, z_lm_l = NA, + r_squared_l = NA, sum_z_score_l = NA, avg_zscore_l = NA, + raw_shift_k = NA, z_shift_k = NA, lm_score_k = NA, z_lm_k = NA, + r_squared_k = NA, sum_z_score_k = NA, avg_zscore_k = NA, + raw_shift_r = NA, z_shift_r = NA, lm_score_r = NA, z_lm_r = NA, + r_squared_r = NA, sum_z_score_r = NA, avg_zscore_r = NA, + raw_shift_auc = NA, z_shift_auc = NA, lm_score_auc = NA, z_lm_auc = NA, + r_squared_auc = NA, sum_z_score_auc = NA, avg_zscore_auc = NA, + NG = NA, DB = NA, SM = NA + ) + + # The list to hold all of the combined interactions + interaction_scores_list <- list() + + # TODO is this necessary? + # Initialize the dataframe interaction_scores <- interaction_scores %>% mutate(across(.cols = everything(), ~ NA)) # Initialize columns with NA @@ -385,67 +406,65 @@ calculate_interaction_scores <- function(interaction_scores, df, df_stats_by_l, SM = sum(df_stats_interaction$SM, na.rm = TRUE) ) - # If deletion strain, append to the final interaction statistics - if (!is_reference) { - if (i == 1) { - df_stats_interaction_all <- df_stats_interaction - } else { - df_stats_interaction_all <- bind_rows(df_stats_interaction_all, df_stats_interaction) - } - } + interaction_scores_list[[i]] <- interaction_scores } - # If deletion strain, return the final interaction statistics along with the scores - if (!is_reference) { - return(list(interaction_scores = interaction_scores, df_stats_interaction_all = df_stats_interaction_all)) - } - - return(interaction_scores) + return(interaction_scores_list) } filter_and_save_interaction_scores <- function(interaction_scores, out_dir, prefix) { + # Arrange the interaction scores by Z_lm_L and NG interaction_scores <- interaction_scores %>% - arrange(desc(Z_lm_L)) %>% - arrange(desc(NG)) + arrange(desc(Z_lm_L), desc(NG)) - output_files <- list( - paste0(prefix, "_ZScores_Interaction.csv") = interaction_scores, - paste0(prefix, "_ZScores_Interaction_DeletionEnhancers_L.csv") = filter(interaction_scores, Avg_Zscore_L >= 2), - paste0(prefix, "_ZScores_Interaction_DeletionEnhancers_K.csv") = filter(interaction_scores, Avg_Zscore_K <= -2), - paste0(prefix, "_ZScores_Interaction_DeletionSuppressors_L.csv") = filter(interaction_scores, Avg_Zscore_L <= -2), - paste0(prefix, "_ZScores_Interaction_DeletionSuppressors_K.csv") = filter(interaction_scores, Avg_Zscore_K >= 2), - paste0(prefix, "_ZScores_Interaction_DeletionEnhancers_and_Suppressors_L.csv") = filter(interaction_scores, Avg_Zscore_L >= 2 | Avg_Zscore_L <= -2), - paste0(prefix, "_ZScores_Interaction_DeletionEnhancers_and_Suppressors_K.csv") = filter(interaction_scores, Avg_Zscore_K >= 2 | Avg_Zscore_K <= -2), - paste0(prefix, "_ZScores_Interaction_Suppressors_and_lm_Enhancers_L.csv") = filter(interaction_scores, Z_lm_L >= 2 & Avg_Zscore_L <= -2), - paste0(prefix, "_ZScores_Interaction_Enhancers_and_lm_Suppressors_L.csv") = filter(interaction_scores, Z_lm_L <= -2 & Avg_Zscore_L >= 2), - paste0(prefix, "_ZScores_Interaction_Suppressors_and_lm_Enhancers_K.csv") = filter(interaction_scores, Z_lm_K <= -2 & Avg_Zscore_K >= 2), - paste0(prefix, "_ZScores_Interaction_Enhancers_and_lm_Suppressors_K.csv") = filter(interaction_scores, Z_lm_K >= 2 & Avg_Zscore_K <= -2) + filters <- list( + list(name = "_ZScores_Interaction.csv", + filter = interaction_scores), + list(name = "_ZScores_Interaction_DeletionEnhancers_L.csv", + filter = filter(interaction_scores, Avg_Zscore_L >= 2)), + list(name = "_ZScores_Interaction_DeletionEnhancers_K.csv", + filter = filter(interaction_scores, Avg_Zscore_K <= -2)), + list(name = "_ZScores_Interaction_DeletionSuppressors_L.csv", + filter = filter(interaction_scores, Avg_Zscore_L <= -2)), + list(name = "_ZScores_Interaction_DeletionSuppressors_K.csv", + filter = filter(interaction_scores, Avg_Zscore_K >= 2)), + list(name = "_ZScores_Interaction_DeletionEnhancers_and_Suppressors_L.csv", + filter = filter(interaction_scores, Avg_Zscore_L >= 2 | Avg_Zscore_L <= -2)), + list(name = "_ZScores_Interaction_DeletionEnhancers_and_Suppressors_K.csv", + filter = filter(interaction_scores, Avg_Zscore_K >= 2 | Avg_Zscore_K <= -2)), + list(name = "_ZScores_Interaction_Suppressors_and_lm_Enhancers_L.csv", + filter = filter(interaction_scores, Z_lm_L >= 2 & Avg_Zscore_L <= -2)), + list(name = "_ZScores_Interaction_Enhancers_and_lm_Suppressors_L.csv", + filter = filter(interaction_scores, Z_lm_L <= -2 & Avg_Zscore_L >= 2)), + list(name = "_ZScores_Interaction_Suppressors_and_lm_Enhancers_K.csv", + filter = filter(interaction_scores, Z_lm_K <= -2 & Avg_Zscore_K >= 2)), + list(name = "_ZScores_Interaction_Enhancers_and_lm_Suppressors_K.csv", + filter = filter(interaction_scores, Z_lm_K >= 2 & Avg_Zscore_K <= -2)), + list(name = "_ZScores_Interaction_DeletionEnhancers_L_lm.csv", + filter = filter(interaction_scores, Z_lm_L >= 2)), + list(name = "_ZScores_Interaction_DeletionEnhancers_K_lm.csv", + filter = filter(interaction_scores, Z_lm_K <= -2)), + list(name = "_ZScores_Interaction_DeletionSuppressors_L_lm.csv", + filter = filter(interaction_scores, Z_lm_L <= -2)), + list(name = "_ZScores_Interaction_DeletionSuppressors_K_lm.csv", + filter = filter(interaction_scores, Z_lm_K >= 2)), + list(name = "_ZScores_Interaction_DeletionEnhancers_and_Suppressors_L_lm.csv", + filter = filter(interaction_scores, Z_lm_L >= 2 | Z_lm_L <= -2)), + list(name = "_ZScores_Interaction_DeletionEnhancers_and_Suppressors_K_lm.csv", + filter = filter(interaction_scores, Z_lm_K >= 2 | Z_lm_K <= -2)) ) - output_files_lm <- list( - paste0(prefix, "_ZScores_Interaction_DeletionEnhancers_L_lm.csv") = filter(interaction_scores, Z_lm_L >= 2), - paste0(prefix, "_ZScores_Interaction_DeletionEnhancers_K_lm.csv") = filter(interaction_scores, Z_lm_K <= -2), - paste0(prefix, "_ZScores_Interaction_DeletionSuppressors_L_lm.csv") = filter(interaction_scores, Z_lm_L <= -2), - paste0(prefix, "_ZScores_Interaction_DeletionSuppressors_K_lm.csv") = filter(interaction_scores, Z_lm_K >= 2), - paste0(prefix, "_ZScores_Interaction_DeletionEnhancers_and_Suppressors_L_lm.csv") = filter(interaction_scores, Z_lm_L >= 2 | Z_lm_L <= -2), - paste0(prefix, "_ZScores_Interaction_DeletionEnhancers_and_Suppressors_K_lm.csv") = filter(interaction_scores, Z_lm_K >= 2 | Z_lm_K <= -2) - ) - - for (file_name in names(output_files)) { - write.csv(output_files[[file_name]], file = file.path(out_dir, file_name), row.names = FALSE) - } - - for (file_name in names(output_files_lm)) { - write.csv(output_files_lm[[file_name]], file = file.path(out_dir, file_name), row.names = FALSE) + # Iterate over each filter and save the corresponding CSV file + for (item in filters) { + file_name <- paste0(prefix, item$name) + write.csv(item$filter, file = file.path(out_dir, file_name), row.names = FALSE) } } - - main <- function() { # Applying to all experiments lapply(names(args$experiments), function(exp_name) { @@ -578,55 +597,25 @@ main <- function() { df_reference_strains <- df_reference_strains %>% mutate(OrfRep = paste(OrfRep, Gene, num, sep = "_")) - num_genes <- length(unique(df_reference_strains$OrfRep)) - print(num_genes) + # num_genes <- length(unique(df_reference_strains$OrfRep)) + # print(num_genes) - # TODO: Is this necessary? - interaction_scores_reference <- df_reference_strains %>% - distinct(OrfRep) %>% - mutate( - Gene = NA, - raw_shift_l = NA, z_shift_l = NA, lm_score_l = NA, z_lm_l = NA, - r_squared_l = NA, sum_z_score_l = NA, avg_zscore_l = NA, - raw_shift_k = NA, z_shift_k = NA, lm_score_k = NA, z_lm_k = NA, - r_squared_k = NA, sum_z_score_k = NA, avg_zscore_k = NA, - raw_shift_r = NA, z_shift_r = NA, lm_score_r = NA, z_lm_r = NA, - r_squared_r = NA, sum_z_score_r = NA, avg_zscore_r = NA, - raw_shift_auc = NA, z_shift_auc = NA, lm_score_auc = NA, z_lm_auc = NA, - r_squared_auc = NA, sum_z_score_auc = NA, avg_zscore_auc = NA, - NG = NA, SM = NA - ) - - interaction_scores_reference <- calculate_interaction_scores( - interaction_scores_reference, df_reference_strains, df_stats_by_l, df_stats_by_k, - df_stats_by_r, df_stats_by_auc, background_means, max_conc, variables, out_dir, is_reference = TRUE) + variables <- c("L", "K", "r", "AUC") + interaction_scores_reference <- calculate_interaction_scores(df_reference_strains, df_stats_by_l, + df_stats_by_k, df_stats_by_r, df_stats_by_auc, background_means, max_conc, variables, out_dir) + interaction_scores_deletion <- calculate_interaction_scores(df_deletion_strains, df_stats_by_l, + df_stats_by_k, df_stats_by_r, df_stats_by_auc, background_means, max_conc, variables, out_dir) write.csv(interaction_scores_reference, file = file.path(out_dir, "RF_ZScores_Interaction.csv"), row.names = FALSE) - - interaction_scores_deletion <- df_deletion_strains %>% - distinct(OrfRep) %>% - mutate( - Gene = NA, - raw_shift_l = NA, z_shift_l = NA, lm_score_l = NA, z_lm_l = NA, - r_squared_l = NA, sum_z_score_l = NA, avg_zscore_l = NA, - raw_shift_k = NA, z_shift_k = NA, lm_score_k = NA, z_lm_k = NA, - r_squared_k = NA, sum_z_score_k = NA, avg_zscore_k = NA, - raw_shift_r = NA, z_shift_r = NA, lm_score_r = NA, z_lm_r = NA, - r_squared_r = NA, sum_z_score_r = NA, avg_zscore_r = NA, - raw_shift_auc = NA, z_shift_auc = NA, lm_score_auc = NA, z_lm_auc = NA, - r_squared_auc = NA, sum_z_score_auc = NA, avg_zscore_auc = NA, - NG = NA, DB = NA, SM = NA - ) - - result <- calculate_interaction_scores( - interaction_scores_deletion, df_deletion_strains, df_stats_by_l, df_stats_by_k, - df_stats_by_r, df_stats_by_auc, background_means, max_conc, variables, out_dir, is_reference = FALSE) - - interaction_scores_deletion <- result$interaction_scores - df_stats_interaction_all <- result$df_stats_interaction_all - write.csv(interaction_scores_deletion, file = file.path(out_dir, "ZScores_Interaction.csv"), row.names = FALSE) - write.csv(df_stats_interaction_all, file = file.path(out_dir, "RF_ZScore_Calculations.csv"), row.names = FALSE) + + + + + + + + # write.csv(interaction_scores_deletion_all, file = file.path(out_dir, "RF_ZScores_Calculations.csv"), row.names = FALSE) # Filter and save interaction scores for reference strains filter_and_save_interaction_scores(interaction_scores_reference, out_dir, "RF")