From 6b1428a384e3df79ab3cee05524ea4b2ea8f5eca Mon Sep 17 00:00:00 2001 From: Bryan Roessler Date: Sun, 1 Sep 2024 16:44:57 -0400 Subject: [PATCH] Add more output to main() to aid debug --- .../apps/r/calculate_interaction_zscores5.R | 67 ++++++++++--------- 1 file changed, 36 insertions(+), 31 deletions(-) diff --git a/workflow/apps/r/calculate_interaction_zscores5.R b/workflow/apps/r/calculate_interaction_zscores5.R index efa707b8..82e131c1 100644 --- a/workflow/apps/r/calculate_interaction_zscores5.R +++ b/workflow/apps/r/calculate_interaction_zscores5.R @@ -564,7 +564,16 @@ main <- function() { ) df_no_zeros <- df_na %>% filter(L > 0) + # Generate QC PDFs and HTMLs + message("Generating QC plots") + variables <- c("L", "K", "r", "AUC", "delta_bg") + generate_and_save_plots(df, out_dir_qc, "Before_QC", variables, include_qc = TRUE) + generate_and_save_plots(df_above_tolerance, out_dir_qc, "Raw_L_vs_K_above_delta_bg_threshold", variables, include_qc = TRUE) + generate_and_save_plots(df_na, out_dir_qc, "After_QC", variables) + generate_and_save_plots(df_no_zeros, out_dir_qc, "No_Zeros", variables) + # Calculate summary statistics + message("Calculating summary statistics for all strains") variables <- c("L", "K", "r", "AUC") summary_stats <- calculate_summary_stats(df_na, variables, group_vars = c("conc_num", "conc_num_factor")) write.csv(summary_stats, file = file.path(out_dir, "SummaryStats_ALLSTRAINS.csv"), row.names = FALSE) @@ -575,16 +584,11 @@ main <- function() { stats_by_r <- summary_stats %>% select(starts_with("r_"), "OrfRep", "conc_num", "conc_num_factor") stats_by_auc <- summary_stats %>% select(starts_with("AUC_"), "OrfRep", "conc_num", "conc_num_factor") - # Generate QC PDFs and HTMLs - variables <- c("L", "K", "r", "AUC", "delta_bg") - generate_and_save_plots(df, out_dir_qc, "Before_QC", variables, include_qc = TRUE) - generate_and_save_plots(df_above_tolerance, out_dir_qc, "Raw_L_vs_K_above_delta_bg_threshold", variables, include_qc = TRUE) - generate_and_save_plots(df_na, out_dir_qc, "After_QC", variables) - generate_and_save_plots(df_no_zeros, out_dir_qc, "No_Zeros", variables) - # Process background strains background_strains <- c("YDL227C") lapply(background_strains, function(strain) { + + message("Processing background strain: ", strain) # Handle missing data by setting zero values to NA # and then removing any rows with NA in L col @@ -599,73 +603,72 @@ main <- function() { filter(!is.na(L)) # Recalculate summary statistics for the background strain + message("Calculating summary statistics for background strain") stats_background <- calculate_summary_stats(df_background, variables, group_vars = c("OrfRep", "Gene", "conc_num", "conc_num_factor")) stats_by_l_background <- stats_background %>% select(starts_with("L_"), "OrfRep", "conc_num", "conc_num_factor") stats_by_k_background <- stats_background %>% select(starts_with("K_"), "OrfRep", "conc_num", "conc_num_factor") stats_by_r_background <- stats_background %>% select(starts_with("r_"), "OrfRep", "conc_num", "conc_num_factor") stats_by_auc_background <- stats_background %>% select(starts_with("AUC_"), "OrfRep", "conc_num", "conc_num_factor") - - # Backup in case previous block explodes - # Combine all summary statistics into one dataframe - # df_stats_background <- stats_l %>% - # left_join(stats_k, by = c("OrfRep", "conc_num", "conc_num_factor")) %>% - # left_join(stats_r, by = c("OrfRep", "conc_num", "conc_num_factor")) %>% - # left_join(stats_auc, by = c("OrfRep", "conc_num", "conc_num_factor")) - - # Save the summary statistics for the background strain write.csv(stats_background, file = file.path(output_dir, paste0("SummaryStats_BackgroundStrains_", strain, ".csv")), row.names = FALSE) - - # Calculate L values within and outside 2SD of K + + # Filter L values within and outside 2SD of K results_2sd <- calculate_l_2sd_of_k(df_background, stats_by_k_background) within_2sd_k <- results_2sd$within_2sd_k outside_2sd_k <- results_2sd$outside_2sd_k # Calculate summary statistics for L within and outside 2SD of K + message("Calculating summary statistics for for L within 2SD of K") l_within_2sd_k <- calculate_summary_stats(within_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor")) write.csv(l_within_2sd_k, file = file.path(out_dir, "Max_Observed_L_Vals_for_spots_within_2sd_k.csv"), row.names = FALSE) - + message("Calculating summary statistics for for L outside 2SD of K") l_outside_2sd_k <- calculate_summary_stats(outside_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor")) write.csv(l_outside_2sd_k, file = file.path(out_dir, "Max_Observed_L_Vals_for_spots_outside_2sd_k.csv"), row.names = FALSE) + message("Generating Raw_L_vs_K_for_strains_outside_2sd_k plots") generate_and_save_plots(outside_2sd_k, out_dir, "Raw_L_vs_K_for_strains_outside_2sd_k") + message("Calculating background means") background_means <- calculate_background_means(stats_by_l_background, stats_by_k_background, stats_by_r_background, stats_by_auc_background) - # Formerly X2_RF + # Filter reference and deletion strains + # Formerly X2_RF (reference strain) df_reference <- df_na %>% filter(OrfRep == strain) %>% mutate(SM = 0) - # Formerly X2 + # Formerly X2 (deletion strains) df_deletion <- df_na %>% filter(OrfRep != strain) %>% mutate(SM = 0) - df_reference_strains <- process_strains(df_reference, l_within_2sd_k, strain, out_dir) - df_deletion_strains <- process_strains(df_deletion, l_within_2sd_k, strain, out_dir) - - variables <- c("L", "K", "r", "AUC") + message("Processing reference strain") + reference_strain <- process_strains(df_reference, l_within_2sd_k, strain, out_dir) + message("Processing deletion strains") + deletion_strains <- process_strains(df_deletion, l_within_2sd_k, strain, out_dir) # Deprecated # Change OrfRep to include the reference strain, gene, and Num so each RF gets its own score - # df_reference_strains <- df_reference_strains %>% + # reference_strain <- reference_strain %>% # mutate(OrfRep = paste(OrfRep, Gene, num, sep = "_")) # We are leaving OrfRep unchanged and using group_by(OrfRep, Gene, num) by default # This is synonymous with the legacy OrfRep mutation # Use group_by in functions in lieu of mutating OrfRep # default_group_vars <- c("OrfRep", "Gene", "num") - - reference_results <- calculate_interaction_scores(df_reference_strains, stats_by_l, - stats_by_k, stats_by_r, stats_by_auc, background_means, max_conc, variables) - deletion_results <- calculate_interaction_scores(df_deletion_strains, stats_by_l, + # Calculate interactions + variables <- c("L", "K", "r", "AUC") + message("Calculating reference interaction scores") + reference_results <- calculate_interaction_scores(reference_strain, stats_by_l, + stats_by_k, stats_by_r, stats_by_auc, background_means, max_conc, variables) + message("Calculating deletion interaction scores") + deletion_results <- calculate_interaction_scores(deletion_strains, stats_by_l, stats_by_k, stats_by_r, stats_by_auc, background_means, max_conc, variables) zscores_calculations_reference <- reference_results$zscores_calculations @@ -673,12 +676,12 @@ main <- function() { zscores_calculations <- deletion_results$zscores_calculations zscores_interactions <- deletion_results$zscores_interactions - # TODO: I don't know if we need this? zscores_interactions <- zscores_interactions %>% arrange(desc(Z_lm_L)) %>% arrange(desc(NG)) + message("Writing zscores csv files") write.csv(zscores_calculations_reference, file = file.path(out_dir, "RF_ZScores_Calculations.csv"), row.names = FALSE) write.csv(zscores_calculations, file = file.path(out_dir, "ZScores_Calculations.csv"), row.names = FALSE) write.csv(zscores_interactions_reference, file = file.path(out_dir, "RF_ZScores_Interaction.csv"), row.names = FALSE) @@ -697,6 +700,7 @@ main <- function() { suppressors_K <- zscores_interactions[suppressor_condition_K, ] # Save enhancers and suppressors + message("Writing enhancer/suppressor csv files") write.csv(enhancers_L, file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_L.csv"), row.names = FALSE) write.csv(suppressors_L, file = file.path(out_dir, "ZScores_Interaction_DeletionSuppressors_L.csv"), row.names = FALSE) write.csv(enhancers_K, file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_K.csv"), row.names = FALSE) @@ -719,6 +723,7 @@ main <- function() { suppressors_lm_K <- zscores_interactions[zscores_interactions$Z_lm_K <= -2, ] # Save linear model based enhancers and suppressors + message("Writing linear model enhancer/suppressor csv files") write.csv(enhancers_lm_L, file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_L_lm.csv"), row.names = FALSE) write.csv(suppressors_lm_L,