diff --git a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R index cd5ccf54..ddba0c7f 100644 --- a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R +++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R @@ -193,31 +193,13 @@ calculate_summary_stats <- function(df, variables, group_vars) { return(list(summary_stats = summary_stats, df_with_stats = df_joined)) } -calculate_interaction_scores <- function(df, max_conc, variables = c("L", "K", "r", "AUC")) { +calculate_interaction_scores <- function(df, max_conc, variables = c("L", "K", "r", "AUC"), + group_vars = c("OrfRep", "Gene", "num")) { # Calculate total concentration variables total_conc_num <- length(unique(df$conc_num)) - # Pull the background means and standard deviations from zero concentration - bg_means <- list( - L = df %>% filter(conc_num == 0) %>% pull(mean_L) %>% first(), - K = df %>% filter(conc_num == 0) %>% pull(mean_K) %>% first(), - r = df %>% filter(conc_num == 0) %>% pull(mean_r) %>% first(), - AUC = df %>% filter(conc_num == 0) %>% pull(mean_AUC) %>% first() - ) - bg_sd <- list( - L = df %>% filter(conc_num == 0) %>% pull(sd_L) %>% first(), - K = df %>% filter(conc_num == 0) %>% pull(sd_K) %>% first(), - r = df %>% filter(conc_num == 0) %>% pull(sd_r) %>% first(), - AUC = df %>% filter(conc_num == 0) %>% pull(sd_AUC) %>% first() - ) - - calculations <- calculate_summary_stats( - df = df, - variables = variables, - group_vars = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor") - )$df_with_stats calculations <- calculations %>% group_by(OrfRep, Gene, num) %>% @@ -1064,22 +1046,21 @@ main <- function() { # Filter rows above delta background tolerance df_above_tolerance <- df %>% filter(DB == 1) - df_na <- df %>% mutate(across(all_of(summary_vars), ~ ifelse(DB == 1, NA, .))) - df_no_zeros <- df_na %>% filter(L > 0) + df_na <- df %>% mutate(across(all_of(summary_vars), ~ ifelse(DB == 1, NA, .))) # formerly X + df_no_zeros <- df_na %>% filter(L > 0) # formerly X_noZero # Save some constants - max_conc <- max(df$conc_num) + max_conc <- max(df$conc_num_factor) l_half_median <- (median(df_above_tolerance$L, na.rm = TRUE)) / 2 k_half_median <- (median(df_above_tolerance$K, na.rm = TRUE)) / 2 message("Calculating summary statistics before quality control") - ss <- calculate_summary_stats( + df_stats <- calculate_summary_stats( df = df, variables = summary_vars, - group_vars = c("conc_num", "conc_num_factor")) - df_stats <- ss$df_with_stats + group_vars = c("conc_num", "conc_num_factor"))$df_with_stats message("Filtering non-finite data") - df_filtered_stats <- process_data(df_stats, c("L"), filter_nf = TRUE) + # df_filtered_stats <- process_data(df_stats, c("L"), filter_nf = TRUE) message("Calculating summary statistics after quality control") ss <- calculate_summary_stats( @@ -1089,7 +1070,7 @@ main <- function() { df_na_ss <- ss$summary_stats df_na_stats <- ss$df_with_stats write.csv(df_na_ss, file = file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE) - df_na_filtered_stats <- process_data(df_na_stats, c("L"), filter_nf = TRUE) + # df_na_filtered_stats <- process_data(df_na_stats, c("L"), filter_nf = TRUE) message("Calculating summary statistics after quality control excluding zero values") ss <- calculate_summary_stats( @@ -1097,7 +1078,7 @@ main <- function() { variables = summary_vars, group_vars = c("conc_num", "conc_num_factor")) df_no_zeros_stats <- ss$df_with_stats - df_no_zeros_filtered_stats <- process_data(df_no_zeros_stats, c("L"), filter_nf = TRUE) + # df_no_zeros_filtered_stats <- process_data(df_no_zeros_stats, c("L"), filter_nf = TRUE) message("Filtering by 2SD of K") df_na_within_2sd_k <- df_na_stats %>% @@ -1137,7 +1118,7 @@ main <- function() { frequency_delta_bg_plot_configs <- list( list( - df = df_filtered_stats, + df = df_stats, x_var = "delta_bg", y_var = NULL, plot_type = "density", @@ -1148,7 +1129,7 @@ main <- function() { error_bar = FALSE, legend_position = "right"), list( - df = df_filtered_stats, + df = df_stats, x_var = "delta_bg", y_var = NULL, plot_type = "bar", @@ -1185,27 +1166,27 @@ main <- function() { plate_analysis_plot_configs <- generate_plate_analysis_plot_configs( variables = summary_vars, - df_before = df_filtered_stats, - df_after = df_na_filtered_stats, + df_before = df_stats, + df_after = df_na_stats, ) plate_analysis_boxplot_configs <- generate_plate_analysis_plot_configs( variables = summary_vars, - df_before = df_filtered_stats, - df_after = df_na_filtered_stats, + df_before = df_stats, + df_after = df_na_stats, plot_type = "box" ) plate_analysis_no_zeros_plot_configs <- generate_plate_analysis_plot_configs( variables = summary_vars, stages = c("after"), # Only after QC - df_after = df_no_zeros_filtered_stats, + df_after = df_no_zeros_stats, ) plate_analysis_no_zeros_boxplot_configs <- generate_plate_analysis_plot_configs( variables = summary_vars, stages = c("after"), # Only after QC - df_after = df_no_zeros_filtered_stats, + df_after = df_no_zeros_stats, plot_type = "box" ) @@ -1291,19 +1272,16 @@ main <- function() { message("Calculating summary statistics for background strain") ss_bg <- calculate_summary_stats(df_bg, summary_vars, group_vars = c("OrfRep", "conc_num", "conc_num_factor")) summary_stats_bg <- ss_bg$summary_stats - # df_bg_stats <- ss_bg$df_with_stats write.csv(summary_stats_bg, file = file.path(out_dir, paste0("SummaryStats_BackgroundStrains_", strain, ".csv")), row.names = FALSE) # Filter reference and deletion strains - # Formerly X2_RF (reference strains) - df_reference <- df_na_stats %>% + df_reference <- df_na_stats %>% # formerly X2_RF filter(OrfRep == strain) %>% mutate(SM = 0) - # Formerly X2 (deletion strains) - df_deletion <- df_na_stats %>% + df_deletion <- df_na_stats %>% # formerly X2 filter(OrfRep != strain) %>% mutate(SM = 0) @@ -1329,17 +1307,25 @@ main <- function() { ungroup() message("Calculating reference strain interaction scores") - reference_results <- calculate_interaction_scores(reference_strain, max_conc) + df_reference_stats <- calculate_summary_stats( + df = refrence_strain, + variables = interaction_vars, + group_vars = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor") + )$df_with_stats + reference_results <- calculate_interaction_scores(df_reference_stats, max_conc, group_vars = c("OrfRep", "Gene", "num")) zscores_calculations_reference <- reference_results$calculations zscores_interactions_reference <- reference_results$interactions - # zscores_calculations_reference_joined <- reference_results$calculations_joined zscores_interactions_reference_joined <- reference_results$interactions_joined message("Calculating deletion strain(s) interactions scores") - deletion_results <- calculate_interaction_scores(deletion_strains, max_conc) + df_deletion_stats <- calculate_summary_stats( + df = deletion_strains, + variables = interaction_vars, + group_vars = c("OrfRep", "Gene", "conc_num", "conc_num_factor") + )$df_with_stats + deletion_results <- calculate_interaction_scores(df_deletion_stats, max_conc, group_vars = c("OrfRep")) zscores_calculations <- deletion_results$calculations zscores_interactions <- deletion_results$interactions - # zscores_calculations_joined <- deletion_results$calculations_joined zscores_interactions_joined <- deletion_results$interactions_joined # Writing Z-Scores to file @@ -1358,7 +1344,7 @@ main <- function() { generate_and_save_plots(out_dir, "InteractionPlots", deletion_plot_configs, grid_layout = list(ncol = 4, nrow = 3)) # Define conditions for enhancers and suppressors - # TODO Add to study config file? + # TODO Add to study config? threshold <- 2 enhancer_condition_L <- zscores_interactions$Avg_Zscore_L >= threshold suppressor_condition_L <- zscores_interactions$Avg_Zscore_L <= -threshold