diff --git a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R index ddba0c7f..7d9b0ad1 100644 --- a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R +++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R @@ -193,15 +193,13 @@ calculate_summary_stats <- function(df, variables, group_vars) { return(list(summary_stats = summary_stats, df_with_stats = df_joined)) } -calculate_interaction_scores <- function(df, max_conc, variables = c("L", "K", "r", "AUC"), +calculate_interaction_scores <- function(df, max_conc, bg_stats, variables = c("L", "K", "r", "AUC"), group_vars = c("OrfRep", "Gene", "num")) { # Calculate total concentration variables total_conc_num <- length(unique(df$conc_num)) - - - calculations <- calculations %>% + calculations <- df %>% group_by(OrfRep, Gene, num) %>% mutate( NG = sum(NG, na.rm = TRUE), @@ -210,14 +208,14 @@ calculate_interaction_scores <- function(df, max_conc, variables = c("L", "K", " num_non_removed_concs = total_conc_num - sum(DB, na.rm = TRUE) - 1, # Store the background data - WT_L = bg_means$L, - WT_K = bg_means$K, - WT_r = bg_means$r, - WT_AUC = bg_means$AUC, - WT_sd_L = bg_sd$L, - WT_sd_K = bg_sd$K, - WT_sd_r = bg_sd$r, - WT_sd_AUC = bg_sd$AUC, + WT_L = bg_stats$WT_L, + WT_K = bg_stats$WT_K, + WT_r = bg_stats$WT_r, + WT_AUC = bg_stats$WT_AUC, + WT_sd_L = bg_stats$WT_sd_L, + WT_sd_K = bg_stats$WT_sd_K, + WT_sd_r = bg_stats$WT_sd_r, + WT_sd_AUC = bg_stats$WT_sd_AUC, Raw_Shift_L = first(mean_L) - bg_means$L, Raw_Shift_K = first(mean_K) - bg_means$K, Raw_Shift_r = first(mean_r) - bg_means$r, @@ -1050,7 +1048,7 @@ main <- function() { df_no_zeros <- df_na %>% filter(L > 0) # formerly X_noZero # Save some constants - max_conc <- max(df$conc_num_factor) + max_conc <- max(as.numeric(df$conc_num_factor)) l_half_median <- (median(df_above_tolerance$L, na.rm = TRUE)) / 2 k_half_median <- (median(df_above_tolerance$K, na.rm = TRUE)) / 2 @@ -1072,6 +1070,20 @@ main <- function() { write.csv(df_na_ss, file = file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE) # df_na_filtered_stats <- process_data(df_na_stats, c("L"), filter_nf = TRUE) + # Pull the background means and standard deviations from zero concentration + bg_stats <- df_na_stats %>% + filter(conc_num == 0) %>% + summarise( + WT_L = first(mean_L), + WT_K = first(mean_K), + WT_r = first(mean_r), + WT_AUC = first(mean_AUC), + WT_sd_L = first(sd_L), + WT_sd_K = first(sd_K), + WT_sd_r = first(sd_r), + WT_sd_AUC = first(sd_AUC) + ) + message("Calculating summary statistics after quality control excluding zero values") ss <- calculate_summary_stats( df = df_no_zeros, @@ -1308,11 +1320,11 @@ main <- function() { message("Calculating reference strain interaction scores") df_reference_stats <- calculate_summary_stats( - df = refrence_strain, + df = reference_strain, variables = interaction_vars, group_vars = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor") )$df_with_stats - reference_results <- calculate_interaction_scores(df_reference_stats, max_conc, group_vars = c("OrfRep", "Gene", "num")) + reference_results <- calculate_interaction_scores(df_reference_stats, max_conc, bg_stats, group_vars = c("OrfRep", "Gene", "num")) zscores_calculations_reference <- reference_results$calculations zscores_interactions_reference <- reference_results$interactions zscores_interactions_reference_joined <- reference_results$interactions_joined @@ -1323,7 +1335,7 @@ main <- function() { variables = interaction_vars, group_vars = c("OrfRep", "Gene", "conc_num", "conc_num_factor") )$df_with_stats - deletion_results <- calculate_interaction_scores(df_deletion_stats, max_conc, group_vars = c("OrfRep")) + deletion_results <- calculate_interaction_scores(df_deletion_stats, max_conc, bg_stats, group_vars = c("OrfRep")) zscores_calculations <- deletion_results$calculations zscores_interactions <- deletion_results$interactions zscores_interactions_joined <- deletion_results$interactions_joined